def __init__(self, sid, pending_queue, completed_queue,
             rmgr, rmq_conn_params, rts):
    """
    Validate the arguments, set up logging/profiling helpers and (re)create
    the two heartbeat queues on the RabbitMQ server.

    **Arguments:**
        * **sid** (`str`): session ID; used as prefix of the heartbeat
          queue names and as name of the log/profile directory below the
          current working directory.
        * **pending_queue** (`list`): stored as `self._pending_queue`.
        * **completed_queue** (`list`): stored as `self._completed_queue`.
        * **rmgr** (`Base_ResourceManager`): stored as `self._rmgr`.
        * **rmq_conn_params** (`pika.connection.ConnectionParameters`):
          used to open the blocking connection to the RabbitMQ server.
        * **rts**: stored as `self._rts`; not validated here.

    **Raises:**
        * `TypeError` if one of the validated arguments has the wrong type.
          NOTE(review): `TypeError` is called with keyword arguments, so it
          is presumably the project's own exception class and not the
          builtin -- confirm against the file's imports.
    """

    if not isinstance(sid, str):
        raise TypeError(expected_type=str, actual_type=type(sid))

    # both queue arguments are checked against `list`, so that is also the
    # type which has to be reported as the expected one
    if not isinstance(pending_queue, list):
        raise TypeError(expected_type=list,
                        actual_type=type(pending_queue))

    if not isinstance(completed_queue, list):
        raise TypeError(expected_type=list,
                        actual_type=type(completed_queue))

    if not isinstance(rmgr, Base_ResourceManager):
        raise TypeError(expected_type=Base_ResourceManager,
                        actual_type=type(rmgr))

    if not isinstance(rmq_conn_params,
                      pika.connection.ConnectionParameters):
        raise TypeError(expected_type=pika.connection.ConnectionParameters,
                        actual_type=type(rmq_conn_params))

    self._sid = sid
    self._pending_queue = pending_queue
    self._completed_queue = completed_queue
    self._rmgr = rmgr
    self._rts = rts
    self._rmq_conn_params = rmq_conn_params

    # Utility parameters
    self._uid = ru.generate_id('task_manager.%(counter)04d', ru.ID_CUSTOM)
    self._path = os.getcwd() + '/' + self._sid

    name = 'radical.entk.%s' % self._uid
    self._log = ru.Logger(name, path=self._path)
    self._prof = ru.Profiler(name, path=self._path)
    self._dh = ru.DebugHelper(name=name)

    self._hb_request_q = '%s-hb-request' % self._sid
    self._hb_response_q = '%s-hb-response' % self._sid

    self._tmgr_process = None
    self._hb_thread = None
    self._hb_interval = int(os.getenv('ENTK_HB_INTERVAL', 30))

    # The connection is only needed to reset the heartbeat queues.  Close
    # it in any case, so that a failing channel / queue operation does not
    # leave an open connection behind.
    mq_connection = pika.BlockingConnection(rmq_conn_params)
    try:
        mq_channel = mq_connection.channel()

        # delete + declare: start with an empty heartbeat response queue
        mq_channel.queue_delete(queue=self._hb_response_q)
        mq_channel.queue_declare(queue=self._hb_response_q)

        # delete + declare: start with an empty heartbeat request queue
        mq_channel.queue_delete(queue=self._hb_request_q)
        mq_channel.queue_declare(queue=self._hb_request_q)
    finally:
        mq_connection.close()
def __init__(self, name, cfg, term):
    """
    Initialize the process object and start its watcher.

    `name` is stored as `self.uid`, `cfg` and `term` are stored under their
    own names.  `cfg` is additionally handed to the `Watcher`, which is
    started before the constructor returns.
    """

    mp.Process.__init__(self)

    # identity and configuration
    self.is_parent = True
    self.uid       = name
    self.cfg       = cfg
    self.term      = term

    self.dh        = ru.DebugHelper()

    # life cycle flags
    self.stopped   = False
    self.killed    = False

    # the watcher is started right away
    self.watcher   = Watcher(cfg)
    self.watcher.start()
def test():
    """
    Create a session from the `session.json` next to this file, start one
    `CompA` and two `CompB` components on it, wait three seconds, and close
    the session again (also if any of the steps before failed).
    """

    session = None

    try:
        cfg_path     = "%s/session.json" % os.path.dirname(__file__)
        cfg          = ru.read_json(cfg_path)
        debug_helper = ru.DebugHelper()
        session      = rp.Session(cfg=cfg)

        # all components are created first, and then started in order
        components = [CompA(session), CompB(session), CompB(session)]
        for component in components:
            component.start()

        # session._controller.add_things(components)
        time.sleep(3)

    finally:
        if session:
            print('close')
            session.close()
#!/usr/bin/env python

__copyright__ = "Copyright 2013-2015, http://radical.rutgers.edu"
__license__ = "MIT"

import sys
import radical.pilot as rp
import radical.utils as ru
import time

dh = ru.DebugHelper()

# experiment parameters
RUNTIME = 600
SLEEP = 10
PILOTS = 1
UNITS = 8
SCHED = rp.SCHED_BACKFILLING

# per-resource settings, keyed by resource label
resources = {
    'osg.xsede-virt-clust': {
        'project': 'TG-CCR140028',
        'queue': None,
        'schema': 'ssh',
    },
    'osg.connect': {
        'project': 'RADICAL',
        'queue': None,
        'schema': 'ssh',
    },
}

# wall clock time at which the script was started
start_time = time.time()
def __init__(self, dburl=None, uid=None, cfg=None, _connect=True):
    """
    Creates a new session.  A new Session instance is created and
    stored in the database.

    **Arguments:**
        * **dburl** (`string`): The MongoDB URL.  If none is given (and
          `_connect` is set), the URL is taken from the environment
          variable RADICAL_PILOT_DBURL, then from the config entries
          `default_dburl` and `dburl`.  If none of those is set, an error
          is raised.

        * **uid** (`string`): Create a session with this UID.  If not
          given, the config entry `session_id` is used.  A session which
          obtains its UID this way is flagged as *reconnected*; otherwise
          a new UID is generated.
          *Only use this when you know what you are doing!*

        * **cfg** (`dict`): Session configuration.  A deep copy is stored.
          If not given, `configs/session_<name>.json` is read from the
          directory of this module, where `<name>` is the value of
          RADICAL_PILOT_SESSION_CFG (default: `default`).

        * **_connect** (`bool`): enables the database URL lookup and the
          URL checks, and is passed on to `DBSession` as `connect`.

    **Returns:**
        * A new Session instance.

    **Raises:**
        * `RuntimeError` if no database URL can be found, or if the
          database session cannot be created.
        * `ValueError` if the database URL has no database name and no UID
          is known.
    """

    if os.uname()[0] == 'Darwin':
        # on MacOS, we are running out of file descriptors soon.  The code
        # below attempts to increase the limit of open files - but any error
        # is silently ignored, so this is a best-effort, no guarantee.  We
        # leave responsibility for system limits with the user.
        try:
            import resource
            limits = list(resource.getrlimit(resource.RLIMIT_NOFILE))
            # only ever raise the soft limit: a limit which is already
            # larger (or unlimited) must not be lowered to 512
            if limits[0] != resource.RLIM_INFINITY and limits[0] < 512:
                limits[0] = 512
                resource.setrlimit(resource.RLIMIT_NOFILE, limits)
        except Exception:
            # deliberate best-effort, see above
            pass

    self._dh          = ru.DebugHelper()
    self._valid       = True
    self._closed      = False
    self._valid_iter  = 0  # detect recursive calls of `is_valid()`

    # class state
    self._dbs         = None
    self._uid         = None
    self._dburl       = None
    self._reconnected = False

    self._cache       = dict()  # cache sandboxes etc.
    self._cache_lock  = threading.RLock()

    self._cache['resource_sandbox'] = dict()
    self._cache['session_sandbox']  = dict()
    self._cache['pilot_sandbox']    = dict()

    # before doing anything else, set up the debug helper for the lifetime
    # of the session.
    self._debug_helper = ru.DebugHelper()

    # Dictionaries holding all manager objects created during the session.
    # NOTE: should this also include agents?
    self._pmgrs      = dict()
    self._umgrs      = dict()
    self._bridges    = list()
    self._components = list()

    # FIXME: we work around some garbage collection issues we don't yet
    #        understand: instead of relying on the GC to eventually collect
    #        some stuff, we actively free those on `session.close()`, at
    #        least for the current process.  Usually, all resources get
    #        nicely collected on process termination - but not when we
    #        create many sessions (one after the other) in the same
    #        application instance (ie. the same process).  This workaround
    #        takes care of that use case.
    #        The clean solution would be to ensure clean termination
    #        sequence, something which I seem to be unable to implement...
    #        :/
    self._to_close   = list()
    self._to_stop    = list()
    self._to_destroy = list()

    # cache the client sandbox
    # FIXME: this needs to be overwritten if configured differently in the
    #        session config, as should be the case for any agent side
    #        session instance.
    self._client_sandbox = os.getcwd()

    # The resource configuration dictionary associated with the session.
    self._resource_configs = {}

    # if a config is given, use its values:
    if cfg:
        self._cfg = copy.deepcopy(cfg)
    else:
        # otherwise we need a config
        self._cfg = ru.read_json("%s/configs/session_%s.json"
                % (os.path.dirname(__file__),
                   os.environ.get('RADICAL_PILOT_SESSION_CFG', 'default')))

    # fall back to config data where possible
    # sanity check on parameters
    if not uid:
        uid = self._cfg.get('session_id')

    if uid:
        self._uid         = uid
        self._reconnected = True
    else:
        # generate new uid, reset all other ID counters
        # FIXME: this will screw up counters for *concurrent* sessions,
        #        as the ID generation is managed in a process singleton.
        self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE)
        ru.reset_id_counters(prefix='rp.session', reset_all_others=True)

    if not self._cfg.get('session_id'):
        self._cfg['session_id'] = self._uid

    if not self._cfg.get('owner'):
        self._cfg['owner'] = self._uid

    if not self._cfg.get('logdir'):
        self._cfg['logdir'] = '%s/%s' % (os.getcwd(), self._uid)

    self._logdir = self._cfg['logdir']
    self._prof   = self._get_profiler(name=self._cfg['owner'])
    self._rep    = self._get_reporter(name=self._cfg['owner'])
    self._log    = self._get_logger(name=self._cfg['owner'],
                                    level=self._cfg.get('debug'))

    if _connect:
        # we need a dburl to connect to.

        if not dburl:
            dburl = os.environ.get("RADICAL_PILOT_DBURL")

        if not dburl:
            dburl = self._cfg.get('default_dburl')

        if not dburl:
            dburl = self._cfg.get('dburl')

        if not dburl:
            # we forgive missing dburl on reconnect, but not otherwise
            raise RuntimeError("no database URL (set RADICAL_PILOT_DBURL)")

    self._dburl = ru.Url(dburl)
    self._cfg['dburl'] = str(self._dburl)

    # now we have config and uid - initialize base class (saga session)
    rs.Session.__init__(self, uid=self._uid)

    # ----------------------------------------------------------------------
    # create new session
    if _connect:
        self._log.info("using database %s" % self._dburl)

        # if the database url contains a path element, we interpret that as
        # database name (without the leading slash)
        if  not self._dburl.path       or \
            self._dburl.path[0] != '/' or \
            len(self._dburl.path) <= 1:
            if not uid:
                # we fake reconnect if no DB is available -- but otherwise
                # we really really need a db connection...
                raise ValueError("incomplete DBURL '%s' no db name!" % self._dburl)

    if not self._reconnected:
        self._prof.prof('session_start', uid=self._uid)
        self._rep.info('<<new session: ')
        self._rep.plain('[%s]' % self._uid)
        self._rep.info('<<database : ')
        self._rep.plain('[%s]' % self._dburl)

    self._load_resource_configs()

    self._rec = os.environ.get('RADICAL_PILOT_RECORD_SESSION')
    if self._rec:
        # NOTE: Session recording cannot handle reconnected sessions, yet.
        #       We thus turn it off here with a warning
        if self._reconnected:
            self._log.warn("no session recording on reconnected session")
        else:
            # append session ID to recording path
            self._rec = "%s/%s" % (self._rec, self._uid)

            # create recording path and record session.  The path is
            # derived from the environment, so it is created directly and
            # not passed through a shell; an existing directory is fine.
            try:
                os.makedirs(self._rec)
            except OSError:
                if not os.path.isdir(self._rec):
                    raise

            ru.write_json({'dburl': str(self.dburl)},
                          "%s/session.json" % self._rec)
            self._log.info("recording session in %s" % self._rec)

    # create/connect database handle
    try:
        self._dbs = DBSession(sid=self.uid, dburl=str(self._dburl),
                              cfg=self._cfg, logger=self._log,
                              connect=_connect)

        # from here on we should be able to close the session again
        self._log.info("New Session created: %s." % self.uid)

    except Exception as ex:
        self._rep.error(">>err\n")
        self._log.exception('session create failed')
        raise RuntimeError("Couldn't create new session (database URL '%s' incorrect?): %s"
                        % (dburl, ex))
def __init__(self, database_url=None, database_name=None, name=None):
    """Creates a new session.

    A new Session instance is created and stored in the database.

    **Arguments:**
        * **database_url** (`string`): The MongoDB URL.  If none is given,
          RP uses the environment variable RADICAL_PILOT_DBURL.  If that is
          not set, an error will be raised.

        * **database_name** (`string`): Deprecated -- specify the database
          name as path element of the URL instead.  The value (default:
          'radicalpilot') is only used if the URL has no path element.

        * **name** (`string`): An optional human readable name.  If given,
          it is also used as session UID; otherwise a new UID is generated
          and also used as name.

    **Returns:**
        * A new Session instance.

    **Raises:**
        * `PilotException` if no database URL is available, or if the
          database session cannot be created.
    """

    logger = ru.get_logger('radical.pilot')

    if database_name:
        logger.error("The 'database_name' parameter is deprecated - please specify an URL path")
    else:
        database_name = 'radicalpilot'

    # init the base class inits
    saga.Session.__init__(self)
    self._dh        = ru.DebugHelper()
    self._valid     = True
    self._terminate = threading.Event()
    self._terminate.clear()

    # before doing anything else, set up the debug helper for the lifetime
    # of the session.
    self._debug_helper = ru.DebugHelper()

    # Dictionaries holding all manager objects created during the session.
    self._pilot_manager_objects = dict()
    self._unit_manager_objects  = dict()

    # The resource configuration dictionary associated with the session.
    self._resource_configs = {}

    if not database_url:
        database_url = os.getenv("RADICAL_PILOT_DBURL", None)

    if not database_url:
        raise PilotException("no database URL (set RADICAL_PILOT_DBURL)")

    self._dburl = ru.Url(database_url)

    # if the database url contains a path element, we interpret that as
    # database name (without the leading slash)
    if  not self._dburl.path       or \
        self._dburl.path[0] != '/' or \
        len(self._dburl.path) <= 1:
        logger.error("incomplete URLs are deprecated -- missing database name!")
        self._dburl.path = database_name  # defaults to 'radicalpilot'

    logger.info("using database %s" % self._dburl)

    # ----------------------------------------------------------------------
    # create new session
    try:
        if name:
            self._name = name
            self._uid  = name
          # self._uid  = ru.generate_id ('rp.session.'+name+'.%(item_counter)06d', mode=ru.ID_CUSTOM)
        else:
            self._uid  = ru.generate_id('rp.session', mode=ru.ID_PRIVATE)
            self._name = self._uid

        logger.report.info('<<create session %s' % self._uid)

        self._dbs = dbSession(sid=self._uid,
                              name=self._name,
                              dburl=self._dburl)

        # use the URL as reported back by the database session
        self._dburl = self._dbs._dburl

        logger.info("New Session created: %s." % str(self))

    except Exception as ex:
        logger.exception('session create failed')
        raise PilotException("Couldn't create new session (database URL '%s' incorrect?): %s"
                        % (self._dburl, ex))
def __init__(self, database_url=None, database_name="radicalpilot",
             uid=None, name=None):
    """Creates a new or reconnects to an existing session.

    If called without a uid, a new Session instance is created and
    stored in the database. If uid is set, an existing session is
    retrieved from the database.

    **Arguments:**
        * **database_url** (`string`): The MongoDB URL.  If none is given,
          RP uses the environment variable RADICAL_PILOT_DBURL.  If that is
          not set, an error will be raised.

        * **database_name** (`string`): An alternative database name
          (default: 'radicalpilot').  A path element in the database URL
          takes precedence for `self._database_name`.

        * **uid** (`string`): If uid is set, we try re-connect to an
          existing session instead of creating a new one.

        * **name** (`string`): An optional human readable name.  If given
          for a new session, it is also used as session UID.

    **Returns:**
        * A new Session instance.

    **Raises:**
        * :class:`radical.pilot.DatabaseError`
        * `PilotException` if no database URL is available, or if the new
          session cannot be created.
    """

    # init the base class inits
    saga.Session.__init__(self)
    Object.__init__(self)

    # before doing anything else, set up the debug helper for the lifetime
    # of the session.
    self._debug_helper = ru.DebugHelper()

    # Dictionaries holding all manager objects created during the session.
    self._pilot_manager_objects = list()
    self._unit_manager_objects = list()

    # Create a new process registry. All objects belonging to this
    # session will register their worker processes (if they have any)
    # in this registry. This makes it easier to shut down things in
    # a more coordinate fashion.
    self._process_registry = _ProcessRegistry()

    # The resource configuration dictionary associated with the session.
    self._resource_configs = {}

    self._database_url = database_url
    self._database_name = database_name

    # fall back to the environment if no URL was passed
    if not self._database_url:
        self._database_url = os.getenv("RADICAL_PILOT_DBURL", None)

    if not self._database_url:
        raise PilotException("no database URL (set RADICAL_PILOT_DBURL)")

    logger.info("using database url %s" % self._database_url)

    # if the database url contains a path element, we interpret that as
    # database name (without the leading slash)
    tmp_url = ru.Url(self._database_url)
    if tmp_url.path and \
       tmp_url.path[0] == '/' and \
       len(tmp_url.path) > 1 :
        self._database_name = tmp_url.path[1:]
        logger.info("using database path %s" % self._database_name)
    else:
        logger.info("using database name %s" % self._database_name)

    # Loading all "default" resource configurations
    module_path = os.path.dirname(os.path.abspath(__file__))
    default_cfgs = "%s/configs/*.json" % module_path
    config_files = glob.glob(default_cfgs)

    for config_file in config_files:

        # a config file which cannot be loaded is logged and skipped
        try:
            rcs = ResourceConfig.from_file(config_file)
        except Exception as e:
            logger.error("skip config file %s: %s" % (config_file, e))
            continue

        for rc in rcs:
            logger.info("Loaded resource configurations for %s" % rc)
            self._resource_configs[rc] = rcs[rc].as_dict()

    # user configs from $HOME are loaded after the defaults, so that they
    # can amend or extend them
    user_cfgs = "%s/.radical/pilot/configs/*.json" % os.environ.get('HOME')
    config_files = glob.glob(user_cfgs)

    for config_file in config_files:

        # a config file which cannot be loaded is logged and skipped
        try:
            rcs = ResourceConfig.from_file(config_file)
        except Exception as e:
            logger.error("skip config file %s: %s" % (config_file, e))
            continue

        for rc in rcs:
            logger.info("Loaded resource configurations for %s" % rc)

            if rc in self._resource_configs:
                # config exists -- merge user config into it
                ru.dict_merge(self._resource_configs[rc],
                              rcs[rc].as_dict(),
                              policy='overwrite')
            else:
                # new config -- add as is
                self._resource_configs[rc] = rcs[rc].as_dict()

    # resource aliases are read from the 'aliases' entry of aliases.json
    default_aliases = "%s/configs/aliases.json" % module_path
    self._resource_aliases = ru.read_json_str(default_aliases)['aliases']

    ##########################
    ## CREATE A NEW SESSION ##
    ##########################
    if uid is None:
        try:
            self._connected = None

            # a given name doubles as session UID; otherwise the generated
            # UID doubles as name
            if name:
                self._name = name
                self._uid = name
              # self._uid = ru.generate_id ('rp.session.'+name+'.%(item_counter)06d', mode=ru.ID_CUSTOM)
            else:
                self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE)
                self._name = self._uid

            # NOTE(review): `db_name` receives the `database_name` argument,
            #               not `self._database_name` which may have been
            #               derived from the URL path above -- confirm that
            #               this is intended.
            self._dbs, self._created, self._connection_info = \
                    dbSession.new(sid = self._uid,
                                  name = self._name,
                                  db_url = self._database_url,
                                  db_name = database_name)

            logger.info("New Session created%s." % str(self))

        except Exception, ex:
            logger.exception('session create failed')
            raise PilotException("Couldn't create new session (database URL '%s' incorrect?): %s" \
                            % (self._database_url, ex))