Example #1
0
    def __init__(self, sid, pending_queue, completed_queue, rmgr,
                 rmq_conn_params, rts):
        """
        Validate the arguments, set up the logging / profiling helpers and
        (re)create the two RabbitMQ heartbeat queues of this task manager.

        Arguments:
            sid:             session ID (`str`); used as name of the
                             sub-directory (below the current working
                             directory) handed to logger and profiler, and
                             as prefix of the heartbeat queue names.
            pending_queue:   `list`, stored as `self._pending_queue`.
            completed_queue: `list`, stored as `self._completed_queue`.
            rmgr:            `Base_ResourceManager` instance.
            rmq_conn_params: `pika.connection.ConnectionParameters` used to
                             connect to the RabbitMQ server.
            rts:             stored as `self._rts` without validation.

        Raises:
            TypeError: if `sid`, `pending_queue`, `completed_queue`, `rmgr`
                       or `rmq_conn_params` is of the wrong type.

        NOTE(review): `TypeError` is called with keyword arguments, so it is
                      presumably a project-level exception shadowing the
                      builtin -- confirm against the module imports.
        """

        if not isinstance(sid, str):
            raise TypeError(expected_type=str, actual_type=type(sid))

        # the queues are checked against `list`, so that is also the type
        # which needs to be reported as expected
        if not isinstance(pending_queue, list):
            raise TypeError(expected_type=list,
                            actual_type=type(pending_queue))

        if not isinstance(completed_queue, list):
            raise TypeError(expected_type=list,
                            actual_type=type(completed_queue))

        if not isinstance(rmgr, Base_ResourceManager):
            raise TypeError(expected_type=Base_ResourceManager,
                            actual_type=type(rmgr))

        if not isinstance(rmq_conn_params,
                          pika.connection.ConnectionParameters):
            raise TypeError(expected_type=pika.connection.ConnectionParameters,
                            actual_type=type(rmq_conn_params))

        self._sid = sid
        self._pending_queue = pending_queue
        self._completed_queue = completed_queue
        self._rmgr = rmgr
        self._rts = rts
        self._rmq_conn_params = rmq_conn_params

        # Utility parameters
        self._uid = ru.generate_id('task_manager.%(counter)04d', ru.ID_CUSTOM)
        self._path = os.getcwd() + '/' + self._sid

        name = 'radical.entk.%s' % self._uid
        self._log = ru.Logger(name, path=self._path)
        self._prof = ru.Profiler(name, path=self._path)
        self._dh = ru.DebugHelper(name=name)

        # Heartbeat queue names are derived from the session ID
        self._hb_request_q = '%s-hb-request' % self._sid
        self._hb_response_q = '%s-hb-response' % self._sid

        self._tmgr_process = None
        self._hb_thread = None
        self._hb_interval = int(os.getenv('ENTK_HB_INTERVAL', 30))

        # The connection is only needed to reset the heartbeat queues.  Close
        # it in any case, so that a failing queue operation does not leave an
        # open connection behind.
        mq_connection = pika.BlockingConnection(rmq_conn_params)
        try:
            mq_channel = mq_connection.channel()

            # Delete and re-declare both queues, so that we start out with
            # empty queues
            mq_channel.queue_delete(queue=self._hb_response_q)
            mq_channel.queue_declare(queue=self._hb_response_q)

            mq_channel.queue_delete(queue=self._hb_request_q)
            mq_channel.queue_declare(queue=self._hb_request_q)

        finally:
            # a connection which already went down must not be closed again:
            # that would raise and mask the original error
            if mq_connection.is_open:
                mq_connection.close()
Example #2
0
 def __init__(self, name, cfg, term):
     """
     Initialize the process base class, store the given settings, reset
     the state flags, and create and start a `Watcher` for `cfg`.

     Arguments:
         name: stored as `self.uid`
         cfg:  stored as `self.cfg`, and handed to the `Watcher`
         term: stored as `self.term`
     """
     mp.Process.__init__(self)

     # settings handed in by the caller
     self.uid  = name
     self.cfg  = cfg
     self.term = term

     # initial state flags
     self.is_parent = True
     self.stopped   = False
     self.killed    = False

     self.dh = ru.DebugHelper()

     # keep a handle on the watcher and start it right away
     watcher = Watcher(cfg)
     self.watcher = watcher
     watcher.start()
def test():
    """
    Start one `CompA` and two `CompB` instances on a session which is
    configured from the `session.json` file located next to this module,
    let them run for three seconds, and close the session.

    The session is closed in any case once it has been created, also when
    creating or starting one of the components fails.
    """

    s = None
    try:
        cfg = ru.read_json("%s/session.json" % os.path.dirname(__file__))
        # NOTE(review): the helper is not referenced again -- presumably it
        #               is created for its side effects; confirm before
        #               removing.
        dh = ru.DebugHelper()
        s = rp.Session(cfg=cfg)

        ca1 = CompA(s)
        cb1 = CompB(s)
        cb2 = CompB(s)

        ca1.start()
        cb1.start()
        cb2.start()

        time.sleep(3)

    finally:
        if s:
            # parenthesized form: prints the same text on Python 2, and is
            # also valid syntax on Python 3
            print('close')
            s.close()
Example #4
0
#!/usr/bin/env python

__copyright__ = "Copyright 2013-2015, http://radical.rutgers.edu"
__license__   = "MIT"

import sys
import radical.pilot as rp
import radical.utils as ru
import time

# Debug helper, created once at import time.
dh = ru.DebugHelper()

# Run parameters.
# NOTE(review): no units are visible here -- RUNTIME and SLEEP are presumably
#               time spans, PILOTS and UNITS presumably counts; confirm
#               against the code which consumes them.
RUNTIME = 600
SLEEP = 10
PILOTS = 1
UNITS = 8
SCHED = rp.SCHED_BACKFILLING

# Per-resource settings, keyed by resource label.
resources = {
    'osg.xsede-virt-clust': {'project': 'TG-CCR140028',
                             'queue': None,
                             'schema': 'ssh'},
    'osg.connect': {'project': 'RADICAL',
                    'queue': None,
                    'schema': 'ssh'},
}

# Wall-clock timestamp taken once the settings above are defined.
start_time = time.time()
Example #5
0
    def __init__(self, dburl=None, uid=None, cfg=None, _connect=True):
        """
        Creates a new session.  A new Session instance is created and
        stored in the database.

        **Arguments:**
            * **dburl** (`string`): The MongoDB URL.  If none is given and
              `_connect` is set, RP uses the environment variable
              RADICAL_PILOT_DBURL, then the config entries `default_dburl`
              and `dburl`.  If none of those is set, an error is raised.

            * **uid** (`string`): Create a session with this UID.  If not
              given, the config entry `session_id` is used.  If that is
              not set either, a new UID is generated.
              *Only use this when you know what you are doing!*

            * **cfg** (`dict`-like): The session configuration; a deep copy
              is used.  If not given, the configuration is read from
              `configs/session_<name>.json` next to this module, where
              `<name>` is the value of RADICAL_PILOT_SESSION_CFG (default:
              `default`).

            * **_connect** (`bool`): If set, a database URL is required and
              checked.  The flag is also passed on to `DBSession`.

        **Returns:**
            * A new Session instance.

        **Raises:**
            * `RuntimeError` if `_connect` is set but no database URL can
              be determined, or if the database session cannot be created.
            * `ValueError` if `_connect` is set, the database URL carries
              no database name, and no session UID is known.

        """

        if os.uname()[0] == 'Darwin':
            # on MacOS, we are running out of file descriptors soon.  The code
            # below attempts to increase the limit of open files - but any error
            # is silently ignored, so this is a best-effort, no guarantee.  We
            # leave responsibility for system limits with the user.
            try:
                import resource
                soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
                # only ever *raise* the soft limit: never lower a limit which
                # is already more generous than what we ask for
                if soft != resource.RLIM_INFINITY and soft < 512:
                    resource.setrlimit(resource.RLIMIT_NOFILE, (512, hard))
            except Exception:
                pass

        self._dh = ru.DebugHelper()
        self._valid = True
        self._closed = False
        self._valid_iter = 0  # detect recursive calls of `is_valid()`

        # class state
        self._dbs = None
        self._uid = None
        self._dburl = None
        self._reconnected = False

        self._cache = dict()  # cache sandboxes etc.
        self._cache_lock = threading.RLock()

        self._cache['resource_sandbox'] = dict()
        self._cache['session_sandbox'] = dict()
        self._cache['pilot_sandbox'] = dict()

        # before doing anything else, set up the debug helper for the lifetime
        # of the session.
        self._debug_helper = ru.DebugHelper()

        # Dictionaries holding all manager objects created during the session.
        # NOTE: should this also include agents?
        self._pmgrs = dict()
        self._umgrs = dict()
        self._bridges = list()
        self._components = list()

        # FIXME: we work around some garbage collection issues we don't yet
        #        understand: instead of relying on the GC to eventually collect
        #        some stuff, we actively free those on `session.close()`, at
        #        least for the current process.  Usually, all resources get
        #        nicely collected on process termination - but not when we
        #        create many sessions (one after the other) in the same
        #        application instance (ie. the same process).  This workaround
        #        takes care of that use case.
        #        The clean solution would be to ensure clean termination
        #        sequence, something which I seem to be unable to implement...
        #        :/
        self._to_close = list()
        self._to_stop = list()
        self._to_destroy = list()

        # cache the client sandbox
        # FIXME: this needs to be overwritten if configured differently in the
        #        session config, as should be the case for any agent side
        #        session instance.
        self._client_sandbox = os.getcwd()

        # The resource configuration dictionary associated with the session.
        self._resource_configs = {}

        # if a config is given, use its values:
        if cfg:
            self._cfg = copy.deepcopy(cfg)
        else:
            # otherwise we need a config
            self._cfg = ru.read_json("%s/configs/session_%s.json" \
                    % (os.path.dirname(__file__),
                       os.environ.get('RADICAL_PILOT_SESSION_CFG', 'default')))

        # fall back to config data where possible
        # sanity check on parameters
        if not uid:
            uid = self._cfg.get('session_id')

        if uid:
            # a known UID (argument or config) marks the session as
            # reconnected
            self._uid = uid
            self._reconnected = True
        else:
            # generate new uid, reset all other ID counters
            # FIXME: this will screw up counters for *concurrent* sessions,
            #        as the ID generation is managed in a process singleton.
            self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE)
            ru.reset_id_counters(prefix='rp.session', reset_all_others=True)

        # fill in config entries which are not set, based on the session UID
        if not self._cfg.get('session_id'):
            self._cfg['session_id'] = self._uid
        if not self._cfg.get('owner'):
            self._cfg['owner'] = self._uid
        if not self._cfg.get('logdir'):
            self._cfg['logdir'] = '%s/%s' % (os.getcwd(), self._uid)

        self._logdir = self._cfg['logdir']
        self._prof = self._get_profiler(name=self._cfg['owner'])
        self._rep = self._get_reporter(name=self._cfg['owner'])
        self._log = self._get_logger(name=self._cfg['owner'],
                                     level=self._cfg.get('debug'))

        if _connect:
            # we need a dburl to connect to.

            if not dburl:
                dburl = os.environ.get("RADICAL_PILOT_DBURL")

            if not dburl:
                dburl = self._cfg.get('default_dburl')

            if not dburl:
                dburl = self._cfg.get('dburl')

            if not dburl:
                # we forgive missing dburl on reconnect, but not otherwise
                raise RuntimeError("no database URL (set RADICAL_PILOT_DBURL)")

        # NOTE: `dburl` can still be unset at this point if `_connect` is
        #       not set.
        self._dburl = ru.Url(dburl)
        self._cfg['dburl'] = str(self._dburl)

        # now we have config and uid - initialize base class (saga session)
        rs.Session.__init__(self, uid=self._uid)

        # ----------------------------------------------------------------------
        # create new session
        if _connect:
            self._log.info("using database %s" % self._dburl)

            # if the database url contains a path element, we interpret that as
            # database name (without the leading slash)
            if  not self._dburl.path         or \
                self._dburl.path[0]   != '/' or \
                len(self._dburl.path) <=  1  :
                if not uid:
                    # we fake reconnect if no DB is available -- but otherwise we
                    # really really need a db connection...
                    raise ValueError("incomplete DBURL '%s' no db name!" %
                                     self._dburl)

        if not self._reconnected:
            self._prof.prof('session_start', uid=self._uid)
            self._rep.info('<<new session: ')
            self._rep.plain('[%s]' % self._uid)
            self._rep.info('<<database   : ')
            self._rep.plain('[%s]' % self._dburl)

        self._load_resource_configs()

        self._rec = os.environ.get('RADICAL_PILOT_RECORD_SESSION')
        if self._rec:
            # NOTE: Session recording cannot handle reconnected sessions, yet.
            #       We thus turn it off here with a warning
            if self._reconnected:
                self._log.warn("no session recording on reconnected session")

            else:
                # append session ID to recording path
                self._rec = "%s/%s" % (self._rec, self._uid)

                # create recording path and record session
                os.system('mkdir -p %s' % self._rec)
                ru.write_json({'dburl': str(self.dburl)},
                              "%s/session.json" % self._rec)
                self._log.info("recording session in %s" % self._rec)

        # create/connect database handle
        try:
            self._dbs = DBSession(sid=self.uid,
                                  dburl=str(self._dburl),
                                  cfg=self._cfg,
                                  logger=self._log,
                                  connect=_connect)

            # from here on we should be able to close the session again
            self._log.info("New Session created: %s." % self.uid)

        except Exception as ex:
            self._rep.error(">>err\n")
            self._log.exception('session create failed')
            raise RuntimeError("Couldn't create new session (database URL '%s' incorrect?): %s" \
                            % (dburl, ex))
Example #6
0
    def __init__(self, database_url=None, database_name=None, name=None):
        """Creates a new session.

        A new Session instance is created and stored in the database.  If
        `name` is given, it is used as both session name and session UID;
        otherwise a new UID is generated and also used as name.

        **Arguments:**
            * **database_url** (`string`): The MongoDB URL.  If none is given,
              RP uses the environment variable RADICAL_PILOT_DBURL.  If that is
              not set, an error will be raised.

            * **database_name** (`string`): Deprecated - please specify the
              database name as URL path instead.  It is used as URL path if
              the URL does not carry one (default: 'radicalpilot').

            * **name** (`string`): An optional human readable name.

        **Returns:**
            * A new Session instance.

        **Raises:**
            * `PilotException` if no database URL is available, or if the
              session cannot be created.

        """

        logger = ru.get_logger('radical.pilot')

        if database_name:
            logger.error(
                "The 'database_name' parameter is deprecated - please specify an URL path"
            )
        else:
            database_name = 'radicalpilot'

        # init the base class inits
        saga.Session.__init__(self)
        self._dh = ru.DebugHelper()
        self._valid = True
        self._terminate = threading.Event()
        self._terminate.clear()

        # before doing anything else, set up the debug helper for the lifetime
        # of the session.
        self._debug_helper = ru.DebugHelper()

        # Dictionaries holding all manager objects created during the session.
        self._pilot_manager_objects = dict()
        self._unit_manager_objects = dict()

        # The resource configuration dictionary associated with the session.
        self._resource_configs = {}

        if not database_url:
            database_url = os.getenv("RADICAL_PILOT_DBURL", None)

        if not database_url:
            raise PilotException("no database URL (set RADICAL_PILOT_DBURL)")

        self._dburl = ru.Url(database_url)

        # if the database url contains a path element, we interpret that as
        # database name (without the leading slash)
        if  not self._dburl.path         or \
            self._dburl.path[0]   != '/' or \
            len(self._dburl.path) <=  1  :
            logger.error(
                "incomplete URLs are deprecated -- missing database name!")
            self._dburl.path = database_name  # defaults to 'radicalpilot'

        logger.info("using database %s" % self._dburl)

        # ----------------------------------------------------------------------
        # create new session
        try:
            if name:
                # a given name doubles as session UID
                self._name = name
                self._uid = name
            else:
                self._uid = ru.generate_id('rp.session', mode=ru.ID_PRIVATE)
                self._name = self._uid

            logger.report.info('<<create session %s' % self._uid)

            self._dbs = dbSession(sid=self._uid,
                                  name=self._name,
                                  dburl=self._dburl)

            # from here on, use the URL as kept by the database session
            self._dburl = self._dbs._dburl

            logger.info("New Session created: %s." % str(self))

        except Exception as ex:
            logger.exception('session create failed')
            raise PilotException("Couldn't create new session (database URL '%s' incorrect?): %s" \
                            % (self._dburl, ex))
Example #7
0
    def __init__(self,
                 database_url=None,
                 database_name="radicalpilot",
                 uid=None,
                 name=None):
        """Creates a new or reconnects to an existing session.

        If called without a uid, a new Session instance is created and
        stored in the database. If uid is set, an existing session is
        retrieved from the database.

        **Arguments:**
            * **database_url** (`string`): The MongoDB URL.  If none is given,
              RP uses the environment variable RADICAL_PILOT_DBURL.  If that is
              not set, an error will be raised.

            * **database_name** (`string`): An alternative database name
              (default: 'radicalpilot').

            * **uid** (`string`): If uid is set, we try
              re-connect to an existing session instead of creating a new one.

            * **name** (`string`): An optional human readable name.  When
              creating a new session, a given name is also used as session
              UID.

        **Returns:**
            * A new Session instance.

        **Raises:**
            * :class:`radical.pilot.DatabaseError`
            * `PilotException` if no database URL is available, or if a
              new session cannot be created.

        """

        # init the base class inits
        saga.Session.__init__(self)
        Object.__init__(self)

        # before doing anything else, set up the debug helper for the lifetime
        # of the session.
        self._debug_helper = ru.DebugHelper()

        # Lists holding all manager objects created during the session.
        self._pilot_manager_objects = list()
        self._unit_manager_objects = list()

        # Create a new process registry. All objects belonging to this
        # session will register their worker processes (if they have any)
        # in this registry. This makes it easier to shut down things in
        # a more coordinate fashion.
        self._process_registry = _ProcessRegistry()

        # The resource configuration dictionary associated with the session.
        self._resource_configs = {}

        self._database_url = database_url
        self._database_name = database_name

        if not self._database_url:
            self._database_url = os.getenv("RADICAL_PILOT_DBURL", None)

        if not self._database_url:
            raise PilotException("no database URL (set RADICAL_PILOT_DBURL)")

        logger.info("using database url  %s" % self._database_url)

        # if the database url contains a path element, we interpret that as
        # database name (without the leading slash)
        tmp_url = ru.Url(self._database_url)
        if  tmp_url.path            and \
            tmp_url.path[0]  == '/' and \
            len(tmp_url.path) >  1  :
            self._database_name = tmp_url.path[1:]
            logger.info("using database path %s" % self._database_name)
        else:
            logger.info("using database name %s" % self._database_name)

        # Loading all "default" resource configurations
        module_path = os.path.dirname(os.path.abspath(__file__))
        default_cfgs = "%s/configs/*.json" % module_path
        config_files = glob.glob(default_cfgs)

        for config_file in config_files:

            try:
                rcs = ResourceConfig.from_file(config_file)
            except Exception as e:
                # a broken config file must not prevent session creation
                logger.error("skip config file %s: %s" % (config_file, e))
                continue

            for rc in rcs:
                logger.info("Loaded resource configurations for %s" % rc)
                self._resource_configs[rc] = rcs[rc].as_dict()

        # Loading all user defined resource configurations; those are merged
        # into the default ones where the resource labels match
        user_cfgs = "%s/.radical/pilot/configs/*.json" % os.environ.get('HOME')
        config_files = glob.glob(user_cfgs)

        for config_file in config_files:

            try:
                rcs = ResourceConfig.from_file(config_file)
            except Exception as e:
                logger.error("skip config file %s: %s" % (config_file, e))
                continue

            for rc in rcs:
                logger.info("Loaded resource configurations for %s" % rc)

                if rc in self._resource_configs:
                    # config exists -- merge user config into it
                    ru.dict_merge(self._resource_configs[rc],
                                  rcs[rc].as_dict(),
                                  policy='overwrite')
                else:
                    # new config -- add as is
                    self._resource_configs[rc] = rcs[rc].as_dict()

        default_aliases = "%s/configs/aliases.json" % module_path
        self._resource_aliases = ru.read_json_str(default_aliases)['aliases']

        ##########################
        ## CREATE A NEW SESSION ##
        ##########################
        if uid is None:
            try:
                self._connected = None

                if name:
                    # a given name doubles as session UID
                    self._name = name
                    self._uid = name
                else:
                    self._uid = ru.generate_id('rp.session',
                                               mode=ru.ID_PRIVATE)
                    self._name = self._uid

                # NOTE(review): this passes the `database_name` argument,
                #               not `self._database_name` which may have
                #               been derived from the URL path above --
                #               confirm that `dbSession.new` evaluates the
                #               URL path itself.
                self._dbs, self._created, self._connection_info = \
                        dbSession.new(sid     = self._uid,
                                      name    = self._name,
                                      db_url  = self._database_url,
                                      db_name = database_name)

                logger.info("New Session created%s." % str(self))

            except Exception as ex:
                logger.exception('session create failed')
                raise PilotException("Couldn't create new session (database URL '%s' incorrect?): %s" \
                                % (self._database_url, ex))