Example #1
    def __init__(self, sid, workflow, pending_queue, completed_queue,
                 resubmit_failed, rmq_conn_params):

        # Mandatory arguments
        self._sid = sid
        self._pending_queue = pending_queue
        self._completed_queue = completed_queue
        self._resubmit_failed = resubmit_failed
        self._rmq_conn_params = rmq_conn_params

        # Assign validated workflow
        self._workflow = workflow

        # Create logger and profiler at their specific locations using the sid
        self._path = os.getcwd() + '/' + self._sid
        self._uid = ru.generate_id('wfprocessor.%(item_counter)04d',
                                   ru.ID_CUSTOM,
                                   ns=self._sid)

        name = 'radical.entk.%s' % self._uid
        self._logger = ru.Logger(name, path=self._path)
        self._prof = ru.Profiler(name, path=self._path)
        self._report = ru.Reporter(name)

        # Defaults
        self._wfp_process = None
        self._enqueue_thread = None
        self._dequeue_thread = None
        # env values are strings -- normalize to int for a consistent type
        self._rmq_ping_interval = int(os.getenv('RMQ_PING_INTERVAL', '10'))

        self._logger.info('Created WFProcessor object: %s' % self._uid)
        self._prof.prof('create_wfp', uid=self._uid)
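For context, a hedged sketch of the id scheme used above: `ru.generate_id` with `ru.ID_CUSTOM` expands the `item_counter` template per namespace, so repeated calls within one session yield consecutive uids (printed values are illustrative):

    import radical.utils as ru

    sid = ru.generate_id('re.session', ru.ID_PRIVATE)
    uid = ru.generate_id('wfprocessor.%(item_counter)04d', ru.ID_CUSTOM, ns=sid)
    print(uid)   # 'wfprocessor.0000' on the first call, then '...0001', ...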
Example #2
    def __init__(self, log=None, rep=None, prof=None):

        if log: self._log = log
        else: self._log = ru.Logger('radical.nge')

        if rep: self._rep = rep
        else: self._rep = ru.Reporter('radical.nge')

        if prof: self._prof = prof
        else: self._prof = ru.Profiler('radical.nge')

        self._session = rp.Session()
        self._pmgr = rp.PilotManager(self._session)
        self._umgr = rp.UnitManager(self._session)

        self._pmgr.register_callback(self._pilot_state_cb)
        self._umgr.register_callback(self._unit_state_cb)

        # create a dir for data staging
        self._pwd = os.getcwd()
        self._data = 'data.%s' % self._session.uid
        os.makedirs('%s/%s/' % (self._pwd, self._data))

        # track submitted tasks
        self._tcnt = 0
        self._tasks = dict()
Example #3
    def __init__(self,
                 config_path=None,
                 hostname=None,
                 port=None,
                 reattempts=None,
                 resubmit_failed=None,
                 autoterminate=None,
                 write_workflow=None,
                 rts=None,
                 rmq_cleanup=None,
                 rts_config=None,
                 name=None):

        # Create a session for each EnTK script execution
        if name:
            self._name = name
            self._sid = name
        else:
            self._name = str()
            self._sid = ru.generate_id('re.session', ru.ID_PRIVATE)

        self._read_config(config_path, hostname, port, reattempts,
                          resubmit_failed, autoterminate, write_workflow, rts,
                          rmq_cleanup, rts_config)

        # Create an uid + logger + profiles for AppManager, under the sid
        # namespace
        path = os.getcwd() + '/' + self._sid
        self._uid = ru.generate_id('appmanager.%(item_counter)04d',
                                   ru.ID_CUSTOM,
                                   namespace=self._sid)
        self._logger = ru.Logger('radical.entk.%s' % self._uid,
                                 path=path,
                                 targets=['2', '.'])
        self._prof = ru.Profiler(name='radical.entk.%s' % self._uid, path=path)
        self._report = ru.Reporter(name='radical.entk.%s' % self._uid)

        self._report.info('EnTK session: %s\n' % self._sid)
        self._prof.prof('create amgr obj', uid=self._uid)
        self._report.info('Creating AppManager')

        self._resource_manager = None
        # RabbitMQ Queues
        self._pending_queue = list()
        self._completed_queue = list()

        # Global parameters to have default values
        self._mqs_setup = False
        self._resource_desc = None
        self._task_manager = None
        self._workflow = None
        self._cur_attempt = 1
        self._shared_data = list()

        # env values are strings -- normalize to int for a consistent type
        self._rmq_ping_interval = int(os.getenv('RMQ_PING_INTERVAL', '10'))

        self._logger.info('Application Manager initialized')
        self._prof.prof('amgr obj created', uid=self._uid)
        self._report.ok('>>ok\n')
Example #4
    def _get_reporter(self, name):
        """
        This is a thin wrapper around `ru.Reporter()` which makes sure that
        log files end up in a separate directory with the name of `session.uid`.
        """

        return ru.Reporter(name=name, ns='radical.pilot', targets=['stdout'],
                           path=self._logdir)
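A hedged usage sketch of the wrapper (the call site is hypothetical; `self._logdir` is whatever the owning class assigned):

    #   rep = self._get_reporter('radical.pilot.report')
    #   rep.header('session setup\n')   # printed to stdout; log files go under _logdir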
Example #5
def fetch_json(sid,
               dburl=None,
               tgt=None,
               skip_existing=False,
               session=None,
               log=None):
    '''
    returns file name
    '''

    if not log and session:
        log = session._log
        rep = session._rep
    elif not log:
        log = ru.Logger('radical.pilot.utils')
        rep = ru.Reporter('radical.pilot.utils')
    else:
        # a logger was passed in -- we still need a reporter for the final
        # status message below
        rep = ru.Reporter('radical.pilot.utils')

    if not tgt:
        tgt = '.'

    if tgt.startswith('/'):
        # Assume an absolute path
        dst = os.path.join(tgt, '%s.json' % sid)
    else:
        # Assume a relative path
        dst = os.path.join(os.getcwd(), tgt, '%s.json' % sid)

    try:
        os.makedirs(os.path.dirname(dst))   # ensure the dir holding dst exists
    except OSError:
        pass  # dir exists

    if skip_existing and os.path.isfile(dst) \
            and os.stat(dst).st_size > 0:

        log.info("session already in %s", dst)

    else:

        if not dburl:
            dburl = os.environ.get('RADICAL_PILOT_DBURL')

        if not dburl:
            raise ValueError('RADICAL_PILOT_DBURL is not set')

        mongo, db, _, _, _ = ru.mongodb_connect(dburl)

        json_docs = get_session_docs(db, sid)
        ru.write_json(json_docs, dst)

        log.info("session written to %s", dst)

        mongo.close()

    rep.ok("+ %s (json)\n" % sid)
    return dst
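A hedged invocation sketch (session id and target dir are made up; a MongoDB reachable via RADICAL_PILOT_DBURL is required):

    path = fetch_json('rp.session.example.0000', tgt='sessions')
    # -> <cwd>/sessions/rp.session.example.0000.json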
Example #6
    def _get_reporter(self, name):
        '''
        This is a thin wrapper around `ru.Reporter()` which makes sure that
        log files end up in a separate directory with the name of `session.uid`.
        '''

        if not self._reporter:
            self._reporter = ru.Reporter(name=name, ns='radical.pilot',
                                         path=self._cfg.path)
        return self._reporter
Example #7
    def _assert_reporter(pname, fname, val=True):

        rep  = ru.Reporter(name=pname, ns='radical.utils.test', path='/tmp/')
        rep.info('foo')

        if fname:
            assert(val == os.path.isfile(fname))
            assert(val == _cmd('grep -e "foo" %s' % fname))

            try   : os.unlink(fname)
            except OSError: pass
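The helper above is presumably driven like this (both arguments are illustrative; the actual report file name depends on the Reporter's configured targets):

    #   _assert_reporter(pname='ru.test', fname='/tmp/ru.test.rep')
    #   _assert_reporter(pname='ru.test', fname=None)   # skips the file checks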
Example #8
    def __init__(self):
        '''
        initialize the service endpoint:

          - create logger, profile and reporter
          - set up accounts
        '''

        self._log = ru.Logger('radical.nge.service')
        self._rep = ru.Reporter('radical.nge.service')
        self._prof = ru.Profiler('radical.nge.service')
        self._accounts = {
            'andre': _Account('andre', 'erdna'),
            'matteo': _Account('matteo', 'eottam'),
            'daniel': _Account('daniel', 'leinad'),
            'guest': _Account('guest', 'guest'),
        }

        self._rep.header('--- NGE (%s) ---' % rn.version)
Example #9
    def __init__(self, url, log=None, rep=None, prof=None):

        if log: self._log = log
        else: self._log = ru.Logger('radical.nge')

        if rep: self._rep = rep
        else: self._rep = ru.Reporter('radical.nge')

        if prof: self._prof = prof
        else: self._prof = ru.Profiler('radical.nge')

        self._cookies = list()
        self._url = ru.Url(url)

        self._qbase = ru.Url(url)
        # self._qbase.username = None
        # self._qbase.password = None
        self._qbase = str(self._qbase).rstrip('/')

        if self._url.username and self._url.password:
            self.login(self._url.username, self._url.password)
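A hedged construction sketch (the class name is not visible in the snippet, so it is hypothetical; credentials embedded in the URL trigger the automatic login at the end of __init__):

    #   client = NGEClient('http://andre:erdna@localhost:8090/')
    #   # equivalent to constructing without credentials and then calling
    #   # client.login('andre', 'erdna') explicitly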
Example #10
def fetch_logfiles(sid,
                   dburl=None,
                   src=None,
                   tgt=None,
                   access=None,
                   session=None,
                   skip_existing=False,
                   fetch_client=False,
                   log=None):
    '''
    sid: session for which all logfiles are fetched
    src: dir to look for client session logfiles
    tgt: dir to store the logfile in

    returns list of file names
    '''

    if not log and session:
        log = session._log
        rep = session._rep
    elif not log:
        log = ru.Logger('radical.pilot.utils')
        rep = ru.Reporter('radical.pilot.utils')
    else:
        # a logger was passed in -- we still need a reporter for the status
        # messages below
        rep = ru.Reporter('radical.pilot.utils')

    ret = list()

    if not dburl:
        dburl = os.environ['RADICAL_PILOT_DBURL']

    if not dburl:
        raise RuntimeError('Please set RADICAL_PILOT_DBURL')

    if not src:
        src = os.getcwd()

    if not tgt:
        tgt = os.getcwd()

    if not tgt.startswith('/') and '://' not in tgt:
        tgt = "%s/%s" % (os.getcwd(), tgt)

    # we always create a session dir as real target
    tgt_url = saga.Url("%s/%s/" % (tgt, sid))

    # Turn URLs without schema://host into file://localhost,
    # so that they don't get interpreted as relative.
    if not tgt_url.schema:
        tgt_url.schema = 'file'
    if not tgt_url.host:
        tgt_url.host = 'localhost'

    if fetch_client:
        # first fetch session logfile
        client_logfile = "%s/%s.log" % (src, sid)

        ftgt = saga.Url('%s/%s' % (tgt_url, os.path.basename(client_logfile)))
        ret.append("%s" % ftgt.path)

        if skip_existing and os.path.isfile(ftgt.path) \
                and os.stat(ftgt.path).st_size > 0:
            pass
        else:
            log_file = saga.filesystem.File(client_logfile, session=session)
            log_file.copy(ftgt, flags=saga.filesystem.CREATE_PARENTS)
            log_file.close()

    _, db, _, _, _ = ru.mongodb_connect(dburl)

    json_docs = get_session_docs(db, sid)

    pilots = json_docs['pilot']
    num_pilots = len(pilots)
    log.info("Session: %s", sid)
    log.info("Number of pilots in session: %d", num_pilots)

    for pilot in pilots:

        try:
            sandbox_url = saga.Url(pilot['pilot_sandbox'])

            if access:
                # Allow using an access schema different from the one used for
                # the run.  Useful if you ran from the headnode but would like
                # to retrieve the logfiles to your desktop (hello, Titan).
                access_url = saga.Url(access)
                sandbox_url.schema = access_url.schema
                sandbox_url.host = access_url.host

            sandbox = saga.filesystem.Directory(sandbox_url, session=session)

            # Try to fetch a tarball of logfiles, so that we can get them all in one (SAGA) go!
            LOGFILES_TARBALL = '%s.log.tgz' % pilot['uid']
            tarball_available = False
            try:
                if  sandbox.is_file(LOGFILES_TARBALL) and \
                    sandbox.get_size(LOGFILES_TARBALL):

                    log.info("logfiles tarball exists")
                    ftgt = saga.Url('%s/%s' % (tgt_url, LOGFILES_TARBALL))

                    if skip_existing and os.path.isfile(ftgt.path) \
                            and os.stat(ftgt.path).st_size > 0:

                        log.info("Skip fetching of '%s/%s' to '%s'.",
                                 sandbox_url, LOGFILES_TARBALL, tgt_url)
                        tarball_available = True
                    else:

                        log.info("Fetching '%s%s' to '%s'.", sandbox_url,
                                 LOGFILES_TARBALL, tgt_url)
                        log_file = saga.filesystem.File(
                            "%s%s" % (sandbox_url, LOGFILES_TARBALL),
                            session=session)
                        log_file.copy(ftgt,
                                      flags=saga.filesystem.CREATE_PARENTS)
                        log_file.close()

                        tarball_available = True
                else:
                    log.warn("logiles tarball doesnt exists")

            except saga.DoesNotExist:
                log.warn("logfiles tarball doesnt exists")

            try:
                os.mkdir("%s/%s" % (tgt_url.path, pilot['uid']))
            except OSError:
                pass

            # We now have a local tarball
            if tarball_available:
                log.debug("Extract tarball %s to %s", ftgt.path, tgt_url.path)

                try:
                    tarball = tarfile.open(ftgt.path)
                    tarball.extractall("%s/%s" % (tgt_url.path, pilot['uid']))

                    logfiles = glob.glob("%s/%s/*.log" %
                                         (tgt_url.path, pilot['uid']))
                    log.info("tarball %s extracted to '%s/%s/'.", ftgt.path,
                             tgt_url.path, pilot['uid'])
                    ret.extend(logfiles)
                    os.unlink(ftgt.path)

                except Exception as e:
                    log.warn('could not extract tarball %s [%s]', ftgt.path, e)

                # If extract succeeded, no need to fetch individual logfiles
                rep.ok("+ %s (logfiles)\n" % pilot['uid'])
                continue

            # If we dont have a tarball (for whichever reason), fetch individual logfiles
            logfiles = sandbox.list('*.log')

            for logfile in logfiles:

                ftgt = saga.Url('%s/%s/%s' % (tgt_url, pilot['uid'], logfile))
                ret.append("%s" % ftgt.path)

                if skip_existing and os.path.isfile(ftgt.path) \
                                 and os.stat(ftgt.path).st_size > 0:

                    continue

                log_file = saga.filesystem.File("%s%s" %
                                                (sandbox_url, logfile),
                                                session=session)
                log_file.copy(ftgt, flags=saga.filesystem.CREATE_PARENTS)
                log_file.close()

            rep.ok("+ %s (logfiles)\n" % pilot['uid'])

        except Exception as e:
            log.warn('failed to fetch logfiles for %s [%s]', pilot['uid'], e)
            rep.error("- %s (logfiles)\n" % pilot['uid'])

    return ret
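A hedged invocation sketch mirroring the docstring (ids and paths are made up; RADICAL_PILOT_DBURL must point at the session's database, and the SAGA session must be able to reach the pilot sandboxes):

    logs = fetch_logfiles('rp.session.example.0000', tgt='logs')
    for f in logs:
        print(f)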
Example #11
        stage.add_tasks(task)

        return stage


if __name__ == "__main__":

    # Read the YAML configuration file passed on the command line
    try:
        config_filename = sys.argv[1]
    except Exception:
        raise ValueError(f"Usage:\tpython {sys.argv[0]} [config.yaml]\n\n")

    cfg = ExperimentConfig.from_yaml(config_filename)

    reporter = ru.Reporter(name="radical.entk")
    reporter.title(cfg.title)

    # Create Application Manager
    try:
        appman = AppManager(
            hostname=os.environ["RMQ_HOSTNAME"],
            port=int(os.environ["RMQ_PORT"]),
            username=os.environ["RMQ_USERNAME"],
            password=os.environ["RMQ_PASSWORD"],
        )
    except KeyError:
        raise ValueError(
            "Invalid RMQ environment. Please see README.md for configuring environment."
        )
Example #12
def test_ordered_scheduler():

    report = ru.Reporter(name='radical.pilot')
    report.title('Getting Started (RP version %s)' % rp.version)

    session = rp.Session()

    try:
        # read the config used for resource details
        report.info('read config')
        report.ok('>>ok\n')

        report.header('submit pilots')

        pd_init = {
            'resource': 'local.localhost',
            'runtime': 5,
            'exit_on_error': True,
            'cores': 10
        }
        pdesc = rp.ComputePilotDescription(pd_init)
        pmgr = rp.PilotManager(session=session)
        pilot = pmgr.submit_pilots(pdesc)

        report.header('submit pipelines')

        umgr = rp.UnitManager(session=session)
        umgr.add_pilots(pilot)

        n_pipes = 2
        n_stages = 5
        n_tasks = 4

        # assumption: pipeline_task.sh sits next to this test script
        pwd = os.path.dirname(os.path.abspath(__file__))

        cuds = list()
        for p in range(n_pipes):
            for s in range(n_stages):
                for t in range(n_tasks):
                    cud = rp.ComputeUnitDescription()
                    cud.executable = '%s/pipeline_task.sh' % pwd
                    cud.arguments = [p, s, t, 10]
                    cud.cpu_processes = 1
                    cud.tags = {
                        'order': {
                            'ns': p,
                            'order': s,
                            'size': n_tasks
                        }
                    }
                    cud.name = 'p%03d-s%03d-t%03d' % (p, s, t)
                    cuds.append(cud)
                    report.progress()

        import random
        random.shuffle(cuds)

        # Submit the previously created ComputeUnit descriptions to the
        # PilotManager. This will trigger the selected scheduler to start
        # assigning ComputeUnits to the ComputePilots.
        umgr.submit_units(cuds)

        # Wait for all compute units to reach a final state
        report.header('gather results')
        umgr.wait_units()

    except Exception as e:
        # Something unexpected happened in the pilot code above
        report.error('caught Exception: %s\n' % e)
        ru.print_exception_trace()
        raise

    except (KeyboardInterrupt, SystemExit) as e:
        # the callback called sys.exit(), and we can here catch the
        # corresponding KeyboardInterrupt exception for shutdown.  We also catch
        # SystemExit (which gets raised if the main threads exits for some other
        # reason).
        ru.print_exception_trace()
        report.warn('exit requested\n')

    finally:
        # always clean up the session, no matter if we caught an exception or
        # not.  This will kill all remaining pilots.
        report.header('finalize')
        session.close(download=False)

    report.header()
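For orientation, the 'order' tag attached to each unit above has the shape below; judging from the test's intent, units sharing a namespace `ns` run stage by stage, with `size` units per stage (values repeated from the loop):

    #   cud.tags = {'order': {'ns'   : p,         # pipeline index
    #                         'order': s,         # stage index in the pipeline
    #                         'size' : n_tasks}}  # units per stage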
Example #13
def test_reporter():

    pname = 'ru.%d' % os.getpid()
    fname = '/tmp/%s.prof' % pname

    os.environ['RADICAL_UTILS_REPORT'] = 'True'
    os.environ['RADICAL_UTILS_REPORT_TGT'] = fname

    rep = ru.Reporter(name=pname, ns='radical.utils')

    rep.header('header  \n')
    rep.info('info    \n')
    rep.progress('progress\n')
    rep.ok('ok      \n')
    rep.warn('warn    \n')
    rep.error('error   \n')
    rep.plain('plain   \n')

    rep.info('test idler:')
    rep.idle(mode='start')
    for _ in range(3):
        rep.idle()
        time.sleep(0.03)
    rep.idle(color='ok', c='.')
    rep.idle(color='error', c='.')
    for _ in range(3):
        rep.idle()
        time.sleep(0.01)

    rep.idle(mode='stop')
    rep.ok('>>done\n')

    # pylint: disable=E0501
    rep.info('idle test\n')
    rep.info(
        '1234567891         2         3         4         5         6         7         8\n\t'
    )  # noqa
    rep.info(
        '.0.........0.........0.........0.........0.........0.........0.........0'
    )  # noqa
    # pylint: enable=E0501

    rep.idle(mode='start')
    for _ in range(200):
        rep.idle()
        time.sleep(0.01)
        rep.idle()
        time.sleep(0.01)
        rep.idle()
        time.sleep(0.01)
        rep.idle()
        time.sleep(0.01)
        rep.idle(color='ok', c="+")
    rep.idle(mode='stop')

    rep.set_style('error', color='yellow', style='ELTTTTMELE', segment='X')
    rep.error('error')

    try:
        rep.exit('exit', 1)
    except SystemExit:
        assert (True)
    except Exception as e:
        assert (False), 'expected system exit, got %s' % e

    assert (os.path.isfile(fname))
    assert (_cmd('grep -e "header"    %s' % fname))

    try:
        os.unlink(fname)
    except OSError:
        pass
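The test drives the reporter entirely through environment variables; a minimal standalone sketch of the same mechanism (target file name is illustrative):

    import os
    import radical.utils as ru

    os.environ['RADICAL_UTILS_REPORT']     = 'True'
    os.environ['RADICAL_UTILS_REPORT_TGT'] = '/tmp/demo.rep'

    ru.Reporter(name='demo', ns='radical.utils').info('hello\n')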
Example #14
###############################################################################
#
def collect_all(sessions_to_fetch):

    for sid in sessions_to_fetch:
        try:
            collect(sid)
        except Exception as e:
            report.error("Collection of info for %s failed: %s" % (sid, e))


###############################################################################
#
if __name__ == '__main__':

    report = ru.Reporter("Collect profiling and json data to local disk.")

    session_ids = []

    # Read from file if specified, otherwise read from stdin
    f = open(sys.argv[1]) if len(sys.argv) > 1 else sys.stdin
    for line in f:
        session = line.strip()
        if session:
            session_ids.append(session)

    report.info("Session ids found on input:\n")
    report.plain("%s\n" % session_ids)

    collect_all(session_ids)
Example #15
        self.progress = self._logger.report.progress
        self.ok = self._logger.report.ok
        self.warn = self._logger.report.warn
        self.error = self._logger.report.error
        self.exit = self._logger.report.exit
        self.plain = self._logger.report.plain
        self.set_style = self._logger.report.set_style


# ------------------------------------------------------------------------------

if __name__ == "__main__":

    import radical.utils as ru

    r = ru.Reporter(title='test')

    r.header('header  \n')
    r.info('info    \n')
    r.progress('progress\n')
    r.ok('ok      \n')
    r.warn('warn    \n')
    r.error('error   \n')
    r.plain('plain   \n')

    r.set_style('error', color='yellow', style='ELTTMLE', segment='X')
    r.error('error ')

    i = 0
    j = 0
    for cname, col in r.COLORS.items():
Example #16
def test_amgr_initialization():
    amgr_name = ru.generate_id('test.appmanager.%(item_counter)04d',
                               ru.ID_CUSTOM)
    amgr = Amgr(hostname=hostname, port=port, name=amgr_name)

    assert amgr._name.split('.') == amgr_name.split('.')
    assert amgr._sid.split('.') == amgr_name.split('.')
    assert amgr._uid.split('.') == ['appmanager', '0000']
    assert type(amgr._logger) == type(ru.get_logger('radical.tests'))
    assert type(amgr._prof) == type(ru.Profiler('radical.tests'))
    assert type(amgr._report) == type(ru.Reporter('radical.tests'))
    assert isinstance(amgr.name, str)

    # RabbitMQ inits
    assert amgr._mq_hostname == hostname
    assert amgr._port == port

    # RabbitMQ Queues
    assert amgr._num_pending_qs == 1
    assert amgr._num_completed_qs == 1
    assert isinstance(amgr._pending_queue, list)
    assert isinstance(amgr._completed_queue, list)

    # Global parameters to have default values
    assert amgr._mqs_setup is False
    assert amgr._resource_desc is None
    assert amgr._task_manager is None
    assert amgr._workflow is None
    assert amgr._resubmit_failed is False
    assert amgr._reattempts == 3
    assert amgr._cur_attempt == 1
    assert amgr._autoterminate is True
    assert isinstance(amgr.shared_data, list)

    amgr = Amgr(hostname=hostname, port=port)

    assert amgr._uid.split('.') == ['appmanager', '0000']
    assert type(amgr._logger) == type(ru.get_logger('radical.tests'))
    assert type(amgr._prof) == type(ru.Profiler('radical.tests'))
    assert type(amgr._report) == type(ru.Reporter('radical.tests'))
    assert isinstance(amgr.name, str)

    # RabbitMQ inits
    assert amgr._mq_hostname == hostname
    assert amgr._port == port

    # RabbitMQ Queues
    assert amgr._num_pending_qs == 1
    assert amgr._num_completed_qs == 1
    assert isinstance(amgr._pending_queue, list)
    assert isinstance(amgr._completed_queue, list)

    # Global parameters to have default values
    assert amgr._mqs_setup is False
    assert amgr._resource_desc is None
    assert amgr._task_manager is None
    assert amgr._workflow is None
    assert amgr._resubmit_failed is False
    assert amgr._reattempts == 3
    assert amgr._cur_attempt == 1
    assert amgr._autoterminate is True
    assert isinstance(amgr.shared_data, list)
Example #17
    p.add_stages(s3)

    # --------------------------
    # Outlier identification stage
    s4 = generate_interfacing_stage()
    p.add_stages(s4)

    CUR_STAGE += 1

    return p


# ------------------------------------------------------------------------------
if __name__ == '__main__':

    reporter = ru.Reporter(name='radical.entk')
    reporter.title('COVID-19 - Workflow2')

    # config specified as argument, otherwise derived from the script name;
    # compare against the basename so invocation via a path still matches
    script = os.path.basename(sys.argv[0])
    if len(sys.argv) == 2:
        cfg_file = sys.argv[1]
    elif script == "molecules_adrp.py":
        cfg_file = "adrp_system.json"
    elif script == "molecules_3clpro.py":
        cfg_file = "3clpro_system.json"
    else:
        reporter.exit('Usage:\t%s [config.json]\n\n' % sys.argv[0])

    cfg = ru.Config(cfg=ru.read_json(cfg_file))
    cfg['node_counts'] = max(1, cfg['md_counts'] // cfg['gpu_per_node'])
Example #18
# set if you want to see what happens behind the scenes!


RUNTIME  =    20  # how long to run the pilot
CORES    =    64  # how many cores to use for one pilot
UNITS    =   128  # how many units to create
SLEEP    =     0  # how long each unit sleeps
SCHED    = rp.SCHED_DIRECT_SUBMISSION


#------------------------------------------------------------------------------
#
if __name__ == "__main__":

    # we use a reporter class for nicer output
    report = ru.Reporter("Getting Started")

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()

    # all other pilot code is now tried/excepted.  If an exception is caught, we
    # can rely on the session object to exist and be valid, and we can thus tear
    # the whole RP stack down via a 'session.close()' call in the 'finally'
    # clause...
    try:
        report.info('read configs')
        resources = ru.read_json('%s/config.json' % os.path.dirname(__file__))
        report.ok('\\ok\n')

        report.header('submit pilots')
Example #19
#
checks = [
       #  OLD_CUD, GOOD, SCHEMA,
          RU_MUNCH, PYDICT, MYDICT, RU_CFG, NEW_CUD, RU_DICT, PYDANTIC
         ]

data   = list()

for check in checks:

    results = [check.__name__]

    n = 1024 * 1024
    l = list()
    t = list()
    r = ru.Reporter('radical.test')
    r.progress_tgt(n * 5, label=check.__name__)

    t0 = time.time()

    # -----------------------------------------------
    # create n entities
    for i in range(n):
        e = check()
        l.append(e)
        r.progress()

    t1 = time.time()
    results.append(t1 - t0)

    # -----------------------------------------------
Example #20
def fetch_profiles(sid, dburl=None, src=None, tgt=None, access=None,
                   session=None, skip_existing=False, fetch_client=False,
                   log=None):
    '''
    sid: session for which all profiles are fetched
    src: dir to look for client session profiles ($src/$sid/*.prof)
    tgt: dir to store the profile in
         - $tgt/$sid/*.prof,
         - $tgt/$sid/$pilot_id/*.prof)

    returns list of file names
    '''

    if not log and session:
        log = session._log
        rep = session._rep
    elif not log:
        log = ru.Logger('radical.pilot.utils')
        rep = ru.Reporter('radical.pilot.utils')
    else:
        # a logger was passed in -- we still need a reporter for the status
        # messages below
        rep = ru.Reporter('radical.pilot.utils')

    ret = list()

    if not dburl:
        dburl = os.environ['RADICAL_PILOT_DBURL']

    if not dburl:
        raise ValueError('RADICAL_PILOT_DBURL is not set')

    if not src:
        src = os.getcwd()

    if not tgt:
        tgt = os.getcwd()

    if not tgt.startswith('/') and '://' not in tgt:
        tgt = "%s/%s" % (os.getcwd(), tgt)

    # we always create a session dir as real target
    tgt_url = rs.Url("%s/%s/" % (tgt, sid))

    # Turn URLs without schema://host into file://localhost,
    # so that they don't get interpreted as relative.
    if not tgt_url.schema:
        tgt_url.schema = 'file'
    if not tgt_url.host:
        tgt_url.host = 'localhost'

    # first fetch session profile
    if fetch_client:
        client_profiles = glob.glob("%s/%s/*.prof" % (src, sid))
        if not client_profiles:
            raise RuntimeError('no client profiles in %s/%s' % (src, sid))

        for client_profile in client_profiles:

            ftgt = rs.Url('%s/%s' % (tgt_url, os.path.basename(client_profile)))
            ret.append("%s" % ftgt.path)

            if skip_existing and os.path.isfile(ftgt.path) \
                    and os.stat(ftgt.path).st_size > 0:
                pass
            else:
                prof_file = rs.fs.File(client_profile, session=session)
                prof_file.copy(ftgt, flags=rs.fs.CREATE_PARENTS)
                prof_file.close()

            if not os.path.isfile(client_profile):
                raise RuntimeError('client profile %s does not exist'
                                   % client_profile)

    _, db, _, _, _ = ru.mongodb_connect (dburl)

    json_docs = get_session_docs(db, sid)

    pilots = json_docs['pilot']
    num_pilots = len(pilots)
    log.debug("Session: %s", sid)
    log.debug("Number of pilots in session: %d", num_pilots)

    for pilot in pilots:

        try:
            log.debug("processing pilot '%s'", pilot['uid'])

            sandbox_url = rs.Url(pilot['pilot_sandbox'])

            if access:
                # Allow using an access schema different from the one used for
                # the run.  Useful if you ran from the headnode but would like
                # to retrieve the profiles to your desktop (hello, Titan).
                access_url = rs.Url(access)
                sandbox_url.schema = access_url.schema
                sandbox_url.host   = access_url.host

              # print "Overriding remote sandbox: %s" % sandbox_url

            sandbox = rs.fs.Directory (sandbox_url, session=session)

            # Try to fetch a tarball of profiles, so that we can get them all in one (SAGA) go!
            PROFILES_TARBALL = '%s.prof.tgz' % pilot['uid']
            tarball_available = False
            try:
                if  sandbox.is_file(PROFILES_TARBALL) and \
                    sandbox.get_size(PROFILES_TARBALL):

                    log.info("profiles tarball exists")
                    ftgt = rs.Url('%s/%s' % (tgt_url, PROFILES_TARBALL))

                    if skip_existing and os.path.isfile(ftgt.path) \
                            and os.stat(ftgt.path).st_size > 0:

                        log.info("skip fetching of '%s/%s' to '%s'.", 
                                 sandbox_url, PROFILES_TARBALL, tgt_url)
                        tarball_available = True
                    else:

                        log.info("fetch '%s%s' to '%s'.", sandbox_url, 
                                 PROFILES_TARBALL, tgt_url)

                        prof_file = rs.fs.File("%s%s" % (sandbox_url,
                                            PROFILES_TARBALL), session=session)
                        prof_file.copy(ftgt, flags=rs.fs.CREATE_PARENTS)
                        prof_file.close()

                        tarball_available = True
                else:
                    log.warn("profiles tarball doesnt exists!")

            except rs.DoesNotExist:
                log.exception("exception(TODO): profiles tarball doesnt exists!")

            try:
                os.mkdir("%s/%s" % (tgt_url.path, pilot['uid']))
            except OSError:
                pass

            # We now have a local tarball
            if tarball_available:
                log.info("Extract tarball %s to '%s'.", ftgt.path, tgt_url.path)
                try:
                    tarball = tarfile.open(ftgt.path, mode='r:gz')
                    tarball.extractall("%s/%s" % (tgt_url.path, pilot['uid']))

                    profiles = glob.glob("%s/%s/*.prof" % (tgt_url.path, pilot['uid']))
                    ret.extend(profiles)
                    os.unlink(ftgt.path)

                    # If extract succeeded, no need to fetch individual profiles
                    rep.ok("+ %s (profiles)\n" % pilot['uid'])
                    continue

                except Exception as e:
                    log.warn('could not extract tarball %s [%s]', ftgt.path, e)

            # If we dont have a tarball (for whichever reason), fetch individual profiles
            profiles = sandbox.list('*.prof')
            for prof in profiles:

                ftgt = rs.Url('%s/%s/%s' % (tgt_url, pilot['uid'], prof))
                ret.append("%s" % ftgt.path)

                if skip_existing and os.path.isfile(ftgt.path) \
                                 and os.stat(ftgt.path).st_size > 0:
                    pass
                else:
                    prof_file = rs.fs.File("%s%s" % (sandbox_url, prof), session=session)
                    prof_file.copy(ftgt, flags=rs.fs.CREATE_PARENTS)
                    prof_file.close()

            rep.ok("+ %s (profiles)\n" % pilot['uid'])

        except Exception as e:
            rep.error("- %s (profiles)\n" % pilot['uid'])
            log.exception('failed to fetch profiles for %s', pilot['uid'])

    return ret
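A hedged invocation sketch matching the docstring (session id and paths are made up; requires RADICAL_PILOT_DBURL):

    profs = fetch_profiles('rp.session.example.0000',
                           src='.', tgt='profiles', fetch_client=True)
    # -> client profiles under profiles/<sid>/, pilot profiles under
    #    profiles/<sid>/<pilot_uid>/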
Example #21
def test_bw_tagging():

    # we use a reporter class for nicer output
    report = ru.Reporter(name='radical.pilot')
    report.title('Getting Started (RP version %s)' % rp.version)

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()

    # Add a Pilot Manager. Pilot managers manage one or more ComputePilots.
    pmgr = rp.PilotManager(session=session)

    # Define an [n]-core local pilot that runs for [x] minutes
    # Here we use a dict to initialize the description object
    pd_init = {
        'resource': 'ncsa.bw_aprun',
        'runtime': 10,  # pilot runtime (min)
        'exit_on_error': True,
        'project': 'gk4',
        'queue': 'high',
        'access_schema': 'gsissh',
        'cores': 128
    }
    pdesc = rp.ComputePilotDescription(pd_init)

    # Launch the pilot.
    pilot = pmgr.submit_pilots(pdesc)

    report.header('submit units')

    # Register the ComputePilot in a UnitManager object.
    umgr = rp.UnitManager(session=session)
    umgr.add_pilots(pilot)

    # Create a workload of ComputeUnits.
    # Each compute unit runs '/bin/date'.

    n = 5  # number of units to run
    report.info('create %d unit description(s)\n\t' % n)

    cuds = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud = rp.ComputeUnitDescription()
        cud.executable = '/bin/hostname'
        cud.arguments = ['>', 's1_t%s_hostname.txt' % i]
        cud.cpu_processes = 1
        cud.cpu_threads = 16
        # cud.cpu_process_type = rp.MPI
        # cud.cpu_thread_type  = rp.OpenMP
        cud.output_staging = {
            'source': 'unit:///s1_t%s_hostname.txt' % i,
            'target': 'client:///s1_t%s_hostname.txt' % i,
            'action': rp.TRANSFER
        }
        cuds.append(cud)
        report.progress()
    report.ok('>>ok\n')

    # Submit the previously created ComputeUnit descriptions to the
    # PilotManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus = umgr.submit_units(cuds)

    # Wait for all compute units to reach a final state
    # (DONE, CANCELED or FAILED).
    report.header('gather results')
    umgr.wait_units()

    n = 5  # number of units to run
    report.info('create %d unit description(s)\n\t' % n)

    cuds = list()
    for i in range(0, n):

        # create a new CU description, and fill it.
        # Here we don't use dict initialization.
        cud = rp.ComputeUnitDescription()
        cud.executable = '/bin/hostname'
        cud.arguments = ['>', 's2_t%s_hostname.txt' % i]
        cud.cpu_processes = 1
        cud.cpu_threads = 16
        cud.tag = cus[i].uid
        # cud.cpu_process_type = rp.MPI
        # cud.cpu_thread_type  = rp.OpenMP
        cud.output_staging = {
            'source': 'unit:///s2_t%s_hostname.txt' % i,
            'target': 'client:///s2_t%s_hostname.txt' % i,
            'action': rp.TRANSFER
        }
        cuds.append(cud)
        report.progress()
    report.ok('>>ok\n')

    # Submit the previously created ComputeUnit descriptions to the
    # PilotManager. This will trigger the selected scheduler to start
    # assigning ComputeUnits to the ComputePilots.
    cus = umgr.submit_units(cuds)

    # Wait for all compute units to reach a final state (DONE, CANCELED or FAILED).
    report.header('gather results')
    umgr.wait_units()

    for i in range(0, n):
        with open('s1_t%s_hostname.txt' % i) as f1, \
             open('s2_t%s_hostname.txt' % i) as f2:
            assert f1.readline().strip() == f2.readline().strip()

    report.header('finalize')
    session.close(download=True)

    report.header()

    for f in glob.glob('%s/*.txt' % os.getcwd()):
        os.remove(f)
Example #22
    session_paths = glob.glob('%s/*.session.*json' % json_dir)
    if not session_paths:
        raise Exception("No session files found in directory %s" % json_dir)

    session_files = [os.path.basename(e) for e in session_paths]

    session_ids = [e.rsplit('.json')[0] for e in session_files]

    print "Found sessions in %s: %s" % (json_dir, session_ids)

    return session_ids


###############################################################################
#
if __name__ == '__main__':

    report = ru.Reporter("Inject profiling and json data into database.")

    session_ids = []

    # Read from file if specified, otherwise read from stdin
    f = open(sys.argv[1]) if len(sys.argv) > 1 else sys.stdin
    for line in f:
        session_ids.append(line.strip())

    if not session_ids:
        session_ids = find_sessions(JSON_DIR)

    preprocess_all(session_ids)
Example #23
import sys

import radical.pilot as rp
import radical.utils as ru

# ------------------------------------------------------------------------------
#
# READ the RADICAL-Pilot documentation: http://radicalpilot.readthedocs.org/
#
# ------------------------------------------------------------------------------

# ------------------------------------------------------------------------------
#
if __name__ == '__main__':

    # we use a reporter class for nicer output
    report = ru.Reporter(name='radical.pilot')
    report.title('Getting Started (RP version %s)' % rp.version)

    # use the resource specified as argument, fall back to localhost
    if len(sys.argv) > 2:
        report.exit('Usage:\t%s [resource]\n\n' % sys.argv[0])
    elif len(sys.argv) == 2:
        resource = sys.argv[1]
    else:
        resource = 'local.localhost'

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()

    # all other pilot code is now tried/excepted.  If an exception is caught, we
Example #24
if __name__ == "__main__":

    if (len(sys.argv) != 2) or (sys.argv[1] not in configs):
        for f in session_mover.capture_fwd_logs():
            os.remove(f)

        print('\nUsage:\t%s [localhost || summit]\n' % sys.argv[0])

        sys.exit(1)

    else:
        resource = sys.argv[1]
        session_mover.use_current()

        # we use a reporter class for nicer output
        reporter = ru.Reporter(name='radical.pilot')   # optionally: level=verbose

    # Start mongod on localhost
    mongo = MongoInstance(dbpath)
    mongo.open_mongodb(create_folder=True)
    time.sleep(15)

    reporter.title('Getting Started (RP version %s)' % rp.version)

    session = rp.Session()

    try:
        config = configs[resource]

        reporter.info('reading config\n')
        reporter.info(pformat(config))