def __init__(self, sid, workflow, pending_queue, completed_queue,
             resubmit_failed, rmq_conn_params):
    """
    Set up a WFProcessor: store the arguments and create logger, profiler
    and reporter objects named after a freshly generated uid.

    sid:              session id; used as uid namespace and as the name of
                      the directory (below the cwd) handed to logger/profiler
    workflow:         workflow object, stored as-is
    pending_queue:    stored as-is
    completed_queue:  stored as-is
    resubmit_failed:  stored as-is
    rmq_conn_params:  stored as-is

    Raises ValueError if $RMQ_PING_INTERVAL is set to a non-integer value.
    """

    # Mandatory arguments
    self._sid = sid
    self._pending_queue = pending_queue
    self._completed_queue = completed_queue
    self._resubmit_failed = resubmit_failed
    self._rmq_conn_params = rmq_conn_params

    # Assign validated workflow
    self._workflow = workflow

    # Create logger and profiler at their specific locations using the sid
    self._path = os.getcwd() + '/' + self._sid
    self._uid = ru.generate_id('wfprocessor.%(item_counter)04d',
                               ru.ID_CUSTOM, ns=self._sid)

    name = 'radical.entk.%s' % self._uid

    self._logger = ru.Logger(name, path=self._path)
    self._prof = ru.Profiler(name, path=self._path)
    self._report = ru.Reporter(name)

    # Defaults
    self._wfp_process = None
    self._enqueue_thread = None
    self._dequeue_thread = None

    # os.getenv() returns a *string* when the variable is set but the int
    # default otherwise; normalise so the attribute always has one type.
    self._rmq_ping_interval = int(os.getenv('RMQ_PING_INTERVAL', 10))

    self._logger.info('Created WFProcessor object: %s' % self._uid)
    self._prof.prof('create_wfp', uid=self._uid)
def __init__(self, log=None, rep=None, prof=None):
    """
    Create the RP session plus pilot and unit managers, and prepare a data
    staging directory.

    log:  logger to use;   defaults to a new ru.Logger('radical.nge')
    rep:  reporter to use; defaults to a new ru.Reporter('radical.nge')
    prof: profiler to use; defaults to a new ru.Profiler('radical.nge')

    Raises OSError if the staging directory cannot be created.
    """

    if log: self._log = log
    else:   self._log = ru.Logger('radical.nge')

    # the original assigned `log` here, so a caller-supplied reporter was
    # silently replaced by the logger
    if rep: self._rep = rep
    else:   self._rep = ru.Reporter('radical.nge')

    if prof: self._prof = prof
    else:    self._prof = ru.Profiler('radical.nge')

    self._session = rp.Session()
    self._pmgr = rp.PilotManager(self._session)
    self._umgr = rp.UnitManager(self._session)

    self._pmgr.register_callback(self._pilot_state_cb)
    self._umgr.register_callback(self._unit_state_cb)

    # create a dir for data staging
    self._pwd = os.getcwd()
    self._data = 'data.%s' % self._session.uid
    os.makedirs('%s/%s/' % (self._pwd, self._data))

    # track submitted tasks
    self._tcnt = 0
    self._tasks = dict()
def __init__(self, config_path=None, hostname=None, port=None,
             reattempts=None, resubmit_failed=None, autoterminate=None,
             write_workflow=None, rts=None, rmq_cleanup=None,
             rts_config=None, name=None):
    """
    Initialise the AppManager: pick a session id, read the configuration,
    create logger / profiler / reporter and set attribute defaults.

    name: if given, used both as manager name and as session id; otherwise
          the name is empty and a private 're.session' id is generated.

    All other arguments are passed unchanged to `self._read_config()`.

    Raises ValueError if $RMQ_PING_INTERVAL is set to a non-integer value.
    """

    # Create a session for each EnTK script execution
    if name:
        self._name = name
        self._sid = name
    else:
        self._name = str()
        self._sid = ru.generate_id('re.session', ru.ID_PRIVATE)

    self._read_config(config_path, hostname, port, reattempts,
                      resubmit_failed, autoterminate, write_workflow,
                      rts, rmq_cleanup, rts_config)

    # Create an uid + logger + profiles for AppManager, under the sid
    # namespace
    path = os.getcwd() + '/' + self._sid
    self._uid = ru.generate_id('appmanager.%(item_counter)04d',
                               ru.ID_CUSTOM, namespace=self._sid)

    self._logger = ru.Logger('radical.entk.%s' % self._uid, path=path,
                             targets=['2', '.'])
    self._prof = ru.Profiler(name='radical.entk.%s' % self._uid, path=path)
    self._report = ru.Reporter(name='radical.entk.%s' % self._uid)

    self._report.info('EnTK session: %s\n' % self._sid)
    self._prof.prof('create amgr obj', uid=self._uid)
    self._report.info('Creating AppManager')

    self._resource_manager = None

    # RabbitMQ Queues
    self._pending_queue = list()
    self._completed_queue = list()

    # Global parameters to have default values
    self._mqs_setup = False
    self._resource_desc = None
    self._task_manager = None
    self._workflow = None
    self._cur_attempt = 1
    self._shared_data = list()

    # os.getenv() returns a *string* when the variable is set but the int
    # default otherwise; normalise so the attribute always has one type.
    self._rmq_ping_interval = int(os.getenv('RMQ_PING_INTERVAL', 10))

    self._logger.info('Application Manager initialized')
    self._prof.prof('amgr obj created', uid=self._uid)
    self._report.ok('>>ok\n')
def _get_reporter(self, name):
    """
    Build and return a new `ru.Reporter` called `name` in the
    'radical.pilot' namespace.  The reporter targets 'stdout' and is given
    `self._logdir` as its path.
    """

    settings = {'name': name,
                'ns': 'radical.pilot',
                'targets': ['stdout'],
                'path': self._logdir}

    return ru.Reporter(**settings)
def fetch_json(sid, dburl=None, tgt=None, skip_existing=False, session=None,
               log=None):
    '''
    Dump the database documents of session `sid` into `<tgt>/<sid>.json`.

    sid:           session id
    dburl:         MongoDB URL; falls back to $RADICAL_PILOT_DBURL
    tgt:           target directory (default '.'); relative paths are
                   interpreted relative to the current working directory
    skip_existing: if set, a non-empty target file is left untouched and the
                   database is not contacted
    session:       optional session object providing `_log` and `_rep`
    log:           optional logger; takes precedence over `session._log`

    returns file name

    Raises ValueError if the database needs to be contacted but no URL is
    available, and OSError if the target directory cannot be created.
    '''

    # Resolve logger and reporter independently: the original only bound
    # `rep` when `log` was *not* passed, so an explicit logger led to an
    # unbound-variable error at the final `rep.ok()`.
    rep = None
    if session:
        if not log:
            log = session._log
        rep = session._rep

    if not log:
        log = ru.Logger('radical.pilot.utils')
    if rep is None:
        rep = ru.Reporter('radical.pilot.utils')

    if not tgt:
        tgt = '.'

    if tgt.startswith('/'):
        # Assume an absolute path
        dst = os.path.join(tgt, '%s.json' % sid)
    else:
        # Assume a relative path
        dst = os.path.join(os.getcwd(), tgt, '%s.json' % sid)

    # Create the directory the file is written *into* (the original created
    # only the parent of `tgt`), and only ignore the "already exists" case.
    dst_dir = os.path.dirname(dst)
    try:
        os.makedirs(dst_dir)
    except OSError:
        if not os.path.isdir(dst_dir):
            raise

    if skip_existing and os.path.isfile(dst) \
            and os.stat(dst).st_size > 0:

        log.info("session already in %s", dst)

    else:

        if not dburl:
            dburl = os.environ.get('RADICAL_PILOT_DBURL')

        if not dburl:
            raise ValueError('RADICAL_PILOT_DBURL is not set')

        mongo, db, _, _, _ = ru.mongodb_connect(dburl)

        # make sure the connection is released even if the dump fails
        try:
            json_docs = get_session_docs(db, sid)
            ru.write_json(json_docs, dst)
            log.info("session written to %s", dst)
        finally:
            mongo.close()

    rep.ok("+ %s (json)\n" % sid)
    return dst
def _get_reporter(self, name):
    '''
    Return the reporter cached in `self._reporter`.  If none is cached yet,
    create an `ru.Reporter` named `name` in the 'radical.pilot' namespace
    with `self._cfg.path` as its path, cache it, and return it.
    '''

    cached = self._reporter
    if cached:
        return cached

    self._reporter = ru.Reporter(name=name, ns='radical.pilot',
                                 path=self._cfg.path)
    return self._reporter
def _assert_reporter(pname, fname, val=True):
    '''
    Emit 'foo' through a reporter named `pname` (namespace
    'radical.utils.test', path '/tmp/').  If `fname` is given, assert that
    both the existence of that file and the result of grepping it for 'foo'
    equal `val`, then remove the file (best effort).
    '''

    rep = ru.Reporter(name=pname, ns='radical.utils.test', path='/tmp/')
    rep.info('foo')

    if fname:
        assert(val == os.path.isfile(fname))
        assert(val == _cmd('grep -e "foo" %s' % fname))

        # best-effort cleanup: the file legitimately does not exist when
        # `val` is False.  Only filesystem errors are ignored - a bare
        # `except:` would also swallow KeyboardInterrupt / SystemExit.
        try:
            os.unlink(fname)
        except OSError:
            pass
def __init__(self):
    '''
    Initialize the service endpoint:

      - create logger, reporter and profiler ('radical.nge.service')
      - register the known accounts
      - print the NGE version header
    '''

    component = 'radical.nge.service'

    self._log = ru.Logger(component)
    self._rep = ru.Reporter(component)
    self._prof = ru.Profiler(component)

    # NOTE(review): credentials are hard-coded in the source - consider
    #               loading them from a protected configuration instead.
    credentials = (('andre', 'erdna'),
                   ('matteo', 'eottam'),
                   ('daniel', 'leinad'),
                   ('guest', 'guest'))

    self._accounts = {user: _Account(user, secret)
                      for user, secret in credentials}

    self._rep.header('--- NGE (%s) ---' % rn.version)
def __init__(self, url, log=None, rep=None, prof=None):
    """
    Create a client for the service at `url`.

    url:  service URL; if it carries both username and password,
          `self.login()` is called with them right away
    log:  logger to use;   defaults to a new ru.Logger('radical.nge')
    rep:  reporter to use; defaults to a new ru.Reporter('radical.nge')
    prof: profiler to use; defaults to a new ru.Profiler('radical.nge')
    """

    if log: self._log = log
    else:   self._log = ru.Logger('radical.nge')

    # the original assigned `log` here, so a caller-supplied reporter was
    # silently replaced by the logger
    if rep: self._rep = rep
    else:   self._rep = ru.Reporter('radical.nge')

    if prof: self._prof = prof
    else:    self._prof = ru.Profiler('radical.nge')

    self._cookies = list()
    self._url = ru.Url(url)

    # base string for queries: the URL without trailing slashes
    self._qbase = ru.Url(url)
  # self._qbase.username = None
  # self._qbase.password = None
    self._qbase = str(self._qbase).rstrip('/')

    if self._url.username and self._url.password:
        self.login(self._url.username, self._url.password)
def fetch_logfiles(sid, dburl=None, src=None, tgt=None, access=None,
                   session=None, skip_existing=False, fetch_client=False,
                   log=None):
    '''
    Fetch the logfiles of all pilots of a session into `<tgt>/<sid>/`.

    sid:           session for which all logfiles are fetched
    dburl:         MongoDB URL; falls back to $RADICAL_PILOT_DBURL
    src:           dir to look for client session logfiles (default: cwd)
    tgt:           dir to store the logfile in (default: cwd)
    access:        optional URL whose schema and host replace those of the
                   pilot sandbox URLs
    session:       optional session; provides `_log` / `_rep` and is passed
                   to the saga filesystem calls
    skip_existing: do not re-fetch files which exist locally and are
                   non-empty
    fetch_client:  also fetch the client side logfile `<src>/<sid>.log`
    log:           optional logger; takes precedence over `session._log`

    returns list of file names

    Raises RuntimeError if no database URL is available.  Errors while
    handling an individual pilot are reported and logged, not raised.
    '''

    # Resolve logger and reporter independently: the original only bound
    # `rep` when `log` was *not* passed, so an explicit logger led to an
    # unbound-variable error at the first `rep.ok()` / `rep.error()`.
    rep = None
    if session:
        if not log:
            log = session._log
        rep = session._rep

    if not log:
        log = ru.Logger('radical.pilot.utils')
    if rep is None:
        rep = ru.Reporter('radical.pilot.utils')

    ret = list()

    # use .get(): indexing os.environ raised KeyError for an unset variable
    # and made the RuntimeError below unreachable
    if not dburl:
        dburl = os.environ.get('RADICAL_PILOT_DBURL')

    if not dburl:
        raise RuntimeError('Please set RADICAL_PILOT_DBURL')

    if not src:
        src = os.getcwd()

    if not tgt:
        tgt = os.getcwd()

    if not tgt.startswith('/') and '://' not in tgt:
        tgt = "%s/%s" % (os.getcwd(), tgt)

    # we always create a session dir as real target
    tgt_url = saga.Url("%s/%s/" % (tgt, sid))

    # Turn URLs without schema://host into file://localhost,
    # so that they dont become interpreted as relative.
    if not tgt_url.schema:
        tgt_url.schema = 'file'
    if not tgt_url.host:
        tgt_url.host = 'localhost'

    if fetch_client:
        # first fetch session logfile
        client_logfile = "%s/%s.log" % (src, sid)

        ftgt = saga.Url('%s/%s' % (tgt_url,
                                   os.path.basename(client_logfile)))
        ret.append("%s" % ftgt.path)

        if skip_existing and os.path.isfile(ftgt.path) \
                and os.stat(ftgt.path).st_size > 0:
            pass
        else:
            log_file = saga.filesystem.File(client_logfile, session=session)
            log_file.copy(ftgt, flags=saga.filesystem.CREATE_PARENTS)
            log_file.close()

    _, db, _, _, _ = ru.mongodb_connect(dburl)

    json_docs = get_session_docs(db, sid)

    pilots = json_docs['pilot']
    num_pilots = len(pilots)
    log.info("Session: %s", sid)
    log.info("Number of pilots in session: %d", num_pilots)

    for pilot in pilots:

        try:
            sandbox_url = saga.Url(pilot['pilot_sandbox'])

            if access:
                # Allow to use a different access schema than used for the
                # run.  Useful if you ran from the headnode, but would like
                # to retrieve the logfiles to your desktop (Hello Titan).
                access_url = saga.Url(access)
                sandbox_url.schema = access_url.schema
                sandbox_url.host = access_url.host

            sandbox = saga.filesystem.Directory(sandbox_url, session=session)

            # Try to fetch a tarball of logfiles, so that we can get them
            # all in one (SAGA) go!
            LOGFILES_TARBALL = '%s.log.tgz' % pilot['uid']
            tarball_available = False
            try:
                if sandbox.is_file(LOGFILES_TARBALL) and \
                   sandbox.get_size(LOGFILES_TARBALL):

                    log.info("logfiles tarball exists")
                    ftgt = saga.Url('%s/%s' % (tgt_url, LOGFILES_TARBALL))

                    if skip_existing and os.path.isfile(ftgt.path) \
                            and os.stat(ftgt.path).st_size > 0:

                        log.info("Skip fetching of '%s/%s' to '%s'.",
                                 sandbox_url, LOGFILES_TARBALL, tgt_url)
                        tarball_available = True

                    else:
                        log.info("Fetching '%s%s' to '%s'.",
                                 sandbox_url, LOGFILES_TARBALL, tgt_url)
                        log_file = saga.filesystem.File(
                            "%s%s" % (sandbox_url, LOGFILES_TARBALL),
                            session=session)
                        log_file.copy(ftgt,
                                      flags=saga.filesystem.CREATE_PARENTS)
                        log_file.close()

                        tarball_available = True
                else:
                    log.warn("logiles tarball doesnt exists")

            except saga.DoesNotExist:
                log.warn("logfiles tarball doesnt exists")

            # the per-pilot target dir may already exist
            try:
                os.mkdir("%s/%s" % (tgt_url.path, pilot['uid']))
            except OSError:
                pass

            # We now have a local tarball
            if tarball_available:
                log.debug("Extract tarball %s to %s",
                          ftgt.path, tgt_url.path)

                try:
                    # NOTE(review): extractall() trusts member paths in the
                    #               archive - fine only for trusted sandboxes
                    # context manager closes the archive handle
                    with tarfile.open(ftgt.path) as tarball:
                        tarball.extractall("%s/%s" % (tgt_url.path,
                                                      pilot['uid']))

                    logfiles = glob.glob("%s/%s/*.log" % (tgt_url.path,
                                                          pilot['uid']))
                    log.info("tarball %s extracted to '%s/%s/'.",
                             ftgt.path, tgt_url.path, pilot['uid'])
                    ret.extend(logfiles)
                    os.unlink(ftgt.path)

                    # If extract succeeded, no need to fetch individual
                    # logfiles.  (Only in that case: on failure we fall
                    # through to the per-file fetch below.)
                    rep.ok("+ %s (logfiles)\n" % pilot['uid'])
                    continue

                except Exception as e:
                    log.warn('could not extract tarball %s [%s]',
                             ftgt.path, e)

            # If we dont have a tarball (for whichever reason), fetch
            # individual logfiles
            logfiles = sandbox.list('*.log')

            for logfile in logfiles:

                ftgt = saga.Url('%s/%s/%s' % (tgt_url, pilot['uid'],
                                              logfile))
                ret.append("%s" % ftgt.path)

                if skip_existing and os.path.isfile(ftgt.path) \
                        and os.stat(ftgt.path).st_size > 0:
                    continue

                log_file = saga.filesystem.File(
                    "%s%s" % (sandbox_url, logfile), session=session)
                log_file.copy(ftgt, flags=saga.filesystem.CREATE_PARENTS)
                log_file.close()

            rep.ok("+ %s (logfiles)\n" % pilot['uid'])

        except Exception:
            # one failing pilot must not abort the others - but keep the
            # traceback instead of dropping it silently
            rep.error("- %s (logfiles)\n" % pilot['uid'])
            log.exception('failed to fetch logfiles for %s', pilot['uid'])

    return ret
stage.add_tasks(task) return stage if __name__ == "__main__": # Read YAML configuration file from stdin try: config_filename = sys.argv[1] except Exception: raise ValueError(f"Usage:\tpython {sys.argv[0]} [config.json]\n\n") cfg = ExperimentConfig.from_yaml(config_filename) reporter = ru.Reporter(name="radical.entk") reporter.title(cfg.title) # Create Application Manager try: appman = AppManager( hostname=os.environ["RMQ_HOSTNAME"], port=int(os.environ["RMQ_PORT"]), username=os.environ["RMQ_USERNAME"], password=os.environ["RMQ_PASSWORD"], ) except KeyError: raise ValueError( "Invalid RMQ environment. Please see README.md for configuring environment." )
def test_ordered_scheduler():
    """
    Submit one local pilot and n_pipes * n_stages * n_tasks shuffled compute
    units carrying 'order' tags, then wait for all units to finish.  The
    session is always closed in the `finally` clause.
    """

    report = ru.Reporter(name='radical.pilot')
    report.title('Getting Started (RP version %s)' % rp.version)

    session = rp.Session()

    try:
        # read the config used for resource details
        report.info('read config')
        report.ok('>>ok\n')

        report.header('submit pilots')

        pd_init = {
            'resource': 'local.localhost',
            'runtime': 5,
            'exit_on_error': True,
            'cores': 10
        }
        pdesc = rp.ComputePilotDescription(pd_init)
        pmgr = rp.PilotManager(session=session)
        pilot = pmgr.submit_pilots(pdesc)

        report.header('submit pipelines')

        umgr = rp.UnitManager(session=session)
        umgr.add_pilots(pilot)

        n_pipes = 2
        n_stages = 5
        n_tasks = 4

        cuds = list()
        for p in range(n_pipes):
            for s in range(n_stages):
                for t in range(n_tasks):

                    cud = rp.ComputeUnitDescription()
                    # NOTE(review): `pwd` is not defined in this function -
                    #               presumably a module-level name; confirm
                    cud.executable = '%s/pipeline_task.sh' % pwd
                    cud.arguments = [p, s, t, 10]
                    cud.cpu_processes = 1
                    # one tag namespace per pipeline, ordered by stage
                    # index, with n_tasks units per stage
                    cud.tags = {
                        'order': {
                            'ns': p,
                            'order': s,
                            'size': n_tasks
                        }
                    }
                    cud.name = 'p%03d-s%03d-t%03d' % (p, s, t)
                    cuds.append(cud)
                    report.progress()

        # submit in random order, so the units do not arrive sorted
        import random
        random.shuffle(cuds)

        # Submit the previously created ComputeUnit descriptions to the
        # PilotManager. This will trigger the selected scheduler to start
        # assigning ComputeUnits to the ComputePilots.
        umgr.submit_units(cuds)

        # Wait for all compute units to reach a final state
        report.header('gather results')
        umgr.wait_units()

    except Exception as e:
        # Something unexpected happened in the pilot code above
        report.error('caught Exception: %s\n' % e)
        ru.print_exception_trace()
        raise

    except (KeyboardInterrupt, SystemExit) as e:
        # the callback called sys.exit(), and we can here catch the
        # corresponding KeyboardInterrupt exception for shutdown. We also catch
        # SystemExit (which gets raised if the main threads exits for some other
        # reason).
        # Note: unlike the handler above, this one does not re-raise.
        ru.print_exception_trace()
        report.warn('exit requested\n')

    finally:
        # always clean up the session, no matter if we caught an exception or
        # not. This will kill all remaining pilots.
        report.header('finalize')
        session.close(download=False)

    report.header()
def test_reporter():
    '''
    Exercise the reporter API (header, info, progress, ok, warn, error,
    plain, idle, set_style, exit) with the report target redirected to a
    file in /tmp, then check that the file exists and contains 'header'.
    '''

    pname = 'ru.%d' % os.getpid()
    fname = '/tmp/%s.prof' % pname

    # redirect the reporter output to `fname`
    os.environ['RADICAL_UTILS_REPORT'] = 'True'
    os.environ['RADICAL_UTILS_REPORT_TGT'] = fname

    rep = ru.Reporter(name=pname, ns='radical.utils')

    rep.header('header \n')
    rep.info('info \n')
    rep.progress('progress\n')
    rep.ok('ok \n')
    rep.warn('warn \n')
    rep.error('error \n')
    rep.plain('plain \n')

    rep.info('test idler:')
    rep.idle(mode='start')
    for _ in range(3):
        rep.idle()
        time.sleep(0.03)
    rep.idle(color='ok', c='.')
    rep.idle(color='error', c='.')
    for _ in range(3):
        rep.idle()
        time.sleep(0.01)
    rep.idle(mode='stop')
    rep.ok('>>done\n')

    # pylint disable=E0501
    rep.info('idle test\n')
    rep.info('1234567891 2 3 4 5 6 7 8\n\t')  # noqa
    rep.info('.0.........0.........0.........0.........0.........0.........0.........0')  # noqa
    # pylint enable=E0501

    rep.idle(mode='start')
    for _ in range(200):
        rep.idle()
        time.sleep(0.01)
        rep.idle()
        time.sleep(0.01)
        rep.idle()
        time.sleep(0.01)
        rep.idle()
        time.sleep(0.01)
        rep.idle(color='ok', c="+")
    rep.idle(mode='stop')

    rep.set_style('error', color='yellow', style='ELTTTTMELE', segment='X')
    rep.error('error')

    # rep.exit() is expected to raise SystemExit
    try:
        rep.exit('exit', 1)
    except SystemExit:
        pass
    except Exception as e:
        assert (False), 'expected system exit, got %s' % e
    else:
        # the original passed silently when rep.exit() simply returned
        assert (False), 'expected system exit, but rep.exit() returned'

    assert (os.path.isfile(fname))
    assert (_cmd('grep -e "header" %s' % fname))

    # best-effort cleanup; only filesystem errors are ignored
    try:
        os.unlink(fname)
    except OSError:
        pass
###############################################################################
#
def collect_all(sessions_to_fetch):
    '''
    Call `collect()` for every session id in `sessions_to_fetch`.  A failure
    for one session is reported (including the error) via the module-level
    `report` and does not stop the remaining sessions.
    '''

    for sid in sessions_to_fetch:
        try:
            collect(sid)
        except Exception as e:
            # include the cause - the original dropped it
            report.error("Collection of info for %s failed: %s" % (sid, e))


###############################################################################
#
if __name__ == '__main__':

    report = ru.Reporter("Collect profiling and json data to local disk.")

    session_ids = []

    # Read from file if specified, otherwise read from stdin
    f = open(sys.argv[1]) if len(sys.argv) > 1 else sys.stdin
    try:
        for line in f:
            session = line.strip()
            # ignore blank lines
            if session:
                session_ids.append(session)
    finally:
        # close what we opened, but leave stdin alone
        if f is not sys.stdin:
            f.close()

    report.info("Session ids found on input:\n")
    report.plain("%s\n" % session_ids)

    collect_all(session_ids)
self.progress = self._logger.report.progress self.ok = self._logger.report.ok self.warn = self._logger.report.warn self.error = self._logger.report.error self.exit = self._logger.report.exit self.plain = self._logger.report.plain self.set_style = self._logger.report.set_style # ------------------------------------------------------------------------------ if __name__ == "__main__": import radical.utils as ru r = ru.Reporter(title='test') r.header('header \n') r.info('info \n') r.progress('progress\n') r.ok('ok \n') r.warn('warn \n') r.error('error \n') r.plain('plain \n') r.set_style('error', color='yellow', style='ELTTMLE', segment='X') r.error('error ') i = 0 j = 0 for cname, col in r.COLORS.items():
def _assert_amgr_defaults(amgr):
    '''Assert the state every freshly constructed AppManager must have.'''

    assert amgr._uid.split('.') == ['appmanager', '0000']
    assert type(amgr._logger) is type(ru.get_logger('radical.tests'))
    assert type(amgr._prof) is type(ru.Profiler('radical.tests'))
    assert type(amgr._report) is type(ru.Reporter('radical.tests'))
    assert isinstance(amgr.name, str)

    # RabbitMQ inits
    assert amgr._mq_hostname == hostname
    assert amgr._port == port

    # RabbitMQ Queues
    assert amgr._num_pending_qs == 1
    assert amgr._num_completed_qs == 1
    assert isinstance(amgr._pending_queue, list)
    assert isinstance(amgr._completed_queue, list)

    # Global parameters to have default values
    # (boolean checks stay equality based: some of the values may come from
    # configuration - presumably not guaranteed to be bool singletons)
    assert amgr._mqs_setup == False                                 # noqa
    assert amgr._resource_desc is None
    assert amgr._task_manager is None
    assert amgr._workflow is None
    assert amgr._resubmit_failed == False                           # noqa
    assert amgr._reattempts == 3
    assert amgr._cur_attempt == 1
    assert amgr._autoterminate == True                              # noqa
    assert isinstance(amgr.shared_data, list)


def test_amgr_initialization():
    '''
    Check the initial AppManager state, once for a manager created with an
    explicit name (which must also become the session id) and once for a
    manager created without a name.
    '''

    amgr_name = ru.generate_id('test.appmanager.%(item_counter)04d',
                               ru.ID_CUSTOM)
    amgr = Amgr(hostname=hostname, port=port, name=amgr_name)

    assert amgr._name.split('.') == amgr_name.split('.')
    assert amgr._sid.split('.') == amgr_name.split('.')
    _assert_amgr_defaults(amgr)

    amgr = Amgr(hostname=hostname, port=port)
    _assert_amgr_defaults(amgr)
p.add_stages(s3) # -------------------------- # Outlier identification stage s4 = generate_interfacing_stage() p.add_stages(s4) CUR_STAGE += 1 return p # ------------------------------------------------------------------------------ if __name__ == '__main__': reporter = ru.Reporter(name='radical.entk') reporter.title('COVID-19 - Workflow2') # resource specified as argument if len(sys.argv) == 2: cfg_file = sys.argv[1] elif sys.argv[0] == "molecules_adrp.py": cfg_file = "adrp_system.json" elif sys.argv[0] == "molecules_3clpro.py": cfg_file = "3clpro_system.json" else: reporter.exit('Usage:\t%s [config.json]\n\n' % sys.argv[0]) cfg = ru.Config(cfg=ru.read_json(cfg_file)) cfg['node_counts'] = max(1, cfg['md_counts'] // cfg['gpu_per_node'])
# set if you want to see what happens behind the scences! RUNTIME = 20 # how long to run the pilot CORES = 64 # how many cores to use for one pilot UNITS = 128 # how many units to create SLEEP = 0 # how long each unit sleeps SCHED = rp.SCHED_DIRECT_SUBMISSION #------------------------------------------------------------------------------ # if __name__ == "__main__": # we use a reporter class for nicer output report = ru.Reporter("Getting Started") # Create a new session. No need to try/except this: if session creation # fails, there is not much we can do anyways... session = rp.Session() # all other pilot code is now tried/excepted. If an exception is caught, we # can rely on the session object to exist and be valid, and we can thus tear # the whole RP stack down via a 'session.close()' call in the 'finally' # clause... try: report.info('read configs') resources = ru.read_json('%s/config.json', os.path.dirname(__file__)) report.ok('\\ok\n') report.header('submit pilots')
# checks = [ # OLD_CUD, GOOD, SCHEMA, RU_MUNCH, PYDICT, MYDICT, RU_CFG, NEW_CUD, RU_DICT, PYDANTIC ] data = list() for check in checks: results = [check.__name__] n = 1024 * 1024 l = list() t = list() r = ru.Reporter('radical.test') r.progress_tgt(n * 5, label=check.__name__) t0 = time.time() # ----------------------------------------------- # create n entities for i in range(n): e = check() l.append(e) r.progress() t1 = time.time() results.append(t1 - t0) # -----------------------------------------------
def fetch_profiles (sid, dburl=None, src=None, tgt=None, access=None,
        session=None, skip_existing=False, fetch_client=False, log=None):
    '''
    Fetch the profiles of all pilots of a session.

    sid:           session for which all profiles are fetched
    dburl:         MongoDB URL; falls back to $RADICAL_PILOT_DBURL
    src:           dir to look for client session profiles
                   ($src/$sid/*.prof); default: cwd
    tgt:           dir to store the profile in (default: cwd)
                     - $tgt/$sid/*.prof,
                     - $tgt/$sid/$pilot_id/*.prof)
    access:        optional URL whose schema and host replace those of the
                   pilot sandbox URLs
    session:       optional session; provides `_log` / `_rep` and is passed
                   to the filesystem calls
    skip_existing: do not re-fetch files which exist locally and are
                   non-empty
    fetch_client:  also fetch the client side profiles
    log:           optional logger; takes precedence over `session._log`

    returns list of file names

    Raises ValueError if no database URL is available, and RuntimeError if
    `fetch_client` is set but a client profile is missing.  Errors while
    handling an individual pilot are reported and logged, not raised.
    '''

    # Resolve logger and reporter independently: the original only bound
    # `rep` when `log` was *not* passed, so an explicit logger led to an
    # unbound-variable error at the first `rep.ok()` / `rep.error()`.
    rep = None
    if session:
        if not log:
            log = session._log
        rep = session._rep

    if not log:
        log = ru.Logger('radical.pilot.utils')
    if rep is None:
        rep = ru.Reporter('radical.pilot.utils')

    ret = list()

    # use .get(): indexing os.environ raised KeyError for an unset variable
    # and made the ValueError below unreachable
    if not dburl:
        dburl = os.environ.get('RADICAL_PILOT_DBURL')

    if not dburl:
        raise ValueError('RADICAL_PILOT_DBURL is not set')

    if not src:
        src = os.getcwd()

    if not tgt:
        tgt = os.getcwd()

    if not tgt.startswith('/') and '://' not in tgt:
        tgt = "%s/%s" % (os.getcwd(), tgt)

    # we always create a session dir as real target
    tgt_url = rs.Url("%s/%s/" % (tgt, sid))

    # Turn URLs without schema://host into file://localhost,
    # so that they dont become interpreted as relative.
    if not tgt_url.schema:
        tgt_url.schema = 'file'
    if not tgt_url.host:
        tgt_url.host = 'localhost'

    # first fetch session profile
    if fetch_client:
        client_profiles = glob.glob("%s/%s/*.prof" % (src, sid))
        if not client_profiles:
            raise RuntimeError('no client profiles in %s/%s' % (src, sid))

        for client_profile in client_profiles:

            ftgt = rs.Url('%s/%s' % (tgt_url,
                                     os.path.basename(client_profile)))
            ret.append("%s" % ftgt.path)

            if skip_existing and os.path.isfile(ftgt.path) \
                    and os.stat(ftgt.path).st_size > 0:
                pass
            else:
                prof_file = rs.fs.File(client_profile, session=session)
                prof_file.copy(ftgt, flags=rs.fs.CREATE_PARENTS)
                prof_file.close()

            if not os.path.isfile(client_profile):
                raise RuntimeError('client profilefile %s does not exist'
                                   % client_profile)

    _, db, _, _, _ = ru.mongodb_connect (dburl)

    json_docs = get_session_docs(db, sid)

    pilots = json_docs['pilot']
    num_pilots = len(pilots)
    log.debug("Session: %s", sid)
    log.debug("Number of pilots in session: %d", num_pilots)

    for pilot in pilots:

        try:
            log.debug("processing pilot '%s'", pilot['uid'])

            sandbox_url = rs.Url(pilot['pilot_sandbox'])

            if access:
                # Allow to use a different access schema than used for the
                # run.  Useful if you ran from the headnode, but would like
                # to retrieve the profiles to your desktop (Hello Titan).
                access_url = rs.Url(access)
                sandbox_url.schema = access_url.schema
                sandbox_url.host = access_url.host

            sandbox = rs.fs.Directory (sandbox_url, session=session)

            # Try to fetch a tarball of profiles, so that we can get them
            # all in one (SAGA) go!
            PROFILES_TARBALL = '%s.prof.tgz' % pilot['uid']
            tarball_available = False
            try:
                if sandbox.is_file(PROFILES_TARBALL) and \
                   sandbox.get_size(PROFILES_TARBALL):

                    log.info("profiles tarball exists")
                    ftgt = rs.Url('%s/%s' % (tgt_url, PROFILES_TARBALL))

                    if skip_existing and os.path.isfile(ftgt.path) \
                            and os.stat(ftgt.path).st_size > 0:

                        log.info("skip fetching of '%s/%s' to '%s'.",
                                 sandbox_url, PROFILES_TARBALL, tgt_url)
                        tarball_available = True

                    else:
                        log.info("fetch '%s%s' to '%s'.",
                                 sandbox_url, PROFILES_TARBALL, tgt_url)
                        prof_file = rs.fs.File(
                            "%s%s" % (sandbox_url, PROFILES_TARBALL),
                            session=session)
                        prof_file.copy(ftgt, flags=rs.fs.CREATE_PARENTS)
                        prof_file.close()

                        tarball_available = True
                else:
                    log.warn("profiles tarball doesnt exists!")

            except rs.DoesNotExist:
                log.exception("exception(TODO): profiles tarball doesnt exists!")

            # the per-pilot target dir may already exist
            try:
                os.mkdir("%s/%s" % (tgt_url.path, pilot['uid']))
            except OSError:
                pass

            # We now have a local tarball
            if tarball_available:
                log.info("Extract tarball %s to '%s'.",
                         ftgt.path, tgt_url.path)
                try:
                    # NOTE(review): extractall() trusts member paths in the
                    #               archive - fine only for trusted sandboxes
                    # context manager closes the archive handle
                    with tarfile.open(ftgt.path, mode='r:gz') as tarball:
                        tarball.extractall("%s/%s" % (tgt_url.path,
                                                      pilot['uid']))

                    profiles = glob.glob("%s/%s/*.prof" % (tgt_url.path,
                                                           pilot['uid']))
                    ret.extend(profiles)
                    os.unlink(ftgt.path)

                    # If extract succeeded, no need to fetch individual
                    # profiles
                    rep.ok("+ %s (profiles)\n" % pilot['uid'])
                    continue

                except Exception as e:
                    log.warn('could not extract tarball %s [%s]',
                             ftgt.path, e)

            # If we dont have a tarball (for whichever reason), fetch
            # individual profiles
            profiles = sandbox.list('*.prof')
            for prof in profiles:

                ftgt = rs.Url('%s/%s/%s' % (tgt_url, pilot['uid'], prof))
                ret.append("%s" % ftgt.path)

                if skip_existing and os.path.isfile(ftgt.path) \
                        and os.stat(ftgt.path).st_size > 0:
                    pass
                else:
                    prof_file = rs.fs.File("%s%s" % (sandbox_url, prof),
                                           session=session)
                    prof_file.copy(ftgt, flags=rs.fs.CREATE_PARENTS)
                    prof_file.close()

            rep.ok("+ %s (profiles)\n" % pilot['uid'])

        except Exception:
            # one failing pilot must not abort the others
            rep.error("- %s (profiles)\n" % pilot['uid'])
            log.exception('failed to fet profile for %s', pilot['uid'])

    return ret
def test_bw_tagging():
    '''
    Run two batches of five '/bin/hostname' units on one pilot.  Each unit
    of the second batch is tagged with the uid of the matching unit of the
    first batch; the test asserts that both units of a pair report the same
    hostname.
    '''

    def _first_line(path):
        # read via a context manager so no file handle is leaked
        with open(path, 'r') as fin:
            return fin.readline().strip()

    # we use a reporter class for nicer output
    report = ru.Reporter(name='radical.pilot')
    report.title('Getting Started (RP version %s)' % rp.version)

    # Create a new session. No need to try/except this: if session creation
    # fails, there is not much we can do anyways...
    session = rp.Session()

    n = 5  # number of units to run

    try:
        # Add a Pilot Manager. Pilot managers manage one or more
        # ComputePilots.
        pmgr = rp.PilotManager(session=session)

        # Define an [n]-core local pilot that runs for [x] minutes
        # Here we use a dict to initialize the description object
        pd_init = {
            'resource': 'ncsa.bw_aprun',
            'runtime': 10,  # pilot runtime (min)
            'exit_on_error': True,
            'project': 'gk4',
            'queue': 'high',
            'access_schema': 'gsissh',
            'cores': 128
        }
        pdesc = rp.ComputePilotDescription(pd_init)

        # Launch the pilot.
        pilot = pmgr.submit_pilots(pdesc)

        report.header('submit units')

        # Register the ComputePilot in a UnitManager object.
        umgr = rp.UnitManager(session=session)
        umgr.add_pilots(pilot)

        # Create a workload of ComputeUnits.
        # Each compute unit runs '/bin/hostname'.
        report.info('create %d unit description(s)\n\t' % n)

        cuds = list()
        for i in range(0, n):

            # create a new CU description, and fill it.
            # Here we don't use dict initialization.
            cud = rp.ComputeUnitDescription()
            cud.executable = '/bin/hostname'
            cud.arguments = ['>', 's1_t%s_hostname.txt' % i]
            cud.cpu_processes = 1
            cud.cpu_threads = 16
            # cud.cpu_process_type = rp.MPI
            # cud.cpu_thread_type = rp.OpenMP
            cud.output_staging = {
                'source': 'unit:///s1_t%s_hostname.txt' % i,
                'target': 'client:///s1_t%s_hostname.txt' % i,
                'action': rp.TRANSFER
            }
            cuds.append(cud)
            report.progress()
        report.ok('>>ok\n')

        # Submit the previously created ComputeUnit descriptions to the
        # PilotManager. This will trigger the selected scheduler to start
        # assigning ComputeUnits to the ComputePilots.
        cus = umgr.submit_units(cuds)

        # Wait for all compute units to reach a final state
        # (DONE, CANCELED or FAILED).
        report.header('gather results')
        umgr.wait_units()

        # second batch: same workload, tagged with the first batch's uids
        report.info('create %d unit description(s)\n\t' % n)

        cuds = list()
        for i in range(0, n):

            # create a new CU description, and fill it.
            # Here we don't use dict initialization.
            cud = rp.ComputeUnitDescription()
            cud.executable = '/bin/hostname'
            cud.arguments = ['>', 's2_t%s_hostname.txt' % i]
            cud.cpu_processes = 1
            cud.cpu_threads = 16
            cud.tag = cus[i].uid
            # cud.cpu_process_type = rp.MPI
            # cud.cpu_thread_type = rp.OpenMP
            cud.output_staging = {
                'source': 'unit:///s2_t%s_hostname.txt' % i,
                'target': 'client:///s2_t%s_hostname.txt' % i,
                'action': rp.TRANSFER
            }
            cuds.append(cud)
            report.progress()
        report.ok('>>ok\n')

        # Submit the previously created ComputeUnit descriptions to the
        # PilotManager. This will trigger the selected scheduler to start
        # assigning ComputeUnits to the ComputePilots.
        cus = umgr.submit_units(cuds)

        # Wait for all compute units to reach a final state
        # (DONE, CANCELED or FAILED).
        report.header('gather results')
        umgr.wait_units()

        for i in range(0, n):
            assert _first_line('s1_t%s_hostname.txt' % i) == \
                   _first_line('s2_t%s_hostname.txt' % i)

    finally:
        # always close the session and clean up, also when an assertion or
        # a submission failed
        report.header('finalize')
        session.close(download=True)

        report.header()

        # only remove the files staged by this test - the original removed
        # every *.txt file found in the current working directory
        for f in glob.glob('%s/s[12]_t*_hostname.txt' % os.getcwd()):
            os.remove(f)
session_paths = glob.glob('%s/*.session.*json' % json_dir) if not session_paths: raise Exception("No session files found in directory %s" % json_dir) session_files = [os.path.basename(e) for e in session_paths] session_ids = [e.rsplit('.json')[0] for e in session_files] print "Found sessions in %s: %s" % (json_dir, session_ids) return session_ids ############################################################################### # if __name__ == '__main__': report = ru.Reporter("Inject profiling and json data into database.") session_ids = [] # Read from file if specified, otherwise read from stdin f = open(sys.argv[1]) if len(sys.argv) > 1 else sys.stdin for line in f: session_ids.append(line.strip()) if not session_ids: session_ids = find_sessions(JSON_DIR) preprocess_all(session_ids)
import radical.pilot as rp import radical.utils as ru # ------------------------------------------------------------------------------ # # READ the RADICAL-Pilot documentation: http://radicalpilot.readthedocs.org/ # # ------------------------------------------------------------------------------ # ------------------------------------------------------------------------------ # if __name__ == '__main__': # we use a reporter class for nicer output report = ru.Reporter(name='radical.pilot') report.title('Getting Started (RP version %s)' % rp.version) # use the resource specified as argument, fall back to localhost if len(sys.argv) > 2: report.exit('Usage:\t%s [resource]\n\n' % sys.argv[0]) elif len(sys.argv) == 2: resource = sys.argv[1] else: resource = 'local.localhost' # Create a new session. No need to try/except this: if session creation # fails, there is not much we can do anyways... session = rp.Session() # all other pilot code is now tried/excepted. If an exception is caught, we
if __name__ == "__main__": if (len(sys.argv) != 2) or (sys.argv[1] not in configs): [os.remove(f) for f in session_mover.capture_fwd_logs()] print('\nUsage:\t%s [localhost || summit]\n' % sys.argv[0]) sys.exit(1) else: resource = sys.argv[1] session_mover.use_current() # we use a reporter class for nicer output reporter = ru.Reporter(name='radical.pilot', ) #, level=verbose) # Start mongod on localhost mongo = MongoInstance(dbpath) mongo.open_mongodb(create_folder=True) time.sleep(15) reporter.title('Getting Started (RP version %s)' % rp.version) session = rp.Session() try: config = configs[resource] reporter.info('reading config\n') reporter.info(pformat(config))