def get_unit_durations(data, sources):

    for dname in UNIT_DURATIONS:
        data[dname] = list()

    # get the numbers we actually want to plot
    fout = open('outliers.dat', 'w')
    ucnt = 0
    ocnt = 0
    for src in sources:

        # always point to the tarballs
        if src[-4:] != '.tbz':
            src += '.tbz'

        session = ra.Session(src, 'radical.pilot')
        units   = session.filter(etype='unit', inplace=True)
        sid     = session.uid

        for unit in units.get():
            for dname in UNIT_DURATIONS:
                dur = unit.duration(event=UNIT_DURATIONS[dname])
                if dur > 1000.0:
                    ocnt += 1
                    fout.write('%10.1f %s\n' % (dur, src))
                    fout.flush()
                else:
                    ucnt += 1
                    data[dname].append(dur)

        sys.stdout.flush()

    return data
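# `get_unit_durations` relies on a module-level UNIT_DURATIONS dict mapping
# duration names to event-pair specs accepted by `unit.duration(event=...)`.
# A minimal sketch of such a mapping, assuming the 'exec_start'/'exec_stop'
# events used elsewhere in these scripts (the key name is an assumption):

import radical.utils as ru

UNIT_DURATIONS = {'exec_app': [{ru.EVENT: 'exec_start'},
                               {ru.EVENT: 'exec_stop'}]}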
def get_entk_overheads(loc, sid):

    sess = ra.Session(stype='radical.entk', src=loc, sid=sid)

    init_time = sess.duration(event=[{ru.EVENT: 'create amgr obj'},
                                     {ru.EVENT: 'init rreq submission'}])
    res_sub_time = sess.duration(event=[{ru.EVENT: 'creating rreq'},
                                        {ru.EVENT: 'rreq submitted'}])
    total_teardown_time = sess.duration(event=[{ru.EVENT: 'start termination'},
                                               {ru.EVENT: 'termination done'}])
    rts_teardown_time = sess.duration(
        event=[{ru.EVENT: 'canceling resource allocation'},
               {ru.EVENT: 'resource allocation cancelled'}])

    return {'init_time'          : init_time,
            'res_sub_time'       : res_sub_time,
            'total_teardown_time': total_teardown_time,
            'rts_teardown_time'  : rts_teardown_time}
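# Hedged usage sketch for `get_entk_overheads`; the session path below is a
# hypothetical placeholder, split into source directory and session id the
# same way the surrounding scripts do:

import os

path = './re.session.example.0000'          # hypothetical EnTK session
overheads = get_entk_overheads(os.path.dirname(path), os.path.basename(path))
print('EnTK init     : %.2f s' % overheads['init_time'])
print('EnTK teardown : %.2f s' % overheads['total_teardown_time'])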
def extractTimes(path, states, sampling=1):

    currPath  = os.getcwd()
    times     = []
    numstates = len(states)
    os.chdir(path)

    for i in range(0, numstates + 2):
        times.append([])

    for sandbox in os.listdir('.'):
        if os.path.isdir('./' + sandbox):
            print("Processing " + sandbox)
            session = ra.Session(sandbox, 'radical.pilot')
            temp    = extractFromSession(session, states[numstates - 1])
            value1  = temp[len(temp) - 1][0] - temp[0][0]
            value2  = temp[len(temp) - 1][1]
            print(str(value1) + " " + str(value2))
            times[numstates - 1].append(value1)

            countall  = 0
            queueTime = 0
            for i in range(0, numstates - 1):
                time = countTime(extractFromSession(session, states[i]),
                                 sampling)
                times[i].append(time)
                countall += time
                if i == 2:
                    queueTime = time

            times[numstates].append(countall)
            times[numstates + 1].append(value1 - queueTime)
            del session

    os.chdir(currPath)
    return times
def get_adap_time(loc, sid):

    # Get adap time
    duration = 0.0
    sess     = ra.Session(stype='radical.entk', src=loc, sid=sid)
    stages   = sorted(sess.filter(etype='stage', inplace=False).list('uid'))

    for stage in stages:
        duration += sess.duration(
            event=[{ru.EVENT: 'executing post-exec for stage %s' % stage},
                   {ru.EVENT: 'post-exec executed for stage %s'  % stage}])

    return duration
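# Hedged usage sketch for `get_adap_time`; the session path is a hypothetical
# placeholder, split into source directory and session id as above:

import os

path = './re.session.example.0001'          # hypothetical EnTK session
print('adaptation time: %.2f s'
      % get_adap_time(os.path.dirname(path), os.path.basename(path)))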
def get_session_tstamps():

    time_stamps = list()
    loc = [dI for dI in os.listdir('../RP/sessions/')
              if os.path.isdir(os.path.join('../RP/sessions/', dI))]

    for entry in loc:
        sid = os.path.basename(entry)
        session = ra.Session(sid=sid, stype='radical.pilot',
                             src=os.path.join('../RP/sessions/', entry))
        pilot = session.filter(etype='pilot', inplace=True)
        time_stamps.append(pilot.ttc)

    return time_stamps
def load_sessions_cores(sdir, sessions, sncores):

    # number of units in the sessions
    # snunits = sorted(sessions.nunit.unique().tolist())

    # load the RA session objects
    sras = {}
    for sncore in sncores:
        sras[sncore] = []
        s = sessions[(sessions.ncore == sncore)]
        for sid in s.sid.tolist():
            exp = s.loc[sid]['experiment']
            src = '%s/%s/%s' % (sdir, exp, sid)
            sras[sncore].append(ra.Session(src, 'radical.pilot'))

    return sras
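# `load_sessions_cores` assumes `sessions` is a pandas DataFrame indexed by
# session id, with 'sid', 'ncore' and 'experiment' columns.  A hypothetical
# call (sid and paths are placeholders, so this is a sketch only):

import pandas as pd

sessions = pd.DataFrame({'sid'       : ['rp.session.0000'],
                         'ncore'     : [32],
                         'experiment': ['exp1']},
                        index=['rp.session.0000'])
sras = load_sessions_cores('data', sessions, sncores=[32])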
def process_rp_profiles(src):

    sid = os.path.basename(src)
    loc = os.path.dirname(src)
    tag = '/'.join(loc.split('/')[2:])
    proc_data = os.path.join(proc, tag) + '/rp_data.json'

    data = {'task_mgmt': 0, 'exec_time': 0}

    sess  = ra.Session(stype='radical.pilot', src=loc, sid=sid)
    units = sess.filter(etype='unit', inplace=False)

    data['task_mgmt'] = units.duration(state=['NEW', 'DONE'])
    data['exec_time'] = units.duration(event=[{ru.EVENT: 'exec_start'},
                                              {ru.EVENT: 'exec_stop'}])

    proc_path = write_data(data, proc_data)
    return proc_path
def extractTimestamp(path):

    currPath = os.getcwd()
    os.chdir(path)

    timestamps = {}
    for sandbox in os.listdir('./'):
        if os.path.isdir("./" + sandbox):
            session = ra.Session(sandbox, 'radical.pilot')
            startpilot = session.get(etype='pilot', uid='pilot.0000')[0] \
                                .states[rp.ACTIVE]['time']
            del session

            fileInput = open("./" + sandbox + '/pmgr.launching.0.prof')
            fileInput.readline()
            ts = float(fileInput.readline().split(',')[0])
            if ts > 0 and startpilot > 0:
                timestamps[sandbox] = ts + startpilot

    os.chdir(currPath)
    return timestamps
def get_session_tstamps():

    s           = list()
    time_stamps = list()
    loc = [dI for dI in os.listdir('../RP/sessions/')
              if os.path.isdir(os.path.join('../RP/sessions/', dI))]

    for entry in loc:
        sid = os.path.basename(entry)
        session = ra.Session(sid=sid, stype='radical.pilot',
                             src=os.path.join('../RP/sessions/', entry))
        pilot = session.filter(etype='pilot', inplace=True)
        s.append(entry)
        time_stamps.append(pilot.ttc)

    df = pd.DataFrame(list(zip(s, time_stamps)),
                      columns=['Session', 'Duration'])
    dd = df.sort_values('Duration')
    return dd.reset_index(drop=True)
def get_adap_time(loc, sid):

    # Get adap time
    duration = 0.0
    sess = ra.Session(stype='radical.entk', src=loc, sid=sid)

    duration += sess.duration(event=[{ru.EVENT: 'Adap: adding new stage'},
                                     {ru.EVENT: 'Adap: added new stage'}])
    duration += sess.duration(event=[{ru.EVENT: 'Adap: adding new task'},
                                     {ru.EVENT: 'Adap: added new task'}])

    return duration
def wrangle_session(sdir, sid):

    # Get the experiment tag for the current sdir.
    exp = sdir.split('/')[-2:][0]

    # RA objects cannot be serialized: every RA session object needs
    # to be constructed at every run.
    #
    # FIXME: AM: Actually, RA sessions can be pickled all right:
    #
    #   import radical.analytics as ra
    #   import pickle
    #   s1 = ra.Session(src=sdir, stype='radical.pilot')
    #   p  = pickle.dumps(s1)
    #   s2 = pickle.loads(p)
    #   assert(len(s1.get()) == len(s2.get()))
    #
    # If we find cases where this does not work, we can make it work.
    #
    sra_session = ra.Session(sdir, 'radical.pilot')

    # Pilot-unit relationship dictionary
    pu_rels = sra_session.describe('relations', ['pilot', 'unit'])

    # Pilots of sra: derive properties and durations.
    print '\n\n%s -- %s -- Loading pilots:' % (exp, sid)
    sra_pilots = sra_session.filter(etype='pilot', inplace=False)
    pilots = load_pilots(sid, exp, sra_pilots, pdm, pu_rels, pts)

    # Units of sra: derive properties and durations.
    print '\n\n%s -- %s -- Loading units:' % (exp, sid)
    sra_units = sra_session.filter(etype='unit', inplace=False)
    units = load_units(sid, exp, sra_units, udm, pilots, sra_session,
                       pu_rels, uts)

    # Session of sra: derive properties and total durations.
    print '\n\n%s -- %s -- Loading session:\n' % (exp, sid)
    load_session(sid, exp, sra_session, sra_pilots, sra_units, sdm, pdm,
                 udm, pilots, units, sts)
for path in glob.glob('%s/%s*' % (datadir, experiment_tag)):
    for sdir in glob.glob('%s/*' % path):

        # Session ID and session experiment.
        sid = glob.glob('%s/*.json' % sdir)[0].split('/')[-1:][0][:-5]
        exp = path.split('/')[-1:][0]

        # Consistency check: the SID of the json file name is the same SID
        # as the directory name.
        if sid == sdir.split('/')[-1:][0]:

            # RA objects cannot be serialized: every RA session object
            # needs to be constructed at every run.
            print exp + '/' + sid
            print sdir
            sra_session = ra.Session(sid, 'radical.pilot', src=sdir)

            # Pilot-unit relationship dictionary
            pu_rels = sra_session.describe('relations', ['pilot', 'unit'])

            # Pilots of sra: derive properties and durations.
            print '\n\n%s -- %s -- Loading pilots:' % (exp, sid)
            sra_pilots = sra_session.filter(etype='pilot', inplace=False)
            pilots = load_pilots(sid, exp, sra_pilots, pdm, pu_rels)

            # Units of sra: derive properties and durations.
            print '\n\n%s -- %s -- Loading units:' % (exp, sid)
            sra_units = sra_session.filter(etype='unit', inplace=False)
            units = load_units(sid, exp, sra_units, udm, pilots,
                               sra_session, pu_rels)
import os
import glob
import pprint

import radical.utils     as ru
import radical.entk      as re
import radical.analytics as ra

"""This example illustrates how to obtain durations for arbitrary (non-state)
profile events.  Modified from the examples under RADICAL-Analytics."""

# ------------------------------------------------------------------------------
#
if __name__ == '__main__':

    loc = './re.session.two.karahbit.018175.0001'
    src = os.path.dirname(loc)
    sid = os.path.basename(loc)
    session = ra.Session(src=src, sid=sid, stype='radical.entk')

    # A formatting helper before starting...
    def ppheader(message):
        separator = '\n' + 78 * '-' + '\n'
        print separator + message + separator

    # We first filter our session to obtain only the task objects
    tasks = session.filter(etype='task', inplace=False)
    print '#tasks   : %d' % len(tasks.get())

    # We use the 're.states.SUBMITTING' and 're.states.DONE' probes to find
    # the time taken by EnTK to execute all tasks
    ppheader("Time spent to execute the tasks")
    duration = tasks.duration(event=[{ru.EVENT: 'state',
                                      ru.STATE: re.states.SUBMITTING},
                                     {ru.EVENT: 'state',
                                      ru.STATE: re.states.DONE}])
import os
import glob
import pprint

import numpy             as np
import radical.utils     as ru
import radical.pilot     as rp
import radical.analytics as ra

nm_run = 'testrun'
d_run  = 'rawdata/{}'.format(nm_run)
newest = max(glob.glob(os.path.join(d_run, 'rp.session.*')),
             key=os.path.getctime)
session = ra.Session(newest, 'radical.pilot')

entities_objects = session.get()
pprint.pprint(entities_objects)

state_models = session.describe('state_model')
pprint.pprint(state_models)

state_model = session.describe('state_model', etype='unit')
pprint.pprint(state_model)

state_models = session.describe('state_model', etype=['unit', 'pilot'])
pprint.pprint(state_models)

event_models = session.describe('event_model')
pprint.pprint(event_models)

relations = session.describe('relations')
pprint.pprint(relations)
session_names = glob(session_pattern)
profile_names = list()

for session_name in session_names:
    profile_names.extend(glob(os.path.join(session_name, '*.json')))

for json_file in profile_names:
    print json_file
    try:
        source = ''.join([x + '/' for x in json_file.split('/')[0:-1]])
        json   = ru.read_json(json_file)
        sid    = os.path.basename(json_file)[:-5]
        session = ra.Session(sid=sid, stype='radical.pilot', src=source)
        units  = session.filter(etype='unit', inplace=False)
        dur    = units.ttc
        session_data = {'Nodes'    : nodes,
                        'Framework': framework,
                        'Duration' : dur,
                        'Json'     : json_file}
    except Exception as e:
        print e
        session_data = {'Nodes'    : nodes,
                        'Framework': framework,
                        'Json'     : json_file}
import sys

import radical.utils     as ru
import radical.pilot     as rp
import radical.analytics as ra

# ------------------------------------------------------------------------------
#
if __name__ == '__main__':

    if len(sys.argv) < 2:
        print "\n\tusage: %s <session>\n" % sys.argv[0]
        sys.exit(1)

    src     = sys.argv[1]
    stype   = 'radical.pilot'
    session = ra.Session(src, stype)
    units   = session.filter(etype='unit', inplace=False)
    unit_0  = units.get()[0]

    print
    print 'session: %s' % session.uid
    print 'units:   %d' % len(units.get())

    # collect all events for some unit which relate to prte
    nevents = list()
    for e in unit_0.events:
        if 'prte' in e[1]:
            nevents.append(e)

    # for that sample unit, print the events and timestamps (ordered)
    print
def loadSessions(path):

    sessions = []
    for sandbox in os.listdir(path):
        if os.path.isdir(os.path.join(path, sandbox)):
            sessions.append(ra.Session(os.path.join(path, sandbox),
                                       'radical.pilot'))
    return sessions
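# Hedged usage sketch for `loadSessions`: point it at a directory that holds
# one sandbox per RP session (the path below is a hypothetical placeholder):

for session in loadSessions('./rawdata'):
    print(session.uid)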
def get_entk_exec_time(loc, sid):

    sess  = ra.Session(stype='radical.entk', src=loc, sid=sid)
    tasks = sess.filter(etype='task', inplace=False)

    return tasks.duration(state=['SCHEDULING', 'DONE'])
for dname in UNIT_DURATIONS:
    data[dname] = list()

nm_run = 'testrun'
d_run  = 'rawdata/{}'.format(nm_run)
f_run  = 'rp.session.titan-ext1.jrossyra.017614.0002'
# f_run = 'rp.session.titan-ext1.jrossyra.017614.0003'
a_run  = os.path.join(d_run, f_run)
# a_run = max(glob.glob(os.path.join(d_run, 'rp.session.*')),
#             key=os.path.getctime)

session = ra.Session(a_run, 'radical.pilot')

# Load wrangled data saved in .csv files for synapse, microbenchmarks,
# and gromacs.
sws_sessions = pd.read_csv(os.path.join(a_run, 'sessions.csv'), index_col=0)
sws_pilots   = pd.read_csv(os.path.join(a_run, 'pilots.csv'),   index_col=0)
sws_units    = pd.read_csv(os.path.join(a_run, 'units.csv'),    index_col=0)

print 'Total number of successful runs: %s'   % sws_sessions.shape[0]
print 'Total number of pilots: %s'            % sws_pilots.shape[0]
print 'Total number of units: %s\n'           % sws_units.shape[0]

units = session.filter(etype='unit', inplace=True)
for unit in units.get():
    print "In unit", unit
    for dname in UNIT_DURATIONS:
def profiles(path):

    # load the sessions for each core-count group (32, 64, 128, 256)
    sessions = dict()
    for ncores in [32, 64, 128, 256]:
        sessions[ncores] = dict()
        for json_file in glob.glob('%s/*_%d_*/*.json' % (path, ncores)):
            src  = ''.join([x + '/' for x in json_file.split('/')[0:-1]])
            json = ru.read_json(json_file)
            sid  = os.path.basename(json_file)[:-5]
            sessions[ncores][sid] = ra.Session(sid=sid,
                                               stype='radical.pilot',
                                               src=src)

    RPexpsW = pd.DataFrame(columns=['Atoms', 'Cores', 'Run',
                                    'RADICAL-Pilot Overhead',
                                    'RADICAL-Pilot Edge Disc',
                                    'RADICAL-Pilot DataMovement',
                                    'RADICAL-Pilot ConnComp'])

    # derive the durations for each session of each core-count group
    for cores in [32, 64, 128, 256]:
        for sid, session in sessions[cores].iteritems():

            uid = list()
            for i in range(1, 1025):
                uid.append('unit.%06d' % i)

            units    = session.filter(uid=uid, etype='unit', inplace=False)
            pilot    = session.filter(etype='pilot', inplace=False)
            compUnit = session.filter(uid='unit.001025', etype='unit',
                                      inplace=False)

            overhead = (np.min(units.timestamps(state=rp.AGENT_EXECUTING)) -
                        pilot.timestamps(state=[rp.PMGR_ACTIVE]))[0]
            execution_time = \
                np.max(units.timestamps(state=rp.AGENT_STAGING_OUTPUT_PENDING)) - \
                np.min(units.timestamps(state=rp.AGENT_EXECUTING))
            data_movement = \
                (compUnit.timestamps(state=rp.AGENT_EXECUTING) -
                 np.max(units.timestamps(state=rp.AGENT_STAGING_OUTPUT_PENDING)))[0]
            connComp = \
                compUnit.timestamps(state=rp.AGENT_STAGING_OUTPUT_PENDING)[0] - \
                compUnit.timestamps(state=rp.AGENT_EXECUTING)[0]

            run   = int(sid.split('_')[-1])
            atoms = int((sid.split('_')[1]).split('K')[0])

            RPexpsW.loc[len(RPexpsW)] = [atoms, cores, run, overhead,
                                         execution_time, data_movement,
                                         connComp]

    return RPexpsW
import sys
import pprint

import radical.utils     as ru
import radical.pilot     as rp
import radical.analytics as ra

if __name__ == '__main__':

    # get the source folder to analyze
    if len(sys.argv) < 2:
        print "\n\tusage: %s <dir|tarball>\n" % sys.argv[0]
        sys.exit(1)

    src = sys.argv[1]

    if len(sys.argv) == 2:
        stype = 'radical.pilot'
    else:
        stype = sys.argv[2]

    session = ra.Session(src, stype)

    # print the duration between PMGR_ACTIVE and FINAL for each pilot
    pilots    = session.filter(etype='pilot', inplace=False)
    durations = pilots.duration([rp.PMGR_ACTIVE, rp.FINAL])
    pprint.pprint(durations)

    sys.exit(0)
__license__ = 'MIT'

"""
This example illustrates the use of the method ra.Session.get()
"""

import sys
import pprint

import radical.analytics as ra

# ------------------------------------------------------------------------------
#
if __name__ == '__main__':

    if len(sys.argv) != 2:
        print "\n\tusage: %s <dir|tarball>\n" % sys.argv[0]
        sys.exit(1)

    src     = sys.argv[1]
    session = ra.Session(src, 'radical.pilot')

    # A formatting helper before starting...
    def ppheader(message):
        separator = '\n' + 78 * '-' + '\n'
        print separator + message + separator

    # and here we go. As seen in example 01, we use ra.Session.list() to get
    # the name of all the types of entity of the session.
    etypes = session.list('etype')
    pprint.pprint(etypes)

    # We limit ourselves to the types 'unit' and 'pilot'. We use the method
    # ra.Session.get() to get all the objects in our session with etype
    # 'unit':
    ppheader("properties of the entities with etype 'unit'")
    units = session.get(etype='unit')
def get_utilization_durations(sources, version):

    if version != '2017':
        PILOT_DURATIONS['p_total'] = [{STATE: None, EVENT: 'bootstrap_0_start'},
                                      {STATE: None, EVENT: 'bootstrap_0_stop'}]
        PILOT_DURATIONS['p_boot']  = [{STATE: None, EVENT: 'bootstrap_0_start'},
                                      {STATE: None, EVENT: 'sync_rel'}]
        PILOT_DURATIONS['p_term']  = [{STATE: None, EVENT: 'cmd'},
                                      {STATE: None, EVENT: 'bootstrap_0_stop'}]

    utilization = dict()  # dict of contributions to utilization
    data        = dict()  # the numbers we ultimately plot
    sids        = list()  # used for labels
    xkeys       = list()  # x-axis labels

    # get the numbers we actually want to plot
    for src in sources:

        # always point to the tarballs
        if src[-4:] != '.tbz':
            src += '.tbz'

        print(src)

        session = ra.Session(src, 'radical.pilot')
        pilots  = session.filter(etype='pilot', inplace=False)
        units   = session.filter(etype='unit',  inplace=True)
        sid     = session.uid
        sids.append(sid)

        if len(pilots.get()) > 1:
            raise ValueError('Cannot handle multiple pilots')

        # compute how many core-hours each duration consumed (or allocated,
        # wasted, etc. - depending on the semantic type of duration)
        utilization[sid] = dict()

        for dname in PILOT_DURATIONS:
            utilization[sid][dname] = 0.0

        for dname in UNIT_DURATIONS:
            utilization[sid][dname] = 0.0

        # some additional durations we derive implicitly
        for dname in DERIVED_DURATIONS:
            utilization[sid][dname] = 0.0

        for pilot in pilots.get():

            # we immediately take care of the agent nodes, and change
            # pilot_size accordingly
            cpn    = pilot.cfg.get('cores_per_node', 16)
            psize  = pilot.description['cores']
            anodes = 0
            for agent in pilot.cfg.get('agents', []):
                if pilot.cfg['agents'][agent].get('target') == 'node':
                    anodes += 1

            walltime   = pilot.duration(event=PILOT_DURATIONS['p_total'])
            psize_full = psize
            psize      = psize_full - anodes * cpn

            utilization[sid]['p_total'] += walltime * psize_full
            utilization[sid]['p_agent'] += walltime * anodes * cpn

            # now we can derive the utilization for all other pilot durations
            # specified.  Note that this is now off by some amount for the
            # bootstrapping step where we don't yet have sub-agents, but that
            # can be justified: the sub-agent nodes are explicitly reserved
            # for their purpose at that time, too.
            tot   = 0.0
            parts = 0.0
            for dname in PILOT_DURATIONS:
                if dname == 'p_total':
                    tot = pilot.duration(event=PILOT_DURATIONS[dname])
                    continue
                try:
                    dur    = pilot.duration(event=PILOT_DURATIONS[dname])
                    parts += dur
                except Exception as e:
                    print('WARN: miss %s: %s' % (dname, e))
                    dur = 0.0
                    raise
                utilization[sid][dname] += dur * psize

        # we do the same for the unit durations - but here we add up the
        # contributions for all individual units.
        for unit in units.get():
            if version != '2017':
                unit.description['cores'] = unit.description['cpu_processes'] \
                                          * unit.description['cpu_threads']
            usize  = unit.description['cores']
            uparts = 0.0
            utot   = 0.0
            for dname in UNIT_DURATIONS:
                dur = unit.duration(event=UNIT_DURATIONS[dname])
                utilization[sid][dname] += dur * usize
                if dname == 'u_total':
                    utot += dur
                else:
                    uparts += dur

        # ----------------------------------------------------------------------
        # sanity checks and derived values
        #
        # we add up 'p_setup_1' and 'p_setup_2' to 'p_setup'
        p_setup_1 = utilization[sid]['p_setup_1']
        p_setup_2 = utilization[sid]['p_setup_2']
        utilization[sid]['p_setup'] = p_setup_1 + p_setup_2
        del utilization[sid]['p_setup_1']
        del utilization[sid]['p_setup_2']

        # For both the pilot and the unit utilization, the individual
        # contributions must be the same as the total.
        parts = 0.0
        tot   = utilization[sid]['p_total']
        for p in utilization[sid]:
            if p != 'p_total' and not p.startswith('u_'):
                parts += utilization[sid][p]
        assert abs(tot - parts) < 0.0001, '%s == %s' % (tot, parts)

        # same for unit consistency
        parts = 0.0
        tot   = utilization[sid]['u_total']
        for p in utilization[sid]:
            if p != 'u_total' and not p.startswith('p_'):
                parts += utilization[sid][p]

        # another sanity check: the pilot `p_uexec` utilization should always
        # be larger than the unit `u_total`.
        p_uexec = utilization[sid]['p_uexec']
        u_total = utilization[sid]['u_total']
        assert p_uexec > u_total, '%s > %s' % (p_uexec, u_total)

        # We in fact know that the difference above, which is not explicitly
        # accounted for otherwise, is attributed to the agent component
        # overhead and to the DB overhead: it is the overhead to get from a
        # functional pilot to the first unit being scheduled, and from the
        # last unit being unscheduled to the pilot being terminated (waiting
        # for other units to finish, etc.).  We consider that time 'idle'.
        utilization[sid]['p_idle'] = p_uexec - u_total
        del utilization[sid]['p_uexec']

        xkeys.append('%s\n%s' % (len(units.get()), psize))

        # check that the utilization contributions add up to the total
        tot_abs = utilization[sid]['p_total']
        sum_abs = 0
        sum_rel = 0
        for key in keys:
            if key not in data:
                data[key] = list()
            util_abs = utilization[sid][key]
            util_rel = 100.0 * util_abs / tot_abs
            sum_abs += util_abs
            sum_rel += util_rel
            if ABSOLUTE:
                data[key].append(util_abs)
            else:
                data[key].append(util_rel)

    return data, sids, utilization, xkeys
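# `get_utilization_durations` depends on several module-level globals that
# this snippet does not show.  A heavily hedged sketch of plausible
# definitions; the duration keys and event names here are assumptions only:

import radical.utils as ru

STATE = ru.STATE
EVENT = ru.EVENT

PILOT_DURATIONS   = dict()   # 'p_boot', 'p_setup_1', ... filled elsewhere
UNIT_DURATIONS    = {'u_total': [{EVENT: 'exec_start'},
                                 {EVENT: 'exec_stop'}]}
DERIVED_DURATIONS = ['p_agent', 'p_setup', 'p_idle']
ABSOLUTE          = True                     # plot absolute core-hours
keys              = ['p_total', 'u_total']   # contributions to plot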
def loadSession(path, sandbox):
    return ra.Session(os.path.join(path, sandbox), 'radical.pilot')
import os
import sys
import glob
import pprint

import radical.utils     as ru
import radical.entk      as re
import radical.analytics as ra

# ------------------------------------------------------------------------------
#
if __name__ == '__main__':

    # loc = './rp.session.js-104-191.jetstream-cloud.org.karahbit.018288.0029'
    loc = sys.argv[1]
    src = os.path.dirname(loc)
    sid = os.path.basename(loc)
    session = ra.Session(src=loc, sid=sid, stype='radical.pilot')

    # We first filter our session to obtain only the unit objects
    units = session.filter(etype='unit', inplace=False)

    # We use the 'exec_start' and 'exec_stop' events to find the time taken
    # by RP to execute all units
    duration = units.duration(event=[{ru.EVENT: 'exec_start'},
                                     {ru.EVENT: 'exec_stop'}])
    print('duration : %.2f' % duration)
if len(sys.argv) > 1:
    suds = sys.argv[1:]
else:
    suds = ['re.session.login5.hrlee.018425.0013',
            # ttx_u is zero, eliminating for the moment
            're.session.login5.hrlee.018425.0017',
            're.session.login5.hrlee.018425.0018',
            're.session.login5.hrlee.018425.0020',
            're.session.login5.hrlee.018425.0021']

sids = [s for s in suds]

ss = {}
for sid in suds:
    sp = sid
    ss[sid] = {'s': ra.Session(sp, 'radical.pilot')}
    ss[sid].update({
        'p': ss[sid]['s'].filter(etype='pilot',    inplace=False),
        'u': ss[sid]['s'].filter(etype='unit',     inplace=False),
        't': ss[sid]['s'].filter(etype='task',     inplace=False),
        'w': ss[sid]['s'].filter(etype='pipeline', inplace=False)})

for sid in suds:
    ss[sid].update({
        'sid': ss[sid]['s'].uid,
        'pid': ss[sid]['p'].list('uid'),
        'npilot':
)

# Assign the workflow as a set of Pipelines to the Application Manager
prof.prof('Run_Cycle_{0}'.format(Cycle + 1), uid=uid1)

appman.run()  # Run the Application Manager

prof.prof('End_Cycle_{0}'.format(Cycle + 1), uid=uid1)

# appman.resource_terminate()

mdtasks = synchronousExchange.mdtasklist
extasks = synchronousExchange.extasklist

pwd = os.getcwd()
session = ra.Session(sid='./%s' % appman.sid, stype='radical.entk', src=pwd)

mdtask_uid_map = dict()
for task in mdtasks:
    mdtask_uid_map[task.name] = task.uid
    # print task.name

extask_uid_map = dict()
for task in extasks:
    extask_uid_map[task.name] = task.uid

def get_mdtask_uids(task_names_list):
    return [mdtask_uid_map[task.name] for task in task_names_list]

def get_extask_uids(task_names_list):
def get_entk_exec_time(loc, sid):

    sess  = ra.Session(stype='radical.entk', src=loc, sid=sid)
    tasks = sess.filter(etype='task', inplace=False)

    return tasks.duration(state=['SUBMITTED', 'EXECUTED'])
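# Hedged usage sketch for `get_entk_exec_time` (this variant measures the
# SUBMITTED->EXECUTED span; the variant further above uses SCHEDULING->DONE).
# The session path below is a hypothetical placeholder:

import os

path = './re.session.example.0002'          # hypothetical EnTK session
print('exec time: %.2f s'
      % get_entk_exec_time(os.path.dirname(path), os.path.basename(path)))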
def main():

    data = dict()

    ws_path = 'data/weak_scaling_synapse_titan/optimized'
    ss_path = 'data/strong_scaling_synapse_titan'
    t_path  = 'data/tests'

    sources = [
             #  '%s/rp.session.thinkie.merzky.017494.0007'    % t_path,
                '%s/ws_syn_titan_32_32_1024_60_1.0'           % ws_path,
                '%s/ws_syn_titan_32_32_1024_60_1.1'           % ws_path,
                '%s/ws_syn_titan_64_32_2048_60_2.0'           % ws_path,
                '%s/ws_syn_titan_64_32_2048_60_2.1'           % ws_path,
                '%s/ws_syn_titan_128_32_4096_60_3.0'          % ws_path,
                '%s/ws_syn_titan_128_32_4096_60_3.1'          % ws_path,
                '%s/ws_syn_titan_256_32_8192_60_4.0'          % ws_path,
                '%s/ws_syn_titan_256_32_8192_60_4.1'          % ws_path,
                '%s/ws_syn_titan_512_32_16384_60_5.0'         % ws_path,
                '%s/ws_syn_titan_512_32_16384_60_5.1'         % ws_path,
                '%s/ws_syn_titan_1024_32_32768_60_6.0'        % ws_path,
                '%s/ws_syn_titan_1024_32_32768_60_6.1'        % ws_path,
                '%s/ws_syn_titan_2048_32_65536_60_7.0'        % ws_path,
                '%s/ws_syn_titan_2048_32_65536_60_7.1'        % ws_path,
                '%s/ws_syn_titan_4096_32_131072_60_8.0'       % ws_path,
                '%s/rp.session.titan-ext1.itoman.017473.0000' % ss_path,
                '%s/rp.session.titan-ext1.itoman.017491.0004' % ss_path,
                '%s/rp.session.titan-ext1.itoman.017492.0001' % ss_path,
                '%s/rp.session.titan-ext2.itoman.017467.0000' % ss_path,
              ]

    for dname in UNIT_DURATIONS:
        data[dname] = list()

    # get the numbers we actually want to plot
    fout = open('outliers.dat', 'w')
    ucnt = 0
    ocnt = 0
    for src in sources:

        # always point to the tarballs
        if src[-4:] != '.tbz':
            src += '.tbz'

        print
        print '-----------------------------------------------------------'
        print src

        session = ra.Session(src, 'radical.pilot')
        units   = session.filter(etype='unit', inplace=True)
        sid     = session.uid

        for unit in units.get():
            for dname in UNIT_DURATIONS:
                dur = unit.duration(event=UNIT_DURATIONS[dname])
                if dur > 1000.0:
                    ocnt += 1
                    fout.write('%10.1f %s\n' % (dur, src))
                    fout.flush()
                    sys.stdout.write('#')
                else:
                    ucnt += 1
                    data[dname].append(dur)
                    sys.stdout.write('.')
            sys.stdout.flush()
        print

    plt.figure(figsize=(20, 14))
    for dname in data:
        tmp = np.array(data[dname])
        plt.hist(tmp, alpha=0.5, bins=100, histtype='step')
        print
        print dname
        print '  mean : %10.1f' % tmp.mean()
        print '  stdev: %10.1f' % tmp.std()
        print '  min  : %10.1f' % tmp.min()
        print '  max  : %10.1f' % tmp.max()

    print
    print '  ucnt : %10d' % (ucnt + ocnt)
    print '  ocnt : %10d' % ocnt

    plt.xlabel('runtime [s]')
    plt.ylabel('number of units')
    plt.title('distribution of unit runtimes')
    plt.legend(data.keys(), ncol=5, loc='upper left',
               bbox_to_anchor=(0, 1.13))
    plt.savefig('10_unit_durations.png')

    plt.figure(figsize=(20, 14))
    plt.hist(data['exec_app'], alpha=0.5, bins=100, histtype='step')
    plt.xlabel('runtime [s]')
    plt.ylabel('number of units')
    plt.title('distribution of unit runtimes')
    plt.legend(['exec_app'], ncol=5, loc='upper left',
               bbox_to_anchor=(0, 1.13))
    plt.savefig('10_unit_durations_app.png')