Example #1
def get_unit_durations(data, sources):
    for dname in UNIT_DURATIONS:
        data[dname] = list()

    # get the numbers we actually want to plot
    fout = open('outliers.dat', 'w')
    ucnt = 0
    ocnt = 0

    for src in sources:
        # always point to the tarballs
        if src[-4:] != '.tbz':
            src += '.tbz'

        session = ra.Session(src, 'radical.pilot')
        units = session.filter(etype='unit', inplace=True)
        sid = session.uid

        for unit in units.get():
            for dname in UNIT_DURATIONS:
                dur = unit.duration(event=UNIT_DURATIONS[dname])
                if dur > 1000.0:
                    ocnt += 1
                    fout.write('%10.1f  %s\n' % (dur, src))
                    fout.flush()
                else:
                    ucnt += 1
                    data[dname].append(dur)
                sys.stdout.flush()
    return data
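UNIT_DURATIONS is defined outside this snippet. A minimal sketch of the shape the code above expects, with the event pair borrowed from the exec_start/exec_stop examples further below (the exact entries are an assumption):

import radical.utils as ru

# Assumed shape: each entry maps a duration name to the [start, stop]
# event pair that unit.duration(event=...) measures between; further
# entries (e.g. 'u_total') would follow the same pattern.
UNIT_DURATIONS = {
    'exec_app': [{ru.EVENT: 'exec_start'}, {ru.EVENT: 'exec_stop'}],
}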
Example #2
def get_entk_overheads(loc, sid):

    sess = ra.Session(stype='radical.entk', src=loc, sid=sid)
    init_time = sess.duration(event=[{
        ru.EVENT: 'create amgr obj'
    }, {
        ru.EVENT: 'init rreq submission'
    }])
    res_sub_time = sess.duration(event=[{
        ru.EVENT: 'creating rreq'
    }, {
        ru.EVENT: 'rreq submitted'
    }])
    total_teardown_time = sess.duration(event=[{
        ru.EVENT: 'start termination'
    }, {
        ru.EVENT: 'termination done'
    }])
    rts_teardown_time = sess.duration(
        event=[{
            ru.EVENT: 'canceling resource allocation'
        }, {
            ru.EVENT: 'resource allocation cancelled'
        }])

    return {
        'init_time': init_time,
        'res_sub_time': res_sub_time,
        'total_teardown_time': total_teardown_time,
        'rts_teardown_time': rts_teardown_time
    }
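A hypothetical usage of get_entk_overheads, assuming an EnTK session directory re.session.xyz under the current directory (both names are placeholders):

# Hypothetical call: 'loc' is the directory holding the session, 'sid' its ID.
overheads = get_entk_overheads(loc='.', sid='re.session.xyz')
for name, dur in sorted(overheads.items()):
    print('%-20s: %8.2f s' % (name, dur))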
Example #3
def extractTimes(path, states, sampling=1):
    currPath = os.getcwd()
    times = []
    numstates = len(states)
    os.chdir(path)
    for i in range(0, numstates + 2):
        times.append([])

    for sandbox in os.listdir('.'):
        if os.path.isdir('./' + sandbox):
            print("Processing " + sandbox)
            session = ra.Session(sandbox, 'radical.pilot')
            temp = extractFromSession(session, states[numstates - 1])
            value1 = temp[len(temp) - 1][0] - temp[0][0]
            value2 = temp[len(temp) - 1][1]
            print(str(value1) + " " + str(value2))
            times[numstates - 1].append(value1)
            countall = 0
            queueTime = 0
            for i in range(0, numstates - 1):
                time = countTime(extractFromSession(session, states[i]), sampling)
                times[i].append(time)
                countall += time
                if i == 2:
                    queueTime = time
            times[numstates].append(countall)
            times[numstates + 1].append(value1 - queueTime)
            del session
    os.chdir(currPath)
    return times
Example #4
def get_adap_time(loc, sid):
    
    # Get adap time
    duration = 0.0
    sess = ra.Session(stype='radical.entk', src=loc, sid=sid)
    stages = sorted(sess.filter(etype='stage', inplace=False).list('uid'))
#     print stages
    for stage in stages:
        duration += sess.duration(event=[{ru.EVENT: 'executing post-exec for stage %s'%stage},
                                         {ru.EVENT: 'post-exec executed for stage %s'%stage}])
    return duration
Example #5
def get_session_tstamps():

    time_stamps = list()
    loc = [dI for dI in os.listdir('../RP/sessions/')
           if os.path.isdir(os.path.join('../RP/sessions/', dI))]
    for sid in loc:
        session = ra.Session(sid=sid, stype='radical.pilot',
                             src=os.path.join('../RP/sessions/', sid))
        pilot = session.filter(etype='pilot', inplace=True)
        time_stamps.append(pilot.ttc)
    return time_stamps
Example #6
def load_sessions_cores(sdir, sessions, sncores):
    # number of units in the sessions
    # snunits = sorted(sessions.nunit.unique().tolist())

    # load the RA session objects
    sras = {}
    for sncore in sncores:
        sras[sncore] = []
        s = sessions[(sessions.ncore == sncore)]
        for sid in s.sid.tolist():
            exp = s.loc[sid]['experiment']
            src = '%s/%s/%s' % (sdir, exp, sid)
            sras[sncore].append(ra.Session(src, 'radical.pilot'))

    return sras
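A hypothetical call of load_sessions_cores, assuming sessions is a pandas DataFrame indexed by session ID with ncore, sid and experiment columns (inferred from the attribute accesses above):

sras = load_sessions_cores('data/sessions', sessions, sncores=[32, 64])
print(len(sras[32]))  # RA session objects for the 32-core runs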
Example #7
def process_rp_profiles(src):
    
    sid = os.path.basename(src)
    loc = os.path.dirname(src)
    tag = '/'.join(loc.split('/')[2:])
    proc_data = os.path.join(proc,tag) + '/rp_data.json'
    data = {'task_mgmt': 0, 'exec_time': 0}
    
    sess = ra.Session(stype='radical.pilot', src=loc, sid=sid)
    units = sess.filter(etype='unit', inplace=False)
        
    data['task_mgmt'] = units.duration(state=['NEW','DONE'])
    data['exec_time'] = units.duration(event=[{ru.EVENT:'exec_start'},{ru.EVENT:'exec_stop'}])
    
    proc_path = write_data(data, proc_data)
    return proc_path
Example #8
def extractTimestamp(path):
    currPath = os.getcwd()
    os.chdir(path)
    timestamps = {}
    for sandbox in os.listdir('./'):
        if os.path.isdir("./" + sandbox):
            session = ra.Session(sandbox, 'radical.pilot')
            startpilot = session.get(etype='pilot', uid='pilot.0000')[0].states[rp.ACTIVE]['time']
            del session
            fileInput = open("./" + sandbox + '/pmgr.launching.0.prof')
            fileInput.readline()
            ts = float(fileInput.readline().split(',')[0])
            if ts > 0 and startpilot > 0:
                timestamps[sandbox] = ts + startpilot
    os.chdir(currPath)
    return timestamps
Example #9
def get_session_tstamps():

    s           = list()
    time_stamps = list()
    loc = [dI for dI in os.listdir('../RP/sessions/')
           if os.path.isdir(os.path.join('../RP/sessions/', dI))]
    for sid in loc:
        session = ra.Session(sid=sid, stype='radical.pilot',
                             src=os.path.join('../RP/sessions/', sid))
        pilot = session.filter(etype='pilot', inplace=True)
        s.append(sid)
        time_stamps.append(pilot.ttc)

    df = pd.DataFrame(list(zip(s, time_stamps)), columns=['Session', 'Duration'])
    dd = df.sort_values('Duration')
    return dd.reset_index(drop=True)
Example #10
def get_adap_time(loc, sid):

    # Get adap time
    duration = 0.0
    sess = ra.Session(stype='radical.entk', src=loc, sid=sid)
    #stages = sorted(sess.filter(etype='stage', inplace=False).list('uid'))
    #     print stages
    #for stage in stages:
    duration += sess.duration(event=[{
        ru.EVENT: 'Adap: adding new stage'
    }, {
        ru.EVENT: 'Adap: added new stage'
    }])
    duration += sess.duration(event=[{
        ru.EVENT: 'Adap: adding new task'
    }, {
        ru.EVENT: 'Adap: added new task'
    }])
    return duration
Example #11
def wrangle_session(sdir, sid):

    # Get the experiment tag for the current sdir.
    exp = sdir.split('/')[-2:][0]

    # RA objects cannot be serialized: every RA session object needs
    # to be constructed at every run.
    #
    # FIXME: AM: Actually, RA sessions can be pickled all right:
    #
    #        import radical.analytics as ra
    #        import pickle
    #        s1 = ra.Session(src=sdir, stype='radical.pilot')
    #        p  = pickle.dumps(s1)
    #        s2 = pickle.loads(p)
    #        assert(len(s1.get()) == len(s2.get()))
    #
    #        If we found cases where this does not work, we can make it work.
    #
    sra_session = ra.Session(sdir, 'radical.pilot')

    # Pilot-unit relationship dictionary
    pu_rels = sra_session.describe('relations', ['pilot', 'unit'])

    # Pilots of sra: derive properties and durations.
    print '\n\n%s -- %s -- Loading pilots:' % (exp, sid)
    sra_pilots = sra_session.filter(etype='pilot', inplace=False)
    pilots = load_pilots(sid, exp, sra_pilots, pdm, pu_rels, pts)

    # Units of sra: derive properties and durations.
    print '\n\n%s -- %s -- Loading units:' % (exp, sid)
    sra_units = sra_session.filter(etype='unit', inplace=False)
    units = load_units(sid, exp, sra_units, udm, pilots, sra_session, pu_rels,
                       uts)

    # Session of sra: derive properties and total durations.
    print '\n\n%s -- %s -- Loading session:\n' % (exp, sid)
    load_session(sid, exp, sra_session, sra_pilots, sra_units, sdm, pdm, udm,
                 pilots, units, sts)
Example #12
    for path in glob.glob('%s/%s*' % (datadir, experiment_tag)):
        for sdir in glob.glob('%s/*' % path):

            # Session ID and session experiment.
            sid = glob.glob('%s/*.json' % sdir)[0].split('/')[-1:][0][:-5]
            exp = path.split('/')[-1:][0]

            # Consistency check: SID of json file name is the same SID of
            # directory name.
            if sid == sdir.split('/')[-1:][0]:

                # RA objects cannot be serialized: every RA session object
                # needs to be constructed at every run.
                print exp + '/' + sid
                print sdir
                sra_session = ra.Session(sid, 'radical.pilot', src=sdir)

                # Pilot-unit relationship dictionary
                pu_rels = sra_session.describe('relations', ['pilot', 'unit'])

                # Pilots of sra: derive properties and durations.
                print '\n\n%s -- %s -- Loading pilots:' % (exp, sid)
                sra_pilots = sra_session.filter(etype='pilot', inplace=False)
                pilots = load_pilots(sid, exp, sra_pilots, pdm, pu_rels)

                # Units of sra: derive properties and durations.
                print '\n\n%s -- %s -- Loading units:' % (exp, sid)
                sra_units = sra_session.filter(etype='unit', inplace=False)
                units = load_units(sid, exp, sra_units, udm, pilots,
                                   sra_session, pu_rels)
Example #13
import os
import glob
import pprint
import radical.utils as ru
import radical.entk as re
import radical.analytics as ra
"""This example illustrates how to obtain durations for arbitrary (non-state)
profile events. Modified from examples under RADICAL Analytics"""

# ------------------------------------------------------------------------------
#
if __name__ == '__main__':

    loc = './re.session.two.karahbit.018175.0001'
    src = os.path.dirname(loc)
    sid = os.path.basename(loc)
    session = ra.Session(src=src, sid=sid, stype='radical.entk')

    # A formatting helper before starting...
    def ppheader(message):
        separator = '\n' + 78 * '-' + '\n'
        print separator + message + separator

    # We first filter our session to obtain only the task objects
    tasks = session.filter(etype='task', inplace=False)
    print '#tasks   : %d' % len(tasks.get())

    # We use the 're.states.SUBMITTING' and 're.states.DONE' probes to find
    # the time taken by EnTK to execute all tasks
    ppheader("Time spent to execute the tasks")
    duration = tasks.duration(event=[{
        ru.EVENT: 'state',
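The listing is cut off mid-call. Based on the comment above, the truncated duration call plausibly continues as in this sketch (the continuation is an assumption, not part of the original listing):

    # Sketch of the truncated call, assuming the state probes named in the
    # comment above:
    duration = tasks.duration(event=[{ru.EVENT: 'state', ru.STATE: re.states.SUBMITTING},
                                     {ru.EVENT: 'state', ru.STATE: re.states.DONE}])
    print 'duration : %.2f' % duration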
Example #14
import radical.analytics as ra
import radical.pilot as rp
import pprint
import radical.utils as ru
import os
import glob
import numpy as np

nm_run = 'testrun'
d_run = 'rawdata/{}'.format(nm_run)

newest = max(glob.glob(os.path.join(d_run, 'rp.session.*')),
             key=os.path.getctime)

session = ra.Session(newest, 'radical.pilot')

entities_objects = session.get()
pprint.pprint(entities_objects)

state_models = session.describe('state_model')
pprint.pprint(state_models)
state_model = session.describe('state_model', etype='unit')
pprint.pprint(state_model)
state_models = session.describe('state_model', etype=['unit', 'pilot'])
pprint.pprint(state_models)
event_models = session.describe('event_model')
pprint.pprint(event_models)
relations = session.describe('relations')
pprint.pprint(relations)
Example #15
        session_names = glob(session_pattern)
        profile_names = list()

        for session_name in session_names:
            profile_names.extend(glob(os.path.join(session_name, '*.json')))

        for json_file in profile_names:
            print json_file

            try:
                source = ''.join([x + '/' for x in json_file.split('/')[0:-1]])
                json = ru.read_json(json_file)
                sid = os.path.basename(json_file)[:-5]
                session = ra.Session(sid=sid,
                                     stype='radical.pilot',
                                     src=source)
                units = session.filter(etype='unit', inplace=False)
                dur = units.ttc
                session_data = {
                    'Nodes': nodes,
                    'Framework': framework,
                    'Duration': dur,
                    'Json': json_file
                }
            except Exception as e:
                print e
                session_data = {
                    'Nodes': nodes,
                    'Framework': framework,
                    'Json': json_file
Example #16
import sys
import radical.utils as ru
import radical.pilot as rp
import radical.analytics as ra

# ------------------------------------------------------------------------------
#
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print "\n\tusage: %s <session>\n" % sys.argv[0]
        sys.exit(1)

    src = sys.argv[1]
    stype = 'radical.pilot'

    session = ra.Session(src, stype)
    units = session.filter(etype='unit', inplace=False)
    unit_0 = units.get()[0]

    print
    print 'session: %s' % session.uid
    print 'units:   %d' % len(units.get())

    # collect all events for some unit which relate to prte
    nevents = list()
    for e in unit_0.events:
        if 'prte' in e[1]:
            nevents.append(e)

    # for that sample unit, print the events and timestamps (ordered)
    print
Example #17
def loadSessions(path):
    sessions = []
    for sandbox in os.listdir(path):
        if os.path.isdir(path + "/" + sandbox):
            sessions.append(ra.Session(sandbox, 'radical.pilot'))
    return sessions
Example #18
def get_entk_exec_time(loc, sid):
    sess = ra.Session(stype='radical.entk', src=loc, sid=sid)
    tasks = sess.filter(etype='task', inplace=False)
    return tasks.duration(state=['SCHEDULING', 'DONE'])
Example #19
for dname in UNIT_DURATIONS:
    data[dname]  = list()


nm_run = 'testrun'
d_run  = 'rawdata/{}'.format(nm_run)

f_run  = 'rp.session.titan-ext1.jrossyra.017614.0002'
#f_run  = 'rp.session.titan-ext1.jrossyra.017614.0003'

a_run = os.path.join(d_run, f_run)
#a_run  = max(glob.glob(
#    os.path.join(d_run, 'rp.session.*')),
#    key=os.path.getctime)

session = ra.Session(a_run, 'radical.pilot')

# Load wrangled data saved in .csv files for both the synapse microbenchmarks and gromacs.
sws_sessions = pd.read_csv(os.path.join(a_run,'sessions.csv'), index_col=0)
sws_pilots   = pd.read_csv(os.path.join(a_run,'pilots.csv'),   index_col=0)
sws_units    = pd.read_csv(os.path.join(a_run,'units.csv'),    index_col=0)

print 'Total number of successful runs: %s' % sws_sessions.shape[0]
print 'Total number of pilots: %s'          % sws_pilots.shape[0]
print 'Total number of units: %s\n'         % sws_units.shape[0]


units   = session.filter(etype='unit', inplace=True)
for unit in units.get():
    print "In unit", unit
    for dname in UNIT_DURATIONS:
Example #20
def profiles(path):

    json_files = glob.glob(path + '/*_32_*/*.json')
    sessions32 = dict()
    for json_file in json_files:
        src = ''.join([x + '/' for x in json_file.split('/')[0:-1]])
        json = ru.read_json(json_file)
        sid = os.path.basename(json_file)[:-5]
        sessions32[sid] = ra.Session(sid=sid, stype='radical.pilot', src=src)

    json_files = glob.glob(path + '/*_64_*/*.json')
    sessions64 = dict()
    for json_file in json_files:
        src = ''.join([x + '/' for x in json_file.split('/')[0:-1]])
        json = ru.read_json(json_file)
        sid = os.path.basename(json_file)[:-5]
        sessions64[sid] = ra.Session(sid=sid, stype='radical.pilot', src=src)

    json_files = glob.glob(path + '/*_128_*/*.json')
    sessions128 = dict()
    for json_file in json_files:
        src = ''.join([x + '/' for x in json_file.split('/')[0:-1]])
        json = ru.read_json(json_file)
        sid = os.path.basename(json_file)[:-5]
        sessions128[sid] = ra.Session(sid=sid, stype='radical.pilot', src=src)

    json_files = glob.glob(path + '/*_256_*/*.json')
    sessions256 = dict()
    for json_file in json_files:
        src = ''.join([x + '/' for x in json_file.split('/')[0:-1]])
        json = ru.read_json(json_file)
        sid = os.path.basename(json_file)[:-5]
        sessions256[sid] = ra.Session(sid=sid, stype='radical.pilot', src=src)

    RPexpsW = pd.DataFrame(columns=[
        'Atoms', 'Cores', 'Run', 'RADICAL-Pilot Overhead',
        'RADICAL-Pilot Edge Disc', 'RADICAL-Pilot DataMovement',
        'RADICAL-Pilot ConnComp'
    ])

    for sid, session in sessions32.iteritems():
        uid = list()
        for i in range(1, 1025):
            uid.append('unit.%06d' % i)
        units = session.filter(uid=uid, etype='unit', inplace=False)
        pilot = session.filter(etype='pilot', inplace=False)
        compUnit = session.filter(uid='unit.001025',
                                  etype='unit',
                                  inplace=False)
        overhead = (np.min(units.timestamps(state=rp.AGENT_EXECUTING)) -
                    pilot.timestamps(state=[rp.PMGR_ACTIVE]))[0]
        execution_time = np.max(
            units.timestamps(state=rp.AGENT_STAGING_OUTPUT_PENDING)) - np.min(
                units.timestamps(state=rp.AGENT_EXECUTING))
        data_movement = (
            compUnit.timestamps(state=rp.AGENT_EXECUTING) -
            np.max(units.timestamps(state=rp.AGENT_STAGING_OUTPUT_PENDING)))[0]
        connComp = compUnit.timestamps(
            state=rp.AGENT_STAGING_OUTPUT_PENDING)[0] - compUnit.timestamps(
                state=rp.AGENT_EXECUTING)[0]
        cores = 32
        run = int(sid.split('_')[-1])
        atoms = int((sid.split('_')[1]).split('K')[0])
        RPexpsW.loc[len(RPexpsW)] = [
            atoms, cores, run, overhead, execution_time, data_movement,
            connComp
        ]

    for sid, session in sessions64.iteritems():
        uid = list()
        for i in range(1, 1025):
            uid.append('unit.%06d' % i)
        units = session.filter(uid=uid, etype='unit', inplace=False)
        pilot = session.filter(etype='pilot', inplace=False)
        compUnit = session.filter(uid='unit.001025',
                                  etype='unit',
                                  inplace=False)
        overhead = (np.min(units.timestamps(state=rp.AGENT_EXECUTING)) -
                    pilot.timestamps(state=[rp.PMGR_ACTIVE]))[0]
        execution_time = np.max(
            units.timestamps(state=rp.AGENT_STAGING_OUTPUT_PENDING)) - np.min(
                units.timestamps(state=rp.AGENT_EXECUTING))
        data_movement = (
            compUnit.timestamps(state=rp.AGENT_EXECUTING) -
            np.max(units.timestamps(state=rp.AGENT_STAGING_OUTPUT_PENDING)))[0]
        connComp = compUnit.timestamps(
            state=rp.AGENT_STAGING_OUTPUT_PENDING)[0] - compUnit.timestamps(
                state=rp.AGENT_EXECUTING)[0]
        cores = 64
        run = int(sid.split('_')[-1])
        atoms = int((sid.split('_')[1]).split('K')[0])
        RPexpsW.loc[len(RPexpsW)] = [
            atoms, cores, run, overhead, execution_time, data_movement,
            connComp
        ]

    for sid, session in sessions128.iteritems():
        uid = list()
        for i in range(1, 1025):
            uid.append('unit.%06d' % i)
        units = session.filter(uid=uid, etype='unit', inplace=False)
        pilot = session.filter(etype='pilot', inplace=False)
        compUnit = session.filter(uid='unit.001025',
                                  etype='unit',
                                  inplace=False)
        overhead = (np.min(units.timestamps(state=rp.AGENT_EXECUTING)) -
                    pilot.timestamps(state=[rp.PMGR_ACTIVE]))[0]
        execution_time = np.max(
            units.timestamps(state=rp.AGENT_STAGING_OUTPUT_PENDING)) - np.min(
                units.timestamps(state=rp.AGENT_EXECUTING))
        data_movement = (
            compUnit.timestamps(state=rp.AGENT_EXECUTING) -
            np.max(units.timestamps(state=rp.AGENT_STAGING_OUTPUT_PENDING)))[0]
        connComp = compUnit.timestamps(
            state=rp.AGENT_STAGING_OUTPUT_PENDING)[0] - compUnit.timestamps(
                state=rp.AGENT_EXECUTING)[0]
        cores = 128
        run = int(sid.split('_')[-1])
        atoms = int((sid.split('_')[1]).split('K')[0])
        RPexpsW.loc[len(RPexpsW)] = [
            atoms, cores, run, overhead, execution_time, data_movement,
            connComp
        ]

    for sid, session in sessions256.iteritems():
        uid = list()
        for i in range(1, 1025):
            uid.append('unit.%06d' % i)
        units = session.filter(uid=uid, etype='unit', inplace=False)
        pilot = session.filter(etype='pilot', inplace=False)
        compUnit = session.filter(uid='unit.001025',
                                  etype='unit',
                                  inplace=False)
        overhead = (np.min(units.timestamps(state=rp.AGENT_EXECUTING)) -
                    pilot.timestamps(state=[rp.PMGR_ACTIVE]))[0]
        execution_time = np.max(
            units.timestamps(state=rp.AGENT_STAGING_OUTPUT_PENDING)) - np.min(
                units.timestamps(state=rp.AGENT_EXECUTING))
        data_movement = (
            compUnit.timestamps(state=rp.AGENT_EXECUTING) -
            np.max(units.timestamps(state=rp.AGENT_STAGING_OUTPUT_PENDING)))[0]
        connComp = compUnit.timestamps(
            state=rp.AGENT_STAGING_OUTPUT_PENDING)[0] - compUnit.timestamps(
                state=rp.AGENT_EXECUTING)[0]
        cores = 256
        run = int(sid.split('_')[-1])
        atoms = int((sid.split('_')[1]).split('K')[0])
        RPexpsW.loc[len(RPexpsW)] = [
            atoms, cores, run, overhead, execution_time, data_movement,
            connComp
        ]

    return RPexpsW
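The four per-core-count blocks above differ only in the glob pattern and the cores constant. A minimal deduplicated sketch under the same assumptions (the same os, glob, np, pd, ra and rp imports as the surrounding examples; profiles_compact is a hypothetical name, and the unused ru.read_json call is dropped):

def profiles_compact(path):
    # Sketch: same metrics as profiles(), computed in one loop per core count.
    RPexpsW = pd.DataFrame(columns=[
        'Atoms', 'Cores', 'Run', 'RADICAL-Pilot Overhead',
        'RADICAL-Pilot Edge Disc', 'RADICAL-Pilot DataMovement',
        'RADICAL-Pilot ConnComp'])

    for cores in [32, 64, 128, 256]:
        for json_file in glob.glob('%s/*_%d_*/*.json' % (path, cores)):
            src = os.path.dirname(json_file)
            sid = os.path.basename(json_file)[:-5]
            session = ra.Session(sid=sid, stype='radical.pilot', src=src)

            uids  = ['unit.%06d' % i for i in range(1, 1025)]
            units = session.filter(uid=uids, etype='unit', inplace=False)
            pilot = session.filter(etype='pilot', inplace=False)
            comp  = session.filter(uid='unit.001025', etype='unit',
                                   inplace=False)

            overhead = (np.min(units.timestamps(state=rp.AGENT_EXECUTING)) -
                        pilot.timestamps(state=[rp.PMGR_ACTIVE]))[0]
            exec_time = (np.max(units.timestamps(
                             state=rp.AGENT_STAGING_OUTPUT_PENDING)) -
                         np.min(units.timestamps(state=rp.AGENT_EXECUTING)))
            data_movement = (comp.timestamps(state=rp.AGENT_EXECUTING) -
                             np.max(units.timestamps(
                                 state=rp.AGENT_STAGING_OUTPUT_PENDING)))[0]
            conn_comp = (comp.timestamps(
                             state=rp.AGENT_STAGING_OUTPUT_PENDING)[0] -
                         comp.timestamps(state=rp.AGENT_EXECUTING)[0])

            run   = int(sid.split('_')[-1])
            atoms = int(sid.split('_')[1].split('K')[0])
            RPexpsW.loc[len(RPexpsW)] = [atoms, cores, run, overhead,
                                         exec_time, data_movement, conn_comp]

    return RPexpsW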
Example #21
import sys
import pprint
import radical.utils as ru
import radical.pilot as rp
import radical.analytics as ra

if __name__ == '__main__':

    # get the source folder to analyze
    if len(sys.argv) < 2:
        print "\n\tusage: %s <dir|tarball>\n" % sys.argv[0]
        sys.exit(1)

    src = sys.argv[1]
    if len(sys.argv) == 2:
        stype = 'radical.pilot'
    else:
        stype = sys.argv[2]

    session = ra.Session(src, stype)

    # print the PMGR_ACTIVE and FINAL timestamp for each pilot
    pilots = session.filter(etype='pilot', inplace=False)
    durations = pilots.duration([rp.PMGR_ACTIVE, rp.FINAL])
    pprint.pprint(durations)

    sys.exit(0)
Example #22
import sys
import pprint

import radical.analytics as ra

__license__   = 'MIT'

"""
This example illustrates the use of the method ra.Session.get()
"""

# ------------------------------------------------------------------------------
#
if __name__ == '__main__':

    if len(sys.argv) != 2:
        print "\n\tusage: %s <dir|tarball>\n" % sys.argv[0]
        sys.exit(1)

    src     = sys.argv[1]
    session = ra.Session(src, 'radical.pilot')

    # A formatting helper before starting...
    def ppheader(message):
        separator = '\n' + 78 * '-' + '\n'
        print separator + message + separator

    # and here we go. As seen in example 01, we use ra.Session.list() to get
    # the names of all the entity types of the session.
    etypes = session.list('etype')
    pprint.pprint(etypes)

    # We limit ourselves to the types 'unit' and 'pilot'. We use the method
    # ra.Session.get() to get all the objects in our session with etype 'unit':
    ppheader("properties of the entities with etype 'unit'")
    units = session.get(etype='unit')
Example #23
def get_utilization_durations(sources, version):

    if version != '2017':
        PILOT_DURATIONS['p_total'] = [{
            STATE: None,
            EVENT: 'bootstrap_0_start'
        }, {
            STATE: None,
            EVENT: 'bootstrap_0_stop'
        }]
        PILOT_DURATIONS['p_boot'] = [{
            STATE: None,
            EVENT: 'bootstrap_0_start'
        }, {
            STATE: None,
            EVENT: 'sync_rel'
        }]
        PILOT_DURATIONS['p_term'] = [{
            STATE: None,
            EVENT: 'cmd'
        }, {
            STATE: None,
            EVENT: 'bootstrap_0_stop'
        }]

    utilization = dict()  # dict of contributions to utilization
    data = dict()  # the numbers we ultimately plot
    sids = list()  # used for labels
    xkeys = list()  # x-axis labels

    # get the numbers we actually want to plot
    for src in sources:

        # always point to the tarballs
        if src[-4:] != '.tbz':
            src += '.tbz'

        # print
        # print '-----------------------------------------------------------'
        print(src)

        session = ra.Session(src, 'radical.pilot')
        pilots = session.filter(etype='pilot', inplace=False)
        units = session.filter(etype='unit', inplace=True)
        sid = session.uid
        sids.append(sid)

        if len(pilots.get()) > 1:
            raise ValueError('Cannot handle multiple pilots')

        # compute how many core-hours each duration consumed (or allocated,
        # wasted, etc - depending on the semantic type of duration)
        utilization[sid] = dict()

        for dname in PILOT_DURATIONS:
            utilization[sid][dname] = 0.0

        for dname in UNIT_DURATIONS:
            utilization[sid][dname] = 0.0

        # some additional durations we derive implicitly
        for dname in DERIVED_DURATIONS:
            utilization[sid][dname] = 0.0

        for pilot in pilots.get():

            # we immediately take out the agent nodes, and change pilot_size
            # accordingly
            cpn = pilot.cfg.get('cores_per_node', 16)
            psize = pilot.description['cores']
            anodes = 0
            for agent in pilot.cfg.get('agents', []):
                if pilot.cfg['agents'][agent].get('target') == 'node':
                    anodes += 1
            walltime = pilot.duration(event=PILOT_DURATIONS['p_total'])
            psize_full = psize
            psize = psize_full - anodes * cpn

            utilization[sid]['p_total'] += walltime * psize_full
            utilization[sid]['p_agent'] += walltime * anodes * cpn

            # now we can derive the utilization for all other pilot durations
            # specified.  Note that this is now off by some amount for the
            # bootstrapping step where we don't yet have sub-agents, but that
            # can be justified: the sub-agent nodes are explicitly reserved for
            # their purpose at that time, too.
            tot = 0.0
            parts = 0.0
            for dname in PILOT_DURATIONS:
                if dname == 'p_total':
                    tot = pilot.duration(event=PILOT_DURATIONS[dname])
                    continue
                try:
                    dur = pilot.duration(event=PILOT_DURATIONS[dname])
                    parts += dur
                except Exception as e:
                    print('WARN: miss %s: %s' % (dname, e))
                    dur = 0.0
                    raise
                utilization[sid][dname] += dur * psize

        # we do the same for the unit durations - but here we add up the
        # contributions for all individual units.
        for unit in units.get():
            if version != '2017':
                unit.description['cores'] = unit.description[
                    'cpu_processes'] * unit.description['cpu_threads']
            usize = unit.description['cores']
            uparts = 0.0
            utot = 0.0
            for dname in UNIT_DURATIONS:
                dur = unit.duration(event=UNIT_DURATIONS[dname])
                utilization[sid][dname] += dur * usize
                if dname == 'u_total': utot += dur
                else: uparts += dur

        # ----------------------------------------------------------------------
        #
        # sanity checks and derived values
        #
        # we add up 'p_setup_1' and 'p_setup_2' to 'p_setup'
        p_setup_1 = utilization[sid]['p_setup_1']
        p_setup_2 = utilization[sid]['p_setup_2']
        utilization[sid]['p_setup'] = p_setup_1 + p_setup_2
        del (utilization[sid]['p_setup_1'])
        del (utilization[sid]['p_setup_2'])

        # For both the pilot and the unit utilization, the sum of the
        # individual contributions must equal the total.
        parts = 0.0
        tot = utilization[sid]['p_total']

        for p in utilization[sid]:
            if p != 'p_total' and not p.startswith('u_'):
                parts += utilization[sid][p]
        assert (abs(tot - parts) < 0.0001), '%s == %s' % (tot, parts)

        # same for unit consistency
        parts = 0.0
        tot = utilization[sid]['u_total']
        for p in utilization[sid]:
            if p != 'u_total' and not p.startswith('p_'):
                parts += utilization[sid][p]

        # another sanity check: the pilot `p_uexec` utilization should always be
        # larger than the unit `total`.
        p_uexec = utilization[sid]['p_uexec']
        u_total = utilization[sid]['u_total']
        assert (p_uexec > u_total), '%s > %s' % (p_uexec, u_total)

        # We in fact know that the difference above, which is not explicitly
        # accounted for otherwise, is attributed to the agent component
        # overhead, and to the DB overhead: it's the overhead to get from
        # a functional pilot to the first unit being scheduled, and from the
        # last unit being unscheduled to the pilot being terminated (waiting
        # for other units to be finished etc).  We consider that time 'idle'.
        utilization[sid]['p_idle'] = p_uexec - u_total
        del (utilization[sid]['p_uexec'])

        xkeys.append('%s\n%s' % (len(units.get()), psize))

        # check that the utilization contributions add up to the total
        tot_abs = utilization[sid]['p_total']
        sum_abs = 0
        sum_rel = 0
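        # NOTE: 'keys' and 'ABSOLUTE' are defined outside this snippet:
        #       'keys' is the ordered list of duration names to plot, and
        #       'ABSOLUTE' toggles between absolute and relative (percent)
        #       utilization numbers.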
        for key in keys:
            if key not in data:
                data[key] = list()
            util_abs = utilization[sid][key]
            util_rel = 100.0 * util_abs / tot_abs
            sum_abs += util_abs
            sum_rel += util_rel

            if ABSOLUTE: data[key].append(util_abs)
            else: data[key].append(util_rel)

    return data, sids, utilization, xkeys
Example #24
def loadSession(path,sandbox):
        return ra.Session(sandbox, 'radical.pilot')
Example #25
import os
import sys
import glob
import pprint
import radical.utils as ru
import radical.entk as re
import radical.analytics as ra

# ------------------------------------------------------------------------------
#
if __name__ == '__main__':

    #loc = './rp.session.js-104-191.jetstream-cloud.org.karahbit.018288.0029'
    loc = sys.argv[1]
    src = os.path.dirname(loc)
    sid = os.path.basename(loc)
    session = ra.Session(src=loc, sid=sid, stype='radical.pilot')

    # A formatting helper before starting...
    #def ppheader(message):
        #separator = '\n' + 78 * '-' + '\n'
        #print(separator + message + separator)

    # We first filter our session to obtain only the unit objects
    units = session.filter(etype='unit', inplace=False)
    #print('#units   : %d' % len(units.get()))

    # We use the 'exec_start' and 'exec_stop' events to find
    # the time taken by RP to execute all tasks
    #ppheader("Time spent to execute the units")
    duration = units.duration(event=[{ru.EVENT: 'exec_start'},{ru.EVENT: 'exec_stop'}])
    print('duration : %.2f' % duration)
    suds = sys.argv[1:]
else:
    suds = [
        're.session.login5.hrlee.018425.0013',
        #ttx_u is zero, eliminating for the moment, 're.session.login5.hrlee.018425.0017',
        're.session.login5.hrlee.018425.0018',
        're.session.login5.hrlee.018425.0020',
        're.session.login5.hrlee.018425.0021'
    ]

sids = [s for s in suds]

ss = {}
for sid in suds:
    sp = sid
    ss[sid] = {'s': ra.Session(sp, 'radical.pilot')}
    ss[sid].update({
        'p': ss[sid]['s'].filter(etype='pilot', inplace=False),
        'u': ss[sid]['s'].filter(etype='unit', inplace=False),
        't': ss[sid]['s'].filter(etype='task', inplace=False),
        'w': ss[sid]['s'].filter(etype='pipeline', inplace=False)
    })

for sid in suds:

    ss[sid].update({
        'sid':
        ss[sid]['s'].uid,
        'pid':
        ss[sid]['p'].list('uid'),
        'npilot':
Example #27
        )  # Assign the workflow as a set of Pipelines to the Application Manager

        prof.prof('Run_Cycle_{0}'.format(Cycle + 1), uid=uid1)

        appman.run()  # Run the Application Manager

        prof.prof('End_Cycle_{0}'.format(Cycle + 1), uid=uid1)

    #appman.resource_terminate()

    mdtasks = synchronousExchange.mdtasklist
    extasks = synchronousExchange.extasklist

    pwd = os.getcwd()
    session = ra.Session(sid='./%s' % appman.sid,
                         stype='radical.entk',
                         src=pwd)

    mdtask_uid_map = dict()
    for task in mdtasks:
        mdtask_uid_map[task.name] = task.uid
        #print task.name

    extask_uid_map = dict()
    for task in extasks:
        extask_uid_map[task.name] = task.uid

    def get_mdtask_uids(task_names_list):
        return [mdtask_uid_map[task.name] for task in task_names_list]

    def get_extask_uids(task_names_list):
Example #28
def get_entk_exec_time(loc, sid):
    sess = ra.Session(stype='radical.entk', src=loc, sid=sid)
    tasks = sess.filter(etype='task', inplace=False)
    return tasks.duration(state=['SUBMITTED', 'EXECUTED'])
Example #29
def main():

    data    = dict()
    ws_path = 'data/weak_scaling_synapse_titan/optimized'
    ss_path = 'data/strong_scaling_synapse_titan'
    t_path  = 'data/tests'
    sources = [
             # '%s/rp.session.thinkie.merzky.017494.0007'    % t_path,

            '%s/ws_syn_titan_32_32_1024_60_1.0'           % ws_path,
            '%s/ws_syn_titan_32_32_1024_60_1.1'           % ws_path,
            '%s/ws_syn_titan_64_32_2048_60_2.0'           % ws_path,
            '%s/ws_syn_titan_64_32_2048_60_2.1'           % ws_path,
            '%s/ws_syn_titan_128_32_4096_60_3.0'          % ws_path,
            '%s/ws_syn_titan_128_32_4096_60_3.1'          % ws_path,
            '%s/ws_syn_titan_256_32_8192_60_4.0'          % ws_path,
            '%s/ws_syn_titan_256_32_8192_60_4.1'          % ws_path,
            '%s/ws_syn_titan_512_32_16384_60_5.0'         % ws_path,
            '%s/ws_syn_titan_512_32_16384_60_5.1'         % ws_path,
            '%s/ws_syn_titan_1024_32_32768_60_6.0'        % ws_path,
            '%s/ws_syn_titan_1024_32_32768_60_6.1'        % ws_path,
            '%s/ws_syn_titan_2048_32_65536_60_7.0'        % ws_path,
            '%s/ws_syn_titan_2048_32_65536_60_7.1'        % ws_path,
            '%s/ws_syn_titan_4096_32_131072_60_8.0'       % ws_path,
           
            '%s/rp.session.titan-ext1.itoman.017473.0000' % ss_path,
            '%s/rp.session.titan-ext1.itoman.017491.0004' % ss_path,
            '%s/rp.session.titan-ext1.itoman.017492.0001' % ss_path,
            '%s/rp.session.titan-ext2.itoman.017467.0000' % ss_path,
         
               ]


    for dname in UNIT_DURATIONS:
        data[dname]  = list()

    # get the numbers we actually want to plot
    fout = open('outliers.dat', 'w')
    ucnt = 0
    ocnt = 0
    for src in sources:

        # always point to the tarballs
        if src[-4:] != '.tbz':
            src += '.tbz'

        print
        print '-----------------------------------------------------------'
        print src

        session = ra.Session(src, 'radical.pilot')
        units   = session.filter(etype='unit', inplace=True)
        sid     = session.uid

        for unit in units.get():
            for dname in UNIT_DURATIONS:
                dur = unit.duration(event=UNIT_DURATIONS[dname])
                if dur > 1000.0:
                    ocnt += 1
                    fout.write('%10.1f  %s\n' % (dur, src))
                    fout.flush()
                    sys.stdout.write('#')
                else:
                    ucnt += 1
                    data[dname].append(dur)
                    sys.stdout.write('.')
                sys.stdout.flush()

        print

#   print
#   pprint.pprint(data)
#   sys.exit()

    

    plt.figure(figsize=(20,14))
    for dname in data:
        tmp = np.array(data[dname])
        plt.hist(tmp, alpha=0.5, bins=100, histtype='step')
        print
        print dname
        print '  mean : %10.1f' % tmp.mean()
        print '  stdev: %10.1f' % tmp.std()
        print '  min  : %10.1f' % tmp.min()
        print '  max  : %10.1f' % tmp.max()
    print
    print '  ucnt : %10d' % (ucnt+ocnt)
    print '  ocnt : %10d' %  ocnt

    plt.xlabel('runtime [s]')
    plt.ylabel('number of units')
    plt.title ('distribution of unit runtimes')
    plt.legend(data.keys(), ncol=5, loc='upper left', bbox_to_anchor=(0,1.13))
    plt.savefig('10_unit_durations.png')
  # plt.show()


    plt.figure(figsize=(20,14))
    plt.hist(data['exec_app'], alpha=0.5, bins=100, histtype='step')

    plt.xlabel('runtime [s]')
    plt.ylabel('number of units')
    plt.title ('distribution of unit runtimes')
    plt.legend(['exec_app'], ncol=5, loc='upper left', bbox_to_anchor=(0,1.13))
    plt.savefig('10_unit_durations_app.png')