def plot(sids, value, label='', paper=False):
    """Plot one concurrency column over time for several sessions.

    For every session id the pickled frames are read from
    ``PICKLE_DIR/<sid>/`` (presumably written by ``preprocess`` -- confirm
    against the caller).  The rows of ``unit_prof.pkl`` that belong to the
    session and have a non-negative ``value`` are selected, their time axis
    is shifted to start at zero, and the result is outer-merged on ``time``
    into one frame holding one column per session.  That frame is plotted
    and saved as ``plot_concurrency.pdf`` in the current working directory.

    Args:
        sids:  sequence of session ids; must not be empty.
        value: name of the column to plot.  ``'cc_fork'`` and ``'cc_exit'``
               are computed here via ``rpu.add_concurrency``; any other
               name must already be a column of the pickled frame.
        label: unused, kept for interface compatibility.
        paper: if true, the descriptive plot title is omitted.

    Raises:
        ValueError: if ``sids`` is empty.

    Note:
        The title is built from the metadata and resource label of the
        *last* session in ``sids``.
    """
    # Guard first: without a session there is no frame to merge or plot,
    # and the code below would fail on an unbound name.
    if len(sids) == 0:
        raise ValueError("plot() requires at least one session id")

    colors = [cmap(i) for i in np.linspace(0, 1, len(sids))]
    labels = []
    df_all = None

    for sid in sids:
        rp = sid.startswith('rp.session')
        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(
            os.path.join(session_dir, 'pilot_info.pkl'))
        unit_prof_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_prof.pkl'))
        session_info_df = pd.read_pickle(
            os.path.join(session_dir, 'session_info.pkl'))

        # Legend / title info for this session
        info = session_info_df.loc[sid]

        if rp:
            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            # list() keeps this working where values() returns a view
            resource_label = list(resources.values())[0]
        else:
            resource_label = 'bogus'

        cores = info['metadata.effective_cores']

        # These two columns are not stored in the pickle; derive them now.
        if value == 'cc_fork':
            spec = {
                'in':  [{'info': 'aec_start_script'}],
                'out': [{'info': 'aec_after_exec'}],
            }
            rpu.add_concurrency(unit_prof_df, 'cc_fork', spec)

        elif value == 'cc_exit':
            spec = {
                'in':  [{'info': 'aec_after_exec'}],
                'out': [
                    {'state': rps.AGENT_STAGING_OUTPUT_PENDING,
                     'event': 'advance'},
                ],
            }
            rpu.add_concurrency(unit_prof_df, 'cc_exit', spec)

        # Take an explicit copy: the selection is renamed and modified
        # below, which must not operate on a view of unit_prof_df.
        session_df = unit_prof_df[
            (unit_prof_df[value] >= 0) &
            (unit_prof_df.sid == sid)
        ][['time', value]].copy()

        # The data column is named after the session's effective core count.
        session_df.columns = ['time', cores]
        session_df['time'] -= session_df['time'].min()

        if df_all is None:
            df_all = session_df
        else:
            df_all = pd.merge(df_all, session_df, on='time', how='outer')

        labels.append("%d" % info['metadata.cu_runtime'])

    df_all.set_index('time', inplace=True)
    print(df_all.head())

    # 'color' is the keyword documented for DataFrame.plot
    ax = df_all.plot(fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH,
                     color=colors)

    legend = mp.pyplot.legend(labels, loc='upper right',
                              fontsize=LEGEND_FONTSIZE, labelspacing=0)
    legend.get_frame().set_linewidth(BORDERWIDTH)

    if not paper:
        mp.pyplot.title(
            "Concurrent number of CUs in stage '%s'.\n"
            "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s" % (
                value,
                info['metadata.generations'],
                info['metadata.cu_cores'],
                info['metadata.cu_runtime'],
                resource_label,
                info['metadata.num_sub_agents'],
                info['metadata.num_exec_instances_per_sub_agent'],
                info['metadata.radical_stack.rp'],
                info['metadata.radical_stack.rs'],
                info['metadata.radical_stack.ru'],
            ),
            fontsize=TITLE_FONTSIZE)

    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    # Raw string: the backslash is part of the label text.
    mp.pyplot.ylabel(r"\# Concurrent Units", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylim(-50)
    mp.pyplot.xlim(0, 600)

    for spine in ax.spines.values():
        spine.set_linewidth(BORDERWIDTH)

    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # Figure size in inches; the height follows the golden ratio.
    width = 3.3
    height = width / 1.618

    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_concurrency.pdf')
    mp.pyplot.close()
def preprocess(sid):
    """Turn the profiles of one session into pickled data frames.

    The session's profiles are located with ``find_profiles``, combined and
    converted to a frame through ``rpu``, split into session / pilot / CU
    frames, and the CU frame is extended with states, info and five
    concurrency columns.  Together with the frames returned by
    ``json2frame`` everything is written as ``*.pkl`` files into
    ``PICKLE_DIR/<sid>/``.

    Nothing is done if that directory already exists.  Failures are
    reported through ``report.error`` and end the call; no exception of
    type ``Exception`` escapes.

    Args:
        sid: id of the session to process.

    Returns:
        None.
    """
    import shutil  # only needed to clean up after a failed write

    session_dir = os.path.join(PICKLE_DIR, sid)

    if os.path.isdir(session_dir):
        report.warn("Session dir '%s' already exists, skipping session." % session_dir)
        return

    try:
        sid_profiles = find_profiles(sid)
        print(sid_profiles)

        report.info("Combining profiles for session: %s.\n" % sid)
        combined_profiles = rpu.combine_profiles(sid_profiles)

        report.info("Converting profiles to frames for session: %s.\n" % sid)
        frames = rpu.prof2frame(combined_profiles)
        print(frames)

        report.info("Head of Combined DF for session %s:\n" % sid)
        print(frames.entity.unique())

        ses_prof_fr, pilot_prof_fr, cu_prof_fr = rpu.split_frame(frames)

        report.info("Head of CU Profiling DF for session %s:\n" % sid)
        print(cu_prof_fr.head())

        report.info("Head of Session DF for session %s:\n" % sid)
        ses_prof_fr.insert(0, 'sid', sid)
        print(ses_prof_fr.head())

        report.info("Head of Pilot DF for session %s:\n" % sid)
        pilot_prof_fr.insert(0, 'sid', sid)
        print(pilot_prof_fr.head())

        report.info("Head of CU DF for session %s:\n" % sid)
        rpu.add_states(cu_prof_fr)

        report.info("Head of CU DF for session %s (after states added):\n" % sid)
        print(cu_prof_fr.head())

        report.info("Head of CU DF for session %s (after info added):\n" % sid)
        rpu.add_info(cu_prof_fr)
        print(cu_prof_fr.head())

        report.info(
            "Head of CU DF for session %s (after concurrency added):\n" % sid)

        # One concurrency column per stage:
        # (column name, state entering the stage, state leaving the stage).
        # FAILED and CANCELED always count as leaving.
        stages = (
            # units populating the database
            ('cc_populating', rps.STAGING_INPUT,
             rps.AGENT_STAGING_INPUT_PENDING),
            # units staging in
            ('cc_stage_in', rps.AGENT_STAGING_INPUT,
             rps.ALLOCATING_PENDING),
            # units being scheduled
            ('cc_sched', rps.ALLOCATING,
             rps.EXECUTING_PENDING),
            # units executing
            ('cc_exec', rps.EXECUTING,
             rps.AGENT_STAGING_OUTPUT_PENDING),
            # units staging out
            ('cc_stage_out', rps.AGENT_STAGING_OUTPUT,
             rps.PENDING_OUTPUT_STAGING),
        )
        for column, entered, left in stages:
            spec = {
                'in': [{'state': entered, 'event': 'advance'}],
                'out': [
                    {'state': left, 'event': 'advance'},
                    {'state': rps.FAILED, 'event': 'advance'},
                    {'state': rps.CANCELED, 'event': 'advance'},
                ],
            }
            rpu.add_concurrency(cu_prof_fr, column, spec)
        print(cu_prof_fr.head())

        report.info("Head of CU DF for session %s (after sid added):\n" % sid)
        cu_prof_fr.insert(0, 'sid', sid)
        print(cu_prof_fr.head())

        report.info("CU DF columns for session %s:\n" % sid)
        print(cu_prof_fr['info'].unique())

        # transpose
        report.info("Head of Transposed CU DF for session %s:\n" % sid)
        tr_cu_prof_fr = rpu.get_info_df(cu_prof_fr)
        tr_cu_prof_fr.insert(0, 'sid', sid)
        print(tr_cu_prof_fr.head())

        report.info("Head of json Docs for session %s:\n" % sid)
        ses_info_fr, pilot_info_fr, unit_info_fr = json2frame(db=None, sid=sid)
        print(ses_info_fr.head())

    except Exception as e:
        report.error("Failed to pre-process data for session %s (%s)" % (sid, e))
        return

    report.header("Writing dataframes to disk.\n")

    # Create the directory separately so that the cleanup below only ever
    # removes a directory this call created itself.
    try:
        os.mkdir(session_dir)
    except Exception as e:
        report.error("Failed to write data: %s" % e)
        return

    outputs = (
        (tr_cu_prof_fr, 'tr_unit_prof.pkl'),
        (ses_info_fr,   'session_info.pkl'),
        (pilot_info_fr, 'pilot_info.pkl'),
        (unit_info_fr,  'unit_info.pkl'),
        (ses_prof_fr,   'session_prof.pkl'),
        (pilot_prof_fr, 'pilot_prof.pkl'),
        (cu_prof_fr,    'unit_prof.pkl'),
    )
    try:
        for frame, filename in outputs:
            frame.to_pickle(os.path.join(session_dir, filename))
    except Exception as e:
        report.error("Failed to write data: %s" % e)
        # A half-written directory would make every later run skip this
        # session (see the isdir check above), so remove it again.
        shutil.rmtree(session_dir, ignore_errors=True)
        return
'update' : event_filter['update' ]['in'] } for exp in sorted(exp_frames.keys()): print "plotting '%s'" % exp # these are the franes we want to plot for this experiment plot_frames = list() for frame, label in exp_frames[exp]: # we add a data frame column for CU concurrency for the component of # interest. Also, we calibrate t0 to when the first unit enters that # component. rpu.add_concurrency (frame, tgt='cu_num', spec=event_filter[exp]) rpu.calibrate_frame (frame, spec=calib_filter[exp]) # plot_frames.append([frame, label]) plot_frames.append([frame, None]) # create the plots for CU concurrency over time, and also show the plots in the notebook fig, _ = rpu.frame_plot(plot_frames, logx=False, logy=False, title=exp, legend=False, figdir=figdir, axis=[['time', 'time (s)'], ['cu_num', "number of concurrent CUs in '%s'" % exp]]) # fig.show() # # # inverse axis, use logar. scale for time # fig, _ = rpu.frame_plot(plot_frames, logx=False, logy=True, title=exp, # title=exp, legend=False, figdir=figdir, # axis=[['cu_num', 'number of concurrent CUs'],
def preprocess(sid):
    """Turn the profiles of one session into pickled data frames.

    The session's profiles are located with ``find_profiles``, combined and
    converted to a frame through ``rpu``, split into session / pilot / CU
    frames, and the CU frame is extended with states, info and five
    concurrency columns.  Together with the frames returned by
    ``json2frame`` everything is written as ``*.pkl`` files into
    ``PICKLE_DIR/<sid>/``.

    Nothing is done if that directory already exists.  Failures are
    reported through ``report.error`` and end the call; no exception of
    type ``Exception`` escapes, but KeyboardInterrupt and SystemExit are
    no longer swallowed.

    Args:
        sid: id of the session to process.

    Returns:
        None.
    """
    import shutil  # only needed to clean up after a failed write

    session_dir = os.path.join(PICKLE_DIR, sid)

    if os.path.isdir(session_dir):
        report.warn("Session dir '%s' already exists, skipping session." % session_dir)
        return

    try:
        sid_profiles = find_profiles(sid)
        print(sid_profiles)

        report.info("Combining profiles for session: %s.\n" % sid)
        combined_profiles = rpu.combine_profiles(sid_profiles)

        report.info("Converting profiles to frames for session: %s.\n" % sid)
        frames = rpu.prof2frame(combined_profiles)

        report.info("Head of Combined DF for session %s:\n" % sid)
        print(frames.entity.unique())

        ses_prof_fr, pilot_prof_fr, cu_prof_fr = rpu.split_frame(frames)

        report.info("Head of Session DF for session %s:\n" % sid)
        ses_prof_fr.insert(0, 'sid', sid)
        print(ses_prof_fr.head())

        report.info("Head of Pilot DF for session %s:\n" % sid)
        pilot_prof_fr.insert(0, 'sid', sid)
        print(pilot_prof_fr.head())

        report.info("Head of CU DF for session %s:\n" % sid)
        rpu.add_states(cu_prof_fr)
        print(cu_prof_fr.head())

        report.info("Head of CU DF for session %s (after states added):\n" % sid)
        rpu.add_info(cu_prof_fr)
        print(cu_prof_fr.head())

        report.info("Head of CU DF for session %s (after info added):\n" % sid)
        print(cu_prof_fr.head())

        report.info("Head of CU DF for session %s (after concurrency added):\n" % sid)

        # One concurrency column per stage:
        # (column name, state entering the stage, state leaving the stage).
        # FAILED and CANCELED always count as leaving.
        stages = (
            # units populating the database
            ('cc_populating', rps.STAGING_INPUT,
             rps.AGENT_STAGING_INPUT_PENDING),
            # units staging in
            ('cc_stage_in', rps.AGENT_STAGING_INPUT,
             rps.ALLOCATING_PENDING),
            # units being scheduled
            ('cc_sched', rps.ALLOCATING,
             rps.EXECUTING_PENDING),
            # units executing
            ('cc_exec', rps.EXECUTING,
             rps.AGENT_STAGING_OUTPUT_PENDING),
            # units staging out
            ('cc_stage_out', rps.AGENT_STAGING_OUTPUT,
             rps.PENDING_OUTPUT_STAGING),
        )
        for column, entered, left in stages:
            spec = {
                'in': [{'state': entered, 'event': 'advance'}],
                'out': [
                    {'state': left, 'event': 'advance'},
                    {'state': rps.FAILED, 'event': 'advance'},
                    {'state': rps.CANCELED, 'event': 'advance'},
                ],
            }
            rpu.add_concurrency(cu_prof_fr, column, spec)
        print(cu_prof_fr.head())

        report.info("Head of CU DF for session %s (after sid added):\n" % sid)
        cu_prof_fr.insert(0, 'sid', sid)
        print(cu_prof_fr.head())

        report.info("CU DF columns for session %s:\n" % sid)
        print(cu_prof_fr['info'].unique())

        # transpose
        tr_cu_prof_fr = rpu.get_info_df(cu_prof_fr)
        tr_cu_prof_fr.insert(0, 'sid', sid)
        report.info("Head of Transposed CU DF for session %s:\n" % sid)
        print(tr_cu_prof_fr.head())

        ses_info_fr, pilot_info_fr, unit_info_fr = json2frame(db=None, sid=sid)
        report.info("Head of json Docs for session %s:\n" % sid)
        print(ses_info_fr.head())

    except Exception as e:
        # Report the cause; a bare except would also hide Ctrl-C.
        report.error("Failed to pre-process data for session %s (%s)" % (sid, e))
        return

    report.header("Writing dataframes to disk.\n")

    # Create the directory separately so that the cleanup below only ever
    # removes a directory this call created itself.
    try:
        os.mkdir(session_dir)
    except Exception as e:
        report.error("Failed to write data: %s" % e)
        return

    outputs = (
        (ses_info_fr,   'session_info.pkl'),
        (pilot_info_fr, 'pilot_info.pkl'),
        (unit_info_fr,  'unit_info.pkl'),
        (ses_prof_fr,   'session_prof.pkl'),
        (pilot_prof_fr, 'pilot_prof.pkl'),
        (cu_prof_fr,    'unit_prof.pkl'),
        (tr_cu_prof_fr, 'tr_unit_prof.pkl'),
    )
    try:
        for frame, filename in outputs:
            frame.to_pickle(os.path.join(session_dir, filename))
    except Exception as e:
        report.error("Failed to write data: %s" % e)
        # A half-written directory would make every later run skip this
        # session (see the isdir check above), so remove it again.
        shutil.rmtree(session_dir, ignore_errors=True)
        return
def plot(sids, value, label='', paper=False):
    """Plot one concurrency column over time for several sessions.

    For every session id the pickled frames are read from
    ``PICKLE_DIR/<sid>/`` (presumably written by ``preprocess`` -- confirm
    against the caller).  The rows of ``unit_prof.pkl`` that belong to the
    session and have a non-negative ``value`` are selected, their time axis
    is shifted to start at zero, and the result is outer-merged on ``time``
    into one frame holding one column per session.  That frame is plotted
    and saved as ``plot_concurrency.pdf`` in the current working directory.

    Args:
        sids:  sequence of session ids; must not be empty.
        value: name of the column to plot.  ``'cc_fork'`` and ``'cc_exit'``
               are computed here via ``rpu.add_concurrency``; any other
               name must already be a column of the pickled frame.
        label: unused, kept for interface compatibility.
        paper: if true, the descriptive plot title is omitted.

    Raises:
        ValueError: if ``sids`` is empty.

    Note:
        The title is built from the metadata and resource label of the
        *last* session in ``sids``.
    """
    # Guard first: without a session there is no frame to merge or plot,
    # and the code below would fail on an unbound name.
    if len(sids) == 0:
        raise ValueError("plot() requires at least one session id")

    colors = [cmap(i) for i in np.linspace(0, 1, len(sids))]
    labels = []
    df_all = None

    for sid in sids:
        rp = sid.startswith('rp.session')
        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(
            os.path.join(session_dir, 'pilot_info.pkl'))
        unit_prof_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_prof.pkl'))
        session_info_df = pd.read_pickle(
            os.path.join(session_dir, 'session_info.pkl'))

        # Legend / title info for this session
        info = session_info_df.loc[sid]

        if rp:
            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            # list() keeps this working where values() returns a view
            resource_label = list(resources.values())[0]
        else:
            resource_label = 'bogus'

        cores = info['metadata.effective_cores']

        # These two columns are not stored in the pickle; derive them now.
        if value == 'cc_fork':
            spec = {
                'in':  [{'info': 'aec_start_script'}],
                'out': [{'info': 'aec_after_exec'}],
            }
            rpu.add_concurrency(unit_prof_df, 'cc_fork', spec)

        elif value == 'cc_exit':
            spec = {
                'in':  [{'info': 'aec_after_exec'}],
                'out': [
                    {'state': rps.AGENT_STAGING_OUTPUT_PENDING,
                     'event': 'advance'},
                ],
            }
            rpu.add_concurrency(unit_prof_df, 'cc_exit', spec)

        # Take an explicit copy: the selection is renamed and modified
        # below, which must not operate on a view of unit_prof_df.
        session_df = unit_prof_df[
            (unit_prof_df[value] >= 0) &
            (unit_prof_df.sid == sid)
        ][['time', value]].copy()

        # The data column is named after the session's effective core count.
        session_df.columns = ['time', cores]
        session_df['time'] -= session_df['time'].min()

        if df_all is None:
            df_all = session_df
        else:
            df_all = pd.merge(df_all, session_df, on='time', how='outer')

        labels.append("%d" % info['metadata.cu_runtime'])

    df_all.set_index('time', inplace=True)
    print(df_all.head())

    # 'color' is the keyword documented for DataFrame.plot
    ax = df_all.plot(fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH,
                     color=colors)

    legend = mp.pyplot.legend(labels, loc='upper right',
                              fontsize=LEGEND_FONTSIZE, labelspacing=0)
    legend.get_frame().set_linewidth(BORDERWIDTH)

    if not paper:
        mp.pyplot.title(
            "Concurrent number of CUs in stage '%s'.\n"
            "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s" % (
                value,
                info['metadata.generations'],
                info['metadata.cu_cores'],
                info['metadata.cu_runtime'],
                resource_label,
                info['metadata.num_sub_agents'],
                info['metadata.num_exec_instances_per_sub_agent'],
                info['metadata.radical_stack.rp'],
                info['metadata.radical_stack.rs'],
                info['metadata.radical_stack.ru'],
            ),
            fontsize=TITLE_FONTSIZE)

    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    # Raw string: the backslash is part of the label text.
    mp.pyplot.ylabel(r"\# Concurrent Units", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylim(-50)
    mp.pyplot.xlim(0, 600)

    for spine in ax.spines.values():
        spine.set_linewidth(BORDERWIDTH)

    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # Figure size in inches; the height follows the golden ratio.
    width = 3.3
    height = width / 1.618

    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_concurrency.pdf')
    mp.pyplot.close()