def plot(tr_unit_prof_df, info_df, unit_info_df, pilot_info_df, sids, value, label=''):
    """Plot one per-CU timing metric, one line per session, for varying sub-agent counts.

    The metric named by ``value`` is the difference of two profile-timestamp
    columns (optionally minus the configured CU runtime).  The figure is saved
    as ``plot3_<value><label>.pdf``.
    """
    # metric name -> (end column, start column); the plotted series is end - start.
    metric_columns = {
        'overhead':   ('asc_released',     'asc_allocated'),
        'orte':       ('aew_after_exec',   'aew_after_cd'),
        'popen':      ('aew_start_script', 'aec_handover'),
        'scheduler':  ('asc_get_u_pend',   'asic_put_u_pend'),
        'execworker': ('aew_work_u_pend',  'asc_put_u_pend'),
        'stageout':   ('asoc_get_u_pend',  'aew_put_u_pend'),
    }
    # These two metrics additionally subtract the nominal CU runtime.
    subtract_runtime = ('overhead', 'orte')

    legend_entries = []

    for sid in sids:
        # Session metadata used for the legend and the title.
        info = info_df.loc[sid]

        # For this call assume that there is only one pilot per session
        resources = get_resources(unit_info_df, pilot_info_df, sid)
        assert len(resources) == 1
        resource_label = resources.values()[0]

        # Restrict to this session's entries and order the units by the time
        # they arrived at the agent.
        session_units = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]
        ordered_units = session_units.sort('awo_get_u_pend')

        if value not in metric_columns:
            raise Exception("Value %s unknown" % value)

        end_col, start_col = metric_columns[value]
        series = ordered_units[end_col] - ordered_units[start_col]
        if value in subtract_runtime:
            series = series - info['metadata.cu_runtime']
        ax = series.plot(kind='line')

        legend_entries.append("Sub-Agents: %d" % info['metadata.num_sub_agents'])

    mp.pyplot.legend(legend_entries, loc='upper left', fontsize=5)
    # NOTE(review): the pilot-core "%d(+N*16)" slot below is filled with
    # metadata.cu_count — possibly meant to be metadata.pilot_cores; confirm.
    mp.pyplot.title("'%s' per CU for varying number of Sub-Agents.\n"
                    "%d CUs of %d core(s) with a %ss payload on a %d(+N*16) core pilot on %s.\n"
                    "Varying number of sub-agent with %d ExecWorker(s). All times are per CU.\n"
                    "RP: %s - RS: %s - RU: %s"
                    % (value,
                       info['metadata.cu_count'], info['metadata.cu_cores'],
                       info['metadata.cu_runtime'], info['metadata.cu_count'], resource_label,
                       info['metadata.num_exec_instances_per_sub_agent'],
                       info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'],
                       info['metadata.radical_stack.ru']
                       ), fontsize=8)
    mp.pyplot.xlabel("Compute Units (ordered by agent arrival)")
    mp.pyplot.ylabel("Time (s)")
    mp.pyplot.ylim(0)
    ax.get_xaxis().set_ticks([])

    mp.pyplot.savefig('plot3_%s%s.pdf' % (value, label))
    mp.pyplot.close()
def plot(tr_unit_prof_df, info_df, unit_info_df, pilot_info_df, sids):
    """Bar-plot mean time-to-completion (TTC) grouped by ExecWorker count.

    For each session the TTC is the span from the first unit arriving at the
    agent to the last unit finishing execution; sessions sharing the same
    ExecWorker count are averaged.  Saves ``plot_ttc_ew.pdf``.
    """
    labels = []

    # Maps ExecWorker count -> list of per-session TTC values.
    orte_ttc = {}

    for sid in sids:
        # Legend info
        info = info_df.loc[sid]

        #num_sas = info['metadata.num_sub_agents']
        num_sas = info['metadata.num_exec_instances_per_sub_agent']
        if num_sas not in orte_ttc:
            orte_ttc[num_sas] = []

        # For this call assume that there is only one pilot per session
        resources = get_resources(unit_info_df, pilot_info_df, sid)
        assert len(resources) == 1
        resource_label = resources.values()[0]

        # Get only the entries for this session
        tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

        # Only take completed CUs into account
        tuf = tuf[tuf['Done'].notnull()]

        # We sort the units based on the order they arrived at the agent
        # NOTE: DataFrame.sort() is the pre-0.17 pandas API (sort_values in
        # newer releases) — this code presumably runs against a pinned pandas.
        tufs = tuf.sort('awo_get_u_pend')

        # TTC = last execution end - first agent arrival.
        orte_ttc[num_sas].append((tufs['aec_after_exec'].max() - tufs['awo_get_u_pend'].min()))

        # NOTE(review): one label is appended per *session*, not per distinct
        # ExecWorker count, so labels may contain duplicates (legend is
        # commented out below anyway).
        labels.append("ExecWorkers: %d" % num_sas)

    orte_df = pd.DataFrame(orte_ttc)
    ax = orte_df.mean().plot(kind='bar', colormap='Paired')

    print 'labels: %s' % labels
    #mp.pyplot.legend(labels, loc='upper left', fontsize=5)
    # Title uses metadata of the *last* session iterated.
    mp.pyplot.title("TTC for varying ExecWorkers.\n"
                    "%d CUs of %d core(s) with a %ss payload on a %d core pilot on %s.\n"
                    "%d sub-agent with varying ExecWorker(s).\n"
                    "RP: %s - RS: %s - RU: %s"
                    % (info['metadata.cu_count'], info['metadata.cu_cores'],
                       info['metadata.cu_runtime'], info['metadata.pilot_cores'], resource_label,
                       info['metadata.num_sub_agents'],
                       info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'],
                       info['metadata.radical_stack.ru']
                       ), fontsize=8)
    mp.pyplot.xlabel("Number of Execution Workers")
    mp.pyplot.ylabel("Time to Completion (s)")
    #mp.pyplot.ylim(0)
    #ax.get_xaxis().set_ticks([])
    #ax.get_xaxis.set

    mp.pyplot.savefig('plot_ttc_ew.pdf')
    mp.pyplot.close()
def plot(tr_unit_prof_df, info_df, unit_info_df, pilot_info_df, sid):
    """Pie-chart the distribution of final CU states for one session.

    Saves ``<sid>_plot_states.pdf``.
    """
    print "Plotting %s ..." % sid

    labels = []

    # Legend info
    info = info_df.loc[sid]

    mpi = get_mpi(unit_info_df, sid)
    #mpi = True

    # For this call assume that there is only one pilot per session
    lms = get_lm(unit_info_df, pilot_info_df, sid, mpi)
    assert len(lms) == 1
    launch_method = lms.values()[0]

    # For this call assume that there is only one pilot per session
    spawners = get_spawners(unit_info_df, pilot_info_df, sid)
    assert len(spawners) == 1
    spawner = spawners.values()[0]

    # For this call assume that there is only one pilot per session
    resources = get_resources(unit_info_df, pilot_info_df, sid)
    assert len(resources) == 1
    resource_label = resources.values()[0]

    # Get only the entries for this session
    uf = unit_info_df[unit_info_df['sid'] == sid]

    # Count how many units ended up in each state.
    result = pd.value_counts(uf['state'].values, sort=False)
    print result
    ax = result.plot(kind='pie', autopct='%.2f%%')
    ax.set_aspect('equal')

    print info

    #mp.pyplot.legend(labels, loc='upper left', fontsize=5)
    mp.pyplot.title("%s (%s)\n"
                    "%d CUs of %d core(s) with a %ds payload on a %d core pilot on %s.\n"
                    "%d sub-agent(s) with %d ExecWorker(s) each. All times are per CU.\n"
                    "RP: %s - RS: %s - RU: %s"
                    % (sid,
                       time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime(info['created'])),
                       info['metadata.cu_count'], info['metadata.cu_cores'],
                       info['metadata.cu_runtime'], info['metadata.pilot_cores'], resource_label,
                       info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
                       info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'],
                       info['metadata.radical_stack.ru']
                       ), fontsize=8)

    mp.pyplot.savefig('%s_plot_states.pdf' % sid)
    mp.pyplot.close()
def plot(sid, values, label='', paper=False, window=1.0, plot_mean=False):
    """Plot per-second event rates (units/s) for several agent components of one session.

    For each entry in ``values`` a frequency column is derived from the unit
    profile (via ``add_frequency``), resampled onto a ``window``-second grid,
    and all series are drawn as one step plot.  Saves
    ``plot_more_rates-<sid>.pdf``.

    NOTE(review): near-duplicate of another plot(sid, values, ...) in this
    file — consider deduplicating.
    """
    labels = []
    means = {}

    colors = [cmap(i) for i in np.linspace(0, 1, len(values))]
    c = 0

    first = True

    # RADICAL-Pilot sessions are identified by their id prefix.
    if sid.startswith('rp.session'):
        rp = True
    else:
        rp = False

    session_dir = os.path.join(PICKLE_DIR, sid)

    pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
    session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))
    unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
    unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'unit_prof.pkl'))

    # Legend info
    info = session_info_df.loc[sid]

    if rp:
        # For this call assume that there is only one pilot per session
        resources = get_resources(unit_info_df, pilot_info_df, sid)
        assert len(resources) == 1
        resource_label = resources.values()[0]
    else:
        resource_label = "bogus"

    # Get only the entries for this session
    #uf = unit_prof_df[unit_prof_df['sid'] == sid]

    # We sort the units based on the order they arrived at the agent
    #ufs = uf.sort('awo_get_u_pend')

    cores = info['metadata.effective_cores']

    for value in values:
        # Select which profile events define this frequency series.
        if 'stagein_freq' == value:
            spec = {'state': rps.AGENT_STAGING_INPUT, 'event': 'advance'}
        elif 'sched_freq' == value:
            spec = {'state': rps.EXECUTING_PENDING, 'event': 'advance'}
        elif 'exec_freq' == value:
            spec = {'state': rps.EXECUTING, 'event': 'advance'}
        elif 'fork_freq' == value:
            spec = {'info': 'aec_start_script'}
        elif 'exit_freq' == value:
            spec = {'info': 'aec_after_exec'}
        elif 'stageout_pend_freq' == value:
            spec = {'state': rps.AGENT_STAGING_OUTPUT_PENDING, 'event': 'advance'}
        elif 'stageout_freq' == value:
            spec = {'state': rps.AGENT_STAGING_OUTPUT, 'event': 'advance'}
        else:
            raise Exception("Value %s unknown" % value)

        #print unit_prof_df.head()
        # Adds a column named `value` to unit_prof_df in place.
        add_frequency(unit_prof_df, value, window, spec)
        df = unit_prof_df[
            (unit_prof_df[value] >= 0) &
            #(unit_prof_df.event == 'advance') &
            (unit_prof_df.sid == sid)
            ][['time', value]]

        means[value] = df[value].mean()

        #df.columns = ['time', value]
        #df['time'] -= df['time'].min()

        # Use a datetime index so we can resample onto the window grid.
        df.time = pd.to_datetime(df.time, unit='s')
        df.set_index('time', inplace=True, drop=True, append=False)

        #print ("Head of %s before resample" % value)
        #print df.head()

        def _mean(array_like):
            # Normalize the per-bucket mean by the window to get a rate.
            return np.mean(array_like)/window

        # NOTE: resample(..., how=) is the old pandas API (removed in 0.23+);
        # '%dL' is a millisecond-resolution bucket of `window` seconds.
        df = df.resample('%dL' % int(1000.0*window), how=_mean)[value]
        df = df.fillna(0)

        #print ("Head of %s after resample" % value)
        #print df.head()

        if first:
            df_all = df
        else:
            #df_all = pd.merge(df_all, df, on='time', how='outer')
            #df_all = pd.merge(df_all, df, on='time')
            #df_all = pd.merge(df_all, df)
            df_all = pd.concat([df_all, df], axis=1)
            #df_all.append(df)

        #print ("Head of df_all")
        #print df_all.head()

        # Human-friendly legend labels for the known frequency series.
        if value == 'exec_freq':
            labels.append("Launching")
        elif value == 'sched_freq':
            labels.append("Scheduling")
        elif value == 'fork_freq':
            labels.append("Forking")
        elif value == 'stageout_pend_freq':
            labels.append("Completing")
        else:
            labels.append("%s" % value)

        first = False

    # df.plot(drawstyle='steps-pre')
    c = 0
    for value in values:
        mean = df_all[value].mean()
        print "%s mean: %f" % (value, mean)
        # df_all['mean_%s' % value] = mean
        #labels.append("Mean %s" % value)

    print 'means:', means

    # When plotting means too, each series appears twice (solid + dashed).
    my_colors = colors
    if plot_mean:
        my_colors *= 2

    my_styles = []
    for x in range(len(values)):
        my_styles.append('-')
    if plot_mean:
        for x in range(len(values)):
            my_styles.append('--')

    #df_all.set_index('time', inplace=True)
    #print df_all.head(500)
    #df_all.plot(colormap='Paired')
    #df_all.plot(drawstyle='steps-post')
    ax = df_all.plot(drawstyle='steps-pre', color=my_colors, style=my_styles, linewidth=LINEWIDTH, fontsize=TICK_FONTSIZE)
    # df_all.plot(drawstyle='steps')
    #df_all.plot()

    # Vertial reference
    # x_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    # mp.pyplot.plot((x_ref, x_ref),(0, 1000), 'k--')
    # labels.append("Optimal")

    mp.pyplot.legend(labels, loc='upper right', fontsize=LEGEND_FONTSIZE, labelspacing=0)
    if not paper:
        mp.pyplot.title("Rate of various components: %s'.\n"
                        "%d generations of %d 'concurrent' units of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
                        "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
                        "RP: %s - RS: %s - RU: %s"
                        % (values,
                           info['metadata.generations'], cores, info['metadata.cu_cores'],
                           info['metadata.cu_runtime'], resource_label,
                           info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
                           info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'],
                           info['metadata.radical_stack.ru']
                           ), fontsize=TITLE_FONTSIZE)

    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("Rate (Unit/s)", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylim(-1, 400)
    #mp.pyplot.xlim(-1,)
    #mp.pyplot.xlim(['1/1/2000', '1/1/2000'])
    #mp.pyplot.xlim('03:00', '04:00')
    #mp.pyplot.xlim(380, 400)
    #mp.pyplot.xlim(675, 680)
    #ax.get_xaxis().set_ticks([])
    # ax.set_yscale('log', basey=10)
    #mp.pyplot.xlim((291500.0, 1185200.0))
    #mp.pyplot.xlim((474000.0, 2367600.0))
    print "xlim:", ax.get_xlim()

    # Thicken plot borders and tick marks for print legibility.
    [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # Figure sized for a two-column paper layout (golden-ratio height).
    #width = 3.487
    width = 3.3
    height = width / 1.618
    # height = 2.7
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)
    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)
    #fig.tight_layout()

    mp.pyplot.savefig('plot_more_rates-%s.pdf' % sid)
    mp.pyplot.close()
def migrate(options): from ckan import model from ckanext.archiver.model import Archival from ckanext.qa.model import QA resources = common.get_resources(state='active', publisher_ref=options.publisher, resource_id=options.resource, dataset_name=options.dataset) stats = StatsList() widgets = ['Resources: ', Percentage(), ' ', Bar(), ' ', ETA()] progress = ProgressBar(widgets=widgets) for res in progress(resources): # Gather the details of QA from TaskStatus # to fill all properties of QA apart from: # * package_id # * resource_id fields = {} qa_task_status = model.Session.query(model.TaskStatus)\ .filter_by(entity_id=res.id)\ .filter_by(task_type='qa')\ .filter_by(key='status')\ .first() if not qa_task_status: add_stat('No QA data', res, stats) continue qa_error = json.loads(qa_task_status.error) fields['openness_score'] = int(qa_task_status.value) fields['openness_score_reason'] = qa_error['reason'] fields['format'] = qa_error['format'] qa_date = qa_task_status.last_updated # NB qa_task_status.last_updated appears to be 1hr ahead of the revision # time, so some timezone nonesense going on. Can't do much. archival = Archival.get_for_resource(res.id) if not archival: print add_stat('QA but no Archival data', res, stats) continue archival_date = archival.updated # the state of the resource was as it was archived on the date of # the QA update but we only know when the latest archival was. So # if it was archived before the QA update thenwe know that was the # archival, otherwise we don't know when the relevant archival was. 
if archival_date and qa_date >= archival_date: fields['archival_timestamp'] = archival_date fields['updated'] = archival_date fields['created'] = archival_date # Assume the resource URL archived was the one when the # archival was done (it may not be if the URL was queued and # there was significant delay before it was archived) get_resource_as_at = archival_date else: # This is common for when a resource is created and qa runs just # before archiver and you get: # "This file had not been downloaded at the time of scoring it." # Just put sensible datetimes since we don't really know the exact # ones fields['archival_timestamp'] = qa_date fields['updated'] = qa_date fields['created'] = qa_date get_resource_as_at = qa_date res_rev = model.Session.query(model.ResourceRevision).\ filter_by(id=res.id).\ filter(model.ResourceRevision.revision_timestamp < get_resource_as_at).\ order_by(model.ResourceRevision.revision_timestamp.desc()).\ first() fields['resource_timestamp'] = res_rev.revision_timestamp # Compare with any existing data in the Archival table qa = QA.get_for_resource(res.id) if qa: changed = None for field, value in fields.items(): if getattr(qa, field) != value: if options.write: setattr(qa, field, value) changed = True if not changed: add_stat('Already exists correctly in QA table', res, stats) continue add_stat('Updated in QA table', res, stats) else: qa = QA.create(res.id) if options.write: for field, value in fields.items(): setattr(qa, field, value) model.Session.add(qa) add_stat('Added to QA table', res, stats) print 'Summary\n', stats.report() if options.write: model.repo.commit_and_remove() print 'Written'
def plot(sids, paper=False):
    """Plot mean TTC versus node count for full-node CU runs, one line per resource.

    ``sids`` maps a resource key to a list of session ids; sessions are
    grouped by node count (cores / 32) and averaged.  Saves
    ``plot_ttc_full_node.pdf``.
    """
    labels = []

    for key in sids:
        # Maps node count -> Series of per-session TTC values for this resource.
        orte_ttc = {}

        for sid in sids[key]:
            # RADICAL-Pilot sessions are identified by their id prefix.
            if sid.startswith('rp.session'):
                rp = True
            else:
                rp = False

            session_dir = os.path.join(PICKLE_DIR, sid)
            unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
            pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
            tr_unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
            session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

            # Legend info
            info = session_info_df.loc[sid]
            cores = info['metadata.effective_cores']
            # NOTE: assumes 32 cores per node; Python 2 integer division.
            nodes = cores / 32

            if nodes not in orte_ttc:
                orte_ttc[nodes] = pd.Series()

            if rp:
                # For this call assume that there is only one pilot per session
                resources = get_resources(unit_info_df, pilot_info_df, sid)
                assert len(resources) == 1
                resource_label = resources.values()[0]
            else:
                resource_label = 'bogus'

            # Get only the entries for this session
            tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

            # Only take completed CUs into account
            #tuf = tuf[tuf['Done'].notnull()]

            # We sort the units based on the order they arrived at the agent
            #tufs = tuf.sort('awo_get_u_pend')
            #tufs = tuf.sort('awo_adv_u')
            #tufs = tuf.sort('asic_get_u_pend')
            # NOTE: old-pandas DataFrame.sort() with no column — sorts by index.
            tufs = tuf.sort()

            # TTC = last execution end - first scheduler arrival.
            orte_ttc[nodes] = orte_ttc[nodes].append(
                pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min())))

        print 'orte_ttc raw:', orte_ttc
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(orte_ttc)
        print 'orte_ttc df:', orte_df

        labels.append("%s" % resource_legend[key])
        ax = orte_df.mean().plot(kind='line', color=resource_colors[key],
                                 marker=resource_marker[key],
                                 fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)

    # ORTE only
    # Data for BW
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096, 8192), (305, 309, 309, 313, 326, 351, 558), 'b-+')
    # Data for Stampede
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096), (301, 303, 305, 311, 322, 344), 'b-+')
    #labels.append("ORTE-only (C)")

    # Horizontal reference: theoretical optimum (uses last session's metadata).
    y_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--', linewidth=LINEWIDTH)
    labels.append("Optimal")

    print 'labels: %s' % labels
    location = 'upper left'
    mp.pyplot.legend(labels, loc=location, fontsize=LEGEND_FONTSIZE, markerscale=0)

    if not paper:
        mp.pyplot.title("TTC for a varying number of 'concurrent' Full-Node CUs.\n"
                        "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
                        "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
                        "RP: %s - RS: %s - RU: %s"
                        % (info['metadata.generations'], info['metadata.cu_cores'],
                           info['metadata.cu_runtime'], resource_label,
                           info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
                           info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'],
                           info['metadata.radical_stack.ru']),
                        fontsize=TITLE_FONTSIZE)

    # "\#" — the x-label is rendered through LaTeX, so '#' must be escaped.
    mp.pyplot.xlabel("\# Nodes", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("Time to Completion (s)", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylim(0)
    #mp.pyplot.ylim(290, 500)
    #mp.pyplot.ylim(y_ref-10)
    #ax.get_xaxis().set_ticks([])
    #
    #ax.get_xaxis.set

    # Figure sized for a two-column paper layout (golden-ratio height).
    #width = 3.487
    width = 3.3
    height = width / 1.618
    # height = 2.7
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)
    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_ttc_full_node.pdf')
    mp.pyplot.close()
def plot(sids, paper=False):
    """Plot mean TTC versus core count, one line per key in ``sids``.

    ``sids`` maps a key (used for the legend and line color) to a list of
    session ids; sessions are grouped by effective core count and averaged.
    Saves ``plot_worker_effect.pdf``.
    """
    labels = []

    colors = [cmap(i) for i in np.linspace(0, 1, len(sids))]
    c = 0

    for key in sids:
        # Maps core count -> Series of per-session TTC values for this key.
        orte_ttc = {}

        for sid in sids[key]:
            session_dir = os.path.join(PICKLE_DIR, sid)
            unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
            pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
            tr_unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
            session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

            # Legend info
            info = session_info_df.loc[sid]
            cores = info['metadata.effective_cores']

            if cores not in orte_ttc:
                orte_ttc[cores] = pd.Series()

            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0]

            # Get only the entries for this session
            tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

            # Only take completed CUs into account
            #tuf = tuf[tuf['Done'].notnull()]

            # We sort the units based on the order they arrived at the agent
            #tufs = tuf.sort('awo_get_u_pend')
            #tufs = tuf.sort('awo_adv_u')
            # NOTE: DataFrame.sort(column) is the pre-0.17 pandas API.
            tufs = tuf.sort('asic_get_u_pend')

            # TTC = last execution end - first scheduler arrival.
            orte_ttc[cores] = orte_ttc[cores].append(
                pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min())))

        print 'orte_ttc raw:', orte_ttc
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(orte_ttc)
        print 'orte_ttc df:', orte_df

        #labels.append("%s" % resource_legend[key])
        labels.append("%s" % key)
        #ax = orte_df.mean().plot(kind='line', color=resource_colors[key], marker=resource_marker[key], fontsize=BARRIER_FONTSIZE, linewidth=BARRIER_LINEWIDTH)
        ax = orte_df.mean().plot(kind='line', color=colors[c])
        c += 1

    # ORTE only
    # Data for BW
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096, 8192), (305, 309, 309, 313, 326, 351, 558), 'b-+')
    # Data for Stampede
    # mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096), (301, 303, 305, 311, 322, 344), 'b-+')
    # labels.append("ORTE-only (C)")

    # Horizontal reference: theoretical optimum (uses last session's metadata).
    y_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--')
    labels.append("Optimal")

    #print 'labels: %s' % labels
    mp.pyplot.legend(labels, loc='upper left', fontsize=BARRIER_FONTSIZE)

    if not paper:
        mp.pyplot.title("TTC for a varying number of 'concurrent' CUs.\n"
                        "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
                        "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
                        "RP: %s - RS: %s - RU: %s"
                        % (info['metadata.generations'], info['metadata.cu_cores'],
                           info['metadata.cu_runtime'], resource_label,
                           info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
                           info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'],
                           info['metadata.radical_stack.ru']
                           ), fontsize=8)

    mp.pyplot.xlabel("# Cores", fontsize=BARRIER_FONTSIZE)
    mp.pyplot.ylabel("Time to Completion (s)", fontsize=BARRIER_FONTSIZE)
    #mp.pyplot.ylim(0)
    #mp.pyplot.ylim(290, 500)
    #mp.pyplot.ylim(y_ref-10)
    #ax.get_xaxis().set_ticks([])
    #ax.get_xaxis.set

    mp.pyplot.savefig('plot_worker_effect.pdf')
    mp.pyplot.close()
def migrate(options):
    """Migrate archival results from TaskStatus/Resource into the Archival table.

    For each active resource, reconstruct the Archival fields from the stored
    'archiver'/'status' TaskStatus row (falling back to the Resource's own
    cache attributes when absent), then create or update the Archival row.
    Nothing is written unless ``options.write`` is set; a summary of actions
    is printed either way.
    """
    from ckan import model
    from ckanext.archiver.model import Archival, Status

    resources = common.get_resources(state='active',
                                     publisher_ref=options.publisher,
                                     resource_id=options.resource,
                                     dataset_name=options.dataset)
    stats = StatsList()
    widgets = ['Resources: ', Percentage(), ' ', Bar(), ' ', ETA()]
    progress = ProgressBar(widgets=widgets)

    for res in progress(resources):
        # Gather the details of archivals from TaskStatus and Resource
        # to fill all properties of Archival apart from:
        # * package_id
        # * resource_id
        fields = {}
        archiver_task_status = model.Session.query(model.TaskStatus)\
                                    .filter_by(entity_id=res.id)\
                                    .filter_by(task_type='archiver')\
                                    .filter_by(key='status')\
                                    .first()
        if archiver_task_status:
            ats_error = json.loads(archiver_task_status.error)
            fields['status_id'] = Status.by_text(archiver_task_status.value)
            fields['is_broken'] = Status.is_status_broken(fields['status_id'])
            fields['reason'] = ats_error['reason']
            fields['last_success'] = date_str_to_datetime_or_none(ats_error['last_success'])
            fields['first_failure'] = date_str_to_datetime_or_none(ats_error['first_failure'])
            fields['failure_count'] = int(ats_error['failure_count'])
            fields['url_redirected_to'] = ats_error['url_redirected_to']
            fields['updated'] = archiver_task_status.last_updated
        else:
            # No archiver TaskStatus: skip unless the Resource itself carries
            # evidence of a past archival (cache/hash/size/mimetype).
            if not (res.cache_url
                    or res.extras.get('cache_filepath')
                    or res.hash
                    or res.size
                    or res.mimetype):
                add_stat('No archive data', res, stats)
                continue
            for field_name in ('status_id', 'is_broken', 'reason',
                               'last_success', 'first_failure',
                               'failure_count', 'url_redirected_to',
                               'updated', 'created'):
                fields[field_name] = None

        # Cache details always come from the Resource itself.
        fields['cache_filepath'] = res.extras.get('cache_filepath')
        fields['cache_url'] = res.cache_url
        fields['hash'] = res.hash
        fields['size'] = res.size
        fields['mimetype'] = res.mimetype

        # Date the archival from the revision history where possible.
        revisions_with_hash = model.Session.query(model.ResourceRevision)\
            .filter_by(id=res.id)\
            .order_by(model.ResourceRevision.revision_timestamp)\
            .filter(model.ResourceRevision.hash != '').all()
        if revisions_with_hash:
            # these are not perfect but not far off
            fields['created'] = revisions_with_hash[0].revision_timestamp
            fields['resource_timestamp'] = revisions_with_hash[-1].revision_timestamp
        else:
            # Fall back to the earliest/latest known TaskStatus dates; the
            # `or END_OF_TIME` / `or START_OF_TIME` defaults make None values
            # lose the min/max comparisons.
            fields['created'] = min(fields['updated'] or END_OF_TIME,
                                    fields['first_failure'] or END_OF_TIME,
                                    fields['last_success'] or END_OF_TIME)
            fields['resource_timestamp'] = max(fields['updated'] or START_OF_TIME,
                                               fields['first_failure'] or START_OF_TIME,
                                               fields['last_success'] or START_OF_TIME)

        # Compare with any existing data in the Archival table
        archival = Archival.get_for_resource(res.id)
        if archival:
            changed = None
            for field, value in fields.items():
                if getattr(archival, field) != value:
                    # Only mutate the row when actually writing; `changed` is
                    # still tracked so dry runs report what would happen.
                    if options.write:
                        setattr(archival, field, value)
                    changed = True
            if not changed:
                add_stat('Already exists correctly in archival table', res, stats)
                continue
            add_stat('Updated in archival table', res, stats)
        else:
            archival = Archival.create(res.id)
            if options.write:
                for field, value in fields.items():
                    setattr(archival, field, value)
                model.Session.add(archival)
            add_stat('Added to archival table', res, stats)
    print 'Summary\n', stats.report()
    if options.write:
        model.repo.commit_and_remove()
        print 'Written'
def plot(unit_prof_df, tr_unit_prof_df, info_df, unit_info_df, pilot_info_df, sid):
    """Plot concurrent compute-unit counts per agent component over time.

    Each `cc_*` column of the unit profile holds the number of units inside a
    component at a given timestamp; the five series are outer-merged on time
    and drawn as a single step plot.  Saves ``<sid>_plot4.pdf``.
    """
    print "Plotting %s ..." % sid

    labels = []

    #
    # Legend info
    info = info_df.loc[sid]

    # For this call assume that there is only one pilot per session
    resources = get_resources(unit_info_df, pilot_info_df, sid)
    assert len(resources) == 1
    resource_label = resources.values()[0]

    df = pd.DataFrame()

    #
    # Pulling in
    #
    populating_df = unit_prof_df[
        (unit_prof_df.cc_populating >= 0) &
        (unit_prof_df.event == 'advance') &
        (unit_prof_df.sid == sid)
        ][['time', 'cc_populating']]

    #
    # Staging in
    #
    stage_in_df = unit_prof_df[
        (unit_prof_df.cc_stage_in >= 0) &
        (unit_prof_df.event == 'advance') &
        (unit_prof_df.sid == sid)
        ][['time', 'cc_stage_in']]

    #
    # Scheduling
    #
    sched_df = unit_prof_df[
        (unit_prof_df.cc_sched >= 0) &
        (unit_prof_df.event == 'advance') &
        (unit_prof_df.sid == sid)
        ][['time', 'cc_sched']]

    #
    # Executing
    #
    exec_df = unit_prof_df[
        (unit_prof_df.cc_exec >= 0) &
        (unit_prof_df.event == 'advance') &
        (unit_prof_df.sid == sid)
        ][['time', 'cc_exec']]

    #
    # Staging out
    #
    stage_out_df = unit_prof_df[
        (unit_prof_df.cc_stage_out >= 0) &
        (unit_prof_df.event == 'advance') &
        (unit_prof_df.sid == sid)
        ][['time', 'cc_stage_out']]

    print sched_df.head()

    # Outer-merge all component series on the time axis; each merge step has
    # a matching legend label appended in the same order.
    df = populating_df
    labels.append("Populating MongoDB")
    df = pd.merge(df, stage_in_df, on='time', how='outer')
    labels.append("Staging Input Data")
    df = pd.merge(df, sched_df, on='time', how='outer')
    labels.append("Scheduling")
    df = pd.merge(df, exec_df, on='time', how='outer')
    labels.append("Executing")
    df = pd.merge(df, stage_out_df, on='time', how='outer')
    labels.append("Staging Output Data")

    df.set_index('time', inplace=True)
    print df.head()

    df.plot(colormap='Paired', drawstyle='steps-post')

    mp.pyplot.legend(labels, loc='upper left', fontsize=5)
    # mp.pyplot.title("Concurrent Compute Units per Component.\n"
    #                 "%d CUs of %d core(s) with a %ss payload on a %d core pilot on %s.\n"
    #                 "%d sub-agent(s) with %d ExecWorker(s) each. All times are per CU.\n"
    #                 "RP: %s - RS: %s - RU: %s"
    #                 % (info['metadata.cu_count'], info['metadata.cu_cores'], info['metadata.cu_runtime'], info['metadata.pilot_cores'], resource_label,
    #                    info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
    #                    info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']
    #                    ), fontsize=8)
    mp.pyplot.xlabel("Time (s)")
    mp.pyplot.ylabel("Concurrent Compute Units")
    #mp.pyplot.ylim(0,100)
    #mp.pyplot.xlim(1200, 1500)

    mp.pyplot.savefig('%s_plot4.pdf' % sid)
    mp.pyplot.close()
def plot(sid, values, label='', paper=False, window=1.0, plot_mean=False):
    """Plot per-second event rates (units/s) for several agent components of one session.

    For each entry in ``values`` a frequency column is derived from the unit
    profile (via ``add_frequency``), resampled onto a ``window``-second grid,
    and all series are drawn as one step plot.  Saves
    ``plot_more_rates-<sid>.pdf``.

    NOTE(review): near-duplicate of another plot(sid, values, ...) in this
    file — consider deduplicating.
    """
    labels = []
    means = {}

    colors = [cmap(i) for i in np.linspace(0, 1, len(values))]
    c = 0

    first = True

    # RADICAL-Pilot sessions are identified by their id prefix.
    if sid.startswith('rp.session'):
        rp = True
    else:
        rp = False

    session_dir = os.path.join(PICKLE_DIR, sid)

    pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
    session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))
    unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
    unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'unit_prof.pkl'))

    # Legend info
    info = session_info_df.loc[sid]

    if rp:
        # For this call assume that there is only one pilot per session
        resources = get_resources(unit_info_df, pilot_info_df, sid)
        assert len(resources) == 1
        resource_label = resources.values()[0]
    else:
        resource_label = "bogus"

    # Get only the entries for this session
    #uf = unit_prof_df[unit_prof_df['sid'] == sid]

    # We sort the units based on the order they arrived at the agent
    #ufs = uf.sort('awo_get_u_pend')

    cores = info['metadata.effective_cores']

    for value in values:
        # Select which profile events define this frequency series.
        if 'stagein_freq' == value:
            spec = {'state': rps.AGENT_STAGING_INPUT, 'event': 'advance'}
        elif 'sched_freq' == value:
            spec = {'state': rps.EXECUTING_PENDING, 'event': 'advance'}
        elif 'exec_freq' == value:
            spec = {'state': rps.EXECUTING, 'event': 'advance'}
        elif 'fork_freq' == value:
            spec = {'info': 'aec_start_script'}
        elif 'exit_freq' == value:
            spec = {'info': 'aec_after_exec'}
        elif 'stageout_pend_freq' == value:
            spec = {
                'state': rps.AGENT_STAGING_OUTPUT_PENDING,
                'event': 'advance'
            }
        elif 'stageout_freq' == value:
            spec = {'state': rps.AGENT_STAGING_OUTPUT, 'event': 'advance'}
        else:
            raise Exception("Value %s unknown" % value)

        #print unit_prof_df.head()
        # Adds a column named `value` to unit_prof_df in place.
        add_frequency(unit_prof_df, value, window, spec)
        df = unit_prof_df[(unit_prof_df[value] >= 0) &
                          #(unit_prof_df.event == 'advance') &
                          (unit_prof_df.sid == sid)][['time', value]]

        means[value] = df[value].mean()

        #df.columns = ['time', value]
        #df['time'] -= df['time'].min()

        # Use a datetime index so we can resample onto the window grid.
        df.time = pd.to_datetime(df.time, unit='s')
        df.set_index('time', inplace=True, drop=True, append=False)

        #print ("Head of %s before resample" % value)
        #print df.head()

        def _mean(array_like):
            # Normalize the per-bucket mean by the window to get a rate.
            return np.mean(array_like) / window

        # NOTE: resample(..., how=) is the old pandas API (removed in 0.23+);
        # '%dL' is a millisecond-resolution bucket of `window` seconds.
        df = df.resample('%dL' % int(1000.0 * window), how=_mean)[value]
        df = df.fillna(0)

        #print ("Head of %s after resample" % value)
        #print df.head()

        if first:
            df_all = df
        else:
            #df_all = pd.merge(df_all, df, on='time', how='outer')
            #df_all = pd.merge(df_all, df, on='time')
            #df_all = pd.merge(df_all, df)
            df_all = pd.concat([df_all, df], axis=1)
            #df_all.append(df)

        #print ("Head of df_all")
        #print df_all.head()

        # Human-friendly legend labels for the known frequency series.
        if value == 'exec_freq':
            labels.append("Launching")
        elif value == 'sched_freq':
            labels.append("Scheduling")
        elif value == 'fork_freq':
            labels.append("Forking")
        elif value == 'stageout_pend_freq':
            labels.append("Completing")
        else:
            labels.append("%s" % value)

        first = False

    # df.plot(drawstyle='steps-pre')
    c = 0
    for value in values:
        mean = df_all[value].mean()
        print "%s mean: %f" % (value, mean)
        # df_all['mean_%s' % value] = mean
        #labels.append("Mean %s" % value)

    print 'means:', means

    # When plotting means too, each series appears twice (solid + dashed).
    my_colors = colors
    if plot_mean:
        my_colors *= 2

    my_styles = []
    for x in range(len(values)):
        my_styles.append('-')
    if plot_mean:
        for x in range(len(values)):
            my_styles.append('--')

    #df_all.set_index('time', inplace=True)
    #print df_all.head(500)
    #df_all.plot(colormap='Paired')
    #df_all.plot(drawstyle='steps-post')
    ax = df_all.plot(drawstyle='steps-pre', color=my_colors, style=my_styles,
                     linewidth=LINEWIDTH, fontsize=TICK_FONTSIZE)
    # df_all.plot(drawstyle='steps')
    #df_all.plot()

    # Vertial reference
    # x_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    # mp.pyplot.plot((x_ref, x_ref),(0, 1000), 'k--')
    # labels.append("Optimal")

    mp.pyplot.legend(labels, loc='upper right', fontsize=LEGEND_FONTSIZE, labelspacing=0)
    if not paper:
        mp.pyplot.title(
            "Rate of various components: %s'.\n"
            "%d generations of %d 'concurrent' units of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s"
            % (values,
               info['metadata.generations'], cores, info['metadata.cu_cores'],
               info['metadata.cu_runtime'], resource_label,
               info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'],
               info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'],
               info['metadata.radical_stack.ru']),
            fontsize=TITLE_FONTSIZE)

    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("Rate (Unit/s)", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylim(-1, 400)
    #mp.pyplot.xlim(-1,)
    #mp.pyplot.xlim(['1/1/2000', '1/1/2000'])
    #mp.pyplot.xlim('03:00', '04:00')
    #mp.pyplot.xlim(380, 400)
    #mp.pyplot.xlim(675, 680)
    #ax.get_xaxis().set_ticks([])
    # ax.set_yscale('log', basey=10)
    #mp.pyplot.xlim((291500.0, 1185200.0))
    #mp.pyplot.xlim((474000.0, 2367600.0))
    print "xlim:", ax.get_xlim()

    # Thicken plot borders and tick marks for print legibility.
    [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # Figure sized for a two-column paper layout (golden-ratio height).
    #width = 3.487
    width = 3.3
    height = width / 1.618
    # height = 2.7
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)
    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)
    #fig.tight_layout()

    mp.pyplot.savefig('plot_more_rates-%s.pdf' % sid)
    mp.pyplot.close()
def plot(sids, value, label='', paper=False, window=1.0, plot_mean=False, compare=None, micro=False): labels = [] colors = [cmap(i) for i in np.linspace(0, 1, len(sids))] first = True values = [] counter = 0 for sid in sids: print "sid: %s" % sid if sid.startswith('rp.session'): rp = True else: rp = False session_dir = os.path.join(PICKLE_DIR, sid) unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl')) pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl')) session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl')) unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'unit_prof.pkl')) # Legend info info = session_info_df.loc[sid] if rp: # For this call assume that there is only one pilot per session resources = get_resources(unit_info_df, pilot_info_df, sid) assert len(resources) == 1 resource_label = resources.values()[0] else: resource_label = "bogus" # Get only the entries for this session #uf = unit_prof_df[unit_prof_df['sid'] == sid] # We sort the units based on the order they arrived at the agent #ufs = uf.sort('awo_get_u_pend') if not compare: raise Exception("Need to specify 'compare' parameter!") elif 'metadata.%s' % compare in info: metric = info['metadata.%s' % compare] else: #raise Exception("'%s' not found in info!" 
% compare) metric = counter counter += 1 values.append(metric) if value == 'sched_freq': plot_type = 'sched' plot_label = 'Scheduling' spec = {'state': rps.ALLOCATING, 'event' : 'advance'} add_frequency(unit_prof_df, 'sched_freq', window, spec) print unit_prof_df.state.unique() # # scheduling frequency # df = unit_prof_df[ (unit_prof_df.sched_freq >= 0) & (unit_prof_df.event == 'advance') & (unit_prof_df.sid == sid) ][['time', 'sched_freq']] elif value == 'exec_freq': plot_type = 'exec' plot_label = 'Executing' spec = {'state': 'Executing', 'event' : 'advance'} add_frequency(unit_prof_df, 'exec_freq', window, spec) # # feq # df = unit_prof_df[ (unit_prof_df.exec_freq >= 0) & (unit_prof_df.event == 'advance') & (unit_prof_df.sid == sid) ][['time', 'exec_freq']] elif 'fork_freq' == value: spec = {'info': 'aec_start_script'} add_frequency(unit_prof_df, value, window, spec) # # fork - start_script # df = unit_prof_df[ (unit_prof_df[value] >= 0) & #(unit_prof_df.event == 'advance') & (unit_prof_df.sid == sid) ][['time', value]] elif value == 'done_freq': spec = {'state' : rps.AGENT_STAGING_OUTPUT_PENDING, 'event' : 'advance'} add_frequency(unit_prof_df, 'done_freq', 1, spec) # # feq # df = unit_prof_df[ (unit_prof_df.done_freq >= 0) & (unit_prof_df.event == 'advance') & (unit_prof_df.sid == sid) ][['time', 'done_freq']] else: raise Exception("Value %s unknown" % value) df.columns = ['time', metric] df['time'] -= df['time'].min() df.time = pd.to_datetime(df.time, unit='s') df.set_index('time', inplace=True) def _mean(array_like): return np.mean(array_like)/window df = df.resample('%dL' % int(1000.0*window), how=_mean) df = df.fillna(0) print df.head() if first: df_all = df else: #df_all = pd.merge(df_all, df, on='time', how='outer') #df_all = pd.merge(df_all, df, how='outer') df_all = pd.concat([df_all, df], axis=1) labels.append("%d" % metric) #labels.append("%d - %s" % (cores, 'RP' if rp else 'ORTE')) #labels.append(sid[-4:]) first = False c = 0 for value in values: 
mean = df_all[value].mean() stddev = df_all[value].std(ddof=0) print "Mean value for %d: %f (%f)" % (value, mean, stddev) if plot_mean: df_all['mean_%s' % value] = mean # labels.append("Mean %s" % value) my_colors = colors my_styles = [] for x in range(len(values)): my_styles.append('-') if plot_mean: my_colors *= 2 for x in range(len(values)): my_styles.append('--') #df_all.set_index('time', inplace=True) # print df_all.head(500) #df_all.plot(colormap='Paired') #df_all.plot(drawstyle='steps-post') ax = df_all.plot(color=my_colors, style=my_styles, drawstyle='steps-pre', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH) # df_all.plot(drawstyle='steps') # df_all.plot() mp.pyplot.legend(labels, loc='upper right', fontsize=LEGEND_FONTSIZE, labelspacing=0) if not paper: mp.pyplot.title("Rate of CUs transitioning in stage '%s'.\n" "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n" "Constant number of %d sub-agent with %d ExecWorker(s) each.\n" "RP: %s - RS: %s - RU: %s" % (value, info['metadata.generations'], info['metadata.cu_cores'], info['metadata.cu_runtime'], resource_label, info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'], info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru'] ), fontsize=TITLE_FONTSIZE) mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE) mp.pyplot.ylabel("%s Rate (Unit/s)" % plot_label, fontsize=LABEL_FONTSIZE) mp.pyplot.ylim(0,) #mp.pyplot.xlim('0:00', '0:40') #mp.pyplot.xlim(380, 400) #mp.pyplot.xlim(675, 680) #ax.get_xaxis().set_ticks([]) from matplotlib.dates import YearLocator, MonthLocator, DateFormatter, SecondLocator #second_fmt = DateFormatter('%S') # second_loc = SecondLocator(bysecond=range(0, 300, 10)) # ax.xaxis.set_minor_formatter(second_fmt) # ax.xaxis.set_minor_locator(second_loc) # ax.xaxis.set_major_formatter(second_fmt) # ax.xaxis.set_major_locator(second_loc) # 
second_loc.set_axis(ax.xaxis) # Have to manually make this call and the one below. # second_loc.refresh() # secondsFmt = DateFormatter('%s') s = SecondLocator() ax.xaxis.set_major_locator(s) # ax.xaxis.set_minor_locator(SecondLocator()) # ax.xaxis.set_major_formatter(secondsFmt) #ax.xaxis.set_major_locator(years) #ax.xaxis.set_major_formatter(yearsFmt) #ax.xaxis.set_minor_locator(months) ax.autoscale_view() [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()] plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH) plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH) #width = 3.487 width = 3.3 height = width / 1.618 #height = 2.5 fig = mp.pyplot.gcf() fig.set_size_inches(width, height) #fig.subplots_adjust(left=0, right=1, top=1, bottom=1) fig.tight_layout(pad=0.1) mp.pyplot.savefig('plot_rate_%s%s_%s_%dgen.pdf' % ('micro_' if micro else '', value, resource_label, info['metadata.generations'])) mp.pyplot.close()
def wms_revisions(options):
    '''
    Remove bogus WMS/WFS GetCapabilities revisions from resources and knit
    the remaining revision history back together.

    These revisions look like this:

    # select url from resource_revision where id='3b157e17-cef2-43dc-b0ce-76de18549852' order by revision_timestamp;
    http://www.acas.org.uk/CHttpHandler.ashx?id=2918&p=0
    http://www.acas.org.uk/CHttpHandler.ashx?id=2918&p=0
    http://www.acas.org.uk/CHttpHandler.ashx?id=2918&p=0
    http://www.acas.org.uk/CHttpHandler.ashx?service=WMS&request=GetCapabilities&version=1.3
    http://www.acas.org.uk/CHttpHandler.ashx?service=WMS&request=GetCapabilities&version=1.1.1
    http://www.acas.org.uk/CHttpHandler.ashx?service=WFS&request=GetCapabilities&version=2.0
    http://www.acas.org.uk/CHttpHandler.ashx?service=WMS&request=GetCapabilities&version=1.3

    The bad ones have been changed to "?service=" params. These revisions
    need removing.

    # Typical revision:
    id | timestamp | author | message | state | approved_timestamp
    a2370bd1-b1b8-41b4-9fc1-d38b46d2fbda | 2014-02-22 04:34:56.634442 | co-prod3.dh.bytemark.co.uk | REST API: Update object financial-transactions-data-acas | active |
    # i.e. author='co-prod3...' (site-user, via API)

    Nothing is written unless options.write is set.
    '''
    resources = common.get_resources(state='active',
                                     resource_id=options.resource,
                                     dataset_name=options.dataset)
    stats = StatsList()
    stats.report_value_limit = 1000
    total_bad_revisions = 0
    # NOTE(review): need_to_commit is never set True anywhere below, so the
    # final commit block is effectively dead -- confirm whether per-resource
    # commit_and_remove() makes it redundant.
    need_to_commit = False
    widgets = ['Resources: ', Percentage(), ' ', Bar(), ' ', ETA()]
    progress = ProgressBar(widgets=widgets)
    for res in progress(resources):
        res = model.Resource.get(res.id)  # as the session gets flushed during the loop

        # All revisions of this resource, oldest first.
        res_rev_q = model.Session.query(model.ResourceRevision).filter_by(id=res.id).order_by(model.ResourceRevision.revision_timestamp)
        res_revs = res_rev_q.all()
        first_res_rev = res_revs[0]
        if 'request=GetCapabilities&version=' in first_res_rev.url:
            # The resource started life as a capabilities URL, so the URL
            # was never overwritten by the fixer -- nothing to repair.
            print add_stat('First revision already was WMS', res, stats)
            continue

        # Identify bad revisions by the WMS URL parameters and author
        bad_res_revs = res_rev_q.filter(model.ResourceRevision.url.ilike('%?service=W%S&request=GetCapabilities&version=%')).all()
        if bad_res_revs and \
           bad_res_revs[0].revision.author not in ('co-prod3.dh.bytemark.co.uk', 'current_revision_fixer2'):
            # Capabilities-like URL but written by a human/site user -- do
            # not touch it.
            print add_stat('Misidentified', res, stats, 'author=%r' % bad_res_revs[0].revision.author)
            continue
        if not bad_res_revs:
            add_stat('Resource ok', res, stats)
            continue
        print ' '  # don't overwrite progress bar
        print add_stat('Bad revisions', res, stats, '(%d/%d)' % (len(bad_res_revs), len(res_revs)))
        total_bad_revisions += len(bad_res_revs)

        # Find the new latest (good) revision: scan from newest to oldest
        # for the first revision that is not in the bad set.
        bad_res_revs_set = set(bad_res_revs)
        for res_rev_index in reversed(xrange(len(res_revs))):
            if res_revs[res_rev_index] not in bad_res_revs_set:
                latest_good_res_rev = res_revs[res_rev_index]
                break
        else:
            print add_stat('No good revisions', res, stats)
            continue

        if not options.write:
            # Dry run: report only.
            continue

        # Delete the revisions and resource_revisions
        print ' Deleting bad revisions...'

        def delete_bad_revisions(res_revs):
            # Build the sql as a list, as it is faster when you have 1000 strings to append
            # The FK constraints are dropped up front so the deletes do not
            # trip over rows referencing the revisions being removed.
            sql = ['''BEGIN;
ALTER TABLE package_tag DROP CONSTRAINT package_tag_revision_id_fkey;
ALTER TABLE package_extra DROP CONSTRAINT package_extra_revision_id_fkey;
ALTER TABLE resource DROP CONSTRAINT resource_revision_id_fkey;
''']
            for res_rev in res_revs:
                sql.append("DELETE from resource_revision where id='%s' and revision_id='%s';\n" % (res.id, res_rev.revision_id))
                # a revision created (e.g. over the API) can be connect to other
                # resources or a dataset, so only delete the revision if only
                # connected to this one.
                if model.Session.query(model.ResourceRevision).\
                        filter_by(revision_id=res_rev.revision_id).\
                        count() == 1 and \
                        model.Session.query(model.PackageRevision).\
                        filter_by(revision_id=res_rev.revision_id).count() == 0:
                    sql.append("DELETE from revision where id='%s';\n" % res_rev.revision_id)
            # Point the live resource row at the last good revision before
            # re-adding the FK constraints.
            sql.append("UPDATE resource SET revision_id='%s' WHERE id='%s';\n" % \
                       (latest_good_res_rev.revision_id, res.id))
            sql.append('''
ALTER TABLE package_tag ADD CONSTRAINT package_tag_revision_id_fkey FOREIGN KEY (revision_id) REFERENCES revision(id);
ALTER TABLE package_extra ADD CONSTRAINT package_extra_revision_id_fkey FOREIGN KEY (revision_id) REFERENCES revision(id);
ALTER TABLE resource ADD CONSTRAINT resource_revision_id_fkey FOREIGN KEY (revision_id) REFERENCES revision(id);
COMMIT;''')
            print ' sql..',
            model.Session.execute(''.join(sql))
            print '.committed'
            model.Session.remove()

        def chunks(l, n):
            '''Yield successive n-sized chunks from l.'''
            for i in xrange(0, len(l), n):
                yield l[i:i+n]

        # chunk revisions in chunks to cope when there are so many
        widgets = ['Creating SQL: ', Counter(), 'k/%sk ' % int(float(len(bad_res_revs))/1000.0), Bar(), ' ', ETA()]
        progress2 = ProgressBar(widgets=widgets, maxval=int(float(len(bad_res_revs))/1000.0) or 1)
        for chunk_of_bad_res_revs in progress2(chunks(bad_res_revs, 1000)):
            delete_bad_revisions(chunk_of_bad_res_revs)

        # Knit together the remaining revisions again
        print ' Knitting existing revisions back together...'
        res_rev_q = model.Session.query(model.ResourceRevision).filter_by(id=res.id).order_by(model.ResourceRevision.revision_timestamp)
        res_revs = res_rev_q.all()
        latest_res_rev = res_revs[-1]
        if not latest_res_rev.current:
            latest_res_rev.current = True
        # Each surviving revision must expire exactly when its successor
        # begins; the newest one stays open until END_OF_TIME.
        for i, res_rev in enumerate(res_revs[:-1]):
            if res_rev.expired_timestamp != res_revs[i+1].revision_timestamp:
                res_rev.expired_timestamp = res_revs[i+1].revision_timestamp
                res_rev.expired_id = res_revs[i+1].revision_id
        if latest_res_rev.expired_timestamp != END_OF_TIME:
            latest_res_rev.expired_timestamp = END_OF_TIME
        if latest_res_rev.expired_id is not None:
            latest_res_rev.expired_id = None

        # Correct the URL on the resource
        model.Session.query(model.Resource).filter_by(id=res.id).update({'url': latest_res_rev.url})
        model.repo.commit_and_remove()
        print ' ...done'

    print 'Summary\n', stats.report()
    print 'Total bad revs: %d' % total_bad_revisions
    if options.write and need_to_commit:
        model.repo.commit_and_remove()
        print 'Written'
def plot(sids, paper=False):
    """Plot mean TTC against pilot core count, one line per barrier setup.

    :param sids: mapping of barrier-key -> list of session ids; the key is
                 looked up in barrier_legend for the line label
    :param paper: when True, emit a small paper-sized figure without title
    """
    labels = []
    colors = [cmap(i) for i in np.linspace(0, 1, 3)]
    c = 0

    for key in sids:

        # cores -> list of TTC samples for this barrier configuration
        orte_ttc = {}

        for sid in sids[key]:
            session_dir = os.path.join(PICKLE_DIR, sid)

            unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
            pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
            tr_unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
            session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

            # Legend info
            info = session_info_df.loc[sid]
            cores = info['metadata.effective_cores']

            if cores not in orte_ttc:
                orte_ttc[cores] = []

            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0]

            # Get only the entries for this session
            tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

            # Only take completed CUs into account
            tuf = tuf[tuf['Done'].notnull()]

            # We sort the units based on the order they arrived at the agent
            tufs = tuf.sort('awo_get_u_pend')

            # TTC sample: last unit finishing exec minus first unit arrival.
            orte_ttc[cores].append((tufs['aec_after_exec'].max() - tufs['awo_get_u_pend'].min()))

        orte_df = pd.DataFrame(orte_ttc)

        labels.append("%s" % barrier_legend[key])
        #ax = orte_df.mean().plot(kind='line', color=colors[c], marker=barrier_marker[key], fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)
        ax = orte_df.mean().plot(kind='line', color=colors[c], marker='+', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)
        c += 1

    print 'labels: %s' % labels
    legend = mp.pyplot.legend(labels, loc='upper left', fontsize=LEGEND_FONTSIZE, markerscale=0, labelspacing=0)
    legend.get_frame().set_linewidth(BORDERWIDTH)

    if not paper:
        # NOTE: info/resource_label come from the last processed session.
        mp.pyplot.title("TTC for a varying number of 'concurrent' CUs.\n"
                "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
                "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
                "RP: %s - RS: %s - RU: %s" %
                (info['metadata.generations'],
                 info['metadata.cu_cores'],
                 info['metadata.cu_runtime'],
                 resource_label,
                 info['metadata.num_sub_agents'],
                 info['metadata.num_exec_instances_per_sub_agent'],
                 info['metadata.radical_stack.rp'],
                 info['metadata.radical_stack.rs'],
                 info['metadata.radical_stack.ru']
                 ), fontsize=8)
    mp.pyplot.xlabel("Pilot Cores", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylabel("Time to Completion (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("$ttc_{a}$", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylim(290, 550)
    #ax.get_xaxis().set_ticks([])
    #ax.get_xaxis.set

    [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    if paper:
        # Paper-column sized figure.
        # width = 3.487
        width = 3.3
        # height = width / 1.618
        height = 1.3
        fig = mp.pyplot.gcf()
        fig.set_size_inches(width, height)
        # fig.subplots_adjust(left=0, right=1, top=1, bottom=1)
        # fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
        fig.tight_layout(pad=0.1)
        mp.pyplot.savefig('plot_ttc_cores_barriers.pdf')
    else:
        mp.pyplot.savefig('plot_ttc_cores_many.pdf')

    mp.pyplot.close()
def plot(sids, value, label='', paper=False, window=1.0, plot_mean=False, compare=None, micro=False): labels = [] colors = [cmap(i) for i in np.linspace(0, 1, len(sids))] first = True values = [] counter = 0 for sid in sids: print "sid: %s" % sid if sid.startswith('rp.session'): rp = True else: rp = False session_dir = os.path.join(PICKLE_DIR, sid) unit_info_df = pd.read_pickle( os.path.join(session_dir, 'unit_info.pkl')) pilot_info_df = pd.read_pickle( os.path.join(session_dir, 'pilot_info.pkl')) session_info_df = pd.read_pickle( os.path.join(session_dir, 'session_info.pkl')) unit_prof_df = pd.read_pickle( os.path.join(session_dir, 'unit_prof.pkl')) # Legend info info = session_info_df.loc[sid] if rp: # For this call assume that there is only one pilot per session resources = get_resources(unit_info_df, pilot_info_df, sid) assert len(resources) == 1 resource_label = resources.values()[0] else: resource_label = "bogus" # Get only the entries for this session #uf = unit_prof_df[unit_prof_df['sid'] == sid] # We sort the units based on the order they arrived at the agent #ufs = uf.sort('awo_get_u_pend') if not compare: raise Exception("Need to specify 'compare' parameter!") elif 'metadata.%s' % compare in info: metric = info['metadata.%s' % compare] else: #raise Exception("'%s' not found in info!" 
% compare) metric = counter counter += 1 values.append(metric) if value == 'sched_freq': plot_type = 'sched' plot_label = 'Scheduling' spec = {'state': rps.ALLOCATING, 'event': 'advance'} add_frequency(unit_prof_df, 'sched_freq', window, spec) print unit_prof_df.state.unique() # # scheduling frequency # df = unit_prof_df[(unit_prof_df.sched_freq >= 0) & (unit_prof_df.event == 'advance') & (unit_prof_df.sid == sid)][[ 'time', 'sched_freq' ]] elif value == 'exec_freq': plot_type = 'exec' plot_label = 'Executing' spec = {'state': 'Executing', 'event': 'advance'} add_frequency(unit_prof_df, 'exec_freq', window, spec) # # feq # df = unit_prof_df[(unit_prof_df.exec_freq >= 0) & (unit_prof_df.event == 'advance') & (unit_prof_df.sid == sid)][['time', 'exec_freq']] elif 'fork_freq' == value: spec = {'info': 'aec_start_script'} add_frequency(unit_prof_df, value, window, spec) # # fork - start_script # df = unit_prof_df[(unit_prof_df[value] >= 0) & #(unit_prof_df.event == 'advance') & (unit_prof_df.sid == sid)][['time', value]] elif value == 'done_freq': spec = { 'state': rps.AGENT_STAGING_OUTPUT_PENDING, 'event': 'advance' } add_frequency(unit_prof_df, 'done_freq', 1, spec) # # feq # df = unit_prof_df[(unit_prof_df.done_freq >= 0) & (unit_prof_df.event == 'advance') & (unit_prof_df.sid == sid)][['time', 'done_freq']] else: raise Exception("Value %s unknown" % value) df.columns = ['time', metric] df['time'] -= df['time'].min() df.time = pd.to_datetime(df.time, unit='s') df.set_index('time', inplace=True) def _mean(array_like): return np.mean(array_like) / window df = df.resample('%dL' % int(1000.0 * window), how=_mean) df = df.fillna(0) print df.head() if first: df_all = df else: #df_all = pd.merge(df_all, df, on='time', how='outer') #df_all = pd.merge(df_all, df, how='outer') df_all = pd.concat([df_all, df], axis=1) labels.append("%d" % metric) #labels.append("%d - %s" % (cores, 'RP' if rp else 'ORTE')) #labels.append(sid[-4:]) first = False c = 0 for value in values: mean = 
df_all[value].mean() stddev = df_all[value].std(ddof=0) print "Mean value for %d: %f (%f)" % (value, mean, stddev) if plot_mean: df_all['mean_%s' % value] = mean # labels.append("Mean %s" % value) my_colors = colors my_styles = [] for x in range(len(values)): my_styles.append('-') if plot_mean: my_colors *= 2 for x in range(len(values)): my_styles.append('--') #df_all.set_index('time', inplace=True) # print df_all.head(500) #df_all.plot(colormap='Paired') #df_all.plot(drawstyle='steps-post') ax = df_all.plot(color=my_colors, style=my_styles, drawstyle='steps-pre', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH) # df_all.plot(drawstyle='steps') # df_all.plot() mp.pyplot.legend(labels, loc='upper right', fontsize=LEGEND_FONTSIZE, labelspacing=0) if not paper: mp.pyplot.title( "Rate of CUs transitioning in stage '%s'.\n" "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n" "Constant number of %d sub-agent with %d ExecWorker(s) each.\n" "RP: %s - RS: %s - RU: %s" % (value, info['metadata.generations'], info['metadata.cu_cores'], info['metadata.cu_runtime'], resource_label, info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'], info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']), fontsize=TITLE_FONTSIZE) mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE) mp.pyplot.ylabel("%s Rate (Unit/s)" % plot_label, fontsize=LABEL_FONTSIZE) mp.pyplot.ylim(0, ) #mp.pyplot.xlim('0:00', '0:40') #mp.pyplot.xlim(380, 400) #mp.pyplot.xlim(675, 680) #ax.get_xaxis().set_ticks([]) from matplotlib.dates import YearLocator, MonthLocator, DateFormatter, SecondLocator #second_fmt = DateFormatter('%S') # second_loc = SecondLocator(bysecond=range(0, 300, 10)) # ax.xaxis.set_minor_formatter(second_fmt) # ax.xaxis.set_minor_locator(second_loc) # ax.xaxis.set_major_formatter(second_fmt) # ax.xaxis.set_major_locator(second_loc) # 
second_loc.set_axis(ax.xaxis) # Have to manually make this call and the one below. # second_loc.refresh() # secondsFmt = DateFormatter('%s') s = SecondLocator() ax.xaxis.set_major_locator(s) # ax.xaxis.set_minor_locator(SecondLocator()) # ax.xaxis.set_major_formatter(secondsFmt) #ax.xaxis.set_major_locator(years) #ax.xaxis.set_major_formatter(yearsFmt) #ax.xaxis.set_minor_locator(months) ax.autoscale_view() [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()] plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH) plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH) #width = 3.487 width = 3.3 height = width / 1.618 #height = 2.5 fig = mp.pyplot.gcf() fig.set_size_inches(width, height) #fig.subplots_adjust(left=0, right=1, top=1, bottom=1) fig.tight_layout(pad=0.1) mp.pyplot.savefig('plot_rate_%s%s_%s_%dgen.pdf' % ('micro_' if micro else '', value, resource_label, info['metadata.generations'])) mp.pyplot.close()
def plot(sids, key, paper=False):
    """Plot the gap between projected and measured core utilisation.

    For each session the measured efficiency (TTC-based) is compared with a
    projected efficiency derived from a fixed launch frequency, and the
    delta is plotted against CU runtime, one line per pilot-core count.

    :param sids: iterable of session ids, pickled under PICKLE_DIR
    :param key: grouping key; only 'pilot_cores' is supported
    :param paper: currently unused beyond being accepted (no title toggle
                  is applied here -- the title is always drawn when
                  `not paper`)
    """
    # keys = []
    # for sid in sids:
    #     print ("sid: %s") % sid
    #     session_dir = os.path.join(PICKLE_DIR, sid)
    #
    #     session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))
    #
    #     # Legend info
    #     info = session_info_df.loc[sid]
    #
    #     # keys.append(val)
    #     orte_dfs[val] = {}
    #

    # keyval (pilot cores) -> {cu_runtime -> efficiency delta}
    all_kv_dict = {}

    for sid in sids:
        print "Sid: %s" % sid
        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
        tr_unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
        session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        if key == 'pilot_cores':
            keyval = info['metadata.pilot_cores']
        else:
            print 'Unknown key: %s' % key
            exit(-1)

        if keyval not in all_kv_dict:
            print "First time I see this number of cu_cores: %d" % keyval
            all_kv_dict[keyval] = {}
        else:
            print "Already saw this number of cu_cores: %d" % keyval

        cu_runtime = info['metadata.cu_runtime']
        generations = info['metadata.generations']

        if cu_runtime not in all_kv_dict[keyval]:
            print "First time I see this value of cu_runtime: %d" % cu_runtime
            all_kv_dict[keyval][cu_runtime] = pd.Series()
        else:
            print "Already saw this value of cu_runtime: %d" % cu_runtime

        # For this call assume that there is only one pilot per session
        resources = get_resources(unit_info_df, pilot_info_df, sid)
        assert len(resources) == 1
        resource_label = resources.values()[0]

        # Get only the entries for this session
        tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        #tufs = tuf.sort('awo_get_u_pend')
        #tufs = tuf.sort('awo_adv_u')
        tufs = tuf.sort('asic_get_u_pend')

        # Measured TTC (Series.append returns a new Series; the stored one
        # is not modified here) and agent startup overhead.
        val = all_kv_dict[keyval][cu_runtime].append(pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min())))
        startup = all_kv_dict[keyval][cu_runtime].append(pd.Series((tufs['aec_start_script'].min() - tufs['asic_get_u_pend'].min())))

        # Optimal TTC: all generations back-to-back with zero overhead.
        ttc_o = generations * cu_runtime * 1.0
        ttc_a = val
        cores = info['metadata.pilot_cores']
        # Assumed unit launch frequency (units/s) -- hard-coded model input.
        freq = 60.0

        # $$ \frac {generations * unit runtime}{cores / freq + (generations * unit % runtime)}$$
        proj_eff = ttc_o / ( (cores / freq) + ttc_o) * 100

        if val[0] < ttc_o:
            # This likely means the pilot runtime was too short and we didn't complete all cu's
            print ("Einstein was wrong!?!")
            val = val/val  # force the ratio to 1 (100% after scaling below)
        else:
            # Efficiency: optimal/actual as a percentage.
            val /= ttc_o
            val = 1 / val
            val *= 100

        real_eff = val
        delta = real_eff - proj_eff
        # NOTE(review): startup/val/real_eff/delta are 1-element Series
        # formatted with %f -- relies on old pandas accepting that.
        print "startup: %f, ttc_a: %f, ttc_o: %f, freq: %f, proj_eff: %f, real_eff: %f, delta: %f" % (
            startup, val, ttc_o, freq, proj_eff, real_eff, delta)

        all_kv_dict[keyval][cu_runtime] = delta

    colors = [cmap(i) for i in np.linspace(0, 1, len(all_kv_dict))]
    c = 0
    labels = []
    # NOTE: this loop variable shadows the 'key' parameter.
    for key in sorted(all_kv_dict, key=int, reverse=False):

        # Skip small experiments
        if key <= 128:
            continue

        print 'orte_ttc raw:', all_kv_dict[key]
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(all_kv_dict[key])
        print 'orte_ttc df:', orte_df

        #labels.append("%s" % resource_legend[key])
        labels.append("%s" % key)
        #ax = orte_df.mean().plot(kind='line', color=resource_colors[key], marker=resource_marker[key], fontsize=BARRIER_FONTSIZE, linewidth=BARRIER_LINEWIDTH)
        ax = orte_df.mean().plot(kind='line', marker='+', color=colors[c], fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)
        c += 1

    # ORTE only
    # Data for BW
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096, 8192), (305, 309, 309, 313, 326, 351, 558), 'b-+')
    # Data for Stampede
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096), (301, 303, 305, 311, 322, 344), 'b-+')
    #labels.append("ORTE-only (C)")

    # Horizontal reference
    # y_ref = 100
    # mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--', linewidth=LINEWIDTH)
    # labels.append("Optimal")

    print 'labels: %s' % labels
    position = 'lower right'
    mp.pyplot.legend(labels, loc=position, fontsize=LEGEND_FONTSIZE, markerscale=0, labelspacing=0.2)
    if not paper:
        # NOTE: info/resource_label come from the last processed session.
        mp.pyplot.title("Resource efficiency for varying CU runtime.\n"
            "%d generations of a variable number of 'concurrent' CUs with a variable payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s" %
            (info['metadata.generations'],
             resource_label,
             info['metadata.num_sub_agents'],
             info['metadata.num_exec_instances_per_sub_agent'],
             info['metadata.radical_stack.rp'],
             info['metadata.radical_stack.rs'],
             info['metadata.radical_stack.ru']
             ), fontsize=8)
    mp.pyplot.xlabel("Unit Duration (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("Core Utilisation (\%)", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylim(0, 105)
    #mp.pyplot.xlim(0, 4096)
    #mp.pyplot.ylim(290, 500)
    #mp.pyplot.ylim(0, 2000)
    #mp.pyplot.ylim(y_ref-10)
    #ax.get_xaxis().set_ticks([])
    #ax.get_xaxis.set
    #ax.set_yscale('log', basey=10)
    ax.set_xscale('log', basex=2)

    #width = 3.487
    width = 3.3
    #height = width / 1.618
    height = 2.7
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)
    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_formula.pdf')
    mp.pyplot.close()
def plot(sids, key, paper=False):
    """Plot the gap between projected and measured core utilisation.

    NOTE(review): this module defines an earlier, identically-named and
    functionally identical `plot(sids, key, paper=False)`; at import time
    this later definition shadows it.

    :param sids: iterable of session ids, pickled under PICKLE_DIR
    :param key: grouping key; only 'pilot_cores' is supported
    :param paper: suppresses the verbose title when True
    """
    # keys = []
    # for sid in sids:
    #     print ("sid: %s") % sid
    #     session_dir = os.path.join(PICKLE_DIR, sid)
    #
    #     session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))
    #
    #     # Legend info
    #     info = session_info_df.loc[sid]
    #
    #     # keys.append(val)
    #     orte_dfs[val] = {}
    #

    # keyval (pilot cores) -> {cu_runtime -> efficiency delta}
    all_kv_dict = {}

    for sid in sids:
        print "Sid: %s" % sid
        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(
            os.path.join(session_dir, 'pilot_info.pkl'))
        tr_unit_prof_df = pd.read_pickle(
            os.path.join(session_dir, 'tr_unit_prof.pkl'))
        session_info_df = pd.read_pickle(
            os.path.join(session_dir, 'session_info.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        if key == 'pilot_cores':
            keyval = info['metadata.pilot_cores']
        else:
            print 'Unknown key: %s' % key
            exit(-1)

        if keyval not in all_kv_dict:
            print "First time I see this number of cu_cores: %d" % keyval
            all_kv_dict[keyval] = {}
        else:
            print "Already saw this number of cu_cores: %d" % keyval

        cu_runtime = info['metadata.cu_runtime']
        generations = info['metadata.generations']

        if cu_runtime not in all_kv_dict[keyval]:
            print "First time I see this value of cu_runtime: %d" % cu_runtime
            all_kv_dict[keyval][cu_runtime] = pd.Series()
        else:
            print "Already saw this value of cu_runtime: %d" % cu_runtime

        # For this call assume that there is only one pilot per session
        resources = get_resources(unit_info_df, pilot_info_df, sid)
        assert len(resources) == 1
        resource_label = resources.values()[0]

        # Get only the entries for this session
        tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        #tufs = tuf.sort('awo_get_u_pend')
        #tufs = tuf.sort('awo_adv_u')
        tufs = tuf.sort('asic_get_u_pend')

        # Measured TTC (Series.append returns a new Series; the stored one
        # is not modified here) and agent startup overhead.
        val = all_kv_dict[keyval][cu_runtime].append(
            pd.Series((tufs['aec_after_exec'].max() -
                       tufs['asic_get_u_pend'].min())))
        startup = all_kv_dict[keyval][cu_runtime].append(
            pd.Series((tufs['aec_start_script'].min() -
                       tufs['asic_get_u_pend'].min())))

        # Optimal TTC: all generations back-to-back with zero overhead.
        ttc_o = generations * cu_runtime * 1.0
        ttc_a = val
        cores = info['metadata.pilot_cores']
        # Assumed unit launch frequency (units/s) -- hard-coded model input.
        freq = 60.0

        # $$ \frac {generations * unit runtime}{cores / freq + (generations * unit % runtime)}$$
        proj_eff = ttc_o / ((cores / freq) + ttc_o) * 100

        if val[0] < ttc_o:
            # This likely means the pilot runtime was too short and we didn't complete all cu's
            print("Einstein was wrong!?!")
            val = val / val  # force the ratio to 1 (100% after scaling)
        else:
            # Efficiency: optimal/actual as a percentage.
            val /= ttc_o
            val = 1 / val
            val *= 100

        real_eff = val
        delta = real_eff - proj_eff
        # NOTE(review): startup/val/real_eff/delta are 1-element Series
        # formatted with %f -- relies on old pandas accepting that.
        print "startup: %f, ttc_a: %f, ttc_o: %f, freq: %f, proj_eff: %f, real_eff: %f, delta: %f" % (
            startup, val, ttc_o, freq, proj_eff, real_eff, delta)

        all_kv_dict[keyval][cu_runtime] = delta

    colors = [cmap(i) for i in np.linspace(0, 1, len(all_kv_dict))]
    c = 0
    labels = []
    # NOTE: this loop variable shadows the 'key' parameter.
    for key in sorted(all_kv_dict, key=int, reverse=False):

        # Skip small experiments
        if key <= 128:
            continue

        print 'orte_ttc raw:', all_kv_dict[key]
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(all_kv_dict[key])
        print 'orte_ttc df:', orte_df

        #labels.append("%s" % resource_legend[key])
        labels.append("%s" % key)
        #ax = orte_df.mean().plot(kind='line', color=resource_colors[key], marker=resource_marker[key], fontsize=BARRIER_FONTSIZE, linewidth=BARRIER_LINEWIDTH)
        ax = orte_df.mean().plot(kind='line',
                                 marker='+',
                                 color=colors[c],
                                 fontsize=TICK_FONTSIZE,
                                 linewidth=LINEWIDTH)
        c += 1

    # ORTE only
    # Data for BW
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096, 8192), (305, 309, 309, 313, 326, 351, 558), 'b-+')
    # Data for Stampede
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096), (301, 303, 305, 311, 322, 344), 'b-+')
    #labels.append("ORTE-only (C)")

    # Horizontal reference
    # y_ref = 100
    # mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--', linewidth=LINEWIDTH)
    # labels.append("Optimal")

    print 'labels: %s' % labels
    position = 'lower right'
    mp.pyplot.legend(labels,
                     loc=position,
                     fontsize=LEGEND_FONTSIZE,
                     markerscale=0,
                     labelspacing=0.2)
    if not paper:
        # NOTE: info/resource_label come from the last processed session.
        mp.pyplot.title(
            "Resource efficiency for varying CU runtime.\n"
            "%d generations of a variable number of 'concurrent' CUs with a variable payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s" %
            (info['metadata.generations'], resource_label,
             info['metadata.num_sub_agents'],
             info['metadata.num_exec_instances_per_sub_agent'],
             info['metadata.radical_stack.rp'],
             info['metadata.radical_stack.rs'],
             info['metadata.radical_stack.ru']),
            fontsize=8)
    mp.pyplot.xlabel("Unit Duration (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("Core Utilisation (\%)", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylim(0, 105)
    #mp.pyplot.xlim(0, 4096)
    #mp.pyplot.ylim(290, 500)
    #mp.pyplot.ylim(0, 2000)
    #mp.pyplot.ylim(y_ref-10)
    #ax.get_xaxis().set_ticks([])
    #ax.get_xaxis.set
    #ax.set_yscale('log', basey=10)
    ax.set_xscale('log', basex=2)

    #width = 3.487
    width = 3.3
    #height = width / 1.618
    height = 2.7
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)
    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_formula.pdf')
    mp.pyplot.close()
def plot(sids, value, label='', paper=False):
    """Plot the number of concurrently active CUs over time, per session.

    :param sids: iterable of session ids, pickled under PICKLE_DIR
    :param value: concurrency metric, 'cc_fork' or 'cc_exit'
    :param label: unused here; kept for call-site compatibility
    :param paper: when True, omit the verbose title
    """
    colors = [cmap(i) for i in np.linspace(0, 1, len(sids))]
    labels = []
    first = True

    for sid in sids:
        # RP sessions carry pilot metadata; other sessions get a
        # placeholder resource label.
        if sid.startswith('rp.session'):
            rp = True
        else:
            rp = False

        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(
            os.path.join(session_dir, 'pilot_info.pkl'))
        unit_prof_df = pd.read_pickle(
            os.path.join(session_dir, 'unit_prof.pkl'))
        session_info_df = pd.read_pickle(
            os.path.join(session_dir, 'session_info.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        if rp:
            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0]
        else:
            resource_label = 'bogus'

        # Get only the entries for this session
        #uf = unit_prof_df[unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        #ufs = uf.sort('awo_get_u_pend')

        cores = info['metadata.effective_cores']
        #cores = 32

        # Annotate the profile with a concurrency column: a unit counts as
        # "in" between the 'in' and 'out' events of the spec.
        if value == 'cc_fork':
            spec = {
                'in': [{
                    'info': 'aec_start_script'
                }],
                'out': [{
                    'info': 'aec_after_exec'
                }]
            }
            rpu.add_concurrency(unit_prof_df, 'cc_fork', spec)

        elif value == 'cc_exit':
            spec = {
                'in': [{
                    'info': 'aec_after_exec'
                }],
                'out': [
                    {
                        'state': rps.AGENT_STAGING_OUTPUT_PENDING,
                        'event': 'advance'
                    },
                ]
            }
            rpu.add_concurrency(unit_prof_df, 'cc_exit', spec)

        df = unit_prof_df[(unit_prof_df[value] >= 0) &
                          #(unit_prof_df.event == 'advance') &
                          (unit_prof_df.sid == sid)][['time', value]]

        # Column renamed to the core count; time rebased to t=0.
        df.columns = ['time', cores]
        df['time'] -= df['time'].min()

        if first:
            df_all = df
        else:
            df_all = pd.merge(df_all, df, on='time', how='outer')

        #labels.append("Cores: %d" % cores)
        # labels.append("%d" % cores)
        #labels.append("%d - %s" % (cores, 'RP' if rp else 'ORTE'))
        #labels.append(sid[-4:])
        labels.append("%d" % info['metadata.cu_runtime'])

        first = False

    df_all.set_index('time', inplace=True)
    print df_all.head()
    #df_all.plot(colormap='Paired')
    #df_all.plot(drawstyle='steps-post')
    #ax = df_all.plot(drawstyle='steps-pre', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH, colors=colors)
    # NOTE(review): 'colors=' is not a documented DataFrame.plot keyword
    # ('color=' is) -- presumably accepted by the pandas/matplotlib version
    # this was written against; verify before upgrading.
    ax = df_all.plot(fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH, colors=colors)

    # Vertial reference
    #x_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    #mp.pyplot.plot((x_ref, x_ref),(0, 1000), 'k--')
    #labels.append("Optimal")

    location = 'upper right'
    legend = mp.pyplot.legend(labels, loc=location, fontsize=LEGEND_FONTSIZE,
                              labelspacing=0)
    legend.get_frame().set_linewidth(BORDERWIDTH)

    if not paper:
        # NOTE: info/resource_label come from the last processed session.
        mp.pyplot.title(
            "Concurrent number of CUs in stage '%s'.\n"
            "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
            "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
            "RP: %s - RS: %s - RU: %s" %
            (value, info['metadata.generations'], info['metadata.cu_cores'],
             info['metadata.cu_runtime'], resource_label,
             info['metadata.num_sub_agents'],
             info['metadata.num_exec_instances_per_sub_agent'],
             info['metadata.radical_stack.rp'],
             info['metadata.radical_stack.rs'],
             info['metadata.radical_stack.ru']),
            fontsize=TITLE_FONTSIZE)
    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("\# Concurrent Units", fontsize=LABEL_FONTSIZE)
    # mp.pyplot.ylim(0, 200)
    mp.pyplot.ylim(-50, )
    mp.pyplot.xlim(0, 600)
    #ax.get_xaxis().set_ticks([])

    print dir(ax)
    [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # Paper-column sized figure (golden ratio).
    # width = 3.487
    width = 3.3
    height = width / 1.618
    # height = 2.5
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    # fig.subplots_adjust(left=0, right=1, top=1, bottom=1)
    # fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_concurrency.pdf')
    mp.pyplot.close()
def wms_revisions(options):
    '''
    Find and remove spurious WMS/WFS resource revisions, then knit the
    remaining revision history back together.

    These revisions look like this:

    # select url from resource_revision where id='3b157e17-cef2-43dc-b0ce-76de18549852' order by revision_timestamp;
    http://www.acas.org.uk/CHttpHandler.ashx?id=2918&p=0
    http://www.acas.org.uk/CHttpHandler.ashx?id=2918&p=0
    http://www.acas.org.uk/CHttpHandler.ashx?id=2918&p=0
    http://www.acas.org.uk/CHttpHandler.ashx?service=WMS&request=GetCapabilities&version=1.3
    http://www.acas.org.uk/CHttpHandler.ashx?service=WMS&request=GetCapabilities&version=1.1.1
    http://www.acas.org.uk/CHttpHandler.ashx?service=WFS&request=GetCapabilities&version=2.0
    http://www.acas.org.uk/CHttpHandler.ashx?service=WMS&request=GetCapabilities&version=1.3

    The bad ones have been changed to "?service=" params. These revisions
    need removing.

    # Typical revision:
     id  |          timestamp         |  author  | message  | state  | approved_timestamp
    a2370bd1-b1b8-41b4-9fc1-d38b46d2fbda | 2014-02-22 04:34:56.634442 | co-prod3.dh.bytemark.co.uk | REST API: Update object financial-transactions-data-acas | active |
    # i.e. author='co-prod3...' (site-user, via API)
    '''
    resources = common.get_resources(state='active',
                                     resource_id=options.resource,
                                     dataset_name=options.dataset)
    stats = StatsList()
    stats.report_value_limit = 1000
    total_bad_revisions = 0
    # NOTE(review): never set True anywhere below — the final commit
    # branch appears to be dead code; confirm before relying on it.
    need_to_commit = False
    widgets = ['Resources: ', Percentage(), ' ', Bar(), ' ', ETA()]
    progress = ProgressBar(widgets=widgets)
    for res in progress(resources):
        res = model.Resource.get(
            res.id)  # as the session gets flushed during the loop
        res_rev_q = model.Session.query(model.ResourceRevision).filter_by(
            id=res.id).order_by(model.ResourceRevision.revision_timestamp)
        res_revs = res_rev_q.all()
        first_res_rev = res_revs[0]
        if 'request=GetCapabilities&version=' in first_res_rev.url:
            # The resource started life as a WMS URL, so nothing to fix.
            print add_stat('First revision already was WMS', res, stats)
            continue

        # Identify bad revisions by the WMS URL parameters and author
        bad_res_revs = res_rev_q.filter(
            model.ResourceRevision.url.ilike(
                '%?service=W%S&request=GetCapabilities&version=%')).all()
        if bad_res_revs and \
                bad_res_revs[0].revision.author not in ('co-prod3.dh.bytemark.co.uk', 'current_revision_fixer2'):
            # WMS-looking URL but not written by the known fixer bots —
            # do not touch it.
            print add_stat('Misidentified', res, stats,
                           'author=%r' % bad_res_revs[0].revision.author)
            continue
        if not bad_res_revs:
            add_stat('Resource ok', res, stats)
            continue
        print ' '  # don't overwrite progress bar
        print add_stat('Bad revisions', res, stats,
                       '(%d/%d)' % (len(bad_res_revs), len(res_revs)))
        total_bad_revisions += len(bad_res_revs)

        # Find the new latest (good) revision: scan backwards for the most
        # recent revision that is not in the bad set.
        bad_res_revs_set = set(bad_res_revs)
        for res_rev_index in reversed(xrange(len(res_revs))):
            if res_revs[res_rev_index] not in bad_res_revs_set:
                latest_good_res_rev = res_revs[res_rev_index]
                break
        else:
            # Every revision is bad — nothing safe to fall back to.
            print add_stat('No good revisions', res, stats)
            continue

        if not options.write:
            # Dry-run mode: report only, change nothing.
            continue

        # Delete the revisions and resource_revisions
        print '  Deleting bad revisions...'

        def delete_bad_revisions(res_revs):
            # Delete one chunk of bad revisions in a single SQL batch.
            # FK constraints referencing revision are dropped up-front and
            # restored afterwards so the deletes can proceed.
            # Build the sql as a list, as it is faster when you have 1000 strings to append
            sql = ['''BEGIN;
ALTER TABLE package_tag DROP CONSTRAINT package_tag_revision_id_fkey;
ALTER TABLE package_extra DROP CONSTRAINT package_extra_revision_id_fkey;
ALTER TABLE resource DROP CONSTRAINT resource_revision_id_fkey;
''']
            for res_rev in res_revs:
                sql.append(
                    "DELETE from resource_revision where id='%s' and revision_id='%s';\n"
                    % (res.id, res_rev.revision_id))
                # a revision created (e.g. over the API) can be connect to other
                # resources or a dataset, so only delete the revision if only
                # connected to this one.
                if model.Session.query(model.ResourceRevision).\
                        filter_by(revision_id=res_rev.revision_id).\
                        count() == 1 and \
                        model.Session.query(model.PackageRevision).\
                        filter_by(revision_id=res_rev.revision_id).count() == 0:
                    sql.append("DELETE from revision where id='%s';\n" %
                               res_rev.revision_id)
            sql.append("UPDATE resource SET revision_id='%s' WHERE id='%s';\n" %
                       (latest_good_res_rev.revision_id, res.id))
            sql.append('''
ALTER TABLE package_tag ADD CONSTRAINT package_tag_revision_id_fkey FOREIGN KEY (revision_id) REFERENCES revision(id);
ALTER TABLE package_extra ADD CONSTRAINT package_extra_revision_id_fkey FOREIGN KEY (revision_id) REFERENCES revision(id);
ALTER TABLE resource ADD CONSTRAINT resource_revision_id_fkey FOREIGN KEY (revision_id) REFERENCES revision(id);
COMMIT;''')
            print '  sql..',
            model.Session.execute(''.join(sql))
            print '.committed'
            model.Session.remove()

        def chunks(l, n):
            '''Yield successive n-sized chunks from l.'''
            for i in xrange(0, len(l), n):
                yield l[i:i + n]

        # chunk revisions in chunks to cope when there are so many
        widgets = [
            'Creating SQL: ',
            Counter(),
            'k/%sk ' % int(float(len(bad_res_revs)) / 1000.0),
            Bar(), ' ',
            ETA()
        ]
        progress2 = ProgressBar(widgets=widgets,
                                maxval=int(float(len(bad_res_revs)) / 1000.0)
                                or 1)
        for chunk_of_bad_res_revs in progress2(chunks(bad_res_revs, 1000)):
            delete_bad_revisions(chunk_of_bad_res_revs)

        # Knit together the remaining revisions again: fix the 'current'
        # flag and re-chain expired_timestamp/expired_id pointers.
        print '  Knitting existing revisions back together...'
        res_rev_q = model.Session.query(model.ResourceRevision).filter_by(
            id=res.id).order_by(model.ResourceRevision.revision_timestamp)
        res_revs = res_rev_q.all()
        latest_res_rev = res_revs[-1]
        if not latest_res_rev.current:
            latest_res_rev.current = True
        for i, res_rev in enumerate(res_revs[:-1]):
            if res_rev.expired_timestamp != res_revs[i + 1].revision_timestamp:
                res_rev.expired_timestamp = res_revs[i + 1].revision_timestamp
                res_rev.expired_id = res_revs[i + 1].revision_id
        if latest_res_rev.expired_timestamp != END_OF_TIME:
            latest_res_rev.expired_timestamp = END_OF_TIME
        if latest_res_rev.expired_id is not None:
            latest_res_rev.expired_id = None

        # Correct the URL on the resource
        model.Session.query(model.Resource).filter_by(id=res.id).update(
            {'url': latest_res_rev.url})
        model.repo.commit_and_remove()
        print '  ...done'

    print 'Summary\n', stats.report()
    print 'Total bad revs: %d' % total_bad_revisions
    if options.write and need_to_commit:
        model.repo.commit_and_remove()
        print 'Written'
def plot(tr_unit_prof_df, info_df, unit_info_df, pilot_info_df, sid):
    """Plot the per-CU overhead components of one session as line series.

    For every completed CU, plots six timing differences (core occupation
    overhead, launch method, spawner, scheduler queue, execworker queue,
    post-exec) against the CU index, and saves '<sid>_plot1.pdf'.

    :param tr_unit_prof_df: transposed per-unit profile dataframe
    :param info_df: per-session info dataframe (indexed by session id)
    :param unit_info_df: per-unit info dataframe
    :param pilot_info_df: per-pilot info dataframe
    :param sid: session id to plot
    """
    print "Plotting %s ..." % sid

    labels = []

    # Legend info
    info = info_df.loc[sid]

    mpi = get_mpi(unit_info_df, sid)
    #mpi = True

    # For this call assume that there is only one pilot per session
    lms = get_lm(unit_info_df, pilot_info_df, sid, mpi)
    assert len(lms) == 1
    launch_method = lms.values()[0]

    # For this call assume that there is only one pilot per session
    spawners = get_spawners(unit_info_df, pilot_info_df, sid)
    assert len(spawners) == 1
    spawner = spawners.values()[0]
    #exit()

    # For this call assume that there is only one pilot per session
    resources = get_resources(unit_info_df, pilot_info_df, sid)
    assert len(resources) == 1
    resource_label = resources.values()[0]

    # Get only the entries for this session
    tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]
    # Only completed CUs are plotted.
    tuf = tuf[tuf['Done'].notnull()]

    # We sort the units based on the order ...
    #tufs = tuf.sort('awo_get_u_pend')  # they arrived at the agent
    #tufs = tuf.sort('aec_work_u_pend')  # they are picked up by an EW
    tufs = tuf.sort('asc_put_u_pend')  # they are scheduled
    #tufs = tuf.sort('asc_get_u_pend')  # the are picked up by the scheduler

    # Time a core was held minus the actual payload runtime.
    ax = (tufs['asc_released'] - tufs['asc_allocated'] -
          info['metadata.cu_runtime']).plot(kind='line', color='red')
    labels.append("Core Occupation overhead")

    # Launch-method overhead (exec duration minus payload runtime).
    ax = (tufs['aec_after_exec'] - tufs['aec_after_cd'] -
          info['metadata.cu_runtime']).plot(kind='line', color='orange')
    labels.append('%s LaunchMethod (%s)' %
                  ('MPI' if mpi else 'Task', launch_method))

    # Time between handover to the spawner and the script starting.
    ax = (tufs['aec_start_script'] -
          tufs['aec_handover']).plot(kind='line', color='black')
    labels.append("Spawner (%s)" % spawner)

    # Waiting time in the scheduler queue.
    (tufs['asc_get_u_pend'] -
     tufs['asic_put_u_pend']).plot(kind='line', color='blue')
    labels.append("Scheduler Queue")

    # Waiting time in the execworker queue.
    ax = (tufs['aec_work_u_pend'] -
          tufs['asc_put_u_pend']).plot(kind='line', color='green')
    labels.append("ExecWorker Queue")

    # Time between completion and core release.
    ax = (tufs['asc_released'] -
          tufs['aec_complete']).plot(kind='line', color='magenta')
    labels.append("Postexec")

    mp.pyplot.legend(labels, loc='upper left', fontsize=5)
    mp.pyplot.title("%s (%s)\n"
                    "%d CUs of %d core(s) with a %ds payload on a %d core pilot on %s.\n"
                    "%d sub-agent(s) with %d ExecWorker(s) each. All times are per CU.\n"
                    "RP: %s - RS: %s - RU: %s" % (
                        sid,
                        time.strftime("%a, %d %b %Y %H:%M:%S +0000",
                                      time.gmtime(info['created'])),
                        info['metadata.cu_count'],
                        info['metadata.cu_cores'],
                        info['metadata.cu_runtime'],
                        info['metadata.pilot_cores'],
                        resource_label,
                        info['metadata.num_sub_agents'],
                        info['metadata.num_exec_instances_per_sub_agent'],
                        info['metadata.radical_stack.rp'],
                        info['metadata.radical_stack.rs'],
                        info['metadata.radical_stack.ru']
                    ), fontsize=8)
    mp.pyplot.xlabel("Compute Units (ordered by agent arrival)")
    mp.pyplot.ylabel("Time (s)")
    mp.pyplot.ylim(-0.01)
    # Per-CU x ticks would be unreadable; hide them.
    ax.get_xaxis().set_ticks([])

    mp.pyplot.savefig('%s_plot1.pdf' % sid)
    mp.pyplot.close()
def plot(sids, paper=False):
    """Plot mean time-to-completion against core count, per resource.

    :param sids: dict mapping a resource key to a list of session ids;
                 sessions of one key are averaged per core count
    :param paper: when True, omit the verbose title and use the
                  publication output filename
    """
    labels = []

    for key in sids:
        # TTC samples per core count for this resource.
        orte_ttc = {}

        for sid in sids[key]:
            session_dir = os.path.join(PICKLE_DIR, sid)

            unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
            pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
            tr_unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
            session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

            # Legend info
            info = session_info_df.loc[sid]

            cores = info['metadata.effective_cores']

            if cores not in orte_ttc:
                orte_ttc[cores] = pd.Series()

            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0]

            # Get only the entries for this session
            tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

            # Only take completed CUs into account
            tuf = tuf[tuf['Done'].notnull()]

            # We sort the units based on the order they arrived at the agent
            tufs = tuf.sort('awo_get_u_pend')

            # TTC sample: last end-of-exec minus first agent arrival.
            orte_ttc[cores] = orte_ttc[cores].append(
                pd.Series((tufs['aec_after_exec'].max() -
                           tufs['awo_get_u_pend'].min())))

        print 'orte_ttc raw:', orte_ttc
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(orte_ttc)
        print 'orte_ttc df:', orte_df

        labels.append("%s" % resource_legend[key])
        # Mean over sessions per core count, one line per resource key.
        ax = orte_df.mean().plot(kind='line', color=resource_colors[key],
                                 marker=resource_marker[key],
                                 fontsize=BARRIER_FONTSIZE,
                                 linewidth=BARRIER_LINEWIDTH)

    print 'labels: %s' % labels
    mp.pyplot.legend(labels, loc='lower left', fontsize=BARRIER_FONTSIZE)
    if not paper:
        # NOTE: 'info' and 'resource_label' refer to the last session
        # processed in the loops above.
        mp.pyplot.title("TTC for a varying number of 'concurrent' CUs.\n"
                        "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
                        "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
                        "RP: %s - RS: %s - RU: %s" % (
                            info['metadata.generations'],
                            info['metadata.cu_cores'],
                            info['metadata.cu_runtime'],
                            resource_label,
                            info['metadata.num_sub_agents'],
                            info['metadata.num_exec_instances_per_sub_agent'],
                            info['metadata.radical_stack.rp'],
                            info['metadata.radical_stack.rs'],
                            info['metadata.radical_stack.ru']
                        ), fontsize=8)
    mp.pyplot.xlabel("# Cores", fontsize=BARRIER_FONTSIZE)
    mp.pyplot.ylabel("Time to Completion (s)", fontsize=BARRIER_FONTSIZE)
    mp.pyplot.ylim(0)
    #ax.get_xaxis().set_ticks([])
    #ax.get_xaxis.set

    if paper:
        mp.pyplot.savefig('plot_ttc_cores_resources.pdf')
    else:
        mp.pyplot.savefig('plot_ttc_cores_many.pdf')

    mp.pyplot.close()
def migrate(options):
    """Migrate archival details from TaskStatus/Resource into the
    Archival table of ckanext-archiver.

    For each active resource, collects status/error/timestamps from the
    archiver TaskStatus row (or nulls when absent), plus cache/hash/size
    details from the resource itself, then creates or updates the
    corresponding Archival row. Only writes when options.write is set.
    """
    from ckan import model
    from ckanext.archiver.model import Archival, Status

    resources = common.get_resources(state='active',
                                     publisher_ref=options.publisher,
                                     resource_id=options.resource,
                                     dataset_name=options.dataset)
    stats = StatsList()
    widgets = ['Resources: ', Percentage(), ' ', Bar(), ' ', ETA()]
    progress = ProgressBar(widgets=widgets)
    for res in progress(resources):
        # Gather the details of archivals from TaskStatus and Resource
        # to fill all properties of Archival apart from:
        # * package_id
        # * resource_id
        fields = {}
        archiver_task_status = model.Session.query(model.TaskStatus)\
            .filter_by(entity_id=res.id)\
            .filter_by(task_type='archiver')\
            .filter_by(key='status')\
            .first()
        if archiver_task_status:
            # TaskStatus stores the error details as a JSON blob.
            ats_error = json.loads(archiver_task_status.error)
            fields['status_id'] = Status.by_text(archiver_task_status.value)
            fields['is_broken'] = Status.is_status_broken(fields['status_id'])
            fields['reason'] = ats_error['reason']
            fields['last_success'] = date_str_to_datetime_or_none(
                ats_error['last_success'])
            fields['first_failure'] = date_str_to_datetime_or_none(
                ats_error['first_failure'])
            fields['failure_count'] = int(ats_error['failure_count'])
            fields['url_redirected_to'] = ats_error['url_redirected_to']
            fields['updated'] = archiver_task_status.last_updated
        else:
            # No TaskStatus row: skip resources with no archive traces at
            # all, otherwise record a row with null status fields.
            if not (res.cache_url or res.extras.get('cache_filepath')
                    or res.hash or res.size or res.mimetype):
                add_stat('No archive data', res, stats)
                continue
            for field_name in ('status_id', 'is_broken', 'reason',
                               'last_success', 'first_failure',
                               'failure_count', 'url_redirected_to',
                               'updated', 'created'):
                fields[field_name] = None

        fields['cache_filepath'] = res.extras.get('cache_filepath')
        fields['cache_url'] = res.cache_url
        fields['hash'] = res.hash
        fields['size'] = res.size
        fields['mimetype'] = res.mimetype

        # Estimate created/resource_timestamp from the revision history
        # (first/last revision that recorded a hash).
        revisions_with_hash = model.Session.query(model.ResourceRevision)\
            .filter_by(id=res.id)\
            .order_by(model.ResourceRevision.revision_timestamp)\
            .filter(model.ResourceRevision.hash != '').all()
        if revisions_with_hash:
            # these are not perfect by not far off
            fields['created'] = revisions_with_hash[0].revision_timestamp
            fields['resource_timestamp'] = revisions_with_hash[
                -1].revision_timestamp
        else:
            # Fall back to the earliest/latest known archiver timestamps.
            fields['created'] = min(fields['updated'] or END_OF_TIME,
                                    fields['first_failure'] or END_OF_TIME,
                                    fields['last_success'] or END_OF_TIME)
            fields['resource_timestamp'] = max(
                fields['updated'] or START_OF_TIME,
                fields['first_failure'] or START_OF_TIME,
                fields['last_success'] or START_OF_TIME)

        # Compare with any existing data in the Archival table
        archival = Archival.get_for_resource(res.id)
        if archival:
            changed = None
            for field, value in fields.items():
                if getattr(archival, field) != value:
                    if options.write:
                        setattr(archival, field, value)
                    changed = True
            if not changed:
                add_stat('Already exists correctly in archival table',
                         res, stats)
                continue
            add_stat('Updated in archival table', res, stats)
        else:
            archival = Archival.create(res.id)
            if options.write:
                for field, value in fields.items():
                    setattr(archival, field, value)
                model.Session.add(archival)
            add_stat('Added to archival table', res, stats)

    print 'Summary\n', stats.report()
    if options.write:
        model.repo.commit_and_remove()
        print 'Written'
def plot(sids, value, label='', paper=False):
    """Plot the number of concurrent CUs over time, one line per session.

    NOTE(review): near-duplicate of the other plot(sids, value, ...)
    concurrency variant in this file — consider consolidating.

    :param sids: iterable of session ids (sub-directories of PICKLE_DIR)
    :param value: 'cc_fork' or 'cc_exit' (computed here via
                  rpu.add_concurrency), or a pre-existing column name
    :param label: unused in this variant (kept for interface parity)
    :param paper: when True, omit the verbose debugging title
    """
    # One distinct color per session, evenly sampled from the colormap.
    colors = [cmap(i) for i in np.linspace(0, 1, len(sids))]
    labels = []
    first = True

    for sid in sids:
        # RP sessions are recognised by their id prefix.
        if sid.startswith('rp.session'):
            rp = True
        else:
            rp = False

        session_dir = os.path.join(PICKLE_DIR, sid)

        unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
        pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
        unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'unit_prof.pkl'))
        session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))

        # Legend info
        info = session_info_df.loc[sid]

        if rp:
            # For this call assume that there is only one pilot per session
            resources = get_resources(unit_info_df, pilot_info_df, sid)
            assert len(resources) == 1
            resource_label = resources.values()[0]
        else:
            resource_label = 'bogus'

        # Get only the entries for this session
        #uf = unit_prof_df[unit_prof_df['sid'] == sid]

        # We sort the units based on the order they arrived at the agent
        #ufs = uf.sort('awo_get_u_pend')

        cores = info['metadata.effective_cores']
        #cores = 32

        if value == 'cc_fork':
            # Window: exec script spawn -> executable finished.
            spec = {'in': [{'info' : 'aec_start_script'}],
                    'out': [{'info' : 'aec_after_exec'}]}
            rpu.add_concurrency (unit_prof_df, 'cc_fork', spec)
        elif value == 'cc_exit':
            # Window: executable finished -> advance to staging output.
            spec = {'in': [{'info' : 'aec_after_exec'}],
                    'out': [{'state': rps.AGENT_STAGING_OUTPUT_PENDING,
                             'event': 'advance'},
                           ]}
            rpu.add_concurrency (unit_prof_df, 'cc_exit', spec)

        df = unit_prof_df[
            (unit_prof_df[value] >= 0) &
            #(unit_prof_df.event == 'advance') &
            (unit_prof_df.sid == sid)
            ][['time', value]]

        # One column per session, named after the session's core count.
        df.columns = ['time', cores]
        df['time'] -= df['time'].min()

        if first:
            df_all = df
        else:
            df_all = pd.merge(df_all, df, on='time', how='outer')

        #labels.append("Cores: %d" % cores)
        # labels.append("%d" % cores)
        #labels.append("%d - %s" % (cores, 'RP' if rp else 'ORTE'))
        #labels.append(sid[-4:])
        labels.append("%d" % info['metadata.cu_runtime'])

        first = False

    df_all.set_index('time', inplace=True)
    print df_all.head()

    #df_all.plot(colormap='Paired')
    #df_all.plot(drawstyle='steps-post')
    #ax = df_all.plot(drawstyle='steps-pre', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH, colors=colors)
    ax = df_all.plot(fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH, colors=colors)

    # Vertial reference
    #x_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    #mp.pyplot.plot((x_ref, x_ref),(0, 1000), 'k--')
    #labels.append("Optimal")

    location = 'upper right'
    legend = mp.pyplot.legend(labels, loc=location,
                              fontsize=LEGEND_FONTSIZE, labelspacing=0)
    legend.get_frame().set_linewidth(BORDERWIDTH)

    if not paper:
        # 'info'/'resource_label' are from the last processed session.
        mp.pyplot.title("Concurrent number of CUs in stage '%s'.\n"
                        "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
                        "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
                        "RP: %s - RS: %s - RU: %s" % (
                            value,
                            info['metadata.generations'],
                            info['metadata.cu_cores'],
                            info['metadata.cu_runtime'],
                            resource_label,
                            info['metadata.num_sub_agents'],
                            info['metadata.num_exec_instances_per_sub_agent'],
                            info['metadata.radical_stack.rp'],
                            info['metadata.radical_stack.rs'],
                            info['metadata.radical_stack.ru']
                        ), fontsize=TITLE_FONTSIZE)

    mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("\# Concurrent Units", fontsize=LABEL_FONTSIZE)
    # mp.pyplot.ylim(0, 200)
    mp.pyplot.ylim(-50,)
    mp.pyplot.xlim(0, 600)
    #ax.get_xaxis().set_ticks([])

    # NOTE(review): debug leftover — dumps all axis attributes to stdout.
    print dir(ax)

    [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()]
    plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)
    plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH)

    # Fixed figure size for publication layout.
    # width = 3.487
    width = 3.3
    height = width / 1.618
    # height = 2.5
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    # fig.subplots_adjust(left=0, right=1, top=1, bottom=1)
    # fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)

    mp.pyplot.savefig('plot_concurrency.pdf')
    mp.pyplot.close()
def plot(sids, paper=False): labels = [] all_dict = {} for sid in sids: if sid.startswith('rp.session'): rp = True else: rp = False session_dir = os.path.join(PICKLE_DIR, sid) unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl')) pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl')) tr_unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl')) session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl')) # Legend info info = session_info_df.loc[sid] # cu_cores = info['metadata.cu_cores'] # cu_count = info['metadata.cu_count'] cu_count = info['metadata.cu_cores'] cu_cores = info['metadata.cu_count'] cu_runtime = info['metadata.cu_runtime'] if cu_count not in all_dict: all_dict[cu_count] = {} if cu_cores not in all_dict[cu_count]: all_dict[cu_count][cu_cores] = pd.Series() if rp: # For this call assume that there is only one pilot per session resources = get_resources(unit_info_df, pilot_info_df, sid) assert len(resources) == 1 resource_label = resources.values()[0].replace('_', '\_') else: resource_label = 'bogus' # Get only the entries for this session tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid] # Only take completed CUs into account #tuf = tuf[tuf['Done'].notnull()] # We sort the units based on the order they arrived at the agent #tufs = tuf.sort('awo_get_u_pend') #tufs = tuf.sort('awo_adv_u') #tufs = tuf.sort('asic_get_u_pend') tufs = tuf.sort() try: all_dict[cu_count][cu_cores] = all_dict[cu_count][cu_cores].append(pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min() - cu_runtime))) #all_dict[cu_count][cu_cores] = all_dict[cu_count][cu_cores].append(pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min()))) except: print "Plotting failed for session: %s" % sid continue for key in all_dict: # print 'orte_ttc raw:', orte_ttc #print 'orte_ttc mean:', orte_ttc.mean() orte_df = pd.DataFrame(all_dict[key]) print 'orte_ttc df:', orte_df 
#labels.append("%s" % resource_legend[key]) labels.append("%s" % key) #ax = orte_df.mean().plot(kind='line', color=resource_colors[key], marker=resource_marker[key], fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH) ax = orte_df.mean().plot(kind='line', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH) ax.set_xscale('log', basex=10) ax.set_yscale('log', basey=10) # Horizontal reference # y_ref = info['metadata.generations'] * info['metadata.cu_runtime'] # ax = mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--') # labels.append("Optimal") print 'labels: %s' % labels mp.pyplot.legend(labels, loc='upper left', fontsize=LEGEND_FONTSIZE) if not paper: mp.pyplot.title("TTC overhead for variable size CU.\n" "%d generations of a variable number of 'concurrent' CUs with variable number of cores with a %ss payload on a variable core pilot on %s.\n" "Constant number of %d sub-agent with %d ExecWorker(s) each.\n" "RP: %s - RS: %s - RU: %s" % (info['metadata.generations'], info['metadata.cu_runtime'], resource_label, info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'], info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru'] ), fontsize=8) mp.pyplot.xlabel("\# CUs", fontsize=LABEL_FONTSIZE) mp.pyplot.ylabel("$TTC_{overhead}$ (s)", fontsize=LABEL_FONTSIZE) mp.pyplot.ylim(0) #mp.pyplot.ylim(290, 500) #mp.pyplot.ylim(y_ref-10) #ax.get_xaxis().set_ticks([]) # #ax.get_xaxis.set # [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()] # plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH) # plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH) #width = 3.487 #width = 3.3 #height = width / 1.618 # height = 2.7 #fig = mp.pyplot.gcf() #fig.set_size_inches(width, height) #fig.subplots_adjust(left=0, right=1, top=1, bottom=1) #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1) #fig.tight_layout(pad=0.1) #fig.tight_layout() mp.pyplot.savefig('plot_ttc_cu_cores.pdf') mp.pyplot.close()
def plot(sids, value, label='', paper=False): labels = [] #colors = [] colors = [cmap(i) for i in np.linspace(0, 1, len(sids))] #c = 0 first = True for sid in sids: session_dir = os.path.join(PICKLE_DIR, sid) unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl')) pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl')) unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'unit_prof.pkl')) session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl')) # Legend info info = session_info_df.loc[sid] # For this call assume that there is only one pilot per session resources = get_resources(unit_info_df, pilot_info_df, sid) assert len(resources) == 1 resource_label = resources.values()[0] # Get only the entries for this session #uf = unit_prof_df[unit_prof_df['sid'] == sid] # We sort the units based on the order they arrived at the agent #ufs = uf.sort('awo_get_u_pend') cores = info['metadata.effective_cores'] if value == 'sched': # # Scheduling # df = unit_prof_df[ (unit_prof_df.cc_sched >= 0) & (unit_prof_df.event == 'advance') & (unit_prof_df.sid == sid) ][['time', 'cc_sched']] elif value == 'exec': # # Scheduling # df = unit_prof_df[ (unit_prof_df.cc_exec >= 0) & (unit_prof_df.event == 'advance') & (unit_prof_df.sid == sid) ][['time', 'cc_exec']] else: raise Exception("Value %s unknown" % value) if 'metadata.barriers' in info and 'barrier_generation' in info['metadata.barriers']: barrier = BARRIER_GENERATION elif 'metadata.barriers' in info and 'barrier_client_submit' in info['metadata.barriers']: barrier = BARRIER_CLIENT_SUBMIT elif 'metadata.barriers' in info and 'barrier_agent_launch' in info['metadata.barriers']: barrier = BARRIER_AGENT_LAUNCH else: raise Exception("No barrier info found") df.columns = ['time', barrier] df['time'] -= df['time'].min() if first: df_all = df else: df_all = pd.merge(df_all, df, on='time', how='outer') labels.append(barrier_legend[barrier]) 
#colors.append(barrier_colors[barrier]) first = False df_all.set_index('time', inplace=True) print df_all.head() #df_all.plot(colormap='Paired') ax = df_all.plot(color=colors, fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH) # For this call assume that there is only one pilot per session ppn_values = get_ppn(unit_info_df, pilot_info_df, sid) assert len(ppn_values) == 1 ppn = ppn_values.values()[0] legend = mp.pyplot.legend(labels, loc='upper right', fontsize=LEGEND_FONTSIZE, labelspacing=0) legend.get_frame().set_linewidth(BORDERWIDTH) if not paper: mp.pyplot.title("Concurrent number of CUs in stage '%s'.\n" "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n" "Constant number of %d sub-agent with %d ExecWorker(s) each.\n" "RP: %s - RS: %s - RU: %s" % (value, info['metadata.generations'], info['metadata.cu_cores'], info['metadata.cu_runtime'], resource_label, info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'], info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru'] ), fontsize=8) mp.pyplot.xlabel("Time (s)", fontsize=LABEL_FONTSIZE) mp.pyplot.ylabel("Concurrent Units", fontsize=LABEL_FONTSIZE) mp.pyplot.ylim(-10, 1500) mi = df_all.index.min() ma = df_all.index.max() mp.pyplot.xlim(mi - 0.01 * ma, ma * 1.01) #ax.get_xaxis().set_ticks([]) [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()] plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH) plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH) if paper: # width = 3.487 width = 3.3 height = width / 1.618 #height = 2.7 fig = mp.pyplot.gcf() fig.set_size_inches(width, height) # fig.subplots_adjust(left=0, right=1, top=1, bottom=1) # fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1) fig.tight_layout(pad=0.1) mp.pyplot.savefig('plot_cc_ew_barriers.pdf') else: mp.pyplot.savefig('plot6_%s%s.pdf' % (value, label)) mp.pyplot.close()
def plot(tr_unit_prof_df, info_df, unit_info_df, pilot_info_df, sid):
    """Plot a pie chart of CU completion for one session.

    The pie shows the proportion of units with/without an
    'aec_complete' timestamp, saved to '<sid>_plot_states.pdf'.

    :param tr_unit_prof_df: transposed per-unit profile dataframe
    :param info_df: per-session info dataframe (indexed by session id)
    :param unit_info_df: per-unit info dataframe
    :param pilot_info_df: per-pilot info dataframe
    :param sid: session id to plot
    """
    print "Plotting %s ..." % sid

    labels = []

    # Legend info
    info = info_df.loc[sid]

    mpi = get_mpi(unit_info_df, sid)
    #mpi = True

    # For this call assume that there is only one pilot per session
    lms = get_lm(unit_info_df, pilot_info_df, sid, mpi)
    assert len(lms) == 1
    launch_method = lms.values()[0]

    # For this call assume that there is only one pilot per session
    spawners = get_spawners(unit_info_df, pilot_info_df, sid)
    assert len(spawners) == 1
    spawner = spawners.values()[0]

    # For this call assume that there is only one pilot per session
    resources = get_resources(unit_info_df, pilot_info_df, sid)
    assert len(resources) == 1
    resource_label = resources.values()[0]

    # Get only the entries for this session
    uf = unit_info_df[unit_info_df['sid'] == sid]

    # Get only the entries for this session
    tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]

    # Count units that did vs did not record an exec-complete timestamp.
    result = pd.value_counts(tuf['aec_complete'].notnull(), sort=False)
    # result = pd.value_counts(uf['state'].values, sort=False)
    print result

    # Only take completed CUs into account
    #tuf = tuf[tuf['Done'].notnull()]

    #print tuf['aec_after_exec']
    #print tuf['aec_complete']

    #print 'c:', c

    ax = result.plot(kind='pie', autopct='%.2f%%')
    # Keep the pie circular.
    ax.set_aspect('equal')

    print info

    #mp.pyplot.legend(labels, loc='upper left', fontsize=5)
    mp.pyplot.title(
        "%s (%s)\n"
        "%d CUs of %d core(s) with a %ds payload on a %d core pilot on %s.\n"
        "%d sub-agent(s) with %d ExecWorker(s) each. All times are per CU.\n"
        "RP: %s - RS: %s - RU: %s" % (
            sid,
            time.strftime("%a, %d %b %Y %H:%M:%S +0000",
                          time.gmtime(info['created'])),
            info['metadata.cu_count'],
            info['metadata.cu_cores'],
            info['metadata.cu_runtime'],
            info['metadata.pilot_cores'],
            resource_label,
            info['metadata.num_sub_agents'],
            info['metadata.num_exec_instances_per_sub_agent'],
            info['metadata.radical_stack.rp'],
            info['metadata.radical_stack.rs'],
            info['metadata.radical_stack.ru']),
        fontsize=8)

    mp.pyplot.savefig('%s_plot_states.pdf' % sid)
    mp.pyplot.close()
def plot(sids, paper=False): labels = [] all_dict = {} for sid in sids: if sid.startswith('rp.session'): rp = True else: rp = False session_dir = os.path.join(PICKLE_DIR, sid) unit_info_df = pd.read_pickle( os.path.join(session_dir, 'unit_info.pkl')) pilot_info_df = pd.read_pickle( os.path.join(session_dir, 'pilot_info.pkl')) tr_unit_prof_df = pd.read_pickle( os.path.join(session_dir, 'tr_unit_prof.pkl')) session_info_df = pd.read_pickle( os.path.join(session_dir, 'session_info.pkl')) # Legend info info = session_info_df.loc[sid] # cu_cores = info['metadata.cu_cores'] # cu_count = info['metadata.cu_count'] cu_count = info['metadata.cu_cores'] cu_cores = info['metadata.cu_count'] cu_runtime = info['metadata.cu_runtime'] if cu_count not in all_dict: all_dict[cu_count] = {} if cu_cores not in all_dict[cu_count]: all_dict[cu_count][cu_cores] = pd.Series() if rp: # For this call assume that there is only one pilot per session resources = get_resources(unit_info_df, pilot_info_df, sid) assert len(resources) == 1 resource_label = resources.values()[0].replace('_', '\_') else: resource_label = 'bogus' # Get only the entries for this session tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid] # Only take completed CUs into account #tuf = tuf[tuf['Done'].notnull()] # We sort the units based on the order they arrived at the agent #tufs = tuf.sort('awo_get_u_pend') #tufs = tuf.sort('awo_adv_u') #tufs = tuf.sort('asic_get_u_pend') tufs = tuf.sort() try: all_dict[cu_count][cu_cores] = all_dict[cu_count][cu_cores].append( pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min() - cu_runtime))) #all_dict[cu_count][cu_cores] = all_dict[cu_count][cu_cores].append(pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min()))) except: print "Plotting failed for session: %s" % sid continue for key in all_dict: # print 'orte_ttc raw:', orte_ttc #print 'orte_ttc mean:', orte_ttc.mean() orte_df = pd.DataFrame(all_dict[key]) print 'orte_ttc df:', orte_df 
#labels.append("%s" % resource_legend[key]) labels.append("%s" % key) #ax = orte_df.mean().plot(kind='line', color=resource_colors[key], marker=resource_marker[key], fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH) ax = orte_df.mean().plot(kind='line', fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH) ax.set_xscale('log', basex=10) ax.set_yscale('log', basey=10) # Horizontal reference # y_ref = info['metadata.generations'] * info['metadata.cu_runtime'] # ax = mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--') # labels.append("Optimal") print 'labels: %s' % labels mp.pyplot.legend(labels, loc='upper left', fontsize=LEGEND_FONTSIZE) if not paper: mp.pyplot.title( "TTC overhead for variable size CU.\n" "%d generations of a variable number of 'concurrent' CUs with variable number of cores with a %ss payload on a variable core pilot on %s.\n" "Constant number of %d sub-agent with %d ExecWorker(s) each.\n" "RP: %s - RS: %s - RU: %s" % (info['metadata.generations'], info['metadata.cu_runtime'], resource_label, info['metadata.num_sub_agents'], info['metadata.num_exec_instances_per_sub_agent'], info['metadata.radical_stack.rp'], info['metadata.radical_stack.rs'], info['metadata.radical_stack.ru']), fontsize=8) mp.pyplot.xlabel("\# CUs", fontsize=LABEL_FONTSIZE) mp.pyplot.ylabel("$TTC_{overhead}$ (s)", fontsize=LABEL_FONTSIZE) mp.pyplot.ylim(0) #mp.pyplot.ylim(290, 500) #mp.pyplot.ylim(y_ref-10) #ax.get_xaxis().set_ticks([]) # #ax.get_xaxis.set # [i.set_linewidth(BORDERWIDTH) for i in ax.spines.itervalues()] # plt.setp(ax.yaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH) # plt.setp(ax.xaxis.get_ticklines(), 'markeredgewidth', BORDERWIDTH) #width = 3.487 #width = 3.3 #height = width / 1.618 # height = 2.7 #fig = mp.pyplot.gcf() #fig.set_size_inches(width, height) #fig.subplots_adjust(left=0, right=1, top=1, bottom=1) #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1) #fig.tight_layout(pad=0.1) #fig.tight_layout() mp.pyplot.savefig('plot_ttc_cu_cores.pdf') mp.pyplot.close()
def plot(sids, paper=False):
    """Plot time-to-completion (TTC) against node count for full-node CUs.

    `sids` is a mapping of resource key -> list of session ids.  For each
    session the pickled profiling DataFrames are read from PICKLE_DIR, the
    TTC is computed as (last 'aec_after_exec' - first 'asic_get_u_pend'),
    bucketed by node count, and one line per resource key is drawn.  An
    "Optimal" horizontal reference (generations * CU runtime of the last
    processed session) is added, and the figure is saved to
    'plot_ttc_full_node.pdf'.

    :param sids:  dict mapping a resource key (used to index
                  resource_legend/resource_colors/resource_marker) to a
                  list of session id strings.
    :param paper: when True, skip the verbose diagnostic title (figure
                  intended for publication).
    """
    labels = []
    for key in sids:
        orte_ttc = {}
        for sid in sids[key]:
            # RP sessions are recognized by their id prefix; non-RP
            # sessions get a placeholder resource label below.
            if sid.startswith('rp.session'):
                rp = True
            else:
                rp = False
            session_dir = os.path.join(PICKLE_DIR, sid)
            unit_info_df = pd.read_pickle(os.path.join(session_dir, 'unit_info.pkl'))
            pilot_info_df = pd.read_pickle(os.path.join(session_dir, 'pilot_info.pkl'))
            tr_unit_prof_df = pd.read_pickle(os.path.join(session_dir, 'tr_unit_prof.pkl'))
            session_info_df = pd.read_pickle(os.path.join(session_dir, 'session_info.pkl'))
            # Legend info
            info = session_info_df.loc[sid]
            cores = info['metadata.effective_cores']
            # NOTE(review): assumes 32 cores per node (Python 2 integer
            # division if 'cores' is an int) -- confirm this matches the
            # target machine.
            nodes = cores / 32
            if nodes not in orte_ttc:
                orte_ttc[nodes] = pd.Series()
            if rp:
                # For this call assume that there is only one pilot per session
                resources = get_resources(unit_info_df, pilot_info_df, sid)
                assert len(resources) == 1
                resource_label = resources.values()[0]
            else:
                resource_label = 'bogus'
            # Get only the entries for this session
            tuf = tr_unit_prof_df[tr_unit_prof_df['sid'] == sid]
            # Only take completed CUs into account
            #tuf = tuf[tuf['Done'].notnull()]
            # We sort the units based on the order they arrived at the agent
            #tufs = tuf.sort('awo_get_u_pend')
            #tufs = tuf.sort('awo_adv_u')
            #tufs = tuf.sort('asic_get_u_pend')
            tufs = tuf.sort()
            # TTC for this session: last execution end minus first arrival
            # at the scheduler input component.
            orte_ttc[nodes] = orte_ttc[nodes].append(pd.Series((tufs['aec_after_exec'].max() - tufs['asic_get_u_pend'].min())))
        print 'orte_ttc raw:', orte_ttc
        #print 'orte_ttc mean:', orte_ttc.mean()
        orte_df = pd.DataFrame(orte_ttc)
        print 'orte_ttc df:', orte_df
        labels.append("%s" % resource_legend[key])
        # Mean TTC per node count, one styled line per resource key.
        ax = orte_df.mean().plot(kind='line', color=resource_colors[key], marker=resource_marker[key], fontsize=TICK_FONTSIZE, linewidth=LINEWIDTH)
    # ORTE only
    # Data for BW
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096, 8192), (305, 309, 309, 313, 326, 351, 558), 'b-+')
    # Data for Stampede
    #mp.pyplot.plot((128, 256, 512, 1024, 2048, 4096), (301, 303, 305, 311, 322, 344), 'b-+')
    #labels.append("ORTE-only (C)")
    # Horizontal reference: the theoretically optimal TTC.  'info' leaks
    # out of the loops above, so this uses the *last* session processed.
    y_ref = info['metadata.generations'] * info['metadata.cu_runtime']
    mp.pyplot.plot((0, 10000), (y_ref, y_ref), 'k--', linewidth=LINEWIDTH)
    labels.append("Optimal")
    print 'labels: %s' % labels
    location = 'upper left'
    mp.pyplot.legend(labels, loc=location, fontsize=LEGEND_FONTSIZE, markerscale=0)
    if not paper:
        # Verbose diagnostic title; like y_ref it reflects the last
        # processed session's metadata.
        mp.pyplot.title("TTC for a varying number of 'concurrent' Full-Node CUs.\n"
                        "%d generations of a variable number of 'concurrent' CUs of %d core(s) with a %ss payload on a variable core pilot on %s.\n"
                        "Constant number of %d sub-agent with %d ExecWorker(s) each.\n"
                        "RP: %s - RS: %s - RU: %s" % (
                            info['metadata.generations'],
                            info['metadata.cu_cores'],
                            info['metadata.cu_runtime'],
                            resource_label,
                            info['metadata.num_sub_agents'],
                            info['metadata.num_exec_instances_per_sub_agent'],
                            info['metadata.radical_stack.rp'],
                            info['metadata.radical_stack.rs'],
                            info['metadata.radical_stack.ru']
                        ), fontsize=TITLE_FONTSIZE)
    mp.pyplot.xlabel("\# Nodes", fontsize=LABEL_FONTSIZE)
    mp.pyplot.ylabel("Time to Completion (s)", fontsize=LABEL_FONTSIZE)
    #mp.pyplot.ylim(0)
    #mp.pyplot.ylim(290, 500)
    #mp.pyplot.ylim(y_ref-10)
    #ax.get_xaxis().set_ticks([])
    #
    #ax.get_xaxis.set
    # Fixed figure size for a single-column paper layout.
    #width = 3.487
    width = 3.3
    height = width / 1.618
    # height = 2.7
    fig = mp.pyplot.gcf()
    fig.set_size_inches(width, height)
    #fig.subplots_adjust(left=0, right=1, top=1, bottom=1)
    #fig.tight_layout(w_pad=0.0, h_pad=0.0, pad=0.1)
    fig.tight_layout(pad=0.1)
    mp.pyplot.savefig('plot_ttc_full_node.pdf')
    mp.pyplot.close()