def get_sched_waits(trace_id):
    """Return the positive gaps between consecutive job start times.

    Loads the trace identified by trace_id (via the module-level db_obj),
    walks the start-ordered job list, and collects every strictly positive
    difference between consecutive non-zero start times.

    Args:
        trace_id: numeric id of the trace to load.

    Returns:
        (sched_gaps_stamp, sched_gaps): parallel lists — the timestamp at
        which each gap ended and the gap duration in seconds.
    """
    # NOTE(review): the original also computed machine/max_cores via
    # exp.get_machine() and read _lists_start["time_end"], all unused;
    # removed here — confirm exp.get_machine() has no side effects.
    rt = ResultTrace()
    rt.load_trace(db_obj, trace_id)
    start_times = rt._lists_start["time_start"]
    id_jobs = rt._lists_start["id_job"]
    sched_gaps = []
    sched_gaps_stamp = []
    the_max = 0
    the_max_id = -1
    # Pair each start time with the next one; id_job is the later job's id.
    for s1, s2, id_job in zip(start_times[:-1], start_times[1:],
                              id_jobs[1:]):
        # time_start == 0 marks a job that never started; skip those pairs.
        if s1 != 0 and s2 != 0:
            sched_gap = s2 - s1
            if sched_gap > 0:
                sched_gaps.append(sched_gap)
                sched_gaps_stamp.append(s2)
                if sched_gap > the_max:
                    the_max = sched_gap
                    the_max_id = id_job
    # Debug output kept from the original: largest gap and the job ending it.
    print("MAAAAX", the_max, the_max_id)
    return sched_gaps_stamp, sched_gaps
def load_trace(self, db_obj):
    """Fetch this experiment's trace from the analysis database.

    Args:
        db_obj: DB object configured to access the analysis database.

    Returns:
        A ResultTrace populated with the trace identified by
        self._definition._trace_id.
    """
    trace = ResultTrace()
    trace.load_trace(db_obj, self._definition._trace_id, False)
    return trace
def test_multi_load_trace(self):
    """Checks ResultTrace.load_trace in append mode: loading the same
    trace twice (second time with append=True) must concatenate the job
    lists, with the appended copy's timestamps shifted forward.

    NOTE(review): the expected values show the appended copy offset by +4
    (e.g. 3000 -> 3004); presumably load_trace derives this offset from
    the previous trace's time span — confirm against ResultTrace.
    """
    self._create_tables()
    rt = ResultTrace()
    # Two jobs, listed in submission order.
    rt._lists_submit = {
        "job_db_inx": [1, 2],
        "account": ["account1", "account2"],
        "cpus_req": [48, 96],
        "cpus_alloc": [48, 96],
        "job_name": ["jobName1", "jobName2"],
        "id_job": [1, 2],
        "id_qos": [2, 3],
        "id_resv": [3, 4],
        "id_user": [4, 5],
        "nodes_alloc": [2, 4],
        "partition": ["partition1", "partition2"],
        "priority": [99, 199],
        "state": [3, 2],
        "timelimit": [100, 200],
        "time_submit": [3000, 3003],
        "time_start": [3002, 3001],
        "time_end": [3002, 3005]
    }
    # The same two jobs, listed in start order (job 2 started first).
    rt._lists_start = {
        "job_db_inx": [2, 1],
        "account": ["account2", "account1"],
        "cpus_req": [96, 48],
        "cpus_alloc": [96, 48],
        "job_name": ["jobName2", "jobName1"],
        "id_job": [2, 1],
        "id_qos": [3, 2],
        "id_resv": [4, 3],
        "id_user": [5, 4],
        "nodes_alloc": [4, 2],
        "partition": ["partition2", "partition1"],
        "priority": [199, 99],
        "state": [2, 3],
        "timelimit": [200, 100],
        "time_submit": [3003, 3000],
        "time_start": [3001, 3002],
        "time_end": [3005, 3002]
    }
    rt.store_trace(self._db, 1)
    new_rt = ResultTrace()
    # First load replaces any previous content; second load appends.
    new_rt.load_trace(self._db, 1)
    new_rt.load_trace(self._db, 1, True)
    # Submit-ordered lists: original values followed by the +4-shifted copy.
    self.assertEqual(new_rt._lists_submit["time_submit"],
                     [3000, 3003, 3004, 3007])
    self.assertEqual(new_rt._lists_submit["time_start"],
                     [3002, 3001, 3006, 3005])
    self.assertEqual(new_rt._lists_submit["time_end"],
                     [3002, 3005, 3006, 3009])
    # Start-ordered lists keep their own ordering within each copy.
    self.assertEqual(new_rt._lists_start["time_start"],
                     [3001, 3002, 3005, 3006])
    self.assertEqual(new_rt._lists_start["time_submit"],
                     [3003, 3000, 3007, 3004])
    self.assertEqual(new_rt._lists_start["time_end"],
                     [3005, 3002, 3009, 3006])
def test_multi_load_results(self):
    """Checks ResultTrace.fill_job_values with append=True: filling twice
    must accumulate the per-job derived metrics.

    Expected values cover the two regular jobs, twice (once per fill):
    - runtime    = time_end - time_start    -> [1, 1]
    - waittime   = time_start - time_submit -> [2, 1]
    - turnaround = time_end - time_submit   -> [3, 2]
    - slowdown   = turnaround / runtime     -> [3, 2]

    NOTE(review): job 3 ("wf_manifest") is absent from the expected
    metrics; presumably fill_job_values skips workflow-manifest jobs —
    confirm against its implementation.
    """
    self._create_tables()
    rt = ResultTrace()
    # Three jobs in submission order; the third is a workflow manifest job.
    rt._lists_submit = {
        "job_db_inx": [1, 2, 3],
        "account": ["account1", "account2", "account1"],
        "cpus_req": [48, 96, 24],
        "cpus_alloc": [48, 96, 24],
        "job_name": ["jobName1", "jobName2", "wf_manifest"],
        "id_job": [1, 2, 3],
        "id_qos": [2, 3, 4],
        "id_resv": [3, 4, 5],
        "id_user": [4, 5, 6],
        "nodes_alloc": [2, 4, 1],
        "partition": ["partition1", "partition2", "partition1"],
        "priority": [99, 199, 99],
        "state": [3, 2, 3],
        "timelimit": [100, 200, 200],
        "time_submit": [3000, 3003, 3500],
        "time_start": [3002, 3004, 3501],
        "time_end": [3003, 3005, 3510]
    }
    # Same jobs in start order (job 2 before job 1).
    rt._lists_start = {
        "job_db_inx": [2, 1, 3],
        "account": ["account2", "account1", "account1"],
        "cpus_req": [96, 48, 24],
        "cpus_alloc": [96, 48, 24],
        "job_name": ["jobName2", "jobName1", "wf_manifest"],
        "id_job": [2, 1, 3],
        "id_qos": [3, 2, 4],
        "id_resv": [4, 3, 5],
        "id_user": [5, 4, 6],
        "nodes_alloc": [4, 2, 1],
        "partition": ["partition2", "partition1", "partition1"],
        "priority": [199, 99, 99],
        "state": [2, 3, 3],
        "timelimit": [200, 100, 200],
        "time_submit": [3003, 3000, 3500],
        "time_start": [3004, 3002, 3501],
        "time_end": [3005, 3002, 3510]
    }
    rt.store_trace(self._db, 1)
    new_rt = ResultTrace()
    # First fill replaces; second fill with append=True accumulates.
    new_rt.load_trace(self._db, 1)
    new_rt.fill_job_values(start=3000, stop=4000)
    new_rt.load_trace(self._db, 1)
    new_rt.fill_job_values(start=3000, stop=4000, append=True)
    self.assertEqual(new_rt._jobs_runtime, [1, 1, 1, 1])
    self.assertEqual(new_rt._jobs_waittime, [2, 1, 2, 1])
    self.assertEqual(new_rt._jobs_turnaround, [3, 2, 3, 2])
    self.assertEqual(new_rt._jobs_timelimit, [100, 200, 100, 200])
    self.assertEqual(new_rt._jobs_cpus_alloc, [48, 96, 48, 96])
    self.assertEqual(new_rt._jobs_slowdown, [3, 2, 3, 2])
def test_correct_times(self):
    """End-to-end check of StartTimeCorrector.correct_times on a
    'manifest' workflow-handling experiment.

    The same three jobs are stored under trace_id (to be corrected) and
    trace_id + 1 (control). After correction:
    - job 1 (stage job "..._S0", time_start == 0): start becomes
      time_end - 14340. NOTE(review): 14340 is presumably the stage
      runtime read from the wf_synthLongWide.json manifest — confirm.
    - job 2 (time_start != 0): untouched.
    - job 3 (bounding job "...json-2" in manifest mode): start set to its
      own time_end.
    The control trace must remain unmodified.
    """
    self._create_tables()
    exp = ExperimentDefinition(workflow_handling="manifest")
    trace_id = exp.store(self._db)
    rt = ResultTrace()
    rt._lists_submit = {
        "job_db_inx": [1, 2, 3],
        "account": ["account1", "account2", "a3"],
        "cpus_req": [48, 96, 96],
        "cpus_alloc": [48, 96, 96],
        "job_name": [
            "wf_synthLongWide.json-1_S0",
            "wf_synthLongWide.json-1_S1_dS0",
            "wf_synthLongWide.json-2"
        ],
        "id_job": [1, 2, 3],
        "id_qos": [2, 3, 3],
        "id_resv": [3, 4, 5],
        "id_user": [4, 5, 6],
        "nodes_alloc": [2, 4, 4],
        "partition": ["partition1", "partition2", "partition2"],
        "priority": [99, 199, 210],
        "state": [3, 3, 3],
        "timelimit": [100, 200, 300],
        "time_submit": [3000, 3003, 3004],
        # Jobs 1 and 3 have time_start == 0 but non-zero time_end:
        # these are the ones correct_times must fix.
        "time_start": [0, 20000, 0],
        "time_end": [20000, 25000, 30000]
    }
    # Store the corrected trace and an identical control trace.
    rt.store_trace(self._db, trace_id)
    rt.store_trace(self._db, trace_id + 1)
    stc = StartTimeCorrector()
    stc.correct_times(self._db, trace_id)
    new_rt = ResultTrace()
    new_rt.load_trace(self._db, trace_id)
    self.assertEqual(new_rt._lists_submit["time_start"],
                     [20000 - 14340, 20000, 30000])
    # The control trace must be left with the original zeros.
    original_rt = ResultTrace()
    original_rt.load_trace(self._db, trace_id + 1)
    self.assertEqual(original_rt._lists_submit["time_start"],
                     [0, 20000, 0])
def test_apply_new_times(self):
    """Checks StartTimeCorrector.apply_new_times: only the jobs listed in
    the {id_job: new_time_start} dict are updated, and only within the
    trace identified by the corrector's experiment trace_id — the control
    trace stored under trace_id_orig must be untouched.
    """
    self._create_tables()
    rt = ResultTrace()
    rt._lists_submit = {
        "job_db_inx": [1, 2, 3],
        "account": ["account1", "account2", "a3"],
        "cpus_req": [48, 96, 96],
        "cpus_alloc": [48, 96, 96],
        "job_name": [
            "wf_synthLongWide.json-1_S0",
            "wf_synthLongWide.json-1_S1_dS0",
            "wf_synthLongWide.json-2_S1_dS0"
        ],
        "id_job": [1, 2, 3],
        "id_qos": [2, 3, 3],
        "id_resv": [3, 4, 5],
        "id_user": [4, 5, 6],
        "nodes_alloc": [2, 4, 4],
        "partition": ["partition1", "partition2", "partition2"],
        "priority": [99, 199, 210],
        "state": [3, 3, 3],
        "timelimit": [100, 200, 300],
        "time_submit": [3000, 3003, 3004],
        "time_start": [0, 20000, 0],
        "time_end": [20000, 25000, 30000]
    }
    trace_id = 1
    trace_id_orig = 2
    # Same data stored twice: one trace to modify, one as control.
    rt.store_trace(self._db, trace_id)
    rt.store_trace(self._db, trace_id_orig)
    stc = StartTimeCorrector()
    # apply_new_times reads the trace id from the corrector's experiment.
    stc._experiment = ExperimentDefinition()
    stc._experiment._trace_id = trace_id
    # New start times for jobs 1 and 3 only; job 2 must keep its value.
    stc.apply_new_times(self._db, {1: 20000 - 14340, 3: 30000 - 3540})
    new_rt = ResultTrace()
    new_rt.load_trace(self._db, trace_id)
    self.assertEqual(new_rt._lists_submit["time_start"],
                     [20000 - 14340, 20000, 30000 - 3540])
    # Control trace keeps the original start times.
    old_rt = ResultTrace()
    old_rt.load_trace(self._db, trace_id_orig)
    self.assertEqual(old_rt._lists_submit["time_start"],
                     [0, 20000, 0])
# Script entry: loads one experiment trace for utilization analysis.
# Usage: <script> <target_dir> <trace_id>
#
# NOTE(review): this chunk appears truncated — adjust_ut_plot's loop body
# never updates last_value or appends to new_values in the visible code,
# and nothing after it is shown. Confirm against the full file.
db_obj = get_central_db()
# Default output directory; overridden by the first CLI argument below.
target_dir = "utilization-20160616-udog"
if len(sys.argv) == 3:
    target_dir = sys.argv[1]
    trace_id = sys.argv[2]
else:
    raise ValueError("Missing trace id to analyze")
exp = ExperimentDefinition()
exp.load(db_obj, trace_id)
rt = ResultTrace()
rt.load_trace(db_obj, trace_id)
machine = exp.get_machine()
max_cores = machine.get_total_cores()
# Last submission timestamp of the trace (submit-ordered list).
max_submit_time = rt._lists_submit["time_submit"][-1]


def adjust_ut_plot(ut_stamps, ut_values):
    """Expands utilization samples into step-plot coordinates.

    For each stamp, the previous value is repeated at the new stamp before
    the new point, producing the horizontal-then-vertical steps of a
    step chart. (Body appears incomplete in this chunk — see NOTE above.)
    """
    new_stamps = []
    new_values = []
    last_value = None
    for (st, vl) in zip(ut_stamps, ut_values):
        if last_value is not None:
            # Repeat the previous value at the new timestamp (step edge).
            new_stamps.append(st)
            new_values.append(last_value)
        new_stamps.append(st)
class WorkflowDeltas(object):
    """Produces the deltas in runtime, wait time, turnaround time, and
    stretch factor for the same workflow in two different traces.

    It is meant to compare the effect on the same workflow when the
    scheduling algorithm is different. Values are compared as:
    values_from_second_trace - values_from_first_trace.
    """

    def __init__(self):
        """Constructor."""
        self._first_trace = None
        self._first_trace_id = None
        self._second_trace = None
        self._second_trace_id = None
        # Delta lists stay None until produce_deltas is first called.
        self._runtime_deltas = None
        self._waitime_deltas = None
        self._turnaround_deltas = None
        self._stretch_deltas = None
        self._wf_names = None

    def load_traces(self, db_obj, first_id, second_id):
        """Loads the jobs of the two traces to compare.

        Args:
        - db_obj: DB object configured to access the analysis database.
        - first_id: int, trace_id of the first trace.
        - second_id: int, trace_id of the second trace.
        """
        self._first_trace = ResultTrace()
        self._first_trace_id = first_id
        self._second_trace = ResultTrace()
        self._second_trace_id = second_id
        self._first_trace.load_trace(db_obj, self._first_trace_id)
        self._second_trace.load_trace(db_obj, self._second_trace_id)
        self._first_workflows = (
            self._first_trace.do_workflow_pre_processing())
        self._second_workflows = (
            self._second_trace.do_workflow_pre_processing())

    def produce_deltas(self, append=False):
        """Produces and stores the deltas between the two loaded traces.

        Args:
        - append: if True, newly produced deltas are added to previously
            captured ones; if False (default), previous values are
            discarded first.
        Returns the current delta information as a tuple of lists:
        (wf_names, runtime, waittime, turnaround, stretch).
        """
        (wf_names, runtime_deltas, waitime_deltas, turnaround_deltas,
         stretch_deltas) = self._internal_produce_deltas()
        # On the very first call the lists are still None, so always reset.
        if not append or self._runtime_deltas is None:
            (self._wf_names, self._runtime_deltas, self._waitime_deltas,
             self._turnaround_deltas, self._stretch_deltas) = (
                [], [], [], [], [])
        self._wf_names += wf_names
        self._runtime_deltas += runtime_deltas
        self._waitime_deltas += waitime_deltas
        self._turnaround_deltas += turnaround_deltas
        self._stretch_deltas += stretch_deltas
        return (self._wf_names, self._runtime_deltas, self._waitime_deltas,
                self._turnaround_deltas, self._stretch_deltas)

    def _internal_produce_deltas(self):
        """Returns the list of workflow names found in common in the two
        traces plus lists with the per-workflow deltas of each variable.
        """
        runtime_deltas = []
        waitime_deltas = []
        turnaround_deltas = []
        stretch_deltas = []
        wf_names = []
        for wf_name in self._first_workflows.keys():
            # Only workflows present in both traces can be compared.
            if wf_name in self._second_workflows:
                wf_1 = self._first_workflows[wf_name]
                wf_2 = self._second_workflows[wf_name]
                runtime_d, waittime_d, turnaround_d, stretch_d = (
                    self.compare_wfs(wf_1, wf_2))
                runtime_deltas.append(runtime_d)
                waitime_deltas.append(waittime_d)
                turnaround_deltas.append(turnaround_d)
                stretch_deltas.append(stretch_d)
                wf_names.append(wf_name)
        return (wf_names, runtime_deltas, waitime_deltas,
                turnaround_deltas, stretch_deltas)

    def compare_wfs(self, wf_1, wf_2):
        """Produces the differences between the variables of two workflows.

        Returns: numeric differences (wf_2 - wf_1) of: runtime, waittime,
        turnaround, stretch_factor.
        """
        return (wf_2.get_runtime() - wf_1.get_runtime(),
                wf_2.get_waittime() - wf_1.get_waittime(),
                wf_2.get_turnaround() - wf_1.get_turnaround(),
                wf_2.get_stretch_factor() - wf_1.get_stretch_factor())

    def calculate_delta_results(self, store, db_obj=None, trace_id=None):
        """Calculates result statistics over the produced deltas.

        Requires produce_deltas to have been called at least once.
        Args:
        - store: if True, results are stored in the database.
        - db_obj: DBManager object; required when store is True.
        - trace_id: numeric id of the trace the data corresponds to;
            required when store is True.
        """
        if store and db_obj is None:
            raise ValueError("db_obj must be set to store jobs data")
        if store and trace_id is None:
            raise ValueError("trace_id must be set to store jobs data")
        data_list = [self._runtime_deltas, self._waitime_deltas,
                     self._turnaround_deltas, self._stretch_deltas]
        field_list = ["delta_runtime", "delta_waittime",
                      "delta_turnaround", "delta_stretch"]
        # Stretch deltas are small ratios, hence the much finer bin size.
        bin_size_list = [30, 30, 30, 0.01]
        minmax_list = [None, None, None, None]
        return calculate_results(data_list, field_list, bin_size_list,
                                 minmax_list, store=store, db_obj=db_obj,
                                 trace_id=trace_id)

    def load_delta_results(self, db_obj, trace_id):
        """Creates Histogram and NumericStats objects and sets them as
        local objects (self._[analyzed job field]) over the delta
        information of the workflows between two traces. The information
        is pulled from a database.

        Args:
        - db_obj: DBManager object used to pull the information from a
            database.
        - trace_id: numeric id of the trace to which the data corresponds.
        """
        field_list = ["delta_runtime", "delta_waittime",
                      "delta_turnaround", "delta_stretch"]
        return load_results(field_list, db_obj, trace_id)
class StartTimeCorrector(object):
    """Fixes jobs in stored traces whose time_start is 0 although they
    have ended: their real start time is re-derived from the job's end
    time and the runtime recorded in its workflow manifest.
    """

    def __init__(self):
        # Cache of parsed manifests keyed by manifest file name.
        self.manifest_dics = {}

    def correct_times(self, db_obj, trace_id):
        """Recomputes and writes start times for the jobs of trace_id
        whose time_start is 0 but which have a non-zero time_end.

        Args:
        - db_obj: DB object configured to access the analysis database.
        - trace_id: numeric id of the trace to correct.
        """
        self._experiment = ExperimentDefinition()
        self._experiment.load(db_obj, trace_id)
        self._trace = ResultTrace()
        print("Loading trace {0}".format(trace_id))
        self._trace.load_trace(db_obj, trace_id)
        trace_type = self._experiment._workflow_handling
        print(
            "Calculating corrected start times for trace {0}".format(
                trace_id))
        modified_start_times = self.get_corrected_start_times(trace_type)
        print(("Found {0} jobs which start time was 0, but had ended.".format(
            len(modified_start_times))))
        print("About to update times")
        self.apply_new_times(db_obj, modified_start_times)

    def apply_new_times(self, db_obj, modified_start_times):
        """Writes new start times into the traces table of the current
        experiment's trace.

        Args:
        - db_obj: DB object configured to access the analysis database.
        - modified_start_times: dict {id_job: new time_start}.
        """
        trace_id = self._experiment._trace_id
        for id_job, time_start in modified_start_times.items():
            print(("updating trace_id({0}), id_job({1}) with time_start: {2}"
                   "".format(trace_id, id_job, time_start)))
            self.update_time_start(db_obj, trace_id, id_job, time_start)

    def update_time_start(self, db_obj, trace_id, id_job, time_start):
        """Sets time_start of job id_job in trace trace_id in the traces
        table."""
        db_obj.setFieldOnTable("traces", "time_start", str(time_start),
                               "id_job", str(id_job),
                               extra_cond="and trace_id={0}".format(
                                   trace_id),
                               no_commas=True)

    def get_corrected_start_times(self, trace_type):
        """Returns a dict {id_job: corrected time_start} for workflow jobs
        (job_name starting with "wf") that ended but have time_start == 0.

        Args:
        - trace_type: workflow handling mode ("manifest", "multi", ...)
            forwarded to get_time_start.
        """
        modified_start_times = {}
        for (id_job, job_name, time_submit, time_start, time_end) in zip(
                self._trace._lists_submit["id_job"],
                self._trace._lists_submit["job_name"],
                self._trace._lists_submit["time_submit"],
                self._trace._lists_submit["time_start"],
                self._trace._lists_submit["time_end"]):
            if time_start == 0 and time_end != 0 and "wf" == job_name[:2]:
                modified_start_times[id_job] = self.get_time_start(
                    job_name, time_end, trace_type)
        return modified_start_times

    def get_workflow_info(self, workflow_file):
        """Parses (and caches) the manifest workflow_file, returning a dict
        with keys "cores", "runtime" and "tasks"."""
        if workflow_file not in self.manifest_dics:
            # Imported here to avoid a module-level import cycle —
            # TODO(review): confirm the cycle still exists.
            from orchestration.running import ExperimentRunner
            manifest_route = path.join(
                ExperimentRunner.get_manifest_folder(), workflow_file)
            cores, runtime, tasks = (
                WorkflowGeneratorMultijobs.parse_all_jobs(manifest_route))
            self.manifest_dics[workflow_file] = {
                "cores": cores, "runtime": runtime, "tasks": tasks}
        return self.manifest_dics[workflow_file]

    def get_time_start(self, job_name, time_end, trace_type="manifest"):
        """Derives the real start time of a workflow job from its end time
        and the runtime recorded in its manifest.

        Args:
        - job_name: workflow job name, e.g. "wf_manifest-1_S0".
        - time_end: epoch end time of the job.
        - trace_type: workflow handling mode; affects bounding jobs only.
        Returns the derived epoch start time.
        Raises SystemError if a bounding job appears in a "multi" trace.
        """
        name, stage_id, deps = TaskTracker.extract_wf_name(job_name)
        workflow_file = "-".join(name.split("-")[:-1])
        manifest_info = self.get_workflow_info(workflow_file)
        runtime = manifest_info["runtime"]
        tasks = manifest_info["tasks"]
        # NOTE: was `stage_id is ""` — identity comparison on a string
        # literal is unreliable (SyntaxWarning since Python 3.8); use ==.
        if stage_id == "":
            # Bounding job: represents the whole workflow.
            if trace_type == "multi":
                raise SystemError("Found a bounding job ({0}) in a "
                                  "dependencies type trace.".format(
                                      job_name))
            if trace_type == "manifest":
                # Manifest jobs are recorded at workflow end.
                return time_end
            return time_end - runtime
        # Stage job: subtract that stage's simulated runtime.
        return time_end - int(tasks[stage_id]["runtime_sim"])

    @classmethod
    def get_traces_with_bad_time_starts(cls, db_obj):
        """Returns the trace_ids that contain non-sim jobs with
        time_start == 0 but a non-zero time_end."""
        query = """
                SELECT traces.trace_id tid, name, count(*) cc
                FROM traces, experiment
                WHERE traces.trace_id=experiment.trace_id
                      AND time_start=0 AND time_end!=0
                      AND job_name!="sim_job"
                group by traces.trace_id
                """
        result_list = db_obj.doQueryDic(query)
        trace_id_list = [res["tid"] for res in result_list]
        return trace_id_list