def load_trace(self, db_obj):
    """Reads and returns the experiment trace from the analysis database.

    Args:
    - db_obj: DB object configured to access the analysis database.
    """
    result_trace = ResultTrace()
    result_trace.load_trace(db_obj, self._definition._trace_id, False)
    return result_trace
def test_transform_pbs_to_slurm(self):
    pbs_list = {
        "account": ["account1", "account2"],
        "cores_per_node": [24, 48],
        "numnodes": [100, 200],
        "class": ["queue1", "queue2"],
        "wallclock_requested": [120, 368],
        "created": [1000, 2000],
        "start": [1100, 2200],
        "completion": [1500, 2700],
        "jobname": ["name1", "name2"]
    }
    rt = ResultTrace()
    slurm_list = rt._transform_pbs_to_slurm(pbs_list)
    self.assertEqual(slurm_list["job_db_inx"], [0, 1])
    self.assertEqual(slurm_list["account"], ["account1", "account2"])
    self.assertEqual(slurm_list["cpus_req"], [2400, 9600])
    self.assertEqual(slurm_list["cpus_alloc"], [2400, 9600])
    self.assertEqual(slurm_list["job_name"], ["name1", "name2"])
    self.assertEqual(slurm_list["id_job"], [0, 1])
    self.assertEqual(slurm_list["id_qos"], [3, 3])
    self.assertEqual(slurm_list["id_resv"], [3, 3])
    self.assertEqual(slurm_list["id_user"], [3, 3])
    self.assertEqual(slurm_list["nodes_alloc"], [100, 200])
    self.assertEqual(slurm_list["partition"], ["queue1", "queue2"])
    self.assertEqual(slurm_list["priority"], [3, 3])
    self.assertEqual(slurm_list["state"], [3, 3])
    self.assertEqual(slurm_list["timelimit"], [2, 6])
    self.assertEqual(slurm_list["time_submit"], [1000, 2000])
    self.assertEqual(slurm_list["time_start"], [1100, 2200])
    self.assertEqual(slurm_list["time_end"], [1500, 2700])
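# The assertions above encode the PBS -> SLURM field mapping this test
# exercises (inferred from the expected values, not from the implementation):
#   cpus_req / cpus_alloc = cores_per_node * numnodes (24*100=2400,
#   48*200=9600); timelimit in minutes = wallclock_requested // 60
#   (120s -> 2, 368s -> 6); partition <- class, job_name <- jobname,
#   time_submit <- created, time_start <- start, time_end <- completion.
# Fields with no PBS counterpart (id_qos, id_resv, id_user, priority, state)
# appear to be filled with the placeholder value 3.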
def test_utilization(self):
    rt = ResultTrace()
    rt._lists_start = {
        "job_db_inx": [2, 1],
        "account": ["account2", "account1"],
        "cpus_req": [96, 48],
        "cpus_alloc": [96, 48],
        "job_name": ["jobName2", "jobName1"],
        "id_job": [2, 1],
        "id_qos": [3, 2],
        "id_resv": [4, 3],
        "id_user": [5, 4],
        "nodes_alloc": [4, 2],
        "partition": ["partition2", "partition1"],
        "priority": [199, 99],
        "state": [2, 3],
        "timelimit": [200, 100],
        "time_submit": [3003, 3000],
        "time_start": [3001, 3002],
        "time_end": [3005, 3010]
    }
    (integrated_ut, utilization_timestamps, utilization_values, acc_waste,
     corrected_ut) = rt.calculate_utilization(144)
    self.assertEqual(utilization_timestamps, [3001, 3002, 3005, 3010])
    self.assertEqual(utilization_values, [96, 144, 48, 0])
    self.assertEqual(acc_waste, 0)
    (integrated_ut, utilization_timestamps, utilization_values, acc_waste,
     corrected_ut) = rt.calculate_utilization(144, endCut=3006)
    self.assertEqual(utilization_timestamps, [3001, 3002, 3005, 3006])
    self.assertEqual(utilization_values, [96, 144, 48, 48])
    self.assertEqual(acc_waste, 0)
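# Hand-check of the expected series: on a 144-core machine, job 2 (96 cores)
# runs 3001-3005 and job 1 (48 cores) runs 3002-3010, so the allocated-core
# series steps 96 -> 144 -> 48 -> 0 at timestamps 3001, 3002, 3005 and 3010.
# With endCut=3006 the series is truncated at 3006 while 48 cores are still
# allocated, which is why the last value becomes 48 instead of 0.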
def extract_usage(db_obj, trace_id_rows, fill_none=True, factor=1.0,
                  mean=False):
    """Takes a list of lists of trace_ids and produces a list of lists of
    usage results corresponding to them.

    Args:
    - db_obj: DBManager object connected to a db where the results will be
        pulled from.
    - trace_id_rows: list of lists of integers as trace_ids of experiments.
    - factor: float applied to each loaded result through apply_factor.
    - mean: if True, results of type "usage_mean" are loaded instead of
        "usage".
    """
    exp_rows = []
    my = ResultTrace()
    res_type = "usage"
    if mean:
        res_type = "usage_mean"
    for row in trace_id_rows:
        new_row = []
        exp_rows.append(new_row)
        for trace_id in row:
            exp = ExperimentDefinition()
            exp.load(db_obj, trace_id)
            result = my._get_utilization_result()
            if exp.is_analysis_done():
                result.load(db_obj, trace_id, res_type)
            else:
                result._set("utilization", 0)
                result._set("waste", 0)
                result._set("corrected_utilization", 0)
            result.apply_factor(factor)
            new_row.append(result)
    return exp_rows
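# A minimal usage sketch (the trace ids are illustrative; db_obj is assumed
# to be a DBManager such as the one returned by get_central_db):
#
#   usage_rows = extract_usage(db_obj, [[1, 2], [3, 4]], factor=1.0)
#   first = usage_rows[0][0]  # utilization result for trace_id 1
#
# The output mirrors the shape of trace_id_rows; trace ids whose analysis is
# not done yield zeroed results instead of raising.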
def get_sched_waits(db_obj, trace_id):
    rt = ResultTrace()
    rt.load_trace(db_obj, trace_id)
    start_times = rt._lists_start["time_start"]
    id_jobs = rt._lists_start["id_job"]
    sched_gaps = []
    sched_gaps_stamp = []
    the_max = 0
    the_max_id = -1
    # Measure gaps between consecutive job start times, skipping jobs that
    # never started (time_start == 0).
    for s1, s2, id_job in zip(start_times[:-1], start_times[1:],
                              id_jobs[1:]):
        if s1 != 0 and s2 != 0:
            sched_gap = s2 - s1
            if sched_gap > 0:
                sched_gaps.append(sched_gap)
                sched_gaps_stamp.append(s2)
                if sched_gap > the_max:
                    the_max = sched_gap
                    the_max_id = id_job
    print("Max scheduling gap: {0}s (job {1})".format(the_max, the_max_id))
    return sched_gaps_stamp, sched_gaps
def extract_grouped_results(db_obj, trace_id_rows_colors, edges, result_type):
    """Takes a list of lists of trace_ids and produces a list of lists of
    results corresponding to them.

    Args:
    - db_obj: DBManager object connected to a db where the results will be
        pulled from.
    - trace_id_rows_colors: list of lists of integers as trace_ids of
        experiments.
    - edges: if set to [""], it has no effect and the function extracts
        results of the type result_type. If set to a list of items, results
        will be pulled for each element as:
        "g"+str(edge)+"_"+str(result_type)
    - result_type: string identifying which type of result is being pulled.
        It corresponds to the type of the NumericStats stored in db_obj.

    Returns: a dictionary indexed by edges. Each element is a list of lists
        of the same dimensions as trace_id_rows_colors, each element a
        NumericStats object corresponding to the result of that component.
    """
    exp_rows = {}
    for edge in edges:
        exp_rows[edge] = extract_results(
            db_obj, trace_id_rows_colors,
            ResultTrace.get_result_type_edge(edge, result_type))
    return exp_rows
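# A hedged usage sketch (edges and the result type are illustrative):
#
#   edges = [0, 24 * 450, 24 * 550]
#   grouped = extract_grouped_results(db_obj, trace_id_rows, edges,
#                                     "jobs_slowdown")
#   smallest_group_row0 = grouped[edges[0]][0]  # NumericStats objects
#
# The returned dict is keyed by edge; each value parallels the shape of
# trace_id_rows_colors.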
def correct_times(self, db_obj, trace_id):
    self._experiment = ExperimentDefinition()
    self._experiment.load(db_obj, trace_id)
    self._trace = ResultTrace()
    print("Loading trace {0}".format(trace_id))
    self._trace.load_trace(db_obj, trace_id)
    trace_type = self._experiment._workflow_handling
    print("Calculating corrected start times for trace {0}".format(trace_id))
    modified_start_times = self.get_corrected_start_times(trace_type)
    print("Found {0} jobs whose start time was 0 but that had ended.".format(
        len(modified_start_times)))
    print("About to update times")
    self.apply_new_times(db_obj, modified_start_times)
def test_calculate_utilization_median_result(self):
    rt = ResultTrace()
    self.addCleanup(self._del_table, "usage_values")
    rt._get_utilization_result().create_table(self._db)
    self._db.insertListValues(
        "usage_values",
        ["trace_id", "type", "utilization", "waste",
         "corrected_utilization"],
        [[1, "usage", 0.5, 10, 0.4], [2, "usage", 0.2, 11, 0.2],
         [3, "usage", 0.6, 9, 0.5], [4, "usage", 0.7, 13, 0.7]])
    rt.calculate_utilization_median_result([1, 2, 3, 4], True, self._db, 5)
    new_rt = ResultTrace()
    new_rt.load_utilization_results(self._db, 5)
    self.assertEqual(new_rt._acc_waste, 10.5)
    self.assertEqual(new_rt._integrated_ut, 0.55)
    self.assertEqual(new_rt._corrected_integrated_ut, 0.45)
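# The expected numbers are the medians of the four inserted rows:
#   waste:                 median(9, 10, 11, 13)      = 10.5
#   utilization:           median(0.2, 0.5, 0.6, 0.7) = 0.55
#   corrected_utilization: median(0.2, 0.4, 0.5, 0.7) = 0.45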
def test_store_load_trace(self):
    self._create_tables()
    rt = ResultTrace()
    rt._lists_submit = {
        "job_db_inx": [1, 2],
        "account": ["account1", "account2"],
        "cpus_req": [48, 96],
        "cpus_alloc": [48, 96],
        "job_name": ["jobName1", "jobName2"],
        "id_job": [1, 2],
        "id_qos": [2, 3],
        "id_resv": [3, 4],
        "id_user": [4, 5],
        "nodes_alloc": [2, 4],
        "partition": ["partition1", "partition2"],
        "priority": [99, 199],
        "state": [3, 2],
        "timelimit": [100, 200],
        "time_submit": [3000, 3003],
        "time_start": [3002, 3001],
        "time_end": [3002, 3005]
    }
    rt._lists_start = {
        "job_db_inx": [2, 1],
        "account": ["account2", "account1"],
        "cpus_req": [96, 48],
        "cpus_alloc": [96, 48],
        "job_name": ["jobName2", "jobName1"],
        "id_job": [2, 1],
        "id_qos": [3, 2],
        "id_resv": [4, 3],
        "id_user": [5, 4],
        "nodes_alloc": [4, 2],
        "partition": ["partition2", "partition1"],
        "priority": [199, 99],
        "state": [2, 3],
        "timelimit": [200, 100],
        "time_submit": [3003, 3000],
        "time_start": [3001, 3002],
        "time_end": [3005, 3002]
    }
    rt.store_trace(self._db, 1)
    new_rt = ResultTrace()
    new_rt.load_trace(self._db, 1)
    self.assertEqual(rt._lists_start, new_rt._lists_start)
    self.assertEqual(rt._lists_submit, new_rt._lists_submit)
def test_calculate_waiting_submitted_work(self):
    rt = ResultTrace()
    rt._lists_submit = {
        "job_db_inx": [2, 1, 3],
        "account": ["account2", "account1", "account3"],
        "cpus_req": [1, 1, 1],
        "cpus_alloc": [1, 1, 1],
        "job_name": ["jobName2", "jobName1", "jobName3"],
        "id_job": [2, 1, 3],
        "id_qos": [3, 2, 1],
        "id_resv": [4, 3, 0],
        "id_user": [5, 4, 1],
        "nodes_alloc": [4, 2, 3],
        "partition": ["partition2", "partition1", "partition1"],
        "priority": [199, 99, 200],
        "state": [3, 3, 3],
        "timelimit": [200, 100, 200],
        "time_submit": [2998, 2999, 3000],
        "time_start": [3001, 3003, 3004],
        "time_end": [3005, 3010, 3012]
    }
    rt._lists_start = {
        "job_db_inx": [2, 1, 3],
        "account": ["account2", "account1", "account3"],
        "cpus_req": [1, 1, 1],
        "cpus_alloc": [1, 1, 1],
        "job_name": ["jobName2", "jobName1", "jobName3"],
        "id_job": [2, 1, 3],
        "id_qos": [3, 2, 1],
        "id_resv": [4, 3, 0],
        "id_user": [5, 4, 1],
        "nodes_alloc": [4, 2, 3],
        "partition": ["partition2", "partition1", "partition1"],
        "priority": [199, 99, 200],
        "state": [3, 3, 3],
        "timelimit": [200, 100, 200],
        "time_submit": [2998, 2999, 3000],
        "time_start": [3001, 3003, 3004],
        "time_end": [3005, 3010, 3012]
    }
    (stamps, waiting_ch, core_h_per_min_stamps,
     core_h_per_min_values) = rt.calculate_waiting_submitted_work(
         acc_period=0)
    self.assertEqual(stamps, [2998, 2999, 3000, 3001, 3003, 3004])
    self.assertEqual(waiting_ch, [4, 11, 19, 15, 8, 0])
    self.assertEqual(core_h_per_min_stamps, [2999, 3000])
    self.assertEqual(core_h_per_min_values, [11, 9.5])
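# Derived from the fixture: while waiting, each job contributes
# cpus_alloc * (time_end - time_start) core-seconds (4, 7 and 8 here), so the
# waiting-work series climbs 4 -> 11 -> 19 as jobs are submitted and drops
# 15 -> 8 -> 0 as they start. The per-minute series appears to be cumulative
# submitted work over elapsed time: 11/1 = 11 at 2999 and (4+7+8)/2 = 9.5 at
# 3000 (an inference from this fixture, not from the implementation).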
def del_results(self, db_obj):
    """Deletes all analysis results associated with this experiment."""
    field = "trace_id"
    value = self._trace_id
    db_obj.delete_rows(Histogram()._table_name, field, value)
    db_obj.delete_rows(
        ResultTrace()._get_utilization_result()._table_name, field, value)
    db_obj.delete_rows(NumericStats()._table_name, field, value)
def test_load_job_results_grouped_core_seconds(self):
    db_obj = self._db
    hist = Histogram()
    stat = NumericStats()
    self.addCleanup(self._del_table, "histograms")
    self.addCleanup(self._del_table, "numericStats")
    hist.create_table(db_obj)
    stat.create_table(db_obj)
    rt = ResultTrace()
    pbs_list = {
        "account": ["account1", "account2", "account3"],
        "cores_per_node": [24, 24, 24],
        "numnodes": [1, 1, 1],
        "wallclock_requested": [120, 368, 400],
        "class": ["queue1", "queue2", "queue3"],
        "created": [1000, 2000, 3000],
        "start": [1100, 2200, 3300],
        "completion": [1500, 2700, 4000],
        "jobname": ["name1", "name2", "name3"]
    }
    rt._lists_submit = rt._transform_pbs_to_slurm(pbs_list)
    rt.calculate_job_results_grouped_core_seconds([0, 24 * 450, 24 * 550],
                                                  True, db_obj, 1)
    new_rt = ResultTrace()
    new_rt.load_job_results_grouped_core_seconds([0, 24 * 450, 24 * 550],
                                                 db_obj, 1)
    fields = [
        "jobs_runtime_cdf", "jobs_runtime_stats", "jobs_waittime_cdf",
        "jobs_waittime_stats", "jobs_turnaround_cdf", "jobs_turnaround_stats",
        "jobs_requested_wc_cdf", "jobs_requested_wc_stats",
        "jobs_cpus_alloc_cdf", "jobs_cpus_alloc_stats", "jobs_slowdown_cdf",
        "jobs_slowdown_stats"
    ]
    new_fields = []
    for edge in [0, 24 * 450, 24 * 550]:
        for field in fields:
            new_fields.append("g" + str(edge) + "_" + field)
    for field in new_fields:
        self.assertNotEqual(new_rt.jobs_results[field], None)
def test_store_load(self):
    db_obj = self._db
    hist = Histogram()
    stat = NumericStats()
    self.addCleanup(self._del_table, "histograms")
    self.addCleanup(self._del_table, "numericStats")
    hist.create_table(db_obj)
    stat.create_table(db_obj)
    job_list_1 = {
        "job_name": [
            "wf_manifest-2_S0", "wf_manifest-2_S1_dS0",
            "wf_manifest-3_S0", "wf_manifest-3_S1_dS0"
        ],
        "id_job": [0, 1, 2, 3],
        "time_submit": [100, 100, 1100, 1100],
        "time_start": [110, 215, 1200, 1400],
        "time_end": [200, 250, 1300, 1500]
    }
    job_list_2 = {
        "job_name": ["wf_manifest-2_S0", "wf_manifest-3_S0"],
        "id_job": [0, 1],
        "time_submit": [100, 1100],
        "time_start": [110, 1200],
        "time_end": [615, 2000]
    }
    wf_d = WorkflowDeltas()
    wf_d._first_trace = ResultTrace()
    wf_d._second_trace = ResultTrace()
    wf_d._first_trace._lists_submit = job_list_1
    wf_d._second_trace._lists_submit = job_list_2
    wf_d._first_workflows = wf_d._first_trace.do_workflow_pre_processing()
    wf_d._second_workflows = wf_d._second_trace.do_workflow_pre_processing()
    wf_d.produce_deltas()
    results_1 = wf_d.calculate_delta_results(True, db_obj, 1)
    wf_d_2 = WorkflowDeltas()
    results_2 = wf_d_2.load_delta_results(db_obj, 1)
    for field in list(results_1.keys()):
        assertEqualResult(self, results_1[field], results_2[field], field)
def test_join_dics_of_lists(self):
    dict1 = {"key1": [1, 2, 3], "key2": [4, 5, 6]}
    dict2 = {"key2": [7, 8, 9], "key3": [10, 11, 12]}
    new_dict = ResultTrace.join_dics_of_lists(dict1, dict2)
    self.assertDictEqual(new_dict, {
        "key1": [1, 2, 3],
        "key2": [4, 5, 6, 7, 8, 9],
        "key3": [10, 11, 12]
    })
def load_traces(self, db_obj, first_id, second_id):
    """Loads the jobs of the two traces to compare.

    Args:
    - db_obj: DB object configured to access the analysis database.
    - first_id: int, trace_id of the first trace.
    - second_id: int, trace_id of the second trace.
    """
    self._first_trace = ResultTrace()
    self._first_trace_id = first_id
    self._second_trace = ResultTrace()
    self._second_trace_id = second_id
    self._first_trace.load_trace(db_obj, self._first_trace_id)
    self._second_trace.load_trace(db_obj, self._second_trace_id)
    self._first_workflows = self._first_trace.do_workflow_pre_processing()
    self._second_workflows = self._second_trace.do_workflow_pre_processing()
def test_calculate_job_results(self):
    db_obj = FakeDBObj(self)
    rt = ResultTrace()
    pbs_list = {
        "account": ["account1", "account2"],
        "cores_per_node": [24, 48],
        "numnodes": [100, 200],
        "class": ["queue1", "queue2"],
        "wallclock_requested": [120, 368],
        "created": [1000, 2000],
        "start": [1100, 2200],
        "completion": [1500, 2700],
        "jobname": ["name1", "name2"]
    }
    rt._lists_submit = rt._transform_pbs_to_slurm(pbs_list)
    rt.calculate_job_results(True, db_obj, 1)
    self.assertEqual(db_obj._id_count, 12)
    self.assertEqual(db_obj._set_fields, [
        "jobs_runtime_cdf", "jobs_runtime_stats", "jobs_waittime_cdf",
        "jobs_waittime_stats", "jobs_turnaround_cdf", "jobs_turnaround_stats",
        "jobs_requested_wc_cdf", "jobs_requested_wc_stats",
        "jobs_cpus_alloc_cdf", "jobs_cpus_alloc_stats", "jobs_slowdown_cdf",
        "jobs_slowdown_stats"
    ])
    self.assertEqual(db_obj._hist_count, 6)
    self.assertEqual(db_obj._stats_count, 6)
def test_calculate_job_results_grouped_core_seconds(self):
    db_obj = FakeDBObj(self)
    rt = ResultTrace()
    pbs_list = {
        "account": ["account1", "account2", "account3"],
        "cores_per_node": [24, 24, 24],
        "numnodes": [1, 1, 1],
        "wallclock_requested": [360, 500, 600],
        "class": ["queue1", "queue2", "queue3"],
        "created": [1000, 2000, 3000],
        "start": [1100, 2200, 3300],
        "completion": [1500, 2700, 4000],
        "jobname": ["sim_job", "sim_job", "sim_job"]
    }
    rt._lists_submit = rt._transform_pbs_to_slurm(pbs_list)
    rt.calculate_job_results_grouped_core_seconds([0, 24 * 450, 24 * 550],
                                                  True, db_obj, 1)
    self.assertEqual(db_obj._id_count, 12 * 3)
    fields = [
        "jobs_runtime_cdf", "jobs_runtime_stats", "jobs_waittime_cdf",
        "jobs_waittime_stats", "jobs_turnaround_cdf", "jobs_turnaround_stats",
        "jobs_requested_wc_cdf", "jobs_requested_wc_stats",
        "jobs_cpus_alloc_cdf", "jobs_cpus_alloc_stats", "jobs_slowdown_cdf",
        "jobs_slowdown_stats"
    ]
    new_fields = []
    for edge in [0, 24 * 450, 24 * 550]:
        for field in fields:
            new_fields.append("g" + str(edge) + "_" + field)
    self.assertEqual(db_obj._set_fields, new_fields)
    self.assertEqual(db_obj._hist_count, 6 * 3)
    self.assertEqual(db_obj._stats_count, 6 * 3)
def test_get_corrected_start_times(self):
    self._create_tables()
    rt = ResultTrace()
    rt._lists_submit = {
        "job_db_inx": [1, 2, 3],
        "account": ["account1", "account2", "a3"],
        "cpus_req": [48, 96, 96],
        "cpus_alloc": [48, 96, 96],
        "job_name": [
            "wf_synthLongWide.json-1_S0",
            "wf_synthLongWide.json-1_S1_dS0",
            "wf_synthLongWide.json-2_S1_dS0"
        ],
        "id_job": [1, 2, 3],
        "id_qos": [2, 3, 3],
        "id_resv": [3, 4, 5],
        "id_user": [4, 5, 6],
        "nodes_alloc": [2, 4, 4],
        "partition": ["partition1", "partition2", "partition2"],
        "priority": [99, 199, 210],
        "state": [3, 3, 3],
        "timelimit": [100, 200, 300],
        "time_submit": [3000, 3003, 3004],
        "time_start": [0, 20000, 0],
        "time_end": [20000, 25000, 30000]
    }
    trace_id = 1
    rt.store_trace(self._db, trace_id)
    stc = StartTimeCorrector()
    stc._experiment = ExperimentDefinition()
    stc._experiment._trace_id = trace_id
    stc._trace = ResultTrace()
    stc._trace.load_trace(self._db, trace_id)
    new_times = stc.get_corrected_start_times("multi")
    self.assertEqual(new_times, {1: 20000 - 14340, 3: 30000 - 3540})
def check_trace_and_store(self, scheduler_db_obj, store_db_obj):
    """Imports the result trace from an experiment and stores it in a
    central database.

    Args:
    - scheduler_db_obj: DBManager object configured to connect to the
        scheduler's database.
    - store_db_obj: DBManager object configured to connect to the database
        where result traces should be stored.

    Returns True if the simulation produced a valid trace, False otherwise.
    """
    result_trace = ResultTrace()
    result_trace.import_from_db(scheduler_db_obj,
                                ExperimentRunner._scheduler_acc_table)
    status = True
    end_time = self._definition.get_end_epoch()
    if len(result_trace._lists_start["time_end"]) == 0:
        print("Error: No simulated jobs")
        return False
    # The trace is too short if the last job was submitted more than ten
    # minutes before the expected end of the experiment.
    last_submit_time = result_trace._lists_submit["time_submit"][-1]
    if last_submit_time < (end_time - 600):
        print("Simulation ended too soon: {0} vs. expected {1}.".format(
            last_submit_time, end_time))
        status = False
    result_trace.store_trace(store_db_obj, self._definition._trace_id)
    return status
def test_get_job_times(self):
    rt = ResultTrace()
    rt._lists_submit["time_end"] = [10, 10, 10000, 55, 330]
    rt._lists_submit["time_start"] = [5, 2, 1000, 50, 290]
    rt._lists_submit["time_submit"] = [0, 2, 30, 100, 200]
    rt._lists_submit["job_name"] = ["J0", "J1", "J2", "J3", "wf_man"]
    rt._lists_submit["timelimit"] = [1, 2, 3, 4, 5]
    rt._lists_submit["cpus_alloc"] = [10, 20, 30, 40, 50]
    (jobs_runtime, jobs_waittime, jobs_turnaround, jobs_timelimit,
     jobs_cores_alloc, jobs_slow_down) = rt._get_job_times(only_non_wf=True)
    self.assertEqual(jobs_runtime, [8, 9000])
    self.assertEqual(jobs_waittime, [0, 970])
    self.assertEqual(jobs_turnaround, [8, 9970])
    self.assertEqual(jobs_timelimit, [2, 3])
    self.assertEqual(jobs_cores_alloc, [20, 30])
    self.assertEqual(jobs_slow_down, [1.0, 9970.0 / 9000.0])
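# The expected values can be hand-checked for the surviving jobs J1 and J2:
#   runtime    = time_end - time_start    (10-2=8,   10000-1000=9000)
#   waittime   = time_start - time_submit (2-2=0,    1000-30=970)
#   turnaround = time_end - time_submit   (10-2=8,   10000-30=9970)
#   slowdown   = turnaround / runtime     (8/8=1.0,  9970/9000)
# only_non_wf=True drops the workflow job ("wf_man"); J0 and J3 are evidently
# filtered out as well (their very short runtimes suggest a minimum-duration
# guard in _get_job_times, though that is an inference from this fixture).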
def test_get_job_times_limits(self):
    rt = ResultTrace()
    rt._lists_submit["time_end"] = [10, 10, 10000, 140]
    rt._lists_submit["time_start"] = [5, 2, 1000, 120]
    rt._lists_submit["time_submit"] = [0, 2, 30, 100]
    rt._lists_submit["job_name"] = ["J0", "J1", "J2", "J3"]
    rt._lists_submit["timelimit"] = [1, 2, 3, 4]
    rt._lists_submit["cpus_alloc"] = [10, 20, 30, 40]
    (jobs_runtime, jobs_waittime, jobs_turnaround, jobs_timelimit,
     jobs_cores_alloc, jobs_slow_down) = rt._get_job_times(
         submit_start=20, submit_stop=40)
    self.assertEqual(jobs_runtime, [9000])
    self.assertEqual(jobs_waittime, [970])
    self.assertEqual(jobs_turnaround, [9970])
    self.assertEqual(jobs_timelimit, [3])
    self.assertEqual(jobs_cores_alloc, [30])
    self.assertEqual(jobs_slow_down, [9970.0 / 9000.0])
def test_get_delta_values_same_format(self):
    job_list_1 = {
        "job_name": [
            "wf_manifest-2_S0", "wf_manifest-2_S1_dS0",
            "wf_manifest-3_S0", "wf_manifest-3_S1_dS0"
        ],
        "id_job": [0, 1, 2, 3],
        "time_submit": [100, 100, 1100, 1100],
        "time_start": [110, 215, 1200, 1400],
        "time_end": [200, 250, 1300, 1500]
    }
    job_list_2 = {
        "job_name": [
            "wf_manifest-2_S0", "wf_manifest-2_S1_dS0",
            "wf_manifest-3_S0", "wf_manifest-3_S1_dS0"
        ],
        "id_job": [0, 1, 2, 3],
        "time_submit": [100, 100, 1100, 1100],
        "time_start": [110, 600, 1200, 1900],
        "time_end": [200, 615, 1300, 2000]
    }
    wf_d = WorkflowDeltas()
    wf_d._first_trace = ResultTrace()
    wf_d._second_trace = ResultTrace()
    wf_d._first_trace._lists_submit = job_list_1
    wf_d._second_trace._lists_submit = job_list_2
    wf_d._first_workflows = wf_d._first_trace.do_workflow_pre_processing()
    wf_d._second_workflows = wf_d._second_trace.do_workflow_pre_processing()
    (wf_names, runtime_deltas, waitime_deltas, turnaround_deltas,
     stretch_deltas) = wf_d.produce_deltas()
    self.assertEqual(runtime_deltas, [365, 500])
    self.assertEqual(waitime_deltas, [0, 0])
    self.assertEqual(turnaround_deltas, [365, 500])
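# Hand-check of the expected deltas (second trace minus first, per workflow,
# taking workflow runtime as last time_end minus first time_start):
# wf_manifest-2 goes from 250-110=140 to 615-110=505 (delta 365) and
# wf_manifest-3 from 1500-1200=300 to 2000-1200=800 (delta 500). The first
# start and submit times are identical in both traces, so the wait-time
# deltas are 0 and the turnaround deltas match the runtime deltas.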
def test_load_job_results(self):
    db_obj = self._db
    hist = Histogram()
    stat = NumericStats()
    self.addCleanup(self._del_table, "histograms")
    self.addCleanup(self._del_table, "numericStats")
    hist.create_table(db_obj)
    stat.create_table(db_obj)
    rt = ResultTrace()
    pbs_list = {
        "account": ["account1", "account2"],
        "cores_per_node": [24, 48],
        "numnodes": [100, 200],
        "class": ["queue1", "queue2"],
        "wallclock_requested": [120, 368],
        "created": [1000, 2000],
        "start": [1100, 2200],
        "completion": [1500, 2700],
        "jobname": ["name1", "name2"]
    }
    rt._lists_submit = rt._transform_pbs_to_slurm(pbs_list)
    rt.calculate_job_results(True, db_obj, 1)
    new_rt = ResultTrace()
    new_rt.load_job_results(db_obj, 1)
    # Assert on the freshly loaded trace so the load path is exercised.
    for field in [
            "jobs_runtime_cdf", "jobs_runtime_stats", "jobs_waittime_cdf",
            "jobs_waittime_stats", "jobs_turnaround_cdf",
            "jobs_turnaround_stats", "jobs_requested_wc_cdf",
            "jobs_requested_wc_stats", "jobs_cpus_alloc_cdf",
            "jobs_cpus_alloc_stats", "jobs_slowdown_cdf",
            "jobs_slowdown_stats"
    ]:
        self.assertNotEqual(new_rt.jobs_results[field], None)
def _create_tables(self):
    rt = ResultTrace()
    self.addCleanup(self._del_table, "import_table")
    rt.create_import_table(self._db, "import_table")
    self.addCleanup(self._del_table, "traces")
    rt.create_trace_table(self._db, "traces")
    self.addCleanup(self._del_table, "experiment")
    exp = ExperimentDefinition()
    exp.create_table(self._db)
def test_do_full_run(self):
    sched_db_obj = DB(self._vm_ip, "slurm_acct_db",
                      os.getenv("SLURMDB_USER", None),
                      os.getenv("SLURMDB_PASS", None))
    trace = ResultTrace()
    self.addCleanup(self._del_table, "traces")
    trace.create_trace_table(self._db, "traces")
    ExperimentRunner.configure(trace_folder="/tmp/",
                               trace_generation_folder="tmp",
                               local=False,
                               run_hostname=self._vm_ip,
                               run_user=None,
                               scheduler_conf_dir="/scsf/slurm_conf",
                               local_conf_dir="configs/",
                               scheduler_folder="/scsf/",
                               drain_time=100)
    ensureDir("tmp")
    ed = ExperimentDefinition(seed="seeeed",
                              machine="edison",
                              trace_type="single",
                              manifest_list=[{
                                  "share": 1.0,
                                  "manifest": "manifestSim.json"
                              }],
                              workflow_policy="period",
                              workflow_period_s=5,
                              workflow_handling="single",
                              preload_time_s=60,
                              start_date=datetime(2016, 1, 1),
                              workload_duration_s=1800)
    self.addCleanup(self._del_table, "experiment")
    ed.create_table(self._db)
    ed.store(self._db)
    er = ExperimentRunner(ed)
    self.assertTrue(er.do_full_run(sched_db_obj, self._db))
def test_get_job_times_grouped(self):
    rt = ResultTrace()
    rt._lists_submit["time_end"] = [10, 10, 10000, 55, 330, 460]
    rt._lists_submit["time_start"] = [5, 2, 1000, 50, 290, 400]
    rt._lists_submit["time_submit"] = [0, 2, 30, 100, 200, 300]
    rt._lists_submit["job_name"] = ["J0", "J1", "J2", "J3", "wf_man", "J4"]
    rt._lists_submit["timelimit"] = [1, 2, 3, 4, 5, 3]
    rt._lists_submit["cpus_alloc"] = [1, 1, 30, 40, 50, 4]
    cores_seconds_edges = [0, 500, 1000]
    (jobs_runtime, jobs_waittime, jobs_turnaround, jobs_timelimit,
     jobs_cores_alloc, jobs_slow_down,
     jobs_timesubmit) = rt.get_job_times_grouped_core_seconds(
         cores_seconds_edges, only_non_wf=True, submit_start=0,
         submit_stop=10000000)
    self.assertEqual(jobs_runtime[0], [8])
    self.assertEqual(jobs_waittime[0], [0])
    self.assertEqual(jobs_turnaround[0], [8])
    self.assertEqual(jobs_timelimit[0], [2])
    self.assertEqual(jobs_cores_alloc[0], [1])
    self.assertEqual(jobs_slow_down[0], [1])
    self.assertEqual(jobs_timesubmit[0], [2])
    self.assertEqual(jobs_runtime[500], [60])
    self.assertEqual(jobs_waittime[500], [100])
    self.assertEqual(jobs_turnaround[500], [160])
    self.assertEqual(jobs_timelimit[500], [3])
    self.assertEqual(jobs_cores_alloc[500], [4])
    self.assertEqual(jobs_slow_down[500], [160.0 / 60.0])
    self.assertEqual(jobs_timesubmit[500], [300])
    self.assertEqual(jobs_runtime[1000], [9000])
    self.assertEqual(jobs_waittime[1000], [970])
    self.assertEqual(jobs_turnaround[1000], [9970])
    self.assertEqual(jobs_timelimit[1000], [3])
    self.assertEqual(jobs_cores_alloc[1000], [30])
    self.assertEqual(jobs_slow_down[1000], [9970.0 / 9000])
    self.assertEqual(jobs_timesubmit[1000], [30])
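# The grouping appears to bin jobs by requested core-seconds, i.e.
# cpus_alloc * timelimit (minutes) * 60: J1 -> 1*2*60=120 (group 0),
# J4 -> 4*3*60=720 (group 500), J2 -> 30*3*60=5400 (group 1000). This is
# inferred from the fixture and the expected group membership above.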
def test_store_trace(self):
    self._create_tables()
    rt = ResultTrace()
    rt._lists_submit = {
        "job_db_inx": [1, 2],
        "account": ["account1", "account2"],
        "cpus_req": [48, 96],
        "cpus_alloc": [48, 96],
        "job_name": ["jobName1", "jobName2"],
        "id_job": [1, 2],
        "id_qos": [2, 3],
        "id_resv": [3, 4],
        "id_user": [4, 5],
        "nodes_alloc": [2, 4],
        "partition": ["partition1", "partition2"],
        "priority": [99, 199],
        "state": [3, 2],
        "timelimit": [100, 200],
        "time_submit": [3000, 3003],
        "time_start": [3002, 3001],
        "time_end": [3002, 3005]
    }
    rt._lists_start = {
        "job_db_inx": [2, 1],
        "account": ["account2", "account1"],
        "cpus_req": [96, 48],
        "cpus_alloc": [96, 48],
        "job_name": ["jobName2", "jobName1"],
        "id_job": [2, 1],
        "id_qos": [3, 2],
        "id_resv": [4, 3],
        "id_user": [5, 4],
        "nodes_alloc": [4, 2],
        "partition": ["partition2", "partition1"],
        "priority": [199, 99],
        "state": [2, 3],
        "timelimit": [200, 100],
        "time_submit": [3003, 3000],
        "time_start": [3001, 3002],
        "time_end": [3005, 3002]
    }
    rt.store_trace(self._db, 1)
    rows = self._db.doQuery("SELECT time_start FROM traces "
                            "WHERE trace_id=1 "
                            "ORDER BY time_start")
    self.assertIn((3001, ), rows)
    self.assertIn((3002, ), rows)
def load_trace(self, db_obj):
    """Reads and returns the experiment trace from the analysis database."""
    result_trace = ResultTrace()
    result_trace.load_trace(db_obj, self._definition._trace_id)
    return result_trace
""" Creates the SQL schema for the workload databases. Env vars: - ANALYSIS_DB_HOST: hostname of the system hosting the database. - ANALYSIS_DB_NAME: database name to read from. - ANALYSIS_DB_USER: user to be used to access the database. - ANALYSIS_DB_PASS: password to be used to used to access the database. - ANALYSIS_DB_PORT: port on which the database runs. """ from orchestration import get_central_db from orchestration.definition import ExperimentDefinition from stats.trace import ResultTrace from stats import Histogram, NumericStats db_obj = get_central_db() ExperimentDefinition().create_table(db_obj) ResultTrace().create_trace_table(db_obj, ResultTrace()._table_name) Histogram().create_table(db_obj) ResultTrace()._get_utilization_result().create_table(db_obj) NumericStats().create_table(db_obj)
y_limits_dic = {
    "[0,48] core.h": (1, 1000),
    "(48, 960] core.h": (1, 100),
    "(960, inf.) core.h": (1, 20)
}

target_dir = "percent"
grouping = [1, 3, 3, 3, 3, 3]
colors, hatches, legend = produce_plot_config(db_obj, trace_id_rows)

name = "Slowdown"
for edge in core_seconds_edges:
    edge_result_type = ResultTrace.get_result_type_edge(edge, result_type)
    print("Loading " + edge_result_type)
    edge_plot_results = extract_results(db_obj, trace_id_rows,
                                        edge_result_type)
    edge_formatted = edge_keys[edge]
    title = "Jobs slowdown: {0}".format(edge_formatted)
    y_limits = y_limits_dic[edge_formatted]
    print("Plotting figure")
    plot_multi_exp_boxplot(
        name=title,
        file_name=target_dir +
        "/percent-slow_down_jobs-{0}.png".format(file_name_edges[edge]),
        title=title,
        exp_rows=edge_plot_results,
        y_axis_labels=manifest_label,
        x_axis_labels=time_labels,