def test_extract(self):
    """Check that extract() groups a flat job list into workflows.

    The job list mixes "wf_manifest-2*" and "wf_manifest-3*" jobs with
    an unrelated "sim_job". The test asserts that exactly two workflows
    are detected ("manifest-2" and "manifest-3"), that they hold 7 and 4
    tasks respectively, and that the parent job recorded for
    "manifest-2" is named "wf_manifest-2".
    """
    jobs = {
        "job_name": [
            "wf_manifest-2",
            "wf_manifest-2_S0",
            "wf_manifest-2_S1_dS0",
            "wf_manifest-2_S2_dS0",
            "wf_manifest-2_S3_dS2",
            "wf_manifest-2_S4_dS3",
            "wf_manifest-2_S5_dS4-dS1",
            "wf_manifest-2_S6_dS0",
            "sim_job",
            "wf_manifest-3",
            "wf_manifest-3_S0",
            "wf_manifest-3_S1_dS0",
            "wf_manifest-3_S2_dS0",
            "wf_manifest-3_S3_dS1-dS2",
        ],
        "id_job": [2, 1, 0, 55, 4, 5, 6, 7, 8, 3, 9, 10, 11, 12],
        "time_start": [1, 2, 15, 17, 22, 27, 42, 12, 20, 1, 2, 15, 17, 22],
        "time_end": [2, 10, 20, 40, 25, 29, 50, 70, 30, 2, 10, 20, 19, 25],
    }
    extractor = WorkflowsExtractor()
    extractor.extract(jobs)

    workflows = extractor._workflows
    self.assertEqual(len(workflows), 2)
    self.assertEqual(list(workflows.keys()), ["manifest-2", "manifest-3"])

    manifest_2 = workflows["manifest-2"]
    self.assertEqual(len(manifest_2._tasks), 7)
    self.assertEqual(manifest_2._parent_job.name, "wf_manifest-2")
    self.assertEqual(len(workflows["manifest-3"]._tasks), 4)
def test_extract_process_single(self):
    """Check extraction and processing of single-job workflows.

    Two "wf_manifest-N" jobs and one "sim_job" are fed in. After
    do_processing() the test asserts that only "manifest-0" remains,
    that its first task is both the start task and the whole critical
    path, and that the critical path runtime is 10. Finally,
    calculate_overall_results() is invoked against a fake DB object.
    """
    db_obj = FakeDBObj(self)
    jobs = {
        "job_name": ["wf_manifest-0", "wf_manifest-1", "sim_job"],
        "id_job": [0, 1, 2],
        "time_submit": [1, 3, 4],
        "time_start": [1, 15, 17],
        # NOTE(review): wf_manifest-1 has time_end 0; presumably that is
        # why it is not kept as a workflow -- confirm against extractor.
        "time_end": [11, 0, 40],
        "cpus_alloc": [100, 100, 300],
    }
    extractor = WorkflowsExtractor()
    extractor.extract(jobs)
    extractor.do_processing()

    self.assertEqual(len(extractor._workflows), 1)
    self.assertEqual(list(extractor._workflows.keys()), ["manifest-0"])

    workflow = extractor.get_workflow("manifest-0")
    first_task = workflow.get_all_tasks()[0]
    self.assertEqual(workflow._start_task, first_task)
    self.assertEqual(workflow._critical_path, [first_task])
    self.assertEqual(workflow._critical_path_runtime, 10)

    extractor.calculate_overall_results(True, db_obj, 1)
def test_calculate_job_results(self):
    """Check what calculate_overall_results() stores in the DB object.

    After extracting and processing two workflows, the overall results
    are calculated against a FakeDBObj. The test asserts the fake
    recorded 12 ids, the twelve "wf_*" field names in the order listed
    below, 6 histograms and 6 stats objects.
    """
    db_obj = FakeDBObj(self)
    extractor = WorkflowsExtractor()
    jobs = {
        "job_name": [
            "wf_manifest-2_S0",
            "wf_manifest-2_S1_dS0",
            "wf_manifest-2_S2_dS0",
            "wf_manifest-2_S3_dS2",
            "wf_manifest-2_S4_dS3",
            "wf_manifest-2_S5_dS4-dS1",
            "wf_manifest-2_S6_dS0",
            "sim_job",
            "wf_manifest-3_S0",
            "wf_manifest-3_S1_dS0",
            "wf_manifest-3_S2_dS0",
            "wf_manifest-3_S3_dS1-dS2",
        ],
        "id_job": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
        "time_start": [1, 15, 17, 22, 27, 42, 12, 20, 1, 15, 17, 22],
        "time_end": [10, 20, 40, 25, 29, 50, 70, 30, 10, 20, 19, 25],
        "time_submit": [1, 1, 1, 1, 1, 1, 1, 20, 2, 2, 2, 2],
        "cpus_alloc": [1, 2, 3, 4, 5, 6, 7, 1, 1, 2, 3, 4],
    }
    # Field names expected to be set on the DB object, in this order.
    expected_fields = [
        "wf_runtime_cdf",
        "wf_runtime_stats",
        "wf_waittime_cdf",
        "wf_waittime_stats",
        "wf_turnaround_cdf",
        "wf_turnaround_stats",
        "wf_stretch_factor_cdf",
        "wf_stretch_factor_stats",
        "wf_jobs_runtime_cdf",
        "wf_jobs_runtime_stats",
        "wf_jobs_cores_cdf",
        "wf_jobs_cores_stats",
    ]

    extractor.extract(jobs)
    extractor.do_processing()
    extractor.calculate_overall_results(True, db_obj, 1)

    self.assertEqual(db_obj._id_count, 12)
    self.assertEqual(db_obj._set_fields, expected_fields)
    self.assertEqual(db_obj._hist_count, 6)
    self.assertEqual(db_obj._stats_count, 6)
def test_load_job_results_per_manifest(self):
    """Check that per-manifest results survive a store/load round trip.

    Results are computed and stored with
    calculate_per_manifest_results() and then read back by a fresh
    extractor through load_per_manifest_results(). The test asserts
    that the loaded manifests are "manifest" and "manifest2" and that
    every "m_<manifest>_<field>" entry compares equal, via
    assertEqualResult, between the stored and the loaded results.
    """
    db_obj = self._db
    histogram = Histogram()
    numeric_stats = NumericStats()
    # Cleanups are registered before the tables are created, so they
    # are already in place if table creation fails part-way.
    self.addCleanup(self._del_table, "histograms")
    self.addCleanup(self._del_table, "numericStats")
    histogram.create_table(db_obj)
    numeric_stats.create_table(db_obj)

    extractor = WorkflowsExtractor()
    jobs = {
        "job_name": [
            "wf_manifest-2_S0",
            "wf_manifest-2_S1_dS0",
            "wf_manifest-2_S2_dS0",
            "wf_manifest-2_S3_dS2",
            "wf_manifest-2_S4_dS3",
            "wf_manifest-2_S5_dS4-dS1",
            "wf_manifest-2_S6_dS0",
            "sim_job",
            "wf_manifest-3_S0",
            "wf_manifest-3_S1_dS0",
            "wf_manifest-3_S2_dS0",
            "wf_manifest-3_S3_dS1-dS2",
            "wf_manifest2-4_S0",
        ],
        "id_job": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        "time_start": [1, 15, 17, 22, 27, 42, 12, 20, 1, 15, 17, 22, 30],
        "time_end": [10, 20, 40, 25, 29, 50, 70, 30, 10, 20, 19, 25, 35],
        "time_submit": [1, 1, 1, 1, 1, 1, 1, 20, 2, 2, 2, 2, 3],
        "cpus_alloc": [1, 2, 3, 4, 5, 6, 7, 1, 1, 2, 3, 4, 33],
    }
    extractor.extract(jobs)
    extractor.do_processing()
    stored = extractor.calculate_per_manifest_results(True, db_obj, 1)

    loader = WorkflowsExtractor()
    loaded = loader.load_per_manifest_results(db_obj, 1)

    manifests = ["manifest2", "manifest"]
    field_names = [
        "wf_runtime_cdf",
        "wf_runtime_stats",
        "wf_waittime_cdf",
        "wf_waittime_stats",
        "wf_turnaround_cdf",
        "wf_turnaround_stats",
        "wf_stretch_factor_cdf",
        "wf_stretch_factor_stats",
        "wf_jobs_runtime_cdf",
        "wf_jobs_runtime_stats",
        "wf_jobs_cores_cdf",
        "wf_jobs_cores_stats",
    ]
    self.assertEqual(sorted(loaded.keys()), sorted(manifests))
    for manifest in manifests:
        for field_name in field_names:
            # Result keys are namespaced per manifest.
            key = "m_" + manifest + "_" + field_name
            assertEqualResult(self, stored[manifest][key],
                              loaded[manifest][key], key)
def test_get_waste_changes(self):
    """Check the waste timeline returned by get_waste_changes().

    Besides the two task-based workflows, the job list holds a single
    "wf_manifestSim.json-4" job running from 30 to 250 on 144 cores.
    The test asserts the returned time stamps are [30, 150, 250], the
    waste deltas are [32, -32, 0] and the accumulated waste is
    120 * 32.
    """
    extractor = WorkflowsExtractor()
    jobs = {
        "job_name": [
            "wf_manifest-2_S0",
            "wf_manifest-2_S1_dS0",
            "wf_manifest-2_S2_dS0",
            "wf_manifest-2_S3_dS2",
            "wf_manifest-2_S4_dS3",
            "wf_manifest-2_S5_dS4-dS1",
            "wf_manifest-2_S6_dS0",
            "sim_job",
            "wf_manifest-3_S0",
            "wf_manifest-3_S1_dS0",
            "wf_manifest-3_S2_dS0",
            "wf_manifest-3_S3_dS1-dS2",
            "wf_manifestSim.json-4",
        ],
        "id_job": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        "time_start": [1, 15, 17, 22, 27, 42, 12, 20, 1, 15, 17, 22, 30],
        "time_end": [10, 20, 40, 25, 29, 50, 70, 30, 10, 20, 19, 25, 250],
        "time_submit": [1, 1, 1, 1, 1, 1, 1, 20, 2, 2, 2, 2, 3],
        "cpus_alloc": [1, 2, 3, 4, 5, 6, 7, 1, 1, 2, 3, 4, 144],
    }
    extractor.extract(jobs)
    extractor.do_processing()

    stamps, deltas, accumulated = extractor.get_waste_changes()

    self.assertEqual(stamps, [30, 150, 250])
    self.assertEqual(deltas, [32, -32, 0])
    self.assertEqual(accumulated, 120 * 32)
def test_get_workflow_times_start_stop_per_manifest_multi(self):
    """Check per-manifest workflow times filtered by submit time.

    Calls _get_per_manifest_workflow_times() with submit_start=2 and
    no submit_stop, then asserts that the "manifest" entry reports a
    single workflow runtime of 24 and the "manifestA" entry a single
    runtime of 6.
    """
    we = WorkflowsExtractor()
    job_list = {
        "job_name": [
            "wf_manifest-2_S0",
            "wf_manifest-2_S1_dS0",
            "wf_manifest-2_S2_dS0",
            "wf_manifest-2_S3_dS2",
            "wf_manifest-2_S4_dS3",
            "wf_manifest-2_S5_dS4-dS1",
            "wf_manifest-2_S6_dS0",
            "sim_job",
            "wf_manifest-3_S0",
            "wf_manifest-3_S1_dS0",
            "wf_manifest-3_S2_dS0",
            "wf_manifest-3_S3_dS1-dS2",
            "wf_manifestA-4_S0",
        ],
        "id_job": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 14],
        "time_start": [1, 15, 17, 22, 27, 42, 12, 20, 1, 15, 17, 22, 4],
        "time_end": [10, 20, 40, 25, 29, 50, 70, 30, 10, 20, 19, 25, 10],
        # NOTE(review): the manifest-2 jobs are submitted at time 1,
        # i.e. before submit_start=2; presumably that is why only one
        # "manifest" runtime is expected -- confirm against extractor.
        "time_submit": [1, 1, 1, 1, 1, 1, 1, 20, 2, 2, 2, 2, 3],
        "cpus_alloc": [1, 2, 3, 4, 5, 6, 7, 1, 1, 2, 3, 4, 1],
    }
    we.extract(job_list)
    we.do_processing()

    manifests = we._get_per_manifest_workflow_times(submit_start=2,
                                                    submit_stop=None)

    self.assertEqual(manifests["manifest"]["wf_runtime"], [24])
    self.assertEqual(manifests["manifestA"]["wf_runtime"], [6])
def test_get_workflow_times_start_stop(self):
    """Check overall workflow times filtered by a submit-time window.

    Calls _get_workflow_times() with submit_start=2 and submit_stop=3
    and asserts that exactly one workflow runtime, 24, is reported.
    """
    we = WorkflowsExtractor()
    job_list = {
        "job_name": [
            "wf_manifest-2_S0",
            "wf_manifest-2_S1_dS0",
            "wf_manifest-2_S2_dS0",
            "wf_manifest-2_S3_dS2",
            "wf_manifest-2_S4_dS3",
            "wf_manifest-2_S5_dS4-dS1",
            "wf_manifest-2_S6_dS0",
            "sim_job",
            "wf_manifest-3_S0",
            "wf_manifest-3_S1_dS0",
            "wf_manifest-3_S2_dS0",
            "wf_manifest-3_S3_dS1-dS2",
        ],
        "id_job": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11],
        "time_start": [1, 15, 17, 22, 27, 42, 12, 20, 1, 15, 17, 22],
        "time_end": [10, 20, 40, 25, 29, 50, 70, 30, 10, 20, 19, 25],
        # NOTE(review): the manifest-2 jobs are submitted at time 1,
        # outside the [2, 3] window; presumably only manifest-3
        # contributes a runtime -- confirm against extractor.
        "time_submit": [1, 1, 1, 1, 1, 1, 1, 20, 2, 2, 2, 2],
        "cpus_alloc": [1, 2, 3, 4, 5, 6, 7, 1, 1, 2, 3, 4],
    }
    we.extract(job_list)
    we.do_processing()

    # Unpack all six values so a change in the returned arity still
    # fails this test; only the runtime list is checked here.
    (wf_runtime, _wf_waittime, _wf_turnaround, _wf_stretch_factor,
     _wf_jobs_runtime, _wf_jobs_cores) = we._get_workflow_times(
         submit_start=2, submit_stop=3)

    self.assertEqual(wf_runtime, [24])
def test_extract_process(self):
    """Check start task and critical path after do_processing().

    Two workflows are extracted and processed. For "manifest-3" the
    test asserts the start task is S0, the critical path is
    [S0, S1, S3] and its runtime is 24. For "manifest-2" it asserts
    the start task is S0, the critical path is [S0, S6] and its
    runtime is 69.
    """
    jobs = {
        "job_name": [
            "wf_manifest-2",
            "wf_manifest-2_S0",
            "wf_manifest-2_S1_dS0",
            "wf_manifest-2_S2_dS0",
            "wf_manifest-2_S3_dS2",
            "wf_manifest-2_S4_dS3",
            "wf_manifest-2_S5_dS4-dS1",
            "wf_manifest-2_S6_dS0",
            "sim_job",
            "wf_manifest-3_S0",
            "wf_manifest-3_S1_dS0",
            "wf_manifest-3_S2_dS0",
            "wf_manifest-3_S3_dS1-dS2",
        ],
        "id_job": [2, 0, 1, 33, 3, 4, 5, 6, 7, 8, 9, 10, 11],
        "time_start": [1, 1, 15, 17, 22, 27, 42, 12, 20, 1, 15, 17, 22],
        "time_end": [1, 10, 20, 40, 25, 29, 50, 70, 30, 10, 20, 19, 25],
    }
    extractor = WorkflowsExtractor()
    extractor.extract(jobs)
    extractor.do_processing()

    self.assertEqual(len(extractor._workflows), 2)
    self.assertEqual(list(extractor._workflows.keys()),
                     ["manifest-2", "manifest-3"])

    manifest_3 = extractor.get_workflow("manifest-3")
    # S2 is looked up although it is not on the critical path: the
    # lookup itself fails the test if the task was not extracted.
    s0, s1, _s2, s3 = [
        manifest_3._tasks[task_id]
        for task_id in ("S0", "S1", "S2", "S3")
    ]
    self.assertEqual(manifest_3._start_task, s0)
    self.assertEqual(manifest_3._critical_path, [s0, s1, s3])
    self.assertEqual(manifest_3._critical_path_runtime, 24)

    manifest_2 = extractor.get_workflow("manifest-2")
    first = manifest_2._tasks["S0"]
    last = manifest_2._tasks["S6"]
    self.assertEqual(manifest_2._start_task, first)
    self.assertEqual(manifest_2._critical_path, [first, last])
    self.assertEqual(manifest_2._critical_path_runtime, 69)
def test_extract_process_wrong_dash_name(self):
    """Check job names that use "-" where other tests use "_".

    Task jobs here are named like "..._S3-dS0-dS1" instead of
    "..._S3_dS0-dS1". The test asserts a single workflow,
    "floodplain.json-350", is extracted and that its runtime is
    117015, its wait time 9771 and its turnaround the sum of both.
    Tasks, dependencies and critical path are printed as diagnostics.
    """
    submit_time = 1420309202
    jobs = {
        "job_name": [
            "wf_floodplain.json-350",
            "wf_floodplain.json-350_S0",
            "wf_floodplain.json-350_S1",
            "wf_floodplain.json-350_S3-dS0-dS1",
            "wf_floodplain.json-350_S5-dS0-dS1",
            "wf_floodplain.json-350_S2-dS3-dS0",
            "wf_floodplain.json-350_S4-dS0-dS5",
            "wf_floodplain.json-350_S6-dS4-dS2",
        ],
        "id_job": [
            39794, 39796, 39797, 39798, 39799, 39800, 39801, 39802,
        ],
        # All eight jobs share the same submit time.
        "time_submit": [submit_time] * 8,
        "time_start": [
            1420318973, 1420318973, 1420318973, 1420358574,
            1420358574, 1420387379, 1420405383, 1420419788,
        ],
        "time_end": [
            1420318973, 1420358573, 1420322573, 1420387374,
            1420405374, 1420398179, 1420419784, 1420435988,
        ],
    }
    extractor = WorkflowsExtractor()
    extractor.extract(jobs)
    extractor.do_processing()

    self.assertEqual(len(extractor._workflows), 1)
    self.assertEqual(list(extractor._workflows.keys()),
                     ["floodplain.json-350"])

    workflow = extractor.get_workflow("floodplain.json-350")
    task_ids = list(workflow._tasks.keys())
    print("TASKS", task_ids)
    task_deps = [task.deps for task in workflow._tasks.values()]
    print("DEPS", task_deps)
    critical_names = [task.name for task in workflow._critical_path]
    print("CP", critical_names)

    self.assertEqual(workflow.get_runtime(), 117015)
    self.assertEqual(workflow.get_waittime(), 9771)
    self.assertEqual(workflow.get_turnaround(), 9771 + 117015)
def test_fill_per_manifest_values(self):
    """Check fill_per_manifest_values(), with and without append.

    First pass: after extract/process/fill, the detected manifests are
    asserted to be "manifest" and "manifest2" and the per-manifest
    runtime, wait time and turnaround lists (and the lengths of the
    stretch-factor, job-runtime and job-cores lists) are checked.

    Second pass: the same job list is extracted and processed again
    and fill_per_manifest_values(append=True) is called; every list is
    then asserted to hold the first-pass values twice.
    """
    we = WorkflowsExtractor()
    job_list = {
        "job_name": [
            "wf_manifest-2_S0",
            "wf_manifest-2_S1_dS0",
            "wf_manifest-2_S2_dS0",
            "wf_manifest-2_S3_dS2",
            "wf_manifest-2_S4_dS3",
            "wf_manifest-2_S5_dS4-dS1",
            "wf_manifest-2_S6_dS0",
            "sim_job",
            "wf_manifest-3_S0",
            "wf_manifest-3_S1_dS0",
            "wf_manifest-3_S2_dS0",
            "wf_manifest-3_S3_dS1-dS2",
            "wf_manifest2-4_S0",
        ],
        "id_job": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
        "time_start": [1, 15, 17, 22, 27, 42, 12, 20, 2, 15, 17, 22, 30],
        "time_end": [10, 20, 40, 25, 29, 50, 70, 30, 10, 20, 19, 25, 35],
        "time_submit": [1, 1, 1, 1, 1, 1, 1, 20, 2, 2, 2, 2, 3],
        "cpus_alloc": [1, 2, 3, 4, 5, 6, 7, 1, 1, 2, 3, 4, 33],
    }

    # First pass: values are filled from scratch.
    we.extract(job_list)
    we.do_processing()
    we.fill_per_manifest_values()

    self.assertEqual(sorted(we._detected_manifests),
                     ["manifest", "manifest2"])

    manifest = we._manifests_values["manifest"]
    self.assertEqual(manifest["wf_runtime"], [69, 23])
    self.assertEqual(manifest["wf_waittime"], [0, 0])
    self.assertEqual(manifest["wf_turnaround"], [69, 23])
    self.assertEqual(len(manifest["wf_stretch_factor"]), 2)
    self.assertEqual(len(manifest["wf_jobs_runtime"]), 11)
    self.assertEqual(len(manifest["wf_jobs_cores"]), 11)

    manifest2 = we._manifests_values["manifest2"]
    self.assertEqual(manifest2["wf_runtime"], [5])
    self.assertEqual(manifest2["wf_waittime"], [27])
    self.assertEqual(manifest2["wf_turnaround"], [32])
    self.assertEqual(len(manifest2["wf_stretch_factor"]), 1)
    self.assertEqual(len(manifest2["wf_jobs_runtime"]), 1)
    self.assertEqual(len(manifest2["wf_jobs_cores"]), 1)

    # Second pass: same input again, appended to the existing values.
    we.extract(job_list)
    we.do_processing()
    we.fill_per_manifest_values(append=True)

    # Look the containers up again in case the fill replaced them.
    manifest = we._manifests_values["manifest"]
    self.assertEqual(manifest["wf_runtime"], [69, 23, 69, 23])
    self.assertEqual(manifest["wf_waittime"], [0, 0, 0, 0])
    self.assertEqual(manifest["wf_turnaround"], [69, 23, 69, 23])
    self.assertEqual(len(manifest["wf_stretch_factor"]), 4)
    self.assertEqual(len(manifest["wf_jobs_runtime"]), 22)
    self.assertEqual(len(manifest["wf_jobs_cores"]), 22)

    manifest2 = we._manifests_values["manifest2"]
    self.assertEqual(manifest2["wf_runtime"], [5, 5])
    self.assertEqual(manifest2["wf_waittime"], [27, 27])
    self.assertEqual(manifest2["wf_turnaround"], [32, 32])
    self.assertEqual(len(manifest2["wf_stretch_factor"]), 2)
    self.assertEqual(len(manifest2["wf_jobs_runtime"]), 2)
    self.assertEqual(len(manifest2["wf_jobs_cores"]), 2)
def test_fill_overall_values(self):
    """Check fill_overall_values(), with and without append.

    First pass: after extract/process/fill, the overall workflow
    runtime, wait time and turnaround lists are asserted to be
    [68, 23], [1, 0] and [69, 23], with 2 stretch factors and 11
    entries each for job runtimes and job cores.

    Second pass: the same job list is extracted and processed again
    and fill_overall_values(append=True) is called; every list is then
    asserted to hold the first-pass values twice.
    """
    we = WorkflowsExtractor()
    job_list = {
        "job_name": [
            "wf_manifest-2",
            "wf_manifest-2_S0",
            "wf_manifest-2_S1_dS0",
            "wf_manifest-2_S2_dS0",
            "wf_manifest-2_S3_dS2",
            "wf_manifest-2_S4_dS3",
            "wf_manifest-2_S5_dS4-dS1",
            "wf_manifest-2_S6_dS0",
            "sim_job",
            "wf_manifest-3_S0",
            "wf_manifest-3_S1_dS0",
            "wf_manifest-3_S2_dS0",
            "wf_manifest-3_S3_dS1-dS2",
        ],
        "id_job": [2, 0, 1, 33, 3, 4, 5, 6, 7, 8, 9, 10, 11],
        "time_start": [2, 2, 15, 17, 22, 27, 42, 12, 20, 2, 15, 17, 22],
        "time_end": [2, 10, 20, 40, 25, 29, 50, 70, 30, 10, 20, 19, 25],
        "time_submit": [1, 1, 1, 1, 1, 1, 1, 1, 20, 2, 2, 2, 2],
        "cpus_alloc": [33, 1, 2, 3, 4, 5, 6, 7, 1, 1, 2, 3, 4],
    }

    # First pass: values are filled from scratch.
    we.extract(job_list)
    we.do_processing()
    we.fill_overall_values()

    self.assertEqual(we._wf_runtime, [68, 23])
    self.assertEqual(we._wf_waittime, [1, 0])
    self.assertEqual(we._wf_turnaround, [69, 23])
    self.assertEqual(len(we._wf_stretch_factor), 2)
    self.assertEqual(len(we._wf_jobs_runtime), 11)
    self.assertEqual(len(we._wf_jobs_cores), 11)

    # Second pass: same input again, appended to the existing values.
    we.extract(job_list)
    we.do_processing()
    we.fill_overall_values(append=True)

    self.assertEqual(we._wf_runtime, [68, 23, 68, 23])
    self.assertEqual(we._wf_waittime, [1, 0, 1, 0])
    self.assertEqual(we._wf_turnaround, [69, 23, 69, 23])
    self.assertEqual(len(we._wf_stretch_factor), 4)
    self.assertEqual(len(we._wf_jobs_runtime), 22)
    self.assertEqual(len(we._wf_jobs_cores), 22)