def test_parse_all_jobs(self):
    """parse_all_jobs should read the manifest into (cores, runtime, tasks)
    with the dependency links between tasks resolved in both directions."""
    workload = WorkloadGenerator(FakeEdison(), FakeTraceGenWF(self),
                                 ["user1"], ["qos1"], ["partition1"],
                                 ["account1"])
    workflow = WorkflowGeneratorMultijobs(["manifest_sim.json"], [1.0],
                                          workload)
    cores, runtime, tasks = WorkflowGeneratorMultijobs.parse_all_jobs(
        "manifest_sim.json")
    # Workflow-level totals.
    self.assertEqual(cores, 144)
    self.assertEqual(runtime, 960)
    self.assertEqual(len(tasks), 2)
    decode_task = tasks["Decode"]
    hello_task = tasks["Hello"]
    # Per-task fields, checked table-style; the dependency lists must
    # reference the other parsed task dict itself.
    expected_fields = {
        "Decode": {
            "id": "Decode",
            "number_of_cores": 112,
            "name": "Decode",
            "runtime_limit": 480,
            "runtime_sim": 120,
            "execution_cmd": "python ./Decode.py",
            "dependencyFrom": [],
            "dependencyTo": [hello_task],
        },
        "Hello": {
            "id": "Hello",
            "number_of_cores": 144,
            "name": "Hello",
            "runtime_limit": 480,
            "runtime_sim": 100,
            "execution_cmd": "python ./Hello.py",
            "dependencyFrom": [decode_task],
            "dependencyTo": [],
        },
    }
    for task_name, fields in expected_fields.items():
        task = tasks[task_name]
        for field_name, expected_value in fields.items():
            self.assertEqual(task[field_name], expected_value)
def test_geb_deps(self):
    """_gen_deps should render dependency job ids as a Slurm-style
    "afterok:" list, and an empty string when there are none."""
    trace_gen = FakeTraceGenWFSimple(self)
    workload = WorkloadGenerator(FakeEdison(), trace_gen, ["user1"],
                                 ["qos1"], ["partition1"], ["account1"])
    workflow = WorkflowGeneratorMultijobs(["manifest_sim.json"], [1.0],
                                          workload)
    # Two satisfied dependencies -> comma-joined afterok clauses.
    task_with_deps = {"dependencyFrom": [{"job_id": 1}, {"job_id": 2}]}
    self.assertEqual(workflow._gen_deps(task_with_deps),
                     "afterok:1,afterok:2")
    # No dependencies -> empty dependency string.
    task_without_deps = {"dependencyFrom": []}
    self.assertEqual(workflow._gen_deps(task_without_deps), "")
def test_task_can_run(self):
    """_task_can_run: a task is runnable iff every dependency has a job_id.

    Fix: the second assertion previously passed "" as the assertTrue msg
    argument (copy-paste leftover), which would blank the failure message.
    """
    tg = FakeTraceGenWFSimple(self)
    wg = WorkloadGenerator(FakeEdison(), tg, ["user1"], ["qos1"],
                           ["partition1"], ["account1"])
    wf = WorkflowGeneratorMultijobs(["manifest_sim.json"], [1.0], wg)
    # All dependencies already have job ids: runnable.
    task = {"dependencyFrom": [{"job_id": 1}, {"job_id": 2}]}
    self.assertTrue(wf._task_can_run(task))
    # No dependencies at all: runnable.
    task = {"dependencyFrom": []}
    self.assertTrue(wf._task_can_run(task))
    # One dependency not yet assigned a job id: not runnable.
    task = {"dependencyFrom": [{"job_id": 1}, {}]}
    self.assertFalse(wf._task_can_run(task))
def test_parse_expand_workflow(self):
    """Triggering the multi-job generator should submit one job per task
    in the manifest, wiring the dependency between them."""
    trace_gen = FakeTraceGenWFSimple(self)
    workload = WorkloadGenerator(FakeEdison(), trace_gen, ["user1"],
                                 ["qos1"], ["partition1"], ["account1"])
    workflow = WorkflowGeneratorMultijobs(["manifest_sim.json"], [1.0],
                                          workload)
    # Two tasks in the manifest -> two jobs generated.
    self.assertEqual(workflow.do_trigger(10000), 2)
    self.assertEqual(trace_gen._job_count, 2)
    # Exactly one dependency edge (Hello depends on Decode).
    self.assertEqual(trace_gen._dep_count, 1)
    expected_manifests = [
        "|wf_manifest_sim.json-1_Decode",
        "|wf_manifest_sim.json-1_Hello_dDecode",
    ]
    self.assertEqual(trace_gen._manifests, expected_manifests)
def get_workflow_info(self, workflow_file):
    """Return manifest info (cores, runtime, tasks) for a workflow file.

    Parses the manifest on first request and memoizes the result in
    self.manifest_dics, keyed by the manifest file name; later calls hit
    the cache.

    Fix: replaced `if not workflow_file in list(self.manifest_dics.keys())`
    with an idiomatic `not in` membership test on the dict itself (no
    keys() materialization, no double negation).

    Args:
    - workflow_file: name of the manifest file, resolved relative to the
      experiment manifest folder.

    Returns: dict with keys "cores", "runtime" and "tasks".
    """
    if workflow_file not in self.manifest_dics:
        # Local import, presumably to avoid a circular dependency at
        # module load time — confirm against the package layout.
        from orchestration.running import ExperimentRunner
        manifest_route = path.join(ExperimentRunner.get_manifest_folder(),
                                   workflow_file)
        cores, runtime, tasks = WorkflowGeneratorMultijobs.parse_all_jobs(
            manifest_route)
        self.manifest_dics[workflow_file] = {
            "cores": cores,
            "runtime": runtime,
            "tasks": tasks
        }
    return self.manifest_dics[workflow_file]
def _expand_workflow(self, manifest, start_time):
    """Schedule every task of a workflow manifest as an individual job.

    Iterates over the parsed tasks in dependency order: a task is
    scheduled (assigned a job id and its core usage registered over its
    simulated runtime) only once all of its dependencies have been
    scheduled; unready tasks are retried on the next pass.

    Fix: the original loop spun forever if no task became runnable in a
    pass (e.g. a dependency cycle in the manifest); it now fails fast.

    Args:
    - manifest: route of the manifest file describing the workflow.
    - start_time: epoch timestamp at which the workflow is submitted.

    Returns: (total_cores, runtime) of the whole workflow as parsed from
    the manifest.

    Raises: ValueError if the remaining tasks' dependencies can never be
    satisfied.
    """
    total_cores, runtime, tasks = WorkflowGeneratorMultijobs.parse_all_jobs(
        manifest)
    job_count = 0
    remaining_tasks = list(tasks.values())
    while remaining_tasks:
        pending = []
        for task in remaining_tasks:
            if not self._task_can_run(task):
                # Dependencies not yet assigned job ids: retry next pass.
                pending.append(task)
                continue
            job_count += 1
            feasible_start_time = self._get_feasible_start_time(
                task, start_time)
            task["time_end"] = feasible_start_time + task["runtime_sim"]
            cores = task["number_of_cores"]
            # Cores are held from the task's start until its end.
            self._add_job_change(feasible_start_time, cores)
            self._add_job_change(task["time_end"], -cores)
            task["job_id"] = job_count
        if len(pending) == len(remaining_tasks):
            # No progress this pass: the dependency graph cannot be
            # satisfied. Fail fast instead of looping forever.
            raise ValueError("Unsatisfiable task dependencies in "
                             "manifest: {0}".format(manifest))
        remaining_tasks = pending
    return total_cores, runtime
def _generate_trace_files(self, definition, trace_generator=None):
    """Creates the workload files according an Experiment definition.

    Seeds the RNG, builds a WorkloadGenerator for the experiment's
    machine, wires in the configured workflow/overload/filler generators,
    generates the trace over the preload + workload window, and dumps the
    trace, qos and users files into the trace generation folder.

    Fix: several print calls were 2to3 artifacts of the form
    print(("msg", x)), which printed tuple reprs instead of messages;
    they now print space-separated values. All other logic is unchanged.

    Args:
    - definition: Definition object defining the experiment.
    - trace_generator: optional TraceGenerator to accumulate the trace
      into; a fresh one is created when omitted.

    Returns: list with the generated trace, qos and users file names.

    Raises: ValueError if the definition's trace type is not "single".
    """
    if trace_generator is None:
        trace_generator = TraceGenerator()
    # Seed the global RNG so trace generation is reproducible.
    print("This is the seed to be used:", definition._seed)
    random_control.set_global_random_gen(seed=definition._seed)
    machine = definition.get_machine()
    # NOTE(review): the filter values are fetched but never used in this
    # method — confirm whether the call has side effects before removing.
    (filter_cores, filter_runtime,
     filter_core_hours) = machine.get_filter_values()
    wg = WorkloadGenerator(machine=definition.get_machine(),
                           trace_generator=trace_generator,
                           user_list=definition.get_user_list(),
                           qos_list=definition.get_qos_list(),
                           partition_list=definition.get_partition_list(),
                           account_list=definition.get_account_list())
    if definition._workflow_policy.split("-")[0] == "sp":
        # "sp-..." policies delegate job generation to a special
        # generator registered at the start of the preload window.
        special_gen = SpecialGenerators.get_generator(
            definition._workflow_policy,
            wg,
            register_datetime=(definition._start_date -
                               timedelta(0, definition._preload_time_s)))
        wg.register_pattern_generator_timer(special_gen)
    else:
        wg.config_filter_func(machine.job_can_be_submitted)
        wg.set_max_interarrival(machine.get_max_interarrival())
    if definition._trace_type != "single":
        raise ValueError("Only 'single' experiments require trace "
                         "generation")
    if definition.get_overload_factor() > 0.0:
        print("doing overload:", definition.get_overload_factor())
        max_cores = machine.get_total_cores()
        single_job_gen = PatternGenerator(wg)
        overload_time = OverloadTimeController(
            single_job_gen,
            register_datetime=(definition._start_date -
                               timedelta(0, definition._preload_time_s)))
        overload_time.configure_overload(
            trace_generator,
            max_cores,
            overload_target=definition.get_overload_factor())
        print("about to register", wg, overload_time)
        wg.register_pattern_generator_timer(overload_time)
    manifest_list = [m["manifest"] for m in definition._manifest_list]
    share_list = [m["share"] for m in definition._manifest_list]
    # "single"/"manifest" handling submits each workflow as one job;
    # anything else expands a workflow into one job per task.
    if (definition._workflow_handling == "single" or
            definition._workflow_handling == "manifest"):
        flow = WorkflowGeneratorSingleJob(manifest_list, share_list, wg)
    else:
        flow = WorkflowGeneratorMultijobs(manifest_list, share_list, wg)
    if definition._workflow_policy == "period":
        alarm = RepeatingAlarmTimer(
            flow, register_datetime=definition._start_date)
        alarm.set_alarm_period(definition._workflow_period_s)
        wg.register_pattern_generator_timer(alarm)
    elif definition._workflow_policy == "percentage":
        wg.register_pattern_generator_share(
            flow, definition._workflow_share / 100)
    target_wait = definition.get_forced_initial_wait()
    if target_wait:
        default_job_separation = 10
        separation = int(
            os.getenv("FW_JOB_SEPARATION", default_job_separation))
        filler(wg,
               start_time=TimeController.get_epoch(
                   (definition._start_date -
                    timedelta(0, definition._preload_time_s))),
               target_wait=target_wait,
               max_cores=machine.get_total_cores(),
               cores_per_node=machine._cores_per_node,
               job_separation=separation)
    trace_generator.reset_work()
    wg.generate_trace(
        (definition._start_date -
         timedelta(0, definition._preload_time_s)),
        (definition._preload_time_s + definition._workload_duration_s))
    max_cores = machine.get_total_cores()
    total_submitted_core_s = trace_generator.get_total_submitted_core_s()
    # Pressure bound: submitted core-seconds over the machine's total
    # capacity during the generated period (preload + workload).
    job_pressure = (float(total_submitted_core_s) / float(
        (definition._preload_time_s + definition._workload_duration_s) *
        max_cores))
    print("Observed job pressure (bound): {0}".format(job_pressure))
    trace_generator.dump_trace(
        path.join(ExperimentRunner._trace_generation_folder,
                  definition.get_trace_file_name()))
    trace_generator.dump_qos(
        path.join(ExperimentRunner._trace_generation_folder,
                  definition.get_qos_file_name()))
    trace_generator.dump_users(
        path.join(ExperimentRunner._trace_generation_folder,
                  definition.get_users_file_name()),
        extra_users=definition.get_system_user_list())
    trace_generator.free_mem()
    return [
        definition.get_trace_file_name(),
        definition.get_qos_file_name(),
        definition.get_users_file_name()
    ]