def test_reanimate_kprofile(self):
    """
    The purpose of the KProfile is to be able to (re-)animate ModelJobs from
    the input data.

    A KProfile document holding one profiled job is serialised to JSON, read
    back through ProfileFormat.from_file and converted with
    Workload.from_kprofile. The resulting workload tag, the scalar job
    attributes and the time signals are then compared against the input.
    """
    expected_times = [0.01, 0.02, 0.03, 0.04]
    expected_values = [15, 16, 17, 18]

    profiled_job = {
        "time_start": 537700,
        "time_queued": 99,
        "duration": 147,
        "ncpus": 72,
        "nnodes": 2,
        "time_series": {
            "kb_read": {
                "times": expected_times,
                "values": expected_values,
                "priority": 10
            }
        }
    }
    kprofile = {
        "version": 1,
        "tag": "KRONOS-KPROFILE-MAGIC",
        "created": "2016-12-14T09:57:35Z",  # Timestamp in strict rfc3339 format.
        "uid": 1234,
        "workload_tag": "A-tag",
        "profiled_jobs": [profiled_job]
    }

    profile = ProfileFormat.from_file(StringIO(json.dumps(kprofile)))
    workload = Workload.from_kprofile(profile)

    self.assertEqual(workload.tag, "A-tag")

    jobs = workload.jobs
    self.assertEqual(len(jobs), 1)

    job = jobs[0]
    self.assertIsInstance(job, ModelJob)
    self.assertEqual(job.time_start, 537700)
    self.assertEqual(job.time_queued, 99)
    self.assertEqual(job.duration, 147)
    self.assertEqual(job.ncpus, 72)
    self.assertEqual(job.nnodes, 2)

    # One entry is expected per known signal type, but only kb_read was
    # supplied: every other entry must be None.
    self.assertEqual(len(job.timesignals), len(signal_types))
    self.assertIn('kb_read', job.timesignals)

    for name, signal in job.timesignals.items():
        if name != 'kb_read':
            self.assertIsNone(signal)
            continue

        self.assertIsInstance(signal, TimeSignal)
        # NOTE(review): zip stops at the shorter sequence, so these two
        # checks do not compare the lengths of the sequences.
        self.assertTrue(
            all(got == want
                for got, want in zip(signal.xvalues, expected_times)))
        self.assertTrue(
            all(got == want
                for got, want in zip(signal.yvalues, expected_values)))
def normalize_jobs(model_jobs, wl_clusters):
    """
    Normalize the time-signals of the generated jobs in order to preserve
    time-series sums in each sub-workload

    For every metric whose total over the generated jobs is non-zero, the
    y-values of that metric in each (copied) job are divided by the generated
    total and multiplied by the total of the original jobs. Metrics whose
    generated total is zero are left untouched.

    :param model_jobs: generated jobs to normalize (the input is not
        modified, a deep copy is rescaled and returned)
    :param wl_clusters: dictionary whose 'jobs_for_clustering' entry holds
        the original jobs used as the reference for the metric totals
    :return: deep copies of model_jobs with rescaled time-signal y-values
    """

    # create a workload from jobs in cluster and generated model jobs
    wl_original = Workload(jobs=wl_clusters['jobs_for_clustering'],
                           tag="original_jobs")
    wl_generated = Workload(jobs=model_jobs, tag="generated_jobs")

    ts_orig = wl_original.total_metrics_sum_dict
    ts_generated = wl_generated.total_metrics_sum_dict

    # The generated totals do not depend on the job being rescaled: convert
    # them once and drop the metrics that cannot be normalized (zero total).
    generated_totals = {}
    for ts_name, ts_total in ts_generated.items():
        total = float(ts_total)
        if total:
            generated_totals[ts_name] = total

    # normalize generated jobs (on a copy, the caller's jobs stay as they are)
    normalized_jobs = copy.deepcopy(model_jobs)
    for job in normalized_jobs:
        for ts_name, total in generated_totals.items():
            signal = job.timesignals[ts_name]
            signal.yvalues = signal.yvalues / total * ts_orig[ts_name]

    return normalized_jobs
def split_by_keywords(workload, split_config_output):
    """
    Auxiliary internal splitting function

    Selects the jobs of a workload according to keywords looked up in one of
    the job attributes. A job is selected when its attribute is non-empty,
    contains every "keywords_in" entry and contains none of the
    "keywords_out" entries. Jobs whose attribute is empty or None are never
    selected, whichever combination of keyword lists is configured.

    :param workload: workload whose jobs are filtered
    :param split_config_output: dictionary with the keys 'create_workload'
        (tag of the new workload), 'split_by' (name of the job attribute to
        inspect), 'keywords_in' and 'keywords_out'
    :return: a new Workload, tagged as requested, holding the selected jobs
    :raises ConfigurationError: if both keyword lists are empty
    """

    # Extract configurations for the splitting
    new_wl_name = split_config_output['create_workload']
    split_attr = split_config_output['split_by']
    kw_include = split_config_output['keywords_in'] or []
    kw_exclude = split_config_output['keywords_out'] or []

    if not kw_include and not kw_exclude:
        raise ConfigurationError(
            "either included or excluded "
            "keywords are needed for splitting a workload")

    sub_wl_jobs = []
    for job in workload.jobs:
        attr_value = getattr(job, split_attr)

        # Nothing to match against. Without this guard a None attribute
        # would make the "in" tests below raise a TypeError.
        if not attr_value:
            continue

        # all() of an empty list is True and any() of an empty list is
        # False, so a single test covers the include-only, exclude-only and
        # include-and-exclude configurations.
        has_all_included = all(kw in attr_value for kw in kw_include)
        has_any_excluded = any(kw in attr_value for kw in kw_exclude)
        if has_all_included and not has_any_excluded:
            sub_wl_jobs.append(job)

    if not sub_wl_jobs:
        logger.error("Workload splitting has produced an empty workload!")

    return Workload(jobs=sub_wl_jobs, tag=new_wl_name)
def generate_synthetic_workload(self, clusters, config):
    """
    Main method that call the specific generation method requested

    The 'type' of config["job_submission_strategy"] is looked up in
    self.generation_mapping to obtain a job-schedule key (item 0) and a
    spawning-strategy key (item 1). For each cluster a schedule strategy is
    built from job_schedule_factory, a generation strategy is built from
    strategy_factory, and its generate_jobs() result is stored.

    :param clusters: sequence of cluster dictionaries; each one provides
        'jobs_for_clustering' (jobs exposing time_start) and
        'source-workload'
    :param config: configuration dictionary; its "job_submission_strategy"
        entry provides 'type', 'n_bins_for_pdf', 'total_submit_interval'
        and 'submit_rate_factor'
    :return: None, the results are stored in self.model_jobs and
        self.workload_set
    """
    submission_cfg = config["job_submission_strategy"]

    strategy_keys = self.generation_mapping[submission_cfg['type']]
    schedule_key = strategy_keys[0]
    spawning_strategy = strategy_keys[1]

    print("schedule_key ", schedule_key)
    print("spawning_strategy ", spawning_strategy)

    n_bins_for_pdf = submission_cfg["n_bins_for_pdf"]

    # earliest and latest job start time over all the clusters
    all_start_times = [
        job.time_start
        for cluster in clusters
        for job in cluster['jobs_for_clustering']
    ]
    global_t0 = min(all_start_times)
    global_tend = max(all_start_times)

    t_submit_interval = submission_cfg['total_submit_interval']
    submit_rate_factor = submission_cfg['submit_rate_factor']

    # generate a synthetic workload for each cluster of jobs
    for wl_clusters in clusters:

        start_times = [
            job.time_start for job in wl_clusters['jobs_for_clustering']
        ]

        print("using cluster generated from workload {}".format(wl_clusters["source-workload"]))

        # invoke the required scheduling strategy
        schedule_cls = job_schedule_factory[schedule_key]
        jobs_schedule_strategy = schedule_cls(start_times,
                                              global_t0,
                                              global_tend,
                                              t_submit_interval,
                                              submit_rate_factor,
                                              n_bins_for_pdf)

        # instantiate and invoke the required spawning strategy
        spawner_cls = strategy_factory[spawning_strategy]
        generation_strategy = spawner_cls(jobs_schedule_strategy,
                                          wl_clusters,
                                          config)

        # the second item (cluster indexes) is not used here
        model_jobs, vec_clust_indexes = generation_strategy.generate_jobs()

        # NOTE(review): these attributes are overwritten, not extended, so
        # with several clusters only the jobs generated for the last one
        # are retained - confirm that this is intended.
        self.model_jobs = model_jobs
        self.workload_set = WorkloadSet([Workload(self.model_jobs)])
def test_generator(self):
    """
    Run the "cluster_and_spawn" workload modeller on a few model jobs.

    Five valid jobs (alternating between two sets of time signals) are
    shared by three workloads, of which only 'test_wl_0' is listed in
    "apply_to". The modeller is expected to produce a workload set with a
    single workload whose jobs carry all the time signals.
    """

    def full_signal_set(last_value):
        # one two-point signal per known time-signal name
        return {
            name: TimeSignal.from_values(name, [0., 0.1], [1., last_value])
            for name in time_signal_names
        }

    # If all of the required arguments are supplied, this should result in a valid job
    valid_args = {
        'time_start': 0.1,
        'duration': 0.2,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': full_signal_set(999.)
    }
    valid_args_2 = {
        'time_start': 0.1,
        'duration': 0.2,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': full_signal_set(444.)
    }

    # job1, job3 and job5 use the first set of arguments, job2 and job4
    # the second one
    input_jobs = []
    for position in range(5):
        job_args = valid_args if position % 2 == 0 else valid_args_2
        new_job = ModelJob(**job_args)
        new_job.label = "job{}".format(position + 1)
        input_jobs.append(new_job)

    # diversify the time start..
    for position, job in enumerate(input_jobs):
        job.time_start += position * 0.1

    # check that they are all valid jobs
    for job in input_jobs:
        self.assertTrue(job.is_valid())

    clustering_config = {
        "type": "Kmeans",
        "rseed": 0,
        "apply_to": ["test_wl_0"],
        "ok_if_low_rank": True,
        "max_iter": 100,
        "max_num_clusters": 3,
        "delta_num_clusters": 1,
        "num_timesignal_bins": 1,
        "user_does_not_check": True
    }
    submission_config = {
        "type": "match_job_pdf_exact",
        "n_bins_for_pdf": 20,
        "submit_rate_factor": 8,
        "total_submit_interval": 60,
        "random_seed": 0
    }
    config_generator = {
        "type": "cluster_and_spawn",
        "job_clustering": clustering_config,
        "job_submission_strategy": submission_config
    }

    # three workloads that share the same list of jobs
    workloads = [
        Workload(jobs=input_jobs, tag='test_wl_{}'.format(index))
        for index in range(3)
    ]

    # select the modelling strategy named in the configuration
    modeller_cls = workload_modelling_types[config_generator["type"]]
    workload_modeller = modeller_cls(workloads)
    workload_modeller.apply(config_generator)

    # get the newly created set of (modelled) workloads
    workload_set = workload_modeller.get_workload_set()

    # make sure that we are creating only one workload
    self.assertEqual(len(workload_set.workloads), 1)

    # ---- check that all the jobs are correctly formed.. ----
    modelled_jobs = workload_set.workloads[0].jobs

    # each job must expose time-signals..
    for job in modelled_jobs:
        self.assertTrue(hasattr(job, "timesignals"))

    # ..and must have all of them at this point
    for job in modelled_jobs:
        present = [name in job.timesignals.keys() for name in time_signal_names]
        self.assertTrue(all(present))
def test_workload_data(self):
    """
    Check the job list and the aggregated metrics exposed by a Workload.

    First part: a workload built from five jobs must hold exactly those job
    objects, and its per-metric totals must equal the sums computed here.
    Second part: for a workload of two jobs with random signals, every job
    x-value (shifted by the job start time) and every job y-value must
    appear in the workload-level time signals.
    """

    # If all of the required arguments are supplied, this should result in a valid job
    constant_signals = {
        name: TimeSignal.from_values(name, [0., 0.1], [1., 999.])
        for name in time_signal_names
    }
    valid_args = {
        'time_start': 0.1,
        'duration': 0.2,
        'ncpus': 1,
        'nnodes': 1,
        'timesignals': constant_signals
    }

    input_jobs = [ModelJob(**valid_args) for _ in range(5)]

    # diversify the time start..
    for position, job in enumerate(input_jobs):
        job.time_start += position * 0.1

    # check that they are all valid jobs
    for job in input_jobs:
        self.assertTrue(job.is_valid())

    # create a workload with 5 model jobs
    test_workload = Workload(jobs=input_jobs, tag='test_wl')

    # -- verify that all the jobs in workload are actually the initial jobs provided --
    same_objects = (
        stored_job is input_jobs[position]
        for position, stored_job in enumerate(test_workload.jobs)
    )
    self.assertTrue(all(same_objects))

    # ------------ verify sums of timesignals -------------------
    for ts_name in signal_types:
        expected_total = sum(
            sum(job.timesignals[ts_name].yvalues) for job in input_jobs)
        self.assertEqual(expected_total,
                         test_workload.total_metrics_sum_dict[ts_name])

    # ------------ verify global time signals -------------------
    def random_signals():
        # x-values are drawn before y-values for each signal name
        return {
            name: TimeSignal.from_values(name,
                                         np.random.rand(10),
                                         np.random.rand(10))
            for name in time_signal_names
        }

    job1 = ModelJob(time_start=0.1,
                    duration=0.222,
                    ncpus=1,
                    nnodes=1,
                    timesignals=random_signals())
    job2 = ModelJob(time_start=0.1,
                    duration=0.333,
                    ncpus=1,
                    nnodes=1,
                    timesignals=random_signals())

    test_workload = Workload(jobs=[job1, job2], tag='wl_2jobs')

    for job in (job1, job2):
        for ts_name in signal_types:
            job_signal = job.timesignals[ts_name]

            # x-values are expected to be offset by the job start time
            self.assertTrue(
                all(x + job.time_start
                    in test_workload.total_metrics_timesignals[ts_name].xvalues
                    for x in job_signal.xvalues))

            self.assertTrue(
                all(y in test_workload.total_metrics_timesignals[ts_name].yvalues
                    for y in job_signal.yvalues))
def test_splitter(self):
    """
    Check that WorkloadSplit creates a workload filtered by label keywords.

    Twenty jobs are deep-copied at random (fixed seed) from two template
    jobs, one labelled "label_nottobepicked" and one labelled
    "label_includeme". After applying the split configuration, the list of
    workloads must contain a workload tagged "spawn_workload" whose jobs all
    have a label containing "includeme" and not containing "excludeme".
    """

    # -------------- prepare a couple of dummy jobs ---------------
    def full_signal_set(last_value):
        # one two-point signal per known time-signal name
        return {
            name: TimeSignal.from_values(name, [0., 0.1], [1., last_value])
            for name in time_signal_names
        }

    ts_complete_set = full_signal_set(999.)
    ts_complete_set_2 = full_signal_set(444.)

    # a model job that WILL NOT be picked by the algorithm..
    job1 = ModelJob(time_start=0.1,
                    duration=0.2,
                    ncpus=1,
                    nnodes=1,
                    timesignals=ts_complete_set,
                    job_name="job_name_1")
    job1.label = "label_nottobepicked"

    # a model job that WILL be picked by the algorithm..
    job2 = ModelJob(time_start=0.2,
                    duration=0.4,
                    ncpus=2,
                    nnodes=2,
                    timesignals=ts_complete_set_2,
                    job_name="job_name_2")
    job2.label = "label_includeme"

    # dummy workload with 20 jobs, each one a deep copy of job1 or job2
    np.random.seed(0)
    jobs_all = []
    for _ in range(20):
        template = job1 if np.random.rand() < 0.5 else job2
        jobs_all.append(copy.deepcopy(template))

    # create a workload out of all the jobs..
    workload = Workload(jobs=jobs_all, tag="testing_workload")

    # configure the splitter from user config
    config_splitting = {
        "type": "split",
        "keywords_in": ["includeme"],
        "keywords_out": ["excludeme"],
        "split_by": "label",
        "apply_to": ["testing_workload"],
        "create_workload": "spawn_workload"
    }

    workloads = [workload]
    WorkloadSplit(workloads).apply(config_splitting)

    # look for the first workload carrying the requested tag
    wanted_tag = config_splitting["create_workload"]
    wl_out = next((wl for wl in workloads if wl.tag == wanted_tag), None)

    # make sure that we have created a workload as expected
    self.assertIsNotNone(wl_out)
    self.assertEqual(wl_out.tag, config_splitting["create_workload"])

    # make sure that all the jobs have a label consistent with the filter
    for job in wl_out.jobs:
        self.assertIn("includeme", job.label)
        self.assertNotIn("excludeme", job.label)
def test_workload_fillin_default(self):
    """
    Test the correct assignment of user-defined time-series

    Two jobs carrying only IO time signals are put in a workload and a
    "fill_missing_entries" configuration is applied with
    StrategyUserDefaults. The checks are: the IO signals still lie strictly
    between 0 and 1, the metrics configured with the pair [100, 101] lie
    strictly between 100 and 101, and the metrics configured with the
    user functions "custom-1" and "step-1" have the expected y-values.

    :return:
    """

    io_metrics = ['kb_read', 'kb_write', 'n_read', 'n_write']

    def random_io_signals():
        # x-values are drawn before y-values for each IO metric
        return {
            name: TimeSignal.from_values(name,
                                         np.random.rand(10),
                                         np.random.rand(10))
            for name in io_metrics
        }

    # create 2 random jobs (with ONLY io metrics)
    job1 = ModelJob(time_start=0.1,
                    duration=0.2,
                    ncpus=1,
                    nnodes=1,
                    timesignals=random_io_signals())
    job2 = ModelJob(time_start=0.1,
                    duration=0.2,
                    ncpus=1,
                    nnodes=1,
                    timesignals=random_io_signals())

    test_workload = Workload(jobs=[job1, job2], tag='wl_2jobs')

    # ---------------------- fill in config -----------------------
    step_function = {
        "type": "step",
        "name": "step-1",
        "x_step": 0.5
    }
    custom_function = {
        "type": "custom",
        "name": "custom-1",
        "x_values": [0, 0.1, 0.15, 0.3333, 0.5, 0.8, 0.9, 1.0],
        "y_values": [0, 0.1, 0.2, 0.3, 0.5, 0.8, 0.9, 1.0]
    }

    # Values to assign to all the unspecified metrics, together with the
    # user-defined functions that some of them refer to
    default_config = {
        "type": "fill_missing_entries",
        "apply_to": ["wl_2jobs"],
        "priority": 0,
        "metrics": {
            "kb_collective": [100, 101],
            "n_collective": [100, 101],
            "kb_pairwise": {
                "function": "step-1",
                "scaling": 1000.0
            },
            "n_pairwise": {
                "function": "custom-1",
                "scaling": 1000.0
            },
            "flops": [100, 101],
        },
        "user_functions": [step_function, custom_function]
    }

    # Apply the user defaults to the workloads
    workloads = [test_workload]
    StrategyUserDefaults(workloads).apply(default_config)

    # test that the IO metrics are within the random range used [0,1]
    for job in workloads[0].jobs:
        for metric in ('n_write', 'kb_write', 'n_read', 'kb_read'):
            self.assertTrue(
                all([0.0 < x < 1.0 for x in job.timesignals[metric].xvalues]))
            self.assertTrue(
                all([0.0 < y < 1.0 for y in job.timesignals[metric].yvalues]))

    # test that the metrics configured as [100, 101] lie strictly inside
    # that interval
    for job in workloads[0].jobs:
        for metric in ('flops', 'n_collective', 'kb_collective'):
            self.assertTrue(
                all([100 < y < 101 for y in job.timesignals[metric].yvalues]))

    # test that the user-defined functions are being applied as expected
    for job in workloads[0].jobs:

        # custom function: y-values of the function times the scaling
        got_vs_expected = zip(job.timesignals['n_pairwise'].yvalues,
                              [0, 0.1, 0.2, 0.3, 0.5, 0.8, 0.9, 1.0])
        self.assertTrue(
            all([got == expected * 1000. for got, expected in got_vs_expected]))

        # and the step function: only 0 and the scaling value are allowed
        self.assertTrue(
            all([(y == 0 or y == 1000.)
                 for y in job.timesignals['kb_pairwise'].yvalues]))
def test_workload_fillin_match(self):
    """
    Test the metrics assignment through job name (label) matching

    The target workload holds a job with a full set of time signals and a
    job named "job_match" with no time signals. The source workload holds
    one job, also named "job_match", with random time signals. After
    applying the "match_by_keyword" configuration with StrategyMatchKeyword,
    the y-values of every time signal found on the second target job are
    compared with those of the source job.

    :return:
    """

    # ------ target workload (that will receive the time metrics..)
    full_signals = {
        name: TimeSignal.from_values(name,
                                     np.random.rand(10),
                                     np.arange(10) * 2)
        for name in time_signal_names
    }
    job1 = ModelJob(job_name="blabla_weird_name",
                    time_start=0.1,
                    duration=0.222,
                    ncpus=1,
                    nnodes=1,
                    timesignals=full_signals)

    # the job that shares its name with the source job, and has no signals
    job2 = ModelJob(job_name="job_match",
                    time_start=0.1,
                    duration=0.333,
                    ncpus=1,
                    nnodes=1,
                    timesignals={})

    target_wl = Workload(jobs=[job1, job2], tag='target_workload')

    # ---------- source workload
    random_signals = {
        name: TimeSignal.from_values(name,
                                     np.random.rand(10),
                                     np.random.rand(10))
        for name in time_signal_names
    }
    job3 = ModelJob(job_name="job_match",
                    time_start=0.1,
                    duration=0.333,
                    ncpus=1,
                    nnodes=1,
                    timesignals=random_signals)

    source_wl = Workload(jobs=[job3], tag='wl_match_source')

    # filler config
    filler_config = {
        "type": "match_by_keyword",
        "priority": 0,
        "keywords": ["job_name"],
        "similarity_threshold": 0.3,
        "source_workloads": ["wl_match_source"],
        "apply_to": ["target_workload"]
    }

    # Apply the keyword matching to the workloads
    workloads = [target_wl, source_wl]
    StrategyMatchKeyword(workloads).apply(filler_config)

    # Compare, signal by signal, the source job with the second target job.
    # NOTE(review): zip stops at the shorter sequence and the loop runs over
    # the target signals only, so an empty target would pass - confirm.
    signals_match = []
    for ts_name, ts_target in target_wl.jobs[1].timesignals.items():
        y_pairs = zip(job3.timesignals[ts_name].yvalues, ts_target.yvalues)
        signals_match.append(all(ys == yt for ys, yt in y_pairs))

    self.assertTrue(all(signals_match))
def from_labelled_jobs(cls, wl_dict):
    """
    Build an object from a dictionary of labelled job collections.

    One Workload is created per dictionary entry, using the key as the
    workload tag and the value as its jobs; the list of workloads (in the
    iteration order of the dictionary) is then passed to cls.

    :param cls: callable invoked with the list of workloads
    :param wl_dict: dictionary mapping a workload tag to its jobs
    :return: the result of calling cls with the list of workloads
    """
    workloads = []
    for tag, jobs in wl_dict.items():
        workloads.append(Workload(jobs=jobs, tag=tag))

    return cls(workloads)