def make_LODatastruct_mul(test_workloads, data_folder, config_path,
                          with_intensive=False, si=False,
                          destroy_on_serialize=False, config_dict=None,
                          shared_within_templates=True):
    """Build the LODS_mul used in workload mapping scenarios and serialize it.

    The result is written to "lods_mul.bin" inside OUTPUT_FOLDER.

    test_workloads: list
        Workloads held out for testing; they are removed from the
        parametric training workloads.
    data_folder: str
        Path to the folder containing one csv file per workload.
    config_path: str
        Path to the json configuration file that defines the splits.
    with_intensive: bool, default=False
        When true, INTENSIVE_WORKLOADS are placed in front of the
        parametric training workloads.
    si: bool, default=False
        'separate intersections': whether intersecting (shared)
        configurations must be separated from non-intersecting ones.
        Forwarded to lods.build as `separate_intersections`.
    destroy_on_serialize: bool, default=False
        Forwarded as the `destroy` argument of lods.serialize.
    config_dict: dict, default=None
        Settings put into config.py when this LODS was created. Attached
        to the LODS only when it is not None.
    shared_within_templates: bool, default=True
        When true, TEMPLATES is handed to lods.build so that shared
        configurations are computed within templates; otherwise they are
        computed across all workloads.

    Returns the LODataStruct instance that was built and serialized.
    """
    # Parametric workloads that are not held out, in a reproducible order.
    param_training_workloads = sorted(
        set(PARAM_WORKLOADS).difference(set(test_workloads)))
    training_workloads = (
        INTENSIVE_WORKLOADS + param_training_workloads
        if with_intensive else param_training_workloads)

    lods = LODataStruct(test_workloads, lo_config_path=config_path,
                        test_size=0.8)
    lods.read_jobs_data(training_workloads + test_workloads, data_folder,
                        id_to_fname=id_to_str)

    # The template definitions are only supplied when intersections have to
    # be computed within templates.
    build_options = {"separate_intersections": si}
    if shared_within_templates:
        build_options["templates"] = TEMPLATES
    lods.build(**build_options)

    if config_dict is not None:
        lods.set_config_dict(config_dict)

    output_path = os.path.join(OUTPUT_FOLDER, "lods_mul.bin")
    lods.serialize(output_path, destroy=destroy_on_serialize)
    return lods
def make_LODatastruct_X(template_X, test_X, data_folder, config_path,
                        with_intensive=False, split_definitions=None, X="X",
                        destroy_on_serialize=False,
                        separate_intersections=False, config_dict=None,
                        shared_within_templates=True):
    """Makes and serializes LODS_X for left out template scenarios.

    The result is written to "lods_<X>.bin" inside OUTPUT_FOLDER.

    template_X: list
        List of workloads from the template X
    test_X: list
        List of test workloads from this template
    data_folder: str
        Path to folder containing csv files for each workloads data.
    config_path: str
        Path to a json configuration file that defines the splits.
    with_intensive: boolean, default=False
        Whether or not to include intensive workloads
    split_definitions: dict, default=None
        Contains split definitions for the datasets trainval,
        traincomplement, test, shared_trainval, shared_traincomplement.
        Forwarded to lods.build as `imported_sd`.
    X: str, default="X"
        Label of the left out template ("A", "B", "C", ... "G"); only used
        to name the serialized file.
    destroy_on_serialize: boolean, default=False
        Forwarded as the `destroy` argument of lods.serialize.
    separate_intersections: boolean, default=False
        Whether or not to put the intersecting (shared) configurations
        across workloads into a separate dataset (prefixed by the word
        "shared")
    config_dict: dict, default=None
        Dictionary containing the values of the global variables defined in
        config.py at the moment of creation of LODS. Attached to the LODS
        only when it is not None.
    shared_within_templates: boolean, default=True
        Whether the shared configurations intersections should be computed
        within templates or across all workloads (beyond template
        definition)

    Returns the LODataStruct instance that was built and serialized, like
    make_LODatastruct_mul does.
    NOTE(review): what the instance still holds after serializing with
    destroy_on_serialize=True depends on LODataStruct.serialize - confirm.
    """
    # X can be any of "A", "B", "C", ... "G" (templates)
    param_except_X = sorted(set(PARAM_WORKLOADS) - set(template_X))

    # Test workloads of every template, in TEST_WORKLOADS iteration order.
    flatten_test_workloads = [
        job
        for temp_name in TEST_WORKLOADS
        for job in TEST_WORKLOADS[temp_name]
    ]
    all_test_jobs = set(flatten_test_workloads)
    param_except_X_and_other_test = sorted(
        set(param_except_X) - all_test_jobs)

    if separate_intersections:
        # jobs in template X but not in test (only used for intersections);
        # sorted so the order does not depend on set iteration order
        additional_jobs = sorted(set(template_X) - all_test_jobs)
        # plus test jobs from other templates
        own_test_jobs = set(test_X)
        additional_jobs.extend(
            job for job in flatten_test_workloads
            if job not in own_test_jobs)
    else:
        additional_jobs = []

    if with_intensive:
        training_except_X = INTENSIVE_WORKLOADS + param_except_X_and_other_test
    else:
        training_except_X = param_except_X_and_other_test

    lods = LODataStruct(test_X, lo_config_path=config_path, test_size=0.8)
    lods.read_jobs_data(training_except_X + test_X, data_folder,
                        additional_jobs=additional_jobs,
                        id_to_fname=id_to_str)

    # The template definitions are only supplied when intersections have to
    # be computed within templates.
    build_options = {
        "imported_sd": split_definitions,
        "separate_intersections": separate_intersections,
    }
    if shared_within_templates:
        build_options["templates"] = TEMPLATES
    lods.build(**build_options)

    if config_dict is not None:
        lods.set_config_dict(config_dict)

    lods.serialize(os.path.join(OUTPUT_FOLDER, "lods_{}.bin".format(X)),
                   destroy=destroy_on_serialize)
    return lods