def main():
    """Build LODS_mul over every test workload and run the shared-config checks.

    The checks are executed twice: once on the freshly built object, and once
    on the object read back from ``lods_mul_var.bin`` in OUTPUT_FOLDER.
    """

    def _run_checks(candidate):
        # The same three checks are applied before and after the round trip.
        check_shared_configs_consistency(candidate)
        assert_no_shared_in_test(candidate)
        assert_no_shared_in_test(candidate, 'shared_traincomplement')

    # LODATAStruct_mul
    all_test_workloads = [
        workload
        for template in TEST_WORKLOADS
        for workload in TEST_WORKLOADS[template]
    ]
    # NOTE(review): the `destroy=` keyword must match the signature of the
    # make_LODatastruct_mul that is in scope for this script -- confirm.
    lods = make_LODatastruct_mul(
        all_test_workloads, DATA_FOLDER, CONFIG_PATH,
        with_intensive=False, si=True, destroy=False)

    print("Running tests on newly created lods object....")
    _run_checks(lods)
    lods.serialize(
        os.path.join(OUTPUT_FOLDER, "lods_mul_var.bin"), destroy=True)

    # Now read the serialized object back and repeat the tests.
    print("\nRepeating tests after reading serialized lods...")
    lods = None  # drop the old reference before loading the new object
    lods = LODataStruct.load_from_file(
        os.path.join(OUTPUT_FOLDER, "lods_mul_var.bin"), autobuild=True)
    _run_checks(lods)
def main():
    """Re-serialize every file of IN_FOLDER whose name contains ".bin".

    Each matching file is loaded without autobuild and written under the same
    file name into OUT_FOLDER (created first when it does not exist yet).
    """
    if not os.path.exists(OUT_FOLDER):
        os.makedirs(OUT_FOLDER)

    bin_names = (name for name in os.listdir(IN_FOLDER) if ".bin" in name)
    for name in bin_names:
        source = os.path.join(IN_FOLDER, name)
        target = os.path.join(OUT_FOLDER, name)
        loaded = LODataStruct.load_from_file(source, autobuild=False)
        loaded.serialize(target, destroy=True)
def main():
    """Load LODS_mul and every per-template LODS, then run the consistency tests."""
    print("Loading LODS...")
    lods = {
        "mul": LODataStruct.load_from_file(
            os.path.join(OUTPUT_FOLDER, "lods_mul.bin"), autobuild=True),
    }
    for template in TEMPLATES:
        template_path = os.path.join(
            OUTPUT_FOLDER, "lods_{}.bin".format(template))
        lods[template] = LODataStruct.load_from_file(
            template_path, autobuild=True)
    print("LODS loaded, starting tests...")

    # 1) 'test': make sure test data is the same between LODS_X and LODS_mul
    # 2) 'traincomplement': make sure observation data is the same between
    #    LODS_X and LODS_mul
    for split_name in ('test', 'traincomplement'):
        test_eval_jobs(lods, split_name)
        test_eval_splits(lods, split_name)

    # 3) Make sure training data points in LODS_X also appear in LODS_mul
    test_training_jobs(lods, "trainval")
    test_training_splits(lods, "trainval")
def read_lods(describe=False):
    """Load the persisted LODS, repoint it at the tpcx-bb csv folder and rebuild.

    describe: bool, default=False
        When true, ``lods.describe()`` is called before returning.

    Returns the loaded LODataStruct. No min-max scaling is applied here.
    """
    # autobuild=False on load because the object was persisted as a built
    # object.
    lods_path = os.path.join(LODS_FOLDER_PATH, LODS_FNAME)
    lods = LODataStruct.load_from_file(lods_path, autobuild=False)

    # Overwrite the folder containing the csvs, then autobuild explicitly.
    lods.folder = "../../../../datasets/tpcx-bb/"
    lods._autobuild()

    # minmaxscale is deliberately not called: the Ottertune code does that.
    if describe:
        lods.describe()
    return lods
def get_lods(describe=False):
    """Load the persisted LODS, rebuild it from the tpcx-bb csvs and scale X and Y.

    describe: bool, default=False
        When true, ``lods.describe()`` is called before returning.

    Returns the loaded LODataStruct after ``minmaxscale`` was applied to "X"
    and then to "Y".
    """
    # autobuild=False on load because the object was persisted as a built
    # object.
    lods_path = os.path.join(LODS_FOLDER_PATH, LODS_FNAME)
    lods = LODataStruct.load_from_file(lods_path, autobuild=False)

    # Overwrite the folder containing the csvs, then autobuild explicitly.
    lods.folder = "../../../../datasets/tpcx-bb/"
    lods._autobuild()

    for scaled_part in ("X", "Y"):
        lods.minmaxscale(scaled_part)

    if describe:
        lods.describe()
    return lods
def get_lods(describe=False):
    """Load the persisted LODS, rebuild it from the streaming csvs and scale X and Y.

    describe: bool, default=False
        When true, ``lods.describe()`` is called before returning.

    Returns the loaded LODataStruct after ``minmaxscale`` was applied to "X"
    and then to "Y".
    """
    # autobuild=False on load because the object was persisted as a built
    # object.
    lods_path = os.path.join(LODS_FOLDER_PATH, LODS_FNAME)
    lods = LODataStruct.load_from_file(lods_path, autobuild=False)

    # Backward compatibility for streaming.
    lods.id_to_fname = None
    # Overwrite the folder containing the csvs, then autobuild explicitly.
    lods.folder = "../../../../datasets/streaming/"
    lods._autobuild()

    for scaled_part in ("X", "Y"):
        lods.minmaxscale(scaled_part)

    if describe:
        lods.describe()
    return lods
def make_LODatastruct_mul(
        test_workloads, data_folder, config_path, with_intensive=False,
        si=False, destroy_on_serialize=False, config_dict=None,
        shared_within_templates=True):
    """Makes and serializes LODS_mul for workload mapping scenarios.

    The object is written to ``lods_mul.bin`` inside OUTPUT_FOLDER and is
    also returned to the caller.

    test_workloads: list
        The list of test workloads.
    data_folder: str
        Path to the folder containing the csv files with each workload's data.
    config_path: str
        Path to a json configuration file that defines the splits.
    with_intensive: bool, default=False
        Whether or not to include the intensive workloads in training.
    si: bool, default=False
        si stands for 'separate intersections'. Indicates whether or not
        intersecting (shared) configurations across a particular template
        must be separated from non-intersecting configurations.
    destroy_on_serialize: bool, default=False
        Forwarded as the ``destroy`` argument of ``lods.serialize``.
    config_dict: dict
        Dictionary containing the settings put into config.py when this LODS
        was created. Attached to the LODS only when it is not None.
    shared_within_templates: boolean, default=True
        Whether the shared configuration intersections should be computed
        within templates or across all workloads (beyond template definition).
    """
    # Parametric workloads that are not held out for testing.
    training_workloads = sorted(set(PARAM_WORKLOADS) - set(test_workloads))
    if with_intensive:
        training_workloads = INTENSIVE_WORKLOADS + training_workloads

    lods = LODataStruct(
        test_workloads, lo_config_path=config_path, test_size=0.8)
    lods.read_jobs_data(
        training_workloads + test_workloads, data_folder,
        id_to_fname=id_to_str)

    # Templates are only handed to build() when intersections are computed
    # within templates.
    build_options = {"separate_intersections": si}
    if shared_within_templates:
        build_options["templates"] = TEMPLATES
    lods.build(**build_options)

    if config_dict is not None:
        lods.set_config_dict(config_dict)

    output_path = os.path.join(OUTPUT_FOLDER, "lods_mul.bin")
    lods.serialize(output_path, destroy=destroy_on_serialize)
    return lods
def make_LODatastruct_X(template_X, test_X, data_folder, config_path,
                        with_intensive=False, split_definitions=None, X="X",
                        destroy_on_serialize=False,
                        separate_intersections=False, config_dict=None,
                        shared_within_templates=True):
    """Makes and serializes LODS_X for left out template scenarios.

    The object is written to ``lods_{X}.bin`` inside OUTPUT_FOLDER.

    template_X: list
        List of workloads from the template X.
    test_X: list
        List of test workloads from this template.
    data_folder: str
        Path to the folder containing the csv files with each workload's data.
    config_path: str
        Path to a json configuration file that defines the splits.
    with_intensive: boolean, default=False
        Whether or not to include the intensive workloads in training.
    split_definitions: dict
        Contains split definitions for the datasets trainval,
        traincomplement, test, shared_trainval, shared_traincomplement.
        Handed to ``lods.build`` as ``imported_sd``.
    X: str, default="X"
        Template name used in the output file name; can be any of
        "A", "B", "C", ... "G" (templates).
    destroy_on_serialize: boolean, default=False
        Forwarded as the ``destroy`` argument of ``lods.serialize``.
    separate_intersections: boolean, default=False
        Whether or not to put the intersecting (shared) configurations across
        workloads into a separate dataset (prefixed by the word "shared").
    config_dict: dict
        Dictionary containing the values of the global variables defined in
        config.py at the moment of creation of this LODS. Attached to the
        LODS only when it is not None.
    shared_within_templates: boolean, default=True
        Whether the shared configuration intersections should be computed
        within templates or across all workloads (beyond template definition).
    """
    all_test_workloads = [
        workload
        for template in TEST_WORKLOADS
        for workload in TEST_WORKLOADS[template]
    ]

    # Parametric workloads outside template X, minus every test workload.
    outside_X = sorted(set(PARAM_WORKLOADS) - set(template_X))
    training_except_X = sorted(set(outside_X) - set(all_test_workloads))
    if with_intensive:
        training_except_X = INTENSIVE_WORKLOADS + training_except_X

    additional_jobs = []
    if separate_intersections:
        # Jobs in template X but not in test (only used for intersections)...
        additional_jobs = list(set(template_X) - set(all_test_workloads))
        # ...plus the test jobs coming from the other templates.
        additional_jobs.extend(
            job for job in all_test_workloads if job not in test_X)

    lods = LODataStruct(test_X, lo_config_path=config_path, test_size=0.8)
    lods.read_jobs_data(
        training_except_X + test_X, data_folder,
        additional_jobs=additional_jobs, id_to_fname=id_to_str)

    # Templates are only handed to build() when intersections are computed
    # within templates.
    build_options = {
        "imported_sd": split_definitions,
        "separate_intersections": separate_intersections,
    }
    if shared_within_templates:
        build_options["templates"] = TEMPLATES
    lods.build(**build_options)

    if config_dict is not None:
        lods.set_config_dict(config_dict)

    output_path = os.path.join(OUTPUT_FOLDER, "lods_{}.bin".format(X))
    lods.serialize(output_path, destroy=destroy_on_serialize)
def main():
    """Create LODS_mul and every LODS_X, reload them and run the test suite.

    Steps:
      1. Build and serialize LODS_mul over all test workloads.
      2. Build and serialize one LODS_X per template, reusing the split
         definitions obtained from LODS_mul.
      3. Reload every serialized object from OUTPUT_FOLDER and min-max scale
         "X" and "Y".
      4. Run the consistency tests between LODS_mul and the LODS_X objects
         and, when SEPARATE_INTERSECTIONS_MUL is set, the shared/unshared
         checks on LODS_mul.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)

    config_dict = get_config_dict()
    flat_test_workloads = [
        workload
        for template in TEST_WORKLOADS
        for workload in TEST_WORKLOADS[template]
    ]

    # NOTE(review): destroy_on_serialize is left at its default for both
    # makers while the reload below keys autobuild on DESTROY_ON_SERIALIZE --
    # confirm this pairing is intended.
    lods = make_LODatastruct_mul(
        flat_test_workloads, DATA_FOLDER, CONFIG_PATH,
        with_intensive=WITH_INTENSIVE,
        si=SEPARATE_INTERSECTIONS_MUL,
        config_dict=config_dict,
        shared_within_templates=SHARED_WITHIN_TEMPLATES)

    # Every LODS_X imports the split definitions computed for LODS_mul.
    sd = lods.get_split_definitions()
    for temp in TEMPLATES:
        print("[making LODS_{}]".format(temp))
        make_LODatastruct_X(
            TEMPLATES[temp], TEST_WORKLOADS[temp], DATA_FOLDER, CONFIG_PATH,
            with_intensive=WITH_INTENSIVE,
            split_definitions=sd,
            X=temp,
            separate_intersections=SEPARATE_INTERSECTIONS_X,
            config_dict=config_dict,
            shared_within_templates=SHARED_WITHIN_TEMPLATES)

    print("Loading LODS...")
    lods = {}
    lods["mul"] = LODataStruct.load_from_file(
        os.path.join(OUTPUT_FOLDER, "lods_mul.bin"),
        autobuild=DESTROY_ON_SERIALIZE)
    print("LODS_mul loaded...")
    for t in TEMPLATES:
        print("Loading LODS_{}".format(t))
        lods[t] = LODataStruct.load_from_file(
            os.path.join(OUTPUT_FOLDER, "lods_{}.bin".format(t)),
            autobuild=DESTROY_ON_SERIALIZE)

    for lod_name in lods:
        lods[lod_name].minmaxscale("X")
        lods[lod_name].minmaxscale("Y")

    print("LODS loaded, starting tests...")
    print("**** CONSISTENCY TESTS (LODS_mul & LODS_X) *****")
    for split_name in ('test', 'traincomplement'):
        test_eval_jobs(lods, split_name)
        test_eval_splits(lods, split_name)
    test_training_jobs(lods, "trainval")
    test_training_splits(lods, "trainval")
    print("**** ***** *****")

    if SEPARATE_INTERSECTIONS_MUL:
        print("***** SHARED/UNSHARED TESTS ON LODS_mul *****")
        check_shared_configs_consistency(lods["mul"])
        assert_no_shared_in_test(lods["mul"])
        assert_no_shared_in_test(lods["mul"], 'shared_traincomplement')