def run_experiment(attributes, disjunct_degree, n, task_groups, tasks_per_group,
                   noise, data_rnd_seed, n_learning_sets, rnd_seed,
                   results_path, base_learners, measures, learners, test=True,
                   unpickle=False, visualize=True, weighting="all_equal",
                   error_margin="std", error_bars=True, separate_figs=True,
                   cfg_logger=True):
    """Run the binarization experiment according to the given parameters and
    save the results where indicated.

    Parameters
    ----------
    attributes : int
        Number of attributes/variables of the generated Boolean functions.
    disjunct_degree : int
        The expected number of attributes/variables in a disjunct.
    n : int
        The number of examples for each task to generate.
    task_groups : int
        The number of task groups to generate. Each task group shares the
        same Boolean function.
    tasks_per_group : int
        The number of tasks (with their corresponding data) to generate for
        each task group.
    noise : float
        The proportion of examples of each task that have their class values
        determined randomly.
    data_rnd_seed : int
        The random seed with which to initialize a private Random object.
    n_learning_sets : int
        The number of different learning sets to create for each task.
    rnd_seed : int
        The random seed to be used for the BinarizationExperimentMTLTester
        object.
    results_path : string
        The path where to store the results (if it doesn't exist, it will be
        created).
    base_learners : OrderedDict
        An ordered dictionary with items of the form (name, learner), where
        name is a string representing the base learner's name and learner is a
        scikit-learn estimator object.
    measures : list
        A list of strings representing measure's names.
    learners : OrderedDict
        An ordered dictionary with items of the form (name, learner), where
        name is a string representing the learner's name and learner is a
        merging learning algorithm (e.g. ERM, NoMerging, ...).
    test : boolean
        Indicates whether to perform tests on the MTL problem (with the given
        base_learners, measures and learners).
    unpickle : boolean
        Indicates whether to search for previously computed testing results
        and include them in the MTL problem.
    visualize : boolean
        Indicates whether to visualize the results of the current tasks (for
        each combination of base learners, measures and learners of the MTL
        problem).
    weighting : string
        Indicator of the type of weighting to use when computing the overall
        results.
    error_margin : string
        The measure to use for error margins when computing the overall
        results.
    error_bars : boolean
        Indicates whether to plot the error bars when visualizing the results.
    separate_figs : boolean
        Indicates whether to plot the results of each base learner as a
        separate figure or as one combined figure.
    cfg_logger : boolean
        Indicates whether to re-configure the global logger object.

    """
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if cfg_logger:
        # time-stamped log file so repeated runs don't overwrite each other
        log_file = os.path.join(
            results_path,
            "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S")))
        configure_logger(logger, console_level=logging.INFO,
                         file_name=log_file)
    pickle_path_fmt = os.path.join(results_path, "bl-{}.pkl")

    # generate boolean data with complete test sets
    funcs_pickle_path = os.path.join(results_path, "boolean_funcs.pkl")
    tasks_data, tasks_complete_test_sets = \
        synthetic_data.generate_boolean_data_with_complete_test_sets(
            attributes, disjunct_degree, n, task_groups, tasks_per_group,
            noise, random_seed=data_rnd_seed,
            n_learning_sets=n_learning_sets,
            funcs_pickle_path=funcs_pickle_path)

    # create a MTL tester with tasks' data
    mtlt = BinarizationExperimentMTLTester(
        tasks_data, rnd_seed, repeats=1,
        preprepared_test_sets=tasks_complete_test_sets)

    # test all combinations of learners and base learners (compute the testing
    # results with the defined measures) and save the results if test == True
    if test:
        mtlt.test_tasks(learners, base_learners, measures, results_path,
                        save_orange_data=True)
        mtlt.pickle_test_results(pickle_path_fmt)

    # find previously computed testing results and check if they were computed
    # using the same data tables and cross-validation indices if
    # unpickle == True
    if unpickle:
        mtlt.find_pickled_test_results(pickle_path_fmt)
        if not mtlt.check_test_results_compatible():
            raise ValueError("Test results for different base learners are not "
                             "compatible.")

    # visualize the results of the current tasks for each combination of base
    # learners, learners and measures that are in the MTL problem; in addition,
    # visualize the dendrograms showing merging history of ERM
    if visualize:
        if not mtlt.contains_test_results():
            raise ValueError("The MTLTester object doesn't contain any testing"
                             " results.")
        bls = mtlt.get_base_learners()
        ls = mtlt.get_learners()
        ms = mtlt.get_measures()
        mtlt.visualize_results(
            bls, ls, ms, results_path,
            {"ForcedTree": "blue", "Tree": "green", "ERM": "red"},
            error_bars=error_bars, separate_figs=separate_figs)
        mtlt.visualize_dendrograms(bls, results_path)
        mtlt.compute_overall_results(bls, ls, ms, results_path,
                                     weighting=weighting,
                                     error_margin=error_margin)
        convert_svgs_to_pdfs(results_path)
        # NOTE: dots before the "tex" extension are escaped so the patterns
        # match a literal ".tex" suffix instead of any character + "tex"
        build_and_crop_tex_files(results_path, r"-tikz\.tex$")
        combine_dendrograms_and_trees(bls, results_path)
        build_and_crop_tex_files(results_path, r"^comparison.*\.tex$",
                                 crop=False)
def _test_config_44(data_rnd_seed_values, noise_values,
                    results_dir="results/synthetic_data"):
    """Testing configuration for the changing amount of noise experiment.

    Repeats the experiment once per data_rnd_seed value; depending on the
    module-level ``mode``, either runs the experiment for every noise value
    ("run") and/or combines the per-noise results into summary plots
    ("combine").

    NOTE(review): this function reads several module-level names
    (``path_prefix``, ``mode``, ``base_learners_bool``, ``measures_clas``,
    ``learners``, ``test``, ``unpickle``, ``visualize``, ``error_measure``) —
    they must be set before calling it.

    Parameters
    ----------
    data_rnd_seed_values : list
        Values of data_rnd_seed for which to repeat the experiment.
    noise_values : list
        Values of the amount of noise for which to repeat the experiment.
    results_dir : str
        Directory in which to put the changing_noise directory with the
        relevant experiment results.

    """
    # parameters of the synthetic Boolean MTL problem
    attributes = 12
    disjunct_degree = 6
    n = 50
    task_groups = 5
    tasks_per_group = 5
    noise = 0.0
    n_learning_sets = 10
    # parameters of the MTL problem tester
    rnd_seed = 51
    # dynamic parameters of the synthetic Boolean MTL problem
    for data_rnd_seed in data_rnd_seed_values:
        # "nse{{}}" leaves a "{}" placeholder in the result so the path can
        # later be filled in with each noise value
        results_path_fmt = os.path.join(
            path_prefix, results_dir,
            "changing_noise/bool_func-a{}d{}n{}g{}tg{}"
            "nse{{}}rs{}nls{}-seed{}-complete_test".format(
                attributes, disjunct_degree, n, task_groups, tasks_per_group,
                data_rnd_seed, n_learning_sets, rnd_seed))
        if "run" in mode:
            for noise in noise_values:
                # prepare directories and loggers
                results_path = results_path_fmt.format(noise)
                if not os.path.exists(results_path):
                    os.makedirs(results_path)
                # time-stamped log file for this (seed, noise) combination
                log_file = os.path.join(
                    results_path,
                    "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S")))
                configure_logger(logger, console_level=logging.INFO,
                                 file_name=log_file)
                log_base_learner_info(logger, base_learners_bool)
                # generate boolean data with complete test sets
                funcs_pickle_path = os.path.join(results_path,
                                                 "boolean_funcs.pkl")
                tasks_data, tasks_complete_test_sets = \
                    synthetic_data.generate_boolean_data_with_complete_test_sets(
                        attributes, disjunct_degree, n, task_groups,
                        tasks_per_group, noise, random_seed=data_rnd_seed,
                        n_learning_sets=n_learning_sets,
                        funcs_pickle_path=funcs_pickle_path)
                # test the generated MTL problem
                test_tasks(tasks_data, results_path, base_learners_bool,
                           measures_clas, learners, "pre-prepared_test",
                           rnd_seed=rnd_seed, test=test, unpickle=unpickle,
                           visualize=visualize,
                           preprepared_test_sets=tasks_complete_test_sets,
                           separate_figs=True, cfg_logger=False)
        if "combine" in mode:
            # merge the per-noise results of this seed into one summary PDF
            combine_experiment_results(
                results_path_fmt, noise_values,
                (results_path_fmt.format(pprint_iter(noise_values)) +
                 "-{}-{{}}.pdf".format(error_measure)),
                n_learning_sets, error_measure=error_measure,
                title="Avg. results for tasks", xlabel="% of noise")
n_learning_sets, rnd_seed))) if not os.path.exists(results_path): os.makedirs(results_path) log_file = os.path.join( results_path, "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S"))) configure_logger(logger, console_level=logging.INFO, file_name=log_file) log_base_learner_info(logger, base_learners_bool) # generate boolean data with complete test sets funcs_pickle_path = os.path.join(results_path, "boolean_funcs.pkl") tasks_data, tasks_complete_test_sets = \ synthetic_data.generate_boolean_data_with_complete_test_sets( attributes, disjunct_degree, n, task_groups, tasks_per_group, noise, random_seed=data_rnd_seed, n_learning_sets=n_learning_sets, funcs_pickle_path=funcs_pickle_path) # test the generated MTL problem test_tasks(tasks_data, results_path, base_learners_bool, measures_clas, learners, "pre-prepared_test", rnd_seed=rnd_seed, test=test, unpickle=unpickle, visualize=visualize, preprepared_test_sets=tasks_complete_test_sets, separate_figs=True,
def run_experiment(attributes, disjunct_degree, n, task_groups, tasks_per_group,
                   noise, data_rnd_seed, n_learning_sets, rnd_seed,
                   results_path, base_learners, measures, learners, test=True,
                   unpickle=False, visualize=True, weighting="all_equal",
                   error_margin="std", error_bars=True, separate_figs=True,
                   cfg_logger=True):
    """Run the binarization experiment according to the given parameters and
    save the results where indicated.

    Parameters
    ----------
    attributes : int
        Number of attributes/variables of the generated Boolean functions.
    disjunct_degree : int
        The expected number of attributes/variables in a disjunct.
    n : int
        The number of examples for each task to generate.
    task_groups : int
        The number of task groups to generate. Each task group shares the
        same Boolean function.
    tasks_per_group : int
        The number of tasks (with their corresponding data) to generate for
        each task group.
    noise : float
        The proportion of examples of each task that have their class values
        determined randomly.
    data_rnd_seed : int
        The random seed with which to initialize a private Random object.
    n_learning_sets : int
        The number of different learning sets to create for each task.
    rnd_seed : int
        The random seed to be used for the BinarizationExperimentMTLTester
        object.
    results_path : string
        The path where to store the results (if it doesn't exist, it will be
        created).
    base_learners : OrderedDict
        An ordered dictionary with items of the form (name, learner), where
        name is a string representing the base learner's name and learner is a
        scikit-learn estimator object.
    measures : list
        A list of strings representing measure's names.
    learners : OrderedDict
        An ordered dictionary with items of the form (name, learner), where
        name is a string representing the learner's name and learner is a
        merging learning algorithm (e.g. ERM, NoMerging, ...).
    test : boolean
        Indicates whether to perform tests on the MTL problem (with the given
        base_learners, measures and learners).
    unpickle : boolean
        Indicates whether to search for previously computed testing results
        and include them in the MTL problem.
    visualize : boolean
        Indicates whether to visualize the results of the current tasks (for
        each combination of base learners, measures and learners of the MTL
        problem).
    weighting : string
        Indicator of the type of weighting to use when computing the overall
        results.
    error_margin : string
        The measure to use for error margins when computing the overall
        results.
    error_bars : boolean
        Indicates whether to plot the error bars when visualizing the results.
    separate_figs : boolean
        Indicates whether to plot the results of each base learner as a
        separate figure or as one combined figure.
    cfg_logger : boolean
        Indicates whether to re-configure the global logger object.

    """
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if cfg_logger:
        # time-stamped log file so repeated runs don't overwrite each other
        log_file = os.path.join(
            results_path,
            "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S")))
        configure_logger(logger, console_level=logging.INFO,
                         file_name=log_file)
    pickle_path_fmt = os.path.join(results_path, "bl-{}.pkl")

    # generate boolean data with complete test sets
    funcs_pickle_path = os.path.join(results_path, "boolean_funcs.pkl")
    tasks_data, tasks_complete_test_sets = \
        synthetic_data.generate_boolean_data_with_complete_test_sets(
            attributes, disjunct_degree, n, task_groups, tasks_per_group,
            noise, random_seed=data_rnd_seed,
            n_learning_sets=n_learning_sets,
            funcs_pickle_path=funcs_pickle_path)

    # create a MTL tester with tasks' data
    mtlt = BinarizationExperimentMTLTester(
        tasks_data, rnd_seed, repeats=1,
        preprepared_test_sets=tasks_complete_test_sets)

    # test all combinations of learners and base learners (compute the testing
    # results with the defined measures) and save the results if test == True
    if test:
        mtlt.test_tasks(learners, base_learners, measures, results_path,
                        save_orange_data=True)
        mtlt.pickle_test_results(pickle_path_fmt)

    # find previously computed testing results and check if they were computed
    # using the same data tables and cross-validation indices if
    # unpickle == True
    if unpickle:
        mtlt.find_pickled_test_results(pickle_path_fmt)
        if not mtlt.check_test_results_compatible():
            raise ValueError(
                "Test results for different base learners are not "
                "compatible.")

    # visualize the results of the current tasks for each combination of base
    # learners, learners and measures that are in the MTL problem; in addition,
    # visualize the dendrograms showing merging history of ERM
    if visualize:
        if not mtlt.contains_test_results():
            raise ValueError("The MTLTester object doesn't contain any testing"
                             " results.")
        bls = mtlt.get_base_learners()
        ls = mtlt.get_learners()
        ms = mtlt.get_measures()
        mtlt.visualize_results(
            bls, ls, ms, results_path,
            {"ForcedTree": "blue", "Tree": "green", "ERM": "red"},
            error_bars=error_bars, separate_figs=separate_figs)
        mtlt.visualize_dendrograms(bls, results_path)
        mtlt.compute_overall_results(bls, ls, ms, results_path,
                                     weighting=weighting,
                                     error_margin=error_margin)
        convert_svgs_to_pdfs(results_path)
        # NOTE: dots before the "tex" extension are escaped so the patterns
        # match a literal ".tex" suffix instead of any character + "tex"
        build_and_crop_tex_files(results_path, r"-tikz\.tex$")
        combine_dendrograms_and_trees(bls, results_path)
        build_and_crop_tex_files(results_path, r"^comparison.*\.tex$",
                                 crop=False)
def _test_config_44(data_rnd_seed_values, noise_values,
                    results_dir="results/synthetic_data"):
    """Testing configuration for the changing amount of noise experiment.

    Repeats the experiment once per data_rnd_seed value; depending on the
    module-level ``mode``, either runs the experiment for every noise value
    ("run") and/or combines the per-noise results into summary plots
    ("combine").

    NOTE(review): this function reads several module-level names
    (``path_prefix``, ``mode``, ``base_learners_bool``, ``measures_clas``,
    ``learners``, ``test``, ``unpickle``, ``visualize``, ``error_measure``) —
    they must be set before calling it.

    Parameters
    ----------
    data_rnd_seed_values : list
        Values of data_rnd_seed for which to repeat the experiment.
    noise_values : list
        Values of the amount of noise for which to repeat the experiment.
    results_dir : str
        Directory in which to put the changing_noise directory with the
        relevant experiment results.

    """
    # parameters of the synthetic Boolean MTL problem
    attributes = 12
    disjunct_degree = 6
    n = 50
    task_groups = 5
    tasks_per_group = 5
    noise = 0.0
    n_learning_sets = 10
    # parameters of the MTL problem tester
    rnd_seed = 51
    # dynamic parameters of the synthetic Boolean MTL problem
    for data_rnd_seed in data_rnd_seed_values:
        # "nse{{}}" leaves a "{}" placeholder in the result so the path can
        # later be filled in with each noise value
        results_path_fmt = os.path.join(
            path_prefix, results_dir,
            "changing_noise/bool_func-a{}d{}n{}g{}tg{}"
            "nse{{}}rs{}nls{}-seed{}-complete_test".format(
                attributes, disjunct_degree, n, task_groups, tasks_per_group,
                data_rnd_seed, n_learning_sets, rnd_seed))
        if "run" in mode:
            for noise in noise_values:
                # prepare directories and loggers
                results_path = results_path_fmt.format(noise)
                if not os.path.exists(results_path):
                    os.makedirs(results_path)
                # time-stamped log file for this (seed, noise) combination
                log_file = os.path.join(
                    results_path,
                    "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S")))
                configure_logger(logger, console_level=logging.INFO,
                                 file_name=log_file)
                log_base_learner_info(logger, base_learners_bool)
                # generate boolean data with complete test sets
                funcs_pickle_path = os.path.join(results_path,
                                                 "boolean_funcs.pkl")
                tasks_data, tasks_complete_test_sets = \
                    synthetic_data.generate_boolean_data_with_complete_test_sets(
                        attributes, disjunct_degree, n, task_groups,
                        tasks_per_group, noise, random_seed=data_rnd_seed,
                        n_learning_sets=n_learning_sets,
                        funcs_pickle_path=funcs_pickle_path)
                # test the generated MTL problem
                test_tasks(tasks_data, results_path, base_learners_bool,
                           measures_clas, learners, "pre-prepared_test",
                           rnd_seed=rnd_seed, test=test, unpickle=unpickle,
                           visualize=visualize,
                           preprepared_test_sets=tasks_complete_test_sets,
                           separate_figs=True, cfg_logger=False)
        if "combine" in mode:
            # merge the per-noise results of this seed into one summary PDF
            combine_experiment_results(
                results_path_fmt, noise_values,
                (results_path_fmt.format(pprint_iter(noise_values)) +
                 "-{}-{{}}.pdf".format(error_measure)),
                n_learning_sets, error_measure=error_measure,
                title="Avg. results for tasks", xlabel="% of noise")
"".format(attributes, disjunct_degree, n, task_groups, tasks_per_group, noise, data_rnd_seed, n_learning_sets, rnd_seed))) if not os.path.exists(results_path): os.makedirs(results_path) log_file = os.path.join(results_path, "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S"))) configure_logger(logger, console_level=logging.INFO, file_name=log_file) log_base_learner_info(logger, base_learners_bool) # generate boolean data with complete test sets funcs_pickle_path = os.path.join(results_path, "boolean_funcs.pkl") tasks_data, tasks_complete_test_sets = \ synthetic_data.generate_boolean_data_with_complete_test_sets( attributes, disjunct_degree, n, task_groups, tasks_per_group, noise, random_seed=data_rnd_seed, n_learning_sets=n_learning_sets, funcs_pickle_path=funcs_pickle_path) # test the generated MTL problem test_tasks(tasks_data, results_path, base_learners_bool, measures_clas, learners, "pre-prepared_test", rnd_seed=rnd_seed, test=test, unpickle=unpickle, visualize=visualize, preprepared_test_sets=tasks_complete_test_sets, separate_figs=True, cfg_logger=False) if test_config == 2: # parameters of the synthetic Boolean MTL problem attributes = 16 disjunct_degree = 8 n = 200