def run_experiment(attributes, disjunct_degree, n, task_groups, tasks_per_group,
                   noise, data_rnd_seed, n_learning_sets, rnd_seed,
                   results_path, base_learners, measures, learners, test=True,
                   unpickle=False, visualize=True, weighting="all_equal",
                   error_margin="std", error_bars=True, separate_figs=True,
                   cfg_logger=True):
    """Run the binarization experiment according to the given parameters and
    save the results where indicated.

    Parameters
    ----------
    attributes : int
        Number of attributes/variables of the generated Boolean functions.
    disjunct_degree : int
        The expected number of attributes/variables in a disjunct.
    n : int
        The number of examples for each task to generate.
    task_groups : int
        The number of task groups to generate. Each task group shares the
        same Boolean function.
    tasks_per_group : int
        The number of tasks (with their corresponding data) to generate for
        each task group.
    noise : float
        The proportion of examples of each task that have their class values
        determined randomly.
    data_rnd_seed : int
        The random seed with which to initialize a private Random object.
    n_learning_sets : int
        The number of different learning sets to create for each task.
    rnd_seed : int
        The random seed to be used for the BinarizationExperimentMTLTester
        object.
    results_path : string
        The path where to store the results (if it doesn't exist, it will be
        created).
    base_learners : OrderedDict
        An ordered dictionary with items of the form (name, learner), where
        name is a string representing the base learner's name and learner is a
        scikit-learn estimator object.
    measures : list
        A list of strings representing measure's names.
    learners : OrderedDict
        An ordered dictionary with items of the form (name, learner), where
        name is a string representing the learner's name and learner is a
        merging learning algorithm (e.g. ERM, NoMerging, ...).
    test : boolean
        Indicates whether to perform tests on the MTL problem (with the given
        base_learners, measures and learners).
    unpickle : boolean
        Indicates whether to search for previously computed testing results
        and include them in the MTL problem.
    visualize : boolean
        Indicates whether to visualize the results of the current tasks (for
        each combination of base learners, measures and learners of the MTL
        problem).
    weighting : string
        Indicator of the type of weighting to use when computing the overall
        results.
    error_margin : string
        The measure to use for error margins when computing the overall
        results.
    error_bars : boolean
        Indicates whether to plot the error bars when visualizing the results.
    separate_figs : boolean
        Indicates whether to plot the results of each base learner as a
        separate figure or as one combined figure.
    cfg_logger : boolean
        Indicates whether to re-configure the global logger object.

    """
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if cfg_logger:
        # time-stamped log file so repeated runs don't overwrite each other
        log_file = os.path.join(
            results_path,
            "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S")))
        configure_logger(logger, console_level=logging.INFO,
                         file_name=log_file)
    pickle_path_fmt = os.path.join(results_path, "bl-{}.pkl")

    # generate boolean data with complete test sets
    funcs_pickle_path = os.path.join(results_path, "boolean_funcs.pkl")
    tasks_data, tasks_complete_test_sets = \
        synthetic_data.generate_boolean_data_with_complete_test_sets(
            attributes, disjunct_degree, n, task_groups, tasks_per_group,
            noise, random_seed=data_rnd_seed,
            n_learning_sets=n_learning_sets,
            funcs_pickle_path=funcs_pickle_path)

    # create a MTL tester with tasks' data
    mtlt = BinarizationExperimentMTLTester(
        tasks_data, rnd_seed, repeats=1,
        preprepared_test_sets=tasks_complete_test_sets)

    # test all combinations of learners and base learners (compute the testing
    # results with the defined measures) and save the results if test == True
    if test:
        mtlt.test_tasks(learners, base_learners, measures, results_path,
                        save_orange_data=True)
        mtlt.pickle_test_results(pickle_path_fmt)

    # find previously computed testing results and check if they were computed
    # using the same data tables and cross-validation indices if
    # unpickle == True
    if unpickle:
        mtlt.find_pickled_test_results(pickle_path_fmt)
        if not mtlt.check_test_results_compatible():
            raise ValueError("Test results for different base learners are not "
                             "compatible.")

    # visualize the results of the current tasks for each combination of base
    # learners, learners and measures that are in the MTL problem; in addition,
    # visualize the dendrograms showing merging history of ERM
    if visualize:
        if not mtlt.contains_test_results():
            raise ValueError("The MTLTester object doesn't contain any testing"
                             " results.")
        bls = mtlt.get_base_learners()
        ls = mtlt.get_learners()
        ms = mtlt.get_measures()
        mtlt.visualize_results(
            bls, ls, ms, results_path,
            {"ForcedTree": "blue", "Tree": "green", "ERM": "red"},
            error_bars=error_bars, separate_figs=separate_figs)
        mtlt.visualize_dendrograms(bls, results_path)
        mtlt.compute_overall_results(bls, ls, ms, results_path,
                                     weighting=weighting,
                                     error_margin=error_margin)
        convert_svgs_to_pdfs(results_path)
        # NOTE: dots before the "tex" extension are escaped so the patterns
        # match a literal ".tex" suffix instead of any character + "tex"
        build_and_crop_tex_files(results_path, r"-tikz\.tex$")
        combine_dendrograms_and_trees(bls, results_path)
        build_and_crop_tex_files(results_path, r"^comparison.*\.tex$",
                                 crop=False)
def _test_config_44(data_rnd_seed_values, noise_values,
                    results_dir="results/synthetic_data"):
    """Testing configuration for the changing amount of noise experiment.

    Repeats the experiment once per data_rnd_seed value; depending on the
    module-level ``mode``, either runs the experiment for every noise value
    ("run") and/or combines the per-noise results into summary plots
    ("combine").

    NOTE(review): this function reads several module-level names
    (``path_prefix``, ``mode``, ``base_learners_bool``, ``measures_clas``,
    ``learners``, ``test``, ``unpickle``, ``visualize``, ``error_measure``) —
    they must be set before calling it.

    Parameters
    ----------
    data_rnd_seed_values : list
        Values of data_rnd_seed for which to repeat the experiment.
    noise_values : list
        Values of the amount of noise for which to repeat the experiment.
    results_dir : str
        Directory in which to put the changing_noise directory with the
        relevant experiment results.

    """
    # parameters of the synthetic Boolean MTL problem
    attributes = 12
    disjunct_degree = 6
    n = 50
    task_groups = 5
    tasks_per_group = 5
    noise = 0.0
    n_learning_sets = 10
    # parameters of the MTL problem tester
    rnd_seed = 51
    # dynamic parameters of the synthetic Boolean MTL problem
    for data_rnd_seed in data_rnd_seed_values:
        # "nse{{}}" leaves a "{}" placeholder in the result so the path can
        # later be filled in with each noise value
        results_path_fmt = os.path.join(
            path_prefix, results_dir,
            "changing_noise/bool_func-a{}d{}n{}g{}tg{}"
            "nse{{}}rs{}nls{}-seed{}-complete_test".format(
                attributes, disjunct_degree, n, task_groups, tasks_per_group,
                data_rnd_seed, n_learning_sets, rnd_seed))
        if "run" in mode:
            for noise in noise_values:
                # prepare directories and loggers
                results_path = results_path_fmt.format(noise)
                if not os.path.exists(results_path):
                    os.makedirs(results_path)
                # time-stamped log file for this (seed, noise) combination
                log_file = os.path.join(
                    results_path,
                    "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S")))
                configure_logger(logger, console_level=logging.INFO,
                                 file_name=log_file)
                log_base_learner_info(logger, base_learners_bool)
                # generate boolean data with complete test sets
                funcs_pickle_path = os.path.join(results_path,
                                                 "boolean_funcs.pkl")
                tasks_data, tasks_complete_test_sets = \
                    synthetic_data.generate_boolean_data_with_complete_test_sets(
                        attributes, disjunct_degree, n, task_groups,
                        tasks_per_group, noise, random_seed=data_rnd_seed,
                        n_learning_sets=n_learning_sets,
                        funcs_pickle_path=funcs_pickle_path)
                # test the generated MTL problem
                test_tasks(tasks_data, results_path, base_learners_bool,
                           measures_clas, learners, "pre-prepared_test",
                           rnd_seed=rnd_seed, test=test, unpickle=unpickle,
                           visualize=visualize,
                           preprepared_test_sets=tasks_complete_test_sets,
                           separate_figs=True, cfg_logger=False)
        if "combine" in mode:
            # merge the per-noise results of this seed into one summary PDF
            combine_experiment_results(
                results_path_fmt, noise_values,
                (results_path_fmt.format(pprint_iter(noise_values)) +
                 "-{}-{{}}.pdf".format(error_measure)),
                n_learning_sets, error_measure=error_measure,
                title="Avg. results for tasks", xlabel="% of noise")
n_learning_sets, rnd_seed))) if not os.path.exists(results_path): os.makedirs(results_path) log_file = os.path.join( results_path, "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S"))) configure_logger(logger, console_level=logging.INFO, file_name=log_file) log_base_learner_info(logger, base_learners_bool) # generate boolean data with complete test sets funcs_pickle_path = os.path.join(results_path, "boolean_funcs.pkl") tasks_data, tasks_complete_test_sets = \ synthetic_data.generate_boolean_data_with_complete_test_sets( attributes, disjunct_degree, n, task_groups, tasks_per_group, noise, random_seed=data_rnd_seed, n_learning_sets=n_learning_sets, funcs_pickle_path=funcs_pickle_path) # test the generated MTL problem test_tasks(tasks_data, results_path, base_learners_bool, measures_clas, learners, "pre-prepared_test", rnd_seed=rnd_seed, test=test, unpickle=unpickle, visualize=visualize, preprepared_test_sets=tasks_complete_test_sets, separate_figs=True,
def run_experiment(attributes, disjunct_degree, n, task_groups, tasks_per_group,
                   noise, data_rnd_seed, n_learning_sets, rnd_seed,
                   results_path, base_learners, measures, learners, test=True,
                   unpickle=False, visualize=True, weighting="all_equal",
                   error_margin="std", error_bars=True, separate_figs=True,
                   cfg_logger=True):
    """Run the binarization experiment according to the given parameters and
    save the results where indicated.

    Parameters
    ----------
    attributes : int
        Number of attributes/variables of the generated Boolean functions.
    disjunct_degree : int
        The expected number of attributes/variables in a disjunct.
    n : int
        The number of examples for each task to generate.
    task_groups : int
        The number of task groups to generate. Each task group shares the
        same Boolean function.
    tasks_per_group : int
        The number of tasks (with their corresponding data) to generate for
        each task group.
    noise : float
        The proportion of examples of each task that have their class values
        determined randomly.
    data_rnd_seed : int
        The random seed with which to initialize a private Random object.
    n_learning_sets : int
        The number of different learning sets to create for each task.
    rnd_seed : int
        The random seed to be used for the BinarizationExperimentMTLTester
        object.
    results_path : string
        The path where to store the results (if it doesn't exist, it will be
        created).
    base_learners : OrderedDict
        An ordered dictionary with items of the form (name, learner), where
        name is a string representing the base learner's name and learner is a
        scikit-learn estimator object.
    measures : list
        A list of strings representing measure's names.
    learners : OrderedDict
        An ordered dictionary with items of the form (name, learner), where
        name is a string representing the learner's name and learner is a
        merging learning algorithm (e.g. ERM, NoMerging, ...).
    test : boolean
        Indicates whether to perform tests on the MTL problem (with the given
        base_learners, measures and learners).
    unpickle : boolean
        Indicates whether to search for previously computed testing results
        and include them in the MTL problem.
    visualize : boolean
        Indicates whether to visualize the results of the current tasks (for
        each combination of base learners, measures and learners of the MTL
        problem).
    weighting : string
        Indicator of the type of weighting to use when computing the overall
        results.
    error_margin : string
        The measure to use for error margins when computing the overall
        results.
    error_bars : boolean
        Indicates whether to plot the error bars when visualizing the results.
    separate_figs : boolean
        Indicates whether to plot the results of each base learner as a
        separate figure or as one combined figure.
    cfg_logger : boolean
        Indicates whether to re-configure the global logger object.

    """
    if not os.path.exists(results_path):
        os.makedirs(results_path)
    if cfg_logger:
        # time-stamped log file so repeated runs don't overwrite each other
        log_file = os.path.join(
            results_path,
            "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S")))
        configure_logger(logger, console_level=logging.INFO,
                         file_name=log_file)
    pickle_path_fmt = os.path.join(results_path, "bl-{}.pkl")

    # generate boolean data with complete test sets
    funcs_pickle_path = os.path.join(results_path, "boolean_funcs.pkl")
    tasks_data, tasks_complete_test_sets = \
        synthetic_data.generate_boolean_data_with_complete_test_sets(
            attributes, disjunct_degree, n, task_groups, tasks_per_group,
            noise, random_seed=data_rnd_seed,
            n_learning_sets=n_learning_sets,
            funcs_pickle_path=funcs_pickle_path)

    # create a MTL tester with tasks' data
    mtlt = BinarizationExperimentMTLTester(
        tasks_data, rnd_seed, repeats=1,
        preprepared_test_sets=tasks_complete_test_sets)

    # test all combinations of learners and base learners (compute the testing
    # results with the defined measures) and save the results if test == True
    if test:
        mtlt.test_tasks(learners, base_learners, measures, results_path,
                        save_orange_data=True)
        mtlt.pickle_test_results(pickle_path_fmt)

    # find previously computed testing results and check if they were computed
    # using the same data tables and cross-validation indices if
    # unpickle == True
    if unpickle:
        mtlt.find_pickled_test_results(pickle_path_fmt)
        if not mtlt.check_test_results_compatible():
            raise ValueError(
                "Test results for different base learners are not "
                "compatible.")

    # visualize the results of the current tasks for each combination of base
    # learners, learners and measures that are in the MTL problem; in addition,
    # visualize the dendrograms showing merging history of ERM
    if visualize:
        if not mtlt.contains_test_results():
            raise ValueError("The MTLTester object doesn't contain any testing"
                             " results.")
        bls = mtlt.get_base_learners()
        ls = mtlt.get_learners()
        ms = mtlt.get_measures()
        mtlt.visualize_results(
            bls, ls, ms, results_path,
            {"ForcedTree": "blue", "Tree": "green", "ERM": "red"},
            error_bars=error_bars, separate_figs=separate_figs)
        mtlt.visualize_dendrograms(bls, results_path)
        mtlt.compute_overall_results(bls, ls, ms, results_path,
                                     weighting=weighting,
                                     error_margin=error_margin)
        convert_svgs_to_pdfs(results_path)
        # NOTE: dots before the "tex" extension are escaped so the patterns
        # match a literal ".tex" suffix instead of any character + "tex"
        build_and_crop_tex_files(results_path, r"-tikz\.tex$")
        combine_dendrograms_and_trees(bls, results_path)
        build_and_crop_tex_files(results_path, r"^comparison.*\.tex$",
                                 crop=False)
def _test_config_44(data_rnd_seed_values, noise_values,
                    results_dir="results/synthetic_data"):
    """Testing configuration for the changing amount of noise experiment.

    Repeats the experiment once per data_rnd_seed value; depending on the
    module-level ``mode``, either runs the experiment for every noise value
    ("run") and/or combines the per-noise results into summary plots
    ("combine").

    NOTE(review): this function reads several module-level names
    (``path_prefix``, ``mode``, ``base_learners_bool``, ``measures_clas``,
    ``learners``, ``test``, ``unpickle``, ``visualize``, ``error_measure``) —
    they must be set before calling it.

    Parameters
    ----------
    data_rnd_seed_values : list
        Values of data_rnd_seed for which to repeat the experiment.
    noise_values : list
        Values of the amount of noise for which to repeat the experiment.
    results_dir : str
        Directory in which to put the changing_noise directory with the
        relevant experiment results.

    """
    # parameters of the synthetic Boolean MTL problem
    attributes = 12
    disjunct_degree = 6
    n = 50
    task_groups = 5
    tasks_per_group = 5
    noise = 0.0
    n_learning_sets = 10
    # parameters of the MTL problem tester
    rnd_seed = 51
    # dynamic parameters of the synthetic Boolean MTL problem
    for data_rnd_seed in data_rnd_seed_values:
        # "nse{{}}" leaves a "{}" placeholder in the result so the path can
        # later be filled in with each noise value
        results_path_fmt = os.path.join(
            path_prefix, results_dir,
            "changing_noise/bool_func-a{}d{}n{}g{}tg{}"
            "nse{{}}rs{}nls{}-seed{}-complete_test".format(
                attributes, disjunct_degree, n, task_groups, tasks_per_group,
                data_rnd_seed, n_learning_sets, rnd_seed))
        if "run" in mode:
            for noise in noise_values:
                # prepare directories and loggers
                results_path = results_path_fmt.format(noise)
                if not os.path.exists(results_path):
                    os.makedirs(results_path)
                # time-stamped log file for this (seed, noise) combination
                log_file = os.path.join(
                    results_path,
                    "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S")))
                configure_logger(logger, console_level=logging.INFO,
                                 file_name=log_file)
                log_base_learner_info(logger, base_learners_bool)
                # generate boolean data with complete test sets
                funcs_pickle_path = os.path.join(results_path,
                                                 "boolean_funcs.pkl")
                tasks_data, tasks_complete_test_sets = \
                    synthetic_data.generate_boolean_data_with_complete_test_sets(
                        attributes, disjunct_degree, n, task_groups,
                        tasks_per_group, noise, random_seed=data_rnd_seed,
                        n_learning_sets=n_learning_sets,
                        funcs_pickle_path=funcs_pickle_path)
                # test the generated MTL problem
                test_tasks(tasks_data, results_path, base_learners_bool,
                           measures_clas, learners, "pre-prepared_test",
                           rnd_seed=rnd_seed, test=test, unpickle=unpickle,
                           visualize=visualize,
                           preprepared_test_sets=tasks_complete_test_sets,
                           separate_figs=True, cfg_logger=False)
        if "combine" in mode:
            # merge the per-noise results of this seed into one summary PDF
            combine_experiment_results(
                results_path_fmt, noise_values,
                (results_path_fmt.format(pprint_iter(noise_values)) +
                 "-{}-{{}}.pdf".format(error_measure)),
                n_learning_sets, error_measure=error_measure,
                title="Avg. results for tasks", xlabel="% of noise")
"".format(attributes, disjunct_degree, n, task_groups, tasks_per_group, noise, data_rnd_seed, n_learning_sets, rnd_seed))) if not os.path.exists(results_path): os.makedirs(results_path) log_file = os.path.join(results_path, "run-{}.log".format(time.strftime("%Y%m%d_%H%M%S"))) configure_logger(logger, console_level=logging.INFO, file_name=log_file) log_base_learner_info(logger, base_learners_bool) # generate boolean data with complete test sets funcs_pickle_path = os.path.join(results_path, "boolean_funcs.pkl") tasks_data, tasks_complete_test_sets = \ synthetic_data.generate_boolean_data_with_complete_test_sets( attributes, disjunct_degree, n, task_groups, tasks_per_group, noise, random_seed=data_rnd_seed, n_learning_sets=n_learning_sets, funcs_pickle_path=funcs_pickle_path) # test the generated MTL problem test_tasks(tasks_data, results_path, base_learners_bool, measures_clas, learners, "pre-prepared_test", rnd_seed=rnd_seed, test=test, unpickle=unpickle, visualize=visualize, preprepared_test_sets=tasks_complete_test_sets, separate_figs=True, cfg_logger=False) if test_config == 2: # parameters of the synthetic Boolean MTL problem attributes = 16 disjunct_degree = 8 n = 200