Beispiel #1
0
 def setUp(self):
     """Wipe the test output folder and create the CAVE instance under test."""
     # The three example runs that are handed to CAVE together.
     run_folders = [
         "examples/spear_qcp_small/example_output/run_1",
         "examples/spear_qcp_small/example_output/run_2",
         "examples/spear_qcp_small/example_output/run_3",
     ]

     # Remove leftovers of earlier runs; a missing folder is not an error.
     self.output = "test/test_files/test_output/"
     shutil.rmtree(self.output, ignore_errors=True)

     cave = CAVE(run_folders,
                 output=self.output,
                 missing_data_method="epm",
                 ta_exec_dir="examples/spear_qcp_small")
     self.cave = cave
     self.analyzer = cave.analyzer
Beispiel #2
0
 def setUpClass(self):
     """Create an empty output directory and the CAVE instance used by the tests.

     Any previous content of the output directory is removed first. The
     resulting CAVE object and its analyzer are stored on ``self``.
     """
     self.output_dir = "test/test_files/test_output/"
     # ignore_errors=True already tolerates a missing directory, so no
     # separate existence check is needed.
     shutil.rmtree(self.output_dir, ignore_errors=True)
     # makedirs creates missing parent directories as well and does not fail
     # if the directory is still there because the removal silently failed.
     os.makedirs(self.output_dir, exist_ok=True)
     self.cave = CAVE([
         "test/example_output/example_output/run_1",
         "test/example_output/example_output/run_2"
     ],
                      output_dir=self.output_dir,
                      validation_method="epm",
                      ta_exec_dir=["test/example_output"])
     self.analyzer = self.cave.analyzer
Beispiel #3
0
    def main_cli(self):
        """
        Main cli, implementing comparison between and analysis of Configuration-results.

        Parses ``sys.argv[1:]``, translates the parsed options into the
        ``analyzing_options`` mapping, creates a ``CAVE`` instance from them
        and calls its ``analyze`` method.

        Raises
        ------
        ImportError
            if fANOVA is requested but the ``fanova`` module cannot be imported
        ValueError
            if ``--pimp_sort_table_by`` names a parameter importance method
            that is not among the activated ones
        """

        def expand_globs(paths):
            """Expand every entry containing '*' via glob, keep all others as given."""
            expanded = []
            for path in paths:
                if '*' in path:
                    expanded.extend(glob.glob(path, recursive=True))
                else:
                    expanded.append(path)
            return expanded

        # Reset logging module (needs to happen before logger initalization)
        logging.shutdown()
        reload(logging)

        # Some choice-blocks, that can be reused throughout the CLI
        p_choices = [
            "all", "ablation", "forward_selection", "fanova", "lpi", "none"
        ]
        p_sort_by_choices = ["average"] + p_choices[1:-1]
        f_choices = [
            "all", "box_violin", "correlation", "clustering", "importance",
            "none"
        ]

        parser = ArgumentParser(
            formatter_class=SmartArgsDefHelpFormatter,
            add_help=False,
            description=
            'CAVE: Configuration Assessment Vizualisation and Evaluation')

        # The folders can be given either positionally or via --folders
        # (the latter is hidden from the help output).
        req_opts = parser.add_mutually_exclusive_group(required=True)
        req_opts.add_argument(
            "folders",
            nargs='*',
            # strings prefixed with raw| can be manually split with \n
            help="raw|path(s) to Configurator output-directory/ies",
            default=SUPPRESS)

        req_opts.add_argument("--folders",
                              nargs='*',
                              dest='folders',
                              default=SUPPRESS,
                              help=SUPPRESS)

        cave_opts = parser.add_argument_group(
            "CAVE global options",
            "Options that configure the analysis in general and define behaviour."
        )
        cave_opts.add_argument(
            "--verbose_level",
            default="INFO",
            choices=["INFO", "DEBUG", "DEV_DEBUG", "WARNING", "OFF"],
            help=
            "verbose level. use DEV_DEBUG for development to filter boilerplate-logs from "
            "imported modules, use DEBUG for full logging. full debug-log always in "
            "'output/debug/debug.log' ")
        cave_opts.add_argument(
            "--jupyter",
            default='off',
            choices=['on', 'off'],
            help="output everything to jupyter, if available.")
        cave_opts.add_argument(
            "--validation",
            default="epm",
            # Fixed: the choice used to be "epm " (trailing space), which made
            # an explicitly passed "--validation epm" an invalid choice.
            choices=["validation", "epm"],
            help=
            "how to complete missing runs for config/inst-pairs. epm trains random forest with "
            "available data to estimate missing runs, validation requires target algorithm. ",
            type=str.lower)
        cave_opts.add_argument(
            "--output",
            default="CAVE_output_%s" % (datetime.fromtimestamp(
                time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f')),
            help="path to folder in which to save the HTML-report. ")
        cave_opts.add_argument("--seed",
                               default=42,
                               type=int,
                               help="random seed used throughout analysis. ")
        cave_opts.add_argument(
            "--file_format",
            default='auto',
            help="specify the format of the configurator-files. ",
            # Values are upper-cased by `type` before they are matched against
            # the choices, so the choices have to be upper-case as well
            # (a lower-case 'auto' choice could never be matched).
            choices=['AUTO', 'SMAC2', 'SMAC3', 'CSV', 'BOHB'],
            type=str.upper)
        cave_opts.add_argument("--validation_format",
                               default='NONE',
                               help="what format the validation-files are in",
                               choices=['SMAC2', 'SMAC3', 'CSV', 'NONE'],
                               type=str.upper)
        cave_opts.add_argument(
            "--ta_exec_dir",
            default='.',
            help=
            "path to the execution-directory of the configurator run. this is the path from "
            "which the scenario is loaded, so the instance-/pcs-files specified in the "
            "scenario, so they are relative to this path "
            "(e.g. 'ta_exec_dir/path_to_train_inst_specified_in_scenario.txt'). ",
            nargs='+')
        # PIMP-configs
        pimp_opts = parser.add_argument_group(
            "Parameter Importance",
            "Define the behaviour of the ParameterImportance-module (pimp)")
        pimp_opts.add_argument(
            "--pimp_max_samples",
            default=-1,
            type=int,
            help="How many datapoints to use with PIMP. -1 -> use all. ")
        pimp_opts.add_argument("--pimp_no_fanova_pairs",
                               action="store_false",
                               dest="fanova_pairwise",
                               help="fANOVA won't compute pairwise marginals")
        pimp_opts.add_argument(
            "--pimp_sort_table_by",
            default="average",
            choices=p_sort_by_choices,
            help="raw|what kind of parameter importance method to "
            "use to sort the overview-table. ")

        cfp_opts = parser.add_argument_group(
            "Configurator Footprint", "Finetune the configurator footprint")
        cfp_opts.add_argument(
            "--cfp_time_slider",
            help="whether or not to have a time_slider-widget on cfp-plot"
            "INCREASES FILE-SIZE (and loading) DRAMATICALLY. ",
            choices=["on", "off"],
            default="off")
        cfp_opts.add_argument(
            "--cfp_number_quantiles",
            help=
            "number of quantiles that configurator footprint should plot over time. ",
            default=3,
            type=int)
        cfp_opts.add_argument(
            "--cfp_max_configurations_to_plot",
            help=
            "maximum number of configurations to be plotted in configurator footprint (in case "
            "you run into a MemoryError). -1 -> plot all. ",
            default=-1,
            type=int)

        pc_opts = parser.add_argument_group(
            "Parallel Coordinates",
            "Finetune the parameter parallel coordinates")
        pc_opts.add_argument(
            "--pc_sort_by",
            help=
            "parameter-importance method to determine the order (and selection) of parameters "
            "for parallel coordinates. all: aggregate over all available methods. uses random "
            "method if none is given. ",
            default="all",
            type=str.lower,
            choices=p_choices)

        cot_opts = parser.add_argument_group(
            "Cost Over Time", "Finetune the cost over time plot")
        cot_opts.add_argument(
            "--cot_inc_traj",
            help=
            "if the optimizer belongs to HpBandSter (e.g. bohb), you can choose how the "
            "incumbent-trajectory will be interpreted with regards to the budget. You can "
            "choose from 'racing', which will only accept a configuration of a higher budget "
            "than the current incumbent's if the current incumbent has been evaluated on "
            "the higher budget; 'minimum', which will only look at the current performance "
            "no matter the budget; and 'prefer_higher_budget', which will always choose "
            "a configuration on a higher budget as incumbent as soon as it is available "
            "(this will likely lead to peaks, whenever a new budget is evaluated)",
            default="racing",
            type=str.lower,
            choices=["racing", "minimum", "prefer_higher_budget"])

        # General analysis to be carried out
        act_opts = parser.add_argument_group(
            "Analysis", "Which analysis methods should be carried out")
        act_opts.add_argument(
            "--parameter_importance",
            default="all",
            nargs='+',
            help="raw|what kind of parameter importance method to "
            "use. Choose any combination of\n[" + ', '.join(p_choices[1:-1]) +
            "] or set it to "
            "all/none",
            choices=p_choices,
            type=str.lower)
        act_opts.add_argument(
            "--feature_analysis",
            default="all",
            nargs='+',
            help="raw|what kind of feature analysis methods to use. "
            "Choose any combination of\n[" + ', '.join(f_choices[1:-1]) +
            "] or set it to "
            "all/none",
            choices=f_choices,
            type=str.lower)
        act_opts.add_argument("--no_performance_table",
                              action='store_false',
                              help="don't create performance table.",
                              dest='performance_table')
        act_opts.add_argument("--no_ecdf",
                              action='store_false',
                              help="don't plot ecdf.",
                              dest='ecdf')
        act_opts.add_argument("--no_scatter_plots",
                              action='store_false',
                              help="don't plot scatter plots.",
                              dest='scatter')
        act_opts.add_argument("--no_cost_over_time",
                              action='store_false',
                              help="don't plot cost over time.",
                              dest='cost_over_time')
        act_opts.add_argument("--no_configurator_footprint",
                              action='store_false',
                              help="don't plot configurator footprint.",
                              dest='configurator_footprint')
        act_opts.add_argument("--no_parallel_coordinates",
                              action='store_false',
                              help="don't plot parallel coordinates.",
                              dest='parallel_coordinates')
        act_opts.add_argument("--no_algorithm_footprints",
                              action='store_false',
                              help="don't plot algorithm footprints.",
                              dest='algorithm_footprints')
        act_opts.add_argument("--no_budget_correlation",
                              action='store_false',
                              help="don't plot budget correlation.",
                              dest='budget_correlation')
        # This flag DISABLES the plot, so it gets the same "no_"-prefix as its
        # siblings; the historic spelling without prefix stays as an alias so
        # that existing command lines keep working.
        act_opts.add_argument("--no_bohb_learning_curves",
                              "--bohb_learning_curves",
                              action='store_false',
                              help="don't plot bohb learning curves.",
                              dest='bohb_learning_curves')
        act_opts.add_argument("--no_incumbents_over_budgets",
                              action='store_false',
                              help="don't plot incumbents over budgets.",
                              dest='incumbents_over_budgets')

        spe_opts = parser.add_argument_group("Meta arguments")
        spe_opts.add_argument('-v',
                              '--version',
                              action='version',
                              version='%(prog)s ' + str(v),
                              help="show program's version number and exit.")
        spe_opts.add_argument('-h',
                              '--help',
                              action="help",
                              help="show this help message and exit")

        args_ = parser.parse_args(sys.argv[1:])

        # Expand configs
        # (the membership tests work for the plain-string default "all" as
        # well as for the list produced when the option is given explicitly)
        if "all" in args_.parameter_importance:
            param_imp = ["ablation", "forward_selection", "fanova", "lpi"]
        elif "none" in args_.parameter_importance:
            param_imp = []
        else:
            param_imp = args_.parameter_importance

        if "fanova" in param_imp:
            try:
                import fanova  # noqa
            except ImportError as err:
                raise ImportError(
                    'fANOVA is not installed! To install it please run '
                    '"git+http://github.com/automl/fanova.git@master"'
                ) from err

        if not (args_.pimp_sort_table_by == "average"
                or args_.pimp_sort_table_by in param_imp):
            raise ValueError("Pimp comparison sorting key is {}, but this "
                             "method is deactivated or non-existent.".format(
                                 args_.pimp_sort_table_by))

        if "all" in args_.feature_analysis:
            feature_analysis = [
                "box_violin", "correlation", "importance", "clustering"
            ]
        elif "none" in args_.feature_analysis:
            feature_analysis = []
        else:
            feature_analysis = args_.feature_analysis

        output_dir = args_.output

        # Configuration results to be analyzed
        folders = expand_globs(args_.folders)

        # Default ta_exec_dir is cwd. The default is a plain string while an
        # explicitly given option yields a list, so normalise to a list first
        # instead of iterating over the characters of the default string.
        ta_exec_dirs = args_.ta_exec_dir
        if isinstance(ta_exec_dirs, str):
            ta_exec_dirs = [ta_exec_dirs]
        ta_exec_dir = expand_globs(ta_exec_dirs)

        file_format = args_.file_format
        validation_format = args_.validation_format
        validation = args_.validation
        seed = args_.seed
        verbose_level = args_.verbose_level
        show_jupyter = args_.jupyter == 'on'

        analyzing_options = load_default_options(
            file_format=detect_fileformat(folders) if file_format.upper() ==
            "AUTO" else file_format)

        # All option values are stored as strings.
        analyzing_options["Ablation"]["run"] = str('ablation' in param_imp)
        analyzing_options["Algorithm Footprint"]["run"] = str(
            args_.algorithm_footprints)
        analyzing_options["Budget Correlation"]["run"] = str(
            args_.budget_correlation)
        analyzing_options["BOHB Learning Curves"]["run"] = str(
            args_.bohb_learning_curves)
        analyzing_options["Configurator Footprint"]["run"] = str(
            args_.configurator_footprint)
        analyzing_options["Configurator Footprint"]["time_slider"] = str(
            args_.cfp_time_slider)
        analyzing_options["Configurator Footprint"]["number_quantiles"] = str(
            args_.cfp_number_quantiles)
        analyzing_options["Configurator Footprint"][
            "max_configurations_to_plot"] = str(
                args_.cfp_max_configurations_to_plot)
        analyzing_options["Cost Over Time"]["run"] = str(args_.cost_over_time)
        analyzing_options["Cost Over Time"]["incumbent_trajectory"] = str(
            args_.cot_inc_traj)
        analyzing_options["empirical Cumulative Distribution Function (eCDF)"][
            "run"] = str(args_.ecdf)
        analyzing_options["fANOVA"]["run"] = str('fanova' in param_imp)
        analyzing_options["fANOVA"]["fanova_pairwise"] = str(
            args_.fanova_pairwise)
        analyzing_options["fANOVA"]["pimp_max_samples"] = str(
            args_.pimp_max_samples)
        analyzing_options["Feature Clustering"]["run"] = str(
            'clustering' in feature_analysis)
        analyzing_options["Feature Correlation"]["run"] = str(
            'correlation' in feature_analysis)
        analyzing_options["Feature Importance"]["run"] = str(
            'importance' in feature_analysis)
        analyzing_options["Forward Selection"]["run"] = str(
            'forward_selection' in param_imp)
        analyzing_options["Importance Table"]["sort_table_by"] = str(
            args_.pimp_sort_table_by)
        analyzing_options["Incumbents Over Budgets"]["run"] = str(
            args_.incumbents_over_budgets)
        analyzing_options["Local Parameter Importance (LPI)"]["run"] = str(
            'lpi' in param_imp)
        analyzing_options["Parallel Coordinates"]["run"] = str(
            args_.parallel_coordinates)
        analyzing_options["Parallel Coordinates"]["pc_sort_by"] = str(
            args_.pc_sort_by)
        analyzing_options["Performance Table"]["run"] = str(
            args_.performance_table)

        cave = CAVE(
            folders,
            output_dir,
            ta_exec_dir,
            file_format=file_format,
            validation_format=validation_format,
            validation_method=validation,
            show_jupyter=show_jupyter,
            seed=seed,
            verbose_level=verbose_level,
            analyzing_options=analyzing_options,
        )

        # Fall back to the root logger if the CAVE object has no logger.
        try:
            cave.logger.debug("CAVE is called with arguments: " + str(args_))
        except AttributeError:
            logging.getLogger().warning(
                "Something went wrong with CAVE-initialization... (it's fine for running nosetests)"
            )
            logging.getLogger().debug("CAVE is called with arguments: " +
                                      str(args_))

        # Analyze
        cave.analyze()
Beispiel #4
0
    def main_cli(self):
        """
        Main cli, implementing comparison between and analysis of SMAC-results.

        Parses the command line, configures logging according to
        ``--verbose_level``, creates a ``CAVE`` instance for the given folders
        and calls its ``analyze`` method with the selected analysis methods.
        Command-line arguments that the parser does not know are ignored, but
        reported with a warning.
        """
        parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter)
        req_opts = parser.add_argument_group("Required Options")
        req_opts.add_argument("--folders",
                              required=True,
                              nargs='+',
                              help="path(s) to SMAC output-directory/ies, "
                              "containing each at least a runhistory and "
                              "a trajectory.")

        opt_opts = parser.add_argument_group("Optional Options")
        opt_opts.add_argument("--verbose_level",
                              default="INFO",
                              choices=["INFO", "DEBUG"],
                              help="verbose level")
        opt_opts.add_argument("--validation",
                              default="epm",
                              choices=["validation", "epm"],
                              help="how to complete missing runs for "
                              "config/inst-pairs.")
        opt_opts.add_argument(
            "--output",
            default="CAVE_output",
            help="path to folder in which to save the HTML-report.")
        opt_opts.add_argument("--ta_exec_dir",
                              default=None,
                              help="path to the execution-directory of the "
                              "SMAC run.")

        opt_opts.add_argument("--param_importance",
                              default="all",
                              nargs='+',
                              help="what kind of parameter importance to "
                              "calculate",
                              choices=[
                                  "all", "ablation", "forward_selection",
                                  "fanova", "incneighbor", "none"
                              ])
        opt_opts.add_argument("--max_pimp_samples",
                              default=-1,
                              type=int,
                              help="How many datapoints to use with PIMP")
        opt_opts.add_argument("--pimp_no_fanova_pairs",
                              action="store_false",
                              dest="fanova_pairwise",
                              help="fANOVA won't compute pairwise marginals")
        # Fixed: the help text used to be a copy of the one for
        # --param_importance.
        opt_opts.add_argument("--feat_analysis",
                              default="all",
                              nargs='+',
                              help="what kind of feature analysis to "
                              "calculate",
                              choices=[
                                  "all", "box_violin", "correlation",
                                  "clustering", "importance", "none"
                              ])
        opt_opts.add_argument("--cost_over_time",
                              default="true",
                              choices=["true", "false"],
                              help="whether to plot cost over time.")
        opt_opts.add_argument("--confviz",
                              default="true",
                              choices=["true", "false"],
                              help="whether to visualize configs.")
        opt_opts.add_argument("--parallel_coordinates",
                              default="true",
                              choices=["true", "false"],
                              help="whether to plot parallel coordinates.")
        opt_opts.add_argument("--algorithm_footprints",
                              default="true",
                              choices=["true", "false"],
                              help="whether to plot algorithm footprints.")

        args_, unknown_args = parser.parse_known_args()

        if args_.verbose_level == "INFO":
            logging.basicConfig(level=logging.INFO)
        else:
            logging.basicConfig(level=logging.DEBUG)

        # Unknown arguments are still tolerated, but no longer dropped
        # silently, so that typos in option names become visible.
        if unknown_args:
            logging.getLogger().warning(
                "Ignoring unrecognized arguments: %s", unknown_args)

        # SMAC results (entries containing '*' are expanded via glob)
        folders = []
        for f in args_.folders:
            if '*' in f:
                folders.extend(glob.glob(f, recursive=True))
            else:
                folders.append(f)
        cave = CAVE(folders,
                    args_.output,
                    args_.ta_exec_dir,
                    missing_data_method=args_.validation,
                    max_pimp_samples=args_.max_pimp_samples,
                    fanova_pairwise=args_.fanova_pairwise)
        # Expand configs
        # (the membership tests work for the plain-string default "all" as
        # well as for the list produced when the option is given explicitly)
        if "all" in args_.param_importance:
            param_imp = [
                "ablation", "forward_selection", "fanova", "incneighbor"
            ]
        elif "none" in args_.param_importance:
            param_imp = []
        else:
            param_imp = args_.param_importance

        if "all" in args_.feat_analysis:
            # NOTE(review): "feature_cdf" is part of "all" but is not offered
            # as an individual choice of --feat_analysis - confirm intended.
            feature_analysis = [
                "box_violin", "correlation", "importance", "clustering",
                "feature_cdf"
            ]
        elif "none" in args_.feat_analysis:
            feature_analysis = []
        else:
            feature_analysis = args_.feat_analysis

        # Analyze
        cave.analyze(performance=True,
                     cdf=True,
                     scatter=True,
                     confviz=args_.confviz == "true",
                     parallel_coordinates=args_.parallel_coordinates == "true",
                     cost_over_time=args_.cost_over_time == "true",
                     algo_footprint=args_.algorithm_footprints == "true",
                     param_importance=param_imp,
                     feature_analysis=feature_analysis)
Beispiel #5
0
          "-- 'firefox' will open all reports in firefox.")

if __name__ == '__main__':
    # Script entry point: the first command-line argument selects the action
    # ('generate', 'cave', 'firefox' or 'clean'); without an argument, or with
    # an unknown one, the help is printed.
    logging.basicConfig(level=logging.DEBUG)

    if len(sys.argv) < 2:
        print_help()
    elif sys.argv[1] == 'generate':
        # Run SMAC once per scenario, with a fixed random seed.
        for scen in get_scenarios():
            scenario = Scenario(scen)
            smac = SMAC(scenario=scenario, rng=np.random.RandomState(42))
            smac.optimize()
    elif sys.argv[1] == 'cave':
        for scen in get_scenarios():
            # os.listdir returns entries in arbitrary order, so sort them to
            # pick the same run-folder on every invocation.
            run_folders = sorted(f for f in os.listdir(scen['output_dir'])
                                 if f.startswith('run'))
            if not run_folders:
                # Report the missing data explicitly instead of failing with
                # a bare IndexError.
                logging.error("No run-folder found in %s, skipping scenario.",
                              scen['output_dir'])
                continue
            cave = CAVE([os.path.join(scen['output_dir'], run_folders[0])],
                        os.path.join(scen['output_dir'], 'CAVE_RESULT'),
                        ta_exec_dir='.', validation_method='validation')
            cave.analyze(param_importance=['ablation', 'forward_selection', 'lpi'], cfp_number_quantiles=2)
    elif sys.argv[1] == 'firefox':
        import webbrowser
        firefox = webbrowser.get('firefox')
        for url in [os.path.join(scen['output_dir'], 'CAVE_RESULT/report.html') for scen in get_scenarios()]:
            firefox.open_new_tab(url)
    elif sys.argv[1] == 'clean':
        # Cleaning when there is nothing to clean is not an error.
        shutil.rmtree('test/general_example/results', ignore_errors=True)
    else:
        logging.error("%s not an option.", sys.argv[1])
        print_help()
Beispiel #6
0
    def main_cli(self):
        """
        Main cli, implementing comparison between and analysis of Configuration-results.
        """
        # Some choice-blocks, that can be reused throughout the CLI
        p_choices = [
            "all", "ablation", "forward_selection", "fanova", "lpi", "none"
        ]
        p_sort_by_choices = ["average"] + p_choices[1:-1]
        f_choices = [
            "all", "box_violin", "correlation", "clustering", "importance",
            "none"
        ]

        parser = ArgumentParser(
            formatter_class=SmartArgsDefHelpFormatter,
            add_help=False,
            description=
            'CAVE: Configuration Assessment Vizualisation and Evaluation')

        req_opts = parser.add_argument_group("Required Options:" + '~' * 100)
        req_opts.add_argument(
            "--folders",
            required=True,
            nargs='+',
            # strings prefixed with raw| can be manually split with \n
            help="raw|path(s) to SMAC output-directory/ies, "
            "containing each at least a runhistory\nand "
            "a trajectory.",
            default=SUPPRESS)

        opt_opts = parser.add_argument_group("Optional Options:" + '~' * 100)
        opt_opts.add_argument(
            "--verbose_level",
            default="INFO",
            choices=["INFO", "DEBUG", "DEV_DEBUG", "WARNING", "OFF"],
            help=
            "verbose level. use DEV_DEBUG for development to filter boilerplate-logs from "
            "imported modules, use DEBUG for full logging. full debug-log always in "
            "'output/debug/debug.log' ")
        opt_opts.add_argument(
            "--jupyter",
            default='off',
            choices=['on', 'off'],
            help="output everything to jupyter, if available.")
        opt_opts.add_argument(
            "--validation",
            default="epm",
            choices=["validation", "epm"],
            help=
            "how to complete missing runs for config/inst-pairs. epm trains random forest with "
            "available data to estimate missing runs, validation requires target algorithm. ",
            type=str.lower)
        opt_opts.add_argument(
            "--output",
            default="CAVE_output_%s" % (datetime.fromtimestamp(
                time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f')),
            help="path to folder in which to save the HTML-report. ")
        opt_opts.add_argument("--seed",
                              default=42,
                              type=int,
                              help="random seed used throughout analysis. ")
        opt_opts.add_argument(
            "--file_format",
            default='SMAC3',
            help="specify the format of the configurator-files. ",
            choices=['SMAC2', 'SMAC3', 'CSV', 'BOHB'],
            type=str.upper)
        opt_opts.add_argument("--validation_format",
                              default='NONE',
                              help="what format the validation-files are in",
                              choices=['SMAC2', 'SMAC3', 'CSV', 'NONE'],
                              type=str.upper)
        opt_opts.add_argument(
            "--ta_exec_dir",
            default='.',
            help=
            "path to the execution-directory of the configurator run. this is the path from "
            "which the scenario is loaded, so the instance-/pcs-files specified in the "
            "scenario, so they are relative to this path "
            "(e.g. 'ta_exec_dir/path_to_train_inst_specified_in_scenario.txt'). ",
            nargs='+')
        # PIMP-configs
        opt_opts.add_argument(
            "--pimp_max_samples",
            default=-1,
            type=int,
            help="How many datapoints to use with PIMP. -1 -> use all. ")
        opt_opts.add_argument("--pimp_no_fanova_pairs",
                              action="store_false",
                              dest="fanova_pairwise",
                              help="fANOVA won't compute pairwise marginals")
        opt_opts.add_argument(
            "--pimp_sort_table_by",
            default="average",
            choices=p_sort_by_choices,
            help="raw|what kind of parameter importance method to "
            "use to sort the overview-table. ")
        opt_opts.add_argument(
            "--parameter_importance",
            default="all",
            nargs='+',
            help="raw|what kind of parameter importance method to "
            "use. Choose any combination of\n[" + ', '.join(p_choices[1:-1]) +
            "] or set it to "
            "all/none",
            choices=p_choices,
            type=str.lower)
        opt_opts.add_argument(
            "--feature_analysis",
            default="all",
            nargs='+',
            help="raw|what kind of feature analysis methods to use. "
            "Choose any combination of\n[" + ', '.join(f_choices[1:-1]) +
            "] or set it to "
            "all/none",
            choices=f_choices,
            type=str.lower)
        opt_opts.add_argument(
            "--cfp_time_slider",
            help="whether or not to have a time_slider-widget on cfp-plot"
            "INCREASES FILE-SIZE (and loading) DRAMATICALLY. ",
            choices=["on", "off"],
            default="off")
        opt_opts.add_argument(
            "--cfp_number_quantiles",
            help=
            "number of quantiles that configurator footprint should plot over time. ",
            default=3,
            type=int)
        opt_opts.add_argument(
            "--cfp_max_plot",
            help=
            "maximum number of configurations to be plotted in configurator footprint (in case "
            "you run into a MemoryError). -1 -> plot all. ",
            default=-1,
            type=int)
        opt_opts.add_argument("--no_tabular_analysis",
                              action='store_false',
                              help="don't create performance table.",
                              dest='tabular_analysis')
        opt_opts.add_argument("--no_ecdf",
                              action='store_false',
                              help="don't plot ecdf.",
                              dest='ecdf')
        opt_opts.add_argument("--no_scatter_plots",
                              action='store_false',
                              help="don't plot scatter plots.",
                              dest='scatter_plots')
        opt_opts.add_argument("--no_cost_over_time",
                              action='store_false',
                              help="don't plot cost over time.",
                              dest='cost_over_time')
        opt_opts.add_argument("--no_configurator_footprint",
                              action='store_false',
                              help="don't plot configurator footprint.",
                              dest='cfp')
        opt_opts.add_argument("--no_parallel_coordinates",
                              action='store_false',
                              help="don't plot parallel coordinates.",
                              dest='parallel_coordinates')
        opt_opts.add_argument("--no_algorithm_footprints",
                              action='store_false',
                              help="don't plot algorithm footprints.",
                              dest='algorithm_footprints')

        spe_opts = parser.add_argument_group("special arguments:" + '~' * 100)
        spe_opts.add_argument('-v',
                              '--version',
                              action='version',
                              version='%(prog)s ' + str(v),
                              help="show program's version number and exit.")
        spe_opts.add_argument('-h',
                              '--help',
                              action="help",
                              help="show this help message and exit")

        args_ = parser.parse_args(sys.argv[1:])

        # Expand configs
        if "all" in args_.parameter_importance:
            param_imp = ["ablation", "forward_selection", "fanova", "lpi"]
        elif "none" in args_.parameter_importance:
            param_imp = []
        else:
            param_imp = args_.parameter_importance

        if "fanova" in param_imp:
            try:
                import fanova  # noqa
            except ImportError:
                raise ImportError(
                    'fANOVA is not installed! To install it please run '
                    '"git+http://github.com/automl/fanova.git@master"')

        if not (args_.pimp_sort_table_by == "average"
                or args_.pimp_sort_table_by in param_imp):
            raise ValueError("Pimp comparison sorting key is {}, but this "
                             "method is deactivated or non-existent.".format(
                                 args_.pimp_sort_table_by))

        if "all" in args_.feature_analysis:
            feature_analysis = [
                "box_violin", "correlation", "importance", "clustering",
                "feature_cdf"
            ]
        elif "none" in args_.feature_analysis:
            feature_analysis = []
        else:
            feature_analysis = args_.feature_analysis

        cfp_time_slider = True if args_.cfp_time_slider == "on" else False

        if not (args_.tabular_analysis or args_.ecdf or args_.scatter_plots
                or args_.cfp or args_.parallel_coordinates
                or args_.parallel_coordinates or args_.cost_over_time or
                args_.algorithm_footprints or param_imp or feature_analysis):
            raise ValueError(
                'At least one analysis method required to run CAVE')

        output_dir = args_.output

        logging.getLogger().debug("CAVE is called with arguments: " +
                                  str(args_))

        # Configuration results to be analyzed
        folders = []
        for f in args_.folders:
            if '*' in f:
                folders.extend(list(glob.glob(f, recursive=True)))
            else:
                folders.append(f)
        # Default ta_exec_dir is cwd
        ta_exec_dir = []
        for t in args_.ta_exec_dir:
            if '*' in t:
                ta_exec_dir.extend(list(glob.glob(t, recursive=True)))
            else:
                ta_exec_dir.append(t)

        tabular_analysis = args_.tabular_analysis
        file_format = args_.file_format
        validation_format = args_.validation_format
        validation = args_.validation
        pimp_max_samples = args_.pimp_max_samples
        fanova_pairwise = args_.fanova_pairwise
        seed = args_.seed
        ecdf = args_.ecdf
        scatter_plots = args_.scatter_plots
        cfp = args_.cfp
        cfp_time_slider = args_.cfp_time_slider == 'on'
        cfp_max_plot = args_.cfp_max_plot
        cfp_number_quantiles = args_.cfp_number_quantiles
        parallel_coordinates = args_.parallel_coordinates
        cost_over_time = args_.cost_over_time
        algorithm_footprints = args_.algorithm_footprints
        pimp_sort_table_by = args_.pimp_sort_table_by
        verbose_level = args_.verbose_level
        show_jupyter = args_.jupyter == 'on'

        if file_format == 'BOHB':
            logging.getLogger().info(
                "File format is BOHB, performing special nested analysis for budget-based optimizer!"
            )
            validation_format = 'NONE'
            validation_method = 'epm'
            cdf = False
            scatter = False
            algo_footprint = False
            param_imp = [p for p in param_imp if not p == 'forward_selection']
            feature_analysis = []

        cave = CAVE(folders,
                    output_dir,
                    ta_exec_dir,
                    file_format=file_format,
                    validation_format=validation_format,
                    validation_method=validation,
                    pimp_max_samples=pimp_max_samples,
                    fanova_pairwise=fanova_pairwise,
                    use_budgets=file_format == 'BOHB',
                    show_jupyter=show_jupyter,
                    seed=seed,
                    verbose_level=verbose_level)

        # Analyze
        cave.analyze(performance=tabular_analysis,
                     cdf=ecdf,
                     scatter=scatter_plots,
                     cfp=cfp,
                     cfp_time_slider=cfp_time_slider,
                     cfp_max_plot=cfp_max_plot,
                     cfp_number_quantiles=cfp_number_quantiles,
                     parallel_coordinates=parallel_coordinates,
                     cost_over_time=cost_over_time,
                     algo_footprint=algorithm_footprints,
                     param_importance=param_imp,
                     pimp_sort_table_by=pimp_sort_table_by,
                     feature_analysis=feature_analysis)
Beispiel #7
0
    # Dispatch on the first command-line argument:
    #   (none)      -> print usage
    #   --generate  -> create the BOHB data and run SMAC on every scenario
    #   --cave      -> run CAVE on the first 'run*' folder of every scenario
    #   --firefox   -> open every generated report in firefox
    #   --clean     -> delete the results directory
    if len(sys.argv) < 2:
        print_help()
    elif sys.argv[1] == '--generate':
        generate_bohb_data()
        for scen in get_scenarios():
            scenario = Scenario(scen)
            smac = SMAC4AC(scenario=scenario, rng=np.random.RandomState(42))
            smac.optimize()
    elif sys.argv[1] == '--cave':
        failed = []
        for scen in get_scenarios():
            try:
                # Raises IndexError if the scenario has no 'run*' folder; that is
                # treated as a failure of this scenario as well.
                folder = [f for f in os.listdir(scen['output_dir']) if f.startswith('run')][0]
                cave = CAVE([os.path.join(scen['output_dir'], folder)],
                            os.path.join(scen['output_dir'], 'CAVE_RESULT'),
                            ta_exec_dir=['.'], validation_method='validation')
                cave.analyze({'fANOVA' : False, 'number_quantiles' : 2})
            except Exception:
                # Previously a bare `except:` re-raised immediately, which made the
                # bookkeeping below unreachable and the "Failed:" summary always empty.
                # Log the traceback, remember the scenario and keep going so that all
                # scenarios are attempted and reported.
                logging.exception("CAVE failed for %s", scen['output_dir'])
                failed.append(scen['output_dir'])
        print("Failed: %s" % (str(failed)))
    elif sys.argv[1] == '--firefox':
        import webbrowser
        firefox = webbrowser.get('firefox')
        for url in [os.path.join(scen['output_dir'], 'CAVE_RESULT/report.html') for scen in get_scenarios()]:
            firefox.open_new_tab(url)
    elif sys.argv[1] == '--clean':
        shutil.rmtree('test/general_example/results')
    else:
        logging.error("%s not an option.", sys.argv[1])
Beispiel #8
0
    def main_cli(self):
        """
        Main cli, implementing comparison between and analysis of Configurator-results.

        Builds the argument parser, parses ``sys.argv[1:]``, loads the default
        analyzing-options for the (possibly auto-detected) file format, applies the
        ``--only`` / ``--skip`` selection and the per-analyzer fine-tuning flags,
        then instantiates ``CAVE`` and calls ``analyze()`` on it.

        Raises
        ------
        ValueError
            if a value passed to ``--only`` / ``--skip`` is not a known analysis method
        """
        # Reset logging module (needs to happen before logger initalization)
        logging.shutdown()
        reload(logging)

        # Those are the options for the --only / --skip flags
        # (command-line key -> section name used in the analyzing options)
        map_options = {
            'performance_table': 'Performance Table',
            'ecdf': 'empirical Cumulative Distribution Function (eCDF)',
            'scatter_plot': 'Scatter Plot',
            'cost_over_time': 'Cost Over Time',
            'configurator_footprint': 'Configurator Footprint',
            'parallel_coordinates': 'Parallel Coordinates',
            'algorithm_footprints': 'Algorithm Footprint',
            'budget_correlation': 'Budget Correlation',
            'bohb_learning_curves': 'BOHB Learning Curves',
            'incumbents_over_budgets': 'Incumbents Over Budgets',
            # Parameter Importance:
            'fanova': 'fANOVA',
            'ablation': 'Ablation',
            'lpi': 'Local Parameter Importance (LPI)',
            'local_parameter_importance': 'Local Parameter Importance (LPI)',
            'forward_selection': 'Forward Selection',
            # Feature Importance
            'clustering': "Feature Clustering",
            'correlation': "Feature Correlation",
            'importance': "Feature Importance",
            'box_violin': "Violin and Box Plots",
        }

        def _expand_globs(paths):
            """Expand entries containing '*' via recursive glob, keep all others verbatim."""
            expanded = []
            for path in paths:
                if '*' in path:
                    expanded.extend(glob.glob(path, recursive=True))
                else:
                    expanded.append(path)
            return expanded

        parser = ArgumentParser(formatter_class=SmartArgsDefHelpFormatter,
                                add_help=False,
                                description='CAVE: Configuration Assessment Vizualisation and Evaluation')

        req_opts = parser.add_mutually_exclusive_group(required=True)  # Either positional or keyword folders option
        req_opts.add_argument("folders",
                              nargs='*',
                              # strings prefixed with raw| can be manually split with \n
                              help="raw|path(s) to Configurator output-directory/ies",
                              default=SUPPRESS)

        req_opts.add_argument("--folders",
                              nargs='*',
                              dest='folders',
                              default=SUPPRESS,
                              help=SUPPRESS)

        cave_opts = parser.add_argument_group("CAVE global options",
                                              "Options that configure the analysis in general and define behaviour.")
        cave_opts.add_argument("--verbose_level",
                               default="INFO",
                               choices=[
                                   "INFO",
                                   "DEBUG",
                                   "DEV_DEBUG",
                                   "WARNING",
                                   "OFF"
                               ],
                               help="verbose level. use DEV_DEBUG for development to filter boilerplate-logs from "
                                    "imported modules, use DEBUG for full logging. full debug-log always in "
                                    "'output/debug/debug.log' ")
        cave_opts.add_argument("--jupyter",
                               default='off',
                               choices=['on', 'off'],
                               help="output everything to jupyter, if available."
                               )
        cave_opts.add_argument("--validation",
                               default="epm",
                               # The value is lower-cased by `type` before it is compared to
                               # the choices; the choice used to be "epm " (trailing space),
                               # which made an explicit `--validation epm` impossible.
                               choices=[
                                   "validation",
                                   "epm"
                               ],
                               help="how to complete missing runs for config/inst-pairs. epm trains random forest with "
                                    "available data to estimate missing runs, validation requires target algorithm. ",
                               type=str.lower)
        cave_opts.add_argument("--output",
                               default="CAVE_output_%s" % (
                                        datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f')),
                               help="path to folder in which to save the HTML-report. ")
        cave_opts.add_argument("--seed",
                               default=42,
                               type=int,
                               help="random seed used throughout analysis. ")
        cave_opts.add_argument("--file_format",
                               default='auto',
                               help="specify the format of the configurator-files. ",
                               # Values (including the string default) are upper-cased by
                               # `type` before the choices-check, so the choice has to be
                               # 'AUTO'; with 'auto' an explicit `--file_format auto` was
                               # always rejected.
                               choices=['AUTO', 'SMAC2', 'SMAC3', 'CSV', 'BOHB', 'APT'],
                               type=str.upper)
        cave_opts.add_argument("--validation_format",
                               default='NONE',
                               help="what format the validation-files are in",
                               choices=['SMAC2', 'SMAC3', 'CSV', 'NONE'],
                               type=str.upper)
        cave_opts.add_argument("--ta_exec_dir",
                               # nargs='+' yields a list, so the default is a list as well
                               # (a plain '.' only worked because iterating it gives ['.'])
                               default=['.'],
                               help="path to the execution-directory of the configurator run. this is the path from "
                                    "which the scenario is loaded, so the instance-/pcs-files specified in the "
                                    "scenario, so they are relative to this path "
                                    "(e.g. 'ta_exec_dir/path_to_train_inst_specified_in_scenario.txt'). ",
                               nargs='+')

        # PIMP-configs
        pimp_opts = parser.add_argument_group("Parameter Importance",
                                              "Define the behaviour of the ParameterImportance-module (pimp)")

        pimp_opts.add_argument("--pimp_interactive",
                               choices=["on", "off"],
                               default="on",
                               help="Whether or not to plot interactive bokeh plots for parameter importance analysis")
        pimp_opts.add_argument("--pimp_whiskers",
                               choices=["on", "off"],
                               default="on",
                               help="Whether or not to plot interactive whisker plot for parameter importance analysis")
        pimp_opts.add_argument("--pimp_max_samples",
                               default=-1,
                               type=int,
                               help="How many datapoints to use with PIMP. -1 -> use all. ")
        pimp_opts.add_argument("--pimp_no_fanova_pairs",
                               action="store_false",
                               dest="fanova_pairwise",
                               help="fANOVA won't compute pairwise marginals")

        cfp_opts = parser.add_argument_group("Configurator Footprint", "Fine-tune the configurator footprint")
        cfp_opts.add_argument("--cfp_time_slider",
                              help="whether or not to have a time_slider-widget on cfp-plot"
                                   "INCREASES FILE-SIZE (and loading) DRAMATICALLY. ",
                              choices=["on", "off"],
                              default="off")
        cfp_opts.add_argument("--cfp_number_quantiles",
                              help="number of quantiles that configurator footprint should plot over time. ",
                              default=3, type=int)
        cfp_opts.add_argument("--cfp_max_configurations_to_plot",
                              help="maximum number of configurations to be plotted in configurator footprint (in case "
                                   "you run into a MemoryError). -1 -> plot all. ",
                              default=-1, type=int)

        pc_opts = parser.add_argument_group("Parallel Coordinates", "Fine-tune the parameter parallel coordinates")
        # TODO: this choice should be integrated into the bokeh plot
        pc_opts.add_argument("--pc_sort_by",
                             help="parameter-importance method to determine the order (and selection) of parameters "
                                  "for parallel coordinates. all: aggregate over all available methods. uses random "
                                  "method if none is given. ",
                             default="all", type=str.lower,
                             choices=['fanova', 'lpi', 'ablation', 'forward_selection', 'all'])

        cot_opts = parser.add_argument_group("Cost Over Time", "Fine-tune the cost over time plot")
        cot_opts.add_argument("--cot_inc_traj",
                              help="if the optimizer belongs to HpBandSter (e.g. bohb), you can choose how the "
                                   "incumbent-trajectory will be interpreted with regards to the budget. You can "
                                   "choose from 'racing', which will only accept a configuration of a higher budget "
                                   "than the current incumbent's if the current incumbent has been evaluated on "
                                   "the higher budget; 'minimum', which will only look at the current performance "
                                   "no matter the budget; and 'prefer_higher_budget', which will always choose "
                                   "a configuration on a higher budget as incumbent as soon as it is available "
                                   "(this will likely lead to peaks, whenever a new budget is evaluated)",
                              default="racing", type=str.lower,
                              choices=["racing", "minimum", "prefer_higher_budget"])

        # General analysis to be carried out
        default_opts = parser.add_mutually_exclusive_group()
        default_opts.add_argument("--only",
                                  nargs='*',
                                  help='perform only these analysis methods. choose from: {}'.format(
                                      ", ".join(sorted(map_options.keys()))
                                  ),
                                  default=[],
                                  )
        default_opts.add_argument("--skip",
                                  nargs='*',
                                  help='perform all but these analysis methods. choose from: {}'.format(
                                      ", ".join(sorted(map_options.keys()))
                                  ),
                                  default=[]
                                  )

        spe_opts = parser.add_argument_group("Meta arguments")
        spe_opts.add_argument('-v', '--version', action='version',
                              version='%(prog)s ' + str(v), help="show program's version number and exit.")
        spe_opts.add_argument('-h', '--help', action="help", help="show this help message and exit")

        # Parse arguments and save to args_
        args_ = parser.parse_args(sys.argv[1:])

        # Configuration results to be analyzed
        folders = _expand_globs(args_.folders)
        # Default ta_exec_dir is cwd
        ta_exec_dir = _expand_globs(args_.ta_exec_dir)

        output_dir = args_.output
        file_format = args_.file_format
        validation_format = args_.validation_format
        validation = args_.validation
        seed = args_.seed
        verbose_level = args_.verbose_level
        show_jupyter = args_.jupyter == 'on'

        # Load default options for this file_format
        analyzing_options = load_default_options(file_format=detect_fileformat(folders)
                                                 if file_format.upper() == "AUTO" else file_format)

        # Interpret the --skip and --only flags (mutually exclusive, enforced by the parser)
        only_mode = len(args_.only) > 0
        if only_mode:
            # Set all to False, the requested ones are re-enabled below
            for section in map_options.values():
                analyzing_options[section]["run"] = str(False)
        for o in args_.only if only_mode else args_.skip:
            if o.lower() not in map_options:
                raise ValueError("Failed to interpret `--[only|skip] {}`.\n"
                                 "Please choose from:\n  {}".format(o, '\n  '.join(sorted(map_options.keys()))))
            # Set True if flag is --only and False if flag is --skip
            analyzing_options[map_options[o.lower()]]["run"] = str(only_mode)

        # Fine-tuning individual analyzer options (all values are stored as strings)
        analyzing_options["Configurator Footprint"]["time_slider"] = str(args_.cfp_time_slider)
        analyzing_options["Configurator Footprint"]["number_quantiles"] = str(args_.cfp_number_quantiles)
        analyzing_options["Configurator Footprint"]["max_configurations_to_plot"] = str(args_.cfp_max_configurations_to_plot)
        analyzing_options["Cost Over Time"]["incumbent_trajectory"] = str(args_.cot_inc_traj)
        analyzing_options["fANOVA"]["fanova_pairwise"] = str(args_.fanova_pairwise)
        analyzing_options["fANOVA"]["pimp_max_samples"] = str(args_.pimp_max_samples)
        analyzing_options["Parallel Coordinates"]["pc_sort_by"] = str(args_.pc_sort_by)
        analyzing_options["Parameter Importance"]["whisker_quantiles_plot"] = str(args_.pimp_whiskers)
        analyzing_options["Parameter Importance"]["interactive_bokeh_plots"] = str(args_.pimp_interactive)

        # Initialize CAVE
        cave = CAVE(folders,
                    output_dir,
                    ta_exec_dir,
                    file_format=file_format,
                    validation_format=validation_format,
                    validation_method=validation,
                    show_jupyter=show_jupyter,
                    seed=seed,
                    verbose_level=verbose_level,
                    analyzing_options=analyzing_options,
                    )

        # Check if CAVE was successfully initialized
        try:
            cave.logger.debug("CAVE is called with arguments: " + str(args_))
        except AttributeError:
            logging.getLogger().warning("Error in CAVE-initialization... (it's fine for running nosetests)")
            logging.getLogger().debug("CAVE is called with arguments: " + str(args_))

        # Analyze (with options defined in initialization via the analyzing_options
        cave.analyze()
Beispiel #9
0
#!/usr/bin/env python
# coding: utf-8

# In[4]:

from cave.cavefacade import CAVE
import os

# In[5]:

# Folder holding the configurator output; the results analyzed below are taken
# from its 'run_1' sub-folder.
output_dir = "smac_output"
results_dir = os.path.join(output_dir, 'run_1')

# In[6]:

# Create the CAVE object for the single run folder, declaring its files as SMAC3.
# NOTE(review): presumably "test_cave_smac" is where the report is written and "."
# is the directory the scenario's relative paths are resolved against - confirm
# against the CAVE facade documentation.
cave = CAVE(
    folders=[results_dir],
    output_dir="test_cave_smac",
    ta_exec_dir=["."],
    file_format='SMAC3',
)