def setUp(self):
    self.output = "test/test_files/test_output/"
    shutil.rmtree(self.output, ignore_errors=True)
    self.cave = CAVE(["examples/spear_qcp_small/example_output/run_1",
                      "examples/spear_qcp_small/example_output/run_2",
                      "examples/spear_qcp_small/example_output/run_3"],
                     output=self.output,
                     missing_data_method="epm",
                     ta_exec_dir="examples/spear_qcp_small")
    self.analyzer = self.cave.analyzer
def setUpClass(self):
    self.output_dir = "test/test_files/test_output/"
    if os.path.exists(self.output_dir):
        shutil.rmtree(self.output_dir, ignore_errors=True)
    os.mkdir(self.output_dir)
    self.cave = CAVE(["test/example_output/example_output/run_1",
                      "test/example_output/example_output/run_2"],
                     output_dir=self.output_dir,
                     validation_method="epm",
                     ta_exec_dir=["test/example_output"])
    self.analyzer = self.cave.analyzer
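# A minimal clean-up sketch to complement the fixture above (a hypothetical
# addition, not part of the original test class): remove the shared output
# directory once all tests of the class have run. It only reuses
# self.output_dir and shutil from the code above.
def tearDownClass(self):
    shutil.rmtree(self.output_dir, ignore_errors=True)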
def main_cli(self): """ Main cli, implementing comparison between and analysis of Configuration-results. """ # Reset logging module (needs to happen before logger initalization) logging.shutdown() reload(logging) # Some choice-blocks, that can be reused throughout the CLI p_choices = [ "all", "ablation", "forward_selection", "fanova", "lpi", "none" ] p_sort_by_choices = ["average"] + p_choices[1:-1] f_choices = [ "all", "box_violin", "correlation", "clustering", "importance", "none" ] parser = ArgumentParser( formatter_class=SmartArgsDefHelpFormatter, add_help=False, description= 'CAVE: Configuration Assessment Vizualisation and Evaluation') req_opts = parser.add_mutually_exclusive_group(required=True) req_opts.add_argument( "folders", nargs='*', # strings prefixed with raw| can be manually split with \n help="raw|path(s) to Configurator output-directory/ies", default=SUPPRESS) req_opts.add_argument("--folders", nargs='*', dest='folders', default=SUPPRESS, help=SUPPRESS) cave_opts = parser.add_argument_group( "CAVE global options", "Options that configure the analysis in general and define behaviour." ) cave_opts.add_argument( "--verbose_level", default="INFO", choices=["INFO", "DEBUG", "DEV_DEBUG", "WARNING", "OFF"], help= "verbose level. use DEV_DEBUG for development to filter boilerplate-logs from " "imported modules, use DEBUG for full logging. full debug-log always in " "'output/debug/debug.log' ") cave_opts.add_argument( "--jupyter", default='off', choices=['on', 'off'], help="output everything to jupyter, if available.") cave_opts.add_argument( "--validation", default="epm", choices=["validation", "epm "], help= "how to complete missing runs for config/inst-pairs. epm trains random forest with " "available data to estimate missing runs, validation requires target algorithm. ", type=str.lower) cave_opts.add_argument( "--output", default="CAVE_output_%s" % (datetime.fromtimestamp( time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f')), help="path to folder in which to save the HTML-report. ") cave_opts.add_argument("--seed", default=42, type=int, help="random seed used throughout analysis. ") cave_opts.add_argument( "--file_format", default='auto', help="specify the format of the configurator-files. ", choices=['auto', 'SMAC2', 'SMAC3', 'CSV', 'BOHB'], type=str.upper) cave_opts.add_argument("--validation_format", default='NONE', help="what format the validation-files are in", choices=['SMAC2', 'SMAC3', 'CSV', 'NONE'], type=str.upper) cave_opts.add_argument( "--ta_exec_dir", default='.', help= "path to the execution-directory of the configurator run. this is the path from " "which the scenario is loaded, so the instance-/pcs-files specified in the " "scenario, so they are relative to this path " "(e.g. 'ta_exec_dir/path_to_train_inst_specified_in_scenario.txt'). ", nargs='+') # PIMP-configs pimp_opts = parser.add_argument_group( "Parameter Importance", "Define the behaviour of the ParameterImportance-module (pimp)") pimp_opts.add_argument( "--pimp_max_samples", default=-1, type=int, help="How many datapoints to use with PIMP. -1 -> use all. ") pimp_opts.add_argument("--pimp_no_fanova_pairs", action="store_false", dest="fanova_pairwise", help="fANOVA won't compute pairwise marginals") pimp_opts.add_argument( "--pimp_sort_table_by", default="average", choices=p_sort_by_choices, help="raw|what kind of parameter importance method to " "use to sort the overview-table. 
") cfp_opts = parser.add_argument_group( "Configurator Footprint", "Finetune the configurator footprint") cfp_opts.add_argument( "--cfp_time_slider", help="whether or not to have a time_slider-widget on cfp-plot" "INCREASES FILE-SIZE (and loading) DRAMATICALLY. ", choices=["on", "off"], default="off") cfp_opts.add_argument( "--cfp_number_quantiles", help= "number of quantiles that configurator footprint should plot over time. ", default=3, type=int) cfp_opts.add_argument( "--cfp_max_configurations_to_plot", help= "maximum number of configurations to be plotted in configurator footprint (in case " "you run into a MemoryError). -1 -> plot all. ", default=-1, type=int) pc_opts = parser.add_argument_group( "Parallel Coordinates", "Finetune the parameter parallel coordinates") pc_opts.add_argument( "--pc_sort_by", help= "parameter-importance method to determine the order (and selection) of parameters " "for parallel coordinates. all: aggregate over all available methods. uses random " "method if none is given. ", default="all", type=str.lower, choices=p_choices) cot_opts = parser.add_argument_group( "Cost Over Time", "Finetune the cost over time plot") cot_opts.add_argument( "--cot_inc_traj", help= "if the optimizer belongs to HpBandSter (e.g. bohb), you can choose how the " "incumbent-trajectory will be interpreted with regards to the budget. You can " "choose from 'racing', which will only accept a configuration of a higher budget " "than the current incumbent's if the current incumbent has been evaluated on " "the higher budget; 'minimum', which will only look at the current performance " "no matter the budget; and 'prefer_higher_budget', which will always choose " "a configuration on a higher budget as incumbent as soon as it is available " "(this will likely lead to peaks, whenever a new budget is evaluated)", default="racing", type=str.lower, choices=["racing", "minimum", "prefer_higher_budget"]) # General analysis to be carried out act_opts = parser.add_argument_group( "Analysis", "Which analysis methods should be carried out") act_opts.add_argument( "--parameter_importance", default="all", nargs='+', help="raw|what kind of parameter importance method to " "use. Choose any combination of\n[" + ', '.join(p_choices[1:-1]) + "] or set it to " "all/none", choices=p_choices, type=str.lower) act_opts.add_argument( "--feature_analysis", default="all", nargs='+', help="raw|what kind of feature analysis methods to use. 
" "Choose any combination of\n[" + ', '.join(f_choices[1:-1]) + "] or set it to " "all/none", choices=f_choices, type=str.lower) act_opts.add_argument("--no_performance_table", action='store_false', help="don't create performance table.", dest='performance_table') act_opts.add_argument("--no_ecdf", action='store_false', help="don't plot ecdf.", dest='ecdf') act_opts.add_argument("--no_scatter_plots", action='store_false', help="don't plot scatter plots.", dest='scatter') act_opts.add_argument("--no_cost_over_time", action='store_false', help="don't plot cost over time.", dest='cost_over_time') act_opts.add_argument("--no_configurator_footprint", action='store_false', help="don't plot configurator footprint.", dest='configurator_footprint') act_opts.add_argument("--no_parallel_coordinates", action='store_false', help="don't plot parallel coordinates.", dest='parallel_coordinates') act_opts.add_argument("--no_algorithm_footprints", action='store_false', help="don't plot algorithm footprints.", dest='algorithm_footprints') act_opts.add_argument("--no_budget_correlation", action='store_false', help="don't plot budget correlation.", dest='budget_correlation') act_opts.add_argument("--bohb_learning_curves", action='store_false', help="don't plot bohb learning curves.", dest='bohb_learning_curves') act_opts.add_argument("--no_incumbents_over_budgets", action='store_false', help="don't plot incumbents over budgets.", dest='incumbents_over_budgets') spe_opts = parser.add_argument_group("Meta arguments") spe_opts.add_argument('-v', '--version', action='version', version='%(prog)s ' + str(v), help="show program's version number and exit.") spe_opts.add_argument('-h', '--help', action="help", help="show this help message and exit") args_ = parser.parse_args(sys.argv[1:]) # Expand configs if "all" in args_.parameter_importance: param_imp = ["ablation", "forward_selection", "fanova", "lpi"] elif "none" in args_.parameter_importance: param_imp = [] else: param_imp = args_.parameter_importance if "fanova" in param_imp: try: import fanova # noqa except ImportError: raise ImportError( 'fANOVA is not installed! 
To install it please run ' '"git+http://github.com/automl/fanova.git@master"') if not (args_.pimp_sort_table_by == "average" or args_.pimp_sort_table_by in param_imp): raise ValueError("Pimp comparison sorting key is {}, but this " "method is deactivated or non-existent.".format( args_.pimp_sort_table_by)) if "all" in args_.feature_analysis: feature_analysis = [ "box_violin", "correlation", "importance", "clustering" ] elif "none" in args_.feature_analysis: feature_analysis = [] else: feature_analysis = args_.feature_analysis output_dir = args_.output # Configuration results to be analyzed folders = [] for f in args_.folders: if '*' in f: folders.extend(list(glob.glob(f, recursive=True))) else: folders.append(f) # Default ta_exec_dir is cwd ta_exec_dir = [] for t in args_.ta_exec_dir: if '*' in t: ta_exec_dir.extend(list(glob.glob(t, recursive=True))) else: ta_exec_dir.append(t) file_format = args_.file_format validation_format = args_.validation_format validation = args_.validation seed = args_.seed verbose_level = args_.verbose_level show_jupyter = args_.jupyter == 'on' analyzing_options = load_default_options( file_format=detect_fileformat(folders) if file_format.upper() == "AUTO" else file_format) analyzing_options["Ablation"]["run"] = str('ablation' in param_imp) analyzing_options["Algorithm Footprint"]["run"] = str( args_.algorithm_footprints) analyzing_options["Budget Correlation"]["run"] = str( args_.budget_correlation) analyzing_options["BOHB Learning Curves"]["run"] = str( args_.bohb_learning_curves) analyzing_options["Configurator Footprint"]["run"] = str( args_.configurator_footprint) analyzing_options["Configurator Footprint"]["time_slider"] = str( args_.cfp_time_slider) analyzing_options["Configurator Footprint"]["number_quantiles"] = str( args_.cfp_number_quantiles) analyzing_options["Configurator Footprint"][ "max_configurations_to_plot"] = str( args_.cfp_max_configurations_to_plot) analyzing_options["Cost Over Time"]["run"] = str(args_.cost_over_time) analyzing_options["Cost Over Time"]["incumbent_trajectory"] = str( args_.cot_inc_traj) analyzing_options["empirical Cumulative Distribution Function (eCDF)"][ "run"] = str(args_.ecdf) analyzing_options["fANOVA"]["run"] = str('fanova' in param_imp) analyzing_options["fANOVA"]["fanova_pairwise"] = str( args_.fanova_pairwise) analyzing_options["fANOVA"]["pimp_max_samples"] = str( args_.pimp_max_samples) analyzing_options["Feature Clustering"]["run"] = str( 'clustering' in feature_analysis) analyzing_options["Feature Correlation"]["run"] = str( 'correlation' in feature_analysis) analyzing_options["Feature Importance"]["run"] = str( 'importance' in feature_analysis) analyzing_options["Forward Selection"]["run"] = str( 'forward_selection' in param_imp) analyzing_options["Importance Table"]["sort_table_by"] = str( args_.pimp_sort_table_by) analyzing_options["Incumbents Over Budgets"]["run"] = str( args_.incumbents_over_budgets) analyzing_options["Local Parameter Importance (LPI)"]["run"] = str( 'lpi' in param_imp) analyzing_options["Parallel Coordinates"]["run"] = str( args_.parallel_coordinates) analyzing_options["Parallel Coordinates"]["pc_sort_by"] = str( args_.pc_sort_by) analyzing_options["Performance Table"]["run"] = str( args_.performance_table) cave = CAVE( folders, output_dir, ta_exec_dir, file_format=file_format, validation_format=validation_format, validation_method=validation, show_jupyter=show_jupyter, seed=seed, verbose_level=verbose_level, analyzing_options=analyzing_options, ) try: cave.logger.debug("CAVE is called 
with arguments: " + str(args_)) except AttributeError as err: logging.getLogger().warning( "Something went wrong with CAVE-initialization... (it's fine for running nosetests)" ) logging.getLogger().debug("CAVE is called with arguments: " + str(args_)) # Analyze cave.analyze()
def main_cli(self): """ Main cli, implementing comparison between and analysis of SMAC-results. """ parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) req_opts = parser.add_argument_group("Required Options") req_opts.add_argument("--folders", required=True, nargs='+', help="path(s) to SMAC output-directory/ies, " "containing each at least a runhistory and " "a trajectory.") opt_opts = parser.add_argument_group("Optional Options") opt_opts.add_argument("--verbose_level", default="INFO", choices=["INFO", "DEBUG"], help="verbose level") opt_opts.add_argument("--validation", default="epm", choices=["validation", "epm"], help="how to complete missing runs for " "config/inst-pairs.") opt_opts.add_argument( "--output", default="CAVE_output", help="path to folder in which to save the HTML-report.") opt_opts.add_argument("--ta_exec_dir", default=None, help="path to the execution-directory of the " "SMAC run.") opt_opts.add_argument("--param_importance", default="all", nargs='+', help="what kind of parameter importance to " "calculate", choices=[ "all", "ablation", "forward_selection", "fanova", "incneighbor", "none" ]) opt_opts.add_argument("--max_pimp_samples", default=-1, type=int, help="How many datapoints to use with PIMP") opt_opts.add_argument("--pimp_no_fanova_pairs", action="store_false", dest="fanova_pairwise") opt_opts.add_argument("--feat_analysis", default="all", nargs='+', help="what kind of parameter importance to " "calculate", choices=[ "all", "box_violin", "correlation", "clustering", "importance", "none" ]) opt_opts.add_argument("--cost_over_time", default="true", choices=["true", "false"], help="whether to plot cost over time.") opt_opts.add_argument("--confviz", default="true", choices=["true", "false"], help="whether to visualize configs.") opt_opts.add_argument("--parallel_coordinates", default="true", choices=["true", "false"], help="whether to plot parallel coordinates.") opt_opts.add_argument("--algorithm_footprints", default="true", choices=["true", "false"], help="whether to plot algorithm footprints.") args_, misc = parser.parse_known_args() if args_.verbose_level == "INFO": logging.basicConfig(level=logging.INFO) else: logging.basicConfig(level=logging.DEBUG) # SMAC results folders = [] for f in args_.folders: if '*' in f: folders.extend(list(glob.glob(f, recursive=True))) else: folders.append(f) cave = CAVE(folders, args_.output, args_.ta_exec_dir, missing_data_method=args_.validation, max_pimp_samples=args_.max_pimp_samples, fanova_pairwise=args_.fanova_pairwise) # Expand configs if "all" in args_.param_importance: param_imp = [ "ablation", "forward_selection", "fanova", "incneighbor" ] elif "none" in args_.param_importance: param_imp = [] else: param_imp = args_.param_importance if "all" in args_.feat_analysis: feature_analysis = [ "box_violin", "correlation", "importance", "clustering", "feature_cdf" ] elif "none" in args_.feat_analysis: feature_analysis = [] else: feature_analysis = args_.feat_analysis # Analyze #cave.analyze(performance=False, cdf=False, scatter=False, confviz=False, cave.analyze(performance=True, cdf=True, scatter=True, confviz=args_.confviz == "true", parallel_coordinates=args_.parallel_coordinates == "true", cost_over_time=args_.cost_over_time == "true", algo_footprint=args_.algorithm_footprints == "true", param_importance=param_imp, feature_analysis=feature_analysis)
"-- 'firefox' will open all reports in firefox.") if __name__ == '__main__': logging.basicConfig(level=logging.DEBUG) #logging.basicConfig(level=logging.INFO) if len(sys.argv) < 2: print_help() elif sys.argv[1] == 'generate': for scen in get_scenarios(): scenario = Scenario(scen) smac = SMAC(scenario=scenario, rng=np.random.RandomState(42)) smac.optimize() elif sys.argv[1] == 'cave': for scen in get_scenarios(): folder = [f for f in os.listdir(scen['output_dir']) if f.startswith('run')][0] cave = CAVE([os.path.join(scen['output_dir'], folder)], os.path.join(scen['output_dir'], 'CAVE_RESULT'), ta_exec_dir='.', validation_method='validation') cave.analyze(param_importance=['ablation', 'forward_selection', 'lpi'], cfp_number_quantiles=2) elif sys.argv[1] == 'firefox': import webbrowser firefox = webbrowser.get('firefox') for url in [os.path.join(scen['output_dir'], 'CAVE_RESULT/report.html') for scen in get_scenarios()]: firefox.open_new_tab(url) elif sys.argv[1] == 'clean': shutil.rmtree('test/general_example/results') else: logging.error("%s not an option.", sys.argv[1]) print_help()
def main_cli(self): """ Main cli, implementing comparison between and analysis of Configuration-results. """ # Some choice-blocks, that can be reused throughout the CLI p_choices = [ "all", "ablation", "forward_selection", "fanova", "lpi", "none" ] p_sort_by_choices = ["average"] + p_choices[1:-1] f_choices = [ "all", "box_violin", "correlation", "clustering", "importance", "none" ] parser = ArgumentParser( formatter_class=SmartArgsDefHelpFormatter, add_help=False, description= 'CAVE: Configuration Assessment Vizualisation and Evaluation') req_opts = parser.add_argument_group("Required Options:" + '~' * 100) req_opts.add_argument( "--folders", required=True, nargs='+', # strings prefixed with raw| can be manually split with \n help="raw|path(s) to SMAC output-directory/ies, " "containing each at least a runhistory\nand " "a trajectory.", default=SUPPRESS) opt_opts = parser.add_argument_group("Optional Options:" + '~' * 100) opt_opts.add_argument( "--verbose_level", default="INFO", choices=["INFO", "DEBUG", "DEV_DEBUG", "WARNING", "OFF"], help= "verbose level. use DEV_DEBUG for development to filter boilerplate-logs from " "imported modules, use DEBUG for full logging. full debug-log always in " "'output/debug/debug.log' ") opt_opts.add_argument( "--jupyter", default='off', choices=['on', 'off'], help="output everything to jupyter, if available.") opt_opts.add_argument( "--validation", default="epm", choices=["validation", "epm"], help= "how to complete missing runs for config/inst-pairs. epm trains random forest with " "available data to estimate missing runs, validation requires target algorithm. ", type=str.lower) opt_opts.add_argument( "--output", default="CAVE_output_%s" % (datetime.fromtimestamp( time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f')), help="path to folder in which to save the HTML-report. ") opt_opts.add_argument("--seed", default=42, type=int, help="random seed used throughout analysis. ") opt_opts.add_argument( "--file_format", default='SMAC3', help="specify the format of the configurator-files. ", choices=['SMAC2', 'SMAC3', 'CSV', 'BOHB'], type=str.upper) opt_opts.add_argument("--validation_format", default='NONE', help="what format the validation-files are in", choices=['SMAC2', 'SMAC3', 'CSV', 'NONE'], type=str.upper) opt_opts.add_argument( "--ta_exec_dir", default='.', help= "path to the execution-directory of the configurator run. this is the path from " "which the scenario is loaded, so the instance-/pcs-files specified in the " "scenario, so they are relative to this path " "(e.g. 'ta_exec_dir/path_to_train_inst_specified_in_scenario.txt'). ", nargs='+') # PIMP-configs opt_opts.add_argument( "--pimp_max_samples", default=-1, type=int, help="How many datapoints to use with PIMP. -1 -> use all. ") opt_opts.add_argument("--pimp_no_fanova_pairs", action="store_false", dest="fanova_pairwise", help="fANOVA won't compute pairwise marginals") opt_opts.add_argument( "--pimp_sort_table_by", default="average", choices=p_sort_by_choices, help="raw|what kind of parameter importance method to " "use to sort the overview-table. ") opt_opts.add_argument( "--parameter_importance", default="all", nargs='+', help="raw|what kind of parameter importance method to " "use. Choose any combination of\n[" + ', '.join(p_choices[1:-1]) + "] or set it to " "all/none", choices=p_choices, type=str.lower) opt_opts.add_argument( "--feature_analysis", default="all", nargs='+', help="raw|what kind of feature analysis methods to use. 
" "Choose any combination of\n[" + ', '.join(f_choices[1:-1]) + "] or set it to " "all/none", choices=f_choices, type=str.lower) opt_opts.add_argument( "--cfp_time_slider", help="whether or not to have a time_slider-widget on cfp-plot" "INCREASES FILE-SIZE (and loading) DRAMATICALLY. ", choices=["on", "off"], default="off") opt_opts.add_argument( "--cfp_number_quantiles", help= "number of quantiles that configurator footprint should plot over time. ", default=3, type=int) opt_opts.add_argument( "--cfp_max_plot", help= "maximum number of configurations to be plotted in configurator footprint (in case " "you run into a MemoryError). -1 -> plot all. ", default=-1, type=int) opt_opts.add_argument("--no_tabular_analysis", action='store_false', help="don't create performance table.", dest='tabular_analysis') opt_opts.add_argument("--no_ecdf", action='store_false', help="don't plot ecdf.", dest='ecdf') opt_opts.add_argument("--no_scatter_plots", action='store_false', help="don't plot scatter plots.", dest='scatter_plots') opt_opts.add_argument("--no_cost_over_time", action='store_false', help="don't plot cost over time.", dest='cost_over_time') opt_opts.add_argument("--no_configurator_footprint", action='store_false', help="don't plot configurator footprint.", dest='cfp') opt_opts.add_argument("--no_parallel_coordinates", action='store_false', help="don't plot parallel coordinates.", dest='parallel_coordinates') opt_opts.add_argument("--no_algorithm_footprints", action='store_false', help="don't plot algorithm footprints.", dest='algorithm_footprints') spe_opts = parser.add_argument_group("special arguments:" + '~' * 100) spe_opts.add_argument('-v', '--version', action='version', version='%(prog)s ' + str(v), help="show program's version number and exit.") spe_opts.add_argument('-h', '--help', action="help", help="show this help message and exit") args_ = parser.parse_args(sys.argv[1:]) # Expand configs if "all" in args_.parameter_importance: param_imp = ["ablation", "forward_selection", "fanova", "lpi"] elif "none" in args_.parameter_importance: param_imp = [] else: param_imp = args_.parameter_importance if "fanova" in param_imp: try: import fanova # noqa except ImportError: raise ImportError( 'fANOVA is not installed! 
To install it please run ' '"git+http://github.com/automl/fanova.git@master"') if not (args_.pimp_sort_table_by == "average" or args_.pimp_sort_table_by in param_imp): raise ValueError("Pimp comparison sorting key is {}, but this " "method is deactivated or non-existent.".format( args_.pimp_sort_table_by)) if "all" in args_.feature_analysis: feature_analysis = [ "box_violin", "correlation", "importance", "clustering", "feature_cdf" ] elif "none" in args_.feature_analysis: feature_analysis = [] else: feature_analysis = args_.feature_analysis cfp_time_slider = True if args_.cfp_time_slider == "on" else False if not (args_.tabular_analysis or args_.ecdf or args_.scatter_plots or args_.cfp or args_.parallel_coordinates or args_.parallel_coordinates or args_.cost_over_time or args_.algorithm_footprints or param_imp or feature_analysis): raise ValueError( 'At least one analysis method required to run CAVE') output_dir = args_.output logging.getLogger().debug("CAVE is called with arguments: " + str(args_)) # Configuration results to be analyzed folders = [] for f in args_.folders: if '*' in f: folders.extend(list(glob.glob(f, recursive=True))) else: folders.append(f) # Default ta_exec_dir is cwd ta_exec_dir = [] for t in args_.ta_exec_dir: if '*' in t: ta_exec_dir.extend(list(glob.glob(t, recursive=True))) else: ta_exec_dir.append(t) tabular_analysis = args_.tabular_analysis file_format = args_.file_format validation_format = args_.validation_format validation = args_.validation pimp_max_samples = args_.pimp_max_samples fanova_pairwise = args_.fanova_pairwise seed = args_.seed ecdf = args_.ecdf scatter_plots = args_.scatter_plots cfp = args_.cfp cfp_time_slider = args_.cfp_time_slider == 'on' cfp_max_plot = args_.cfp_max_plot cfp_number_quantiles = args_.cfp_number_quantiles parallel_coordinates = args_.parallel_coordinates cost_over_time = args_.cost_over_time algorithm_footprints = args_.algorithm_footprints pimp_sort_table_by = args_.pimp_sort_table_by verbose_level = args_.verbose_level show_jupyter = args_.jupyter == 'on' if file_format == 'BOHB': logging.getLogger().info( "File format is BOHB, performing special nested analysis for budget-based optimizer!" ) validation_format = 'NONE' validation_method = 'epm' cdf = False scatter = False algo_footprint = False param_imp = [p for p in param_imp if not p == 'forward_selection'] feature_analysis = [] cave = CAVE(folders, output_dir, ta_exec_dir, file_format=file_format, validation_format=validation_format, validation_method=validation, pimp_max_samples=pimp_max_samples, fanova_pairwise=fanova_pairwise, use_budgets=file_format == 'BOHB', show_jupyter=show_jupyter, seed=seed, verbose_level=verbose_level) # Analyze cave.analyze(performance=tabular_analysis, cdf=ecdf, scatter=scatter_plots, cfp=cfp, cfp_time_slider=cfp_time_slider, cfp_max_plot=cfp_max_plot, cfp_number_quantiles=cfp_number_quantiles, parallel_coordinates=parallel_coordinates, cost_over_time=cost_over_time, algo_footprint=algorithm_footprints, param_importance=param_imp, pimp_sort_table_by=pimp_sort_table_by, feature_analysis=feature_analysis)
if len(sys.argv) < 2:
    print_help()
elif sys.argv[1] == '--generate':
    generate_bohb_data()
    for scen in get_scenarios():
        scenario = Scenario(scen)
        smac = SMAC4AC(scenario=scenario, rng=np.random.RandomState(42))
        smac.optimize()
elif sys.argv[1] == '--cave':
    failed = []
    for scen in get_scenarios():
        try:
            folder = [f for f in os.listdir(scen['output_dir']) if f.startswith('run')][0]
            cave = CAVE([os.path.join(scen['output_dir'], folder)],
                        os.path.join(scen['output_dir'], 'CAVE_RESULT'),
                        ta_exec_dir=['.'],
                        validation_method='validation')
            cave.analyze({'fANOVA': False, 'number_quantiles': 2})
        except Exception:
            # Collect failing scenarios instead of aborting on the first error
            failed.append(scen['output_dir'])
    print("Failed: %s" % (str(failed)))
elif sys.argv[1] == '--firefox':
    import webbrowser
    firefox = webbrowser.get('firefox')
    for url in [os.path.join(scen['output_dir'], 'CAVE_RESULT/report.html') for scen in get_scenarios()]:
        firefox.open_new_tab(url)
elif sys.argv[1] == '--clean':
    shutil.rmtree('test/general_example/results')
else:
    logging.error("%s not an option.", sys.argv[1])
def main_cli(self): """ Main cli, implementing comparison between and analysis of Configurator-results. """ # Reset logging module (needs to happen before logger initalization) logging.shutdown() reload(logging) # Those are the options for the --only / --skip flags map_options = { 'performance_table': 'Performance Table', 'ecdf': 'empirical Cumulative Distribution Function (eCDF)', 'scatter_plot': 'Scatter Plot', 'cost_over_time': 'Cost Over Time', 'configurator_footprint': 'Configurator Footprint', 'parallel_coordinates': 'Parallel Coordinates', 'algorithm_footprints': 'Algorithm Footprint', 'budget_correlation': 'Budget Correlation', 'bohb_learning_curves': 'BOHB Learning Curves', 'incumbents_over_budgets': 'Incumbents Over Budgets', # Parameter Importance: 'fanova': 'fANOVA', 'ablation': 'Ablation', 'lpi': 'Local Parameter Importance (LPI)', 'local_parameter_importance': 'Local Parameter Importance (LPI)', 'forward_selection': 'Forward Selection', # Feature Importance 'clustering': "Feature Clustering", 'correlation': "Feature Correlation", 'importance': "Feature Importance", 'box_violin': "Violin and Box Plots", } parser = ArgumentParser(formatter_class=SmartArgsDefHelpFormatter, add_help=False, description='CAVE: Configuration Assessment Vizualisation and Evaluation') req_opts = parser.add_mutually_exclusive_group(required=True) # Either positional or keyword folders option req_opts.add_argument("folders", nargs='*', # strings prefixed with raw| can be manually split with \n help="raw|path(s) to Configurator output-directory/ies", default=SUPPRESS) req_opts.add_argument("--folders", nargs='*', dest='folders', default=SUPPRESS, help=SUPPRESS) cave_opts = parser.add_argument_group("CAVE global options", "Options that configure the analysis in general and define behaviour.") cave_opts.add_argument("--verbose_level", default="INFO", choices=[ "INFO", "DEBUG", "DEV_DEBUG", "WARNING", "OFF" ], help="verbose level. use DEV_DEBUG for development to filter boilerplate-logs from " "imported modules, use DEBUG for full logging. full debug-log always in " "'output/debug/debug.log' ") cave_opts.add_argument("--jupyter", default='off', choices=['on', 'off'], help="output everything to jupyter, if available." ) cave_opts.add_argument("--validation", default="epm", choices=[ "validation", "epm " ], help="how to complete missing runs for config/inst-pairs. epm trains random forest with " "available data to estimate missing runs, validation requires target algorithm. ", type=str.lower) cave_opts.add_argument("--output", default="CAVE_output_%s" % ( datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f')), help="path to folder in which to save the HTML-report. ") cave_opts.add_argument("--seed", default=42, type=int, help="random seed used throughout analysis. ") cave_opts.add_argument("--file_format", default='auto', help="specify the format of the configurator-files. ", choices=['auto', 'SMAC2', 'SMAC3', 'CSV', 'BOHB', 'APT'], type=str.upper) cave_opts.add_argument("--validation_format", default='NONE', help="what format the validation-files are in", choices=['SMAC2', 'SMAC3', 'CSV', 'NONE'], type=str.upper) cave_opts.add_argument("--ta_exec_dir", default='.', help="path to the execution-directory of the configurator run. this is the path from " "which the scenario is loaded, so the instance-/pcs-files specified in the " "scenario, so they are relative to this path " "(e.g. 'ta_exec_dir/path_to_train_inst_specified_in_scenario.txt'). 
", nargs='+') # PIMP-configs pimp_opts = parser.add_argument_group("Parameter Importance", "Define the behaviour of the ParameterImportance-module (pimp)") pimp_opts.add_argument("--pimp_interactive", choices=["on", "off"], default="on", help="Whether or not to plot interactive bokeh plots for parameter importance analysis") pimp_opts.add_argument("--pimp_whiskers", choices=["on", "off"], default="on", help="Whether or not to plot interactive whisker plot for parameter importance analysis") pimp_opts.add_argument("--pimp_max_samples", default=-1, type=int, help="How many datapoints to use with PIMP. -1 -> use all. ") pimp_opts.add_argument("--pimp_no_fanova_pairs", action="store_false", dest="fanova_pairwise", help="fANOVA won't compute pairwise marginals") cfp_opts = parser.add_argument_group("Configurator Footprint", "Fine-tune the configurator footprint") cfp_opts.add_argument("--cfp_time_slider", help="whether or not to have a time_slider-widget on cfp-plot" "INCREASES FILE-SIZE (and loading) DRAMATICALLY. ", choices=["on", "off"], default="off") cfp_opts.add_argument("--cfp_number_quantiles", help="number of quantiles that configurator footprint should plot over time. ", default=3, type=int) cfp_opts.add_argument("--cfp_max_configurations_to_plot", help="maximum number of configurations to be plotted in configurator footprint (in case " "you run into a MemoryError). -1 -> plot all. ", default=-1, type=int) pc_opts = parser.add_argument_group("Parallel Coordinates", "Fine-tune the parameter parallel coordinates") # TODO: this choice should be integrated into the bokeh plot pc_opts.add_argument("--pc_sort_by", help="parameter-importance method to determine the order (and selection) of parameters " "for parallel coordinates. all: aggregate over all available methods. uses random " "method if none is given. ", default="all", type=str.lower, choices=['fanova', 'lpi', 'ablation', 'forward_selection', 'all']) cot_opts = parser.add_argument_group("Cost Over Time", "Fine-tune the cost over time plot") cot_opts.add_argument("--cot_inc_traj", help="if the optimizer belongs to HpBandSter (e.g. bohb), you can choose how the " "incumbent-trajectory will be interpreted with regards to the budget. You can " "choose from 'racing', which will only accept a configuration of a higher budget " "than the current incumbent's if the current incumbent has been evaluated on " "the higher budget; 'minimum', which will only look at the current performance " "no matter the budget; and 'prefer_higher_budget', which will always choose " "a configuration on a higher budget as incumbent as soon as it is available " "(this will likely lead to peaks, whenever a new budget is evaluated)", default="racing", type=str.lower, choices=["racing", "minimum", "prefer_higher_budget"]) # General analysis to be carried out default_opts = parser.add_mutually_exclusive_group() default_opts.add_argument("--only", nargs='*', help='perform only these analysis methods. choose from: {}'.format( ", ".join(sorted(map_options.keys())) ), default=[], ) default_opts.add_argument("--skip", nargs='*', help='perform all but these analysis methods. 
choose from: {}'.format( ", ".join(sorted(map_options.keys())) ), default=[] ) spe_opts = parser.add_argument_group("Meta arguments") spe_opts.add_argument('-v', '--version', action='version', version='%(prog)s ' + str(v), help="show program's version number and exit.") spe_opts.add_argument('-h', '--help', action="help", help="show this help message and exit") # Parse arguments and save to args_ args_ = parser.parse_args(sys.argv[1:]) # Configuration results to be analyzed folders = [] for f in args_.folders: if '*' in f: folders.extend(list(glob.glob(f, recursive=True))) else: folders.append(f) # Default ta_exec_dir is cwd ta_exec_dir = [] for t in args_.ta_exec_dir: if '*' in t: ta_exec_dir.extend(list(glob.glob(t, recursive=True))) else: ta_exec_dir.append(t) output_dir = args_.output file_format = args_.file_format validation_format = args_.validation_format validation = args_.validation seed = args_.seed verbose_level = args_.verbose_level show_jupyter = args_.jupyter == 'on' # Load default options for this file_format analyzing_options = load_default_options(file_format=detect_fileformat(folders) if file_format.upper() == "AUTO" else file_format) # Interpret the --skip and --only flags if len(args_.only) > 0: # Set all to False for o in map_options.values(): analyzing_options[o]["run"] = str(False) for o in args_.only if len(args_.only) > 0 else args_.skip: if o.lower() not in map_options: raise ValueError("Failed to interpret `--[only|skip] {}`.\n" "Please choose from:\n {}".format(o, '\n '.join(sorted(map_options.keys())))) # Set True if flag is --only and False if flag is --skip analyzing_options[map_options[o.lower()]]["run"] = str(len(args_.only) > 0) # Fine-tuning individual analyzer options analyzing_options["Configurator Footprint"]["time_slider"] = str(args_.cfp_time_slider) analyzing_options["Configurator Footprint"]["number_quantiles"] = str(args_.cfp_number_quantiles) analyzing_options["Configurator Footprint"]["max_configurations_to_plot"] = str(args_.cfp_max_configurations_to_plot) analyzing_options["Cost Over Time"]["incumbent_trajectory"] = str(args_.cot_inc_traj) analyzing_options["fANOVA"]["fanova_pairwise"] = str(args_.fanova_pairwise) analyzing_options["fANOVA"]["pimp_max_samples"] = str(args_.pimp_max_samples) analyzing_options["Parallel Coordinates"]["pc_sort_by"] = str(args_.pc_sort_by) analyzing_options["Parameter Importance"]["whisker_quantiles_plot"] = str(args_.pimp_whiskers) analyzing_options["Parameter Importance"]["interactive_bokeh_plots"] = str(args_.pimp_interactive) # Initialize CAVE cave = CAVE(folders, output_dir, ta_exec_dir, file_format=file_format, validation_format=validation_format, validation_method=validation, show_jupyter=show_jupyter, seed=seed, verbose_level=verbose_level, analyzing_options=analyzing_options, ) # Check if CAVE was successfully initialized try: cave.logger.debug("CAVE is called with arguments: " + str(args_)) except AttributeError as err: logging.getLogger().warning("Error in CAVE-initialization... (it's fine for running nosetests)") logging.getLogger().debug("CAVE is called with arguments: " + str(args_)) # Analyze (with options defined in initialization via the analyzing_options cave.analyze()
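# Self-contained sketch (toy data, hypothetical names, not part of the CLI above)
# of the --only / --skip semantics implemented in main_cli: with --only, every
# analyzer is switched off first and only the listed ones are re-enabled; with
# --skip, only the listed ones are switched off.
def _toy_only_skip(only, skip):
    toy_map = {'ecdf': 'eCDF', 'fanova': 'fANOVA'}          # stand-in for map_options
    options = {name: {"run": "True"} for name in toy_map.values()}
    if only:
        for section in toy_map.values():
            options[section]["run"] = str(False)
    for o in (only if only else skip):
        options[toy_map[o.lower()]]["run"] = str(bool(only))
    return options

# _toy_only_skip(only=['fanova'], skip=[])  ->  {'eCDF': {'run': 'False'}, 'fANOVA': {'run': 'True'}}
# _toy_only_skip(only=[], skip=['ecdf'])    ->  {'eCDF': {'run': 'False'}, 'fANOVA': {'run': 'True'}}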
#!/usr/bin/env python
# coding: utf-8

# In[4]:

from cave.cavefacade import CAVE
import os

# In[5]:

output_dir = "smac_output"
results_dir = os.path.join(output_dir, 'run_1')

# In[6]:

cave = CAVE(
    folders=[results_dir],
    output_dir="test_cave_smac",
    ta_exec_dir=["."],
    file_format='SMAC3',
)
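# In[7]:

# A minimal follow-up sketch (not part of the original notebook export): run the
# analysis so the HTML-report is written into the output_dir given above.
cave.analyze()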