Example #1
def read(run_history: RunHistory, output_dirs: typing.Union[str,
                                                            typing.List[str]],
         configuration_space: ConfigurationSpace,
         logger: logging.Logger) -> None:
    """Update runhistory with run results from concurrent runs of pSMAC.

    Parameters
    ----------
    run_history : smac.runhistory.RunHistory
        RunHistory object to be updated with run information from runhistory
        objects stored in the output directory.
    output_dirs : typing.Union[str,typing.List[str]]
        List of SMAC output directories
        or a Linux path expression (str) which will be cast into a list with
        glob.glob(). This function will search the output directories
        for files matching the runhistory regular expression.
    configuration_space : ConfigSpace.ConfigurationSpace
        A ConfigurationSpace object to check if loaded configurations are valid.
    logger : logging.Logger
        Logger used for status output.
    """
    numruns_in_runhistory = len(run_history.data)
    initial_numruns_in_runhistory = numruns_in_runhistory

    if isinstance(output_dirs, str):
        parsed_output_dirs = glob.glob(output_dirs)
        # also pick up per-run subdirectories such as run_1, run_2, ...
        parsed_output_dirs += glob.glob(os.path.join(output_dirs, "run_*"))
    else:
        parsed_output_dirs = output_dirs

    for output_directory in parsed_output_dirs:
        for file_in_output_directory in os.listdir(output_directory):
            match = re.match(RUNHISTORY_RE, file_in_output_directory)
            valid_match = re.match(VALIDATEDRUNHISTORY_RE,
                                   file_in_output_directory)
            if match or valid_match:
                runhistory_file = os.path.join(output_directory,
                                               file_in_output_directory)
                run_history.update_from_json(runhistory_file,
                                             configuration_space)

                new_numruns_in_runhistory = len(run_history.data)
                difference = new_numruns_in_runhistory - numruns_in_runhistory
                logger.debug('Shared model mode: Loaded %d new runs from %s' %
                             (difference, runhistory_file))
                numruns_in_runhistory = new_numruns_in_runhistory

    difference = numruns_in_runhistory - initial_numruns_in_runhistory
    logger.info(
        'Shared model mode: Finished loading new runs, found %d new runs.' %
        difference)
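
A minimal usage sketch for read() above. The paths are illustrative, and the
import locations (which follow the older SMAC releases used elsewhere on this
page) may differ in your version:

import logging

from smac.optimizer.objective import average_cost
from smac.runhistory.runhistory import RunHistory
from smac.scenario.scenario import Scenario

scenario = Scenario("scenario.txt")  # illustrative scenario file
run_history = RunHistory(average_cost)
# A string is expanded with glob.glob() and run_* subdirectories are added;
# a list of directories would be used as-is.
read(run_history,
     output_dirs="psmac3-output_*",
     configuration_space=scenario.cs,
     logger=logging.getLogger("pSMAC.read"))
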
Example #2
                              run_obj=scenario.run_obj,
                              par_factor=scenario.par_factor,
                              cost_for_crash=scenario.cost_for_crash)
    if args_.tae == "aclib":
        tae = ExecuteTARunAClib(ta=scenario.ta,
                                run_obj=scenario.run_obj,
                                par_factor=scenario.par_factor,
                                cost_for_crash=scenario.cost_for_crash)

    validator = Validator(scenario, trajectory, args_.seed)

    # Load runhistory
    if args_.runhistory:
        runhistory = RunHistory(average_cost)
        for rh_path in args_.runhistory:
            runhistory.update_from_json(rh_path, scenario.cs)
    else:
        runhistory = None

    if args_.epm:
        validator.validate_epm(config_mode=args_.configs,
                               instance_mode=args_.instances,
                               repetitions=args_.repetitions,
                               runhistory=runhistory,
                               output_fn=args_.output)
    else:
        validator.validate(config_mode=args_.configs,
                           instance_mode=args_.instances,
                           repetitions=args_.repetitions,
                           n_jobs=args_.n_jobs,
                           runhistory=runhistory,
Example #3
                              par_factor=scenario.par_factor,
                              cost_for_crash=scenario.cost_for_crash)
    if args_.tae == "aclib":
        tae = ExecuteTARunAClib(ta=scenario.ta,
                                run_obj=scenario.run_obj,
                                par_factor=scenario.par_factor,
                                cost_for_crash=scenario.cost_for_crash)

    validator = Validator(scenario, trajectory, args_.seed)

    # Load runhistory
    if args_.runhistory:
        runhistory = RunHistory(average_cost, file_system=scenario.file_system)
        for rh_path in args_.runhistory:
            runhistory.update_from_json(rh_path,
                                        scenario.cs,
                                        file_system=scenario.file_system)
    else:
        runhistory = None

    if args_.epm:
        validator.validate_epm(config_mode=args_.configs,
                               instance_mode=args_.instances,
                               repetitions=args_.repetitions,
                               runhistory=runhistory,
                               output_fn=args_.output)
    else:
        validator.validate(config_mode=args_.configs,
                           instance_mode=args_.instances,
                           repetitions=args_.repetitions,
                           n_jobs=args_.n_jobs,
Example #4
                          required=True,
                          help="scenario file in AClib format")
    req_opts.add_argument("--runhistory",
                          required=True,
                          nargs="+",
                          help="runhistory files")

    req_opts.add_argument("--verbose_level",
                          default=logging.INFO,
                          choices=["INFO", "DEBUG"],
                          help="random seed")

    req_opts.add_argument("--save_fn",
                          default="fw_importance.pdf",
                          help="file name of saved plot")

    args_ = parser.parse_args()

    logging.basicConfig(level=args_.verbose_level)
    # if args_.verbose_level == "DEBUG":
    #    logging.parent.level = 10

    scen = Scenario(args_.scenario_file)
    hist = RunHistory()
    for runhist_fn in args_.runhistory:
        hist.update_from_json(fn=runhist_fn, cs=scen.cs)

    fws = ForwardSelection(scenario=scen, runhistory=hist)

    fws.run(save_fn=args_.save_fn)
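
The same merge without the CLI wrapper, as a hedged standalone sketch (paths
are illustrative; RunHistory() without an aggregate function follows this
example, while older releases require one):

from smac.runhistory.runhistory import RunHistory
from smac.scenario.scenario import Scenario

scen = Scenario("scenario.txt")  # illustrative path
hist = RunHistory()
for runhist_fn in ["run_1/runhistory.json",  # illustrative paths
                   "run_2/runhistory.json"]:
    # Each call adds the runs and configurations of one file; external
    # config ids are remapped to the merged history's internal ids.
    hist.update_from_json(fn=runhist_fn, cs=scen.cs)
print("merged %d runs over %d configurations"
      % (len(hist.data), len(hist.get_all_configs())))
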
Example #5
    def test_load(self):
        configuration_space = test_helpers.get_branin_config_space()

        other_runhistory = '{"data": [[[2, "branini", 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[1, "branin", 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[3, "branin-hoo", 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[2, null, 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[1, "branini", 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]], ' \
                  '[[4, null, 1], [1, 1,' \
                  '{"__enum__": "StatusType.SUCCESS"}, null]]], ' \
                  '"configs": {' \
                  '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, ' \
                  '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, ' \
                  '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, ' \
                  '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'

        other_runhistory_filename = os.path.join(self.tmp_dir,
                                                 'runhistory.json')
        with open(other_runhistory_filename, 'w') as fh:
            fh.write(other_runhistory)

        # load from an empty runhistory
        runhistory = RunHistory(aggregate_func=average_cost)
        runhistory.load_json(other_runhistory_filename, configuration_space)
        self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                         [1, 2, 3, 4])
        self.assertEqual(len(runhistory.data), 6)

        # load from non-empty runhistory, in case of a duplicate the existing
        # result will be kept and the new one silently discarded
        runhistory = RunHistory(aggregate_func=average_cost)
        configuration_space.seed(1)
        config = configuration_space.sample_configuration()
        runhistory.add(config,
                       1,
                       1,
                       StatusType.SUCCESS,
                       seed=1,
                       instance_id='branin')
        id_before = id(runhistory.data[RunKey(1, 'branin', 1)])
        runhistory.update_from_json(other_runhistory_filename,
                                    configuration_space)
        id_after = id(runhistory.data[RunKey(1, 'branin', 1)])
        self.assertEqual(len(runhistory.data), 6)
        self.assertEqual(id_before, id_after)

        # load from a non-empty runhistory; the sampled config is the former
        # config_3, so its config_id is re-used and all six loaded runs are
        # added alongside the existing one
        runhistory = RunHistory(aggregate_func=average_cost)
        configuration_space.seed(1)
        config = configuration_space.sample_configuration()
        config = configuration_space.sample_configuration()
        # This is the former config_3
        config = configuration_space.sample_configuration()
        runhistory.add(config,
                       1,
                       1,
                       StatusType.SUCCESS,
                       seed=1,
                       instance_id='branin')
        id_before = id(runhistory.data[RunKey(1, 'branin', 1)])
        runhistory.update_from_json(other_runhistory_filename,
                                    configuration_space)
        id_after = id(runhistory.data[RunKey(1, 'branin', 1)])
        self.assertEqual(len(runhistory.data), 7)
        self.assertEqual(id_before, id_after)
        self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                         [1, 2, 3, 4])
        self.assertEqual(
            [runhistory.external[run_key] for run_key in runhistory.data],
            [DataOrigin.INTERNAL] + [DataOrigin.EXTERNAL_SAME_INSTANCES] * 6)
Example #6
    def test_load(self):
        configuration_space = test_helpers.get_branin_config_space()

        other_runhistory = '{"data": [[[2, "branini", 1], [1, 1, 1, null]], ' \
        '[[1, "branin", 1], [1, 1, 1, null]], ' \
        '[[3, "branin-hoo", 1], [1, 1, 1, null]], ' \
        '[[2, null, 1], [1, 1, 1, null]], ' \
        '[[1, "branini", 1], [1, 1, 1, null]], ' \
        '[[4, null, 1], [1, 1, 1, null]]], ' \
        '"configs": {' \
        '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, ' \
        '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, ' \
        '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, ' \
        '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'

        other_runhistory_filename = os.path.join(self.tmp_dir,
                                                 '.runhistory_20.json')
        with open(other_runhistory_filename, 'w') as fh:
            fh.write(other_runhistory)

        # load from an empty runhistory
        runhistory = RunHistory()
        runhistory.load_json(other_runhistory_filename, configuration_space)
        self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                         [1, 2, 3, 4])
        self.assertEqual(len(runhistory.data), 6)

        # load from a non-empty runhistory; the existing run will be
        # overridden because it already existed
        runhistory = RunHistory()
        configuration_space.seed(1)
        config = configuration_space.sample_configuration()
        runhistory.add(config, 1, 1, StatusType.SUCCESS, seed=1,
                        instance_id='branin')
        id_before = id(runhistory.data[runhistory.RunKey(1, 'branin', 1)])
        runhistory.update_from_json(other_runhistory_filename,
                                    configuration_space)
        id_after = id(runhistory.data[runhistory.RunKey(1, 'branin', 1)])
        self.assertEqual(len(runhistory.data), 6)
        self.assertNotEqual(id_before, id_after)

        # load from a non-empty runhistory; the existing run is not
        # overridden, and the config_id of the former config_3 is re-used
        runhistory = RunHistory()
        configuration_space.seed(1)
        config = configuration_space.sample_configuration()
        config = configuration_space.sample_configuration()
        # This is the former config_3
        config = configuration_space.sample_configuration()
        runhistory.add(config, 1, 1, StatusType.SUCCESS, seed=1,
                       instance_id='branin')
        id_before = id(runhistory.data[runhistory.RunKey(1, 'branin', 1)])
        runhistory.update_from_json(other_runhistory_filename,
                                    configuration_space)
        id_after = id(runhistory.data[runhistory.RunKey(1, 'branin', 1)])
        self.assertEqual(len(runhistory.data), 7)
        self.assertEqual(id_before, id_after)
        print(runhistory.config_ids)
        self.assertEqual(sorted(list(runhistory.ids_config.keys())),
                         [1, 2, 3, 4])
        print(list(runhistory.data.keys()))
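
The two tests above pin down version-dependent duplicate handling: in Example
#5 an existing entry is kept and the duplicate silently discarded, while in
Example #6 it is overridden. A small probe in the style of Example #5 (the
JSON path is illustrative, cs must be the ConfigurationSpace the file was
written with, and the import locations may differ across SMAC versions):

from smac.runhistory.runhistory import RunHistory, RunKey
from smac.tae.execute_ta_run import StatusType

rh = RunHistory()
config = cs.sample_configuration()  # cs: assumed given, see above
rh.add(config, 1, 1, StatusType.SUCCESS, seed=1, instance_id='branin')
entry_before = rh.data[RunKey(1, 'branin', 1)]
rh.update_from_json('other_runhistory.json', cs)
entry_after = rh.data[RunKey(1, 'branin', 1)]
print('existing entry kept' if entry_before is entry_after
      else 'existing entry overridden')
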
Example #7
class CAVE(object):
    """
    """
    def __init__(self,
                 folders: typing.List[str],
                 output: str,
                 ta_exec_dir: Union[str, None] = None,
                 missing_data_method: str = 'epm',
                 max_pimp_samples: int = -1,
                 fanova_pairwise=True):
        """
        Initialize CAVE facade to handle analyzing, plotting and building the
        report-page easily. During initialization, the analysis-infrastructure
        is built and the data is validated, meaning the overall best
        incumbent is found and default+incumbent are evaluated for all
        instances for all runs, by default using an EPM.
        The class holds two runhistories:
            self.original_rh -> only contains runs from the actual data
            self.validated_rh -> contains original runs and epm-predictions for
                                 all incumbents
        The analyze()-method performs an analysis and outputs a report.html.

        Parameters
        ----------
        folders: list of strings
            paths to relevant SMAC runs
        output: string
            output for cave to write results (figures + report)
        ta_exec_dir: string
            execution directory for target algorithm (to find instance.txt, ..)
        missing_data_method: string
            from [validation, epm], how to estimate missing runs
        """
        self.logger = logging.getLogger("cave.cavefacade")
        self.logger.debug("Folders: %s", str(folders))
        self.ta_exec_dir = ta_exec_dir

        # Create output if necessary
        self.output = output
        self.logger.info("Saving results to %s", self.output)
        if not os.path.exists(output):
            self.logger.debug("Output-dir %s does not exist, creating",
                              self.output)
            os.makedirs(output)
        if not os.path.exists(os.path.join(self.output, "debug")):
            os.makedirs(os.path.join(self.output, "debug"))
        # Log to file
        logger = logging.getLogger()
        handler = logging.FileHandler(
            os.path.join(self.output, "debug", "debug.log"), "w")
        handler.setLevel(logging.DEBUG)
        logger.addHandler(handler)

        # Global runhistory combines all actual runs of individual SMAC-runs
        # We save the combined (unvalidated) runhistory to disk, so we can use it later on.
        # We keep the validated runhistory (with as many runs as possible) in
        # memory. The distinction is made to avoid using runs that are
        # only estimated using an EPM for further EPMs or to handle runs
        # validated on different hardware (depending on validation-method).
        self.original_rh = RunHistory(average_cost)
        self.validated_rh = RunHistory(average_cost)

        # Save all relevant SMAC-runs in a list
        self.runs = []
        for folder in folders:
            try:
                self.logger.debug("Collecting data from %s.", folder)
                self.runs.append(SMACrun(folder, ta_exec_dir))
            except Exception as err:
                self.logger.warning(
                    "Folder %s could not be loaded, failed "
                    "with error message: %s", folder, err)
                continue
        if not len(self.runs):
            raise ValueError(
                "None of the specified SMAC-folders could be loaded.")

        # Use scenario of first run for general purposes (expecting they are all the same anyway!)
        self.scenario = self.runs[0].solver.scenario

        # Update global runhistory with all available runhistories
        self.logger.debug("Update original rh with all available rhs!")
        runhistory_fns = [
            os.path.join(run.folder, "runhistory.json") for run in self.runs
        ]
        for rh_file in runhistory_fns:
            self.original_rh.update_from_json(rh_file, self.scenario.cs)
        self.logger.debug(
            'Combined number of Runhistory data points: %d. '
            '# Configurations: %d. # Runhistories: %d',
            len(self.original_rh.data),
            len(self.original_rh.get_all_configs()), len(runhistory_fns))
        self.original_rh.save_json(
            os.path.join(self.output, "combined_rh.json"))

        # Validator for a) validating with epm, b) plot over time
        # Initialize without trajectory
        self.validator = Validator(self.scenario, None, None)

        # Estimate missing costs for [def, inc1, inc2, ...]
        self.complete_data(method=missing_data_method)
        self.best_run = min(
            self.runs,
            key=lambda run: self.validated_rh.get_cost(run.solver.incumbent))

        self.default = self.scenario.cs.get_default_configuration()
        self.incumbent = self.best_run.solver.incumbent

        self.logger.debug("Overall best run: %s, with incumbent: %s",
                          self.best_run.folder, self.incumbent)

        # Following variable determines whether a distinction is made
        # between train and test-instances (e.g. in plotting)
        self.train_test = bool(self.scenario.train_insts != [None]
                               and self.scenario.test_insts != [None])

        self.analyzer = Analyzer(self.original_rh, self.validated_rh,
                                 self.default, self.incumbent, self.train_test,
                                 self.scenario, self.validator, self.output,
                                 max_pimp_samples, fanova_pairwise)

        self.builder = HTMLBuilder(self.output, "CAVE")
        # Builder for html-website
        self.website = OrderedDict([])

    def complete_data(self, method="epm"):
        """Complete missing data of runs to be analyzed. Either using validation
        or EPM.
        """
        with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
            self.logger.info("Completing data using %s.", method)

            path_for_validated_rhs = os.path.join(self.output, "validated_rhs")
            for run in self.runs:
                self.validator.traj = run.traj
                if method == "validation":
                    # TODO determine # repetitions
                    new_rh = self.validator.validate(
                        'def+inc',
                        'train+test',
                        1,
                        -1,
                        runhistory=self.original_rh)
                elif method == "epm":
                    new_rh = self.validator.validate_epm(
                        'def+inc',
                        'train+test',
                        1,
                        runhistory=self.original_rh)
                else:
                    raise ValueError("Missing data method illegal (%s)",
                                     method)
                self.validator.traj = None  # Avoid usage-mistakes
                self.validated_rh.update(new_rh)

    def analyze(self,
                performance=True,
                cdf=True,
                scatter=True,
                confviz=True,
                param_importance=['forward_selection', 'ablation', 'fanova'],
                feature_analysis=[
                    "box_violin", "correlation", "importance", "clustering",
                    "feature_cdf"
                ],
                parallel_coordinates=True,
                cost_over_time=True,
                algo_footprint=True):
        """Analyze the available data and build HTML-webpage as dict.
        Save webpage in 'self.output/CAVE/report.html'.
        Analyzing is performed with the analyzer-instance that is initialized in
        the __init__

        Parameters
        ----------
        performance: bool
            whether to calculate par10-values
        cdf: bool
            whether to plot cdf
        scatter: bool
            whether to plot scatter
        confviz: bool
            whether to perform configuration visualization
        param_importance: List[str]
            containing methods for parameter importance
        feature_analysis: List[str]
            containing methods for feature analysis
        parallel_coordinates: bool
            whether to plot parallel coordinates
        cost_over_time: bool
            whether to plot cost over time
        algo_footprint: bool
            whether to plot algorithm footprints
        """

        # Check arguments
        for p in param_importance:
            if p not in [
                    'forward_selection', 'ablation', 'fanova', 'incneighbor'
            ]:
                raise ValueError(
                    "%s is not a valid option for parameter "
                    "importance!" % p)
        for f in feature_analysis:
            if f not in [
                    "box_violin", "correlation", "importance", "clustering",
                    "feature_cdf"
            ]:
                raise ValueError("%s not a valid option for feature analysis!",
                                 f)

        # Start analysis
        overview = self.analyzer.create_overview_table(self.best_run.folder)
        self.website["Meta Data"] = {"table": overview}

        compare_config = self.analyzer.config_to_html(self.default,
                                                      self.incumbent)
        self.website["Best configuration"] = {"table": compare_config}

        ########## PERFORMANCE ANALYSIS
        self.website["Performance Analysis"] = OrderedDict()

        if performance:
            performance_table = self.analyzer.create_performance_table(
                self.default, self.incumbent)
            self.website["Performance Analysis"]["Performance Table"] = {
                "table": performance_table
            }

        if cdf:
            cdf_path = self.analyzer.plot_cdf()
            self.website["Performance Analysis"][
                "empirical Cumulative Distribution Function (eCDF)"] = {
                    "figure": cdf_path
                }

        if scatter and (self.scenario.train_insts != [None]):
            scatter_path = self.analyzer.plot_scatter()
            self.website["Performance Analysis"]["Scatterplot"] = {
                "figure": scatter_path
            }
        elif scatter:
            self.logger.info(
                "Scatter plot desired, but no instances available.")

        # Build report before time-consuming analysis
        self.build_website()

        if algo_footprint and self.scenario.feature_dict:
            algorithms = {self.default: "default", self.incumbent: "incumbent"}
            # Add all available incumbents to test portfolio strategy
            #for r in self.runs:
            #    if not r.get_incumbent() in algorithms:
            #        algorithms[r.get_incumbent()] = str(self.runs.index(r))

            algo_footprint_plots = self.analyzer.plot_algorithm_footprint(
                algorithms)
            self.website["Performance Analysis"][
                "Algorithm Footprints"] = OrderedDict()
            for p in algo_footprint_plots:
                header = os.path.splitext(os.path.split(p)[1])[0]  # algo name
                self.website["Performance Analysis"]["Algorithm Footprints"][
                    header] = {
                        "figure": p,
                        "tooltip":
                        get_tooltip("Algorithm Footprints") + ": " + header
                    }

        self.build_website()

        ########### Configurator's behavior
        self.website["Configurator's behavior"] = OrderedDict()

        if confviz and self.scenario.feature_dict:
            if self.scenario.feature_array is None:
                self.scenario.feature_array = np.array([[]])
            # Sort runhistories and incs wrt cost
            incumbents = [r.solver.incumbent for r in self.runs]
            trajectories = [r.traj for r in self.runs]
            runhistories = [r.runhistory for r in self.runs]
            costs = [self.validated_rh.get_cost(i) for i in incumbents]
            costs, incumbents, runhistories, trajectories = (
                list(t) for t in zip(
                    *sorted(zip(costs, incumbents, runhistories, trajectories),
                            key=lambda x: x[0])))
            incumbents = list(map(lambda x: x['incumbent'], trajectories[0]))

            confviz_script = self.analyzer.plot_confviz(
                incumbents, runhistories)
            self.website["Configurator's behavior"][
                "Configurator Footprint"] = {
                    "table": confviz_script
                }
        elif confviz:
            self.logger.info("Configuration visualization desired, but no "
                             "instance-features available.")

        self.build_website()

        if cost_over_time:
            cost_over_time_path = self.analyzer.plot_cost_over_time(
                self.best_run.traj, self.validator)
            self.website["Configurator's behavior"]["Cost over time"] = {
                "figure": cost_over_time_path
            }

        self.build_website()

        self.parameter_importance(ablation='ablation' in param_importance,
                                  fanova='fanova' in param_importance,
                                  forward_selection='forward_selection'
                                  in param_importance,
                                  incneighbor='incneighbor'
                                  in param_importance)

        self.build_website()

        if parallel_coordinates:
            # Should be after parameter importance, if performed.
            n_params = 6
            parallel_path = self.analyzer.plot_parallel_coordinates(n_params)
            self.website["Configurator's behavior"]["Parallel Coordinates"] = {
                "figure": parallel_path
            }

        self.build_website()

        if self.scenario.feature_dict:
            self.feature_analysis(box_violin='box_violin' in feature_analysis,
                                  correlation='correlation'
                                  in feature_analysis,
                                  clustering='clustering' in feature_analysis,
                                  importance='importance' in feature_analysis)
        else:
            self.logger.info('No feature analysis possible')

        self.logger.info("CAVE finished. Report is located in %s",
                         os.path.join(self.output, 'report.html'))

        self.build_website()

    def parameter_importance(self,
                             ablation=False,
                             fanova=False,
                             forward_selection=False,
                             incneighbor=False):
        """Perform the specified parameter importance procedures. """
        # PARAMETER IMPORTANCE
        if (ablation or forward_selection or fanova or incneighbor):
            self.website["Parameter Importance"] = OrderedDict()
        sum_ = 0
        if fanova:
            sum_ += 1
            table, plots, pair_plots = self.analyzer.fanova(self.incumbent)

            self.website["Parameter Importance"]["fANOVA"] = OrderedDict()

            self.website["Parameter Importance"]["fANOVA"]["Importance"] = {
                "table": table
            }
            # Insert plots (the received plots is a dict, mapping param -> path)
            self.website["Parameter Importance"]["fANOVA"][
                "Marginals"] = OrderedDict([])
            for param, plot in plots.items():
                self.website["Parameter Importance"]["fANOVA"]["Marginals"][
                    param] = {
                        "figure": plot
                    }
            if pair_plots:
                self.website["Parameter Importance"]["fANOVA"][
                    "PairwiseMarginals"] = OrderedDict([])
                for param, plot in pair_plots.items():
                    self.website["Parameter Importance"]["fANOVA"][
                        "PairwiseMarginals"][param] = {
                            "figure": plot
                        }

        if ablation:
            sum_ += 1
            self.logger.info("Ablation...")
            self.analyzer.parameter_importance("ablation", self.incumbent,
                                               self.output)
            ablationpercentage_path = os.path.join(self.output,
                                                   "ablationpercentage.png")
            ablationperformance_path = os.path.join(self.output,
                                                    "ablationperformance.png")
            self.website["Parameter Importance"]["Ablation"] = {
                "figure": [ablationpercentage_path, ablationperformance_path]
            }

        if forward_selection:
            sum_ += 1
            self.logger.info("Forward Selection...")
            self.analyzer.parameter_importance("forward-selection",
                                               self.incumbent, self.output)
            f_s_barplot_path = os.path.join(self.output,
                                            "forward selection-barplot.png")
            f_s_chng_path = os.path.join(self.output,
                                         "forward selection-chng.png")
            self.website["Parameter Importance"]["Forward Selection"] = {
                "figure": [f_s_barplot_path, f_s_chng_path]
            }

        if incneighbor:
            sum_ += 1
            self.logger.info("Local EPM-predictions around incumbent...")
            plots = self.analyzer.local_epm_plots()
            self.website["Parameter Importance"][
                "Local Parameter Importance (LPI)"] = OrderedDict([])
            for param, plot in plots.items():
                self.website["Parameter Importance"][
                    "Local Parameter Importance (LPI)"][param] = {
                        "figure": plot
                    }

        if sum_:
            of = os.path.join(self.output, 'pimp.tex')
            self.logger.info('Creating pimp latex table at %s' % of)
            self.analyzer.pimp.table_for_comparison(self.analyzer.evaluators,
                                                    of,
                                                    style='latex')

    def feature_analysis(self,
                         box_violin=False,
                         correlation=False,
                         clustering=False,
                         importance=False):
        if not (box_violin or correlation or clustering or importance):
            self.logger.debug("No feature analysis.")
            return

        # FEATURE ANALYSIS (ASAPY)
        # TODO make the following line prettier
        # TODO feat-names from scenario?
        in_reader = InputReader()
        feat_fn = self.scenario.feature_fn

        if not self.scenario.feature_names:
            with changedir(self.ta_exec_dir if self.ta_exec_dir else '.'):
                if not feat_fn or not os.path.exists(feat_fn):
                    self.logger.warning(
                        "Feature Analysis needs a valid feature "
                        "file! Either %s is not a valid "
                        "filename or features are not saved in "
                        "the scenario.", feat_fn)
                    self.logger.error("Skipping Feature Analysis.")
                    return
                else:
                    feat_names = in_reader.read_instance_features_file(
                        self.scenario.feature_fn)[0]
        else:
            feat_names = copy.deepcopy(self.scenario.feature_names)

        self.website["Feature Analysis"] = OrderedDict([])

        # feature importance using forward selection
        if importance:
            self.website["Feature Analysis"][
                "Feature Importance"] = OrderedDict()
            imp, plots = self.analyzer.feature_importance()
            imp = DataFrame(data=list(imp.values()),
                            index=list(imp.keys()),
                            columns=["Error"])
            imp = imp.to_html()  # this is a table with the values in html
            self.website["Feature Analysis"]["Feature Importance"]["Table"] = {
                "table": imp
            }
            for p in plots:
                name = os.path.splitext(os.path.basename(p))[0]
                self.website["Feature Analysis"]["Feature Importance"][
                    name] = {
                        "figure": p
                    }

        # box and violin plots
        if box_violin:
            name_plots = self.analyzer.feature_analysis(
                'box_violin', feat_names)
            self.website["Feature Analysis"][
                "Violin and Box Plots"] = OrderedDict()
            for plot_tuple in name_plots:
                key = "%s" % (plot_tuple[0])
                self.website["Feature Analysis"]["Violin and Box Plots"][
                    key] = {
                        "figure": plot_tuple[1]
                    }

        # correlation plot
        if correlation:
            correlation_plot = self.analyzer.feature_analysis(
                'correlation', feat_names)
            if correlation_plot:
                self.website["Feature Analysis"]["Correlation"] = {
                    "figure": correlation_plot
                }

        # cluster instances in feature space
        if clustering:
            cluster_plot = self.analyzer.feature_analysis(
                'clustering', feat_names)
            self.website["Feature Analysis"]["Clustering"] = {
                "figure": cluster_plot
            }

        self.build_website()

    def build_website(self):
        self.builder.generate_html(self.website)
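
A hedged usage sketch for the CAVE facade above (folder and output names are
illustrative):

cave = CAVE(folders=["smac3-output_run_1", "smac3-output_run_2"],
            output="cave-report",
            ta_exec_dir=".",
            missing_data_method="epm")
cave.analyze()  # runs the default analyses and writes the HTML report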
Example #8

def read_configurations_for_task_id(task_id, task_id_to_dir, cs):
    incumbents_test_rval = list()

    rh = RunHistory()
    for entry in task_id_to_dir[task_id]:
        # Merge all evaluations from multiple SMAC runs into one runhistory
        rh.update_from_json(entry, cs)

    X = []
    run_times = []
    Y_train = []
    Y_test = []
    status = []
    results = {key.config_id: value for key, value in rh.data.items()}
    max_lc_length = 0

    for config_id in results:
        run_times_tmp = []
        y_train = []
        y_test = []

        if results[config_id].status == StatusType.SUCCESS:
            run_times_tmp.append(results[config_id].time)
            y_train.append(results[config_id].additional_info['train_loss'])
            y_test.append(results[config_id].additional_info['test_loss'])
            status.append(0)
        else:
            run_times_tmp.append(results[config_id].time)
            y_train.append(1.0)
            y_test.append(1.0)
            status.append(1)

        X.append(rh.ids_config[config_id])
        run_times.append(run_times_tmp)
        Y_train.append(y_train)
        Y_test.append(y_test)

    run_times = np.array(run_times)
    Y_train = np.array(Y_train)
    Y_test = np.array(Y_test)

    assert len(X) != 0
    assert run_times.dtype == np.float64, (task_id, run_times.dtype)
    assert len(X) == run_times.shape[0]
    assert Y_train.dtype == np.float64, (task_id, Y_train.dtype)
    assert len(X) == Y_train.shape[0]
    assert Y_test.dtype == np.float64, (task_id, Y_test.dtype)
    assert len(X) == Y_test.shape[0]

    if len(run_times.shape) == 1:
        raise ValueError()

    # Get all configs with the best value
    incumbent_test = list(np.where(Y_test == Y_test.min())[0])

    # Shuffle incumbent array
    rng = np.random.RandomState(task_id)
    rng.shuffle(incumbent_test)
    for idx in incumbent_test:
        config = Configuration(cs, values=X[idx])
        incumbents_test_rval.append(config)
    # Return all incumbents
    return task_id, incumbents_test_rval
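
A hypothetical driver for read_configurations_for_task_id() above; the
mapping and cs are assumptions:

# maps a task id to the runhistory.json files of its SMAC runs
task_id_to_dir = {3: ["task_3/run_1/runhistory.json",
                      "task_3/run_2/runhistory.json"]}
# cs: the ConfigurationSpace the runhistories were written with (assumed given)
task_id, incumbents = read_configurations_for_task_id(3, task_id_to_dir, cs)
print("task %s: %d tied test incumbents" % (task_id, len(incumbents)))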
Example #9
File: smacrun.py  Project: shunsunsun/CAVE
class SMACrun(SMAC):
    """
    SMACrun keeps all information on a specific SMAC run. Extends the standard
    SMAC-facade.
    """
    def __init__(self, folder: str, ta_exec_dir: Union[str, None] = None):
        """Initialize scenario, runhistory and incumbent from folder, execute
        init-method of SMAC facade (so you could simply use SMAC-instances instead)

        Parameters
        ----------
        folder: string
            output-dir of this run
        ta_exec_dir: string
            if the execution directory for the SMAC-run differs from the cwd,
            there might be problems loading instance-, feature- or PCS-files
            in the scenario-object. Since instance- and PCS-files are necessary,
            specify the path to the execution-dir of SMAC here
        """
        run_1_existed = os.path.exists('run_1')
        self.logger = logging.getLogger("cave.SMACrun.{}".format(folder))
        in_reader = InputReader()

        self.folder = folder
        self.logger.debug("Loading from %s", folder)

        split_folder = os.path.split(folder)
        self.logger.info(split_folder)
        if ta_exec_dir is None:
            ta_exec_dir = '.'

        self.scen_fn = os.path.join(folder, 'scenario.txt')
        self.rh_fn = os.path.join(folder, 'runhistory.json')
        self.traj_fn = os.path.join(folder, 'traj_aclib2.json')
        self.traj_old_fn = os.path.join(folder, 'traj_old.csv')

        # Create Scenario (disable output_dir to avoid cluttering)
        scen_dict = in_reader.read_scenario_file(self.scen_fn)
        scen_dict['output_dir'] = ""
        with changedir(ta_exec_dir):
            self.scen = Scenario(scen_dict)

        # Load runhistory and trajectory
        self.runhistory = RunHistory(average_cost)
        self.runhistory.update_from_json(self.rh_fn, self.scen.cs)
        self.traj = TrajLogger.read_traj_aclib_format(fn=self.traj_fn,
                                                      cs=self.scen.cs)

        incumbent = self.traj[-1]['incumbent']
        self.train_inst = self.scen.train_insts
        self.test_inst = self.scen.test_insts

        # Initialize SMAC-object
        super().__init__(scenario=self.scen, runhistory=self.runhistory)
        #restore_incumbent=incumbent)
        # TODO use restore, delete next line
        self.solver.incumbent = incumbent

        if (not run_1_existed) and os.path.exists('run_1'):
            shutil.rmtree('run_1')

    def get_incumbent(self):
        return self.solver.incumbent
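
Illustrative use of SMACrun (the folder path assumes a standard SMAC output
layout):

run = SMACrun(folder="smac3-output/run_1", ta_exec_dir=".")
print(run.get_incumbent())  # incumbent of this individual run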