コード例 #1
0
    def run(self):
        """Runs the Bayesian optimization loop

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        """
        self.start()

        # Main BO loop
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            time_spent = time.time() - start_time
            time_left = self._get_timebound_for_intensification(time_spent)

            self.logger.debug("Intensify")

            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=challengers,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(self.intensifier._min_time, time_left))

            if self.scenario.shared_model:
                pSMAC.write(
                    run_history=self.runhistory,
                    # output_directory=self.scenario.input_psmac_dirs,
                    output_directory=self.scenario.output_dir_for_this_run,
                    logger=self.logger)

            logging.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)"
                % (self.stats.get_remaing_time_budget(),
                   self.stats.get_remaining_ta_budget(),
                   self.stats.get_remaining_ta_runs()))

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent
コード例 #2
0
ファイル: pc_smbo.py プロジェクト: mfeurer/pc_smac
    def run(self):
        '''
        Runs the Bayesian optimization loop

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        '''
        self.stats.start_timing()
        try:
            self.incumbent = self.initial_design.run()
        except FirstRunCrashedException as err:
            if self.scenario.abort_on_first_run_crash:
                raise

        # Main BO loop
        iteration = 1
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_directory=self.scenario.output_dir,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()

            X, Y = self.rh2EPM.transform(self.runhistory)
            #print("Shapes: {}, {}".format(X.shape, Y.shape))

            self.logger.debug("Search for next configuration")
            if self.double_intensification:
                # get all found configurations sorted according to acq
                challengers_smac, challengers_random = \
                    self.select_configuration.run(X, Y,
                                                  incumbent=self.incumbent,
                                                  num_configurations_by_random_search_sorted=100,
                                                  num_configurations_by_local_search=10,
                                                  double_intensification=self.double_intensification)

                time_spend = time.time() - start_time
                logging.debug(
                    "Time spend to choose next configurations: %.2f sec" % (time_spend))

                self.logger.debug("Intensify")

                start_time_random = time.time()
                self.incumbent, inc_perf = self.intensifier.intensify(
                    challengers=challengers_random,
                    incumbent=self.incumbent,
                    run_history=self.runhistory,
                    aggregate_func=self.aggregate_func,
                    time_bound=max(0.01, time_spend / 2.),
                    min_number_of_runs=1)
                time_spend_random = time.time() - start_time_random

                #print("IN BETWEEN INTENSIFICATIONS")

                self.incumbent, inc_perf = self.intensifier.intensify(
                    challengers=challengers_smac,
                    incumbent=self.incumbent,
                    run_history=self.runhistory,
                    aggregate_func=self.aggregate_func,
                    time_bound=max(0.01, time_spend_random),
                    min_number_of_runs=1)
            else:
                # get all found configurations sorted according to acq
                challengers = \
                    self.select_configuration.run(X, Y,
                                                  incumbent=self.incumbent,
                                                  num_configurations_by_random_search_sorted=100,
                                                  num_configurations_by_local_search=10,
                                                  double_intensification=self.double_intensification)
                #print("Challengers: {}".format(challengers))

                time_spend = time.time() - start_time
                logging.debug(
                    "Time spend to choose next configurations: %.2f sec" % (time_spend))

                self.logger.debug("Intensify")

                self.incumbent, inc_perf = self.intensifier.intensify(
                    challengers=challengers,
                    incumbent=self.incumbent,
                    run_history=self.runhistory,
                    aggregate_func=self.aggregate_func,
                    time_bound=max(0.01, time_spend),
                    min_number_of_runs=2)

            print("Incumbent: {}, Performance: {}".format(self.incumbent, inc_perf))

            if self.scenario.shared_model:
                pSMAC.write(run_history=self.runhistory,
                            output_directory=self.scenario.output_dir,
                            num_run=self.num_run)

            iteration += 1

            logging.debug("Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)" % (
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs()))

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent
コード例 #3
0
ファイル: epils.py プロジェクト: k121995/Eric
    def run(self):
        """Runs the Bayesian optimization loop

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        """
        self.stats.start_timing()
        try:
            self.incumbent = self.initial_design.run()
        except FirstRunCrashedException as err:
            if self.scenario.abort_on_first_run_crash:
                raise

        # Main loop
        iteration = 1
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            # model training
            self.logger.info("Model Training")
            X, Y = self.rh2EPM.transform(self.runhistory)
            self.model.train(X, Y)
            self.acquisition_func.update(model=self.model,
                                         eta=self.runhistory.get_cost(
                                             self.incumbent))

            if iteration == 1:
                start_point = self.incumbent
            else:
                # Restart?
                if self.rng.rand() < self.restart_prob:
                    self.logger.info("Restart Search")
                    start_point = self.scenario.cs.sample_configuration()
                else:
                    # pertubate inc
                    self.logger.info("Pertubate Incumbent")
                    start_point = self.incumbent
                    for _ in range(self.pertubation_steps):
                        start_point = random.choice(
                            list(
                                get_one_exchange_neighbourhood(
                                    start_point, seed=self.rng.seed())))

            # SLS
            self.logger.info("SLS")
            local_inc = self.local_search(start_point=start_point)

            # decide global inc
            self.logger.info("Race local incumbent against global incumbent")
            # don't be too aggressive here
            self.intensifier.minR = self.slow_race_minR
            self.intensifier.Adaptive_Capping_Slackfactor = self.slow_race_adaptive_capping_factor
            # log traj
            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=[local_inc],
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=0.01,
                log_traj=True)
            if self.incumbent == local_inc:
                self.logger.info("Changed global incumbent!")

            if self.scenario.shared_model:
                pSMAC.write(run_history=self.runhistory,
                            output_directory=self.stats.output_dir,
                            num_run=self.num_run)

            iteration += 1

            self.logger.debug("Remaining budget: %f (wallclock), "
                              "%f (ta costs), %f (target runs)" %
                              (self.stats.get_remaing_time_budget(),
                               self.stats.get_remaining_ta_budget(),
                               self.stats.get_remaining_ta_runs()))

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent
コード例 #4
0
ファイル: smbo.py プロジェクト: chrinide/SMAC3
    def run(self) -> Configuration:
        """Runs the Bayesian optimization loop

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        """
        self.start()

        num_obj = len(self.scenario.multi_objectives
                      )  # type: ignore[attr-defined] # noqa F821

        # Main BO loop
        while True:
            if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                pSMAC.read(
                    run_history=self.runhistory,
                    output_dirs=self.scenario.
                    input_psmac_dirs,  # type: ignore[attr-defined] # noqa F821
                    configuration_space=self.config_space,
                    logger=self.logger,
                )

            start_time = time.time()

            # sample next configuration for intensification
            # Initial design runs are also included in the BO loop now.
            intent, run_info = self.intensifier.get_next_run(
                challengers=self.initial_design_configs,
                incumbent=self.incumbent,
                chooser=self.epm_chooser,
                run_history=self.runhistory,
                repeat_configs=self.intensifier.repeat_configs,
                num_workers=self.tae_runner.num_workers(),
            )

            # remove config from initial design challengers to not repeat it again
            self.initial_design_configs = [
                c for c in self.initial_design_configs if c != run_info.config
            ]

            # update timebound only if a 'new' configuration is sampled as the challenger
            if self.intensifier.num_run == 0:
                time_spent = time.time() - start_time
                time_left = self._get_timebound_for_intensification(
                    time_spent, update=False)
                self.logger.debug("New intensification time bound: %f",
                                  time_left)
            else:
                old_time_left = time_left
                time_spent = time_spent + (time.time() - start_time)
                time_left = self._get_timebound_for_intensification(
                    time_spent, update=True)
                self.logger.debug(
                    "Updated intensification time bound from %f to %f",
                    old_time_left,
                    time_left,
                )

            # Skip starting new runs if the budget is now exhausted
            if self.stats.is_budget_exhausted():
                intent = RunInfoIntent.SKIP

            # Skip the run if there was a request to do so.
            # For example, during intensifier intensification, we
            # don't want to rerun a config that was previously ran
            if intent == RunInfoIntent.RUN:
                # Track the fact that a run was launched in the run
                # history. It's status is tagged as RUNNING, and once
                # completed and processed, it will be updated accordingly
                self.runhistory.add(
                    config=run_info.config,
                    cost=float(MAXINT) if num_obj == 1 else np.full(
                        num_obj, float(MAXINT)),
                    time=0.0,
                    status=StatusType.RUNNING,
                    instance_id=run_info.instance,
                    seed=run_info.seed,
                    budget=run_info.budget,
                )

                run_info.config.config_id = self.runhistory.config_ids[
                    run_info.config]

                self.tae_runner.submit_run(run_info=run_info)

                # There are 2 criteria that the stats object uses to know
                # if the budged was exhausted.
                # The budget time, which can only be known when the run finishes,
                # And the number of ta executions. Because we submit the job at this point,
                # we count this submission as a run. This prevent for using more
                # runner runs than what the scenario allows
                self.stats.submitted_ta_runs += 1

            elif intent == RunInfoIntent.SKIP:
                # No launch is required
                # This marks a transition request from the intensifier
                # To a new iteration
                pass
            elif intent == RunInfoIntent.WAIT:
                # In any other case, we wait for resources
                # This likely indicates that no further decision
                # can be taken by the intensifier until more data is
                # available
                self.tae_runner.wait()
            else:
                raise NotImplementedError(
                    "No other RunInfoIntent has been coded!")

            # Check if there is any result, or else continue
            for run_info, result in self.tae_runner.get_finished_runs():

                # Add the results of the run to the run history
                # Additionally check for new incumbent
                self._incorporate_run_results(run_info, result, time_left)

            if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                assert self.scenario.output_dir_for_this_run is not None  # please mypy
                pSMAC.write(
                    run_history=self.runhistory,
                    output_directory=self.scenario.
                    output_dir_for_this_run,  # type: ignore[attr-defined] # noqa F821
                    logger=self.logger,
                )

            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)"
                % (
                    self.stats.get_remaing_time_budget(),
                    self.stats.get_remaining_ta_budget(),
                    self.stats.get_remaining_ta_runs(),
                ))

            if self.stats.is_budget_exhausted() or self._stop:
                if self.stats.is_budget_exhausted():
                    self.logger.debug("Exhausted configuration budget")
                else:
                    self.logger.debug(
                        "Shutting down because a configuration or callback returned status STOP"
                    )

                # The budget can be exhausted  for 2 reasons: number of ta runs or
                # time. If the number of ta runs is reached, but there is still budget,
                # wait for the runs to finish
                while self.tae_runner.pending_runs():

                    self.tae_runner.wait()

                    for run_info, result in self.tae_runner.get_finished_runs(
                    ):
                        # Add the results of the run to the run history
                        # Additionally check for new incumbent
                        self._incorporate_run_results(run_info, result,
                                                      time_left)

                # Break from the intensification loop,
                # as there are no more resources
                break

            # print stats at the end of each intensification iteration
            if self.intensifier.iteration_done:
                self.stats.print_stats(debug_out=True)

        return self.incumbent
コード例 #5
0
    def test_write(self):
        # The nulls make sure that we correctly emit the python None value
        fixture = '{"data": [[[1, "branin", 1], [1, 1, {"__enum__": ' \
                  '"StatusType.SUCCESS"}, null]], ' \
                  '[[1, "branini", 1], [1, 1, {"__enum__": ' \
                  '"StatusType.SUCCESS"}, null]], ' \
                  '[[2, "branini", 1], [1, 1, {"__enum__": ' \
                  '"StatusType.SUCCESS"}, null]], ' \
                  '[[2, null, 1], [1, 1, {"__enum__": ' \
                  '"StatusType.SUCCESS"}, null]], ' \
                  '[[3, "branin-hoo", 1], [1, 1, {"__enum__": ' \
                  '"StatusType.SUCCESS"}, null]], ' \
                  '[[4, null, 1], [1, 1, {"__enum__": ' \
                  '"StatusType.SUCCESS"}, null]]],' \
                  '"config_origins": {},' \
                  '"configs": {' \
                  '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, ' \
                  '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, ' \
                  '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, ' \
                  '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'

        run_history = RunHistory(aggregate_func=average_cost)
        configuration_space = test_helpers.get_branin_config_space()
        configuration_space.seed(1)

        config = configuration_space.sample_configuration()
        # Config on two instances
        run_history.add(config,
                        1,
                        1,
                        StatusType.SUCCESS,
                        seed=1,
                        instance_id='branin')
        run_history.add(config,
                        1,
                        1,
                        StatusType.SUCCESS,
                        seed=1,
                        instance_id='branini')
        config_2 = configuration_space.sample_configuration()
        # Another config on a known instance
        run_history.add(config_2,
                        1,
                        1,
                        StatusType.SUCCESS,
                        seed=1,
                        instance_id='branini')
        # Known Config on no instance
        run_history.add(config_2, 1, 1, StatusType.SUCCESS, seed=1)
        # New config on new instance
        config_3 = configuration_space.sample_configuration()
        run_history.add(config_3,
                        1,
                        1,
                        StatusType.SUCCESS,
                        seed=1,
                        instance_id='branin-hoo')
        # New config on no instance
        config_4 = configuration_space.sample_configuration()
        run_history.add(config_4, 1, 1, StatusType.SUCCESS, seed=1)

        # External configuration which will not be written to json file!
        config_5 = configuration_space.sample_configuration()
        run_history.add(config_5,
                        1,
                        1,
                        StatusType.SUCCESS,
                        seed=1,
                        origin=DataOrigin.EXTERNAL_SAME_INSTANCES)

        logger = logging.getLogger("Test")
        pSMAC.write(run_history, self.tmp_dir, logger=logger)
        r_size = len(run_history.data)
        pSMAC.read(run_history=run_history,
                   output_dirs=[self.tmp_dir],
                   configuration_space=configuration_space,
                   logger=logger)
        self.assertEqual(
            r_size, len(run_history.data),
            "Runhistory should be the same and not changed after reading")

        output_filename = os.path.join(self.tmp_dir, 'runhistory.json')
        self.assertTrue(os.path.exists(output_filename))

        fixture = json.loads(fixture, object_hook=StatusType.enum_hook)
        with open(output_filename) as fh:
            output = json.load(fh, object_hook=StatusType.enum_hook)
        self.assertEqual(output, fixture)
コード例 #6
0
ファイル: smbo.py プロジェクト: maxc01/SMAC3
    def run(self) -> Configuration:
        """Runs the Bayesian optimization loop

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        """
        self.start()

        # Main BO loop
        while True:
            if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,  # type: ignore[attr-defined] # noqa F821
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()

            # sample next configuration for intensification
            # Initial design runs are also included in the BO loop now.
            challenger, new_challenger = self.intensifier.get_next_challenger(
                challengers=self.initial_design_configs,
                chooser=self.epm_chooser,
                run_history=self.runhistory,
                repeat_configs=self.intensifier.repeat_configs
            )

            # remove config from initial design challengers to not repeat it again
            self.initial_design_configs = [c for c in self.initial_design_configs if c != challenger]

            # update timebound only if a 'new' configuration is sampled as the challenger
            if new_challenger:
                time_spent = time.time() - start_time
                time_left = self._get_timebound_for_intensification(time_spent)

            if challenger:
                # evaluate selected challenger
                self.logger.debug("Intensify - evaluate challenger")

                try:
                    self.incumbent, inc_perf = self.intensifier.eval_challenger(
                        challenger=challenger,
                        incumbent=self.incumbent,
                        run_history=self.runhistory,
                        time_bound=max(self.intensifier._min_time, time_left))

                except FirstRunCrashedException:
                    if self.scenario.abort_on_first_run_crash:  # type: ignore[attr-defined] # noqa F821
                        raise
                if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                    assert self.scenario.output_dir_for_this_run is not None  # please mypy
                    pSMAC.write(run_history=self.runhistory,
                                output_directory=self.scenario.output_dir_for_this_run,  # type: ignore[attr-defined] # noqa F821
                                logger=self.logger)

            self.logger.debug("Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)" % (
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs()))

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent
コード例 #7
0
    def run_smbo(self):

        self.watcher.start_task('SMBO')

        # == first things first: load the datamanager
        self.reset_data_manager()

        # == Initialize non-SMBO stuff
        # first create a scenario
        seed = self.seed
        self.config_space.seed(seed)
        num_params = len(self.config_space.get_hyperparameters())
        # allocate a run history
        num_run = self.start_num_run

        # Initialize some SMAC dependencies
        runhistory = RunHistory(aggregate_func=average_cost)
        # meta_runhistory = RunHistory(aggregate_func=average_cost)
        # meta_runs_dataset_indices = {}

        # == METALEARNING suggestions
        # we start by evaluating the defaults on the full dataset again
        # and add the suggestions from metalearning behind it

        if self.num_metalearning_cfgs > 0:
            if self.metadata_directory is None:
                metalearning_directory = os.path.dirname(
                    autosklearn.metalearning.__file__)
                # There is no multilabel data in OpenML
                if self.task == MULTILABEL_CLASSIFICATION:
                    meta_task = BINARY_CLASSIFICATION
                else:
                    meta_task = self.task
                metadata_directory = os.path.join(
                    metalearning_directory, 'files', '%s_%s_%s' %
                    (self.metric, TASK_TYPES_TO_STRING[meta_task], 'sparse'
                     if self.datamanager.info['is_sparse'] else 'dense'))
                self.metadata_directory = metadata_directory

            if os.path.exists(self.metadata_directory):

                self.logger.info('Metadata directory: %s',
                                 self.metadata_directory)
                meta_base = MetaBase(self.config_space,
                                     self.metadata_directory)

                try:
                    meta_base.remove_dataset(self.dataset_name)
                except:
                    pass

                metafeature_calculation_time_limit = int(
                    self.total_walltime_limit / 4)
                metafeature_calculation_start_time = time.time()
                meta_features = self._calculate_metafeatures_with_limits(
                    metafeature_calculation_time_limit)
                metafeature_calculation_end_time = time.time()
                metafeature_calculation_time_limit = \
                    metafeature_calculation_time_limit - (
                    metafeature_calculation_end_time -
                    metafeature_calculation_start_time)

                if metafeature_calculation_time_limit < 1:
                    self.logger.warning(
                        'Time limit for metafeature calculation less '
                        'than 1 seconds (%f). Skipping calculation '
                        'of metafeatures for encoded dataset.',
                        metafeature_calculation_time_limit)
                    meta_features_encoded = None
                else:
                    with warnings.catch_warnings():
                        warnings.showwarning = self._send_warnings_to_log
                        self.datamanager.perform1HotEncoding()
                    meta_features_encoded = \
                        self._calculate_metafeatures_encoded_with_limits(
                            metafeature_calculation_time_limit)

                # In case there is a problem calculating the encoded meta-features
                if meta_features is None:
                    if meta_features_encoded is not None:
                        meta_features = meta_features_encoded
                else:
                    if meta_features_encoded is not None:
                        meta_features.metafeature_values.update(
                            meta_features_encoded.metafeature_values)

                if meta_features is not None:
                    meta_base.add_dataset(self.dataset_name, meta_features)
                    # Do mean imputation of the meta-features - should be done specific
                    # for each prediction model!
                    all_metafeatures = meta_base.get_metafeatures(
                        features=list(meta_features.keys()))
                    all_metafeatures.fillna(all_metafeatures.mean(),
                                            inplace=True)

                    with warnings.catch_warnings():
                        warnings.showwarning = self._send_warnings_to_log
                        metalearning_configurations = self.collect_metalearning_suggestions(
                            meta_base)
                    if metalearning_configurations is None:
                        metalearning_configurations = []
                    self.reset_data_manager()

                    self.logger.info('%s', meta_features)

                    # Convert meta-features into a dictionary because the scenario
                    # expects a dictionary
                    meta_features_dict = {}
                    for dataset, series in all_metafeatures.iterrows():
                        meta_features_dict[dataset] = series.values
                    meta_features_list = []
                    for meta_feature_name in all_metafeatures.columns:
                        meta_features_list.append(
                            meta_features[meta_feature_name].value)
                    meta_features_list = np.array(meta_features_list).reshape(
                        (1, -1))
                    self.logger.info(list(meta_features_dict.keys()))

                    # meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric])
                    # meta_runs_index = 0
                    # try:
                    #    meta_durations = meta_base.get_all_runs('runtime')
                    #    read_runtime_data = True
                    # except KeyError:
                    #    read_runtime_data = False
                    #    self.logger.critical('Cannot read runtime data.')
                    #    if self.acquisition_function == 'EIPS':
                    #        self.logger.critical('Reverting to acquisition function EI!')
                    #        self.acquisition_function = 'EI'

                    # for meta_dataset in meta_runs.index:
                    #     meta_dataset_start_index = meta_runs_index
                    #     for meta_configuration in meta_runs.columns:
                    #         if np.isfinite(meta_runs.loc[meta_dataset, meta_configuration]):
                    #             try:
                    #                 config = meta_base.get_configuration_from_algorithm_index(
                    #                     meta_configuration)
                    #                 cost = meta_runs.loc[meta_dataset, meta_configuration]
                    #                 if read_runtime_data:
                    #                     runtime = meta_durations.loc[meta_dataset,
                    #                                                  meta_configuration]
                    #                 else:
                    #                     runtime = 1
                    #                 # TODO read out other status types!
                    #                 meta_runhistory.add(config, cost, runtime,
                    #                                     StatusType.SUCCESS,
                    #                                     instance_id=meta_dataset)
                    #                 meta_runs_index += 1
                    #             except:
                    #                 # TODO maybe add warning
                    #                 pass
                    #
                    #     meta_runs_dataset_indices[meta_dataset] = (
                    #         meta_dataset_start_index, meta_runs_index)
            else:
                meta_features = None
                self.logger.warning('Could not find meta-data directory %s' %
                                    metadata_directory)

        else:
            meta_features = None

        if meta_features is None:
            if self.acquisition_function == 'EIPS':
                self.logger.critical('Reverting to acquisition function EI!')
                self.acquisition_function = 'EI'
            meta_features_list = []
            meta_features_dict = {}
            metalearning_configurations = []

        if self.resampling_strategy in [
                'partial-cv', 'partial-cv-iterative-fit'
        ]:
            num_folds = self.resampling_strategy_args['folds']
            instances = [[
                json.dumps({
                    'task_id': self.dataset_name,
                    'fold': fold_number
                })
            ] for fold_number in range(num_folds)]
        else:
            instances = [[json.dumps({'task_id': self.dataset_name})]]

        startup_time = self.watcher.wall_elapsed(self.dataset_name)
        total_walltime_limit = self.total_walltime_limit - startup_time - 5
        scenario_dict = {
            'cs': self.config_space,
            'cutoff-time': self.func_eval_time_limit,
            'memory-limit': self.memory_limit,
            'wallclock-limit': total_walltime_limit,
            'output-dir': self.backend.get_smac_output_directory(self.seed),
            'shared-model': self.shared_mode,
            'run-obj': 'quality',
            'deterministic': 'true',
            'instances': instances
        }

        if self.configuration_mode == 'RANDOM':
            scenario_dict['minR'] = len(
                instances) if instances is not None else 1
            scenario_dict['initial_incumbent'] = 'RANDOM'

        self.scenario = Scenario(scenario_dict)

        # TODO rebuild target algorithm to be it's own target algorithm
        # evaluator, which takes into account that a run can be killed prior
        # to the model being fully fitted; thus putting intermediate results
        # into a queue and querying them once the time is over
        exclude = dict()
        include = dict()
        if self.include_preprocessors is not None and \
                self.exclude_preprocessors is not None:
            raise ValueError('Cannot specify include_preprocessors and '
                             'exclude_preprocessors.')
        elif self.include_preprocessors is not None:
            include['preprocessor'] = self.include_preprocessors
        elif self.exclude_preprocessors is not None:
            exclude['preprocessor'] = self.exclude_preprocessors
        if self.include_estimators is not None and \
                self.exclude_preprocessors is not None:
            raise ValueError('Cannot specify include_estimators and '
                             'exclude_estimators.')
        elif self.include_estimators is not None:
            if self.task in CLASSIFICATION_TASKS:
                include['classifier'] = self.include_estimators
            elif self.task in REGRESSION_TASKS:
                include['regressor'] = self.include_estimators
            else:
                raise ValueError(self.task)
        elif self.exclude_estimators is not None:
            if self.task in CLASSIFICATION_TASKS:
                exclude['classifier'] = self.exclude_estimators
            elif self.task in REGRESSION_TASKS:
                exclude['regressor'] = self.exclude_estimators
            else:
                raise ValueError(self.task)

        ta = ExecuteTaFuncWithQueue(
            backend=self.backend,
            autosklearn_seed=seed,
            resampling_strategy=self.resampling_strategy,
            initial_num_run=num_run,
            logger=self.logger,
            include=include,
            exclude=exclude,
            metric=self.metric,
            memory_limit=self.memory_limit,
            disable_file_output=self.disable_file_output,
            **self.resampling_strategy_args)

        types, bounds = get_types(self.config_space,
                                  self.scenario.feature_array)

        # TODO extract generation of SMAC object into it's own function for
        # testing
        if self.acquisition_function == 'EI':
            model = RandomForestWithInstances(
                types=types,
                bounds=bounds,
                #instance_features=meta_features_list,
                seed=1,
                num_trees=10)
            rh2EPM = RunHistory2EPM4Cost(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=[
                                             StatusType.SUCCESS,
                                             StatusType.MEMOUT,
                                             StatusType.TIMEOUT
                                         ],
                                         impute_censored_data=False,
                                         impute_state=None)
            _smac_arguments = dict(scenario=self.scenario,
                                   model=model,
                                   rng=seed,
                                   runhistory2epm=rh2EPM,
                                   tae_runner=ta,
                                   runhistory=runhistory)
        elif self.acquisition_function == 'EIPS':
            rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=[
                                             StatusType.SUCCESS,
                                             StatusType.MEMOUT,
                                             StatusType.TIMEOUT
                                         ],
                                         impute_censored_data=False,
                                         impute_state=None)
            model = UncorrelatedMultiObjectiveRandomForestWithInstances(
                ['cost', 'runtime'],
                types=types,
                bounds=bounds,
                num_trees=10,
                instance_features=meta_features_list,
                seed=1)
            acquisition_function = EIPS(model)
            _smac_arguments = dict(scenario=self.scenario,
                                   model=model,
                                   rng=seed,
                                   tae_runner=ta,
                                   runhistory2epm=rh2EPM,
                                   runhistory=runhistory,
                                   acquisition_function=acquisition_function)
        else:
            raise ValueError('Unknown acquisition function value %s!' %
                             self.acquisition_function)

        if self.configuration_mode == 'SMAC':
            smac = SMAC(**_smac_arguments)
        elif self.configuration_mode in ['ROAR', 'RANDOM']:
            for not_in_roar in ['runhistory2epm', 'model']:
                if not_in_roar in _smac_arguments:
                    del _smac_arguments[not_in_roar]
            smac = ROAR(**_smac_arguments)
        else:
            raise ValueError(self.configuration_mode)

        # Build a runtime model
        # runtime_rf = RandomForestWithInstances(types,
        #                                        instance_features=meta_features_list,
        #                                        seed=1, num_trees=10)
        # runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
        #                                      scenario=self.scenario,
        #                                      success_states=None,
        #                                      impute_censored_data=False,
        #                                      impute_state=None)
        # X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory)
        # runtime_rf.train(X_runtime, y_runtime[:, 1].flatten())
        # X_meta, Y_meta = rh2EPM.transform(meta_runhistory)
        # # Transform Y_meta on a per-dataset base
        # for meta_dataset in meta_runs_dataset_indices:
        #     start_index, end_index = meta_runs_dataset_indices[meta_dataset]
        #     end_index += 1  # Python indexing
        #     Y_meta[start_index:end_index, 0]\
        #         [Y_meta[start_index:end_index, 0] >2.0] =  2.0
        #     dataset_minimum = np.min(Y_meta[start_index:end_index, 0])
        #     Y_meta[start_index:end_index, 0] = 1 - (
        #         (1. - Y_meta[start_index:end_index, 0]) /
        #         (1. - dataset_minimum))
        #     Y_meta[start_index:end_index, 0]\
        #           [Y_meta[start_index:end_index, 0] > 2] = 2

        smac.solver.stats.start_timing()
        # == first, evaluate all metelearning and default configurations
        smac.solver.incumbent = smac.solver.initial_design.run()

        for challenger in metalearning_configurations:

            smac.solver.incumbent, inc_perf = smac.solver.intensifier.intensify(
                challengers=[challenger],
                incumbent=smac.solver.incumbent,
                run_history=smac.solver.runhistory,
                aggregate_func=smac.solver.aggregate_func,
                time_bound=self.total_walltime_limit)

            if smac.solver.scenario.shared_model:
                pSMAC.write(run_history=smac.solver.runhistory,
                            output_directory=smac.solver.scenario.output_dir,
                            num_run=self.seed)

            if smac.solver.stats.is_budget_exhausted():
                break

        # == after metalearning run SMAC loop
        while True:

            if smac.solver.scenario.shared_model:
                pSMAC.read(run_history=smac.solver.runhistory,
                           output_dirs=glob.glob(
                               self.backend.get_smac_output_glob()),
                           configuration_space=self.config_space,
                           logger=self.logger)

            choose_next_start_time = time.time()
            try:
                challengers = self.choose_next(smac)
            except Exception as e:
                self.logger.error(e)
                self.logger.error("Error in getting next configurations "
                                  "with SMAC. Using random configuration!")
                next_config = self.config_space.sample_configuration()
                challengers = [next_config]
            time_for_choose_next = time.time() - choose_next_start_time
            self.logger.info('Used %g seconds to find next '
                             'configurations' % (time_for_choose_next))

            time_for_choose_next = max(time_for_choose_next, 1.0)
            smac.solver.incumbent, inc_perf = smac.solver.intensifier.intensify(
                challengers=challengers,
                incumbent=smac.solver.incumbent,
                run_history=smac.solver.runhistory,
                aggregate_func=smac.solver.aggregate_func,
                time_bound=time_for_choose_next)

            if smac.solver.scenario.shared_model:
                pSMAC.write(run_history=smac.solver.runhistory,
                            output_directory=smac.solver.scenario.output_dir,
                            num_run=self.seed)

            if smac.solver.stats.is_budget_exhausted():
                break

        self.runhistory = smac.solver.runhistory
        self.trajectory = smac.solver.intensifier.traj_logger.trajectory
        smac.runhistory = self.runhistory
        self.fANOVA_input = smac.get_X_y()

        return self.runhistory, self.trajectory, self.fANOVA_input
コード例 #8
0
    def test_write(self):
        # The nulls make sure that we correctly emit the python None value
        fixture = '{"data": [[[1, "branin", 1], [1, 1, {"__enum__": ' \
                  '"StatusType.SUCCESS"}, null]], ' \
                  '[[1, "branini", 1], [1, 1, {"__enum__": ' \
                  '"StatusType.SUCCESS"}, null]], ' \
                  '[[2, "branini", 1], [1, 1, {"__enum__": ' \
                  '"StatusType.SUCCESS"}, null]], ' \
                  '[[2, null, 1], [1, 1, {"__enum__": ' \
                  '"StatusType.SUCCESS"}, null]], ' \
                  '[[3, "branin-hoo", 1], [1, 1, {"__enum__": ' \
                  '"StatusType.SUCCESS"}, null]], ' \
                  '[[4, null, 1], [1, 1, {"__enum__": ' \
                  '"StatusType.SUCCESS"}, null]]],' \
                  '"configs": {' \
                  '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, ' \
                  '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, ' \
                  '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, ' \
                  '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'

        run_history = RunHistory(aggregate_func=average_cost)
        configuration_space = test_helpers.get_branin_config_space()
        configuration_space.seed(1)

        config = configuration_space.sample_configuration()
        # Config on two instances
        run_history.add(config,
                        1,
                        1,
                        StatusType.SUCCESS,
                        seed=1,
                        instance_id='branin')
        run_history.add(config,
                        1,
                        1,
                        StatusType.SUCCESS,
                        seed=1,
                        instance_id='branini')
        config_2 = configuration_space.sample_configuration()
        # Another config on a known instance
        run_history.add(config_2,
                        1,
                        1,
                        StatusType.SUCCESS,
                        seed=1,
                        instance_id='branini')
        # Known Config on no instance
        run_history.add(config_2, 1, 1, StatusType.SUCCESS, seed=1)
        # New config on new instance
        config_3 = configuration_space.sample_configuration()
        run_history.add(config_3,
                        1,
                        1,
                        StatusType.SUCCESS,
                        seed=1,
                        instance_id='branin-hoo')
        # New config on no instance
        config_4 = configuration_space.sample_configuration()
        run_history.add(config_4, 1, 1, StatusType.SUCCESS, seed=1)

        pSMAC.write(run_history, self.tmp_dir, 20)

        output_filename = os.path.join(self.tmp_dir, 'runhistory.json')
        self.assertTrue(os.path.exists(output_filename))

        fixture = json.loads(fixture, object_hook=StatusType.enum_hook)
        with open(output_filename) as fh:
            output = json.load(fh, object_hook=StatusType.enum_hook)

        print(output)
        print(fixture)
        self.assertEqual(output, fixture)
コード例 #9
0
    def run(self):
        """Runs the Bayesian optimization loop

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        """
        self.start()
        # 设置一个counter
        counter = 0
        # Main BO loop
        while True:
            # 打印每轮SMBO的最优结果(包括首轮SMBO 0)
            print('SMBO ' + str(counter) + ': ' +
                  str(self.runhistory.get_cost(self.incumbent)))
            counter += 1

            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            time_spent = time.time() - start_time
            time_left = self._get_timebound_for_intensification(time_spent)

            self.logger.debug("Intensify")

            if self.server is None:
                self.incumbent, inc_perf = self.intensifier.intensify(
                    challengers=challengers,
                    incumbent=self.incumbent,
                    run_history=self.runhistory,
                    aggregate_func=self.aggregate_func,
                    time_bound=max(self.intensifier._min_time, time_left))
            else:
                # 从worker读取loss,加入history再运行新的challengers
                print(time_left)
                self.server.push(incumbent=self.incumbent,
                                 runhistory=self.runhistory,
                                 challengers=challengers.challengers,
                                 time_left=time_left)
                # 从worker读取runhistory,并merge到self.runhistory
                incumbent, new_runhistory = self.server.pull()
                self.runhistory.update(new_runhistory)
                # 更新了runhistory之后,应该找寻是否存在新的incumbent
                # 因为worker没有完整的
                runhistory_old = self.runhistory.get_history_for_config(
                    self.incumbent)
                runhistory_new = self.runhistory.get_history_for_config(
                    incumbent)
                # 找寻cost最小值
                lowest_cost_old = min([cost[0] for cost in runhistory_old])
                lowest_cost_new = min([cost[0] for cost in runhistory_new])
                if lowest_cost_new < lowest_cost_old:
                    # 替换为新的incumbent
                    self.incumbent = incumbent
                """可以考虑用这个函数
                new_incumbent = self._compare_configs(
                    incumbent=incumbent, challenger=challenger,
                    run_history=run_history,
                    aggregate_func=aggregate_func,
                    log_traj=log_traj)
                """

            if self.scenario.shared_model:
                pSMAC.write(
                    run_history=self.runhistory,
                    output_directory=self.scenario.output_dir_for_this_run)

            logging.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)"
                % (self.stats.get_remaing_time_budget(),
                   self.stats.get_remaining_ta_budget(),
                   self.stats.get_remaining_ta_runs()))

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent