Example #1
0
 def _optimize(self, optimizer, hist_list):
     """Run ``optimizer`` to completion and record its run history.

     After ``optimizer.optimize()`` returns, ``pSMAC.read`` is called on the
     solver's run history with the scenario's ``input_psmac_dirs``, and the
     solver's run history is then appended to ``hist_list``.
     """
     optimizer.optimize()

     # Bind the solver once; every argument below hangs off it.
     solver = optimizer.solver
     pSMAC.read(run_history=solver.runhistory,
                output_dirs=solver.scenario.input_psmac_dirs,
                configuration_space=solver.config_space,
                logger=solver.logger)
     hist_list.append(solver.runhistory)
Example #2
0
    def run(self):
        """Runs the Bayesian optimization loop

        Every iteration transforms the run history into ``(X, Y)``, asks
        ``choose_next`` for challengers, and hands them to the intensifier.
        When ``scenario.shared_model`` is set, run histories are exchanged
        through ``pSMAC.read`` / ``pSMAC.write`` before and after each
        iteration. The loop stops once ``stats.is_budget_exhausted()``
        reports True.

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        """
        self.start()

        # Main BO loop
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            # The time spent choosing challengers determines the time bound
            # handed to the intensifier.
            time_spent = time.time() - start_time
            time_left = self._get_timebound_for_intensification(time_spent)

            self.logger.debug("Intensify")

            # The incumbent performance returned alongside is not needed here.
            self.incumbent, _ = self.intensifier.intensify(
                challengers=challengers,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(self.intensifier._min_time, time_left))

            if self.scenario.shared_model:
                pSMAC.write(
                    run_history=self.runhistory,
                    output_directory=self.scenario.output_dir_for_this_run,
                    logger=self.logger)

            # Use the instance logger (not the root ``logging`` module) so the
            # message follows the same configuration as the rest of the loop;
            # arguments are passed lazily and formatted only if emitted.
            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)",
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs())

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent
Example #3
0
def _claim_run(runcount_left):
    """Take one run from the shared budget; return True if one was left."""

    def _take():
        if runcount_left.value > 0:
            runcount_left.value -= 1
            return True
        return False

    # A synchronized multiprocessing.Value exposes get_lock(); the check and
    # the decrement must happen under that lock, otherwise two processes can
    # both observe the last remaining run and both consume it. Counters that
    # only offer ``value`` keep the previous unsynchronized behaviour.
    get_lock = getattr(runcount_left, 'get_lock', None)
    if get_lock is None:
        return _take()
    with get_lock():
        return _take()


def _iterate(optimizer, runcount_left, return_hist):
    """Consume runs from a shared budget, then publish the run history.

    Parameters
    ----------
    optimizer
        Object exposing ``iterate()`` and a ``solver`` attribute.
    runcount_left
        Counter with an integer ``value`` attribute; decremented once per
        call to ``optimizer.iterate()`` until it reaches zero.
    return_hist
        List-like object; the solver's run history is appended to it after
        ``pSMAC.read`` has been called on it.
    """
    while _claim_run(runcount_left):
        optimizer.iterate()
    pSMAC.read(
        run_history=optimizer.solver.runhistory,
        output_dirs=optimizer.solver.scenario.input_psmac_dirs,
        configuration_space=optimizer.solver.config_space,
        logger=optimizer.solver.logger,
    )
    return_hist.append(optimizer.solver.runhistory)
Example #4
0
    def iterate(self):
        """Run one round of parallel optimizer processes and update the incumbent.

        If ``len(self.configs)`` has reached ``self.config_num_threshold`` a
        warning is logged and no work is done. Otherwise ``self.n_jobs``
        processes run ``_iterate`` against a shared trial counter, and the
        run histories they hand back are folded into ``self.configs``,
        ``self.perfs`` and the incumbent.

        Returns
        -------
        tuple
            ``(incumbent_perf, iteration_cost, incumbent_config)`` where
            ``iteration_cost`` is the elapsed wall-clock time in seconds, or
            ``None`` when the round was skipped.
        """
        shared_budget = multiprocessing.Value('i', self.trials_this_run)
        began_at = time.time()

        # Guard clause: nothing to do once enough configurations were seen.
        if len(self.configs) >= self.config_num_threshold:
            self.logger.warning('Already explored 70 percentage of the '
                                'hp space: %d!' % self.config_num_threshold)
            return self.incumbent_perf, None, self.incumbent_config

        workers = []
        collected = multiprocessing.Manager().list()

        # Refresh every solver's run history before the workers start.
        for idx in range(self.n_jobs):
            solver = self.optimizer_list[idx].solver
            pSMAC.read(
                run_history=solver.runhistory,
                output_dirs=solver.scenario.output_dir + '/run_1',
                configuration_space=solver.config_space,
                logger=solver.logger,
            )

        for idx in range(self.n_jobs):
            worker = multiprocessing.Process(
                target=_iterate,
                args=[self.optimizer_list[idx], shared_budget, collected])
            workers.append(worker)
            worker.start()
        for worker in workers:
            worker.join()

        for history in collected:
            for run_key, run_value in list(history.data.items()):
                # Reward is one minus the first field of the stored entry.
                reward = 1. - run_value[0]
                config = history.ids_config[run_key[0]]
                if config not in self.configs:
                    self.perfs.append(reward)
                    self.configs.append(config)
                if reward > self.incumbent_perf:
                    self.incumbent_perf = reward
                    self.incumbent_config = config
        self.trial_cnt += self.trials_per_iter

        elapsed = time.time() - began_at
        return self.incumbent_perf, elapsed, self.incumbent_config
Example #5
0
    def run_smbo(self):
        """Configure SMAC for the current task, run it and collect the results.

        Builds the instance list, the include/exclude component filters, the
        target-algorithm keyword arguments and the scenario dictionary, then
        creates the SMAC object (through ``get_smac_object_callback`` when
        one is set) and calls ``optimize()`` on it.

        Returns
        -------
        tuple
            ``(runhistory, trajectory, budget_type)`` taken from the SMAC
            solver; the same values are stored on ``self``.

        Raises
        ------
        ValueError
            If include and exclude lists are both given for preprocessors or
            for estimators, or if ``self.task`` is neither a classification
            nor a regression task while an estimator filter is set.
        """
        self.watcher.start_task('SMBO')

        # Load the data manager before anything else.
        self.reset_data_manager()

        seed = self.seed
        self.config_space.seed(seed)
        num_run = self.start_num_run

        metalearning_configurations = self.get_metalearning_suggestions()

        # One instance per fold for the partial-cv strategies, else just one.
        if self.resampling_strategy in ('partial-cv',
                                        'partial-cv-iterative-fit'):
            instances = []
            for fold_number in range(self.resampling_strategy_args['folds']):
                instances.append([json.dumps({'task_id': self.dataset_name,
                                              'fold': fold_number})])
        else:
            instances = [[json.dumps({'task_id': self.dataset_name})]]

        # TODO rebuild target algorithm to be its own target algorithm
        # evaluator, which takes into account that a run can be killed prior
        # to the model being fully fitted; thus putting intermediate results
        # into a queue and querying them once the time is over
        include = {}
        exclude = {}

        wants_pre_include = self.include_preprocessors is not None
        wants_pre_exclude = self.exclude_preprocessors is not None
        if wants_pre_include and wants_pre_exclude:
            raise ValueError('Cannot specify include_preprocessors and '
                             'exclude_preprocessors.')
        if wants_pre_include:
            include['feature_preprocessor'] = self.include_preprocessors
        elif wants_pre_exclude:
            exclude['feature_preprocessor'] = self.exclude_preprocessors

        wants_est_include = self.include_estimators is not None
        wants_est_exclude = self.exclude_estimators is not None
        if wants_est_include and wants_est_exclude:
            raise ValueError('Cannot specify include_estimators and '
                             'exclude_estimators.')
        if wants_est_include or wants_est_exclude:
            # The filter key depends on the task family only.
            if self.task in CLASSIFICATION_TASKS:
                estimator_key = 'classifier'
            elif self.task in REGRESSION_TASKS:
                estimator_key = 'regressor'
            else:
                raise ValueError(self.task)
            if wants_est_include:
                include[estimator_key] = self.include_estimators
            else:
                exclude[estimator_key] = self.exclude_estimators

        ta_kwargs = dict(
            backend=self.backend,
            autosklearn_seed=seed,
            resampling_strategy=self.resampling_strategy,
            initial_num_run=num_run,
            logger=self.logger,
            include=include,
            exclude=exclude,
            metric=self.metric,
            memory_limit=self.memory_limit,
            disable_file_output=self.disable_file_output,
            **self.resampling_strategy_args
        )

        # Time already elapsed plus a 5 second margin is taken off the limit.
        startup_time = self.watcher.wall_elapsed(self.dataset_name)
        scenario_dict = {
            'abort_on_first_run_crash': False,
            'cs': self.config_space,
            'cutoff_time': self.func_eval_time_limit,
            'deterministic': 'true',
            'instances': instances,
            'memory_limit': self.memory_limit,
            'output-dir': self.backend.get_smac_output_directory(),
            'run_obj': 'quality',
            'shared-model': self.shared_mode,
            'wallclock_limit': self.total_walltime_limit - startup_time - 5,
            'cost_for_crash': WORST_POSSIBLE_RESULT,
        }

        user_args = self.smac_scenario_args
        if user_args is not None:
            protected = ('abort_on_first_run_crash', 'cs', 'deterministic',
                         'instances', 'output-dir', 'run_obj',
                         'shared-model', 'cost_for_crash')
            overridable = ('cutoff_time', 'memory_limit', 'wallclock_limit')

            # Protected keys are removed from the user-supplied mapping.
            for arg in protected:
                if arg in user_args:
                    self.logger.warning('Cannot override scenario argument %s, '
                                        'will ignore this.', arg)
                    del user_args[arg]
            for arg in overridable:
                if arg in user_args:
                    self.logger.warning(
                        'Overriding scenario argument %s: %s with value %s',
                        arg, scenario_dict[arg], user_args[arg])
            scenario_dict.update(user_args)

        build_smac = self.get_smac_object_callback
        if build_smac is None:
            build_smac = get_smac_object
        smac = build_smac(
            scenario_dict=scenario_dict,
            seed=seed,
            ta=ExecuteTaFuncWithQueue,
            ta_kwargs=ta_kwargs,
            backend=self.backend,
            metalearning_configurations=metalearning_configurations,
        )

        smac.optimize()

        # Patch SMAC to read in data from parallel runs after the last
        # function evaluation
        if self.shared_mode:
            solver = smac.solver
            pSMAC.read(run_history=solver.runhistory,
                       output_dirs=solver.scenario.input_psmac_dirs,
                       configuration_space=solver.config_space,
                       logger=solver.logger)

        self.runhistory = smac.solver.runhistory
        self.trajectory = smac.solver.intensifier.traj_logger.trajectory
        self._budget_type = smac.solver.intensifier.tae_runner.budget_type

        return self.runhistory, self.trajectory, self._budget_type
Example #6
0
    def run(self):
        '''
        Runs the Bayesian optimization loop

        After the initial design, every iteration selects challengers via
        ``select_configuration.run`` and intensifies them. With
        ``double_intensification`` the selection yields two challenger lists:
        the random ones are intensified first, then the SMAC ones, the latter
        bounded by the time the former took. The loop ends once
        ``stats.is_budget_exhausted()`` reports True.

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration

        Raises
        ------
        FirstRunCrashedException
            Re-raised from the initial design only when
            ``scenario.abort_on_first_run_crash`` is set.
        '''
        self.stats.start_timing()
        try:
            self.incumbent = self.initial_design.run()
        except FirstRunCrashedException:
            if self.scenario.abort_on_first_run_crash:
                raise

        def intensify(challengers, time_bound, min_number_of_runs):
            # Shared call shape for all intensification steps; the time bound
            # never drops below 0.01.
            return self.intensifier.intensify(
                challengers=challengers,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(0.01, time_bound),
                min_number_of_runs=min_number_of_runs)

        # Main BO loop
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_directory=self.scenario.output_dir,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()

            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq; the call
            # is identical for both modes, only the shape of the result differs
            selection = self.select_configuration.run(
                X, Y,
                incumbent=self.incumbent,
                num_configurations_by_random_search_sorted=100,
                num_configurations_by_local_search=10,
                double_intensification=self.double_intensification)

            time_spend = time.time() - start_time
            # Instance logger with lazy arguments instead of the root
            # ``logging`` module with eager formatting.
            self.logger.debug(
                "Time spend to choose next configurations: %.2f sec",
                time_spend)

            self.logger.debug("Intensify")

            if self.double_intensification:
                challengers_smac, challengers_random = selection

                start_time_random = time.time()
                self.incumbent, inc_perf = intensify(
                    challengers_random, time_spend / 2., 1)
                time_spend_random = time.time() - start_time_random

                self.incumbent, inc_perf = intensify(
                    challengers_smac, time_spend_random, 1)
            else:
                self.incumbent, inc_perf = intensify(selection, time_spend, 2)

            # NOTE(review): progress is written to stdout; kept unchanged so
            # existing console output stays the same.
            print("Incumbent: {}, Performance: {}".format(self.incumbent, inc_perf))

            if self.scenario.shared_model:
                pSMAC.write(run_history=self.runhistory,
                            output_directory=self.scenario.output_dir,
                            num_run=self.num_run)

            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)",
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs())

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent
    def optimize(self) -> typing.List[Configuration]:
        """
        Optimizes the algorithm provided in scenario (given in constructor)

        Runs up to ``n_iterations`` PSMAC rounds. After each round the best
        ``incs_per_round`` incumbents (ranked on the validation ids when
        ``val_set`` is truthy, otherwise on the estimated ids) are passed to
        ``_update_portfolio``; the loop stops as soon as the portfolio cost
        no longer decreases. Finally the validated runs and the pickled
        portfolio are written below ``top_dir``.

        Returns
        -------
        portfolio : typing.List[Configuration]
            Portfolio of found configurations

        """
        def timestamp():
            # Fresh wall-clock stamp for every directory name.
            now = datetime.datetime.fromtimestamp(time.time())
            return now.strftime('%Y-%m-%d_%H:%M:%S_%f')

        def start_new_psmac_dir():
            # Point the scenario at a new psmac3 directory and create it.
            self.scenario.output_dir = os.path.join(
                self.top_dir, "psmac3-output_%s" % timestamp())
            self.output_dir = create_output_directory(self.scenario,
                                                      run_id=self.run_id,
                                                      logger=self.logger)

        # Setup output directory
        self.portfolio = []
        portfolio_cost = np.inf
        if self.output_dir is None:
            self.top_dir = "hydra-output_%s" % timestamp()
            start_new_psmac_dir()

        # parent process SMAC only used for validation purposes
        validation_scenario = copy.deepcopy(self.scenario)
        validation_scenario.output_dir_for_this_run = None
        validation_scenario.output_dir = None
        self.solver = SMAC4AC(scenario=validation_scenario,
                              tae_runner=self._tae,
                              rng=self.rng,
                              run_id=self.run_id,
                              **self.kwargs)

        for round_idx in range(self.n_iterations):
            self.logger.info("=" * 120)
            self.logger.info("Hydra Iteration: %d", (round_idx + 1))

            if round_idx == 0:
                tae, tae_kwargs = self._tae, self._tae_kwargs
            else:
                # Later rounds use the Hydra runner, which receives the
                # per-instance costs as its cost oracle.
                tae = ExecuteTARunHydra
                tae_kwargs = self._tae_kwargs or {}
                tae_kwargs['cost_oracle'] = self.cost_per_inst

            self.optimizer = PSMAC(
                scenario=self.scenario,
                run_id=self.run_id,
                rng=self.rng,
                tae=tae,
                tae_kwargs=tae_kwargs,
                shared_model=False,
                validate=bool(self.val_set),
                n_optimizers=self.n_optimizers,
                val_set=self.val_set,
                # return all configurations (unvalidated)
                n_incs=self.n_optimizers,
                **self.kwargs)
            self.optimizer.output_dir = self.output_dir
            incs = self.optimizer.optimize()
            (cost_per_conf_v, val_ids,
             cost_per_conf_e, est_ids) = self.optimizer.get_best_incumbents_ids(incs)

            # Ranking and cost table both come from the same source.
            if self.val_set:
                ranked_ids, cost_table = val_ids, cost_per_conf_v
            else:
                ranked_ids, cost_table = est_ids, cost_per_conf_e
            incs = incs[ranked_ids[:self.incs_per_round]]

            self.logger.info('Kept incumbents')
            config_cost_per_inst = {}
            for inc in incs:
                self.logger.info(inc)
                config_cost_per_inst[inc] = cost_table[inc]

            cur_portfolio_cost = self._update_portfolio(
                incs, config_cost_per_inst)
            if portfolio_cost <= cur_portfolio_cost:
                self.logger.info(
                    "No further progress (%f) --- terminate hydra",
                    portfolio_cost)
                break
            portfolio_cost = cur_portfolio_cost
            self.logger.info("Current pertfolio cost: %f", portfolio_cost)

            start_new_psmac_dir()

        validated_runs_glob = os.path.join(self.top_dir, 'psmac3*',
                                           'run_' + str(MAXINT))
        read(self.rh, validated_runs_glob, self.scenario.cs, self.logger)
        self.rh.save_json(
            fn=os.path.join(self.top_dir, 'all_validated_runs_runhistory.json'),
            save_external=True)
        with open(os.path.join(self.top_dir, 'portfolio.pkl'), 'wb') as fh:
            pickle.dump(self.portfolio, fh)

        separator = "~" * 120
        self.logger.info(separator)
        self.logger.info('Resulting Portfolio:')
        for configuration in self.portfolio:
            self.logger.info(str(configuration))
        self.logger.info(separator)

        return self.portfolio
Example #8
0
    def run_smbo(self):
        """Configure SMAC for the current task, run it and collect the results.

        Builds the instance list, the include/exclude component filters, the
        target-algorithm evaluator and the scenario dictionary. When
        ``self.read_history`` is set, an initial run history and trajectory
        logger are built through the ``create_Runhistory`` module from a
        fixed configuration; otherwise an empty ``RunHistory`` is used.
        SMAC is then created (through ``get_smac_object_callback`` when one
        is set) and optimized.

        Returns
        -------
        tuple
            ``(runhistory, trajectory)``; both are also stored on ``self``.

        Raises
        ------
        ValueError
            If include and exclude lists are both given for preprocessors or
            for estimators, or if ``self.task`` is neither a classification
            nor a regression task while an estimator filter is set.
        """
        self.watcher.start_task('SMBO')

        # == first things first: load the datamanager
        self.reset_data_manager()

        # == Initialize non-SMBO stuff
        # first create a scenario
        seed = self.seed
        self.config_space.seed(seed)
        # allocate a run history
        num_run = self.start_num_run

        # Initialize some SMAC dependencies

        metalearning_configurations = self.get_metalearning_suggestions()

        if self.resampling_strategy in [
                'partial-cv', 'partial-cv-iterative-fit'
        ]:
            num_folds = self.resampling_strategy_args['folds']
            instances = [[
                json.dumps({
                    'task_id': self.dataset_name,
                    'fold': fold_number
                })
            ] for fold_number in range(num_folds)]
        else:
            instances = [[json.dumps({'task_id': self.dataset_name})]]

        # TODO rebuild target algorithm to be its own target algorithm
        # evaluator, which takes into account that a run can be killed prior
        # to the model being fully fitted; thus putting intermediate results
        # into a queue and querying them once the time is over
        exclude = dict()
        include = dict()
        if self.include_preprocessors is not None and \
                        self.exclude_preprocessors is not None:
            raise ValueError('Cannot specify include_preprocessors and '
                             'exclude_preprocessors.')
        elif self.include_preprocessors is not None:
            include['preprocessor'] = self.include_preprocessors
        elif self.exclude_preprocessors is not None:
            exclude['preprocessor'] = self.exclude_preprocessors

        if self.include_estimators is not None and \
                        self.exclude_estimators is not None:
            raise ValueError('Cannot specify include_estimators and '
                             'exclude_estimators.')
        elif self.include_estimators is not None:
            if self.task in CLASSIFICATION_TASKS:
                include['classifier'] = self.include_estimators
            elif self.task in REGRESSION_TASKS:
                include['regressor'] = self.include_estimators
            else:
                raise ValueError(self.task)
        elif self.exclude_estimators is not None:
            if self.task in CLASSIFICATION_TASKS:
                exclude['classifier'] = self.exclude_estimators
            elif self.task in REGRESSION_TASKS:
                exclude['regressor'] = self.exclude_estimators
            else:
                raise ValueError(self.task)

        ta = ExecuteTaFuncWithQueue(
            backend=self.backend,
            autosklearn_seed=seed,
            resampling_strategy=self.resampling_strategy,
            initial_num_run=num_run,
            logger=self.logger,
            include=include,
            exclude=exclude,
            metric=self.metric,
            memory_limit=self.memory_limit,
            disable_file_output=self.disable_file_output,
            **self.resampling_strategy_args)

        # Time already elapsed plus a 5 second margin is taken off the limit.
        startup_time = self.watcher.wall_elapsed(self.dataset_name)
        total_walltime_limit = self.total_walltime_limit - startup_time - 5
        scenario_dict = {
            'abort_on_first_run_crash': False,
            'cs': self.config_space,
            'cutoff_time': self.func_eval_time_limit,
            'deterministic': 'true',
            'instances': instances,
            'memory_limit': self.memory_limit,
            'output-dir': self.backend.get_smac_output_directory(),
            'run_obj': 'quality',
            'shared-model': self.shared_mode,
            'wallclock_limit': total_walltime_limit,
            'cost_for_crash': WORST_POSSIBLE_RESULT,
        }
        if self.smac_scenario_args is not None:
            # These keys are fixed by this method; user values are dropped
            # (and removed from the user-supplied mapping).
            for arg in [
                    'abort_on_first_run_crash',
                    'cs',
                    'deterministic',
                    'instances',
                    'output-dir',
                    'run_obj',
                    'shared-model',
                    'cost_for_crash',
            ]:
                if arg in self.smac_scenario_args:
                    self.logger.warning(
                        'Cannot override scenario argument %s, '
                        'will ignore this.', arg)
                    del self.smac_scenario_args[arg]
            # These keys may be overridden, but the override is logged.
            for arg in [
                    'cutoff_time',
                    'memory_limit',
                    'wallclock_limit',
            ]:
                if arg in self.smac_scenario_args:
                    self.logger.warning(
                        'Overriding scenario argument %s: %s with value %s',
                        arg, scenario_dict[arg], self.smac_scenario_args[arg])
            scenario_dict.update(self.smac_scenario_args)

        if self.read_history:
            import create_Runhistory
            # Fixed configuration used to seed the run history.
            values = {
                'balancing:strategy': 'none',
                'categorical_encoding:__choice__': 'no_encoding',
                'classifier:__choice__': 'random_forest',
                'imputation:strategy': 'mean',
                'preprocessor:__choice__': 'pca',
                'preprocessor:pca:whiten': 'False',
                'rescaling:__choice__': 'none',
                'classifier:random_forest:bootstrap': 'True',
                'classifier:random_forest:criterion': 'entropy',
                'classifier:random_forest:max_depth': 10,
                'classifier:random_forest:max_features':
                0.45000000000000001,  #auto
                'classifier:random_forest:max_leaf_nodes': 'None',
                'classifier:random_forest:min_impurity_decrease': 0.0,
                'classifier:random_forest:min_samples_leaf': 6,
                'classifier:random_forest:min_samples_split': 7,
                'classifier:random_forest:min_weight_fraction_leaf': 0.0,
                'classifier:random_forest:n_estimators': 512,
                'classifier:random_forest:random_state': 3,
            }
            config = create_Runhistory.defult_config_builder(
                configspace=self.config_space, values=values)
            runhistory, traj_logger = create_Runhistory.runhistory_builder(
                ta=ta,
                scenario_dic=scenario_dict,
                rng=seed,
                backend=self.backend,
                config_milad=config)

        else:
            runhistory = RunHistory(aggregate_func=average_cost)

        smac_args = {
            'scenario_dict': scenario_dict,
            'seed': seed,
            'ta': ta,
            'backend': self.backend,
            'metalearning_configurations': metalearning_configurations,
            'runhistory': runhistory,
        }

        if self.get_smac_object_callback is not None:
            smac = self.get_smac_object_callback(**smac_args)
        else:
            smac = get_smac_object(**smac_args)

        smac.optimize()

        # Patch SMAC to read in data from parallel runs after the last
        # function evaluation
        if self.shared_mode:
            pSMAC.read(
                run_history=smac.solver.runhistory,
                output_dirs=smac.solver.scenario.input_psmac_dirs,
                configuration_space=smac.solver.config_space,
                logger=smac.solver.logger,
            )

        if self.read_history:
            import pickle
            import create_Runhistory
            # NOTE(review): the dump targets below are absolute,
            # machine-specific paths; they are kept unchanged here.
            # Context managers make sure the files are flushed and closed
            # even if pickling fails.
            with open("/home/dfki/Desktop/temp/pickel/new_runhistory.p",
                      "wb") as fh:
                pickle.dump(runhistory, fh)
            last_trajectories = create_Runhistory.trajectory_builder(
                traj_logger=traj_logger, config_milad=config)
            present_trajectories = smac.solver.intensifier.traj_logger.trajectory
            self.trajectory = present_trajectories + last_trajectories
            with open("/home/dfki/Desktop/temp/pickel/new_trajectory.p",
                      "wb") as fh:
                pickle.dump(self.trajectory, fh)

        else:
            self.trajectory = smac.solver.intensifier.traj_logger.trajectory

        self.runhistory = smac.solver.runhistory

        return self.runhistory, self.trajectory
Example #9
0
    def run(self):
        """Runs the local-search optimization loop until the budget is spent

        After the initial design, every iteration optionally merges run
        histories from other pSMAC runs (``scenario.shared_model``), retrains
        the model, picks a start point (the incumbent on the first iteration,
        afterwards either a freshly sampled configuration or a perturbed
        incumbent), runs ``self.local_search`` from it and races the local
        incumbent against the global incumbent.

        Returns
        ----------
        incumbent
            The best found configuration (``self.incumbent``)

        Raises
        ----------
        FirstRunCrashedException
            Re-raised from the initial design if
            ``scenario.abort_on_first_run_crash`` is set.
        """
        self.stats.start_timing()
        try:
            self.incumbent = self.initial_design.run()
        except FirstRunCrashedException:
            if self.scenario.abort_on_first_run_crash:
                raise
            # Best effort by design: self.incumbent keeps whatever value it
            # had before the initial design was started.
            self.logger.warning(
                "Initial design crashed; continuing because "
                "abort_on_first_run_crash is disabled")

        # Main loop
        iteration = 1
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            # model training
            self.logger.info("Model Training")
            X, Y = self.rh2EPM.transform(self.runhistory)
            self.model.train(X, Y)
            self.acquisition_func.update(model=self.model,
                                         eta=self.runhistory.get_cost(
                                             self.incumbent))

            if iteration == 1:
                start_point = self.incumbent
            elif self.rng.rand() < self.restart_prob:
                # Restart from a freshly sampled configuration
                self.logger.info("Restart Search")
                start_point = self.scenario.cs.sample_configuration()
            else:
                # Perturb the incumbent with a few random neighbourhood steps
                self.logger.info("Pertubate Incumbent")
                start_point = self.incumbent
                for _ in range(self.pertubation_steps):
                    # Draw an integer seed from self.rng. The previous
                    # ``self.rng.seed()`` re-seeded the generator from OS
                    # entropy and returned None, which broke reproducibility.
                    # NOTE(review): assumes self.rng is a numpy RandomState
                    # (it is already used via ``.rand()`` above).
                    neighbour_seed = self.rng.randint(2 ** 31 - 1)
                    start_point = random.choice(
                        list(
                            get_one_exchange_neighbourhood(
                                start_point, seed=neighbour_seed)))

            # SLS
            self.logger.info("SLS")
            local_inc = self.local_search(start_point=start_point)

            # decide global inc
            self.logger.info("Race local incumbent against global incumbent")
            # don't be too aggressive here
            self.intensifier.minR = self.slow_race_minR
            self.intensifier.Adaptive_Capping_Slackfactor = self.slow_race_adaptive_capping_factor
            # log traj
            self.incumbent, _ = self.intensifier.intensify(
                challengers=[local_inc],
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=0.01,
                log_traj=True)
            if self.incumbent == local_inc:
                self.logger.info("Changed global incumbent!")

            if self.scenario.shared_model:
                pSMAC.write(run_history=self.runhistory,
                            output_directory=self.stats.output_dir,
                            num_run=self.num_run)

            iteration += 1

            # Logger-side formatting: the message is only rendered when the
            # debug level is enabled.
            self.logger.debug("Remaining budget: %f (wallclock), "
                              "%f (ta costs), %f (target runs)",
                              self.stats.get_remaing_time_budget(),
                              self.stats.get_remaining_ta_budget(),
                              self.stats.get_remaining_ta_runs())

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent
Example #10
0
    def run(self) -> Configuration:
        """Drives the optimization loop until the budget is used up or a stop is requested

        Each pass asks the intensifier for the next run, submits it to the
        runner when the intent is RUN (or waits when it is WAIT), feeds every
        finished run to ``_incorporate_run_results`` and, if
        ``scenario.shared_model`` is set, reads/writes the run history via
        pSMAC.

        Returns
        ----------
        incumbent: Configuration
            The best found configuration (``self.incumbent``)
        """
        self.start()

        n_objectives = len(
            self.scenario.multi_objectives)  # type: ignore[attr-defined] # noqa F821

        # Main BO loop
        while True:
            if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                pSMAC.read(
                    run_history=self.runhistory,
                    output_dirs=self.scenario.input_psmac_dirs,  # type: ignore[attr-defined] # noqa F821
                    configuration_space=self.config_space,
                    logger=self.logger,
                )

            loop_started = time.time()

            # Ask the intensifier what to do next; the initial design
            # configurations are served through the same call.
            intent, run_info = self.intensifier.get_next_run(
                challengers=self.initial_design_configs,
                incumbent=self.incumbent,
                chooser=self.epm_chooser,
                run_history=self.runhistory,
                repeat_configs=self.intensifier.repeat_configs,
                num_workers=self.tae_runner.num_workers(),
            )

            # Drop the chosen config from the initial design challengers so
            # that it is not proposed a second time.
            self.initial_design_configs = [
                cfg for cfg in self.initial_design_configs
                if cfg != run_info.config
            ]

            # The time bound is started afresh for a 'new' challenger and
            # accumulated otherwise.
            if self.intensifier.num_run != 0:
                previous_bound = time_left
                time_spent += time.time() - loop_started
                time_left = self._get_timebound_for_intensification(
                    time_spent, update=True)
                self.logger.debug(
                    "Updated intensification time bound from %f to %f",
                    previous_bound,
                    time_left,
                )
            else:
                time_spent = time.time() - loop_started
                time_left = self._get_timebound_for_intensification(
                    time_spent, update=False)
                self.logger.debug("New intensification time bound: %f",
                                  time_left)

            # Do not start anything new once the budget is gone
            if self.stats.is_budget_exhausted():
                intent = RunInfoIntent.SKIP

            if intent == RunInfoIntent.RUN:
                # Register the launch in the run history with status RUNNING
                # and a placeholder cost; the entry is updated once the
                # result has been processed.
                if n_objectives == 1:
                    placeholder_cost = float(MAXINT)
                else:
                    placeholder_cost = np.full(n_objectives, float(MAXINT))
                self.runhistory.add(
                    config=run_info.config,
                    cost=placeholder_cost,
                    time=0.0,
                    status=StatusType.RUNNING,
                    instance_id=run_info.instance,
                    seed=run_info.seed,
                    budget=run_info.budget,
                )

                run_info.config.config_id = self.runhistory.config_ids[
                    run_info.config]

                self.tae_runner.submit_run(run_info=run_info)

                # The stats object judges exhaustion by elapsed time (known
                # only after a run finishes) and by the number of ta
                # executions. Counting the submission right away keeps us from
                # launching more runs than the scenario allows.
                self.stats.submitted_ta_runs += 1
            elif intent == RunInfoIntent.WAIT:
                # The intensifier cannot decide anything until more data is
                # available, so block until resources free up.
                self.tae_runner.wait()
            elif intent != RunInfoIntent.SKIP:
                # SKIP needs no launch (it only marks the transition to a new
                # iteration); anything else is unknown.
                raise NotImplementedError(
                    "No other RunInfoIntent has been coded!")

            # Fold in whatever has finished so far; this also checks for a
            # new incumbent.
            for finished_info, outcome in self.tae_runner.get_finished_runs():
                self._incorporate_run_results(finished_info, outcome,
                                              time_left)

            if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                assert self.scenario.output_dir_for_this_run is not None  # please mypy
                pSMAC.write(
                    run_history=self.runhistory,
                    output_directory=self.scenario.output_dir_for_this_run,  # type: ignore[attr-defined] # noqa F821
                    logger=self.logger,
                )

            remaining = (
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs(),
            )
            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)"
                % remaining)

            if self.stats.is_budget_exhausted() or self._stop:
                if not self.stats.is_budget_exhausted():
                    self.logger.debug(
                        "Shutting down because a configuration or callback returned status STOP"
                    )
                else:
                    self.logger.debug("Exhausted configuration budget")

                # Runs may still be in flight (e.g. the run count was reached
                # while time budget remains): wait for them and process their
                # results before leaving.
                while self.tae_runner.pending_runs():
                    self.tae_runner.wait()
                    for finished_info, outcome in \
                            self.tae_runner.get_finished_runs():
                        self._incorporate_run_results(finished_info, outcome,
                                                      time_left)

                # No more resources: leave the intensification loop
                break

            # print stats at the end of each intensification iteration
            if self.intensifier.iteration_done:
                self.stats.print_stats(debug_out=True)

        return self.incumbent
Example #11
0
    def test_write(self):
        """pSMAC.write emits the expected JSON and a read-back adds no runs."""
        # The nulls make sure that we correctly emit the python None value
        expected_json = (
            '{"data": [[[1, "branin", 1], [1, 1, {"__enum__": '
            '"StatusType.SUCCESS"}, null]], '
            '[[1, "branini", 1], [1, 1, {"__enum__": '
            '"StatusType.SUCCESS"}, null]], '
            '[[2, "branini", 1], [1, 1, {"__enum__": '
            '"StatusType.SUCCESS"}, null]], '
            '[[2, null, 1], [1, 1, {"__enum__": '
            '"StatusType.SUCCESS"}, null]], '
            '[[3, "branin-hoo", 1], [1, 1, {"__enum__": '
            '"StatusType.SUCCESS"}, null]], '
            '[[4, null, 1], [1, 1, {"__enum__": '
            '"StatusType.SUCCESS"}, null]]],'
            '"config_origins": {},'
            '"configs": {'
            '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, '
            '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, '
            '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, '
            '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'
        )

        run_history = RunHistory(aggregate_func=average_cost)
        configuration_space = test_helpers.get_branin_config_space()
        configuration_space.seed(1)

        def record(cfg, **extra):
            # Every run in this test is a successful run with cost 1, time 1
            # and seed 1; only the instance / origin varies.
            run_history.add(cfg, 1, 1, StatusType.SUCCESS, seed=1, **extra)

        # First config on two instances
        first = configuration_space.sample_configuration()
        record(first, instance_id='branin')
        record(first, instance_id='branini')

        # Second config on a known instance, then on no instance
        second = configuration_space.sample_configuration()
        record(second, instance_id='branini')
        record(second)

        # New config on new instance
        third = configuration_space.sample_configuration()
        record(third, instance_id='branin-hoo')

        # New config on no instance
        fourth = configuration_space.sample_configuration()
        record(fourth)

        # External configuration which will not be written to json file!
        external = configuration_space.sample_configuration()
        record(external, origin=DataOrigin.EXTERNAL_SAME_INSTANCES)

        logger = logging.getLogger("Test")
        pSMAC.write(run_history, self.tmp_dir, logger=logger)
        size_before_read = len(run_history.data)
        pSMAC.read(run_history=run_history,
                   output_dirs=[self.tmp_dir],
                   configuration_space=configuration_space,
                   logger=logger)
        self.assertEqual(
            size_before_read, len(run_history.data),
            "Runhistory should be the same and not changed after reading")

        output_filename = os.path.join(self.tmp_dir, 'runhistory.json')
        self.assertTrue(os.path.exists(output_filename))

        expected = json.loads(expected_json, object_hook=StatusType.enum_hook)
        with open(output_filename) as fh:
            written = json.load(fh, object_hook=StatusType.enum_hook)
        self.assertEqual(written, expected)
Example #12
0
File: smbo.py Project: maxc01/SMAC3
    def run(self) -> Configuration:
        """Drives the optimization loop until the budget is exhausted

        Each pass optionally reads shared run histories via pSMAC, fetches the
        next challenger from the intensifier, evaluates it against the
        incumbent and optionally writes the run history back out.

        Returns
        ----------
        incumbent: Configuration
            The best found configuration (``self.incumbent``)
        """
        self.start()

        # Main BO loop
        while True:
            if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                pSMAC.read(
                    run_history=self.runhistory,
                    output_dirs=self.scenario.input_psmac_dirs,  # type: ignore[attr-defined] # noqa F821
                    configuration_space=self.config_space,
                    logger=self.logger,
                )

            tick = time.time()

            # Next configuration to intensify; the initial design
            # configurations are served through the same call.
            candidate, is_fresh = self.intensifier.get_next_challenger(
                challengers=self.initial_design_configs,
                chooser=self.epm_chooser,
                run_history=self.runhistory,
                repeat_configs=self.intensifier.repeat_configs,
            )

            # Drop the candidate from the initial design challengers so that
            # it is not proposed again.
            self.initial_design_configs = [
                cfg for cfg in self.initial_design_configs if cfg != candidate
            ]

            # The time bound is only recomputed when a 'new' configuration
            # was sampled as the challenger.
            if is_fresh:
                elapsed = time.time() - tick
                time_left = self._get_timebound_for_intensification(elapsed)

            if candidate:
                # evaluate selected challenger
                self.logger.debug("Intensify - evaluate challenger")

                try:
                    self.incumbent, _ = self.intensifier.eval_challenger(
                        challenger=candidate,
                        incumbent=self.incumbent,
                        run_history=self.runhistory,
                        time_bound=max(self.intensifier._min_time, time_left),
                    )
                except FirstRunCrashedException:
                    if self.scenario.abort_on_first_run_crash:  # type: ignore[attr-defined] # noqa F821
                        raise

                if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                    assert self.scenario.output_dir_for_this_run is not None  # please mypy
                    pSMAC.write(
                        run_history=self.runhistory,
                        output_directory=self.scenario.output_dir_for_this_run,  # type: ignore[attr-defined] # noqa F821
                        logger=self.logger,
                    )

            budget_report = (
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs(),
            )
            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)"
                % budget_report)

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent
Example #13
0
    def run_smbo(self):

        self.watcher.start_task('SMBO')

        # == first things first: load the datamanager
        self.reset_data_manager()

        # == Initialize non-SMBO stuff
        # first create a scenario
        seed = self.seed
        self.config_space.seed(seed)
        num_params = len(self.config_space.get_hyperparameters())
        # allocate a run history
        num_run = self.start_num_run

        # Initialize some SMAC dependencies
        runhistory = RunHistory(aggregate_func=average_cost)
        # meta_runhistory = RunHistory(aggregate_func=average_cost)
        # meta_runs_dataset_indices = {}

        # == METALEARNING suggestions
        # we start by evaluating the defaults on the full dataset again
        # and add the suggestions from metalearning behind it

        if self.num_metalearning_cfgs > 0:
            if self.metadata_directory is None:
                metalearning_directory = os.path.dirname(
                    autosklearn.metalearning.__file__)
                # There is no multilabel data in OpenML
                if self.task == MULTILABEL_CLASSIFICATION:
                    meta_task = BINARY_CLASSIFICATION
                else:
                    meta_task = self.task
                metadata_directory = os.path.join(
                    metalearning_directory, 'files', '%s_%s_%s' %
                    (self.metric, TASK_TYPES_TO_STRING[meta_task], 'sparse'
                     if self.datamanager.info['is_sparse'] else 'dense'))
                self.metadata_directory = metadata_directory

            if os.path.exists(self.metadata_directory):

                self.logger.info('Metadata directory: %s',
                                 self.metadata_directory)
                meta_base = MetaBase(self.config_space,
                                     self.metadata_directory)

                try:
                    meta_base.remove_dataset(self.dataset_name)
                except:
                    pass

                metafeature_calculation_time_limit = int(
                    self.total_walltime_limit / 4)
                metafeature_calculation_start_time = time.time()
                meta_features = self._calculate_metafeatures_with_limits(
                    metafeature_calculation_time_limit)
                metafeature_calculation_end_time = time.time()
                metafeature_calculation_time_limit = \
                    metafeature_calculation_time_limit - (
                    metafeature_calculation_end_time -
                    metafeature_calculation_start_time)

                if metafeature_calculation_time_limit < 1:
                    self.logger.warning(
                        'Time limit for metafeature calculation less '
                        'than 1 seconds (%f). Skipping calculation '
                        'of metafeatures for encoded dataset.',
                        metafeature_calculation_time_limit)
                    meta_features_encoded = None
                else:
                    with warnings.catch_warnings():
                        warnings.showwarning = self._send_warnings_to_log
                        self.datamanager.perform1HotEncoding()
                    meta_features_encoded = \
                        self._calculate_metafeatures_encoded_with_limits(
                            metafeature_calculation_time_limit)

                # In case there is a problem calculating the encoded meta-features
                if meta_features is None:
                    if meta_features_encoded is not None:
                        meta_features = meta_features_encoded
                else:
                    if meta_features_encoded is not None:
                        meta_features.metafeature_values.update(
                            meta_features_encoded.metafeature_values)

                if meta_features is not None:
                    meta_base.add_dataset(self.dataset_name, meta_features)
                    # Do mean imputation of the meta-features - should be done specific
                    # for each prediction model!
                    all_metafeatures = meta_base.get_metafeatures(
                        features=list(meta_features.keys()))
                    all_metafeatures.fillna(all_metafeatures.mean(),
                                            inplace=True)

                    with warnings.catch_warnings():
                        warnings.showwarning = self._send_warnings_to_log
                        metalearning_configurations = self.collect_metalearning_suggestions(
                            meta_base)
                    if metalearning_configurations is None:
                        metalearning_configurations = []
                    self.reset_data_manager()

                    self.logger.info('%s', meta_features)

                    # Convert meta-features into a dictionary because the scenario
                    # expects a dictionary
                    meta_features_dict = {}
                    for dataset, series in all_metafeatures.iterrows():
                        meta_features_dict[dataset] = series.values
                    meta_features_list = []
                    for meta_feature_name in all_metafeatures.columns:
                        meta_features_list.append(
                            meta_features[meta_feature_name].value)
                    meta_features_list = np.array(meta_features_list).reshape(
                        (1, -1))
                    self.logger.info(list(meta_features_dict.keys()))

                    # meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric])
                    # meta_runs_index = 0
                    # try:
                    #    meta_durations = meta_base.get_all_runs('runtime')
                    #    read_runtime_data = True
                    # except KeyError:
                    #    read_runtime_data = False
                    #    self.logger.critical('Cannot read runtime data.')
                    #    if self.acquisition_function == 'EIPS':
                    #        self.logger.critical('Reverting to acquisition function EI!')
                    #        self.acquisition_function = 'EI'

                    # for meta_dataset in meta_runs.index:
                    #     meta_dataset_start_index = meta_runs_index
                    #     for meta_configuration in meta_runs.columns:
                    #         if np.isfinite(meta_runs.loc[meta_dataset, meta_configuration]):
                    #             try:
                    #                 config = meta_base.get_configuration_from_algorithm_index(
                    #                     meta_configuration)
                    #                 cost = meta_runs.loc[meta_dataset, meta_configuration]
                    #                 if read_runtime_data:
                    #                     runtime = meta_durations.loc[meta_dataset,
                    #                                                  meta_configuration]
                    #                 else:
                    #                     runtime = 1
                    #                 # TODO read out other status types!
                    #                 meta_runhistory.add(config, cost, runtime,
                    #                                     StatusType.SUCCESS,
                    #                                     instance_id=meta_dataset)
                    #                 meta_runs_index += 1
                    #             except:
                    #                 # TODO maybe add warning
                    #                 pass
                    #
                    #     meta_runs_dataset_indices[meta_dataset] = (
                    #         meta_dataset_start_index, meta_runs_index)
            else:
                meta_features = None
                self.logger.warning('Could not find meta-data directory %s' %
                                    metadata_directory)

        else:
            meta_features = None

        if meta_features is None:
            if self.acquisition_function == 'EIPS':
                self.logger.critical('Reverting to acquisition function EI!')
                self.acquisition_function = 'EI'
            meta_features_list = []
            meta_features_dict = {}
            metalearning_configurations = []

        if self.resampling_strategy in [
                'partial-cv', 'partial-cv-iterative-fit'
        ]:
            num_folds = self.resampling_strategy_args['folds']
            instances = [[
                json.dumps({
                    'task_id': self.dataset_name,
                    'fold': fold_number
                })
            ] for fold_number in range(num_folds)]
        else:
            instances = [[json.dumps({'task_id': self.dataset_name})]]

        startup_time = self.watcher.wall_elapsed(self.dataset_name)
        total_walltime_limit = self.total_walltime_limit - startup_time - 5
        scenario_dict = {
            'cs': self.config_space,
            'cutoff-time': self.func_eval_time_limit,
            'memory-limit': self.memory_limit,
            'wallclock-limit': total_walltime_limit,
            'output-dir': self.backend.get_smac_output_directory(self.seed),
            'shared-model': self.shared_mode,
            'run-obj': 'quality',
            'deterministic': 'true',
            'instances': instances
        }

        if self.configuration_mode == 'RANDOM':
            scenario_dict['minR'] = len(
                instances) if instances is not None else 1
            scenario_dict['initial_incumbent'] = 'RANDOM'

        self.scenario = Scenario(scenario_dict)

        # TODO rebuild target algorithm to be it's own target algorithm
        # evaluator, which takes into account that a run can be killed prior
        # to the model being fully fitted; thus putting intermediate results
        # into a queue and querying them once the time is over
        exclude = dict()
        include = dict()
        if self.include_preprocessors is not None and \
                self.exclude_preprocessors is not None:
            raise ValueError('Cannot specify include_preprocessors and '
                             'exclude_preprocessors.')
        elif self.include_preprocessors is not None:
            include['preprocessor'] = self.include_preprocessors
        elif self.exclude_preprocessors is not None:
            exclude['preprocessor'] = self.exclude_preprocessors
        if self.include_estimators is not None and \
                self.exclude_preprocessors is not None:
            raise ValueError('Cannot specify include_estimators and '
                             'exclude_estimators.')
        elif self.include_estimators is not None:
            if self.task in CLASSIFICATION_TASKS:
                include['classifier'] = self.include_estimators
            elif self.task in REGRESSION_TASKS:
                include['regressor'] = self.include_estimators
            else:
                raise ValueError(self.task)
        elif self.exclude_estimators is not None:
            if self.task in CLASSIFICATION_TASKS:
                exclude['classifier'] = self.exclude_estimators
            elif self.task in REGRESSION_TASKS:
                exclude['regressor'] = self.exclude_estimators
            else:
                raise ValueError(self.task)

        ta = ExecuteTaFuncWithQueue(
            backend=self.backend,
            autosklearn_seed=seed,
            resampling_strategy=self.resampling_strategy,
            initial_num_run=num_run,
            logger=self.logger,
            include=include,
            exclude=exclude,
            metric=self.metric,
            memory_limit=self.memory_limit,
            disable_file_output=self.disable_file_output,
            **self.resampling_strategy_args)

        types, bounds = get_types(self.config_space,
                                  self.scenario.feature_array)

        # TODO extract generation of SMAC object into it's own function for
        # testing
        if self.acquisition_function == 'EI':
            model = RandomForestWithInstances(
                types=types,
                bounds=bounds,
                #instance_features=meta_features_list,
                seed=1,
                num_trees=10)
            rh2EPM = RunHistory2EPM4Cost(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=[
                                             StatusType.SUCCESS,
                                             StatusType.MEMOUT,
                                             StatusType.TIMEOUT
                                         ],
                                         impute_censored_data=False,
                                         impute_state=None)
            _smac_arguments = dict(scenario=self.scenario,
                                   model=model,
                                   rng=seed,
                                   runhistory2epm=rh2EPM,
                                   tae_runner=ta,
                                   runhistory=runhistory)
        elif self.acquisition_function == 'EIPS':
            rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=[
                                             StatusType.SUCCESS,
                                             StatusType.MEMOUT,
                                             StatusType.TIMEOUT
                                         ],
                                         impute_censored_data=False,
                                         impute_state=None)
            model = UncorrelatedMultiObjectiveRandomForestWithInstances(
                ['cost', 'runtime'],
                types=types,
                bounds=bounds,
                num_trees=10,
                instance_features=meta_features_list,
                seed=1)
            acquisition_function = EIPS(model)
            _smac_arguments = dict(scenario=self.scenario,
                                   model=model,
                                   rng=seed,
                                   tae_runner=ta,
                                   runhistory2epm=rh2EPM,
                                   runhistory=runhistory,
                                   acquisition_function=acquisition_function)
        else:
            raise ValueError('Unknown acquisition function value %s!' %
                             self.acquisition_function)

        if self.configuration_mode == 'SMAC':
            smac = SMAC(**_smac_arguments)
        elif self.configuration_mode in ['ROAR', 'RANDOM']:
            for not_in_roar in ['runhistory2epm', 'model']:
                if not_in_roar in _smac_arguments:
                    del _smac_arguments[not_in_roar]
            smac = ROAR(**_smac_arguments)
        else:
            raise ValueError(self.configuration_mode)

        # Build a runtime model
        # runtime_rf = RandomForestWithInstances(types,
        #                                        instance_features=meta_features_list,
        #                                        seed=1, num_trees=10)
        # runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
        #                                      scenario=self.scenario,
        #                                      success_states=None,
        #                                      impute_censored_data=False,
        #                                      impute_state=None)
        # X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory)
        # runtime_rf.train(X_runtime, y_runtime[:, 1].flatten())
        # X_meta, Y_meta = rh2EPM.transform(meta_runhistory)
        # # Transform Y_meta on a per-dataset base
        # for meta_dataset in meta_runs_dataset_indices:
        #     start_index, end_index = meta_runs_dataset_indices[meta_dataset]
        #     end_index += 1  # Python indexing
        #     Y_meta[start_index:end_index, 0]\
        #         [Y_meta[start_index:end_index, 0] >2.0] =  2.0
        #     dataset_minimum = np.min(Y_meta[start_index:end_index, 0])
        #     Y_meta[start_index:end_index, 0] = 1 - (
        #         (1. - Y_meta[start_index:end_index, 0]) /
        #         (1. - dataset_minimum))
        #     Y_meta[start_index:end_index, 0]\
        #           [Y_meta[start_index:end_index, 0] > 2] = 2

        smac.solver.stats.start_timing()
        # == first, evaluate all metalearning and default configurations
        smac.solver.incumbent = smac.solver.initial_design.run()

        for challenger in metalearning_configurations:

            smac.solver.incumbent, inc_perf = smac.solver.intensifier.intensify(
                challengers=[challenger],
                incumbent=smac.solver.incumbent,
                run_history=smac.solver.runhistory,
                aggregate_func=smac.solver.aggregate_func,
                time_bound=self.total_walltime_limit)

            if smac.solver.scenario.shared_model:
                pSMAC.write(run_history=smac.solver.runhistory,
                            output_directory=smac.solver.scenario.output_dir,
                            num_run=self.seed)

            if smac.solver.stats.is_budget_exhausted():
                break

        # == after metalearning run SMAC loop
        while True:

            if smac.solver.scenario.shared_model:
                pSMAC.read(run_history=smac.solver.runhistory,
                           output_dirs=glob.glob(
                               self.backend.get_smac_output_glob()),
                           configuration_space=self.config_space,
                           logger=self.logger)

            choose_next_start_time = time.time()
            try:
                challengers = self.choose_next(smac)
            except Exception as e:
                self.logger.error(e)
                self.logger.error("Error in getting next configurations "
                                  "with SMAC. Using random configuration!")
                next_config = self.config_space.sample_configuration()
                challengers = [next_config]
            time_for_choose_next = time.time() - choose_next_start_time
            self.logger.info('Used %g seconds to find next '
                             'configurations' % (time_for_choose_next))

            time_for_choose_next = max(time_for_choose_next, 1.0)
            smac.solver.incumbent, inc_perf = smac.solver.intensifier.intensify(
                challengers=challengers,
                incumbent=smac.solver.incumbent,
                run_history=smac.solver.runhistory,
                aggregate_func=smac.solver.aggregate_func,
                time_bound=time_for_choose_next)

            if smac.solver.scenario.shared_model:
                pSMAC.write(run_history=smac.solver.runhistory,
                            output_directory=smac.solver.scenario.output_dir,
                            num_run=self.seed)

            if smac.solver.stats.is_budget_exhausted():
                break

        self.runhistory = smac.solver.runhistory
        self.trajectory = smac.solver.intensifier.traj_logger.trajectory
        smac.runhistory = self.runhistory
        self.fANOVA_input = smac.get_X_y()

        return self.runhistory, self.trajectory, self.fANOVA_input
Example #14
0
    def optimize(self):
        """
        Optimizes the algorithm provided in scenario (given in constructor)

        Starts ``self.n_optimizers`` worker processes that run the
        module-level ``optimize`` target, collects the ``(incumbent, pid)``
        pairs the workers put on a shared queue, loads the run histories
        found under ``self.scenario.input_psmac_dirs`` into ``self.rh`` and
        returns the collected incumbents.

        Returns
        -------
        incumbent(s) : ndarray[Configuration]
            All collected incumbents if ``self.n_optimizers == self.n_incs``;
            otherwise the entries selected by the ``val_ids`` returned from
            ``self.get_best_incumbents_ids`` (or by ``est_ids`` when
            ``val_ids`` is empty).

        Notes
        -----
        The worker pids are read from the queue but are not returned.
        Slots of workers that exited without reporting a result are left
        unfilled in the returned array.
        """
        # Local import: only needed for the ``queue.Empty`` signal below.
        import queue

        # Setup output directory
        if self.output_dir is None:
            self.scenario.output_dir = "psmac3-output_%s" % (
                datetime.datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f'))
            self.output_dir = create_output_directory(self.scenario,
                                                      run_id=self.run_id,
                                                      logger=self.logger)
            if self.shared_model:
                self.scenario.shared_model = self.shared_model
        if self.scenario.input_psmac_dirs is None:
            self.scenario.input_psmac_dirs = os.path.sep.join(
                (self.scenario.output_dir, 'run_*'))

        # NOTE(review): this copy is never used below; the workers receive
        # ``self.scenario``. Kept as-is -- confirm whether ``scen`` was meant
        # to be passed to the workers instead.
        scen = copy.deepcopy(self.scenario)
        scen.output_dir_for_this_run = None
        scen.output_dir = None
        self.logger.info("+" * 120)
        self.logger.info("PSMAC run")

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Multiprocessing part start ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
        result_queue = multiprocessing.Queue()
        procs = []
        for p in range(self.n_optimizers):
            proc = multiprocessing.Process(
                target=optimize,
                args=(
                    result_queue,  # Output queue
                    self.scenario,  # Scenario object
                    self._tae,  # type of tae to run target with
                    p,  # process_id (used in output folder name)
                    self.output_dir,  # directory to create outputs in
                ),
                kwargs=self.kwargs)
            proc.start()
            procs.append(proc)

        incs = np.empty((self.n_optimizers, ), dtype=Configuration)
        idx = 0
        # Drain the queue BEFORE joining the workers: a process that has put
        # items on a multiprocessing.Queue does not terminate until its
        # buffered data has been flushed to the pipe, so joining first can
        # deadlock when a result does not fit into the pipe buffer.
        while idx < self.n_optimizers:
            workers_alive = any(proc.is_alive() for proc in procs)
            try:
                if workers_alive:
                    # Poll with a timeout so that a worker dying without a
                    # result cannot block this loop forever.
                    conf, _pid = result_queue.get(timeout=0.5)
                else:
                    # All workers exited: whatever they produced is already
                    # in the pipe, so a non-blocking read is sufficient.
                    conf, _pid = result_queue.get_nowait()
            except queue.Empty:
                if workers_alive:
                    continue
                break
            incs[idx] = conf
            idx += 1
        for proc in procs:
            proc.join()

        self.logger.info('Loading all runhistories')
        read(self.rh, self.scenario.input_psmac_dirs, self.scenario.cs,
             self.logger)
        result_queue.close()
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Multiprocessing part end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
        if self.n_optimizers == self.n_incs:  # no validation necessary just return all incumbents
            return incs
        else:
            _, val_ids, _, est_ids = self.get_best_incumbents_ids(
                incs)  # determine the best incumbents
            if val_ids:
                return incs[val_ids]
            return incs[est_ids]
Example #15
0
    def run(self):
        """Runs the Bayesian optimization loop until the budget is exhausted.

        Every round prints the current incumbent's cost, optionally reads
        shared run histories through pSMAC (when
        ``self.scenario.shared_model`` is set), asks ``self.choose_next`` for
        challengers and then either intensifies locally (``self.server`` is
        ``None``) or pushes the work to ``self.server`` and merges the run
        history it returns.

        Returns
        ----------
        incumbent:
            The value of ``self.incumbent`` once ``self.stats`` reports an
            exhausted budget
        """
        self.start()
        # Round counter, used only for the progress printout
        round_no = 0
        # Main BO loop
        while True:
            # Print the best result of every SMBO round
            # (including the first round, SMBO 0)
            incumbent_cost = self.runhistory.get_cost(self.incumbent)
            print('SMBO %s: %s' % (round_no, incumbent_cost))
            round_no += 1

            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            search_started = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            search_duration = time.time() - search_started
            time_left = self._get_timebound_for_intensification(
                search_duration)

            self.logger.debug("Intensify")

            if self.server is not None:
                # Hand the challengers to the workers, then read their
                # results back and add them to the history
                print(time_left)
                self.server.push(incumbent=self.incumbent,
                                 runhistory=self.runhistory,
                                 challengers=challengers.challengers,
                                 time_left=time_left)
                # Read the workers' runhistory and merge it into
                # self.runhistory
                worker_incumbent, worker_history = self.server.pull()
                self.runhistory.update(worker_history)
                # With the runhistory updated, check whether there is a new
                # incumbent, because the workers do not hold the complete
                # runhistory
                current_runs = self.runhistory.get_history_for_config(
                    self.incumbent)
                candidate_runs = self.runhistory.get_history_for_config(
                    worker_incumbent)
                # Find the minimum cost of each configuration
                best_current = min(entry[0] for entry in current_runs)
                best_candidate = min(entry[0] for entry in candidate_runs)
                if best_candidate < best_current:
                    # Replace with the new incumbent
                    self.incumbent = worker_incumbent
                # Could consider using this function instead:
                # new_incumbent = self._compare_configs(
                #     incumbent=incumbent, challenger=challenger,
                #     run_history=run_history,
                #     aggregate_func=aggregate_func,
                #     log_traj=log_traj)
            else:
                time_bound = max(self.intensifier._min_time, time_left)
                self.incumbent, _inc_perf = self.intensifier.intensify(
                    challengers=challengers,
                    incumbent=self.incumbent,
                    run_history=self.runhistory,
                    aggregate_func=self.aggregate_func,
                    time_bound=time_bound)

            if self.scenario.shared_model:
                pSMAC.write(
                    run_history=self.runhistory,
                    output_directory=self.scenario.output_dir_for_this_run)

            # The message is formatted eagerly and goes to the root logger
            remaining = (self.stats.get_remaing_time_budget(),
                         self.stats.get_remaining_ta_budget(),
                         self.stats.get_remaining_ta_runs())
            budget_msg = (
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)"
                % remaining)
            logging.debug(budget_msg)

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent