Python read Examples

Programming Language: Python

Namespace/Package Name: smac.optimizer.pSMAC

Method/Function: read

Examples at hotexamples.com: 15

Python read - 15 examples found. These are the top-rated real-world Python examples of smac.optimizer.pSMAC.read, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

 def _optimize(self, optimizer, hist_list):
     """Run ``optimizer`` and append its resulting run history to ``hist_list``.

     After ``optimizer.optimize()`` returns, ``pSMAC.read`` is invoked with
     the solver's run history, the scenario's ``input_psmac_dirs``, the
     configuration space and the solver's logger; the run history is then
     appended to ``hist_list``.
     """
     optimizer.optimize()

     solver = optimizer.solver
     read_kwargs = {
         'run_history': solver.runhistory,
         'output_dirs': solver.scenario.input_psmac_dirs,
         'configuration_space': solver.config_space,
         'logger': solver.logger,
     }
     pSMAC.read(**read_kwargs)

     hist_list.append(solver.runhistory)

Example #2

Show file

    def run(self):
        """Runs the Bayesian optimization loop

        Each iteration optionally reads shared pSMAC run data, transforms the
        run history into model input, asks ``choose_next`` for challengers,
        intensifies them against the incumbent and optionally writes the run
        history back out. The loop ends once ``self.stats`` reports that the
        budget is exhausted.

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        """
        self.start()

        # Main BO loop
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            # The time spent choosing challengers is what the time bound for
            # intensification is derived from.
            time_spent = time.time() - start_time
            time_left = self._get_timebound_for_intensification(time_spent)

            self.logger.debug("Intensify")

            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=challengers,
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=max(self.intensifier._min_time, time_left))

            if self.scenario.shared_model:
                pSMAC.write(
                    run_history=self.runhistory,
                    output_directory=self.scenario.output_dir_for_this_run,
                    logger=self.logger)

            # Use the instance logger (like every other message in this
            # method) and let logging do the formatting lazily.
            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), "
                "%f (target runs)",
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs())

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent

Example #3

Show file

def _take_run(runcount_left):
    """Claim one run from the shared budget; return True if one was left.

    The check and the decrement happen under the counter's own lock when it
    offers ``get_lock()`` (as ``multiprocessing.Value`` does by default), so
    concurrent workers cannot both consume the last remaining run.
    """
    def _decrement_if_positive():
        if runcount_left.value <= 0:
            return False
        runcount_left.value -= 1
        return True

    get_lock = getattr(runcount_left, 'get_lock', None)
    if get_lock is None:
        # Counter without a lock: nothing to synchronize on, behave as before.
        return _decrement_if_positive()
    # NOTE: the default lock of multiprocessing.Value is re-entrant, so
    # touching ``.value`` while holding it is fine.
    with get_lock():
        return _decrement_if_positive()


def _iterate(optimizer, runcount_left, return_hist):
    """Run ``optimizer.iterate()`` while the shared run budget lasts.

    Parameters
    ----------
    optimizer
        Object exposing ``iterate()`` and a ``solver`` attribute.
    runcount_left
        Shared counter with a ``value`` attribute holding the number of runs
        still allowed; it is decremented once per iteration.
    return_hist
        List-like object; the solver's run history is appended to it once
        the budget is used up.
    """
    # ``value > 0`` followed by ``value -= 1`` is a read-modify-write that is
    # not atomic across processes, so each run is claimed under the lock.
    while _take_run(runcount_left):
        optimizer.iterate()

    solver = optimizer.solver
    pSMAC.read(
        run_history=solver.runhistory,
        output_dirs=solver.scenario.input_psmac_dirs,
        configuration_space=solver.config_space,
        logger=solver.logger,
    )
    return_hist.append(solver.runhistory)

Example #4

Show file

    def iterate(self):
        """Run one parallel search iteration and update the incumbent.

        If ``len(self.configs)`` has already reached
        ``self.config_num_threshold`` a warning is logged and nothing is run.
        Otherwise ``pSMAC.read`` is called for each of the ``self.n_jobs``
        optimizers, one ``_iterate`` process per optimizer is started and
        joined, and every entry of the returned run histories is folded into
        ``self.configs`` / ``self.perfs`` and the incumbent.

        Returns
        -------
        tuple
            ``(incumbent_perf, iteration_cost, incumbent_config)``;
            ``iteration_cost`` is the elapsed wall-clock time in seconds, or
            ``None`` when the threshold was hit and nothing was run.
        """
        budget = multiprocessing.Value('i', self.trials_this_run)
        started_at = time.time()

        # Guard clause: search space considered exhausted, skip the iteration.
        if len(self.configs) >= self.config_num_threshold:
            self.logger.warning('Already explored 70 percentage of the '
                                'hp space: %d!' % self.config_num_threshold)
            return self.incumbent_perf, None, self.incumbent_config

        collected = multiprocessing.Manager().list()

        for idx in range(self.n_jobs):
            pSMAC.read(
                run_history=self.optimizer_list[idx].solver.runhistory,
                output_dirs=(self.optimizer_list[idx].solver.scenario.output_dir
                             + '/run_1'),
                configuration_space=self.optimizer_list[idx].solver.config_space,
                logger=self.optimizer_list[idx].solver.logger,
            )

        workers = []
        for idx in range(self.n_jobs):
            worker = multiprocessing.Process(
                target=_iterate,
                args=[self.optimizer_list[idx], budget, collected])
            workers.append(worker)
            worker.start()
        for worker in workers:
            worker.join()

        for history in collected:
            for run_key in list(history.data.keys()):
                # The first field of the run value is turned into a reward.
                reward = 1. - history.data[run_key][0]
                config = history.ids_config[run_key[0]]
                if config not in self.configs:
                    self.perfs.append(reward)
                    self.configs.append(config)
                if reward > self.incumbent_perf:
                    self.incumbent_perf = reward
                    self.incumbent_config = config

        self.trial_cnt += self.trials_per_iter

        elapsed = time.time() - started_at
        return self.incumbent_perf, elapsed, self.incumbent_config

Example #5

Show file

    def run_smbo(self):
        """Configure SMAC for the current dataset, run it and return its results.

        The method builds the instance list, the include/exclude component
        filters, the target-algorithm keyword arguments and the scenario
        dictionary, creates the SMAC object (through
        ``self.get_smac_object_callback`` when one is set, otherwise through
        ``get_smac_object``) and calls ``optimize()`` on it. In shared mode
        ``pSMAC.read`` is called once more after the optimization.

        Returns
        -------
        tuple
            ``(runhistory, trajectory, budget_type)`` as read from the SMAC
            solver and its intensifier.

        Raises
        ------
        ValueError
            If both an include and an exclude list are given for
            preprocessors or for estimators, or if estimators are restricted
            and ``self.task`` is neither a classification nor a regression
            task.
        """
        self.watcher.start_task('SMBO')

        # == first things first: load the datamanager
        self.reset_data_manager()

        # == Initialize non-SMBO stuff
        seed = self.seed
        self.config_space.seed(seed)
        num_run = self.start_num_run

        # Initialize some SMAC dependencies
        metalearning_configurations = self.get_metalearning_suggestions()

        # One instance per fold for the partial-cv strategies, else a single one
        if self.resampling_strategy in ('partial-cv',
                                        'partial-cv-iterative-fit'):
            instances = []
            for fold_number in range(self.resampling_strategy_args['folds']):
                instance = {'task_id': self.dataset_name, 'fold': fold_number}
                instances.append([json.dumps(instance)])
        else:
            instances = [[json.dumps({'task_id': self.dataset_name})]]

        # TODO rebuild target algorithm to be its own target algorithm
        # evaluator, which takes into account that a run can be killed prior
        # to the model being fully fitted; thus putting intermediate results
        # into a queue and querying them once the time is over
        include = {}
        exclude = {}

        has_include_pre = self.include_preprocessors is not None
        has_exclude_pre = self.exclude_preprocessors is not None
        if has_include_pre and has_exclude_pre:
            raise ValueError('Cannot specify include_preprocessors and '
                             'exclude_preprocessors.')
        if has_include_pre:
            include['feature_preprocessor'] = self.include_preprocessors
        elif has_exclude_pre:
            exclude['feature_preprocessor'] = self.exclude_preprocessors

        has_include_est = self.include_estimators is not None
        has_exclude_est = self.exclude_estimators is not None
        if has_include_est and has_exclude_est:
            raise ValueError('Cannot specify include_estimators and '
                             'exclude_estimators.')
        if has_include_est or has_exclude_est:
            # The filter key depends on the kind of task being solved
            if self.task in CLASSIFICATION_TASKS:
                estimator_key = 'classifier'
            elif self.task in REGRESSION_TASKS:
                estimator_key = 'regressor'
            else:
                raise ValueError(self.task)
            if has_include_est:
                include[estimator_key] = self.include_estimators
            else:
                exclude[estimator_key] = self.exclude_estimators

        ta = ExecuteTaFuncWithQueue
        ta_kwargs = dict(
            backend=self.backend,
            autosklearn_seed=seed,
            resampling_strategy=self.resampling_strategy,
            initial_num_run=num_run,
            logger=self.logger,
            include=include,
            exclude=exclude,
            metric=self.metric,
            memory_limit=self.memory_limit,
            disable_file_output=self.disable_file_output,
            **self.resampling_strategy_args
        )

        # Startup time plus a fixed 5 units are deducted from the SMAC budget
        startup_time = self.watcher.wall_elapsed(self.dataset_name)
        total_walltime_limit = self.total_walltime_limit - startup_time - 5
        scenario_dict = {
            'abort_on_first_run_crash': False,
            'cs': self.config_space,
            'cutoff_time': self.func_eval_time_limit,
            'deterministic': 'true',
            'instances': instances,
            'memory_limit': self.memory_limit,
            'output-dir': self.backend.get_smac_output_directory(),
            'run_obj': 'quality',
            'shared-model': self.shared_mode,
            'wallclock_limit': total_walltime_limit,
            'cost_for_crash': WORST_POSSIBLE_RESULT,
        }

        user_args = self.smac_scenario_args
        if user_args is not None:
            # These settings are fixed; user-supplied values are discarded
            # (and removed from ``self.smac_scenario_args`` itself).
            locked_args = (
                'abort_on_first_run_crash',
                'cs',
                'deterministic',
                'instances',
                'output-dir',
                'run_obj',
                'shared-model',
                'cost_for_crash',
            )
            for arg in locked_args:
                if arg in user_args:
                    self.logger.warning('Cannot override scenario argument %s, '
                                        'will ignore this.', arg)
                    del user_args[arg]

            # These may be overridden, but the override is reported
            reported_args = ('cutoff_time', 'memory_limit', 'wallclock_limit')
            for arg in reported_args:
                if arg in user_args:
                    self.logger.warning(
                        'Overriding scenario argument %s: %s with value %s',
                        arg,
                        scenario_dict[arg],
                        user_args[arg]
                    )
            scenario_dict.update(user_args)

        smac_args = {
            'scenario_dict': scenario_dict,
            'seed': seed,
            'ta': ta,
            'ta_kwargs': ta_kwargs,
            'backend': self.backend,
            'metalearning_configurations': metalearning_configurations,
        }
        build_smac = self.get_smac_object_callback
        if build_smac is None:
            build_smac = get_smac_object
        smac = build_smac(**smac_args)

        smac.optimize()

        # Patch SMAC to read in data from parallel runs after the last
        # function evaluation
        if self.shared_mode:
            pSMAC.read(
                run_history=smac.solver.runhistory,
                output_dirs=smac.solver.scenario.input_psmac_dirs,
                configuration_space=smac.solver.config_space,
                logger=smac.solver.logger,
            )

        intensifier = smac.solver.intensifier
        self.runhistory = smac.solver.runhistory
        self.trajectory = intensifier.traj_logger.trajectory
        self._budget_type = intensifier.tae_runner.budget_type

        return self.runhistory, self.trajectory, self._budget_type

Example #6

Show file

File: pc_smbo.py Project: mfeurer/pc_smac

    def run(self):
        '''
        Runs the Bayesian optimization loop

        After the initial design, each iteration optionally reads shared
        pSMAC data, asks ``select_configuration`` for challengers and
        intensifies them against the incumbent. With
        ``double_intensification`` the random challengers are raced first
        and the model-based challengers second; otherwise a single race is
        run. The loop ends once ``self.stats`` reports an exhausted budget.

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        '''
        self.stats.start_timing()
        try:
            self.incumbent = self.initial_design.run()
        except FirstRunCrashedException:
            # A crashed first run is only fatal when the scenario says so
            if self.scenario.abort_on_first_run_crash:
                raise

        # Main BO loop
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_directory=self.scenario.output_dir,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()

            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            double_intensification = self.double_intensification
            # get all found configurations sorted according to acq; the call
            # is the same in both modes, only the shape of the result differs
            selection = self.select_configuration.run(
                X, Y,
                incumbent=self.incumbent,
                num_configurations_by_random_search_sorted=100,
                num_configurations_by_local_search=10,
                double_intensification=double_intensification)

            time_spend = time.time() - start_time
            self.logger.debug(
                "Time spend to choose next configurations: %.2f sec",
                time_spend)

            self.logger.debug("Intensify")

            if double_intensification:
                challengers_smac, challengers_random = selection

                # First race: random challengers, bounded by half the
                # selection time
                start_time_random = time.time()
                self.incumbent, inc_perf = self.intensifier.intensify(
                    challengers=challengers_random,
                    incumbent=self.incumbent,
                    run_history=self.runhistory,
                    aggregate_func=self.aggregate_func,
                    time_bound=max(0.01, time_spend / 2.),
                    min_number_of_runs=1)
                time_spend_random = time.time() - start_time_random

                # Second race: model-based challengers, bounded by the time
                # the first race took
                self.incumbent, inc_perf = self.intensifier.intensify(
                    challengers=challengers_smac,
                    incumbent=self.incumbent,
                    run_history=self.runhistory,
                    aggregate_func=self.aggregate_func,
                    time_bound=max(0.01, time_spend_random),
                    min_number_of_runs=1)
            else:
                self.incumbent, inc_perf = self.intensifier.intensify(
                    challengers=selection,
                    incumbent=self.incumbent,
                    run_history=self.runhistory,
                    aggregate_func=self.aggregate_func,
                    time_bound=max(0.01, time_spend),
                    min_number_of_runs=2)

            print("Incumbent: {}, Performance: {}".format(self.incumbent, inc_perf))

            if self.scenario.shared_model:
                pSMAC.write(run_history=self.runhistory,
                            output_directory=self.scenario.output_dir,
                            num_run=self.num_run)

            # Use the instance logger (like the other messages here) and let
            # logging format the message lazily.
            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), "
                "%f (target runs)",
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs())

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent

Example #7

Show file

File: hydra_facade.py Project: tqichun/distributed-SMAC3

    def optimize(self) -> typing.List[Configuration]:
        """
        Optimizes the algorithm provided in scenario (given in constructor)

        Runs up to ``self.n_iterations`` Hydra rounds. Every round launches a
        ``PSMAC`` optimizer, keeps the first ``self.incs_per_round``
        incumbents of the validated ranking (when ``self.val_set`` is set)
        or of the estimated ranking (otherwise) and hands them to
        ``_update_portfolio``. The loop stops early as soon as the portfolio
        cost does not improve. Afterwards the collected run history and the
        pickled portfolio are written below ``self.top_dir``.

        Returns
        -------
        portfolio : typing.List[Configuration]
            Portfolio of found configurations

        """
        def _stamped(prefix):
            # A fresh timestamp is taken on every call, so two directory
            # names created back to back still differ.
            now = datetime.datetime.fromtimestamp(time.time())
            return "%s_%s" % (prefix, now.strftime('%Y-%m-%d_%H:%M:%S_%f'))

        # Setup output directory
        self.portfolio = []
        portfolio_cost = np.inf
        if self.output_dir is None:
            self.top_dir = _stamped("hydra-output")
            self.scenario.output_dir = os.path.join(
                self.top_dir, _stamped("psmac3-output"))
            self.output_dir = create_output_directory(self.scenario,
                                                      run_id=self.run_id,
                                                      logger=self.logger)

        # parent process SMAC only used for validation purposes
        validation_scenario = copy.deepcopy(self.scenario)
        validation_scenario.output_dir_for_this_run = None
        validation_scenario.output_dir = None
        self.solver = SMAC4AC(scenario=validation_scenario,
                              tae_runner=self._tae,
                              rng=self.rng,
                              run_id=self.run_id,
                              **self.kwargs)

        for round_idx in range(self.n_iterations):
            self.logger.info("=" * 120)
            self.logger.info("Hydra Iteration: %d", (round_idx + 1))

            if round_idx == 0:
                tae, tae_kwargs = self._tae, self._tae_kwargs
            else:
                # Later rounds evaluate against the cost oracle built so far
                tae = ExecuteTARunHydra
                tae_kwargs = self._tae_kwargs if self._tae_kwargs else {}
                tae_kwargs['cost_oracle'] = self.cost_per_inst

            self.optimizer = PSMAC(
                scenario=self.scenario,
                run_id=self.run_id,
                rng=self.rng,
                tae=tae,
                tae_kwargs=tae_kwargs,
                shared_model=False,
                validate=bool(self.val_set),
                n_optimizers=self.n_optimizers,
                val_set=self.val_set,
                # return all configurations (unvalidated)
                n_incs=self.n_optimizers,
                **self.kwargs)
            self.optimizer.output_dir = self.output_dir

            incs = self.optimizer.optimize()
            (cost_per_conf_v, val_ids,
             cost_per_conf_e, est_ids) = self.optimizer.get_best_incumbents_ids(incs)

            # Validated results take precedence when a validation set exists
            if self.val_set:
                ranked_ids, cost_lookup = val_ids, cost_per_conf_v
            else:
                ranked_ids, cost_lookup = est_ids, cost_per_conf_e
            to_keep_ids = ranked_ids[:self.incs_per_round]

            config_cost_per_inst = {}
            incs = incs[to_keep_ids]
            self.logger.info('Kept incumbents')
            for inc in incs:
                self.logger.info(inc)
                config_cost_per_inst[inc] = cost_lookup[inc]

            cur_portfolio_cost = self._update_portfolio(
                incs, config_cost_per_inst)
            if portfolio_cost <= cur_portfolio_cost:
                self.logger.info(
                    "No further progress (%f) --- terminate hydra",
                    portfolio_cost)
                break
            portfolio_cost = cur_portfolio_cost
            self.logger.info("Current pertfolio cost: %f", portfolio_cost)

            # Fresh output directory for the next round
            self.scenario.output_dir = os.path.join(
                self.top_dir, _stamped("psmac3-output"))
            self.output_dir = create_output_directory(self.scenario,
                                                      run_id=self.run_id,
                                                      logger=self.logger)

        runs_pattern = os.path.join(self.top_dir, 'psmac3*',
                                    'run_{}'.format(MAXINT))
        read(self.rh, runs_pattern, self.scenario.cs, self.logger)
        self.rh.save_json(
            fn=os.path.join(self.top_dir, 'all_validated_runs_runhistory.json'),
            save_external=True)
        with open(os.path.join(self.top_dir, 'portfolio.pkl'), 'wb') as fh:
            pickle.dump(self.portfolio, fh)

        separator = "~" * 120
        self.logger.info(separator)
        self.logger.info('Resulting Portfolio:')
        for configuration in self.portfolio:
            self.logger.info(str(configuration))
        self.logger.info(separator)

        return self.portfolio

Example #8

Show file

    def run_smbo(self):
        """Build the SMAC scenario for this dataset, run SMAC and collect results.

        When ``self.read_history`` is set, the run history handed to SMAC is
        pre-built by the project-local ``create_Runhistory`` module from a
        fixed random-forest configuration; after the optimization that run
        history and the combined trajectory are pickled to disk.

        Returns
        -------
        tuple
            ``(runhistory, trajectory)``: ``smac.solver.runhistory`` and the
            trajectory (SMAC's trajectory, followed by the pre-built one when
            ``self.read_history`` is set).

        Raises
        ------
        ValueError
            If both an include and an exclude list are given for
            preprocessors or for estimators, or if estimators are restricted
            and ``self.task`` is neither a classification nor a regression
            task.
        """
        self.watcher.start_task('SMBO')

        # == first things first: load the datamanager
        self.reset_data_manager()

        # == Initialize non-SMBO stuff
        # first create a scenario
        seed = self.seed
        self.config_space.seed(seed)
        # allocate a run history
        num_run = self.start_num_run

        # Initialize some SMAC dependencies

        metalearning_configurations = self.get_metalearning_suggestions()

        if self.resampling_strategy in [
                'partial-cv', 'partial-cv-iterative-fit'
        ]:
            num_folds = self.resampling_strategy_args['folds']
            instances = [[
                json.dumps({
                    'task_id': self.dataset_name,
                    'fold': fold_number
                })
            ] for fold_number in range(num_folds)]
        else:
            instances = [[json.dumps({'task_id': self.dataset_name})]]

        # TODO rebuild target algorithm to be its own target algorithm
        # evaluator, which takes into account that a run can be killed prior
        # to the model being fully fitted; thus putting intermediate results
        # into a queue and querying them once the time is over
        exclude = dict()
        include = dict()
        if self.include_preprocessors is not None and \
                        self.exclude_preprocessors is not None:
            raise ValueError('Cannot specify include_preprocessors and '
                             'exclude_preprocessors.')
        elif self.include_preprocessors is not None:
            include['preprocessor'] = self.include_preprocessors
        elif self.exclude_preprocessors is not None:
            exclude['preprocessor'] = self.exclude_preprocessors

        if self.include_estimators is not None and \
                        self.exclude_estimators is not None:
            raise ValueError('Cannot specify include_estimators and '
                             'exclude_estimators.')
        elif self.include_estimators is not None:
            if self.task in CLASSIFICATION_TASKS:
                include['classifier'] = self.include_estimators
            elif self.task in REGRESSION_TASKS:
                include['regressor'] = self.include_estimators
            else:
                raise ValueError(self.task)
        elif self.exclude_estimators is not None:
            if self.task in CLASSIFICATION_TASKS:
                exclude['classifier'] = self.exclude_estimators
            elif self.task in REGRESSION_TASKS:
                exclude['regressor'] = self.exclude_estimators
            else:
                raise ValueError(self.task)

        ta = ExecuteTaFuncWithQueue(
            backend=self.backend,
            autosklearn_seed=seed,
            resampling_strategy=self.resampling_strategy,
            initial_num_run=num_run,
            logger=self.logger,
            include=include,
            exclude=exclude,
            metric=self.metric,
            memory_limit=self.memory_limit,
            disable_file_output=self.disable_file_output,
            **self.resampling_strategy_args)

        # Startup time plus a fixed 5 units are deducted from the SMAC budget
        startup_time = self.watcher.wall_elapsed(self.dataset_name)
        total_walltime_limit = self.total_walltime_limit - startup_time - 5
        scenario_dict = {
            'abort_on_first_run_crash': False,
            'cs': self.config_space,
            'cutoff_time': self.func_eval_time_limit,
            'deterministic': 'true',
            'instances': instances,
            'memory_limit': self.memory_limit,
            'output-dir': self.backend.get_smac_output_directory(),
            'run_obj': 'quality',
            'shared-model': self.shared_mode,
            'wallclock_limit': total_walltime_limit,
            'cost_for_crash': WORST_POSSIBLE_RESULT,
        }
        if self.smac_scenario_args is not None:
            # These settings are fixed; user-supplied values are dropped
            for arg in [
                    'abort_on_first_run_crash',
                    'cs',
                    'deterministic',
                    'instances',
                    'output-dir',
                    'run_obj',
                    'shared-model',
                    'cost_for_crash',
            ]:
                if arg in self.smac_scenario_args:
                    self.logger.warning(
                        'Cannot override scenario argument %s, '
                        'will ignore this.', arg)
                    del self.smac_scenario_args[arg]
            # These may be overridden, but the override is reported
            for arg in [
                    'cutoff_time',
                    'memory_limit',
                    'wallclock_limit',
            ]:
                if arg in self.smac_scenario_args:
                    self.logger.warning(
                        'Overriding scenario argument %s: %s with value %s',
                        arg, scenario_dict[arg], self.smac_scenario_args[arg])
            scenario_dict.update(self.smac_scenario_args)

        if self.read_history:
            # Seed SMAC with a run history built from one fixed configuration
            import create_Runhistory
            values = {
                'balancing:strategy': 'none',
                'categorical_encoding:__choice__': 'no_encoding',
                'classifier:__choice__': 'random_forest',
                'imputation:strategy': 'mean',
                'preprocessor:__choice__': 'pca',
                'preprocessor:pca:whiten': 'False',
                'rescaling:__choice__': 'none',
                'classifier:random_forest:bootstrap': 'True',
                'classifier:random_forest:criterion': 'entropy',
                'classifier:random_forest:max_depth': 10,
                'classifier:random_forest:max_features':
                0.45000000000000001,
                'classifier:random_forest:max_leaf_nodes': 'None',
                'classifier:random_forest:min_impurity_decrease': 0.0,
                'classifier:random_forest:min_samples_leaf': 6,
                'classifier:random_forest:min_samples_split': 7,
                'classifier:random_forest:min_weight_fraction_leaf': 0.0,
                'classifier:random_forest:n_estimators': 512,
                'classifier:random_forest:random_state': 3,
            }
            config = create_Runhistory.defult_config_builder(
                configspace=self.config_space, values=values)
            runhistory, traj_logger = create_Runhistory.runhistory_builder(
                ta=ta,
                scenario_dic=scenario_dict,
                rng=seed,
                backend=self.backend,
                config_milad=config)

        else:
            runhistory = RunHistory(aggregate_func=average_cost)

        smac_args = {
            'scenario_dict': scenario_dict,
            'seed': seed,
            'ta': ta,
            'backend': self.backend,
            'metalearning_configurations': metalearning_configurations,
            'runhistory': runhistory,
        }

        if self.get_smac_object_callback is not None:
            smac = self.get_smac_object_callback(**smac_args)
        else:
            smac = get_smac_object(**smac_args)

        smac.optimize()

        # Patch SMAC to read in data from parallel runs after the last
        # function evaluation
        if self.shared_mode:
            pSMAC.read(
                run_history=smac.solver.runhistory,
                output_dirs=smac.solver.scenario.input_psmac_dirs,
                configuration_space=smac.solver.config_space,
                logger=smac.solver.logger,
            )

        if self.read_history:
            import pickle
            import create_Runhistory
            # NOTE(review): the dump locations below are hard-coded,
            # machine-specific paths -- confirm they should stay that way.
            # Context managers make sure the files are flushed and closed.
            with open("/home/dfki/Desktop/temp/pickel/new_runhistory.p",
                      "wb") as fh:
                pickle.dump(runhistory, fh)
            last_trajectories = create_Runhistory.trajectory_builder(
                traj_logger=traj_logger, config_milad=config)
            present_trajectories = smac.solver.intensifier.traj_logger.trajectory
            self.trajectory = present_trajectories + last_trajectories
            with open("/home/dfki/Desktop/temp/pickel/new_trajectory.p",
                      "wb") as fh:
                pickle.dump(self.trajectory, fh)

        else:
            self.trajectory = smac.solver.intensifier.traj_logger.trajectory

        self.runhistory = smac.solver.runhistory

        return self.runhistory, self.trajectory

Example #9

Show file

File: epils.py Project: k121995/Eric

    def run(self):
        """Runs the local-search optimization loop until the budget is exhausted

        After the initial design, every iteration (optionally) reads shared
        run histories, retrains the model, chooses a start point, runs the
        local search from it and races the resulting local incumbent against
        the global incumbent.

        Returns
        ----------
        incumbent
            The best found configuration (``self.incumbent`` at loop exit)
        """
        self.stats.start_timing()
        try:
            self.incumbent = self.initial_design.run()
        except FirstRunCrashedException:
            # A crashed first run is only fatal if the scenario asks for it.
            if self.scenario.abort_on_first_run_crash:
                raise

        # Main loop
        iteration = 1
        while True:
            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            # model training
            self.logger.info("Model Training")
            X, Y = self.rh2EPM.transform(self.runhistory)
            self.model.train(X, Y)
            self.acquisition_func.update(
                model=self.model,
                eta=self.runhistory.get_cost(self.incumbent))

            if iteration == 1:
                # The very first search starts from the initial incumbent.
                start_point = self.incumbent
            elif self.rng.rand() < self.restart_prob:
                # Restart from a freshly sampled configuration.
                self.logger.info("Restart Search")
                start_point = self.scenario.cs.sample_configuration()
            else:
                # Perturb the incumbent by a chain of random
                # one-exchange-neighbourhood steps.
                self.logger.info("Pertubate Incumbent")
                start_point = self.incumbent
                for _ in range(self.pertubation_steps):
                    # NOTE(review): self.rng.seed() is used as the seed value
                    # here; if self.rng is a numpy RandomState this call
                    # re-seeds the generator and returns None -- confirm that
                    # this is intended.
                    start_point = random.choice(
                        list(
                            get_one_exchange_neighbourhood(
                                start_point, seed=self.rng.seed())))

            # SLS
            self.logger.info("SLS")
            local_inc = self.local_search(start_point=start_point)

            # decide global inc
            self.logger.info("Race local incumbent against global incumbent")
            # don't be too aggressive here
            self.intensifier.minR = self.slow_race_minR
            self.intensifier.Adaptive_Capping_Slackfactor = self.slow_race_adaptive_capping_factor
            # log traj
            self.incumbent, inc_perf = self.intensifier.intensify(
                challengers=[local_inc],
                incumbent=self.incumbent,
                run_history=self.runhistory,
                aggregate_func=self.aggregate_func,
                time_bound=0.01,
                log_traj=True)
            if self.incumbent == local_inc:
                self.logger.info("Changed global incumbent!")

            if self.scenario.shared_model:
                pSMAC.write(run_history=self.runhistory,
                            output_directory=self.stats.output_dir,
                            num_run=self.num_run)

            iteration += 1

            # Pass the values as logging arguments so the message is only
            # formatted when the debug level is actually enabled.
            self.logger.debug(
                "Remaining budget: %f (wallclock), "
                "%f (ta costs), %f (target runs)",
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs())

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent

Example #10

Show file

File: smbo.py Project: chrinide/SMAC3

    def run(self) -> Configuration:
        """Runs the Bayesian optimization loop

        Each iteration asks the intensifier for the next run, submits it to
        the target-algorithm runner when the intent is RUN, incorporates all
        finished runs into the run history and stops once the budget is
        exhausted or a stop was requested.

        Returns
        ----------
        incumbent: Configuration
            The best found configuration
        """
        self.start()

        num_obj = len(self.scenario.multi_objectives)  # type: ignore[attr-defined] # noqa F821

        # Main BO loop
        while True:
            if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                pSMAC.read(
                    run_history=self.runhistory,
                    output_dirs=self.scenario.input_psmac_dirs,  # type: ignore[attr-defined] # noqa F821
                    configuration_space=self.config_space,
                    logger=self.logger,
                )

            start_time = time.time()

            # sample next configuration for intensification
            # Initial design runs are also included in the BO loop now.
            intent, run_info = self.intensifier.get_next_run(
                challengers=self.initial_design_configs,
                incumbent=self.incumbent,
                chooser=self.epm_chooser,
                run_history=self.runhistory,
                repeat_configs=self.intensifier.repeat_configs,
                num_workers=self.tae_runner.num_workers(),
            )

            # remove config from initial design challengers to not repeat it again
            self.initial_design_configs = [
                c for c in self.initial_design_configs if c != run_info.config
            ]

            # update timebound only if a 'new' configuration is sampled as the challenger
            # NOTE: time_spent / time_left are carried over between loop
            # iterations; the else branch reads the values assigned by an
            # earlier iteration in which intensifier.num_run was 0.
            if self.intensifier.num_run == 0:
                time_spent = time.time() - start_time
                time_left = self._get_timebound_for_intensification(
                    time_spent, update=False)
                self.logger.debug("New intensification time bound: %f",
                                  time_left)
            else:
                old_time_left = time_left
                time_spent = time_spent + (time.time() - start_time)
                time_left = self._get_timebound_for_intensification(
                    time_spent, update=True)
                self.logger.debug(
                    "Updated intensification time bound from %f to %f",
                    old_time_left,
                    time_left,
                )

            # Skip starting new runs if the budget is now exhausted
            if self.stats.is_budget_exhausted():
                intent = RunInfoIntent.SKIP

            # Skip the run if there was a request to do so.
            # For example, during intensifier intensification, we
            # don't want to rerun a config that was previously ran
            if intent == RunInfoIntent.RUN:
                # Track the fact that a run was launched in the run
                # history. It's status is tagged as RUNNING, and once
                # completed and processed, it will be updated accordingly
                self.runhistory.add(
                    config=run_info.config,
                    cost=float(MAXINT) if num_obj == 1 else np.full(
                        num_obj, float(MAXINT)),
                    time=0.0,
                    status=StatusType.RUNNING,
                    instance_id=run_info.instance,
                    seed=run_info.seed,
                    budget=run_info.budget,
                )

                run_info.config.config_id = self.runhistory.config_ids[
                    run_info.config]

                self.tae_runner.submit_run(run_info=run_info)

                # There are 2 criteria that the stats object uses to know
                # if the budged was exhausted.
                # The budget time, which can only be known when the run finishes,
                # And the number of ta executions. Because we submit the job at this point,
                # we count this submission as a run. This prevent for using more
                # runner runs than what the scenario allows
                self.stats.submitted_ta_runs += 1

            elif intent == RunInfoIntent.SKIP:
                # No launch is required
                # This marks a transition request from the intensifier
                # To a new iteration
                pass
            elif intent == RunInfoIntent.WAIT:
                # In any other case, we wait for resources
                # This likely indicates that no further decision
                # can be taken by the intensifier until more data is
                # available
                self.tae_runner.wait()
            else:
                raise NotImplementedError(
                    "No other RunInfoIntent has been coded!")

            # Check if there is any result, or else continue
            for run_info, result in self.tae_runner.get_finished_runs():

                # Add the results of the run to the run history
                # Additionally check for new incumbent
                self._incorporate_run_results(run_info, result, time_left)

            if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                assert self.scenario.output_dir_for_this_run is not None  # please mypy
                pSMAC.write(
                    run_history=self.runhistory,
                    output_directory=self.scenario.output_dir_for_this_run,  # type: ignore[attr-defined] # noqa F821
                    logger=self.logger,
                )

            # Lazy logging arguments, consistent with the other debug calls
            # in this method: formatting only happens if debug is enabled.
            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)",
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs(),
            )

            if self.stats.is_budget_exhausted() or self._stop:
                if self.stats.is_budget_exhausted():
                    self.logger.debug("Exhausted configuration budget")
                else:
                    self.logger.debug(
                        "Shutting down because a configuration or callback returned status STOP"
                    )

                # The budget can be exhausted  for 2 reasons: number of ta runs or
                # time. If the number of ta runs is reached, but there is still budget,
                # wait for the runs to finish
                while self.tae_runner.pending_runs():

                    self.tae_runner.wait()

                    for run_info, result in self.tae_runner.get_finished_runs():
                        # Add the results of the run to the run history
                        # Additionally check for new incumbent
                        self._incorporate_run_results(run_info, result,
                                                      time_left)

                # Break from the intensification loop,
                # as there are no more resources
                break

            # print stats at the end of each intensification iteration
            if self.intensifier.iteration_done:
                self.stats.print_stats(debug_out=True)

        return self.incumbent

Example #11

Show file

    def test_write(self):
        """Write a run history with pSMAC, read it back and compare the JSON.

        The history is filled with runs of several sampled configurations
        (with and without an instance id, plus one run of external origin),
        written to ``self.tmp_dir`` and read back into the same object. The
        number of entries must not change through the read, and the written
        ``runhistory.json`` must decode to the expected fixture.
        """
        # The nulls make sure that we correctly emit the python None value
        expected_json = (
            '{"data": [[[1, "branin", 1], [1, 1, {"__enum__": '
            '"StatusType.SUCCESS"}, null]], '
            '[[1, "branini", 1], [1, 1, {"__enum__": '
            '"StatusType.SUCCESS"}, null]], '
            '[[2, "branini", 1], [1, 1, {"__enum__": '
            '"StatusType.SUCCESS"}, null]], '
            '[[2, null, 1], [1, 1, {"__enum__": '
            '"StatusType.SUCCESS"}, null]], '
            '[[3, "branin-hoo", 1], [1, 1, {"__enum__": '
            '"StatusType.SUCCESS"}, null]], '
            '[[4, null, 1], [1, 1, {"__enum__": '
            '"StatusType.SUCCESS"}, null]]],'
            '"config_origins": {},'
            '"configs": {'
            '"4": {"x": -2.2060968293349363, "y": 5.183410905645716}, '
            '"3": {"x": -2.7986616377433045, "y": 1.385078921531967}, '
            '"1": {"x": 1.2553300705386103, "y": 10.804867401632372}, '
            '"2": {"x": -4.998284377739827, "y": 4.534988589477597}}}'
        )

        history = RunHistory(aggregate_func=average_cost)
        space = test_helpers.get_branin_config_space()
        space.seed(1)

        def record(cfg, **extra):
            # All runs in this test share cost, time, status and seed.
            history.add(cfg, 1, 1, StatusType.SUCCESS, seed=1, **extra)

        # Config on two instances
        first = space.sample_configuration()
        record(first, instance_id='branin')
        record(first, instance_id='branini')

        # Another config on a known instance
        second = space.sample_configuration()
        record(second, instance_id='branini')
        # Known Config on no instance
        record(second)

        # New config on new instance
        third = space.sample_configuration()
        record(third, instance_id='branin-hoo')

        # New config on no instance
        fourth = space.sample_configuration()
        record(fourth)

        # External configuration which will not be written to json file!
        external = space.sample_configuration()
        record(external, origin=DataOrigin.EXTERNAL_SAME_INSTANCES)

        test_logger = logging.getLogger("Test")
        pSMAC.write(history, self.tmp_dir, logger=test_logger)

        # Reading our own output back must not add or drop any entries.
        size_before_read = len(history.data)
        pSMAC.read(
            run_history=history,
            output_dirs=[self.tmp_dir],
            configuration_space=space,
            logger=test_logger,
        )
        self.assertEqual(
            size_before_read,
            len(history.data),
            "Runhistory should be the same and not changed after reading",
        )

        written_path = os.path.join(self.tmp_dir, 'runhistory.json')
        self.assertTrue(os.path.exists(written_path))

        # Decode both sides with the same enum hook before comparing.
        expected = json.loads(expected_json, object_hook=StatusType.enum_hook)
        with open(written_path) as fh:
            actual = json.load(fh, object_hook=StatusType.enum_hook)
        self.assertEqual(actual, expected)

Example #12

Show file

File: smbo.py Project: maxc01/SMAC3

    def run(self) -> Configuration:
        """Runs the Bayesian optimization loop

        Each iteration asks the intensifier for the next challenger,
        evaluates it against the incumbent (if there is one to evaluate) and
        stops once the budget is exhausted.

        Returns
        ----------
        incumbent: Configuration
            The best found configuration
        """
        self.start()

        # Main BO loop
        while True:
            if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,  # type: ignore[attr-defined] # noqa F821
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()

            # sample next configuration for intensification
            # Initial design runs are also included in the BO loop now.
            challenger, new_challenger = self.intensifier.get_next_challenger(
                challengers=self.initial_design_configs,
                chooser=self.epm_chooser,
                run_history=self.runhistory,
                repeat_configs=self.intensifier.repeat_configs
            )

            # remove config from initial design challengers to not repeat it again
            self.initial_design_configs = [c for c in self.initial_design_configs if c != challenger]

            # update timebound only if a 'new' configuration is sampled as the challenger
            # NOTE: time_left is carried over between iterations; it is only
            # (re)assigned here and is read below whenever a challenger exists.
            if new_challenger:
                time_spent = time.time() - start_time
                time_left = self._get_timebound_for_intensification(time_spent)

            if challenger:
                # evaluate selected challenger
                self.logger.debug("Intensify - evaluate challenger")

                try:
                    self.incumbent, inc_perf = self.intensifier.eval_challenger(
                        challenger=challenger,
                        incumbent=self.incumbent,
                        run_history=self.runhistory,
                        time_bound=max(self.intensifier._min_time, time_left))

                except FirstRunCrashedException:
                    # A crashed first run is only fatal if the scenario asks for it.
                    if self.scenario.abort_on_first_run_crash:  # type: ignore[attr-defined] # noqa F821
                        raise
                if self.scenario.shared_model:  # type: ignore[attr-defined] # noqa F821
                    assert self.scenario.output_dir_for_this_run is not None  # please mypy
                    pSMAC.write(run_history=self.runhistory,
                                output_directory=self.scenario.output_dir_for_this_run,  # type: ignore[attr-defined] # noqa F821
                                logger=self.logger)

            # Pass the values as logging arguments so the message is only
            # formatted when the debug level is actually enabled.
            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)",
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs())

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent

Example #13

Show file

    def run_smbo(self):

        self.watcher.start_task('SMBO')

        # == first things first: load the datamanager
        self.reset_data_manager()

        # == Initialize non-SMBO stuff
        # first create a scenario
        seed = self.seed
        self.config_space.seed(seed)
        num_params = len(self.config_space.get_hyperparameters())
        # allocate a run history
        num_run = self.start_num_run

        # Initialize some SMAC dependencies
        runhistory = RunHistory(aggregate_func=average_cost)
        # meta_runhistory = RunHistory(aggregate_func=average_cost)
        # meta_runs_dataset_indices = {}

        # == METALEARNING suggestions
        # we start by evaluating the defaults on the full dataset again
        # and add the suggestions from metalearning behind it

        if self.num_metalearning_cfgs > 0:
            if self.metadata_directory is None:
                metalearning_directory = os.path.dirname(
                    autosklearn.metalearning.__file__)
                # There is no multilabel data in OpenML
                if self.task == MULTILABEL_CLASSIFICATION:
                    meta_task = BINARY_CLASSIFICATION
                else:
                    meta_task = self.task
                metadata_directory = os.path.join(
                    metalearning_directory, 'files', '%s_%s_%s' %
                    (self.metric, TASK_TYPES_TO_STRING[meta_task], 'sparse'
                     if self.datamanager.info['is_sparse'] else 'dense'))
                self.metadata_directory = metadata_directory

            if os.path.exists(self.metadata_directory):

                self.logger.info('Metadata directory: %s',
                                 self.metadata_directory)
                meta_base = MetaBase(self.config_space,
                                     self.metadata_directory)

                try:
                    meta_base.remove_dataset(self.dataset_name)
                except:
                    pass

                metafeature_calculation_time_limit = int(
                    self.total_walltime_limit / 4)
                metafeature_calculation_start_time = time.time()
                meta_features = self._calculate_metafeatures_with_limits(
                    metafeature_calculation_time_limit)
                metafeature_calculation_end_time = time.time()
                metafeature_calculation_time_limit = \
                    metafeature_calculation_time_limit - (
                    metafeature_calculation_end_time -
                    metafeature_calculation_start_time)

                if metafeature_calculation_time_limit < 1:
                    self.logger.warning(
                        'Time limit for metafeature calculation less '
                        'than 1 seconds (%f). Skipping calculation '
                        'of metafeatures for encoded dataset.',
                        metafeature_calculation_time_limit)
                    meta_features_encoded = None
                else:
                    with warnings.catch_warnings():
                        warnings.showwarning = self._send_warnings_to_log
                        self.datamanager.perform1HotEncoding()
                    meta_features_encoded = \
                        self._calculate_metafeatures_encoded_with_limits(
                            metafeature_calculation_time_limit)

                # In case there is a problem calculating the encoded meta-features
                if meta_features is None:
                    if meta_features_encoded is not None:
                        meta_features = meta_features_encoded
                else:
                    if meta_features_encoded is not None:
                        meta_features.metafeature_values.update(
                            meta_features_encoded.metafeature_values)

                if meta_features is not None:
                    meta_base.add_dataset(self.dataset_name, meta_features)
                    # Do mean imputation of the meta-features - should be done specific
                    # for each prediction model!
                    all_metafeatures = meta_base.get_metafeatures(
                        features=list(meta_features.keys()))
                    all_metafeatures.fillna(all_metafeatures.mean(),
                                            inplace=True)

                    with warnings.catch_warnings():
                        warnings.showwarning = self._send_warnings_to_log
                        metalearning_configurations = self.collect_metalearning_suggestions(
                            meta_base)
                    if metalearning_configurations is None:
                        metalearning_configurations = []
                    self.reset_data_manager()

                    self.logger.info('%s', meta_features)

                    # Convert meta-features into a dictionary because the scenario
                    # expects a dictionary
                    meta_features_dict = {}
                    for dataset, series in all_metafeatures.iterrows():
                        meta_features_dict[dataset] = series.values
                    meta_features_list = []
                    for meta_feature_name in all_metafeatures.columns:
                        meta_features_list.append(
                            meta_features[meta_feature_name].value)
                    meta_features_list = np.array(meta_features_list).reshape(
                        (1, -1))
                    self.logger.info(list(meta_features_dict.keys()))

                    # meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric])
                    # meta_runs_index = 0
                    # try:
                    #    meta_durations = meta_base.get_all_runs('runtime')
                    #    read_runtime_data = True
                    # except KeyError:
                    #    read_runtime_data = False
                    #    self.logger.critical('Cannot read runtime data.')
                    #    if self.acquisition_function == 'EIPS':
                    #        self.logger.critical('Reverting to acquisition function EI!')
                    #        self.acquisition_function = 'EI'

                    # for meta_dataset in meta_runs.index:
                    #     meta_dataset_start_index = meta_runs_index
                    #     for meta_configuration in meta_runs.columns:
                    #         if np.isfinite(meta_runs.loc[meta_dataset, meta_configuration]):
                    #             try:
                    #                 config = meta_base.get_configuration_from_algorithm_index(
                    #                     meta_configuration)
                    #                 cost = meta_runs.loc[meta_dataset, meta_configuration]
                    #                 if read_runtime_data:
                    #                     runtime = meta_durations.loc[meta_dataset,
                    #                                                  meta_configuration]
                    #                 else:
                    #                     runtime = 1
                    #                 # TODO read out other status types!
                    #                 meta_runhistory.add(config, cost, runtime,
                    #                                     StatusType.SUCCESS,
                    #                                     instance_id=meta_dataset)
                    #                 meta_runs_index += 1
                    #             except:
                    #                 # TODO maybe add warning
                    #                 pass
                    #
                    #     meta_runs_dataset_indices[meta_dataset] = (
                    #         meta_dataset_start_index, meta_runs_index)
            else:
                meta_features = None
                self.logger.warning('Could not find meta-data directory %s' %
                                    metadata_directory)

        else:
            meta_features = None

        if meta_features is None:
            if self.acquisition_function == 'EIPS':
                self.logger.critical('Reverting to acquisition function EI!')
                self.acquisition_function = 'EI'
            meta_features_list = []
            meta_features_dict = {}
            metalearning_configurations = []

        if self.resampling_strategy in [
                'partial-cv', 'partial-cv-iterative-fit'
        ]:
            num_folds = self.resampling_strategy_args['folds']
            instances = [[
                json.dumps({
                    'task_id': self.dataset_name,
                    'fold': fold_number
                })
            ] for fold_number in range(num_folds)]
        else:
            instances = [[json.dumps({'task_id': self.dataset_name})]]

        startup_time = self.watcher.wall_elapsed(self.dataset_name)
        total_walltime_limit = self.total_walltime_limit - startup_time - 5
        scenario_dict = {
            'cs': self.config_space,
            'cutoff-time': self.func_eval_time_limit,
            'memory-limit': self.memory_limit,
            'wallclock-limit': total_walltime_limit,
            'output-dir': self.backend.get_smac_output_directory(self.seed),
            'shared-model': self.shared_mode,
            'run-obj': 'quality',
            'deterministic': 'true',
            'instances': instances
        }

        if self.configuration_mode == 'RANDOM':
            scenario_dict['minR'] = len(
                instances) if instances is not None else 1
            scenario_dict['initial_incumbent'] = 'RANDOM'

        self.scenario = Scenario(scenario_dict)

        # TODO rebuild target algorithm to be it's own target algorithm
        # evaluator, which takes into account that a run can be killed prior
        # to the model being fully fitted; thus putting intermediate results
        # into a queue and querying them once the time is over
        exclude = dict()
        include = dict()
        if self.include_preprocessors is not None and \
                self.exclude_preprocessors is not None:
            raise ValueError('Cannot specify include_preprocessors and '
                             'exclude_preprocessors.')
        elif self.include_preprocessors is not None:
            include['preprocessor'] = self.include_preprocessors
        elif self.exclude_preprocessors is not None:
            exclude['preprocessor'] = self.exclude_preprocessors
        if self.include_estimators is not None and \
                self.exclude_preprocessors is not None:
            raise ValueError('Cannot specify include_estimators and '
                             'exclude_estimators.')
        elif self.include_estimators is not None:
            if self.task in CLASSIFICATION_TASKS:
                include['classifier'] = self.include_estimators
            elif self.task in REGRESSION_TASKS:
                include['regressor'] = self.include_estimators
            else:
                raise ValueError(self.task)
        elif self.exclude_estimators is not None:
            if self.task in CLASSIFICATION_TASKS:
                exclude['classifier'] = self.exclude_estimators
            elif self.task in REGRESSION_TASKS:
                exclude['regressor'] = self.exclude_estimators
            else:
                raise ValueError(self.task)

        ta = ExecuteTaFuncWithQueue(
            backend=self.backend,
            autosklearn_seed=seed,
            resampling_strategy=self.resampling_strategy,
            initial_num_run=num_run,
            logger=self.logger,
            include=include,
            exclude=exclude,
            metric=self.metric,
            memory_limit=self.memory_limit,
            disable_file_output=self.disable_file_output,
            **self.resampling_strategy_args)

        types, bounds = get_types(self.config_space,
                                  self.scenario.feature_array)

        # TODO extract generation of SMAC object into it's own function for
        # testing
        if self.acquisition_function == 'EI':
            model = RandomForestWithInstances(
                types=types,
                bounds=bounds,
                #instance_features=meta_features_list,
                seed=1,
                num_trees=10)
            rh2EPM = RunHistory2EPM4Cost(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=[
                                             StatusType.SUCCESS,
                                             StatusType.MEMOUT,
                                             StatusType.TIMEOUT
                                         ],
                                         impute_censored_data=False,
                                         impute_state=None)
            _smac_arguments = dict(scenario=self.scenario,
                                   model=model,
                                   rng=seed,
                                   runhistory2epm=rh2EPM,
                                   tae_runner=ta,
                                   runhistory=runhistory)
        elif self.acquisition_function == 'EIPS':
            rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=[
                                             StatusType.SUCCESS,
                                             StatusType.MEMOUT,
                                             StatusType.TIMEOUT
                                         ],
                                         impute_censored_data=False,
                                         impute_state=None)
            model = UncorrelatedMultiObjectiveRandomForestWithInstances(
                ['cost', 'runtime'],
                types=types,
                bounds=bounds,
                num_trees=10,
                instance_features=meta_features_list,
                seed=1)
            acquisition_function = EIPS(model)
            _smac_arguments = dict(scenario=self.scenario,
                                   model=model,
                                   rng=seed,
                                   tae_runner=ta,
                                   runhistory2epm=rh2EPM,
                                   runhistory=runhistory,
                                   acquisition_function=acquisition_function)
        else:
            raise ValueError('Unknown acquisition function value %s!' %
                             self.acquisition_function)

        if self.configuration_mode == 'SMAC':
            smac = SMAC(**_smac_arguments)
        elif self.configuration_mode in ['ROAR', 'RANDOM']:
            for not_in_roar in ['runhistory2epm', 'model']:
                if not_in_roar in _smac_arguments:
                    del _smac_arguments[not_in_roar]
            smac = ROAR(**_smac_arguments)
        else:
            raise ValueError(self.configuration_mode)

        # Build a runtime model
        # runtime_rf = RandomForestWithInstances(types,
        #                                        instance_features=meta_features_list,
        #                                        seed=1, num_trees=10)
        # runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
        #                                      scenario=self.scenario,
        #                                      success_states=None,
        #                                      impute_censored_data=False,
        #                                      impute_state=None)
        # X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory)
        # runtime_rf.train(X_runtime, y_runtime[:, 1].flatten())
        # X_meta, Y_meta = rh2EPM.transform(meta_runhistory)
        # # Transform Y_meta on a per-dataset base
        # for meta_dataset in meta_runs_dataset_indices:
        #     start_index, end_index = meta_runs_dataset_indices[meta_dataset]
        #     end_index += 1  # Python indexing
        #     Y_meta[start_index:end_index, 0]\
        #         [Y_meta[start_index:end_index, 0] >2.0] =  2.0
        #     dataset_minimum = np.min(Y_meta[start_index:end_index, 0])
        #     Y_meta[start_index:end_index, 0] = 1 - (
        #         (1. - Y_meta[start_index:end_index, 0]) /
        #         (1. - dataset_minimum))
        #     Y_meta[start_index:end_index, 0]\
        #           [Y_meta[start_index:end_index, 0] > 2] = 2

        smac.solver.stats.start_timing()
        # == first, evaluate all metelearning and default configurations
        smac.solver.incumbent = smac.solver.initial_design.run()

        for challenger in metalearning_configurations:

            smac.solver.incumbent, inc_perf = smac.solver.intensifier.intensify(
                challengers=[challenger],
                incumbent=smac.solver.incumbent,
                run_history=smac.solver.runhistory,
                aggregate_func=smac.solver.aggregate_func,
                time_bound=self.total_walltime_limit)

            if smac.solver.scenario.shared_model:
                pSMAC.write(run_history=smac.solver.runhistory,
                            output_directory=smac.solver.scenario.output_dir,
                            num_run=self.seed)

            if smac.solver.stats.is_budget_exhausted():
                break

        # == after metalearning run SMAC loop
        while True:

            if smac.solver.scenario.shared_model:
                pSMAC.read(run_history=smac.solver.runhistory,
                           output_dirs=glob.glob(
                               self.backend.get_smac_output_glob()),
                           configuration_space=self.config_space,
                           logger=self.logger)

            choose_next_start_time = time.time()
            try:
                challengers = self.choose_next(smac)
            except Exception as e:
                self.logger.error(e)
                self.logger.error("Error in getting next configurations "
                                  "with SMAC. Using random configuration!")
                next_config = self.config_space.sample_configuration()
                challengers = [next_config]
            time_for_choose_next = time.time() - choose_next_start_time
            self.logger.info('Used %g seconds to find next '
                             'configurations' % (time_for_choose_next))

            time_for_choose_next = max(time_for_choose_next, 1.0)
            smac.solver.incumbent, inc_perf = smac.solver.intensifier.intensify(
                challengers=challengers,
                incumbent=smac.solver.incumbent,
                run_history=smac.solver.runhistory,
                aggregate_func=smac.solver.aggregate_func,
                time_bound=time_for_choose_next)

            if smac.solver.scenario.shared_model:
                pSMAC.write(run_history=smac.solver.runhistory,
                            output_directory=smac.solver.scenario.output_dir,
                            num_run=self.seed)

            if smac.solver.stats.is_budget_exhausted():
                break

        self.runhistory = smac.solver.runhistory
        self.trajectory = smac.solver.intensifier.traj_logger.trajectory
        smac.runhistory = self.runhistory
        self.fANOVA_input = smac.get_X_y()

        return self.runhistory, self.trajectory, self.fANOVA_input

Example #14

Show file

File: psmac_facade.py Project: zenghanfu/SMAC3

    def optimize(self):
        """
        Optimizes the algorithm provided in scenario (given in constructor)

        Starts ``self.n_optimizers`` worker processes running ``optimize``,
        collects the ``(incumbent, process id)`` pair each worker reports
        through a queue, and afterwards loads the runhistories found under
        ``self.scenario.input_psmac_dirs`` into ``self.rh`` via ``read``.

        Returns
        -------
        incumbent(s) : ndarray[Configuration]
            All collected incumbents if ``self.n_optimizers == self.n_incs``.
            Otherwise the subset picked by ``self.get_best_incumbents_ids``:
            the ``val_ids`` selection when it is non-empty, else the
            ``est_ids`` selection.

        Notes
        -----
        The process ids reported by the workers are collected but are not
        part of the return value.
        """
        # Local import: multiprocessing queues signal "nothing available yet"
        # with the standard library's queue.Empty.
        from queue import Empty

        # Setup output directory
        if self.output_dir is None:
            self.scenario.output_dir = "psmac3-output_%s" % (
                datetime.datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d_%H:%M:%S_%f'))
            self.output_dir = create_output_directory(self.scenario,
                                                      run_id=self.run_id,
                                                      logger=self.logger)
            if self.shared_model:
                self.scenario.shared_model = self.shared_model
        if self.scenario.input_psmac_dirs is None:
            # Default to the per-run folders below the common output directory
            self.scenario.input_psmac_dirs = os.path.sep.join(
                (self.scenario.output_dir, 'run_*'))

        # NOTE(review): `scen` is never used below -- the workers receive
        # self.scenario. Confirm whether this copy (with the output
        # directories cleared) was meant to be passed to them instead.
        scen = copy.deepcopy(self.scenario)
        scen.output_dir_for_this_run = None
        scen.output_dir = None
        self.logger.info("+" * 120)
        self.logger.info("PSMAC run")

        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Multiprocessing part start ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
        q = multiprocessing.Queue()
        procs = []
        for p in range(self.n_optimizers):
            proc = multiprocessing.Process(
                target=optimize,
                args=(
                    q,  # Output queue
                    self.scenario,  # Scenario object
                    self._tae,  # type of tae to run target with
                    p,  # process_id (used in output folder name)
                    self.output_dir,  # directory to create outputs in
                ),
                kwargs=self.kwargs)
            proc.start()
            procs.append(proc)

        incs = np.empty((self.n_optimizers, ), dtype=Configuration)
        pids = np.empty((self.n_optimizers, ), dtype=int)
        idx = 0
        # Consume the results *before* joining: a process that has put items
        # on a queue does not terminate until they have been flushed to the
        # underlying pipe, so joining first can deadlock on large payloads.
        # Polling with a timeout also avoids Queue.empty(), which the
        # multiprocessing documentation describes as unreliable.
        # Each worker is expected to report exactly one result -- TODO confirm
        # against `optimize`.
        while idx < self.n_optimizers:
            try:
                conf, pid = q.get(timeout=1.0)
            except Empty:
                if any(proc.is_alive() for proc in procs):
                    # Some worker is still optimizing; keep waiting.
                    continue
                # Every worker has exited. Give a result that was written
                # right before exit one last chance to show up, then stop
                # (a worker may have died without reporting anything).
                try:
                    conf, pid = q.get(timeout=1.0)
                except Empty:
                    break
            incs[idx] = conf
            pids[idx] = pid
            idx += 1
        for proc in procs:
            proc.join()

        self.logger.info('Loading all runhistories')
        read(self.rh, self.scenario.input_psmac_dirs, self.scenario.cs,
             self.logger)
        q.close()
        # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Multiprocessing part end ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
        if self.n_optimizers == self.n_incs:  # no validation necessary just return all incumbents
            return incs
        else:
            _, val_ids, _, est_ids = self.get_best_incumbents_ids(
                incs)  # determine the best incumbents
            if val_ids:
                return incs[val_ids]
            return incs[est_ids]

Example #15

Show file

    def run(self):
        """Runs the Bayesian optimization loop

        Every iteration optionally reads shared pSMAC runhistories, selects
        challengers and then either intensifies locally (``self.server`` is
        None) or pushes the work to ``self.server`` and merges the runhistory
        pulled back from it. The loop ends once the budget is exhausted.

        Returns
        ----------
        incumbent: np.array(1, H)
            The best found configuration
        """
        self.start()
        # Iteration counter, only used for the progress printout below
        iteration = 0
        # Main BO loop
        while True:
            # Print the incumbent cost of every SMBO round (including the
            # initial round, SMBO 0)
            print('SMBO ' + str(iteration) + ': ' +
                  str(self.runhistory.get_cost(self.incumbent)))
            iteration += 1

            if self.scenario.shared_model:
                pSMAC.read(run_history=self.runhistory,
                           output_dirs=self.scenario.input_psmac_dirs,
                           configuration_space=self.config_space,
                           logger=self.logger)

            start_time = time.time()
            X, Y = self.rh2EPM.transform(self.runhistory)

            self.logger.debug("Search for next configuration")
            # get all found configurations sorted according to acq
            challengers = self.choose_next(X, Y)

            time_spent = time.time() - start_time
            time_left = self._get_timebound_for_intensification(time_spent)

            self.logger.debug("Intensify")

            if self.server is None:
                self.incumbent, inc_perf = self.intensifier.intensify(
                    challengers=challengers,
                    incumbent=self.incumbent,
                    run_history=self.runhistory,
                    aggregate_func=self.aggregate_func,
                    time_bound=max(self.intensifier._min_time, time_left))
            else:
                # Hand the challengers to the workers; their losses come back
                # through the runhistory pulled below.
                print(time_left)
                self.server.push(incumbent=self.incumbent,
                                 runhistory=self.runhistory,
                                 challengers=challengers.challengers,
                                 time_left=time_left)
                # Pull the workers' runhistory and merge it into ours
                worker_incumbent, new_runhistory = self.server.pull()
                self.runhistory.update(new_runhistory)
                # After the merge, check whether the workers' incumbent beats
                # the current one: the workers do not hold the complete
                # runhistory, so the comparison has to happen here.
                runhistory_old = self.runhistory.get_history_for_config(
                    self.incumbent)
                runhistory_new = self.runhistory.get_history_for_config(
                    worker_incumbent)
                # Lowest value of the first field of each history entry
                # (compared as the cost). A configuration without any entry
                # counts as infinitely expensive, so an unevaluated worker
                # incumbent never replaces the current one and the loop does
                # not crash on an empty history.
                lowest_cost_old = min((entry[0] for entry in runhistory_old),
                                      default=float('inf'))
                lowest_cost_new = min((entry[0] for entry in runhistory_new),
                                      default=float('inf'))
                if lowest_cost_new < lowest_cost_old:
                    # Switch to the new incumbent
                    self.incumbent = worker_incumbent
                # TODO: consider delegating this comparison to
                #   new_incumbent = self._compare_configs(
                #       incumbent=incumbent, challenger=challenger,
                #       run_history=run_history,
                #       aggregate_func=aggregate_func,
                #       log_traj=log_traj)

            if self.scenario.shared_model:
                pSMAC.write(
                    run_history=self.runhistory,
                    output_directory=self.scenario.output_dir_for_this_run)

            # Use the instance logger (like every other message in this
            # method) instead of the root logger, with lazy %-formatting.
            self.logger.debug(
                "Remaining budget: %f (wallclock), %f (ta costs), %f (target runs)",
                self.stats.get_remaing_time_budget(),
                self.stats.get_remaining_ta_budget(),
                self.stats.get_remaining_ta_runs())

            if self.stats.is_budget_exhausted():
                break

            self.stats.print_stats(debug_out=True)

        return self.incumbent