コード例 #1
0
ファイル: benchmark.py プロジェクト: angellandros/ML4AAD-1
def run_roar(python_path,
             w_dir,
             n_iter=5,
             input_file='../rawAllx1000.json',
             seeds=None,
             task_ids=None,
             max_tries=10):
    """Search for a good integer cutoff ``x1`` in [1, 99] with ROAR.

    Each candidate cutoff is scored by ``find_cut_off.main``; the second
    element of every returned entry that lies strictly between 0 and 1 is
    averaged, and ROAR minimises ``1 - mean``.

    Parameters
    ----------
    python_path, w_dir, input_file, task_ids
        Forwarded unchanged to ``find_cut_off.main``.
    n_iter : int
        Forwarded to ``find_cut_off.main`` as ``iter``.
    seeds : list, optional
        Forwarded to ``find_cut_off.main``. Defaults to ``[1]``.
    max_tries : int
        Used as the scenario's ``runcount-limit``.

    Returns
    -------
    tuple
        ``(x, cost, roar)``: the configuration returned by
        ``roar.optimize()``, the cost obtained by evaluating that
        configuration once more, and the ROAR object itself.
    """
    from smac.configspace import ConfigurationSpace
    from ConfigSpace.hyperparameters import UniformIntegerHyperparameter
    from smac.scenario.scenario import Scenario
    from smac.facade.roar_facade import ROAR

    # Build the default list per call: a list literal in the signature would
    # be one shared object that any callee could mutate for all later calls.
    if seeds is None:
        seeds = [1]

    def test_func(cutoff):
        """Return the cost (1 - mean score) of one cutoff configuration."""
        cutoff = cutoff.get('x1')
        print(cutoff)
        result = find_cut_off.main(python_path=python_path,
                                   w_dir=w_dir,
                                   iter=n_iter,
                                   input_file=input_file,
                                   cutoffs=[cutoff],
                                   seeds=seeds,
                                   task_ids=task_ids)
        # Only scores strictly inside (0, 1) take part in the average.
        cleaned = [x[1] for x in result if 0.0 < x[1] < 1.0]
        # No usable score at all counts as the worst case (cost 1.0).
        mean = np.mean(cleaned) if cleaned else 0.0
        mean = mean if mean != 1.0 else 0.0
        return 1.0 - mean

    cs = ConfigurationSpace()
    cutoff_parameter = UniformIntegerHyperparameter('x1',
                                                    1,
                                                    99,
                                                    default_value=50)
    cs.add_hyperparameter(cutoff_parameter)
    scenario = Scenario({
        "run_obj": "quality",  # we optimize quality (alternatively runtime)
        "runcount-limit": max_tries,  # maximum function evaluations
        "cs": cs,  # configuration space
        "deterministic": "true",
        "abort_on_first_run_crash": "false",
    })

    roar = ROAR(scenario=scenario, tae_runner=test_func, rng=1234)

    x = roar.optimize()

    # Evaluate the returned configuration once more to report its cost.
    cost = test_func(x)

    return x, cost, roar
コード例 #2
0
    def _main_cli(self):
        """Main function of SMAC for CLI interface

        Reads the command line, routes root-logger output to stdout and
        builds the optimizer selected by ``args.mode``. The optimizer is
        only constructed here, not run.

        Returns
        -------
        instance
            The SMAC, ROAR or EPILS optimizer, or None when ``args.mode``
            is none of these.
        """
        self.logger.info("SMAC call: %s" % (" ".join(sys.argv)))

        cmd_reader = CMDReader()
        args, _ = cmd_reader.read_cmd()

        root_logger = logging.getLogger()
        root_logger.setLevel(args.verbose_level)
        logger_handler = logging.StreamHandler(stream=sys.stdout)
        # Terse format at INFO and above, detailed format below INFO.
        if root_logger.level >= logging.INFO:
            formatter = logging.Formatter("%(levelname)s:\t%(message)s")
        else:
            formatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(name)s:%(message)s",
                "%Y-%m-%d %H:%M:%S")
        logger_handler.setFormatter(formatter)
        root_logger.addHandler(logger_handler)
        # Remove the pre-existing default handler. When the root logger had
        # no handler before, handlers[0] is the one just added, and removing
        # it would silence all output -- so only remove when another exists.
        if len(root_logger.handlers) > 1:
            root_logger.removeHandler(root_logger.handlers[0])

        # Create defaults
        rh = None
        initial_configs = None
        stats = None
        incumbent = None

        # Create scenario-object
        scen = Scenario(args.scenario_file, [])

        if args.mode == "SMAC":
            optimizer = SMAC(scenario=scen,
                             rng=np.random.RandomState(args.seed),
                             runhistory=rh,
                             initial_configurations=initial_configs,
                             stats=stats,
                             restore_incumbent=incumbent,
                             run_id=args.seed)
        elif args.mode == "ROAR":
            optimizer = ROAR(scenario=scen,
                             rng=np.random.RandomState(args.seed),
                             runhistory=rh,
                             initial_configurations=initial_configs,
                             run_id=args.seed)
        elif args.mode == "EPILS":
            optimizer = EPILS(scenario=scen,
                              rng=np.random.RandomState(args.seed),
                              runhistory=rh,
                              initial_configurations=initial_configs,
                              run_id=args.seed)
        else:
            optimizer = None

        return optimizer
コード例 #3
0
def create_smac_rs(benchmark, output_dir: Path, seed: int):
    """Create a ROAR optimizer that minimises ``benchmark``'s objective.

    The configuration space is taken from the benchmark (seeded with
    ``seed``); the optimizer is returned without being run.
    """
    config_space = benchmark.get_configuration_space(seed=seed)

    def optimization_function_wrapper(cfg, seed, **kwargs):
        """Evaluate ``cfg`` on the benchmark and return its function value."""
        outcome = benchmark.objective_function(cfg, rng=seed)
        # The sampled configuration is discarded.
        # NOTE(review): presumably this only advances the configuration
        # space's random state -- confirm whether it is still needed.
        config_space.sample_configuration()
        return outcome['function_value']

    settings = {
        "run_obj": "quality",  # optimise solution quality, not runtime
        "wallclock-limit": 60,
        "cs": config_space,
        "deterministic": "true",
        "runcount-limit": 200,
        "limit_resources": True,  # pynisher enforces memory/runtime limits
        "cutoff": 1800,  # runtime limit for the target algorithm
        "memory_limit": 10000,  # adjust to a sensible value for the hardware
        "output_dir": output_dir,
        "abort_on_first_run_crash": True,
    }

    return ROAR(
        scenario=Scenario(settings),
        rng=np.random.RandomState(seed),
        tae_runner=optimization_function_wrapper,
    )
コード例 #4
0
 def test_inject_stats_and_runhistory_object_to_TAE(self):
     # A freshly built runner carries neither stats nor a runhistory ...
     runner = ExecuteTAFuncArray(lambda x: x**2)
     self.assertIsNone(runner.stats)
     self.assertIsNone(runner.runhistory)
     # ... and constructing the facade is expected to attach both to it.
     ROAR(scenario=self.scenario, tae_runner=runner)
     self.assertIsInstance(runner.stats, Stats)
     self.assertIsInstance(runner.runhistory, RunHistory)
コード例 #5
0
def get_roar_object_callback(
    scenario_dict,
    seed,
    ta,
    ta_kwargs,
    metalearning_configurations,
    n_jobs,
    dask_client,
):
    """Random online adaptive racing.

    Builds a ROAR optimizer from ``scenario_dict``. ``seed`` is used both
    as the random seed and as the run id. ``metalearning_configurations``
    is accepted but not used here.

    Raises
    ------
    ValueError
        If ``n_jobs`` is greater than 1, or a dask client reporting more
        than one worker is supplied.
    """
    # Decide on n_jobs alone when possible; the dask client is only
    # queried when n_jobs does not already indicate parallel execution.
    parallel_requested = n_jobs > 1
    if not parallel_requested and dask_client:
        parallel_requested = len(dask_client.nthreads()) > 1

    if parallel_requested:
        raise ValueError(
            "Please make sure to guard the code invoking Auto-sklearn by "
            "`if __name__ == '__main__'` and remove this exception.")

    return ROAR(
        scenario=Scenario(scenario_dict),
        rng=seed,
        tae_runner=ta,
        tae_runner_kwargs=ta_kwargs,
        run_id=seed,
        dask_client=dask_client,
        n_jobs=n_jobs,
    )
コード例 #6
0
ファイル: test_roar_facade.py プロジェクト: zwj-coder/SMAC3
        def opt_rosenbrock():
            """Optimise the 2-D Rosenbrock function with ROAR.

            Returns the incumbent found and the scenario's output directory.
            """
            space = ConfigurationSpace()
            # Two float parameters on [-5, 5], differing only in name/default.
            for name, default in (("x1", -3), ("x2", -4)):
                space.add_hyperparameter(
                    UniformFloatHyperparameter(name, -5, 5, default_value=default))

            settings = {
                "run_obj": "quality",  # optimise quality (alternatively runtime)
                "runcount-limit": 50,  # maximum function evaluations
                "cs": space,  # configuration space
                "deterministic": "true",
                "intensification_percentage": 0.000000001,
            }

            optimizer = ROAR(scenario=Scenario(settings),
                             rng=np.random.RandomState(42),
                             tae_runner=rosenbrock_2d)
            best = optimizer.optimize()
            return best, optimizer.scenario.output_dir
コード例 #7
0
        def get_roar_object_callback(scenario_dict, seed, ta, **kwargs):
            """Random online adaptive racing.

            http://ml.informatik.uni-freiburg.de/papers/11-LION5-SMAC.pdf

            Builds a ROAR optimizer from ``scenario_dict``, seeded with
            ``seed`` and running ``ta``. Extra keyword arguments are
            accepted and ignored.
            """
            roar_scenario = Scenario(scenario_dict)
            return ROAR(scenario=roar_scenario, rng=seed, tae_runner=ta)
コード例 #8
0
    def main_cli(self):
        """Main function of SMAC for CLI interface.

        Reads the command line, builds the scenario, optionally warm-starts
        from foreign runhistories and incumbents, then runs the optimizer
        selected by ``args_.modus``. The runhistory is written to
        ``<output_dir>/runhistory.json`` even when optimization fails.

        Raises
        ------
        ValueError
            If ``args_.modus`` is neither "SMAC" nor "ROAR".
        """

        cmd_reader = CMDReader()
        args_, misc_args = cmd_reader.read_cmd()

        logging.basicConfig(level=args_.verbose_level)

        root_logger = logging.getLogger()
        root_logger.setLevel(args_.verbose_level)

        scen = Scenario(args_.scenario_file, misc_args)

        rh = None
        if args_.warmstart_runhistory:
            aggregate_func = average_cost
            rh = RunHistory(aggregate_func=aggregate_func)

            # Merge the runs of the other scenarios into this one.
            scen, rh = merge_foreign_data_from_file(
                scenario=scen,
                runhistory=rh,
                in_scenario_fn_list=args_.warmstart_scenario,
                in_runhistory_fn_list=args_.warmstart_runhistory,
                cs=scen.cs,
                aggregate_func=aggregate_func)

        initial_configs = None
        if args_.warmstart_incumbent:
            # Start from the default configuration plus the final incumbent
            # of every given trajectory file.
            initial_configs = [scen.cs.get_default_configuration()]
            for traj_fn in args_.warmstart_incumbent:
                trajectory = TrajLogger.read_traj_aclib_format(fn=traj_fn,
                                                               cs=scen.cs)
                initial_configs.append(trajectory[-1]["incumbent"])

        if args_.modus == "SMAC":
            optimizer = SMAC(scenario=scen,
                             rng=np.random.RandomState(args_.seed),
                             runhistory=rh,
                             initial_configurations=initial_configs)
        elif args_.modus == "ROAR":
            optimizer = ROAR(scenario=scen,
                             rng=np.random.RandomState(args_.seed),
                             runhistory=rh,
                             initial_configurations=initial_configs)
        else:
            # Without this branch `optimizer` stays unbound and the code
            # below fails with an unrelated UnboundLocalError.
            raise ValueError("Unknown modus: %s" % args_.modus)
        try:
            optimizer.optimize()

        finally:
            # ensure that the runhistory is always dumped in the end
            if scen.output_dir is not None:
                optimizer.solver.runhistory.save_json(
                    fn=os.path.join(scen.output_dir, "runhistory.json"))
コード例 #9
0
    def main_cli(self):
        """Main function of SMAC for CLI interface.

        Reads the command line, builds the scenario (with the seed as run
        id), optionally warm-starts from foreign runhistories and
        incumbents, then runs the optimizer selected by ``args_.modus``.
        A ``TAEAbortException`` or ``FirstRunCrashedException`` raised
        during optimization is logged as an error instead of propagating.

        Raises
        ------
        ValueError
            If ``args_.modus`` is neither "SMAC" nor "ROAR".
        """
        self.logger.info("SMAC call: %s" % (" ".join(sys.argv)))

        cmd_reader = CMDReader()
        args_, misc_args = cmd_reader.read_cmd()

        logging.basicConfig(level=args_.verbose_level)

        root_logger = logging.getLogger()
        root_logger.setLevel(args_.verbose_level)

        scen = Scenario(args_.scenario_file, misc_args, run_id=args_.seed)

        rh = None
        if args_.warmstart_runhistory:
            aggregate_func = average_cost
            rh = RunHistory(aggregate_func=aggregate_func)

            # Merge the runs of the other scenarios into this one.
            scen, rh = merge_foreign_data_from_file(
                scenario=scen,
                runhistory=rh,
                in_scenario_fn_list=args_.warmstart_scenario,
                in_runhistory_fn_list=args_.warmstart_runhistory,
                cs=scen.cs,
                aggregate_func=aggregate_func)

        initial_configs = None
        if args_.warmstart_incumbent:
            # Start from the default configuration plus the final incumbent
            # of every given trajectory file.
            initial_configs = [scen.cs.get_default_configuration()]
            for traj_fn in args_.warmstart_incumbent:
                trajectory = TrajLogger.read_traj_aclib_format(fn=traj_fn,
                                                               cs=scen.cs)
                initial_configs.append(trajectory[-1]["incumbent"])

        if args_.modus == "SMAC":
            optimizer = SMAC(scenario=scen,
                             rng=np.random.RandomState(args_.seed),
                             runhistory=rh,
                             initial_configurations=initial_configs)
        elif args_.modus == "ROAR":
            optimizer = ROAR(scenario=scen,
                             rng=np.random.RandomState(args_.seed),
                             runhistory=rh,
                             initial_configurations=initial_configs)
        else:
            # Without this branch `optimizer` stays unbound and the call
            # below fails with an unrelated UnboundLocalError.
            raise ValueError("Unknown modus: %s" % args_.modus)
        try:
            optimizer.optimize()
        except (TAEAbortException, FirstRunCrashedException) as err:
            self.logger.error(err)
コード例 #10
0
def get_roar_object_callback(
    scenario_dict,
    seed,
    ta,
    backend,
    metalearning_configurations,
    runhistory,
):
    """Random online adaptive racing.

    Stores the backend's SMAC output glob in ``scenario_dict`` (the
    caller's dict is modified in place) and builds a ROAR optimizer from
    it. ``seed`` doubles as the run id; ``metalearning_configurations``
    is accepted but not used here.
    """
    psmac_glob = backend.get_smac_output_glob()
    scenario_dict['input_psmac_dirs'] = psmac_glob

    roar_scenario = Scenario(scenario_dict)
    return ROAR(scenario=roar_scenario,
                rng=seed,
                tae_runner=ta,
                runhistory=runhistory,
                run_id=seed)
コード例 #11
0
def get_random_search_object_callback(
    scenario_dict,
    seed,
    ta,
    backend,
    metalearning_configurations,
    runhistory,
):
    """Random search.

    Adjusts ``scenario_dict`` in place -- SMAC output glob from the
    backend, ``minR`` set to the number of instances, a random initial
    incumbent -- and builds a ROAR optimizer from it. ``seed`` doubles as
    the run id; ``metalearning_configurations`` is accepted but not used
    here.
    """
    psmac_glob = backend.get_smac_output_glob()
    scenario_dict['input_psmac_dirs'] = psmac_glob
    instance_count = len(scenario_dict['instances'])
    scenario_dict['minR'] = instance_count
    scenario_dict['initial_incumbent'] = 'RANDOM'

    search_scenario = Scenario(scenario_dict)
    return ROAR(scenario=search_scenario,
                rng=seed,
                tae_runner=ta,
                runhistory=runhistory,
                run_id=seed)
コード例 #12
0
def get_roar_object_callback(
    scenario_dict,
    seed,
    ta,
    backend,
    metalearning_configurations,
    runhistory,
    run_id,
):
    """Random online adaptive racing.

    http://ml.informatik.uni-freiburg.de/papers/11-LION5-SMAC.pdf

    Stores the backend's SMAC output glob in ``scenario_dict`` (the
    caller's dict is modified in place) and builds a ROAR optimizer with
    the given ``seed`` and ``run_id``. ``metalearning_configurations`` is
    accepted but not used here.
    """
    psmac_glob = backend.get_smac_output_glob()
    scenario_dict['input_psmac_dirs'] = psmac_glob

    roar_scenario = Scenario(scenario_dict)
    return ROAR(scenario=roar_scenario,
                rng=seed,
                tae_runner=ta,
                runhistory=runhistory,
                run_id=run_id)
コード例 #13
0
def get_random_search_object_callback(scenario_dict, seed, ta, ta_kwargs,
                                      metalearning_configurations, n_jobs,
                                      dask_client):
    """Random search.

    Adjusts ``scenario_dict`` in place (``minR`` set to the number of
    instances, random initial incumbent) and builds a ROAR optimizer from
    it. ``seed`` doubles as the run id; ``metalearning_configurations`` is
    accepted but not used here.

    Raises
    ------
    ValueError
        If ``n_jobs`` is greater than 1, or a dask client reporting more
        than one worker is supplied.
    """
    # Decide on n_jobs alone when possible; the dask client is only
    # queried when n_jobs does not already indicate parallel execution.
    parallel_requested = n_jobs > 1
    if not parallel_requested and dask_client:
        parallel_requested = len(dask_client.nthreads()) > 1

    if parallel_requested:
        raise ValueError(
            "Please make sure to guard the code invoking Auto-sklearn by "
            "`if __name__ == '__main__'` and remove this exception.")

    instance_count = len(scenario_dict['instances'])
    scenario_dict['minR'] = instance_count
    scenario_dict['initial_incumbent'] = 'RANDOM'

    return ROAR(
        scenario=Scenario(scenario_dict),
        rng=seed,
        tae_runner=ta,
        tae_runner_kwargs=ta_kwargs,
        run_id=seed,
        dask_client=dask_client,
        n_jobs=n_jobs,
    )
コード例 #14
0
    def get_random_search_for_sh_callback(
            scenario_dict,
            seed,
            ta,
            ta_kwargs,
            backend,
            metalearning_configurations,
    ):
        """Random search with a bandit-based intensifier.

        Adjusts ``scenario_dict`` and ``ta_kwargs`` in place and builds a
        ROAR optimizer that uses SuccessiveHalving or Hyperband as its
        intensifier. ``budget_type``, ``bandit_strategy``,
        ``initial_budget`` and ``eta`` are read from the enclosing scope.
        ``metalearning_configurations`` is accepted but not used here.

        Raises
        ------
        ValueError
            If ``bandit_strategy`` is neither 'sh' nor 'hb'.
        """
        # The docstring has to be the first statement of the function;
        # placed after these imports it was a no-op expression and the
        # function had no __doc__.
        from smac.intensification.successive_halving import SuccessiveHalving
        from smac.intensification.hyperband import Hyperband
        from smac.scenario.scenario import Scenario
        scenario_dict['input_psmac_dirs'] = backend.get_smac_output_glob()
        scenario_dict['minR'] = len(scenario_dict['instances'])
        scenario_dict['initial_incumbent'] = 'RANDOM'
        scenario = Scenario(scenario_dict)

        ta_kwargs['budget_type'] = budget_type

        if bandit_strategy == 'sh':
            bandit = SuccessiveHalving
        elif bandit_strategy == 'hb':
            bandit = Hyperband
        else:
            raise ValueError(bandit_strategy)

        return ROAR(
            scenario=scenario,
            rng=seed,
            tae_runner=ta,
            tae_runner_kwargs=ta_kwargs,
            run_id=seed,
            intensifier=bandit,
            intensifier_kwargs={
                'initial_budget': initial_budget,
                'max_budget': 100,
                'eta': eta,
                'min_chall': 1},
        )
コード例 #15
0
    def main_cli(self):
        """Main function of SMAC for CLI interface.

        Reads the command line, routes root-logger output to stdout, builds
        the scenario (with the seed as run id), optionally warm-starts from
        foreign runhistories and incumbents, then runs the optimizer
        selected by ``args_.mode``. A ``TAEAbortException`` or
        ``FirstRunCrashedException`` raised during optimization is logged
        as an error instead of propagating.

        Raises
        ------
        ValueError
            If ``args_.mode`` is not one of "SMAC", "ROAR" or "EPILS".
        """
        self.logger.info("SMAC call: %s" % (" ".join(sys.argv)))

        cmd_reader = CMDReader()
        args_, misc_args = cmd_reader.read_cmd()

        root_logger = logging.getLogger()
        root_logger.setLevel(args_.verbose_level)
        logger_handler = logging.StreamHandler(
                stream=sys.stdout)
        # Terse format at INFO and above, detailed format below INFO.
        if root_logger.level >= logging.INFO:
            formatter = logging.Formatter(
                "%(levelname)s:\t%(message)s")
        else:
            formatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(name)s:%(message)s",
                "%Y-%m-%d %H:%M:%S")
        logger_handler.setFormatter(formatter)
        root_logger.addHandler(logger_handler)
        # Remove the pre-existing default handler. When the root logger had
        # no handler before, handlers[0] is the one just added, and removing
        # it would silence all output -- so only remove when another exists.
        if len(root_logger.handlers) > 1:
            root_logger.removeHandler(root_logger.handlers[0])

        scen = Scenario(args_.scenario_file, misc_args,
                        run_id=args_.seed)

        rh = None
        if args_.warmstart_runhistory:
            aggregate_func = average_cost
            rh = RunHistory(aggregate_func=aggregate_func)

            # Merge the runs of the other scenarios into this one.
            scen, rh = merge_foreign_data_from_file(
                scenario=scen,
                runhistory=rh,
                in_scenario_fn_list=args_.warmstart_scenario,
                in_runhistory_fn_list=args_.warmstart_runhistory,
                cs=scen.cs,
                aggregate_func=aggregate_func)

        initial_configs = None
        if args_.warmstart_incumbent:
            # Start from the default configuration plus the final incumbent
            # of every given trajectory file.
            initial_configs = [scen.cs.get_default_configuration()]
            for traj_fn in args_.warmstart_incumbent:
                trajectory = TrajLogger.read_traj_aclib_format(
                    fn=traj_fn, cs=scen.cs)
                initial_configs.append(trajectory[-1]["incumbent"])

        if args_.mode == "SMAC":
            optimizer = SMAC(
                scenario=scen,
                rng=np.random.RandomState(args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs)
        elif args_.mode == "ROAR":
            optimizer = ROAR(
                scenario=scen,
                rng=np.random.RandomState(args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs)
        elif args_.mode == "EPILS":
            optimizer = EPILS(
                scenario=scen,
                rng=np.random.RandomState(args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs)
        else:
            # Without this branch `optimizer` stays unbound and the call
            # below fails with an unrelated UnboundLocalError.
            raise ValueError("Unknown mode: %s" % args_.mode)
        try:
            optimizer.optimize()
        except (TAEAbortException, FirstRunCrashedException) as err:
            self.logger.error(err)
コード例 #16
0
    def main_cli(self,
                 commandline_arguments: typing.Optional[typing.List[str]] = None):
        """Main function of SMAC for CLI interface.

        Reads the command line, routes root-logger output to stdout, builds
        the scenario, optionally restores a previous state and/or
        warm-starts from foreign runhistories and incumbents, then runs the
        optimizer selected by ``main_args_.mode``. A ``TAEAbortException``
        or ``FirstRunCrashedException`` raised during optimization is
        logged as an error instead of propagating.

        Parameters
        ----------
        commandline_arguments : list of str, optional
            Passed on to the command-line reader when non-empty; otherwise
            the reader is called without arguments.

        Raises
        ------
        ValueError
            If ``main_args_.mode`` is not one of the supported modes.
        """
        self.logger.info("SMAC call: %s" % (" ".join(sys.argv)))

        cmd_reader = CMDReader()
        kwargs = {}
        if commandline_arguments:
            kwargs['commandline_arguments'] = commandline_arguments
        main_args_, smac_args_, scen_args_ = cmd_reader.read_cmd(**kwargs)

        root_logger = logging.getLogger()
        root_logger.setLevel(main_args_.verbose_level)
        logger_handler = logging.StreamHandler(stream=sys.stdout)
        # Terse format at INFO and above, detailed format below INFO.
        if root_logger.level >= logging.INFO:
            formatter = logging.Formatter("%(levelname)s:\t%(message)s")
        else:
            formatter = logging.Formatter(
                "%(asctime)s:%(levelname)s:%(name)s:%(message)s",
                "%Y-%m-%d %H:%M:%S")
        logger_handler.setFormatter(formatter)
        root_logger.addHandler(logger_handler)
        # remove default handler (only if one besides ours is installed)
        if len(root_logger.handlers) > 1:
            root_logger.removeHandler(root_logger.handlers[0])

        # Create defaults
        rh = None
        initial_configs = None
        stats = None
        incumbent = None

        # Create scenario-object; scenario arguments are applied after the
        # SMAC arguments, so they win for any key present in both.
        scenario = {}
        scenario.update(vars(smac_args_))
        scenario.update(vars(scen_args_))
        scen = Scenario(scenario=scenario)

        # Restore state
        if main_args_.restore_state:
            root_logger.debug("Restoring state from %s...",
                              main_args_.restore_state)
            rh, stats, traj_list_aclib, traj_list_old = self.restore_state(
                scen, main_args_)

            scen.output_dir_for_this_run = create_output_directory(
                scen,
                main_args_.seed,
                root_logger,
            )
            scen.write()
            incumbent = self.restore_state_after_output_dir(
                scen, stats, traj_list_aclib, traj_list_old)

        if main_args_.warmstart_runhistory:
            aggregate_func = average_cost
            # NOTE: this replaces any runhistory restored above.
            rh = RunHistory(aggregate_func=aggregate_func)

            scen, rh = merge_foreign_data_from_file(
                scenario=scen,
                runhistory=rh,
                in_scenario_fn_list=main_args_.warmstart_scenario,
                in_runhistory_fn_list=main_args_.warmstart_runhistory,
                cs=scen.cs,
                aggregate_func=aggregate_func)

        if main_args_.warmstart_incumbent:
            # Start from the default configuration plus the final incumbent
            # of every given trajectory file.
            initial_configs = [scen.cs.get_default_configuration()]
            for traj_fn in main_args_.warmstart_incumbent:
                trajectory = TrajLogger.read_traj_aclib_format(fn=traj_fn,
                                                               cs=scen.cs)
                initial_configs.append(trajectory[-1]["incumbent"])

        if main_args_.mode == "SMAC":
            optimizer = SMAC(scenario=scen,
                             rng=np.random.RandomState(main_args_.seed),
                             runhistory=rh,
                             initial_configurations=initial_configs,
                             stats=stats,
                             restore_incumbent=incumbent,
                             run_id=main_args_.seed)
        elif main_args_.mode == "BORF":
            optimizer = BORF(scenario=scen,
                             rng=np.random.RandomState(main_args_.seed),
                             runhistory=rh,
                             initial_configurations=initial_configs,
                             stats=stats,
                             restore_incumbent=incumbent,
                             run_id=main_args_.seed)
        elif main_args_.mode == "BOGP":
            optimizer = BOGP(scenario=scen,
                             rng=np.random.RandomState(main_args_.seed),
                             runhistory=rh,
                             initial_configurations=initial_configs,
                             stats=stats,
                             restore_incumbent=incumbent,
                             run_id=main_args_.seed)
        elif main_args_.mode == "ROAR":
            optimizer = ROAR(scenario=scen,
                             rng=np.random.RandomState(main_args_.seed),
                             runhistory=rh,
                             initial_configurations=initial_configs,
                             run_id=main_args_.seed)
        elif main_args_.mode == "EPILS":
            optimizer = EPILS(scenario=scen,
                              rng=np.random.RandomState(main_args_.seed),
                              runhistory=rh,
                              initial_configurations=initial_configs,
                              run_id=main_args_.seed)
        elif main_args_.mode == "Hydra":
            optimizer = Hydra(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                runhistory=rh,
                initial_configurations=initial_configs,
                stats=stats,
                restore_incumbent=incumbent,
                run_id=main_args_.seed,
                random_configuration_chooser=main_args_.
                random_configuration_chooser,
                n_iterations=main_args_.hydra_iterations,
                val_set=main_args_.hydra_validation,
                incs_per_round=main_args_.hydra_incumbents_per_round,
                n_optimizers=main_args_.hydra_n_optimizers)
        elif main_args_.mode == "PSMAC":
            optimizer = PSMAC(
                scenario=scen,
                rng=np.random.RandomState(main_args_.seed),
                run_id=main_args_.seed,
                shared_model=smac_args_.shared_model,
                validate=main_args_.psmac_validate,
                n_optimizers=main_args_.hydra_n_optimizers,
                n_incs=main_args_.hydra_incumbents_per_round,
            )
        else:
            # Without this branch `optimizer` stays unbound and the call
            # below fails with an unrelated UnboundLocalError.
            raise ValueError("Unknown mode: %s" % main_args_.mode)
        try:
            optimizer.optimize()
        except (TAEAbortException, FirstRunCrashedException) as err:
            self.logger.error(err)
コード例 #17
0
ファイル: smbo.py プロジェクト: zbraiterman/auto-sklearn
    def run_smbo(self):

        self.watcher.start_task('SMBO')

        # == first things first: load the datamanager
        self.reset_data_manager()

        # == Initialize non-SMBO stuff
        # first create a scenario
        seed = self.seed
        self.config_space.seed(seed)
        num_params = len(self.config_space.get_hyperparameters())
        # allocate a run history
        num_run = self.start_num_run
        instance_id = self.dataset_name + SENTINEL

        # Initialize some SMAC dependencies
        runhistory = RunHistory(aggregate_func=average_cost)
        # meta_runhistory = RunHistory(aggregate_func=average_cost)
        # meta_runs_dataset_indices = {}

        # == METALEARNING suggestions
        # we start by evaluating the defaults on the full dataset again
        # and add the suggestions from metalearning behind it

        if self.num_metalearning_cfgs > 0:
            if self.metadata_directory is None:
                metalearning_directory = os.path.dirname(
                    autosklearn.metalearning.__file__)
                # There is no multilabel data in OpenML
                if self.task == MULTILABEL_CLASSIFICATION:
                    meta_task = BINARY_CLASSIFICATION
                else:
                    meta_task = self.task
                metadata_directory = os.path.join(
                    metalearning_directory, 'files', '%s_%s_%s' %
                    (METRIC_TO_STRING[self.metric],
                     TASK_TYPES_TO_STRING[meta_task], 'sparse'
                     if self.datamanager.info['is_sparse'] else 'dense'))
                self.metadata_directory = metadata_directory

            self.logger.info('Metadata directory: %s', self.metadata_directory)
            meta_base = MetaBase(self.config_space, self.metadata_directory)

            metafeature_calculation_time_limit = int(
                self.total_walltime_limit / 4)
            metafeature_calculation_start_time = time.time()
            meta_features = self._calculate_metafeatures_with_limits(
                metafeature_calculation_time_limit)
            metafeature_calculation_end_time = time.time()
            metafeature_calculation_time_limit = \
                metafeature_calculation_time_limit - (
                metafeature_calculation_end_time -
                metafeature_calculation_start_time)

            if metafeature_calculation_time_limit < 1:
                self.logger.warning(
                    'Time limit for metafeature calculation less '
                    'than 1 seconds (%f). Skipping calculation '
                    'of metafeatures for encoded dataset.',
                    metafeature_calculation_time_limit)
                meta_features_encoded = None
            else:
                with warnings.catch_warnings():
                    warnings.showwarning = self._send_warnings_to_log
                    self.datamanager.perform1HotEncoding()
                meta_features_encoded = \
                    self._calculate_metafeatures_encoded_with_limits(
                        metafeature_calculation_time_limit)

            # In case there is a problem calculating the encoded meta-features
            if meta_features is None:
                if meta_features_encoded is not None:
                    meta_features = meta_features_encoded
            else:
                if meta_features_encoded is not None:
                    meta_features.metafeature_values.update(
                        meta_features_encoded.metafeature_values)

            if meta_features is not None:
                meta_base.add_dataset(instance_id, meta_features)
                # Do mean imputation of the meta-features - should be done specific
                # for each prediction model!
                all_metafeatures = meta_base.get_metafeatures(
                    features=list(meta_features.keys()))
                all_metafeatures.fillna(all_metafeatures.mean(), inplace=True)

                with warnings.catch_warnings():
                    warnings.showwarning = self._send_warnings_to_log
                    metalearning_configurations = self.collect_metalearning_suggestions(
                        meta_base)
                if metalearning_configurations is None:
                    metalearning_configurations = []
                self.reset_data_manager()

                self.logger.info('%s', meta_features)

                # Convert meta-features into a dictionary because the scenario
                # expects a dictionary
                meta_features_dict = {}
                for dataset, series in all_metafeatures.iterrows():
                    meta_features_dict[dataset] = series.values
                meta_features_list = []
                for meta_feature_name in all_metafeatures.columns:
                    meta_features_list.append(
                        meta_features[meta_feature_name].value)
                meta_features_list = np.array(meta_features_list).reshape(
                    (1, -1))
                self.logger.info(list(meta_features_dict.keys()))

                # meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric])
                # meta_runs_index = 0
                # try:
                #    meta_durations = meta_base.get_all_runs('runtime')
                #    read_runtime_data = True
                # except KeyError:
                #    read_runtime_data = False
                #    self.logger.critical('Cannot read runtime data.')
                #    if self.acquisition_function == 'EIPS':
                #        self.logger.critical('Reverting to acquisition function EI!')
                #        self.acquisition_function = 'EI'

                # for meta_dataset in meta_runs.index:
                #     meta_dataset_start_index = meta_runs_index
                #     for meta_configuration in meta_runs.columns:
                #         if np.isfinite(meta_runs.loc[meta_dataset, meta_configuration]):
                #             try:
                #                 config = meta_base.get_configuration_from_algorithm_index(
                #                     meta_configuration)
                #                 cost = meta_runs.loc[meta_dataset, meta_configuration]
                #                 if read_runtime_data:
                #                     runtime = meta_durations.loc[meta_dataset,
                #                                                  meta_configuration]
                #                 else:
                #                     runtime = 1
                #                 # TODO read out other status types!
                #                 meta_runhistory.add(config, cost, runtime,
                #                                     StatusType.SUCCESS,
                #                                     instance_id=meta_dataset)
                #                 meta_runs_index += 1
                #             except:
                #                 # TODO maybe add warning
                #                 pass
                #
                #     meta_runs_dataset_indices[meta_dataset] = (
                #         meta_dataset_start_index, meta_runs_index)

        else:
            meta_features = None

        if meta_features is None:
            if self.acquisition_function == 'EIPS':
                self.logger.critical('Reverting to acquisition function EI!')
                self.acquisition_function = 'EI'
            meta_features_list = []
            meta_features_dict = {}
            metalearning_configurations = []

        if self.resampling_strategy in [
                'partial-cv', 'partial-cv-iterative-fit'
        ]:
            num_folds = self.resampling_strategy_args['folds']
            instances = [[fold_number] for fold_number in range(num_folds)]
        else:
            instances = None

        startup_time = self.watcher.wall_elapsed(self.dataset_name)
        total_walltime_limit = self.total_walltime_limit - startup_time - 5
        scenario_dict = {
            'cs': self.config_space,
            'cutoff-time': self.func_eval_time_limit,
            'memory-limit': self.memory_limit,
            'wallclock-limit': total_walltime_limit,
            # 'instances': [[name] for name in meta_features_dict],
            'output-dir': self.backend.temporary_directory,
            'shared-model': self.shared_mode,
            'run-obj': 'quality',
            'deterministic': 'true',
            'instances': instances
        }

        if self.configuration_mode == 'RANDOM':
            scenario_dict['minR'] = len(
                instances) if instances is not None else 1
            scenario_dict['initial_incumbent'] = 'RANDOM'

        self.scenario = Scenario(scenario_dict)

        # TODO rebuild target algorithm to be its own target algorithm
        # evaluator, which takes into account that a run can be killed prior
        # to the model being fully fitted; thus putting intermediate results
        # into a queue and querying them once the time is over
        exclude = dict()
        include = dict()
        if self.include_preprocessors is not None and \
                self.exclude_preprocessors is not None:
            raise ValueError('Cannot specify include_preprocessors and '
                             'exclude_preprocessors.')
        elif self.include_preprocessors is not None:
            include['preprocessor'] = self.include_preprocessors
        elif self.exclude_preprocessors is not None:
            exclude['preprocessor'] = self.exclude_preprocessors
        if self.include_estimators is not None and \
                self.exclude_preprocessors is not None:
            raise ValueError('Cannot specify include_estimators and '
                             'exclude_estimators.')
        elif self.include_estimators is not None:
            if self.task in CLASSIFICATION_TASKS:
                include['classifier'] = self.include_estimators
            elif self.task in REGRESSION_TASKS:
                include['regressor'] = self.include_estimators
            else:
                raise ValueError(self.task)
        elif self.exclude_estimators is not None:
            if self.task in CLASSIFICATION_TASKS:
                exclude['classifier'] = self.exclude_estimators
            elif self.task in REGRESSION_TASKS:
                exclude['regressor'] = self.exclude_estimators
            else:
                raise ValueError(self.task)

        ta = ExecuteTaFuncWithQueue(
            backend=self.backend,
            autosklearn_seed=seed,
            resampling_strategy=self.resampling_strategy,
            initial_num_run=num_run,
            logger=self.logger,
            include=include,
            exclude=exclude,
            memory_limit=self.memory_limit,
            disable_file_output=self.disable_file_output,
            **self.resampling_strategy_args)

        types = get_types(self.config_space, self.scenario.feature_array)

        # TODO extract generation of SMAC object into its own function for
        # testing
        if self.acquisition_function == 'EI':
            model = RandomForestWithInstances(
                types,
                #instance_features=meta_features_list,
                seed=1,
                num_trees=10)
            rh2EPM = RunHistory2EPM4Cost(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=[
                                             StatusType.SUCCESS,
                                             StatusType.MEMOUT,
                                             StatusType.TIMEOUT
                                         ],
                                         impute_censored_data=False,
                                         impute_state=None)
            _smac_arguments = dict(scenario=self.scenario,
                                   model=model,
                                   rng=seed,
                                   runhistory2epm=rh2EPM,
                                   tae_runner=ta,
                                   runhistory=runhistory)
        elif self.acquisition_function == 'EIPS':
            rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=[
                                             StatusType.SUCCESS,
                                             StatusType.MEMOUT,
                                             StatusType.TIMEOUT
                                         ],
                                         impute_censored_data=False,
                                         impute_state=None)
            model = UncorrelatedMultiObjectiveRandomForestWithInstances(
                ['cost', 'runtime'],
                types,
                num_trees=10,
                instance_features=meta_features_list,
                seed=1)
            acquisition_function = EIPS(model)
            _smac_arguments = dict(scenario=self.scenario,
                                   model=model,
                                   rng=seed,
                                   tae_runner=ta,
                                   runhistory2epm=rh2EPM,
                                   runhistory=runhistory,
                                   acquisition_function=acquisition_function)
        else:
            raise ValueError('Unknown acquisition function value %s!' %
                             self.acquisition_function)

        if self.configuration_mode == 'SMAC':
            smac = SMAC(**_smac_arguments)
        elif self.configuration_mode in ['ROAR', 'RANDOM']:
            for not_in_roar in ['runhistory2epm', 'model']:
                if not_in_roar in _smac_arguments:
                    del _smac_arguments[not_in_roar]
            smac = ROAR(**_smac_arguments)
        else:
            raise ValueError(self.configuration_mode)

        # Build a runtime model
        # runtime_rf = RandomForestWithInstances(types,
        #                                        instance_features=meta_features_list,
        #                                        seed=1, num_trees=10)
        # runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
        #                                      scenario=self.scenario,
        #                                      success_states=None,
        #                                      impute_censored_data=False,
        #                                      impute_state=None)
        # X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory)
        # runtime_rf.train(X_runtime, y_runtime[:, 1].flatten())
        # X_meta, Y_meta = rh2EPM.transform(meta_runhistory)
        # # Transform Y_meta on a per-dataset base
        # for meta_dataset in meta_runs_dataset_indices:
        #     start_index, end_index = meta_runs_dataset_indices[meta_dataset]
        #     end_index += 1  # Python indexing
        #     Y_meta[start_index:end_index, 0]\
        #         [Y_meta[start_index:end_index, 0] >2.0] =  2.0
        #     dataset_minimum = np.min(Y_meta[start_index:end_index, 0])
        #     Y_meta[start_index:end_index, 0] = 1 - (
        #         (1. - Y_meta[start_index:end_index, 0]) /
        #         (1. - dataset_minimum))
        #     Y_meta[start_index:end_index, 0]\
        #           [Y_meta[start_index:end_index, 0] > 2] = 2

        smac.solver.stats.start_timing()
        # == first, evaluate all metalearning and default configurations
        smac.solver.incumbent = smac.solver.initial_design.run()

        for challenger in metalearning_configurations:

            smac.solver.incumbent, inc_perf = smac.solver.intensifier.intensify(
                challengers=[challenger],
                incumbent=smac.solver.incumbent,
                run_history=smac.solver.runhistory,
                aggregate_func=smac.solver.aggregate_func,
                time_bound=self.total_walltime_limit)

            if smac.solver.scenario.shared_model:
                pSMAC.write(run_history=smac.solver.runhistory,
                            output_directory=smac.solver.scenario.output_dir,
                            num_run=self.seed)

            if smac.solver.stats.is_budget_exhausted():
                break

        # == after metalearning run SMAC loop
        while True:

            if smac.solver.scenario.shared_model:
                pSMAC.read(run_history=smac.solver.runhistory,
                           output_directory=self.scenario.output_dir,
                           configuration_space=self.config_space,
                           logger=self.logger)

            choose_next_start_time = time.time()
            try:
                challengers = self.choose_next(smac)
            except Exception as e:
                self.logger.error(e)
                self.logger.error("Error in getting next configurations "
                                  "with SMAC. Using random configuration!")
                next_config = self.config_space.sample_configuration()
                challengers = [next_config]
            time_for_choose_next = time.time() - choose_next_start_time
            self.logger.info('Used %g seconds to find next '
                             'configurations' % (time_for_choose_next))

            time_for_choose_next = max(time_for_choose_next, 1.0)
            smac.solver.incumbent, inc_perf = smac.solver.intensifier.intensify(
                challengers=challengers,
                incumbent=smac.solver.incumbent,
                run_history=smac.solver.runhistory,
                aggregate_func=smac.solver.aggregate_func,
                time_bound=time_for_choose_next)

            if smac.solver.scenario.shared_model:
                pSMAC.write(run_history=smac.solver.runhistory,
                            output_directory=smac.solver.scenario.output_dir,
                            num_run=self.seed)

            if smac.solver.stats.is_budget_exhausted():
                break

        self.runhistory = smac.solver.runhistory
        self.trajectory = smac.solver.intensifier.traj_logger.trajectory

        return self.runhistory, self.trajectory
コード例 #18
0
ファイル: parallel_sh_mlp.py プロジェクト: zwj-coder/SMAC3
    # Intensifier will allocate from 5 to a maximum of 25 epochs to each configuration
    # Successive Halving child-instances are created to prevent idle
    # workers.
    intensifier_kwargs = {
        'initial_budget': 5,
        'max_budget': 25,
        'eta': 3,
        'min_chall': 1,
        'instance_order': 'shuffle_once'
    }

    # To optimize, we pass the function to the SMAC-object
    smac = ROAR(scenario=scenario,
                rng=np.random.RandomState(42),
                tae_runner=mlp_from_cfg,
                intensifier=SuccessiveHalving,
                intensifier_kwargs=intensifier_kwargs,
                initial_design=RandomConfigurations,
                n_jobs=4)

    # Example call of the function with default values
    # It returns: Status, Cost, Runtime, Additional Infos
    def_value = smac.get_tae_runner().run(
        config=cs.get_default_configuration(), instance='1', budget=25,
        seed=0)[1]
    print("Value for default configuration: %.4f" % def_value)

    # Start optimization
    try:
        incumbent = smac.optimize()
    finally:
コード例 #19
0
                print(f"\n[{name}] ")
                hpo = SMAC4HPO(scenario=scenario, rng=rng, tae_runner=tat)
                hpo_result, info = run_smac_based_optimizer(hpo, tae)

                write_output(
                    f"[{name}] time={info['time']} train_loss={info['last_train_loss']} "
                    f"test_loss={info['last_test_loss']}\n")

                records = util.add_record(records, task_id, name, hpo_result)

                ########################################################################################################
                # ROAR x2
                ########################################################################################################
                name = "roar_x2"
                print(f"\n[{name}] ")
                hpo = ROAR(scenario=scenario, rng=rng, tae_runner=tat)
                hpo_result, info = run_smac_based_optimizer(hpo, tae, speed=2)

                write_output(
                    f"[{name}] time={info['time']} train_loss={info['last_train_loss']} "
                    f"test_loss={info['last_test_loss']}\n")

                records = util.add_record(records, task_id, name, hpo_result)

                ########################################################################################################
                # Random
                ########################################################################################################
                name = "random_x2"
                print(f"\n[{name}] ")
                speed = 2
                best_loss = 1