Example #1
    def test_inst_no_feat(self):
        ''' test if scenarios are treated correctly if no features are
        specified.'''
        scen = Scenario(self.scen_fn,
                        cmd_options={
                            'run_obj': 'quality',
                            'train_insts': self.train_insts,
                            'test_insts': self.test_insts
                        })
        self.assertTrue(scen.feature_array is None)
        self.assertEqual(len(scen.feature_dict), 0)

        scen.instance_specific = self.inst_specs
        validator = Validator(scen, self.trajectory, self.rng)
        # Add a few runs and check if they are correctly processed
        old_configs = [entry["incumbent"] for entry in self.trajectory]
        old_rh = RunHistory()
        for config in old_configs[:int(len(old_configs) / 2)]:
            old_rh.add(config,
                       1,
                       1,
                       StatusType.SUCCESS,
                       instance_id='0',
                       seed=127)
        rh = validator.validate_epm('all', 'train+test', 1, old_rh)
        self.assertEqual(len(old_rh.get_all_configs()), 4)
        self.assertEqual(len(rh.get_all_configs()), 10)
Example #2
def eval_challenger(
    run_info: RunInfo,
    taf: ExecuteTAFuncDict,
    stats: Stats,
    runhistory: RunHistory,
):
    """
    Wrapper over challenger evaluation

    The SMBO object handles the run history now, but to keep the
    same testing functionality, this function is a small wrapper
    that launches the taf and adds the result to the history
    """
    # evaluating configuration
    run_info, result = taf.run_wrapper(
        run_info=run_info,
    )
    stats.ta_time_used += float(result.time)
    runhistory.add(
        config=run_info.config,
        cost=result.cost,
        time=result.time,
        status=result.status,
        instance_id=run_info.instance,
        seed=run_info.seed,
        budget=run_info.budget,
    )
    stats.n_configs = len(runhistory.config_ids)
    return result
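For orientation, below is a minimal, self-contained sketch (not taken from any of the projects above) of the RunHistory bookkeeping this wrapper performs; the one-parameter configuration space and the cost/time values are invented, and the StatusType import path varies between SMAC versions.

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from smac.runhistory.runhistory import RunHistory
from smac.tae import StatusType  # older SMAC: from smac.tae.execute_ta_run import StatusType

# Hypothetical one-parameter configuration space
cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter("x", 0.0, 1.0))
config = cs.sample_configuration()

rh = RunHistory()
# Record one successful run, mirroring what eval_challenger does with the taf result
rh.add(config=config, cost=0.42, time=1.5,
       status=StatusType.SUCCESS, instance_id=None, seed=0)

print(rh.get_cost(config))   # aggregated cost of this configuration
print(len(rh.config_ids))    # number of distinct configurations seen so far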
Example #3
File: utils.py  Project: ctlab/GADMA
def transform_smac(optimizer, variables, X, Y):
    from ..optimizers import SMACBayesianOptimizer
    from . import ContinuousVariable
    if not isinstance(optimizer, SMACBayesianOptimizer):
        return X, Y
    # We create a run history, fill it, and transform its data with rh2epm.
    # This is the usual pipeline for SMAC.
    config_space = optimizer.get_config_space(variables=variables)
    rh2epm = optimizer.get_runhistory2epm(
        scenario=optimizer.get_scenario(
            maxeval=None,
            config_space=config_space
        )
    )
    runhistory = RunHistory()
    config = config_space.sample_configuration(1)
    for x, y in zip(X, Y):
        for var, value in zip(variables, x):
            if isinstance(var, ContinuousVariable):
                config[var.name] = float(value)
            else:
                config[var.name] = value
        runhistory.add(
            config=copy.copy(config),
            cost=y,
            time=0,
            status=StatusType.SUCCESS
        )
    X, Y = rh2epm.transform(runhistory)
    return X, Y.flatten()
Example #4
    def test_passed_runhistory(self):
        ''' test if passed runhistory is in resulting runhistory '''
        scen = Scenario(self.scen_fn,
                        cmd_args={'run_obj':'quality',
                                  'instances' : self.train_insts,
                                  'test_instances': self.test_insts})
        scen.instance_specific = self.inst_specs
        validator = Validator(scen, self.trajectory, self.rng)
        # Add a few runs and check if they are correctly processed
        old_configs = [entry["incumbent"] for entry in self.trajectory]
        old_rh = RunHistory(average_cost)
        seeds = [127 for i in range(int(len(old_configs)/2))]
        seeds[-1] = 126  # Test instance_seed-structure in validation
        for config in old_configs[:int(len(old_configs)/2)]:
            old_rh.add(config, 1, 1, StatusType.SUCCESS, instance_id='0',
                       seed=seeds[old_configs.index(config)])

        configs = validator._get_configs('all')
        insts = validator._get_instances('train')
        runs_w_rh = validator.get_runs(configs, insts, repetitions=2,
                                       runhistory=old_rh)
        runs_wo_rh = validator.get_runs(configs, insts, repetitions=2)
        self.assertEqual(len(runs_w_rh[0]), len(runs_wo_rh[0]) - 4)
        self.assertEqual(len(runs_w_rh[1].data), 4)
        self.assertEqual(len(runs_wo_rh[1].data), 0)
Example #5
    def test_passed_runhistory(self):
        ''' test if passed runhistory is in resulting runhistory '''
        self.scen.train_insts = self.train_insts
        self.scen.test_insts = self.test_insts
        validator = Validator(self.scen, self.trajectory, self.output_rh,
                              self.rng)
        # Add a few runs and check if they are correctly processed
        old_configs = [entry["incumbent"] for entry in self.trajectory]
        old_rh = RunHistory(average_cost)
        for config in old_configs[:int(len(old_configs) / 2)]:
            old_rh.add(config,
                       1,
                       1,
                       StatusType.SUCCESS,
                       instance_id='0',
                       seed=127)

        configs = validator._get_configs('all')
        insts = validator._get_instances('train')
        runs_w_rh = validator.get_runs(configs,
                                       insts,
                                       repetitions=2,
                                       runhistory=old_rh)
        runs_wo_rh = validator.get_runs(configs, insts, repetitions=2)
        self.assertEqual(len(runs_w_rh), len(runs_wo_rh) - 4)
Example #6
    def test_add_and_pickle(self):
        '''
            simply adding some rundata to runhistory, then pickle it
        '''
        rh = RunHistory()
        cs = get_config_space()
        config = Configuration(cs, values={'a': 1, 'b': 2})

        self.assertTrue(rh.empty())

        rh.add(config=config, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=None,
               seed=None,
               additional_info=None)

        rh.add(config=config, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=1,
               seed=12354,
               additional_info={"start_time": 10})

        self.assertFalse(rh.empty())

        tmpfile = tempfile.NamedTemporaryFile(mode='wb', delete=False)
        pickle.dump(rh, tmpfile, -1)
        name = tmpfile.name
        tmpfile.close()

        with open(name, 'rb') as fh:
            loaded_rh = pickle.load(fh)
        self.assertEqual(loaded_rh.data, rh.data)
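Besides pickling, newer SMAC releases also offer JSON (de)serialization on RunHistory; the following is a hedged sketch of that round-trip, assuming the save_json/load_json helpers and a ConfigurationSpace cs such as the one returned by get_config_space() above (the file name is arbitrary).

from smac.runhistory.runhistory import RunHistory

def roundtrip_json(rh: RunHistory, cs, fn: str = "runhistory.json") -> RunHistory:
    """Save rh to JSON and load it back into a fresh RunHistory (sketch)."""
    rh.save_json(fn)           # serializes runs, configs and their origins
    loaded = RunHistory()
    loaded.load_json(fn, cs)   # the ConfigurationSpace is needed to rebuild the configs
    return loaded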
Example #7
    def test_incremental_update(self):

        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs, values={"a": 1, "b": 2})

        rh.add(
            config=config1,
            cost=10,
            time=20,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
        )

        self.assertEqual(rh.get_cost(config1), 10)

        rh.add(
            config=config1,
            cost=20,
            time=20,
            status=StatusType.SUCCESS,
            instance_id=2,
            seed=1,
        )

        self.assertEqual(rh.get_cost(config1), 15)
Example #8
    def test_multiple_budgets(self):

        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs, values={"a": 1, "b": 2})

        rh.add(
            config=config1,
            cost=[10, 50],
            time=20,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
            budget=1,
        )

        self.assertEqual(rh.get_cost(config1), 1.0)

        # Only the higher budget gets included in the config cost
        # However, we expect that the bounds are changed
        rh.add(
            config=config1,
            cost=[20, 25],
            time=25,
            status=StatusType.SUCCESS,
            instance_id=1,
            seed=1,
            budget=5,
        )

        self.assertEqual(rh.get_cost(config1), 0.5)
Example #9
    def test_local_search_finds_minimum(self):
        class AcquisitionFunction:

            model = None

            def __call__(self, arrays):
                rval = []
                for array in arrays:
                    rval.append([-rosenbrock_4d(array)])
                return np.array(rval)

        ls = LocalSearch(
            acquisition_function=AcquisitionFunction(),
            config_space=self.cs,
            n_steps_plateau_walk=10,
            max_steps=np.inf,
        )

        runhistory = RunHistory()
        self.cs.seed(1)
        random_configs = self.cs.sample_configuration(size=100)
        costs = [
            rosenbrock_4d(random_config) for random_config in random_configs
        ]
        self.assertGreater(np.min(costs), 100)
        for random_config, cost in zip(random_configs, costs):
            runhistory.add(config=random_config,
                           cost=cost,
                           time=0,
                           status=StatusType.SUCCESS)
        minimizer = ls.maximize(runhistory, None, 10)
        minima = [-rosenbrock_4d(m) for m in minimizer]
        self.assertGreater(minima[0], -0.05)
Example #10
    def test_illegal_input(self):
        rh = RunHistory()
        cs = get_config_space()
        config = Configuration(cs, values={"a": 1, "b": 2})

        self.assertTrue(rh.empty())

        with pytest.raises(ValueError):
            rh.add(
                config=config,
                cost=[4.5, 5.5, 6.5],
                time=20,
                status=StatusType.SUCCESS,
                instance_id=1,
                seed=12354,
                starttime=10,
                endtime=30,
                additional_info={"start_time": 10},
            )

            rh.add(
                config=config,
                cost=[2.5, 5.5],
                time=20,
                status=StatusType.SUCCESS,
                instance_id=1,
                seed=12354,
                starttime=10,
                endtime=30,
                additional_info={"start_time": 10},
            )
Example #11
    def test_multiple_budgets(self):

        rh = RunHistory()
        cs = get_config_space()
        config1 = Configuration(cs, values={'a': 1, 'b': 2})

        rh.add(config=config1,
               cost=10,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=1,
               budget=1)

        self.assertEqual(rh.get_cost(config1), 10)

        # only the higher budget gets included in the config cost
        rh.add(config=config1,
               cost=20,
               time=20,
               status=StatusType.SUCCESS,
               instance_id=1,
               seed=1,
               budget=2)

        self.assertEqual(rh.get_cost(config1), 20)
        self.assertEqual(rh.get_min_cost(config1), 10)
Example #12
    def test_get_initial_points_moo(self):
        class Model:
            def predict_marginalized_over_instances(self, X):
                return X, X

        class AcquisitionFunction:

            model = Model()

            def __call__(self, X):
                return np.array([x.get_array().sum() for x in X]).reshape(
                    (-1, 1))

        ls = LocalSearch(
            acquisition_function=AcquisitionFunction(),
            config_space=self.cs,
            n_steps_plateau_walk=10,
            max_steps=np.inf,
        )

        runhistory = RunHistory()
        random_configs = self.cs.sample_configuration(size=100)
        costs = np.array(
            [rosenbrock_4d(random_config) for random_config in random_configs])
        for random_config, cost in zip(random_configs, costs):
            runhistory.add(config=random_config,
                           cost=cost,
                           time=0,
                           status=StatusType.SUCCESS)

        points = ls._get_initial_points(num_points=5,
                                        runhistory=runhistory,
                                        additional_start_points=None)
        self.assertEqual(len(points), 10)
Example #13
def combine_runhistories(rhs, logger=None):
    """Combine list of given runhistories. interleaving to best approximate execution order"""
    combi_rh = RunHistory()
    rh_to_runs = {rh: list(rh.data.items()) for rh in rhs}
    if logger:
        logger.debug("number of elements: " +
                     str({k: len(v)
                          for k, v in rh_to_runs.items()}))
    idx = 0
    while len(rh_to_runs) > 0:
        for rh in list(rh_to_runs.keys()):
            try:
                k, v = rh_to_runs[rh][idx]
                combi_rh.add(
                    config=rh.ids_config[k.config_id],
                    cost=v.cost,
                    time=v.time,
                    status=v.status,
                    instance_id=k.instance_id,
                    #TODO budget option
                    seed=k.seed,
                    additional_info=v.additional_info)
            except IndexError:
                rh_to_runs.pop(rh)
        idx += 1
    if logger:
        logger.debug("number of elements in individual rhs: " +
                     str({k: len(v)
                          for k, v in rh_to_runs.items()}))
        logger.debug("number of elements in combined rh: " +
                     str(len(combi_rh.data)))
    return combi_rh
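A hedged usage sketch for combine_runhistories; the configuration space, costs and seeds are invented, and on older SMAC versions RunHistory may additionally expect an aggregation function in its constructor.

from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from smac.runhistory.runhistory import RunHistory
from smac.tae import StatusType  # older SMAC: from smac.tae.execute_ta_run import StatusType

cs = ConfigurationSpace()
cs.add_hyperparameter(UniformFloatHyperparameter("x", 0.0, 1.0))

rh_a, rh_b = RunHistory(), RunHistory()
for rh, n_runs in ((rh_a, 3), (rh_b, 2)):
    for i in range(n_runs):
        rh.add(config=cs.sample_configuration(), cost=float(i),
               time=0.1, status=StatusType.SUCCESS, seed=i)

combined = combine_runhistories([rh_a, rh_b])
assert len(combined.data) == 5  # all runs from both histories, interleaved a[0], b[0], a[1], ...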
Example #14
    def test_passed_runhistory_deterministic(self):
        ''' test if passed runhistory is in resulting runhistory '''
        scen = Scenario(self.scen_fn,
                        cmd_options={
                            'run_obj': 'quality',
                            'train_insts': self.train_insts,
                            'deterministic': True
                        })
        scen.instance_specific = self.inst_specs
        validator = Validator(scen, self.trajectory, self.rng)
        # Add a few runs and check if they are correctly processed
        old_configs = [entry["incumbent"] for entry in self.trajectory]
        old_rh = RunHistory()
        for config in old_configs[:int(len(old_configs) / 2)]:
            old_rh.add(config, 1, 1, StatusType.SUCCESS, instance_id='0')

        configs = validator._get_configs('all')
        insts = validator._get_instances('train')
        runs_w_rh = validator._get_runs(configs,
                                        insts,
                                        repetitions=2,
                                        runhistory=old_rh)
        runs_wo_rh = validator._get_runs(configs, insts, repetitions=2)
        self.assertEqual(len(runs_w_rh[0]), len(runs_wo_rh[0]) - 4)
        self.assertEqual(len(runs_w_rh[1].data), 4)
        self.assertEqual(len(runs_wo_rh[1].data), 0)
Example #15
    def test_no_feature_dict(self):
        scen = Scenario(self.scen_fn, cmd_args={'run_obj': 'quality'})
        scen.feature_array = None
        validator = Validator(scen, self.trajectory)
        old_rh = RunHistory(average_cost)
        for config in [e["incumbent"] for e in self.trajectory]:
            old_rh.add(config, 1, 1, StatusType.SUCCESS, instance_id='0',
                       seed=127)
        validator.validate_epm(runhistory=old_rh)
Example #16
    def test_choose_next_2(self):
        # Test with a single configuration in the runhistory!
        def side_effect(X):
            return np.mean(X, axis=1).reshape((-1, 1))

        def side_effect_predict(X):
            m, v = np.ones((X.shape[0], 1)), None
            return m, v

        seed = 42
        incumbent = self.scenario.cs.get_default_configuration()
        rh = RunHistory()
        rh.add(incumbent, 10, 10, StatusType.SUCCESS)
        epm_chooser = SMAC4AC(self.scenario, rng=seed,
                              runhistory=rh).solver.epm_chooser

        epm_chooser.model = mock.Mock(spec=RandomForestWithInstances)
        epm_chooser.model.predict_marginalized_over_instances.side_effect = (
            side_effect_predict)
        epm_chooser.acquisition_func._compute = mock.Mock(
            spec=RandomForestWithInstances)
        epm_chooser.acquisition_func._compute.side_effect = side_effect
        epm_chooser.incumbent = incumbent

        challengers = epm_chooser.choose_next()
        # Convert challenger list (a generator) to a real list
        challengers = [c for c in challengers]

        self.assertEqual(epm_chooser.model.train.call_count, 1)

        # For each configuration it is randomly sampled whether to take it from the list of challengers or to sample it
        # completely at random. Therefore, it is not guaranteed to obtain twice the number of configurations selected
        # by EI.
        self.assertEqual(len(challengers), 10198)
        num_random_search_sorted = 0
        num_random_search = 0
        num_local_search = 0
        for c in challengers:
            self.assertIsInstance(c, Configuration)
            if "Random Search (sorted)" == c.origin:
                num_random_search_sorted += 1
            elif "Random Search" == c.origin:
                num_random_search += 1
            elif "Local Search" == c.origin:
                num_local_search += 1
            else:
                raise ValueError((
                    c.origin,
                    "Local Search" == c.origin,
                    type("Local Search"),
                    type(c.origin),
                ))

        self.assertEqual(num_local_search, 11)
        self.assertEqual(num_random_search_sorted, 5000)
        self.assertEqual(num_random_search, 5187)
Example #17
    def test_choose_next(self):
        seed = 42
        config = self.scenario.cs.sample_configuration()
        rh = RunHistory()
        rh.add(config, 10, 10, StatusType.SUCCESS)

        smbo = SMAC4AC(self.scenario, rng=seed, runhistory=rh).solver

        x = next(smbo.epm_chooser.choose_next()).get_array()
        self.assertEqual(x.shape, (2, ))
Example #18
    def test_objective_runtime(self):
        ''' test if everything is ok with objective runtime (imputing!) '''
        scen = Scenario(self.scen_fn,
                        cmd_args={'run_obj': 'runtime',
                                  'cutoff_time': 5})
        validator = Validator(scen, self.trajectory, self.rng)
        old_configs = [entry["incumbent"] for entry in self.trajectory]
        old_rh = RunHistory(average_cost)
        for config in old_configs[:int(len(old_configs) / 2)]:
            old_rh.add(config, 1, 1, StatusType.SUCCESS, instance_id='0')
        validator.validate_epm('all', 'train', 1, old_rh)
Example #19
    def test_get_runs_capped(self):
        ''' test if capped, crashed and aborted runs are ignored
            during rh-recovery '''
        scen = Scenario(self.scen_fn,
                        cmd_options={'run_obj': 'quality',
                                     'instances': ['0']})

        validator = Validator(scen, self.trajectory, self.rng)

        # Get runhistory
        old_configs = [Configuration(scen.cs, values={'x1': i, 'x2': i}) for i in range(1, 7)]
        old_rh = RunHistory()
        old_rh.add(old_configs[0], 1, 1, StatusType.SUCCESS, instance_id='0', seed=0)
        old_rh.add(old_configs[1], 1, 1, StatusType.TIMEOUT, instance_id='0', seed=0)
        old_rh.add(old_configs[2], 1, 1, StatusType.CRASHED, instance_id='0', seed=0)
        old_rh.add(old_configs[3], 1, 1, StatusType.ABORT, instance_id='0', seed=0)
        old_rh.add(old_configs[4], 1, 1, StatusType.MEMOUT, instance_id='0', seed=0)
        old_rh.add(old_configs[5], 1, 1, StatusType.CAPPED, instance_id='0', seed=0)

        # Get multiple configs
        expected = [_Run(inst_specs='0', seed=0, inst='0', config=old_configs[2]),
                    _Run(inst_specs='0', seed=0, inst='0', config=old_configs[3]),
                    _Run(inst_specs='0', seed=0, inst='0', config=old_configs[5])]

        runs = validator._get_runs(old_configs, ['0'], repetitions=1, runhistory=old_rh)
        self.assertEqual(runs[0], expected)
Example #20
    def test_add_multiple_times(self):
        rh = RunHistory()
        cs = get_config_space()
        config = Configuration(cs, values={'a': 1, 'b': 2})

        for i in range(5):
            rh.add(config=config, cost=i + 1, time=i + 1,
                   status=StatusType.SUCCESS, instance_id=None,
                   seed=12345, additional_info=None)

        self.assertEqual(len(rh.data), 1)
        self.assertEqual(len(rh.get_runs_for_config(config, only_max_observed_budget=True)), 1)
        self.assertEqual(len(rh._configid_to_inst_seed_budget[1]), 1)
        self.assertEqual(list(rh.data.values())[0].cost, 1)
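As a hedged continuation of the test body above (reusing its rh, config and StatusType), a previously unseen seed creates a second run entry instead of being deduplicated:

        # Hypothetical continuation: a new seed yields a new run key
        rh.add(config=config, cost=99, time=99,
               status=StatusType.SUCCESS, instance_id=None,
               seed=54321, additional_info=None)
        self.assertEqual(len(rh.data), 2)
        self.assertEqual(
            len(rh.get_runs_for_config(config, only_max_observed_budget=True)), 2)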
Example #21
    def test_get_runs_capped(self):
        ''' test if capped, crashed and aborted runs are ignored
            during rh-recovery '''
        scen = Scenario(self.scen_fn,
                        cmd_options={
                            'run_obj': 'quality',
                            'instances': ['0']
                        })

        validator = Validator(scen, self.trajectory, self.rng)

        # Get runhistory
        old_configs = [
            'config1', 'config2', 'config3', 'config4', 'config5', 'config6'
        ]
        old_rh = RunHistory(average_cost)
        old_rh.add('config1',
                   1,
                   1,
                   StatusType.SUCCESS,
                   instance_id='0',
                   seed=0)
        old_rh.add('config2',
                   1,
                   1,
                   StatusType.TIMEOUT,
                   instance_id='0',
                   seed=0)
        old_rh.add('config3',
                   1,
                   1,
                   StatusType.CRASHED,
                   instance_id='0',
                   seed=0)
        old_rh.add('config4', 1, 1, StatusType.ABORT, instance_id='0', seed=0)
        old_rh.add('config5', 1, 1, StatusType.MEMOUT, instance_id='0', seed=0)
        old_rh.add('config6', 1, 1, StatusType.CAPPED, instance_id='0', seed=0)

        # Get multiple configs
        expected = [
            _Run(inst_specs='0', seed=0, inst='0', config='config3'),
            _Run(inst_specs='0', seed=0, inst='0', config='config4'),
            _Run(inst_specs='0', seed=0, inst='0', config='config6')
        ]

        runs = validator._get_runs(old_configs, ['0'],
                                   repetitions=1,
                                   runhistory=old_rh)
        self.assertEqual(runs[0], expected)
Example #22
    def test_epm_reuse_rf(self):
        """ If no runhistory is passed to the EPM, but a model was trained
        before, that model should be reused (if the reuse_epm flag is set). """
        scen = Scenario(self.scen_fn, cmd_args={'run_obj': 'quality'})
        scen.feature_array = None
        validator = Validator(scen, self.trajectory)
        old_rh = RunHistory(average_cost)
        for config in [e["incumbent"] for e in self.trajectory]:
            old_rh.add(config, 1, 1, StatusType.SUCCESS, instance_id='0',
                       seed=127)
        self.assertTrue(isinstance(validator.validate_epm(runhistory=old_rh),
                                   RunHistory))
        self.assertTrue(isinstance(validator.validate_epm(
                                    output_fn="test/test_files/validation/"),
                                    RunHistory))
        self.assertRaises(ValueError, validator.validate_epm, reuse_epm=False)
Example #23
    def test_validate_epm(self):
        ''' test using epm to validate '''
        scen = Scenario(self.scen_fn,
                        cmd_args={'run_obj': 'quality',
                                  'instances': self.train_insts,
                                  'test_instances': self.test_insts,
                                  'features': self.feature_dict})
        scen.instance_specific = self.inst_specs
        validator = Validator(scen, self.trajectory, self.rng)
        # Add a few runs and check if they are correctly processed
        old_configs = [entry["incumbent"] for entry in self.trajectory]
        old_rh = RunHistory(average_cost)
        for config in old_configs[:int(len(old_configs) / 2)]:
            old_rh.add(config, 1, 1, StatusType.SUCCESS, instance_id='0',
                       seed=127)
        validator.validate_epm('all', 'train', 1, old_rh)
Example #24
    def reduce_runhistory(self, rh: RunHistory, max_configs: int, keep=None):
        """
        Reduce configs to desired number, by default just drop the configs with the fewest runs.

        Parameters
        ----------
        rh: RunHistory
            runhistory that is to be reduced
        max_configs: int
            if > -1 reduce runhistory to at most max_configs
        keep: List[Configuration]
            list of configs that should be kept for sure (e.g. default, incumbents)

        Returns
        -------
        rh: RunHistory
            reduced runhistory
        """
        configs = rh.get_all_configs()
        if max_configs <= 0 or max_configs > len(configs):  # keep all
            return rh

        runs = [(c,
                 len(rh.get_runs_for_config(c,
                                            only_max_observed_budget=False)))
                for c in configs]
        if not keep:
            keep = []
        runs = sorted(runs, key=lambda x: x[1])[-max_configs:]
        keep = [r[0] for r in runs] + keep
        self.logger.info(
            "Reducing number of configs from %d to %d, dropping those with the fewest evaluations",
            len(configs), len(keep))

        new_rh = RunHistory()
        for k, v in list(rh.data.items()):
            c = rh.ids_config[k.config_id]
            if c in keep:
                new_rh.add(config=rh.ids_config[k.config_id],
                           cost=v.cost,
                           time=v.time,
                           status=v.status,
                           instance_id=k.instance_id,
                           seed=k.seed)
        return new_rh
Example #25
    def read_str(data: str,
                 cs: ConfigurationSpace,
                 aggregate_func: callable = average_cost):
        """Read a string line and transform it to a ConfigHistory. The input
        should be valid. For example, "0.8(config) 1(#runhistory) 0.6 1.2 1234"

        Parameters
        ----------
        data : str
            A list of strings containing config and runhistory info.
        cs : ConfigurationSpace
            The ConfigurationSpace.
        aggregate_func : callable, default = average_cost
            The aggregate function.

        Returns
        -------
        Return : ConfigHistory
            Return a ConfigHistory.
        """
        # First, split the line and read in the Configuration
        line = data.split()
        # Use the ConfigSpace to get the number of hyperparameters
        num_config = len(cs.get_hyperparameters())
        config = Configuration(cs, vector=np.array(
            [float(param) for param in line[:num_config]]))

        # Initialize; each config corresponds to one runhistory
        runhistory = RunHistory(aggregate_func=aggregate_func)
        # Read the number of runhistory entries
        num_runhistory = int(float(line[num_config]))
        counter = num_config + 1
        # Then read each triple of numbers as one runhistory entry
        for i in range(num_runhistory):
            cost = float(line[counter])
            time = float(line[counter + 1])
            seed = int(float(line[counter + 2]))
            counter += 3
            # Add the entry to the runhistory
            runhistory.add(config, cost, time, StatusType.SUCCESS, seed=seed)

        # Wrap this runhistory in a ConfigHistory and return it
        config_history = ConfigHistory(config, cs, runhistory=runhistory,
                                       aggregate_func=aggregate_func)
        return config_history
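To make the documented line format concrete, here is a small hedged sketch that parses the example string from the docstring with plain Python (no SMAC or project classes involved; a one-hyperparameter space is assumed):

line = "0.8 1 0.6 1.2 1234".split()             # one config value, then one recorded run
num_config = 1                                  # len(cs.get_hyperparameters()) for a 1-d space
vector = [float(p) for p in line[:num_config]]  # [0.8] becomes the Configuration vector
num_runhistory = int(float(line[num_config]))   # one (cost, time, seed) triple follows
cost, time, seed = float(line[2]), float(line[3]), int(float(line[4]))
assert (vector, num_runhistory, cost, time, seed) == ([0.8], 1, 0.6, 1.2, 1234)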
Example #26
    def reduce_runhistory(rh, keep_budgets):
        if not isinstance(rh, RunHistory):
            self.logger.debug("This is not a RunHistory: %s", rh)
            return rh
        new_rh = RunHistory()
        for rk, rv in rh.data.items():
            if rk.budget in keep_budgets or rh.ids_config[
                    rk.config_id] in [cr.default]:
                new_rh.add(config=rh.ids_config[rk.config_id],
                           cost=rv.cost,
                           time=rv.time,
                           status=rv.status,
                           instance_id=rk.instance_id,
                           seed=rk.seed,
                           budget=rk.budget,
                           additional_info=rv.additional_info,
                           origin=rh.external[rk])
        return new_rh
Example #27
    def test_incremental_update(self):

        rh = RunHistory(aggregate_func=average_cost)
        cs = get_config_space()
        config1 = Configuration(cs,
                                values={'a': 1, 'b': 2})

        rh.add(config=config1, cost=10, time=20,
               status=StatusType.SUCCESS, instance_id=1,
               seed=1)

        self.assertTrue(rh.get_cost(config1) == 10)

        rh.add(config=config1, cost=20, time=20,
               status=StatusType.SUCCESS, instance_id=2,
               seed=1)

        self.assertTrue(rh.get_cost(config1) == 15)
Example #28
    def test_choose_next_higher_budget(self):
        seed = 42
        config = self.scenario.cs.sample_configuration
        rh = RunHistory()
        rh.add(
            config=config(),
            cost=1,
            time=10,
            instance_id=None,
            seed=1,
            budget=1,
            additional_info=None,
            status=StatusType.SUCCESS,
        )
        rh.add(
            config=config(),
            cost=2,
            time=10,
            instance_id=None,
            seed=1,
            budget=2,
            additional_info=None,
            status=StatusType.SUCCESS,
        )
        rh.add(
            config=config(),
            cost=3,
            time=10,
            instance_id=None,
            seed=1,
            budget=2,
            additional_info=None,
            status=StatusType.SUCCESS,
        )
        rh.add(
            config=config(),
            cost=4,
            time=10,
            instance_id=None,
            seed=1,
            budget=3,
            additional_info=None,
            status=StatusType.SUCCESS,
        )

        smbo = SMAC4AC(self.scenario, rng=seed, runhistory=rh).solver
        smbo.epm_chooser.min_samples_model = 2

        # Return two configurations evaluated with budget==2
        X, Y, X_configurations = smbo.epm_chooser._collect_data_to_train_model(
        )
        self.assertListEqual(list(Y.flatten()), [2, 3])
        self.assertEqual(X.shape[0], 2)
        self.assertEqual(X_configurations.shape[0], 2)
Example #29
    def _get_runs_per_config_quantiled(self, rh, conf_list, quantiles):
        """Returns a list of lists, each sublist representing the current state
        at that timestep (quantile). The current state means a list of times
        each config was evaluated at that timestep.

        Parameters
        ----------
        rh: RunHistory
            rh to evaluate
        conf_list: list
            list of all Configuration objects that appeared in runhistory
        quantiles: int
            number of fractions to split rh into

        Returns:
        --------
        runs_per_quantile: np.array
            numpy array of runs per configuration per quantile
        """
        runs_total = len(rh.data)
        # Create LINEAR ranges. TODO do we want log? -> this line
        ranges = [int(r) for r in np.linspace(0, runs_total, quantiles + 1)]
        self.logger.debug(
            "Creating %d quantiles with a step of %.2f and a total "
            "runs of %d", quantiles, runs_total / quantiles, runs_total)
        self.logger.debug("Ranges: %s", str(ranges))

        # Iterate over the runhistory's entries in ranges and create each
        # sublist from a "snapshot"-runhistory
        r_p_q_p_c = []  # runs per quantile per config
        as_list = list(rh.data.items())
        tmp_rh = RunHistory(average_cost)
        for i, j in zip(ranges[:-1], ranges[1:]):
            for idx in range(i, j):
                k, v = as_list[idx]
                tmp_rh.add(config=rh.ids_config[k.config_id],
                           cost=v.cost,
                           time=v.time,
                           status=v.status,
                           instance_id=k.instance_id,
                           seed=k.seed)
            r_p_q_p_c.append(
                [len(tmp_rh.get_runs_for_config(c)) for c in conf_list])
        return r_p_q_p_c
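As a quick illustration of the linear quantile boundaries computed above (a made-up example, not tied to any runhistory): 10 runs split into 4 quantiles yield the snapshot ranges [0, 2, 5, 7, 10].

import numpy as np

# 10 runs, 4 quantiles -> boundaries via np.linspace, truncated to ints as in the method above
ranges = [int(r) for r in np.linspace(0, 10, 4 + 1)]
assert ranges == [0, 2, 5, 7, 10]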
Example #30
    def test_illegal_input(self):
        rh = RunHistory()

        with self.assertRaisesRegex(
                TypeError,
                'Configuration to add to the runhistory must not be None'):
            rh.add(config=None,
                   cost=1.23,
                   time=2.34,
                   status=StatusType.SUCCESS)

        with self.assertRaisesRegex(
                TypeError,
                "Configuration to add to the runhistory is not of type Configuration, but <class 'str'>",
        ):
            rh.add(config='abc',
                   cost=1.23,
                   time=2.34,
                   status=StatusType.SUCCESS)
Example #31
File: smbo.py  Project: Ayaro/auto-sklearn
    def run_smbo(self, max_iters=1000):
        global evaluator

        # == first things first: load the datamanager
        self.reset_data_manager()
        
        # == Initialize SMBO stuff
        # first create a scenario
        seed = self.seed # TODO
        num_params = len(self.config_space.get_hyperparameters())
        # allocate a run history
        run_history = RunHistory()
        meta_runhistory = RunHistory()
        meta_runs_dataset_indices = {}
        num_run = self.start_num_run
        instance_id = self.dataset_name + SENTINEL

        # == Train on subset
        #    before doing anything, let us run the default_cfg
        #    on a subset of the available data to ensure that
        #    we at least have some models
        #    we will try three different ratios of decreasing magnitude
        #    in the hope that at least on the last one we will be able
        #    to get a model
        n_data = self.datamanager.data['X_train'].shape[0]
        subset_ratio = 10000. / n_data
        if subset_ratio >= 0.5:
            subset_ratio = 0.33
            subset_ratios = [subset_ratio, subset_ratio * 0.10]
        else:
            subset_ratios = [subset_ratio, 500. / n_data]
        self.logger.info("Training default configurations on a subset of "
                         "%d/%d data points." %
                         (int(n_data * subset_ratio), n_data))

        # the time limit for these function evaluations is rigorously
        # set to only 1/2 of a full function evaluation
        subset_time_limit = max(5, int(self.func_eval_time_limit / 2))
        # the configs we want to run on the data subset are:
        # 1) the default configs
        # 2) a set of configs we selected for training on a subset
        subset_configs = [self.config_space.get_default_configuration()] \
                          + self.collect_additional_subset_defaults()
        subset_config_succesful = [False] * len(subset_configs)
        for subset_config_id, next_config in enumerate(subset_configs):
            for i, ratio in enumerate(subset_ratios):
                self.reset_data_manager()
                n_data_subsample = int(n_data * ratio)

                # run the config, but throw away the result afterwards
                # since this cfg was evaluated only on a subset
                # and we don't want to confuse SMAC
                self.logger.info("Starting to evaluate %d on SUBSET "
                                 "with size %d and time limit %ds.",
                                 num_run, n_data_subsample,
                                 subset_time_limit)
                self.logger.info(next_config)
                _info = eval_with_limits(
                    self.datamanager, self.tmp_dir, next_config,
                    seed, num_run,
                    self.resampling_strategy,
                    self.resampling_strategy_args,
                    self.memory_limit,
                    subset_time_limit, n_data_subsample)
                (duration, result, _, additional_run_info, status) = _info
                self.logger.info("Finished evaluating %d. configuration on SUBSET. "
                                 "Duration %f; loss %f; status %s; additional run "
                                 "info: %s ", num_run, duration, result,
                                 str(status), additional_run_info)

                num_run += 1
                if i < len(subset_ratios) - 1:
                    if status != StatusType.SUCCESS:
                        # Do not increase num_run here, because we will try
                        # the same configuration with less data
                        self.logger.info("A CONFIG did not finish "
                                         " for subset ratio %f -> going smaller",
                                         ratio)
                        continue
                    else:
                        self.logger.info("Finished SUBSET training sucessfully "
                                         "with ratio %f", ratio)
                        subset_config_succesful[subset_config_id] = True
                        break
                else:
                    if status != StatusType.SUCCESS:
                        self.logger.info("A CONFIG did not finish "
                                         " for subset ratio %f.",
                                         ratio)
                        continue
                    else:
                        self.logger.info("Finished SUBSET training sucessfully "
                                         "with ratio %f", ratio)
                        subset_config_succesful[subset_config_id] = True
                        break

        # Use the first non-failing configuration from the subsets as the new
        #  default configuration -> this guards us against the random forest
        # failing on large, sparse datasets
        default_cfg = None
        for subset_config_id, next_config in enumerate(subset_configs):
            if subset_config_succesful[subset_config_id]:
                default_cfg = next_config
                break
        if default_cfg is None:
            default_cfg = self.config_space.get_default_configuration()

        # == METALEARNING suggestions
        # we start by evaluating the defaults on the full dataset again
        # and add the suggestions from metalearning behind it

        if self.metadata_directory is None:
            metalearning_directory = os.path.dirname(
                autosklearn.metalearning.__file__)
            # There is no multilabel data in OpenML
            if self.task == MULTILABEL_CLASSIFICATION:
                meta_task = BINARY_CLASSIFICATION
            else:
                meta_task = self.task
            metadata_directory = os.path.join(
                metalearning_directory, 'files',
                '%s_%s_%s' % (METRIC_TO_STRING[self.metric],
                              TASK_TYPES_TO_STRING[meta_task],
                              'sparse' if self.datamanager.info['is_sparse']
                              else 'dense'))
            self.metadata_directory = metadata_directory

        self.logger.info('Metadata directory: %s', self.metadata_directory)
        meta_base = MetaBase(self.config_space, self.metadata_directory)

        metafeature_calculation_time_limit = int(
            self.total_walltime_limit / 4)
        metafeature_calculation_start_time = time.time()
        meta_features = self._calculate_metafeatures_with_limits(
            metafeature_calculation_time_limit)
        metafeature_calculation_end_time = time.time()
        metafeature_calculation_time_limit = \
            metafeature_calculation_time_limit - (
            metafeature_calculation_end_time -
            metafeature_calculation_start_time)

        if metafeature_calculation_time_limit < 1:
            self.logger.warning('Time limit for metafeature calculation less '
                                'than 1 seconds (%f). Skipping calculation '
                                'of metafeatures for encoded dataset.',
                                metafeature_calculation_time_limit)
            meta_features_encoded = None
        else:
            self.datamanager.perform1HotEncoding()
            meta_features_encoded = \
                self._calculate_metafeatures_encoded_with_limits(
                    metafeature_calculation_time_limit)

        # In case there is a problem calculating the encoded meta-features
        if meta_features is None:
            if meta_features_encoded is not None:
                meta_features = meta_features_encoded
        else:
            if meta_features_encoded is not None:
                meta_features.metafeature_values.update(
                    meta_features_encoded.metafeature_values)

        if meta_features is not None:
            meta_base.add_dataset(instance_id, meta_features)
            # Do mean imputation of the meta-features - should be done specific
            # for each prediction model!
            all_metafeatures = meta_base.get_metafeatures(
                features=list(meta_features.keys()))
            all_metafeatures.fillna(all_metafeatures.mean(), inplace=True)

            metalearning_configurations = self.collect_metalearning_suggestions(
                meta_base)
            if metalearning_configurations is None:
                metalearning_configurations = []
            self.reset_data_manager()

            self.logger.info('%s', meta_features)

            # Convert meta-features into a dictionary because the scenario
            # expects a dictionary
            meta_features_dict = {}
            for dataset, series in all_metafeatures.iterrows():
                meta_features_dict[dataset] = series.values
            meta_features_list = []
            for meta_feature_name in all_metafeatures.columns:
                meta_features_list.append(meta_features[meta_feature_name].value)
            meta_features_list = np.array(meta_features_list).reshape((1, -1))
            self.logger.info(list(meta_features_dict.keys()))

            meta_runs = meta_base.get_all_runs(METRIC_TO_STRING[self.metric])
            meta_runs_index = 0
            try:
                meta_durations = meta_base.get_all_runs('runtime')
                read_runtime_data = True
            except KeyError:
                read_runtime_data = False
                self.logger.critical('Cannot read runtime data.')
                if self.acquisition_function == 'EIPS':
                    self.logger.critical('Reverting to acquisition function EI!')
                    self.acquisition_function = 'EI'

            for meta_dataset in meta_runs.index:
                meta_dataset_start_index = meta_runs_index
                for meta_configuration in meta_runs.columns:
                    if np.isfinite(meta_runs.loc[meta_dataset, meta_configuration]):
                        try:
                            config = meta_base.get_configuration_from_algorithm_index(
                                meta_configuration)
                            cost = meta_runs.loc[meta_dataset, meta_configuration]
                            if read_runtime_data:
                                runtime = meta_durations.loc[meta_dataset,
                                                             meta_configuration]
                            else:
                                runtime = 1
                            # TODO read out other status types!
                            meta_runhistory.add(config, cost, runtime,
                                                StatusType.SUCCESS,
                                                instance_id=meta_dataset)
                            meta_runs_index += 1
                        except:
                            # TODO maybe add warning
                            pass

                meta_runs_dataset_indices[meta_dataset] = (
                    meta_dataset_start_index, meta_runs_index)
        else:
            if self.acquisition_function == 'EIPS':
                self.logger.critical('Reverting to acquisition function EI!')
                self.acquisition_function = 'EI'
            meta_features_list = []
            meta_features_dict = {}
            metalearning_configurations = []

        self.scenario = AutoMLScenario(self.config_space,
                                       self.total_walltime_limit,
                                       self.func_eval_time_limit,
                                       meta_features_dict,
                                       self.tmp_dir,
                                       self.shared_mode)

        types = get_types(self.config_space, self.scenario.feature_array)
        if self.acquisition_function == 'EI':
            rh2EPM = RunHistory2EPM4Cost(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=None,
                                         impute_censored_data=False,
                                         impute_state=None)
            model = RandomForestWithInstances(types,
                                              instance_features=meta_features_list,
                                              seed=1, num_trees=10)
            smac = SMBO(self.scenario, model=model,
                        rng=seed)
        elif self.acquisition_function == 'EIPS':
            rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
                                         scenario=self.scenario,
                                         success_states=None,
                                         impute_censored_data=False,
                                         impute_state=None)
            model = UncorrelatedMultiObjectiveRandomForestWithInstances(
                ['cost', 'runtime'], types, num_trees = 10,
                instance_features=meta_features_list, seed=1)
            acquisition_function = EIPS(model)
            smac = SMBO(self.scenario,
                        acquisition_function=acquisition_function,
                        model=model, runhistory2epm=rh2EPM, rng=seed)
        else:
            raise ValueError('Unknown acquisition function value %s!' %
                             self.acquisition_function)

        # Build a runtime model
        # runtime_rf = RandomForestWithInstances(types,
        #                                        instance_features=meta_features_list,
        #                                        seed=1, num_trees=10)
        # runtime_rh2EPM = RunHistory2EPM4EIPS(num_params=num_params,
        #                                      scenario=self.scenario,
        #                                      success_states=None,
        #                                      impute_censored_data=False,
        #                                      impute_state=None)
        # X_runtime, y_runtime = runtime_rh2EPM.transform(meta_runhistory)
        # runtime_rf.train(X_runtime, y_runtime[:, 1].flatten())
        X_meta, Y_meta = rh2EPM.transform(meta_runhistory)
        # Transform Y_meta on a per-dataset base
        for meta_dataset in meta_runs_dataset_indices:
            start_index, end_index = meta_runs_dataset_indices[meta_dataset]
            end_index += 1  # Python indexing
            Y_meta[start_index:end_index, 0][
                Y_meta[start_index:end_index, 0] > 2.0] = 2.0
            dataset_minimum = np.min(Y_meta[start_index:end_index, 0])
            Y_meta[start_index:end_index, 0] = 1 - (
                (1. - Y_meta[start_index:end_index, 0]) /
                (1. - dataset_minimum))
            Y_meta[start_index:end_index, 0][
                Y_meta[start_index:end_index, 0] > 2] = 2

        # == first, evaluate all metalearning and default configurations
        for i, next_config in enumerate(([default_cfg] +
                                          metalearning_configurations)):
            # Do not evaluate default configurations more than once
            if i >= len([default_cfg]) and next_config in [default_cfg]:
                continue

            config_name = 'meta-learning' if i >= len([default_cfg]) \
                else 'default'

            self.logger.info("Starting to evaluate %d. configuration "
                             "(%s configuration) with time limit %ds.",
                             num_run, config_name, self.func_eval_time_limit)
            self.logger.info(next_config)
            self.reset_data_manager()
            info = eval_with_limits(self.datamanager, self.tmp_dir, next_config,
                                    seed, num_run,
                                    self.resampling_strategy,
                                    self.resampling_strategy_args,
                                    self.memory_limit,
                                    self.func_eval_time_limit)
            (duration, result, _, additional_run_info, status) = info
            run_history.add(config=next_config, cost=result,
                            time=duration, status=status,
                            instance_id=instance_id, seed=seed)
            run_history.update_cost(next_config, result)
            self.logger.info("Finished evaluating %d. configuration. "
                             "Duration %f; loss %f; status %s; additional run "
                             "info: %s ", num_run, duration, result,
                             str(status), additional_run_info)
            num_run += 1
            if smac.incumbent is None:
                smac.incumbent = next_config
            elif result < run_history.get_cost(smac.incumbent):
                smac.incumbent = next_config

            if self.scenario.shared_model:
                pSMAC.write(run_history=run_history,
                            output_directory=self.scenario.output_dir,
                            num_run=self.seed)

        # == after metalearning run SMAC loop
        smac.runhistory = run_history
        smac_iter = 0
        finished = False
        while not finished:
            if self.scenario.shared_model:
                pSMAC.read(run_history=run_history,
                           output_directory=self.scenario.output_dir,
                           configuration_space=self.config_space,
                           logger=self.logger)

            next_configs = []
            time_for_choose_next = -1
            try:
                X_cfg, Y_cfg = rh2EPM.transform(run_history)

                if not run_history.empty():
                    # Update costs by normalization
                    dataset_minimum = np.min(Y_cfg[:, 0])
                    Y_cfg[:, 0] = 1 - ((1. - Y_cfg[:, 0]) /
                                       (1. - dataset_minimum))
                    Y_cfg[:, 0][Y_cfg[:, 0] > 2] = 2

                if len(X_meta) > 0 and len(X_cfg) > 0:
                    pass
                    #X_cfg = np.concatenate((X_meta, X_cfg))
                    #Y_cfg = np.concatenate((Y_meta, Y_cfg))
                elif len(X_meta) > 0:
                    X_cfg = X_meta.copy()
                    Y_cfg = Y_meta.copy()
                elif len(X_cfg) > 0:
                    X_cfg = X_cfg.copy()
                    Y_cfg = Y_cfg.copy()
                else:
                    raise ValueError('No training data for SMAC random forest!')

                self.logger.info('Using %d training points for SMAC.' %
                                 X_cfg.shape[0])
                choose_next_start_time = time.time()
                next_configs_tmp = smac.choose_next(X_cfg, Y_cfg,
                                                    num_interleaved_random=110,
                                                    num_configurations_by_local_search=10,
                                                    num_configurations_by_random_search_sorted=100)
                time_for_choose_next = time.time() - choose_next_start_time
                self.logger.info('Used %g seconds to find next '
                                 'configurations' % (time_for_choose_next))
                next_configs.extend(next_configs_tmp)
            # TODO put Exception here!
            except Exception as e:
                self.logger.error(e)
                self.logger.error("Error in getting next configurations "
                                  "with SMAC. Using random configuration!")
                next_config = self.config_space.sample_configuration()
                next_configs.append(next_config)

            models_fitted_this_iteration = 0
            start_time_this_iteration = time.time()
            for next_config in next_configs:
                x_runtime = impute_inactive_values(next_config)
                x_runtime = impute_inactive_values(x_runtime).get_array()
                # predicted_runtime = runtime_rf.predict_marginalized_over_instances(
                #     x_runtime.reshape((1, -1)))
                # predicted_runtime = np.exp(predicted_runtime[0][0][0]) - 1

                self.logger.info("Starting to evaluate %d. configuration (from "
                                 "SMAC) with time limit %ds.", num_run,
                                 self.func_eval_time_limit)
                self.logger.info(next_config)
                self.reset_data_manager()
                info = eval_with_limits(self.datamanager, self.tmp_dir, next_config,
                                        seed, num_run,
                                        self.resampling_strategy,
                                        self.resampling_strategy_args,
                                        self.memory_limit,
                                        self.func_eval_time_limit)
                (duration, result, _, additional_run_info, status) = info
                run_history.add(config=next_config, cost=result,
                                time=duration, status=status,
                                instance_id=instance_id, seed=seed)
                run_history.update_cost(next_config, result)

                #self.logger.info('Predicted runtime %g, true runtime %g',
                #                 predicted_runtime, duration)

                # TODO add unittest to make sure everything works fine and
                # this does not get outdated!
                if smac.incumbent is None:
                    smac.incumbent = next_config
                elif result < run_history.get_cost(smac.incumbent):
                    smac.incumbent = next_config

                self.logger.info("Finished evaluating %d. configuration. "
                                 "Duration: %f; loss: %f; status %s; additional "
                                 "run info: %s ", num_run, duration, result,
                                 str(status), additional_run_info)
                smac_iter += 1
                num_run += 1

                models_fitted_this_iteration += 1
                time_used_this_iteration = time.time() - start_time_this_iteration
                if models_fitted_this_iteration >= 2 and \
                        time_for_choose_next > 0 and \
                        time_used_this_iteration > time_for_choose_next:
                    break
                elif time_for_choose_next <= 0 and \
                        models_fitted_this_iteration >= 1:
                    break
                elif models_fitted_this_iteration >= 50:
                    break

                if max_iters is not None:
                    finished = (smac_iter >= max_iters)

            if self.scenario.shared_model:
                pSMAC.write(run_history=run_history,
                            output_directory=self.scenario.output_dir,
                            num_run=self.seed)