    def test_uniformfloat_transform(self):
        """This checks whether a value sampled through the configuration
        space (it does not happen when the variable is sampled alone) stays
        equal when it is serialized to JSON and then deserialized again."""

        cs = ConfigurationSpace()
        a = cs.add_hyperparameter(UniformFloatHyperparameter('a', -5, 10))
        b = cs.add_hyperparameter(NormalFloatHyperparameter('b', 1, 2,
                                                            log=True))
        for i in range(100):
            config = cs.sample_configuration()
            value = OrderedDict(sorted(config.get_dictionary().items()))
            string = json.dumps(value)
            saved_value = json.loads(string)
            saved_value = OrderedDict(sorted(byteify(saved_value).items()))
            self.assertEqual(repr(value), repr(saved_value))

        # Next, test whether the truncation also works when initializing the
        # Configuration with a dictionary
        for i in range(100):
            rs = np.random.RandomState(1)
            value_a = a.sample(rs)
            value_b = b.sample(rs)
            values_dict = {'a': value_a, 'b': value_b}
            config = Configuration(cs, values=values_dict)
            string = json.dumps(config.get_dictionary())
            saved_value = json.loads(string)
            saved_value = byteify(saved_value)
            self.assertEqual(values_dict, saved_value)
Example #2
    def _test_random_neigbor(self, hp):
        cs = ConfigurationSpace()
        if not isinstance(hp, list):
            hp = [hp]
        for hp_ in hp:
            cs.add_hyperparameter(hp_)
        cs.seed(1)
        config = cs.sample_configuration()
        for i in range(100):
            new_config = get_random_neighbor(config, i)
            self.assertNotEqual(config, new_config)
Example #3
    def test_sample_configuration(self):
        cs = ConfigurationSpace()
        hp1 = CategoricalHyperparameter("parent", [0, 1])
        cs.add_hyperparameter(hp1)
        hp2 = UniformIntegerHyperparameter("child", 0, 10)
        cs.add_hyperparameter(hp2)
        cond1 = EqualsCondition(hp2, hp1, 0)
        cs.add_condition(cond1)
        # This automatically checks the configuration!
        Configuration(cs, dict(parent=0, child=5))

        # and now for something more complicated
        cs = ConfigurationSpace(seed=1)
        hp1 = CategoricalHyperparameter("input1", [0, 1])
        cs.add_hyperparameter(hp1)
        hp2 = CategoricalHyperparameter("input2", [0, 1])
        cs.add_hyperparameter(hp2)
        hp3 = CategoricalHyperparameter("input3", [0, 1])
        cs.add_hyperparameter(hp3)
        hp4 = CategoricalHyperparameter("input4", [0, 1])
        cs.add_hyperparameter(hp4)
        hp5 = CategoricalHyperparameter("input5", [0, 1])
        cs.add_hyperparameter(hp5)
        hp6 = Constant("AND", "True")
        cs.add_hyperparameter(hp6)

        cond1 = EqualsCondition(hp6, hp1, 1)
        cond2 = NotEqualsCondition(hp6, hp2, 1)
        cond3 = InCondition(hp6, hp3, [1])
        cond4 = EqualsCondition(hp5, hp3, 1)
        cond5 = EqualsCondition(hp4, hp5, 1)
        cond6 = EqualsCondition(hp6, hp4, 1)
        cond7 = EqualsCondition(hp6, hp5, 1)

        conj1 = AndConjunction(cond1, cond2)
        conj2 = OrConjunction(conj1, cond3)
        conj3 = AndConjunction(conj2, cond6, cond7)
        cs.add_condition(cond4)
        cs.add_condition(cond5)
        cs.add_condition(conj3)

        samples = []
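        # Re-seeding the space with the same seed must reproduce the identical sequence of samples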
        for i in range(5):
            cs.seed(1)
            samples.append([])
            for j in range(100):
                sample = cs.sample_configuration()
                samples[-1].append(sample)

            if i > 0:
                for j in range(100):
                    self.assertEqual(samples[-1][j], samples[-2][j])
Example #5
    def test_get_one_exchange_neighbourhood(self):
        # test fixed_dims
        cs = ConfigurationSpace()
        cs.add_hyperparameter(
            CategoricalHyperparameter('0', [0, 1], default_value=0))
        cs.add_hyperparameter(
            CategoricalHyperparameter('1', [0, 1], default_value=0))
        cs.add_hyperparameter(
            CategoricalHyperparameter('2', [0, 1], default_value=0))
        cs.add_hyperparameter(
            CategoricalHyperparameter('3', [0, 1], default_value=0))
        cs.add_hyperparameter(
            CategoricalHyperparameter('4', [0, 1, 2, 3, 4], default_value=0))

        fixed_dims = {'0': 1, '1': 0, '2': 0, '3': 0}

        conf = cs.sample_configuration(fixed_dims=fixed_dims)
        neighborhood_iter = get_one_exchange_neighbourhood(
            conf, seed=0, fixed_dims=fixed_dims)
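        # With '0'-'3' fixed, only hyperparameter '4' is free to change, so the
        # iterator yields exactly four neighbors before StopIteration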

        tmp = next(neighborhood_iter)
        self.assertEqual(tmp['0'], fixed_dims['0'])
        self.assertEqual(tmp['1'], fixed_dims['1'])
        self.assertEqual(tmp['2'], fixed_dims['2'])
        self.assertEqual(tmp['3'], fixed_dims['3'])

        tmp = next(neighborhood_iter)
        self.assertEqual(tmp['0'], fixed_dims['0'])
        self.assertEqual(tmp['1'], fixed_dims['1'])
        self.assertEqual(tmp['2'], fixed_dims['2'])
        self.assertEqual(tmp['3'], fixed_dims['3'])

        tmp = next(neighborhood_iter)
        self.assertEqual(tmp['0'], fixed_dims['0'])
        self.assertEqual(tmp['1'], fixed_dims['1'])
        self.assertEqual(tmp['2'], fixed_dims['2'])
        self.assertEqual(tmp['3'], fixed_dims['3'])

        tmp = next(neighborhood_iter)
        self.assertEqual(tmp['0'], fixed_dims['0'])
        self.assertEqual(tmp['1'], fixed_dims['1'])
        self.assertEqual(tmp['2'], fixed_dims['2'])
        self.assertEqual(tmp['3'], fixed_dims['3'])

        # StopIteration
        with self.assertRaises(StopIteration):
            tmp = next(neighborhood_iter)
Example #6
def sample_configurations(configuration_space: ConfigurationSpace,
                          historical_configs: List[Configuration],
                          sample_size: int):
    result = list()
    sample_cnt = 0
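    # Seed the result with the default configuration when there is no history yet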
    if len(historical_configs) == 0:
        result.append(configuration_space.get_default_configuration())

    while len(result) < sample_size:
        config = configuration_space.sample_configuration(1)
        if config not in result and config not in historical_configs:
            result.append(config)
        sample_cnt += 1
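        # Give up after 50x the requested sample size and accept the last config to avoid looping forever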
        if sample_cnt > 50 * sample_size:
            result.append(config)
            break
    return result
    def test_setitem(self):
        '''
        Checks overriding a sampled configuration
        '''
        pcs = ConfigurationSpace()
        pcs.add_hyperparameter(UniformIntegerHyperparameter('x0', 1, 5))
        pcs.add_hyperparameter(UniformFloatHyperparameter('x1', 0.5, 2.55))
        pcs.add_hyperparameter(
            CategoricalHyperparameter('x2', ['ab', 'bc', 'cd', 'de']))

        conf = pcs.sample_configuration()

        # fails because it is an invalid configuration
        with self.assertRaisesRegex(ValueError,
                                    'Illegal value 0 for hyperparameter x1'):
            conf['x1'] = 0

        with self.assertRaisesRegex(ValueError,
                                    'Illegal value 2.5 for hyperparameter x0'):
            conf['x0'] = 2.5

        # fails because the hyperparameter does not exist
        with self.assertRaisesRegex(
                KeyError,
                "Hyperparameter 'x_0' does not exist in this configuration space."
        ):
            conf['x_0'] = 1

        # successful operation 1
        x1_old = conf['x1']
        if x1_old == 1.5:
            conf['x1'] = 2.1
        else:
            conf['x1'] = 1.5
        x1_new = conf['x1']
        self.assertNotEqual(x1_old, x1_new)

        # successful operation 2
        x2_old = conf['x2']
        if x2_old == 'ab':
            conf['x2'] = 'cd'
        else:
            conf['x2'] = 'ab'
        x2_new = conf['x2']
        self.assertNotEqual(x2_old, x2_new)
    def test_keys(self):
        # A regression test to make sure issue #49 no longer pops up. Iterating
        # over the configuration in the for loop should not raise a KeyError if
        # the child hyperparameter is inactive.
        cs = ConfigurationSpace()
        shrinkage = CategoricalHyperparameter(
            "shrinkage", ["None", "auto", "manual"], default_value="None",
        )
        shrinkage_factor = UniformFloatHyperparameter(
            "shrinkage_factor", 0., 1., 0.5,
        )
        cs.add_hyperparameters([shrinkage, shrinkage_factor])

        cs.add_condition(EqualsCondition(shrinkage_factor, shrinkage, "manual"))

        for i in range(10):
            config = cs.sample_configuration()
            {hp_name: config[hp_name] for hp_name in config if config[hp_name] is not None}
Example #10
    def evaluate(_config):
        _config = _config.get_dictionary()
        # print(_config)
        arm = None
        cs = ConfigurationSpace()
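        # Rebuild a reduced space containing only the chosen classifier's hyperparameters as constants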
        for key in _config:
            key_str = key.split(":")
            if key_str[0] == 'classifier':
                if key_str[1] == '__choice__':
                    arm = _config[key]
                    cs.add_hyperparameter(
                        UnParametrizedHyperparameter("estimator",
                                                     _config[key]))
                else:
                    cs.add_hyperparameter(
                        UnParametrizedHyperparameter(key_str[2], _config[key]))

        if arm in first_bandit.arms:
            transformed_node = apply_metalearning_fe(
                first_bandit.sub_bandits[arm].optimizer['fe'], _config)
            default_config = cs.sample_configuration(1)
            hpo_evaluator = ClassificationEvaluator(
                None,
                data_node=transformed_node,
                name='hpo',
                resampling_strategy=first_bandit.eval_type,
                seed=first_bandit.seed)

            start_time = time.time()
            score1 = -hpo_evaluator(default_config)
            time_cost1 = time.time() - start_time

            # Evaluate the default config
            start_time = time.time()
            score2 = -hpo_evaluator(
                first_bandit.sub_bandits[arm].default_config)
            time_cost2 = time.time() - start_time
            transformed_node.score2 = max(score1, score2)

            return (arm, score1, default_config, transformed_node,
                    time_cost1), (arm, score2,
                                  first_bandit.sub_bandits[arm].default_config,
                                  transformed_node, time_cost2)
Example #11
    def test_sample_configuration_with_or_conjunction(self):
        cs = ConfigurationSpace(seed=1)

        hyper_params = {}
        hyper_params["hp5"] = CategoricalHyperparameter("hp5", ['0', '1', '2'])
        hyper_params["hp7"] = CategoricalHyperparameter("hp7", ['3', '4', '5'])
        hyper_params["hp8"] = CategoricalHyperparameter("hp8", ['6', '7', '8'])
        for key in hyper_params:
            cs.add_hyperparameter(hyper_params[key])

        cs.add_condition(
            InCondition(hyper_params["hp5"], hyper_params["hp8"], ['6']))

        cs.add_condition(
            OrConjunction(
                InCondition(hyper_params["hp7"], hyper_params["hp8"], ['7']),
                InCondition(hyper_params["hp7"], hyper_params["hp5"], ['1'])))

        for cfg, fixture in zip(cs.sample_configuration(10),
                                [[1, np.NaN, 2], [0, 2, np.NaN], [0, 1, 1],
                                 [1, np.NaN, 2], [1, np.NaN, 2]]):
            np.testing.assert_array_almost_equal(cfg.get_array(), fixture)
Example #12
#          '-sklearn_2017_04.pcs') as fh:
#    cs = pcs.read(fh)


cs = ConfigurationSpace()
hp1 = cs.add_hyperparameter(CategoricalHyperparameter("hp1", [0, 1, 2, 3, 4, 5]))
cs.add_forbidden_clause(ForbiddenEqualsClause(hp1, 1))
cs.add_forbidden_clause(ForbiddenEqualsClause(hp1, 3))
cs.add_forbidden_clause(ForbiddenEqualsClause(hp1, 5))


times = []

for i in range(20):
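    # Time one batch of 500,000 samples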
    start_time = time.time()
    configs = cs.sample_configuration(500000)
    end_time = time.time()
    times.append(end_time - start_time)
print("all times:", times)
print('Sampling 500000 configurations took on average:', np.mean(times))

times = []
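# Time retrieval of up to 100 one-exchange neighbors for each of the first 100 configurations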
for config in configs[:100]:
    start_time = time.time()
    for i, n in enumerate(get_one_exchange_neighbourhood(config, 1)):
        if i == 100:
            break
    end_time = time.time()
    times.append((end_time - start_time) / 10)

print('Getting a nearest neighbor took on average:', np.mean(times))
Example #13
def run_on_tasks(config_frame_orig: pd.DataFrame,
                 surrogates: typing.Dict[int, sklearn.pipeline.Pipeline],
                 quality_frame: pd.DataFrame,
                 config_space: ConfigSpace.ConfigurationSpace,
                 search_hyperparameters: typing.List[str],
                 search_transform_fns: typing.List[str],
                 hold_out_task: typing.Optional[int], resized_grid_size: int,
                 output_file: str):
    hold_out_surrogate = None
    if hold_out_task is not None:
        hold_out_surrogate = surrogates[hold_out_task]
        surrogates = dict(surrogates)
        del surrogates[hold_out_task]

    # performance untransformed
    baseline_configuration, baseline_results_per_task = select_best_configuration_across_tasks(
        config_frame_orig, surrogates, config_frame_orig.columns.values, None,
        None, None, None)
    baseline_avg_performance = np.average(baseline_results_per_task)
    baseline_holdout = None
    baseline_random_search = None
    if hold_out_task is not None:
        baseline_holdout = openmldefaults.utils.single_prediction(
            config_frame_orig, hold_out_surrogate, baseline_configuration)
        baseline_random_search = [
            openmldefaults.utils.single_prediction(
                config_frame_orig, hold_out_surrogate,
                config_space.sample_configuration(1).get_dictionary())
            for i in range(50)
        ]
    logging.info('Baseline: %s [%s] %s. Holdout task: %s' %
                 (baseline_configuration, baseline_results_per_task,
                  baseline_avg_performance, baseline_holdout))

    transform_fns = openmldefaults.symbolic.all_transform_fns()
    search_transform_fns = search_transform_fns if search_transform_fns is not None else transform_fns.keys(
    )
    search_hyperparameters = search_hyperparameters if search_hyperparameters is not None \
        else [hp.name for hp in config_space.get_hyperparameters()]

    symbolic_defaults = list()
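    # For every numerical hyperparameter, try each transform function, alpha value and
    # meta-feature, keeping symbolic defaults that beat the untransformed baseline.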
    for idx_hp, hyperparameter_name in enumerate(search_hyperparameters):
        hyperparameter = config_space.get_hyperparameter(hyperparameter_name)
        if isinstance(hyperparameter, ConfigSpace.hyperparameters.Constant):
            logging.warning('Skipping Constant Hyperparameter: %s' %
                            hyperparameter.name)
            continue
        if isinstance(
                hyperparameter,
                ConfigSpace.hyperparameters.UnParametrizedHyperparameter):
            logging.warning('Skipping Unparameterized Hyperparameter: %s' %
                            hyperparameter.name)
            continue
        if not isinstance(hyperparameter,
                          ConfigSpace.hyperparameters.NumericalHyperparameter):
            logging.warning('Skipping Non-Numerical Hyperparameter: %s' %
                            hyperparameter.name)
            continue
        logging.info(
            'Started with hyperparameter %s (%d/%d)' %
            (hyperparameter.name, idx_hp + 1, len(search_hyperparameters)))
        config_space_prime = openmldefaults.utils.remove_hyperparameter(
            config_space, hyperparameter.name)
        configurations = openmldefaults.utils.generate_grid_configurations(
            config_space_prime, 0, resized_grid_size)
        config_frame_prime = pd.DataFrame(configurations)
        for idx_trnfm_fn, transform_name in enumerate(search_transform_fns):
            logging.info(
                '- Transformer fn %s (%d/%d)' %
                (transform_name, idx_trnfm_fn + 1, len(transform_fns)))
            geom_space = np.geomspace(0.01, 2, 10)
            geom_space = np.append(geom_space, [1])
            for idx_av, alpha_value in enumerate(geom_space):
                logging.info('--- Alpha value %f (%d/%d)' %
                             (alpha_value, idx_av + 1, len(geom_space)))
                for meta_feature in quality_frame.columns.values:
                    try:
                        transform_fn = openmldefaults.symbolic.all_transform_fns(
                        )[transform_name]
                        symbolic_config, symbolic_results_per_task = select_best_configuration_across_tasks(
                            config_frame_prime,
                            surrogates,
                            config_frame_orig.columns.
                            values,  # note to take the original frame
                            hyperparameter.name,
                            transform_fn,
                            alpha_value,
                            quality_frame[meta_feature].to_dict(),
                        )
                        symbolic_average_performance = np.average(
                            symbolic_results_per_task)
                        if symbolic_average_performance > baseline_avg_performance:
                            symbolic_holdout_score = None
                            if hold_out_surrogate is not None:
                                symbolic_value = transform_fn(
                                    alpha_value,
                                    quality_frame[meta_feature][hold_out_task])
                                symbolic_config[
                                    hyperparameter.name] = symbolic_value
                                symbolic_holdout_score = openmldefaults.utils.single_prediction(
                                    config_frame_orig, hold_out_surrogate,
                                    symbolic_config)
                            current_result = {
                                'configuration': symbolic_config,
                                'results_per_task': symbolic_results_per_task,
                                'avg_performance':
                                symbolic_average_performance,
                                'holdout_score': symbolic_holdout_score,
                                'transform_hyperparameter':
                                hyperparameter.name,
                                'transform_fn': transform_name,
                                'transform_alpha_value': alpha_value,
                                'transform_meta_feature': meta_feature,
                            }
                            symbolic_defaults.append(current_result)
                            logging.info(
                                'Found improvement over base-line: %s' %
                                current_result)
                    except ZeroDivisionError:
                        logging.warning(
                            'Zero division error with (fn=%s, alpha=%s, meta_f=%s). '
                            'skipping. ' %
                            (transform_name, alpha_value, meta_feature))
                        pass
                    except OverflowError:
                        logging.warning(
                            'Overflow error with (fn=%s, alpha=%s, meta_f=%s). '
                            'skipping. ' %
                            (transform_name, alpha_value, meta_feature))
                        pass
                    except ValueError:
                        # keep a close eye on this one. Question: why do the others not catch this one?
                        logging.warning(
                            'Value error with (fn=%s, alpha=%s, meta_f=%s). '
                            'skipping. ' %
                            (transform_name, alpha_value, meta_feature))
                        pass
    total = {
        'baseline_configuration': baseline_configuration,
        'baseline_avg_performance': baseline_avg_performance,
        'baseline_random_search': baseline_random_search,
        'baseline_results_per_task': baseline_results_per_task,
        'baseline_holdout_score': baseline_holdout,
        'symbolic_defaults': symbolic_defaults
    }
    with open(output_file, 'wb') as fp:
        pickle.dump(obj=total, file=fp, protocol=0)
    logging.info('Saved result file to: %s' % output_file)
def evaluate_metalearning_configs(first_bandit):
    score_list = []
    for config in first_bandit.meta_configs:
        try:
            config = config.get_dictionary()
            # print(config)
            arm = None
            cs = ConfigurationSpace()
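            # Rebuild a reduced space containing only the chosen classifier's hyperparameters as constants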
            for key in config:
                key_str = key.split(":")
                if key_str[0] == 'classifier':
                    if key_str[1] == '__choice__':
                        arm = config[key]
                        cs.add_hyperparameter(
                            UnParametrizedHyperparameter(
                                "estimator", config[key]))
                    else:
                        cs.add_hyperparameter(
                            UnParametrizedHyperparameter(
                                key_str[2], config[key]))

            if arm in first_bandit.arms:
                transformed_node = apply_metalearning_fe(
                    first_bandit.sub_bandits[arm].optimizer['fe'], config)
                default_config = cs.sample_configuration(1)
                hpo_evaluator = Evaluator(
                    None,
                    data_node=transformed_node,
                    name='hpo',
                    resampling_strategy=first_bandit.eval_type,
                    seed=first_bandit.seed)

                start_time = time.time()
                score = 1 - hpo_evaluator(default_config)
                time_cost = time.time() - start_time
                score_list.append(
                    (arm, score, default_config, transformed_node, time_cost))
                transformed_node.score = score

                # Evaluate the default config
                start_time = time.time()
                score = 1 - hpo_evaluator(
                    first_bandit.sub_bandits[arm].default_config)
                time_cost = time.time() - start_time
                score_list.append(
                    (arm, score, first_bandit.sub_bandits[arm].default_config,
                     transformed_node, time_cost))
                transformed_node.score = score
        except Exception as e:
            print(e)

    # Sort the meta-configs
    score_list.sort(key=lambda x: x[1], reverse=True)
    meta_arms = list()
    for arm_score_config in score_list:
        if arm_score_config[0] in meta_arms:
            continue

        first_bandit.sub_bandits[
            arm_score_config[0]].default_config = arm_score_config[2]
        first_bandit.sub_bandits[arm_score_config[0]].collect_iter_stats(
            'fe',
            (arm_score_config[1], arm_score_config[4], arm_score_config[3]))
        # first_bandit.sub_bandits[arm_score_config[0]].collect_iter_stats('hpo',
        #                                                                  (arm_score_config[1], arm_score_config[4],
        #                                                                   arm_score_config[2]))
        first_bandit.sub_bandits[arm_score_config[0]].optimizer[
            'fe'].hp_config = arm_score_config[2]
        meta_arms.append(arm_score_config[0])
    for arm in first_bandit.arms:
        if arm not in meta_arms:
            meta_arms.append(arm)

    first_bandit.final_rewards.append(score_list[0][1])
    first_bandit.action_sequence.append(score_list[0][0])
    first_bandit.time_records.append(score_list[0][2])
    first_bandit.arms = meta_arms
    first_bandit.logger.info("Arms after evaluating meta-configs: " +
                             str(first_bandit.arms))
Example #15
def run_bot_on_task(task_id: int,
                    configuration_space: ConfigSpace.ConfigurationSpace,
                    output_dir: str,
                    upload_and_delete: bool) \
        -> typing.Tuple[bool, typing.Optional[int], typing.Optional[str]]:
    """
    Runs the bot with a random configuration on an OpenML task

    Parameters
    ----------
    task_id: int
        The OpenML task id to run the bot on

    configuration_space: ConfigSpace.ConfigurationSpace
        The config space from which a random configuration will be sampled

    output_dir: str
        A writable directory where the intermediate run results can be stored,
        before uploading

    upload_and_delete: bool
        If true, the run will be uploaded to OpenML after it has been executed.
        If the upload succeeds, the local files will be deleted afterwards.

    Returns
    -------
    success: bool
        A boolean indicating whether the operation (run and/or upload) was
        successful

    run_id: int or None
        If uploaded, the OpenML run id that was assigned to the run. None
        otherwise

    local_run_folder: str or None
        If the run was executed successfully and the folder was not deleted,
        the path to the folder. None otherwise
    """
    local_run_dir = None
    try:
        # obtain task
        task = openml.tasks.get_task(task_id)
        data_name = task.get_dataset().name
        data_qualities = task.get_dataset().qualities
        data_tuple = (task.task_id, data_name,
                      data_qualities['NumberOfFeatures'],
                      data_qualities['NumberOfInstances'])
        logging.info('Obtained task %d (%s); %s attributes; %s observations' %
                     data_tuple)

        # obtain deserialized classifier
        nominal_indices = task.get_dataset().get_features_by_type(
            'nominal', [task.target_name])
        numeric_indices = task.get_dataset().get_features_by_type(
            'numeric', [task.target_name])
        classifier = sklearnbot.sklearn.as_estimator(configuration_space,
                                                     numeric_indices,
                                                     nominal_indices)

        # sample configuration and set hyperparameters
        configuration = configuration_space.sample_configuration(1)
        logging.info('Configuration: %s' % configuration.get_dictionary())
        classifier.set_params(**configuration.get_dictionary())

        # invoke OpenML run
        run = openml.runs.run_model_on_task(classifier, task)
        score = run.get_metric_fn(sklearn.metrics.accuracy_score)
        logging.info('Task %d - %s; Accuracy: %0.2f' %
                     (task_id, task.get_dataset().name, score.mean()))
        local_run_dir = os.path.join(output_dir, str(task_id),
                                     str(uuid.uuid4()))
        run.to_filesystem(local_run_dir, store_model=False)
        if upload_and_delete:
            run = run.publish()
            shutil.rmtree(local_run_dir)
            local_run_dir = None
        return True, run.run_id, local_run_dir
    except openml.exceptions.OpenMLServerException:
        traceback.print_exc()
        return False, None, local_run_dir
Example #16
    def test_sample_no_configuration(self):
        cs = ConfigurationSpace()
        rval = cs.sample_configuration(size=0)
        self.assertEqual(len(rval), 0)