def test_uniformfloat_transform(self):
    """This checks whether a value sampled through the configuration
    space (it does not happen when the variable is sampled alone) stays
    equal when it is serialized via JSON and then deserialized again."""
    cs = ConfigurationSpace()
    a = cs.add_hyperparameter(UniformFloatHyperparameter('a', -5, 10))
    b = cs.add_hyperparameter(NormalFloatHyperparameter('b', 1, 2, log=True))
    for i in range(100):
        config = cs.sample_configuration()
        value = OrderedDict(sorted(config.get_dictionary().items()))
        string = json.dumps(value)
        saved_value = json.loads(string)
        saved_value = OrderedDict(sorted(byteify(saved_value).items()))
        self.assertEqual(repr(value), repr(saved_value))

    # Next, test whether the truncation also works when initializing the
    # Configuration with a dictionary
    for i in range(100):
        rs = np.random.RandomState(1)
        value_a = a.sample(rs)
        value_b = b.sample(rs)
        values_dict = {'a': value_a, 'b': value_b}
        config = Configuration(cs, values=values_dict)
        string = json.dumps(config.get_dictionary())
        saved_value = json.loads(string)
        saved_value = byteify(saved_value)
        self.assertEqual(values_dict, saved_value)
def _test_random_neigbor(self, hp):
    cs = ConfigurationSpace()
    if not isinstance(hp, list):
        hp = [hp]
    for hp_ in hp:
        cs.add_hyperparameter(hp_)
    cs.seed(1)
    config = cs.sample_configuration()
    for i in range(100):
        new_config = get_random_neighbor(config, i)
        self.assertNotEqual(config, new_config)
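# --- Hedged usage sketch (not part of the original test file) ---
# A minimal, standalone illustration of what the helper above exercises:
# ConfigSpace.util.get_random_neighbor returns a configuration that differs
# from the input in one hyperparameter. The space and the hyperparameter 'x'
# below are assumptions made purely for this example.
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter
from ConfigSpace.util import get_random_neighbor

example_cs = ConfigurationSpace(seed=1)
example_cs.add_hyperparameter(UniformFloatHyperparameter('x', 0.0, 1.0))
example_conf = example_cs.sample_configuration()
example_neighbor = get_random_neighbor(example_conf, seed=0)
print(example_conf, example_neighbor)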
def test_sample_configuration(self):
    cs = ConfigurationSpace()
    hp1 = CategoricalHyperparameter("parent", [0, 1])
    cs.add_hyperparameter(hp1)
    hp2 = UniformIntegerHyperparameter("child", 0, 10)
    cs.add_hyperparameter(hp2)
    cond1 = EqualsCondition(hp2, hp1, 0)
    cs.add_condition(cond1)
    # This automatically checks the configuration!
    Configuration(cs, dict(parent=0, child=5))

    # and now for something more complicated
    cs = ConfigurationSpace(seed=1)
    hp1 = CategoricalHyperparameter("input1", [0, 1])
    cs.add_hyperparameter(hp1)
    hp2 = CategoricalHyperparameter("input2", [0, 1])
    cs.add_hyperparameter(hp2)
    hp3 = CategoricalHyperparameter("input3", [0, 1])
    cs.add_hyperparameter(hp3)
    hp4 = CategoricalHyperparameter("input4", [0, 1])
    cs.add_hyperparameter(hp4)
    hp5 = CategoricalHyperparameter("input5", [0, 1])
    cs.add_hyperparameter(hp5)
    hp6 = Constant("AND", "True")
    cs.add_hyperparameter(hp6)

    cond1 = EqualsCondition(hp6, hp1, 1)
    cond2 = NotEqualsCondition(hp6, hp2, 1)
    cond3 = InCondition(hp6, hp3, [1])
    cond4 = EqualsCondition(hp5, hp3, 1)
    cond5 = EqualsCondition(hp4, hp5, 1)
    cond6 = EqualsCondition(hp6, hp4, 1)
    cond7 = EqualsCondition(hp6, hp5, 1)

    conj1 = AndConjunction(cond1, cond2)
    conj2 = OrConjunction(conj1, cond3)
    conj3 = AndConjunction(conj2, cond6, cond7)
    cs.add_condition(cond4)
    cs.add_condition(cond5)
    cs.add_condition(conj3)

    samples = []
    for i in range(5):
        cs.seed(1)
        samples.append([])
        for j in range(100):
            sample = cs.sample_configuration()
            samples[-1].append(sample)
        if i > 0:
            for j in range(100):
                self.assertEqual(samples[-1][j], samples[-2][j])
def test_get_one_exchange_neighbourhood(self):
    # test fixed_dims
    cs = ConfigurationSpace()
    cs.add_hyperparameter(
        CategoricalHyperparameter('0', [0, 1], default_value=0))
    cs.add_hyperparameter(
        CategoricalHyperparameter('1', [0, 1], default_value=0))
    cs.add_hyperparameter(
        CategoricalHyperparameter('2', [0, 1], default_value=0))
    cs.add_hyperparameter(
        CategoricalHyperparameter('3', [0, 1], default_value=0))
    cs.add_hyperparameter(
        CategoricalHyperparameter('4', [0, 1, 2, 3, 4], default_value=0))
    fixed_dims = {'0': 1, '1': 0, '2': 0, '3': 0}

    conf = cs.sample_configuration(fixed_dims=fixed_dims)
    neighborhood_iter = get_one_exchange_neighbourhood(
        conf, seed=0, fixed_dims=fixed_dims)

    # All four neighbors must keep the fixed dimensions untouched
    for _ in range(4):
        tmp = next(neighborhood_iter)
        self.assertEqual(tmp['0'], fixed_dims['0'])
        self.assertEqual(tmp['1'], fixed_dims['1'])
        self.assertEqual(tmp['2'], fixed_dims['2'])
        self.assertEqual(tmp['3'], fixed_dims['3'])

    # StopIteration
    with self.assertRaises(StopIteration):
        tmp = next(neighborhood_iter)
def sample_configurations(configuration_space: ConfigurationSpace,
                          historical_configs: List[Configuration],
                          sample_size: int):
    result = list()
    sample_cnt = 0
    if len(historical_configs) == 0:
        result.append(configuration_space.get_default_configuration())

    while len(result) < sample_size:
        config = configuration_space.sample_configuration(1)
        if config not in result and config not in historical_configs:
            result.append(config)
        sample_cnt += 1
        if sample_cnt > 50 * sample_size:
            result.append(config)
            break
    return result
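# --- Hedged usage sketch (not part of the original code) ---
# Shows how the `sample_configurations` helper above could be called: with an
# empty history the default configuration is returned first, and the remaining
# slots are filled with fresh, non-duplicate samples. The tiny space below is
# an assumption made only for this example.
from ConfigSpace import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformIntegerHyperparameter

demo_space = ConfigurationSpace(seed=42)
demo_space.add_hyperparameter(UniformIntegerHyperparameter('n', 1, 100))
demo_configs = sample_configurations(demo_space, historical_configs=[], sample_size=3)
print(demo_configs)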
def test_setitem(self):
    '''
    Checks overriding a sampled configuration
    '''
    pcs = ConfigurationSpace()
    pcs.add_hyperparameter(UniformIntegerHyperparameter('x0', 1, 5))
    pcs.add_hyperparameter(UniformFloatHyperparameter('x1', 0.5, 2.55))
    pcs.add_hyperparameter(
        CategoricalHyperparameter('x2', ['ab', 'bc', 'cd', 'de']))
    conf = pcs.sample_configuration()

    # fails because the value would make the configuration invalid
    with self.assertRaisesRegex(ValueError,
                                'Illegal value 0 for hyperparameter x1'):
        conf['x1'] = 0
    with self.assertRaisesRegex(ValueError,
                                'Illegal value 2.5 for hyperparameter x0'):
        conf['x0'] = 2.5

    # fails because the hyperparameter does not exist
    with self.assertRaisesRegex(
            KeyError,
            "Hyperparameter 'x_0' does not exist in this configuration space."
    ):
        conf['x_0'] = 1

    # successful operation 1
    x1_old = conf['x1']
    if x1_old == 1.5:
        conf['x1'] = 2.1
    else:
        conf['x1'] = 1.5
    x1_new = conf['x1']
    self.assertNotEqual(x1_old, x1_new)

    # successful operation 2
    x2_old = conf['x2']
    if x2_old == 'ab':
        conf['x2'] = 'cd'
    else:
        conf['x2'] = 'ab'
    x2_new = conf['x2']
    self.assertNotEqual(x2_old, x2_new)
def test_keys(self):
    # A regression test to make sure issue #49 no longer pops up. By
    # iterating over the configuration in the for loop, it should not raise
    # a KeyError if the child hyperparameter is inactive.
    cs = ConfigurationSpace()
    shrinkage = CategoricalHyperparameter(
        "shrinkage", ["None", "auto", "manual"], default_value="None",
    )
    shrinkage_factor = UniformFloatHyperparameter(
        "shrinkage_factor", 0., 1., 0.5,
    )
    cs.add_hyperparameters([shrinkage, shrinkage_factor])
    cs.add_condition(EqualsCondition(shrinkage_factor, shrinkage, "manual"))

    for i in range(10):
        config = cs.sample_configuration()
        {hp_name: config[hp_name] for hp_name in config
         if config[hp_name] is not None}
def evaluate(_config):
    _config = _config.get_dictionary()
    # print(_config)
    arm = None
    cs = ConfigurationSpace()
    for key in _config:
        key_str = key.split(":")
        if key_str[0] == 'classifier':
            if key_str[1] == '__choice__':
                arm = _config[key]
                cs.add_hyperparameter(
                    UnParametrizedHyperparameter("estimator", _config[key]))
            else:
                cs.add_hyperparameter(
                    UnParametrizedHyperparameter(key_str[2], _config[key]))

    if arm in first_bandit.arms:
        transformed_node = apply_metalearning_fe(
            first_bandit.sub_bandits[arm].optimizer['fe'], _config)
        default_config = cs.sample_configuration(1)
        hpo_evaluator = ClassificationEvaluator(
            None,
            data_node=transformed_node,
            name='hpo',
            resampling_strategy=first_bandit.eval_type,
            seed=first_bandit.seed)

        start_time = time.time()
        score1 = -hpo_evaluator(default_config)
        time_cost1 = time.time() - start_time

        # Evaluate the default config
        start_time = time.time()
        score2 = -hpo_evaluator(first_bandit.sub_bandits[arm].default_config)
        time_cost2 = time.time() - start_time
        transformed_node.score2 = max(score1, score2)

        return (arm, score1, default_config, transformed_node, time_cost1), \
               (arm, score2, first_bandit.sub_bandits[arm].default_config,
                transformed_node, time_cost2)
def test_sample_configuration_with_or_conjunction(self):
    cs = ConfigurationSpace(seed=1)

    hyper_params = {}
    hyper_params["hp5"] = CategoricalHyperparameter("hp5", ['0', '1', '2'])
    hyper_params["hp7"] = CategoricalHyperparameter("hp7", ['3', '4', '5'])
    hyper_params["hp8"] = CategoricalHyperparameter("hp8", ['6', '7', '8'])
    for key in hyper_params:
        cs.add_hyperparameter(hyper_params[key])
    cs.add_condition(
        InCondition(hyper_params["hp5"], hyper_params["hp8"], ['6']))
    cs.add_condition(
        OrConjunction(
            InCondition(hyper_params["hp7"], hyper_params["hp8"], ['7']),
            InCondition(hyper_params["hp7"], hyper_params["hp5"], ['1'])))

    for cfg, fixture in zip(cs.sample_configuration(10),
                            [[1, np.NaN, 2],
                             [0, 2, np.NaN],
                             [0, 1, 1],
                             [1, np.NaN, 2],
                             [1, np.NaN, 2]]):
        np.testing.assert_array_almost_equal(cfg.get_array(), fixture)
#          '-sklearn_2017_04.pcs') as fh:
#     cs = pcs.read(fh)
cs = ConfigurationSpace()
hp1 = cs.add_hyperparameter(
    CategoricalHyperparameter("hp1", [0, 1, 2, 3, 4, 5]))
cs.add_forbidden_clause(ForbiddenEqualsClause(hp1, 1))
cs.add_forbidden_clause(ForbiddenEqualsClause(hp1, 3))
cs.add_forbidden_clause(ForbiddenEqualsClause(hp1, 5))

times = []
for i in range(20):
    start_time = time.time()
    configs = cs.sample_configuration(500000)
    end_time = time.time()
    times.append(end_time - start_time)
print("all times:", times)
print('Sampling 500000 configurations took on average:', np.mean(times))

times = []
for config in configs[:100]:
    start_time = time.time()
    for i, n in enumerate(get_one_exchange_neighbourhood(config, 1)):
        if i == 100:
            break
    end_time = time.time()
    times.append((end_time - start_time) / 10)
print('Getting a nearest neighbor took on average:', np.mean(times))
def run_on_tasks(config_frame_orig: pd.DataFrame,
                 surrogates: typing.Dict[int, sklearn.pipeline.Pipeline],
                 quality_frame: pd.DataFrame,
                 config_space: ConfigSpace.ConfigurationSpace,
                 search_hyperparameters: typing.List[str],
                 search_transform_fns: typing.List[str],
                 hold_out_task: typing.Optional[int],
                 resized_grid_size: int,
                 output_file: str):
    hold_out_surrogate = None
    if hold_out_task is not None:
        hold_out_surrogate = surrogates[hold_out_task]
        surrogates = dict(surrogates)
        del surrogates[hold_out_task]

    # performance untransformed
    baseline_configuration, baseline_results_per_task = \
        select_best_configuration_across_tasks(
            config_frame_orig, surrogates, config_frame_orig.columns.values,
            None, None, None, None)
    baseline_avg_performance = np.average(baseline_results_per_task)
    baseline_holdout = None
    baseline_random_search = None
    if hold_out_task is not None:
        baseline_holdout = openmldefaults.utils.single_prediction(
            config_frame_orig, hold_out_surrogate, baseline_configuration)
        baseline_random_search = [
            openmldefaults.utils.single_prediction(
                config_frame_orig, hold_out_surrogate,
                config_space.sample_configuration(1).get_dictionary())
            for i in range(50)
        ]
    logging.info('Baseline: %s [%s] %s. Holdout task: %s' %
                 (baseline_configuration, baseline_results_per_task,
                  baseline_avg_performance, baseline_holdout))

    transform_fns = openmldefaults.symbolic.all_transform_fns()
    search_transform_fns = search_transform_fns if search_transform_fns is not None \
        else transform_fns.keys()
    search_hyperparameters = search_hyperparameters if search_hyperparameters is not None \
        else [hp.name for hp in config_space.get_hyperparameters()]

    symbolic_defaults = list()
    for idx_hp, hyperparameter_name in enumerate(search_hyperparameters):
        hyperparameter = config_space.get_hyperparameter(hyperparameter_name)
        if isinstance(hyperparameter, ConfigSpace.hyperparameters.Constant):
            logging.warning('Skipping Constant Hyperparameter: %s' % hyperparameter.name)
            continue
        if isinstance(hyperparameter,
                      ConfigSpace.hyperparameters.UnParametrizedHyperparameter):
            logging.warning('Skipping Unparameterized Hyperparameter: %s' % hyperparameter.name)
            continue
        if not isinstance(hyperparameter,
                          ConfigSpace.hyperparameters.NumericalHyperparameter):
            logging.warning('Skipping Non-Numerical Hyperparameter: %s' % hyperparameter.name)
            continue
        logging.info('Started with hyperparameter %s (%d/%d)' %
                     (hyperparameter.name, idx_hp + 1, len(search_hyperparameters)))
        config_space_prime = openmldefaults.utils.remove_hyperparameter(
            config_space, hyperparameter.name)
        configurations = openmldefaults.utils.generate_grid_configurations(
            config_space_prime, 0, resized_grid_size)
        config_frame_prime = pd.DataFrame(configurations)
        for idx_trnfm_fn, transform_name in enumerate(search_transform_fns):
            logging.info('- Transformer fn %s (%d/%d)' %
                         (transform_name, idx_trnfm_fn + 1, len(transform_fns)))
            geom_space = np.geomspace(0.01, 2, 10)
            geom_space = np.append(geom_space, [1])
            for idx_av, alpha_value in enumerate(geom_space):
                logging.info('--- Alpha value %f (%d/%d)' %
                             (alpha_value, idx_av + 1, len(geom_space)))
                for meta_feature in quality_frame.columns.values:
                    try:
                        transform_fn = openmldefaults.symbolic.all_transform_fns()[transform_name]
                        symbolic_config, symbolic_results_per_task = \
                            select_best_configuration_across_tasks(
                                config_frame_prime,
                                surrogates,
                                config_frame_orig.columns.values,  # note to take the original frame
                                hyperparameter.name,
                                transform_fn,
                                alpha_value,
                                quality_frame[meta_feature].to_dict(),
                            )
                        symbolic_average_performance = np.average(symbolic_results_per_task)
                        if symbolic_average_performance > baseline_avg_performance:
                            symbolic_holdout_score = None
                            if hold_out_surrogate is not None:
                                symbolic_value = transform_fn(
                                    alpha_value,
                                    quality_frame[meta_feature][hold_out_task])
                                symbolic_config[hyperparameter.name] = symbolic_value
                                symbolic_holdout_score = openmldefaults.utils.single_prediction(
                                    config_frame_orig, hold_out_surrogate, symbolic_config)
                            current_result = {
                                'configuration': symbolic_config,
                                'results_per_task': symbolic_results_per_task,
                                'avg_performance': symbolic_average_performance,
                                'holdout_score': symbolic_holdout_score,
                                'trasnform_hyperparameter': hyperparameter.name,
                                'transform_fn': transform_name,
                                'transform_alpha_value': alpha_value,
                                'transform_meta_feature': meta_feature,
                            }
                            symbolic_defaults.append(current_result)
                            logging.info('Found improvement over base-line: %s' % current_result)
                    except ZeroDivisionError:
                        logging.warning('Zero division error with (fn=%s, alpha=%s, meta_f=%s). '
                                        'skipping. ' % (transform_name, alpha_value, meta_feature))
                    except OverflowError:
                        logging.warning('Overflow error with (fn=%s, alpha=%s, meta_f=%s). '
                                        'skipping. ' % (transform_name, alpha_value, meta_feature))
                    except ValueError:
                        # keep a close eye on this one. Question: why do the others not catch this one?
                        logging.warning('Value error with (fn=%s, alpha=%s, meta_f=%s). '
                                        'skipping. ' % (transform_name, alpha_value, meta_feature))

    total = {
        'baseline_configuration': baseline_configuration,
        'baseline_avg_performance': baseline_avg_performance,
        'baseline_random_search': baseline_random_search,
        'baseline_results_per_task': baseline_results_per_task,
        'baseline_holdout_score': baseline_holdout,
        'symbolic_defaults': symbolic_defaults
    }
    with open(output_file, 'wb') as fp:
        pickle.dump(obj=total, file=fp, protocol=0)
    logging.info('Saved result file to: %s' % output_file)
def evaluate_metalearning_configs(first_bandit):
    score_list = []
    for config in first_bandit.meta_configs:
        try:
            config = config.get_dictionary()
            # print(config)
            arm = None
            cs = ConfigurationSpace()
            for key in config:
                key_str = key.split(":")
                if key_str[0] == 'classifier':
                    if key_str[1] == '__choice__':
                        arm = config[key]
                        cs.add_hyperparameter(
                            UnParametrizedHyperparameter("estimator", config[key]))
                    else:
                        cs.add_hyperparameter(
                            UnParametrizedHyperparameter(key_str[2], config[key]))
            if arm in first_bandit.arms:
                transformed_node = apply_metalearning_fe(
                    first_bandit.sub_bandits[arm].optimizer['fe'], config)
                default_config = cs.sample_configuration(1)
                hpo_evaluator = Evaluator(
                    None,
                    data_node=transformed_node,
                    name='hpo',
                    resampling_strategy=first_bandit.eval_type,
                    seed=first_bandit.seed)

                start_time = time.time()
                score = 1 - hpo_evaluator(default_config)
                time_cost = time.time() - start_time
                score_list.append(
                    (arm, score, default_config, transformed_node, time_cost))
                transformed_node.score = score

                # Evaluate the default config
                start_time = time.time()
                score = 1 - hpo_evaluator(
                    first_bandit.sub_bandits[arm].default_config)
                time_cost = time.time() - start_time
                score_list.append(
                    (arm, score, first_bandit.sub_bandits[arm].default_config,
                     transformed_node, time_cost))
                transformed_node.score = score
        except Exception as e:
            print(e)

    # Sort the meta-configs
    score_list.sort(key=lambda x: x[1], reverse=True)
    meta_arms = list()
    for arm_score_config in score_list:
        if arm_score_config[0] in meta_arms:
            continue
        first_bandit.sub_bandits[arm_score_config[0]].default_config = arm_score_config[2]
        first_bandit.sub_bandits[arm_score_config[0]].collect_iter_stats(
            'fe',
            (arm_score_config[1], arm_score_config[4], arm_score_config[3]))
        # first_bandit.sub_bandits[arm_score_config[0]].collect_iter_stats(
        #     'hpo',
        #     (arm_score_config[1], arm_score_config[4], arm_score_config[2]))
        first_bandit.sub_bandits[arm_score_config[0]].optimizer['fe'].hp_config = arm_score_config[2]
        meta_arms.append(arm_score_config[0])
    for arm in first_bandit.arms:
        if arm not in meta_arms:
            meta_arms.append(arm)

    first_bandit.final_rewards.append(score_list[0][1])
    first_bandit.action_sequence.append(score_list[0][0])
    first_bandit.time_records.append(score_list[0][2])
    first_bandit.arms = meta_arms
    first_bandit.logger.info("Arms after evaluating meta-configs: " + str(first_bandit.arms))
def run_bot_on_task(task_id: int,
                    configuration_space: ConfigSpace.ConfigurationSpace,
                    output_dir: str,
                    upload_and_delete: bool) \
        -> typing.Tuple[bool, typing.Optional[int], typing.Optional[str]]:
    """
    Runs the bot with a random configuration on an OpenML task

    Parameters
    ----------
    task_id: int
        The OpenML task id to run the bot on

    configuration_space: ConfigSpace.ConfigurationSpace
        The config space from which a random configuration will be sampled

    output_dir: str
        A writable directory where the intermediate run results can be stored,
        before uploading

    upload_and_delete: bool
        If true, the run will be uploaded to OpenML after it has been executed.
        If the upload succeeds, the local files will be deleted afterwards.

    Returns
    -------
    success: bool
        A boolean indicating whether the run (and, if requested, the upload)
        was successful

    run_id: int or None
        If uploaded, the OpenML run id that was assigned to the run. None otherwise

    local_run_folder: str or None
        If the run was executed successfully and the folder was not deleted,
        the path to the folder. None otherwise
    """
    local_run_dir = None
    try:
        # obtain task
        task = openml.tasks.get_task(task_id)
        data_name = task.get_dataset().name
        data_qualities = task.get_dataset().qualities
        data_tuple = (task.task_id, data_name,
                      data_qualities['NumberOfFeatures'],
                      data_qualities['NumberOfInstances'])
        logging.info('Obtained task %d (%s); %s attributes; %s observations' % data_tuple)

        # obtain deserialized classifier
        nominal_indices = task.get_dataset().get_features_by_type(
            'nominal', [task.target_name])
        numeric_indices = task.get_dataset().get_features_by_type(
            'numeric', [task.target_name])
        classifier = sklearnbot.sklearn.as_estimator(configuration_space,
                                                     numeric_indices,
                                                     nominal_indices)

        # sample configuration and set hyperparameters
        configuration = configuration_space.sample_configuration(1)
        logging.info('Configuration: %s' % configuration.get_dictionary())
        classifier.set_params(**configuration.get_dictionary())

        # invoke OpenML run
        run = openml.runs.run_model_on_task(classifier, task)
        score = run.get_metric_fn(sklearn.metrics.accuracy_score)
        logging.info('Task %d - %s; Accuracy: %0.2f' %
                     (task_id, task.get_dataset().name, score.mean()))
        local_run_dir = os.path.join(output_dir, str(task_id), str(uuid.uuid4()))
        run.to_filesystem(local_run_dir, store_model=False)
        if upload_and_delete:
            run = run.publish()
            shutil.rmtree(local_run_dir)
            local_run_dir = None
        return True, run.run_id, local_run_dir
    except openml.exceptions.OpenMLServerException:
        traceback.print_exc()
        return False, None, local_run_dir
def test_sample_no_configuration(self):
    cs = ConfigurationSpace()
    rval = cs.sample_configuration(size=0)
    self.assertEqual(len(rval), 0)