def __init__(self, configuration_space, aslib_directory):
    """Container for dataset metadata and experiment results.

    Constructor arguments:
    - configuration_space: the configuration space
    - aslib_directory: directory with a problem instance in the ASlib format
    """
    self.logger = logging.getLogger(__name__)

    self.configuration_space = configuration_space
    self.aslib_directory = aslib_directory

    aslib_reader = aslib_simple.AlgorithmSelectionProblem(self.aslib_directory)
    self.metafeatures = aslib_reader.metafeatures
    self.algorithm_runs = aslib_reader.algorithm_runs
    self.configurations = aslib_reader.configurations

    configurations = dict()
    for algorithm_id in self.configurations:
        configuration = self.configurations[algorithm_id]
        try:
            configurations[algorithm_id] = \
                Configuration(configuration_space, values=configuration)
        except (ValueError, KeyError) as e:
            self.logger.debug("Error reading configurations: %s", e)

    self.configurations = configurations
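A minimal, hedged sketch of the conversion performed by the constructor above: raw hyperparameter dictionaries are turned into Configuration objects, and entries that do not fit the configuration space are skipped with a debug message. The toy hyperparameter names and the ConfigSpace import paths are illustrative assumptions; the surrounding project may import the same classes from an older namespace.

import logging

from ConfigSpace.configuration_space import Configuration, ConfigurationSpace
from ConfigSpace.hyperparameters import CategoricalHyperparameter

logger = logging.getLogger(__name__)

# Toy space with a single categorical hyperparameter (illustrative only)
space = ConfigurationSpace()
space.add_hyperparameter(CategoricalHyperparameter(
    "classifier:__choice__", ["random_forest", "sgd"]))

raw_configurations = {
    "algo_1": {"classifier:__choice__": "random_forest"},  # fits the space
    "algo_2": {"classifier:__choice__": "libsvm_svc"},      # illegal value
}

configurations = dict()
for algorithm_id, raw in raw_configurations.items():
    try:
        configurations[algorithm_id] = Configuration(space, values=raw)
    except (ValueError, KeyError) as e:
        # Invalid entries are only logged, mirroring the constructor above
        logger.debug("Error reading configurations: %s", e)

# Only "algo_1" survives the conversion
assert list(configurations) == ["algo_1"]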
def test_predict_proba_batched_sparse(self):
    cs = SimpleClassificationPipeline.get_hyperparameter_search_space(
        dataset_properties={'sparse': True})
    config = Configuration(
        cs,
        values={"balancing:strategy": "none",
                "classifier:__choice__": "random_forest",
                "imputation:strategy": "mean",
                "one_hot_encoding:minimum_fraction": 0.01,
                "one_hot_encoding:use_minimum_fraction": 'True',
                "preprocessor:__choice__": "no_preprocessing",
                'classifier:random_forest:bootstrap': 'True',
                'classifier:random_forest:criterion': 'gini',
                'classifier:random_forest:max_depth': 'None',
                'classifier:random_forest:min_samples_split': 2,
                'classifier:random_forest:min_samples_leaf': 2,
                'classifier:random_forest:min_weight_fraction_leaf': 0.0,
                'classifier:random_forest:max_features': 0.5,
                'classifier:random_forest:max_leaf_nodes': 'None',
                'classifier:random_forest:n_estimators': 100,
                "rescaling:__choice__": "min/max"})

    # Multiclass
    cls = SimpleClassificationPipeline(config)
    X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits',
                                                   make_sparse=True)
    cls.fit(X_train, Y_train)
    X_test_ = X_test.copy()
    prediction_ = cls.predict_proba(X_test_)
    # The object behind the last step in the pipeline
    cls_predict = mock.Mock(wraps=cls.pipeline_.steps[-1][1])
    cls.pipeline_.steps[-1] = ("estimator", cls_predict)
    prediction = cls.predict_proba(X_test, batch_size=20)
    self.assertEqual((1647, 10), prediction.shape)
    self.assertEqual(84, cls_predict.predict_proba.call_count)
    assert_array_almost_equal(prediction_, prediction)

    # Multilabel
    cls = SimpleClassificationPipeline(config)
    X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits',
                                                   make_sparse=True)
    Y_train = np.array([(y, 26 - y) for y in Y_train])
    cls.fit(X_train, Y_train)
    X_test_ = X_test.copy()
    prediction_ = cls.predict_proba(X_test_)
    cls_predict = mock.Mock(wraps=cls.pipeline_.steps[-1][1])
    cls.pipeline_.steps[-1] = ("estimator", cls_predict)
    prediction = cls.predict_proba(X_test, batch_size=20)
    self.assertIsInstance(prediction, list)
    self.assertEqual(2, len(prediction))
    self.assertEqual((1647, 10), prediction[0].shape)
    self.assertEqual((1647, 10), prediction[1].shape)
    self.assertEqual(84, cls_predict.predict_proba.call_count)
    assert_array_almost_equal(prediction_, prediction)
def test_check_forbidden_with_sampled_vector_configuration(self):
    cs = ConfigurationSpace()
    metric = CategoricalHyperparameter("metric", ["minkowski", "other"])
    cs.add_hyperparameter(metric)
    forbidden = ForbiddenEqualsClause(metric, "other")
    cs.add_forbidden_clause(forbidden)

    configuration = Configuration(cs,
                                  vector=np.ones(1, dtype=[('metric', int)]))
    self.assertRaisesRegexp(ValueError, "violates forbidden clause",
                            cs._check_forbidden, configuration)
def test_sample_configuration(self):
    cs = ConfigurationSpace()
    hp1 = CategoricalHyperparameter("parent", [0, 1])
    cs.add_hyperparameter(hp1)
    hp2 = UniformIntegerHyperparameter("child", 0, 10)
    cs.add_hyperparameter(hp2)
    cond1 = EqualsCondition(hp2, hp1, 0)
    cs.add_condition(cond1)
    # This automatically checks the configuration!
    Configuration(cs, dict(parent=0, child=5))

    # and now for something more complicated
    cs = ConfigurationSpace(seed=1)
    hp1 = CategoricalHyperparameter("input1", [0, 1])
    cs.add_hyperparameter(hp1)
    hp2 = CategoricalHyperparameter("input2", [0, 1])
    cs.add_hyperparameter(hp2)
    hp3 = CategoricalHyperparameter("input3", [0, 1])
    cs.add_hyperparameter(hp3)
    hp4 = CategoricalHyperparameter("input4", [0, 1])
    cs.add_hyperparameter(hp4)
    hp5 = CategoricalHyperparameter("input5", [0, 1])
    cs.add_hyperparameter(hp5)
    hp6 = Constant("AND", "True")
    cs.add_hyperparameter(hp6)

    cond1 = EqualsCondition(hp6, hp1, 1)
    cond2 = NotEqualsCondition(hp6, hp2, 1)
    cond3 = InCondition(hp6, hp3, [1])
    cond4 = EqualsCondition(hp5, hp3, 1)
    cond5 = EqualsCondition(hp4, hp5, 1)
    cond6 = EqualsCondition(hp6, hp4, 1)
    cond7 = EqualsCondition(hp6, hp5, 1)

    conj1 = AndConjunction(cond1, cond2)
    conj2 = OrConjunction(conj1, cond3)
    conj3 = AndConjunction(conj2, cond6, cond7)
    cs.add_condition(cond4)
    cs.add_condition(cond5)
    cs.add_condition(conj3)

    samples = []
    for i in range(5):
        cs.seed(1)
        samples.append([])
        for j in range(100):
            sample = cs.sample_configuration()
            samples[-1].append(sample)

        if i > 0:
            for j in range(100):
                self.assertEqual(samples[-1][j], samples[-2][j])
def test_predict_batched_sparse(self):
    cs = ParamSklearnClassifier.get_hyperparameter_search_space(
        dataset_properties={'sparse': True})
    config = Configuration(
        cs,
        values={"balancing:strategy": "none",
                "classifier:__choice__": "random_forest",
                "imputation:strategy": "mean",
                "one_hot_encoding:minimum_fraction": 0.01,
                "one_hot_encoding:use_minimum_fraction": "True",
                "preprocessor:__choice__": "no_preprocessing",
                'classifier:random_forest:bootstrap': 'True',
                'classifier:random_forest:criterion': 'gini',
                'classifier:random_forest:max_depth': 'None',
                'classifier:random_forest:min_samples_split': 2,
                'classifier:random_forest:min_samples_leaf': 2,
                'classifier:random_forest:max_features': 0.5,
                'classifier:random_forest:max_leaf_nodes': 'None',
                'classifier:random_forest:n_estimators': 100,
                'classifier:random_forest:min_weight_fraction_leaf': 0.0,
                "rescaling:__choice__": "min/max"})
    cls = ParamSklearnClassifier(config)

    # Multiclass
    X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits',
                                                   make_sparse=True)
    cls.fit(X_train, Y_train)
    X_test_ = X_test.copy()
    prediction_ = cls.predict(X_test_)
    cls_predict = mock.Mock(wraps=cls.pipeline_)
    cls.pipeline_ = cls_predict
    prediction = cls.predict(X_test, batch_size=20)
    self.assertEqual((1647, ), prediction.shape)
    self.assertEqual(83, cls_predict.predict.call_count)
    assert_array_almost_equal(prediction_, prediction)

    # Multilabel
    X_train, Y_train, X_test, Y_test = get_dataset(dataset='digits',
                                                   make_sparse=True)
    Y_train = np.array([(y, 26 - y) for y in Y_train])
    cls.fit(X_train, Y_train)
    X_test_ = X_test.copy()
    prediction_ = cls.predict(X_test_)
    cls_predict = mock.Mock(wraps=cls.pipeline_)
    cls.pipeline_ = cls_predict
    prediction = cls.predict(X_test, batch_size=20)
    self.assertEqual((1647, 2), prediction.shape)
    self.assertEqual(83, cls_predict.predict.call_count)
    assert_array_almost_equal(prediction_, prediction)
def test_get_hyperparameters_topological_sort(self):
    for iteration in range(10):
        cs = ConfigurationSpace()
        hp1 = CategoricalHyperparameter("parent", [0, 1])
        cs.add_hyperparameter(hp1)
        hp2 = UniformIntegerHyperparameter("child", 0, 10)
        cs.add_hyperparameter(hp2)
        cond1 = EqualsCondition(hp2, hp1, 0)
        cs.add_condition(cond1)
        # This automatically checks the configuration!
        Configuration(cs, dict(parent=0, child=5))

        # and now for something more complicated
        cs = ConfigurationSpace()
        hp1 = CategoricalHyperparameter("input1", [0, 1])
        hp2 = CategoricalHyperparameter("input2", [0, 1])
        hp3 = CategoricalHyperparameter("input3", [0, 1])
        hp4 = CategoricalHyperparameter("input4", [0, 1])
        hp5 = CategoricalHyperparameter("input5", [0, 1])
        hp6 = Constant("AND", "True")
        # More top-level hyperparameters
        hp7 = CategoricalHyperparameter("input7", [0, 1])
        hps = [hp1, hp2, hp3, hp4, hp5, hp6, hp7]
        random.shuffle(hps)
        for hp in hps:
            cs.add_hyperparameter(hp)

        cond1 = EqualsCondition(hp6, hp1, 1)
        cond2 = NotEqualsCondition(hp6, hp2, 1)
        cond3 = InCondition(hp6, hp3, [1])
        cond4 = EqualsCondition(hp5, hp3, 1)
        cond5 = EqualsCondition(hp4, hp5, 1)
        cond6 = EqualsCondition(hp6, hp4, 1)
        cond7 = EqualsCondition(hp6, hp5, 1)

        conj1 = AndConjunction(cond1, cond2)
        conj2 = OrConjunction(conj1, cond3)
        conj3 = AndConjunction(conj2, cond6, cond7)
        cs.add_condition(cond4)
        cs.add_condition(cond5)
        cs.add_condition(conj3)

        hps = cs.get_hyperparameters()
        self.assertEqual(hps.index(hp1), 0)
        self.assertEqual(hps.index(hp2), 1)
        self.assertEqual(hps.index(hp3), 2)
        self.assertEqual(hps.index(hp7), 3)
        self.assertEqual(hps.index(hp5), 4)
        self.assertEqual(hps.index(hp4), 5)
        self.assertEqual(hps.index(hp6), 6)
def read_experiment_pickle(self, fh):
    runs = list()
    trials = cPickle.load(fh)
    for trial in trials["trials"]:
        params = trial['params']
        for key in params:
            # Parameter values are stored as strings; cast the numeric ones
            try:
                params[key] = float(params[key])
            except (ValueError, TypeError):
                pass

        configuration = Configuration(self.configuration_space, **params)
        runs.append(Run(configuration, trial["result"], trial["duration"]))

    return runs
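For reference, a hedged sketch of the trials structure this reader expects, inferred only from the keys accessed above; real experiment pickles may carry additional fields, and the parameter names, values, and the unit of "duration" are made-up assumptions.

example_trials = {
    "trials": [
        {
            "params": {"learning_rate": "0.1", "max_depth": "3"},  # values arrive as strings
            "result": 0.23,    # loss/score recorded for the run
            "duration": 1.7,   # runtime, presumably in seconds
        },
    ],
}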
def sample_configuration(self):
    # TODO: this is straightforward, but slow. It would make more sense
    # to have a list of conditions which are sorted topologically by the
    # appearance of their children.
    iteration = 0
    while True:
        instantiated_hyperparameters = {}
        hyperparameters = self.configuration_space.get_hyperparameters(
            order="topologic")
        for hyperparameter in hyperparameters:
            conditions = self.configuration_space.get_parents_of(
                hyperparameter.name)
            # TODO: these conditions should all be equal; are they actually?
            add = True
            for condition in conditions:
                parent_names = [c.parent.name for c in
                                condition.get_descendant_literal_conditions()]
                parents = [instantiated_hyperparameters[parent_name]
                           for parent_name in parent_names]
                if len(parents) == 1:
                    parents = parents[0]
                if not condition.evaluate(parents):
                    add = False

            if add:
                instantiated_hyperparameters[hyperparameter.name] = \
                    getattr(self, "_sample_%s" %
                            type(hyperparameter).__name__)(hyperparameter)
            else:
                instantiated_hyperparameters[hyperparameter.name] = \
                    InactiveHyperparameter(None, hyperparameter)

        try:
            return Configuration(self.configuration_space,
                                 **instantiated_hyperparameters)
        except ValueError:
            iteration += 1
            if iteration == 1000000:
                raise ValueError("Cannot sample valid configuration for "
                                 "%s" % self.configuration_space)
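As a side note, a self-contained sketch of the reject-and-retry idea the loop above implements, shown through the public ConfigurationSpace API used elsewhere in these files; the import paths are assumptions, and current ConfigSpace releases already perform this rejection internally inside sample_configuration.

from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.forbidden import ForbiddenEqualsClause
from ConfigSpace.hyperparameters import CategoricalHyperparameter

space = ConfigurationSpace(seed=1)
metric = CategoricalHyperparameter("metric", ["minkowski", "other"])
space.add_hyperparameter(metric)
# Forbid one of the two values; a naive draw could violate this clause
space.add_forbidden_clause(ForbiddenEqualsClause(metric, "other"))

# sample_configuration resamples until no forbidden clause is violated,
# which is the same guarantee the while-loop above provides
sample = space.sample_configuration()
assert sample["metric"] == "minkowski"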
def test_check_configuration2(self):
    # Test that hyperparameters which are not active must not be set and
    # that evaluating forbidden clauses does not choke on missing
    # hyperparameters
    cs = ConfigurationSpace()
    classifier = CategoricalHyperparameter(
        "classifier", ["k_nearest_neighbors", "extra_trees"])
    metric = CategoricalHyperparameter("metric", ["minkowski", "other"])
    p = CategoricalHyperparameter("k_nearest_neighbors:p", [1, 2])
    metric_depends_on_classifier = EqualsCondition(metric, classifier,
                                                   "k_nearest_neighbors")
    p_depends_on_metric = EqualsCondition(p, metric, "minkowski")
    cs.add_hyperparameter(metric)
    cs.add_hyperparameter(p)
    cs.add_hyperparameter(classifier)
    cs.add_condition(metric_depends_on_classifier)
    cs.add_condition(p_depends_on_metric)

    forbidden = ForbiddenEqualsClause(metric, "other")
    cs.add_forbidden_clause(forbidden)

    configuration = Configuration(cs, dict(classifier="extra_trees"))
def _fit(self, datamanager):
    # Reset learnt stuff
    self.models_ = None
    self.ensemble_ = None

    # Check arguments prior to doing anything!
    if self._resampling_strategy not in ['holdout', 'holdout-iterative-fit',
                                         'cv', 'nested-cv', 'partial-cv']:
        raise ValueError('Illegal resampling strategy: %s' %
                         self._resampling_strategy)
    if self._resampling_strategy == 'partial-cv' and \
            self._ensemble_size != 0:
        raise ValueError("Resampling strategy partial-cv cannot be used "
                         "together with ensembles.")

    self._backend._make_internals_directory()
    if self._keep_models:
        try:
            os.mkdir(self._backend.get_model_dir())
        except OSError:
            self._logger.warning("model directory already exists")
            if not self._shared_mode:
                raise

    self._metric = datamanager.info['metric']
    self._task = datamanager.info['task']
    self._label_num = datamanager.info['label_num']

    set_auto_seed(self._seed)

    # == Pickle the data manager here, because there is no more global
    # one-hot encoding
    data_manager_path = self._backend.save_datamanager(datamanager)

    self._save_ensemble_data(
        datamanager.data['X_train'],
        datamanager.data['Y_train'])

    time_for_load_data = self._stopwatch.wall_elapsed(self._dataset_name)

    if self._debug_mode:
        self._print_load_time(
            self._dataset_name,
            self._time_for_task,
            time_for_load_data,
            self._logger)

    # == Perform dummy predictions
    if self._resampling_strategy in ['holdout', 'holdout-iterative-fit']:
        self._do_dummy_prediction(datamanager)

    # == Create a search space
    # Do this before one-hot encoding to make sure that a search space is
    # created for a dense classifier even if one-hot encoding would make
    # the data sparse (trade-off: if the data became sparse, densifier and
    # TruncatedSVD would probably lead to a MemoryError; this way we cannot
    # use some of the preprocessing methods in case the data becomes sparse)
    self.configuration_space, configspace_path = _create_search_space(
        self._tmp_dir,
        datamanager.info,
        self._backend,
        self._stopwatch,
        self._logger,
        self._include_estimators,
        self._include_preprocessors)
    self.configuration_space_created_hook(datamanager)

    # == RUN ensemble builder
    # Do this before calculating the meta-features to make sure that the
    # dummy predictions are actually included in the ensemble even if
    # calculating the meta-features takes very long
    proc_ensembles = self.run_ensemble_builder()

    # == Calculate metafeatures
    meta_features = _calculate_metafeatures(
        data_feat_type=datamanager.feat_type,
        data_info_task=datamanager.info['task'],
        x_train=datamanager.data['X_train'],
        y_train=datamanager.data['Y_train'],
        basename=self._dataset_name,
        watcher=self._stopwatch,
        metalearning_cnt=self._initial_configurations_via_metalearning,
        logger=self._logger)

    self._stopwatch.start_task('OneHot')
    datamanager.perform1HotEncoding()
    self._stopwatch.stop_task('OneHot')

    if meta_features is None:
        initial_configurations = []
    elif datamanager.info['task'] in [MULTICLASS_CLASSIFICATION,
                                      BINARY_CLASSIFICATION,
                                      MULTILABEL_CLASSIFICATION]:
        meta_features_encoded = _calculate_metafeatures_encoded(
            self._dataset_name,
            datamanager.data['X_train'],
            datamanager.data['Y_train'],
            self._stopwatch,
            self._logger)

        self._logger.debug(meta_features.__repr__(verbosity=2))
        self._logger.debug(meta_features_encoded.__repr__(verbosity=2))

        initial_configurations = _get_initial_configuration(
            meta_features,
            meta_features_encoded,
            self._dataset_name,
            self._metric,
            self.configuration_space,
            self._task,
            self._metadata_directory,
            self._initial_configurations_via_metalearning,
            datamanager.info['is_sparse'],
            self._stopwatch,
            self._logger)

        _print_debug_info_of_init_configuration(
            initial_configurations,
            self._dataset_name,
            self._time_for_task,
            self._logger,
            self._stopwatch)
    else:
        initial_configurations = []
        self._logger.warning('Metafeatures encoded not calculated')

    # == Build a default configuration to prepend to the initial
    # configurations handed to SMAC
    if (datamanager.info["task"] == BINARY_CLASSIFICATION) or \
            (datamanager.info["task"] == MULTICLASS_CLASSIFICATION):
        config = {'balancing:strategy': 'weighting',
                  'classifier:__choice__': 'sgd',
                  'classifier:sgd:loss': 'hinge',
                  'classifier:sgd:penalty': 'l2',
                  'classifier:sgd:alpha': 0.0001,
                  'classifier:sgd:fit_intercept': 'True',
                  'classifier:sgd:n_iter': 5,
                  'classifier:sgd:learning_rate': 'optimal',
                  'classifier:sgd:eta0': 0.01,
                  'classifier:sgd:average': 'True',
                  'imputation:strategy': 'mean',
                  'one_hot_encoding:use_minimum_fraction': 'True',
                  'one_hot_encoding:minimum_fraction': 0.1,
                  'preprocessor:__choice__': 'no_preprocessing',
                  'rescaling:__choice__': 'min/max'}
    elif datamanager.info["task"] == MULTILABEL_CLASSIFICATION:
        config = {'classifier:__choice__': 'adaboost',
                  'classifier:adaboost:algorithm': 'SAMME.R',
                  'classifier:adaboost:learning_rate': 1.0,
                  'classifier:adaboost:max_depth': 1,
                  'classifier:adaboost:n_estimators': 50,
                  'balancing:strategy': 'weighting',
                  'imputation:strategy': 'mean',
                  'one_hot_encoding:use_minimum_fraction': 'True',
                  'one_hot_encoding:minimum_fraction': 0.1,
                  'preprocessor:__choice__': 'no_preprocessing',
                  'rescaling:__choice__': 'none'}
    else:
        config = None
        self._logger.info("Tasktype unknown: %s" %
                          TASK_TYPES_TO_STRING[datamanager.info["task"]])

    if config is not None:
        try:
            configuration = Configuration(self.configuration_space, config)
            config_string = convert_conf2smac_string(configuration)
            initial_configurations = [config_string] + initial_configurations
        except ValueError:
            pass

    # == RUN SMAC
    proc_smac = run_smac(tmp_dir=self._tmp_dir,
                         basename=self._dataset_name,
                         time_for_task=self._time_for_task,
                         ml_memory_limit=self._ml_memory_limit,
                         data_manager_path=data_manager_path,
                         configspace_path=configspace_path,
                         initial_configurations=initial_configurations,
                         per_run_time_limit=self._per_run_time_limit,
                         watcher=self._stopwatch,
                         backend=self._backend,
                         seed=self._seed,
                         resampling_strategy=self._resampling_strategy,
                         resampling_strategy_arguments=self._resampling_strategy_arguments,
                         shared_mode=self._shared_mode)

    procs = []
    if proc_smac is not None:
        procs.append(proc_smac)
    if proc_ensembles is not None:
        procs.append(proc_ensembles)

    if self._queue is not None:
        self._queue.put([time_for_load_data, data_manager_path, procs])
    else:
        for proc in procs:
            proc.wait()

    # Delete AutoSklearn environment variable
    del_auto_seed()

    # In case
    try:
        del self._datamanager
    except Exception:
        pass

    if self._queue is None:
        self._load_models()

    return self
def get_run_from_dict(self, dct):
    configuration = Configuration(self.configuration_space,
                                  **dct['configuration'])
    return Run(configuration, dct['result'], dct['duration'])
def test_check_configuration(self):
    # TODO this is only a smoke test
    # TODO actually, this rather tests the evaluate methods in the
    # conditions module!
    cs = ConfigurationSpace()
    hp1 = CategoricalHyperparameter("parent", [0, 1])
    cs.add_hyperparameter(hp1)
    hp2 = UniformIntegerHyperparameter("child", 0, 10)
    cs.add_hyperparameter(hp2)
    cond1 = EqualsCondition(hp2, hp1, 0)
    cs.add_condition(cond1)
    # This automatically checks the configuration!
    Configuration(cs, dict(parent=0, child=5))

    # and now for something more complicated
    cs = ConfigurationSpace()
    hp1 = CategoricalHyperparameter("input1", [0, 1])
    cs.add_hyperparameter(hp1)
    hp2 = CategoricalHyperparameter("input2", [0, 1])
    cs.add_hyperparameter(hp2)
    hp3 = CategoricalHyperparameter("input3", [0, 1])
    cs.add_hyperparameter(hp3)
    hp4 = CategoricalHyperparameter("input4", [0, 1])
    cs.add_hyperparameter(hp4)
    hp5 = CategoricalHyperparameter("input5", [0, 1])
    cs.add_hyperparameter(hp5)
    hp6 = Constant("AND", "True")
    cs.add_hyperparameter(hp6)

    cond1 = EqualsCondition(hp6, hp1, 1)
    cond2 = NotEqualsCondition(hp6, hp2, 1)
    cond3 = InCondition(hp6, hp3, [1])
    cond4 = EqualsCondition(hp6, hp4, 1)
    cond5 = EqualsCondition(hp6, hp5, 1)

    conj1 = AndConjunction(cond1, cond2)
    conj2 = OrConjunction(conj1, cond3)
    conj3 = AndConjunction(conj2, cond4, cond5)
    cs.add_condition(conj3)

    expected_outcomes = [
        False, False, False, False, False, False, False, True,
        False, False, False, False, False, False, False, True,
        False, False, False, True, False, False, False, True,
        False, False, False, False, False, False, False, True,
    ]

    for idx, values in enumerate(product([0, 1], repeat=5)):
        # The hyperparameters aren't sorted, but the test assumes them to
        # be sorted.
        hyperparameters = sorted(cs.get_hyperparameters(),
                                 key=lambda t: t.name)
        instantiations = {hyperparameters[jdx + 1].name: values[jdx]
                          for jdx in range(len(values))}

        evaluation = conj3.evaluate(instantiations)
        self.assertEqual(expected_outcomes[idx], evaluation)

        if not evaluation:
            self.assertRaisesRegexp(ValueError,
                                    "Inactive hyperparameter 'AND' must "
                                    "not be specified, but has the value: "
                                    "'True'.",
                                    Configuration, cs,
                                    values={"input1": values[0],
                                            "input2": values[1],
                                            "input3": values[2],
                                            "input4": values[3],
                                            "input5": values[4],
                                            "AND": "True"})
        else:
            Configuration(cs, values={"input1": values[0],
                                      "input2": values[1],
                                      "input3": values[2],
                                      "input4": values[3],
                                      "input5": values[4],
                                      "AND": "True"})
def retrieve_matadata(validation_directory, metric, configuration_space,
                      cutoff=0, num_runs=1, only_best=False):
    # This looks weird! The dictionaries contain the following information:
    # {dataset_1: (configuration: best_value),
    #  dataset_2: (configuration: best_value)}
    outputs = defaultdict(list)
    configurations = dict()
    configurations_to_ids = dict()

    possible_experiment_directories = os.listdir(validation_directory)

    for ped in possible_experiment_directories:
        dataset_name = ped

        # This is hacky, replace by pySMAC!
        ped = os.path.join(validation_directory, ped, ped)

        if not os.path.exists(ped) or not os.path.isdir(ped):
            continue
        smac_output_dir = ped

        validation_files = []
        validation_configuration_files = []
        validation_run_results_files = []

        # Configurations from smac-validate from a trajectory file
        for seed in [num_run * 1000 for num_run in range(num_runs)]:
            validation_file = os.path.join(
                smac_output_dir,
                'validationResults-detailed-traj-run-%d-walltime.csv' % seed)
            validation_configuration_file = os.path.join(
                smac_output_dir,
                'validationCallStrings-detailed-traj-run-%d-walltime.csv'
                % seed)
            validation_run_results_file = os.path.join(
                smac_output_dir,
                'validationRunResultLineMatrix-detailed-traj-run-%d-walltime.csv'
                % seed)

            if os.path.exists(validation_file) and os.path.exists(
                    validation_configuration_file) and os.path.exists(
                    validation_run_results_file):
                validation_files.append(validation_file)
                validation_configuration_files.append(
                    validation_configuration_file)
                validation_run_results_files.append(
                    validation_run_results_file)

        # Configurations from smac-validate from a configurations file
        validation_file = os.path.join(
            smac_output_dir, 'validationResults-configurations-walltime.csv')
        validation_configuration_file = os.path.join(
            smac_output_dir,
            'validationCallStrings-configurations-walltime.csv')
        validation_run_results_file = os.path.join(
            smac_output_dir,
            'validationRunResultLineMatrix-configurations-walltime.csv')

        if os.path.exists(validation_file) and os.path.exists(
                validation_configuration_file) and os.path.exists(
                validation_run_results_file):
            validation_files.append(validation_file)
            validation_configuration_files.append(
                validation_configuration_file)
            validation_run_results_files.append(validation_run_results_file)

        for validation_file, validation_configuration_file, \
                validation_run_results_file in zip(
                    validation_files,
                    validation_configuration_files,
                    validation_run_results_files):

            configuration_to_time = dict()
            with open(validation_file) as fh:
                reader = csv.reader(fh)
                reader.next()
                for row in reader:
                    current_time = float(row[0])
                    validation_configuration_id = int(row[4])
                    configuration_to_time[
                        validation_configuration_id] = current_time

            best = []
            with open(validation_run_results_file) as fh:
                reader = csv.reader(fh)
                reader.next()
                for row in reader:
                    seed = int(float(row[1]))
                    results = row[2:]
                    for i, result in enumerate(results):
                        result = result.split(",")[-1]
                        if ";" not in result:
                            continue
                        result = result.split(";")
                        for result_ in result:
                            metric_, value = result_.split(":")
                            metric_ = metric_.replace(":", "").strip()
                            value = value.strip()
                            if metric_ == metric:
                                value = float(value)
                                best.append((value, i + 1))
            best.sort()

            for test_performance, validation_configuration_id in best:
                if cutoff > 0 and \
                        configuration_to_time[validation_configuration_id] > \
                        cutoff:
                    continue
                stop = False

                with open(validation_configuration_file) as fh:
                    reader = csv.reader(fh)
                    reader.next()
                    for row in reader:
                        if int(row[0]) == validation_configuration_id:
                            configuration = row[1]
                            configuration = configuration.split()
                            configuration = {configuration[i]:
                                             configuration[i + 1]
                                             for i in
                                             range(0, len(configuration), 2)}

                            for key in configuration.keys():
                                value = configuration[key]
                                hp_name = key[1:]
                                try:
                                    hyperparameter = \
                                        configuration_space.get_hyperparameter(
                                            hp_name)
                                except KeyError:
                                    break
                                value = value.strip("'")

                                if isinstance(hyperparameter,
                                              IntegerHyperparameter):
                                    value = int(float(value))
                                elif isinstance(hyperparameter,
                                                FloatHyperparameter):
                                    value = float(value)
                                elif isinstance(hyperparameter,
                                                CategoricalHyperparameter):
                                    # Implementation tailored to the PCS
                                    # parser
                                    value = str(value)
                                elif isinstance(hyperparameter, Constant):
                                    if isinstance(hyperparameter.value, float):
                                        value = float(value)
                                    elif isinstance(hyperparameter.value, int):
                                        value = int(value)
                                elif hyperparameter is None:
                                    value = ''
                                else:
                                    raise ValueError((hp_name, ))

                                configuration[hp_name] = value

                            try:
                                configuration = Configuration(
                                    configuration_space, configuration)
                            except Exception as e:
                                print("Configuration %s not applicable "
                                      "because of %s!" % (row[1], e))
                                break

                            if str(configuration) in configurations_to_ids:
                                global_configuration_id = \
                                    configurations_to_ids[str(configuration)]
                            else:
                                global_configuration_id = len(configurations)
                                configurations[global_configuration_id] = \
                                    configuration
                                configurations_to_ids[str(configuration)] = \
                                    global_configuration_id

                            if global_configuration_id is not None:
                                outputs[dataset_name].append(
                                    (global_configuration_id,
                                     test_performance))

                            if only_best:
                                stop = True
                                break

                if stop is True:
                    break

    return outputs, configurations