def get_hyperparameter_search_space(cls, dataset_properties,
                                    default=None,
                                    include=None,
                                    exclude=None):
        """Assemble the configuration space for the preprocessor choice.

        The space contains one categorical ``__choice__`` hyperparameter
        listing every available preprocessor, plus the search space of each
        preprocessor merged in through ``add_component_deepcopy``.

        Parameters
        ----------
        dataset_properties
            Forwarded to ``cls.get_available_components`` (as ``data_prop``)
            and to each component's ``get_hyperparameter_search_space``.
        default : optional
            Default value of ``__choice__``. When ``None``, the first
            available name from a fixed preference list is used; if none of
            them is available, ``None`` is passed on unchanged.
        include, exclude : optional
            Forwarded unchanged to ``cls.get_available_components``.

        Returns
        -------
        The populated configuration space.

        Raises
        ------
        ValueError
            If no preprocessor is available.
        """
        # Collect the preprocessors that are legal for this problem.
        components = cls.get_available_components(
            data_prop=dataset_properties, include=include, exclude=exclude)
        if not components:
            raise ValueError(
                "No preprocessors found, please add no_preprocessing")

        if default is None:
            preferred = ('no_preprocessing', 'select_percentile', 'pca',
                         'truncatedSVD')
            default = next(
                (name for name in preferred if name in components), None)

        cs = ConfigurationSpace()
        cs.add_hyperparameter(CategoricalHyperparameter(
            '__choice__', list(components.keys()), default=default))

        # Merge every component's own search space under its name.
        for name, component in components.items():
            component_space = component.get_hyperparameter_search_space(
                dataset_properties)
            cs = add_component_deepcopy(cs, name, component_space)

        return cs
Ejemplo n.º 2
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Build the search space with a tunable loss and a conditional alpha.

        ``dataset_properties`` is accepted but not used. ``alpha`` is only
        active when ``loss`` is ``'huber'`` or ``'quantile'``.

        Returns
        -------
        The populated configuration space.
        """
        # These two are referenced again by the condition below.
        loss = CategoricalHyperparameter(
            "loss", ["ls", "lad", "huber", "quantile"], default_value="ls")
        alpha = UniformFloatHyperparameter(
            "alpha", lower=0.75, upper=0.99, default_value=0.9)

        cs = ConfigurationSpace()
        cs.add_hyperparameters([
            loss,
            UniformFloatHyperparameter(
                name="learning_rate", lower=0.01, upper=1,
                default_value=0.1, log=True),
            UniformIntegerHyperparameter(
                "n_estimators", 50, 500, default_value=100),
            UniformIntegerHyperparameter(
                name="max_depth", lower=1, upper=10, default_value=3),
            UniformIntegerHyperparameter(
                name="min_samples_split", lower=2, upper=20,
                default_value=2, log=False),
            UniformIntegerHyperparameter(
                name="min_samples_leaf", lower=1, upper=20,
                default_value=1, log=False),
            UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.),
            UniformFloatHyperparameter(
                name="subsample", lower=0.01, upper=1.0,
                default_value=1.0, log=False),
            UniformFloatHyperparameter(
                "max_features", 0.1, 1.0, default_value=1),
            UnParametrizedHyperparameter(name="max_leaf_nodes", value="None"),
            UnParametrizedHyperparameter(
                name='min_impurity_decrease', value=0.0),
            alpha,
        ])

        cs.add_condition(InCondition(alpha, loss, ['huber', 'quantile']))
        return cs
Ejemplo n.º 3
0
 def get_hyperparameter_search_space(dataset_properties=None):
     """Return a configuration space holding ``N`` and ``precond``.

     ``N`` is an integer in [5, 20] (default 10) and ``precond`` a float in
     [0, 0.5] (default 0.1). ``dataset_properties`` is accepted but unused.
     """
     cs = ConfigurationSpace()
     cs.add_hyperparameter(
         UniformIntegerHyperparameter("N", 5, 20, default=10))
     cs.add_hyperparameter(
         UniformFloatHyperparameter("precond", 0, 0.5, default=0.1))
     return cs
    def get_hyperparameter_search_space(dataset_properties=None):
        """Build the tree-ensemble search space (bootstrap off by default).

        ``n_estimators``, ``max_depth``, ``max_leaf_nodes``,
        ``min_weight_fraction_leaf`` and ``min_impurity_decrease`` are fixed
        values; the remaining hyperparameters are tunable.
        ``dataset_properties`` is accepted but not used.

        Returns
        -------
        The populated configuration space.
        """
        cs = ConfigurationSpace()
        cs.add_hyperparameters([
            Constant("n_estimators", 100),
            CategoricalHyperparameter(
                "criterion", ["gini", "entropy"], default_value="gini"),
            UniformFloatHyperparameter(
                "max_features", 0, 1, default_value=0.5),
            # "None" is passed as a string, not the None object.
            UnParametrizedHyperparameter(name="max_depth", value="None"),
            UnParametrizedHyperparameter("max_leaf_nodes", "None"),
            UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default_value=2),
            UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 20, default_value=1),
            UnParametrizedHyperparameter('min_weight_fraction_leaf', 0.),
            UnParametrizedHyperparameter('min_impurity_decrease', 0.),
            CategoricalHyperparameter(
                "bootstrap", ["True", "False"], default_value="False"),
        ])
        return cs
Ejemplo n.º 5
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Build the search space for ``tol``, the alpha/lambda priors and
        ``threshold_lambda``.

        ``n_iter`` and ``fit_intercept`` are fixed values.
        ``dataset_properties`` is accepted but not used.

        Returns
        -------
        The populated configuration space.
        """
        # All four prior hyperparameters share the same bounds and default.
        prior_range = dict(lower=10 ** -10, upper=10 ** -3,
                           default_value=10 ** -6)

        # NOTE(review): alpha_1 is the only prior without log=True; this is
        # kept exactly as found -- confirm whether it is intentional.
        alpha_1 = UniformFloatHyperparameter(name="alpha_1", **prior_range)
        alpha_2, lambda_1, lambda_2 = (
            UniformFloatHyperparameter(name=prior_name, log=True,
                                       **prior_range)
            for prior_name in ("alpha_2", "lambda_1", "lambda_2")
        )

        cs = ConfigurationSpace()
        cs.add_hyperparameters([
            UnParametrizedHyperparameter("n_iter", value=300),
            UniformFloatHyperparameter(
                "tol", 10 ** -5, 10 ** -1, default_value=10 ** -3, log=True),
            alpha_1,
            alpha_2,
            lambda_1,
            lambda_2,
            UniformFloatHyperparameter(
                name="threshold_lambda", log=True,
                lower=10 ** 3, upper=10 ** 5, default_value=10 ** 4),
            UnParametrizedHyperparameter("fit_intercept", "True"),
        ])
        return cs
Ejemplo n.º 6
0
 def add_params(cs: ConfigurationSpace):
     """Register the ``StandardScaler`` on/off switch on ``cs``.

     The hyperparameter is categorical over ``True``/``False`` and defaults
     to ``True``. ``cs`` is modified in place; nothing is returned.
     """
     cs.add_hyperparameter(CategoricalHyperparameter(
         "StandardScaler", choices=[True, False], default=True))
Ejemplo n.º 7
0
 def test_write_log_int(self):
     """``irace.write`` output for a log-scaled integer hyperparameter."""
     cs = ConfigurationSpace()
     cs.add_hyperparameter(
         UniformIntegerHyperparameter("int_log", 10, 100, log=True))
     self.assertEqual("int_log '--int_log ' i (2, 4)\n", irace.write(cs))
Ejemplo n.º 8
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Build the search space for ``C``, ``loss``, ``epsilon`` and ``tol``.

        ``dual``, ``fit_intercept`` and ``intercept_scaling`` are constants.
        The combination ``dual == "False"`` with
        ``loss == "epsilon_insensitive"`` is registered as forbidden.
        ``dataset_properties`` is accepted but not used.

        Returns
        -------
        The populated configuration space.
        """
        # Referenced again by the forbidden clause below.
        loss = CategoricalHyperparameter(
            "loss", ["epsilon_insensitive", "squared_epsilon_insensitive"],
            default_value="squared_epsilon_insensitive")
        dual = Constant("dual", "False")

        cs = ConfigurationSpace()
        cs.add_hyperparameters([
            UniformFloatHyperparameter(
                "C", 0.03125, 32768, log=True, default_value=1.0),
            loss,
            # Range was a random guess according to the original author.
            UniformFloatHyperparameter(
                name="epsilon", lower=0.001, upper=1, default_value=0.1,
                log=True),
            dual,
            # The remaining values were set ad hoc by the original author.
            UniformFloatHyperparameter(
                "tol", 1e-5, 1e-1, default_value=1e-4, log=True),
            Constant("fit_intercept", "True"),
            Constant("intercept_scaling", 1),
        ])

        cs.add_forbidden_clause(ForbiddenAndConjunction(
            ForbiddenEqualsClause(dual, "False"),
            ForbiddenEqualsClause(loss, "epsilon_insensitive"),
        ))
        return cs
Ejemplo n.º 9
0
 def test_write_ordinal(self):
     """``irace.write`` output for an ordinal with mixed str/int values."""
     ordinal = OrdinalHyperparameter("ord_a", ["a", "b", 3])
     cs = ConfigurationSpace()
     cs.add_hyperparameter(ordinal)
     self.assertEqual("ord_a '--ord_a ' o {a,b,3}\n", irace.write(cs))
    def get_hyperparameter_search_space(cls, dataset_properties=None,
                                        default=None,
                                        include=None,
                                        exclude=None):
        """Assemble the configuration space for the rescaling choice.

        The space contains one categorical ``__choice__`` hyperparameter
        listing every available rescaling component, plus the search space of
        each component merged in through ``add_component_deepcopy``.

        Parameters
        ----------
        dataset_properties : optional
            Forwarded to ``cls.get_available_components`` (as ``data_prop``)
            and to each component's ``get_hyperparameter_search_space``.
        default : optional
            Default value of ``__choice__``. When ``None``, the first
            available name from a fixed preference list is used; if none of
            them is available, ``None`` is passed on unchanged.
        include, exclude : optional
            Forwarded unchanged to ``cls.get_available_components``.

        Returns
        -------
        The populated configuration space.

        Raises
        ------
        ValueError
            If no rescaling component is available.
        """
        # Collect the components that are legal for this problem.
        components = cls.get_available_components(
            data_prop=dataset_properties, include=include, exclude=exclude)
        if not components:
            raise ValueError(
                "No rescaling algorithm found.")

        if default is None:
            preferred = ('min/max', 'standardize', 'none', 'normalize')
            default = next(
                (name for name in preferred if name in components), None)

        cs = ConfigurationSpace()
        cs.add_hyperparameter(CategoricalHyperparameter(
            '__choice__', list(components.keys()), default=default))

        # Merge every component's own search space under its name.
        for name, component in components.items():
            component_space = component.get_hyperparameter_search_space(
                dataset_properties)
            cs = add_component_deepcopy(cs, name, component_space)

        return cs
Ejemplo n.º 11
0
 def get_hyperparameter_search_space(dataset_properties=None):
     """Return a space with the single categorical ``strategy``.

     Choices are ``mean``, ``median`` and ``most_frequent`` (default
     ``mean``). ``dataset_properties`` is accepted but not used.
     """
     # TODO add replace by zero!
     cs = ConfigurationSpace()
     cs.add_hyperparameter(CategoricalHyperparameter(
         "strategy", ["mean", "median", "most_frequent"],
         default_value="mean"))
     return cs
    def test_add_forbidden(self):
        """``add_forbidden`` adds one clause per zero entry in ``matches``.

        With an all-ones matrix no forbidden clause is expected. After
        zeroing entry ``[1, 1]`` exactly one clause is expected, whose
        components carry the values ``'cb'`` and ``'pb'``.
        """
        preprocessors_list = ['pa', 'pb']
        classifier_list = ['ca', 'cb', 'cc']
        m = numpy.ones([2, 3])

        cs = ConfigurationSpace()
        cs.add_hyperparameter(CategoricalHyperparameter(
            name='preprocessor', choices=preprocessors_list))
        cs.add_hyperparameter(CategoricalHyperparameter(
            name='classifier', choices=classifier_list))

        def build_space():
            # Reads the current contents of ``m`` on every call.
            return autosklearn.pipeline.create_searchspace_util.add_forbidden(
                conf_space=cs, node_0_list=preprocessors_list,
                node_1_list=classifier_list, matches=m,
                node_0_name='preprocessor', node_1_name="classifier")

        new_cs = build_space()
        self.assertEqual(len(new_cs.forbidden_clauses), 0)
        self.assertIsInstance(new_cs, ConfigurationSpace)

        m[1, 1] = 0
        new_cs = build_space()
        self.assertEqual(len(new_cs.forbidden_clauses), 1)
        clause = new_cs.forbidden_clauses[0]
        self.assertEqual(clause.components[0].value, 'cb')
        self.assertEqual(clause.components[1].value, 'pb')
        self.assertIsInstance(new_cs, ConfigurationSpace)
Ejemplo n.º 13
0
 def test_write_new_q_float(self):
     """``pcs_new.write`` output for a quantized (``q=16``) float."""
     quantized = UniformFloatHyperparameter("float_a", 16, 1024, q=16)
     cs = ConfigurationSpace()
     cs.add_hyperparameter(quantized)
     self.assertEqual("Q16_float_a real [16.0, 1024.0] [520.0]",
                      pcs_new.write(cs))
Ejemplo n.º 14
0
 def test_write_new_q_int(self):
     """``pcs_new.write`` output for a quantized (``q=16``) integer."""
     quantized = UniformIntegerHyperparameter("int_a", 16, 1024, q=16)
     cs = ConfigurationSpace()
     cs.add_hyperparameter(quantized)
     self.assertEqual("Q16_int_a integer [16, 1024] [520]",
                      pcs_new.write(cs))
Ejemplo n.º 15
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Build the kernel search space with kernel-dependent conditions.

        The ``chi2`` kernel is offered unless ``dataset_properties`` is given
        and reports ``sparse`` as ``True`` or ``signed`` as ``False``.

        Conditions:
        - ``degree`` is active only for the ``poly`` kernel,
        - ``coef0`` is active for ``poly`` and ``sigmoid``,
        - ``gamma`` is active for ``poly``, ``rbf``, ``sigmoid`` and, when
          offered, ``chi2``.

        Returns
        -------
        The populated configuration space.
        """
        chi2_ruled_out = dataset_properties is not None and (
            dataset_properties.get("sparse") is True or
            dataset_properties.get("signed") is False)
        allow_chi2 = not chi2_ruled_out
        chi2_extra = ["chi2"] if allow_chi2 else []

        kernel = CategoricalHyperparameter(
            'kernel', ['poly', 'rbf', 'sigmoid', 'cosine'] + chi2_extra,
            'rbf')
        degree = UniformIntegerHyperparameter('degree', 2, 5, 3)
        gamma = UniformFloatHyperparameter(
            "gamma", 3.0517578125e-05, 8, log=True, default_value=0.1)
        coef0 = UniformFloatHyperparameter("coef0", -1, 1, default_value=0)
        n_components = UniformIntegerHyperparameter(
            "n_components", 50, 10000, default_value=100, log=True)

        cs = ConfigurationSpace()
        cs.add_hyperparameters([kernel, degree, gamma, coef0, n_components])

        cs.add_conditions([
            EqualsCondition(degree, kernel, "poly"),
            InCondition(coef0, kernel, ["poly", "sigmoid"]),
            InCondition(gamma, kernel,
                        ["poly", "rbf", "sigmoid"] + chi2_extra),
        ])
        return cs
Ejemplo n.º 16
0
 def test_write_new_log10(self):
     """``pcs_new.write`` output for a log-scaled float hyperparameter."""
     log_float = UniformFloatHyperparameter("a", 10, 1000, log=True)
     cs = ConfigurationSpace()
     cs.add_hyperparameter(log_float)
     self.assertEqual("a real [10.0, 1000.0] [100.0]log", pcs_new.write(cs))
Ejemplo n.º 17
0
    def test_write_forbidden(self):
        """Run ``irace.write`` on a space carrying forbidden clauses.

        The test makes no assertion on the result; it only checks that the
        call completes.
        """
        parent = CategoricalHyperparameter("parent", [0, 1])
        int_children = [
            UniformIntegerHyperparameter(child_name, 0, 2)
            for child_name in ("child", "child2", "child3")
        ]
        cat_child = CategoricalHyperparameter("child4", [4, 5, 6, 7])

        cs = ConfigurationSpace()
        cs.add_hyperparameters([parent, *int_children, cat_child])

        parent_is_one = ForbiddenEqualsClause(parent, 1)
        int_clauses = [
            ForbiddenInClause(child, range(2, 3)) for child in int_children
        ]
        cat_clause = ForbiddenInClause(cat_child, [6, 7])
        # One conjunction per integer child, each paired with the parent.
        conjunctions = [
            ForbiddenAndConjunction(parent_is_one, clause)
            for clause in int_clauses
        ]

        cs.add_forbidden_clauses(
            [parent_is_one, *int_clauses, cat_clause, *conjunctions])

        irace.write(cs)  # generates file called forbidden.txt
Ejemplo n.º 18
0
 def get_hyperparameter_search_space(dataset_properties=None):
     """Return a space with the single categorical ``strategy``.

     Choices are ``none`` and ``weighting`` (default ``none``).
     ``dataset_properties`` is accepted but not used.
     """
     # TODO add replace by zero!
     cs = ConfigurationSpace()
     cs.add_hyperparameter(CategoricalHyperparameter(
         "strategy", ["none", "weighting"], default_value="none"))
     return cs
Ejemplo n.º 19
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Build the forest search space (bootstrap on by default).

        ``n_estimators``, ``max_depth``, ``min_weight_fraction_leaf``,
        ``max_leaf_nodes`` and ``min_impurity_decrease`` are fixed values;
        the remaining hyperparameters are tunable. ``dataset_properties`` is
        accepted but not used.

        Returns
        -------
        The populated configuration space.
        """
        # Original author's note: the estimator uses m ** max_features
        # features, where m is the total number of features. The default of
        # 0.5 therefore gives sqrt(m), matching Geurts' heuristic.
        max_features = UniformFloatHyperparameter(
            "max_features", 0., 1., default_value=0.5)

        cs = ConfigurationSpace()
        cs.add_hyperparameters([
            Constant("n_estimators", 100),
            CategoricalHyperparameter(
                "criterion", ["gini", "entropy"], default_value="gini"),
            max_features,
            UnParametrizedHyperparameter("max_depth", "None"),
            UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default_value=2),
            UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 20, default_value=1),
            UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.),
            UnParametrizedHyperparameter("max_leaf_nodes", "None"),
            CategoricalHyperparameter(
                "bootstrap", ["True", "False"], default_value="True"),
            UnParametrizedHyperparameter('min_impurity_decrease', 0.0),
        ])
        return cs
Ejemplo n.º 20
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Build the boosting search space with ``loss`` fixed to ``deviance``.

        ``min_weight_fraction_leaf``, ``max_leaf_nodes`` and
        ``min_impurity_decrease`` are fixed values as well; the remaining
        hyperparameters are tunable. ``dataset_properties`` is accepted but
        not used.

        Returns
        -------
        The populated configuration space.
        """
        cs = ConfigurationSpace()
        cs.add_hyperparameters([
            Constant("loss", "deviance"),
            UniformFloatHyperparameter(
                name="learning_rate", lower=0.01, upper=1,
                default_value=0.1, log=True),
            UniformIntegerHyperparameter(
                "n_estimators", 50, 500, default_value=100),
            UniformIntegerHyperparameter(
                name="max_depth", lower=1, upper=10, default_value=3),
            CategoricalHyperparameter(
                'criterion', ['friedman_mse', 'mse', 'mae'],
                default_value='mse'),
            UniformIntegerHyperparameter(
                name="min_samples_split", lower=2, upper=20,
                default_value=2),
            UniformIntegerHyperparameter(
                name="min_samples_leaf", lower=1, upper=20, default_value=1),
            UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.),
            UniformFloatHyperparameter(
                name="subsample", lower=0.01, upper=1.0, default_value=1.0),
            UniformFloatHyperparameter(
                "max_features", 0.1, 1.0, default_value=1),
            UnParametrizedHyperparameter(name="max_leaf_nodes", value="None"),
            UnParametrizedHyperparameter(
                name='min_impurity_decrease', value=0.0),
        ])
        return cs
Ejemplo n.º 21
0
 def test_write_float(self):
     """``irace.write`` output for a plain uniform float hyperparameter."""
     float_a = UniformFloatHyperparameter("float_a", 16, 1024)
     cs = ConfigurationSpace()
     cs.add_hyperparameter(float_a)
     self.assertEqual(
         "float_a '--float_a ' r (16.000000, 1024.000000)\n",
         irace.write(cs))
Ejemplo n.º 22
0
    def test_build_new_GreaterThanIntCondition(self):
        """``pcs_new.write`` renders a greater-than condition on ``b``.

        Two scenarios are checked in order: a float parent ``a`` with
        threshold 0.5, then an integer parent ``a`` with threshold 5. The
        child ``b`` is the same integer hyperparameter in both.
        """
        scenarios = (
            (lambda: UniformFloatHyperparameter("a", 0, 1, 0.5),
             0.5,
             "a real [0.0, 1.0] [0.5]\n"
             "b integer [0, 10] [5]\n\n"
             "b | a > 0.5"),
            (lambda: UniformIntegerHyperparameter("a", 0, 10, 5),
             5,
             "a integer [0, 10] [5]\n"
             "b integer [0, 10] [5]\n\n"
             "b | a > 5"),
        )

        for make_parent, threshold, expected in scenarios:
            cs = ConfigurationSpace()
            parent = make_parent()
            child = UniformIntegerHyperparameter("b", 0, 10, 5)
            cs.add_hyperparameter(parent)
            cs.add_hyperparameter(child)
            cs.add_condition(GreaterThanCondition(child, parent, threshold))

            self.assertEqual(expected, pcs_new.write(cs))
Ejemplo n.º 23
0
 def test_write_categorical(self):
     """``irace.write`` output for a three-choice categorical."""
     cat_a = CategoricalHyperparameter("cat_a", ["a", "b", "c"])
     cs = ConfigurationSpace()
     cs.add_hyperparameter(cat_a)
     self.assertEqual("cat_a '--cat_a ' c {a,b,c}\n", irace.write(cs))
Ejemplo n.º 24
0
 def get_hyperparameter_search_space(dataset_properties=None):
     """Return a space with log-scaled ``gamma`` and ``n_components``.

     ``dataset_properties`` is accepted but not used.
     """
     cs = ConfigurationSpace()
     cs.add_hyperparameters([
         UniformFloatHyperparameter(
             "gamma", 3.0517578125e-05, 8, default_value=1.0, log=True),
         UniformIntegerHyperparameter(
             "n_components", 50, 10000, default_value=100, log=True),
     ])
     return cs
Ejemplo n.º 25
0
 def test_write_log_float(self):
     """``irace.write`` output for a log-scaled float with bounds e**2, e**5."""
     import numpy as np

     cs = ConfigurationSpace()
     cs.add_hyperparameter(UniformFloatHyperparameter(
         "float_log", np.exp(2), np.exp(5), log=True))
     self.assertEqual(
         "float_log '--float_log ' r (2.000000, 5.000000)\n",
         irace.write(cs))
Ejemplo n.º 26
0
 def get_hyperparameter_search_space(dataset_properties=None):
     """Return a space where ``minimum_fraction`` depends on a switch.

     ``minimum_fraction`` is only active when ``use_minimum_fraction``
     equals ``'True'``. ``dataset_properties`` is accepted but not used.
     """
     toggle = CategoricalHyperparameter(
         "use_minimum_fraction", ["True", "False"], default="True")
     fraction = UniformFloatHyperparameter(
         "minimum_fraction", lower=.0001, upper=0.5, default=0.01, log=True)

     cs = ConfigurationSpace()
     # The condition is built from what add_hyperparameter returns, not from
     # the objects constructed above.
     toggle = cs.add_hyperparameter(toggle)
     fraction = cs.add_hyperparameter(fraction)
     cs.add_condition(EqualsCondition(fraction, toggle, 'True'))
     return cs
Ejemplo n.º 27
0
 def get_hyperparameter_search_space(dataset_properties=None):
     """Return a space with ``alpha``, fixed ``fit_intercept`` and ``tol``.

     ``alpha`` and ``tol`` are log-scaled floats. ``dataset_properties`` is
     accepted but not used.
     """
     cs = ConfigurationSpace()
     cs.add_hyperparameters([
         UniformFloatHyperparameter(
             "alpha", 10 ** -5, 10., log=True, default_value=1.),
         UnParametrizedHyperparameter("fit_intercept", "True"),
         UniformFloatHyperparameter(
             "tol", 1e-5, 1e-1, default_value=1e-3, log=True),
     ])
     return cs
Ejemplo n.º 28
0
 def get_hyperparameter_search_space(dataset_properties=None):
     """Return a space with ``gamma`` and log-scaled ``n_components``.

     ``dataset_properties`` is accepted but not used.
     """
     cs = ConfigurationSpace()
     cs.add_hyperparameter(
         UniformFloatHyperparameter("gamma", 0.3, 2., default=1.0))
     cs.add_hyperparameter(UniformIntegerHyperparameter(
         "n_components", 50, 10000, default=100, log=True))
     return cs
 def get_hyperparameter_search_space(dataset_properties=None):
     """Return a space with ``alpha``, fixed ``fit_intercept`` and ``tol``.

     ``alpha`` and ``tol`` are log-scaled floats. ``dataset_properties`` is
     accepted but not used.

     Returns
     -------
     The populated configuration space.
     """
     cs = ConfigurationSpace()
     # The values returned by add_hyperparameter were never used here, so
     # they are no longer bound to local names.
     cs.add_hyperparameter(UniformFloatHyperparameter(
         "alpha", 10 ** -5, 10., log=True, default=1.))
     cs.add_hyperparameter(UnParametrizedHyperparameter(
         "fit_intercept", "True"))
     cs.add_hyperparameter(UniformFloatHyperparameter(
         "tol", 1e-5, 1e-1, default=1e-4, log=True))
     return cs
Ejemplo n.º 30
0
def _construct_lt_condition(
        condition: Dict,
        cs: ConfigurationSpace,
) -> LessThanCondition:
    """Build a ``LessThanCondition`` from its dictionary description.

    Parameters
    ----------
    condition
        Mapping with the keys ``'child'`` and ``'parent'`` (hyperparameter
        names looked up in ``cs``) and ``'value'`` (the comparison value).
    cs
        Configuration space that the two hyperparameters are fetched from.

    Returns
    -------
    The condition linking the child to the parent.
    """
    child_hp = cs.get_hyperparameter(condition['child'])
    parent_hp = cs.get_hyperparameter(condition['parent'])
    threshold = condition['value']
    return LessThanCondition(child=child_hp, parent=parent_hp,
                             value=threshold)
Ejemplo n.º 31
0
def read(pcs_string, debug=False):
    """Parse a PCS description into a ``ConfigurationSpace``.

    Every item of ``pcs_string`` is treated as one line. After dropping any
    ``#`` comment, all quote characters and surrounding whitespace, a line is
    classified as follows:

    * it contains ``|``: a condition, parsed with ``pp_condition``;
    * it contains neither ``}`` nor ``]``: skipped silently;
    * it starts with ``{`` and ends with ``}``: a forbidden clause;
    * anything else: a hyperparameter definition (``integer``/``real``,
      ``categorical`` or ``ordinal``).

    Hyperparameters are added while the lines are read; forbidden clauses
    and conditions are added afterwards, in that order.

    Parameters
    ----------
    pcs_string
        Iterable yielding one line of text per item (it is iterated
        directly by the line loop).
    debug
        Accepted for interface compatibility; not used by this function.

    Returns
    -------
    The populated configuration space.

    Raises
    ------
    NotImplementedError
        If a condition line cannot be parsed, if a definition line matches
        none of the supported hyperparameter kinds, or if a forbidden clause
        uses an operator other than ``=``.
    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # some statistics (only tallied; nothing in this function reads them)
    ct = 0
    cont_ct = 0
    cat_ct = 0
    ord_ct = 0
    line_ct = 0

    # Constructor lookup by parameter kind. It does not change between
    # lines, so it is built once instead of once per line.
    create = {
        "int": UniformIntegerHyperparameter,
        "float": UniformFloatHyperparameter,
        "categorical": CategoricalHyperparameter,
        "ordinal": OrdinalHyperparameter
    }

    for line in pcs_string:
        line_ct += 1

        if "#" in line:
            # It contains a comment
            pos = line.find("#")
            line = line[:pos]

        # Remove every quote character, then surrounding whitespace
        line = line.replace('"', "").replace("'", "")
        line = line.strip()
        if "|" in line:
            # It's a condition
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException:
                raise NotImplementedError("Could not parse condition: %s" %
                                          line)
            continue
        if "}" not in line and "]" not in line:
            continue
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue
        if len(line.strip()) == 0:
            continue

        ct += 1
        param = None

        # First attempt: a continuous (integer / real) parameter.
        try:
            param_list = pp_cont_param.parseString(line)
            name = param_list[0]
            if param_list[1] == 'integer':
                paramtype = 'int'
            elif param_list[1] == 'real':
                paramtype = 'float'
            else:
                paramtype = None

            if paramtype in ['int', 'float']:
                # Anything after the tenth token is the optional log marker.
                log = param_list[10:]
                param_list = param_list[:10]
                if len(log) > 0:
                    log = log[0]
                lower = float(param_list[3])
                upper = float(param_list[5])
                log_on = True if "log" in log else False
                default = float(param_list[8])
                param = create[paramtype](name=name,
                                          lower=lower,
                                          upper=upper,
                                          q=None,
                                          log=log_on,
                                          default=default)
                cont_ct += 1

        except pyparsing.ParseException:
            pass

        # Second attempt: a categorical or ordinal parameter.
        try:
            if "categorical" in line:
                param_list = pp_cat_param.parseString(line)
                name = param_list[0]
                choices = [choice for choice in param_list[3:-4:2]]
                default = param_list[-2]
                param = create["categorical"](name=name,
                                              choices=choices,
                                              default=default)
                cat_ct += 1

            elif "ordinal" in line:
                param_list = pp_ord_param.parseString(line)
                name = param_list[0]
                sequence = [seq for seq in param_list[3:-4:2]]
                default = param_list[-2]
                param = create["ordinal"](name=name,
                                          sequence=sequence,
                                          default=default)
                ord_ct += 1

        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        # Tokens are consumed in groups: three are collected (name, operator,
        # value), and the token after them triggers building the clause and
        # is itself discarded.
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(
                        ForbiddenEqualsClause(
                            configuration_space.get_hyperparameter(
                                tmp_list[0]), tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(
            ForbiddenAndConjunction(*clause_list))

    # Group the parsed conditions by the child they apply to, keeping the
    # order in which children were first seen.
    conditions_per_child = OrderedDict()
    for condition in conditions:
        child_name = condition[0]
        if child_name not in conditions_per_child:
            conditions_per_child[child_name] = list()
        conditions_per_child[child_name].append(condition)

    for child_name in conditions_per_child:
        for condition in conditions_per_child[child_name]:
            # Drop the first two tokens and work on the rest as plain text.
            condition = condition[2:]
            condition = ' '.join(condition)
            if '||' in str(condition):
                ors = []
                # 1st case we have a mixture of || and &&
                if '&&' in str(condition):
                    ors_combis = []
                    for cond_parts in str(condition).split('||'):
                        and_parts = str(cond_parts).split('&&')
                        # if length is 1 it must be or
                        if len(and_parts) == 1:
                            element_list = and_parts[0].split()
                            ors_combis.append(
                                condition_specification(
                                    child_name, element_list,
                                    configuration_space))
                        else:
                            # now taking care of ands
                            ands = []
                            for and_part in and_parts:
                                # Only this part's own tokens. The earlier
                                # code repeated them once per AND-part,
                                # which corrupted the token list.
                                element_list = and_part.split()
                                ands.append(
                                    condition_specification(
                                        child_name, element_list,
                                        configuration_space))
                            ors_combis.append(AndConjunction(*ands))
                    mixed_conjunction = OrConjunction(*ors_combis)
                    configuration_space.add_condition(mixed_conjunction)
                else:
                    # 2nd case: we only have ors
                    for cond_parts in str(condition).split('||'):
                        element_list = cond_parts.split()
                        ors.append(
                            condition_specification(child_name, element_list,
                                                    configuration_space))
                    or_conjunction = OrConjunction(*ors)
                    configuration_space.add_condition(or_conjunction)
            else:
                # 3rd case: we only have ands
                if '&&' in str(condition):
                    ands = []
                    for cond_parts in str(condition).split('&&'):
                        element_list = cond_parts.split()
                        ands.append(
                            condition_specification(child_name, element_list,
                                                    configuration_space))
                    and_conjunction = AndConjunction(*ands)
                    configuration_space.add_condition(and_conjunction)
                else:
                    # 4th case: we have a normal condition
                    element_list = condition.split()
                    normal_condition = condition_specification(
                        child_name, element_list, configuration_space)
                    configuration_space.add_condition(normal_condition)

    return configuration_space
Ejemplo n.º 32
0
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        """Build the LightGBM hyperparameter search space for an optimizer.

        Parameters
        ----------
        dataset_properties : optional
            Accepted for interface compatibility; not read by this function.
        optimizer : str
            ``'smac'`` returns a ``ConfigurationSpace``; ``'tpe'`` returns a
            dict of hyperopt expressions covering the same ranges.

        Returns
        -------
        ConfigurationSpace, dict or None
            The search space for the requested optimizer. Any other
            ``optimizer`` value falls through and yields ``None``.
        """
        if optimizer == 'smac':
            cs = ConfigurationSpace()
            n_estimators = UniformIntegerHyperparameter("n_estimators",
                                                        100,
                                                        1000,
                                                        default_value=500)
            num_leaves = UniformIntegerHyperparameter("num_leaves",
                                                      31,
                                                      1023,
                                                      default_value=31)
            learning_rate = UniformFloatHyperparameter("learning_rate",
                                                       0.025,
                                                       0.3,
                                                       default_value=0.1,
                                                       log=True)
            min_child_weight = UniformIntegerHyperparameter("min_child_weight",
                                                            1,
                                                            10,
                                                            default_value=1)
            subsample = UniformFloatHyperparameter("subsample",
                                                   0.5,
                                                   1,
                                                   default_value=1)
            colsample_bytree = UniformFloatHyperparameter("colsample_bytree",
                                                          0.5,
                                                          1,
                                                          default_value=1)
            reg_alpha = UniformFloatHyperparameter('reg_alpha',
                                                   1e-10,
                                                   10,
                                                   log=True,
                                                   default_value=1e-10)
            reg_lambda = UniformFloatHyperparameter("reg_lambda",
                                                    1e-10,
                                                    10,
                                                    log=True,
                                                    default_value=1e-10)
            cs.add_hyperparameters([
                n_estimators, num_leaves, learning_rate, min_child_weight,
                subsample, colsample_bytree, reg_alpha, reg_lambda
            ])
            return cs
        elif optimizer == 'tpe':
            from hyperopt import hp
            # Integer ranges are expressed as randint(width) + lower bound so
            # that they line up with the SMAC bounds above
            # (NOTE(review): relies on hp.randint sampling from [0, n)).
            space = {
                'n_estimators':
                hp.randint('lgb_n_estimators', 901) + 100,
                'num_leaves':
                hp.randint('lgb_num_leaves', 993) + 31,
                'learning_rate':
                hp.loguniform('lgb_learning_rate', np.log(0.025), np.log(0.3)),
                'min_child_weight':
                hp.randint('lgb_min_child_weight', 10) + 1,
                'subsample':
                hp.uniform('lgb_subsample', 0.5, 1),
                'colsample_bytree':
                hp.uniform('lgb_colsample_bytree', 0.5, 1),
                'reg_alpha':
                hp.loguniform('lgb_reg_alpha', np.log(1e-10), np.log(10)),
                'reg_lambda':
                hp.loguniform('lgb_reg_lambda', np.log(1e-10), np.log(10))
            }
            return space
Ejemplo n.º 33
0
    def _get_base_search_space(
            self,
            cs: ConfigurationSpace,
            dataset_properties: Dict[str, BaseDatasetPropertiesType],
            include: Optional[Dict[str, Any]],
            exclude: Optional[Dict[str, Any]],
            pipeline: List[Tuple[str, PipelineStepType]]
    ) -> ConfigurationSpace:
        """Populate ``cs`` with the search space of every pipeline step.

        Args:
            cs (ConfigurationSpace): space that receives one sub-space per
                pipeline step, keyed by the step name.
            dataset_properties (Dict[str, BaseDatasetPropertiesType]):
                forwarded to every step and to the matching helpers.
            include (Optional[Dict[str, Any]]): per-step components to keep;
                falls back to ``self.include`` when None.
            exclude (Optional[Dict[str, Any]]): per-step components to drop;
                falls back to ``self.exclude`` when None.
            pipeline (List[Tuple[str, PipelineStepType]]): ``(name, node)``
                pairs in pipeline order.

        Returns:
            ConfigurationSpace: ``cs`` with all step sub-spaces added and,
                when some combinations are illegal, the result of
                ``add_forbidden`` applied to it.

        Raises:
            ValueError: if ``include`` or ``exclude`` names a step that is
                not part of ``pipeline``.
            AssertionError: if the match array has no valid entry or sums to
                more than its size.
        """
        if include is None:
            include = self.include

        # Step names are the only legal keys for include/exclude.
        keys = [pair[0] for pair in pipeline]
        for key in include:
            if key not in keys:
                raise ValueError('Invalid key in include: %s; should be one '
                                 'of %s' % (key, keys))

        if exclude is None:
            exclude = self.exclude

        for key in exclude:
            if key not in keys:
                raise ValueError('Invalid key in exclude: %s; should be one '
                                 'of %s' % (key, keys))

        if self.search_space_updates is not None:
            self._check_search_space_updates(include=include,
                                             exclude=exclude)
            self.search_space_updates.apply(pipeline=pipeline)

        matches = get_match_array(
            pipeline, dataset_properties, include=include, exclude=exclude)

        # Now we have only legal combinations at this step of the pipeline
        # Simple sanity checks
        assert np.sum(matches) != 0, "No valid pipeline found."

        assert np.sum(matches) <= np.size(matches), \
            "'matches' is not binary; %s <= %d, %s" % \
            (str(np.sum(matches)), np.size(matches), str(matches.shape))

        # Iterate each dimension of the matches array (each step of the
        # pipeline) to see if we can add a hyperparameter for that step
        for node_idx, (node_name, node) in enumerate(pipeline):
            # If the node is a choice, we have to figure out which of its
            # choices are actually legal choices
            if isinstance(node, autoPyTorchChoice):
                choices_list = find_active_choices(
                    matches, node, node_idx,
                    dataset_properties,
                    include.get(node_name),
                    exclude.get(node_name)
                )

                # ignore type check here as mypy is not able to infer
                # that isinstance(node, autoPyTorchChoice) = True
                sub_config_space = node.get_hyperparameter_search_space(  # type: ignore[call-arg]
                    dataset_properties, include=choices_list)
                cs.add_configuration_space(node_name, sub_config_space)

            # If the node isn't a choice we can add it immediately because it
            # must be active (if it wasn't, np.sum(matches) would be zero)
            else:
                cs.add_configuration_space(
                    node_name,
                    node.get_hyperparameter_search_space(dataset_properties,  # type: ignore[call-arg]
                                                         **node._get_search_space_updates()
                                                         )
                )

        # And now add forbidden parameter configurations
        # According to matches
        if np.sum(matches) < np.size(matches):
            cs = add_forbidden(
                conf_space=cs, pipeline=pipeline, matches=matches,
                dataset_properties=dataset_properties, include=include,
                exclude=exclude)

        return cs
Ejemplo n.º 34
0
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        """Build the ensemble + AdaBoost search space for an optimizer.

        Parameters
        ----------
        dataset_properties : optional
            Accepted for interface compatibility; not read by this function.
        optimizer : str
            ``'smac'`` returns a ``ConfigurationSpace``; ``'tpe'`` returns a
            dict of hyperopt expressions covering the same ranges.

        Returns
        -------
        ConfigurationSpace, dict or None
            The search space for the requested optimizer. Any other
            ``optimizer`` value falls through and yields ``None``.
        """
        if optimizer == 'smac':
            cs = ConfigurationSpace()
            n_estimators = UniformIntegerHyperparameter(name="n_estimators",
                                                        lower=50,
                                                        upper=500,
                                                        default_value=50,
                                                        log=False)
            sampling_strategy = CategoricalHyperparameter(
                name="sampling_strategy",
                choices=["majority", "not minority", "not majority", "all"],
                default_value="not minority")
            # Booleans are encoded as the strings "True"/"False".
            replacement = CategoricalHyperparameter("replacement",
                                                    ["True", "False"],
                                                    default_value="False")

            # "ab_"-prefixed entries parameterize the AdaBoost base learner
            # (NOTE(review): inferred from the prefix and the SAMME choices).
            ab_n_estimators = UniformIntegerHyperparameter(
                name="ab_n_estimators",
                lower=50,
                upper=500,
                default_value=50,
                log=False)
            ab_learning_rate = UniformFloatHyperparameter(
                name="ab_learning_rate",
                lower=0.01,
                upper=2,
                default_value=0.1,
                log=True)
            ab_algorithm = CategoricalHyperparameter(
                name="ab_algorithm",
                choices=["SAMME.R", "SAMME"],
                default_value="SAMME.R")
            ab_max_depth = UniformIntegerHyperparameter(name="ab_max_depth",
                                                        lower=1,
                                                        upper=10,
                                                        default_value=1,
                                                        log=False)
            cs.add_hyperparameters([
                n_estimators, sampling_strategy, replacement, ab_n_estimators,
                ab_learning_rate, ab_algorithm, ab_max_depth
            ])
            return cs
        elif optimizer == 'tpe':
            from hyperopt import hp
            # Integer ranges are expressed as randint(width) + lower bound so
            # that they line up with the SMAC bounds above
            # (NOTE(review): relies on hp.randint sampling from [0, n)).
            space = {
                'n_estimators':
                hp.randint('easy_ensemble_n_estimators', 451) + 50,
                'sampling_strategy':
                hp.choice('easy_ensemble_sampling_strategy',
                          ["majority", "not minority", "not majority", "all"]),
                'replacement':
                hp.choice('easy_ensemble_replacement', ["True", "False"]),
                'ab_n_estimators':
                hp.randint('ab_n_estimators', 451) + 50,
                'ab_learning_rate':
                hp.loguniform('ab_learning_rate', np.log(0.01), np.log(2)),
                'ab_algorithm':
                hp.choice('ab_algorithm', ["SAMME.R", "SAMME"]),
                'ab_max_depth':
                hp.randint('ab_max_depth', 10) + 1
            }
            return space
Ejemplo n.º 35
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return the gradient-boosting configuration space.

        ``dataset_properties`` is accepted for interface compatibility and is
        not read. Hyperparameters are registered one by one in the order
        listed below.
        """
        cs = ConfigurationSpace()
        hyperparameters = (
            Constant("loss", "deviance"),
            UniformFloatHyperparameter(
                name="learning_rate", lower=0.01, upper=1, default=0.1,
                log=True),
            UniformIntegerHyperparameter(
                name="n_estimators", lower=50, upper=500, default=100),
            UniformIntegerHyperparameter(
                name="max_depth", lower=1, upper=10, default=3),
            UniformIntegerHyperparameter(
                name="min_samples_split", lower=2, upper=20, default=2,
                log=False),
            UniformIntegerHyperparameter(
                name="min_samples_leaf", lower=1, upper=20, default=1,
                log=False),
            UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.),
            UniformFloatHyperparameter(
                name="subsample", lower=0.01, upper=1.0, default=1.0,
                log=False),
            UniformFloatHyperparameter("max_features", 0.5, 5, default=1),
            # The string "None" (not the None object) is the stored value.
            UnParametrizedHyperparameter(name="max_leaf_nodes", value="None"),
        )
        for hyperparameter in hyperparameters:
            cs.add_hyperparameter(hyperparameter)

        return cs
Ejemplo n.º 36
0
 def get_hyperparameter_search_space(dataset_properties=None):
     """Return a space holding only the float ``reg_param`` in [0.0, 1.0].

     ``dataset_properties`` is accepted for interface compatibility and is
     not read.
     """
     cs = ConfigurationSpace()
     cs.add_hyperparameter(
         UniformFloatHyperparameter('reg_param', 0.0, 1.0, default_value=0.0))
     return cs
Ejemplo n.º 37
0
def create_hyperspace(regressor_id):
    """Return the configuration space for the regressor named ``regressor_id``.

    Supported identifiers are ``'knn'``, ``'liblinear_svr'``,
    ``'random_forest'``, ``'lightgbm'`` and any identifier containing
    ``'catboost'``. A catboost identifier additionally containing ``'cpu'``
    or ``'gpu'`` (checked in that order) gets the corresponding parameter
    set; one containing neither gets no tunable parameters. Every returned
    space also carries a fixed ``"estimator"`` entry set to ``regressor_id``.

    Raises
    ------
    ValueError
        If ``regressor_id`` matches none of the identifiers above.
    """
    if regressor_id == 'knn':
        from autosklearn.pipeline.components.regression.k_nearest_neighbors import KNearestNeighborsRegressor
        cs = KNearestNeighborsRegressor.get_hyperparameter_search_space()
    elif regressor_id == 'liblinear_svr':
        from autosklearn.pipeline.components.regression.liblinear_svr import LibLinear_SVR
        cs = LibLinear_SVR.get_hyperparameter_search_space()
    elif regressor_id == 'random_forest':
        cs = ConfigurationSpace()
        cs.add_hyperparameters([
            UniformIntegerHyperparameter(
                "n_estimators", 100, 500, default_value=200),
            CategoricalHyperparameter(
                "criterion", ['mse', 'friedman_mse', 'mae']),
            UniformFloatHyperparameter(
                "max_features", 0.1, 1.0, default_value=1.0),
            UnParametrizedHyperparameter("max_depth", "None"),
            UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default_value=2),
            UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 20, default_value=1),
            UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.),
            UnParametrizedHyperparameter("max_leaf_nodes", "None"),
            UnParametrizedHyperparameter('min_impurity_decrease', 0.0),
            CategoricalHyperparameter(
                "bootstrap", ["True", "False"], default_value="True"),
        ])
    elif regressor_id == 'lightgbm':
        cs = ConfigurationSpace()
        cs.add_hyperparameters([
            UniformIntegerHyperparameter(
                "n_estimators", 100, 1000, default_value=500),
            UniformIntegerHyperparameter(
                "num_leaves", 31, 1023, default_value=31),
            UniformFloatHyperparameter(
                "learning_rate", 0.025, 0.3, default_value=0.1, log=True),
            UniformIntegerHyperparameter(
                "min_child_weight", 1, 10, default_value=1),
            UniformFloatHyperparameter(
                "subsample", 0.5, 1, default_value=1),
            UniformFloatHyperparameter(
                "colsample_bytree", 0.5, 1, default_value=1),
            UniformFloatHyperparameter(
                'reg_alpha', 1e-10, 10, log=True, default_value=1e-10),
            UniformFloatHyperparameter(
                "reg_lambda", 1e-10, 10, log=True, default_value=1e-10),
        ])
    elif 'catboost' in regressor_id:
        cs = ConfigurationSpace()
        # Parameters shared by the CPU and GPU variants.
        max_depth = UniformIntegerHyperparameter(
            "max_depth", 4, 12, default_value=6)
        learning_rate = UniformFloatHyperparameter(
            "learning_rate", 0.01, 0.3, default_value=0.1, log=True)
        subsample = UniformFloatHyperparameter(
            "subsample", 0.5, 1, default_value=1)
        reg_lambda = UniformFloatHyperparameter(
            "reg_lambda", 1e-10, 10, log=True, default_value=1e-10)
        loss_function = CategoricalHyperparameter(
            "loss_function", ['RMSE', 'MAE'], default_value='RMSE')

        # Each variant contributes its own n_estimators range plus one
        # variant-specific parameter; neither variant means nothing is added.
        n_estimators = variant_param = None
        if 'cpu' in regressor_id:
            n_estimators = UniformIntegerHyperparameter(
                "n_estimators", 100, 1000, default_value=500)
            variant_param = UniformFloatHyperparameter(
                "colsample_bylevel", 0.5, 1, default_value=1)
        elif 'gpu' in regressor_id:
            n_estimators = UniformIntegerHyperparameter(
                "n_estimators", 1000, 10000, default_value=1000)
            variant_param = UniformIntegerHyperparameter(
                "min_child_samples", 1, 15, default_value=1)

        if variant_param is not None:
            cs.add_hyperparameters([n_estimators, max_depth, learning_rate,
                                    subsample, variant_param, reg_lambda,
                                    loss_function])
    # ---ADD THE HYPERSPACE FOR YOUR REGRESSOR---------------
    else:
        raise ValueError('Undefined regressor identifier: %s!' % regressor_id)

    cs.add_hyperparameter(
        UnParametrizedHyperparameter("estimator", regressor_id))
    return cs
Ejemplo n.º 38
0
 def get_hyperparameter_search_space(dataset_properties=None):
     """Return a fresh, empty configuration space.

     ``dataset_properties`` is accepted for interface compatibility and is
     not read.
     """
     return ConfigurationSpace()
Ejemplo n.º 39
0
 def get_hyperparameter_search_space(dataset_properties=None):
     """Return the configuration space for the tree-based component.

     ``dataset_properties`` is accepted for interface compatibility and is
     not read. Four integer ranges are tunable; ``min_weight_fraction_leaf``
     and ``max_leaf_nodes`` are fixed values.
     """
     hyperparameters = (
         UniformIntegerHyperparameter(
             name="n_estimators", lower=10, upper=100, default=10),
         UniformIntegerHyperparameter(
             name="max_depth", lower=2, upper=10, default=5),
         UniformIntegerHyperparameter(
             name="min_samples_split", lower=2, upper=20, default=2),
         UniformIntegerHyperparameter(
             name="min_samples_leaf", lower=1, upper=20, default=1),
         Constant('min_weight_fraction_leaf', 1.0),
         # The string "None" (not the None object) is the stored value.
         UnParametrizedHyperparameter(name="max_leaf_nodes", value="None"),
     )
     cs = ConfigurationSpace()
     for hyperparameter in hyperparameters:
         cs.add_hyperparameter(hyperparameter)
     return cs
Ejemplo n.º 40
0
 def get_hyperparameter_search_space(self, dataset_properties=None):
     """Return the search space gathered from ``self._transformers``.

     Stores ``dataset_properties`` on the instance, then hands a fresh
     ``ConfigurationSpace`` to the recursive ``DataPreprocessor`` helper and
     returns whatever that helper returns.
     """
     self.dataset_properties = dataset_properties
     return DataPreprocessor._get_hyperparameter_search_space_recursevely(
         dataset_properties, ConfigurationSpace(), self._transformers)
Ejemplo n.º 41
0
    def get_pred_surface(self, rh, X_scaled, conf_list: list,
                         contour_step_size):
        """fit epm on the scaled input dimension and
        return data to plot a contour plot of the empirical performance

        Note that ``conf_list`` is modified in place: every configuration is
        re-bound to a copy of its configuration space without forbidden
        clauses and replaced by its imputed version.

        Parameters
        ----------
        rh: RunHistory
            runhistory
        X_scaled: np.array
            configurations in scaled 2dim
        conf_list: list
            list of Configuration objects
        contour_step_size: float
            step-size for contour

        Returns
        -------
        contour_data: (np.array, np.array, np.array)
            x, y, Z for contour plots
        """
        # use PCA to reduce features to also at most 2 dims
        scen = copy.deepcopy(self.scenario)  # pca changes feats
        if scen.feature_array.shape[1] > 2:
            self.logger.debug(
                "Use PCA to reduce features to from %d dim to 2 dim",
                scen.feature_array.shape[1])
            # perform PCA
            insts = scen.feature_dict.keys()
            feature_array = np.array([scen.feature_dict[i] for i in insts])
            feature_array = StandardScaler().fit_transform(feature_array)
            feature_array = PCA(n_components=2).fit_transform(feature_array)
            # inject in scenario-object
            scen.feature_array = feature_array
            scen.feature_dict = {inst: feature_array[idx, :]
                                 for idx, inst in enumerate(insts)}
            scen.n_features = 2

        # convert the data to train EPM on 2-dim featurespace (for contour-data)
        self.logger.debug("Convert data for epm.")
        # The types returned here are discarded: the model below is trained
        # on the 2-dim scaled configs, so an all-zero types array is built.
        X, y, _ = convert_data_for_epm(scenario=scen,
                                       runhistory=rh,
                                       impute_inactive_parameters=True,
                                       logger=self.logger)
        types = np.array(np.zeros((2 + scen.feature_array.shape[1])),
                         dtype=np.uint)
        num_params = len(scen.cs.get_hyperparameters())

        # impute missing values in configs and insert MDS'ed (2dim) configs to the right positions
        conf_dict = {}
        # Remove forbidden clauses (this is necessary to enable the impute_inactive_values-method, see #226)
        cs_no_forbidden = copy.deepcopy(conf_list[0].configuration_space)
        cs_no_forbidden.forbidden_clauses = []
        for idx, c in enumerate(conf_list):
            c.configuration_space = cs_no_forbidden
            conf_list[idx] = impute_inactive_values(c)
            # Key by the string form of the config array so rows of X can be
            # looked up below.
            conf_dict[str(conf_list[idx].get_array())] = X_scaled[idx, :]

        # Debug compare elements:
        c1, c2 = {str(z) for z in X}, {str(z) for z in conf_dict.keys()}
        self.logger.debug(
            "{} elements not in both sets, {} elements in both sets, X (len {}) and conf_dict (len {}) "
            "(might be a problem related to forbidden clauses?)".format(
                len(c1 ^ c2), len(c1 & c2), len(c1), len(c2)))
        # self.logger.debug("Elements: {}".format(str(c1 ^ c2)))

        # X_trans mirrors X row by row, with the first num_params columns
        # replaced by the scaled 2-dim config; the remaining (feature)
        # columns are kept as they are.
        X_trans = np.array([
            np.concatenate((conf_dict[str(x[:num_params])], x[num_params:]),
                           axis=0)
            for x in X
        ])

        self.logger.debug(
            "Train random forest for contour-plot. Shape of X: {}, shape of X_trans: {}"
            .format(X.shape, X_trans.shape))
        self.logger.debug("Faking configspace to be able to train rf...")
        # We need to fake config-space bypass imputation of inactive values in random forest implementation
        fake_cs = ConfigurationSpace(name="fake-cs-for-configurator-footprint")
        # We need to add fake hyperparameters. Always assume there are only two dimensions
        fake_cs.add_hyperparameters([
            UniformFloatHyperparameter('fake-%d' % i,
                                       lower=0.,
                                       upper=100000.,
                                       default_value=0.,
                                       log=False) for i in range(2)
        ])

        bounds = np.array([(0, np.nan), (0, np.nan)], dtype=object)
        model = RandomForestWithInstances(fake_cs,
                                          types,
                                          bounds,
                                          seed=self.rng.randint(MAXINT),
                                          instance_features=np.array(
                                              scen.feature_array),
                                          ratio_features=1.0)

        start = time.time()
        model.train(X_trans, y)
        self.logger.debug("Fitting random forest took %f time",
                          time.time() - start)

        # Pad the grid by 1 on every side of the scaled configurations.
        x_min, x_max = X_scaled[:, 0].min() - 1, X_scaled[:, 0].max() + 1
        y_min, y_max = X_scaled[:, 1].min() - 1, X_scaled[:, 1].max() + 1
        xx, yy = np.meshgrid(np.arange(x_min, x_max, contour_step_size),
                             np.arange(y_min, y_max, contour_step_size))

        self.logger.debug("x_min: %f, x_max: %f, y_min: %f, y_max: %f", x_min,
                          x_max, y_min, y_max)
        self.logger.debug(
            "Predict on %d samples in grid to get surface (step-size: %f)",
            np.c_[xx.ravel(), yy.ravel()].shape[0], contour_step_size)

        start = time.time()
        Z, _ = model.predict_marginalized_over_instances(np.c_[xx.ravel(),
                                                               yy.ravel()])
        Z = Z.reshape(xx.shape)
        self.logger.debug("Predicting random forest took %f time",
                          time.time() - start)

        return xx, yy, Z
Ejemplo n.º 42
0
def read(pcs_string, debug=False):
    """
    Read in a :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
    definition from a pcs file.

    Example
    -------

    .. testsetup:: pcs_test

        from ConfigSpace import ConfigurationSpace
        import ConfigSpace.hyperparameters as CSH
        from ConfigSpace.read_and_write import pcs
        cs = ConfigurationSpace()
        cs.add_hyperparameter(CSH.CategoricalHyperparameter('a', choices=[1, 2, 3]))
        with open('configspace.pcs', 'w') as f:
             f.write(pcs.write(cs))

    .. doctest:: pcs_test

        >>> from ConfigSpace.read_and_write import pcs
        >>> with open('configspace.pcs', 'r') as fh:
        ...     deserialized_conf = pcs.read(fh)

    Parameters
    ----------
    pcs_string : iterable of str
        ConfigSpace definition in pcs format, one line per item (for example
        an open file handle or a list of lines). The object is iterated
        directly, so a plain string would be processed character by
        character.
    debug : bool
        Provides debug information. Defaults to False. Currently not read by
        this function.

    Returns
    -------
    :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
        The deserialized ConfigurationSpace object

    Raises
    ------
    NotImplementedError
        If a condition or parameter line cannot be parsed, or a forbidden
        clause uses an operator other than ``=``.

    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # Maps the parsed parameter kind to its hyperparameter class.
    create = {"int": UniformIntegerHyperparameter,
              "float": UniformFloatHyperparameter,
              "categorical": CategoricalHyperparameter}

    for line in pcs_string:
        if "#" in line:
            # It contains a comment
            pos = line.find("#")
            line = line[:pos]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()

        if "|" in line:
            # It's a condition
            try:
                c = pp_condition.parseString(line)
                conditions.append(c)
            except pyparsing.ParseException as err:
                raise NotImplementedError(
                    "Could not parse condition: %s" % line) from err
            continue
        # Parameter and forbidden lines always contain a closing bracket;
        # this also skips blank and comment-only lines.
        if "}" not in line and "]" not in line:
            continue
        if line.startswith("{") and line.endswith("}"):
            # Forbidden clauses are resolved once all parameters are known.
            forbidden.append(line)
            continue

        param = None

        # Try the continuous grammar first ...
        try:
            param_list = pp_cont_param.parseString(line)
            # Anything after the ninth token holds the optional flags
            # ("i" for integer, "l" for log scale).
            il = param_list[9:]
            if len(il) > 0:
                il = il[0]
            param_list = param_list[:9]
            name = param_list[0]
            lower = float(param_list[2])
            upper = float(param_list[4])
            paramtype = "int" if "i" in il else "float"
            log = "l" in il
            default_value = float(param_list[7])
            param = create[paramtype](name=name, lower=lower, upper=upper,
                                      q=None, log=log, default_value=default_value)
        except pyparsing.ParseException:
            # Not a continuous parameter; the categorical grammar is tried
            # next and a failure of both is reported below.
            pass

        # ... then the categorical grammar; a match here takes precedence.
        try:
            param_list = pp_cat_param.parseString(line)
            name = param_list[0]
            choices = list(param_list[2:-4:2])
            default_value = param_list[-2]
            param = create["categorical"](name=name, choices=choices,
                                          default_value=default_value)
        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        # TODO test this properly!
        # TODO Add a try/catch here!
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        # Tokens are collected in groups of three (name, operator, value);
        # the token following each group closes it and is itself dropped.
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
            else:
                # So far, only equals is supported by SMAC
                if tmp_list[1] == '=':
                    # TODO maybe add a check if the hyperparameter is
                    # actually in the configuration space
                    clause_list.append(ForbiddenEqualsClause(
                        configuration_space.get_hyperparameter(tmp_list[0]),
                        tmp_list[2]))
                else:
                    raise NotImplementedError()
                tmp_list = []
        configuration_space.add_forbidden_clause(ForbiddenAndConjunction(
            *clause_list))

    # Now handle conditions
    # If there are two conditions for one child, these two conditions are an
    # AND-conjunction of conditions, thus we have to connect them
    conditions_per_child = OrderedDict()
    for condition in conditions:
        child_name = condition[0]
        conditions_per_child.setdefault(child_name, []).append(condition)

    for child_name, child_conditions in conditions_per_child.items():
        condition_objects = []
        for condition in child_conditions:
            child = configuration_space.get_hyperparameter(child_name)
            parent_name = condition[2]
            parent = configuration_space.get_hyperparameter(parent_name)
            restrictions = condition[5:-1:2]

            # TODO: cast the type of the restriction!
            if len(restrictions) == 1:
                condition_object = EqualsCondition(child, parent,
                                                   restrictions[0])
            else:
                condition_object = InCondition(child, parent,
                                               values=restrictions)
            condition_objects.append(condition_object)

        # Now we have all condition objects for this child, so we can build a
        #  giant AND-conjunction of them (if number of conditions >= 2)!

        if len(condition_objects) > 1:
            and_conjunction = AndConjunction(*condition_objects)
            configuration_space.add_condition(and_conjunction)
        else:
            configuration_space.add_condition(condition_objects[0])

    return configuration_space
Ejemplo n.º 43
0
    def get_hyperparameter_search_space(
        self,
        dataset_properties: Optional[Dict[str,
                                          BaseDatasetPropertiesType]] = None,
        default: Optional[str] = None,
        include: Optional[List[str]] = None,
        exclude: Optional[List[str]] = None,
    ) -> ConfigurationSpace:
        """Build the configuration space over the available trainers.

        The space holds a categorical ``__choice__`` hyperparameter plus the
        sub-space of every selectable trainer, each conditioned on
        ``__choice__`` taking that trainer's name. The result and the merged
        dataset properties are also stored on the instance as
        ``configuration_space_`` and ``dataset_properties_``.

        Args:
            dataset_properties (Optional[Dict[str, BaseDatasetPropertiesType]]):
                Describes the dataset to work on; merged over
                ``self.dataset_properties``.
            default (Optional[str]): Default trainer to use. When None,
                'StandardTrainer' is picked if it is available.
            include (Optional[List[str]]): forwarded to
                ``get_available_components`` as the components to include.
            exclude (Optional[List[str]]): forwarded to
                ``get_available_components`` as the components to skip.

        Returns:
            ConfigurationSpace: the configuration space of the
                hyper-parameters of the chosen component

        Raises:
            ValueError: if no trainer is available, or a ``__choice__``
                search-space update lists a trainer that is not available.
        """
        cs = ConfigurationSpace()

        overrides = {} if dataset_properties is None else dataset_properties
        dataset_properties = {**self.dataset_properties, **overrides}

        # Compile a list of legal trainers for this problem
        available_trainers = self.get_available_components(
            dataset_properties=dataset_properties,
            include=include,
            exclude=exclude)

        if not available_trainers:
            raise ValueError("No trainer found")

        if default is None:
            # First preferred trainer that is actually available, else None.
            preferred = ('StandardTrainer',)
            default = next(
                (name for name in preferred if name in available_trainers),
                None)

        updates = self._get_search_space_updates()
        if '__choice__' in updates:
            # A user-supplied update replaces the default list of choices.
            choice_update = updates['__choice__']
            requested = choice_update.value_range
            if not set(requested).issubset(available_trainers):
                raise ValueError("Expected given update for {} to have "
                                 "choices in {} got {}".format(
                                     self.__class__.__name__,
                                     available_trainers,
                                     requested))
            trainer = CategoricalHyperparameter(
                '__choice__',
                requested,
                default_value=choice_update.default_value)
        else:
            trainer = CategoricalHyperparameter(
                '__choice__',
                list(available_trainers.keys()),
                default_value=default)
        cs.add_hyperparameter(trainer)

        for name in trainer.choices:
            component_updates = self._get_search_space_updates(prefix=name)
            component_space = available_trainers[
                name].get_hyperparameter_search_space(
                    dataset_properties,  # type:ignore
                    **component_updates)
            cs.add_configuration_space(
                name,
                component_space,
                parent_hyperparameter={'parent': trainer, 'value': name})

        self.configuration_space_ = cs
        self.dataset_properties_ = dataset_properties
        return cs
Ejemplo n.º 44
0
def read(pcs_string, debug=False):
    """
    Read in a :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
    definition from a pcs file.

    Example
    -------

    .. testsetup:: pcs_new_test

        from ConfigSpace import ConfigurationSpace
        import ConfigSpace.hyperparameters as CSH
        from ConfigSpace.read_and_write import pcs_new
        cs = ConfigurationSpace()
        cs.add_hyperparameter(CSH.CategoricalHyperparameter('a', choices=[1, 2, 3]))
        with open('configspace.pcs_new', 'w') as f:
             f.write(pcs_new.write(cs))

    .. doctest:: pcs_new_test

        >>> from ConfigSpace.read_and_write import pcs_new
        >>> with open('configspace.pcs_new', 'r') as fh:
        ...     deserialized_conf = pcs_new.read(fh)

    Parameters
    ----------
    pcs_string : Iterable[str]
        ConfigSpace definition in pcs format. The argument is iterated and
        every item is treated as one line, so pass an open file handle or a
        list of lines rather than one big string.
    debug : bool
        Provides debug information. Defaults to False. The flag is accepted
        for interface compatibility but is not read by this function.

    Returns
    -------
    :py:class:`~ConfigSpace.configuration_space.ConfigurationSpace`
        The deserialized ConfigurationSpace object

    Raises
    ------
    NotImplementedError
        If a condition or parameter line cannot be parsed, or if a forbidden
        clause uses an operator other than ``=``.
    ValueError
        If a forbidden value lies outside the bounds of a numerical
        hyperparameter or is not one of the values of a categorical/ordinal
        hyperparameter.

    """
    configuration_space = ConfigurationSpace()
    conditions = []
    forbidden = []

    # Constructor lookup by parameter kind; invariant, so built once.
    create = {
        "int": UniformIntegerHyperparameter,
        "float": UniformFloatHyperparameter,
        "categorical": CategoricalHyperparameter,
        "ordinal": OrdinalHyperparameter
    }

    # First pass: hyperparameters are added immediately, while conditions and
    # forbidden clauses are only collected because they refer to
    # hyperparameters that may be declared on a later line.
    for line in pcs_string:
        if "#" in line:
            # It contains a comment
            line = line[:line.find("#")]

        # Remove quotes and whitespaces at beginning and end
        line = line.replace('"', "").replace("'", "")
        line = line.strip()
        if "|" in line:
            # It's a condition
            try:
                conditions.append(pp_condition.parseString(line))
            except pyparsing.ParseException as err:
                raise NotImplementedError("Could not parse condition: %s" %
                                          line) from err
            continue
        if "}" not in line and "]" not in line:
            continue
        if line.startswith("{") and line.endswith("}"):
            forbidden.append(line)
            continue
        if not line:
            continue

        param = None

        try:
            param_list = pp_cont_param.parseString(line)
            name = param_list[0]
            if param_list[1] == 'integer':
                paramtype = 'int'
            elif param_list[1] == 'real':
                paramtype = 'float'
            else:
                paramtype = None

            if paramtype in ['int', 'float']:
                # Everything after the first ten tokens is the optional
                # trailing "log" marker.
                log = param_list[10:]
                param_list = param_list[:10]
                if len(log) > 0:
                    log = log[0]
                lower = float(param_list[3])
                upper = float(param_list[5])
                log_on = "log" in log
                default_value = float(param_list[8])
                param = create[paramtype](name=name,
                                          lower=lower,
                                          upper=upper,
                                          q=None,
                                          log=log_on,
                                          default_value=default_value)

        except pyparsing.ParseException:
            # Not a numerical parameter; the categorical/ordinal grammar is
            # tried next.
            pass

        try:
            if "categorical" in line:
                param_list = pp_cat_param.parseString(line)
                name = param_list[0]
                choices = list(param_list[3:-4:2])
                default_value = param_list[-2]
                param = create["categorical"](
                    name=name,
                    choices=choices,
                    default_value=default_value,
                )

            elif "ordinal" in line:
                param_list = pp_ord_param.parseString(line)
                name = param_list[0]
                sequence = list(param_list[3:-4:2])
                default_value = param_list[-2]
                param = create["ordinal"](
                    name=name,
                    sequence=sequence,
                    default_value=default_value,
                )

        except pyparsing.ParseException:
            pass

        if param is None:
            raise NotImplementedError("Could not parse: %s" % line)

        configuration_space.add_hyperparameter(param)

    for clause in forbidden:
        param_list = pp_forbidden_clause.parseString(clause)
        tmp_list = []
        clause_list = []
        for value in param_list[1:]:
            if len(tmp_list) < 3:
                tmp_list.append(value)
                continue

            # tmp_list now holds [name, operator, value]; the token currently
            # in `value` only triggers the processing of that triple.
            # So far, only equals is supported by SMAC
            if tmp_list[1] != '=':
                raise NotImplementedError()

            hp = configuration_space.get_hyperparameter(tmp_list[0])
            if isinstance(hp, NumericalHyperparameter):
                if isinstance(hp, IntegerHyperparameter):
                    forbidden_value = int(tmp_list[2])
                elif isinstance(hp, FloatHyperparameter):
                    forbidden_value = float(tmp_list[2])
                else:
                    raise NotImplementedError
                if forbidden_value < hp.lower or forbidden_value > hp.upper:
                    raise ValueError(
                        f'forbidden_value is set out of the bound, it needs to'
                        f' be set between [{hp.lower}, {hp.upper}]'
                        f' but its value is {forbidden_value}')
            elif isinstance(
                    hp, (CategoricalHyperparameter, OrdinalHyperparameter)):
                hp_values = hp.choices if isinstance(hp, CategoricalHyperparameter)\
                    else hp.sequence
                if tmp_list[2] not in hp_values:
                    # Report the offending token itself: `forbidden_value`
                    # is not assigned yet on this path.
                    raise ValueError(
                        f'forbidden_value is set out of the allowed value '
                        f'sets, it needs to be one member from {hp_values} '
                        f'but its value is {tmp_list[2]}')
                forbidden_value = tmp_list[2]
            else:
                raise ValueError('Unsupported Hyperparamter sorts')

            clause_list.append(ForbiddenEqualsClause(hp, forbidden_value))
            tmp_list = []
        configuration_space.add_forbidden_clause(
            ForbiddenAndConjunction(*clause_list))

    # Group the parsed conditions by the child they apply to, keeping the
    # order in which children first appeared.
    conditions_per_child = OrderedDict()
    for condition in conditions:
        conditions_per_child.setdefault(condition[0], []).append(condition)

    for child_name, child_conditions in conditions_per_child.items():
        for condition in child_conditions:
            # Skip the child name and the token after it (presumably the "|"
            # separator) and work on the remaining expression as text.
            condition = ' '.join(condition[2:])
            if '||' in condition:
                if '&&' in condition:
                    # 1st case we have a mixture of || and &&
                    ors_combis = []
                    for or_part in condition.split('||'):
                        and_parts = or_part.split('&&')
                        # if length is 1 it must be or
                        if len(and_parts) == 1:
                            ors_combis.append(
                                condition_specification(
                                    child_name,
                                    and_parts[0].split(),
                                    configuration_space,
                                ))
                        else:
                            # now taking care of ands; each and-part is
                            # tokenised exactly once
                            ands = [
                                condition_specification(
                                    child_name,
                                    and_part.split(),
                                    configuration_space,
                                ) for and_part in and_parts
                            ]
                            ors_combis.append(AndConjunction(*ands))
                    configuration_space.add_condition(
                        OrConjunction(*ors_combis))
                else:
                    # 2nd case: we only have ors
                    ors = [
                        condition_specification(
                            child_name,
                            or_part.split(),
                            configuration_space,
                        ) for or_part in condition.split('||')
                    ]
                    configuration_space.add_condition(OrConjunction(*ors))
            elif '&&' in condition:
                # 3rd case: we only have ands
                ands = [
                    condition_specification(
                        child_name,
                        and_part.split(),
                        configuration_space,
                    ) for and_part in condition.split('&&')
                ]
                configuration_space.add_condition(AndConjunction(*ands))
            else:
                # 4th case: we have a normal condition
                configuration_space.add_condition(
                    condition_specification(
                        child_name,
                        condition.split(),
                        configuration_space,
                    ))

    return configuration_space
Ejemplo n.º 45
0
    def add_params(cs: ConfigurationSpace):
        '''
        Add the ``rf:*`` hyperparameters to ``cs``.

        Each added hyperparameter is made conditional on the existing
        ``classifier`` hyperparameter taking the value ``"RandomForest"``.
        Nothing is added when ``"RandomForest"`` is not among the classifier
        choices.

        This is best-effort: any ordinary exception raised along the way is
        swallowed and the function returns ``None``, which can leave ``cs``
        partially updated. KeyboardInterrupt/SystemExit are not swallowed.
        '''
        try:
            classifier = cs.get_hyperparameter("classifier")
            if "RandomForest" not in classifier.choices:
                return

            n_estimators = UniformIntegerHyperparameter(name="rf:n_estimators",
                                                        lower=10,
                                                        upper=100,
                                                        default_value=10,
                                                        log=True)
            cs.add_hyperparameter(n_estimators)
            criterion = CategoricalHyperparameter(name="rf:criterion",
                                                  choices=["gini", "entropy"],
                                                  default_value="gini")
            cs.add_hyperparameter(criterion)
            max_features = CategoricalHyperparameter(
                name="rf:max_features",
                choices=["sqrt", "log2", "None"],
                default_value="sqrt")
            cs.add_hyperparameter(max_features)
            max_depth = UniformIntegerHyperparameter(name="rf:max_depth",
                                                     lower=10,
                                                     upper=2**31,
                                                     default_value=2**31,
                                                     log=True)
            cs.add_hyperparameter(max_depth)
            min_samples_split = UniformIntegerHyperparameter(
                name="rf:min_samples_split",
                lower=2,
                upper=100,
                default_value=2,
                log=True)
            cs.add_hyperparameter(min_samples_split)
            min_samples_leaf = UniformIntegerHyperparameter(
                name="rf:min_samples_leaf",
                lower=2,
                upper=100,
                default_value=10,
                log=True)
            cs.add_hyperparameter(min_samples_leaf)
            bootstrap = CategoricalHyperparameter(name="rf:bootstrap",
                                                  choices=[True, False],
                                                  default_value=True)
            cs.add_hyperparameter(bootstrap)

            # Every rf:* hyperparameter is only active for RandomForest; the
            # conditions are registered in the same order as the additions.
            children = (n_estimators, criterion, max_features, max_depth,
                        min_samples_split, min_samples_leaf, bootstrap)
            for child in children:
                cs.add_condition(
                    InCondition(child=child,
                                parent=classifier,
                                values=["RandomForest"]))
            print(cs)
        except Exception:
            # Deliberately best-effort, but narrower than a bare ``except``
            # so interpreter-exit signals still propagate.
            return
Ejemplo n.º 46
0
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        """Build the search space for the requested optimizer backend.

        ``optimizer='smac'`` returns a ``ConfigurationSpace``;
        ``optimizer='tpe'`` returns a dict of hyperopt expressions. Any other
        value falls through and returns ``None``. ``dataset_properties`` is
        accepted but not read here.
        """
        if optimizer == 'tpe':
            from hyperopt import hp
            space = {
                'n_estimators': hp.choice('rf_n_estimators', [100]),
                'criterion': hp.choice('rf_criterion', ["gini", "entropy"]),
                'max_features': hp.uniform('rf_max_features', 0, 1),
                'max_depth': hp.choice('rf_max_depth', [None]),
                'min_samples_split':
                hp.randint('rf_min_samples_split', 19) + 2,
                'min_samples_leaf':
                hp.randint('rf_min_samples_leaf', 20) + 1,
                'min_weight_fraction_leaf':
                hp.choice('rf_min_weight_fraction_leaf', [0]),
                'max_leaf_nodes': hp.choice('rf_max_leaf_nodes', [None]),
                'min_impurity_decrease':
                hp.choice('rf_min_impurity_decrease', [0]),
                'bootstrap': hp.choice('rf_bootstrap', ["True", "False"]),
            }

            # Starting point for the search; built but never read or
            # returned by this function.
            init_trial = {
                'n_estimators': 100,
                'criterion': "gini",
                'max_features': 0.5,
                'max_depth': None,
                'min_samples_split': 2,
                'min_samples_leaf': 1,
                'min_weight_fraction_leaf': 0,
                'max_leaf_nodes': None,
                'min_impurity_decrease': 0,
                'bootstrap': "False",
            }

            return space

        if optimizer != 'smac':
            # Unknown optimizer: same implicit ``None`` as falling off the end.
            return None

        cs = ConfigurationSpace()

        # Fixed (non-searched) settings.
        n_estimators = Constant("n_estimators", 100)
        max_depth = UnParametrizedHyperparameter("max_depth", "None")
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            "min_weight_fraction_leaf", 0.)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)

        # Searched settings.
        criterion = CategoricalHyperparameter(
            "criterion", ["gini", "entropy"], default_value="gini")

        # The number of features used in the forest is computed as
        # m^max_features, with m the total number of features and max_features
        # the hyperparameter below. The default of 0.5 gives sqrt(m) features,
        # which corresponds with Geurts' heuristic.
        max_features = UniformFloatHyperparameter(
            "max_features", 0., 1., default_value=0.5)

        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        bootstrap = CategoricalHyperparameter(
            "bootstrap", ["True", "False"], default_value="True")

        cs.add_hyperparameters([
            n_estimators, criterion, max_features, max_depth,
            min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
            max_leaf_nodes, bootstrap, min_impurity_decrease
        ])
        return cs
Ejemplo n.º 47
0
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
    UniformIntegerHyperparameter, CategoricalHyperparameter, \
    UnParametrizedHyperparameter, Constant
from ConfigSpace.conditions import EqualsCondition, InCondition
from automl.utl import json_utils

cs = ConfigurationSpace()

# Categorical choices.
loss = CategoricalHyperparameter(
    name="loss",
    choices=["hinge", "log", "modified_huber", "squared_hinge", "perceptron"],
    default_value="log",
)
penalty = CategoricalHyperparameter(
    name="penalty",
    choices=["l1", "l2", "elasticnet"],
    default_value="l2",
)

# Float ranges, all sampled on a log scale.
alpha = UniformFloatHyperparameter(
    name="alpha", lower=1e-7, upper=1e-1, default_value=0.0001, log=True)
l1_ratio = UniformFloatHyperparameter(
    name="l1_ratio", lower=1e-9, upper=1, default_value=0.15, log=True)

# Held fixed at the string "True" rather than searched.
fit_intercept = Constant("fit_intercept", "True")

tol = UniformFloatHyperparameter(
    name="tol", lower=1e-5, upper=1e-1, default_value=1e-4, log=True)
epsilon = UniformFloatHyperparameter("epsilon",
Ejemplo n.º 48
0
 def get_hyperparameter_search_space(dataset_properties=None):
     """Return a space holding only the log-scaled ``minimum_fraction`` float.

     ``minimum_fraction`` ranges over [0.0001, 0.5] with a default of 0.01.
     ``dataset_properties`` is accepted but not read.
     """
     space = ConfigurationSpace()
     space.add_hyperparameter(
         UniformFloatHyperparameter("minimum_fraction",
                                    lower=0.0001,
                                    upper=0.5,
                                    default_value=0.01,
                                    log=True))
     return space
Ejemplo n.º 49
0
    def _convert_dict_to_config(config_list: List[str],
                                cs: ConfigurationSpace) -> Configuration:
        """Since we save a configurations in a dictionary str->str we have to
        try to figure out the type (int, float, str) of each parameter value

        Parameters
        ----------
        config_list: List[str]
            Configuration as a list of "str='str'"
        cs: ConfigurationSpace
            Configuration Space to translate dict object into Configuration object

        Returns
        -------
        Configuration
            Configuration built from the converted values, with its ``origin``
            set to "External Trajectory".

        Raises
        ------
        ValueError
            If an entry contains no "=", or if a float/integer hyperparameter
            value cannot be converted.
        """
        logger = logging.getLogger("smac.trajlogger")
        config_dict = {}
        v = ''  # type: Union[str, float, int, bool]
        for param in config_list:
            # Split on the first "=" only, so a value that itself contains
            # "=" is kept whole instead of breaking the unpacking.
            k, v = param.split("=", 1)
            v = v.strip("'")
            hp = cs.get_hyperparameter(k)
            if isinstance(hp, FloatHyperparameter):
                v = float(v)
            elif isinstance(hp, IntegerHyperparameter):
                v = int(v)
            elif isinstance(hp, (CategoricalHyperparameter, Constant)):
                # Checking for the correct type requires jumping some hoops
                # First, we gather possible interpretations of our string
                interpretations = [
                    v
                ]  # type: List[Union[str, bool, int, float]]
                if v in ["True", "False"]:
                    # Special Case for booleans (assuming we support them)
                    # This is important to avoid false positive warnings triggered by 1 == True or "False" == True
                    interpretations.append(v == 'True')
                else:
                    for t in [int, float]:
                        try:
                            interpretations.append(t(v))
                        except ValueError:
                            continue

                # Second, check if it's in the choices / the correct type.
                legal = {
                    interpretation
                    for interpretation in interpretations
                    if hp.is_legal(interpretation)
                }

                # Third, issue warnings if the interpretation is ambiguous
                if len(legal) != 1:
                    logger.warning(
                        "Ambigous or no interpretation of value {} for hp {} found ({} possible interpretations). "
                        "Passing string, but this will likely result in an error"
                        .format(v, hp.name, len(legal)))
                else:
                    v = legal.pop()

            config_dict[k] = v

        config = Configuration(configuration_space=cs, values=config_dict)
        config.origin = "External Trajectory"

        return config
Ejemplo n.º 50
0
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
    UniformIntegerHyperparameter, CategoricalHyperparameter, \
    UnParametrizedHyperparameter, Constant
from automl.utl import json_utils

cs = ConfigurationSpace()

# Smoothing parameter: a positive float searched on a logarithmic scale over
# [1e-2, 100]. The range is the original author's (SF) guess; adjust it if a
# better-founded range is known.
alpha = UniformFloatHyperparameter(
    name="alpha",
    lower=1e-2,
    upper=100,
    default_value=1,
    log=True,
)

# Encoded as the strings "True"/"False", not as Python booleans.
fit_prior = CategoricalHyperparameter(
    name="fit_prior",
    choices=["True", "False"],
    default_value="True",
)

cs.add_hyperparameters([alpha, fit_prior])

# Hand the finished space to the project's json_utils helper under the name
# "BernoulliNB".
json_utils.write_cs_to_json_file(cs, "BernoulliNB")
Ejemplo n.º 51
0
def setups_to_configspace(setups,
                          default_params,
                          keyfield='parameter_name',
                          logscale_parameters=None,
                          ignore_parameters=None,
                          ignore_constants=True):
    """Derive a ConfigurationSpace from the parameter values seen in setups.

    ``setups`` is the result of an ``openml.setups.list_setups`` call. The
    resulting space is not equal to the one obtained from auto-sklearn, but
    it is useful for creating the pcs file.

    For each parameter the observed values decide its kind: all castable to
    int -> integer range, all castable to float -> float range, otherwise
    categorical. Ranges span the minimum and maximum observed value.

    Parameters
    ----------
    setups : dict
        Mapping of setup id to setup object. Each setup exposes ``flow_id``
        and ``parameters`` (a mapping whose values carry ``value`` and the
        attribute named by ``keyfield``).
    default_params : dict
        Default value for every parameter name.
    keyfield : str
        Attribute of a parameter object that is used as the parameter name.
    logscale_parameters : collection of str, optional
        Names of numerical parameters to put on a log scale.
    ignore_parameters : collection of str, optional
        Names of parameters to leave out of the space.
    ignore_constants : bool
        When True, parameters with at most one observed value are left out.

    Returns
    -------
    tuple
        ``(cs, constants)`` where ``constants`` is the set of parameter names
        with at most one observed value.

    Raises
    ------
    ValueError
        If the setups do not all share one flow id, or if an observed
        parameter has no entry in ``default_params``.
    """
    parameter_values = {}
    flow_id = None
    for setup_id in setups:
        current = setups[setup_id]
        if flow_id is None:
            flow_id = current.flow_id
        elif current.flow_id != flow_id:
            raise ValueError(
                'flow ids are expected to be equal. Expected %d, saw %s' %
                (flow_id, current.flow_id))

        for parameter in current.parameters.values():
            name = getattr(parameter, keyfield)
            parameter_values.setdefault(name, set()).add(parameter.value)

    uncovered = set(parameter_values) - set(default_params)
    if uncovered:
        # The placeholder is required: without it the % operator itself
        # raises TypeError and hides the real problem.
        raise ValueError(
            'Mismatch between keys default_params and parameter_values. '
            'Missing: %s' % str(uncovered))

    def is_castable_to(value, target_type):
        """Return True if ``target_type(value)`` succeeds."""
        try:
            target_type(value)
            return True
        except (ValueError, TypeError):
            # TypeError covers values such as None.
            return False

    cs = ConfigurationSpace()
    if logscale_parameters is None:
        logscale_parameters = set()

    constants = set()
    for name, all_values in parameter_values.items():
        if ignore_parameters is not None and name in ignore_parameters:
            continue

        if len(all_values) <= 1:
            constants.add(name)
            if ignore_constants:
                continue

        if all(is_castable_to(item, int) for item in all_values):
            all_values = [int(item) for item in all_values]
            lower = min(all_values)
            upper = max(all_values)
            default = default_params[name]
            if not is_castable_to(default, int):
                sys.stderr.write(
                    'Illegal default for parameter %s (expected int): %s' %
                    (name, str(default)))
                # Fall back to the midpoint, which always lies in range.
                default = (lower + upper) // 2

            hyper = UniformIntegerHyperparameter(name=name,
                                                 lower=lower,
                                                 upper=upper,
                                                 default=default,
                                                 log=name
                                                 in logscale_parameters)
            cs.add_hyperparameter(hyper)
        elif all(is_castable_to(item, float) for item in all_values):
            all_values = [float(item) for item in all_values]
            lower = min(all_values)
            upper = max(all_values)
            default = default_params[name]
            if not is_castable_to(default, float):
                sys.stderr.write(
                    'Illegal default for parameter %s (expected float): %s' %
                    (name, str(default)))
                # Fall back to the midpoint, which always lies in range.
                default = (lower + upper) / 2

            hyper = UniformFloatHyperparameter(name=name,
                                               lower=lower,
                                               upper=upper,
                                               default=default,
                                               log=name in logscale_parameters)
            cs.add_hyperparameter(hyper)
        else:
            values = [flow_to_sklearn(item) for item in all_values]
            hyper = CategoricalHyperparameter(name=name,
                                              choices=values,
                                              default=default_params[name])
            cs.add_hyperparameter(hyper)
    return cs, constants
Ejemplo n.º 52
0
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformIntegerHyperparameter, CategoricalHyperparameter
from automl.utl import json_utils

cs = ConfigurationSpace()

# Integer in [1, 100], sampled on a log scale, default 5.
n_neighbors = UniformIntegerHyperparameter(
    name="n_neighbors",
    lower=1,
    upper=100,
    default_value=5,
    log=True,
)

weights = CategoricalHyperparameter(
    name="weights",
    choices=["uniform", "distance"],
    default_value="uniform",
)

# Modelled as a two-way categorical (1 or 2) rather than an integer range.
p = CategoricalHyperparameter(
    name="p",
    choices=[1, 2],
    default_value=2,
)

cs.add_hyperparameters([n_neighbors, weights, p])

# Hand the finished space to the project's json_utils helper under the name
# "KNeighborsClassifier".
json_utils.write_cs_to_json_file(cs, "KNeighborsClassifier")
Ejemplo n.º 53
0
def get_branin_config_space() -> ConfigurationSpace:
    """Return a two-dimensional space: ``x`` in [-5, 10] and ``y`` in [0, 15]."""
    space = ConfigurationSpace()
    x = UniformFloatHyperparameter('x', -5, 10)
    space.add_hyperparameter(x)
    y = UniformFloatHyperparameter('y', 0, 15)
    space.add_hyperparameter(y)
    return space
Ejemplo n.º 54
0
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
    UniformIntegerHyperparameter, CategoricalHyperparameter, \
    UnParametrizedHyperparameter, Constant
from automl.utl import json_utils

# Search space for "AdaBoostRegressor": the hyperparameters are declared
# first, then registered on a fresh configuration space which is finally
# handed to json_utils.write_cs_to_json_file.
#
# NOTE: base_estimator is not part of the space; a constant for it was left
# disabled:
# base_estimator = Constant(name="base_estimator", value="None")
n_estimators = UniformIntegerHyperparameter(
    name="n_estimators", lower=50, upper=500, default_value=50, log=False)
# Sampled on a log scale between 0.01 and 2.
learning_rate = UniformFloatHyperparameter(
    name="learning_rate", lower=0.01, upper=2, default_value=0.1, log=True)
loss = CategoricalHyperparameter(
    name="loss",
    choices=["linear", "square", "exponential"],
    default_value="linear")
max_depth = UniformIntegerHyperparameter(
    name="max_depth", lower=1, upper=10, default_value=1, log=False)

cs = ConfigurationSpace()
cs.add_hyperparameters([n_estimators, learning_rate, loss, max_depth])

json_utils.write_cs_to_json_file(cs, "AdaBoostRegressor")
Ejemplo n.º 55
0
    def get_hyperparameter_search_space(
        dataset_properties: Optional[Dict[str, BaseDatasetPropertiesType]] = None,
        use_augmenter: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="use_augmenter",
                                                                             value_range=(True, False),
                                                                             default_value=True,
                                                                             ),
        scale_offset: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="scale_offset",
                                                                            value_range=(0, 0.4),
                                                                            default_value=0.2,
                                                                            ),
        translate_percent_offset: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="translate_percent_offset",
            value_range=(0, 0.4),
            default_value=0.2),
        shear: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="shear",
                                                                     value_range=(0, 45),
                                                                     default_value=30,
                                                                     ),
        rotate: HyperparameterSearchSpace = HyperparameterSearchSpace(hyperparameter="rotate",
                                                                      value_range=(0, 360),
                                                                      default_value=45,
                                                                      ),
    ) -> ConfigurationSpace:
        """Build the configuration space of the augmenter.

        Each search-space argument is passed to ``get_hyperparameter``
        together with the hyperparameter class it should become:
        ``use_augmenter`` as a categorical, ``scale_offset`` and
        ``translate_percent_offset`` as uniform floats, ``shear`` and
        ``rotate`` as uniform integers.  ``dataset_properties`` is accepted
        but not read here.

        Returns:
            A ``ConfigurationSpace`` in which the four transformation
            hyperparameters are conditional on ``use_augmenter`` being
            ``True``.
        """
        cs = ConfigurationSpace()

        augmenter_switch = get_hyperparameter(use_augmenter, CategoricalHyperparameter)
        scale_hp = get_hyperparameter(scale_offset, UniformFloatHyperparameter)
        translate_hp = get_hyperparameter(translate_percent_offset, UniformFloatHyperparameter)
        shear_hp = get_hyperparameter(shear, UniformIntegerHyperparameter)
        rotate_hp = get_hyperparameter(rotate, UniformIntegerHyperparameter)

        cs.add_hyperparameters([augmenter_switch, scale_hp, translate_hp])
        cs.add_hyperparameters([shear_hp, rotate_hp])

        # The transformation parameters only matter while the augmenter is
        # switched on, so each of them is tied to the switch.
        for dependent_hp in (scale_hp, translate_hp, shear_hp, rotate_hp):
            cs.add_condition(CS.EqualsCondition(dependent_hp, augmenter_switch, True))

        return cs
Ejemplo n.º 56
0
    def _get_hyperparameter_search_space(self, include=None, exclude=None,
                                         dataset_properties=None):
        """Create the hyperparameter configuration space of the pipeline.

        The base space is obtained from ``self._get_base_search_space`` and
        is then restricted with forbidden clauses for classifier /
        preprocessor pairs that must not be combined.  Whenever adding a
        clause fails with a ``ValueError``, the default classifier is
        replaced by another available classifier and the clause is tried
        again.

        Parameters
        ----------
        include : dict (optional, default=None)
            Forwarded unchanged to ``self._get_base_search_space``.

        exclude : optional, default=None
            Forwarded unchanged to ``self._get_base_search_space``.

        dataset_properties : dict (optional, default=None)
            Properties of the dataset.  Anything that is not a ``dict`` is
            replaced by an empty one.  The dictionary is updated in place:
            ``'target_type'`` is set to ``'classification'`` and
            ``'sparse'`` defaults to ``False``.

        Returns
        -------
        cs : ConfigSpace.configuration_space.ConfigurationSpace
            The restricted configuration space.  It is also stored in
            ``self.configuration_space_``; the dataset properties are stored
            in ``self.dataset_properties_``.

        Raises
        ------
        ValueError
            If a forbidden clause cannot be added and no alternative default
            classifier is left to try.
        """
        cs = ConfigurationSpace()

        if not isinstance(dataset_properties, dict):
            dataset_properties = dict()
        # The target type is always classification here, whatever the caller
        # supplied.
        dataset_properties['target_type'] = 'classification'
        # Without further information the dataset is treated as dense.
        dataset_properties.setdefault('sparse', False)

        cs = self._get_base_search_space(
            cs=cs, dataset_properties=dataset_properties,
            exclude=exclude, include=include, pipeline=self.steps)

        classifier_choice = 'classifier:__choice__'
        preprocessor_choice = 'preprocessor:__choice__'

        classifiers = cs.get_hyperparameter(classifier_choice).choices
        preprocessors = cs.get_hyperparameter(preprocessor_choice).choices
        available_classifiers = self._final_estimator.get_available_components(
            dataset_properties)

        # Every available classifier except the current default may take over
        # as default when the current one collides with a forbidden clause.
        possible_default_classifier = list(available_classifiers)
        possible_default_classifier.remove(
            cs.get_hyperparameter(classifier_choice).default_value)

        def forbid(first, second, skip_on_key_error=True):
            """Forbid the conjunction of two ``(hyperparameter, value)`` pairs.

            On ``ValueError`` the default classifier is swapped for the next
            fallback and the clause is retried.  A ``KeyError`` abandons the
            clause when ``skip_on_key_error`` is set and propagates otherwise.
            """
            while True:
                try:
                    cs.add_forbidden_clause(ForbiddenAndConjunction(
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter(first[0]), first[1]),
                        ForbiddenEqualsClause(
                            cs.get_hyperparameter(second[0]), second[1])))
                    return
                except KeyError:
                    if skip_on_key_error:
                        return
                    raise
                except ValueError as err:
                    # Change the default and try again
                    try:
                        new_default = possible_default_classifier.pop()
                    except IndexError:
                        # Chain the original failure so the reason the last
                        # candidate was rejected stays visible.
                        raise ValueError(
                            "Cannot find a legal default configuration."
                        ) from err
                    cs.get_hyperparameter(
                        classifier_choice).default_value = new_default

        # A classifier which can handle sparse data after the densifier is
        # forbidden for memory issues
        for key in classifiers:
            handles_sparse = \
                SPARSE in available_classifiers[key].get_properties()['input']
            if handles_sparse and 'densifier' in preprocessors:
                forbid((classifier_choice, key),
                       (preprocessor_choice, 'densifier'),
                       skip_on_key_error=False)

        # Combinations of non-linear models with feature learning, which
        # would take too long
        classifiers_ = ["adaboost", "decision_tree", "extra_trees",
                        "gradient_boosting", "k_nearest_neighbors",
                        "libsvm_svc", "random_forest", "gaussian_nb",
                        "xgradient_boosting"]
        feature_learning = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"]

        for c, f in product(classifiers_, feature_learning):
            if c in classifiers and f in preprocessors:
                forbid((classifier_choice, c), (preprocessor_choice, f))

        # Won't work
        # Multinomial NB etc don't use with features learning, pca etc
        classifiers_ = ["multinomial_nb"]
        preproc_with_negative_X = ["kitchen_sinks", "pca", "truncatedSVD",
                                   "fast_ica", "kernel_pca", "nystroem_sampler"]

        for c, f in product(classifiers_, preproc_with_negative_X):
            if c in classifiers and f in preprocessors:
                forbid((preprocessor_choice, f), (classifier_choice, c))

        self.configuration_space_ = cs
        self.dataset_properties_ = dataset_properties
        return cs
Ejemplo n.º 57
0
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        """Return the search space in the format of the chosen optimizer.

        For ``optimizer == 'smac'`` a ``ConfigurationSpace`` with conditional
        hyperparameters is returned; for ``optimizer == 'tpe'`` a nested
        dictionary of ``hp`` expressions is returned.  Any other value falls
        through and yields ``None``.  ``dataset_properties`` is accepted but
        not read here.
        """
        if optimizer == 'smac':
            hidden_size = UniformIntegerHyperparameter(
                "hidden_size", 100, 500, default_value=200)
            activation = CategoricalHyperparameter(
                "activation",
                ["identity", "logistic", "tanh", "relu"],
                default_value="relu")
            solver = CategoricalHyperparameter(
                "solver", ["sgd", "adam"], default_value="adam")
            alpha = UniformFloatHyperparameter(
                "alpha", 1e-7, 1., log=True, default_value=0.0001)
            learning_rate = CategoricalHyperparameter(
                "learning_rate",
                ["adaptive", "invscaling", "constant"],
                default_value="constant")
            learning_rate_init = UniformFloatHyperparameter(
                "learning_rate_init", 1e-4, 3e-1,
                default_value=0.001, log=True)
            tol = UniformFloatHyperparameter(
                "tol", 1e-5, 1e-1, log=True, default_value=1e-4)
            momentum = UniformFloatHyperparameter(
                "momentum", 0.6, 1, q=0.05, default_value=0.9)
            nesterovs_momentum = CategoricalHyperparameter(
                "nesterovs_momentum", [True, False], default_value=True)
            beta1 = UniformFloatHyperparameter(
                "beta1", 0.6, 1, default_value=0.9)
            power_t = UniformFloatHyperparameter(
                "power_t", 1e-5, 1, log=True, default_value=0.5)

            cs = ConfigurationSpace()
            cs.add_hyperparameters([
                hidden_size, activation, solver, alpha, learning_rate,
                learning_rate_init, tol, momentum, nesterovs_momentum, beta1,
                power_t,
            ])

            # These three are only active for the "sgd" solver.
            conditions = [
                EqualsCondition(sgd_only, solver, "sgd")
                for sgd_only in (learning_rate, momentum, nesterovs_momentum)
            ]
            # beta1 is only active for the "adam" solver.
            conditions.append(EqualsCondition(beta1, solver, "adam"))
            # power_t is only active for the "invscaling" schedule.
            conditions.append(
                EqualsCondition(power_t, learning_rate, "invscaling"))
            cs.add_conditions(conditions)

            return cs

        if optimizer == 'tpe':
            hidden_size_expr = hp.randint("mlp_hidden_size", 450) + 50
            activation_expr = hp.choice(
                'mlp_activation', ["identity", "logistic", "tanh", "relu"])

            # Options that only exist when the "sgd" solver is picked;
            # power_t in turn only exists for the "invscaling" schedule.
            learning_rate_expr = hp.choice('mlp_learning_rate', [
                ("adaptive", {}),
                ("constant", {}),
                ("invscaling", {
                    'power_t': hp.uniform('mlp_power_t', 1e-5, 1),
                }),
            ])
            sgd_options = {
                'learning_rate': learning_rate_expr,
                'momentum': hp.uniform('mlp_momentum', 0.6, 1),
                'nesterovs_momentum':
                    hp.choice('mlp_nesterovs_momentum', [True, False]),
            }
            # Options that only exist when the "adam" solver is picked.
            adam_options = {
                'beta1': hp.uniform('mlp_beta1', 0.6, 1),
            }
            solver_expr = hp.choice(
                'mlp_solver', [("sgd", sgd_options), ("adam", adam_options)])

            space = {
                'hidden_size': hidden_size_expr,
                'activation': activation_expr,
                'solver': solver_expr,
                'alpha': hp.loguniform(
                    'mlp_alpha', np.log(1e-7), np.log(1e-1)),
                'learning_rate_init': hp.loguniform(
                    'mlp_learning_rate_init', np.log(1e-6), np.log(1e-1)),
                'tol': hp.loguniform(
                    'mlp_tol', np.log(1e-5), np.log(1e-1)),
            }

            return space
Ejemplo n.º 58
0
    def get_hyperparameter_search_space(
        dataset_properties: Optional[Dict[str,
                                          BaseDatasetPropertiesType]] = None,
        num_blocks: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="num_blocks", value_range=(1, 10), default_value=5),
        num_filters: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="num_filters",
            value_range=(4, 64),
            default_value=32),
        kernel_size: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="kernel_size",
            value_range=(4, 64),
            default_value=32),
        use_dropout: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="use_dropout",
            value_range=(True, False),
            default_value=False),
        dropout: HyperparameterSearchSpace = HyperparameterSearchSpace(
            hyperparameter="dropout", value_range=(0, 0.5), default_value=0.1),
    ) -> ConfigurationSpace:
        """Build the configuration space with one filter count per block.

        ``num_blocks``, ``kernel_size``, ``use_dropout`` and ``dropout`` each
        contribute one hyperparameter; ``dropout`` is only active while
        ``use_dropout`` is ``True``.  ``num_filters`` is used as a template:
        for every block index below the upper bound of ``num_blocks`` a
        hyperparameter ``num_filters_<index>`` with the same range, default
        and ``log`` setting is added.  ``dataset_properties`` is accepted but
        not read here.

        Returns:
            The assembled ``ConfigurationSpace``.
        """
        cs = ConfigurationSpace()

        fewest_blocks, most_blocks = (
            int(bound) for bound in num_blocks.value_range)

        blocks_hp = get_hyperparameter(num_blocks,
                                       UniformIntegerHyperparameter)
        cs.add_hyperparameter(blocks_hp)

        add_hyperparameter(cs, kernel_size, UniformIntegerHyperparameter)

        dropout_switch = get_hyperparameter(use_dropout,
                                            CategoricalHyperparameter)
        cs.add_hyperparameter(dropout_switch)

        dropout_rate = get_hyperparameter(dropout, UniformFloatHyperparameter)
        cs.add_hyperparameter(dropout_rate)
        cs.add_condition(
            CS.EqualsCondition(dropout_rate, dropout_switch, True))

        for block_idx in range(most_blocks):
            per_block_space = HyperparameterSearchSpace(
                f"num_filters_{block_idx}",
                value_range=num_filters.value_range,
                default_value=num_filters.default_value,
                log=num_filters.log)
            filters_hp = get_hyperparameter(per_block_space,
                                            UniformIntegerHyperparameter)
            cs.add_hyperparameter(filters_hp)

            # Indices below the lower bound of num_blocks get no condition;
            # the others are only active when num_blocks exceeds the index.
            if block_idx < fewest_blocks:
                continue
            cs.add_condition(
                CS.GreaterThanCondition(filters_hp, blocks_hp, block_idx))

        return cs
Ejemplo n.º 59
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Build the configuration space for kernel, degree, gamma, coef0
        and n_components.

        The ``"chi2"`` kernel is offered unless ``dataset_properties`` is
        given and reports ``"is_sparse"`` as ``True`` or ``"signed"`` as
        ``False``.  ``degree`` is only active for the ``"poly"`` kernel,
        ``coef0`` for ``"poly"`` and ``"sigmoid"``, and ``gamma`` for every
        offered kernel except ``"cosine"``.
        """
        chi2_ruled_out = dataset_properties is not None and (
            dataset_properties.get("is_sparse") is True
            or dataset_properties.get("signed") is False)
        allow_chi2 = not chi2_ruled_out
        # Appended to both the kernel choices and the gamma condition.
        optional_kernels = ["chi2"] if allow_chi2 else []

        kernel = CategoricalHyperparameter(
            'kernel',
            ['poly', 'rbf', 'sigmoid', 'cosine'] + optional_kernels,
            'rbf')
        degree = UniformIntegerHyperparameter('degree', 2, 5, 3)
        # The lower bound equals 2 ** -15.
        gamma = UniformFloatHyperparameter(
            "gamma", 3.0517578125e-05, 8, log=True, default=0.1)
        coef0 = UniformFloatHyperparameter("coef0", -1, 1, default=0)
        n_components = UniformIntegerHyperparameter(
            "n_components", 50, 10000, default=100, log=True)

        cs = ConfigurationSpace()
        for hyperparameter in (kernel, degree, gamma, coef0, n_components):
            cs.add_hyperparameter(hyperparameter)

        conditions = (
            EqualsCondition(degree, kernel, "poly"),
            InCondition(coef0, kernel, ["poly", "sigmoid"]),
            InCondition(gamma, kernel,
                        ["poly", "rbf", "sigmoid"] + optional_kernels),
        )
        for condition in conditions:
            cs.add_condition(condition)
        return cs
Ejemplo n.º 60
0
    def _get_hyperparameter_search_space(self, include=None, exclude=None,
                                         dataset_properties=None):
        """Return the configuration space of the regression pipeline.

        The base space comes from ``self._get_base_search_space`` and is then
        restricted with forbidden clauses for regressor / feature
        preprocessor pairs that must not be combined.  Whenever adding a
        clause fails with a ``ValueError``, the default regressor is replaced
        by another available regressor and the clause is tried again.

        Parameters
        ----------
        include : optional, default=None
            Forwarded unchanged to ``self._get_base_search_space``.

        exclude : optional, default=None
            Forwarded unchanged to ``self._get_base_search_space``.

        dataset_properties : dict (optional, default=None)
            Properties of the dataset.  Anything that is not a ``dict`` is
            replaced by an empty one.  The dictionary is updated in place:
            ``'target_type'`` is set to ``'regression'`` and ``'sparse'``
            defaults to ``False``.

        Returns
        -------
        cs : ConfigSpace.configuration_space.ConfigurationSpace
            The restricted configuration space.  It is also stored in
            ``self.configuration_space_``; the dataset properties are stored
            in ``self.dataset_properties_``.

        Raises
        ------
        ValueError
            If a forbidden clause cannot be added and no alternative default
            regressor is left to try.
        """
        cs = ConfigurationSpace()

        if not isinstance(dataset_properties, dict):
            dataset_properties = dict()
        # The target type is always regression here, whatever was supplied.
        if dataset_properties.get('target_type') != 'regression':
            dataset_properties['target_type'] = 'regression'
        # This dataset is probably dense
        dataset_properties.setdefault('sparse', False)

        cs = self._get_base_search_space(
            cs=cs, dataset_properties=dataset_properties,
            exclude=exclude, include=include, pipeline=self.steps)

        regressor_choice = 'regressor:__choice__'
        preprocessor_choice = 'feature_preprocessor:__choice__'

        regressors = cs.get_hyperparameter(regressor_choice).choices
        preprocessors = cs.get_hyperparameter(preprocessor_choice).choices
        available_regressors = self._final_estimator.get_available_components(
            dataset_properties)

        # Every available regressor except the current default may take over
        # as default when the current one collides with a forbidden clause.
        fallback_defaults = list(available_regressors.keys())
        fallback_defaults.remove(
            cs.get_hyperparameter(regressor_choice).default_value)

        def forbid_pair(regressor_name, preprocessor_name, skip_on_key_error):
            """Forbid one regressor / preprocessor combination.

            On ``ValueError`` the default regressor is swapped for the next
            fallback and the clause is retried.  A ``KeyError`` abandons the
            clause when ``skip_on_key_error`` is set and propagates otherwise.
            """
            while True:
                try:
                    regressor_clause = ForbiddenEqualsClause(
                        cs.get_hyperparameter(regressor_choice),
                        regressor_name)
                    preprocessor_clause = ForbiddenEqualsClause(
                        cs.get_hyperparameter(preprocessor_choice),
                        preprocessor_name)
                    cs.add_forbidden_clause(ForbiddenAndConjunction(
                        regressor_clause, preprocessor_clause))
                    return
                except KeyError:
                    if skip_on_key_error:
                        return
                    raise
                except ValueError:
                    # Change the default and try again
                    try:
                        replacement = fallback_defaults.pop()
                    except IndexError:
                        raise ValueError(
                            "Cannot find a legal default configuration.")
                    cs.get_hyperparameter(
                        regressor_choice).default_value = replacement

        # A regressor which can handle sparse data after the densifier is
        # forbidden for memory issues
        for key in regressors:
            regressor_inputs = available_regressors[key].get_properties(
                dataset_properties=None)['input']
            if SPARSE in regressor_inputs and 'densifier' in preprocessors:
                forbid_pair(key, 'densifier', skip_on_key_error=False)

        # Combinations of tree-based models with feature learning, which
        # would take too long
        slow_regressors = ["adaboost", "decision_tree", "extra_trees",
                           "gaussian_process", "gradient_boosting",
                           "k_nearest_neighbors", "random_forest",
                           "xgradient_boosting"]
        feature_learners = ["kitchen_sinks", "kernel_pca", "nystroem_sampler"]

        for r, f in product(slow_regressors, feature_learners):
            if r in regressors and f in preprocessors:
                forbid_pair(r, f, skip_on_key_error=True)

        self.configuration_space_ = cs
        self.dataset_properties_ = dataset_properties
        return cs