def test_constant(self):
    """Exercise Constant: construction, repr, equality and argument checks."""
    # Construction: an identical twin plus variants differing in value,
    # in name, and in both.
    base = Constant("value", 1)
    twin = Constant("value", 1)
    other_value = Constant("value", 2)
    other_name = Constant("valuee", 1)
    other_both = Constant("valueee", 2)

    # Representation
    self.assertEqual("value, Type: Constant, Value: 1", repr(base))

    # Equality operator (the final check covers the != operator)
    self.assertFalse(base == 1)
    self.assertEqual(base, twin)
    for different in (other_value, other_name):
        self.assertFalse(base == different)
    self.assertTrue(base != other_both)

    # Only strings, integers and floats are accepted as values
    for bad_value in (dict(), None, True):
        with self.assertRaises(TypeError):
            Constant("value", bad_value)

    # Only strings are accepted as names
    for bad_name in (1, dict(), None, True):
        with self.assertRaises(TypeError):
            Constant(bad_name, "value")
예제 #2
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Assemble and return the ConfigurationSpace for this component.

        ``dataset_properties`` is not read by this function.  The
        hyperparameters are registered in the order listed below.
        """
        space = ConfigurationSpace()
        space.add_hyperparameters([
            Constant("loss", "deviance"),
            UniformFloatHyperparameter(
                "learning_rate", 0.01, 1, default_value=0.1, log=True),
            UniformIntegerHyperparameter(
                name="n_estimators", lower=50, upper=500, default_value=100),
            UniformIntegerHyperparameter("max_depth", 1, 10, default_value=3),
            # The criterion is pinned to 'mse'; 'friedman_mse' and 'mae' are
            # intentionally not offered as choices.
            CategoricalHyperparameter('criterion', ['mse'], default_value='mse'),
            UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default_value=2),
            UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 20, default_value=1),
            UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.),
            UniformFloatHyperparameter(
                "subsample", 0.01, 1.0, default_value=1.0),
            UniformFloatHyperparameter(
                name="max_features", lower=0.1, upper=1.0, default_value=1),
            UnParametrizedHyperparameter("max_leaf_nodes", "None"),
            UnParametrizedHyperparameter('min_impurity_decrease', 0.0),
        ])
        return space
예제 #3
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return a ConfigurationSpace with ``percentile`` and ``score_func``.

        When ``dataset_properties`` contains a truthy ``'is_sparse'`` entry,
        ``score_func`` is fixed to ``"chi2"``; otherwise it is a categorical
        choice between ``"chi2"`` and ``"f_classif"``.
        """
        percentile = UniformFloatHyperparameter(
            "percentile", 1, 99, default=50)

        # Chi2 can handle sparse data, so sparse input pins the score function.
        sparse_input = (dataset_properties is not None
                        and 'is_sparse' in dataset_properties
                        and dataset_properties['is_sparse'])
        if sparse_input:
            score_func = Constant("score_func", "chi2")
        else:
            score_func = CategoricalHyperparameter(
                "score_func", ["chi2", "f_classif"], default="chi2")

        space = ConfigurationSpace()
        for hyperparameter in (percentile, score_func):
            space.add_hyperparameter(hyperparameter)
        return space
예제 #4
0
def get_configspace_from_metalearning(metalearning_entry):
    """Build a ConfigurationSpace from a metalearning entry.

    Each key of ``metalearning_entry`` names a hyperparameter whose value
    provides ``'choices'`` and ``'default'``.  An entry with exactly one
    choice and a ``None`` default becomes ``Constant(name, 'None')``; every
    other entry becomes a CategoricalHyperparameter over its choices.
    """

    def _to_hyperparameter(name):
        # Translate one metalearning record into a ConfigSpace hyperparameter.
        record = metalearning_entry[name]
        if len(record['choices']) == 1 and record['default'] is None:
            return Constant(name, 'None')
        return CategoricalHyperparameter(
            name=name,
            choices=record['choices'],
            default_value=record['default'])

    space = ConfigurationSpace()
    space.add_hyperparameters(
        [_to_hyperparameter(name) for name in metalearning_entry])
    return space
예제 #5
0
    def test_greater_and_less_condition(self):
        """Check GreaterThanCondition / LessThanCondition evaluation.

        Covers float, integer and ordinal parents, rejection of a categorical
        parent with a ValueError, and ordering over string-valued ordinals.
        """
        child = Constant('child', 'child')
        hp1 = UniformFloatHyperparameter("float", 0, 5)
        hp2 = UniformIntegerHyperparameter("int", 0, 5)
        hp3 = OrdinalHyperparameter("ord", list(range(6)))

        for hp in [hp1, hp2, hp3]:
            gt = GreaterThanCondition(child, hp, 1)
            self.assertFalse(gt.evaluate({hp.name: 0}))
            self.assertTrue(gt.evaluate({hp.name: 2}))
            self.assertFalse(gt.evaluate({hp.name: None}))

            lt = LessThanCondition(child, hp, 1)
            self.assertTrue(lt.evaluate({hp.name: 0}))
            self.assertFalse(lt.evaluate({hp.name: 2}))
            self.assertFalse(lt.evaluate({hp.name: None}))

        # A categorical parent has no ordering, so both conditions must refuse
        # it.  assertRaisesRegex replaces the deprecated assertRaisesRegexp
        # alias, which no longer exists in Python 3.12+.
        hp4 = CategoricalHyperparameter("cat", list(range(6)))
        expected_message = (
            "Parent hyperparameter in a > or < "
            "condition must be a subclass of "
            "NumericalHyperparameter or "
            "OrdinalHyperparameter, but is "
            "<class 'ConfigSpace.hyperparameters.CategoricalHyperparameter'>")
        for condition_class in (GreaterThanCondition, LessThanCondition):
            self.assertRaisesRegex(
                ValueError, expected_message, condition_class, child, hp4, 1)

        hp5 = OrdinalHyperparameter("ord",
                                    ['cold', 'luke warm', 'warm', 'hot'])

        gt = GreaterThanCondition(child, hp5, 'warm')
        self.assertTrue(gt.evaluate({hp5.name: 'hot'}))
        self.assertFalse(gt.evaluate({hp5.name: 'cold'}))

        lt = LessThanCondition(child, hp5, 'warm')
        self.assertTrue(lt.evaluate({hp5.name: 'luke warm'}))
        self.assertFalse(lt.evaluate({hp5.name: 'warm'}))
예제 #6
0
 def test_get_types(self):
     """Check the types and bounds get_types reports for a mixed space."""
     cs = ConfigurationSpace()
     for hyperparameter in (
             CategoricalHyperparameter('a', ['a', 'b']),
             UniformFloatHyperparameter('b', 1, 5),
             UniformIntegerHyperparameter('c', 3, 7),
             Constant('d', -5),
             OrdinalHyperparameter('e', ['cold', 'hot']),
             CategoricalHyperparameter('f', ['x', 'y']),
     ):
         cs.add_hyperparameter(hyperparameter)

     types, bounds = get_types(cs, None)
     np.testing.assert_array_equal(types, [2, 0, 0, 0, 0, 2])

     # Expected bounds per hyperparameter, in insertion order.  An upper
     # value of None marks an entry whose second bound must be non-finite.
     expected_bounds = [(2, None), (0, 1), (0, 1), (0, None), (0, 1), (2, None)]
     for position, (lower, upper) in enumerate(expected_bounds):
         if upper is None:
             self.assertEqual(bounds[position][0], lower)
             self.assertFalse(np.isfinite(bounds[position][1]))
         else:
             np.testing.assert_array_equal(bounds[position], [lower, upper])
    def test_all_components_have_the_same_child(self):
        """Check that AndConjunction rejects conditions with different children.

        Conjunctions whose conditions share one child are constructed without
        error; mixing conditions with different children must raise ValueError.
        """
        hp1 = CategoricalHyperparameter("input1", [0, 1])
        hp2 = CategoricalHyperparameter("input2", [0, 1])
        hp3 = CategoricalHyperparameter("input3", [0, 1])
        hp4 = CategoricalHyperparameter("input4", [0, 1])
        hp5 = CategoricalHyperparameter("input5", [0, 1])
        hp6 = Constant("AND", "True")

        # cond1..cond3 all have hp1 as child; cond4 and cond5 have hp6.
        cond1 = EqualsCondition(hp1, hp2, 1)
        cond2 = EqualsCondition(hp1, hp3, 1)
        cond3 = EqualsCondition(hp1, hp4, 1)
        cond4 = EqualsCondition(hp6, hp4, 1)
        cond5 = EqualsCondition(hp6, hp5, 1)

        AndConjunction(cond1, cond2, cond3)
        AndConjunction(cond4, cond5)
        # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
        # which no longer exists in Python 3.12+.
        self.assertRaisesRegex(
            ValueError, "All Conjunctions and Conditions must have "
            "the same child.", AndConjunction, cond1, cond4)
    def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
        """Return the search space in the format expected by ``optimizer``.

        ``'smac'`` yields a ConfigurationSpace, ``'tpe'`` yields a dict of
        hyperopt expressions.  Any other value falls through and returns
        None.  ``dataset_properties`` is not read by this function.
        """
        if optimizer == 'tpe':
            from hyperopt import hp
            return {
                'n_estimators': 100,
                'criterion': hp.choice('ets_criterion', ['gini', 'entropy']),
                'max_features': hp.uniform('ets_max_features', 0, 1),
                'max_depth': "None",
                'max_leaf_nodes': "None",
                # randint draws from [0, upper), hence the offsets.
                'min_samples_leaf': hp.randint('ets_samples_leaf', 20) + 1,
                'min_samples_split': hp.randint('ets_samples_split', 19) + 2,
                'min_weight_fraction_leaf': 0.,
                'min_impurity_decrease': 0.,
                'bootstrap': hp.choice('ets_bootstrap', ['True', 'False']),
            }

        if optimizer != 'smac':
            return None

        space = ConfigurationSpace()
        space.add_hyperparameters([
            Constant("n_estimators", 100),
            CategoricalHyperparameter(
                "criterion", ["gini", "entropy"], default_value="gini"),
            UniformFloatHyperparameter(
                "max_features", 0, 1, default_value=0.5, q=0.05),
            UnParametrizedHyperparameter("max_depth", "None"),
            UnParametrizedHyperparameter("max_leaf_nodes", "None"),
            UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default_value=2),
            UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 20, default_value=1),
            UnParametrizedHyperparameter('min_weight_fraction_leaf', 0.),
            UnParametrizedHyperparameter('min_impurity_decrease', 0.),
            CategoricalHyperparameter(
                "bootstrap", ["True", "False"], default_value="False"),
        ])
        return space
예제 #9
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Assemble and return the ConfigurationSpace for this component.

        ``dataset_properties`` is not read by this function.
        """
        space = ConfigurationSpace()
        space.add_hyperparameters([
            Constant("n_estimators", 100),
            CategoricalHyperparameter(
                "criterion", ["gini", "entropy"], default_value="gini"),
            # The maximum number of features used in the forest is calculated
            # as m^max_features, where m is the total number of features and
            # max_features is this hyperparameter.  The default of 0.5 yields
            # sqrt(m) features in the estimator, which corresponds with
            # Geurts' heuristic.
            UniformFloatHyperparameter(
                "max_features", 0., 1., default_value=0.5),
            UnParametrizedHyperparameter("max_depth", "None"),
            UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default_value=2),
            UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 20, default_value=1),
            UnParametrizedHyperparameter('min_weight_fraction_leaf', 0.),
            UnParametrizedHyperparameter("max_leaf_nodes", "None"),
            UnParametrizedHyperparameter('min_impurity_decrease', 0.0),
            CategoricalHyperparameter(
                "bootstrap", ["True", "False"], default_value="False"),
        ])
        return space
예제 #10
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return a ConfigurationSpace with ``alpha``, ``score_func`` and ``mode``.

        When ``dataset_properties`` contains a truthy ``'sparse'`` entry,
        ``score_func`` is fixed to ``"chi2"``; otherwise it is a categorical
        choice between ``"chi2"`` and ``"f_classif"``.
        """
        alpha = UniformFloatHyperparameter(
            "alpha", 0.01, 0.5, default=0.1)

        # Chi2 can handle sparse data, so sparse input pins the score function.
        sparse_input = (dataset_properties is not None
                        and 'sparse' in dataset_properties
                        and dataset_properties['sparse'])
        if sparse_input:
            score_func = Constant("score_func", "chi2")
        else:
            score_func = CategoricalHyperparameter(
                "score_func", ["chi2", "f_classif"], default="chi2")

        mode = CategoricalHyperparameter('mode', ['fpr', 'fdr', 'fwe'], 'fpr')

        space = ConfigurationSpace()
        for hyperparameter in (alpha, score_func, mode):
            space.add_hyperparameter(hyperparameter)
        return space
예제 #11
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Assemble and return the ConfigurationSpace for this component.

        ``n_iter_no_change`` is conditional on ``early_stop`` being
        ``"valid"`` or ``"train"``; ``validation_fraction`` is conditional on
        ``early_stop`` being ``"valid"``.  ``dataset_properties`` is not read
        by this function.
        """
        # These three are referenced by the conditions below, so they need
        # names; everything else is created inline.
        early_stop = CategoricalHyperparameter(
            "early_stop", ["off", "train", "valid"], default_value="off")
        n_iter_no_change = UniformIntegerHyperparameter(
            "n_iter_no_change", 1, 20, default_value=10)
        validation_fraction = UniformFloatHyperparameter(
            "validation_fraction", 0.01, 0.4, default_value=0.1)

        space = ConfigurationSpace()
        space.add_hyperparameters([
            CategoricalHyperparameter(
                "loss", ["least_squares"], default_value="least_squares"),
            UniformFloatHyperparameter(
                "learning_rate", 0.01, 1, default_value=0.1, log=True),
            UniformIntegerHyperparameter(
                name="max_iter", lower=32, upper=512, default_value=100),
            UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 200, default_value=20, log=True),
            UnParametrizedHyperparameter("max_depth", "None"),
            UniformIntegerHyperparameter(
                "max_leaf_nodes", 3, 2047, default_value=31, log=True),
            Constant("max_bins", 255),
            UniformFloatHyperparameter(
                "l2_regularization", 1E-10, 1, default_value=1E-10, log=True),
            early_stop,
            UnParametrizedHyperparameter("tol", 1e-7),
            UnParametrizedHyperparameter("scoring", "loss"),
            n_iter_no_change,
            validation_fraction,
        ])
        space.add_conditions([
            InCondition(n_iter_no_change, early_stop, ["valid", "train"]),
            EqualsCondition(validation_fraction, early_stop, "valid"),
        ])
        return space
예제 #12
0
    def test_and_conjunction(self):
        """Exercise AndConjunction: validation, equality, vector indices, str."""
        with self.assertRaises(TypeError):
            AndConjunction("String1", "String2")

        parent_a = CategoricalHyperparameter("input1", [0, 1])
        parent_b = CategoricalHyperparameter("input2", [0, 1])
        parent_c = CategoricalHyperparameter("input3", [0, 1])
        child = Constant("And", "True")
        cond_a = EqualsCondition(child, parent_a, 1)

        # A conjunction built from a single condition is rejected.
        with self.assertRaises(ValueError):
            AndConjunction(cond_a)

        cond_b = EqualsCondition(child, parent_b, 1)
        cond_c = EqualsCondition(child, parent_c, 1)

        pair_ab = AndConjunction(cond_a, cond_b)
        pair_ab_again = AndConjunction(cond_a, cond_b)
        self.assertEqual(pair_ab, pair_ab_again)

        # Vector indices: each hyperparameter name maps to its position.
        positions = {
            hyperparameter.name: position
            for position, hyperparameter in enumerate(
                (parent_a, parent_b, parent_c, child))
        }
        pair_ab.set_vector_idx(positions)
        self.assertEqual(pair_ab.get_parents_vector(), [0, 1])
        self.assertEqual(pair_ab.get_children_vector(), [3, 3])

        pair_bc = AndConjunction(cond_b, cond_c)
        self.assertNotEqual(pair_ab, pair_bc)

        triple = AndConjunction(cond_a, cond_b, cond_c)
        self.assertEqual(
            "(And | input1 == 1 && And | input2 == 1 && And | "
            "input3 == 1)", str(triple))

        # __eq__ against a larger conjunction and against a foreign type
        self.assertNotEqual(pair_ab, triple)
        self.assertNotEqual(pair_ab, "String")
    def get_hyperparameter_search_space(dataset_properties=None):
        """Return a ConfigurationSpace in which every hyperparameter is fixed.

        ``dataset_properties`` is not read by this function.
        """
        # (name, fixed value) pairs, in the order they are registered after
        # n_estimators.
        fixed_values = [
            ("criterion", "gini"),
            ("max_features", "0.5"),
            ("max_depth", "None"),
            ("min_samples_split", 2),
            ("min_samples_leaf", 1),
            ("min_weight_fraction_leaf", 0.),
            ("max_leaf_nodes", "None"),
            ("bootstrap", "True"),
            ('min_impurity_decrease', 0.0),
        ]

        space = ConfigurationSpace()
        space.add_hyperparameters(
            [Constant("n_estimators", 100)]
            + [UnParametrizedHyperparameter(name, value)
               for name, value in fixed_values])
        return space
예제 #14
0
    def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
        """Return the ConfigurationSpace for the ``'smac'`` optimizer.

        Any other ``optimizer`` value returns None.  ``dataset_properties``
        is not read by this function.
        """
        if optimizer != 'smac':
            return None

        space = ConfigurationSpace()
        space.add_hyperparameters([
            CategoricalHyperparameter(
                "criterion", ["gini", "entropy"], default_value="gini"),
            UnParametrizedHyperparameter('max_features', 1.0),
            UniformFloatHyperparameter(
                'max_depth_factor', 0., 2., default_value=0.5),
            UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default_value=2),
            UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 20, default_value=1),
            Constant("min_weight_fraction_leaf", 0.0),
            UnParametrizedHyperparameter("max_leaf_nodes", "None"),
            UnParametrizedHyperparameter('min_impurity_decrease', 0.0),
        ])
        return space
예제 #15
0
def choice(label: str, options: List, default=None):
    """Build a Constant or weighted CategoricalHyperparameter from ``options``.

    Args:
        label: Name of the resulting hyperparameter.
        options: Candidate values.  A two-element tuple/list is read as a
            (value, probability) pair in either order: the element that is
            a number within [0, 1] is taken as the probability, the other as
            the value.  Any other option is a plain value with no explicit
            probability.
        default: Optional default value; passed through ``_encode`` when it
            is not None.

    Returns:
        ``Constant(label, _encode(options[0]))`` when there is exactly one
        option, otherwise a CategoricalHyperparameter over the encoded
        choices, weighted by the resolved probabilities.

    Raises:
        AssertionError: if a two-element option does not contain both a
            value and a probability.
    """
    if len(options) == 1:
        return Constant(label, _encode(options[0]))  # fixme: if declare probability in here?
    # fixme: copy from hyperflow/hdl2shps/hdl2shps.py:354
    choice2proba = {}
    unweighted_values = []
    sum_proba = 0
    encoded_choices = []
    raw_choices = []
    for option in options:
        if isinstance(option, (tuple, list)) and len(option) == 2:
            value = None
            proba = None
            for item in option:
                if isinstance(item, (float, int)) and 0 <= item <= 1:
                    proba = item
                else:
                    value = item
            assert value is not None and proba is not None
            choice2proba[value] = proba
            sum_proba += proba
        else:
            value = option
            unweighted_values.append(value)
        encoded_choices.append(_encode(value))
        raw_choices.append(value)
    if sum_proba <= 1:
        # Share the leftover probability mass equally among the values that
        # did not declare one.
        if unweighted_values:
            p_rest = (1 - sum_proba) / len(unweighted_values)
            for unweighted_value in unweighted_values:
                choice2proba[unweighted_value] = p_rest
    else:
        # Declared probabilities exceed 1: fall back to a uniform
        # distribution.  The table is keyed by the RAW values because the
        # lookup below uses raw values; keying by the encoded values raised
        # KeyError whenever _encode changed a value.
        choice2proba = {k: 1 / len(options) for k in raw_choices}
    proba_list = [choice2proba[k] for k in raw_choices]
    kwargs = {}
    # Compare against None so that falsy defaults (0, False, "") are kept.
    if default is not None:
        kwargs['default_value'] = _encode(default)
    hp = CategoricalHyperparameter(
        label, encoded_choices, weights=proba_list, **kwargs)
    hp.probabilities = proba_list  # fixme: don't make sense
    return hp
예제 #16
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Assemble and return the ConfigurationSpace for this component.

        ``dataset_properties`` is not read by this function.
        """
        space = ConfigurationSpace()
        space.add_hyperparameters([
            Constant("loss", "deviance"),
            UniformFloatHyperparameter(
                "learning_rate", 0.01, 1, default=0.1, log=True),
            UniformIntegerHyperparameter(
                name="n_estimators", lower=50, upper=500, default=100),
            UniformIntegerHyperparameter("max_depth", 1, 10, default=3),
            UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default=2, log=False),
            UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 20, default=1, log=False),
            UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.),
            UniformFloatHyperparameter(
                "subsample", 0.01, 1.0, default=1.0, log=False),
            UniformFloatHyperparameter(
                name="max_features", lower=0.5, upper=5, default=1),
            UnParametrizedHyperparameter("max_leaf_nodes", "None"),
        ])
        return space
예제 #17
0
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        """Return the search space in the format expected by ``optimizer``.

        ``'smac'`` yields a ConfigurationSpace in which ``score_func`` is
        fixed to ``"chi2"`` when ``dataset_properties`` has a truthy
        ``'sparse'`` entry.  ``'tpe'`` yields a dict of hyperopt expressions
        and does not read ``dataset_properties``.  Any other value returns
        None.
        """
        if optimizer == 'tpe':
            from hyperopt import hp
            return {
                'alpha': hp.uniform('gus_alpha', 0.01, 0.5),
                'score_func': hp.choice(
                    'gus_score_func', ['chi2', 'f_classif', 'mutual_info']),
                'mode': hp.choice('gus_mode', ['fpr', 'fdr', 'fwe']),
            }

        if optimizer != 'smac':
            return None

        alpha = UniformFloatHyperparameter(
            "alpha", 0.01, 0.5, default_value=0.1)

        # Chi2 can handle sparse data, so sparse input pins the score function.
        sparse_input = (dataset_properties is not None
                        and 'sparse' in dataset_properties
                        and dataset_properties['sparse'])
        if sparse_input:
            score_func = Constant("score_func", "chi2")
        else:
            score_func = CategoricalHyperparameter(
                "score_func", ["chi2", "f_classif"], default_value="chi2")

        mode = CategoricalHyperparameter('mode', ['fpr', 'fdr', 'fwe'], 'fpr')

        space = ConfigurationSpace()
        for hyperparameter in (alpha, score_func, mode):
            space.add_hyperparameter(hyperparameter)
        return space
 def get_hyperparameter_search_space(dataset_properties=None):
     """Assemble and return the ConfigurationSpace for this component.

     ``dataset_properties`` is not read by this function.
     """
     hyperparameters = [
         UniformIntegerHyperparameter("n_estimators", 10, 100, default=10),
         UniformIntegerHyperparameter("max_depth", 2, 10, default=5),
         UniformIntegerHyperparameter("min_samples_split", 2, 20, default=2),
         UniformIntegerHyperparameter("min_samples_leaf", 1, 20, default=1),
         Constant('min_weight_fraction_leaf', 1.0),
         UnParametrizedHyperparameter("max_leaf_nodes", "None"),
     ]
     space = ConfigurationSpace()
     space.add_hyperparameters(hyperparameters)
     return space
예제 #19
0
    def get_hyperparameter_search_space(dataset_properties=None):
        """Assemble and return the ConfigurationSpace for this component.

        ``dataset_properties`` is not read by this function.
        """
        space = ConfigurationSpace()
        space.add_hyperparameters([
            Constant("n_estimators", 100),
            CategoricalHyperparameter(
                "criterion", ["gini", "entropy"], default_value="gini"),
            UniformFloatHyperparameter(
                "max_features", 0, 1, default_value=0.5, q=0.05),
            UnParametrizedHyperparameter("max_depth", "None"),
            UnParametrizedHyperparameter("max_leaf_nodes", "None"),
            UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default_value=2),
            UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 20, default_value=1),
            UnParametrizedHyperparameter('min_weight_fraction_leaf', 0.),
            UnParametrizedHyperparameter('min_impurity_decrease', 0.),
            CategoricalHyperparameter(
                "bootstrap", ["True", "False"], default_value="False"),
        ])
        return space
    def test_constant(self):
        """Exercise Constant: construction, attributes, repr, equality,
        argument checks and meta-data storage."""
        # Construction: an identical twin plus variants differing in value,
        # in name, and in both.
        base = Constant("value", 1)
        twin = Constant("value", 1)
        other_value = Constant("value", 2)
        other_name = Constant("valuee", 1)
        other_both = Constant("valueee", 2)

        # Attributes are accessible
        self.assertEqual(other_both.name, "valueee")
        self.assertEqual(other_both.value, 2)

        # Representation
        self.assertEqual("value, Type: Constant, Value: 1", repr(base))

        # Equality operator (the final check covers the != operator)
        self.assertFalse(base == 1)
        self.assertEqual(base, twin)
        for different in (other_value, other_name):
            self.assertFalse(base == different)
        self.assertTrue(base != other_both)

        # Only strings, integers and floats are accepted as values
        for bad_value in (dict(), None, True):
            with self.assertRaises(TypeError):
                Constant("value", bad_value)

        # Only strings are accepted as names
        for bad_name in (1, dict(), None, True):
            with self.assertRaises(TypeError):
                Constant(bad_name, "value")

        # Meta-data handed to the constructor is stored on the instance; a
        # copy of self.meta_data is passed in.
        with_meta = Constant("value", 1, dict(self.meta_data))
        self.assertEqual(with_meta.meta, self.meta_data)
예제 #21
0
    def get_hyperparameter_search_space(dataset_properties=None, optimizer='smac'):
        """Return the search space in the format expected by ``optimizer``.

        ``'smac'`` yields a ConfigurationSpace, ``'tpe'`` yields a dict of
        hyperopt expressions.  Any other value falls through and returns
        None.  ``dataset_properties`` is not read by this function.
        """
        if optimizer == 'smac':
            cs = ConfigurationSpace()

            n_estimators = Constant("n_estimators", 100)
            criterion = CategoricalHyperparameter(
                "criterion", ["mse", "mae"], default_value="mse")

            # The maximum number of features used in the forest is calculated as m^max_features, where
            # m is the total number of features, and max_features is the hyperparameter specified below.
            # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This
            # corresponds with Geurts' heuristic.
            max_features = UniformFloatHyperparameter(
                "max_features", 0., 1., default_value=0.5)

            min_samples_split = UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default_value=2)
            min_samples_leaf = UniformIntegerHyperparameter(
                "min_samples_leaf", 1, 20, default_value=1)

            bootstrap = CategoricalHyperparameter(
                "bootstrap", ["True", "False"], default_value="False")
            cs.add_hyperparameters([n_estimators, criterion, max_features, min_samples_split, min_samples_leaf,
                                    bootstrap])

            return cs
        elif optimizer == 'tpe':
            from hyperopt import hp
            # NOTE(review): the label 'et_min_samples_leaf,' carries a stray
            # trailing comma.  It is left untouched because previously stored
            # trials may be keyed on it; confirm before renaming.
            space = {'n_estimators': hp.choice('et_n_estimators', [100]),
                     'criterion': hp.choice('et_criterion', ["mse", "mae"]),
                     'max_features': hp.uniform('et_max_features', 0, 1),
                     'min_samples_split': hp.randint('et_min_samples_split', 19) + 2,
                     'min_samples_leaf': hp.randint('et_min_samples_leaf,', 20) + 1,
                     'bootstrap': hp.choice('et_bootstrap', ["True", "False"])}

            # The unused local `init_trial` dict that used to be built here
            # was never read or returned, so it has been dropped.
            return space
예제 #22
0
def get_random_forest_default_search_space(seed):
    """Build the default search space for an imputation + random-forest pipeline.

    Hyperparameter names are prefixed with ``imputation__`` or
    ``classifier__``.

    Args:
        seed: Forwarded as the second positional argument of
            ``ConfigurationSpace``.

    Returns:
        The populated ``ConfigurationSpace``.
    """
    space = ConfigurationSpace('sklearn.ensemble.RandomForestClassifier', seed)

    # max_depth and max_leaf_nodes are not part of this space; their
    # definitions were disabled in an earlier revision.
    space.add_hyperparameters([
        CategoricalHyperparameter(
            'imputation__strategy', ['mean', 'median', 'most_frequent']),
        Constant("classifier__n_estimators", 100),
        CategoricalHyperparameter(
            "classifier__criterion", ["gini", "entropy"],
            default_value="gini"),
        # The maximum number of features used in the forest is calculated as
        # m^max_features, where m is the total number of features and
        # max_features is this hyperparameter. The default of 0.5 yields
        # sqrt(m) features in the estimator (Geurts' heuristic).
        UniformFloatHyperparameter(
            "classifier__max_features", 0., 1., default_value=0.5),
        UniformIntegerHyperparameter(
            "classifier__min_samples_split", 2, 20, default_value=2),
        UniformIntegerHyperparameter(
            "classifier__min_samples_leaf", 1, 20, default_value=1),
        UnParametrizedHyperparameter(
            "classifier__min_weight_fraction_leaf", 0.),
        CategoricalHyperparameter(
            "classifier__bootstrap", ["True", "False"],
            default_value="True"),
        UnParametrizedHyperparameter(
            'classifier__min_impurity_decrease', 0.0),
    ])

    return space
예제 #23
0
    def test_nested_conjunctions(self):
        """Check ``str()`` of And/Or conjunctions nested inside one another."""
        child = Constant("AND", "True")
        parents = [
            CategoricalHyperparameter("input%d" % idx, [0, 1])
            for idx in range(1, 6)
        ]

        # One EqualsCondition per input, each tying ``child`` to that input
        # taking the value 1.
        on_1, on_2, on_3, on_4, on_5 = [
            EqualsCondition(child, parent, 1) for parent in parents
        ]

        inner_and = AndConjunction(on_1, on_2)
        middle_or = OrConjunction(inner_and, on_3)
        outer_and = AndConjunction(middle_or, on_4, on_5)

        # TODO: this does not look nice, And should depend on a large
        # conjunction, there should not be many ANDs inside this string!
        expected = ("(((AND | input1 == 1 && AND | input2 == 1) "
                    "|| AND | input3 == 1) "
                    "&& AND | input4 == 1 && AND | input5 == 1)")
        self.assertEqual(expected, str(outer_and))
예제 #24
0
 def get_hyperparameter_search_space(dataset_properties=None):
     """Build the configuration space for this component.

     Args:
         dataset_properties: Accepted for interface compatibility; not read
             here.

     Returns:
         A ``ConfigurationSpace`` holding the hyperparameters listed below,
         added one at a time in the order shown.
     """
     # NOTE(review): the constructors take ``default=``; newer ConfigSpace
     # releases spell this ``default_value`` -- confirm the targeted version.
     hyperparameters = (
         Constant("n_estimators", 100),
         CategoricalHyperparameter("criterion", ["gini", "entropy"],
                                   default="gini"),
         UniformFloatHyperparameter("max_features", 0.5, 5, default=1),
         UnParametrizedHyperparameter("max_depth", "None"),
         UniformIntegerHyperparameter("min_samples_split", 2, 20, default=2),
         UniformIntegerHyperparameter("min_samples_leaf", 1, 20, default=1),
         UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.),
         UnParametrizedHyperparameter("max_leaf_nodes", "None"),
         CategoricalHyperparameter("bootstrap", ["True", "False"],
                                   default="True"),
     )

     cs = ConfigurationSpace()
     for hyperparameter in hyperparameters:
         cs.add_hyperparameter(hyperparameter)
     return cs
예제 #25
0
 def get_hyperparameter_search_space(dataset_properties=None,
                                     optimizer='smac'):
     """Build the configuration space for this boosted-tree component.

     Args:
         dataset_properties: Not read by this function.
         optimizer: Not read by this function; the same space is returned
             regardless of its value.

     Returns:
         A ``ConfigurationSpace`` with seven hyperparameters.
     """
     space = ConfigurationSpace()
     space.add_hyperparameters([
         # Declared as a float range quantized in steps of 50.
         UniformFloatHyperparameter(name="n_estimators",
                                    lower=100, upper=1000,
                                    default_value=500, q=50),
         UniformIntegerHyperparameter(name="num_leaves",
                                      lower=31, upper=2047,
                                      default_value=128),
         Constant('max_depth', 15),
         UniformFloatHyperparameter(name="learning_rate",
                                    lower=1e-3, upper=0.3,
                                    default_value=0.1, log=True),
         UniformIntegerHyperparameter(name="min_child_samples",
                                      lower=5, upper=30,
                                      default_value=20),
         UniformFloatHyperparameter(name="subsample",
                                    lower=0.7, upper=1,
                                    default_value=1, q=0.1),
         UniformFloatHyperparameter(name="colsample_bytree",
                                    lower=0.7, upper=1,
                                    default_value=1, q=0.1),
     ])
     return space
예제 #26
0
    def test_or_conjunction(self):
        """Exercise ``OrConjunction``: argument checks, equality and ``str()``."""
        # Arguments that are not condition objects must be rejected. This
        # targets OrConjunction itself so the class under test is the one
        # whose validation is covered.
        self.assertRaises(TypeError, OrConjunction, "String1", "String2")

        hp1 = CategoricalHyperparameter("input1", [0, 1])
        hp2 = CategoricalHyperparameter("input2", [0, 1])
        hp3 = CategoricalHyperparameter("input3", [0, 1])
        hp4 = Constant("Or", "True")
        cond1 = EqualsCondition(hp4, hp1, 1)

        # A conjunction over a single condition is an error.
        self.assertRaises(ValueError, OrConjunction, cond1)

        cond2 = EqualsCondition(hp4, hp2, 1)
        cond3 = EqualsCondition(hp4, hp3, 1)

        # Two conjunctions built from the same conditions compare equal ...
        orconj1 = OrConjunction(cond1, cond2)
        orconj1_ = OrConjunction(cond1, cond2)
        self.assertEqual(orconj1, orconj1_)

        # ... while different component sets compare unequal.
        orconj2 = OrConjunction(cond2, cond3)
        self.assertNotEqual(orconj1, orconj2)

        orconj3 = OrConjunction(cond1, cond2, cond3)
        self.assertEqual("(Or | input1 == 1 || Or | input2 == 1 || Or | "
                         "input3 == 1)", str(orconj3))
예제 #27
0
 def test_get_types_with_inactive(self):
     """Check ``get_types`` output when five hyperparameters are conditional.

     Hyperparameters ``b`` through ``f`` each get an ``EqualsCondition`` on
     ``a == 'a'``; the test then pins the ``types`` and ``bounds`` that
     ``get_types(cs, None)`` returns for that space.
     """
     cs = ConfigurationSpace()
     a = cs.add_hyperparameter(CategoricalHyperparameter('a', ['a', 'b']))
     dependents = [
         cs.add_hyperparameter(UniformFloatHyperparameter('b', 1, 5)),
         cs.add_hyperparameter(UniformIntegerHyperparameter('c', 3, 7)),
         cs.add_hyperparameter(Constant('d', -5)),
         cs.add_hyperparameter(OrdinalHyperparameter('e', ['cold', 'hot'])),
         cs.add_hyperparameter(CategoricalHyperparameter('f', ['x', 'y'])),
     ]
     for dependent in dependents:
         cs.add_condition(EqualsCondition(dependent, a, 'a'))

     types, bounds = get_types(cs, None)
     np.testing.assert_array_equal(types, [2, 0, 0, 2, 0, 3])

     # Expected (first, second) bound entry per column, in column order.
     # ``None`` as the second entry means "must be non-finite".
     expected_bounds = [
         (2, None), (-1, 1), (-1, 1), (2, None), (0, 2), (3, None),
     ]
     for column, (first, second) in enumerate(expected_bounds):
         if second is None:
             self.assertEqual(bounds[column][0], first)
             self.assertFalse(np.isfinite(bounds[column][1]))
         else:
             np.testing.assert_array_equal(bounds[column], [first, second])
예제 #28
0
 def get_hyperparameter_search_space(dataset_properties=None,
                                     optimizer='smac'):
     """Build the configuration space for this tree-ensemble component.

     Args:
         dataset_properties: Not read by this function.
         optimizer: Not read by this function.

     Returns:
         A ``ConfigurationSpace`` with seven hyperparameters.
     """
     space = ConfigurationSpace()
     space.add_hyperparameters([
         UniformIntegerHyperparameter("n_estimators", 10, 100,
                                      default_value=10),
         UniformIntegerHyperparameter("max_depth", 2, 10, default_value=5),
         UniformIntegerHyperparameter("min_samples_split", 2, 20,
                                      default_value=2),
         UniformIntegerHyperparameter("min_samples_leaf", 1, 20,
                                      default_value=1),
         # NOTE(review): fixed at 1.0 -- confirm the consumer really expects
         # this value (or ignores the parameter).
         Constant('min_weight_fraction_leaf', 1.0),
         UnParametrizedHyperparameter("max_leaf_nodes", "None"),
         CategoricalHyperparameter('bootstrap', ['True', 'False']),
     ])
     return space
예제 #29
0
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        """Build the hyperparameter search space for the chosen optimizer.

        Args:
            dataset_properties: Not read by this function.
            optimizer: ``'smac'`` returns a ``ConfigurationSpace``;
                ``'tpe'`` returns a dict of hyperopt expressions.

        Returns:
            The search space for ``optimizer``, or ``None`` for any other
            value.
        """
        if optimizer == 'smac':
            space = ConfigurationSpace()

            epsilon = CategoricalHyperparameter(
                name="epsilon",
                choices=[1e-4, 1e-3, 1e-2, 1e-1, 1],
                default_value=1e-4)
            loss = CategoricalHyperparameter(
                name="loss",
                choices=["epsilon_insensitive", "squared_epsilon_insensitive"],
                default_value="epsilon_insensitive")
            dual = CategoricalHyperparameter(
                name="dual", choices=['True', 'False'], default_value='True')
            tol = UniformFloatHyperparameter(
                name="tol", lower=1e-5, upper=1e-1,
                default_value=1e-4, log=True)
            C = UniformFloatHyperparameter(
                name="C", lower=0.03125, upper=32768,
                default_value=1.0, log=True)

            space.add_hyperparameters([
                epsilon, loss, dual, tol, C,
                Constant("fit_intercept", "True"),
                Constant("intercept_scaling", 1),
            ])

            # Rule out dual == "False" combined with the plain
            # epsilon-insensitive loss.
            space.add_forbidden_clause(ForbiddenAndConjunction(
                ForbiddenEqualsClause(dual, "False"),
                ForbiddenEqualsClause(loss, "epsilon_insensitive")))
            return space

        if optimizer == 'tpe':
            from hyperopt import hp

            # The three (loss, dual) pairs left after the exclusion used in
            # the smac branch; they are sampled jointly under the 'loss' key,
            # so 'dual' itself carries no expression.
            loss_dual_pairs = [
                {'loss': "epsilon_insensitive", 'dual': "True"},
                {'loss': "squared_epsilon_insensitive", 'dual': "True"},
                {'loss': "squared_epsilon_insensitive", 'dual': "False"},
            ]
            # NOTE(review): no 'epsilon' entry here although the smac space
            # has one -- confirm this is intended.
            space = {
                'loss': hp.choice('liblinear_combination', loss_dual_pairs),
                'dual': None,
                'tol': hp.loguniform('liblinear_tol',
                                     np.log(1e-5), np.log(1e-1)),
                'C': hp.loguniform('liblinear_C',
                                   np.log(0.03125), np.log(32768)),
                'fit_intercept': hp.choice('liblinear_fit_intercept',
                                           ["True"]),
                'intercept_scaling': hp.choice('liblinear_intercept_scaling',
                                               [1]),
            }

            # Starting point matching the smac defaults above. It is built
            # here but neither returned nor otherwise used.
            init_trial = {
                'loss': {'loss': "epsilon_insensitive", 'dual': "True"},
                'tol': 1e-4,
                'C': 1,
                'fit_intercept': "True",
                'intercept_scaling': 1,
            }

            return space

        return None
from ConfigSpace.conditions import InCondition
from automl.utl import json_utils

# Module-level search-space definition.
# NOTE(review): the loss names suggest a gradient-boosting regressor --
# confirm against the code that consumes these objects.
cs = ConfigurationSpace()
loss = CategoricalHyperparameter("loss", ["ls", "lad", "huber", "quantile"],
                                 default_value="ls")
# Sampled on a log scale between 0.01 and 1.
learning_rate = UniformFloatHyperparameter(name="learning_rate",
                                           lower=0.01,
                                           upper=1,
                                           default_value=0.1,
                                           log=True)
n_estimators = UniformIntegerHyperparameter("n_estimators",
                                            50,
                                            500,
                                            default_value=100)
# Fixed to the string "None" and registered under the name "max_depth_none".
max_depth = Constant("max_depth_none", "None")
# Both min_samples_* parameters are float ranges here, not integer counts.
min_samples_split = UniformFloatHyperparameter("min_samples_split",
                                               0.,
                                               1.,
                                               default_value=0.5)
min_samples_leaf = UniformFloatHyperparameter("min_samples_leaf",
                                              0.,
                                              0.5,
                                              default_value=0.0001)
min_weight_fraction_leaf = Constant("min_weight_fraction_leaf", 0.)
# Sampled on a linear scale (log=False).
subsample = UniformFloatHyperparameter(name="subsample",
                                       lower=0.01,
                                       upper=1.0,
                                       default_value=1.0,
                                       log=False)
max_features = UniformFloatHyperparameter("max_features",
예제 #31
0
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        """Build the hyperparameter search space for the chosen optimizer.

        Args:
            dataset_properties: Not read by this function.
            optimizer: ``'smac'`` returns a ``ConfigurationSpace``;
                ``'tpe'`` returns a dict of hyperopt expressions.

        Returns:
            The search space for ``optimizer``. Any other value falls
            through both branches and yields ``None``.
        """
        if optimizer == 'smac':
            cs = ConfigurationSpace()

            penalty = CategoricalHyperparameter("penalty", ["l1", "l2"],
                                                default_value="l2")
            loss = CategoricalHyperparameter("loss",
                                             ["hinge", "squared_hinge"],
                                             default_value="squared_hinge")
            dual = CategoricalHyperparameter("dual", ['True', 'False'],
                                             default_value='True')
            # This is set ad-hoc
            tol = UniformFloatHyperparameter("tol",
                                             1e-5,
                                             1e-1,
                                             default_value=1e-4,
                                             log=True)
            C = UniformFloatHyperparameter("C",
                                           0.03125,
                                           32768,
                                           log=True,
                                           default_value=1.0)
            multi_class = Constant("multi_class", "ovr")
            # These are set ad-hoc
            fit_intercept = Constant("fit_intercept", "True")
            intercept_scaling = Constant("intercept_scaling", 1)
            cs.add_hyperparameters([
                penalty, loss, dual, tol, C, multi_class, fit_intercept,
                intercept_scaling
            ])

            # Excluded combinations:
            #   * penalty l1 with hinge loss
            #   * dual "False" with penalty l2 and hinge loss
            #   * dual "True" with penalty l1
            penalty_and_loss = ForbiddenAndConjunction(
                ForbiddenEqualsClause(penalty, "l1"),
                ForbiddenEqualsClause(loss, "hinge"))
            constant_penalty_and_loss = ForbiddenAndConjunction(
                ForbiddenEqualsClause(dual, "False"),
                ForbiddenEqualsClause(penalty, "l2"),
                ForbiddenEqualsClause(loss, "hinge"))
            penalty_and_dual = ForbiddenAndConjunction(
                ForbiddenEqualsClause(dual, "True"),
                ForbiddenEqualsClause(penalty, "l1"))
            cs.add_forbidden_clause(penalty_and_loss)
            cs.add_forbidden_clause(constant_penalty_and_loss)
            cs.add_forbidden_clause(penalty_and_dual)
            return cs
        elif optimizer == 'tpe':
            # Imported locally so this branch does not rely on a module-level
            # ``hp`` binding and hyperopt is only needed when tpe is used.
            from hyperopt import hp

            # The four (penalty, loss, dual) combinations that the smac
            # branch does not forbid are sampled jointly under 'penalty';
            # 'loss' and 'dual' therefore carry no expression of their own.
            space = {
                'penalty':
                hp.choice('liblinear_combination', [{
                    'penalty': "l1",
                    'loss': "squared_hinge",
                    'dual': "False"
                }, {
                    'penalty': "l2",
                    'loss': "hinge",
                    'dual': "True"
                }, {
                    'penalty': "l2",
                    'loss': "squared_hinge",
                    'dual': "True"
                }, {
                    'penalty': "l2",
                    'loss': "squared_hinge",
                    'dual': "False"
                }]),
                'loss':
                None,
                'dual':
                None,
                'tol':
                hp.loguniform('liblinear_tol', np.log(1e-5), np.log(1e-1)),
                'C':
                hp.loguniform('liblinear_C', np.log(0.03125), np.log(32768)),
                'multi_class':
                hp.choice('liblinear_multi_class', ["ovr"]),
                'fit_intercept':
                hp.choice('liblinear_fit_intercept', ["True"]),
                'intercept_scaling':
                hp.choice('liblinear_intercept_scaling', [1])
            }

            # Starting point matching the smac defaults above. It is built
            # here but neither returned nor otherwise used. Keys follow the
            # names used in ``space`` ('multi_class', not 'multiclass').
            init_trial = {
                'penalty': {
                    'penalty': "l2",
                    'loss': "squared_hinge",
                    'dual': "True"
                },
                'tol': 1e-4,
                'C': 1,
                'multi_class': "ovr",
                'fit_intercept': "True",
                'intercept_scaling': 1
            }

            return space