def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        if optimizer == 'smac':
            epsilon = CategoricalHyperparameter("epsilon",
                                                [1e-4, 1e-3, 1e-2, 1e-1, 1],
                                                default_value=1e-4)
            C = UniformFloatHyperparameter("C",
                                           0.03125,
                                           32768,
                                           log=True,
                                           default_value=1.0)
            # No linear kernel here, because we have liblinear
            kernel = CategoricalHyperparameter(
                name="kernel",
                choices=["rbf", "poly", "sigmoid"],
                default_value="rbf")
            degree = UniformIntegerHyperparameter("degree",
                                                  2,
                                                  5,
                                                  default_value=3)
            gamma = UniformFloatHyperparameter("gamma",
                                               3.0517578125e-05,
                                               8,
                                               log=True,
                                               default_value=0.1)
            coef0 = UniformFloatHyperparameter("coef0", -1, 1, default_value=0)
            # probability is not a hyperparameter, but an argument to the SVM algorithm
            shrinking = CategoricalHyperparameter("shrinking",
                                                  ["True", "False"],
                                                  default_value="True")
            tol = UniformFloatHyperparameter("tol",
                                             1e-5,
                                             1e-1,
                                             default_value=1e-3,
                                             log=True)
            # cache size is not a hyperparameter, but an argument to the program!
            max_iter = UnParametrizedHyperparameter("max_iter", 2000)

            cs = ConfigurationSpace()
            cs.add_hyperparameters([
                epsilon, C, kernel, degree, gamma, coef0, shrinking, tol,
                max_iter
            ])

            degree_depends_on_poly = EqualsCondition(degree, kernel, "poly")
            coef0_condition = InCondition(coef0, kernel, ["poly", "sigmoid"])
            cs.add_condition(degree_depends_on_poly)
            cs.add_condition(coef0_condition)

            return cs
        elif optimizer == 'tpe':
            from hyperopt import hp
            import numpy as np
            coef0 = hp.uniform("libsvm_coef0", -1, 1)
            space = {
                'C':
                hp.loguniform('libsvm_C', np.log(0.03125), np.log(32768)),
                'gamma':
                hp.loguniform('libsvm_gamma', np.log(3.0517578125e-5),
                              np.log(8)),
                'shrinking':
                hp.choice('libsvm_shrinking', ["True", "False"]),
                'tol':
                hp.loguniform('libsvm_tol', np.log(1e-5), np.log(1e-1)),
                'max_iter':
                hp.choice('libsvm_max_iter', [2000]),
                'kernel':
                hp.choice('libsvm_kernel',
                          [("poly", {
                              'degree': hp.randint('libsvm_degree', 4) + 2,
                              'coef0': coef0
                          }), ("rbf", {}), ("sigmoid", {
                              'coef0': coef0
                          })])
            }

            init_trial = {
                'C': 1,
                'gamma': 0.1,
                'shrinking': "True",
                'tol': 1e-3,
                'max_iter': 2000,
                'kernel': ("rbf", {})
            }

            return space
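
As a quick illustration of how the conditions in the SMAC branch behave once the space is built, here is a minimal sketch (assuming the usual ConfigSpace imports are in scope and treating the method above as a plain function):

cs = get_hyperparameter_search_space(optimizer='smac')
cs.seed(1)
for config in cs.sample_configuration(5):
    params = config.get_dictionary()  # only the *active* hyperparameters appear
    # EqualsCondition: 'degree' is present iff kernel == 'poly'
    assert ('degree' in params) == (params['kernel'] == 'poly')
    # InCondition: 'coef0' is present iff kernel is 'poly' or 'sigmoid'
    assert ('coef0' in params) == (params['kernel'] in ('poly', 'sigmoid'))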
Example #2
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()

        loss = cs.add_hyperparameter(
            CategoricalHyperparameter("loss", [
                "hinge", "log", "modified_huber", "squared_hinge", "perceptron"
            ],
                                      default="log"))
        penalty = cs.add_hyperparameter(
            CategoricalHyperparameter("penalty", ["l1", "l2", "elasticnet"],
                                      default="l2"))
        alpha = cs.add_hyperparameter(
            UniformFloatHyperparameter("alpha",
                                       10e-7,
                                       1e-1,
                                       log=True,
                                       default=0.0001))
        l1_ratio = cs.add_hyperparameter(
            UniformFloatHyperparameter("l1_ratio",
                                       1e-9,
                                       1,
                                       log=True,
                                       default=0.15))
        fit_intercept = cs.add_hyperparameter(
            UnParametrizedHyperparameter("fit_intercept", "True"))
        n_iter = cs.add_hyperparameter(
            UniformIntegerHyperparameter("n_iter",
                                         5,
                                         1000,
                                         log=True,
                                         default=20))
        epsilon = cs.add_hyperparameter(
            UniformFloatHyperparameter("epsilon",
                                       1e-5,
                                       1e-1,
                                       default=1e-4,
                                       log=True))
        learning_rate = cs.add_hyperparameter(
            CategoricalHyperparameter("learning_rate",
                                      ["optimal", "invscaling", "constant"],
                                      default="optimal"))
        eta0 = cs.add_hyperparameter(
            UniformFloatHyperparameter("eta0", 10**-7, 0.1, default=0.01))
        power_t = cs.add_hyperparameter(
            UniformFloatHyperparameter("power_t", 1e-5, 1, default=0.25))
        average = cs.add_hyperparameter(
            CategoricalHyperparameter("average", ["False", "True"],
                                      default="False"))

        # TODO add passive/aggressive here, although not properly documented?
        elasticnet = EqualsCondition(l1_ratio, penalty, "elasticnet")
        epsilon_condition = EqualsCondition(epsilon, loss, "modified_huber")
        # eta0 seems to be always active according to the source code; when
        # learning_rate is set to 'optimal', eta0 is the starting value:
        # https://github.com/scikit-learn/scikit-learn/blob/0.15.X/sklearn/linear_model/sgd_fast.pyx
        #eta0_and_inv = EqualsCondition(eta0, learning_rate, "invscaling")
        #eta0_and_constant = EqualsCondition(eta0, learning_rate, "constant")
        #eta0_condition = OrConjunction(eta0_and_inv, eta0_and_constant)
        power_t_condition = EqualsCondition(power_t, learning_rate,
                                            "invscaling")

        cs.add_condition(elasticnet)
        cs.add_condition(epsilon_condition)
        cs.add_condition(power_t_condition)

        return cs
Example #3
def get_configspace_instance(algo_id='random_forest'):
    cs = ConfigurationSpace()
    if algo_id == 'random_forest':
        criterion = CategoricalHyperparameter("criterion", ["gini", "entropy"],
                                              default_value="gini")

        # The maximum number of features used in the forest is calculated as m^max_features, where
        # m is the total number of features, and max_features is the hyperparameter specified below.
        # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This
        # corresponds with Geurts' heuristic.
        max_features = UniformFloatHyperparameter("max_features",
                                                  0.,
                                                  1.,
                                                  default_value=0.5)

        max_depth = UnParametrizedHyperparameter("max_depth", "None")
        min_samples_split = UniformIntegerHyperparameter("min_samples_split",
                                                         2,
                                                         20,
                                                         default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter("min_samples_leaf",
                                                        1,
                                                        20,
                                                        default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            "min_weight_fraction_leaf", 0.)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)
        bootstrap = CategoricalHyperparameter("bootstrap", ["True", "False"],
                                              default_value="True")
        cs.add_hyperparameters([
            criterion, max_features, max_depth, min_samples_split,
            min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes,
            bootstrap, min_impurity_decrease
        ])
    elif algo_id == 'liblinear_svc':
        penalty = CategoricalHyperparameter("penalty", ["l1", "l2"],
                                            default_value="l2")
        loss = CategoricalHyperparameter("loss", ["hinge", "squared_hinge"],
                                         default_value="squared_hinge")
        dual = CategoricalHyperparameter("dual", ['True', 'False'],
                                         default_value='True')
        # This is set ad-hoc
        tol = UniformFloatHyperparameter("tol",
                                         1e-5,
                                         1e-1,
                                         default_value=1e-4,
                                         log=True)
        C = UniformFloatHyperparameter("C",
                                       0.03125,
                                       32768,
                                       log=True,
                                       default_value=1.0)
        multi_class = Constant("multi_class", "ovr")
        # These are set ad-hoc
        fit_intercept = Constant("fit_intercept", "True")
        intercept_scaling = Constant("intercept_scaling", 1)
        cs.add_hyperparameters([
            penalty, loss, dual, tol, C, multi_class, fit_intercept,
            intercept_scaling
        ])

        penalty_and_loss = ForbiddenAndConjunction(
            ForbiddenEqualsClause(penalty, "l1"),
            ForbiddenEqualsClause(loss, "hinge"))
        constant_penalty_and_loss = ForbiddenAndConjunction(
            ForbiddenEqualsClause(dual, "False"),
            ForbiddenEqualsClause(penalty, "l2"),
            ForbiddenEqualsClause(loss, "hinge"))
        penalty_and_dual = ForbiddenAndConjunction(
            ForbiddenEqualsClause(dual, "True"),
            ForbiddenEqualsClause(penalty, "l1"))
        cs.add_forbidden_clause(penalty_and_loss)
        cs.add_forbidden_clause(constant_penalty_and_loss)
        cs.add_forbidden_clause(penalty_and_dual)
    elif algo_id == 'lightgbm':
        # Quantized float in steps of 50 (q=50); cast the sampled value to int
        # before passing it to LightGBM.
        n_estimators = UniformFloatHyperparameter("n_estimators",
                                                  100,
                                                  1000,
                                                  default_value=500,
                                                  q=50)
        num_leaves = UniformIntegerHyperparameter("num_leaves",
                                                  31,
                                                  2047,
                                                  default_value=128)
        max_depth = Constant('max_depth', 15)
        learning_rate = UniformFloatHyperparameter("learning_rate",
                                                   1e-3,
                                                   0.3,
                                                   default_value=0.1,
                                                   log=True)
        min_child_samples = UniformIntegerHyperparameter("min_child_samples",
                                                         5,
                                                         30,
                                                         default_value=20)
        subsample = UniformFloatHyperparameter("subsample",
                                               0.7,
                                               1,
                                               default_value=1,
                                               q=0.1)
        colsample_bytree = UniformFloatHyperparameter("colsample_bytree",
                                                      0.7,
                                                      1,
                                                      default_value=1,
                                                      q=0.1)
        cs.add_hyperparameters([
            n_estimators, num_leaves, max_depth, learning_rate,
            min_child_samples, subsample, colsample_bytree
        ])
    elif algo_id == 'adaboost':
        n_estimators = UniformIntegerHyperparameter(name="n_estimators",
                                                    lower=50,
                                                    upper=500,
                                                    default_value=50,
                                                    log=False)
        learning_rate = UniformFloatHyperparameter(name="learning_rate",
                                                   lower=0.01,
                                                   upper=2,
                                                   default_value=0.1,
                                                   log=True)
        algorithm = CategoricalHyperparameter(name="algorithm",
                                              choices=["SAMME.R", "SAMME"],
                                              default_value="SAMME.R")
        max_depth = UniformIntegerHyperparameter(name="max_depth",
                                                 lower=2,
                                                 upper=8,
                                                 default_value=3,
                                                 log=False)
        cs.add_hyperparameters(
            [n_estimators, learning_rate, algorithm, max_depth])
    elif algo_id == 'lda':
        shrinkage = CategoricalHyperparameter("shrinkage",
                                              ["None", "auto", "manual"],
                                              default_value="None")
        shrinkage_factor = UniformFloatHyperparameter("shrinkage_factor", 0.,
                                                      1., 0.5)
        n_components = UniformIntegerHyperparameter('n_components',
                                                    1,
                                                    250,
                                                    default_value=10)
        tol = UniformFloatHyperparameter("tol",
                                         1e-5,
                                         1e-1,
                                         default_value=1e-4,
                                         log=True)
        cs.add_hyperparameters(
            [shrinkage, shrinkage_factor, n_components, tol])
        cs.add_condition(EqualsCondition(shrinkage_factor, shrinkage,
                                         "manual"))
    elif algo_id == 'extra_trees':
        criterion = CategoricalHyperparameter("criterion", ["gini", "entropy"],
                                              default_value="gini")

        # The maximum number of features used in the forest is calculated as m^max_features, where
        # m is the total number of features, and max_features is the hyperparameter specified below.
        # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This
        # corresponds with Geurts' heuristic.
        max_features = UniformFloatHyperparameter("max_features",
                                                  0.,
                                                  1.,
                                                  default_value=0.5)

        max_depth = UnParametrizedHyperparameter(name="max_depth",
                                                 value="None")

        min_samples_split = UniformIntegerHyperparameter("min_samples_split",
                                                         2,
                                                         20,
                                                         default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter("min_samples_leaf",
                                                        1,
                                                        20,
                                                        default_value=1)
        min_weight_fraction_leaf = UnParametrizedHyperparameter(
            'min_weight_fraction_leaf', 0.)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)

        bootstrap = CategoricalHyperparameter("bootstrap", ["True", "False"],
                                              default_value="False")
        cs.add_hyperparameters([
            criterion, max_features, max_depth, min_samples_split,
            min_samples_leaf, min_weight_fraction_leaf, max_leaf_nodes,
            min_impurity_decrease, bootstrap
        ])
    else:
        raise ValueError('Invalid algorithm - %s' % algo_id)
    return cs
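
The three forbidden clauses in the liblinear_svc branch exclude parameter combinations that liblinear itself rejects. A small sanity check, assuming the module-level ConfigSpace imports are in place:

cs = get_configspace_instance('liblinear_svc')
cs.seed(1)
for cfg in cs.sample_configuration(100):
    # none of the ForbiddenAndConjunction combinations can ever be sampled
    assert not (cfg['penalty'] == 'l1' and cfg['loss'] == 'hinge')
    assert not (cfg['penalty'] == 'l1' and cfg['dual'] == 'True')
    assert not (cfg['dual'] == 'False' and cfg['penalty'] == 'l2'
                and cfg['loss'] == 'hinge')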
Example #4
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()
        threshold = UnParametrizedHyperparameter("threshold", 0.)
        cs.add_hyperparameter(threshold)
        return cs
Example #5
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        if optimizer == 'smac':
            cs = ConfigurationSpace()

            loss = CategoricalHyperparameter("loss", [
                "squared_loss", "huber", "epsilon_insensitive",
                "squared_epsilon_insensitive"
            ],
                                             default_value="squared_loss")
            penalty = CategoricalHyperparameter("penalty",
                                                ["l1", "l2", "elasticnet"],
                                                default_value="l2")
            alpha = UniformFloatHyperparameter("alpha",
                                               1e-7,
                                               1e-1,
                                               log=True,
                                               default_value=0.0001)
            l1_ratio = UniformFloatHyperparameter("l1_ratio",
                                                  1e-9,
                                                  1,
                                                  log=True,
                                                  default_value=0.15)
            fit_intercept = UnParametrizedHyperparameter(
                "fit_intercept", "True")
            tol = UniformFloatHyperparameter("tol",
                                             1e-5,
                                             1e-1,
                                             log=True,
                                             default_value=1e-4)
            epsilon_huber = UniformFloatHyperparameter("epsilon_huber",
                                                       1e-5,
                                                       1e-1,
                                                       default_value=1e-4,
                                                       log=True)
            epsilon_insensitive = UniformFloatHyperparameter(
                "epsilon_insensitive",
                1e-5,
                1e-1,
                default_value=1e-4,
                log=True)
            learning_rate = CategoricalHyperparameter(
                "learning_rate", ["optimal", "invscaling", "constant"],
                default_value="invscaling")
            eta0 = UniformFloatHyperparameter("eta0",
                                              1e-7,
                                              1e-1,
                                              default_value=0.01,
                                              log=True)
            power_t = UniformFloatHyperparameter("power_t",
                                                 1e-5,
                                                 1,
                                                 log=True,
                                                 default_value=0.5)
            average = CategoricalHyperparameter("average", ["False", "True"],
                                                default_value="False")
            cs.add_hyperparameters([
                loss, penalty, alpha, l1_ratio, fit_intercept, tol,
                epsilon_huber, epsilon_insensitive, learning_rate, eta0,
                power_t, average
            ])

            elasticnet = EqualsCondition(l1_ratio, penalty, "elasticnet")
            epsilon_huber_condition = EqualsCondition(epsilon_huber, loss,
                                                      "huber")
            epsilon_insensitive_condition = InCondition(
                epsilon_insensitive, loss,
                ["epsilon_insensitive", "squared_epsilon_insensitive"])
            power_t_condition = EqualsCondition(power_t, learning_rate,
                                                "invscaling")

            # eta0 is only relevant if learning_rate!='optimal' according to code
            # https://github.com/scikit-learn/scikit-learn/blob/0.19.X/sklearn/
            # linear_model/sgd_fast.pyx#L603
            eta0_in_inv_con = InCondition(eta0, learning_rate,
                                          ["invscaling", "constant"])
            cs.add_conditions([
                elasticnet, epsilon_huber_condition,
                epsilon_insensitive_condition, power_t_condition,
                eta0_in_inv_con
            ])

            return cs
        elif optimizer == 'tpe':
            from hyperopt import hp
            import numpy as np
            eta0 = hp.loguniform('sgd_eta0', np.log(1e-7), np.log(1e-1))
            epsilon_insensitive = hp.loguniform('sgd_epsilon_insensitive',
                                                np.log(1e-5), np.log(1e-1))
            space = {
                'loss':
                hp.choice('sgd_loss',
                          [("huber", {
                              'epsilon_huber':
                              hp.loguniform('sgd_epsilon_huber', np.log(1e-5),
                                            np.log(1e-1))
                          }), ("squared_loss", {}),
                           ("epsilon_insensitive", {
                               'epsilon_insensitive': epsilon_insensitive
                           }),
                           ("squared_epsilon_insensitive", {
                               'epsilon_insensitive': epsilon_insensitive
                           })]),
                'penalty':
                hp.choice('sgd_penalty', [("elasticnet", {
                    'l1_ratio':
                    hp.loguniform('sgd_l1_ratio', np.log(1e-9), np.log(1))
                }), ("l1", None), ("l2", None)]),
                'alpha':
                hp.loguniform('sgd_alpha', np.log(1e-7), np.log(1e-1)),
                'fit_intercept':
                hp.choice('sgd_fit_intercept', ["True"]),
                'tol':
                hp.loguniform('sgd_tol', np.log(1e-5), np.log(1e-1)),
                'learning_rate':
                hp.choice(
                    'sgd_learning_rate',
                    [("optimal", {}),
                     ("invscaling", {
                         'power_t':
                         hp.loguniform('sgd_power_t', np.log(1e-5), np.log(1)),
                         'eta0':
                         eta0
                     }), ("constant", {
                         'eta0': eta0
                     })]),
                'average':
                hp.choice('sgd_average', ["True", "False"])
            }

            init_trial = {
                'loss': ("squared_loss", {}),
                'penalty': ("l2", {}),
                'alpha': 1e-4,
                'fit_intercept': "True",
                'tol': 1e-4,
                'learning_rate': ("invscaling", {
                    'power_t': 0.5,
                    'eta0': 0.01
                }),
                'average': "False"
            }

            return space
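
In the TPE branch the conditional structure is encoded through nested hp.choice nodes, so a raw sample is a dict whose choice entries are (label, sub-parameters) tuples. A sketch of drawing one sample with hyperopt's stochastic sampler (this uses hyperopt.pyll.stochastic directly rather than the project's own optimizer wiring, and treats the method above as a plain function):

import hyperopt.pyll.stochastic

space = get_hyperparameter_search_space(optimizer='tpe')
sample = hyperopt.pyll.stochastic.sample(space)
# e.g. sample['loss'] may be ('huber', {'epsilon_huber': 0.0003}); flatten such
# tuples before handing the values to the estimator
loss_name, loss_params = sample['loss']
print(loss_name, loss_params)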
Example #6
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        if optimizer == 'smac':
            cs = ConfigurationSpace()
            n_estimators = Constant("n_estimators", 100)
            criterion = CategoricalHyperparameter("criterion",
                                                  ["gini", "entropy"],
                                                  default_value="gini")

            # The maximum number of features used in the forest is calculated as m^max_features, where
            # m is the total number of features, and max_features is the hyperparameter specified below.
            # The default is 0.5, which yields sqrt(m) features as max_features in the estimator. This
            # corresponds with Geurts' heuristic.
            max_features = UniformFloatHyperparameter("max_features",
                                                      0.,
                                                      1.,
                                                      default_value=0.5)

            max_depth = UnParametrizedHyperparameter("max_depth", "None")
            min_samples_split = UniformIntegerHyperparameter(
                "min_samples_split", 2, 20, default_value=2)
            min_samples_leaf = UniformIntegerHyperparameter("min_samples_leaf",
                                                            1,
                                                            20,
                                                            default_value=1)
            min_weight_fraction_leaf = UnParametrizedHyperparameter(
                "min_weight_fraction_leaf", 0.)
            max_leaf_nodes = UnParametrizedHyperparameter(
                "max_leaf_nodes", "None")
            min_impurity_decrease = UnParametrizedHyperparameter(
                'min_impurity_decrease', 0.0)
            bootstrap = CategoricalHyperparameter("bootstrap",
                                                  ["True", "False"],
                                                  default_value="True")
            cs.add_hyperparameters([
                n_estimators, criterion, max_features, max_depth,
                min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
                max_leaf_nodes, bootstrap, min_impurity_decrease
            ])
            return cs
        elif optimizer == 'tpe':
            from hyperopt import hp
            space = {
                'n_estimators':
                hp.choice('rf_n_estimators', [100]),
                'criterion':
                hp.choice('rf_criterion', ["gini", "entropy"]),
                'max_features':
                hp.uniform('rf_max_features', 0, 1),
                'max_depth':
                hp.choice('rf_max_depth', [None]),
                'min_samples_split':
                hp.randint('rf_min_samples_split', 19) + 2,
                'min_samples_leaf':
                hp.randint('rf_min_samples_leaf', 20) + 1,
                'min_weight_fraction_leaf':
                hp.choice('rf_min_weight_fraction_leaf', [0]),
                'max_leaf_nodes':
                hp.choice('rf_max_leaf_nodes', [None]),
                'min_impurity_decrease':
                hp.choice('rf_min_impurity_decrease', [0]),
                'bootstrap':
                hp.choice('rf_bootstrap', ["True", "False"])
            }

            init_trial = {
                'n_estimators': 100,
                'criterion': "gini",
                'max_features': 0.5,
                'max_depth': None,
                'min_samples_split': 2,
                'min_samples_leaf': 1,
                'min_weight_fraction_leaf': 0,
                'max_leaf_nodes': None,
                'min_impurity_decrease': 0,
                'bootstrap': "False"
            }

            return space
Example #7
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()

        loss = CategoricalHyperparameter("loss", [
            "squared_loss", "huber", "epsilon_insensitive",
            "squared_epsilon_insensitive"
        ],
                                         default_value="squared_loss")
        penalty = CategoricalHyperparameter("penalty",
                                            ["l1", "l2", "elasticnet"],
                                            default_value="l2")
        alpha = UniformFloatHyperparameter("alpha",
                                           1e-7,
                                           1e-1,
                                           log=True,
                                           default_value=0.0001)
        l1_ratio = UniformFloatHyperparameter("l1_ratio",
                                              1e-9,
                                              1,
                                              log=True,
                                              default_value=0.15)
        fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
        tol = UniformFloatHyperparameter("tol",
                                         1e-5,
                                         1e-1,
                                         log=True,
                                         default_value=1e-4)
        epsilon_huber = UniformFloatHyperparameter("epsilon_huber",
                                                   1e-5,
                                                   1e-1,
                                                   default_value=1e-4,
                                                   log=True)
        epsilon_insensitive = UniformFloatHyperparameter("epsilon_insensitive",
                                                         1e-5,
                                                         1e-1,
                                                         default_value=1e-4,
                                                         log=True)
        learning_rate = CategoricalHyperparameter(
            "learning_rate", ["optimal", "invscaling", "constant"],
            default_value="invscaling")
        eta0 = UniformFloatHyperparameter("eta0",
                                          1e-7,
                                          1e-1,
                                          default_value=0.01,
                                          log=True)
        power_t = UniformFloatHyperparameter("power_t",
                                             1e-5,
                                             1,
                                             log=True,
                                             default_value=0.5)
        average = CategoricalHyperparameter("average", ["False", "True"],
                                            default_value="False")
        cs.add_hyperparameters([
            loss, penalty, alpha, l1_ratio, fit_intercept, tol, epsilon_huber,
            epsilon_insensitive, learning_rate, eta0, power_t, average
        ])

        elasticnet = EqualsCondition(l1_ratio, penalty, "elasticnet")
        epsilon_huber_condition = EqualsCondition(epsilon_huber, loss, "huber")
        epsilon_insensitive_condition = InCondition(
            epsilon_insensitive, loss,
            ["epsilon_insensitive", "squared_epsilon_insensitive"])
        power_t_condition = EqualsCondition(power_t, learning_rate,
                                            "invscaling")

        # eta0 is only relevant if learning_rate!='optimal' according to code
        # https://github.com/scikit-learn/scikit-learn/blob/0.19.X/sklearn/
        # linear_model/sgd_fast.pyx#L603
        eta0_in_inv_con = InCondition(eta0, learning_rate,
                                      ["invscaling", "constant"])
        cs.add_conditions([
            elasticnet, epsilon_huber_condition, epsilon_insensitive_condition,
            power_t_condition, eta0_in_inv_con
        ])

        return cs
Example #8
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()

        loss = CategoricalHyperparameter("loss", ["log"], default_value="log")
        #"hinge", "log", "modified_huber", "squared_hinge", "perceptron"],
        #default_value="log")
        penalty = CategoricalHyperparameter(
            #"penalty", ["l1", "l2", "elasticnet"], default_value="l2")
            "penalty",
            ["l2"],
            default_value="l2")
        alpha = UniformFloatHyperparameter(
            #"alpha", 1e-7, 1e-1, log=True, default_value=0.0001)
            "alpha",
            1e-7,
            1,
            log=True,
            default_value=1)
        #l1_ratio = UniformFloatHyperparameter(
        #    "l1_ratio", 1e-9, 1,  log=True, default_value=0.15)
        fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
        # Fix tol to a constant so training is thorough (the range below pins it at ~1e-4)
        #tol = UniformFloatHyperparameter("tol", 1e-5, 1e-1, log=True,
        #                                 default_value=1e-4)
        tol = UniformFloatHyperparameter("tol",
                                         1e-4,
                                         1.1e-4,
                                         log=True,
                                         default_value=1e-4)
        #epsilon = UniformFloatHyperparameter(
        #    "epsilon", 1e-5, 1e-1, default_value=1e-4, log=True)
        learning_rate = CategoricalHyperparameter(
            "learning_rate",
            ["constant"],
            #["optimal", "invscaling", "constant"], default_value="invscaling")
            default_value="constant")
        eta0 = UniformFloatHyperparameter(
            #"eta0", 1e-7, 1e-1, default_value=0.01, log=True)
            "eta0",
            1e-2,
            1.1e-2,
            default_value=1e-2,
            log=True)
        #power_t = UniformFloatHyperparameter("power_t", 1e-5, 1,
        #                                     default_value=0.5)
        average = CategoricalHyperparameter(
            #"average", ["False", "True"], default_value="False")
            "average",
            ["False"],
            default_value="False")
        #cs.add_hyperparameters([loss, penalty, alpha, l1_ratio, fit_intercept,
        #                        tol, epsilon, learning_rate, eta0, power_t,
        #                        average])
        cs.add_hyperparameters([
            loss, penalty, alpha, fit_intercept, tol, learning_rate, eta0,
            average
        ])

        # TODO add passive/aggressive here, although not properly documented?
        '''
        elasticnet = EqualsCondition(l1_ratio, penalty, "elasticnet")
        epsilon_condition = EqualsCondition(epsilon, loss, "modified_huber")

        power_t_condition = EqualsCondition(power_t, learning_rate,
                                            "invscaling")

        # eta0 is only relevant if learning_rate!='optimal' according to code
        # https://github.com/scikit-learn/scikit-learn/blob/0.19.X/sklearn/
        # linear_model/sgd_fast.pyx#L603
        '''
        #eta0_in_inv_con = InCondition(eta0, learning_rate, ["constant"])
        #, "invscaling"])
        #cs.add_conditions([eta0_in_inv_con])
        #cs.add_conditions([elasticnet, epsilon_condition, power_t_condition,
        #                   eta0_in_inv_con])

        return cs
Example #9
    def __init__(self,
                 task_type,
                 estimator_id: str,
                 data: DataNode,
                 metric,
                 share_fe=False,
                 output_dir='logs',
                 per_run_time_limit=120,
                 per_run_mem_limit=5120,
                 dataset_id='default',
                 eval_type='holdout',
                 mth='rb',
                 sw_size=3,
                 n_jobs=1,
                 seed=1,
                 fe_algo='tree_based',
                 enable_intersection=True,
                 number_of_unit_resource=2,
                 total_resource=30):
        self.task_type = task_type
        self.metric = metric
        self.number_of_unit_resource = number_of_unit_resource
        # One unit of resource, that is, the number of trials per iteration.
        self.one_unit_of_resource = 5
        self.total_resource = total_resource
        self.per_run_time_limit = per_run_time_limit
        self.per_run_mem_limit = per_run_mem_limit
        self.estimator_id = estimator_id
        self.evaluation_type = eval_type
        self.original_data = data.copy_()
        self.share_fe = share_fe
        self.output_dir = output_dir
        self.n_jobs = n_jobs
        self.mth = mth
        self.seed = seed
        self.sliding_window_size = sw_size
        task_id = '%s-%d-%s' % (dataset_id, seed, estimator_id)
        self.logger = get_logger(self.__class__.__name__ + '-' + task_id)
        np.random.seed(self.seed)

        # Bandit settings.
        # self.arms = ['fe', 'hpo']
        self.arms = ['hpo', 'fe']
        self.rewards = dict()
        self.optimizer = dict()
        self.evaluation_cost = dict()
        self.update_flag = dict()
        # Global incumbent.
        self.inc = dict()
        self.local_inc = dict()
        self.local_hist = {'fe': [], 'hpo': []}
        for arm in self.arms:
            self.rewards[arm] = list()
            self.update_flag[arm] = False
            self.evaluation_cost[arm] = list()
        self.pull_cnt = 0
        self.action_sequence = list()
        self.final_rewards = list()
        self.incumbent_perf = float("-INF")
        self.early_stopped_flag = False
        self.enable_intersection = enable_intersection

        # Fetch hyperparameter space.
        if self.task_type in CLS_TASKS:
            from solnml.components.models.classification import _classifiers, _addons
            if estimator_id in _classifiers:
                clf_class = _classifiers[estimator_id]
            elif estimator_id in _addons.components:
                clf_class = _addons.components[estimator_id]
            else:
                raise ValueError("Algorithm %s not supported!" % estimator_id)
            cs = clf_class.get_hyperparameter_search_space()
            model = UnParametrizedHyperparameter("estimator", estimator_id)
            cs.add_hyperparameter(model)
        elif self.task_type in REG_TASKS:
            from solnml.components.models.regression import _regressors, _addons
            if estimator_id in _regressors:
                reg_class = _regressors[estimator_id]
            elif estimator_id in _addons.components:
                reg_class = _addons.components[estimator_id]
            else:
                raise ValueError("Algorithm %s not supported!" % estimator_id)
            cs = reg_class.get_hyperparameter_search_space()
            model = UnParametrizedHyperparameter("estimator", estimator_id)
            cs.add_hyperparameter(model)
        else:
            raise ValueError("Unknown task type %s!" % self.task_type)

        self.config_space = cs
        self.default_config = cs.get_default_configuration()
        self.config_space.seed(self.seed)

        # Build the Feature Engineering component.
        if self.task_type in CLS_TASKS:
            fe_evaluator = ClassificationEvaluator(
                self.default_config,
                scorer=self.metric,
                name='fe',
                resampling_strategy=self.evaluation_type,
                seed=self.seed)
            hpo_evaluator = ClassificationEvaluator(
                self.default_config,
                scorer=self.metric,
                data_node=self.original_data,
                name='hpo',
                resampling_strategy=self.evaluation_type,
                seed=self.seed)
        elif self.task_type in REG_TASKS:
            fe_evaluator = RegressionEvaluator(
                self.default_config,
                scorer=self.metric,
                name='fe',
                resampling_strategy=self.evaluation_type,
                seed=self.seed)
            hpo_evaluator = RegressionEvaluator(
                self.default_config,
                scorer=self.metric,
                data_node=self.original_data,
                name='hpo',
                resampling_strategy=self.evaluation_type,
                seed=self.seed)
        else:
            raise ValueError('Invalid task type!')

        self.fe_algo = fe_algo
        self.optimizer['fe'] = build_fe_optimizer(self.fe_algo,
                                                  self.evaluation_type,
                                                  self.task_type,
                                                  self.original_data,
                                                  fe_evaluator,
                                                  estimator_id,
                                                  per_run_time_limit,
                                                  per_run_mem_limit,
                                                  self.seed,
                                                  shared_mode=self.share_fe,
                                                  n_jobs=n_jobs)

        self.inc['fe'], self.local_inc[
            'fe'] = self.original_data, self.original_data

        # Build the HPO component.
        # trials_per_iter = max(len(self.optimizer['fe'].trans_types), 20)
        trials_per_iter = self.one_unit_of_resource * self.number_of_unit_resource

        self.optimizer['hpo'] = build_hpo_optimizer(
            self.evaluation_type,
            hpo_evaluator,
            cs,
            output_dir=output_dir,
            per_run_time_limit=per_run_time_limit,
            trials_per_iter=trials_per_iter,
            seed=self.seed,
            n_jobs=n_jobs)

        self.inc['hpo'], self.local_inc[
            'hpo'] = self.default_config, self.default_config
        self.init_config = cs.get_default_configuration()
        self.local_hist['fe'].append(self.original_data)
        self.local_hist['hpo'].append(self.default_config)
Example #10
def evaluate_evaluation_based_fe(dataset, time_limit, run_id, seed):
    from solnml.components.fe_optimizers.evaluation_based_optimizer import EvaluationBasedOptimizer

    # Prepare the configuration for random forest.
    from ConfigSpace.hyperparameters import UnParametrizedHyperparameter
    from autosklearn.pipeline.components.classification.random_forest import RandomForest
    cs = RandomForest.get_hyperparameter_search_space()
    clf_hp = UnParametrizedHyperparameter("estimator", 'random_forest')
    cs.add_hyperparameter(clf_hp)
    print(cs.get_default_configuration())
    """
    Configuration:
      bootstrap, Value: 'True'
      criterion, Value: 'gini'
      estimator, Constant: 'random_forest'
      max_depth, Constant: 'None'
      max_features, Value: 0.5
      max_leaf_nodes, Constant: 'None'
      min_impurity_decrease, Constant: 0.0
      min_samples_leaf, Value: 1
      min_samples_split, Value: 2
      min_weight_fraction_leaf, Constant: 0.0
      n_estimators, Constant: 100
    """
    evaluator = ClassificationEvaluator(cs.get_default_configuration(),
                                        name='fe',
                                        seed=seed,
                                        resampling_strategy='holdout')

    train_data, test_data = load_train_test_data(dataset)
    optimizer = EvaluationBasedOptimizer(MULTICLASS_CLS,
                                         train_data,
                                         evaluator,
                                         'random_forest',
                                         300,
                                         10000,
                                         seed,
                                         trans_set=None)

    _start_time = time.time()
    _iter_id = 0
    while True:
        if time.time() > _start_time + time_limit or optimizer.early_stopped_flag:
            break
        score, iteration_cost, inc = optimizer.iterate()
        print('%d - %.4f' % (_iter_id, score))
        _iter_id += 1

    final_train_data = optimizer.apply(train_data, optimizer.incumbent)
    val_score = evaluator(None, data_node=final_train_data)
    print('==> Best validation score', val_score, score)

    final_test_data = optimizer.apply(test_data, optimizer.incumbent)
    X_train, y_train = final_train_data.data
    clf = fetch_predict_estimator(MULTICLASS_CLS,
                                  cs.get_default_configuration(), X_train,
                                  y_train)
    X_test, y_test = final_test_data.data
    y_pred = clf.predict(X_test)

    from solnml.components.metrics.cls_metrics import balanced_accuracy
    test_score = balanced_accuracy(y_test, y_pred)
    print('==> Test score', test_score)

    save_path = save_dir + 'hmab_fe_%s_%d_%d.pkl' % (dataset, time_limit,
                                                     run_id)
    with open(save_path, 'wb') as f:
        pickle.dump([dataset, val_score, test_score], f)
Example #11
    def get_hyperparameter_search_space(dataset_properties=None):

        cs = ConfigurationSpace()

        # 1. Core parameters
        # Number of boosting iterations; for multi-class classification
        # problems LightGBM constructs num_class * num_iterations trees
        n_estimators = UnParametrizedHyperparameter(name="n_estimators",
                                                    value=100)
        '''
        boosting_type = CategoricalHyperparameter(
            'boosting_type', ['gbdt', 'gbrt', 'rf', 'random_forest', 'dart',
                              'goss'], default_value='gbdt')
        '''
        # Only use gbdt
        boosting_type = UnParametrizedHyperparameter(name="boosting_type",
                                                     value='gbdt')
        # Shrinkage rate
        learning_rate = UniformFloatHyperparameter(name="learning_rate",
                                                   lower=1e-6,
                                                   upper=1,
                                                   default_value=0.1,
                                                   log=True)
        # Max number of leaves in one tree
        num_leaves = UniformIntegerHyperparameter(name="num_leaves",
                                                  lower=7,
                                                  upper=127,
                                                  default_value=31)
        '''
        tree_learner = CategoricalHyperparameter(
            name='tree_learner', ['serial', 'feature', 'data', 'voting'],
            default_value='serial')
        '''
        # Only use the serial tree learner
        tree_learner = UnParametrizedHyperparameter(name='tree_learner',
                                                    value='serial')
        # Using default number of threads in OpenMP
        # Run on (physical CPU core count - 2) threads
        num_threads = UnParametrizedHyperparameter(
            name="num_threads", value=cpu_count(logical=False) - 2)

        # 2. Learning-control parameters
        # Limit the max depth of the tree model. This is used to deal with
        # over-fitting when the data is small. The tree still grows leaf-wise.
        # Depth is unlimited by default.
        max_depth = UnParametrizedHyperparameter(name="max_depth", value=-1)
        # Minimal number of data in one leaf. Can be used to deal with
        # over-fitting
        min_data_in_leaf = UniformIntegerHyperparameter(
            name="min_data_in_leaf", lower=10, upper=100, default_value=20)
        # Minimal sum hessian in one leaf
        min_sum_hessian_in_leaf = UniformFloatHyperparameter(
            name="min_sum_hessian_in_leaf",
            lower=1e-5,
            upper=100,
            default_value=1e-3,
            log=True)
        # Frequency for bagging
        # Bagging disabled by default
        bagging_freq = UnParametrizedHyperparameter(name="bagging_freq",
                                                    value=0)
        # Written in hyperparameter form, but not part of the search
        # Like feature_fraction, but this will randomly select part of data
        # without resampling
        bagging_fraction = UniformFloatHyperparameter(name="bagging_fraction",
                                                      lower=1e-10,
                                                      upper=1,
                                                      default_value=1,
                                                      log=True)
        # Random seed for bagging
        bagging_seed = UnParametrizedHyperparameter(name="bagging_seed",
                                                    value=3)
        # L1 regularization
        lambda_l1 = UniformFloatHyperparameter(name="lambda_l1",
                                               lower=1e-10,
                                               upper=0.1,
                                               default_value=1e-10,
                                               log=True)
        # L2 regularization
        lambda_l2 = UniformFloatHyperparameter(name="lambda_l2",
                                               lower=1e-10,
                                               upper=0.1,
                                               default_value=1e-10,
                                               log=True)
        # The minimal gain to perform split
        min_gain_to_split = UniformFloatHyperparameter(
            name="min_gain_to_split",
            lower=1e-10,
            upper=1,
            default_value=1e-10,
            log=True)
        # Only used in dart mode (disabled by default)
        # Dropout rate: a fraction of previous trees to drop during the dropout
        drop_rate = UniformFloatHyperparameter(name="drop_rate",
                                               lower=1e-10,
                                               upper=1,
                                               default_value=0.1,
                                               log=True)
        # Random seed to choose dropping models
        drop_seed = UnParametrizedHyperparameter(name="drop_seed", value=4)
        # LightGBM will randomly select part of features on each iteration if
        # feature_fraction smaller than 1.0. For example, if you set it to 0.8,
        # LightGBM will select 80% of features before training each tree
        feature_fraction = UniformFloatHyperparameter(name="feature_fraction",
                                                      lower=0.1,
                                                      upper=1,
                                                      default_value=1,
                                                      log=True)
        # Random seed for feature_fraction
        feature_fraction_seed = UnParametrizedHyperparameter(
            name="feature_fraction_seed", value=2)
        # Will stop training if one metric of one validation data doesn’t
        # improve in last early_stopping_round rounds
        # Early stopping after 50 rounds
        early_stopping_round = UnParametrizedHyperparameter(
            name="early_stopping_round", value=50)

        # 3. IO parameters
        # Max number of bins that feature values will be bucketed in
        # Fixed to the constant 255; reportedly the value recommended by the
        # project's author.
        max_bin = UnParametrizedHyperparameter(name="max_bin", value=255)

        cs.add_hyperparameters([
            # 1. Core parameters
            boosting_type,
            n_estimators,
            learning_rate,
            num_leaves,
            tree_learner,
            num_threads,
            # 2. Learning-control parameters
            max_depth,
            min_data_in_leaf,
            min_sum_hessian_in_leaf,
            bagging_freq,
            lambda_l1,
            lambda_l2,
            min_gain_to_split,
            feature_fraction,
            feature_fraction_seed,
            early_stopping_round,
            # 3. IO parameters
            max_bin
        ])
        '''
        # drop_rate/drop_seed only matter for dart, so add these conditions
        drop_rate_condition = EqualsCondition(
            drop_rate, boosting_type, 'dart',
        )
        drop_seed_condition = EqualsCondition(
            drop_seed, boosting_type, 'dart',
        )
        cs.add_conditions([
            drop_rate_condition, drop_seed_condition
        ])
        '''

        return cs
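
Since all names above follow LightGBM's native parameter naming, a sampled configuration can in principle be unpacked straight into the estimator. A sketch, assuming the lightgbm package is installed and treating the method above as a plain function (construction only; early_stopping_round additionally requires an eval_set at fit time):

import lightgbm as lgb

cs = get_hyperparameter_search_space()
params = cs.get_default_configuration().get_dictionary()
clf = lgb.LGBMClassifier(**params)  # aliases like min_data_in_leaf pass through **kwargs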
Example #12
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()
        loss = Constant("loss", "auto")
        learning_rate = UniformFloatHyperparameter(name="learning_rate",
                                                   lower=0.05,
                                                   upper=0.5,
                                                   default_value=0.1,
                                                   log=True)
        max_iter = UniformIntegerHyperparameter(name="max_iter",
                                                lower=50,
                                                upper=500,
                                                default_value=100,
                                                log=True)
        min_samples_leaf = UniformIntegerHyperparameter(
            name="min_samples_leaf",
            lower=10,
            upper=200,
            default_value=20,
            log=True)
        max_depth = CategoricalHyperparameter(name="max_depth",
                                              choices=[3, 4, 5, 6, 7, 8],
                                              default_value=4)
        max_leaf_nodes = UniformIntegerHyperparameter(name="max_leaf_nodes",
                                                      lower=8,
                                                      upper=255,
                                                      default_value=31,
                                                      log=True)
        max_bins = Constant("max_bins", 255)
        l2_regularization = UniformFloatHyperparameter(
            name="l2_regularization",
            lower=1E-7,
            upper=1,
            default_value=1E-5,
            log=True)

        early_stop = CategoricalHyperparameter(
            name="early_stop",
            choices=["off", "valid", "train"],
            default_value="off")
        tol = UnParametrizedHyperparameter(name="tol", value=1e-7)
        scoring = UnParametrizedHyperparameter(name="scoring", value="loss")
        n_iter_no_change = UniformIntegerHyperparameter(
            name="n_iter_no_change", lower=1, upper=20, default_value=10)
        validation_fraction = CategoricalHyperparameter(
            name="validation_fraction",
            choices=[0.1, 0.15, 0.2],
            default_value=0.15)

        cs.add_hyperparameters([
            loss, learning_rate, max_iter, min_samples_leaf, max_depth,
            max_leaf_nodes, max_bins, l2_regularization, early_stop, tol,
            scoring, n_iter_no_change, validation_fraction
        ])

        n_iter_no_change_cond = InCondition(n_iter_no_change, early_stop,
                                            ["valid", "train"])
        validation_fraction_cond = EqualsCondition(validation_fraction,
                                                   early_stop, "valid")

        cs.add_conditions([n_iter_no_change_cond, validation_fraction_cond])

        return cs
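
As with the SGD spaces, sampling respects the two early-stopping conditions; a short check, assuming the usual ConfigSpace imports and treating the method above as a plain function:

cs = get_hyperparameter_search_space()
cs.seed(1)
for cfg in cs.sample_configuration(50):
    params = cfg.get_dictionary()
    # validation_fraction is active only when early_stop == 'valid'
    assert ('validation_fraction' in params) == (params['early_stop'] == 'valid')
    # n_iter_no_change is active for both 'valid' and 'train'
    assert ('n_iter_no_change' in params) == (params['early_stop'] != 'off')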
Example #13
from ConfigSpace.configuration_space import ConfigurationSpace
from ConfigSpace.hyperparameters import UniformFloatHyperparameter, \
    UniformIntegerHyperparameter, CategoricalHyperparameter, \
    UnParametrizedHyperparameter, Constant
from automl.utl import json_utils

C = UniformFloatHyperparameter("C", 1e-5, 10, 1.0, log=True)
fit_intercept = UnParametrizedHyperparameter("fit_intercept", "True")
loss = CategoricalHyperparameter("loss", ["hinge", "squared_hinge"],
                                 default_value="hinge")

tol = UniformFloatHyperparameter("tol",
                                 1e-5,
                                 1e-1,
                                 default_value=1e-4,
                                 log=True)
# Note: Average could also be an Integer if > 1
average = CategoricalHyperparameter('average', ['False', 'True'],
                                    default_value='False')

cs = ConfigurationSpace()
cs.add_hyperparameters([loss, fit_intercept, tol, C, average])

json_utils.write_cs_to_json_file(cs, "PassiveAggressiveClassifier")
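Serialization aside, the space can be queried directly; a quick sketch using only standard ConfigSpace calls:

default = cs.get_default_configuration()
print(default["loss"], default["C"])   # "hinge", 1.0
sampled = cs.sample_configuration()    # one random draw from the space
print(sampled.get_dictionary())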
def create_hyperspace(regressor_id):
    if regressor_id == 'knn':
        from autosklearn.pipeline.components.regression.k_nearest_neighbors import KNearestNeighborsRegressor
        cs = KNearestNeighborsRegressor.get_hyperparameter_search_space()
    elif regressor_id == 'liblinear_svr':
        from autosklearn.pipeline.components.regression.liblinear_svr import LibLinear_SVR
        cs = LibLinear_SVR.get_hyperparameter_search_space()
    elif regressor_id == 'random_forest':
        cs = ConfigurationSpace()
        n_estimators = UniformIntegerHyperparameter("n_estimators", 100, 500, default_value=200)
        criterion = CategoricalHyperparameter("criterion",
                                              ['mse', 'friedman_mse', 'mae'])
        max_features = UniformFloatHyperparameter(
            "max_features", 0.1, 1.0, default_value=1.0)
        max_depth = UnParametrizedHyperparameter("max_depth", "None")
        min_samples_split = UniformIntegerHyperparameter(
            "min_samples_split", 2, 20, default_value=2)
        min_samples_leaf = UniformIntegerHyperparameter(
            "min_samples_leaf", 1, 20, default_value=1)
        min_weight_fraction_leaf = \
            UnParametrizedHyperparameter("min_weight_fraction_leaf", 0.)
        max_leaf_nodes = UnParametrizedHyperparameter("max_leaf_nodes", "None")
        min_impurity_decrease = UnParametrizedHyperparameter(
            'min_impurity_decrease', 0.0)
        bootstrap = CategoricalHyperparameter(
            "bootstrap", ["True", "False"], default_value="True")
        cs.add_hyperparameters([n_estimators, criterion, max_features,
                                max_depth, min_samples_split, min_samples_leaf,
                                min_weight_fraction_leaf, max_leaf_nodes,
                                min_impurity_decrease, bootstrap])

    elif regressor_id == 'lightgbm':
        cs = ConfigurationSpace()
        n_estimators = UniformIntegerHyperparameter("n_estimators", 100, 1000, default_value=500)
        num_leaves = UniformIntegerHyperparameter("num_leaves", 31, 1023, default_value=31)
        learning_rate = UniformFloatHyperparameter("learning_rate", 0.025, 0.3, default_value=0.1, log=True)
        min_child_weight = UniformIntegerHyperparameter("min_child_weight", 1, 10, default_value=1)
        subsample = UniformFloatHyperparameter("subsample", 0.5, 1, default_value=1)
        colsample_bytree = UniformFloatHyperparameter("colsample_bytree", 0.5, 1, default_value=1)
        reg_alpha = UniformFloatHyperparameter('reg_alpha', 1e-10, 10, log=True, default_value=1e-10)
        reg_lambda = UniformFloatHyperparameter("reg_lambda", 1e-10, 10, log=True, default_value=1e-10)
        cs.add_hyperparameters([n_estimators, num_leaves, learning_rate, min_child_weight, subsample,
                                colsample_bytree, reg_alpha, reg_lambda])
    elif 'catboost' in regressor_id:
        cs = ConfigurationSpace()
        max_depth = UniformIntegerHyperparameter("max_depth", 4, 12, default_value=6)
        learning_rate = UniformFloatHyperparameter("learning_rate", 0.01, 0.3, default_value=0.1, log=True)
        subsample = UniformFloatHyperparameter("subsample", 0.5, 1, default_value=1)
        reg_lambda = UniformFloatHyperparameter("reg_lambda", 1e-10, 10, log=True, default_value=1e-10)
        loss_function = CategoricalHyperparameter("loss_function", ['RMSE', 'MAE'], default_value='RMSE')

        if 'cpu' in regressor_id:
            n_estimators = UniformIntegerHyperparameter("n_estimators", 100, 1000, default_value=500)
            colsample_bylevel = UniformFloatHyperparameter("colsample_bylevel", 0.5, 1, default_value=1)
            cs.add_hyperparameters([n_estimators, max_depth, learning_rate, subsample,
                                    colsample_bylevel, reg_lambda, loss_function])
        elif 'gpu' in regressor_id:
            n_estimators = UniformIntegerHyperparameter("n_estimators", 1000, 10000, default_value=1000)
            min_child_samples = UniformIntegerHyperparameter("min_child_samples", 1, 15, default_value=1)
            cs.add_hyperparameters([n_estimators, max_depth, learning_rate, subsample,
                                    min_child_samples, reg_lambda, loss_function])
    # ---ADD THE HYPERSPACE FOR YOUR REGRESSOR---------------
    else:
        raise ValueError('Undefined regressor identifier: %s!' % regressor_id)
    model = UnParametrizedHyperparameter("estimator", regressor_id)
    cs.add_hyperparameter(model)
    return cs
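Example usage: every space returned by create_hyperspace carries the constant "estimator" hyperparameter, so a sampled configuration records which model it belongs to.

cs = create_hyperspace('lightgbm')
config = cs.sample_configuration().get_dictionary()
assert config['estimator'] == 'lightgbm'
print(config['num_leaves'], config['learning_rate'])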
def evaluate(mth, dataset, run_id):
    print(mth, dataset, run_id)
    train_data, test_data = load_train_test_data(dataset,
                                                 test_size=0.3,
                                                 task_type=MULTICLASS_CLS)

    cs = _classifiers[algo_name].get_hyperparameter_search_space()
    model = UnParametrizedHyperparameter("estimator", algo_name)
    cs.add_hyperparameter(model)
    default_hpo_config = cs.get_default_configuration()
    metric = get_metric('bal_acc')

    fe_evaluator = ClassificationEvaluator(default_hpo_config,
                                           scorer=metric,
                                           name='fe',
                                           resampling_strategy='holdout',
                                           seed=1)
    fe_optimizer = BayesianOptimizationOptimizer(task_type=MULTICLASS_CLS,
                                                 input_data=train_data,
                                                 evaluator=fe_evaluator,
                                                 model_id=algo_name,
                                                 time_limit_per_trans=600,
                                                 mem_limit_per_trans=5120,
                                                 number_of_unit_resource=10,
                                                 seed=1)
    config_space = fe_optimizer.hyperparameter_space

    def objective_function(config):
        return fe_optimizer.evaluate_function(config)

    if mth == 'gp_bo':
        bo = BO(objective_function, config_space, max_runs=max_runs)
        bo.run()
        print('new BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'lite_bo':
        from litebo.facade.bo_facade import BayesianOptimization
        bo = BayesianOptimization(objective_function,
                                  config_space,
                                  max_runs=max_runs)
        bo.run()
        print('lite BO result')
        print(bo.get_incumbent())
        perf_bo = bo.history_container.incumbent_value
    elif mth == 'smac':
        from smac.scenario.scenario import Scenario
        from smac.facade.smac_facade import SMAC
        # Scenario object
        scenario = Scenario({
            "run_obj": "quality",
            "runcount-limit": max_runs,
            "cs": config_space,
            "deterministic": "true"
        })
        smac = SMAC(scenario=scenario,
                    rng=np.random.RandomState(42),
                    tae_runner=objective_function)
        incumbent = smac.optimize()
        perf_bo = objective_function(incumbent)
        print('SMAC BO result')
        print(perf_bo)
    else:
        raise ValueError('Invalid method.')
    return perf_bo
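Note that evaluate() reads algo_name and max_runs from module scope rather than from its arguments. A hedged driver sketch (the dataset name and values below are placeholders, not from the source):

algo_name = 'random_forest'   # hypothetical: any key of _classifiers
max_runs = 50                 # hypothetical evaluation budget

perf = evaluate('lite_bo', 'iris', run_id=0)
print(perf)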
Example #16
    def __init__(self, classifier_id: str, data: DataNode,
                 share_fe=False, output_dir='logs',
                 per_run_time_limit=120,
                 per_run_mem_limit=5120,
                 eval_type='cv', dataset_id='default',
                 mth='rb', sw_size=3, strategy='avg',
                 n_jobs=1, seed=1):
        self.per_run_time_limit = per_run_time_limit
        self.per_run_mem_limit = per_run_mem_limit
        self.classifier_id = classifier_id
        self.evaluation_type = eval_type
        self.original_data = data.copy_()
        self.share_fe = share_fe
        self.output_dir = output_dir
        self.mth = mth
        self.strategy = strategy
        self.seed = seed
        self.sliding_window_size = sw_size
        self.logger = get_logger('%s:%s-%d=>%s' % (__class__.__name__, dataset_id, seed, classifier_id))
        np.random.seed(self.seed)

        # Bandit settings.
        self.arms = ['fe', 'hpo']
        self.rewards = dict()
        self.optimizer = dict()
        self.evaluation_cost = dict()
        self.inc = dict()
        self.local_inc = dict()
        for arm in self.arms:
            self.rewards[arm] = list()
            self.evaluation_cost[arm] = list()
        self.pull_cnt = 0
        self.action_sequence = list()
        self.final_rewards = list()
        self.incumbent_perf = -1.
        self.incumbent_source = None
        self.update_flag = dict()
        self.imp_rewards = dict()
        for arm in self.arms:
            self.update_flag[arm] = True
            self.imp_rewards[arm] = list()

        from autosklearn.pipeline.components.classification import _classifiers
        clf_class = _classifiers[classifier_id]
        cs = clf_class.get_hyperparameter_search_space()
        model = UnParametrizedHyperparameter("estimator", classifier_id)
        cs.add_hyperparameter(model)
        self.config_space = cs
        self.default_config = cs.get_default_configuration()
        self.config_space.seed(self.seed)

        # Build the Feature Engineering component.
        fe_evaluator = Evaluator(self.default_config,
                                 name='fe', resampling_strategy=self.evaluation_type,
                                 seed=self.seed)
        self.optimizer['fe'] = EvaluationBasedOptimizer(
                self.original_data, fe_evaluator,
                classifier_id, per_run_time_limit, per_run_mem_limit, self.seed,
                shared_mode=self.share_fe, n_jobs=n_jobs)
        self.inc['fe'], self.local_inc['fe'] = self.original_data, self.original_data

        # Build the HPO component.
        trials_per_iter = len(self.optimizer['fe'].trans_types)
        hpo_evaluator = Evaluator(self.default_config,
                                  data_node=self.original_data, name='hpo',
                                  resampling_strategy=self.evaluation_type,
                                  seed=self.seed)
        if n_jobs == 1:
            self.optimizer['hpo'] = SMACOptimizer(
                hpo_evaluator, cs, output_dir=output_dir, per_run_time_limit=per_run_time_limit,
                trials_per_iter=trials_per_iter // 2, seed=self.seed)
        else:
            self.optimizer['hpo'] = PSMACOptimizer(
                hpo_evaluator, cs, output_dir=output_dir, per_run_time_limit=per_run_time_limit,
                trials_per_iter=trials_per_iter // 2, seed=self.seed,
                n_jobs=n_jobs
            )
        self.inc['hpo'], self.local_inc['hpo'] = self.default_config, self.default_config
Example #17
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        if optimizer == 'smac':
            cs = ConfigurationSpace()
            loss = Constant("loss", "deviance")
            learning_rate = UniformFloatHyperparameter(name="learning_rate",
                                                       lower=0.01,
                                                       upper=1,
                                                       default_value=0.1,
                                                       log=True)
            # n_estimators = UniformIntegerHyperparameter(
            #     "n_estimators", 100, 100, default_value=100)
            n_estimators = Constant("n_estimators", 100)
            max_depth = UniformIntegerHyperparameter(name="max_depth",
                                                     lower=1,
                                                     upper=8,
                                                     default_value=3)
            criterion = CategoricalHyperparameter('criterion',
                                                  ['friedman_mse', 'mse'],
                                                  default_value='mse')
            min_samples_split = UniformIntegerHyperparameter(
                name="min_samples_split", lower=2, upper=20, default_value=2)
            min_samples_leaf = UniformIntegerHyperparameter(
                name="min_samples_leaf", lower=1, upper=20, default_value=1)
            min_weight_fraction_leaf = UnParametrizedHyperparameter(
                "min_weight_fraction_leaf", 0.)
            subsample = UniformFloatHyperparameter(name="subsample",
                                                   lower=0.1,
                                                   upper=1.0,
                                                   default_value=1.0)
            max_features = UniformFloatHyperparameter("max_features",
                                                      0.1,
                                                      1.0,
                                                      default_value=1)
            max_leaf_nodes = UnParametrizedHyperparameter(
                name="max_leaf_nodes", value="None")
            min_impurity_decrease = UnParametrizedHyperparameter(
                name='min_impurity_decrease', value=0.0)
            cs.add_hyperparameters([
                loss, learning_rate, n_estimators, max_depth, criterion,
                min_samples_split, min_samples_leaf, min_weight_fraction_leaf,
                subsample, max_features, max_leaf_nodes, min_impurity_decrease
            ])
            return cs
        elif optimizer == 'tpe':
            space = {
                'loss':
                hp.choice('gb_loss', ["deviance"]),
                'learning_rate':
                hp.loguniform('gb_learning_rate', np.log(0.01), np.log(1)),
                # 'n_estimators': hp.randint('gb_n_estimators', 451) + 50,
                'n_estimators':
                hp.choice('gb_n_estimators', [100]),
                'max_depth':
                hp.randint('gb_max_depth', 8) + 1,
                'criterion':
                hp.choice('gb_criterion', ['friedman_mse', 'mse']),
                'min_samples_split':
                hp.randint('gb_min_samples_split', 19) + 2,
                'min_samples_leaf':
                hp.randint('gb_min_samples_leaf', 20) + 1,
                'min_weight_fraction_leaf':
                hp.choice('gb_min_weight_fraction_leaf', [0]),
                'subsample':
                hp.uniform('gb_subsample', 0.1, 1),
                'max_features':
                hp.uniform('gb_max_features', 0.1, 1),
                'max_leaf_nodes':
                hp.choice('gb_max_leaf_nodes', [None]),
                'min_impurity_decrease':
                hp.choice('gb_min_impurity_decrease', [0])
            }

            # Reference default configuration; note that this branch returns
            # only `space`, so init_trial is built but unused here.
            init_trial = {
                'loss': "deviance",
                'learning_rate': 0.1,
                'n_estimators': 100,
                'max_depth': 3,
                'criterion': "friedman_mse",
                'min_samples_split': 2,
                'min_samples_leaf': 1,
                'min_weight_fraction_leaf': 0,
                'subsample': 1,
                'max_features': 1,
                'max_leaf_nodes': None,
                'min_impurity_decrease': 0
            }
            return space
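The 'tpe' space is an ordinary hyperopt expression tree and can be sampled directly; the randint offsets reproduce the SMAC integer ranges (e.g. hp.randint('gb_max_depth', 8) + 1 yields 1..8). A minimal sketch, assuming the method is reachable as a staticmethod of its component class (called GradientBoosting here purely for illustration):

from hyperopt.pyll.stochastic import sample

space = GradientBoosting.get_hyperparameter_search_space(optimizer='tpe')
params = sample(space)          # one random draw from the space
print(params['max_depth'])      # integer in [1, 8]
print(params['learning_rate'])  # log-uniform over [0.01, 1]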
Example #18
    def get_hyperparameter_search_space(dataset_properties=None):
        cs = ConfigurationSpace()

        # Parameterized Hyperparameters
        max_depth = UniformIntegerHyperparameter(name="max_depth",
                                                 lower=1,
                                                 upper=10,
                                                 default_value=3)
        learning_rate = UniformFloatHyperparameter(name="learning_rate",
                                                   lower=0.01,
                                                   upper=1,
                                                   default_value=0.1,
                                                   log=True)
        n_estimators = Constant("n_estimators", 512)
        booster = CategoricalHyperparameter("booster", ["gbtree", "dart"])
        subsample = UniformFloatHyperparameter(name="subsample",
                                               lower=0.01,
                                               upper=1.0,
                                               default_value=1.0,
                                               log=False)
        min_child_weight = UniformIntegerHyperparameter(
            name="min_child_weight",
            lower=1e-10,
            upper=20,
            default_value=1,
            log=False)
        colsample_bytree = UniformFloatHyperparameter(
            name="colsample_bytree",
            lower=0.1,
            upper=1.0,
            default_value=1,
        )
        colsample_bylevel = UniformFloatHyperparameter(
            name="colsample_bylevel",
            lower=0.1,
            upper=1.0,
            default_value=1,
        )
        reg_alpha = UniformFloatHyperparameter(name="reg_alpha",
                                               lower=1e-10,
                                               upper=1e-1,
                                               log=True,
                                               default_value=1e-10)
        reg_lambda = UniformFloatHyperparameter(name="reg_lambda",
                                                lower=1e-10,
                                                upper=1e-1,
                                                log=True,
                                                default_value=1e-10)

        # DART Hyperparameters
        sample_type = CategoricalHyperparameter(
            'sample_type',
            ['uniform', 'weighted'],
            default_value='uniform',
        )
        normalize_type = CategoricalHyperparameter(
            'normalize_type',
            ['tree', 'forest'],
            default_value='tree',
        )
        rate_drop = UniformFloatHyperparameter(
            'rate_drop',
            1e-10,
            1 - (1e-10),
            default_value=0.5,
        )

        # Unparameterized Hyperparameters
        # https://xgboost.readthedocs.io/en/latest//parameter.html
        # minimum loss reduction required to make a further partition on a
        # leaf node of the tree
        gamma = UnParametrizedHyperparameter(name="gamma", value=0)
        # Absolute cap on each leaf's output (in contrast to the relative
        # scaling by eta), comparable to gradient clipping in deep learning;
        # reportedly most useful for heavily imbalanced data.
        max_delta_step = UnParametrizedHyperparameter(name="max_delta_step",
                                                      value=0)
        base_score = UnParametrizedHyperparameter(name="base_score", value=0.5)
        scale_pos_weight = UnParametrizedHyperparameter(
            name="scale_pos_weight", value=1)

        cs.add_hyperparameters([
            # Active
            max_depth,
            learning_rate,
            n_estimators,
            booster,
            subsample,
            colsample_bytree,
            colsample_bylevel,
            reg_alpha,
            reg_lambda,
            # DART
            sample_type,
            normalize_type,
            rate_drop,
            # Inactive
            min_child_weight,
            max_delta_step,
            gamma,
            base_score,
            scale_pos_weight
        ])

        sample_type_condition = EqualsCondition(
            sample_type,
            booster,
            'dart',
        )
        normalize_type_condition = EqualsCondition(
            normalize_type,
            booster,
            'dart',
        )
        rate_drop_condition = EqualsCondition(
            rate_drop,
            booster,
            'dart',
        )

        cs.add_conditions([
            sample_type_condition,
            normalize_type_condition,
            rate_drop_condition,
        ])
        return cs
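The three conditions gate the DART-specific parameters on booster == 'dart'; a short check of that behaviour (standard ConfigSpace API, cs as returned above):

for config in cs.sample_configuration(10):
    params = config.get_dictionary()
    if params["booster"] == "gbtree":
        # sample_type, normalize_type and rate_drop are inactive here.
        assert "rate_drop" not in params
    else:
        assert params["normalize_type"] in ("tree", "forest")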
Example #19
    def get_hyperparameter_search_space(dataset_properties=None):
        C = UniformFloatHyperparameter(name="C",
                                       lower=0.03125,
                                       upper=32768,
                                       log=True,
                                       default_value=1.0)
        # Random Guess
        epsilon = UniformFloatHyperparameter(name="epsilon",
                                             lower=0.001,
                                             upper=1,
                                             default_value=0.1,
                                             log=True)

        kernel = CategoricalHyperparameter(
            name="kernel",
            choices=['linear', 'poly', 'rbf', 'sigmoid'],
            default_value="rbf")
        degree = UniformIntegerHyperparameter(name="degree",
                                              lower=2,
                                              upper=5,
                                              default_value=3)

        gamma = UniformFloatHyperparameter(name="gamma",
                                           lower=3.0517578125e-05,
                                           upper=8,
                                           log=True,
                                           default_value=0.1)

        # TODO this is totally ad-hoc
        coef0 = UniformFloatHyperparameter(name="coef0",
                                           lower=-1,
                                           upper=1,
                                           default_value=0)
        # probability is no hyperparameter, but an argument to the SVM algo
        shrinking = CategoricalHyperparameter(name="shrinking",
                                              choices=["True", "False"],
                                              default_value="True")
        tol = UniformFloatHyperparameter(name="tol",
                                         lower=1e-5,
                                         upper=1e-1,
                                         default_value=1e-3,
                                         log=True)
        max_iter = UnParametrizedHyperparameter("max_iter", -1)

        cs = ConfigurationSpace()
        cs.add_hyperparameters([
            C, kernel, degree, gamma, coef0, shrinking, tol, max_iter, epsilon
        ])

        degree_depends_on_kernel = InCondition(child=degree,
                                               parent=kernel,
                                               values=('poly', 'rbf',
                                                       'sigmoid'))
        gamma_depends_on_kernel = InCondition(child=gamma,
                                              parent=kernel,
                                              values=('poly', 'rbf'))
        coef0_depends_on_kernel = InCondition(child=coef0,
                                              parent=kernel,
                                              values=('poly', 'sigmoid'))
        cs.add_conditions([
            degree_depends_on_kernel, gamma_depends_on_kernel,
            coef0_depends_on_kernel
        ])

        return cs
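A sampled configuration maps almost one-to-one onto sklearn.svm.SVR; only the string-encoded shrinking flag needs converting, and inactive conditionals (absent from the dictionary) can fall back to sklearn defaults. A hedged sketch:

from sklearn.svm import SVR

config = cs.sample_configuration().get_dictionary()
model = SVR(
    C=config["C"],
    epsilon=config["epsilon"],
    kernel=config["kernel"],
    degree=config.get("degree", 3),       # absent unless the kernel uses it
    gamma=config.get("gamma", "scale"),
    coef0=config.get("coef0", 0.0),
    shrinking=config["shrinking"] == "True",  # stored as a string above
    tol=config["tol"],
    max_iter=config["max_iter"],          # -1 means no iteration limit
)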
def evaluate_metalearning_configs(first_bandit):
    score_list = []
    for config in first_bandit.meta_configs:
        try:
            config = config.get_dictionary()
            # print(config)
            arm = None
            cs = ConfigurationSpace()
            for key in config:
                key_str = key.split(":")
                if key_str[0] == 'classifier':
                    if key_str[1] == '__choice__':
                        arm = config[key]
                        cs.add_hyperparameter(
                            UnParametrizedHyperparameter(
                                "estimator", config[key]))
                    else:
                        cs.add_hyperparameter(
                            UnParametrizedHyperparameter(
                                key_str[2], config[key]))

            if arm in first_bandit.arms:
                transformed_node = apply_metalearning_fe(
                    first_bandit.sub_bandits[arm].optimizer['fe'], config)
                # Every hyperparameter in cs is an UnParametrizedHyperparameter,
                # so this single sample reproduces the meta-config itself.
                default_config = cs.sample_configuration(1)
                hpo_evaluator = Evaluator(
                    None,
                    data_node=transformed_node,
                    name='hpo',
                    resampling_strategy=first_bandit.eval_type,
                    seed=first_bandit.seed)

                start_time = time.time()
                score = 1 - hpo_evaluator(default_config)
                time_cost = time.time() - start_time
                score_list.append(
                    (arm, score, default_config, transformed_node, time_cost))
                transformed_node.score = score

                # Evaluate the default config
                start_time = time.time()
                score = 1 - hpo_evaluator(
                    first_bandit.sub_bandits[arm].default_config)
                time_cost = time.time() - start_time
                score_list.append(
                    (arm, score, first_bandit.sub_bandits[arm].default_config,
                     transformed_node, time_cost))
                transformed_node.score = score
        except Exception as e:
            print(e)

    # Sort the meta-configs
    score_list.sort(key=lambda x: x[1], reverse=True)
    meta_arms = list()
    for arm_score_config in score_list:
        if arm_score_config[0] in meta_arms:
            continue

        first_bandit.sub_bandits[
            arm_score_config[0]].default_config = arm_score_config[2]
        first_bandit.sub_bandits[arm_score_config[0]].collect_iter_stats(
            'fe',
            (arm_score_config[1], arm_score_config[4], arm_score_config[3]))
        # first_bandit.sub_bandits[arm_score_config[0]].collect_iter_stats('hpo',
        #                                                                  (arm_score_config[1], arm_score_config[4],
        #                                                                   arm_score_config[2]))
        first_bandit.sub_bandits[arm_score_config[0]].optimizer[
            'fe'].hp_config = arm_score_config[2]
        meta_arms.append(arm_score_config[0])
    for arm in first_bandit.arms:
        if arm not in meta_arms:
            meta_arms.append(arm)

    first_bandit.final_rewards.append(score_list[0][1])
    first_bandit.action_sequence.append(score_list[0][0])
    first_bandit.time_records.append(score_list[0][4])  # index 4 is the time cost
    first_bandit.arms = meta_arms
    first_bandit.logger.info("Arms after evaluating meta-configs: " +
                             str(first_bandit.arms))
if task == 'cls':
    # Assumed import path, mirroring the regression branch below.
    from solnml.components.models.classification import _classifiers

    _estimators = _classifiers
else:
    from solnml.components.models.regression import _regressors

    _estimators = _regressors

eval_type = 'holdout'
output_dir = args.output_dir
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

for dataset in dataset_list:
    train_data, test_data = load_train_test_data(dataset)
    for algo in algorithms:
        cs = _estimators[algo].get_hyperparameter_search_space()
        model = UnParametrizedHyperparameter("estimator", algo)
        cs.add_hyperparameter(model)
        default_hpo_config = cs.get_default_configuration()

        if task == 'cls':
            fe_evaluator = ClassificationEvaluator(default_hpo_config, scorer=metric,
                                                   name='fe', resampling_strategy=eval_type,
                                                   seed=1)
            hpo_evaluator = ClassificationEvaluator(default_hpo_config, scorer=metric,
                                                    data_node=train_data, name='hpo',
                                                    resampling_strategy=eval_type,
                                                    seed=1)
        else:
            fe_evaluator = RegressionEvaluator(default_hpo_config, scorer=metric,
                                               name='fe', resampling_strategy=eval_type,
                                               seed=1)
Example #22
    def get_hyperparameter_search_space(dataset_properties=None):

        cs = ConfigurationSpace()

        Window_size = UniformIntegerHyperparameter(
            name="Window_size", lower=5, upper=50, default_value=20)

        Difference = CategoricalHyperparameter(
            name="Difference", choices=["True", "False"], default_value="True")

        tsfresh_feature = CategoricalHyperparameter(
            name="tsfresh_feature", choices=["True", "False"], default_value="True")

        C = UniformFloatHyperparameter(
            name="C", lower=0.03125, upper=32768, log=True, default_value=1.0)
        
        epsilon = UniformFloatHyperparameter(name="epsilon", lower=0.001,
                                             upper=1, default_value=0.1,
                                             log=True)

        kernel = CategoricalHyperparameter(
            name="kernel", choices=['linear', 'poly', 'rbf', 'sigmoid'],
            default_value="rbf")

        degree = UniformIntegerHyperparameter(
            name="degree", lower=2, upper=5, default_value=3)

        gamma = CategoricalHyperparameter("gamma", ["auto", "value"], default_value="auto")

        gamma_value = UniformFloatHyperparameter(
            name="gamma_value", lower=0.0001, upper=8, default_value=1)

        # TODO this is totally ad-hoc
        coef0 = UniformFloatHyperparameter(
            name="coef0", lower=-1, upper=1, default_value=0)

        # probability is no hyperparameter, but an argument to the SVM algo
        shrinking = CategoricalHyperparameter(
            name="shrinking", choices=["True", "False"], default_value="True")

        tol = UniformFloatHyperparameter(
            name="tol", lower=1e-5, upper=1e-1, default_value=1e-3, log=True)

        max_iter = UnParametrizedHyperparameter("max_iter", 200000)

        
        cs.add_hyperparameters([Window_size, Difference, tsfresh_feature, C,
                                kernel, degree, gamma, gamma_value, coef0,
                                shrinking, tol, max_iter, epsilon])

        degree_depends_on_kernel = InCondition(child=degree, parent=kernel,
                                               values=["poly"])
        gamma_depends_on_kernel = InCondition(child=gamma, parent=kernel,
                                              values=["rbf", "poly", "sigmoid"])
        coef0_depends_on_kernel = InCondition(child=coef0, parent=kernel,
                                              values=["poly", "sigmoid"])
        gamma_value_depends_on_gamma = InCondition(child=gamma_value, parent=gamma,
                                                   values=["value"])
        cs.add_conditions([degree_depends_on_kernel, gamma_depends_on_kernel,
                           coef0_depends_on_kernel, gamma_value_depends_on_gamma])

        return cs
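Here gamma is split into a categorical switch plus a conditional value, so the two must be folded back into the single gamma argument sklearn expects. A hedged sketch of that resolution:

config = cs.sample_configuration().get_dictionary()
# gamma is absent for the linear kernel; gamma_value exists only when
# the switch is set to "value".
gamma = config["gamma_value"] if config.get("gamma") == "value" else "auto"
svr_kwargs = dict(
    C=config["C"],
    epsilon=config["epsilon"],
    kernel=config["kernel"],
    gamma=gamma,
    tol=config["tol"],
    max_iter=config["max_iter"],
)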