Example #1
    def tree_impl_back(cls, data):
        cls.check_data(data)
        # floor and sqrt come from the math module.
        n_estimators = min(
            500, floor(sqrt(data.n_instances() * data.n_attributes())))

        data_for_speed = {
            'n_estimators': ['z', [2, 1000]],
            'max_depth': ['z', [2, data.n_instances()]]
        }  # Among others
        node = {
            'bootstrap': ['c', [True, False]],
            'min_impurity_decrease': ['r', [0, 1]],
            'max_leaf_nodes':
            ['o', [2, 3, 5, 8, 12, 17, 23, 30, 38, 47, 57,
                   999999]],  # 999999 ~ None
            'max_features': ['r', [0.001, 1]],
            # For some reason, the interval [1, n_attributes] didn't work for
            # RF (maybe it is relative to a subset).
            'min_weight_fraction_leaf': ['r', [0, 0.5]],
            # According to ValueError exception.
            'min_samples_leaf': ['z', [1, floor(data.n_instances() / 2)]],
            # Int (# of instances) is better than float
            # (proportion of instances) because different floats can collide
            # to the same int, producing intervals of useless real values.
            'min_samples_split': ['z', [2, floor(data.n_instances() / 2)]],
            # Same reason as min_samples_leaf
            'max_depth': ['z', [2, data.n_instances()]],
            'criterion': ['c', ['gini', 'entropy']],
            # Docs say that this parameter is tree-specific,
            # but we cannot choose the tree.
            'n_estimators': ['c', [n_estimators]],
            # Only to set the default, not for search.
            # See DT.py for more details about other settings.
        }
        return HPTree(node, children=[])
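Throughout these examples a node maps each hyperparameter name to a
['kind', values] pair. Judging from the usage above, 'c' appears to be a
categorical choice, 'o' an ordered set of discrete options, 'r' a real
interval, and 'z' an integer interval. A minimal sampling sketch under that
assumption (sample_node is an illustrative name, not part of the original
code):

import random

def sample_node(node):
    # Draw one value per hyperparameter from a node dict (illustrative).
    config = {}
    for name, (kind, values) in node.items():
        if kind in ('c', 'o'):  # pick one of the listed options
            config[name] = random.choice(values)
        elif kind == 'r':  # uniform draw from a real interval
            config[name] = random.uniform(*values)
        elif kind == 'z':  # uniform draw from an integer interval
            config[name] = random.randint(*values)
        else:
            raise ValueError(f'unknown kind: {kind}')
    return config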
Example #2
 def cs_impl(cls, data=None):
     node = {
         '@with_mean/std':
             ['c', [(True, False), (False, True), (True, True)]]
         # (False, False) seems to be useless
     }
     return HPTree(node, children=[])
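Example #7 explains the '@' prefix: the hyperparameter is auxiliary and
"will be converted in constructor". For this scaler, the conversion could
unpack the chosen tuple into the two real StandardScaler arguments;
build_scaler below is a hypothetical illustration, not the repository's
actual constructor:

from sklearn.preprocessing import StandardScaler

def build_scaler(config):
    # Hypothetical conversion of the auxiliary '@with_mean/std' tuple
    # into the real with_mean/with_std keyword arguments.
    with_mean, with_std = config.pop('@with_mean/std')
    return StandardScaler(with_mean=with_mean, with_std=with_std, **config)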
Example #3
    def cs_impl(self):
        node = {
            'iterations': ['c', [100, 500, 1000, 3000, 5000]],
            'learning_rate': ['r', [0.000001, 1.0]],
            'depth': ['z', [1, 15]],
            'l2_leaf_reg': ['r', [0.01, 99999]],
            'loss_function': ['c', ['MultiClass']],
            'border_count': ['z', [1, 255]],
            'thread_count': ['c', [-1]]
        }

        return HPTree(node, children=[])
Example #4
    def cs_impl(self):
        # TODO: set random seed; set 'cache_size'
        node = {
            'C': ['r', [0.0001, 100]],
            'shrinking': ['c', [True, False]],
            'probability': ['c', [False]],
            'tol': ['o',
                    [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100,
                     1000, 10000]],
            'class_weight': ['c', [None, 'balanced']],
            # 'verbose': [False],
            'max_iter': ['c', [1000000]],
            'decision_function_shape': ['c', ['ovr', 'ovo']]
        }

        kernel_linear = HPTree({'kernel': ['c', ['linear']]}, children=[])

        kernel_poly = HPTree({
            'kernel': ['c', ['poly']],
            'degree': ['z', [0, 10]],
            'coef0': ['r', [0.0, 100]],
        }, children=[])

        kernel_rbf = HPTree({'kernel': ['c', ['rbf']]}, children=[])

        kernel_sigmoid = HPTree({
            'kernel': ['c', ['sigmoid']],
            'coef0': ['r', [0.0, 100]],
        }, children=[])

        kernel_nonlinear = HPTree({'gamma': ['r', [0.00001, 100]]},
                                  children=[kernel_poly, kernel_rbf,
                                            kernel_sigmoid])

        return HPTree(node, children=[kernel_linear, kernel_nonlinear])
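The nested HPTrees encode conditional hyperparameters: 'gamma' applies only
to the nonlinear kernels, and 'degree'/'coef0' only to the branches that
list them. One way such a tree could be sampled is to merge draws along a
single root-to-leaf path; a sketch under the same assumptions as before:

import random

def sample_tree(tree):
    # Reuses the hypothetical sample_node() from the sketch after Example #1;
    # assumes HPTree keeps its constructor arguments as .node and .children.
    config = sample_node(tree.node)
    if tree.children:
        config.update(sample_tree(random.choice(tree.children)))
    return config

Walking root -> kernel_nonlinear -> kernel_poly, for instance, would yield a
config with C, gamma, kernel='poly', degree and coef0, while the
kernel_linear leaf yields neither gamma nor coef0.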
Example #5
 def cs_impl(self):
     n_estimators = [100, 500, 1000, 3000, 5000]
     node = {
         'bootstrap': ['c', [True, False]],
         'criterion': ['c', ['gini', 'entropy']],
         'max_features': ['c', ['auto', 'sqrt', 'log2', None]],
         'min_impurity_decrease': ['r', [0, 0.2]],
         'min_samples_split': ['r', [1e-6, 0.3]],
         'min_samples_leaf': ['r', [1e-6, 0.3]],
         'min_weight_fraction_leaf': ['r', [0, 0.3]],
         'max_depth': ['z', [2, 1000]],
         'n_estimators': ['c', n_estimators],
     }
     return HPTree(node, children=[])
Example #6
    def cs_impl(cls, data=None):
        # Assumes worst case of k-fold CV, i.e. k=2. Undersampling is another
        # problem, handled by @n_instances.
        cls.check_data(data)
        kmax = floor(min(400, data.n_instances() / 2 - 1))

        # TODO: put knn hyperparameters here?
        node = {
            # TODO: implement 'minority'
            'vote': ['c', ['majority', 'consensus']],
            'algorithm': ['c', ['ENN', 'RENN', 'AENN']],
            'k': ['c', exponential_integers(kmax)]
        }

        # Dead code ('k' was undefined and 'tree' was never used):
        # tree = KNN.cs(delete=['k'])
        # del tree['']
        return HPTree(node, children=[])
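exponential_integers is not defined in these excerpts. Its call sites here
and in Example #7 (exponential_integers(max_neurons, 3)) suggest it returns
exponentially spaced integers up to a cap, with an optional base; the
following is only a guess at its behavior:

def exponential_integers(maximum, base=2):
    # Assumed semantics: 1, base, base**2, ... capped at `maximum`.
    values, current = [], 1
    while current <= maximum:
        values.append(current)
        current *= base
    return values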
Example #7
    def cs_impl(self):

        # TODO: set random seed
        max_neurons = 10000

        node = {
            'alpha': [
                'o',
                [
                    0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100,
                    1000, 10000
                ]
            ],
            # https://scikit-learn.org/stable/auto_examples/neural_networks/plot_mlp_alpha.html
            'max_iter': ['c', [10000]],  # We assume non-convergence is bad.
            # 'Number of epochs'/'gradient steps'.
            'tol': [
                'o',
                [
                    0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100,
                    1000, 10000
                ]
            ],
            # Maybe useless when learning_rate is set to 'adaptive'.
            'nesterovs_momentum': ['c', [True, False]],
            # number of inputs times outputs
            # '@in_out': ['c', [data.n_attributes * data.n_classes]],
            '@neurons': ['c', exponential_integers(max_neurons, 3)]
        }

        zero_hidden_layers = HPTree({
            'hidden_layer_sizes': ['c', [()]],
        }, children=[])

        one_hidden_layer = HPTree(
            {
                '@hidden_layer_size1': ['r', [0, 1]],
                # @ indicates that this hyperparameter is auxiliary
                # (will be converted in constructor)
                'activation': ['c', ['identity', 'logistic', 'tanh', 'relu']],
                # Only used when there is at least one hidden layer
            },
            children=[])

        two_hidden_layers = HPTree(
            {
                '@hidden_layer_size2': ['r', [0, 1]],
                # @ indicates that this hyperparameter is auxiliary
                # (will be converted in constructor)
            },
            children=[one_hidden_layer])

        three_hidden_layers = HPTree(
            {
                '@hidden_layer_size3': ['r', [0, 1]],
                # @ indicates that this hyperparameter is auxiliary
                # (will be converted in constructor)
            },
            children=[two_hidden_layers])

        layers = [
            zero_hidden_layers, one_hidden_layer, two_hidden_layers,
            three_hidden_layers
        ]

        early_stopping = HPTree(
            {
                'early_stopping': ['c', [True]],
                # Only effective when solver='sgd' or 'adam'.
                'validation_fraction':
                ['c', [0.01, 0.05, 0.1, 0.15, 0.20, 0.25, 0.30]],
                # Only used if early_stopping is True.
            },
            children=layers)

        late_stopping = HPTree(
            {
                'early_stopping': ['c', [False]],
                # Only effective when solver='sgd' or 'adam'.
            },
            children=layers)

        stoppings = [early_stopping, late_stopping]

        solver_adam = HPTree(
            {
                'solver': ['c', ['adam']],
                'beta_1': ['r', [0.0, 0.999999]],  # 'adam'
                'beta_2': ['r', [0.0, 0.999999]],  # 'adam'
                'epsilon': ['r', [0.0000000001, 1.0]],  # 'adam'
            },
            children=stoppings)

        learning_rate_constant = HPTree(
            {
                'learning_rate': ['c', ['constant']],
                # Only for solver='sgd' (trusting the docs, though
                # 'learning_rate' seems to apply to 'adam' as well).
                'power_t': ['r', [0.0, 2.0]],
                # The docs actually tie power_t to learning_rate='invscaling';
                # it is unclear whether MLP benefits from power_t > 1.
            },
            children=stoppings)

        learning_rate_invscaling = HPTree(
            {
                'learning_rate': ['c', ['invscaling']],
                # Only for solver='sgd' (trusting the docs, though
                # 'learning_rate' seems to apply to 'adam' as well).
            },
            children=stoppings)

        learning_rate_adaptive = HPTree(
            {
                'learning_rate': ['c', ['adaptive']],
                # Only for solver='sgd' (trusting the docs, though
                # 'learning_rate' seems to apply to 'adam' as well).
            },
            children=stoppings)

        solver_sgd = HPTree(
            {
                'solver': ['c', ['sgd']],
                'momentum': ['r', [0.0, 1.0]],
                # Only used when solver='sgd' (trusting the docs, though
                # 'momentum' seems to apply to 'adam' as well).
            },
            children=[
                learning_rate_constant, learning_rate_invscaling,
                learning_rate_adaptive
            ])

        solver_non_newton = HPTree(
            {
                'n_iter_no_change': ['c', [10]],
                # Only effective when solver='sgd' or 'adam'.
                'batch_size': ['c', ['auto']],
                # Alternative considered: min(1000, floor(data.n_instances() / 2)).
                # Useless for solver='lbfgs'.
                'learning_rate_init': ['r', [0.000001, 0.5]],
                # Only used when solver='sgd' or 'adam'.
                'shuffle': ['c', [True, False]],
                # Only used when solver='sgd' or 'adam'.
            },
            children=[solver_adam, solver_sgd])

        solver_lbfgs = HPTree({
            'solver': ['c', ['lbfgs']],
        }, children=layers)

        tree = HPTree(node, children=[solver_non_newton, solver_lbfgs])

        return tree
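How the auxiliary '@hidden_layer_size{1,2,3}' fractions and the '@neurons'
budget are combined is not shown in these excerpts. One plausible
constructor-side conversion, purely a guess (including the function name and
the layer ordering), scales each fraction by the budget:

def to_hidden_layer_sizes(config, max_neurons=10000):
    # Hypothetical: turn '@hidden_layer_sizeN' fractions in [0, 1] into
    # absolute layer widths, at least 1 neuron per layer.
    sizes = []
    for i in (1, 2, 3):
        fraction = config.pop(f'@hidden_layer_size{i}', None)
        if fraction is not None:
            budget = config.get('@neurons', max_neurons)
            sizes.append(max(1, round(fraction * budget)))
    return tuple(sizes)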
Example #8
 def cs_impl(cls, data):
     return HPTree(node={}, children=[])
Example #9
 def cs_impl(cls, data=None):
     node = {
         'sampling_strategy':
         ['c', ['not minority', 'not majority', 'all']]
     }
     return HPTree(node, children=[])
Example #10
 def cs_impl(cls, data):
     cls.check_data(data)
     # TODO: set random_state
     node = {'n_components': ['z', [1, data.n_attributes()]]}
     node.update(cls.specific_node(data))
     return HPTree(node, children=[])
Example #11
 def cs_impl(self):
     node = {'@nb_type': ['c', ["MultinomialNB", "ComplementNB"]]}
     return HPTree(node=node, children=[])
Example #12
 def cs_impl(self, data=None):
     return HPTree(node=self.freeze_hptree(),
                   children=[],
                   name=self.name + self.components[0].name)
Example #13
 def rec(nnd, child=None):
     children = [] if child is None else [child]
     name, node = nnd
     return HPTree(name=name, node=node, children=children)
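rec wraps a single (name, node) pair into an HPTree with at most one child,
so folding it over a sequence of pairs produces a linear chain. A usage
sketch with made-up pair data:

from functools import reduce

pairs = [('scaler', {'@with_mean/std': ['c', [(True, True)]]}),
         ('knn', {'k': ['z', [1, 50]]})]

# Fold from the leaf upwards: the last pair becomes the deepest tree.
chain = reduce(lambda child, nnd: rec(nnd, child), reversed(pairs), None)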
Example #14
 def cs_impl(self):
     return HPTree({'oper': ['c', ['+', '-', '*', '.']]}, [])
Example #15
 def cs_impl(cls, data):
     return HPTree({}, [])
Example #16
 def cs_impl(cls, data):
     return HPTree(
         # TODO: check if it would be better to adopt a 'z' hyperparameter
         node={'ratio': ['r', [1e-05, 1]]},
         children=[])