from math import floor, sqrt

def tree_impl_back(cls, data):
    cls.check_data(data)
    n_estimators = min(
        [500, floor(sqrt(data.n_instances() * data.n_attributes()))])
    data_for_speed = {  # Among others. Currently unused.
        'n_estimators': ['z', [2, 1000]],
        'max_depth': ['z', [2, data.n_instances()]]
    }
    node = {
        'bootstrap': ['c', [True, False]],
        'min_impurity_decrease': ['r', [0, 1]],
        'max_leaf_nodes': ['o', [2, 3, 5, 8, 12, 17, 23, 30, 38, 47, 57,
                                 999999]],  # 999999 ~ None
        'max_features': ['r', [0.001, 1]],
        # For some reason, the interval [1, n_attributes] didn't work for
        # RF (maybe it is relative to a subset).
        'min_weight_fraction_leaf': ['r', [0, 0.5]],
        # According to a ValueError exception.
        'min_samples_leaf': ['z', [1, floor(data.n_instances() / 2)]],
        # Int (# of instances) is better than float (proportion of
        # instances) because different floats can collide to the same int,
        # making intervals of useless real values.
        'min_samples_split': ['z', [2, floor(data.n_instances() / 2)]],
        # Same reason as min_samples_leaf.
        'max_depth': ['z', [2, data.n_instances()]],
        'criterion': ['c', ['gini', 'entropy']],
        # Docs say that this parameter is tree-specific,
        # but we cannot choose the tree.
        'n_estimators': ['c', [n_estimators]],
        # Only to set the default, not for search.
        # See DT.py for more details about other settings.
    }
    return HPTree(node, children=[])
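# The node dicts above map each hyperparameter name to a pair [kind, values]:
# 'c' = categorical choice, 'r' = real interval, 'z' = integer interval,
# 'o' = ordered discrete set. A minimal sketch of how one configuration
# could be drawn from such a node, assuming this exact encoding
# (sample_node is an illustrative name, not part of the library):
import random

def sample_node(node):
    config = {}
    for name, (kind, values) in node.items():
        if kind in ('c', 'o'):  # Pick one of the listed options.
            config[name] = random.choice(values)
        elif kind == 'r':  # Uniform draw from a real interval.
            lo, hi = values
            config[name] = random.uniform(lo, hi)
        elif kind == 'z':  # Uniform draw from an integer interval.
            lo, hi = values
            config[name] = random.randint(lo, hi)
    return config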
def cs_impl(cls, data=None):
    node = {
        '@with_mean/std': ['c', [(True, False), (False, True), (True, True)]]
        # (False, False) seems to be useless.
    }
    return HPTree(node, children=[])
def cs_impl(self):
    node = {
        'iterations': ['c', [100, 500, 1000, 3000, 5000]],
        'learning_rate': ['r', [0.000001, 1.0]],
        'depth': ['z', [1, 15]],
        'l2_leaf_reg': ['r', [0.01, 99999]],
        'loss_function': ['c', ['MultiClass']],
        'border_count': ['z', [1, 255]],
        'thread_count': ['c', [-1]]
    }
    return HPTree(node, children=[])
def cs_impl(self):
    # TODO: set random seed; set 'cache_size'.
    node = {
        'C': ['r', [0.0001, 100]],
        'shrinking': ['c', [True, False]],
        'probability': ['c', [False]],
        'tol': ['o', [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1,
                      1, 10, 100, 1000, 10000]],
        'class_weight': ['c', [None, 'balanced']],
        # 'verbose': [False],
        'max_iter': ['c', [1000000]],
        'decision_function_shape': ['c', ['ovr', 'ovo']]
    }
    kernel_linear = HPTree({'kernel': ['c', ['linear']]}, children=[])
    kernel_poly = HPTree({
        'kernel': ['c', ['poly']],
        'degree': ['z', [0, 10]],
        'coef0': ['r', [0.0, 100]],
    }, children=[])
    kernel_rbf = HPTree({'kernel': ['c', ['rbf']]}, children=[])
    kernel_sigmoid = HPTree({
        'kernel': ['c', ['sigmoid']],
        'coef0': ['r', [0.0, 100]],
    }, children=[])
    kernel_nonlinear = HPTree(
        {'gamma': ['r', [0.00001, 100]]},
        children=[kernel_poly, kernel_rbf, kernel_sigmoid])
    return HPTree(node, children=[kernel_linear, kernel_nonlinear])
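# The SVM space above is hierarchical: 'gamma' only exists under the
# nonlinear kernels, and 'degree'/'coef0' only under specific kernels.
# A sketch of sampling one root-to-leaf path through such a tree,
# assuming HPTree exposes .node and .children attributes (sample_node is
# the illustrative helper defined after tree_impl_back above):
def sample_path(tree):
    config = sample_node(tree.node)
    if tree.children:
        config.update(sample_path(random.choice(tree.children)))
    return config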
def cs_impl(self):
    n_estimators = [100, 500, 1000, 3000, 5000]
    node = {
        'bootstrap': ['c', [True, False]],
        'criterion': ['c', ['gini', 'entropy']],
        'max_features': ['c', ['auto', 'sqrt', 'log2', None]],
        'min_impurity_decrease': ['r', [0, 0.2]],
        'min_samples_split': ['r', [1e-6, 0.3]],
        'min_samples_leaf': ['r', [1e-6, 0.3]],
        'min_weight_fraction_leaf': ['r', [0, 0.3]],
        'max_depth': ['z', [2, 1000]],
        'n_estimators': ['c', n_estimators],
    }
    return HPTree(node, children=[])
def cs_impl(cls, data=None):
    # Assumes the worst case of k-fold CV, i.e. k=2. Undersampling is
    # another problem, handled by @n_instances.
    cls.check_data(data)
    kmax = floor(min(400, data.n_instances() / 2 - 1))
    # TODO: put kNN hyperparameters here? A first (broken) attempt was:
    #   tree = KNN.cs(delete=['k'])
    node = {
        # TODO: implement 'minority'.
        'vote': ['c', ['majority', 'consensus']],
        'algorithm': ['c', ['ENN', 'RENN', 'AENN']],
        'k': ['c', exponential_integers(kmax)]
    }
    return HPTree(node, children=[])
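# exponential_integers is used here and in the MLP space below but is not
# shown in this excerpt. One plausible sketch, assuming the second
# argument is the base of a geometric progression capped at n (the real
# helper may differ):
def exponential_integers(n, base=2):
    values, k = [], 1
    while k <= n:
        values.append(k)  # 1, base, base**2, ... up to n.
        k *= base
    return values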
def cs_impl(self):
    # TODO: set random seed.
    max_neurons = 10000
    node = {
        'alpha': ['o', [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1,
                        1, 10, 100, 1000, 10000]],
        # https://scikit-learn.org/stable/auto_examples/neural_networks/plot_mlp_alpha.html
        'max_iter': ['c', [10000]],
        # We assume that not converging is bad.
        # 'Number of epochs'/'gradient steps'.
        'tol': ['o', [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1,
                      1, 10, 100, 1000, 10000]],
        # Maybe useless when learning_rate is set to 'adaptive'.
        'nesterovs_momentum': ['c', [True, False]],
        # Number of inputs times outputs:
        # '@in_out': ['c', [data.n_attributes * data.n_classes]],
        '@neurons': ['c', exponential_integers(max_neurons, 3)]
    }

    zero_hidden_layers = HPTree({
        'hidden_layer_sizes': ['c', [()]],
    }, children=[])
    one_hidden_layer = HPTree({
        '@hidden_layer_size1': ['r', [0, 1]],
        # @ indicates that this hyperparameter is auxiliary
        # (will be converted in the constructor).
        'activation': ['c', ['identity', 'logistic', 'tanh', 'relu']],
        # Only used when there is at least one hidden layer.
    }, children=[])
    two_hidden_layers = HPTree({
        '@hidden_layer_size2': ['r', [0, 1]],  # Auxiliary (see above).
    }, children=[one_hidden_layer])
    three_hidden_layers = HPTree({
        '@hidden_layer_size3': ['r', [0, 1]],  # Auxiliary (see above).
    }, children=[two_hidden_layers])
    layers = [zero_hidden_layers, one_hidden_layer, two_hidden_layers,
              three_hidden_layers]

    early_stopping = HPTree({
        'early_stopping': ['c', [True]],
        # Only effective when solver='sgd' or 'adam'.
        'validation_fraction': ['c', [0.01, 0.05, 0.1, 0.15, 0.20, 0.25,
                                      0.30]],
        # Only used if early_stopping is True.
    }, children=layers)
    late_stopping = HPTree({
        'early_stopping': ['c', [False]],
        # Only effective when solver='sgd' or 'adam'.
    }, children=layers)
    stoppings = [early_stopping, late_stopping]

    solver_adam = HPTree({
        'solver': ['c', ['adam']],
        'beta_1': ['r', [0.0, 0.999999]],  # 'adam' only.
        'beta_2': ['r', [0.0, 0.999999]],  # 'adam' only.
        'epsilon': ['r', [0.0000000001, 1.0]],  # 'adam' only.
    }, children=stoppings)

    learning_rate_constant = HPTree({
        'learning_rate': ['c', ['constant']],
        # Only for solver=sgd (I will believe the docs, but it seems like
        # 'learning_rate' applies to 'adam' as well).
        'power_t': ['r', [0.0, 2.0]],
        # NOTE: sklearn docs say power_t is used when
        # learning_rate='invscaling'; it is unclear whether MLP benefits
        # from power_t > 1.
    }, children=stoppings)
    learning_rate_invscaling = HPTree({
        'learning_rate': ['c', ['invscaling']],
        # Only for solver=sgd (see note above).
    }, children=stoppings)
    learning_rate_adaptive = HPTree({
        'learning_rate': ['c', ['adaptive']],
        # Only for solver=sgd (see note above).
    }, children=stoppings)
    solver_sgd = HPTree({
        'solver': ['c', ['sgd']],
        'momentum': ['r', [0.0, 1.0]],
        # Only used when solver='sgd' (I will believe the docs, but it
        # seems like 'momentum' applies to 'adam' as well).
    }, children=[learning_rate_constant, learning_rate_invscaling,
                 learning_rate_adaptive])

    solver_non_newton = HPTree({
        'n_iter_no_change': ['c', [10]],
        # Only effective when solver='sgd' or 'adam'.
        'batch_size': ['c', ['auto']],
        # min([1000, floor(data.n_instances() / 2)]); useless for solver
        # lbfgs.
        'learning_rate_init': ['r', [0.000001, 0.5]],
        # Only used when solver='sgd' or 'adam'.
        'shuffle': ['c', [True, False]],
        # Only used when solver='sgd' or 'adam'.
    }, children=[solver_adam, solver_sgd])
    solver_lbfgs = HPTree({
        'solver': ['c', ['lbfgs']],
    }, children=layers)

    tree = HPTree(node, children=[solver_non_newton, solver_lbfgs])
    return tree
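# The '@'-prefixed hyperparameters above are auxiliary and, per the
# comments, converted in the constructor (not shown in this excerpt).
# One plausible sketch of that conversion, assuming @neurons is a total
# budget split across layers in proportion to the sampled fractions
# (to_hidden_layer_sizes is an illustrative name; the real constructor
# may differ):
def to_hidden_layer_sizes(config):
    fractions = [config[key] for key in sorted(config)
                 if key.startswith('@hidden_layer_size')]
    if not fractions:
        return ()  # Zero hidden layers.
    total = sum(fractions) or 1.0
    budget = config['@neurons']
    return tuple(max(1, round(budget * f / total)) for f in fractions)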
def cs_impl(cls, data):
    return HPTree(node={}, children=[])
def cs_impl(cls, data=None):
    node = {
        'sampling_strategy': ['c', ['not minority', 'not majority', 'all']]
    }
    return HPTree(node, children=[])
def cs_impl(cls, data):
    cls.check_data(data)
    # TODO: set random_state.
    node = {'n_components': ['z', [1, data.n_attributes()]]}
    node.update(cls.specific_node(data))
    return HPTree(node, children=[])
def cs_impl(self):
    node = {'@nb_type': ['c', ["MultinomialNB", "ComplementNB"]]}
    return HPTree(node=node, children=[])
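# '@nb_type' is auxiliary: it names the concrete estimator rather than a
# hyperparameter of one. A sketch of how it could be resolved at build
# time, assuming the value is mapped to sklearn's naive Bayes classes
# (build_nb is an illustrative name, not part of the library):
from sklearn.naive_bayes import ComplementNB, MultinomialNB

def build_nb(config):
    estimator_class = {'MultinomialNB': MultinomialNB,
                       'ComplementNB': ComplementNB}[config.pop('@nb_type')]
    return estimator_class(**config)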
def cs_impl(self, data=None):
    return HPTree(node=self.freeze_hptree(), children=[],
                  name=self.name + self.components[0].name)
def rec(nnd, child=None):
    children = [] if child is None else [child]
    name, node = nnd
    return HPTree(name=name, node=node, children=children)
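# rec turns a (name, node) pair into a single-child HPTree. Folding it
# over a list of pairs therefore yields a linear chain; an illustrative
# use, assuming callers compose it this way (chain is a hypothetical
# helper, not part of the library):
from functools import reduce

def chain(pairs):
    # Build from the leaf up: each previously built tree becomes the
    # child of the next pair, so the first pair ends up as the root.
    return reduce(lambda child, nnd: rec(nnd, child), reversed(pairs), None)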
def cs_impl(self):
    return HPTree({'oper': ['c', ['+', '-', '*', '.']]}, [])
def cs_impl(cls, data):
    return HPTree({}, [])
def cs_impl(cls, data):
    # TODO: check if it would be better to adopt a 'z' hyperparameter.
    return HPTree(node={'ratio': ['r', [1e-05, 1]]}, children=[])