def knn_model_tpe():
    """Tune a PCA + k-NN pipeline with TPE and print its test-set scores.

    Uses ``x_train``, ``y_train``, ``x_test`` and ``y_test`` from the
    enclosing scope (they are not parameters). Prints the F1 score, the
    accuracy and the best model found; returns nothing.
    """
    estim = HyperoptEstimator(
        classifier=knn('my_clf'),
        preprocessing=[pca('my_pca')],
        algo=tpe.suggest,
        max_evals=150,
        trial_timeout=60,
        verbose=0,
    )
    estim.fit(x_train, y_train)

    # Predict once and reuse the result for both metrics instead of running
    # the fitted pipeline over the test set twice.
    y_pred = estim.predict(x_test)

    # scikit-learn metrics are declared as metric(y_true, y_pred).
    print("f1score", f1_score(y_test, y_pred))
    print("accuracy score", accuracy_score(y_test, y_pred))
    print(estim.best_model())
def bench_classifiers(name):
    """Return a hyperopt choice over a fixed set of benchmark classifiers.

    Each candidate search space is labelled ``name`` plus a dotted suffix.
    The xgboost candidate is appended only when the module-level ``xgboost``
    name is truthy. The choice node itself is labelled ``name``.
    """
    # NOTE(review): the trailing 'boo' / 'eey' marks are carried over from
    # the original author; their meaning is not documented here.
    candidates = [
        ada_boost(name + '.ada_boost'),  # boo
        gaussian_nb(name + '.gaussian_nb'),  # eey
        knn(name + '.knn', sparse_data=True),  # eey
        linear_discriminant_analysis(
            name + '.linear_discriminant_analysis',
            n_components=1,
        ),  # eey
        random_forest(name + '.random_forest'),  # boo
        sgd(name + '.sgd'),  # eey
    ]

    if xgboost:
        xgb_space = xgboost_classification(name + '.xgboost')  # boo
        candidates.append(xgb_space)

    choice_label = '%s' % name
    return hp.choice(choice_label, candidates)
def tpe_classifier(name='clf'):
    """Build a hyperopt choice over seven classifier search spaces.

    The candidates are, in order: Gaussian naive Bayes, liblinear SVC,
    decision tree, k-NN, extra trees, random forest and gradient boosting.
    Every hyperparameter is given an explicit hyperopt expression with a
    fixed label.

    Args:
        name: Label of the top-level ``hp.choice`` node.

    Returns:
        The ``hp.choice`` node selecting among the seven candidates.
    """

    def shifted_int(label, upper, shift):
        # Integer drawn by hp.randint(label, upper), moved up by `shift`.
        return hp.randint(label, upper) + shift

    def twentieth_steps(label):
        # hp.randint(label, 20) scaled to steps of 0.05, starting at 0.05.
        return hp.randint(label, 20) * 0.05 + 0.05

    # penalty / loss / dual are drawn jointly from four fixed combinations
    # rather than independently of one another.
    liblinear_combo = hp.choice('liblinear_combination', [
        {'penalty': "l1", 'loss': "squared_hinge", 'dual': False},
        {'penalty': "l2", 'loss': "hinge", 'dual': True},
        {'penalty': "l2", 'loss': "squared_hinge", 'dual': True},
        {'penalty': "l2", 'loss': "squared_hinge", 'dual': False},
    ])

    nb_space = gaussian_nb('hpsklearn_gaussian_nb')

    svc_space = liblinear_svc(
        'hpsklearn_liblinear_svc',
        C=hp.choice(
            'hpsklearn_liblinear_svc_c',
            [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1., 5., 10., 15., 20., 25.],
        ),
        loss=liblinear_combo['loss'],
        penalty=liblinear_combo['penalty'],
        dual=liblinear_combo['dual'],
        tol=hp.choice(
            'hpsklearn_liblinear_svc_tol',
            [1e-5, 1e-4, 1e-3, 1e-2, 1e-1],
        ),
    )

    tree_space = decision_tree(
        'decision_tree',
        criterion=hp.choice('decision_tree_criterion', ["gini", "entropy"]),
        max_depth=shifted_int('decision_tree_max_depth', 10, 1),
        min_samples_split=shifted_int('decision_tree_min_samples_split', 19, 2),
        min_samples_leaf=shifted_int('decision_tree_min_samples_leaf', 20, 1),
    )

    knn_space = knn(
        'knn',
        n_neighbors=shifted_int('knn_n', 100, 1),
        weights=hp.choice('knn_weights', ['uniform', 'distance']),
        p=hp.choice('knn_p', [1, 2]),
    )

    et_space = extra_trees(
        'et',
        n_estimators=100,
        criterion=hp.choice('et_criterion', ["gini", "entropy"]),
        max_features=twentieth_steps('et_max_features'),
        min_samples_split=shifted_int('et_min_samples_split', 19, 2),
        min_samples_leaf=shifted_int('et_min_samples_leaf', 20, 1),
        bootstrap=hp.choice('et_bootstrap', [True, False]),
    )

    rf_space = random_forest(
        'rf',
        n_estimators=100,
        criterion=hp.choice('rf_criterion', ["gini", "entropy"]),
        max_features=twentieth_steps('rf_max_features'),
        min_samples_split=shifted_int('rf_min_samples_split', 19, 2),
        min_samples_leaf=shifted_int('rf_min_samples_leaf', 20, 1),
        bootstrap=hp.choice('rf_bootstrap', [True, False]),
    )

    gb_space = gradient_boosting(
        'gb',
        n_estimators=100,
        learning_rate=hp.choice('gb_lr', [1e-3, 1e-2, 1e-1, 0.5, 1.]),
        max_depth=shifted_int('gb_max_depth', 10, 1),
        min_samples_split=shifted_int('gb_min_samples_split', 19, 2),
        min_samples_leaf=shifted_int('gb_min_samples_leaf', 20, 1),
        subsample=twentieth_steps('gb_subsample'),
        max_features=twentieth_steps('gb_max_features'),
    )

    candidates = [
        nb_space,
        svc_space,
        tree_space,
        knn_space,
        et_space,
        rf_space,
        gb_space,
    ]
    return hp.choice(name, candidates)
def find_the_best(X_train, X_test, y_train, y_test):
    """Search k-NN hyperparameters with TPE, plot progress, report accuracy.

    The search is stepped one trial at a time through the estimator's
    ``fit_iter`` generator so a ``PlotHelper`` can be updated after every
    trial. The best model is then retrained on the full training set and
    scored on the test set. Results are printed; nothing is returned.

    Args:
        X_train: Training features (converted with ``np.asarray``).
        X_test: Test features (converted with ``np.asarray``).
        y_train: Training labels (converted with ``np.asarray``).
        y_test: Test labels (converted with ``np.asarray``).
    """
    from hyperopt import tpe
    import hpsklearn
    import hpsklearn.demo_support
    from hpsklearn import knn

    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test)
    y_test = np.asarray(y_test)

    estimator = hpsklearn.HyperoptEstimator(
        preprocessing=[],
        classifier=knn('myknn'),
        algo=tpe.suggest,
        # trial_timeout=500.0,  # seconds
        max_evals=100,
    )

    fit_iterator = estimator.fit_iter(X_train, y_train)
    # Prime the generator. The built-in next() is used because generator
    # objects have no .next() method on Python 3.
    next(fit_iterator)

    plot_helper = hpsklearn.demo_support.PlotHelper(
        estimator, mintodate_ylim=(-.01, .05))
    while len(estimator.trials.trials) < estimator.max_evals:
        fit_iterator.send(1)  # -- try one more model
        plot_helper.post_iter()
    plot_helper.post_loop()
    plt.show()

    # -- Model selection was done on a subset of the training data.
    # -- Now that we've picked a model, train on all training data.
    estimator.retrain_best_model_on_full_data(X_train, y_train)

    print('Best preprocessing pipeline:')
    for pp in estimator._best_preprocs:
        print(pp)
    print()
    print('Best classifier:\n', estimator._best_learner)
    print(estimator.best_model())

    test_predictions = estimator.predict(X_test)
    # Fraction of matching predictions, expressed as a percentage.
    acc_in_percent = 100 * np.mean(test_predictions == y_test)
    print()
    print('Prediction accuracy in generalization is ', acc_in_percent)
Y_test_mini.sum(axis=0) ]))) print(seed_val) print("\ndata is loaded - next step > model testing\n") n_job = 6 select_classes = [0, 1, 2, 3, 4, 5] val_dist = X_val_mini.shape[0] / X_train_mini.shape[0] name = 'my_est_oVa' tic_mod_all = time.time() select_alg = [ ada_boost(name + '.ada_boost'), gaussian_nb(name + '.gaussian_nb'), knn(name + '.knn', sparse_data=True), linear_discriminant_analysis(name + '.linear_discriminant_analysis', n_components=1), random_forest(name + '.random_forest'), sgd(name + '.sgd'), xgboost_classification(name + '.xgboost') ] # fitting models estim_one_vs_rest = dict() # scoring models algo_scoring = dict() save_score_path = r'C:/Users/anden/PycharmProjects/NovelEEG/results' for alg in [select_alg[args.index]]: tic_mod = time.time()