from hpsklearn import HyperoptEstimator, gaussian_nb, pca
from hyperopt import tpe
from sklearn.metrics import accuracy_score, f1_score


# Hyperparameter search over a GaussianNB classifier with PCA preprocessing,
# driven by the TPE algorithm. Assumes x_train, y_train, x_test, y_test are
# defined in the surrounding scope.
def gaussian_nb_model_tpe():
    estim = HyperoptEstimator(classifier=gaussian_nb('my_clf'),
                              preprocessing=[pca('my_pca')],
                              algo=tpe.suggest,
                              max_evals=150,
                              trial_timeout=60,
                              verbose=0)
    estim.fit(x_train, y_train)
    preds = estim.predict(x_test)
    # sklearn metrics expect (y_true, y_pred), in that order
    print("f1 score", f1_score(y_test, preds))
    print("accuracy score", accuracy_score(y_test, preds))
    print(estim.best_model())
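# --- Hypothetical setup for the snippet above (a sketch, not from the source):
# it assumes the x_train/x_test/y_train/y_test names already exist, e.g. from a
# standard sklearn split on a toy dataset.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

data = load_iris()
x_train, x_test, y_train, y_test = train_test_split(
    data.data, data.target, test_size=0.25, random_state=0)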
from hyperopt import hp
from hpsklearn import (ada_boost, gaussian_nb, knn, linear_discriminant_analysis,
                       random_forest, sgd, xgboost_classification)

# Assumed guard for the optional xgboost dependency checked below; the source
# only tests an `xgboost` flag, and this mirrors hpsklearn's own optional import.
try:
    import xgboost
except ImportError:
    xgboost = None


# Build a single hp.choice search space over several candidate classifiers;
# the xgboost entry is included only when the optional dependency is installed.
def bench_classifiers(name):
    classifiers = [
        ada_boost(name + '.ada_boost'),  # boo
        gaussian_nb(name + '.gaussian_nb'),  # eey
        knn(name + '.knn', sparse_data=True),  # eey
        linear_discriminant_analysis(name + '.linear_discriminant_analysis',
                                     n_components=1),  # eey
        random_forest(name + '.random_forest'),  # boo
        sgd(name + '.sgd')  # eey
    ]
    if xgboost:
        classifiers.append(xgboost_classification(name + '.xgboost'))  # boo
    return hp.choice('%s' % name, classifiers)
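# --- Usage sketch (assumed, not from the source): the hp.choice space returned
# by bench_classifiers plugs directly into HyperoptEstimator's classifier slot,
# so TPE searches over both the classifier family and its hyperparameters.
estim = HyperoptEstimator(classifier=bench_classifiers('bench'),
                          algo=tpe.suggest,
                          max_evals=50,
                          trial_timeout=60)
estim.fit(x_train, y_train)
print(estim.best_model())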
from hyperopt import hp
from hpsklearn import (decision_tree, extra_trees, gaussian_nb, gradient_boosting,
                       knn, liblinear_svc, random_forest)


# TPE search space over six classifier families. liblinear_svc draws its
# (penalty, loss, dual) triple from the valid combinations only, since
# LinearSVC rejects e.g. penalty='l1' with loss='hinge'.
def tpe_classifier(name='clf'):
    linear_svc_space = hp.choice('liblinear_combination',
                                 [{'penalty': "l1", 'loss': "squared_hinge", 'dual': False},
                                  {'penalty': "l2", 'loss': "hinge", 'dual': True},
                                  {'penalty': "l2", 'loss': "squared_hinge", 'dual': True},
                                  {'penalty': "l2", 'loss': "squared_hinge", 'dual': False}])
    return hp.choice(name, [
        gaussian_nb('hpsklearn_gaussian_nb'),
        liblinear_svc('hpsklearn_liblinear_svc',
                      C=hp.choice('hpsklearn_liblinear_svc_c',
                                  [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1., 5., 10., 15., 20., 25.]),
                      loss=linear_svc_space['loss'],
                      penalty=linear_svc_space['penalty'],
                      dual=linear_svc_space['dual'],
                      tol=hp.choice('hpsklearn_liblinear_svc_tol',
                                    [1e-5, 1e-4, 1e-3, 1e-2, 1e-1])),
        decision_tree('decision_tree',
                      criterion=hp.choice('decision_tree_criterion', ["gini", "entropy"]),
                      max_depth=hp.randint('decision_tree_max_depth', 10) + 1,
                      min_samples_split=hp.randint('decision_tree_min_samples_split', 19) + 2,
                      min_samples_leaf=hp.randint('decision_tree_min_samples_leaf', 20) + 1),
        knn('knn',
            n_neighbors=hp.randint('knn_n', 100) + 1,
            weights=hp.choice('knn_weights', ['uniform', 'distance']),
            p=hp.choice('knn_p', [1, 2])),
        extra_trees('et',
                    n_estimators=100,
                    criterion=hp.choice('et_criterion', ["gini", "entropy"]),
                    max_features=hp.randint('et_max_features', 20) * 0.05 + 0.05,
                    min_samples_split=hp.randint('et_min_samples_split', 19) + 2,
                    min_samples_leaf=hp.randint('et_min_samples_leaf', 20) + 1,
                    bootstrap=hp.choice('et_bootstrap', [True, False])),
        random_forest('rf',
                      n_estimators=100,
                      criterion=hp.choice('rf_criterion', ["gini", "entropy"]),
                      max_features=hp.randint('rf_max_features', 20) * 0.05 + 0.05,
                      min_samples_split=hp.randint('rf_min_samples_split', 19) + 2,
                      min_samples_leaf=hp.randint('rf_min_samples_leaf', 20) + 1,
                      bootstrap=hp.choice('rf_bootstrap', [True, False])),
        gradient_boosting('gb',
                          n_estimators=100,
                          learning_rate=hp.choice('gb_lr', [1e-3, 1e-2, 1e-1, 0.5, 1.]),
                          max_depth=hp.randint('gb_max_depth', 10) + 1,
                          min_samples_split=hp.randint('gb_min_samples_split', 19) + 2,
                          min_samples_leaf=hp.randint('gb_min_samples_leaf', 20) + 1,
                          subsample=hp.randint('gb_subsample', 20) * 0.05 + 0.05,
                          max_features=hp.randint('gb_max_features', 20) * 0.05 + 0.05)
    ])
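# --- Usage sketch (assumed): run the combined TPE space with no preprocessing,
# reusing the split defined earlier.
estim = HyperoptEstimator(classifier=tpe_classifier('clf'),
                          preprocessing=[],
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)
estim.fit(x_train, y_train)
print(estim.score(x_test, y_test))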
import pandas as pd
from hpsklearn import HyperoptEstimator, gaussian_nb
from hyperopt import tpe


# Load pre-split train/test CSVs (label in column 0, features from column 2
# onward) and tune a GaussianNB classifier with TPE.
def main():
    df_train = pd.read_csv('../train_dataset.csv')
    df_test = pd.read_csv('../test_dataset.csv')
    X_train, y_train = df_train.iloc[:, 2:].values, df_train.iloc[:, 0].values
    X_test, y_test = df_test.iloc[:, 2:].values, df_test.iloc[:, 0].values
    estim = HyperoptEstimator(classifier=gaussian_nb('myNB'),
                              algo=tpe.suggest,
                              max_evals=150,
                              trial_timeout=120,
                              verbose=True)
    estim.fit(X_train, y_train)
    print("\n\n{}\n\n".format(estim.score(X_test, y_test)))
    print("\n\n{}\n\n".format(estim.best_model()))


if __name__ == '__main__':
    main()
# Excerpt from a mid-script section; assumes numpy as np, time, an argparse
# `args`, and the hpsklearn component imports from the surrounding module.
# (The head of this print statement was cut off in the source; the string
# formatting shown here is assumed.)
print("label counts per split:\n{}".format(np.array([Y_val_mini.sum(axis=0),
                                                     Y_test_mini.sum(axis=0)])))
print(seed_val)
print("\ndata is loaded - next step > model testing\n")

n_job = 6
select_classes = [0, 1, 2, 3, 4, 5]
val_dist = X_val_mini.shape[0] / X_train_mini.shape[0]
name = 'my_est_oVa'
tic_mod_all = time.time()
select_alg = [ada_boost(name + '.ada_boost'),
              gaussian_nb(name + '.gaussian_nb'),
              knn(name + '.knn', sparse_data=True),
              linear_discriminant_analysis(name + '.linear_discriminant_analysis', n_components=1),
              random_forest(name + '.random_forest'),
              sgd(name + '.sgd'),
              xgboost_classification(name + '.xgboost')]

# fitting models
estim_one_vs_rest = dict()
# scoring models
algo_scoring = dict()
save_score_path = r'C:/Users/anden/PycharmProjects/NovelEEG/results'

# run only the algorithm selected on the command line
for alg in [select_alg[args.index]]:
    ...  # loop body truncated in the source; the variant below shows how it opens
print("\ndata is loaded - next step > model testing\n") n_job = 6 select_classes = [0, 1, 2, 3, 4, 5] val_dist = X_val_mini.shape[0] / X_train_mini.shape[0] name = 'my_est_oVa' tic_mod_all = time.time() #select_alg = [ada_boost(name + '.ada_boost'), # gaussian_nb(name + '.gaussian_nb'), # knn(name + '.knn', sparse_data=True), # linear_discriminant_analysis(name + '.linear_discriminant_analysis', n_components=1), # random_forest(name + '.random_forest'), # sgd(name + '.sgd'), # xgboost_classification(name + '.xgboost')] select_alg = [gaussian_nb(name + '.gaussian_nb')] # fitting models estim_one_vs_rest = dict() # scoring models algo_scoring = dict() save_score_path = os.getcwd() + r'/tmp' for alg in select_alg: tic_mod = time.time() print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n", "running on %s" % (alg.name + '.one_V_all'), "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") clf_method = one_vs_rest(str(alg.name + '.one_V_all'), estimator=alg, n_jobs=n_job) estim_one_vs_rest[alg.name + '.one_V_all'] = HyperoptEstimator(