Example #1
def knn_model_tpe():
    # TPE search over a KNN classifier with PCA preprocessing.
    from hyperopt import tpe
    from hpsklearn import HyperoptEstimator, knn, pca
    from sklearn.metrics import accuracy_score, f1_score

    estim = HyperoptEstimator(classifier=knn('my_clf'),
                              preprocessing=[pca('my_pca')],
                              algo=tpe.suggest,
                              max_evals=150,
                              trial_timeout=60,
                              verbose=0)
    # x_train, x_test, y_train, y_test are assumed to be defined in the caller's scope.
    estim.fit(x_train, y_train)
    print("f1 score", f1_score(y_test, estim.predict(x_test)))
    print("accuracy score", accuracy_score(y_test, estim.predict(x_test)))
    print(estim.best_model())
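
A minimal way to exercise the function above. Since it reads x_train, x_test, y_train, y_test from the enclosing scope, this sketch defines them first; the breast-cancer dataset is an illustrative choice that keeps the task binary, matching f1_score's default settings:

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

# Binary task, so f1_score's default average='binary' applies.
x_train, x_test, y_train, y_test = train_test_split(
    *load_breast_cancer(return_X_y=True), test_size=0.2, random_state=0)
knn_model_tpe()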
Example #2
from hyperopt import hp
from hpsklearn import (ada_boost, gaussian_nb, knn,
                       linear_discriminant_analysis, random_forest,
                       sgd, xgboost_classification)

try:
    import xgboost  # optional dependency; used only as a feature flag below
except ImportError:
    xgboost = None


def bench_classifiers(name):
    # Hyperopt search space over a benchmark set of hpsklearn classifiers.
    classifiers = [
        ada_boost(name + '.ada_boost'),
        gaussian_nb(name + '.gaussian_nb'),
        knn(name + '.knn', sparse_data=True),
        linear_discriminant_analysis(name + '.linear_discriminant_analysis', n_components=1),
        random_forest(name + '.random_forest'),
        sgd(name + '.sgd')
    ]
    if xgboost:
        classifiers.append(xgboost_classification(name + '.xgboost'))
    return hp.choice(name, classifiers)
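
A hedged sketch of how such a space can be consumed: HyperoptEstimator accepts a hyperopt search space as its classifier argument, so TPE searches over all of the classifier families at once. The budget settings here are illustrative, not from the original:

from hyperopt import tpe
from hpsklearn import HyperoptEstimator

estim = HyperoptEstimator(classifier=bench_classifiers('bench'),
                          preprocessing=[],
                          algo=tpe.suggest,
                          max_evals=50)
# estim.fit(X_train, y_train) would then run the benchmark search.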
Example #3
from hyperopt import hp
from hpsklearn import (decision_tree, extra_trees, gaussian_nb,
                       gradient_boosting, knn, liblinear_svc, random_forest)


def tpe_classifier(name='clf'):
    # Only certain (penalty, loss, dual) combinations are valid for LinearSVC,
    # so they are sampled jointly rather than independently.
    linear_svc_space = hp.choice('liblinear_combination',
                                 [{'penalty': "l1", 'loss': "squared_hinge", 'dual': False},
                                  {'penalty': "l2", 'loss': "hinge", 'dual': True},
                                  {'penalty': "l2", 'loss': "squared_hinge", 'dual': True},
                                  {'penalty': "l2", 'loss': "squared_hinge", 'dual': False}])
    return hp.choice(name,
                     [gaussian_nb('hpsklearn_gaussian_nb'),
                      liblinear_svc('hpsklearn_liblinear_svc',
                                    C=hp.choice('hpsklearn_liblinear_svc_c',
                                                [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1., 5., 10., 15., 20., 25.]),
                                    loss=linear_svc_space['loss'],
                                    penalty=linear_svc_space['penalty'],
                                    dual=linear_svc_space['dual'],
                                    tol=hp.choice('hpsklearn_liblinear_svc_tol', [1e-5, 1e-4, 1e-3, 1e-2, 1e-1])
                                    ),
                      decision_tree('decision_tree',
                                    criterion=hp.choice('decision_tree_criterion', ["gini", "entropy"]),
                                    max_depth=hp.randint('decision_tree_max_depth', 10) + 1,
                                    min_samples_split=hp.randint('decision_tree_min_samples_split', 19) + 2,
                                    min_samples_leaf=hp.randint('decision_tree_min_samples_leaf', 20) + 1),
                      knn('knn',
                          n_neighbors=hp.randint('knn_n', 100) + 1,
                          weights=hp.choice('knn_weights', ['uniform', 'distance']),
                          p=hp.choice('knn_p', [1, 2])),
                      extra_trees('et',
                                  n_estimators=100,
                                  criterion=hp.choice('et_criterion', ["gini", "entropy"]),
                                  max_features=hp.randint('et_max_features', 20) * 0.05 + 0.05,
                                  min_samples_split=hp.randint('et_min_samples_split', 19) + 2,
                                  min_samples_leaf=hp.randint('et_min_samples_leaf', 20) + 1,
                                  bootstrap=hp.choice('et_bootstrap', [True, False])),
                      random_forest('rf',
                                    n_estimators=100,
                                    criterion=hp.choice('rf_criterion', ["gini", "entropy"]),
                                    max_features=hp.randint('rf_max_features', 20) * 0.05 + 0.05,
                                    min_samples_split=hp.randint('rf_min_samples_split', 19) + 2,
                                    min_samples_leaf=hp.randint('rf_min_samples_leaf', 20) + 1,
                                    bootstrap=hp.choice('rf_bootstrap', [True, False])),
                      gradient_boosting('gb',
                                        n_estimators=100,
                                        learning_rate=hp.choice('gb_lr', [1e-3, 1e-2, 1e-1, 0.5, 1.]),
                                        max_depth=hp.randint('gb_max_depth', 10) + 1,
                                        min_samples_split=hp.randint('gb_min_samples_split', 19) + 2,
                                        min_samples_leaf=hp.randint('gb_min_samples_leaf', 20) + 1,
                                        subsample=hp.randint('gb_subsample', 20) * 0.05 + 0.05,
                                        max_features=hp.randint('gb_max_features', 20) * 0.05 + 0.05,
                                        )
                      ])
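
The returned space plugs into HyperoptEstimator the same way as in the previous example; a minimal sketch with illustrative budget settings:

from hyperopt import tpe
from hpsklearn import HyperoptEstimator

estim = HyperoptEstimator(classifier=tpe_classifier('clf'),
                          preprocessing=[],
                          algo=tpe.suggest,
                          max_evals=100,
                          trial_timeout=120)
# estim.fit(X_train, y_train); print(estim.best_model())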
Example #4
def find_the_best(X_train, X_test, y_train, y_test):
    from hyperopt import tpe
    import hpsklearn
    import hpsklearn.demo_support
    import numpy as np
    import matplotlib.pyplot as plt
    from hpsklearn import knn

    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test)
    y_test = np.asarray(y_test)

    estimator = hpsklearn.HyperoptEstimator(
        preprocessing=[],
        classifier=knn('myknn'),
        algo=tpe.suggest,
        # trial_timeout=500.0,  # seconds
        max_evals=100,
    )

    fit_iterator = estimator.fit_iter(X_train, y_train)
    next(fit_iterator)  # prime the generator before sending trial budgets
    plot_helper = hpsklearn.demo_support.PlotHelper(estimator,
                                                    mintodate_ylim=(-.01, .05))
    while len(estimator.trials.trials) < estimator.max_evals:
        fit_iterator.send(1)  # -- try one more model
        plot_helper.post_iter()

    plot_helper.post_loop()
    plt.show()
    # -- Model selection was done on a subset of the training data.
    # -- Now that we've picked a model, train on all training data.
    estimator.retrain_best_model_on_full_data(X_train, y_train)

    print('Best preprocessing pipeline:')
    for pp in estimator._best_preprocs:
        print(pp)
    print()
    print('Best classifier:\n', estimator._best_learner)

    print(estimator.best_model())
    test_predictions = estimator.predict(X_test)
    acc_in_percent = 100 * np.mean(test_predictions == y_test)
    print()
    print('Prediction accuracy on the held-out test set: %.1f%%' % acc_in_percent)
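
A minimal driver for find_the_best; the digits dataset and the 75/25 split are illustrative assumptions:

from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split

X, y = load_digits(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, random_state=0)
find_the_best(X_train, X_test, y_train, y_test)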
Example #5
                   Y_test_mini.sum(axis=0)
               ])))
        print(seed_val)

        print("\ndata is loaded  - next step > model testing\n")

        n_job = 6
        select_classes = [0, 1, 2, 3, 4, 5]
        val_dist = X_val_mini.shape[0] / X_train_mini.shape[0]
        name = 'my_est_oVa'

        tic_mod_all = time.time()
        select_alg = [
            ada_boost(name + '.ada_boost'),
            gaussian_nb(name + '.gaussian_nb'),
            knn(name + '.knn', sparse_data=True),
            linear_discriminant_analysis(name +
                                         '.linear_discriminant_analysis',
                                         n_components=1),
            random_forest(name + '.random_forest'),
            sgd(name + '.sgd'),
            xgboost_classification(name + '.xgboost')
        ]

        # fitting models
        estim_one_vs_rest = dict()
        # scoring models
        algo_scoring = dict()
        save_score_path = r'C:/Users/anden/PycharmProjects/NovelEEG/results'
        for alg in [select_alg[args.index]]:
            tic_mod = time.time()