Esempio n. 1
0
def test_demo_boston():
    """Run a short hyperopt-sklearn regression demo on the Boston dataset.

    Holds out 25% of the data, steps the hyperparameter search one trial at
    a time while printing each trial's latest loss to stderr, retrains the
    best model on the training split and prints its score on the held-out
    split.
    """
    # NOTE(review): `load_boston` was removed in scikit-learn 1.2 -- confirm
    # the scikit-learn version used here still provides it.
    dataset = datasets.load_boston()
    split = train_test_split(dataset.data, dataset.target,
                             test_size=.25, random_state=1)
    X_train, X_test, y_train, y_test = split

    preprocessing_space = hpsklearn.components.any_preprocessing('pp')
    regressor_space = hpsklearn.components.any_regressor('reg')
    estimator = hpsklearn.HyperoptEstimator(
        preprocessing=preprocessing_space,
        regressor=regressor_space,
        algo=tpe.suggest,
        trial_timeout=15.0,  # seconds
        max_evals=10,
        seed=1)

    # -- Step-by-step equivalent of `estimator.fit()` starts here.
    print('', file=sys.stderr)
    print('====Demo regression on Boston dataset====', file=sys.stderr)
    search = estimator.fit_iter(X_train, y_train)
    next(search)  # advance once before values are sent in
    completed = 0
    while len(estimator.trials.trials) < estimator.max_evals:
        search.send(1)  # -- try one more model
        completed += 1
        latest_loss = estimator.trials.losses()[-1]
        print('Trial', completed, 'loss:', latest_loss, file=sys.stderr)
    estimator.retrain_best_model_on_full_data(X_train, y_train)
    # -- Step-by-step equivalent of `estimator.fit()` ends here.

    held_out_score = estimator.score(X_test, y_test)
    print('Test R2:', held_out_score, file=sys.stderr)
    print('====End of demo====', file=sys.stderr)
Esempio n. 2
0
def tune(X_train, X_test, y_train, y_test):
    """Search preprocessing and classifier choices, then report test accuracy.

    Steps a hyperopt-sklearn search one trial at a time, printing each
    trial's latest loss to stderr. When the trial budget is used up, the best
    model is retrained on the given training data and its score on the test
    data, plus the model itself, are printed to stderr.

    Args:
        X_train: Training features passed to the estimator.
        X_test: Test features used for the final score.
        y_train: Training targets passed to the estimator.
        y_test: Test targets used for the final score.
    """
    preprocessing_space = hpsklearn.components.any_preprocessing('pp')
    classifier_space = hpsklearn.components.any_classifier('clf')
    estimator = hpsklearn.HyperoptEstimator(
        preprocessing=preprocessing_space,
        classifier=classifier_space,
        algo=tpe.suggest,
        trial_timeout=300,  # seconds
        max_evals=10,
        seed=1)

    print('\n====Demo classification on Iris dataset====', file=sys.stderr)
    search = estimator.fit_iter(X_train, y_train)
    next(search)  # advance once before values are sent in
    completed = 0
    while len(estimator.trials.trials) < estimator.max_evals:
        search.send(1)  # -- try one more model
        completed += 1
        latest_loss = estimator.trials.losses()[-1]
        print('Trial', completed, 'loss:', latest_loss, file=sys.stderr)
    estimator.retrain_best_model_on_full_data(X_train, y_train)

    accuracy = estimator.score(X_test, y_test)
    print('Test accuracy:', accuracy, file=sys.stderr)
    print('Best model:', estimator.best_model(), file=sys.stderr)
    print('====End of demo====', file=sys.stderr)
Esempio n. 3
0
def find_the_best(X_train, X_test, y_train, y_test):
    """Run a k-NN hyperparameter search and print the best model found.

    The inputs are converted to NumPy arrays, a hyperopt-sklearn estimator
    restricted to the `knn` classifier (no preprocessing) is searched one
    trial at a time with live plotting, and the best model is retrained on
    the full training data. The best preprocessing steps, classifier and
    model are printed, followed by the accuracy (in percent) on the test set.

    Args:
        X_train: Array-like training features.
        X_test: Array-like test features.
        y_train: Array-like training labels.
        y_test: Array-like test labels.
    """
    from hyperopt import tpe
    import hpsklearn
    import hpsklearn.demo_support
    from hpsklearn import knn

    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test)
    y_test = np.asarray(y_test)

    estimator = hpsklearn.HyperoptEstimator(
        preprocessing=[],
        classifier=knn('myknn'),
        algo=tpe.suggest,
        # trial_timeout=500.0,  # seconds
        max_evals=100,
    )

    fit_iterator = estimator.fit_iter(X_train, y_train)
    # Use the built-in next(): generator objects have no `.next()` method on
    # Python 3, and the print() calls below already assume Python 3 semantics.
    next(fit_iterator)
    plot_helper = hpsklearn.demo_support.PlotHelper(estimator,
                                                    mintodate_ylim=(-.01, .05))
    while len(estimator.trials.trials) < estimator.max_evals:
        fit_iterator.send(1)  # -- try one more model
        plot_helper.post_iter()

    plot_helper.post_loop()
    plt.show()
    # -- Model selection was done on a subset of the training data.
    # -- Now that we've picked a model, train on all training data.
    estimator.retrain_best_model_on_full_data(X_train, y_train)

    print('Best preprocessing pipeline:')
    for pp in estimator._best_preprocs:
        print(pp)
    print()
    print('Best classifier:\n', estimator._best_learner)

    print(estimator.best_model())
    test_predictions = estimator.predict(X_test)
    # Fraction of exact label matches, expressed as a percentage.
    acc_in_percent = 100 * np.mean(test_predictions == y_test)
    print()
    print('Prediction accuracy in generalization is ', acc_in_percent)
Esempio n. 4
0
            classifier = getattr(hpsklearn.components,
                                 args.classifier)(args.name + '.classifier')
    else:
        if args.regressor == 'any':
            regressor = hpsklearn.components.any_regressor(args.name +
                                                           '.regressor')
        else:
            regressor = getattr(hpsklearn.components,
                                args.regressor)(args.name + '.regressor')
    algos = {
        'rand.suggest': hyperopt.rand.suggest,
        'tpe.suggest': hyperopt.tpe.suggest
    }
    algo = algos[args.algo]
    estimator = hpsklearn.HyperoptEstimator(classifier=classifier,
                                            regressor=regressor,
                                            algo=algo,
                                            max_evals=args.max_evals)

    logger.info('fit hyperopt estimator')
    trials = None
    warm_start = False
    if args.mongo is not None:
        logger.info('connect to MongoDB at ' + args.mongo)
        trials = MongoTrials(args.mongo)
        warm_start = True
        estimator.trials = trials
    estimator.fit(X, y, n_folds=args.n_folds, warm_start=warm_start)

    best_model_file = os.path.join(args.output_dir, 'best_model')
    logger.info('save the best model to ' + best_model_file)
    if not os.path.exists(args.output_dir):
# Performance criterion
def compute_pred_score(y_true, y_pred):
    """Return the average penalty of predictions against the true labels.

    Each sample is judged by the product ``y_true * y_pred``: a product of
    -1 costs 10, a product of 0 costs 1, and any other product costs
    nothing. The summed cost is divided by the number of samples.

    Args:
        y_true: Array-like of true labels.
        y_pred: Array-like of predictions; every value must be -1, 0 or 1.

    Returns:
        The average penalty as a float.

    Raises:
        ValueError: If ``y_pred`` contains a value other than -1, 0 or 1.
        ZeroDivisionError: If the inputs are empty.
    """
    # Coerce to arrays so plain Python lists multiply element-wise instead
    # of raising TypeError.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if any(value not in (-1, 0, 1) for value in np.unique(y_pred)):
        raise ValueError('The predictions can contain only -1, 1, or 0!')
    y_comp = y_true * y_pred
    score = float(10 * np.sum(y_comp == -1) + np.sum(y_comp == 0))
    score /= y_comp.shape[0]
    return score


# Input files: training templates and labels, plus the testing templates.
X_train_fname = 'training_templates.csv'
y_train_fname = 'training_labels.txt'
X_test_fname = 'testing_templates.csv'
# The CSV files have no header row; keep only the raw value arrays.
X_train = pd.read_csv(X_train_fname, sep=',', header=None).values
X_test = pd.read_csv(X_test_fname, sep=',', header=None).values
# `np.int` was only an alias for the builtin `int` and was removed in
# NumPy 1.24, so pass the builtin directly.
y_train = np.loadtxt(y_train_fname, dtype=int)

# NOTE(review): not referenced anywhere in the visible code -- confirm
# whether it is used elsewhere.
n_estimators = 200

# Search over any preprocessing and any classifier with TPE, allowing
# 15 evaluations of at most 15 seconds each.
estimator = hpsklearn.HyperoptEstimator(
    preprocessing=hpsklearn.components.any_preprocessing('pp'),
    classifier=hpsklearn.components.any_classifier('clf'),
    algo=hyperopt.tpe.suggest,
    trial_timeout=15.0,  # seconds
    max_evals=15,
)

# NOTE(review): no fit call is visible between construction and this read of
# `_best_learner` -- confirm the fit step is not missing from the script.
print('Best classifier:\n', estimator._best_learner)