def test_demo_boston():
    """Run a short hyperparameter search for a regressor on the Boston data.

    The data returned by ``datasets.load_boston()`` is split 75/25 into
    train and test parts, an ``HyperoptEstimator`` is driven one trial at a
    time through ``fit_iter`` (printing the latest loss after each trial to
    stderr), the best model is retrained on the training split, and the
    test score is printed to stderr.
    """
    dataset = datasets.load_boston()
    X_train, X_test, y_train, y_test = train_test_split(
        dataset.data, dataset.target, test_size=.25, random_state=1)

    preprocessing_space = hpsklearn.components.any_preprocessing('pp')
    regressor_space = hpsklearn.components.any_regressor('reg')
    estimator = hpsklearn.HyperoptEstimator(
        preprocessing=preprocessing_space,
        regressor=regressor_space,
        algo=tpe.suggest,
        trial_timeout=15.0,  # seconds
        max_evals=10,
        seed=1,
    )

    # --- Step-by-step equivalent of `estimator.fit()` starts here ---
    print('', file=sys.stderr)
    print('====Demo regression on Boston dataset====', file=sys.stderr)

    search = estimator.fit_iter(X_train, y_train)
    # Advance to the first yield so that send() can be used afterwards.
    next(search)

    completed = 0
    while True:
        if len(estimator.trials.trials) >= estimator.max_evals:
            break
        search.send(1)  # evaluate one more candidate model
        completed += 1
        latest_loss = estimator.trials.losses()[-1]
        print('Trial', completed, 'loss:', latest_loss, file=sys.stderr)
        # Optional plots, left disabled:
        # hpsklearn.demo_support.scatter_error_vs_time(estimator)
        # hpsklearn.demo_support.bar_classifier_choice(estimator)

    estimator.retrain_best_model_on_full_data(X_train, y_train)
    # --- Step-by-step equivalent of `estimator.fit()` ends here ---

    test_score = estimator.score(X_test, y_test)
    print('Test R2:', test_score, file=sys.stderr)
    print('====End of demo====', file=sys.stderr)
def tune(X_train, X_test, y_train, y_test):
    """Search for a classifier on the given split and report the results.

    An ``HyperoptEstimator`` over any preprocessing and any classifier is
    advanced one trial at a time via ``fit_iter`` until ``max_evals`` trials
    exist; the loss of the most recent trial is printed after each step.
    The best model is then retrained on ``X_train``/``y_train`` and its
    score on ``X_test``/``y_test`` plus the best model itself are printed.
    All output goes to stderr; nothing is returned.
    """

    def report(*parts):
        # Every message of this demo is written to stderr.
        print(*parts, file=sys.stderr)

    search_config = dict(
        preprocessing=hpsklearn.components.any_preprocessing('pp'),
        classifier=hpsklearn.components.any_classifier('clf'),
        algo=tpe.suggest,
        trial_timeout=300,  # seconds
        max_evals=10,
        seed=1,
    )
    estimator = hpsklearn.HyperoptEstimator(**search_config)

    report('\n====Demo classification on Iris dataset====')

    stepper = estimator.fit_iter(X_train, y_train)
    # Advance to the first yield so that send() can be used afterwards.
    next(stepper)

    trial_no = 0
    while len(estimator.trials.trials) < estimator.max_evals:
        stepper.send(1)  # evaluate one more candidate model
        trial_no = trial_no + 1
        report('Trial', trial_no, 'loss:', estimator.trials.losses()[-1])
        # Optional plots, left disabled:
        # hpsklearn.scatter_error_vs_time(estimator)
        # hpsklearn.demo_support.bar_classifier_choice(estimator)

    estimator.retrain_best_model_on_full_data(X_train, y_train)

    report('Test accuracy:', estimator.score(X_test, y_test))
    report('Best model:', estimator.best_model())
    report('====End of demo====')
def find_the_best(X_train, X_test, y_train, y_test):
    """Tune a k-nearest-neighbours classifier with hyperopt and report it.

    The four inputs are converted to NumPy arrays, an ``HyperoptEstimator``
    restricted to the ``knn`` search space (no preprocessing, up to 100
    evaluations) is advanced one trial at a time while a ``PlotHelper``
    is updated after each step, and the resulting figure is shown. The
    best model is then retrained on the full training data, the chosen
    preprocessing steps and classifier are printed, and the prediction
    accuracy on the test data (in percent) is printed.

    Args:
        X_train: training features (anything ``np.asarray`` accepts).
        X_test: test features.
        y_train: training labels.
        y_test: test labels.

    Returns:
        None. Results are printed to stdout.
    """
    from hyperopt import tpe
    import hpsklearn
    import hpsklearn.demo_support
    from hpsklearn import knn

    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    X_test = np.asarray(X_test)
    y_test = np.asarray(y_test)

    estimator = hpsklearn.HyperoptEstimator(
        preprocessing=[],
        classifier=knn('myknn'),
        algo=tpe.suggest,
        # trial_timeout=500.0,  # seconds
        max_evals=100,
    )

    fit_iterator = estimator.fit_iter(X_train, y_train)
    # The builtin next() works on both Python 2.6+ and Python 3; the
    # `.next()` method only exists on Python 2 iterators.
    next(fit_iterator)

    plot_helper = hpsklearn.demo_support.PlotHelper(
        estimator, mintodate_ylim=(-.01, .05))
    while len(estimator.trials.trials) < estimator.max_evals:
        fit_iterator.send(1)  # -- try one more model
        plot_helper.post_iter()
    plot_helper.post_loop()
    plt.show()

    # -- Model selection was done on a subset of the training data.
    # -- Now that we've picked a model, train on all training data.
    estimator.retrain_best_model_on_full_data(X_train, y_train)

    print('Best preprocessing pipeline:')
    for pp in estimator._best_preprocs:
        print(pp)
    print()
    print('Best classifier:\n', estimator._best_learner)
    print(estimator.best_model())

    test_predictions = estimator.predict(X_test)
    # Fraction of exact label matches, expressed as a percentage.
    acc_in_percent = 100 * np.mean(test_predictions == y_test)
    print()
    print('Prediction accuracy in generalization is ', acc_in_percent)
classifier = getattr(hpsklearn.components, args.classifier)(args.name + '.classifier') else: if args.regressor == 'any': regressor = hpsklearn.components.any_regressor(args.name + '.regressor') else: regressor = getattr(hpsklearn.components, args.regressor)(args.name + '.regressor') algos = { 'rand.suggest': hyperopt.rand.suggest, 'tpe.suggest': hyperopt.tpe.suggest } algo = algos[args.algo] estimator = hpsklearn.HyperoptEstimator(classifier=classifier, regressor=regressor, algo=algo, max_evals=args.max_evals) logger.info('fit hyperopt estimator') trials = None warm_start = False if args.mongo is not None: logger.info('connect to MongoDB at ' + args.mongo) trials = MongoTrials(args.mongo) warm_start = True estimator.trials = trials estimator.fit(X, y, n_folds=args.n_folds, warm_start=warm_start) best_model_file = os.path.join(args.output_dir, 'best_model') logger.info('save the best model to ' + best_model_file) if not os.path.exists(args.output_dir):
# Performance criterion
def compute_pred_score(y_true, y_pred):
    """Return the weighted score of ``y_pred`` against ``y_true``.

    The element-wise product ``y_true * y_pred`` is formed. Each entry equal
    to -1 contributes 10, each entry equal to 0 contributes 1, and the sum
    is divided by the number of entries along the first axis.
    NOTE(review): presumably ``y_true`` holds labels in {-1, 1}, so that -1
    marks a wrong prediction and 0 an abstention -- confirm with the caller.

    Args:
        y_true: array of reference labels.
        y_pred: array of predictions; only -1, 0 and 1 are accepted.

    Returns:
        The score as a Python float.

    Raises:
        ValueError: if ``y_pred`` contains a value other than -1, 0 or 1.
    """
    allowed = (-1, 0, 1)
    if any(value not in allowed for value in np.unique(y_pred)):
        raise ValueError('The predictions can contain only -1, 1, or 0!')
    y_comp = y_true * y_pred
    score = float(10 * np.sum(y_comp == -1) + np.sum(y_comp == 0))
    score /= y_comp.shape[0]
    return score


X_train_fname = 'training_templates.csv'
y_train_fname = 'training_labels.txt'
X_test_fname = 'testing_templates.csv'

# The template files have no header row; work on the raw value arrays.
X_train = pd.read_csv(X_train_fname, sep=',', header=None).values
X_test = pd.read_csv(X_test_fname, sep=',', header=None).values
# `np.int` was only an alias of the builtin `int` and was removed in
# NumPy 1.24, so the builtin is used directly.
y_train = np.loadtxt(y_train_fname, dtype=int)

n_estimators = 200

estimator = hpsklearn.HyperoptEstimator(
    preprocessing=hpsklearn.components.any_preprocessing('pp'),
    classifier=hpsklearn.components.any_classifier('clf'),
    algo=hyperopt.tpe.suggest,
    trial_timeout=15.0,  # seconds
    max_evals=15,
)

# NOTE(review): no fit call on `estimator` is visible before this line, so
# `_best_learner` may be unset here -- confirm against the full script.
print('Best classifier:\n', estimator._best_learner)