def applyHPSKLEARN(X_train, y_train, X_test, y_test, SavePath, max_evals=100, trial_timeout=100, useSavedModels = True):
    """Fit (or reload) a hyperopt-sklearn regressor and predict on the test set.

    When ``useSavedModels`` is false, or no file exists at
    ``SavePath + ".pckl"``, a new ``HyperoptEstimator`` search is run on the
    training data and the fitted estimator is pickled to that path.
    Otherwise the previously pickled estimator is loaded instead of searching.

    :param X_train: training features passed to ``fit``.
    :param y_train: training targets passed to ``fit``.
    :param X_test: test features passed to ``score`` and ``predict``.
    :param y_test: test targets passed to ``score``.
    :param SavePath: path prefix of the cached model; ``".pckl"`` is appended.
    :param max_evals: forwarded to ``HyperoptEstimator(max_evals=...)``.
    :param trial_timeout: forwarded to ``HyperoptEstimator(trial_timeout=...)``.
    :param useSavedModels: reuse the pickled model when it exists.
    :return: the estimator's predictions for ``X_test``.
    """
    model_path = SavePath + ".pckl"

    if not useSavedModels or not os.path.isfile(model_path):
        HPSKLEARNModel = HyperoptEstimator(regressor=any_regressor('reg'),
                                           preprocessing=any_preprocessing('pre'),
                                           loss_fn=mean_squared_error,
                                           max_evals=max_evals,
                                           trial_timeout=trial_timeout,
                                           algo=tpe.suggest)
        # perform the search
        HPSKLEARNModel.fit(X_train, y_train)
        # Context manager guarantees the handle is flushed and closed even if
        # pickling fails part-way through.
        with open(model_path, 'wb') as model_file:
            pickle.dump(HPSKLEARNModel, model_file)
    else:
        # SECURITY NOTE: unpickling executes arbitrary code from the file.
        # Only point SavePath at model files this program wrote itself.
        with open(model_path, 'rb') as model_file:
            HPSKLEARNModel = pickle.load(model_file)

    # summarize performance
    score = HPSKLEARNModel.score(X_test, y_test)
    y_hat = HPSKLEARNModel.predict(X_test)
    print("HPSKLEARN - Score: ")
    # score() is the best learner's own score (R^2 for sklearn regressors),
    # not a mean absolute error, so it is labelled accordingly.
    print("R2: %.4f" % score)

    # summarize the best model
    print(HPSKLEARNModel.best_model())
    return y_hat
def train_hypsklearn(X_train, X_test, y_train, y_test, mtype, common_name_model, problemtype, classes, default_featurenames, transform_model, settings, model_session):
    """Run a hyperopt-sklearn search and pickle the fitted estimator.

    :param X_train: training features passed to ``fit``.
    :param X_test: test features passed to ``score``.
    :param y_train: training targets passed to ``fit``.
    :param y_test: test targets passed to ``score``.
    :param mtype: ``'classification'``/``'c'`` or ``'regression'``/``'r'``;
        surrounding whitespace is ignored.
    :param common_name_model: file-name stem; ``'.pickle'`` is appended.
    :param problemtype: accepted but not used by this function.
    :param classes: accepted but not used by this function.
    :param default_featurenames: accepted but not used by this function.
    :param transform_model: accepted but not used by this function.
    :param settings: accepted but not used by this function.
    :param model_session: accepted but not used by this function.
    :return: ``(modelname, modeldir, files)`` - the pickle file name, the
        current working directory, and a one-element list with the file name.
    :raises ValueError: if ``mtype`` is not one of the supported values.
    """
    modelname = common_name_model + '.pickle'
    files = list()

    # The original compared against ' classification' (leading space), so the
    # plain spelling never matched. Stripping accepts both spellings.
    kind = mtype.strip() if isinstance(mtype, str) else mtype

    if kind in ['classification', 'c']:
        estim = HyperoptEstimator(classifier=any_classifier('my_clf'),
                                  preprocessing=any_preprocessing('my_pre'),
                                  algo=tpe.suggest,
                                  max_evals=100,
                                  trial_timeout=120)
    elif kind in ['regression', 'r']:
        # A regressor search space must go in ``regressor=``; passing it as
        # ``classifier=`` makes the estimator treat the task as classification.
        estim = HyperoptEstimator(regressor=any_regressor('my_clf'),
                                  preprocessing=any_preprocessing('my_pre'),
                                  algo=tpe.suggest,
                                  max_evals=100,
                                  trial_timeout=120)
    else:
        # Fail early instead of hitting an unbound ``estim`` further down.
        raise ValueError(
            "mtype must be one of 'classification', 'c', 'regression', 'r'; got %r" % (mtype,))

    # Search the hyperparameter space based on the data
    estim.fit(X_train, y_train)

    # Show the results
    print(estim.score(X_test, y_test))
    print(estim.best_model())

    print('saving classifier to disk')
    with open(modelname, 'wb') as f:
        pickle.dump(estim, f)

    files.append(modelname)
    modeldir = os.getcwd()

    return modelname, modeldir, files
def _create_estimator_random_regressor(
        regressor=any_regressor('my_rgs'),
        preprocessing=any_preprocessing('my_pre'),
        max_evals=100,
        trial_timeout=120,
        seed=None,
        algo=tpe.suggest,
        fit_increment=1):
    """Build an unfitted ``HyperoptEstimator`` set up for regression.

    Only the arguments below are configurable; every other constructor
    option is pinned to the fixed value listed in the body.

    :param regressor: regressor search space handed to the estimator.
    :param preprocessing: preprocessing search space handed to the estimator.
    :param max_evals: forwarded as ``max_evals``.
    :param trial_timeout: forwarded as ``trial_timeout``.
    :param seed: forwarded as ``seed``.
    :param algo: hyperopt suggestion algorithm, forwarded as ``algo``.
    :param fit_increment: forwarded as ``fit_increment``.
    :return: the newly constructed ``HyperoptEstimator``.
    """
    # Caller-controlled options.
    estimator_options = dict(
        regressor=regressor,
        preprocessing=preprocessing,
        algo=algo,
        max_evals=max_evals,
        trial_timeout=trial_timeout,
        fit_increment=fit_increment,
        seed=seed,
    )
    # Options this factory always fixes.
    estimator_options.update(
        ex_preprocs=None,
        classifier=None,
        space=None,
        loss_fn=None,
        continuous_loss_fn=False,
        verbose=False,
        fit_increment_dump_filename=None,
        use_partial_fit=False,
        refit=True,
    )
    return HyperoptEstimator(**estimator_options)
def run(dataset, config):
    """Fit hyperopt-sklearn on the training split and report test predictions.

    Picks a loss function from ``config.metric``, builds a classifier or
    regressor search depending on ``config.type``, fits it under two
    interrupt timeouts, predicts on ``dataset.test.X_enc`` and returns
    whatever ``result(...)`` returns.

    :param dataset: object exposing ``train``/``test`` with ``X_enc``/``y_enc``.
    :param config: task configuration; the attributes read here are ``type``,
        ``metric``, ``framework_params``, ``cores``, ``max_runtime_seconds``,
        ``seed`` and ``output_predictions_file``.
    :return: the value produced by ``result(...)``.
    """
    log.info("\n**** Hyperopt-sklearn ****\n")

    is_classification = config.type == 'classification'

    # Marker meaning "supported metric, but let the estimator use its own
    # default loss" (it is turned into loss_fn=None below).
    builtin_loss = object()

    # metric name -> (loss function, continuous_loss_fn flag).
    # 'logloss' has no entry, so it is reported as unsupported.
    metrics_to_loss_mapping = {
        'acc': (builtin_loss, False),
        'auc': (lambda y, pred: 1.0 - roc_auc_score(y, pred), False),
        'f1': (lambda y, pred: 1.0 - f1_score(y, pred), False),
        'mae': (mean_absolute_error, False),
        'mse': (mean_squared_error, False),
        'msle': (mean_squared_log_error, False),
        'r2': (builtin_loss, False),
        'rmse': (mean_squared_error, False),
    }

    mapped = metrics_to_loss_mapping.get(config.metric)
    if mapped is None:
        loss_fn, continuous_loss_fn = None, False
        log.warning("Performance metric %s not supported: defaulting to %s.",
                    config.metric, 'accuracy' if is_classification else 'r2')
    else:
        loss_fn, continuous_loss_fn = mapped
        if loss_fn is builtin_loss:
            loss_fn = None

    # Keys starting with '_' are not forwarded to the estimator.
    training_params = {}
    for key, value in config.framework_params.items():
        if key.startswith('_'):
            continue
        training_params[key] = value

    log.warning("Ignoring cores constraint of %s cores.", config.cores)
    log.info("Running hyperopt-sklearn with a maximum time of %ss on %s cores, optimizing %s.",
             config.max_runtime_seconds, 'all', config.metric)

    X_train = dataset.train.X_enc
    y_train = dataset.train.y_enc

    # Exactly one of the two search spaces is built.
    classifier = any_classifier('clf') if is_classification else None
    regressor = None if is_classification else any_regressor('rgr')

    estimator = HyperoptEstimator(classifier=classifier,
                                  regressor=regressor,
                                  algo=tpe.suggest,
                                  loss_fn=loss_fn,
                                  continuous_loss_fn=continuous_loss_fn,
                                  trial_timeout=config.max_runtime_seconds,
                                  seed=config.seed,
                                  **training_params)

    # Outer guard sends SIGQUIT at 4/3 of the budget; the inner guard runs
    # kill_proc_tree before interrupting at the budget itself.
    kill_children = ft.partial(kill_proc_tree, timeout=5, include_parent=False)
    with InterruptTimeout(config.max_runtime_seconds * 4 / 3, sig=signal.SIGQUIT), \
            InterruptTimeout(config.max_runtime_seconds, before_interrupt=kill_children), \
            Timer() as training:
        estimator.fit(X_train, y_train)

    log.info('Predicting on the test set.')
    X_test = dataset.test.X_enc
    y_test = dataset.test.y_enc
    predictions = estimator.predict(X_test)

    # encoding is handled by caller in `__init__.py`
    probabilities = "predictions" if is_classification else None

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=y_test,
                  probabilities=probabilities,
                  target_is_encoded=is_classification,
                  models_count=len(estimator.trials),
                  training_duration=training.duration)
def run(dataset: Dataset, config: TaskConfig):
    """Fit hyperopt-sklearn, save test predictions to file and return run stats.

    Picks a loss function from ``config.metric``, imputes the encoded
    train/test features, fits a classifier or regressor search under two
    interrupt timeouts, then writes predictions through
    ``save_predictions_to_file``.

    :param dataset: benchmark dataset exposing ``train``/``test`` with
        ``X_enc``/``y_enc``.
    :param config: task configuration; the attributes read here are ``type``,
        ``metric``, ``framework_params``, ``cores``, ``max_runtime_seconds``,
        ``seed`` and ``output_predictions_file``.
    :return: dict with ``models_count`` and ``training_duration``.
    """
    log.info("\n**** Hyperopt-sklearn ****\n")

    is_classification = config.type == 'classification'

    # Marker meaning "supported metric, but let the estimator use its own
    # default loss" (it is turned into loss_fn=None below).
    builtin_loss = object()

    # metric name -> (loss function, continuous_loss_fn flag).
    # 'logloss' has no entry, so it is reported as unsupported.
    metrics_to_loss_mapping = {
        'acc': (builtin_loss, False),
        'auc': (lambda y, pred: 1.0 - roc_auc_score(y, pred), False),
        'f1': (lambda y, pred: 1.0 - f1_score(y, pred), False),
        'mae': (mean_absolute_error, False),
        'mse': (mean_squared_error, False),
        'msle': (mean_squared_log_error, False),
        'r2': (builtin_loss, False),
    }

    mapped = metrics_to_loss_mapping.get(config.metric)
    if mapped is None:
        loss_fn, continuous_loss_fn = None, False
        log.warning("Performance metric %s not supported: defaulting to %s.",
                    config.metric, 'accuracy' if is_classification else 'r2')
    else:
        loss_fn, continuous_loss_fn = mapped
        if loss_fn is builtin_loss:
            loss_fn = None

    log.warning("Ignoring cores constraint of %s cores.", config.cores)
    log.info("Running hyperopt-sklearn with a maximum time of %ss on %s cores, optimizing %s.",
             config.max_runtime_seconds, 'all', config.metric)

    X_train, X_test = impute(dataset.train.X_enc, dataset.test.X_enc)
    y_train, y_test = dataset.train.y_enc, dataset.test.y_enc

    # Exactly one of the two search spaces is built.
    classifier = any_classifier('clf') if is_classification else None
    regressor = None if is_classification else any_regressor('rgr')

    estimator = HyperoptEstimator(classifier=classifier,
                                  regressor=regressor,
                                  algo=tpe.suggest,
                                  loss_fn=loss_fn,
                                  continuous_loss_fn=continuous_loss_fn,
                                  trial_timeout=config.max_runtime_seconds,
                                  seed=config.seed,
                                  **config.framework_params)

    # Outer guard sends SIGQUIT at 4/3 of the budget; the inner guard runs
    # kill_proc_tree before interrupting at the budget itself.
    kill_children = ft.partial(kill_proc_tree, timeout=5, include_parent=False)
    with InterruptTimeout(config.max_runtime_seconds * 4 / 3, sig=signal.SIGQUIT), \
            InterruptTimeout(config.max_runtime_seconds, before_interrupt=kill_children), \
            Timer() as training:
        estimator.fit(X_train, y_train)

    predictions = estimator.predict(X_test)

    # For classification the predicted labels are one-hot encoded and used
    # as the probabilities; regression has none.
    if is_classification:
        one_hot = Encoder('one-hot', target=False, encoded_type=float)
        probabilities = one_hot.fit_transform(predictions)
    else:
        probabilities = None

    save_predictions_to_file(dataset=dataset,
                             output_file=config.output_predictions_file,
                             probabilities=probabilities,
                             predictions=predictions,
                             truth=y_test,
                             target_is_encoded=True)

    return dict(models_count=len(estimator.trials),
                training_duration=training.duration)
y, test_size=0.3, random_state=42) print('Prepared data: X_train: %s y_train: %s' % (X_train.shape, y_train.shape)) print('Prepared data: X_test: %s y_test: %s' % (X_test.shape, y_test.shape)) # replace training dataset X = X_train y = y_train """ ESTIMATOR WITH BAYESIAN TUNING """ from hpsklearn import HyperoptEstimator, any_regressor, any_preprocessing from hyperopt import tpe # Instantiate a HyperoptEstimator with the search space and number of evaluations clf = HyperoptEstimator(regressor=any_regressor('my_clf'), preprocessing=any_preprocessing('my_pre'), algo=tpe.suggest, max_evals=250, trial_timeout=300) clf.fit(X, y) print(clf.best_model()) y_hat = clf.predict(X_test) dscores = metrics_regression(y_test, y_hat, X.shape[1]) tf = t.since('test') print( '\nBayesian tuning -test: bias = %.3f mae = %.3f r2 = %.3f (time: %s)' % (dscores['bias'], dscores['mae'], dscores['r2'], format_duration(tf))) # training
"""Find the ideal hyperparameters for a network architecture""" from AngryTops.ModelTraining.FormatInputOutput import get_input_output, scale from hpsklearn import HyperoptEstimator, any_regressor from hyperopt import tpe import numpy as np import sklearn # Download the data and split into training and test sets (X_train, y_train), (X_test, y_test), (jets_scalar, lep_scalar, output_scalar), \ (event_training, event_testing) = get_input_output(input_filename='/Users/fardinsyed/Desktop/Top_Quark_Project/AngryTops/csv/topreco_5dec.csv', rep='pxpypzE', scaling=True, multi_input=False, shuffle=True, single_output="target_b_had_Pt") y_train = y_train.reshape(y_train.shape[0], -1) y_test = y_test.reshape(y_test.shape[0], -1) # Instantiate a HyperoptEstimator with the search space and number of evaluations estim = HyperoptEstimator(regressor=any_regressor('gradient_boosting_regression'), preprocessing=[], algo=tpe.suggest, max_evals=10, trial_timeout=30000) # Search the hyperparameter space based on the data estim.fit(X_train, y_train) # Show the results print(estim.score(X_test, y_test)) # 0.962785714286 print(estim.best_model())
header=0, encoding='utf-8') y_all = train_df["Y"] train_df = train_df.drop(["ID", "Y"], axis=1) quantity = [ attr for attr in train_df.columns if train_df.dtypes[attr] != 'object' ] # 数值变量集合 print(len(quantity)) train_df = train_df[quantity] # X_all = Imputer().fit_transform(train_df) for key in quantity: train_df[key] = train_df[key].fillna(0) num_test = 0.33 # 测试集占据比例,,如果是整数的话就是样本的数量 X_train, X_test, y_train, y_test = train_test_split(train_df, y_all, test_size=num_test, random_state=23) print(X_train.head()) print("------------") print(X_test.head()) print("------------") print(y_train.head()) print("------------") print(y_test.head()) print("------------") estim = HyperoptEstimator(classifier=any_regressor('clf'), algo=tpe.suggest, seed=0) estim.fit(X_train, y_train) print(estim.score(X_test, y_test)) print(estim.best_model())