コード例 #1
0
def applyHPSKLEARN(X_train, y_train, X_test, y_test, SavePath,
                  max_evals=100, trial_timeout=100, useSavedModels = True):
    """Fit (or reload) a hyperopt-sklearn regressor and predict on the test set.

    A new search is run and its result pickled to ``SavePath + ".pckl"``
    unless ``useSavedModels`` is true and that file already exists, in which
    case the pickled estimator is loaded instead.

    :param X_train: training features, passed to ``fit``.
    :param y_train: training targets, passed to ``fit``.
    :param X_test: test features, passed to ``score`` and ``predict``.
    :param y_test: test targets, passed to ``score``.
    :param SavePath: path prefix of the cached model; ".pckl" is appended.
    :param max_evals: forwarded to ``HyperoptEstimator``.
    :param trial_timeout: forwarded to ``HyperoptEstimator``.
    :param useSavedModels: reuse an existing pickle instead of searching again.
    :return: the estimator's predictions for ``X_test``.
    """
    model_path = SavePath + ".pckl"

    if not useSavedModels or not os.path.isfile(model_path):
        HPSKLEARNModel = HyperoptEstimator(regressor=any_regressor('reg'),
                                preprocessing=any_preprocessing('pre'),
                                loss_fn=mean_squared_error,
                                max_evals=max_evals,
                                trial_timeout=trial_timeout,
                                algo=tpe.suggest)
        # perform the search
        HPSKLEARNModel.fit(X_train, y_train)
        # Context manager guarantees the handle is flushed and closed even if
        # pickling raises.
        with open(model_path, 'wb') as model_file:
            pickle.dump(HPSKLEARNModel, model_file)
    else:
        # NOTE: unpickling executes code embedded in the file; only load
        # model files that come from a trusted location.
        with open(model_path, 'rb') as model_file:
            HPSKLEARNModel = pickle.load(model_file)

    # summarize performance
    score = HPSKLEARNModel.score(X_test, y_test)
    y_hat = HPSKLEARNModel.predict(X_test)
    print("HPSKLEARN - Score: ")
    # NOTE(review): the label says "MAE" but the value is whatever ``score()``
    # returns, and the search above minimises mean_squared_error -- confirm
    # which metric is meant before relying on this output.
    print("MAE: %.4f" % score)
    # summarize the best model
    print(HPSKLEARNModel.best_model())

    return y_hat
コード例 #2
0
def train_hypsklearn(X_train, X_test, y_train, y_test, mtype,
                     common_name_model, problemtype, classes,
                     default_featurenames, transform_model, settings,
                     model_session):
    """Run a hyperopt-sklearn search, report it, and pickle the estimator.

    :param X_train: training features, passed to ``fit``.
    :param X_test: test features, passed to ``score``.
    :param y_train: training targets, passed to ``fit``.
    :param y_test: test targets, passed to ``score``.
    :param mtype: 'classification'/'c' or 'regression'/'r'; surrounding
        whitespace is ignored.
    :param common_name_model: base name of the output file; '.pickle' is
        appended.
    :param problemtype: unused here.
    :param classes: unused here.
    :param default_featurenames: unused here.
    :param transform_model: unused here.
    :param settings: unused here.
    :param model_session: unused here.
    :return: ``(modelname, modeldir, files)`` -- the pickle file name, the
        current working directory, and a one-element list with the file name.
    :raises ValueError: if ``mtype`` is not one of the supported values.
    """
    modelname = common_name_model + '.pickle'

    # Strip whitespace so that both 'classification' and ' classification'
    # select the classification branch.
    kind = mtype.strip() if isinstance(mtype, str) else mtype

    if kind in ('classification', 'c'):
        search_space = {'classifier': any_classifier('my_clf')}
    elif kind in ('regression', 'r'):
        # Regressors must go through ``regressor=``; handing them to
        # ``classifier=`` makes the estimator treat the task as classification.
        search_space = {'regressor': any_regressor('my_clf')}
    else:
        # Fail early with a clear message instead of hitting an undefined
        # estimator further down.
        raise ValueError("unsupported mtype: %r" % (mtype,))

    estim = HyperoptEstimator(preprocessing=any_preprocessing('my_pre'),
                              algo=tpe.suggest,
                              max_evals=100,
                              trial_timeout=120,
                              **search_space)

    # Search the hyperparameter space based on the data
    estim.fit(X_train, y_train)

    # Show the results
    print(estim.score(X_test, y_test))
    print(estim.best_model())

    print('saving classifier to disk')
    # Context manager closes the file even if pickling fails.
    with open(modelname, 'wb') as f:
        pickle.dump(estim, f)

    files = [modelname]
    modeldir = os.getcwd()

    return modelname, modeldir, files
コード例 #3
0
ファイル: auto_ml_core.py プロジェクト: sn0wfree/auto_ml
    def _create_estimator_random_regressor(
            regressor=any_regressor('my_rgs'),
            preprocessing=any_preprocessing('my_pre'),
            max_evals=100,
            trial_timeout=120,
            seed=None,
            algo=tpe.suggest,
            fit_increment=1):
        """
        Build a ``HyperoptEstimator`` set up for a regressor search.

        Every argument is forwarded to the ``HyperoptEstimator`` keyword of
        the same name; the remaining constructor options are pinned below.

        :param regressor: regressor search space.
        :param preprocessing: preprocessing search space.
        :param max_evals: forwarded as ``max_evals``.
        :param trial_timeout: forwarded as ``trial_timeout``.
        :param seed: forwarded as ``seed``.
        :param algo: forwarded as ``algo``.
        :param fit_increment: forwarded as ``fit_increment``.
        :return: the newly constructed ``HyperoptEstimator``.
        """

        # Options the caller controls.
        caller_options = dict(
            regressor=regressor,
            preprocessing=preprocessing,
            algo=algo,
            max_evals=max_evals,
            trial_timeout=trial_timeout,
            fit_increment=fit_increment,
            seed=seed,
        )

        # Options this helper always fixes to the same value.
        pinned_options = dict(
            ex_preprocs=None,
            classifier=None,
            space=None,
            loss_fn=None,
            continuous_loss_fn=False,
            verbose=False,
            fit_increment_dump_filename=None,
            use_partial_fit=False,
            refit=True,
        )

        return HyperoptEstimator(**caller_options, **pinned_options)
コード例 #4
0
def run(dataset, config):
    """Train a hyperopt-sklearn estimator on ``dataset`` and package the results.

    Reads ``type``, ``metric``, ``framework_params``, ``cores``,
    ``max_runtime_seconds``, ``seed`` and ``output_predictions_file`` from
    ``config``, and the encoded train/test splits from ``dataset``.

    :param dataset: object exposing ``train``/``test`` with ``X_enc``/``y_enc``.
    :param config: task configuration (attributes listed above).
    :return: whatever ``result(...)`` builds from the predictions, the truth,
        the number of trials and the training duration.
    """
    log.info("\n**** Hyperopt-sklearn ****\n")

    is_classification = config.type == 'classification'

    # Marker for metrics where no custom loss is passed to the estimator.
    default = lambda: 0
    # metric name -> (loss function, continuous_loss_fn flag)
    metrics_to_loss_mapping = {
        'acc': (default, False),
        'auc': (lambda y, pred: 1.0 - roc_auc_score(y, pred), False),
        'f1': (lambda y, pred: 1.0 - f1_score(y, pred), False),
        'mae': (mean_absolute_error, False),
        'mse': (mean_squared_error, False),
        'msle': (mean_squared_log_error, False),
        'r2': (default, False),
        'rmse': (mean_squared_error, False),
    }
    loss_fn, continuous_loss_fn = metrics_to_loss_mapping.get(
        config.metric, (None, False))
    if loss_fn is None:
        fallback_metric = 'accuracy' if is_classification else 'r2'
        log.warning("Performance metric %s not supported: defaulting to %s.",
                    config.metric, fallback_metric)
    elif loss_fn is default:
        # Marker found: hand no loss function to the estimator.
        loss_fn = None

    # Framework parameters whose name starts with '_' are not forwarded.
    training_params = {}
    for param_name, param_value in config.framework_params.items():
        if not param_name.startswith('_'):
            training_params[param_name] = param_value

    log.warning("Ignoring cores constraint of %s cores.", config.cores)
    log.info(
        "Running hyperopt-sklearn with a maximum time of %ss on %s cores, optimizing %s.",
        config.max_runtime_seconds, 'all', config.metric)

    X_train = dataset.train.X_enc
    y_train = dataset.train.y_enc

    # Exactly one of the two search spaces is built, the other stays None.
    classifier = any_classifier('clf') if is_classification else None
    regressor = None if is_classification else any_regressor('rgr')

    estimator = HyperoptEstimator(classifier=classifier,
                                  regressor=regressor,
                                  algo=tpe.suggest,
                                  loss_fn=loss_fn,
                                  continuous_loss_fn=continuous_loss_fn,
                                  trial_timeout=config.max_runtime_seconds,
                                  seed=config.seed,
                                  **training_params)

    outer_limit = config.max_runtime_seconds * 4 / 3
    kill_children = ft.partial(kill_proc_tree,
                               timeout=5,
                               include_parent=False)
    # Two nested timeouts: the inner one runs ``kill_children`` before
    # interrupting at the configured runtime, the outer one uses SIGQUIT at
    # 4/3 of it.
    with InterruptTimeout(outer_limit, sig=signal.SIGQUIT):
        with InterruptTimeout(config.max_runtime_seconds,
                              before_interrupt=kill_children):
            with Timer() as training:
                estimator.fit(X_train, y_train)

    log.info('Predicting on the test set.')
    X_test = dataset.test.X_enc
    y_test = dataset.test.y_enc
    predictions = estimator.predict(X_test)

    # encoding is handled by caller in `__init__.py`
    probabilities = "predictions" if is_classification else None

    return result(output_file=config.output_predictions_file,
                  predictions=predictions,
                  truth=y_test,
                  probabilities=probabilities,
                  target_is_encoded=is_classification,
                  models_count=len(estimator.trials),
                  training_duration=training.duration)
コード例 #5
0
ファイル: exec.py プロジェクト: sebhrusen/automlbenchmark
def run(dataset: Dataset, config: TaskConfig):
    """Fit hyperopt-sklearn on ``dataset``, save predictions, return run stats.

    The encoded train/test features are passed through ``impute`` before
    training. Predictions (and, for classification, their one-hot encoding
    as probabilities) are written via ``save_predictions_to_file``.

    :param dataset: provides ``train``/``test`` with ``X_enc`` and ``y_enc``.
    :param config: task settings; ``type``, ``metric``, ``cores``,
        ``max_runtime_seconds``, ``seed``, ``framework_params`` and
        ``output_predictions_file`` are read.
    :return: dict with ``models_count`` and ``training_duration``.
    """
    log.info("\n**** Hyperopt-sklearn ****\n")

    is_classification = config.type == 'classification'

    # Marker for metrics where no custom loss is passed to the estimator.
    default = lambda: 0
    # metric name -> (loss function, continuous_loss_fn flag)
    metrics_to_loss_mapping = {
        'acc': (default, False),
        'auc': (lambda y, pred: 1.0 - roc_auc_score(y, pred), False),
        'f1': (lambda y, pred: 1.0 - f1_score(y, pred), False),
        'mae': (mean_absolute_error, False),
        'mse': (mean_squared_error, False),
        'msle': (mean_squared_log_error, False),
        'r2': (default, False),
    }
    unsupported = (None, False)
    loss_fn, continuous_loss_fn = metrics_to_loss_mapping.get(
        config.metric, unsupported)
    if loss_fn is None:
        fallback_metric = 'accuracy' if is_classification else 'r2'
        log.warning("Performance metric %s not supported: defaulting to %s.",
                    config.metric, fallback_metric)
    elif loss_fn is default:
        # Marker found: hand no loss function to the estimator.
        loss_fn = None

    log.warning("Ignoring cores constraint of %s cores.", config.cores)
    log.info(
        "Running hyperopt-sklearn with a maximum time of %ss on %s cores, optimizing %s.",
        config.max_runtime_seconds, 'all', config.metric)

    X_train, X_test = impute(dataset.train.X_enc, dataset.test.X_enc)
    y_train = dataset.train.y_enc
    y_test = dataset.test.y_enc

    # Exactly one of the two search spaces is built, the other stays None.
    classifier = any_classifier('clf') if is_classification else None
    regressor = None if is_classification else any_regressor('rgr')

    estimator = HyperoptEstimator(classifier=classifier,
                                  regressor=regressor,
                                  algo=tpe.suggest,
                                  loss_fn=loss_fn,
                                  continuous_loss_fn=continuous_loss_fn,
                                  trial_timeout=config.max_runtime_seconds,
                                  seed=config.seed,
                                  **config.framework_params)

    outer_limit = config.max_runtime_seconds * 4 / 3
    kill_children = ft.partial(kill_proc_tree,
                               timeout=5,
                               include_parent=False)
    # Two nested timeouts: the inner one runs ``kill_children`` before
    # interrupting at the configured runtime, the outer one uses SIGQUIT at
    # 4/3 of it.
    with InterruptTimeout(outer_limit, sig=signal.SIGQUIT):
        with InterruptTimeout(config.max_runtime_seconds,
                              before_interrupt=kill_children):
            with Timer() as training:
                estimator.fit(X_train, y_train)

    predictions = estimator.predict(X_test)
    if is_classification:
        # One-hot encode the predicted labels to serve as probabilities.
        one_hot = Encoder('one-hot', target=False, encoded_type=float)
        probabilities = one_hot.fit_transform(predictions)
    else:
        probabilities = None

    save_predictions_to_file(dataset=dataset,
                             output_file=config.output_predictions_file,
                             probabilities=probabilities,
                             predictions=predictions,
                             truth=y_test,
                             target_is_encoded=True)

    return dict(models_count=len(estimator.trials),
                training_duration=training.duration)
コード例 #6
0
                                                        y,
                                                        test_size=0.3,
                                                        random_state=42)
    print('Prepared data: X_train: %s  y_train: %s' %
          (X_train.shape, y_train.shape))
    print('Prepared data: X_test: %s  y_test: %s' %
          (X_test.shape, y_test.shape))
    # replace training dataset
    X = X_train
    y = y_train
    """ ESTIMATOR WITH BAYESIAN TUNING """

    from hpsklearn import HyperoptEstimator, any_regressor, any_preprocessing
    from hyperopt import tpe
    # Instantiate a HyperoptEstimator with the search space and number of evaluations
    clf = HyperoptEstimator(regressor=any_regressor('my_clf'),
                            preprocessing=any_preprocessing('my_pre'),
                            algo=tpe.suggest,
                            max_evals=250,
                            trial_timeout=300)

    clf.fit(X, y)
    print(clf.best_model())
    y_hat = clf.predict(X_test)
    dscores = metrics_regression(y_test, y_hat, X.shape[1])
    tf = t.since('test')
    print(
        '\nBayesian tuning -test:  bias = %.3f  mae = %.3f  r2 = %.3f (time: %s)'
        %
        (dscores['bias'], dscores['mae'], dscores['r2'], format_duration(tf)))
    # training
コード例 #7
0
"""Find the ideal hyperparameters for a network architecture"""
from AngryTops.ModelTraining.FormatInputOutput import get_input_output, scale
from hpsklearn import HyperoptEstimator, any_regressor
from hyperopt import tpe
import numpy as np
import sklearn

# Load the data from a local CSV file and unpack it into training and test
# sets, the three scaler objects, and the training/testing event info.
# NOTE(review): the input path is an absolute, user-specific location --
# adjust it before running on another machine.
(X_train, y_train), (X_test, y_test), (jets_scalar, lep_scalar, output_scalar), \
(event_training, event_testing) = get_input_output(input_filename='/Users/fardinsyed/Desktop/Top_Quark_Project/AngryTops/csv/topreco_5dec.csv', rep='pxpypzE', scaling=True, multi_input=False, shuffle=True, single_output="target_b_had_Pt")
# Reshape the targets to 2-D: one row per sample, remaining axes flattened.
y_train = y_train.reshape(y_train.shape[0], -1)
y_test = y_test.reshape(y_test.shape[0], -1)

# Instantiate a HyperoptEstimator with the search space and number of evaluations.
# No preprocessing steps are searched (empty list) and at most 10 evaluations run.
# NOTE(review): 'gradient_boosting_regression' is only the string handed to
# any_regressor; presumably it is a label rather than a restriction to
# gradient boosting -- confirm against the hpsklearn documentation.
estim = HyperoptEstimator(regressor=any_regressor('gradient_boosting_regression'),
                          preprocessing=[],
                          algo=tpe.suggest,
                          max_evals=10,
                          trial_timeout=30000)

# Search the hyperparameter space based on the data
estim.fit(X_train, y_train)

# Show the results: score on the held-out test set, then the best model found.

print(estim.score(X_test, y_test))
# 0.962785714286  (presumably the output of an earlier run -- not reproducible as-is)

print(estim.best_model())
コード例 #8
0
    header=0,
    encoding='utf-8')
# Separate the target column from the features and drop the identifier.
y_all = train_df["Y"]
train_df = train_df.drop(["ID", "Y"], axis=1)
quantity = [
    attr for attr in train_df.columns if train_df.dtypes[attr] != 'object'
]  # set of numeric (non-object dtype) columns
print(len(quantity))
# Keep only the numeric columns and replace missing values with 0 in a single
# step; this avoids assigning column-by-column into a sliced frame.
train_df = train_df[quantity].fillna(0)
# Share of the data held out for testing; an integer would instead be
# interpreted as an absolute number of samples.
num_test = 0.33
X_train, X_test, y_train, y_test = train_test_split(train_df,
                                                    y_all,
                                                    test_size=num_test,
                                                    random_state=23)
# Preview each split, separated by a divider line.
for split_part in (X_train, X_test, y_train, y_test):
    print(split_part.head())
    print("------------")
# The regressor search space has to be supplied through ``regressor=``;
# passing it as ``classifier=`` would put the estimator in classification mode.
estim = HyperoptEstimator(regressor=any_regressor('clf'),
                          algo=tpe.suggest,
                          seed=0)
estim.fit(X_train, y_train)
print(estim.score(X_test, y_test))
print(estim.best_model())