Example #1
def test_w_prep_set_params():
    """[Model Selection] Test run with preprocessing, sep param dists."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    params = {
        ('no', 'ols'): {
            'offset': randint(3, 6)
        },
        ('pr', 'ols'): {
            'offset': randint(1, 3)
        },
    }

    # Fitting
    evl.fit(X,
            y,
            estimators={
                'pr': [OLS()],
                'no': [OLS()]
            },
            param_dicts=params,
            preprocessing={
                'pr': [Scale()],
                'no': []
            },
            n_iter=3)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('no', 'ols')], -18.684229451043198)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('pr', 'ols')], -7.2594502123869491, 1)

    assert evl.summary['params'][('no', 'ols')]['offset'] == 3
    assert evl.summary['params'][('pr', 'ols')]['offset'] == 1
Example #2
def test_bench_equality():
    """[Model Selection] Test benchmark correspondence with eval."""

    with open(os.devnull, 'w') as f, redirect_stderr(f):
        evl = Evaluator(mape_scorer, cv=5)
        evl.fit(X,
                y,
                estimators={
                    'pr': [OLS()],
                    'no': [OLS()]
                },
                param_dicts={},
                preprocessing={
                    'pr': [Scale()],
                    'no': []
                })

        out = benchmark(X, y, mape_scorer, 5, {
            'pr': [OLS()],
            'no': [OLS()]
        }, {
            'pr': [Scale()],
            'no': []
        }, None)

    np.testing.assert_approx_equal(out['test_score-m']['no.ols'],
                                   evl.results['test_score-m']['no.ols'])
Example #3
def test_w_prep_fit():
    """[Model Selection] Test run with preprocessing, single step."""
    evl = Evaluator(mape_scorer,
                    cv=5,
                    shuffle=False,
                    random_state=100,
                    verbose=True)

    with open(os.devnull, 'w') as f, redirect_stdout(f):

        evl.fit(X,
                y,
                estimators=[OLS()],
                param_dicts={'ols': {
                    'offset': randint(1, 10)
                }},
                preprocessing={
                    'pr': [Scale()],
                    'no': []
                },
                n_iter=3)

    np.testing.assert_approx_equal(evl.results['test_score-m']['no.ols'],
                                   -24.903229451043195)

    np.testing.assert_approx_equal(evl.results['test_score-m']['pr.ols'],
                                   -26.510708862278072, 1)

    assert evl.results['params']['no.ols']['offset'] == 4
    assert evl.results['params']['pr.ols']['offset'] == 4
Example #4
def base_hyperparam_tuning(X, y, base_learners, param_dicts, n_iterations=100):
    """Tune the hyperparameters of the base-layer learners; the evaluation metric is AUC."""
    X = X.values
    y = y.values
    scorer = make_scorer(metrics.roc_auc_score, greater_is_better=True)
    evl = Evaluator(scorer, cv=5, verbose=20, backend='multiprocessing')
    evl.fit(X, y, estimators=base_learners, param_dicts=param_dicts, n_iter=n_iterations)
    df_params = pd.DataFrame(evl.results)
    return df_params
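
A minimal usage sketch for the helper above, assuming X_df and y_sr are a pandas feature frame and target series (the function calls .values on its inputs); the learner names and parameter distributions are placeholders:

from scipy.stats import randint, uniform
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression

# Named base learners; the names must match the keys of param_dicts.
base_learners = [('lr', LogisticRegression(solver='liblinear')),
                 ('rf', RandomForestClassifier(n_estimators=100))]
param_dicts = {'lr': {'C': uniform(0.01, 10)},
               'rf': {'max_depth': randint(2, 10)}}

df_params = base_hyperparam_tuning(X_df, y_sr, base_learners, param_dicts,
                                   n_iterations=50)
df_params.to_csv('params_base.csv')  # reusable as pre_params further down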
Example #5
def test_no_prep():
    """[Model Selection] Test run without preprocessing."""
    evl = Evaluator(mape_scorer, verbose=True, cv=5, shuffle=False,
                    random_state=100)

    with open(os.devnull, 'w') as f, redirect_stderr(f):
        evl.fit(X, y,
                estimators=[OLS()],
                param_dicts={'ols': {'offset': randint(1, 10)}},
                n_iter=3)

    np.testing.assert_approx_equal(
            evl.summary['test_score_mean']['ols'],
            -24.903229451043195)

    assert evl.summary['params']['ols']['offset'] == 4
Example #6
def test_w_prep_fit():
    """[Model Selection] Test run with preprocessing, single step."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    evl.fit(X, y,
            estimators=[OLS()],
            param_dicts={'ols': {'offset': randint(1, 10)}},
            preprocessing={'pr': [Scale()], 'no': []},
            n_iter=3)

    np.testing.assert_approx_equal(
            evl.summary['test_score_mean'][('no', 'ols')],
            -24.903229451043195)

    np.testing.assert_approx_equal(
            evl.summary['test_score_mean'][('pr', 'ols')],
            -26.510708862278072, 1)

    assert evl.summary['params'][('no', 'ols')]['offset'] == 4
    assert evl.summary['params'][('pr', 'ols')]['offset'] == 4
Example #7
def layer_hyperparam_tuning(X, y, pre_layer_learners, local_layer_learners,
                            param_dicts_layer, n_iterations=50,
                            pre_params='params_base.csv'):
    """Tune the hyperparameters of an intermediate layer; layers must be added in order."""
    X = X.values
    y = y.values
    scorer = make_scorer(metrics.roc_auc_score, greater_is_better=True)
    # Load the tuned parameters of the preceding layer and apply them to its learners.
    params_pre = pd.read_csv(pre_params)
    params_pre.set_index(['Unnamed: 0'], inplace=True)
    for case_name, params in params_pre["params"].items():
        params = eval(params)
        for est_name, est in pre_layer_learners:
            if est_name == case_name:
                est.set_params(**params)
    # Wrap the previous layer in a SuperLearner and use it as the preprocessing
    # step when evaluating the local (next-layer) learners.
    in_layer = SuperLearner(folds=10, backend='multiprocessing', model_selection=True)
    in_layer.add(pre_layer_learners, proba=True)
    preprocess = [in_layer]
    evl = Evaluator(scorer, cv=5, verbose=20, backend='multiprocessing')
    evl.fit(X, y, local_layer_learners, param_dicts=param_dicts_layer,
            preprocessing={'meta': preprocess}, n_iter=n_iterations)
    df_params_layer = pd.DataFrame(evl.results)
    return in_layer, df_params_layer
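
Continuing that sketch, a hedged call to layer_hyperparam_tuning that reuses the base learners and the params_base.csv written above; the local learner and its distribution are placeholders:

from scipy.stats import uniform
from sklearn.svm import SVC

# Candidate learners for the next layer, tuned on top of the stacked base layer.
local_layer_learners = [('svc', SVC())]
param_dicts_layer = {'svc': {'C': uniform(0.01, 10)}}

in_layer, df_params_layer = layer_hyperparam_tuning(
    X_df, y_sr, base_learners, local_layer_learners, param_dicts_layer,
    n_iterations=25, pre_params='params_base.csv')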
Example #8
def test_w_prep_set_params():
    """[Model Selection] Test run with preprocessing, sep param dists."""
    evl = Evaluator(mape_scorer,
                    cv=5,
                    shuffle=False,
                    random_state=100,
                    verbose=2)

    params = {
        'no.ols': {
            'offset': randint(3, 6)
        },
        'pr.ols': {
            'offset': randint(1, 3)
        },
    }

    with open(os.devnull, 'w') as f, redirect_stdout(f):

        evl.fit(X,
                y,
                estimators={
                    'pr': [OLS()],
                    'no': [OLS()]
                },
                param_dicts=params,
                preprocessing={
                    'pr': [Scale()],
                    'no': []
                },
                n_iter=10)

    np.testing.assert_approx_equal(evl.results['test_score-m']['no.ols'],
                                   -18.684229451043198)

    np.testing.assert_approx_equal(evl.results['test_score-m']['pr.ols'],
                                   -7.2594502123869491)
    assert evl.results['params']['no.ols']['offset'] == 3
    assert evl.results['params']['pr.ols']['offset'] == 1
Example #9
def evaluateSecondLayer(base_learners, x_train, y_train, meta_learners,
                        param_dicts):
    in_layer = EnsembleTransformer()
    print("adding base learners to transformer")
    in_layer.add('stack', base_learners)

    preprocess = [in_layer]
    print("creating scorer")
    scorer = make_scorer(mean_absolute_error, greater_is_better=False)
    evl = Evaluator(scorer, cv=4, verbose=1)
    print("fitting evaluator")
    evl.fit(
        x_train.values,
        y_train.values,
        meta_learners,
        param_dicts,
        preprocessing={'meta': preprocess},
        n_iter=40  # bump this up to do a larger grid search
    )

    table = pd.DataFrame(evl.summary)
    table.to_html('iteration5.html')
    table.to_csv('iteration5.csv', index=False, header=False, sep='\t')
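
A hedged usage sketch for evaluateSecondLayer; the learners are placeholders, and x_train / y_train are assumed to be pandas objects since the function calls .values on them:

from scipy.stats import randint
from sklearn.linear_model import Lasso
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor

base_learners = [('rf', RandomForestRegressor(n_estimators=100)),
                 ('ls', Lasso())]
meta_learners = [('gb', GradientBoostingRegressor())]
param_dicts = {'gb': {'max_depth': randint(2, 6)}}

# Writes the grid-search summary to iteration5.html and iteration5.csv.
evaluateSecondLayer(base_learners, x_train, y_train, meta_learners, param_dicts)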
Example #10
from scipy.stats import randint

# Here we name the estimators ourselves
ests = [('gnb', GaussianNB()), ('knn', KNeighborsClassifier())]

# Now we map parameters to these
# The gnb doesn't have any parameters so we can skip it
pars = {'n_neighbors': randint(2, 20)}
params = {'knn': pars}

##############################################################################
# We can now run an evaluation over these estimators and parameter distributions
# by calling the ``fit`` method.

evaluator = Evaluator(accuracy_scorer, cv=10, random_state=seed, verbose=1)
evaluator.fit(X, y, ests, params, n_iter=10)

##############################################################################
# The full history of the evaluation can be found in ``cv_results``. To compare
# models with their best parameters, we can pass the ``results`` attribute to
# a :obj:`pandas.DataFrame` or print it as a table. We use ``m`` to denote
# mean values and ``s`` to denote standard deviation across folds for brevity.
# Note that the timed prediction is for the training set, for comparability with
# training time.

print("Score comparison with best params founds:\n\n%r" % evaluator.results)

##############################################################################
# Preprocessing
# ^^^^^^^^^^^^^
#
Example #11
evl = Evaluator(
    scorer,
    cv=2,
    random_state=SEED,
    verbose=5,
)

# In[ ]:

evl.fit(
    xtrain,
    ytrain,
    estimators=base_learners,
    param_dicts=param_dicts,
    preprocessing={
        'sc': [StandardScaler()],
        'none': []
    },
    n_iter=2  # bump this up to do a larger grid search
)

# In[ ]:

pd.DataFrame(evl.results)

# There you have it, a comparison of tuned models in one grid search!
#
# Optimal parameters are then easily accessed.

# In[ ]:
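
# A minimal sketch of that access, assuming the fitted ``evl`` from the cells
# above: ``evl.results['params']`` holds the best parameter draw for each
# preprocessing-case/estimator pair.

print(evl.results['params'])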
Example #12
# NOTE: the start of this statement was truncated in the original example; only
# the trailing ``needs_threshold=False)`` survived. A hedged reconstruction,
# assuming the scorer was built with make_scorer (the metric is a placeholder)
# and bound to the ``score`` variable used by the Evaluator below:
score = make_scorer(accuracy_score,
                    needs_threshold=False)

ensemble = SequentialEnsemble(model_selection=True,
                              n_jobs=1,
                              shuffle=False,
                              random_state=seed)

ensemble.add('stack', ests_1, preprocessing=pre_cases)
ensemble.add_meta(SVC(kernel='linear', degree=5, tol=1e-4))
# ensemble.fit(X_train, y_train)
# y_pred = ensemble.predict(X_test)
# ens = ensemble
evaluator = Evaluator(scorer=score, random_state=seed, verbose=True)
evaluator.fit(data_pix,
              spacial_pix,
              estimators=[],
              param_dicts=pars_1,
              n_iter=5,
              preprocessing=pre_cases)

print(evaluator.results)

spacial_pix = spacial_pix.astype('int')
unique, counts = np.unique(y_test, return_counts=True)
print(np.asarray((unique, counts)).T)

# print(confusion_matrix(y_test, y_pred, labels=unique))
# print(precision_score(y_test, y_pred, average='micro', labels=unique))
# print(mean_absolute_error(y_test, y_pred))
# print(mean_squared_error(y_test, y_pred))

Example #13
# Set parameter mapping
# Here, we differentiate distributions between cases for the random forest
params = {
    'svc': {
        'C': uniform(0, 10)
    },
    'class.rf': {
        'max_depth': randint(2, 10)
    },
    'proba.rf': {
        'max_depth': randint(2, 10),
        'max_features': uniform(0.5, 0.5)
    }
}

scorer = make_scorer(accuracy_score)
evaluator = Evaluator(scorer=scorer, random_state=seed, cv=2)

evaluator.fit(X,
              y,
              meta_learners,
              params,
              preprocessing=preprocessing,
              n_iter=2)

##############################################################################
# We can now compare the performance of the best fit for each candidate
# meta learner.

print("Results:\n%s" % evaluator.results)