Example #1
# Assumed imports for this snippet: pandas, scikit-learn metrics, and the
# mlens model-selection utilities.
import pandas as pd
from sklearn import metrics
from mlens.metrics import make_scorer
from mlens.model_selection import Evaluator


def base_hyperparam_tuning(X, y, base_learners, param_dicts, n_iterations=100):
    '''Tune the base-layer model hyperparameters; the current evaluation metric is AUC.'''
    X = X.values
    y = y.values
    scorer = make_scorer(metrics.roc_auc_score, greater_is_better=True)
    evl = Evaluator(scorer, cv=5, verbose=20, backend='multiprocessing')
    evl.fit(X, y, estimators=base_learners, param_dicts=param_dicts, n_iter=n_iterations)
    df_params = pd.DataFrame(evl.results)
    return df_params
Example #2
# Assumes the same imports as Example #1, plus ``SuperLearner`` from ``mlens.ensemble``.
def layer_hyperparam_tuning(X, y, pre_layer_learners, local_layer_learners,
                            param_dicts_layer, n_iterations=50,
                            pre_params='params_base.csv'):
    '''Tune intermediate-layer hyperparameters; learners must be added in order.'''
    X = X.values
    y = y.values
    scorer = make_scorer(metrics.roc_auc_score, greater_is_better=True)
    # Load the tuned base-layer parameters saved by the previous step.
    params_pre = pd.read_csv(pre_params)
    params_pre.set_index(['Unnamed: 0'], inplace=True)
    for case_name, params in params_pre["params"].items():
        case_est = case_name
        params = eval(params)  # parameters are stored as their string repr in the CSV
        for est_name, est in pre_layer_learners:
            if est_name == case_est:
                est.set_params(**params)
    in_layer = SuperLearner(folds=10, backend='multiprocessing', model_selection=True)
    in_layer.add(pre_layer_learners, proba=True)
    preprocess = [in_layer]
    evl = Evaluator(scorer, cv=5, verbose=20, backend='multiprocessing')
    evl.fit(X, y, local_layer_learners, param_dicts=param_dicts_layer,
            preprocessing={'meta': preprocess}, n_iter=n_iterations)
    df_params_layer = pd.DataFrame(evl.results)
    return in_layer, df_params_layer
Example #3
def test_fit_score():
    """[Parallel | Evaluation] Test fit-score function."""
    out = fit_score(case='test',
                    tr_list=[],
                    est_name='ols',
                    est=OLS(),
                    params=(0, {
                        'offset': 2
                    }),
                    x=X,
                    y=y,
                    idx=((0, 5), (5, 10)),
                    scorer=make_scorer(mape, greater_is_better=False),
                    error_score=None)

    assert out[0] == 'test'
    assert out[1] == 'ols'
    assert out[2] == 0

    np.testing.assert_almost_equal(out[3], -1.5499999999999992, 5)
    np.testing.assert_almost_equal(out[4], -2.0749999999999993, 5)
Example #4
# Assumed imports for this snippet (``EnsembleTransformer`` is assumed to come
# from ``mlens.preprocessing``).
import pandas as pd
from sklearn.metrics import mean_absolute_error
from mlens.metrics import make_scorer
from mlens.model_selection import Evaluator
from mlens.preprocessing import EnsembleTransformer


def evaluateSecondLayer(base_learners, x_train, y_train, meta_learners,
                        param_dicts):
    in_layer = EnsembleTransformer()
    print("adding base learners to transformer")
    in_layer.add('stack', base_learners)

    preprocess = [in_layer]
    print("creating scorer")
    scorer = make_scorer(mean_absolute_error, greater_is_better=False)
    evl = Evaluator(scorer, cv=4, verbose=1)
    print("fitting evaluator")
    evl.fit(
        x_train.values,
        y_train.values,
        meta_learners,
        param_dicts,
        preprocessing={'meta': preprocess},
        n_iter=40  # bump this up to do a larger grid search
    )

    table = pd.DataFrame(evl.summary)
    table.to_html('iteration5.html')
    table.to_csv('iteration5.csv', index=False, header=False, sep='\t')
Example #5
# The following example evaluates a `Naive Bayes`_ estimator and a
# `K-Nearest-Neighbor`_ estimator under three different preprocessing scenarios:
# no preprocessing, standard scaling, and subset selection.
# In the latter case, preprocessing consists of selecting a subset of the
# features.
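#
# A rough sketch of those three preprocessing cases as a mapping of named
# pipelines (a minimal sketch; the ``Subset`` transformer and the chosen
# columns are illustrative assumptions, not the tutorial's exact code):

from sklearn.preprocessing import StandardScaler
from mlens.preprocessing import Subset

preprocessing = {'none': [],                    # no preprocessing
                 'sc': [StandardScaler()],      # standard scaling
                 'sub': [Subset([0, 1])]}       # keep only the first two features

##############################################################################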
#
# The scoring function
# ^^^^^^^^^^^^^^^^^^^^
#
# .. currentmodule:: mlens.metrics
#
# An important note is that the scoring function must be wrapped by
# :func:`make_scorer`, to ensure all scoring functions behave similarly regardless
# of whether they measure accuracy or errors. To wrap a function, simply do:
from mlens.metrics import make_scorer
accuracy_scorer = make_scorer(accuracy_score, greater_is_better=True)
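
##############################################################################
# Error metrics are wrapped the same way, but with ``greater_is_better=False``
# so that all candidates are ranked consistently whether the scorer measures
# accuracy or error. A minimal sketch, using ``mean_absolute_error`` purely as
# an illustrative error metric:
from sklearn.metrics import mean_absolute_error

mae_scorer = make_scorer(mean_absolute_error, greater_is_better=False)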

##############################################################################
# .. currentmodule:: mlens.model_selection
#
# The ``make_scorer`` wrapper
# is a copy of Scikit-learn's :func:`sklearn.metrics.make_scorer`, and you
# can import the Scikit-learn version as well.
# Note however that to pickle the :class:`Evaluator`, you **must** import
# ``make_scorer`` from ``mlens``.
#
# A simple evaluation
# ^^^^^^^^^^^^^^^^^^^
#
# Before throwing preprocessing into the mix, let's see how to evaluate a set of
# estimators. First, we need a list of estimators and a dictionary of parameter
# distributions keyed by estimator name, as shown in the sketch below.
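
##############################################################################
# A minimal sketch of that setup (the estimators, the parameter distribution
# and the toy data below are illustrative assumptions, not the tutorial's
# exact code):

from scipy.stats import randint
from sklearn.datasets import load_iris
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from mlens.model_selection import Evaluator

X, y = load_iris(return_X_y=True)

ests = [('gnb', GaussianNB()), ('knn', KNeighborsClassifier())]

# Only the KNN estimator gets a distribution to sample from; Naive Bayes is
# evaluated with its default parameters.
pars = {'knn': {'n_neighbors': randint(2, 20)}}

evaluator = Evaluator(accuracy_scorer, cv=10, random_state=2018, verbose=1)
evaluator.fit(X, y, ests, pars, n_iter=10)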
Example #6
        'colsample_bytree': uniform(0.6, 0.4),
        'reg_lambda': uniform(1, 2),
        'reg_alpha': uniform(1, 2),
    },
    'rf': {
        'max_depth': randint(2, 5),
        'min_samples_split': randint(5, 20),
        'min_samples_leaf': randint(10, 20),
        'n_estimators': randint(50, 100),
        'max_features': uniform(0.6, 0.3)
    }
}


scorer = make_scorer(mean_absolute_error, greater_is_better=False)

evl = Evaluator(
    scorer,
    cv=2,
    random_state=SEED,
    verbose=5,
)


evl.fit(
    xtrain,
    ytrain,
    estimators=base_learners,
    param_dicts=param_dicts,
Example #7
try:
    from contextlib import redirect_stderr
except ImportError:
    from mlens.externals.fixes import redirect as redirect_stderr

np.random.seed(100)

# Stack is nonsense here - we just need proba to be false
X, y = Data('stack', False, False).get_data((100, 2), 20)


def failed_score(p, y):
    """Bad scoring function to test exception handling."""
    raise ValueError("This fails.")


mape_scorer = make_scorer(mape, greater_is_better=False)
bad_scorer = make_scorer(failed_score)


def test_check():
    """[Model Selection] Test check of valid estimator."""
    np.testing.assert_raises(ValueError, Evaluator, mape)


def test_raises():
    """[Model Selection] Test raises on error."""

    evl = Evaluator(bad_scorer)

    np.testing.assert_raises(ValueError,
                             evl.fit,
Example #8
pars_1 = {}

sc = StandardScaler()
pca = PCA()
fa = FactorAnalysis()
nmf = NMF()

pre_cases = {
    'case-1': [sc],
    # 'case-2': [sc],
    # 'case-3': [pca],
    # 'case-4': [fa]
}

score = make_scorer(score_func=accuracy_score,
                    greater_is_better=True,
                    needs_proba=False,
                    needs_threshold=False)

ensemble = SequentialEnsemble(model_selection=True,
                              n_jobs=1,
                              shuffle=False,
                              random_state=seed)

ensemble.add('stack', ests_1, preprocessing=pre_cases)
ensemble.add_meta(SVC(kernel='linear', degree=5, tol=1e-4))
# ensemble.fit(X_train, y_train)
# y_pred = ensemble.predict(X_test)
# ens = ensemble
evaluator = Evaluator(scorer=score, random_state=seed, verbose=True)
evaluator.fit(data_pix,
              spacial_pix,
Example #9
# Imports assumed by the snippet below (``np`` and ``uniform`` are used further down).
import numpy as np
from scipy.stats import uniform

from sklearn.linear_model import LinearRegression, TheilSenRegressor, RANSACRegressor, Ridge, Lasso
# from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

from mlens.metrics import make_scorer
from mlens.model_selection import Evaluator


seed = 2018
np.random.seed(seed)

# image_set = np.genfromtxt('../testdata/c1_gn.csv', delimiter=',')
# label_set = np.genfromtxt('../testdata/c1_L_gn.csv', delimiter=',')

r2_scorer = make_scorer(r2_score)

ests = [('rdg', Ridge(max_iter=4000)), ('las', Lasso(max_iter=4000))]

a = uniform(0, 10)

params = {
    'rdg': {'alpha': a},
    'las': {'alpha': a}
}

preproc = {
    'none': [],
    'sc': [StandardScaler()]
}
Example #10
import numpy as np

# from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor, BaggingRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression, Ridge, Lasso
from sklearn.metrics import r2_score, accuracy_score
# from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVR

from mlens.metrics import make_scorer
from mlens.ensemble import SuperLearner

# Keep the integer seed separate: ``np.random.seed`` returns None, so its return
# value cannot be passed on to the estimators as ``random_state``.
seed = 2018
np.random.seed(seed)

scorer = make_scorer(r2_score, greater_is_better=False)


def build_ensemble(incl_meta,
                   meta_type='log',
                   preprocessors=None,
                   estimators=None,
                   propagate_features=None):
    if propagate_features:
        n = len(propagate_features)
        propagate_features_1 = propagate_features
        propagate_features_2 = [i for i in range(n)]
    else:
        propagate_features_1 = propagate_features_2 = None

    if not estimators:
        estimators = [('rfr', RandomForestRegressor(random_state=seed)),
Example #11
# Set parameter mapping
# Here, we differentiate distributions between cases for the random forest
params = {
    'svc': {
        'C': uniform(0, 10)
    },
    'class.rf': {
        'max_depth': randint(2, 10)
    },
    'proba.rf': {
        'max_depth': randint(2, 10),
        'max_features': uniform(0.5, 0.5)
    }
}

scorer = make_scorer(accuracy_score)
evaluator = Evaluator(scorer=scorer, random_state=seed, cv=2)

evaluator.fit(X,
              y,
              meta_learners,
              params,
              preprocessing=preprocessing,
              n_iter=2)

##############################################################################
# We can now compare the performance of the best fit for each candidate
# meta learner.

print("Results:\n%s" % evaluator.results)
Example #12
# ``uniform`` is used in the parameter distributions below.
from scipy.stats import uniform

# from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import Lasso, Ridge, LinearRegression, LogisticRegression
from sklearn.metrics import r2_score
# from sklearn.model_selection import train_test_split
# from sklearn.preprocessing import StandardScaler

from mlens.model_selection import Evaluator
from mlens.metrics import make_scorer
# from mlens.preprocessing import Subset

# image_set = np.genfromtxt('../testdata/c1_gn.csv', delimiter=',')
# label_set = np.genfromtxt('../testdata/c1_L_gn.csv', delimiter=',')
#
# X_train, X_test, y_train, y_test = train_test_split(image_set, label_set, test_size=0.33)

score_f = make_scorer(score_func=r2_score, greater_is_better=False)

evaluator = Evaluator(scorer=score_f, shuffle=True, verbose=True)

estimators = [
    ('las', Lasso(copy_X=True, max_iter=4000)),
    ('rdg', Ridge(copy_X=True, max_iter=4000)),
    # ('rfr', RandomForestRegressor()),
]

params = {
    'las': {
        'alpha': uniform(0, 5)
    },
    'rdg': {
        'alpha': uniform(0, 5)