def test_bench_equality():
    """[Model Selection] Test benchmark correspondence with eval.

    Fits an ``Evaluator`` and calls ``benchmark`` with the same estimator
    and preprocessing specification, then checks that both report the same
    mean test score for the ``'no.ols'`` entry.
    """
    # Anything written to stderr during the two runs is discarded.
    with open(os.devnull, 'w') as devnull, redirect_stderr(devnull):
        evl = Evaluator(mape_scorer, cv=5)
        evl.fit(
            X, y,
            estimators={'pr': [OLS()], 'no': [OLS()]},
            param_dicts={},
            preprocessing={'pr': [Scale()], 'no': []},
        )

        out = benchmark(
            X, y, mape_scorer, 5,
            {'pr': [OLS()], 'no': [OLS()]},
            {'pr': [Scale()], 'no': []},
            None,
        )

    bench_score = out['test_score-m']['no.ols']
    eval_score = evl.results['test_score-m']['no.ols']
    np.testing.assert_approx_equal(bench_score, eval_score)
def __init__(self):
    """Initialise the parent class with the arguments registered for ``Est``.

    Takes no arguments: the positional arguments handed to the parent
    constructor are looked up in a table keyed by estimator class.
    """
    defaults_by_class = {
        LearnerEstimator: (OLS(), FoldIndex()),
        LayerEnsemble: (FoldIndex(), OLS()),
        TransformerEstimator: (Scale(), FoldIndex()),
    }
    init_args = defaults_by_class[Est]
    super(Tmp, self).__init__(*init_args)
def test_w_prep():
    """[Model Selection] Test run with preprocessing, double step.

    Preprocessing and evaluation are issued as two separate calls
    (``preprocess`` followed by ``evaluate``). The mean test scores and the
    selected ``offset`` parameter are then checked for both cases.
    """
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    # Anything written to stderr during the run is discarded.
    with open(os.devnull, 'w') as devnull, redirect_stderr(devnull):
        # Step one: preprocessing
        evl.preprocess(X, y, {'pr': [Scale()], 'no': []})

        # Step two: fitting
        search_space = {'ols': {'offset': randint(1, 10)}}
        evl.evaluate(X, y,
                     estimators=[OLS()],
                     param_dicts=search_space,
                     n_iter=3)

    mean_scores = evl.summary['test_score_mean']
    np.testing.assert_approx_equal(mean_scores[('no', 'ols')],
                                   -24.903229451043195)
    # The preprocessed case is only compared to one significant digit.
    np.testing.assert_approx_equal(mean_scores[('pr', 'ols')],
                                   -26.510708862278072,
                                   significant=1)

    for case in ('no', 'pr'):
        assert evl.summary['params'][(case, 'ols')]['offset'] == 4
def test_w_prep_fit():
    """[Model Selection] Test run with preprocessing, single step.

    A single ``fit`` call receives both the estimators and the
    preprocessing cases. The mean test scores and the selected ``offset``
    parameter are then checked for both cases.
    """
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100,
                    verbose=True)

    # The evaluator is verbose; send what it prints to stdout to devnull.
    with open(os.devnull, 'w') as devnull, redirect_stdout(devnull):
        evl.fit(
            X, y,
            estimators=[OLS()],
            param_dicts={'ols': {'offset': randint(1, 10)}},
            preprocessing={'pr': [Scale()], 'no': []},
            n_iter=3,
        )

    mean_scores = evl.results['test_score-m']
    np.testing.assert_approx_equal(mean_scores['no.ols'],
                                   -24.903229451043195)
    # The preprocessed case is only compared to one significant digit.
    np.testing.assert_approx_equal(mean_scores['pr.ols'],
                                   -26.510708862278072,
                                   significant=1)

    for key in ('no.ols', 'pr.ols'):
        assert evl.results['params'][key]['offset'] == 4
def test_w_prep_set_params():
    """[Model Selection] Test run with preprocessing, sep param dists.

    Each (case, estimator) pair gets its own ``offset`` distribution. The
    mean test scores and the selected offsets are then checked per pair.
    """
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    # One parameter distribution per (preprocessing case, estimator) pair.
    params = {
        ('no', 'ols'): {'offset': randint(3, 6)},
        ('pr', 'ols'): {'offset': randint(1, 3)},
    }

    # Fitting
    evl.fit(
        X, y,
        estimators={'pr': [OLS()], 'no': [OLS()]},
        param_dicts=params,
        preprocessing={'pr': [Scale()], 'no': []},
        n_iter=3,
    )

    mean_scores = evl.summary['test_score_mean']
    np.testing.assert_approx_equal(mean_scores[('no', 'ols')],
                                   -18.684229451043198)
    # The preprocessed case is only compared to one significant digit.
    np.testing.assert_approx_equal(mean_scores[('pr', 'ols')],
                                   -7.2594502123869491,
                                   significant=1)

    chosen = evl.summary['params']
    assert chosen[('no', 'ols')]['offset'] == 3
    assert chosen[('pr', 'ols')]['offset'] == 1
def __init__(self):
    """Initialise the parent class with the arguments registered for ``Est``.

    Takes no arguments: the positional arguments handed to the parent
    constructor are looked up in a table keyed by estimator class.
    """
    defaults_by_class = {
        LearnerEstimator: (OLS(), FoldIndex()),
        LayerEnsemble: (
            make_group(FoldIndex(), ESTIMATORS, PREPROCESSING),
        ),
        TransformerEstimator: (Scale(), FoldIndex()),
    }
    init_args = defaults_by_class[Est]
    super(Tmp, self).__init__(*init_args)
def test_params():
    """[Model Selection] Test raises on bad params.

    The parameter key ``('bad', 'ols')`` names a case that does not appear
    in the preprocessing dict (which only has ``'prep'``); ``fit`` is
    expected to raise ``ValueError``.
    """
    evl = Evaluator(mape_scorer)

    estimators = [OLS()]
    bad_params = {('bad', 'ols'): {'offset': randint(1, 10)}}
    preprocessing = {'prep': [Scale()]}

    with np.testing.assert_raises(ValueError):
        evl.fit(X, y, estimators, bad_params, preprocessing=preprocessing)
def test_scale_transformation():
    """[Utils] Scale: check transformation.

    Feeds a small integer matrix through ``Scale().fit_transform`` and
    compares the output element-wise against a hand-written expectation.
    """
    # Input columns are [0, 2, 4] and, after doubling, [2, 6, 10].
    data = np.arange(6).reshape(3, 2)
    data[:, 1] *= 2

    expected = np.array([[-2., -4.],
                         [0., 0.],
                         [2., 4.]])

    scaled = Scale().fit_transform(data)
    np.testing.assert_array_equal(scaled, expected)
def test_params():
    """[Model Selection] Test raises on bad params.

    The parameter key ``'bad.ols'`` names a case that does not appear in
    the preprocessing dict (which only has ``'prep'``); ``fit`` is expected
    to raise ``ValueError``.
    """
    evl = Evaluator(mape_scorer, verbose=2)

    estimators = [OLS()]
    bad_params = {'bad.ols': {'offset': randint(1, 10)}}
    preprocessing = {'prep': [Scale()]}

    with np.testing.assert_raises(ValueError):
        evl.fit(X, y,
                estimators=estimators,
                param_dicts=bad_params,
                preprocessing=preprocessing)
def test_w_prep_fit():
    """[Model Selection] Test run with preprocessing, single step.

    A single ``fit`` call receives both the estimators and the
    preprocessing cases. The mean test scores and the selected ``offset``
    parameter are then checked for both cases.
    """
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    search_space = {'ols': {'offset': randint(1, 10)}}
    cases = {'pr': [Scale()], 'no': []}
    evl.fit(X, y,
            estimators=[OLS()],
            param_dicts=search_space,
            preprocessing=cases,
            n_iter=3)

    mean_scores = evl.summary['test_score_mean']
    np.testing.assert_approx_equal(mean_scores[('no', 'ols')],
                                   -24.903229451043195)
    # The preprocessed case is only compared to one significant digit.
    np.testing.assert_approx_equal(mean_scores[('pr', 'ols')],
                                   -26.510708862278072,
                                   significant=1)

    for case in ('no', 'pr'):
        assert evl.summary['params'][(case, 'ols')]['offset'] == 4
def test_w_prep_set_params():
    """[Model Selection] Test run with preprocessing, sep param dists.

    Each ``case.estimator`` key gets its own ``offset`` distribution. The
    mean test scores and the selected offsets are then checked per key.
    """
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100,
                    verbose=2)

    # One parameter distribution per 'case.estimator' key.
    params = {
        'no.ols': {'offset': randint(3, 6)},
        'pr.ols': {'offset': randint(1, 3)},
    }

    # The evaluator is verbose; send what it prints to stdout to devnull.
    with open(os.devnull, 'w') as devnull, redirect_stdout(devnull):
        evl.fit(
            X, y,
            estimators={'pr': [OLS()], 'no': [OLS()]},
            param_dicts=params,
            preprocessing={'pr': [Scale()], 'no': []},
            n_iter=10,
        )

    mean_scores = evl.results['test_score-m']
    np.testing.assert_approx_equal(mean_scores['no.ols'],
                                   -18.684229451043198)
    np.testing.assert_approx_equal(mean_scores['pr.ols'],
                                   -7.2594502123869491)

    chosen = evl.results['params']
    assert chosen['no.ols']['offset'] == 3
    assert chosen['pr.ols']['offset'] == 1
############################################################################ # # .. note:: # When constructing a :class:`Pipeline` for use with the :class:`Transformer`, # the ``return_y`` argument must be ``True``. ############################################################################ # To link the transformer's sub-graph with the learner's sub-graph, # we set the ``preprocess`` argument of the learner equal to the ``name`` # of the :class:`Transformer`. Note that any number of learners can share # the same transformer and in fact should when the same preprocessing is desired. from mlens.utils.dummy import Scale from mlens.parallel import Transformer, Pipeline pipeline = Pipeline([('trans', Scale())], return_y=True) transformer = Transformer(estimator=pipeline, indexer=indexer, name='sc', verbose=True) ############################################################################ # To build the learner we pass the ``name`` of the transformer as # the ``preprocess`` argument: learner = Learner(estimator=OLS(), preprocess='sc', indexer=indexer, scorer=mse, verbose=True)
def test_scale_not_fitted():
    """[Utils] Scale: check not fitted.

    Calling ``transform`` on a fresh ``Scale`` instance, without a prior
    fit, is expected to raise ``NotFittedError``.
    """
    scaler = Scale()
    with np.testing.assert_raises(NotFittedError):
        scaler.transform(X)
def test_assert_correct_layer_format_2():
    """[Utils] assert_correct_format: prep - list, est - list.

    Passing a list of estimators together with a list of preprocessing
    steps must not raise.
    """
    estimators = [OLS()]
    preprocessing = [Scale()]
    assert_correct_format(estimators, preprocessing)
def test_assert_correct_layer_format_3():
    """[Utils] assert_correct_format: prep - dict, est - dict.

    Passing dicts that share the key ``'a'`` for both estimators and
    preprocessing must not raise.
    """
    estimators = {'a': [OLS()]}
    preprocessing = {'a': [Scale()]}
    assert_correct_format(estimators, preprocessing)
def test_assert_correct_layer_format_tuple():
    """[Utils] assert_correct_format: prep - dict, est - inst.

    A bare estimator instance combined with a preprocessing dict is
    expected to raise ``LayerSpecificationError``.
    """
    estimator = OLS()
    preprocessing = {'a': [Scale()]}
    with np.testing.assert_raises(LayerSpecificationError):
        assert_correct_format(estimator, preprocessing)
def test_assert_correct_layer_format_dict_keys():
    """[Utils] assert_correct_format: assert raises on no key overlap.

    The estimator dict is keyed by ``'a'`` and the preprocessing dict by
    ``'b'``; ``LayerSpecificationError`` is expected.
    """
    estimators = {'a': [OLS()]}
    preprocessing = {'b': [Scale()]}
    with np.testing.assert_raises(LayerSpecificationError):
        assert_correct_format(estimators, preprocessing)
def test_assert_correct_layer_format_4():
    """[Utils] assert_correct_format: prep - inst, est - inst.

    Passing a bare estimator instance together with a bare preprocessing
    instance must not raise.
    """
    estimator = OLS()
    preprocessor = Scale()
    assert_correct_format(estimator, preprocessor)
import numpy as np np.random.seed(2) X = np.arange(20).reshape(10, 2) y = np.random.rand(10) layer = Layer(stack=group) print(run(layer, 'fit', X, y, return_preds=True)) ############################################################################ # To use some preprocessing before fitting the estimators, we can use the # ``transformers`` argument when creating our ``group``: group = make_group(indexer, [OLS(1), OLS(2)], [Scale()]) layer = Layer(stack=group) print(run(layer, 'fit', X, y, return_preds=True)) ############################################################################ # # Multitasking # ------------ # # If we want our estimators to have different preprocessing, we can easily # achieve this either by specifying different cases when making the group, # or by making two separate groups. In the first case: group = make_group(indexer, {
Test layer push and pop ops. """ import os import numpy as np from mlens.index import INDEXERS from mlens.testing.dummy import Data, ECM from mlens.utils.dummy import Scale, LogisticRegression from mlens.parallel import make_group, Layer, run from mlens.externals.sklearn.base import clone try: from contextlib import redirect_stdout except ImportError: from mlens.externals.fixes import redirect as redirect_stdout PREPROCESSING_1 = {'no1': [], 'sc1': [('scale', Scale())]} ESTIMATORS_PROBA_1 = {'sc1': [('offs1', LogisticRegression(offset=2)), ('null1', LogisticRegression())], 'no1': [('offs1', LogisticRegression(offset=2)), ('null1', LogisticRegression())]} PREPROCESSING_2 = {'no2': [], 'sc2': [('scale', Scale())]} ESTIMATORS_PROBA_2 = {'sc2': [('offs2', LogisticRegression(offset=2)), ('null2', LogisticRegression())], 'no2': [('offs2', LogisticRegression(offset=2)), ('null2', LogisticRegression())]} def scorer(p, y): return np.mean(p - y)
def test_pipeline():
    """[Parallel | Pipeline] Test estimator

    Wraps a ``Scale`` instance in a ``Pipeline`` and passes the result to
    ``check_estimator``.
    """
    pipeline = Pipeline(Scale())
    check_estimator(pipeline)
from mlens.utils.exceptions import ParameterChangeWarning from mlens.testing import Data from mlens.estimators import LearnerEstimator, TransformerEstimator, LayerEnsemble from mlens.externals.sklearn.base import clone try: from sklearn.utils.estimator_checks import check_estimator run_sklearn = True except ImportError: check_estimator = None run_sklearn = False data = Data('stack', False, False) X, y = data.get_data((25, 4), 3) est = TransformerEstimator(Scale(), FoldIndex(), dtype=np.float64) Est = TransformerEstimator class Tmp(Est): """Temporary class Wrapper to get full estimator on no-args instantiation. For compatibility with older Scikit-learn versions. """ def __init__(self): args = { LearnerEstimator: (OLS(), FoldIndex()), LayerEnsemble: (FoldIndex(), OLS()), TransformerEstimator: (Scale(), FoldIndex())