def __init__(self):
    """Build the wrapped estimator without requiring any arguments.

    The positional arguments registered for the module-level ``Est`` class
    are looked up and forwarded to the parent constructor.
    """
    # All candidate argument tuples are built eagerly; only the one
    # registered for ``Est`` is used.
    defaults_by_class = {
        LearnerEstimator: (OLS(), FoldIndex()),
        LayerEnsemble: (FoldIndex(), OLS()),
        TransformerEstimator: (Scale(), FoldIndex()),
    }
    init_args = defaults_by_class[Est]
    super(Tmp, self).__init__(*init_args)
def __init__(self):
    """Build the wrapped estimator without requiring any arguments.

    The positional arguments registered for the module-level ``Est`` class
    are looked up and forwarded to the parent constructor.
    """
    # All candidate argument tuples are built eagerly; only the one
    # registered for ``Est`` is used.
    defaults_by_class = {
        LearnerEstimator: (OLS(), FoldIndex()),
        LayerEnsemble: (make_group(FoldIndex(), ESTIMATORS, PREPROCESSING), ),
        TransformerEstimator: (Scale(), FoldIndex()),
    }
    init_args = defaults_by_class[Est]
    super(Tmp, self).__init__(*init_args)
def test_full_tuple_shape():
    """[Base] FoldIndex: test the tuple shape on generation.

    With five folds, every generated item is compared as a
    ``(train, test)`` pair where the train part is a tuple of 2-tuples and
    the test part is a single 2-tuple.
    """
    observed = []
    for train_part, test_part in FoldIndex(5, X=X).generate():
        observed.append((train_part, test_part))

    expected = [
        (((1, 5), ), (0, 1)),
        (((0, 1), (2, 5)), (1, 2)),
        (((0, 2), (3, 5)), (2, 3)),
        (((0, 3), (4, 5)), (3, 4)),
        (((0, 4), ), (4, 5)),
    ]
    assert observed == expected
def test_scores_wo_folds_sklearn():
    """[SuperLearner] test scoring without folds on sklearn scorer.

    Recomputes the per-estimator mean of ``mean_squared_error`` over the
    test folds and compares it with the scores stored on ``ens3``.
    Returns early when ``run_sklearn`` is falsy.
    """
    if not run_sklearn:
        return

    # Anything printed while fitting and predicting goes to the null device.
    with open(os.devnull, 'w') as f, redirect_stdout(f):
        ens3.fit(X2, y2)
        ens3.predict(X2)

    per_fold = {}
    for _, tei in FoldIndex(FOLDS, X2).generate(as_array=True):
        # Column order follows the sorted estimator list.
        for col, (est_name, __) in enumerate(sorted(ECM)):
            fold_score = mean_squared_error(y2[tei], F2[tei][:, col])
            per_fold.setdefault(est_name, []).append(fold_score)

    averaged = {name: np.mean(vals) for name, vals in per_fold.items()}

    for name, value in averaged.items():
        assert value == ens3.data['score-m']['layer-1/%s' % name]
def test_full_index_is_fitted():
    """[Base] FoldIndex: check fit methods.

    ``n_samples`` must be falsy on a fresh indexer and truthy after each of
    the three ways of supplying data exercised here: calling ``fit``,
    exhausting ``generate(X)``, and passing ``X`` to the constructor.
    """
    # Data supplied through an explicit fit call.
    via_fit = FoldIndex(4)
    assert not via_fit.n_samples
    via_fit.fit(X)
    assert via_fit.n_samples

    # Data supplied while generating folds; the folds themselves are unused.
    via_generate = FoldIndex(4)
    assert not via_generate.n_samples
    for _fold in via_generate.generate(X):
        continue
    assert via_generate.n_samples

    # Data supplied at construction time.
    via_init = FoldIndex(4, X)
    assert via_init.n_samples
def test_full_array_shape():
    """[Base] FoldIndex: test the array shape on generation.

    With three folds and ``as_array=True``, each generated train/test index
    array is compared element-wise against the expected arrays.
    """
    # (train indices, test indices) expected for each fold, in order.
    expected_folds = [
        (np.array([2, 3, 4]), np.array([0, 1])),
        (np.array([0, 1, 4]), np.array([2, 3])),
        (np.array([0, 1, 2, 3]), np.array([4])),
    ]

    folds = FoldIndex(3, X).generate(as_array=True)
    for fold_no, (train_idx, test_idx) in enumerate(folds):
        want_train, want_test = expected_folds[fold_no]
        np.testing.assert_array_equal(train_idx, want_train)
        np.testing.assert_array_equal(test_idx, want_test)
def test_scores_w_folds():
    """[SuperLearner] test scoring with folds.

    Collects, per test fold, the ``rmse`` of every estimator's prediction
    column under a key derived from its preprocessing case and name.
    """
    scores = {'null-1': [], 'offs-1': [], 'sc.offs-2': [], 'sc.null-2': []}

    for _, tei in FoldIndex(FOLDS, X1).generate(as_array=True):
        # The column counter runs across all cases within a fold.
        col = 0
        for case in sorted(PREPROCESSING):
            for est_name, _est in sorted(ESTIMATORS[case]):
                fold_score = rmse(y1[tei], F1[tei][:, col])

                if case == 'no':
                    key = '%s-1' % est_name
                else:
                    key = '%s.%s-2' % (case, est_name)
                scores[key].append(fold_score)

                col += 1
def test_scores_wo_folds_in_script():
    """[SuperLearner] test scoring without folds and in-script scorer.

    Fits ``ens2_b``, recomputes the per-estimator mean of
    ``in_script_func`` over the test folds and compares it with the scores
    stored on the ensemble.
    """
    ens2_b.fit(X2, y2)

    per_fold = {}
    for _, tei in FoldIndex(FOLDS, X2).generate(as_array=True):
        # Column order follows the sorted estimator list.
        for col, (est_name, __) in enumerate(sorted(ECM)):
            fold_score = in_script_func(y2[tei], F2[tei][:, col])
            per_fold.setdefault(est_name, []).append(fold_score)

    averaged = {name: np.mean(vals) for name, vals in per_fold.items()}

    for name, value in averaged.items():
        assert value == ens2_b.data['score-m']['layer-1/%s' % name]
def test_scores_wo_folds():
    """[SuperLearner] test scoring without folds.

    Recomputes the per-estimator mean of ``rmse`` over the test folds and
    compares it with the scores stored on ``ens2``.
    """
    per_fold = {}
    for _, tei in FoldIndex(FOLDS, X2).generate(as_array=True):
        # Column order follows the sorted estimator list.
        for col, (est_name, _est) in enumerate(sorted(ECM)):
            fold_score = rmse(y2[tei], F2[tei][:, col])
            per_fold.setdefault(est_name, []).append(fold_score)

    averaged = {name: np.mean(vals) for name, vals in per_fold.items()}

    for name, value in averaged.items():
        assert value == ens2.data['score-m']['layer-1/%s' % name]
from mlens.utils.exceptions import ParameterChangeWarning from mlens.testing import Data from mlens.estimators import LearnerEstimator, TransformerEstimator, LayerEnsemble from mlens.externals.sklearn.base import clone try: from sklearn.utils.estimator_checks import check_estimator run_sklearn = True except ImportError: check_estimator = None run_sklearn = False data = Data('stack', False, False) X, y = data.get_data((25, 4), 3) est = TransformerEstimator(Scale(), FoldIndex(), dtype=np.float64) Est = TransformerEstimator class Tmp(Est): """Temporary class Wrapper to get full estimator on no-args instantiation. For compatibility with older Scikit-learn versions. """ def __init__(self): args = { LearnerEstimator: (OLS(), FoldIndex()), LayerEnsemble: (FoldIndex(), OLS()), TransformerEstimator: (Scale(), FoldIndex())
def test_learner_attr():
    """[Module | LearnerEstimator] test setting attribute

    After fitting, swapping the indexer must make ``predict`` raise
    ``NotFittedError``.
    """
    est.fit(X, y)
    est.indexer = FoldIndex(1)

    with np.testing.assert_raises(NotFittedError):
        est.predict(X)
from mlens.testing import Data from mlens.estimators import LearnerEstimator, TransformerEstimator, LayerEnsemble from mlens.externals.sklearn.base import clone try: from sklearn.utils.estimator_checks import check_estimator run_sklearn = True except ImportError: check_estimator = None run_sklearn = False data = Data('stack', False, False) X, y = data.get_data((25, 4), 3) (F, wf), (P, wp) = data.ground_truth(X, y) est = LearnerEstimator(OLS(), FoldIndex(), dtype=np.float64) Est = LearnerEstimator class Tmp(Est): """Temporary class Wrapper to get full estimator on no-args instantiation. For compatibility with older Scikit-learn versions. """ def __init__(self): args = { LearnerEstimator: (OLS(), FoldIndex()), LayerEnsemble: (FoldIndex(), OLS()), TransformerEstimator: (Scale(), FoldIndex())
def test_full_raises_on_empty():
    """[Base] FoldIndex: check raises error on singular array.

    Constructing a two-fold indexer on a one-element array must raise
    ``ValueError``.
    """
    single_element = np.empty(1)
    np.testing.assert_raises(ValueError, FoldIndex, 2, single_element)
from mlens.estimators import LearnerEstimator, TransformerEstimator, LayerEnsemble from mlens.externals.sklearn.base import clone try: from sklearn.utils.estimator_checks import check_estimator run_sklearn = True except ImportError: check_estimator = None run_sklearn = False data = Data('stack', False, True) X, y = data.get_data((25, 4), 3) (F, wf), (P, wp) = data.ground_truth(X, y) Est = LayerEnsemble est = LayerEnsemble(make_group(FoldIndex(), ESTIMATORS, PREPROCESSING), dtype=np.float64) class Tmp(Est): """Temporary class Wrapper to get full estimator on no-args instantiation. For compatibility with older Scikit-learn versions. """ def __init__(self): args = { LearnerEstimator: (OLS(), FoldIndex()), LayerEnsemble: (make_group(FoldIndex(), ESTIMATORS, PREPROCESSING), ), TransformerEstimator: (Scale(), FoldIndex())
wrapper around a generic estimator along with a cross-validation strategy. The job of the learner is to manage all sub-computations required for fitting and prediction. In fact, its public methods are generators from sub-learners that do the actual computation. A learner is the parent node of an estimator's computational sub-graph induced by the cross-validation strategy. A learner is created by specifying an ``estimator`` and an ``indexer``, along with a set of optional arguments, most notably the ``name`` of the learner. Naming is important, as it is used for cache referencing. If setting it manually, ensure you give the learner a unique name. """ from mlens.utils.dummy import OLS from mlens.parallel import Learner, Job from mlens.index import FoldIndex indexer = FoldIndex(folds=2) learner = Learner(estimator=OLS(), indexer=indexer, name='ols') ###################################################################### # The learner doesn't do any heavy lifting itself, it manages the creation of a sub-graph # of auxiliary :class:`SubLearner` nodes for each fold during estimation. # This process is dynamic: the sub-learners are temporary instances created for each # estimation. ###################################################################### # To fit a learner, we need a cache reference. When fitting all estimators from the # main process, this reference can be a list. If not (e.g. multiprocessing), the # reference should instead be a ``str`` pointing to the path of the cache directory. # Prior to running a job (``fit``, ``predict``, ``transform``), the learner must be # configured on the given data by calling the ``setup`` method. This takes care of # indexing the training set for cross-validation, assigning output columns etc.
def test_full_raises_on_oversampling():
    """[Base] FoldIndex: check raises error.

    Constructing an indexer with 100 folds on ``X`` must raise
    ``ValueError``.
    """
    np.testing.assert_raises(ValueError, FoldIndex, 100, X)
def test_learner_attr():
    """[Module | TransformerEstimator] test setting attribute

    After fitting, swapping the indexer must make ``predict`` emit a
    ``ParameterChangeWarning``.
    """
    est.fit(X, y)
    est.indexer = FoldIndex(2)

    with np.testing.assert_warns(ParameterChangeWarning):
        est.predict(X)
def test_full_raises_on_fold_1():
    """[Base] FoldIndex: check raises error on folds=1.

    Constructing a single-fold indexer on ``X`` must raise ``ValueError``.
    """
    np.testing.assert_raises(ValueError, FoldIndex, 1, X)
To parallelize the implementation, we can use the :class:`Layer` class. A layer is a handle that will run any number of :class:`Group` instances attached to it in parallel. Each group in turn is a wrapper around an ``indexer-transformers-estimators`` triplet. Basics ------ So, to fit our two learners in parallel, we first need a :class:`Group` object to handle them. """ from mlens.parallel import Layer, Group, make_group, run from mlens.utils.dummy import OLS, Scale from mlens.index import FoldIndex indexer = FoldIndex(folds=2) group = make_group(indexer, [OLS(1), OLS(2)], None) ############################################################################ # This ``group`` object is now a complete description of how to fit our two # learners using the prescribed indexing method. # # To train the estimators, we need to feed the group to a :class:`Layer` instance: import numpy as np np.random.seed(2) X = np.arange(20).reshape(10, 2) y = np.random.rand(10)
def test_full_warns_on_fold_1():
    """[Base] FoldIndex: check warns on folds=1 if not raise_on_exception.

    With ``raise_on_exception=False``, constructing a single-fold indexer
    on ``X`` must emit a ``UserWarning``.
    """
    np.testing.assert_warns(
        UserWarning, FoldIndex, 1, X, raise_on_exception=False)
def test_full_raises_on_float():
    """[Base] FoldIndex: check raises error on float.

    Constructing an indexer with a fractional fold count must raise
    ``ValueError``.
    """
    np.testing.assert_raises(ValueError, FoldIndex, 0.5, X)
return super(OLSSparse, self).predict(X.toarray()) X = np.random.rand(10, 50).astype(np.float32) y = np.arange(10).astype(np.float32) first_prop = [1, 2, 3] n_first_prop = len(first_prop) second_prop = [i for i in range(n_first_prop)] second_prop.append(second_prop[-1] + 1) second_prop.append(second_prop[-1] + 1) n_second_prop = len(second_prop) ens1 = TempClass() ens1.add([OLS(0), OLS(1)], FoldIndex(), propagate_features=first_prop) ens2 = TempClass() ens2.add([OLS(0), OLS(1)], FoldIndex(), propagate_features=first_prop) ens2.add([OLS(2), OLS(3)], FoldIndex(), propagate_features=second_prop) ens3 = TempClass() ens3.add([OLSSparse(0), OLSSparse(1)], FoldIndex(), propagate_features=first_prop) ens3.add([OLSSparse(2), OLSSparse(3)], FoldIndex(), propagate_features=second_prop) ens4 = TempClass() ens4.add([OLS(), OLS(1), OLS(2)], FoldIndex(), shuffle=True, random_state=SEED)