Exemple #1
0
def run(cls, kls, proba, preprocessing, **kwargs):
    """Fit a one-layer ensemble and check its output against the ground truth.

    Parameters
    ----------
    cls : callable
        Called with no arguments to build the ensemble under test.
    kls : str
        Layer type, forwarded to ``Data`` and to ``ens.add``. When it equals
        ``'subsemble'`` the ``partitions`` keyword must be present.
    proba
        Forwarded to ``Data`` and ``ens.add``; together with
        ``preprocessing`` it forms the lookup key into ``ESTS``.
    preprocessing
        When truthy, ``PREPROCESSING`` is handed to the layer; otherwise
        ``None`` is.
    **kwargs
        ``model_selection`` is popped and, when truthy, sets the attribute of
        the same name on the ensemble. Everything else is forwarded to
        ``Data`` and ``ens.add``.
    """
    use_model_selection = kwargs.pop('model_selection', None)
    n_partitions = kwargs['partitions'] if kls == 'subsemble' else 1

    estimators = ESTS[(proba, preprocessing)]
    transformers = PREPROCESSING if preprocessing else None

    generator = Data(kls, proba, preprocessing, **kwargs)
    X, y = generator.get_data((LEN, WIDTH), MOD)
    (expected, _weights), _ = generator.ground_truth(X, y, n_partitions)

    # Anything the ensemble writes to stderr is discarded while it runs.
    with open(os.devnull, 'w') as sink, redirect_stderr(sink):
        ens = cls()
        ens.add(
            kls, estimators, transformers,
            proba=proba, dtype=np.float64, **kwargs)

        if use_model_selection:
            ens.model_selection = True

        ens.fit(X, y)
        actual, _ = ens.transform(X, y)

    np.testing.assert_array_equal(expected, actual)
Exemple #2
0
def test_ground_truth():
    """[Utils] testing: stacking ground truth equals hard-coded references."""
    # Reference values, listed in the order ground_truth's outputs are
    # unpacked below.
    want_f = np.array([
        [11.0, 17.0, -14.0, -42.0],
        [15.0, 29.0, -10.0, -30.0],
        [17.64705882, 39.64705882, -2.35294118, -6.35294118],
        [22.35294118, 52.35294118, 2.35294118, 6.35294118],
        [25.0, 63.0, 10.0, 30.0],
        [29.0, 75.0, 14.0, 42.0],
    ])

    want_wf = np.array([
        [-7.0, 9.0],
        [-3.52941176, 5.88235294],
        [-5.0, 7.0],
        [-5.0, 11.0],
        [-1.52941176, 7.88235294],
        [-3.0, 9.0],
        [1.0, 1.0],
        [1.17647059, 1.17647059],
        [1.0, 1.0],
        [3.0, 3.0],
        [3.17647059, 3.17647059],
        [3.0, 3.0],
    ])

    want_p = np.array([
        [8.57142857, 14.57142857, -11.42857143, -31.42857143],
        [13.14285714, 27.14285714, -6.85714286, -18.85714286],
        [17.71428571, 39.71428571, -2.28571429, -6.28571429],
        [22.28571429, 52.28571429, 2.28571429, 6.28571429],
        [26.85714286, 64.85714286, 6.85714286, 18.85714286],
        [31.42857143, 77.42857143, 11.42857143, 31.42857143],
    ])

    want_wp = np.array([
        [-4.0, 6.28571429],
        [-2.0, 8.28571429],
        [1.14285714, 1.14285714],
        [3.14285714, 3.14285714],
    ])

    generator = Data('stack', False, True, folds=3)
    features, targets = generator.get_data((6, 2), 2)
    (got_f, got_wf), (got_p, got_wp) = generator.ground_truth(
        features, targets)

    # Checked in a fixed order so the first mismatch reported is stable.
    comparisons = (
        (got_f, want_f),
        (got_wf, want_wf),
        (got_p, want_p),
        (got_wp, want_wp),
    )
    for got, want in comparisons:
        np.testing.assert_array_almost_equal(got, want)
Exemple #3
0
"""ML-ENSEMBLE

Placeholder for more rigorous tests.

"""
import numpy as np
from mlens.metrics import rmse
from mlens.testing.dummy import Data, ESTIMATORS, PREPROCESSING, OLS

from mlens.ensemble import TemporalEnsemble

# Arguments for Data.get_data below: called with (LEN, WIDTH) and MOD.
LEN = 20
WIDTH = 2
MOD = 2

# Module-level fixture for the tests in this file. The step_size / window /
# lag values are the same ones test_run passes to TemporalEnsemble.
data = Data('temporal', False, True, step_size=5, window=10, lag=2)
X, y = data.get_data((LEN, WIDTH), MOD)

# ground_truth's result is unpacked into two pairs; test_run fits its
# reference meta learner on F and predicts on P.
(F, wf), (P, wp) = data.ground_truth(X, y)


def test_run():
    """[Temporal] 'fit' and 'predict' runs correctly."""
    # Reference meta learner: fit on F against y with the first 5 targets
    # dropped, then predict on P.
    # NOTE(review): the offset of 5 presumably corresponds to step_size=5
    # (no rows in F for the initial step) — confirm against Data.ground_truth.
    meta = OLS()
    meta.fit(F, y[5:])
    g = meta.predict(P)

    # Ensemble under test, configured with the same step_size / window / lag
    # as the module-level Data fixture.
    ens = TemporalEnsemble(step_size=5, window=10, lag=2)
    ens.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
    ens.add(OLS(), meta=True, dtype=np.float64)
    # NOTE(review): in the visible lines the ensemble is never fitted and `g`
    # is never compared with anything — the test body looks truncated here.
Exemple #4
0
"""ML-ENSEMBLE

:author: Sebastian Flennerhag
"""

import numpy as np
from mlens.preprocessing import Subset, Shift
from mlens.testing.dummy import Data

# Shared input for the tests below: first return value of a (10, 4)
# get_data call; the second return value is discarded.
X, _ = Data('stack', False, False).get_data((10, 4), 2)

# Shared Subset instance. test_subset_2 changes its `subset` parameter in
# place, so test_subset_1 presumably has to run before it.
sub = Subset([0, 1])


def test_subset_1():
    """[Preprocessing | Subset]: the transformed output has two columns."""
    transformed = sub.fit_transform(X)
    n_columns = transformed.shape[1]
    assert n_columns == 2


def test_subset_2():
    """[Preprocessing | Subset]: a ``None`` subset hands back the input itself."""
    sub.set_params(subset=None)
    result = sub.fit_transform(X)
    # Identity, not equality: the very same object must come back.
    assert result is X


def test_shift():
    """[Preprocessing | Shift] test lagging."""
    # Shift is built with a single positional argument of 2 and fitted on the
    # module-level X.
    sh = Shift(2)

    sh.fit(X)
    # NOTE(review): no transform call or assertion follows in the visible
    # lines — the test body looks truncated here.
Exemple #5
0
"""ML-ENSEMBLE

Placeholder for more rigorous tests.

"""
import numpy as np
from mlens.metrics import rmse
from mlens.testing.dummy import Data, ESTIMATORS, PREPROCESSING, OLS

from mlens.ensemble import BlendEnsemble

# Arguments for Data.get_data below: called with (LEN, WIDTH) and MOD.
LEN = 20
WIDTH = 2
MOD = 2

# Module-level fixture for the tests in this file, built for the 'blend'
# layer type.
data = Data('blend', False, True)
X, y = data.get_data((LEN, WIDTH), MOD)

# ground_truth's result is unpacked into two pairs; test_run fits its
# reference meta learner on F and predicts on P.
(F, wf), (P, wp) = data.ground_truth(X, y)


def test_run():
    """[Blend] 'fit' and 'predict' runs correctly."""
    # Reference meta learner: fit on F against y with the first 10 targets
    # dropped, then predict on P.
    # NOTE(review): 10 is half of LEN; presumably F only covers the held-out
    # second half of the data — confirm against Data.ground_truth.
    meta = OLS()
    meta.fit(F, y[10:])
    g = meta.predict(P)

    # Ensemble under test: one layer of ESTIMATORS with PREPROCESSING, plus
    # an OLS meta learner.
    ens = BlendEnsemble()
    ens.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
    ens.add(OLS(), meta=True, dtype=np.float64)
    # NOTE(review): in the visible lines the ensemble is never fitted and `g`
    # is never compared with anything — the test body looks truncated here.
Exemple #6
0
# Arguments for the module-level Data.get_data call further down, which is
# invoked with (LEN, WIDTH) and MOD.
LEN = 12
WIDTH = 4
MOD = 2

class Tmp(BaseEnsemble):
    """Bare ``BaseEnsemble`` subclass that forwards every argument unchanged."""

    def __init__(
            self,
            shuffle=False,
            random_state=None,
            scorer=None,
            verbose=False,
            layers=None,
            array_check=None,
            model_selection=False,
            sample_size=20):
        # Collect the arguments once and pass them on by keyword, exactly as
        # received.
        forwarded = dict(
            shuffle=shuffle,
            random_state=random_state,
            scorer=scorer,
            verbose=verbose,
            layers=layers,
            array_check=array_check,
            model_selection=model_selection,
            sample_size=sample_size,
        )
        super(Tmp, self).__init__(**forwarded)


# Module-level data fixture: 'stack' layer type with folds=3.
data = Data('stack', False, True, folds=3)
X, y = data.get_data((LEN, WIDTH), MOD)

# Shared objects for the tests below: a sequential container and a single
# layer, both requested from EstimatorContainer with ('stack', False, False).
lg = EstimatorContainer()
lc = lg.get_sequential('stack', False, False)
layer = lg.get_layer('stack', False, False)


def test_clone():
    """[Ensemble | Sequential] Test cloning."""
    cloned = clone(lc)

    params = lc.get_params(deep=False)
    params_cloned = cloned.get_params(deep=False)

    for par, param in params.items():
def fail_func(y, p):
    """Scoring stub that unconditionally raises ``ValueError``.

    Both arguments are accepted and ignored.
    """
    raise ValueError()


def null_func(y, p):
    """Scoring stub that ignores its inputs and returns a fixed string."""
    outcome = 'not_value'
    return outcome


# Fixture dimensions shared by both Data helpers and the ensembles below.
FOLDS = 3
LEN = 6
WIDTH = 2
MOD = 2

# Fixture 1: 'stack' Data helper with its third argument set to True.
data1 = Data('stack', False, True, folds=FOLDS)
X1, y1 = data1.get_data((LEN, WIDTH), MOD)
(F1, wf1), (P1, wp1) = data1.ground_truth(X1, y1, 1, False)
# Reference predictions: OLS fit on F1 against y1, then predicting on P1.
G1 = OLS().fit(F1, y1).predict(P1)

# Fixture 2: same as fixture 1 but with the third Data argument set to False.
data2 = Data('stack', False, False, folds=FOLDS)
# Draw the inputs from data2 itself. This line previously called
# data1.get_data, which paired data2's ground truth with data generated by a
# different helper (copy-paste slip).
X2, y2 = data2.get_data((LEN, WIDTH), MOD)
(F2, wf2), (P2, wp2) = data2.ground_truth(X2, y2, 1, False)
G2 = OLS().fit(F2, y2).predict(P2)

# Ensemble with the rmse scorer, one estimator layer and an OLS meta learner.
ens1 = SuperLearner(folds=FOLDS, scorer=rmse, verbose=5)
ens1.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
ens1.add_meta(OLS(), dtype=np.float64)

# Same estimator layer, but scored with in_script_func (defined outside the
# lines shown here).
ens1_b = SuperLearner(folds=FOLDS, scorer=in_script_func)
ens1_b.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
Exemple #8
0
                            BlendEnsemble,
                            Subsemble)

from mlens.ensemble.base import Sequential
from mlens.testing.dummy import (Data,
                                 PREPROCESSING,
                                 ESTIMATORS,
                                 ECM,
                                 EstimatorContainer)

# Fixture dimensions; LEN, WIDTH and MOD feed Data.get_data below.
FOLDS = 3
LEN = 24
WIDTH = 2
MOD = 2

# Note that FOLDS is passed positionally here, as the fourth argument.
data = Data('stack', False, True, FOLDS)
X, y = data.get_data((LEN, WIDTH), MOD)

# One layer estimator per layer type (stack / blend / subsemble).
est = EstimatorContainer()
lc_s = est.get_layer_estimator('stack', False, True)
lc_b = est.get_layer_estimator('blend', False, False)
lc_u = est.get_layer_estimator('subsemble', False, False)

# One layer per layer type, requested with the same arguments as above.
l_s = est.get_layer('stack', False, True)
l_b = est.get_layer('blend', False, False)
l_u = est.get_layer('subsemble', False, False)

# Sequential built from the three layers in stack, blend, subsemble order.
seq = Sequential(stack=[l_s, l_b, l_u])


def test_fit_seq():
Exemple #9
0
# First estimator mapping: the same two named estimators under the keys
# 'sc1' and 'no1'.
# NOTE(review): presumably these keys mirror PREPROCESSING_1, which is
# defined outside the lines shown here — confirm.
ESTIMATORS_PROBA_1 = {'sc1': [('offs1', LogisticRegression(offset=2)),
                              ('null1', LogisticRegression())],
                      'no1': [('offs1', LogisticRegression(offset=2)),
                              ('null1', LogisticRegression())]}

# Second preprocessing / estimator pair. Both dicts use the keys 'no2' and
# 'sc2'; 'no2' has an empty preprocessing list and 'sc2' a single Scale step.
PREPROCESSING_2 = {'no2': [], 'sc2': [('scale', Scale())]}
ESTIMATORS_PROBA_2 = {'sc2': [('offs2', LogisticRegression(offset=2)),
                             ('null2', LogisticRegression())],
                      'no2': [('offs2', LogisticRegression(offset=2)),
                             ('null2', LogisticRegression())]}


def scorer(p, y):
    """Return the mean of the signed difference ``p - y``."""
    difference = p - y
    return np.mean(difference)


# Module-level data fixture: 'stack' layer type with both flags set to True.
data = Data('stack', True, True)

X, y = data.get_data((25, 4), 3)

# Group 1: indexer looked up under 'stack', first estimator / preprocessing
# pair, learners created with proba=True.
idx1 = INDEXERS['stack']()
g1 = make_group(
    idx1, ESTIMATORS_PROBA_1, PREPROCESSING_1,
    learner_kwargs={'proba': True, 'verbose': True},
    transformer_kwargs={'verbose': True})

# Group 2: indexer looked up under 'subsemble', second estimator /
# preprocessing pair. Note that 'proba' is False here even though the
# estimator dict is named ESTIMATORS_PROBA_2.
idx2 = INDEXERS['subsemble']()
g2 = make_group(
    idx2, ESTIMATORS_PROBA_2, PREPROCESSING_2,
    learner_kwargs={'proba': False, 'verbose': True},
    transformer_kwargs={'verbose': True})