コード例 #1
0
def test_ground_truth():
    """[Utils] testing: test ground truth for stacking.

    Generates a small data set with ``Data.get_data``, computes the ground
    truth while stdout is silenced, and compares the four returned arrays
    against hard-coded reference values.
    """
    X, y = Data('stack', False, True).get_data((6, 2), 2)

    # Anything ``ground_truth`` writes to stdout goes to the null device.
    with open(os.devnull, 'w') as sink, redirect_stdout(sink):
        generator = Data('stack', False, True, 3)
        (F, wf), (P, wp) = generator.ground_truth(X, y)

    expected_F = np.array([
        [17., 11., -42.],
        [29., 15., -30.],
        [39.64705882, 17.64705882, -6.35294118],
        [52.35294118, 22.35294118, 6.35294118],
        [63., 25., 30.],
        [75., 29., 42.],
    ])

    expected_wf = np.array([
        [-5., 11.],
        [-7., 9.],
        [-1.52941176, 7.88235294],
        [-3.52941176, 5.88235294],
        [-3., 9.],
        [-5., 7.],
        [3., 3.],
        [3.17647059, 3.17647059],
        [3., 3.],
    ])

    expected_P = np.array([
        [14.57142857, 8.57142857, -31.42857143],
        [27.14285714, 13.14285714, -18.85714286],
        [39.71428571, 17.71428571, -6.28571429],
        [52.28571429, 22.28571429, 6.28571429],
        [64.85714286, 26.85714286, 18.85714286],
        [77.42857143, 31.42857143, 31.42857143],
    ])

    expected_wp = np.array([
        [-2., 8.28571429],
        [-4., 6.28571429],
        [3.14285714, 3.14285714],
    ])

    # Checked in the same order as before: F, wf, P, wp.
    comparisons = (
        (F, expected_F),
        (wf, expected_wf),
        (P, expected_P),
        (wp, expected_wp),
    )
    for actual, expected in comparisons:
        np.testing.assert_array_almost_equal(actual, expected)
コード例 #2
0
def run(cls, proba, preprocessing, **kwargs):
    """Fit an ``EnsembleTransformer`` and compare its output to the ground truth.

    Parameters
    ----------
    cls : str
        Layer class passed to ``Data`` and ``EnsembleTransformer.add``.
        When it equals ``'subset'``, ``kwargs`` must contain
        ``'n_partitions'``.
    proba : object
        Forwarded to ``Data`` and ``add``; together with ``preprocessing``
        it forms the key used to look up estimators in ``ESTS``.
    preprocessing : object
        When truthy, ``PREPROCESSING`` is passed to ``add``; otherwise
        ``None`` is passed.
    **kwargs
        Forwarded unchanged to ``Data`` and ``add``.

    Raises
    ------
    KeyError
        If ``cls == 'subset'`` and ``'n_partitions'`` is missing.
    AssertionError
        If the transformed output differs from the ground truth.
    """
    n_partitions = kwargs['n_partitions'] if cls == 'subset' else 1

    estimators = ESTS[(proba, preprocessing)]
    transformers = PREPROCESSING if preprocessing else None

    data = Data(cls, proba, preprocessing, **kwargs)
    X, y = data.get_data((LEN, WIDTH), MOD)

    # Only the first array of the first pair is needed for the comparison.
    (expected, _weights), _ = data.ground_truth(X, y, n_partitions)

    ens = EnsembleTransformer()
    ens.add(cls, estimators, transformers, proba=proba, **kwargs)
    ens.fit(X, y)

    transformed = ens.transform(X)
    np.testing.assert_array_equal(expected, transformed)
コード例 #3
0
ファイル: test_subset_proba.py プロジェクト: undarmaa/mlens
                               lc_from_file,
                               lc_from_csv,
                               lc_predict,
                               lc_transform)

# Module-level configuration and fixtures used by the tests in this file.
PROBA = True
PROCESSING = True
LEN = 12
WIDTH = 2
FOLDS = 3
PARTITIONS = 2
# MOD is LEN // FOLDS; the assert guards against a LEN that FOLDS does not
# divide evenly.
MOD, r = divmod(LEN, FOLDS)
assert r == 0

lg = LayerGenerator()
data = Data('subset', PROBA, PROCESSING, PARTITIONS, FOLDS)

# presumably (LEN, WIDTH) is the shape of X -- TODO confirm against Data.get_data
X, y = data.get_data((LEN, WIDTH), MOD)
# Expected values the tests compare against.
(F, wf), (P, wp) = data.ground_truth(X, y, subsets=PARTITIONS)

# The layer and the layer container are built with the same arguments as ``data``.
layer = lg.get_layer('subset', PROBA, PROCESSING, PARTITIONS, FOLDS)
lc = lg.get_layer_container('subset', PROBA, PROCESSING, PARTITIONS, FOLDS)

# Fit the layer's indexer on X once, at import time.
layer.indexer.fit(X)

cache = Cache(X, y, data)


def test_layer_fit():
    """[Parallel | Subset | Prep | Proba] test layer fit."""
    # Delegates to the shared ``layer_fit`` helper, passing the module-level
    # layer and cache together with ``F`` and ``wf`` from ``data.ground_truth``.
    layer_fit(layer, cache, F, wf)
コード例 #4
0
ファイル: test_a_evaluation.py プロジェクト: xc35/mlens
"""ML-Ensemble

"""

import numpy as np
from mlens.parallel.evaluation import fit_score
from mlens.utils.dummy import OLS, Data
from mlens.metrics import mape, make_scorer

# Shared data for the tests in this module, generated once at import time.
X, y = Data('stack', False, False).get_data((10, 2), 3)


def test_fit_score():
    """[Parallel | Evaluation] Test fit-score function."""
    # One ``fit_score`` call with an ``OLS`` estimator and a MAPE scorer
    # built with ``greater_is_better=False``.
    # NOTE(review): ``idx`` looks like a pair of (start, stop) index ranges
    # and ``params`` like (draw number, parameter dict) -- confirm against
    # ``fit_score``.
    out = fit_score(case='test',
                    tr_list=[],
                    est_name='ols',
                    est=OLS(),
                    params=(0, {
                        'offset': 2
                    }),
                    x=X,
                    y=y,
                    idx=((0, 5), (5, 10)),
                    scorer=make_scorer(mape, greater_is_better=False),
                    error_score=None)

    # The first three entries must match the ``case`` and ``est_name``
    # arguments and the value 0.
    assert out[0] == 'test'
    assert out[1] == 'ols'
    assert out[2] == 0
コード例 #5
0
ファイル: test_sequential.py プロジェクト: tongli12/mlens
Place holder for more rigorous tests.

"""
import numpy as np
from mlens.ensemble import (SequentialEnsemble, SuperLearner, BlendEnsemble,
                            Subsemble)

from mlens.utils.dummy import (Data, PREPROCESSING, ESTIMATORS, ECM,
                               LayerGenerator)

# Module-level configuration and fixtures for the sequential tests.
FOLDS = 3
LEN = 24
WIDTH = 2
MOD = 2

data = Data('stack', False, True, FOLDS)
# presumably (LEN, WIDTH) is the shape of X -- TODO confirm against Data.get_data
X, y = data.get_data((LEN, WIDTH), MOD)

# One layer container per layer class (stack, blend, subset), each from a
# fresh LayerGenerator.
lc_s = LayerGenerator().get_layer_container('stack', False, True)
lc_b = LayerGenerator().get_layer_container('blend', False, False)
lc_u = LayerGenerator().get_layer_container('subset', False, False)


def test_fit():
    """[Sequential] Test multilayer fitting."""

    S = lc_s.fit(X, y, -1)[-1]
    B = lc_b.fit(S, y, -1)[-1]
    U = lc_u.fit(B, y, -1)[-1]

    ens = SequentialEnsemble()
コード例 #6
0
from mlens.utils.dummy import LayerGenerator, Data, Cache
from mlens.utils.dummy import (layer_fit, layer_predict, layer_transform,
                               lc_fit, lc_from_file, lc_from_csv, lc_predict,
                               lc_transform, lc_feature_prop)

# Module-level configuration and fixtures for the blend tests.
PROBA = True
PROCESSING = False
LEN = 6
WIDTH = 2
FOLDS = 3
# MOD is LEN // FOLDS; the assert guards against a LEN that FOLDS does not
# divide evenly.
MOD, r = divmod(LEN, FOLDS)
assert r == 0

lg = LayerGenerator()
data = Data('blend', PROBA, PROCESSING)

# presumably (LEN, WIDTH) is the shape of X -- TODO confirm against Data.get_data
X, y = data.get_data((LEN, WIDTH), MOD)
# Expected values the tests compare against.
(F, wf), (P, wp) = data.ground_truth(X, y)

layer = lg.get_layer('blend', PROBA, PROCESSING)
lc = lg.get_layer_container('blend', PROBA, PROCESSING)
# Second container, identical except for ``propagate_features=[1]``.
lc_p = lg.get_layer_container('blend',
                              PROBA,
                              PROCESSING,
                              propagate_features=[1])

# Fit the layer's indexer on X once, at import time.
layer.indexer.fit(X)

cache = Cache(X, y, data)
コード例 #7
0
Place holder for more rigorous tests.

"""
import numpy as np
from mlens.metrics import rmse
from mlens.base import BlendIndex
from mlens.utils.dummy import Data, ESTIMATORS, PREPROCESSING, OLS

from mlens.ensemble import BlendEnsemble

# Module-level configuration and fixtures for the BlendEnsemble tests.
LEN = 6
WIDTH = 2
MOD = 2

data = Data('blend', False, True)
# presumably (LEN, WIDTH) is the shape of X -- TODO confirm against Data.get_data
X, y = data.get_data((LEN, WIDTH), MOD)

# NOTE(review): the meaning of the positional ``1, False`` arguments is not
# visible here -- confirm against Data.ground_truth.
(F, wf), (P, wp) = data.ground_truth(X, y, 1, False)


def test_run():
    """[Blend] 'fit' and 'predict' runs correctly."""
    meta = OLS()
    meta.fit(F, y[3:])
    g = meta.predict(P)

    ens = BlendEnsemble(test_size=3)
    ens.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
    ens.add(OLS(), meta=True, dtype=np.float64)
コード例 #8
0
                               lc_fit,
                               lc_from_file,
                               lc_from_csv,
                               lc_predict,
                               lc_transform)

# Module-level configuration and fixtures for the stack tests.
PROBA = False
PROCESSING = True
LEN = 6
WIDTH = 2
FOLDS = 3
# MOD is LEN // FOLDS; the assert guards against a LEN that FOLDS does not
# divide evenly.
MOD, r = divmod(LEN, FOLDS)
assert r == 0

lg = LayerGenerator()
data = Data('stack', PROBA, PROCESSING, FOLDS)

# presumably (LEN, WIDTH) is the shape of X -- TODO confirm against Data.get_data
X, y = data.get_data((LEN, WIDTH), MOD)
# Expected values the tests compare against.
(F, wf), (P, wp) = data.ground_truth(X, y)

# The layer and the layer container are built with the same arguments as ``data``.
layer = lg.get_layer('stack', PROBA, PROCESSING, FOLDS)
lc = lg.get_layer_container('stack', PROBA, PROCESSING, FOLDS)

# Fit the layer's indexer on X once, at import time.
layer.indexer.fit(X)

cache = Cache(X, y, data)


def test_layer_fit():
    """[Parallel | Stack | Prep] test layer fit."""
    # Delegates to the shared ``layer_fit`` helper, passing the module-level
    # layer and cache together with ``F`` and ``wf`` from ``data.ground_truth``.
    layer_fit(layer, cache, F, wf)
コード例 #9
0
"""ML-ENSEMBLE

:author: Sebastian Flennerhag
"""

import numpy as np
from mlens.preprocessing import Subset, Shift
from mlens.utils.dummy import Data

# Shared input for the preprocessing tests; the second return value of
# ``get_data`` is discarded.
X, _ = Data('stack', False, False).get_data((10, 4), 2)

# Module-level transformer shared by the Subset tests. ``test_subset_2``
# mutates it via ``set_params``, so those tests depend on execution order.
sub = Subset([0, 1])


def test_subset_1():
    """[Preprocessing | Subset]: assert correct subset."""
    # ``sub`` is built as ``Subset([0, 1])``, so the result must have
    # exactly two columns.
    transformed = sub.fit_transform(X)
    n_columns = transformed.shape[1]
    assert n_columns == 2


def test_subset_2():
    """[Preprocessing | Subset]: assert X is returned for empty subset."""
    sub.set_params(subset=None)
    result = sub.fit_transform(X)
    # Identity check: the very same object must come back, not a copy.
    assert result is X


def test_shift():
    """[Preprocessing | Shift] test lagging."""
    sh = Shift(2)

    sh.fit(X)
コード例 #10
0
def fail_func(y, p):
    """Scoring stub that always fails.

    Both arguments are ignored; every call raises a ``ValueError`` with no
    message.
    """
    raise ValueError()


def null_func(y, p):
    """Scoring stub that returns a non-numeric value.

    Both arguments are ignored; the string ``'not_value'`` is always
    returned.
    """
    score = 'not_value'
    return score


# Module-level configuration and fixtures for the SuperLearner tests.
FOLDS = 3
LEN = 6
WIDTH = 2
MOD = 2

# Case 1: Data built with the third argument set to True.
data1 = Data('stack', False, True, FOLDS)
X1, y1 = data1.get_data((LEN, WIDTH), MOD)
(F1, wf1), (P1, wp1) = data1.ground_truth(X1, y1, 1, False)
# Reference predictions: OLS fitted on F1 and applied to P1.
G1 = OLS().fit(F1, y1).predict(P1)

# Case 2: same as case 1 but with the third argument set to False.
data2 = Data('stack', False, False, FOLDS)
# NOTE(review): X2/y2 are generated by ``data1``, not ``data2``. This looks
# like a copy-paste slip; it is harmless only if ``get_data`` does not
# depend on the Data configuration -- confirm.
X2, y2 = data1.get_data((LEN, WIDTH), MOD)
(F2, wf2), (P2, wp2) = data2.ground_truth(X2, y2, 1, False)
# Reference predictions: OLS fitted on F2 and applied to P2.
G2 = OLS().fit(F2, y2).predict(P2)

# Ensemble scored with ``rmse``, with an OLS meta learner.
ens1 = SuperLearner(folds=FOLDS, scorer=rmse, verbose=100)
ens1.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
ens1.add_meta(OLS(), dtype=np.float64)

# Same base layer, scored with ``in_script_func``, which is defined outside
# this excerpt.
ens1_b = SuperLearner(folds=FOLDS, scorer=in_script_func)
ens1_b.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
コード例 #11
0
ファイル: test_a_single_run.py プロジェクト: undarmaa/mlens
"""ML-ENSEMBLE

"""

import numpy as np

from mlens.utils.dummy import ECM, Data
from mlens.ensemble.base import LayerContainer
from mlens.externals.sklearn.base import clone

# Shared data for the tests in this module, generated once at import time.
X, y = Data('stack', False, False).get_data((6, 2), 2)

# Layer container with the ``ECM`` estimators added under the 'full' option.
# NOTE(review): the exact meaning of 'full' is not visible here -- confirm
# against LayerContainer.add.
lc = LayerContainer()
lc.add(ECM, 'full')


def get_gt():
    """Build ground truth."""

    F = np.empty((X.shape[0], len(ECM)))

    for i, (_, est) in enumerate(ECM):

        e = clone(est)

        assert e is not est

        e.fit(X, y)

        F[:, i] = e.predict(X)
コード例 #12
0
ファイル: test_base.py プロジェクト: chrinide/mlens_dev
"""ML-ENSEMBLE

Test base functionality.
"""

import numpy as np
from mlens.externals.sklearn.base import clone
from mlens.utils.dummy import Data, LayerGenerator

# Module-level configuration and fixtures for the base-functionality tests.
LEN = 6
WIDTH = 2
MOD = 2

data = Data('stack', False, True, n_splits=5)
# presumably (LEN, WIDTH) is the shape of X -- TODO confirm against Data.get_data
X, y = data.get_data((LEN, WIDTH), MOD)

# NOTE(review): ``data`` is built with the third argument True, while the
# layer container and layer below use False -- confirm this is intended.
lc = LayerGenerator().get_layer_container('stack', False, False)
layer = LayerGenerator().get_layer('stack', False, False)


def test_clone():
    """[Ensemble | LayerContainer] Test cloning."""
    cloned = clone(lc)

    params = lc.get_params(deep=False)
    params_cloned = cloned.get_params(deep=False)

    for par, param in params.items():
        if par == 'layers':
            assert param is not params_cloned[par]
        else: