def test_ground_truth():
    """[Utils] testing: test ground truth for stacking."""
    # Expected fold-wise predictions and coefficient weights ...
    truth_F = np.array([[17., 11., -42.],
                        [29., 15., -30.],
                        [39.64705882, 17.64705882, -6.35294118],
                        [52.35294118, 22.35294118, 6.35294118],
                        [63., 25., 30.],
                        [75., 29., 42.]])
    truth_wf = np.array([[-5., 11.],
                         [-7., 9.],
                         [-1.52941176, 7.88235294],
                         [-3.52941176, 5.88235294],
                         [-3., 9.],
                         [-5., 7.],
                         [3., 3.],
                         [3.17647059, 3.17647059],
                         [3., 3.]])
    # ... and the expected full-fit predictions and weights.
    truth_P = np.array([[14.57142857, 8.57142857, -31.42857143],
                        [27.14285714, 13.14285714, -18.85714286],
                        [39.71428571, 17.71428571, -6.28571429],
                        [52.28571429, 22.28571429, 6.28571429],
                        [64.85714286, 26.85714286, 18.85714286],
                        [77.42857143, 31.42857143, 31.42857143]])
    truth_wp = np.array([[-2., 8.28571429],
                         [-4., 6.28571429],
                         [3.14285714, 3.14285714]])

    X, y = Data('stack', False, True).get_data((6, 2), 2)

    # Silence any printing while the ground truth is computed.
    with open(os.devnull, 'w') as devnull, redirect_stdout(devnull):
        (F, wf), (P, wp) = Data('stack', False, True, 3).ground_truth(X, y)

    np.testing.assert_array_almost_equal(F, truth_F)
    np.testing.assert_array_almost_equal(wf, truth_wf)
    np.testing.assert_array_almost_equal(P, truth_P)
    np.testing.assert_array_almost_equal(wp, truth_wp)
def run(cls, proba, preprocessing, **kwargs):
    """Execute the specified transformer test case.

    Builds the dummy data and its ground truth for the given layer class,
    fits an EnsembleTransformer on the data, and checks that its transform
    output equals the ground-truth fit predictions.
    """
    # Only the 'subset' class partitions the data; everything else uses one.
    n_parts = kwargs['n_partitions'] if cls == 'subset' else 1

    estimators = ESTS[(proba, preprocessing)]
    preprocess = PREPROCESSING if preprocessing else None

    data = Data(cls, proba, preprocessing, **kwargs)
    X, y = data.get_data((LEN, WIDTH), MOD)
    (F, wf), _ = data.ground_truth(X, y, n_parts)

    transformer = EnsembleTransformer()
    transformer.add(cls, estimators, preprocess, proba=proba, **kwargs)
    transformer.fit(X, y)

    np.testing.assert_array_equal(F, transformer.transform(X))
lc_from_file, lc_from_csv, lc_predict, lc_transform)
# NOTE(review): the line above is the tail of an import statement whose
# opening lies before this chunk.

# Fixture configuration: subset layer with probabilities and preprocessing.
PROBA = True
PROCESSING = True
LEN = 12
WIDTH = 2
FOLDS = 3
PARTITIONS = 2

# MOD is passed to get_data below; presumably a modulus / fold size derived
# from LEN -- TODO confirm against Data.get_data. LEN must divide evenly.
MOD, r = divmod(LEN, FOLDS)
assert r == 0

lg = LayerGenerator()
data = Data('subset', PROBA, PROCESSING, PARTITIONS, FOLDS)
X, y = data.get_data((LEN, WIDTH), MOD)

# Ground-truth fit (F, wf) and predict (P, wp) outputs used by the tests.
(F, wf), (P, wp) = data.ground_truth(X, y, subsets=PARTITIONS)

layer = lg.get_layer('subset', PROBA, PROCESSING, PARTITIONS, FOLDS)
lc = lg.get_layer_container('subset', PROBA, PROCESSING, PARTITIONS, FOLDS)

layer.indexer.fit(X)
cache = Cache(X, y, data)


def test_layer_fit():
    """[Parallel | Subset | Prep | Proba] test layer fit."""
    layer_fit(layer, cache, F, wf)
"""ML-Ensemble """ import numpy as np from mlens.parallel.evaluation import fit_score from mlens.utils.dummy import OLS, Data from mlens.metrics import mape, make_scorer X, y = Data('stack', False, False).get_data((10, 2), 3) def test_fit_score(): """[Parallel | Evaluation] Test fit-score function.""" out = fit_score(case='test', tr_list=[], est_name='ols', est=OLS(), params=(0, { 'offset': 2 }), x=X, y=y, idx=((0, 5), (5, 10)), scorer=make_scorer(mape, greater_is_better=False), error_score=None) assert out[0] == 'test' assert out[1] == 'ols' assert out[2] == 0
Place holder for more rigorous tests. """
# NOTE(review): the line above closes a module docstring that opens before
# this chunk.
import numpy as np

from mlens.ensemble import (SequentialEnsemble, SuperLearner, BlendEnsemble,
                            Subsemble)
from mlens.utils.dummy import (Data, PREPROCESSING, ESTIMATORS, ECM,
                               LayerGenerator)

# Fixture configuration for the sequential-ensemble tests.
FOLDS = 3
LEN = 24
WIDTH = 2
MOD = 2

data = Data('stack', False, True, FOLDS)
X, y = data.get_data((LEN, WIDTH), MOD)

# One layer container per layer class, chained manually in the test below.
lc_s = LayerGenerator().get_layer_container('stack', False, True)
lc_b = LayerGenerator().get_layer_container('blend', False, False)
lc_u = LayerGenerator().get_layer_container('subset', False, False)


def test_fit():
    """[Sequential] Test multilayer fitting."""
    # Feed each container's output into the next: stack -> blend -> subset.
    S = lc_s.fit(X, y, -1)[-1]
    B = lc_b.fit(S, y, -1)[-1]
    U = lc_u.fit(B, y, -1)[-1]

    ens = SequentialEnsemble()
    # NOTE(review): test_fit continues past the end of this chunk.
from mlens.utils.dummy import LayerGenerator, Data, Cache
from mlens.utils.dummy import (layer_fit, layer_predict, layer_transform,
                               lc_fit, lc_from_file, lc_from_csv, lc_predict,
                               lc_transform, lc_feature_prop)

# Fixture configuration: blend layer with probabilities, no preprocessing.
PROBA = True
PROCESSING = False
LEN = 6
WIDTH = 2
FOLDS = 3

# MOD is passed to get_data below; presumably a modulus / fold size derived
# from LEN -- TODO confirm against Data.get_data. LEN must divide evenly.
MOD, r = divmod(LEN, FOLDS)
assert r == 0

lg = LayerGenerator()
data = Data('blend', PROBA, PROCESSING)
X, y = data.get_data((LEN, WIDTH), MOD)

# Ground-truth fit (F, wf) and predict (P, wp) outputs used by the tests.
(F, wf), (P, wp) = data.ground_truth(X, y)

layer = lg.get_layer('blend', PROBA, PROCESSING)
lc = lg.get_layer_container('blend', PROBA, PROCESSING)
# Variant container with feature propagation enabled for input column 1.
lc_p = lg.get_layer_container('blend', PROBA, PROCESSING,
                              propagate_features=[1])

layer.indexer.fit(X)
cache = Cache(X, y, data)
Place holder for more rigorous tests. """
# NOTE(review): the line above closes a module docstring that opens before
# this chunk.
import numpy as np

from mlens.metrics import rmse
from mlens.base import BlendIndex
from mlens.utils.dummy import Data, ESTIMATORS, PREPROCESSING, OLS
from mlens.ensemble import BlendEnsemble

# Fixture configuration for the blend-ensemble tests.
LEN = 6
WIDTH = 2
MOD = 2

data = Data('blend', False, True)
X, y = data.get_data((LEN, WIDTH), MOD)
# Ground-truth fit (F, wf) and predict (P, wp) outputs used below.
(F, wf), (P, wp) = data.ground_truth(X, y, 1, False)


def test_run():
    """[Blend] 'fit' and 'predict' runs correctly."""
    # Fit a meta estimator on the ground-truth fold predictions. The y[3:]
    # slice presumably matches the blend test-fold rows -- TODO confirm
    # against Data.ground_truth.
    meta = OLS()
    meta.fit(F, y[3:])
    g = meta.predict(P)

    ens = BlendEnsemble(test_size=3)
    ens.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
    ens.add(OLS(), meta=True, dtype=np.float64)
    # NOTE(review): test_run continues past the end of this chunk.
lc_fit, lc_from_file, lc_from_csv, lc_predict, lc_transform)
# NOTE(review): the line above is the tail of an import statement whose
# opening lies before this chunk.

# Fixture configuration: stack layer with preprocessing, no probabilities.
PROBA = False
PROCESSING = True
LEN = 6
WIDTH = 2
FOLDS = 3

# MOD is passed to get_data below; presumably a modulus / fold size derived
# from LEN -- TODO confirm against Data.get_data. LEN must divide evenly.
MOD, r = divmod(LEN, FOLDS)
assert r == 0

lg = LayerGenerator()
data = Data('stack', PROBA, PROCESSING, FOLDS)
X, y = data.get_data((LEN, WIDTH), MOD)

# Ground-truth fit (F, wf) and predict (P, wp) outputs used by the tests.
(F, wf), (P, wp) = data.ground_truth(X, y)

layer = lg.get_layer('stack', PROBA, PROCESSING, FOLDS)
lc = lg.get_layer_container('stack', PROBA, PROCESSING, FOLDS)

layer.indexer.fit(X)
cache = Cache(X, y, data)


def test_layer_fit():
    """[Parallel | Stack | Prep] test layer fit."""
    layer_fit(layer, cache, F, wf)
"""ML-ENSEMBLE :author: Sebastian Flennerhag """ import numpy as np from mlens.preprocessing import Subset, Shift from mlens.utils.dummy import Data X, _ = Data('stack', False, False).get_data((10, 4), 2) sub = Subset([0, 1]) def test_subset_1(): """[Preprocessing | Subset]: assert correct subset.""" assert sub.fit_transform(X).shape[1] == 2 def test_subset_2(): """[Preprocessing | Subset]: assert X is returned for empty subset.""" sub.set_params(**{'subset': None}) out = sub.fit_transform(X) assert id(out) == id(X) def test_shift(): """[Preprocessing | Shift] test lagging.""" sh = Shift(2) sh.fit(X)
def fail_func(y, p):
    """Scoring function that always raises, for testing in-script scorers.

    Parameters mirror a scorer signature (targets, predictions); both are
    ignored.
    """
    raise ValueError


def null_func(y, p):
    """Scoring function returning a non-numeric value, to test failed
    aggregation."""
    return 'not_value'


# Fixture configuration for the SuperLearner tests.
FOLDS = 3
LEN = 6
WIDTH = 2
MOD = 2

# First fixture: stacking with preprocessing.
data1 = Data('stack', False, True, FOLDS)
X1, y1 = data1.get_data((LEN, WIDTH), MOD)
(F1, wf1), (P1, wp1) = data1.ground_truth(X1, y1, 1, False)
G1 = OLS().fit(F1, y1).predict(P1)

# Second fixture: stacking without preprocessing.
data2 = Data('stack', False, False, FOLDS)
# BUG FIX: this previously sampled from ``data1`` (copy-paste typo); the
# second fixture's data must come from ``data2`` so that the ground truth
# computed below refers to the same generator.
X2, y2 = data2.get_data((LEN, WIDTH), MOD)
(F2, wf2), (P2, wp2) = data2.ground_truth(X2, y2, 1, False)
G2 = OLS().fit(F2, y2).predict(P2)

# Ensemble with a standard scorer and verbose output.
ens1 = SuperLearner(folds=FOLDS, scorer=rmse, verbose=100)
ens1.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
ens1.add_meta(OLS(), dtype=np.float64)

# Variant using an in-script scoring function.
ens1_b = SuperLearner(folds=FOLDS, scorer=in_script_func)
ens1_b.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
"""ML-ENSEMBLE """ import numpy as np from mlens.utils.dummy import ECM, Data from mlens.ensemble.base import LayerContainer from mlens.externals.sklearn.base import clone X, y = Data('stack', False, False).get_data((6, 2), 2) lc = LayerContainer() lc.add(ECM, 'full') def get_gt(): """Build ground truth.""" F = np.empty((X.shape[0], len(ECM))) for i, (_, est) in enumerate(ECM): e = clone(est) assert e is not est e.fit(X, y) F[:, i] = e.predict(X)
"""ML-ENSEMBLE Test base functionality. """ import numpy as np from mlens.externals.sklearn.base import clone from mlens.utils.dummy import Data, LayerGenerator LEN = 6 WIDTH = 2 MOD = 2 data = Data('stack', False, True, n_splits=5) X, y = data.get_data((LEN, WIDTH), MOD) lc = LayerGenerator().get_layer_container('stack', False, False) layer = LayerGenerator().get_layer('stack', False, False) def test_clone(): """[Ensemble | LayerContainer] Test cloning.""" cloned = clone(lc) params = lc.get_params(deep=False) params_cloned = cloned.get_params(deep=False) for par, param in params.items(): if par == 'layers': assert param is not params_cloned[par] else: