def run(cls, kls, proba, preprocessing, **kwargs):
    """Build, fit and transform an ensemble and compare it with ground truth.

    Parameters
    ----------
    cls : callable
        Called without arguments to create the ensemble under test.
    kls : str
        Layer type. Passed to ``Data`` and to the ensemble's ``add`` method;
        the value ``'subsemble'`` makes ``partitions`` a required keyword.
    proba : bool
        Together with ``preprocessing``, the key used to look up the
        estimators in ``ESTS``; also forwarded as ``proba=`` to ``add``.
    preprocessing : bool
        When truthy, ``PREPROCESSING`` is handed to ``add``; otherwise
        ``None`` is.
    **kwargs
        ``model_selection`` is removed before anything else is done and, when
        truthy, sets ``ens.model_selection = True`` before fitting. Everything
        else is forwarded to both ``Data`` and ``add``.

    Raises
    ------
    KeyError
        If ``kls == 'subsemble'`` and no ``partitions`` keyword is given.
    AssertionError
        If the transformed output differs from the ground-truth array.
    """
    use_model_selection = kwargs.pop('model_selection', None)

    # Only the subsemble case reads a partition count from the caller.
    n_partitions = kwargs['partitions'] if kls == 'subsemble' else 1

    estimators = ESTS[(proba, preprocessing)]
    transformers = PREPROCESSING if preprocessing else None

    data = Data(kls, proba, preprocessing, **kwargs)
    X, y = data.get_data((LEN, WIDTH), MOD)
    (expected, _), _ = data.ground_truth(X, y, n_partitions)

    # Anything written to stderr while building and running the ensemble is
    # discarded.
    with open(os.devnull, 'w') as sink, redirect_stderr(sink):
        ensemble = cls()
        ensemble.add(
            kls, estimators, transformers,
            proba=proba, dtype=np.float64, **kwargs)

        if use_model_selection:
            ensemble.model_selection = True

        ensemble.fit(X, y)
        predictions, _ = ensemble.transform(X, y)

    np.testing.assert_array_equal(expected, predictions)
def test_ground_truth():
    """[Utils] testing: test ground truth for stacking.

    Compares the four arrays returned by ``Data.ground_truth`` for a
    6 x 2 data set (``Data('stack', False, True, folds=3)``) with
    hard-coded reference values, to the default precision of
    ``assert_array_almost_equal``.
    """
    data = Data('stack', False, True, folds=3)
    X, y = data.get_data((6, 2), 2)
    (F, wf), (P, wp) = data.ground_truth(X, y)

    expected_F = np.array([
        [11., 17., -14., -42.],
        [15., 29., -10., -30.],
        [17.64705882, 39.64705882, -2.35294118, -6.35294118],
        [22.35294118, 52.35294118, 2.35294118, 6.35294118],
        [25., 63., 10., 30.],
        [29., 75., 14., 42.],
    ])

    expected_wf = np.array([
        [-7., 9.],
        [-3.52941176, 5.88235294],
        [-5., 7.],
        [-5., 11.],
        [-1.52941176, 7.88235294],
        [-3., 9.],
        [1., 1.],
        [1.17647059, 1.17647059],
        [1., 1.],
        [3., 3.],
        [3.17647059, 3.17647059],
        [3., 3.],
    ])

    expected_P = np.array([
        [8.57142857, 14.57142857, -11.42857143, -31.42857143],
        [13.14285714, 27.14285714, -6.85714286, -18.85714286],
        [17.71428571, 39.71428571, -2.28571429, -6.28571429],
        [22.28571429, 52.28571429, 2.28571429, 6.28571429],
        [26.85714286, 64.85714286, 6.85714286, 18.85714286],
        [31.42857143, 77.42857143, 11.42857143, 31.42857143],
    ])

    expected_wp = np.array([
        [-4., 6.28571429],
        [-2., 8.28571429],
        [1.14285714, 1.14285714],
        [3.14285714, 3.14285714],
    ])

    # Checked in the same order as the values are unpacked above; the first
    # mismatch raises.
    comparisons = (
        (F, expected_F),
        (wf, expected_wf),
        (P, expected_P),
        (wp, expected_wp),
    )
    for actual, expected in comparisons:
        np.testing.assert_array_almost_equal(actual, expected)
"""ML-ENSEMBLE Place holder for more rigorous tests. """ import numpy as np from mlens.metrics import rmse from mlens.testing.dummy import Data, ESTIMATORS, PREPROCESSING, OLS from mlens.ensemble import TemporalEnsemble LEN = 20 WIDTH = 2 MOD = 2 data = Data('temporal', False, True, step_size=5, window=10, lag=2) X, y = data.get_data((LEN, WIDTH), MOD) (F, wf), (P, wp) = data.ground_truth(X, y) def test_run(): """[Blend] 'fit' and 'predict' runs correctly.""" meta = OLS() meta.fit(F, y[5:]) g = meta.predict(P) ens = TemporalEnsemble(step_size=5, window=10, lag=2) ens.add(ESTIMATORS, PREPROCESSING, dtype=np.float64) ens.add(OLS(), meta=True, dtype=np.float64)
"""ML-ENSEMBLE :author: Sebastian Flennerhag """ import numpy as np from mlens.preprocessing import Subset, Shift from mlens.testing.dummy import Data X, _ = Data('stack', False, False).get_data((10, 4), 2) sub = Subset([0, 1]) def test_subset_1(): """[Preprocessing | Subset]: assert correct subset.""" assert sub.fit_transform(X).shape[1] == 2 def test_subset_2(): """[Preprocessing | Subset]: assert X is returned for empty subset.""" sub.set_params(**{'subset': None}) out = sub.fit_transform(X) assert id(out) == id(X) def test_shift(): """[Preprocessing | Shift] test lagging.""" sh = Shift(2) sh.fit(X)
"""ML-ENSEMBLE Place holder for more rigorous tests. """ import numpy as np from mlens.metrics import rmse from mlens.testing.dummy import Data, ESTIMATORS, PREPROCESSING, OLS from mlens.ensemble import BlendEnsemble LEN = 20 WIDTH = 2 MOD = 2 data = Data('blend', False, True) X, y = data.get_data((LEN, WIDTH), MOD) (F, wf), (P, wp) = data.ground_truth(X, y) def test_run(): """[Blend] 'fit' and 'predict' runs correctly.""" meta = OLS() meta.fit(F, y[10:]) g = meta.predict(P) ens = BlendEnsemble() ens.add(ESTIMATORS, PREPROCESSING, dtype=np.float64) ens.add(OLS(), meta=True, dtype=np.float64)
# Arguments for ``data.get_data`` below, passed as ``(LEN, WIDTH)`` and ``MOD``.
LEN = 12
WIDTH = 4
MOD = 2


class Tmp(BaseEnsemble):
    """Bare ``BaseEnsemble`` subclass used by the tests in this module.

    It adds no behaviour of its own: the constructor forwards every argument
    unchanged, by keyword, to ``BaseEnsemble.__init__``.
    """

    def __init__(
            self, shuffle=False, random_state=None, scorer=None, verbose=False,
            layers=None, array_check=None, model_selection=False,
            sample_size=20):
        super(Tmp, self).__init__(
            shuffle=shuffle, random_state=random_state, scorer=scorer,
            verbose=verbose, layers=layers, array_check=array_check,
            model_selection=model_selection, sample_size=sample_size)


# Dummy-data helper for the 'stack' layer type with three folds.
data = Data('stack', False, True, folds=3)
X, y = data.get_data((LEN, WIDTH), MOD)

# Shared fixtures built from the estimator container: a sequential object and
# a single layer, both requested with ('stack', False, False).
lg = EstimatorContainer()
lc = lg.get_sequential('stack', False, False)
layer = lg.get_layer('stack', False, False)


def test_clone():
    """[Ensemble | Sequential] Test cloning."""
    cloned = clone(lc)

    # Shallow parameter dictionaries of the original and of the clone.
    params = lc.get_params(deep=False)
    params_cloned = cloned.get_params(deep=False)

    for par, param in params.items():
def fail_func(y, p):
    """Test for use of in-script scoring functions.

    Scorer stand-in that ignores both arguments and always raises
    ``ValueError``.
    """
    raise ValueError


def null_func(y, p):
    """Test for failed aggregation.

    Scorer stand-in that ignores both arguments and returns the string
    ``'not_value'`` instead of a number.
    """
    return 'not_value'


FOLDS = 3
LEN = 6
WIDTH = 2
MOD = 2

# Fixture set 1: 'stack' data helper created with (False, True).
data1 = Data('stack', False, True, folds=FOLDS)
X1, y1 = data1.get_data((LEN, WIDTH), MOD)
(F1, wf1), (P1, wp1) = data1.ground_truth(X1, y1, 1, False)
# Reference output: OLS fitted on F1 against y1, predicting from P1.
G1 = OLS().fit(F1, y1).predict(P1)

# Fixture set 2: 'stack' data helper created with (False, False).
data2 = Data('stack', False, False, folds=FOLDS)
# Draw the data from ``data2`` itself, so that the arrays and the ground truth
# below come from the same helper (this previously called ``data1.get_data``).
X2, y2 = data2.get_data((LEN, WIDTH), MOD)
(F2, wf2), (P2, wp2) = data2.ground_truth(X2, y2, 1, False)
# Reference output: OLS fitted on F2 against y2, predicting from P2.
G2 = OLS().fit(F2, y2).predict(P2)

# Ensemble scored with ``rmse``: one estimator layer with preprocessing plus
# an OLS meta learner.
ens1 = SuperLearner(folds=FOLDS, scorer=rmse, verbose=5)
ens1.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
ens1.add_meta(OLS(), dtype=np.float64)

# NOTE(review): ``in_script_func`` is not defined in the visible part of this
# file. Confirm it is defined or imported above; ``fail_func`` carries the
# "in-script scoring functions" docstring and may be the intended scorer.
ens1_b = SuperLearner(folds=FOLDS, scorer=in_script_func)
ens1_b.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
BlendEnsemble, Subsemble)
from mlens.ensemble.base import Sequential
from mlens.testing.dummy import (Data, PREPROCESSING, ESTIMATORS, ECM,
                                 EstimatorContainer)

# Arguments for ``data.get_data`` below, passed as ``(LEN, WIDTH)`` and ``MOD``.
FOLDS = 3
LEN = 24
WIDTH = 2
MOD = 2

# NOTE(review): FOLDS is passed positionally here -- confirm that the fourth
# positional parameter of ``Data`` is ``folds``.
data = Data('stack', False, True, FOLDS)
X, y = data.get_data((LEN, WIDTH), MOD)

est = EstimatorContainer()

# Layer estimators for the three layer types; only the 'stack' one is
# requested with the third argument set to True.
lc_s = est.get_layer_estimator('stack', False, True)
lc_b = est.get_layer_estimator('blend', False, False)
lc_u = est.get_layer_estimator('subsemble', False, False)

# Layers requested with the same arguments as the estimators above.
l_s = est.get_layer('stack', False, True)
l_b = est.get_layer('blend', False, False)
l_u = est.get_layer('subsemble', False, False)

# Sequential object whose stack is the three layers in the order
# stack -> blend -> subsemble.
seq = Sequential(stack=[l_s, l_b, l_u])


def test_fit_seq():
# Estimators for group 1, keyed by case name ('sc1', 'no1'). Both cases hold
# the same pair: a LogisticRegression with offset=2 and a default one.
# NOTE(review): the keys presumably mirror those of PREPROCESSING_1, which is
# defined outside the visible code -- confirm.
ESTIMATORS_PROBA_1 = {'sc1': [('offs1', LogisticRegression(offset=2)),
                              ('null1', LogisticRegression())],
                      'no1': [('offs1', LogisticRegression(offset=2)),
                              ('null1', LogisticRegression())]}

# Preprocessing cases for group 2: 'no2' has no transformers, 'sc2' has a
# single Scale step.
PREPROCESSING_2 = {'no2': [],
                   'sc2': [('scale', Scale())]}

# Estimators for group 2, keyed by the same case names as PREPROCESSING_2.
ESTIMATORS_PROBA_2 = {'sc2': [('offs2', LogisticRegression(offset=2)),
                              ('null2', LogisticRegression())],
                      'no2': [('offs2', LogisticRegression(offset=2)),
                              ('null2', LogisticRegression())]}


def scorer(p, y):
    """Return the mean of the element-wise difference ``p - y``.

    The difference is signed (no absolute value or squaring), so positive and
    negative deviations cancel.
    """
    return np.mean(p - y)


# Dummy-data helper for the 'stack' layer type, created with (True, True).
data = Data('stack', True, True)
X, y = data.get_data((25, 4), 3)

# Group 1: 'stack' indexer; learners are created with proba=True.
idx1 = INDEXERS['stack']()
g1 = make_group(
    idx1, ESTIMATORS_PROBA_1, PREPROCESSING_1,
    learner_kwargs={'proba': True, 'verbose': True},
    transformer_kwargs={'verbose': True})

# Group 2: 'subsemble' indexer; learners are created with proba=False.
idx2 = INDEXERS['subsemble']()
g2 = make_group(
    idx2, ESTIMATORS_PROBA_2, PREPROCESSING_2,
    learner_kwargs={'proba': False, 'verbose': True},
    transformer_kwargs={'verbose': True})