def __init__(self):
    args = {
        LearnerEstimator: (OLS(), FoldIndex()),
        LayerEnsemble: (FoldIndex(), OLS()),
        TransformerEstimator: (Scale(), FoldIndex())
    }[Est]
    super(Tmp, self).__init__(*args)

def test_bench_equality():
    """[Model Selection] Test benchmark correspondence with eval."""
    with open(os.devnull, 'w') as f, redirect_stderr(f):
        evl = Evaluator(mape_scorer, cv=5)
        evl.fit(X, y,
                estimators={'pr': [OLS()], 'no': [OLS()]},
                param_dicts={},
                preprocessing={'pr': [Scale()], 'no': []})

        out = benchmark(X, y, mape_scorer, 5,
                        {'pr': [OLS()], 'no': [OLS()]},
                        {'pr': [Scale()], 'no': []},
                        None)

    np.testing.assert_approx_equal(out['test_score-m']['no.ols'],
                                   evl.results['test_score-m']['no.ols'])

def test_w_prep_set_params():
    """[Model Selection] Test run with preprocessing, sep param dists."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    params = {('no', 'ols'): {'offset': randint(3, 6)},
              ('pr', 'ols'): {'offset': randint(1, 3)},
              }

    # Fitting
    evl.fit(X, y,
            estimators={'pr': [OLS()], 'no': [OLS()]},
            param_dicts=params,
            preprocessing={'pr': [Scale()], 'no': []},
            n_iter=3)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('no', 'ols')],
        -18.684229451043198)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('pr', 'ols')],
        -7.2594502123869491, 1)

    assert evl.summary['params'][('no', 'ols')]['offset'] == 3
    assert evl.summary['params'][('pr', 'ols')]['offset'] == 1

def test_assert_format():
    """[Utils] _assert_format: test improperly formatted dict does not pass."""
    instances = {'a': [('ols-1', OLS()), ('ols-2', OLS(offset=1))],
                 'b': [OLS(), ('ols-2', OLS(offset=1))]}
    assert not _assert_format(instances)

def test_check_instances_list_same():
    """[Utils] check_instances: test correct list is returned as is."""
    instances = [('ols-1', OLS()), ('ols-2', OLS(offset=1))]
    out = _check_instances(instances)

    assert id(out) == id(instances)
    for i in range(2):
        for j in range(2):
            assert id(out[i][j]) == id(instances[i][j])

def test_check_instances_list_formatting():
    """[Utils] check_instances: test formatting of list."""
    instances = [OLS(), ('ols', OLS()), ('ols', OLS()), ['list', OLS()]]
    formatted = _check_instances(instances)

    strings = []
    for i in formatted:
        assert isinstance(i, tuple)
        assert isinstance(i[0], str)
        assert isinstance(i[1], OLS)
        assert i[0] not in strings
        strings.append(i[0])

def test_formatting_list():
    """[Utils] _format_instances: test correct formatting of list."""
    instances = [OLS(), ('ols', OLS()), ('ols', OLS()), ['list', OLS()]]
    formatted = _format_instances(instances, False)

    strings = []
    for i in formatted:
        assert isinstance(i, tuple)
        assert isinstance(i[0], str)
        assert isinstance(i[1], OLS)
        assert i[0] not in strings
        strings.append(i[0])

def test_check_instances_dict():
    """[Utils] check_instances: test correct dict is returned as is."""
    instances = {'a': [('ols-a1', OLS()), ('ols-a2', OLS(offset=1))],
                 'b': [('ols-b1', OLS()), ('ols-b2', OLS(offset=1))],
                 }
    out = _check_instances(instances)

    assert id(out) == id(instances)
    for k in out:
        ou = out[k]
        it = instances[k]
        for i in range(2):
            for j in range(2):
                assert id(ou[i][j]) == id(it[i][j])

def test_w_prep_fit():
    """[Model Selection] Test run with preprocessing, single step."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100,
                    verbose=True)

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        evl.fit(X, y,
                estimators=[OLS()],
                param_dicts={'ols': {'offset': randint(1, 10)}},
                preprocessing={'pr': [Scale()], 'no': []},
                n_iter=3)

    np.testing.assert_approx_equal(evl.results['test_score-m']['no.ols'],
                                   -24.903229451043195)
    np.testing.assert_approx_equal(evl.results['test_score-m']['pr.ols'],
                                   -26.510708862278072, 1)

    assert evl.results['params']['no.ols']['offset'] == 4
    assert evl.results['params']['pr.ols']['offset'] == 4

def test_w_prep():
    """[Model Selection] Test run with preprocessing, double step."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    # Preprocessing
    with open(os.devnull, 'w') as f, redirect_stderr(f):
        evl.preprocess(X, y, {'pr': [Scale()], 'no': []})

    # Fitting
    evl.evaluate(X, y,
                 estimators=[OLS()],
                 param_dicts={'ols': {'offset': randint(1, 10)}},
                 n_iter=3)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('no', 'ols')],
        -24.903229451043195)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('pr', 'ols')],
        -26.510708862278072, 1)

    assert evl.summary['params'][('no', 'ols')]['offset'] == 4
    assert evl.summary['params'][('pr', 'ols')]['offset'] == 4

def test_ols_preds():
    """[Utils] OLS: check predictions."""
    g = np.array([29.5, 34.5, 39.5, 44.5, 49.5, 54.5])
    p = OLS().fit(X[:6], y[:6]).predict(X[6:])
    np.testing.assert_array_almost_equal(p, g)

def __init__(self):
    args = {
        LearnerEstimator: (OLS(), FoldIndex()),
        LayerEnsemble: (make_group(FoldIndex(), ESTIMATORS, PREPROCESSING), ),
        TransformerEstimator: (Scale(), FoldIndex())
    }[Est]
    super(Tmp, self).__init__(*args)

def test_params():
    """[Model Selection] Test raises on bad params."""
    evl = Evaluator(mape_scorer)

    np.testing.assert_raises(ValueError,
                             evl.fit, X, y,
                             [OLS()],
                             {('bad', 'ols'): {'offset': randint(1, 10)}},
                             preprocessing={'prep': [Scale()]})

def test__get_context():
    """[Utils] _get_context message formatting."""
    out = _get_context(None)
    assert out == ''

    out = _get_context(OLS())
    assert out == '[ols] '

def test_raises():
    """[Model Selection] Test raises on error."""
    evl = Evaluator(bad_scorer)

    np.testing.assert_raises(ValueError,
                             evl.fit, X, y,
                             [OLS()],
                             {'ols': {'offset': randint(1, 10)}},
                             n_iter=1)

def test_clone():
    """[Parallel | Learner] Testing cloning"""
    lr = Learner(OLS(), indexer=data.indexer, name='lr')

    l = clone(lr)
    l.set_indexer(data.indexer)

    assert not l.__fitted__

    F = _run(l, 'fit', X, y, return_preds=True)
    H = _run(lr, 'fit', X, y, return_preds=True, refit=False)

    np.testing.assert_array_equal(F, H)

def test_passes():
    """[Model Selection] Test sets error score on failed scoring."""
    evl = Evaluator(bad_scorer, error_score=0, n_jobs=1)

    evl = np.testing.assert_warns(FitFailedWarning,
                                  evl.fit, X, y,
                                  [OLS()],
                                  {'ols': {'offset': randint(1, 10)}},
                                  n_iter=1)

    assert evl.summary['test_score_mean']['ols'] == 0

def test_data():
    """[Parallel | Learner] Test data attribute"""
    lr = Learner(OLS(), indexer=data.indexer, name='lr')
    lr.scorer = scorer

    _run(lr, 'fit', X, y, return_preds=True)

    assert lr.raw_data
    assert isinstance(lr.raw_data, list)
    assert isinstance(lr.data, dict)
    assert lr.data.__repr__()
    assert 'score' in lr.data.__repr__()

def test_collect():
    """[Parallel | Learner] Testing multiple collections"""
    lr = Learner(OLS(), indexer=data.indexer, name='lr')
    lr.__no_output__ = False

    a = _run(lr, 'fit', X, y, return_preds=True)
    b = _run(lr, 'fit', X, y, refit=False, return_preds=True)
    c = _run(lr, 'transform', X, y, return_preds=True)
    d = _run(lr, 'transform', X, y)

    np.testing.assert_array_equal(a, b)
    np.testing.assert_array_equal(a, c)
    np.testing.assert_array_equal(a, d)

def test_w_prep_set_params():
    """[Model Selection] Test run with preprocessing, sep param dists."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100,
                    verbose=2)

    params = {'no.ols': {'offset': randint(3, 6)},
              'pr.ols': {'offset': randint(1, 3)},
              }

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        evl.fit(X, y,
                estimators={'pr': [OLS()], 'no': [OLS()]},
                param_dicts=params,
                preprocessing={'pr': [Scale()], 'no': []},
                n_iter=10)

    np.testing.assert_approx_equal(evl.results['test_score-m']['no.ols'],
                                   -18.684229451043198)
    np.testing.assert_approx_equal(evl.results['test_score-m']['pr.ols'],
                                   -7.2594502123869491)

    assert evl.results['params']['no.ols']['offset'] == 3
    assert evl.results['params']['pr.ols']['offset'] == 1

def test_params():
    """[Model Selection] Test raises on bad params."""
    evl = Evaluator(mape_scorer, verbose=2)

    np.testing.assert_raises(
        ValueError, evl.fit, X, y,
        estimators=[OLS()],
        param_dicts={'bad.ols': {'offset': randint(1, 10)}},
        preprocessing={'prep': [Scale()]})

def test_no_prep():
    """[Model Selection] Test run without preprocessing."""
    evl = Evaluator(mape_scorer, verbose=True, cv=5, shuffle=False,
                    random_state=100)

    with open(os.devnull, 'w') as f, redirect_stderr(f):
        evl.fit(X, y,
                estimators=[OLS()],
                param_dicts={'ols': {'offset': randint(1, 10)}},
                n_iter=3)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean']['ols'],
        -24.903229451043195)

    assert evl.summary['params']['ols']['offset'] == 4

def test_raises():
    """[Model Selection] Test raises on error."""
    evl = Evaluator(bad_scorer, verbose=1)

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        np.testing.assert_raises(
            ValueError, evl.fit, X, y,
            estimators=[OLS()],
            param_dicts={'ols': {'offset': randint(1, 10)}},
            n_iter=1)

def test_passes():
    """[Model Selection] Test sets error score on failed scoring."""
    evl = Evaluator(bad_scorer, error_score=0, n_jobs=1, verbose=5)

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        evl = np.testing.assert_warns(
            FitFailedWarning, evl.fit, X, y,
            estimators=[OLS()],
            param_dicts={'ols': {'offset': randint(1, 10)}},
            n_iter=1)

    assert evl.results['test_score-m']['ols'] == 0

def test_w_prep_fit():
    """[Model Selection] Test run with preprocessing, single step."""
    evl = Evaluator(mape_scorer, cv=5, shuffle=False, random_state=100)

    evl.fit(X, y,
            estimators=[OLS()],
            param_dicts={'ols': {'offset': randint(1, 10)}},
            preprocessing={'pr': [Scale()], 'no': []},
            n_iter=3)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('no', 'ols')],
        -24.903229451043195)

    np.testing.assert_approx_equal(
        evl.summary['test_score_mean'][('pr', 'ols')],
        -26.510708862278072, 1)

    assert evl.summary['params'][('no', 'ols')]['offset'] == 4
    assert evl.summary['params'][('pr', 'ols')]['offset'] == 4

def test_subset_fit():
    """[Subsemble] 'fit' and 'predict' runs correctly."""
    meta = OLS()
    meta.fit(F, y)
    g = meta.predict(P)

    ens = Subsemble()
    ens.add(estimators, partitions=2, folds=3, dtype=np.float64)
    ens.add_meta(OLS(), dtype=np.float64)

    ens.fit(X, y)

    pred = ens.predict(X)
    np.testing.assert_array_equal(pred, g)

def test_run():
    """[Blend] 'fit' and 'predict' runs correctly."""
    meta = OLS()
    meta.fit(F, y[3:])
    g = meta.predict(P)

    ens = BlendEnsemble(test_size=3)
    ens.add(ESTIMATORS, PREPROCESSING, dtype=np.float64)
    ens.add(OLS(), meta=True, dtype=np.float64)

    ens.fit(X, y)

    pred = ens.predict(X)
    np.testing.assert_array_equal(pred, g)

def test_fit_score():
    """[Parallel | Evaluation] Test fit-score function."""
    out = fit_score(case='test',
                    tr_list=[],
                    est_name='ols',
                    est=OLS(),
                    params=(0, {'offset': 2}),
                    x=X,
                    y=y,
                    idx=((0, 5), (5, 10)),
                    scorer=make_scorer(mape, greater_is_better=False),
                    error_score=None)

    assert out[0] == 'test'
    assert out[1] == 'ols'
    assert out[2] == 0

    np.testing.assert_almost_equal(out[3], -1.5499999999999992, 5)
    np.testing.assert_almost_equal(out[4], -2.0749999999999993, 5)

def test_check_instances_dict_formatting():
    """[Utils] check_instances: test formatting of dict."""
    instances = {'a': [OLS(), ('ols', OLS()), ('ols', OLS()), ['list', OLS()]],
                 'b': [],
                 'c': [OLS(), ('ols', OLS())]}
    formatted = _check_instances(instances)

    for k, v in formatted.items():
        if k == 'b':
            assert len(v) == 0
        else:
            # Names must be unique within each formatted instance list
            strings = []
            for i in v:
                assert isinstance(i, tuple)
                assert isinstance(i[0], str)
                assert isinstance(i[1], OLS)
                assert i[0] not in strings
                strings.append(i[0])

import numpy as np

from mlens.utils.dummy import OLS
from mlens.base import SubsetIndex
from mlens.parallel.subset import _expand_instance_list, _get_col_idx
from mlens.ensemble.base import LayerContainer

x = np.arange(24).reshape((12, 2))
y = x[:, 0] * x[:, 1]

estimators = [('ols-%i' % i, OLS(i)) for i in range(2)]
indexer = SubsetIndex(2, 3, X=x)


def ground_truth():
    """Ground truth for subset test."""
    e = _expand_instance_list(estimators, indexer)

    P = np.zeros((12, 2 * 2))
    F = np.zeros((12, 2 * 2))

    cols = _get_col_idx(e, 2, 1)

    for name, tri, tei, est_list in e:
        for est_name, est in est_list:
            if tei is None:
                est.fit(x[tri[0]:tri[1]], y[tri[0]:tri[1]])
                p = est.predict(x)
                P[:, cols[(name, est_name)]] = p