def test_data():
    """[Parallel | Learner] Test data"""
    idx = INDEXERS['subsemble']()
    lyr = Layer('layer-scorer').push(make_group(idx, ECM, None))
    for lr in lyr.learners:
        lr.scorer = scorer

    run(lyr, 'fit', X, y, return_preds=True)

    repr = lyr.data.__repr__()
    assert lyr.raw_data
    assert isinstance(lyr.raw_data, list)
    assert isinstance(lyr.data, dict)
    assert repr
    assert 'score' in repr
def test_push_2():
    """[Parallel | Layer] Test double push"""
    layer.push(g2)
    assert not layer.__fitted__

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        a = run(layer, 'fit', X, y, refit=False, return_preds=True)

    assert layer.__fitted__

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        b = run(layer, 'fit', X, y, refit=False, return_preds=True)

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        c = run(layer, 'transform', X, return_preds=True)

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        d = run(layer, 'fit', X, y, refit=True, return_preds=True)

    # The fitted layer is reused when refit=False, so a repeated fit,
    # a transform, and a full refit should all yield identical predictions.
    np.testing.assert_array_equal(a, b)
    np.testing.assert_array_equal(a, c)
    np.testing.assert_array_equal(a, d)
def test_push_1():
    """[Parallel | Layer] Test single push"""
    assert not layer.__stack__
    layer.push(g1)
    assert layer.stack[0] is g1
    assert layer.__stack__

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        run(layer, 'fit', X, y)
        run(layer, 'transform', X)
        run(layer, 'predict', X)

    assert layer.__fitted__
def test_clone():
    """[Parallel | Layer] Test cloning"""
    lyr = clone(layer)
    assert lyr.__stack__
    assert not lyr.__fitted__

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        F = run(layer, 'fit', X, y, refit=False, return_preds=True)
        H = run(lyr, 'fit', X, y, return_preds=True)
    np.testing.assert_array_equal(F, H)

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        F = run(layer, 'transform', X)
        H = run(lyr, 'transform', X)
    np.testing.assert_array_equal(F, H)

    with open(os.devnull, 'w') as f, redirect_stdout(f):
        F = run(layer, 'predict', X)
        H = run(lyr, 'predict', X)
    np.testing.assert_array_equal(F, H)
############################################################################
# This ``group`` object is now a complete description of how to fit our two
# learners using the prescribed indexing method.
#
# To train the estimators, we need to feed the group to a :class:`Layer`
# instance:

import numpy as np

np.random.seed(2)

X = np.arange(20).reshape(10, 2)
y = np.random.rand(10)

layer = Layer(stack=group)
print(run(layer, 'fit', X, y, return_preds=True))

############################################################################
# To apply preprocessing before fitting the estimators, pass the
# ``transformers`` argument when creating the ``group``:

group = make_group(indexer, [OLS(1), OLS(2)], [Scale()])
layer = Layer(stack=group)
print(run(layer, 'fit', X, y, return_preds=True))
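############################################################################
# As a quick check (a sketch reusing the ``layer`` fitted above),
# ``transform`` recovers the cross-validated predictions produced during
# the ``fit`` call:

print(run(layer, 'transform', X))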
############################################################################
#
# Multitasking
# ------------
#
# This is the topic of our next walkthrough. Here we show how to parallelize
# estimation with a single learner using multiple threads:

from multiprocessing.dummy import Pool


def run(est):
    """Execute a task yielded by the learner."""
    est()


# Set up a prediction job: allocate the output array, then execute each
# sub-learner task in a thread pool.
args = job.args()
job.predict_out = np.zeros((y.shape[0], 2))
job.job = 'predict'

Pool(4).map(run, list(learner(args, 'main')))

############################################################################
# For a slightly higher-level API for parallel computation on a single
# instance (of any accepted class), we can turn to the :func:`run` function.
# This function takes care of argument specification, array creation and all
# other details we would otherwise need to attend to. For instance, to
# transform a dataset using the preprocessing pipeline fitted on the full
# training set, use :func:`run` to call ``predict``:

from mlens.parallel import run

print(run(transformer, 'predict', X))

############################################################################
# Next we handle several learners by grouping them in a layer in the
# :ref:`layer mechanics tutorial <layer_tutorial>`.
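############################################################################
# As a final aside, the same high-level call covers the single learner from
# the threading example above. A minimal sketch (assuming the fitted
# ``learner`` instance), standing in for the manual ``Pool`` setup:

print(run(learner, 'predict', X))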