def test_subset_raises_on_float():
    """[Base] SubsetIndex: float values for n_part or n_split raise ValueError."""
    # Each pair holds the two positional arguments; exactly one is a float.
    bad_arguments = ((0.5, 2), (2, 0.5))
    for first, second in bad_arguments:
        with np.testing.assert_raises(ValueError):
            SubsetIndex(first, second, X=X)
def test_subset_partition():
    """[Base] SubsetIndex: partition() produces the expected index tuples."""
    # Exhaust the partition iterator in one step instead of appending by hand.
    observed = list(SubsetIndex(X=X).partition())
    assert observed == [(0, 3), (3, 5)]
def test_subset_tuple_shape():
    """[Base] SubsetIndex: generate() yields the expected (train, test) tuples."""
    expected = [
        (((2, 3), ), [(0, 2), (3, 4)]),
        (((0, 2), ), [(2, 3), (4, 5)]),
        (((4, 5), ), [(0, 2), (3, 4)]),
        (((3, 4), ), [(2, 3), (4, 5)]),
    ]

    folds = []
    # Unpack each yielded item so every entry is rebuilt as a 2-tuple.
    for train_idx, test_idx in SubsetIndex(2, 2).generate(X):
        folds.append((train_idx, test_idx))

    assert folds == expected
def test_subset_partition_array():
    """[Base] SubsetIndex: partition(as_array=True) produces index arrays."""
    chunks = list(SubsetIndex(X=X).partition(as_array=True))

    # Only the first two partitions are inspected.
    np.testing.assert_array_equal(chunks[0], np.arange(3))
    np.testing.assert_array_equal(chunks[1], np.arange(3, 5))
def test_subset_index_is_fitted():
    """[Base] SubsetIndex: check fit methods."""
    fitted_attrs = ('n_samples', 'n_test_samples')

    def lacks_all(indexer):
        # True when none of the fitted attributes exist yet.
        return not any(hasattr(indexer, name) for name in fitted_attrs)

    def has_all(indexer):
        # True when every fitted attribute exists.
        return all(hasattr(indexer, name) for name in fitted_attrs)

    # Route 1: an explicit call to fit() sets the attributes.
    explicit = SubsetIndex()
    assert lacks_all(explicit)
    explicit.fit(X)
    assert has_all(explicit)

    # Route 2: exhausting generate(X) sets them as well.
    lazy = SubsetIndex()
    assert lacks_all(lazy)
    for _ in lazy.generate(X):
        pass
    assert has_all(lazy)

    # Route 3: passing X to the constructor sets them immediately.
    assert has_all(SubsetIndex(X=X))
def test_subset_array_shape():
    """[Base] SubsetIndex: test the array shape on generation."""
    # Convert every yielded array pair to plain lists so they compare with ==.
    folds = [
        (train.tolist(), test.tolist())
        for train, test in SubsetIndex(2, 2, X=X).generate(as_array=True)
    ]
    train_rows = [train for train, _ in folds]
    test_rows = [test for _, test in folds]

    assert train_rows == [[2], [0, 1], [4], [3]]
    assert test_rows == [[0, 1, 3], [2, 4], [0, 1, 3], [2, 4]]
import numpy as np
from mlens.utils.dummy import OLS
from mlens.base import SubsetIndex
from mlens.parallel.subset import _expand_instance_list, _get_col_idx
from mlens.ensemble.base import LayerContainer

# Fixture data: a (12, 2) integer array and a target equal to the product of
# its two columns.
x = np.arange(24).reshape((12, 2))
y = x[:, 0] * x[:, 1]

# Two named estimators, 'ols-0' and 'ols-1', built as OLS(0) and OLS(1).
estimators = [('ols-%i' % i, OLS(i)) for i in range(2)]

# Indexer constructed with positional arguments 2 and 3 and given x up front.
indexer = SubsetIndex(2, 3, X=x)


def ground_truth():
    """Ground truth for subset test.

    Expands the module-level ``estimators`` over ``indexer`` and, for every
    entry whose test index is ``None``, fits the estimator on the
    ``tri[0]:tri[1]`` slice of ``x``/``y`` and writes its predictions for all
    of ``x`` into the matching columns of ``P``.

    NOTE(review): in the code visible here ``F`` is allocated but never
    written and nothing is returned -- this definition may be truncated;
    confirm against the complete file.
    """
    e = _expand_instance_list(estimators, indexer)

    # Two (12, 4) buffers; only P is filled in the visible code.
    P = np.zeros((12, 2 * 2))
    F = np.zeros((12, 2 * 2))

    # Lookup keyed by (name, est_name) giving the column index into P.
    cols = _get_col_idx(e, 2, 1)

    for name, tri, tei, est_list in e:
        for est_name, est in est_list:
            # NOTE(review): the predict/store lines are assumed to belong to
            # this branch -- confirm the nesting against the original file.
            if tei is None:
                est.fit(x[tri[0]:tri[1]], y[tri[0]:tri[1]])
                p = est.predict(x)
                P[:, cols[(name, est_name)]] = p
def test_subset_raises_empty():
    """[Base] SubsetIndex: check raises error on empty train set."""
    # A one-element array cannot be indexed with arguments (2, 2).
    too_small = np.empty(1)
    with np.testing.assert_raises(ValueError):
        SubsetIndex(2, 2, X=too_small)
def test_subset_raises_no_partition():
    """[Base] SubsetIndex: check raises error on 0 partitions."""
    # Callable form: assert_raises invokes SubsetIndex(0, X=X) itself.
    np.testing.assert_raises(ValueError, SubsetIndex, 0, X=X)
def test_subset_raises_on_no_split_part():
    """[Base] SubsetIndex: check raises error n_part * n_split > n_samples."""
    # Callable form: assert_raises invokes SubsetIndex(3, 3, X=X) itself.
    np.testing.assert_raises(ValueError, SubsetIndex, 3, 3, X=X)
def test_subset_raises_on_partitions_and_one_split():
    """[Base] SubsetIndex: check raises error on single split of partitions."""
    # Two partitions combined with a single split must be rejected.
    arguments = (2, 1)
    with np.testing.assert_raises(ValueError):
        SubsetIndex(*arguments, X=X)
def test_subset_raises_on_w_raise_():
    """[Base] SubsetIndex: check raises on n_part = 1, n_splits = 1."""
    # Callable form: assert_raises invokes SubsetIndex(1, 1, X=X) itself.
    np.testing.assert_raises(ValueError, SubsetIndex, 1, 1, X=X)
def test_subset_warns_on_wo_raise_():
    """[Base] SubsetIndex: check warns on n_part = 1, n_splits = 1 when not raising."""
    # With raise_on_exception=False the same arguments emit a UserWarning.
    options = {'raise_on_exception': False, 'X': X}
    with np.testing.assert_warns(UserWarning):
        SubsetIndex(1, 1, **options)