def test_subset_raises_on_float():
    """[Base] SubsetIndex: float n_part or n_split must raise ValueError."""
    # First pair has a float partition count, second a float fold count.
    for bad_args in ((0.5, 2), (2, 0.5)):
        np.testing.assert_raises(ValueError, SubsetIndex, *bad_args, X=X)
def test_subset_partition():
    """[Base] SubsetIndex: partition yields (start, stop) tuples."""
    expected = [(0, 3), (3, 5)]
    observed = list(SubsetIndex(X=X).partition())
    assert observed == expected
def test_subset_index_is_fitted():
    """[Base] SubsetIndex: check fit methods.

    The attributes ``n_samples`` and ``n_test_samples`` are expected to be
    falsy on a fresh ``SubsetIndex()`` and truthy after each of the three
    ways of supplying data exercised here: an explicit ``fit(X)`` call,
    exhausting ``generate(X)``, and passing ``X`` to the constructor.
    """
    attrs = ['n_samples', 'n_test_samples']

    # Explicit fit.
    idx = SubsetIndex()
    assert not any(getattr(idx, attr) for attr in attrs)
    idx.fit(X)
    assert all(getattr(idx, attr) for attr in attrs)

    # Running the generator to exhaustion.
    idx = SubsetIndex()
    assert not any(getattr(idx, attr) for attr in attrs)
    for _ in idx.generate(X):
        pass
    assert all(getattr(idx, attr) for attr in attrs)

    # Passing the data at construction time.
    idx = SubsetIndex(X=X)
    assert all(getattr(idx, attr) for attr in attrs)
def test_subset_tuple_shape():
    """[Base] SubsetIndex: generation yields the expected index tuples."""
    expected = [
        (((2, 3),), [(0, 2), (3, 4)]),
        (((0, 2),), [(2, 3), (4, 5)]),
        (((4, 5),), [(0, 2), (3, 4)]),
        (((3, 4),), [(2, 3), (4, 5)]),
    ]

    observed = []
    for train_idx, test_idx in SubsetIndex(2, 2).generate(X):
        observed.append((train_idx, test_idx))

    assert observed == expected
def test_subset_partition_array():
    """[Base] SubsetIndex: partition yields index arrays with as_array=True."""
    observed = list(SubsetIndex(X=X).partition(as_array=True))

    expected = ([0, 1, 2], [3, 4])
    for pos, want in enumerate(expected):
        np.testing.assert_array_equal(observed[pos], np.array(want))
def test_subset_array_shape():
    """[Base] SubsetIndex: test the array shape on generation.

    With ``as_array=True`` each generated pair is converted with
    ``tolist()`` and compared against the expected train and test indices.
    """
    train_indices = []
    test_indices = []
    for tri, tei in SubsetIndex(2, 2, X=X).generate(as_array=True):
        train_indices.append(tri.tolist())
        test_indices.append(tei.tolist())

    assert train_indices == [[2], [0, 1], [4], [3]]
    assert test_indices == [[0, 1, 3], [2, 4], [0, 1, 3], [2, 4]]
# We can create several other types of learners by
# varying the estimation strategy. An especially interesting strategy is to
# partition the training set and create several learners fitted on a given
# partition. This will create one prediction feature per partition.
# In the following example we fit the OLS model using two partitions and
# two fold CV on each partition. Note that by passing the output array
# as an argument during ``'fit'``, we perform a fit and transform operation.

from mlens.index import SubsetIndex


def mse(y, p):
    """Return the mean squared error between targets ``y`` and predictions ``p``."""
    return np.mean((y - p)**2)


indexer = SubsetIndex(partitions=2, folds=2, X=X)
learner = Learner(estimator=OLS(), indexer=indexer, name='subsemble-ols',
                  scorer=mse, verbose=True)

job.job = 'fit'
# Two output columns: one prediction feature per partition.
job.predict_out = np.zeros((y.shape[0], 2))

learner.setup(job.predict_in, job.targets, job.job)
for sub_learner in learner(job.args(), 'main'):
    sub_learner.fit()

print('Output:')
print(job.predict_out)
print()
def test_subset_raises_empty():
    """[Base] SubsetIndex: an empty train set must raise ValueError."""
    # A single sample cannot be spread over 2 partitions with 2 folds each.
    too_small = np.empty(1)
    np.testing.assert_raises(ValueError, SubsetIndex, 2, 2, X=too_small)
def test_subset_raises_no_partition():
    """[Base] SubsetIndex: zero partitions must raise ValueError."""
    np.testing.assert_raises(ValueError, SubsetIndex, 0, X=X)
def test_subset_raises_on_no_split_part():
    """[Base] SubsetIndex: n_part * n_split > n_samples must raise ValueError."""
    n_part, n_split = 3, 3
    np.testing.assert_raises(ValueError, SubsetIndex, n_part, n_split, X=X)
def test_subset_raises_opartitions_and_one_split():
    """[Base] SubsetIndex: a single split of partitions must raise ValueError."""
    assert_raises = np.testing.assert_raises
    assert_raises(ValueError, SubsetIndex, 2, 1, X=X)
def test_subset_raises_on_w_raise_():
    """[Base] SubsetIndex: n_part = 1, folds = 1 must raise ValueError."""
    np.testing.assert_raises(ValueError, SubsetIndex, 1, 1, X=X)
def test_subset_warns_on_wo_raise_():
    """[Base] SubsetIndex: check warns on n_part = 1, folds = 1.

    With ``raise_on_exception=False`` the constructor is expected to emit
    a ``UserWarning``.
    """
    with np.testing.assert_warns(UserWarning):
        SubsetIndex(1, 1, raise_on_exception=False, X=X)