def test_random_oversampling():
    """Check the output of ``_oversample`` when preprocessing is 'globalCS'.

    The resampled features and labels must have equal length, every class
    must occur equally often, and no label outside the original set may appear.
    """
    classifier = ecoc.ECOC(preprocessing='globalCS')
    X_resampled, y_resampled = classifier._oversample(X, y)

    # Features and labels have to stay aligned one-to-one.
    assert len(X_resampled) == len(y_resampled)

    # A single distinct count means all classes are equally represented.
    _, class_counts = np.unique(y_resampled, return_counts=True)
    assert len(set(class_counts)) == 1

    # Resampling must not introduce labels absent from the input.
    assert set(y_resampled) <= set(y)
def test_ecoc_with_sklearn_pipeline(encoding_strategy, oversampling):
    """Check that ECOC can be fitted and used for prediction inside a ``Pipeline``.

    A scaler followed by an ECOC estimator is fitted on the module-level data;
    predicting three samples must return three predictions.
    """
    steps = [
        ('scaler', StandardScaler()),
        ('ecoc', ecoc.ECOC(encoding=encoding_strategy, preprocessing=oversampling)),
    ]
    pipeline = Pipeline(steps)
    pipeline.fit(X, y)

    samples = np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6], [7.7, 8.8, 9.9]])
    y_hat = pipeline.predict(samples)

    # One prediction per input row.
    assert len(y_hat) == 3
def test_encoding(encoding_strategy, oversampling):
    """Check structural properties of ``_code_matrix`` after fitting.

    The matrix must have one row per class, all rows must be distinct,
    and no column may consist solely of zeros.
    """
    model = ecoc.ECOC(encoding=encoding_strategy, preprocessing=oversampling)
    model.fit(X, y)

    code_matrix = model._code_matrix
    n_classes = len(np.unique(y))

    # One code word (row) per class.
    assert code_matrix.shape[0] == n_classes

    # Code words have to be pairwise different.
    distinct_rows = np.unique(code_matrix, axis=0)
    assert len(distinct_rows) == n_classes

    # A column without any non-zero entry is not allowed.
    all_zero_columns = ~code_matrix.any(axis=0)
    assert not all_zero_columns.any()
def test_with_own_preprocessing():
    """Check that a user-supplied resampler object is used by ``_oversample``.

    The dummy resampler duplicates the data, so the output must be exactly
    twice as long as the input.
    """

    class DummyResampler:
        """Resampler stub whose ``fit_transform`` returns every sample twice."""

        def fit_transform(self, X, y):
            doubled_X = np.concatenate((X, X), axis=0)
            doubled_y = np.concatenate((y, y), axis=None)
            return doubled_X, doubled_y

    classifier = ecoc.ECOC(preprocessing=DummyResampler())
    X_resampled, y_resampled = classifier._oversample(X, y)

    expected_X_len = 2 * len(X)
    expected_y_len = 2 * len(y)
    assert len(X_resampled) == expected_X_len
    assert len(y_resampled) == expected_y_len
def test_with_own_classifier():
    """Check that ECOC works with a user-supplied binary classifier object.

    The dummy classifier always predicts zeros; the test asserts that every
    prediction returned by ECOC compares equal to 0.
    """

    class DummyClassifier:
        """Classifier stub: ``fit`` does nothing, ``predict`` returns zeros."""

        def fit(self, X, y):
            pass

        def predict(self, X):
            return np.zeros(len(X))

    classifier = ecoc.ECOC(binary_classifier=DummyClassifier(), preprocessing=None)
    classifier.fit(X, y)

    samples = np.array([[1.0, 2.0], [4.0, 5.5], [6.7, 8.8]])
    predicted = classifier.predict(samples)

    is_zero = predicted == 0
    assert np.all(is_zero)
def test_dense_and_sparse_with_not_cached_matrices(encoding_strategy):
    """Check ``_code_matrix`` properties on an enlarged data set.

    The data set is built from five scaled copies of ``X`` (factors 1..5) with
    labels shifted by 4, 8, 12, 16 and 20.
    NOTE(review): presumably this yields more classes than the cached code
    matrices cover -- confirm against the ``ecoc`` implementation.
    """
    # X, 2 * X, ..., 5 * X stacked row-wise.
    scaled_copies = [X] + [factor * X for factor in range(2, 6)]
    X1 = np.concatenate(scaled_copies, axis=0)

    # y + 4, y + 8, ..., y + 20 in the same order as the feature copies.
    shifted_labels = [y + offset for offset in range(4, 21, 4)]
    y1 = np.concatenate(shifted_labels)

    model = ecoc.ECOC(encoding=encoding_strategy)
    model.fit(X1, y1)

    code_matrix = model._code_matrix
    n_classes = len(np.unique(y1))

    # One code word (row) per class.
    assert code_matrix.shape[0] == n_classes

    # Code words have to be pairwise different.
    distinct_rows = np.unique(code_matrix, axis=0)
    assert len(distinct_rows) == n_classes

    # A column without any non-zero entry is not allowed.
    all_zero_columns = ~code_matrix.any(axis=0)
    assert not all_zero_columns.any()
def test_own_preprocessing_without_fit_transform():
    """Check that a resampler object lacking ``fit_transform`` is rejected.

    Fitting must raise ``ValueError`` and the message must mention
    'fit_transform'.
    """

    class DummyOversampler:
        """Object that deliberately does not define ``fit_transform``."""

        def foo(self, X, y):
            pass

        def bar(self, X):
            return np.zeros(len(X))

    classifier = ecoc.ECOC(preprocessing=DummyOversampler())

    with pytest.raises(ValueError) as excinfo:
        classifier.fit(X, y)

    # Checked after the context manager exits, once the exception is captured.
    message = str(excinfo.value)
    assert 'fit_transform' in message
def test_own_classifier_without_predict_and_fit():
    """Check that a classifier object lacking ``fit``/``predict`` is rejected.

    Fitting must raise ``ValueError`` and the message must mention both
    'predict' and 'fit'.
    """

    class DummyClassifier:
        """Object that deliberately defines neither ``fit`` nor ``predict``."""

        def foo(self, X, y):
            pass

        def bar(self, X):
            return np.zeros(len(X))

    classifier = ecoc.ECOC(binary_classifier=DummyClassifier(), preprocessing=None)

    with pytest.raises(ValueError) as excinfo:
        classifier.fit(X, y)

    # Checked after the context manager exits, once the exception is captured.
    message = str(excinfo.value)
    assert 'predict' in message
    assert 'fit' in message
def test_hamming_distance():
    """Check ``_hamming_distance`` on two fixed 14-element code words.

    The vectors differ at indices 0, 2, 4, 6 and 8, so the expected
    distance is 5.
    """
    # Five ones, five zeros, four minus-ones.
    first = np.array([1] * 5 + [0] * 5 + [-1] * 4)
    second = np.array([
        -1, 1, -1, 1, -1,
        0, 1, 0, 1, 0,
        -1, -1, -1, -1,
    ])

    assert ecoc.ECOC()._hamming_distance(first, second) == 5
def test_no_oversampling():
    """Check that ``_oversample`` keeps the data shapes when preprocessing is None."""
    classifier = ecoc.ECOC(preprocessing=None)
    X_out, y_out = classifier._oversample(X, y)

    assert X_out.shape == X.shape
    assert y_out.shape == y.shape
def test_unknown_preprocessing():
    """Check that an unrecognised preprocessing name is rejected on ``fit``.

    Fitting must raise ``ValueError`` and the message must contain the
    offending name.
    """
    classifier = ecoc.ECOC(preprocessing='DUMMY_OVERSAMPLING')

    with pytest.raises(ValueError) as excinfo:
        classifier.fit(X, y)

    # Checked after the context manager exits, once the exception is captured.
    message = str(excinfo.value)
    assert 'DUMMY_OVERSAMPLING' in message
def test_predefined_classifiers_and_weighting_without_exceptions(classifier, weights):
    """Smoke-test fit/predict for a given classifier and weights combination.

    Fitting and predicting must complete without raising, and predicting
    three samples must return three predictions.
    """
    model = ecoc.ECOC(binary_classifier=classifier, weights=weights)
    model.fit(X, y)

    samples = np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6], [7.7, 8.8, 9.9]])
    predicted = model.predict(samples)

    # One prediction per input row.
    assert len(predicted) == 3
def test_unknown_classifier():
    """Check that an unrecognised binary classifier name is rejected on ``fit``.

    Fitting must raise ``ValueError`` and the message must contain the
    offending name.
    """
    classifier = ecoc.ECOC(binary_classifier='DUMMY_CLASSIFIER', preprocessing=None)

    with pytest.raises(ValueError) as excinfo:
        classifier.fit(X, y)

    # Checked after the context manager exits, once the exception is captured.
    message = str(excinfo.value)
    assert 'DUMMY_CLASSIFIER' in message