Esempio n. 1
0
def test_random_oversampling():
    """Resample with the 'globalCS' preprocessing and check the result is balanced.

    The resampled features and labels must have equal length, every label
    must occur the same number of times, and no label outside the original
    label set may appear.
    """
    classifier = ecoc.ECOC(preprocessing='globalCS')
    resampled_X, resampled_y = classifier._oversample(X, y)

    # Features and labels must stay aligned after resampling.
    assert len(resampled_X) == len(resampled_y)

    # A single distinct count means every class has the same number of samples.
    _, class_counts = np.unique(resampled_y, return_counts=True)
    assert len(set(class_counts)) == 1

    # Resampling must not introduce labels that were absent from the input.
    assert set(resampled_y) <= set(y)
Esempio n. 2
0
def test_ecoc_with_sklearn_pipeline(encoding_strategy, oversampling):
    """Fit ECOC as the last step of a Pipeline and predict three samples.

    Only the number of returned predictions is checked.
    """
    steps = [
        ('scaler', StandardScaler()),
        ('ecoc', ecoc.ECOC(encoding=encoding_strategy, preprocessing=oversampling)),
    ]
    model = Pipeline(steps)
    model.fit(X, y)

    samples = np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6], [7.7, 8.8, 9.9]])
    predictions = model.predict(samples)

    # One prediction per input row.
    assert len(predictions) == len(samples)
Esempio n. 3
0
def test_encoding(encoding_strategy, oversampling):
    """Fit ECOC and validate the shape and content of its code matrix.

    The matrix must have one row per class, all rows must be distinct, and
    no column may consist solely of zeros.
    """
    classifier = ecoc.ECOC(encoding=encoding_strategy, preprocessing=oversampling)
    classifier.fit(X, y)
    code_matrix = classifier._code_matrix

    class_count = len(np.unique(y))

    # One code word (row) per class.
    assert code_matrix.shape[0] == class_count

    # Code words must be pairwise different.
    distinct_rows = np.unique(code_matrix, axis=0)
    assert len(distinct_rows) == class_count

    # A column is "empty" when none of its entries is non-zero.
    empty_columns = ~code_matrix.any(axis=0)
    assert not empty_columns.any()
Esempio n. 4
0
def test_with_own_preprocessing():
    """Pass a user-defined resampler and check that ECOC delegates to it.

    The dummy resampler duplicates its input, so the resampled data must be
    exactly twice as long as the original.
    """
    class DummyResampler:
        """Resampler that returns every sample twice."""

        def fit_transform(self, X, y):
            doubled_X = np.concatenate((X, X), axis=0)
            # axis=None flattens the labels into a single 1-D array.
            doubled_y = np.concatenate((y, y), axis=None)
            return doubled_X, doubled_y

    classifier = ecoc.ECOC(preprocessing=DummyResampler())
    resampled_X, resampled_y = classifier._oversample(X, y)

    assert len(resampled_X) == 2 * len(X)
    assert len(resampled_y) == 2 * len(y)
Esempio n. 5
0
def test_with_own_classifier():
    """Use a user-defined binary classifier and check the ECOC predictions.

    The dummy classifier ignores its training data and always predicts
    zero; the test asserts that every ECOC prediction equals zero too.
    """
    class DummyClassifier:
        """Binary classifier that learns nothing and always predicts 0."""

        def fit(self, X, y):
            pass

        def predict(self, X):
            return np.zeros(len(X))

    classifier = ecoc.ECOC(binary_classifier=DummyClassifier(), preprocessing=None)
    classifier.fit(X, y)

    samples = np.array([[1.0, 2.0], [4.0, 5.5], [6.7, 8.8]])
    predictions = classifier.predict(samples)
    assert np.all(predictions == 0)
Esempio n. 6
0
def test_dense_and_sparse_with_not_cached_matrices(encoding_strategy):
    """Validate the code matrix on an enlarged dataset with shifted labels.

    The dataset is built from five scaled copies of X, each paired with a
    copy of y shifted by a different offset. The resulting code matrix must
    have one distinct row per class and no all-zero column.
    """
    # X, 2*X, ..., 5*X stacked along the sample axis.
    scaled_copies = [X] + [factor * X for factor in range(2, 6)]
    X1 = np.concatenate(scaled_copies, axis=0)

    # y shifted by 4, 8, 12, 16 and 20 respectively.
    shifted_labels = [y + offset for offset in range(4, 24, 4)]
    y1 = np.concatenate(shifted_labels)

    classifier = ecoc.ECOC(encoding=encoding_strategy)
    classifier.fit(X1, y1)
    code_matrix = classifier._code_matrix

    class_count = len(np.unique(y1))

    # One code word (row) per class.
    assert code_matrix.shape[0] == class_count

    # Code words must be pairwise different.
    distinct_rows = np.unique(code_matrix, axis=0)
    assert len(distinct_rows) == class_count

    # A column is "empty" when none of its entries is non-zero.
    empty_columns = ~code_matrix.any(axis=0)
    assert not empty_columns.any()
Esempio n. 7
0
def test_own_preprocessing_without_fit_transform():
    """Fitting with a resampler that lacks fit_transform must raise ValueError.

    The error message is expected to mention 'fit_transform'.
    """
    class DummyOversampler:
        """Object that deliberately does not provide fit_transform."""

        def foo(self, X, y):
            pass

        def bar(self, X):
            return np.zeros(len(X))

    classifier = ecoc.ECOC(preprocessing=DummyOversampler())

    # `match` searches the pattern in the string form of the raised exception.
    with pytest.raises(ValueError, match='fit_transform'):
        classifier.fit(X, y)
Esempio n. 8
0
def test_own_classifier_without_predict_and_fit():
    """Fitting with a classifier lacking fit/predict must raise ValueError.

    The error message is expected to mention both 'predict' and 'fit'.
    """
    class DummyClassifier:
        """Object that deliberately provides neither fit nor predict."""

        def foo(self, X, y):
            pass

        def bar(self, X):
            return np.zeros(len(X))

    classifier = ecoc.ECOC(binary_classifier=DummyClassifier(), preprocessing=None)
    with pytest.raises(ValueError) as excinfo:
        classifier.fit(X, y)

    message = str(excinfo.value)
    assert 'predict' in message
    assert 'fit' in message
Esempio n. 9
0
def test_hamming_distance():
    """Check _hamming_distance on two fixed vectors with entries in {-1, 0, 1}.

    The two vectors differ in exactly five positions, and the expected
    distance is 5.
    """
    first = np.array([1, 1, 1, 1, 1, 0, 0, 0, 0, 0, -1, -1, -1, -1])
    second = np.array([-1, 1, -1, 1, -1, 0, 1, 0, 1, 0, -1, -1, -1, -1])

    assert ecoc.ECOC()._hamming_distance(first, second) == 5
Esempio n. 10
0
def test_no_oversampling():
    """With preprocessing=None, _oversample must keep the data shapes unchanged."""
    classifier = ecoc.ECOC(preprocessing=None)
    resampled_X, resampled_y = classifier._oversample(X, y)

    assert resampled_X.shape == X.shape
    assert resampled_y.shape == y.shape
Esempio n. 11
0
def test_unknown_preprocessing():
    """An unrecognised preprocessing name must raise ValueError on fit.

    The error message is expected to contain the offending name.
    """
    unknown_name = 'DUMMY_OVERSAMPLING'
    classifier = ecoc.ECOC(preprocessing=unknown_name)

    # `match` searches the pattern in the string form of the raised exception.
    with pytest.raises(ValueError, match=unknown_name):
        classifier.fit(X, y)
Esempio n. 12
0
def test_predefined_classifiers_and_weighting_without_exceptions(classifier, weights):
    """Fit and predict with the given classifier/weights combination.

    The test only verifies that no exception is raised and that one
    prediction is returned per input sample.
    """
    model = ecoc.ECOC(binary_classifier=classifier, weights=weights)
    model.fit(X, y)

    samples = np.array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6], [7.7, 8.8, 9.9]])
    predictions = model.predict(samples)

    # One prediction per input row.
    assert len(predictions) == len(samples)
Esempio n. 13
0
def test_unknown_classifier():
    """An unrecognised binary classifier name must raise ValueError on fit.

    The error message is expected to contain the offending name.
    """
    unknown_name = 'DUMMY_CLASSIFIER'
    model = ecoc.ECOC(binary_classifier=unknown_name, preprocessing=None)

    # `match` searches the pattern in the string form of the raised exception.
    with pytest.raises(ValueError, match=unknown_name):
        model.fit(X, y)