Esempio n. 1
0
def test_sparse():
    """Fit the CNN pipeline on a sparse multilabel target and check its output.

    The target is a CSR matrix with one row per text and one column per
    label; the classifier is configured with ``sparse_y=True``.
    """
    texts = [
        "One and two",
        "One only",
        "Three and four, nothing else",
        "Two nothing else",
        "Two and three",
    ]
    dense_labels = np.array([
        [1, 1, 0, 0],
        [1, 0, 0, 0],
        [0, 0, 1, 1],
        [0, 1, 0, 0],
        [0, 1, 1, 0],
    ])
    labels = csr_matrix(dense_labels)

    vectorizer = KerasVectorizer()
    classifier = CNNClassifier(multilabel=True, batch_size=2, sparse_y=True)
    model = Pipeline([('vec', vectorizer), ('clf', classifier)])
    model.fit(texts, labels)

    assert model.score(texts, labels) > 0.4
    # Predictions must keep the (n_texts, n_labels) layout of the target.
    assert model.predict(texts).shape == (5, 4)
Esempio n. 2
0
def test_threshold():
    """Check that ``predict`` matches ``predict_proba`` cut at the threshold.

    The classifier is built with ``threshold=0.1``, so the predictions are
    expected to equal ``predict_proba(X) > 0.1``.
    """
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    labels = np.array([0, 0, 1, 1])

    vectorizer = KerasVectorizer()
    classifier = CNNClassifier(threshold=0.1)
    model = Pipeline([('vec', vectorizer), ('clf', classifier)])
    model.fit(texts, labels)

    # Build the expectation from the probabilities first, then compare it
    # with what predict() reports.
    expected = model.predict_proba(texts) > 0.1
    predicted = model.predict(texts)
    assert np.array_equal(expected, predicted)
Esempio n. 3
0
def test_predict_proba():
    """Check that ``predict_proba`` returns one row of valid probabilities per input.

    The pipeline is fitted on four short texts with binary labels; the
    output must have as many rows as there are labels and every value must
    lie in the closed interval [0, 1].
    """
    X = [
        "One",
        "One only",
        "Two nothing else",
        "Two and three"
    ]
    Y = np.array([0, 0, 1, 1])

    model = Pipeline([
        ('vec', KerasVectorizer()),
        ('clf', CNNClassifier())
    ])
    model.fit(X, Y)
    Y_pred_prob = np.asarray(model.predict_proba(X))

    # Check the row count explicitly rather than through the builtin sum(),
    # which iterates over axis 0 and yields an array for 2-D output; the
    # assertion would then hinge on the truthiness of that array.
    assert Y_pred_prob.shape[0] == Y.shape[0]
    # NaN fails both comparisons, so invalid values are still rejected.
    assert np.all(Y_pred_prob >= 0)
    assert np.all(Y_pred_prob <= 1)
Esempio n. 4
0
def test_attention():
    """Fit the CNN pipeline with attention enabled and check the training score."""
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    labels = np.array([0, 0, 1, 1])

    vectorizer = KerasVectorizer()
    classifier = CNNClassifier(attention=True, attention_heads=10)
    model = Pipeline([('vec', vectorizer), ('clf', classifier)])
    model.fit(texts, labels)

    assert model.score(texts, labels) > 0.6
Esempio n. 5
0
def test_early_stopping():
    """Fit the CNN pipeline with early stopping and a very large epoch budget."""
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    labels = np.array([0, 0, 1, 1])

    vectorizer = KerasVectorizer()
    classifier = CNNClassifier(early_stopping=True, nb_epochs=10000)
    model = Pipeline([('vec', vectorizer), ('clf', classifier)])

    # With 10000 epochs requested, this fit only finishes in reasonable
    # time if early stopping actually kicks in.
    model.fit(texts, labels)
    assert model.score(texts, labels) > 0.6
Esempio n. 6
0
from wellcomeml.ml import CNNClassifier, KerasVectorizer
from sklearn.pipeline import Pipeline

import numpy as np

# Single-label example: one 0/1 target value per input text.
X = ["One", "three", "one", "two", "four"]
Y = np.array([1, 0, 1, 0, 0])

cnn_pipeline = Pipeline(steps=[
    ("vec", KerasVectorizer()),
    ("clf", CNNClassifier()),
])
cnn_pipeline.fit(X, Y)
print(cnn_pipeline.score(X, Y))

# Multilabel example: each row of Y holds one 0/1 flag per label.
X = ["One, three", "one", "two, three"]
Y = np.array([
    [1, 0, 1],
    [1, 0, 0],
    [0, 1, 1],
])

cnn_pipeline = Pipeline(steps=[
    ("vec", KerasVectorizer()),
    ("clf", CNNClassifier(multilabel=True)),
])
cnn_pipeline.fit(X, Y)
print(cnn_pipeline.score(X, Y))
Esempio n. 7
0
from wellcomeml.ml import BiLSTMClassifier, KerasVectorizer
from sklearn.pipeline import Pipeline

import numpy as np

# Single-label example: one 0/1 target value per input text.
X = ["One", "three", "one", "two", "four"]
Y = np.array([1, 0, 1, 0, 0])

bilstm_pipeline = Pipeline(steps=[
    ("vec", KerasVectorizer()),
    ("clf", BiLSTMClassifier()),
])
bilstm_pipeline.fit(X, Y)
print(bilstm_pipeline.score(X, Y))

# Multilabel example: each row of Y holds one 0/1 flag per label.
X = ["One, three", "one", "two, three"]
Y = np.array([
    [1, 0, 1],
    [1, 0, 0],
    [0, 1, 1],
])

bilstm_pipeline = Pipeline(steps=[
    ("vec", KerasVectorizer()),
    ("clf", BiLSTMClassifier(multilabel=True)),
])
bilstm_pipeline.fit(X, Y)
print(bilstm_pipeline.score(X, Y))