Example #1
0
def test_XY_dataset_sparse_y():
    """Fit on a ``tf.data.Dataset`` with ``sparse_y=True`` and score it.

    The classifier is trained on a shuffled dataset built from dense
    multilabel targets and scored against the same labels passed as a
    CSR matrix.
    """
    X = ["One and two", "One only", "Two nothing else", "Two and three"]
    Y = np.array([[1, 1, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0], [0, 1, 1, 0]])
    Y_sparse = csr_matrix(Y)

    vec = KerasVectorizer()
    X_vec = vec.fit_transform(X)

    # Keep an unshuffled view for scoring. ``shuffle`` yields the examples
    # in a different order than ``Y`` (and, by default, in a new order on
    # every iteration), so scoring the shuffled dataset would pair
    # predictions with the wrong label rows.
    # NOTE(review): assumes ``score`` compares predictions with the given
    # labels positionally -- confirm against CNNClassifier.score.
    ordered_data = tf.data.Dataset.from_tensor_slices((X_vec, Y))
    train_data = ordered_data.shuffle(100, seed=42)

    clf = CNNClassifier(batch_size=2, sparse_y=True, multilabel=True)
    clf.fit(train_data)
    assert clf.score(ordered_data, Y_sparse) > 0.3
Example #2
0
def test_XY_dataset():
    """Fit ``CNNClassifier`` on a ``tf.data.Dataset`` of (features, label).

    Training uses a shuffled dataset; the score is computed on the same
    examples in their original order so that they match ``Y``.
    """
    X = ["One", "One only", "Two nothing else", "Two and three"]
    Y = np.array([0, 0, 1, 1])

    vec = KerasVectorizer()
    X_vec = vec.fit_transform(X)

    # Keep an unshuffled view for scoring. ``shuffle`` yields the examples
    # in a different order than ``Y`` (and, by default, in a new order on
    # every iteration), so scoring the shuffled dataset would pair
    # predictions with the wrong labels.
    # NOTE(review): assumes ``score`` compares predictions with the given
    # labels positionally -- confirm against CNNClassifier.score.
    ordered_data = tf.data.Dataset.from_tensor_slices((X_vec, Y))
    train_data = ordered_data.shuffle(100, seed=42)

    clf = CNNClassifier(batch_size=2)
    clf.fit(train_data)
    assert clf.score(ordered_data, Y) > 0.3
Example #3
0
def test_vanilla():
    """Default vectorizer + CNN pipeline scores above 0.6 on its own data."""
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    labels = np.array([0, 0, 1, 1])

    vectorizer = KerasVectorizer()
    classifier = CNNClassifier()
    pipeline = Pipeline([('vec', vectorizer), ('clf', classifier)])
    pipeline.fit(texts, labels)

    train_score = pipeline.score(texts, labels)
    assert train_score > 0.6
Example #4
0
def test_sparse():
    """Multilabel pipeline fitted on CSR labels: check score and shape."""
    texts = [
        "One and two",
        "One only",
        "Three and four, nothing else",
        "Two nothing else",
        "Two and three",
    ]
    dense_labels = np.array([
        [1, 1, 0, 0],
        [1, 0, 0, 0],
        [0, 0, 1, 1],
        [0, 1, 0, 0],
        [0, 1, 1, 0],
    ])
    labels = csr_matrix(dense_labels)

    vectorizer = KerasVectorizer()
    classifier = CNNClassifier(multilabel=True, batch_size=2, sparse_y=True)
    pipeline = Pipeline([('vec', vectorizer), ('clf', classifier)])
    pipeline.fit(texts, labels)

    assert pipeline.score(texts, labels) > 0.4
    # One prediction row per document, one column per label.
    predictions = pipeline.predict(texts)
    assert predictions.shape == (5, 4)
Example #5
0
def test_XY_list():
    """Pipeline accepts labels given as a plain Python list."""
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    labels = [0, 0, 1, 1]

    vectorizer = KerasVectorizer()
    classifier = CNNClassifier(batch_size=2)
    pipeline = Pipeline([('vec', vectorizer), ('clf', classifier)])
    pipeline.fit(texts, labels)

    train_score = pipeline.score(texts, labels)
    assert train_score > 0.6
Example #6
0
def test_attention():
    """Pipeline with ``attention=True`` and 10 heads scores above 0.6."""
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    labels = np.array([0, 0, 1, 1])

    vectorizer = KerasVectorizer()
    classifier = CNNClassifier(attention=True, attention_heads=10)
    pipeline = Pipeline([('vec', vectorizer), ('clf', classifier)])
    pipeline.fit(texts, labels)

    train_score = pipeline.score(texts, labels)
    assert train_score > 0.6
Example #7
0
def test_threshold():
    """``predict`` equals ``predict_proba`` cut at the configured threshold."""
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    labels = np.array([0, 0, 1, 1])

    vectorizer = KerasVectorizer()
    classifier = CNNClassifier(batch_size=2, threshold=0.1)
    pipeline = Pipeline([('vec', vectorizer), ('clf', classifier)])
    pipeline.fit(texts, labels)

    # Apply the same 0.1 cut-off by hand to the raw probabilities.
    probabilities = pipeline.predict_proba(texts)
    expected = probabilities > 0.1
    predicted = pipeline.predict(texts)
    assert np.array_equal(expected, predicted)
Example #8
0
def test_predict_proba():
    """``predict_proba`` returns one row per sample with values in [0, 1]."""
    X = ["One", "One only", "Two nothing else", "Two and three"]
    Y = np.array([0, 0, 1, 1])

    model = Pipeline([('vec', KerasVectorizer()),
                      ('clf', CNNClassifier(batch_size=2))])
    model.fit(X, Y)
    Y_pred_prob = model.predict_proba(X)

    # Check the row count and the value range separately. The builtin
    # ``sum`` over a NumPy array only gives a scalar count for 1-D or
    # single-column output; ``np.all`` works for any output shape.
    assert len(Y_pred_prob) == Y.shape[0]
    assert np.all(Y_pred_prob >= 0)
    assert np.all(Y_pred_prob <= 1)
Example #9
0
def test_early_stopping():
    """Pipeline with ``early_stopping=True`` and 10000 epochs still fits."""
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    labels = np.array([0, 0, 1, 1])

    vectorizer = KerasVectorizer()
    classifier = CNNClassifier(early_stopping=True, nb_epochs=10000)
    pipeline = Pipeline([('vec', vectorizer), ('clf', classifier)])

    # The epoch budget is deliberately huge: if early stopping does not
    # kick in, this fit takes a very long time to finish.
    pipeline.fit(texts, labels)

    train_score = pipeline.score(texts, labels)
    assert train_score > 0.6
Example #10
0
def test_save_load():
    """Save a fitted classifier, load it into a new instance, and score it."""
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    labels = np.array([0, 0, 1, 1])

    vectorizer = KerasVectorizer()
    features = vectorizer.fit_transform(texts)

    trained = CNNClassifier()
    trained.fit(features, labels)

    with tempfile.TemporaryDirectory() as model_dir:
        trained.save(model_dir)

        # Load into a brand-new, unfitted instance.
        restored = CNNClassifier()
        restored.load(model_dir)

        assert hasattr(restored, 'model')
        restored_score = restored.score(features, labels)
        assert restored_score > 0.6
Example #11
0
from wellcomeml.ml import CNNClassifier, KerasVectorizer
from sklearn.pipeline import Pipeline

import numpy as np

# Single-label example: one 0/1 target per document.
X = ["One", "three", "one", "two", "four"]
Y = np.array([1, 0, 1, 0, 0])

single_label_steps = [("vec", KerasVectorizer()), ("clf", CNNClassifier())]
cnn_pipeline = Pipeline(single_label_steps)
cnn_pipeline.fit(X, Y)
single_label_score = cnn_pipeline.score(X, Y)
print(single_label_score)

# Multilabel example: one row of 0/1 flags per document.
X = ["One, three", "one", "two, three"]
Y = np.array([[1, 0, 1], [1, 0, 0], [0, 1, 1]])

multilabel_steps = [
    ("vec", KerasVectorizer()),
    ("clf", CNNClassifier(multilabel=True)),
]
cnn_pipeline = Pipeline(multilabel_steps)
cnn_pipeline.fit(X, Y)
multilabel_score = cnn_pipeline.score(X, Y)
print(multilabel_score)