def test_sparse():
    """Fit a multilabel CNN pipeline on a sparse label matrix.

    Checks that the fitted pipeline scores above 0.4 on its own training
    data and that predictions have one row per text and one column per
    label.
    """
    texts = [
        "One and two",
        "One only",
        "Three and four, nothing else",
        "Two nothing else",
        "Two and three",
    ]
    label_rows = np.array([
        [1, 1, 0, 0],
        [1, 0, 0, 0],
        [0, 0, 1, 1],
        [0, 1, 0, 0],
        [0, 1, 1, 0],
    ])
    # Labels are handed to the classifier in CSR form (sparse_y=True below).
    sparse_labels = csr_matrix(label_rows)

    vectorizer = KerasVectorizer()
    classifier = CNNClassifier(multilabel=True, batch_size=2, sparse_y=True)
    pipeline = Pipeline([('vec', vectorizer), ('clf', classifier)])
    pipeline.fit(texts, sparse_labels)

    assert pipeline.score(texts, sparse_labels) > 0.4

    predictions = pipeline.predict(texts)
    # 5 input texts, 4 label columns.
    assert predictions.shape == (5, 4)
def test_threshold():
    """Check that predict() matches predict_proba() cut at the threshold.

    The classifier is built with threshold=0.1, and the test asserts that
    its predictions equal the probabilities compared against that same
    value.
    """
    threshold = 0.1
    texts = [
        "One",
        "One only",
        "Two nothing else",
        "Two and three",
    ]
    labels = np.array([0, 0, 1, 1])

    pipeline = Pipeline([
        ('vec', KerasVectorizer()),
        ('clf', CNNClassifier(threshold=threshold)),
    ])
    pipeline.fit(texts, labels)

    # Expected labels: probabilities strictly above the threshold.
    expected = pipeline.predict_proba(texts) > threshold
    actual = pipeline.predict(texts)
    assert np.array_equal(expected, actual)
def test_predict_proba():
    """Check that predict_proba yields one row per sample, all in [0, 1]."""
    X = [
        "One",
        "One only",
        "Two nothing else",
        "Two and three",
    ]
    Y = np.array([0, 0, 1, 1])

    model = Pipeline([
        ('vec', KerasVectorizer()),
        ('clf', CNNClassifier()),
    ])
    model.fit(X, Y)

    Y_pred_prob = model.predict_proba(X)

    # One row of probabilities per input sample.
    assert Y_pred_prob.shape[0] == Y.shape[0]
    # np.all reduces over every axis. The built-in sum() reduces only the
    # first axis, so comparing its result to the sample count raises an
    # "ambiguous truth value" error as soon as the output has more than one
    # column.
    assert np.all(Y_pred_prob >= 0)
    assert np.all(Y_pred_prob <= 1)
def test_attention():
    """Fit a CNN pipeline with attention enabled and check its training score.

    The classifier is built with attention=True and attention_heads=10; the
    fitted pipeline must score above 0.6 on the data it was trained on.
    """
    texts = [
        "One",
        "One only",
        "Two nothing else",
        "Two and three",
    ]
    labels = np.array([0, 0, 1, 1])

    vectorizer = KerasVectorizer()
    classifier = CNNClassifier(attention=True, attention_heads=10)
    pipeline = Pipeline([('vec', vectorizer), ('clf', classifier)])
    pipeline.fit(texts, labels)

    training_score = pipeline.score(texts, labels)
    assert training_score > 0.6
def test_early_stopping():
    """Fit a CNN pipeline with early stopping and check its training score.

    The classifier is built with early_stopping=True and nb_epochs=10000;
    the fitted pipeline must score above 0.6 on the data it was trained on.
    """
    texts = [
        "One",
        "One only",
        "Two nothing else",
        "Two and three",
    ]
    labels = np.array([0, 0, 1, 1])

    vectorizer = KerasVectorizer()
    classifier = CNNClassifier(early_stopping=True, nb_epochs=10000)
    pipeline = Pipeline([('vec', vectorizer), ('clf', classifier)])

    # The epoch count is deliberately huge: if early stopping is not
    # working, this fit takes a very long time and the test visibly stalls.
    pipeline.fit(texts, labels)

    training_score = pipeline.score(texts, labels)
    assert training_score > 0.6
# Example: fit and score CNNClassifier inside a scikit-learn Pipeline, first
# with a 1-D label vector and then with a 2-D multilabel indicator array.
from wellcomeml.ml import CNNClassifier, KerasVectorizer
from sklearn.pipeline import Pipeline
import numpy as np

# Single-label case: Y holds one 0/1 value per text.
X = ["One", "three", "one", "two", "four"]
Y = np.array([1, 0, 1, 0, 0])

cnn_pipeline = Pipeline([
    ("vec", KerasVectorizer()),
    ("clf", CNNClassifier()),
])
cnn_pipeline.fit(X, Y)
print(cnn_pipeline.score(X, Y))

# Multilabel case: Y holds one row of 0/1 indicators per text, and the
# classifier is built with multilabel=True.
X = ["One, three", "one", "two, three"]
Y = np.array([
    [1, 0, 1],
    [1, 0, 0],
    [0, 1, 1],
])

cnn_pipeline = Pipeline([
    ("vec", KerasVectorizer()),
    ("clf", CNNClassifier(multilabel=True)),
])
cnn_pipeline.fit(X, Y)
print(cnn_pipeline.score(X, Y))
# Example: fit and score BiLSTMClassifier inside a scikit-learn Pipeline,
# first with a 1-D label vector and then with a 2-D multilabel indicator array.
from wellcomeml.ml import BiLSTMClassifier, KerasVectorizer
from sklearn.pipeline import Pipeline
import numpy as np

# Single-label case: Y holds one 0/1 value per text.
X = ["One", "three", "one", "two", "four"]
Y = np.array([1, 0, 1, 0, 0])

bilstm_pipeline = Pipeline([
    ("vec", KerasVectorizer()),
    ("clf", BiLSTMClassifier()),
])
bilstm_pipeline.fit(X, Y)
print(bilstm_pipeline.score(X, Y))

# Multilabel case: Y holds one row of 0/1 indicators per text, and the
# classifier is built with multilabel=True.
X = ["One, three", "one", "two, three"]
Y = np.array([
    [1, 0, 1],
    [1, 0, 0],
    [0, 1, 1],
])

bilstm_pipeline = Pipeline([
    ("vec", KerasVectorizer()),
    ("clf", BiLSTMClassifier(multilabel=True)),
])
bilstm_pipeline.fit(X, Y)
print(bilstm_pipeline.score(X, Y))