def test_build_model():
    """Fit a multilabel CNNClassifier, rebuild it for fewer outputs, refit.

    The classifier is first trained on four label columns and must predict
    four columns. The labels are then cut down to three columns, the model
    is rebuilt explicitly through ``build_model`` and trained again, after
    which predictions must have three columns.
    """
    texts = ["One and two", "One only", "Two nothing else", "Two and three"]
    labels = np.array([[1, 1, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0], [0, 1, 1, 0]])

    X_vec = KerasVectorizer().fit_transform(texts)

    batch_size = 2
    clf = CNNClassifier(
        batch_size=batch_size,
        multilabel=True,
        learning_rate=0.01,
    )
    clf.fit(X_vec, labels)
    assert clf.predict(X_vec).shape[1] == 4

    # Drop the last label column so the rebuilt network needs three outputs.
    labels = labels[:, :3]

    clf.build_model(
        X_vec.shape[1],               # sequence length
        X_vec.max() + 1,              # vocabulary size
        labels.shape[1],              # number of outputs
        X_vec.shape[0] / batch_size,  # decay steps (true division, as before)
    )
    clf.fit(X_vec, labels)
    assert clf.predict(X_vec).shape[1] == 3
def test_vocab_size():
    """Check that the largest encoded index equals the requested vocab_size."""
    requested_vocab_size = 1
    vectorizer = KerasVectorizer(vocab_size=requested_vocab_size)

    encoded = vectorizer.fit_transform(["One", "Two", "Three"])

    assert encoded.max() == requested_vocab_size
def test_sequence_length():
    """Check that the encoded width equals the requested sequence_length."""
    requested_length = 5
    vectorizer = KerasVectorizer(sequence_length=requested_length)

    encoded = vectorizer.fit_transform(["One", "Two", "Three"])

    assert encoded.shape[1] == requested_length
def test_vanilla():
    """Check the output of a KerasVectorizer built with default arguments.

    Three documents go in, the longest of which has two words; the result
    must have three rows, two columns and a maximum index of 5.
    """
    documents = ["One", "Two", "Three Four"]

    encoded = KerasVectorizer().fit_transform(documents)

    assert encoded.shape[0] == 3
    assert encoded.shape[1] == 2
    # Presumably four distinct words plus a reserved OOV index yield a top
    # index of 5 -- TODO confirm against KerasVectorizer's indexing scheme.
    assert encoded.max() == 5
def test_XY_dataset_sparse_y():
    """Train a CNNClassifier from a tf.data dataset and score on sparse labels.

    Training uses a dataset of (features, dense labels) pairs; scoring uses a
    features-only dataset together with the same labels as a CSR matrix.
    """
    texts = ["One and two", "One only", "Two nothing else", "Two and three"]
    dense_labels = np.array(
        [[1, 1, 0, 0], [1, 0, 0, 0], [0, 1, 0, 0], [0, 1, 1, 0]]
    )
    sparse_labels = csr_matrix(dense_labels)

    features = KerasVectorizer().fit_transform(texts)

    train_data = tf.data.Dataset.from_tensor_slices((features, dense_labels))
    test_data = tf.data.Dataset.from_tensor_slices(features)

    classifier = CNNClassifier(batch_size=2, sparse_y=True, multilabel=True)
    classifier.fit(train_data)

    score = classifier.score(test_data, sparse_labels)
    assert score > 0.3
def test_XY_dataset():
    """Train and score a CNNClassifier on a shuffled tf.data dataset."""
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    labels = np.array([0, 0, 1, 1])

    features = KerasVectorizer().fit_transform(texts)

    dataset = tf.data.Dataset.from_tensor_slices((features, labels))
    dataset = dataset.shuffle(100, seed=42)

    classifier = CNNClassifier(batch_size=2)
    classifier.fit(dataset)

    # NOTE(review): `dataset` is shuffled while `labels` keeps its original
    # order -- confirm that `score` lines the two up as intended.
    score = classifier.score(dataset, labels)
    assert score > 0.3
def test_save_load_attention():
    """Round-trip a BiLSTMClassifier with attention through save and load.

    A classifier created with ``attention=True`` is trained and saved to a
    temporary directory. A fresh default-constructed classifier then loads
    from that directory; it must expose a ``model`` attribute and score above
    0.6 on the training data.
    """
    texts = ["One", "One only", "Two nothing else", "Two and three"]
    labels = np.array([0, 0, 1, 1])

    features = KerasVectorizer().fit_transform(texts)

    trained = BiLSTMClassifier(attention=True)
    trained.fit(features, labels)

    with tempfile.TemporaryDirectory() as tmp_dir:
        trained.save(tmp_dir)

        restored = BiLSTMClassifier()
        restored.load(tmp_dir)

        # Checks run while the temporary directory still exists.
        assert hasattr(restored, 'model')
        assert restored.score(features, labels) > 0.6