def test_spacy_ner():
    """Fit SpacyNER, round-trip it through save/load, and check predictions.

    The reloaded model must reproduce the training labels exactly, and the
    fitted model must score 1.0 on the training data.
    """
    model_dir = "nerds/test/data/models"
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    model = SpacyNER()
    try:
        model.fit(X, y)
        model.save(model_dir)
        model_r = model.load(model_dir)
        y_pred = model_r.predict(X)
        assert_equal(y, y_pred, "Label and prediction must be equal")
        assert_equal(1.0, model.score(X, y))
    finally:
        # The model directory is shared by several tests, so remove it even
        # when an assertion fails; ignore_errors keeps a missing directory
        # from masking the original failure.
        shutil.rmtree(model_dir, ignore_errors=True)
def test_bert_ner():
    """Fit BertNER for one iteration, round-trip via save/load, and predict.

    Only the shapes are checked: the number of predicted documents and the
    length of the first prediction must match the labels.
    """
    model_dir = "nerds/test/data/models"
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    model = BertNER(max_iter=1)
    try:
        model.fit(X, y)
        model.save(model_dir)
        model_r = model.load(model_dir)
        y_pred = model_r.predict(X)
        assert_equal(len(y), len(y_pred),
                     "Number of labels and predictions must be equal")
        assert_equal(len(y[0]), len(y_pred[0]),
                     "Size of first Label and prediction must be equal")
    finally:
        # The model directory is shared by several tests, so remove it even
        # when an assertion fails; ignore_errors keeps a missing directory
        # from masking the original failure.
        shutil.rmtree(model_dir, ignore_errors=True)
def test_dictionary_ner_from_conll():
    """Fit DictionaryNER on the IOB example, round-trip via save/load, predict.

    The reloaded model must reproduce the training labels exactly, and the
    fitted model must score 1.0 on the training data.
    """
    model_dir = "nerds/test/data/models"
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    model = DictionaryNER()
    try:
        model.fit(X, y)
        model.save(model_dir)
        r_model = model.load(model_dir)
        y_pred = r_model.predict(X)
        assert_equal(y, y_pred, "Label and prediction must be equal")
        assert_equal(1.0, model.score(X, y))
    finally:
        # The model directory is shared by several tests, so remove it even
        # when an assertion fails; ignore_errors keeps a missing directory
        # from masking the original failure.
        shutil.rmtree(model_dir, ignore_errors=True)
def test_bilstm_ner():
    """Fit BiLstmCrfNER for one iteration, round-trip via save/load, predict.

    Only the number of predicted documents is compared against the labels.
    """
    model_dir = "nerds/test/data/models"
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    model = BiLstmCrfNER(max_iter=1)
    try:
        model.fit(X, y)
        model.save(model_dir)
        model_r = model.load(model_dir)
        y_pred = model_r.predict(X)
        # there is not enough data to train this model properly, so decent
        # asserts are unlikely to succeed.
        assert_equal(len(y), len(y_pred),
                     "Number of labels and predictions must be equal.")
    finally:
        # The model directory is shared by several tests, so remove it even
        # when an assertion fails; ignore_errors keeps a missing directory
        # from masking the original failure.
        shutil.rmtree(model_dir, ignore_errors=True)
def test_ensemble_ner_multithreaded():
    """Fit an EnsembleNER (n_jobs=-1) over three estimators and check shapes.

    The ensemble combines a DictionaryNER, a CrfNER and a SpacyNER; the test
    only verifies that prediction counts line up with the labels.
    """
    docs, labels = load_data_and_labels("nerds/test/data/example.iob")

    # Build the (name, estimator) pairs one at a time, in the same order.
    named_estimators = []
    named_estimators.append(("dict_ner", DictionaryNER()))
    named_estimators.append(("crf_ner", CrfNER(max_iter=1)))
    named_estimators.append(("spacy_ner", SpacyNER(max_iter=1)))

    ensemble = EnsembleNER(estimators=named_estimators, n_jobs=-1)
    ensemble.fit(docs, labels)
    predictions = ensemble.predict(docs)

    assert_equal(
        len(labels),
        len(predictions),
        "Number of predicted and label documents must be same.",
    )
    assert_equal(
        len(labels[0]),
        len(predictions[0]),
        "Number of predicted and label tags must be same.",
    )
def test_elmo_ner():
    """Fit ElmoNER with random embeddings, round-trip via save/load, predict.

    Only the number of predicted documents is compared against the labels.
    """
    model_dir = "nerds/test/data/models"
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    # there are 28 unique words in our "vocabulary"
    embeddings = np.random.random((28, 100))
    model = ElmoNER(embeddings=embeddings, max_iter=1)
    try:
        model.fit(X, y)
        model.save(model_dir)
        model_r = model.load(model_dir)
        y_pred = model_r.predict(X)
        # there is not enough data to train this model properly, so decent
        # asserts are unlikely to succeed.
        assert_equal(len(y), len(y_pred),
                     "Number of labels and predictions must be equal.")
    finally:
        # The model directory is shared by several tests, so remove it even
        # when an assertion fails; ignore_errors keeps a missing directory
        # from masking the original failure.
        shutil.rmtree(model_dir, ignore_errors=True)
def test_crf_ner_with_nondefault_features():
    """Fit CrfNER with a custom featurizer and check prediction shapes."""

    def my_test_featurizer(sentence):
        # One feature dict per token, holding just the token itself.
        features = []
        for token in sentence:
            features.append({"word": token})
        return features

    docs, labels = load_data_and_labels("nerds/test/data/example.iob")
    crf = CrfNER(featurizer=my_test_featurizer)
    crf.fit(docs, labels)
    predictions = crf.predict(docs)

    # our features are not good enough to do good predictions, so just
    # check the lengths of labels vs predictions to make sure it worked
    assert_equal(
        len(labels),
        len(predictions),
        "Number of label and predictions must be equal.",
    )
    per_doc_messages = (
        (0, "Size of label and predictions must match (1)."),
        (1, "Size of label and predictions must match (2)."),
    )
    for doc_idx, message in per_doc_messages:
        assert_equal(len(labels[doc_idx]), len(predictions[doc_idx]), message)
def test_flair_ner():
    """Fit FlairNER for one iteration, round-trip via save/load, and predict.

    Only the shapes are checked: the number of predicted documents and the
    length of the first prediction must match the labels.
    """
    model_dir = "nerds/test/data/models"
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    try:
        # The constructor is also handed the model directory, so it is built
        # inside the guarded region in case it already writes there
        # (NOTE(review): constructor side effects not visible here).
        model = FlairNER(model_dir, max_iter=1)
        model.fit(X, y)
        model.save(model_dir)
        model_r = model.load(model_dir)
        y_pred = model_r.predict(X)
        # FLAIR NER needs more data to train than provided, so pointless
        # testing for prediction quality, just make sure prediction produces
        # something sane
        assert_equal(len(y), len(y_pred),
                     "Size of Label and prediction must be equal")
        assert_equal(len(y[0]), len(y_pred[0]),
                     "Size of first Label and prediction must be equal")
    finally:
        # The model directory is shared by several tests, so remove it even
        # when an assertion fails; ignore_errors keeps a missing directory
        # from masking the original failure.
        shutil.rmtree(model_dir, ignore_errors=True)
def test_dictionary_ner_from_dict():
    """Fit DictionaryNER from a tab-separated file and predict on the example.

    Each line of ``example.ents`` is split on a tab into two fields, which
    are collected into the two lists passed to ``fit``. Predictions on the
    IOB example must equal its labels and score 1.0.
    """
    # load and fit model from dictionary
    xs, ys = [], []
    # The context manager closes the file even if a line fails to unpack.
    with open("nerds/test/data/example.ents", "r") as fdict:
        for line in fdict:
            # Distinct names so the loop does not shadow the labels loaded
            # further down.
            x_item, y_item = line.strip().split('\t')
            xs.append(x_item)
            ys.append(y_item)
    model = DictionaryNER(from_dictionary=True)
    model.fit(xs, ys)
    # predict using example
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    y_pred = model.predict(X)
    assert_equal(y, y_pred, "Label and prediction must be equal")
    assert_equal(1.0, model.score(X, y))