Ejemplo n.º 1
0
def test_spacy_ner():
    """Fit SpacyNER on the example IOB data and round-trip it through save/load.

    The reloaded model must reproduce the training labels exactly, and the
    fitted model must score 1.0 on the same data. The saved model directory
    is removed even when an assertion fails, so a failing run does not leave
    artifacts behind for other tests.
    """
    model_dir = "nerds/test/data/models"
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    model = SpacyNER()
    model.fit(X, y)
    try:
        model.save(model_dir)
        model_r = model.load(model_dir)
        y_pred = model_r.predict(X)
        assert_equal(y, y_pred, "Label and prediction must be equal")
        assert_equal(1.0, model.score(X, y))
    finally:
        # ignore_errors: if save() failed before creating the directory,
        # a FileNotFoundError here must not mask the real failure.
        shutil.rmtree(model_dir, ignore_errors=True)
Ejemplo n.º 2
0
def test_bert_ner():
    """Fit BertNER(max_iter=1), round-trip it through save/load, and predict.

    Only sizes are checked: the number of predictions must match the number
    of labels, and the first prediction must be as long as the first label.
    The saved model directory is removed even when an assertion fails.
    """
    model_dir = "nerds/test/data/models"
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    model = BertNER(max_iter=1)
    model.fit(X, y)
    try:
        model.save(model_dir)
        model_r = model.load(model_dir)
        y_pred = model_r.predict(X)
        assert_equal(len(y), len(y_pred), "Number of labels and predictions must be equal")
        assert_equal(len(y[0]), len(y_pred[0]), "Size of first Label and prediction must be equal")
    finally:
        # ignore_errors: a missing directory (e.g. save() failed) must not
        # hide the original exception.
        shutil.rmtree(model_dir, ignore_errors=True)
Ejemplo n.º 3
0
def test_dictionary_ner_from_conll():
    """Fit DictionaryNER on the example IOB data and round-trip it through save/load.

    The reloaded model must reproduce the training labels exactly, and the
    fitted model must score 1.0 on the same data. The saved model directory
    is removed even when an assertion fails.
    """
    model_dir = "nerds/test/data/models"
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    model = DictionaryNER()
    model.fit(X, y)
    try:
        model.save(model_dir)
        r_model = model.load(model_dir)
        y_pred = r_model.predict(X)
        assert_equal(y, y_pred, "Label and prediction must be equal")
        assert_equal(1.0, model.score(X, y))
    finally:
        # ignore_errors: a missing directory (e.g. save() failed) must not
        # hide the original exception.
        shutil.rmtree(model_dir, ignore_errors=True)
Ejemplo n.º 4
0
def test_bilstm_ner():
    """Fit BiLstmCrfNER(max_iter=1), round-trip it through save/load, and predict.

    Only the number of predictions is checked against the number of labels.
    The saved model directory is removed even when an assertion fails.
    """
    model_dir = "nerds/test/data/models"
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    model = BiLstmCrfNER(max_iter=1)
    model.fit(X, y)
    try:
        model.save(model_dir)
        model_r = model.load(model_dir)
        y_pred = model_r.predict(X)
        # there is not enough data to train this model properly, so decent
        # asserts are unlikely to succeed.
        assert_equal(len(y), len(y_pred),
                     "Number of labels and predictions must be equal.")
    finally:
        # ignore_errors: a missing directory (e.g. save() failed) must not
        # hide the original exception.
        shutil.rmtree(model_dir, ignore_errors=True)
Ejemplo n.º 5
0
def test_ensemble_ner_multithreaded():
    """Fit an EnsembleNER of three named estimators with n_jobs=-1 and predict.

    Only sizes are verified: the number of predictions against the number of
    labels, and the length of the first prediction against the first label.
    """
    data, labels = load_data_and_labels("nerds/test/data/example.iob")
    ensemble = EnsembleNER(
        estimators=[
            ("dict_ner", DictionaryNER()),
            ("crf_ner", CrfNER(max_iter=1)),
            ("spacy_ner", SpacyNER(max_iter=1)),
        ],
        n_jobs=-1,
    )
    ensemble.fit(data, labels)
    predictions = ensemble.predict(data)

    # Outer size first, then the size of the first element.
    assert_equal(
        len(labels),
        len(predictions),
        "Number of predicted and label documents must be same.",
    )
    assert_equal(
        len(labels[0]),
        len(predictions[0]),
        "Number of predicted and label tags must be same.",
    )
Ejemplo n.º 6
0
def test_elmo_ner():
    """Fit ElmoNER with random embeddings, round-trip it through save/load, and predict.

    A random (28, 100) embedding matrix is supplied to the model. Only the
    number of predictions is checked against the number of labels. The saved
    model directory is removed even when an assertion fails.
    """
    model_dir = "nerds/test/data/models"
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    # there are 28 unique words in our "vocabulary"
    embeddings = np.random.random((28, 100))
    model = ElmoNER(embeddings=embeddings, max_iter=1)
    model.fit(X, y)
    try:
        model.save(model_dir)
        model_r = model.load(model_dir)
        y_pred = model_r.predict(X)
        # there is not enough data to train this model properly, so decent
        # asserts are unlikely to succeed.
        assert_equal(len(y), len(y_pred), "Number of labels and predictions must be equal.")
    finally:
        # ignore_errors: a missing directory (e.g. save() failed) must not
        # hide the original exception.
        shutil.rmtree(model_dir, ignore_errors=True)
Ejemplo n.º 7
0
def test_crf_ner_with_nondefault_features():
    """Fit CrfNER with a custom featurizer and verify prediction sizes.

    The featurizer emits a single "word" feature per token. Only the number
    of predictions and the lengths of the first two predictions are checked.
    """
    def my_test_featurizer(sentence):
        # One feature dict per token, holding just the token itself.
        features = []
        for token in sentence:
            features.append({"word": token})
        return features

    data, labels = load_data_and_labels("nerds/test/data/example.iob")
    crf = CrfNER(featurizer=my_test_featurizer)
    crf.fit(data, labels)
    predictions = crf.predict(data)

    # our features are not good enough to do good predictions, so just
    # check the lengths of labels vs predictions to make sure it worked
    assert_equal(
        len(labels),
        len(predictions),
        "Number of label and predictions must be equal.",
    )
    assert_equal(
        len(labels[0]),
        len(predictions[0]),
        "Size of label and predictions must match (1).",
    )
    assert_equal(
        len(labels[1]),
        len(predictions[1]),
        "Size of label and predictions must match (2).",
    )
Ejemplo n.º 8
0
def test_flair_ner():
    """Fit FlairNER(max_iter=1), round-trip it through save/load, and predict.

    Only sizes are checked: the number of predictions against the number of
    labels, and the length of the first prediction against the first label.
    The model directory is removed even when fitting or an assertion fails.
    """
    model_dir = "nerds/test/data/models"
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    model = FlairNER(model_dir, max_iter=1)
    try:
        # fit() is inside the try because the model is constructed with
        # model_dir; presumably it may write there during training
        # (TODO confirm against FlairNER).
        model.fit(X, y)
        model.save(model_dir)
        model_r = model.load(model_dir)
        y_pred = model_r.predict(X)
        # FLAIR NER needs more data to train than provided, so pointless testing
        # for prediction quality, just make sure prediction produces something sane
        assert_equal(len(y), len(y_pred),
                     "Size of Label and prediction must be equal")
        assert_equal(len(y[0]), len(y_pred[0]),
                     "Size of first Label and prediction must be equal")
    finally:
        # ignore_errors: a missing directory (e.g. fit() failed early) must
        # not hide the original exception.
        shutil.rmtree(model_dir, ignore_errors=True)
Ejemplo n.º 9
0
def test_dictionary_ner_from_dict():
    """Fit DictionaryNER from a tab-separated dictionary file and predict.

    Each non-blank line of ``example.ents`` is split on a tab into two
    fields, which are collected into two parallel lists and passed to
    ``fit``. The fitted model must reproduce the labels of the example IOB
    data exactly and score 1.0 on it.
    """
    # load and fit model from dictionary
    xs, ys = [], []
    # Context manager closes the file even if a malformed line raises.
    with open("nerds/test/data/example.ents", "r") as fdict:
        for line in fdict:
            stripped = line.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            first, second = stripped.split('\t')
            xs.append(first)
            ys.append(second)
    model = DictionaryNER(from_dictionary=True)
    model.fit(xs, ys)
    # predict using example
    X, y = load_data_and_labels("nerds/test/data/example.iob")
    y_pred = model.predict(X)
    assert_equal(y, y_pred, "Label and prediction must be equal")
    assert_equal(1.0, model.score(X, y))