Beispiel #1
0
def test_en_tagger_lemma_base_forms(NLP):
    """Check noun lemmas for "dive" (singular) and "diva" (plural).

    Each scenario builds a one-token doc, tags the token as a NOUN with the
    given morphological features, runs only the "lemmatizer" pipe on it and
    compares the resulting lemma.

    NLP is presumably a loaded spaCy English pipeline fixture; this test
    relies on its ``make_doc``, ``vocab`` and ``get_pipe`` members.
    """
    # (surface form, morphological features, expected lemma)
    scenarios = (
        ("dive", "Number=Sing", "dive"),
        ("diva", "Number=Plur", "diva"),
    )
    for surface, features, expected in scenarios:
        doc = NLP.make_doc(surface)
        token = doc[0]
        token.morph = MorphAnalysis(NLP.vocab, features)
        token.pos_ = "NOUN"
        lemmatize = NLP.get_pipe("lemmatizer")
        lemmatize(doc)
        assert token.lemma_ == expected
Beispiel #2
0
def test_en_tagger_lemma_nouns(NLP, text, lemmas, morphology):
    """Check that a noun with the given morphology gets the expected lemma.

    Only the first entry of ``lemmas`` is compared against the lemmatizer
    output. ``text``, ``lemmas`` and ``morphology`` are presumably supplied
    by a parametrize decorator that is not visible here.
    """
    # NOTE: ambiguous cases such as ("axes", ["ax", "axes", "axis"]) are
    # problematic - it is not clear how the ambiguity should be resolved.
    doc = NLP.make_doc(text)
    token = doc[0]
    token.morph = MorphAnalysis(NLP.vocab, morphology)
    token.pos_ = "NOUN"
    lemmatize = NLP.get_pipe("lemmatizer")
    lemmatize(doc)
    expected = lemmas[0]
    assert token.lemma_ == expected
Beispiel #3
0
def test_en_tagger_lemma_verbs(NLP, text, lemmas):
    """Check that a verb tagged ``VerbForm=Inf`` gets the expected lemma.

    Only the first entry of ``lemmas`` is compared against the lemmatizer
    output. ``text`` and ``lemmas`` are presumably supplied by a parametrize
    decorator that is not visible here.
    """
    doc = NLP.make_doc(text)
    token = doc[0]
    token.morph = MorphAnalysis(NLP.vocab, "VerbForm=Inf")
    token.pos_ = "VERB"
    lemmatize = NLP.get_pipe("lemmatizer")
    lemmatize(doc)
    expected = lemmas[0]
    assert token.lemma_ == expected
Beispiel #4
0
def test_en_tagger_lemma_issue781(NLP, word, lemmas):
    """Check the lemma of a plural noun (named after issue 781).

    The token is tagged as a NOUN with ``Number=Plur`` and only the first
    entry of ``lemmas`` is compared against the lemmatizer output. ``word``
    and ``lemmas`` are presumably supplied by a parametrize decorator that
    is not visible here.
    """
    doc = NLP.make_doc(word)
    token = doc[0]
    # The part-of-speech tag is set before the morphology, as in the
    # original test.
    token.pos_ = "NOUN"
    token.morph = MorphAnalysis(NLP.vocab, "Number=Plur")
    lemmatize = NLP.get_pipe("lemmatizer")
    lemmatize(doc)
    expected = lemmas[0]
    assert token.lemma_ == expected
Beispiel #5
0
def test_en_tagger_lemma_base_form_verb(NLP):
    """Check that "saw", tagged as a VERB with ``VerbForm=Past``, lemmatizes to "see".

    NLP is presumably a loaded spaCy English pipeline fixture; this test
    relies on its ``make_doc``, ``vocab`` and ``get_pipe`` members.
    """
    doc = NLP.make_doc("saw")
    token = doc[0]
    token.morph = MorphAnalysis(NLP.vocab, "VerbForm=Past")
    token.pos_ = "VERB"
    lemmatize = NLP.get_pipe("lemmatizer")
    lemmatize(doc)
    assert token.lemma_ == "see"