def test_en_tagger_lemma_base_forms(NLP):
    """Check that "dive" and "diva" lemmatize to their own surface form.

    Each word becomes a one-token doc tagged as a NOUN with the listed
    number feature, and is then passed through the pipeline's
    "lemmatizer" component.
    """
    cases = (
        ("dive", "Number=Sing"),
        ("diva", "Number=Plur"),
    )
    for word, features in cases:
        doc = NLP.make_doc(word)
        token = doc[0]
        token.morph = MorphAnalysis(NLP.vocab, features)
        token.pos_ = "NOUN"
        lemmatizer = NLP.get_pipe("lemmatizer")
        lemmatizer(doc)
        # The expected lemma is the word itself in both cases.
        assert token.lemma_ == word
def test_en_tagger_lemma_nouns(NLP, text, lemmas, morphology):
    """Check that a noun lemmatizes to the first expected lemma.

    ``text`` is tokenized, its first token is tagged as a NOUN with the
    morphological features in ``morphology``, and the "lemmatizer" pipe
    is applied. Only ``lemmas[0]`` is compared against the result.

    NOTE(review): ``text``, ``lemmas`` and ``morphology`` are presumably
    supplied by a pytest parametrization that is not visible here.
    """
    # Ambiguous cases are problematic - it is not clear what we should do
    # to resolve them, e.g. ("axes", ["ax", "axes", "axis"]).
    doc = NLP.make_doc(text)
    token = doc[0]
    token.morph = MorphAnalysis(NLP.vocab, morphology)
    token.pos_ = "NOUN"
    lemmatizer = NLP.get_pipe("lemmatizer")
    lemmatizer(doc)
    actual = token.lemma_
    expected = lemmas[0]
    assert actual == expected
def test_en_tagger_lemma_verbs(NLP, text, lemmas):
    """Check that an infinitive verb lemmatizes to the first expected lemma.

    The first token of ``text`` is tagged as a VERB with
    ``VerbForm=Inf`` before the "lemmatizer" pipe runs. Only
    ``lemmas[0]`` is compared against the result.

    NOTE(review): ``text`` and ``lemmas`` are presumably supplied by a
    pytest parametrization that is not visible here.
    """
    doc = NLP.make_doc(text)
    token = doc[0]
    token.morph = MorphAnalysis(NLP.vocab, "VerbForm=Inf")
    token.pos_ = "VERB"
    lemmatizer = NLP.get_pipe("lemmatizer")
    lemmatizer(doc)
    actual = token.lemma_
    expected = lemmas[0]
    assert actual == expected
def test_en_tagger_lemma_issue781(NLP, word, lemmas):
    """Check that a plural noun lemmatizes to the first expected lemma.

    The first token of ``word`` is tagged as a NOUN with ``Number=Plur``
    before the "lemmatizer" pipe runs. Only ``lemmas[0]`` is compared
    against the result.

    NOTE(review): the name suggests a regression test for issue 781, and
    ``word`` and ``lemmas`` are presumably supplied by a pytest
    parametrization that is not visible here.
    """
    doc = NLP.make_doc(word)
    token = doc[0]
    # The part of speech is assigned before the morphology in this test.
    token.pos_ = "NOUN"
    token.morph = MorphAnalysis(NLP.vocab, "Number=Plur")
    lemmatizer = NLP.get_pipe("lemmatizer")
    lemmatizer(doc)
    actual = token.lemma_
    expected = lemmas[0]
    assert actual == expected
def test_en_tagger_lemma_base_form_verb(NLP):
    """Check that the past-tense verb "saw" lemmatizes to "see".

    The single token is tagged as a VERB with ``VerbForm=Past`` before
    the pipeline's "lemmatizer" component is applied.
    """
    doc = NLP.make_doc("saw")
    token = doc[0]
    token.morph = MorphAnalysis(NLP.vocab, "VerbForm=Past")
    token.pos_ = "VERB"
    lemmatizer = NLP.get_pipe("lemmatizer")
    lemmatizer(doc)
    assert token.lemma_ == "see"