def test_has_annotation(en_vocab):
    """Check ``Doc.has_annotation`` while a two-token doc is annotated in steps.

    For every attribute in the list, three stages are asserted:

    1. nothing set            -> neither partial nor complete annotation,
    2. first token annotated  -> partial annotation only,
    3. both tokens annotated  -> partial and complete annotation.
    """
    doc = Doc(en_vocab, words=["Hello", "world"])
    annotation_attrs = (
        "TAG",
        "POS",
        "MORPH",
        "LEMMA",
        "DEP",
        "HEAD",
        "ENT_IOB",
        "ENT_TYPE",
    )

    # Stage 1: a freshly constructed doc must not report any annotation.
    for name in annotation_attrs:
        assert not doc.has_annotation(name)
        assert not doc.has_annotation(name, require_complete=True)

    # Stage 2: annotate the first token only. The entity is set with
    # default="missing", so the second token is left without entity info.
    first = doc[0]
    first.tag_ = "A"
    first.pos_ = "X"
    first.set_morph("Feat=Val")
    first.lemma_ = "a"
    first.dep_ = "dep"
    first.head = doc[1]
    doc.set_ents([Span(doc, 0, 1, label="HELLO")], default="missing")

    for name in annotation_attrs:
        assert doc.has_annotation(name)
        assert not doc.has_annotation(name, require_complete=True)

    # Stage 3: annotate the second token too. Its morphology is set to the
    # empty string and its head is never assigned explicitly; the assertions
    # below still expect MORPH and HEAD to count as complete.
    second = doc[1]
    second.tag_ = "A"
    second.pos_ = "X"
    second.set_morph("")
    second.lemma_ = "a"
    second.dep_ = "dep"
    doc.ents = [Span(doc, 0, 2, label="HELLO")]

    for name in annotation_attrs:
        assert doc.has_annotation(name)
        assert doc.has_annotation(name, require_complete=True)
def __call__(self, doc: Doc) -> Doc:
    """Mark every match of ``self.matcher`` in ``doc`` as a ``CUSTOM`` entity.

    Each ``(_, start, end)`` triple produced by ``self.matcher(doc)`` becomes a
    ``Span`` labelled ``"CUSTOM"``. The spans are passed through
    ``filter_spans`` before being written to the doc with ``doc.set_ents``.

    Args:
        doc: The document to annotate; it is modified in place.

    Returns:
        The same ``doc`` object that was passed in.
    """
    candidates = []
    for _match_id, start, end in self.matcher(doc):
        candidates.append(Span(doc, start=start, end=end, label="CUSTOM"))

    # NOTE(review): filter_spans is defined outside this block; presumably it
    # drops overlapping spans so set_ents gets a conflict-free list - confirm.
    doc.set_ents(filter_spans(candidates))
    return doc