Beispiel #1
0
 def decode_all(tag_sequences):
     """Decode entities from every tag sequence and pool them into one flat list.

     Each sequence in `tag_sequences` is passed to `decode_from_bioes`. Every
     entity it yields is stamped in place with a `'sent_id'` key holding the
     zero-based position of the sequence it was decoded from, so entities from
     different sequences stay distinguishable after pooling.

     Args:
         tag_sequences: An iterable of tag sequences, one per sentence.

     Returns:
         A list with all decoded entities, in sequence order and, within a
         sequence, in the order `decode_from_bioes` produced them.
     """
     collected = []
     for index, sequence in enumerate(tag_sequences):
         spans = decode_from_bioes(sequence)
         for span in spans:
             # The decoded entity is mutated directly, not copied.
             span['sent_id'] = index
             collected.append(span)
     return collected
Beispiel #2
0
 def build_ents(self):
     """ Rebuild `self.ents` from the NER tags of this sentence's tokens and return it.

     Any previous content of `self.ents` is discarded. The `ner` tag of every
     token in `self.tokens` is collected and handed to `decode_from_bioes`; each
     decoded entity becomes a `Span` covering the tokens from its `'start'`
     index through its `'end'` index (inclusive), carrying the entity's `'type'`.

     Note that, unlike other attributes, NER needs the raw text, so the tags are
     always produced on and attached to `Token`s rather than `Word`s.

     Returns:
         The freshly built list of `Span` objects (the same list as `self.ents`).
     """
     self.ents = []
     ner_tags = [token.ner for token in self.tokens]
     for span in decode_from_bioes(ner_tags):
         first, last = span['start'], span['end']
         # 'end' is an inclusive index, hence the +1 for the slice bound.
         entity = Span(tokens=self.tokens[first:last + 1], type=span['type'], doc=self.doc, sent=self)
         self.ents.append(entity)
     return self.ents