Esempio n. 1
0
 def get_spacy(self, doc):
     """Build one NER record per entity found in ``doc.ents``.

     Each entity's text, character start/end offsets and label are handed
     to ``he.append_ner`` together with the source tag ``'spacy'``.

     Args:
         doc: Object exposing an iterable ``ents`` whose items provide
             ``text``, ``start_char``, ``end_char`` and ``label_``
             (presumably a spaCy ``Doc`` -- confirm against the caller).

     Returns:
         list: The values returned by ``he.append_ner``, in entity order.
     """
     return [
         he.append_ner(entity.text, entity.start_char, entity.end_char,
                       entity.label_, 'spacy')
         for entity in doc.ents
     ]
Esempio n. 2
0
 def get_list(self, doc):
     """Build one NER record per hit of ``self.matcher`` on ``doc``.

     The matcher yields ``(match_id, start, end)`` with ``start``/``end``
     expressed in tokens. They are converted to character offsets before
     being passed to ``he.append_ner`` so that positions are reported in
     characters rather than token indices.

     Args:
         doc: Document that can be sliced by token index into a span
             exposing ``start_char`` / ``end_char`` (assumed to be a spaCy
             ``Doc`` -- confirm against the caller).

     Returns:
         list: The values returned by ``he.append_ner``, one per match,
         tagged with the source ``'list'``.
     """
     mats = []
     for match_id, start, end in self.matcher(doc):
         # Matcher positions are token indices; the span knows its own
         # character boundaries.
         span = doc[start:end]
         mats.append(
             he.append_ner(span, span.start_char, span.end_char,
                           self.nlp.vocab.strings[match_id], 'list'))
     return mats
Esempio n. 3
0
 def get_list(self, doc):
     """Build one NER record per hit of ``self.matcher`` on ``doc``.

     The matcher's token-level ``start``/``end`` are translated to
     character positions: the start is the ``idx`` of the first matched
     token and the end is that start plus the length of the matched
     span's string form.

     Args:
         doc: Document indexable by token (items expose ``idx``) and
             sliceable into spans (presumably a spaCy ``Doc`` -- confirm
             against the caller).

     Returns:
         list: The values returned by ``he.append_ner``, one per match,
         tagged with the source ``'list'``.
     """

     def _to_record(match_id, start, end):
         # Convert start/stop from token level to char level.
         span = doc[start:end]
         char_start = doc[start].idx
         char_end = char_start + len(str(span))
         return he.append_ner(span, char_start, char_end,
                              self.nlp.vocab.strings[match_id], 'list')

     return [
         _to_record(match_id, start, end)
         for match_id, start, end in self.matcher(doc)
     ]
Esempio n. 4
0
 def get_rules(self, text):
     """Find error-code-like tokens in ``text`` with a regular expression.

     A hit starts at a word boundary with either ``o``/``0`` followed by
     ``x``/``*``, or with ``800``, and extends over the following
     non-whitespace characters; matching is case-insensitive.

     Args:
         text: String to scan.

     Returns:
         list: The values returned by ``he.append_ner`` for each hit,
         given the matched text, its start and end offsets in ``text``,
         the label ``'ERROR CODE'`` and the source tag ``'regex'``.
     """
     #TODO: move regex to custom or config
     error_code = re.compile(r'\b(((o|0)(x|\*))|(800))\S*', re.IGNORECASE)
     return [
         he.append_ner(hit.group(), hit.start(), hit.end(),
                       'ERROR CODE', 'regex')
         for hit in error_code.finditer(text)
     ]