def get_spacy(self, doc):
    """Collect the entities found on ``doc.ents``.

    Every entity is handed to ``he.append_ner`` together with its text,
    its ``start_char`` / ``end_char`` offsets and its ``label_``, tagged
    with the source name ``'spacy'``.

    Args:
        doc: Object exposing an ``ents`` iterable whose items provide
            ``text``, ``start_char``, ``end_char`` and ``label_``.

    Returns:
        A list holding one ``he.append_ner`` result per entity, in the
        order of ``doc.ents``.
    """
    return [
        he.append_ner(
            entity.text,
            entity.start_char,
            entity.end_char,
            entity.label_,
            'spacy',
        )
        for entity in doc.ents
    ]
def get_list(self, doc):
    """Collect the phrases of ``doc`` that ``self.matcher`` recognises.

    Each match is handed to ``he.append_ner`` with the matched span, its
    character offsets, the match label looked up in
    ``self.nlp.vocab.strings`` and the source name ``'list'``.

    NOTE(review): another ``get_list`` definition follows this one in the
    file; if both live in the same class, the later one is the one that
    takes effect.

    Args:
        doc: The parsed document to run ``self.matcher`` on.

    Returns:
        A list holding one ``he.append_ner`` result per match, in the
        order the matcher yields them.
    """
    found = []
    for match_id, start, end in self.matcher(doc):
        span = doc[start:end]
        # The matcher yields token indices; report character offsets
        # instead, as get_spacy and get_rules do.
        found.append(
            he.append_ner(
                span,
                span.start_char,
                span.end_char,
                self.nlp.vocab.strings[match_id],
                'list',
            )
        )
    return found
def get_list(self, doc):
    """Collect the phrases of ``doc`` that ``self.matcher`` recognises.

    Each match is handed to ``he.append_ner`` with the matched span, its
    start/end expressed as character offsets, the match label looked up
    in ``self.nlp.vocab.strings`` and the source name ``'list'``.

    Args:
        doc: The parsed document to run ``self.matcher`` on.

    Returns:
        A list holding one ``he.append_ner`` result per match, in the
        order the matcher yields them.
    """
    results = []
    for match_id, start, end in self.matcher(doc):
        span = doc[start:end]
        # Translate the token-level start/end into character-level offsets:
        # begin at the first token's offset and extend by the span's
        # string length.
        char_start = doc[start].idx
        char_end = char_start + len(str(span))
        label = self.nlp.vocab.strings[match_id]
        results.append(
            he.append_ner(span, char_start, char_end, label, 'list')
        )
    return results
def get_rules(self, text):
    """Find error-code-like tokens in ``text`` with a regular expression.

    A hit starts at a word boundary with ``0x``, ``ox``, ``0*``, ``o*``
    (case-insensitive) or ``800`` and runs up to the next whitespace.
    Every hit is handed to ``he.append_ner`` with the matched text, its
    start and end offsets in ``text``, the label ``'ERROR CODE'`` and the
    source name ``'regex'``.

    Args:
        text: The string to scan.

    Returns:
        A list holding one ``he.append_ner`` result per regex hit, in
        order of appearance.
    """
    # TODO: move regex to custom or config
    error_code_pattern = r'\b(((o|0)(x|\*))|(800))\S*'
    return [
        he.append_ner(hit.group(), hit.start(), hit.end(), 'ERROR CODE', 'regex')
        for hit in re.finditer(error_code_pattern, text, re.IGNORECASE)
    ]