def __call__(self, sentence):
    """Tag every word covered by a dictionary match with ``self.label``.

    ``apply_matcher`` is run over the sentence with ``self.dictionary``
    (all matches are kept, not only the longest). Matches whose term is in
    ``self.stopwords`` -- either verbatim or lower-cased -- are ignored.

    Parameters
    ----------
    sentence
        Object exposing ``words`` and ``char_offsets``.

    Returns
    -------
    dict
        Word index -> ``self.label`` for each word inside a kept match.
    """
    hits = apply_matcher(
        sentence.words,
        sentence.char_offsets,
        self.dictionary,
        max_ngrams=self.max_ngrams,
        longest_match_only=False,
        case_sensitive=self.case_sensitive,
    )

    token_labels = {}
    for (char_start, char_end), term in hits:
        is_stopword = term.lower() in self.stopwords or term in self.stopwords
        if not is_stopword:
            # The match's end offset is exclusive; the span helper is given
            # an inclusive one.
            first, last = get_word_index_span(
                (char_start, char_end - 1), sentence
            )
            token_labels.update(
                dict.fromkeys(range(first, last + 1), self.label)
            )
    return token_labels
def __call__(self, sentence: Sentence) -> Dict[int, int]:
    """Tag words covered by ontology matches with the matched term's label.

    ``apply_matcher`` is run with ``self.ontology`` (longest match only) and
    the matches are processed in ascending order of their character span, so
    a later match overwrites an earlier one on any shared word index.

    Parameters
    ----------
    sentence
        Object exposing ``words`` and ``char_offsets``.

    Returns
    -------
    Dict[int, int]
        Word index -> label returned by ``self._get_term_label`` for the
        covering term. Terms with a falsy label contribute nothing.
    """
    found = apply_matcher(
        sentence.words,
        sentence.char_offsets,
        self.ontology,
        max_ngrams=self.max_ngrams,
        longest_match_only=True,
        case_sensitive=self.case_sensitive,
    )
    # Ascending by (char_start, char_end).
    found = sorted(found, key=lambda hit: hit[0])

    token_labels = {}
    for (char_start, char_end), term in found:
        term_label = self._get_term_label(term)
        # Falsy labels (e.g. None) are treated as abstains.
        if term_label:
            # The match's end offset is exclusive; the span helper is given
            # an inclusive one.
            first, last = get_word_index_span(
                (char_start, char_end - 1), sentence
            )
            for idx in range(first, last + 1):
                token_labels[idx] = term_label
    return token_labels
def __call__(self, sentence: Sentence) -> Dict[int, int]:
    """Label words from ontology matches, merging slot-filled phrases.

    Steps:

    1. ``apply_matcher`` is run with ``self.ontology`` (longest match only);
       matches are sorted by character span and passed through
       ``self._merge_matches``, which yields the matches and their labels.
    2. Every pattern in ``self.slot_rgxs`` is filled, via ``str.format``,
       with each run of ``n`` consecutive matched terms (``n`` = number of
       ``{}`` placeholders). When the filled pattern matches the sentence,
       the regex span replaces the argument matches it was built from.
    3. Each remaining match is projected onto the word indices it covers.
       A label of 1 is changed to 2 when ``self.span_rule`` accepts the
       span, and any label is changed to 2 when the term is a stopword.

    Parameters
    ----------
    sentence
        Object exposing ``words`` and ``char_offsets``.

    Returns
    -------
    Dict[int, int]
        Word index -> label. Matches with a falsy label are skipped; an
        empty dict is returned when the matcher finds nothing.

    Raises
    ------
    ValueError
        If a slot pattern contains no ``{}`` placeholder.
    """
    matches = apply_matcher(
        sentence.words,
        sentence.char_offsets,
        self.ontology,
        max_ngrams=self.max_ngrams,
        longest_match_only=True,
        case_sensitive=self.case_sensitive,
    )
    matches = sorted(matches, key=lambda x: x[0])
    if not matches:
        return {}

    matches, labels = self._merge_matches(matches)
    terms = [m[-1] for m in matches]
    # The (term, label) pairs do not change while slots are being filled,
    # so they are built once rather than once per slot pattern.
    args = list(zip(terms, labels))

    # Slot-filled matches
    slot_matches = []
    # consumed[k] is True once matches[k] has been absorbed by a slot match.
    consumed = np.zeros(len(matches), dtype=bool)
    for slot in self.slot_rgxs:
        n_args = slot.count('{}')
        if n_args == 0:
            # Without this check the tuple unpacking below fails with an
            # opaque "not enough values to unpack" ValueError.
            raise ValueError(
                "slot pattern {!r} has no '{{}}' placeholder".format(slot)
            )
        for i in range(len(args) - n_args + 1):
            window = slice(i, i + n_args)
            # skip arguments that are already matched
            if consumed[window].any():
                continue
            xs, ys = zip(*args[window])
            # HACK - positive classes only
            # (presumably 1 = positive and 2 = negative -- TODO confirm)
            if None in ys or 2 in ys:
                continue
            # NOTE(review): terms are interpolated into the pattern
            # unescaped, so regex metacharacters inside a term alter the
            # pattern -- confirm whether re.escape is wanted here.
            rgx = re.compile(slot.format(*xs), re.I)
            found = match_rgx(rgx, sentence)
            if not found:
                continue
            # Only the first entry of the regex result is used.
            key, value = next(iter(found.items()))
            span = list(key[0:2])
            # Convert the end offset to the exclusive form used by the
            # ontology matches (1 is subtracted again further down).
            span[-1] += 1
            # expand the argument matches to this span
            consumed[window] = True
            slot_matches.append(((span, value.text), np.unique(ys)[0]))

    # Drop the absorbed argument matches, then add the slot-filled ones.
    matches = [m for used, m in zip(consumed, matches) if not used]
    labels = [y for used, y in zip(consumed, labels) if not used]
    matches.extend(m for m, _ in slot_matches)
    labels.extend(y for _, y in slot_matches)

    L = {}
    for ((char_start, char_end), term), label in zip(matches, labels):
        # Falsy labels (e.g. None) are treated as abstains.
        if not label:
            continue

        # check span-specific rules
        if self.span_rule and label == 1:
            span = Span(char_start, char_end - 1, sentence)
            if self.span_rule(span):
                label = 2

        if term.lower() in self.stopwords or term in self.stopwords:
            label = 2

        start, end = get_word_index_span((char_start, char_end - 1), sentence)
        for i in range(start, end + 1):
            L[i] = label
    return L