Esempio n. 1
0
    def __call__(self, sentence):
        """Assign ``self.label`` to every word covered by a dictionary match.

        All dictionary matches are considered (not only the longest one).
        A match is ignored when its term, either as-is or lowercased, is
        found in ``self.stopwords``.

        Parameters
        ----------
        sentence
            Object exposing ``words`` and ``char_offsets``; it is also passed
            to ``get_word_index_span`` to map character spans to word indices.

        Returns
        -------
        dict
            Maps each covered index to ``self.label``. Indices not covered by
            any accepted match are absent.
        """
        hits = apply_matcher(
            sentence.words,
            sentence.char_offsets,
            self.dictionary,
            max_ngrams=self.max_ngrams,
            longest_match_only=False,
            case_sensitive=self.case_sensitive,
        )

        word_labels = {}
        for (char_start, char_end), term in hits:
            if not (term.lower() in self.stopwords or term in self.stopwords):
                # the helper is given an inclusive end offset, hence the -1
                first, last = get_word_index_span(
                    (char_start, char_end - 1), sentence)
                word_labels.update(
                    dict.fromkeys(range(first, last + 1), self.label))
        return word_labels
Esempio n. 2
0
    def __call__(self, sentence: Sentence) -> Dict[int, int]:
        """Label the words covered by ontology matches in a sentence.

        Only the longest matches are requested from ``apply_matcher``. They
        are processed in ascending order of their character span, so when
        two matches cover the same index the later one wins.

        Parameters
        ----------
        sentence
            Sentence to label; its ``words`` and ``char_offsets`` are passed
            to ``apply_matcher``.

        Returns
        -------
        Dict[int, int]
            Maps each covered index to the label returned by
            ``self._get_term_label`` for the matched term. Matches whose
            label is falsy (e.g. ``None``) contribute nothing.
        """
        found = apply_matcher(
            sentence.words,
            sentence.char_offsets,
            self.ontology,
            max_ngrams=self.max_ngrams,
            longest_match_only=True,
            case_sensitive=self.case_sensitive,
        )
        ordered = sorted(found, key=lambda x: x[0], reverse=False)

        word_labels = {}
        for (char_start, char_end), term in ordered:
            label = self._get_term_label(term)
            # a falsy label means "abstain": leave the words unlabeled
            if label:
                # the helper is given an inclusive end offset, hence the -1
                first, last = get_word_index_span(
                    (char_start, char_end - 1), sentence)
                word_labels.update(
                    dict.fromkeys(range(first, last + 1), label))
        return word_labels
Esempio n. 3
0
    def __call__(self, sentence: Sentence) -> Dict[int, int]:
        """Label the words of a sentence from ontology and slot-pattern matches.

        Ontology terms are matched first (longest match only), sorted by
        character span and passed through ``self._merge_matches``. Each
        pattern in ``self.slot_rgxs`` is then filled with runs of consecutive
        matched terms; when a filled pattern matches the sentence, the
        pattern's span replaces the argument matches it was built from.
        Finally every remaining match is projected onto word indices.

        Parameters
        ----------
        sentence
            Sentence to label; its ``words`` and ``char_offsets`` are passed
            to ``apply_matcher``.

        Returns
        -------
        Dict[int, int]
            Maps each covered index to a label. Matches with a falsy label
            are skipped, so uncovered indices are absent. Empty when the
            ontology produces no match.
        """
        matches = apply_matcher(sentence.words,
                                sentence.char_offsets,
                                self.ontology,
                                max_ngrams=self.max_ngrams,
                                longest_match_only=True,
                                case_sensitive=self.case_sensitive)

        matches = sorted(matches, key=lambda x: x[0], reverse=False)
        if not matches:
            return {}

        matches, labels = self._merge_matches(matches)
        terms = [m[-1] for m in matches]
        # (term, label) pairs never change while slots are being filled,
        # so build them once instead of once per slot pattern.
        args = list(zip(terms, labels))

        # Slot-filled matches
        f_matches = []
        # mask[i] == 1 marks a match already absorbed by a slot pattern
        mask = np.array([0] * len(matches))
        for slot in self.slot_rgxs:
            n_args = slot.count('{}')

            # slide a window of n_args consecutive matches over the sentence
            for i in range(len(args) - n_args + 1):

                # skip arguments that are already matched
                if 1 in mask[i:i + n_args]:
                    continue

                xs, ys = zip(*args[i:i + n_args])

                # HACK: positive classes only -- windows containing an
                # unlabeled (None) or label-2 argument are not slot-filled
                if None in ys or 2 in ys:
                    continue

                # NOTE(review): terms are interpolated into the pattern
                # unescaped; a term containing regex metacharacters changes
                # the pattern's meaning -- confirm whether re.escape is wanted
                rgx = re.compile(slot.format(*xs), re.I)
                m = match_rgx(rgx, sentence)
                if m:
                    # only the first reported regex match is used
                    m = list(m.items())[0]
                    span = list(m[0][0:2])
                    # shift the end offset by one; the final loop below
                    # subtracts one again before mapping to word indices
                    span[-1] += 1
                    m = tuple([span, m[-1].text])
                    # expand the argument matches to this span
                    mask[i:i + n_args] = 1
                    f_matches.append((m, np.unique(ys)[0]))

        # drop absorbed argument matches, then add slot filled matches;
        # slot-filled spans come last so they overwrite overlapping labels
        matches = [m for i, m in zip(mask, matches) if i == 0]
        labels = [y for i, y in zip(mask, labels) if i == 0]
        for m, y in f_matches:
            matches.append(m)
            labels.append(y)

        L = {}
        for ((char_start, char_end), term), label in zip(matches, labels):

            # None labels are treated as abstains
            if not label:
                continue

            # check span-specific rules: a firing rule turns label 1 into 2
            if self.span_rule and label == 1:
                span = Span(char_start, char_end - 1, sentence)
                if self.span_rule(span):
                    label = 2

            # stopword terms are always given label 2
            if term.lower() in self.stopwords or term in self.stopwords:
                label = 2

            start, end = get_word_index_span((char_start, char_end - 1),
                                             sentence)
            for i in range(start, end + 1):
                L[i] = label

        return L