예제 #1
0
    def collect_regex_matches_with_quoted_chunks(
            phrase: str,
            reg: re,
            prob: int,
            quoted_def_start: Callable[[str, Match, Match], int],
            quoted_def_end: Callable[[str, Match, Match], int],
            def_start: Callable[[str, Match], int],
            def_end: Callable[[str, Match], int]) -> List[PatternFound]:
        """
        Collect definition candidates for every match of ``reg`` in ``phrase``.

        Each regex match is first handed to
        ``CommonDefinitionPatterns.peek_quoted_part``. When that call returns
        a non-empty collection, its items are used in place of the match.
        Otherwise a single ``PatternFound`` covering the whole match is
        created.

        :param phrase: text to search
        :param reg: compiled pattern; only its ``finditer`` method is used
        :param prob: value stored as ``probability`` on whole-match entries
            and forwarded to ``peek_quoted_part``
        :param quoted_def_start: (phrase, match, quoted_match) -> definition's
            start; forwarded to ``peek_quoted_part``
        :param quoted_def_end: (phrase, match, quoted_match) -> definition's
            end; forwarded to ``peek_quoted_part``
        :param def_start: (phrase, match) -> definition's start
        :param def_end: (phrase, match) -> definition's end
        :return: list of found patterns, in the order the matches occur
        """
        found = []
        for hit in reg.finditer(phrase):
            quoted = CommonDefinitionPatterns.peek_quoted_part(
                phrase, hit, quoted_def_start, quoted_def_end, prob)

            if len(quoted) == 0:
                # No quoted chunks for this match: fall back to the whole match.
                whole = PatternFound()
                whole.name = hit.group()
                whole.start = def_start(phrase, hit)
                whole.end = def_end(phrase, hit)
                whole.probability = prob
                found.append(whole)
            else:
                # Quoted chunks replace the enclosing match entirely.
                found.extend(quoted)

        return found
예제 #2
0
    def collect_regex_matches(
            phrase: str,
            reg: re,
            prob: int,
            def_start: Callable[[str, Match], int],
            def_end: Callable[[str, Match], int]) -> List[PatternFound]:
        """
        Build one ``PatternFound`` for every match of ``reg`` in ``phrase``.

        :param phrase: text to search
        :param reg: compiled pattern; only its ``finditer`` method is used
        :param prob: value stored as ``probability`` on every entry
        :param def_start: (phrase, match) -> definition's start
        :param def_end: (phrase, match) -> definition's end
        :return: list of found patterns, in the order the matches occur
        """
        def build(hit):
            # The entry's name is the full matched text; its bounds come
            # from the caller-supplied callbacks.
            entry = PatternFound()
            entry.name = hit.group()
            entry.start = def_start(phrase, hit)
            entry.end = def_end(phrase, hit)
            entry.probability = prob
            return entry

        return [build(hit) for hit in reg.finditer(phrase)]