Esempio n. 1
0
    def _replace_complex(str_val):
        """
        Replace the alternative or optional elements by a proper regular expression.

        Each occurrence of ``RegexTemplate._alt_regex`` in the string is rewritten
        as a non-capturing group ``(?:...)``; the group is made optional (``?``)
        when the matched text ends with ``?``. Occurrences rejected by
        ``StringUtils.check_form`` are left untouched in the output.

        :param str_val: the initial string
        :return: the formatted expression
        """
        result = str_val
        matcher = RegexTemplate._alt_regex.search(result)
        while matcher:
            group_val = matcher.group()
            start, end = matcher.start(), matcher.end()

            if not StringUtils.check_form(group_val):
                # Resume the search *after* the rejected occurrence. A bare
                # ``continue`` would test the very same match again and never
                # terminate. ``start + 1`` guards against a zero-width match.
                matcher = RegexTemplate._alt_regex.search(result, max(end, start + 1))
                continue

            if group_val.endswith('?'):
                # Optional element: drop the 2 leading and 4 trailing marker
                # characters to obtain the inner content.
                core = group_val[2:-4]
                spaced_core = core.replace("\\|", " \\|")
                if end < len(result) and result[end] == ' ':
                    # The space following the element moves inside the optional
                    # group, so it disappears together with the element.
                    result = result[:start] + "(?:" + spaced_core + " )?" + result[end + 1:]
                elif end >= len(result) and start > 0 and result[start - 1] == ' ':
                    # Element at the very end: the preceding space moves inside
                    # the optional group instead.
                    result = result[:start - 1] + "(?: " + spaced_core + ")?" + result[end:]
                else:
                    result = result[:start] + "(?:" + core + ")?" + result[end:]
            else:
                # Mandatory alternative: drop the 2 leading and 2 trailing
                # marker characters.
                core = group_val[2:-2]
                result = result[:start] + "(?:" + core + ")" + result[end:]

            # The string changed, so start over from its beginning (any rejected
            # occurrence met again is simply skipped once more).
            matcher = RegexTemplate._alt_regex.search(result)

        return result
Esempio n. 2
0
    def find(self, str_val, max_results):
        """
        Search the provided string for occurrences of the template.

        The string is stripped before searching. Matches for which
        ``StringUtils.is_delimited`` is false are ignored, and the search ends
        as soon as ``max_results`` results have been collected.

        :param str_val: the string to search in
        :param max_results: the number of results after which the search stops
        :return: the list of match results, one per retained occurrence
        """
        text = str_val.strip()
        found = []

        for hit in self._pattern.finditer(text):
            begin, finish = hit.start(), hit.end()
            if not StringUtils.is_delimited(text, begin, finish):
                continue

            occurrence = MatchResult(begin, finish)
            for slot_key, group_ref in self._slots.items():
                filled_value = hit.group(group_ref).strip()

                # Quick fix for rare cases where the regex occurrence yields a
                # slot value with unbalanced parentheses or brackets: if the
                # pattern can be permutated, restart the whole search.
                # TODO: check whether this is a bug or not.
                malformed = not StringUtils.check_form(filled_value)
                if malformed and self.permutate_pattern():
                    return self.find(text, max_results)

                occurrence.add_pair(slot_key, filled_value)

            found.append(occurrence)
            if len(found) >= max_results:
                return found

        return found
Esempio n. 3
0
    def match(self, str_val):
        """
        Try to match the template against the whole of the provided string.

        The string is stripped and must be matched in full by the pattern.

        :param str_val: the string to match
        :return: a match result holding one pair per slot when the pattern
                 matches, ``MatchResult(False)`` otherwise
        """
        trimmed = str_val.strip()
        full = self._pattern.fullmatch(trimmed)
        if not full:
            return MatchResult(False)

        outcome = MatchResult(full.start(), full.end())
        for slot_key, group_ref in self._slots.items():
            filled_value = full.captures(group_ref)[0]
            # An ill-formed slot value triggers a retry with a permutated
            # pattern, when such a permutation is available.
            malformed = not StringUtils.check_form(filled_value)
            if malformed and self.permutate_pattern():
                return self.match(str_val)
            outcome.add_pair(slot_key, filled_value)

        return outcome