def _replace_complex(str_val):
    """
    Replace the alternative or optional elements by a proper regular expression.

    Each occurrence of ``RegexTemplate._alt_regex`` in the string is rewritten
    as a non-capturing group: ``(?:core)?`` when the occurrence ends with
    ``?`` (optional element), ``(?:core)`` otherwise. For an optional element,
    one neighbouring space is folded into the group, so that the space is
    optional together with the element.

    Occurrences rejected by ``StringUtils.check_form`` are left untouched and
    the search resumes after them.

    :param str_val: the initial string
    :return: the formatted expression
    """
    result = str_val
    # Offset from which the next occurrence is searched. It only moves forward
    # when an ill-formed occurrence is skipped; without it the same occurrence
    # would be found again and again and the loop would never terminate.
    search_from = 0
    matcher = RegexTemplate._alt_regex.search(result, search_from)
    while matcher:
        start, end = matcher.span()
        group_val = matcher.group()
        if not StringUtils.check_form(group_val):
            # Leave the ill-formed occurrence as it is and look further on
            # (moving by at least one character, even for an empty match).
            search_from = max(end, start + 1)
            matcher = RegexTemplate._alt_regex.search(result, search_from)
            continue

        if group_val.endswith('?'):
            # Drop the first 2 and the last 4 characters of the occurrence
            # (presumably the escaped delimiters "\(" and "\)\?" -- to be
            # confirmed against the definition of _alt_regex).
            core = group_val[2:-4]
            if end < len(result) and result[end] == ' ':
                # A space follows the element: move it inside the group.
                replacement = "(?:" + core.replace("\\|", " \\|") + " )?"
                result = result[:start] + replacement + result[end + 1:]
            elif end >= len(result) and start > 0 and result[start - 1] == ' ':
                # The element closes the string and follows a space: move the
                # preceding space inside the group.
                replacement = "(?: " + core.replace("\\|", " \\|") + ")?"
                result = result[:start - 1] + replacement + result[end:]
            else:
                result = result[:start] + "(?:" + core + ")?" + result[end:]
        else:
            # Non-optional element: drop the first 2 and the last 2 characters.
            core = group_val[2:-2]
            result = result[:start] + "(?:" + core + ")" + result[end:]

        # The string has changed, so a fresh search is needed. Replacements
        # start at most one character before search_from, so the skipped
        # occurrences in front of it stay where they are.
        matcher = RegexTemplate._alt_regex.search(result, search_from)
    return result
def find(self, str_val, max_results):
    """
    Search the provided string for occurrences of the template.

    The string is stripped before the search. Occurrences for which
    ``StringUtils.is_delimited`` is false are ignored. The search ends as soon
    as the number of collected results reaches ``max_results``.

    :param str_val: the string in which to search
    :param max_results: the maximum number of results to collect
    :return: the list of match results (empty if nothing was found)
    """
    text = str_val.strip()
    found = []
    for occurrence in self._pattern.finditer(text):
        begin, finish = occurrence.span()
        if not StringUtils.is_delimited(text, begin, finish):
            continue

        hit = MatchResult(begin, finish)
        for slot, group_ref in self._slots.items():
            value = occurrence.group(group_ref).strip()
            if StringUtils.check_form(value) or not self.permutate_pattern():
                hit.add_pair(slot, value)
                continue
            # Quick-fix for the rare cases where the occurrence found by the
            # regex yields a value with unbalanced parentheses or brackets:
            # the pattern has just been permutated, so start over with it.
            # TODO: check whether this is a bug or not.
            return self.find(text, max_results)

        found.append(hit)
        if len(found) >= max_results:
            break
    return found
def match(self, str_val):
    """
    Match the template against the whole of the provided string.

    The string is stripped before matching. If a slot value is rejected by
    ``StringUtils.check_form`` and ``permutate_pattern`` succeeds, the match
    is attempted again from scratch.

    :param str_val: the string to match
    :return: a match result holding the slot values, or ``MatchResult(False)``
             if the pattern does not match the full string
    """
    matcher = self._pattern.fullmatch(str_val.strip())
    if not matcher:
        return MatchResult(False)

    outcome = MatchResult(matcher.start(), matcher.end())
    for slot, group_ref in self._slots.items():
        # The first capture of the group is used as the value of the slot.
        value = matcher.captures(group_ref)[0]
        if not StringUtils.check_form(value) and self.permutate_pattern():
            # The pattern has been permutated: retry with the new pattern.
            return self.match(str_val)
        outcome.add_pair(slot, value)
    return outcome