def extract_all_matching_tokens(text, regexes):
    """Collect the matches of every regex in ``text`` and strip them out.

    Multi-match counterpart of ``extract_matching_token``: the regexes
    are applied one after another, each to the text left over by the
    previous ones. Whenever a regex finds something, all of its
    occurrences are removed from the text and the values returned by
    ``findall`` are collected.

    Each regex must make ``findall`` yield plain strings (that is, have
    at most one capturing group), because every match is ``.strip()``-ed.

    Returns a tuple ``(tokens, remaining_text)``. ``tokens`` is a list
    built from a set, so duplicates are dropped and the order is
    unspecified. Both the tokens and the remaining text are passed
    through ``strip_plus`` before being returned.
    """
    found = set()
    remainder = text
    for pattern in regexes:
        hits = pattern.findall(remainder)
        if not hits:
            continue
        # drop every occurrence of this pattern before trying the next one
        remainder = pattern.sub('', remainder)
        found.update(hit.strip() for hit in hits)
    cleaned_tokens = [strip_plus(token) for token in found]
    return (cleaned_tokens, strip_plus(remainder))
def extract_matching_token(text, regexes):
    """Pull the first matching token out of ``text``.

    The regexes are tried in order; the first one that matches wins:

    * every occurrence of that regex is removed from the text
    * the first capturing group of the first match is the token
    * token and remaining text are both passed through ``strip_plus``
      (presumably the whitespace normalisation - see that helper)

    Returns a tuple ``(token, remaining_text)``. When none of the
    regexes match, the token is ``''`` and the text is returned with
    only its leading and trailing whitespace stripped.
    """
    for pattern in regexes:
        found = pattern.search(text)
        if not found:
            continue
        # sub() removes all occurrences, not just the one search() found
        remainder = pattern.sub('', text)
        token = found.group(1)
        return (strip_plus(token), strip_plus(remainder))
    # nothing matched: no token, text is only trimmed at the ends
    return ('', text.strip())