+--------+------------------------------------------------------+ | code | meaning | +========+======================================================+ | ^ | Start of string, or line | +--------+------------------------------------------------------+ | $ | End of string, or line | +--------+------------------------------------------------------+ | \A | Start of string | +--------+------------------------------------------------------+ | \Z | End of string | +--------+------------------------------------------------------+ | \b | Empty string at the beginning or end of a word | +--------+------------------------------------------------------+ | \B | Empty string NOT at the beginning or end of a word | +--------+------------------------------------------------------+ """ from py_09_rePatterns import test_patterns if __name__ == "__main__": test_patterns([(r'^\w+', 'word at start of string'), (r'\A\w+', 'word at start of string'), (r'\w+\S*$', 'word near end of string'), (r'\w+\S*\Z', 'word near end of string'), (r'\w*t\w*', 'word containing t'), (r'\bt\w+', 't at start of word'), (r'\w+t\b', 't at end of word'), (r'\Bt\B', 't, not start or end of word')], 'This is some text -- with punctuation.')
| code | meaning | +========+=======================================+ | \d | A digit | +--------+---------------------------------------+ | \D | A non-digit | +--------+---------------------------------------+ | \s | Whitespace (tab, space, newline, etc.)| +--------+---------------------------------------+ | \S | Non-whitespace | +--------+---------------------------------------+ | \w | Alphanumeric | +--------+---------------------------------------+ | \W | Non-alphanumeric | +--------+---------------------------------------+ """ from py_09_rePatterns import test_patterns if __name__ == "__main__": test_patterns( [ (r'\d+', 'sequence of digits'), (r'\D+', 'sequence of non-digits'), (r'\s+', 'sequence of whitespace'), (r'\S+', 'sequence of non-whitespace'), (r'\w+', 'alphanumeric characters'), (r'\W+', 'non-alphanumeric') ], 'A prime #1 example!' )
r'(?P<ends_with_t>\w+t)\b', ] for pattern in patterns: regex = re.compile(pattern) match = regex.search(text) print("'{}'".format(pattern)) print(' ', match.groups()) print(' ', match.groupdict()) print() if __name__ == "__main__": test_patterns([ ('a(ab)', 'a followed by literal ab'), ('a(a*b*)', 'a followed by 0-n a and 0-n b'), ('a(ab)*', 'a followed by 0-n ab'), ('a(ab)+', 'a followed by 1-n ab'), ], 'abbaaabbbbaaaaa') # match.groups() re_groups_match() # index reference: match.group(index) re_group_individual() # name reference: match.groupdict() re_groups_named() # match.groupdict() test_patterns_updated_ver( [(r'a((a*)(b*))', 'a followed by optional 0-n a and 0-n b')], 'abbaabbba') # match.groups() is useful for specifying alternative patterns. using pipe symbol(|) test_patterns_updated_ver([ (r'a((a+)|(b+))', 'a then seq. of a or seq. of b'),
from py_09_rePatterns import test_patterns

if __name__ == "__main__":
    # The two sample inputs shared by the demonstrations below.
    ab_run = 'abbaabbba'
    sentence = 'This is some text -- with punctuation.'

    # Character sets: [ab] matches a single 'a' or 'b'.
    set_cases = [
        ('[ab]', 'either a or b'),
        ('a[ab]+', 'a followed by 1 or more a or b'),
        ('a[ab]+?', 'a followed by 1 or more a or b, not greedy'),
    ]
    test_patterns(set_cases, ab_run)

    # Exclusion: a leading ^ inside the brackets negates the set.
    exclusion_cases = [
        ('[^-. ]+', 'sequences without -, ., or space'),
    ]
    test_patterns(exclusion_cases, sentence)

    # Ranges: a-z / A-Z stand for every character between the endpoints.
    range_cases = [
        ('[a-z]+', 'sequences of lowercase letters'),
        ('[A-Z]+', 'sequences of uppercase letters'),
        ('[a-zA-Z]+', 'sequences of lower- or uppercase letters'),
        ('[A-Z][a-z]+', 'one uppercase followed by lowercase'),
    ]
    test_patterns(range_cases, sentence)

    # Dot: matches any single character at that position.
    dot_cases = [
        ('a.', 'a followed by any one character'),
        ('b.', 'b followed by any one character'),
        ('a.*b', 'a followed by anything, ending in b'),
        ('a.*?b', 'a followed by anything, ending in b'),
    ]
    test_patterns(dot_cases, ab_run)
# import sys, os
# sys.path.append(os.path.dirname(os.path.dirname(__file__)))
# from pkg.breaker import addBreaker
from py_09_rePatterns import test_patterns, text

if __name__ == "__main__":
    # Demonstrates the repetition quantifiers *, +, ?, {m} and {m,n},
    # first in their default greedy form and then in non-greedy form.
    #
    # NOTE: a range quantifier must be written without whitespace, i.e.
    # {2,3}.  Written as "{2, 3}" the re module does not recognise it as a
    # quantifier and matches the characters "{2, 3}" literally, so the
    # pattern would never match the sample text.

    # default behavior is greedy
    test_patterns(
        [
            ('ab*', 'a followed by zero or more b'),
            ('ab+', 'a followed by one or more b'),
            ('ab?', 'a followed by zero or one b'),
            ('ab{3}', 'a followed by three b'),
            ('ab{2,3}', 'a followed by two or three b'),
        ],
        text='abbaabbba',
    )

    # non-greedy: turn greediness off by following the repetition
    # instruction with ?
    test_patterns(
        [
            ('ab*?', 'a followed by zero or more b'),
            ('ab+?', 'a followed by one or more b'),
            ('ab??', 'a followed by zero or one b'),
            ('ab{3}?', 'a followed by three b'),
            ('ab{2,3}?', 'a followed by two or three b'),
        ],
        text='abbaabbba',
    )