def test_charclass_in_range():
    r"""Check class escapes (\d, \D, \s, \S, \w, \W) used inside brackets."""
    # A digit escape combined with a literal 'e' in one bracket set.
    digit_or_e = make_runner(r"[\de]")
    for accepted in ('0', '5', '9', 'e'):
        assert digit_or_e.recognize(accepted)
    assert not digit_or_e.recognize('d')

    # The same set, repeated two or more times.
    repeated = make_runner(r"[\de]{2,}")
    for accepted in ('09', '158', '3eee'):
        assert repeated.recognize(accepted)
    for rejected in ('1', 'ddee'):
        assert not repeated.recognize(rejected)

    # Non-digit escape combined with a literal '5'.
    non_digit_or_5 = make_runner(r"[\D5]")
    for accepted in ('d', '\n', '5'):
        assert non_digit_or_5.recognize(accepted)
    assert not non_digit_or_5.recognize('0')

    # One whitespace character followed by one non-whitespace character.
    space_then_other = make_runner(r"[\s][\S]")
    for accepted in (' d', '\t9'):
        assert space_then_other.recognize(accepted)
    for rejected in ('d ', '99', '\r\r'):
        assert not space_then_other.recognize(rejected)

    # Two word runs separated by exactly one non-word character.
    two_words = make_runner(r"[\w]+\W[\w]+")
    assert two_words.recognize('hey hey')
    for rejected in ('word', 'variable_name'):
        assert not two_words.recognize(rejected)
def test_range():
    """Check plain, multi-range and negated bracket ranges."""
    uppercase = make_runner("[A-Z]")
    for accepted in ("A", "F", "Z"):
        assert uppercase.recognize(accepted)
    assert not uppercase.recognize("j")

    # These two lists drive both the positive and the negated set below.
    inside = ("a", "b", "c", "e", "g", "h", "i")
    outside = ("d", "f")

    multi = make_runner("[a-ceg-i]")
    for char in inside:
        assert multi.recognize(char)
    for char in outside:
        assert not multi.recognize(char)

    # The negated set must flip every answer.
    negated = make_runner("[^a-ceg-i]")
    for char in inside:
        assert not negated.recognize(char)
    for char in outside:
        assert negated.recognize(char)
def test_quotes():
    """Check a double-quoted-string pattern and newline escapes."""
    # A double-quoted string with no quote or backslash inside.
    quoted_string = make_runner('"[^\\"]*"')
    for accepted in ('"abc"', '"asdfefveeaa"'):
        assert quoted_string.recognize(accepted)
    assert not quoted_string.recognize('"""')

    # Both escapes must mean a newline, not a literal 'n'.
    two_newlines = make_runner('\\n\\x0a')
    assert not two_newlines.recognize("n\n")
    assert two_newlines.recognize("\n\n")
def test_quoted():
    """Check an escaped parenthesis and a hex escape inside a repeated group."""
    # The backslash turns '(' into a literal that '*' can repeat.
    literal_parens = make_runner("\\(*")
    assert literal_parens.recognize("(")
    assert not literal_parens.recognize("\\(")

    # \x61 is 'a', so the group is "aa" repeated.
    doubled_a = make_runner("(\\x61a)*")
    for accepted in ("aa", "aaaaaa"):
        assert doubled_a.recognize(accepted)
    for rejected in ("a", "aabb"):
        assert not doubled_a.recognize(rejected)
def test_plus():
    """Check the one-or-more operator on a range and on single characters."""
    digits = make_runner("[0-9]+")
    assert digits.recognize("09123")
    # '+' needs at least one occurrence, so the empty string is rejected.
    assert not digits.recognize("")

    a_then_b = make_runner("a+b+")
    for accepted in ("ab", "aaaaabbb"):
        assert a_then_b.recognize(accepted)
    for rejected in ("b", "a", "c"):
        assert not a_then_b.recognize(rejected)
def test_singlequote():
    """Check patterns that contain literal single quotes."""
    lone_quote = make_runner("'")
    assert lone_quote.recognize("'")
    assert not lone_quote.recognize('"')

    # A quoted body of at least one character, with a closing quote.
    quoted_body = make_runner("'..*'")
    assert quoted_body.recognize("'adadf'")
    assert not quoted_body.recognize("'adfasdf")

    # Either an identifier or a non-empty single-quoted string.
    ident_or_quoted = make_runner("([a-z]([a-zA-Z0-9]|_)*)|('..*')")
    for accepted in ("aasdf", "'X'"):
        assert ident_or_quoted.recognize(accepted)
    assert not ident_or_quoted.recognize("''")
def test_questionmark():
    """Check the optional operator on a character and on a group."""
    optional_b = make_runner("ab?")
    for accepted in ("a", "ab"):
        assert optional_b.recognize(accepted)

    # Integer literal: a lone zero, or an optionally signed number with no
    # leading zero.  Cases stay in their original order.
    integer = make_runner("0|(\\+|\\-)?[1-9][0-9]*")
    cases = [
        ("0", True),
        ("00", False),
        ("12341", True),
        ("021314", False),
        ("+12314", True),
        ("-12314", True),
    ]
    for text, should_match in cases:
        if should_match:
            assert integer.recognize(text)
        else:
            assert not integer.recognize(text)
def test_repetition():
    """Check exact-count and bounded-range repetition of a single character."""
    exact = make_runner('a{15}')
    assert exact.recognize("a" * 15)
    assert not exact.recognize("a" * 14)
    assert not exact.recognize("a" * 16)
    # Use the correct length with the wrong character, so this rejection
    # cannot be explained by the length alone.
    assert not exact.recognize("b" * 15)

    bounded = make_runner('a{2,10}')
    for count in (2, 5, 10):
        assert bounded.recognize("a" * count)
    assert not bounded.recognize("a")
    assert not bounded.recognize("a" + "b")
    for count in (11, 12):
        assert not bounded.recognize("a" * count)
def test_quoted():
    """Check an escaped parenthesis and a hex escape inside a repeated group.

    The original body repeated the second stanza twice verbatim; the
    redundant copy has been dropped, so each pattern is checked once.
    """
    # The backslash turns '(' into a literal that '*' can repeat.
    r = make_runner("\\(*")
    assert r.recognize("(")
    assert not r.recognize("\\(")

    # \x61 is 'a', so the group is "aa" repeated.
    r = make_runner("(\\x61a)*")
    assert r.recognize("aa")
    assert r.recognize("aaaaaa")
    assert not r.recognize("a")
    assert not r.recognize("aabb")
def test_quotes():
    """Check a quoted-string pattern and newline, octal, hex and '?' escapes."""
    # A double-quoted string with no quote or backslash inside.
    quoted_string = make_runner('"[^\\"]*"')
    for accepted in ('"abc"', '"asdfefveeaa"'):
        assert quoted_string.recognize(accepted)
    assert not quoted_string.recognize('"""')

    # Both escapes must mean a newline, not a literal 'n'.
    two_newlines = make_runner('\\n\\x0a')
    assert not two_newlines.recognize("n\n")
    assert two_newlines.recognize("\n\n")

    # Octal escapes, with and without a leading zero.
    octal_newlines = make_runner('\\12\\012')
    assert octal_newlines.recognize("\n\n")

    # The highest byte value, written in octal and in hex.
    high_bytes = make_runner('\\377\\xff')
    assert high_bytes.recognize("\xff\xff")

    # An escaped operator character is a plain literal.
    literal_question = make_runner('\\?')
    assert literal_question.recognize("?")
    assert not literal_question.recognize("a")
def test_escaped_quote():
    """Check a string-literal pattern that allows backslash-escaped characters."""
    string_literal = make_runner(r'"[^\\"]*(\\.[^\\"]*)*"')
    for accepted in (r'""', r'"a"', r'"a\"b"', r'"\\\""'):
        assert string_literal.recognize(accepted)
    # An escaped backslash followed by two bare quotes must not match.
    assert not string_literal.recognize(r'"\\""')
def test_simple():
    """Check star, concatenation, alternation, grouping and the dot."""
    star = make_runner("a*")
    for accepted in ("aaaaa", ""):
        assert star.recognize(accepted)
    # The trailing space makes this one fail.
    assert not star.recognize("aaaaaaaaaaaaaaaaaaaaaaaaaa ")

    alternation = make_runner("a*bc|d")
    for accepted in ("aaaaabc", "bc", "d"):
        assert alternation.recognize(accepted)
    assert not alternation.recognize("abcd")

    grouped = make_runner("(ab)*|a*b*")
    for accepted in ("ababababab", "aaaabb"):
        assert grouped.recognize(accepted)
    assert not grouped.recognize("abababaabb")

    anything = make_runner(".*")
    for accepted in ("kjsadfq3jlflASDF@#$", "vka afj ASF# A"):
        assert anything.recognize(accepted)
def test_triple_regex():
    """Check a pattern for text where the delimiter appears at most twice in a row.

    `any` and `group` are helpers defined outside this block; presumably
    they build repetition and alternation sub-patterns -- confirm against
    their definitions.
    """
    quote = '"'
    harmless_char = r"[^\%s]" % (quote, )
    harmless_run = harmless_char + "*"
    harmless_some = harmless_char + "+"
    # One or two delimiters followed by at least one harmless character.
    delimited_piece = group(quote, 2 * quote) + harmless_some
    pattern = harmless_run + any(delimited_piece)

    matcher = make_runner(pattern)
    assert matcher.recognize('""a""a""a""a')
    # Three delimiters in a row must be rejected.
    assert not matcher.recognize('""a""a"""a""a')
def test_triple_regex():
    """Check a pattern for text where the delimiter appears at most twice in a row.

    `any` and `group` are helpers defined outside this block; presumably
    they build repetition and alternation sub-patterns -- confirm against
    their definitions.
    """
    quote = '"'
    harmless_char = r"[^\%s]" % (quote, )
    zero_or_more = harmless_char + "*"
    one_or_more = harmless_char + "+"
    # One or two delimiters followed by at least one harmless character.
    quotes_then_text = group(quote, 2 * quote) + one_or_more
    pattern = zero_or_more + any(quotes_then_text)

    matcher = make_runner(pattern)
    assert matcher.recognize('""a""a""a""a')
    # Three delimiters in a row must be rejected.
    assert not matcher.recognize('""a""a"""a""a')
def test_charclass():
    r"""Check the class escapes \d, \D, \s, \S, \w and \W outside brackets."""
    digit = make_runner(r"\d")
    for accepted in ('0', '5', '9'):
        assert digit.recognize(accepted)
    assert not digit.recognize('d')

    two_or_more_digits = make_runner(r"\d{2,}")
    for accepted in ('09', '158'):
        assert two_or_more_digits.recognize(accepted)
    assert not two_or_more_digits.recognize('1')

    # Exactly one non-digit character.
    non_digit = make_runner(r"\D")
    for accepted in ('d', '\n'):
        assert non_digit.recognize(accepted)
    for rejected in ('0', '1234'):
        assert not non_digit.recognize(rejected)

    space_then_other = make_runner(r"\s\S")
    for accepted in (' d', '\t9'):
        assert space_then_other.recognize(accepted)
    for rejected in ('d ', '99', '\r\r'):
        assert not space_then_other.recognize(rejected)

    word = make_runner(r"\w+")
    for accepted in ('word', 'variable_name', 'abc123'):
        assert word.recognize(accepted)
    for rejected in ('word\n', 'hey hey'):
        assert not word.recognize(rejected)

    # Word character, non-word character, word character.
    word_gap_word = make_runner(r"\w\W\w")
    for accepted in ('9 9', '_\fx'):
        assert word_gap_word.recognize(accepted)
    assert not word_gap_word.recognize('\n\r\t')
def test_repetition():
    """Check exact, bounded and open-ended repetition counts."""
    exact = make_runner('a{15}')
    assert exact.recognize("a" * 15)
    for count in (14, 16):
        assert not exact.recognize("a" * count)
    # Right length, wrong character.
    assert not exact.recognize("b" * 15)

    bounded = make_runner('a{2,10}')
    for count in (2, 5, 10):
        assert bounded.recognize("a" * count)
    assert not bounded.recognize("a")
    assert not bounded.recognize("a" + "b")
    for count in (11, 12):
        assert not bounded.recognize("a" * count)

    # Only a lower bound: any count from three upwards is accepted.
    at_least_three = make_runner('a{3,}')
    for count in (3, 5, 10, 12):
        assert at_least_three.recognize("a" * count)
    assert not at_least_three.recognize("a")
    assert not at_least_three.recognize("a" + "b")
    assert not at_least_three.recognize("a" * 2)
def run_individual_test(regex, tests):
    """Run a test from the PCRE suite.

    regex -- pattern source.  A leading '^' and a trailing '$' that is not
        written as '\\$' are treated as anchors and stripped before the
        pattern is handed to make_runner.
    tests -- iterable of (test, match) pairs: the subject string and the
        substring expected to match, or None when no match is expected.

    Raises AssertionError if the first recognized substring differs from
    `match`, or if nothing is recognized while `match` is not None.
    Returns None in every case; a regex that is blank after anchor
    stripping is skipped silently.

    The print calls use the single-argument parenthesized form, which
    produces the same output as a Python 2 print statement.
    """
    # Process the regex and make it ready for make_runner.
    anchor_left = regex.startswith('^')
    anchor_right = regex.endswith('$') and not regex.endswith('\\$')
    regex_to_use = regex
    if anchor_left:
        regex_to_use = regex_to_use[1:]   # chop the ^ if it's there
    if anchor_right:
        regex_to_use = regex_to_use[:-1]  # chop the $ if it's there

    if not regex_to_use:
        # A blank regex cannot be handled; skip it silently.
        return

    print("%s:" % regex_to_use)
    runner = make_runner(regex_to_use)

    # Now run the test expressions against the regex.
    for test, match in tests:
        print("/%r/%r/" % (test, match))
        subject_len = len(test)

        # Offset len(test) is a valid start too.  Without it an empty
        # subject, or an empty match at the very end, is never tried when
        # the pattern is not left-anchored.
        if anchor_left:
            start_range = [0]
        else:
            start_range = range(0, subject_len + 1)

        if anchor_right:
            subseq_gen = ((start, subject_len) for start in start_range)
        else:
            # Go backwards to simulate greediness.  The upper bound is
            # len(test): slicing clamps, so len(test) + 1 would only repeat
            # the same attempt.
            subseq_gen = ((start, end)
                          for start in start_range
                          for end in range(subject_len, start - 1, -1))

        # Search the possibilities for a match...
        for start, end in subseq_gen:
            attempt = test[start:end]
            if runner.recognize(attempt):
                assert attempt == match
                break
        else:
            # No candidate was recognized.
            assert match is None
def run_individual_test(regex, tests):
    """Run a test from the PCRE suite.

    regex -- pattern source.  A leading '^' and a trailing '$' that is not
        written as '\\$' are treated as anchors and stripped before the
        pattern is handed to make_runner.
    tests -- iterable of (test, match) pairs: the subject string and the
        substring expected to match, or None when no match is expected.

    Raises AssertionError if the first recognized substring differs from
    `match`, or if nothing is recognized while `match` is not None.
    Returns None in every case; a regex that is blank after anchor
    stripping is skipped silently.

    The print calls use the single-argument parenthesized form, which
    produces the same output as a Python 2 print statement.
    """
    # Process the regex and make it ready for make_runner.
    regex_to_use = regex
    anchor_left = regex_to_use.startswith('^')
    anchor_right = (regex_to_use.endswith('$')
                    and not regex_to_use.endswith('\\$'))
    if anchor_left:
        regex_to_use = regex_to_use[1:]   # chop the ^ if it's there
    if anchor_right:
        regex_to_use = regex_to_use[:-1]  # chop the $ if it's there

    if not regex_to_use:
        # A blank regex cannot be handled; skip it silently.
        return

    print("%s:" % regex_to_use)
    runner = make_runner(regex_to_use)

    # Now run the test expressions against the regex.
    for test, match in tests:
        print("/%r/%r/" % (test, match))
        end_of_test = len(test)

        # Create possible subsequences that we should test.  Offset
        # len(test) is included so that an empty subject, or an empty match
        # at the very end, is tried when the pattern is not left-anchored.
        if anchor_left:
            start_range = [0]
        else:
            start_range = range(0, end_of_test + 1)

        if anchor_right:
            subseq_gen = ((start, end_of_test) for start in start_range)
        else:
            # Go backwards to simulate greediness.  Starting at len(test)
            # rather than len(test) + 1 avoids repeating the full-length
            # slice, because slicing clamps the end index.
            subseq_gen = ((start, end)
                          for start in start_range
                          for end in range(end_of_test, start - 1, -1))

        # Search the possibilities for a match...
        for start, end in subseq_gen:
            attempt = test[start:end]
            if runner.recognize(attempt):
                assert attempt == match
                break
        else:
            # No candidate was recognized.
            assert match is None
def test_file():
    """Open the PCRE tests and run them.

    Reads 'testinput1' and 'testoutput1' from the current directory, walks
    the (regex, flags) pairs produced by create_regex_iterator, and prints
    one pass / FALSE MATCH / MISSED / BAD MATCH line per test subject.
    Nothing is asserted; results are only printed.

    Regexes with the 'x' flag, non-greedy operators, '(?' constructions, or
    a blank body after anchor stripping are reported as SKIPPED.

    The print calls use the single-argument parenthesized form, which
    produces the same output as a Python 2 print statement.
    """
    # Context managers close both files even if reading fails.
    with open('testinput1', 'r') as input_file:
        tests = [line.rstrip() for line in input_file.readlines()]
    with open('testoutput1', 'r') as output_file:
        results = [line.rstrip() for line in output_file.readlines()]

    # How to normalise text for each supported flag combination.  For 'i',
    # both the pattern and the subject are upper-cased.
    regex_flag_mapping = {'': lambda s: s, 'i': lambda s: s.upper()}

    regex_set = create_regex_iterator(tests, results)
    for regex, regex_flags in regex_set:
        try:
            print('%r' % regex)

            # Create an iterator to grab the test/results for this regex
            result_set = create_result_iterator(tests, results)

            # Handle the flags:
            if regex_flags in regex_flag_mapping:
                text_prepare = regex_flag_mapping[regex_flags]
            elif 'x' in regex_flags:
                raise SkipException("Cant do extended PRCE expressions")
            else:
                # NOTE(review): unlike the skip path below, this branch does
                # not drain result_set before moving on -- confirm whether
                # create_result_iterator requires that.
                print("UNKNOWN FLAGS: %s" % regex_flags)
                continue

            skipped = any(
                [op in regex for op in ['*?', '??', '+?', '}?', '(?']])
            if skipped:
                raise SkipException(
                    "Cant do non-greedy operators or '(?' constructions)")

            regex_to_use = text_prepare(regex)

            anchor_left = regex_to_use.startswith('^')
            anchor_right = regex_to_use.endswith(
                '$') and not regex_to_use.endswith('\\$')
            if anchor_left:
                regex_to_use = regex_to_use[1:]   # chop the ^ if it's there
            if anchor_right:
                regex_to_use = regex_to_use[:-1]  # chop the $ if it's there

            if not regex_to_use:
                raise SkipException("Cant do blank regex")
        except SkipException as e:
            print(" SKIPPED (%s)" % e.message)
            # now burn all the tests for this regex
            for _ in result_set:
                pass
            continue

        # Finally, we make the pypy regex runner
        runner = make_runner(regex_to_use)

        # Now run the test expressions against the Regex
        for test, result in result_set:
            # Create possible subsequences that we should test
            if anchor_left:
                start_range = [0]
            else:
                start_range = range(0, len(test))

            if anchor_right:
                subseq_gen = ((start, len(test)) for start in start_range)
            else:
                # Go backwards to simulate greediness
                subseq_gen = ((start, end)
                              for start in start_range
                              for end in range(len(test) + 1, start, -1))

            # Reset per subject.  If no candidate is generated (for example
            # an empty subject with an unanchored pattern), these would
            # otherwise be unbound or left over from the previous subject.
            matched = False
            attempt = None

            # Search the possibilities for a match...
            for start, end in subseq_gen:
                attempt = text_prepare(test[start:end])
                matched = runner.recognize(attempt)
                if matched:
                    break

            # Did we get what we expected?
            if result == 'No match':
                if matched:
                    print(" FALSE MATCH: regex==%r test==%r" % (regex, test))
                else:
                    print(" pass: regex==%r test==%r" % (regex, test))
            elif result.startswith(' 0: '):
                # The expected match text follows the 4-character prefix.
                if not matched:
                    print(" MISSED: regex==%r test==%r" % (regex, test))
                elif not attempt == text_prepare(result[4:]):
                    print(" BAD MATCH: regex==%r test==%r found==%r expect==%r" % (
                        regex, test, attempt, result[4:]))
                else:
                    print(" pass: regex==%r test==%r" % (regex, test))
def test_file():
    """Open the PCRE tests and run them.

    Reads 'testinput1' and 'testoutput1' from the current directory, walks
    the (regex, flags) pairs produced by create_regex_iterator, and prints
    one pass / FALSE MATCH / MISSED / BAD MATCH line per test subject.
    Nothing is asserted; results are only printed.

    Regexes with the 'x' flag, non-greedy operators, '(?' constructions, or
    a blank body after anchor stripping are reported as SKIPPED.

    The print calls use the single-argument parenthesized form, which
    produces the same output as a Python 2 print statement.
    """
    # Context managers close both files even if reading fails.
    with open('testinput1', 'r') as input_file:
        tests = [line.rstrip() for line in input_file.readlines()]
    with open('testoutput1', 'r') as output_file:
        results = [line.rstrip() for line in output_file.readlines()]

    # How to normalise text for each supported flag combination.  For 'i',
    # both the pattern and the subject are upper-cased.
    regex_flag_mapping = {
        '': lambda s: s,
        'i': lambda s: s.upper(),
    }

    regex_set = create_regex_iterator(tests, results)
    for regex, regex_flags in regex_set:
        try:
            print('%r' % regex)

            # Create an iterator to grab the test/results for this regex
            result_set = create_result_iterator(tests, results)

            # Handle the flags:
            if regex_flags in regex_flag_mapping:
                text_prepare = regex_flag_mapping[regex_flags]
            elif 'x' in regex_flags:
                raise SkipException("Cant do extended PRCE expressions")
            else:
                # NOTE(review): unlike the skip path below, this branch does
                # not drain result_set before moving on -- confirm whether
                # create_result_iterator requires that.
                print("UNKNOWN FLAGS: %s" % regex_flags)
                continue

            skipped = any([op in regex for op in ['*?', '??', '+?', '}?', '(?']])
            if skipped:
                raise SkipException("Cant do non-greedy operators or '(?' constructions)")

            regex_to_use = text_prepare(regex)

            anchor_left = regex_to_use.startswith('^')
            anchor_right = regex_to_use.endswith('$') and not regex_to_use.endswith('\\$')
            if anchor_left:
                regex_to_use = regex_to_use[1:]   # chop the ^ if it's there
            if anchor_right:
                regex_to_use = regex_to_use[:-1]  # chop the $ if it's there

            if not regex_to_use:
                raise SkipException("Cant do blank regex")
        except SkipException as e:
            print(" SKIPPED (%s)" % e.message)
            # now burn all the tests for this regex
            for _ in result_set:
                pass
            continue

        # Finally, we make the pypy regex runner
        runner = make_runner(regex_to_use)

        # Now run the test expressions against the Regex
        for test, result in result_set:
            # Create possible subsequences that we should test
            if anchor_left:
                start_range = [0]
            else:
                start_range = range(0, len(test))

            if anchor_right:
                subseq_gen = ((start, len(test)) for start in start_range)
            else:
                # Go backwards to simulate greediness
                subseq_gen = ((start, end)
                              for start in start_range
                              for end in range(len(test) + 1, start, -1))

            # Reset per subject.  If no candidate is generated (for example
            # an empty subject with an unanchored pattern), these would
            # otherwise be unbound or left over from the previous subject.
            matched = False
            attempt = None

            # Search the possibilities for a match...
            for start, end in subseq_gen:
                attempt = text_prepare(test[start:end])
                matched = runner.recognize(attempt)
                if matched:
                    break

            # Did we get what we expected?
            if result == 'No match':
                if matched:
                    print(" FALSE MATCH: regex==%r test==%r" % (regex, test))
                else:
                    print(" pass: regex==%r test==%r" % (regex, test))
            elif result.startswith(' 0: '):
                # The expected match text follows the 4-character prefix.
                if not matched:
                    print(" MISSED: regex==%r test==%r" % (regex, test))
                elif not attempt == text_prepare(result[4:]):
                    print(" BAD MATCH: regex==%r test==%r found==%r expect==%r" % (regex, test, attempt, result[4:]))
                else:
                    print(" pass: regex==%r test==%r" % (regex, test))
def test_comment():
    """Check a pattern for a C-style block comment."""
    # Opening '/*', a body with no '*' or '/', then the closing '*/'.
    block_comment = make_runner("(/\\*[^\\*/]*\\*/)")
    assert block_comment.recognize("/*asdfasdfasdf*/")
def test_quoted_2():
    """Check that escaped '[', ']' and '|' are treated as literals."""
    bracket_or_bar = make_runner('\\[|\\]|\\|')
    for accepted in ("[", "|", "]"):
        assert bracket_or_bar.recognize(accepted)
    # The pattern matches exactly one character.
    assert not bracket_or_bar.recognize("]]")
def test_number():
    """Check a numeric-literal pattern with optional sign, fraction and exponent."""
    number = make_runner(
        r"\-?(0|[1-9][0-9]*)(\.[0-9]+)?([eE][\+\-]?[0-9]+)?")
    # Cases stay in their original order.
    cases = [
        ("-0.912E+0001", True),
        ("-0.a912E+0001", False),
        ("5", True),
    ]
    for text, should_match in cases:
        if should_match:
            assert number.recognize(text)
        else:
            assert not number.recognize(text)
def test_quoted():
    """Check that escaped '[', ']' and '|' are treated as literals."""
    literal_choice = make_runner('\\[|\\]|\\|')
    for single_char in ("[", "|", "]"):
        assert literal_choice.recognize(single_char)
    # Two characters are one too many for this pattern.
    assert not literal_choice.recognize("]]")