def test_match_question(self):
    """Check the optional (`?`) operator on single characters and groups."""
    # (pattern, input text, expected matched text)
    cases = [
        ("ab?", "a", "a"),
        ("ab?", "ab", "ab"),
        ("ab?", "ac", "a"),
        ("ab(cdef)?", "ab", "ab"),
        ("ab(cdef)?", "abcdef", "abcdef"),
    ]
    for pattern, text, expected in cases:
        # A fresh matcher is built for every case, as in the original test.
        result = PatternMatcher().match(self.cmp(pattern), text)
        assert result == expected
def test_match_or(self):
    """Check alternation (`|`), including alternatives sharing a prefix."""
    # (pattern, input text, expected matched text)
    cases = [
        ("a|b", "a", "a"),
        ("a|b", "b", "b"),
        ("a|bd|c", "a", "a"),
        ("a|bd|c", "bd", "bd"),
        ("a|bd|c", "c", "c"),
        ("ab|ac", "ac", "ac"),
        # The first alternative that matches is the one expected here.
        ("abc|abcde", "abcde", "abc"),
        ("abc|abcde", "abc", "abc"),
        ("abcde|abc", "abc", "abc"),
    ]
    for pattern, text, expected in cases:
        result = PatternMatcher().match(self.cmp(pattern), text)
        assert result == expected
def test_match_plus(self):
    """Check one-or-more (`+`) repetition, greedy and non-greedy."""
    # NOTE(review): a second `test_match_plus` is defined further down in
    # this file. If both live in the same class, the later definition
    # replaces this one and the greedy/non-greedy cases below never run
    # -- confirm and rename one of them if so.
    #
    # (pattern, input text, expected matched text or None for "no match")
    cases = [
        ("a+", "aaaaaa", "aaaaaa"),
        ("a+", "bbbbb", None),
        ("ab+", "abbbbbb", "abbbbbb"),
        ("a+b+", "aaaaaabbbbbbbbbb", "aaaaaabbbbbbbbbb"),
        ("a+b+", "bbbbbbbbbb", None),
        ("a+b+", "aaaaaaa", None),
        ("(ab)+", "abababab", "abababab"),
        ("(ab)+", "aaabbb", None),
        (".+bc", "aaabcaaabc", "aaabcaaabc"),  # greedy
        (".+?bc", "aaabcaaabc", "aaabc"),  # non-greedy
    ]
    for pattern, text, expected in cases:
        result = PatternMatcher().match(self.cmp(pattern), text)
        if expected is None:
            assert result is None
        else:
            assert result == expected
def test_mixed(self):
    """Check a pattern combining `+`, `*`, grouping and alternation."""
    pattern = "a+b*(c|d+)"
    # (input text, expected matched text or None for "no match")
    cases = [
        ("aabbc", "aabbc"),
        ("aabbdd", "aabbdd"),
        ("aadd", "aadd"),
        ("aabbc", "aabbc"),
        ("aac", "aac"),
        ("aad", "aad"),
        ("cdd", None),
        ("bbcdd", None),
        ("abc", "abc"),
    ]
    for text, expected in cases:
        # The pattern is compiled again for each case, as in the original.
        result = PatternMatcher().match(self.cmp(pattern), text)
        if expected is None:
            assert result is None
        else:
            assert result == expected
def test_charrange(self):
    """Check character classes (`[...]`) with ranges and literal members."""
    # (pattern, input text, expected matched text or None for "no match")
    cases = [
        ("[a-z]", "b", "b"),
        ("[a-z]", "x", "x"),
        ("[a-z]+", "123", None),
        ("[a-z]+", "foobar", "foobar"),
        ("[a-zA-Z0-9]+", "fooBAR123", "fooBAR123"),
        ("[a-zA-Z_][a-zA-Z0-9_]*", "_fooBAR123_", "_fooBAR123_"),
        ("[a-zA-Z_][a-zA-Z0-9_]*", "123foobar", None),
        # A single class consumes only one character of the input.
        ("[a-z]", "abc", "a"),
        ("[+-]", "+", "+"),
    ]
    for pattern, text, expected in cases:
        result = PatternMatcher().match(self.cmp(pattern), text)
        if expected is None:
            assert result is None
        else:
            assert result == expected
def test_match_plus(self):
    """Check one-or-more (`+`) repetition on characters and groups."""
    # (pattern, input text, expected matched text or None for "no match")
    cases = [
        ("a+", "aaaaaa", "aaaaaa"),
        ("a+", "bbbbb", None),
        ("ab+", "abbbbbb", "abbbbbb"),
        ("a+b+", "aaaaaabbbbbbbbbb", "aaaaaabbbbbbbbbb"),
        ("a+b+", "bbbbbbbbbb", None),
        ("a+b+", "aaaaaaa", None),
        ("(ab)+", "abababab", "abababab"),
        ("(ab)+", "aaabbb", None),
    ]
    for pattern, text, expected in cases:
        result = PatternMatcher().match(self.cmp(pattern), text)
        if expected is None:
            assert result is None
        else:
            assert result == expected
def test_match_star(self):
    """Check zero-or-more (`*`) repetition, including empty matches."""
    # NOTE(review): a second, longer `test_match_star` is defined further
    # down in this file. If both live in the same class, the later
    # definition replaces this one -- confirm.
    #
    # (pattern, input text, expected matched text)
    cases = [
        ("a*", "", ""),
        ("a*", "aaaaaa", "aaaaaa"),
        # Zero repetitions is still a match: the empty string is expected.
        ("a*", "bbbbb", ""),
        ("ab*", "abbbbbb", "abbbbbb"),
        ("a*b*", "aaaaaabbbbbbbbbb", "aaaaaabbbbbbbbbb"),
        (".*", "absakljsadklajd", "absakljsadklajd"),
    ]
    for pattern, text, expected in cases:
        result = PatternMatcher().match(self.cmp(pattern), text)
        assert result == expected
def test_match_more(self):
    """Check multi-character sequences, with and without a `.` wildcard."""
    # (pattern, input text, expected matched text)
    cases = [
        ("aa", "aa", "aa"),
        ("a.b", "axb", "axb"),
    ]
    for pattern, text, expected in cases:
        result = PatternMatcher().match(self.cmp(pattern), text)
        assert result == expected
def test_match_one(self):
    """Check matching of a single hand-built `RE_CHAR` node.

    Unlike the other tests, the pattern is constructed directly from
    `RE_CHAR` instead of going through `self.cmp`.
    """
    # (pattern character, input text, expected match or None for "no match")
    cases = [
        ("a", "a", "a"),
        (".", "c", "c"),
        ("x", "c", None),
        # Disabled in the original test:
        #   assert PatternMatcher().match(None, "c") == "c"
        ("c", "", None),
    ]
    for char, text, expected in cases:
        result = PatternMatcher().match(RE_CHAR(char), text)
        if expected is None:
            assert result is None
        else:
            assert result == expected
def setup_class(cls):
    """Create the matcher and parser objects stored on the test class."""
    matcher = PatternMatcher()
    cls.pmatch = matcher
    parser = RegexParser()
    cls.rp = parser
def test_exactmatch(self):
    """Check the `exactmatch` flag the matcher exposes after each match."""
    # NOTE(review): a second, longer `test_exactmatch` is defined further
    # down in this file. If both live in the same class, the later
    # definition replaces this one -- confirm.
    #
    # (pattern, input text, expected value of `exactmatch` afterwards)
    cases = [
        ("abc", "abcd", True),
        ("[abcd]+", "abcdx", False),
        ("a[bcd]+", "abc", True),
        ("a[abcd]+", "abx", False),
        ("as", "abx", False),
        ("[abc]", "aclass", True),
        ("abc|abcde", "abcde", True),
        ("abcde|abc", "abcde", True),
        ("abcx|abcde", "abcx", True),
        ("abcde|abcx", "abcx", True),
    ]
    # One matcher instance is deliberately reused for all cases.
    matcher = PatternMatcher()
    for pattern, text, expected_flag in cases:
        matcher.match(self.cmp(pattern), text)
        assert matcher.exactmatch is expected_flag
def test_realworld_examples(self):
    """Check patterns resembling real lexer rules (Python, Prolog, Eco)."""
    # (pattern, input text, expected matched text or None for "no match")
    # Cases run in the listed order; every string literal is kept exactly
    # as written in the original assertions.
    cases = [
        ("[a-zA-Z_][a-zA-Z_0-9]*", "abc123_", "abc123_"),
        ("[a-zA-Z_][a-zA-Z_0-9]*", "123abc123_", None),
        ("#[^\\r]*", "# abc", "# abc"),
        ("#[^\r]*", "# abc \r", "# abc "),
        ("([0-9]+\.?[0-9]*|\.[0-9]+)([eE](\+|-)?[0-9]+)?",
         "123.456", "123.456"),
        ("([0-9]+\.?[0-9]*|\.[0-9]+)([eE](\+|-)?[0-9]+)?",
         "1e23", "1e23"),
        ("\'[^\'\r]*\'",
         "'this is a string 123!'", "'this is a string 123!'"),
        ("\'[^\'\r]*\'",
         "'this is a with a newline \r string 123!'", None),

        # Python
        ("#[^\\r]*", "# hello world", "# hello world"),
        ('\\"\\"\\"[^\\"]*\\"\\"\\"',
         '"""this is a test string 123"""',
         '"""this is a test string 123"""'),
        ("\\'[^\\'\\r]*\\'",
         "'single quoted string'", "'single quoted string'"),
        ('\\"[^\\"\\r]*\\"',
         '"double quoted string"', '"double quoted string"'),
        ("[ \\t]+", " ", " "),
        ("\\", "\\", "\\"),
        ("\\", "range", None),
        ("[\\n\\r]", "\r", "\r"),
        ("\.", ".", "."),
        ("&=", "&=", "&="),
        ("0[xX][\da-fA-F]+", "0xAB", "0xAB"),
        ("0[oO][0-7]+", "0o67", "0o67"),
        ("0[bB][01]+", "0b10101", "0b10101"),
        ('\"([^\"\r\\\\]|\\\\")*\"',
         '"escaped\\"quote"', '"escaped\\"quote"'),

        # Prolog
        ("/", "/", "/"),
        ("/\\", "/\\", "/\\"),
        ("(%[^\\n\\r]*)", "% comment", "% comment"),
        ("[A-Z_]([a-zA-Z0-9]|_)*|_", "Variable_", "Variable_"),
        ("[A-Z_]([a-zA-Z0-9]|_)*|_", "VAR", "VAR"),
        ("[A-Z_]([a-zA-Z0-9]|_)*|_", "var", None),
        ("(0|[1-9][0-9]*)", "0", "0"),
        ("(0|[1-9][0-9]*)", "12345", "12345"),
        ("(0|[1-9][0-9]*)(\.[0-9]+)([eE][-+]?[0-9]+)?",
         "1213.89e+23", "1213.89e+23"),
        ("([a-z]([a-zA-Z0-9]|_)*)", "aH8_", "aH8_"),
        ("('[^']*')", "'quoted'", "'quoted'"),
        ("\[\]", "[]", "[]"),
        ("!", "!", "!"),
        ("\+", "+", "+"),
        ("\-", "-", "-"),
        ("\{\}", "{}", "{}"),
        ("([a-z]([a-zA-Z0-9]|_)*)|('[^']*')|\[\]|!|\+|\-|\{\}",
         "aH8_", "aH8_"),
        ("\"[^\"]*\"", '"a string"', '"a string"'),

        # Eco grammar
        ('\\"([^\\"]|\\\\\\")*\\"', '"terminal"', '"terminal"'),
        ('\\"([^\\"]|\\\\\\")*\\"', '"[a-z]"', '"[a-z]"'),
        ('\\"([^\\\\"]|\\\\\\")*\\"',
         '"\\"[a-z]\\""', '"\\"[a-z]\\""'),
        ('\\"([^\\\\"]|\\\\\\")*\\"',
         '"\\"[a"-z]\\""', '"\\"[a"'),
        ('"([^\\"\\\\r]|\\\\\")*"',
         '"\\"[a-z]\\""', '"\\"[a-z]\\""'),
        ('\\"([^\\"\\\\]|\\\\.)*\\"', '"\+"', '"\+"'),
        ('\\"([^\\"\\\\]|\\\\.)*\\"',
         '"escaped\\"quote"', '"escaped\\"quote"'),
        ('\\"([^\\"\\\\]|\\\\.)*\\"',
         "\"escaped\\\"quote\"", '"escaped\\"quote"'),
        ('\\"([^\\"\\\\]|\\\\.)*\\"',
         '"\\"[a"-z]\\""', '"\\"[a"'),
    ]
    for pattern, text, expected in cases:
        # A fresh matcher and a fresh compile for every case.
        result = PatternMatcher().match(self.cmp(pattern), text)
        if expected is None:
            assert result is None
        else:
            assert result == expected
def test_escaped(self):
    """Check backslash escapes inside and outside character classes."""
    # (pattern, input text, expected matched text or None for "no match")
    # String literals are kept exactly as written in the original
    # assertions, including the mix of single and doubled backslashes.
    cases = [
        ("[a-z]", "-", None),
        ("[a\-z]", "-", "-"),
        ("#[^\-]*", "-", None),
        ("[\[]*", "[", "["),
        ("[\.]", ".", "."),
        ("\.", ".", "."),
        ("\[", "[", "["),
        ("\[\]", "[]", "[]"),
        ("\*", "*", "*"),
        ("\+", "+", "+"),
        ("\\+", "+", "+"),
        ("\'", "\'", "'"),
        ("\\'", "\'", "'"),
        ('\\"', '\"', '\"'),
        ("\r", "\r", "\r"),
        ("\\r", "\r", "\r"),
        ("\r", "\\r", None),
        ("\\r", "\\r", None),
        ("\\r", "\\r", None),
        ("[\\r\\n]", "\r", "\r"),
        ("[\\r\\n]", "\n", "\n"),
    ]
    for pattern, text, expected in cases:
        result = PatternMatcher().match(self.cmp(pattern), text)
        if expected is None:
            assert result is None
        else:
            assert result == expected
def test_negatedcharrange(self):
    """Check negated character classes (`[^...]`)."""
    # (pattern, input text, expected matched text or None for "no match")
    cases = [
        ("[^abcd]", "a", None),
        ("[^abcd]", "e", "e"),
        ("[^a-z]+", "ABCD", "ABCD"),
        ("[^a-z]+", "abcd", None),
    ]
    for pattern, text, expected in cases:
        result = PatternMatcher().match(self.cmp(pattern), text)
        if expected is None:
            assert result is None
        else:
            assert result == expected
def test_match_star(self):
    """Check zero-or-more (`*`) repetition.

    Covers empty matches, patterns whose tail overlaps the repeated part,
    and greedy versus non-greedy (`*?`) repetition.
    """
    # (pattern, input text, expected matched text or None for "no match")
    cases = [
        ("a*", "", ""),
        ("a*", "aaaaaa", "aaaaaa"),
        ("a*", "bbbbb", ""),
        ("ab*", "abbbbbb", "abbbbbb"),
        ("a*b*", "aaaaaabbbbbbbbbb", "aaaaaabbbbbbbbbb"),
        (".*", "absakljsadklajd", "absakljsadklajd"),
        (".*bc", "abcaaabc", "abcaaabc"),
        (".*abc", "abc", "abc"),
        # The repetition must leave enough input for the trailing literal.
        ("a*a", "aa", "aa"),
        ("a*aa", "aa", "aa"),
        ("a*aa", "aaa", "aaa"),
        ("a*a", "a", "a"),
        ("a*a", "", None),
        (".*abc", "abcabcabc", "abcabcabc"),  # greedy
        (".*?abc", "abcabcabc", "abc"),  # non-greedy
        ("(ab)*ab", "ab", "ab"),
        ("(ab)*ab", "abab", "abab"),
        ("(ab)*ab", "ababab", "ababab"),
        ("a*?bc", "aaaabc", "aaaabc"),
        ("a*?bc", "bc", "bc"),
        ("a*?bc", "dbc", None),
    ]
    for pattern, text, expected in cases:
        result = PatternMatcher().match(self.cmp(pattern), text)
        if expected is None:
            assert result is None
        else:
            assert result == expected
def test_exactmatch(self):
    """Check the `exactmatch` flag and the `la` value after matching."""
    # Part 1: one matcher reused for all cases.
    # (pattern, input text, expected value of `exactmatch` afterwards)
    flag_cases = [
        ("abc", "abcd", True),
        ("[abcd]+", "abcdx", False),
        ("a[bcd]+", "abc", True),
        ("a[abcd]+", "abx", False),
        ("as", "abx", False),
        ("[abc]", "aclass", True),
        ("abc|abcde", "abcde", True),
        ("abcde|abc", "abcde", True),
        ("abcx|abcde", "abcx", True),
        ("abcde|abcx", "abcx", True),
    ]
    shared = PatternMatcher()
    for pattern, text, expected_flag in flag_cases:
        shared.match(self.cmp(pattern), text)
        assert shared.exactmatch is expected_flag

    # Part 2: a fresh matcher per case.
    # (pattern, input text, expected match or None, expected `la`)
    # `la` is presumably a lookahead count -- TODO confirm.
    la_cases = [
        ("[a-z]*", 'abc1', "abc", 4),
        ("a[b]*a", 'abbbaa', "abbba", 5),
        ('"[^"]*"', '"abc', None, 5),
    ]
    for pattern, text, expected, expected_la in la_cases:
        fresh = PatternMatcher()
        result = fresh.match(self.cmp(pattern), text)
        if expected is None:
            assert result is None
        else:
            assert result == expected
        assert fresh.la == expected_la