Esempio n. 1
0
 def test_match_question(self):
     """Check the ``?`` quantifier on a single character and on a group."""
     def run(pattern, text):
         # Every case uses a brand-new matcher and a freshly built pattern.
         return PatternMatcher().match(self.cmp(pattern), text)

     # Trailing character marked with ``?``.
     assert run("ab?", "a") == "a"
     assert run("ab?", "ab") == "ab"
     assert run("ab?", "ac") == "a"

     # Parenthesised group marked with ``?``.
     assert run("ab(cdef)?", "ab") == "ab"
     assert run("ab(cdef)?", "abcdef") == "abcdef"
Esempio n. 2
0
    def test_match_or(self):
        """Check alternation (``|``) between literal alternatives."""
        cases = [
            # (pattern, input text, expected matched text)
            ("a|b", "a", "a"),
            ("a|b", "b", "b"),
            ("a|bd|c", "a", "a"),
            ("a|bd|c", "bd", "bd"),
            ("a|bd|c", "c", "c"),
            ("ab|ac", "ac", "ac"),
            # Alternatives where one is a prefix of the other.
            ("abc|abcde", "abcde", "abc"),
            ("abc|abcde", "abc", "abc"),
            ("abcde|abc", "abc", "abc"),
        ]
        for pattern, text, expected in cases:
            assert PatternMatcher().match(self.cmp(pattern), text) == expected
Esempio n. 3
0
 def test_match_plus(self):
     """Check the ``+`` quantifier, including its ``+?`` variant."""
     def run(pattern, text):
         # Every case uses a brand-new matcher and a freshly built pattern.
         return PatternMatcher().match(self.cmp(pattern), text)

     # Repetition of a single character.
     assert run("a+", "aaaaaa") == "aaaaaa"
     assert run("a+", "bbbbb") is None
     assert run("ab+", "abbbbbb") == "abbbbbb"

     # Two repetitions in sequence: missing either part rejects the input.
     assert run("a+b+", "aaaaaabbbbbbbbbb") == "aaaaaabbbbbbbbbb"
     assert run("a+b+", "bbbbbbbbbb") is None
     assert run("a+b+", "aaaaaaa") is None

     # Repetition of a group.
     assert run("(ab)+", "abababab") == "abababab"
     assert run("(ab)+", "aaabbb") is None

     # Greedy repetition consumes up to the last "bc" ...
     assert run(".+bc", "aaabcaaabc") == "aaabcaaabc"
     # ... while the non-greedy form stops at the first one.
     assert run(".+?bc", "aaabcaaabc") == "aaabc"
Esempio n. 4
0
 def test_mixed(self):
     """Check one pattern combining ``+``, ``*``, grouping and ``|``.

     Each entry pairs an input with the text the matcher must return;
     ``None`` means the input must be rejected.
     """
     pattern = "a+b*(c|d+)"
     cases = [
         ("aabbc", "aabbc"),
         ("aabbdd", "aabbdd"),
         ("aadd", "aadd"),
         # Same case as the first entry; the original suite ran it twice.
         ("aabbc", "aabbc"),
         ("aac", "aac"),
         ("aad", "aad"),
         ("cdd", None),
         ("bbcdd", None),
         ("abc", "abc"),
     ]
     for text, expected in cases:
         got = PatternMatcher().match(self.cmp(pattern), text)
         if expected is None:
             assert got is None
         else:
             assert got == expected
Esempio n. 5
0
 def test_charrange(self):
     """Check bracketed character classes and ranges such as ``[a-z]``."""
     def matched(pattern, text):
         # Every case uses a brand-new matcher and a freshly built pattern.
         return PatternMatcher().match(self.cmp(pattern), text)

     # A single range, alone and repeated.
     assert matched("[a-z]", "b") == "b"
     assert matched("[a-z]", "x") == "x"
     assert matched("[a-z]+", "123") is None
     assert matched("[a-z]+", "foobar") == "foobar"

     # Several ranges inside one class.
     assert matched("[a-zA-Z0-9]+", "fooBAR123") == "fooBAR123"

     # Identifier-shaped pattern: first character must not be a digit.
     identifier = "[a-zA-Z_][a-zA-Z0-9_]*"
     assert matched(identifier, "_fooBAR123_") == "_fooBAR123_"
     assert matched(identifier, "123foobar") is None

     # An unrepeated class consumes exactly one character.
     assert matched("[a-z]", "abc") == "a"
     # "+" and "-" listed as plain class members.
     assert matched("[+-]", "+") == "+"
Esempio n. 6
0
 def test_match_plus(self):
     """Check the ``+`` quantifier on characters and groups.

     Each entry is (pattern, input, expected match); ``None`` marks inputs
     that must be rejected.
     """
     cases = [
         ("a+", "aaaaaa", "aaaaaa"),
         ("a+", "bbbbb", None),
         ("ab+", "abbbbbb", "abbbbbb"),
         ("a+b+", "aaaaaabbbbbbbbbb", "aaaaaabbbbbbbbbb"),
         ("a+b+", "bbbbbbbbbb", None),
         ("a+b+", "aaaaaaa", None),
         ("(ab)+", "abababab", "abababab"),
         ("(ab)+", "aaabbb", None),
     ]
     for pattern, text, expected in cases:
         got = PatternMatcher().match(self.cmp(pattern), text)
         if expected is None:
             assert got is None
         else:
             assert got == expected
Esempio n. 7
0
 def test_match_star(self):
     """Check the ``*`` quantifier, including matches of the empty string."""
     cases = [
         # (pattern, input text, expected matched text)
         ("a*", "", ""),
         ("a*", "aaaaaa", "aaaaaa"),
         # No leading "a": the result is an empty match, not a rejection.
         ("a*", "bbbbb", ""),
         ("ab*", "abbbbbb", "abbbbbb"),
         ("a*b*", "aaaaaabbbbbbbbbb", "aaaaaabbbbbbbbbb"),
         (".*", "absakljsadklajd", "absakljsadklajd"),
     ]
     for pattern, text, expected in cases:
         assert PatternMatcher().match(self.cmp(pattern), text) == expected
Esempio n. 8
0
 def test_match_more(self):
     """Check patterns made of more than one element."""
     # Each pair is (pattern, input); the whole input must be returned.
     for pattern, text in (("aa", "aa"), ("a.b", "axb")):
         result = PatternMatcher().match(self.cmp(pattern), text)
         assert result == text
Esempio n. 9
0
 def test_match_one(self):
     """Check matching against a single ``RE_CHAR`` node built by hand.

     Each entry is (character given to ``RE_CHAR``, input, expected match);
     ``None`` marks inputs that must be rejected.
     """
     # NOTE: the original suite carries a disabled case asserting that
     # match(None, "c") == "c"; it stays disabled here.
     cases = [
         ("a", "a", "a"),
         (".", "c", "c"),
         ("x", "c", None),
         ("c", "", None),
     ]
     for char, text, expected in cases:
         got = PatternMatcher().match(RE_CHAR(char), text)
         if expected is None:
             assert got is None
         else:
             assert got == expected
Esempio n. 10
0
 def setup_class(cls):
     """Attach one ``PatternMatcher`` and one ``RegexParser`` to the class."""
     shared_matcher = PatternMatcher()
     cls.pmatch = shared_matcher
     shared_parser = RegexParser()
     cls.rp = shared_parser
Esempio n. 11
0
    def test_exactmatch(self):
        """Check the ``exactmatch`` flag found on the matcher after a match.

        A single matcher instance is reused for all cases, so each call to
        ``match`` has to leave the flag in the state listed for that case.
        """
        cases = [
            # (pattern, input text, expected value of ``exactmatch``)
            ("abc", "abcd", True),
            ("[abcd]+", "abcdx", False),
            ("a[bcd]+", "abc", True),
            ("a[abcd]+", "abx", False),
            ("as", "abx", False),
            ("[abc]", "aclass", True),
            ("abc|abcde", "abcde", True),
            ("abcde|abc", "abcde", True),
            ("abcx|abcde", "abcx", True),
            ("abcde|abcx", "abcx", True),
        ]
        matcher = PatternMatcher()
        for pattern, text, flag in cases:
            matcher.match(self.cmp(pattern), text)
            assert matcher.exactmatch is flag
Esempio n. 12
0
    def test_realworld_examples(self):
        """Match token patterns of the kind used in real grammars.

        The cases are grouped as in the original suite: generic tokens,
        Python, Prolog and the Eco grammar format.

        Patterns whose only backslashes are regex escapes are written as raw
        strings so Python never sees an invalid string escape sequence; the
        runtime value of every pattern is unchanged. Literals that rely on
        genuine Python escapes (carriage return, escaped quotes, doubled
        backslashes) are kept exactly as they were.
        """
        assert PatternMatcher().match(self.cmp("[a-zA-Z_][a-zA-Z_0-9]*"),
                                      "abc123_") == "abc123_"
        assert PatternMatcher().match(self.cmp("[a-zA-Z_][a-zA-Z_0-9]*"),
                                      "123abc123_") is None

        # First pattern spells the CR as backslash + "r", the second one
        # embeds a real carriage-return character.
        assert PatternMatcher().match(self.cmp("#[^\\r]*"), "# abc") == "# abc"
        assert PatternMatcher().match(self.cmp("#[^\r]*"),
                                      "# abc \r") == "# abc "

        assert PatternMatcher().match(
            self.cmp(r"([0-9]+\.?[0-9]*|\.[0-9]+)([eE](\+|-)?[0-9]+)?"),
            "123.456") == "123.456"
        assert PatternMatcher().match(
            self.cmp(r"([0-9]+\.?[0-9]*|\.[0-9]+)([eE](\+|-)?[0-9]+)?"),
            "1e23") == "1e23"
        assert PatternMatcher().match(
            self.cmp("\'[^\'\r]*\'"),
            "'this is a string 123!'") == "'this is a string 123!'"
        assert PatternMatcher().match(
            self.cmp("\'[^\'\r]*\'"),
            "'this is a with a newline \r string 123!'") is None

        # Python
        assert PatternMatcher().match(self.cmp("#[^\\r]*"),
                                      "# hello world") == "# hello world"
        assert PatternMatcher().match(self.cmp('\\"\\"\\"[^\\"]*\\"\\"\\"'),
                                      '"""this is a test string 123"""'
                                      ) == '"""this is a test string 123"""'
        assert PatternMatcher().match(
            self.cmp("\\'[^\\'\\r]*\\'"),
            "'single quoted string'") == "'single quoted string'"
        assert PatternMatcher().match(
            self.cmp('\\"[^\\"\\r]*\\"'),
            '"double quoted string"') == '"double quoted string"'
        assert PatternMatcher().match(self.cmp("[ \\t]+"), "    ") == "    "
        assert PatternMatcher().match(self.cmp("\\"), "\\") == "\\"
        assert PatternMatcher().match(self.cmp("\\"), "range") is None
        assert PatternMatcher().match(self.cmp("[\\n\\r]"), "\r") == "\r"
        assert PatternMatcher().match(self.cmp(r"\."), ".") == "."
        assert PatternMatcher().match(self.cmp("&="), "&=") == "&="
        assert PatternMatcher().match(self.cmp(r"0[xX][\da-fA-F]+"),
                                      "0xAB") == "0xAB"
        assert PatternMatcher().match(self.cmp("0[oO][0-7]+"),
                                      "0o67") == "0o67"
        assert PatternMatcher().match(self.cmp("0[bB][01]+"),
                                      "0b10101") == "0b10101"
        assert PatternMatcher().match(
            self.cmp('\"([^\"\r\\\\]|\\\\")*\"'),
            '"escaped\\"quote"') == '"escaped\\"quote"'

        # Prolog
        assert PatternMatcher().match(self.cmp("/"), "/") == "/"
        assert PatternMatcher().match(self.cmp("/\\"), "/\\") == "/\\"
        assert PatternMatcher().match(self.cmp("(%[^\\n\\r]*)"),
                                      "% comment") == "% comment"
        assert PatternMatcher().match(self.cmp("[A-Z_]([a-zA-Z0-9]|_)*|_"),
                                      "Variable_") == "Variable_"
        assert PatternMatcher().match(self.cmp("[A-Z_]([a-zA-Z0-9]|_)*|_"),
                                      "VAR") == "VAR"
        assert PatternMatcher().match(self.cmp("[A-Z_]([a-zA-Z0-9]|_)*|_"),
                                      "var") is None
        assert PatternMatcher().match(self.cmp("(0|[1-9][0-9]*)"), "0") == "0"
        assert PatternMatcher().match(self.cmp("(0|[1-9][0-9]*)"),
                                      "12345") == "12345"
        assert PatternMatcher().match(
            self.cmp(r"(0|[1-9][0-9]*)(\.[0-9]+)([eE][-+]?[0-9]+)?"),
            "1213.89e+23") == "1213.89e+23"
        assert PatternMatcher().match(self.cmp("([a-z]([a-zA-Z0-9]|_)*)"),
                                      "aH8_") == "aH8_"
        assert PatternMatcher().match(self.cmp("('[^']*')"),
                                      "'quoted'") == "'quoted'"
        assert PatternMatcher().match(self.cmp(r"\[\]"), "[]") == "[]"
        assert PatternMatcher().match(self.cmp("!"), "!") == "!"
        assert PatternMatcher().match(self.cmp(r"\+"), "+") == "+"
        assert PatternMatcher().match(self.cmp(r"\-"), "-") == "-"
        assert PatternMatcher().match(self.cmp(r"\{\}"), "{}") == "{}"
        assert PatternMatcher().match(
            self.cmp(r"([a-z]([a-zA-Z0-9]|_)*)|('[^']*')|\[\]|!|\+|\-|\{\}"),
            "aH8_") == "aH8_"
        assert PatternMatcher().match(self.cmp("\"[^\"]*\""),
                                      '"a string"') == '"a string"'

        # Eco grammar
        assert PatternMatcher().match(self.cmp('\\"([^\\"]|\\\\\\")*\\"'),
                                      '"terminal"') == '"terminal"'
        assert PatternMatcher().match(self.cmp('\\"([^\\"]|\\\\\\")*\\"'),
                                      '"[a-z]"') == '"[a-z]"'
        assert PatternMatcher().match(self.cmp('\\"([^\\\\"]|\\\\\\")*\\"'),
                                      '"\\"[a-z]\\""') == '"\\"[a-z]\\""'
        # An unescaped quote inside the input ends the match early.
        assert PatternMatcher().match(self.cmp('\\"([^\\\\"]|\\\\\\")*\\"'),
                                      '"\\"[a"-z]\\""') == '"\\"[a"'
        assert PatternMatcher().match(self.cmp('"([^\\"\\\\r]|\\\\\")*"'),
                                      '"\\"[a-z]\\""') == '"\\"[a-z]\\""'
        assert PatternMatcher().match(self.cmp('\\"([^\\"\\\\]|\\\\.)*\\"'),
                                      r'"\+"') == r'"\+"'
        assert PatternMatcher().match(
            self.cmp('\\"([^\\"\\\\]|\\\\.)*\\"'),
            '"escaped\\"quote"') == '"escaped\\"quote"'
        assert PatternMatcher().match(
            self.cmp('\\"([^\\"\\\\]|\\\\.)*\\"'),
            "\"escaped\\\"quote\"") == '"escaped\\"quote"'
        assert PatternMatcher().match(self.cmp('\\"([^\\"\\\\]|\\\\.)*\\"'),
                                      '"\\"[a"-z]\\""') == '"\\"[a"'
Esempio n. 13
0
 def test_escaped(self):
     """Check backslash-escaped characters inside and outside classes.

     Patterns whose only backslashes are regex escapes are written as raw
     strings so Python never sees an invalid string escape sequence; their
     runtime values are unchanged. Literals that depend on genuine Python
     escapes (quotes, carriage return, doubled backslash) are kept as is.
     """
     # "-" inside a class: range operator unless escaped.
     assert PatternMatcher().match(self.cmp("[a-z]"), "-") is None
     assert PatternMatcher().match(self.cmp(r"[a\-z]"), "-") == "-"
     assert PatternMatcher().match(self.cmp(r"#[^\-]*"), "-") is None

     # Escaped metacharacters.
     assert PatternMatcher().match(self.cmp(r"[\[]*"), "[") == "["
     assert PatternMatcher().match(self.cmp(r"[\.]"), ".") == "."
     assert PatternMatcher().match(self.cmp(r"\."), ".") == "."
     assert PatternMatcher().match(self.cmp(r"\["), "[") == "["
     assert PatternMatcher().match(self.cmp(r"\[\]"), "[]") == "[]"
     assert PatternMatcher().match(self.cmp(r"\*"), "*") == "*"
     # The next two patterns are the same two-character string (backslash,
     # plus); both spellings from the original suite are kept.
     assert PatternMatcher().match(self.cmp(r"\+"), "+") == "+"
     assert PatternMatcher().match(self.cmp("\\+"), "+") == "+"

     # Quotes: a bare quote character vs. backslash + quote in the pattern.
     assert PatternMatcher().match(self.cmp("\'"), "\'") == "'"
     assert PatternMatcher().match(self.cmp("\\'"), "\'") == "'"
     assert PatternMatcher().match(self.cmp('\\"'), '\"') == '\"'

     # Carriage return: a real CR character vs. backslash + "r" in the
     # pattern both match a CR, and neither matches the two-character
     # text backslash + "r".
     assert PatternMatcher().match(self.cmp("\r"), "\r") == "\r"
     assert PatternMatcher().match(self.cmp("\\r"), "\r") == "\r"
     assert PatternMatcher().match(self.cmp("\r"), "\\r") is None
     assert PatternMatcher().match(self.cmp("\\r"), "\\r") is None
     assert PatternMatcher().match(self.cmp("[\\r\\n]"), "\r") == "\r"
     assert PatternMatcher().match(self.cmp("[\\r\\n]"), "\n") == "\n"
Esempio n. 14
0
 def test_negatedcharrange(self):
     """Check negated character classes (``[^...]``).

     Each entry is (pattern, input, expected match); ``None`` marks inputs
     that must be rejected.
     """
     cases = [
         ("[^abcd]", "a", None),
         ("[^abcd]", "e", "e"),
         ("[^a-z]+", "ABCD", "ABCD"),
         ("[^a-z]+", "abcd", None),
     ]
     for pattern, text, expected in cases:
         got = PatternMatcher().match(self.cmp(pattern), text)
         if expected is None:
             assert got is None
         else:
             assert got == expected
Esempio n. 15
0
 def test_match_star(self):
     """Check the ``*`` quantifier, including its ``*?`` variant."""
     def attempt(pattern, text):
         # Every case uses a brand-new matcher and a freshly built pattern.
         return PatternMatcher().match(self.cmp(pattern), text)

     # Basic repetition, including empty matches.
     assert attempt("a*", "") == ""
     assert attempt("a*", "aaaaaa") == "aaaaaa"
     assert attempt("a*", "bbbbb") == ""
     assert attempt("ab*", "abbbbbb") == "abbbbbb"
     assert attempt("a*b*", "aaaaaabbbbbbbbbb") == "aaaaaabbbbbbbbbb"
     assert attempt(".*", "absakljsadklajd") == "absakljsadklajd"

     # Repetition followed by a suffix the repetition could also consume.
     assert attempt(".*bc", "abcaaabc") == "abcaaabc"
     assert attempt(".*abc", "abc") == "abc"
     assert attempt("a*a", "aa") == "aa"
     assert attempt("a*aa", "aa") == "aa"
     assert attempt("a*aa", "aaa") == "aaa"
     assert attempt("a*a", "a") == "a"
     assert attempt("a*a", "") is None

     # Greedy takes the longest candidate, non-greedy the shortest.
     assert attempt(".*abc", "abcabcabc") == "abcabcabc"
     assert attempt(".*?abc", "abcabcabc") == "abc"

     # Repeated group followed by the same text as a suffix.
     assert attempt("(ab)*ab", "ab") == "ab"
     assert attempt("(ab)*ab", "abab") == "abab"
     assert attempt("(ab)*ab", "ababab") == "ababab"

     # Non-greedy repetition of a literal before a fixed suffix.
     assert attempt("a*?bc", "aaaabc") == "aaaabc"
     assert attempt("a*?bc", "bc") == "bc"
     assert attempt("a*?bc", "dbc") is None
Esempio n. 16
0
    def test_exactmatch(self):
        """Check the ``exactmatch`` and ``la`` attributes set by ``match``.

        The first part reuses one matcher for all cases, so each call to
        ``match`` has to leave ``exactmatch`` in the state listed for that
        case. The second part uses a fresh matcher per case and checks the
        returned text together with ``la`` (presumably a look-ahead counter
        -- confirm against ``PatternMatcher``).
        """
        flag_cases = [
            # (pattern, input text, expected value of ``exactmatch``)
            ("abc", "abcd", True),
            ("[abcd]+", "abcdx", False),
            ("a[bcd]+", "abc", True),
            ("a[abcd]+", "abx", False),
            ("as", "abx", False),
            ("[abc]", "aclass", True),
            ("abc|abcde", "abcde", True),
            ("abcde|abc", "abcde", True),
            ("abcx|abcde", "abcx", True),
            ("abcde|abcx", "abcx", True),
        ]
        shared = PatternMatcher()
        for pattern, text, flag in flag_cases:
            shared.match(self.cmp(pattern), text)
            assert shared.exactmatch is flag

        la_cases = [
            # (pattern, input text, expected match, expected ``la``)
            ("[a-z]*", 'abc1', "abc", 4),
            ("a[b]*a", 'abbbaa', "abbba", 5),
        ]
        for pattern, text, expected, la in la_cases:
            fresh = PatternMatcher()
            assert fresh.match(self.cmp(pattern), text) == expected
            assert fresh.la == la

        # Unterminated string: no match, yet ``la`` is still reported.
        fresh = PatternMatcher()
        assert fresh.match(self.cmp('"[^"]*"'), '"abc') is None
        assert fresh.la == 5