Esempio n. 1
0
    def runTest(self):
        # Checks that the tokenizer turns a fully consumable string into the
        # expected token sequence, and that it raises TokenizerError carrying
        # the offending position when no definition applies any more.
        class A(Token):
            pass

        class B(Token):
            pass

        class AB(Token):
            pass

        lexer = Tokenizer([(u"ab+", AB), (u"a+", A), (u"b+", B)])

        # Success path: tokenize first, then build the expected tokens.
        produced = list(lexer(u"ababaab"))
        expected = [
            AB(u"abab", Span(0, 4)),
            A(u"aa", Span(4, 6)),
            B(u"b", Span(6, 7)),
        ]
        self.assertEqual(produced, expected)

        # Failure path: the u"c" at index 8 is not covered by any definition.
        unconsumable = u"ababaabbcaa"
        with self.assertRaises(TokenizerError) as caught:
            list(lexer(unconsumable))
        error = caught.exception
        self.assertEqual(
            error.reason,
            "string cannot be further consumed at position 8"
        )
        self.assertEqual(error.position, 8)
        self.assertEqual(unconsumable[error.position], u"c")
Esempio n. 2
0
    def test_epsilon(self):
        # Exercises the empty pattern: match, find and substitution checks.
        with self.regex(u"") as epsilon:
            epsilon.assertMatches(u"", 0)
            epsilon.assertNotMatches(u"a")

            expected_finds = [
                (u"", Span(0, 0)),
                (u"a", Span(1, 1)),
            ]
            epsilon.assertAllFinds(expected_finds)

            replaced = (u"a", 1)
            epsilon.assertSub(u"", u"a", replaced)
Esempio n. 3
0
    def test_any(self):
        # Exercises the u"." pattern: match, find, find-all and substitution.
        with self.regex(u".") as dot:
            dot.assertMatches(u"a", 1)

            dot.assertFindEqual(u"a", Span(0, 1))

            both_characters = [Span(0, 1), Span(1, 2)]
            dot.assertFindAllEqual(u"aa", both_characters)

            # (input, expected assertSub result) pairs, replacing with u"b".
            substitutions = (
                (u"a", (u"b", 1)),
                (u"aa", (u"bb", 2)),
            )
            for source, outcome in substitutions:
                dot.assertSub(source, u"b", outcome)
Esempio n. 4
0
    def test_neither(self):
        # Exercises the negated set u"[^ab]": match, find, find-all and
        # substitution checks.
        with self.regex(u"[^ab]") as negated:
            negated.assertMatches(u"c", 1)
            negated.assertNotMatchesAny([u"a", u"b"])

            negated.assertAllFinds([
                (u"c", Span(0, 1)),
                (u"ac", Span(1, 2)),
                (u"bc", Span(1, 2)),
            ])

            for text in (u"cac", u"cbc"):
                negated.assertFindAllEqual(text, [Span(0, 1), Span(2, 3)])

            # (input, expected assertSub result) pairs, replacing with u"a".
            substitutions = (
                (u"bcb", (u"bab", 1)),
                (u"bcbcb", (u"babab", 2)),
            )
            for source, outcome in substitutions:
                negated.assertSub(source, u"a", outcome)
Esempio n. 5
0
 def match_token(self, string, start=0):
     """Build a token from the first definition that matches `string`.

     Walks the ``(matcher, token_cls)`` pairs in ``self.definitions`` in
     order and uses the first one whose ``matcher.match(string)`` result is
     not ``None``. That result is used as the end index of the matched
     prefix of `string`.

     :param string: The text to match against.
     :param start: Offset added to the span boundaries and to the returned
                   position.
     :return: A ``(token, remaining_string, new_position)`` tuple, or
              ``None`` if no definition matches.
     """
     for matcher, token_cls in self.definitions:
         consumed = matcher.match(string)
         if consumed is None:
             # This definition does not apply; try the next one.
             continue
         stop = start + consumed
         token = token_cls(string[:consumed], Span(start, stop))
         return token, string[consumed:], stop
     return None
Esempio n. 6
0
    def test_range(self):
        # Exercises the character range u"[a-c]": match, find, find-all and
        # substitution checks.
        with self.regex(u"[a-c]") as char_range:
            for text in (u"a", u"aa", u"b", u"bb", u"c", u"cc"):
                char_range.assertMatches(text, 1)

            # (input, index of the single-character find) pairs.
            single_finds = (
                (u"a", 0), (u"b", 0), (u"c", 0),
                (u"da", 1), (u"db", 1), (u"dc", 1),
            )
            for text, begin in single_finds:
                char_range.assertFindEqual(text, Span(begin, begin + 1))

            for text in (u"ada", u"bdb", u"cdc"):
                char_range.assertFindAllEqual(text, [Span(0, 1), Span(2, 3)])

            # (input, expected assertSub result) pairs, replacing with u"e".
            substitutions = (
                (u"faf", (u"fef", 1)),
                (u"fbf", (u"fef", 1)),
                (u"fcf", (u"fef", 1)),
                (u"fafbf", (u"fefef", 2)),
                (u"fafbfcf", (u"fefefef", 3)),
            )
            for source, outcome in substitutions:
                char_range.assertSub(source, u"e", outcome)
Esempio n. 7
0
    def test_concatenation(self):
        # Exercises the two-character pattern u"ab": match, find, find-all
        # and substitution checks.
        with self.regex(u"ab") as concat:
            for text in (u"ab", u"abab"):
                concat.assertMatches(text, 2)

            expected_finds = [
                (u"ab", Span(0, 2)),
                (u"cab", Span(1, 3)),
            ]
            concat.assertAllFinds(expected_finds)

            concat.assertFindAllEqual(u"abab", [Span(0, 2), Span(2, 4)])
            concat.assertFindAllEqual(u"abcab", [Span(0, 2), Span(3, 5)])

            # (input, expected assertSub result) pairs, replacing with u"c".
            substitutions = (
                (u"ab", (u"c", 1)),
                (u"abab", (u"cc", 2)),
                (u"dabdabd", (u"dcdcd", 2)),
            )
            for source, outcome in substitutions:
                concat.assertSub(source, u"c", outcome)
Esempio n. 8
0
    def test_one_or_more(self):
        # Exercises the repetition pattern u"a+": match, find, find-all and
        # substitution checks.
        with self.regex(u"a+") as plus:
            plus.assertAllMatches([(u"a", 1), (u"aa", 2)])

            # (input, index where the find begins); each find runs to the
            # end of the input.
            finds = ((u"a", 0), (u"aa", 0), (u"ba", 1), (u"baa", 1))
            for text, begin in finds:
                plus.assertFindEqual(text, Span(begin, len(text)))

            plus.assertFindAllEqual(u"aba", [Span(0, 1), Span(2, 3)])
            plus.assertFindAllEqual(u"aabaa", [Span(0, 2), Span(3, 5)])

            # Both inputs are expected to collapse to the same result.
            for source in (u"cac", u"caac"):
                plus.assertSub(source, u"b", (u"cbc", 1))
Esempio n. 9
0
    def test_character(self):
        # Exercises the single-character pattern u"a": match, find, find-all
        # and substitution checks.
        with self.regex(u"a") as char:
            for text in (u"a", u"aa"):
                char.assertMatches(text, 1)

            expected_finds = [
                (u"a", Span(0, 1)),
                (u"ba", Span(1, 2)),
            ]
            char.assertAllFinds(expected_finds)

            char.assertFindAllEqual(u"aa", [Span(0, 1), Span(1, 2)])
            char.assertFindAllEqual(u"aba", [Span(0, 1), Span(2, 3)])

            # (input, expected assertSub result) pairs, replacing with u"b".
            substitutions = (
                (u"a", (u"b", 1)),
                (u"ab", (u"bb", 1)),
                (u"aa", (u"bb", 2)),
                (u"bab", (u"bbb", 1)),
            )
            for source, outcome in substitutions:
                char.assertSub(source, u"b", outcome)
Esempio n. 10
0
    def test_either(self):
        # Exercises the character set u"[ab]": match, find, find-all and
        # substitution checks.
        with self.regex(u"[ab]") as either:
            for text in (u"a", u"b", u"aa", u"bb", u"ab", u"ba"):
                either.assertMatches(text, 1)

            # (input, index of the single-character find) pairs.
            single_finds = ((u"a", 0), (u"b", 0), (u"ca", 1), (u"cb", 1))
            for text, begin in single_finds:
                either.assertFindEqual(text, Span(begin, begin + 1))

            # Adjacent finds first, then finds separated by a u"c".
            for text in (u"aa", u"bb", u"ab", u"ba"):
                either.assertFindAllEqual(text, [Span(0, 1), Span(1, 2)])
            for text in (u"aca", u"bcb", u"acb", u"bca"):
                either.assertFindAllEqual(text, [Span(0, 1), Span(2, 3)])

            # (input, expected assertSub result) pairs, replacing with u"c".
            substitutions = (
                (u"a", (u"c", 1)),
                (u"b", (u"c", 1)),
                (u"dadbd", (u"dcdcd", 2)),
            )
            for source, outcome in substitutions:
                either.assertSub(source, u"c", outcome)
Esempio n. 11
0
    def test_group(self):
        # Exercises grouping, first as a plain group u"(ab)" and then as a
        # repeated group u"(ab)+".
        with self.regex(u"(ab)") as group:
            for text in (u"ab", u"abab", u"ababab"):
                group.assertMatches(text, 2)

            expected_finds = [
                (u"ab", Span(0, 2)),
                (u"cab", Span(1, 3)),
            ]
            group.assertAllFinds(expected_finds)

            group.assertFindAllEqual(u"abab", [Span(0, 2), Span(2, 4)])
            group.assertFindAllEqual(u"abcab", [Span(0, 2), Span(3, 5)])

            # (input, expected assertSub result) pairs, replacing with u"c".
            for source, outcome in ((u"dabd", (u"dcd", 1)),
                                    (u"dababd", (u"dccd", 2))):
                group.assertSub(source, u"c", outcome)

        with self.regex(u"(ab)+") as repeated:
            repeated.assertAllMatches(
                [(u"ab", 2), (u"abab", 4), (u"ababab", 6)]
            )

            # (input, index where the find begins); each find runs to the
            # end of the input.
            finds = ((u"ab", 0), (u"abab", 0), (u"cab", 1), (u"cabab", 1))
            for text, begin in finds:
                repeated.assertFindEqual(text, Span(begin, len(text)))

            repeated.assertFindAllEqual(u"abcab", [Span(0, 2), Span(3, 5)])
            repeated.assertFindAllEqual(u"ababcabab", [Span(0, 4), Span(5, 9)])

            # Both inputs are expected to collapse to the same result.
            for source in (u"dabd", u"dababd"):
                repeated.assertSub(source, u"c", (u"dcd", 1))