def runTest(self):
    # Token classes local to this test, one per tokenizer rule below.
    class A(Token):
        pass

    class B(Token):
        pass

    class AB(Token):
        pass

    # The rules are handed to the tokenizer in exactly this order.
    rules = [(u"ab+", AB), (u"a+", A), (u"b+", B)]
    tokenizer = Tokenizer(rules)

    # A fully consumable input yields one token per matched run, each
    # carrying its text and the span it was taken from.
    produced = list(tokenizer(u"ababaab"))
    expected = [
        AB(u"abab", Span(0, 4)),
        A(u"aa", Span(4, 6)),
        B(u"b", Span(6, 7)),
    ]
    self.assertEqual(produced, expected)

    # This input contains u"c" at index 8; tokenizing it is expected to
    # raise, and the error must report that position.
    text = u"ababaabbcaa"
    with self.assertRaises(TokenizerError) as caught:
        list(tokenizer(text))
    error = caught.exception
    expected_reason = "string cannot be further consumed at position 8"
    self.assertEqual(error.reason, expected_reason)
    self.assertEqual(error.position, 8)
    self.assertEqual(text[error.position], u"c")
def test_epsilon(self):
    # Exercises the empty pattern u"" through the match, find and sub
    # assertion helpers.
    with self.regex(u"") as empty:
        empty.assertMatches(u"", 0)
        empty.assertNotMatches(u"a")

        expected_finds = [
            (u"", Span(0, 0)),
            (u"a", Span(1, 1)),
        ]
        empty.assertAllFinds(expected_finds)

        sub_result = (u"a", 1)
        empty.assertSub(u"", u"a", sub_result)
def test_any(self):
    # Exercises the pattern u"." through the match, find, find-all and sub
    # assertion helpers.
    with self.regex(u".") as any_char:
        any_char.assertMatches(u"a", 1)
        any_char.assertFindEqual(u"a", Span(0, 1))

        both_positions = [Span(0, 1), Span(1, 2)]
        any_char.assertFindAllEqual(u"aa", both_positions)

        for source, outcome in ((u"a", (u"b", 1)), (u"aa", (u"bb", 2))):
            any_char.assertSub(source, u"b", outcome)
def test_neither(self):
    # Exercises the negated character set u"[^ab]".
    with self.regex(u"[^ab]") as neither:
        neither.assertMatches(u"c", 1)
        neither.assertNotMatchesAny([u"a", u"b"])

        expected_finds = [
            (u"c", Span(0, 1)),
            (u"ac", Span(1, 2)),
            (u"bc", Span(1, 2)),
        ]
        neither.assertAllFinds(expected_finds)

        # Same expectation for both inputs; a fresh span list is built for
        # each call.
        neither.assertFindAllEqual(u"cac", [Span(0, 1), Span(2, 3)])
        neither.assertFindAllEqual(u"cbc", [Span(0, 1), Span(2, 3)])

        substitutions = (
            (u"bcb", (u"bab", 1)),
            (u"bcbcb", (u"babab", 2)),
        )
        for source, outcome in substitutions:
            neither.assertSub(source, u"a", outcome)
def match_token(self, string, start=0):
    """Build a token from the first definition whose matcher accepts *string*.

    Walks ``self.definitions`` (pairs of ``(matcher, token_cls)``) in order
    and calls ``matcher.match(string)`` on each; the first result that is
    not ``None`` is used as the end index of the token text.

    :param string: Input handed unchanged to every matcher.
    :param start: Offset added to the match end when building the span.
    :return: ``(token, rest, position)`` where ``token`` is
             ``token_cls(string[:end], Span(start, start + end))``, ``rest``
             is ``string[end:]`` and ``position`` is ``start + end``.
             Falls through and returns ``None`` when no matcher accepts.
    """
    for matcher, token_cls in self.definitions:
        length = matcher.match(string)
        if length is None:
            # This definition does not apply; try the next one.
            continue
        matched = string[:length]
        stop = start + length
        token = token_cls(matched, Span(start, stop))
        return token, string[length:], stop
def test_range(self):
    # Exercises the character range pattern u"[a-c]".
    with self.regex(u"[a-c]") as char_range:
        for text in (u"a", u"aa", u"b", u"bb", u"c", u"cc"):
            char_range.assertMatches(text, 1)

        for text in (u"a", u"b", u"c"):
            char_range.assertFindEqual(text, Span(0, 1))

        for text in (u"da", u"db", u"dc"):
            char_range.assertFindEqual(text, Span(1, 2))

        for text in (u"ada", u"bdb", u"cdc"):
            char_range.assertFindAllEqual(text, [Span(0, 1), Span(2, 3)])

        # (input, expected sub result) pairs; the replacement is always u"e".
        substitutions = (
            (u"faf", (u"fef", 1)),
            (u"fbf", (u"fef", 1)),
            (u"fcf", (u"fef", 1)),
            (u"fafbf", (u"fefef", 2)),
            (u"fafbfcf", (u"fefefef", 3)),
        )
        for source, outcome in substitutions:
            char_range.assertSub(source, u"e", outcome)
def test_concatenation(self):
    # Exercises the two-character pattern u"ab" through the match, find,
    # find-all and sub assertion helpers.
    with self.regex(u"ab") as regex:
        regex.assertMatches(u"ab", 2)
        regex.assertMatches(u"abab", 2)

        # All literals use the lower-case ``u`` prefix, like the rest of
        # the test methods.
        regex.assertAllFinds([
            (u"ab", Span(0, 2)),
            (u"cab", Span(1, 3)),
        ])

        regex.assertFindAllEqual(u"abab", [Span(0, 2), Span(2, 4)])
        regex.assertFindAllEqual(u"abcab", [Span(0, 2), Span(3, 5)])

        regex.assertSub(u"ab", u"c", (u"c", 1))
        regex.assertSub(u"abab", u"c", (u"cc", 2))
        regex.assertSub(u"dabdabd", u"c", (u"dcdcd", 2))
def test_one_or_more(self):
    # Exercises the repetition pattern u"a+".
    with self.regex(u"a+") as plus:
        plus.assertAllMatches([(u"a", 1), (u"aa", 2)])

        # Inputs made only of u"a": the find spans the whole input.
        for text in (u"a", u"aa"):
            plus.assertFindEqual(text, Span(0, len(text)))

        # Inputs with a leading u"b": the find starts at index 1.
        for text in (u"ba", u"baa"):
            plus.assertFindEqual(text, Span(1, len(text)))

        plus.assertFindAllEqual(u"aba", [Span(0, 1), Span(2, 3)])
        plus.assertFindAllEqual(u"aabaa", [Span(0, 2), Span(3, 5)])

        for source in (u"cac", u"caac"):
            plus.assertSub(source, u"b", (u"cbc", 1))
def test_character(self):
    # Exercises the single-character pattern u"a".
    with self.regex(u"a") as char:
        for text in (u"a", u"aa"):
            char.assertMatches(text, 1)

        expected_finds = [
            (u"a", Span(0, 1)),
            (u"ba", Span(1, 2)),
        ]
        char.assertAllFinds(expected_finds)

        char.assertFindAllEqual(u"aa", [Span(0, 1), Span(1, 2)])
        char.assertFindAllEqual(u"aba", [Span(0, 1), Span(2, 3)])

        # (input, expected sub result) pairs; the replacement is always u"b".
        substitutions = (
            (u"a", (u"b", 1)),
            (u"ab", (u"bb", 1)),
            (u"aa", (u"bb", 2)),
            (u"bab", (u"bbb", 1)),
        )
        for source, outcome in substitutions:
            char.assertSub(source, u"b", outcome)
def test_either(self):
    # Exercises the character set pattern u"[ab]".
    with self.regex(u"[ab]") as either:
        for text in (u"a", u"b", u"aa", u"bb", u"ab", u"ba"):
            either.assertMatches(text, 1)

        for text in (u"a", u"b"):
            either.assertFindEqual(text, Span(0, 1))

        for text in (u"ca", u"cb"):
            either.assertFindEqual(text, Span(1, 2))

        for text in (u"aa", u"bb", u"ab", u"ba"):
            either.assertFindAllEqual(text, [Span(0, 1), Span(1, 2)])

        for text in (u"aca", u"bcb", u"acb", u"bca"):
            either.assertFindAllEqual(text, [Span(0, 1), Span(2, 3)])

        # (input, expected sub result) pairs; the replacement is always u"c".
        substitutions = (
            (u"a", (u"c", 1)),
            (u"b", (u"c", 1)),
            (u"dadbd", (u"dcdcd", 2)),
        )
        for source, outcome in substitutions:
            either.assertSub(source, u"c", outcome)
def test_group(self):
    # First the plain group u"(ab)".
    with self.regex(u"(ab)") as grouped:
        for text in (u"ab", u"abab", u"ababab"):
            grouped.assertMatches(text, 2)

        expected_finds = [
            (u"ab", Span(0, 2)),
            (u"cab", Span(1, 3)),
        ]
        grouped.assertAllFinds(expected_finds)

        grouped.assertFindAllEqual(u"abab", [Span(0, 2), Span(2, 4)])
        grouped.assertFindAllEqual(u"abcab", [Span(0, 2), Span(3, 5)])

        for source, outcome in ((u"dabd", (u"dcd", 1)), (u"dababd", (u"dccd", 2))):
            grouped.assertSub(source, u"c", outcome)

    # Then the same group under repetition, u"(ab)+".
    with self.regex(u"(ab)+") as repeated:
        repeated.assertAllMatches([
            (u"ab", 2),
            (u"abab", 4),
            (u"ababab", 6),
        ])

        for text in (u"ab", u"abab"):
            repeated.assertFindEqual(text, Span(0, len(text)))

        for text in (u"cab", u"cabab"):
            repeated.assertFindEqual(text, Span(1, len(text)))

        repeated.assertFindAllEqual(u"abcab", [Span(0, 2), Span(3, 5)])
        repeated.assertFindAllEqual(u"ababcabab", [Span(0, 4), Span(5, 9)])

        for source in (u"dabd", u"dababd"):
            repeated.assertSub(source, u"c", (u"dcd", 1))