def test_directives(self):
    """Test CDATA and other directives.

    Feeds four inputs to ``parse_one(0)`` and checks the returned token
    tuple for each: a terminated directive, a directive interrupted by a
    second ``<``, a terminated CDATA section, and a CDATA section that
    runs to the end of the input.
    """
    body_start = 2  # both directive inputs open with the two characters '<!'
    cdata_start = len('<![CDATA[')

    # Directive closed by '>': the token is expected to span the whole input.
    scanner = S.markup_scanner('<!normal directive>')
    end = len(scanner.input)
    expected = ('dir', 0, end, {
        'content': (body_start, end - 1),
        'name': (body_start, body_start + len('normal')),
    })
    self.assertEqual(scanner.parse_one(0), expected)

    # Directive cut short by '<': the token is expected to stop before it.
    scanner = S.markup_scanner('<!broken directive<')
    end = len(scanner.input)
    expected = ('udir', 0, end - 1, {
        'content': (body_start, end - 1),
        'name': (body_start, body_start + len('broken')),
    })
    self.assertEqual(scanner.parse_one(0), expected)

    # CDATA with bracket noise inside; content excludes the ']]>' closer.
    scanner = S.markup_scanner('<![CDATA[a[<]]*>+y<]]>')
    end = len(scanner.input)
    expected = ('cdata', 0, end, {
        'content': (cdata_start, end - len(']]>')),
    })
    self.assertEqual(scanner.parse_one(0), expected)

    # CDATA with no closer: content is expected to run to end of input.
    scanner = S.markup_scanner('<![CDATA[...')
    end = len(scanner.input)
    expected = ('udata', 0, end, {
        'content': (cdata_start, end),
    })
    self.assertEqual(scanner.parse_one(0), expected)
def test_comment(self):
    """Test comment tokens.

    Checks ``scan_comment(0)`` on a terminated comment, on a comment
    that is never closed, and on input for which ``S.parse_error`` is
    expected.
    """
    closed = S.markup_scanner('<!---->')
    whole = len(closed.input)
    self.assertEqual(closed.scan_comment(0), ('com', 0, whole))

    unclosed = S.markup_scanner('<!-- some random stuff')
    whole = len(unclosed.input)
    self.assertEqual(unclosed.scan_comment(0), ('ucom', 0, whole))

    # Input does not begin with a comment opener; scanning must raise.
    bogus = S.markup_scanner('!<FAIL')
    self.assertRaises(S.parse_error, bogus.scan_comment, 0)
def test_entity(self):
    """Test consumption of entities.

    Each stem is scanned from offset 1 twice: bare, where an
    unterminated ``'uent'`` token is expected, and with a trailing
    ``;``, where a complete ``'ent'`` token is expected.  A final case
    checks that ``'&$;'`` yields an empty ``'uent'`` token.
    """
    variants = (('', 'uent'), (';', 'ent'))
    for stem in ('&ok', '&123', '&a1bx3c'):
        for suffix, tag in variants:
            scanner = S.markup_scanner(stem + suffix)
            expected = (tag, 1, len(scanner.input))
            self.assertEqual(scanner.scan_entity(1), expected)

    # '$' directly after '&': the expected token is empty (start == end).
    scanner = S.markup_scanner('&$;')
    self.assertEqual(scanner.scan_entity(1), ('uent', 1, 1))
def test_unquoted(self):
    """Test unquoted string tokens.

    Covers ``scan_unquoted`` on a plain word, on input that starts with
    a space (with blanks allowed and with blanks rejected), and
    ``scan_name`` on a name followed by ``<``.
    """
    word = S.markup_scanner('unquoted string')
    self.assertEqual(word.scan_unquoted(0), ('unq', 0, len('unquoted')))

    # Leading space: an empty token when blanks are allowed, else an error.
    blank = S.markup_scanner(' empty')
    self.assertEqual(blank.scan_unquoted(0, allow_blank=True),
                     ('unq', 0, 0))
    self.assertRaises(S.parse_error, blank.scan_unquoted, 0,
                      allow_blank=False)

    name = S.markup_scanner('NAME<')
    self.assertEqual(name.scan_name(0), ('name', 0, len('NAME')))
def test_strings(self):
    """Test quoted string tokens.

    Every case pairs the expected token tag with a source text; the
    token returned by ``scan_string(0)`` is expected to cover the whole
    input in each case.
    """
    cases = (
        ('str', '"double-quoted string"'),
        ('ustr', '"incomplete string'),
        ('str', "'single-quoted string'"),
        ('ustr', "'incomplete string"),
        ('str', '""'),  # empty double-quoted
        ('str', "''"),  # empty single-quoted
        ('ustr', '"\x01'),  # incomplete double-quoted
        ('ustr', "'\x01"),  # incomplete single-quoted
    )
    for expected_tag, text in cases:
        scanner = S.markup_scanner(text)
        result = scanner.scan_string(0)
        self.assertEqual(result, (expected_tag, 0, len(scanner.input)))
def test_attrib(self):
    """Test tag-attribute tokens.

    Checks ``parse_attrib(0)`` on an attribute with no value, one with
    an unquoted value, one with an unterminated quoted value, and the
    same quoted value once its closing quote is appended.
    """
    # Name only; the expected value token is an empty 'none' at the end.
    scanner = S.markup_scanner('novalue/')
    name_end = len('novalue')
    expected = ('attr', 0, name_end, {
        'name': (0, name_end),
        'value': ('none', name_end, name_end),
    })
    self.assertEqual(scanner.parse_attrib(0), expected)

    # Unquoted value: expected to stop before the space, 5 short of the end.
    scanner = S.markup_scanner('name=bare word')
    name_end = len('name')
    expected = ('attr', 0, len(scanner.input) - 5, {
        'name': (0, name_end),
        'value': ('unq', 5, 9),
    })
    self.assertEqual(scanner.parse_attrib(0), expected)

    # Opening quote with no closer: value is expected to run to the end.
    scanner = S.markup_scanner('name="bare word')
    total = len(scanner.input)
    expected = ('attr', 0, total, {
        'name': (0, name_end),
        'value': ('ustr', 5, total),
    })
    self.assertEqual(scanner.parse_attrib(0), expected)

    # Same text with the closing quote appended: a complete string value.
    scanner = S.markup_scanner(scanner.input + '"')
    total = len(scanner.input)
    expected = ('attr', 0, total, {
        'name': (0, name_end),
        'value': ('str', 5, total),
    })
    self.assertEqual(scanner.parse_attrib(0), expected)
def test_literal(self):
    """Test consumption of literals.

    A literal present at the given offset is expected to produce a
    ``'lit'`` token covering exactly that literal; a literal that is
    not present is expected to raise ``S.parse_error``.
    """
    scanner = S.markup_scanner('$++OK')
    wanted = '++'
    self.assertEqual(scanner.scan_literal(1, wanted),
                     ('lit', 1, 1 + len(wanted)))

    # 'XXX' does not occur at offset 0 of the input.
    self.assertRaises(S.parse_error, scanner.scan_literal, 0, 'XXX')
def test_space(self):
    """Test whitespace consumption.

    Input with no leading whitespace is expected to give an empty
    ``'ws'`` token; a run of mixed whitespace is expected to be consumed
    up to, but not including, the trailing ``$``.
    """
    tight = S.markup_scanner('nospace')
    self.assertEqual(tight.scan_space(0), ('ws', 0, 0))

    padded = S.markup_scanner(' \t\f\r\n$')
    ws_end = len(padded.input) - 1  # everything except the final '$'
    self.assertEqual(padded.scan_space(0), ('ws', 0, ws_end))
def test_empty_input(self):
    """Test correct scan of empty input.

    Scanning the empty string is expected to produce a single ``'eof'``
    token at offset zero and nothing else.
    """
    scanner = S.markup_scanner('')
    expected = [('eof', 0, 0, None)]
    self.assertEqual(list(scanner.scan()), expected)