def test_constructor(self):
    # Check the state of a freshly constructed XMLEntity for each kind of
    # source used here: a byte string, the value returned by ul() and a
    # StringIO wrapping that value.  In every case the first character
    # exposed must be '<' and must satisfy is_unicode().
    #
    # assertEqual is used for value comparisons so that a failure reports
    # the actual value rather than just "False is not true".

    # byte string source
    e = structures.XMLEntity(b"<hello>")
    self.assertEqual(e.line_num, 1)
    self.assertEqual(e.line_pos, 1)
    self.assertTrue(is_unicode(e.the_char),
                    "the_char is not unicode: %s" % repr(e.the_char))
    self.assertEqual(e.the_char, '<')

    # ul() source (position is deliberately not checked here, matching
    # the checks made for this source historically)
    e = structures.XMLEntity(ul("<hello>"))
    self.assertTrue(is_unicode(e.the_char),
                    "the_char is not unicode: %s" % repr(e.the_char))
    self.assertEqual(e.the_char, '<')

    # file-like source
    e = structures.XMLEntity(StringIO(ul("<hello>")))
    self.assertEqual(e.line_num, 1)
    self.assertEqual(e.line_pos, 1)
    self.assertTrue(is_unicode(e.the_char),
                    "the_char is not unicode: %s" % repr(e.the_char))
    self.assertEqual(e.the_char, '<')
def test_chars(self):
    # Walk an entity one character at a time with next_char(), checking
    # the_char at every step, then confirm that the_char is None once the
    # data is exhausted and that reset() returns to the first character.
    e = structures.XMLEntity(b"<hello>")
    for c in "<hello>":
        # assertEqual reports both values on a mismatch
        self.assertEqual(e.the_char, c)
        e.next_char()
    self.assertTrue(
        e.the_char is None,
        "expected None at end of entity, got %s" % repr(e.the_char))
    e.reset()
    self.assertEqual(e.the_char, '<')
def test_codecs(self):
    # Exercise character decoding of XMLEntity for several byte encodings
    # of the same text (m), with and without a byte order mark, with and
    # without an explicit encoding argument, and after a mid-stream call
    # to change_encoding().  Where the test checks it, the entity's bom
    # attribute must be exactly True or False (identity, not truthiness).
    m = ul('Caf\xe9')

    def check_chars(entity, expected, label):
        # Compare the_char against each character of expected in turn,
        # advancing the entity after every comparison.  label completes
        # the failure message "Print: parsing <label> got ...".
        for c in expected:
            self.assertEqual(
                entity.the_char, c,
                "Print: parsing %s got %s instead of %s" %
                (label, repr(entity.the_char), repr(c)))
            entity.next_char()

    # no encoding argument, UTF-8 data without a BOM
    e = structures.XMLEntity(b'Caf\xc3\xa9')
    self.assertTrue(e.bom is False, 'defaulted utf-8 BOM detection')
    check_chars(e, m, "utf-8")

    # explicit latin_1
    e = structures.XMLEntity(b'Caf\xe9', 'latin_1')
    self.assertTrue(e.bom is False, 'latin_1 BOM detection')
    check_chars(e, m, "latin-1")

    # This string should be automatically detected
    e = structures.XMLEntity(b'\xff\xfeC\x00a\x00f\x00\xe9\x00')
    self.assertTrue(e.bom is True, 'utf-16-le BOM detection')
    check_chars(e, m, "utf-16LE")

    e = structures.XMLEntity(b'\xfe\xff\x00C\x00a\x00f\x00\xe9')
    self.assertTrue(e.bom is True, 'utf-16-be BOM detection')
    check_chars(e, m, "utf-16BE")

    # UTF-8 BOM bytes with an explicit 'utf-8' argument: the first
    # character read must still be 'C' and bom is expected to be False
    e = structures.XMLEntity(b'\xef\xbb\xbfCaf\xc3\xa9', 'utf-8')
    self.assertTrue(e.bom is False, 'utf-8 BOM detection')
    check_chars(e, m, "utf-8 with BOM")

    # switch encoding part way through: skip past 'Ca' first, then the
    # remaining bytes must be read using the new encoding
    e = structures.XMLEntity(b'Caf\xe9')
    for _ in 'Ca':
        e.next_char()
    e.change_encoding('ISO-8859-1')
    self.assertEqual(e.the_char, 'f', "Bad encoding change")
    e.next_char()
    self.assertEqual(
        e.the_char, character(0xE9),
        "Print: change encoding got %s instead of %s" %
        (repr(e.the_char), repr(character(0xE9))))

    # explicit utf-16-le, no BOM in the data
    e = structures.XMLEntity(b'C\x00a\x00f\x00\xe9\x00', 'utf-16-le')
    self.assertTrue(e.bom is False, 'utf-16-le no BOM detection error')
    check_chars(e, m, "utf-16LE no BOM")

    # add <? to trigger auto-detection
    e = structures.XMLEntity(b'\x00<\x00?\x00C\x00a\x00f\x00\xe9')
    self.assertTrue(e.bom is False, 'utf-16-be no BOM detection error')
    check_chars(e, ul("<?") + m, "utf-16BE no BOM")

    # two BOMs: the second one is expected to come through as the
    # character U+FEFF ahead of the text
    e = structures.XMLEntity(b'\xfe\xff\xfe\xff\x00C\x00a\x00f\x00\xe9')
    check_chars(e, character(0xfeff) + m, "double BOM")
def test_lines(self):
    # Read a three-line entity through to the end (the_char becomes None)
    # and check the line number and line position reported at that point.
    e = structures.XMLEntity(b"Hello\nWorld\n!")
    while e.the_char is not None:
        e.next_char()
    # assertEqual shows the actual position if the expectation is not met
    self.assertEqual(e.line_num, 3)
    self.assertEqual(e.line_pos, 2)