def test_use_foreign_dtd(self): """ If UseForeignDTD is passed True and a document without an external entity reference is parsed, ExternalEntityRefHandler is first called with None for the public and system ids. """ handler_call_args = [] def resolve_entity(context, base, system_id, public_id): handler_call_args.append((public_id, system_id)) return 1 parser = expat.ParserCreate() parser.UseForeignDTD(True) parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) parser.ExternalEntityRefHandler = resolve_entity parser.Parse("<?xml version='1.0'?><element/>") self.assertEqual(handler_call_args, [(None, None)]) # test UseForeignDTD() is equal to UseForeignDTD(True) handler_call_args[:] = [] parser = expat.ParserCreate() parser.UseForeignDTD() parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS) parser.ExternalEntityRefHandler = resolve_entity parser.Parse("<?xml version='1.0'?><element/>") self.assertEqual(handler_call_args, [(None, None)])
def test_parse_file(self):
    """Parse the sample document through ParseFile and check the event log.

    Every handler named in ``self.handler_names`` is wired from a fresh
    ``self.Outputter()`` onto the parser; the first eighteen entries of the
    outputter's ``out`` list are then compared with the expected text.
    Afterwards, ParseFile on an already-closed file must raise ValueError.
    """
    recorder = self.Outputter()
    parser = expat.ParserCreate(namespace_separator='!')
    parser.returns_unicode = 1
    for handler_name in self.handler_names:
        setattr(parser, handler_name, getattr(recorder, handler_name))
    parser.ParseFile(StringIO.StringIO(data))

    expected_events = [
        'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'',
        "Comment: u' comment data '",
        "EL decl: root: (2, 0, None, ())",
        "Notation declared: (u'notation', None, u'notation.jpeg', None)",
        "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')",
        "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}",
        "NS decl: u'myns' u'http://www.python.org/namespace'",
        "Start element: u'http://www.python.org/namespace!subelement' {}",
        "Character data: u'Contents of subelements'",
        "End element: u'http://www.python.org/namespace!subelement'",
        "End of NS decl: u'myns'",
        "Start element: u'sub2' {}",
        'Start of CDATA section',
        "Character data: u'contents of CDATA section'",
        'End of CDATA section',
        "End element: u'sub2'",
        "External entity ref: (None, u'entity.file', None)",
        "End element: u'root'",
    ]
    events = recorder.out
    # Compare entry by entry so a failure points at the offending event.
    for position, expected in enumerate(expected_events):
        self.assertEqual(events[position], expected)

    # Issue 4877: expat.ParseFile causes segfault on a closed file.
    closed_file = open(test_support.TESTFN, 'wb')
    try:
        closed_file.close()
        parser = expat.ParserCreate()
        with self.assertRaises(ValueError):
            parser.ParseFile(closed_file)
    finally:
        test_support.unlink(test_support.TESTFN)
def parse(self, input):
    """Parse *input* and return ``self.root``.

    *input* is either a string holding the complete document, or an object
    with a ``read(size)`` method that is drained in ``_BLOCKSIZE`` pieces.
    ``self.reset()`` is called first, and ``self.start`` / ``self.end`` /
    ``self.cdata`` are installed as the expat element and character-data
    handlers.

    Whether parsing succeeds or raises, a truthy ``self.root`` is passed to
    ``_clean_tree`` before control leaves this method.
    """
    self.reset()

    p = pyexpat.ParserCreate()
    p.StartElementHandler = self.start
    p.EndElementHandler = self.end
    p.CharacterDataHandler = self.cdata

    try:
        # isinstance (rather than an exact type comparison) lets str
        # subclasses take the whole-document path instead of being
        # mistaken for file-like objects.
        if isinstance(input, str):
            p.Parse(input, 1)
        else:
            while True:
                s = input.read(_BLOCKSIZE)
                if not s:
                    # An empty read marks end of input: tell expat the
                    # document is complete.
                    p.Parse('', 1)
                    break
                p.Parse(s, 0)
    finally:
        if self.root:
            _clean_tree(self.root)

    return self.root
def test_set_buffersize(self):
    """buffer_size keeps a small value and rejects sys.maxsize + 1."""
    import sys
    import pyexpat

    parser = pyexpat.ParserCreate()
    parser.buffer_size = 150
    assert parser.buffer_size == 150

    # Either exception type is accepted for the out-of-range value.
    too_large = sys.maxsize + 1
    raises((ValueError, TypeError), setattr, parser, 'buffer_size', too_large)
class NamespaceSeparatorTest(unittest.TestCase): def test_legal(self): # Tests that make sure we get errors when the namespace_separator value # is illegal, and that we don't for good values: expat.ParserCreate() expat.ParserCreate(namespace_separator=None) expat.ParserCreate(namespace_separator=' ') def test_illegal(self): try: expat.ParserCreate(namespace_separator=42) self.fail() except TypeError, e: self.assertEqual( str(e), 'ParserCreate() argument 2 must be string or None, not int') try: expat.ParserCreate(namespace_separator='too long') self.fail() except ValueError, e: self.assertEqual( str(e), 'namespace_separator must be at most one character, omitted, or None' )
def test_expaterror(self):
    """A lone '<' parsed as the final chunk raises ExpatError whose code
    is the one registered for XML_ERROR_UNCLOSED_TOKEN."""
    import pyexpat
    from pyexpat import errors

    parser = pyexpat.ParserCreate()
    excinfo = raises(pyexpat.ExpatError, parser.Parse, '<', True)
    expected_code = errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN]
    assert excinfo.value.code == expected_code
def setUp(self):
    """Create ``self.parser`` and feed it a document that fails to parse.

    Any ExpatError raised by the bad markup is deliberately ignored; the
    parser object is kept on ``self`` afterwards.
    """
    broken_document = '\n<a><>\n'
    self.parser = expat.ParserCreate()
    try:
        self.parser.Parse(broken_document, 1)
    except expat.ExpatError:
        # Swallowed on purpose: only the parser left behind is of interest.
        pass
def test_entities(self): import pyexpat parser = pyexpat.ParserCreate(None, "") def startElement(tag, attrs): assert tag == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#RDF' assert attrs == { 'http://www.w3.org/XML/1998/namespacebase': 'http://www.semanticweb.org/jiba/ontologies/2017/0/test' } parser.StartElementHandler = startElement parser.Parse( """<?xml version="1.0"?> <!DOCTYPE rdf:RDF [ <!ENTITY owl "http://www.w3.org/2002/07/owl#" > <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" > <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" > <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" > ]> <rdf:RDF xmlns="http://www.semanticweb.org/jiba/ontologies/2017/0/test#" xml:base="http://www.semanticweb.org/jiba/ontologies/2017/0/test" xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#" xmlns:owl="http://www.w3.org/2002/07/owl#" xmlns:xsd="http://www.w3.org/2001/XMLSchema#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"> </rdf:RDF> """, True)
def test_disabling_buffer(self):
    """Toggle buffer_text between chunks and check the callback counter.

    ``self.counting_handler`` is installed as the character-data handler
    (presumably it bumps ``self.n`` once per call -- confirm against the
    class).  The expected counter values are 1 after the buffered opening
    chunk, 11 after ten unbuffered chunks, and 12 after the buffered
    closing chunk.  buffer_size must stay at 1024 throughout.
    """
    opening_chunk = "<?xml version='1.0' encoding='iso8859'?><a>%s" % ('a' * 512)
    middle_chunk = ('b' * 1024)
    closing_chunk = "%s</a>" % ('c' * 1024)

    parser = expat.ParserCreate()
    parser.CharacterDataHandler = self.counting_handler
    parser.buffer_text = 1
    parser.buffer_size = 1024
    self.assertEqual(parser.buffer_size, 1024)

    # Buffering on: parse the opening chunk.
    self.n = 0
    parser.Parse(opening_chunk, 0)
    self.assertEqual(parser.buffer_size, 1024)
    self.assertEqual(self.n, 1)

    # Buffering off: feed the middle chunk ten times.
    parser.buffer_text = 0
    self.assertFalse(parser.buffer_text)
    self.assertEqual(parser.buffer_size, 1024)
    for _ in range(10):
        parser.Parse(middle_chunk, 0)
    self.assertEqual(self.n, 11)

    # Buffering back on: finish the document.
    parser.buffer_text = 1
    self.assertTrue(parser.buffer_text)
    self.assertEqual(parser.buffer_size, 1024)
    parser.Parse(closing_chunk, 1)
    self.assertEqual(self.n, 12)
def test_intern(self): import pyexpat p = pyexpat.ParserCreate() def f(*args): pass p.StartElementHandler = f p.EndElementHandler = f p.Parse("<xml></xml>") assert len(p.intern) == 1
def test_illegal(self):
    """A non-string namespace_separator must raise TypeError with the
    exact message checked below."""
    # assertRaises replaces the Python-2-only ``except TypeError, e`` form
    # and fails by itself when no exception is raised.
    with self.assertRaises(TypeError) as cm:
        expat.ParserCreate(namespace_separator=42)
    self.assertEqual(
        str(cm.exception),
        'ParserCreate() argument 2 must be string or None, not int')
def test_malformed_xml(self):
    """Parsing a NUL byte followed by CRLF as a complete document must
    raise ExpatError.  Skipped on darwin."""
    import sys
    if sys.platform == "darwin":
        skip("Fails with the version of expat on Mac OS 10.6.6")
    import pyexpat

    xml = "\0\r\n"
    parser = pyexpat.ParserCreate()
    # Callable form of raises(): no source string is evaluated.
    raises(pyexpat.ExpatError, parser.Parse, xml, True)
def test_explicit_encoding(self): xml = "<?xml version='1.0'?><s>caf\xe9</s>" import pyexpat p = pyexpat.ParserCreate(encoding='iso-8859-1') def gotText(text): assert text == u"caf\xe9" p.CharacterDataHandler = gotText p.Parse(xml)
def test1(self): xml = "\0\r\n" parser = expat.ParserCreate() try: parser.Parse(xml, True) self.fail() except expat.ExpatError as e: self.assertEqual(str(e), 'unclosed token: line 2, column 0')
def test_decode_error(self):
    """Parsing a document with undeclared non-ASCII bytes must raise
    UnicodeDecodeError whose ``start`` is 4."""
    import pyexpat

    def ignore(*args):
        pass

    document = '<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>'
    parser = pyexpat.ParserCreate()
    parser.StartElementHandler = ignore
    excinfo = raises(UnicodeDecodeError, parser.Parse, document)
    assert excinfo.value.start == 4
def setUp(self):
    """Prepare a parser wired to this object's entity and character-data
    handlers, plus the state the tests read: an empty ``data`` list, a
    ``retval`` of 1, and a document that references an external entity."""
    doctype_line = '<!DOCTYPE test [<!ENTITY external SYSTEM "external.txt">]>\n'
    root_line = '<root>Hi &external;!</root>'

    parser = expat.ParserCreate()
    parser.ExternalEntityRefHandler = self.ExternalEntityRefHandler
    parser.CharacterDataHandler = self.CharacterDataHandler
    self.parser = parser

    self.data = []
    self.retval = 1
    self.document = doctype_line + root_line
def test_malformed_xml(self):
    """Parsing a NUL byte followed by CRLF must raise ExpatError whose
    first argument mentions the unclosed token at line 2, column 0.
    Skipped on darwin."""
    import sys
    if sys.platform == "darwin":
        skip("Fails with the version of expat on Mac OS 10.6.6")
    import pyexpat

    xml = "\0\r\n"
    parser = pyexpat.ParserCreate()
    # Callable form of raises(): no source string is evaluated.
    exc = raises(pyexpat.ExpatError, parser.Parse, xml, True)
    # Read the message through .args; indexing the exception object
    # itself only works on Python 2.
    assert 'unclosed token: line 2, column 0' in exc.value.args[0]
def __init__(self, inputobj, mwetkparser):
    """Store the input object and owning parser, create empty event
    containers, and build an expat sub-parser whose comment and element
    callbacks are this object's handler methods."""
    self._inputobj = inputobj
    self._mwetkparser = mwetkparser
    self._queue = collections.deque()  # deque[XMLEvent]
    self._elemstack = []  # list[XMLEvent] (event_str=="start")

    subparser = expat.ParserCreate()
    subparser.CommentHandler = self.CommentHandler
    subparser.StartElementHandler = self.StartElementHandler
    subparser.EndElementHandler = self.EndElementHandler
    self._subparser = subparser
def test_python_encoding(self): # This name is not knonwn by expat xml = "<?xml version='1.0' encoding='latin1'?><s>caf\xe9</s>" import pyexpat p = pyexpat.ParserCreate() def gotText(text): assert text == u"caf\xe9" p.CharacterDataHandler = gotText p.Parse(xml)
def test_zero_length(self): # ParserCreate() needs to accept a namespace_separator of zero length # to satisfy the requirements of RDF applications that are required # to simply glue together the namespace URI and the localname. Though # considered a wart of the RDF specifications, it needs to be supported. # # See XML-SIG mailing list thread starting with # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html # expat.ParserCreate(namespace_separator='') # too short
def test(self): parser = expat.ParserCreate() parser.StartElementHandler = self.StartElementHandler try: parser.Parse("<a><b><c/></b></a>", 1) self.fail() except RuntimeError, e: self.assertEqual(e.args[0], 'a', "Expected RuntimeError for element 'a', but" + \ " found %r" % e.args[0])
def test_parse_str(self): xml = "<?xml version='1.0' encoding='latin1'?><s>caf\xe9</s>" import pyexpat p = pyexpat.ParserCreate() def gotText(text): assert text == "caf\xe9" p.CharacterDataHandler = gotText p.Parse(xml)
def test_encoding_xml(self):
    """The document declares iso-8859-1, one of the few encodings built
    into expat; the handler must receive u"caf\\xe9" and the parser must
    report a truthy ``returns_unicode``."""
    import pyexpat

    def check_text(text):
        assert text == u"caf\xe9"

    document = "<?xml version='1.0' encoding='iso-8859-1'?><s>caf\xe9</s>"
    parser = pyexpat.ParserCreate()
    parser.CharacterDataHandler = check_text
    assert parser.returns_unicode
    parser.Parse(document)
def test_parse_again(self):
    """Calling ParseFile a second time on the same parser must raise
    expat.error whose code maps to XML_ERROR_FINISHED."""
    parser = expat.ParserCreate()
    source = StringIO.StringIO(data)
    parser.ParseFile(source)

    # Issue 6676: ensure a meaningful exception is raised when attempting
    # to parse more than one XML document per xmlparser instance,
    # a limitation of the Expat library.
    with self.assertRaises(expat.error) as caught:
        parser.ParseFile(source)
    message = expat.ErrorString(caught.exception.code)
    self.assertEqual(message, expat.errors.XML_ERROR_FINISHED)
def test_ipy2_gh655(self): """https://github.com/IronLanguages/ironpython2/issues/655""" import pyexpat buffer_size = pyexpat.ParserCreate().buffer_size self.assertEqual(buffer_size, 8192) import xml.etree.ElementTree as ET for count in range(buffer_size - 100, buffer_size + 100): txt = b'<Data>' + b'1'*count + b'</Data>' result = ET.tostring(ET.fromstring(txt)) self.assertEqual(txt, result)
def test(self): self.parser = expat.ParserCreate() self.parser.StartElementHandler = self.StartElementHandler self.parser.EndElementHandler = self.EndElementHandler self.upto = 0 self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2), ('e', 15, 3, 6), ('e', 17, 4, 1), ('e', 22, 5, 0)] xml = '<a>\n <b>\n <c/>\n </b>\n</a>' self.parser.Parse(xml, 1)
def test_wrong_size(self): parser = expat.ParserCreate() parser.buffer_text = 1 with self.assertRaises(ValueError): parser.buffer_size = -1 with self.assertRaises(ValueError): parser.buffer_size = 0 with self.assertRaises(TypeError): parser.buffer_size = 512.0 with self.assertRaises(TypeError): parser.buffer_size = sys.maxint + 1
def small_buffer_test(self, buffer_len):
    """Parse an element holding ``buffer_len`` 'a' characters with text
    buffering on and a 1024-byte buffer, and return ``self.n``.

    ``self.n`` is reset to 0 before parsing; ``self.counting_handler`` is
    installed as the character-data handler.
    """
    payload = 'a' * buffer_len
    document = "<?xml version='1.0' encoding='iso8859'?><s>%s</s>" % (payload)

    parser = expat.ParserCreate()
    parser.CharacterDataHandler = self.counting_handler
    parser.buffer_size = 1024
    parser.buffer_text = 1

    self.n = 0
    parser.Parse(document)
    return self.n
def test_simple(self):
    """Parse() returns 1 for a well-formed document; feeding more text
    afterwards raises ExpatError with line 1, offset 11 and code 9.
    ExpatError must also be constructible directly."""
    import pyexpat

    parser = pyexpat.ParserCreate()
    status = parser.Parse("<xml></xml>")
    assert status == 1

    excinfo = raises(pyexpat.ExpatError, parser.Parse, "3")
    error = excinfo.value
    assert error.lineno == 1
    assert error.offset == 11
    assert error.code == 9  # XML_ERROR_JUNK_AFTER_DOC_ELEMENT

    pyexpat.ExpatError("error")
def test_get_handler(self): import pyexpat p = pyexpat.ParserCreate() assert p.StartElementHandler is None assert p.EndElementHandler is None def f(*args): pass p.StartElementHandler = f assert p.StartElementHandler is f def g(*args): pass p.EndElementHandler = g assert p.StartElementHandler is f assert p.EndElementHandler is g