Ejemplo n.º 1
0
    def test_use_foreign_dtd(self):
        """
        If UseForeignDTD is passed True and a document without an external
        entity reference is parsed, ExternalEntityRefHandler is first called
        with None for the public and system ids.
        """
        handler_call_args = []

        def resolve_entity(context, base, system_id, public_id):
            handler_call_args.append((public_id, system_id))
            return 1

        parser = expat.ParserCreate()
        parser.UseForeignDTD(True)
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse("<?xml version='1.0'?><element/>")
        self.assertEqual(handler_call_args, [(None, None)])

        # test UseForeignDTD() is equal to UseForeignDTD(True)
        handler_call_args[:] = []

        parser = expat.ParserCreate()
        parser.UseForeignDTD()
        parser.SetParamEntityParsing(expat.XML_PARAM_ENTITY_PARSING_ALWAYS)
        parser.ExternalEntityRefHandler = resolve_entity
        parser.Parse("<?xml version='1.0'?><element/>")
        self.assertEqual(handler_call_args, [(None, None)])
Ejemplo n.º 2
0
    def test_parse_file(self):
        """Parse the sample document from a file object and check the events.

        Every handler named in self.handler_names is wired to the attribute
        of the same name on an Outputter, and the recorded output lines are
        compared one by one with the expected trace.  Afterwards ParseFile()
        is checked to raise ValueError when handed a closed file.
        """
        recorder = self.Outputter()
        parser = expat.ParserCreate(namespace_separator='!')
        parser.returns_unicode = 1
        for handler_name in self.handler_names:
            setattr(parser, handler_name, getattr(recorder, handler_name))
        stream = StringIO.StringIO(data)

        parser.ParseFile(stream)

        expected_events = [
            'PI: u\'xml-stylesheet\' u\'href="stylesheet.css"\'',
            "Comment: u' comment data '",
            "EL decl: root: (2, 0, None, ())",
            "Notation declared: (u'notation', None, u'notation.jpeg', None)",
            "Unparsed entity decl: (u'unparsed_entity', None, u'entity.file', None, u'notation')",
            "Start element: u'root' {u'attr1': u'value1', u'attr2': u'value2\\u1f40'}",
            "NS decl: u'myns' u'http://www.python.org/namespace'",
            "Start element: u'http://www.python.org/namespace!subelement' {}",
            "Character data: u'Contents of subelements'",
            "End element: u'http://www.python.org/namespace!subelement'",
            "End of NS decl: u'myns'",
            "Start element: u'sub2' {}",
            'Start of CDATA section',
            "Character data: u'contents of CDATA section'",
            'End of CDATA section',
            "End element: u'sub2'",
            "External entity ref: (None, u'entity.file', None)",
            "End element: u'root'",
        ]
        events = recorder.out
        # Compare in order so the first mismatch is the one reported.
        for position, expected in enumerate(expected_events):
            self.assertEqual(events[position], expected)

        # Issue 4877: expat.ParseFile causes segfault on a closed file.
        closed_file = open(test_support.TESTFN, 'wb')
        try:
            closed_file.close()
            fresh_parser = expat.ParserCreate()
            with self.assertRaises(ValueError):
                fresh_parser.ParseFile(closed_file)
        finally:
            test_support.unlink(test_support.TESTFN)
Ejemplo n.º 3
0
    def parse(self, input):
        """Parse an XML document and return self.root.

        input is either a string holding the whole document, or an object
        with a read(size) method that is consumed in chunks of _BLOCKSIZE
        until read() returns an empty value.

        self.reset() is called first.  Whether parsing succeeds or raises,
        a truthy self.root is passed to _clean_tree() before control leaves
        this method.
        """
        self.reset()

        parser = pyexpat.ParserCreate()
        parser.StartElementHandler = self.start
        parser.EndElementHandler = self.end
        parser.CharacterDataHandler = self.cdata

        try:
            # isinstance() instead of an exact type comparison, so that str
            # subclasses are parsed as text rather than treated as files.
            if isinstance(input, str):
                parser.Parse(input, 1)
            else:
                while True:
                    chunk = input.read(_BLOCKSIZE)
                    if not chunk:
                        # Empty read: tell expat the document is complete.
                        parser.Parse('', 1)
                        break
                    parser.Parse(chunk, 0)
        finally:
            if self.root:
                _clean_tree(self.root)

        return self.root
Ejemplo n.º 4
0
 def test_set_buffersize(self):
     """buffer_size is writable but refuses sys.maxsize + 1."""
     import pyexpat
     import sys

     parser = pyexpat.ParserCreate()
     parser.buffer_size = 150
     assert parser.buffer_size == 150

     # Either error type is accepted for the out-of-range value.
     too_large = sys.maxsize + 1
     raises((ValueError, TypeError), setattr, parser, 'buffer_size',
            too_large)
Ejemplo n.º 5
0
class NamespaceSeparatorTest(unittest.TestCase):
    def test_legal(self):
        # Tests that make sure we get errors when the namespace_separator value
        # is illegal, and that we don't for good values:
        expat.ParserCreate()
        expat.ParserCreate(namespace_separator=None)
        expat.ParserCreate(namespace_separator=' ')

    def test_illegal(self):
        try:
            expat.ParserCreate(namespace_separator=42)
            self.fail()
        except TypeError, e:
            self.assertEqual(
                str(e),
                'ParserCreate() argument 2 must be string or None, not int')

        try:
            expat.ParserCreate(namespace_separator='too long')
            self.fail()
        except ValueError, e:
            self.assertEqual(
                str(e),
                'namespace_separator must be at most one character, omitted, or None'
            )
Ejemplo n.º 6
0
 def test_expaterror(self):
     """Parsing a lone '<' raises ExpatError with the unclosed-token code."""
     import pyexpat
     from pyexpat import errors

     parser = pyexpat.ParserCreate()
     excinfo = raises(pyexpat.ExpatError, parser.Parse, '<', True)
     expected_code = errors.codes[errors.XML_ERROR_UNCLOSED_TOKEN]
     assert excinfo.value.code == expected_code
Ejemplo n.º 7
0
 def setUp(self):
     """Create self.parser and feed it a malformed document.

     An ExpatError raised by the input is deliberately ignored, so the
     parser stays on the instance in whatever state the failed parse left.
     """
     malformed = '\n<a><>\n'
     parser = expat.ParserCreate()
     self.parser = parser
     try:
         parser.Parse(malformed, 1)
     except expat.ExpatError:
         # The parse error itself is not of interest here.
         pass
Ejemplo n.º 8
0
    def test_entities(self):
        import pyexpat
        parser = pyexpat.ParserCreate(None, "")

        def startElement(tag, attrs):
            assert tag == 'http://www.w3.org/1999/02/22-rdf-syntax-ns#RDF'
            assert attrs == {
                'http://www.w3.org/XML/1998/namespacebase':
                'http://www.semanticweb.org/jiba/ontologies/2017/0/test'
            }

        parser.StartElementHandler = startElement
        parser.Parse(
            """<?xml version="1.0"?>

        <!DOCTYPE rdf:RDF [
        <!ENTITY owl "http://www.w3.org/2002/07/owl#" >
        <!ENTITY xsd "http://www.w3.org/2001/XMLSchema#" >
        <!ENTITY rdfs "http://www.w3.org/2000/01/rdf-schema#" >
        <!ENTITY rdf "http://www.w3.org/1999/02/22-rdf-syntax-ns#" >
        ]>

        <rdf:RDF xmlns="http://www.semanticweb.org/jiba/ontologies/2017/0/test#"
          xml:base="http://www.semanticweb.org/jiba/ontologies/2017/0/test"
          xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#"
          xmlns:owl="http://www.w3.org/2002/07/owl#"
          xmlns:xsd="http://www.w3.org/2001/XMLSchema#"
          xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
        </rdf:RDF>
        """, True)
Ejemplo n.º 9
0
    def test_disabling_buffer(self):
        """Toggle buffer_text between chunks and check the callback count.

        buffer_size must read back as 1024 at every step, and self.n --
        presumably advanced by self.counting_handler -- must be 1, 11 and 12
        after the three parsing phases.
        """
        first_chunk = ("<?xml version='1.0' encoding='iso8859'?><a>%s"
                       % ('a' * 512))
        middle_chunk = 'b' * 1024
        last_chunk = "%s</a>" % ('c' * 1024)

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_text = 1
        parser.buffer_size = 1024
        self.assertEqual(parser.buffer_size, 1024)

        # Phase 1: buffering enabled, one chunk.
        self.n = 0
        parser.Parse(first_chunk, 0)
        self.assertEqual(parser.buffer_size, 1024)
        self.assertEqual(self.n, 1)

        # Phase 2: buffering disabled, ten more chunks.
        parser.buffer_text = 0
        self.assertFalse(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        for _ in range(10):
            parser.Parse(middle_chunk, 0)
        self.assertEqual(self.n, 11)

        # Phase 3: buffering re-enabled for the final chunk.
        parser.buffer_text = 1
        self.assertTrue(parser.buffer_text)
        self.assertEqual(parser.buffer_size, 1024)
        parser.Parse(last_chunk, 1)
        self.assertEqual(self.n, 12)
Ejemplo n.º 10
0
 def test_intern(self):
     import pyexpat
     p = pyexpat.ParserCreate()
     def f(*args): pass
     p.StartElementHandler = f
     p.EndElementHandler = f
     p.Parse("<xml></xml>")
     assert len(p.intern) == 1
Ejemplo n.º 11
0
 def test_illegal(self):
     try:
         expat.ParserCreate(namespace_separator=42)
         self.fail()
     except TypeError, e:
         self.assertEqual(
             str(e),
             'ParserCreate() argument 2 must be string or None, not int')
Ejemplo n.º 12
0
 def test_malformed_xml(self):
     """Parsing a NUL character followed by CRLF must raise ExpatError."""
     import sys
     if sys.platform == "darwin":
         skip("Fails with the version of expat on Mac OS 10.6.6")
     import pyexpat
     xml = "\0\r\n"
     parser = pyexpat.ParserCreate()
     # The statement is handed to raises() as a string; it names the locals
     # "parser" and "xml", so those names must not be changed.
     raises(pyexpat.ExpatError, "parser.Parse(xml, True)")
Ejemplo n.º 13
0
 def test_explicit_encoding(self):
     xml = "<?xml version='1.0'?><s>caf\xe9</s>"
     import pyexpat
     p = pyexpat.ParserCreate(encoding='iso-8859-1')
     def gotText(text):
         assert text == u"caf\xe9"
     p.CharacterDataHandler = gotText
     p.Parse(xml)
Ejemplo n.º 14
0
 def test1(self):
     xml = "\0\r\n"
     parser = expat.ParserCreate()
     try:
         parser.Parse(xml, True)
         self.fail()
     except expat.ExpatError as e:
         self.assertEqual(str(e), 'unclosed token: line 2, column 0')
Ejemplo n.º 15
0
 def test_decode_error(self):
     """Undecodable input raises UnicodeDecodeError whose start is 4."""
     import pyexpat

     document = '<fran\xe7ais>Comment \xe7a va ? Tr\xe8s bien ?</fran\xe7ais>'

     def ignore(*args):
         return None

     parser = pyexpat.ParserCreate()
     parser.StartElementHandler = ignore
     excinfo = raises(UnicodeDecodeError, parser.Parse, document)
     assert excinfo.value.start == 4
Ejemplo n.º 16
0
 def setUp(self):
     """Prepare a parser wired to this fixture's handlers and a sample document.

     Sets self.data to an empty list, self.retval to 1 and self.document to
     a document that declares and references an external entity.
     """
     self.data = []
     self.retval = 1
     self.document = (
         '<!DOCTYPE test [<!ENTITY external SYSTEM "external.txt">]>\n'
         '<root>Hi &external;!</root>')

     parser = expat.ParserCreate()
     parser.ExternalEntityRefHandler = self.ExternalEntityRefHandler
     parser.CharacterDataHandler = self.CharacterDataHandler
     self.parser = parser
Ejemplo n.º 17
0
 def test_malformed_xml(self):
     """Parsing a NUL character followed by CRLF raises an unclosed-token ExpatError."""
     import sys
     if sys.platform == "darwin":
         skip("Fails with the version of expat on Mac OS 10.6.6")
     import pyexpat
     xml = "\0\r\n"
     parser = pyexpat.ParserCreate()
     # The statement is handed to raises() as a string; it names the locals
     # "parser" and "xml", so those names must not be changed.
     exc = raises(pyexpat.ExpatError, "parser.Parse(xml, True)")
     # NOTE(review): subscripting the exception presumably relies on
     # Python 2 semantics (first constructor argument) -- confirm.
     assert 'unclosed token: line 2, column 0' in exc.value[0]
Ejemplo n.º 18
0
 def __init__(self, inputobj, mwetkparser):
     """Store the collaborators and set up the expat sub-parser.

     inputobj and mwetkparser are kept unchanged on the instance.  The
     sub-parser's comment, start-element and end-element callbacks are
     bound to the same-named methods of this object.
     """
     self._inputobj = inputobj
     self._mwetkparser = mwetkparser
     self._queue = collections.deque()  # deque[XMLEvent]
     self._elemstack = []  # list[XMLEvent]  (event_str=="start")

     subparser = expat.ParserCreate()
     subparser.CommentHandler = self.CommentHandler
     subparser.StartElementHandler = self.StartElementHandler
     subparser.EndElementHandler = self.EndElementHandler
     self._subparser = subparser
Ejemplo n.º 19
0
 def test_python_encoding(self):
     # This name is not knonwn by expat
     xml = "<?xml version='1.0' encoding='latin1'?><s>caf\xe9</s>"
     import pyexpat
     p = pyexpat.ParserCreate()
     def gotText(text):
         assert text == u"caf\xe9"
     p.CharacterDataHandler = gotText
     p.Parse(xml)
Ejemplo n.º 20
0
 def test_zero_length(self):
     # ParserCreate() needs to accept a namespace_separator of zero length
     # to satisfy the requirements of RDF applications that are required
     # to simply glue together the namespace URI and the localname.  Though
     # considered a wart of the RDF specifications, it needs to be supported.
     #
     # See XML-SIG mailing list thread starting with
     # http://mail.python.org/pipermail/xml-sig/2001-April/005202.html
     #
     expat.ParserCreate(namespace_separator='')  # too short
Ejemplo n.º 21
0
 def test(self):
     parser = expat.ParserCreate()
     parser.StartElementHandler = self.StartElementHandler
     try:
         parser.Parse("<a><b><c/></b></a>", 1)
         self.fail()
     except RuntimeError, e:
         self.assertEqual(e.args[0], 'a',
                          "Expected RuntimeError for element 'a', but" + \
                          " found %r" % e.args[0])
Ejemplo n.º 22
0
    def test_parse_str(self):
        xml = "<?xml version='1.0' encoding='latin1'?><s>caf\xe9</s>"
        import pyexpat
        p = pyexpat.ParserCreate()

        def gotText(text):
            assert text == "caf\xe9"

        p.CharacterDataHandler = gotText
        p.Parse(xml)
Ejemplo n.º 23
0
 def test_encoding_xml(self):
     """An encoding declared in the XML prolog is applied to the text."""
     import pyexpat

     # iso-8859-1 is one of the few encodings built into expat.
     document = "<?xml version='1.0' encoding='iso-8859-1'?><s>caf\xe9</s>"

     def check_text(text):
         assert text == u"caf\xe9"

     parser = pyexpat.ParserCreate()
     parser.CharacterDataHandler = check_text
     assert parser.returns_unicode
     parser.Parse(document)
Ejemplo n.º 24
0
 def test_parse_again(self):
     """A second ParseFile() on the same parser raises XML_ERROR_FINISHED.

     Issue 6676: ensure a meaningful exception is raised when attempting
     to parse more than one XML document per xmlparser instance, a
     limitation of the Expat library.
     """
     stream = StringIO.StringIO(data)
     parser = expat.ParserCreate()
     parser.ParseFile(stream)

     with self.assertRaises(expat.error) as caught:
         parser.ParseFile(stream)
     message = expat.ErrorString(caught.exception.code)
     self.assertEqual(message, expat.errors.XML_ERROR_FINISHED)
Ejemplo n.º 25
0
    def test_ipy2_gh655(self):
        """https://github.com/IronLanguages/ironpython2/issues/655"""
        import pyexpat
        buffer_size = pyexpat.ParserCreate().buffer_size
        self.assertEqual(buffer_size, 8192)

        import xml.etree.ElementTree as ET
        for count in range(buffer_size - 100, buffer_size + 100):
            txt = b'<Data>' + b'1'*count + b'</Data>'
            result = ET.tostring(ET.fromstring(txt))
            self.assertEqual(txt, result)
Ejemplo n.º 26
0
    def test(self):
        self.parser = expat.ParserCreate()
        self.parser.StartElementHandler = self.StartElementHandler
        self.parser.EndElementHandler = self.EndElementHandler
        self.upto = 0
        self.expected_list = [('s', 0, 1, 0), ('s', 5, 2, 1), ('s', 11, 3, 2),
                              ('e', 15, 3, 6), ('e', 17, 4, 1),
                              ('e', 22, 5, 0)]

        xml = '<a>\n <b>\n  <c/>\n </b>\n</a>'
        self.parser.Parse(xml, 1)
Ejemplo n.º 27
0
 def test_wrong_size(self):
     """Invalid buffer_size values are rejected with the expected error."""
     parser = expat.ParserCreate()
     parser.buffer_text = 1

     # (expected exception, rejected value), checked in this order.
     rejected = (
         (ValueError, -1),
         (ValueError, 0),
         (TypeError, 512.0),
         (TypeError, sys.maxint + 1),
     )
     for expected_error, bad_size in rejected:
         with self.assertRaises(expected_error):
             parser.buffer_size = bad_size
Ejemplo n.º 28
0
    def small_buffer_test(self, buffer_len):
        """Parse buffer_len characters of text with text buffering enabled.

        The parser uses a buffer_size of 1024.  self.n is reset to 0 before
        parsing and returned afterwards; it is presumably advanced by
        self.counting_handler on each character-data callback.
        """
        payload = 'a' * buffer_len
        document = ("<?xml version='1.0' encoding='iso8859'?><s>%s</s>"
                    % payload)

        parser = expat.ParserCreate()
        parser.CharacterDataHandler = self.counting_handler
        parser.buffer_size = 1024
        parser.buffer_text = 1

        self.n = 0
        parser.Parse(document)
        return self.n
Ejemplo n.º 29
0
    def test_simple(self):
        """Parse() returns 1 on success and raises a located ExpatError on junk."""
        import pyexpat

        parser = pyexpat.ParserCreate()
        outcome = parser.Parse("<xml></xml>")
        assert outcome == 1

        excinfo = raises(pyexpat.ExpatError, parser.Parse, "3")
        error = excinfo.value
        assert error.lineno == 1
        assert error.offset == 11
        assert error.code == 9  # XML_ERROR_JUNK_AFTER_DOC_ELEMENT

        # ExpatError must also be constructible directly from a message.
        pyexpat.ExpatError("error")
Ejemplo n.º 30
0
 def test_get_handler(self):
     import pyexpat
     p = pyexpat.ParserCreate()
     assert p.StartElementHandler is None
     assert p.EndElementHandler is None
     def f(*args): pass
     p.StartElementHandler = f
     assert p.StartElementHandler is f
     def g(*args): pass
     p.EndElementHandler = g
     assert p.StartElementHandler is f
     assert p.EndElementHandler is g