Beispiel #1
0
def parse(xml: str) -> Document:
    """Build a ``Document`` from XML text and run its parser.

    Line endings are normalised before parsing: every CR LF pair and every
    remaining lone CR is replaced by a single LF.

    Args:
        xml: The XML source text.

    Returns:
        The ``Document`` instance, after its ``parse()`` method has been
        called.
    """
    # Order matters: CR LF pairs are collapsed first so that the lone-CR
    # pass cannot turn one CR LF line break into two line feeds.
    normalised = xml.replace("\u000d\u000a", "\u000a").replace("\u000d", "\u000a")

    parsed = Document(normalised)
    parsed.parse()
    return parsed
Beispiel #2
0
    def test_with_xmldeclaration(self):
        # A single-line document: XML declaration, two spaces, then the root.
        source = (
            "<?xml version='1.0' encoding='utf-8' standalone='yes'?>  <root></root>"
        )
        doc = Document(source)
        doc.parse()

        # Fields taken from the XML declaration.
        self.assertEqual("1.0", doc.version)
        self.assertEqual("utf-8", doc.encoding)
        self.assertEqual(True, doc.standalone)

        # The root element itself.
        root = doc.root
        self.assertIsInstance(root, Element)
        self.assertEqual(root.name, "root")
Beispiel #3
0
    def test_without_xmldeclaration(self):
        # Both spellings of an empty root element, with no XML declaration.
        cases = (
            ("Open tag", "<root></root>"),
            ("Closed tag", "<root/>"),
        )
        for label, source in cases:
            with self.subTest(label):
                doc = Document(source)
                doc.parse()

                self.assertIsInstance(doc.root, Element)
                self.assertEqual(doc.root.name, "root")
Beispiel #4
0
    def test_with_empty_dtd(self):
        # XML declaration, a DOCTYPE with an empty internal subset, then the
        # root element. The literal is kept verbatim, including the leading
        # newline and the indentation on every line.
        source = """
        <?xml version='1.0' encoding='utf-8' standalone='yes'?>  
        <!DOCTYPE root []>
        
        <root></root>
        """
        doc = Document(source)
        doc.parse()

        # Fields taken from the XML declaration.
        self.assertEqual("1.0", doc.version)
        self.assertEqual("utf-8", doc.encoding)
        self.assertEqual(True, doc.standalone)

        # The root element itself.
        root = doc.root
        self.assertIsInstance(root, Element)
        self.assertEqual(root.name, "root")
Beispiel #5
0
    def test_with_fluff(self):
        # Comments and processing instructions placed before and after the
        # root element. The literal is kept verbatim, including the leading
        # newline and the indentation on every line.
        source = """
        <?xml version='1.0' encoding='utf-8' standalone='yes'?>  
        <!DOCTYPE root []>
        
        <?Target A rogue processing instruction?>
        <!-- And even a comment! -->
        <root></root>
        <!-- Another comment -->
        <?Target And more PIs?>
        <?Target?>
        """
        doc = Document(source)
        doc.parse()

        # Fields taken from the XML declaration.
        self.assertEqual("1.0", doc.version)
        self.assertEqual("utf-8", doc.encoding)
        self.assertEqual(True, doc.standalone)

        # The root element itself.
        root = doc.root
        self.assertIsInstance(root, Element)
        self.assertEqual(root.name, "root")

        # One PI before the root element and two after it.
        self.assertEqual(3, len(doc.processing_instructions))
Beispiel #6
0
 def test_no_superfluous_characters(self):
     # Each case puts the stray text "some text" at a different position
     # outside the root element; parse() is expected to raise XMLError.
     cases = (
         (
             "Before xml declaration",
             "some text<?xml version='1.0'?><root></root>",
         ),
         (
             "Between xml declaration and dtd",
             "<?xml version='1.0'?>some text<!DOCTYPE root []><root></root>",
         ),
         (
             "Between dtd and root element",
             "<?xml version='1.0'?><!DOCTYPE root []>some text<root></root>",
         ),
         (
             "After root element",
             "<?xml version='1.0'?><!DOCTYPE root []><root></root>some text",
         ),
     )
     for label, source in cases:
         with self.subTest(label):
             # Construction stays outside assertRaises: only parse() is
             # expected to raise.
             doc = Document(source)
             with self.assertRaises(XMLError):
                 doc.parse()
Beispiel #7
0
 def test_only_one_root_element(self):
     # Two sibling top-level elements follow the XML declaration; parse()
     # is expected to raise XMLError.
     source = (
         "<?xml version='1.0' encoding='utf-8' standalone='yes'?>"
         "  <root1></root1> <root2></root2>"
     )
     doc = Document(source)
     self.assertRaises(XMLError, doc.parse)