def pushtree(obj, pattern, target, uri=None, entity_factory=None,
             standalone=False, validate=False, namespaces=None):
    """
    Parse `obj` with a pushtree rule handler, calling `target` on matches.

    A PushtreeManager is built from `pattern` and `namespaces`, and the
    handler it produces is handed to parse() as `rule_handler`.  `target`
    is called with the node on every end-of-element match and with the
    first item of the matched pair on every attribute match.  `uri`,
    `entity_factory`, `standalone` and `validate` are forwarded to parse()
    unchanged.

    Returns whatever parse() returns.
    """
    # Adapter for what Dave uses. FIXME?!
    class _TargetAdapter(object):
        def startElementMatch(self, node):
            # Nothing is reported when a matching element opens
            pass

        def endElementMatch(self, node):
            target(node)

        def attributeMatch(self, pair):
            # `pair` carries the node and the attribute name (hack!);
            # only the node is passed on
            target(pair[0])

    # Build the rule handler, then run the parser on it
    manager = PushtreeManager(pattern, _TargetAdapter(), namespaces=namespaces)
    rule_handler = manager.build_pushtree_handler()
    return parse(obj, uri, entity_factory, standalone, validate,
                 rule_handler=rule_handler)
def run(self, source, parameters=None, result=None):
    """
    Transform a source document as given via an InputSource.

    Assumes that either the Processor instance has already had
    stylesheets appended (via appendStylesheet(), for example), or
    the source document contains xml-stylesheet processing
    instructions that are not being ignored.

    The `parameters` argument is an optional dictionary of
    stylesheet parameters, the keys of which may be given as
    strings if they have no namespace, or as (uri, localname)
    tuples otherwise.

    The optional writer argument is a SAX-like event handler that
    is an Ft.Xml.Xslt.NullWriter subclass. The default writer is
    either an Ft.Xml.Xslt.XmlWriter, HtmlWriter or PlainTextWriter,
    depending on the stylesheet(s).

    The optional `output` argument is a Python file-like object
    to be used as the destination for the writer's output.

    NOTE(review): the signature has no `writer` or `output`
    parameter, only `result`; the two paragraphs above look stale
    and should be confirmed against the callers.

    Raises XsltError (SOURCE_PARSE_ERROR) when tree.parse() fails
    with a ReaderError.
    """
    # Parse the source first; a reader failure is re-raised as an XsltError
    # carrying the source URI (or a placeholder when the URI is empty).
    try:
        document = tree.parse(source)
    except ReaderError, e:
        raise XsltError(XsltError.SOURCE_PARSE_ERROR,
                        uri=(source.uri or '<Python string>'), text=e)
def launch(source, **kwargs):
    """
    Parse `source` and write the resulting document with xml_write().

    Recognized keyword arguments:
      validate   -- passed to parse(); False when omitted
      standalone -- passed to parse(); False when omitted
      pretty     -- if this key is present (whatever its value), the
                    document is written with the 'xml-indent' writer

    Returns None.
    """
    # Omitting an option used to raise KeyError; fall back to False instead.
    doc = parse(source,
                validate=kwargs.get('validate', False),
                standalone=kwargs.get('standalone', False))
    # Presence of the key, not its value, selects indented output
    # (unchanged from the original behaviour).
    if 'pretty' in kwargs:
        doc.xml_write('xml-indent')
    else:
        doc.xml_write()
    return
def test_expat_segfault(self):
    "Check malformed input that caused an Expat segfault error."
    # Case 1: stray bytes inside the XML declaration of a string source
    try:
        parse("<?xml version\xc2\x85='1.0'?>\r\n")
    except ReaderError as e:
        message = str(e)
        self.assertTrue(message.endswith(
            'line 1, column 14: XML declaration not well-formed'))
    else:
        # Parsing must not succeed
        self.fail()

    # Case 2: a NUL byte read from a stream source
    import StringIO
    stream = StringIO.StringIO("\0\r\n")
    try:
        parse(stream)
    except ReaderError as e:
        message = str(e)
        self.assertTrue(message.endswith(
            'line 2, column 1: no element found'))
    else:
        self.fail()
def test_parse_with_url(self):
    # Parse from TEST_URL and spot-check the single top-level node
    doc = parse(TEST_URL)
    #Minimal node testing
    top_level = doc.xml_children
    self.assertEqual(len(top_level), 1)
    root = top_level[0]
    self.assertEqual(root.xml_typecode, tree.element.xml_typecode)
    self.assertEqual(root.xml_qname, 'disclaimer')
    self.assertEqual(root.xml_namespace, None)
    self.assertEqual(root.xml_prefix, None)
def test_parse_with_file_path(self):
    """Parse with file path"""
    doc = parse(TEST_FILE)
    #Minimal node testing
    top_level = doc.xml_children
    self.assertEqual(len(top_level), 1)
    root = top_level[0]
    self.assertEqual(root.xml_typecode, tree.element.xml_typecode)
    self.assertEqual(root.xml_qname, 'disclaimer')
    self.assertEqual(root.xml_namespace, None)
    self.assertEqual(root.xml_prefix, None)
def test_expat_segfault(self):
    "Check malformed input that caused an Expat segfault error."
    def expect_reader_error(source, suffix):
        # parse() must raise ReaderError with a message ending in `suffix`
        try:
            parse(source)
        except ReaderError as e:
            self.assertTrue(str(e).endswith(suffix))
        else:
            self.fail()

    expect_reader_error(
        "<?xml version\xc2\x85='1.0'?>\r\n",
        'line 1, column 14: XML declaration not well-formed')

    import StringIO
    expect_reader_error(
        StringIO.StringIO("\0\r\n"),
        'line 2, column 1: no element found')
def parse(obj, uri=None, entity_factory=None, standalone=False,
          validate=False, prefixes=None, model=None):
    """
    Parse `obj` via tree.parse() and return the resulting document.

    model          -- if truthy, its `clone` attribute replaces entity_factory
    entity_factory -- factory handed to tree.parse(); when still falsy after
                      the model check, nodes.entity_base is used
    prefixes       -- if truthy, applied to the document with
                      set_namespaces() before it is returned
    uri, standalone, validate -- forwarded to tree.parse() unchanged
    """
    # A model takes precedence over an explicitly passed factory
    factory = model.clone if model else entity_factory
    factory = factory or nodes.entity_base
    doc = tree.parse(obj, uri, entity_factory=factory,
                     standalone=standalone, validate=validate)
    if not prefixes:
        return doc
    set_namespaces(doc, prefixes)
    return doc
def test_parse_with_string(self):
    """Parse with string"""
    doc = parse(TEST_STRING)
    #Minimal node testing
    top_level = doc.xml_children
    self.assertEqual(len(top_level), 1)
    root = top_level[0]
    self.assertEqual(root.xml_typecode, tree.element.xml_typecode)
    self.assertEqual(root.xml_qname, 'test')
    self.assertEqual(root.xml_namespace, None)
    self.assertEqual(root.xml_prefix, None)
def test_canonical(self):
    "Tests output of canonical XML (see also test_c14n)"
    # Marked as a known failure: everything after the raise is unreachable
    # on purpose and is kept for when the ticket is resolved.
    raise KnownFailure("See http://trac.xml3k.org/ticket/23")
    source = "<root><empty/>" "</root>"
    expected = ('<?xml version="1.0" encoding="UTF-8"?>\n'
                '<root><empty></empty>'
                '</root>')
    t = tree.parse(source)
    self.assertEqual(t.xml_encode('xml-canonical'), expected)
def test_canonical(self):
    "Tests output of canonical XML (see also test_c14n)"
    # Known failure for now; the check below the raise never runs.
    raise KnownFailure("See http://trac.xml3k.org/ticket/23")
    t = tree.parse("<root><empty/>" "</root>")
    canonical = t.xml_encode('xml-canonical')
    self.assertEqual(
        canonical,
        '<?xml version="1.0" encoding="UTF-8"?>\n'
        '<root><empty></empty>'
        '</root>')
def test_parse_with_stream(self):
    """Parse with stream"""
    stream = open(TEST_FILE)
    try:
        doc = parse(stream)
    finally:
        # The file handle used to be left open; release it even if
        # parsing raises.
        stream.close()
    #Minimal node testing
    self.assertEqual(len(doc.xml_children), 1)
    root = doc.xml_children[0]
    self.assertEqual(root.xml_typecode, tree.element.xml_typecode)
    self.assertEqual(root.xml_qname, 'disclaimer')
    self.assertEqual(root.xml_namespace, None)
    self.assertEqual(root.xml_prefix, None)
def test_parse_with_url(self):
    # Parse from TEST_URL, then compare the only child against a table
    # of expected attribute values
    doc = parse(TEST_URL)
    #Minimal node testing
    self.assertEqual(len(doc.xml_children), 1)
    root = doc.xml_children[0]
    expectations = (
        ('xml_typecode', tree.element.xml_typecode),
        ('xml_qname', 'disclaimer'),
        ('xml_namespace', None),
        ('xml_prefix', None),
    )
    for attr, expected in expectations:
        self.assertEqual(getattr(root, attr), expected)
def test_write_to_stdout(self):
    "Check that the default output is to stdout"
    # Result is unused by this test; the call is kept from the original.
    xml_w = lookup('xml')
    t = tree.parse("<root>entrée</root>")
    stream = StringIO()
    # Remember whatever stdout currently is (a test runner may have
    # replaced it) so it can be put back, rather than forcing
    # sys.__stdout__ afterwards.
    saved_stdout = sys.stdout
    sys.stdout = stream
    try:
        t.xml_write()
        self.assertEqual(stream.getvalue(),
                         '<?xml version="1.0" encoding="UTF-8"?>\n'
                         '<root>entr\xc3\xa9e</root>')
    finally:
        sys.stdout = saved_stdout
def test_write_to_stdout(self):
    "Check that the default output is to stdout"
    # Result is unused by this test; the call is kept from the original.
    xml_w = lookup('xml')
    t = tree.parse("<root>entrée</root>")
    stream = StringIO()
    # Restore the stream that was active before the test, not
    # sys.__stdout__, so a runner's own stdout replacement survives.
    previous_stdout = sys.stdout
    try:
        sys.stdout = stream
        t.xml_write()
        self.assertEqual(
            stream.getvalue(),
            '<?xml version="1.0" encoding="UTF-8"?>\n'
            '<root>entr\xc3\xa9e</root>')
    finally:
        sys.stdout = previous_stdout
def test_parse_with_file_path(self):
    """Parse with file path"""
    doc = parse(TEST_FILE)
    #Minimal node testing
    self.assertEqual(len(doc.xml_children), 1)
    root = doc.xml_children[0]
    expectations = (
        ('xml_typecode', tree.element.xml_typecode),
        ('xml_qname', 'disclaimer'),
        ('xml_namespace', None),
        ('xml_prefix', None),
    )
    for attr, expected in expectations:
        self.assertEqual(getattr(root, attr), expected)
def test_parse_with_string(self):
    """Parse with string"""
    doc = parse(TEST_STRING)
    #Minimal node testing
    self.assertEqual(len(doc.xml_children), 1)
    root = doc.xml_children[0]
    expectations = (
        ('xml_typecode', tree.element.xml_typecode),
        ('xml_qname', 'test'),
        ('xml_namespace', None),
        ('xml_prefix', None),
    )
    for attr, expected in expectations:
        self.assertEqual(getattr(root, attr), expected)
def parse(obj, uri=None, entity_factory=None, standalone=False, validate=False):
    """
    Parse `obj` with amara's tree.parse() and return its result.

    When `entity_factory` is falsy, nodes.Document is used in its place.
    `uri`, `standalone` and `validate` are forwarded unchanged.
    """
    # Imported at call time rather than at module level, as before
    from amara import tree
    factory = entity_factory or nodes.Document
    return tree.parse(obj, uri, entity_factory=factory,
                      standalone=standalone, validate=validate)
def test_parse_with_stream(self):
    """Parse with stream"""
    stream = open(TEST_FILE)
    try:
        doc = parse(stream)
    finally:
        # Close the handle whether or not parsing succeeds; it was
        # previously never closed.
        stream.close()
    #Minimal node testing
    self.assertEqual(len(doc.xml_children), 1)
    root = doc.xml_children[0]
    self.assertEqual(root.xml_typecode, tree.element.xml_typecode)
    self.assertEqual(root.xml_qname, 'disclaimer')
    self.assertEqual(root.xml_namespace, None)
    self.assertEqual(root.xml_prefix, None)
def test_encode(self):
    # Encode one document through the default writer, the XML_W constant
    # and the looked-up 'xml' writer, then once more as latin-1.
    # assertTrue replaces the deprecated assert_ alias.
    xml_w = lookup('xml')
    t = tree.parse("<root>entrée</root>")
    utf8_expected = ('<?xml version="1.0" encoding="UTF-8"?>\n'
                     '<root>entr\xc3\xa9e</root>')
    # Default is UTF-8.
    self.assertTrue(xml_compare(t.xml_encode(), utf8_expected))
    self.assertTrue(xml_compare(t.xml_encode(XML_W), utf8_expected))
    self.assertTrue(xml_compare(t.xml_encode(xml_w), utf8_expected))
    # Try latin-1 output.
    self.assertTrue(xml_compare(t.xml_encode(encoding='iso-8859-1'),
                                '<?xml version="1.0" encoding="iso-8859-1"?>\n'
                                '<root>entr\xe9e</root>'))
def pushtree(obj, pattern, target, uri=None, entity_factory=None,
             standalone=False, validate=False, namespaces=None):
    """
    Run parse() on `obj` with a pushtree handler built from `pattern`.

    `target` receives the node for each end-of-element match, and the
    first item of the matched pair for each attribute match.  The
    remaining arguments are passed through: `namespaces` to
    PushtreeManager, the others positionally to parse().

    Returns the value returned by parse().
    """
    # Adapter for what Dave uses. FIXME?!
    class Handler(object):
        def startElementMatch(self, node):
            # Element starts are ignored
            pass

        def endElementMatch(self, node):
            target(node)

        def attributeMatch(self, pair):
            # Returns the node and the attribute name (hack!)
            # Get just the node
            matched_node = pair[0]
            target(matched_node)

    # Create a rule handler object
    handler = PushtreeManager(
        pattern, Handler(), namespaces=namespaces).build_pushtree_handler()

    # Run the parser on the rule handler
    return parse(obj, uri, entity_factory, standalone, validate,
                 rule_handler=handler)
def test_html(self):
    "Simple check of HTML output"
    t = tree.parse("""<?xml version='1.0'?> <disclaimer> <p>The opinions represented herein represent those of the individual and should not be interpreted as official policy endorsed by this organization.</p> </disclaimer> """)
    # Both the writer name and the looked-up writer must give this output
    expected = """<disclaimer> <p>The opinions represented herein represent those of the individual and should not be interpreted as official policy endorsed by this organization.</p> </disclaimer>"""
    self.assertEqual(t.xml_encode(HTML_W), expected)
    html_w = lookup(HTML_W)
    self.assertEqual(t.xml_encode(html_w), expected)
def test_html(self):
    "Simple check of HTML output"
    source = """<?xml version='1.0'?> <disclaimer> <p>The opinions represented herein represent those of the individual and should not be interpreted as official policy endorsed by this organization.</p> </disclaimer> """
    expected = """<disclaimer> <p>The opinions represented herein represent those of the individual and should not be interpreted as official policy endorsed by this organization.</p> </disclaimer>"""
    t = tree.parse(source)
    # First by writer name
    by_name = t.xml_encode(HTML_W)
    self.assertEqual(by_name, expected)
    # Then by the writer returned from lookup()
    html_w = lookup(HTML_W)
    by_writer = t.xml_encode(html_w)
    self.assertEqual(by_writer, expected)
def test_encode(self):
    # Check xml_encode() output for the default, named and looked-up XML
    # writers, plus an explicit latin-1 encoding.  assertTrue is used in
    # place of the deprecated assert_ alias.
    xml_w = lookup('xml')
    t = tree.parse("<root>entrée</root>")

    # Default is UTF-8.
    self.assertTrue(
        xml_compare(
            t.xml_encode(),
            '<?xml version="1.0" encoding="UTF-8"?>\n'
            '<root>entr\xc3\xa9e</root>'))
    self.assertTrue(
        xml_compare(
            t.xml_encode(XML_W),
            '<?xml version="1.0" encoding="UTF-8"?>\n'
            '<root>entr\xc3\xa9e</root>'))
    self.assertTrue(
        xml_compare(
            t.xml_encode(xml_w),
            '<?xml version="1.0" encoding="UTF-8"?>\n'
            '<root>entr\xc3\xa9e</root>'))

    # Try latin-1 output.
    self.assertTrue(
        xml_compare(
            t.xml_encode(encoding='iso-8859-1'),
            '<?xml version="1.0" encoding="iso-8859-1"?>\n'
            '<root>entr\xe9e</root>'))
def apply_xupdate(source, xupdate):
    """
    Apply an XUpdate document to a source document.

    `xupdate` is parsed with reader.parse() and `source` with tree.parse();
    the value returned is the parsed XUpdate's apply_updates() called on
    the parsed source.
    """
    instructions = reader.parse(xupdate)
    target_doc = tree.parse(source)
    return instructions.apply_updates(target_doc)
Text1 </CHILD1> <CHILD2 attr1="val2" CODE="1"> <GCHILD name="GCHILD21"/> <GCHILD name="GCHILD22"/> </CHILD2> <foo:CHILD3 xmlns:foo="http://foo.com" foo:name="mike"/> <lang xml:lang="en"> <foo xml:lang=""/> <foo/> <f\xf6\xf8/> </lang> </ROOT> <?no-data ?> """, 'urn:domlette-test-tree') DOC = tree.parse(src) def children(node, type=tree.element): return [child for child in node if isinstance(child, type)] # `#document` nodes PI, PI2 = children(DOC, tree.processing_instruction) ROOT = children(DOC, tree.element)[0] # `ROOT` nodes COMMENT = children(ROOT, tree.comment)[0] CHILDREN = CHILD1, CHILD2, CHILD3, LANG = children(ROOT) # `CHILD1` nodes ATTR1 = CHILD1.xml_attributes.getnode(None, 'attr1') ATTR31 = CHILD1.xml_attributes.getnode(None, 'attr31')
Text1 </CHILD1> <CHILD2 attr1="val2" CODE="1"> <GCHILD name="GCHILD21"/> <GCHILD name="GCHILD22"/> </CHILD2> <foo:CHILD3 xmlns:foo="http://foo.com" foo:name="mike"/> <lang xml:lang="en"> <foo xml:lang=""/> <foo/> <f\xf6\xf8/> </lang> </ROOT> <?no-data ?> """, 'urn:domlette-test-tree') DOC = tree.parse(src) def children(node, type=tree.element): return [ child for child in node if isinstance(child, type) ] # `#document` nodes PI, PI2 = children(DOC, tree.processing_instruction) ROOT = children(DOC, tree.element)[0] # `ROOT` nodes COMMENT = children(ROOT, tree.comment)[0] CHILDREN = CHILD1, CHILD2, CHILD3, LANG = children(ROOT) # `CHILD1` nodes ATTR1 = CHILD1.xml_attributes.getnode(None, 'attr1') ATTR31 = CHILD1.xml_attributes.getnode(None, 'attr31') GCHILDREN1 = GCHILD11, GCHILD12 = children(CHILD1) TEXT_WS1, TEXT_WS2, TEXT1 = children(CHILD1, type=tree.text)