def test_xmlschema_nested_resolvers(self):
    """Test that resolvers work in a nested fashion."""
    innermost_schema = self.resolver_schema_ext

    class res_nested(etree.Resolver):
        # Serves the stored schema text for the 'YYY.xsd' request.
        def __init__(self, ext_schema):
            self.ext_schema = ext_schema

        def resolve(self, url, id, context):
            assert url == 'YYY.xsd'
            return self.resolve_string(self.ext_schema, context)

    class res(etree.Resolver):
        # Handles 'XXX.xsd'; while doing so it builds a second schema
        # through a fresh parser that carries its own nested resolver.
        def __init__(self, ext_schema_1, ext_schema_2):
            self.ext_schema_1 = ext_schema_1
            self.ext_schema_2 = ext_schema_2

        def resolve(self, url, id, context):
            assert url == 'XXX.xsd'
            inner_parser = etree.XMLParser()
            inner_resolver = res_nested(self.ext_schema_2)
            inner_parser.resolvers.add(inner_resolver)
            inner_doc = etree.parse(self.ext_schema_1, parser=inner_parser)
            inner_schema = etree.XMLSchema(inner_doc)
            return self.resolve_string(innermost_schema, context)

    outer_parser = etree.XMLParser()
    outer_resolver = res(self.resolver_schema_int2, self.resolver_schema_ext)
    outer_parser.resolvers.add(outer_resolver)
    outer_doc = etree.parse(self.resolver_schema_int, parser=outer_parser)
    outer_schema = etree.XMLSchema(outer_doc)
def test_xmlschema_parse_default_attributes(self):
    # Parses a document with a schema-validating parser that has
    # attribute_defaults=True and checks that <b> elements without an
    # explicit 'hardy' attribute report the schema default "hey".
    #
    # does not work as of libxml2 2.7.3
    schema_doc = self.parse(
        ' <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">'
        ' <xsd:element name="a" type="AType"/>'
        ' <xsd:complexType name="AType">'
        ' <xsd:sequence minOccurs="4" maxOccurs="4">'
        ' <xsd:element name="b" type="BType" />'
        ' </xsd:sequence>'
        ' </xsd:complexType>'
        ' <xsd:complexType name="BType">'
        ' <xsd:attribute name="hardy" type="xsd:string" default="hey" />'
        ' </xsd:complexType>'
        ' </xsd:schema> ')
    schema = etree.XMLSchema(schema_doc)
    parser = etree.XMLParser(schema=schema, attribute_defaults=True)
    tree_valid = self.parse(
        '<a><b hardy="ho"/><b/><b hardy="ho"/><b/></a>', parser=parser)
    root = tree_valid.getroot()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual('ho', root[0].get('hardy'))
    self.assertEqual('hey', root[1].get('hardy'))
    self.assertEqual('ho', root[2].get('hardy'))
    self.assertEqual('hey', root[3].get('hardy'))
def test_default_class_lookup(self):
    # Registers custom element, comment and PI classes through
    # ElementDefaultClassLookup and checks that each parsed node exposes
    # the marker attribute of its class.
    class TestElement(etree.ElementBase):
        FIND_ME = "default element"

    class TestComment(etree.CommentBase):
        FIND_ME = "default comment"

    class TestPI(etree.PIBase):
        FIND_ME = "default pi"

    class_lookup = etree.ElementDefaultClassLookup(
        element=TestElement, comment=TestComment, pi=TestPI)
    xml_parser = etree.XMLParser()
    xml_parser.set_element_class_lookup(class_lookup)

    document = _bytes(
        "<?xml version='1.0'?>"
        " <root>"
        " <?myPI?>"
        " <!-- hi -->"
        " </root> ")
    root = etree.XML(document, xml_parser)

    self.assertEqual("default element", root.FIND_ME)
    self.assertEqual("default pi", root[0].FIND_ME)
    self.assertEqual("default comment", root[1].FIND_ME)
def setUp(self):
    """Install a namespace class lookup on a new default parser.

    The lookup's ``get_namespace`` method is exposed as ``self.Namespace``.
    """
    super(ETreeNamespaceClassesTestCase, self).setUp()
    ns_lookup = etree.ElementNamespaceClassLookup()
    self.Namespace = ns_lookup.get_namespace
    default_parser = etree.XMLParser()
    default_parser.set_element_class_lookup(ns_lookup)
    # This replaces the module-wide default parser.
    etree.set_default_parser(default_parser)
def test_dtd_parse_invalid(self):
    # <b><a/></b> is parsed with DTD validation against test.dtd and is
    # expected to be rejected with XMLSyntaxError.
    dtd_path = fileInTestDir("test.dtd")
    document = _bytes('<!DOCTYPE b SYSTEM "%s"><b><a/></b>' % dtd_path)
    validating_parser = etree.XMLParser(dtd_validation=True)
    self.assertRaises(
        etree.XMLSyntaxError,
        etree.fromstring, document, parser=validating_parser)
def test_parse_file_dtd(self):
    # With attribute_defaults=True, the root and its first child of
    # test.xml are expected to report a "default" attribute value.
    defaults_parser = etree.XMLParser(attribute_defaults=True)
    root = etree.parse(fileInTestDir('test.xml'), defaults_parser).getroot()

    self.assertEqual("valueA", root.get("default"))
    first_child = root[0]
    self.assertEqual("valueB", first_child.get("default"))
def resolve(self, url, id, context):
    # Handles the 'XXX.xsd' request. Before answering, a second schema is
    # built from self.ext_schema_1 through a fresh parser that carries a
    # res_nested resolver; the reply itself is the text of resolver_schema.
    # (res_nested and resolver_schema come from the enclosing scope.)
    assert url == 'XXX.xsd'
    inner_parser = etree.XMLParser()
    inner_resolver = res_nested(self.ext_schema_2)
    inner_parser.resolvers.add(inner_resolver)
    inner_doc = etree.parse(self.ext_schema_1, parser=inner_parser)
    inner_schema = etree.XMLSchema(inner_doc)
    return self.resolve_string(resolver_schema, context)
def test_dtd_attrs(self):
    # Walks the introspection API of DTD objects: system_url, element
    # declarations with their content models and attributes, entities,
    # and the name/system_url of internal DTDs taken from docinfo.
    external_dtd = etree.DTD(fileUrlInTestDir("test.dtd"))

    # Test DTD.system_url attribute
    self.assertTrue(external_dtd.system_url.endswith("test.dtd"))

    # Test elements and their attributes
    decl_a = external_dtd.elements()[0]
    self.assertEqual(decl_a.name, "a")
    self.assertEqual(decl_a.type, "element")
    content_a = decl_a.content
    self.assertEqual(content_a.name, "b")
    self.assertEqual(content_a.type, "element")
    self.assertEqual(content_a.occur, "once")

    attr_decl = decl_a.attributes()[0]
    self.assertEqual(attr_decl.name, "default")
    self.assertEqual(attr_decl.type, "enumeration")
    self.assertEqual(attr_decl.values(), ["valueA", "valueB"])
    self.assertEqual(attr_decl.default_value, "valueA")

    decl_b = external_dtd.elements()[1]
    self.assertEqual(decl_b.name, "b")
    self.assertEqual(decl_b.type, "empty")
    self.assertEqual(decl_b.content, None)

    # Test entities and their attributes
    entity_c = external_dtd.entities()[0]
    self.assertEqual(entity_c.name, "c")
    self.assertEqual(entity_c.orig, "*")
    self.assertEqual(entity_c.content, "*")

    # Test DTD.name attribute
    inline_doc = _bytes(
        ' <!DOCTYPE a SYSTEM "none" ['
        ' <!ELEMENT a EMPTY>'
        ' ]>'
        ' <a/> ')
    inline_root = etree.XML(inline_doc)
    inline_dtd = etree.ElementTree(inline_root).docinfo.internalDTD
    self.assertEqual(inline_dtd.name, "a")

    # Test DTD.name and DTD.systemID attributes
    validating_parser = etree.XMLParser(dtd_validation=True)
    xml = '<!DOCTYPE a SYSTEM "test.dtd"><a><b/></a>'
    validated_root = etree.fromstring(
        xml, parser=validating_parser,
        base_url=fileUrlInTestDir("test.xml"))

    referenced_dtd = validated_root.getroottree().docinfo.internalDTD
    self.assertEqual(referenced_dtd.name, "a")
    self.assertEqual(referenced_dtd.system_url, "test.dtd")
def test_xslt_resolver_url_building(self):
    # Checks which URL a custom resolver receives for document('test.xml')
    # depending on the base URL of the stylesheet: none, a filesystem
    # path, and an http URL.
    #
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    assertEqual = self.assertEqual
    called = {'count': 0}
    # Rebound before each scenario; the resolver reads the current value
    # through the closure at call time.
    expected_url = None

    class TestResolver(etree.Resolver):
        def resolve(self, url, id, context):
            assertEqual(url, expected_url)
            called['count'] += 1
            return self.resolve_string('<CALLED/>', context)

    # The backslash directly after the opening quotes is a line
    # continuation: the literal must start with '<', not with whitespace
    # or a stray backslash, to remain well-formed XML.
    stylesheet_xml = _bytes("""\
<xsl:stylesheet version="1.0"
   xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
   xmlns:l="local">
  <xsl:template match="/">
    <xsl:copy-of select="document('test.xml')"/>
  </xsl:template>
</xsl:stylesheet>
""")

    parser = etree.XMLParser()
    parser.resolvers.add(TestResolver())

    # test without base_url => relative path only
    expected_url = 'test.xml'
    xslt = etree.XSLT(etree.XML(stylesheet_xml, parser))

    # Compiling the stylesheet must not resolve anything yet.
    self.assertEqual(called['count'], 0)
    xslt(etree.XML('<a/>'))
    self.assertEqual(called['count'], 1)

    # now the same thing with a stylesheet base URL on the filesystem
    called['count'] = 0
    expected_url = os.path.join('MY', 'BASE', 'test.xml')
    xslt = etree.XSLT(etree.XML(
        stylesheet_xml, parser,
        base_url=os.path.join('MY', 'BASE', 'FILE')))

    self.assertEqual(called['count'], 0)
    xslt(etree.XML('<a/>'))
    self.assertEqual(called['count'], 1)

    # now the same thing with a stylesheet base URL
    called['count'] = 0
    expected_url = 'http://server.com/BASE/DIR/test.xml'
    xslt = etree.XSLT(etree.XML(
        stylesheet_xml, parser,
        base_url='http://server.com/BASE/DIR/FILE'))

    self.assertEqual(called['count'], 0)
    xslt(etree.XML('<a/>'))
    self.assertEqual(called['count'], 1)
def test_dtd_parse_file_not_found(self):
    # Parsing with DTD validation against a DTD file that does not exist
    # must raise XMLSyntaxError, and the error log must contain at least
    # one entry whose message names the missing file.
    fromstring = etree.fromstring
    dtd_filename = fileInTestDir("__nosuch.dtd")
    parser = etree.XMLParser(dtd_validation=True)
    xml = _bytes('<!DOCTYPE b SYSTEM "%s"><b><a/></b>' % dtd_filename)
    self.assertRaises(etree.XMLSyntaxError,
                      fromstring, xml, parser=parser)

    # Parse a second time to get hold of the exception object itself and
    # inspect its error log.
    errors = None
    try:
        fromstring(xml, parser=parser)
    except etree.XMLSyntaxError:
        e = sys.exc_info()[1]
        errors = [entry.message for entry in e.error_log
                  if dtd_filename in entry.message]

    # assertTrue replaces the deprecated assert_ alias, which was removed
    # from unittest in Python 3.12. A value of None (no exception raised)
    # or an empty list (no matching log entry) both fail here.
    self.assertTrue(errors)
def test_xmlschema_resolvers_noroot(self):
    """Test that the default resolver will not get called when a more specific resolver is registered."""

    class res_root(etree.Resolver):
        # Registered on the default parser; being called at all is the
        # failure condition of this test.
        def resolve(self, url, id, context):
            assert False
            return None

    root_resolver = res_root()
    etree.get_default_parser().resolvers.add(root_resolver)
    try:
        parser = etree.XMLParser()
        parser.resolvers.add(self.simple_resolver(self.resolver_schema_ext))
        schema_doc = etree.parse(self.resolver_schema_int, parser=parser)
        schema = etree.XMLSchema(schema_doc)
    finally:
        # Always unregister: the default parser is shared state, so a
        # failure above must not leave the asserting resolver installed
        # for whatever runs afterwards.
        etree.get_default_parser().resolvers.remove(root_resolver)
def test_xmlschema_stringio(self):
    # Builds an XMLSchema from a file-like object, then uses it in a
    # validating parser: <a><b/></a> is accepted, <a><c/></a> is rejected.
    schema_source = BytesIO(
        ' <xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema">'
        ' <xsd:element name="a" type="AType"/>'
        ' <xsd:complexType name="AType">'
        ' <xsd:sequence>'
        ' <xsd:element name="b" type="xsd:string" />'
        ' </xsd:sequence>'
        ' </xsd:complexType>'
        ' </xsd:schema> ')
    validating_parser = etree.XMLParser(
        schema=etree.XMLSchema(file=schema_source))

    valid_tree = self.parse('<a><b></b></a>', parser=validating_parser)
    self.assertEqual('a', valid_tree.getroot().tag)

    self.assertRaises(
        etree.XMLSyntaxError,
        self.parse, '<a><c></c></a>', parser=validating_parser)
def test_evil_class_lookup(self):
    # A custom lookup that returns None, a plain object(), or a proper
    # element class depending on the tag name. None and ElementBase must
    # yield usable elements; the object() result must raise TypeError.
    class MyLookup(etree.CustomElementClassLookup):
        def lookup(self, t, d, ns, name):
            if name == 'none':
                return None
            if name == 'obj':
                return object()
            return etree.ElementBase

    evil_parser = etree.XMLParser()
    evil_parser.set_element_class_lookup(MyLookup())

    none_root = etree.XML(_bytes('<none/>'), evil_parser)
    self.assertEqual('none', none_root.tag)

    self.assertRaises(
        TypeError,
        etree.XML, _bytes("<obj />"), evil_parser)

    plain_root = etree.XML(_bytes('<root/>'), evil_parser)
    self.assertEqual('root', plain_root.tag)
def test_parser_based_lookup(self):
    # With ParserBasedElementClassLookup installed module-wide, a parser
    # that carries its own lookup yields TestElement instances, while a
    # parse without that parser yields elements lacking FIND_ME.
    class TestElement(etree.ElementBase):
        FIND_ME = "parser_based"

    class MyLookup(etree.CustomElementClassLookup):
        def lookup(self, t, d, ns, name):
            return TestElement

    etree.set_element_class_lookup(etree.ParserBasedElementClassLookup())

    custom_parser = etree.XMLParser()
    custom_parser.set_element_class_lookup(MyLookup())

    root = etree.parse(BytesIO(xml_str), custom_parser).getroot()
    expected_marker = TestElement.FIND_ME
    self.assertEqual(root.FIND_ME, expected_marker)
    self.assertEqual(root[0].FIND_ME, expected_marker)

    root = etree.parse(BytesIO(xml_str)).getroot()
    self.assertFalse(hasattr(root, 'FIND_ME'))
    self.assertFalse(hasattr(root[0], 'FIND_ME'))
def test_class_lookup_type_mismatch(self):
    # The lookup deliberately answers with a base class of the wrong kind
    # for every node type except elements named 'root'; accessing such a
    # node must raise TypeError.
    wrong_class_for = {
        'comment': etree.PIBase,
        'PI': etree.EntityBase,
        'entity': etree.ElementBase,
    }

    class MyLookup(etree.CustomElementClassLookup):
        def lookup(self, t, d, ns, name):
            if t == 'element':
                if name == 'root':
                    return etree.ElementBase
                return etree.CommentBase
            if t not in wrong_class_for:
                raise ValueError('got type %s' % t)
            return wrong_class_for[t]

    parser = etree.XMLParser(resolve_entities=False)
    parser.set_element_class_lookup(MyLookup())

    root = etree.XML(_bytes('<root></root>'), parser)
    self.assertEqual('root', root.tag)
    self.assertEqual(etree.ElementBase, type(root))

    # One document per mismatching child kind: element, comment, PI.
    for markup in ("<root><test/></root>",
                   "<root><!-- test --></root>",
                   "<root><?test?></root>"):
        root = etree.XML(_bytes(markup), parser)
        self.assertRaises(TypeError, root.__getitem__, 0)

    # Unresolved entity reference (resolve_entities=False above).
    root = etree.XML(
        _bytes('<!DOCTYPE root [<!ENTITY myent "ent">]>'
               '<root>&myent;</root>'),
        parser)
    self.assertRaises(TypeError, root.__getitem__, 0)

    root = etree.XML(_bytes('<root><root/></root>'), parser)
    self.assertEqual('root', root[0].tag)
def test_xslt_document_XML_resolver(self): # make sure document('') works when custom resolvers are in use assertEquals = self.assertEquals called = {'count' : 0} class TestResolver(etree.Resolver): def resolve(self, url, id, context): assertEquals(url, 'file://ANYTHING') called['count'] += 1 return self.resolve_string('<CALLED/>', context) parser = etree.XMLParser() parser.resolvers.add(TestResolver()) xslt = etree.XSLT(etree.XML(_bytes("""\ <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" xmlns:l="local"> <xsl:template match="/"> <test> <xsl:for-each select="document('')//l:data/l:entry"> <xsl:copy-of select="document('file://ANYTHING')"/> <xsl:copy> <xsl:attribute name="value"> <xsl:value-of select="."/> </xsl:attribute> </xsl:copy> </xsl:for-each> </test> </xsl:template> <l:data> <l:entry>A</l:entry> <l:entry>B</l:entry> </l:data> </xsl:stylesheet> """), parser)) self.assertEquals(called['count'], 0) result = xslt(etree.XML('<a/>')) self.assertEquals(called['count'], 1) root = result.getroot() self.assertEquals(root.tag, 'test') self.assertEquals(len(root), 4) self.assertEquals(root[0].tag, 'CALLED') self.assertEquals(root[1].tag, '{local}entry') self.assertEquals(root[1].text, None) self.assertEquals(root[1].get("value"), 'A') self.assertEquals(root[2].tag, 'CALLED') self.assertEquals(root[3].tag, '{local}entry') self.assertEquals(root[3].text, None) self.assertEquals(root[3].get("value"), 'B')
def test_xmlschema_resolvers(self):
    """Test that resolvers work with schema."""
    ext_resolver = self.simple_resolver(self.resolver_schema_ext)
    resolving_parser = etree.XMLParser()
    resolving_parser.resolvers.add(ext_resolver)
    parsed_schema = etree.parse(
        self.resolver_schema_int, parser=resolving_parser)
    # Passes as long as constructing the schema does not raise.
    compiled_schema = etree.XMLSchema(parsed_schema)
def test_dtd_parse_valid_file_url(self):
    # The DTD is referenced through the URL that fileUrlInTestDir returns;
    # the test passes as long as the validating parse does not raise.
    dtd_url = fileUrlInTestDir("test.dtd")
    document = '<!DOCTYPE a SYSTEM "%s"><a><b/></a>' % dtd_url
    validating_parser = etree.XMLParser(dtd_validation=True)
    parsed_root = etree.fromstring(document, parser=validating_parser)
class PyClassLookupTestCase(HelperTestCase):
    """Test cases for the lxml.pyclasslookup class lookup mechanism.

    Each test installs a Python lookup function on the shared class-level
    parser, parses ``xml_str`` and compares what the function observed on
    the element it was handed with what the parsed tree reports.
    """
    etree = etree
    # One parser shared by all tests; tearDown() resets its class lookup.
    parser = etree.XMLParser()
    Element = parser.makeelement

    def tearDown(self):
        """Remove the lookup installed by the test from the shared parser."""
        self.parser.set_element_class_lookup(None)
        super(PyClassLookupTestCase, self).tearDown()

    def _setClassLookup(self, lookup_function):
        """Install ``lookup_function`` as the parser's Python class lookup.

        The function receives whatever positional arguments the lookup
        machinery passes to ``PythonElementClassLookup.lookup``.
        """
        class Lookup(PythonElementClassLookup):
            def lookup(self, *args):
                return lookup_function(*args)
        self.parser.set_element_class_lookup(Lookup())

    def _buildElementClass(self):
        """Return a fresh ElementBase subclass private to the calling test."""
        class LocalElement(etree.ElementBase):
            pass
        return LocalElement

    def XML(self, xml):
        """Parse ``xml`` with the shared parser and return the root."""
        return self.etree.XML(xml, self.parser)

    # --- Test cases

    def test_lookup(self):
        # The lookup function flips a class-level marker from 1 to 2,
        # which shows that it was actually called during parsing.
        el_class = self._buildElementClass()
        el_class.i = 1
        def lookup(*args):
            if el_class.i == 1:
                el_class.i = 2
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertEqual(2, el_class.i)

    def test_lookup_keep_ref_assertion(self):
        # The element object handed to the lookup is stored; using it
        # after the lookup call has returned must raise ReferenceError.
        el_class = self._buildElementClass()
        el_class.EL = None
        def lookup(doc, el):
            if el_class.EL is None:
                el_class.EL = el
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertNotEqual(None, el_class.EL)
        self.assertRaises(ReferenceError, el_class.EL.getchildren)

    def test_lookup_tag(self):
        el_class = self._buildElementClass()
        el_class.TAG = None
        def lookup(doc, el):
            if el_class.TAG is None:
                el_class.TAG = el.tag
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertNotEqual(None, root.TAG)
        self.assertEqual(root.tag, root.TAG)

    def test_lookup_text(self):
        el_class = self._buildElementClass()
        el_class.TEXT = None
        def lookup(doc, el):
            if el_class.TEXT is None:
                el_class.TEXT = el.text
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertNotEqual(None, root.TEXT)
        self.assertEqual(root.text, root.TEXT)

    def test_lookup_tail(self):
        el_class = self._buildElementClass()
        el_class.TAIL = None
        def lookup(doc, el):
            if el_class.TAIL is None:
                el_class.TAIL = el.tail
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertEqual(root.tail, root.TAIL)

    def test_lookup_attrib(self):
        el_class = self._buildElementClass()
        el_class.ATTRIB = None
        def lookup(doc, el):
            if el_class.ATTRIB is None:
                el_class.ATTRIB = el[0].attrib
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        # Compare as sorted item lists so attribute order is irrelevant.
        items1 = sorted(root[0].attrib.items())
        items2 = sorted(root.ATTRIB.items())
        self.assertEqual(items1, items2)

    def test_lookup_prefix(self):
        el_class = self._buildElementClass()
        el_class.PREFIX = None
        def lookup(doc, el):
            if el_class.PREFIX is None:
                el_class.PREFIX = el.prefix
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertEqual(root.prefix, root.PREFIX)

    def test_lookup_sourceline(self):
        el_class = self._buildElementClass()
        el_class.LINE = None
        def lookup(doc, el):
            if el_class.LINE is None:
                el_class.LINE = el.sourceline
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertEqual(root.sourceline, root.LINE)

    def test_lookup_getitem(self):
        el_class = self._buildElementClass()
        el_class.CHILD_TAG = None
        def lookup(doc, el):
            # Record the value on the first lookup only, like every other
            # test in this class. Without the guard, el[0] is evaluated
            # for each element that gets looked up and raises IndexError
            # as soon as that element has no children.
            if el_class.CHILD_TAG is None:
                el_class.CHILD_TAG = el[0].tag
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        child_tag = root.CHILD_TAG
        self.assertNotEqual(None, child_tag)
        self.assertEqual(root[0].tag, child_tag)

    def test_lookup_getitem_neg(self):
        el_class = self._buildElementClass()
        el_class.CHILD_TAG = None
        def lookup(doc, el):
            if el_class.CHILD_TAG is None:
                el_class.CHILD_TAG = el[-1].tag
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        child_tag = root.CHILD_TAG
        self.assertNotEqual(None, child_tag)
        self.assertEqual(root[-1].tag, child_tag)

    def test_lookup_getslice(self):
        el_class = self._buildElementClass()
        el_class.CHILD_TAGS = None
        def lookup(doc, el):
            if el_class.CHILD_TAGS is None:
                el_class.CHILD_TAGS = [c.tag for c in el[1:-1]]
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        child_tags = root.CHILD_TAGS
        self.assertNotEqual(None, child_tags)
        self.assertEqual([c.tag for c in root[1:-1]], child_tags)

    def test_lookup_len(self):
        el_class = self._buildElementClass()
        el_class.LEN = None
        def lookup(doc, el):
            if el_class.LEN is None:
                el_class.LEN = len(el)
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertEqual(1, el_class.LEN)

    def test_lookup_bool(self):
        el_class = self._buildElementClass()
        el_class.TRUE = None
        def lookup(doc, el):
            if el_class.TRUE is None:
                el_class.TRUE = bool(el)
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertTrue(el_class.TRUE)

    def test_lookup_get(self):
        el_class = self._buildElementClass()
        el_class.VAL = None
        def lookup(doc, el):
            if el_class.VAL is None:
                el_class.VAL = el[0].get('a1')
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertNotEqual(None, el_class.VAL)
        self.assertEqual(root[0].get('a1'), el_class.VAL)

    def test_lookup_get_default(self):
        el_class = self._buildElementClass()
        # A value derived from id() serves as the default for an
        # attribute name that is not expected to exist.
        default = str(id(el_class))
        el_class.VAL = None
        def lookup(doc, el):
            if el_class.VAL is None:
                el_class.VAL = el[0].get('unknownattribute', default)
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertEqual(default, el_class.VAL)

    def test_lookup_getchildren(self):
        el_class = self._buildElementClass()
        el_class.CHILD_TAGS = None
        def lookup(doc, el):
            if el_class.CHILD_TAGS is None:
                el_class.CHILD_TAGS = [c.tag for c in el.getchildren()]
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        child_tags = root.CHILD_TAGS
        self.assertNotEqual(None, child_tags)
        self.assertEqual([c.tag for c in root.getchildren()],
                         child_tags)

    def test_lookup_iter_children(self):
        el_class = self._buildElementClass()
        el_class.CHILD_TAGS = None
        def lookup(doc, el):
            if el_class.CHILD_TAGS is None:
                el_class.CHILD_TAGS = [c.tag for c in el]
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        child_tags = root.CHILD_TAGS
        self.assertNotEqual(None, child_tags)
        self.assertEqual([c.tag for c in root.getchildren()],
                         child_tags)

    def test_lookup_iterchildren(self):
        el_class = self._buildElementClass()
        el_class.CHILD_TAGS = None
        def lookup(doc, el):
            if el_class.CHILD_TAGS is None:
                el_class.CHILD_TAGS = [c.tag for c in el.iterchildren()]
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        child_tags = root.CHILD_TAGS
        self.assertNotEqual(None, child_tags)
        self.assertEqual([c.tag for c in root.getchildren()],
                         child_tags)

    def test_lookup_iterchildren_tag(self):
        el_class = self._buildElementClass()
        el_class.CHILD_TAGS = None
        def lookup(doc, el):
            # Deliberately "not ..." rather than "is None": an empty list
            # is recomputed on the next lookup, which the assertions
            # below rely on.
            if not el_class.CHILD_TAGS:
                el_class.CHILD_TAGS = [
                    c.tag for c in el.iterchildren(tag='{objectified}c2')]
            return el_class
        self._setClassLookup(lookup)

        root = self.XML(xml_str)
        child_tags = root.CHILD_TAGS
        self.assertNotEqual(None, child_tags)
        self.assertEqual([], child_tags)

        # Accessing the first child is expected to run the lookup again,
        # this time for an element that has matching children.
        c1 = root[0]
        child_tags = root.CHILD_TAGS
        self.assertNotEqual(None, child_tags)
        self.assertNotEqual([], child_tags)
        self.assertEqual(
            [c.tag for c in root[0].iterchildren(tag='{objectified}c2')],
            child_tags)

    def test_lookup_getparent(self):
        el_class = self._buildElementClass()
        el_class.PARENT = None
        def lookup(doc, el):
            if el_class.PARENT is None:
                el_class.PARENT = el[0].getparent().tag
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertEqual(root.tag, root.PARENT)

    def test_lookup_getnext(self):
        el_class = self._buildElementClass()
        el_class.NEXT = None
        def lookup(doc, el):
            if el_class.NEXT is None:
                el_class.NEXT = el[0][1].getnext().tag
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertNotEqual(None, el_class.NEXT)
        self.assertEqual(root[0][1].getnext().tag, el_class.NEXT)

    def test_lookup_getprevious(self):
        el_class = self._buildElementClass()
        el_class.PREV = None
        def lookup(doc, el):
            if el_class.PREV is None:
                el_class.PREV = el[0][1].getprevious().tag
            return el_class
        self._setClassLookup(lookup)
        root = self.XML(xml_str)
        self.assertNotEqual(None, el_class.PREV)
        self.assertEqual(root[0][1].getprevious().tag, el_class.PREV)

    def test_comments_fallback(self):
        # A lookup that always returns None must still leave comment
        # nodes accessible with the regular Comment tag and text.
        def return_none(*args):
            return None
        self._setClassLookup(return_none)
        el = self.XML('<a><!-- hello world --></a>')
        self.assertEqual(el[0].tag, self.etree.Comment)
        self.assertEqual(el[0].text, " hello world ")
def test_dtd_parse_valid_relative_file_url(self):
    # The DOCTYPE names the DTD by the relative reference "test.dtd" and
    # a base_url is supplied with the document; the test passes as long
    # as the validating parse does not raise.
    document = '<!DOCTYPE a SYSTEM "test.dtd"><a><b/></a>'
    document_url = fileUrlInTestDir("test.xml")
    validating_parser = etree.XMLParser(dtd_validation=True)
    parsed_root = etree.fromstring(
        document, parser=validating_parser, base_url=document_url)
def test_illegal_utf8_recover(self): data = _bytes('<test>\x80\x80\x80</test>', encoding='iso8859-1') parser = etree.XMLParser(recover=True) self.assertRaises(etree.XMLSyntaxError, etree.fromstring, data, parser)