def transform(source, transforms, params=None, output=None):
    """
    Apply one XSLT transform (or a sequence of them) to an XML source.

    Returns the result object produced by the processor's run.

    source - XML source document: a string (not a Unicode object), a
             file-like object (stream), a file path, a URI or an
             amara.lib.inputsource instance.  A string or stream must be
             self-contained XML (i.e. it may not need any other resource
             such as external entities or includes)
    transforms - XSLT document, or a list/tuple of them, each given as a
                 string, stream, URL, file path or amara.lib.inputsource
                 instance
    params - optional dictionary of stylesheet parameters; keys are unicode
             objects when they have no namespace, or (uri, localname)
             tuples when they do
    output - optional file-like object to which output is written
             (incrementally, as processed)
    """
    # Imported here rather than at module level: slightly slower, but it
    # avoids circular imports.
    from amara.lib import inputsource
    from amara.xpath.util import parameterize
    from amara.xslt.result import streamresult, stringresult
    from amara.xslt.processor import processor

    stylesheet_params = parameterize(params) if params else {}

    # Treat a lone transform as a one-element sequence.
    if isinstance(transforms, (list, tuple)):
        stylesheets = transforms
    else:
        stylesheets = [transforms]

    engine = processor()
    for stylesheet in stylesheets:
        engine.append_transform(inputsource(stylesheet))

    # Stream to the caller's object when given, otherwise collect a string.
    sink = stringresult() if output is None else streamresult(output)
    return engine.run(inputsource(source), stylesheet_params, sink)
def setup_blank_text(): global source global trans global xslt_proc _source1 = '''<?xml version="1.0"?> <test> <item/> <item/> <item/> </test> ''' _trans1 = '''<?xml version="1.0"?> <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> <xsl:strip-space elements="*"/> <xsl:template match="/"> <test> <xsl:apply-templates/> </test> </xsl:template> <xsl:template match="item"> <no> <xsl:value-of select="position()"/> </no> </xsl:template> </xsl:stylesheet> ''' xslt_proc = processor() source = inputsource(_source1, None) trans = inputsource(_trans1, None)
def setup_blank_node():
    """
    Install the fixtures for the blank-node test as module globals.

    Rebinds `xslt_proc` to a fresh processor and `source` / `trans` to
    inputsources wrapping the inline document and stylesheet below.
    """
    global source, trans, xslt_proc

    # A <text> element holding only whitespace; the stylesheet reports the
    # string-length of its text node after xsl:strip-space.
    document_text = (
        '<?xml version="1.0"?> '
        '<document> <text> </text> </document>'
    )
    stylesheet_text = (
        "<?xml version='1.0'?> "
        '<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> '
        '<xsl:output method="text"/> '
        '<xsl:strip-space elements="*"/> '
        '<xsl:template match="/"> <xsl:apply-templates select="//text"/> </xsl:template> '
        '<xsl:template match="text"> Chars: <xsl:value-of select="string-length(text())"/> </xsl:template> '
        '</xsl:stylesheet>'
    )

    xslt_proc = processor()
    source = inputsource(document_text, None)
    trans = inputsource(stylesheet_text, None)
def test_method(self): source = inputsource(self.source, 'source') xupdate = inputsource(self.xupdate, 'xupdate-error-source') expected = format_error(self.error_code) try: document = apply_xupdate(source, xupdate) except XUpdateError, error: compared = format_error(error.code) self.assertEquals(expected, compared)
def setUp(self):
    """
    Turn the raw test fixtures into inputsource objects.

    `self.source` is replaced by an inputsource built from its `source`
    and `uri` attributes.  `self.transform` becomes a list of inputsources
    (one entry when it was a single `testsource`), or an empty tuple when
    no transform was given.
    """
    self.source = inputsource(self.source.source, self.source.uri)

    spec = self.transform
    if isinstance(spec, testsource):
        # A single transform is normalised to a one-element list.
        self.transform = [inputsource(spec.source, spec.uri)]
    elif spec:
        wrapped = []
        for entry in spec:
            wrapped.append(inputsource(entry.source, entry.uri))
        self.transform = wrapped
    else:
        self.transform = ()
def parse(source, prefixes=None, model=None, encoding=None, use_xhtml_ns=False):
    '''
    Parse an input source with HTML text into an Amara Bindery tree

    Warning: if you pass a string, make sure it is a byte string, not a
    Unicode object.  You might also want to wrap it with
    amara.lib.inputsource.text if it is not obviously XML or HTML (for
    example, it could be confused with a file name)
    '''
    from amara.lib.util import set_namespaces

    def pick_entity_factory():
        # A supplied model provides the root factory via its clone method;
        # otherwise fall back to the default entity class.
        if model:
            return model.clone
        return entity

    # The tree-builder callback handed to html5lib takes different
    # arguments depending on PRE_0_90.
    if PRE_0_90:
        def get_tree_instance():
            return treebuilder(pick_entity_factory())
    else:
        def get_tree_instance(namespaceHTMLElements, use_xhtml_ns=use_xhtml_ns):
            # use_xhtml_ns is a boolean: whether or not to use
            # http://www.w3.org/1999/xhtml
            return treebuilder(pick_entity_factory(), use_xhtml_ns)

    html_parser = html5lib.HTMLParser(tree=get_tree_instance)
    stream = inputsource(source, None).stream
    document = html_parser.parse(stream, encoding=encoding)
    if prefixes:
        set_namespaces(document, prefixes)
    return document
def append_transform(self, source, uri=None):
    """
    Add an XSL transformation document to the processor.

    source - the transform: an already-parsed tree node, an inputsource,
             or anything inputsource() accepts.
    uri - optional override document URI.

    This method establishes the transformation that the processor will use
    to transform a source tree into a result tree.  If a transform has
    already been appended, then this method is equivalent to having, in an
    outer "shell" document, an `xsl:import` for the most recently added
    transform followed by an `xsl:import` for the document given as
    `source`.

    Raises ValueError when `source` is a tree node, no `uri` is given and
    the node's root document has no `xml_base`.
    """
    if not isinstance(source, tree.node):
        # Anything that is not a parsed node goes through inputsource.
        if not isinstance(source, inputsource):
            source = inputsource(source, uri)
        self.transform = self._reader.parse(source)
        return

    document = source.xml_root
    if not uri:
        try:
            uri = document.xml_base
        except AttributeError:
            raise ValueError('base-uri required for %s' % document)
    # Remember the parsed document under its URI before reading it.
    self._documents[uri] = document
    self.transform = self._reader.parse(document)
    return
def test_scheme_registry_resolver(self):
    """
    Resolve URIs through custom per-scheme handlers, then repeat with the
    'shift' handler re-registered as the default (None) handler.
    """
    def eval_scheme_handler(uri, base=None):
        # Drop the 5-character 'eval:' prefix and evaluate the rest.
        full = base + uri if base else uri
        return str(eval(full[5:]))

    def shift_scheme_handler(uri, base=None):
        # Drop the 6-character 'shift:' prefix and bump every character
        # to the next code point.
        full = base + uri if base else uri
        return ''.join(chr(ord(ch) + 1) for ch in full[6:])

    resolver = irihelpers.scheme_registry_resolver(
        handlers={'eval': eval_scheme_handler,
                  'shift': shift_scheme_handler})
    # Not used below; kept because the original constructs it.
    start_isrc = inputsource(FILE('sampleresource.txt'), resolver=resolver)

    scheme_cases = [
        (None, 'eval:150-50', '100'),
        (None, 'shift:abcde', 'bcdef'),
        ('eval:150-', '50', '100'),
        ('shift:ab', 'cde', 'bcdef'),
    ]

    def check_all_cases():
        for base, relative, expected in scheme_cases:
            res = resolver.resolve(relative, base)
            self.assertEqual(expected, res,
                             "URI: base=%s uri=%s" % (base, relative))

    check_all_cases()

    # Same expectations with 'shift' served by the default handler instead.
    resolver.handlers[None] = shift_scheme_handler
    del resolver.handlers['shift']
    check_all_cases()
def parse(source, prefixes=None, model=None, encoding=None, use_xhtml_ns=False):
    '''
    Parse HTML from `source` with html5lib and return the resulting tree.

    source - anything accepted by inputsource(); its stream is parsed
    prefixes - optional namespace mapping applied to the parsed document
               via set_namespaces
    model - optional object whose `clone` is used as the entity factory
            (the default `entity` is used otherwise)
    encoding - passed through to html5lib's parse()
    use_xhtml_ns - handed to the tree builder when not PRE_0_90
    '''
    from amara.lib.util import set_namespaces

    def choose_factory():
        if model:
            return model.clone
        return entity

    # The tree-builder callback handed to html5lib takes different
    # arguments depending on PRE_0_90.
    if PRE_0_90:
        def get_tree_instance():
            return treebuilder(choose_factory())
    else:
        def get_tree_instance(namespaceHTMLElements, use_xhtml_ns=use_xhtml_ns):
            # use_xhtml_ns is a boolean: whether or not to use
            # http://www.w3.org/1999/xhtml
            return treebuilder(choose_factory(), use_xhtml_ns)

    html_parser = html5lib.HTMLParser(tree=get_tree_instance)
    html_stream = inputsource(source, None).stream
    parsed = html_parser.parse(html_stream, encoding=encoding)
    if prefixes:
        set_namespaces(parsed, prefixes)
    return parsed
def test_if_1():
    """`xsl:if`"""
    # Stylesheet: one table row per ENTRY, with an <HR/> after every entry
    # except the last (that is the xsl:if under test).
    stylesheet = """<?xml version="1.0" encoding="UTF-8"?> <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> <xsl:strip-space elements='*'/> <xsl:template match="/"> <HTML> <HEAD><TITLE>Address Book</TITLE> </HEAD> <BODY> <TABLE><xsl:apply-templates/></TABLE> </BODY> </HTML> </xsl:template> <xsl:template match="ENTRY"> <xsl:element name='TR'> <xsl:apply-templates select='NAME'/> </xsl:element> <xsl:if test='not(position()=last())'><HR/></xsl:if> </xsl:template> <xsl:template match="NAME"> <xsl:element name='TD'> <xsl:attribute name='ALIGN'>CENTER</xsl:attribute> <B><xsl:apply-templates/></B> </xsl:element> </xsl:template> </xsl:stylesheet> """
    wanted = """<HTML> <HEAD> <META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=iso-8859-1'> <TITLE>Address Book</TITLE> </HEAD> <BODY> <TABLE> <TR> <TD ALIGN='CENTER'><B>Pieter Aaron</B></TD> </TR> <HR> <TR> <TD ALIGN='CENTER'><B>Emeka Ndubuisi</B></TD> </TR> <HR> <TR> <TD ALIGN='CENTER'><B>Vasia Zhugenev</B></TD> </TR> </TABLE> </BODY> </HTML>"""

    address_book = inputsource(FILE("addr_book1.xml"))
    _run_xml(
        source_xml=address_book,
        transform_uri="file:xslt/test_if.py",
        transform_xml=stylesheet,
        expected=wanted,
    )
def test_if_1():
    """`xsl:if`"""
    case = {
        'source_xml': inputsource(FILE("addr_book1.xml")),
        'transform_uri': "file:xslt/test_if.py",
        # One table row per ENTRY, separated by <HR/> except after the
        # last entry (the xsl:if under test).
        'transform_xml': """<?xml version="1.0" encoding="UTF-8"?> <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> <xsl:strip-space elements='*'/> <xsl:template match="/"> <HTML> <HEAD><TITLE>Address Book</TITLE> </HEAD> <BODY> <TABLE><xsl:apply-templates/></TABLE> </BODY> </HTML> </xsl:template> <xsl:template match="ENTRY"> <xsl:element name='TR'> <xsl:apply-templates select='NAME'/> </xsl:element> <xsl:if test='not(position()=last())'><HR/></xsl:if> </xsl:template> <xsl:template match="NAME"> <xsl:element name='TD'> <xsl:attribute name='ALIGN'>CENTER</xsl:attribute> <B><xsl:apply-templates/></B> </xsl:element> </xsl:template> </xsl:stylesheet> """,
        'expected': """<HTML> <HEAD> <META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=iso-8859-1'> <TITLE>Address Book</TITLE> </HEAD> <BODY> <TABLE> <TR> <TD ALIGN='CENTER'><B>Pieter Aaron</B></TD> </TR> <HR> <TR> <TD ALIGN='CENTER'><B>Emeka Ndubuisi</B></TD> </TR> <HR> <TR> <TD ALIGN='CENTER'><B>Vasia Zhugenev</B></TD> </TR> </TABLE> </BODY> </HTML>""",
    }
    _run_xml(**case)
def test_elem_attr_1():
    """`xsl:element` and `xsl:attribute` instantiation"""
    # Stylesheet builds TR/TD elements and the ALIGN attribute dynamically.
    stylesheet = """<?xml version="1.0"?> <xsl:transform xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> <xsl:template match="/"> <HTML> <HEAD><TITLE>Address Book</TITLE> </HEAD> <BODY> <TABLE><xsl:apply-templates/></TABLE> </BODY> </HTML> </xsl:template> <xsl:template match="ENTRY"> <xsl:element name='TR'> <xsl:apply-templates select='NAME'/> </xsl:element> </xsl:template> <xsl:template match="NAME"> <xsl:element name='TD'> <xsl:attribute name='ALIGN'>CENTER</xsl:attribute> <B><xsl:apply-templates/></B> </xsl:element> </xsl:template> </xsl:transform>"""
    wanted = """<HTML> <HEAD> <META HTTP-EQUIV='Content-Type' CONTENT='text/html; charset=iso-8859-1'> <TITLE>Address Book</TITLE> </HEAD> <BODY> <TABLE> \x20\x20\x20\x20 <TR> <TD ALIGN='CENTER'><B>Pieter Aaron</B></TD> </TR> \x20\x20\x20\x20 <TR> <TD ALIGN='CENTER'><B>Emeka Ndubuisi</B></TD> </TR> \x20\x20\x20\x20 <TR> <TD ALIGN='CENTER'><B>Vasia Zhugenev</B></TD> </TR> </TABLE> </BODY> </HTML>"""

    book_path = os.path.join(module_dirname, 'addr_book1.xml')
    book_uri = "file:" + module_dirname + "/addr_book1.xml"
    _run_html(
        source_xml=inputsource(book_path),
        source_uri=book_uri,
        transform_xml=stylesheet,
        expected=wanted,
    )
def parse(self, source):
    """
    Read in a stylesheet source document from an InputSource and add it
    to the stylesheet tree. If a document with the same URI has already
    been read, the cached version will be used instead (so duplicate
    imports, includes, or stylesheet appends do not result in multiple
    reads).

    source - input source for the stylesheet; its `uri` is the cache key
             and its `stream` is read when nothing is cached.

    Returns whatever `self.fromDocument` (already-parsed document cached)
    or `self._parseSrc` (string content or fresh read) returns.
    """
    uri = source.uri

    #Check cache
    content = ''
    if self._root is not None:
        # We prefer to use an already-parsed doc, as it has had its
        # external entities and XIncludes resolved already
        if uri in self._root.sourceNodes:
            doc = self._root.sourceNodes[uri]
            # temporarily uncache it so fromDocument will process it;
            # fromDocument will add it back to the cache when finished
            del self._root.sourceNodes[uri]
            return self.fromDocument(doc, baseUri=uri)
        # It's OK to use cached string content, but we have no idea
        # whether we're using the same InputSource class as was used to
        # parse it the first time, and we don't cache external entities
        # or XIncludes, so there is the possibility of those things
        # being resolved differently this time around. Oh well.
        elif uri in self._root.sources:
            content = self._root.sources[uri]
            source = inputsource(content, uri)

    # Nothing cached (or the cached text was empty): read the stream fully
    # and re-wrap the text so it can be both parsed now and cached below.
    if not content:
        content = source.stream.read()
        source = inputsource(cStringIO.StringIO(content), source.uri)

    #features = [(sax.FEATURE_PROCESS_XINCLUDES, True)]
    features, properties = [], []
    stylesheet = self._parseSrc(source, features, properties)

    # Cache the string content for subsequent uses
    # e.g., xsl:import/xsl:include and document()
    # NOTE(review): self._root is tested against None above but is
    # dereferenced unconditionally here -- presumably _parseSrc sets it
    # on the first parse; confirm.
    self._root.sources[uri] = content

    return stylesheet
def rdfascrape(source):
    """
    Parse `source` as HTML and lazily yield one result of
    handle_statement() per element carrying an RDFa statement attribute.

    source - anything accepted by amara.lib.inputsource.

    The document URI passed to handle_statement is the `href` of the
    document's head `base` element when that lookup succeeds, otherwise
    the URI of the input source.

    Returns a generator; nothing is evaluated until it is iterated.
    """
    from amara.lib import inputsource
    source = inputsource(source, None)
    doc = html.parse(source.stream)
    try:
        docuri = doc.html.head.base.href
    except Exception:
        # Best-effort lookup: any failure falls back to the source URI.
        # Not a bare `except:`, which would also swallow
        # KeyboardInterrupt and SystemExit.
        docuri = source.uri

    statement_elems = doc.xml_select(u'//*[@property|@resource|@rel]')
    triples = (handle_statement(elem, docuri) for elem in statement_elems)
    return triples
def rdfascrape(source):
    """
    Parse `source` as HTML and lazily yield one result of
    handle_statement() per element carrying an RDFa statement attribute.

    source - anything accepted by amara.lib.inputsource.

    The document URI passed to handle_statement is the `href` of the
    document's head `base` element when that lookup succeeds, otherwise
    the URI of the input source.

    Returns a generator; nothing is evaluated until it is iterated.
    """
    from amara.lib import inputsource
    source = inputsource(source, None)
    doc = html.parse(source.stream)
    try:
        docuri = doc.html.head.base.href
    except Exception:
        # Best-effort lookup: any failure falls back to the source URI.
        # Not a bare `except:`, which would also swallow
        # KeyboardInterrupt and SystemExit.
        docuri = source.uri

    statement_elems = doc.xml_select(u'//*[@property|@resource|@rel]')
    triples = (
        handle_statement(elem, docuri) for elem in statement_elems
    )
    return triples
def test_many_inputsources(): assert rlimit_nofile < 20000, "is your file limit really that large?" # Amara's inputsource consumes a filehandle, in the 'stream' attribute # See what happens if we run out of file handles. sources = [] filename = __file__ for i in range(rlimit_nofile): try: sources.append(inputsource(filename)) except: print "Failed after", i, "files"
def test_plain_parse(self):
    """Parse plain text"""
    parsed = parse_fragment(inputsource(SOURCE1))
    self.assertEqual(EXPECTED1, parsed.xml_encode())

    #Minimal node testing: a single, un-namespaced <p> element
    self.assertEqual(len(parsed.xml_children), 1)
    element = parsed.xml_children[0]
    expectations = (
        ('xml_typecode', tree.element.xml_typecode),
        ('xml_qname', u'p'),
        ('xml_namespace', None),
        ('xml_prefix', None),
    )
    for attribute, wanted in expectations:
        self.assertEqual(getattr(element, attribute), wanted)
def parse(obj, uri=None, entity_factory=None, standalone=False, validate=False, rule_handler=None):
    '''
    Parse an XML input source and return a tree

    :param obj: object with "text" to parse
    :type obj: string, Unicode object (only if you really know what you're
        doing), file-like object (stream), file path, URI or
        `amara.inputsource` object
    :param uri: optional document URI.  You really should provide this if
        the input source is a text string or stream
    :type uri: string
    :return: Parsed tree object
    :rtype: `amara.tree.entity` instance
    :raises `amara.ReaderError`: If the XML is not well formed, or there
        are other core parsing errors

    entity_factory - optional factory callable for creating entity nodes.
                     This is the main lever for customizing the classes
                     used to construct tree nodes
    standalone - similar to the standalone declaration for XML.  Asserts
                 that the XML being parsed does not require any resources
                 outside the given input source (e.g. on the network).
                 In this case has the side-effect of ignoring such
                 external resources if they are encountered (which is
                 where it diverges from XML core.  In XML core that would
                 be a fatal error).  Takes precedence over `validate`
    validate - whether or not to apply DTD validation
    rule_handler - Handler object used to perform rule matching in
                   incremental processing.

    Examples:

    >>> import amara
    >>> MONTY_XML = """<monty>
    ...   <python spam="eggs">What do you mean "bleh"</python>
    ...   <python ministry="abuse">But I was looking for argument</python>
    ... </monty>"""
    >>> doc = amara.parse(MONTY_XML)
    >>> len(doc.xml_children)
    1
    '''
    # standalone wins over validate; with neither, external entities are
    # processed.
    flags = (PARSE_FLAGS_STANDALONE if standalone
             else PARSE_FLAGS_VALIDATE if validate
             else PARSE_FLAGS_EXTERNAL_ENTITIES)
    wrapped = inputsource(obj, uri)
    return _parse(wrapped, flags,
                  entity_factory=entity_factory,
                  rule_handler=rule_handler)
def test_uri_jail(self):
    """
    A resolver restricted to one directory serves files from it and
    rejects URIs outside it.
    """
    start_uri = iri.os_path_to_uri(FILE('test_irihelpers.py'))
    start_base = start_uri.rsplit('/', 1)[0] + '/'

    #Only allow access to files in the same directory as
    #sampleresource.txt via URL jails
    def in_jail(u):
        return u.rsplit('/', 1)[0] + '/' == start_base

    resolver = irihelpers.resolver(authorizations=[(in_jail, True)])
    start_isrc = inputsource(start_uri, resolver=resolver)

    inside = start_isrc.resolve('sampleresource.txt', start_base)
    self.assertEqual('Spam', inside.stream.read().strip())
    self.assertRaises(iri.IriError, resolver.resolve,
                      'http://google.com', start_base)
def test_parse_overridden_default_namespace_reoverridden_child(self):
    """Parse with overridden default namespace and re-overridden child"""
    xhtml_ns = u'http://www.w3.org/1999/xhtml'
    nss = {
        u'xml': u'http://www.w3.org/XML/1998/namespace',
        None: xhtml_ns,
    }
    parsed = parse_fragment(inputsource(SOURCE2), nss)
    self.assertEqual(EXPECTED3, parsed.xml_encode())

    #Minimal node testing: one <p> in the XHTML namespace, no prefix
    self.assertEqual(len(parsed.xml_children), 1)
    element = parsed.xml_children[0]
    self.assertEqual(element.xml_typecode, tree.element.xml_typecode)
    self.assertEqual(element.xml_qname, u'p')
    self.assertEqual(element.xml_namespace, xhtml_ns)
    self.assertEqual(element.xml_prefix, None)
def akara_xslt(body, ctype, **params):
    '''
    Apply an XSLT transform to the request body and return the result.

    @xslt - URL to the XSLT transform to be applied; DEFAULT_TRANSFORM is
            used when the parameter is absent
    all other query parameters are passed to the XSLT processor as
    top-level params

    Raises ValueError when no @xslt is given and no default is configured.

    Sample request:
    curl --request POST --data-binary "@foo.xml" --header "Content-Type: application/xml" "http://*****:*****@xslt=http://hg.akara.info/amara/trunk/raw-file/tip/demo/data/identity.xslt"
    '''
    try:
        stylesheet_ref = params["@xslt"]
    except KeyError:
        if not DEFAULT_TRANSFORM:
            raise ValueError('XSLT transform required')
        stylesheet_ref = DEFAULT_TRANSFORM

    #Using restricted_resolver should forbid any URI access outside the
    #specified "jails", including access through imports and includes
    restricted_resolver = irihelpers.resolver(authorizations=ALLOWED)
    document = inputsource(body, resolver=restricted_resolver)
    stylesheet = inputsource(stylesheet_ref, resolver=restricted_resolver)

    result = transform(document, stylesheet)
    response.add_header("Content-Type", result.parameters.media_type)
    return result
def rdfascrape(source):
    """
    Parse `source` as HTML and lazily yield one result of
    handle_statement() per element carrying an RDFa statement attribute.

    source - anything accepted by amara.lib.inputsource.

    The document URI passed to handle_statement is the `href` of the
    document's head `base` element when that lookup succeeds, otherwise
    the URI of the input source.

    Returns a generator; nothing is evaluated until it is iterated.
    """
    from amara.lib import inputsource
    source = inputsource(source, None)
    doc = html.parse(source.stream)
    try:
        docuri = doc.html.head.base.href
    except Exception:
        # Best-effort lookup: any failure falls back to the source URI.
        # Not a bare `except:`, which would also swallow
        # KeyboardInterrupt and SystemExit.
        docuri = source.uri

    #https://github.com/zepheira/amara/issues/8
    #statement_elems = doc.xml_select(u'//*[@property|@resource|@rel]')
    # Three separate queries instead of one union predicate.  Results are
    # grouped by attribute, and an element carrying more than one of the
    # attributes is visited once per query.
    statement_elems = chain(doc.xml_select(u'//*[@property]'),
                            doc.xml_select(u'//*[@resource]'),
                            doc.xml_select(u'//*[@rel]'))
    triples = (
        handle_statement(elem, docuri) for elem in statement_elems
    )
    return triples
def _run(source_xml, transform_xml, expected, parameters,
         compare_method, source_uri=None, transform_uri=None,
         processor_kwargs=None):
    """
    Run one transform and assert that its output matches `expected`.

    source_xml, source_uri - source document and optional URI, wrapped in
                             an inputsource
    transform_xml, transform_uri - stylesheet and optional URI, likewise
    expected - the expected output text
    parameters - top-level stylesheet parameters, or None; non-None values
                 go through util.parameterize first
    compare_method - callable(result, expected) returning an iterable of
                     differences; an empty iterable means a match
    processor_kwargs - optional keyword arguments for processor()

    Raises AssertionError when differences are found.  Any exception from
    the comparison is re-raised after result and expected are printed.
    """
    # None rather than a shared mutable {} default.
    if processor_kwargs is None:
        processor_kwargs = {}
    P = processor(**processor_kwargs)
    source = inputsource(source_xml, source_uri)
    transform = inputsource(transform_xml, transform_uri)
    P.append_transform(transform)
    if parameters is not None:
        parameters = util.parameterize(parameters)
    result = str(P.run(source, parameters=parameters))
    try:
        diff = list(compare_method(result, expected))
        assert not diff, (source_xml, transform_xml, result, expected, diff)
    except Exception:
        # I don't have a quick way to tell which string caused
        # the error, so let the person debugging figure it out.
        print("=== RESULT ===")
        print(result)
        print("=== EXPECTED ===")
        print(expected)
        print("=== DONE ===")
        raise
def test_parse_overridden_non_default_namespace(self):
    """Parse with overridden non-default namespace"""
    xhtml_ns = u'http://www.w3.org/1999/xhtml'
    nss = {
        u'xml': u'http://www.w3.org/XML/1998/namespace',
        u'h': xhtml_ns,
    }
    parsed = parse_fragment(inputsource(SOURCE3), nss)
    self.assertEqual(EXPECTED4, parsed.xml_encode())

    #Minimal node testing: one <h:p> bound to the XHTML namespace
    self.assertEqual(len(parsed.xml_children), 1)
    element = parsed.xml_children[0]
    expectations = (
        ('xml_typecode', tree.element.xml_typecode),
        ('xml_qname', u'h:p'),
        ('xml_namespace', xhtml_ns),
        ('xml_prefix', u'h'),
    )
    for attribute, wanted in expectations:
        self.assertEqual(getattr(element, attribute), wanted)
def __init__(self, location=None, graph=None, debug=False, nsBindings = None, owlEmbeddings = False):
    """
    Set up the instance from either a ready-made graph or a document
    location.

    location - URL or path of the document to fetch; used when `graph`
               is falsy
    graph - an already-built graph; when truthy it is used as-is and
            nothing is fetched
    debug - when true, progress messages are printed
    nsBindings - optional prefix mapping; updated in place with the
                 graph's namespaces
    owlEmbeddings - stored on the instance; not otherwise used here

    Exactly one of `location` and `graph` is expected (enforced with
    asserts only, so not under `python -O`).
    """
    self.owlEmbeddings = owlEmbeddings
    self.nsBindings = nsBindings if nsBindings else {}
    self.location = location
    self.rules = {}
    self.debug = debug
    if graph:
        assert location is None,"Must supply one of graph or location"
        self.graph = graph
        if debug:
            print "RIF in RDF graph was provided"
    else:
        # NOTE(review): this branch is taken for any falsy `graph`, so a
        # graph object that evaluates false (an empty one, presumably)
        # would trip this assert -- confirm whether `graph is not None`
        # was intended above.
        assert graph is None,"Must supply one of graph or location"
        if debug:
            print "RIF document URL provided ", location
        # find() returns -1 when 'http:' is absent, so "+1" is truthy
        # exactly when the substring occurs somewhere in the location.
        # NOTE(review): location=None with no graph fails here with
        # AttributeError.
        if self.location.find('http:')+1:
            req = urllib2.Request(self.location)
            ##From: http://www.diveintopython.org/http_web_services/redirects.html
            #points an 'opener' to the address to 'sniff' out final Location header
            opener = urllib2.build_opener(SmartRedirectHandler())
            f = opener.open(req)
            self.content = f.read()
        else:
            try:
                self.content = urllib2.urlopen(self.location).read()
            except ValueError:
                # On ValueError, retry with the location converted from
                # an OS path to a URI.
                self.content = urllib2.urlopen(iri.os_path_to_uri(self.location)).read()
            # self.content = open(self.location).read()
        try:
            # First attempt: run the content through the stylesheet at
            # TRANSFORM_URI and parse the transform's output as a graph.
            rdfContent = transform(self.content,inputsource(TRANSFORM_URI))
            self.graph = Graph().parse(StringIO(rdfContent))
            if debug:
                print "Extracted rules from RIF XML format"
        except ValueError:
            # Fallback: parse the fetched content directly, first as
            # 'xml' and, if that fails for any reason, as 'n3'.
            try:
                self.graph = Graph().parse(StringIO(self.content),format='xml')
            except:
                self.graph = Graph().parse(StringIO(self.content),format='n3')
            if debug:
                print "Extracted rules from RIF in RDF document"
    self.nsBindings.update(dict(self.graph.namespaces()))
def parse(obj, uri=None, entity_factory=None, standalone=False, validate=False, rule_handler=None):
    '''
    Parse an XML input source and return a tree

    :param obj: object with "text" to parse
    :type obj: string, Unicode object (only if you really know what you're
        doing), file-like object (stream), file path, URI or
        `amara.inputsource` object
    :param uri: optional document URI.  You really should provide this if
        the input source is a text string or stream
    :type uri: string
    :return: Parsed tree object
    :rtype: `amara.tree.entity` instance
    :raises `amara.ReaderError`: If the XML is not well formed, or there
        are other core parsing errors

    entity_factory - optional factory callable for creating entity nodes.
                     This is the main lever for customizing the classes
                     used to construct tree nodes
    standalone - similar to the standalone declaration for XML.  Asserts
                 that the XML being parsed does not require any resources
                 outside the given input source (e.g. on the network).
                 In this case has the side-effect of ignoring such
                 external resources if they are encountered (which is
                 where it diverges from XML core.  In XML core that would
                 be a fatal error).  Takes precedence over `validate`
    validate - whether or not to apply DTD validation
    rule_handler - Handler object used to perform rule matching in
                   incremental processing.

    Examples:

    >>> import amara
    >>> MONTY_XML = """<monty>
    ...   <python spam="eggs">What do you mean "bleh"</python>
    ...   <python ministry="abuse">But I was looking for argument</python>
    ... </monty>"""
    >>> doc = amara.parse(MONTY_XML)
    >>> len(doc.xml_children)
    1
    '''
    def choose_flags():
        # standalone wins over validate; the default processes external
        # entities.
        if standalone:
            return PARSE_FLAGS_STANDALONE
        if validate:
            return PARSE_FLAGS_VALIDATE
        return PARSE_FLAGS_EXTERNAL_ENTITIES

    flags = choose_flags()
    wrapped = inputsource(obj, uri)
    return _parse(wrapped, flags,
                  entity_factory=entity_factory,
                  rule_handler=rule_handler)
def test_copy_2():
    """identity transform"""
    # The classic identity stylesheet: copy every attribute and node.
    stylesheet = """<?xml version="1.0"?> <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> <xsl:template match="@*|node()"> <xsl:copy> <xsl:apply-templates select="@*|node()"/> </xsl:copy> </xsl:template> </xsl:stylesheet> """
    wanted = """<?xml version='1.0' encoding='UTF-8'?> <?xml-stylesheet href="addr_book1.xsl" type="text/xml"?><ADDRBOOK> <ENTRY ID='pa'> <NAME>Pieter Aaron</NAME> <ADDRESS>404 Error Way</ADDRESS> <PHONENUM DESC='Work'>404-555-1234</PHONENUM> <PHONENUM DESC='Fax'>404-555-4321</PHONENUM> <PHONENUM DESC='Pager'>404-555-5555</PHONENUM> <EMAIL>[email protected]</EMAIL> </ENTRY> <ENTRY ID='en'> <NAME>Emeka Ndubuisi</NAME> <ADDRESS>42 Spam Blvd</ADDRESS> <PHONENUM DESC='Work'>767-555-7676</PHONENUM> <PHONENUM DESC='Fax'>767-555-7642</PHONENUM> <PHONENUM DESC='Pager'>800-SKY-PAGEx767676</PHONENUM> <EMAIL>[email protected]</EMAIL> </ENTRY> <ENTRY ID='vz'> <NAME>Vasia Zhugenev</NAME> <ADDRESS>2000 Disaster Plaza</ADDRESS> <PHONENUM DESC='Work'>000-987-6543</PHONENUM> <PHONENUM DESC='Cell'>000-000-0000</PHONENUM> <EMAIL>[email protected]</EMAIL> </ENTRY> </ADDRBOOK>"""

    book_path = os.path.join(module_dirname, 'addr_book1.xml')
    book_uri = "file:" + module_dirname + "/addr_book1.xml"
    _run_xml(
        source_xml=inputsource(book_path),
        source_uri=book_uri,
        transform_xml=stylesheet,
        expected=wanted,
    )
def test_copy_2():
    """identity transform"""
    case = {
        'source_xml': inputsource(os.path.join(module_dirname, 'addr_book1.xml')),
        'source_uri': "file:" + module_dirname + "/addr_book1.xml",
        # Identity stylesheet: copy every attribute and node unchanged.
        'transform_xml': """<?xml version="1.0"?> <xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"> <xsl:template match="@*|node()"> <xsl:copy> <xsl:apply-templates select="@*|node()"/> </xsl:copy> </xsl:template> </xsl:stylesheet> """,
        'expected': """<?xml version='1.0' encoding='UTF-8'?> <?xml-stylesheet href="addr_book1.xsl" type="text/xml"?><ADDRBOOK> <ENTRY ID='pa'> <NAME>Pieter Aaron</NAME> <ADDRESS>404 Error Way</ADDRESS> <PHONENUM DESC='Work'>404-555-1234</PHONENUM> <PHONENUM DESC='Fax'>404-555-4321</PHONENUM> <PHONENUM DESC='Pager'>404-555-5555</PHONENUM> <EMAIL>[email protected]</EMAIL> </ENTRY> <ENTRY ID='en'> <NAME>Emeka Ndubuisi</NAME> <ADDRESS>42 Spam Blvd</ADDRESS> <PHONENUM DESC='Work'>767-555-7676</PHONENUM> <PHONENUM DESC='Fax'>767-555-7642</PHONENUM> <PHONENUM DESC='Pager'>800-SKY-PAGEx767676</PHONENUM> <EMAIL>[email protected]</EMAIL> </ENTRY> <ENTRY ID='vz'> <NAME>Vasia Zhugenev</NAME> <ADDRESS>2000 Disaster Plaza</ADDRESS> <PHONENUM DESC='Work'>000-987-6543</PHONENUM> <PHONENUM DESC='Cell'>000-000-0000</PHONENUM> <EMAIL>[email protected]</EMAIL> </ENTRY> </ADDRBOOK>""",
    }
    _run_xml(**case)
def test_basic_uri_resolver(self):
    """
    Exercise absolutize() and resolve() on an inputsource built from a
    local sample file.
    """
    absolutize_cases = [
        ('http://foo.com/root/', 'path', 'http://foo.com/root/path'),
        ('http://foo.com/root', 'path', 'http://foo.com/path'),
    ]
    start_isrc = inputsource(FILE('sampleresource.txt'))

    for base, uri, exp in absolutize_cases:
        res = start_isrc.absolutize(uri, base)
        self.assertEqual(exp, res, "absolutize: %s %s" % (base, uri))

    # This base must be rejected by absolutize with an IriError.
    self.assertRaises(iri.IriError, start_isrc.absolutize,
                      'path', 'foo:foo.com')

    # Resolve the sample file against the current directory, which needs
    # a trailing separator to act as a base.
    cwd = os.getcwd()
    if cwd[-1] != os.sep:
        cwd += os.sep
    new_isrc = start_isrc.resolve(FILE('sampleresource.txt'),
                                  iri.os_path_to_uri(cwd))
    first_line = new_isrc.stream.readline().rstrip()
    self.assertEqual('Spam', first_line, 'resolve')
def test_borrowed(source_xml=source_xml, transform_xml=transform_xml, expected=expected_html):
    """
    Run the HTML comparison with source, transform and expected output
    all loaded through inputsource.

    The defaults capture the module-level values at definition time.
    """
    source_input = inputsource(source_xml)
    transform_input = inputsource(transform_xml)
    # The expected output is read in full from its own inputsource.
    expected_text = inputsource(expected).stream.read()
    _run_html(source_xml=source_input,
              transform_xml=transform_input,
              expected=expected_text)
def test_method(self):
    """
    Apply `self.xupdate` to `self.source`; the test passes as long as
    apply_xupdate raises nothing.
    """
    source_doc = inputsource(self.source, 'source')
    update_doc = inputsource(self.xupdate, 'xupdate-source')
    # The returned document is not inspected.
    apply_xupdate(source_doc, update_doc)
# Shared test document, wrapped in an inputsource with the URI
# 'urn:domlette-test-tree'.  It has an internal DTD subset, processing
# instructions before and after the root, a comment, a namespaced element
# (foo:CHILD3) and xml:lang attributes.  The \xf6\xf8 escapes put
# non-ASCII bytes into element names, in line with the ISO-8859-1
# encoding the XML declaration announces.
src = inputsource( """<?xml version='1.0' encoding='ISO-8859-1'?> <!DOCTYPE ROOT [ <!ELEMENT ROOT (#PCDATA|CHILD1|CHILD2|foo:CHILD3|lang)*> <!ELEMENT CHILD1 (#PCDATA|GCHILD)*> <!ELEMENT CHILD2 (#PCDATA|GCHILD)*> <!ELEMENT foo:CHILD3 EMPTY> <!ELEMENT GCHILD EMPTY> <!ELEMENT lang (foo|f\xf6\xf8)*> <!ELEMENT foo EMPTY> <!ELEMENT f\xf6\xf8 EMPTY> <!ATTLIST CHILD1 attr1 CDATA #IMPLIED attr31 CDATA #IMPLIED> <!ATTLIST CHILD2 attr1 CDATA #IMPLIED CODE ID #REQUIRED> <!ATTLIST foo:CHILD3 foo:name CDATA #IMPLIED xmlns:foo CDATA #IMPLIED> <!ATTLIST GCHILD name CDATA #IMPLIED> <!ATTLIST lang xml:lang CDATA #IMPLIED> <!ATTLIST foo xml:lang CDATA #IMPLIED> ]> <?xml-stylesheet "Data" ?> <ROOT> <!-- Test Comment --> <CHILD1 attr1="val1" attr31="31"> <GCHILD name="GCHILD11"/> <GCHILD name="GCHILD12"/> Text1 </CHILD1> <CHILD2 attr1="val2" CODE="1"> <GCHILD name="GCHILD21"/> <GCHILD name="GCHILD22"/> </CHILD2> <foo:CHILD3 xmlns:foo="http://foo.com" foo:name="mike"/> <lang xml:lang="en"> <foo xml:lang=""/> <foo/> <f\xf6\xf8/> </lang> </ROOT> <?no-data ?> """, 'urn:domlette-test-tree')
def test_choose_1():
    """`xsl:choose`"""
    # Stylesheet appends an employee label after each name, picked by
    # xsl:choose / xsl:when / xsl:otherwise.
    stylesheet = """<?xml version="1.0"?> <xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0"> <xsl:output method='html'/> <xsl:template match="/"> <HTML> <HEAD><TITLE>Address Book</TITLE> </HEAD> <BODY> <TABLE><xsl:apply-templates/></TABLE> </BODY> </HTML> </xsl:template> <xsl:template match="ENTRY"> <xsl:element name='TR'> <xsl:apply-templates select='NAME'/> </xsl:element> </xsl:template> <xsl:template match="NAME"> <xsl:element name='TD'> <xsl:attribute name='ALIGN'>CENTER</xsl:attribute> <B><xsl:apply-templates/></B> <xsl:choose> <xsl:when test="text()='Pieter Aaron'">: Employee 1</xsl:when> <xsl:when test="text()='Emeka Ndubuisi'">: Employee 2</xsl:when> <xsl:otherwise>: Other Employee</xsl:otherwise> </xsl:choose> </xsl:element> </xsl:template> </xsl:stylesheet> """
    wanted = """<HTML> <HEAD> <meta http-equiv='Content-Type' content='text/html; charset=iso-8859-1'> <TITLE>Address Book</TITLE> </HEAD> <BODY> <TABLE> \x20\x20\x20\x20 <TR> <TD ALIGN='CENTER'><B>Pieter Aaron</B>: Employee 1</TD> </TR> \x20\x20\x20\x20 <TR> <TD ALIGN='CENTER'><B>Emeka Ndubuisi</B>: Employee 2</TD> </TR> \x20\x20\x20\x20 <TR> <TD ALIGN='CENTER'><B>Vasia Zhugenev</B>: Other Employee</TD> </TR> </TABLE> </BODY> </HTML>"""

    book_path = os.path.join(module_name, "addr_book1.xml")
    book_uri = "file:" + module_name + "/addr_book1.xml"
    _run_html(
        source_xml=inputsource(book_path),
        source_uri=book_uri,
        transform_xml=stylesheet,
        expected=wanted,
    )
def produce_final_output(response, response_headers=response_headers):
    """
    Apply an XSLT transform to `response` on the server side when one is
    called for, and return the transformed content.

    response - the response body: parsed for stylesheet PIs and used as
               the transform source
    response_headers - list of (name, value) pairs; the default is bound
                       to the enclosing scope's value at definition time

    Returns the transformed content after calling start_response, or None
    when no transform was applied.

    Besides its parameters this relies on names from elsewhere: self,
    environ, send_browser_xslt, force_server_side, start_response, status,
    exc_info and path.
    """
    log = sys.stderr
    if not send_browser_xslt and environ[ACTIVE_FLAG]:
        use_pi = False
        if force_server_side and force_server_side != True:
            #True is a special flag meaning "don't delegate to the browser but still check for XSLT PIs"
            xslt = force_server_side
        else:
            #Check for a Stylesheet PI
            parser = sax.reader()
            parser.setFeature(sax.FEATURE_GENERATOR, True)
            # NOTE(review): `handler` is never referenced again here;
            # presumably find_xslt_pis attaches itself to the parser.
            handler = find_xslt_pis(parser)
            pi_iter = parser.parse(inputsource(response))
            try:
                #Note: only grabs the first PI. Consider whether we should handle multiple
                xslt = pi_iter.next()
            except StopIteration:
                xslt = None
            use_pi = True
        if xslt:
            xslt = xslt.encode('utf-8')
            result = StringIO()
            #self.xslt_sources = environ.get(
            # 'wsgixml.applyxslt.xslt_sources', {})
            source = InputSource.DefaultFactory.fromString(
                response, uri=get_request_url(environ))
            params = {}
            for ns in self.stock_xslt_params:
                params.update(setup_xslt_params(ns, self.stock_xslt_params[ns]))
            start = time.time()
            # The bare string below is old code kept as a no-op expression
            # statement; it does nothing at runtime.
            ''' processor = self.processorPool.get_processor( stylesheet, self.ext_functions, self.ext_elements) cherrypy.response.body = processor.run( DefaultFactory.fromString(picket.document, picket.uri), topLevelParams=picket.parameters) if self.default_content_type: cherrypy.response.headers['Content-Type'] = self.default_content_type if picket.content_type: cherrypy.response.headers['Content-Type'] = picket.content_type finally: self.processorPool.release_processor(stylesheet) '''
            if xslt in self.processor_cache:
                processor = self.processor_cache[xslt]
                #Any transform would have already been loaded
                use_pi = False
                print >> log, 'Using cached processor instance for transform', xslt
            else:
                print >> log, 'Creating new processor instance for transform', xslt
                processor = Processor.Processor()
                if self.ext_modules:
                    processor.registerExtensionModules(self.ext_modules)
                if self.use_wsgi_env:
                    params.update(setup_xslt_params(WSGI_NS, environ))
                #srcAsUri = OsPathToUri()
                #if False:
                if environ.has_key('paste.recursive.include'):
                    #paste's recursive facilities are available, to
                    #so we can get the XSLT with a middleware call
                    #rather than a full Web invocation
                    #print environ['paste.recursive.include']
                    xslt_resp = environ['paste.recursive.include'](xslt)
                    #FIXME: this should be relative to the XSLT, not XML
                    #print xslt_resp, xslt_resp.body
                    isrc = InputSource.DefaultFactory.fromString(
                        xslt_resp.body, get_request_url(environ))
                    processor.appendStylesheet(isrc)
                else:
                    #We have to make a full Web call to get the XSLT.
                    #4Suite will do that for us in processing the PI
                    if not use_pi:
                        uri = Uri.Absolutize(xslt, get_request_url(environ))
                        isrc = InputSource.DefaultFactory.fromUri(uri)
                        processor.appendStylesheet(isrc)
                # Keep the processor for later requests using this transform.
                self.processor_cache[xslt] = processor
            processor.run(source, outputStream=result,
                          ignorePis=not use_pi, topLevelParams=params)

            #Strip content-length if present (needs to be
            #recalculated by server)
            #Also strip content-type, which will be replaced below
            response_headers = [ (name, value)
                for name, value in response_headers
                    if ( name.lower()
                         not in ['content-length', 'content-type'])
            ]
            #Put in the updated content type
            imt = processor.outputParams.mediaType
            content = result.getvalue()
            if environ.get(CACHEABLE_FLAG):
                self.path_cache[path] = imt, content
            response_headers.append(('content-type', imt))
            start_response(status, response_headers, exc_info)
            end = time.time()
            print >> log, '%s: elapsed time: %0.3f\n'%(xslt, end-start)
            #environ['wsgi.errors'].write('%s: elapsed time: %0.3f\n'%(xslt, end-start))
            return content

    #If it reaches this point, no XSLT was applied.
    return
def test_borrowed(source_xml=source_xml, transform_xml=transform_xml, expected=expected_html):
    """
    Run the HTML comparison with source, transform and expected output
    all loaded through inputsource.

    The defaults capture the module-level values at definition time.
    """
    case = {}
    case['source_xml'] = inputsource(source_xml)
    case['transform_xml'] = inputsource(transform_xml)
    # The expected output is read in full from its own inputsource.
    case['expected'] = inputsource(expected).stream.read()
    _run_html(**case)
# Shared test document, wrapped in an inputsource with the URI
# 'urn:domlette-test-tree'.  It has an internal DTD subset, processing
# instructions before and after the root, a comment, a namespaced element
# (foo:CHILD3) and xml:lang attributes.  The \xf6\xf8 escapes put
# non-ASCII bytes into element names, in line with the ISO-8859-1
# encoding the XML declaration announces.
src = inputsource("""<?xml version='1.0' encoding='ISO-8859-1'?> <!DOCTYPE ROOT [ <!ELEMENT ROOT (#PCDATA|CHILD1|CHILD2|foo:CHILD3|lang)*> <!ELEMENT CHILD1 (#PCDATA|GCHILD)*> <!ELEMENT CHILD2 (#PCDATA|GCHILD)*> <!ELEMENT foo:CHILD3 EMPTY> <!ELEMENT GCHILD EMPTY> <!ELEMENT lang (foo|f\xf6\xf8)*> <!ELEMENT foo EMPTY> <!ELEMENT f\xf6\xf8 EMPTY> <!ATTLIST CHILD1 attr1 CDATA #IMPLIED attr31 CDATA #IMPLIED> <!ATTLIST CHILD2 attr1 CDATA #IMPLIED CODE ID #REQUIRED> <!ATTLIST foo:CHILD3 foo:name CDATA #IMPLIED xmlns:foo CDATA #IMPLIED> <!ATTLIST GCHILD name CDATA #IMPLIED> <!ATTLIST lang xml:lang CDATA #IMPLIED> <!ATTLIST foo xml:lang CDATA #IMPLIED> ]> <?xml-stylesheet "Data" ?> <ROOT> <!-- Test Comment --> <CHILD1 attr1="val1" attr31="31"> <GCHILD name="GCHILD11"/> <GCHILD name="GCHILD12"/> Text1 </CHILD1> <CHILD2 attr1="val2" CODE="1"> <GCHILD name="GCHILD21"/> <GCHILD name="GCHILD22"/> </CHILD2> <foo:CHILD3 xmlns:foo="http://foo.com" foo:name="mike"/> <lang xml:lang="en"> <foo xml:lang=""/> <foo/> <f\xf6\xf8/> </lang> </ROOT> <?no-data ?> """, 'urn:domlette-test-tree')