def testSearch(self):
    """
    Exercise C{getElementById} and C{getElementsByTagName} on documents
    parsed with the default options, with C{caseInsensitive=0} and with
    C{caseInsensitive=1} (both of the latter with C{preserveCase=1}).
    """
    s = "<foo><bar id='me' /><baz><foo /></baz></foo>"
    s2 = "<fOo><bAr id='me' /><bAz><fOO /></bAz></fOo>"
    d = microdom.parseString(s)
    d2 = microdom.parseString(s2, caseInsensitive=0, preserveCase=1)
    d3 = microdom.parseString(s2, caseInsensitive=1, preserveCase=1)

    # Default parse: both <foo> elements are found, outermost first.
    root = d.documentElement
    self.assertEqual(root.firstChild(), d.getElementById('me'))
    self.assertEqual(d.getElementsByTagName("foo"),
                     [root, root.lastChild().firstChild()])

    # Case-sensitive parse: each spelling only matches itself.
    root = d2.documentElement
    self.assertEqual(root.firstChild(), d2.getElementById('me'))
    self.assertEqual(d2.getElementsByTagName('fOo'), [root])
    self.assertEqual(d2.getElementsByTagName('fOO'),
                     [root.lastChild().firstChild()])
    self.assertEqual(d2.getElementsByTagName('foo'), [])

    # Case-insensitive parse: any spelling matches both elements.
    root = d3.documentElement
    self.assertEqual(root.firstChild(), d3.getElementById('me'))
    self.assertEqual(d3.getElementsByTagName('FOO'),
                     [root, root.lastChild().firstChild()])
    self.assertEqual(d3.getElementsByTagName('fOo'),
                     [root, root.lastChild().firstChild()])
def setAuthors(template, authors):
    """
    Add author information to the template document.

    An anchor per author is appended to every node whose C{class} attribute
    is C{authors}, and one C{<link rel="author">} node per author is
    appended to the first C{head} node of the template.

    @param template: The DOM node or document to modify in place.

    @param authors: A sequence of C{(name, href)} two-tuples.

    @return: C{None}
    """
    from xml.sax.saxutils import escape, quoteattr

    # Names and hrefs are interpolated into markup that is re-parsed below,
    # so escape them; quoteattr() supplies the surrounding quotes itself.
    anchors = ['<a href=%s>%s</a>' % (quoteattr(href), escape(name))
               for name, href in authors]

    # Pick the final anchor by position rather than by comparing tuples, so
    # an author listed twice is not mistaken for the last entry.
    if len(anchors) == 1:
        text = anchors[0]
    elif anchors:
        text = ''.join([anchor + ',' for anchor in anchors[:-1]])
        text += 'and ' + anchors[-1]
    else:
        text = ''

    childNodes = microdom.parseString('<span>' + text + '</span>').childNodes
    for node in domhelpers.findElementsWithAttribute(template,
                                                     "class", 'authors'):
        node.childNodes.extend(childNodes)

    # Second, add appropriate <link rel="author" ...> tags to the <head>.
    head = domhelpers.findNodesNamed(template, 'head')[0]
    links = [microdom.parseString('<link rel="author" href=%s title=%s/>'
                                  % (quoteattr(href), quoteattr(name))
                                  ).childNodes[0]
             for name, href in authors]
    head.childNodes.extend(links)
def footnotes(document):
    """
    Move every footnote of C{document} to the end of its body and leave a
    numbered link in its place.

    A footnote is any node whose C{class} attribute is C{footnote}.  Each one
    is replaced by an C{a} node (whose C{title} is the whitespace-normalised
    footnote text) and re-homed inside an C{li} of a single C{ol}, which is
    appended to the first C{body} node after an C{h2} heading.

    @type document: A DOM Node or Document
    @param document: The input document, modified in place.

    @return: C{None}
    """
    notes = domhelpers.findElementsWithAttribute(document,
                                                 "class", "footnote")
    if not notes:
        return

    listing = microdom.Element("ol")
    for index, note in enumerate(notes):
        number = index + 1

        # The in-text reference that takes the footnote's place.
        link = microdom.parseString(
            '<a href="#footnote-%(id)d">'
            "<super>%(id)d</super></a>" % {"id": number}).documentElement
        link.setAttribute(
            "title", " ".join(domhelpers.getNodeText(note).split()))

        # The named anchor wrapping the footnote in the collected list.
        anchor = microdom.Element(
            "a", attributes={"name": "footnote-%d" % number})
        anchor.childNodes = [note]
        item = microdom.Element("li")
        item.childNodes = [anchor]
        listing.childNodes.append(item)

        note.parentNode.replaceChild(link, note)

    body = domhelpers.findNodesNamed(document, "body")[0]
    heading = microdom.parseString("<h2>Footnotes</h2>").documentElement
    body.childNodes.extend([heading, listing])
def testMutate(self):
    """
    Build up and tear down a tree with C{appendChild}, C{setAttribute},
    C{insertBefore}, C{removeChild} and C{replaceChild}, comparing the
    result with independently parsed documents along the way.
    """
    s = "<foo />"
    s1 = '<foo a="b"><bar/><foo/></foo>'
    s2 = '<foo a="b">foo</foo>'
    d = microdom.parseString(s).documentElement
    d1 = microdom.parseString(s1).documentElement
    d2 = microdom.parseString(s2).documentElement

    # The clone is taken before the attribute is set, so it must not carry it.
    d.appendChild(d.cloneNode())
    d.setAttribute("a", "b")
    child = d.childNodes[0]
    self.assertEqual(child.getAttribute("a"), None)
    self.assertEqual(child.nodeName, "foo")

    d.insertBefore(microdom.Element("bar"), child)
    self.assertEqual(d.childNodes[0].nodeName, "bar")
    self.assertEqual(d.childNodes[1], child)
    for n in d.childNodes:
        self.assertEqual(n.parentNode, d)
    self.assertTrue(d.isEqualToNode(d1))

    d.removeChild(child)
    self.assertEqual(len(d.childNodes), 1)
    self.assertEqual(d.childNodes[0].nodeName, "bar")

    t = microdom.Text("foo")
    d.replaceChild(t, d.firstChild())
    self.assertEqual(d.firstChild(), t)
    self.assertTrue(d.isEqualToNode(d2))
def testOutput(self):
    """
    For every C{(input, output)} pair in C{self.samples}, check that the
    parsed input serialises to the output and that both parse to equal
    documents.
    """
    for s, out in self.samples:
        d = microdom.parseString(s, caseInsensitive=0)
        d2 = microdom.parseString(out, caseInsensitive=0)
        testOut = d.documentElement.toxml()
        self.assertEqual(out, testOut)
        self.assertTrue(d.isEqualToDocument(d2))
def testEatingWhitespace(self):
    """
    An element containing only whitespace parses to an element without
    child nodes, equal to the same element written with no content.
    """
    s = """<hello> </hello>"""
    d = microdom.parseString(s)
    self.assertFalse(d.documentElement.hasChildNodes(),
                     d.documentElement.childNodes)
    self.assertTrue(
        d.isEqualToDocument(microdom.parseString('<hello></hello>')))
def testNamespaces(self):
    """
    Check namespace information on a parsed document, and again on the
    document obtained by re-parsing its C{toprettyxml} output.
    """
    s = (' <x xmlns="base"> '
         '<y /> '
         '<y q="1" x:q="2" y:q="3" /> '
         '<y:y xml:space="1">here is some space </y:y> '
         '<y:y /> '
         '<x:y /> '
         '</x> ')

    def check(document):
        # The same three expectations apply before and after the round trip.
        root = document.documentElement
        self.assertEqual(root.namespace, "base")
        self.assertEqual(root.getElementsByTagName("y")[0].namespace, "base")
        self.assertEqual(
            root.getElementsByTagName("y")[1].getAttributeNS('base', 'q'),
            '1')

    d = microdom.parseString(s)
    # at least make sure it doesn't traceback
    s2 = d.toprettyxml()
    check(d)
    check(microdom.parseString(s2))
def generateToC(document):
    """
    Create a table of contents for the given document.

    Every header returned by C{getHeaders} gets an C{<a name="autoN" />}
    anchor appended to its children, and a matching list entry is added to
    the generated markup.

    @type document: A DOM Node or Document

    @rtype: A DOM Node
    @return: the document element of the parsed table-of-contents markup.
    """
    pieces = ["\n<ol>\n"]
    depth = 0
    for index, header in enumerate(getHeaders(document)):
        # The digit in the tag name, minus two, gives the nesting depth.
        headerDepth = int(header.tagName[1]) - 2
        # A negative repeat count yields '', so only one of these two
        # contributes anything for a given header.
        pieces.append((depth - headerDepth) * "</ul>\n")
        pieces.append((headerDepth - depth) * "<ul>")
        pieces.append('<li><a href="#auto%d">' % index)
        pieces.append(domhelpers.getNodeText(header))
        pieces.append("</a></li>\n")
        depth = headerDepth
        target = microdom.parseString(
            '<a name="auto%d" />' % index).documentElement
        header.childNodes.append(target)
    pieces.append("</ul>\n" * depth)
    pieces.append("</ol>\n")
    return microdom.parseString("".join(pieces)).documentElement
def testScriptLeniencyIntelligence(self):
    """
    A C{script} element whose content is a comment or a CDATA section
    serialises back to exactly its input when parsed with
    C{beExtremelyLenient=1}.
    """
    samples = (
        """<script><!-- lalal --></script>""",
        """<script><![CDATA[lalal]]></script>""",
        """<script> // <![CDATA[ lalal //]]></script>""",
    )
    for s in samples:
        parsed = microdom.parseString(s, beExtremelyLenient=1)
        self.assertEqual(parsed.firstChild().toxml(), s)
def testLenientAmpersand(self):
    """
    With C{beExtremelyLenient=1} a bare ampersand in text is accepted and
    is expected to serialise as the C{&amp;} entity.
    """
    prefix = "<?xml version='1.0'?>"
    # NOTE(review): the expected values as received were identical to the
    # inputs and the table held the same ("&", "&") row twice, which looks
    # like entity-decoding damage; the "&amp;" forms are restored here --
    # confirm against the upstream test.
    cases = [
        ("&", "&amp;"),
        ("& ", "&amp; "),
        ("&amp;", "&amp;"),
        ("&hello monkey", "&amp;hello monkey"),
    ]
    # we use <pre> so space will be preserved
    for i, o in cases:
        d = microdom.parseString("%s<pre>%s</pre>" % (prefix, i),
                                 beExtremelyLenient=1)
        self.assertEqual(d.documentElement.toxml(), "<pre>%s</pre>" % o)
    # non-space preserving
    d = microdom.parseString("<t>hello & there</t>", beExtremelyLenient=1)
    self.assertEqual(d.documentElement.toxml(), "<t>hello &amp; there</t>")
def testDoctype(self):
    """
    The DOCTYPE declaration is exposed as C{doctype}, survives C{toxml},
    and makes the document unequal to one without it even though the
    document elements are equal.
    """
    s = ('<?xml version="1.0"?>'
         '<!DOCTYPE foo PUBLIC "baz" "http://www.example.com/example.dtd">'
         "<foo></foo>")
    s2 = "<foo/>"
    d = microdom.parseString(s)
    d2 = microdom.parseString(s2)
    self.assertEqual(
        d.doctype,
        'foo PUBLIC "baz" "http://www.example.com/example.dtd"')
    self.assertEqual(d.toxml(), s)
    self.assertFalse(d.isEqualToDocument(d2))
    self.assertTrue(d.documentElement.isEqualToNode(d2.documentElement))
def testSingletons(self):
    """
    Self-closing tags parse to equal C{b} elements whether or not
    whitespace (a space or a newline) precedes the closing C{/>}.
    """
    s = "<foo><b/><b /><b\n/></foo>"
    s2 = "<foo><b/><b/><b/></foo>"
    nodes = microdom.parseString(s).documentElement.childNodes
    nodes2 = microdom.parseString(s2).documentElement.childNodes
    self.assertEqual(len(nodes), 3)
    for (n, n2) in zip(nodes, nodes2):
        self.assertTrue(isinstance(n, microdom.Element))
        self.assertEqual(n.nodeName, "b")
        self.assertTrue(n.isEqualToNode(n2))
def test_replaceNonChild(self):
    """
    L{Node.replaceChild} raises L{ValueError} when the node to be replaced
    is not a child of the node the method is called on.
    """
    parsed = [microdom.parseString(markup)
              for markup in ("<foo />", "<bar />", "<baz />")]
    parent, orphan, replacement = parsed
    self.assertRaises(
        ValueError, parent.replaceChild, replacement, orphan)
def test_getElementsByTagName(self):
    """
    C{domhelpers.getElementsByTagName} returns the matching elements in the
    order asserted below, and gives the same answer whether it starts from
    the document or from its document element.
    """
    def inAttributes(start):
        # Concatenate the "in" attribute of every <foo> found under start.
        found = domhelpers.getElementsByTagName(start, 'foo')
        return ''.join([node.getAttribute('in') for node in found])

    # A document whose only element is the one searched for.
    doc1 = microdom.parseString('<foo/>')
    expected = 'foo'
    actual = domhelpers.getElementsByTagName(doc1, 'foo')[0].nodeName
    self.assertEqual(actual, expected)
    el1 = doc1.documentElement
    actual = domhelpers.getElementsByTagName(el1, 'foo')[0].nodeName
    self.assertEqual(actual, expected)

    # Matches at several depths of one document.
    doc2_xml = ('<a><foo in="a"/><b><foo in="b"/></b><c><foo in="c"/></c>'
                '<foo in="d"/><foo in="ef"/><g><foo in="g"/>'
                '<h><foo in="h"/></h></g></a>')
    doc2 = microdom.parseString(doc2_xml)
    expected = 'abcdefgh'
    self.assertEqual(inAttributes(doc2), expected)
    self.assertEqual(inAttributes(doc2.documentElement), expected)

    # A deeper tree with nested siblings.
    doc3_xml = (' <a><foo in="a"/> '
                '<b><foo in="b"/> '
                '<d><foo in="d"/> '
                '<g><foo in="g"/></g> '
                '<h><foo in="h"/></h> '
                '</d> '
                '<e><foo in="e"/> '
                '<i><foo in="i"/></i> '
                '</e> '
                '</b> '
                '<c><foo in="c"/> '
                '<f><foo in="f"/> '
                '<j><foo in="j"/></j> '
                '</f> '
                '</c> '
                '</a>')
    doc3 = microdom.parseString(doc3_xml)
    expected = 'abdgheicfj'
    self.assertEqual(inAttributes(doc3), expected)
    self.assertEqual(inAttributes(doc3.documentElement), expected)

    # The root itself is a match, as is one nested element.
    doc4_xml = '<foo><bar></bar><baz><foo/></baz></foo>'
    doc4 = microdom.parseString(doc4_xml)
    actual = domhelpers.getElementsByTagName(doc4, 'foo')
    root = doc4.documentElement
    expected = [root, root.lastChild().firstChild()]
    self.assertEqual(actual, expected)
    actual = domhelpers.getElementsByTagName(root, 'foo')
    self.assertEqual(actual, expected)
def testScriptLeniencyIntelligence(self):
    """
    If there is a comment or CDATA in a C{script} element, the autoquoting
    done in C{beExtremelyLenient} mode should not happen: the element
    serialises back to exactly its input.
    """
    for s in ("""<script><!-- lalal --></script>""",
              """<script><![CDATA[lalal]]></script>""",
              """<script> // <![CDATA[ lalal //]]></script>"""):
        document = microdom.parseString(s, beExtremelyLenient=1)
        self.assertEqual(document.firstChild().toxml(), s)
def testDoctype(self):
    """
    The DOCTYPE declaration is exposed as C{doctype} and survives
    C{toxml}; the document compares unequal to one without a DOCTYPE while
    the two document elements compare equal.
    """
    s = ('<?xml version="1.0"?>'
         '<!DOCTYPE foo PUBLIC "baz" "http://www.example.com/example.dtd">'
         '<foo />')
    s2 = '<foo/>'
    d = microdom.parseString(s)
    d2 = microdom.parseString(s2)
    self.assertEqual(
        d.doctype,
        'foo PUBLIC "baz" "http://www.example.com/example.dtd"')
    self.assertEqual(d.toxml(), s)
    self.assertNotEqual(d, d2)
    self.assertEqual(d.documentElement, d2.documentElement)
def test_doctype(self):
    """
    The DOCTYPE declaration is exposed as C{doctype}, is kept by C{toxml},
    and distinguishes the document from one without it even though the
    document elements are equal.
    """
    doctype = 'foo PUBLIC "baz" "http://www.example.com/example.dtd"'
    s = ('<?xml version="1.0"?>'
         '<!DOCTYPE foo PUBLIC "baz" "http://www.example.com/example.dtd">'
         '<foo></foo>')
    withDoctype = microdom.parseString(s)
    bare = microdom.parseString('<foo/>')
    self.assertEqual(withDoctype.doctype, doctype)
    self.assertEqual(withDoctype.toxml(), s)
    self.assertFalse(withDoctype.isEqualToDocument(bare))
    self.assertTrue(
        withDoctype.documentElement.isEqualToNode(bare.documentElement))
def testPreserveCase(self):
    """
    Parse a mixed-case document and its lower-cased form under several
    C{caseInsensitive}/C{preserveCase} combinations and check that the
    results form a chain of equal documents.
    """
    s = ('<eNcApSuLaTe><sUxor></sUxor><bOrk><w00T>TeXt</W00t></BoRk>'
         '</EnCaPsUlAtE>')
    # Lower-case the tags only; the text content keeps its original case.
    s2 = s.lower().replace('text', 'TeXt')
    d = microdom.parseString(s, caseInsensitive=1, preserveCase=1)
    d2 = microdom.parseString(s, caseInsensitive=1, preserveCase=0)
    d3 = microdom.parseString(s2, caseInsensitive=0, preserveCase=1)
    d4 = microdom.parseString(s2, caseInsensitive=1, preserveCase=0)
    d5 = microdom.parseString(s2, caseInsensitive=1, preserveCase=1)
    self.assertEqual(d.documentElement.toxml(), s)
    for left, right in ((d, d2), (d2, d3), (d3, d4), (d4, d5)):
        self.assertTrue(left.isEqualToDocument(right),
                        "%r != %r" % (left.toxml(), right.toxml()))
def checkParsed(self, input, expected, beExtremelyLenient=1):
    """
    Assert that parsing C{input} yields a document whose document element
    serialises to C{expected}.

    @param input: The markup to parse.
    @param expected: The expected C{toxml} output of the document element.
    @param beExtremelyLenient: Passed through to C{microdom.parseString}.
    """
    document = microdom.parseString(
        input, beExtremelyLenient=beExtremelyLenient)
    serialised = document.documentElement.toxml()
    self.assertEqual(serialised, expected)
def lookupTemplate(self, request):
    """
    Return a parsed copy of this object's template.

    If C{self.template} is set it is parsed and returned directly.
    Otherwise C{self.templateFile} is looked up in
    C{self.templateDirectory}, which defaults to the directory of the module
    defining this class.  Parsed templates are kept in C{templateCache},
    keyed by path and reused for as long as the file's modification time is
    unchanged; callers always receive a deep clone or a fresh parse.

    @param request: Not used by this method.

    @raise RuntimeError: If the template file does not exist.
    """
    if self.template:
        return microdom.parseString(self.template,
                                    caseInsensitive=0, preserveCase=0)
    if not self.templateDirectory:
        mod = sys.modules[self.__module__]
        if hasattr(mod, '__file__'):
            self.templateDirectory = os.path.split(mod.__file__)[0]
    # First see if templateDirectory + templateFile is a file
    templatePath = os.path.join(self.templateDirectory, self.templateFile)
    if not os.path.exists(templatePath):
        # Call form rather than "raise X, msg", which Python 3 rejects.
        raise RuntimeError("The template %r was not found." % templatePath)
    # Check to see if there is an already parsed copy of it
    mtime = os.path.getmtime(templatePath)
    cachedTemplate = templateCache.get(templatePath, None)
    compiledTemplate = None
    if cachedTemplate is not None and cachedTemplate[0] == mtime:
        compiledTemplate = cachedTemplate[1].cloneNode(deep=1)
    if compiledTemplate is None:
        compiledTemplate = microdom.parse(templatePath,
                                          caseInsensitive=0, preserveCase=0)
        # Cache a clone so that changes the caller makes to the returned
        # tree do not reach the cached copy.
        templateCache[templatePath] = (mtime,
                                       compiledTemplate.cloneNode(deep=1))
    return compiledTemplate
def generate(self, request, node):
    """
    Parse C{self.data} as XML, falling back to a text node created through
    C{request.d} when parsing fails.

    @param request: The request; its C{d} attribute supplies
        C{createTextNode}.
    @param node: Not used in the visible part of this method.
    """
    if self.data:
        try:
            child = microdom.parseString(self.data)
        except Exception as e:
            # "as" form rather than "except X, e", which Python 3 rejects.
            log.msg("Error parsing return value, probably invalid xml:", e)
            child = request.d.createTextNode(self.data)
        # NOTE(review): child is not used after this point in the visible
        # code; the method may be truncated here -- confirm.
def test_scrubCIDLinks(self):
    """
    Test L{xquotient.scrubber.Scrubber.scrubCIDLinks} with a bunch of
    different nodes: after scrubbing, only the children carrying a C{name}
    attribute (1 through 5) should remain.
    """
    node = parseString(
        ' <html> '
        '<img src="cid:foo" /> '
        '<a href="x" name="1" /> '
        '<iframe src="cid:bar" /> '
        '<iframe name="2" /> '
        '<a href="cid:xxx" /> '
        '<img src="123" name="3" /> '
        '<link href="cid:foo" /> '
        '<link href="xyz" name="4" /> '
        '<script src="cid:baz" /> '
        '<script href="x" name="5" /> '
        '</html>').documentElement

    scrubCIDLinks(node)

    self.assertEqual(
        [int(e.attributes['name']) for e in node.childNodes],
        [1, 2, 3, 4, 5])
def lookupTemplate(self, request):
    """
    Return the parsed template for this object.

    If C{self.template} is set it is parsed and returned directly.
    Otherwise C{self.templateFile} is looked up in
    C{self.templateDirectory}, which defaults to the directory of the module
    defining this class.  The parsed template is pickled next to the source
    as C{.<name>.pxp} and that pickle is reused until the source file
    becomes newer than it.

    @param request: Not used by this method.
    """
    if self.template:
        return microdom.parseString(self.template)
    if not self.templateDirectory:
        mod = sys.modules[self.__module__]
        if hasattr(mod, '__file__'):
            self.templateDirectory = os.path.split(mod.__file__)[0]
    # First see if templateDirectory + templateFile is a file
    templatePath = os.path.join(self.templateDirectory, self.templateFile)
    # Check to see if there is an already compiled copy of it
    templateName = os.path.splitext(self.templateFile)[0]
    compiledTemplateName = '.' + templateName + '.pxp'
    compiledTemplatePath = os.path.join(self.templateDirectory,
                                        compiledTemplateName)
    # No? Compile and save it
    if (not os.path.exists(compiledTemplatePath) or
        os.stat(compiledTemplatePath)[stat.ST_MTIME] <
        os.stat(templatePath)[stat.ST_MTIME]):
        compiledTemplate = microdom.parse(templatePath)
        # Close the handle deterministically instead of leaking it.
        with open(compiledTemplatePath, 'wb') as compiledFile:
            pickle.dump(compiledTemplate, compiledFile, 1)
    else:
        # NOTE(review): unpickling executes whatever the file contains; this
        # is only safe while the template directory is writable solely by
        # trusted users.
        with open(compiledTemplatePath, "rb") as compiledFile:
            compiledTemplate = pickle.load(compiledFile)
    return compiledTemplate
def testAwfulTagSoup(self):
    """
    Badly malformed HTML still parses with C{beExtremelyLenient=1}, and
    exactly one C{blink} node is found in the result.
    """
    s = (' <html> '
         '<head><title> I send you this message to have your advice!!!!'
         '</titl e '
         '</headd> '
         '<body bgcolor alink hlink vlink> '
         "<h1><BLINK>SALE</blINK> TWENTY MILLION EMAILS & FUR COAT NOW "
         "FREE WITH `ENLARGER'</h1> "
         'YES THIS WONDERFUL AWFER IS NOW HERER!!! '
         '<script LANGUAGE="javascript"> '
         'function give_answers() { '
         'if (score < 70) { '
         'alert("I hate you"); '
         '}} '
         '</script><a href=/foo.com/lalal name=foo>lalal</a> '
         '</body> '
         '</HTML> ')
    d = microdom.parseString(s, beExtremelyLenient=1)
    l = domhelpers.findNodesNamed(d.documentElement, 'blink')
    self.assertEqual(len(l), 1)
def requestPT(result, proxied_service):
    """
    Validate a CAS response and request a proxy ticket for
    C{proxied_service}.

    The response must contain a C{cas:authenticationSuccess} element and a
    C{cas:proxyGrantingTicket} element whose text is an IOU known to
    C{self._ious}; that IOU is consumed and its ticket is sent to the
    C{/proxy} endpoint under C{self.cas_root}.

    @param result: The CAS response body to parse.
    @param proxied_service: The value sent as C{targetService}.

    @return: Whatever C{getPage} returns for the proxy request.

    @raise Exception: If authentication failed, no PGT IOU was supplied, or
        the IOU is unknown.
    """
    doc = microdom.parseString(result)
    if not doc.getElementsByTagName("cas:authenticationSuccess"):
        log.msg("[WARNING] CAS authentication failed. Result was:\n%s"
                % str(result))
        raise Exception("CAS authentication failed.")
    elms = doc.getElementsByTagName("cas:proxyGrantingTicket")
    if not elms:
        log.msg("[WARNING] No PGT IOU was supplied. Result was:\n%s"
                % str(result))
        raise Exception("No PGT IOU was supplied.")
    iou = elms[0].childNodes[0].value
    if iou not in self._ious:
        log.msg("[WARNING] Could not corrolate PGTIOU '%s'." % iou)
        raise Exception("Could not corrolate PGTIOU.")
    # An IOU is removed as soon as it has been matched.
    pgt = self._ious[iou]
    del self._ious[iou]
    # Request the PT.
    url = self.cas_root + '/proxy'
    q = {
        'targetService': proxied_service,
        'pgt': pgt,
    }
    url += '?' + urlencode(q)
    d = getPage(url)
    return d
def gotResponse(response):
    """
    Handle a CAS validation response: record the user name (and proxy
    granting ticket, if one can be matched) on the session and redirect.

    @param response: The CAS response body to parse.

    @raise Exception: With the message C{'Invalid login'} if the response
        has no C{cas:authenticationSuccess} element or no C{cas:user}
        element.
    """
    log.msg(response)
    doc = microdom.parseString(response)
    valid = bool(doc.getElementsByTagName("cas:authenticationSuccess"))
    # Start from None so that a response without a cas:user element is
    # rejected below instead of failing with an unbound local.
    username = None
    pgt = None
    elms = doc.getElementsByTagName("cas:user")
    if elms:
        username = elms[0].childNodes[0].value
    elms = doc.getElementsByTagName("cas:proxyGrantingTicket")
    if elms:
        iou = elms[0].childNodes[0].value
        if iou in self._ious:
            # An IOU is removed as soon as it has been matched.
            pgt = self._ious[iou]
            del self._ious[iou]
        else:
            log.msg("[WARNING] Could not corrolate PGTIOU '%s'." % iou)
    if not valid or username is None:
        raise Exception('Invalid login')
    session = request.getSession()
    session.username = username
    if pgt is not None:
        session.pgt = pgt
        log.msg("PGT added to session '%s'." % pgt)
    request.redirect(request.URLPath().sibling('').path)
def testSpacing(self):
    """
    The single space between two inline elements is kept when parsing with
    C{beExtremelyLenient=1} (testing issue #414).
    """
    s = "<?xml version='1.0'?><p><q>smart</q> <code>HairDryer</code></p>"
    d = microdom.parseString(s, beExtremelyLenient=1)
    expected = "<p><q>smart</q> <code>HairDryer</code></p>"
    actual = d.documentElement.toxml()
    self.assertEqual(expected, actual)
def render(self, request):
    """
    Render this page, first handling the two special live-page requests.

    If the request asks to hook up the output conduit, do so and return
    C{server.NOT_DONE_YET}.  If it carries a client-side event, dispatch it
    to C{clientToServerEvent}.  Otherwise unlink the session's current page
    from MVC notifications and render normally.
    """
    # Check to see if we're hooking up an output conduit
    sess = request.getSession(interfaces.IWovenLivePage)
    args = request.args
    # "in" instead of dict.has_key(), which Python 3 does not provide.
    if 'woven_hookupOutputConduitToThisFrame' in args:
        sess.hookupOutputConduit(request)
        return server.NOT_DONE_YET
    if 'woven_clientSideEventName' in args:
        try:
            request.d = microdom.parseString('<xml/>',
                                             caseInsensitive=0,
                                             preserveCase=0)
            eventName = args['woven_clientSideEventName'][0]
            eventTarget = args['woven_clientSideEventTarget'][0]
            eventArgs = args.get('woven_clientSideEventArguments', [])
            return self.clientToServerEvent(request, eventName,
                                            eventTarget, eventArgs)
        except:
            # Deliberately catches everything: whatever went wrong is
            # captured as a Failure and rendered rather than propagated.
            fail = failure.Failure()
            self.view.renderFailure(fail, request)
            return server.NOT_DONE_YET
    # Unlink the current page in this user's session from MVC notifications
    page = sess.getCurrentPage()
    if page is not None:
        page.view.unlinkViews()
        sess.setCurrentPage(None)
    self.pageSession = None
    return Controller.render(self, request)
def generateToC(document):
    """
    Build a table of contents from the headers of C{document}.

    Each header returned by C{getHeaders} gets an C{<a name="autoN" />}
    anchor appended to its children; the return value is the document
    element of the generated list markup linking to those anchors.
    """
    markup = '\n<ol>\n'
    currentLevel = 0
    for number, header in enumerate(getHeaders(document)):
        # The digit in the tag name, minus two, gives the nesting level.
        headerLevel = int(header.tagName[1]) - 2
        # A negative repeat count yields '', so at most one of the two
        # repeated strings contributes anything.
        markup = (markup
                  + (currentLevel - headerLevel) * '</ul>\n'
                  + (headerLevel - currentLevel) * '<ul>'
                  + '<li><a href="#auto%d">' % number
                  + domhelpers.getNodeText(header)
                  + '</a></li>\n')
        currentLevel = headerLevel
        header.childNodes.append(
            microdom.parseString('<a name="auto%d" />' % number)
            .documentElement)
    markup = markup + '</ul>\n' * currentLevel + '</ol>\n'
    return microdom.parseString(markup).documentElement
def test_getParents(self):
    """
    C{domhelpers.getParents} lists a node and its ancestors from the
    innermost outwards; only the entries with a C{tagName} are compared.
    """
    doc1 = microdom.parseString('<a><b><c><d/></c><e/></b><f/></a>')
    # The <c> element: first child of <b>, itself the first child of <a>.
    start = doc1.childNodes[0].childNodes[0].childNodes[0]
    node_list = domhelpers.getParents(start)
    actual = ''.join([node.tagName for node in node_list
                      if hasattr(node, 'tagName')])
    expected = 'cba'
    # A TestCase assertion, unlike a bare assert, still runs under -O.
    self.assertEqual(actual, expected,
                     'expected %s, got %s' % (expected, actual))
def test_lenientParentSingle(self):
    """
    When an HTML document with a non-Element root node is parsed, the
    C{parentNode} of the document element's first child is the document
    element itself.
    """
    document = microdom.parseString("Hello", beExtremelyLenient=1)
    root = document.documentElement
    child = document.documentElement.firstChild()
    self.assertIdentical(root, child.parentNode)
def test_getIfExists(self):
    """
    C{domhelpers.getIfExists} returns the node matching the given class
    value, or C{None} when nothing matches.
    """
    doc1 = microdom.parseString('<a><b id="bar"/><c class="foo"/></a>')
    node = domhelpers.getIfExists(doc1, "foo")
    actual = node.toxml()
    expected = '<c class="foo"></c>'
    # TestCase assertions, unlike bare asserts, still run under -O.
    self.assertEqual(actual, expected,
                     'expected %s, got %s' % (expected, actual))

    node = domhelpers.getIfExists(doc1, "pzork")
    self.assertTrue(node is None, 'expected None, didn\'t get None')
def test_unEntities(self):
    """
    The text gathered from the leniently parsed document contains a
    literal C{>} character.
    """
    # NOTE(review): the markup may originally have used &lt;/&gt; entities
    # around "=CrAzY!=" -- confirm against the upstream test.
    markup = """ <HTML> This HTML goes between Stupid <=CrAzY!=> Dumb. </HTML> """
    document = microdom.parseString(markup, beExtremelyLenient=1)
    gathered = domhelpers.gatherTextNodes(document)
    self.assertNotEqual(gathered.find('>'), -1)
def test_trailingTextDropping(self):
    """
    Trailing text in a malformed document without a top-level element is
    not dropped in lenient mode.
    """
    s = "<br>Hi orders!"
    d = microdom.parseString(s, beExtremelyLenient=True)
    self.assertEqual(d.firstChild().toxml(),
                     '<html><br />Hi orders!</html>')
def test_noTags(self):
    """
    A string with nothing that looks like a tag at all should just be
    parsed as body text.
    """
    s = "Hi orders!"
    d = microdom.parseString(s, beExtremelyLenient=True)
    self.assertEqual(d.firstChild().toxml(), "<html>Hi orders!</html>")
def testChildren(self):
    """
    C{childNodes}, C{firstChild}, C{lastChild} and C{hasChildNodes} reflect
    the parsed structure.
    """
    s = "<foo><bar /><baz /><bax>foo</bax></foo>"
    d = microdom.parseString(s).documentElement
    self.assertEqual([n.nodeName for n in d.childNodes],
                     ["bar", "baz", "bax"])
    self.assertEqual(d.lastChild().nodeName, "bax")
    self.assertEqual(d.firstChild().nodeName, "bar")
    self.assertTrue(d.hasChildNodes())
    self.assertFalse(d.firstChild().hasChildNodes())
def _replaceWithListing(node, val, filename, class_):
    """
    Replace C{node} in its parent with a listing C{div}.

    @param node: The node to replace; its text supplies the caption title
        unless that text equals the base name of C{filename}, in which case
        C{'Source listing'} is used.
    @param val: Markup placed at the start of the new C{div}.
    @param filename: Used for both the link target and the link text.
    @param class_: The C{class} attribute of the new C{div}.
    """
    caption = domhelpers.getNodeText(node)
    if caption == os.path.basename(filename):
        caption = 'Source listing'
    values = (class_, val, caption, filename, filename)
    markup = ('<div class="%s">%s<div class="caption">%s - '
              '<a href="%s"><span class="filename">%s</span></a></div></div>'
              % values)
    replacement = microdom.parseString(markup).documentElement
    node.parentNode.replaceChild(replacement, node)
def test_surroundingCrap(self):
    """
    If a document is surrounded by non-XML text, the text should remain in
    the XML.
    """
    s = "Hi<br> orders!"
    d = microdom.parseString(s, beExtremelyLenient=True)
    self.assertEqual(d.firstChild().toxml(),
                     "<html>Hi<br /> orders!</html>")
def body(self, request, tag):
    """
    @return: body of our blurb: C{''} when the blurb has none, otherwise
        the leniently parsed body serialised back to markup and passed
        through C{_htmlifyLineBreaks}.
    """
    source = self.original.body
    if source:
        parsed = parseString(source, beExtremelyLenient=True)
        return self._htmlifyLineBreaks(parsed.documentElement.toxml())
    return ''
def testEntities(self):
    """
    Entity references in element content parse to
    L{microdom.EntityReference} nodes.
    """
    # NOTE(review): these literals may have lost their entity markup (for
    # example "&amp;") somewhere along the way; they are kept exactly as
    # received -- confirm against the upstream test.
    nodes = microdom.parseString(
        "<b>&AB;</b>").documentElement.childNodes
    self.assertEqual(len(nodes), 2)
    self.assertEqual(nodes[0].data, "&")
    self.assertEqual(nodes[1].data, "AB;")
    self.assertEqual(nodes[0].cloneNode().toxml(), "&")
    for n in nodes:
        self.assertTrue(isinstance(n, microdom.EntityReference))
def testNamespaceDelete(self):
    """
    C{toxml} round-trips a document in which a child element resets the
    default namespace with C{xmlns=""}.
    """
    original = (
        '<?xml version="1.0"?><html xmlns="http://www.w3.org/TR/REC-html40">'
        '<body xmlns=""></body></html>')
    document = microdom.parseString(original)
    roundTripped = document.toxml()
    self.assertEqual(original, roundTripped)
def test_leadingTextDropping(self):
    """
    Make sure that if there's no top-level node lenient-mode won't drop
    leading text that's outside of any elements.
    """
    s = "Hi orders! <br>Well. <br>"
    d = microdom.parseString(s, beExtremelyLenient=True)
    self.assertEqual(d.firstChild().toxml(),
                     '<html>Hi orders! <br />Well. <br /></html>')
def setAuthors(template, authors):
    """
    Add author information to the template document.

    Names and contact information for authors are added to each node with a
    C{class} attribute set to C{authors} and to the template head as C{link}
    nodes.

    @type template: A DOM Node or Document
    @param template: The output template, modified in place.

    @type authors: C{list} of two-tuples of C{str}
    @param authors: List of names and contact information for the authors of
        the input document.

    @return: C{None}
    """
    from xml.sax.saxutils import escape, quoteattr

    # First, insert the author list into every class="authors" node.  The
    # values end up in markup that is re-parsed below, so escape them;
    # quoteattr() supplies the surrounding quotes itself.
    rendered = []
    for name, href in authors:
        rendered.append('<a href=%s>%s</a>' % (quoteattr(href), escape(name)))

    # The last entry is found by position, not by comparing tuples, so an
    # author listed twice is not mistaken for the final one.
    if not rendered:
        text = ''
    elif len(rendered) == 1:
        text = rendered[0]
    else:
        leading = ''.join([anchor + ',' for anchor in rendered[:-1]])
        text = leading + 'and ' + rendered[-1]

    childNodes = microdom.parseString('<span>' + text + '</span>').childNodes
    for node in domhelpers.findElementsWithAttribute(template,
                                                     "class", 'authors'):
        node.childNodes.extend(childNodes)

    # Second, add appropriate <link rel="author" ...> tags to the <head>.
    head = domhelpers.findNodesNamed(template, 'head')[0]
    linkNodes = []
    for name, href in authors:
        markup = ('<link rel="author" href=%s title=%s/>'
                  % (quoteattr(href), quoteattr(name)))
        linkNodes.append(microdom.parseString(markup).childNodes[0])
    head.childNodes.extend(linkNodes)
def test_scrubTrustsH1(self):
    """
    Test that L{xquotient.scrubber.Scrubber} considers h1 to be a safe tag.
    Added because of #1895.
    """
    node = parseString("<h1>Foo</h1>").documentElement
    scrubbed = scrub(node)
    h1s = getElementsByTagName(scrubbed, 'h1')
    self.assertEqual(len(h1s), 1)
    self.assertEqual(gatherTextNodes(h1s[0]).strip(), "Foo")
def _fixup(data, rendererName):
    """
    Leniently parse C{data}, declare the template namespace and renderer on
    its document element, set a DOCTYPE, run C{_walk} over the document and
    return the serialised result.

    @param data: The markup to process.
    @param rendererName: The value of the C{t:render} attribute.

    @return: The C{toxml} output of the processed document.
    """
    doc = parseString(data, beExtremelyLenient=True)
    root = doc.documentElement
    root.setAttribute("xmlns:t", TEMPLATE_NAMESPACE)
    doc.doctype = (
        'html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" '
        '"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"')
    root.setAttribute("t:render", rendererName)
    _walk(doc, doc)
    return doc.toxml()
def test_lenientParenting(self):
    """
    When HTML lacking a root node is parsed, the C{parentNode} of the
    document element's first child is the document element itself.
    """
    # Spare the rod, ruin the child.
    document = microdom.parseString("<br/><br/>", beExtremelyLenient=1)
    root = document.documentElement
    firstBorn = document.documentElement.firstChild()
    self.assertIdentical(root, firstBorn.parentNode)
def render(self, request):
    """
    Load this object's template into C{self.d} and hand it to
    C{handleDocument}.

    The template comes from C{getTemplate} when that returns something
    truthy, otherwise from C{lookupTemplate}.

    @return: C{NOT_DONE_YET}

    @raise AttributeError: If there is no inline template and
        C{self.templateFile} is not set.
    """
    template = self.getTemplate(request)
    if template:
        self.d = microdom.parseString(template)
    else:
        if not self.templateFile:
            # Call form rather than "raise X, msg", which Python 3 rejects.
            raise AttributeError(
                "%s does not define self.templateFile to operate on"
                % self.__class__)
        self.d = self.lookupTemplate(request)
    self.handleDocument(request, self.d)
    return NOT_DONE_YET
def load(self, ctx=None, preprocessors=()):
    """
    Return the precompiled template, building and caching it on first use.

    @param ctx: Passed to C{flat.precompile}.
    @param preprocessors: Must be empty; preprocessors are not supported.

    @return: The cached precompiled document, narrowed to C{self.pattern}
        when one is set.
    """
    assert not preprocessors, "preprocessors not supported by htmlstr"
    if self._cache is not None:
        return self._cache
    parsed = microdom.parseString(
        self.template, beExtremelyLenient=self.beExtremelyLenient)
    compiled = flat.precompile(parsed, ctx)
    if self.pattern is not None:
        compiled = inevow.IQ(compiled).onePattern(self.pattern)
    self._cache = compiled
    return self._cache
def test_indexedSpan(self):
    """
    Visiting a C{span} with class C{index} writes a LaTeX C{\\index}
    directive carrying the span's C{value}.
    """
    markup = '<span class="index" value="name" />'
    element = microdom.parseString(markup).documentElement
    buf = StringIO()
    LatexSpitter(buf.write).visitNode(element)
    self.assertEqual(buf.getvalue(), u'\\index{name}\n')
def test_comparePosition(self):
    """
    L{tree.comparePosition} is deprecated; called through
    C{callDeprecated} with the same element twice, it returns C{0}.
    """
    from twisted.web.microdom import parseString
    node = parseString('<foo/>').documentElement
    deprecatedSince = Version('Twisted', 9, 0, 0)
    outcome = self.callDeprecated(
        deprecatedSince, tree.comparePosition, node, node)
    self.assertEqual(outcome, 0)
def test_comment(self):
    """
    A comment parses to a L{microdom.Comment} holding the comment text,
    and cloning it gives a distinct node that serialises as the comment.
    """
    document = microdom.parseString("<bar><!--<foo />--></bar>")
    root = document.documentElement
    self.assertEqual(root.nodeName, "bar")
    comment = root.childNodes[0]
    self.assertTrue(isinstance(comment, microdom.Comment))
    self.assertEqual(comment.value, "<foo />")
    duplicate = comment.cloneNode()
    self.assertTrue(comment is not duplicate)
    self.assertEqual(duplicate.toxml(), "<!--<foo />-->")
def test_clearNode(self):
    """
    C{domhelpers.clearNode} removes all children of the node it is given,
    whether that node is the document element or nested below it.
    """
    markup = '<a><b><c><d/></c></b></a>'
    # (levels to descend below the document element, expected output)
    cases = [
        (0, '<a></a>'),
        (1, '<a><b></b></a>'),
        (2, '<a><b><c></c></b></a>'),
    ]
    for depth, expected in cases:
        doc = microdom.parseString(markup)
        target = doc.documentElement
        for _ in range(depth):
            target = target.childNodes[0]
        domhelpers.clearNode(target)
        actual = doc.documentElement.toxml()
        # A TestCase assertion, unlike a bare assert, still runs under -O.
        self.assertEqual(actual, expected,
                         'expected %s, got %s' % (expected, actual))
def test_cloneNode(self):
    """
    A deep clone has the same shape, serialisation and namespace as the
    original, while node, child and grandchild all compare unequal to
    their counterparts.
    """
    s = '<foo a="b"><bax>x</bax></foo>'
    node = microdom.parseString(s).documentElement
    clone = node.cloneNode(deep=1)
    self.assertNotEqual(node, clone)
    self.assertEqual(len(node.childNodes), len(clone.childNodes))
    c1, c2 = node.firstChild(), clone.firstChild()
    self.assertNotEqual(c1, c2)
    self.assertEqual(len(c1.childNodes), len(c2.childNodes))
    self.assertNotEqual(c1.firstChild(), c2.firstChild())
    self.assertEqual(s, clone.toxml())
    self.assertEqual(node.namespace, clone.namespace)
def testLaterCloserDL2(self):
    """
    In lenient mode, unclosed C{dt}, C{dd} and C{p} elements inside a
    C{dl} are closed at the positions shown in C{expected}.
    """
    s = ("<dl>"
         "<dt>word<dd>definition<p>more definition"
         "<dt>word"
         "</dl>")
    expected = ("<dl>"
                "<dt>word</dt><dd>definition<p>more definition</p></dd>"
                "<dt>word</dt>"
                "</dl>")
    d = microdom.parseString(s, beExtremelyLenient=1)
    actual = d.documentElement.toxml()
    self.assertEqual(expected, actual)
def parsePT(result):
    """
    Extract the proxy ticket from a CAS proxy response.

    @param result: The response body; it is logged and then parsed.

    @return: The C{value} of the first child of the first
        C{cas:proxyTicket} element.

    @raise Exception: If the response has no C{cas:proxySuccess} or no
        C{cas:proxyTicket} element.
    """
    log.msg(result)
    document = microdom.parseString(result)
    if not document.getElementsByTagName("cas:proxySuccess"):
        raise Exception("Error parsing PT")
    tickets = document.getElementsByTagName("cas:proxyTicket")
    if not tickets:
        raise Exception("Error parsing PT")
    return tickets[0].childNodes[0].value
def test_namedChildren(self):
    """
    C{domhelpers.namedChildren} returns the expected number of C{bar}
    children for each sample, and what it returns has a C{tagName}.
    """
    expectedCounts = {
        "<foo><bar /><bar unf='1' /><bar>asdfadsf</bar>"
        "<bam/></foo>": 3,
        '<foo>asdf</foo>': 0,
        '<foo><bar><bar></bar></bar></foo>': 1,
    }
    for markup, count in expectedCounts.items():
        root = microdom.parseString(markup).documentElement
        found = domhelpers.namedChildren(root, 'bar')
        self.assertEqual(len(found), count)
        if found:
            self.assertTrue(hasattr(found[0], 'tagName'))
def test_findElementsWithAttribute(self):
    """
    C{domhelpers.findElementsWithAttribute} finds every element carrying
    the attribute, or only those with a given value when one is passed.
    """
    doc1 = microdom.parseString(
        '<a foo="1"><b foo="2"/><c foo="1"/><d/></a>')

    # TestCase assertions, unlike bare asserts, still run under -O.
    node_list = domhelpers.findElementsWithAttribute(doc1, 'foo')
    actual = ''.join([node.tagName for node in node_list])
    expected = 'abc'
    self.assertEqual(actual, expected,
                     'expected %s, got %s' % (expected, actual))

    node_list = domhelpers.findElementsWithAttribute(doc1, 'foo', '1')
    actual = ''.join([node.tagName for node in node_list])
    expected = 'ac'
    self.assertEqual(actual, expected,
                     'expected %s, got %s' % (expected, actual))
def test_leadingTextDropping(self):
    """
    With no top-level node, lenient mode keeps leading text that sits
    outside any element, both in C{toxml} and in C{writexml} output.
    """
    markup = "Hi orders! <br>Well. <br>"
    expected = "<html>Hi orders! <br />Well. <br /></html>"
    document = microdom.parseString(markup, beExtremelyLenient=True)
    self.assertEqual(document.firstChild().toxml(), expected)
    stream = BytesIO()
    document.firstChild().writexml(stream, "", "", "", "", {}, "")
    self.assertEqual(stream.getvalue(),
                     b"<html>Hi orders! <br />Well. <br /></html>")
def test_attributes(self):
    """
    C{getAttribute}, C{hasAttribute}, C{getAttributeNode} and
    C{setAttribute} behave as expected on a parsed element.
    """
    element = microdom.parseString('<foo a="b" />').documentElement
    # An attribute that is present, and one that is not.
    self.assertEqual(element.getAttribute("a"), "b")
    self.assertEqual(element.getAttribute("c"), None)
    self.assertTrue(element.hasAttribute("a"))
    self.assertFalse(element.hasAttribute("c"))
    attributeNode = element.getAttributeNode("a")
    self.assertEqual(attributeNode.value, "b")
    # An attribute added after parsing can be read back.
    element.setAttribute("foo", "bar")
    self.assertEqual(element.getAttribute("foo"), "bar")
def test_trailingTextDropping(self):
    """
    Trailing text in a malformed document without a top-level element is
    kept in lenient mode, both in C{toxml} and in C{writexml} output.
    """
    markup = "<br>Hi orders!"
    document = microdom.parseString(markup, beExtremelyLenient=True)
    wrapper = document.firstChild()
    self.assertEqual(wrapper.toxml(), "<html><br />Hi orders!</html>")
    stream = BytesIO()
    document.firstChild().writexml(stream, "", "", "", "", {}, "")
    self.assertEqual(stream.getvalue(), b"<html><br />Hi orders!</html>")