def test_resourceSearch(self):
    """
    Searching for resources should render the matches as an HTML table,
    echo the search term back into the search form, and not show the
    "no matches" message.
    """
    self.expectSomeRecords()
    document = yield self.renderPage(dict(resourceSearch=["bob"]))

    # Form is filled out with existing input.
    self.assertEqual(
        document.getElementById("txt_resourceSearch").getAttribute("value"),
        "bob"
    )
    tables = getElementsByTagName(document, "table")
    # search results are the first table
    rows = getElementsByTagName(tables[0], 'tr')
    self.assertEqual(len(rows), 3)
    firstRowCells = getElementsByTagName(rows[1], 'td')
    # NOTE(review): the e-mail cell text below looks like it was rewritten
    # by an address-obfuscation filter -- confirm against the fixture
    # records.
    self.assertEqual(
        [gatherTextNodes(cell) for cell in firstRowCells[1:]],
        ["Bob Bobson", "User", "bob", "boblogin",
         "[email protected], [email protected]"]
    )
    # The first cell holds exactly one link, which selects the resource.
    [resourceLink] = getElementsByTagName(firstRowCells[0], 'a')
    self.assertEqual(
        resourceLink.getAttribute("href"),
        "/admin/?resourceId=users:bob"
    )
    self.assertEqual(gatherTextNodes(resourceLink), "select")
    self.assertNotIn(
        "No matches found for resource bob",
        gatherTextNodes(document)
    )
def test_proxySearch(self):
    """
    When the user searches for a proxy, the results are displayed in a
    table, in a form that will allow them to submit it to add new read or
    write proxies.
    """
    self.expectSomeRecords()
    self.resource.getResourceById = partial(FakePrincipalResource, self)
    document = yield self.renderPage(
        dict(resourceId=["qux"], proxySearch=["bob"])
    )

    # Form is filled out with existing input.
    self.assertEqual(
        document.getElementById("txt_proxySearch").getAttribute("value"),
        "bob"
    )
    proxyAddForm = document.getElementById("frm_proxyAdd")
    # The add-proxy form must contain exactly one results table.
    [proxyResultsTable] = getElementsByTagName(proxyAddForm, "table")
    rows = getElementsByTagName(proxyResultsTable, 'tr')
    self.assertEqual(len(rows), 3)
    firstRowCells = getElementsByTagName(rows[1], 'td')
    # NOTE(review): the e-mail cell text below looks like it was rewritten
    # by an address-obfuscation filter -- confirm against the fixture
    # records.
    self.assertEqual(
        [gatherTextNodes(cell) for cell in firstRowCells[1:]],
        ["User", "bob", "[email protected], [email protected]", ""]
    )
    self.assertNotIn(
        "No matches found for proxy resource bob",
        gatherTextNodes(document)
    )
def test_selectResourceById(self):
    """
    When a resource is selected by a 'resourceId' parameter, its details
    heading and hidden resource id field are rendered, and none of the
    optional features exercised by other tests (auto-schedule menu,
    resource search, DAV property lookup, proxy search) appear.
    """
    self.resource.getResourceById = partial(FakePrincipalResource, self)
    document = yield self.renderPage(dict(resourceId=["qux"]))

    # Exactly one <h3> is expected: the details heading.
    [detailsTitle] = getElementsByTagName(document, 'h3')
    detailString = gatherTextNodes(detailsTitle)
    self.assertEqual(detailString,
                     "Resource Details: Hello Fake Resource: 'qux'")
    hiddenResourceId = document.getElementById(
        "hdn_resourceId").getAttribute("value")
    self.assertEqual(hiddenResourceId, "qux")
    autoScheduleMenu = document.getElementById("sel_autoSchedule")

    # Now, some assertions about features that are covered in other tests
    # which should be turned _off_ here since we're not asking for them.

    # Not an auto-schedule resource; there should be no auto-schedule menu.
    self.assertIdentical(autoScheduleMenu, None)
    # No resource search present; we shouldn't be performing the search.
    self.assertNotIn("No matches found for resource",
                     gatherTextNodes(document))
    self.assertIdentical(document.getElementById("tab_searchResults"), None)
    # I'm not attempting to retrieve a property, there's nothing to fail to
    # parse.
    self.assertNotIn("Unable to parse property", gatherTextNodes(document))
    # I'm not searching for proxies, so there shouldn't be any results.
    self.assertNotIn("No matches found for proxy resource",
                     gatherTextNodes(document))
def test_textEntitiesDecoded(self):
    """
    Minidom does decode entities in text nodes.

    The input carries the C{&amp;} entity reference; the gathered text is
    expected to contain the decoded ampersand, both when gathering from
    the document and from its root element.
    """
    # A bare '&' is not well-formed XML and would not exercise entity
    # decoding at all, so the fixture must use the entity reference.
    doc5_xml = '<x>Souffl&amp;</x>'
    doc5 = self.dom.parseString(doc5_xml)
    actual = domhelpers.gatherTextNodes(doc5)
    expected = 'Souffl&'
    self.assertEqual(actual, expected)
    actual = domhelpers.gatherTextNodes(doc5.documentElement)
    self.assertEqual(actual, expected)
def testComplexNotification(self):
    """
    The element with id "theList" exists, contains at least one C{li},
    and the first C{li} holds the text "test" (ignoring surrounding
    whitespace).
    """
    listNode = self.d.getElementById("theList")
    self.assertTrue(listNode, "Test %s failed" % outputNum)
    liNodes = domhelpers.getElementsByTagName(listNode, "li")
    self.assertTrue(
        liNodes,
        "DOM was not updated by notifying Widgets. Test %s" % outputNum)
    text = domhelpers.gatherTextNodes(liNodes[0])
    # assertEqual (rather than asserting on an == expression) so that a
    # failure reports both values.
    self.assertEqual(
        text.strip(), "test",
        "Wrong output: %s. Test %s" % (text, outputNum))
def test_proxiesListing(self):
    """
    Resource principals will have their proxies listed in a table.
    """
    def fakeResourceById(request, resid):
        # Serve every lookup with a fake principal of record type
        # "resources".
        return FakePrincipalResource(self, request, resid,
                                     recordType="resources")

    self.resource.getResourceById = fakeResourceById
    document = yield self.renderPage(dict(resourceId=["qux"]))
    proxiesForm = document.getElementById("frm_proxies")
    # The proxies form must contain exactly one table.
    [proxiesTable] = getElementsByTagName(proxiesForm, "table")
    rows = getElementsByTagName(proxiesTable, "tr")
    # header + 3 data rows (see FakePrincipalResource)
    self.assertEqual(len(rows), 4)
    firstRowCells = getElementsByTagName(rows[1], "td")
    # name, buttons, name, buttons
    self.assertEqual(len(firstRowCells), 4)
    lastRowCells = getElementsByTagName(rows[-1], "td")
    # name, buttons, blank space
    self.assertEqual(len(lastRowCells), 3)
    self.assertEqual(lastRowCells[-1].getAttribute("colspan"), "2")
    self.assertNotIn("This resource has no proxies.",
                     ''.join(gatherTextNodes(document)))
def check_80_columns(self, dom, filename):
    """
    Report lines wider than 80 columns in C{pre} blocks and in files
    referenced by listing links.

    @param dom: The parsed document to check.
    @param filename: Path of the document being checked; it is passed to
        C{self._reportError} and its directory is used to resolve listing
        C{href}s.

    Every problem is reported through C{self._reportError}; nothing is
    returned.
    """
    for node in domhelpers.findNodesNamed(dom, 'pre'):
        # the ps/pdf output is in a font that cuts off at 80 characters,
        # so this is enforced to make sure the interesting parts (which
        # are likely to be on the right-hand edge) stay on the printed
        # page.
        for line in domhelpers.gatherTextNodes(node, 1).split('\n'):
            if len(line.rstrip()) > 80:
                self._reportError(filename, node,
                                  'text wider than 80 columns in pre')
    for node in domhelpers.findNodesNamed(dom, 'a'):
        if node.getAttribute('class', '').endswith('listing'):
            try:
                fn = os.path.dirname(filename)
                fn = os.path.join(fn, node.getAttribute('href'))
                # Close the listing deterministically instead of leaking
                # the file handle.
                with open(fn, 'r') as listing:
                    lines = listing.readlines()
            except Exception:
                # Best effort: any failure to resolve or read the listing
                # (missing href, unreadable file, ...) is reported rather
                # than raised.  KeyboardInterrupt / SystemExit are no
                # longer swallowed.
                self._reportError(filename, node,
                                  'bad listing href: %r' %
                                  node.getAttribute('href'))
                continue
            for line in lines:
                if len(line.rstrip()) > 80:
                    self._reportError(filename, node,
                                      'listing wider than 80 columns')
def test_gatherTextNodes(self):
    """
    L{domhelpers.gatherTextNodes} concatenates the text nodes of a
    microdom tree in document order, giving the same result for the
    document and for its root element.
    """
    doc1 = microdom.parseString('<a>foo</a>')
    actual = domhelpers.gatherTextNodes(doc1)
    expected = 'foo'
    self.assertEqual(actual, expected)
    actual = domhelpers.gatherTextNodes(doc1.documentElement)
    self.assertEqual(actual, expected)

    doc2_xml = '<a>a<b>b</b><c>c</c>def<g>g<h>h</h></g></a>'
    doc2 = microdom.parseString(doc2_xml)
    actual = domhelpers.gatherTextNodes(doc2)
    expected = 'abcdefgh'
    self.assertEqual(actual, expected)
    actual = domhelpers.gatherTextNodes(doc2.documentElement)
    self.assertEqual(actual, expected)

    doc3_xml = ('<a>a<b>b<d>d<g>g</g><h>h</h></d><e>e<i>i</i></e></b>' +
                '<c>c<f>f<j>j</j></f></c></a>')
    doc3 = microdom.parseString(doc3_xml)
    actual = domhelpers.gatherTextNodes(doc3)
    expected = 'abdgheicfj'
    self.assertEqual(actual, expected)
    actual = domhelpers.gatherTextNodes(doc3.documentElement)
    self.assertEqual(actual, expected)

    # The fixture needs real line breaks around "stuff": the expected text
    # below is exactly what sits between <body> and </body>.
    # NOTE(review): the indentation widths were reconstructed to agree with
    # `expected` -- confirm against the upstream fixture.
    doc4_xml = (
        '<html>\n'
        ' <head>\n'
        ' </head>\n'
        ' <body>\n'
        ' stuff\n'
        ' </body>\n'
        '</html>\n'
    )
    doc4 = microdom.parseString(doc4_xml)
    actual = domhelpers.gatherTextNodes(doc4)
    expected = '\n stuff\n '
    # assertEqual instead of a bare assert, which is stripped under -O.
    self.assertEqual(actual, expected)
    actual = domhelpers.gatherTextNodes(doc4.documentElement)
    self.assertEqual(actual, expected)

    # NOTE(review): this literal may originally have been an entity
    # reference rather than a raw non-ASCII character -- confirm.
    doc5_xml = '<x>Soufflé</x>'
    doc5 = microdom.parseString(doc5_xml)
    actual = domhelpers.gatherTextNodes(doc5)
    expected = 'Soufflé'
    self.assertEqual(actual, expected)
    actual = domhelpers.gatherTextNodes(doc5.documentElement)
    self.assertEqual(actual, expected)
def testUnEntities(self):
    """
    With lenient parsing, the text gathered from markup containing the
    stray C{<=CrAzY!=>} sequence still contains a C{>} character.
    """
    s = " <HTML> This HTML goes between Stupid <=CrAzY!=> Dumb. </HTML> "
    d = microdom.parseString(s, beExtremelyLenient=1)
    n = domhelpers.gatherTextNodes(d)
    self.assertNotEqual(n.find('>'), -1)
def textFromHtml(htmlText):
    """
    Extract the text nodes of an HTML string, parsing it with extreme
    leniency and joining the pieces with a single space.

    If the input is unicode, keep it unicode.

    @param htmlText: The HTML markup to convert.
    @return: The gathered text.
    """
    parsed = microdom.parseString(htmlText, beExtremelyLenient=1)
    return domhelpers.gatherTextNodes(parsed, joinWith=u" ")
def test_scrubTrustsH1(self):
    """
    Test that L{xquotient.scrubber.Scrubber} considers h1 to be a safe tag.
    Added because of #1895.
    """
    node = parseString("<h1>Foo</h1>").documentElement
    scrubbed = scrub(node)
    # The h1 must survive scrubbing, with its text intact.
    h1s = getElementsByTagName(scrubbed, 'h1')
    self.assertEqual(len(h1s), 1)
    self.assertEqual(gatherTextNodes(h1s[0]).strip(), "Foo")
def test_davProperty(self):
    """
    When a resource is selected by a resourceId parameter, and a DAV
    property is selected by the 'davPropertyName' parameter, that property
    will be displayed.
    """
    self.resource.getResourceById = partial(FakePrincipalResource, self)
    document = yield self.renderPage(
        dict(resourceId=["qux"], davPropertyName=["DAV:#displayname"])
    )
    # The property-name field echoes the requested property.
    propertyName = document.getElementById('txt_davPropertyName')
    self.assertEqual(propertyName.getAttribute("value"),
                     "DAV:#displayname")
    # The page text is expected to contain the escaped XML of the property.
    propertyValue = DisplayName("The Name To Display").toxml()
    self.assertIn(cgi.escape(propertyValue), gatherTextNodes(document))
    self.assertNotIn("Unable to parse property to read:",
                     gatherTextNodes(document))
def test_gatherTextNodesDropsWhitespace(self):
    """
    Microdom discards whitespace-only text nodes, so L{gatherTextNodes}
    returns only the text from nodes which had non-whitespace characters.
    """
    # The fixture needs real line breaks around "stuff": the expected text
    # below is exactly what sits between <body> and </body>, while the
    # whitespace-only runs between the other tags are what gets dropped.
    # NOTE(review): the indentation widths were reconstructed to agree with
    # `expected` -- confirm against the upstream fixture.
    doc4_xml = (
        '<html>\n'
        ' <head>\n'
        ' </head>\n'
        ' <body>\n'
        ' stuff\n'
        ' </body>\n'
        '</html>\n'
    )
    doc4 = self.dom.parseString(doc4_xml)
    actual = domhelpers.gatherTextNodes(doc4)
    expected = '\n stuff\n '
    self.assertEqual(actual, expected)
    actual = domhelpers.gatherTextNodes(doc4.documentElement)
    self.assertEqual(actual, expected)
def test_noProxiesListing(self):
    """
    A selected resource principal without any proxies makes the page show
    a message that says so.
    """
    resourceFactory = partial(
        FakePrincipalResource, self,
        recordType='resources', hasProxies=False)
    self.resource.getResourceById = resourceFactory
    page = yield self.renderPage(dict(resourceId=['qux']))
    pageText = ''.join(gatherTextNodes(page))
    self.assertIn("This resource has no proxies.", pageText)
def scrub(self, node, filterCIDLinks=True):
    """
    Remove all potentially harmful elements from the node and
    return a wrapper node.

    For reasons (perhaps dubious) of performance, this mutates its
    input.

    @param node: The DOM node to scrub.  It is modified in place and ends
        up appended inside the returned wrapper.
    @param filterCIDLinks: When true, every element for which
        C{self._filterCIDLink} returns a true value is removed from its
        parent.
    @return: The C{node} attribute of the wrapper C{div} built here.
    """
    # Build the wrapper.  `body` is the outermost wrapper div; `filler` is
    # the element that will finally receive `node` (the same div unless a
    # nested message-body div is created below).
    if node.nodeName == 'html':
        filler = body = lmx().div(_class="message-html")
        for c in node.childNodes:
            if c.nodeName == 'head':
                # Copy the text of the first <title> found in <head> into
                # its own div; the break only leaves this inner loop.
                for hc in c.childNodes:
                    if hc.nodeName == 'title':
                        body.div(_class="message-title").text(domhelpers.gatherTextNodes(hc))
                        break
            elif c.nodeName == 'body':
                # A <body> child switches the container to a nested
                # message-body div and ends the scan of html's children.
                filler = body.div(_class='message-body')
                break
    else:
        filler = body = lmx().div(_class="message-nohtml")
    for e in self.iternode(node):
        if getattr(e, 'clean', False):
            # If I have manually exploded this node, just forget about it.
            continue
        ennl = e.nodeName.lower()
        if filterCIDLinks and self._filterCIDLink(e):
            # we could replace these with a marker element, like we do
            # with dangerous tags, but i'm not sure there is a reason to
            # NOTE(review): the removed element still goes through the
            # attribute handling below -- confirm that is intended.
            e.parentNode.removeChild(e)

        if ennl in self._goodHtml:
            # Known tag: an optional per-tag hook named '_handle_<tag>'
            # may substitute another element for `e`.
            handler = getattr(self, '_handle_' + ennl, None)
            if handler is not None:
                e = handler(e)
            # Replace the attribute mapping with one that holds only the
            # attributes allowed for this tag plus the always-safe ones.
            newAttributes = {}
            oldAttributes = e.attributes
            e.attributes = newAttributes
            goodAttributes = self._goodHtml[ennl] + self._alwaysSafeAttributes
            for attr in goodAttributes:
                if attr in oldAttributes:
                    newAttributes[attr] = oldAttributes[attr]
        else:
            # Unknown tag: strip its attributes, turn it into a hidden div
            # and prepend a child div naming the original tag.
            e.attributes.clear()
            e.setTagName("div")
            e.setAttribute("class", "message-html-unknown")
            e.setAttribute("style", "display: none")
            div = Element('div')
            div.setAttribute('class', 'message-html-unknown-tag')
            div.appendChild(Text("Untrusted %s tag" % (ennl, )))
            e.childNodes.insert(0, div)
    # Hang the (now scrubbed) input under the chosen container.
    filler.node.appendChild(node)
    return body.node
def test_withoutDocType(self):
    """
    A Lore XML input document may omit a I{DOCTYPE} declaration.  If it
    does so, the XHTML1 Strict DTD is used.
    """
    # Parsing should succeed.
    document = self._parseTest("<foo>uses an xhtml entity: &copy;</foo>")
    # But even more than that, the &copy; entity should be turned into the
    # appropriate unicode codepoint.
    self.assertEqual(
        domhelpers.gatherTextNodes(document.documentElement),
        u"uses an xhtml entity: \N{COPYRIGHT SIGN}")
def test_noResourceFound(self):
    """
    A resource search that matches nothing produces an informative
    message and no results table.
    """
    self.expectRecordSearch("bob", [])
    page = yield self.renderPage({"resourceSearch": ["bob"]})
    pageText = gatherTextNodes(page)
    self.assertIn("No matches found for resource bob", pageText)
    # Search results table should not be displayed.
    resultsTable = page.getElementById("tab_searchResults")
    self.assertIdentical(resultsTable, None)
def testTameDocument(self): s = """ <test> <it> <is> <a> test </a> </is> </it> </test> """ d = microdom.parseString(s) self.assertEqual(domhelpers.gatherTextNodes(d.documentElement).strip(), "test")
def test_noDavProperty(self):
    """
    When a DAV property is not found, an error will be displayed.
    """
    self.resource.getResourceById = partial(FakePrincipalResource, self)
    document = yield self.renderPage(
        dict(resourceId=["qux"], davPropertyName=["DAV:#blub"])
    )
    # The property-name field echoes the requested property.
    propertyName = document.getElementById('txt_davPropertyName')
    self.assertEqual(propertyName.getAttribute("value"), "DAV:#blub")
    propertyValue = "No such property: DAV:#blub"
    self.assertIn(cgi.escape(propertyValue), gatherTextNodes(document))
def getSectionNumber(header):
    """
    Retrieve the section number of the given node.

    This is probably intended to interact in a rather specific way with
    L{numberDocument}.

    @type header: A DOM Node or L{None}
    @param header: The section from which to extract a number.  The section
        number is the value of this node's first child.

    @return: C{None} (when C{header} is false) or a C{str} giving the
        section number.
    """
    if header:
        # The number is the text gathered from the first child.
        return domhelpers.gatherTextNodes(header.childNodes[0])
    return None
def test_withTransitionalDocType(self):
    """
    A Lore XML input document may include a I{DOCTYPE} declaration
    referring to the XHTML1 Transitional DTD.
    """
    # Parsing should succeed.  The backslash keeps the DOCTYPE on the very
    # first line of the document.
    document = self._parseTest("""\
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<foo>uses an xhtml entity: &copy;</foo>
""")
    # But even more than that, the &copy; entity should be turned into the
    # appropriate unicode codepoint.
    self.assertEqual(
        domhelpers.gatherTextNodes(document.documentElement),
        u"uses an xhtml entity: \N{COPYRIGHT SIGN}")
def test_withStrictDocType(self):
    """
    A Lore XML input document may include a I{DOCTYPE} declaration
    referring to the XHTML1 Strict DTD.
    """
    # Parsing should succeed.  The backslash keeps the DOCTYPE on the very
    # first line of the document.
    document = self._parseTest("""\
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<foo>uses an xhtml entity: &copy;</foo>
""")
    # But even more than that, the &copy; entity should be turned into the
    # appropriate unicode codepoint.
    self.assertEqual(
        domhelpers.gatherTextNodes(document.documentElement),
        u"uses an xhtml entity: \N{COPYRIGHT SIGN}")
def testCaseSensitiveSoonCloser(self):
    """
    With lenient parsing of upper-case markup, the text of a later
    paragraph is not gathered under an earlier C{A} element.
    """
    s = (
        ' <HTML><BODY> <P ALIGN="CENTER"> '
        '<A HREF="http://www.apache.org/">'
        '<IMG SRC="/icons/apache_pb.gif"></A> '
        '</P> <P> This is an insane set of text nodes that should NOT be '
        'gathered under the A tag above. </P> </BODY></HTML> '
    )
    d = microdom.parseString(s, beExtremelyLenient=1)
    l = domhelpers.findNodesNamed(d.documentElement, 'a')
    # NOTE(review): as written this replace() maps a space to a space;
    # the first argument may originally have been an entity -- confirm.
    n = domhelpers.gatherTextNodes(l[0], 1).replace(' ', ' ')
    self.assertEqual(n.find('insane'), -1)
def test_notADavProperty(self):
    """
    When a DAV property is selected without the proper syntax (i.e. no "#"
    to separate namespace and name), an error will be displayed.
    """
    self.resource.getResourceById = partial(FakePrincipalResource, self)
    document = yield self.renderPage(
        dict(resourceId=["qux"], davPropertyName=["blub"])
    )
    # The property-name field echoes the (malformed) requested name.
    propertyName = document.getElementById('txt_davPropertyName')
    self.assertEqual(propertyName.getAttribute("value"), "blub")
    propertyValue = "Unable to parse property to read: blub"
    self.assertIn(cgi.escape(propertyValue), gatherTextNodes(document))
def test_caseSensitiveSoonCloser(self):
    """
    With lenient parsing of upper-case markup, the text of a later
    paragraph is not gathered under an earlier C{A} element.
    """
    html = (
        ' <HTML><BODY> <P ALIGN="CENTER"> '
        '<A HREF="http://www.apache.org/">'
        '<IMG SRC="/icons/apache_pb.gif"></A> '
        '</P> <P> This is an insane set of text nodes that should NOT be '
        'gathered under the A tag above. </P> </BODY></HTML> '
    )
    document = microdom.parseString(html, beExtremelyLenient=1)
    anchors = domhelpers.findNodesNamed(document.documentElement, "a")
    anchorText = domhelpers.gatherTextNodes(anchors[0], 1)
    # NOTE(review): as written this replace() maps a space to a space;
    # the first argument may originally have been an entity -- confirm.
    anchorText = anchorText.replace(" ", " ")
    self.assertEqual(anchorText.find("insane"), -1)
def test_scrubTrustsSpan(self):
    """
    Test that L{xquotient.scrubber.Scrubber} considers span to be a safe
    tag. Added because of #1641.
    """
    markup = (
        " <html> <span style='font-weight: bold; "
        "font-family:\"Book Antiqua\"'> Hello </span> </html> "
    )
    node = parseString(markup).documentElement
    scrubbed = scrub(node)
    # The span must survive scrubbing, with its text intact.
    spans = getElementsByTagName(scrubbed, 'span')
    self.assertEqual(len(spans), 1)
    self.assertEqual(gatherTextNodes(spans[0]).strip(), "Hello")
def test_emptyProxySearch(self):
    """
    When no results are found for a search for proxies, a relevant message
    should be displayed and the table/form for results should not be.
    """
    self.resource.getResourceById = partial(FakePrincipalResource, self)
    self.expectRecordSearch("bob", [])
    document = yield self.renderPage(
        dict(resourceId=["qux"], proxySearch=["bob"])
    )
    # The search field still echoes the submitted term.
    self.assertEqual(
        document.getElementById("txt_proxySearch").getAttribute("value"),
        "bob"
    )
    # No add-proxy form is rendered when there is nothing to add.
    proxyAddForm = document.getElementById("frm_proxyAdd")
    self.assertIdentical(proxyAddForm, None)
    self.assertIn(
        "No matches found for proxy resource bob",
        gatherTextNodes(document)
    )
def test_gatherTextNodes(self):
    """
    L{domhelpers.gatherTextNodes} concatenates text nodes in document
    order, and gives the same answer for a document as for its root
    element.
    """
    cases = [
        ('<a>foo</a>', 'foo'),
        ('<a>a<b>b</b><c>c</c>def<g>g<h>h</h></g></a>', 'abcdefgh'),
        ('<a>a<b>b<d>d<g>g</g><h>h</h></d><e>e<i>i</i></e></b>'
         '<c>c<f>f<j>j</j></f></c></a>', 'abdgheicfj'),
    ]
    for markup, expected in cases:
        parsed = self.dom.parseString(markup)
        self.assertEqual(domhelpers.gatherTextNodes(parsed), expected)
        self.assertEqual(
            domhelpers.gatherTextNodes(parsed.documentElement), expected)
def testSimpleRender(self):
    """
    The elements with ids "title" and "hello" hold the text 'Title' and
    'Hello' respectively.
    """
    titleNode = self.d.getElementById("title")
    helloNode = self.d.getElementById("hello")
    # assertEqual (rather than asserting on an == expression) so that a
    # failure reports both values.
    self.assertEqual(domhelpers.gatherTextNodes(titleNode), 'Title')
    self.assertEqual(domhelpers.gatherTextNodes(helloNode), 'Hello')