def xtestSimple(self):
    """Compare cleanup output for a small document with unclosed tags.

    The input has an unclosed ``<br>``, two unclosed ``<p>`` elements and a
    ``<div>`` carrying a bare attribute and an unquoted one. The expected
    output closes each paragraph and quotes both attributes.

    NOTE(review): the ``x`` prefix presumably keeps the default ``test*``
    discovery from running this method -- confirm that is still intended.
    """
    source = "<html><br><p>Para1<p>Para2<div one two=2/>Test</div></html>"
    # Alternative expectation that was left commented out (nested paragraphs):
    # '<html><br/><p>Para1<p>Para2<div one="one" two="2"/>Test</p></p></html>'
    wanted = '<html><br/><p>Para1</p><p>Para2</p><div one="one" two="2"/>Test</html>'
    self.assertEqual(HtmlCleanup.cleanup(source), wanted)
def testScriptElement(self):
    """Run cleanup on a script element holding CDATA markers and a comment.

    NOTE(review): ``expected`` is built but never compared with the cleanup
    result, so as written this test can only fail if
    ``HtmlCleanup.cleanup`` raises. Confirm whether the assertion was left
    out on purpose before adding one.
    """
    markup = (
        "<html><head><script type='text/javascript'>"
        "//<![CDATA[ if (1<2) alert('two&');"
        "<!-- PopUp -- Comment --> //]]> </script>"
        "</head><body>Nothing</body></html>"
    )
    expected = (
        '<html><head><script type="text/javascript">'
        '<!-- PopUp == Comment --></script>'
        '</head><body>Nothing</body></html>'
    )
    cleaned = HtmlCleanup.cleanup(markup)
def testEscaping(self):
    """Compare cleanup output for '<', '>' and '&' in attributes and text.

    The input has one quoted attribute, one unquoted attribute and element
    text, each containing those characters, and no closing ``</html>``.
    """
    source = "<html><test att1='< > &   & < >' att2=&&<<>&  &<</test>"
    # Adjacent literals are joined at compile time into one expected string.
    wanted = (
        '<html><test att1="< > &     & < >" '
        'att2="&&<<">&  &<</test></html>'
    )
    result = HtmlCleanup.cleanup(source)
    self.assertEqual(result, wanted)
def testX(self):
    """Check that a well-formed nested fragment comes back unchanged.

    The fragment nests a list with paragraphs inside a paragraph inside a
    blockquote, and includes a self-closed empty ``<p/>``.
    """
    fragment = (
        '<div><p id="h2de112c0p1">Para.</p>'
        '<blockquote class="bq"><p id="h65bbb66fp1">Blockquote (bq1): '
        '<ul class="lib">'
        '<li><p id="hdd5ea3fp1">li2b</p><p id="h17f52eddp1">li2p</p></li>'
        '<li><p id="hdd5ea3fp2">li2b</p><p id="h17f52eddp2">li2p</p>'
        '<p id="h1c35d593p1"/></li>'
        '</ul></p> </blockquote></div>'
    )
    # The expected output is character-for-character the input.
    unchanged = fragment
    self.assertEqual(HtmlCleanup.cleanup(fragment), unchanged)
def xtestFile(self):
    """Run cleanup over ``testData/test1.htm`` and save the result.

    Reads the fixture in binary mode, passes its contents to
    ``HtmlCleanup.cleanup`` and writes the returned value, also in binary
    mode, to ``testData/test1Output.htm``. Nothing is asserted.

    NOTE(review): the ``x`` prefix presumably keeps the default ``test*``
    discovery from running this method, and the output file is presumably
    inspected by hand -- confirm both.
    """
    source_path = "testData/test1.htm"
    output_path = "testData/test1Output.htm"

    # ``with`` closes each handle even if reading, cleanup or writing raises.
    with open(source_path, "rb") as source_file:
        html = source_file.read()

    xml = HtmlCleanup.cleanup(html)

    with open(output_path, "wb") as output_file:
        output_file.write(xml)
def testGeneral(self):
    """Compare cleanup output for an unclosed anchor with repeated hrefs.

    The input has non-ASCII text, an ``<a>`` with three unquoted ``href``
    attributes (the first containing '<' and '&') and no closing tags. The
    expected output quotes every attribute value, keeps all three
    attributes, and closes ``<a>`` and ``<html>``.
    """
    source = "<html> 口<a href=?x=<&test=4&x href=two href=3>link"
    wanted = '<html> 口<a href="?x=<&test=4&x" href="two" href="3">link</a></html>'
    result = HtmlCleanup.cleanup(source)
    self.assertEqual(result, wanted)
def testBadComment(self):
    """Check that '--' inside a comment in a script element becomes '=='.

    The expected output also switches the script's ``type`` attribute from
    single to double quotes; the rest of the document is unchanged.
    """
    source = (
        "<html><head><script type='text/javascript'>"
        "<!-- PopUp -- Comment --></script>"
        "</head><body>Nothing</body></html>"
    )
    wanted = (
        '<html><head><script type="text/javascript">'
        '<!-- PopUp == Comment --></script>'
        '</head><body>Nothing</body></html>'
    )
    self.assertEqual(HtmlCleanup.cleanup(source), wanted)
def testEmptyElem(self):
    """Check that empty elements are emitted in self-closing form.

    Both ``<br>`` tags and the empty, prefixed ``<w:test>`` element are
    expected as ``<.../>``. The ``<?test?>`` processing instruction is
    expected unchanged, and the missing ``</html>`` is expected to be added.
    """
    source = "<html><br><?test?><w:test att='one'></w:test><br>"
    wanted = '<html><br/><?test?><w:test att="one"/><br/></html>'
    cleaned = HtmlCleanup.cleanup(source)
    self.assertEqual(cleaned, wanted)
def testEncoding(self):
    """Compare cleanup output for '&', '<' and '"' in attributes and text.

    The input has an unquoted attribute containing '&', a single-quoted
    attribute containing '<', '&' and a double quote, and unclosed
    ``<one>`` and ``<html>`` elements followed by text with '&' and '<'.
    """
    source = "<html><one att=one&two att2='one <& \"two'>&<2 two"
    wanted = '<html><one att="one&two" att2="one <& "two">&<2 two</one></html>'
    self.assertEqual(HtmlCleanup.cleanup(source), wanted)
def testNesting(self):
    """Check repair of mismatched and unclosed nested elements.

    In the expected output the stray ``</bad>`` is gone, ``<br></br>`` and
    ``<br>`` are both ``<br/>``, and ``<b>``, ``<body>`` and ``<html>`` are
    closed in nesting order.
    """
    source = "<html><body><p>Testing<b></bad><br></br><br>bold</p>"
    wanted = '<html><body><p>Testing<b><br/><br/>bold</b></p></body></html>'
    cleaned = HtmlCleanup.cleanup(source)
    self.assertEqual(cleaned, wanted)