def xtestSimple(self):
    """Check cleanup of unclosed <p> tags, a valueless attribute and a stray </div>.

    NOTE(review): the 'x' prefix presumably keeps the default unittest
    loader from collecting this method -- confirm it is disabled on purpose.
    """
    # Alternative expectation that was left commented out:
    #expected = '<html><br/><p>Para1<p>Para2<div one="one" two="2"/>Test</p></p></html>'
    expected = '<html><br/><p>Para1</p><p>Para2</p><div one="one" two="2"/>Test</html>'
    source = "<html><br><p>Para1<p>Para2<div one two=2/>Test</div></html>"
    cleaned = HtmlCleanup.cleanup(source)
    self.assertEqual(cleaned, expected)
Example #2
0
 def testScriptElement(self):
     """Run cleanup over a <script> element whose body is wrapped in CDATA.

     NOTE(review): nothing is asserted here -- `expected` is built but never
     compared with the cleanup result. Confirm whether an assertEqual was
     dropped or deliberately left out before relying on this test.
     """
     expected = '<html><head><script type="text/javascript"><!-- PopUp == Comment --></script></head><body>Nothing</body></html>'
     source = """<html><head><script type='text/javascript'>//<![CDATA[
     if (1<2) alert('two&');<!-- PopUp -- Comment -->
     //]]>
     </script></head><body>Nothing</body></html>"""
     cleaned = HtmlCleanup.cleanup(source)
 def testScriptElement(self):
     """Run cleanup over a <script> element whose body is wrapped in CDATA.

     NOTE(review): nothing is asserted here -- `expected` is built but never
     compared with the cleanup result. Confirm whether an assertEqual was
     dropped or deliberately left out before relying on this test.
     """
     expected = '<html><head><script type="text/javascript"><!-- PopUp == Comment --></script></head><body>Nothing</body></html>'
     source = """<html><head><script type='text/javascript'>//<![CDATA[
     if (1<2) alert('two&');<!-- PopUp -- Comment -->
     //]]>
     </script></head><body>Nothing</body></html>"""
     cleaned = HtmlCleanup.cleanup(source)
Example #4
0
 def testEscaping(self):
     """Check escaping of raw markup characters and entities.

     The input mixes raw '<', '>' and '&' with entities (one of them lacking
     its terminating ';') in a quoted attribute, an unquoted attribute and
     in element text.
     """
     source = "<html><test att1='< > & &#160 &nbsp; &amp; &lt; &gt;' att2=&&amp;&lt;<>& &#160&<</test>"
     # Adjacent literals are joined at compile time; the value is unchanged.
     expected = (
         '<html><test att1="&lt; &gt; &amp; &#160; &#160; &amp; &lt; &gt;" '
         'att2="&amp;&amp;&lt;&lt;">&amp; &#160;&amp;&lt;</test></html>'
     )
     self.assertEqual(HtmlCleanup.cleanup(source), expected)
Example #5
0
 def xtestSimple(self):
     """Check cleanup of unclosed <p> tags, a valueless attribute and a stray </div>.

     NOTE(review): the 'x' prefix presumably keeps the default unittest
     loader from collecting this method -- confirm it is disabled on purpose.
     """
     # Alternative expectation that was left commented out:
     #expected = '<html><br/><p>Para1<p>Para2<div one="one" two="2"/>Test</p></p></html>'
     expected = '<html><br/><p>Para1</p><p>Para2</p><div one="one" two="2"/>Test</html>'
     source = "<html><br><p>Para1<p>Para2<div one two=2/>Test</div></html>"
     cleaned = HtmlCleanup.cleanup(source)
     self.assertEqual(cleaned, expected)
 def testEscaping(self):
     """Check escaping of raw markup characters and entities.

     The input mixes raw '<', '>' and '&' with entities (one of them lacking
     its terminating ';') in a quoted attribute, an unquoted attribute and
     in element text.
     """
     source = "<html><test att1='< > & &#160 &nbsp; &amp; &lt; &gt;' att2=&&amp;&lt;<>& &#160&<</test>"
     # Adjacent literals are joined at compile time; the value is unchanged.
     expected = (
         '<html><test att1="&lt; &gt; &amp; &#160; &#160; &amp; &lt; &gt;" '
         'att2="&amp;&amp;&lt;&lt;">&amp; &#160;&amp;&lt;</test></html>'
     )
     self.assertEqual(HtmlCleanup.cleanup(source), expected)
Example #7
0
    def testX(self):
        html = """<div><p id="h2de112c0p1">Para.</p><blockquote class="bq"><p id="h65bbb66fp1">Blockquote (bq1):
<ul class="lib"><li><p id="hdd5ea3fp1">li2b</p><p id="h17f52eddp1">li2p</p></li><li><p id="hdd5ea3fp2">li2b</p><p id="h17f52eddp2">li2p</p><p id="h1c35d593p1"/></li></ul></p>
</blockquote></div>"""
        expected = """<div><p id="h2de112c0p1">Para.</p><blockquote class="bq"><p id="h65bbb66fp1">Blockquote (bq1):
<ul class="lib"><li><p id="hdd5ea3fp1">li2b</p><p id="h17f52eddp1">li2p</p></li><li><p id="hdd5ea3fp2">li2b</p><p id="h17f52eddp2">li2p</p><p id="h1c35d593p1"/></li></ul></p>
</blockquote></div>"""
        xml = HtmlCleanup.cleanup(html)
        self.assertEqual(xml, expected)
Example #8
0
 def xtestFile(self):
     """Clean testData/test1.htm and write the result to testData/test1Output.htm.

     Nothing is asserted; the output file is presumably meant for manual
     inspection. NOTE(review): the 'x' prefix presumably keeps the default
     unittest loader from collecting this method -- confirm it is disabled
     on purpose.
     """
     # Renamed from `file` so the builtin of that name is not shadowed.
     source_path = "testData/test1.htm"
     # `with` closes the handle even if read() raises.
     with open(source_path, "rb") as infile:
         html = infile.read()
     xml = HtmlCleanup.cleanup(html)
     # The output file is only opened once cleanup has succeeded.
     with open("testData/test1Output.htm", "wb") as outfile:
         outfile.write(xml)
    def testX(self):
        html = """<div><p id="h2de112c0p1">Para.</p><blockquote class="bq"><p id="h65bbb66fp1">Blockquote (bq1):
<ul class="lib"><li><p id="hdd5ea3fp1">li2b</p><p id="h17f52eddp1">li2p</p></li><li><p id="hdd5ea3fp2">li2b</p><p id="h17f52eddp2">li2p</p><p id="h1c35d593p1"/></li></ul></p>
</blockquote></div>"""
        expected = """<div><p id="h2de112c0p1">Para.</p><blockquote class="bq"><p id="h65bbb66fp1">Blockquote (bq1):
<ul class="lib"><li><p id="hdd5ea3fp1">li2b</p><p id="h17f52eddp1">li2p</p></li><li><p id="hdd5ea3fp2">li2b</p><p id="h17f52eddp2">li2p</p><p id="h1c35d593p1"/></li></ul></p>
</blockquote></div>"""
        xml = HtmlCleanup.cleanup(html)
        self.assertEqual(xml, expected)
 def xtestFile(self):
     """Clean testData/test1.htm and write the result to testData/test1Output.htm.

     Nothing is asserted; the output file is presumably meant for manual
     inspection. NOTE(review): the 'x' prefix presumably keeps the default
     unittest loader from collecting this method -- confirm it is disabled
     on purpose.
     """
     # Renamed from `file` so the builtin of that name is not shadowed.
     source_path = "testData/test1.htm"
     # `with` closes the handle even if read() raises.
     with open(source_path, "rb") as infile:
         html = infile.read()
     xml = HtmlCleanup.cleanup(html)
     # The output file is only opened once cleanup has succeeded.
     with open("testData/test1Output.htm", "wb") as outfile:
         outfile.write(xml)
 def testGeneral(self):
     """Check unterminated entities, unquoted attribute values and unclosed tags.

     The expected output terminates '&#160', quotes and escapes each href
     value, and closes the open <a> and <html> elements.
     """
     expected = '<html>&#160;&#21475;<a href="?x=&lt;&amp;test=4&amp;x" href="two" href="3">link</a></html>'
     source = "<html>&#160&#21475;<a   href=?x=<&test=4&x  href=two href=3>link"
     self.assertEqual(HtmlCleanup.cleanup(source), expected)
Example #12
0
 def testBadComment(self):
     """Check that '--' inside a comment within <script> comes out as '=='."""
     expected = '<html><head><script type="text/javascript"><!-- PopUp == Comment --></script></head><body>Nothing</body></html>'
     source = "<html><head><script type='text/javascript'><!-- PopUp -- Comment --></script></head><body>Nothing</body></html>"
     cleaned = HtmlCleanup.cleanup(source)
     self.assertEqual(cleaned, expected)
Example #13
0
 def testEmptyElem(self):
     """Check that empty elements are emitted in self-closed form.

     Covers bare <br> tags and an empty prefixed element (<w:test>); the
     <?test?> instruction is expected to pass through unchanged.
     """
     expected = '<html><br/><?test?><w:test att="one"/><br/></html>'
     source = "<html><br><?test?><w:test att='one'></w:test><br>"
     self.assertEqual(HtmlCleanup.cleanup(source), expected)
Example #14
0
 def testEncoding(self):
     """Check escaping of '&', '<' and double quotes in attributes and text.

     The input also leaves <one> and <html> unclosed; the expected output
     closes both.
     """
     expected = '<html><one att="one&amp;two" att2="one &lt;&amp; &quot;two">&amp;&lt;2 two</one></html>'
     source = "<html><one att=one&two att2='one <& \"two'>&<2 two"
     cleaned = HtmlCleanup.cleanup(source)
     self.assertEqual(cleaned, expected)
 def testEncoding(self):
     """Check escaping of '&', '<' and double quotes in attributes and text.

     The input also leaves <one> and <html> unclosed; the expected output
     closes both.
     """
     expected = '<html><one att="one&amp;two" att2="one &lt;&amp; &quot;two">&amp;&lt;2 two</one></html>'
     source = "<html><one att=one&two att2='one <& \"two'>&<2 two"
     cleaned = HtmlCleanup.cleanup(source)
     self.assertEqual(cleaned, expected)
Example #16
0
 def testNesting(self):
     """Check repair of mismatched and unclosed tags.

     In the expected output the stray </bad> is gone, '<br></br>' becomes a
     single <br/>, <b> is closed before </p>, and <body>/<html> are closed.
     """
     expected = '<html><body><p>Testing<b><br/><br/>bold</b></p></body></html>'
     source = "<html><body><p>Testing<b></bad><br></br><br>bold</p>"
     self.assertEqual(HtmlCleanup.cleanup(source), expected)
Example #17
0
 def testGeneral(self):
     """Check unterminated entities, unquoted attribute values and unclosed tags.

     The expected output terminates '&#160', quotes and escapes each href
     value, and closes the open <a> and <html> elements.
     """
     expected = '<html>&#160;&#21475;<a href="?x=&lt;&amp;test=4&amp;x" href="two" href="3">link</a></html>'
     source = "<html>&#160&#21475;<a   href=?x=<&test=4&x  href=two href=3>link"
     self.assertEqual(HtmlCleanup.cleanup(source), expected)
 def testEmptyElem(self):
     """Check that empty elements are emitted in self-closed form.

     Covers bare <br> tags and an empty prefixed element (<w:test>); the
     <?test?> instruction is expected to pass through unchanged.
     """
     expected = '<html><br/><?test?><w:test att="one"/><br/></html>'
     source = "<html><br><?test?><w:test att='one'></w:test><br>"
     self.assertEqual(HtmlCleanup.cleanup(source), expected)
 def testNesting(self):
     """Check repair of mismatched and unclosed tags.

     In the expected output the stray </bad> is gone, '<br></br>' becomes a
     single <br/>, <b> is closed before </p>, and <body>/<html> are closed.
     """
     expected = '<html><body><p>Testing<b><br/><br/>bold</b></p></body></html>'
     source = "<html><body><p>Testing<b></bad><br></br><br>bold</p>"
     self.assertEqual(HtmlCleanup.cleanup(source), expected)
 def testBadComment(self):
     """Check that '--' inside a comment within <script> comes out as '=='."""
     expected = '<html><head><script type="text/javascript"><!-- PopUp == Comment --></script></head><body>Nothing</body></html>'
     source = "<html><head><script type='text/javascript'><!-- PopUp -- Comment --></script></head><body>Nothing</body></html>"
     cleaned = HtmlCleanup.cleanup(source)
     self.assertEqual(cleaned, expected)