def test_clean_with_article(self):
    """Check that cleaned article documents match the article-only page.

    Every input below, after ``clean`` and ``condense``, is expected to
    equal the condensed article-only document.
    """
    article_only = '<html><head></head><body><article>Hello! I am a test</article></body></html>'
    with_sibling_div = '<html><head></head><body><div>dsfasfadfasdfasdf</div><article>Hello! I am a test</article></body></html>'
    with_nested_video = '<html><head></head><body><article><video></video>Hello! I am a test</article></body></html>'

    # The article-only document doubles as the expected result, so it is
    # checked against itself first, then the two noisier variants.
    for markup in (article_only, with_sibling_div, with_nested_video):
        self.assertEqual(condense(clean(markup)), condense(article_only))
def test_clean_empty_img(self):
    """Compare cleaning several empty ``<img>`` tags with cleaning one.

    The document holding three empty image tags must clean and condense
    to the same result as the document holding a single ``<img src="">``.

    NOTE(review): another ``test_clean_empty_img`` (using ``u''``
    literals) is defined later in this file. If both live in the same
    class, the later definition replaces this one and this version never
    runs -- confirm and rename one of them if so.
    """
    single_img = ''' <!DOCTYPE html> <html> <head> </head> <body> <img src=""></img> </body> </html> '''
    several_imgs = ''' <!DOCTYPE html> <html> <head> </head> <body> <img src=""></img> <img></img> <img/> </body> </html> '''

    # Clean the multi-image document first, then the single-image one.
    cleaned_several = condense(clean(several_imgs))
    cleaned_single = condense(clean(single_img))
    self.assertEqual(cleaned_several, cleaned_single)
def test_html_to_xhtml(self):
    """Check the condensed output of ``html_to_xhtml`` on cleaned HTML.

    The input uses an upper-case ``DIV``/``ID`` and unclosed ``<br>``
    tags; the expected string has a lower-case ``div``/``id``,
    self-closed ``<br />`` tags and an ``xmlns`` attribute on ``<html>``.
    """
    expected_xhtml = u'<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head></head><body><div id="Test">Hello</div><br /><br /></body></html>'
    loose_html = u''' <!DOCTYPE html> <html> <head> </head> <body> <DIV ID="Test">Hello</div> <br> <br> </body> </html> '''

    converted = html_to_xhtml(clean(loose_html))

    # Only the converted output is condensed; the expected string is
    # compared exactly as written above.
    self.assertEqual(condense(converted), expected_xhtml)
def test_clean_empty_img(self):
    """Compare cleaning several empty ``<img>`` tags with cleaning one.

    Unicode-literal inputs: the document holding three empty image tags
    must clean and condense to the same result as the document holding a
    single ``<img src="">``.

    NOTE(review): an earlier ``test_clean_empty_img`` (using plain
    string literals) is defined above in this file. If both live in the
    same class, this definition overrides the earlier one -- confirm and
    rename one of them if so.
    """
    single_img = u''' <!DOCTYPE html> <html> <head> </head> <body> <img src=""></img> </body> </html> '''
    several_imgs = u''' <!DOCTYPE html> <html> <head> </head> <body> <img src=""></img> <img></img> <img/> </body> </html> '''

    # Clean the multi-image document first, then the single-image one.
    cleaned_several = condense(clean(several_imgs))
    cleaned_single = condense(clean(single_img))
    self.assertEqual(cleaned_several, cleaned_single)
def test_clean_tags_full_html(self):
    """Check that full documents with script/video tags clean to the plain page.

    Every variant below, after ``clean`` and ``condense``, is expected to
    equal the condensed plain document that contains only the ``<div>``.
    """
    plain = u''' <!DOCTYPE html> <html> <head> </head> <body> <div>Hello </div> </body> </html> '''
    script_in_body = u''' <!DOCTYPE html> <html> <head> </head> <body> <div>Hello </div> <script>Uh oh...it's an evil script!</script> </body> </html> '''
    script_after_body = u''' <!DOCTYPE html> <html> <head> </head> <body> <div>Hello </div> </body> <script>Uh oh...it's an evil script again!</script> </html> '''
    video_after_body = u''' <!DOCTYPE html> <html> <head> </head> <body> <div>Hello </div> </body> <video>Play me!</video> </html> '''
    video_wrapping_div = u''' <!DOCTYPE html> <html> <head> </head> <body> <video> <div>Hello </div> </video> </body> <video>Play me!</video> </html> '''
    # NOTE(review): as written here this literal has the same text as the
    # previous one; kept so the number of assertions is unchanged.
    video_wrapping_div_again = u''' <!DOCTYPE html> <html> <head> </head> <body> <video> <div>Hello </div> </video> </body> <video>Play me!</video> </html> '''

    variants = (
        plain,  # the plain page must survive cleaning unchanged
        script_in_body,
        script_after_body,
        video_after_body,
        video_wrapping_div,
        video_wrapping_div_again,
    )
    for markup in variants:
        self.assertEqual(condense(clean(markup)), condense(plain))