Example #1
0
 def test_clean_with_article(self):
     """Check clean() against documents that contain an <article> element.

     Every candidate document, once passed through clean() and condense(),
     is expected to equal the condensed form of the article-only document.
     """
     article_only = '<html><head></head><body><article>Hello! I am a test</article></body></html>'
     candidates = (
         # Already in the expected shape.
         article_only,
         # A sibling <div> precedes the article.
         '<html><head></head><body><div>dsfasfadfasdfasdf</div><article>Hello! I am a test</article></body></html>',
         # An empty <video> element sits inside the article.
         '<html><head></head><body><article><video></video>Hello! I am a test</article></body></html>',
     )
     for markup in candidates:
         self.assertEqual(condense(clean(markup)), condense(article_only))
Example #2
0
 def test_clean_with_article(self):
     """Verify the cleaned output for three article-bearing documents.

     The reference value is the condensed article-only document; the
     variants add a sibling <div> or an embedded <video> to it.
     """
     reference = '<html><head></head><body><article>Hello! I am a test</article></body></html>'
     with_sibling_div = '<html><head></head><body><div>dsfasfadfasdfasdf</div><article>Hello! I am a test</article></body></html>'
     with_inner_video = '<html><head></head><body><article><video></video>Hello! I am a test</article></body></html>'

     def cleaned_and_condensed(markup):
         # Same pipeline the assertions apply to every input.
         return condense(clean(markup))

     self.assertEqual(cleaned_and_condensed(reference), condense(reference))
     self.assertEqual(cleaned_and_condensed(with_sibling_div), condense(reference))
     self.assertEqual(cleaned_and_condensed(with_inner_video), condense(reference))
Example #3
0
 def test_clean_empty_img(self):
     """Compare cleaned output for one versus several source-less images.

     A document holding three <img> tags without a usable src must clean
     and condense to the same result as a document holding a single
     ``<img src="">`` tag.
     """
     single_empty_img = '''
             <!DOCTYPE html>
             <html>
              <head>
              </head>
              <body>
               <img src=""></img>
              </body>
             </html>
             '''
     several_empty_imgs = '''
             <!DOCTYPE html>
             <html>
              <head>
              </head>
              <body>
               <img src=""></img>
               <img></img>
               <img/>
              </body>
             </html>
             '''
     # The multi-image document is processed first, as the left-hand value.
     actual = condense(clean(several_empty_imgs))
     expected = condense(clean(single_empty_img))
     self.assertEqual(actual, expected)
Example #4
0
 def test_html_to_xhtml(self):
     """Check the clean() -> html_to_xhtml() -> condense() pipeline.

     The input uses upper-case tag and attribute names and unclosed <br>
     tags; the expected result is a single-line XHTML document with the
     XHTML namespace, lower-case names and self-closed <br /> tags.
     """
     expected_xhtml = u'<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head></head><body><div id="Test">Hello</div><br /><br /></body></html>'
     loose_html = u'''
             <!DOCTYPE html>
             <html>
              <head>
              </head>
              <body>
               <DIV ID="Test">Hello</div>
               <br>
               <br>
              </body>
             </html>
             '''
     cleaned = clean(loose_html)
     converted = html_to_xhtml(cleaned)
     # The expected value is compared as-is, without being condensed.
     self.assertEqual(condense(converted), expected_xhtml)
Example #5
0
 def test_clean_empty_img(self):
     """Assert that extra source-less <img> tags do not change the result.

     Both documents go through clean() and condense(); the one with three
     empty images must come out equal to the one with a single
     ``<img src="">``.
     """
     three_images = u'''
             <!DOCTYPE html>
             <html>
              <head>
              </head>
              <body>
               <img src=""></img>
               <img></img>
               <img/>
              </body>
             </html>
             '''
     one_image = u'''
             <!DOCTYPE html>
             <html>
              <head>
              </head>
              <body>
               <img src=""></img>
              </body>
             </html>
             '''
     self.assertEqual(
         condense(clean(three_images)),
         condense(clean(one_image)),
     )
Example #6
0
 def test_html_to_xhtml(self):
     """Verify that cleaned HTML converts to the expected XHTML string.

     The source markup mixes upper-case names with unclosed <br> tags;
     after clean(), html_to_xhtml() and condense() it must equal the
     literal XHTML document below.
     """
     source_markup = u'''
             <!DOCTYPE html>
             <html>
              <head>
              </head>
              <body>
               <DIV ID="Test">Hello</div>
               <br>
               <br>
              </body>
             </html>
             '''
     wanted = u'<!DOCTYPE html><html xmlns="http://www.w3.org/1999/xhtml"><head></head><body><div id="Test">Hello</div><br /><br /></body></html>'
     actual = condense(html_to_xhtml(clean(source_markup)))
     self.assertEqual(actual, wanted)
Example #7
0
    def test_clean_tags_full_html(self):
        """Check clean() on full HTML documents with <script>/<video> tags.

        Each fixture, after clean() and condense(), is expected to equal the
        condensed baseline document, which holds only ``<div>Hello </div>``
        in its body.
        """
        # Baseline: nothing to strip.
        baseline = u'''
                <!DOCTYPE html>
                <html>
                 <head>
                 </head>
                 <body>
                  <div>Hello </div>
                 </body>
                </html>
                '''
        # <script> inside <body>.
        script_in_body = u'''
                <!DOCTYPE html>
                <html>
                 <head>
                 </head>
                 <body>
                  <div>Hello </div>
                  <script>Uh oh...it's an evil script!</script>
                 </body>
                </html>
                '''
        # <script> placed after </body>.
        script_after_body = u'''
                <!DOCTYPE html>
                <html>
                 <head>
                 </head>
                 <body>
                  <div>Hello </div>
                 </body>
                 <script>Uh oh...it's an evil script again!</script>
                </html>
                '''
        # <video> placed after </body>.
        video_after_body = u'''
                <!DOCTYPE html>
                <html>
                 <head>
                 </head>
                 <body>
                  <div>Hello </div>
                 </body>
                 <video>Play me!</video>
                </html>
                '''
        # <video> wrapping the <div>, plus another <video> after </body>.
        video_around_div = u'''
                <!DOCTYPE html>
                <html>
                 <head>
                 </head>
                 <body>
                  <video>
                   <div>Hello </div>
                  </video>
                 </body>
                 <video>Play me!</video>
                </html>
                '''
        # Same as above, but the div text ends in an &nbsp; entity.
        video_around_nbsp_div = u'''
        <!DOCTYPE html>
        <html>
         <head>
         </head>
         <body>
          <video>
           <div>Hello&nbsp;</div>
          </video>
         </body>
         <video>Play me!</video>
        </html>
        '''
        fixtures = (
            baseline,
            script_in_body,
            script_after_body,
            video_after_body,
            video_around_div,
            video_around_nbsp_div,
        )
        for markup in fixtures:
            self.assertEqual(condense(clean(markup)), condense(baseline))
Example #8
0
    def test_clean_tags_full_html(self):
        """Assert that clean() normalises several full documents alike.

        The reference document contains only ``<div>Hello </div>`` in its
        body. The other documents add <script> or <video> elements in
        various positions and must clean and condense to the same value as
        the condensed reference.
        """
        reference_doc = u'''
                <!DOCTYPE html>
                <html>
                 <head>
                 </head>
                 <body>
                  <div>Hello </div>
                 </body>
                </html>
                '''
        body_script_doc = u'''
                <!DOCTYPE html>
                <html>
                 <head>
                 </head>
                 <body>
                  <div>Hello </div>
                  <script>Uh oh...it's an evil script!</script>
                 </body>
                </html>
                '''
        trailing_script_doc = u'''
                <!DOCTYPE html>
                <html>
                 <head>
                 </head>
                 <body>
                  <div>Hello </div>
                 </body>
                 <script>Uh oh...it's an evil script again!</script>
                </html>
                '''
        trailing_video_doc = u'''
                <!DOCTYPE html>
                <html>
                 <head>
                 </head>
                 <body>
                  <div>Hello </div>
                 </body>
                 <video>Play me!</video>
                </html>
                '''
        wrapped_div_doc = u'''
                <!DOCTYPE html>
                <html>
                 <head>
                 </head>
                 <body>
                  <video>
                   <div>Hello </div>
                  </video>
                 </body>
                 <video>Play me!</video>
                </html>
                '''
        wrapped_nbsp_div_doc = u'''
        <!DOCTYPE html>
        <html>
         <head>
         </head>
         <body>
          <video>
           <div>Hello&nbsp;</div>
          </video>
         </body>
         <video>Play me!</video>
        </html>
        '''

        def expect_reference(markup):
            # Clean and condense the input, then compare with the condensed reference.
            self.assertEqual(condense(clean(markup)), condense(reference_doc))

        expect_reference(reference_doc)
        expect_reference(body_script_doc)
        expect_reference(trailing_script_doc)
        expect_reference(trailing_video_doc)
        expect_reference(wrapped_div_doc)
        expect_reference(wrapped_nbsp_div_doc)