Example #1
0
    def test_70_read_more_and_shorten(self):
        """Shorten website HTML samples with custom ``expand_options``.

        Each sample is cleaned with ``remove=True`` and ``shorten=True``; the
        result must keep the ``*_IN`` fragments, drop the ``*_OUT`` fragments
        and contain the "read more" markup built from the custom options.
        """
        options = {
            'oe_expand_container_class': 'span_class',
            'oe_expand_container_content': 'Herbert Einstein',
            'oe_expand_separator_node': 'br_lapin',
            'oe_expand_a_class': 'a_class',
            'oe_expand_a_content': 'read mee',
        }
        # Markup expected in the output when the options above are applied.
        expand_markup = '<span class="span_class">Herbert Einstein<br_lapin></br_lapin><a href="#" class="a_class">read mee</a></span>'

        def check(cleaned, kept, dropped, expected):
            # Shared assertions: kept fragments, dropped fragments, expand markup.
            for fragment in kept:
                self.assertIn(fragment, cleaned, 'html_email_cleaner wrongly removed not quoted content')
            for fragment in dropped:
                self.assertNotIn(fragment, cleaned, 'html_email_cleaner did not erase overlimit content')
            for fragment in expected:
                self.assertIn(fragment, cleaned, 'html_email_cleaner wrongly take into account specific expand options')

        # First sample, default section protection.
        cleaned = html_email_clean(
            test_mail_examples.OERP_WEBSITE_HTML_1,
            remove=True, shorten=True, max_length=100, expand_options=options)
        check(cleaned,
              test_mail_examples.OERP_WEBSITE_HTML_1_IN,
              test_mail_examples.OERP_WEBSITE_HTML_1_OUT,
              [expand_markup])

        # Second sample, section protection explicitly disabled.
        cleaned = html_email_clean(
            test_mail_examples.OERP_WEBSITE_HTML_2,
            remove=True, shorten=True, max_length=200, expand_options=options,
            protect_sections=False)
        check(cleaned,
              test_mail_examples.OERP_WEBSITE_HTML_2_IN,
              test_mail_examples.OERP_WEBSITE_HTML_2_OUT,
              [expand_markup])

        # Second sample again with section protection enabled: one extra
        # sentence is expected to be present in the output.
        cleaned = html_email_clean(
            test_mail_examples.OERP_WEBSITE_HTML_2,
            remove=True, shorten=True, max_length=200, expand_options=options,
            protect_sections=True)
        check(cleaned,
              test_mail_examples.OERP_WEBSITE_HTML_2_IN,
              test_mail_examples.OERP_WEBSITE_HTML_2_OUT,
              [expand_markup, 'tasks using the gantt chart and control deadlines'])
Example #2
0
    def test_90_misc(self):
        """Miscellaneous edge cases for ``html_email_clean``."""
        # A False input must come back equal to False.
        cleaned = html_email_clean(False)
        self.assertEqual(cleaned, False, 'html_email_cleaner did change a False in an other value.')

        # A document carrying an XML declaration and a doctype must not crash
        # the cleaner, and 'encoding' must not appear in the result.
        xhtml_page = (
            u'<?xml version="1.0" encoding="iso-8859-1"?>\n'
            u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n'
            u'         "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
            u'<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n'
            u' <head>\n'
            u'  <title>404 - Not Found</title>\n'
            u' </head>\n'
            u' <body>\n'
            u'  <h1>404 - Not Found</h1>\n'
            u' </body>\n'
            u'</html>\n'
        )
        cleaned = html_email_clean(xhtml_page)
        self.assertNotIn('encoding', cleaned, 'html_email_cleaner did not remove correctly encoding attributes')
Example #3
0
    def test_20_email_html(self):
        """Clean HTML email samples with ``remove=True``.

        For every sample the ``*_IN`` fragments must still be present and the
        ``*_OUT`` fragments must be gone from the cleaned output.
        """
        samples = (
            (test_mail_examples.HTML_1, test_mail_examples.HTML_1_IN, test_mail_examples.HTML_1_OUT),
            (test_mail_examples.HTML_2, test_mail_examples.HTML_2_IN, test_mail_examples.HTML_2_OUT),
        )
        for source, kept, dropped in samples:
            cleaned = html_email_clean(source, remove=True)
            for fragment in kept:
                self.assertIn(fragment, cleaned, 'html_email_cleaner wrongly removed not quoted content')
            for fragment in dropped:
                self.assertNotIn(fragment, cleaned, 'html_email_cleaner did not erase signature / quoted content')
Example #4
0
    def test_05_shorten(self):
        # TEST: shorten length
        test_str = '''<div>
        <span>
        </span>
        <p>Hello, <span>Raoul</span> 
    <bold>You</bold> are 
    pretty</p>
<span>Really</span>
</div>
'''
        # shorten at 'H' of Hello -> should shorten after Hello,
        html = html_email_clean(test_str, shorten=True, max_length=1, remove=True)
        self.assertIn('Hello,', html, 'html_email_cleaner: shorten error or too short')
        self.assertNotIn('Raoul', html, 'html_email_cleaner: shorten error or too long')
        self.assertIn('read more', html, 'html_email_cleaner: shorten error about read more inclusion')
        # shorten at 'are' -> should shorten after are
        html = html_email_clean(test_str, shorten=True, max_length=17, remove=True)
        self.assertIn('Hello,', html, 'html_email_cleaner: shorten error or too short')
        self.assertIn('Raoul', html, 'html_email_cleaner: shorten error or too short')
        self.assertIn('are', html, 'html_email_cleaner: shorten error or too short')
        self.assertNotIn('pretty', html, 'html_email_cleaner: shorten error or too long')
        self.assertNotIn('Really', html, 'html_email_cleaner: shorten error or too long')
        self.assertIn('read more', html, 'html_email_cleaner: shorten error about read more inclusion')

        # TEST: shorten in quote
        test_str = '''<div> Blahble         
            bluih      blouh   
        <blockquote>This is a quote
        <span>And this is quite a long quote, after all.</span>
        </blockquote>
</div>'''
        # shorten in the quote
        html = html_email_clean(test_str, shorten=True, max_length=25, remove=True)
        self.assertIn('Blahble', html, 'html_email_cleaner: shorten error or too short')
        self.assertIn('bluih', html, 'html_email_cleaner: shorten error or too short')
        self.assertIn('blouh', html, 'html_email_cleaner: shorten error or too short')
        self.assertNotIn('quote', html, 'html_email_cleaner: shorten error or too long')
        self.assertIn('read more', html, 'html_email_cleaner: shorten error about read more inclusion')
        # shorten in second word
        html = html_email_clean(test_str, shorten=True, max_length=9, remove=True)
        self.assertIn('Blahble', html, 'html_email_cleaner: shorten error or too short')
        self.assertIn('bluih', html, 'html_email_cleaner: shorten error or too short')
        self.assertNotIn('blouh', html, 'html_email_cleaner: shorten error or too short')
        self.assertNotIn('quote', html, 'html_email_cleaner: shorten error or too long')
        self.assertIn('read more', html, 'html_email_cleaner: shorten error about read more inclusion')
        # shorten waaay too large
        html = html_email_clean(test_str, shorten=True, max_length=900, remove=True)
        self.assertIn('Blahble', html, 'html_email_cleaner: shorten error or too short')
        self.assertIn('bluih', html, 'html_email_cleaner: shorten error or too short')
        self.assertIn('blouh', html, 'html_email_cleaner: shorten error or too short')
        self.assertNotIn('quote', html, 'html_email_cleaner: shorten error or too long')
Example #5
0
    def test_10_email_text(self):
        """Clean text-based email samples with ``remove=True``.

        For every sample the ``*_IN`` fragments must survive and the
        ``*_OUT`` fragments must be absent from the cleaned output.
        """
        samples = (
            (test_mail_examples.TEXT_1, test_mail_examples.TEXT_1_IN, test_mail_examples.TEXT_1_OUT),
            (test_mail_examples.TEXT_2, test_mail_examples.TEXT_2_IN, test_mail_examples.TEXT_2_OUT),
        )
        for source, kept, dropped in samples:
            cleaned = html_email_clean(source, remove=True)
            for fragment in kept:
                self.assertIn(fragment, cleaned, 'html_email_cleaner wrongly removed not quoted content')
            for fragment in dropped:
                self.assertNotIn(fragment, cleaned, 'html_email_cleaner did not erase signature / quoted content')
Example #6
0
 def test_00_basic_text(self):
     """Signature and quote removal on small inline samples.

     Each case is ``(input, fragments that must remain, fragments that must
     be gone)`` after ``html_email_clean(input, remove=True)``.
     """
     cases = (
         # plain text with a '--' separator line before the signature
         ('This is Sparta!\n--\nAdministrator\n+9988776655',
          ('This is Sparta!',),
          ('Administrator', '9988776655')),
         # paragraph holding only a '--' separated signature
         ('<p>--\nAdministrator</p>',
          (),
          ('--', 'Administrator')),
         # paragraph with content followed by a '---' separator
         ('<p>This is Sparta!\n---\nAdministrator</p>',
          ('This is Sparta!',),
          ('---', 'Administrator')),
         # <br> after the separator: nothing is asserted here, only that the call succeeds
         ('<p>--<br>Administrator</p>',
          (),
          ()),
         # <br> around the separator: only the leading content is checked
         ('<p>This is Sparta!<br/>--<br>Administrator</p>',
          ('This is Sparta!',),
          ()),
         # plain text interleaved with '>' quoted lines
         ('This is Sparta!\n>Ah bon ?\nCertes\n> Chouette !\nClair',
          ('This is Sparta!', 'Certes', 'Clair'),
          ('Ah bon', 'Chouette')),
     )
     for raw, kept, dropped in cases:
         cleaned = html_email_clean(raw, remove=True)
         for fragment in kept:
             self.assertIn(fragment, cleaned, 'html_email_cleaner wrongly removed content')
         for fragment in dropped:
             self.assertNotIn(fragment, cleaned, 'html_email_cleaner did not remove unwanted content')
Example #7
0
    def test_70_read_more(self):
        """Clean and shorten the BUG1, BUG2 and BUG3 samples.

        Every sample is processed with ``remove=True`` and ``shorten=True``;
        the ``BUG_n_IN`` fragments must remain and the ``BUG_n_OUT``
        fragments must be absent.
        """
        cleaned = html_email_clean(test_mail_examples.BUG1, remove=True, shorten=True, max_length=100)
        for fragment in test_mail_examples.BUG_1_IN:
            self.assertIn(fragment, cleaned, 'html_email_cleaner wrongly removed valid content')
        for fragment in test_mail_examples.BUG_1_OUT:
            # These entries are decoded from UTF-8 before the comparison
            # (presumably they are byte strings -- verify in test_mail_examples).
            decoded = fragment.decode('utf-8')
            self.assertNotIn(decoded, cleaned, 'html_email_cleaner did not removed invalid content')

        # BUG2 and BUG3 share the same length limit and need no decoding.
        remaining = (
            (test_mail_examples.BUG2, test_mail_examples.BUG_2_IN, test_mail_examples.BUG_2_OUT),
            (test_mail_examples.BUG3, test_mail_examples.BUG_3_IN, test_mail_examples.BUG_3_OUT),
        )
        for source, kept, dropped in remaining:
            cleaned = html_email_clean(source, remove=True, shorten=True, max_length=250)
            for fragment in kept:
                self.assertIn(fragment, cleaned, 'html_email_cleaner wrongly removed valid content')
            for fragment in dropped:
                self.assertNotIn(fragment, cleaned, 'html_email_cleaner did not removed invalid content')
Example #8
0
    def test_30_email_msoffice(self):
        """Clean the MSOFFICE_1..3 samples with ``remove=True``.

        For every sample the ``*_IN`` fragments must remain and the
        ``*_OUT`` fragments must be absent from the cleaned output.
        """
        samples = (
            (test_mail_examples.MSOFFICE_1, test_mail_examples.MSOFFICE_1_IN, test_mail_examples.MSOFFICE_1_OUT),
            (test_mail_examples.MSOFFICE_2, test_mail_examples.MSOFFICE_2_IN, test_mail_examples.MSOFFICE_2_OUT),
            (test_mail_examples.MSOFFICE_3, test_mail_examples.MSOFFICE_3_IN, test_mail_examples.MSOFFICE_3_OUT),
        )
        for source, kept, dropped in samples:
            cleaned = html_email_clean(source, remove=True)
            for fragment in kept:
                self.assertIn(fragment, cleaned, 'html_email_cleaner wrongly removed not quoted content')
            for fragment in dropped:
                self.assertNotIn(fragment, cleaned, 'html_email_cleaner did not erase signature / quoted content')
Example #9
0
 def test_60_email_thunderbird(self):
     """Clean the THUNDERBIRD_1 sample with ``remove=True``.

     The ``THUNDERBIRD_1_IN`` fragments must remain and the
     ``THUNDERBIRD_1_OUT`` fragments must be absent from the output.
     """
     cleaned = html_email_clean(test_mail_examples.THUNDERBIRD_1, remove=True)
     kept = test_mail_examples.THUNDERBIRD_1_IN
     for fragment in kept:
         self.assertIn(fragment, cleaned, 'html_email_cleaner wrongly removed not quoted content')
     dropped = test_mail_examples.THUNDERBIRD_1_OUT
     for fragment in dropped:
         self.assertNotIn(fragment, cleaned, 'html_email_cleaner did not erase signature / quoted content')