def test_70_read_more_and_shorten(self):
    """Shorten website-style emails and check the custom expand markup.

    Each sample is cleaned with ``remove=True``, ``shorten=True`` and the
    ``expand_options`` defined below. The result must contain every
    ``*_IN`` snippet, none of the ``*_OUT`` snippets, and the expand
    markup built from the custom options.
    """
    expand_options = {
        'oe_expand_container_class': 'span_class',
        'oe_expand_container_content': 'Herbert Einstein',
        'oe_expand_separator_node': 'br_lapin',
        'oe_expand_a_class': 'a_class',
        'oe_expand_a_content': 'read mee',
    }
    # Markup that must show up in the output when the options above are used.
    expand_markup = '<span class="span_class">Herbert Einstein<br_lapin></br_lapin><a href="#" class="a_class">read mee</a></span>'

    def check(cleaned, kept, dropped, expected):
        # Same three assertion passes, in the same order, for every sample.
        for snippet in kept:
            self.assertIn(snippet, cleaned, 'html_email_cleaner wrongly removed not quoted content')
        for snippet in dropped:
            self.assertNotIn(snippet, cleaned, 'html_email_cleaner did not erase overlimit content')
        for snippet in expected:
            self.assertIn(snippet, cleaned, 'html_email_cleaner wrongly take into account specific expand options')

    # First sample: max_length=100, protect_sections not passed.
    cleaned = html_email_clean(
        test_mail_examples.OERP_WEBSITE_HTML_1,
        remove=True, shorten=True, max_length=100,
        expand_options=expand_options)
    check(
        cleaned,
        test_mail_examples.OERP_WEBSITE_HTML_1_IN,
        test_mail_examples.OERP_WEBSITE_HTML_1_OUT,
        [expand_markup])

    # Second sample: max_length=200, once with protect_sections=False and
    # once with protect_sections=True; the protected run must additionally
    # keep one extra sentence fragment.
    runs = (
        (False, [expand_markup]),
        (True, [expand_markup, 'tasks using the gantt chart and control deadlines']),
    )
    for protect, expected in runs:
        cleaned = html_email_clean(
            test_mail_examples.OERP_WEBSITE_HTML_2,
            remove=True, shorten=True, max_length=200,
            expand_options=expand_options, protect_sections=protect)
        check(
            cleaned,
            test_mail_examples.OERP_WEBSITE_HTML_2_IN,
            test_mail_examples.OERP_WEBSITE_HTML_2_OUT,
            expected)
def test_90_misc(self):
    """Edge cases: a ``False`` input and a document with xml/doctype headers."""
    # A False input must compare equal to False after cleaning.
    unchanged = html_email_clean(False)
    self.assertEqual(unchanged, False, 'html_email_cleaner did change a False in an other value.')

    # A message carrying xml and doctype declarations must not crash the
    # cleaner, and the word 'encoding' must be absent from the result.
    xml_document = (
        u'<?xml version="1.0" encoding="iso-8859-1"?>\n'
        u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"\n'
        u' "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n'
        u'<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">\n'
        u' <head>\n'
        u' <title>404 - Not Found</title>\n'
        u' </head>\n'
        u' <body>\n'
        u' <h1>404 - Not Found</h1>\n'
        u' </body>\n'
        u'</html>\n'
    )
    cleaned = html_email_clean(xml_document)
    self.assertNotIn('encoding', cleaned, 'html_email_cleaner did not remove correctly encoding attributes')
def test_20_email_html(self):
    """Clean the HTML_1 and HTML_2 samples with ``remove=True``.

    For each sample, every ``*_IN`` snippet must still be present and
    every ``*_OUT`` snippet must be gone.
    """
    cases = (
        (test_mail_examples.HTML_1, test_mail_examples.HTML_1_IN, test_mail_examples.HTML_1_OUT),
        (test_mail_examples.HTML_2, test_mail_examples.HTML_2_IN, test_mail_examples.HTML_2_OUT),
    )
    for source, kept, dropped in cases:
        cleaned = html_email_clean(source, remove=True)
        for snippet in kept:
            self.assertIn(snippet, cleaned, 'html_email_cleaner wrongly removed not quoted content')
        for snippet in dropped:
            self.assertNotIn(snippet, cleaned, 'html_email_cleaner did not erase signature / quoted content')
def test_05_shorten(self):
    """Check where ``html_email_clean`` cuts content when ``shorten=True``.

    Two fixtures are used: a small nested document, to check the cut
    position for different ``max_length`` values, and a document with a
    blockquote, to check shortening combined with ``remove=True``.
    A "read more" marker is expected whenever content was cut.
    """
    # Failure messages, named once so each assertion carries the right one:
    # a missing word means the output is too short, an unexpected word
    # means it is too long.
    too_short = 'html_email_cleaner: shorten error or too short'
    too_long = 'html_email_cleaner: shorten error or too long'
    no_read_more = 'html_email_cleaner: shorten error about read more inclusion'

    # TEST: shorten length
    test_str = '''<div> <span> </span> <p>Hello, <span>Raoul</span> <bold>You</bold> are pretty</p> <span>Really</span> </div> '''

    # shorten at 'H' of Hello -> should shorten after Hello,
    html = html_email_clean(test_str, shorten=True, max_length=1, remove=True)
    self.assertIn('Hello,', html, too_short)
    self.assertNotIn('Raoul', html, too_long)
    self.assertIn('read more', html, no_read_more)

    # shorten at 'are' -> should shorten after are
    html = html_email_clean(test_str, shorten=True, max_length=17, remove=True)
    self.assertIn('Hello,', html, too_short)
    self.assertIn('Raoul', html, too_short)
    self.assertIn('are', html, too_short)
    self.assertNotIn('pretty', html, too_long)
    self.assertNotIn('Really', html, too_long)
    self.assertIn('read more', html, no_read_more)

    # TEST: shorten in quote
    test_str = '''<div> Blahble bluih blouh <blockquote>This is a quote <span>And this is quite a long quote, after all.</span> </blockquote> </div>'''

    # shorten in the quote
    html = html_email_clean(test_str, shorten=True, max_length=25, remove=True)
    self.assertIn('Blahble', html, too_short)
    self.assertIn('bluih', html, too_short)
    self.assertIn('blouh', html, too_short)
    self.assertNotIn('quote', html, too_long)
    self.assertIn('read more', html, no_read_more)

    # shorten in second word
    html = html_email_clean(test_str, shorten=True, max_length=9, remove=True)
    self.assertIn('Blahble', html, too_short)
    self.assertIn('bluih', html, too_short)
    # 'blouh' lies past the cut: finding it means the output is too long
    # (this assertion previously reported "too short").
    self.assertNotIn('blouh', html, too_long)
    self.assertNotIn('quote', html, too_long)
    self.assertIn('read more', html, no_read_more)

    # shorten waaay too large
    html = html_email_clean(test_str, shorten=True, max_length=900, remove=True)
    self.assertIn('Blahble', html, too_short)
    self.assertIn('bluih', html, too_short)
    self.assertIn('blouh', html, too_short)
    self.assertNotIn('quote', html, too_long)
def test_10_email_text(self):
    """Clean the TEXT_1 and TEXT_2 samples with ``remove=True``.

    For each sample, every ``*_IN`` snippet must still be present and
    every ``*_OUT`` snippet must be gone.
    """
    cases = (
        (test_mail_examples.TEXT_1, test_mail_examples.TEXT_1_IN, test_mail_examples.TEXT_1_OUT),
        (test_mail_examples.TEXT_2, test_mail_examples.TEXT_2_IN, test_mail_examples.TEXT_2_OUT),
    )
    for source, kept, dropped in cases:
        cleaned = html_email_clean(source, remove=True)
        for snippet in kept:
            self.assertIn(snippet, cleaned, 'html_email_cleaner wrongly removed not quoted content')
        for snippet in dropped:
            self.assertNotIn(snippet, cleaned, 'html_email_cleaner did not erase signature / quoted content')
def test_00_basic_text(self):
    """Run ``html_email_clean(..., remove=True)`` over small inline samples.

    Each case is ``(input, must_contain, must_not_contain)``. Cases whose
    two lists are both empty only check that cleaning completes.
    """
    cases = (
        # plain text followed by a '--' separator and a signature
        ("This is Sparta!\n--\nAdministrator\n+9988776655",
         ['This is Sparta!'],
         ['Administrator', '9988776655']),
        # paragraph made only of a '--' block
        ("<p>--\nAdministrator</p>",
         [],
         ['--', 'Administrator']),
        # paragraph with content followed by a '---' block
        ("<p>This is Sparta!\n---\nAdministrator</p>",
         ['This is Sparta!'],
         ['---', 'Administrator']),
        # '--' followed by <br>: nothing asserted about the output
        ("<p>--<br>Administrator</p>",
         [],
         []),
        # content, <br/>, then '--': only the content is asserted
        ("<p>This is Sparta!<br/>--<br>Administrator</p>",
         ['This is Sparta!'],
         []),
        # lines starting with '>' interleaved with regular lines
        ("This is Sparta!\n>Ah bon ?\nCertes\n> Chouette !\nClair",
         ['This is Sparta!', 'Certes', 'Clair'],
         ['Ah bon', 'Chouette']),
    )
    for source, expected_present, expected_absent in cases:
        cleaned = html_email_clean(source, remove=True)
        for fragment in expected_present:
            self.assertIn(fragment, cleaned, 'html_email_cleaner wrongly removed content')
        for fragment in expected_absent:
            self.assertNotIn(fragment, cleaned, 'html_email_cleaner did not remove unwanted content')
def test_70_read_more(self):
    """Clean and shorten the BUG1, BUG2 and BUG3 samples.

    Each sample is processed with ``remove=True`` and ``shorten=True``;
    the ``*_IN`` snippets must remain and the ``*_OUT`` snippets must not
    appear in the result.
    """
    kept_msg = 'html_email_cleaner wrongly removed valid content'
    dropped_msg = 'html_email_cleaner did not removed invalid content'

    # BUG1 is handled on its own: max_length=100, and its forbidden
    # snippets are decoded from utf-8 before being searched for.
    cleaned = html_email_clean(test_mail_examples.BUG1, remove=True, shorten=True, max_length=100)
    for snippet in test_mail_examples.BUG_1_IN:
        self.assertIn(snippet, cleaned, kept_msg)
    for snippet in test_mail_examples.BUG_1_OUT:
        self.assertNotIn(snippet.decode('utf-8'), cleaned, dropped_msg)

    # BUG2 and BUG3 share the same settings (max_length=250, no decoding).
    cases = (
        (test_mail_examples.BUG2, test_mail_examples.BUG_2_IN, test_mail_examples.BUG_2_OUT),
        (test_mail_examples.BUG3, test_mail_examples.BUG_3_IN, test_mail_examples.BUG_3_OUT),
    )
    for source, kept, dropped in cases:
        cleaned = html_email_clean(source, remove=True, shorten=True, max_length=250)
        for snippet in kept:
            self.assertIn(snippet, cleaned, kept_msg)
        for snippet in dropped:
            self.assertNotIn(snippet, cleaned, dropped_msg)
def test_30_email_msoffice(self):
    """Clean the MSOFFICE_1..3 samples with ``remove=True``.

    For each sample, every ``*_IN`` snippet must still be present and
    every ``*_OUT`` snippet must be gone.
    """
    cases = (
        (test_mail_examples.MSOFFICE_1, test_mail_examples.MSOFFICE_1_IN, test_mail_examples.MSOFFICE_1_OUT),
        (test_mail_examples.MSOFFICE_2, test_mail_examples.MSOFFICE_2_IN, test_mail_examples.MSOFFICE_2_OUT),
        (test_mail_examples.MSOFFICE_3, test_mail_examples.MSOFFICE_3_IN, test_mail_examples.MSOFFICE_3_OUT),
    )
    for source, kept, dropped in cases:
        cleaned = html_email_clean(source, remove=True)
        for snippet in kept:
            self.assertIn(snippet, cleaned, 'html_email_cleaner wrongly removed not quoted content')
        for snippet in dropped:
            self.assertNotIn(snippet, cleaned, 'html_email_cleaner did not erase signature / quoted content')
def test_60_email_thunderbird(self):
    """Clean the THUNDERBIRD_1 sample with ``remove=True``.

    Every ``THUNDERBIRD_1_IN`` snippet must still be present and every
    ``THUNDERBIRD_1_OUT`` snippet must be gone.
    """
    cleaned = html_email_clean(test_mail_examples.THUNDERBIRD_1, remove=True)

    must_stay = test_mail_examples.THUNDERBIRD_1_IN
    for fragment in must_stay:
        self.assertIn(fragment, cleaned, 'html_email_cleaner wrongly removed not quoted content')

    must_go = test_mail_examples.THUNDERBIRD_1_OUT
    for fragment in must_go:
        self.assertNotIn(fragment, cleaned, 'html_email_cleaner did not erase signature / quoted content')