def test_comment_2(self):
    # An HTML comment sitting between two text nodes: the call is expected
    # to return one True-flagged part equal to the input.
    markup = u'''A<!-- test -->B'''
    expected = [
        (True, 'A<!-- test -->B'),
    ]
    self.assertEqual(quote_html(markup), expected)
def test_empty(self):
    # Empty input is expected to yield one True-flagged part holding an
    # empty string (not an empty list).
    markup = u''
    expected = [
        (True, ''),
    ]
    self.assertEqual(quote_html(markup), expected)
def test_comment(self):
    # Input consisting solely of an HTML comment: expected back unchanged
    # as a single True-flagged part.
    markup = u'''<!-- test -->'''
    expected = [
        (True, '<!-- test -->'),
    ]
    self.assertEqual(quote_html(markup), expected)
def test_no_quote(self):
    # Three plain paragraphs and no limit argument: the whole document is
    # expected back as one True-flagged part.
    markup = u'''<p>One</p><p>Two</p><p>Three</p>'''
    expected = [
        (True, '<p>One</p><p>Two</p><p>Three</p>'),
    ]
    self.assertEqual(quote_html(markup), expected)
def test_newline(self):
    # Newline in "Am\r\n26. Mai" should not change the way we match.
    #
    # The expected result has two parts: a True-flagged part ending at the
    # "... schrieb Spam Foo ...:" paragraph, and a False-flagged part that
    # carries the blockquote inside re-opened wrapper tags.
    markup = u'''<html>\r\n<head>\r\n\r\n</head>\r\n<body>\r\n<div style="color: black;">\r\n<div style="color: black;">\r\n<p style="margin: 0 0 1em 0; color: black;">Here is spam.<br>\r\nHam</p>\r\n</div>\r\n<div style="color: black;">\r\n<p\r\nstyle="color: black; font-size: 10pt; font-family: Arial, sans-serif; margin: 10pt 0;">Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <[email protected]>:</p>\r\n<blockquote type="cite" class="gmail_quote"\r\nstyle="margin: 0 0 0 0.75ex; border-left: 1px solid #808080; padding-left: 0.75ex;">Hey\r\nHam,<br><br>I like spam.<br></blockquote>\r\n</div>\r\n</div>\r\n</body>\r\n</html>\r\n'''
    before_quote = '<html>\r\n<head>\r\n\r\n</head>\r\n<body>\r\n<div style="color: black;">\r\n<div style="color: black;">\r\n<p style="margin: 0 0 1em 0; color: black;">Here is spam.<br>\r\nHam</p>\r\n</div>\r\n<div style="color: black;">\r\n<p style="color: black; font-size: 10pt; font-family: Arial, sans-serif; margin: 10pt 0;">Am\r\n26. Mai 2015 19:20:17 schrieb Spam Foo <[email protected]>:</p></div></div></body></html>'
    from_quote = '<html><body><div style="color: black;"><div style="color: black;">\r\n<blockquote type="cite" class="gmail_quote" style="margin: 0 0 0 0.75ex; border-left: 1px solid #808080; padding-left: 0.75ex;">Hey\r\nHam,<br><br>I like spam.<br></blockquote>\r\n</div>\r\n</div>\r\n</body>\r\n</html>'
    expected = [
        (True, before_quote),
        (False, from_quote),
    ]
    self.assertEqual(quote_html(markup), expected)
def test_limit(self):
    # Four paragraphs with limit=3: the first three are expected in the
    # True-flagged part and the fourth in the False-flagged part.
    markup = u'''<p>One</p><p>Two</p><p>Three</p><p>Four</p>'''
    expected = [
        (True, '<p>One</p><p>Two</p><p>Three</p>'),
        (False, '<p>Four</p>'),
    ]
    actual = quote_html(markup, limit=3)
    self.assertEqual(actual, expected)
def test_images(self):
    # A reply whose blockquote contains <img> tags with punctuation-heavy
    # alt/title attributes. The expected split is right after the
    # "On ... wrote:" line; the <br> and blockquote land in the
    # False-flagged part with the <img> tags intact.
    markup = '''<div>Well hello there Sir!!!<br><br><br>On Dec 23, 2014, at 04:35 PM, Steve Wiseman <[email protected]> wrote:<br><blockquote type=\"cite\"><div style=\"color:#000;\"><div dir=\"ltr\">Hi there <img src=\"https://s.yimg.com/ok/u/assets/img/emoticons/emo14.gif\" alt=\"*B-) cool\" title=\"*B-) cool\" class=\"fr-fin\"><img src=\"https://s.yimg.com/ok/u/assets/img/emoticons/emo7.gif\" alt=\"*:P tongue\" title=\"*:P tongue\" class=\"fr-fin\"><img src=\"https://s.yimg.com/ok/u/assets/img/emoticons/emo72.gif\" alt=\"*:->~~ spooky\" title=\"*:->~~ spooky\" class=\"fr-fin\"></div></div></blockquote></div>'''
    before_quote = u'''<div>Well hello there Sir!!!<br><br><br>On Dec 23, 2014, at 04:35 PM, Steve Wiseman <[email protected]> wrote:</div>'''
    from_quote = u'''<div><br><blockquote type="cite"><div style="color:#000;"><div dir="ltr">Hi there <img src=\"https://s.yimg.com/ok/u/assets/img/emoticons/emo14.gif\" alt=\"*B-) cool\" title=\"*B-) cool\" class=\"fr-fin\"><img src=\"https://s.yimg.com/ok/u/assets/img/emoticons/emo7.gif\" alt=\"*:P tongue\" title=\"*:P tongue\" class=\"fr-fin\"><img src=\"https://s.yimg.com/ok/u/assets/img/emoticons/emo72.gif\" alt=\"*:->~~ spooky\" title=\"*:->~~ spooky\" class=\"fr-fin\"></div></div></blockquote></div>'''
    expected = [
        (True, before_quote),
        (False, from_quote),
    ]
    self.assertEqual(quote_html(markup), expected)
def test_no_wrap_tag(self):
    # The input has no enclosing element around the text and blockquote.
    # The "On ... wrote:" text is expected in the True-flagged part and the
    # bare blockquote in the False-flagged part.
    markup = u'''On Thu, Dec 18, 2014 at 10:02 AM, foo <[email protected]> wrote:<blockquote>some stuff</blockquote>'''
    expected = [
        (True, 'On Thu, Dec 18, 2014 at 10:02 AM, foo <[email protected]> wrote:'),
        (False, '<blockquote>some stuff</blockquote>'),
    ]
    self.assertEqual(quote_html(markup), expected)
# Feeds quote_html markup with Word/Outlook-style classes (WordSection1,
# MsoNormal) and <o:p> elements. Two parts are expected: a True-flagged part
# that keeps "Thanks," and an emptied bordered <div>, then a False-flagged part
# holding the "From:/Sent:/To:/Subject:" paragraph and everything after it.
# In both expected strings the attribute values are double-quoted and the
# <o:p> elements appear as <p>.
# NOTE(review): the first expected string literal runs across a physical line
# break as it appears here; a single-quoted literal cannot contain one, so
# confirm the intended character(s) at that position against the upstream file.
def test_outlook(self): self.assertEqual( quote_html(u'''<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head></head><body lang=EN-US link=blue vlink=purple><div class=WordSection1><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'>Thanks,<o:p></o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'><o:p> </o:p></span></p><p class=MsoNormal><span style='font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D'><o:p> </o:p></span></p><div><div style='border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in'><p class=MsoNormal><b><span style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'>From:</span></b><span style='font-size:10.0pt;font-family:"Tahoma","sans-serif"'> John Doe [mailto:[email protected]] <br><b>Sent:</b> Tuesday, December 30, 2014 5:31 PM<br><b>To:</b> [email protected]<br><b>Subject:</b> Excited to have you on board!<o:p></o:p></span></p></div></div><p class=MsoNormal><o:p> </o:p></p><p>Hey,<o:p></o:p></p></div></body></html>'''), [ (True, '<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head></head><body lang="EN-US" link="blue" vlink="purple"><div class="WordSection1"><p class="MsoNormal"><span style=\'font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D\'>Thanks,<p></p></span></p><p class="MsoNormal"><span style=\'font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D\'><p> </p></span></p><p class="MsoNormal"><span style=\'font-size:11.0pt;font-family:"Calibri","sans-serif";color:#1F497D\'><p> 
</p></span></p><div><div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in"></div></div></div></body></html>'), (False, '<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><body lang="EN-US" link="blue" vlink="purple"><div class="WordSection1"><div><div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0in 0in 0in"><p class="MsoNormal"><b><span style=\'font-size:10.0pt;font-family:"Tahoma","sans-serif"\'>From:</span></b><span style=\'font-size:10.0pt;font-family:"Tahoma","sans-serif"\'> John Doe [mailto:[email protected]] <br><b>Sent:</b> Tuesday, December 30, 2014 5:31 PM<br><b>To:</b> [email protected]<br><b>Subject:</b> Excited to have you on board!<p></p></span></p></div></div><p class="MsoNormal"><p> </p></p><p>Hey,<p></p></p></div></body></html>') ] )
def test_gmail_2(self):
    # A reply with gmail_extra / gmail_quote wrappers. The expected split is
    # right after "... wrote:": the True-flagged part closes the two wrapper
    # divs, and the False-flagged part re-opens them around the blockquote
    # and keeps the trailing "\r\n".
    markup = u'''<div dir="ltr">looks good\xa0</div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Dec 18, 2014 at 10:02 AM, foo <span dir="ltr"><<a href="mailto:[email protected]" target="_blank">[email protected]</a>></span> wrote:<blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">Hey Phil,\xa0<div><br><div>Sending you the report:\xa0</div></div><div><span class="HOEnZb"><font color="#888888"><br></font></span></div><span class="HOEnZb"><font color="#888888"><div><br></div><div class="gmail_extra">-- <br><div><div dir="ltr"><div>Cheers,</div><div>foo & example Team</div><div><a href="http://www.example.com" target="_blank">www.example.com</a> ; - <a href="mailto:[email protected]" target="_blank">[email protected]</a>\xa0</div></div></div>\r\n</div></font></span></div>\r\n</blockquote></div></div>\r\n'''
    before_quote = u'''<div dir="ltr">looks good </div><div class="gmail_extra"><br><div class="gmail_quote">On Thu, Dec 18, 2014 at 10:02 AM, foo <span dir="ltr"><<a href="mailto:[email protected]" target="_blank">[email protected]</a>></span> wrote:</div></div>'''
    from_quote = u'''<div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"><div dir="ltr">Hey Phil, <div><br><div>Sending you the report: </div></div><div><span class="HOEnZb"><font color="#888888"><br></font></span></div><span class="HOEnZb"><font color="#888888"><div><br></div><div class="gmail_extra">-- <br><div><div dir="ltr"><div>Cheers,</div><div>foo & example Team</div><div><a href="http://www.example.com" target="_blank">www.example.com</a> ; - <a href="mailto:[email protected]" target="_blank">[email protected]</a> </div></div></div>\r\n</div></font></span></div>\r\n</blockquote></div></div>\r\n'''
    expected = [
        (True, before_quote),
        (False, from_quote),
    ]
    self.assertEqual(quote_html(markup), expected)
def test_apple(self):
    # A reply where the "On ... wrote:" line sits inside the blockquote
    # itself. The expected split falls inside that blockquote, right after
    # the "wrote:" div.
    markup = '''<html><head><meta http-equiv="Content-Type" content="text/html charset=us-ascii"></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;" class="">Some text<div class=""><br class=""></div><div class="">some more text</div><div class=""><br class=""></div><div class=""><br class=""><div><blockquote type="cite" class=""><div class="">On Nov 12, 2014, at 11:07 PM, Some One <<a href="mailto:[email protected]" class="">[email protected]</a>> wrote:</div><br class="Apple-interchange-newline"><div class=""><meta http-equiv="Content-Type" content="text/html charset=us-ascii" class=""><div style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;" class="">Lorem ipsum dolor sit amet.<div class=""><br class=""></div></div></div></blockquote></div><br class=""></div></body></html>'''

    # Note that lxml removes Content-Type meta tags (see
    # lxml.html.tostring include_meta_content_type flag)
    before_quote = '''<html><head></head><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;" class="">Some text<div class=""><br class=""></div><div class="">some more text</div><div class=""><br class=""></div><div class=""><br class=""><div><blockquote type="cite" class=""><div class="">On Nov 12, 2014, at 11:07 PM, Some One <<a href="mailto:[email protected]" class="">[email protected]</a>> wrote:</div></blockquote></div></div></body></html>'''

    # Note we have an empty div stripped out here.
    from_quote = '''<html><body style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;" class=""><div class=""><div><blockquote type="cite" class=""><br class="Apple-interchange-newline"><div class=""><div style="word-wrap: break-word; -webkit-nbsp-mode: space; -webkit-line-break: after-white-space;" class="">Lorem ipsum dolor sit amet.<div class=""><br class=""></div></div></div></blockquote></div><br class=""></div></body></html>'''

    expected = [
        (True, before_quote),
        (False, from_quote),
    ]
    self.assertEqual(quote_html(markup), expected)
def test_gmail(self):
    # A message with a "---------- Forwarded message ----------" marker.
    # The expected split is directly after that marker line; the "From:"
    # header and everything after it go in the False-flagged part.
    markup = '''<div dir="ltr"><br><div class="gmail_quote">---------- Forwarded message ----------<br>From: <b class="gmail_sendername">Some One</b> <span dir="ltr"><<a href="mailto:[email protected]">[email protected]</a>></span> </div><br><br clear="all"><div><br></div>-- <br><div class="gmail_signature"><div>Some One</div></div> </div>'''
    before_quote = '''<div dir="ltr"><br><div class="gmail_quote">---------- Forwarded message ----------</div></div>'''
    from_quote = '''<div dir="ltr"><div class="gmail_quote"><br>From: <b class="gmail_sendername">Some One</b> <span dir="ltr"><<a href="mailto:[email protected]">[email protected]</a>></span> </div><br><br clear="all"><div><br></div>-- <br><div class="gmail_signature"><div>Some One</div></div> </div>'''
    expected = [
        (True, before_quote),
        (False, from_quote),
    ]
    self.assertEqual(quote_html(markup), expected)
def test_encoding(self):
    # We assume everything is UTF-8
    #
    # The input declares ISO-8859-1 in both the XML prolog and a meta tag.
    # The expected single True-flagged part has the prolog, DOCTYPE and meta
    # tag removed, with the non-ASCII character left intact.
    markup = u'''<?xml version="1.0" encoding="ISO-8859-1"?> <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <meta http-equiv="Content-Type" content="text/html; charset=ISO-8859-1" /> <title></title> </head> <body> test ä </body> </html>'''
    expected = [
        (True, u'''<html xmlns="http://www.w3.org/1999/xhtml"> <head> <title></title> </head> <body> test ä </body> </html>'''),
    ]
    self.assertEqual(quote_html(markup), expected)