def test_remove_next_part_from_content(self):
    """Scrubbed content must not contain the "next part" separator line."""
    with open(get_test_file("pipermail_nextpart.txt")) as email_file:
        msg = email.message_from_file(email_file, _class=Message)
    scrubber = Scrubber("*****@*****.**", msg)
    contents, attachments = scrubber.scrub()
    # assertFalse replaces the deprecated failIf alias (gone in Python 3.12).
    self.assertFalse(
        "-------------- next part --------------" in contents)
def test_html_only_email(self):
    """Scrubbing an HTML-only message must still return unicode content."""
    # The message carries only an HTML part, so the scrubbed content will be
    # empty. Even so, it has to be a unicode empty string rather than a str.
    fixture_path = get_test_file("html-email-2.txt")
    with open(fixture_path) as fixture:
        parsed = email.message_from_file(fixture, _class=Message)
    contents, _attachments = Scrubber("*****@*****.**", parsed).scrub()
    is_unicode = isinstance(contents, unicode)
    self.assertTrue(is_unicode,
                    u"Scrubbed content should always be unicode")
def test_non_ascii_payload(self):
    """Scrubber must handle non-ascii messages"""
    # Both fixtures are expected to decode to the very same unicode text.
    expected = (u'This message contains non-ascii '
                u'characters:\n\xe9 \xe8 \xe7 \xe0 \xee \xef \xeb \u20ac\n')
    for charset in ("utf8", "iso8859"):
        fixture_path = get_test_file("payload-%s.txt" % charset)
        with open(fixture_path) as fixture:
            parsed = email.message_from_file(fixture, _class=Message)
        contents, _attachments = Scrubber("*****@*****.**", parsed).scrub()
        self.assertTrue(isinstance(contents, unicode))
        self.assertEqual(contents, expected)
def test_bad_content_type(self):
    """Scrubber must handle unknown content-types"""
    import traceback
    with open(get_test_file("payload-unknown.txt")) as email_file:
        msg = email.message_from_file(email_file, _class=Message)
    scrubber = Scrubber("*****@*****.**", msg)
    try:
        contents, attachments = scrubber.scrub()
    except LookupError as e:
        # A LookupError here means the codec named by the message was not
        # found. Dump the traceback to ease debugging, then fail the test.
        # "except ... as" and print(...) are valid on Python 2.6+ and 3.
        print(traceback.format_exc())
        self.fail(e)
def test_html_email_1(self):
    """The HTML part becomes the only attachment; the text body is kept."""
    fixture_path = get_test_file("html-email-1.txt")
    with open(fixture_path) as fixture:
        parsed = email.message_from_file(fixture, _class=Message)
    contents, attachments = Scrubber("*****@*****.**", parsed).scrub()

    self.assertEqual(len(attachments), 1)

    # HTML part: check the metadata fields, then the payload size.
    html_part = attachments[0]
    expected_meta = (2, "attachment.html", "text/html", "iso-8859-1")
    self.assertEqual(html_part[:4], expected_meta)
    self.assertEqual(len(html_part[4]), 2723)

    # Scrubbed content
    expected_text = (u"This is a test message\r\n"
                     u"Non-ASCII chars: r\xe9ponse fran\xe7ais \n")
    self.assertEqual(contents, expected_text)
def test_attachment_1(self):
    """A vCard attachment is extracted and the text body is preserved."""
    fixture_path = get_test_file("attachment-1.txt")
    with open(fixture_path) as fixture:
        parsed = email.message_from_file(fixture, _class=Message)
    contents, attachments = Scrubber("*****@*****.**", parsed).scrub()

    expected_attachment = (
        2, 'puntogil.vcf', 'text/x-vcard', "utf-8",
        'begin:vcard\r\nfn:gil\r\nn:;gil\r\nversion:2.1\r\n'
        'end:vcard\r\n\r\n')
    self.assertEqual(len(attachments), 1)
    self.assertEqual(attachments[0], expected_attachment)

    # NOTE(review): the "\[email protected]" fragment below looks like an
    # address-obfuscation artifact rather than the real footer -- confirm
    # against the fixture before relying on this assertion.
    expected_text = (
        "This is a test message.\r\n\r\n"
        "\n-- \ndevel mailing list\[email protected]\n"
        "https://admin.fedoraproject.org/mailman/listinfo/devel\n")
    self.assertEqual(contents, expected_text)
def test_attachment_5(self):
    """A text attachment is extracted and the accented body is preserved."""
    fixture_path = get_test_file("attachment-5.txt")
    with open(fixture_path) as fixture:
        parsed = email.message_from_file(fixture, _class=Message)
    contents, attachments = Scrubber("*****@*****.**", parsed).scrub()

    self.assertEqual(len(attachments), 1)

    # text attachment
    # An alternative expectation was left commented out in the original:
    #   (2, u"todo-déjeuner.txt", "text/plain", "utf-8")
    # The test currently expects the generic "attachment.bin" name instead.
    text_part = attachments[0]
    expected_meta = (2, u"attachment.bin", "text/plain", "utf-8")
    self.assertEqual(text_part[:4], expected_meta)
    self.assertEqual(len(text_part[4]), 112)

    # Scrubbed content
    expected_text = (u'This is a test, HTML message with '
                     u'accented letters : \xe9 \xe8 \xe7 \xe0.\r\nAnd an '
                     u'attachment with an accented filename\r\n\r\n\r\n\r\n')
    self.assertEqual(contents, expected_text)
def test_attachment_3(self):
    """An HTML part and a JPEG are both extracted as attachments."""
    fixture_path = get_test_file("attachment-3.txt")
    with open(fixture_path) as fixture:
        parsed = email.message_from_file(fixture, _class=Message)
    contents, attachments = Scrubber("*****@*****.**", parsed).scrub()

    self.assertEqual(len(attachments), 2)
    html_part, image_part = attachments[0], attachments[1]

    # HTML part
    self.assertEqual(
        html_part[:4],
        (3, "attachment.html", "text/html", "iso-8859-1"))
    self.assertEqual(len(html_part[4]), 3134)

    # Image attachment
    self.assertEqual(
        image_part[:4],
        (4, "GeoffreyRoucourt.jpg", "image/jpeg", None))
    self.assertEqual(len(image_part[4]), 282180)

    # Scrubbed content
    self.assertEqual(contents, u"This is a test message\r\n")
def test_attachment_2(self):
    """A PGP signature is extracted and the non-ascii body is preserved."""
    fixture_path = get_test_file("attachment-2.txt")
    with open(fixture_path) as fixture:
        parsed = email.message_from_file(fixture, _class=Message)
    contents, attachments = Scrubber("*****@*****.**", parsed).scrub()

    expected_signature = (
        3, 'signature.asc', 'application/pgp-signature', None,
        '-----BEGIN PGP SIGNATURE-----\r\nVersion: GnuPG v1.4.12 '
        '(GNU/Linux)\r\nComment: Using GnuPG with Mozilla - '
        'http://www.enigmail.net/\r\n\r\niEYEARECAAYFAlBhm3oACgkQhmBj'
        'z394AnmMnQCcC+6tWcqE1dPQmIdRbLXgKGVp\r\nEeUAn2OqtaXaXaQV7rx+'
        'SmOldmSzcFw4\r\n=OEJv\r\n-----END PGP SIGNATURE-----\r\n')
    self.assertEqual(len(attachments), 1)
    self.assertEqual(attachments[0], expected_signature)

    # NOTE(review): the "\[email protected]" fragment below looks like an
    # address-obfuscation artifact rather than the real footer -- confirm
    # against the fixture before relying on this assertion.
    expected_text = (
        u"This is a test message\r\nNon-ascii chars: Hofm\xfchlgasse\r\n"
        u"\n-- \ndevel mailing list\[email protected]\n"
        u"https://admin.fedoraproject.org/mailman/listinfo/devel\n")
    self.assertEqual(contents, expected_text)
def add_to_list(self, mlist, message):
    """Add the message to a specific list of the store.

    :param mlist: The mailing-list object, implementing
        mailman.interfaces.mailinglist.IMailingList.
    :param message: An email.message.Message instance containing at
        least a unique Message-ID header.  The message will be given an
        X-Message-ID-Hash header, overriding any existing such header.
    :returns: The calculated X-Message-ID-Hash header.
    :raises ValueError: if the message is missing a Message-ID header.
        The storage service is also allowed to raise this exception
        if it finds, but disallows, collisions.
    """
    list_name = unicode(mlist.fqdn_listname)
    # Create the list if it does not exist
    db_list = self.db.find(List, List.name == list_name).one()
    if db_list is None:
        db_list = List(list_name)
        self.db.add(db_list)
    db_list.display_name = mlist.display_name
    db_list.subject_prefix = mlist.subject_prefix
    # Membership test instead of has_key(): Message.has_key() no longer
    # exists on Python 3, while "in" works on both Python 2 and 3.
    if "Message-Id" not in message:
        raise ValueError("No 'Message-Id' header in email", message)
    msg_id = unicode(unquote(message['Message-Id']))
    email = Email(list_name, msg_id)
    if self.is_message_in_list(list_name, email.message_id):
        # Already stored: report it and return the existing hash.
        print("Duplicate email from %s: %s" %
              (message['From'], message.get('Subject', '""')))
        return email.message_id_hash

    # the message.as_string() call must be done before scrubbing
    email_full = EmailFull(list_name, msg_id, message.as_string())
    # Find thread id
    new_thread = False
    ref, thread_id = get_ref_and_thread_id(message, list_name, self)
    if thread_id is None:
        new_thread = True
        # make up the thread_id if not found
        thread_id = email.message_id_hash
    email.thread_id = thread_id
    email.in_reply_to = ref

    from_name, from_email = parseaddr(message['From'])
    from_name = header_to_unicode(from_name)
    email.sender_name = from_name.strip()
    email.sender_email = unicode(from_email).strip()
    email.subject = header_to_unicode(message.get('Subject'))
    msg_date = parsedate(message.get("Date"))
    if msg_date is None:
        # Absent or unparseable date
        msg_date = datetime.datetime.utcnow()
    # Read the offset before the date is converted to naive UTC below.
    utcoffset = msg_date.utcoffset()
    if msg_date.tzinfo is not None:
        msg_date = msg_date.astimezone(tzutc()).replace(tzinfo=None)
    email.date = msg_date
    if utcoffset is None:
        email.timezone = 0
    else:
        # in minutes; floor division keeps the value an integer on both
        # Python 2 and Python 3
        email.timezone = (
            (utcoffset.days * 24 * 60 * 60) + utcoffset.seconds) // 60

    scrubber = Scrubber(list_name, message)
    # warning: scrubbing modifies the msg in-place
    email.content, attachments = scrubber.scrub()

    # store the Mailman user
    email.user_id = self._store_mailman_user(email.sender_email)

    #category = 'Question' # TODO: enum + i18n ?
    #if ('agenda' in message.get('Subject', '').lower() or
    #        'reminder' in message.get('Subject', '').lower()):
    #    # i18n!
    #    category = 'Agenda'

    if new_thread:
        thread = Thread(list_name, thread_id, email.date)
    else:
        thread = self.db.find(
            Thread, And(
                Thread.list_name == list_name,
                Thread.thread_id == thread_id,
            )).one()
    thread.date_active = email.date
    self.db.add(thread)

    self.db.add(email)
    self.db.add(email_full)
    compute_thread_order_and_depth(thread)
    for attachment in attachments:
        self.add_attachment(list_name, msg_id, *attachment)
    self.flush()
    # search indexing
    if self.search_index is not None:
        self.search_index.add(email)

    return email.message_id_hash