Esempio n. 1
0
    def test_remove_next_part_from_content(self):
        # The scrubbed text of the pipermail_nextpart.txt fixture must not
        # contain the "next part" separator line.
        with open(get_test_file("pipermail_nextpart.txt")) as email_file:
            msg = email.message_from_file(email_file, _class=Message)
        scrubber = Scrubber("*****@*****.**", msg)
        contents, attachments = scrubber.scrub()

        # assertFalse replaces the deprecated failIf alias, which recent
        # unittest releases no longer provide.
        self.assertFalse(
            "-------------- next part --------------" in contents)
Esempio n. 2
0
 def test_attachment_2(self):
     # attachment-2.txt: exactly one attachment (the PGP signature) is
     # expected, and the remaining text must match the unicode value below.
     expected_signature = (
         3,
         "signature.asc",
         "application/pgp-signature",
         None,
         "-----BEGIN PGP SIGNATURE-----\r\nVersion: GnuPG v1.4.12 "
         "(GNU/Linux)\r\nComment: Using GnuPG with Mozilla - "
         "http://www.enigmail.net/\r\n\r\niEYEARECAAYFAlBhm3oACgkQhmBj"
         "z394AnmMnQCcC+6tWcqE1dPQmIdRbLXgKGVp\r\nEeUAn2OqtaXaXaQV7rx+"
         "SmOldmSzcFw4\r\n=OEJv\r\n-----END PGP SIGNATURE-----\r\n",
     )
     expected_text = (
         u"This is a test message\r\nNon-ascii chars: Hofm\xfchlgasse\r\n"
         u"\n-- \ndevel mailing list\[email protected]\n"
         u"https://admin.fedoraproject.org/mailman/listinfo/devel\n"
     )

     fixture_path = get_test_file("attachment-2.txt")
     with open(fixture_path) as fixture:
         parsed = email.message_from_file(fixture, _class=Message)
     text, found = Scrubber("*****@*****.**", parsed).scrub()

     self.assertEqual(len(found), 1)
     self.assertEqual(found[0], expected_signature)
     self.assertEqual(text, expected_text)
Esempio n. 3
0
    def test_remove_next_part_from_content(self):
        # Scrubbing the pipermail_nextpart.txt fixture must drop the
        # "next part" separator line from the returned text.
        with open(get_test_file("pipermail_nextpart.txt")) as email_file:
            msg = email.message_from_file(email_file, _class=Message)
        scrubber = Scrubber("*****@*****.**", msg)
        contents, attachments = scrubber.scrub()

        # failIf is a deprecated alias that newer unittest versions have
        # removed; assertFalse is its direct replacement.
        separator = "-------------- next part --------------"
        self.assertFalse(separator in contents)
Esempio n. 4
0
    def add_to_list(self, list_name, message):
        """Add the message to a specific list of the store.

        :param list_name: The fully qualified list name to which the
            message should be added.
        :param message: An email.message.Message instance containing at
            least a unique Message-ID header.  The message will be given
            an X-Message-ID-Hash header, overriding any existing such
            header.
        :returns: The calculated X-Message-ID-Hash header.
        :raises ValueError: if the message is missing a Message-ID
            header.
            The storage service is also allowed to raise this exception
            if it finds, but disallows, collisions.
        """
        # NOTE(review): nothing in this method sets X-Message-ID-Hash on
        # the message explicitly - confirm whether a helper does.
        # The mapped class is kept in a local that does not reuse the name
        # of the stdlib "email" package.
        email_cls = get_class_object(list_to_table_name(list_name), 'email',
                                     MetaData(self.engine), create=True)
        # "in" performs the same header lookup as has_key(), and unlike
        # has_key() it is still available on Python 3 messages.
        if "Message-Id" not in message:
            raise ValueError("No 'Message-Id' header in email", message)
        msg_id = message['Message-Id'].strip("<>")
        msg_id_hash = get_message_id_hash(msg_id)
        if self.get_message_by_id_from_list(list_name, msg_id) is not None:
            # A message with this id is already in the list: report it and
            # return the hash without storing anything.
            print("Duplicate email from %s: %s" %
                  (message['From'], message.get('Subject', '""')))
            return msg_id_hash

        # Find thread id
        ref, thread_id = get_ref_and_thread_id(message, list_name, self)
        if thread_id is None:
            # make up the thread_id if not found
            thread_id = msg_id_hash

        from_name, from_email = parseaddr(message['From'])
        from_name = header_to_unicode(from_name)
        # The full text must be captured before scrubbing, because the
        # scrubber modifies the message in-place.
        full = message.as_string()
        scrubber = Scrubber(list_name, message, self)
        payload = scrubber.scrub()  # modifies the message in-place

        #category = 'Question' # TODO: enum + i18n ?
        #if ('agenda' in message.get('Subject', '').lower() or
        #        'reminder' in message.get('Subject', '').lower()):
        #    # i18n!
        #    category = 'Agenda'

        mail = email_cls(
            sender=from_name,
            email=from_email,
            subject=header_to_unicode(message.get('Subject')),
            content=payload.encode("utf-8"),
            date=parsedate(message.get("Date")),
            message_id=msg_id,
            stable_url_id=msg_id_hash,
            thread_id=thread_id,
            references=ref,
            full=full,
            )
        self.session.add(mail)
        return msg_id_hash
Esempio n. 5
0
 def test_html_only_email(self):
     # html-email-2.txt carries nothing but an HTML part, so the scrubbed
     # content comes back empty. Even then it has to be a unicode object
     # rather than a str.
     fixture_path = get_test_file("html-email-2.txt")
     with open(fixture_path) as fixture:
         parsed = email.message_from_file(fixture, _class=Message)
     text, _found = Scrubber("*****@*****.**", parsed).scrub()
     is_unicode = isinstance(text, unicode)
     self.assertTrue(
         is_unicode, u"Scrubbed content should always be unicode")
Esempio n. 6
0
 def test_html_only_email(self):
     # The fixture holds an HTML part only, which leaves the scrubbed
     # content empty; the empty value must still be unicode, not str.
     with open(get_test_file("html-email-2.txt")) as source:
         message = email.message_from_file(source, _class=Message)
     result = Scrubber("*****@*****.**", message).scrub()
     body, _extracted = result
     self.assertTrue(
         isinstance(body, unicode),
         u"Scrubbed content should always be unicode",
     )
Esempio n. 7
0
 def test_name_unicode(self):
     # Every attachment name produced from attachment-1.txt through
     # attachment-5.txt must be a unicode object.
     for index in range(1, 6):
         fixture_name = "attachment-%d.txt" % index
         with open(get_test_file(fixture_name)) as fixture:
             parsed = email.message_from_file(fixture, _class=Message)
         _text, found = Scrubber("*****@*****.**", parsed).scrub()
         for entry in found:
             # The name is the second field of each attachment tuple.
             filename = entry[1]
             self.assertTrue(
                 isinstance(filename, unicode),
                 "attachment %r must be unicode" % filename)
Esempio n. 8
0
 def test_non_ascii_payload(self):
     """Scrubber must handle non-ascii messages"""
     # The same decoded text is expected whichever fixture encoding is
     # being read.
     expected = (u'This message contains non-ascii '
                 u'characters:\n\xe9 \xe8 \xe7 \xe0 \xee \xef \xeb \u20ac\n')
     for encoding in ("utf8", "iso8859"):
         fixture_path = get_test_file("payload-%s.txt" % encoding)
         with open(fixture_path) as fixture:
             parsed = email.message_from_file(fixture, _class=Message)
         text, _found = Scrubber("*****@*****.**", parsed).scrub()
         self.assertTrue(isinstance(text, unicode))
         self.assertEqual(text, expected)
Esempio n. 9
0
 def test_non_ascii_payload(self):
     """Scrubber must handle non-ascii messages"""
     # One expected unicode value is checked against both the utf8 and
     # the iso8859 fixture.
     wanted = (
         u'This message contains non-ascii '
         u'characters:\n\xe9 \xe8 \xe7 \xe0 \xee \xef \xeb \u20ac\n'
     )
     for charset_tag in ["utf8", "iso8859"]:
         with open(get_test_file("payload-%s.txt" % charset_tag)) as source:
             message = email.message_from_file(source, _class=Message)
         cleaner = Scrubber("*****@*****.**", message)
         body, _extracted = cleaner.scrub()
         body_is_unicode = isinstance(body, unicode)
         self.assertTrue(body_is_unicode)
         self.assertEqual(body, wanted)
Esempio n. 10
0
 def test_html_email_1(self):
     # html-email-1.txt: a single attachment (the HTML part) is expected,
     # alongside the scrubbed unicode text.
     with open(get_test_file("html-email-1.txt")) as fixture:
         parsed = email.message_from_file(fixture, _class=Message)
     text, found = Scrubber("*****@*****.**", parsed).scrub()
     self.assertEqual(len(found), 1)
     # HTML part: compare the first four fields, then the payload length
     html_part = found[0]
     self.assertEqual(
         html_part[0:4],
         (2, "attachment.html", "text/html", "iso-8859-1"))
     self.assertEqual(len(html_part[4]), 2723)
     # Scrubbed content
     self.assertEqual(
         text,
         u"This is a test message\r\n"
         u"Non-ASCII chars: r\xe9ponse fran\xe7ais \n")
Esempio n. 11
0
 def test_bad_content_type(self):
     """Scrubber must handle unknown content-types"""
     with open(get_test_file("payload-unknown.txt")) as email_file:
         msg = email.message_from_file(email_file, _class=Message)
     scrubber = Scrubber("*****@*****.**", msg)
     try:
         contents, attachments = scrubber.scrub()
     except LookupError as e:
         # "except ... as" and the call form of print parse on Python 2.6+
         # and on Python 3; the comma and statement forms are Python 2 only.
         import traceback
         print(traceback.format_exc())
         self.fail(e)  # codec not found
Esempio n. 12
0
 def test_bad_content_type(self):
     """Scrubber must handle unknown content-types"""
     with open(get_test_file("payload-unknown.txt")) as email_file:
         msg = email.message_from_file(email_file, _class=Message)
     scrubber = Scrubber("*****@*****.**", msg)
     try:
         contents, attachments = scrubber.scrub()
     except LookupError as e:
         # The "as" form is accepted by Python 2.6+ and Python 3, whereas
         # "except LookupError, e" is a syntax error on Python 3.
         import traceback
         # Show the full traceback before failing the test with the error.
         print(traceback.format_exc())
         self.fail(e)  # codec not found
Esempio n. 13
0
 def test_html_email_1(self):
     # Scrubbing html-email-1.txt must yield one attachment, the HTML
     # part, plus the expected unicode text.
     expected_head = (2, "attachment.html", "text/html", "iso-8859-1")
     expected_body = (
         u"This is a test message\r\n"
         u"Non-ASCII chars: r\xe9ponse fran\xe7ais \n"
     )
     source_path = get_test_file("html-email-1.txt")
     with open(source_path) as source:
         message = email.message_from_file(source, _class=Message)
     cleaner = Scrubber("*****@*****.**", message)
     body, extracted = cleaner.scrub()
     self.assertEqual(len(extracted), 1)
     # HTML part
     self.assertEqual(extracted[0][0:4], expected_head)
     self.assertEqual(len(extracted[0][4]), 2723)
     # Scrubbed content
     self.assertEqual(body, expected_body)
Esempio n. 14
0
 def test_attachment_3(self):
     # attachment-3.txt: two attachments are expected, an HTML part
     # followed by a JPEG image, plus the scrubbed text.
     with open(get_test_file("attachment-3.txt")) as fixture:
         parsed = email.message_from_file(fixture, _class=Message)
     text, found = Scrubber("*****@*****.**", parsed).scrub()
     self.assertEqual(len(found), 2)
     # HTML part
     html_part = found[0]
     self.assertEqual(
         html_part[0:4],
         (3, "attachment.html", "text/html", "iso-8859-1"))
     self.assertEqual(len(html_part[4]), 3134)
     # Image attachment
     image_part = found[1]
     self.assertEqual(
         image_part[0:4],
         (4, "GeoffreyRoucourt.jpg", "image/jpeg", None))
     self.assertEqual(len(image_part[4]), 282180)
     # Scrubbed content
     self.assertEqual(text, u"This is a test message\r\n")
Esempio n. 15
0
 def test_attachment_name_badly_encoded(self):
     # A Content-Disposition filename holding non-ASCII bytes must not
     # make scrub() raise UnicodeDecodeError; the expected attachment
     # below carries the name u'attachment.bin'.
     expected = [
         (0, u'attachment.bin', 'text/plain', None, b'Dummy content'),
     ]
     message = email.message.Message()
     message["From"] = "*****@*****.**"
     message["Message-ID"] = "<dummy>"
     message.set_payload(b"Dummy content")
     message.add_header(
         b'Content-Disposition',
         b'attachment',
         filename=b'non-ascii-\xb8\xb1\xb1\xbe.jpg')
     cleaner = Scrubber("*****@*****.**", message)
     try:
         _body, extracted = cleaner.scrub()
     except UnicodeDecodeError:
         # Print the traceback before failing with a readable message.
         print(format_exc())
         self.fail("Could not decode the filename")
     self.assertEqual(extracted, expected)
Esempio n. 16
0
 def test_attachment_5(self):
     # attachment-5.txt: one text attachment is expected, together with
     # the scrubbed unicode text of the message.
     expected_head = (2, u"todo-djeuner.txt", "text/plain", "utf-8")
     expected_text = (
         u'This is a test, HTML message with '
         u'accented letters : \xe9 \xe8 \xe7 \xe0.\r\nAnd an '
         u'attachment with an accented filename\r\n\r\n\r\n\r\n'
     )
     with open(get_test_file("attachment-5.txt")) as fixture:
         parsed = email.message_from_file(fixture, _class=Message)
     text, found = Scrubber("*****@*****.**", parsed).scrub()
     self.assertEqual(len(found), 1)
     # text attachment
     text_part = found[0]
     self.assertEqual(text_part[0:4], expected_head)
     self.assertEqual(len(text_part[4]), 112)
     # Scrubbed content
     self.assertEqual(text, expected_text)
Esempio n. 17
0
 def test_attachment_1(self):
     # attachment-1.txt: the vCard is expected as the only attachment,
     # and the scrubbed text must match the value below.
     expected_vcard = (
         2, 'puntogil.vcf', 'text/x-vcard', "utf-8",
         'begin:vcard\r\nfn:gil\r\nn:;gil\r\nversion:2.1\r\n'
         'end:vcard\r\n\r\n',
     )
     expected_text = (
         "This is a test message.\r\n\r\n"
         "\n-- \ndevel mailing list\[email protected]\n"
         "https://admin.fedoraproject.org/mailman/listinfo/devel\n"
     )
     fixture_path = get_test_file("attachment-1.txt")
     with open(fixture_path) as fixture:
         parsed = email.message_from_file(fixture, _class=Message)
     text, found = Scrubber("*****@*****.**", parsed).scrub()
     self.assertEqual(len(found), 1)
     self.assertEqual(found[0], expected_vcard)
     self.assertEqual(text, expected_text)
Esempio n. 18
0
 def test_attachment_1(self):
     # Scrubbing attachment-1.txt must produce a single vCard attachment
     # and the expected message text.
     with open(get_test_file("attachment-1.txt")) as source:
         message = email.message_from_file(source, _class=Message)
     cleaner = Scrubber("*****@*****.**", message)
     body, extracted = cleaner.scrub()
     self.assertEqual(len(extracted), 1)
     vcard = extracted[0]
     self.assertEqual(
         vcard,
         (2, 'puntogil.vcf', 'text/x-vcard', "utf-8",
          'begin:vcard\r\nfn:gil\r\nn:;gil\r\nversion:2.1\r\n'
          'end:vcard\r\n\r\n'))
     self.assertEqual(
         body,
         "This is a test message.\r\n\r\n"
         "\n-- \ndevel mailing list\[email protected]\n"
         "https://admin.fedoraproject.org/mailman/listinfo/devel\n")
Esempio n. 19
0
 def test_attachment_5(self):
     # attachment-5.txt: one text attachment is expected, together with
     # the scrubbed unicode text of the message.
     # The original kept the accented name u"todo-d\xe9jeuner.txt"
     # commented out; the expectation in force is u"attachment.bin".
     expected_head = (2, u"attachment.bin", "text/plain", "utf-8")
     expected_text = (
         u'This is a test, HTML message with '
         u'accented letters : \xe9 \xe8 \xe7 \xe0.\r\nAnd an '
         u'attachment with an accented filename\r\n\r\n\r\n\r\n'
     )
     with open(get_test_file("attachment-5.txt")) as source:
         message = email.message_from_file(source, _class=Message)
     body, extracted = Scrubber("*****@*****.**", message).scrub()
     self.assertEqual(len(extracted), 1)
     # text attachment
     self.assertEqual(extracted[0][0:4], expected_head)
     self.assertEqual(len(extracted[0][4]), 112)
     # Scrubbed content
     self.assertEqual(body, expected_text)
Esempio n. 20
0
 def test_attachment_3(self):
     # Scrubbing attachment-3.txt must yield an HTML part and a JPEG
     # image as attachments, plus the expected text.
     expected_html_head = (3, "attachment.html", "text/html", "iso-8859-1")
     expected_image_head = (4, "GeoffreyRoucourt.jpg", "image/jpeg", None)
     source_path = get_test_file("attachment-3.txt")
     with open(source_path) as source:
         message = email.message_from_file(source, _class=Message)
     cleaner = Scrubber("*****@*****.**", message)
     body, extracted = cleaner.scrub()
     self.assertEqual(len(extracted), 2)
     # HTML part
     self.assertEqual(extracted[0][0:4], expected_html_head)
     self.assertEqual(len(extracted[0][4]), 3134)
     # Image attachment
     self.assertEqual(extracted[1][0:4], expected_image_head)
     self.assertEqual(len(extracted[1][4]), 282180)
     # Scrubbed content
     self.assertEqual(body, u"This is a test message\r\n")
Esempio n. 21
0
 def test_attachment_2(self):
     # Scrubbing attachment-2.txt must produce the PGP signature as the
     # only attachment and the expected unicode text.
     with open(get_test_file("attachment-2.txt")) as source:
         message = email.message_from_file(source, _class=Message)
     cleaner = Scrubber("*****@*****.**", message)
     body, extracted = cleaner.scrub()
     self.assertEqual(len(extracted), 1)
     signature = extracted[0]
     self.assertEqual(
         signature,
         (3, 'signature.asc', 'application/pgp-signature', None,
          '-----BEGIN PGP SIGNATURE-----\r\nVersion: GnuPG v1.4.12 '
          '(GNU/Linux)\r\nComment: Using GnuPG with Mozilla - '
          'http://www.enigmail.net/\r\n\r\niEYEARECAAYFAlBhm3oACgkQhmBj'
          'z394AnmMnQCcC+6tWcqE1dPQmIdRbLXgKGVp\r\nEeUAn2OqtaXaXaQV7rx+'
          'SmOldmSzcFw4\r\n=OEJv\r\n-----END PGP SIGNATURE-----\r\n'))
     self.assertEqual(
         body,
         u"This is a test message\r\nNon-ascii chars: Hofm\xfchlgasse\r\n"
         u"\n-- \ndevel mailing list\[email protected]\n"
         u"https://admin.fedoraproject.org/mailman/listinfo/devel\n")
Esempio n. 22
0
 def test_attachment_4(self):
     # attachment-4.txt: an HTML part and a text file are expected as
     # attachments, plus the scrubbed unicode text.
     expected_html_head = (3, "attachment.html", "text/html", "iso-8859-1")
     # The original kept the accented name u"todo-d\xe9jeuner.txt"
     # commented out; the expectation in force drops the accented letter.
     expected_text_head = (4, u"todo-djeuner.txt", "text/plain", "utf-8")
     expected_body = (
         u"This is a test, HTML message with "
         u"accented letters : \xe9 \xe8 \xe7 \xe0.\r\nAnd an "
         u"attachment with an accented filename\r\n"
     )
     with open(get_test_file("attachment-4.txt")) as fixture:
         parsed = email.message_from_file(fixture, _class=Message)
     body, found = Scrubber("*****@*****.**", parsed).scrub()
     self.assertEqual(len(found), 2)
     # HTML part
     self.assertEqual(found[0][0:4], expected_html_head)
     self.assertEqual(len(found[0][4]), 114)
     # text attachment
     self.assertEqual(found[1][0:4], expected_text_head)
     self.assertEqual(len(found[1][4]), 112)
     # Scrubbed content
     self.assertEqual(body, expected_body)
Esempio n. 23
0
    def add_to_list(self, mlist, message):
        """Add the message to a specific list of the store.

        :param mlist: The mailing-list object, implementing
            mailman.interfaces.mailinglist.IMailingList.
        :param message: An email.message.Message instance containing at
            least a unique Message-ID header.  The message will be given
            an X-Message-ID-Hash header, overriding any existing such
            header.
        :returns: The calculated X-Message-ID-Hash header.
        :raises ValueError: if the message is missing a Message-ID
            header.
            The storage service is also allowed to raise this exception
            if it finds, but disallows, collisions.
        """
        list_name = unicode(mlist.fqdn_listname)
        # Create the list if it does not exist
        mailing_list = self.db.find(List, List.name == list_name).one()
        if mailing_list is None:
            mailing_list = List(list_name)
            self.db.add(mailing_list)
        # These two properties are refreshed on every call, not only when
        # the list row has just been created.
        mailing_list.display_name = mlist.display_name
        mailing_list.subject_prefix = mlist.subject_prefix
        # "in" performs the same header lookup as has_key(), and unlike
        # has_key() it is still available on Python 3 messages.
        if "Message-Id" not in message:
            raise ValueError("No 'Message-Id' header in email", message)
        msg_id = unicode(unquote(message['Message-Id']))
        email = Email(list_name, msg_id)
        if self.is_message_in_list(list_name, email.message_id):
            print("Duplicate email from %s: %s" %
                  (message['From'], message.get('Subject', '""')))
            return email.message_id_hash

        # the message.as_string() call must be done before scrubbing
        email_full = EmailFull(list_name, msg_id, message.as_string())
        # Find thread id
        new_thread = False
        ref, thread_id = get_ref_and_thread_id(message, list_name, self)
        if thread_id is None:
            new_thread = True
            # make up the thread_id if not found
            thread_id = email.message_id_hash
        email.thread_id = thread_id
        email.in_reply_to = ref

        from_name, from_email = parseaddr(message['From'])
        from_name = header_to_unicode(from_name)
        email.sender_name = from_name.strip()
        email.sender_email = unicode(from_email).strip()
        email.subject = header_to_unicode(message.get('Subject'))
        msg_date = parsedate(message.get("Date"))
        if msg_date is None:
            # Absent or unparseable date
            msg_date = datetime.datetime.utcnow()
        # Read the offset while tzinfo is still attached; the date itself
        # is stored as a naive UTC value.
        utcoffset = msg_date.utcoffset()
        if msg_date.tzinfo is not None:
            msg_date = msg_date.astimezone(tzutc()).replace(tzinfo=None)
        email.date = msg_date
        if utcoffset is None:
            email.timezone = 0
        else:
            # in minutes; floor division keeps the value an integer on
            # Python 3 as well
            email.timezone = (
                (utcoffset.days * 24 * 60 * 60) + utcoffset.seconds) // 60

        scrubber = Scrubber(list_name, message)
        # warning: scrubbing modifies the msg in-place
        email.content, attachments = scrubber.scrub()

        # store the Mailman user
        email.user_id = self._store_mailman_user(email.sender_email)

        #category = 'Question' # TODO: enum + i18n ?
        #if ('agenda' in message.get('Subject', '').lower() or
        #        'reminder' in message.get('Subject', '').lower()):
        #    # i18n!
        #    category = 'Agenda'

        if new_thread:
            thread = Thread(list_name, thread_id, email.date)
        else:
            thread = self.db.find(
                Thread,
                And(
                    Thread.list_name == list_name,
                    Thread.thread_id == thread_id,
                )).one()
        thread.date_active = email.date
        self.db.add(thread)

        self.db.add(email)
        self.db.add(email_full)
        compute_thread_order_and_depth(thread)
        for attachment in attachments:
            self.add_attachment(list_name, msg_id, *attachment)
        self.flush()
        # search indexing
        if self.search_index is not None:
            self.search_index.add(email)
        return email.message_id_hash
Esempio n. 24
0
    def add_to_list(self, mlist, message):
        """Add the message to a specific list of the store.

        :param mlist: The mailing-list object, implementing
            mailman.interfaces.mailinglist.IMailingList.
        :param message: An email.message.Message instance containing at
            least a unique Message-ID header.  The message will be given
            an X-Message-ID-Hash header, overriding any existing such
            header.
        :returns: The calculated X-Message-ID-Hash header.
        :raises ValueError: if the message is missing a Message-ID
            header.
            The storage service is also allowed to raise this exception
            if it finds, but disallows, collisions.
        """
        list_name = unicode(mlist.fqdn_listname)
        # Create the list if it does not exist
        mailing_list = self.db.find(List, List.name == list_name).one()
        if mailing_list is None:
            mailing_list = List(list_name)
            self.db.add(mailing_list)
        # The display name is refreshed on every call.
        mailing_list.display_name = mlist.display_name
        # "in" performs the same header lookup as has_key(), and unlike
        # has_key() it is still available on Python 3 messages.
        if "Message-Id" not in message:
            raise ValueError("No 'Message-Id' header in email", message)
        msg_id = unicode(unquote(message['Message-Id']))
        email = Email(list_name, msg_id)
        if self.is_message_in_list(list_name, email.message_id):
            print("Duplicate email from %s: %s" %
                  (message['From'], message.get('Subject', '""')))
            return email.message_id_hash

        # the message.as_string() call must be done before scrubbing
        email_full = EmailFull(list_name, msg_id, message.as_string())
        # Find thread id
        new_thread = False
        ref, thread_id = get_ref_and_thread_id(message, list_name, self)
        if thread_id is None:
            new_thread = True
            # make up the thread_id if not found
            thread_id = email.message_id_hash
        email.thread_id = thread_id
        email.in_reply_to = ref

        from_name, from_email = parseaddr(message['From'])
        from_name = header_to_unicode(from_name)
        email.sender_name = from_name.strip()
        email.sender_email = unicode(from_email).strip()
        email.subject = header_to_unicode(message.get('Subject'))
        msg_date = parsedate(message.get("Date"))
        if msg_date is None:
            # Absent or unparseable date: fall back to the current time in
            # UTC, because email.date is stored as a naive UTC value.
            msg_date = datetime.datetime.utcnow()
        # The offset must be read before tzinfo is stripped below;
        # afterwards utcoffset() is always None and the stored timezone
        # would always be 0.
        utcoffset = msg_date.utcoffset()
        if msg_date.tzinfo is not None:
            msg_date = msg_date.astimezone(tzutc()).replace(tzinfo=None)
        email.date = msg_date
        if utcoffset is None:
            email.timezone = 0
        else:
            # in minutes; floor division keeps the value an integer on
            # Python 3 as well
            email.timezone = (
                (utcoffset.days * 24 * 60 * 60) + utcoffset.seconds) // 60

        scrubber = Scrubber(list_name, message)
        # warning: scrubbing modifies the msg in-place
        email.content, attachments = scrubber.scrub()

        #category = 'Question' # TODO: enum + i18n ?
        #if ('agenda' in message.get('Subject', '').lower() or
        #        'reminder' in message.get('Subject', '').lower()):
        #    # i18n!
        #    category = 'Agenda'

        if new_thread:
            thread = Thread(list_name, thread_id, email.date)
        else:
            thread = self.db.find(Thread, And(
                            Thread.list_name == list_name,
                            Thread.thread_id == thread_id,
                            )).one()
        thread.date_active = email.date
        self.db.add(thread)

        self.db.add(email)
        self.db.add(email_full)
        # The flush happens before the attachments are added, as in the
        # original statement order.
        self.flush()
        for attachment in attachments:
            self.add_attachment(list_name, msg_id, *attachment)
        return email.message_id_hash
Esempio n. 25
0
    def add_to_list(self, mlist, message):
        """Add the message to the store under the given mailing list.

        :param mlist: The mailing-list object; its fqdn_listname and
            archive_policy attributes are read here, and the attributes
            named in List.mailman_props are copied when the list is first
            created.
        :param message: The message to archive; it must carry a
            Message-Id header.
        :returns: The message id hash of the stored (or already present)
            email, or None when the list's archive policy is "never".
        :raises ValueError: if the message has no Message-Id header, or if
            the sender address cannot be converted to unicode.
        """
        list_name = unicode(mlist.fqdn_listname)
        # Create the list if it does not exist
        mailing_list = self.db.find(List, List.name == list_name).one()
        if mailing_list is None:
            mailing_list = List(list_name)
            # Don't wait for the cache to set those properties
            for propname in mailing_list.mailman_props:
                setattr(mailing_list, propname, getattr(mlist, propname))
            self.db.add(mailing_list)
        if mlist.archive_policy == ArchivePolicy.never:
            # Arguments are passed separately so the message is only
            # formatted when the record is actually emitted.
            logger.info("Archiving disabled by list policy for %s", list_name)
            return None
        # "in" performs the same header lookup as has_key(), and unlike
        # has_key() it is still available on Python 3 messages.
        if "Message-Id" not in message:
            raise ValueError("No 'Message-Id' header in email", message)
        msg_id = unicode(unquote(message["Message-Id"]))
        # Protect against extremely long Message-Ids (there is no limit in the
        # email spec), it's set to VARCHAR(255) in the database
        if len(msg_id) >= 255:
            msg_id = msg_id[:254]
        email = Email(list_name, msg_id)
        if self.is_message_in_list(list_name, email.message_id):
            logger.info("Duplicate email from %s: %s",
                        message["From"], message.get("Subject", '""'))
            return email.message_id_hash

        # if not getattr(settings.KITTYSTORE_FULL_EMAIL):
        #    # If it's a valid value, leave it to the "prototype" archiver
        #    # Note: the message.as_string() call must be done before scrubbing
        #    email_full = EmailFull(list_name, msg_id, message.as_string())
        #    self.db.add(email_full)

        # Find thread id
        new_thread = False
        ref, thread_id = get_ref_and_thread_id(message, list_name, self)
        if thread_id is None:
            new_thread = True
            # make up the thread_id if not found
            thread_id = email.message_id_hash
        email.thread_id = thread_id
        email.in_reply_to = ref

        try:
            from_name, from_email = parseaddr(message["From"])
            from_name = header_to_unicode(from_name).strip()
            email.sender_email = unicode(from_email).strip()
        except (UnicodeDecodeError, UnicodeEncodeError):
            raise ValueError("Non-ascii sender address", message)
        sender = self.db.find(Sender, Sender.email == email.sender_email).one()
        if sender is None:
            sender = Sender(email.sender_email, from_name)
            self.db.add(sender)
        else:
            sender.name = from_name  # update the name if needed
        email.subject = header_to_unicode(message.get("Subject"))
        if email.subject is not None:
            # limit subject size to 2000 chars or PostgreSQL may complain
            email.subject = email.subject[:2000]
        msg_date = parsedate(message.get("Date"))
        if msg_date is None:
            # Absent or unparseable date
            msg_date = datetime.datetime.utcnow()
        # Read the offset while tzinfo is still attached; the date itself
        # is stored as a naive UTC value.
        utcoffset = msg_date.utcoffset()
        if msg_date.tzinfo is not None:
            msg_date = msg_date.astimezone(tzutc()).replace(tzinfo=None)
        email.date = msg_date
        if utcoffset is None:
            email.timezone = 0
        else:
            # in minutes; floor division keeps the value an integer on
            # Python 3 as well
            email.timezone = (
                (utcoffset.days * 24 * 60 * 60) + utcoffset.seconds) // 60

        scrubber = Scrubber(list_name, message)
        # warning: scrubbing modifies the msg in-place
        email.content, attachments = scrubber.scrub()

        # category = 'Question' # TODO: enum + i18n ?
        # if ('agenda' in message.get('Subject', '').lower() or
        #        'reminder' in message.get('Subject', '').lower()):
        #    # i18n!
        #    category = 'Agenda'

        if new_thread:
            thread = Thread(list_name, thread_id, email.date)
        else:
            thread = self.db.find(
                Thread,
                And(Thread.list_name == list_name,
                    Thread.thread_id == thread_id)).one()
        thread.date_active = email.date
        self.db.add(thread)

        self.db.add(email)
        compute_thread_order_and_depth(thread)
        for attachment in attachments:
            self.add_attachment(list_name, msg_id, *attachment)
        self.flush()
        # invalidate the cache
        events.notify(events.NewMessage(self, mlist, email))
        if new_thread:
            events.notify(events.NewThread(self, mlist, thread))
        # search indexing
        # do it after caching because we need some list properties (like
        # archive_policy)
        if self.search_index is not None:
            self.search_index.add(email)

        return email.message_id_hash