Example #1
0
 def test_classical_thread(self):
     # Reply tree under test:
     #   msg1
     #   |-msg2
     #   | `-msg4
     #   `-msg3
     thread = Thread("example-list", "<msg1>")
     self.store.db.add(thread)
     msg1, msg2, msg3, msg4 = [make_fake_email(num) for num in range(1, 5)]
     emails = (msg1, msg2, msg3, msg4)
     # Every reply belongs to the thread started by msg1
     for reply in (msg2, msg3, msg4):
         reply.thread_id = u"<msg1>"
     # msg2 and msg3 answer msg1, msg4 answers msg2
     parents = ((msg2, u"<msg1>"), (msg3, u"<msg1>"), (msg4, u"<msg2>"))
     for reply, parent_id in parents:
         reply.in_reply_to = parent_id
     # Seed bogus values so the assertions prove they were recomputed
     for msg in emails:
         msg.thread_order = 42
         msg.thread_depth = 42
     for msg in emails:
         self.store.db.add(msg)
     self.store.flush()
     compute_thread_order_and_depth(thread)
     # (email, expected thread_order, expected thread_depth)
     expectations = (
         (msg1, 0, 0),
         (msg2, 1, 1),
         (msg3, 3, 1),
         (msg4, 2, 2),
     )
     for msg, order, depth in expectations:
         self.assertEqual(msg.thread_order, order)
         self.assertEqual(msg.thread_depth, depth)
Example #2
0
 def test_classical_thread(self):
     # Reply tree under test:
     #   msg1
     #   |-msg2
     #   | `-msg4
     #   `-msg3
     thread = Thread(list_name="example-list", thread_id="<msg1>")
     self.store.db.add(thread)
     msg1, msg2, msg3, msg4 = [make_fake_email(num) for num in range(1, 5)]
     emails = (msg1, msg2, msg3, msg4)
     # Every reply belongs to the thread started by msg1
     for reply in (msg2, msg3, msg4):
         reply.thread_id = u"<msg1>"
     # msg2 and msg3 answer msg1, msg4 answers msg2
     parents = ((msg2, u"<msg1>"), (msg3, u"<msg1>"), (msg4, u"<msg2>"))
     for reply, parent_id in parents:
         reply.in_reply_to = parent_id
     # Seed bogus values so the assertions prove they were recomputed
     for msg in emails:
         msg.thread_order = 42
         msg.thread_depth = 42
     for msg in emails:
         self.store.db.add(msg)
     self.store.flush()
     compute_thread_order_and_depth(thread)
     # (email, expected thread_order, expected thread_depth)
     expectations = (
         (msg1, 0, 0),
         (msg2, 1, 1),
         (msg3, 3, 1),
         (msg4, 2, 2),
     )
     for msg, order, depth in expectations:
         self.assertEqual(msg.thread_order, order)
         self.assertEqual(msg.thread_depth, depth)
Example #3
0
def apply(store):
    """Add the thread_order and thread_depth columns and populate them.

    Runs every statement registered in SQL for the store's database type,
    then recomputes order and depth for each Thread found in the store and
    commits the result.
    """
    # Schema change first: the statements are selected by database type
    for sql_statement in SQL[get_db_type(store)]:
        store.execute(sql_statement)
    # Then fill in the new columns, one thread at a time
    for existing_thread in store.find(Thread):
        compute_thread_order_and_depth(existing_thread)
        store.add(existing_thread)
    store.commit()
Example #4
0
def apply(store):
    """Add the thread_order and thread_depth columns and populate them.

    Runs every statement registered in SQL for the store's database type,
    then recomputes order and depth for each Thread found in the store and
    commits the result.
    """
    # Schema change first: the statements are selected by database type
    for sql_statement in SQL[get_db_type(store)]:
        store.execute(sql_statement)
    # Then fill in the new columns, one thread at a time
    for existing_thread in store.find(Thread):
        compute_thread_order_and_depth(existing_thread)
        store.add(existing_thread)
    store.commit()
Example #5
0
 def attach_to_thread(self, email, thread):
     """Attach an email to an existing thread.

     The email is moved into the thread as a reply to the thread's starting
     email; the thread's date_active is advanced when the email is more
     recent, and the thread order and depth are recomputed before flushing.

     :raises ValueError: if the email's date is not strictly later than the
         date of the thread's starting email.
     """
     if email.date <= thread.starting_email.date:
         reason = ("Can't attach emails older than the first "
                   "email in a thread")
         raise ValueError(reason)
     # Re-parent the email directly under the thread's first message
     email.thread_id = thread.thread_id
     email.in_reply_to = thread.starting_email.message_id
     # The activity date is only ever pushed forward
     if email.date > thread.date_active:
         thread.date_active = email.date
     compute_thread_order_and_depth(thread)
     self.flush()
Example #6
0
 def attach_to_thread(self, email, thread):
     """Attach an email to an existing thread.

     The email is moved into the thread as a reply to the thread's starting
     email; the thread's date_active is advanced when the email is more
     recent, and the thread order and depth are recomputed before flushing.

     :raises ValueError: if the email's date is not strictly later than the
         date of the thread's starting email.
     """
     if email.date <= thread.starting_email.date:
         reason = ("Can't attach emails older than the first "
                   "email in a thread")
         raise ValueError(reason)
     # Re-parent the email directly under the thread's first message
     email.thread_id = thread.thread_id
     email.in_reply_to = thread.starting_email.message_id
     # The activity date is only ever pushed forward
     if email.date > thread.date_active:
         thread.date_active = email.date
     compute_thread_order_and_depth(thread)
     self.flush()
Example #7
0
 def test_reply_to_oneself(self):
     # Regression case seen in real archives: an email whose In-Reply-To
     # points at its own Message-ID.
     thread = Thread("example-list", "<msg1>")
     self.store.db.add(thread)
     selfref = make_fake_email(1)
     selfref.in_reply_to = u"<msg1>"
     # Bogus starting values, to prove they get recomputed
     selfref.thread_order = 42
     selfref.thread_depth = 42
     self.store.db.add(selfref)
     self.store.flush()
     compute_thread_order_and_depth(thread)
     # Reaching this point means there was no "maximum recursion depth
     # exceeded" traceback; the lone email must sit at the thread root.
     for attr_name in ("thread_order", "thread_depth"):
         self.assertEqual(getattr(selfref, attr_name), 0)
Example #8
0
 def test_reply_to_oneself(self):
     # Regression case seen in real archives: an email whose In-Reply-To
     # points at its own Message-ID.
     thread = Thread(list_name="example-list", thread_id="<msg1>")
     self.store.db.add(thread)
     selfref = make_fake_email(1)
     selfref.in_reply_to = u"<msg1>"
     # Bogus starting values, to prove they get recomputed
     selfref.thread_order = 42
     selfref.thread_depth = 42
     self.store.db.add(selfref)
     self.store.flush()
     compute_thread_order_and_depth(thread)
     # Reaching this point means there was no "maximum recursion depth
     # exceeded" traceback; the lone email must sit at the thread root.
     for attr_name in ("thread_order", "thread_depth"):
         self.assertEqual(getattr(selfref, attr_name), 0)
Example #9
0
 def test_reply_loops(self):
     """Loops in message replies"""
     # Two emails that each claim to answer the other.  The first one would
     # have to reply to something not yet sent, but archives contain
     # stranger things.
     thread = Thread("example-list", "<msg1>")
     self.store.db.add(thread)
     first = make_fake_email(1)
     first.in_reply_to = u"<msg2>"
     self.store.db.add(first)
     second = make_fake_email(2)
     second.thread_id = u"<msg1>"
     second.in_reply_to = u"<msg1>"
     self.store.db.add(second)
     self.store.flush()
     # No assertion: the test passes as long as this call returns
     compute_thread_order_and_depth(thread)
Example #10
0
 def test_reply_loops(self):
     """Loops in message replies"""
     # Two emails that each claim to answer the other.  The first one would
     # have to reply to something not yet sent, but archives contain
     # stranger things.
     thread = Thread(list_name="example-list", thread_id="<msg1>")
     self.store.db.add(thread)
     first = make_fake_email(1)
     first.in_reply_to = u"<msg2>"
     self.store.db.add(first)
     second = make_fake_email(2)
     second.thread_id = u"<msg1>"
     second.in_reply_to = u"<msg1>"
     self.store.db.add(second)
     self.store.flush()
     # No assertion: the test passes as long as this call returns
     compute_thread_order_and_depth(thread)
Example #11
0
 def test_simple_thread(self):
     # Smallest possible thread: a starter and one direct reply
     thread = Thread("example-list", "<msg1>")
     self.store.db.add(thread)
     starter = make_fake_email(1)
     # Bogus values, to prove they get recomputed
     starter.thread_order = 42
     starter.thread_depth = 42
     self.store.db.add(starter)
     reply = make_fake_email(2)
     reply.thread_id = u"<msg1>"
     reply.in_reply_to = u"<msg1>"
     reply.thread_order = 42
     reply.thread_depth = 42
     self.store.db.add(reply)
     self.store.flush()
     compute_thread_order_and_depth(thread)
     # The starter is the root (0, 0); its reply comes next, one level down
     for msg, position in ((starter, 0), (reply, 1)):
         self.assertEqual(msg.thread_order, position)
         self.assertEqual(msg.thread_depth, position)
Example #12
0
 def test_simple_thread(self):
     # Smallest possible thread: a starter and one direct reply
     thread = Thread(list_name="example-list", thread_id="<msg1>")
     self.store.db.add(thread)
     starter = make_fake_email(1)
     # Bogus values, to prove they get recomputed
     starter.thread_order = 42
     starter.thread_depth = 42
     self.store.db.add(starter)
     reply = make_fake_email(2)
     reply.thread_id = u"<msg1>"
     reply.in_reply_to = u"<msg1>"
     reply.thread_order = 42
     reply.thread_depth = 42
     self.store.db.add(reply)
     self.store.flush()
     compute_thread_order_and_depth(thread)
     # The starter is the root (0, 0); its reply comes next, one level down
     for msg, position in ((starter, 0), (reply, 1)):
         self.assertEqual(msg.thread_order, position)
         self.assertEqual(msg.thread_depth, position)
Example #13
0
    def add_to_list(self, mlist, message):
        """Add the message to a specific list of the store.

        The list record is created on first use and its display name and
        subject prefix are refreshed from ``mlist`` on every call.  A message
        whose Message-ID is already stored for this list is reported on
        stdout and not stored again.

        :param mlist: The mailing-list object, implementing
            mailman.interfaces.mailinglist.IMailingList.
        :param message: An email.message.Message instance containing at
            least a unique Message-ID header.  The message will be given
            an X-Message-ID-Hash header, overriding any existing such
            header.
        :returns: The calculated X-Message-ID-Hash header.
        :raises ValueError: if the message is missing a Message-ID
            header.
            The storage service is also allowed to raise this exception
            if it finds, but disallows, collisions.
        """
        list_name = unicode(mlist.fqdn_listname)
        # Create the list if it does not exist
        l = self.db.find(List, List.name == list_name).one()
        if l is None:
            l = List(list_name)
            self.db.add(l)
        # Refresh the stored list properties on every call, not only on
        # creation
        l.display_name = mlist.display_name
        l.subject_prefix = mlist.subject_prefix
        if not message.has_key("Message-Id"):
            raise ValueError("No 'Message-Id' header in email", message)
        msg_id = unicode(unquote(message['Message-Id']))
        email = Email(list_name, msg_id)
        # Duplicates are skipped: nothing below runs for a message that is
        # already in this list
        if self.is_message_in_list(list_name, email.message_id):
            print ("Duplicate email from %s: %s" %
                   (message['From'], message.get('Subject', '""')))
            return email.message_id_hash

        #if not getattr(settings.KITTYSTORE_FULL_EMAIL):
        #    # If it's a valid value, leave it to the "prototype" archiver
        #    # Note: the message.as_string() call must be done before scrubbing
        #    email_full = EmailFull(list_name, msg_id, message.as_string())
        #    self.db.add(email_full)

        # Find thread id
        new_thread = False
        ref, thread_id = get_ref_and_thread_id(message, list_name, self)
        if thread_id is None:
            new_thread = True
            # make up the thread_id if not found
            thread_id = email.message_id_hash
        email.thread_id = thread_id
        email.in_reply_to = ref

        # Sender name and address, both stored stripped
        from_name, from_email = parseaddr(message['From'])
        from_name = header_to_unicode(from_name)
        email.sender_name = from_name.strip()
        email.sender_email = unicode(from_email).strip()
        email.subject = header_to_unicode(message.get('Subject'))
        msg_date = parsedate(message.get("Date"))
        if msg_date is None:
            # Absent or unparseable date
            msg_date = datetime.datetime.utcnow()
        # Read the offset before the timezone information is dropped below
        utcoffset = msg_date.utcoffset()
        if msg_date.tzinfo is not None:
            # Store the date as a naive datetime expressed in UTC
            msg_date = msg_date.astimezone(tzutc()).replace(tzinfo=None)
        email.date = msg_date
        if utcoffset is None:
            email.timezone = 0
        else:
            # in minutes
            email.timezone = ( (utcoffset.days * 24 * 60 * 60)
                               + utcoffset.seconds) / 60

        scrubber = Scrubber(list_name, message)
        # warning: scrubbing modifies the msg in-place
        email.content, attachments = scrubber.scrub()

        # store the Mailman user
        email.user_id = self._store_mailman_user(email.sender_email)

        #category = 'Question' # TODO: enum + i18n ?
        #if ('agenda' in message.get('Subject', '').lower() or
        #        'reminder' in message.get('Subject', '').lower()):
        #    # i18n!
        #    category = 'Agenda'

        if new_thread:
            thread = Thread(list_name, thread_id, email.date)
        else:
            # NOTE(review): .one() may yield None if no such thread exists;
            # presumably get_ref_and_thread_id only returns ids of stored
            # threads -- confirm.
            thread = self.db.find(Thread, And(
                            Thread.list_name == list_name,
                            Thread.thread_id == thread_id,
                            )).one()
        # The thread's activity date always becomes this email's date
        thread.date_active = email.date
        self.db.add(thread)

        self.db.add(email)
        # Recompute order and depth now that the email is part of the thread
        compute_thread_order_and_depth(thread)
        for attachment in attachments:
            self.add_attachment(list_name, msg_id, *attachment)
        self.flush()
        # search indexing
        if self.search_index is not None:
            self.search_index.add(email)
        return email.message_id_hash
Example #14
0
    def add_to_list(self, mlist, message):
        """Add the message to a specific list of the store.

        The list record is created on first use (even when archiving turns
        out to be disabled).  A message whose Message-ID is already stored
        for this list is logged and not stored again.  After a successful
        store, NewMessage (and NewThread when applicable) events are sent and
        the email is handed to the search index if one is configured.

        :param mlist: The mailing-list object; this method reads its
            ``fqdn_listname`` and ``archive_policy`` attributes, plus every
            attribute named in the list's ``mailman_props`` when the list
            record is first created.
        :param message: The email message to archive.  It must contain a
            Message-Id header and is modified in place by the scrubber.
        :returns: The email's ``message_id_hash``, or None when the list's
            archive policy is ``ArchivePolicy.never``.
        :raises ValueError: if the message has no Message-Id header, or if
            the sender address cannot be converted to unicode.
        """
        list_name = unicode(mlist.fqdn_listname)
        # Create the list if it does not exist
        l = self.db.find(List, List.name == list_name).one()
        if l is None:
            l = List(list_name)
            # Don't wait for the cache to set those properties
            for propname in l.mailman_props:
                setattr(l, propname, getattr(mlist, propname))
            self.db.add(l)
        # The policy check comes after the list creation above, so the list
        # record exists even if nothing is ever archived for it
        if mlist.archive_policy == ArchivePolicy.never:
            logger.info("Archiving disabled by list policy for %s" % list_name)
            return None
        if not message.has_key("Message-Id"):
            raise ValueError("No 'Message-Id' header in email", message)
        msg_id = unicode(unquote(message["Message-Id"]))
        # Protect against extremely long Message-Ids (there is no limit in the
        # email spec), it's set to VARCHAR(255) in the database
        if len(msg_id) >= 255:
            msg_id = msg_id[:254]
        email = Email(list_name, msg_id)
        # Duplicates are skipped: nothing below runs for a message that is
        # already in this list
        if self.is_message_in_list(list_name, email.message_id):
            logger.info("Duplicate email from %s: %s" % (message["From"], message.get("Subject", '""')))
            return email.message_id_hash

        # if not getattr(settings.KITTYSTORE_FULL_EMAIL):
        #    # If it's a valid value, leave it to the "prototype" archiver
        #    # Note: the message.as_string() call must be done before scrubbing
        #    email_full = EmailFull(list_name, msg_id, message.as_string())
        #    self.db.add(email_full)

        # Find thread id
        new_thread = False
        ref, thread_id = get_ref_and_thread_id(message, list_name, self)
        if thread_id is None:
            new_thread = True
            # make up the thread_id if not found
            thread_id = email.message_id_hash
        email.thread_id = thread_id
        email.in_reply_to = ref

        # Encoding failures while decoding the sender are reported to the
        # caller as ValueError
        try:
            from_name, from_email = parseaddr(message["From"])
            from_name = header_to_unicode(from_name).strip()
            email.sender_email = unicode(from_email).strip()
        except (UnicodeDecodeError, UnicodeEncodeError):
            raise ValueError("Non-ascii sender address", message)
        # Senders are stored once per address; the display name follows the
        # most recent message
        sender = self.db.find(Sender, Sender.email == email.sender_email).one()
        if sender is None:
            sender = Sender(email.sender_email, from_name)
            self.db.add(sender)
        else:
            sender.name = from_name  # update the name if needed
        email.subject = header_to_unicode(message.get("Subject"))
        if email.subject is not None:
            # limit subject size to 2000 chars or PostgreSQL may complain
            email.subject = email.subject[:2000]
        msg_date = parsedate(message.get("Date"))
        if msg_date is None:
            # Absent or unparseable date
            msg_date = datetime.datetime.utcnow()
        # Read the offset before the timezone information is dropped below
        utcoffset = msg_date.utcoffset()
        if msg_date.tzinfo is not None:
            # Store the date as a naive datetime expressed in UTC
            msg_date = msg_date.astimezone(tzutc()).replace(tzinfo=None)
        email.date = msg_date
        if utcoffset is None:
            email.timezone = 0
        else:
            # in minutes
            email.timezone = ((utcoffset.days * 24 * 60 * 60) + utcoffset.seconds) / 60

        scrubber = Scrubber(list_name, message)
        # warning: scrubbing modifies the msg in-place
        email.content, attachments = scrubber.scrub()

        # category = 'Question' # TODO: enum + i18n ?
        # if ('agenda' in message.get('Subject', '').lower() or
        #        'reminder' in message.get('Subject', '').lower()):
        #    # i18n!
        #    category = 'Agenda'

        if new_thread:
            thread = Thread(list_name, thread_id, email.date)
        else:
            # NOTE(review): .one() may yield None if no such thread exists;
            # presumably get_ref_and_thread_id only returns ids of stored
            # threads -- confirm.
            thread = self.db.find(Thread, And(Thread.list_name == list_name, Thread.thread_id == thread_id)).one()
        # The thread's activity date always becomes this email's date
        thread.date_active = email.date
        self.db.add(thread)

        self.db.add(email)
        # Recompute order and depth now that the email is part of the thread
        compute_thread_order_and_depth(thread)
        for attachment in attachments:
            self.add_attachment(list_name, msg_id, *attachment)
        self.flush()
        # invalidate the cache
        events.notify(events.NewMessage(self, mlist, email))
        if new_thread:
            events.notify(events.NewThread(self, mlist, thread))
        # search indexing
        # do it after caching because we need some list properties (like
        # archive_policy)
        if self.search_index is not None:
            self.search_index.add(email)

        return email.message_id_hash
Example #15
0
    def add_to_list(self, mlist, message):
        """Add the message to a specific list of the store.

        The list record is created on first use and its display name and
        subject prefix are refreshed from ``mlist`` on every call.  A message
        whose Message-ID is already stored for this list is reported on
        stdout and not stored again.  Both the parsed email and its full
        original text (taken before scrubbing) are stored.

        :param mlist: The mailing-list object, implementing
            mailman.interfaces.mailinglist.IMailingList.
        :param message: An email.message.Message instance containing at
            least a unique Message-ID header.  The message will be given
            an X-Message-ID-Hash header, overriding any existing such
            header.
        :returns: The calculated X-Message-ID-Hash header.
        :raises ValueError: if the message is missing a Message-ID
            header.
            The storage service is also allowed to raise this exception
            if it finds, but disallows, collisions.
        """
        list_name = unicode(mlist.fqdn_listname)
        # Create the list if it does not exist
        l = self.db.find(List, List.name == list_name).one()
        if l is None:
            l = List(list_name)
            self.db.add(l)
        # Refresh the stored list properties on every call, not only on
        # creation
        l.display_name = mlist.display_name
        l.subject_prefix = mlist.subject_prefix
        if not message.has_key("Message-Id"):
            raise ValueError("No 'Message-Id' header in email", message)
        msg_id = unicode(unquote(message['Message-Id']))
        email = Email(list_name, msg_id)
        # Duplicates are skipped: nothing below runs for a message that is
        # already in this list
        if self.is_message_in_list(list_name, email.message_id):
            print("Duplicate email from %s: %s" %
                  (message['From'], message.get('Subject', '""')))
            return email.message_id_hash

        # the message.as_string() call must be done before scrubbing
        email_full = EmailFull(list_name, msg_id, message.as_string())
        # Find thread id
        new_thread = False
        ref, thread_id = get_ref_and_thread_id(message, list_name, self)
        if thread_id is None:
            new_thread = True
            # make up the thread_id if not found
            thread_id = email.message_id_hash
        email.thread_id = thread_id
        email.in_reply_to = ref

        # Sender name and address, both stored stripped
        from_name, from_email = parseaddr(message['From'])
        from_name = header_to_unicode(from_name)
        email.sender_name = from_name.strip()
        email.sender_email = unicode(from_email).strip()
        email.subject = header_to_unicode(message.get('Subject'))
        msg_date = parsedate(message.get("Date"))
        if msg_date is None:
            # Absent or unparseable date
            msg_date = datetime.datetime.utcnow()
        # Read the offset before the timezone information is dropped below
        utcoffset = msg_date.utcoffset()
        if msg_date.tzinfo is not None:
            # Store the date as a naive datetime expressed in UTC
            msg_date = msg_date.astimezone(tzutc()).replace(tzinfo=None)
        email.date = msg_date
        if utcoffset is None:
            email.timezone = 0
        else:
            # in minutes
            email.timezone = (
                (utcoffset.days * 24 * 60 * 60) + utcoffset.seconds) / 60

        scrubber = Scrubber(list_name, message)
        # warning: scrubbing modifies the msg in-place
        email.content, attachments = scrubber.scrub()

        # store the Mailman user
        email.user_id = self._store_mailman_user(email.sender_email)

        #category = 'Question' # TODO: enum + i18n ?
        #if ('agenda' in message.get('Subject', '').lower() or
        #        'reminder' in message.get('Subject', '').lower()):
        #    # i18n!
        #    category = 'Agenda'

        if new_thread:
            thread = Thread(list_name, thread_id, email.date)
        else:
            # NOTE(review): .one() may yield None if no such thread exists;
            # presumably get_ref_and_thread_id only returns ids of stored
            # threads -- confirm.
            thread = self.db.find(
                Thread,
                And(
                    Thread.list_name == list_name,
                    Thread.thread_id == thread_id,
                )).one()
        # The thread's activity date always becomes this email's date
        thread.date_active = email.date
        self.db.add(thread)

        self.db.add(email)
        self.db.add(email_full)
        # Recompute order and depth now that the email is part of the thread
        compute_thread_order_and_depth(thread)
        for attachment in attachments:
            self.add_attachment(list_name, msg_id, *attachment)
        self.flush()
        # search indexing
        if self.search_index is not None:
            self.search_index.add(email)
        return email.message_id_hash