예제 #1
0
def test_message_cleanup():
    """Check that cleanup_subject strips leading reply/forward prefixes."""
    # (raw subject, expected cleaned subject), checked in order.
    cases = [
        ("Re: Birthday", "Birthday"),
        ("Re:Birthday", "Birthday"),
        ("Re:FWD:   Birthday", "Birthday"),
        ("Re: RE: Alors, comment ça s'est passé ?",
         "Alors, comment ça s'est passé ?"),
        ("Re: FWD:FWD: Re:La chaise", "La chaise"),
        # "Aw:" / "wg:" prefixes, alone and mixed with "Re:".
        ("Aw: über cool", "über cool"),
        ("Aw:Re:wienerschnitzel", "wienerschnitzel"),
        ("Aw: wienerschnitzel", "wienerschnitzel"),
        ("aw: wg:wienerschnitzel", "wienerschnitzel"),
    ]
    for raw_subject, expected in cases:
        assert cleanup_subject(raw_subject) == expected
예제 #2
0
def test_message_cleanup():
    """Verify the cleaned form of subjects carrying reply/forward prefixes."""

    def expect(raw_subject, cleaned):
        # One check per call so the first failing subject stops the test.
        assert cleanup_subject(raw_subject) == cleaned

    expect("Re: Birthday", "Birthday")
    expect("Re:Birthday", "Birthday")
    expect("Re:FWD:   Birthday", "Birthday")
    expect("Re: RE: Alors, comment ça s'est passé ?",
           "Alors, comment ça s'est passé ?")
    expect("Re: FWD:FWD: Re:La chaise", "La chaise")

    # "Aw:" / "wg:" prefixes, in either letter case.
    expect("Aw: über cool", "über cool")
    expect("Aw:Re:wienerschnitzel", "wienerschnitzel")
    expect("Aw: wienerschnitzel", "wienerschnitzel")
    expect("aw: wg:wienerschnitzel", "wienerschnitzel")
예제 #3
0
def fetch_corresponding_thread(db_session, namespace_id, message):
    """Return an existing thread that `message` probably belongs to, or None.

    Candidate threads are the ones in `namespace_id` whose cleaned subject
    equals the cleaned subject of `message`, visited by descending thread
    id. A candidate is returned as soon as one of its messages either
    shares at least two non-BCC participant addresses with `message`, or
    both that message and `message` are self-sent by the same single
    address. A candidate that already holds MAX_THREAD_LENGTH messages or
    more is skipped instead of being returned.
    """
    # FIXME: for performance reasons, we make the assumption that a reply
    # to a message always has a similar subject. This is only
    # right 95% of the time.
    clean_subject = cleanup_subject(message.subject)
    threads = db_session.query(Thread).filter(
        Thread.namespace_id == namespace_id,
        Thread._cleaned_subject == clean_subject). \
        order_by(desc(Thread.id))

    # The incoming message's side of the comparison is identical for every
    # candidate, so build it once. A lot of people BCC some address when
    # sending mass emails, so BCC entries are ignored.
    message_bcc = message.bcc_addr if message.bcc_addr else []
    message_participants_set = set(
        t[1] for t in message.participants if t not in message_bcc)

    for thread in safer_yield_per(threads, Thread.id, 0, 100):
        for match in thread.messages:
            match_bcc = match.bcc_addr if match.bcc_addr else []
            match_participants_set = set(
                t[1] for t in match.participants if t not in match_bcc)

            # A conversation takes place between two or more persons.
            # Two or more participants in common means it's probably a
            # related thread.
            if len(match_participants_set & message_participants_set) >= 2:
                # No need to loop through the rest of the messages
                # in the thread
                if len(thread.messages) >= MAX_THREAD_LENGTH:
                    break
                else:
                    return match.thread

            # handle the case where someone is self-sending an email.
            # NOTE(review): this gives up on the whole search (not just on
            # the self-send check) after the first candidate message when
            # the incoming message lacks a sender or a recipient. Kept
            # as-is; confirm whether that is intended.
            if not message.from_addr or not message.to_addr:
                return None

            # A candidate without sender/recipient lists simply cannot
            # satisfy the self-send comparison below.
            match_from = [t[1] for t in (match.from_addr or [])]
            # Compare against the candidate's *recipients*; reading
            # from_addr here would make match_from == match_to always true.
            match_to = [t[1] for t in (match.to_addr or [])]
            message_from = [t[1] for t in message.from_addr]
            message_to = [t[1] for t in message.to_addr]

            if (len(message_to) == 1 and message_from == message_to
                    and match_from == match_to and message_to == match_from):
                # Check that we're not over max thread length in this case
                # No need to loop through the rest of the messages
                # in the thread.
                if len(thread.messages) >= MAX_THREAD_LENGTH:
                    break
                else:
                    return match.thread

    return None
예제 #4
0
def fetch_corresponding_thread(db_session, namespace_id, message):
    """Return an existing thread that `message` probably belongs to, or None.

    Candidate threads are the ones in `namespace_id` whose cleaned subject
    equals the cleaned subject of `message`, visited by descending thread
    id, with only the address columns of their messages loaded. A
    candidate is returned as soon as one of its messages either shares at
    least two non-BCC participant addresses with `message`, or both that
    message and `message` are self-sent by the same single address. A
    candidate that already holds MAX_THREAD_LENGTH messages or more is
    skipped instead of being returned.
    """
    # FIXME: for performance reasons, we make the assumption that a reply
    # to a message always has a similar subject. This is only
    # right 95% of the time.
    clean_subject = cleanup_subject(message.subject)
    threads = db_session.query(Thread). \
        filter(Thread.namespace_id == namespace_id,
               Thread._cleaned_subject == clean_subject). \
        order_by(desc(Thread.id)). \
        options(joinedload(Thread.messages).load_only(
            'from_addr', 'to_addr', 'bcc_addr', 'cc_addr'))

    # The incoming message's side of the comparison is identical for every
    # candidate, so build it once. A lot of people BCC some address when
    # sending mass emails, so BCC entries are ignored.
    message_bcc = message.bcc_addr if message.bcc_addr else []
    message_participants_set = set(
        t[1] for t in message.participants if t not in message_bcc)

    for thread in threads:
        for match in thread.messages:
            match_bcc = match.bcc_addr if match.bcc_addr else []
            match_participants_set = set(
                t[1] for t in match.participants if t not in match_bcc)

            # A conversation takes place between two or more persons.
            # Two or more participants in common means it's probably a
            # related thread.
            if len(match_participants_set & message_participants_set) >= 2:
                # No need to loop through the rest of the messages
                # in the thread
                if len(thread.messages) >= MAX_THREAD_LENGTH:
                    break
                else:
                    return match.thread

            # handle the case where someone is self-sending an email.
            # NOTE(review): this gives up on the whole search (not just on
            # the self-send check) after the first candidate message when
            # the incoming message lacks a sender or a recipient. Kept
            # as-is; confirm whether that is intended.
            if not message.from_addr or not message.to_addr:
                return None

            # A candidate without sender/recipient lists simply cannot
            # satisfy the self-send comparison below.
            match_from = [t[1] for t in (match.from_addr or [])]
            # Compare against the candidate's *recipients*; reading
            # from_addr here would make match_from == match_to always true.
            match_to = [t[1] for t in (match.to_addr or [])]
            message_from = [t[1] for t in message.from_addr]
            message_to = [t[1] for t in message.to_addr]

            if (len(message_to) == 1 and message_from == message_to and
                    match_from == match_to and message_to == match_from):
                # Check that we're not over max thread length in this case
                # No need to loop through the rest of the messages
                # in the thread.
                if len(thread.messages) >= MAX_THREAD_LENGTH:
                    break
                else:
                    return match.thread

    return None
예제 #5
0
 def from_imap_message(cls, session, namespace, message):
     """Return the thread for `message`, building a new one if it has none.

     A message that already carries a thread gets that thread back
     unchanged. Otherwise a new `cls` instance is built from the message's
     cleaned subject, received date and snippet. `session` is not used.
     """
     # Guard: never create a second thread for an already-threaded message.
     if message.thread is not None:
         return message.thread

     return cls(
         subject=cleanup_subject(message.subject),
         recentdate=message.received_date,
         namespace=namespace,
         subjectdate=message.received_date,
         snippet=message.snippet,
     )
예제 #6
0
 def from_imap_message(cls, session, namespace, message):
     """Build a thread from an IMAP message unless it already has one.

     Returns `message.thread` when it is set; otherwise returns a fresh
     `cls` instance whose subject is the cleaned message subject and whose
     recent/subject dates are both the message's received date.
     `session` is not used.
     """
     if message.thread is None:
         thread_fields = dict(
             subject=cleanup_subject(message.subject),
             recentdate=message.received_date,
             namespace=namespace,
             subjectdate=message.received_date,
             snippet=message.snippet)
         return cls(**thread_fields)

     # The message is already attached to a thread: reuse it as-is.
     return message.thread
예제 #7
0
 def compute_cleaned_up_subject(self, key, value):
     """Store the cleaned-up form of `value` in `_cleaned_subject`.

     `value` itself is returned unchanged; `key` is not used.
     """
     cleaned = cleanup_subject(value)
     self._cleaned_subject = cleaned
     return value
예제 #8
0
 def compute_cleaned_up_subject(self, key, value):
     """Keep `_cleaned_subject` in sync with a newly assigned subject.

     Returns `value` untouched so the raw subject is what gets stored by
     the caller; `key` is ignored.
     """
     setattr(self, "_cleaned_subject", cleanup_subject(value))
     return value
예제 #9
0
def test_message_cleanup():
    """Check prefix stripping and whitespace collapsing in cleanup_subject."""
    # (subject as received, subject after cleanup), checked in order.
    expectations = (
        ("Re: Birthday", "Birthday"),
        ("Re:Birthday", "Birthday"),
        ("Re:FWD:   Birthday", "Birthday"),
        # Tabs/newlines inside the subject collapse to single spaces.
        ("RE:FWD: My\tBirthday\n   Party", "My Birthday Party"),
        ("Re: RE: Alors, comment ça s'est passé ?",
         "Alors, comment ça s'est passé ?"),
        ("Re: FWD:FWD: Re:La chaise", "La chaise"),
        ("Aw: über cool", "über cool"),
        ("Aw:Re:wienerschnitzel", "wienerschnitzel"),
        ("Aw: wienerschnitzel", "wienerschnitzel"),
        ("aw: wg:wienerschnitzel", "wienerschnitzel"),
        # Bounce-style prefixes are stripped as well.
        ("Undeliverable: Message returned to sender",
         "Message returned to sender"),
        ("Undelivered: Message returned to sender",
         "Message returned to sender"),
    )
    for received, cleaned in expectations:
        assert cleanup_subject(received) == cleaned
예제 #10
0
def slurp_imap_namespace_gmail(imap, db, namespace=None, account=None):
    """Copy folder, message, label and thread metadata from IMAP into `db`.

    `imap` must provide `list_folders()`, `select_folder()`, `search()` and
    `fetch()`. `db` must be a connection usable as a context manager that
    offers `execute()`/`executemany()` with qmark and named parameters
    (presumably sqlite3 -- confirm against the caller). `namespace` and
    `account` are accepted but not used here.

    The work runs in two `with db:` blocks: the first records folders,
    their flags and the special-folder mapping; the second walks every
    selectable folder, stores its messages and Gmail labels, and finally
    derives threads from the stored X-GM-THRID values.

    Raises AssertionError when the number of UIDs returned by the search
    differs from the EXISTS count reported for the folder.
    """
    # folder attrs -> RFC 6154 Special-Use mailbox flags
    singleton_flags = {
        'all_folder': u'\\All',
        'archive_folder': u'\\Archive',
        'drafts_folder': u'\\Drafts',
        'starred_folder': u'\\Flagged',
        'spam_folder': u'\\Junk',
        'sent_folder': u'\\Sent',
        'trash_folder': u'\\Trash',
    }

    # List folders -- Returns sequence of (flags, delimiter, name)
    folders_fdn = imap.list_folders()
    with db:
        # Folder names & delimiters
        db.executemany(
            """
            INSERT INTO folders (
                folder_name, clean_folder_name, imap_delimiter
            ) VALUES (?, ?, ?)
            """, ((name, cleanup_folder_name(name), delimiter)
                  for flags, delimiter, name in folders_fdn))

        # Folder flags
        db.executemany(
            """
            INSERT INTO folder_flags (folder_name, flag) VALUES (?, ?)
            """, ((name, flag) for flags, delimiter, name in folders_fdn
                  for flag in flags))

        # Set imap_noselect = 1 on folders that have the \Noselect flag;
        # Set imap_noselect = 0 on folders that don't.
        # The backslash is doubled in the Python literal so the SQL text is
        # exactly '\Noselect' regardless of string type: a bare "\N" is a
        # named-character escape in unicode literals and fails to compile.
        db.execute("""
            UPDATE folders SET imap_noselect = (
                SELECT folder_flags.flag IS NOT NULL
                FROM folders AS a LEFT JOIN folder_flags ON (
                    a.folder_name = folder_flags.folder_name AND
                    folder_flags.flag = '\\Noselect'
                )
                WHERE folders.folder_name = a.folder_name
            )
            """)

        # Insert 'inbox_folder' -> 'INBOX' if there is an INBOX folder, which
        # there should always be, I think.
        db.execute(
            """
            INSERT INTO special_folders (attr_name, folder_name)
            SELECT ?, folder_name FROM folders WHERE folder_name = ?
            """, ['inbox_folder', 'INBOX'])

        # Insert other special folder names
        db.executemany(
            """
            INSERT INTO special_folders (attr_name, folder_name)
            SELECT ?, folder_name FROM folder_flags WHERE flag = ?
            """, singleton_flags.items())

    # Fetch all messages from each folder
    with db:
        folder_names = [
            row[0] for row in db.execute(
                "SELECT folder_name FROM folders WHERE NOT imap_noselect")
        ]

        for folder_name in folder_names:
            # EXAMINE the folder
            examine_response = imap.select_folder(folder_name, readonly=True)

            # Update imap_uidvalidity
            db.execute(
                """
                UPDATE folders
                SET imap_uidvalidity = ?, imap_uidnext = ?
                WHERE folder_name = ?
                """, [
                    examine_response[u'UIDVALIDITY'],
                    examine_response[u'UIDNEXT'], folder_name
                ])

            # Get uids of the messages in the folder
            imap_uids = imap.search(u'ALL')

            # Result should match the stated number of messages in the folder.
            if len(imap_uids) != examine_response[u'EXISTS']:
                raise AssertionError("len(imap_uids)={0}, EXISTS={1!r}".format(
                    len(imap_uids), examine_response[u'EXISTS']))

            # Create folder_messages entries
            db.executemany(
                """
                INSERT INTO folder_messages (folder_name, imap_uid)
                VALUES (?, ?)
                """, ((folder_name, imap_uid) for imap_uid in imap_uids))

            ## Get the folder flags
            #folder_flags = set(row[0] for row in db.execute(
            #    "SELECT flag FROM folder_flags WHERE folder_name = ?",
            #    [folder_name]))
            #
            ## This is Gmail, so only actually fetch messages from the 'All
            ## Mail' and 'Trash' folders.  This *should* give us all of the
            ## messages.
            #if not folder_flags & {u'\\All', u'\\Trash', u'\\Sent'}:
            #    continue

            # Get folder messages
            batch_size = 1000
            fetch_data = [
                'RFC822.SIZE', 'ENVELOPE', 'FLAGS', 'X-GM-MSGID', 'X-GM-THRID',
                'X-GM-LABELS', 'INTERNALDATE', 'RFC822.HEADER'
            ]
            for i in range(0, len(imap_uids), batch_size):
                imap_uids_batch = imap_uids[i:i + batch_size]

                # Fetch message info from the IMAP server
                fetch_response = imap.fetch(imap_uids_batch, fetch_data)

                # Fetch message info and insert it into the messages table.
                # Don't bother deduplicating at this point.
                for uid, data in fetch_response.items():
                    headers = MimeHeaders.from_stream(
                        StringIO(data['RFC822.HEADER']))
                    msg_data = dict(
                        date=data['INTERNALDATE'],
                        subject=data['ENVELOPE'].subject,
                        in_reply_to=data['ENVELOPE'].in_reply_to,
                        size=data['RFC822.SIZE'],
                        message_id_header=data['ENVELOPE'].message_id,
                        x_gm_thrid=unicode(data['X-GM-THRID']),
                        x_gm_msgid=unicode(data['X-GM-MSGID']),
                        sender_addr=json.dumps(
                            parse_email_address_list(headers.get('Sender'))),
                        from_addr=json.dumps(
                            parse_email_address_list(headers.get('From'))),
                        reply_to_addr=json.dumps(
                            parse_email_address_list(headers.get('Reply-To'))),
                        to_addr=json.dumps(
                            parse_email_address_list(headers.get('To'))),
                        cc_addr=json.dumps(
                            parse_email_address_list(headers.get('Cc'))),
                        bcc_addr=json.dumps(
                            parse_email_address_list(headers.get('Bcc'))),
                    )
                    msg_data['clean_subject'] = \
                        cleanup_subject(parse_header_value('Subject', msg_data['subject']))

                    # Check if we've already stored the message
                    cur = db.execute(
                        """
                        SELECT id, x_gm_msgid FROM messages
                        WHERE x_gm_msgid = :x_gm_msgid
                        """, msg_data)
                    row = next(iter(cur.fetchall()),
                               None)  # returns 0 or 1 rows
                    message_id = row['id'] if row is not None else None

                    # If we've never stored the message, store it now.
                    if message_id is None:
                        cur = db.execute(
                            """
                            INSERT INTO messages (
                                date, subject, clean_subject,
                                in_reply_to, size, message_id_header,
                                x_gm_msgid, x_gm_thrid,
                                sender_addr, from_addr, reply_to_addr,
                                to_addr, cc_addr, bcc_addr
                            ) VALUES (
                                :date, :subject, :clean_subject,
                                :in_reply_to, :size, :message_id_header,
                                :x_gm_msgid, :x_gm_thrid,
                                :sender_addr, :from_addr, :reply_to_addr,
                                :to_addr, :cc_addr, :bcc_addr
                            )
                            """, msg_data)
                        message_id = cur.lastrowid

                    # Store the Gmail labels (these can be different in
                    # different folders; e.g. messages in the 'Sent' folder are
                    # missing the u'\\Sent' label)
                    db.executemany(
                        """
                        INSERT INTO folder_message_gm_labels
                            (folder_name, message_id, label)
                        VALUES (?, ?, ?)
                        """, ((folder_name, message_id, label)
                              for label in data['X-GM-LABELS']))

                    # Mark the message as being in the current folder.
                    db.execute(
                        """
                        UPDATE folder_messages
                        SET message_id = ?
                        WHERE folder_name = ? AND imap_uid = ?
                        """, (message_id, folder_name, uid))

        # Construct threads (assuming gmail for now)
        db.execute("""
            INSERT INTO threads (x_gm_thrid)
            SELECT DISTINCT x_gm_thrid FROM messages
            """)
        db.execute("""
            INSERT INTO thread_messages (thread_id, message_id)
            SELECT threads.id, messages.id
            FROM threads, messages
            WHERE threads.x_gm_thrid = messages.x_gm_thrid
            """)

        # Construct folder_threads
        db.execute("""
            INSERT INTO folder_threads (folder_name, thread_id)
            SELECT DISTINCT
                folder_messages.folder_name, thread_messages.thread_id
            FROM
                folder_messages
                LEFT JOIN thread_messages USING (message_id)
            """)
예제 #11
0
def slurp_imap_namespace_gmail(imap, db, namespace=None, account=None):
    """Copy folder, message, label and thread metadata from IMAP into `db`.

    `imap` must provide `list_folders()`, `select_folder()`, `search()` and
    `fetch()`. `db` must be a connection usable as a context manager that
    offers `execute()`/`executemany()` with qmark and named parameters
    (presumably sqlite3 -- confirm against the caller). `namespace` and
    `account` are accepted but not used here.

    The work runs in two `with db:` blocks: the first records folders,
    their flags and the special-folder mapping; the second walks every
    selectable folder, stores its messages and Gmail labels, and finally
    derives threads from the stored X-GM-THRID values.

    Raises AssertionError when the number of UIDs returned by the search
    differs from the EXISTS count reported for the folder.
    """
    # folder attrs -> RFC 6154 Special-Use mailbox flags
    singleton_flags = {
        'all_folder': u'\\All',
        'archive_folder': u'\\Archive',
        'drafts_folder': u'\\Drafts',
        'starred_folder': u'\\Flagged',
        'spam_folder': u'\\Junk',
        'sent_folder': u'\\Sent',
        'trash_folder': u'\\Trash',
    }

    # List folders -- Returns sequence of (flags, delimiter, name)
    folders_fdn = imap.list_folders()
    with db:
        # Folder names & delimiters
        db.executemany("""
            INSERT INTO folders (
                folder_name, clean_folder_name, imap_delimiter
            ) VALUES (?, ?, ?)
            """, ((name, cleanup_folder_name(name), delimiter)
                  for flags, delimiter, name in folders_fdn))

        # Folder flags
        db.executemany("""
            INSERT INTO folder_flags (folder_name, flag) VALUES (?, ?)
            """, ((name, flag)
                  for flags, delimiter, name in folders_fdn
                  for flag in flags))

        # Set imap_noselect = 1 on folders that have the \Noselect flag;
        # Set imap_noselect = 0 on folders that don't.
        # The backslash is doubled in the Python literal so the SQL text is
        # exactly '\Noselect' regardless of string type: a bare "\N" is a
        # named-character escape in unicode literals and fails to compile.
        db.execute("""
            UPDATE folders SET imap_noselect = (
                SELECT folder_flags.flag IS NOT NULL
                FROM folders AS a LEFT JOIN folder_flags ON (
                    a.folder_name = folder_flags.folder_name AND
                    folder_flags.flag = '\\Noselect'
                )
                WHERE folders.folder_name = a.folder_name
            )
            """)

        # Insert 'inbox_folder' -> 'INBOX' if there is an INBOX folder, which
        # there should always be, I think.
        db.execute("""
            INSERT INTO special_folders (attr_name, folder_name)
            SELECT ?, folder_name FROM folders WHERE folder_name = ?
            """, ['inbox_folder', 'INBOX'])

        # Insert other special folder names
        db.executemany("""
            INSERT INTO special_folders (attr_name, folder_name)
            SELECT ?, folder_name FROM folder_flags WHERE flag = ?
            """, singleton_flags.items())

    # Fetch all messages from each folder
    with db:
        folder_names = [row[0] for row in db.execute(
            "SELECT folder_name FROM folders WHERE NOT imap_noselect")]

        for folder_name in folder_names:
            # EXAMINE the folder
            examine_response = imap.select_folder(folder_name, readonly=True)

            # Update imap_uidvalidity
            db.execute("""
                UPDATE folders
                SET imap_uidvalidity = ?, imap_uidnext = ?
                WHERE folder_name = ?
                """, [examine_response[u'UIDVALIDITY'],
                      examine_response[u'UIDNEXT'],
                      folder_name])

            # Get uids of the messages in the folder
            imap_uids = imap.search(u'ALL')

            # Result should match the stated number of messages in the folder.
            if len(imap_uids) != examine_response[u'EXISTS']:
                raise AssertionError("len(imap_uids)={0}, EXISTS={1!r}".format(
                    len(imap_uids), examine_response[u'EXISTS']))

            # Create folder_messages entries
            db.executemany("""
                INSERT INTO folder_messages (folder_name, imap_uid)
                VALUES (?, ?)
                """, ((folder_name, imap_uid) for imap_uid in imap_uids))

            ## Get the folder flags
            #folder_flags = set(row[0] for row in db.execute(
            #    "SELECT flag FROM folder_flags WHERE folder_name = ?",
            #    [folder_name]))
            #
            ## This is Gmail, so only actually fetch messages from the 'All
            ## Mail' and 'Trash' folders.  This *should* give us all of the
            ## messages.
            #if not folder_flags & {u'\\All', u'\\Trash', u'\\Sent'}:
            #    continue

            # Get folder messages
            batch_size = 1000
            fetch_data = ['RFC822.SIZE', 'ENVELOPE', 'FLAGS',
                          'X-GM-MSGID', 'X-GM-THRID', 'X-GM-LABELS',
                          'INTERNALDATE', 'RFC822.HEADER']
            for i in range(0, len(imap_uids), batch_size):
                imap_uids_batch = imap_uids[i:i+batch_size]

                # Fetch message info from the IMAP server
                fetch_response = imap.fetch(imap_uids_batch, fetch_data)

                # Fetch message info and insert it into the messages table.
                # Don't bother deduplicating at this point.
                for uid, data in fetch_response.items():
                    headers = MimeHeaders.from_stream(StringIO(data['RFC822.HEADER']))
                    msg_data = dict(
                        date=data['INTERNALDATE'],
                        subject=data['ENVELOPE'].subject,
                        in_reply_to=data['ENVELOPE'].in_reply_to,
                        size=data['RFC822.SIZE'],
                        message_id_header=data['ENVELOPE'].message_id,
                        x_gm_thrid=unicode(data['X-GM-THRID']),
                        x_gm_msgid=unicode(data['X-GM-MSGID']),
                        sender_addr=json.dumps(parse_email_address_list(headers.get('Sender'))),
                        from_addr=json.dumps(parse_email_address_list(headers.get('From'))),
                        reply_to_addr=json.dumps(parse_email_address_list(headers.get('Reply-To'))),
                        to_addr=json.dumps(parse_email_address_list(headers.get('To'))),
                        cc_addr=json.dumps(parse_email_address_list(headers.get('Cc'))),
                        bcc_addr=json.dumps(parse_email_address_list(headers.get('Bcc'))),
                    )
                    msg_data['clean_subject'] = \
                        cleanup_subject(parse_header_value('Subject', msg_data['subject']))

                    # Check if we've already stored the message
                    cur = db.execute("""
                        SELECT id, x_gm_msgid FROM messages
                        WHERE x_gm_msgid = :x_gm_msgid
                        """, msg_data)
                    row = next(iter(cur.fetchall()), None)    # returns 0 or 1 rows
                    message_id = row['id'] if row is not None else None

                    # If we've never stored the message, store it now.
                    if message_id is None:
                        cur = db.execute("""
                            INSERT INTO messages (
                                date, subject, clean_subject,
                                in_reply_to, size, message_id_header,
                                x_gm_msgid, x_gm_thrid,
                                sender_addr, from_addr, reply_to_addr,
                                to_addr, cc_addr, bcc_addr
                            ) VALUES (
                                :date, :subject, :clean_subject,
                                :in_reply_to, :size, :message_id_header,
                                :x_gm_msgid, :x_gm_thrid,
                                :sender_addr, :from_addr, :reply_to_addr,
                                :to_addr, :cc_addr, :bcc_addr
                            )
                            """, msg_data)
                        message_id = cur.lastrowid

                    # Store the Gmail labels (these can be different in
                    # different folders; e.g. messages in the 'Sent' folder are
                    # missing the u'\\Sent' label)
                    db.executemany("""
                        INSERT INTO folder_message_gm_labels
                            (folder_name, message_id, label)
                        VALUES (?, ?, ?)
                        """, ((folder_name, message_id, label)
                              for label in data['X-GM-LABELS']))

                    # Mark the message as being in the current folder.
                    db.execute("""
                        UPDATE folder_messages
                        SET message_id = ?
                        WHERE folder_name = ? AND imap_uid = ?
                        """, (message_id, folder_name, uid))

        # Construct threads (assuming gmail for now)
        db.execute("""
            INSERT INTO threads (x_gm_thrid)
            SELECT DISTINCT x_gm_thrid FROM messages
            """)
        db.execute("""
            INSERT INTO thread_messages (thread_id, message_id)
            SELECT threads.id, messages.id
            FROM threads, messages
            WHERE threads.x_gm_thrid = messages.x_gm_thrid
            """)

        # Construct folder_threads
        db.execute("""
            INSERT INTO folder_threads (folder_name, thread_id)
            SELECT DISTINCT
                folder_messages.folder_name, thread_messages.thread_id
            FROM
                folder_messages
                LEFT JOIN thread_messages USING (message_id)
            """)
예제 #12
0
def test_message_cleanup():
    """Verify cleanup_subject on prefixed and whitespace-laden subjects."""

    def check(raw_subject, cleaned):
        # One check per call so the first failing subject stops the test.
        assert cleanup_subject(raw_subject) == cleaned

    check("Re: Birthday", "Birthday")
    check("Re:Birthday", "Birthday")
    check("Re:FWD:   Birthday", "Birthday")
    # Tabs/newlines inside the subject collapse to single spaces.
    check("RE:FWD: My\tBirthday\n   Party", "My Birthday Party")
    check("Re: RE: Alors, comment ça s'est passé ?",
          "Alors, comment ça s'est passé ?")
    check("Re: FWD:FWD: Re:La chaise", "La chaise")

    check("Aw: über cool", "über cool")
    check("Aw:Re:wienerschnitzel", "wienerschnitzel")
    check("Aw: wienerschnitzel", "wienerschnitzel")
    check("aw: wg:wienerschnitzel", "wienerschnitzel")

    # Bounce-style prefixes are stripped as well.
    check("Undeliverable: Message returned to sender",
          "Message returned to sender")
    check("Undelivered: Message returned to sender",
          "Message returned to sender")
예제 #13
0
def fetch_corresponding_thread(db_session, namespace_id, message):
    """Return an existing thread that `message` probably belongs to, or None.

    A message without a sender or without a recipient never matches.
    Otherwise, candidate threads are the ones in `namespace_id` whose
    cleaned subject equals the cleaned subject of `message`, visited by
    descending thread id. A candidate is returned as soon as one of its
    messages (oldest first) either shares at least two non-BCC participant
    addresses with `message` (compared case-insensitively), or both that
    message and `message` are self-sent by the same single address. A
    candidate whose loaded messages number MAX_THREAD_LENGTH or more is
    skipped instead of being returned.
    """
    # handle the case where someone is self-sending an email.
    if not message.from_addr or not message.to_addr:
        return None

    message_from = [t[1] for t in message.from_addr]
    message_to = [t[1] for t in message.to_addr]

    # The incoming message's side of the participant comparison is the
    # same for every candidate, so build it once. A lot of people BCC some
    # address when sending mass emails, so BCC entries are ignored.
    message_bcc = message.bcc_addr if message.bcc_addr else []
    message_emails = {
        t[1].lower() for t in message.participants if t not in message_bcc
    }

    # FIXME: for performance reasons, we make the assumption that a reply
    # to a message always has a similar subject. This is only
    # right 95% of the time.
    clean_subject = cleanup_subject(message.subject)

    # XXX: It is much faster to sort client-side by message date. We therefore
    # use `contains_eager` and `outerjoin` to fetch the messages by thread in
    # no particular order (as opposed to `joinedload`, which would use the
    # order_by on the Message._thread backref).  We also use a limit to avoid
    # scanning too many / large threads.
    threads = (db_session.query(Thread).filter(
        Thread.namespace_id == namespace_id,
        Thread._cleaned_subject == clean_subject,
    ).outerjoin(Message, Thread.messages).order_by(desc(Thread.id)).options(
        load_only("id", "discriminator"),
        contains_eager(Thread.messages).load_only("from_addr", "to_addr",
                                                  "bcc_addr", "cc_addr",
                                                  "received_date"),
    ).limit(MAX_MESSAGES_SCANNED))

    for thread in threads:
        messages = sorted(thread.messages, key=attrgetter("received_date"))
        for match in messages:
            match_bcc = match.bcc_addr if match.bcc_addr else []
            match_emails = {
                t[1].lower() for t in match.participants if t not in match_bcc
            }

            # A conversation takes place between two or more persons.
            # Two or more participants in common means it's probably a
            # related thread.
            if len(match_emails & message_emails) >= 2:
                # No need to loop through the rest of the messages
                # in the thread
                if len(messages) >= MAX_THREAD_LENGTH:
                    break
                else:
                    return match.thread

            # A candidate without sender/recipient lists simply cannot
            # satisfy the self-send comparison below.
            match_from = [t[1] for t in (match.from_addr or [])]
            # Compare against the candidate's *recipients*; reading
            # from_addr here would make match_from == match_to always true.
            match_to = [t[1] for t in (match.to_addr or [])]

            if (len(message_to) == 1 and message_from == message_to
                    and match_from == match_to and message_to == match_from):
                # Check that we're not over max thread length in this case
                # No need to loop through the rest of the messages
                # in the thread.
                if len(messages) >= MAX_THREAD_LENGTH:
                    break
                else:
                    return match.thread

    return None