Example #1
0
    def __init__(self, account=None, mid=None, folder_name=None,
                 received_date=None, flags=None, body_string=None,
                 *args, **kwargs):
        """ Parses message data and fills in db metadata and MIME parts.

        The new object links to its new Part objects through the `parts`
        relationship. Nothing is committed here.

        Threads are not computed here; you gotta do that separately.

        When none of account, mid, folder_name, flags and body_string is
        given (drafts), only the base initializer runs and nothing is parsed.

        If parsing fails with mime.DecodingError, AttributeError or
        RuntimeError, the raw message is dumped via _log_decode_error(), the
        failure is logged, self.mark_error() is called and the constructor
        returns with whatever metadata was set before the failure.

        Parameters
        ----------
        account : object
            Must expose `id` and a non-None `namespace` (with an `id`).

        mid : int
            The account backend-specific message identifier; it's only used for
            logging errors.

        folder_name : str
            Name of the folder the message came from; only used for logging.

        received_date : datetime, optional
            When falsy, the date is derived from the Date / Received headers
            through get_internaldate().

        flags : object
            Required but not otherwise read by this constructor.

        body_string : str
            The full message including headers (encoded, i.e. not unicode).

        Raises
        ------
        ValueError
            If some, but not all, of the required keyword arguments are given.
        """
        _rqd = [account, mid, folder_name, flags, body_string]

        MailSyncBase.__init__(self, *args, **kwargs)

        # for drafts
        if not any(_rqd):
            return

        # At least one required argument is set here, so any None among them
        # means the caller passed an incomplete set.
        if any(v is None for v in _rqd):
            raise ValueError(
                "Required keyword arguments: account, mid, folder_name, "
                "flags, body_string")

        # stop trickle-down bugs
        assert account.namespace is not None
        assert not isinstance(body_string, unicode)

        try:
            parsed = mime.from_string(body_string)

            mime_version = parsed.headers.get('Mime-Version')
            # sometimes MIME-Version is "1.0 (1.0)", hence the .startswith()
            if mime_version is not None and not mime_version.startswith('1.0'):
                log.warning('Unexpected MIME-Version',
                            account_id=account.id, folder_name=folder_name,
                            mid=mid, mime_version=mime_version)

            self.data_sha256 = sha256(body_string).hexdigest()

            # clean_subject strips re:, fwd: etc.
            self.subject = parsed.clean_subject
            self.from_addr = parse_email_address_list(
                parsed.headers.get('From'))
            self.sender_addr = parse_email_address_list(
                parsed.headers.get('Sender'))
            self.reply_to = parse_email_address_list(
                parsed.headers.get('Reply-To'))

            self.to_addr = parse_email_address_list(
                parsed.headers.getall('To'))
            self.cc_addr = parse_email_address_list(
                parsed.headers.getall('Cc'))
            self.bcc_addr = parse_email_address_list(
                parsed.headers.getall('Bcc'))

            self.in_reply_to = parsed.headers.get('In-Reply-To')
            self.message_id_header = parsed.headers.get('Message-Id')

            self.received_date = received_date if received_date else \
                get_internaldate(parsed.headers.get('Date'),
                                 parsed.headers.get('Received'))

            # Custom Inbox header
            self.inbox_uid = parsed.headers.get('X-INBOX-ID')

            # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
            self.references = parse_references(
                parsed.headers.get('References', ''),
                parsed.headers.get('In-Reply-To', ''))

            self.size = len(body_string)  # includes headers text

            i = 0  # for walk_index

            from inbox.models.block import Part

            # Store all message headers as object with index 0
            headers_part = Part()
            headers_part.namespace_id = account.namespace.id
            headers_part.message = self
            headers_part.walk_index = i
            headers_part.data = json.dumps(parsed.headers.items())
            self.parts.append(headers_part)

            for mimepart in parsed.walk(
                    with_self=parsed.content_type.is_singlepart()):
                # The index advances for skipped parts too, so stored
                # walk_index values keep matching the walk order.
                i += 1
                if mimepart.content_type.is_multipart():
                    log.warning('multipart sub-part found',
                                account_id=account.id, folder_name=folder_name,
                                mid=mid)
                    continue  # TODO should we store relations?

                new_part = Part()
                new_part.namespace_id = account.namespace.id
                new_part.message = self
                new_part.walk_index = i
                new_part.content_type = mimepart.content_type.value
                new_part.filename = _trim_filename(
                    mimepart.content_type.params.get('name'),
                    account.id, mid)
                # TODO maybe also trim other headers?

                if mimepart.content_disposition[0] is not None:
                    value, params = mimepart.content_disposition
                    if value not in ['inline', 'attachment']:
                        log.error('Unknown Content-Disposition',
                                  account_id=account.id, mid=mid,
                                  folder_name=folder_name,
                                  bad_content_disposition=
                                  mimepart.content_disposition,
                                  parsed_content_disposition=value)
                        continue
                    else:
                        new_part.content_disposition = value
                        if value == 'attachment':
                            # The disposition's filename takes precedence
                            # over the Content-Type `name` parameter.
                            new_part.filename = _trim_filename(
                                params.get('filename'), account.id, mid)

                if mimepart.body is None:
                    data_to_write = ''
                elif new_part.content_type.startswith('text'):
                    data_to_write = mimepart.body.encode('utf-8', 'strict')
                    # normalize mac/win/unix newlines
                    data_to_write = data_to_write \
                        .replace('\r\n', '\n').replace('\r', '\n')
                else:
                    data_to_write = mimepart.body
                if data_to_write is None:
                    data_to_write = ''

                new_part.content_id = mimepart.headers.get('Content-Id')
                new_part.data = data_to_write
                self.parts.append(new_part)
            self.calculate_sanitized_body()
        except mime.DecodingError:
            # Occasionally iconv will fail via maximum recursion depth. We
            # still keep the metadata and mark it as b0rked.
            _log_decode_error(account.id, folder_name, mid, body_string)
            log.error('Message parsing DecodeError', account_id=account.id,
                      folder_name=folder_name, err_filename=_get_errfilename(
                          account.id, folder_name, mid))
            self.mark_error()
            return
        except AttributeError:
            # For EAS messages that are missing Date + Received headers, due
            # to the processing we do in inbox.util.misc.get_internaldate()
            _log_decode_error(account.id, folder_name, mid, body_string)
            log.error('Message parsing AttributeError', account_id=account.id,
                      folder_name=folder_name, err_filename=_get_errfilename(
                          account.id, folder_name, mid))
            self.mark_error()
            return
        except RuntimeError:
            _log_decode_error(account.id, folder_name, mid, body_string)
            # Pass the fields as structured keywords like the other handlers;
            # str.format() on a placeholder-less message dropped err_filename.
            log.error('Message parsing RuntimeError<iconv>',
                      account_id=account.id, folder_name=folder_name,
                      err_filename=_get_errfilename(
                          account.id, folder_name, mid))
            self.mark_error()
            return
Example #2
0
def slurp_imap_namespace_gmail(imap, db, namespace=None, account=None):
    """Copy a Gmail IMAP account's folder and message metadata into `db`.

    Runs in two transactions (`with db:` blocks):

    1. Record every folder, its flags, whether it is selectable, and the
       special-use folders (INBOX plus the RFC 6154 flags).
    2. For every selectable folder, record its UIDs, fetch message metadata
       in batches, store each message once (keyed on X-GM-MSGID), store the
       per-folder Gmail labels, then build threads and folder_threads.

    Parameters
    ----------
    imap : object
        IMAP client exposing list_folders(), select_folder(name,
        readonly=True), search(criteria) and fetch(uids, items).
    db : object
        Database connection usable as a context manager and exposing
        execute() / executemany() with `?` and `:name` placeholders.
        Rows must be indexable by column name ('id') -- presumably a
        sqlite3 connection with a Row factory; confirm against the caller.
    namespace, account :
        Accepted for interface compatibility; not used by this function.

    Raises
    ------
    AssertionError
        If the number of UIDs returned by SEARCH ALL differs from the
        EXISTS count reported when the folder was examined.
    """
    # folder attrs -> RFC 6154 Special-Use mailbox flags
    singleton_flags = {
        'all_folder': u'\\All',
        'archive_folder': u'\\Archive',
        'drafts_folder': u'\\Drafts',
        'starred_folder': u'\\Flagged',
        'spam_folder': u'\\Junk',
        'sent_folder': u'\\Sent',
        'trash_folder': u'\\Trash',
    }

    # Fetch settings are the same for every folder and batch.
    batch_size = 1000
    fetch_data = [
        'RFC822.SIZE', 'ENVELOPE', 'FLAGS', 'X-GM-MSGID', 'X-GM-THRID',
        'X-GM-LABELS', 'INTERNALDATE', 'RFC822.HEADER'
    ]

    # List folders -- Returns sequence of (flags, delimiter, name)
    folders_fdn = imap.list_folders()
    with db:
        # Folder names & delimiters
        db.executemany(
            """
            INSERT INTO folders (
                folder_name, clean_folder_name, imap_delimiter
            ) VALUES (?, ?, ?)
            """, ((name, cleanup_folder_name(name), delimiter)
                  for flags, delimiter, name in folders_fdn))

        # Folder flags
        db.executemany(
            """
            INSERT INTO folder_flags (folder_name, flag) VALUES (?, ?)
            """, ((name, flag) for flags, delimiter, name in folders_fdn
                  for flag in flags))

        # Set imap_noselect = 1 on folders that have the \Noselect flag;
        # Set imap_noselect = 0 on folders that don't.
        # The backslash is doubled so Python passes a literal backslash to
        # SQL instead of trying to parse an (invalid) \N escape sequence.
        db.execute("""
            UPDATE folders SET imap_noselect = (
                SELECT folder_flags.flag IS NOT NULL
                FROM folders AS a LEFT JOIN folder_flags ON (
                    a.folder_name = folder_flags.folder_name AND
                    folder_flags.flag = '\\Noselect'
                )
                WHERE folders.folder_name = a.folder_name
            )
            """)

        # Insert 'inbox_folder' -> 'INBOX' if there is an INBOX folder, which
        # there should always be, I think.
        db.execute(
            """
            INSERT INTO special_folders (attr_name, folder_name)
            SELECT ?, folder_name FROM folders WHERE folder_name = ?
            """, ['inbox_folder', 'INBOX'])

        # Insert other special folder names
        db.executemany(
            """
            INSERT INTO special_folders (attr_name, folder_name)
            SELECT ?, folder_name FROM folder_flags WHERE flag = ?
            """, singleton_flags.items())

    # Fetch all messages from each folder
    with db:
        folder_names = [
            row[0] for row in db.execute(
                "SELECT folder_name FROM folders WHERE NOT imap_noselect")
        ]

        for folder_name in folder_names:
            # EXAMINE the folder
            examine_response = imap.select_folder(folder_name, readonly=True)

            # Update imap_uidvalidity
            db.execute(
                """
                UPDATE folders
                SET imap_uidvalidity = ?, imap_uidnext = ?
                WHERE folder_name = ?
                """, [
                    examine_response[u'UIDVALIDITY'],
                    examine_response[u'UIDNEXT'], folder_name
                ])

            # Get uids of the messages in the folder
            imap_uids = imap.search(u'ALL')

            # Result should match the stated number of messages in the folder.
            if len(imap_uids) != examine_response[u'EXISTS']:
                raise AssertionError("len(imap_uids)={0}, EXISTS={1!r}".format(
                    len(imap_uids), examine_response[u'EXISTS']))

            # Create folder_messages entries
            db.executemany(
                """
                INSERT INTO folder_messages (folder_name, imap_uid)
                VALUES (?, ?)
                """, ((folder_name, imap_uid) for imap_uid in imap_uids))

            # NOTE: every selectable folder is fetched.  Restricting the
            # fetch to the \All, \Trash and \Sent folders (which *should*
            # cover all Gmail messages) was tried and is currently disabled.

            # Get folder messages
            for i in range(0, len(imap_uids), batch_size):
                imap_uids_batch = imap_uids[i:i + batch_size]

                # Fetch message info from the IMAP server
                fetch_response = imap.fetch(imap_uids_batch, fetch_data)

                # Insert each message into the messages table unless a row
                # with the same X-GM-MSGID is already stored.
                for uid, data in fetch_response.items():
                    headers = MimeHeaders.from_stream(
                        StringIO(data['RFC822.HEADER']))
                    msg_data = dict(
                        date=data['INTERNALDATE'],
                        subject=data['ENVELOPE'].subject,
                        in_reply_to=data['ENVELOPE'].in_reply_to,
                        size=data['RFC822.SIZE'],
                        message_id_header=data['ENVELOPE'].message_id,
                        x_gm_thrid=unicode(data['X-GM-THRID']),
                        x_gm_msgid=unicode(data['X-GM-MSGID']),
                        sender_addr=json.dumps(
                            parse_email_address_list(headers.get('Sender'))),
                        from_addr=json.dumps(
                            parse_email_address_list(headers.get('From'))),
                        reply_to_addr=json.dumps(
                            parse_email_address_list(headers.get('Reply-To'))),
                        to_addr=json.dumps(
                            parse_email_address_list(headers.get('To'))),
                        cc_addr=json.dumps(
                            parse_email_address_list(headers.get('Cc'))),
                        bcc_addr=json.dumps(
                            parse_email_address_list(headers.get('Bcc'))),
                    )
                    msg_data['clean_subject'] = cleanup_subject(
                        parse_header_value('Subject', msg_data['subject']))

                    # Check if we've already stored the message
                    cur = db.execute(
                        """
                        SELECT id, x_gm_msgid FROM messages
                        WHERE x_gm_msgid = :x_gm_msgid
                        """, msg_data)
                    row = cur.fetchone()  # None when nothing matched
                    message_id = row['id'] if row is not None else None

                    # If we've never stored the message, store it now.
                    if message_id is None:
                        cur = db.execute(
                            """
                            INSERT INTO messages (
                                date, subject, clean_subject,
                                in_reply_to, size, message_id_header,
                                x_gm_msgid, x_gm_thrid,
                                sender_addr, from_addr, reply_to_addr,
                                to_addr, cc_addr, bcc_addr
                            ) VALUES (
                                :date, :subject, :clean_subject,
                                :in_reply_to, :size, :message_id_header,
                                :x_gm_msgid, :x_gm_thrid,
                                :sender_addr, :from_addr, :reply_to_addr,
                                :to_addr, :cc_addr, :bcc_addr
                            )
                            """, msg_data)
                        message_id = cur.lastrowid

                    # Store the Gmail labels (these can be different in
                    # different folders; e.g. messages in the 'Sent' folder are
                    # missing the u'\\Sent' label)
                    db.executemany(
                        """
                        INSERT INTO folder_message_gm_labels
                            (folder_name, message_id, label)
                        VALUES (?, ?, ?)
                        """, ((folder_name, message_id, label)
                              for label in data['X-GM-LABELS']))

                    # Mark the message as being in the current folder.
                    db.execute(
                        """
                        UPDATE folder_messages
                        SET message_id = ?
                        WHERE folder_name = ? AND imap_uid = ?
                        """, (message_id, folder_name, uid))

        # Construct threads (assuming gmail for now)
        db.execute("""
            INSERT INTO threads (x_gm_thrid)
            SELECT DISTINCT x_gm_thrid FROM messages
            """)
        db.execute("""
            INSERT INTO thread_messages (thread_id, message_id)
            SELECT threads.id, messages.id
            FROM threads, messages
            WHERE threads.x_gm_thrid = messages.x_gm_thrid
            """)

        # Construct folder_threads
        db.execute("""
            INSERT INTO folder_threads (folder_name, thread_id)
            SELECT DISTINCT
                folder_messages.folder_name, thread_messages.thread_id
            FROM
                folder_messages
                LEFT JOIN thread_messages USING (message_id)
            """)
Example #3
0
def create_message(db_session, log, account, mid, folder_name, received_date,
                   flags, body_string, created):
    """ Parses message data and builds db metadata and MIME parts.

    Returns the new Message, which links to the new Part objects through
    relationships. All new objects are uncommitted.

    Threads are not computed here; you gotta do that separately.

    Parameters
    ----------
    db_session :
        Not used by this function.

    log :
        Logger used for warnings and errors.

    account :
        Must be non-None and have a non-None `namespace`.

    mid : int
        The account backend-specific message identifier; it's only used for
        logging errors.

    folder_name : str
        Only used for logging errors.

    received_date :
        Stored as-is on the new message.

    flags :
        Not used by this function.

    body_string : str
        The full message including headers (encoded, i.e. not unicode).

    created : bool
        When truthy a SpoolMessage is built instead of a Message.

    Returns
    -------
    The new message object, or None when parsing failed with
    mime.DecodingError or RuntimeError (the raw message is dumped via
    log_decode_error() in that case).
    """
    # trickle-down bugs
    assert account is not None and account.namespace is not None
    assert not isinstance(body_string, unicode)

    try:
        parsed = mime.from_string(body_string)

        mime_version = parsed.headers.get('Mime-Version')
        # NOTE: sometimes MIME-Version is set to "1.0 (1.0)", hence the
        # .startswith
        if mime_version is not None and not mime_version.startswith('1.0'):
            log.error('Unexpected MIME-Version: {0}'.format(mime_version))

        new_msg = SpoolMessage() if created else Message()
        new_msg.data_sha256 = sha256(body_string).hexdigest()

        # clean_subject strips re:, fwd: etc.
        new_msg.subject = parsed.clean_subject
        new_msg.from_addr = parse_email_address_list(
            parsed.headers.get('From'))
        new_msg.sender_addr = parse_email_address_list(
            parsed.headers.get('Sender'))
        new_msg.reply_to = parse_email_address_list(
            parsed.headers.get('Reply-To'))

        new_msg.to_addr = parse_email_address_list(parsed.headers.getall('To'))
        new_msg.cc_addr = parse_email_address_list(parsed.headers.getall('Cc'))
        new_msg.bcc_addr = parse_email_address_list(
            parsed.headers.getall('Bcc'))

        new_msg.in_reply_to = parsed.headers.get('In-Reply-To')
        new_msg.message_id_header = parsed.headers.get('Message-Id')

        new_msg.received_date = received_date

        # Optional mailing list headers
        new_msg.mailing_list_headers = parse_ml_headers(parsed.headers)

        # Custom Inbox header
        new_msg.inbox_uid = parsed.headers.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        new_msg.references = parse_references(
            parsed.headers.get('References', ''),
            parsed.headers.get('In-Reply-To', ''))

        new_msg.size = len(body_string)  # includes headers text

        i = 0  # for walk_index

        # Store all message headers as object with index 0
        headers_part = Part()
        headers_part.namespace_id = account.namespace.id
        headers_part.message = new_msg
        headers_part.walk_index = i
        headers_part.data = json.dumps(parsed.headers.items())
        new_msg.parts.append(headers_part)

        for mimepart in parsed.walk(
                with_self=parsed.content_type.is_singlepart()):
            # The index advances for skipped parts too, so stored walk_index
            # values keep matching the walk order.
            i += 1
            if mimepart.content_type.is_multipart():
                log.warning("multipart sub-part found! on {}"
                            .format(new_msg.g_msgid))
                continue  # TODO should we store relations?

            new_part = Part()
            new_part.namespace_id = account.namespace.id
            new_part.message = new_msg
            new_part.walk_index = i
            new_part.misc_keyval = mimepart.headers.items()  # everything
            new_part.content_type = mimepart.content_type.value
            new_part.filename = trim_filename(
                mimepart.content_type.params.get('name'),
                log=log)
            # TODO maybe also trim other headers?

            if mimepart.content_disposition[0] is not None:
                value, params = mimepart.content_disposition
                if value not in ['inline', 'attachment']:
                    # The template has four placeholders; `value` fills {3}.
                    # Leaving it out raised IndexError instead of logging.
                    errmsg = """
    Unknown Content-Disposition on message {0} found in {1}.
    Bad Content-Disposition was: '{2}'
    Parsed Content-Disposition was: '{3}'""".format(
                        mid, folder_name, mimepart.content_disposition,
                        value)
                    log.error(errmsg)
                    continue
                else:
                    new_part.content_disposition = value
                    if value == 'attachment':
                        # The disposition's filename takes precedence over
                        # the Content-Type `name` parameter.
                        new_part.filename = trim_filename(
                            params.get('filename'),
                            log=log)

            if mimepart.body is None:
                data_to_write = ''
            elif new_part.content_type.startswith('text'):
                data_to_write = mimepart.body.encode('utf-8', 'strict')
                # normalize mac/win/unix newlines
                data_to_write = data_to_write \
                    .replace('\r\n', '\n').replace('\r', '\n')
            else:
                data_to_write = mimepart.body
            if data_to_write is None:
                data_to_write = ''

            new_part.content_id = mimepart.headers.get('Content-Id')
            new_part.data = data_to_write
            new_msg.parts.append(new_part)
    except mime.DecodingError:
        # occasionally iconv will fail via maximum recursion depth
        log_decode_error(account.id, folder_name, mid, body_string)
        log.error('DecodeError, msg logged to {0}'.format(
            get_errfilename(account.id, folder_name, mid)))
        return
    except RuntimeError:
        log_decode_error(account.id, folder_name, mid, body_string)
        log.error('RuntimeError<iconv> msg logged to {0}'.format(
            get_errfilename(account.id, folder_name, mid)))
        return

    new_msg.calculate_sanitized_body()
    return new_msg
Example #4
0
def slurp_imap_namespace_gmail(imap, db, namespace=None, account=None):
    """Copy a Gmail IMAP account's folder and message metadata into `db`.

    Runs in two transactions (`with db:` blocks):

    1. Record every folder, its flags, whether it is selectable, and the
       special-use folders (INBOX plus the RFC 6154 flags).
    2. For every selectable folder, record its UIDs, fetch message metadata
       in batches, store each message once (keyed on X-GM-MSGID), store the
       per-folder Gmail labels, then build threads and folder_threads.

    Parameters
    ----------
    imap : object
        IMAP client exposing list_folders(), select_folder(name,
        readonly=True), search(criteria) and fetch(uids, items).
    db : object
        Database connection usable as a context manager and exposing
        execute() / executemany() with `?` and `:name` placeholders.
        Rows must be indexable by column name ('id') -- presumably a
        sqlite3 connection with a Row factory; confirm against the caller.
    namespace, account :
        Accepted for interface compatibility; not used by this function.

    Raises
    ------
    AssertionError
        If the number of UIDs returned by SEARCH ALL differs from the
        EXISTS count reported when the folder was examined.
    """
    # folder attrs -> RFC 6154 Special-Use mailbox flags
    singleton_flags = {
        'all_folder': u'\\All',
        'archive_folder': u'\\Archive',
        'drafts_folder': u'\\Drafts',
        'starred_folder': u'\\Flagged',
        'spam_folder': u'\\Junk',
        'sent_folder': u'\\Sent',
        'trash_folder': u'\\Trash',
    }

    # Fetch settings are the same for every folder and batch.
    batch_size = 1000
    fetch_data = ['RFC822.SIZE', 'ENVELOPE', 'FLAGS',
                  'X-GM-MSGID', 'X-GM-THRID', 'X-GM-LABELS',
                  'INTERNALDATE', 'RFC822.HEADER']

    # List folders -- Returns sequence of (flags, delimiter, name)
    folders_fdn = imap.list_folders()
    with db:
        # Folder names & delimiters
        db.executemany("""
            INSERT INTO folders (
                folder_name, clean_folder_name, imap_delimiter
            ) VALUES (?, ?, ?)
            """, ((name, cleanup_folder_name(name), delimiter)
                  for flags, delimiter, name in folders_fdn))

        # Folder flags
        db.executemany("""
            INSERT INTO folder_flags (folder_name, flag) VALUES (?, ?)
            """, ((name, flag)
                  for flags, delimiter, name in folders_fdn
                  for flag in flags))

        # Set imap_noselect = 1 on folders that have the \Noselect flag;
        # Set imap_noselect = 0 on folders that don't.
        # The backslash is doubled so Python passes a literal backslash to
        # SQL instead of trying to parse an (invalid) \N escape sequence.
        db.execute("""
            UPDATE folders SET imap_noselect = (
                SELECT folder_flags.flag IS NOT NULL
                FROM folders AS a LEFT JOIN folder_flags ON (
                    a.folder_name = folder_flags.folder_name AND
                    folder_flags.flag = '\\Noselect'
                )
                WHERE folders.folder_name = a.folder_name
            )
            """)

        # Insert 'inbox_folder' -> 'INBOX' if there is an INBOX folder, which
        # there should always be, I think.
        db.execute("""
            INSERT INTO special_folders (attr_name, folder_name)
            SELECT ?, folder_name FROM folders WHERE folder_name = ?
            """, ['inbox_folder', 'INBOX'])

        # Insert other special folder names
        db.executemany("""
            INSERT INTO special_folders (attr_name, folder_name)
            SELECT ?, folder_name FROM folder_flags WHERE flag = ?
            """, singleton_flags.items())

    # Fetch all messages from each folder
    with db:
        folder_names = [row[0] for row in db.execute(
            "SELECT folder_name FROM folders WHERE NOT imap_noselect")]

        for folder_name in folder_names:
            # EXAMINE the folder
            examine_response = imap.select_folder(folder_name, readonly=True)

            # Update imap_uidvalidity
            db.execute("""
                UPDATE folders
                SET imap_uidvalidity = ?, imap_uidnext = ?
                WHERE folder_name = ?
                """, [examine_response[u'UIDVALIDITY'],
                      examine_response[u'UIDNEXT'],
                      folder_name])

            # Get uids of the messages in the folder
            imap_uids = imap.search(u'ALL')

            # Result should match the stated number of messages in the folder.
            if len(imap_uids) != examine_response[u'EXISTS']:
                raise AssertionError("len(imap_uids)={0}, EXISTS={1!r}".format(
                    len(imap_uids), examine_response[u'EXISTS']))

            # Create folder_messages entries
            db.executemany("""
                INSERT INTO folder_messages (folder_name, imap_uid)
                VALUES (?, ?)
                """, ((folder_name, imap_uid) for imap_uid in imap_uids))

            # NOTE: every selectable folder is fetched.  Restricting the
            # fetch to the \All, \Trash and \Sent folders (which *should*
            # cover all Gmail messages) was tried and is currently disabled.

            # Get folder messages
            for i in range(0, len(imap_uids), batch_size):
                imap_uids_batch = imap_uids[i:i + batch_size]

                # Fetch message info from the IMAP server
                fetch_response = imap.fetch(imap_uids_batch, fetch_data)

                # Insert each message into the messages table unless a row
                # with the same X-GM-MSGID is already stored.
                for uid, data in fetch_response.items():
                    headers = MimeHeaders.from_stream(
                        StringIO(data['RFC822.HEADER']))
                    envelope = data['ENVELOPE']
                    msg_data = dict(
                        date=data['INTERNALDATE'],
                        subject=envelope.subject,
                        in_reply_to=envelope.in_reply_to,
                        size=data['RFC822.SIZE'],
                        message_id_header=envelope.message_id,
                        x_gm_thrid=unicode(data['X-GM-THRID']),
                        x_gm_msgid=unicode(data['X-GM-MSGID']),
                        sender_addr=json.dumps(
                            parse_email_address_list(headers.get('Sender'))),
                        from_addr=json.dumps(
                            parse_email_address_list(headers.get('From'))),
                        reply_to_addr=json.dumps(
                            parse_email_address_list(headers.get('Reply-To'))),
                        to_addr=json.dumps(
                            parse_email_address_list(headers.get('To'))),
                        cc_addr=json.dumps(
                            parse_email_address_list(headers.get('Cc'))),
                        bcc_addr=json.dumps(
                            parse_email_address_list(headers.get('Bcc'))),
                    )
                    msg_data['clean_subject'] = cleanup_subject(
                        parse_header_value('Subject', msg_data['subject']))

                    # Check if we've already stored the message
                    cur = db.execute("""
                        SELECT id, x_gm_msgid FROM messages
                        WHERE x_gm_msgid = :x_gm_msgid
                        """, msg_data)
                    row = cur.fetchone()  # None when nothing matched
                    message_id = row['id'] if row is not None else None

                    # If we've never stored the message, store it now.
                    if message_id is None:
                        cur = db.execute("""
                            INSERT INTO messages (
                                date, subject, clean_subject,
                                in_reply_to, size, message_id_header,
                                x_gm_msgid, x_gm_thrid,
                                sender_addr, from_addr, reply_to_addr,
                                to_addr, cc_addr, bcc_addr
                            ) VALUES (
                                :date, :subject, :clean_subject,
                                :in_reply_to, :size, :message_id_header,
                                :x_gm_msgid, :x_gm_thrid,
                                :sender_addr, :from_addr, :reply_to_addr,
                                :to_addr, :cc_addr, :bcc_addr
                            )
                            """, msg_data)
                        message_id = cur.lastrowid

                    # Store the Gmail labels (these can be different in
                    # different folders; e.g. messages in the 'Sent' folder are
                    # missing the u'\\Sent' label)
                    db.executemany("""
                        INSERT INTO folder_message_gm_labels
                            (folder_name, message_id, label)
                        VALUES (?, ?, ?)
                        """, ((folder_name, message_id, label)
                              for label in data['X-GM-LABELS']))

                    # Mark the message as being in the current folder.
                    db.execute("""
                        UPDATE folder_messages
                        SET message_id = ?
                        WHERE folder_name = ? AND imap_uid = ?
                        """, (message_id, folder_name, uid))

        # Construct threads (assuming gmail for now)
        db.execute("""
            INSERT INTO threads (x_gm_thrid)
            SELECT DISTINCT x_gm_thrid FROM messages
            """)
        db.execute("""
            INSERT INTO thread_messages (thread_id, message_id)
            SELECT threads.id, messages.id
            FROM threads, messages
            WHERE threads.x_gm_thrid = messages.x_gm_thrid
            """)

        # Construct folder_threads
        db.execute("""
            INSERT INTO folder_threads (folder_name, thread_id)
            SELECT DISTINCT
                folder_messages.folder_name, thread_messages.thread_id
            FROM
                folder_messages
                LEFT JOIN thread_messages USING (message_id)
            """)
Example #5
0
# Email from the sync dump exported to the 'test' db
with open('tests/data/messages/mailing_list_message.txt', 'r') as f:
    message = f.read()

# Repr for testing
parsed = mime.from_string(message)
headers = json.dumps(parsed.headers.items())
message_id = parsed.headers.get('Message-ID')

# Drop a leading "Re: " only.  str.strip('Re: ') treats its argument as a set
# of characters and would also eat any 'R', 'e', ':' or ' ' found at either
# end of the subject (e.g. a subject ending in "e").
subject = parsed.headers.get('Subject')
if subject.startswith('Re: '):
    subject = subject[len('Re: '):]

sender = parsed.headers.get('Sender')
delivered_to = parsed.headers.get('Delivered-To')

# parse_email_address_list() results are indexed as [entry][1] here --
# presumably (name, address) pairs; confirm against its definition.
_to = parsed.headers.get('To')
to_addr = parse_email_address_list(_to)[0][1]

_from = parsed.headers.get('From')
from_addr = parse_email_address_list(_from)[0][1]

# Date header -> POSIX timestamp (timezone applied) -> naive local datetime
date = parsed.headers.get('Date')
parsed_date = parsedate_tz(date)
timestamp = mktime_tz(parsed_date)
received_date = datetime.fromtimestamp(timestamp)

# We have to hard-code these values unfortunately
msg_id = 2
thread_id = 2
mailing_list_headers = {
    "List-Id": "<golang-nuts.googlegroups.com>",
    "List-Post": "<http://groups.google.com/group/golang-nuts/post>, <mailto:[email protected]>",
Example #6
0
    def __init__(self, account=None, mid=None, folder_name=None,
                 received_date=None, flags=None, body_string=None,
                 *args, **kwargs):
        """ Parse a raw message and populate this Message and its Parts.

        Use .create() instead to handle common errors!

        (Can't abort object creation in a constructor.)

        When none of the named arguments below is supplied (the drafts
        case), nothing is parsed and only MailSyncBase.__init__ runs.

        Otherwise the message headers are stored as a Part with walk_index 0
        and every non-multipart MIME part is appended to self.parts with an
        increasing walk_index. Parts carrying a Content-Disposition other
        than 'inline' or 'attachment' are logged and skipped.

        Parameters
        ----------
        account :
            Object whose ``namespace.id`` is stamped on every created Part.
        mid :
            Message identifier; only used in error log output here.
        folder_name :
            Folder the message was found in; only used in error log output
            here.
        received_date :
            Stored unchanged on ``self.received_date``.
        flags :
            Only consulted to decide whether this is the no-argument
            (drafts) case; not otherwise used by this constructor.
        body_string :
            The full raw message, including headers.
        """
        _rqd = [account, mid, folder_name, received_date, flags, body_string]

        # for drafts - skip parsing
        if not any(_rqd):
            MailSyncBase.__init__(self, *args, **kwargs)
            return

        parsed = mime.from_string(body_string)

        mime_version = parsed.headers.get('Mime-Version')
        # NOTE: sometimes MIME-Version is set to "1.0 (1.0)", hence the
        # .startswith
        if mime_version is not None and not mime_version.startswith('1.0'):
            log.error('Unexpected MIME-Version: {0}'.format(mime_version))

        self.data_sha256 = sha256(body_string).hexdigest()

        # clean_subject strips re:, fwd: etc.
        self.subject = parsed.clean_subject
        self.from_addr = parse_email_address_list(
            parsed.headers.get('From'))
        self.sender_addr = parse_email_address_list(
            parsed.headers.get('Sender'))
        self.reply_to = parse_email_address_list(
            parsed.headers.get('Reply-To'))

        self.to_addr = parse_email_address_list(
            parsed.headers.getall('To'))
        self.cc_addr = parse_email_address_list(
            parsed.headers.getall('Cc'))
        self.bcc_addr = parse_email_address_list(
            parsed.headers.getall('Bcc'))

        self.in_reply_to = parsed.headers.get('In-Reply-To')
        self.message_id_header = parsed.headers.get('Message-Id')

        self.received_date = received_date

        # Optional mailing list headers
        self.mailing_list_headers = parse_ml_headers(parsed.headers)

        # Custom Inbox header
        self.inbox_uid = parsed.headers.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        self.references = parse_references(
            parsed.headers.get('References', ''),
            parsed.headers.get('In-Reply-To', ''))

        self.size = len(body_string)  # includes headers text

        from inbox.models.block import Part

        # Store all message headers as object with index 0
        headers_part = Part()
        headers_part.namespace_id = account.namespace.id
        headers_part.message = self
        headers_part.walk_index = 0
        headers_part.data = json.dumps(parsed.headers.items())
        self.parts.append(headers_part)

        # walk_index starts at 1 (0 is the headers part) and advances for
        # every walked part, including the ones skipped below.
        walked_parts = parsed.walk(
            with_self=parsed.content_type.is_singlepart())
        for walk_index, mimepart in enumerate(walked_parts, 1):
            if mimepart.content_type.is_multipart():
                log.warning("multipart sub-part found! on {}"
                            .format(self.g_msgid))
                continue  # TODO should we store relations?

            new_part = Part()
            new_part.namespace_id = account.namespace.id
            new_part.message = self
            new_part.walk_index = walk_index
            new_part.misc_keyval = mimepart.headers.items()  # everything
            new_part.content_type = mimepart.content_type.value
            new_part.filename = _trim_filename(
                mimepart.content_type.params.get('name'),
                log=log)
            # TODO maybe also trim other headers?

            if mimepart.content_disposition[0] is not None:
                value, params = mimepart.content_disposition
                if value not in ['inline', 'attachment']:
                    # The template has four placeholders; supply the
                    # offending disposition value and the full parsed
                    # (value, params) pair so formatting cannot raise
                    # IndexError on this error path.
                    errmsg = """
    Unknown Content-Disposition on message {0} found in {1}.
    Bad Content-Disposition was: '{2}'
    Parsed Content-Disposition was: '{3}'""".format(
                        mid, folder_name, value,
                        mimepart.content_disposition)
                    log.error(errmsg)
                    continue
                else:
                    new_part.content_disposition = value
                    if value == 'attachment':
                        # An attachment's own filename takes precedence
                        # over the Content-Type 'name' parameter set above.
                        new_part.filename = _trim_filename(
                            params.get('filename'),
                            log=log)

            body = mimepart.body
            if body is None:
                data_to_write = ''
            elif new_part.content_type.startswith('text'):
                data_to_write = body.encode('utf-8', 'strict')
                # normalize mac/win/unix newlines
                data_to_write = data_to_write \
                    .replace('\r\n', '\n').replace('\r', '\n')
            else:
                data_to_write = body

            new_part.content_id = mimepart.headers.get('Content-Id')
            new_part.data = data_to_write
            self.parts.append(new_part)

        self.calculate_sanitized_body()
        MailSyncBase.__init__(self, *args, **kwargs)