def _parse_metadata(self, parsed, body_string, received_date,
                        account_id, folder_name, mid):
        """Copy header-derived metadata from ``parsed`` onto this object.

        Sets the checksum, subject, address fields, threading headers,
        received date, custom UID, references and size attributes.

        Parameters
        ----------
        parsed
            Parsed MIME message exposing ``headers`` and ``subject``.
        body_string
            Raw message; hashed with SHA-256 and measured for ``size``.
        received_date
            Used as the received date when truthy; otherwise the result of
            ``get_internaldate`` on the Date and Received headers is used.
        account_id, folder_name, mid
            Only attached as context to the MIME-Version warning.
        """
        headers = parsed.headers

        # Values such as '1.0 (1.0)' occur, so only the prefix is compared.
        version = headers.get('Mime-Version')
        if version is not None:
            if not version.startswith('1.0'):
                log.warning('Unexpected MIME-Version',
                            account_id=account_id, folder_name=folder_name,
                            mid=mid, mime_version=version)

        digest = sha256(body_string)
        self.data_sha256 = digest.hexdigest()

        self.subject = parsed.subject

        # Attribute name -> address header it is parsed from.
        address_fields = (
            ('from_addr', 'From'),
            ('sender_addr', 'Sender'),
            ('reply_to', 'Reply-To'),
            ('to_addr', 'To'),
            ('cc_addr', 'Cc'),
            ('bcc_addr', 'Bcc'),
        )
        for attr_name, header_name in address_fields:
            setattr(self, attr_name,
                    parse_mimepart_address_header(parsed, header_name))

        self.in_reply_to = headers.get('In-Reply-To')
        self.message_id_header = headers.get('Message-Id')

        if received_date:
            self.received_date = received_date
        else:
            self.received_date = get_internaldate(headers.get('Date'),
                                                  headers.get('Received'))

        # Custom Inbox header
        self.inbox_uid = headers.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        references_header = headers.get('References', '')
        in_reply_to_header = headers.get('In-Reply-To', '')
        self.references = parse_references(references_header,
                                           in_reply_to_header)

        # The length covers the header text as well as the body.
        self.size = len(body_string)
Beispiel #2
0
    def _parse_metadata(self, parsed, body_string, received_date,
                        account_id, folder_name, mid):
        """Fill in this message's metadata attributes from ``parsed``.

        ``body_string`` is hashed (SHA-256) and measured; ``received_date``
        wins over the header-derived date whenever it is truthy.
        ``account_id``, ``folder_name`` and ``mid`` are used solely as
        context for the MIME-Version warning.
        """
        header = parsed.headers.get

        declared_version = header('Mime-Version')
        # A value like '1.0 (1.0)' is acceptable, hence the prefix test.
        version_ok = (declared_version is None or
                      declared_version.startswith('1.0'))
        if not version_ok:
            log.warning('Unexpected MIME-Version',
                        account_id=account_id, folder_name=folder_name,
                        mid=mid, mime_version=declared_version)

        self.data_sha256 = sha256(body_string).hexdigest()

        self.subject = parsed.subject
        self.from_addr = parse_mimepart_address_header(parsed, 'From')
        self.sender_addr = parse_mimepart_address_header(parsed, 'Sender')
        self.reply_to = parse_mimepart_address_header(parsed, 'Reply-To')
        self.to_addr = parse_mimepart_address_header(parsed, 'To')
        self.cc_addr = parse_mimepart_address_header(parsed, 'Cc')
        self.bcc_addr = parse_mimepart_address_header(parsed, 'Bcc')

        self.in_reply_to = header('In-Reply-To')
        self.message_id_header = header('Message-Id')

        # Prefer the caller-supplied date; otherwise derive one from headers.
        self.received_date = received_date or get_internaldate(
            header('Date'), header('Received'))

        # Custom Inbox header
        self.inbox_uid = header('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        self.references = parse_references(header('References', ''),
                                           header('In-Reply-To', ''))

        # Size of the whole raw message, header text included.
        self.size = len(body_string)
Beispiel #3
0
    def _parse_metadata(self, parsed, body_string, received_date, account_id,
                        folder_name, mid):
        """Populate header-derived metadata on this message.

        Parameters
        ----------
        parsed
            Parsed MIME message exposing ``headers`` and ``subject``.
        body_string
            Raw message; only its length is used here (stored as ``size``).
        received_date
            Used as the received date when truthy; otherwise the date is
            computed by ``get_internaldate`` from the Date and Received
            headers. Either way microseconds are zeroed afterwards.
        account_id, folder_name, mid
            Context attached to the warnings emitted by this method.
        """
        mime_version = parsed.headers.get("Mime-Version")
        # sometimes MIME-Version is '1.0 (1.0)', hence the .startswith()
        if mime_version is not None and not mime_version.startswith("1.0"):
            log.warning(
                "Unexpected MIME-Version",
                account_id=account_id,
                folder_name=folder_name,
                mid=mid,
                mime_version=mime_version,
            )

        self.subject = parsed.subject
        self.from_addr = parse_mimepart_address_header(parsed, "From")
        self.sender_addr = parse_mimepart_address_header(parsed, "Sender")
        self.reply_to = parse_mimepart_address_header(parsed, "Reply-To")
        self.to_addr = parse_mimepart_address_header(parsed, "To")
        self.cc_addr = parse_mimepart_address_header(parsed, "Cc")
        self.bcc_addr = parse_mimepart_address_header(parsed, "Bcc")

        self.in_reply_to = parsed.headers.get("In-Reply-To")

        # The RFC mandates that the Message-Id header must be at most 998
        # characters. Sadly, not everybody follows specs.
        raw_message_id = parsed.headers.get("Message-Id")
        self.message_id_header = raw_message_id
        if raw_message_id and len(raw_message_id) > 998:
            self.message_id_header = raw_message_id[:998]
            # Log the untruncated value under a named key, like every other
            # log call here, rather than as a stray positional argument.
            log.warning(
                "Message-Id header too long. Truncating",
                account_id=account_id,
                folder_name=folder_name,
                mid=mid,
                message_id_header=raw_message_id,
                logstash_tag="truncated_message_id",
            )

        self.received_date = (received_date if received_date else
                              get_internaldate(parsed.headers.get("Date"),
                                               parsed.headers.get("Received")))

        # It seems MySQL rounds up fractional seconds in a weird way,
        # preventing us from reconciling messages correctly. See:
        # https://github.com/nylas/sync-engine/commit/ed16b406e0a for
        # more details.
        self.received_date = self.received_date.replace(microsecond=0)

        # Custom Nylas header
        self.nylas_uid = parsed.headers.get("X-INBOX-ID")

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        self.references = parse_references(
            parsed.headers.get("References", ""),
            parsed.headers.get("In-Reply-To", ""))

        self.size = len(body_string)  # includes headers text
Beispiel #4
0
    def _parse_metadata(self, parsed, body_string, received_date,
                        account_id, folder_name, mid):
        """Populate header-derived metadata on this message.

        Parameters
        ----------
        parsed
            Parsed MIME message exposing ``headers`` and ``subject``.
        body_string
            Raw message; only its length is used here (stored as ``size``).
        received_date
            Used as the received date when truthy; otherwise the date is
            computed by ``get_internaldate`` from the Date and Received
            headers. Either way microseconds are zeroed afterwards.
        account_id, folder_name, mid
            Context attached to the warnings emitted by this method.
        """
        mime_version = parsed.headers.get('Mime-Version')
        # sometimes MIME-Version is '1.0 (1.0)', hence the .startswith()
        if mime_version is not None and not mime_version.startswith('1.0'):
            log.warning('Unexpected MIME-Version',
                        account_id=account_id, folder_name=folder_name,
                        mid=mid, mime_version=mime_version)

        self.subject = parsed.subject
        self.from_addr = parse_mimepart_address_header(parsed, 'From')
        self.sender_addr = parse_mimepart_address_header(parsed, 'Sender')
        self.reply_to = parse_mimepart_address_header(parsed, 'Reply-To')
        self.to_addr = parse_mimepart_address_header(parsed, 'To')
        self.cc_addr = parse_mimepart_address_header(parsed, 'Cc')
        self.bcc_addr = parse_mimepart_address_header(parsed, 'Bcc')

        self.in_reply_to = parsed.headers.get('In-Reply-To')

        # The RFC mandates that the Message-Id header must be at most 998
        # characters. Sadly, not everybody follows specs.
        raw_message_id = parsed.headers.get('Message-Id')
        self.message_id_header = raw_message_id
        if raw_message_id and len(raw_message_id) > 998:
            self.message_id_header = raw_message_id[:998]
            # Log the untruncated value under a named key, like every other
            # log call here, rather than as a stray positional argument.
            log.warning('Message-Id header too long. Truncating',
                        account_id=account_id, folder_name=folder_name,
                        mid=mid, message_id_header=raw_message_id,
                        logstash_tag='truncated_message_id')

        self.received_date = received_date if received_date else \
            get_internaldate(parsed.headers.get('Date'),
                             parsed.headers.get('Received'))

        # It seems MySQL rounds up fractional seconds in a weird way,
        # preventing us from reconciling messages correctly. See:
        # https://github.com/nylas/sync-engine/commit/ed16b406e0a for
        # more details.
        self.received_date = self.received_date.replace(microsecond=0)

        # Custom Nylas header
        self.nylas_uid = parsed.headers.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        self.references = parse_references(
            parsed.headers.get('References', ''),
            parsed.headers.get('In-Reply-To', ''))

        self.size = len(body_string)  # includes headers text
Beispiel #5
0
    def _parse_metadata(self, parsed, body_string, received_date, account_id,
                        folder_name, mid):
        """Populate header-derived metadata on this message.

        Parameters
        ----------
        parsed
            Parsed MIME message exposing ``headers`` and ``subject``.
        body_string
            Raw message; only its length is used here (stored as ``size``).
        received_date
            Used as the received date when truthy; otherwise the date is
            computed by ``get_internaldate`` from the Date and Received
            headers.
        account_id, folder_name, mid
            Context attached to the warnings emitted by this method.
        """
        mime_version = parsed.headers.get('Mime-Version')
        # sometimes MIME-Version is '1.0 (1.0)', hence the .startswith()
        if mime_version is not None and not mime_version.startswith('1.0'):
            log.warning('Unexpected MIME-Version',
                        account_id=account_id,
                        folder_name=folder_name,
                        mid=mid,
                        mime_version=mime_version)

        self.subject = parsed.subject
        self.from_addr = parse_mimepart_address_header(parsed, 'From')
        self.sender_addr = parse_mimepart_address_header(parsed, 'Sender')
        self.reply_to = parse_mimepart_address_header(parsed, 'Reply-To')
        self.to_addr = parse_mimepart_address_header(parsed, 'To')
        self.cc_addr = parse_mimepart_address_header(parsed, 'Cc')
        self.bcc_addr = parse_mimepart_address_header(parsed, 'Bcc')

        self.in_reply_to = parsed.headers.get('In-Reply-To')

        # The RFC mandates that the Message-Id header must be at most 998
        # characters. Sadly, not everybody follows specs.
        raw_message_id = parsed.headers.get('Message-Id')
        self.message_id_header = raw_message_id
        if raw_message_id and len(raw_message_id) > 998:
            self.message_id_header = raw_message_id[:998]
            # Log the untruncated value under a named key, like every other
            # log call here, rather than as a stray positional argument.
            log.warning('Message-Id header too long. Truncating',
                        account_id=account_id,
                        folder_name=folder_name,
                        mid=mid,
                        message_id_header=raw_message_id,
                        logstash_tag='truncated_message_id')

        self.received_date = received_date if received_date else \
            get_internaldate(parsed.headers.get('Date'),
                             parsed.headers.get('Received'))

        # Custom Inbox header
        self.inbox_uid = parsed.headers.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        self.references = parse_references(
            parsed.headers.get('References', ''),
            parsed.headers.get('In-Reply-To', ''))

        self.size = len(body_string)  # includes headers text
Beispiel #6
0
    def _parse_metadata(self, parsed, body_string, received_date,
                        account_id, folder_name, mid):
        """Populate header-derived metadata on this message.

        Parameters
        ----------
        parsed
            Parsed MIME message exposing ``headers`` and ``subject``.
        body_string
            Raw message; only its length is used here (stored as ``size``).
        received_date
            Used as the received date when truthy; otherwise the date is
            computed by ``get_internaldate`` from the Date and Received
            headers.
        account_id, folder_name, mid
            Context attached to the warnings emitted by this method.
        """
        mime_version = parsed.headers.get('Mime-Version')
        # sometimes MIME-Version is '1.0 (1.0)', hence the .startswith()
        if mime_version is not None and not mime_version.startswith('1.0'):
            log.warning('Unexpected MIME-Version',
                        account_id=account_id, folder_name=folder_name,
                        mid=mid, mime_version=mime_version)

        self.subject = parsed.subject
        self.from_addr = parse_mimepart_address_header(parsed, 'From')
        self.sender_addr = parse_mimepart_address_header(parsed, 'Sender')
        self.reply_to = parse_mimepart_address_header(parsed, 'Reply-To')
        self.to_addr = parse_mimepart_address_header(parsed, 'To')
        self.cc_addr = parse_mimepart_address_header(parsed, 'Cc')
        self.bcc_addr = parse_mimepart_address_header(parsed, 'Bcc')

        self.in_reply_to = parsed.headers.get('In-Reply-To')

        # The RFC mandates that the Message-Id header must be at most 998
        # characters. Sadly, not everybody follows specs.
        raw_message_id = parsed.headers.get('Message-Id')
        self.message_id_header = raw_message_id
        if raw_message_id and len(raw_message_id) > 998:
            self.message_id_header = raw_message_id[:998]
            # Log the untruncated value under a named key, like every other
            # log call here, rather than as a stray positional argument.
            log.warning('Message-Id header too long. Truncating',
                        account_id=account_id, folder_name=folder_name,
                        mid=mid, message_id_header=raw_message_id,
                        logstash_tag='truncated_message_id')

        self.received_date = received_date if received_date else \
            get_internaldate(parsed.headers.get('Date'),
                             parsed.headers.get('Received'))

        # Custom Inbox header
        self.inbox_uid = parsed.headers.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        self.references = parse_references(
            parsed.headers.get('References', ''),
            parsed.headers.get('In-Reply-To', ''))

        self.size = len(body_string)  # includes headers text
Beispiel #7
0
    def create_from_synced(cls, account, mid, folder_name, received_date,
                           body_string):
        """ Parses message data and writes out db metadata and MIME blocks.

        Returns the new Message, which links to the new Part and Block objects
        through relationships. All new objects are uncommitted.

        Threads are not computed here; you gotta do that separately.

        Parameters
        ----------
        account
            Account the message belongs to; its ``namespace`` must be set.

        mid : int
            The account backend-specific message identifier; it's only used for
            logging errors.

        folder_name
            Name of the folder the message was synced from; used for logging.

        received_date
            Used as the received date when truthy; otherwise the date is
            derived from the Date and Received headers.

        body_string : str
            The full message including headers (encoded). Must not be a
            ``unicode`` object.

        Raises
        ------
        ValueError
            If account, mid, folder_name or body_string is None.
        """
        _rqd = [account, mid, folder_name, body_string]
        if any(v is None for v in _rqd):
            raise ValueError(
                "Required keyword arguments: account, mid, folder_name, "
                "body_string")
        # stop trickle-down bugs
        assert account.namespace is not None
        assert not isinstance(body_string, unicode)

        msg = Message()

        try:
            msg.namespace_id = account.namespace.id
            parsed = mime.from_string(body_string)

            mime_version = parsed.headers.get('Mime-Version')
            # sometimes MIME-Version is "1.0 (1.0)", hence the .startswith()
            if mime_version is not None and not mime_version.startswith('1.0'):
                log.warning('Unexpected MIME-Version',
                            account_id=account.id,
                            folder_name=folder_name,
                            mid=mid,
                            mime_version=mime_version)

            msg.data_sha256 = sha256(body_string).hexdigest()

            # clean_subject strips re:, fwd: etc.
            msg.subject = parsed.clean_subject
            msg.from_addr = parse_mimepart_address_header(parsed, 'From')
            msg.sender_addr = parse_mimepart_address_header(parsed, 'Sender')
            msg.reply_to = parse_mimepart_address_header(parsed, 'Reply-To')
            msg.to_addr = parse_mimepart_address_header(parsed, 'To')
            msg.cc_addr = parse_mimepart_address_header(parsed, 'Cc')
            msg.bcc_addr = parse_mimepart_address_header(parsed, 'Bcc')

            msg.in_reply_to = parsed.headers.get('In-Reply-To')
            msg.message_id_header = parsed.headers.get('Message-Id')

            msg.received_date = received_date if received_date else \
                get_internaldate(parsed.headers.get('Date'),
                                 parsed.headers.get('Received'))

            # Custom Inbox header
            msg.inbox_uid = parsed.headers.get('X-INBOX-ID')

            # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
            msg.references = parse_references(
                parsed.headers.get('References', ''),
                parsed.headers.get('In-Reply-To', ''))

            msg.size = len(body_string)  # includes headers text

            i = 0  # for walk_index

            from inbox.models.block import Block, Part

            # Store all message headers as object with index 0
            block = Block()
            block.namespace_id = account.namespace.id
            block.data = json.dumps(parsed.headers.items())

            headers_part = Part(block=block, message=msg)
            headers_part.walk_index = i
            msg.parts.append(headers_part)

            for mimepart in parsed.walk(
                    with_self=parsed.content_type.is_singlepart()):
                # walk_index advances for skipped multipart containers too.
                i += 1
                if mimepart.content_type.is_multipart():
                    log.warning('multipart sub-part found',
                                account_id=account.id,
                                folder_name=folder_name,
                                mid=mid)
                    continue  # TODO should we store relations?
                msg._parse_mimepart(mimepart, mid, i, account.namespace.id)

            msg.calculate_sanitized_body()
        except (mime.DecodingError, AttributeError, RuntimeError) as e:
            # Message parsing can fail for several reasons. Occasionally iconv
            # will fail via maximum recursion depth. EAS messages may be
            # missing Date and Received headers. In such cases, we still keep
            # the metadata and mark it as b0rked.
            _log_decode_error(account.id, folder_name, mid, body_string)
            err_filename = _get_errfilename(account.id, folder_name, mid)
            log.error('Message parsing error',
                      folder_name=folder_name,
                      account_id=account.id,
                      err_filename=err_filename,
                      error=e)
            msg._mark_error()

        # Occasionally people try to send messages to way too many
        # recipients. In such cases, empty the field and treat as a parsing
        # error so that we don't break the entire sync.
        for field in ('to_addr', 'cc_addr', 'bcc_addr', 'references'):
            value = getattr(msg, field)
            if json_field_too_long(value):
                _log_decode_error(account.id, folder_name, mid, body_string)
                err_filename = _get_errfilename(account.id, folder_name, mid)
                # Report the error file name so the logged message can be
                # located from this record, as the parsing-error branch does.
                log.error('Recipient field too long',
                          field=field,
                          account_id=account.id,
                          folder_name=folder_name,
                          mid=mid,
                          err_filename=err_filename)
                setattr(msg, field, [])
                msg._mark_error()

        return msg
Beispiel #8
0
    def __init__(self, account=None, mid=None, folder_name=None,
                 received_date=None, flags=None, body_string=None,
                 *args, **kwargs):
        """ Parses message data and writes out db metadata and MIME blocks.

        Populates this Message, which links to the new Part objects through
        relationships. All new objects are uncommitted.

        Threads are not computed here; you gotta do that separately.

        When none of account, mid, folder_name, flags and body_string is
        truthy (the draft case) only the base initializer runs. On a parsing
        failure the error is logged and the message is marked via
        ``mark_error()``; no exception propagates for those failures.

        Parameters
        ----------
        account
            Account the message belongs to; its ``namespace`` must be set.

        mid : int
            The account backend-specific message identifier; it's only used for
            logging errors.

        folder_name
            Name of the folder the message was synced from; used for logging.

        received_date
            Used as the received date when truthy; otherwise the date is
            derived from the Date and Received headers.

        flags
            Required alongside the other arguments, but not otherwise read
            in this method.

        body_string : str
            The full message including headers (encoded). Must not be a
            ``unicode`` object.

        Raises
        ------
        ValueError
            If some but not all of the required arguments are supplied.
        """
        _rqd = [account, mid, folder_name, flags, body_string]

        MailSyncBase.__init__(self, *args, **kwargs)

        # for drafts
        if not any(_rqd):
            return

        # At least one required argument is truthy here, so only the
        # "is anything missing" half of the check remains.
        if any(v is None for v in _rqd):
            raise ValueError(
                "Required keyword arguments: account, mid, folder_name, "
                "flags, body_string")

        # stop trickle-down bugs
        assert account.namespace is not None
        assert not isinstance(body_string, unicode)

        try:
            parsed = mime.from_string(body_string)

            mime_version = parsed.headers.get('Mime-Version')
            # sometimes MIME-Version is "1.0 (1.0)", hence the .startswith()
            if mime_version is not None and not mime_version.startswith('1.0'):
                log.warning('Unexpected MIME-Version',
                            account_id=account.id, folder_name=folder_name,
                            mid=mid, mime_version=mime_version)

            self.data_sha256 = sha256(body_string).hexdigest()

            # clean_subject strips re:, fwd: etc.
            self.subject = parsed.clean_subject
            self.from_addr = parse_email_address_list(
                parsed.headers.get('From'))
            self.sender_addr = parse_email_address_list(
                parsed.headers.get('Sender'))
            self.reply_to = parse_email_address_list(
                parsed.headers.get('Reply-To'))

            self.to_addr = parse_email_address_list(
                parsed.headers.getall('To'))
            self.cc_addr = parse_email_address_list(
                parsed.headers.getall('Cc'))
            self.bcc_addr = parse_email_address_list(
                parsed.headers.getall('Bcc'))

            self.in_reply_to = parsed.headers.get('In-Reply-To')
            self.message_id_header = parsed.headers.get('Message-Id')

            self.received_date = received_date if received_date else \
                get_internaldate(parsed.headers.get('Date'),
                                 parsed.headers.get('Received'))

            # Custom Inbox header
            self.inbox_uid = parsed.headers.get('X-INBOX-ID')

            # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
            self.references = parse_references(
                parsed.headers.get('References', ''),
                parsed.headers.get('In-Reply-To', ''))

            self.size = len(body_string)  # includes headers text

            i = 0  # for walk_index

            from inbox.models.block import Part

            # Store all message headers as object with index 0
            headers_part = Part()
            headers_part.namespace_id = account.namespace.id
            headers_part.message = self
            headers_part.walk_index = i
            headers_part.data = json.dumps(parsed.headers.items())
            self.parts.append(headers_part)

            for mimepart in parsed.walk(
                    with_self=parsed.content_type.is_singlepart()):
                # walk_index advances for skipped parts too.
                i += 1
                if mimepart.content_type.is_multipart():
                    log.warning('multipart sub-part found',
                                account_id=account.id, folder_name=folder_name,
                                mid=mid)
                    continue  # TODO should we store relations?

                new_part = Part()
                new_part.namespace_id = account.namespace.id
                new_part.message = self
                new_part.walk_index = i
                new_part.content_type = mimepart.content_type.value
                new_part.filename = _trim_filename(
                    mimepart.content_type.params.get('name'),
                    account.id, mid)
                # TODO maybe also trim other headers?

                if mimepart.content_disposition[0] is not None:
                    value, params = mimepart.content_disposition
                    if value not in ['inline', 'attachment']:
                        log.error(
                            'Unknown Content-Disposition',
                            account_id=account.id, mid=mid,
                            folder_name=folder_name,
                            bad_content_disposition=(
                                mimepart.content_disposition),
                            parsed_content_disposition=value)
                        continue
                    else:
                        new_part.content_disposition = value
                        if value == 'attachment':
                            # The disposition's filename replaces the one
                            # taken from the Content-Type 'name' parameter.
                            new_part.filename = _trim_filename(
                                params.get('filename'), account.id, mid)

                if mimepart.body is None:
                    data_to_write = ''
                elif new_part.content_type.startswith('text'):
                    data_to_write = mimepart.body.encode('utf-8', 'strict')
                    # normalize mac/win/unix newlines
                    data_to_write = data_to_write \
                        .replace('\r\n', '\n').replace('\r', '\n')
                else:
                    data_to_write = mimepart.body
                if data_to_write is None:
                    data_to_write = ''

                new_part.content_id = mimepart.headers.get('Content-Id')
                new_part.data = data_to_write
                self.parts.append(new_part)
            self.calculate_sanitized_body()
        except mime.DecodingError:
            # Occasionally iconv will fail via maximum recursion depth. We
            # still keep the metadata and mark it as b0rked.
            _log_decode_error(account.id, folder_name, mid, body_string)
            log.error('Message parsing DecodeError', account_id=account.id,
                      folder_name=folder_name, err_filename=_get_errfilename(
                          account.id, folder_name, mid))
            self.mark_error()
            return
        except AttributeError:
            # For EAS messages that are missing Date + Received headers, due
            # to the processing we do in inbox.util.misc.get_internaldate()
            _log_decode_error(account.id, folder_name, mid, body_string)
            log.error('Message parsing AttributeError', account_id=account.id,
                      folder_name=folder_name, err_filename=_get_errfilename(
                          account.id, folder_name, mid))
            self.mark_error()
            return
        except RuntimeError:
            _log_decode_error(account.id, folder_name, mid, body_string)
            # Pass the context as key/value pairs like the other handlers;
            # str.format() on a placeholder-free message dropped the error
            # file name entirely.
            log.error('Message parsing RuntimeError<iconv>',
                      account_id=account.id, folder_name=folder_name,
                      err_filename=_get_errfilename(
                          account.id, folder_name, mid))
            self.mark_error()
            return
Beispiel #9
0
def create_message(db_session, log, account, mid, folder_name, received_date,
                   flags, body_string, created):
    """ Parses message data and writes out db metadata and MIME blocks.

    Returns the new Message, which links to the new Block objects through
    relationships. All new objects are uncommitted. Returns None when the
    message cannot be decoded (the failure is logged instead).

    Threads are not computed here; you gotta do that separately.

    Parameters
    ----------
    db_session
        Not used by this function.

    log
        Logger used for warnings and errors.

    account
        Account the message belongs to; its ``namespace`` must be set.

    mid : int
        The account backend-specific message identifier; it's only used for
        logging errors.

    folder_name
        Name of the folder the message was synced from; used for logging.

    received_date
        Stored unchanged as the message's received date.

    flags
        Not used by this function.

    body_string : str
        The full message including headers (encoded). Must not be a
        ``unicode`` object.

    created
        When truthy a SpoolMessage is built instead of a Message.
    """
    # trickle-down bugs
    assert account is not None and account.namespace is not None
    assert not isinstance(body_string, unicode)

    try:
        parsed = mime.from_string(body_string)

        mime_version = parsed.headers.get('Mime-Version')
        # NOTE: sometimes MIME-Version is set to "1.0 (1.0)", hence the
        # .startswith
        if mime_version is not None and not mime_version.startswith('1.0'):
            log.error('Unexpected MIME-Version: {0}'.format(mime_version))

        new_msg = SpoolMessage() if created else Message()
        new_msg.data_sha256 = sha256(body_string).hexdigest()

        # clean_subject strips re:, fwd: etc.
        new_msg.subject = parsed.clean_subject
        new_msg.from_addr = parse_email_address_list(
            parsed.headers.get('From'))
        new_msg.sender_addr = parse_email_address_list(
            parsed.headers.get('Sender'))
        new_msg.reply_to = parse_email_address_list(
            parsed.headers.get('Reply-To'))

        new_msg.to_addr = parse_email_address_list(parsed.headers.getall('To'))
        new_msg.cc_addr = parse_email_address_list(parsed.headers.getall('Cc'))
        new_msg.bcc_addr = parse_email_address_list(
            parsed.headers.getall('Bcc'))

        new_msg.in_reply_to = parsed.headers.get('In-Reply-To')
        new_msg.message_id_header = parsed.headers.get('Message-Id')

        new_msg.received_date = received_date

        # Optional mailing list headers
        new_msg.mailing_list_headers = parse_ml_headers(parsed.headers)

        # Custom Inbox header
        new_msg.inbox_uid = parsed.headers.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        new_msg.references = parse_references(
            parsed.headers.get('References', ''),
            parsed.headers.get('In-Reply-To', ''))

        new_msg.size = len(body_string)  # includes headers text

        i = 0  # for walk_index

        # Store all message headers as object with index 0
        headers_part = Part()
        headers_part.namespace_id = account.namespace.id
        headers_part.message = new_msg
        headers_part.walk_index = i
        headers_part.data = json.dumps(parsed.headers.items())
        new_msg.parts.append(headers_part)

        for mimepart in parsed.walk(
                with_self=parsed.content_type.is_singlepart()):
            # walk_index advances for skipped parts too.
            i += 1
            if mimepart.content_type.is_multipart():
                log.warning("multipart sub-part found! on {}"
                            .format(new_msg.g_msgid))
                continue  # TODO should we store relations?

            new_part = Part()
            new_part.namespace_id = account.namespace.id
            new_part.message = new_msg
            new_part.walk_index = i
            new_part.misc_keyval = mimepart.headers.items()  # everything
            new_part.content_type = mimepart.content_type.value
            new_part.filename = trim_filename(
                mimepart.content_type.params.get('name'),
                log=log)
            # TODO maybe also trim other headers?

            if mimepart.content_disposition[0] is not None:
                value, params = mimepart.content_disposition
                if value not in ['inline', 'attachment']:
                    # The template has four placeholders; the parsed value
                    # must be supplied for {3} or .format() raises IndexError
                    # instead of logging.
                    errmsg = """
    Unknown Content-Disposition on message {0} found in {1}.
    Bad Content-Disposition was: '{2}'
    Parsed Content-Disposition was: '{3}'""".format(
                        mid, folder_name, mimepart.content_disposition,
                        value)
                    log.error(errmsg)
                    continue
                else:
                    new_part.content_disposition = value
                    if value == 'attachment':
                        # The disposition's filename replaces the one taken
                        # from the Content-Type 'name' parameter.
                        new_part.filename = trim_filename(
                            params.get('filename'),
                            log=log)

            if mimepart.body is None:
                data_to_write = ''
            elif new_part.content_type.startswith('text'):
                data_to_write = mimepart.body.encode('utf-8', 'strict')
                # normalize mac/win/unix newlines
                data_to_write = data_to_write \
                    .replace('\r\n', '\n').replace('\r', '\n')
            else:
                data_to_write = mimepart.body
            if data_to_write is None:
                data_to_write = ''

            new_part.content_id = mimepart.headers.get('Content-Id')
            new_part.data = data_to_write
            new_msg.parts.append(new_part)
    except mime.DecodingError:
        # occasionally iconv will fail via maximum recursion depth
        log_decode_error(account.id, folder_name, mid, body_string)
        log.error('DecodeError, msg logged to {0}'.format(
            get_errfilename(account.id, folder_name, mid)))
        return
    except RuntimeError:
        log_decode_error(account.id, folder_name, mid, body_string)
        log.error('RuntimeError<iconv> msg logged to {0}'.format(
            get_errfilename(account.id, folder_name, mid)))
        return

    new_msg.calculate_sanitized_body()
    return new_msg
Beispiel #10
0
    def create_from_synced(cls, account, mid, folder_name,
                           received_date, body_string):
        """ Parses message data and writes out db metadata and MIME blocks.

        Returns the new Message, which links to the new Part and Block objects
        through relationships. All new objects are uncommitted.

        Threads are not computed here; you gotta do that separately.

        If parsing raises mime.DecodingError, AttributeError or RuntimeError,
        the raw message is handed to _log_decode_error, the failure is logged,
        msg._mark_error() is called and the (partially populated) Message is
        still returned.

        Parameters
        ----------
        account : object
            Must not be None; its .id is used for logging and its .namespace
            (asserted non-None) supplies the namespace id.

        mid : int
            The account backend-specific message identifier; it is used in
            log output and passed through to _parse_mimepart.

        folder_name : str
            Name of the folder the message came from; used in log output.

        received_date : object
            Used as the message's received date when truthy; otherwise the
            date is derived from the Date and Received headers via
            get_internaldate.

        body_string : str
            The full message including headers (encoded). Must not be a
            unicode object.

        Raises
        ------
        ValueError
            If account, mid, folder_name or body_string is None.
        """
        # received_date is deliberately not required: it has a header-based
        # fallback below.
        if any(v is None for v in (account, mid, folder_name, body_string)):
            raise ValueError(
                "Required keyword arguments: account, mid, folder_name, "
                "body_string")
        # stop trickle-down bugs
        assert account.namespace is not None
        assert not isinstance(body_string, unicode)

        msg = Message()

        try:
            msg.namespace_id = account.namespace.id
            parsed = mime.from_string(body_string)

            mime_version = parsed.headers.get('Mime-Version')
            # sometimes MIME-Version is "1.0 (1.0)", hence the .startswith()
            if mime_version is not None and not mime_version.startswith('1.0'):
                log.warning('Unexpected MIME-Version',
                            account_id=account.id, folder_name=folder_name,
                            mid=mid, mime_version=mime_version)

            msg.data_sha256 = sha256(body_string).hexdigest()

            # clean_subject strips re:, fwd: etc.
            msg.subject = parsed.clean_subject
            msg.from_addr = parse_mimepart_address_header(parsed, 'From')
            msg.sender_addr = parse_mimepart_address_header(parsed, 'Sender')
            msg.reply_to = parse_mimepart_address_header(parsed, 'Reply-To')
            msg.to_addr = parse_mimepart_address_header(parsed, 'To')
            msg.cc_addr = parse_mimepart_address_header(parsed, 'Cc')
            msg.bcc_addr = parse_mimepart_address_header(parsed, 'Bcc')

            msg.in_reply_to = parsed.headers.get('In-Reply-To')
            msg.message_id_header = parsed.headers.get('Message-Id')

            msg.received_date = received_date if received_date else \
                get_internaldate(parsed.headers.get('Date'),
                                 parsed.headers.get('Received'))

            # Custom Inbox header
            msg.inbox_uid = parsed.headers.get('X-INBOX-ID')

            # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
            msg.references = parse_references(
                parsed.headers.get('References', ''),
                parsed.headers.get('In-Reply-To', ''))

            msg.size = len(body_string)  # includes headers text

            from inbox.models.block import Block, Part

            # Store all message headers as object with index 0
            block = Block()
            block.namespace_id = account.namespace.id
            block.data = json.dumps(parsed.headers.items())

            headers_part = Part(block=block, message=msg)
            headers_part.walk_index = 0
            msg.parts.append(headers_part)

            # MIME parts are numbered from 1 because index 0 is taken by the
            # headers part above. Skipped multipart containers still consume
            # an index.
            for i, mimepart in enumerate(
                    parsed.walk(
                        with_self=parsed.content_type.is_singlepart()),
                    1):
                if mimepart.content_type.is_multipart():
                    log.warning('multipart sub-part found',
                                account_id=account.id, folder_name=folder_name,
                                mid=mid)
                    continue  # TODO should we store relations?
                msg._parse_mimepart(mimepart, mid, i, account.namespace.id)

            msg.calculate_sanitized_body()
        except (mime.DecodingError, AttributeError, RuntimeError) as e:
            # Message parsing can fail for several reasons. Occasionally iconv
            # will fail via maximum recursion depth. EAS messages may be
            # missing Date and Received headers. In such cases, we still keep
            # the metadata and mark it as b0rked.
            _log_decode_error(account.id, folder_name, mid, body_string)
            err_filename = _get_errfilename(account.id, folder_name, mid)
            log.error('Message parsing error',
                      folder_name=folder_name, account_id=account.id,
                      err_filename=err_filename, error=e)
            msg._mark_error()

        return msg
Beispiel #11
0
    def __init__(self, account=None, mid=None, folder_name=None,
                 received_date=None, flags=None, body_string=None,
                 *args, **kwargs):
        """ Use .create() instead to handle common errors!

        (Can't abort object creation in a constructor.)

        When account, mid, folder_name, received_date, flags and body_string
        are all falsy (the drafts case), nothing is parsed and only
        MailSyncBase.__init__ is run with the remaining arguments.

        Otherwise body_string is parsed with mime.from_string, the
        header-derived attributes are set on self, and self.parts receives
        one Part holding the JSON-dumped headers (walk_index 0) followed by
        one Part per non-multipart MIME part. A part whose
        Content-Disposition is neither 'inline' nor 'attachment' is logged
        and skipped. Exceptions raised while parsing are not caught here.

        """
        _rqd = [account, mid, folder_name, received_date, flags, body_string]

        # for drafts - skip parsing
        if not any(_rqd):
            MailSyncBase.__init__(self, *args, **kwargs)
            return

        parsed = mime.from_string(body_string)

        mime_version = parsed.headers.get('Mime-Version')
        # NOTE: sometimes MIME-Version is set to "1.0 (1.0)", hence the
        # .startswith
        if mime_version is not None and not mime_version.startswith('1.0'):
            log.error('Unexpected MIME-Version: {0}'.format(mime_version))

        self.data_sha256 = sha256(body_string).hexdigest()

        # clean_subject strips re:, fwd: etc.
        self.subject = parsed.clean_subject
        self.from_addr = parse_email_address_list(
            parsed.headers.get('From'))
        self.sender_addr = parse_email_address_list(
            parsed.headers.get('Sender'))
        self.reply_to = parse_email_address_list(
            parsed.headers.get('Reply-To'))

        self.to_addr = parse_email_address_list(
            parsed.headers.getall('To'))
        self.cc_addr = parse_email_address_list(
            parsed.headers.getall('Cc'))
        self.bcc_addr = parse_email_address_list(
            parsed.headers.getall('Bcc'))

        self.in_reply_to = parsed.headers.get('In-Reply-To')
        self.message_id_header = parsed.headers.get('Message-Id')

        self.received_date = received_date

        # Optional mailing list headers
        self.mailing_list_headers = parse_ml_headers(parsed.headers)

        # Custom Inbox header
        self.inbox_uid = parsed.headers.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        self.references = parse_references(
            parsed.headers.get('References', ''),
            parsed.headers.get('In-Reply-To', ''))

        self.size = len(body_string)  # includes headers text

        from inbox.models.block import Part

        # Store all message headers as object with index 0
        headers_part = Part()
        headers_part.namespace_id = account.namespace.id
        headers_part.message = self
        headers_part.walk_index = 0
        headers_part.data = json.dumps(parsed.headers.items())
        self.parts.append(headers_part)

        # MIME parts are numbered from 1 because index 0 is the headers part.
        # Skipped parts still consume an index.
        for i, mimepart in enumerate(
                parsed.walk(with_self=parsed.content_type.is_singlepart()),
                1):
            if mimepart.content_type.is_multipart():
                log.warning("multipart sub-part found! on {}"
                            .format(self.g_msgid))
                continue  # TODO should we store relations?

            new_part = Part()
            new_part.namespace_id = account.namespace.id
            new_part.message = self
            new_part.walk_index = i
            new_part.misc_keyval = mimepart.headers.items()  # everything
            new_part.content_type = mimepart.content_type.value
            new_part.filename = _trim_filename(
                mimepart.content_type.params.get('name'),
                log=log)
            # TODO maybe also trim other headers?

            if mimepart.content_disposition[0] is not None:
                value, params = mimepart.content_disposition
                if value not in ['inline', 'attachment']:
                    # The template has four placeholders, so four arguments
                    # are required: {2} is the offending disposition value,
                    # {3} the full parsed (value, params) pair. Supplying
                    # fewer makes .format() raise IndexError instead of
                    # logging the problem.
                    errmsg = """
    Unknown Content-Disposition on message {0} found in {1}.
    Bad Content-Disposition was: '{2}'
    Parsed Content-Disposition was: '{3}'""".format(
                        mid, folder_name, value,
                        mimepart.content_disposition)
                    log.error(errmsg)
                    continue
                else:
                    new_part.content_disposition = value
                    if value == 'attachment':
                        # An explicit attachment filename overrides the
                        # Content-Type "name" parameter set above.
                        new_part.filename = _trim_filename(
                            params.get('filename'),
                            log=log)

            body = mimepart.body
            if body is None:
                data_to_write = ''
            elif new_part.content_type.startswith('text'):
                # normalize mac/win/unix newlines
                data_to_write = body.encode('utf-8', 'strict') \
                    .replace('\r\n', '\n').replace('\r', '\n')
            else:
                data_to_write = body

            new_part.content_id = mimepart.headers.get('Content-Id')
            new_part.data = data_to_write
            self.parts.append(new_part)

        self.calculate_sanitized_body()
        MailSyncBase.__init__(self, *args, **kwargs)