Example #1
0
 def _save_attachment(self, mimepart, content_disposition, content_type,
                      filename, content_id, namespace_id, mid):
     """Store one attachment of this message as a new Block/Part pair.

     The Block receives the namespace id, the trimmed filename, the content
     type and the payload; the Part ties that Block to this message and
     records the Content-Id and Content-Disposition values.
     """
     from inbox.models import Part, Block

     attachment = Block()
     attachment.namespace_id = namespace_id
     attachment.filename = _trim_filename(filename, mid=mid)
     attachment.content_type = content_type

     attachment_part = Part(block=attachment, message=self)
     attachment_part.content_id = content_id
     attachment_part.content_disposition = content_disposition

     # A missing or empty body is stored as ''; unicode bodies are
     # serialized to UTF-8 before being stored.
     payload = mimepart.body or ''
     if isinstance(payload, unicode):
         payload = payload.encode('utf-8', 'strict')
     attachment.data = payload
 def _save_attachment(self, mimepart, content_disposition, content_type,
                      filename, content_id, namespace_id, mid):
     """Persist a MIME part of this message as an attachment.

     A new Block is filled with the namespace id, the filename (after
     ``_trim_filename``), the content type and the body bytes. A new Part
     links the Block to this message and carries ``content_id`` and
     ``content_disposition``.
     """
     from inbox.models import Part, Block

     new_block = Block()
     new_block.namespace_id = namespace_id
     new_block.filename = _trim_filename(filename, mid=mid)
     new_block.content_type = content_type

     new_part = Part(block=new_block, message=self)
     new_part.content_id = content_id
     new_part.content_disposition = content_disposition

     # Falsy bodies (None, empty) become ''; unicode is encoded as UTF-8.
     body = mimepart.body or ''
     if isinstance(body, unicode):
         body = body.encode('utf-8', 'strict')
     new_block.data = body
Example #3
0
    def _parse_mimepart(self, mimepart, mid, index, namespace_id):
        """Build the Block and Part for one MIME part of this message.

        A part whose Content-Disposition is present but is neither 'inline'
        nor 'attachment' is logged, flagged through ``_mark_error()`` and
        skipped. Otherwise a Block/Part pair is filled in, and the Part is
        linked to this message only as the very last step.

        Parameters
        ----------
        mimepart
            Parsed MIME part; its ``content_disposition``, ``content_type``,
            ``body`` and ``headers`` attributes are read.
        mid
            Message identifier, passed to the logger and ``_trim_filename``.
        index
            Stored as the new part's ``walk_index``.
        namespace_id
            Stored on the new block.
        """
        from inbox.models.block import Block, Part

        disposition, disposition_params = mimepart.content_disposition
        recognized = (disposition is None or
                      disposition in ['inline', 'attachment'])
        if not recognized:
            log.error('Unknown Content-Disposition',
                      mid=mid,
                      bad_content_disposition=mimepart.content_disposition,
                      parsed_content_disposition=disposition)
            self._mark_error()
            return

        block = Block()
        block.namespace_id = namespace_id
        block.content_type = mimepart.content_type.value
        name_param = mimepart.content_type.params.get('name')
        block.filename = _trim_filename(name_param, mid)

        new_part = Part(block=block)
        new_part.walk_index = index

        # TODO maybe also trim other headers?
        if disposition is not None:
            new_part.content_disposition = disposition
        if disposition == 'attachment':
            # For attachments the disposition's filename replaces the
            # content-type 'name' parameter set above.
            attachment_name = disposition_params.get('filename')
            new_part.block.filename = _trim_filename(attachment_name, mid)

        if mimepart.body is None:
            payload = ''
        elif new_part.block.content_type.startswith('text'):
            encoded = mimepart.body.encode('utf-8', 'strict')
            # normalize mac/win/unix newlines
            payload = encoded.replace('\r\n', '\n').replace('\r', '\n')
        else:
            payload = mimepart.body
        if payload is None:
            payload = ''

        new_part.content_id = mimepart.headers.get('Content-Id')

        block.data = payload

        # Link to the message last so a part that fails to parse does not
        # leave an incomplete block/part attached to it.
        new_part.message = self
Example #4
0
    def _parse_mimepart(self, mimepart, mid, index, namespace_id):
        """Turn a single MIME part into a Block and a Part on this message.

        Parts carrying a Content-Disposition other than 'inline' or
        'attachment' are logged, flagged with ``_mark_error()`` and not
        stored. For every other part a Block (content type, filename, data)
        and a Part (walk index, disposition, Content-Id) are created, the
        Part being linked to this message at construction.

        ``mid`` is only handed to the logger and to ``_trim_filename``;
        ``index`` becomes the part's ``walk_index``; ``namespace_id`` is
        stored on the block.
        """
        from inbox.models.block import Block, Part

        disposition, disposition_params = mimepart.content_disposition
        if not (disposition is None
                or disposition in ['inline', 'attachment']):
            log.error('Unknown Content-Disposition',
                      mid=mid,
                      bad_content_disposition=mimepart.content_disposition,
                      parsed_content_disposition=disposition)
            self._mark_error()
            return

        part_block = Block()
        part_block.namespace_id = namespace_id
        part_block.content_type = mimepart.content_type.value
        declared_name = mimepart.content_type.params.get('name')
        part_block.filename = _trim_filename(declared_name, mid)

        part = Part(block=part_block, message=self)
        part.walk_index = index

        # TODO maybe also trim other headers?
        if disposition is not None:
            part.content_disposition = disposition
        if disposition == 'attachment':
            # The disposition's filename overrides the content-type name.
            part.block.filename = _trim_filename(
                disposition_params.get('filename'), mid)

        if mimepart.body is None:
            contents = ''
        elif part.block.content_type.startswith('text'):
            utf8_text = mimepart.body.encode('utf-8', 'strict')
            # normalize mac/win/unix newlines
            contents = utf8_text.replace('\r\n', '\n').replace('\r', '\n')
        else:
            contents = mimepart.body
        if contents is None:
            contents = ''

        part.content_id = mimepart.headers.get('Content-Id')

        part_block.data = contents
Example #5
0
    def _parse_mimepart(self, mimepart, mid, index, namespace_id):
        """Parse a single MIME part into a Block and Part object linked to this
        message.

        The Content-Disposition is validated *before* any object is created:
        a disposition other than 'inline' or 'attachment' is logged and the
        part is skipped, so no half-populated Part (one without data) is
        left linked to this message.

        Parameters
        ----------
        mimepart
            Parsed MIME part; its ``content_disposition``, ``content_type``,
            ``body`` and ``headers`` attributes are read.
        mid
            Message identifier, passed to the logger and ``_trim_filename``.
        index
            Stored as the new part's ``walk_index``.
        namespace_id
            Stored on the new block.
        """
        from inbox.models.block import Block, Part

        disposition = None
        disposition_params = None
        if mimepart.content_disposition[0] is not None:
            disposition, disposition_params = mimepart.content_disposition
            if disposition not in ['inline', 'attachment']:
                log.error('Unknown Content-Disposition',
                          mid=mid,
                          bad_content_disposition=mimepart.content_disposition,
                          parsed_content_disposition=disposition)
                # Bail out before building the Block/Part: the Part is
                # constructed with message=self, so returning after that
                # point would leave an incomplete part on the message.
                return

        block = Block()
        block.namespace_id = namespace_id
        block.content_type = mimepart.content_type.value
        block.filename = _trim_filename(
            mimepart.content_type.params.get('name'), mid)

        new_part = Part(block=block, message=self)
        new_part.walk_index = index

        # TODO maybe also trim other headers?
        if disposition is not None:
            new_part.content_disposition = disposition
            if disposition == 'attachment':
                # The disposition's filename overrides the content-type name.
                new_part.block.filename = _trim_filename(
                    disposition_params.get('filename'), mid)

        if mimepart.body is None:
            data_to_write = ''
        elif new_part.block.content_type.startswith('text'):
            data_to_write = mimepart.body.encode('utf-8', 'strict')
            # normalize mac/win/unix newlines
            data_to_write = data_to_write.replace('\r\n', '\n'). \
                replace('\r', '\n')
        else:
            data_to_write = mimepart.body
        if data_to_write is None:
            data_to_write = ''

        new_part.content_id = mimepart.headers.get('Content-Id')
        block.data = data_to_write
        self.parts.append(new_part)
    def create_from_synced(cls, account, mid, folder_name, received_date,
                           body_string):
        """
        Build a new, uncommitted Message from the raw bytes of a synced
        message.

        The raw message is stored verbatim in a Block attached as
        ``full_body``. Metadata and the individual MIME parts are then
        parsed from it; parsing errors of the handled exception types are
        logged and flag the message through ``_mark_error()`` rather than
        propagating.

        Threads are not computed here; that has to be done separately.

        Parameters
        ----------
        account
            Owning account; ``account.namespace`` must not be None.
        mid : int
            The account backend-specific message identifier; it's only used
            for logging errors and passed on to the parsing helpers.
        folder_name
            Folder name, used in log records and passed to
            ``_parse_metadata``.
        received_date
            Passed through to ``_parse_metadata``.
        body_string : str
            The full message including headers (encoded, not unicode).

        Returns
        -------
        The new Message.

        Raises
        ------
        ValueError
            If account, mid, folder_name or body_string is None.
        """
        required = (account, mid, folder_name, body_string)
        if any(value is None for value in required):
            raise ValueError(
                'Required keyword arguments: account, mid, folder_name, '
                'body_string')
        # stop trickle-down bugs
        assert account.namespace is not None
        assert not isinstance(body_string, unicode)

        msg = Message()

        from inbox.models.block import Block
        raw_block = Block()
        raw_block.namespace_id = account.namespace.id
        raw_block.data = body_string
        raw_block.content_type = "text/plain"
        msg.full_body = raw_block

        msg.namespace_id = account.namespace.id

        try:
            parsed = mime.from_string(body_string)
            msg._parse_metadata(parsed, body_string, received_date,
                                account.id, folder_name, mid)
        except (mime.DecodingError, AttributeError, RuntimeError,
                TypeError) as e:
            parsed = None
            log.error('Error parsing message metadata',
                      folder_name=folder_name,
                      account_id=account.id,
                      error=e)
            msg._mark_error()

        # Without usable metadata there is nothing further to parse.
        if parsed is None:
            return msg

        plain_parts, html_parts = [], []
        include_root = parsed.content_type.is_singlepart()
        for mimepart in parsed.walk(with_self=include_root):
            try:
                if mimepart.content_type.is_multipart():
                    log.warning('multipart sub-part found',
                                account_id=account.id,
                                folder_name=folder_name,
                                mid=mid)
                    continue  # TODO should we store relations?
                msg._parse_mimepart(mid, mimepart, account.namespace.id,
                                    html_parts, plain_parts)
            except (mime.DecodingError, AttributeError, RuntimeError,
                    TypeError) as e:
                # One bad part must not abort the remaining parts.
                log.error('Error parsing message MIME parts',
                          folder_name=folder_name,
                          account_id=account.id,
                          error=e)
                msg._mark_error()
        msg.calculate_body(html_parts, plain_parts)

        # Occasionally people try to send messages to way too many
        # recipients. In such cases, empty the field and treat as a parsing
        # error so that we don't break the entire sync.
        for field in ('to_addr', 'cc_addr', 'bcc_addr', 'references'):
            if not json_field_too_long(getattr(msg, field)):
                continue
            log.error('Recipient field too long',
                      field=field,
                      account_id=account.id,
                      folder_name=folder_name,
                      mid=mid)
            setattr(msg, field, [])
            msg._mark_error()

        return msg
Example #7
0
    def create_from_synced(cls, account, mid, folder_name, received_date,
                           body_string):
        """
        Parses message data and writes out db metadata and MIME blocks.

        Returns the new Message, which links to the new Part and Block objects
        through relationships. All new objects are uncommitted.

        Threads are not computed here; you gotta do that separately.

        Parameters
        ----------
        account
            Owning account; ``account.namespace`` must not be None.
        mid : int
            The account backend-specific message identifier; it's only used for
            logging errors.
        folder_name
            Folder name, used in log records.
        received_date
            Used as the message's received date when truthy; otherwise the
            date is derived from the Date/Received headers.
        body_string : str
            The full message including headers (encoded, not unicode).

        Raises
        ------
        ValueError
            If account, mid, folder_name or body_string is None.

        """
        _rqd = (account, mid, folder_name, body_string)
        if any(v is None for v in _rqd):
            raise ValueError(
                'Required keyword arguments: account, mid, folder_name, '
                'body_string')
        # stop trickle-down bugs
        assert account.namespace is not None
        assert not isinstance(body_string, unicode)

        msg = Message()

        try:
            from inbox.models.block import Block, Part
            # Keep the raw message verbatim before any parsing is attempted.
            body_block = Block()
            body_block.namespace_id = account.namespace.id
            body_block.data = body_string
            body_block.content_type = "text/plain"
            msg.full_body = body_block

            msg.namespace_id = account.namespace.id
            parsed = mime.from_string(body_string)

            mime_version = parsed.headers.get('Mime-Version')
            # sometimes MIME-Version is '1.0 (1.0)', hence the .startswith()
            if mime_version is not None and not mime_version.startswith('1.0'):
                log.warning('Unexpected MIME-Version',
                            account_id=account.id, folder_name=folder_name,
                            mid=mid, mime_version=mime_version)

            msg.data_sha256 = sha256(body_string).hexdigest()

            msg.subject = parsed.subject
            msg.from_addr = parse_mimepart_address_header(parsed, 'From')
            msg.sender_addr = parse_mimepart_address_header(parsed, 'Sender')
            msg.reply_to = parse_mimepart_address_header(parsed, 'Reply-To')
            msg.to_addr = parse_mimepart_address_header(parsed, 'To')
            msg.cc_addr = parse_mimepart_address_header(parsed, 'Cc')
            msg.bcc_addr = parse_mimepart_address_header(parsed, 'Bcc')

            msg.in_reply_to = parsed.headers.get('In-Reply-To')
            msg.message_id_header = parsed.headers.get('Message-Id')

            msg.received_date = received_date if received_date else \
                get_internaldate(parsed.headers.get('Date'),
                                 parsed.headers.get('Received'))

            # Custom Inbox header
            msg.inbox_uid = parsed.headers.get('X-INBOX-ID')

            # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
            msg.references = parse_references(
                parsed.headers.get('References', ''),
                parsed.headers.get('In-Reply-To', ''))

            msg.size = len(body_string)  # includes headers text

            i = 0  # for walk_index

            # Store all message headers as object with index 0
            block = Block()
            block.namespace_id = account.namespace.id
            block.data = json.dumps(parsed.headers.items())

            headers_part = Part(block=block, message=msg)
            headers_part.walk_index = i

            for mimepart in parsed.walk(
                    with_self=parsed.content_type.is_singlepart()):
                # The index advances for skipped multipart containers too.
                i += 1
                if mimepart.content_type.is_multipart():
                    log.warning('multipart sub-part found',
                                account_id=account.id, folder_name=folder_name,
                                mid=mid)
                    continue  # TODO should we store relations?
                msg._parse_mimepart(mimepart, mid, i, account.namespace.id)
            msg.calculate_sanitized_body()
        except (mime.DecodingError, AttributeError, RuntimeError, TypeError,
                ValueError) as e:
            # Message parsing can fail for several reasons. Occasionally iconv
            # will fail via maximum recursion depth. EAS messages may be
            # missing Date and Received headers. In such cases, we still keep
            # the metadata and mark it as b0rked.
            _log_decode_error(account.id, folder_name, mid, body_string)
            err_filename = _get_errfilename(account.id, folder_name, mid)
            log.error('Message parsing error',
                      folder_name=folder_name, account_id=account.id,
                      err_filename=err_filename, error=e)
            msg._mark_error()

        # Occasionally people try to send messages to way too many
        # recipients. In such cases, empty the field and treat as a parsing
        # error so that we don't break the entire sync.
        for field in ('to_addr', 'cc_addr', 'bcc_addr', 'references'):
            value = getattr(msg, field)
            if json_field_too_long(value):
                _log_decode_error(account.id, folder_name, mid, body_string)
                err_filename = _get_errfilename(account.id, folder_name, mid)
                # Log the error file name, as the parse-error record above
                # does; it was previously computed here but never used.
                log.error('Recipient field too long', field=field,
                          account_id=account.id, folder_name=folder_name,
                          mid=mid, err_filename=err_filename)
                setattr(msg, field, [])
                msg._mark_error()

        return msg
Example #8
0
    def create_from_synced(cls, account, mid, folder_name, received_date,
                           body_string):
        """
        Parses message data and writes out db metadata and MIME blocks.

        Returns the new Message, which links to the new Part and Block objects
        through relationships. All new objects are uncommitted.

        The raw message is attached as ``full_body`` before any parsing is
        attempted, so it is retained even when parsing fails part-way.

        Threads are not computed here; you gotta do that separately.

        Parameters
        ----------
        account
            Owning account; ``account.namespace`` must not be None.
        mid : int
            The account backend-specific message identifier; it's only used for
            logging errors.
        folder_name
            Folder name, used in log records.
        received_date
            Used as the message's received date when truthy; otherwise the
            date is derived from the Date/Received headers.
        body_string : str
            The full message including headers (encoded, not unicode).

        Raises
        ------
        ValueError
            If account, mid, folder_name or body_string is None.

        """
        _rqd = (account, mid, folder_name, body_string)
        if any(v is None for v in _rqd):
            raise ValueError(
                'Required keyword arguments: account, mid, folder_name, '
                'body_string')
        # stop trickle-down bugs
        assert account.namespace is not None
        assert not isinstance(body_string, unicode)

        msg = Message()

        try:
            from inbox.models.block import Block, Part
            msg.namespace_id = account.namespace.id

            # Attach the raw message first: if parsing below raises, the
            # handler still leaves the message with its full body.
            body_block = Block()
            body_block.namespace_id = account.namespace.id
            body_block.data = body_string
            body_block.content_type = "text/plain"
            msg.full_body = body_block

            parsed = mime.from_string(body_string)

            mime_version = parsed.headers.get('Mime-Version')
            # sometimes MIME-Version is '1.0 (1.0)', hence the .startswith()
            if mime_version is not None and not mime_version.startswith('1.0'):
                log.warning('Unexpected MIME-Version',
                            account_id=account.id, folder_name=folder_name,
                            mid=mid, mime_version=mime_version)

            msg.data_sha256 = sha256(body_string).hexdigest()

            # clean_subject strips re:, fwd: etc.
            msg.subject = parsed.clean_subject
            msg.from_addr = parse_mimepart_address_header(parsed, 'From')
            msg.sender_addr = parse_mimepart_address_header(parsed, 'Sender')
            msg.reply_to = parse_mimepart_address_header(parsed, 'Reply-To')
            msg.to_addr = parse_mimepart_address_header(parsed, 'To')
            msg.cc_addr = parse_mimepart_address_header(parsed, 'Cc')
            msg.bcc_addr = parse_mimepart_address_header(parsed, 'Bcc')

            msg.in_reply_to = parsed.headers.get('In-Reply-To')
            msg.message_id_header = parsed.headers.get('Message-Id')

            msg.received_date = received_date if received_date else \
                get_internaldate(parsed.headers.get('Date'),
                                 parsed.headers.get('Received'))

            # Custom Inbox header
            msg.inbox_uid = parsed.headers.get('X-INBOX-ID')

            # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
            msg.references = parse_references(
                parsed.headers.get('References', ''),
                parsed.headers.get('In-Reply-To', ''))

            msg.size = len(body_string)  # includes headers text

            i = 0  # for walk_index

            # Store all message headers as object with index 0
            block = Block()
            block.namespace_id = account.namespace.id
            block.data = json.dumps(parsed.headers.items())

            headers_part = Part(block=block, message=msg)
            headers_part.walk_index = i

            for mimepart in parsed.walk(
                    with_self=parsed.content_type.is_singlepart()):
                # The index advances for skipped multipart containers too.
                i += 1
                if mimepart.content_type.is_multipart():
                    log.warning('multipart sub-part found',
                                account_id=account.id, folder_name=folder_name,
                                mid=mid)
                    continue  # TODO should we store relations?
                msg._parse_mimepart(mimepart, mid, i, account.namespace.id)

            msg.calculate_sanitized_body()
        except (mime.DecodingError, AttributeError, RuntimeError, TypeError,
                ValueError) as e:
            # Message parsing can fail for several reasons. Occasionally iconv
            # will fail via maximum recursion depth. EAS messages may be
            # missing Date and Received headers. In such cases, we still keep
            # the metadata and mark it as b0rked.
            _log_decode_error(account.id, folder_name, mid, body_string)
            err_filename = _get_errfilename(account.id, folder_name, mid)
            log.error('Message parsing error',
                      folder_name=folder_name, account_id=account.id,
                      err_filename=err_filename, error=e)
            msg._mark_error()

        # Occasionally people try to send messages to way too many
        # recipients. In such cases, empty the field and treat as a parsing
        # error so that we don't break the entire sync.
        for field in ('to_addr', 'cc_addr', 'bcc_addr', 'references'):
            value = getattr(msg, field)
            if json_field_too_long(value):
                _log_decode_error(account.id, folder_name, mid, body_string)
                err_filename = _get_errfilename(account.id, folder_name, mid)
                # Log the error file name, as the parse-error record above
                # does; it was previously computed here but never used.
                log.error('Recipient field too long', field=field,
                          account_id=account.id, folder_name=folder_name,
                          mid=mid, err_filename=err_filename)
                setattr(msg, field, [])
                msg._mark_error()

        return msg
Example #9
0
    def create_from_synced(cls, account, mid, folder_name, received_date,
                           body_string):
        """
        Create an uncommitted Message out of a raw, synced message string.

        The raw string is kept verbatim in a Block attached as
        ``full_body``. Metadata is parsed first; if that fails with one of
        the handled exception types the message is flagged via
        ``_mark_error()`` and returned without parsing its parts. Otherwise
        every non-multipart MIME part is parsed, the body is calculated and
        oversized address/reference fields are emptied.

        Threads are not computed here; that has to be done separately.

        Parameters
        ----------
        account
            Owning account; ``account.namespace`` must not be None.
        mid : int
            The account backend-specific message identifier; it's only used
            for logging errors and passed on to the parsing helpers.
        folder_name
            Folder name, used in log records and passed to
            ``_parse_metadata``.
        received_date
            Passed through to ``_parse_metadata``.
        body_string : str
            The full message including headers (encoded, not unicode).

        Returns
        -------
        The new Message.

        Raises
        ------
        ValueError
            If account, mid, folder_name or body_string is None.
        """
        if any(arg is None
               for arg in (account, mid, folder_name, body_string)):
            raise ValueError(
                'Required keyword arguments: account, mid, folder_name, '
                'body_string')
        # stop trickle-down bugs
        assert account.namespace is not None
        assert not isinstance(body_string, unicode)

        msg = Message()

        from inbox.models.block import Block
        full_body = Block()
        full_body.namespace_id = account.namespace.id
        full_body.data = body_string
        full_body.content_type = "text/plain"
        msg.full_body = full_body

        msg.namespace_id = account.namespace.id

        try:
            parsed = mime.from_string(body_string)
            msg._parse_metadata(parsed, body_string, received_date,
                                account.id, folder_name, mid)
        except (mime.DecodingError, AttributeError, RuntimeError,
                TypeError) as e:
            parsed = None
            log.error('Error parsing message metadata',
                      folder_name=folder_name, account_id=account.id,
                      error=e)
            msg._mark_error()

        # Nothing more can be extracted when the metadata pass failed.
        if parsed is None:
            return msg

        html_parts = []
        plain_parts = []
        walk_root = parsed.content_type.is_singlepart()
        for mimepart in parsed.walk(with_self=walk_root):
            try:
                if not mimepart.content_type.is_multipart():
                    msg._parse_mimepart(mid, mimepart, account.namespace.id,
                                        html_parts, plain_parts)
                # multipart containers are skipped
                # TODO should we store relations?
            except (mime.DecodingError, AttributeError, RuntimeError,
                    TypeError, binascii.Error, UnicodeDecodeError) as e:
                # A failing part is recorded; the remaining parts are still
                # processed.
                log.error('Error parsing message MIME parts',
                          folder_name=folder_name, account_id=account.id,
                          error=e)
                msg._mark_error()
        msg.calculate_body(html_parts, plain_parts)

        # Occasionally people try to send messages to way too many
        # recipients. In such cases, empty the field and treat as a parsing
        # error so that we don't break the entire sync.
        for field in ('to_addr', 'cc_addr', 'bcc_addr', 'references'):
            if not json_field_too_long(getattr(msg, field)):
                continue
            log.error('Recipient field too long', field=field,
                      account_id=account.id, folder_name=folder_name,
                      mid=mid)
            setattr(msg, field, [])
            msg._mark_error()

        return msg