Esempio n. 1
0
def test_address_parsing_edge_cases():
    """Run the From-header parser over a table of awkward inputs.

    Each case pairs a raw message string with the list of
    (display name, address) tuples the parser is expected to return.
    """
    cases = (
        # Extra quotes around display name
        ('From: ""Bob"" <*****@*****.**>',
         [(' Bob ', '*****@*****.**')]),
        # Comments after addr-spec
        ('From: "Bob" <*****@*****.**>(through Yahoo!  Store Order System)',
         [('Bob', '*****@*****.**')]),
        ('From: Indiegogo <*****@*****.**> (no reply)',
         [('Indiegogo', '*****@*****.**')]),
        ('From: Anon <*****@*****.**> (GitHub Staff)',
         [('Anon', '*****@*****.**')]),
        # Display name in comment
        ('From: root@gunks (Cron Daemon)',
         [('Cron Daemon', 'root@gunks')]),
        # Missing closing angle bracket
        ('From: Bob <*****@*****.**',
         [('Bob', '*****@*****.**')]),
        # Blank (spammers)
        ('From:  ()', []),
        # Missing header
        ('', []),
        # Duplicate header
        ('From: [email protected]\r\n'
         'From: [email protected]',
         [('', '*****@*****.**')]),
    )

    # Cases run in order, so the first mismatch stops the test.
    for raw_message, expected in cases:
        mimepart = mime.from_string(raw_message)
        assert parse_mimepart_address_header(mimepart, 'From') == expected
def test_address_parsing_edge_cases():
    """Exercise From-header parsing with malformed and unusual headers."""
    def from_addresses(raw_message):
        # Parse the raw text and extract the 'From' header addresses.
        return parse_mimepart_address_header(
            mime.from_string(raw_message), 'From')

    # Extra quotes around display name
    assert from_addresses('From: ""Bob"" <*****@*****.**>') == [
        (' Bob ', '*****@*****.**')]

    # Comments after addr-spec
    assert from_addresses(
        'From: "Bob" <*****@*****.**>(through Yahoo!  Store Order System)'
    ) == [('Bob', '*****@*****.**')]
    assert from_addresses('From: Indiegogo <*****@*****.**> (no reply)') == [
        ('Indiegogo', '*****@*****.**')]
    assert from_addresses('From: Anon <*****@*****.**> (GitHub Staff)') == [
        ('Anon', '*****@*****.**')]

    # Display name in comment
    assert from_addresses('From: root@gunks (Cron Daemon)') == [
        ('Cron Daemon', 'root@gunks')]

    # Missing closing angle bracket
    assert from_addresses('From: Bob <*****@*****.**') == [
        ('Bob', '*****@*****.**')]

    # Blank (spammers)
    assert from_addresses('From:  ()') == []

    # Missing header
    assert from_addresses('') == []

    # Duplicate header
    assert from_addresses('From: [email protected]\r\n'
                          'From: [email protected]') == [
        ('', '*****@*****.**')]
Esempio n. 3
0
    def _parse_metadata(self, parsed, body_string, received_date,
                        account_id, folder_name, mid):
        """Copy header-derived metadata from a parsed message onto self.

        Parameters
        ----------
        parsed
            Parsed MIME message; its ``headers`` mapping and ``subject``
            attribute are read here.
        body_string
            The raw message. It is hashed with SHA-256 and its length
            (headers included) is stored as the message size.
        received_date
            Used as-is when truthy; otherwise the date is computed by
            ``get_internaldate`` from the Date and Received headers.
        account_id, folder_name, mid
            Only used as context in the MIME-Version warning.
        """
        headers = parsed.headers

        version = headers.get('Mime-Version')
        # sometimes MIME-Version is '1.0 (1.0)', so only the prefix is checked
        if not (version is None or version.startswith('1.0')):
            log.warning('Unexpected MIME-Version',
                        account_id=account_id, folder_name=folder_name,
                        mid=mid, mime_version=version)

        self.data_sha256 = sha256(body_string).hexdigest()
        self.subject = parsed.subject

        # Every address header goes through the same parser.
        address_fields = (
            ('from_addr', 'From'),
            ('sender_addr', 'Sender'),
            ('reply_to', 'Reply-To'),
            ('to_addr', 'To'),
            ('cc_addr', 'Cc'),
            ('bcc_addr', 'Bcc'),
        )
        for attr_name, header_name in address_fields:
            setattr(self, attr_name,
                    parse_mimepart_address_header(parsed, header_name))

        self.in_reply_to = headers.get('In-Reply-To')
        self.message_id_header = headers.get('Message-Id')

        # Prefer the caller-supplied date; fall back to the headers.
        self.received_date = received_date or get_internaldate(
            headers.get('Date'), headers.get('Received'))

        # Custom Inbox header
        self.inbox_uid = headers.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        self.references = parse_references(
            headers.get('References', ''),
            headers.get('In-Reply-To', ''))

        # includes headers text
        self.size = len(body_string)
Esempio n. 4
0
    def _parse_metadata(self, parsed, body_string, received_date,
                        account_id, folder_name, mid):
        """Fill in this object's metadata fields from a parsed message.

        Parameters
        ----------
        parsed
            Parsed MIME message exposing ``headers`` and ``subject``.
        body_string
            Raw message; its SHA-256 hex digest and its length (headers
            included) are recorded.
        received_date
            Taken as the received date when truthy; otherwise
            ``get_internaldate`` is called with the Date and Received
            headers.
        account_id, folder_name, mid
            Context values attached to the MIME-Version warning only.
        """
        hdrs = parsed.headers

        declared_version = hdrs.get('Mime-Version')
        # sometimes MIME-Version is '1.0 (1.0)', hence the prefix test
        version_ok = (declared_version is None or
                      declared_version.startswith('1.0'))
        if not version_ok:
            log.warning('Unexpected MIME-Version',
                        account_id=account_id, folder_name=folder_name,
                        mid=mid, mime_version=declared_version)

        self.data_sha256 = sha256(body_string).hexdigest()
        self.subject = parsed.subject

        # (attribute on self, header name) pairs, parsed in this order.
        for field, header in (('from_addr', 'From'),
                              ('sender_addr', 'Sender'),
                              ('reply_to', 'Reply-To'),
                              ('to_addr', 'To'),
                              ('cc_addr', 'Cc'),
                              ('bcc_addr', 'Bcc')):
            setattr(self, field,
                    parse_mimepart_address_header(parsed, header))

        self.in_reply_to = hdrs.get('In-Reply-To')
        self.message_id_header = hdrs.get('Message-Id')

        # A truthy caller-supplied date wins over the header-derived one.
        self.received_date = received_date or get_internaldate(
            hdrs.get('Date'), hdrs.get('Received'))

        # Custom Inbox header
        self.inbox_uid = hdrs.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        references_header = hdrs.get('References', '')
        in_reply_to_header = hdrs.get('In-Reply-To', '')
        self.references = parse_references(references_header,
                                           in_reply_to_header)

        # includes headers text
        self.size = len(body_string)
Esempio n. 5
0
    def _parse_metadata(self, parsed, body_string, received_date, account_id,
                        folder_name, mid):
        """Copy header-derived metadata from a parsed message onto self.

        Parameters
        ----------
        parsed
            Parsed MIME message; its ``headers`` mapping and ``subject``
            attribute are read here.
        body_string
            The raw message; only its length (headers included) is used.
        received_date
            Used when truthy; otherwise the date is computed by
            ``get_internaldate`` from the Date and Received headers. Either
            way the stored value has its microseconds zeroed.
        account_id, folder_name, mid
            Only used as context in the MIME-Version warning.
        """
        headers = parsed.headers

        version = headers.get("Mime-Version")
        # sometimes MIME-Version is '1.0 (1.0)', so only the prefix is checked
        if not (version is None or version.startswith("1.0")):
            log.warning(
                "Unexpected MIME-Version",
                account_id=account_id,
                folder_name=folder_name,
                mid=mid,
                mime_version=version,
            )

        self.subject = parsed.subject

        # Every address header goes through the same parser.
        address_fields = (
            ("from_addr", "From"),
            ("sender_addr", "Sender"),
            ("reply_to", "Reply-To"),
            ("to_addr", "To"),
            ("cc_addr", "Cc"),
            ("bcc_addr", "Bcc"),
        )
        for attr_name, header_name in address_fields:
            setattr(self, attr_name,
                    parse_mimepart_address_header(parsed, header_name))

        self.in_reply_to = headers.get("In-Reply-To")

        # The RFC mandates that the Message-Id header must be at most 998
        # characters. Sadly, not everybody follows specs.
        max_message_id_len = 998
        self.message_id_header = headers.get("Message-Id")
        if (self.message_id_header
                and len(self.message_id_header) > max_message_id_len):
            self.message_id_header = (
                self.message_id_header[:max_message_id_len])
            log.warning(
                "Message-Id header too long. Truncating",
                headers.get("Message-Id"),
                logstash_tag="truncated_message_id",
            )

        # Prefer the caller-supplied date; fall back to the headers.
        self.received_date = received_date or get_internaldate(
            headers.get("Date"), headers.get("Received"))

        # It seems MySQL rounds up fractional seconds in a weird way,
        # preventing us from reconciling messages correctly. See:
        # https://github.com/nylas/sync-engine/commit/ed16b406e0a for
        # more details.
        self.received_date = self.received_date.replace(microsecond=0)

        # Custom Nylas header
        self.nylas_uid = headers.get("X-INBOX-ID")

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        self.references = parse_references(
            headers.get("References", ""),
            headers.get("In-Reply-To", ""))

        # includes headers text
        self.size = len(body_string)
Esempio n. 6
0
    def _parse_metadata(self, parsed, body_string, received_date,
                        account_id, folder_name, mid):
        """Fill in this object's metadata fields from a parsed message.

        Parameters
        ----------
        parsed
            Parsed MIME message exposing ``headers`` and ``subject``.
        body_string
            Raw message; only its length (headers included) is recorded.
        received_date
            Taken as the received date when truthy; otherwise
            ``get_internaldate`` is called with the Date and Received
            headers. The stored value has its microseconds set to zero.
        account_id, folder_name, mid
            Context values attached to the MIME-Version warning only.
        """
        hdrs = parsed.headers

        declared_version = hdrs.get('Mime-Version')
        # sometimes MIME-Version is '1.0 (1.0)', hence the prefix test
        version_ok = (declared_version is None or
                      declared_version.startswith('1.0'))
        if not version_ok:
            log.warning('Unexpected MIME-Version',
                        account_id=account_id, folder_name=folder_name,
                        mid=mid, mime_version=declared_version)

        self.subject = parsed.subject

        # (attribute on self, header name) pairs, parsed in this order.
        for field, header in (('from_addr', 'From'),
                              ('sender_addr', 'Sender'),
                              ('reply_to', 'Reply-To'),
                              ('to_addr', 'To'),
                              ('cc_addr', 'Cc'),
                              ('bcc_addr', 'Bcc')):
            setattr(self, field,
                    parse_mimepart_address_header(parsed, header))

        self.in_reply_to = hdrs.get('In-Reply-To')

        # The RFC mandates that the Message-Id header must be at most 998
        # characters. Sadly, not everybody follows specs.
        limit = 998
        self.message_id_header = hdrs.get('Message-Id')
        if self.message_id_header and len(self.message_id_header) > limit:
            self.message_id_header = self.message_id_header[:limit]
            log.warning('Message-Id header too long. Truncating',
                        hdrs.get('Message-Id'),
                        logstash_tag='truncated_message_id')

        # A truthy caller-supplied date wins over the header-derived one.
        self.received_date = received_date or get_internaldate(
            hdrs.get('Date'), hdrs.get('Received'))

        # It seems MySQL rounds up fractional seconds in a weird way,
        # preventing us from reconciling messages correctly. See:
        # https://github.com/nylas/sync-engine/commit/ed16b406e0a for
        # more details.
        self.received_date = self.received_date.replace(microsecond=0)

        # Custom Nylas header
        self.nylas_uid = hdrs.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        references_header = hdrs.get('References', '')
        in_reply_to_header = hdrs.get('In-Reply-To', '')
        self.references = parse_references(references_header,
                                           in_reply_to_header)

        # includes headers text
        self.size = len(body_string)
Esempio n. 7
0
    def _parse_metadata(self, parsed, body_string, received_date, account_id,
                        folder_name, mid):
        """Copy header-derived metadata from a parsed message onto self.

        Parameters
        ----------
        parsed
            Parsed MIME message; its ``headers`` mapping and ``subject``
            attribute are read here.
        body_string
            The raw message; only its length (headers included) is used.
        received_date
            Used as-is when truthy; otherwise the date is computed by
            ``get_internaldate`` from the Date and Received headers.
        account_id, folder_name, mid
            Only used as context in the MIME-Version warning.
        """
        headers = parsed.headers

        version = headers.get('Mime-Version')
        # sometimes MIME-Version is '1.0 (1.0)', so only the prefix is checked
        if not (version is None or version.startswith('1.0')):
            log.warning('Unexpected MIME-Version',
                        account_id=account_id,
                        folder_name=folder_name,
                        mid=mid,
                        mime_version=version)

        self.subject = parsed.subject

        # Every address header goes through the same parser.
        address_fields = (
            ('from_addr', 'From'),
            ('sender_addr', 'Sender'),
            ('reply_to', 'Reply-To'),
            ('to_addr', 'To'),
            ('cc_addr', 'Cc'),
            ('bcc_addr', 'Bcc'),
        )
        for attr_name, header_name in address_fields:
            setattr(self, attr_name,
                    parse_mimepart_address_header(parsed, header_name))

        self.in_reply_to = headers.get('In-Reply-To')

        # The RFC mandates that the Message-Id header must be at most 998
        # characters. Sadly, not everybody follows specs.
        max_message_id_len = 998
        self.message_id_header = headers.get('Message-Id')
        if (self.message_id_header
                and len(self.message_id_header) > max_message_id_len):
            self.message_id_header = (
                self.message_id_header[:max_message_id_len])
            log.warning('Message-Id header too long. Truncating',
                        headers.get('Message-Id'),
                        logstash_tag='truncated_message_id')

        # Prefer the caller-supplied date; fall back to the headers.
        self.received_date = received_date or get_internaldate(
            headers.get('Date'), headers.get('Received'))

        # Custom Inbox header
        self.inbox_uid = headers.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        self.references = parse_references(
            headers.get('References', ''),
            headers.get('In-Reply-To', ''))

        # includes headers text
        self.size = len(body_string)
Esempio n. 8
0
    def _parse_metadata(self, parsed, body_string, received_date,
                        account_id, folder_name, mid):
        """Fill in this object's metadata fields from a parsed message.

        Parameters
        ----------
        parsed
            Parsed MIME message exposing ``headers`` and ``subject``.
        body_string
            Raw message; only its length (headers included) is recorded.
        received_date
            Taken as the received date when truthy; otherwise
            ``get_internaldate`` is called with the Date and Received
            headers.
        account_id, folder_name, mid
            Context values attached to the MIME-Version warning only.
        """
        hdrs = parsed.headers

        declared_version = hdrs.get('Mime-Version')
        # sometimes MIME-Version is '1.0 (1.0)', hence the prefix test
        version_ok = (declared_version is None or
                      declared_version.startswith('1.0'))
        if not version_ok:
            log.warning('Unexpected MIME-Version',
                        account_id=account_id, folder_name=folder_name,
                        mid=mid, mime_version=declared_version)

        self.subject = parsed.subject

        # (attribute on self, header name) pairs, parsed in this order.
        for field, header in (('from_addr', 'From'),
                              ('sender_addr', 'Sender'),
                              ('reply_to', 'Reply-To'),
                              ('to_addr', 'To'),
                              ('cc_addr', 'Cc'),
                              ('bcc_addr', 'Bcc')):
            setattr(self, field,
                    parse_mimepart_address_header(parsed, header))

        self.in_reply_to = hdrs.get('In-Reply-To')

        # The RFC mandates that the Message-Id header must be at most 998
        # characters. Sadly, not everybody follows specs.
        limit = 998
        self.message_id_header = hdrs.get('Message-Id')
        if self.message_id_header and len(self.message_id_header) > limit:
            self.message_id_header = self.message_id_header[:limit]
            log.warning('Message-Id header too long. Truncating',
                        hdrs.get('Message-Id'),
                        logstash_tag='truncated_message_id')

        # A truthy caller-supplied date wins over the header-derived one.
        self.received_date = received_date or get_internaldate(
            hdrs.get('Date'), hdrs.get('Received'))

        # Custom Inbox header
        self.inbox_uid = hdrs.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        references_header = hdrs.get('References', '')
        in_reply_to_header = hdrs.get('In-Reply-To', '')
        self.references = parse_references(references_header,
                                           in_reply_to_header)

        # includes headers text
        self.size = len(body_string)
def test_address_parsing():
    """Run the address-header parser over a table of awkward inputs.

    Each case is (raw message text, header to parse, expected result),
    where the expected result is a list of [display name, address] pairs.
    """
    cases = (
        # Extra quotes around display name
        ('From: ""Bob"" <*****@*****.**>', 'From',
         [[' Bob ', '*****@*****.**']]),
        # Comments after addr-spec
        ('From: "Bob" <*****@*****.**>(through Yahoo!  Store Order System)',
         'From',
         [['Bob', '*****@*****.**']]),
        ('From: Indiegogo <*****@*****.**> (no reply)', 'From',
         [['Indiegogo', '*****@*****.**']]),
        ('From: Anon <*****@*****.**> (GitHub Staff)', 'From',
         [['Anon', '*****@*****.**']]),
        # Display name in comment
        ('From: root@gunks (Cron Daemon)', 'From',
         [['Cron Daemon', 'root@gunks']]),
        # Missing closing angle bracket
        ('From: Bob <*****@*****.**', 'From',
         [['Bob', '*****@*****.**']]),
        # Blank (spammers)
        ('From:  ()', 'From', []),
        # Missing header
        ('', 'From', []),
        # Duplicate header
        ('From: [email protected]\r\n'
         'From: [email protected]', 'From',
         [['', '*****@*****.**']]),
        # RFC2047-encoded phrases with commas
        ('From: =?utf-8?Q?Foo=2C=20Corp.?= <*****@*****.**>', 'From',
         [['Foo, Corp.', '*****@*****.**']]),
        ('To: =?utf-8?Q?Foo=2C=20Corp.?= <*****@*****.**>, '
         '=?utf-8?Q?Support?= <*****@*****.**>', 'To',
         [['Foo, Corp.', '*****@*****.**'],
          ['Support', '*****@*****.**']]),
        # Multiple header lines
        ('To: [email protected]\nSubject: Hello\nTo: [email protected]', 'To',
         [['', '*****@*****.**'], ['', '*****@*****.**']]),
    )

    # Cases run in order, so the first mismatch stops the test.
    for raw_message, header, expected in cases:
        mimepart = mime.from_string(raw_message)
        assert parse_mimepart_address_header(mimepart, header) == expected
Esempio n. 10
0
    def create_from_synced(cls, account, mid, folder_name, received_date,
                           body_string):
        """ Parses message data and writes out db metadata and MIME blocks.

        Returns the new Message, which links to the new Part and Block objects
        through relationships. All new objects are uncommitted.

        Threads are not computed here; you gotta do that separately.

        Parsing failures (mime.DecodingError, AttributeError, RuntimeError)
        are not propagated: they are logged and `_mark_error()` is called on
        the message, which is still returned. Over-long recipient/reference
        fields are emptied and handled the same way.

        Parameters
        ----------
        account
            Account the message belongs to. Its `namespace` must not be None;
            `account.id` and `account.namespace.id` are read here.

        mid : int
            The account backend-specific message identifier; it's only used for
            logging errors.

        folder_name
            Folder name; passed to the logging and error-reporting helpers.

        received_date
            Used as the message's received date when truthy; otherwise the
            date is computed from the Date and Received headers.

        body_string : str
            The full message including headers (encoded). Must not be a
            `unicode` object.

        Raises
        ------
        ValueError
            If account, mid, folder_name or body_string is None.
        """
        # received_date is deliberately absent: it may be None (see below).
        _rqd = [account, mid, folder_name, body_string]
        if not all([v is not None for v in _rqd]):
            raise ValueError(
                "Required keyword arguments: account, mid, folder_name, "
                "body_string")
        # stop trickle-down bugs
        assert account.namespace is not None
        # `unicode` is the Python 2 text type: raw bytes are expected here.
        assert not isinstance(body_string, unicode)

        msg = Message()

        # Everything in this try block is best-effort: attributes assigned
        # before a failure stay on msg, so statement order matters.
        try:
            msg.namespace_id = account.namespace.id
            parsed = mime.from_string(body_string)

            mime_version = parsed.headers.get('Mime-Version')
            # sometimes MIME-Version is "1.0 (1.0)", hence the .startswith()
            if mime_version is not None and not mime_version.startswith('1.0'):
                log.warning('Unexpected MIME-Version',
                            account_id=account.id,
                            folder_name=folder_name,
                            mid=mid,
                            mime_version=mime_version)

            msg.data_sha256 = sha256(body_string).hexdigest()

            # clean_subject strips re:, fwd: etc.
            msg.subject = parsed.clean_subject
            msg.from_addr = parse_mimepart_address_header(parsed, 'From')
            msg.sender_addr = parse_mimepart_address_header(parsed, 'Sender')
            msg.reply_to = parse_mimepart_address_header(parsed, 'Reply-To')
            msg.to_addr = parse_mimepart_address_header(parsed, 'To')
            msg.cc_addr = parse_mimepart_address_header(parsed, 'Cc')
            msg.bcc_addr = parse_mimepart_address_header(parsed, 'Bcc')

            msg.in_reply_to = parsed.headers.get('In-Reply-To')
            msg.message_id_header = parsed.headers.get('Message-Id')

            # Prefer the caller-supplied date; fall back to the headers.
            msg.received_date = received_date if received_date else \
                get_internaldate(parsed.headers.get('Date'),
                                 parsed.headers.get('Received'))

            # Custom Inbox header
            msg.inbox_uid = parsed.headers.get('X-INBOX-ID')

            # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
            msg.references = parse_references(
                parsed.headers.get('References', ''),
                parsed.headers.get('In-Reply-To', ''))

            msg.size = len(body_string)  # includes headers text

            i = 0  # for walk_index

            # NOTE(review): imported locally rather than at module level --
            # presumably to avoid a circular import; confirm.
            from inbox.models.block import Block, Part

            # Store all message headers as object with index 0
            block = Block()
            block.namespace_id = account.namespace.id
            block.data = json.dumps(parsed.headers.items())

            headers_part = Part(block=block, message=msg)
            headers_part.walk_index = i
            msg.parts.append(headers_part)

            # The root part is included in the walk only for single-part
            # messages; i is incremented for every walked part, including
            # the multipart ones that are skipped.
            for mimepart in parsed.walk(
                    with_self=parsed.content_type.is_singlepart()):
                i += 1
                if mimepart.content_type.is_multipart():
                    log.warning('multipart sub-part found',
                                account_id=account.id,
                                folder_name=folder_name,
                                mid=mid)
                    continue  # TODO should we store relations?
                msg._parse_mimepart(mimepart, mid, i, account.namespace.id)

            msg.calculate_sanitized_body()
        except (mime.DecodingError, AttributeError, RuntimeError) as e:
            # Message parsing can fail for several reasons. Occasionally iconv
            # will fail via maximum recursion depth. EAS messages may be
            # missing Date and Received headers. In such cases, we still keep
            # the metadata and mark it as b0rked.
            _log_decode_error(account.id, folder_name, mid, body_string)
            err_filename = _get_errfilename(account.id, folder_name, mid)
            log.error('Message parsing error',
                      folder_name=folder_name,
                      account_id=account.id,
                      err_filename=err_filename,
                      error=e)
            msg._mark_error()

        # Occasionally people try to send messages to way too many
        # recipients. In such cases, empty the field and treat as a parsing
        # error so that we don't break the entire sync.
        for field in ('to_addr', 'cc_addr', 'bcc_addr', 'references'):
            value = getattr(msg, field)
            if json_field_too_long(value):
                _log_decode_error(account.id, folder_name, mid, body_string)
                # NOTE(review): err_filename is computed but never passed to
                # log.error below (unlike the parsing-error branch above).
                err_filename = _get_errfilename(account.id, folder_name, mid)
                log.error('Recipient field too long',
                          field=field,
                          account_id=account.id,
                          folder_name=folder_name,
                          mid=mid)
                setattr(msg, field, [])
                msg._mark_error()

        return msg
Esempio n. 11
0
def test_address_parsing():
    """Exercise address-header parsing with malformed and unusual headers."""
    def addresses(raw_message, header):
        # Parse the raw text, then extract one address header from it.
        return parse_mimepart_address_header(
            mime.from_string(raw_message), header)

    # Extra quotes around display name
    assert addresses('From: ""Bob"" <*****@*****.**>', "From") == [
        [" Bob ", "*****@*****.**"]]

    # Comments after addr-spec
    assert addresses(
        'From: "Bob" <*****@*****.**>(through Yahoo!  Store Order System)',
        "From") == [["Bob", "*****@*****.**"]]
    assert addresses(
        "From: Indiegogo <*****@*****.**> (no reply)",
        "From") == [["Indiegogo", "*****@*****.**"]]
    assert addresses(
        "From: Anon <*****@*****.**> (GitHub Staff)",
        "From") == [["Anon", "*****@*****.**"]]

    # Display name in comment
    assert addresses("From: root@gunks (Cron Daemon)", "From") == [
        ["Cron Daemon", "root@gunks"]]

    # Missing closing angle bracket
    assert addresses("From: Bob <*****@*****.**", "From") == [
        ["Bob", "*****@*****.**"]]

    # Blank (spammers)
    assert addresses("From:  ()", "From") == []

    # Missing header
    assert addresses("", "From") == []

    # Duplicate header
    assert addresses("From: [email protected]\r\n"
                     "From: [email protected]", "From") == [
        ["", "*****@*****.**"]]

    # RFC2047-encoded phrases with commas
    assert addresses(
        "From: =?utf-8?Q?Foo=2C=20Corp.?= <*****@*****.**>",
        "From") == [["Foo, Corp.", "*****@*****.**"]]
    assert addresses(
        "To: =?utf-8?Q?Foo=2C=20Corp.?= <*****@*****.**>, "
        "=?utf-8?Q?Support?= <*****@*****.**>",
        "To") == [
        ["Foo, Corp.", "*****@*****.**"],
        ["Support", "*****@*****.**"],
    ]

    # Multiple header lines
    assert addresses(
        "To: [email protected]\nSubject: Hello\nTo: [email protected]",
        "To") == [["", "*****@*****.**"], ["", "*****@*****.**"]]
Esempio n. 12
0
def test_address_parsing():
    """Check the address-header parser against a list of tricky messages.

    Every scenario is a dict holding the raw message text, the header
    to parse, and the expected list of [display name, address] pairs.
    """
    scenarios = [
        # Extra quotes around display name
        dict(raw='From: ""Bob"" <*****@*****.**>', header='From',
             want=[[' Bob ', '*****@*****.**']]),
        # Comments after addr-spec
        dict(raw='From: "Bob" <*****@*****.**>'
                 '(through Yahoo!  Store Order System)',
             header='From',
             want=[['Bob', '*****@*****.**']]),
        dict(raw='From: Indiegogo <*****@*****.**> (no reply)',
             header='From',
             want=[['Indiegogo', '*****@*****.**']]),
        dict(raw='From: Anon <*****@*****.**> (GitHub Staff)',
             header='From',
             want=[['Anon', '*****@*****.**']]),
        # Display name in comment
        dict(raw='From: root@gunks (Cron Daemon)', header='From',
             want=[['Cron Daemon', 'root@gunks']]),
        # Missing closing angle bracket
        dict(raw='From: Bob <*****@*****.**', header='From',
             want=[['Bob', '*****@*****.**']]),
        # Blank (spammers)
        dict(raw='From:  ()', header='From', want=[]),
        # Missing header
        dict(raw='', header='From', want=[]),
        # Duplicate header
        dict(raw='From: [email protected]\r\n'
                 'From: [email protected]',
             header='From',
             want=[['', '*****@*****.**']]),
        # RFC2047-encoded phrases with commas
        dict(raw='From: =?utf-8?Q?Foo=2C=20Corp.?= <*****@*****.**>',
             header='From',
             want=[['Foo, Corp.', '*****@*****.**']]),
        dict(raw='To: =?utf-8?Q?Foo=2C=20Corp.?= <*****@*****.**>, '
                 '=?utf-8?Q?Support?= <*****@*****.**>',
             header='To',
             want=[['Foo, Corp.', '*****@*****.**'],
                   ['Support', '*****@*****.**']]),
        # Multiple header lines
        dict(raw='To: [email protected]\nSubject: Hello\nTo: [email protected]',
             header='To',
             want=[['', '*****@*****.**'], ['', '*****@*****.**']]),
    ]

    # Scenarios run in order, so the first mismatch stops the test.
    for scenario in scenarios:
        mimepart = mime.from_string(scenario['raw'])
        parsed = parse_mimepart_address_header(mimepart, scenario['header'])
        assert parsed == scenario['want']
Esempio n. 13
0
    def create_from_synced(cls, account, mid, folder_name,
                           received_date, body_string):
        """ Parses message data and writes out db metadata and MIME blocks.

        Returns the new Message, which links to the new Part and Block objects
        through relationships. All new objects are uncommitted.

        Threads are not computed here; you gotta do that separately.

        Parsing failures (mime.DecodingError, AttributeError, RuntimeError)
        are not propagated: they are logged and `_mark_error()` is called on
        the message, which is still returned.

        Parameters
        ----------
        account
            Account the message belongs to. Its `namespace` must not be None;
            `account.id` and `account.namespace.id` are read here.

        mid : int
            The account backend-specific message identifier; it's only used for
            logging errors.

        folder_name
            Folder name; passed to the logging and error-reporting helpers.

        received_date
            Used as the message's received date when truthy; otherwise the
            date is computed from the Date and Received headers.

        body_string : str
            The full message including headers (encoded). Must not be a
            `unicode` object.

        Raises
        ------
        ValueError
            If account, mid, folder_name or body_string is None.
        """
        # received_date is deliberately absent: it may be None (see below).
        _rqd = [account, mid, folder_name, body_string]
        if not all([v is not None for v in _rqd]):
            raise ValueError(
                "Required keyword arguments: account, mid, folder_name, "
                "body_string")
        # stop trickle-down bugs
        assert account.namespace is not None
        # `unicode` is the Python 2 text type: raw bytes are expected here.
        assert not isinstance(body_string, unicode)

        msg = Message()

        # Everything in this try block is best-effort: attributes assigned
        # before a failure stay on msg, so statement order matters.
        try:
            msg.namespace_id = account.namespace.id
            parsed = mime.from_string(body_string)

            mime_version = parsed.headers.get('Mime-Version')
            # sometimes MIME-Version is "1.0 (1.0)", hence the .startswith()
            if mime_version is not None and not mime_version.startswith('1.0'):
                log.warning('Unexpected MIME-Version',
                            account_id=account.id, folder_name=folder_name,
                            mid=mid, mime_version=mime_version)

            msg.data_sha256 = sha256(body_string).hexdigest()

            # clean_subject strips re:, fwd: etc.
            msg.subject = parsed.clean_subject
            msg.from_addr = parse_mimepart_address_header(parsed, 'From')
            msg.sender_addr = parse_mimepart_address_header(parsed, 'Sender')
            msg.reply_to = parse_mimepart_address_header(parsed, 'Reply-To')
            msg.to_addr = parse_mimepart_address_header(parsed, 'To')
            msg.cc_addr = parse_mimepart_address_header(parsed, 'Cc')
            msg.bcc_addr = parse_mimepart_address_header(parsed, 'Bcc')

            msg.in_reply_to = parsed.headers.get('In-Reply-To')
            msg.message_id_header = parsed.headers.get('Message-Id')

            # Prefer the caller-supplied date; fall back to the headers.
            msg.received_date = received_date if received_date else \
                get_internaldate(parsed.headers.get('Date'),
                                 parsed.headers.get('Received'))

            # Custom Inbox header
            msg.inbox_uid = parsed.headers.get('X-INBOX-ID')

            # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
            msg.references = parse_references(
                parsed.headers.get('References', ''),
                parsed.headers.get('In-Reply-To', ''))

            msg.size = len(body_string)  # includes headers text

            i = 0  # for walk_index

            # NOTE(review): imported locally rather than at module level --
            # presumably to avoid a circular import; confirm.
            from inbox.models.block import Block, Part

            # Store all message headers as object with index 0
            block = Block()
            block.namespace_id = account.namespace.id
            block.data = json.dumps(parsed.headers.items())

            headers_part = Part(block=block, message=msg)
            headers_part.walk_index = i
            msg.parts.append(headers_part)

            # The root part is included in the walk only for single-part
            # messages; i is incremented for every walked part, including
            # the multipart ones that are skipped.
            for mimepart in parsed.walk(
                    with_self=parsed.content_type.is_singlepart()):
                i += 1
                if mimepart.content_type.is_multipart():
                    log.warning('multipart sub-part found',
                                account_id=account.id, folder_name=folder_name,
                                mid=mid)
                    continue  # TODO should we store relations?
                msg._parse_mimepart(mimepart, mid, i, account.namespace.id)

            msg.calculate_sanitized_body()
        except (mime.DecodingError, AttributeError, RuntimeError) as e:
            # Message parsing can fail for several reasons. Occasionally iconv
            # will fail via maximum recursion depth. EAS messages may be
            # missing Date and Received headers. In such cases, we still keep
            # the metadata and mark it as b0rked.
            _log_decode_error(account.id, folder_name, mid, body_string)
            err_filename = _get_errfilename(account.id, folder_name, mid)
            log.error('Message parsing error',
                      folder_name=folder_name, account_id=account.id,
                      err_filename=err_filename, error=e)
            msg._mark_error()

        return msg