def create_from_synced(cls, account, mid, folder_name, received_date,
                       body_string):
    """
    Parse a raw synced message into a new, uncommitted Message.

    Fills in the message metadata from the MIME headers, stores the
    headers as a Part/Block with walk index 0 and hands every
    non-multipart MIME part to ``_parse_mimepart``. The returned Message
    links to the new Part and Block objects through relationships. All
    new objects are uncommitted.

    Threads are not computed here; you gotta do that separately.

    Parameters
    ----------
    account
        Account the message belongs to; ``account.id`` and
        ``account.namespace.id`` are read. Required.
    mid : int
        The account backend-specific message identifier; it's only used
        for logging errors. Required.
    folder_name
        Name of the folder the message was synced from; only used for
        logging errors. Required.
    received_date
        Received date to store on the message. When falsy, the date is
        obtained from ``get_internaldate`` using the ``Date`` and
        ``Received`` headers instead.
    body_string : str
        The full message including headers (encoded). Required, and must
        not be ``unicode``.

    Returns
    -------
    Message
        The new message. If parsing failed, or a recipient/references
        field was too long, ``_mark_error()`` has been called on it.

    Raises
    ------
    ValueError
        If ``account``, ``mid``, ``folder_name`` or ``body_string`` is
        None.
    """
    required = (account, mid, folder_name, body_string)
    if any(value is None for value in required):
        raise ValueError(
            'Required keyword arguments: account, mid, folder_name, '
            'body_string')
    # stop trickle-down bugs
    assert account.namespace is not None
    assert not isinstance(body_string, unicode)

    msg = Message()

    try:
        msg.namespace_id = account.namespace.id
        parsed = mime.from_string(body_string)

        mime_version = parsed.headers.get('Mime-Version')
        # sometimes MIME-Version is '1.0 (1.0)', hence the .startswith()
        if mime_version is not None and not mime_version.startswith('1.0'):
            log.warning('Unexpected MIME-Version',
                        account_id=account.id, folder_name=folder_name,
                        mid=mid, mime_version=mime_version)

        msg.data_sha256 = sha256(body_string).hexdigest()

        # clean_subject strips re:, fwd: etc.
        msg.subject = parsed.clean_subject
        msg.from_addr = parse_mimepart_address_header(parsed, 'From')
        msg.sender_addr = parse_mimepart_address_header(parsed, 'Sender')
        msg.reply_to = parse_mimepart_address_header(parsed, 'Reply-To')
        msg.to_addr = parse_mimepart_address_header(parsed, 'To')
        msg.cc_addr = parse_mimepart_address_header(parsed, 'Cc')
        msg.bcc_addr = parse_mimepart_address_header(parsed, 'Bcc')

        msg.in_reply_to = parsed.headers.get('In-Reply-To')
        msg.message_id_header = parsed.headers.get('Message-Id')

        # Prefer the date supplied by the caller; fall back to the headers.
        if received_date:
            msg.received_date = received_date
        else:
            msg.received_date = get_internaldate(
                parsed.headers.get('Date'),
                parsed.headers.get('Received'))

        # Custom Inbox header
        msg.inbox_uid = parsed.headers.get('X-INBOX-ID')

        # In accordance with JWZ (http://www.jwz.org/doc/threading.html)
        msg.references = parse_references(
            parsed.headers.get('References', ''),
            parsed.headers.get('In-Reply-To', ''))

        msg.size = len(body_string)  # includes headers text

        from inbox.models.block import Block, Part

        # Store all message headers as object with index 0
        block = Block()
        block.namespace_id = account.namespace.id
        block.data = json.dumps(parsed.headers.items())

        headers_part = Part(block=block, message=msg)
        headers_part.walk_index = 0
        msg.parts.append(headers_part)

        # Walk indexes start at 1 (0 is the headers part) and keep
        # advancing over skipped multipart containers.
        mimeparts = parsed.walk(
            with_self=parsed.content_type.is_singlepart())
        for walk_index, mimepart in enumerate(mimeparts, 1):
            if mimepart.content_type.is_multipart():
                log.warning('multipart sub-part found',
                            account_id=account.id,
                            folder_name=folder_name,
                            mid=mid)
                continue  # TODO should we store relations?
            msg._parse_mimepart(mimepart, mid, walk_index,
                                account.namespace.id)
        msg.calculate_sanitized_body()
    except (mime.DecodingError, AttributeError, RuntimeError,
            TypeError) as e:
        # Message parsing can fail for several reasons. Occasionally iconv
        # will fail via maximum recursion depth. EAS messages may be
        # missing Date and Received headers. In such cases, we still keep
        # the metadata and mark it as b0rked.
        _log_decode_error(account.id, folder_name, mid, body_string)
        err_filename = _get_errfilename(account.id, folder_name, mid)
        log.error('Message parsing error',
                  folder_name=folder_name, account_id=account.id,
                  err_filename=err_filename, error=e)
        msg._mark_error()

    # Occasionally people try to send messages to way too many
    # recipients. In such cases, empty the field and treat as a parsing
    # error so that we don't break the entire sync.
    for field in ('to_addr', 'cc_addr', 'bcc_addr', 'references'):
        value = getattr(msg, field)
        if json_field_too_long(value):
            _log_decode_error(account.id, folder_name, mid, body_string)
            err_filename = _get_errfilename(account.id, folder_name, mid)
            # Report the error-file path here too, as the parse-error
            # branch above does.
            log.error('Recipient field too long', field=field,
                      account_id=account.id, folder_name=folder_name,
                      mid=mid, err_filename=err_filename)
            setattr(msg, field, [])
            msg._mark_error()

    return msg
def create_from_synced(cls, account, mid, folder_name, received_date,
                       body_string):
    """
    Parse a raw synced message into a new, uncommitted Message.

    Stores the raw message as a Block on ``msg.full_body``, delegates
    header/metadata extraction to ``_parse_metadata`` and hands every
    non-multipart MIME part to ``_parse_mimepart``. The returned Message
    links to the new Part and Block objects through relationships. All
    new objects are uncommitted.

    Threads are not computed here; you gotta do that separately.

    Parameters
    ----------
    account
        Account the message belongs to; ``account.id`` and
        ``account.namespace.id`` are read. Required.
    mid : int
        The account backend-specific message identifier; it's only used
        for logging errors. Required.
    folder_name
        Name of the folder the message was synced from. Required.
    received_date
        Forwarded unchanged to ``_parse_metadata``.
    body_string : str
        The full message including headers (encoded). Required, and must
        not be ``unicode``.

    Returns
    -------
    Message
        The new message. If metadata or MIME-part parsing failed, or a
        recipient/references field was too long, ``_mark_error()`` has
        been called on it.

    Raises
    ------
    ValueError
        If ``account``, ``mid``, ``folder_name`` or ``body_string`` is
        None.
    """
    required = (account, mid, folder_name, body_string)
    if any(value is None for value in required):
        raise ValueError(
            'Required keyword arguments: account, mid, folder_name, '
            'body_string')
    # stop trickle-down bugs
    assert account.namespace is not None
    assert not isinstance(body_string, unicode)

    msg = Message()

    from inbox.models.block import Block

    # Keep the untouched raw message alongside the parsed representation.
    body_block = Block()
    body_block.namespace_id = account.namespace.id
    body_block.data = body_string
    body_block.content_type = "text/plain"
    msg.full_body = body_block

    msg.namespace_id = account.namespace.id

    try:
        parsed = mime.from_string(body_string)
        msg._parse_metadata(parsed, body_string, received_date, account.id,
                            folder_name, mid)
    except (mime.DecodingError, AttributeError, RuntimeError,
            TypeError) as e:
        # Without usable metadata the MIME parts are not walked at all.
        parsed = None
        log.error('Error parsing message metadata',
                  folder_name=folder_name, account_id=account.id,
                  mid=mid, error=e)
        msg._mark_error()

    if parsed is not None:
        plain_parts = []
        html_parts = []
        for mimepart in parsed.walk(
                with_self=parsed.content_type.is_singlepart()):
            # Each part is parsed in its own try block so that one bad
            # part does not prevent the remaining parts from being stored.
            try:
                if mimepart.content_type.is_multipart():
                    log.warning('multipart sub-part found',
                                account_id=account.id,
                                folder_name=folder_name,
                                mid=mid)
                    continue  # TODO should we store relations?
                msg._parse_mimepart(mid, mimepart, account.namespace.id,
                                    html_parts, plain_parts)
            except (mime.DecodingError, AttributeError, RuntimeError,
                    TypeError) as e:
                log.error('Error parsing message MIME parts',
                          folder_name=folder_name, account_id=account.id,
                          mid=mid, error=e)
                msg._mark_error()
        msg.calculate_body(html_parts, plain_parts)

    # Occasionally people try to send messages to way too many
    # recipients. In such cases, empty the field and treat as a parsing
    # error so that we don't break the entire sync.
    for field in ('to_addr', 'cc_addr', 'bcc_addr', 'references'):
        value = getattr(msg, field)
        if json_field_too_long(value):
            log.error('Recipient field too long', field=field,
                      account_id=account.id, folder_name=folder_name,
                      mid=mid)
            setattr(msg, field, [])
            msg._mark_error()

    return msg
def create_from_synced(cls, account, mid, folder_name, received_date,
                       body_string):
    """
    Parse a raw synced message into a new, uncommitted Message.

    Hashes the raw message, hands it to ``save_to_blockstore`` under that
    hash, delegates header/metadata extraction to ``_parse_metadata`` and
    passes every non-multipart MIME part to ``_parse_mimepart``. The
    returned Message links to the new Part and Block objects through
    relationships. All new objects are uncommitted.

    Threads are not computed here; you gotta do that separately.

    Parameters
    ----------
    account
        Account the message belongs to; ``account.id`` and
        ``account.namespace.id`` are read. Required.
    mid : int
        The account backend-specific message identifier; it's only used
        for logging errors. Required.
    folder_name
        Name of the folder the message was synced from. Required.
    received_date
        Forwarded unchanged to ``_parse_metadata``.
    body_string : str
        The full message including headers (encoded). Required, and must
        not be ``unicode``.

    Returns
    -------
    Message
        The new message. ``msg.parsed_body`` holds the parsed MIME object,
        or ``''`` when metadata parsing failed. If metadata or MIME-part
        parsing failed, or a recipient/references/reply-to field was too
        long, ``_mark_error()`` has been called on it.

    Raises
    ------
    ValueError
        If ``account``, ``mid``, ``folder_name`` or ``body_string`` is
        None.
    """
    required = (account, mid, folder_name, body_string)
    if any(value is None for value in required):
        raise ValueError(
            'Required keyword arguments: account, mid, folder_name, '
            'body_string')
    # stop trickle-down bugs
    assert account.namespace is not None
    assert not isinstance(body_string, unicode)

    msg = Message()

    msg.data_sha256 = sha256(body_string).hexdigest()

    # Persist the raw MIME message to disk/ S3
    save_to_blockstore(msg.data_sha256, body_string)

    # Persist the processed message to the database
    msg.namespace_id = account.namespace.id

    try:
        parsed = mime.from_string(body_string)
        # Non-persisted instance attribute used by EAS.
        msg.parsed_body = parsed
        msg._parse_metadata(parsed, body_string, received_date, account.id,
                            folder_name, mid)
    except (mime.DecodingError, AttributeError, RuntimeError,
            TypeError) as e:
        # Without usable metadata the MIME parts are not walked at all.
        parsed = None
        # Non-persisted instance attribute used by EAS.
        msg.parsed_body = ''
        log.error('Error parsing message metadata',
                  folder_name=folder_name, account_id=account.id,
                  mid=mid, error=e)
        msg._mark_error()

    if parsed is not None:
        plain_parts = []
        html_parts = []
        for mimepart in parsed.walk(
                with_self=parsed.content_type.is_singlepart()):
            # Each part is parsed in its own try block so that one bad
            # part does not prevent the remaining parts from being stored.
            try:
                if mimepart.content_type.is_multipart():
                    continue  # TODO should we store relations?
                msg._parse_mimepart(mid, mimepart, account.namespace.id,
                                    html_parts, plain_parts)
            except (mime.DecodingError, AttributeError, RuntimeError,
                    TypeError, binascii.Error, UnicodeDecodeError) as e:
                log.error('Error parsing message MIME parts',
                          folder_name=folder_name, account_id=account.id,
                          mid=mid, error=e)
                msg._mark_error()
        msg.calculate_body(html_parts, plain_parts)

    # Occasionally people try to send messages to way too many
    # recipients. In such cases, empty the field and treat as a parsing
    # error so that we don't break the entire sync.
    for field in ('to_addr', 'cc_addr', 'bcc_addr', 'references',
                  'reply_to'):
        value = getattr(msg, field)
        if json_field_too_long(value):
            log.error('Recipient field too long', field=field,
                      account_id=account.id, folder_name=folder_name,
                      mid=mid)
            setattr(msg, field, [])
            msg._mark_error()

    return msg