def _extract_parts(namespace_id, folder_id, body_string):
    """Persist a raw MIME message and parse out its individual parts.

    The raw message is deduplicated into the blockstore (keyed by its
    SHA-256), then parsed with flanker; every non-multipart MIME part is
    handed to ``_parse_mimepart``. Parse failures are logged and abort
    processing rather than propagate.

    Parameters
    ----------
    namespace_id : id of the owning namespace; used for logging and
        passed through to ``_parse_mimepart``.
    folder_id : id of the source folder; only used for logging.
    body_string : the full raw message, headers included (encoded str).
    """
    data_sha256 = sha256(body_string).hexdigest()
    # Dedupe: only upload the raw message if it isn't stored already.
    if not is_in_blockstore(data_sha256):
        save_to_blockstore(data_sha256, body_string)

    try:
        parsed = mime.from_string(body_string)
    except (mime.DecodingError, AttributeError, RuntimeError, TypeError) as e:
        log.error('Error parsing message metadata', folder_id=folder_id,
                  namespace_id=namespace_id, error=e)
        return
    if parsed is None:
        return

    for mimepart in parsed.walk(
            with_self=parsed.content_type.is_singlepart()):
        try:
            if mimepart.content_type.is_multipart():
                continue  # TODO should we store relations?
            _parse_mimepart(namespace_id, mimepart)
        except (mime.DecodingError, AttributeError, RuntimeError, TypeError,
                binascii.Error, UnicodeDecodeError):
            # exc_info=True records the full traceback, so the exception
            # object itself is not bound (the original `as e` was unused).
            log.error('Error parsing message MIME parts',
                      folder_id=folder_id, namespace_id=namespace_id,
                      exc_info=True)
            # Give up on the whole message at the first bad part.
            return
def _extract_parts(namespace_id, folder_id, body_string):
    """Store the raw message in the blockstore and parse out its parts.

    Each non-multipart MIME part of ``body_string`` is passed to
    ``_parse_mimepart``. Any parse error is logged and ends processing.
    """
    raw_sha = sha256(body_string).hexdigest()
    if not is_in_blockstore(raw_sha):
        save_to_blockstore(raw_sha, body_string)

    try:
        parsed = mime.from_string(body_string)
    except (mime.DecodingError, AttributeError, RuntimeError, TypeError) as e:
        log.error('Error parsing message metadata', folder_id=folder_id,
                  namespace_id=namespace_id, error=e)
        return

    if parsed is None:
        return

    # A singlepart message has no children, so walk over the message
    # object itself in that case.
    include_self = parsed.content_type.is_singlepart()
    for part in parsed.walk(with_self=include_self):
        try:
            if part.content_type.is_multipart():
                continue  # TODO should we store relations?
            _parse_mimepart(namespace_id, part)
        except (mime.DecodingError, AttributeError, RuntimeError, TypeError,
                binascii.Error, UnicodeDecodeError) as e:
            log.error('Error parsing message MIME parts',
                      folder_id=folder_id, namespace_id=namespace_id,
                      exc_info=True)
            return
def _save_attachment(data):
    """Write an attachment blob to the blockstore, keyed by its SHA-256.

    Unicode payloads are UTF-8 encoded before hashing; zero-length
    payloads are skipped with a warning.
    """
    if len(data) == 0:
        log.warning('Not saving 0-length data blob')
        return
    blob = data.encode('utf-8', 'strict') if isinstance(data, unicode) else data
    save_to_blockstore(sha256(blob).hexdigest(), blob)
def data(self):
    """Return this block's raw bytes.

    Resolution order: in-memory cache (set on initial download), then
    the blockstore. If the blob is missing from the blockstore and this
    is a Block attached to a message, attempt to recover it by
    re-parsing the raw message and re-uploading the matching subpart.
    """
    if self.size == 0:
        log.warning('Block size is 0')
        return ''
    elif hasattr(self, '_data'):
        # On initial download we temporarily store data in memory
        value = self._data
    else:
        value = get_from_blockstore(self.data_sha256)

    if value is None:
        log.warning(
            "Couldn't find data on S3 for block with hash {}".format(
                self.data_sha256))

        from inbox.models.block import Block
        if isinstance(self, Block):
            if self.parts:
                # This block is an attachment of a message that was
                # accidentially deleted. We will attempt to fetch the raw
                # message and parse out the needed attachment.
                message = self.parts[0].message  # only grab one
                raw_mime = get_from_blockstore(message.data_sha256)
                if raw_mime is None:
                    log.error("Don't have raw message for hash {}".format(
                        message.data_sha256))
                    return None

                parsed = mime.from_string(raw_mime)
                if parsed is not None:
                    for mimepart in parsed.walk(
                            with_self=parsed.content_type.is_singlepart()):
                        if mimepart.content_type.is_multipart():
                            continue  # TODO should we store relations?
                        data = mimepart.body
                        if data is None:
                            # Empty part: hashing None would raise a
                            # TypeError and kill the recovery loop.
                            continue
                        if isinstance(data, unicode):
                            data = data.encode('utf-8', 'strict')
                        # Found it!
                        if sha256(data).hexdigest() == self.data_sha256:
                            log.info('Found subpart with hash {}'.format(
                                self.data_sha256))
                            save_to_blockstore(self.data_sha256, data)
                            return data
        log.error('No data returned!')
        return value

    assert self.data_sha256 == sha256(value).hexdigest(), \
        "Returned data doesn't match stored hash!"
    return value
def data(self):
    """Return this block's raw bytes.

    Resolution order: in-memory cache (set on initial download), then
    the blockstore. If the blob is missing from the blockstore and this
    is a Block attached to a message, attempt to recover it by
    re-parsing the raw message and re-uploading the matching subpart.
    """
    if self.size == 0:
        log.warning("Block size is 0")
        return ""
    elif hasattr(self, "_data"):
        # On initial download we temporarily store data in memory
        value = self._data
    else:
        value = get_from_blockstore(self.data_sha256)

    if value is None:
        log.warning("Couldn't find data on S3 for block with hash {}".format(self.data_sha256))

        from inbox.models.block import Block
        if isinstance(self, Block):
            if self.parts:
                # This block is an attachment of a message that was
                # accidentially deleted. We will attempt to fetch the raw
                # message and parse out the needed attachment.
                message = self.parts[0].message  # only grab one
                raw_mime = get_from_blockstore(message.data_sha256)
                if raw_mime is None:
                    log.error("Don't have raw message for hash {}".format(message.data_sha256))
                    return None

                parsed = mime.from_string(raw_mime)
                if parsed is not None:
                    for mimepart in parsed.walk(with_self=parsed.content_type.is_singlepart()):
                        if mimepart.content_type.is_multipart():
                            continue  # TODO should we store relations?
                        data = mimepart.body
                        if data is None:
                            # Empty part: hashing None would raise a
                            # TypeError and kill the recovery loop.
                            continue
                        if isinstance(data, unicode):
                            data = data.encode("utf-8", "strict")
                        # Found it!
                        if sha256(data).hexdigest() == self.data_sha256:
                            log.info("Found subpart with hash {}".format(self.data_sha256))
                            save_to_blockstore(self.data_sha256, data)
                            return data
        log.error("No data returned!")
        return value

    assert self.data_sha256 == sha256(value).hexdigest(), "Returned data doesn't match stored hash!"
    return value
def data(self, value):
    """Set the block payload: cache it in memory and persist it.

    ``value`` must be a non-None byte string; unicode is rejected (the
    payload is hashed and stored verbatim).
    """
    assert value is not None
    # isinstance (not `type(value) is`) so unicode subclasses are
    # rejected as well.
    assert not isinstance(value, unicode)
    # Cache value in memory. Otherwise message-parsing incurs a disk or S3
    # roundtrip.
    self._data = value
    self.size = len(value)
    self.data_sha256 = sha256(value).hexdigest()
    assert self.data_sha256
    if len(value) == 0:
        log.warning('Not saving 0-length data blob')
        return
    blockstore.save_to_blockstore(self.data_sha256, value)
def data(self, value):
    """Set the block payload, caching it in memory and persisting it."""
    assert value is not None
    assert isinstance(value, bytes)
    # Keep a copy in memory so message parsing doesn't have to round-trip
    # to disk or S3 for data we just downloaded.
    self._data = value
    self.size = len(value)
    digest = sha256(value).hexdigest()
    self.data_sha256 = digest
    assert self.data_sha256
    if len(value) == 0:
        log.warning("Not saving 0-length data blob")
        return
    if STORE_MESSAGE_ATTACHMENTS:
        blockstore.save_to_blockstore(digest, value)
def create_from_synced(cls, account, mid, folder_name, received_date,
                       body_string):
    """
    Parses message data and writes out db metadata and MIME blocks.

    Returns the new Message, which links to the new Part and Block
    objects through relationships. All new objects are uncommitted.

    Threads are not computed here; you gotta do that separately.

    Parameters
    ----------
    account : Account
        The account the message belongs to; supplies the namespace and
        is used when logging errors.
    mid : int
        The account backend-specific message identifier; it's only used
        for logging errors.
    folder_name : str
        Name of the folder the message was synced from (logging only).
    received_date : datetime
        When the message was received.
    body_string : str
        The full message including headers (encoded).
    """
    _rqd = [account, mid, folder_name, body_string]
    if not all([v is not None for v in _rqd]):
        raise ValueError(
            'Required keyword arguments: account, mid, folder_name, '
            'body_string')
    # stop trickle-down bugs
    assert account.namespace is not None
    assert not isinstance(body_string, unicode)

    msg = Message()

    msg.data_sha256 = sha256(body_string).hexdigest()

    # Persist the raw MIME message to disk/ S3
    save_to_blockstore(msg.data_sha256, body_string)

    # Persist the processed message to the database
    msg.namespace_id = account.namespace.id

    try:
        parsed = mime.from_string(body_string)
        # Non-persisted instance attribute used by EAS.
        msg.parsed_body = parsed
        msg._parse_metadata(parsed, body_string, received_date, account.id,
                            folder_name, mid)
    except (mime.DecodingError, AttributeError, RuntimeError,
            TypeError) as e:
        # Metadata parsing failed entirely: record the error on the
        # message and skip part processing below (parsed is None).
        parsed = None
        # Non-persisted instance attribute used by EAS.
        msg.parsed_body = ''
        log.error('Error parsing message metadata', folder_name=folder_name,
                  account_id=account.id, error=e)
        msg._mark_error()

    if parsed is not None:
        plain_parts = []
        html_parts = []
        # Walk every leaf MIME part; a bad part marks the message as
        # errored but doesn't stop processing of the remaining parts.
        for mimepart in parsed.walk(
                with_self=parsed.content_type.is_singlepart()):
            try:
                if mimepart.content_type.is_multipart():
                    continue  # TODO should we store relations?
                msg._parse_mimepart(mid, mimepart, account.namespace.id,
                                    html_parts, plain_parts)
            except (mime.DecodingError, AttributeError, RuntimeError,
                    TypeError, binascii.Error, UnicodeDecodeError) as e:
                log.error('Error parsing message MIME parts',
                          folder_name=folder_name, account_id=account.id,
                          error=e)
                msg._mark_error()
        msg.calculate_body(html_parts, plain_parts)

        # Occasionally people try to send messages to way too many
        # recipients. In such cases, empty the field and treat as a parsing
        # error so that we don't break the entire sync.
        for field in ('to_addr', 'cc_addr', 'bcc_addr', 'references',
                      'reply_to'):
            value = getattr(msg, field)
            if json_field_too_long(value):
                log.error('Recipient field too long', field=field,
                          account_id=account.id, folder_name=folder_name,
                          mid=mid)
                setattr(msg, field, [])
                msg._mark_error()

    return msg
def data(self):
    """Return this block's raw bytes, recovering them if necessary.

    Resolution order: in-memory cache (set on initial download), then
    the blockstore. If the blob is missing and this Block is an
    attachment of a message, re-fetch the raw message (blockstore first,
    then the mail provider), re-parse it, and re-upload the matching
    subpart. Cache hit/miss counts and fetch latencies are reported to
    statsd.
    """
    if self.size == 0:
        log.warning('Block size is 0')
        return ''
    elif hasattr(self, '_data'):
        # On initial download we temporarily store data in memory
        value = self._data
    else:
        value = blockstore.get_from_blockstore(self.data_sha256)

    if value is None:
        log.warning("Couldn't find data on S3 for block",
                    sha_hash=self.data_sha256)

        from inbox.models.block import Block
        if isinstance(self, Block):
            if self.parts:
                # This block is an attachment of a message that was
                # deleted. We will attempt to fetch the raw
                # message and parse out the needed attachment.
                message = self.parts[0].message  # only grab one
                account = message.namespace.account
                statsd_string = 'api.direct_fetching.{}.{}'.format(
                    account.provider, account.id)

                # Try to fetch the message from S3 first.
                with statsd_client.timer('{}.blockstore_latency'.format(
                        statsd_string)):
                    raw_mime = blockstore.get_from_blockstore(
                        message.data_sha256)

                # If it's not there, get it from the provider.
                if raw_mime is None:
                    statsd_client.incr('{}.cache_misses'.format(
                        statsd_string))

                    with statsd_client.timer('{}.provider_latency'.format(
                            statsd_string)):
                        raw_mime = get_raw_from_provider(message)

                    # Only cache the raw message if the provider actually
                    # returned one: hashing None would raise TypeError
                    # and make the give-up branch below unreachable.
                    if raw_mime is not None:
                        msg_sha256 = sha256(raw_mime).hexdigest()

                        # Cache the raw message in the blockstore so that
                        # we don't have to fetch it over and over.
                        with statsd_client.timer(
                                '{}.blockstore_save_latency'.format(
                                    statsd_string)):
                            blockstore.save_to_blockstore(msg_sha256,
                                                          raw_mime)
                else:
                    # We found it in the blockstore --- report this.
                    statsd_client.incr('{}.cache_hits'.format(statsd_string))

                # If we couldn't find it there, give up.
                if raw_mime is None:
                    log.error("Don't have raw message for hash {}"
                              .format(message.data_sha256))
                    return None

                parsed = mime.from_string(raw_mime)
                if parsed is not None:
                    for mimepart in parsed.walk(
                            with_self=parsed.content_type.is_singlepart()):
                        if mimepart.content_type.is_multipart():
                            continue  # TODO should we store relations?
                        data = mimepart.body
                        if isinstance(data, unicode):
                            data = data.encode('utf-8', 'strict')
                        if data is None:
                            continue
                        # Found it!
                        if sha256(data).hexdigest() == self.data_sha256:
                            log.info('Found subpart with hash {}'.format(
                                self.data_sha256))
                            with statsd_client.timer(
                                    '{}.blockstore_save_latency'.format(
                                        statsd_string)):
                                blockstore.save_to_blockstore(
                                    self.data_sha256, data)
                            return data
                log.error("Couldn't find the attachment in the raw message",
                          message_id=message.id)
        log.error('No data returned!')
        return value

    assert self.data_sha256 == sha256(value).hexdigest(), \
        "Returned data doesn't match stored hash!"
    return value
def data(self):
    """Return this block's raw bytes, recovering them if necessary.

    Resolution order: in-memory cache (set on initial download), then
    the blockstore. If the blob is missing and this Block is an
    attachment of a message, re-fetch the raw message (blockstore first,
    then the mail provider), re-parse it, and re-upload the matching
    subpart. Cache hit/miss counts and fetch latencies are reported to
    statsd.
    """
    if self.size == 0:
        log.warning('Block size is 0')
        return ''
    elif hasattr(self, '_data'):
        # On initial download we temporarily store data in memory
        value = self._data
    else:
        value = blockstore.get_from_blockstore(self.data_sha256)

    if value is None:
        log.warning("Couldn't find data on S3 for block",
                    sha_hash=self.data_sha256)

        from inbox.models.block import Block
        if isinstance(self, Block):
            if self.parts:
                # This block is an attachment of a message that was
                # deleted. We will attempt to fetch the raw
                # message and parse out the needed attachment.
                message = self.parts[0].message  # only grab one
                account = message.namespace.account
                statsd_string = 'api.direct_fetching.{}.{}'.format(
                    account.provider, account.id)

                # Try to fetch the message from S3 first.
                with statsd_client.timer(
                        '{}.blockstore_latency'.format(statsd_string)):
                    raw_mime = blockstore.get_from_blockstore(
                        message.data_sha256)

                # If it's not there, get it from the provider.
                if raw_mime is None:
                    statsd_client.incr(
                        '{}.cache_misses'.format(statsd_string))

                    with statsd_client.timer(
                            '{}.provider_latency'.format(statsd_string)):
                        raw_mime = get_raw_from_provider(message)

                    # Only cache the raw message if the provider actually
                    # returned one: hashing None would raise TypeError
                    # and make the give-up branch below unreachable.
                    if raw_mime is not None:
                        msg_sha256 = sha256(raw_mime).hexdigest()

                        # Cache the raw message in the blockstore so that
                        # we don't have to fetch it over and over.
                        with statsd_client.timer(
                                '{}.blockstore_save_latency'.format(
                                    statsd_string)):
                            blockstore.save_to_blockstore(msg_sha256,
                                                          raw_mime)
                else:
                    # We found it in the blockstore --- report this.
                    statsd_client.incr(
                        '{}.cache_hits'.format(statsd_string))

                # If we couldn't find it there, give up.
                if raw_mime is None:
                    log.error("Don't have raw message for hash {}".format(
                        message.data_sha256))
                    return None

                parsed = mime.from_string(raw_mime)
                if parsed is not None:
                    for mimepart in parsed.walk(
                            with_self=parsed.content_type.is_singlepart()):
                        if mimepart.content_type.is_multipart():
                            continue  # TODO should we store relations?
                        data = mimepart.body
                        if isinstance(data, unicode):
                            data = data.encode('utf-8', 'strict')
                        if data is None:
                            continue
                        # Found it!
                        if sha256(data).hexdigest() == self.data_sha256:
                            log.info('Found subpart with hash {}'.format(
                                self.data_sha256))
                            with statsd_client.timer(
                                    '{}.blockstore_save_latency'.format(
                                        statsd_string)):
                                blockstore.save_to_blockstore(
                                    self.data_sha256, data)
                            return data
                log.error(
                    "Couldn't find the attachment in the raw message",
                    message_id=message.id)
        log.error('No data returned!')
        return value

    assert self.data_sha256 == sha256(value).hexdigest(), \
        "Returned data doesn't match stored hash!"
    return value