class UploadCallbackConnection(httplib2.HTTPSConnectionWithTimeout):
  """Connection class override for uploads."""
  bytes_uploaded_container = outer_bytes_uploaded_container
  # After we instantiate this class, apitools will check with the server
  # to find out how many bytes remain for a resumable upload. This allows
  # us to update our progress once based on that number.
  processed_initial_bytes = False
  GCS_JSON_BUFFER_SIZE = outer_buffer_size
  callback_processor = None
  size = outer_total_size

  def __init__(self, *args, **kwargs):
    kwargs['timeout'] = SSL_TIMEOUT
    httplib2.HTTPSConnectionWithTimeout.__init__(
        self, *args, **kwargs)

  # Override httplib.HTTPConnection._send_output for debug logging.
  # Because the distinction between headers and message body occurs
  # only in this httplib function, we can only differentiate them here.
  def _send_output(self, message_body=None):
    """Send the currently buffered request and clear the buffer.

    Appends an extra \\r\\n to the buffer.

    Args:
      message_body: if specified, this is appended to the request.
    """
    # TODO: Presently, apitools will set httplib2.debuglevel to 0
    # (no prints) or 4 (dump upload payload, httplib prints to stdout).
    # Refactor to allow our media-handling functions to handle
    # debuglevel == 4 and print messages to stderr.
    self._buffer.extend(('', ''))
    msg = '\r\n'.join(self._buffer)
    num_metadata_bytes = len(msg)
    if outer_debug == DEBUGLEVEL_DUMP_REQUESTS and outer_logger:
      outer_logger.debug('send: %s' % msg)
    del self._buffer[:]
    # If msg and message_body are sent in a single send() call,
    # it will avoid performance problems caused by the interaction
    # between delayed ack and the Nagle algorithm.
    if isinstance(message_body, str):
      msg += message_body
      message_body = None
    self.send(msg, num_metadata_bytes=num_metadata_bytes)
    if message_body is not None:
      # message_body was not a string (i.e., it is a file), and
      # we must run the risk of Nagle.
      self.send(message_body)

  def send(self, data, num_metadata_bytes=0):
    """Overrides HTTPConnection.send.

    Args:
      data: string or file-like object (implements read()) of data to send.
      num_metadata_bytes: number of bytes that consist of metadata
          (headers, etc.) not representing the data being uploaded.
    """
    if not self.processed_initial_bytes:
      self.processed_initial_bytes = True
      if outer_progress_callback:
        self.callback_processor = ProgressCallbackWithBackoff(
            outer_total_size, outer_progress_callback)
        self.callback_processor.Progress(
            self.bytes_uploaded_container.bytes_transferred)
    # httplib.HTTPConnection.send accepts either a string or a file-like
    # object (anything that implements read()).
    if isinstance(data, basestring):
      full_buffer = cStringIO.StringIO(data)
    else:
      full_buffer = data
    partial_buffer = full_buffer.read(self.GCS_JSON_BUFFER_SIZE)
    while partial_buffer:
      httplib2.HTTPSConnectionWithTimeout.send(self, partial_buffer)
      sent_data_bytes = len(partial_buffer)
      if num_metadata_bytes:
        if num_metadata_bytes <= sent_data_bytes:
          sent_data_bytes -= num_metadata_bytes
          num_metadata_bytes = 0
        else:
          num_metadata_bytes -= sent_data_bytes
          sent_data_bytes = 0
      if self.callback_processor:
        # TODO: We can't differentiate the multipart upload metadata in the
        # request body from the actual upload bytes, so we will actually
        # report slightly more bytes than desired to the callback handler.
        # Get the number of multipart upload metadata bytes from apitools
        # and subtract from sent_data_bytes.
        self.callback_processor.Progress(sent_data_bytes)
      partial_buffer = full_buffer.read(self.GCS_JSON_BUFFER_SIZE)
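# The block below is a minimal, standalone sketch (not gsutil code) of the
# metadata-byte accounting that UploadCallbackConnection.send() performs
# above: header bytes at the front of the buffered request are subtracted
# before progress is reported, so the callback sees (approximately) only
# payload bytes. It assumes Python 3 and io.BytesIO for self-containment;
# the names report_progress, buffer_size, and progress_cb are illustrative
# and not part of gsutil or apitools.
import io


def report_progress(data, num_metadata_bytes, buffer_size, progress_cb):
  """Streams data in buffer_size chunks, reporting only payload bytes."""
  full_buffer = io.BytesIO(data)
  partial_buffer = full_buffer.read(buffer_size)
  while partial_buffer:
    # A real connection class would call the parent class's send() here.
    sent_data_bytes = len(partial_buffer)
    if num_metadata_bytes:
      if num_metadata_bytes <= sent_data_bytes:
        sent_data_bytes -= num_metadata_bytes
        num_metadata_bytes = 0
      else:
        num_metadata_bytes -= sent_data_bytes
        sent_data_bytes = 0
    progress_cb(sent_data_bytes)
    partial_buffer = full_buffer.read(buffer_size)


if __name__ == '__main__':
  reported = []
  # 20 bytes of "headers" followed by 100 bytes of payload, sent in 32-byte
  # chunks: the callback should be told about 100 bytes in total.
  report_progress(b'H' * 20 + b'P' * 100, num_metadata_bytes=20,
                  buffer_size=32, progress_cb=reported.append)
  assert sum(reported) == 100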
class DownloadCallbackConnection(httplib2.HTTPSConnectionWithTimeout):
  """Connection class override for downloads."""
  # These values are bound from the enclosing scope in which this class is
  # defined; self here refers to that outer object, not to a connection.
  outer_total_size = self.total_size
  outer_digesters = self.digesters
  outer_progress_callback = self.progress_callback
  outer_bytes_downloaded_container = self.bytes_downloaded_container
  processed_initial_bytes = False
  callback_processor = None

  def __init__(self, *args, **kwargs):
    kwargs['timeout'] = SSL_TIMEOUT
    httplib2.HTTPSConnectionWithTimeout.__init__(
        self, *args, **kwargs)

  def getresponse(self, buffering=False):
    """Wraps an HTTPResponse to perform callbacks and hashing.

    In this function, self is a DownloadCallbackConnection.

    Args:
      buffering: Unused. This function uses a local buffer.

    Returns:
      HTTPResponse object with wrapped read function.
    """
    orig_response = httplib.HTTPConnection.getresponse(self)
    if orig_response.status not in (httplib.OK, httplib.PARTIAL_CONTENT):
      return orig_response
    orig_read_func = orig_response.read

    def read(amt=None):  # pylint: disable=invalid-name
      """Overrides HTTPConnection.getresponse.read.

      This function only supports reads of TRANSFER_BUFFER_SIZE or smaller.

      Args:
        amt: Integer n where 0 < n <= TRANSFER_BUFFER_SIZE. This is a
            keyword argument to match the read function it overrides,
            but it is required.

      Returns:
        Data read from HTTPConnection.
      """
      if not amt or amt > TRANSFER_BUFFER_SIZE:
        raise BadRequestException(
            'Invalid HTTP read size %s during download, expected %s.' %
            (amt, TRANSFER_BUFFER_SIZE))
      else:
        amt = amt or TRANSFER_BUFFER_SIZE

      if not self.processed_initial_bytes:
        self.processed_initial_bytes = True
        if self.outer_progress_callback:
          self.callback_processor = ProgressCallbackWithBackoff(
              self.outer_total_size, self.outer_progress_callback)
          self.callback_processor.Progress(
              self.outer_bytes_downloaded_container.bytes_transferred)

      data = orig_read_func(amt)
      read_length = len(data)
      if self.callback_processor:
        self.callback_processor.Progress(read_length)
      if self.outer_digesters:
        for alg in self.outer_digesters:
          self.outer_digesters[alg].update(data)
      return data

    orig_response.read = read
    return orig_response
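# The block below is a minimal, standalone sketch (not gsutil code) of the
# read()-wrapping idea used by DownloadCallbackConnection.getresponse()
# above: every chunk handed back to the caller is also fed to the active
# hash digesters and to a progress callback. It assumes Python 3 and uses
# io.BytesIO in place of a real HTTP response; the names wrap_read,
# digesters, and progress_cb are illustrative and not part of gsutil.
import hashlib
import io


def wrap_read(raw_stream, digesters, progress_cb):
  """Returns a read(amt) function that hashes and reports as it reads."""
  def read(amt=None):
    data = raw_stream.read(amt)
    if data:
      progress_cb(len(data))
      for alg in digesters:
        digesters[alg].update(data)
    return data
  return read


if __name__ == '__main__':
  payload = b'example download payload'
  digesters = {'md5': hashlib.md5()}
  transferred = []
  read = wrap_read(io.BytesIO(payload), digesters, transferred.append)
  while read(8):
    pass
  assert sum(transferred) == len(payload)
  assert digesters['md5'].hexdigest() == hashlib.md5(payload).hexdigest()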