def send(self, data, num_metadata_bytes=0):
    """Overrides HTTPConnection.send.

    Args:
      data: string or file-like object (implements read()) of data to send.
      num_metadata_bytes: number of bytes that consist of metadata
          (headers, etc.) not representing the data being uploaded.
    """
    if not self.processed_initial_bytes:
        self.processed_initial_bytes = True
        if outer_progress_callback:
            self.callback_processor = ProgressCallbackWithTimeout(
                outer_total_size, outer_progress_callback)
            self.callback_processor.Progress(
                self.bytes_uploaded_container.bytes_transferred)
    # httplib.HTTPConnection.send accepts either a string or a file-like
    # object (anything that implements read()).
    if isinstance(data, six.text_type):
        full_buffer = cStringIO(data)
    elif isinstance(data, six.binary_type):
        full_buffer = six.BytesIO(data)
    else:
        full_buffer = data
    partial_buffer = full_buffer.read(self.GCS_JSON_BUFFER_SIZE)
    while partial_buffer:
        if six.PY2:
            httplib2.HTTPSConnectionWithTimeout.send(self, partial_buffer)
        else:
            if isinstance(partial_buffer, bytes):
                httplib2.HTTPSConnectionWithTimeout.send(self, partial_buffer)
            else:
                httplib2.HTTPSConnectionWithTimeout.send(
                    self, partial_buffer.encode(UTF8))
        sent_data_bytes = len(partial_buffer)
        if num_metadata_bytes:
            if num_metadata_bytes <= sent_data_bytes:
                sent_data_bytes -= num_metadata_bytes
                num_metadata_bytes = 0
            else:
                num_metadata_bytes -= sent_data_bytes
                sent_data_bytes = 0
        if self.callback_processor:
            # Modify the sent data bytes by the size modifier. These are
            # stored as floats, so the result should be floored.
            sent_data_bytes = int(sent_data_bytes * self.size_modifier)
            # TODO: We can't differentiate the multipart upload
            # metadata in the request body from the actual upload bytes, so we
            # will actually report slightly more bytes than desired to the
            # callback handler. Get the number of multipart upload metadata
            # bytes from apitools and subtract from sent_data_bytes.
            self.callback_processor.Progress(sent_data_bytes)
        partial_buffer = full_buffer.read(self.GCS_JSON_BUFFER_SIZE)
Example #2
  def RunCommand(self):
    """Command entry point for the hash command."""
    (calc_crc32c, calc_md5, format_func, cloud_format_func, output_format) = (
        self._ParseOpts(self.sub_opts, self.logger))

    matched_one = False
    for url_str in self.args:
      for file_ref in self.WildcardIterator(
          url_str).IterObjects(bucket_listing_fields=['crc32c', 'md5Hash',
                                                      'customerEncryption',
                                                      'size']):
        matched_one = True
        url = StorageUrlFromString(url_str)
        file_name = file_ref.storage_url.object_name
        if StorageUrlFromString(url_str).IsFileUrl():
          file_size = os.path.getsize(file_name)
          self.gsutil_api.status_queue.put(
              FileMessage(url, None, time.time(), size=file_size,
                          finished=False, message_type=FileMessage.FILE_HASH))
          callback_processor = ProgressCallbackWithTimeout(
              file_size, FileProgressCallbackHandler(
                  self.gsutil_api.status_queue,
                  src_url=StorageUrlFromString(url_str),
                  operation_name='Hashing').call)
          hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
          with open(file_name, 'rb') as fp:
            CalculateHashesFromContents(fp, hash_dict,
                                        callback_processor=callback_processor)
          self.gsutil_api.status_queue.put(
              FileMessage(url, None, time.time(), size=file_size, finished=True,
                          message_type=FileMessage.FILE_HASH))
        else:
          hash_dict = {}
          obj_metadata = file_ref.root_object
          file_size = obj_metadata.size
          md5_present = obj_metadata.md5Hash is not None
          crc32c_present = obj_metadata.crc32c is not None
          if not md5_present and not crc32c_present:
            logging.getLogger().warning('No hashes present for %s', url_str)
            continue
          if md5_present:
            hash_dict['md5'] = obj_metadata.md5Hash
          if crc32c_present:
            hash_dict['crc32c'] = obj_metadata.crc32c
        print('Hashes [%s] for %s:' % (output_format, file_name))
        for name, digest in hash_dict.items():
          print('\tHash (%s):\t\t%s' % (name,
                                        (format_func(digest) if url.IsFileUrl()
                                         else cloud_format_func(digest))))

    if not matched_one:
      raise CommandException('No files matched')
    PutToQueueWithTimeout(self.gsutil_api.status_queue,
                          FinalMessage(time.time()))
    return 0
Example #3
                def read(amt=None):  # pylint: disable=invalid-name
                    """Overrides HTTPConnection.getresponse.read.

                    This function only supports reads of TRANSFER_BUFFER_SIZE or smaller.

                    Args:
                      amt: Integer n where 0 < n <= TRANSFER_BUFFER_SIZE. This is a
                           keyword argument to match the read function it overrides,
                           but it is required.

                    Returns:
                      Data read from HTTPConnection.
                    """
                    if not amt or amt > TRANSFER_BUFFER_SIZE:
                        raise BadRequestException(
                            'Invalid HTTP read size %s during download, expected %s.'
                            % (amt, TRANSFER_BUFFER_SIZE))
                    else:
                        amt = amt or TRANSFER_BUFFER_SIZE

                    if not self.processed_initial_bytes:
                        self.processed_initial_bytes = True
                        if self.outer_progress_callback:
                            self.callback_processor = ProgressCallbackWithTimeout(
                                self.outer_total_size,
                                self.outer_progress_callback)
                            self.callback_processor.Progress(
                                self.outer_bytes_downloaded_container.
                                bytes_transferred)

                    data = orig_read_func(amt)
                    read_length = len(data)
                    if self.callback_processor:
                        self.callback_processor.Progress(read_length)
                    if self.outer_digesters:
                        for alg in self.outer_digesters:
                            self.outer_digesters[alg].update(data)
                    return data
Example #4
        class UploadCallbackConnection(httplib2.HTTPSConnectionWithTimeout):
            """Connection class override for uploads."""
            bytes_uploaded_container = outer_bytes_uploaded_container
            # After we instantiate this class, apitools will check with the server
            # to find out how many bytes remain for a resumable upload.  This allows
            # us to update our progress once based on that number.
            processed_initial_bytes = False
            GCS_JSON_BUFFER_SIZE = outer_buffer_size
            callback_processor = None
            size = outer_total_size

            def __init__(self, *args, **kwargs):
                kwargs['timeout'] = SSL_TIMEOUT
                httplib2.HTTPSConnectionWithTimeout.__init__(
                    self, *args, **kwargs)

            # Override httplib.HTTPConnection._send_output for debug logging.
            # Because the distinction between headers and message body occurs
            # only in this httplib function, we can only differentiate them here.
            def _send_output(self, message_body=None):
                r"""Send the currently buffered request and clear the buffer.

                Appends an extra \r\n to the buffer.

                Args:
                  message_body: if specified, this is appended to the request.
                """
                # TODO: Presently, apitools will set httplib2.debuglevel to 0
                # (no prints) or 4 (dump upload payload, httplib prints to stdout).
                # Refactor to allow our media-handling functions to handle
                # debuglevel == 4 and print messages to stderr.
                self._buffer.extend(('', ''))
                msg = '\r\n'.join(self._buffer)
                num_metadata_bytes = len(msg)
                if outer_debug == DEBUGLEVEL_DUMP_REQUESTS and outer_logger:
                    outer_logger.debug('send: %s' % msg)
                del self._buffer[:]
                # If msg and message_body are sent in a single send() call,
                # it will avoid performance problems caused by the interaction
                # between delayed ack and the Nagle algorithm.
                if isinstance(message_body, str):
                    msg += message_body
                    message_body = None
                self.send(msg, num_metadata_bytes=num_metadata_bytes)
                if message_body is not None:
                    # message_body was not a string (i.e. it is a file) and
                    # we must run the risk of Nagle
                    self.send(message_body)

            def send(self, data, num_metadata_bytes=0):
                """Overrides HTTPConnection.send.

                Args:
                  data: string or file-like object (implements read()) of data to send.
                  num_metadata_bytes: number of bytes that consist of metadata
                      (headers, etc.) not representing the data being uploaded.
                """
                if not self.processed_initial_bytes:
                    self.processed_initial_bytes = True
                    if outer_progress_callback:
                        self.callback_processor = ProgressCallbackWithTimeout(
                            outer_total_size, outer_progress_callback)
                        self.callback_processor.Progress(
                            self.bytes_uploaded_container.bytes_transferred)
                # httplib.HTTPConnection.send accepts either a string or a file-like
                # object (anything that implements read()).
                if isinstance(data, basestring):
                    full_buffer = cStringIO.StringIO(data)
                else:
                    full_buffer = data
                partial_buffer = full_buffer.read(self.GCS_JSON_BUFFER_SIZE)
                while partial_buffer:
                    httplib2.HTTPSConnectionWithTimeout.send(
                        self, partial_buffer)
                    sent_data_bytes = len(partial_buffer)
                    if num_metadata_bytes:
                        if num_metadata_bytes <= sent_data_bytes:
                            sent_data_bytes -= num_metadata_bytes
                            num_metadata_bytes = 0
                        else:
                            num_metadata_bytes -= sent_data_bytes
                            sent_data_bytes = 0
                    if self.callback_processor:
                        # TODO: We can't differentiate the multipart upload
                        # metadata in the request body from the actual upload bytes, so we
                        # will actually report slightly more bytes than desired to the
                        # callback handler. Get the number of multipart upload metadata
                        # bytes from apitools and subtract from sent_data_bytes.
                        self.callback_processor.Progress(sent_data_bytes)
                    partial_buffer = full_buffer.read(
                        self.GCS_JSON_BUFFER_SIZE)
Example #5
        class DownloadCallbackConnection(httplib2.HTTPSConnectionWithTimeout):
            """Connection class override for downloads."""
            outer_total_size = self.total_size
            outer_digesters = self.digesters
            outer_progress_callback = self.progress_callback
            outer_bytes_downloaded_container = self.bytes_downloaded_container
            processed_initial_bytes = False
            callback_processor = None

            def __init__(self, *args, **kwargs):
                kwargs['timeout'] = SSL_TIMEOUT
                httplib2.HTTPSConnectionWithTimeout.__init__(
                    self, *args, **kwargs)

            def getresponse(self, buffering=False):
                """Wraps an HTTPResponse to perform callbacks and hashing.

                In this function, self is a DownloadCallbackConnection.

                Args:
                  buffering: Unused. This function uses a local buffer.

                Returns:
                  HTTPResponse object with wrapped read function.
                """
                orig_response = httplib.HTTPConnection.getresponse(self)
                if orig_response.status not in (httplib.OK,
                                                httplib.PARTIAL_CONTENT):
                    return orig_response
                orig_read_func = orig_response.read

                def read(amt=None):  # pylint: disable=invalid-name
                    """Overrides HTTPConnection.getresponse.read.

                    This function only supports reads of TRANSFER_BUFFER_SIZE or smaller.

                    Args:
                      amt: Integer n where 0 < n <= TRANSFER_BUFFER_SIZE. This is a
                           keyword argument to match the read function it overrides,
                           but it is required.

                    Returns:
                      Data read from HTTPConnection.
                    """
                    if not amt or amt > TRANSFER_BUFFER_SIZE:
                        raise BadRequestException(
                            'Invalid HTTP read size %s during download, expected %s.'
                            % (amt, TRANSFER_BUFFER_SIZE))
                    else:
                        amt = amt or TRANSFER_BUFFER_SIZE

                    if not self.processed_initial_bytes:
                        self.processed_initial_bytes = True
                        if self.outer_progress_callback:
                            self.callback_processor = ProgressCallbackWithTimeout(
                                self.outer_total_size,
                                self.outer_progress_callback)
                            self.callback_processor.Progress(
                                self.outer_bytes_downloaded_container.
                                bytes_transferred)

                    data = orig_read_func(amt)
                    read_length = len(data)
                    if self.callback_processor:
                        self.callback_processor.Progress(read_length)
                    if self.outer_digesters:
                        for alg in self.outer_digesters:
                            self.outer_digesters[alg].update(data)
                    return data

                orig_response.read = read

                return orig_response
Example #6
    class UploadCallbackConnection(httplib2.HTTPSConnectionWithTimeout):
      """Connection class override for uploads."""
      bytes_uploaded_container = outer_bytes_uploaded_container
      # After we instantiate this class, apitools will check with the server
      # to find out how many bytes remain for a resumable upload.  This allows
      # us to update our progress once based on that number.
      processed_initial_bytes = False
      GCS_JSON_BUFFER_SIZE = outer_buffer_size
      callback_processor = None
      size = outer_total_size
      header_encoding = ''
      header_length = None
      header_range = None
      size_modifier = 1.0

      def __init__(self, *args, **kwargs):
        kwargs['timeout'] = SSL_TIMEOUT_SEC
        httplib2.HTTPSConnectionWithTimeout.__init__(self, *args, **kwargs)

      # Override httplib.HTTPConnection._send_output for debug logging.
      # Because the distinction between headers and message body occurs
      # only in this httplib function, we can only differentiate them here.
      def _send_output(self, message_body=None, encode_chunked=False):
        r"""Send the currently buffered request and clear the buffer.

        Appends an extra \r\n to the buffer.

        Args:
          message_body: if specified, this is appended to the request.
        """
        # TODO: Presently, apitools will set httplib2.debuglevel to 0
        # (no prints) or 4 (dump upload payload, httplib prints to stdout).
        # Refactor to allow our media-handling functions to handle
        # debuglevel == 4 and print messages to stderr.
        self._buffer.extend((b'', b''))
        if six.PY2:
          items = self._buffer
        else:
          items = []
          for item in self._buffer:
            if isinstance(item, bytes):
              items.append(item)
            else:
              items.append(item.encode(UTF8))
        msg = b'\r\n'.join(items)
        num_metadata_bytes = len(msg)
        if outer_debug == DEBUGLEVEL_DUMP_REQUESTS and outer_logger:
          outer_logger.debug('send: %s' % msg)
        del self._buffer[:]
        # If msg and message_body are sent in a single send() call,
        # it will avoid performance problems caused by the interaction
        # between delayed ack and the Nagle algorithm.
        if isinstance(message_body, str):
          msg += message_body
          message_body = None
        self.send(msg, num_metadata_bytes=num_metadata_bytes)
        if message_body is not None:
          # message_body was not a string (i.e. it is a file) and
          # we must run the risk of Nagle
          self.send(message_body)

      def putheader(self, header, *values):
        """Overrides HTTPConnection.putheader.

        Send a request header line to the server. For example:
        h.putheader('Accept', 'text/html').

        This override records the content encoding, length, and range of the
        payload. For uploads where the content-range difference does not match
        the content-length, progress printing will under-report progress. These
        headers are used to calculate a multiplier to correct the progress.

        For example: the content-length for gzip transport encoded data
        represents the compressed size of the data while the content-range
        difference represents the uncompressed size. Dividing the
        content-range difference by the content-length gives the ratio to
        multiply the progress by to correctly report the relative progress.
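
        As a concrete illustration: if the content-range difference is 1,000
        bytes (uncompressed object data) and the content-length is 400 bytes
        (gzip-compressed), the modifier is 1000 / 400 = 2.5, so each compressed
        byte sent is reported as 2.5 bytes of progress.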

        Args:
          header: The header.
          *values: A set of values for the header.
        """
        if header == 'content-encoding':
          value = ''.join([str(v) for v in values])
          self.header_encoding = value
          if outer_debug == DEBUGLEVEL_DUMP_REQUESTS and outer_logger:
            outer_logger.debug(
                'send: Using gzip transport encoding for the request.')
        elif header == 'content-length':
          try:
            value = int(''.join([str(v) for v in values]))
            self.header_length = value
          except ValueError:
            pass
        elif header == 'content-range':
          try:
            # There are 3 valid header formats:
            #  '*/%d', '%d-%d/*', and '%d-%d/%d'
            value = ''.join([str(v) for v in values])
            ranges = DECIMAL_REGEX().findall(value)
            # If there are 2 or more range values, they will always
            # correspond to the start and end ranges in the header.
            if len(ranges) > 1:
              # Subtract the end position from the start position.
              self.header_range = (int(ranges[1]) - int(ranges[0])) + 1
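              # For example, a header value of '0-999/1000' yields start 0 and
              # end 999, so header_range = (999 - 0) + 1 = 1000 uncompressed
              # bytes.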
          except ValueError:
            pass
        # If the content header is gzip, and a range and length are set,
        # update the modifier.
        if (self.header_encoding == 'gzip' and self.header_length and
            self.header_range):
          # Update the modifier
          self.size_modifier = self.header_range / float(self.header_length)
          # Reset the headers
          self.header_encoding = ''
          self.header_length = None
          self.header_range = None
          # Log debug information to catch in tests.
          if outer_debug == DEBUGLEVEL_DUMP_REQUESTS and outer_logger:
            outer_logger.debug('send: Setting progress modifier to %s.' %
                               (self.size_modifier))
        # Propagate header values.
        http_client.HTTPSConnection.putheader(self, header, *values)

      def send(self, data, num_metadata_bytes=0):
        """Overrides HTTPConnection.send.

        Args:
          data: string or file-like object (implements read()) of data to send.
          num_metadata_bytes: number of bytes that consist of metadata
              (headers, etc.) not representing the data being uploaded.
        """
        if not self.processed_initial_bytes:
          self.processed_initial_bytes = True
          if outer_progress_callback:
            self.callback_processor = ProgressCallbackWithTimeout(
                outer_total_size, outer_progress_callback)
            self.callback_processor.Progress(
                self.bytes_uploaded_container.bytes_transferred)
        # httplib.HTTPConnection.send accepts either a string or a file-like
        # object (anything that implements read()).
        if isinstance(data, six.text_type):
          full_buffer = cStringIO(data)
        elif isinstance(data, six.binary_type):
          full_buffer = six.BytesIO(data)
        else:
          full_buffer = data
        partial_buffer = full_buffer.read(self.GCS_JSON_BUFFER_SIZE)
        while partial_buffer:
          if six.PY2:
            httplib2.HTTPSConnectionWithTimeout.send(self, partial_buffer)
          else:
            if isinstance(partial_buffer, bytes):
              httplib2.HTTPSConnectionWithTimeout.send(self, partial_buffer)
            else:
              httplib2.HTTPSConnectionWithTimeout.send(
                  self, partial_buffer.encode(UTF8))
          sent_data_bytes = len(partial_buffer)
          if num_metadata_bytes:
            if num_metadata_bytes <= sent_data_bytes:
              sent_data_bytes -= num_metadata_bytes
              num_metadata_bytes = 0
            else:
              num_metadata_bytes -= sent_data_bytes
              sent_data_bytes = 0
          if self.callback_processor:
            # Modify the sent data bytes by the size modifier. These are
            # stored as floats, so the result should be floored.
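            # For example, 8192 compressed bytes sent with a size modifier of
            # 2.5 are reported as int(8192 * 2.5) = 20480 bytes of progress.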
            sent_data_bytes = int(sent_data_bytes * self.size_modifier)
            # TODO: We can't differentiate the multipart upload
            # metadata in the request body from the actual upload bytes, so we
            # will actually report slightly more bytes than desired to the
            # callback handler. Get the number of multipart upload metadata
            # bytes from apitools and subtract from sent_data_bytes.
            self.callback_processor.Progress(sent_data_bytes)
          partial_buffer = full_buffer.read(self.GCS_JSON_BUFFER_SIZE)