Example #1
  def testPerformanceSummaryEventCollection(self):
    """Test the collection of PerformanceSummary GA events."""
    # PerformanceSummaries are only collected for cp and rsync.
    self.collector.ga_params[metrics._GA_LABEL_MAP['Command Name']] = 'cp'
    # GetDiskCounters is called at initialization of _PerformanceSummaryParams,
    # which occurs during the first call to LogPerformanceSummaryParams.
    with mock.patch('gslib.metrics.GetDiskCounters',
                    return_value={'fake-disk': (0, 0, 0, 0, 0, 0)}):
      metrics.LogPerformanceSummaryParams(
          uses_fan=True, uses_slice=True, avg_throughput=10,
          is_daisy_chain=True, has_file_dst=False, has_cloud_dst=True,
          has_file_src=False, has_cloud_src=True, total_bytes_transferred=100,
          total_elapsed_time=10, thread_idle_time=40, thread_execution_time=10,
          num_processes=2, num_threads=3, num_objects_transferred=3,
          provider_types=['gs'])

    # Log a retryable service error and two retryable network errors.
    service_retry_msg = RetryableErrorMessage(
        apitools_exceptions.CommunicationError(), 0)
    network_retry_msg = RetryableErrorMessage(socket.error(), 0)
    metrics.LogRetryableError(service_retry_msg)
    metrics.LogRetryableError(network_retry_msg)
    metrics.LogRetryableError(network_retry_msg)

    # Log some thread throughput.
    start_file_msg = FileMessage('src', 'dst', 0, size=100)
    end_file_msg = FileMessage('src', 'dst', 10, finished=True)
    start_file_msg.thread_id = end_file_msg.thread_id = 1
    start_file_msg.process_id = end_file_msg.process_id = 1
    metrics.LogPerformanceSummaryParams(file_message=start_file_msg)
    metrics.LogPerformanceSummaryParams(file_message=end_file_msg)
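    # The FileMessages above record 100 bytes moving between t=0 and t=10 on
    # (process, thread) (1, 1), so the expected throughput is 100 / 10 = 10.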
    self.assertEqual(self.collector.perf_sum_params.thread_throughputs[
        (1, 1)].GetThroughput(), 10)

    # GetDiskCounters is called a second time during collection.
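    # Relative to the initial mock, the last two counter fields each grow by
    # 10, so the expected Disk I/O time below is 10 + 10 = 20.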
    with mock.patch('gslib.metrics.GetDiskCounters',
                    return_value={'fake-disk': (0, 0, 0, 0, 10, 10)}):
      self.collector._CollectPerformanceSummaryMetric()

    # Check for all the expected parameters.
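    # Several expected values derive from the params logged above: the
    # parallelism strategy is 'both' because uses_fan and uses_slice were both
    # True, and thread idle time percent = 40 / (40 + 10) = 0.8.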
    metric_body = self.collector._metrics[0].body
    label_and_value_pairs = [
        ('Event Category', metrics._GA_PERFSUM_CATEGORY),
        ('Event Action', 'CloudToCloud%2CDaisyChain'), ('Execution Time', '10'),
        ('Parallelism Strategy', 'both'), ('Source URL Type', 'cloud'),
        ('Provider Types', 'gs'), ('Num Processes', '2'), ('Num Threads', '3'),
        ('Number of Files/Objects Transferred', '3'),
        ('Size of Files/Objects Transferred', '100'),
        ('Average Overall Throughput', '10'),
        ('Num Retryable Service Errors', '1'),
        ('Num Retryable Network Errors', '2'),
        ('Thread Idle Time Percent', '0.8'),
        ('Slowest Thread Throughput', '10'),
        ('Fastest Thread Throughput', '10'),
    ]
    if IS_LINUX:  # Disk I/O time is only available on Linux.
      label_and_value_pairs.append(('Disk I/O Time', '20'))
    for label, exp_value in label_and_value_pairs:
      self.assertIn('{0}={1}'.format(metrics._GA_LABEL_MAP[label], exp_value),
                    metric_body)
Example #2
  def RunCommand(self):
    """Command entry point for the hash command."""
    (calc_crc32c, calc_md5, format_func, cloud_format_func, output_format) = (
        self._ParseOpts(self.sub_opts, self.logger))

    matched_one = False
    for url_str in self.args:
      for file_ref in self.WildcardIterator(
          url_str).IterObjects(bucket_listing_fields=['crc32c', 'md5Hash',
                                                      'customerEncryption',
                                                      'size']):
        matched_one = True
        url = StorageUrlFromString(url_str)
        file_name = file_ref.storage_url.object_name
        if url.IsFileUrl():
          file_size = os.path.getsize(file_name)
          self.gsutil_api.status_queue.put(
              FileMessage(url, None, time.time(), size=file_size,
                          finished=False, message_type=FileMessage.FILE_HASH))
          callback_processor = ProgressCallbackWithTimeout(
              file_size, FileProgressCallbackHandler(
                  self.gsutil_api.status_queue,
                  src_url=url,
                  operation_name='Hashing').call)
          hash_dict = self._GetHashClassesFromArgs(calc_crc32c, calc_md5)
          with open(file_name, 'rb') as fp:
            CalculateHashesFromContents(fp, hash_dict,
                                        callback_processor=callback_processor)
          self.gsutil_api.status_queue.put(
              FileMessage(url, None, time.time(), size=file_size, finished=True,
                          message_type=FileMessage.FILE_HASH))
        else:
          hash_dict = {}
          obj_metadata = file_ref.root_object
          file_size = obj_metadata.size
          md5_present = obj_metadata.md5Hash is not None
          crc32c_present = obj_metadata.crc32c is not None
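          # Cloud objects may lack one or both hashes; for example, composite
          # GCS objects have no MD5.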
          if not md5_present and not crc32c_present:
            logging.getLogger().warning('No hashes present for %s', url_str)
            continue
          if md5_present:
            hash_dict['md5'] = obj_metadata.md5Hash
          if crc32c_present:
            hash_dict['crc32c'] = obj_metadata.crc32c
        print('Hashes [%s] for %s:' % (output_format, file_name))
        for name, digest in hash_dict.items():
          print('\tHash (%s):\t\t%s' % (name,
                                        (format_func(digest) if url.IsFileUrl()
                                         else cloud_format_func(digest))))

    if not matched_one:
      raise CommandException('No files matched')
    PutToQueueWithTimeout(self.gsutil_api.status_queue,
                          FinalMessage(time.time()))
    return 0
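
The local-file branch above streams file contents through every requested digest via CalculateHashesFromContents. A minimal standalone sketch of the same incremental-hashing pattern, using only the standard library (the function and file names here are illustrative, not gsutil's):

import hashlib

def calculate_hashes_from_contents(fp, hash_dict, chunk_size=8192):
  """Feeds fp through every digest in hash_dict, in constant memory."""
  while True:
    chunk = fp.read(chunk_size)
    if not chunk:
      break
    for digest in hash_dict.values():
      digest.update(chunk)

# Hypothetical usage:
with open('example.bin', 'rb') as fp:
  hashes = {'md5': hashlib.md5(), 'crc32c-stand-in': hashlib.sha256()}
  calculate_hashes_from_contents(fp, hashes)
  for name, digest in hashes.items():
    print('%s: %s' % (name, digest.hexdigest()))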
Example #3
    def RewriteFunc(self, name_expansion_result, thread_state=None):
        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
        transform_url = name_expansion_result.expanded_storage_url
        # Make a local copy of the requested transformations for each thread:
        # a transformation that is redundant for one object might not be
        # redundant for another, so we must not remove it from the
        # transform_types set that all threads share.
        transforms_to_perform = set(self.transform_types)

        self.CheckProvider(transform_url)

        # Get all fields so that we can ensure that the target metadata is
        # specified correctly.
        src_metadata = gsutil_api.GetObjectMetadata(
            transform_url.bucket_name,
            transform_url.object_name,
            generation=transform_url.generation,
            provider=transform_url.scheme)

        if self.no_preserve_acl:
            # Leave ACL unchanged.
            src_metadata.acl = []
        elif not src_metadata.acl:
            raise CommandException(
                'No OWNER permission found for object %s. OWNER permission is '
                'required for rewriting objects (otherwise their ACLs would be '
                'reset).' % transform_url)

        # Note: If other transform types are added, they must ensure that the
        # encryption key configuration matches the boto configuration, because
        # gsutil maintains an invariant that all objects it writes use the
        # encryption_key value (including decrypting if no key is present).
        src_encryption_sha256 = None
        if (src_metadata.customerEncryption
                and src_metadata.customerEncryption.keySha256):
            src_encryption_sha256 = src_metadata.customerEncryption.keySha256

        should_encrypt_target = self.boto_file_encryption_sha256 is not None
        source_was_encrypted = src_encryption_sha256 is not None
        using_same_encryption_key_value = (
            src_encryption_sha256 == self.boto_file_encryption_sha256)
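        # These three booleans drive everything below: whether the rewrite is
        # an encryption, a decryption, a rotation, or a redundant no-op.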

        # Prevent accidental key rotation.
        if (_TransformTypes.CRYPTO_KEY not in transforms_to_perform
                and not using_same_encryption_key_value):
            raise EncryptionException(
                'The "-k" flag was not passed to the rewrite command, but the '
                'encryption_key value in your boto config file did not match the key '
                'used to encrypt the object "%s" (hash: %s). To encrypt the object '
                'using a different key, you must specify the "-k" flag.' %
                (transform_url, src_encryption_sha256))

        # Remove any redundant changes.

        # STORAGE_CLASS transform should be skipped if the target storage class
        # matches the existing storage class.
        if (_TransformTypes.STORAGE_CLASS in transforms_to_perform
                and self.dest_storage_class == NormalizeStorageClass(
                    src_metadata.storageClass)):
            transforms_to_perform.remove(_TransformTypes.STORAGE_CLASS)
            self.logger.info(
                'Redundant transform: %s already had storage class of '
                '%s.' % (transform_url, src_metadata.storageClass))

        # CRYPTO_KEY transform should be skipped if we're using the same encryption
        # key (if any) that was used to encrypt the source.
        if (_TransformTypes.CRYPTO_KEY in transforms_to_perform
                and using_same_encryption_key_value):
            if self.boto_file_encryption_sha256 is None:
                log_msg = '%s is already decrypted.' % transform_url
            else:
                log_msg = '%s already has current encryption key.' % transform_url
            transforms_to_perform.remove(_TransformTypes.CRYPTO_KEY)
            self.logger.info('Redundant transform: %s' % log_msg)

        if not transforms_to_perform:
            self.logger.info(
                'Skipping %s, all transformations were redundant.' %
                transform_url)
            return

        # Make a deep copy of the source metadata.
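        # (Round-tripping the message through a plain Python value via the
        # apitools encoding module is a simple way to deep-copy it.)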
        dst_metadata = encoding.PyValueToMessage(
            apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

        # Remove some unnecessary/invalid fields.
        dst_metadata.customerEncryption = None
        dst_metadata.generation = None
        # The service has problems if we supply an ID; since it is responsible
        # for generating one, we omit it here.
        dst_metadata.id = None
        decryption_tuple = None
        # Use a generic operation name by default - this can be altered below for
        # specific transformations (encryption changes, etc.).
        operation_name = 'Rewriting'

        if source_was_encrypted:
            decryption_key = FindMatchingCryptoKey(src_encryption_sha256)
            if not decryption_key:
                raise EncryptionException(
                    'Missing decryption key with SHA256 hash %s. No decryption key '
                    'matches object %s' %
                    (src_encryption_sha256, transform_url))
            decryption_tuple = CryptoTupleFromKey(decryption_key)

        if _TransformTypes.CRYPTO_KEY in transforms_to_perform:
            if not source_was_encrypted:
                operation_name = 'Encrypting'
            elif not should_encrypt_target:
                operation_name = 'Decrypting'
            else:
                operation_name = 'Rotating'

        if _TransformTypes.STORAGE_CLASS in transforms_to_perform:
            dst_metadata.storageClass = self.dest_storage_class

        # TODO: Remove this call (used to verify tests) and make it processed by
        # the UIThread.
        sys.stderr.write(
            _ConstructAnnounceText(operation_name, transform_url.url_string))

        # Message indicating beginning of operation.
        gsutil_api.status_queue.put(
            FileMessage(transform_url,
                        None,
                        time.time(),
                        finished=False,
                        size=src_metadata.size,
                        message_type=FileMessage.FILE_REWRITE))

        progress_callback = FileProgressCallbackHandler(
            gsutil_api.status_queue,
            src_url=transform_url,
            operation_name=operation_name).call

        gsutil_api.CopyObject(src_metadata,
                              dst_metadata,
                              src_generation=transform_url.generation,
                              preconditions=self.preconditions,
                              progress_callback=progress_callback,
                              decryption_tuple=decryption_tuple,
                              encryption_tuple=self.boto_file_encryption_tuple,
                              provider=transform_url.scheme,
                              fields=[])

        # Message indicating end of operation.
        gsutil_api.status_queue.put(
            FileMessage(transform_url,
                        None,
                        time.time(),
                        finished=True,
                        size=src_metadata.size,
                        message_type=FileMessage.FILE_REWRITE))
Example #4
  def RewriteFunc(self, name_expansion_result, thread_state=None):
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
    transform_url = name_expansion_result.expanded_storage_url

    self.CheckProvider(transform_url)

    # Get all fields so that we can ensure that the target metadata is
    # specified correctly.
    src_metadata = gsutil_api.GetObjectMetadata(
        transform_url.bucket_name,
        transform_url.object_name,
        generation=transform_url.generation,
        provider=transform_url.scheme)

    if self.no_preserve_acl:
      # Leave ACL unchanged.
      src_metadata.acl = []
    elif not src_metadata.acl:
      raise CommandException(
          'No OWNER permission found for object %s. OWNER permission is '
          'required for rewriting objects (otherwise their ACLs would be '
          'reset).' % transform_url)

    # Note: If other transform types are added, they must ensure that the
    # encryption key configuration matches the boto configuration, because
    # gsutil maintains an invariant that all objects it writes use the
    # encryption_key value (including decrypting if no key is present).

    # Store metadata about src encryption to make logic below easier to read.
    src_encryption_kms_key = src_metadata.kmsKeyName or None

    src_encryption_sha256 = None
    if (src_metadata.customerEncryption and
        src_metadata.customerEncryption.keySha256):
      src_encryption_sha256 = src_metadata.customerEncryption.keySha256
      # In Python 3 hash digests are bytes; the base64 SHA256 value is pure
      # ASCII, so encode it for use as a dict lookup key below.
      src_encryption_sha256 = src_encryption_sha256.encode('ascii')

    src_was_encrypted = (src_encryption_sha256 is not None or
                         src_encryption_kms_key is not None)

    # Also store metadata about dest encryption.
    dest_encryption_kms_key = None
    if (self.boto_file_encryption_keywrapper is not None and
        self.boto_file_encryption_keywrapper.crypto_type == CryptoKeyType.CMEK):
      dest_encryption_kms_key = self.boto_file_encryption_keywrapper.crypto_key

    dest_encryption_sha256 = None
    if (self.boto_file_encryption_keywrapper is not None and
        self.boto_file_encryption_keywrapper.crypto_type == CryptoKeyType.CSEK):
      dest_encryption_sha256 = (
          self.boto_file_encryption_keywrapper.crypto_key_sha256)

    should_encrypt_dest = self.boto_file_encryption_keywrapper is not None

    encryption_unchanged = (src_encryption_sha256 == dest_encryption_sha256 and
                            src_encryption_kms_key == dest_encryption_kms_key)
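    # Note this is also True when neither side has a key, since None == None
    # for both the CSEK hash and the KMS key name.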

    # Prevent accidental key rotation.
    if (_TransformTypes.CRYPTO_KEY not in self.transform_types and
        not encryption_unchanged):
      raise EncryptionException(
          'The "-k" flag was not passed to the rewrite command, but the '
          'encryption_key value in your boto config file did not match the key '
          'used to encrypt the object "%s" (hash: %s). To encrypt the object '
          'using a different key, you must specify the "-k" flag.' %
          (transform_url, src_encryption_sha256))

    # Determine if we can skip this rewrite operation (this should only be done
    # when ALL of the specified transformations are redundant).
    redundant_transforms = []

    # STORAGE_CLASS transform is redundant if the target storage class matches
    # the existing storage class.
    if (_TransformTypes.STORAGE_CLASS in self.transform_types and
        self.dest_storage_class == NormalizeStorageClass(
            src_metadata.storageClass)):
      redundant_transforms.append('storage class')

    # CRYPTO_KEY transform is redundant if we're using the same encryption
    # key that was used to encrypt the source. However, if no encryption key was
    # specified, we should still perform the rewrite. This results in the
    # rewritten object either being encrypted with its bucket's default KMS key
    # or having no CSEK/CMEK encryption applied. While we could attempt fetching
    # the bucket's metadata and checking its default KMS key before performing
    # the rewrite (in the case where we appear to be transitioning from
    # no key to no key), that is vulnerable to the race condition where the
    # default KMS key is changed between when we check it and when we rewrite
    # the object.
    if (_TransformTypes.CRYPTO_KEY in self.transform_types and
        should_encrypt_dest and encryption_unchanged):
      redundant_transforms.append('encryption key')

    if len(redundant_transforms) == len(self.transform_types):
      self.logger.info('Skipping %s, all transformations were redundant: %s' %
                       (transform_url, redundant_transforms))
      return

    # First make a deep copy of the source metadata, then overwrite any
    # requested attributes (e.g. if a storage class change was specified).
    dest_metadata = encoding.PyValueToMessage(
        apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

    # Remove some unnecessary/invalid fields.
    dest_metadata.generation = None
    # The service has problems if we supply an ID; since it is responsible
    # for generating one, we omit it here.
    dest_metadata.id = None
    # Ensure we don't copy over the KMS key name or CSEK key info from the
    # source object; those should only come from the boto config's
    # encryption_key value.
    dest_metadata.customerEncryption = None
    dest_metadata.kmsKeyName = None

    # Both a storage class change and CMEK encryption should be set as part of
    # the dest object's metadata. CSEK encryption, if specified, is added to the
    # request later via headers obtained from the keywrapper value passed to
    # encryption_tuple.
    if _TransformTypes.STORAGE_CLASS in self.transform_types:
      dest_metadata.storageClass = self.dest_storage_class
    if dest_encryption_kms_key is not None:
      dest_metadata.kmsKeyName = dest_encryption_kms_key

    # Make sure we have the CSEK key necessary to decrypt.
    decryption_keywrapper = None
    if src_encryption_sha256 is not None:
      if src_encryption_sha256 in self.csek_hash_to_keywrapper:
        decryption_keywrapper = (
            self.csek_hash_to_keywrapper[src_encryption_sha256])
      else:
        raise EncryptionException(
            'Missing decryption key with SHA256 hash %s. No decryption key '
            'matches object %s' % (src_encryption_sha256, transform_url))

    operation_name = 'Rewriting'
    if _TransformTypes.CRYPTO_KEY in self.transform_types:
      if src_was_encrypted and should_encrypt_dest:
        if not encryption_unchanged:
          operation_name = 'Rotating'
        # Else, keep "Rewriting". This might occur when -k was specified and was
        # redundant, but we're performing the operation anyway because some
        # other transformation was not redundant.
      elif src_was_encrypted and not should_encrypt_dest:
        operation_name = 'Decrypting'
      elif not src_was_encrypted and should_encrypt_dest:
        operation_name = 'Encrypting'

    # TODO: Remove this call (used to verify tests) and make it processed by
    # the UIThread.
    sys.stderr.write(
        _ConstructAnnounceText(operation_name, transform_url.url_string))
    sys.stderr.flush()

    # Message indicating beginning of operation.
    gsutil_api.status_queue.put(
        FileMessage(transform_url,
                    None,
                    time.time(),
                    finished=False,
                    size=src_metadata.size,
                    message_type=FileMessage.FILE_REWRITE))

    progress_callback = FileProgressCallbackHandler(
        gsutil_api.status_queue,
        src_url=transform_url,
        operation_name=operation_name).call

    gsutil_api.CopyObject(src_metadata,
                          dest_metadata,
                          src_generation=transform_url.generation,
                          preconditions=self.preconditions,
                          progress_callback=progress_callback,
                          decryption_tuple=decryption_keywrapper,
                          encryption_tuple=self.boto_file_encryption_keywrapper,
                          provider=transform_url.scheme,
                          fields=[])

    # Message indicating end of operation.
    gsutil_api.status_queue.put(
        FileMessage(transform_url,
                    None,
                    time.time(),
                    finished=True,
                    size=src_metadata.size,
                    message_type=FileMessage.FILE_REWRITE))
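
The operation-name selection above reduces to a decision over three booleans. A standalone mirror of that branching, for illustration only (the function name is not gsutil's):

def _pick_operation_name(crypto_key_transform, src_was_encrypted,
                         should_encrypt_dest, encryption_unchanged):
  # Mirrors the naming logic in RewriteFunc; 'Rewriting' is the default.
  if not crypto_key_transform:
    return 'Rewriting'
  if src_was_encrypted and should_encrypt_dest:
    # Same key on both sides: a different, non-redundant transform drove this
    # rewrite, so the generic name is kept.
    return 'Rewriting' if encryption_unchanged else 'Rotating'
  if src_was_encrypted:
    return 'Decrypting'
  if should_encrypt_dest:
    return 'Encrypting'
  return 'Rewriting'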
Example #5
    def CryptoRewrite(self, transform_url, gsutil_api):
        """Make the cloud object at transform_url match encryption configuration.

        Args:
          transform_url: CloudUrl to rewrite.
          gsutil_api: gsutil CloudApi instance for making API calls.
        """
        # Get all fields so that we can ensure that the target metadata is
        # specified correctly.
        src_metadata = gsutil_api.GetObjectMetadata(
            transform_url.bucket_name,
            transform_url.object_name,
            generation=transform_url.generation,
            provider=transform_url.scheme)

        if self.no_preserve_acl:
            # Leave ACL unchanged.
            src_metadata.acl = []
        elif not src_metadata.acl:
            raise CommandException(
                'No OWNER permission found for object %s. OWNER permission is '
                'required for rewriting objects (otherwise their ACLs would be '
                'reset).' % transform_url)

        src_encryption_sha256 = None
        if (src_metadata.customerEncryption
                and src_metadata.customerEncryption.keySha256):
            src_encryption_sha256 = src_metadata.customerEncryption.keySha256

        if src_encryption_sha256 == self.current_encryption_sha256:
            if self.current_encryption_sha256 is not None:
                self.logger.info(
                    'Skipping %s, already has current encryption key' %
                    transform_url)
            else:
                self.logger.info('Skipping %s, already decrypted' %
                                 transform_url)
        else:
            # Make a deep copy of the source metadata
            dst_metadata = encoding.PyValueToMessage(
                apitools_messages.Object,
                encoding.MessageToPyValue(src_metadata))

            # Remove some unnecessary/invalid fields.
            dst_metadata.customerEncryption = None
            dst_metadata.generation = None
            # The service has problems if we supply an ID; since it is
            # responsible for generating one, we omit it here.
            dst_metadata.id = None
            decryption_tuple = None

            if src_encryption_sha256 is None:
                operation_name = 'Encrypting'
            else:
                decryption_key = FindMatchingCryptoKey(src_encryption_sha256)
                if not decryption_key:
                    raise EncryptionException(
                        'Missing decryption key with SHA256 hash %s. No decryption key '
                        'matches object %s' %
                        (src_encryption_sha256, transform_url))
                decryption_tuple = CryptoTupleFromKey(decryption_key)

                if self.current_encryption_sha256 is None:
                    operation_name = 'Decrypting'
                else:
                    operation_name = 'Rotating'

            # TODO: Remove this call (used to verify tests) and make it processed by
            # the UIThread.
            sys.stderr.write(
                _ConstructAnnounceText(operation_name,
                                       transform_url.url_string))

            # Message indicating beginning of operation.
            gsutil_api.status_queue.put(
                FileMessage(transform_url,
                            None,
                            time.time(),
                            finished=False,
                            size=src_metadata.size,
                            message_type=FileMessage.FILE_REWRITE))

            progress_callback = FileProgressCallbackHandler(
                gsutil_api.status_queue,
                src_url=transform_url,
                operation_name=operation_name).call

            gsutil_api.CopyObject(
                src_metadata,
                dst_metadata,
                src_generation=transform_url.generation,
                preconditions=self.preconditions,
                progress_callback=progress_callback,
                decryption_tuple=decryption_tuple,
                encryption_tuple=self.current_encryption_tuple,
                provider=transform_url.scheme,
                fields=[])

            # Message indicating end of operation.
            gsutil_api.status_queue.put(
                FileMessage(transform_url,
                            None,
                            time.time(),
                            finished=True,
                            size=src_metadata.size,
                            message_type=FileMessage.FILE_REWRITE))