Example #1
    def _SetIamHelperInternal(self, storage_url, policy, thread_state=None):
        """Sets IAM policy for a single, resolved bucket / object URL.

        Args:
          storage_url: A CloudUrl instance with no wildcards, pointing to a
                       specific bucket or object.
          policy: A Policy object to set on the bucket / object.
          thread_state: CloudApiDelegator instance which is passed from
                        command.WorkerThread.__init__() if the -m flag is
                        specified. Will use self.gsutil_api if thread_state
                        is set to None.

        Raises:
          ServiceException passed from the API call if an HTTP error was
          returned.
        """

        # SetIamHelper may be called by a command.WorkerThread. In the
        # single-threaded case, WorkerThread will not pass the CloudApiDelegator
        # instance to thread_state. GetCloudApiInstance is called to resolve
        # the edge case.
        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

        if storage_url.IsBucket():
            gsutil_api.SetBucketIamPolicy(storage_url.bucket_name,
                                          policy,
                                          provider=storage_url.scheme)
        else:
            gsutil_api.SetObjectIamPolicy(storage_url.bucket_name,
                                          storage_url.object_name,
                                          policy,
                                          generation=storage_url.generation,
                                          provider=storage_url.scheme)
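
Every example in this listing begins with the same resolution step: turning the optional thread_state into a usable API object. A minimal sketch of what that call amounts to, based solely on the docstring above (the real GetCloudApiInstance lives in gsutil's command machinery and may differ in detail):

    def GetCloudApiInstance(cls, thread_state=None):
        # Under the global -m flag, command.WorkerThread passes its own
        # CloudApiDelegator via thread_state; otherwise fall back to the
        # command's shared API instance.
        return thread_state if thread_state is not None else cls.gsutil_api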
Example #2
def _RsyncFunc(cls, diff_to_apply, thread_state=None):
  """Worker function for performing the actual copy and remove operations."""
  gsutil_api = GetCloudApiInstance(cls, thread_state=thread_state)
  dst_url_str = diff_to_apply.dst_url_str
  dst_url = StorageUrlFromString(dst_url_str)
  if diff_to_apply.diff_action == _DiffAction.REMOVE:
    if cls.dryrun:
      cls.logger.info('Would remove %s', dst_url)
    else:
      cls.logger.info('Removing %s', dst_url)
      if dst_url.IsFileUrl():
        os.unlink(dst_url.object_name)
      else:
        try:
          gsutil_api.DeleteObject(
              dst_url.bucket_name, dst_url.object_name,
              generation=dst_url.generation, provider=dst_url.scheme)
        except NotFoundException:
          # If the object happened to be deleted by an external process, this
          # is fine because it moves us closer to the desired state.
          pass
  elif diff_to_apply.diff_action == _DiffAction.COPY:
    src_url_str = diff_to_apply.src_url_str
    src_url = StorageUrlFromString(src_url_str)
    if cls.dryrun:
      cls.logger.info('Would copy %s to %s', src_url, dst_url)
    else:
      copy_helper.PerformCopy(cls.logger, src_url, dst_url, gsutil_api, cls,
                              _RsyncExceptionHandler,
                              headers=cls.headers)
  else:
    raise CommandException('Got unexpected DiffAction (%d)'
                           % diff_to_apply.diff_action)
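
_RsyncFunc only needs a handful of attributes from the parsed URLs. A rough illustration of the parsing it relies on (the import path and exact attribute semantics are assumptions inferred from the code above):

    from gslib.storage_url import StorageUrlFromString  # assumed import path

    cloud_url = StorageUrlFromString('gs://my-bucket/path/to/obj')
    # cloud_url.scheme == 'gs', cloud_url.bucket_name == 'my-bucket',
    # cloud_url.object_name == 'path/to/obj', cloud_url.IsFileUrl() is False.

    file_url = StorageUrlFromString('/tmp/stale-file')
    # file_url.IsFileUrl() is True and file_url.object_name holds the local
    # path, which is why the REMOVE branch above can simply call
    # os.unlink(dst_url.object_name) for file destinations.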
Example #3
    def GetIamHelper(self, storage_url, thread_state=None):
        """Gets an IAM policy for a single, resolved bucket / object URL.

        Args:
          storage_url: A CloudUrl instance with no wildcards, pointing to a
                       specific bucket or object.
          thread_state: CloudApiDelegator instance which is passed from
                        command.WorkerThread.__init__() if the global -m flag
                        is specified. Will use self.gsutil_api if thread_state
                        is set to None.

        Returns:
          Serialized Policy instance.
        """

        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

        if storage_url.IsBucket():
            policy = gsutil_api.GetBucketIamPolicy(
                storage_url.bucket_name,
                provider=storage_url.scheme,
            )
        else:
            policy = gsutil_api.GetObjectIamPolicy(
                storage_url.bucket_name,
                storage_url.object_name,
                generation=storage_url.generation,
                provider=storage_url.scheme,
            )

        return policy
Example #4
  def RemoveFunc(self, name_expansion_result, thread_state=None):
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info('Removing %s...', exp_src_url)
    gsutil_api.DeleteObject(
        exp_src_url.bucket_name, exp_src_url.object_name,
        preconditions=self.preconditions, generation=exp_src_url.generation,
        provider=exp_src_url.scheme)
Example #5
    def SetMetadataFunc(self, name_expansion_result, thread_state=None):
        """Sets metadata on an object.

        Args:
          name_expansion_result: NameExpansionResult describing target object.
          thread_state: gsutil Cloud API instance to use for the operation.
        """
        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

        exp_src_url = name_expansion_result.expanded_storage_url
        self.logger.info('Setting metadata on %s...', exp_src_url)

        cloud_obj_metadata = encoding.JsonToMessage(
            apitools_messages.Object, name_expansion_result.expanded_result)

        preconditions = Preconditions(
            gen_match=self.preconditions.gen_match,
            meta_gen_match=self.preconditions.meta_gen_match)
        if preconditions.gen_match is None:
            preconditions.gen_match = cloud_obj_metadata.generation
        if preconditions.meta_gen_match is None:
            preconditions.meta_gen_match = cloud_obj_metadata.metageneration

        # Patch handles the patch semantics for most metadata, but we need to
        # merge the custom metadata field manually.
        patch_obj_metadata = ObjectMetadataFromHeaders(self.metadata_change)

        api = gsutil_api.GetApiSelector(provider=exp_src_url.scheme)
        # For XML we only want to patch through custom metadata that has
        # changed.  For JSON we need to build the complete set.
        if api == ApiSelector.XML:
            pass
        elif api == ApiSelector.JSON:
            CopyObjectMetadata(patch_obj_metadata,
                               cloud_obj_metadata,
                               override=True)
            patch_obj_metadata = cloud_obj_metadata
            # Patch body does not need the object generation and metageneration.
            patch_obj_metadata.generation = None
            patch_obj_metadata.metageneration = None

        gsutil_api.PatchObjectMetadata(exp_src_url.bucket_name,
                                       exp_src_url.object_name,
                                       patch_obj_metadata,
                                       generation=exp_src_url.generation,
                                       preconditions=preconditions,
                                       provider=exp_src_url.scheme,
                                       fields=['id'])
        PutToQueueWithTimeout(gsutil_api.status_queue,
                              MetadataMessage(message_time=time.time()))
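
The detail most worth noting above is the precondition fallback: user-supplied preconditions win, and otherwise the patch is pinned to the generation/metageneration that was just read, so a concurrent overwrite makes the request fail (HTTP 412) instead of silently merging stale metadata. A condensed restatement of that pattern, assuming the same Preconditions class used above:

    def _PinPreconditions(user_preconditions, current_metadata):
        # Illustrative helper only. Prefer explicit preconditions; otherwise
        # pin the patch to the object state we just observed.
        gen = user_preconditions.gen_match
        meta_gen = user_preconditions.meta_gen_match
        if gen is None:
            gen = current_metadata.generation
        if meta_gen is None:
            meta_gen = current_metadata.metageneration
        return Preconditions(gen_match=gen, meta_gen_match=meta_gen)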
Example #6
def _ListUrlRootFunc(cls, args_tuple, thread_state=None):
  """Worker function for listing files/objects under to be sync'd.

  Outputs sorted list to out_file_name, formatted per _BuildTmpOutputLine. We
  sort the listed URLs because we don't want to depend on consistent sort
  order across file systems and cloud providers.

  Args:
    cls: Command instance.
    args_tuple: (base_url_str, out_file_name, desc), where base_url_str is
                top-level URL string to list; out_filename is name of file to
                which sorted output should be written; desc is 'source' or
                'destination'.
    thread_state: gsutil Cloud API instance to use.
  """
  gsutil_api = GetCloudApiInstance(cls, thread_state=thread_state)
  (base_url_str, out_filename, desc) = args_tuple
  # We sort while iterating over base_url_str, allowing parallelism of batched
  # sorting with collecting the listing.
  out_file = io.open(out_filename, mode='w', encoding=UTF8)
  try:
    _BatchSort(_FieldedListingIterator(cls, gsutil_api, base_url_str, desc),
               out_file)
  except Exception as e:  # pylint: disable=broad-except
    # Abandon rsync if an exception percolates up to this layer - retryable
    # exceptions are handled in the lower layers, so we got a non-retryable
    # exception (like 404 bucket not found) and proceeding would either be
    # futile or could result in data loss - for example:
    #     gsutil rsync -d gs://non-existent-bucket ./localdir
    # would delete files from localdir.
    cls.logger.error(
        'Caught non-retryable exception while listing %s: %s' %
        (base_url_str, e))
    cls.non_retryable_listing_failures = 1
  out_file.close()
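
For context, a hypothetical single-threaded invocation of this worker (the argument values are made up; with the global -m flag gsutil instead dispatches it through its parallel Apply machinery, which supplies thread_state):

    args_tuple = ('gs://my-bucket/prefix', '/tmp/rsync_src_listing', 'source')
    _ListUrlRootFunc(cls, args_tuple, thread_state=None)
    # /tmp/rsync_src_listing now contains the sorted listing, one
    # _BuildTmpOutputLine-formatted entry per line.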
Example #7
    def SetMetadataFunc(self, name_expansion_result, thread_state=None):
        """Sets metadata on an object.

        Args:
          name_expansion_result: NameExpansionResult describing target object.
          thread_state: gsutil Cloud API instance to use for the operation.
        """
        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

        exp_src_url = name_expansion_result.expanded_storage_url
        self.logger.info('Setting metadata on %s...', exp_src_url)

        fields = ['generation', 'metadata', 'metageneration']
        cloud_obj_metadata = gsutil_api.GetObjectMetadata(
            exp_src_url.bucket_name,
            exp_src_url.object_name,
            generation=exp_src_url.generation,
            provider=exp_src_url.scheme,
            fields=fields)

        preconditions = Preconditions(
            gen_match=cloud_obj_metadata.generation,
            meta_gen_match=cloud_obj_metadata.metageneration)

        # Patch handles the patch semantics for most metadata, but we need to
        # merge the custom metadata field manually.
        patch_obj_metadata = ObjectMetadataFromHeaders(self.metadata_change)

        api = gsutil_api.GetApiSelector(provider=exp_src_url.scheme)
        # For XML we only want to patch through custom metadata that has
        # changed.  For JSON we need to build the complete set.
        if api == ApiSelector.XML:
            pass
        elif api == ApiSelector.JSON:
            CopyObjectMetadata(patch_obj_metadata,
                               cloud_obj_metadata,
                               override=True)
            patch_obj_metadata = cloud_obj_metadata

        gsutil_api.PatchObjectMetadata(exp_src_url.bucket_name,
                                       exp_src_url.object_name,
                                       patch_obj_metadata,
                                       generation=exp_src_url.generation,
                                       preconditions=preconditions,
                                       provider=exp_src_url.scheme)
Example #8
    def RewriteFunc(self, name_expansion_result, thread_state=None):
        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

        self.CheckProvider(name_expansion_result.expanded_storage_url)

        # If other transform types are added here, they must ensure that the
        # encryption key configuration matches the boto configuration, because
        # gsutil maintains an invariant that all objects it writes use the
        # encryption_key value (including decrypting if no key is present).
        if _TransformTypes.CRYPTO_KEY in self.transform_types:
            self.CryptoRewrite(name_expansion_result.expanded_storage_url,
                               gsutil_api)
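
The "boto configuration" this comment refers to is the encryption_key setting that gsutil reads from the [GSUtil] section of the boto config file. A hedged sketch of how that lookup is typically done (the setting names are real gsutil boto options; the exact code gsutil uses may differ):

    from boto import config

    # Key every object gsutil writes should end up under (a CSEK or, in newer
    # versions, a Cloud KMS key name); None means objects are written
    # unencrypted.
    encryption_key = config.get('GSUtil', 'encryption_key', None)
    # Additional keys gsutil may use only for decrypting existing objects.
    decryption_key1 = config.get('GSUtil', 'decryption_key1', None)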
Example #9
def _ListUrlRootFunc(cls, args_tuple, thread_state=None):
  """Worker function for listing files/objects under to be sync'd.

  Outputs sorted list to out_file_name, formatted per _BuildTmpOutputLine. We
  sort the listed URLs because we don't want to depend on consistent sort
  order across file systems and cloud providers.

  Args:
    cls: Command instance.
    args_tuple: (url_str, out_file_name, desc), where url_str is URL string to
                list; out_file_name is name of file to which sorted output
                should be written; desc is 'source' or 'destination'.
    thread_state: gsutil Cloud API instance to use.
  """
  gsutil_api = GetCloudApiInstance(cls, thread_state=thread_state)
  (url_str, out_file_name, desc) = args_tuple
  # We sort while iterating over url_str, allowing parallelism of batched
  # sorting with collecting the listing.
  out_file = io.open(out_file_name, mode='w', encoding=UTF8)
  _BatchSort(_FieldedListingIterator(cls, gsutil_api, url_str, desc), out_file)
  out_file.close()
Example #10
    def RewriteFunc(self, name_expansion_result, thread_state=None):
        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
        transform_url = name_expansion_result.expanded_storage_url
        # Make a local copy of the requested transformations for each thread. As
        # a redundant transformation for one object might not be redundant for
        # another, we wouldn't want to remove it from the transform_types set that
        # all threads share.
        transforms_to_perform = set(self.transform_types)

        self.CheckProvider(transform_url)

        # Get all fields so that we can ensure that the target metadata is
        # specified correctly.
        src_metadata = gsutil_api.GetObjectMetadata(
            transform_url.bucket_name,
            transform_url.object_name,
            generation=transform_url.generation,
            provider=transform_url.scheme)

        if self.no_preserve_acl:
            # Leave ACL unchanged.
            src_metadata.acl = []
        elif not src_metadata.acl:
            raise CommandException(
                'No OWNER permission found for object %s. OWNER permission is '
                'required for rewriting objects, (otherwise their ACLs would be '
                'reset).' % transform_url)

        # Note: If other transform types are added, they must ensure that the
        # encryption key configuration matches the boto configuration, because
        # gsutil maintains an invariant that all objects it writes use the
        # encryption_key value (including decrypting if no key is present).
        src_encryption_sha256 = None
        if (src_metadata.customerEncryption
                and src_metadata.customerEncryption.keySha256):
            src_encryption_sha256 = src_metadata.customerEncryption.keySha256

        should_encrypt_target = self.boto_file_encryption_sha256 is not None
        source_was_encrypted = src_encryption_sha256 is not None
        using_same_encryption_key_value = (
            src_encryption_sha256 == self.boto_file_encryption_sha256)

        # Prevent accidental key rotation.
        if (_TransformTypes.CRYPTO_KEY not in transforms_to_perform
                and not using_same_encryption_key_value):
            raise EncryptionException(
                'The "-k" flag was not passed to the rewrite command, but the '
                'encryption_key value in your boto config file did not match the key '
                'used to encrypt the object "%s" (hash: %s). To encrypt the object '
                'using a different key, you must specify the "-k" flag.' %
                (transform_url, src_encryption_sha256))

        # Remove any redundant changes.

        # STORAGE_CLASS transform should be skipped if the target storage class
        # matches the existing storage class.
        if (_TransformTypes.STORAGE_CLASS in transforms_to_perform
                and self.dest_storage_class == NormalizeStorageClass(
                    src_metadata.storageClass)):
            transforms_to_perform.remove(_TransformTypes.STORAGE_CLASS)
            self.logger.info(
                'Redundant transform: %s already had storage class of '
                '%s.' % (transform_url, src_metadata.storageClass))

        # CRYPTO_KEY transform should be skipped if we're using the same encryption
        # key (if any) that was used to encrypt the source.
        if (_TransformTypes.CRYPTO_KEY in transforms_to_perform
                and using_same_encryption_key_value):
            if self.boto_file_encryption_sha256 is None:
                log_msg = '%s is already decrypted.' % transform_url
            else:
                log_msg = '%s already has current encryption key.' % transform_url
            transforms_to_perform.remove(_TransformTypes.CRYPTO_KEY)
            self.logger.info('Redundant transform: %s' % log_msg)

        if not transforms_to_perform:
            self.logger.info(
                'Skipping %s, all transformations were redundant.' %
                transform_url)
            return

        # Make a deep copy of the source metadata.
        dst_metadata = encoding.PyValueToMessage(
            apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

        # Remove some unnecessary/invalid fields.
        dst_metadata.customerEncryption = None
        dst_metadata.generation = None
        # Service has problems if we supply an ID, but it is responsible for
        # generating one, so it is not necessary to include it here.
        dst_metadata.id = None
        decryption_tuple = None
        # Use a generic operation name by default - this can be altered below for
        # specific transformations (encryption changes, etc.).
        operation_name = 'Rewriting'

        if source_was_encrypted:
            decryption_key = FindMatchingCryptoKey(src_encryption_sha256)
            if not decryption_key:
                raise EncryptionException(
                    'Missing decryption key with SHA256 hash %s. No decryption key '
                    'matches object %s' %
                    (src_encryption_sha256, transform_url))
            decryption_tuple = CryptoTupleFromKey(decryption_key)

        if _TransformTypes.CRYPTO_KEY in transforms_to_perform:
            if not source_was_encrypted:
                operation_name = 'Encrypting'
            elif not should_encrypt_target:
                operation_name = 'Decrypting'
            else:
                operation_name = 'Rotating'

        if _TransformTypes.STORAGE_CLASS in transforms_to_perform:
            dst_metadata.storageClass = self.dest_storage_class

        # TODO: Remove this call (used to verify tests) and make it processed by
        # the UIThread.
        sys.stderr.write(
            _ConstructAnnounceText(operation_name, transform_url.url_string))

        # Message indicating beginning of operation.
        gsutil_api.status_queue.put(
            FileMessage(transform_url,
                        None,
                        time.time(),
                        finished=False,
                        size=src_metadata.size,
                        message_type=FileMessage.FILE_REWRITE))

        progress_callback = FileProgressCallbackHandler(
            gsutil_api.status_queue,
            src_url=transform_url,
            operation_name=operation_name).call

        gsutil_api.CopyObject(src_metadata,
                              dst_metadata,
                              src_generation=transform_url.generation,
                              preconditions=self.preconditions,
                              progress_callback=progress_callback,
                              decryption_tuple=decryption_tuple,
                              encryption_tuple=self.boto_file_encryption_tuple,
                              provider=transform_url.scheme,
                              fields=[])

        # Message indicating end of operation.
        gsutil_api.status_queue.put(
            FileMessage(transform_url,
                        None,
                        time.time(),
                        finished=True,
                        size=src_metadata.size,
                        message_type=FileMessage.FILE_REWRITE))
Example #11
    def RewriteFunc(self, name_expansion_result, thread_state=None):
        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
        transform_url = name_expansion_result.expanded_storage_url

        self.CheckProvider(transform_url)

        # Get all fields so that we can ensure that the target metadata is
        # specified correctly.
        src_metadata = gsutil_api.GetObjectMetadata(
            transform_url.bucket_name,
            transform_url.object_name,
            generation=transform_url.generation,
            provider=transform_url.scheme)

        if self.no_preserve_acl:
            # Leave ACL unchanged.
            src_metadata.acl = []
        elif not src_metadata.acl:
            raise CommandException(
                'No OWNER permission found for object %s. OWNER permission is '
                'required for rewriting objects, (otherwise their ACLs would be '
                'reset).' % transform_url)

        # Note: If other transform types are added, they must ensure that the
        # encryption key configuration matches the boto configuration, because
        # gsutil maintains an invariant that all objects it writes use the
        # encryption_key value (including decrypting if no key is present).

        # Store metadata about src encryption to make logic below easier to read.
        src_encryption_kms_key = (src_metadata.kmsKeyName
                                  if src_metadata.kmsKeyName else None)

        src_encryption_sha256 = None
        if (src_metadata.customerEncryption
                and src_metadata.customerEncryption.keySha256):
            src_encryption_sha256 = src_metadata.customerEncryption.keySha256

        src_was_encrypted = (src_encryption_sha256 is not None
                             or src_encryption_kms_key is not None)

        # Also store metadata about dest encryption.
        dest_encryption_kms_key = None
        if (self.boto_file_encryption_keywrapper is not None
                and self.boto_file_encryption_keywrapper.crypto_type
                == CryptoKeyType.CMEK):
            dest_encryption_kms_key = self.boto_file_encryption_keywrapper.crypto_key

        dest_encryption_sha256 = None
        if (self.boto_file_encryption_keywrapper is not None
                and self.boto_file_encryption_keywrapper.crypto_type
                == CryptoKeyType.CSEK):
            dest_encryption_sha256 = (
                self.boto_file_encryption_keywrapper.crypto_key_sha256)

        should_encrypt_dest = self.boto_file_encryption_keywrapper is not None

        encryption_unchanged = (src_encryption_sha256 == dest_encryption_sha256
                                and src_encryption_kms_key
                                == dest_encryption_kms_key)

        # Prevent accidental key rotation.
        if (_TransformTypes.CRYPTO_KEY not in self.transform_types
                and not encryption_unchanged):
            raise EncryptionException(
                'The "-k" flag was not passed to the rewrite command, but the '
                'encryption_key value in your boto config file did not match the key '
                'used to encrypt the object "%s" (hash: %s). To encrypt the object '
                'using a different key, you must specify the "-k" flag.' %
                (transform_url, src_encryption_sha256))

        # Determine if we can skip this rewrite operation (this should only be done
        # when ALL of the specified transformations are redundant).
        redundant_transforms = []

        # STORAGE_CLASS transform is redundant if the target storage class matches
        # the existing storage class.
        if (_TransformTypes.STORAGE_CLASS in self.transform_types
                and self.dest_storage_class == NormalizeStorageClass(
                    src_metadata.storageClass)):
            redundant_transforms.append('storage class')

        # CRYPTO_KEY transform is redundant if we're using the same encryption
        # key (if any) that was used to encrypt the source.
        if (_TransformTypes.CRYPTO_KEY in self.transform_types
                and encryption_unchanged):
            redundant_transforms.append('encryption key')

        if len(redundant_transforms) == len(self.transform_types):
            self.logger.info(
                'Skipping %s, all transformations were redundant: %s' %
                (transform_url, redundant_transforms))
            return

        # First make a deep copy of the source metadata, then overwrite any
        # requested attributes (e.g. if a storage class change was specified).
        dest_metadata = encoding.PyValueToMessage(
            apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

        # Remove some unnecessary/invalid fields.
        dest_metadata.generation = None
        # Service has problems if we supply an ID, but it is responsible for
        # generating one, so it is not necessary to include it here.
        dest_metadata.id = None
        # Ensure we don't copy over the KMS key name or CSEK key info from the
        # source object; those should only come from the boto config's
        # encryption_key value.
        dest_metadata.customerEncryption = None
        dest_metadata.kmsKeyName = None

        # Both a storage class change and CMEK encryption should be set as part of
        # the dest object's metadata. CSEK encryption, if specified, is added to the
        # request later via headers obtained from the keywrapper value passed to
        # encryption_tuple.
        if _TransformTypes.STORAGE_CLASS in self.transform_types:
            dest_metadata.storageClass = self.dest_storage_class
        if dest_encryption_kms_key is not None:
            dest_metadata.kmsKeyName = dest_encryption_kms_key

        # Make sure we have the CSEK key necessary to decrypt.
        decryption_keywrapper = None
        if src_encryption_sha256 is not None:
            if src_encryption_sha256 in self.csek_hash_to_keywrapper:
                decryption_keywrapper = (
                    self.csek_hash_to_keywrapper[src_encryption_sha256])
            else:
                raise EncryptionException(
                    'Missing decryption key with SHA256 hash %s. No decryption key '
                    'matches object %s' %
                    (src_encryption_sha256, transform_url))

        operation_name = 'Rewriting'
        if _TransformTypes.CRYPTO_KEY in self.transform_types:
            if src_was_encrypted and should_encrypt_dest:
                if not encryption_unchanged:
                    operation_name = 'Rotating'
                # Else, keep "Rewriting". This might occur when -k was specified and was
                # redundant, but we're performing the operation anyway because some
                # other transformation was not redundant.
            elif src_was_encrypted and not should_encrypt_dest:
                operation_name = 'Decrypting'
            elif not src_was_encrypted and should_encrypt_dest:
                operation_name = 'Encrypting'

        # TODO: Remove this call (used to verify tests) and make it processed by
        # the UIThread.
        sys.stderr.write(
            _ConstructAnnounceText(operation_name, transform_url.url_string))

        # Message indicating beginning of operation.
        gsutil_api.status_queue.put(
            FileMessage(transform_url,
                        None,
                        time.time(),
                        finished=False,
                        size=src_metadata.size,
                        message_type=FileMessage.FILE_REWRITE))

        progress_callback = FileProgressCallbackHandler(
            gsutil_api.status_queue,
            src_url=transform_url,
            operation_name=operation_name).call

        gsutil_api.CopyObject(
            src_metadata,
            dest_metadata,
            src_generation=transform_url.generation,
            preconditions=self.preconditions,
            progress_callback=progress_callback,
            decryption_tuple=decryption_keywrapper,
            encryption_tuple=self.boto_file_encryption_keywrapper,
            provider=transform_url.scheme,
            fields=[])

        # Message indicating end of operation.
        gsutil_api.status_queue.put(
            FileMessage(transform_url,
                        None,
                        time.time(),
                        finished=True,
                        size=src_metadata.size,
                        message_type=FileMessage.FILE_REWRITE))
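
A compact way to read the operation-name branches above, as a purely illustrative helper (not part of gsutil) that applies only when the -k / CRYPTO_KEY transform was requested:

    def _OperationNameForCryptoTransform(src_was_encrypted, should_encrypt_dest,
                                         encryption_unchanged):
        # Mirrors the branches in RewriteFunc; 'Rewriting' also covers the case
        # where -k was redundant but the rewrite proceeds for another transform.
        if src_was_encrypted and should_encrypt_dest:
            return 'Rewriting' if encryption_unchanged else 'Rotating'
        if src_was_encrypted:
            return 'Decrypting'
        if should_encrypt_dest:
            return 'Encrypting'
        return 'Rewriting'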