Example #1
  def RunCommand(self):
    """Command entry point for the rewrite command."""
    self.continue_on_error = self.parallel_operations
    self.csek_hash_to_keywrapper = {}
    self.dest_storage_class = None
    self.no_preserve_acl = False
    self.read_args_from_stdin = False
    self.supported_transformation_flags = ['-k', '-s']
    self.transform_types = set()

    self.op_failure_count = 0
    self.boto_file_encryption_keywrapper = GetEncryptionKeyWrapper(config)
    self.boto_file_encryption_sha256 = (
        self.boto_file_encryption_keywrapper.crypto_key_sha256
        if self.boto_file_encryption_keywrapper else None)

    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-f':
          self.continue_on_error = True
        elif o == '-k':
          self.transform_types.add(_TransformTypes.CRYPTO_KEY)
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-O':
          self.no_preserve_acl = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
          self.all_versions = True
        elif o == '-s':
          self.transform_types.add(_TransformTypes.STORAGE_CLASS)
          self.dest_storage_class = NormalizeStorageClass(a)

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rewrite command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    if not self.transform_types:
      raise CommandException(
          'rewrite command requires at least one transformation flag. '
          'Currently supported transformation flags: %s' %
          self.supported_transformation_flags)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    url_strs_generator = GenerationCheckGenerator(url_strs)

    # Convert recursive flag to flat wildcard to avoid performing multiple
    # listings.
    if self.recursion_requested:
      url_strs_generator = ConvertRecursiveToFlatWildcard(url_strs_generator)

    # Expand the source argument(s).
    name_expansion_iterator = NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        url_strs_generator,
        self.recursion_requested,
        project_id=self.project_id,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name', 'size'])

    seek_ahead_iterator = None
    # Cannot seek ahead with stdin args, since we can only iterate them
    # once without buffering in memory.
    if not self.read_args_from_stdin:
      # Perform the same recursive-to-flat conversion on original url_strs so
      # that it is as true to the original iterator as possible.
      seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name,
          self.debug,
          self.GetSeekAheadGsutilApi(),
          seek_ahead_url_strs,
          self.recursion_requested,
          all_versions=self.all_versions,
          project_id=self.project_id)

    # Rather than have each worker repeatedly calculate the sha256 hash for each
    # decryption_key in the boto config, do this once now and cache the results.
    for i in range(0, MAX_DECRYPTION_KEYS):
      key_number = i + 1
      keywrapper = CryptoKeyWrapperFromKey(
          config.get('GSUtil', 'decryption_key%s' % str(key_number), None))
      if keywrapper is None:
        # Stop at first attribute absence in lexicographical iteration.
        break
      if keywrapper.crypto_type == CryptoKeyType.CSEK:
        self.csek_hash_to_keywrapper[keywrapper.crypto_key_sha256] = keywrapper
    # Also include the encryption_key, since it should be used to decrypt and
    # then encrypt if the object's CSEK should remain the same.
    if self.boto_file_encryption_sha256 is not None:
      self.csek_hash_to_keywrapper[self.boto_file_encryption_sha256] = (
          self.boto_file_encryption_keywrapper)

    if self.boto_file_encryption_keywrapper is None:
      msg = '\n'.join(
          textwrap.wrap(
              'NOTE: No encryption_key was specified in the boto configuration '
              'file, so gsutil will not provide an encryption key in its rewrite '
              'API requests. This will decrypt the objects unless they are in '
              'buckets with a default KMS key set, in which case the service '
              'will automatically encrypt the rewritten objects with that key.')
      )
      print('%s\n' % msg, file=sys.stderr)

    # Perform rewrite requests in parallel (-m) mode, if requested.
    self.Apply(_RewriteFuncWrapper,
               name_expansion_iterator,
               _RewriteExceptionHandler,
               fail_on_error=(not self.continue_on_error),
               shared_attrs=['op_failure_count'],
               seek_ahead_iterator=seek_ahead_iterator)

    if self.op_failure_count:
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException('%d file%s/object%s could not be rewritten.' %
                             (self.op_failure_count, plural_str, plural_str))

    return 0
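
The decryption-key loop above reads sequentially numbered boto config entries (decryption_key1, decryption_key2, ...), stops at the first absent one, and maps each CSEK key's SHA-256 hash to its key wrapper so that workers do not have to recompute hashes. Below is a minimal standalone sketch of that caching pattern, assuming a plain dict in place of the boto config, a hypothetical MAX_DECRYPTION_KEYS limit, and that the cached hash is the base64-encoded SHA-256 of the decoded key bytes; the names fake_config and sha256_of_csek are illustrative, not gslib's own.

import base64
import hashlib

MAX_DECRYPTION_KEYS = 100  # assumed stand-in for the constant used above

# Hypothetical stand-in for the boto config: (section, option) -> value.
fake_config = {
    ('GSUtil', 'decryption_key1'): base64.b64encode(b'k' * 32).decode('ascii'),
    ('GSUtil', 'decryption_key2'): base64.b64encode(b'q' * 32).decode('ascii'),
    # decryption_key3 is intentionally absent, so the loop stops after key 2.
}

def sha256_of_csek(b64_key):
  """Base64-encoded SHA-256 of the raw key bytes, computed once per key."""
  raw = base64.b64decode(b64_key)
  return base64.b64encode(hashlib.sha256(raw).digest()).decode('ascii')

csek_hash_to_key = {}
for i in range(MAX_DECRYPTION_KEYS):
  value = fake_config.get(('GSUtil', 'decryption_key%d' % (i + 1)))
  if value is None:
    break  # stop at the first missing entry; later numbers are never read
  csek_hash_to_key[sha256_of_csek(value)] = value

print(len(csek_hash_to_key))  # -> 2
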
Example #2
    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            seek_ahead_iterator = None
            # Cannot seek ahead with stdin args, since we can only iterate them
            # once without buffering in memory.
            if not self.read_args_from_stdin:
                seek_ahead_iterator = SeekAheadNameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.GetSeekAheadGsutilApi(),
                    url_strs,
                    self.recursion_requested,
                    all_versions=self.all_versions,
                    project_id=self.project_id)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                seek_ahead_iterator=seek_ahead_iterator)

        # Assuming the bucket has versioning enabled, URLs that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        except ServiceException as e:
            if not self.continue_on_error:
                raise

        if self.bucket_not_found_count:
            raise CommandException(
                'Encountered non-existent bucket during listing')

        if self.op_failure_count and not self.continue_on_error:
            raise CommandException('Some files could not be removed.')

        # If this was a gsutil rm -r command covering any bucket subdirs,
        # remove any dir_$folder$ objects (which are created by various web UI
        # tools to simulate folders).
        if self.recursion_requested:
            folder_object_wildcards = []
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsObject():
                    folder_object_wildcards.append('%s**_$folder$' % url_str)
            if folder_object_wildcards:
                self.continue_on_error = True
                try:
                    name_expansion_iterator = NameExpansionIterator(
                        self.command_name,
                        self.debug,
                        self.logger,
                        self.gsutil_api,
                        folder_object_wildcards,
                        self.recursion_requested,
                        project_id=self.project_id,
                        all_versions=self.all_versions)
                    # When we're removing folder objects, always continue on error
                    self.Apply(_RemoveFuncWrapper,
                               name_expansion_iterator,
                               _RemoveFoldersExceptionHandler,
                               fail_on_error=False)
                except CommandException as e:
                    # Ignore exception from name expansion due to an absent folder file.
                    if not e.reason.startswith(NO_URLS_MATCHED_GENERIC):
                        raise

        # Now that all data has been deleted, delete any bucket URLs.
        for url in bucket_urls_to_delete:
            self.logger.info('Removing %s...', url)

            @Retry(NotEmptyException, tries=3, timeout_secs=1)
            def BucketDeleteWithRetry():
                self.gsutil_api.DeleteBucket(url.bucket_name,
                                             provider=url.scheme)

            BucketDeleteWithRetry()

        if self.op_failure_count:
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be removed.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0
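
The bucket deletion above wraps DeleteBucket in a Retry decorator so that a NotEmptyException, raised while the service is still propagating the object deletes, is retried a few times before it surfaces. Below is a minimal sketch of that retry-on-one-exception pattern, with simplified stand-ins for gsutil's Retry helper and NotEmptyException (hypothetical here, not the gslib implementations).

import functools
import time

class NotEmptyException(Exception):
    """Stand-in for the cloud API's 'bucket not empty' error."""

def Retry(exc_type, tries=3, timeout_secs=1):
    """Simplified retry decorator: retry the call only on exc_type."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(tries):
                try:
                    return func(*args, **kwargs)
                except exc_type:
                    if attempt == tries - 1:
                        raise  # out of retries; let the caller see the error
                    time.sleep(timeout_secs)
        return wrapper
    return decorator

attempts = []

@Retry(NotEmptyException, tries=3, timeout_secs=0)  # timeout 0 keeps the demo fast
def BucketDeleteWithRetry():
    # Fail once to simulate deletes that have not finished propagating yet.
    if not attempts:
        attempts.append(1)
        raise NotEmptyException('bucket not yet empty')
    print('bucket deleted')

BucketDeleteWithRetry()  # succeeds on the second attempt
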
Example #3
    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            seek_ahead_iterator = None
            # Cannot seek ahead with stdin args, since we can only iterate them
            # once without buffering in memory.
            if not self.read_args_from_stdin:
                seek_ahead_iterator = SeekAheadNameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.GetSeekAheadGsutilApi(),
                    url_strs,
                    self.recursion_requested,
                    all_versions=self.all_versions,
                    project_id=self.project_id)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                seek_ahead_iterator=seek_ahead_iterator)

        # Assuming the bucket has versioning enabled, URLs that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        except ServiceException as e:
            if not self.continue_on_error:
                raise
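
All three examples share the same -I handling: when URLs are read from stdin, positional arguments are rejected and a one-pass StdinIterator takes the place of self.args, which is also why the seek-ahead estimator is skipped for stdin input (the stream can only be consumed once). Below is a minimal sketch of that selection logic, with a hypothetical StdinIterator built on sys.stdin rather than gsutil's own.

import sys

class CommandException(Exception):
    """Stand-in for gsutil's CommandException."""

def StdinIterator():
    """Hypothetical one-pass iterator over newline-delimited URLs on stdin."""
    for line in sys.stdin:
        line = line.strip()
        if line:
            yield line

def SelectUrlStrings(args, read_args_from_stdin):
    """Mirror the argument handling shared by the rewrite and rm commands."""
    if read_args_from_stdin:
        if args:
            raise CommandException('No arguments allowed with the -I flag.')
        return StdinIterator()  # single pass: no seek-ahead estimate possible
    if not args:
        raise CommandException('The command (without -I) expects at least one URL.')
    return args

# Positional arguments provided, -I not set: the args list is used directly.
print(list(SelectUrlStrings(['gs://bucket/obj'], read_args_from_stdin=False)))
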