Esempio n. 1
0
  def RunCommand(self):
    """Command entry point for the compose command.

    The last argument names the destination object; every preceding argument
    names a source object (wildcards are expanded). All sources are gathered
    into a single compose request whose content type is copied from the
    first source object.

    Raises:
      CommandException: if the destination URL carries a generation, if a
          source lives in a different bucket than the destination, if more
          than MAX_COMPOSE_ARITY components are named, or if no component
          is found at all.
    """
    # Peel the destination off the end of the argument list; what remains
    # are the sources.
    dst_url_str = self.args[-1]
    self.args = self.args[:-1]
    dst_url = StorageUrlFromString(dst_url_str)
    self.CheckProvider(dst_url)
    if dst_url.HasGeneration():
      raise CommandException(
          'A version-specific URL (%s) cannot be the destination for gsutil '
          'compose - abort.' % dst_url)

    dst_obj_metadata = apitools_messages.Object(
        bucket=dst_url.bucket_name, name=dst_url.object_name)

    source_entries = []
    # The destination's content type is taken from the first source, so keep
    # hold of that URL while expanding.
    leading_src_url = None
    for arg in self.args:
      if ContainsWildcard(arg):
        listing = self.WildcardIterator(arg).IterObjects()
      else:
        listing = [BucketListingObject(StorageUrlFromString(arg))]

      for listing_ref in listing:
        candidate_url = listing_ref.storage_url
        self.CheckProvider(candidate_url)

        if candidate_url.bucket_name != dst_url.bucket_name:
          raise CommandException(
              'GCS does not support inter-bucket composing.')

        leading_src_url = leading_src_url or candidate_url

        entry = apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
            name=candidate_url.object_name)
        if candidate_url.HasGeneration():
          entry.generation = candidate_url.generation
        source_entries.append(entry)

        # Bail out as soon as the expansion exceeds the allowed arity rather
        # than expanding everything first.
        if len(source_entries) > MAX_COMPOSE_ARITY:
          raise CommandException('"compose" called with too many component '
                                 'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

    if not source_entries:
      raise CommandException('"compose" requires at least 1 component object.')

    # Only the contentType field of the first source is requested.
    leading_metadata = self.gsutil_api.GetObjectMetadata(
        leading_src_url.bucket_name, leading_src_url.object_name,
        provider=leading_src_url.scheme, fields=['contentType'])
    dst_obj_metadata.contentType = leading_metadata.contentType

    preconditions = PreconditionsFromHeaders(self.headers or {})

    self.logger.info(
        'Composing %s from %d component object(s).',
        dst_url, len(source_entries))
    self.gsutil_api.ComposeObject(
        source_entries, dst_obj_metadata, preconditions=preconditions,
        provider=dst_url.scheme,
        encryption_tuple=GetEncryptionKeyWrapper(config))
Esempio n. 2
0
  def RunCommand(self):
    """Command entry point for the setmeta command.

    Collects the values of all -h sub-options, parses them into metadata to
    add and metadata to remove, and applies the resulting change to every
    object matched by the command arguments.

    Returns:
      0 on success.

    Raises:
      CommandException: if a -h value mentions a canned-ACL header, if the
          single non-recursive argument does not name a cloud object, or if
          metadata could not be set for some objects.
      AccessDeniedException: re-raised from Apply; a service-account warning
          is emitted first when the status is 403.
    """
    headers = []
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-h':
          if 'x-goog-acl' in a or 'x-amz-acl' in a:
            raise CommandException(
                'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                'set ... to set canned ACLs.')
          headers.append(a)

    (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

    # Headers slated for removal are recorded in the same mapping with an
    # empty-string value.
    self.metadata_change = metadata_plus
    for header in metadata_minus:
      self.metadata_change[header] = ''

    if len(self.args) == 1 and not self.recursion_requested:
      url = StorageUrlFromString(self.args[0])
      if not (url.IsCloudUrl() and url.IsObject()):
        raise CommandException('URL (%s) must name an object' % self.args[0])

    # Used to track if any objects' metadata failed to be set.
    self.everything_set_okay = True

    # Fall back to an empty mapping when no headers were supplied, matching
    # how the other commands build their preconditions.
    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    name_expansion_iterator = NameExpansionIterator(
        self.command_name, self.debug, self.logger, self.gsutil_api,
        self.args, self.recursion_requested, all_versions=self.all_versions,
        continue_on_error=self.parallel_operations,
        bucket_listing_fields=['generation', 'metadata', 'metageneration'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name, self.debug, self.GetSeekAheadGsutilApi(),
        self.args, self.recursion_requested,
        all_versions=self.all_versions, project_id=self.project_id)

    try:
      # Perform requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_SetMetadataFuncWrapper, name_expansion_iterator,
                 _SetMetadataExceptionHandler, fail_on_error=True,
                 seek_ahead_iterator=seek_ahead_iterator)
    except AccessDeniedException as e:
      if e.status == 403:
        self._WarnServiceAccounts()
      raise

    if not self.everything_set_okay:
      raise CommandException('Metadata for some objects could not be set.')

    return 0
Esempio n. 3
0
    def RunCommand(self):
        """Command entry point for the rm command.

        Parses the sub-options (-a, -f, -I, -r/-R), determines the URL
        strings to operate on (from stdin with -I, otherwise from the
        command arguments), records buckets named for recursive deletion,
        and applies the remove function to every expanded URL.

        Raises:
            CommandException: if arguments are combined with -I, if no URL
                is given without -I, or if expansion/removal fails for a
                reason other than a matched bucket slated for deletion.
            ServiceException: re-raised unless continue_on_error is set.
        """
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    # Recursive removal also implies all versions.
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        # NOTE(review): bucket_urls_to_delete is filled here but not consumed
        # in the visible part of this method -- presumably used further on.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            # NOTE(review): with -I this iterates url_strs once here and it
            # is iterated again by the name expansion below -- confirm that
            # StdinIterator tolerates that.
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            seek_ahead_iterator = None
            # Cannot seek ahead with stdin args, since we can only iterate them
            # once without buffering in memory.
            if not self.read_args_from_stdin:
                seek_ahead_iterator = SeekAheadNameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.GetSeekAheadGsutilApi(),
                    url_strs,
                    self.recursion_requested,
                    all_versions=self.all_versions,
                    project_id=self.project_id)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                seek_ahead_iterator=seek_ahead_iterator)

        # Assuming the bucket has versioning enabled, url's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        # The exception object itself is not needed, so no name is bound;
        # this form is valid on both Python 2 and Python 3.
        except ServiceException:
            if not self.continue_on_error:
                raise
Esempio n. 4
0
    def RunCommand(self):
        """Command entry point for the rewrite command.

        Parses the sub-options (-f, -k, -I, -O, -r/-R, -s), determines the
        URL strings to operate on (from stdin with -I, otherwise from the
        command arguments), and applies the rewrite function to every
        expanded URL.

        Returns:
            0 on success.

        Raises:
            CommandException: if arguments are combined with -I, if no URL
                is given without -I, if no transformation flag was supplied,
                or if one or more objects could not be rewritten.
        """
        self.continue_on_error = self.parallel_operations
        self.dest_storage_class = None
        self.no_preserve_acl = False
        self.read_args_from_stdin = False
        self.supported_transformation_flags = ['-k', '-s']
        self.transform_types = set()

        self.op_failure_count = 0
        self.boto_file_encryption_tuple, self.boto_file_encryption_sha256 = (
            GetEncryptionTupleAndSha256Hash())

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-f':
                    self.continue_on_error = True
                elif o == '-k':
                    self.transform_types.add(_TransformTypes.CRYPTO_KEY)
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-O':
                    self.no_preserve_acl = True
                elif o == '-r' or o == '-R':
                    # Recursive rewrite also implies all versions.
                    self.recursion_requested = True
                    self.all_versions = True
                elif o == '-s':
                    self.transform_types.add(_TransformTypes.STORAGE_CLASS)
                    self.dest_storage_class = NormalizeStorageClass(a)

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rewrite command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        if not self.transform_types:
            raise CommandException(
                'rewrite command requires at least one transformation flag. '
                'Currently supported transformation flags: %s' %
                self.supported_transformation_flags)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        url_strs_generator = GenerationCheckGenerator(url_strs)

        # Convert recursive flag to flat wildcard to avoid performing multiple
        # listings.
        if self.recursion_requested:
            url_strs_generator = ConvertRecursiveToFlatWildcard(
                url_strs_generator)

        # Expand the source argument(s).
        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            url_strs_generator,
            self.recursion_requested,
            project_id=self.project_id,
            continue_on_error=self.continue_on_error
            or self.parallel_operations,
            bucket_listing_fields=['name', 'size'])

        seek_ahead_iterator = None
        # Cannot seek ahead with stdin args, since we can only iterate them
        # once without buffering in memory.
        if not self.read_args_from_stdin:
            # Perform the same recursive-to-flat conversion on original url_strs so
            # that it is as true to the original iterator as possible.
            # NOTE(review): unlike the conversion above, this one is applied
            # even when recursion was not requested -- confirm intended.
            seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                seek_ahead_url_strs,
                self.recursion_requested,
                all_versions=self.all_versions,
                project_id=self.project_id)

        # Perform rewrite requests in parallel (-m) mode, if requested.
        self.Apply(_RewriteFuncWrapper,
                   name_expansion_iterator,
                   _RewriteExceptionHandler,
                   fail_on_error=(not self.continue_on_error),
                   shared_attrs=['op_failure_count'],
                   seek_ahead_iterator=seek_ahead_iterator)

        if self.op_failure_count:
            # Pluralize only for more than one failure; the count is already
            # known to be non-zero here, so a plain truthiness test would
            # always pick the plural form.
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be rewritten.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0
Esempio n. 5
0
    def RunCommand(self):
        """Command entry point for the rewrite command.

        Parses the sub-options (-f, -k, -I, -O, -r/-R), determines the URL
        strings to operate on (from stdin with -I, otherwise from the
        command arguments), and applies the rewrite function to every
        expanded URL.

        Returns:
            0 on success.

        Raises:
            CommandException: if arguments are combined with -I, if no URL
                is given without -I, if no transformation flag was supplied,
                or if one or more objects could not be rewritten.
        """
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.no_preserve_acl = False
        self.supported_transformation_flags = ['-k']
        self.transform_types = []

        self.op_failure_count = 0
        self.current_encryption_tuple, self.current_encryption_sha256 = (
            GetEncryptionTupleAndSha256Hash())

        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-f':
                    self.continue_on_error = True
                elif o == '-k':
                    self.transform_types.append(_TransformTypes.CRYPTO_KEY)
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-O':
                    self.no_preserve_acl = True
                elif o == '-r' or o == '-R':
                    # Recursive rewrite also implies all versions.
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rewrite command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        url_strs = GenerationCheckGenerator(url_strs)

        if not self.transform_types:
            raise CommandException(
                'rewrite command requires at least one transformation flag. '
                'Currently supported transformation flags: %s' %
                self.supported_transformation_flags)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        # Convert recursive flag to flat wildcard to avoid performing multiple
        # listings.
        if self.recursion_requested:
            url_strs = ConvertRecursiveToFlatWildcard(url_strs)

        # Expand the source argument(s).
        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            url_strs,
            self.recursion_requested,
            project_id=self.project_id,
            continue_on_error=self.continue_on_error
            or self.parallel_operations)

        # Perform rewrite requests in parallel (-m) mode, if requested.
        self.Apply(_RewriteFuncWrapper,
                   name_expansion_iterator,
                   _RewriteExceptionHandler,
                   fail_on_error=(not self.continue_on_error),
                   shared_attrs=['op_failure_count'])

        if self.op_failure_count:
            # Pluralize only for more than one failure; the count is already
            # known to be non-zero here, so a plain truthiness test would
            # always pick the plural form.
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be rewritten.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0
Esempio n. 6
0
    def RunCommand(self):
        """Command entry point for the rm command.

        Parses the sub-options (-a, -f, -I, -r/-R), determines the URL
        strings to operate on (from stdin with -I, otherwise from the
        command arguments), records buckets named for recursive deletion,
        and applies the remove function to every expanded URL.

        Raises:
            CommandException: if arguments are combined with -I, if no URL
                is given without -I, or if expansion/removal fails while
                there are no buckets to delete and continue_on_error is
                unset.
            ServiceException: re-raised unless continue_on_error is set.
        """
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = False
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    # Recursive removal also implies all versions.
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Buckets (and the argument strings that named them) that a recursive
        # request targets as a whole.
        bucket_urls_to_delete = []
        bucket_strings_to_delete = []
        if self.recursion_requested:
            bucket_fields = ['id']
            # NOTE(review): with -I this iterates url_strs once here and it
            # is iterated again by the name expansion below -- confirm that
            # StdinIterator tolerates that.
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        # Used to track if any files failed to be removed.
        self.everything_removed_okay = True

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(_RemoveFuncWrapper,
                       name_expansion_iterator,
                       _RemoveExceptionHandler,
                       fail_on_error=(not self.continue_on_error))

        # Assuming the bucket has versioning enabled, url's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if not bucket_urls_to_delete and not self.continue_on_error:
                raise
            # Reset the failure count if we failed due to an empty bucket that we're
            # going to delete.
            msg = 'No URLs matched: '
            if msg in str(e):
                parts = str(e).split(msg)
                if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
                    ResetFailureCount()
        # The exception object itself is not needed, so no name is bound;
        # this form is valid on both Python 2 and Python 3.
        except ServiceException:
            if not self.continue_on_error:
                raise
Esempio n. 7
0
    def RunCommand(self):
        """Command entry point for the rewrite command.

        Parses the sub-options (-f, -k, -I, -O, -r/-R, -s), determines the
        URL strings to operate on (from stdin with -I, otherwise from the
        command arguments), caches the CSEK key wrappers found in the boto
        config keyed by their sha256 hash, and applies the rewrite function
        to every expanded URL.

        Returns:
            0 on success.

        Raises:
            CommandException: if arguments are combined with -I, if no URL
                is given without -I, if no transformation flag was supplied,
                or if one or more objects could not be rewritten.
        """
        self.continue_on_error = self.parallel_operations
        self.csek_hash_to_keywrapper = {}
        self.dest_storage_class = None
        self.no_preserve_acl = False
        self.read_args_from_stdin = False
        self.supported_transformation_flags = ['-k', '-s']
        self.transform_types = set()

        self.op_failure_count = 0
        self.boto_file_encryption_keywrapper = GetEncryptionKeyWrapper(config)
        self.boto_file_encryption_sha256 = (
            self.boto_file_encryption_keywrapper.crypto_key_sha256
            if self.boto_file_encryption_keywrapper else None)

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-f':
                    self.continue_on_error = True
                elif o == '-k':
                    self.transform_types.add(_TransformTypes.CRYPTO_KEY)
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-O':
                    self.no_preserve_acl = True
                elif o == '-r' or o == '-R':
                    # Recursive rewrite also implies all versions.
                    self.recursion_requested = True
                    self.all_versions = True
                elif o == '-s':
                    self.transform_types.add(_TransformTypes.STORAGE_CLASS)
                    self.dest_storage_class = NormalizeStorageClass(a)

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rewrite command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        if not self.transform_types:
            raise CommandException(
                'rewrite command requires at least one transformation flag. '
                'Currently supported transformation flags: %s' %
                self.supported_transformation_flags)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        url_strs_generator = GenerationCheckGenerator(url_strs)

        # Convert recursive flag to flat wildcard to avoid performing multiple
        # listings.
        if self.recursion_requested:
            url_strs_generator = ConvertRecursiveToFlatWildcard(
                url_strs_generator)

        # Expand the source argument(s).
        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            url_strs_generator,
            self.recursion_requested,
            project_id=self.project_id,
            continue_on_error=self.continue_on_error
            or self.parallel_operations,
            bucket_listing_fields=['name', 'size'])

        seek_ahead_iterator = None
        # Cannot seek ahead with stdin args, since we can only iterate them
        # once without buffering in memory.
        if not self.read_args_from_stdin:
            # Perform the same recursive-to-flat conversion on original url_strs so
            # that it is as true to the original iterator as possible.
            # NOTE(review): unlike the conversion above, this one is applied
            # even when recursion was not requested -- confirm intended.
            seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                seek_ahead_url_strs,
                self.recursion_requested,
                all_versions=self.all_versions,
                project_id=self.project_id)

        # Rather than have each worker repeatedly calculate the sha256 hash for each
        # decryption_key in the boto config, do this once now and cache the results.
        # Config attributes are numbered from 1 (decryption_key1, ...).
        for key_number in range(1, MAX_DECRYPTION_KEYS + 1):
            keywrapper = CryptoKeyWrapperFromKey(
                config.get('GSUtil', 'decryption_key%s' % key_number, None))
            if keywrapper is None:
                # Stop at first attribute absence in lexicographical iteration.
                break
            if keywrapper.crypto_type == CryptoKeyType.CSEK:
                self.csek_hash_to_keywrapper[
                    keywrapper.crypto_key_sha256] = keywrapper
        # Also include the encryption_key, since it should be used to decrypt and
        # then encrypt if the object's CSEK should remain the same.
        if self.boto_file_encryption_sha256 is not None:
            self.csek_hash_to_keywrapper[self.boto_file_encryption_sha256] = (
                self.boto_file_encryption_keywrapper)

        # Perform rewrite requests in parallel (-m) mode, if requested.
        self.Apply(_RewriteFuncWrapper,
                   name_expansion_iterator,
                   _RewriteExceptionHandler,
                   fail_on_error=(not self.continue_on_error),
                   shared_attrs=['op_failure_count'],
                   seek_ahead_iterator=seek_ahead_iterator)

        if self.op_failure_count:
            # Pluralize only for more than one failure; the count is already
            # known to be non-zero here, so a plain truthiness test would
            # always pick the plural form.
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be rewritten.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0