class RmCommand(Command):
    """Implementation of gsutil rm command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'rm',
        command_name_aliases=['del', 'delete', 'remove'],
        usage_synopsis=_SYNOPSIS,
        min_args=0,
        max_args=constants.NO_MAX,
        supported_sub_args='afIrR',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='rm',
        help_name_aliases=['del', 'delete', 'remove'],
        help_type='command_help',
        help_one_line_summary='Remove objects',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    gcloud_storage_map = GcloudStorageMap(
        gcloud_command=['alpha', 'storage', 'rm'],
        flag_map={
            '-r': GcloudStorageFlag('-r'),
            '-R': GcloudStorageFlag('-r'),
            '-a': GcloudStorageFlag('-a'),
            '-I': GcloudStorageFlag('-I'),
            '-f': GcloudStorageFlag('--continue-on-error'),
        },
    )
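    # Illustrative shim translation implied by the flag map above (URL
    # hypothetical):
    #   gsutil rm -f -r gs://my-bucket/prefix
    #   -> gcloud alpha storage rm --continue-on-error -r gs://my-bucket/prefix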

    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
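        # In parallel (-m) mode, a single failure shouldn't abort other
        # in-flight deletes, so continue_on_error defaults to the
        # parallel-operations setting (an inference from the assignment below).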
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
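                    # Recursive removal also deletes all object versions;
                    # otherwise a trailing bucket delete ("rm -r gs://bucket")
                    # could fail on a versioned bucket.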
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            seek_ahead_iterator = None
            # Cannot seek ahead with stdin args, since we can only iterate them
            # once without buffering in memory.
            if not self.read_args_from_stdin:
                seek_ahead_iterator = SeekAheadNameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.GetSeekAheadGsutilApi(),
                    url_strs,
                    self.recursion_requested,
                    all_versions=self.all_versions,
                    project_id=self.project_id)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                seek_ahead_iterator=seek_ahead_iterator)

        # Assuming the bucket has versioning enabled, URLs that don't map to
        # objects should throw an error even with all_versions, since the
        # prior round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets
        # should be corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        except ServiceException as e:
            if not self.continue_on_error:
                raise

        if self.bucket_not_found_count:
            raise CommandException(
                'Encountered non-existent bucket during listing')

        if self.op_failure_count and not self.continue_on_error:
            raise CommandException('Some files could not be removed.')

        # If this was a gsutil rm -r command covering any bucket subdirs,
        # remove any dir_$folder$ objects (which are created by various web UI
        # tools to simulate folders).
        if self.recursion_requested:
            folder_object_wildcards = []
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsObject():
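                    # e.g. gs://my-bucket/subdir yields the wildcard
                    # gs://my-bucket/subdir*_$folder$ (URL illustrative).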
                    folder_object_wildcards.append(
                        url_str.rstrip('*') + '*_$folder$')
            if folder_object_wildcards:
                self.continue_on_error = True
                try:
                    name_expansion_iterator = NameExpansionIterator(
                        self.command_name,
                        self.debug,
                        self.logger,
                        self.gsutil_api,
                        folder_object_wildcards,
                        self.recursion_requested,
                        project_id=self.project_id,
                        all_versions=self.all_versions)
                    # When we're removing folder objects, always continue on error
                    self.Apply(_RemoveFuncWrapper,
                               name_expansion_iterator,
                               _RemoveFoldersExceptionHandler,
                               fail_on_error=False)
                except CommandException as e:
                    # Ignore exception from name expansion due to an absent folder file.
                    if not e.reason.startswith(NO_URLS_MATCHED_PREFIX):
                        raise

        # Now that all data has been deleted, delete any bucket URLs.
        for url in bucket_urls_to_delete:
            self.logger.info('Removing %s...', url)
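            # Object deletes above may not be reflected in listings
            # immediately, so briefly retry the bucket delete on
            # NotEmptyException.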

            @Retry(NotEmptyException, tries=3, timeout_secs=1)
            def BucketDeleteWithRetry():
                self.gsutil_api.DeleteBucket(url.bucket_name,
                                             provider=url.scheme)

            BucketDeleteWithRetry()

        if self.op_failure_count:
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be removed.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0

    def RemoveFunc(self, name_expansion_result, thread_state=None):
        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

        exp_src_url = name_expansion_result.expanded_storage_url
        self.logger.info('Removing %s...', exp_src_url)
        try:
            gsutil_api.DeleteObject(exp_src_url.bucket_name,
                                    exp_src_url.object_name,
                                    preconditions=self.preconditions,
                                    generation=exp_src_url.generation,
                                    provider=exp_src_url.scheme)
        except NotFoundException as e:
            # DeleteObject will sometimes return a 504 (DEADLINE_EXCEEDED) when
            # the operation was in fact successful. When a retry is attempted in
            # these cases, it will fail with a (harmless) 404. The 404 is harmless
            # since it really just means the file was already deleted, which is
            # what we want anyway. Here we simply downgrade the message to info
            # rather than error and correct the command-level failure total.
            self.logger.info('Cannot find %s', exp_src_url)
            DecrementFailureCount()
        _PutToQueueWithTimeout(gsutil_api.status_queue,
                               MetadataMessage(message_time=time.time()))


class LsCommand(Command):
  """Implementation of gsutil ls command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'ls',
      command_name_aliases=[
          'dir',
          'list',
      ],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='aebdlLhp:rR',
      file_url_ok=False,
      provider_url_ok=True,
      urls_start_arg=0,
      gs_api_support=[
          ApiSelector.XML,
          ApiSelector.JSON,
      ],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument(),
      ],
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='ls',
      help_name_aliases=[
          'dir',
          'list',
      ],
      help_type='command_help',
      help_one_line_summary='List providers, buckets, or objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  # TODO(b/206151616) Add mappings for remaining flags.
  gcloud_storage_map = GcloudStorageMap(
      gcloud_command='alpha storage ls',
      flag_map={
          '-r': GcloudStorageFlag('-r'),
          '-R': GcloudStorageFlag('-r'),
          '-l': GcloudStorageFlag('-l'),
          '-L': GcloudStorageFlag('-L'),
          '-b': GcloudStorageFlag('-b'),
          '-e': GcloudStorageFlag('-e'),
          '-a': GcloudStorageFlag('-a'),
          '-h': GcloudStorageFlag('--readable-sizes'),
          '-p': GcloudStorageFlag('--project'),
      },
  )
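  # Illustrative shim translation implied by the flag map above (bucket name
  # hypothetical):
  #   gsutil ls -r -h gs://my-bucket
  #   -> gcloud alpha storage ls -r --readable-sizes gs://my-bucket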

  def _PrintBucketInfo(self, bucket_blr, listing_style):
    """Print listing info for given bucket.

    Args:
      bucket_blr: BucketListingReference for the bucket being listed
      listing_style: ListingStyle enum describing type of output desired.

    Returns:
      None. Listing output is written directly via text_util.print_to_fd.
    """
    if (listing_style == ListingStyle.SHORT or
        listing_style == ListingStyle.LONG):
      text_util.print_to_fd(bucket_blr)
      return
    # listing_style == ListingStyle.LONG_LONG:
    # We're guaranteed by the caller that the root object is populated.
    bucket = bucket_blr.root_object
    location_constraint = bucket.location
    storage_class = bucket.storageClass
    fields = {
        'bucket': bucket_blr.url_string,
        'storage_class': storage_class,
        'location_constraint': location_constraint,
        'acl': AclTranslation.JsonFromMessage(bucket.acl),
        'default_acl': AclTranslation.JsonFromMessage(bucket.defaultObjectAcl),
        'versioning': bucket.versioning and bucket.versioning.enabled,
        'website_config': 'Present' if bucket.website else 'None',
        'logging_config': 'Present' if bucket.logging else 'None',
        'cors_config': 'Present' if bucket.cors else 'None',
        'lifecycle_config': 'Present' if bucket.lifecycle else 'None',
        'requester_pays': bucket.billing and bucket.billing.requesterPays
    }
    if bucket.retentionPolicy:
      fields['retention_policy'] = 'Present'
    if bucket.labels:
      fields['labels'] = LabelTranslation.JsonFromMessage(bucket.labels,
                                                          pretty_print=True)
    else:
      fields['labels'] = 'None'
    if bucket.encryption and bucket.encryption.defaultKmsKeyName:
      fields['default_kms_key'] = bucket.encryption.defaultKmsKeyName
    else:
      fields['default_kms_key'] = 'None'
    fields['encryption_config'] = 'Present' if bucket.encryption else 'None'
    # Fields not available in all APIs (e.g. the XML API)
    if bucket.autoclass and bucket.autoclass.enabled:
      fields['autoclass_enabled_date'] = (
          bucket.autoclass.toggleTime.strftime('%a, %d %b %Y'))
    if bucket.locationType:
      fields['location_type'] = bucket.locationType
    if bucket.customPlacementConfig:
      fields['custom_placement_locations'] = (
          bucket.customPlacementConfig.dataLocations)
    if bucket.metageneration:
      fields['metageneration'] = bucket.metageneration
    if bucket.timeCreated:
      fields['time_created'] = bucket.timeCreated.strftime(
          '%a, %d %b %Y %H:%M:%S GMT')
    if bucket.updated:
      fields['updated'] = bucket.updated.strftime('%a, %d %b %Y %H:%M:%S GMT')
    if bucket.defaultEventBasedHold:
      fields['default_eventbased_hold'] = bucket.defaultEventBasedHold
    if bucket.iamConfiguration:
      if bucket.iamConfiguration.bucketPolicyOnly:
        enabled = bucket.iamConfiguration.bucketPolicyOnly.enabled
        fields['bucket_policy_only_enabled'] = enabled
      if bucket.iamConfiguration.publicAccessPrevention:
        fields['public_access_prevention'] = (
            bucket.iamConfiguration.publicAccessPrevention)
    if bucket.rpo:
      fields['rpo'] = bucket.rpo
    if bucket.satisfiesPZS:
      fields['satisfies_pzs'] = bucket.satisfiesPZS

    # For field values that are multiline, add indenting to make it look
    # prettier.
    for key in fields:
      previous_value = fields[key]
      if (not isinstance(previous_value, six.string_types) or
          '\n' not in previous_value):
        continue
      new_value = previous_value.replace('\n', '\n\t  ')
      # Start multiline values on a new line if they aren't already.
      if not new_value.startswith('\n'):
        new_value = '\n\t  ' + new_value
      fields[key] = new_value

    # Only display certain properties if the given API returned them (JSON API
    # returns many fields that the XML API does not).
    autoclass_line = ''
    location_type_line = ''
    custom_placement_locations_line = ''
    metageneration_line = ''
    time_created_line = ''
    time_updated_line = ''
    default_eventbased_hold_line = ''
    retention_policy_line = ''
    bucket_policy_only_enabled_line = ''
    public_access_prevention_line = ''
    rpo_line = ''
    satisfies_pzs_line = ''
    if 'autoclass_enabled_date' in fields:
      autoclass_line = '\tAutoclass:\t\t\tEnabled on {autoclass_enabled_date}\n'
    if 'location_type' in fields:
      location_type_line = '\tLocation type:\t\t\t{location_type}\n'
    if 'custom_placement_locations' in fields:
      custom_placement_locations_line = (
          '\tPlacement locations:\t\t{custom_placement_locations}\n')
    if 'metageneration' in fields:
      metageneration_line = '\tMetageneration:\t\t\t{metageneration}\n'
    if 'time_created' in fields:
      time_created_line = '\tTime created:\t\t\t{time_created}\n'
    if 'updated' in fields:
      time_updated_line = '\tTime updated:\t\t\t{updated}\n'
    if 'default_eventbased_hold' in fields:
      default_eventbased_hold_line = (
          '\tDefault Event-Based Hold:\t{default_eventbased_hold}\n')
    if 'retention_policy' in fields:
      retention_policy_line = '\tRetention Policy:\t\t{retention_policy}\n'
    if 'bucket_policy_only_enabled' in fields:
      bucket_policy_only_enabled_line = ('\tBucket Policy Only enabled:\t'
                                         '{bucket_policy_only_enabled}\n')
    if 'public_access_prevention' in fields:
      public_access_prevention_line = ('\tPublic access prevention:\t'
                                       '{public_access_prevention}\n')
    if 'rpo' in fields:
      rpo_line = ('\tRPO:\t\t\t\t{rpo}\n')
    if 'satisfies_pzs' in fields:
      satisfies_pzs_line = '\tSatisfies PZS:\t\t\t{satisfies_pzs}\n'

    text_util.print_to_fd(
        ('{bucket} :\n'
         '\tStorage class:\t\t\t{storage_class}\n' + location_type_line +
         '\tLocation constraint:\t\t{location_constraint}\n' +
         custom_placement_locations_line +
         '\tVersioning enabled:\t\t{versioning}\n'
         '\tLogging configuration:\t\t{logging_config}\n'
         '\tWebsite configuration:\t\t{website_config}\n'
         '\tCORS configuration: \t\t{cors_config}\n'
         '\tLifecycle configuration:\t{lifecycle_config}\n'
         '\tRequester Pays enabled:\t\t{requester_pays}\n' +
         retention_policy_line + default_eventbased_hold_line +
         '\tLabels:\t\t\t\t{labels}\n' +
         '\tDefault KMS key:\t\t{default_kms_key}\n' + time_created_line +
         time_updated_line + metageneration_line +
         bucket_policy_only_enabled_line + autoclass_line +
         public_access_prevention_line + rpo_line + satisfies_pzs_line +
         '\tACL:\t\t\t\t{acl}\n'
         '\tDefault ACL:\t\t\t{default_acl}').format(**fields))
    if bucket_blr.storage_url.scheme == 's3':
      text_util.print_to_fd(
          'Note: this is an S3 bucket so configuration values may be '
          'blank. To retrieve bucket configuration values, use '
          'individual configuration commands such as gsutil acl get '
          '<bucket>.')

  def _PrintLongListing(self, bucket_listing_ref):
    """Prints an object with ListingStyle.LONG."""
    obj = bucket_listing_ref.root_object
    url_str = bucket_listing_ref.url_string
    if (obj.metadata and
        S3_DELETE_MARKER_GUID in obj.metadata.additionalProperties):
      size_string = '0'
      num_bytes = 0
      num_objs = 0
      url_str += '<DeleteMarker>'
    else:
      size_string = (MakeHumanReadable(obj.size)
                     if self.human_readable else str(obj.size))
      num_bytes = obj.size
      num_objs = 1

    timestamp = JSON_TIMESTAMP_RE.sub(r'\1T\2Z', str(obj.timeCreated))
    printstr = '%(size)10s  %(timestamp)s  %(url)s'
    encoded_etag = None
    encoded_metagen = None
    if self.all_versions:
      printstr += '  metageneration=%(metageneration)s'
      encoded_metagen = str(obj.metageneration)
    if self.include_etag:
      printstr += '  etag=%(etag)s'
      encoded_etag = obj.etag
    format_args = {
        'size': size_string,
        'timestamp': timestamp,
        'url': url_str,
        'metageneration': encoded_metagen,
        'etag': encoded_etag
    }
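    # Illustrative output line (values hypothetical; the metageneration and
    # etag columns appear only with -a and -e, respectively):
    #      11052  2020-03-02T19:25:17Z  gs://my-bucket/obj  etag=CPjz...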
    text_util.print_to_fd(printstr % format_args)
    return (num_objs, num_bytes)

  def RunCommand(self):
    """Command entry point for the ls command."""
    got_nomatch_errors = False
    got_bucket_nomatch_errors = False
    listing_style = ListingStyle.SHORT
    get_bucket_info = False
    self.recursion_requested = False
    self.all_versions = False
    self.include_etag = False
    self.human_readable = False
    self.list_subdir_contents = True
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-e':
          self.include_etag = True
        elif o == '-b':
          get_bucket_info = True
        elif o == '-h':
          self.human_readable = True
        elif o == '-l':
          listing_style = ListingStyle.LONG
        elif o == '-L':
          listing_style = ListingStyle.LONG_LONG
        elif o == '-p':
          # Project IDs are sent as header values when using gs and s3 XML APIs.
          InsistAscii(a, 'Invalid non-ASCII character found in project ID')
          self.project_id = a
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
        elif o == '-d':
          self.list_subdir_contents = False

    if not self.args:
      # default to listing all gs buckets
      self.args = ['gs://']

    total_objs = 0
    total_bytes = 0

    def MaybePrintBucketHeader(blr):
      if len(self.args) > 1:
        text_util.print_to_fd('%s:' % six.ensure_text(blr.url_string))

    print_bucket_header = MaybePrintBucketHeader

    for url_str in self.args:
      storage_url = StorageUrlFromString(url_str)
      if storage_url.IsFileUrl():
        raise CommandException('Only cloud URLs are supported for %s' %
                               self.command_name)
      bucket_fields = None
      if (listing_style == ListingStyle.SHORT or
          listing_style == ListingStyle.LONG):
        bucket_fields = ['id']
      elif listing_style == ListingStyle.LONG_LONG:
        bucket_fields = [
            'acl',
            'autoclass',
            'billing',
            'cors',
            'customPlacementConfig',
            'defaultObjectAcl',
            'encryption',
            'iamConfiguration',
            'labels',
            'location',
            'locationType',
            'logging',
            'lifecycle',
            'metageneration',
            'retentionPolicy',
            'defaultEventBasedHold',
            'rpo',
            'satisfiesPZS',
            'storageClass',
            'timeCreated',
            'updated',
            'versioning',
            'website',
        ]
      if storage_url.IsProvider():
        # Provider URL: use bucket wildcard to list buckets.
        for blr in self.WildcardIterator(
            '%s://*' %
            storage_url.scheme).IterBuckets(bucket_fields=bucket_fields):
          self._PrintBucketInfo(blr, listing_style)
      elif storage_url.IsBucket() and get_bucket_info:
        # ls -b bucket listing request: List info about bucket(s).
        total_buckets = 0
        for blr in self.WildcardIterator(url_str).IterBuckets(
            bucket_fields=bucket_fields):
          if not ContainsWildcard(url_str) and not blr.root_object:
            # Iterator does not make an HTTP call for non-wildcarded
            # listings with fields=='id'. Ensure the bucket exists by calling
            # GetBucket.
            self.gsutil_api.GetBucket(blr.storage_url.bucket_name,
                                      fields=['id'],
                                      provider=storage_url.scheme)
          self._PrintBucketInfo(blr, listing_style)
          total_buckets += 1
        if not ContainsWildcard(url_str) and not total_buckets:
          got_bucket_nomatch_errors = True
      else:
        # URL names a bucket, object, or object subdir ->
        # list matching object(s) / subdirs.
        def _PrintPrefixLong(blr):
          text_util.print_to_fd('%-33s%s' %
                                ('', six.ensure_text(blr.url_string)))

        if listing_style == ListingStyle.SHORT:
          # ls helper by default readies us for a short listing.
          listing_helper = LsHelper(
              self.WildcardIterator,
              self.logger,
              all_versions=self.all_versions,
              print_bucket_header_func=print_bucket_header,
              should_recurse=self.recursion_requested,
              list_subdir_contents=self.list_subdir_contents)
        elif listing_style == ListingStyle.LONG:
          bucket_listing_fields = [
              'name',
              'size',
              'timeCreated',
              'updated',
          ]
          if self.all_versions:
            bucket_listing_fields.extend([
                'generation',
                'metageneration',
            ])
          if self.include_etag:
            bucket_listing_fields.append('etag')

          listing_helper = LsHelper(
              self.WildcardIterator,
              self.logger,
              print_object_func=self._PrintLongListing,
              print_dir_func=_PrintPrefixLong,
              print_bucket_header_func=print_bucket_header,
              all_versions=self.all_versions,
              should_recurse=self.recursion_requested,
              fields=bucket_listing_fields,
              list_subdir_contents=self.list_subdir_contents)

        elif listing_style == ListingStyle.LONG_LONG:
          # List all fields
          bucket_listing_fields = (UNENCRYPTED_FULL_LISTING_FIELDS +
                                   ENCRYPTED_FIELDS)
          listing_helper = LsHelper(
              self.WildcardIterator,
              self.logger,
              print_object_func=PrintFullInfoAboutObject,
              print_dir_func=_PrintPrefixLong,
              print_bucket_header_func=print_bucket_header,
              all_versions=self.all_versions,
              should_recurse=self.recursion_requested,
              fields=bucket_listing_fields,
              list_subdir_contents=self.list_subdir_contents)
        else:
          raise CommandException('Unknown listing style: %s' % listing_style)

        exp_dirs, exp_objs, exp_bytes = (
            listing_helper.ExpandUrlAndPrint(storage_url))
        if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
          got_nomatch_errors = True
        total_bytes += exp_bytes
        total_objs += exp_objs

    if total_objs and listing_style != ListingStyle.SHORT:
      text_util.print_to_fd(
          'TOTAL: %d objects, %d bytes (%s)' %
          (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))
    if got_nomatch_errors:
      raise CommandException('One or more URLs matched no objects.')
    if got_bucket_nomatch_errors:
      raise NotFoundException('One or more bucket URLs matched no buckets.')

    return 0
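

# Illustrative invocations handled by LsCommand above (bucket name
# hypothetical):
#   gsutil ls                       # no args: lists all gs:// buckets
#   gsutil ls -lh gs://my-bucket    # long listing, human-readable sizes
#   gsutil ls -L -b gs://my-bucket  # full metadata for the bucket itself
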
class DefStorageClassCommand(Command):
  """Implementation of gsutil defstorageclass command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'defstorageclass',
      usage_synopsis=_SYNOPSIS,
      min_args=2,
      max_args=NO_MAX,
      supported_sub_args='',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=2,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments={
          'set': [
              # FreeTextArgument allows for using storage class abbreviations.
              CommandArgument.MakeFreeTextArgument(),
              CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument(),
          ],
          'get': [CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument(),],
      },
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='defstorageclass',
      help_name_aliases=['defaultstorageclass'],
      help_type='command_help',
      help_one_line_summary='Get or set the default storage class on buckets',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={
          'get': _get_help_text,
          'set': _set_help_text,
      },
  )

  gcloud_storage_map = GcloudStorageMap(
      gcloud_command={
          'get': SHIM_GET_COMMAND_MAP,
          'set': SHIM_SET_COMMAND_MAP,
      },
      flag_map={},
  )

  def _CheckIsGsUrl(self, url_str):
    if not url_str.startswith('gs://'):
      raise CommandException(
          '"%s" does not support the URL "%s". Did you mean to use a gs:// '
          'URL?' % (self.command_name, url_str))

  def _CalculateUrlsStartArg(self):
    if not self.args:
      self.RaiseWrongNumberOfArgumentsException()
    if self.args[0].lower() == 'set':
      return 2
    else:
      return 1

  def _SetDefStorageClass(self):
    """Sets the default storage class for a bucket."""
    # At this point, "set" has been popped off the front of self.args.
    normalized_storage_class = NormalizeStorageClass(self.args[0])
    url_args = self.args[1:]
    if not url_args:
      self.RaiseWrongNumberOfArgumentsException()

    some_matched = False
    for url_str in url_args:
      self._CheckIsGsUrl(url_str)
      # Throws a CommandException if the argument is not a bucket.
      bucket_iter = self.GetBucketUrlIterFromArg(url_str, bucket_fields=['id'])
      for blr in bucket_iter:
        some_matched = True
        bucket_metadata = apitools_messages.Bucket()
        self.logger.info('Setting default storage class to "%s" for bucket %s' %
                         (normalized_storage_class, blr.url_string.rstrip('/')))
        bucket_metadata.storageClass = normalized_storage_class
        self.gsutil_api.PatchBucket(blr.storage_url.bucket_name,
                                    bucket_metadata,
                                    provider=blr.storage_url.scheme,
                                    fields=['id'])
    if not some_matched:
      raise CommandException(NO_URLS_MATCHED_TARGET % list(url_args))

  def _GetDefStorageClass(self):
    """Gets the default storage class for a bucket."""
    # At this point, "get" has been popped off the front of self.args.
    url_args = self.args
    some_matched = False
    for url_str in url_args:
      self._CheckIsGsUrl(url_str)
      bucket_iter = self.GetBucketUrlIterFromArg(url_str,
                                                 bucket_fields=['storageClass'])
      for blr in bucket_iter:
        some_matched = True
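        # Illustrative output line (bucket name hypothetical):
        #   gs://my-bucket: NEARLINE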
        print('%s: %s' %
              (blr.url_string.rstrip('/'), blr.root_object.storageClass))
    if not some_matched:
      raise CommandException(NO_URLS_MATCHED_TARGET % list(url_args))

  def RunCommand(self):
    """Command entry point for the defstorageclass command."""
    action_subcommand = self.args.pop(0)
    subcommand_args = [action_subcommand]
    if action_subcommand == 'get':
      func = self._GetDefStorageClass
    elif action_subcommand == 'set':
      func = self._SetDefStorageClass
      normalized_storage_class = NormalizeStorageClass(self.args[0])
      subcommand_args.append(normalized_storage_class)
    else:
      raise CommandException(
          ('Invalid subcommand "%s" for the %s command.\n'
           'See "gsutil help %s".') %
          (action_subcommand, self.command_name, self.command_name))
    metrics.LogCommandParams(subcommands=subcommand_args)
    func()
    return 0
_DESCRIPTION = """
  The defstorageclass command has two sub-commands:
""" + '\n'.join([_SET_DESCRIPTION + _GET_DESCRIPTION])

_DETAILED_HELP_TEXT = CreateHelpText(_SYNOPSIS, _DESCRIPTION)

_get_help_text = CreateHelpText(_GET_SYNOPSIS, _GET_DESCRIPTION)
_set_help_text = CreateHelpText(_SET_SYNOPSIS, _SET_DESCRIPTION)

SHIM_GET_COMMAND_MAP = GcloudStorageMap(
    # Use a list: a string command would be split on spaces, and the
    # --format string below contains a space.
    gcloud_command=[
        'alpha',
        'storage',
        'buckets',
        'list',
        # The url_string for buckets ends with a slash.
        # Substitute the last slash with a colon.
        '--format=value[separator=" "](url_string.sub("/$", ":"),storage_class)'
    ],
    flag_map={},
)
SHIM_SET_COMMAND_MAP = GcloudStorageMap(
    gcloud_command='alpha storage buckets update --default-storage-class',
    flag_map={},
)
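
# Mapping implied by the maps above: "defstorageclass get" becomes
# "gcloud alpha storage buckets list" with the custom --format string, and
# "defstorageclass set" becomes
# "gcloud alpha storage buckets update --default-storage-class".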


class KmsCommand(Command):
    """Implements of gsutil kms command."""

    command_spec = Command.CreateCommandSpec(
        'kms',
        usage_synopsis=_SYNOPSIS,
        min_args=1,
        max_args=NO_MAX,
        supported_sub_args='dk:p:w',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=1,
        gs_api_support=[ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments={
            'authorize': [],
            'encryption': [CommandArgument.MakeNCloudBucketURLsArgument(1)],
            'serviceaccount': [],
        })
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='kms',
        help_name_aliases=[],
        help_type='command_help',
        help_one_line_summary='Configure Cloud KMS encryption',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={
            'authorize': _authorize_help_text,
            'encryption': _encryption_help_text,
            'serviceaccount': _serviceaccount_help_text
        },
    )

    gcloud_storage_map = GcloudStorageMap(
        gcloud_command={
            'authorize': _AUTHORIZE_COMMAND,
            'encryption': _ENCRYPTION_COMMAND,
            'serviceaccount': _SERVICEACCOUNT_COMMAND,
        },
        flag_map={},
    )

    def get_gcloud_storage_args(self):
        common_command = ['alpha', 'storage', 'buckets']
        if self.args[0] == 'encryption':
            if '-d' in self.args or '-k' in self.args:
                _ENCRYPTION_COMMAND.gcloud_command = common_command + [
                    'update'
                ]
            else:
                _ENCRYPTION_COMMAND.gcloud_command = common_command + [
                    'describe',
                    ('--format="value[separator=\": \"](\"name\",\"default_kms_key\"'
                     '.yesno(no=\"No default encryption key.\"))'),
                ]

        return super().get_gcloud_storage_args()

    def _GatherSubOptions(self, subcommand_name):
        self.CheckArguments()
        self.clear_kms_key = False
        self.kms_key = None
        self.warn_on_key_authorize_failure = False

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-p':
                    self.project_id = a
                elif o == '-k':
                    self.kms_key = a
                    ValidateCMEK(self.kms_key)
                elif o == '-d':
                    self.clear_kms_key = True
                elif o == '-w':
                    self.warn_on_key_authorize_failure = True

        if self.warn_on_key_authorize_failure and (
                self.subcommand_name != 'encryption' or not self.kms_key):
            raise CommandException('\n'.join(
                textwrap.wrap(
                    'The "-w" option should only be specified for the "encryption" '
                    'subcommand and must be used with the "-k" option.')))
        # Determine the project (used in the serviceaccount and authorize
        # subcommands), either from the "-p" option's value or the default specified
        # in the user's Boto config file.
        if not self.project_id:
            self.project_id = PopulateProjectId(None)

    def _AuthorizeProject(self, project_id, kms_key):
        """Authorizes a project's service account to be used with a KMS key.

    Authorizes the Cloud Storage-owned service account for project_id to be used
    with kms_key.

    Args:
      project_id: (str) Project id string (not number).
      kms_key: (str) Fully qualified resource name for the KMS key.

    Returns:
      (str, bool) A 2-tuple consisting of:
      1) The email address for the service account associated with the project,
         which is authorized to encrypt/decrypt with the specified key.
      2) A bool value - True if we had to grant the service account permission
         to encrypt/decrypt with the given key; False if the required permission
         was already present.
    """
        # Request the Cloud Storage-owned service account for project_id, creating
        # it if it does not exist.
        service_account = self.gsutil_api.GetProjectServiceAccount(
            project_id, provider='gs').email_address

        kms_api = KmsApi(logger=self.logger)
        self.logger.debug('Getting IAM policy for %s', kms_key)
        try:
            policy = kms_api.GetKeyIamPolicy(kms_key)
            self.logger.debug('Current policy is %s', policy)

            # Check if the required binding is already present; if not, add it and
            # update the key's IAM policy.
            added_new_binding = False
            binding = Binding(
                role='roles/cloudkms.cryptoKeyEncrypterDecrypter',
                members=['serviceAccount:%s' % service_account])
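            # service_account is the project's Cloud Storage service agent,
            # e.g. (format illustrative):
            #   service-<PROJECT_NUMBER>@gs-project-accounts.iam.gserviceaccount.com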
            if binding not in policy.bindings:
                policy.bindings.append(binding)
                kms_api.SetKeyIamPolicy(kms_key, policy)
                added_new_binding = True
            return (service_account, added_new_binding)
        except AccessDeniedException:
            if self.warn_on_key_authorize_failure:
                text_util.print_to_fd('\n'.join(
                    textwrap.wrap(
                        'Warning: Check that your Cloud Platform project\'s service '
                        'account has the "cloudkms.cryptoKeyEncrypterDecrypter" role '
                        'for the specified key. Without this role, you may not be '
                        'able to encrypt or decrypt objects using the key which will '
                        'prevent you from uploading or downloading objects.')))
                return (service_account, False)
            else:
                raise

    def _Authorize(self):
        self._GatherSubOptions('authorize')
        if not self.kms_key:
            raise CommandException(
                '%s %s requires a key to be specified with -k' %
                (self.command_name, self.subcommand_name))

        _, newly_authorized = self._AuthorizeProject(self.project_id,
                                                     self.kms_key)
        if newly_authorized:
            print(
                'Authorized project %s to encrypt and decrypt with key:\n%s' %
                (self.project_id, self.kms_key))
        else:
            print(
                'Project %s was already authorized to encrypt and decrypt with '
                'key:\n%s.' % (self.project_id, self.kms_key))
        return 0

    def _EncryptionClearKey(self, bucket_metadata, bucket_url):
        """Clears the defaultKmsKeyName on a Cloud Storage bucket.

    Args:
      bucket_metadata: (apitools_messages.Bucket) Metadata for the given bucket.
      bucket_url: (gslib.storage_url.StorageUrl) StorageUrl of the given bucket.
    """
        bucket_metadata.encryption = apitools_messages.Bucket.EncryptionValue()
        print('Clearing default encryption key for %s...' %
              str(bucket_url).rstrip('/'))
        self.gsutil_api.PatchBucket(bucket_url.bucket_name,
                                    bucket_metadata,
                                    fields=['encryption'],
                                    provider=bucket_url.scheme)

    def _EncryptionSetKey(self, bucket_metadata, bucket_url,
                          svc_acct_for_project_num):
        """Sets defaultKmsKeyName on a Cloud Storage bucket.

    Args:
      bucket_metadata: (apitools_messages.Bucket) Metadata for the given bucket.
      bucket_url: (gslib.storage_url.StorageUrl) StorageUrl of the given bucket.
      svc_acct_for_project_num: (Dict[int, str]) Mapping of project numbers to
          their corresponding service account.
    """
        bucket_project_number = bucket_metadata.projectNumber
        try:
            # newly_authorized will always be False if the project number is in our
            # cache dict, since we've already called _AuthorizeProject on it.
            service_account, newly_authorized = (
                svc_acct_for_project_num[bucket_project_number], False)
        except KeyError:
            service_account, newly_authorized = self._AuthorizeProject(
                bucket_project_number, self.kms_key)
            svc_acct_for_project_num[bucket_project_number] = service_account
        if newly_authorized:
            text_util.print_to_fd(
                'Authorized service account %s to use key:\n%s' %
                (service_account, self.kms_key))

        bucket_metadata.encryption = apitools_messages.Bucket.EncryptionValue(
            defaultKmsKeyName=self.kms_key)
        print('Setting default KMS key for bucket %s...' %
              str(bucket_url).rstrip('/'))
        self.gsutil_api.PatchBucket(bucket_url.bucket_name,
                                    bucket_metadata,
                                    fields=['encryption'],
                                    provider=bucket_url.scheme)

    def _Encryption(self):
        self._GatherSubOptions('encryption')
        # For each project, we should only make one API call to look up its
        # associated Cloud Storage-owned service account; subsequent lookups can be
        # pulled from this cache dict.
        svc_acct_for_project_num = {}

        def _EncryptionForBucket(blr):
            """Set, clear, or get the defaultKmsKeyName for a bucket."""
            bucket_url = blr.storage_url

            if bucket_url.scheme != 'gs':
                raise CommandException(
                    'The %s command can only be used with gs:// bucket URLs.' %
                    self.command_name)

            # Determine the project from the provided bucket.
            bucket_metadata = self.gsutil_api.GetBucket(
                bucket_url.bucket_name,
                fields=['encryption', 'projectNumber'],
                provider=bucket_url.scheme)

            # "-d" flag was specified, so clear the default KMS key and return.
            if self.clear_kms_key:
                self._EncryptionClearKey(bucket_metadata, bucket_url)
                return 0
            # "-k" flag was specified, so set the default KMS key and return.
            if self.kms_key:
                self._EncryptionSetKey(bucket_metadata, bucket_url,
                                       svc_acct_for_project_num)
                return 0
            # Neither "-d" nor "-k" was specified, so emit the default KMS key and
            # return.
            bucket_url_string = str(bucket_url).rstrip('/')
            if (bucket_metadata.encryption
                    and bucket_metadata.encryption.defaultKmsKeyName):
                print('Default encryption key for %s:\n%s' %
                      (bucket_url_string,
                       bucket_metadata.encryption.defaultKmsKeyName))
            else:
                print('Bucket %s has no default encryption key' %
                      bucket_url_string)
            return 0

        # Iterate over bucket args, performing the specified encryption operation
        # for each.
        some_matched = False
        url_args = self.args
        if not url_args:
            self.RaiseWrongNumberOfArgumentsException()
        for url_str in url_args:
            # Throws a CommandException if the argument is not a bucket.
            bucket_iter = self.GetBucketUrlIterFromArg(url_str)
            for bucket_listing_ref in bucket_iter:
                some_matched = True
                _EncryptionForBucket(bucket_listing_ref)

        if not some_matched:
            raise CommandException(NO_URLS_MATCHED_TARGET % list(url_args))
        return 0

    def _ServiceAccount(self):
        self.CheckArguments()
        if not self.args:
            self.args = ['gs://']
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-p':
                    self.project_id = a

        if not self.project_id:
            self.project_id = PopulateProjectId(None)

        # Request the service account for that project; this might create the
        # service account if it doesn't already exist.
        self.logger.debug('Checking service account for project %s',
                          self.project_id)

        service_account = self.gsutil_api.GetProjectServiceAccount(
            self.project_id, provider='gs').email_address
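        # e.g. (format illustrative):
        #   service-<PROJECT_NUMBER>@gs-project-accounts.iam.gserviceaccount.com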

        print(service_account)

        return 0

    def _RunSubCommand(self, func):
        try:
            self.sub_opts, self.args = getopt.getopt(
                self.args, self.command_spec.supported_sub_args)
            # Commands with both suboptions and subcommands need to reparse for
            # suboptions, so we log again.
            metrics.LogCommandParams(sub_opts=self.sub_opts)
            return func(self)
        except getopt.GetoptError:
            self.RaiseInvalidArgumentException()

    def RunCommand(self):
        """Command entry point for the kms command."""
        # If the only credential type the user supplies in their boto file is hmac,
        # GetApiSelector logic will force us to use the XML API. As the XML API does
        # not support all the operations needed for kms subcommands, fail early.
        if self.gsutil_api.GetApiSelector(provider='gs') != ApiSelector.JSON:
            raise CommandException('\n'.join(
                textwrap.wrap(
                    'The "%s" command can only be used with the GCS JSON API, which '
                    'cannot use HMAC credentials. Please supply a credential '
                    'type that is compatible with the JSON API (e.g. OAuth2) in your '
                    'boto config file.' % self.command_name)))

        method_for_subcommand = {
            'authorize': KmsCommand._Authorize,
            'encryption': KmsCommand._Encryption,
            'serviceaccount': KmsCommand._ServiceAccount
        }
        self.subcommand_name = self.args.pop(0)
        if self.subcommand_name in method_for_subcommand:
            metrics.LogCommandParams(subcommands=[self.subcommand_name])
            return self._RunSubCommand(
                method_for_subcommand[self.subcommand_name])
        else:
            raise CommandException(
                'Invalid subcommand "%s" for the %s command.' %
                (self.subcommand_name, self.command_name))


_DESCRIPTION = """
  The kms command has three subcommands: ``authorize``, ``encryption``,
  and ``serviceaccount``.
""" + (_AUTHORIZE_DESCRIPTION + _ENCRYPTION_DESCRIPTION +
       _SERVICEACCOUNT_DESCRIPTION)

_DETAILED_HELP_TEXT = CreateHelpText(_SYNOPSIS, _DESCRIPTION)

_authorize_help_text = CreateHelpText(_AUTHORIZE_SYNOPSIS,
                                      _AUTHORIZE_DESCRIPTION)
_encryption_help_text = CreateHelpText(_ENCRYPTION_SYNOPSIS,
                                       _ENCRYPTION_DESCRIPTION)
_serviceaccount_help_text = CreateHelpText(_SERVICEACCOUNT_SYNOPSIS,
                                           _SERVICEACCOUNT_DESCRIPTION)

_AUTHORIZE_COMMAND = GcloudStorageMap(
    gcloud_command=['alpha', 'storage', 'service-agent'],
    flag_map={
        '-p': GcloudStorageFlag('--project'),
        '-k': GcloudStorageFlag('--authorize-cmek'),
    })

_ENCRYPTION_COMMAND = GcloudStorageMap(
    gcloud_command='PLACEHOLDER',
    flag_map={
        '-d': GcloudStorageFlag('--clear-default-encryption-key'),
        '-k': GcloudStorageFlag('--default-encryption-key'),
        '-w': GcloudStorageFlag(''),
    })
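
# Note: the 'PLACEHOLDER' command above is rewritten at runtime by
# KmsCommand.get_gcloud_storage_args, which selects 'buckets update' (when
# -d or -k is given) or 'buckets describe' (when neither is given).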

_SERVICEACCOUNT_COMMAND = GcloudStorageMap(
    gcloud_command=['alpha', 'storage', 'service-agent'],
    flag_map={
        '-p': GcloudStorageFlag('--project'),
    })