Example #1
class CatCommand(Command):
    """Implementation of gsutil cat command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'cat',
        command_name_aliases=[],
        usage_synopsis=_SYNOPSIS,
        min_args=1,
        max_args=NO_MAX,
        supported_sub_args='hr:',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='cat',
        help_name_aliases=[],
        help_type='command_help',
        help_one_line_summary='Concatenate object content to stdout',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    # Command entry point.
    def RunCommand(self):
        """Command entry point for the cat command."""
        show_header = False
        request_range = None
        start_byte = 0
        end_byte = None
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-h':
                    show_header = True
                elif o == '-r':
                    request_range = a.strip()
                    range_matcher = re.compile(
                        '^(?P<start>[0-9]+)-(?P<end>[0-9]*)$|^(?P<endslice>-[0-9]+)$'
                    )
                    range_match = range_matcher.match(request_range)
                    if not range_match:
                        raise CommandException('Invalid range (%s)' %
                                               request_range)
                    if range_match.group('start'):
                        start_byte = int(range_match.group('start'))
                    if range_match.group('end'):
                        end_byte = int(range_match.group('end'))
                    if range_match.group('endslice'):
                        start_byte = int(range_match.group('endslice'))
                else:
                    self.RaiseInvalidArgumentException()

        return CatHelper(self).CatUrlStrings(self.args,
                                             show_header=show_header,
                                             start_byte=start_byte,
                                             end_byte=end_byte)
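
The -r parsing above accepts three range forms: 'start-end', 'start-' (from start to end of object), and '-count' (the last N bytes). A standalone sketch of that regex, using the same pattern as RunCommand above with illustrative input values:

import re

# Same pattern used by CatCommand.RunCommand above.
range_matcher = re.compile(
    '^(?P<start>[0-9]+)-(?P<end>[0-9]*)$|^(?P<endslice>-[0-9]+)$')

for spec in ('256-939', '1024-', '-5'):  # illustrative values
    match = range_matcher.match(spec)
    start_byte, end_byte = 0, None
    if match.group('start'):
        start_byte = int(match.group('start'))
    if match.group('end'):
        end_byte = int(match.group('end'))
    if match.group('endslice'):
        # A negative start means "the last N bytes" of the object.
        start_byte = int(match.group('endslice'))
    print(spec, '->', start_byte, end_byte)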
Example #2
class FakeCommandWithCompleters(Command):
  """Command with various completer types."""

  command_spec = Command.CreateCommandSpec(
      'fake2',
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument(),
          CommandArgument.MakeZeroOrMoreFileURLsArgument(),
          CommandArgument.MakeZeroOrMoreCloudOrFileURLsArgument(),
          CommandArgument.MakeFreeTextArgument(),
          CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument(),
          CommandArgument.MakeFileURLOrCannedACLArgument(),
      ]
  )

  help_spec = Command.HelpSpec(
      help_name='fake2',
      help_name_aliases=[],
      help_type='command_help',
      help_one_line_summary='fake command for tests',
      help_text='fake command for tests',
      subcommand_help_text={}
  )

  def __init__(self):
    pass
Example #3
class StatCommand(Command):
    """Implementation of gsutil stat command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'stat',
        command_name_aliases=[],
        usage_synopsis=_SYNOPSIS,
        min_args=1,
        max_args=NO_MAX,
        supported_sub_args='',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='stat',
        help_name_aliases=[],
        help_type='command_help',
        help_one_line_summary='Display object status',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def RunCommand(self):
        """Command entry point for stat command."""
        # List of fields we'll print for stat objects.
        stat_fields = [
            'updated', 'cacheControl', 'contentDisposition', 'contentEncoding',
            'contentLanguage', 'size', 'contentType', 'componentCount',
            'metadata', 'crc32c', 'md5Hash', 'etag', 'generation',
            'metageneration'
        ]
        found_nonmatching_arg = False
        for url_str in self.args:
            arg_matches = 0
            url = StorageUrlFromString(url_str)
            if not url.IsObject():
                raise CommandException(
                    'The stat command only works with object URLs')
            try:
                if ContainsWildcard(url_str):
                    blr_iter = self.WildcardIterator(url_str).IterObjects(
                        bucket_listing_fields=stat_fields)
                else:
                    single_obj = self.gsutil_api.GetObjectMetadata(
                        url.bucket_name,
                        url.object_name,
                        generation=url.generation,
                        provider=url.scheme,
                        fields=stat_fields)
                    blr_iter = [
                        BucketListingObject(url, root_object=single_obj)
                    ]
                for blr in blr_iter:
                    if blr.IsObject():
                        arg_matches += 1
                        if logging.getLogger().isEnabledFor(logging.INFO):
                            PrintFullInfoAboutObject(blr, incl_acl=False)
            except AccessDeniedException:
                print('You aren\'t authorized to read %s - skipping' % url_str)
            except InvalidUrlError:
                raise
            except NotFoundException:
                pass
            if not arg_matches:
                if logging.getLogger().isEnabledFor(logging.INFO):
                    print('No URLs matched %s' % url_str)
                found_nonmatching_arg = True
        if found_nonmatching_arg:
            return 1
        return 0
Example #4
class NotificationCommand(Command):
    """Implementation of gsutil notification command."""

    # Notification names might look like one of these:
    #  canonical form:  projects/_/buckets/bucket/notificationConfigs/3
    #  JSON API form:   b/bucket/notificationConfigs/5
    # Either of the above might start with a / if a user is copying & pasting.
    def _GetNotificationPathRegex(self):
        if not NotificationCommand._notification_path_regex:
            NotificationCommand._notification_path_regex = re.compile(
                ('/?(projects/[^/]+/)?b(uckets)?/(?P<bucket>[^/]+)/'
                 'notificationConfigs/(?P<notification>[0-9]+)'))
        return NotificationCommand._notification_path_regex

    _notification_path_regex = None

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'notification',
        command_name_aliases=[
            'notify', 'notifyconfig', 'notifications', 'notif'
        ],
        usage_synopsis=_SYNOPSIS,
        min_args=2,
        max_args=NO_MAX,
        supported_sub_args='i:t:m:o:f:e:p:s',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=1,
        gs_api_support=[ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments={
            'watchbucket': [
                CommandArgument.MakeFreeTextArgument(),
                CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument()
            ],
            'stopchannel': [],
            'list': [CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument()],
            'delete': [
                # Takes a list of one of the following:
                #   notification: projects/_/buckets/bla/notificationConfigs/5,
                #   bucket: gs://foobar
                CommandArgument.MakeZeroOrMoreCloudURLsArgument()
            ],
            'create': [
                CommandArgument.MakeFreeTextArgument(),  # Cloud Pub/Sub topic
                CommandArgument.MakeNCloudBucketURLsArgument(1)
            ]
        })
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='notification',
        help_name_aliases=['watchbucket', 'stopchannel', 'notifyconfig'],
        help_type='command_help',
        help_one_line_summary='Configure object change notification',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={
            'create': _create_help_text,
            'list': _list_help_text,
            'delete': _delete_help_text,
            'watchbucket': _watchbucket_help_text,
            'stopchannel': _stopchannel_help_text
        },
    )

    def _WatchBucket(self):
        """Creates a watch on a bucket given in self.args."""
        self.CheckArguments()
        identifier = None
        client_token = None
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-i':
                    identifier = a
                if o == '-t':
                    client_token = a

        identifier = identifier or str(uuid.uuid4())
        watch_url = self.args[0]
        bucket_arg = self.args[-1]

        if not watch_url.lower().startswith('https://'):
            raise CommandException(
                'The application URL must be an https:// URL.')

        bucket_url = StorageUrlFromString(bucket_arg)
        if not (bucket_url.IsBucket() and bucket_url.scheme == 'gs'):
            raise CommandException(
                'The %s command can only be used with gs:// bucket URLs.' %
                self.command_name)
        if not bucket_url.IsBucket():
            raise CommandException(
                'URL must name a bucket for the %s command.' %
                self.command_name)

        self.logger.info('Watching bucket %s with application URL %s ...',
                         bucket_url, watch_url)

        try:
            channel = self.gsutil_api.WatchBucket(bucket_url.bucket_name,
                                                  watch_url,
                                                  identifier,
                                                  token=client_token,
                                                  provider=bucket_url.scheme)
        except AccessDeniedException as e:
            self.logger.warn(
                NOTIFICATION_AUTHORIZATION_FAILED_MESSAGE.format(
                    watch_error=str(e), watch_url=watch_url))
            raise

        channel_id = channel.id
        resource_id = channel.resourceId
        client_token = channel.token
        self.logger.info('Successfully created watch notification channel.')
        self.logger.info('Watch channel identifier: %s', channel_id)
        self.logger.info('Canonicalized resource identifier: %s', resource_id)
        self.logger.info('Client state token: %s', client_token)

        return 0
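
The path regex built by _GetNotificationPathRegex above accepts both notification name forms described in the comment (canonical and JSON API), with or without a leading slash. A standalone sketch with illustrative bucket/config values:

import re

# Same pattern built by _GetNotificationPathRegex above.
notification_path_regex = re.compile(
    '/?(projects/[^/]+/)?b(uckets)?/(?P<bucket>[^/]+)/'
    'notificationConfigs/(?P<notification>[0-9]+)')

for path in ('projects/_/buckets/bucket/notificationConfigs/3',
             'b/bucket/notificationConfigs/5',
             '/b/bucket/notificationConfigs/5'):
    match = notification_path_regex.match(path)
    # Each form resolves to the bucket name and the notification id.
    print(path, '->', match.group('bucket'), match.group('notification'))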
Example #5
class RetentionCommand(Command):
  """Implementation of gsutil retention command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'retention',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=2,
      max_args=NO_MAX,
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments={
          'set': [CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument()],
          'clear': [CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument()],
          'get': [CommandArgument.MakeNCloudBucketURLsArgument(1)],
          'lock': [CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument()],
          'event-default': {
              'set': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()],
              'release': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()]
          },
          'event': {
              'set': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()],
              'release': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()]
          },
          'temp': {
              'set': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()],
              'release': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()]
          },
      })

  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='retention',
      help_name_aliases=[],
      help_type='command_help',
      help_one_line_summary=(
          'Provides utilities to interact with Retention Policy feature.'),
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={
          'get': _get_help_text,
          'set': _set_help_text,
          'clear': _clear_help_text,
          'lock': _lock_help_text,
          'event-default': _event_default_help_text,
          'event': _event_help_text,
          'temp': _temp_help_text
      },
  )

  def RunCommand(self):
    """Command entry point for the retention command."""
    # If the only credential type the user supplies in their boto file is HMAC,
    # GetApiSelector logic will force us to use the XML API, which bucket lock
    # does not support at the moment.
    if self.gsutil_api.GetApiSelector('gs') != ApiSelector.JSON:
      raise CommandException(('The {} command can only be used with the GCS '
                              'JSON API. If you have only supplied hmac '
                              'credentials in your boto file, please instead '
                              'supply a credential type that can be used with '
                              'the JSON API.').format(self.command_name))

    self.preconditions = PreconditionsFromHeaders(self.headers)

    action_subcommand = self.args.pop(0)
    self.ParseSubOpts(check_args=True)
    if action_subcommand == 'set':
      func = self._SetRetention
    elif action_subcommand == 'clear':
      func = self._ClearRetention
    elif action_subcommand == 'get':
      func = self._GetRetention
    elif action_subcommand == 'lock':
      func = self._LockRetention
    elif action_subcommand == 'event-default':
      func = self._DefaultEventHold
    elif action_subcommand == 'event':
      func = self._EventHold
    elif action_subcommand == 'temp':
      func = self._TempHold
    else:
      raise CommandException(
          ('Invalid subcommand "{}" for the {} command.\n'
           'See "gsutil help retention".').format(action_subcommand,
                                                  self.command_name))

    # Commands with both suboptions and subcommands need to reparse for
    # suboptions, so we log again.
    metrics.LogCommandParams(subcommands=[action_subcommand],
                             sub_opts=self.sub_opts)
    return func()

  def BucketUpdateFunc(self, url_args, bucket_metadata_update, fields,
                       log_msg_template):
    preconditions = Preconditions(
        meta_gen_match=self.preconditions.meta_gen_match)

    # Iterate over URLs, expanding wildcards and setting the new bucket metadata
    # on each bucket.
    some_matched = False
    for url_str in url_args:
      bucket_iter = self.GetBucketUrlIterFromArg(url_str, bucket_fields=['id'])
      for blr in bucket_iter:
        url = blr.storage_url
        some_matched = True
        self.logger.info(log_msg_template, blr)
        self.gsutil_api.PatchBucket(url.bucket_name,
                                    bucket_metadata_update,
                                    preconditions=preconditions,
                                    provider=url.scheme,
                                    fields=fields)
    if not some_matched:
      raise CommandException(NO_URLS_MATCHED_TARGET % list(url_args))

  def ObjectUpdateMetadataFunc(self,
                               patch_obj_metadata,
                               log_template,
                               name_expansion_result,
                               thread_state=None):
    """Updates metadata on an object using PatchObjectMetadata.

    Args:
      patch_obj_metadata: Metadata changes that should be applied to the
                          existing object.
      log_template: The log template that should be printed for each object.
      name_expansion_result: NameExpansionResult describing target object.
      thread_state: gsutil Cloud API instance to use for the operation.
    """
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

    exp_src_url = name_expansion_result.expanded_storage_url
    self.logger.info(log_template, exp_src_url)

    cloud_obj_metadata = encoding.JsonToMessage(
        apitools_messages.Object, name_expansion_result.expanded_result)

    preconditions = Preconditions(
        gen_match=self.preconditions.gen_match,
        meta_gen_match=self.preconditions.meta_gen_match)
    if preconditions.gen_match is None:
      preconditions.gen_match = cloud_obj_metadata.generation
    if preconditions.meta_gen_match is None:
      preconditions.meta_gen_match = cloud_obj_metadata.metageneration

    gsutil_api.PatchObjectMetadata(exp_src_url.bucket_name,
                                   exp_src_url.object_name,
                                   patch_obj_metadata,
                                   generation=exp_src_url.generation,
                                   preconditions=preconditions,
                                   provider=exp_src_url.scheme,
                                   fields=['id'])
    PutToQueueWithTimeout(gsutil_api.status_queue,
                          MetadataMessage(message_time=time.time()))

  def _GetObjectNameExpansionIterator(self, url_args):
    return NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        url_args,
        self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.parallel_operations,
        bucket_listing_fields=['generation', 'metageneration'])

  def _GetSeekAheadNameExpansionIterator(self, url_args):
    return SeekAheadNameExpansionIterator(self.command_name,
                                          self.debug,
                                          self.GetSeekAheadGsutilApi(),
                                          url_args,
                                          self.recursion_requested,
                                          all_versions=self.all_versions,
                                          project_id=self.project_id)

  def _SetRetention(self):
    """Set retention retention_period on one or more buckets."""

    seconds = RetentionInSeconds(self.args[0])
    retention_policy = (apitools_messages.Bucket.RetentionPolicyValue(
        retentionPeriod=seconds))

    log_msg_template = 'Setting Retention Policy on %s...'
    bucket_metadata_update = apitools_messages.Bucket(
        retentionPolicy=retention_policy)
    url_args = self.args[1:]
    self.BucketUpdateFunc(url_args,
                          bucket_metadata_update,
                          fields=['id', 'retentionPolicy'],
                          log_msg_template=log_msg_template)
    return 0

  def _ClearRetention(self):
    """Clear retention retention_period on one or more buckets."""
    retention_policy = (apitools_messages.Bucket.RetentionPolicyValue(
        retentionPeriod=None))
    log_msg_template = 'Clearing Retention Policy on %s...'
    bucket_metadata_update = apitools_messages.Bucket(
        retentionPolicy=retention_policy)
    url_args = self.args
    self.BucketUpdateFunc(url_args,
                          bucket_metadata_update,
                          fields=['id', 'retentionPolicy'],
                          log_msg_template=log_msg_template)
    return 0

  def _GetRetention(self):
    """Get Retention Policy for a single bucket."""
    bucket_url, bucket_metadata = self.GetSingleBucketUrlFromArg(
        self.args[0], bucket_fields=['retentionPolicy'])
    print(RetentionPolicyToString(bucket_metadata.retentionPolicy, bucket_url))
    return 0

  def _LockRetention(self):
    """Lock Retention Policy on one or more buckets."""
    url_args = self.args
    # Iterate over URLs, expanding wildcards and setting the Retention Policy
    # configuration on each.
    some_matched = False
    for url_str in url_args:
      bucket_iter = self.GetBucketUrlIterFromArg(url_str, bucket_fields=['id'])
      for blr in bucket_iter:
        url = blr.storage_url
        some_matched = True
        # Get bucket metadata to provide a precondition.
        bucket_metadata = self.gsutil_api.GetBucket(
            url.bucket_name,
            provider=url.scheme,
            fields=['id', 'metageneration', 'retentionPolicy'])
        if (not (bucket_metadata.retentionPolicy and
                 bucket_metadata.retentionPolicy.retentionPeriod)):
          # TODO: implement '-c' flag to continue_on_error
          raise CommandException(
              'Bucket "{}" does not have an Unlocked Retention Policy.'.format(
                  url.bucket_name))
        elif bucket_metadata.retentionPolicy.isLocked is True:
          self.logger.error('Retention Policy on "%s" is already locked.', blr)
        elif ConfirmLockRequest(url.bucket_name,
                                bucket_metadata.retentionPolicy):
          self.logger.info('Locking Retention Policy on %s...', blr)
          self.gsutil_api.LockRetentionPolicy(url.bucket_name,
                                              bucket_metadata.metageneration,
                                              provider=url.scheme)
        else:
          self.logger.error(
              '  Abort Locking Retention Policy on {}'.format(blr))
    if not some_matched:
      raise CommandException(NO_URLS_MATCHED_TARGET % list(url_args))
    return 0

  def _DefaultEventHold(self):
    """Sets default value for Event-Based Hold on one or more buckets."""
    hold = None
    if self.args:
      if self.args[0].lower() == 'set':
        hold = True
      elif self.args[0].lower() == 'release':
        hold = False
      else:
        raise CommandException(
            ('Invalid subcommand "{}" for the "retention event-default"'
             ' command.\nSee "gsutil help retention event".').format(
                 self.args[0]))

    verb = 'Setting' if hold else 'Releasing'
    log_msg_template = '{} default Event-Based Hold on %s...'.format(verb)
    bucket_metadata_update = apitools_messages.Bucket(
        defaultEventBasedHold=hold)
    url_args = self.args[1:]
    self.BucketUpdateFunc(url_args,
                          bucket_metadata_update,
                          fields=['id', 'defaultEventBasedHold'],
                          log_msg_template=log_msg_template)
    return 0

  def _EventHold(self):
    """Sets or unsets Event-Based Hold on one or more objects."""
    sub_command_name = 'event'
    sub_command_full_name = 'Event-Based'
    hold = self._ProcessHoldArgs(sub_command_name)
    url_args = self.args[1:]
    obj_metadata_update_wrapper = (SetEventHoldFuncWrapper
                                   if hold else ReleaseEventHoldFuncWrapper)
    self._SetHold(obj_metadata_update_wrapper, url_args, sub_command_full_name)
    return 0

  def _TempHold(self):
    """Sets or unsets Temporary Hold on one or more objects."""
    sub_command_name = 'temp'
    sub_command_full_name = 'Temporary'
    hold = self._ProcessHoldArgs(sub_command_name)
    url_args = self.args[1:]
    obj_metadata_update_wrapper = (SetTempHoldFuncWrapper
                                   if hold else ReleaseTempHoldFuncWrapper)
    self._SetHold(obj_metadata_update_wrapper, url_args, sub_command_full_name)
    return 0

  def _ProcessHoldArgs(self, sub_command_name):
    """Processes command args for Temporary and Event-Based Hold sub-command.

    Args:
      sub_command_name: The name of the subcommand: "temp" / "event"

    Returns:
      Returns a boolean value indicating whether to set (True) or
      release (False) the Hold.
    """
    hold = None
    if self.args[0].lower() == 'set':
      hold = True
    elif self.args[0].lower() == 'release':
      hold = False
    else:
      raise CommandException(
          ('Invalid subcommand "{}" for the "retention {}" command.\n'
           'See "gsutil help retention {}".').format(self.args[0],
                                                     sub_command_name,
                                                     sub_command_name))
    return hold

  def _SetHold(self, obj_metadata_update_wrapper, url_args,
               sub_command_full_name):
    """Common logic to set or unset Event-Based/Temporary Hold on objects.

    Args:
      obj_metadata_update_wrapper: The function for updating related fields in
                                   Object metadata.
      url_args: List of object URIs.
      sub_command_full_name: The full name for sub-command:
                             "Temporary" / "Event-Based"
    """
    if len(url_args) == 1 and not self.recursion_requested:
      url = StorageUrlFromString(url_args[0])
      if not (url.IsCloudUrl() and url.IsObject()):
        raise CommandException('URL ({}) must name an object'.format(
            url_args[0]))

    name_expansion_iterator = self._GetObjectNameExpansionIterator(url_args)
    seek_ahead_iterator = self._GetSeekAheadNameExpansionIterator(url_args)

    # Used to track if any objects' metadata failed to be set.
    self.everything_set_okay = True

    try:
      # TODO: implement '-c' flag to continue_on_error

      # Perform requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(obj_metadata_update_wrapper,
                 name_expansion_iterator,
                 UpdateObjectMetadataExceptionHandler,
                 fail_on_error=True,
                 seek_ahead_iterator=seek_ahead_iterator)

    except AccessDeniedException as e:
      if e.status == 403:
        self._WarnServiceAccounts()
      raise

    if not self.everything_set_okay:
      raise CommandException(
          '{} Hold for some objects could not be set.'.format(
              sub_command_full_name))
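
Note the precondition handling in ObjectUpdateMetadataFunc above: when the user supplied no generation/metageneration preconditions, the object's current values are used, so a concurrent modification makes the patch fail rather than silently overwrite it. A schematic sketch of that fallback (the Preconditions class here is a simplified stand-in, not gsutil's):

class Preconditions(object):
    """Stand-in for gsutil's Preconditions; holds match values for a patch."""

    def __init__(self, gen_match=None, meta_gen_match=None):
        self.gen_match = gen_match
        self.meta_gen_match = meta_gen_match


def fill_preconditions(preconditions, obj_generation, obj_metageneration):
    # Fall back to the object's current generation/metageneration when the
    # caller did not pin them explicitly.
    if preconditions.gen_match is None:
        preconditions.gen_match = obj_generation
    if preconditions.meta_gen_match is None:
        preconditions.meta_gen_match = obj_metageneration
    return preconditions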
Example #6
File: iam.py Project: vjeffz/gsutil
class IamCommand(Command):
    """Implementation of gsutil iam command."""
    command_spec = Command.CreateCommandSpec(
        'iam',
        min_args=2,
        max_args=NO_MAX,
        supported_sub_args='afRrd:e:',
        file_url_ok=True,
        provider_url_ok=False,
        urls_start_arg=1,
        gs_api_support=[ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments={
            'get': [CommandArgument.MakeNCloudURLsArgument(1)],
            'set': [
                CommandArgument.MakeNFileURLsArgument(1),
                CommandArgument.MakeZeroOrMoreCloudURLsArgument()
            ],
            'ch': [
                CommandArgument.MakeOneOrMoreBindingsArgument(),
                CommandArgument.MakeZeroOrMoreCloudURLsArgument()
            ],
        },
    )

    help_spec = Command.HelpSpec(
        help_name='iam',
        help_name_aliases=[],
        help_type='command_help',
        help_one_line_summary=('Get, set, or change'
                               ' bucket and/or object IAM permissions.'),
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={
            'get': _get_help_text,
            'set': _set_help_text,
            'ch': _ch_help_text,
        })

    def GetIamHelper(self, storage_url, thread_state=None):
        """Gets an IAM policy for a single, resolved bucket / object URL.

    Args:
      storage_url: A CloudUrl instance with no wildcards, pointing to a
                   specific bucket or object.
      thread_state: CloudApiDelegator instance which is passed from
                    command.WorkerThread.__init__() if the global -m flag is
                    specified. Will use self.gsutil_api if thread_state is set
                    to None.

    Returns:
      Policy instance.
    """

        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

        if storage_url.IsBucket():
            policy = gsutil_api.GetBucketIamPolicy(
                storage_url.bucket_name,
                provider=storage_url.scheme,
                fields=['bindings', 'etag'],
            )
        else:
            policy = gsutil_api.GetObjectIamPolicy(
                storage_url.bucket_name,
                storage_url.object_name,
                generation=storage_url.generation,
                provider=storage_url.scheme,
                fields=['bindings', 'etag'],
            )
        return policy

    def _GetIam(self, thread_state=None):
        """Gets IAM policy for single bucket or object."""

        pattern = self.args[0]

        matches = PluralityCheckableIterator(
            self.WildcardIterator(pattern).IterAll(
                bucket_listing_fields=['name']))
        if matches.IsEmpty():
            raise CommandException('%s matched no URLs' % pattern)
        if matches.HasPlurality():
            raise CommandException(
                '%s matched more than one URL, which is not allowed by the %s '
                'command' % (pattern, self.command_name))

        storage_url = StorageUrlFromString(list(matches)[0].url_string)
        policy = self.GetIamHelper(storage_url, thread_state=thread_state)
        print(json.dumps(json.loads(protojson.encode_message(policy)),
                         sort_keys=True,
                         indent=2))

    def _SetIamHelperInternal(self, storage_url, policy, thread_state=None):
        """Sets IAM policy for a single, resolved bucket / object URL.

    Args:
      storage_url: A CloudUrl instance with no wildcards, pointing to a
                   specific bucket or object.
      policy: A Policy object to set on the bucket / object.
      thread_state: CloudApiDelegator instance which is passed from
                    command.WorkerThread.__init__() if the -m flag is
                    specified. Will use self.gsutil_api if thread_state is set
                    to None.

    Raises:
      ServiceException passed from the API call if an HTTP error was returned.
    """

        # SetIamHelper may be called by a command.WorkerThread. In the
        # single-threaded case, WorkerThread will not pass the CloudApiDelegator
        # instance to thread_state. GetCloudInstance is called to resolve the
        # edge case.
        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

        if storage_url.IsBucket():
            gsutil_api.SetBucketIamPolicy(storage_url.bucket_name,
                                          policy,
                                          provider=storage_url.scheme)
        else:
            gsutil_api.SetObjectIamPolicy(storage_url.bucket_name,
                                          storage_url.object_name,
                                          policy,
                                          generation=storage_url.generation,
                                          provider=storage_url.scheme)

    def SetIamHelper(self, storage_url, policy, thread_state=None):
        """Handles the potential exception raised by the internal set function."""
        try:
            self._SetIamHelperInternal(storage_url,
                                       policy,
                                       thread_state=thread_state)
        except ServiceException:
            if self.continue_on_error:
                self.everything_set_okay = False
            else:
                raise

    def PatchIamHelper(self, storage_url, bindings_tuples, thread_state=None):
        """Patches an IAM policy for a single, resolved bucket / object URL.

    The patch is applied by altering the policy from an IAM get request, and
    setting the new IAM with the specified etag. Because concurrent IAM set
    requests may alter the etag, we may need to retry this operation several
    times before success.

    Args:
      storage_url: A CloudUrl instance with no wildcards, pointing to a
                   specific bucket or object.
      bindings_tuples: A list of BindingsTuple instances.
      thread_state: CloudApiDelegator instance which is passed from
                    command.WorkerThread.__init__() if the -m flag is
                    specified. Will use self.gsutil_api if thread_state is set
                    to None.
    """
        try:
            self._PatchIamHelperInternal(storage_url,
                                         bindings_tuples,
                                         thread_state=thread_state)
        except ServiceException:
            if self.continue_on_error:
                self.everything_set_okay = False
            else:
                raise
        except IamChOnResourceWithConditionsException as e:
            if self.continue_on_error:
                self.everything_set_okay = False
                self.tried_ch_on_resource_with_conditions = True
                self.logger.debug(e.message)
            else:
                raise CommandException(e.message)

    @Retry(PreconditionException, tries=3, timeout_secs=1.0)
    def _PatchIamHelperInternal(self,
                                storage_url,
                                bindings_tuples,
                                thread_state=None):

        policy = self.GetIamHelper(storage_url, thread_state=thread_state)
        (etag, bindings) = (policy.etag, policy.bindings)

        # If any of the bindings have conditions present, raise an exception.
        # See the docstring for the IamChOnResourceWithConditionsException class
        # for more details on why we raise this exception.
        for binding in bindings:
            if binding.condition:
                message = 'Could not patch IAM policy for %s.' % storage_url
                message += '\n'
                message += '\n'.join(
                    textwrap.wrap(
                        'The resource had conditions present in its IAM policy bindings, '
                        'which is not supported by "iam ch". %s' %
                        IAM_CH_CONDITIONS_WORKAROUND_MSG))
                raise IamChOnResourceWithConditionsException(message)

        # Create a backup which is untainted by any references to the original
        # bindings.
        orig_bindings = list(bindings)

        for (is_grant, diff) in bindings_tuples:
            bindings = PatchBindings(bindings, BindingsTuple(is_grant, diff))

        if IsEqualBindings(bindings, orig_bindings):
            self.logger.info('No changes made to %s', storage_url)
            return

        policy = apitools_messages.Policy(bindings=bindings, etag=etag)

        # We explicitly wish for etag mismatches to raise an error and allow this
        # function to error out, so we are bypassing the exception handling offered
        # by IamCommand.SetIamHelper in lieu of our own handling (@Retry).
        self._SetIamHelperInternal(storage_url,
                                   policy,
                                   thread_state=thread_state)

    def _PatchIam(self):
        self.continue_on_error = False
        self.recursion_requested = False

        patch_bindings_tuples = []

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o in ['-r', '-R']:
                    self.recursion_requested = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-d':
                    patch_bindings_tuples.append(BindingStringToTuple(
                        False, a))

        patterns = []

        # N.B.: self.sub_opts stops taking in options at the first non-flagged
        # token. The rest of the tokens are sent to self.args. Thus, in order to
        # handle input of the form "-d <binding> <binding> <url>", we will have to
        # parse self.args for a mix of both bindings and CloudUrls. We are not
        # expecting to come across the -r, -f flags here.
        it = iter(self.args)
        for token in it:
            if STORAGE_URI_REGEX.match(token):
                patterns.append(token)
                break
            if token == '-d':
                patch_bindings_tuples.append(
                    BindingStringToTuple(False, next(it)))
            else:
                patch_bindings_tuples.append(BindingStringToTuple(True, token))
        if not patch_bindings_tuples:
            raise CommandException('Must specify at least one binding.')

        # All following arguments are urls.
        for token in it:
            patterns.append(token)

        self.everything_set_okay = True
        self.tried_ch_on_resource_with_conditions = False
        threaded_wildcards = []
        for pattern in patterns:
            surl = StorageUrlFromString(pattern)
            try:
                if surl.IsBucket():
                    if self.recursion_requested:
                        surl.object_name = '*'
                        threaded_wildcards.append(surl.url_string)
                    else:
                        self.PatchIamHelper(surl, patch_bindings_tuples)
                else:
                    threaded_wildcards.append(surl.url_string)
            except AttributeError:
                error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
                if set(surl.object_name).issubset(set('-Rrf')):
                    error_msg += (
                        ' This resource handle looks like a flag, which must appear '
                        'before all bindings. See "gsutil help iam ch" for more details.'
                    )
                raise CommandException(error_msg)

        if threaded_wildcards:
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations,
                bucket_listing_fields=['name'])

            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions)

            serialized_bindings_tuples_it = itertools.repeat(
                [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
            self.Apply(_PatchIamWrapper,
                       zip(serialized_bindings_tuples_it,
                           name_expansion_iterator),
                       _PatchIamExceptionHandler,
                       fail_on_error=not self.continue_on_error,
                       seek_ahead_iterator=seek_ahead_iterator)

            self.everything_set_okay &= not GetFailureCount() > 0

        # TODO: Add an error counter for files and objects.
        if not self.everything_set_okay:
            msg = 'Some IAM policies could not be patched.'
            if self.tried_ch_on_resource_with_conditions:
                msg += '\n'
                msg += '\n'.join(
                    textwrap.wrap(
                        'Some resources had conditions present in their IAM policy '
                        'bindings, which is not supported by "iam ch". %s' %
                        (IAM_CH_CONDITIONS_WORKAROUND_MSG)))
            raise CommandException(msg)

    # TODO(iam-beta): Add an optional flag to specify etag and edit the policy
    # accordingly to be passed into the helper functions.
    def _SetIam(self):
        """Set IAM policy for given wildcards on the command line."""

        self.continue_on_error = False
        self.recursion_requested = False
        self.all_versions = False
        force_etag = False
        etag = ''
        if self.sub_opts:
            for o, arg in self.sub_opts:
                if o in ['-r', '-R']:
                    self.recursion_requested = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-a':
                    self.all_versions = True
                elif o == '-e':
                    etag = str(arg)
                    force_etag = True
                else:
                    self.RaiseInvalidArgumentException()

        file_url = self.args[0]
        patterns = self.args[1:]

        # Load the IAM policy file and raise error if the file is invalid JSON or
        # does not exist.
        try:
            with open(file_url, 'r') as fp:
                policy = json.loads(fp.read())
        except IOError:
            raise ArgumentException(
                'Specified IAM policy file "%s" does not exist.' % file_url)
        except ValueError as e:
            self.logger.debug('Invalid IAM policy file, ValueError:\n%s', e)
            raise ArgumentException('Invalid IAM policy file "%s".' % file_url)

        bindings = policy.get('bindings', [])
        if not force_etag:
            etag = policy.get('etag', '')

        policy_json = json.dumps({'bindings': bindings, 'etag': etag})
        try:
            policy = protojson.decode_message(apitools_messages.Policy,
                                              policy_json)
        except DecodeError:
            raise ArgumentException(
                'Invalid IAM policy file "%s" or etag "%s".' %
                (file_url, etag))

        self.everything_set_okay = True

        # This list of wildcard strings will be handled by NameExpansionIterator.
        threaded_wildcards = []

        for pattern in patterns:
            surl = StorageUrlFromString(pattern)
            if surl.IsBucket():
                if self.recursion_requested:
                    surl.object_name = '*'
                    threaded_wildcards.append(surl.url_string)
                else:
                    self.SetIamHelper(surl, policy)
            else:
                threaded_wildcards.append(surl.url_string)

        # N.B.: If threaded_wildcards contains a non-existent bucket
        # (e.g. ["gs://non-existent", "gs://existent"]), NameExpansionIterator
        # will raise an exception in iter.next. This halts all iteration, even
        # when -f is set. This behavior is also evident in acl set. This behavior
        # also appears for any exception that will be raised when iterating over
        # wildcard expansions (access denied if bucket cannot be listed, etc.).
        if threaded_wildcards:
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations,
                bucket_listing_fields=['name'])

            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions)

            policy_it = itertools.repeat(protojson.encode_message(policy))
            self.Apply(_SetIamWrapper,
                       zip(policy_it, name_expansion_iterator),
                       _SetIamExceptionHandler,
                       fail_on_error=not self.continue_on_error,
                       seek_ahead_iterator=seek_ahead_iterator)

            self.everything_set_okay &= not GetFailureCount() > 0

        # TODO: Add an error counter for files and objects.
        if not self.everything_set_okay:
            raise CommandException('Some IAM policies could not be set.')

    def RunCommand(self):
        """Command entry point for the acl command."""
        action_subcommand = self.args.pop(0)
        self.ParseSubOpts(check_args=True)
        # Commands with both suboptions and subcommands need to reparse for
        # suboptions, so we log again.
        LogCommandParams(sub_opts=self.sub_opts)
        self.def_acl = False
        if action_subcommand == 'get':
            LogCommandParams(subcommands=[action_subcommand])
            self._GetIam()
        elif action_subcommand == 'set':
            LogCommandParams(subcommands=[action_subcommand])
            self._SetIam()
        elif action_subcommand == 'ch':
            LogCommandParams(subcommands=[action_subcommand])
            self._PatchIam()
        else:
            raise CommandException(
                'Invalid subcommand "%s" for the %s command.\n'
                'See "gsutil help iam".' %
                (action_subcommand, self.command_name))

        return 0
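
The _PatchIamHelperInternal method above is a read-modify-write guarded by the policy etag; the @Retry decorator re-runs it on PreconditionException, so a stale etag triggers a fresh read instead of a lost update. A generic sketch of that optimistic-concurrency loop (get_policy, set_policy, apply_diff, and PreconditionError are illustrative stand-ins, not gsutil APIs):

class PreconditionError(Exception):
    """Stand-in for the server rejecting a write made with a stale etag."""


def patch_with_retry(get_policy, set_policy, apply_diff, tries=3):
    for attempt in range(tries):
        policy = get_policy()  # read the current bindings and etag
        new_bindings = apply_diff(policy['bindings'])
        try:
            # The server rejects this write if the etag no longer matches.
            set_policy({'bindings': new_bindings, 'etag': policy['etag']})
            return
        except PreconditionError:
            if attempt == tries - 1:
                raise  # concurrent writers kept winning; give up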
Example #7
class StatCommand(Command):
  """Implementation of gsutil stat command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'stat',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=NO_MAX,
      supported_sub_args='',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreCloudURLsArgument()
      ]
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='stat',
      help_name_aliases=[],
      help_type='command_help',
      help_one_line_summary='Display object status',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def RunCommand(self):
    """Command entry point for stat command."""
    stat_fields = ENCRYPTED_FIELDS + UNENCRYPTED_FULL_LISTING_FIELDS
    found_nonmatching_arg = False
    for url_str in self.args:
      arg_matches = 0
      url = StorageUrlFromString(url_str)
      if not url.IsObject():
        raise CommandException('The stat command only works with object URLs')
      try:
        if ContainsWildcard(url_str):
          blr_iter = self.WildcardIterator(url_str).IterObjects(
              bucket_listing_fields=stat_fields)
        else:
          try:
            single_obj = self.gsutil_api.GetObjectMetadata(
                url.bucket_name, url.object_name, generation=url.generation,
                provider=url.scheme, fields=stat_fields)
          except EncryptionException:
            # Retry without requesting hashes.
            single_obj = self.gsutil_api.GetObjectMetadata(
                url.bucket_name, url.object_name, generation=url.generation,
                provider=url.scheme, fields=UNENCRYPTED_FULL_LISTING_FIELDS)
          blr_iter = [BucketListingObject(url, root_object=single_obj)]
        for blr in blr_iter:
          if blr.IsObject():
            arg_matches += 1
            # TODO: Request fewer fields if we're not printing the object.
            if logging.getLogger().isEnabledFor(logging.INFO):
              PrintFullInfoAboutObject(blr, incl_acl=False)
      except AccessDeniedException:
        if logging.getLogger().isEnabledFor(logging.INFO):
          sys.stderr.write('You aren\'t authorized to read %s - skipping' %
                           url_str)
      except InvalidUrlError:
        raise
      except NotFoundException:
        pass
      if not arg_matches:
        if logging.getLogger().isEnabledFor(logging.INFO):
          sys.stderr.write(NO_URLS_MATCHED_TARGET % url_str)
        found_nonmatching_arg = True
    if found_nonmatching_arg:
      return 1
    return 0
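
Unlike Example #3, this version of StatCommand requests encryption-related fields and degrades gracefully: if the hash fields can't be served because the matching customer-supplied key isn't available, it retries with the unencrypted field set. The shape of that fallback, schematically (fetch and the field-set names are illustrative stand-ins):

class EncryptionException(Exception):
    """Stand-in for gsutil's exception when hash fields need a missing key."""


def get_metadata_with_fallback(fetch, full_fields, reduced_fields):
    # Prefer the full field set (includes hashes that require the object's
    # CSEK); fall back to the fields readable without the key.
    try:
        return fetch(full_fields)
    except EncryptionException:
        return fetch(reduced_fields)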
Example #8
class AclCommand(Command):
  """Implementation of gsutil acl command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'acl',
      command_name_aliases=['getacl', 'setacl', 'chacl'],
      usage_synopsis=_SYNOPSIS,
      min_args=2,
      max_args=NO_MAX,
      supported_sub_args='afRrg:u:d:p:',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments={
          'set': [
              CommandArgument.MakeFileURLOrCannedACLArgument(),
              CommandArgument.MakeZeroOrMoreCloudURLsArgument()
          ],
          'get': [
              CommandArgument.MakeNCloudURLsArgument(1)
          ],
          'ch': [
              CommandArgument.MakeZeroOrMoreCloudURLsArgument()
          ],
      }
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='acl',
      help_name_aliases=['getacl', 'setacl', 'chmod', 'chacl'],
      help_type='command_help',
      help_one_line_summary='Get, set, or change bucket and/or object ACLs',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={
          'get': _get_help_text, 'set': _set_help_text, 'ch': _ch_help_text},
  )

  def _CalculateUrlsStartArg(self):
    if not self.args:
      self.RaiseWrongNumberOfArgumentsException()
    if (self.args[0].lower() == 'set') or (self.command_alias_used == 'setacl'):
      return 1
    else:
      return 0

  def _SetAcl(self):
    """Parses options and sets ACLs on the specified buckets/objects."""
    self.continue_on_error = False
    if self.sub_opts:
      for o, unused_a in self.sub_opts:
        if o == '-a':
          self.all_versions = True
        elif o == '-f':
          self.continue_on_error = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
        else:
          self.RaiseInvalidArgumentException()
    try:
      self.SetAclCommandHelper(SetAclFuncWrapper, SetAclExceptionHandler)
    except AccessDeniedException as unused_e:
      self._WarnServiceAccounts()
      raise
    if not self.everything_set_okay:
      raise CommandException('ACLs for some objects could not be set.')
Example #9
class RewriteCommand(Command):
    """Implementation of gsutil rewrite command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'rewrite',
        command_name_aliases=[],
        usage_synopsis=_SYNOPSIS,
        min_args=0,
        max_args=NO_MAX,
        supported_sub_args='fkIrROs:',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='rewrite',
        help_name_aliases=['rekey', 'rotate'],
        help_type='command_help',
        help_one_line_summary='Rewrite objects',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def CheckProvider(self, url):
        if url.scheme != 'gs':
            raise CommandException(
                '"rewrite" called on URL with unsupported provider: %s' %
                str(url))

    def RunCommand(self):
        """Command entry point for the rewrite command."""
        self.continue_on_error = self.parallel_operations
        self.dest_storage_class = None
        self.no_preserve_acl = False
        self.read_args_from_stdin = False
        self.supported_transformation_flags = ['-k', '-s']
        self.transform_types = set()

        self.op_failure_count = 0
        self.boto_file_encryption_tuple, self.boto_file_encryption_sha256 = (
            GetEncryptionTupleAndSha256Hash())

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-f':
                    self.continue_on_error = True
                elif o == '-k':
                    self.transform_types.add(_TransformTypes.CRYPTO_KEY)
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-O':
                    self.no_preserve_acl = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True
                elif o == '-s':
                    self.transform_types.add(_TransformTypes.STORAGE_CLASS)
                    self.dest_storage_class = NormalizeStorageClass(a)

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rewrite command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        if not self.transform_types:
            raise CommandException(
                'rewrite command requires at least one transformation flag. '
                'Currently supported transformation flags: %s' %
                self.supported_transformation_flags)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        url_strs_generator = GenerationCheckGenerator(url_strs)

        # Convert recursive flag to flat wildcard to avoid performing multiple
        # listings.
        if self.recursion_requested:
            url_strs_generator = ConvertRecursiveToFlatWildcard(
                url_strs_generator)

        # Expand the source argument(s).
        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            url_strs_generator,
            self.recursion_requested,
            project_id=self.project_id,
            continue_on_error=self.continue_on_error
            or self.parallel_operations,
            bucket_listing_fields=['name', 'size'])

        seek_ahead_iterator = None
        # Cannot seek ahead with stdin args, since we can only iterate them
        # once without buffering in memory.
        if not self.read_args_from_stdin:
            # Perform the same recursive-to-flat conversion on original url_strs so
            # that it is as true to the original iterator as possible.
            seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                seek_ahead_url_strs,
                self.recursion_requested,
                all_versions=self.all_versions,
                project_id=self.project_id)

        # Perform rewrite requests in parallel (-m) mode, if requested.
        self.Apply(_RewriteFuncWrapper,
                   name_expansion_iterator,
                   _RewriteExceptionHandler,
                   fail_on_error=(not self.continue_on_error),
                   shared_attrs=['op_failure_count'],
                   seek_ahead_iterator=seek_ahead_iterator)

        if self.op_failure_count:
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be rewritten.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0

    def RewriteFunc(self, name_expansion_result, thread_state=None):
        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
        transform_url = name_expansion_result.expanded_storage_url
        # Make a local copy of the requested transformations for each thread. As
        # a redundant transformation for one object might not be redundant for
        # another, we wouldn't want to remove it from the transform_types set that
        # all threads share.
        transforms_to_perform = set(self.transform_types)

        self.CheckProvider(transform_url)

        # Get all fields so that we can ensure that the target metadata is
        # specified correctly.
        src_metadata = gsutil_api.GetObjectMetadata(
            transform_url.bucket_name,
            transform_url.object_name,
            generation=transform_url.generation,
            provider=transform_url.scheme)

        if self.no_preserve_acl:
            # Leave ACL unchanged.
            src_metadata.acl = []
        elif not src_metadata.acl:
            raise CommandException(
                'No OWNER permission found for object %s. OWNER permission is '
                'required for rewriting objects (otherwise their ACLs would be '
                'reset).' % transform_url)

        # Note: If other transform types are added, they must ensure that the
        # encryption key configuration matches the boto configuration, because
        # gsutil maintains an invariant that all objects it writes use the
        # encryption_key value (including decrypting if no key is present).
        src_encryption_sha256 = None
        if (src_metadata.customerEncryption
                and src_metadata.customerEncryption.keySha256):
            src_encryption_sha256 = src_metadata.customerEncryption.keySha256

        should_encrypt_target = self.boto_file_encryption_sha256 is not None
        source_was_encrypted = src_encryption_sha256 is not None
        using_same_encryption_key_value = (
            src_encryption_sha256 == self.boto_file_encryption_sha256)

        # Prevent accidental key rotation.
        if (_TransformTypes.CRYPTO_KEY not in transforms_to_perform
                and not using_same_encryption_key_value):
            raise EncryptionException(
                'The "-k" flag was not passed to the rewrite command, but the '
                'encryption_key value in your boto config file did not match the key '
                'used to encrypt the object "%s" (hash: %s). To encrypt the object '
                'using a different key, you must specify the "-k" flag.' %
                (transform_url, src_encryption_sha256))

        # Remove any redundant changes.

        # STORAGE_CLASS transform should be skipped if the target storage class
        # matches the existing storage class.
        if (_TransformTypes.STORAGE_CLASS in transforms_to_perform
                and self.dest_storage_class == NormalizeStorageClass(
                    src_metadata.storageClass)):
            transforms_to_perform.remove(_TransformTypes.STORAGE_CLASS)
            self.logger.info(
                'Redundant transform: %s already had storage class of '
                '%s.' % (transform_url, src_metadata.storageClass))

        # CRYPTO_KEY transform should be skipped if we're using the same encryption
        # key (if any) that was used to encrypt the source.
        if (_TransformTypes.CRYPTO_KEY in transforms_to_perform
                and using_same_encryption_key_value):
            if self.boto_file_encryption_sha256 is None:
                log_msg = '%s is already decrypted.' % transform_url
            else:
                log_msg = '%s already has current encryption key.' % transform_url
            transforms_to_perform.remove(_TransformTypes.CRYPTO_KEY)
            self.logger.info('Redundant transform: %s' % log_msg)

        if not transforms_to_perform:
            self.logger.info(
                'Skipping %s, all transformations were redundant.' %
                transform_url)
            return

        # Make a deep copy of the source metadata.
        dst_metadata = encoding.PyValueToMessage(
            apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

        # Remove some unnecessary/invalid fields.
        dst_metadata.customerEncryption = None
        dst_metadata.generation = None
        # Service has problems if we supply an ID, but it is responsible for
        # generating one, so it is not necessary to include it here.
        dst_metadata.id = None
        decryption_tuple = None
        # Use a generic operation name by default - this can be altered below for
        # specific transformations (encryption changes, etc.).
        operation_name = 'Rewriting'

        if source_was_encrypted:
            decryption_key = FindMatchingCryptoKey(src_encryption_sha256)
            if not decryption_key:
                raise EncryptionException(
                    'Missing decryption key with SHA256 hash %s. No decryption key '
                    'matches object %s' %
                    (src_encryption_sha256, transform_url))
            decryption_tuple = CryptoTupleFromKey(decryption_key)

        if _TransformTypes.CRYPTO_KEY in transforms_to_perform:
            if not source_was_encrypted:
                operation_name = 'Encrypting'
            elif not should_encrypt_target:
                operation_name = 'Decrypting'
            else:
                operation_name = 'Rotating'

        if _TransformTypes.STORAGE_CLASS in transforms_to_perform:
            dst_metadata.storageClass = self.dest_storage_class

        # TODO: Remove this call (used to verify tests) and make it processed by
        # the UIThread.
        sys.stderr.write(
            _ConstructAnnounceText(operation_name, transform_url.url_string))

        # Message indicating beginning of operation.
        gsutil_api.status_queue.put(
            FileMessage(transform_url,
                        None,
                        time.time(),
                        finished=False,
                        size=src_metadata.size,
                        message_type=FileMessage.FILE_REWRITE))

        progress_callback = FileProgressCallbackHandler(
            gsutil_api.status_queue,
            src_url=transform_url,
            operation_name=operation_name).call

        gsutil_api.CopyObject(src_metadata,
                              dst_metadata,
                              src_generation=transform_url.generation,
                              preconditions=self.preconditions,
                              progress_callback=progress_callback,
                              decryption_tuple=decryption_tuple,
                              encryption_tuple=self.boto_file_encryption_tuple,
                              provider=transform_url.scheme,
                              fields=[])

        # Message indicating end of operation.
        gsutil_api.status_queue.put(
            FileMessage(transform_url,
                        None,
                        time.time(),
                        finished=True,
                        size=src_metadata.size,
                        message_type=FileMessage.FILE_REWRITE))
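
The -r/-R handling above funnels URLs through ConvertRecursiveToFlatWildcard so that one flat '**' listing replaces a recursive walk. A minimal sketch of that idea (a simplified, hypothetical stand-in, not gsutil's implementation):

def convert_recursive_to_flat_wildcard(url_strs):
    # Map each URL to a trailing '**' wildcard so a single listing request
    # covers everything the recursive expansion would have visited.
    for url_str in url_strs:
        yield '%s/**' % url_str.rstrip('/*')

# e.g. ['gs://bucket/dir'] yields 'gs://bucket/dir/**'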
Example No. 10
class RewriteCommand(Command):
    """Implementation of gsutil rewrite command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'rewrite',
        command_name_aliases=[],
        usage_synopsis=_SYNOPSIS,
        min_args=0,
        max_args=NO_MAX,
        supported_sub_args='fkIrRO',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='rewrite',
        help_name_aliases=['rekey', 'rotate'],
        help_type='command_help',
        help_one_line_summary='Rewrite objects',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def CheckProvider(self, url):
        if url.scheme != 'gs':
            raise CommandException(
                '"rewrite" called on URL with unsupported provider (%s).' %
                str(url))

    def RunCommand(self):
        """Command entry point for the rewrite command."""
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.no_preserve_acl = False
        self.supported_transformation_flags = ['-k']
        self.transform_types = []

        self.op_failure_count = 0
        self.current_encryption_tuple, self.current_encryption_sha256 = (
            GetEncryptionTupleAndSha256Hash())

        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-f':
                    self.continue_on_error = True
                elif o == '-k':
                    self.transform_types.append(_TransformTypes.CRYPTO_KEY)
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-O':
                    self.no_preserve_acl = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rewrite command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        url_strs = GenerationCheckGenerator(url_strs)

        if not self.transform_types:
            raise CommandException(
                'rewrite command requires at least one transformation flag. '
                'Currently supported transformation flags: %s' %
                self.supported_transformation_flags)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        # Convert recursive flag to flat wildcard to avoid performing multiple
        # listings.
        if self.recursion_requested:
            url_strs = ConvertRecursiveToFlatWildcard(url_strs)

        # Expand the source argument(s).
        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            url_strs,
            self.recursion_requested,
            project_id=self.project_id,
            continue_on_error=self.continue_on_error
            or self.parallel_operations)

        # Perform rewrite requests in parallel (-m) mode, if requested.
        self.Apply(_RewriteFuncWrapper,
                   name_expansion_iterator,
                   _RewriteExceptionHandler,
                   fail_on_error=(not self.continue_on_error),
                   shared_attrs=['op_failure_count'])

        if self.op_failure_count:
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be rewritten.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0

    def RewriteFunc(self, name_expansion_result, thread_state=None):
        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

        self.CheckProvider(name_expansion_result.expanded_storage_url)

        # If other transform types are added here, they must ensure that the
        # encryption key configuration matches the boto configuration, because
        # gsutil maintains an invariant that all objects it writes use the
        # encryption_key value (including decrypting if no key is present).
        if _TransformTypes.CRYPTO_KEY in self.transform_types:
            self.CryptoRewrite(name_expansion_result.expanded_storage_url,
                               gsutil_api)

    def CryptoRewrite(self, transform_url, gsutil_api):
        """Make the cloud object at transform_url match encryption configuration.

    Args:
      transform_url: CloudUrl to rewrite.
      gsutil_api: gsutil CloudApi instance for making API calls.
    """
        # Get all fields so that we can ensure that the target metadata is
        # specified correctly.
        src_metadata = gsutil_api.GetObjectMetadata(
            transform_url.bucket_name,
            transform_url.object_name,
            generation=transform_url.generation,
            provider=transform_url.scheme)

        if self.no_preserve_acl:
            # Leave ACL unchanged.
            src_metadata.acl = []
        elif not src_metadata.acl:
            raise CommandException(
                'No OWNER permission found for object %s. OWNER permission is '
                'required for rewriting objects (otherwise their ACLs would be '
                'reset).' % transform_url)

        src_encryption_sha256 = None
        if (src_metadata.customerEncryption
                and src_metadata.customerEncryption.keySha256):
            src_encryption_sha256 = src_metadata.customerEncryption.keySha256

        if src_encryption_sha256 == self.current_encryption_sha256:
            if self.current_encryption_sha256 is not None:
                self.logger.info(
                    'Skipping %s, already has current encryption key' %
                    transform_url)
            else:
                self.logger.info('Skipping %s, already decrypted' %
                                 transform_url)
        else:
            # Make a deep copy of the source metadata
            dst_metadata = encoding.PyValueToMessage(
                apitools_messages.Object,
                encoding.MessageToPyValue(src_metadata))

            # Remove some unnecessary/invalid fields.
            dst_metadata.customerEncryption = None
            dst_metadata.generation = None
            # Service has problems if we supply an ID, but it is responsible for
            # generating one, so it is not necessary to include it here.
            dst_metadata.id = None
            decryption_tuple = None

            if src_encryption_sha256 is None:
                announce_text = 'Encrypting'
            else:
                decryption_key = FindMatchingCryptoKey(src_encryption_sha256)
                if not decryption_key:
                    raise EncryptionException(
                        'Missing decryption key with SHA256 hash %s. No decryption key '
                        'matches object %s' %
                        (src_encryption_sha256, transform_url))
                decryption_tuple = CryptoTupleFromKey(decryption_key)

                if self.current_encryption_sha256 is None:
                    announce_text = 'Decrypting'
                else:
                    announce_text = 'Rotating'

            progress_callback = FileProgressCallbackHandler(
                ConstructAnnounceText(announce_text, transform_url.url_string),
                gsutil_api.status_queue).call

            gsutil_api.CopyObject(
                src_metadata,
                dst_metadata,
                src_generation=transform_url.generation,
                preconditions=self.preconditions,
                progress_callback=progress_callback,
                decryption_tuple=decryption_tuple,
                encryption_tuple=self.current_encryption_tuple,
                provider=transform_url.scheme,
                fields=[])
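
The encrypt/decrypt/rotate branching in CryptoRewrite above reduces to a small decision table on the source key hash and the configured key hash. Restated as a standalone helper (hypothetical name, mirroring the logic above):

def announce_text_for(src_sha256, current_sha256):
    # None on either side means no CSEK is in play for that side.
    if src_sha256 == current_sha256:
        return None  # redundant; CryptoRewrite logs a skip instead
    if src_sha256 is None:
        return 'Encrypting'
    if current_sha256 is None:
        return 'Decrypting'
    return 'Rotating'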
Example No. 11
class RewriteCommand(Command):
  """Implementation of gsutil rewrite command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'rewrite',
      command_name_aliases=[],
      usage_synopsis=_SYNOPSIS,
      min_args=0,
      max_args=NO_MAX,
      supported_sub_args='fkIrROs:',
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=0,
      gs_api_support=[ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='rewrite',
      help_name_aliases=['rekey', 'rotate'],
      help_type='command_help',
      help_one_line_summary='Rewrite objects',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def CheckProvider(self, url):
    if url.scheme != 'gs':
      raise CommandException(
          '"rewrite" called on URL with unsupported provider: %s' % str(url))

  def RunCommand(self):
    """Command entry point for the rewrite command."""
    self.continue_on_error = self.parallel_operations
    self.csek_hash_to_keywrapper = {}
    self.dest_storage_class = None
    self.no_preserve_acl = False
    self.read_args_from_stdin = False
    self.supported_transformation_flags = ['-k', '-s']
    self.transform_types = set()

    self.op_failure_count = 0
    self.boto_file_encryption_keywrapper = GetEncryptionKeyWrapper(config)
    self.boto_file_encryption_sha256 = (
        self.boto_file_encryption_keywrapper.crypto_key_sha256
        if self.boto_file_encryption_keywrapper else None)

    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-f':
          self.continue_on_error = True
        elif o == '-k':
          self.transform_types.add(_TransformTypes.CRYPTO_KEY)
        elif o == '-I':
          self.read_args_from_stdin = True
        elif o == '-O':
          self.no_preserve_acl = True
        elif o == '-r' or o == '-R':
          self.recursion_requested = True
          self.all_versions = True
        elif o == '-s':
          self.transform_types.add(_TransformTypes.STORAGE_CLASS)
          self.dest_storage_class = NormalizeStorageClass(a)

    if self.read_args_from_stdin:
      if self.args:
        raise CommandException('No arguments allowed with the -I flag.')
      url_strs = StdinIterator()
    else:
      if not self.args:
        raise CommandException('The rewrite command (without -I) expects at '
                               'least one URL.')
      url_strs = self.args

    if not self.transform_types:
      raise CommandException(
          'rewrite command requires at least one transformation flag. '
          'Currently supported transformation flags: %s' %
          self.supported_transformation_flags)

    self.preconditions = PreconditionsFromHeaders(self.headers or {})

    url_strs_generator = GenerationCheckGenerator(url_strs)

    # Convert recursive flag to flat wildcard to avoid performing multiple
    # listings.
    if self.recursion_requested:
      url_strs_generator = ConvertRecursiveToFlatWildcard(url_strs_generator)

    # Expand the source argument(s).
    name_expansion_iterator = NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        url_strs_generator,
        self.recursion_requested,
        project_id=self.project_id,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name', 'size'])

    seek_ahead_iterator = None
    # Cannot seek ahead with stdin args, since we can only iterate them
    # once without buffering in memory.
    if not self.read_args_from_stdin:
      # Perform the same recursive-to-flat conversion on original url_strs so
      # that it is as true to the original iterator as possible.
      seek_ahead_url_strs = ConvertRecursiveToFlatWildcard(url_strs)
      seek_ahead_iterator = SeekAheadNameExpansionIterator(
          self.command_name,
          self.debug,
          self.GetSeekAheadGsutilApi(),
          seek_ahead_url_strs,
          self.recursion_requested,
          all_versions=self.all_versions,
          project_id=self.project_id)

    # Rather than have each worker repeatedly calculate the sha256 hash for each
    # decryption_key in the boto config, do this once now and cache the results.
    for i in range(0, MAX_DECRYPTION_KEYS):
      key_number = i + 1
      keywrapper = CryptoKeyWrapperFromKey(
          config.get('GSUtil', 'decryption_key%s' % str(key_number), None))
      if keywrapper is None:
        # Stop at first attribute absence in lexicographical iteration.
        break
      if keywrapper.crypto_type == CryptoKeyType.CSEK:
        self.csek_hash_to_keywrapper[keywrapper.crypto_key_sha256] = keywrapper
    # Also include the encryption_key, since it should be used to decrypt and
    # then encrypt if the object's CSEK should remain the same.
    if self.boto_file_encryption_sha256 is not None:
      self.csek_hash_to_keywrapper[self.boto_file_encryption_sha256] = (
          self.boto_file_encryption_keywrapper)

    if self.boto_file_encryption_keywrapper is None:
      msg = '\n'.join(
          textwrap.wrap(
              'NOTE: No encryption_key was specified in the boto configuration '
              'file, so gsutil will not provide an encryption key in its rewrite '
              'API requests. This will decrypt the objects unless they are in '
              'buckets with a default KMS key set, in which case the service '
              'will automatically encrypt the rewritten objects with that key.')
      )
      print('%s\n' % msg, file=sys.stderr)

    # Perform rewrite requests in parallel (-m) mode, if requested.
    self.Apply(_RewriteFuncWrapper,
               name_expansion_iterator,
               _RewriteExceptionHandler,
               fail_on_error=(not self.continue_on_error),
               shared_attrs=['op_failure_count'],
               seek_ahead_iterator=seek_ahead_iterator)

    if self.op_failure_count:
      plural_str = 's' if self.op_failure_count > 1 else ''
      raise CommandException('%d file%s/object%s could not be rewritten.' %
                             (self.op_failure_count, plural_str, plural_str))

    return 0

  def RewriteFunc(self, name_expansion_result, thread_state=None):
    gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)
    transform_url = name_expansion_result.expanded_storage_url

    self.CheckProvider(transform_url)

    # Get all fields so that we can ensure that the target metadata is
    # specified correctly.
    src_metadata = gsutil_api.GetObjectMetadata(
        transform_url.bucket_name,
        transform_url.object_name,
        generation=transform_url.generation,
        provider=transform_url.scheme)

    if self.no_preserve_acl:
      # Leave ACL unchanged.
      src_metadata.acl = []
    elif not src_metadata.acl:
      raise CommandException(
          'No OWNER permission found for object %s. OWNER permission is '
          'required for rewriting objects (otherwise their ACLs would be '
          'reset).' % transform_url)

    # Note: If other transform types are added, they must ensure that the
    # encryption key configuration matches the boto configuration, because
    # gsutil maintains an invariant that all objects it writes use the
    # encryption_key value (including decrypting if no key is present).

    # Store metadata about src encryption to make logic below easier to read.
    src_encryption_kms_key = (src_metadata.kmsKeyName
                              if src_metadata.kmsKeyName else None)

    src_encryption_sha256 = None
    if (src_metadata.customerEncryption and
        src_metadata.customerEncryption.keySha256):
      src_encryption_sha256 = src_metadata.customerEncryption.keySha256
      # In Python 3 the cached key hashes are bytes, so encode this
      # (always-ASCII) value to match.
      src_encryption_sha256 = src_encryption_sha256.encode('ascii')

    src_was_encrypted = (src_encryption_sha256 is not None or
                         src_encryption_kms_key is not None)

    # Also store metadata about dest encryption.
    dest_encryption_kms_key = None
    if (self.boto_file_encryption_keywrapper is not None and
        self.boto_file_encryption_keywrapper.crypto_type == CryptoKeyType.CMEK):
      dest_encryption_kms_key = self.boto_file_encryption_keywrapper.crypto_key

    dest_encryption_sha256 = None
    if (self.boto_file_encryption_keywrapper is not None and
        self.boto_file_encryption_keywrapper.crypto_type == CryptoKeyType.CSEK):
      dest_encryption_sha256 = (
          self.boto_file_encryption_keywrapper.crypto_key_sha256)

    should_encrypt_dest = self.boto_file_encryption_keywrapper is not None

    encryption_unchanged = (src_encryption_sha256 == dest_encryption_sha256 and
                            src_encryption_kms_key == dest_encryption_kms_key)

    # Prevent accidental key rotation.
    if (_TransformTypes.CRYPTO_KEY not in self.transform_types and
        not encryption_unchanged):
      raise EncryptionException(
          'The "-k" flag was not passed to the rewrite command, but the '
          'encryption_key value in your boto config file did not match the key '
          'used to encrypt the object "%s" (hash: %s). To encrypt the object '
          'using a different key, you must specify the "-k" flag.' %
          (transform_url, src_encryption_sha256))

    # Determine if we can skip this rewrite operation (this should only be done
    # when ALL of the specified transformations are redundant).
    redundant_transforms = []

    # STORAGE_CLASS transform is redundant if the target storage class matches
    # the existing storage class.
    if (_TransformTypes.STORAGE_CLASS in self.transform_types and
        self.dest_storage_class == NormalizeStorageClass(
            src_metadata.storageClass)):
      redundant_transforms.append('storage class')

    # CRYPTO_KEY transform is redundant if we're using the same encryption
    # key that was used to encrypt the source. However, if no encryption key was
    # specified, we should still perform the rewrite. This results in the
    # rewritten object either being encrypted with its bucket's default KMS key
    # or having no CSEK/CMEK encryption applied. While we could attempt fetching
    # the bucket's metadata and checking its default KMS key before performing
    # the rewrite (in the case where we appear to be transitioning from
    # no key to no key), that is vulnerable to the race condition where the
    # default KMS key is changed between when we check it and when we rewrite
    # the object.
    if (_TransformTypes.CRYPTO_KEY in self.transform_types and
        should_encrypt_dest and encryption_unchanged):
      redundant_transforms.append('encryption key')

    if len(redundant_transforms) == len(self.transform_types):
      self.logger.info('Skipping %s, all transformations were redundant: %s' %
                       (transform_url, redundant_transforms))
      return

    # First make a deep copy of the source metadata, then overwrite any
    # requested attributes (e.g. if a storage class change was specified).
    dest_metadata = encoding.PyValueToMessage(
        apitools_messages.Object, encoding.MessageToPyValue(src_metadata))

    # Remove some unnecessary/invalid fields.
    dest_metadata.generation = None
    # Service has problems if we supply an ID, but it is responsible for
    # generating one, so it is not necessary to include it here.
    dest_metadata.id = None
    # Ensure we don't copy over the KMS key name or CSEK key info from the
    # source object; those should only come from the boto config's
    # encryption_key value.
    dest_metadata.customerEncryption = None
    dest_metadata.kmsKeyName = None

    # Both a storage class change and CMEK encryption should be set as part of
    # the dest object's metadata. CSEK encryption, if specified, is added to the
    # request later via headers obtained from the keywrapper value passed to
    # encryption_tuple.
    if _TransformTypes.STORAGE_CLASS in self.transform_types:
      dest_metadata.storageClass = self.dest_storage_class
    if dest_encryption_kms_key is not None:
      dest_metadata.kmsKeyName = dest_encryption_kms_key

    # Make sure we have the CSEK key necessary to decrypt.
    decryption_keywrapper = None
    if src_encryption_sha256 is not None:
      if src_encryption_sha256 in self.csek_hash_to_keywrapper:
        decryption_keywrapper = (
            self.csek_hash_to_keywrapper[src_encryption_sha256])
      else:
        raise EncryptionException(
            'Missing decryption key with SHA256 hash %s. No decryption key '
            'matches object %s' % (src_encryption_sha256, transform_url))

    operation_name = 'Rewriting'
    if _TransformTypes.CRYPTO_KEY in self.transform_types:
      if src_was_encrypted and should_encrypt_dest:
        if not encryption_unchanged:
          operation_name = 'Rotating'
        # Else, keep "Rewriting". This might occur when -k was specified and was
        # redundant, but we're performing the operation anyway because some
        # other transformation was not redundant.
      elif src_was_encrypted and not should_encrypt_dest:
        operation_name = 'Decrypting'
      elif not src_was_encrypted and should_encrypt_dest:
        operation_name = 'Encrypting'

    # TODO: Remove this call (used to verify tests) and make it processed by
    # the UIThread.
    sys.stderr.write(
        _ConstructAnnounceText(operation_name, transform_url.url_string))
    sys.stderr.flush()

    # Message indicating beginning of operation.
    gsutil_api.status_queue.put(
        FileMessage(transform_url,
                    None,
                    time.time(),
                    finished=False,
                    size=src_metadata.size,
                    message_type=FileMessage.FILE_REWRITE))

    progress_callback = FileProgressCallbackHandler(
        gsutil_api.status_queue,
        src_url=transform_url,
        operation_name=operation_name).call

    gsutil_api.CopyObject(src_metadata,
                          dest_metadata,
                          src_generation=transform_url.generation,
                          preconditions=self.preconditions,
                          progress_callback=progress_callback,
                          decryption_tuple=decryption_keywrapper,
                          encryption_tuple=self.boto_file_encryption_keywrapper,
                          provider=transform_url.scheme,
                          fields=[])

    # Message indicating end of operation.
    gsutil_api.status_queue.put(
        FileMessage(transform_url,
                    None,
                    time.time(),
                    finished=True,
                    size=src_metadata.size,
                    message_type=FileMessage.FILE_REWRITE))
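
The decryption-key cache built in RunCommand above probes the boto config entries decryption_key1..decryption_keyN in order and stops at the first gap, so e.g. a decryption_key3 with no decryption_key2 is never read. A standalone sketch of that probe-and-stop behavior (hypothetical helper; config_get stands in for config.get):

def collect_decryption_keys(config_get, max_keys):
    keys = []
    for i in range(1, max_keys + 1):
        key = config_get('GSUtil', 'decryption_key%d' % i, None)
        if key is None:
            break  # first absent entry ends the scan
        keys.append(key)
    return keys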
Example No. 12
class UrlSignCommand(Command):
    """Implementation of gsutil url_sign command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'signurl',
        command_name_aliases=['signedurl', 'queryauth'],
        usage_synopsis=_SYNOPSIS,
        min_args=2,
        max_args=NO_MAX,
        supported_sub_args='m:d:c:p:',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=1,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[
            CommandArgument.MakeNFileURLsArgument(1),
            CommandArgument.MakeZeroOrMoreCloudURLsArgument()
        ])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='signurl',
        help_name_aliases=['signedurl', 'queryauth'],
        help_type='command_help',
        help_one_line_summary='Create a signed url',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def _ParseAndCheckSubOpts(self):
        # Default argument values
        delta = None
        method = 'GET'
        content_type = ''
        passwd = None

        for o, v in self.sub_opts:
            if o == '-d':
                if delta is not None:
                    delta += _DurationToTimeDelta(v)
                else:
                    delta = _DurationToTimeDelta(v)
            elif o == '-m':
                method = v
            elif o == '-c':
                content_type = v
            elif o == '-p':
                passwd = v
            else:
                self.RaiseInvalidArgumentException()

        if delta is None:
            delta = timedelta(hours=1)

        expiration = calendar.timegm(
            (datetime.utcnow() + delta).utctimetuple())
        if method not in ['GET', 'PUT', 'DELETE', 'HEAD', 'RESUMABLE']:
            raise CommandException('HTTP method must be one of '
                                   '[GET|HEAD|PUT|DELETE|RESUMABLE]')

        return method, expiration, content_type, passwd

    def _ProbeObjectAccessWithClient(self, key, client_email, gcs_path,
                                     logger):
        """Performs a head request against a signed url to check for read access."""

        # Choose a reasonable time in the future; if the user's system clock is
        # 60 or more seconds behind the server's this will generate an error.
        signed_url = _GenSignedUrl(key, client_email, 'HEAD', '', '',
                                   int(time.time()) + 60, gcs_path, logger)

        try:
            h = GetNewHttp()
            req = Request(signed_url, 'HEAD')
            response = MakeRequest(h, req)

            if response.status_code not in [200, 403, 404]:
                raise HttpError.FromResponse(response)

            return response.status_code
        except HttpError:
            error_string = (
                'Unexpected HTTP response code %s while querying '
                'object readability. Is your system clock accurate?' %
                response.status_code)
            if response.content:
                error_string += ' Content: %s' % response.content
            raise CommandException(error_string)

    def _EnumerateStorageUrls(self, in_urls):
        ret = []

        for url_str in in_urls:
            if ContainsWildcard(url_str):
                ret.extend([
                    blr.storage_url for blr in self.WildcardIterator(url_str)
                ])
            else:
                ret.append(StorageUrlFromString(url_str))

        return ret

    def RunCommand(self):
        """Command entry point for signurl command."""
        if not HAVE_OPENSSL:
            raise CommandException(
                'The signurl command requires the pyopenssl library (try pip '
                'install pyopenssl or easy_install pyopenssl)')

        method, expiration, content_type, passwd = self._ParseAndCheckSubOpts()
        storage_urls = self._EnumerateStorageUrls(self.args[1:])

        key = None
        client_email = None
        try:
            key, client_email = _ReadJSONKeystore(
                open(self.args[0], 'rb').read(), passwd)
        except ValueError:
            # Ignore and try parsing as a pkcs12.
            if not passwd:
                passwd = getpass.getpass('Keystore password:')
            try:
                key, client_email = _ReadKeystore(
                    open(self.args[0], 'rb').read(), passwd)
            except ValueError:
                raise CommandException(
                    'Unable to parse private key from {0}'.format(
                        self.args[0]))

        print 'URL\tHTTP Method\tExpiration\tSigned URL'
        for url in storage_urls:
            if url.scheme != 'gs':
                raise CommandException(
                    'Can only create signed urls from gs:// urls')
            if url.IsBucket():
                gcs_path = url.bucket_name
                if method == 'RESUMABLE':
                    raise CommandException(
                        'Resumable signed URLs require an object '
                        'name.')
            else:
                # Need to url encode the object name as Google Cloud Storage does when
                # computing the string to sign when checking the signature.
                gcs_path = '{0}/{1}'.format(
                    url.bucket_name,
                    urllib.quote(url.object_name.encode(UTF8)))

            final_url = _GenSignedUrl(key,
                                      client_email,
                                      method,
                                      '',
                                      content_type,
                                      expiration,
                                      gcs_path,
                                      self.logger,
                                      string_to_sign_debug=True)

            expiration_dt = datetime.fromtimestamp(expiration)

            print '{0}\t{1}\t{2}\t{3}'.format(
                url.url_string.encode(UTF8), method,
                (expiration_dt.strftime('%Y-%m-%d %H:%M:%S')),
                final_url.encode(UTF8))

            response_code = self._ProbeObjectAccessWithClient(
                key, client_email, gcs_path, self.logger)

            if response_code == 404:
                if url.IsBucket() and method != 'PUT':
                    raise CommandException(
                        'Bucket {0} does not exist. Please create a bucket with '
                        'that name before creating a signed URL to access it.'.
                        format(url))
                else:
                    if method != 'PUT' and method != 'RESUMABLE':
                        raise CommandException(
                            'Object {0} does not exist. Please create/upload an object '
                            'with that name before creating a signed URL to access it.'
                            .format(url))
            elif response_code == 403:
                self.logger.warn(
                    '%s does not have permissions on %s, using this link will likely '
                    'result in a 403 error until at least READ permissions are granted',
                    client_email, url)

        return 0
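
The expiration used when signing is simply "now plus the requested duration", converted to a POSIX timestamp; note that repeating -d accumulates durations (the delta += branch in _ParseAndCheckSubOpts). The arithmetic, shown standalone:

import calendar
from datetime import datetime, timedelta

delta = timedelta(hours=1)  # the default when no -d flag is given
expiration = calendar.timegm((datetime.utcnow() + delta).utctimetuple())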
Example No. 13
class AclCommand(Command):
    """Implementation of gsutil acl command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'acl',
        command_name_aliases=['getacl', 'setacl', 'chacl'],
        usage_synopsis=_SYNOPSIS,
        min_args=2,
        max_args=NO_MAX,
        supported_sub_args='afRrg:u:d:p:',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=1,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments={
            'set': [
                CommandArgument.MakeFileURLOrCannedACLArgument(),
                CommandArgument.MakeZeroOrMoreCloudURLsArgument()
            ],
            'get': [CommandArgument.MakeNCloudURLsArgument(1)],
            'ch': [CommandArgument.MakeZeroOrMoreCloudURLsArgument()],
        })
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='acl',
        help_name_aliases=['getacl', 'setacl', 'chmod', 'chacl'],
        help_type='command_help',
        help_one_line_summary='Get, set, or change bucket and/or object ACLs',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={
            'get': _get_help_text,
            'set': _set_help_text,
            'ch': _ch_help_text
        },
    )

    def _CalculateUrlsStartArg(self):
        if not self.args:
            self.RaiseWrongNumberOfArgumentsException()
        if (self.args[0].lower() == 'set') or (self.command_alias_used
                                               == 'setacl'):
            return 1
        else:
            return 0

    def _SetAcl(self):
        """Parses options and sets ACLs on the specified buckets/objects."""
        self.continue_on_error = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                else:
                    self.RaiseInvalidArgumentException()
        try:
            self.SetAclCommandHelper(SetAclFuncWrapper, SetAclExceptionHandler)
        except AccessDeniedException as unused_e:
            self._WarnServiceAccounts()
            raise
        if not self.everything_set_okay:
            raise CommandException('ACLs for some objects could not be set.')

    def _ChAcl(self):
        """Parses options and changes ACLs on the specified buckets/objects."""
        self.parse_versions = True
        self.changes = []
        self.continue_on_error = False

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-f':
                    self.continue_on_error = True
                elif o == '-g':
                    if 'gserviceaccount.com' in a:
                        raise CommandException(
                            'Service accounts are considered users, not groups; please use '
                            '"gsutil acl ch -u" instead of "gsutil acl ch -g"')
                    self.changes.append(
                        acl_helper.AclChange(
                            a, scope_type=acl_helper.ChangeType.GROUP))
                elif o == '-p':
                    self.changes.append(
                        acl_helper.AclChange(
                            a, scope_type=acl_helper.ChangeType.PROJECT))
                elif o == '-u':
                    self.changes.append(
                        acl_helper.AclChange(
                            a, scope_type=acl_helper.ChangeType.USER))
                elif o == '-d':
                    self.changes.append(acl_helper.AclDel(a))
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                else:
                    self.RaiseInvalidArgumentException()

        if not self.changes:
            raise CommandException('Please specify at least one access change '
                                   'with the -g, -u, or -d flags')

        if (not UrlsAreForSingleProvider(self.args)
                or StorageUrlFromString(self.args[0]).scheme != 'gs'):
            raise CommandException(
                'The "{0}" command can only be used with gs:// URLs'.format(
                    self.command_name))

        self.everything_set_okay = True
        self.ApplyAclFunc(
            _ApplyAclChangesWrapper,
            _ApplyExceptionHandler,
            self.args,
            object_fields=['acl', 'generation', 'metageneration'])
        if not self.everything_set_okay:
            raise CommandException('ACLs for some objects could not be set.')

    def _RaiseForAccessDenied(self, url):
        self._WarnServiceAccounts()
        raise CommandException(
            'Failed to set acl for %s. Please ensure you have '
            'OWNER-role access to this resource.' % url)

    @Retry(ServiceException, tries=3, timeout_secs=1)
    def ApplyAclChanges(self, name_expansion_result, thread_state=None):
        """Applies the changes in self.changes to the provided URL.

    Args:
      name_expansion_result: NameExpansionResult describing the target object.
      thread_state: If present, gsutil Cloud API instance to apply the changes.
    """
        if thread_state:
            gsutil_api = thread_state
        else:
            gsutil_api = self.gsutil_api

        url = name_expansion_result.expanded_storage_url
        if url.IsBucket():
            bucket = gsutil_api.GetBucket(url.bucket_name,
                                          provider=url.scheme,
                                          fields=['acl', 'metageneration'])
            current_acl = bucket.acl
        elif url.IsObject():
            gcs_object = encoding.JsonToMessage(
                apitools_messages.Object,
                name_expansion_result.expanded_result)
            current_acl = gcs_object.acl

        if not current_acl:
            self._RaiseForAccessDenied(url)
        if self._ApplyAclChangesAndReturnChangeCount(url, current_acl) == 0:
            self.logger.info('No changes to %s', url)
            return

        try:
            if url.IsBucket():
                preconditions = Preconditions(
                    meta_gen_match=bucket.metageneration)
                bucket_metadata = apitools_messages.Bucket(acl=current_acl)
                gsutil_api.PatchBucket(url.bucket_name,
                                       bucket_metadata,
                                       preconditions=preconditions,
                                       provider=url.scheme,
                                       fields=['id'])
            else:  # Object
                preconditions = Preconditions(
                    gen_match=gcs_object.generation,
                    meta_gen_match=gcs_object.metageneration)
                object_metadata = apitools_messages.Object(acl=current_acl)
                try:
                    gsutil_api.PatchObjectMetadata(url.bucket_name,
                                                   url.object_name,
                                                   object_metadata,
                                                   preconditions=preconditions,
                                                   provider=url.scheme,
                                                   generation=url.generation,
                                                   fields=['id'])
                except PreconditionException as e:
                    # Special retry case where we want to do an additional step, the read
                    # of the read-modify-write cycle, to fetch the correct object
                    # metadata before reattempting ACL changes.
                    self._RefetchObjectMetadataAndApplyAclChanges(
                        url, gsutil_api)

            self.logger.info('Updated ACL on %s', url)
        except BadRequestException as e:
            # Don't retry on bad requests, e.g. invalid email address.
            raise CommandException('Received bad request from server: %s' %
                                   str(e))
        except AccessDeniedException:
            self._RaiseForAccessDenied(url)
        except PreconditionException as e:
            # For objects, retry attempts should have already been handled.
            if url.IsObject():
                raise CommandException(str(e))
            # For buckets, raise PreconditionException and continue to next retry.
            raise e

    @Retry(PreconditionException, tries=3, timeout_secs=1)
    def _RefetchObjectMetadataAndApplyAclChanges(self, url, gsutil_api):
        """Reattempts object ACL changes after a PreconditionException."""
        gcs_object = gsutil_api.GetObjectMetadata(
            url.bucket_name,
            url.object_name,
            provider=url.scheme,
            fields=['acl', 'generation', 'metageneration'])
        current_acl = gcs_object.acl

        if self._ApplyAclChangesAndReturnChangeCount(url, current_acl) == 0:
            self.logger.info('No changes to %s', url)
            return

        object_metadata = apitools_messages.Object(acl=current_acl)
        preconditions = Preconditions(gen_match=gcs_object.generation,
                                      meta_gen_match=gcs_object.metageneration)
        gsutil_api.PatchObjectMetadata(url.bucket_name,
                                       url.object_name,
                                       object_metadata,
                                       preconditions=preconditions,
                                       provider=url.scheme,
                                       generation=gcs_object.generation,
                                       fields=['id'])

    def _ApplyAclChangesAndReturnChangeCount(self, storage_url, acl_message):
        modification_count = 0
        for change in self.changes:
            modification_count += change.Execute(storage_url, acl_message,
                                                 'acl', self.logger)
        return modification_count

    def RunCommand(self):
        """Command entry point for the acl command."""
        action_subcommand = self.args.pop(0)
        self.ParseSubOpts(check_args=True)

        # Commands with both suboptions and subcommands need to reparse for
        # suboptions, so we log again.
        metrics.LogCommandParams(sub_opts=self.sub_opts)
        self.def_acl = False
        if action_subcommand == 'get':
            metrics.LogCommandParams(subcommands=[action_subcommand])
            self.GetAndPrintAcl(self.args[0])
        elif action_subcommand == 'set':
            metrics.LogCommandParams(subcommands=[action_subcommand])
            self._SetAcl()
        elif action_subcommand in ('ch', 'change'):
            metrics.LogCommandParams(subcommands=[action_subcommand])
            self._ChAcl()
        else:
            raise CommandException(
                ('Invalid subcommand "%s" for the %s command.\n'
                 'See "gsutil help acl".') %
                (action_subcommand, self.command_name))

        return 0
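
ApplyAclChanges above is wrapped in @Retry(ServiceException, tries=3, timeout_secs=1). gsutil's decorator comes from its retry utilities; its contract can be sketched as a simplified, hypothetical stand-in like this:

import functools
import time

def retry(exc_type, tries=3, timeout_secs=1):
    # Simplified stand-in for the @Retry decorator used above: retry the
    # wrapped call on exc_type, sleeping between attempts, then re-raise.
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(tries):
                try:
                    return func(*args, **kwargs)
                except exc_type:
                    if attempt == tries - 1:
                        raise  # attempts exhausted; propagate
                    time.sleep(timeout_secs)
        return wrapper
    return decorator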
Example No. 14
class DuCommand(Command):
    """Implementation of gsutil du command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'du',
        command_name_aliases=[],
        usage_synopsis=_SYNOPSIS,
        min_args=0,
        max_args=NO_MAX,
        supported_sub_args='0ace:hsX:',
        file_url_ok=False,
        provider_url_ok=True,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[
            CommandArgument.MakeZeroOrMoreCloudURLsArgument(),
        ],
    )
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='du',
        help_name_aliases=[],
        help_type='command_help',
        help_one_line_summary='Display object size usage',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def _PrintSummaryLine(self, num_bytes, name):
        size_string = (MakeHumanReadable(num_bytes)
                       if self.human_readable else six.text_type(num_bytes))
        text_util.print_to_fd('{size:<11}  {name}'.format(
            size=size_string, name=six.ensure_text(name)),
                              end=self.line_ending)

    def _PrintInfoAboutBucketListingRef(self, bucket_listing_ref):
        """Print listing info for given bucket_listing_ref.

    Args:
      bucket_listing_ref: BucketListing being listed.

    Returns:
      Tuple (number of objects, object size)

    Raises:
      Exception: if a bug in the calling code is encountered.
    """
        obj = bucket_listing_ref.root_object
        url_str = bucket_listing_ref.url_string
        if (obj.metadata and S3_DELETE_MARKER_GUID
                in obj.metadata.additionalProperties):
            size_string = '0'
            num_bytes = 0
            num_objs = 0
            url_str += '<DeleteMarker>'
        else:
            size_string = (MakeHumanReadable(obj.size)
                           if self.human_readable else str(obj.size))
            num_bytes = obj.size
            num_objs = 1

        if not self.summary_only:
            url_detail = '{size:<11}  {url}{ending}'.format(
                size=size_string,
                url=six.ensure_text(url_str),
                ending=six.ensure_text(self.line_ending))
            print_to_fd(url_detail, file=sys.stdout, end='')

        return (num_objs, num_bytes)

    def RunCommand(self):
        """Command entry point for the du command."""
        self.line_ending = '\n'
        self.all_versions = False
        self.produce_total = False
        self.human_readable = False
        self.summary_only = False
        self.exclude_patterns = []
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-0':
                    self.line_ending = '\0'
                elif o == '-a':
                    self.all_versions = True
                elif o == '-c':
                    self.produce_total = True
                elif o == '-e':
                    self.exclude_patterns.append(a)
                elif o == '-h':
                    self.human_readable = True
                elif o == '-s':
                    self.summary_only = True
                elif o == '-X':
                    if a == '-':
                        f = sys.stdin
                        f_close = False
                    else:
                        f = open(a, 'r') if six.PY2 else open(
                            a, 'r', encoding=UTF8)
                        f_close = True
                    self.exclude_patterns = [
                        six.ensure_text(line.strip()) for line in f
                    ]
                    if f_close:
                        f.close()

        if not self.args:
            # Default to listing all gs buckets.
            self.args = ['gs://']

        total_bytes = 0
        got_nomatch_errors = False

        def _PrintObjectLong(blr):
            return self._PrintInfoAboutBucketListingRef(blr)

        def _PrintNothing(unused_blr=None):
            pass

        def _PrintDirectory(num_bytes, blr):
            if not self.summary_only:
                self._PrintSummaryLine(num_bytes, blr.url_string.encode(UTF8))

        for url_arg in self.args:
            top_level_storage_url = StorageUrlFromString(url_arg)
            if top_level_storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            bucket_listing_fields = ['size']

            listing_helper = ls_helper.LsHelper(
                self.WildcardIterator,
                self.logger,
                print_object_func=_PrintObjectLong,
                print_dir_func=_PrintNothing,
                print_dir_header_func=_PrintNothing,
                print_dir_summary_func=_PrintDirectory,
                print_newline_func=_PrintNothing,
                all_versions=self.all_versions,
                should_recurse=True,
                exclude_patterns=self.exclude_patterns,
                fields=bucket_listing_fields)

            # LsHelper expands to objects and prefixes, so perform a top-level
            # expansion first.
            if top_level_storage_url.IsProvider():
                # Provider URL: use bucket wildcard to iterate over all buckets.
                top_level_iter = self.WildcardIterator(
                    '%s://*' % top_level_storage_url.scheme).IterBuckets(
                        bucket_fields=['id'])
            elif top_level_storage_url.IsBucket():
                top_level_iter = self.WildcardIterator(
                    '%s://%s' %
                    (top_level_storage_url.scheme,
                     top_level_storage_url.bucket_name)).IterBuckets(
                         bucket_fields=['id'])
            else:
                top_level_iter = [BucketListingObject(top_level_storage_url)]

            for blr in top_level_iter:
                storage_url = blr.storage_url
                if storage_url.IsBucket() and self.summary_only:
                    storage_url = StorageUrlFromString(
                        storage_url.CreatePrefixUrl(wildcard_suffix='**'))
                _, exp_objs, exp_bytes = listing_helper.ExpandUrlAndPrint(
                    storage_url)
                if (storage_url.IsObject() and exp_objs == 0
                        and ContainsWildcard(url_arg)
                        and not self.exclude_patterns):
                    got_nomatch_errors = True
                total_bytes += exp_bytes

                if self.summary_only:
                    self._PrintSummaryLine(
                        exp_bytes,
                        blr.url_string.rstrip('/').encode(UTF8))

        if self.produce_total:
            self._PrintSummaryLine(total_bytes, 'total')

        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')

        return 0
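
The -h flag routes sizes through MakeHumanReadable before printing. An illustrative stand-in (the real helper lives in gsutil's unit utilities) showing the kind of output du -h produces:

def make_human_readable(num_bytes):
    # Scale through binary prefixes until the value drops below 1024.
    size = float(num_bytes)
    for unit in ('B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'):
        if size < 1024 or unit == 'PiB':
            return '%.2f %s' % (size, unit)
        size /= 1024.0

# e.g. make_human_readable(1536) -> '1.50 KiB'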
Example No. 15
class SetMetaCommand(Command):
    """Implementation of gsutil setmeta command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'setmeta',
        command_name_aliases=['setheader'],
        usage_synopsis=_SYNOPSIS,
        min_args=1,
        max_args=constants.NO_MAX,
        supported_sub_args='h:rR',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=1,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='setmeta',
        help_name_aliases=['setheader'],
        help_type='command_help',
        help_one_line_summary='Set metadata on already uploaded objects',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def RunCommand(self):
        """Command entry point for the setmeta command."""
        headers = []
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-h':
                    if 'x-goog-acl' in a or 'x-amz-acl' in a:
                        raise CommandException(
                            'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                            'set ... to set canned ACLs.')
                    headers.append(a)

        (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

        self.metadata_change = metadata_plus
        for header in metadata_minus:
            self.metadata_change[header] = ''

        if len(self.args) == 1 and not self.recursion_requested:
            url = StorageUrlFromString(self.args[0])
            if not (url.IsCloudUrl() and url.IsObject()):
                raise CommandException('URL (%s) must name an object' %
                                       self.args[0])

        # Used to track if any objects' metadata failed to be set.
        self.everything_set_okay = True

        self.preconditions = PreconditionsFromHeaders(self.headers)

        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            self.args,
            self.recursion_requested,
            all_versions=self.all_versions,
            continue_on_error=self.parallel_operations,
            bucket_listing_fields=['generation', 'metadata', 'metageneration'])

        seek_ahead_iterator = SeekAheadNameExpansionIterator(
            self.command_name,
            self.debug,
            self.GetSeekAheadGsutilApi(),
            self.args,
            self.recursion_requested,
            all_versions=self.all_versions,
            project_id=self.project_id)

        try:
            # Perform requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(_SetMetadataFuncWrapper,
                       name_expansion_iterator,
                       _SetMetadataExceptionHandler,
                       fail_on_error=True,
                       seek_ahead_iterator=seek_ahead_iterator)
        except AccessDeniedException as e:
            if e.status == 403:
                self._WarnServiceAccounts()
            raise

        if not self.everything_set_okay:
            raise CommandException(
                'Metadata for some objects could not be set.')

        return 0

    @Retry(PreconditionException, tries=3, timeout_secs=1)
    def SetMetadataFunc(self, name_expansion_result, thread_state=None):
        """Sets metadata on an object.

    Args:
      name_expansion_result: NameExpansionResult describing target object.
      thread_state: gsutil Cloud API instance to use for the operation.
    """
        gsutil_api = GetCloudApiInstance(self, thread_state=thread_state)

        exp_src_url = name_expansion_result.expanded_storage_url
        self.logger.info('Setting metadata on %s...', exp_src_url)

        cloud_obj_metadata = encoding.JsonToMessage(
            apitools_messages.Object, name_expansion_result.expanded_result)

        preconditions = Preconditions(
            gen_match=self.preconditions.gen_match,
            meta_gen_match=self.preconditions.meta_gen_match)
        if preconditions.gen_match is None:
            preconditions.gen_match = cloud_obj_metadata.generation
        if preconditions.meta_gen_match is None:
            preconditions.meta_gen_match = cloud_obj_metadata.metageneration

        # Patch handles the patch semantics for most metadata, but we need to
        # merge the custom metadata field manually.
        patch_obj_metadata = ObjectMetadataFromHeaders(self.metadata_change)

        api = gsutil_api.GetApiSelector(provider=exp_src_url.scheme)
        # For XML we only want to patch through custom metadata that has
        # changed.  For JSON we need to build the complete set.
        if api == ApiSelector.XML:
            pass
        elif api == ApiSelector.JSON:
            CopyObjectMetadata(patch_obj_metadata,
                               cloud_obj_metadata,
                               override=True)
            patch_obj_metadata = cloud_obj_metadata
            # Patch body does not need the object generation and metageneration.
            patch_obj_metadata.generation = None
            patch_obj_metadata.metageneration = None

        gsutil_api.PatchObjectMetadata(exp_src_url.bucket_name,
                                       exp_src_url.object_name,
                                       patch_obj_metadata,
                                       generation=exp_src_url.generation,
                                       preconditions=preconditions,
                                       provider=exp_src_url.scheme,
                                       fields=['id'])
        _PutToQueueWithTimeout(gsutil_api.status_queue,
                               MetadataMessage(message_time=time.time()))

    def _ParseMetadataHeaders(self, headers):
        """Validates and parses metadata changes from the headers argument.

    Args:
      headers: Header dict to validate and parse.

    Returns:
      (metadata_plus, metadata_minus): Tuple of header sets to add and remove.
    """
        metadata_minus = set()
        cust_metadata_minus = set()
        metadata_plus = {}
        cust_metadata_plus = {}
        # Build a count of the keys encountered from each plus and minus arg so we
        # can check for dupe field specs.
        num_metadata_plus_elems = 0
        num_cust_metadata_plus_elems = 0
        num_metadata_minus_elems = 0
        num_cust_metadata_minus_elems = 0

        for md_arg in headers:
            # Use partition rather than split, as we should treat all characters past
            # the initial : as part of the header's value.
            parts = md_arg.partition(':')
            (header, _, value) = parts
            InsistAsciiHeader(header)

            # Translate headers to lowercase to match the casing assumed by our
            # sanity-checking operations.
            lowercase_header = header.lower()
            # This check is overly simple; it would be stronger to check, for each
            # URL argument, whether the header starts with the provider
            # metadata_prefix, but here we just parse the spec once, before
            # processing any of the URLs. This means we will not detect if the user
            # tries to set an x-goog-meta- field on another provider's object,
            # for example.
            is_custom_meta = IsCustomMetadataHeader(lowercase_header)
            if not is_custom_meta and lowercase_header not in SETTABLE_FIELDS:
                raise CommandException(
                    'Invalid or disallowed header (%s).\nOnly these fields (plus '
                    'x-goog-meta-* fields) can be set or unset:\n%s' %
                    (header, sorted(list(SETTABLE_FIELDS))))

            if value:
                if is_custom_meta:
                    # Allow non-ASCII data for custom metadata fields.
                    cust_metadata_plus[header] = value
                    num_cust_metadata_plus_elems += 1
                else:
                    # Don't unicode encode other fields because that would perturb their
                    # content (e.g., adding %2F's into the middle of a Cache-Control
                    # value).
                    InsistAsciiHeaderValue(header, value)
                    value = str(value)
                    metadata_plus[lowercase_header] = value
                    num_metadata_plus_elems += 1
            else:
                if is_custom_meta:
                    cust_metadata_minus.add(header)
                    num_cust_metadata_minus_elems += 1
                else:
                    metadata_minus.add(lowercase_header)
                    num_metadata_minus_elems += 1

        if (num_metadata_plus_elems != len(metadata_plus)
                or num_cust_metadata_plus_elems != len(cust_metadata_plus)
                or num_metadata_minus_elems != len(metadata_minus)
                or num_cust_metadata_minus_elems != len(cust_metadata_minus)
                or metadata_minus.intersection(set(metadata_plus.keys()))):
            raise CommandException('Each header must appear at most once.')

        metadata_plus.update(cust_metadata_plus)
        metadata_minus.update(cust_metadata_minus)
        return (metadata_minus, metadata_plus)
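
_ParseMetadataHeaders above encodes setmeta's core convention: 'header:value' adds or replaces a field, while a bare 'header:' removes it, and partition keeps any later colons inside the value. A condensed, stand-alone sketch of that plus/minus split (the ASCII checks, SETTABLE_FIELDS validation, and custom-metadata casing rules are omitted here):

def split_metadata_args(header_args):
    plus, minus = {}, set()
    for arg in header_args:
        # partition, not split: colons inside the value stay intact.
        header, _, value = arg.partition(':')
        if value:
            plus[header.lower()] = value
        else:
            minus.add(header.lower())
    if minus & set(plus):
        raise ValueError('Each header must appear at most once.')
    return minus, plus

# 'Cache-Control:...' sets a field; a bare 'x-goog-meta-color:' unsets one.
print(split_metadata_args(
    ['Cache-Control:public, max-age=3600', 'x-goog-meta-color:']))
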
Example #16
class ComposeCommand(Command):
    """Implementation of gsutil compose command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'compose',
        command_name_aliases=['concat'],
        usage_synopsis=_SYNOPSIS,
        min_args=1,
        max_args=MAX_COMPOSE_ARITY + 1,
        supported_sub_args='',
        # Not files, just object names without gs:// prefix.
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=1,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='compose',
        help_name_aliases=['concat'],
        help_type='command_help',
        help_one_line_summary=(
            'Concatenate a sequence of objects into a new composite object.'),
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def CheckProvider(self, url):
        if url.scheme != 'gs':
            raise CommandException(
                '"compose" called on URL with unsupported provider (%s).' %
                str(url))

    # Command entry point.
    def RunCommand(self):
        """Command entry point for the compose command."""
        target_url_str = self.args[-1]
        self.args = self.args[:-1]
        target_url = StorageUrlFromString(target_url_str)
        self.CheckProvider(target_url)
        if target_url.HasGeneration():
            raise CommandException(
                'A version-specific URL (%s) cannot be '
                'the destination for gsutil compose - abort.' % target_url)

        dst_obj_metadata = apitools_messages.Object(
            name=target_url.object_name, bucket=target_url.bucket_name)

        components = []
        # Remember the first source object so we can get its content type.
        first_src_url = None
        for src_url_str in self.args:
            if ContainsWildcard(src_url_str):
                src_url_iter = self.WildcardIterator(src_url_str).IterObjects()
            else:
                src_url_iter = [
                    BucketListingObject(StorageUrlFromString(src_url_str))
                ]
            for blr in src_url_iter:
                src_url = blr.storage_url
                self.CheckProvider(src_url)

                if src_url.bucket_name != target_url.bucket_name:
                    raise CommandException(
                        'GCS does not support inter-bucket composing.')

                if not first_src_url:
                    first_src_url = src_url
                src_obj_metadata = (apitools_messages.ComposeRequest.
                                    SourceObjectsValueListEntry(
                                        name=src_url.object_name))
                if src_url.HasGeneration():
                    src_obj_metadata.generation = int(src_url.generation)
                components.append(src_obj_metadata)
                # Avoid expanding too many components, and sanity check each name
                # expansion result.
                if len(components) > MAX_COMPOSE_ARITY:
                    raise CommandException(
                        '"compose" called with too many component '
                        'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

        if not components:
            raise CommandException(
                '"compose" requires at least 1 component object.')

        dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata(
            first_src_url.bucket_name,
            first_src_url.object_name,
            provider=first_src_url.scheme,
            fields=['contentType']).contentType

        preconditions = PreconditionsFromHeaders(self.headers or {})

        self.logger.info('Composing %s from %d component object(s).',
                         target_url, len(components))
        self.gsutil_api.ComposeObject(
            components,
            dst_obj_metadata,
            preconditions=preconditions,
            provider=target_url.scheme,
            encryption_tuple=GetEncryptionKeyWrapper(config))
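
A single compose request is capped at MAX_COMPOSE_ARITY components (32 for GCS), which is why RunCommand aborts once the expanded source list exceeds that limit. Composing more objects than that requires batching: compose each group of up to 32 sources into an intermediate object, then compose the intermediates. A sketch of the batching step only (the compose calls themselves are elided):

MAX_COMPOSE_ARITY = 32  # GCS limit, matching the constant used above

def compose_batches(source_names):
    """Yield batches of <= MAX_COMPOSE_ARITY names, one per compose call."""
    for i in range(0, len(source_names), MAX_COMPOSE_ARITY):
        yield source_names[i:i + MAX_COMPOSE_ARITY]

# 100 sources -> 4 compose calls (32 + 32 + 32 + 4); their outputs can in
# turn be composed into the final object.
batches = list(compose_batches(['part-%03d' % n for n in range(100)]))
assert [len(b) for b in batches] == [32, 32, 32, 4]
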
Example #17
class NotificationCommand(Command):
    """Implementation of gsutil notification command."""

    # Notification names might look like one of these:
    #  canonical form:  projects/_/buckets/bucket/notificationConfigs/3
    #  JSON API form:   b/bucket/notificationConfigs/5
    # Either of the above might start with a / if a user is copying & pasting.
    def _GetNotificationPathRegex(self):
        if not NotificationCommand._notification_path_regex:
            NotificationCommand._notification_path_regex = re.compile(
                ('/?(projects/[^/]+/)?b(uckets)?/(?P<bucket>[^/]+)/'
                 'notificationConfigs/(?P<notification>[0-9]+)'))
        return NotificationCommand._notification_path_regex

    _notification_path_regex = None

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'notification',
        command_name_aliases=[
            'notify',
            'notifyconfig',
            'notifications',
            'notif',
        ],
        usage_synopsis=_SYNOPSIS,
        min_args=2,
        max_args=NO_MAX,
        supported_sub_args='i:t:m:of:e:p:s',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=1,
        gs_api_support=[ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments={
            'watchbucket': [
                CommandArgument.MakeFreeTextArgument(),
                CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument(),
            ],
            'stopchannel': [],
            'list': [
                CommandArgument.MakeZeroOrMoreCloudBucketURLsArgument(),
            ],
            'delete': [
                # Takes a list of one of the following:
                #   notification: projects/_/buckets/bla/notificationConfigs/5,
                #   bucket: gs://foobar
                CommandArgument.MakeZeroOrMoreCloudURLsArgument(),
            ],
            'create': [
                CommandArgument.MakeFreeTextArgument(),  # Cloud Pub/Sub topic
                CommandArgument.MakeNCloudBucketURLsArgument(1),
            ]
        },
    )
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='notification',
        help_name_aliases=[
            'watchbucket',
            'stopchannel',
            'notifyconfig',
        ],
        help_type='command_help',
        help_one_line_summary='Configure object change notification',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={
            'create': _create_help_text,
            'list': _list_help_text,
            'delete': _delete_help_text,
            'watchbucket': _watchbucket_help_text,
            'stopchannel': _stopchannel_help_text,
        },
    )

    def _WatchBucket(self):
        """Creates a watch on a bucket given in self.args."""
        self.CheckArguments()
        identifier = None
        client_token = None
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-i':
                    identifier = a
                if o == '-t':
                    client_token = a

        identifier = identifier or str(uuid.uuid4())
        watch_url = self.args[0]
        bucket_arg = self.args[-1]

        if not watch_url.lower().startswith('https://'):
            raise CommandException(
                'The application URL must be an https:// URL.')

        bucket_url = StorageUrlFromString(bucket_arg)
        if not (bucket_url.IsBucket() and bucket_url.scheme == 'gs'):
            raise CommandException(
                'The %s command can only be used with gs:// bucket URLs.' %
                self.command_name)
        if not bucket_url.IsBucket():
            raise CommandException(
                'URL must name a bucket for the %s command.' %
                self.command_name)

        self.logger.info('Watching bucket %s with application URL %s ...',
                         bucket_url, watch_url)

        try:
            channel = self.gsutil_api.WatchBucket(bucket_url.bucket_name,
                                                  watch_url,
                                                  identifier,
                                                  token=client_token,
                                                  provider=bucket_url.scheme)
        except AccessDeniedException as e:
            self.logger.warn(
                NOTIFICATION_AUTHORIZATION_FAILED_MESSAGE.format(
                    watch_error=str(e), watch_url=watch_url))
            raise

        channel_id = channel.id
        resource_id = channel.resourceId
        client_token = channel.token
        self.logger.info('Successfully created watch notification channel.')
        self.logger.info('Watch channel identifier: %s', channel_id)
        self.logger.info('Canonicalized resource identifier: %s', resource_id)
        self.logger.info('Client state token: %s', client_token)

        return 0

    def _StopChannel(self):
        channel_id = self.args[0]
        resource_id = self.args[1]

        self.logger.info('Removing channel %s with resource identifier %s ...',
                         channel_id, resource_id)
        self.gsutil_api.StopChannel(channel_id, resource_id, provider='gs')
        self.logger.info('Successfully removed channel.')

        return 0

    def _ListChannels(self, bucket_arg):
        """Lists active channel watches on a bucket given in self.args."""
        bucket_url = StorageUrlFromString(bucket_arg)
        if not (bucket_url.IsBucket() and bucket_url.scheme == 'gs'):
            raise CommandException(
                'The %s command can only be used with gs:// bucket URLs.' %
                self.command_name)
        if not bucket_url.IsBucket():
            raise CommandException(
                'URL must name a bucket for the %s command.' %
                self.command_name)
        channels = self.gsutil_api.ListChannels(bucket_url.bucket_name,
                                                provider='gs').items
        self.logger.info(
            'Bucket %s has the following active Object Change Notifications:',
            bucket_url.bucket_name)
        for idx, channel in enumerate(channels):
            self.logger.info('\tNotification channel %d:', idx + 1)
            self.logger.info('\t\tChannel identifier: %s', channel.channel_id)
            self.logger.info('\t\tResource identifier: %s',
                             channel.resource_id)
            self.logger.info('\t\tApplication URL: %s', channel.push_url)
            self.logger.info('\t\tCreated by: %s', channel.subscriber_email)
            self.logger.info(
                '\t\tCreation time: %s',
                str(datetime.fromtimestamp(channel.creation_time_ms / 1000)))

        return 0

    def _Create(self):
        self.CheckArguments()

        # User-specified options
        pubsub_topic = None
        payload_format = None
        custom_attributes = {}
        event_types = []
        object_name_prefix = None
        should_setup_topic = True

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-e':
                    event_types.append(a)
                elif o == '-f':
                    payload_format = a
                elif o == '-m':
                    if ':' not in a:
                        raise CommandException(
                            'Custom attributes specified with -m should be of the form '
                            'key:value')
                    key, value = a.split(':', 1)
                    custom_attributes[key] = value
                elif o == '-p':
                    object_name_prefix = a
                elif o == '-s':
                    should_setup_topic = False
                elif o == '-t':
                    pubsub_topic = a

        if payload_format not in PAYLOAD_FORMAT_MAP:
            raise CommandException(
                "Must provide a payload format with -f of either 'json' or 'none'"
            )
        payload_format = PAYLOAD_FORMAT_MAP[payload_format]

        bucket_arg = self.args[-1]

        bucket_url = StorageUrlFromString(bucket_arg)
        if not bucket_url.IsCloudUrl() or not bucket_url.IsBucket():
            raise CommandException(
                "%s %s requires a GCS bucket name, but got '%s'" %
                (self.command_name, self.subcommand_name, bucket_arg))
        if bucket_url.scheme != 'gs':
            raise CommandException(
                'The %s command can only be used with gs:// bucket URLs.' %
                self.command_name)
        bucket_name = bucket_url.bucket_name
        self.logger.debug('Creating notification for bucket %s', bucket_url)

        # Find the project this bucket belongs to
        bucket_metadata = self.gsutil_api.GetBucket(bucket_name,
                                                    fields=['projectNumber'],
                                                    provider=bucket_url.scheme)
        bucket_project_number = bucket_metadata.projectNumber

        # If not specified, choose a sensible default for the Cloud Pub/Sub topic
        # name.
        if not pubsub_topic:
            pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                                      bucket_name)
        if not pubsub_topic.startswith('projects/'):
            # If a user picks a topic ID (mytopic) but doesn't pass the whole name (
            # projects/my-project/topics/mytopic ), pick a default project.
            pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                                      pubsub_topic)
        self.logger.debug('Using Cloud Pub/Sub topic %s', pubsub_topic)

        just_modified_topic_permissions = False
        if should_setup_topic:
            # Ask GCS for the email address that represents GCS's permission to
            # publish to a Cloud Pub/Sub topic from this project.
            service_account = self.gsutil_api.GetProjectServiceAccount(
                bucket_project_number,
                provider=bucket_url.scheme).email_address
            self.logger.debug('Service account for project %d: %s',
                              bucket_project_number, service_account)
            just_modified_topic_permissions = self._CreateTopic(
                pubsub_topic, service_account)

        for attempt_number in range(0, 2):
            try:
                create_response = self.gsutil_api.CreateNotificationConfig(
                    bucket_name,
                    pubsub_topic=pubsub_topic,
                    payload_format=payload_format,
                    custom_attributes=custom_attributes,
                    event_types=event_types if event_types else None,
                    object_name_prefix=object_name_prefix,
                    provider=bucket_url.scheme)
                break
            except PublishPermissionDeniedException:
                if attempt_number == 0 and just_modified_topic_permissions:
                    # If we have just set the IAM policy, it may take up to 10 seconds to
                    # take effect.
                    self.logger.info(
                        'Retrying create notification in 10 seconds '
                        '(new permissions may take up to 10 seconds to take '
                        'effect).')
                    time.sleep(10)
                else:
                    raise

        notification_name = 'projects/_/buckets/%s/notificationConfigs/%s' % (
            bucket_name, create_response.id)
        self.logger.info('Created notification config %s', notification_name)

        return 0

    def _CreateTopic(self, pubsub_topic, service_account):
        """Assures that a topic exists, creating it if necessary.

    Also adds GCS as a publisher on that bucket, if necessary.

    Args:
      pubsub_topic: name of the Cloud Pub/Sub topic to use/create.
      service_account: the GCS service account that needs publish permission.

    Returns:
      true if we modified IAM permissions, otherwise false.
    """

        pubsub_api = PubsubApi(logger=self.logger)

        # Verify that the Pub/Sub topic exists. If it does not, create it.
        try:
            pubsub_api.GetTopic(topic_name=pubsub_topic)
            self.logger.debug('Topic %s already exists', pubsub_topic)
        except NotFoundException:
            self.logger.debug('Creating topic %s', pubsub_topic)
            pubsub_api.CreateTopic(topic_name=pubsub_topic)
            self.logger.info('Created Cloud Pub/Sub topic %s', pubsub_topic)

        # Verify that the service account is in the IAM policy.
        policy = pubsub_api.GetTopicIamPolicy(topic_name=pubsub_topic)
        binding = Binding(role='roles/pubsub.publisher',
                          members=['serviceAccount:%s' % service_account])

        # This could be more extensive. We could, for instance, check for roles
        # that are stronger than pubsub.publisher, like owner. We could also
        # recurse up the hierarchy looking to see if there are project-level
        # permissions. This can get very complex very quickly, as the caller
        # may not necessarily have access to the project-level IAM policy.
        # There's no danger in double-granting permission just to make sure it's
        # there, though.
        if binding not in policy.bindings:
            policy.bindings.append(binding)
            # transactional safety via etag field.
            pubsub_api.SetTopicIamPolicy(topic_name=pubsub_topic,
                                         policy=policy)
            return True
        else:
            self.logger.debug(
                'GCS already has publish permission to topic %s.',
                pubsub_topic)
            return False

    def _EnumerateNotificationsFromArgs(self,
                                        accept_notification_configs=True):
        """Yields bucket/notification tuples from command-line args.

    Given a list of strings that are bucket names (gs://foo) or notification
    config IDs, yield tuples of bucket names and their associated notifications.

    Args:
      accept_notification_configs: whether notification configs are valid args.
    Yields:
      Tuples of the form (bucket_name, Notification)
    """
        path_regex = self._GetNotificationPathRegex()

        for list_entry in self.args:
            match = path_regex.match(list_entry)
            if match:
                if not accept_notification_configs:
                    raise CommandException(
                        '%s %s accepts only bucket names, but you provided %s'
                        %
                        (self.command_name, self.subcommand_name, list_entry))
                bucket_name = match.group('bucket')
                notification_id = match.group('notification')
                found = False
                for notification in self.gsutil_api.ListNotificationConfigs(
                        bucket_name, provider='gs'):
                    if notification.id == notification_id:
                        yield (bucket_name, notification)
                        found = True
                        break
                if not found:
                    raise NotFoundException('Could not find notification %s' %
                                            list_entry)
            else:
                storage_url = StorageUrlFromString(list_entry)
                if not storage_url.IsCloudUrl():
                    raise CommandException(
                        'The %s command must be used on cloud buckets or notification '
                        'config names.' % self.command_name)
                if storage_url.scheme != 'gs':
                    raise CommandException(
                        'The %s command only works on gs:// buckets.' %
                        self.command_name)
                path = None
                if storage_url.IsProvider():
                    path = 'gs://*'
                elif storage_url.IsBucket():
                    path = list_entry
                if not path:
                    raise CommandException(
                        'The %s command cannot be used on cloud objects, only buckets'
                        % self.command_name)
                for blr in self.WildcardIterator(path).IterBuckets(
                        bucket_fields=['id']):
                    for notification in self.gsutil_api.ListNotificationConfigs(
                            blr.storage_url.bucket_name, provider='gs'):
                        yield (blr.storage_url.bucket_name, notification)

    def _List(self):
        self.CheckArguments()
        if self.sub_opts:
            if '-o' in dict(self.sub_opts):
                for bucket_name in self.args:
                    self._ListChannels(bucket_name)
        else:
            for bucket_name, notification in self._EnumerateNotificationsFromArgs(
                    accept_notification_configs=False):
                self._PrintNotificationDetails(bucket_name, notification)
        return 0

    def _PrintNotificationDetails(self, bucket, notification):
        print(
            'projects/_/buckets/{bucket}/notificationConfigs/{notification}\n'
            '\tCloud Pub/Sub topic: {topic}'.format(
                bucket=bucket,
                notification=notification.id,
                topic=notification.topic[len('//pubsub.googleapis.com/'):]))
        if notification.custom_attributes:
            print('\tCustom attributes:')
            for attr in notification.custom_attributes.additionalProperties:
                print('\t\t%s: %s' % (attr.key, attr.value))
        filters = []
        if notification.event_types:
            filters.append('\t\tEvent Types: %s' %
                           ', '.join(notification.event_types))
        if notification.object_name_prefix:
            filters.append("\t\tObject name prefix: '%s'" %
                           notification.object_name_prefix)
        if filters:
            print('\tFilters:')
            for line in filters:
                print(line)
        self.logger.info('')

    def _Delete(self):
        for bucket_name, notification in self._EnumerateNotificationsFromArgs(
        ):
            self._DeleteNotification(bucket_name, notification.id)
        return 0

    def _DeleteNotification(self, bucket_name, notification_id):
        self.gsutil_api.DeleteNotificationConfig(bucket_name,
                                                 notification=notification_id,
                                                 provider='gs')
        return 0

    def _RunSubCommand(self, func):
        try:
            (self.sub_opts,
             self.args) = getopt.getopt(self.args,
                                        self.command_spec.supported_sub_args)
            # Commands with both suboptions and subcommands need to reparse for
            # suboptions, so we log again.
            metrics.LogCommandParams(sub_opts=self.sub_opts)
            return func(self)
        except getopt.GetoptError:
            self.RaiseInvalidArgumentException()

    SUBCOMMANDS = {
        'create': _Create,
        'list': _List,
        'delete': _Delete,
        'watchbucket': _WatchBucket,
        'stopchannel': _StopChannel
    }

    def RunCommand(self):
        """Command entry point for the notification command."""
        self.subcommand_name = self.args.pop(0)
        if self.subcommand_name in NotificationCommand.SUBCOMMANDS:
            metrics.LogCommandParams(subcommands=[self.subcommand_name])
            return self._RunSubCommand(
                NotificationCommand.SUBCOMMANDS[self.subcommand_name])
        else:
            raise CommandException(
                'Invalid subcommand "%s" for the %s command.' %
                (self.subcommand_name, self.command_name))
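
The notification-path regex near the top of the class accepts both spellings the comment mentions, with or without a leading slash. Exercising the same pattern stand-alone:

import re

path_regex = re.compile('/?(projects/[^/]+/)?b(uckets)?/(?P<bucket>[^/]+)/'
                        'notificationConfigs/(?P<notification>[0-9]+)')

for path in ('projects/_/buckets/bucket/notificationConfigs/3',
             '/b/bucket/notificationConfigs/5'):
    match = path_regex.match(path)
    print(match.group('bucket'), match.group('notification'))
# -> bucket 3
# -> bucket 5
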
Example #18
class UrlSignCommand(Command):
  """Implementation of gsutil url_sign command."""

  # Command specification. See base class for documentation.
  command_spec = Command.CreateCommandSpec(
      'signurl',
      command_name_aliases=['signedurl', 'queryauth'],
      usage_synopsis=_SYNOPSIS,
      min_args=1,
      max_args=constants.NO_MAX,
      supported_sub_args='m:d:c:p:r:u',
      supported_private_args=['use-service-account'],
      file_url_ok=False,
      provider_url_ok=False,
      urls_start_arg=1,
      gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
      gs_default_api=ApiSelector.JSON,
      argparse_arguments=[
          CommandArgument.MakeZeroOrMoreFileURLsArgument(),
          CommandArgument.MakeZeroOrMoreCloudURLsArgument(),
      ],
  )
  # Help specification. See help_provider.py for documentation.
  help_spec = Command.HelpSpec(
      help_name='signurl',
      help_name_aliases=[
          'signedurl',
          'queryauth',
      ],
      help_type='command_help',
      help_one_line_summary='Create a signed url',
      help_text=_DETAILED_HELP_TEXT,
      subcommand_help_text={},
  )

  def _ParseAndCheckSubOpts(self):
    # Default argument values
    delta = None
    method = 'GET'
    content_type = ''
    passwd = None
    region = _AUTO_DETECT_REGION
    use_service_account = False

    for o, v in self.sub_opts:
      # TODO(PY3-ONLY): Delete this if block.
      if six.PY2:
        v = v.decode(sys.stdin.encoding or constants.UTF8)
      if o == '-d':
        if delta is not None:
          delta += _DurationToTimeDelta(v)
        else:
          delta = _DurationToTimeDelta(v)
      elif o == '-m':
        method = v
      elif o == '-c':
        content_type = v
      elif o == '-p':
        passwd = v
      elif o == '-r':
        region = v
      elif o == '-u' or o == '--use-service-account':
        use_service_account = True
      else:
        self.RaiseInvalidArgumentException()

    if delta is None:
      delta = timedelta(hours=1)
    else:
      if use_service_account and delta > _MAX_EXPIRATION_TIME_WITH_MINUS_U:
        # This restriction comes from the IAM SignBlob API. The SignBlob
        # API uses a system-managed key which can guarantee validation only
        # up to 12 hours. b/156160482#comment4
        raise CommandException(
            'Max valid duration allowed is %s when -u flag is used. For longer'
            ' duration, consider using the private-key-file instead of the -u'
            ' option.' % _MAX_EXPIRATION_TIME_WITH_MINUS_U)
      elif delta > _MAX_EXPIRATION_TIME:
        raise CommandException('Max valid duration allowed is '
                               '%s' % _MAX_EXPIRATION_TIME)

    if method not in ['GET', 'PUT', 'DELETE', 'HEAD', 'RESUMABLE']:
      raise CommandException('HTTP method must be one of '
                             '[GET|HEAD|PUT|DELETE|RESUMABLE]')

    if not use_service_account and len(self.args) < 2:
      raise CommandException(
          'The command requires a key file argument and one or more '
          'url arguments if the --use-service-account flag is missing. '
          'Run `gsutil help signurl` for more info')

    return method, delta, content_type, passwd, region, use_service_account

  def _ProbeObjectAccessWithClient(self, key, use_service_account, provider,
                                   client_email, gcs_path, logger, region):
    """Performs a head request against a signed url to check for read access."""

    # Choose a reasonable time in the future; if the user's system clock is
    # 60 or more seconds behind the server's this will generate an error.
    signed_url = _GenSignedUrl(key=key,
                               api=self.gsutil_api,
                               use_service_account=use_service_account,
                               provider=provider,
                               client_id=client_email,
                               method='HEAD',
                               duration=timedelta(seconds=60),
                               gcs_path=gcs_path,
                               logger=logger,
                               region=region,
                               string_to_sign_debug=True)

    try:
      h = GetNewHttp()
      req = Request(signed_url, 'HEAD')
      response = MakeRequest(h, req)

      if response.status_code not in [200, 403, 404]:
        raise HttpError.FromResponse(response)

      return response.status_code
    except HttpError as http_error:
      if hasattr(http_error, 'response'):
        error_response = http_error.response
        error_string = ('Unexpected HTTP response code %s while querying '
                        'object readability. Is your system clock accurate?' %
                        error_response.status_code)
        if error_response.content:
          error_string += ' Content: %s' % error_response.content
      else:
        error_string = ('Expected an HTTP response code of '
                        '200 while querying object readability, but received '
                        'an error: %s' % http_error)
      raise CommandException(error_string)

  def _EnumerateStorageUrls(self, in_urls):
    ret = []

    for url_str in in_urls:
      if ContainsWildcard(url_str):
        ret.extend([blr.storage_url for blr in self.WildcardIterator(url_str)])
      else:
        ret.append(StorageUrlFromString(url_str))

    return ret

  def RunCommand(self):
    """Command entry point for signurl command."""
    if not HAVE_OPENSSL:
      raise CommandException(
          'The signurl command requires the pyopenssl library (try pip '
          'install pyopenssl or easy_install pyopenssl)')

    method, delta, content_type, passwd, region, use_service_account = (
        self._ParseAndCheckSubOpts())
    arg_start_index = 0 if use_service_account else 1
    storage_urls = self._EnumerateStorageUrls(self.args[arg_start_index:])
    region_cache = {}

    key = None
    if not use_service_account:
      try:
        key, client_email = _ReadJSONKeystore(
            open(self.args[0], 'rb').read(), passwd)
      except ValueError:
        # Ignore and try parsing as a pkcs12.
        if not passwd:
          passwd = getpass.getpass('Keystore password:')
        try:
          key, client_email = _ReadKeystore(
              open(self.args[0], 'rb').read(), passwd)
        except ValueError:
          raise CommandException('Unable to parse private key from {0}'.format(
              self.args[0]))
    else:
      client_email = self.gsutil_api.GetServiceAccountId(provider='gs')

    print('URL\tHTTP Method\tExpiration\tSigned URL')
    for url in storage_urls:
      if url.scheme != 'gs':
        raise CommandException('Can only create signed urls from gs:// urls')
      if url.IsBucket():
        if region == _AUTO_DETECT_REGION:
          raise CommandException('Generating signed URLs for creating buckets'
                                 ' requires a region be specified via the -r '
                                 'option. Run `gsutil help signurl` for more '
                                 'information about the \'-r\' option.')
        gcs_path = url.bucket_name
        if method == 'RESUMABLE':
          raise CommandException('Resumable signed URLs require an object '
                                 'name.')
      else:
        # URL-encode the object name the same way Google Cloud Storage does
        # when computing the string to sign for signature verification.
        gcs_path = '{0}/{1}'.format(
            url.bucket_name,
            urllib.parse.quote(url.object_name.encode(constants.UTF8),
                               safe=b'/~'))

      if region == _AUTO_DETECT_REGION:
        if url.bucket_name in region_cache:
          bucket_region = region_cache[url.bucket_name]
        else:
          try:
            _, bucket = self.GetSingleBucketUrlFromArg(
                'gs://{}'.format(url.bucket_name), bucket_fields=['location'])
          except Exception as e:
            raise CommandException(
                '{}: Failed to auto-detect location for bucket \'{}\'. Please '
                'ensure you have storage.buckets.get permission on the bucket '
                'or specify the bucket\'s location using the \'-r\' option.'.
                format(e.__class__.__name__, url.bucket_name))
          bucket_region = bucket.location.lower()
          region_cache[url.bucket_name] = bucket_region
      else:
        bucket_region = region
      final_url = _GenSignedUrl(key=key,
                                api=self.gsutil_api,
                                use_service_account=use_service_account,
                                provider=url.scheme,
                                client_id=client_email,
                                method=method,
                                duration=delta,
                                gcs_path=gcs_path,
                                logger=self.logger,
                                region=bucket_region,
                                content_type=content_type,
                                string_to_sign_debug=True)

      expiration = calendar.timegm((datetime.utcnow() + delta).utctimetuple())
      expiration_dt = datetime.fromtimestamp(expiration)

      time_str = expiration_dt.strftime('%Y-%m-%d %H:%M:%S')
      # TODO(PY3-ONLY): Delete this if block.
      if six.PY2:
        time_str = time_str.decode(constants.UTF8)

      url_info_str = '{0}\t{1}\t{2}\t{3}'.format(url.url_string, method,
                                                 time_str, final_url)

      # TODO(PY3-ONLY): Delete this if block.
      if six.PY2:
        url_info_str = url_info_str.encode(constants.UTF8)

      print(url_info_str)

      response_code = self._ProbeObjectAccessWithClient(
          key, use_service_account, url.scheme, client_email, gcs_path,
          self.logger, bucket_region)

      if response_code == 404:
        if url.IsBucket() and method != 'PUT':
          raise CommandException(
              'Bucket {0} does not exist. Please create a bucket with '
              'that name before creating a signed URL to access it.'.format(
                  url))
        else:
          if method != 'PUT' and method != 'RESUMABLE':
            raise CommandException(
                'Object {0} does not exist. Please create/upload an object '
                'with that name before creating a signed URL to access it.'.
                format(url))
      elif response_code == 403:
        self.logger.warn(
            '%s does not have permissions on %s, using this link will likely '
            'result in a 403 error until at least READ permissions are granted',
            client_email or 'The account', url)

    return 0
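
The expiration printed by RunCommand is simply 'now + delta' rendered as a UTC epoch timestamp, where delta defaults to one hour when no -d flag is given. The same arithmetic in isolation:

import calendar
from datetime import datetime, timedelta

delta = timedelta(hours=1)  # default lifetime when -d is omitted
expiration = calendar.timegm((datetime.utcnow() + delta).utctimetuple())
expiration_dt = datetime.fromtimestamp(expiration)
print(expiration_dt.strftime('%Y-%m-%d %H:%M:%S'))
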
Example #19
class RmCommand(Command):
    """Implementation of gsutil rm command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'rm',
        command_name_aliases=['del', 'delete', 'remove'],
        usage_synopsis=_SYNOPSIS,
        min_args=0,
        max_args=NO_MAX,
        supported_sub_args='afIrR',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='rm',
        help_name_aliases=['del', 'delete', 'remove'],
        help_type='command_help',
        help_one_line_summary='Remove objects',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            seek_ahead_iterator = None
            # Cannot seek ahead with stdin args, since we can only iterate them
            # once without buffering in memory.
            if not self.read_args_from_stdin:
                seek_ahead_iterator = SeekAheadNameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.GetSeekAheadGsutilApi(),
                    url_strs,
                    self.recursion_requested,
                    all_versions=self.all_versions,
                    project_id=self.project_id)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                seek_ahead_iterator=seek_ahead_iterator)

        # Assuming the bucket has versioning enabled, URLs that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        except ServiceException as e:
            if not self.continue_on_error:
                raise

        if self.bucket_not_found_count:
            raise CommandException(
                'Encountered non-existent bucket during listing')

        if self.op_failure_count and not self.continue_on_error:
            raise CommandException('Some files could not be removed.')

        # If this was a gsutil rm -r command covering any bucket subdirs,
        # remove any dir_$folder$ objects (which are created by various web UI
        # tools to simulate folders).
        if self.recursion_requested:
            folder_object_wildcards = []
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsObject():
                    folder_object_wildcards.append('%s**_$folder$' % url_str)
            if folder_object_wildcards:
                self.continue_on_error = True
                try:
                    name_expansion_iterator = NameExpansionIterator(
                        self.command_name,
                        self.debug,
                        self.logger,
                        self.gsutil_api,
                        folder_object_wildcards,
                        self.recursion_requested,
                        project_id=self.project_id,
                        all_versions=self.all_versions)
                    # When we're removing folder objects, always continue on error
                    self.Apply(_RemoveFuncWrapper,
                               name_expansion_iterator,
                               _RemoveFoldersExceptionHandler,
                               fail_on_error=False)
                except CommandException as e:
                    # Ignore exception from name expansion due to an absent folder file.
                    if not e.reason.startswith(NO_URLS_MATCHED_GENERIC):
                        raise

        # Now that all data has been deleted, delete any bucket URLs.
        for url in bucket_urls_to_delete:
            self.logger.info('Removing %s...', url)

            @Retry(NotEmptyException, tries=3, timeout_secs=1)
            def BucketDeleteWithRetry():
                self.gsutil_api.DeleteBucket(url.bucket_name,
                                             provider=url.scheme)

            BucketDeleteWithRetry()

        if self.op_failure_count:
            plural_str = 's' if self.op_failure_count > 1 else ''
            raise CommandException(
                '%d file%s/object%s could not be removed.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0
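
The folder-cleanup pass at the end of RunCommand builds '**_$folder$' wildcards only for URL arguments that name objects, since 'dir_$folder$' placeholders live under object paths rather than at the bucket root. A rough stand-alone sketch (the '/'-in-path test below is a crude stand-in for StorageUrl.IsObject):

def folder_marker_wildcards(url_strs):
    wildcards = []
    for url_str in url_strs:
        _, _, path = url_str.partition('://')
        if '/' in path:  # names an object, not a bare bucket or provider
            wildcards.append('%s**_$folder$' % url_str)
    return wildcards

print(folder_marker_wildcards(['gs://bucket', 'gs://bucket/dir']))
# -> ['gs://bucket/dir**_$folder$']
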
Example #20
class LsCommand(Command):
    """Implementation of gsutil ls command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'ls',
        command_name_aliases=[
            'dir',
            'list',
        ],
        usage_synopsis=_SYNOPSIS,
        min_args=0,
        max_args=NO_MAX,
        supported_sub_args='aebdlLhp:rR',
        file_url_ok=False,
        provider_url_ok=True,
        urls_start_arg=0,
        gs_api_support=[
            ApiSelector.XML,
            ApiSelector.JSON,
        ],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[
            CommandArgument.MakeZeroOrMoreCloudURLsArgument(),
        ],
    )
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='ls',
        help_name_aliases=[
            'dir',
            'list',
        ],
        help_type='command_help',
        help_one_line_summary='List providers, buckets, or objects',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def _PrintBucketInfo(self, bucket_blr, listing_style):
        """Print listing info for given bucket.

    Args:
      bucket_blr: BucketListingReference for the bucket being listed
      listing_style: ListingStyle enum describing type of output desired.

    Returns:
      Tuple (total objects, total bytes) in the bucket.
    """
        if (listing_style == ListingStyle.SHORT
                or listing_style == ListingStyle.LONG):
            text_util.print_to_fd(bucket_blr)
            return
        # listing_style == ListingStyle.LONG_LONG:
        # We're guaranteed by the caller that the root object is populated.
        bucket = bucket_blr.root_object
        location_constraint = bucket.location
        storage_class = bucket.storageClass
        fields = {
            'bucket': bucket_blr.url_string,
            'storage_class': storage_class,
            'location_constraint': location_constraint,
            'acl': AclTranslation.JsonFromMessage(bucket.acl),
            'default_acl':
            AclTranslation.JsonFromMessage(bucket.defaultObjectAcl),
            'versioning': bucket.versioning and bucket.versioning.enabled,
            'website_config': 'Present' if bucket.website else 'None',
            'logging_config': 'Present' if bucket.logging else 'None',
            'cors_config': 'Present' if bucket.cors else 'None',
            'lifecycle_config': 'Present' if bucket.lifecycle else 'None',
            'requester_pays': bucket.billing and bucket.billing.requesterPays
        }
        if bucket.retentionPolicy:
            fields['retention_policy'] = 'Present'
        if bucket.labels:
            fields['labels'] = LabelTranslation.JsonFromMessage(
                bucket.labels, pretty_print=True)
        else:
            fields['labels'] = 'None'
        if bucket.encryption and bucket.encryption.defaultKmsKeyName:
            fields['default_kms_key'] = bucket.encryption.defaultKmsKeyName
        else:
            fields['default_kms_key'] = 'None'
        fields[
            'encryption_config'] = 'Present' if bucket.encryption else 'None'
        # Fields not available in all APIs (e.g. the XML API)
        if bucket.locationType:
            fields['location_type'] = bucket.locationType
        if bucket.metageneration:
            fields['metageneration'] = bucket.metageneration
        if bucket.timeCreated:
            fields['time_created'] = bucket.timeCreated.strftime(
                '%a, %d %b %Y %H:%M:%S GMT')
        if bucket.updated:
            fields['updated'] = bucket.updated.strftime(
                '%a, %d %b %Y %H:%M:%S GMT')
        if bucket.defaultEventBasedHold:
            fields['default_eventbased_hold'] = bucket.defaultEventBasedHold
        if bucket.iamConfiguration and bucket.iamConfiguration.bucketPolicyOnly:
            enabled = bucket.iamConfiguration.bucketPolicyOnly.enabled
            fields['bucket_policy_only_enabled'] = enabled

        # For field values that are multiline, add indenting to make it look
        # prettier.
        for key in fields:
            previous_value = fields[key]
            if (not isinstance(previous_value, six.string_types)
                    or '\n' not in previous_value):
                continue
            new_value = previous_value.replace('\n', '\n\t  ')
            # Start multiline values on a new line if they aren't already.
            if not new_value.startswith('\n'):
                new_value = '\n\t  ' + new_value
            fields[key] = new_value

        # Only display certain properties if the given API returned them (JSON API
        # returns many fields that the XML API does not).
        location_type_line = ''
        metageneration_line = ''
        time_created_line = ''
        time_updated_line = ''
        default_eventbased_hold_line = ''
        retention_policy_line = ''
        bucket_policy_only_enabled_line = ''
        if 'location_type' in fields:
            location_type_line = '\tLocation type:\t\t\t{location_type}\n'
        if 'metageneration' in fields:
            metageneration_line = '\tMetageneration:\t\t\t{metageneration}\n'
        if 'time_created' in fields:
            time_created_line = '\tTime created:\t\t\t{time_created}\n'
        if 'updated' in fields:
            time_updated_line = '\tTime updated:\t\t\t{updated}\n'
        if 'default_eventbased_hold' in fields:
            default_eventbased_hold_line = (
                '\tDefault Event-Based Hold:\t{default_eventbased_hold}\n')
        if 'retention_policy' in fields:
            retention_policy_line = '\tRetention Policy:\t\t{retention_policy}\n'
        if 'bucket_policy_only_enabled' in fields:
            bucket_policy_only_enabled_line = (
                '\tBucket Policy Only enabled:\t'
                '{bucket_policy_only_enabled}\n')

        text_util.print_to_fd(
            ('{bucket} :\n'
             '\tStorage class:\t\t\t{storage_class}\n' + location_type_line +
             '\tLocation constraint:\t\t{location_constraint}\n'
             '\tVersioning enabled:\t\t{versioning}\n'
             '\tLogging configuration:\t\t{logging_config}\n'
             '\tWebsite configuration:\t\t{website_config}\n'
             '\tCORS configuration: \t\t{cors_config}\n'
             '\tLifecycle configuration:\t{lifecycle_config}\n'
             '\tRequester Pays enabled:\t\t{requester_pays}\n' +
             retention_policy_line + default_eventbased_hold_line +
             '\tLabels:\t\t\t\t{labels}\n' +
             '\tDefault KMS key:\t\t{default_kms_key}\n' + time_created_line +
             time_updated_line + metageneration_line +
             bucket_policy_only_enabled_line + '\tACL:\t\t\t\t{acl}\n'
             '\tDefault ACL:\t\t\t{default_acl}').format(**fields))
        if bucket_blr.storage_url.scheme == 's3':
            text_util.print_to_fd(
                'Note: this is an S3 bucket so configuration values may be '
                'blank. To retrieve bucket configuration values, use '
                'individual configuration commands such as gsutil acl get '
                '<bucket>.')
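
    # For orientation, a hedged sketch of a few lines the LONG_LONG report
    # printed by _PrintBucketInfo above might render (bucket name and all
    # values here are hypothetical, not taken from the source):
    #
    #   gs://example-bucket/ :
    #           Storage class:                  STANDARD
    #           Location constraint:            US
    #           Versioning enabled:             True
    #           ...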

    def _PrintLongListing(self, bucket_listing_ref):
        """Prints an object with ListingStyle.LONG."""
        obj = bucket_listing_ref.root_object
        url_str = bucket_listing_ref.url_string
        if (obj.metadata and S3_DELETE_MARKER_GUID
                in obj.metadata.additionalProperties):
            size_string = '0'
            num_bytes = 0
            num_objs = 0
            url_str += '<DeleteMarker>'
        else:
            size_string = (MakeHumanReadable(obj.size)
                           if self.human_readable else str(obj.size))
            num_bytes = obj.size
            num_objs = 1

        timestamp = JSON_TIMESTAMP_RE.sub(r'\1T\2Z', str(obj.timeCreated))
        printstr = '%(size)10s  %(timestamp)s  %(url)s'
        encoded_etag = None
        encoded_metagen = None
        if self.all_versions:
            printstr += '  metageneration=%(metageneration)s'
            encoded_metagen = str(obj.metageneration)
        if self.include_etag:
            printstr += '  etag=%(etag)s'
            encoded_etag = obj.etag
        format_args = {
            'size': size_string,
            'timestamp': timestamp,
            'url': url_str,
            'metageneration': encoded_metagen,
            'etag': encoded_etag
        }
        text_util.print_to_fd(printstr % format_args)
        return (num_objs, num_bytes)
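
    # A hypothetical line _PrintLongListing above could emit when both -l and
    # -e are given (size, timestamp, URL, then etag; values are illustrative):
    #
    #       1024  2013-01-01T01:01:01Z  gs://bucket/obj  etag=CJjh...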

    def RunCommand(self):
        """Command entry point for the ls command."""
        got_nomatch_errors = False
        got_bucket_nomatch_errors = False
        listing_style = ListingStyle.SHORT
        get_bucket_info = False
        self.recursion_requested = False
        self.all_versions = False
        self.include_etag = False
        self.human_readable = False
        self.list_subdir_contents = True
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-e':
                    self.include_etag = True
                elif o == '-b':
                    get_bucket_info = True
                elif o == '-h':
                    self.human_readable = True
                elif o == '-l':
                    listing_style = ListingStyle.LONG
                elif o == '-L':
                    listing_style = ListingStyle.LONG_LONG
                elif o == '-p':
                    # Project IDs are sent as header values when using gs and s3 XML APIs.
                    InsistAscii(
                        a, 'Invalid non-ASCII character found in project ID')
                    self.project_id = a
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                elif o == '-d':
                    self.list_subdir_contents = False

        if not self.args:
            # default to listing all gs buckets
            self.args = ['gs://']

        total_objs = 0
        total_bytes = 0

        def MaybePrintBucketHeader(blr):
            if len(self.args) > 1:
                text_util.print_to_fd('%s:' % six.ensure_text(blr.url_string))

        print_bucket_header = MaybePrintBucketHeader

        for url_str in self.args:
            storage_url = StorageUrlFromString(url_str)
            if storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            bucket_fields = None
            if (listing_style == ListingStyle.SHORT
                    or listing_style == ListingStyle.LONG):
                bucket_fields = ['id']
            elif listing_style == ListingStyle.LONG_LONG:
                bucket_fields = [
                    'acl',
                    'billing',
                    'cors',
                    'defaultObjectAcl',
                    'encryption',
                    'iamConfiguration',
                    'labels',
                    'location',
                    'locationType',
                    'logging',
                    'lifecycle',
                    'metageneration',
                    'retentionPolicy',
                    'defaultEventBasedHold',
                    'storageClass',
                    'timeCreated',
                    'updated',
                    'versioning',
                    'website',
                ]
            if storage_url.IsProvider():
                # Provider URL: use bucket wildcard to list buckets.
                for blr in self.WildcardIterator(
                        '%s://*' % storage_url.scheme).IterBuckets(
                            bucket_fields=bucket_fields):
                    self._PrintBucketInfo(blr, listing_style)
            elif storage_url.IsBucket() and get_bucket_info:
                # ls -b bucket listing request: List info about bucket(s).
                total_buckets = 0
                for blr in self.WildcardIterator(url_str).IterBuckets(
                        bucket_fields=bucket_fields):
                    if not ContainsWildcard(url_str) and not blr.root_object:
                        # Iterator does not make an HTTP call for non-wildcarded
                        # listings with fields=='id'. Ensure the bucket exists by calling
                        # GetBucket.
                        self.gsutil_api.GetBucket(blr.storage_url.bucket_name,
                                                  fields=['id'],
                                                  provider=storage_url.scheme)
                    self._PrintBucketInfo(blr, listing_style)
                    total_buckets += 1
                if not ContainsWildcard(url_str) and not total_buckets:
                    got_bucket_nomatch_errors = True
            else:
                # URL names a bucket, object, or object subdir ->
                # list matching object(s) / subdirs.
                def _PrintPrefixLong(blr):
                    text_util.print_to_fd(
                        '%-33s%s' % ('', six.ensure_text(blr.url_string)))

                if listing_style == ListingStyle.SHORT:
                    # ls helper by default readies us for a short listing.
                    listing_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        all_versions=self.all_versions,
                        print_bucket_header_func=print_bucket_header,
                        should_recurse=self.recursion_requested,
                        list_subdir_contents=self.list_subdir_contents)
                elif listing_style == ListingStyle.LONG:
                    bucket_listing_fields = [
                        'name',
                        'size',
                        'timeCreated',
                        'updated',
                    ]
                    if self.all_versions:
                        bucket_listing_fields.extend([
                            'generation',
                            'metageneration',
                        ])
                    if self.include_etag:
                        bucket_listing_fields.append('etag')

                    listing_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=self._PrintLongListing,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields,
                        list_subdir_contents=self.list_subdir_contents)

                elif listing_style == ListingStyle.LONG_LONG:
                    # List all fields
                    bucket_listing_fields = (UNENCRYPTED_FULL_LISTING_FIELDS +
                                             ENCRYPTED_FIELDS)
                    listing_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=PrintFullInfoAboutObject,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields,
                        list_subdir_contents=self.list_subdir_contents)
                else:
                    raise CommandException('Unknown listing style: %s' %
                                           listing_style)

                exp_dirs, exp_objs, exp_bytes = (
                    listing_helper.ExpandUrlAndPrint(storage_url))
                if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
                    got_nomatch_errors = True
                total_bytes += exp_bytes
                total_objs += exp_objs

        if total_objs and listing_style != ListingStyle.SHORT:
            text_util.print_to_fd('TOTAL: %d objects, %d bytes (%s)' %
                                  (total_objs, total_bytes,
                                   MakeHumanReadable(float(total_bytes))))
        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')
        if got_bucket_nomatch_errors:
            raise NotFoundException(
                'One or more bucket URLs matched no buckets.')

        return 0
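
The LONG listing above relies on JSON_TIMESTAMP_RE to turn str(obj.timeCreated) into an ISO-8601-style stamp. Below is a minimal standalone sketch of that normalization, assuming a pattern that simply captures the date and time halves of a datetime's string form; the actual pattern defined in gsutil may differ.

import re

# Hypothetical stand-in for gsutil's JSON_TIMESTAMP_RE.
JSON_TIMESTAMP_RE = re.compile(r'([0-9-]+) ([0-9:]+)(?:\.\d+)?(?:\+00:00)?')

def normalize_timestamp(dt_str):
    # Rewrite '2013-01-01 01:01:01.000000+00:00' as '2013-01-01T01:01:01Z'.
    return JSON_TIMESTAMP_RE.sub(r'\1T\2Z', dt_str)

print(normalize_timestamp('2013-01-01 01:01:01.000000+00:00'))
# -> 2013-01-01T01:01:01Z
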
Example #21
class LsCommand(Command):
    """Implementation of gsutil ls command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'ls',
        command_name_aliases=['dir', 'list'],
        usage_synopsis=_SYNOPSIS,
        min_args=0,
        max_args=NO_MAX,
        supported_sub_args='aeblLhp:rR',
        file_url_ok=False,
        provider_url_ok=True,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='ls',
        help_name_aliases=['dir', 'list'],
        help_type='command_help',
        help_one_line_summary='List providers, buckets, or objects',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def _PrintBucketInfo(self, bucket_blr, listing_style):
        """Print listing info for given bucket.

    Args:
      bucket_blr: BucketListingReference for the bucket being listed.
      listing_style: ListingStyle enum describing type of output desired.

    Returns:
      None; the bucket report is printed to stdout.
    """
        if (listing_style == ListingStyle.SHORT
                or listing_style == ListingStyle.LONG):
            print(bucket_blr)
            return
        # listing_style == ListingStyle.LONG_LONG:
        # We're guaranteed by the caller that the root object is populated.
        bucket = bucket_blr.root_object
        location_constraint = bucket.location
        storage_class = bucket.storageClass
        fields = {
            'bucket': bucket_blr.url_string,
            'storage_class': storage_class,
            'location_constraint': location_constraint,
            'acl': AclTranslation.JsonFromMessage(bucket.acl),
            'default_acl':
            AclTranslation.JsonFromMessage(bucket.defaultObjectAcl)
        }

        fields['versioning'] = bucket.versioning and bucket.versioning.enabled
        fields['website_config'] = 'Present' if bucket.website else 'None'
        fields['logging_config'] = 'Present' if bucket.logging else 'None'
        fields['cors_config'] = 'Present' if bucket.cors else 'None'
        fields['lifecycle_config'] = 'Present' if bucket.lifecycle else 'None'

        # For field values that are multiline, add indenting to make it look
        # prettier.
        for key in fields:
            previous_value = fields[key]
            if (not isinstance(previous_value, six.string_types)
                    or '\n' not in previous_value):
                continue
            new_value = previous_value.replace('\n', '\n\t  ')
            # Start multiline values on a new line if they aren't already.
            if not new_value.startswith('\n'):
                new_value = '\n\t  ' + new_value
            fields[key] = new_value

        print(
            '{bucket} :\n'
            '\tStorage class:\t\t\t{storage_class}\n'
            '\tLocation constraint:\t\t{location_constraint}\n'
            '\tVersioning enabled:\t\t{versioning}\n'
            '\tLogging configuration:\t\t{logging_config}\n'
            '\tWebsite configuration:\t\t{website_config}\n'
            '\tCORS configuration: \t\t{cors_config}\n'
            '\tLifecycle configuration:\t{lifecycle_config}\n'
            '\tACL:\t\t\t\t{acl}\n'
            '\tDefault ACL:\t\t\t{default_acl}'.format(**fields))
        if bucket_blr.storage_url.scheme == 's3':
            print(
                'Note: this is an S3 bucket so configuration values may be '
                'blank. To retrieve bucket configuration values, use '
                'individual configuration commands such as gsutil acl get '
                '<bucket>.')

    def _PrintLongListing(self, bucket_listing_ref):
        """Prints an object with ListingStyle.LONG."""
        obj = bucket_listing_ref.root_object
        url_str = bucket_listing_ref.url_string
        if (obj.metadata and S3_DELETE_MARKER_GUID
                in obj.metadata.additionalProperties):
            size_string = '0'
            num_bytes = 0
            num_objs = 0
            url_str += '<DeleteMarker>'
        else:
            size_string = (MakeHumanReadable(obj.size)
                           if self.human_readable else str(obj.size))
            num_bytes = obj.size
            num_objs = 1

        timestamp = JSON_TIMESTAMP_RE.sub(r'\1T\2Z', str(obj.updated))
        printstr = '%(size)10s  %(timestamp)s  %(url)s'
        encoded_etag = None
        encoded_metagen = None
        if self.all_versions:
            printstr += '  metageneration=%(metageneration)s'
            encoded_metagen = str(obj.metageneration)
        if self.include_etag:
            printstr += '  etag=%(etag)s'
            encoded_etag = obj.etag
        format_args = {
            'size': size_string,
            'timestamp': timestamp,
            'url': url_str,
            'metageneration': encoded_metagen,
            'etag': encoded_etag
        }
        print(printstr % format_args)
        return (num_objs, num_bytes)

    def RunCommand(self):
        """Command entry point for the ls command."""
        got_nomatch_errors = False
        got_bucket_nomatch_errors = False
        listing_style = ListingStyle.SHORT
        get_bucket_info = False
        self.recursion_requested = False
        self.all_versions = False
        self.include_etag = False
        self.human_readable = False
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-e':
                    self.include_etag = True
                elif o == '-b':
                    get_bucket_info = True
                elif o == '-h':
                    self.human_readable = True
                elif o == '-l':
                    listing_style = ListingStyle.LONG
                elif o == '-L':
                    listing_style = ListingStyle.LONG_LONG
                elif o == '-p':
                    self.project_id = a
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True

        if not self.args:
            # default to listing all gs buckets
            self.args = ['gs://']

        total_objs = 0
        total_bytes = 0

        def MaybePrintBucketHeader(blr):
            if len(self.args) > 1:
                print('%s:' % blr.url_string)

        print_bucket_header = MaybePrintBucketHeader

        for url_str in self.args:
            storage_url = StorageUrlFromString(url_str)
            if storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            bucket_fields = None
            if (listing_style == ListingStyle.SHORT
                    or listing_style == ListingStyle.LONG):
                bucket_fields = ['id']
            elif listing_style == ListingStyle.LONG_LONG:
                bucket_fields = [
                    'location', 'storageClass', 'versioning', 'acl',
                    'defaultObjectAcl', 'website', 'logging', 'cors',
                    'lifecycle'
                ]
            if storage_url.IsProvider():
                # Provider URL: use bucket wildcard to list buckets.
                for blr in self.WildcardIterator(
                        '%s://*' % storage_url.scheme).IterBuckets(
                            bucket_fields=bucket_fields):
                    self._PrintBucketInfo(blr, listing_style)
            elif storage_url.IsBucket() and get_bucket_info:
                # ls -b bucket listing request: List info about bucket(s).
                total_buckets = 0
                for blr in self.WildcardIterator(url_str).IterBuckets(
                        bucket_fields=bucket_fields):
                    if not ContainsWildcard(url_str) and not blr.root_object:
                        # Iterator does not make an HTTP call for non-wildcarded
                        # listings with fields=='id'. Ensure the bucket exists by calling
                        # GetBucket.
                        self.gsutil_api.GetBucket(blr.storage_url.bucket_name,
                                                  fields=['id'],
                                                  provider=storage_url.scheme)
                    self._PrintBucketInfo(blr, listing_style)
                    total_buckets += 1
                if not ContainsWildcard(url_str) and not total_buckets:
                    got_bucket_nomatch_errors = True
            else:
                # URL names a bucket, object, or object subdir ->
                # list matching object(s) / subdirs.
                def _PrintPrefixLong(blr):
                    print('%-33s%s' % ('', blr.url_string))

                if listing_style == ListingStyle.SHORT:
                    # ls helper by default readies us for a short listing.
                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        all_versions=self.all_versions,
                        print_bucket_header_func=print_bucket_header,
                        should_recurse=self.recursion_requested)
                elif listing_style == ListingStyle.LONG:
                    bucket_listing_fields = ['name', 'updated', 'size']
                    if self.all_versions:
                        bucket_listing_fields.extend(
                            ['generation', 'metageneration'])
                    if self.include_etag:
                        bucket_listing_fields.append('etag')

                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=self._PrintLongListing,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields)

                elif listing_style == ListingStyle.LONG_LONG:
                    # List all fields
                    bucket_listing_fields = None
                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=PrintFullInfoAboutObject,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields)
                else:
                    raise CommandException('Unknown listing style: %s' %
                                           listing_style)

                exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
                    storage_url)
                if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
                    got_nomatch_errors = True
                total_bytes += exp_bytes
                total_objs += exp_objs

        if total_objs and listing_style != ListingStyle.SHORT:
            print('TOTAL: %d objects, %d bytes (%s)' %
                  (total_objs, total_bytes,
                   MakeHumanReadable(float(total_bytes))))
        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')
        if got_bucket_nomatch_errors:
            raise NotFoundException(
                'One or more bucket URLs matched no buckets.')

        return 0
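
Both versions of _PrintBucketInfo share the pass that indents multiline field values (such as pretty-printed ACL JSON) so they line up under the report labels. A self-contained sketch of that step, with illustrative input data (Python 3):

def indent_multiline_values(fields):
    """Indent multiline string values so nested JSON lines up in the report."""
    for key, value in fields.items():
        if not isinstance(value, str) or '\n' not in value:
            continue
        value = value.replace('\n', '\n\t  ')
        # Start multiline values on a new line if they aren't already.
        if not value.startswith('\n'):
            value = '\n\t  ' + value
        fields[key] = value

# Illustrative input: a pretty-printed ACL entry next to a scalar field.
fields = {'acl': '[\n  {"entity": "allUsers", "role": "READER"}\n]',
          'storage_class': 'STANDARD'}
indent_multiline_values(fields)
print(fields['acl'])
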
Example #22
class RmCommand(Command):
    """Implementation of gsutil rm command."""

    # Command specification. See base class for documentation.
    command_spec = Command.CreateCommandSpec(
        'rm',
        command_name_aliases=['del', 'delete', 'remove'],
        usage_synopsis=_SYNOPSIS,
        min_args=1,
        max_args=NO_MAX,
        supported_sub_args='afrR',
        file_url_ok=False,
        provider_url_ok=False,
        urls_start_arg=0,
        gs_api_support=[ApiSelector.XML, ApiSelector.JSON],
        gs_default_api=ApiSelector.JSON,
        argparse_arguments=[CommandArgument.MakeZeroOrMoreCloudURLsArgument()])
    # Help specification. See help_provider.py for documentation.
    help_spec = Command.HelpSpec(
        help_name='rm',
        help_name_aliases=['del', 'delete', 'remove'],
        help_type='command_help',
        help_one_line_summary='Remove objects',
        help_text=_DETAILED_HELP_TEXT,
        subcommand_help_text={},
    )

    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        bucket_urls_to_delete = []
        bucket_strings_to_delete = []
        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in self.args:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        # Used to track if any files failed to be removed.
        self.everything_removed_okay = True

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                self.args,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(_RemoveFuncWrapper,
                       name_expansion_iterator,
                       _RemoveExceptionHandler,
                       fail_on_error=(not self.continue_on_error))

        # Assuming the bucket has versioning enabled, URLs that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if not bucket_urls_to_delete and not self.continue_on_error:
                raise
            # Reset the failure count if we failed due to an empty bucket that we're
            # going to delete.
            msg = 'No URLs matched: '
            if msg in str(e):
                parts = str(e).split(msg)
                if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
                    ResetFailureCount()
        except ServiceException as e:
            if not self.continue_on_error:
                raise

        if not self.everything_removed_okay and not self.continue_on_error:
            raise CommandException('Some files could not be removed.')

        # If this was a gsutil rm -r command covering any bucket subdirs,
        # remove any dir_$folder$ objects (which are created by various web UI
        # tools to simulate folders).
        if self.recursion_requested:
            had_previous_failures = GetFailureCount() > 0
            folder_object_wildcards = []
            for url_str in self.args:
                url = StorageUrlFromString(url_str)
                if url.IsObject():
                    folder_object_wildcards.append('%s**_$folder$' % url_str)
            if folder_object_wildcards:
                self.continue_on_error = True
                try:
                    name_expansion_iterator = NameExpansionIterator(
                        self.command_name,
                        self.debug,
                        self.logger,
                        self.gsutil_api,
                        folder_object_wildcards,
                        self.recursion_requested,
                        project_id=self.project_id,
                        all_versions=self.all_versions)
                    # When we're removing folder objects, always continue on error
                    self.Apply(_RemoveFuncWrapper,
                               name_expansion_iterator,
                               _RemoveFoldersExceptionHandler,
                               fail_on_error=False)
                except CommandException as e:
                    # Ignore exception from name expansion due to an absent folder file.
                    if not e.reason.startswith('No URLs matched:'):
                        raise
                if not had_previous_failures:
                    ResetFailureCount()

        # Now that all data has been deleted, delete any bucket URLs.
        for url in bucket_urls_to_delete:
            self.logger.info('Removing %s...', url)

            @Retry(NotEmptyException, tries=3, timeout_secs=1)
            def BucketDeleteWithRetry():
                self.gsutil_api.DeleteBucket(url.bucket_name,
                                             provider=url.scheme)

            BucketDeleteWithRetry()

        return 0