Example #1
    def RunCommand(self):
        """Command entry point for the setmeta command."""
        headers = []
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-n':
                    self.logger.warning(
                        'Warning: gsutil setmeta -n is now on by default, and will be '
                        'removed in the future.\nPlease use gsutil acl set ... to set '
                        'canned ACLs.')
                elif o == '-h':
                    if 'x-goog-acl' in a or 'x-amz-acl' in a:
                        raise CommandException(
                            'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                            'set ... to set canned ACLs.')
                    headers.append(a)

        (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

        self.metadata_change = metadata_plus
        for header in metadata_minus:
            self.metadata_change[header] = ''

        if len(self.args) == 1 and not self.recursion_requested:
            url = StorageUrlFromString(self.args[0])
            if not (url.IsCloudUrl() and url.IsObject()):
                raise CommandException('URL (%s) must name an object' %
                                       self.args[0])

        # Used to track if any objects' metadata failed to be set.
        self.everything_set_okay = True

        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            self.args,
            self.recursion_requested,
            all_versions=self.all_versions,
            continue_on_error=self.parallel_operations)

        try:
            # Perform requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(_SetMetadataFuncWrapper,
                       name_expansion_iterator,
                       _SetMetadataExceptionHandler,
                       fail_on_error=True)
        except AccessDeniedException as e:
            if e.status == 403:
                self._WarnServiceAccounts()
            raise

        if not self.everything_set_okay:
            raise CommandException(
                'Metadata for some objects could not be set.')

        return 0
Example #2
    def RunCommand(self):
        """Command entry point for the mv command."""
        # Check each source arg up front, refusing to delete a bucket src URL
        # (force users to explicitly do that as a separate operation).
        for arg_to_check in self.args[0:-1]:
            url = StorageUrlFromString(arg_to_check)
            if url.IsCloudUrl() and (url.IsBucket() or url.IsProvider()):
                raise CommandException(
                    'You cannot move a source bucket using the mv '
                    'command. If you meant to move\nall objects in '
                    'the bucket, you can use a command like:\n'
                    '\tgsutil mv %s/* %s' % (arg_to_check, self.args[-1]))

        # Insert command-line opts in front of args so they'll be picked up by cp
        # and rm commands (e.g., for -p option). Use undocumented (internal
        # use-only) cp -M option, which causes each original object to be deleted
        # after successfully copying to its destination, and also causes naming
        # behavior consistent with Unix mv naming behavior (see comments in
        # ConstructDstUrl).
        unparsed_args = ['-M']
        if self.recursion_requested:
            unparsed_args.append('-R')
        unparsed_args.extend(self.unparsed_args)
        self.command_runner.RunNamedCommand('cp', unparsed_args, self.headers,
                                            self.debug,
                                            self.parallel_operations)

        return 0
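
The comment block above explains the delegation: mv is cp with the internal -M flag plus the caller's remaining arguments. A minimal sketch of the argument list handed to cp for a hypothetical recursive move (the exact contents of self.unparsed_args depend on gsutil's option parsing and are simplified here):

# Sketch only: hypothetical rewriting for "gsutil mv -R gs://bucket/dir gs://other".
unparsed_args = ['-M']                     # internal cp flag: delete each source after copy
recursion_requested = True                 # True when -r/-R was passed to mv
if recursion_requested:
    unparsed_args.append('-R')
unparsed_args.extend(['gs://bucket/dir', 'gs://other'])
print(unparsed_args)
# ['-M', '-R', 'gs://bucket/dir', 'gs://other'] -> behaves like:
#   gsutil cp -M -R gs://bucket/dir gs://other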
Example #3
def GetUserAgent(args, metrics_off=True):
    """Using the command arguments return a suffix for the UserAgent string.

  Args:
    args: str[], parsed set of arguments entered in the CLI.
    metrics_off: boolean, whether the MetricsCollector is disabled.

  Returns:
    str, A string value that can be appended to an existing UserAgent.
  """
    user_agent = ' gsutil/%s' % gslib.VERSION
    user_agent += ' (%s)' % sys.platform
    user_agent += ' analytics/%s' % ('disabled' if metrics_off else 'enabled')
    user_agent += ' interactive/%s' % system_util.IsRunningInteractively()

    if len(args) > 0:
        user_agent += ' command/%s' % args[0]

        if len(args) > 2:
            if args[0] in ['cp', 'mv', 'rsync']:
                # Any cp, mv or rsync commands that use daisy chain mode should be noted
                # as that represents a unique use case that may be better served by the
                # storage transfer service.
                try:
                    src = StorageUrlFromString(six.ensure_text(args[-2]))
                    dst = StorageUrlFromString(six.ensure_text(args[-1]))
                    if (src.IsCloudUrl() and dst.IsCloudUrl() and
                            src.scheme != dst.scheme):
                        user_agent += '-DaisyChain'
                except InvalidUrlError:
                    pass
            elif args[0] == 'rewrite':
                if '-k' in args:
                    # Rewrite encryption key.
                    user_agent += '-k'
                if '-s' in args:
                    # Rewrite storage class.
                    user_agent += '-s'

    if system_util.InvokedViaCloudSdk():
        user_agent += ' google-cloud-sdk'
        if system_util.CloudSdkVersion():
            user_agent += '/%s' % system_util.CloudSdkVersion()

    return user_agent
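
A minimal sketch of the daisy-chain detection above, run outside the command framework. The module path gslib.storage_url is an assumption about gsutil's layout, and the printed suffix is only the command/... fragment, not the full user agent:

# Sketch, assuming gsutil's gslib package is importable in this environment.
import six
from gslib.storage_url import StorageUrlFromString  # assumed module path

args = ['cp', '-r', 's3://src-bucket/obj', 'gs://dst-bucket/obj']
suffix = ' command/%s' % args[0]
src = StorageUrlFromString(six.ensure_text(args[-2]))
dst = StorageUrlFromString(six.ensure_text(args[-1]))
if src.IsCloudUrl() and dst.IsCloudUrl() and src.scheme != dst.scheme:
    suffix += '-DaisyChain'                # cross-provider copy (s3 -> gs)
print(suffix)                              # " command/cp-DaisyChain"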
Example #4
    def _EnumerateNotificationsFromArgs(self,
                                        accept_notification_configs=True):
        """Yields bucket/notification tuples from command-line args.

    Given a list of strings that are bucket names (gs://foo) or notification
    config IDs, yield tuples of bucket names and their associated notifications.

    Args:
      accept_notification_configs: whether notification configs are valid args.
    Yields:
      Tuples of the form (bucket_name, Notification)
    """
        path_regex = self._GetNotificationPathRegex()

        for list_entry in self.args:
            match = path_regex.match(list_entry)
            if match:
                if not accept_notification_configs:
                    raise CommandException(
                        '%s %s accepts only bucket names, but you provided %s' %
                        (self.command_name, self.subcommand_name, list_entry))
                bucket_name = match.group('bucket')
                notification_id = match.group('notification')
                found = False
                for notification in self.gsutil_api.ListNotificationConfigs(
                        bucket_name, provider='gs'):
                    if notification.id == notification_id:
                        yield (bucket_name, notification)
                        found = True
                        break
                if not found:
                    raise NotFoundException('Could not find notification %s' %
                                            list_entry)
            else:
                storage_url = StorageUrlFromString(list_entry)
                if not storage_url.IsCloudUrl():
                    raise CommandException(
                        'The %s command must be used on cloud buckets or notification '
                        'config names.' % self.command_name)
                if storage_url.scheme != 'gs':
                    raise CommandException(
                        'The %s command only works on gs:// buckets.' %
                        self.command_name)
                path = None
                if storage_url.IsProvider():
                    path = 'gs://*'
                elif storage_url.IsBucket():
                    path = list_entry
                if not path:
                    raise CommandException(
                        'The %s command cannot be used on cloud objects, only buckets'
                        % self.command_name)
                for blr in self.WildcardIterator(path).IterBuckets(
                        bucket_fields=['id']):
                    for notification in self.gsutil_api.ListNotificationConfigs(
                            blr.storage_url.bucket_name, provider='gs'):
                        yield (blr.storage_url.bucket_name, notification)
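
For reference, a hedged approximation of the pattern that _GetNotificationPathRegex presumably returns; the canonical config name format appears later in Example #9 ('projects/_/buckets/<bucket>/notificationConfigs/<id>'), but the exact regex in gsutil may accept additional forms:

import re

# Approximation only; gsutil's real regex may be broader.
path_regex = re.compile(
    r'projects/[^/]+/buckets/(?P<bucket>[^/]+)/'
    r'notificationConfigs/(?P<notification>[^/]+)')

match = path_regex.match('projects/_/buckets/my-bucket/notificationConfigs/5')
print(match.group('bucket'), match.group('notification'))  # my-bucket 5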
Example #5
    def test_storage_url_from_string(self):
        storage_url = StorageUrlFromString('abc')
        self.assertTrue(storage_url.IsFileUrl())
        self.assertEqual('abc', storage_url.object_name)

        storage_url = StorageUrlFromString('file://abc/123')
        self.assertTrue(storage_url.IsFileUrl())
        self.assertEqual('abc/123', storage_url.object_name)

        storage_url = StorageUrlFromString('gs://abc/123')
        self.assertTrue(storage_url.IsCloudUrl())
        self.assertEqual('abc', storage_url.bucket_name)
        self.assertEqual('123', storage_url.object_name)

        storage_url = StorageUrlFromString('s3://abc/123')
        self.assertTrue(storage_url.IsCloudUrl())
        self.assertEqual('abc', storage_url.bucket_name)
        self.assertEqual('123', storage_url.object_name)
Example #6
def HaveProviderUrls(args_to_check):
    """Checks whether args_to_check contains any provider URLs (like 'gs://').

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any provider URLs.
  """
    for url_str in args_to_check:
        storage_url = StorageUrlFromString(url_str)
        if storage_url.IsCloudUrl() and storage_url.IsProvider():
            return True
    return False
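
A quick usage sketch distinguishing provider, bucket, and object URLs; only 'gs://' (a scheme with no bucket) counts as a provider URL. The import path is an assumption about gsutil's layout:

from gslib.storage_url import StorageUrlFromString  # assumed module path

for url_str in ('gs://', 'gs://bucket', 'gs://bucket/obj', 'local/file.txt'):
    url = StorageUrlFromString(url_str)
    is_provider = url.IsCloudUrl() and url.IsProvider()
    print('%-20s provider=%s' % (url_str, is_provider))
# Only 'gs://' prints provider=True, so HaveProviderUrls would return True for
# an argument list containing it.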
Example #7
    def _SetHold(self, obj_metadata_update_wrapper, url_args,
                 sub_command_full_name):
        """Common logic to set or unset Event-Based/Temporary Hold on objects.

    Args:
      obj_metadata_update_wrapper: The function for updating related fields in
                                   Object metadata.
      url_args: List of object URIs.
      sub_command_full_name: The full name for sub-command:
                             "Temporary" / "Event-Based"
    """
        if len(url_args) == 1 and not self.recursion_requested:
            url = StorageUrlFromString(url_args[0])
            if not (url.IsCloudUrl() and url.IsObject()):
                raise CommandException('URL ({}) must name an object'.format(
                    url_args[0]))

        name_expansion_iterator = self._GetObjectNameExpansionIterator(
            url_args)
        seek_ahead_iterator = self._GetSeekAheadNameExpansionIterator(url_args)

        # Used to track if any objects' metadata failed to be set.
        self.everything_set_okay = True

        try:
            # TODO: implement '-c' flag to continue_on_error

            # Perform requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(obj_metadata_update_wrapper,
                       name_expansion_iterator,
                       UpdateObjectMetadataExceptionHandler,
                       fail_on_error=True,
                       seek_ahead_iterator=seek_ahead_iterator)

        except AccessDeniedException as e:
            if e.status == 403:
                self._WarnServiceAccounts()
            raise

        if not self.everything_set_okay:
            raise CommandException(
                '{} Hold for some objects could not be set.'.format(
                    sub_command_full_name))
Example #8
def LookUpGsutilVersion(gsutil_api, url_str):
  """Looks up the gsutil version of the specified gsutil tarball URL.

  Version is specified in the metadata field set on that object.

  Args:
    gsutil_api: gsutil Cloud API to use when retrieving gsutil tarball.
    url_str: tarball URL to retrieve (such as 'gs://pub/gsutil.tar.gz').

  Returns:
    Version string if URL is a cloud URL containing x-goog-meta-gsutil-version
    metadata, else None.
  """
  url = StorageUrlFromString(url_str)
  if url.IsCloudUrl():
    obj = gsutil_api.GetObjectMetadata(url.bucket_name, url.object_name,
                                       provider=url.scheme,
                                       fields=['metadata'])
    if obj.metadata and obj.metadata.additionalProperties:
      for prop in obj.metadata.additionalProperties:
        if prop.key == 'gsutil_version':
          return prop.value
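
A short usage sketch, assuming LookUpGsutilVersion from the example above is in scope and that gsutil_api is an already-constructed gsutil Cloud API object (for example, a command's self.gsutil_api); the tarball URL comes from the docstring:

def print_published_version(gsutil_api):
  # Prints nothing if the URL is not a cloud URL or the object carries no
  # gsutil_version metadata entry (LookUpGsutilVersion then returns None).
  version = LookUpGsutilVersion(gsutil_api, 'gs://pub/gsutil.tar.gz')
  if version:
    print('Published gsutil version: %s' % version)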
Example #9
    def _Create(self):
        self.CheckArguments()

        # User-specified options
        pubsub_topic = None
        payload_format = None
        custom_attributes = {}
        event_types = []
        object_name_prefix = None
        should_setup_topic = True

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-e':
                    event_types.append(a)
                elif o == '-f':
                    payload_format = a
                elif o == '-m':
                    if ':' not in a:
                        raise CommandException(
                            'Custom attributes specified with -m should be of the form '
                            'key:value')
                    key, value = a.split(':')
                    custom_attributes[key] = value
                elif o == '-p':
                    object_name_prefix = a
                elif o == '-s':
                    should_setup_topic = False
                elif o == '-t':
                    pubsub_topic = a

        if payload_format not in PAYLOAD_FORMAT_MAP:
            raise CommandException(
                "Must provide a payload format with -f of either 'json' or 'none'"
            )
        payload_format = PAYLOAD_FORMAT_MAP[payload_format]

        bucket_arg = self.args[-1]

        bucket_url = StorageUrlFromString(bucket_arg)
        if not bucket_url.IsCloudUrl() or not bucket_url.IsBucket():
            raise CommandException(
                "%s %s requires a GCS bucket name, but got '%s'" %
                (self.command_name, self.subcommand_name, bucket_arg))
        if bucket_url.scheme != 'gs':
            raise CommandException(
                'The %s command can only be used with gs:// bucket URLs.' %
                self.command_name)
        bucket_name = bucket_url.bucket_name
        self.logger.debug('Creating notification for bucket %s', bucket_url)

        # Find the project this bucket belongs to
        bucket_metadata = self.gsutil_api.GetBucket(bucket_name,
                                                    fields=['projectNumber'],
                                                    provider=bucket_url.scheme)
        bucket_project_number = bucket_metadata.projectNumber

        # If not specified, choose a sensible default for the Cloud Pub/Sub topic
        # name.
        if not pubsub_topic:
            pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                                      bucket_name)
        if not pubsub_topic.startswith('projects/'):
            # If a user picks a topic ID (mytopic) but doesn't pass the whole
            # name (projects/my-project/topics/mytopic), pick a default project.
            pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                                      pubsub_topic)
        self.logger.debug('Using Cloud Pub/Sub topic %s', pubsub_topic)

        just_modified_topic_permissions = False
        if should_setup_topic:
            # Ask GCS for the email address that represents GCS's permission to
            # publish to a Cloud Pub/Sub topic from this project.
            service_account = self.gsutil_api.GetProjectServiceAccount(
                bucket_project_number,
                provider=bucket_url.scheme).email_address
            self.logger.debug('Service account for project %d: %s',
                              bucket_project_number, service_account)
            just_modified_topic_permissions = self._CreateTopic(
                pubsub_topic, service_account)

        for attempt_number in range(0, 2):
            try:
                create_response = self.gsutil_api.CreateNotificationConfig(
                    bucket_name,
                    pubsub_topic=pubsub_topic,
                    payload_format=payload_format,
                    custom_attributes=custom_attributes,
                    event_types=event_types if event_types else None,
                    object_name_prefix=object_name_prefix,
                    provider=bucket_url.scheme)
                break
            except PublishPermissionDeniedException:
                if attempt_number == 0 and just_modified_topic_permissions:
                    # If we have just set the IAM policy, it may take up to 10 seconds to
                    # take effect.
                    self.logger.info(
                        'Retrying create notification in 10 seconds '
                        '(new permissions may take up to 10 seconds to take effect.)'
                    )
                    time.sleep(10)
                else:
                    raise

        notification_name = 'projects/_/buckets/%s/notificationConfigs/%s' % (
            bucket_name, create_response.id)
        self.logger.info('Created notification config %s', notification_name)

        return 0
Example #10
  def CatUrlStrings(self,
                    url_strings,
                    show_header=False,
                    start_byte=0,
                    end_byte=None,
                    cat_out_fd=None):
    """Prints each of the url strings to stdout.

    Args:
      url_strings: String iterable.
      show_header: If true, print a header per file.
      start_byte: Starting byte of the file to print, used for constructing
                  range requests.
      end_byte: Ending byte of the file to print; used for constructing range
                requests. If this is negative, the start_byte is ignored and
                an end range is sent over HTTP (such as range: bytes -9).
      cat_out_fd: File descriptor to which output should be written. Defaults
                  to stdout if no file descriptor is supplied.
    Returns:
      0 on success.

    Raises:
      CommandException if no URLs can be found.
    """
    printed_one = False
    # This should refer to whatever sys.stdout refers to when this method is
    # run, not when this method is defined, so we do the initialization here
    # rather than define sys.stdout as the cat_out_fd parameter's default value.
    if cat_out_fd is None:
      cat_out_fd = sys.stdout
    # We redirect stdout so that all data other than the object contents goes
    # to stderr.
    old_stdout = sys.stdout
    sys.stdout = sys.stderr
    try:
      if url_strings and url_strings[0] in ('-', 'file://-'):
        self._WriteBytesBufferedFileToFile(sys.stdin, cat_out_fd)
      else:
        for url_str in url_strings:
          did_some_work = False
          # TODO: Get only the needed fields here.
          for blr in self.command_obj.WildcardIterator(url_str).IterObjects(
              bucket_listing_fields=_CAT_BUCKET_LISTING_FIELDS):
            decryption_keywrapper = None
            if (blr.root_object and blr.root_object.customerEncryption and
                blr.root_object.customerEncryption.keySha256):
              decryption_key = FindMatchingCSEKInBotoConfig(
                  blr.root_object.customerEncryption.keySha256, config)
              if not decryption_key:
                raise EncryptionException(
                    'Missing decryption key with SHA256 hash %s. No decryption '
                    'key matches object %s' %
                    (blr.root_object.customerEncryption.keySha256,
                     blr.url_string))
              decryption_keywrapper = CryptoKeyWrapperFromKey(decryption_key)

            did_some_work = True
            if show_header:
              if printed_one:
                print()
              print('==> %s <==' % blr)
              printed_one = True
            cat_object = blr.root_object
            storage_url = StorageUrlFromString(blr.url_string)
            if storage_url.IsCloudUrl():
              compressed_encoding = ObjectIsGzipEncoded(cat_object)
              self.command_obj.gsutil_api.GetObjectMedia(
                  cat_object.bucket,
                  cat_object.name,
                  cat_out_fd,
                  compressed_encoding=compressed_encoding,
                  start_byte=start_byte,
                  end_byte=end_byte,
                  object_size=cat_object.size,
                  generation=storage_url.generation,
                  decryption_tuple=decryption_keywrapper,
                  provider=storage_url.scheme)
            else:
              with open(storage_url.object_name, 'rb') as f:
                self._WriteBytesBufferedFileToFile(f, cat_out_fd)
          if not did_some_work:
            raise CommandException(NO_URLS_MATCHED_TARGET % url_str)
    finally:
      sys.stdout = old_stdout

    return 0
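
A hedged sketch of how a cat-style command might drive this helper; the CatHelper module path and constructor argument are assumptions, and command_obj stands for the gsutil command instance the helper reads gsutil_api and WildcardIterator from:

from gslib.utils.cat_helper import CatHelper  # assumed module path

def run_cat(command_obj, url_strings):
  # url_strings: e.g. ['gs://bucket/obj']; show_header prints a
  # "==> url <==" banner before each object's contents.
  return CatHelper(command_obj).CatUrlStrings(url_strings, show_header=True)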
Example #11
  def RunCommand(self):
    """Command entry point for the setmeta command."""
    headers = []
    if self.sub_opts:
      for o, a in self.sub_opts:
        if o == '-h':
          if 'x-goog-acl' in a or 'x-amz-acl' in a:
            raise CommandException(
                'gsutil setmeta no longer allows canned ACLs. Use gsutil acl '
                'set ... to set canned ACLs.')
          headers.append(a)

    (metadata_minus, metadata_plus) = self._ParseMetadataHeaders(headers)

    self.metadata_change = metadata_plus
    for header in metadata_minus:
      self.metadata_change[header] = ''

    if not self.metadata_change:
      raise CommandException(
          'gsutil setmeta requires one or more headers to be provided with the'
          ' -h flag. See "gsutil help setmeta" for more information.')

    if len(self.args) == 1 and not self.recursion_requested:
      url = StorageUrlFromString(self.args[0])
      if not (url.IsCloudUrl() and url.IsObject()):
        raise CommandException('URL (%s) must name an object' % self.args[0])

    # Used to track if any objects' metadata failed to be set.
    self.everything_set_okay = True

    self.preconditions = PreconditionsFromHeaders(self.headers)

    name_expansion_iterator = NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        self.args,
        self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.parallel_operations,
        bucket_listing_fields=['generation', 'metadata', 'metageneration'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name,
        self.debug,
        self.GetSeekAheadGsutilApi(),
        self.args,
        self.recursion_requested,
        all_versions=self.all_versions,
        project_id=self.project_id)

    try:
      # Perform requests in parallel (-m) mode, if requested, using
      # configured number of parallel processes and threads. Otherwise,
      # perform requests with sequential function calls in current process.
      self.Apply(_SetMetadataFuncWrapper,
                 name_expansion_iterator,
                 _SetMetadataExceptionHandler,
                 fail_on_error=True,
                 seek_ahead_iterator=seek_ahead_iterator)
    except AccessDeniedException as e:
      if e.status == 403:
        self._WarnServiceAccounts()
      raise

    if not self.everything_set_okay:
      raise CommandException('Metadata for some objects could not be set.')

    return 0
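
To round out both setmeta listings, a hedged illustration of the metadata_change mapping they build: per "gsutil help setmeta", a -h header given with a value sets that field and a header given without a value removes it, and _ParseMetadataHeaders (not shown) splits the inputs into those two groups. The key names below and any normalization the parser applies are assumptions:

# Hypothetical output of _ParseMetadataHeaders for:
#   -h 'x-goog-meta-color:blue' -h 'Content-Type:text/plain' -h 'x-goog-meta-old'
metadata_plus = {'x-goog-meta-color': 'blue', 'Content-Type': 'text/plain'}
metadata_minus = ['x-goog-meta-old']

metadata_change = dict(metadata_plus)
for header in metadata_minus:
    metadata_change[header] = ''   # empty value marks the header for removal
print(metadata_change)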