Beispiel #1
0
    def RunCommand(self):
        """Command entry point for the mv command."""
        # Check each source arg up, refusing to delete a bucket src URL (force users
        # to explicitly do that as a separate operation).
        for arg_to_check in self.args[0:-1]:
            url = StorageUrlFromString(arg_to_check)
            if url.IsCloudUrl() and (url.IsBucket() or url.IsProvider()):
                raise CommandException(
                    'You cannot move a source bucket using the mv '
                    'command. If you meant to move\nall objects in '
                    'the bucket, you can use a command like:\n'
                    '\tgsutil mv %s/* %s' % (arg_to_check, self.args[-1]))

        # Insert command-line opts in front of args so they'll be picked up by cp
        # and rm commands (e.g., for -p option). Use undocumented (internal
        # use-only) cp -M option, which causes each original object to be deleted
        # after successfully copying to its destination, and also causes naming
        # behavior consistent with Unix mv naming behavior (see comments in
        # ConstructDstUrl).
        unparsed_args = ['-M']
        if self.recursion_requested:
            unparsed_args.append('-R')
        unparsed_args.extend(self.unparsed_args)
        self.command_runner.RunNamedCommand('cp', unparsed_args, self.headers,
                                            self.debug,
                                            self.parallel_operations)

        return 0
Beispiel #2
0
    def _EnumerateNotificationsFromArgs(self,
                                        accept_notification_configs=True):
        """Yields bucket/notification tuples from command-line args.

    Given a list of strings that are bucket names (gs://foo) or notification
    config IDs, yield tuples of bucket names and their associated notifications.

    Args:
      accept_notification_configs: whether notification configs are valid args.
    Yields:
      Tuples of the form (bucket_name, Notification)
    """
        path_regex = self._GetNotificationPathRegex()

        for list_entry in self.args:
            match = path_regex.match(list_entry)
            if match:
                if not accept_notification_configs:
                    raise CommandException(
                        '%s %s accepts only bucket names, but you provided %s'
                        %
                        (self.command_name, self.subcommand_name, list_entry))
                bucket_name = match.group('bucket')
                notification_id = match.group('notification')
                found = False
                for notification in self.gsutil_api.ListNotificationConfigs(
                        bucket_name, provider='gs'):
                    if notification.id == notification_id:
                        yield (bucket_name, notification)
                        found = True
                        break
                if not found:
                    raise NotFoundException('Could not find notification %s' %
                                            list_entry)
            else:
                storage_url = StorageUrlFromString(list_entry)
                if not storage_url.IsCloudUrl():
                    raise CommandException(
                        'The %s command must be used on cloud buckets or notification '
                        'config names.' % self.command_name)
                if storage_url.scheme != 'gs':
                    raise CommandException(
                        'The %s command only works on gs:// buckets.')
                path = None
                if storage_url.IsProvider():
                    path = 'gs://*'
                elif storage_url.IsBucket():
                    path = list_entry
                if not path:
                    raise CommandException(
                        'The %s command cannot be used on cloud objects, only buckets'
                        % self.command_name)
                for blr in self.WildcardIterator(path).IterBuckets(
                        bucket_fields=['id']):
                    for notification in self.gsutil_api.ListNotificationConfigs(
                            blr.storage_url.bucket_name, provider='gs'):
                        yield (blr.storage_url.bucket_name, notification)
Beispiel #3
0
def HaveProviderUrls(args_to_check):
    """Checks whether args_to_check contains any provider URLs (like 'gs://').

  Args:
    args_to_check: Command-line argument subset to check.

  Returns:
    True if args_to_check contains any provider URLs.
  """
    for url_str in args_to_check:
        storage_url = StorageUrlFromString(url_str)
        if storage_url.IsCloudUrl() and storage_url.IsProvider():
            return True
    return False
Beispiel #4
0
    def RunCommand(self):
        """Command entry point for the ls command."""
        got_nomatch_errors = False
        got_bucket_nomatch_errors = False
        listing_style = ListingStyle.SHORT
        get_bucket_info = False
        self.recursion_requested = False
        self.all_versions = False
        self.include_etag = False
        self.human_readable = False
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-e':
                    self.include_etag = True
                elif o == '-b':
                    get_bucket_info = True
                elif o == '-h':
                    self.human_readable = True
                elif o == '-l':
                    listing_style = ListingStyle.LONG
                elif o == '-L':
                    listing_style = ListingStyle.LONG_LONG
                elif o == '-p':
                    self.project_id = a
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True

        if not self.args:
            # default to listing all gs buckets
            self.args = ['gs://']

        total_objs = 0
        total_bytes = 0

        def MaybePrintBucketHeader(blr):
            if len(self.args) > 1:
                print '%s:' % blr.url_string.encode(UTF8)

        print_bucket_header = MaybePrintBucketHeader

        for url_str in self.args:
            storage_url = StorageUrlFromString(url_str)
            if storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            bucket_fields = None
            if (listing_style == ListingStyle.SHORT
                    or listing_style == ListingStyle.LONG):
                bucket_fields = ['id']
            elif listing_style == ListingStyle.LONG_LONG:
                bucket_fields = [
                    'location', 'storageClass', 'versioning', 'acl',
                    'defaultObjectAcl', 'website', 'logging', 'cors',
                    'lifecycle'
                ]
            if storage_url.IsProvider():
                # Provider URL: use bucket wildcard to list buckets.
                for blr in self.WildcardIterator(
                        '%s://*' % storage_url.scheme).IterBuckets(
                            bucket_fields=bucket_fields):
                    self._PrintBucketInfo(blr, listing_style)
            elif storage_url.IsBucket() and get_bucket_info:
                # ls -b bucket listing request: List info about bucket(s).
                total_buckets = 0
                for blr in self.WildcardIterator(url_str).IterBuckets(
                        bucket_fields=bucket_fields):
                    if not ContainsWildcard(url_str) and not blr.root_object:
                        # Iterator does not make an HTTP call for non-wildcarded
                        # listings with fields=='id'. Ensure the bucket exists by calling
                        # GetBucket.
                        self.gsutil_api.GetBucket(blr.storage_url.bucket_name,
                                                  fields=['id'],
                                                  provider=storage_url.scheme)
                    self._PrintBucketInfo(blr, listing_style)
                    total_buckets += 1
                if not ContainsWildcard(url_str) and not total_buckets:
                    got_bucket_nomatch_errors = True
            else:
                # URL names a bucket, object, or object subdir ->
                # list matching object(s) / subdirs.
                def _PrintPrefixLong(blr):
                    print '%-33s%s' % ('', blr.url_string.encode(UTF8))

                if listing_style == ListingStyle.SHORT:
                    # ls helper by default readies us for a short listing.
                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        all_versions=self.all_versions,
                        print_bucket_header_func=print_bucket_header,
                        should_recurse=self.recursion_requested)
                elif listing_style == ListingStyle.LONG:
                    bucket_listing_fields = ['name', 'updated', 'size']
                    if self.all_versions:
                        bucket_listing_fields.extend(
                            ['generation', 'metageneration'])
                    if self.include_etag:
                        bucket_listing_fields.append('etag')

                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=self._PrintLongListing,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields)

                elif listing_style == ListingStyle.LONG_LONG:
                    # List all fields
                    bucket_listing_fields = None
                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=PrintFullInfoAboutObject,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields)
                else:
                    raise CommandException('Unknown listing style: %s' %
                                           listing_style)

                exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
                    storage_url)
                if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
                    got_nomatch_errors = True
                total_bytes += exp_bytes
                total_objs += exp_objs

        if total_objs and listing_style != ListingStyle.SHORT:
            print('TOTAL: %d objects, %d bytes (%s)' %
                  (total_objs, total_bytes,
                   MakeHumanReadable(float(total_bytes))))
        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')
        if got_bucket_nomatch_errors:
            raise NotFoundException(
                'One or more bucket URLs matched no buckets.')

        return 0
Beispiel #5
0
    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            seek_ahead_iterator = None
            # Cannot seek ahead with stdin args, since we can only iterate them
            # once without buffering in memory.
            if not self.read_args_from_stdin:
                seek_ahead_iterator = SeekAheadNameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.GetSeekAheadGsutilApi(),
                    url_strs,
                    self.recursion_requested,
                    all_versions=self.all_versions,
                    project_id=self.project_id)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                seek_ahead_iterator=seek_ahead_iterator)

        # Assuming the bucket has versioning enabled, url's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        except ServiceException, e:
            if not self.continue_on_error:
                raise
Beispiel #6
0
    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            seek_ahead_iterator = None
            # Cannot seek ahead with stdin args, since we can only iterate them
            # once without buffering in memory.
            if not self.read_args_from_stdin:
                seek_ahead_iterator = SeekAheadNameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.GetSeekAheadGsutilApi(),
                    url_strs,
                    self.recursion_requested,
                    all_versions=self.all_versions,
                    project_id=self.project_id)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                seek_ahead_iterator=seek_ahead_iterator)

        # Assuming the bucket has versioning enabled, url's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        except ServiceException as e:
            if not self.continue_on_error:
                raise

        if self.bucket_not_found_count:
            raise CommandException(
                'Encountered non-existent bucket during listing')

        if self.op_failure_count and not self.continue_on_error:
            raise CommandException('Some files could not be removed.')

        # If this was a gsutil rm -r command covering any bucket subdirs,
        # remove any dir_$folder$ objects (which are created by various web UI
        # tools to simulate folders).
        if self.recursion_requested:
            folder_object_wildcards = []
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsObject():
                    folder_object_wildcards.append('%s**_$folder$' % url_str)
            if folder_object_wildcards:
                self.continue_on_error = True
                try:
                    name_expansion_iterator = NameExpansionIterator(
                        self.command_name,
                        self.debug,
                        self.logger,
                        self.gsutil_api,
                        folder_object_wildcards,
                        self.recursion_requested,
                        project_id=self.project_id,
                        all_versions=self.all_versions)
                    # When we're removing folder objects, always continue on error
                    self.Apply(_RemoveFuncWrapper,
                               name_expansion_iterator,
                               _RemoveFoldersExceptionHandler,
                               fail_on_error=False)
                except CommandException as e:
                    # Ignore exception from name expansion due to an absent folder file.
                    if not e.reason.startswith(NO_URLS_MATCHED_GENERIC):
                        raise

        # Now that all data has been deleted, delete any bucket URLs.
        for url in bucket_urls_to_delete:
            self.logger.info('Removing %s...', url)

            @Retry(NotEmptyException, tries=3, timeout_secs=1)
            def BucketDeleteWithRetry():
                self.gsutil_api.DeleteBucket(url.bucket_name,
                                             provider=url.scheme)

            BucketDeleteWithRetry()

        if self.op_failure_count:
            plural_str = 's' if self.op_failure_count else ''
            raise CommandException(
                '%d file%s/object%s could not be removed.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0
Beispiel #7
0
    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = False
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        bucket_urls_to_delete = []
        bucket_strings_to_delete = []
        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        # Used to track if any files failed to be removed.
        self.everything_removed_okay = True

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(_RemoveFuncWrapper,
                       name_expansion_iterator,
                       _RemoveExceptionHandler,
                       fail_on_error=(not self.continue_on_error))

        # Assuming the bucket has versioning enabled, url's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if not bucket_urls_to_delete and not self.continue_on_error:
                raise
            # Reset the failure count if we failed due to an empty bucket that we're
            # going to delete.
            msg = 'No URLs matched: '
            if msg in str(e):
                parts = str(e).split(msg)
                if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
                    ResetFailureCount()
        except ServiceException, e:
            if not self.continue_on_error:
                raise
Beispiel #8
0
    def RunCommand(self):
        """Command entry point for the du command."""
        self.line_ending = '\n'
        self.all_versions = False
        self.produce_total = False
        self.human_readable = False
        self.summary_only = False
        self.exclude_patterns = []
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-0':
                    self.line_ending = '\0'
                elif o == '-a':
                    self.all_versions = True
                elif o == '-c':
                    self.produce_total = True
                elif o == '-e':
                    self.exclude_patterns.append(a)
                elif o == '-h':
                    self.human_readable = True
                elif o == '-s':
                    self.summary_only = True
                elif o == '-X':
                    if a == '-':
                        f = sys.stdin
                    else:
                        f = open(a, 'r')
                    try:
                        for line in f:
                            line = line.strip()
                            if line:
                                self.exclude_patterns.append(line)
                    finally:
                        f.close()

        if not self.args:
            # Default to listing all gs buckets.
            self.args = ['gs://']

        total_bytes = 0
        got_nomatch_errors = False

        def _PrintObjectLong(blr):
            return self._PrintInfoAboutBucketListingRef(blr)

        def _PrintNothing(unused_blr=None):
            pass

        def _PrintDirectory(num_bytes, name):
            if not self.summary_only:
                self._PrintSummaryLine(num_bytes, name)

        for url_arg in self.args:
            top_level_storage_url = StorageUrlFromString(url_arg)
            if top_level_storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            bucket_listing_fields = ['size']

            ls_helper = LsHelper(self.WildcardIterator,
                                 self.logger,
                                 print_object_func=_PrintObjectLong,
                                 print_dir_func=_PrintNothing,
                                 print_dir_header_func=_PrintNothing,
                                 print_dir_summary_func=_PrintDirectory,
                                 print_newline_func=_PrintNothing,
                                 all_versions=self.all_versions,
                                 should_recurse=True,
                                 exclude_patterns=self.exclude_patterns,
                                 fields=bucket_listing_fields)

            # ls_helper expands to objects and prefixes, so perform a top-level
            # expansion first.
            if top_level_storage_url.IsProvider():
                # Provider URL: use bucket wildcard to iterate over all buckets.
                top_level_iter = self.WildcardIterator(
                    '%s://*' % top_level_storage_url.scheme).IterBuckets(
                        bucket_fields=['id'])
            elif top_level_storage_url.IsBucket():
                top_level_iter = self.WildcardIterator(
                    '%s://%s' %
                    (top_level_storage_url.scheme,
                     top_level_storage_url.bucket_name)).IterBuckets(
                         bucket_fields=['id'])
            else:
                top_level_iter = [BucketListingObject(top_level_storage_url)]

            for blr in top_level_iter:
                storage_url = blr.storage_url
                if storage_url.IsBucket() and self.summary_only:
                    storage_url = StorageUrlFromString(
                        storage_url.CreatePrefixUrl(wildcard_suffix='**'))
                _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
                    storage_url)
                if (storage_url.IsObject() and exp_objs == 0
                        and ContainsWildcard(url_arg)
                        and not self.exclude_patterns):
                    got_nomatch_errors = True
                total_bytes += exp_bytes

                if self.summary_only:
                    self._PrintSummaryLine(exp_bytes,
                                           blr.url_string.rstrip('/'))

        if self.produce_total:
            self._PrintSummaryLine(total_bytes, 'total')

        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')

        return 0