Beispiel #1
0
    def RunCommand(self):
        """Command entry point for the rm command.

        Parses the rm sub-options, gathers the URL strings to remove (from the
        command line or, with -I, from stdin), records the buckets named by a
        recursive request, and applies the remove function to every name the
        URLs expand to.

        Raises:
            CommandException: if arguments are given together with -I, if no
                URL is given without -I, or if the removal pass raises one for
                which _ExceptionMatchesBucketToDelete is false.
            ServiceException: propagated from the removal pass unless
                self.continue_on_error is set.
        """
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    # A recursive remove also implies removing all versions.
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            # NOTE(review): with -I, url_strs is a stdin iterator, and the
            # seek-ahead comment below says stdin args can only be iterated
            # once. This loop iterates url_strs before the name expansion
            # does -- confirm that combining -I with -r behaves as intended.
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            seek_ahead_iterator = None
            # Cannot seek ahead with stdin args, since we can only iterate them
            # once without buffering in memory.
            if not self.read_args_from_stdin:
                seek_ahead_iterator = SeekAheadNameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.GetSeekAheadGsutilApi(),
                    url_strs,
                    self.recursion_requested,
                    all_versions=self.all_versions,
                    project_id=self.project_id)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                seek_ahead_iterator=seek_ahead_iterator)

        # Assuming the bucket has versioning enabled, url's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        except ServiceException:
            # Service errors are tolerated only when continuing on error.
            if not self.continue_on_error:
                raise
Beispiel #2
0
    def _SetIam(self):
        """Apply the IAM policy stored in a JSON file to the given URL patterns.

        The first positional argument is the path of the policy file; every
        remaining argument is a URL pattern. A bucket pattern without recursion
        is handled right away through SetIamHelper; all other patterns are
        collected and processed through Apply.

        Raises:
            ArgumentException: if the policy file cannot be opened, is not
                valid JSON, or cannot be decoded into a Policy message.
            CommandException: if self.everything_set_okay is false once all
                patterns have been processed.
        """
        self.all_versions = False
        self.recursion_requested = False
        self.continue_on_error = False

        # An etag supplied through -e takes precedence over the file's etag.
        etag_from_flag = False
        etag = ''
        for flag, value in self.sub_opts or ():
            if flag == '-e':
                etag, etag_from_flag = str(value), True
            elif flag == '-a':
                self.all_versions = True
            elif flag == '-f':
                self.continue_on_error = True
            elif flag in ('-r', '-R'):
                self.recursion_requested = True
            else:
                self.RaiseInvalidArgumentException()

        policy_path = self.args[0]
        url_patterns = self.args[1:]

        # Read the policy file; a missing file and malformed JSON are both
        # reported as argument errors.
        try:
            with open(policy_path, 'r') as policy_file:
                raw_policy = json.load(policy_file)
        except IOError:
            raise ArgumentException(
                'Specified IAM policy file "%s" does not exist.' % policy_path)
        except ValueError as json_error:
            self.logger.debug('Invalid IAM policy file, ValueError:\n%s',
                              json_error)
            raise ArgumentException(
                'Invalid IAM policy file "%s".' % policy_path)

        bindings = raw_policy.get('bindings', [])
        if not etag_from_flag:
            etag = raw_policy.get('etag', '')

        # Round-trip through JSON to obtain a Policy message that carries only
        # the bindings, the chosen etag and the policy version.
        normalized = {
            'bindings': bindings,
            'etag': etag,
            'version': IAM_POLICY_VERSION
        }
        try:
            policy = protojson.decode_message(apitools_messages.Policy,
                                              json.dumps(normalized))
        except DecodeError:
            raise ArgumentException(
                'Invalid IAM policy file "%s" or etag "%s".' %
                (policy_path, etag))

        self.everything_set_okay = True

        # URL strings left for NameExpansionIterator to expand.
        deferred_urls = []
        for pattern in url_patterns:
            target = StorageUrlFromString(pattern)
            if target.IsBucket():
                if not self.recursion_requested:
                    self.SetIamHelper(target, policy)
                    continue
                target.object_name = '*'
            deferred_urls.append(target.url_string)

        # N.B.: If deferred_urls contains a non-existent bucket
        # (e.g. ["gs://non-existent", "gs://existent"]), NameExpansionIterator
        # will raise an exception in iter.next. This halts all iteration, even
        # when -f is set. This behavior is also evident in acl set. This behavior
        # also appears for any exception that will be raised when iterating over
        # wildcard expansions (access denied if bucket cannot be listed, etc.).
        if deferred_urls:
            expanded_names = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                deferred_urls,
                self.recursion_requested,
                all_versions=self.all_versions,
                continue_on_error=(self.continue_on_error
                                   or self.parallel_operations),
                bucket_listing_fields=['name'])

            seek_ahead = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                deferred_urls,
                self.recursion_requested,
                all_versions=self.all_versions)

            # Pair every expanded name with the same serialized policy.
            encoded_policy = protojson.encode_message(policy)
            work_items = zip(itertools.repeat(encoded_policy), expanded_names)
            self.Apply(_SetIamWrapper,
                       work_items,
                       _SetIamExceptionHandler,
                       fail_on_error=not self.continue_on_error,
                       seek_ahead_iterator=seek_ahead)

            self.everything_set_okay &= GetFailureCount() <= 0

        # TODO: Add an error counter for files and objects.
        if not self.everything_set_okay:
            raise CommandException('Some IAM policies could not be set.')
Beispiel #3
0
    def RunCommand(self):
        """Command entry point for the du command.

        Parses the du sub-options, then expands every URL argument (defaulting
        to 'gs://') and prints sizes through an LsHelper configured with this
        command's print callbacks.

        Returns:
            0 when the command completes.

        Raises:
            CommandException: if a file URL is given, or if a wildcarded object
                URL expanded to no objects while no exclude patterns are set.
        """
        self.line_ending = '\n'
        self.all_versions = False
        self.produce_total = False
        self.human_readable = False
        self.summary_only = False
        self.exclude_patterns = []
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-0':
                    self.line_ending = '\0'
                elif o == '-a':
                    self.all_versions = True
                elif o == '-c':
                    self.produce_total = True
                elif o == '-e':
                    self.exclude_patterns.append(a)
                elif o == '-h':
                    self.human_readable = True
                elif o == '-s':
                    self.summary_only = True
                elif o == '-X':
                    # '-' reads the exclude patterns from stdin. Only a file
                    # opened here is closed afterwards; sys.stdin is left open
                    # because this method did not open it.
                    if a == '-':
                        opened_file = None
                        pattern_lines = sys.stdin
                    else:
                        opened_file = open(a, 'r')
                        pattern_lines = opened_file
                    try:
                        for line in pattern_lines:
                            line = line.strip()
                            # Blank lines are not patterns.
                            if line:
                                self.exclude_patterns.append(line)
                    finally:
                        if opened_file is not None:
                            opened_file.close()

        if not self.args:
            # Default to listing all gs buckets.
            self.args = ['gs://']

        total_bytes = 0
        got_nomatch_errors = False

        def _PrintObjectLong(blr):
            return self._PrintInfoAboutBucketListingRef(blr)

        def _PrintNothing(unused_blr=None):
            pass

        def _PrintDirectory(num_bytes, name):
            # Per-directory lines are suppressed in summary-only (-s) mode.
            if not self.summary_only:
                self._PrintSummaryLine(num_bytes, name)

        for url_arg in self.args:
            top_level_storage_url = StorageUrlFromString(url_arg)
            if top_level_storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            bucket_listing_fields = ['size']

            ls_helper = LsHelper(self.WildcardIterator,
                                 self.logger,
                                 print_object_func=_PrintObjectLong,
                                 print_dir_func=_PrintNothing,
                                 print_dir_header_func=_PrintNothing,
                                 print_dir_summary_func=_PrintDirectory,
                                 print_newline_func=_PrintNothing,
                                 all_versions=self.all_versions,
                                 should_recurse=True,
                                 exclude_patterns=self.exclude_patterns,
                                 fields=bucket_listing_fields)

            # ls_helper expands to objects and prefixes, so perform a top-level
            # expansion first.
            if top_level_storage_url.IsProvider():
                # Provider URL: use bucket wildcard to iterate over all buckets.
                top_level_iter = self.WildcardIterator(
                    '%s://*' % top_level_storage_url.scheme).IterBuckets(
                        bucket_fields=['id'])
            elif top_level_storage_url.IsBucket():
                top_level_iter = self.WildcardIterator(
                    '%s://%s' %
                    (top_level_storage_url.scheme,
                     top_level_storage_url.bucket_name)).IterBuckets(
                         bucket_fields=['id'])
            else:
                top_level_iter = [BucketListingObject(top_level_storage_url)]

            for blr in top_level_iter:
                storage_url = blr.storage_url
                if storage_url.IsBucket() and self.summary_only:
                    # In summary mode a bucket is expanded through a '**'
                    # prefix URL instead of the bucket URL itself.
                    storage_url = StorageUrlFromString(
                        storage_url.CreatePrefixUrl(wildcard_suffix='**'))
                _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
                    storage_url)
                if (storage_url.IsObject() and exp_objs == 0
                        and ContainsWildcard(url_arg)
                        and not self.exclude_patterns):
                    got_nomatch_errors = True
                total_bytes += exp_bytes

                if self.summary_only:
                    self._PrintSummaryLine(exp_bytes,
                                           blr.url_string.rstrip('/'))

        if self.produce_total:
            self._PrintSummaryLine(total_bytes, 'total')

        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')

        return 0
Beispiel #4
0
    def RunCommand(self):
        """Command entry point for the rm command.

        Parses the rm sub-options, records the buckets named by a recursive
        request, and applies the remove function to every name that self.args
        expands to.

        Raises:
            CommandException: propagated from the removal pass when no buckets
                are slated for deletion and self.continue_on_error is not set.
            ServiceException: propagated from the removal pass unless
                self.continue_on_error is set.
        """
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-r' or o == '-R':
                    # A recursive remove also implies removing all versions.
                    self.recursion_requested = True
                    self.all_versions = True

        # Buckets matched by a recursive request, and the URL strings that
        # matched them (kept in step with each other).
        bucket_urls_to_delete = []
        bucket_strings_to_delete = []
        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in self.args:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        bucket_strings_to_delete.append(url_str)

        # Used to track if any files failed to be removed.
        self.everything_removed_okay = True

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                self.args,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(_RemoveFuncWrapper,
                       name_expansion_iterator,
                       _RemoveExceptionHandler,
                       fail_on_error=(not self.continue_on_error))

        # Assuming the bucket has versioning enabled, url's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if not bucket_urls_to_delete and not self.continue_on_error:
                raise
            # Reset the failure count if we failed due to an empty bucket that we're
            # going to delete.
            msg = 'No URLs matched: '
            if msg in str(e):
                parts = str(e).split(msg)
                if len(parts) == 2 and parts[1] in bucket_strings_to_delete:
                    ResetFailureCount()
        except ServiceException:
            # Service errors are tolerated only when continuing on error.
            if not self.continue_on_error:
                raise
Beispiel #5
0
    def _PatchIam(self):
        """Patch IAM policy bindings on the URL patterns given on the command line.

        Bindings come from -d sub-options and from the leading tokens of
        self.args; the first token matching STORAGE_URI_REGEX and everything
        after it are treated as URL patterns. A bucket pattern without
        recursion is handled right away through PatchIamHelper; all other
        patterns are collected and processed through Apply.

        Raises:
            CommandException: if no binding is given, if a -d token in
                self.args is not followed by a binding, if a pattern is
                rejected as an invalid cloud URL, or if
                self.everything_set_okay is false once all patterns have been
                processed.
        """
        self.continue_on_error = False
        self.recursion_requested = False

        patch_bindings_tuples = []

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o in ['-r', '-R']:
                    self.recursion_requested = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-d':
                    patch_bindings_tuples.append(BindingStringToTuple(
                        False, a))

        patterns = []

        # N.B.: self.sub_opts stops taking in options at the first non-flagged
        # token. The rest of the tokens are sent to self.args. Thus, in order to
        # handle input of the form "-d <binding> <binding> <url>", we will have to
        # parse self.args for a mix of both bindings and CloudUrls. We are not
        # expecting to come across the -r, -f flags here.
        it = iter(self.args)
        for token in it:
            if STORAGE_URI_REGEX.match(token):
                patterns.append(token)
                break
            if token == '-d':
                # The binding is the token after -d. A trailing -d has none;
                # report that instead of letting StopIteration escape.
                binding = next(it, None)
                if binding is None:
                    raise CommandException(
                        'A binding must follow the -d flag.')
                patch_bindings_tuples.append(
                    BindingStringToTuple(False, binding))
            else:
                patch_bindings_tuples.append(BindingStringToTuple(True, token))
        if not patch_bindings_tuples:
            raise CommandException('Must specify at least one binding.')

        # All following arguments are urls.
        for token in it:
            patterns.append(token)

        self.everything_set_okay = True
        self.tried_ch_on_resource_with_conditions = False
        threaded_wildcards = []
        for pattern in patterns:
            surl = StorageUrlFromString(pattern)
            try:
                if surl.IsBucket():
                    if self.recursion_requested:
                        # NOTE(review): this assigns `object`, and url_string
                        # is read on the next line -- confirm url_string
                        # reflects this attribute rather than `object_name`.
                        surl.object = '*'
                        threaded_wildcards.append(surl.url_string)
                    else:
                        self.PatchIamHelper(surl, patch_bindings_tuples)
                else:
                    threaded_wildcards.append(surl.url_string)
            except AttributeError:
                error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
                # A token made up only of flag characters was most likely a
                # misplaced flag, so add a hint about argument order.
                if set(surl.object_name).issubset(set('-Rrf')):
                    error_msg += (
                        ' This resource handle looks like a flag, which must appear '
                        'before all bindings. See "gsutil help iam ch" for more details.'
                    )
                raise CommandException(error_msg)

        if threaded_wildcards:
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations,
                bucket_listing_fields=['name'])

            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions)

            # Pair every expanded name with the same serialized binding list.
            serialized_bindings_tuples_it = itertools.repeat(
                [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
            self.Apply(_PatchIamWrapper,
                       zip(serialized_bindings_tuples_it,
                           name_expansion_iterator),
                       _PatchIamExceptionHandler,
                       fail_on_error=not self.continue_on_error,
                       seek_ahead_iterator=seek_ahead_iterator)

            self.everything_set_okay &= not GetFailureCount() > 0

        # TODO: Add an error counter for files and objects.
        if not self.everything_set_okay:
            msg = 'Some IAM policies could not be patched.'
            if self.tried_ch_on_resource_with_conditions:
                msg += '\n'
                msg += '\n'.join(
                    textwrap.wrap(
                        'Some resources had conditions present in their IAM policy '
                        'bindings, which is not supported by "iam ch". %s' %
                        (IAM_CH_CONDITIONS_WORKAROUND_MSG)))
            raise CommandException(msg)
Beispiel #6
0
    def RunCommand(self):
        """Command entry point for the ls command.

        Parses the ls sub-options, then lists each URL argument (defaulting to
        'gs://'): provider URLs list buckets, bucket URLs with -b print bucket
        info, and everything else is expanded and printed through an LsHelper
        configured for the selected listing style.

        Returns:
            0 when the command completes.

        Raises:
            CommandException: if a file URL is given, if the listing style is
                unknown, or if an object URL expanded to no objects and no
                directories.
            NotFoundException: if a non-wildcarded bucket URL listed with -b
                yielded no buckets.
        """
        got_nomatch_errors = False
        got_bucket_nomatch_errors = False
        listing_style = ListingStyle.SHORT
        get_bucket_info = False
        self.recursion_requested = False
        self.all_versions = False
        self.include_etag = False
        self.human_readable = False
        self.list_subdir_contents = True
        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-e':
                    self.include_etag = True
                elif o == '-b':
                    get_bucket_info = True
                elif o == '-h':
                    self.human_readable = True
                elif o == '-l':
                    listing_style = ListingStyle.LONG
                elif o == '-L':
                    listing_style = ListingStyle.LONG_LONG
                elif o == '-p':
                    self.project_id = a
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                elif o == '-d':
                    self.list_subdir_contents = False

        if not self.args:
            # default to listing all gs buckets
            self.args = ['gs://']

        total_objs = 0
        total_bytes = 0

        def MaybePrintBucketHeader(blr):
            # A header is only printed when several URLs were requested.
            if len(self.args) > 1:
                print('%s:' % blr.url_string.encode(UTF8))

        print_bucket_header = MaybePrintBucketHeader

        for url_str in self.args:
            storage_url = StorageUrlFromString(url_str)
            if storage_url.IsFileUrl():
                raise CommandException('Only cloud URLs are supported for %s' %
                                       self.command_name)
            bucket_fields = None
            if (listing_style == ListingStyle.SHORT
                    or listing_style == ListingStyle.LONG):
                bucket_fields = ['id']
            elif listing_style == ListingStyle.LONG_LONG:
                bucket_fields = [
                    'location', 'storageClass', 'versioning', 'acl',
                    'defaultObjectAcl', 'website', 'logging', 'cors',
                    'lifecycle'
                ]
            if storage_url.IsProvider():
                # Provider URL: use bucket wildcard to list buckets.
                for blr in self.WildcardIterator(
                        '%s://*' % storage_url.scheme).IterBuckets(
                            bucket_fields=bucket_fields):
                    self._PrintBucketInfo(blr, listing_style)
            elif storage_url.IsBucket() and get_bucket_info:
                # ls -b bucket listing request: List info about bucket(s).
                total_buckets = 0
                for blr in self.WildcardIterator(url_str).IterBuckets(
                        bucket_fields=bucket_fields):
                    if not ContainsWildcard(url_str) and not blr.root_object:
                        # Iterator does not make an HTTP call for non-wildcarded
                        # listings with fields=='id'. Ensure the bucket exists by calling
                        # GetBucket.
                        self.gsutil_api.GetBucket(blr.storage_url.bucket_name,
                                                  fields=['id'],
                                                  provider=storage_url.scheme)
                    self._PrintBucketInfo(blr, listing_style)
                    total_buckets += 1
                if not ContainsWildcard(url_str) and not total_buckets:
                    got_bucket_nomatch_errors = True
            else:
                # URL names a bucket, object, or object subdir ->
                # list matching object(s) / subdirs.
                def _PrintPrefixLong(blr):
                    # The empty first column pads the prefix to 33 characters.
                    print('%-33s%s' % ('', blr.url_string.encode(UTF8)))

                if listing_style == ListingStyle.SHORT:
                    # ls helper by default readies us for a short listing.
                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        all_versions=self.all_versions,
                        print_bucket_header_func=print_bucket_header,
                        should_recurse=self.recursion_requested,
                        list_subdir_contents=self.list_subdir_contents)
                elif listing_style == ListingStyle.LONG:
                    bucket_listing_fields = ['name', 'updated', 'size']
                    if self.all_versions:
                        bucket_listing_fields.extend(
                            ['generation', 'metageneration'])
                    if self.include_etag:
                        bucket_listing_fields.append('etag')

                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=self._PrintLongListing,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields,
                        list_subdir_contents=self.list_subdir_contents)

                elif listing_style == ListingStyle.LONG_LONG:
                    # List all fields
                    bucket_listing_fields = None
                    ls_helper = LsHelper(
                        self.WildcardIterator,
                        self.logger,
                        print_object_func=PrintFullInfoAboutObject,
                        print_dir_func=_PrintPrefixLong,
                        print_bucket_header_func=print_bucket_header,
                        all_versions=self.all_versions,
                        should_recurse=self.recursion_requested,
                        fields=bucket_listing_fields,
                        list_subdir_contents=self.list_subdir_contents)
                else:
                    raise CommandException('Unknown listing style: %s' %
                                           listing_style)

                exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(
                    storage_url)
                if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
                    got_nomatch_errors = True
                total_bytes += exp_bytes
                total_objs += exp_objs

        # The totals line is only printed for the long listing styles.
        if total_objs and listing_style != ListingStyle.SHORT:
            print('TOTAL: %d objects, %d bytes (%s)' %
                  (total_objs, total_bytes,
                   MakeHumanReadable(float(total_bytes))))
        if got_nomatch_errors:
            raise CommandException('One or more URLs matched no objects.')
        if got_bucket_nomatch_errors:
            raise NotFoundException(
                'One or more bucket URLs matched no buckets.')

        return 0
Beispiel #7
0
    def RunCommand(self):
        """Command entry point for the rm command."""
        # self.recursion_requested is initialized in command.py (so it can be
        # checked in parent class for all commands).
        self.continue_on_error = self.parallel_operations
        self.read_args_from_stdin = False
        self.all_versions = False
        if self.sub_opts:
            for o, unused_a in self.sub_opts:
                if o == '-a':
                    self.all_versions = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-I':
                    self.read_args_from_stdin = True
                elif o == '-r' or o == '-R':
                    self.recursion_requested = True
                    self.all_versions = True

        if self.read_args_from_stdin:
            if self.args:
                raise CommandException(
                    'No arguments allowed with the -I flag.')
            url_strs = StdinIterator()
        else:
            if not self.args:
                raise CommandException(
                    'The rm command (without -I) expects at '
                    'least one URL.')
            url_strs = self.args

        # Tracks number of object deletes that failed.
        self.op_failure_count = 0

        # Tracks if any buckets were missing.
        self.bucket_not_found_count = 0

        # Tracks buckets that are slated for recursive deletion.
        bucket_urls_to_delete = []
        self.bucket_strings_to_delete = []

        if self.recursion_requested:
            bucket_fields = ['id']
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsBucket() or url.IsProvider():
                    for blr in self.WildcardIterator(url_str).IterBuckets(
                            bucket_fields=bucket_fields):
                        bucket_urls_to_delete.append(blr.storage_url)
                        self.bucket_strings_to_delete.append(url_str)

        self.preconditions = PreconditionsFromHeaders(self.headers or {})

        try:
            # Expand wildcards, dirs, buckets, and bucket subdirs in URLs.
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                url_strs,
                self.recursion_requested,
                project_id=self.project_id,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations)

            seek_ahead_iterator = None
            # Cannot seek ahead with stdin args, since we can only iterate them
            # once without buffering in memory.
            if not self.read_args_from_stdin:
                seek_ahead_iterator = SeekAheadNameExpansionIterator(
                    self.command_name,
                    self.debug,
                    self.GetSeekAheadGsutilApi(),
                    url_strs,
                    self.recursion_requested,
                    all_versions=self.all_versions,
                    project_id=self.project_id)

            # Perform remove requests in parallel (-m) mode, if requested, using
            # configured number of parallel processes and threads. Otherwise,
            # perform requests with sequential function calls in current process.
            self.Apply(
                _RemoveFuncWrapper,
                name_expansion_iterator,
                _RemoveExceptionHandler,
                fail_on_error=(not self.continue_on_error),
                shared_attrs=['op_failure_count', 'bucket_not_found_count'],
                seek_ahead_iterator=seek_ahead_iterator)

        # Assuming the bucket has versioning enabled, url's that don't map to
        # objects should throw an error even with all_versions, since the prior
        # round of deletes only sends objects to a history table.
        # This assumption that rm -a is only called for versioned buckets should be
        # corrected, but the fix is non-trivial.
        except CommandException as e:
            # Don't raise if there are buckets to delete -- it's valid to say:
            #   gsutil rm -r gs://some_bucket
            # if the bucket is empty.
            if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete,
                                               e):
                DecrementFailureCount()
            else:
                raise
        except ServiceException as e:
            if not self.continue_on_error:
                raise

        if self.bucket_not_found_count:
            raise CommandException(
                'Encountered non-existent bucket during listing')

        if self.op_failure_count and not self.continue_on_error:
            raise CommandException('Some files could not be removed.')

        # If this was a gsutil rm -r command covering any bucket subdirs,
        # remove any dir_$folder$ objects (which are created by various web UI
        # tools to simulate folders).
        if self.recursion_requested:
            folder_object_wildcards = []
            for url_str in url_strs:
                url = StorageUrlFromString(url_str)
                if url.IsObject():
                    folder_object_wildcards.append(
                        url_str.rstrip('*') + '*_$folder$')
            if folder_object_wildcards:
                self.continue_on_error = True
                try:
                    name_expansion_iterator = NameExpansionIterator(
                        self.command_name,
                        self.debug,
                        self.logger,
                        self.gsutil_api,
                        folder_object_wildcards,
                        self.recursion_requested,
                        project_id=self.project_id,
                        all_versions=self.all_versions)
                    # When we're removing folder objects, always continue on error
                    self.Apply(_RemoveFuncWrapper,
                               name_expansion_iterator,
                               _RemoveFoldersExceptionHandler,
                               fail_on_error=False)
                except CommandException as e:
                    # Ignore exception from name expansion due to an absent folder file.
                    if not e.reason.startswith(NO_URLS_MATCHED_PREFIX):
                        raise

        # Now that all data has been deleted, delete any bucket URLs.
        for url in bucket_urls_to_delete:
            self.logger.info('Removing %s...', url)

            @Retry(NotEmptyException, tries=3, timeout_secs=1)
            def BucketDeleteWithRetry():
                self.gsutil_api.DeleteBucket(url.bucket_name,
                                             provider=url.scheme)

            BucketDeleteWithRetry()

        if self.op_failure_count:
            plural_str = 's' if self.op_failure_count else ''
            raise CommandException(
                '%d file%s/object%s could not be removed.' %
                (self.op_failure_count, plural_str, plural_str))

        return 0
Beispiel #8
0
    def _Create(self):
        """Creates a notification config on the bucket named by the last arg.

        Parses the sub-options (-e event type, -f payload format, -m custom
        attribute, -p object name prefix, -s skip topic setup, -t topic),
        validates that the last argument is a gs:// bucket URL, resolves the
        Cloud Pub/Sub topic name, optionally sets up the topic through
        self._CreateTopic, and finally calls CreateNotificationConfig.

        Returns:
            0 on success.

        Raises:
            CommandException: if a -m value contains no ':' separator, if the
                -f payload format is not a key of PAYLOAD_FORMAT_MAP, or if
                the last argument is not a gs:// bucket URL.
            PublishPermissionDeniedException: if creating the notification
                config is denied and no retry applies (see the retry loop).
        """
        self.CheckArguments()

        # User-specified options
        pubsub_topic = None
        payload_format = None
        custom_attributes = {}
        event_types = []
        object_name_prefix = None
        should_setup_topic = True

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o == '-e':
                    event_types.append(a)
                elif o == '-f':
                    payload_format = a
                elif o == '-m':
                    if ':' not in a:
                        raise CommandException(
                            'Custom attributes specified with -m should be of the form '
                            'key:value')
                    # Split on the first ':' only so that values which
                    # themselves contain a colon (e.g. URLs) are accepted
                    # instead of failing the two-name unpacking.
                    key, value = a.split(':', 1)
                    custom_attributes[key] = value
                elif o == '-p':
                    object_name_prefix = a
                elif o == '-s':
                    should_setup_topic = False
                elif o == '-t':
                    pubsub_topic = a

        if payload_format not in PAYLOAD_FORMAT_MAP:
            raise CommandException(
                "Must provide a payload format with -f of either 'json' or 'none'"
            )
        payload_format = PAYLOAD_FORMAT_MAP[payload_format]

        bucket_arg = self.args[-1]

        bucket_url = StorageUrlFromString(bucket_arg)
        if not bucket_url.IsCloudUrl() or not bucket_url.IsBucket():
            raise CommandException(
                "%s %s requires a GCS bucket name, but got '%s'" %
                (self.command_name, self.subcommand_name, bucket_arg))
        if bucket_url.scheme != 'gs':
            raise CommandException(
                'The %s command can only be used with gs:// bucket URLs.' %
                self.command_name)
        bucket_name = bucket_url.bucket_name
        self.logger.debug('Creating notification for bucket %s', bucket_url)

        # Find the project this bucket belongs to
        bucket_metadata = self.gsutil_api.GetBucket(bucket_name,
                                                    fields=['projectNumber'],
                                                    provider=bucket_url.scheme)
        bucket_project_number = bucket_metadata.projectNumber

        # If not specified, choose a sensible default for the Cloud Pub/Sub topic
        # name.
        if not pubsub_topic:
            pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                                      bucket_name)
        if not pubsub_topic.startswith('projects/'):
            # If a user picks a topic ID (mytopic) but doesn't pass the whole name (
            # projects/my-project/topics/mytopic ), pick a default project.
            pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                                      pubsub_topic)
        self.logger.debug('Using Cloud Pub/Sub topic %s', pubsub_topic)

        # NOTE(review): presumably _CreateTopic returns True when it has just
        # changed the topic's IAM policy -- confirm against its definition.
        just_modified_topic_permissions = False
        if should_setup_topic:
            # Ask GCS for the email address that represents GCS's permission to
            # publish to a Cloud Pub/Sub topic from this project.
            service_account = self.gsutil_api.GetProjectServiceAccount(
                bucket_project_number,
                provider=bucket_url.scheme).email_address
            self.logger.debug('Service account for project %d: %s',
                              bucket_project_number, service_account)
            just_modified_topic_permissions = self._CreateTopic(
                pubsub_topic, service_account)

        # At most two attempts: the second one only happens when the first was
        # denied right after the topic permissions were modified. Every other
        # denial is re-raised, so create_response is always bound afterwards.
        for attempt_number in range(2):
            try:
                create_response = self.gsutil_api.CreateNotificationConfig(
                    bucket_name,
                    pubsub_topic=pubsub_topic,
                    payload_format=payload_format,
                    custom_attributes=custom_attributes,
                    event_types=event_types or None,
                    object_name_prefix=object_name_prefix,
                    provider=bucket_url.scheme)
                break
            except PublishPermissionDeniedException:
                if attempt_number == 0 and just_modified_topic_permissions:
                    # If we have just set the IAM policy, it may take up to 10 seconds to
                    # take effect.
                    self.logger.info(
                        'Retrying create notification in 10 seconds '
                        '(new permissions may take up to 10 seconds to take effect.)'
                    )
                    time.sleep(10)
                else:
                    raise

        notification_name = 'projects/_/buckets/%s/notificationConfigs/%s' % (
            bucket_name, create_response.id)
        self.logger.info('Created notification config %s', notification_name)

        return 0
  def RunCommand(self):
    """Runs the mb command, creating one bucket per URL in self.args.

    Sub-options select the location (-l), project (-p), storage class
    (-c/-s), retention period (--retention), bucket policy only (-b) and
    public access prevention (--pap) applied to every created bucket.

    Returns:
      0 once CreateBucket has succeeded for every URL argument.

    Raises:
      CommandException: on an option that is not usable with the selected
        API or URL scheme, on a URL that does not name a bucket, or when the
        service rejects a dotted bucket name that is not under a valid TLD.
      InvalidUrlError: if the bucket name fails the local name checks.
    """
    location = None
    storage_class = None
    retention_seconds = None
    uniform_access = None
    pap_setting = None

    for flag, value in self.sub_opts or ():
      if flag == '-l':
        location = value
      elif flag == '-p':
        # Project IDs are sent as header values when using gs and s3 XML APIs.
        InsistAscii(value, 'Invalid non-ASCII character found in project ID')
        self.project_id = value
      elif flag in ('-c', '-s'):
        storage_class = NormalizeStorageClass(value)
      elif flag == '--retention':
        retention_seconds = RetentionInSeconds(value)
      elif flag == '-b':
        if self.gsutil_api.GetApiSelector('gs') != ApiSelector.JSON:
          raise CommandException('The -b <on|off> option '
                                 'can only be used with the JSON API')
        InsistOnOrOff(value, 'Only on and off values allowed for -b option')
        uniform_access = value == 'on'
      elif flag == '--pap':
        pap_setting = value

    bucket_metadata = apitools_messages.Bucket(location=location,
                                               storageClass=storage_class)
    if uniform_access or pap_setting:
      # Populate the IAM configuration first, then attach it to the metadata.
      iam_config = IamConfigurationValue()
      if uniform_access:
        iam_config.bucketPolicyOnly = BucketPolicyOnlyValue(
            enabled=uniform_access)
      if pap_setting:
        iam_config.publicAccessPrevention = pap_setting
      bucket_metadata.iamConfiguration = iam_config

    for url_arg in self.args:
      bucket_url = StorageUrlFromString(url_arg)

      if retention_seconds is not None:
        if bucket_url.scheme != 'gs':
          raise CommandException('Retention policy can only be specified for '
                                 'GCS buckets.')
        bucket_metadata.retentionPolicy = (
            apitools_messages.Bucket.RetentionPolicyValue(
                retentionPeriod=retention_seconds))

      if pap_setting:
        selected_api = self.gsutil_api.GetApiSelector(bucket_url.scheme)
        if selected_api != ApiSelector.JSON:
          raise CommandException(
              'The --pap option can only be used for GCS Buckets with the JSON API'
          )

      if not bucket_url.IsBucket():
        raise CommandException('The mb command requires a URL that specifies a '
                               'bucket.\n"%s" is not valid.' % bucket_url)

      bucket_name = bucket_url.bucket_name
      name_is_valid = (BUCKET_NAME_RE.match(bucket_name) and
                       not TOO_LONG_DNS_NAME_COMP.search(bucket_name))
      if not name_is_valid:
        raise InvalidUrlError('Invalid bucket name in URL "%s"' % bucket_name)

      self.logger.info('Creating %s...', bucket_url)
      # Pass storage_class param only if this is a GCS bucket. (In S3 the
      # storage class is specified on the key object.)
      try:
        self.gsutil_api.CreateBucket(bucket_name,
                                     project_id=self.project_id,
                                     metadata=bucket_metadata,
                                     provider=bucket_url.scheme)
      except BadRequestException as e:
        is_bad_tld = (e.status == 400 and
                      e.reason == 'DotfulBucketNameNotUnderTld' and
                      bucket_url.scheme == 'gs')
        if not is_bad_tld:
          raise
        # Everything after the last '.' (or the whole name if there is none).
        final_comp = bucket_name.rpartition('.')[2]
        explanation = (
            'Buckets with "." in the name must be valid DNS names. The bucket'
            ' you are attempting to create (%s) is not a valid DNS name,'
            ' because the final component (%s) is not currently a valid part'
            ' of the top-level DNS tree.' % (bucket_name, final_comp))
        raise CommandException('\n'.join(textwrap.wrap(explanation)))

    return 0
Beispiel #10
0
    def _PatchIam(self):
        """Patches IAM bindings onto the URLs given on the command line.

        Bindings are collected from the -d sub-options and from the leading
        tokens of self.args; the first token that does not parse as a binding
        and every token after it are treated as URL patterns. Bucket URLs
        without recursion are patched directly through self.PatchIamHelper;
        all other patterns are expanded with NameExpansionIterator and
        dispatched through self.Apply with _PatchIamWrapper.

        Raises:
            CommandException: if a -d token in self.args has no binding after
                it, if no bindings were specified, if a pattern is not a
                valid cloud URL, or if some policies could not be patched.
        """
        self.continue_on_error = False
        self.recursion_requested = False

        patch_bindings_tuples = []

        if self.sub_opts:
            for o, a in self.sub_opts:
                if o in ['-r', '-R']:
                    self.recursion_requested = True
                elif o == '-f':
                    self.continue_on_error = True
                elif o == '-d':
                    patch_bindings_tuples.append(BindingStringToTuple(
                        False, a))

        patterns = []

        # N.B.: self.sub_opts stops taking in options at the first non-flagged
        # token. The rest of the tokens are sent to self.args. Thus, in order to
        # handle input of the form "-d <binding> <binding> <url>", we will have to
        # parse self.args for a mix of both bindings and CloudUrls. We are not
        # expecting to come across the -r, -f flags here.
        it = iter(self.args)
        for token in it:
            if token == '-d':
                # Use the builtin next() (the .next() method is Python 2 only)
                # and report a dangling -d instead of leaking StopIteration.
                binding = next(it, None)
                if binding is None:
                    raise CommandException(
                        'The -d flag must be followed by a binding.')
                patch_bindings_tuples.append(
                    BindingStringToTuple(False, binding))
            else:
                try:
                    patch_bindings_tuples.append(
                        BindingStringToTuple(True, token))
                # All following arguments are urls.
                except (ArgumentException, CommandException):
                    patterns.append(token)
                    # Draining the iterator here also ends the outer loop.
                    patterns.extend(it)

        # We must have some bindings to process, else this is pointless.
        if not patch_bindings_tuples:
            raise CommandException('Must specify at least one binding.')

        self.everything_set_okay = True
        threaded_wildcards = []
        for pattern in patterns:
            surl = StorageUrlFromString(pattern)
            try:
                if surl.IsBucket():
                    if self.recursion_requested:
                        surl.object = '*'
                        threaded_wildcards.append(surl.url_string)
                    else:
                        self.PatchIamHelper(surl, patch_bindings_tuples)
                else:
                    threaded_wildcards.append(surl.url_string)
            # NOTE(review): presumably raised when surl is not a cloud URL and
            # lacks the attributes used above -- confirm.
            except AttributeError:
                error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
                if set(surl.object_name).issubset(set('-Rrf')):
                    error_msg += (
                        ' This resource handle looks like a flag, which must appear '
                        'before all bindings. See "gsutil help iam ch" for more details.'
                    )
                raise CommandException(error_msg)

        if threaded_wildcards:
            name_expansion_iterator = NameExpansionIterator(
                self.command_name,
                self.debug,
                self.logger,
                self.gsutil_api,
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions,
                continue_on_error=self.continue_on_error
                or self.parallel_operations,
                bucket_listing_fields=['name'])

            seek_ahead_iterator = SeekAheadNameExpansionIterator(
                self.command_name,
                self.debug,
                self.GetSeekAheadGsutilApi(),
                threaded_wildcards,
                self.recursion_requested,
                all_versions=self.all_versions)

            # N.B.: Python2.6 support means we can't use a partial function here to
            # curry the bindings tuples into the wrapper function. We instead pass
            # the bindings along by zipping them with each name_expansion_iterator
            # result. See http://bugs.python.org/issue5228.
            serialized_bindings_tuples_it = itertools.repeat(
                [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
            # itertools.izip only exists on Python 2; on Python 3 the builtin
            # zip is already lazy, so fall back to it there.
            lazy_zip = getattr(itertools, 'izip', zip)
            self.Apply(_PatchIamWrapper,
                       lazy_zip(serialized_bindings_tuples_it,
                                name_expansion_iterator),
                       _PatchIamExceptionHandler,
                       fail_on_error=not self.continue_on_error,
                       seek_ahead_iterator=seek_ahead_iterator)

            self.everything_set_okay &= not GetFailureCount() > 0

        # TODO: Add an error counter for files and objects.
        if not self.everything_set_okay:
            raise CommandException('Some IAM policies could not be patched.')