def RunCommand(self): """Command entry point for the rm command.""" # self.recursion_requested is initialized in command.py (so it can be # checked in parent class for all commands). self.continue_on_error = self.parallel_operations self.read_args_from_stdin = False self.all_versions = False if self.sub_opts: for o, unused_a in self.sub_opts: if o == '-a': self.all_versions = True elif o == '-f': self.continue_on_error = True elif o == '-I': self.read_args_from_stdin = True elif o == '-r' or o == '-R': self.recursion_requested = True self.all_versions = True if self.read_args_from_stdin: if self.args: raise CommandException( 'No arguments allowed with the -I flag.') url_strs = StdinIterator() else: if not self.args: raise CommandException( 'The rm command (without -I) expects at ' 'least one URL.') url_strs = self.args # Tracks number of object deletes that failed. self.op_failure_count = 0 # Tracks if any buckets were missing. self.bucket_not_found_count = 0 # Tracks buckets that are slated for recursive deletion. bucket_urls_to_delete = [] self.bucket_strings_to_delete = [] if self.recursion_requested: bucket_fields = ['id'] for url_str in url_strs: url = StorageUrlFromString(url_str) if url.IsBucket() or url.IsProvider(): for blr in self.WildcardIterator(url_str).IterBuckets( bucket_fields=bucket_fields): bucket_urls_to_delete.append(blr.storage_url) self.bucket_strings_to_delete.append(url_str) self.preconditions = PreconditionsFromHeaders(self.headers or {}) try: # Expand wildcards, dirs, buckets, and bucket subdirs in URLs. name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, url_strs, self.recursion_requested, project_id=self.project_id, all_versions=self.all_versions, continue_on_error=self.continue_on_error or self.parallel_operations) seek_ahead_iterator = None # Cannot seek ahead with stdin args, since we can only iterate them # once without buffering in memory. if not self.read_args_from_stdin: seek_ahead_iterator = SeekAheadNameExpansionIterator( self.command_name, self.debug, self.GetSeekAheadGsutilApi(), url_strs, self.recursion_requested, all_versions=self.all_versions, project_id=self.project_id) # Perform remove requests in parallel (-m) mode, if requested, using # configured number of parallel processes and threads. Otherwise, # perform requests with sequential function calls in current process. self.Apply( _RemoveFuncWrapper, name_expansion_iterator, _RemoveExceptionHandler, fail_on_error=(not self.continue_on_error), shared_attrs=['op_failure_count', 'bucket_not_found_count'], seek_ahead_iterator=seek_ahead_iterator) # Assuming the bucket has versioning enabled, url's that don't map to # objects should throw an error even with all_versions, since the prior # round of deletes only sends objects to a history table. # This assumption that rm -a is only called for versioned buckets should be # corrected, but the fix is non-trivial. except CommandException as e: # Don't raise if there are buckets to delete -- it's valid to say: # gsutil rm -r gs://some_bucket # if the bucket is empty. if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete, e): DecrementFailureCount() else: raise except ServiceException, e: if not self.continue_on_error: raise
def _SetIam(self):
  """Set IAM policy for given wildcards on the command line."""

  self.continue_on_error = False
  self.recursion_requested = False
  self.all_versions = False
  force_etag = False
  etag = ''
  if self.sub_opts:
    for o, arg in self.sub_opts:
      if o in ['-r', '-R']:
        self.recursion_requested = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-a':
        self.all_versions = True
      elif o == '-e':
        etag = str(arg)
        force_etag = True
      else:
        self.RaiseInvalidArgumentException()

  file_url = self.args[0]
  patterns = self.args[1:]

  # Load the IAM policy file and raise error if the file is invalid JSON or
  # does not exist.
  try:
    with open(file_url, 'r') as fp:
      policy = json.loads(fp.read())
  except IOError:
    raise ArgumentException('Specified IAM policy file "%s" does not exist.' %
                            file_url)
  except ValueError as e:
    self.logger.debug('Invalid IAM policy file, ValueError:\n%s', e)
    raise ArgumentException('Invalid IAM policy file "%s".' % file_url)

  bindings = policy.get('bindings', [])
  if not force_etag:
    etag = policy.get('etag', '')

  policy_json = json.dumps({
      'bindings': bindings,
      'etag': etag,
      'version': IAM_POLICY_VERSION
  })
  try:
    policy = protojson.decode_message(apitools_messages.Policy, policy_json)
  except DecodeError:
    raise ArgumentException('Invalid IAM policy file "%s" or etag "%s".' %
                            (file_url, etag))

  self.everything_set_okay = True

  # This list of wildcard strings will be handled by NameExpansionIterator.
  threaded_wildcards = []

  for pattern in patterns:
    surl = StorageUrlFromString(pattern)
    if surl.IsBucket():
      if self.recursion_requested:
        surl.object_name = '*'
        threaded_wildcards.append(surl.url_string)
      else:
        self.SetIamHelper(surl, policy)
    else:
      threaded_wildcards.append(surl.url_string)

  # N.B.: If threaded_wildcards contains a non-existent bucket
  # (e.g. ["gs://non-existent", "gs://existent"]), NameExpansionIterator
  # will raise an exception in iter.next. This halts all iteration, even
  # when -f is set. This behavior is also evident in acl set. This behavior
  # also appears for any exception that will be raised when iterating over
  # wildcard expansions (access denied if bucket cannot be listed, etc.).
  if threaded_wildcards:
    name_expansion_iterator = NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        threaded_wildcards,
        self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name,
        self.debug,
        self.GetSeekAheadGsutilApi(),
        threaded_wildcards,
        self.recursion_requested,
        all_versions=self.all_versions)

    policy_it = itertools.repeat(protojson.encode_message(policy))
    self.Apply(_SetIamWrapper,
               zip(policy_it, name_expansion_iterator),
               _SetIamExceptionHandler,
               fail_on_error=not self.continue_on_error,
               seek_ahead_iterator=seek_ahead_iterator)

    self.everything_set_okay &= not GetFailureCount() > 0

  # TODO: Add an error counter for files and objects.
  if not self.everything_set_okay:
    raise CommandException('Some IAM policies could not be set.')
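
# A minimal sketch of the policy file that _SetIam loads above. Only the
# 'bindings' and 'etag' keys come straight from the code; the role and
# member values below are placeholders, not output of any real command.
EXAMPLE_POLICY_JSON = """
{
  "bindings": [
    {
      "role": "roles/storage.objectViewer",
      "members": ["user:jane@example.com"]
    }
  ],
  "etag": "CAE="
}
"""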
def RunCommand(self): """Command entry point for the du command.""" self.line_ending = '\n' self.all_versions = False self.produce_total = False self.human_readable = False self.summary_only = False self.exclude_patterns = [] if self.sub_opts: for o, a in self.sub_opts: if o == '-0': self.line_ending = '\0' elif o == '-a': self.all_versions = True elif o == '-c': self.produce_total = True elif o == '-e': self.exclude_patterns.append(a) elif o == '-h': self.human_readable = True elif o == '-s': self.summary_only = True elif o == '-X': if a == '-': f = sys.stdin else: f = open(a, 'r') try: for line in f: line = line.strip() if line: self.exclude_patterns.append(line) finally: f.close() if not self.args: # Default to listing all gs buckets. self.args = ['gs://'] total_bytes = 0 got_nomatch_errors = False def _PrintObjectLong(blr): return self._PrintInfoAboutBucketListingRef(blr) def _PrintNothing(unused_blr=None): pass def _PrintDirectory(num_bytes, name): if not self.summary_only: self._PrintSummaryLine(num_bytes, name) for url_arg in self.args: top_level_storage_url = StorageUrlFromString(url_arg) if top_level_storage_url.IsFileUrl(): raise CommandException('Only cloud URLs are supported for %s' % self.command_name) bucket_listing_fields = ['size'] ls_helper = LsHelper(self.WildcardIterator, self.logger, print_object_func=_PrintObjectLong, print_dir_func=_PrintNothing, print_dir_header_func=_PrintNothing, print_dir_summary_func=_PrintDirectory, print_newline_func=_PrintNothing, all_versions=self.all_versions, should_recurse=True, exclude_patterns=self.exclude_patterns, fields=bucket_listing_fields) # ls_helper expands to objects and prefixes, so perform a top-level # expansion first. if top_level_storage_url.IsProvider(): # Provider URL: use bucket wildcard to iterate over all buckets. top_level_iter = self.WildcardIterator( '%s://*' % top_level_storage_url.scheme).IterBuckets( bucket_fields=['id']) elif top_level_storage_url.IsBucket(): top_level_iter = self.WildcardIterator( '%s://%s' % (top_level_storage_url.scheme, top_level_storage_url.bucket_name)).IterBuckets( bucket_fields=['id']) else: top_level_iter = [BucketListingObject(top_level_storage_url)] for blr in top_level_iter: storage_url = blr.storage_url if storage_url.IsBucket() and self.summary_only: storage_url = StorageUrlFromString( storage_url.CreatePrefixUrl(wildcard_suffix='**')) _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint( storage_url) if (storage_url.IsObject() and exp_objs == 0 and ContainsWildcard(url_arg) and not self.exclude_patterns): got_nomatch_errors = True total_bytes += exp_bytes if self.summary_only: self._PrintSummaryLine(exp_bytes, blr.url_string.rstrip('/')) if self.produce_total: self._PrintSummaryLine(total_bytes, 'total') if got_nomatch_errors: raise CommandException('One or more URLs matched no objects.') return 0
def RunCommand(self): """Command entry point for the rm command.""" # self.recursion_requested is initialized in command.py (so it can be # checked in parent class for all commands). self.continue_on_error = False self.all_versions = False if self.sub_opts: for o, unused_a in self.sub_opts: if o == '-a': self.all_versions = True elif o == '-f': self.continue_on_error = True elif o == '-r' or o == '-R': self.recursion_requested = True self.all_versions = True bucket_urls_to_delete = [] bucket_strings_to_delete = [] if self.recursion_requested: bucket_fields = ['id'] for url_str in self.args: url = StorageUrlFromString(url_str) if url.IsBucket() or url.IsProvider(): for blr in self.WildcardIterator(url_str).IterBuckets( bucket_fields=bucket_fields): bucket_urls_to_delete.append(blr.storage_url) bucket_strings_to_delete.append(url_str) # Used to track if any files failed to be removed. self.everything_removed_okay = True try: # Expand wildcards, dirs, buckets, and bucket subdirs in URLs. name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, self.args, self.recursion_requested, project_id=self.project_id, all_versions=self.all_versions, continue_on_error=self.continue_on_error or self.parallel_operations) # Perform remove requests in parallel (-m) mode, if requested, using # configured number of parallel processes and threads. Otherwise, # perform requests with sequential function calls in current process. self.Apply(_RemoveFuncWrapper, name_expansion_iterator, _RemoveExceptionHandler, fail_on_error=(not self.continue_on_error)) # Assuming the bucket has versioning enabled, url's that don't map to # objects should throw an error even with all_versions, since the prior # round of deletes only sends objects to a history table. # This assumption that rm -a is only called for versioned buckets should be # corrected, but the fix is non-trivial. except CommandException as e: # Don't raise if there are buckets to delete -- it's valid to say: # gsutil rm -r gs://some_bucket # if the bucket is empty. if not bucket_urls_to_delete and not self.continue_on_error: raise # Reset the failure count if we failed due to an empty bucket that we're # going to delete. msg = 'No URLs matched: ' if msg in str(e): parts = str(e).split(msg) if len(parts) == 2 and parts[1] in bucket_strings_to_delete: ResetFailureCount() except ServiceException, e: if not self.continue_on_error: raise
def _PatchIam(self):
  self.continue_on_error = False
  self.recursion_requested = False

  patch_bindings_tuples = []

  if self.sub_opts:
    for o, a in self.sub_opts:
      if o in ['-r', '-R']:
        self.recursion_requested = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-d':
        patch_bindings_tuples.append(BindingStringToTuple(False, a))

  patterns = []

  # N.B.: self.sub_opts stops taking in options at the first non-flagged
  # token. The rest of the tokens are sent to self.args. Thus, in order to
  # handle input of the form "-d <binding> <binding> <url>", we will have to
  # parse self.args for a mix of both bindings and CloudUrls. We are not
  # expecting to come across the -r, -f flags here.
  it = iter(self.args)
  for token in it:
    if STORAGE_URI_REGEX.match(token):
      patterns.append(token)
      break
    if token == '-d':
      patch_bindings_tuples.append(BindingStringToTuple(False, next(it)))
    else:
      patch_bindings_tuples.append(BindingStringToTuple(True, token))
  if not patch_bindings_tuples:
    raise CommandException('Must specify at least one binding.')

  # All following arguments are urls.
  for token in it:
    patterns.append(token)

  self.everything_set_okay = True
  self.tried_ch_on_resource_with_conditions = False
  threaded_wildcards = []
  for pattern in patterns:
    surl = StorageUrlFromString(pattern)
    try:
      if surl.IsBucket():
        if self.recursion_requested:
          surl.object_name = '*'
          threaded_wildcards.append(surl.url_string)
        else:
          self.PatchIamHelper(surl, patch_bindings_tuples)
      else:
        threaded_wildcards.append(surl.url_string)
    except AttributeError:
      error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
      if set(surl.object_name).issubset(set('-Rrf')):
        error_msg += (
            ' This resource handle looks like a flag, which must appear '
            'before all bindings. See "gsutil help iam ch" for more details.')
      raise CommandException(error_msg)

  if threaded_wildcards:
    name_expansion_iterator = NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        threaded_wildcards,
        self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name,
        self.debug,
        self.GetSeekAheadGsutilApi(),
        threaded_wildcards,
        self.recursion_requested,
        all_versions=self.all_versions)

    serialized_bindings_tuples_it = itertools.repeat(
        [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
    self.Apply(_PatchIamWrapper,
               zip(serialized_bindings_tuples_it, name_expansion_iterator),
               _PatchIamExceptionHandler,
               fail_on_error=not self.continue_on_error,
               seek_ahead_iterator=seek_ahead_iterator)

    self.everything_set_okay &= not GetFailureCount() > 0

  # TODO: Add an error counter for files and objects.
  if not self.everything_set_okay:
    msg = 'Some IAM policies could not be patched.'
    if self.tried_ch_on_resource_with_conditions:
      msg += '\n'
      msg += '\n'.join(
          textwrap.wrap(
              'Some resources had conditions present in their IAM policy '
              'bindings, which is not supported by "iam ch". %s' %
              IAM_CH_CONDITIONS_WORKAROUND_MSG))
    raise CommandException(msg)
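
# A standalone sketch of the binding/URL split above. _URI_RE is a
# simplified stand-in for STORAGE_URI_REGEX, and _example_split_args only
# mimics the control flow; it does not validate binding strings.
import re

_URI_RE = re.compile(r'^[a-z0-9]+://')

def _example_split_args(args):
  bindings, patterns = [], []
  it = iter(args)
  for token in it:
    if _URI_RE.match(token):
      patterns.append(token)
      break
    if token == '-d':
      bindings.append(('remove', next(it)))
    else:
      bindings.append(('add', token))
  patterns.extend(it)  # everything after the first URL is also a URL
  return bindings, patterns

# _example_split_args(['user:jane@example.com:objectViewer', 'gs://b1'])
# -> ([('add', 'user:jane@example.com:objectViewer')], ['gs://b1'])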
def RunCommand(self): """Command entry point for the ls command.""" got_nomatch_errors = False got_bucket_nomatch_errors = False listing_style = ListingStyle.SHORT get_bucket_info = False self.recursion_requested = False self.all_versions = False self.include_etag = False self.human_readable = False self.list_subdir_contents = True if self.sub_opts: for o, a in self.sub_opts: if o == '-a': self.all_versions = True elif o == '-e': self.include_etag = True elif o == '-b': get_bucket_info = True elif o == '-h': self.human_readable = True elif o == '-l': listing_style = ListingStyle.LONG elif o == '-L': listing_style = ListingStyle.LONG_LONG elif o == '-p': self.project_id = a elif o == '-r' or o == '-R': self.recursion_requested = True elif o == '-d': self.list_subdir_contents = False if not self.args: # default to listing all gs buckets self.args = ['gs://'] total_objs = 0 total_bytes = 0 def MaybePrintBucketHeader(blr): if len(self.args) > 1: print '%s:' % blr.url_string.encode(UTF8) print_bucket_header = MaybePrintBucketHeader for url_str in self.args: storage_url = StorageUrlFromString(url_str) if storage_url.IsFileUrl(): raise CommandException('Only cloud URLs are supported for %s' % self.command_name) bucket_fields = None if (listing_style == ListingStyle.SHORT or listing_style == ListingStyle.LONG): bucket_fields = ['id'] elif listing_style == ListingStyle.LONG_LONG: bucket_fields = [ 'location', 'storageClass', 'versioning', 'acl', 'defaultObjectAcl', 'website', 'logging', 'cors', 'lifecycle' ] if storage_url.IsProvider(): # Provider URL: use bucket wildcard to list buckets. for blr in self.WildcardIterator( '%s://*' % storage_url.scheme).IterBuckets( bucket_fields=bucket_fields): self._PrintBucketInfo(blr, listing_style) elif storage_url.IsBucket() and get_bucket_info: # ls -b bucket listing request: List info about bucket(s). total_buckets = 0 for blr in self.WildcardIterator(url_str).IterBuckets( bucket_fields=bucket_fields): if not ContainsWildcard(url_str) and not blr.root_object: # Iterator does not make an HTTP call for non-wildcarded # listings with fields=='id'. Ensure the bucket exists by calling # GetBucket. self.gsutil_api.GetBucket(blr.storage_url.bucket_name, fields=['id'], provider=storage_url.scheme) self._PrintBucketInfo(blr, listing_style) total_buckets += 1 if not ContainsWildcard(url_str) and not total_buckets: got_bucket_nomatch_errors = True else: # URL names a bucket, object, or object subdir -> # list matching object(s) / subdirs. def _PrintPrefixLong(blr): print '%-33s%s' % ('', blr.url_string.encode(UTF8)) if listing_style == ListingStyle.SHORT: # ls helper by default readies us for a short listing. 
        ls_helper = LsHelper(self.WildcardIterator,
                             self.logger,
                             all_versions=self.all_versions,
                             print_bucket_header_func=print_bucket_header,
                             should_recurse=self.recursion_requested,
                             list_subdir_contents=self.list_subdir_contents)
      elif listing_style == ListingStyle.LONG:
        bucket_listing_fields = ['name', 'updated', 'size']
        if self.all_versions:
          bucket_listing_fields.extend(['generation', 'metageneration'])
        if self.include_etag:
          bucket_listing_fields.append('etag')

        ls_helper = LsHelper(self.WildcardIterator,
                             self.logger,
                             print_object_func=self._PrintLongListing,
                             print_dir_func=_PrintPrefixLong,
                             print_bucket_header_func=print_bucket_header,
                             all_versions=self.all_versions,
                             should_recurse=self.recursion_requested,
                             fields=bucket_listing_fields,
                             list_subdir_contents=self.list_subdir_contents)
      elif listing_style == ListingStyle.LONG_LONG:
        # List all fields.
        bucket_listing_fields = None
        ls_helper = LsHelper(self.WildcardIterator,
                             self.logger,
                             print_object_func=PrintFullInfoAboutObject,
                             print_dir_func=_PrintPrefixLong,
                             print_bucket_header_func=print_bucket_header,
                             all_versions=self.all_versions,
                             should_recurse=self.recursion_requested,
                             fields=bucket_listing_fields,
                             list_subdir_contents=self.list_subdir_contents)
      else:
        raise CommandException('Unknown listing style: %s' % listing_style)

      exp_dirs, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
      if storage_url.IsObject() and exp_objs == 0 and exp_dirs == 0:
        got_nomatch_errors = True
      total_bytes += exp_bytes
      total_objs += exp_objs

  if total_objs and listing_style != ListingStyle.SHORT:
    print('TOTAL: %d objects, %d bytes (%s)' %
          (total_objs, total_bytes, MakeHumanReadable(float(total_bytes))))
  if got_nomatch_errors:
    raise CommandException('One or more URLs matched no objects.')
  if got_bucket_nomatch_errors:
    raise NotFoundException('One or more bucket URLs matched no buckets.')

  return 0
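
# A rough stand-in for MakeHumanReadable as used in the TOTAL line above,
# assuming binary (1024-based) units; gsutil's real helper lives in
# gslib.utils and may format slightly differently.
def _example_make_human_readable(num_bytes):
  num = float(num_bytes)
  for unit in ('B', 'KiB', 'MiB', 'GiB', 'TiB'):
    if num < 1024.0 or unit == 'TiB':
      return '%.2f %s' % (num, unit)
    num /= 1024.0

# _example_make_human_readable(1536) -> '1.50 KiB'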
def RunCommand(self): """Command entry point for the rm command.""" # self.recursion_requested is initialized in command.py (so it can be # checked in parent class for all commands). self.continue_on_error = self.parallel_operations self.read_args_from_stdin = False self.all_versions = False if self.sub_opts: for o, unused_a in self.sub_opts: if o == '-a': self.all_versions = True elif o == '-f': self.continue_on_error = True elif o == '-I': self.read_args_from_stdin = True elif o == '-r' or o == '-R': self.recursion_requested = True self.all_versions = True if self.read_args_from_stdin: if self.args: raise CommandException( 'No arguments allowed with the -I flag.') url_strs = StdinIterator() else: if not self.args: raise CommandException( 'The rm command (without -I) expects at ' 'least one URL.') url_strs = self.args # Tracks number of object deletes that failed. self.op_failure_count = 0 # Tracks if any buckets were missing. self.bucket_not_found_count = 0 # Tracks buckets that are slated for recursive deletion. bucket_urls_to_delete = [] self.bucket_strings_to_delete = [] if self.recursion_requested: bucket_fields = ['id'] for url_str in url_strs: url = StorageUrlFromString(url_str) if url.IsBucket() or url.IsProvider(): for blr in self.WildcardIterator(url_str).IterBuckets( bucket_fields=bucket_fields): bucket_urls_to_delete.append(blr.storage_url) self.bucket_strings_to_delete.append(url_str) self.preconditions = PreconditionsFromHeaders(self.headers or {}) try: # Expand wildcards, dirs, buckets, and bucket subdirs in URLs. name_expansion_iterator = NameExpansionIterator( self.command_name, self.debug, self.logger, self.gsutil_api, url_strs, self.recursion_requested, project_id=self.project_id, all_versions=self.all_versions, continue_on_error=self.continue_on_error or self.parallel_operations) seek_ahead_iterator = None # Cannot seek ahead with stdin args, since we can only iterate them # once without buffering in memory. if not self.read_args_from_stdin: seek_ahead_iterator = SeekAheadNameExpansionIterator( self.command_name, self.debug, self.GetSeekAheadGsutilApi(), url_strs, self.recursion_requested, all_versions=self.all_versions, project_id=self.project_id) # Perform remove requests in parallel (-m) mode, if requested, using # configured number of parallel processes and threads. Otherwise, # perform requests with sequential function calls in current process. self.Apply( _RemoveFuncWrapper, name_expansion_iterator, _RemoveExceptionHandler, fail_on_error=(not self.continue_on_error), shared_attrs=['op_failure_count', 'bucket_not_found_count'], seek_ahead_iterator=seek_ahead_iterator) # Assuming the bucket has versioning enabled, url's that don't map to # objects should throw an error even with all_versions, since the prior # round of deletes only sends objects to a history table. # This assumption that rm -a is only called for versioned buckets should be # corrected, but the fix is non-trivial. except CommandException as e: # Don't raise if there are buckets to delete -- it's valid to say: # gsutil rm -r gs://some_bucket # if the bucket is empty. 
    if _ExceptionMatchesBucketToDelete(self.bucket_strings_to_delete, e):
      DecrementFailureCount()
    else:
      raise
  except ServiceException as e:
    if not self.continue_on_error:
      raise

  if self.bucket_not_found_count:
    raise CommandException('Encountered non-existent bucket during listing')

  if self.op_failure_count and not self.continue_on_error:
    raise CommandException('Some files could not be removed.')

  # If this was a gsutil rm -r command covering any bucket subdirs,
  # remove any dir_$folder$ objects (which are created by various web UI
  # tools to simulate folders).
  if self.recursion_requested:
    folder_object_wildcards = []
    for url_str in url_strs:
      url = StorageUrlFromString(url_str)
      if url.IsObject():
        folder_object_wildcards.append(url_str.rstrip('*') + '*_$folder$')
    if folder_object_wildcards:
      self.continue_on_error = True
      try:
        name_expansion_iterator = NameExpansionIterator(
            self.command_name,
            self.debug,
            self.logger,
            self.gsutil_api,
            folder_object_wildcards,
            self.recursion_requested,
            project_id=self.project_id,
            all_versions=self.all_versions)
        # When we're removing folder objects, always continue on error.
        self.Apply(_RemoveFuncWrapper,
                   name_expansion_iterator,
                   _RemoveFoldersExceptionHandler,
                   fail_on_error=False)
      except CommandException as e:
        # Ignore exception from name expansion due to an absent folder file.
        if not e.reason.startswith(NO_URLS_MATCHED_PREFIX):
          raise

  # Now that all data has been deleted, delete any bucket URLs.
  for url in bucket_urls_to_delete:
    self.logger.info('Removing %s...', url)

    @Retry(NotEmptyException, tries=3, timeout_secs=1)
    def BucketDeleteWithRetry():
      self.gsutil_api.DeleteBucket(url.bucket_name, provider=url.scheme)

    BucketDeleteWithRetry()

  if self.op_failure_count:
    plural_str = 's' if self.op_failure_count > 1 else ''
    raise CommandException('%d file%s/object%s could not be removed.' %
                           (self.op_failure_count, plural_str, plural_str))

  return 0
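
# Sketch of the placeholder-folder cleanup above: object URLs are turned
# into wildcards matching the dir_$folder$ marker objects some web UI tools
# create to simulate directories.
def _example_folder_wildcard(url_str):
  return url_str.rstrip('*') + '*_$folder$'

# _example_folder_wildcard('gs://bucket/dir/**') -> 'gs://bucket/dir/*_$folder$'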
def _Create(self):
  self.CheckArguments()

  # User-specified options
  pubsub_topic = None
  payload_format = None
  custom_attributes = {}
  event_types = []
  object_name_prefix = None
  should_setup_topic = True

  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-e':
        event_types.append(a)
      elif o == '-f':
        payload_format = a
      elif o == '-m':
        if ':' not in a:
          raise CommandException(
              'Custom attributes specified with -m should be of the form '
              'key:value')
        key, value = a.split(':')
        custom_attributes[key] = value
      elif o == '-p':
        object_name_prefix = a
      elif o == '-s':
        should_setup_topic = False
      elif o == '-t':
        pubsub_topic = a

  if payload_format not in PAYLOAD_FORMAT_MAP:
    raise CommandException(
        "Must provide a payload format with -f of either 'json' or 'none'")
  payload_format = PAYLOAD_FORMAT_MAP[payload_format]

  bucket_arg = self.args[-1]

  bucket_url = StorageUrlFromString(bucket_arg)
  if not bucket_url.IsCloudUrl() or not bucket_url.IsBucket():
    raise CommandException(
        "%s %s requires a GCS bucket name, but got '%s'" %
        (self.command_name, self.subcommand_name, bucket_arg))
  if bucket_url.scheme != 'gs':
    raise CommandException(
        'The %s command can only be used with gs:// bucket URLs.' %
        self.command_name)
  bucket_name = bucket_url.bucket_name
  self.logger.debug('Creating notification for bucket %s', bucket_url)

  # Find the project this bucket belongs to.
  bucket_metadata = self.gsutil_api.GetBucket(bucket_name,
                                              fields=['projectNumber'],
                                              provider=bucket_url.scheme)
  bucket_project_number = bucket_metadata.projectNumber

  # If not specified, choose a sensible default for the Cloud Pub/Sub topic
  # name.
  if not pubsub_topic:
    pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                              bucket_name)
  if not pubsub_topic.startswith('projects/'):
    # If a user picks a topic ID (mytopic) but doesn't pass the whole name
    # (projects/my-project/topics/mytopic), pick a default project.
    pubsub_topic = 'projects/%s/topics/%s' % (PopulateProjectId(None),
                                              pubsub_topic)
  self.logger.debug('Using Cloud Pub/Sub topic %s', pubsub_topic)

  just_modified_topic_permissions = False
  if should_setup_topic:
    # Ask GCS for the email address that represents GCS's permission to
    # publish to a Cloud Pub/Sub topic from this project.
    service_account = self.gsutil_api.GetProjectServiceAccount(
        bucket_project_number, provider=bucket_url.scheme).email_address
    self.logger.debug('Service account for project %d: %s',
                      bucket_project_number, service_account)
    just_modified_topic_permissions = self._CreateTopic(
        pubsub_topic, service_account)

  for attempt_number in range(0, 2):
    try:
      create_response = self.gsutil_api.CreateNotificationConfig(
          bucket_name,
          pubsub_topic=pubsub_topic,
          payload_format=payload_format,
          custom_attributes=custom_attributes,
          event_types=event_types if event_types else None,
          object_name_prefix=object_name_prefix,
          provider=bucket_url.scheme)
      break
    except PublishPermissionDeniedException:
      if attempt_number == 0 and just_modified_topic_permissions:
        # If we have just set the IAM policy, it may take up to 10 seconds
        # to take effect.
        self.logger.info(
            'Retrying create notification in 10 seconds '
            '(new permissions may take up to 10 seconds to take effect.)')
        time.sleep(10)
      else:
        raise

  notification_name = 'projects/_/buckets/%s/notificationConfigs/%s' % (
      bucket_name, create_response.id)
  self.logger.info('Created notification config %s', notification_name)

  return 0
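
# A standalone sketch of the topic-name defaulting above: bare topic IDs
# are expanded under a default project. 'my-project' is a placeholder for
# whatever PopulateProjectId(None) resolves to.
def _example_normalize_topic(pubsub_topic, default_project='my-project'):
  if not pubsub_topic.startswith('projects/'):
    return 'projects/%s/topics/%s' % (default_project, pubsub_topic)
  return pubsub_topic

# _example_normalize_topic('mytopic') -> 'projects/my-project/topics/mytopic'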
def RunCommand(self): """Command entry point for the mb command.""" bucket_policy_only = None location = None storage_class = None seconds = None public_access_prevention = None if self.sub_opts: for o, a in self.sub_opts: if o == '-l': location = a elif o == '-p': # Project IDs are sent as header values when using gs and s3 XML APIs. InsistAscii(a, 'Invalid non-ASCII character found in project ID') self.project_id = a elif o == '-c' or o == '-s': storage_class = NormalizeStorageClass(a) elif o == '--retention': seconds = RetentionInSeconds(a) elif o == '-b': if self.gsutil_api.GetApiSelector('gs') != ApiSelector.JSON: raise CommandException('The -b <on|off> option ' 'can only be used with the JSON API') InsistOnOrOff(a, 'Only on and off values allowed for -b option') bucket_policy_only = (a == 'on') elif o == '--pap': public_access_prevention = a bucket_metadata = apitools_messages.Bucket(location=location, storageClass=storage_class) if bucket_policy_only or public_access_prevention: bucket_metadata.iamConfiguration = IamConfigurationValue() iam_config = bucket_metadata.iamConfiguration if bucket_policy_only: iam_config.bucketPolicyOnly = BucketPolicyOnlyValue() iam_config.bucketPolicyOnly.enabled = bucket_policy_only if public_access_prevention: iam_config.publicAccessPrevention = public_access_prevention for bucket_url_str in self.args: bucket_url = StorageUrlFromString(bucket_url_str) if seconds is not None: if bucket_url.scheme != 'gs': raise CommandException('Retention policy can only be specified for ' 'GCS buckets.') retention_policy = (apitools_messages.Bucket.RetentionPolicyValue( retentionPeriod=seconds)) bucket_metadata.retentionPolicy = retention_policy if public_access_prevention and self.gsutil_api.GetApiSelector( bucket_url.scheme) != ApiSelector.JSON: raise CommandException( 'The --pap option can only be used for GCS Buckets with the JSON API' ) if not bucket_url.IsBucket(): raise CommandException('The mb command requires a URL that specifies a ' 'bucket.\n"%s" is not valid.' % bucket_url) if (not BUCKET_NAME_RE.match(bucket_url.bucket_name) or TOO_LONG_DNS_NAME_COMP.search(bucket_url.bucket_name)): raise InvalidUrlError('Invalid bucket name in URL "%s"' % bucket_url.bucket_name) self.logger.info('Creating %s...', bucket_url) # Pass storage_class param only if this is a GCS bucket. (In S3 the # storage class is specified on the key object.) try: self.gsutil_api.CreateBucket(bucket_url.bucket_name, project_id=self.project_id, metadata=bucket_metadata, provider=bucket_url.scheme) except BadRequestException as e: if (e.status == 400 and e.reason == 'DotfulBucketNameNotUnderTld' and bucket_url.scheme == 'gs'): bucket_name = bucket_url.bucket_name final_comp = bucket_name[bucket_name.rfind('.') + 1:] raise CommandException('\n'.join( textwrap.wrap( 'Buckets with "." in the name must be valid DNS names. The bucket' ' you are attempting to create (%s) is not a valid DNS name,' ' because the final component (%s) is not currently a valid part' ' of the top-level DNS tree.' % (bucket_name, final_comp)))) else: raise return 0
def _PatchIam(self):
  self.continue_on_error = False
  self.recursion_requested = False

  patch_bindings_tuples = []

  if self.sub_opts:
    for o, a in self.sub_opts:
      if o in ['-r', '-R']:
        self.recursion_requested = True
      elif o == '-f':
        self.continue_on_error = True
      elif o == '-d':
        patch_bindings_tuples.append(BindingStringToTuple(False, a))

  patterns = []

  # N.B.: self.sub_opts stops taking in options at the first non-flagged
  # token. The rest of the tokens are sent to self.args. Thus, in order to
  # handle input of the form "-d <binding> <binding> <url>", we will have to
  # parse self.args for a mix of both bindings and CloudUrls. We are not
  # expecting to come across the -r, -f flags here.
  it = iter(self.args)
  for token in it:
    if token == '-d':
      patch_bindings_tuples.append(BindingStringToTuple(False, it.next()))
    else:
      try:
        patch_bindings_tuples.append(BindingStringToTuple(True, token))
      # All following arguments are urls.
      except (ArgumentException, CommandException):
        patterns.append(token)
        for token in it:
          patterns.append(token)

  # We must have some bindings to process, else this is pointless.
  if not patch_bindings_tuples:
    raise CommandException('Must specify at least one binding.')

  self.everything_set_okay = True
  threaded_wildcards = []
  for pattern in patterns:
    surl = StorageUrlFromString(pattern)
    try:
      if surl.IsBucket():
        if self.recursion_requested:
          surl.object_name = '*'
          threaded_wildcards.append(surl.url_string)
        else:
          self.PatchIamHelper(surl, patch_bindings_tuples)
      else:
        threaded_wildcards.append(surl.url_string)
    except AttributeError:
      error_msg = 'Invalid Cloud URL "%s".' % surl.object_name
      if set(surl.object_name).issubset(set('-Rrf')):
        error_msg += (
            ' This resource handle looks like a flag, which must appear '
            'before all bindings. See "gsutil help iam ch" for more details.')
      raise CommandException(error_msg)

  if threaded_wildcards:
    name_expansion_iterator = NameExpansionIterator(
        self.command_name,
        self.debug,
        self.logger,
        self.gsutil_api,
        threaded_wildcards,
        self.recursion_requested,
        all_versions=self.all_versions,
        continue_on_error=self.continue_on_error or self.parallel_operations,
        bucket_listing_fields=['name'])

    seek_ahead_iterator = SeekAheadNameExpansionIterator(
        self.command_name,
        self.debug,
        self.GetSeekAheadGsutilApi(),
        threaded_wildcards,
        self.recursion_requested,
        all_versions=self.all_versions)

    # N.B.: Python2.6 support means we can't use a partial function here to
    # curry the bindings tuples into the wrapper function. We instead pass
    # the bindings along by zipping them with each name_expansion_iterator
    # result. See http://bugs.python.org/issue5228.
    serialized_bindings_tuples_it = itertools.repeat(
        [SerializeBindingsTuple(t) for t in patch_bindings_tuples])
    self.Apply(_PatchIamWrapper,
               itertools.izip(serialized_bindings_tuples_it,
                              name_expansion_iterator),
               _PatchIamExceptionHandler,
               fail_on_error=not self.continue_on_error,
               seek_ahead_iterator=seek_ahead_iterator)

    self.everything_set_okay &= not GetFailureCount() > 0

  # TODO: Add an error counter for files and objects.
  if not self.everything_set_okay:
    raise CommandException('Some IAM policies could not be patched.')
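
# Hypothetical invocations accepted by the parsing above (binding syntax
# per "gsutil help iam ch"; the member and bucket names are placeholders):
#
#   gsutil iam ch user:jane@example.com:objectViewer gs://example-bucket
#   gsutil iam ch -d user:jane@example.com gs://example-bucket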