def testContainsWildcardMultipleMatches(self, mock_CreateWildcardIterator,
                                        mock_gsutil_api):
  mock_CreateWildcardIterator.return_value = iter([
      BucketListingObject(StorageUrlFromString('gs://test/helloworld')),
      BucketListingObject(StorageUrlFromString('gs://test/helloworld2'))
  ])
  with self.assertRaises(CommandException):
    ExpandUrlToSingleBlr('gs://test/hello*/', mock_gsutil_api, 'project_id',
                         False, CreateOrGetGsutilLogger('copy_test'))
def __iter__(self, bucket_listing_fields=None):
  """Iterator that gets called when iterating over the file wildcard.

  In the case where no wildcard is present, returns a single matching file
  or directory.

  Args:
    bucket_listing_fields: Iterable fields to include in listings.
        Ex. ['size']. Currently only 'size' is supported.
        If present, will populate yielded BucketListingObject.root_object
        with the file name and size.

  Raises:
    WildcardException: if invalid wildcard found.

  Yields:
    BucketListingRef of type OBJECT (for files) or PREFIX (for directories)
  """
  include_size = (bucket_listing_fields and
                  'size' in set(bucket_listing_fields))

  wildcard = self.wildcard_url.object_name
  match = FLAT_LIST_REGEX.match(wildcard)
  if match:
    # Recursive wildcarding request ('.../**/...').
    # Example input: wildcard = '/tmp/tmp2pQJAX/**/*'
    base_dir = match.group('before')[:-1]
    remaining_wildcard = match.group('after')
    # At this point for the above example base_dir = '/tmp/tmp2pQJAX' and
    # remaining_wildcard = '/*'
    if remaining_wildcard.startswith('*'):
      raise WildcardException('Invalid wildcard with more than 2 consecutive '
                              '*s (%s)' % wildcard)
    # If there was no remaining wildcard past the recursive wildcard,
    # treat it as if it were a '*'. For example, file://tmp/** is equivalent
    # to file://tmp/**/*
    if not remaining_wildcard:
      remaining_wildcard = '*'
    # Skip slash(es).
    remaining_wildcard = remaining_wildcard.lstrip(os.sep)
    filepaths = self._IterDir(base_dir, remaining_wildcard)
  else:
    # Not a recursive wildcarding request.
    filepaths = glob.iglob(wildcard)
  for filepath in filepaths:
    expanded_url = StorageUrlFromString(filepath)
    try:
      if self.ignore_symlinks and os.path.islink(filepath):
        continue
      if os.path.isdir(filepath):
        yield BucketListingPrefix(expanded_url)
      else:
        blr_object = _GetFileObject(filepath) if include_size else None
        yield BucketListingObject(expanded_url, root_object=blr_object)
    except UnicodeEncodeError:
      raise CommandException('\n'.join(
          textwrap.wrap(_UNICODE_EXCEPTION_TEXT % repr(filepath))))
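# Illustrative sketch, not the gsutil source: a regex of the shape
# FLAT_LIST_REGEX would need to have for the 'before'/'after' groups used
# above, shown on the example wildcard from the comments. The exact pattern
# in gsutil may differ; this is an assumption for illustration only.
import re

flat_list_regex = re.compile(r'(?P<before>.*?)\*\*(?P<after>.*)')
m = flat_list_regex.match('/tmp/tmp2pQJAX/**/*')
print(m.group('before'))  # '/tmp/tmp2pQJAX/' (trailing slash stripped by [:-1])
print(m.group('after'))   # '/*'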
def RunCommand(self):
  """Command entry point for the compose command."""
  target_url_str = self.args[-1]
  self.args = self.args[:-1]
  target_url = StorageUrlFromString(target_url_str)
  self.CheckProvider(target_url)
  if target_url.HasGeneration():
    raise CommandException('A version-specific URL (%s) cannot be '
                           'the destination for gsutil compose - abort.' %
                           target_url)

  dst_obj_metadata = apitools_messages.Object(name=target_url.object_name,
                                              bucket=target_url.bucket_name)

  components = []
  # Remember the first source object so we can get its content type.
  first_src_url = None
  for src_url_str in self.args:
    if ContainsWildcard(src_url_str):
      src_url_iter = self.WildcardIterator(src_url_str).IterObjects()
    else:
      src_url_iter = [BucketListingObject(StorageUrlFromString(src_url_str))]
    for blr in src_url_iter:
      src_url = blr.storage_url
      self.CheckProvider(src_url)

      if src_url.bucket_name != target_url.bucket_name:
        raise CommandException('GCS does not support inter-bucket composing.')

      if not first_src_url:
        first_src_url = src_url
      src_obj_metadata = (
          apitools_messages.ComposeRequest.SourceObjectsValueListEntry(
              name=src_url.object_name))
      if src_url.HasGeneration():
        src_obj_metadata.generation = src_url.generation
      components.append(src_obj_metadata)
      # Avoid expanding too many components, and sanity check each name
      # expansion result.
      if len(components) > MAX_COMPOSE_ARITY:
        raise CommandException('"compose" called with too many component '
                               'objects. Limit is %d.' % MAX_COMPOSE_ARITY)

  if not components:
    raise CommandException('"compose" requires at least 1 component object.')

  dst_obj_metadata.contentType = self.gsutil_api.GetObjectMetadata(
      first_src_url.bucket_name,
      first_src_url.object_name,
      provider=first_src_url.scheme,
      fields=['contentType']).contentType

  preconditions = PreconditionsFromHeaders(self.headers or {})

  self.logger.info('Composing %s from %d component object(s).', target_url,
                   len(components))
  self.gsutil_api.ComposeObject(
      components,
      dst_obj_metadata,
      preconditions=preconditions,
      provider=target_url.scheme,
      encryption_tuple=GetEncryptionKeyWrapper(config))
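# Example invocation of the command implemented above (standard gsutil
# compose usage; the bucket and object names are hypothetical):
#
#   gsutil compose gs://my-bucket/part-1 gs://my-bucket/part-2 \
#       gs://my-bucket/combined
#
# All components must live in the same bucket as the destination, and at
# most MAX_COMPOSE_ARITY components are accepted per call.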
def RunCommand(self):
  """Command entry point for stat command."""
  stat_fields = ENCRYPTED_FIELDS + UNENCRYPTED_FULL_LISTING_FIELDS
  found_nonmatching_arg = False
  for url_str in self.args:
    arg_matches = 0
    url = StorageUrlFromString(url_str)
    if not url.IsObject():
      raise CommandException('The stat command only works with object URLs')
    try:
      if ContainsWildcard(url_str):
        blr_iter = self.WildcardIterator(url_str).IterObjects(
            bucket_listing_fields=stat_fields)
      else:
        try:
          single_obj = self.gsutil_api.GetObjectMetadata(
              url.bucket_name,
              url.object_name,
              generation=url.generation,
              provider=url.scheme,
              fields=stat_fields)
        except EncryptionException:
          # Retry without requesting hashes.
          single_obj = self.gsutil_api.GetObjectMetadata(
              url.bucket_name,
              url.object_name,
              generation=url.generation,
              provider=url.scheme,
              fields=UNENCRYPTED_FULL_LISTING_FIELDS)
        blr_iter = [BucketListingObject(url, root_object=single_obj)]
      for blr in blr_iter:
        if blr.IsObject():
          arg_matches += 1
          # TODO: Request fewer fields if we're not printing the object.
          if logging.getLogger().isEnabledFor(logging.INFO):
            PrintFullInfoAboutObject(blr, incl_acl=False)
    except AccessDeniedException:
      if logging.getLogger().isEnabledFor(logging.INFO):
        sys.stderr.write('You aren\'t authorized to read %s - skipping' %
                         url_str)
    except InvalidUrlError:
      raise
    except NotFoundException:
      pass
    if not arg_matches:
      if logging.getLogger().isEnabledFor(logging.INFO):
        sys.stderr.write(NO_URLS_MATCHED_TARGET % url_str)
      found_nonmatching_arg = True
  if found_nonmatching_arg:
    return 1
  return 0
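# Example invocations of the command implemented above (standard gsutil
# stat usage; the URLs are hypothetical). The command exits with status 1
# if any argument matched no objects:
#
#   gsutil stat gs://my-bucket/some-object
#   gsutil stat 'gs://my-bucket/prefix*'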
def testContainsWildcardMatchesObject(self, mock_CreateWildcardIterator,
                                      mock_gsutil_api):
  storage_url = StorageUrlFromString('gs://test/helloworld')
  mock_CreateWildcardIterator.return_value = iter(
      [BucketListingObject(storage_url)])
  (exp_url, have_existing_dst_container) = ExpandUrlToSingleBlr(
      'gs://test/hello*/', mock_gsutil_api, 'project_id', False,
      CreateOrGetGsutilLogger('copy_test'))
  self.assertFalse(have_existing_dst_container)
  self.assertEqual(exp_url, storage_url)
def RunCommand(self):
  """Command entry point for stat command."""
  # List of fields we'll print for stat objects.
  stat_fields = [
      'updated', 'cacheControl', 'contentDisposition', 'contentEncoding',
      'contentLanguage', 'size', 'contentType', 'componentCount', 'metadata',
      'crc32c', 'md5Hash', 'etag', 'generation', 'metageneration'
  ]
  found_nonmatching_arg = False
  for url_str in self.args:
    arg_matches = 0
    url = StorageUrlFromString(url_str)
    if not url.IsObject():
      raise CommandException('The stat command only works with object URLs')
    try:
      if ContainsWildcard(url_str):
        blr_iter = self.WildcardIterator(url_str).IterObjects(
            bucket_listing_fields=stat_fields)
      else:
        single_obj = self.gsutil_api.GetObjectMetadata(
            url.bucket_name,
            url.object_name,
            generation=url.generation,
            provider=url.scheme,
            fields=stat_fields)
        blr_iter = [BucketListingObject(url, root_object=single_obj)]
      for blr in blr_iter:
        if blr.IsObject():
          arg_matches += 1
          if logging.getLogger().isEnabledFor(logging.INFO):
            PrintFullInfoAboutObject(blr, incl_acl=False)
    except AccessDeniedException:
      if logging.getLogger().isEnabledFor(logging.INFO):
        sys.stderr.write('You aren\'t authorized to read %s - skipping' %
                         url_str)
    except InvalidUrlError:
      raise
    except NotFoundException:
      pass
    if not arg_matches:
      if logging.getLogger().isEnabledFor(logging.INFO):
        sys.stderr.write('No URLs matched %s' % url_str)
      found_nonmatching_arg = True
  if found_nonmatching_arg:
    return 1
  return 0
def __iter__(self):
  """Iterator that gets called when iterating over the file wildcard.

  In the case where no wildcard is present, returns a single matching file
  or directory.

  Raises:
    WildcardException: if invalid wildcard found.

  Yields:
    BucketListingRef of type OBJECT (for files) or PREFIX (for directories)
  """
  wildcard = self.wildcard_url.object_name
  match = FLAT_LIST_REGEX.match(wildcard)
  if match:
    # Recursive wildcarding request ('.../**/...').
    # Example input: wildcard = '/tmp/tmp2pQJAX/**/*'
    base_dir = match.group('before')[:-1]
    remaining_wildcard = match.group('after')
    # At this point for the above example base_dir = '/tmp/tmp2pQJAX' and
    # remaining_wildcard = '/*'
    if remaining_wildcard.startswith('*'):
      raise WildcardException('Invalid wildcard with more than 2 consecutive '
                              '*s (%s)' % wildcard)
    # If there was no remaining wildcard past the recursive wildcard,
    # treat it as if it were a '*'. For example, file://tmp/** is equivalent
    # to file://tmp/**/*
    if not remaining_wildcard:
      remaining_wildcard = '*'
    # Skip slash(es).
    remaining_wildcard = remaining_wildcard.lstrip(os.sep)
    filepaths = self._IterDir(base_dir, remaining_wildcard)
  else:
    # Not a recursive wildcarding request.
    filepaths = glob.iglob(wildcard)
  for filepath in filepaths:
    expanded_url = StorageUrlFromString(filepath)
    try:
      if os.path.isdir(filepath):
        yield BucketListingPrefix(expanded_url)
      else:
        yield BucketListingObject(expanded_url)
    except UnicodeEncodeError:
      raise CommandException('\n'.join(
          textwrap.wrap(_UNICODE_EXCEPTION_TEXT % repr(filepath))))
def _LocalDirIterator(base_url):
  """A generator that yields a BLR for each file in a local directory.

  We use this function instead of WildcardIterator for listing a local
  directory without recursion, because the glob.iglob implementation called
  by WildcardIterator skips "dot" files (which we don't want to do when
  synchronizing to or from a local directory).

  Args:
    base_url: URL for the directory over which to iterate.

  Yields:
    BucketListingObject for each file in the directory.
  """
  for filename in os.listdir(base_url.object_name):
    filename = os.path.join(base_url.object_name, filename)
    if os.path.isfile(filename):
      yield BucketListingObject(StorageUrlFromString(filename), None)
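# Self-contained sketch of why _LocalDirIterator exists: glob-based
# expansion with '*' skips dot files, while os.listdir returns them.
import glob
import os
import tempfile

tmp_dir = tempfile.mkdtemp()
open(os.path.join(tmp_dir, '.hidden'), 'w').close()
open(os.path.join(tmp_dir, 'visible'), 'w').close()

print(sorted(os.listdir(tmp_dir)))                     # ['.hidden', 'visible']
print(sorted(glob.glob(os.path.join(tmp_dir, '*'))))   # only '.../visible'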
def _GetObjectRef(self, bucket_url_string, gcs_object, with_version=False):
  """Creates a BucketListingRef of type OBJECT from the arguments.

  Args:
    bucket_url_string: Wildcardless string describing the containing bucket.
    gcs_object: gsutil_api root Object for populating the BucketListingRef.
    with_version: If true, return a reference with a versioned string.

  Returns:
    BucketListingRef of type OBJECT.
  """
  # Generation can be None in test mocks, so just return the
  # live object for simplicity.
  if with_version and gcs_object.generation is not None:
    generation_str = GenerationFromUrlAndString(self.wildcard_url,
                                                gcs_object.generation)
    object_string = '%s%s#%s' % (bucket_url_string, gcs_object.name,
                                 generation_str)
  else:
    object_string = '%s%s' % (bucket_url_string, gcs_object.name)
  object_url = StorageUrlFromString(object_string)
  return BucketListingObject(object_url, root_object=gcs_object)
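# Illustrative sketch (not from the gsutil source): the versioned string
# built above has the form '<bucket url><object name>#<generation>'. The
# values below are hypothetical.
bucket_url_string = 'gs://my-bucket/'
object_name = 'photos/cat.png'
generation = 1556664923420871
print('%s%s#%s' % (bucket_url_string, object_name, generation))
# -> gs://my-bucket/photos/cat.png#1556664923420871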
def RunCommand(self):
  """Command entry point for the du command."""
  self.line_ending = '\n'
  self.all_versions = False
  self.produce_total = False
  self.human_readable = False
  self.summary_only = False
  self.exclude_patterns = []
  if self.sub_opts:
    for o, a in self.sub_opts:
      if o == '-0':
        self.line_ending = '\0'
      elif o == '-a':
        self.all_versions = True
      elif o == '-c':
        self.produce_total = True
      elif o == '-e':
        self.exclude_patterns.append(a)
      elif o == '-h':
        self.human_readable = True
      elif o == '-s':
        self.summary_only = True
      elif o == '-X':
        if a == '-':
          f = sys.stdin
        else:
          f = open(a, 'r')
        try:
          for line in f:
            line = line.strip()
            if line:
              self.exclude_patterns.append(line)
        finally:
          f.close()

  if not self.args:
    # Default to listing all gs buckets.
    self.args = ['gs://']

  total_bytes = 0
  got_nomatch_errors = False

  def _PrintObjectLong(blr):
    return self._PrintInfoAboutBucketListingRef(blr)

  def _PrintNothing(unused_blr=None):
    pass

  def _PrintDirectory(num_bytes, name):
    if not self.summary_only:
      self._PrintSummaryLine(num_bytes, name)

  for url_arg in self.args:
    top_level_storage_url = StorageUrlFromString(url_arg)
    if top_level_storage_url.IsFileUrl():
      raise CommandException('Only cloud URLs are supported for %s' %
                             self.command_name)
    bucket_listing_fields = ['size']

    ls_helper = LsHelper(self.WildcardIterator,
                         self.logger,
                         print_object_func=_PrintObjectLong,
                         print_dir_func=_PrintNothing,
                         print_dir_header_func=_PrintNothing,
                         print_dir_summary_func=_PrintDirectory,
                         print_newline_func=_PrintNothing,
                         all_versions=self.all_versions,
                         should_recurse=True,
                         exclude_patterns=self.exclude_patterns,
                         fields=bucket_listing_fields)

    # ls_helper expands to objects and prefixes, so perform a top-level
    # expansion first.
    if top_level_storage_url.IsProvider():
      # Provider URL: use bucket wildcard to iterate over all buckets.
      top_level_iter = self.WildcardIterator(
          '%s://*' % top_level_storage_url.scheme).IterBuckets(
              bucket_fields=['id'])
    elif top_level_storage_url.IsBucket():
      top_level_iter = self.WildcardIterator(
          '%s://%s' % (top_level_storage_url.scheme,
                       top_level_storage_url.bucket_name)).IterBuckets(
                           bucket_fields=['id'])
    else:
      top_level_iter = [BucketListingObject(top_level_storage_url)]

    for blr in top_level_iter:
      storage_url = blr.storage_url
      if storage_url.IsBucket() and self.summary_only:
        storage_url = StorageUrlFromString(
            storage_url.CreatePrefixUrl(wildcard_suffix='**'))
      _, exp_objs, exp_bytes = ls_helper.ExpandUrlAndPrint(storage_url)
      if (storage_url.IsObject() and exp_objs == 0 and
          ContainsWildcard(url_arg) and not self.exclude_patterns):
        got_nomatch_errors = True
      total_bytes += exp_bytes

      if self.summary_only:
        self._PrintSummaryLine(exp_bytes, blr.url_string.rstrip('/'))

  if self.produce_total:
    self._PrintSummaryLine(total_bytes, 'total')

  if got_nomatch_errors:
    raise CommandException('One or more URLs matched no objects.')

  return 0
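# Example invocations of the command implemented above, using the flags
# handled in the option loop (the bucket name is hypothetical):
#
#   gsutil du gs://my-bucket                   # per-object sizes
#   gsutil du -sh gs://my-bucket               # human-readable summary only
#   gsutil du -c -e '*.tmp' gs://my-bucket     # grand total, excluding a pattern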