def testPluralityCheckableIteratorWith1Elem1Exception(self):
    """Tests PluralityCheckableIterator with 1 element followed by an exception.

    The underlying iterator yields one value; fetching the second one raises
    CustomTestException. The wrapper is expected to report plurality and to
    surface the exception only while iterating, after the first value has
    been yielded.
    """

    class IterTest(object):
        """Iterator that yields 1, then raises, then is exhausted."""

        def __init__(self):
            self.position = 0

        def __iter__(self):
            return self

        def next(self):
            if self.position == 0:
                self.position += 1
                return 1
            elif self.position == 1:
                self.position += 1
                raise CustomTestException('Test exception')
            else:
                raise StopIteration()

        # Expose the same method under the Python 3 iterator protocol name so
        # the helper works regardless of which protocol the wrapper drives.
        __next__ = next

    pcit = PluralityCheckableIterator(IterTest())
    self.assertFalse(pcit.IsEmpty())
    # The pending exception occupies the second slot, so plurality is
    # asserted to be True here.
    self.assertTrue(pcit.HasPlurality())
    iterated_value = None
    try:
        for value in pcit:
            iterated_value = value
        self.fail('Expected exception from iterator')
    except CustomTestException:
        pass
    # The first element must have been delivered before the exception.
    self.assertEqual(iterated_value, 1)
def _GetIam(self, thread_state=None):
    """Prints the IAM policy of the one bucket or object matching args[0].

    Args:
      thread_state: Forwarded unchanged to GetIamHelper.

    Raises:
      CommandException: if the pattern matches no URL, or more than one URL.
    """
    pattern = self.args[0]
    listing = self.WildcardIterator(pattern).IterAll(
        bucket_listing_fields=['name'])
    matches = PluralityCheckableIterator(listing)

    if matches.IsEmpty():
        raise CommandException('%s matched no URLs' % pattern)
    if matches.HasPlurality():
        raise CommandException(
            '%s matched more than one URL, which is not allowed by the %s '
            'command' % (pattern, self.command_name))

    # Exactly one match remains at this point.
    sole_match = list(matches)[0]
    storage_url = StorageUrlFromString(sole_match.url_string)
    policy = self.GetIamHelper(storage_url, thread_state=thread_state)

    # Round-trip through a plain JSON structure so the policy can be
    # re-serialized with sorted keys and fixed indentation.
    encoded_policy = protojson.encode_message(policy)
    policy_json = json.loads(encoded_policy)
    print(json.dumps(policy_json,
                     sort_keys=True,
                     separators=(',', ': '),
                     indent=2))
def testPluralityCheckableIteratorWith3Elems(self):
    """Tests PluralityCheckableIterator with 3 elements."""
    # Materialize the range: the final assertion compares against a list, and
    # a range object only equals a list where range() itself returns one.
    input_list = list(range(3))
    it = iter(input_list)
    pcit = PluralityCheckableIterator(it)
    self.assertFalse(pcit.IsEmpty())
    self.assertTrue(pcit.HasPlurality())
    # Checking emptiness/plurality must not drop any elements.
    output_list = list(pcit)
    self.assertEqual(input_list, output_list)
def testPluralityCheckableIteratorWith0Elems(self):
    """Checks a PluralityCheckableIterator wrapped around an empty iterator."""
    no_items = []
    pcit = PluralityCheckableIterator(iter(no_items))

    self.assertTrue(pcit.IsEmpty())
    self.assertFalse(pcit.HasPlurality())

    # Draining the wrapper must produce nothing.
    drained = list(pcit)
    self.assertEqual(no_items, drained)
def ExpandUrlAndPrint(self, url):
    """Lists everything matching the given URL through the print callbacks.

    Args:
      url: StorageUrl to iterate over.

    Returns:
      (num_dirs, num_objects, num_bytes) totals for what was iterated. For a
      bucket URL, or when recursion is enabled, the result of
      _RecurseExpandUrlAndPrint is returned unchanged.

    Raises:
      CommandException: if the listing yields a result that is neither an
        object nor a prefix.
    """
    if url.IsBucket() or self.should_recurse:
        # IsBucket() implies a top-level listing.
        if url.IsBucket():
            self._print_bucket_header_func(url)
        return self._RecurseExpandUrlAndPrint(
            url.url_string, print_initial_newline=False)

    # User provided a prefix or object URL, but it's impossible to tell
    # which until we do a listing and see what matches.
    prefix_url = url.CreatePrefixUrl(wildcard_suffix=None)
    wildcard = self._iterator_func(prefix_url, all_versions=self.all_versions)
    top_level_iterator = PluralityCheckableIterator(
        wildcard.IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields))
    plurality = top_level_iterator.HasPlurality()

    total_dirs = 0
    total_objects = 0
    total_bytes = 0
    need_separator = False

    for blr in top_level_iterator:
        if self._MatchesExcludedPattern(blr):
            continue

        if blr.IsObject():
            dir_count = 0
            object_count, byte_count = self._print_object_func(blr)
            need_separator = True
        elif blr.IsPrefix():
            # Separate this directory's output from whatever came before it.
            if need_separator:
                self._print_newline_func()
            else:
                need_separator = True
            # A header is only printed when there are multiple top-level
            # matches.
            if plurality:
                self._print_dir_header_func(blr)
            child_url = StorageUrlFromString(blr.url_string)
            expansion_url_str = child_url.CreatePrefixUrl(wildcard_suffix='*')
            dir_count, object_count, byte_count = (
                self._RecurseExpandUrlAndPrint(expansion_url_str))
            self._print_dir_summary_func(byte_count, blr)
        else:
            # We handle all buckets at the top level, so this should never
            # happen.
            raise CommandException(
                'Sub-level iterator returned a CsBucketListingRef of type Bucket')

        total_objects += object_count
        total_dirs += dir_count
        total_bytes += byte_count

    return total_dirs, total_objects, total_bytes
def _GetIam(self, pattern, thread_state=None):
    """Fetches the IAM policy of the one bucket or object matching pattern.

    Args:
      pattern: URL string, possibly containing wildcards, that must match
        exactly one URL.
      thread_state: Forwarded unchanged to GetIamHelper.

    Returns:
      Whatever GetIamHelper returns for the matched URL.

    Raises:
      CommandException: if the pattern matches no URL, or more than one URL.
    """
    listing = self.WildcardIterator(pattern).IterAll(
        bucket_listing_fields=['name'])
    matches = PluralityCheckableIterator(listing)

    if matches.IsEmpty():
        raise CommandException('%s matched no URLs' % pattern)
    if matches.HasPlurality():
        raise CommandException(
            '%s matched more than one URL, which is not allowed by the %s '
            'command' % (pattern, self.command_name))

    # Exactly one match remains at this point.
    sole_match = list(matches)[0]
    storage_url = StorageUrlFromString(sole_match.url_string)
    return self.GetIamHelper(storage_url, thread_state=thread_state)
def testPluralityCheckableIteratorReadsAheadAsNeeded(self):
    """Tests that the PCI does not unnecessarily read new elements."""

    class IterTest(object):
        """Iterator of 3 (None) elements that counts how many were fetched."""

        def __init__(self):
            self.position = 0

        def __iter__(self):
            return self

        def next(self):
            if self.position == 3:
                raise StopIteration()
            self.position += 1

        # Expose the same method under the Python 3 iterator protocol name so
        # the helper works regardless of which protocol the wrapper drives.
        __next__ = next

    # IsEmpty and PeekException should retrieve only 1 element from the
    # underlying iterator.
    pcit = PluralityCheckableIterator(IterTest())
    pcit.IsEmpty()
    pcit.PeekException()
    self.assertEqual(pcit.orig_iterator.position, 1)
    # HasPlurality requires populating 2 elements into the iterator.
    pcit.HasPlurality()
    self.assertEqual(pcit.orig_iterator.position, 2)
    # next should yield already-populated elements without advancing the
    # iterator.
    next(pcit)  # Yields element 1
    self.assertEqual(pcit.orig_iterator.position, 2)
    next(pcit)  # Yields element 2
    self.assertEqual(pcit.orig_iterator.position, 2)
    next(pcit)  # Yields element 3
    self.assertEqual(pcit.orig_iterator.position, 3)
    try:
        next(pcit)  # Underlying iterator is empty
        self.fail('Expected StopIteration')
    except StopIteration:
        pass
def __iter__(self):
    """Iterates over all source URLs passed to the iterator.

    For each src url, expands wildcards, object-less bucket names, subdir
    bucket names, and directory names, and generates a flat listing of all
    the matching objects/files.

    You should instantiate this object using the static factory function
    NameExpansionIterator, because consumers of this iterator need the
    PluralityCheckableIterator wrapper built by that function.

    Yields:
      gslib.name_expansion.NameExpansionResult. When continue_on_error is set
      and a URL matches nothing, a (exception, traceback) tuple is yielded
      instead of raising.

    Raises:
      CommandException: if errors encountered.
    """
    for url_str in self.url_strs:
        storage_url = StorageUrlFromString(url_str)

        if storage_url.IsFileUrl() and storage_url.IsStream():
            if self.url_strs.has_plurality:
                raise CommandException(
                    'Multiple URL strings are not supported '
                    'with streaming ("-") URLs.')
            yield NameExpansionResult(storage_url, False, False, storage_url)
            continue

        # Step 1: Expand any explicitly specified wildcards. The output from
        # this step is an iterator of BucketListingRef.
        # Starting with gs://buck*/abc* this step would expand to
        # gs://bucket/abcd
        src_names_bucket = False
        if (storage_url.IsCloudUrl() and storage_url.IsBucket()
                and not self.recursion_requested):
            # UNIX commands like rm and cp will omit directory references.
            # If url_str refers only to buckets and we are not recursing,
            # then produce references of type BUCKET, because they are
            # guaranteed to pass through Step 2 and be omitted in Step 3.
            post_step1_iter = PluralityCheckableIterator(
                self.WildcardIterator(url_str).IterBuckets(
                    bucket_fields=['id']))
        else:
            # Get a list of objects and prefixes, expanding the top level for
            # any listed buckets. If our source is a bucket, however, we need
            # to treat all of the top level expansions as
            # names_container=True.
            post_step1_iter = PluralityCheckableIterator(
                self.WildcardIterator(url_str).IterAll(
                    bucket_listing_fields=['name'],
                    expand_top_level_buckets=True))
            if storage_url.IsCloudUrl() and storage_url.IsBucket():
                src_names_bucket = True

        # Step 2: Expand bucket subdirs. The output from this
        # step is an iterator of (names_container, BucketListingRef).
        # Starting with gs://bucket/abcd this step would expand to:
        #   iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
        subdir_exp_wildcard = self._flatness_wildcard[
            self.recursion_requested]
        if self.recursion_requested:
            post_step2_iter = _ImplicitBucketSubdirIterator(
                self, post_step1_iter, subdir_exp_wildcard)
        else:
            post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
        post_step2_iter = PluralityCheckableIterator(post_step2_iter)

        # Because we actually perform and check object listings here, this
        # will raise if url_args includes a non-existent object. However,
        # plurality_checkable_iterator will buffer the exception for us, not
        # raising it until the iterator is actually asked to yield the first
        # result.
        if post_step2_iter.IsEmpty():
            if self.continue_on_error:
                # Raise and immediately catch so that a real traceback object
                # exists to hand to the consumer.
                try:
                    raise CommandException('No URLs matched: %s' % url_str)
                except CommandException as e:
                    # Yield a specialized tuple of (exception, stack_trace)
                    # to the wrapping PluralityCheckableIterator.
                    yield (e, sys.exc_info()[2])
            else:
                raise CommandException('No URLs matched: %s' % url_str)

        # Step 3. Omit any directories, buckets, or bucket subdirectories for
        # non-recursive expansions.
        post_step3_iter = PluralityCheckableIterator(
            _OmitNonRecursiveIterator(post_step2_iter,
                                      self.recursion_requested,
                                      self.command_name,
                                      self.cmd_supports_recursion,
                                      self.logger))
        src_url_expands_to_multi = post_step3_iter.HasPlurality()
        is_multi_source_request = (self.url_strs.has_plurality
                                   or src_url_expands_to_multi)

        # Step 4. Expand directories and buckets. This step yields the
        # iterated values.
        # Starting with gs://bucket this step would expand to:
        #   [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
        # Starting with file://dir this step would expand to:
        #   [dir/a.txt, dir/b.txt, dir/c/]
        for (names_container, blr) in post_step3_iter:
            src_names_container = src_names_bucket or names_container

            if blr.IsObject():
                yield NameExpansionResult(storage_url,
                                          is_multi_source_request,
                                          src_names_container,
                                          blr.storage_url)
            else:
                # Use implicit wildcarding to do the enumeration.
                # At this point we are guaranteed that:
                # - Recursion has been requested because non-object entries
                #   are filtered in step 3 otherwise.
                # - This is a prefix or bucket subdirectory because only
                #   non-recursive iterations produce bucket references.
                expanded_url = StorageUrlFromString(blr.url_string)
                if expanded_url.IsFileUrl():
                    # Convert dir to implicit recursive wildcard.
                    url_to_iterate = '%s%s%s' % (blr, os.sep,
                                                 subdir_exp_wildcard)
                else:
                    # Convert subdir to implicit recursive wildcard.
                    url_to_iterate = expanded_url.CreatePrefixUrl(
                        wildcard_suffix=subdir_exp_wildcard)

                wc_iter = PluralityCheckableIterator(
                    self.WildcardIterator(url_to_iterate).IterObjects(
                        bucket_listing_fields=['name']))
                src_url_expands_to_multi = (src_url_expands_to_multi
                                            or wc_iter.HasPlurality())
                is_multi_source_request = (self.url_strs.has_plurality
                                           or src_url_expands_to_multi)
                # This will be a flattened listing of all underlying objects
                # in the subdir. A distinct loop variable is used so the
                # outer loop's blr is not rebound mid-iteration.
                for expanded_blr in wc_iter:
                    yield NameExpansionResult(storage_url,
                                              is_multi_source_request,
                                              True,
                                              expanded_blr.storage_url)
def ExpandUrlAndPrint(self, url):
    """Lists everything matching the given URL through the print callbacks.

    Args:
      url: StorageUrl to iterate over.

    Returns:
      (num_dirs, num_objects, num_bytes) totals for what was iterated. For a
      bucket URL, or when recursion is enabled, the result of
      _RecurseExpandUrlAndPrint is returned unchanged.

    Raises:
      CommandException: if the listing yields a result that is neither an
        object nor a prefix.
    """
    if url.IsBucket() or self.should_recurse:
        # IsBucket() implies a top-level listing.
        if url.IsBucket():
            self._print_bucket_header_func(url)
        return self._RecurseExpandUrlAndPrint(
            url.url_string, print_initial_newline=False)

    # User provided a prefix or object URL, but it's impossible to tell
    # which until we do a listing and see what matches.
    prefix_url = url.CreatePrefixUrl(wildcard_suffix=None)
    wildcard = self._iterator_func(prefix_url, all_versions=self.all_versions)
    top_level_iterator = PluralityCheckableIterator(
        wildcard.IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields))
    plurality = top_level_iterator.HasPlurality()

    try:
        top_level_iterator.PeekException()
    except EncryptionException:
        # Detailed listing on a single object can perform a GetObjectMetadata
        # call, which raises if a matching encryption key isn't found.
        # Re-iterate without requesting encrypted fields.
        retry_prefix_url = url.CreatePrefixUrl(wildcard_suffix=None)
        retry_wildcard = self._iterator_func(
            retry_prefix_url, all_versions=self.all_versions)
        top_level_iterator = PluralityCheckableIterator(
            retry_wildcard.IterAll(
                expand_top_level_buckets=True,
                bucket_listing_fields=UNENCRYPTED_FULL_LISTING_FIELDS))
        plurality = top_level_iterator.HasPlurality()

    total_dirs = 0
    total_objects = 0
    total_bytes = 0
    need_separator = False

    for blr in top_level_iterator:
        if self._MatchesExcludedPattern(blr):
            continue

        if blr.IsObject():
            dir_count = 0
            object_count, byte_count = self._print_object_func(blr)
            need_separator = True
        elif blr.IsPrefix():
            # Separate this directory's output from whatever came before it.
            if need_separator:
                self._print_newline_func()
            else:
                need_separator = True

            if plurality:
                if self.list_subdir_contents:
                    self._print_dir_header_func(blr)
                else:
                    # Without headers or expanded contents, entries are
                    # listed back to back, so the pending separator is
                    # cancelled.
                    need_separator = False

            # Only expand into the subdirectory when its contents were
            # requested; otherwise list the prefix entry itself.
            suffix = '*' if self.list_subdir_contents else None
            child_url = StorageUrlFromString(blr.url_string)
            expansion_url_str = child_url.CreatePrefixUrl(
                wildcard_suffix=suffix)
            dir_count, object_count, byte_count = (
                self._RecurseExpandUrlAndPrint(expansion_url_str))
            self._print_dir_summary_func(byte_count, blr)
        else:
            # We handle all buckets at the top level, so this should never
            # happen.
            raise CommandException(
                'Sub-level iterator returned a CsBucketListingRef of type Bucket')

        total_objects += object_count
        total_dirs += dir_count
        total_bytes += byte_count

    return total_dirs, total_objects, total_bytes