def testPluralityCheckableIteratorWith1Elem1Exception(self):
        """Tests a PluralityCheckableIterator whose second read raises.

        The wrapped iterator yields the value 1 on its first read, raises
        CustomTestException on its second read, and signals exhaustion on
        every read after that.
        """

        class IterTest(object):
            """Iterator that yields 1, then raises, then stops."""

            def __init__(self):
                self.position = 0

            def __iter__(self):
                return self

            def next(self):
                current = self.position
                if current not in (0, 1):
                    raise StopIteration()
                # Both the value read and the failing read advance the position.
                self.position = current + 1
                if current == 1:
                    raise CustomTestException('Test exception')
                return 1

        wrapped = PluralityCheckableIterator(IterTest())
        self.assertFalse(wrapped.IsEmpty())
        self.assertTrue(wrapped.HasPlurality())

        last_seen = None
        try:
            for item in wrapped:
                last_seen = item
            # Reaching this line means the iterator never raised.
            self.fail('Expected exception from iterator')
        except CustomTestException:
            pass
        # The value before the exception must still have been delivered.
        self.assertEqual(last_seen, 1)
Exemple #2
0
    def _GetIam(self, thread_state=None):
        """Prints the IAM policy of the single URL matched by self.args[0].

        The policy is printed as JSON with sorted keys and 2-space indent.

        Args:
          thread_state: Forwarded unchanged to GetIamHelper.

        Raises:
          CommandException: If the pattern matches no URLs, or more than one.
        """
        url_pattern = self.args[0]

        listing = self.WildcardIterator(url_pattern).IterAll(
            bucket_listing_fields=['name'])
        candidates = PluralityCheckableIterator(listing)
        if candidates.IsEmpty():
            raise CommandException('%s matched no URLs' % url_pattern)
        if candidates.HasPlurality():
            raise CommandException(
                '%s matched more than one URL, which is not allowed by the %s '
                'command' % (url_pattern, self.command_name))

        # Exactly one match remains at this point.
        only_match = list(candidates)[0]
        target_url = StorageUrlFromString(only_match.url_string)
        iam_policy = self.GetIamHelper(target_url, thread_state=thread_state)
        policy_dict = json.loads(protojson.encode_message(iam_policy))
        rendered = json.dumps(policy_dict,
                              sort_keys=True,
                              separators=(',', ': '),
                              indent=2)
        print(rendered)
 def testPluralityCheckableIteratorWith3Elems(self):
     """Tests PluralityCheckableIterator with 3 elements.

     The iterator must report itself as non-empty and plural, and must
     still yield every element of the underlying sequence, in order.
     """
     # Materialize the range as a list: on Python 3 a range object never
     # compares equal to a list, which would make the final assertion fail.
     input_list = list(range(3))
     it = iter(input_list)
     pcit = PluralityCheckableIterator(it)
     self.assertFalse(pcit.IsEmpty())
     self.assertTrue(pcit.HasPlurality())
     output_list = list(pcit)
     self.assertEqual(input_list, output_list)
 def testPluralityCheckableIteratorWith0Elems(self):
     """Tests a PluralityCheckableIterator wrapping an empty iterator.

     It must report itself as empty and not plural, and yield nothing.
     """
     expected = []
     wrapped = PluralityCheckableIterator(iter(expected))
     self.assertTrue(wrapped.IsEmpty())
     self.assertFalse(wrapped.HasPlurality())
     self.assertEqual(expected, list(wrapped))
Exemple #5
0
  def ExpandUrlAndPrint(self, url):
    """Lists everything matching the given URL via the print callbacks.

    Args:
      url: StorageUrl to iterate over.

    Returns:
      For a bucket URL or a recursive listing, whatever
      _RecurseExpandUrlAndPrint returns. Otherwise a tuple
      (num_dirs, num_objects, num_bytes) totalled over the listed entries.

    Raises:
      CommandException: If the listing yields an entry that is neither an
        object nor a prefix.
    """
    if url.IsBucket() or self.should_recurse:
      # IsBucket() implies a top-level listing.
      if url.IsBucket():
        self._print_bucket_header_func(url)
      return self._RecurseExpandUrlAndPrint(url.url_string,
                                            print_initial_newline=False)

    # User provided a prefix or object URL, but it's impossible to tell
    # which until we do a listing and see what matches.
    listing = self._iterator_func(
        url.CreatePrefixUrl(wildcard_suffix=None),
        all_versions=self.all_versions).IterAll(
            expand_top_level_buckets=True,
            bucket_listing_fields=self.bucket_listing_fields)
    top_level_iterator = PluralityCheckableIterator(listing)
    multiple_matches = top_level_iterator.HasPlurality()

    total_dirs = 0
    total_objects = 0
    total_bytes = 0
    need_separator = False

    for blr in top_level_iterator:
      if self._MatchesExcludedPattern(blr):
        continue
      if blr.IsObject():
        dir_count = 0
        object_count, byte_count = self._print_object_func(blr)
        need_separator = True
      elif blr.IsPrefix():
        # Separate this directory from whatever was printed before it.
        if need_separator:
          self._print_newline_func()
        need_separator = True
        if multiple_matches:
          self._print_dir_header_func(blr)
        prefix_url = StorageUrlFromString(blr.url_string)
        expansion_url_str = prefix_url.CreatePrefixUrl(wildcard_suffix='*')
        dir_count, object_count, byte_count = (
            self._RecurseExpandUrlAndPrint(expansion_url_str))
        self._print_dir_summary_func(byte_count, blr)
      else:
        # We handle all buckets at the top level, so this should never happen.
        raise CommandException(
            'Sub-level iterator returned a CsBucketListingRef of type Bucket')
      total_objects += object_count
      total_dirs += dir_count
      total_bytes += byte_count
    return total_dirs, total_objects, total_bytes
Exemple #6
0
    def _GetIam(self, pattern, thread_state=None):
        """Returns the IAM policy of the single URL matched by the pattern.

        Args:
          pattern: URL string (wildcards allowed) that must match one URL.
          thread_state: Forwarded unchanged to GetIamHelper.

        Returns:
          The result of GetIamHelper for the matched URL.

        Raises:
          CommandException: If the pattern matches no URLs, or more than one.
        """
        listing = self.WildcardIterator(pattern).IterAll(
            bucket_listing_fields=['name'])
        candidates = PluralityCheckableIterator(listing)
        if candidates.IsEmpty():
            raise CommandException('%s matched no URLs' % pattern)
        if candidates.HasPlurality():
            raise CommandException(
                '%s matched more than one URL, which is not allowed by the %s '
                'command' % (pattern, self.command_name))

        # Exactly one match remains at this point.
        only_match = list(candidates)[0]
        target_url = StorageUrlFromString(only_match.url_string)
        return self.GetIamHelper(target_url, thread_state=thread_state)
Exemple #7
0
    def testPluralityCheckableIteratorReadsAheadAsNeeded(self):
        """Tests that the PCI does not unnecessarily read new elements.

        The wrapped iterator counts how many times it has been advanced, so
        each assertion below pins how far the PCI has read ahead.
        """

        class IterTest(object):
            """Iterator that yields None three times and counts its reads."""

            def __init__(self):
                self.position = 0

            def __iter__(self):
                return self

            def next(self):
                if self.position == 3:
                    raise StopIteration()
                self.position += 1

        # IsEmpty and PeekException should retrieve only 1 element from the
        # underlying iterator.
        pcit = PluralityCheckableIterator(IterTest())
        pcit.IsEmpty()
        pcit.PeekException()
        # assertEqual is used throughout: assertEquals is a deprecated alias
        # that newer unittest versions no longer provide.
        self.assertEqual(pcit.orig_iterator.position, 1)
        # HasPlurality requires populating 2 elements into the iterator.
        pcit.HasPlurality()
        self.assertEqual(pcit.orig_iterator.position, 2)
        # next should yield already-populated elements without advancing the
        # iterator.
        pcit.next()  # Yields element 1
        self.assertEqual(pcit.orig_iterator.position, 2)
        pcit.next()  # Yields element 2
        self.assertEqual(pcit.orig_iterator.position, 2)
        pcit.next()  # Yields element 3
        self.assertEqual(pcit.orig_iterator.position, 3)
        try:
            pcit.next()  # Underlying iterator is empty
            self.fail('Expected StopIteration')
        except StopIteration:
            pass
Exemple #8
0
    def __iter__(self):
        """Iterates over all source URLs passed to the iterator.

        For each src url, expands wildcards, object-less bucket names,
        subdir bucket names, and directory names, and generates a flat
        listing of all the matching objects/files.

        You should instantiate this object using the static factory function
        NameExpansionIterator, because consumers of this iterator need the
        PluralityCheckableIterator wrapper built by that function.

        Yields:
          gslib.name_expansion.NameExpansionResult. When continue_on_error is
          set and a URL matches nothing, an (exception, traceback) tuple is
          yielded for that URL instead.

        Raises:
          CommandException: if errors encountered.
        """
        for url_str in self.url_strs:
            storage_url = StorageUrlFromString(url_str)

            if storage_url.IsFileUrl() and storage_url.IsStream():
                if self.url_strs.has_plurality:
                    raise CommandException(
                        'Multiple URL strings are not supported '
                        'with streaming ("-") URLs.')
                yield NameExpansionResult(storage_url, False, False,
                                          storage_url)
                continue

            # Step 1: Expand any explicitly specified wildcards. The output from this
            # step is an iterator of BucketListingRef.
            # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd

            src_names_bucket = False
            if (storage_url.IsCloudUrl() and storage_url.IsBucket()
                    and not self.recursion_requested):
                # UNIX commands like rm and cp will omit directory references.
                # If url_str refers only to buckets and we are not recursing,
                # then produce references of type BUCKET, because they are guaranteed
                # to pass through Step 2 and be omitted in Step 3.
                post_step1_iter = PluralityCheckableIterator(
                    self.WildcardIterator(url_str).IterBuckets(
                        bucket_fields=['id']))
            else:
                # Get a list of objects and prefixes, expanding the top level for
                # any listed buckets.  If our source is a bucket, however, we need
                # to treat all of the top level expansions as names_container=True.
                post_step1_iter = PluralityCheckableIterator(
                    self.WildcardIterator(url_str).IterAll(
                        bucket_listing_fields=['name'],
                        expand_top_level_buckets=True))
                if storage_url.IsCloudUrl() and storage_url.IsBucket():
                    src_names_bucket = True

            # Step 2: Expand bucket subdirs. The output from this
            # step is an iterator of (names_container, BucketListingRef).
            # Starting with gs://bucket/abcd this step would expand to:
            #   iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
            subdir_exp_wildcard = self._flatness_wildcard[
                self.recursion_requested]
            if self.recursion_requested:
                post_step2_iter = _ImplicitBucketSubdirIterator(
                    self, post_step1_iter, subdir_exp_wildcard)
            else:
                post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
            post_step2_iter = PluralityCheckableIterator(post_step2_iter)

            # Because we actually perform and check object listings here, this will
            # raise if url_args includes a non-existent object.  However,
            # plurality_checkable_iterator will buffer the exception for us, not
            # raising it until the iterator is actually asked to yield the first
            # result.
            if post_step2_iter.IsEmpty():
                if self.continue_on_error:
                    # The exception is raised and caught on purpose so that a
                    # real traceback exists to hand to the consumer.
                    try:
                        raise CommandException('No URLs matched: %s' % url_str)
                    except CommandException as e:
                        # Yield a specialized tuple of (exception, stack_trace) to
                        # the wrapping PluralityCheckableIterator.
                        yield (e, sys.exc_info()[2])
                else:
                    raise CommandException('No URLs matched: %s' % url_str)

            # Step 3. Omit any directories, buckets, or bucket subdirectories for
            # non-recursive expansions.
            post_step3_iter = PluralityCheckableIterator(
                _OmitNonRecursiveIterator(post_step2_iter,
                                          self.recursion_requested,
                                          self.command_name,
                                          self.cmd_supports_recursion,
                                          self.logger))

            src_url_expands_to_multi = post_step3_iter.HasPlurality()
            is_multi_source_request = (self.url_strs.has_plurality
                                       or src_url_expands_to_multi)

            # Step 4. Expand directories and buckets. This step yields the iterated
            # values. Starting with gs://bucket this step would expand to:
            #  [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
            # Starting with file://dir this step would expand to:
            #  [dir/a.txt, dir/b.txt, dir/c/]
            for (names_container, blr) in post_step3_iter:
                src_names_container = src_names_bucket or names_container

                if blr.IsObject():
                    yield NameExpansionResult(storage_url,
                                              is_multi_source_request,
                                              src_names_container,
                                              blr.storage_url)
                else:
                    # Use implicit wildcarding to do the enumeration.
                    # At this point we are guaranteed that:
                    # - Recursion has been requested because non-object entries are
                    #   filtered in step 3 otherwise.
                    # - This is a prefix or bucket subdirectory because only
                    #   non-recursive iterations produce bucket references.
                    expanded_url = StorageUrlFromString(blr.url_string)
                    if expanded_url.IsFileUrl():
                        # Convert dir to implicit recursive wildcard.
                        url_to_iterate = '%s%s%s' % (blr, os.sep,
                                                     subdir_exp_wildcard)
                    else:
                        # Convert subdir to implicit recursive wildcard.
                        url_to_iterate = expanded_url.CreatePrefixUrl(
                            wildcard_suffix=subdir_exp_wildcard)

                    wc_iter = PluralityCheckableIterator(
                        self.WildcardIterator(url_to_iterate).IterObjects(
                            bucket_listing_fields=['name']))
                    src_url_expands_to_multi = (src_url_expands_to_multi
                                                or wc_iter.HasPlurality())
                    is_multi_source_request = (self.url_strs.has_plurality
                                               or src_url_expands_to_multi)
                    # This will be a flattened listing of all underlying objects in the
                    # subdir. A distinct loop name avoids shadowing the outer blr.
                    for expanded_blr in wc_iter:
                        yield NameExpansionResult(storage_url,
                                                  is_multi_source_request,
                                                  True,
                                                  expanded_blr.storage_url)
0
    def ExpandUrlAndPrint(self, url):
        """Iterates over the given URL and calls print functions.

        Args:
          url: StorageUrl to iterate over.

        Returns:
          For a bucket URL or a recursive listing, whatever
          _RecurseExpandUrlAndPrint returns. Otherwise a tuple
          (num_dirs, num_objects, num_bytes) totalled over the listed entries.

        Raises:
          CommandException: If the listing yields an entry that is neither an
            object nor a prefix.
        """
        num_objects = 0
        num_dirs = 0
        num_bytes = 0
        # Tracks whether a blank line must precede the next directory block.
        print_newline = False

        if url.IsBucket() or self.should_recurse:
            # IsBucket() implies a top-level listing.
            if url.IsBucket():
                self._print_bucket_header_func(url)
            return self._RecurseExpandUrlAndPrint(url.url_string,
                                                  print_initial_newline=False)
        else:
            # User provided a prefix or object URL, but it's impossible to tell
            # which until we do a listing and see what matches.
            top_level_iterator = PluralityCheckableIterator(
                self._iterator_func(
                    url.CreatePrefixUrl(wildcard_suffix=None),
                    all_versions=self.all_versions).IterAll(
                        expand_top_level_buckets=True,
                        bucket_listing_fields=self.bucket_listing_fields))
            plurality = top_level_iterator.HasPlurality()

            # Surface any exception buffered by the iterator now, before the
            # main loop, so the listing can be retried with different fields.
            try:
                top_level_iterator.PeekException()
            except EncryptionException:
                # Detailed listing on a single object can perform a GetObjectMetadata
                # call, which raises if a matching encryption key isn't found.
                # Re-iterate without requesting encrypted fields.
                top_level_iterator = PluralityCheckableIterator(
                    self._iterator_func(
                        url.CreatePrefixUrl(wildcard_suffix=None),
                        all_versions=self.all_versions).
                    IterAll(
                        expand_top_level_buckets=True,
                        bucket_listing_fields=UNENCRYPTED_FULL_LISTING_FIELDS))
                # The iterator was replaced, so plurality must be recomputed.
                plurality = top_level_iterator.HasPlurality()

            for blr in top_level_iterator:
                if self._MatchesExcludedPattern(blr):
                    continue
                if blr.IsObject():
                    # A single object contributes no directories.
                    nd = 0
                    no, nb = self._print_object_func(blr)
                    print_newline = True
                elif blr.IsPrefix():
                    if print_newline:
                        self._print_newline_func()
                    else:
                        print_newline = True
                    if plurality and self.list_subdir_contents:
                        self._print_dir_header_func(blr)
                    elif plurality and not self.list_subdir_contents:
                        # NOTE(review): presumably suppresses the blank line
                        # between entries when subdirectory contents are not
                        # being listed - confirm against expected output.
                        print_newline = False
                    # With list_subdir_contents the prefix is expanded with a
                    # '*' suffix; otherwise no wildcard suffix is appended.
                    expansion_url_str = StorageUrlFromString(
                        blr.url_string).CreatePrefixUrl(
                            wildcard_suffix='*' if self.
                            list_subdir_contents else None)
                    nd, no, nb = self._RecurseExpandUrlAndPrint(
                        expansion_url_str)
                    self._print_dir_summary_func(nb, blr)
                else:
                    # We handle all buckets at the top level, so this should never happen.
                    raise CommandException(
                        'Sub-level iterator returned a CsBucketListingRef of type Bucket'
                    )
                # Fold this entry's counts into the running totals.
                num_objects += no
                num_dirs += nd
                num_bytes += nb
            return num_dirs, num_objects, num_bytes