Example #1
  def __iter__(self):
    for (names_container, blr) in self.tuple_iter:
      if not self.recursion_requested and not blr.IsObject():
        # At this point we either have a bucket or a prefix,
        # so if recursion is not requested, we're going to omit it.
        expanded_url = StorageUrlFromString(blr.url_string)
        if expanded_url.IsFileUrl():
          desc = 'directory'
        else:
          desc = blr.type_name
        if self.cmd_supports_recursion:
          self.logger.info('Omitting %s "%s". (Did you mean to do %s -r?)',
                           desc, blr.url_string, self.command_name)
        else:
          self.logger.info('Omitting %s "%s".', desc, blr.url_string)
      else:
        yield (names_container, blr)
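
The method above filters a stream of (names_container, BucketListingRef) tuples: object entries pass through unchanged, while buckets, prefixes, and directories are logged and dropped whenever recursion was not requested (the constructor arguments can be seen where Example #2 builds this iterator in step 3). The sketch below is a minimal, self-contained rendition of the same filtering pattern; Ref, omit_non_recursive, and the driver code are hypothetical stand-ins written for this illustration, not the gsutil classes.

import logging
from collections import namedtuple

# Hypothetical stand-in for gsutil's BucketListingRef, reduced to the fields
# this sketch needs.
Ref = namedtuple('Ref', ['url_string', 'is_object', 'type_name'])


def omit_non_recursive(tuple_iter, recursion_requested, command_name,
                       cmd_supports_recursion, logger):
  """Yields (names_container, ref) tuples, skipping non-objects unless -r."""
  for names_container, ref in tuple_iter:
    if not recursion_requested and not ref.is_object:
      # Buckets, prefixes, and directories are only logged, never yielded,
      # when recursion was not requested.
      if cmd_supports_recursion:
        logger.info('Omitting %s "%s". (Did you mean to do %s -r?)',
                    ref.type_name, ref.url_string, command_name)
      else:
        logger.info('Omitting %s "%s".', ref.type_name, ref.url_string)
    else:
      yield (names_container, ref)


logging.basicConfig(level=logging.INFO)
entries = [(False, Ref('gs://bucket/subdir/', False, 'prefix')),
           (False, Ref('gs://bucket/obj.txt', True, 'object'))]
for _, ref in omit_non_recursive(iter(entries), False, 'rm', True,
                                 logging.getLogger('demo')):
  print(ref.url_string)  # only gs://bucket/obj.txt is yielded without -r

Run directly, the sketch prints only the object URL and logs the omission hint for the prefix.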
Example #2
    def __iter__(self):
        """Iterates over all source URLs passed to the iterator.

        For each src url, expands wildcards, object-less bucket names,
        subdir bucket names, and directory names, and generates a flat
        listing of all the matching objects/files.

        You should instantiate this object using the static factory function
        NameExpansionIterator, because consumers of this iterator need the
        PluralityCheckableIterator wrapper built by that function.

        Yields:
          gslib.name_expansion.NameExpansionResult.

        Raises:
          CommandException: if errors encountered.
        """
        for url_str in self.url_strs:
            storage_url = StorageUrlFromString(url_str)

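            # A streaming source ("-", i.e. stdin) cannot be wildcard-expanded
            # and must be the only source URL, so it is yielded directly and
            # the expansion steps below are skipped.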
            if storage_url.IsFileUrl() and storage_url.IsStream():
                if self.url_strs.has_plurality:
                    raise CommandException(
                        'Multiple URL strings are not supported '
                        'with streaming ("-") URLs.')
                yield NameExpansionResult(storage_url, False, False,
                                          storage_url)
                continue

            # Step 1: Expand any explicitly specified wildcards. The output from this
            # step is an iterator of BucketListingRef.
            # Starting with gs://buck*/abc* this step would expand to gs://bucket/abcd

            src_names_bucket = False
            if (storage_url.IsCloudUrl() and storage_url.IsBucket()
                    and not self.recursion_requested):
                # UNIX commands like rm and cp will omit directory references.
                # If url_str refers only to buckets and we are not recursing,
                # then produce references of type BUCKET, because they are guaranteed
                # to pass through Step 2 and be omitted in Step 3.
                post_step1_iter = PluralityCheckableIterator(
                    self.WildcardIterator(url_str).IterBuckets(
                        bucket_fields=['id']))
            else:
                # Get a list of objects and prefixes, expanding the top level for
                # any listed buckets.  If our source is a bucket, however, we need
                # to treat all of the top level expansions as names_container=True.
                post_step1_iter = PluralityCheckableIterator(
                    self.WildcardIterator(url_str).IterAll(
                        bucket_listing_fields=['name'],
                        expand_top_level_buckets=True))
                if storage_url.IsCloudUrl() and storage_url.IsBucket():
                    src_names_bucket = True

            # Step 2: Expand bucket subdirs. The output from this
            # step is an iterator of (names_container, BucketListingRef).
            # Starting with gs://bucket/abcd this step would expand to:
            #   iter([(True, abcd/o1.txt), (True, abcd/o2.txt)]).
            subdir_exp_wildcard = self._flatness_wildcard[
                self.recursion_requested]
            if self.recursion_requested:
                post_step2_iter = _ImplicitBucketSubdirIterator(
                    self, post_step1_iter, subdir_exp_wildcard)
            else:
                post_step2_iter = _NonContainerTuplifyIterator(post_step1_iter)
            post_step2_iter = PluralityCheckableIterator(post_step2_iter)

            # Because we actually perform and check object listings here, this will
            # raise if url_args includes a non-existent object.  However,
            # plurality_checkable_iterator will buffer the exception for us, not
            # raising it until the iterator is actually asked to yield the first
            # result.
            if post_step2_iter.IsEmpty():
                if self.continue_on_error:
                    try:
                        raise CommandException('No URLs matched: %s' % url_str)
                    except CommandException as e:
                        # Yield a specialized tuple of (exception, stack_trace) to
                        # the wrapping PluralityCheckableIterator.
                        yield (e, sys.exc_info()[2])
                else:
                    raise CommandException('No URLs matched: %s' % url_str)

            # Step 3. Omit any directories, buckets, or bucket subdirectories for
            # non-recursive expansions.
            post_step3_iter = PluralityCheckableIterator(
                _OmitNonRecursiveIterator(post_step2_iter,
                                          self.recursion_requested,
                                          self.command_name,
                                          self.cmd_supports_recursion,
                                          self.logger))

            src_url_expands_to_multi = post_step3_iter.HasPlurality()
            is_multi_source_request = (self.url_strs.has_plurality
                                       or src_url_expands_to_multi)

            # Step 4. Expand directories and buckets. This step yields the iterated
            # values. Starting with gs://bucket this step would expand to:
            #  [abcd/o1.txt, abcd/o2.txt, xyz/o1.txt, xyz/o2.txt]
            # Starting with file://dir this step would expand to:
            #  [dir/a.txt, dir/b.txt, dir/c/]
            for (names_container, blr) in post_step3_iter:
                src_names_container = src_names_bucket or names_container

                if blr.IsObject():
                    yield NameExpansionResult(storage_url,
                                              is_multi_source_request,
                                              src_names_container,
                                              blr.storage_url)
                else:
                    # Use implicit wildcarding to do the enumeration.
                    # At this point we are guaranteed that:
                    # - Recursion has been requested because non-object entries are
                    #   filtered in step 3 otherwise.
                    # - This is a prefix or bucket subdirectory because only
                    #   non-recursive iterations produce bucket references.
                    expanded_url = StorageUrlFromString(blr.url_string)
                    if expanded_url.IsFileUrl():
                        # Convert dir to implicit recursive wildcard.
                        url_to_iterate = '%s%s%s' % (blr, os.sep,
                                                     subdir_exp_wildcard)
                    else:
                        # Convert subdir to implicit recursive wildcard.
                        url_to_iterate = expanded_url.CreatePrefixUrl(
                            wildcard_suffix=subdir_exp_wildcard)

                    wc_iter = PluralityCheckableIterator(
                        self.WildcardIterator(url_to_iterate).IterObjects(
                            bucket_listing_fields=['name']))
                    src_url_expands_to_multi = (src_url_expands_to_multi
                                                or wc_iter.HasPlurality())
                    is_multi_source_request = (self.url_strs.has_plurality
                                               or src_url_expands_to_multi)
                    # This will be a flattened listing of all underlying objects in the
                    # subdir.
                    for blr in wc_iter:
                        yield NameExpansionResult(storage_url,
                                                  is_multi_source_request,
                                                  True, blr.storage_url)
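
This method relies throughout on PluralityCheckableIterator: IsEmpty() and HasPlurality() allow look-ahead without consuming results, and the wrapper defers exceptions until a caller actually asks for the first item, which is why the 'No URLs matched' error above can be buffered. The sketch below is a minimal, self-contained illustration of that idea; PeekablePlurality and expand are toy stand-ins invented here, not gsutil's gslib.plurality_checkable_iterator.

import sys


class PeekablePlurality(object):
  """Buffers up to two results so callers can test emptiness/plurality before
  consuming, and defers exceptions from the wrapped iterator until iteration."""

  def __init__(self, it):
    self.it = iter(it)
    self.buf = []  # each entry is ('value', item) or ('error', exc_info)

  def _fill(self, n):
    while len(self.buf) < n:
      try:
        self.buf.append(('value', next(self.it)))
      except StopIteration:
        break
      except Exception:
        # Remember the error; re-raise it only when the caller iterates.
        self.buf.append(('error', sys.exc_info()))
        break

  def IsEmpty(self):
    self._fill(1)
    return not self.buf

  def HasPlurality(self):
    self._fill(2)
    return len(self.buf) > 1

  def __iter__(self):
    while True:
      self._fill(1)
      if not self.buf:
        return
      kind, payload = self.buf.pop(0)
      if kind == 'error':
        raise payload[1].with_traceback(payload[2])
      yield payload


def expand(url_strs):
  # Hypothetical stand-in for the name expansion above: raises when a URL
  # matches nothing, mirroring the 'No URLs matched' CommandException.
  for url in url_strs:
    if url.endswith('missing'):
      raise ValueError('No URLs matched: %s' % url)
    yield url


results = PeekablePlurality(expand(['gs://bucket/a', 'gs://bucket/b']))
print(results.IsEmpty(), results.HasPlurality())  # False True
print(list(results))  # ['gs://bucket/a', 'gs://bucket/b']

Swapping the second URL for one ending in 'missing' defers the ValueError until the list() call rather than raising it during HasPlurality(), mirroring how the real wrapper postpones the 'No URLs matched' exception.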