def __iter__(self, bucket_listing_fields=None):
        """Yields a listing ref for every local path the file wildcard matches.

        A wildcard containing '**' is expanded through self._IterDir; any
        other wildcard (including a plain, wildcard-free path) is expanded
        with glob.iglob.

        Args:
          bucket_listing_fields: Iterable of fields to include in listings,
              e.g. ['size']. Only 'size' is acted upon here. When it is
              requested, each yielded BucketListingObject carries a
              root_object built by _GetFileObject; otherwise root_object is
              None.

        Raises:
          WildcardException: if the text following '**' starts with another
              '*' (more than 2 consecutive '*'s).
          CommandException: if a UnicodeEncodeError occurs while handling a
              matched path.

        Yields:
          BucketListingPrefix for directories, BucketListingObject for every
          other matched path. Symlinks are skipped when self.ignore_symlinks
          is set.
        """
        want_size = False
        if bucket_listing_fields:
            want_size = 'size' in set(bucket_listing_fields)

        pattern = self.wildcard_url.object_name
        recursive = FLAT_LIST_REGEX.match(pattern)
        if not recursive:
            # No '**' in the pattern: ordinary glob expansion is enough.
            candidates = glob.iglob(pattern)
        else:
            # Recursive wildcarding request ('.../**/...'), e.g. for
            # pattern = '/tmp/tmp2pQJAX/**/*' this gives
            # root_dir = '/tmp/tmp2pQJAX' and tail = '/*'.
            root_dir = recursive.group('before')[:-1]
            tail = recursive.group('after')
            if tail.startswith('*'):
                raise WildcardException(
                    'Invalid wildcard with more than 2 consecutive '
                    '*s (%s)' % pattern)
            # Nothing after '**' is treated as '**/*' (file://tmp/** is
            # equivalent to file://tmp/**/*); leading separators are dropped.
            tail = (tail or '*').lstrip(os.sep)
            candidates = self._IterDir(root_dir, tail)

        for path in candidates:
            url = StorageUrlFromString(path)
            try:
                if self.ignore_symlinks and os.path.islink(path):
                    continue
                if os.path.isdir(path):
                    yield BucketListingPrefix(url)
                    continue
                file_object = None
                if want_size:
                    file_object = _GetFileObject(path)
                yield BucketListingObject(url, root_object=file_object)
            except UnicodeEncodeError:
                message = _UNICODE_EXCEPTION_TEXT % repr(path)
                raise CommandException('\n'.join(textwrap.wrap(message)))
  def testContainsWildcardMatchesNotObject(self, mock_CreateWildcardIterator,
                                           mock_gsutil_api):
    # The mocked wildcard iterator reports exactly one match, and that match
    # is a prefix rather than an object.
    prefix_url = StorageUrlFromString('gs://test/helloworld')
    only_match = BucketListingPrefix(prefix_url)
    mock_CreateWildcardIterator.return_value = iter([only_match])

    logger = CreateOrGetGsutilLogger('copy_test')
    expansion = ExpandUrlToSingleBlr('gs://test/hello*/', mock_gsutil_api,
                                     'project_id', False, logger)
    resolved_url, dst_container_exists = expansion

    # The wildcard URL should resolve to the prefix's URL and be flagged as
    # an existing destination container.
    self.assertTrue(dst_container_exists)
    self.assertEqual(resolved_url, prefix_url)
    def _GetPrefixRef(self, bucket_url_string, prefix):
        """Builds a PREFIX-type BucketListingRef for a prefix within a bucket.

        Args:
          bucket_url_string: Wildcardless string describing the containing
              bucket; the prefix is appended to it to form the prefix URL.
          prefix: gsutil_api Prefix; used both for the URL text and as the
              root_object of the returned ref.

        Returns:
          BucketListingPrefix wrapping the combined URL.
        """
        return BucketListingPrefix(
            StorageUrlFromString('%s%s' % (bucket_url_string, prefix)),
            root_object=prefix)
Beispiel #4
0
    def __iter__(self):
        """Yields a listing ref for every local path the file wildcard matches.

        A wildcard containing '**' is expanded through self._IterDir; any
        other wildcard (including a plain, wildcard-free path) is expanded
        with glob.iglob.

        Raises:
          WildcardException: if the text following '**' starts with another
              '*' (more than 2 consecutive '*'s).
          CommandException: if a UnicodeEncodeError occurs while handling a
              matched path.

        Yields:
          BucketListingPrefix for directories, BucketListingObject for every
          other matched path.
        """
        pattern = self.wildcard_url.object_name
        recursive = FLAT_LIST_REGEX.match(pattern)
        if not recursive:
            # No '**' in the pattern: ordinary glob expansion is enough.
            candidates = glob.iglob(pattern)
        else:
            # Recursive wildcarding request ('.../**/...'), e.g. for
            # pattern = '/tmp/tmp2pQJAX/**/*' this gives
            # root_dir = '/tmp/tmp2pQJAX' and tail = '/*'.
            root_dir = recursive.group('before')[:-1]
            tail = recursive.group('after')
            if tail.startswith('*'):
                raise WildcardException(
                    'Invalid wildcard with more than 2 consecutive '
                    '*s (%s)' % pattern)
            # Nothing after '**' is treated as '**/*' (file://tmp/** is
            # equivalent to file://tmp/**/*); leading separators are dropped.
            tail = (tail or '*').lstrip(os.sep)
            candidates = self._IterDir(root_dir, tail)

        for path in candidates:
            url = StorageUrlFromString(path)
            try:
                if os.path.isdir(path):
                    yield BucketListingPrefix(url)
                    continue
                yield BucketListingObject(url)
            except UnicodeEncodeError:
                message = _UNICODE_EXCEPTION_TEXT % repr(path)
                raise CommandException('\n'.join(textwrap.wrap(message)))