Esempio n. 1
0
    def __call__(self, prefix, **kwargs):
        """Returns tab-completion candidates for a cloud URL prefix.

        Results come from the on-disk tab completion cache when it has an
        entry for the prefix; otherwise a cloud listing of `prefix + '*'` is
        performed and the cache is updated with the outcome.

        Args:
          prefix: The partially typed URL string. An empty value is treated
                  as 'gs://'.
          **kwargs: Unused by this method.

        Returns:
          A list of completion results. The list is empty when the prefix is
          a file URL, when only buckets are being completed and the wildcard
          URL is not a bucket URL, when the configured timeout is 0, or when
          the cloud listing times out.
        """
        if not prefix:
            prefix = 'gs://'
        elif IsFileUrlString(prefix):
            return []

        wildcard_url = prefix + '*'
        url = StorageUrlFromString(wildcard_url)
        if self._bucket_only and not url.IsBucket():
            return []

        # A configured timeout of 0 turns cloud completion off entirely.
        timeout = boto.config.getint('GSUtil', 'tab_completion_timeout', 5)
        if timeout == 0:
            return []

        start_time = time.time()

        cache_filename = GetTabCompletionCacheFilename()
        cache = TabCompletionCache.LoadFromFile(cache_filename)
        cached_results = cache.GetCachedResults(prefix)

        timing_log_entry_type = ''
        if cached_results is not None:
            results = cached_results
            timing_log_entry_type = ' (from cache)'
        else:
            try:
                results = self._PerformCloudListing(wildcard_url, timeout)
                if self._bucket_only and len(results) == 1:
                    results = [StripOneSlash(results[0])]
                # Hitting the result cap means the listing may be incomplete.
                partial_results = (len(results) == _TAB_COMPLETE_MAX_RESULTS)
                cache.UpdateCache(prefix, results, partial_results)
            except TimeoutError:
                timing_log_entry_type = ' (request timeout)'
                results = []

        cache.WriteToFile(cache_filename)

        end_time = time.time()
        num_results = len(results)
        elapsed_seconds = end_time - start_time
        # time.time() can return the same value twice in a row on clocks with
        # coarse resolution (e.g. a fast cache hit), and can even go backwards
        # if the system clock is adjusted. Avoid dividing by zero in that case
        # so that writing the timing log never breaks completion.
        if elapsed_seconds > 0:
            results_per_second = num_results / elapsed_seconds
        else:
            results_per_second = 0.0
        _WriteTimingLog(
            '%s results%s in %.2fs, %.2f results/second for prefix: %s\n' %
            (num_results, timing_log_entry_type, elapsed_seconds,
             results_per_second, prefix))

        return results
Esempio n. 2
0
    def _BuildBucketFilterStrings(self, wildcard):
        """Splits an object-name wildcard into listing and filtering parts.

        One call handles one level of wildcard expansion: it works out what
        to send in the bucket listing request, how to filter what comes back,
        and what is left over for the next expansion round.

        Args:
          wildcard: The object-name wildcard string to break apart. It may
                    also contain no wildcard characters at all.

        Returns:
          (prefix, delimiter, prefix_wildcard, suffix_wildcard)
          where:
            prefix is the prefix to send in the bucket listing request, or
              None when the wildcard starts with a wildcard character.
            delimiter is the delimiter to send in the bucket listing request;
              None when prefix_wildcard contains '**' (recursive match).
            prefix_wildcard is the pattern used to filter listing results.
            suffix_wildcard is the remaining pattern to append to filtered
              results for the next expansion iteration ('' when none).
          For example, assuming WILDCARD_REGEX matches '*' and StripOneSlash
          removes a single trailing '/', the object name abc/d*e/f*.txt gives
          prefix='abc/d', delimiter='/', prefix_wildcard='abc/d*e' and
          suffix_wildcard='f*.txt'.
        """
        first_wild = WILDCARD_REGEX.search(wildcard)
        if first_wild is None:
            # Nothing to expand: list and filter on the literal name itself.
            # E.g. a previous round that produced dir/ with suffix abc calls
            # back in with dir/abc, and gets prefix='dir/abc', delimiter='/',
            # prefix_wildcard='dir/abc', suffix_wildcard=''.
            prefix = wildcard
            prefix_wildcard = wildcard
            suffix_wildcard = ''
            delimiter = '/'
        else:
            wild_start = first_wild.start()
            # Only a non-empty literal lead-in is worth sending as a request
            # prefix; otherwise no prefix is sent at all.
            prefix = wildcard[:wild_start] if wild_start > 0 else None

            # Keep just the current path level of the wildcarded portion,
            # including its terminating '/' if there is one.
            level, slash, _ = wildcard[wild_start:].partition('/')
            # Drop that trailing '/' again so abc* and abc*/ are filtered
            # with the same pattern.
            prefix_wildcard = StripOneSlash((prefix or '') + level + slash)

            # Whatever follows the next '/' after the first wildcard char is
            # deferred to the next round (empty if there is no such '/').
            suffix_wildcard = wildcard[first_wild.end():].partition('/')[2]

            if '**' in prefix_wildcard:
                # Recursive wildcard: list without a delimiter and match the
                # whole remaining pattern in a single pass.
                delimiter = None
                prefix_wildcard += suffix_wildcard
                suffix_wildcard = ''
            else:
                delimiter = '/'

        # Handy for tracing how a multi-part wildcard such as
        # gs://bucket/abc/d*e/f*.txt is walked level by level.
        if self.debug > 1:
            sys.stderr.write(
                'DEBUG: wildcard=%s, prefix=%s, delimiter=%s, '
                'prefix_wildcard=%s, suffix_wildcard=%s\n' %
                (PrintableStr(wildcard), PrintableStr(prefix),
                 PrintableStr(delimiter), PrintableStr(prefix_wildcard),
                 PrintableStr(suffix_wildcard)))
        return (prefix, delimiter, prefix_wildcard, suffix_wildcard)
Esempio n. 3
0
    def __iter__(self,
                 bucket_listing_fields=None,
                 expand_top_level_buckets=False):
        """Iterator that gets called when iterating over the cloud wildcard.

        This is a generator. Bucket wildcards are expanded first (via
        _ExpandBucketWildcards); for each resulting bucket, object wildcards
        are then expanded one path level at a time using a work list of URL
        strings and _BuildBucketFilterStrings().

        In the case where no wildcard is present, returns a single matching
        object, single matching prefix, or one of each if both exist.

        Args:
          bucket_listing_fields: Iterable fields to include in bucket
                                 listings. Ex. ['name', 'acl'].  Iterator is
                                 responsible for converting these to
                                 list-style format
                                 ['items/name', 'items/acl'] as well as
                                 adding any fields necessary for listing such
                                 as prefixes.  API implementation is
                                 responsible for adding pagination fields.
                                 If this is None, all fields are returned.
          expand_top_level_buckets: If true, yield no BUCKET references.
                                    Instead, expand buckets into top-level
                                    objects and prefixes.

        Yields:
          BucketListingRef of type BUCKET, OBJECT or PREFIX.

        Raises:
          CommandException: if a listed prefix itself contains a wildcard
                            character.
        """
        single_version_request = self.wildcard_url.HasGeneration()

        # For wildcard expansion purposes, we need at a minimum the name of
        # each object and prefix.  If we're not using the default of requesting
        # all fields, make sure at least these are requested.  The Cloud API
        # tolerates specifying the same field twice.
        #
        # Two field sets are maintained: `get_fields` for single-object
        # metadata requests and `bucket_listing_fields` (list-style names)
        # for listing requests.
        get_fields = None
        if bucket_listing_fields:
            get_fields = set()
            for field in bucket_listing_fields:
                get_fields.add(field)
            bucket_listing_fields = self._GetToListFields(
                get_fields=bucket_listing_fields)
            bucket_listing_fields.update(['items/name', 'prefixes'])
            get_fields.update(['name'])
            # If we're making versioned requests, ensure generation and
            # metageneration are also included.
            if single_version_request or self.all_versions:
                bucket_listing_fields.update(
                    ['items/generation', 'items/metageneration'])
                get_fields.update(['generation', 'metageneration'])

        # Handle bucket wildcarding, if any, in _ExpandBucketWildcards. Then
        # iterate over the expanded bucket strings and handle any object
        # wildcarding.
        for bucket_listing_ref in self._ExpandBucketWildcards(
                bucket_fields=['id']):
            bucket_url_string = bucket_listing_ref.url_string
            if self.wildcard_url.IsBucket():
                # IsBucket() guarantees there are no prefix or object wildcards, and
                # thus this is a top-level listing of buckets.
                if expand_top_level_buckets:
                    url = StorageUrlFromString(bucket_url_string)
                    # A '/' delimiter limits the listing to the bucket's top
                    # level: objects plus first-level prefixes.
                    for obj_or_prefix in self.gsutil_api.ListObjects(
                            url.bucket_name,
                            delimiter='/',
                            all_versions=self.all_versions,
                            provider=self.wildcard_url.scheme,
                            fields=bucket_listing_fields):
                        if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
                            yield self._GetObjectRef(
                                bucket_url_string,
                                obj_or_prefix.data,
                                with_version=self.all_versions)
                        else:  # CloudApi.CsObjectOrPrefixType.PREFIX:
                            yield self._GetPrefixRef(bucket_url_string,
                                                     obj_or_prefix.data)
                else:
                    yield bucket_listing_ref
            else:
                # By default, assume a non-wildcarded URL is an object, not a prefix.
                # This prevents unnecessary listings (which are slower, more expensive,
                # and also subject to eventual consistency).
                if (not ContainsWildcard(self.wildcard_url.url_string)
                        and self.wildcard_url.IsObject()
                        and not self.all_versions):
                    try:
                        get_object = self.gsutil_api.GetObjectMetadata(
                            self.wildcard_url.bucket_name,
                            self.wildcard_url.object_name,
                            generation=self.wildcard_url.generation,
                            provider=self.wildcard_url.scheme,
                            fields=get_fields)
                        yield self._GetObjectRef(
                            self.wildcard_url.bucket_url_string,
                            get_object,
                            with_version=(self.all_versions
                                          or single_version_request))
                        # The direct metadata hit is the only result; end the
                        # whole iteration here.
                        return
                    except (NotFoundException, AccessDeniedException):
                        # It's possible this is a prefix - try to list instead.
                        pass

                # Expand iteratively by building prefix/delimiter bucket listing
                # request, filtering the results per the current level's wildcard
                # (if present), and continuing with the next component of the
                # wildcard. See _BuildBucketFilterStrings() documentation for details.
                if single_version_request:
                    url_string = '%s%s#%s' % (bucket_url_string,
                                              self.wildcard_url.object_name,
                                              self.wildcard_url.generation)
                else:
                    # Rstrip any prefixes to correspond with rstripped prefix wildcard
                    # from _BuildBucketFilterStrings().
                    url_string = '%s%s' % (
                        bucket_url_string,
                        StripOneSlash(self.wildcard_url.object_name) or '/'
                    )  # Cover root object named '/' case.
                # Work list of URL strings still to be expanded; entries are
                # taken from the front and new ones appended at the back.
                urls_needing_expansion = [url_string]
                while urls_needing_expansion:
                    url = StorageUrlFromString(urls_needing_expansion.pop(0))
                    (prefix, delimiter, prefix_wildcard,
                     suffix_wildcard) = (self._BuildBucketFilterStrings(
                         url.object_name))
                    # Compile the glob-style filter for this level into a
                    # regex used to match listed names below.
                    prog = re.compile(fnmatch.translate(prefix_wildcard))

                    # If we have a suffix wildcard, we only care about listing prefixes.
                    listing_fields = (set(['prefixes']) if suffix_wildcard else
                                      bucket_listing_fields)

                    # List bucket for objects matching prefix up to delimiter.
                    for obj_or_prefix in self.gsutil_api.ListObjects(
                            url.bucket_name,
                            prefix=prefix,
                            delimiter=delimiter,
                            all_versions=self.all_versions
                            or single_version_request,
                            provider=self.wildcard_url.scheme,
                            fields=listing_fields):
                        if obj_or_prefix.datatype == CloudApi.CsObjectOrPrefixType.OBJECT:
                            gcs_object = obj_or_prefix.data
                            if prog.match(gcs_object.name):
                                # Yield the object only when there is no
                                # wildcard left to expand, or its name (minus
                                # one trailing slash) equals what is left.
                                if not suffix_wildcard or (StripOneSlash(
                                        gcs_object.name) == suffix_wildcard):
                                    if not single_version_request or (
                                            self._SingleVersionMatches(
                                                gcs_object.generation)):
                                        yield self._GetObjectRef(
                                            bucket_url_string,
                                            gcs_object,
                                            with_version=(
                                                self.all_versions
                                                or single_version_request))
                        else:  # CloudApi.CsObjectOrPrefixType.PREFIX
                            # NOTE: this rebinds `prefix`. The ListObjects call
                            # above was already made with the request prefix,
                            # so the in-flight listing is unaffected.
                            prefix = obj_or_prefix.data

                            if ContainsWildcard(prefix):
                                # TODO: Disambiguate user-supplied strings from iterated
                                # prefix and object names so that we can better reason
                                # about wildcards and handle this case without raising an error.
                                raise CommandException(
                                    'Cloud folder %s%s contains a wildcard; gsutil does '
                                    'not currently support objects with wildcards in their '
                                    'name.' % (bucket_url_string, prefix))

                            # If the prefix ends with a slash, remove it.  Note that we only
                            # remove one slash so that we can successfully enumerate dirs
                            # containing multiple slashes.
                            rstripped_prefix = StripOneSlash(prefix)
                            if prog.match(rstripped_prefix):
                                if suffix_wildcard and rstripped_prefix != suffix_wildcard:
                                    # There's more wildcard left to expand.
                                    url_append_string = '%s%s' % (
                                        bucket_url_string, rstripped_prefix +
                                        '/' + suffix_wildcard)
                                    urls_needing_expansion.append(
                                        url_append_string)
                                else:
                                    # No wildcard to expand, just yield the prefix
                                    yield self._GetPrefixRef(
                                        bucket_url_string, prefix)
  def _BuildBucketFilterStrings(self, wildcard):
    """Splits an object-name wildcard into listing and filtering parts.

    One call handles one level of wildcard expansion: it works out what to
    send in the bucket listing request, how to filter what comes back, and
    what is left over for the next expansion round.

    Args:
      wildcard: The object-name wildcard string to break apart. It may also
                contain no wildcard characters at all.

    Returns:
      (prefix, delimiter, prefix_wildcard, suffix_wildcard)
      where:
        prefix is the prefix to send in the bucket listing request, or None
          when the wildcard starts with a wildcard character.
        delimiter is the delimiter to send in the bucket listing request;
          None when prefix_wildcard contains '**' (recursive match).
        prefix_wildcard is the pattern used to filter listing results.
        suffix_wildcard is the remaining pattern to append to filtered
          results for the next expansion iteration ('' when none).
      For example, assuming WILDCARD_REGEX matches '*' and StripOneSlash
      removes a single trailing '/', the object name abc/d*e/f*.txt gives
      prefix='abc/d', delimiter='/', prefix_wildcard='abc/d*e' and
      suffix_wildcard='f*.txt'.
    """
    first_wild = WILDCARD_REGEX.search(wildcard)
    if first_wild is None:
      # Nothing to expand: list and filter on the literal name itself.
      # E.g. a previous round that produced dir/ with suffix abc calls back
      # in with dir/abc, and gets prefix='dir/abc', delimiter='/',
      # prefix_wildcard='dir/abc', suffix_wildcard=''.
      prefix = wildcard
      prefix_wildcard = wildcard
      suffix_wildcard = ''
      delimiter = '/'
    else:
      wild_start = first_wild.start()
      # Only a non-empty literal lead-in is worth sending as a request
      # prefix; otherwise no prefix is sent at all.
      prefix = wildcard[:wild_start] if wild_start > 0 else None

      # Keep just the current path level of the wildcarded portion,
      # including its terminating '/' if there is one.
      level, slash, _ = wildcard[wild_start:].partition('/')
      # Drop that trailing '/' again so abc* and abc*/ are filtered with the
      # same pattern.
      prefix_wildcard = StripOneSlash((prefix or '') + level + slash)

      # Whatever follows the next '/' after the first wildcard char is
      # deferred to the next round (empty if there is no such '/').
      suffix_wildcard = wildcard[first_wild.end():].partition('/')[2]

      if '**' in prefix_wildcard:
        # Recursive wildcard: list without a delimiter and match the whole
        # remaining pattern in a single pass.
        delimiter = None
        prefix_wildcard += suffix_wildcard
        suffix_wildcard = ''
      else:
        delimiter = '/'

    # Handy for tracing how a multi-part wildcard such as
    # gs://bucket/abc/d*e/f*.txt is walked level by level.
    if self.debug > 1:
      sys.stderr.write(
          'DEBUG: wildcard=%s, prefix=%s, delimiter=%s, '
          'prefix_wildcard=%s, suffix_wildcard=%s\n' %
          (wildcard, prefix, delimiter, prefix_wildcard, suffix_wildcard))
    return (prefix, delimiter, prefix_wildcard, suffix_wildcard)