def _BuildBucketFilterStrings(self, wildcard):
    """Computes bucket listing parameters and filters for a wildcard.

    The wildcard is split at the first character matched by WILDCARD_REGEX.
    The literal text before that point becomes the listing prefix, the
    wildcard is expanded one '/'-separated component at a time, and whatever
    follows that component is handed back for the next expansion iteration.

    Args:
      wildcard: The wildcard string to match to objects.

    Returns:
      (prefix, delimiter, prefix_wildcard, suffix_wildcard) where:
        prefix is the prefix to be sent in bucket GET request. It is None
          when the wildcard match is at position 0, and the whole input when
          the input contains no wildcard chars.
        delimiter is the delimiter to be sent in bucket GET request: '/', or
          None when prefix_wildcard contains '**'.
        prefix_wildcard is the wildcard to be used to filter bucket GET
          results: the prefix plus the wildcard component up to its first
          '/', passed through StripOneSlash.
        suffix_wildcard is the wildcard to be appended to filtered bucket GET
          results for the next wildcard expansion iteration ('' when nothing
          is left to expand).

      For example, for the object-name wildcard abc/d*e/f*.txt (assuming
      WILDCARD_REGEX matches '*' and StripOneSlash drops one trailing '/')
      the code below yields prefix=abc/d, delimiter=/,
      prefix_wildcard=abc/d*e and suffix_wildcard=f*.txt.
    """
    first_wild = WILDCARD_REGEX.search(wildcard)
    if first_wild:
        wild_start = first_wild.start()
        if wild_start > 0:
            # Literal text precedes the first wildcard char, so it can be
            # sent to the server as the request prefix.
            prefix = wildcard[:wild_start]
            component = wildcard[wild_start:]
        else:
            prefix = None
            component = wildcard
        # Keep the wildcard component through its first '/', if any.
        head, slash, _ = component.partition('/')
        # Remove trailing '/' so we will match gs://bucket/abc* as well as
        # gs://bucket/abc*/ with the same wildcard regex.
        prefix_wildcard = StripOneSlash((prefix or '') + head + slash)
        # Whatever follows the first '/' after the matched wildcard char is
        # left for the next iteration; '' when there is no such '/'.
        suffix_wildcard = wildcard[first_wild.end():].partition('/')[2]
        if '**' in prefix_wildcard:
            # Recursive (**) wildcarding: send no delimiter and fold the
            # remaining suffix into the filter wildcard.
            delimiter = None
            prefix_wildcard += suffix_wildcard
            suffix_wildcard = ''
        else:
            delimiter = '/'
    else:
        # No wildcard chars in the input: return a tuple that makes the
        # bucket listing match the input itself. Example: if the previous
        # iteration yielded gs://bucket/dir/ with suffix_wildcard abc, this
        # call receives dir/abc and returns it as both prefix and
        # prefix_wildcard.
        prefix = wildcard
        delimiter = '/'
        prefix_wildcard = wildcard
        suffix_wildcard = ''

    # The following debug output is useful for tracing how the algorithm
    # walks through a multi-part wildcard like gs://bucket/abc/d*e/f*.txt
    if self.debug > 1:
        traced = (wildcard, prefix, delimiter, prefix_wildcard,
                  suffix_wildcard)
        sys.stderr.write(
            'DEBUG: wildcard=%s, prefix=%s, delimiter=%s, '
            'prefix_wildcard=%s, suffix_wildcard=%s\n' %
            tuple(PrintableStr(item) for item in traced))
    return (prefix, delimiter, prefix_wildcard, suffix_wildcard)
def _BuildBucketFilterStrings(self, wildcard):
    """Builds the listing prefix/delimiter and the filter wildcards.

    Locates the first character matched by WILDCARD_REGEX and uses it to
    decide what can be sent to the server as a literal prefix, what must be
    applied as a filter to the listing results, and what is left over for
    the next round of wildcard expansion.

    Args:
      wildcard: The wildcard string to match to objects.

    Returns:
      (prefix, delimiter, prefix_wildcard, suffix_wildcard) where:
        prefix is the prefix to be sent in bucket GET request (None if the
          wildcard match is at position 0; the whole input if it has no
          wildcard chars).
        delimiter is the delimiter to be sent in bucket GET request ('/', or
          None if prefix_wildcard contains '**').
        prefix_wildcard is the wildcard to be used to filter bucket GET
          results: the input up to and including the first '/' at or after
          the wildcard match, passed through StripOneSlash.
        suffix_wildcard is the wildcard to be appended to filtered bucket GET
          results for the next wildcard expansion iteration, or ''.

      For example, for the object-name wildcard abc/d*e/f*.txt (assuming
      WILDCARD_REGEX matches '*' and StripOneSlash drops one trailing '/')
      the result is prefix=abc/d, delimiter=/, prefix_wildcard=abc/d*e and
      suffix_wildcard=f*.txt.
    """
    # Start from the answer for an input without wildcard chars: the listing
    # should match the input itself. Example: if the previous iteration
    # yielded gs://bucket/dir/ with suffix_wildcard abc, this call receives
    # dir/abc and returns it as both prefix and prefix_wildcard.
    prefix = wildcard
    delimiter = '/'
    prefix_wildcard = wildcard
    suffix_wildcard = ''

    wild_match = WILDCARD_REGEX.search(wildcard)
    if wild_match:
        start = wild_match.start()
        # Only a non-empty literal lead-in is worth sending as a prefix.
        prefix = wildcard[:start] if start > 0 else None

        # The filter covers everything through the first '/' at or after the
        # wildcard char (or the whole input when there is no such '/').
        slash_at = wildcard.find('/', start)
        filter_src = wildcard if slash_at == -1 else wildcard[:slash_at + 1]
        # Remove trailing '/' so we will match gs://bucket/abc* as well as
        # gs://bucket/abc*/ with the same wildcard regex.
        prefix_wildcard = StripOneSlash(filter_src)

        # The suffix is whatever follows the first '/' found after the
        # matched wildcard char.
        next_slash = wildcard.find('/', wild_match.end())
        if next_slash != -1:
            suffix_wildcard = wildcard[next_slash + 1:]

        # To implement recursive (**) wildcarding, if prefix_wildcard
        # contains '**' don't send a delimiter, and combine suffix_wildcard
        # at the end of prefix_wildcard.
        if '**' in prefix_wildcard:
            delimiter = None
            prefix_wildcard += suffix_wildcard
            suffix_wildcard = ''

    # The following debug output is useful for tracing how the algorithm
    # walks through a multi-part wildcard like gs://bucket/abc/d*e/f*.txt
    if self.debug > 1:
        debug_line = (
            'DEBUG: wildcard=%s, prefix=%s, delimiter=%s, '
            'prefix_wildcard=%s, suffix_wildcard=%s\n'
            % (wildcard, prefix, delimiter, prefix_wildcard, suffix_wildcard))
        sys.stderr.write(debug_line)
    return (prefix, delimiter, prefix_wildcard, suffix_wildcard)