def __iter__(self):
    """Expand every URL in self._urls and yield one result per match.

    When recursion was requested, a matched container is not yielded itself;
    the resources found under it (via a trailing '**') are yielded instead.

    Yields:
        NameExpansionResult instance.

    Raises:
        errors.InvalidUrlError: A URL matched nothing.
    """
    for url in self._urls:
        matches = plurality_checkable_iterator.PluralityCheckableIterator(
            wildcard_iterator.get_wildcard_iterator(url))
        if matches.is_empty():
            raise errors.InvalidUrlError('{} matched no objects.'.format(url))

        # Each match is a resource_reference.Resource object.
        for match in matches:
            if not (self._recursion_requested and match.is_container()):
                # Either recursion is off or this is not a container.
                yield NameExpansionResult(match, match.storage_url)
                continue

            # Append '**' to fetch all objects under this container.
            descendants_url = match.storage_url.join('**')
            for descendant in wildcard_iterator.get_wildcard_iterator(
                    descendants_url.url_string):
                yield NameExpansionResult(descendant, match.storage_url)
def Run(self, args):
    """Command execution logic.

    Initializes the key store, validates that every requested path is a cloud
    URL, picks a display detail from the flags, and runs one list task per URL.

    Raises:
        errors.InvalidUrlError: A supplied path is not a cloud URL.
    """
    encryption_util.initialize_key_store(args)

    if not args.path:
        # Nothing was requested explicitly; fall back to the default provider.
        storage_urls = [storage_url.CloudUrl(cloud_api.DEFAULT_PROVIDER)]
    else:
        storage_urls = [
            storage_url.storage_url_from_string(path) for path in args.path
        ]
        for candidate in storage_urls:
            if isinstance(candidate, storage_url.CloudUrl):
                continue
            raise errors.InvalidUrlError('Ls only works for cloud URLs.'
                                         ' Error for: {}'.format(
                                             candidate.url_string))

    # The first flag that is set wins, checked in this order.
    detail_levels = cloud_list_task.DisplayDetail
    if args.full:
        display_detail = detail_levels.FULL
    elif args.json:
        display_detail = detail_levels.JSON
    elif args.long:
        display_detail = detail_levels.LONG
    else:
        display_detail = detail_levels.SHORT

    tasks = [
        cloud_list_task.CloudListTask(
            url,
            all_versions=args.all_versions,
            buckets_flag=args.buckets,
            display_detail=display_detail,
            include_etag=args.etag,
            readable_sizes=args.readable_sizes,
            recursion_flag=args.recursive)
        for url in storage_urls
    ]
    task_executor.execute_tasks(tasks, parallelizable=False)
def from_url_string(cls, url_string):
    """Split a cloud URL string into its parts and build a URL object.

    Args:
        url_string (str): Cloud storage url of the form gs://bucket/object

    Returns:
        An instance of cls built from the scheme, bucket name, object name
        and generation found in the string.

    Raises:
        InvalidUrlError: Raised if the url_string is not a valid cloud url.
    """
    scheme = _get_scheme_from_url_string(url_string)

    # gs://a/b/c/d#num => a/b/c/d#num
    scheme_prefix = scheme.value + '://'
    remainder = url_string[len(scheme_prefix):]
    if remainder.startswith('/'):
        # A third slash right after the scheme separator is rejected.
        raise errors.InvalidUrlError(
            'Cloud URL scheme should be followed by colon and two slashes: "://".'
            ' Found: "{}"'.format(url_string))

    # a/b/c/d#num => a, b/c/d#num
    bucket_name, _, object_and_generation = remainder.partition(
        CLOUD_URL_DELIMITER)
    # b/c/d#num => b/c/d, num
    object_name, _, generation = object_and_generation.partition('#')
    return cls(scheme, bucket_name, object_name, generation)
def get_wildcard_iterator(url_str,
                          all_versions=False,
                          fields_scope=cloud_api.FieldsScope.NO_ACL,
                          get_bucket_metadata=False,
                          ignore_symlinks=False):
    """Build the wildcard iterator that matches the type of the given URL.

    Args:
        url_str (str): URL string which may contain wildcard characters.
        all_versions (bool): If true, the iterator yields all versions of
            objects matching the wildcard. If false, yields just the live
            object version.
        fields_scope (cloud_api.FieldsScope): Determines amount of metadata
            returned by API.
        get_bucket_metadata (bool): If true, perform a bucket GET request when
            fetching bucket resources
        ignore_symlinks (bool): Skip over symlinks instead of following them.

    Returns:
        A WildcardIterator object.

    Raises:
        command_errors.InvalidUrlError: The parsed URL is neither a cloud URL
            nor a file URL.
    """
    url = storage_url.storage_url_from_string(url_str)

    # Cloud-only options go to the cloud iterator.
    if isinstance(url, storage_url.CloudUrl):
        return CloudWildcardIterator(
            url,
            all_versions=all_versions,
            fields_scope=fields_scope,
            get_bucket_metadata=get_bucket_metadata)

    # The file iterator only receives the symlink option.
    if isinstance(url, storage_url.FileUrl):
        return FileWildcardIterator(url, ignore_symlinks=ignore_symlinks)

    raise command_errors.InvalidUrlError('Unknown url type %s.' % url)
def Run(self, args):
    """Command execution logic.

    Validates that every requested path is a cloud URL, derives the display
    detail from the flags, and executes one list task per URL serially.

    Raises:
        errors.InvalidUrlError: A supplied path is not a cloud URL.
    """
    if not args.path:
        storage_urls = [storage_url.CloudUrl(cloud_api.DEFAULT_PROVIDER)]
    else:
        storage_urls = [
            storage_url.storage_url_from_string(path) for path in args.path
        ]
        for candidate in storage_urls:
            if isinstance(candidate, storage_url.CloudUrl):
                continue
            raise errors.InvalidUrlError('Ls only works for cloud URLs.'
                                         ' Error for: {}'.format(
                                             candidate.url_string))

    # These checks are independent, so a later flag overrides an earlier one
    # when several are set.
    detail_levels = cloud_list_task.DisplayDetail
    display_detail = detail_levels.SHORT
    if args.full:
        display_detail = detail_levels.FULL
    if args.json:
        display_detail = detail_levels.JSON
    if args.long:
        display_detail = detail_levels.LONG

    tasks = [
        cloud_list_task.CloudListTask(
            url,
            all_versions=args.all_versions,
            display_detail=display_detail,
            include_etag=args.etag,
            recursion_flag=args.recursive)
        for url in storage_urls
    ]
    task_executor.ExecuteTasks(tasks, is_parallel=False)
def execute(self, callback=None):
    """Recursively create wildcard iterators to print all relevant items.

    Args:
        callback: Optional callable invoked with no arguments once printing
            has finished.

    Raises:
        errors.InvalidUrlError: The URL matched nothing.
    """
    fields_scope = _translate_display_detail_to_fields_scope(
        self._display_detail, is_bucket_listing=self._cloud_url.is_provider())

    matches = plurality_checkable_iterator.PluralityCheckableIterator(
        wildcard_iterator.CloudWildcardIterator(
            self._cloud_url, fields_scope=fields_scope))
    if matches.is_empty():
        raise errors.InvalidUrlError('One or more URLs matched no objects.')

    if self._cloud_url.is_provider():
        # Received a provider URL ("gs://"). List bucket names with no
        # formatting.
        wrapped = self._recursion_helper(matches, recursion_level=0)
    elif self._recursion_flag and '**' not in self._cloud_url.url_string:
        # "**" overrides recursive flag.
        wrapped = self._recursion_helper(matches, float('inf'))
    elif not matches.is_plural() and matches.peek().is_container():
        # One container was returned by the query, in which case we show
        # its contents.
        wrapped = self._get_container_iterator(
            matches.peek().storage_url, recursion_level=0)
    else:
        wrapped = self._recursion_helper(matches, recursion_level=1)

    # TODO(b/169795589): We may display something other than JSON for FULL,
    # and make JSON its own DisplayDetail option.
    if self._display_detail == DisplayDetail.FULL:
        printer = self._print_json_list
    else:
        printer = self._print_row_list
    printer(wrapped)

    if callback:
        callback()
def _raise_if_destination_is_file_url_and_not_a_directory_or_pipe(self):
    """Reject a file destination that is neither a container nor a pipe.

    Raises:
        errors.InvalidUrlError: The raw destination is a FileUrl, is not a
            container, and is not a pipe.
    """
    destination = self._raw_destination

    # Only file URLs are subject to this check.
    if not isinstance(destination.storage_url, storage_url.FileUrl):
        return
    if _destination_is_container(destination):
        return
    if destination.storage_url.is_pipe:
        return

    raise errors.InvalidUrlError(
        'Destination URL must name an existing directory.'
        ' Provided: {}.'.format(destination.storage_url.object_name))
def _raise_no_url_match_error_if_necessary(self, url_found_match_dict):
    """Raise if any URL in the mapping is flagged as having found no match.

    Args:
        url_found_match_dict (dict): Maps a URL string to a truthy value when
            it matched something and to a falsy value otherwise.

    Raises:
        errors.InvalidUrlError: At least one URL maps to a falsy value. The
            message lists every such URL on its own line.
    """
    unmatched_urls = []
    for url, found_match in url_found_match_dict.items():
        if not found_match:
            unmatched_urls.append(url)

    if not unmatched_urls:
        return

    bullet_list = '\n-'.join(unmatched_urls)
    raise errors.InvalidUrlError(
        'The following URLs matched no objects or files:\n-{}'.format(
            bullet_list))
def _get_scheme_from_url_string(url_str):
    """Returns scheme component of a URL string.

    Args:
        url_str (str): URL or path to inspect.

    Returns:
        ProviderPrefix.FILE when the string has no '://' separator, otherwise
        the ProviderPrefix for the lower-cased text before the first '://'.

    Raises:
        errors.InvalidUrlError: The scheme text is not in VALID_SCHEMES.
    """
    scheme_text, separator, _ = url_str.partition('://')
    if not separator:
        # File is the default scheme.
        return ProviderPrefix.FILE

    scheme_text = scheme_text.lower()
    if scheme_text in VALID_SCHEMES:
        return ProviderPrefix(scheme_text)
    raise errors.InvalidUrlError('Unrecognized scheme "%s"' % scheme_text)
def Run(self, args):
    """Fetch a single bucket with full metadata and return it serializable.

    Raises:
        errors.InvalidUrlError: args.url contains a wildcard.
    """
    if wildcard_iterator.contains_wildcard(args.url):
        raise errors.InvalidUrlError(
            'Describe does not accept wildcards because it returns a single'
            ' resource. Please use the `ls` or `buckets list` command for'
            ' retrieving multiple resources.')

    url = storage_url.storage_url_from_string(args.url)
    client = api_factory.get_api(url.scheme)
    bucket_resource = client.get_bucket(
        url.bucket_name, fields_scope=cloud_api.FieldsScope.FULL)
    displayable_data = bucket_resource.get_displayable_bucket_data()

    # MakeSerializable will omit all the None values.
    return resource_projector.MakeSerializable(displayable_data)
def _prompt_and_add_valid_scheme(url):
    """Has user select a valid scheme from a list and returns new URL.

    Args:
        url: Storage URL whose scheme should be replaced.

    Returns:
        The result of storage_url.switch_scheme for the chosen scheme.

    Raises:
        errors.InvalidUrlError: Prompting is not possible in this session.
    """
    if not console_io.CanPrompt():
        # Without a prompt, the best we can do is suggest posix://.
        raise errors.InvalidUrlError('Did you mean "posix://{}"'.format(
            url.object_name))

    scheme_choices = [
        scheme.value + '://' for scheme in VALID_TRANSFER_SCHEMES
    ]
    prompt_message = (
        'Storage Transfer does not support direct file URLs: {}\n'
        'Did you mean to use "posix://"?\n'
        'Run this command with "--help" for more info,\n'
        'or select a valid scheme below.').format(url)
    chosen_index = console_io.PromptChoice(
        scheme_choices, cancel_option=True, message=prompt_message)

    return storage_url.switch_scheme(url, VALID_TRANSFER_SCHEMES[chosen_index])
def __init__(self,
             scheme,
             bucket_name=None,
             object_name=None,
             generation=None,
             snapshot=None,
             account=None):
    """Initialize the URL and require an account name.

    Args:
        scheme: Passed through to the parent initializer.
        bucket_name: Passed through to the parent initializer.
        object_name: Passed through to the parent initializer.
        generation: Passed through to the parent initializer.
        snapshot: Stored on the instance; any falsy value is stored as None.
        account: Account name. Must be truthy.

    Raises:
        errors.InvalidUrlError: No account name was supplied.
    """
    super(AzureUrl, self).__init__(scheme, bucket_name, object_name,
                                   generation)
    # Normalize empty values (e.g. '') to None.
    self.snapshot = snapshot or None

    if account:
        self.account = account
        return
    raise errors.InvalidUrlError('Azure URLs must contain an account name.')
def Run(self, args):
    """Command execution logic.

    Validates that every requested path is a cloud URL and executes one list
    task per URL.

    Raises:
        errors.InvalidUrlError: A supplied path is not a cloud URL.
    """
    if not args.path:
        storage_urls = [storage_url.CloudUrl(cloud_api.DEFAULT_PROVIDER)]
    else:
        storage_urls = [
            storage_url.storage_url_from_string(path) for path in args.path
        ]
        for candidate in storage_urls:
            if isinstance(candidate, storage_url.CloudUrl):
                continue
            raise errors.InvalidUrlError('Ls only works for cloud URLs.'
                                         ' Error for: {}'.format(
                                             candidate.url_string))

    tasks = []
    for url in storage_urls:
        tasks.append(
            cloud_list_task.CloudListTask(url, recursion_flag=args.recursive))
    task_executor.ExecuteTasks(tasks)
def execute(self, task_status_queue=None):
    """Recursively create wildcard iterators to print all relevant items.

    Args:
        task_status_queue: Accepted for interface compatibility and discarded
            immediately.

    Raises:
        errors.InvalidUrlError: The URL matched nothing.
    """
    # List task does not need to report status information.
    del task_status_queue

    fields_scope = _translate_display_detail_to_fields_scope(
        self._display_detail, is_bucket_listing=self._cloud_url.is_provider())

    matches = plurality_checkable_iterator.PluralityCheckableIterator(
        wildcard_iterator.CloudWildcardIterator(
            self._cloud_url,
            all_versions=self._all_versions,
            error_on_missing_key=False,
            fields_scope=fields_scope,
            get_bucket_metadata=self._buckets_flag))
    if matches.is_empty():
        raise errors.InvalidUrlError('One or more URLs matched no objects.')

    if self._only_display_buckets:
        # Received a provider URL ("gs://") -> List all buckets.
        # Received buckets flag and bucket URL -> List matching buckets,
        # ignoring recursion.
        wrapped = self._recursion_helper(matches, recursion_level=0)
    elif self._recursion_flag and '**' not in self._cloud_url.url_string:
        # "**" overrides recursive flag.
        wrapped = self._recursion_helper(matches, float('inf'))
    elif not matches.is_plural() and matches.peek().is_container():
        # One container was returned by the query, in which case we show
        # its contents.
        wrapped = self._get_container_iterator(
            matches.peek().storage_url, recursion_level=0)
    else:
        wrapped = self._recursion_helper(matches, recursion_level=1)

    # JSON detail gets the JSON printer; everything else is printed as rows.
    if self._display_detail == DisplayDetail.JSON:
        printer = self._print_json_list
    else:
        printer = self._print_row_list
    printer(wrapped)
def Run(self, args):
    """Yield serializable data for every bucket matched by the given URLs.

    This is a generator, so URL validation only runs once iteration starts.

    Yields:
        The serializable form of each bucket's displayable data.

    Raises:
        errors.InvalidUrlError: A URL is neither a provider nor a bucket URL.
    """
    if not args.urls:
        # No URLs given: fall back to the GCS provider URL.
        urls = [storage_url.CloudUrl(storage_url.ProviderPrefix.GCS)]
    else:
        urls = []
        for url_string in args.urls:
            parsed_url = storage_url.storage_url_from_string(url_string)
            if not parsed_url.is_provider() and not parsed_url.is_bucket():
                raise errors.InvalidUrlError(
                    'URL does not match buckets: {}'.format(url_string))
            urls.append(parsed_url)

    for url in urls:
        bucket_iterator = wildcard_iterator.get_wildcard_iterator(
            url.url_string,
            fields_scope=cloud_api.FieldsScope.FULL,
            get_bucket_metadata=True)
        for bucket in bucket_iterator:
            # MakeSerializable will omit all the None values.
            yield resource_projector.MakeSerializable(
                bucket.get_displayable_bucket_data())
def get_wildcard_iterator(url_str,
                          all_versions=False,
                          fields_scope=cloud_api.FieldsScope.NO_ACL):
    """Build the wildcard iterator that matches the type of the given URL.

    Args:
        url_str (str): URL string which may contain wildcard characters.
        all_versions (bool): If true, the iterator yields all versions of
            objects matching the wildcard. If false, yields just the live
            object version.
        fields_scope (cloud_api.FieldsScope): Determines amount of metadata
            returned by API.

    Returns:
        A WildcardIterator object.

    Raises:
        errors.InvalidUrlError: The parsed URL is neither a cloud URL nor a
            file URL.
    """
    url = storage_url.storage_url_from_string(url_str)

    if isinstance(url, storage_url.CloudUrl):
        return CloudWildcardIterator(url, all_versions, fields_scope)

    # File iteration takes no version or metadata options.
    if isinstance(url, storage_url.FileUrl):
        return FileWildcardIterator(url)

    raise errors.InvalidUrlError('Unknown url type %s.' % url)
def __iter__(self):
    """Iterates over each URL in self._urls and yield the expanded result.

    Non-container matches are yielded directly. Container matches are
    expanded with a trailing '**' when recursion was requested and are
    skipped (with an info log) otherwise.

    Yields:
        NameExpansionResult instance.

    Raises:
        InvalidUrlError: No matching objects found.
    """
    for url in self._urls:
        resources = plurality_checkable_iterator.PluralityCheckableIterator(
            wildcard_iterator.get_wildcard_iterator(url))
        # Stays True until something is yielded for this URL.
        is_name_expansion_iterator_empty = True
        original_storage_url = storage_url.storage_url_from_string(url)

        # Iterate over all the resource_reference.Resource objects.
        for resource in resources:
            if not resource.is_container():
                yield NameExpansionResult(resource, resource.storage_url,
                                          original_storage_url)
                is_name_expansion_iterator_empty = False
                continue

            if not self._recursion_requested:
                # Name the skipped container itself. Formatting
                # resource.is_container() here would always log "True".
                log.info('Omitting {} because it is a container, and recursion'
                         ' is not enabled.'.format(resource.storage_url))
                continue

            # Append '**' to fetch all objects under this container.
            new_storage_url = resource.storage_url.join('**')
            child_resources = wildcard_iterator.get_wildcard_iterator(
                new_storage_url.url_string)
            for child_resource in child_resources:
                yield NameExpansionResult(child_resource, resource.storage_url,
                                          original_storage_url)
                is_name_expansion_iterator_empty = False

        if is_name_expansion_iterator_empty:
            raise errors.InvalidUrlError(
                '{} matched no objects or files.'.format(url))
def storage_url_from_string(url_string):
    """Static factory function for creating a StorageUrl from a string.

    Args:
        url_string (str): Cloud url or local filepath.

    Returns:
        StorageUrl object.

    Raises:
        InvalidUrlError: Unrecognized URL scheme.
    """
    scheme = _get_scheme_from_url_string(url_string)

    # Branch order matters: the first matching scheme family wins.
    if scheme == ProviderPrefix.FILE:
        parsed_url = FileUrl(url_string)
    elif scheme == ProviderPrefix.POSIX:
        parsed_url = PosixFileSystemUrl(url_string)
    elif scheme in VALID_HTTP_SCHEMES:
        # Azure's scheme breaks from other clouds.
        parsed_url = AzureUrl.from_url_string(url_string)
    elif scheme in VALID_CLOUD_SCHEMES:
        parsed_url = CloudUrl.from_url_string(url_string)
    else:
        raise errors.InvalidUrlError('Unrecognized URL scheme.')
    return parsed_url
def Run(self, args):
    """Validate that every URL is a bucket URL, then run bucket delete tasks.

    Raises:
        errors.InvalidUrlError: A URL in args.urls is not a bucket URL.
    """
    # Stops at the first URL that is not a bucket.
    if any(not storage_url.storage_url_from_string(url_string).is_bucket()
           for url_string in args.urls):
        raise errors.InvalidUrlError(
            'buckets delete only accepts cloud bucket URLs. Example:'
            ' "gs://bucket"')

    task_status_queue = multiprocessing.Queue()

    expanded_names = name_expansion.NameExpansionIterator(
        args.urls, include_buckets=True)
    iterator_factory = delete_task_iterator_factory.DeleteTaskIteratorFactory(
        expanded_names, task_status_queue=task_status_queue)
    plurality_checkable_bucket_iterator = (
        plurality_checkable_iterator.PluralityCheckableIterator(
            iterator_factory.bucket_iterator()))

    progress_manager_args = task_status.ProgressManagerArgs(
        increment_type=task_status.IncrementType.INTEGER, manifest_path=None)
    task_executor.execute_tasks(
        plurality_checkable_bucket_iterator,
        parallelizable=True,
        task_status_queue=task_status_queue,
        progress_manager_args=progress_manager_args)
def _validate_scheme(self):
    """Raise unless self.scheme passes AzureUrl.is_valid_scheme.

    Raises:
        errors.InvalidUrlError: The scheme is not a valid Azure scheme.
    """
    if AzureUrl.is_valid_scheme(self.scheme):
        return
    message = 'Invalid Azure scheme "{}"'.format(self.scheme)
    raise errors.InvalidUrlError(message)
def _validate_object_name(self):
    """Reject the object names '.' and '..'.

    Raises:
        errors.InvalidUrlError: self.object_name is '.' or '..'.
    """
    if self.object_name not in ('.', '..'):
        return
    raise errors.InvalidUrlError(
        '%s is an invalid root-level object name' % self.object_name)
def _validate_scheme(self):
    """Raise unless self.scheme is one of VALID_CLOUD_SCHEMES.

    Raises:
        errors.InvalidUrlError: The scheme is not a recognized cloud scheme.
    """
    if self.scheme in VALID_CLOUD_SCHEMES:
        return
    raise errors.InvalidUrlError('Unrecognized scheme "%s"' % self.scheme)
def _get_destination_suffix_for_recursion(self, destination_container,
                                          source):
    """Returns the suffix required to complete the destination URL.

    Let's assume the following:
        User command => cp -r */base_dir gs://dest/existing_prefix
        source.resource.storage_url => a/base_dir/c/d.txt
        source.expanded_url => a/base_dir
        destination_container.storage_url => gs://dest/existing_prefix

    If the destination container exists, the entire directory gets copied:
        Result => gs://dest/existing_prefix/base_dir/c/d.txt

    Args:
        destination_container (resource_reference.Resource): The destination
            container.
        source (NameExpansionResult): Represents the source resource and the
            expanded parent url in case of recursion.

    Returns:
        (str) The suffix to be appended to the destination container.

    Raises:
        errors.InvalidUrlError: There are multiple top-level sources and the
            expanded URL is not a valid parent directory.
    """
    source_prefix_to_ignore = storage_url.rstrip_one_delimiter(
        source.expanded_url.versionless_url_string,
        source.expanded_url.delimiter)

    expanded_url_is_valid_parent = _is_expanded_url_valid_parent_dir(
        source.expanded_url)
    if (not expanded_url_is_valid_parent and
            self._has_multiple_top_level_sources):
        # To avoid top-level name conflicts, we need to copy the parent dir.
        # However, that cannot be done because the parent dir has an invalid
        # name.
        raise errors.InvalidUrlError(
            'Presence of multiple top-level sources and invalid expanded URL'
            ' make file name conflicts possible for URL: {}'.format(
                source.resource))

    destination_is_unknown = isinstance(destination_container,
                                        resource_reference.UnknownResource)
    is_top_level_source_object_name_conflict_possible = (
        destination_is_unknown and self._has_multiple_top_level_sources)
    destination_is_existing_dir = (
        not destination_is_unknown and destination_container.is_container())

    if is_top_level_source_object_name_conflict_possible or (
            expanded_url_is_valid_parent and destination_is_existing_dir):
        # Preserve the top-level source directory, and remove the leaf name
        # so that it gets added to the destination.
        source_prefix_to_ignore, _, _ = source_prefix_to_ignore.rpartition(
            source.expanded_url.delimiter)
        if not source_prefix_to_ignore:
            # In case of Windows, the source URL might not contain any Windows
            # delimiter if it was a single directory (e.g file://dir) and
            # source_prefix_to_ignore will be empty. Set it to <scheme>://.
            # TODO(b/169093672) This will not be required if we get rid of
            # file://
            source_prefix_to_ignore = source.expanded_url.scheme.value + '://'

    full_source_url = source.resource.storage_url.versionless_url_string
    # Split only at the first occurrence of the prefix. Without maxsplit=1, a
    # source URL that repeats the prefix text further along would be cut
    # short at the second occurrence.
    suffix_for_destination = full_source_url.split(
        source_prefix_to_ignore, 1)[1]

    # Windows uses \ as a delimiter. Force the suffix to use the same
    # delimiter used by the destination container.
    source_delimiter = source.resource.storage_url.delimiter
    destination_delimiter = destination_container.storage_url.delimiter
    if source_delimiter != destination_delimiter:
        return suffix_for_destination.replace(source_delimiter,
                                              destination_delimiter)
    return suffix_for_destination
def _expand_object_path(self, bucket_name):
    """If wildcard, expand object names.

    Expansion proceeds one wildcard segment at a time. Names still containing
    a suffix to expand are queued and processed in turn.

    Args:
        bucket_name (str): Name of the bucket.

    Yields:
        resource_reference.Resource objects where each resource can be
        an ObjectResource object or a PrefixResource object.

    Raises:
        command_errors.InvalidUrlError: Raised after all names have been
            processed if a prefix whose own name contains wildcard characters
            had to be expanded.
    """
    # Retain original name to see if user wants only prefixes.
    original_object_name = self._url.object_name
    # Force API to return prefix resource not the prefix's contents.
    pending_names = collections.deque(
        [storage_url.rstrip_one_delimiter(original_object_name)])
    error = None

    while pending_names:
        # Parse out the prefix, delimiter, filter_pattern and suffix.
        # Given a string 'a/b*c/d/e*f/g.txt', this will return
        # CloudWildcardParts(prefix='a/b', filter_pattern='*c',
        #                    delimiter='/', suffix='d/e*f/g.txt')
        wildcard_parts = CloudWildcardParts.from_string(
            pending_names.popleft(), self._url.delimiter)

        # Fetch all the objects and prefixes.
        listed_resources = self._client.list_objects(
            all_versions=self._all_versions or bool(self._url.generation),
            bucket_name=bucket_name,
            delimiter=wildcard_parts.delimiter,
            fields_scope=self._fields_scope,
            prefix=wildcard_parts.prefix or None)

        # Everything listed matched wildcard_parts.prefix. Use the
        # filter_pattern to eliminate non-matching objects and prefixes.
        match_pattern = wildcard_parts.prefix + wildcard_parts.filter_pattern
        for resource in self._filter_resources(listed_resources,
                                               match_pattern):
            resource_path = resource.storage_url.object_name

            if not wildcard_parts.suffix:
                # Make sure an object is not returned if the original query
                # was for a prefix.
                is_object_for_prefix_query = (
                    not resource_path.endswith(self._url.delimiter) and
                    original_object_name.endswith(self._url.delimiter))
                if not is_object_for_prefix_query:
                    yield self._decrypt_resource_if_necessary(resource)
                continue

            # A suffix remains, so only prefixes can be expanded further.
            if not isinstance(resource, resource_reference.PrefixResource):
                continue

            # Let's say object_name is a/b1c. Then the new string that we
            # want to expand will be a/b1c/d/e*f/g.txt
            if WILDCARD_REGEX.search(resource_path):
                # Remember the error but keep processing remaining names.
                error = command_errors.InvalidUrlError(
                    'Cloud folders named with wildcards are not supported.'
                    ' API returned {}'.format(resource))
            else:
                pending_names.append(resource_path + wildcard_parts.suffix)

    if error:
        raise error
def _raise_error_if_source_matches_destination(self):
    """Reject a single-source copy whose source equals the destination.

    Nothing is checked when there are multiple sources.

    Raises:
        errors.InvalidUrlError: The first source's expanded URL equals the
            raw destination's storage URL.
    """
    if self._multiple_sources:
        return

    # Peek so the source is inspected without being consumed.
    source_url = self._source_name_iterator.peek().expanded_url
    if source_url != self._raw_destination.storage_url:
        return
    raise errors.InvalidUrlError(
        'Source URL matches destination URL: {}'.format(source_url))
def validate_url_string(cls, url_string, scheme):
    """Raise unless the string looks like an Azure URL with a valid scheme.

    Args:
        url_string (str): URL to validate. Must contain AZURE_DOMAIN.
        scheme: Scheme to validate with AzureUrl.is_valid_scheme.

    Raises:
        errors.InvalidUrlError: The URL lacks the Azure domain or the scheme
            is not a valid Azure scheme.
    """
    # The scheme check is evaluated once, inside the condition. A separate
    # bare call whose result is discarded would have no effect.
    if AZURE_DOMAIN in url_string and AzureUrl.is_valid_scheme(scheme):
        return
    raise errors.InvalidUrlError(
        'Invalid Azure URL: "{}"'.format(url_string))