def test_multiply_wrapped_iterator_raises_exception(self):
    """Checks a doubly wrapped iterator yields 0, then raises ValueError."""
    inner = plurality_checkable_iterator.PluralityCheckableIterator(
        _exception_iterator())
    outer = plurality_checkable_iterator.PluralityCheckableIterator(inner)

    first_item = next(outer)
    self.assertEqual(first_item, 0)

    # The second item requested through both wrappers must raise.
    self.assertRaises(ValueError, next, outer)
def __iter__(self):
    """Expands every URL in self._urls and yields the matched resources.

    Yields:
      NameExpansionResult instance.

    Raises:
      errors.InvalidUrlError: A URL matched no resources.
    """
    for url in self._urls:
        matches = plurality_checkable_iterator.PluralityCheckableIterator(
            wildcard_iterator.get_wildcard_iterator(url))
        if matches.is_empty():
            raise errors.InvalidUrlError(
                '{} matched no objects.'.format(url))

        # Walk every resource_reference.Resource the URL matched.
        for match in matches:
            expand_children = (
                self._recursion_requested and match.is_container())
            if not expand_children:
                yield NameExpansionResult(match, match.storage_url)
                continue

            # A trailing '**' fetches all objects under this container.
            recursive_url = match.storage_url.join('**')
            descendants = wildcard_iterator.get_wildcard_iterator(
                recursive_url.url_string)
            for descendant in descendants:
                yield NameExpansionResult(descendant, match.storage_url)
def test_initially_plural_iterator_becomes_singular_is_not_plural(self):
    """Checks is_plural() turns False once one of two items is consumed."""
    two_items = plurality_checkable_iterator.PluralityCheckableIterator(
        [0, 1])

    plural_before = two_items.is_plural()
    self.assertTrue(plural_before)

    # Consume one item, leaving a single item behind.
    next(two_items)

    plural_after = two_items.is_plural()
    self.assertFalse(plural_after)
def execute(self, callback=None):
    """Lists the resources matching the task's cloud URL and prints them.

    Args:
      callback (Callable|None): Called with no arguments after printing, if
        provided.

    Raises:
      errors.InvalidUrlError: The URL matched no resources.
    """
    fields_scope = _translate_display_detail_to_fields_scope(
        self._display_detail, is_bucket_listing=self._cloud_url.is_provider())
    matches = wildcard_iterator.CloudWildcardIterator(
        self._cloud_url, fields_scope=fields_scope)
    resources = plurality_checkable_iterator.PluralityCheckableIterator(
        matches)
    if resources.is_empty():
        raise errors.InvalidUrlError('One or more URLs matched no objects.')

    if self._cloud_url.is_provider():
        # Provider URL ("gs://"): list bucket names with no formatting.
        resources_wrappers = self._recursion_helper(
            resources, recursion_level=0)
    elif self._recursion_flag and '**' not in self._cloud_url.url_string:
        # The recursive flag only applies when the URL has no "**", which
        # overrides it.
        resources_wrappers = self._recursion_helper(resources, float('inf'))
    elif not resources.is_plural() and resources.peek().is_container():
        # The query returned exactly one container, so show its contents.
        resources_wrappers = self._get_container_iterator(
            resources.peek().storage_url, recursion_level=0)
    else:
        resources_wrappers = self._recursion_helper(
            resources, recursion_level=1)

    # TODO(b/169795589): We may display something other than JSON for FULL,
    # and make JSON its own DisplayDetail option.
    print_list = (
        self._print_json_list
        if self._display_detail == DisplayDetail.FULL
        else self._print_row_list)
    print_list(resources_wrappers)

    if callback:
        callback()
def _expand_destination_wildcards(self):
    """Expands destination wildcards.

    Ensures that only one resource matches the wildcard expanded string. Much
    like the unix cp command, the storage surface only supports copy
    operations to one user-specified destination.

    Returns:
      A resource_reference.Resource, or None if no matching resource is found.

    Raises:
      ValueError: More than one resource was matched, or the destination
        contained an unescaped wildcard and no resources were matched.
    """
    destination_iterator = (
        plurality_checkable_iterator.PluralityCheckableIterator(
            wildcard_iterator.get_wildcard_iterator(
                self._destination_string)))

    # A wildcard that expanded to nothing cannot name a single destination.
    contains_unexpanded_wildcard = (
        destination_iterator.is_empty() and
        wildcard_iterator.contains_wildcard(self._destination_string))

    if destination_iterator.is_plural() or contains_unexpanded_wildcard:
        raise ValueError('Destination ({}) must match exactly one URL'.format(
            self._destination_string))

    if destination_iterator.is_empty():
        # Wildcard-free destination that matched nothing: state the
        # documented None result explicitly rather than falling off the end.
        return None
    return next(destination_iterator)
def __init__(self,
             urls,
             all_versions=False,
             ignore_symlinks=False,
             include_buckets=False,
             recursion_requested=False):
    """Instantiates NameExpansionIterator.

    Args:
      urls (Iterable[str]): The URLs to expand.
      all_versions (bool): True if all versions of objects should be fetched,
        else False.
      ignore_symlinks (bool): Skip over symlinks instead of following them.
      include_buckets (bool): True if buckets should be fetched.
      recursion_requested (bool): True if recursion is requested, else False.
    """
    # Store all configuration before building the top-level iterator below.
    self._urls = urls
    self.all_versions = all_versions
    self._recursion_requested = recursion_requested
    self._include_buckets = include_buckets
    self._ignore_symlinks = ignore_symlinks

    top_level_resources = self._get_top_level_iterator()
    self._top_level_iterator = (
        plurality_checkable_iterator.PluralityCheckableIterator(
            top_level_resources))

    # Starts unset; it is not computed in this constructor.
    self._has_multiple_top_level_resources = None
def test_peeking_handles_buffered_error(self):
    """Checks the first next() succeeds and the second raises ValueError."""
    erroring = plurality_checkable_iterator.PluralityCheckableIterator(
        _exception_iterator())

    # The first item is returned normally; its value is not checked here.
    next(erroring)

    self.assertRaises(ValueError, next, erroring)
def test_non_empty_iterator_becomes_empty(self):
    """Checks is_empty() turns True once the only item is consumed."""
    one_item = plurality_checkable_iterator.PluralityCheckableIterator([0])

    empty_before = one_item.is_empty()
    self.assertFalse(empty_before)

    # Consume the single item.
    next(one_item)

    empty_after = one_item.is_empty()
    self.assertTrue(empty_after)
def __init__(self,
             source_name_iterator,
             destination_string,
             custom_md5_digest=None,
             do_not_decompress=False,
             print_created_message=False,
             shared_stream=None,
             skip_unsupported=True,
             task_status_queue=None,
             user_request_args=None):
    """Initializes a CopyTaskIterator instance.

    Args:
      source_name_iterator (name_expansion.NameExpansionIterator): yields
        resource_reference.Resource objects with expanded source URLs.
      destination_string (str): The copy destination path or url.
      custom_md5_digest (str|None): User-added MD5 hash output to send to
        server for validating a single resource upload.
      do_not_decompress (bool): Prevents automatically decompressing
        downloaded gzips.
      print_created_message (bool): Print the versioned URL of each
        successfully copied object.
      shared_stream (stream): Multiple tasks may reuse a read or write stream.
      skip_unsupported (bool): Skip creating copy tasks for unsupported object
        types.
      task_status_queue (multiprocessing.Queue|None): Used for estimating
        total workload from this iterator.
      user_request_args (UserRequestArgs|None): Values for RequestConfig.

    Raises:
      ValueError: Multiple sources were received together with a custom MD5
        digest.
    """
    # These two values are read from the raw iterator before it is wrapped.
    self._all_versions = source_name_iterator.all_versions
    self._has_multiple_top_level_sources = (
        source_name_iterator.has_multiple_top_level_resources)

    wrapped_sources = plurality_checkable_iterator.PluralityCheckableIterator(
        source_name_iterator)
    self._source_name_iterator = wrapped_sources
    self._multiple_sources = wrapped_sources.is_plural()

    # Plain pass-through settings.
    self._custom_md5_digest = custom_md5_digest
    self._do_not_decompress = do_not_decompress
    self._print_created_message = print_created_message
    self._shared_stream = shared_stream
    self._skip_unsupported = skip_unsupported
    self._task_status_queue = task_status_queue
    self._user_request_args = user_request_args

    # Running totals start at zero.
    self._total_file_count = 0
    self._total_size = 0

    self._raw_destination = _get_raw_destination(destination_string)

    if self._multiple_sources:
        self._raise_if_destination_is_file_url_and_not_a_directory_or_pipe()
        if self._custom_md5_digest:
            raise ValueError('Received multiple objects to upload, but only one'
                             ' custom MD5 digest is allowed.')

    # getattr tolerates user_request_args being None.
    manifest_path = getattr(user_request_args, 'manifest_path', None)
    self._already_completed_sources = (
        manifest_util.parse_for_completed_sources(manifest_path))
def Run(self, args):
    """Runs the rm command: removes objects, then buckets when recursive.

    Args:
      args: Parsed command arguments. Reads stdin, urls, all_versions,
        recursive and continue_on_error.

    Raises:
      errors.Error: URLs were given together with --stdin, or no URLs were
        given without it.
    """
    if args.stdin and args.urls:
        raise errors.Error(
            'No URL arguments allowed when reading URLs from stdin.')
    if not args.stdin and not args.urls:
        raise errors.Error(
            'Without the --stdin flag, the rm command requires at least one URL'
            ' argument.')
    urls = stdin_iterator.StdinIterator() if args.stdin else args.urls

    name_expansion_iterator = name_expansion.NameExpansionIterator(
        urls,
        all_versions=args.all_versions or args.recursive,
        include_buckets=args.recursive,
        recursion_requested=args.recursive)

    user_request_args = (
        user_request_args_factory.get_user_request_args_from_command_args(
            args))
    task_status_queue = task_graph_executor.multiprocessing_context.Queue()
    task_iterator_factory = (
        delete_task_iterator_factory.DeleteTaskIteratorFactory(
            name_expansion_iterator,
            task_status_queue=task_status_queue,
            user_request_args=user_request_args))

    log.status.Print('Removing objects:')
    object_exit_code = task_executor.execute_tasks(
        task_iterator_factory.object_iterator(),
        parallelizable=True,
        task_status_queue=task_status_queue,
        progress_manager_args=task_status.ProgressManagerArgs(
            increment_type=task_status.IncrementType.INTEGER,
            manifest_path=None),
        continue_on_error=args.continue_on_error)

    bucket_iterator = plurality_checkable_iterator.PluralityCheckableIterator(
        task_iterator_factory.bucket_iterator())

    bucket_exit_code = 0
    # The is_empty check avoids printing unnecessary status lines.
    if args.recursive and not bucket_iterator.is_empty():
        log.status.Print('Removing Buckets:')
        bucket_exit_code = task_executor.execute_tasks(
            bucket_iterator,
            parallelizable=True,
            task_status_queue=task_status_queue,
            progress_manager_args=task_status.ProgressManagerArgs(
                increment_type=task_status.IncrementType.INTEGER,
                manifest_path=None),
            continue_on_error=args.continue_on_error)

    # Report the larger of the two exit codes.
    self.exit_code = max(object_exit_code, bucket_exit_code)
def __init__(self, source_name_iterator, destination_string):
    """Initializes a CopyTaskIterator instance.

    Args:
      source_name_iterator (name_expansion.NameExpansionIterator): yields
        resource_reference.Resource objects with expanded source URLs.
      destination_string (str): The copy destination path/url.
    """
    self._destination_string = destination_string

    # Wrap the sources so plurality can be checked on the wrapper.
    wrapped_sources = plurality_checkable_iterator.PluralityCheckableIterator(
        source_name_iterator)
    self._source_name_iterator = wrapped_sources
    self._multiple_sources = wrapped_sources.is_plural()
def execute_tasks(task_iterator,
                  parallelizable=False,
                  task_status_queue=None,
                  progress_manager_args=None,
                  continue_on_error=False):
    """Runs the tasks with the parallel or the sequential executor.

    Args:
      task_iterator: An iterator for task objects.
      parallelizable (boolean): Should tasks be executed in parallel.
      task_status_queue (multiprocessing.Queue|None): Used by task to report
        its progress to a central location.
      progress_manager_args (task_status.ProgressManagerArgs|None): Determines
        what type of progress indicator to display.
      continue_on_error (bool): Only applicable for sequential mode. If True,
        execution will continue even if errors occur.

    Returns:
      An integer indicating the exit_code. Zero indicates no fatal errors were
      raised.
    """
    tasks = plurality_checkable_iterator.PluralityCheckableIterator(
        task_iterator)
    optimize_parameters_util.detect_and_set_best_config(
        is_estimated_multi_file_workload=tasks.is_plural())

    # Some tasks assume they only run when parallelizable is True, and
    # consult should_use_parallelism themselves to decide how to execute.
    run_in_parallel = parallelizable and task_util.should_use_parallelism()

    if not run_in_parallel:
        with task_status.progress_manager(task_status_queue,
                                          progress_manager_args):
            exit_code, _ = _execute_tasks_sequential(
                tasks,
                task_status_queue=task_status_queue,
                continue_on_error=continue_on_error)
        return exit_code

    executor = task_graph_executor.TaskGraphExecutor(
        tasks,
        max_process_count=properties.VALUES.storage.process_count.GetInt(),
        thread_count=properties.VALUES.storage.thread_count.GetInt(),
        task_status_queue=task_status_queue,
        progress_manager_args=progress_manager_args)
    return executor.run()
def execute(self, task_status_queue=None):
    """Lists the resources matching the task's cloud URL and prints them.

    Args:
      task_status_queue: Unused; it is discarded immediately.

    Raises:
      errors.InvalidUrlError: The URL matched no resources.
    """
    # Listing does not report status information, so drop the queue.
    del task_status_queue

    fields_scope = _translate_display_detail_to_fields_scope(
        self._display_detail, is_bucket_listing=self._cloud_url.is_provider())
    matches = wildcard_iterator.CloudWildcardIterator(
        self._cloud_url,
        all_versions=self._all_versions,
        error_on_missing_key=False,
        fields_scope=fields_scope,
        get_bucket_metadata=self._buckets_flag)
    resources = plurality_checkable_iterator.PluralityCheckableIterator(
        matches)
    if resources.is_empty():
        raise errors.InvalidUrlError(
            'One or more URLs matched no objects.')

    if self._only_display_buckets:
        # Provider URL ("gs://") -> list all buckets.
        # Buckets flag plus bucket URL -> list matching buckets, ignoring
        # recursion.
        resources_wrappers = self._recursion_helper(
            resources, recursion_level=0)
    elif self._recursion_flag and '**' not in self._cloud_url.url_string:
        # The recursive flag only applies when the URL has no "**", which
        # overrides it.
        resources_wrappers = self._recursion_helper(resources, float('inf'))
    elif not resources.is_plural() and resources.peek().is_container():
        # The query returned exactly one container, so show its contents.
        resources_wrappers = self._get_container_iterator(
            resources.peek().storage_url, recursion_level=0)
    else:
        resources_wrappers = self._recursion_helper(
            resources, recursion_level=1)

    print_list = (
        self._print_json_list
        if self._display_detail == DisplayDetail.JSON
        else self._print_row_list)
    print_list(resources_wrappers)
def __iter__(self):
    """Iterates over each URL in self._urls and yields the expanded result.

    Containers are expanded into their contents only when recursion was
    requested; otherwise they are skipped with a log message.

    Yields:
      NameExpansionResult instance.

    Raises:
      InvalidUrlError: No matching objects found.
    """
    for url in self._urls:
        resources = plurality_checkable_iterator.PluralityCheckableIterator(
            wildcard_iterator.get_wildcard_iterator(url))
        # Stays True until this URL yields at least one result.
        is_name_expansion_iterator_empty = True
        original_storage_url = storage_url.storage_url_from_string(url)

        # Iterate over all the resource_reference.Resource objects.
        for resource in resources:
            if not resource.is_container():
                yield NameExpansionResult(resource, resource.storage_url,
                                          original_storage_url)
                is_name_expansion_iterator_empty = False
                continue

            if not self._recursion_requested:
                # Name the skipped container in the message. Formatting
                # resource.is_container() would always print "True" here.
                log.info('Omitting {} because it is a container, and recursion'
                         ' is not enabled.'.format(resource.storage_url))
                continue

            # Append '**' to fetch all objects under this container.
            new_storage_url = resource.storage_url.join('**')
            child_resources = wildcard_iterator.get_wildcard_iterator(
                new_storage_url.url_string)
            for child_resource in child_resources:
                yield NameExpansionResult(child_resource, resource.storage_url,
                                          original_storage_url)
                is_name_expansion_iterator_empty = False

        if is_name_expansion_iterator_empty:
            raise errors.InvalidUrlError(
                '{} matched no objects or files.'.format(url))
def __init__(self,
             source_name_iterator,
             destination_string,
             custom_md5_digest=None):
    """Initializes a CopyTaskIterator instance.

    Args:
      source_name_iterator (name_expansion.NameExpansionIterator): yields
        resource_reference.Resource objects with expanded source URLs.
      destination_string (str): The copy destination path/url.
      custom_md5_digest (str|None): User-added MD5 hash output to send to
        server for validating a single resource upload.

    Raises:
      ValueError: Multiple sources were received together with a custom MD5
        digest.
    """
    self._source_name_iterator = (
        plurality_checkable_iterator.PluralityCheckableIterator(
            source_name_iterator))
    self._multiple_sources = self._source_name_iterator.is_plural()
    self._destination_string = destination_string
    self._custom_md5_digest = custom_md5_digest

    if self._multiple_sources and self._custom_md5_digest:
        # The second literal needs a leading space: adjacent string literals
        # are joined with no separator.
        raise ValueError('Received multiple objects to upload, but only one'
                         ' custom MD5 digest is allowed.')
def Run(self, args):
    """Deletes the buckets named by args.urls.

    Args:
      args: Parsed command arguments. Reads urls.

    Raises:
      errors.InvalidUrlError: A given URL is not a bucket URL.
    """
    # Reject the request at the first URL that is not a bucket.
    has_non_bucket_url = any(
        not storage_url.storage_url_from_string(url_string).is_bucket()
        for url_string in args.urls)
    if has_non_bucket_url:
        raise errors.InvalidUrlError(
            'buckets delete only accepts cloud bucket URLs. Example:'
            ' "gs://bucket"')

    task_status_queue = multiprocessing.Queue()
    expanded_names = name_expansion.NameExpansionIterator(
        args.urls, include_buckets=True)
    factory = delete_task_iterator_factory.DeleteTaskIteratorFactory(
        expanded_names, task_status_queue=task_status_queue)
    bucket_tasks = plurality_checkable_iterator.PluralityCheckableIterator(
        factory.bucket_iterator())

    progress_args = task_status.ProgressManagerArgs(
        increment_type=task_status.IncrementType.INTEGER,
        manifest_path=None)
    task_executor.execute_tasks(
        bucket_tasks,
        parallelizable=True,
        task_status_queue=task_status_queue,
        progress_manager_args=progress_args)
def test_peeking_returns_first_iterator_item(self):
    """Checks peek() returns the first item on repeated calls."""
    one_item = plurality_checkable_iterator.PluralityCheckableIterator([0])

    first_peek = one_item.peek()
    self.assertEqual(first_peek, 0)

    # Peek again to confirm the first peek did not consume the item.
    second_peek = one_item.peek()
    self.assertEqual(second_peek, 0)
def test_iteration_terminates(self):
    """Checks next() on a wrapper around an empty list raises StopIteration."""
    no_items = plurality_checkable_iterator.PluralityCheckableIterator([])
    self.assertRaises(StopIteration, next, no_items)
def test_iteration_yields_correct_values(self):
    """Checks the wrapper yields the wrapped list's items in order."""
    expected_values = [0, 1, 2]
    wrapped = plurality_checkable_iterator.PluralityCheckableIterator(
        expected_values)

    actual_values = list(wrapped)

    self.assertEqual(actual_values, expected_values)
def test_plural_iterator(self):
    """Checks is_plural() is True for a two-item iterable."""
    two_items = plurality_checkable_iterator.PluralityCheckableIterator(
        [0, 1])
    is_plural = two_items.is_plural()
    self.assertTrue(is_plural)
def test_singular_iterator_not_plural(self):
    """Checks is_plural() is False for a one-item iterable."""
    one_item = plurality_checkable_iterator.PluralityCheckableIterator([0])
    is_plural = one_item.is_plural()
    self.assertFalse(is_plural)
def test_exceptions_count_toward_plurality(self):
    """Checks is_plural() is True for the iterator from _exception_iterator."""
    erroring = plurality_checkable_iterator.PluralityCheckableIterator(
        _exception_iterator())
    is_plural = erroring.is_plural()
    self.assertTrue(is_plural)
def test_empty_iterator_is_empty(self):
    """Checks is_empty() is True for an empty iterable."""
    no_items = plurality_checkable_iterator.PluralityCheckableIterator([])
    is_empty = no_items.is_empty()
    self.assertTrue(is_empty)
def test_non_empty_iterator_is_not_empty(self):
    """Checks is_empty() is False for a one-item iterable."""
    one_item = plurality_checkable_iterator.PluralityCheckableIterator([0])
    is_empty = one_item.is_empty()
    self.assertFalse(is_empty)
def test_peeking_returns_none_for_empty_iterator(self):
    """Checks peek() returns None when there is nothing to peek at."""
    no_items = plurality_checkable_iterator.PluralityCheckableIterator([])
    peeked = no_items.peek()
    self.assertIsNone(peeked)