Example #1
0
    def test_multiply_wrapped_iterator_raises_exception(self):
        """Checks a doubly wrapped iterator yields 0 and then raises ValueError."""
        inner_iter = plurality_checkable_iterator.PluralityCheckableIterator(
            _exception_iterator())
        outer_iter = plurality_checkable_iterator.PluralityCheckableIterator(
            inner_iter)

        first_item = next(outer_iter)
        self.assertEqual(first_item, 0)
        # The second item requested from the outer wrapper must be the error.
        self.assertRaises(ValueError, next, outer_iter)
Example #2
0
    def __iter__(self):
        """Expands every URL in self._urls, yielding one result per match.

        Yields:
          NameExpansionResult instance.

        Raises:
          errors.InvalidUrlError: A URL matched no resources.
        """
        for url in self._urls:
            matches = plurality_checkable_iterator.PluralityCheckableIterator(
                wildcard_iterator.get_wildcard_iterator(url))
            if matches.is_empty():
                raise errors.InvalidUrlError(
                    '{} matched no objects.'.format(url))

            for resource in matches:
                expand_children = (
                    self._recursion_requested and resource.is_container())
                if not expand_children:
                    yield NameExpansionResult(resource, resource.storage_url)
                    continue

                # A trailing '**' fetches all objects under this container.
                recursive_url = resource.storage_url.join('**')
                for child_resource in wildcard_iterator.get_wildcard_iterator(
                        recursive_url.url_string):
                    yield NameExpansionResult(child_resource,
                                              resource.storage_url)
Example #3
0
    def test_initially_plural_iterator_becomes_singular_is_not_plural(self):
        """Checks is_plural() turns False once one of two items is consumed."""
        pair_iter = plurality_checkable_iterator.PluralityCheckableIterator(
            [0, 1])

        plural_before = pair_iter.is_plural()
        self.assertTrue(plural_before)

        next(pair_iter)
        plural_after = pair_iter.is_plural()
        self.assertFalse(plural_after)
  def execute(self, callback=None):
    """Prints the resources matched by self._cloud_url.

    Args:
      callback (Callable|None): Invoked with no arguments after printing, if
        provided.

    Raises:
      errors.InvalidUrlError: The URL matched nothing.
    """
    fields_scope = _translate_display_detail_to_fields_scope(
        self._display_detail, is_bucket_listing=self._cloud_url.is_provider())
    matches = plurality_checkable_iterator.PluralityCheckableIterator(
        wildcard_iterator.CloudWildcardIterator(
            self._cloud_url, fields_scope=fields_scope))

    if matches.is_empty():
      raise errors.InvalidUrlError('One or more URLs matched no objects.')

    if self._cloud_url.is_provider():
      # Provider URL ("gs://"): list bucket names with no formatting.
      wrapped_resources = self._recursion_helper(matches, recursion_level=0)
    elif self._recursion_flag and '**' not in self._cloud_url.url_string:
      # An explicit "**" in the URL overrides the recursive flag.
      wrapped_resources = self._recursion_helper(matches, float('inf'))
    elif not matches.is_plural() and matches.peek().is_container():
      # The query returned exactly one container, so show its contents.
      sole_container = matches.peek()
      wrapped_resources = self._get_container_iterator(
          sole_container.storage_url, recursion_level=0)
    else:
      wrapped_resources = self._recursion_helper(matches, recursion_level=1)

    # TODO(b/169795589): We may display something other than JSON for FULL,
    # and make JSON its own DisplayDetail option.
    if self._display_detail == DisplayDetail.FULL:
      self._print_json_list(wrapped_resources)
    else:
      self._print_row_list(wrapped_resources)

    if callback:
      callback()
  def _expand_destination_wildcards(self):
    """Resolves the destination string to at most one resource.

    Much like the unix cp command, the storage surface only supports copy
    operations to one user-specified destination, so a wildcard expansion that
    matches several resources is rejected.

    Returns:
      A resource_reference.Resource, or None if no matching resource is found.

    Raises:
      ValueError: More than one resource matched, or the destination contained
        a wildcard and no resources matched.
    """
    matches = plurality_checkable_iterator.PluralityCheckableIterator(
        wildcard_iterator.get_wildcard_iterator(self._destination_string))

    unexpanded_wildcard = (
        matches.is_empty() and
        wildcard_iterator.contains_wildcard(self._destination_string))
    if matches.is_plural() or unexpanded_wildcard:
      raise ValueError('Destination ({}) must match exactly one URL'.format(
          self._destination_string))

    if matches.is_empty():
      return None
    return next(matches)
    def __init__(self,
                 urls,
                 all_versions=False,
                 ignore_symlinks=False,
                 include_buckets=False,
                 recursion_requested=False):
        """Instantiates NameExpansionIterator.

        Args:
          urls (Iterable[str]): The URLs to expand.
          all_versions (bool): True if all versions of objects should be
            fetched, else False.
          ignore_symlinks (bool): Skip over symlinks instead of following them.
          include_buckets (bool): True if buckets should be fetched.
          recursion_requested (bool): True if recursion is requested, else
            False.
        """
        # Store the plain options first; the top-level iterator is built
        # afterwards so these attributes exist when it is created.
        self._urls = urls
        self._recursion_requested = recursion_requested
        self._include_buckets = include_buckets
        self._ignore_symlinks = ignore_symlinks
        self.all_versions = all_versions

        top_level_resources = self._get_top_level_iterator()
        self._top_level_iterator = (
            plurality_checkable_iterator.PluralityCheckableIterator(
                top_level_resources))
        # Left as None here; it is not computed in the constructor.
        self._has_multiple_top_level_resources = None
Example #7
0
    def test_peeking_handles_buffered_error(self):
        """Checks the second next() call on the wrapper raises ValueError."""
        error_iter = plurality_checkable_iterator.PluralityCheckableIterator(
            _exception_iterator())

        # The first item is consumed without asserting on its value.
        next(error_iter)
        self.assertRaises(ValueError, next, error_iter)
Example #8
0
    def test_non_empty_iterator_becomes_empty(self):
        """Checks is_empty() turns True once the only item is consumed."""
        single_iter = plurality_checkable_iterator.PluralityCheckableIterator(
            [0])

        empty_before = single_iter.is_empty()
        self.assertFalse(empty_before)

        next(single_iter)
        empty_after = single_iter.is_empty()
        self.assertTrue(empty_after)
  def __init__(self,
               source_name_iterator,
               destination_string,
               custom_md5_digest=None,
               do_not_decompress=False,
               print_created_message=False,
               shared_stream=None,
               skip_unsupported=True,
               task_status_queue=None,
               user_request_args=None):
    """Initializes a CopyTaskIterator instance.

    Args:
      source_name_iterator (name_expansion.NameExpansionIterator):
        yields resource_reference.Resource objects with expanded source URLs.
      destination_string (str): The copy destination path or url.
      custom_md5_digest (str|None): User-added MD5 hash output to send to server
        for validating a single resource upload.
      do_not_decompress (bool): Prevents automatically decompressing
        downloaded gzips.
      print_created_message (bool): Print the versioned URL of each successfully
        copied object.
      shared_stream (stream): Multiple tasks may reuse a read or write stream.
      skip_unsupported (bool): Skip creating copy tasks for unsupported object
        types.
      task_status_queue (multiprocessing.Queue|None): Used for estimating total
        workload from this iterator.
      user_request_args (UserRequestArgs|None): Values for RequestConfig.

    Raises:
      ValueError: Multiple sources were given together with a custom MD5
        digest.
    """
    # Read these from the raw iterator before it is wrapped below.
    self._all_versions = source_name_iterator.all_versions
    self._has_multiple_top_level_sources = (
        source_name_iterator.has_multiple_top_level_resources)

    wrapped_sources = plurality_checkable_iterator.PluralityCheckableIterator(
        source_name_iterator)
    self._source_name_iterator = wrapped_sources
    self._multiple_sources = self._source_name_iterator.is_plural()

    # Plain option storage.
    self._user_request_args = user_request_args
    self._task_status_queue = task_status_queue
    self._skip_unsupported = skip_unsupported
    self._shared_stream = shared_stream
    self._print_created_message = print_created_message
    self._do_not_decompress = do_not_decompress
    self._custom_md5_digest = custom_md5_digest

    # Running totals start at zero.
    self._total_size = 0
    self._total_file_count = 0

    self._raw_destination = _get_raw_destination(destination_string)
    if self._multiple_sources:
      self._raise_if_destination_is_file_url_and_not_a_directory_or_pipe()
      if self._custom_md5_digest:
        raise ValueError('Received multiple objects to upload, but only one'
                         ' custom MD5 digest is allowed.')

    manifest_path = getattr(user_request_args, 'manifest_path', None)
    self._already_completed_sources = manifest_util.parse_for_completed_sources(
        manifest_path)
Example #10
0
    def Run(self, args):
        """Runs the object delete tasks, then the bucket delete tasks if recursive.

        URLs come either from stdin (args.stdin) or from args.urls, never both.
        self.exit_code is set to the larger of the two phases' exit codes.

        Args:
          args: Parsed command arguments; reads stdin, urls, all_versions,
            recursive and continue_on_error.

        Raises:
          errors.Error: URLs were passed together with --stdin, or no URLs
            were passed without it.
        """
        if args.stdin:
            if args.urls:
                raise errors.Error(
                    'No URL arguments allowed when reading URLs from stdin.')
            urls = stdin_iterator.StdinIterator()
        elif args.urls:
            urls = args.urls
        else:
            raise errors.Error(
                'Without the --stdin flag, the rm command requires at least one URL'
                ' argument.')

        expanded_names = name_expansion.NameExpansionIterator(
            urls,
            all_versions=args.all_versions or args.recursive,
            include_buckets=args.recursive,
            recursion_requested=args.recursive)

        user_request_args = (user_request_args_factory.
                             get_user_request_args_from_command_args(args))
        status_queue = task_graph_executor.multiprocessing_context.Queue()
        delete_factory = (
            delete_task_iterator_factory.DeleteTaskIteratorFactory(
                expanded_names,
                task_status_queue=status_queue,
                user_request_args=user_request_args))

        log.status.Print('Removing objects:')
        object_exit_code = task_executor.execute_tasks(
            delete_factory.object_iterator(),
            parallelizable=True,
            task_status_queue=status_queue,
            progress_manager_args=task_status.ProgressManagerArgs(
                increment_type=task_status.IncrementType.INTEGER,
                manifest_path=None),
            continue_on_error=args.continue_on_error)

        bucket_tasks = plurality_checkable_iterator.PluralityCheckableIterator(
            delete_factory.bucket_iterator())

        bucket_exit_code = 0
        # The is_empty check avoids printing unnecessary status lines.
        if args.recursive and not bucket_tasks.is_empty():
            log.status.Print('Removing Buckets:')
            bucket_exit_code = task_executor.execute_tasks(
                bucket_tasks,
                parallelizable=True,
                task_status_queue=status_queue,
                progress_manager_args=task_status.ProgressManagerArgs(
                    increment_type=task_status.IncrementType.INTEGER,
                    manifest_path=None),
                continue_on_error=args.continue_on_error)

        self.exit_code = max(object_exit_code, bucket_exit_code)
    def __init__(self, source_name_iterator, destination_string):
        """Initializes a CopyTaskIterator instance.

        Args:
          source_name_iterator (name_expansion.NameExpansionIterator):
            yields resource_reference.Resource objects with expanded source
            URLs.
          destination_string (str): The copy destination path/url.
        """
        wrapped_sources = (
            plurality_checkable_iterator.PluralityCheckableIterator(
                source_name_iterator))
        self._source_name_iterator = wrapped_sources
        # Recorded once, at construction time.
        self._multiple_sources = self._source_name_iterator.is_plural()
        self._destination_string = destination_string
def execute_tasks(task_iterator,
                  parallelizable=False,
                  task_status_queue=None,
                  progress_manager_args=None,
                  continue_on_error=False):
    """Runs the tasks with either the parallel or the sequential executor.

    Args:
      task_iterator: An iterator for task objects.
      parallelizable (boolean): Should tasks be executed in parallel.
      task_status_queue (multiprocessing.Queue|None): Used by task to report
        its progress to a central location.
      progress_manager_args (task_status.ProgressManagerArgs|None):
        Determines what type of progress indicator to display.
      continue_on_error (bool): Only applicable for sequential mode. If True,
        execution will continue even if errors occur.

    Returns:
      An integer indicating the exit_code. Zero indicates no fatal errors were
        raised.
    """
    wrapped_tasks = plurality_checkable_iterator.PluralityCheckableIterator(
        task_iterator)
    optimize_parameters_util.detect_and_set_best_config(
        is_estimated_multi_file_workload=wrapped_tasks.is_plural())

    # Some tasks assume they only run when parallelizable is True and consult
    # should_use_parallelism themselves to decide how they execute.
    if parallelizable and task_util.should_use_parallelism():
        graph_executor = task_graph_executor.TaskGraphExecutor(
            wrapped_tasks,
            max_process_count=properties.VALUES.storage.process_count.GetInt(),
            thread_count=properties.VALUES.storage.thread_count.GetInt(),
            task_status_queue=task_status_queue,
            progress_manager_args=progress_manager_args)
        return graph_executor.run()

    with task_status.progress_manager(task_status_queue,
                                      progress_manager_args):
        exit_code, _ = _execute_tasks_sequential(
            wrapped_tasks,
            task_status_queue=task_status_queue,
            continue_on_error=continue_on_error)
    return exit_code
Example #13
0
    def execute(self, task_status_queue=None):
        """Prints the resources matched by self._cloud_url.

        Args:
          task_status_queue: Unused; the list task reports no status.

        Raises:
          errors.InvalidUrlError: The URL matched nothing.
        """
        # List task does not need to report status information.
        del task_status_queue

        fields_scope = _translate_display_detail_to_fields_scope(
            self._display_detail,
            is_bucket_listing=self._cloud_url.is_provider())
        matches = plurality_checkable_iterator.PluralityCheckableIterator(
            wildcard_iterator.CloudWildcardIterator(
                self._cloud_url,
                all_versions=self._all_versions,
                error_on_missing_key=False,
                fields_scope=fields_scope,
                get_bucket_metadata=self._buckets_flag))

        if matches.is_empty():
            raise errors.InvalidUrlError(
                'One or more URLs matched no objects.')

        if self._only_display_buckets:
            # Received a provider URL ("gs://") -> List all buckets.
            # Received buckets flag and bucket URL -> List matching buckets,
            #   ignoring recursion.
            wrapped_resources = self._recursion_helper(matches,
                                                       recursion_level=0)
        elif self._recursion_flag and '**' not in self._cloud_url.url_string:
            # An explicit "**" in the URL overrides the recursive flag.
            wrapped_resources = self._recursion_helper(matches, float('inf'))
        elif not matches.is_plural() and matches.peek().is_container():
            # The query returned exactly one container, so show its contents.
            sole_container = matches.peek()
            wrapped_resources = self._get_container_iterator(
                sole_container.storage_url, recursion_level=0)
        else:
            wrapped_resources = self._recursion_helper(matches,
                                                       recursion_level=1)

        if self._display_detail == DisplayDetail.JSON:
            self._print_json_list(wrapped_resources)
        else:
            self._print_row_list(wrapped_resources)
Example #14
0
  def __iter__(self):
    """Iterates over each URL in self._urls and yields the expanded result.

    Non-container resources are yielded directly. Containers are expanded
    into the resources beneath them only when recursion was requested;
    otherwise they are skipped and an info-level log entry is written.

    Yields:
      NameExpansionResult instance.

    Raises:
      InvalidUrlError: No matching objects found.
    """
    for url in self._urls:
      resources = plurality_checkable_iterator.PluralityCheckableIterator(
          wildcard_iterator.get_wildcard_iterator(url))
      # Stays True until this URL produces at least one result.
      is_name_expansion_iterator_empty = True
      original_storage_url = storage_url.storage_url_from_string(url)

      # Iterate over all the resource_reference.Resource objects.
      for resource in resources:
        if not resource.is_container():
          yield NameExpansionResult(resource, resource.storage_url,
                                    original_storage_url)
          is_name_expansion_iterator_empty = False
          continue

        if not self._recursion_requested:
          # Name the skipped container in the message. Formatting
          # resource.is_container() here would always log "True".
          log.info('Omitting {} because it is a container, and recursion'
                   ' is not enabled.'.format(resource.storage_url))
          continue

        # Append '**' to fetch all objects under this container.
        new_storage_url = resource.storage_url.join('**')
        child_resources = wildcard_iterator.get_wildcard_iterator(
            new_storage_url.url_string)
        for child_resource in child_resources:
          yield NameExpansionResult(child_resource, resource.storage_url,
                                    original_storage_url)
          is_name_expansion_iterator_empty = False

      if is_name_expansion_iterator_empty:
        raise errors.InvalidUrlError(
            '{} matched no objects or files.'.format(url))
  def __init__(self,
               source_name_iterator,
               destination_string,
               custom_md5_digest=None):
    """Initializes a CopyTaskIterator instance.

    Args:
      source_name_iterator (name_expansion.NameExpansionIterator):
        yields resource_reference.Resource objects with expanded source URLs.
      destination_string (str): The copy destination path/url.
      custom_md5_digest (str|None): User-added MD5 hash output to send to server
          for validating a single resource upload.

    Raises:
      ValueError: Multiple sources were given together with a custom MD5
        digest.
    """
    self._source_name_iterator = (
        plurality_checkable_iterator.PluralityCheckableIterator(
            source_name_iterator))
    self._multiple_sources = self._source_name_iterator.is_plural()
    self._destination_string = destination_string
    self._custom_md5_digest = custom_md5_digest

    if self._multiple_sources and self._custom_md5_digest:
      # The leading space on the second literal keeps the adjacent string
      # literals from fusing into "onecustom".
      raise ValueError('Received multiple objects to upload, but only one'
                       ' custom MD5 digest is allowed.')
Example #16
0
    def Run(self, args):
        """Validates that every URL is a bucket URL, then runs the bucket tasks.

        Args:
          args: Parsed command arguments; reads urls.

        Raises:
          errors.InvalidUrlError: A URL in args.urls is not a bucket URL.
        """
        has_non_bucket_url = any(
            not storage_url.storage_url_from_string(url_string).is_bucket()
            for url_string in args.urls)
        if has_non_bucket_url:
            raise errors.InvalidUrlError(
                'buckets delete only accepts cloud bucket URLs. Example:'
                ' "gs://bucket"')

        status_queue = multiprocessing.Queue()

        expanded_names = name_expansion.NameExpansionIterator(
            args.urls, include_buckets=True)
        delete_factory = delete_task_iterator_factory.DeleteTaskIteratorFactory(
            expanded_names, task_status_queue=status_queue)
        bucket_tasks = plurality_checkable_iterator.PluralityCheckableIterator(
            delete_factory.bucket_iterator())

        task_executor.execute_tasks(
            bucket_tasks,
            parallelizable=True,
            task_status_queue=status_queue,
            progress_manager_args=task_status.ProgressManagerArgs(
                increment_type=task_status.IncrementType.INTEGER,
                manifest_path=None))
Example #17
0
 def test_peeking_returns_first_iterator_item(self):
     """Checks peek() returns the first item on two consecutive calls."""
     single_iter = plurality_checkable_iterator.PluralityCheckableIterator(
         [0])
     first_peek = single_iter.peek()
     self.assertEqual(first_peek, 0)
     # Peek a second time to confirm the first item was not consumed.
     second_peek = single_iter.peek()
     self.assertEqual(second_peek, 0)
Example #18
0
    def test_iteration_terminates(self):
        """Checks next() on a wrapper around an empty list raises StopIteration."""
        empty_iter = plurality_checkable_iterator.PluralityCheckableIterator([])

        self.assertRaises(StopIteration, next, empty_iter)
Example #19
0
    def test_iteration_yields_correct_values(self):
        """Checks iterating the wrapper reproduces the wrapped list's items."""
        source_values = [0, 1, 2]
        wrapped_iter = plurality_checkable_iterator.PluralityCheckableIterator(
            source_values)

        yielded_values = list(wrapped_iter)
        self.assertEqual(yielded_values, source_values)
Example #20
0
 def test_plural_iterator(self):
     """Checks a wrapper around two items reports is_plural() as True."""
     pair_iter = plurality_checkable_iterator.PluralityCheckableIterator(
         [0, 1])
     plural = pair_iter.is_plural()
     self.assertTrue(plural)
Example #21
0
 def test_singular_iterator_not_plural(self):
     """Checks a wrapper around one item reports is_plural() as False."""
     single_iter = plurality_checkable_iterator.PluralityCheckableIterator(
         [0])
     plural = single_iter.is_plural()
     self.assertFalse(plural)
Example #22
0
 def test_exceptions_count_toward_plurality(self):
     """Checks a wrapper around _exception_iterator() reports is_plural()."""
     error_iter = plurality_checkable_iterator.PluralityCheckableIterator(
         _exception_iterator())
     plural = error_iter.is_plural()
     self.assertTrue(plural)
Example #23
0
 def test_empty_iterator_is_empty(self):
     """Checks a wrapper around an empty list reports is_empty() as True."""
     empty_iter = plurality_checkable_iterator.PluralityCheckableIterator([])
     empty = empty_iter.is_empty()
     self.assertTrue(empty)
Example #24
0
 def test_non_empty_iterator_is_not_empty(self):
     """Checks a wrapper around one item reports is_empty() as False."""
     single_iter = plurality_checkable_iterator.PluralityCheckableIterator(
         [0])
     empty = single_iter.is_empty()
     self.assertFalse(empty)
Example #25
0
 def test_peeking_returns_none_for_empty_iterator(self):
     """Checks peek() on a wrapper around an empty list returns None."""
     empty_iter = plurality_checkable_iterator.PluralityCheckableIterator([])
     peeked = empty_iter.peek()
     self.assertIsNone(peeked)