def from_url_string(url_string):
  """Convert test resource URL to resource object. Do not use in production.

  Do not use in production because terminating with a delimiter is not always
  an accurate indicator of whether a URL is a prefix. For example, a query for
  "gs://bucket/dir" may simply be missing the trailing "/".

  Furthermore, different operating systems may have different ways of signaling
  that a filesystem path points to a directory.

  Args:
    url_string (str): Path to resource. Ex: "gs://bucket/hi" or "/bin/cat.png".

  Returns:
    resource.Resource subclass appropriate for URL.
  """
  parsed_url = storage_url.storage_url_from_string(url_string)

  if isinstance(parsed_url, storage_url.FileUrl):
    # See docstring.
    if url_string.endswith(parsed_url.delimiter):
      return get_file_directory_resource(url_string)
    return get_file_object_resource(url_string)
  # Must be a CloudUrl because it's not a FileUrl.
  if parsed_url.is_bucket():
    return get_bucket_resource(parsed_url.scheme, parsed_url.bucket_name)
  if parsed_url.is_object() and not url_string.endswith(
      storage_url.CloudUrl.CLOUD_URL_DELIM):
    return get_object_resource(parsed_url.scheme, parsed_url.bucket_name,
                               parsed_url.object_name, parsed_url.generation)
  # See docstring.
  if parsed_url.is_object() and url_string.endswith(
      storage_url.CloudUrl.CLOUD_URL_DELIM):
    return get_prefix_resource(parsed_url.scheme, parsed_url.bucket_name,
                               parsed_url.object_name)
  return get_unknown_resource(url_string)
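The dispatch above can be summarized with a few illustrative calls. This is a sketch based on the trailing-delimiter rule in the docstring and the tests further down, not captured output from the real module.

import os

from_url_string('gs://bucket')           # BucketResource
from_url_string('gs://bucket/obj.txt')   # ObjectResource
from_url_string('gs://bucket/dir/')      # PrefixResource (trailing cloud delimiter)
from_url_string('hi.txt')                # FileObjectResource
from_url_string('hi' + os.sep)           # FileDirectoryResource (trailing OS separator)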
    def _get_container_iterator(self, cloud_url, recursion_level):
        """For recursing into and retrieving the contents of a container.

    Args:
      cloud_url (storage_url.CloudUrl): Container URL for recursing into.
      recursion_level (int): Determines if iterator should keep recursing.

    Returns:
      _BaseFormatWrapper generator.
    """
        # End URL with '/*', so WildcardIterator won't filter out its contents.
        new_url_string = cloud_url.versionless_url_string
        if cloud_url.versionless_url_string[-1] != cloud_url.delimiter:
            new_url_string += cloud_url.delimiter
        new_cloud_url = storage_url.storage_url_from_string(new_url_string +
                                                            '*')

        fields_scope = _translate_display_detail_to_fields_scope(
            self._display_detail, is_bucket_listing=False)
        iterator = wildcard_iterator.CloudWildcardIterator(
            new_cloud_url,
            all_versions=self._all_versions,
            fields_scope=fields_scope)
        return self._recursion_helper(iterator, recursion_level)
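The URL manipulation above is plain string handling; here is a standalone sketch of the same idea with a hypothetical helper name and a default '/' delimiter:

def _append_contents_wildcard(url_string, delimiter='/'):
  # Ensure the container URL ends with its delimiter, then append '*'
  # so a wildcard iterator expands to the container's contents.
  if not url_string.endswith(delimiter):
    url_string += delimiter
  return url_string + '*'

# _append_contents_wildcard('gs://bucket/dir')  -> 'gs://bucket/dir/*'
# _append_contents_wildcard('gs://bucket/dir/') -> 'gs://bucket/dir/*'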
    def Run(self, args):
        for url_string in args.urls:
            if not storage_url.storage_url_from_string(url_string).is_bucket():
                raise errors.InvalidUrlError(
                    'buckets delete only accepts cloud bucket URLs. Example:'
                    ' "gs://bucket"')

        task_status_queue = multiprocessing.Queue()

        bucket_iterator = delete_task_iterator_factory.DeleteTaskIteratorFactory(
            name_expansion.NameExpansionIterator(args.urls,
                                                 include_buckets=True),
            task_status_queue=task_status_queue).bucket_iterator()
        plurality_checkable_bucket_iterator = (
            plurality_checkable_iterator.PluralityCheckableIterator(
                bucket_iterator))

        task_executor.execute_tasks(
            plurality_checkable_bucket_iterator,
            parallelizable=True,
            task_status_queue=task_status_queue,
            progress_manager_args=task_status.ProgressManagerArgs(
                increment_type=task_status.IncrementType.INTEGER,
                manifest_path=None))
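The guard at the top of Run hinges on is_bucket(). A quick illustration of which URL strings pass, assuming the storage_url module used throughout these examples (the import path is an assumption):

from googlecloudsdk.command_lib.storage import storage_url

storage_url.storage_url_from_string('gs://bucket').is_bucket()         # True
storage_url.storage_url_from_string('gs://bucket/object').is_bucket()  # False -> InvalidUrlError above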
    def __init__(self,
                 url,
                 all_versions=False,
                 fields_scope=cloud_api.FieldsScope.NO_ACL):
        """Instantiates an iterator that matches the wildcard URL.

    Args:
      url (CloudUrl): CloudUrl that may contain wildcard that needs expansion.
      all_versions (bool): If true, the iterator yields all versions of objects
          matching the wildcard.  If false, yields just the live object version.
      fields_scope (cloud_api.FieldsScope): Determines amount of metadata
          returned by API.
    """
        super(CloudWildcardIterator, self).__init__()
        url = _compress_url_wildcards(url)
        self._url = url
        self._all_versions = all_versions
        self._fields_scope = fields_scope
        self._client = api_factory.get_api(url.scheme)

        if url.url_string.endswith(url.delimiter):
            # Forces the API to return prefixes instead of their contents.
            url = storage_url.storage_url_from_string(
                storage_url.rstrip_one_delimiter(url.url_string))
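rstrip_one_delimiter removes a single trailing delimiter so the API lists the prefix itself rather than its contents. A standalone stand-in for illustration (hypothetical, not necessarily the module's implementation):

def rstrip_one_delimiter(url_string, delimiter='/'):
  # Remove at most one trailing delimiter.
  if url_string.endswith(delimiter):
    return url_string[:-len(delimiter)]
  return url_string

# rstrip_one_delimiter('gs://bucket/dir/') -> 'gs://bucket/dir'
# rstrip_one_delimiter('gs://bucket/dir')  -> 'gs://bucket/dir'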
  def test_long_display_detail_converts_timezone_behind_utc(
      self, client):
    """Long lists adds negative timedelta to creation_time."""
    self.object1.creation_time = datetime.datetime(
        1111, 1, 1, tzinfo=datetime.timezone(datetime.timedelta(
            hours=-4, minutes=-40)))
    client.get_bucket.side_effect = [self.bucket1]
    client.list_objects.side_effect = [self.bucket1_top_level_resources]

    task = cloud_list_task.CloudListTask(
        storage_url.storage_url_from_string('gs://bucket1/'),
        display_detail=cloud_list_task.DisplayDetail.LONG)
    task.execute()

    output = self.GetOutput()
    expected_output = textwrap.dedent(
        """\
                 0  1111-01-01T04:40:00Z  gs://bucket1/object1
                                          gs://bucket1/dir1/
                                          gs://bucket1/dir2/
        TOTAL: 1 objects, 0 bytes (0B)
        """
    )
    self.assertEqual(output, expected_output)
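The '1111-01-01T04:40:00Z' timestamp in the expected output follows from plain datetime arithmetic; a quick standard-library check, independent of the task code:

import datetime

tz = datetime.timezone(datetime.timedelta(hours=-4, minutes=-40))
creation_time = datetime.datetime(1111, 1, 1, tzinfo=tz)
# Midnight at UTC-04:40 is 04:40 UTC.
print(creation_time.astimezone(datetime.timezone.utc).isoformat())
# 1111-01-01T04:40:00+00:00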
    def test_uploads_object_with_object_resource(self):
        upload_metadata = self.messages.Object(name='o', bucket='b')
        request = self.messages.StorageObjectsInsertRequest(
            bucket=upload_metadata.bucket, object=upload_metadata)
        self.apitools_client.objects.Insert.Expect(request,
                                                   response=upload_metadata)

        upload_stream = mock.mock_open()
        upload_resource = resource_reference.FileObjectResource(
            storage_url.storage_url_from_string('gs://b/o'))
        expected_resource = gcs_api._object_resource_from_metadata(
            upload_metadata)
        with mock.patch.object(apitools_transfer, 'Upload') as mock_upload:
            observed_resource = self.gcs_client.upload_object(
                upload_stream, upload_resource)
            self.assertEqual(observed_resource, expected_resource)

            mock_upload.assert_called_once_with(
                upload_stream,
                gcs_api.DEFAULT_CONTENT_TYPE,
                total_size=None,
                auto_transfer=True,
                num_retries=gcs_api.DEFAULT_NUM_RETRIES,
                gzip_encoded=False)
 def test_join_returns_new_url_with_appended_part(self, url_str, part,
                                                  expected_string):
     url = storage_url.storage_url_from_string(url_str)
     new_url = url.join(part)
     self.assertEqual(new_url.url_string, expected_string)
     self.assertEqual(type(url), type(new_url))
 def test_file_url_isdir_with_invalid_path(self):
     file_url_object = storage_url.storage_url_from_string(
         'invalid/dirpath')
     self.assertFalse(file_url_object.isdir())
 def test_file_url_isdir(self):
     file_url_object = storage_url.storage_url_from_string(
         os.path.dirname(self.local_file))
     self.assertTrue(file_url_object.isdir())
 def test_file_url_exists_with_invalid_path(self):
     file_url_object = storage_url.storage_url_from_string(
         'invalid/path.txt')
     self.assertFalse(file_url_object.exists())
 def test_gets_bucket_resource(self):
     url_string = 'gs://bucket'
     cloud_url = storage_url.storage_url_from_string(url_string)
     resource = resource_reference.BucketResource(cloud_url)
     self.assertEqual(test_resources.from_url_string(url_string), resource)
  def test_execute_lists_multiple_buckets_with_recursive_flag_properly(
      self, client):
    """Test if all content of all buckets is shown recursively."""
    client.list_buckets.side_effect = [self.bucket_resources]
    client.list_objects.side_effect = [
        self.bucket1_top_level_resources, self.bucket1_dir1_resources,
        self.bucket1_dir1_subdir1_resources,
        self.bucket1_dir1_subdir2_resources, self.bucket1_dir2_resources,
        self.bucket1_dir2_subdir3_resources, self.bucket2_top_level_resources,
        self.bucket2_dir_object_resources
    ]

    task = cloud_list_task.CloudListTask(
        storage_url.storage_url_from_string('gs://bucket*'),
        recursion_flag=True)
    task.execute()

    output = self.GetOutput()
    expected_output = textwrap.dedent(
        """\
        gs://bucket1:
        gs://bucket1/object1

        gs://bucket1/dir1/:
        gs://bucket1/dir1/object2

        gs://bucket1/dir1/subdir1/:
        gs://bucket1/dir1/subdir1/object3

        gs://bucket1/dir1/subdir2/:
        gs://bucket1/dir1/subdir2/object4

        gs://bucket1/dir2/:

        gs://bucket1/dir2/subdir3/:
        gs://bucket1/dir2/subdir3/object5

        gs://bucket2:
        gs://bucket2/dir_object

        gs://bucket2/dir_object/:
        gs://bucket2/dir_object/object6
        """
    )
    self.assertEqual(output, expected_output)

    client.list_buckets.assert_called_once_with(cloud_api.FieldsScope.SHORT)
    self.assertEqual(client.list_objects.mock_calls, [
        mock.call(
            all_versions=False,
            bucket_name=self.bucket1.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=None),
        mock.call(
            all_versions=False,
            bucket_name=self.bucket1.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=self.dir1.prefix),
        mock.call(
            all_versions=False,
            bucket_name=self.bucket1.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=self.subdir1.prefix),
        mock.call(
            all_versions=False,
            bucket_name=self.bucket1.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=self.subdir2.prefix),
        mock.call(
            all_versions=False,
            bucket_name=self.bucket1.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=self.dir2.prefix),
        mock.call(
            all_versions=False,
            bucket_name=self.bucket1.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=self.subdir3.prefix),
        mock.call(
            all_versions=False,
            bucket_name=self.bucket2.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=None),
        mock.call(
            all_versions=False,
            bucket_name=self.bucket2.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=self.dir_duplicate_of_object.prefix)
    ])
def get_unknown_resource(url_string):
  url = storage_url.storage_url_from_string(url_string)
  return resource_reference.UnknownResource(url)
def get_file_directory_resource(path):
  url = storage_url.storage_url_from_string(path)
  return resource_reference.FileDirectoryResource(url)
def get_file_object_resource(path):
  url = storage_url.storage_url_from_string(path)
  return resource_reference.FileObjectResource(url)
    def Run(self, args):
        if args.no_clobber and args.if_generation_match:
            raise ValueError(
                'Cannot specify both generation precondition and no-clobber.')

        encryption_util.initialize_key_store(args)

        source_expansion_iterator = name_expansion.NameExpansionIterator(
            args.source,
            all_versions=args.all_versions,
            recursion_requested=args.recursive,
            ignore_symlinks=args.ignore_symlinks)
        task_status_queue = task_graph_executor.multiprocessing_context.Queue()

        raw_destination_url = storage_url.storage_url_from_string(
            args.destination)
        if (isinstance(raw_destination_url, storage_url.FileUrl)
                and args.storage_class):
            raise ValueError(
                'Cannot specify storage class for a non-cloud destination: {}'.
                format(raw_destination_url))

        parallelizable = True
        shared_stream = None
        if (args.all_versions
                and (properties.VALUES.storage.process_count.GetInt() != 1
                     or properties.VALUES.storage.thread_count.GetInt() != 1)):
            log.warning(
                'Using sequential instead of parallel task execution. This will'
                ' maintain version ordering when copying all versions of an object.'
            )
            parallelizable = False
        if (isinstance(raw_destination_url, storage_url.FileUrl)
                and raw_destination_url.is_pipe):
            log.warning('Downloading to a pipe.'
                        ' This command may stall until the pipe is read.')
            parallelizable = False
            shared_stream = files.BinaryFileWriter(args.destination)

        user_request_args = (
            user_request_args_factory.get_user_request_args_from_command_args(
                args,
                metadata_type=user_request_args_factory.MetadataType.OBJECT))
        task_iterator = copy_task_iterator.CopyTaskIterator(
            source_expansion_iterator,
            args.destination,
            custom_md5_digest=args.content_md5,
            do_not_decompress=args.do_not_decompress,
            print_created_message=args.print_created_message,
            shared_stream=shared_stream,
            skip_unsupported=args.skip_unsupported,
            task_status_queue=task_status_queue,
            user_request_args=user_request_args,
        )
        self.exit_code = task_executor.execute_tasks(
            task_iterator,
            parallelizable=parallelizable,
            task_status_queue=task_status_queue,
            progress_manager_args=task_status.ProgressManagerArgs(
                task_status.IncrementType.FILES_AND_BYTES,
                manifest_path=user_request_args.manifest_path,
            ),
            continue_on_error=args.continue_on_error,
        )

        if shared_stream:
            shared_stream.close()
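Two branches above disable parallel execution; here is a compact restatement of those conditions as a hypothetical helper (it reuses storage_url.FileUrl from the snippet and adds no behavior):

def _is_parallelizable(args, raw_destination_url, process_count, thread_count):
  # Sequential execution preserves version ordering for --all-versions copies,
  # and downloads to a pipe must write to a single shared stream.
  if args.all_versions and (process_count != 1 or thread_count != 1):
    return False
  if (isinstance(raw_destination_url, storage_url.FileUrl)
      and raw_destination_url.is_pipe):
    return False
  return True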
 def test_gets_prefix_resource(self):
     url_string = 'gs://bucket/prefix/'
     parsed_url = storage_url.storage_url_from_string(url_string)
     resource = resource_reference.PrefixResource(parsed_url, 'prefix/')
     self.assertEqual(test_resources.from_url_string(url_string), resource)
 def test_gets_object_resource(self):
     url_string = 'gs://bucket/object#1'
     parsed_url = storage_url.storage_url_from_string(url_string)
     resource = resource_reference.ObjectResource(parsed_url)
     self.assertEqual(test_resources.from_url_string(url_string), resource)
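The '#1' suffix in 'gs://bucket/object#1' denotes an object generation, which from_url_string forwards via parsed_url.generation. A minimal hypothetical split for illustration, not the parser storage_url_from_string actually uses:

def _split_generation(object_name_with_generation):
  # 'object#1' -> ('object', '1'); no '#' -> (name, None).
  name, _, generation = object_name_with_generation.partition('#')
  return name, generation or None

# _split_generation('object#1') -> ('object', '1')
# _split_generation('object')   -> ('object', None)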
  def test_execute_lists_object_url_with_single_wildcard_followed_by_single_wildcard_properly(
      self, client):
    """Check if all subdirectories are matched and formatted."""
    client.list_objects.side_effect = [
        self.bucket1_top_level_resources, self.bucket1_dir1_resources,
        self.bucket1_dir1_subdir1_resources,
        self.bucket1_dir1_subdir2_resources, self.bucket1_dir2_resources,
        self.bucket1_dir2_subdir3_resources
    ]

    task = cloud_list_task.CloudListTask(
        storage_url.storage_url_from_string('gs://bucket1/*/*'))
    task.execute()

    output = self.GetOutput()
    expected_output = textwrap.dedent(
        """\
        gs://bucket1/dir1/object2

        gs://bucket1/dir1/subdir1/:
        gs://bucket1/dir1/subdir1/object3

        gs://bucket1/dir1/subdir2/:
        gs://bucket1/dir1/subdir2/object4

        gs://bucket1/dir2/subdir3/:
        gs://bucket1/dir2/subdir3/object5
        """
    )
    self.assertEqual(output, expected_output)

    self.assertEqual(client.list_objects.mock_calls, [
        mock.call(
            all_versions=False,
            bucket_name=self.bucket1.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=None),
        mock.call(
            all_versions=False,
            bucket_name=self.bucket1.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=self.dir1.prefix),
        mock.call(
            all_versions=False,
            bucket_name=self.bucket1.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=self.subdir1.prefix),
        mock.call(
            all_versions=False,
            bucket_name=self.bucket1.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=self.subdir2.prefix),
        mock.call(
            all_versions=False,
            bucket_name=self.bucket1.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=self.dir2.prefix),
        mock.call(
            all_versions=False,
            bucket_name=self.bucket1.name,
            delimiter='/',
            fields_scope=cloud_api.FieldsScope.SHORT,
            prefix=self.subdir3.prefix)
    ])
 def test_str_method(self):
     expanded_result = name_expansion.NameExpansionResult(
         test_resources.from_url_string('gs://bucket/dir1/obj1.txt'),
         storage_url.storage_url_from_string('gs://bucket/dir1/'))
     self.assertEqual(str(expanded_result), 'gs://bucket/dir1/obj1.txt')
 def test_gets_file_directory_resource(self):
     url_string = 'hi' + os.path.sep
     parsed_url = storage_url.storage_url_from_string(url_string)
     resource = resource_reference.FileDirectoryResource(parsed_url)
     self.assertEqual(test_resources.from_url_string(url_string), resource)
 def test_gets_file_object_resource(self):
     url_string = 'hi.txt'
     parsed_url = storage_url.storage_url_from_string(url_string)
     resource = resource_reference.FileObjectResource(parsed_url)
     self.assertEqual(test_resources.from_url_string(url_string), resource)
 def SetUp(self):
     # self.root_path is a temp dir which gets deleted during TearDown.
     self.local_file = self.Touch(os.path.join(self.root_path, 'fake'),
                                  'file.txt')
     self.local_file_url = storage_url.storage_url_from_string(
         self.local_file)
def _create_or_modify_transfer_spec(job, args, messages):
    """Creates or modifies TransferSpec based on args."""
    if not job.transferSpec:
        job.transferSpec = messages.TransferSpec()

    if getattr(args, 'source', None):
        # Clear any existing source to make room for the new one.
        job.transferSpec.httpDataSource = None
        job.transferSpec.posixDataSource = None
        job.transferSpec.gcsDataSource = None
        job.transferSpec.awsS3DataSource = None
        job.transferSpec.azureBlobStorageDataSource = None

        try:
            source_url = storage_url.storage_url_from_string(args.source)
        except errors.InvalidUrlError:
            if args.source.startswith(storage_url.ProviderPrefix.HTTP.value):
                job.transferSpec.httpDataSource = messages.HttpData(
                    listUrl=args.source)
                source_url = None
            else:
                raise
        else:
            if source_url.scheme is storage_url.ProviderPrefix.FILE:
                source_url = _prompt_and_add_valid_scheme(source_url)

            if source_url.scheme is storage_url.ProviderPrefix.POSIX:
                job.transferSpec.posixDataSource = messages.PosixFilesystem(
                    rootDirectory=source_url.object_name)
            elif source_url.scheme is storage_url.ProviderPrefix.GCS:
                job.transferSpec.gcsDataSource = messages.GcsData(
                    bucketName=source_url.bucket_name,
                    path=source_url.object_name,
                )
            elif source_url.scheme is storage_url.ProviderPrefix.S3:
                job.transferSpec.awsS3DataSource = messages.AwsS3Data(
                    bucketName=source_url.bucket_name,
                    path=source_url.object_name,
                )
            elif isinstance(source_url, storage_url.AzureUrl):
                job.transferSpec.azureBlobStorageDataSource = (
                    messages.AzureBlobStorageData(
                        container=source_url.bucket_name,
                        path=source_url.object_name,
                        storageAccount=source_url.account,
                    ))

    if getattr(args, 'destination', None):
        # Clear any existing destination to make room for the new one.
        job.transferSpec.posixDataSink = None
        job.transferSpec.gcsDataSink = None

        destination_url = storage_url.storage_url_from_string(args.destination)
        if destination_url.scheme is storage_url.ProviderPrefix.FILE:
            destination_url = _prompt_and_add_valid_scheme(destination_url)

        if destination_url.scheme is storage_url.ProviderPrefix.GCS:
            job.transferSpec.gcsDataSink = messages.GcsData(
                bucketName=destination_url.bucket_name,
                path=destination_url.object_name,
            )
        elif destination_url.scheme is storage_url.ProviderPrefix.POSIX:
            job.transferSpec.posixDataSink = messages.PosixFilesystem(
                rootDirectory=destination_url.object_name)

    if getattr(args, 'destination_agent_pool', None):
        job.transferSpec.sinkAgentPoolName = name_util.add_agent_pool_prefix(
            args.destination_agent_pool)
    if getattr(args, 'source_agent_pool', None):
        job.transferSpec.sourceAgentPoolName = name_util.add_agent_pool_prefix(
            args.source_agent_pool)
    if getattr(args, 'intermediate_storage_path', None):
        intermediate_storage_url = storage_url.storage_url_from_string(
            args.intermediate_storage_path)
        job.transferSpec.gcsIntermediateDataLocation = messages.GcsData(
            bucketName=intermediate_storage_url.bucket_name,
            path=intermediate_storage_url.object_name)
    if getattr(args, 'manifest_file', None):
        job.transferSpec.transferManifest = messages.TransferManifest(
            location=args.manifest_file)

    _create_or_modify_creds(job.transferSpec, args, messages)
    _create_or_modify_object_conditions(job.transferSpec, args, messages)
    _create_or_modify_transfer_options(job.transferSpec, args, messages)
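The source branch in _create_or_modify_transfer_spec reduces to a scheme-to-field correspondence; a hypothetical summary in code form (field and message names are taken from the snippet above, the dict itself is only a restatement):

# Parsed source URL scheme -> TransferSpec field populated above.
SOURCE_FIELD_BY_SCHEME = {
    'posix': 'posixDataSource',  # messages.PosixFilesystem(rootDirectory=...)
    'gs': 'gcsDataSource',       # messages.GcsData(bucketName=..., path=...)
    's3': 'awsS3DataSource',     # messages.AwsS3Data(bucketName=..., path=...)
    'http': 'httpDataSource',    # messages.HttpData(listUrl=...) when parsing raises InvalidUrlError
}
# Azure URLs (storage_url.AzureUrl) populate azureBlobStorageDataSource, and
# file:// sources are first routed through _prompt_and_add_valid_scheme.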