def from_url_string(url_string):
  """Convert test resource URL to resource object. Do not use in production.

  Do not use in production because terminating with a delimiter is not always
  an accurate indicator of whether a URL is a prefix. For example, a query for
  "gs://bucket/dir" may have just forgotten the trailing "/". Furthermore,
  different operating systems may have different ways to signal that
  filesystem paths point to directories.

  Args:
    url_string (str): Path to resource. Ex: "gs://bucket/hi" or "/bin/cat.png".

  Returns:
    resource.Resource subclass appropriate for URL.
  """
  parsed_url = storage_url.storage_url_from_string(url_string)
  if isinstance(parsed_url, storage_url.FileUrl):
    # See docstring.
    if url_string.endswith(parsed_url.delimiter):
      return get_file_directory_resource(url_string)
    return get_file_object_resource(url_string)

  # CloudUrl because it's not a FileUrl.
  if parsed_url.is_bucket():
    return get_bucket_resource(parsed_url.scheme, parsed_url.bucket_name)
  if parsed_url.is_object() and not url_string.endswith(
      storage_url.CloudUrl.CLOUD_URL_DELIM):
    return get_object_resource(parsed_url.scheme, parsed_url.bucket_name,
                               parsed_url.object_name, parsed_url.generation)
  # See docstring.
  if parsed_url.is_object() and url_string.endswith(
      storage_url.CloudUrl.CLOUD_URL_DELIM):
    return get_prefix_resource(parsed_url.scheme, parsed_url.bucket_name,
                               parsed_url.object_name)
  return get_unknown_resource(url_string)
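# Hedged usage sketch (not part of the source): the trailing-delimiter
# heuristic described in the docstring above decides which resource type a
# test URL maps to; the tests later in this section exercise each branch.
#
#   from_url_string('gs://bucket')           -> bucket resource
#   from_url_string('gs://bucket/obj')       -> object resource
#   from_url_string('gs://bucket/prefix/')   -> prefix resource
#   from_url_string('dir' + os.path.sep)     -> file directory resource
#   from_url_string('file.txt')              -> file object resource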
def _get_container_iterator(self, cloud_url, recursion_level):
  """For recursing into and retrieving the contents of a container.

  Args:
    cloud_url (storage_url.CloudUrl): Container URL for recursing into.
    recursion_level (int): Determines if iterator should keep recursing.

  Returns:
    _BaseFormatWrapper generator.
  """
  # End URL with '/*', so WildcardIterator won't filter out its contents.
  new_url_string = cloud_url.versionless_url_string
  if cloud_url.versionless_url_string[-1] != cloud_url.delimiter:
    new_url_string += cloud_url.delimiter
  new_cloud_url = storage_url.storage_url_from_string(new_url_string + '*')

  fields_scope = _translate_display_detail_to_fields_scope(
      self._display_detail, is_bucket_listing=False)
  iterator = wildcard_iterator.CloudWildcardIterator(
      new_cloud_url,
      all_versions=self._all_versions,
      fields_scope=fields_scope)
  return self._recursion_helper(iterator, recursion_level)
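# A minimal standalone sketch (assumed helper name, not part of the source) of
# the URL rewrite performed above: ensure the container URL ends with its
# delimiter, then append '*' so the wildcard iterator yields the container's
# contents instead of the container itself.
def _container_listing_pattern(url_string, delimiter='/'):
  if not url_string.endswith(delimiter):
    url_string += delimiter
  return url_string + '*'

# _container_listing_pattern('gs://bucket/dir')  == 'gs://bucket/dir/*'
# _container_listing_pattern('gs://bucket/dir/') == 'gs://bucket/dir/*'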
def Run(self, args):
  for url_string in args.urls:
    if not storage_url.storage_url_from_string(url_string).is_bucket():
      raise errors.InvalidUrlError(
          'buckets delete only accepts cloud bucket URLs. Example:'
          ' "gs://bucket"')

  task_status_queue = multiprocessing.Queue()
  bucket_iterator = delete_task_iterator_factory.DeleteTaskIteratorFactory(
      name_expansion.NameExpansionIterator(args.urls, include_buckets=True),
      task_status_queue=task_status_queue).bucket_iterator()
  plurality_checkable_bucket_iterator = (
      plurality_checkable_iterator.PluralityCheckableIterator(
          bucket_iterator))

  task_executor.execute_tasks(
      plurality_checkable_bucket_iterator,
      parallelizable=True,
      task_status_queue=task_status_queue,
      progress_manager_args=task_status.ProgressManagerArgs(
          increment_type=task_status.IncrementType.INTEGER,
          manifest_path=None))
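# Assumed command-line invocation that reaches the Run method above (gcloud
# storage surface; shown for orientation only):
#   gcloud storage buckets delete gs://bucket1 gs://bucket2
# Any non-bucket URL, e.g. gs://bucket1/object, fails fast with
# InvalidUrlError before any deletion tasks are scheduled.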
def __init__(self,
             url,
             all_versions=False,
             fields_scope=cloud_api.FieldsScope.NO_ACL):
  """Instantiates an iterator that matches the wildcard URL.

  Args:
    url (CloudUrl): CloudUrl that may contain wildcard that needs expansion.
    all_versions (bool): If true, the iterator yields all versions of objects
      matching the wildcard. If false, yields just the live object version.
    fields_scope (cloud_api.FieldsScope): Determines amount of metadata
      returned by API.
  """
  super(CloudWildcardIterator, self).__init__()
  url = _compress_url_wildcards(url)
  self._url = url
  self._all_versions = all_versions
  self._fields_scope = fields_scope
  self._client = api_factory.get_api(url.scheme)

  if url.url_string.endswith(url.delimiter):
    # Forces the API to return prefixes instead of their contents.
    url = storage_url.storage_url_from_string(
        storage_url.rstrip_one_delimiter(url.url_string))
def test_long_display_detail_converts_timezone_behind_utc(self, client):
  """Long listing converts creation_time in a timezone behind UTC to UTC."""
  self.object1.creation_time = datetime.datetime(
      1111, 1, 1,
      tzinfo=datetime.timezone(datetime.timedelta(hours=-4, minutes=-40)))
  client.get_bucket.side_effect = [self.bucket1]
  client.list_objects.side_effect = [self.bucket1_top_level_resources]

  task = cloud_list_task.CloudListTask(
      storage_url.storage_url_from_string('gs://bucket1/'),
      display_detail=cloud_list_task.DisplayDetail.LONG)
  task.execute()

  output = self.GetOutput()
  expected_output = textwrap.dedent("""\
      0  1111-01-01T04:40:00Z  gs://bucket1/object1
      gs://bucket1/dir1/
      gs://bucket1/dir2/
      TOTAL: 1 objects, 0 bytes (0B)
      """)
  self.assertEqual(output, expected_output)
def test_uploads_object_with_object_resource(self):
  upload_metadata = self.messages.Object(name='o', bucket='b')
  request = self.messages.StorageObjectsInsertRequest(
      bucket=upload_metadata.bucket, object=upload_metadata)
  self.apitools_client.objects.Insert.Expect(request,
                                             response=upload_metadata)

  upload_stream = mock.mock_open()
  upload_resource = resource_reference.FileObjectResource(
      storage_url.storage_url_from_string('gs://b/o'))
  expected_resource = gcs_api._object_resource_from_metadata(upload_metadata)
  with mock.patch.object(apitools_transfer, 'Upload') as mock_upload:
    observed_resource = self.gcs_client.upload_object(upload_stream,
                                                      upload_resource)
    self.assertEqual(observed_resource, expected_resource)

  mock_upload.assert_called_once_with(
      upload_stream,
      gcs_api.DEFAULT_CONTENT_TYPE,
      total_size=None,
      auto_transfer=True,
      num_retries=gcs_api.DEFAULT_NUM_RETRIES,
      gzip_encoded=False)
def test_join_returns_new_url_with_appended_part(self, url_str, part,
                                                 expected_string):
  url = storage_url.storage_url_from_string(url_str)
  new_url = url.join(part)
  self.assertEqual(new_url.url_string, expected_string)
  self.assertEqual(type(url), type(new_url))
def test_file_url_isdir_with_invalid_path(self):
  file_url_object = storage_url.storage_url_from_string('invalid/dirpath')
  self.assertFalse(file_url_object.isdir())
def test_file_url_isdir(self):
  file_url_object = storage_url.storage_url_from_string(
      os.path.dirname(self.local_file))
  self.assertTrue(file_url_object.isdir())
def test_file_url_exists_with_invalid_path(self):
  file_url_object = storage_url.storage_url_from_string('invalid/path.txt')
  self.assertFalse(file_url_object.exists())
def test_gets_bucket_resource(self):
  url_string = 'gs://bucket'
  cloud_url = storage_url.storage_url_from_string(url_string)
  resource = resource_reference.BucketResource(cloud_url)
  self.assertEqual(test_resources.from_url_string(url_string), resource)
def test_execute_lists_multiple_buckets_with_recursive_flag_properly(
    self, client):
  """Test if all content of all buckets is shown recursively."""
  client.list_buckets.side_effect = [self.bucket_resources]
  client.list_objects.side_effect = [
      self.bucket1_top_level_resources, self.bucket1_dir1_resources,
      self.bucket1_dir1_subdir1_resources, self.bucket1_dir1_subdir2_resources,
      self.bucket1_dir2_resources, self.bucket1_dir2_subdir3_resources,
      self.bucket2_top_level_resources, self.bucket2_dir_object_resources
  ]

  task = cloud_list_task.CloudListTask(
      storage_url.storage_url_from_string('gs://bucket*'),
      recursion_flag=True)
  task.execute()

  output = self.GetOutput()
  expected_output = textwrap.dedent("""\
      gs://bucket1:
      gs://bucket1/object1
      gs://bucket1/dir1/:
      gs://bucket1/dir1/object2
      gs://bucket1/dir1/subdir1/:
      gs://bucket1/dir1/subdir1/object3
      gs://bucket1/dir1/subdir2/:
      gs://bucket1/dir1/subdir2/object4
      gs://bucket1/dir2/:
      gs://bucket1/dir2/subdir3/:
      gs://bucket1/dir2/subdir3/object5
      gs://bucket2:
      gs://bucket2/dir_object
      gs://bucket2/dir_object/:
      gs://bucket2/dir_object/object6
      """)
  self.assertEqual(output, expected_output)

  client.list_buckets.assert_called_once_with(cloud_api.FieldsScope.SHORT)
  self.assertEqual(client.list_objects.mock_calls, [
      mock.call(
          all_versions=False,
          bucket_name=self.bucket1.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=None),
      mock.call(
          all_versions=False,
          bucket_name=self.bucket1.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=self.dir1.prefix),
      mock.call(
          all_versions=False,
          bucket_name=self.bucket1.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=self.subdir1.prefix),
      mock.call(
          all_versions=False,
          bucket_name=self.bucket1.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=self.subdir2.prefix),
      mock.call(
          all_versions=False,
          bucket_name=self.bucket1.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=self.dir2.prefix),
      mock.call(
          all_versions=False,
          bucket_name=self.bucket1.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=self.subdir3.prefix),
      mock.call(
          all_versions=False,
          bucket_name=self.bucket2.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=None),
      mock.call(
          all_versions=False,
          bucket_name=self.bucket2.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=self.dir_duplicate_of_object.prefix)
  ])
def get_unknown_resource(url_string):
  url = storage_url.storage_url_from_string(url_string)
  return resource_reference.UnknownResource(url)
def get_file_directory_resource(path):
  url = storage_url.storage_url_from_string(path)
  return resource_reference.FileDirectoryResource(url)
def get_file_object_resource(path):
  url = storage_url.storage_url_from_string(path)
  return resource_reference.FileObjectResource(url)
def Run(self, args):
  if args.no_clobber and args.if_generation_match:
    raise ValueError(
        'Cannot specify both generation precondition and no-clobber.')
  encryption_util.initialize_key_store(args)

  source_expansion_iterator = name_expansion.NameExpansionIterator(
      args.source,
      all_versions=args.all_versions,
      recursion_requested=args.recursive,
      ignore_symlinks=args.ignore_symlinks)
  task_status_queue = task_graph_executor.multiprocessing_context.Queue()

  raw_destination_url = storage_url.storage_url_from_string(args.destination)
  if (isinstance(raw_destination_url, storage_url.FileUrl) and
      args.storage_class):
    raise ValueError(
        'Cannot specify storage class for a non-cloud destination: {}'.format(
            raw_destination_url))

  parallelizable = True
  shared_stream = None
  if (args.all_versions and
      (properties.VALUES.storage.process_count.GetInt() != 1 or
       properties.VALUES.storage.thread_count.GetInt() != 1)):
    log.warning(
        'Using sequential instead of parallel task execution. This will'
        ' maintain version ordering when copying all versions of an object.')
    parallelizable = False
  if (isinstance(raw_destination_url, storage_url.FileUrl) and
      raw_destination_url.is_pipe):
    log.warning('Downloading to a pipe.'
                ' This command may stall until the pipe is read.')
    parallelizable = False
    shared_stream = files.BinaryFileWriter(args.destination)

  user_request_args = (
      user_request_args_factory.get_user_request_args_from_command_args(
          args, metadata_type=user_request_args_factory.MetadataType.OBJECT))
  task_iterator = copy_task_iterator.CopyTaskIterator(
      source_expansion_iterator,
      args.destination,
      custom_md5_digest=args.content_md5,
      do_not_decompress=args.do_not_decompress,
      print_created_message=args.print_created_message,
      shared_stream=shared_stream,
      skip_unsupported=args.skip_unsupported,
      task_status_queue=task_status_queue,
      user_request_args=user_request_args,
  )
  self.exit_code = task_executor.execute_tasks(
      task_iterator,
      parallelizable=parallelizable,
      task_status_queue=task_status_queue,
      progress_manager_args=task_status.ProgressManagerArgs(
          task_status.IncrementType.FILES_AND_BYTES,
          manifest_path=user_request_args.manifest_path,
      ),
      continue_on_error=args.continue_on_error,
  )

  if shared_stream:
    shared_stream.close()
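# Assumed command-line invocations that reach this Run method (gcloud storage
# cp; flag spellings inferred from the args attributes used above):
#   gcloud storage cp ./file.txt gs://bucket/path
#   gcloud storage cp --recursive ./dir gs://bucket/prefix
# Validation (generation precondition vs. no-clobber, storage class on a local
# destination) and the sequential fallback for copying all object versions all
# happen before any copy tasks are executed.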
def test_gets_prefix_resource(self):
  url_string = 'gs://bucket/prefix/'
  parsed_url = storage_url.storage_url_from_string(url_string)
  resource = resource_reference.PrefixResource(parsed_url, 'prefix/')
  self.assertEqual(test_resources.from_url_string(url_string), resource)
def test_gets_object_resource(self):
  url_string = 'gs://bucket/object#1'
  parsed_url = storage_url.storage_url_from_string(url_string)
  resource = resource_reference.ObjectResource(parsed_url)
  self.assertEqual(test_resources.from_url_string(url_string), resource)
def test_execute_lists_object_url_with_single_wildcard_followed_by_single_wildcard_properly(
    self, client):
  """Check if all subdirectories are matched and formatted."""
  client.list_objects.side_effect = [
      self.bucket1_top_level_resources, self.bucket1_dir1_resources,
      self.bucket1_dir1_subdir1_resources, self.bucket1_dir1_subdir2_resources,
      self.bucket1_dir2_resources, self.bucket1_dir2_subdir3_resources
  ]

  task = cloud_list_task.CloudListTask(
      storage_url.storage_url_from_string('gs://bucket1/*/*'))
  task.execute()

  output = self.GetOutput()
  expected_output = textwrap.dedent("""\
      gs://bucket1/dir1/object2
      gs://bucket1/dir1/subdir1/:
      gs://bucket1/dir1/subdir1/object3
      gs://bucket1/dir1/subdir2/:
      gs://bucket1/dir1/subdir2/object4
      gs://bucket1/dir2/subdir3/:
      gs://bucket1/dir2/subdir3/object5
      """)
  self.assertEqual(output, expected_output)

  self.assertEqual(client.list_objects.mock_calls, [
      mock.call(
          all_versions=False,
          bucket_name=self.bucket1.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=None),
      mock.call(
          all_versions=False,
          bucket_name=self.bucket1.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=self.dir1.prefix),
      mock.call(
          all_versions=False,
          bucket_name=self.bucket1.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=self.subdir1.prefix),
      mock.call(
          all_versions=False,
          bucket_name=self.bucket1.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=self.subdir2.prefix),
      mock.call(
          all_versions=False,
          bucket_name=self.bucket1.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=self.dir2.prefix),
      mock.call(
          all_versions=False,
          bucket_name=self.bucket1.name,
          delimiter='/',
          fields_scope=cloud_api.FieldsScope.SHORT,
          prefix=self.subdir3.prefix)
  ])
def test_str_method(self):
  expanded_result = name_expansion.NameExpansionResult(
      test_resources.from_url_string('gs://bucket/dir1/obj1.txt'),
      storage_url.storage_url_from_string('gs://bucket/dir1/'))
  self.assertEqual(str(expanded_result), 'gs://bucket/dir1/obj1.txt')
def test_gets_file_directory_resource(self):
  url_string = 'hi' + os.path.sep
  parsed_url = storage_url.storage_url_from_string(url_string)
  resource = resource_reference.FileDirectoryResource(parsed_url)
  self.assertEqual(test_resources.from_url_string(url_string), resource)
def test_gets_file_object_resource(self):
  url_string = 'hi.txt'
  parsed_url = storage_url.storage_url_from_string(url_string)
  resource = resource_reference.FileObjectResource(parsed_url)
  self.assertEqual(test_resources.from_url_string(url_string), resource)
def SetUp(self):
  # self.root_path is a temp dir which gets deleted during TearDown.
  self.local_file = self.Touch(
      os.path.join(self.root_path, 'fake'), 'file.txt')
  self.local_file_url = storage_url.storage_url_from_string(self.local_file)
def _create_or_modify_transfer_spec(job, args, messages):
  """Creates or modifies TransferSpec based on args."""
  if not job.transferSpec:
    job.transferSpec = messages.TransferSpec()

  if getattr(args, 'source', None):
    # Clear any existing source to make space for new one.
    job.transferSpec.httpDataSource = None
    job.transferSpec.posixDataSource = None
    job.transferSpec.gcsDataSource = None
    job.transferSpec.awsS3DataSource = None
    job.transferSpec.azureBlobStorageDataSource = None

    try:
      source_url = storage_url.storage_url_from_string(args.source)
    except errors.InvalidUrlError:
      if args.source.startswith(storage_url.ProviderPrefix.HTTP.value):
        job.transferSpec.httpDataSource = messages.HttpData(
            listUrl=args.source)
        source_url = None
      else:
        raise
    else:
      if source_url.scheme is storage_url.ProviderPrefix.FILE:
        source_url = _prompt_and_add_valid_scheme(source_url)

      if source_url.scheme is storage_url.ProviderPrefix.POSIX:
        job.transferSpec.posixDataSource = messages.PosixFilesystem(
            rootDirectory=source_url.object_name)
      elif source_url.scheme is storage_url.ProviderPrefix.GCS:
        job.transferSpec.gcsDataSource = messages.GcsData(
            bucketName=source_url.bucket_name,
            path=source_url.object_name,
        )
      elif source_url.scheme is storage_url.ProviderPrefix.S3:
        job.transferSpec.awsS3DataSource = messages.AwsS3Data(
            bucketName=source_url.bucket_name,
            path=source_url.object_name,
        )
      elif isinstance(source_url, storage_url.AzureUrl):
        job.transferSpec.azureBlobStorageDataSource = (
            messages.AzureBlobStorageData(
                container=source_url.bucket_name,
                path=source_url.object_name,
                storageAccount=source_url.account,
            ))

  if getattr(args, 'destination', None):
    # Clear any existing destination to make space for new one.
    job.transferSpec.posixDataSink = None
    job.transferSpec.gcsDataSink = None

    destination_url = storage_url.storage_url_from_string(args.destination)
    if destination_url.scheme is storage_url.ProviderPrefix.FILE:
      destination_url = _prompt_and_add_valid_scheme(destination_url)

    if destination_url.scheme is storage_url.ProviderPrefix.GCS:
      job.transferSpec.gcsDataSink = messages.GcsData(
          bucketName=destination_url.bucket_name,
          path=destination_url.object_name,
      )
    elif destination_url.scheme is storage_url.ProviderPrefix.POSIX:
      job.transferSpec.posixDataSink = messages.PosixFilesystem(
          rootDirectory=destination_url.object_name)

  if getattr(args, 'destination_agent_pool', None):
    job.transferSpec.sinkAgentPoolName = name_util.add_agent_pool_prefix(
        args.destination_agent_pool)
  if getattr(args, 'source_agent_pool', None):
    job.transferSpec.sourceAgentPoolName = name_util.add_agent_pool_prefix(
        args.source_agent_pool)

  if getattr(args, 'intermediate_storage_path', None):
    intermediate_storage_url = storage_url.storage_url_from_string(
        args.intermediate_storage_path)
    job.transferSpec.gcsIntermediateDataLocation = messages.GcsData(
        bucketName=intermediate_storage_url.bucket_name,
        path=intermediate_storage_url.object_name)

  if getattr(args, 'manifest_file', None):
    job.transferSpec.transferManifest = messages.TransferManifest(
        location=args.manifest_file)

  _create_or_modify_creds(job.transferSpec, args, messages)
  _create_or_modify_object_conditions(job.transferSpec, args, messages)
  _create_or_modify_transfer_options(job.transferSpec, args, messages)
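# Assumed command-line invocations that feed this helper (gcloud transfer jobs
# create/update; only the positional source and destination are shown):
#   gcloud transfer jobs create gs://source-bucket gs://destination-bucket
#   gcloud transfer jobs create s3://source-bucket gs://destination-bucket
# A scheme-less source or destination parses as a FileUrl and is routed
# through _prompt_and_add_valid_scheme before a data source or sink is chosen.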