def __iter__(self):
  """Expands each URL in self._urls and yields the results.

  Yields:
    NameExpansionResult instance for every matched resource.

  Raises:
    errors.InvalidUrlError: A URL matched no objects.
  """
  for url in self._urls:
    matches = plurality_checkable_iterator.PluralityCheckableIterator(
        wildcard_iterator.get_wildcard_iterator(url))
    if matches.is_empty():
      raise errors.InvalidUrlError(
          '{} matched no objects.'.format(url))

    for resource in matches:
      should_recurse = (
          self._recursion_requested and resource.is_container())
      if not should_recurse:
        yield NameExpansionResult(resource, resource.storage_url)
        continue
      # Recursive expansion: '**' matches every object below the container.
      recursive_url = resource.storage_url.join('**')
      for child in wildcard_iterator.get_wildcard_iterator(
          recursive_url.url_string):
        yield NameExpansionResult(child, resource.storage_url)
def test_gcs_bucket_wildcard_and_object_wildcard(self, fields_scope, client):
  """Test multiple object matches in multiple buckets.

  Args:
    fields_scope: Parameterized cloud_api.FieldsScope value under test.
    client: Mocked storage API client.
  """
  self.bucket1_object = test_resources.get_object_resource(
      storage_url.ProviderPrefix.GCS, 'bucket1', 'a.txt')
  self.bucket2_object = test_resources.get_object_resource(
      storage_url.ProviderPrefix.GCS, 'bucket2', 'a.txt')
  client.list_buckets.side_effect = [self.buckets_response]
  client.list_objects.side_effect = [[self.bucket1_object],
                                     [self.bucket2_object]]

  resource_iterator = wildcard_iterator.get_wildcard_iterator(
      'gs://bucket*/*', fields_scope=fields_scope)

  self.assertEqual(list(resource_iterator),
                   [self.bucket1_object, self.bucket2_object])
  client.list_buckets.assert_called_once_with(
      cloud_api.FieldsScope(fields_scope))
  # Bug fix: the original assigned a list to client.list_objects.mock_calls,
  # which verifies nothing. Assert the expected per-bucket listing calls.
  client.list_objects.assert_has_calls([
      mock.call(all_versions=False, bucket_name='bucket1', delimiter='/',
                fields_scope=fields_scope, prefix=None),
      mock.call(all_versions=False, bucket_name='bucket2', delimiter='/',
                fields_scope=fields_scope, prefix=None)
  ])
def _expand_destination_wildcards(self):
  """Expands destination wildcards.

  Ensures that only one resource matches the wildcard expanded string. Much
  like the unix cp command, the storage surface only supports copy operations
  to one user-specified destination.

  Returns:
    A resource_reference.Resource, or None if no matching resource is found.

  Raises:
    ValueError if more than one resource is matched, or the source contained
    an unescaped wildcard and no resources were matched.
  """
  matches = plurality_checkable_iterator.PluralityCheckableIterator(
      wildcard_iterator.get_wildcard_iterator(self._destination_string))

  # A wildcard that expanded to nothing is as invalid as multiple matches.
  unexpanded_wildcard = (
      matches.is_empty() and
      wildcard_iterator.contains_wildcard(self._destination_string))
  if matches.is_plural() or unexpanded_wildcard:
    raise ValueError('Destination ({}) must match exactly one URL'.format(
        self._destination_string))

  if matches.is_empty():
    return None
  return next(matches)
def _get_top_level_iterator(self):
  """Expands each user-supplied URL into name expansion results.

  Yields:
    Tuple of (url string, NameExpansionResult) for every resource the URL
    expands to.
  """
  for url in self._urls:
    # Hoisted out of the inner loop: the parsed original URL is invariant
    # for all resources produced by one user-supplied URL string.
    original_storage_url = storage_url.storage_url_from_string(url)
    for resource in wildcard_iterator.get_wildcard_iterator(
        url,
        all_versions=self.all_versions,
        ignore_symlinks=self._ignore_symlinks):
      yield url, self._get_name_expansion_result(
          resource, resource.storage_url, original_storage_url)
def update_task_iterator(self, args):
  """Yields a bucket-update task for every resource the URLs match.

  Args:
    args: Parsed command arguments containing the target bucket URLs.

  Yields:
    update_bucket_task.UpdateBucketTask per matched bucket resource.
  """
  request_args = (
      user_request_args_factory.get_user_request_args_from_command_args(
          args, metadata_type=user_request_args_factory.MetadataType.BUCKET))
  for url_string in args.url:
    for bucket_resource in wildcard_iterator.get_wildcard_iterator(
        url_string):
      yield update_bucket_task.UpdateBucketTask(
          bucket_resource, user_request_args=request_args)
def test_gcs_bucket_url_without_wildcard(self, fields_scope, client):
  """Test bucket with no bucket-level expansion."""
  expected = self.buckets_response[:1]
  client.get_bucket.side_effect = [self.buckets_response[0]]

  resource_iterator = wildcard_iterator.get_wildcard_iterator(
      'gs://bucket1', fields_scope=fields_scope)

  self.assertEqual(list(resource_iterator), expected)
  client.get_bucket.assert_called_once_with(
      self.bucket1.name, cloud_api.FieldsScope(fields_scope))
def test_gcs_bucket_url_with_wildcard_gets_all_buckets(
    self, fields_scope, client):
  """Test multiple bucket with bucket-level expansion."""
  client.list_buckets.side_effect = [self.buckets_response]

  observed = list(
      wildcard_iterator.get_wildcard_iterator(
          'gs://bucket*', fields_scope=fields_scope))

  self.assertEqual(observed, self.buckets_response)
  client.list_buckets.assert_called_once_with(
      cloud_api.FieldsScope(fields_scope))
def test_object_with_generation_without_wildcard(self, client):
  """Test with generation."""
  expected = test_resources.get_object_resource(
      storage_url.ProviderPrefix.GCS, 'bucket1', 'a.txt', '1')
  client.get_object_metadata.return_value = expected

  observed = list(
      wildcard_iterator.get_wildcard_iterator('gs://bucket1/a.txt#1'))

  self.assertEqual(observed, [expected])
  # A fully-specified URL must resolve via metadata lookup, not listing.
  client.get_object_metadata.assert_called_once_with(
      'bucket1', 'a.txt', '1', cloud_api.FieldsScope.NO_ACL)
  self.assertFalse(client.list_objects.called)
def _get_nested_objects_iterator(self, parent_name_expansion_result):
  """Expands a container resource into results for all nested objects.

  Args:
    parent_name_expansion_result: NameExpansionResult whose resource is a
      container to recurse into.

  Yields:
    NameExpansionResult for each object found under the container.
  """
  parent_resource = parent_name_expansion_result.resource
  # '**' matches every object below the container, at any depth.
  recursive_url = parent_resource.storage_url.join('**')
  for child in wildcard_iterator.get_wildcard_iterator(
      recursive_url.url_string,
      all_versions=self.all_versions,
      ignore_symlinks=self._ignore_symlinks):
    yield self._get_name_expansion_result(
        child, parent_resource.storage_url,
        parent_name_expansion_result.original_url)
def test_list_objects(self, mock_client, wildcard_url, expected_prefixes,
                      expected_objects):
  """Verifies prefix and object-name expansion for wildcard URLs."""
  mock_client.get_object_metadata.side_effect = api_errors.NotFoundError
  mock_client.list_objects.side_effect = self._list_objects_side_effect

  iterator = wildcard_iterator.get_wildcard_iterator(wildcard_url)
  observed_prefixes, observed_objects = _get_prefixes_and_object_names(
      iterator)

  self.assertEqual(observed_prefixes, expected_prefixes)
  self.assertEqual(observed_objects, expected_objects)
def test_list_objects_without_wildcard(self, mock_client):
  """Verifies a literal object URL resolves via a single metadata lookup."""
  expected = test_resources.get_object_resource(
      storage_url.ProviderPrefix.GCS, 'bucket', 'a/b.txt')
  mock_client.get_object_metadata.side_effect = [expected]

  observed = list(
      wildcard_iterator.get_wildcard_iterator('gs://bucket/a/b.txt'))

  self.assertEqual(observed, [expected])
  mock_client.get_object_metadata.assert_called_once_with(
      'bucket', 'a/b.txt', None, cloud_api.FieldsScope.NO_ACL)
  self.assertFalse(mock_client.list_objects.called)
def test_gcs_root_listing(self, fields_scope, client):
  """Test retrieving provider URL with no specified resource."""
  client.list_buckets.side_effect = [self.buckets_response]

  iterator = wildcard_iterator.get_wildcard_iterator(
      'gs://', fields_scope=fields_scope)

  observed_names = [resource.metadata.name for resource in iterator]
  expected_names = [bucket.name for bucket in self.buckets]
  self.assertEqual(observed_names, expected_names)
  client.list_buckets.assert_called_once_with(
      cloud_api.FieldsScope(fields_scope))
def __iter__(self):
  """Iterates over each URL in self._urls and yield the expanded result.

  Yields:
    NameExpansionResult instance.

  Raises:
    InvalidUrlError: No matching objects found.
  """
  for url in self._urls:
    resources = plurality_checkable_iterator.PluralityCheckableIterator(
        wildcard_iterator.get_wildcard_iterator(url))
    is_name_expansion_iterator_empty = True
    original_storage_url = storage_url.storage_url_from_string(url)
    # Iterate over all the resource_reference.Resource objects.
    for resource in resources:
      if not resource.is_container():
        yield NameExpansionResult(resource, resource.storage_url,
                                  original_storage_url)
        is_name_expansion_iterator_empty = False
        continue
      if not self._recursion_requested:
        # Bug fix: the message previously interpolated
        # resource.is_container() — always True on this branch — instead of
        # identifying the skipped resource.
        log.info('Omitting {} because it is a container, and recursion'
                 ' is not enabled.'.format(resource.storage_url))
        continue
      # Append '**' to fetch all objects under this container.
      new_storage_url = resource.storage_url.join('**')
      child_resources = wildcard_iterator.get_wildcard_iterator(
          new_storage_url.url_string)
      for child_resource in child_resources:
        yield NameExpansionResult(child_resource, resource.storage_url,
                                  original_storage_url)
        is_name_expansion_iterator_empty = False
    if is_name_expansion_iterator_empty:
      raise errors.InvalidUrlError(
          '{} matched no objects or files.'.format(url))
def test_gcs_bucket_url_with_wildcard_gets_single_bucket(
    self, fields_scope, client):
  """Test single bucket with bucket-level expansion."""
  client.list_buckets.side_effect = [self.buckets_response]

  observed = list(
      wildcard_iterator.get_wildcard_iterator(
          'gs://buck*1', fields_scope=fields_scope))

  self.assertEqual(
      observed, [gcs_api._bucket_resource_from_metadata(self.bucket1)])
  client.list_buckets.assert_called_once_with(
      cloud_api.FieldsScope(fields_scope))
def test_object_with_incorrect_generation(self, client):
  """Test with generation."""
  client.get_object_metadata.side_effect = api_errors.NotFoundError
  client.list_objects.return_value = self._object_resources_with_generation

  observed = list(
      wildcard_iterator.get_wildcard_iterator('gs://bucket1/b.txt#2'))

  # A generation that matches nothing yields no resources at all.
  self.assertEqual(observed, [])
  client.get_object_metadata.assert_called_once_with(
      'bucket1', 'b.txt', '2', cloud_api.FieldsScope.NO_ACL)
  client.list_objects.assert_called_once_with(
      'bucket1', 'b.txt', '/', True, cloud_api.FieldsScope.NO_ACL)
def test_object_with_generation(self, client, wildcard_url, request_prefix,
                                expected_resources):
  """Test with generation."""
  client.list_objects.return_value = self._object_resources_with_generation

  observed = list(wildcard_iterator.get_wildcard_iterator(wildcard_url))

  self.assertEqual(observed, expected_resources)
  # Wildcard URLs go straight to listing; no metadata lookup should happen.
  self.assertFalse(client.get_object_metadata.called)
  client.list_objects.assert_called_once_with(
      all_versions=True,
      bucket_name='bucket1',
      delimiter='/',
      fields_scope=cloud_api.FieldsScope.NO_ACL,
      prefix=request_prefix,
  )
def test_file_wildcard(self, wildcard_url, expected_dirs, expected_files):
  """Verifies file wildcard expansion with OS-native path separators."""

  def _localize(relative_path):
    # Convert POSIX-style test parameters to OS-native absolute paths.
    return os.path.join(self.root_path, relative_path.replace('/', os.sep))

  iterator = wildcard_iterator.get_wildcard_iterator(_localize(wildcard_url))
  observed_dirs, observed_files = _get_prefixes_and_object_names(iterator)

  self.assertCountEqual(observed_dirs,
                        [_localize(d) for d in expected_dirs])
  self.assertCountEqual(observed_files,
                        [_localize(f) for f in expected_files])
def test_gcs_list_object_with_fields_scope(self, fields_scope, client):
  """Test if list_objects gets correct fields_scope."""
  expected = [
      test_resources.get_object_resource(
          storage_url.ProviderPrefix.GCS, 'b', 'o.txt')
  ]
  client.list_objects.side_effect = [expected]

  observed = list(
      wildcard_iterator.get_wildcard_iterator(
          'gs://b/o*', fields_scope=fields_scope))

  self.assertEqual(observed, expected)
  client.list_objects.assert_called_once_with(
      all_versions=False,
      bucket_name='b',
      delimiter='/',
      fields_scope=fields_scope,
      prefix='o',
  )
def test_gcs_get_object_metadata_with_fields_scope(self, fields_scope,
                                                   client):
  """Test if get_object_metadata gets correct fields_scope."""
  expected = test_resources.get_object_resource(
      storage_url.ProviderPrefix.GCS, 'b', 'o.txt')
  client.get_object_metadata.side_effect = [expected]

  observed = list(
      wildcard_iterator.get_wildcard_iterator(
          'gs://b/o.txt', fields_scope=fields_scope))

  self.assertEqual(observed, [expected])
  client.get_object_metadata.assert_called_once_with(
      bucket_name='b',
      fields_scope=fields_scope,
      generation=None,
      object_name='o.txt',
  )
def Run(self, args):
  """Yields serializable displayable data for every matching bucket.

  Args:
    args: Parsed command arguments; args.urls optionally narrows the listing.

  Raises:
    errors.InvalidUrlError: A supplied URL names neither a provider nor a
      bucket.
  """
  if args.urls:
    bucket_urls = []
    for url_string in args.urls:
      url = storage_url.storage_url_from_string(url_string)
      if not (url.is_provider() or url.is_bucket()):
        raise errors.InvalidUrlError(
            'URL does not match buckets: {}'.format(url_string))
      bucket_urls.append(url)
  else:
    # No URLs given: list every bucket under the GCS provider.
    bucket_urls = [storage_url.CloudUrl(storage_url.ProviderPrefix.GCS)]

  for url in bucket_urls:
    for bucket in wildcard_iterator.get_wildcard_iterator(
        url.url_string,
        fields_scope=cloud_api.FieldsScope.FULL,
        get_bucket_metadata=True):
      # MakeSerializable will omit all the None values.
      yield resource_projector.MakeSerializable(
          bucket.get_displayable_bucket_data())
def test_gcs_list_all_object_versions(self, client):
  """Test with generation."""
  client.list_objects.return_value = self._object_resources_with_generation
  expected = [
      test_resources.get_object_resource(
          storage_url.ProviderPrefix.GCS, 'bucket1', 'a.txt', generation)
      for generation in ('1', '2')
  ]

  observed = list(
      wildcard_iterator.get_wildcard_iterator('gs://bucket1/a.txt',
                                              all_versions=True))

  self.assertEqual(observed, expected)
  client.list_objects.assert_called_once_with(
      all_versions=True,
      bucket_name='bucket1',
      delimiter='/',
      fields_scope=cloud_api.FieldsScope.NO_ACL,
      prefix='a.txt',
  )
def test_invalid_scheme_raises_error(self):
  """Verifies the wildcard iterator rejects an unsupported URL scheme."""
  bad_scheme_url = 'invalid://'
  with self.assertRaises(command_errors.InvalidUrlError):
    wildcard_iterator.get_wildcard_iterator(
        bad_scheme_url, fields_scope=cloud_api.FieldsScope.SHORT)
def test_compresses_url_wildcards(self, observed_url_string,
                                  expected_url_string):
  """Test if FileWildcardIterator compresses URL wildcards correctly."""
  compressed_path = wildcard_iterator.get_wildcard_iterator(
      observed_url_string)._path
  self.assertEqual(compressed_path, expected_url_string)