def test_blank_out_object_ttl_days_set_at_init(self):
    """Passing object_ttl_days=0 to create_bucket() on a filesystem that
    was built with object_ttl_days=234 should yield a bucket with no
    lifecycle rules."""
    filesystem = GCSFilesystem(object_ttl_days=234)
    filesystem.create_bucket('walrus', object_ttl_days=0)

    rules = list(filesystem.get_bucket('walrus').lifecycle_rules)
    self.assertEqual(rules, [])
def test_location_set_at_init(self):
    """A location given to the filesystem constructor is expected to show
    up (upper-cased) on buckets created without an explicit location."""
    filesystem = GCSFilesystem(location='us-central1')
    filesystem.create_bucket('walrus')

    created = filesystem.get_bucket('walrus')
    self.assertEqual(created.location, 'US-CENTRAL1')
def test_override_location_set_at_init(self):
    """A location passed to create_bucket() is expected to win over the
    one the filesystem was constructed with."""
    filesystem = GCSFilesystem(location='us-central1')
    filesystem.create_bucket('walrus', location='us-east1')

    created = filesystem.get_bucket('walrus')
    self.assertEqual(created.location, 'US-EAST1')
def test_location(self):
    """A location passed only to create_bucket() is expected to be
    reported (upper-cased) by the resulting bucket."""
    filesystem = GCSFilesystem()
    filesystem.create_bucket('walrus', location='us-central1')

    created = filesystem.get_bucket('walrus')
    self.assertEqual(created.location, 'US-CENTRAL1')
def test_blank_out_location_set_at_init(self):
    """Passing location='' to create_bucket() on a filesystem built with
    location='us-central1' should leave the bucket's location as 'US'."""
    filesystem = GCSFilesystem(location='us-central1')
    filesystem.create_bucket('walrus', location='')

    created = filesystem.get_bucket('walrus')
    self.assertEqual(created.location, 'US')
def test_default(self):
    """With no options at all, a new bucket is expected to report
    location 'US' and to carry no lifecycle rules."""
    filesystem = GCSFilesystem()
    filesystem.create_bucket('walrus')

    created = filesystem.get_bucket('walrus')
    self.assertEqual(created.location, 'US')
    self.assertEqual(list(created.lifecycle_rules), [])
def test_override_object_ttl_days_set_at_init(self):
    """An object_ttl_days passed to create_bucket() is expected to win
    over the one the filesystem was constructed with, producing a single
    Delete rule with that age."""
    filesystem = GCSFilesystem(object_ttl_days=234)
    filesystem.create_bucket('walrus', object_ttl_days=123)

    expected_rules = [
        {'action': {'type': 'Delete'}, 'condition': {'age': 123}},
    ]
    actual_rules = list(filesystem.get_bucket('walrus').lifecycle_rules)
    self.assertEqual(actual_rules, expected_rules)
class MockGCSClient(object):
    """Stand-in for GCSClient (target API version: Storage API v1).

    Emulates GCS metadata and keeps the raw object bytes in memory. Also
    provides convenience helpers for seeding objects in GCS from tests.
    """

    def __init__(self, test_case):
        """Bind the mock to *test_case*, which must be a
        MockGoogleAPITestCase, and start with empty caches."""
        assert isinstance(test_case, MockGoogleAPITestCase)
        self._test_case = test_case
        self._fs = GCSFilesystem()

        # backing stores: objects are addressed by the path [bucket, name],
        # buckets by bucket name
        self._cache_objects = {}
        self._cache_buckets = {}

        self._client_objects = MockGCSClientObjects(self)
        self._client_buckets = MockGCSClientBuckets(self)

    def objects(self):
        """Return the mock "objects" resource created at init."""
        return self._client_objects

    def buckets(self):
        """Return the mock "buckets" resource created at init."""
        return self._client_buckets

    def put_gcs(self, gcs_uri, data):
        """Store *data* at *gcs_uri*, creating the bucket first if looking
        it up raises an HttpError."""
        bucket_name, _ = parse_gcs_uri(gcs_uri)

        try:
            self._fs.get_bucket(bucket_name)
        except google_errors.HttpError:
            self._fs.create_bucket(project=_TEST_PROJECT, name=bucket_name)

        self.upload_io(BytesIO(data), gcs_uri)

    def put_gcs_multi(self, gcs_uri_to_data_map):
        """Call put_gcs() for every (uri, data) pair in the mapping."""
        for uri, payload in gcs_uri_to_data_map.items():
            self.put_gcs(uri, payload)

    def download_io(self, src_uri, io_obj):
        """Replacement for GCSFilesystem._download_io.

        Writes the bytes stored for *src_uri* into *io_obj* and returns
        *io_obj*. Raises a bare Exception when nothing is cached for that
        URI.
        """
        bucket_name, object_name = parse_gcs_uri(src_uri)

        stored = _get_deep(self._cache_objects, [bucket_name, object_name])
        if not stored:
            raise Exception

        io_obj.write(stored['_data'])
        return io_obj

    def upload_io(self, io_obj, dest_uri):
        """Replacement for GCSFilesystem._upload_io.

        Reads all of *io_obj* from the start, records the resulting object
        response under *dest_uri* and returns that response. The
        destination bucket must already be in the bucket cache.
        """
        bucket_name, object_name = parse_gcs_uri(dest_uri)
        assert bucket_name in self._cache_buckets

        # TODO - io_obj.close() ? Not sure if callers of this function would
        # expect their io_objs to be closed
        io_obj.seek(0)
        resp = _insert_object_resp(
            bucket=bucket_name, name=object_name, data=io_obj.read())

        _set_deep(self._cache_objects, [bucket_name, object_name], resp)
        return resp
class GCSFSTestCase(MockGoogleTestCase):
    """Tests for GCSFilesystem's ls, du, exists, md5sum, mkdir, put and rm,
    run with the fixtures provided by MockGoogleTestCase."""

    def setUp(self):
        super(GCSFSTestCase, self).setUp()
        self.fs = GCSFilesystem()

    def test_ls_blob(self):
        """ls() on a single blob's URI yields just that URI."""
        blob_uri = 'gs://walrus/data/foo'
        self.put_gcs_multi({blob_uri: b''})

        self.assertEqual(list(self.fs.ls(blob_uri)), [blob_uri])

    def test_ls_missing(self):
        """ls() on a URI with nothing behind it yields nothing."""
        listing = list(self.fs.ls('gs://nope/not/here'))
        self.assertEqual(listing, [])

    def test_ls_ignores_dirs(self):
        """Blobs whose names end in '/' are left out of ls() results."""
        # Dataproc (i.e. Hadoop) will create empty blobs whose names end
        # in '/'
        self.put_gcs_multi({
            'gs://walrus/data/foo/': b'',
            'gs://walrus/data/foo/bar': b'baz',
        })

        listing = list(self.fs.ls('gs://walrus/data'))
        self.assertEqual(listing, ['gs://walrus/data/foo/bar'])

    def test_ls_recursively(self):
        """ls() descends below the given prefix."""
        everything = [
            'gs://walrus/data/bar',
            'gs://walrus/data/bar/baz',
            'gs://walrus/data/foo',
            'gs://walrus/qux',
        ]
        self.put_gcs_multi({uri: b'' for uri in everything})

        for pattern in ('gs://walrus/', 'gs://walrus/*'):
            self.assertEqual(set(self.fs.ls(pattern)), set(everything))

        # everything except gs://walrus/qux lives under data/
        under_data = set(everything[:-1])
        for pattern in ('gs://walrus/data',
                        'gs://walrus/data/',
                        'gs://walrus/data/*'):
            self.assertEqual(set(self.fs.ls(pattern)), under_data)

    def test_ls_globs(self):
        """ls() understands * and ? wildcards."""
        everything = ['gs://w/a', 'gs://w/a/b', 'gs://w/ab', 'gs://w/b']
        self.put_gcs_multi({uri: b'' for uri in everything})

        for pattern in ('gs://w/', 'gs://w/*'):
            self.assertEqual(set(self.fs.ls(pattern)), set(everything))

        for pattern in ('gs://w/*/', 'gs://w/*/*'):
            self.assertEqual(list(self.fs.ls(pattern)), ['gs://w/a/b'])

        self.assertEqual(list(self.fs.ls('gs://w/a?')), ['gs://w/ab'])

        # * can match /
        self.assertEqual(set(self.fs.ls('gs://w/a*')),
                         {'gs://w/a', 'gs://w/a/b', 'gs://w/ab'})
        self.assertEqual(set(self.fs.ls('gs://w/*b')),
                         {'gs://w/a/b', 'gs://w/ab', 'gs://w/b'})

    def test_du(self):
        """du() adds up blob sizes below a URI."""
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'abcde',
            'gs://walrus/data/bar/baz': b'fgh',
        })

        expected_sizes = [
            ('gs://walrus/', 8),
            ('gs://walrus/data/foo', 5),
            ('gs://walrus/data/bar/baz', 3),
        ]
        for uri, size in expected_sizes:
            self.assertEqual(self.fs.du(uri), size)

    def test_exists(self):
        """exists() is True for a stored blob, False for an absent one."""
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        found = self.fs.exists('gs://walrus/data/foo')
        self.assertEqual(found, True)

        absent = self.fs.exists('gs://walrus/data/bar')
        self.assertEqual(absent, False)

    def test_md5sum(self):
        """md5sum() returns the hex MD5 digest of the blob's contents."""
        contents = b'abcd'
        self.put_gcs_multi({'gs://walrus/data/foo': contents})

        actual = self.fs.md5sum('gs://walrus/data/foo')
        self.assertEqual(actual, md5(contents).hexdigest())

    def test_md5sum_of_missing_blob(self):
        """md5sum() raises IOError for a blob that isn't there."""
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        with self.assertRaises(IOError):
            self.fs.md5sum('gs://walrus/data/bar')

    def test_mkdir_creates_buckets(self):
        """mkdir() adds the URI's bucket to the mock GCS filesystem."""
        self.assertNotIn('walrus', self.mock_gcs_fs)

        self.fs.mkdir('gs://walrus/data')

        self.assertIn('walrus', self.mock_gcs_fs)

    def test_mkdir_does_not_create_directories(self):
        """mkdir() inside an existing bucket adds nothing ls() can see."""
        bucket_uri = 'gs://walrus/'
        self.fs.create_bucket('walrus')
        self.assertEqual(list(self.fs.ls(bucket_uri)), [])

        self.fs.mkdir('gs://walrus/data')

        self.assertEqual(list(self.fs.ls(bucket_uri)), [])

    def test_put(self):
        """A local file uploaded with put() can be read back with cat()."""
        dest = 'gs://bar-files/foo'
        local_path = self.makefile('foo', contents=b'bar')
        self.storage_client().bucket('bar-files').create()

        self.fs.put(local_path, dest)

        uploaded = b''.join(self.fs.cat(dest))
        self.assertEqual(uploaded, b'bar')

    def test_put_with_part_size(self):
        """put() passes the filesystem's part_size to _blob() as
        chunk_size."""
        dest = 'gs://bar-files/foo'
        local_path = self.makefile('foo', contents=b'bar')
        self.storage_client().bucket('bar-files').create()

        part_size_fs = GCSFilesystem(part_size=12345)

        with patch.object(GCSFilesystem, '_blob') as blob_meth:
            part_size_fs.put(local_path, dest)
            blob_meth.assert_called_once_with(dest, chunk_size=12345)

    def test_put_chunk_size(self):
        """put() forwards an explicit chunk_size to _blob() and logs a
        warning."""
        dest = 'gs://bar-files/foo'
        local_path = self.makefile('foo', contents=b'bar')
        self.storage_client().bucket('bar-files').create()

        with patch.object(GCSFilesystem, '_blob') as blob_meth, \
                patch('mrjob.fs.gcs.log') as log:
            self.fs.put(local_path, dest, chunk_size=99999)
            blob_meth.assert_called_once_with(dest, chunk_size=99999)

            self.assertTrue(log.warning.called)

    def test_rm(self):
        """rm() removes a single blob."""
        blob_uri = 'gs://walrus/foo'
        self.put_gcs_multi({blob_uri: b''})
        self.assertEqual(self.fs.exists(blob_uri), True)

        self.fs.rm(blob_uri)

        self.assertEqual(self.fs.exists(blob_uri), False)

    def test_rm_dir(self):
        """rm() on a prefix removes every blob below it."""
        blob_uris = ['gs://walrus/data/foo', 'gs://walrus/data/bar/baz']
        self.put_gcs_multi({uri: b'' for uri in blob_uris})
        for uri in blob_uris:
            self.assertEqual(self.fs.exists(uri), True)

        self.fs.rm('gs://walrus/data')

        for uri in blob_uris:
            self.assertEqual(self.fs.exists(uri), False)
def _make_bucket(self, name, location=None):
    """Create bucket *name* through a fresh GCSFilesystem, forwarding
    *location* to create_bucket()."""
    GCSFilesystem().create_bucket(name, location=location)
class GCSFSTestCase(MockGoogleTestCase):
    """Tests for GCSFilesystem's ls, du, exists, md5sum, mkdir, put and rm,
    run with the fixtures provided by MockGoogleTestCase."""

    def setUp(self):
        super(GCSFSTestCase, self).setUp()
        self.fs = GCSFilesystem()

    def test_ls_blob(self):
        """ls() on a single blob's URI yields just that URI."""
        blob_uri = 'gs://walrus/data/foo'
        self.put_gcs_multi({blob_uri: b''})

        self.assertEqual(list(self.fs.ls(blob_uri)), [blob_uri])

    def test_ls_missing(self):
        """ls() on a URI with nothing behind it yields nothing."""
        listing = list(self.fs.ls('gs://nope/not/here'))
        self.assertEqual(listing, [])

    def test_ls_ignores_dirs(self):
        """Blobs whose names end in '/' are left out of ls() results."""
        # Dataproc (i.e. Hadoop) will create empty blobs whose names end
        # in '/'
        self.put_gcs_multi({
            'gs://walrus/data/foo/': b'',
            'gs://walrus/data/foo/bar': b'baz',
        })

        listing = list(self.fs.ls('gs://walrus/data'))
        self.assertEqual(listing, ['gs://walrus/data/foo/bar'])

    def test_ls_recursively(self):
        """ls() descends below the given prefix."""
        everything = [
            'gs://walrus/data/bar',
            'gs://walrus/data/bar/baz',
            'gs://walrus/data/foo',
            'gs://walrus/qux',
        ]
        self.put_gcs_multi({uri: b'' for uri in everything})

        for pattern in ('gs://walrus/', 'gs://walrus/*'):
            self.assertEqual(set(self.fs.ls(pattern)), set(everything))

        # everything except gs://walrus/qux lives under data/
        under_data = set(everything[:-1])
        for pattern in ('gs://walrus/data',
                        'gs://walrus/data/',
                        'gs://walrus/data/*'):
            self.assertEqual(set(self.fs.ls(pattern)), under_data)

    def test_ls_globs(self):
        """ls() understands * and ? wildcards."""
        everything = ['gs://w/a', 'gs://w/a/b', 'gs://w/ab', 'gs://w/b']
        self.put_gcs_multi({uri: b'' for uri in everything})

        for pattern in ('gs://w/', 'gs://w/*'):
            self.assertEqual(set(self.fs.ls(pattern)), set(everything))

        for pattern in ('gs://w/*/', 'gs://w/*/*'):
            self.assertEqual(list(self.fs.ls(pattern)), ['gs://w/a/b'])

        self.assertEqual(list(self.fs.ls('gs://w/a?')), ['gs://w/ab'])

        # * can match /
        self.assertEqual(set(self.fs.ls('gs://w/a*')),
                         {'gs://w/a', 'gs://w/a/b', 'gs://w/ab'})
        self.assertEqual(set(self.fs.ls('gs://w/*b')),
                         {'gs://w/a/b', 'gs://w/ab', 'gs://w/b'})

    def test_du(self):
        """du() adds up blob sizes below a URI."""
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'abcde',
            'gs://walrus/data/bar/baz': b'fgh',
        })

        expected_sizes = [
            ('gs://walrus/', 8),
            ('gs://walrus/data/foo', 5),
            ('gs://walrus/data/bar/baz', 3),
        ]
        for uri, size in expected_sizes:
            self.assertEqual(self.fs.du(uri), size)

    def test_exists(self):
        """exists() is True for a stored blob, False for an absent one."""
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        found = self.fs.exists('gs://walrus/data/foo')
        self.assertEqual(found, True)

        absent = self.fs.exists('gs://walrus/data/bar')
        self.assertEqual(absent, False)

    def test_md5sum(self):
        """md5sum() returns the hex MD5 digest of the blob's contents."""
        contents = b'abcd'
        self.put_gcs_multi({'gs://walrus/data/foo': contents})

        actual = self.fs.md5sum('gs://walrus/data/foo')
        self.assertEqual(actual, md5(contents).hexdigest())

    def test_md5sum_of_missing_blob(self):
        """md5sum() raises IOError for a blob that isn't there."""
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        with self.assertRaises(IOError):
            self.fs.md5sum('gs://walrus/data/bar')

    def test_mkdir_creates_buckets(self):
        """mkdir() adds the URI's bucket to the mock GCS filesystem."""
        self.assertNotIn('walrus', self.mock_gcs_fs)

        self.fs.mkdir('gs://walrus/data')

        self.assertIn('walrus', self.mock_gcs_fs)

    def test_mkdir_does_not_create_directories(self):
        """mkdir() inside an existing bucket adds nothing ls() can see."""
        bucket_uri = 'gs://walrus/'
        self.fs.create_bucket('walrus')
        self.assertEqual(list(self.fs.ls(bucket_uri)), [])

        self.fs.mkdir('gs://walrus/data')

        self.assertEqual(list(self.fs.ls(bucket_uri)), [])

    def test_put(self):
        """A local file uploaded with put() can be read back with cat()."""
        dest = 'gs://bar-files/foo'
        local_path = self.makefile('foo', contents=b'bar')
        self.storage_client().bucket('bar-files').create()

        self.fs.put(local_path, dest)

        uploaded = b''.join(self.fs.cat(dest))
        self.assertEqual(uploaded, b'bar')

    def test_put_with_part_size(self):
        """put() passes the filesystem's part_size to _blob() as
        chunk_size."""
        dest = 'gs://bar-files/foo'
        local_path = self.makefile('foo', contents=b'bar')
        self.storage_client().bucket('bar-files').create()

        part_size_fs = GCSFilesystem(part_size=12345)

        with patch.object(GCSFilesystem, '_blob') as blob_meth:
            part_size_fs.put(local_path, dest)
            blob_meth.assert_called_once_with(dest, chunk_size=12345)

    def test_put_chunk_size(self):
        """put() forwards an explicit chunk_size to _blob() and logs a
        warning."""
        dest = 'gs://bar-files/foo'
        local_path = self.makefile('foo', contents=b'bar')
        self.storage_client().bucket('bar-files').create()

        with patch.object(GCSFilesystem, '_blob') as blob_meth, \
                patch('mrjob.fs.gcs.log') as log:
            self.fs.put(local_path, dest, chunk_size=99999)
            blob_meth.assert_called_once_with(dest, chunk_size=99999)

            self.assertTrue(log.warning.called)

    def test_rm(self):
        """rm() removes a single blob."""
        blob_uri = 'gs://walrus/foo'
        self.put_gcs_multi({blob_uri: b''})
        self.assertEqual(self.fs.exists(blob_uri), True)

        self.fs.rm(blob_uri)

        self.assertEqual(self.fs.exists(blob_uri), False)

    def test_rm_dir(self):
        """rm() on a prefix removes every blob below it."""
        blob_uris = ['gs://walrus/data/foo', 'gs://walrus/data/bar/baz']
        self.put_gcs_multi({uri: b'' for uri in blob_uris})
        for uri in blob_uris:
            self.assertEqual(self.fs.exists(uri), True)

        self.fs.rm('gs://walrus/data')

        for uri in blob_uris:
            self.assertEqual(self.fs.exists(uri), False)