예제 #1
0
파일: test_gcs.py 프로젝트: suzil/mrjob
    def test_blank_out_object_ttl_days_set_at_init(self):
        # object_ttl_days=0 passed to create_bucket() should win over the
        # object_ttl_days=234 given at init, leaving no lifecycle rules
        gcs = GCSFilesystem(object_ttl_days=234)
        gcs.create_bucket('walrus', object_ttl_days=0)

        rules = list(gcs.get_bucket('walrus').lifecycle_rules)
        self.assertEqual(rules, [])
예제 #2
0
파일: test_gcs.py 프로젝트: suzil/mrjob
    def test_location_set_at_init(self):
        # a location given only at init is applied to buckets created later;
        # the bucket reports it upper-cased
        gcs = GCSFilesystem(location='us-central1')
        gcs.create_bucket('walrus')

        walrus = gcs.get_bucket('walrus')
        self.assertEqual(walrus.location, 'US-CENTRAL1')
예제 #3
0
파일: test_gcs.py 프로젝트: suzil/mrjob
    def test_override_location_set_at_init(self):
        # a location passed to create_bucket() takes precedence over the
        # one given at init
        gcs = GCSFilesystem(location='us-central1')
        gcs.create_bucket('walrus', location='us-east1')

        walrus = gcs.get_bucket('walrus')
        self.assertEqual(walrus.location, 'US-EAST1')
예제 #4
0
파일: test_gcs.py 프로젝트: Yelp/mrjob
    def test_blank_out_object_ttl_days_set_at_init(self):
        # init sets a TTL of 234 days; creating the bucket with
        # object_ttl_days=0 should result in no lifecycle rules at all
        filesystem = GCSFilesystem(object_ttl_days=234)
        filesystem.create_bucket('walrus', object_ttl_days=0)

        walrus = filesystem.get_bucket('walrus')
        lifecycle_rules = list(walrus.lifecycle_rules)

        self.assertEqual(lifecycle_rules, [])
예제 #5
0
파일: test_gcs.py 프로젝트: Yelp/mrjob
    def test_location(self):
        # location passed directly to create_bucket() is reported by the
        # bucket in upper case
        gcs = GCSFilesystem()
        gcs.create_bucket('walrus', location='us-central1')

        walrus = gcs.get_bucket('walrus')
        self.assertEqual(walrus.location, 'US-CENTRAL1')
예제 #6
0
파일: test_gcs.py 프로젝트: Yelp/mrjob
    def test_blank_out_location_set_at_init(self):
        # an empty location passed to create_bucket() discards the one given
        # at init; the bucket then reports location 'US'
        gcs = GCSFilesystem(location='us-central1')
        gcs.create_bucket('walrus', location='')

        walrus = gcs.get_bucket('walrus')
        self.assertEqual(walrus.location, 'US')
예제 #7
0
파일: test_gcs.py 프로젝트: Yelp/mrjob
    def test_override_location_set_at_init(self):
        # init says us-central1, create_bucket() says us-east1: the
        # per-bucket value is the one the bucket ends up with
        filesystem = GCSFilesystem(location='us-central1')
        filesystem.create_bucket('walrus', location='us-east1')

        created = filesystem.get_bucket('walrus')

        self.assertEqual(created.location, 'US-EAST1')
예제 #8
0
파일: test_gcs.py 프로젝트: suzil/mrjob
    def test_default(self):
        # with no options anywhere, the bucket reports location 'US' and
        # has no lifecycle rules
        gcs = GCSFilesystem()
        gcs.create_bucket('walrus')

        walrus = gcs.get_bucket('walrus')

        self.assertEqual(walrus.location, 'US')
        rules = list(walrus.lifecycle_rules)
        self.assertEqual(rules, [])
예제 #9
0
파일: test_gcs.py 프로젝트: Yelp/mrjob
    def test_default(self):
        # a bucket created without location or TTL options reports
        # location 'US' and an empty list of lifecycle rules
        filesystem = GCSFilesystem()
        filesystem.create_bucket('walrus')

        created = filesystem.get_bucket('walrus')

        self.assertEqual(created.location, 'US')
        self.assertEqual(list(created.lifecycle_rules), [])
예제 #10
0
파일: test_gcs.py 프로젝트: suzil/mrjob
    def test_override_object_ttl_days_set_at_init(self):
        # object_ttl_days=123 passed to create_bucket() replaces the 234
        # given at init; the bucket gets a single Delete rule with age 123
        gcs = GCSFilesystem(object_ttl_days=234)
        gcs.create_bucket('walrus', object_ttl_days=123)

        walrus = gcs.get_bucket('walrus')

        expected_rules = [
            {'action': {'type': 'Delete'}, 'condition': {'age': 123}},
        ]
        self.assertEqual(list(walrus.lifecycle_rules), expected_rules)
예제 #11
0
파일: test_gcs.py 프로젝트: Yelp/mrjob
    def test_override_object_ttl_days_set_at_init(self):
        # the per-bucket TTL (123 days) should be used instead of the TTL
        # given at init (234 days)
        filesystem = GCSFilesystem(object_ttl_days=234)
        filesystem.create_bucket('walrus', object_ttl_days=123)

        created = filesystem.get_bucket('walrus')
        actual_rules = list(created.lifecycle_rules)

        delete_after_123_days = {
            'action': {'type': 'Delete'},
            'condition': {'age': 123},
        }
        self.assertEqual(actual_rules, [delete_after_123_days])
예제 #12
0
class MockGCSClient(object):
    """Mock out GCSClient...

    TARGET API VERSION - Storage API v1

    Emulates GCS metadata and stores raw bytes.  Bucket and object state is
    kept in the ``_cache_buckets`` and ``_cache_objects`` dicts.
    Contains convenience functions for initializing items in GCS.
    """
    def __init__(self, test_case):
        """Set up empty bucket/object caches for *test_case*.

        :param test_case: the owning test case; must be a
                          MockGoogleAPITestCase
        """
        assert isinstance(test_case, MockGoogleAPITestCase)
        self._test_case = test_case
        self._fs = GCSFilesystem()

        self._cache_objects = dict()
        self._cache_buckets = dict()

        self._client_objects = MockGCSClientObjects(self)
        self._client_buckets = MockGCSClientBuckets(self)

    def objects(self):
        """Return the mock "objects" sub-client created at init."""
        return self._client_objects

    def buckets(self):
        """Return the mock "buckets" sub-client created at init."""
        return self._client_buckets

    def put_gcs(self, gcs_uri, data):
        """Put data at gcs_uri, creating a bucket if necessary"""
        # only the bucket matters here; upload_io() re-parses the full URI
        bucket, _ = parse_gcs_uri(gcs_uri)

        try:
            self._fs.get_bucket(bucket)
        except google_errors.HttpError:
            self._fs.create_bucket(project=_TEST_PROJECT, name=bucket)

        self.upload_io(BytesIO(data), gcs_uri)

    def put_gcs_multi(self, gcs_uri_to_data_map):
        """Bulk put data at gcs_uris"""
        for gcs_uri, data in gcs_uri_to_data_map.items():
            self.put_gcs(gcs_uri, data)

    def download_io(self, src_uri, io_obj):
        """
        Clobber GCSFilesystem._download_io

        Write the cached bytes for *src_uri* into *io_obj* and return
        *io_obj*.  Raises Exception if nothing is cached for that URI.
        """
        bucket, name = parse_gcs_uri(src_uri)

        object_dict = _get_deep(self._cache_objects, [bucket, name])

        if not object_dict:
            # say which URI was missing, so test failures are diagnosable
            raise Exception(
                'no object in mock GCS at %r' % (src_uri,))

        io_obj.write(object_dict['_data'])
        return io_obj

    def upload_io(self, io_obj, dest_uri):
        """
        Clobber GCSFilesystem._upload_io

        Read all of *io_obj* (from the start) and cache it as the object at
        *dest_uri*.  The destination bucket must already be in the bucket
        cache.  Returns the object response that was cached.
        """
        bucket, name = parse_gcs_uri(dest_uri)

        assert bucket in self._cache_buckets, (
            'bucket %r does not exist in mock GCS' % (bucket,))

        io_obj.seek(0)

        data = io_obj.read()

        # TODO - io_obj.close() ?  Not sure if callers of this function would
        # expect their io_objs to be closed

        object_resp = _insert_object_resp(bucket=bucket, name=name, data=data)

        _set_deep(self._cache_objects, [bucket, name], object_resp)

        return object_resp
예제 #13
0
파일: test_gcs.py 프로젝트: suzil/mrjob
class GCSFSTestCase(MockGoogleTestCase):
    """Tests for GCSFilesystem's ls, du, exists, md5sum, mkdir, put and rm,
    run on top of MockGoogleTestCase."""

    def setUp(self):
        super(GCSFSTestCase, self).setUp()
        self.fs = GCSFilesystem()

    def test_ls_blob(self):
        # ls() of a blob's exact URI yields just that URI
        self.put_gcs_multi({'gs://walrus/data/foo': b''})

        listing = list(self.fs.ls('gs://walrus/data/foo'))

        self.assertEqual(listing, ['gs://walrus/data/foo'])

    def test_ls_missing(self):
        # ls() of a nonexistent location yields nothing
        listing = list(self.fs.ls('gs://nope/not/here'))

        self.assertEqual(listing, [])

    def test_ls_ignores_dirs(self):
        # Dataproc (i.e. Hadoop) will create empty blobs whose names end
        # in '/'
        blobs = {
            'gs://walrus/data/foo/': b'',
            'gs://walrus/data/foo/bar': b'baz',
        }
        self.put_gcs_multi(blobs)

        listing = list(self.fs.ls('gs://walrus/data'))

        self.assertEqual(listing, ['gs://walrus/data/foo/bar'])

    def test_ls_recursively(self):
        # ls() descends into everything below the given prefix
        self.put_gcs_multi({
            'gs://walrus/data/bar': b'',
            'gs://walrus/data/bar/baz': b'',
            'gs://walrus/data/foo': b'',
            'gs://walrus/qux': b'',
        })

        uris = [
            'gs://walrus/data/bar',
            'gs://walrus/data/bar/baz',
            'gs://walrus/data/foo',
            'gs://walrus/qux',
        ]

        # whole bucket
        for whole_bucket in ('gs://walrus/', 'gs://walrus/*'):
            self.assertEqual(set(self.fs.ls(whole_bucket)), set(uris))

        # everything except gs://walrus/qux
        for data_only in ('gs://walrus/data',
                          'gs://walrus/data/',
                          'gs://walrus/data/*'):
            self.assertEqual(set(self.fs.ls(data_only)), set(uris[:-1]))

    def test_ls_globs(self):
        # ls() with glob patterns
        self.put_gcs_multi({
            'gs://w/a': b'',
            'gs://w/a/b': b'',
            'gs://w/ab': b'',
            'gs://w/b': b'',
        })

        self.assertEqual(
            set(self.fs.ls('gs://w/')),
            {'gs://w/a', 'gs://w/a/b', 'gs://w/ab', 'gs://w/b'})
        self.assertEqual(
            set(self.fs.ls('gs://w/*')),
            {'gs://w/a', 'gs://w/a/b', 'gs://w/ab', 'gs://w/b'})

        self.assertEqual(list(self.fs.ls('gs://w/*/')), ['gs://w/a/b'])
        self.assertEqual(list(self.fs.ls('gs://w/*/*')), ['gs://w/a/b'])
        self.assertEqual(list(self.fs.ls('gs://w/a?')), ['gs://w/ab'])

        # * can match /
        self.assertEqual(
            set(self.fs.ls('gs://w/a*')),
            {'gs://w/a', 'gs://w/a/b', 'gs://w/ab'})
        self.assertEqual(
            set(self.fs.ls('gs://w/*b')),
            {'gs://w/a/b', 'gs://w/ab', 'gs://w/b'})

    def test_du(self):
        # du() sums blob sizes below a prefix, or reports one blob's size
        blobs = {
            'gs://walrus/data/foo': b'abcde',
            'gs://walrus/data/bar/baz': b'fgh'
        }
        self.put_gcs_multi(blobs)

        self.assertEqual(self.fs.du('gs://walrus/'), 8)
        self.assertEqual(self.fs.du('gs://walrus/data/foo'), 5)
        self.assertEqual(self.fs.du('gs://walrus/data/bar/baz'), 3)

    def test_exists(self):
        # exists() is True for a stored blob and False for an absent one
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        found = self.fs.exists('gs://walrus/data/foo')
        self.assertEqual(found, True)

        missing = self.fs.exists('gs://walrus/data/bar')
        self.assertEqual(missing, False)

    def test_md5sum(self):
        # md5sum() returns the hex digest of the blob's contents
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        actual = self.fs.md5sum('gs://walrus/data/foo')

        self.assertEqual(actual, md5(b'abcd').hexdigest())

    def test_md5sum_of_missing_blob(self):
        # md5sum() of a blob that isn't there raises IOError
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        self.assertRaises(IOError, self.fs.md5sum, 'gs://walrus/data/bar')

    def test_mkdir_creates_buckets(self):
        # mkdir() on a path in a missing bucket adds the bucket to the
        # mock filesystem
        self.assertNotIn('walrus', self.mock_gcs_fs)

        self.fs.mkdir('gs://walrus/data')

        self.assertIn('walrus', self.mock_gcs_fs)

    def test_mkdir_does_not_create_directories(self):
        # mkdir() inside an existing bucket leaves ls() empty
        self.fs.create_bucket('walrus')

        before = list(self.fs.ls('gs://walrus/'))
        self.assertEqual(before, [])

        self.fs.mkdir('gs://walrus/data')

        after = list(self.fs.ls('gs://walrus/'))
        self.assertEqual(after, [])

    def test_put(self):
        # put() uploads a local file; cat() reads the same bytes back
        src = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        self.fs.put(src, dest)

        uploaded = b''.join(self.fs.cat(dest))
        self.assertEqual(uploaded, b'bar')

    def test_put_with_part_size(self):
        # part_size given at init is handed to _blob() as chunk_size
        src = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        part_size_fs = GCSFilesystem(part_size=12345)

        with patch.object(GCSFilesystem, '_blob') as blob_meth:
            part_size_fs.put(src, dest)
            blob_meth.assert_called_once_with(dest, chunk_size=12345)

    def test_put_chunk_size(self):
        # chunk_size passed to put() is handed to _blob(), and a warning
        # is logged
        src = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        with patch.object(GCSFilesystem, '_blob') as blob_meth, \
                patch('mrjob.fs.gcs.log') as log:

            self.fs.put(src, dest, chunk_size=99999)
            blob_meth.assert_called_once_with(dest, chunk_size=99999)

            self.assertTrue(log.warning.called)

    def test_rm(self):
        # rm() of a single blob makes exists() go from True to False
        self.put_gcs_multi({'gs://walrus/foo': b''})

        self.assertEqual(self.fs.exists('gs://walrus/foo'), True)

        self.fs.rm('gs://walrus/foo')

        self.assertEqual(self.fs.exists('gs://walrus/foo'), False)

    def test_rm_dir(self):
        # rm() of a prefix removes every blob below it
        blobs = {
            'gs://walrus/data/foo': b'',
            'gs://walrus/data/bar/baz': b'',
        }
        self.put_gcs_multi(blobs)

        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), True)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar/baz'), True)

        self.fs.rm('gs://walrus/data')

        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), False)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar/baz'), False)
예제 #14
0
class MockGCSClient(object):
    """Mock out GCSClient...

    TARGET API VERSION - Storage API v1

    Emulates GCS metadata and stores raw bytes.  State is held in two dicts:
    ``_cache_buckets`` for buckets and ``_cache_objects`` for objects.
    Contains convenience functions for initializing items in GCS.
    """

    def __init__(self, test_case):
        """Create a mock client with empty caches, bound to *test_case*.

        :param test_case: must be an instance of MockGoogleAPITestCase
        """
        assert isinstance(test_case, MockGoogleAPITestCase)
        self._test_case = test_case
        self._fs = GCSFilesystem()

        self._cache_objects = dict()
        self._cache_buckets = dict()

        self._client_objects = MockGCSClientObjects(self)
        self._client_buckets = MockGCSClientBuckets(self)

    def objects(self):
        """Return the MockGCSClientObjects instance built at init."""
        return self._client_objects

    def buckets(self):
        """Return the MockGCSClientBuckets instance built at init."""
        return self._client_buckets

    def put_gcs(self, gcs_uri, data):
        """Put data at gcs_uri, creating a bucket if necessary"""
        # the object name is unused here; upload_io() parses the URI itself
        bucket, _ = parse_gcs_uri(gcs_uri)

        try:
            self._fs.get_bucket(bucket)
        except google_errors.HttpError:
            self._fs.create_bucket(project=_TEST_PROJECT, name=bucket)

        self.upload_io(BytesIO(data), gcs_uri)

    def put_gcs_multi(self, gcs_uri_to_data_map):
        """Bulk put data at gcs_uris"""
        for gcs_uri, data in gcs_uri_to_data_map.items():
            self.put_gcs(gcs_uri, data)

    def download_io(self, src_uri, io_obj):
        """
        Clobber GCSFilesystem._download_io

        Copy the bytes cached for *src_uri* into *io_obj*, then return
        *io_obj*.  Raises Exception when no object is cached at *src_uri*.
        """
        bucket, name = parse_gcs_uri(src_uri)

        object_dict = _get_deep(self._cache_objects, [bucket, name])

        if not object_dict:
            # include the URI so a failing test shows what was missing
            raise Exception(
                'no object in mock GCS at %r' % (src_uri,))

        io_obj.write(object_dict['_data'])
        return io_obj

    def upload_io(self, io_obj, dest_uri):
        """
        Clobber GCSFilesystem._upload_io

        Rewind *io_obj*, read its full contents and cache them as the object
        at *dest_uri*.  The bucket must already be present in the bucket
        cache.  Returns the cached object response.
        """
        bucket, name = parse_gcs_uri(dest_uri)

        assert bucket in self._cache_buckets, (
            'bucket %r does not exist in mock GCS' % (bucket,))

        io_obj.seek(0)

        data = io_obj.read()

        # TODO - io_obj.close() ?  Not sure if callers of this function would
        # expect their io_objs to be closed

        object_resp = _insert_object_resp(bucket=bucket, name=name, data=data)

        _set_deep(self._cache_objects, [bucket, name], object_resp)

        return object_resp
예제 #15
0
 def _make_bucket(self, name, location=None):
     # create bucket *name* through a throwaway GCSFilesystem, forwarding
     # *location* unchanged
     GCSFilesystem().create_bucket(name, location=location)
예제 #16
0
파일: test_gcs.py 프로젝트: Yelp/mrjob
class GCSFSTestCase(MockGoogleTestCase):
    """Checks GCSFilesystem operations (ls, du, exists, md5sum, mkdir, put,
    rm) using the fixtures supplied by MockGoogleTestCase."""

    def setUp(self):
        super(GCSFSTestCase, self).setUp()
        self.fs = GCSFilesystem()

    def test_ls_blob(self):
        # listing a blob by its exact URI returns only that URI
        self.put_gcs_multi({'gs://walrus/data/foo': b''})

        found = list(self.fs.ls('gs://walrus/data/foo'))
        self.assertEqual(found, ['gs://walrus/data/foo'])

    def test_ls_missing(self):
        # listing something that doesn't exist returns nothing
        found = list(self.fs.ls('gs://nope/not/here'))
        self.assertEqual(found, [])

    def test_ls_ignores_dirs(self):
        # Dataproc (i.e. Hadoop) will create empty blobs whose names end
        # in '/'
        self.put_gcs_multi({
            'gs://walrus/data/foo/': b'',
            'gs://walrus/data/foo/bar': b'baz',
        })

        found = list(self.fs.ls('gs://walrus/data'))
        self.assertEqual(found, ['gs://walrus/data/foo/bar'])

    def test_ls_recursively(self):
        # listing a prefix returns all blobs below it, at any depth
        self.put_gcs_multi({
            'gs://walrus/data/bar': b'',
            'gs://walrus/data/bar/baz': b'',
            'gs://walrus/data/foo': b'',
            'gs://walrus/qux': b'',
        })

        uris = [
            'gs://walrus/data/bar',
            'gs://walrus/data/bar/baz',
            'gs://walrus/data/foo',
            'gs://walrus/qux',
        ]

        listed = set(self.fs.ls('gs://walrus/'))
        self.assertEqual(listed, set(uris))
        listed = set(self.fs.ls('gs://walrus/*'))
        self.assertEqual(listed, set(uris))

        # the last URI (gs://walrus/qux) is outside data/
        listed = set(self.fs.ls('gs://walrus/data'))
        self.assertEqual(listed, set(uris[:-1]))
        listed = set(self.fs.ls('gs://walrus/data/'))
        self.assertEqual(listed, set(uris[:-1]))
        listed = set(self.fs.ls('gs://walrus/data/*'))
        self.assertEqual(listed, set(uris[:-1]))

    def test_ls_globs(self):
        # glob patterns in the URI passed to ls()
        self.put_gcs_multi({
            'gs://w/a': b'',
            'gs://w/a/b': b'',
            'gs://w/ab': b'',
            'gs://w/b': b'',
        })

        listed = set(self.fs.ls('gs://w/'))
        self.assertEqual(
            listed, {'gs://w/a', 'gs://w/a/b', 'gs://w/ab', 'gs://w/b'})

        listed = set(self.fs.ls('gs://w/*'))
        self.assertEqual(
            listed, {'gs://w/a', 'gs://w/a/b', 'gs://w/ab', 'gs://w/b'})

        listed = list(self.fs.ls('gs://w/*/'))
        self.assertEqual(listed, ['gs://w/a/b'])

        listed = list(self.fs.ls('gs://w/*/*'))
        self.assertEqual(listed, ['gs://w/a/b'])

        listed = list(self.fs.ls('gs://w/a?'))
        self.assertEqual(listed, ['gs://w/ab'])

        # * can match /
        listed = set(self.fs.ls('gs://w/a*'))
        self.assertEqual(listed, {'gs://w/a', 'gs://w/a/b', 'gs://w/ab'})

        listed = set(self.fs.ls('gs://w/*b'))
        self.assertEqual(listed, {'gs://w/a/b', 'gs://w/ab', 'gs://w/b'})

    def test_du(self):
        # du() gives total bytes under a prefix, or the size of one blob
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'abcde',
            'gs://walrus/data/bar/baz': b'fgh'
        })

        bucket_total = self.fs.du('gs://walrus/')
        self.assertEqual(bucket_total, 8)

        foo_size = self.fs.du('gs://walrus/data/foo')
        self.assertEqual(foo_size, 5)

        baz_size = self.fs.du('gs://walrus/data/bar/baz')
        self.assertEqual(baz_size, 3)

    def test_exists(self):
        # exists() distinguishes a stored blob from a missing one
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), True)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar'), False)

    def test_md5sum(self):
        # md5sum() matches hashlib's hex digest of the stored bytes
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        digest = self.fs.md5sum('gs://walrus/data/foo')
        self.assertEqual(digest, md5(b'abcd').hexdigest())

    def test_md5sum_of_missing_blob(self):
        # asking for the md5sum of an absent blob raises IOError
        self.put_gcs_multi({'gs://walrus/data/foo': b'abcd'})

        self.assertRaises(IOError, self.fs.md5sum, 'gs://walrus/data/bar')

    def test_mkdir_creates_buckets(self):
        # the bucket is absent from the mock filesystem before mkdir()
        # and present afterwards
        self.assertNotIn('walrus', self.mock_gcs_fs)

        self.fs.mkdir('gs://walrus/data')

        self.assertIn('walrus', self.mock_gcs_fs)

    def test_mkdir_does_not_create_directories(self):
        # ls() of the bucket is empty both before and after mkdir()
        self.fs.create_bucket('walrus')

        listed_before = list(self.fs.ls('gs://walrus/'))
        self.assertEqual(listed_before, [])

        self.fs.mkdir('gs://walrus/data')

        listed_after = list(self.fs.ls('gs://walrus/'))
        self.assertEqual(listed_after, [])

    def test_put(self):
        # a file uploaded with put() can be read back with cat()
        local_file = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        self.fs.put(local_file, dest)

        contents = b''.join(self.fs.cat(dest))
        self.assertEqual(contents, b'bar')

    def test_put_with_part_size(self):
        # a filesystem built with part_size calls _blob() with that value
        # as chunk_size
        local_file = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        sized_fs = GCSFilesystem(part_size=12345)

        with patch.object(GCSFilesystem, '_blob') as blob_meth:
            sized_fs.put(local_file, dest)
            blob_meth.assert_called_once_with(dest, chunk_size=12345)

    def test_put_chunk_size(self):
        # put(..., chunk_size=N) calls _blob() with chunk_size=N and logs
        # a warning
        local_file = self.makefile('foo', contents=b'bar')
        dest = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        with patch.object(GCSFilesystem, '_blob') as blob_meth, \
                patch('mrjob.fs.gcs.log') as log:

            self.fs.put(local_file, dest, chunk_size=99999)
            blob_meth.assert_called_once_with(dest, chunk_size=99999)

            self.assertTrue(log.warning.called)

    def test_rm(self):
        # a blob exists before rm() and not after
        self.put_gcs_multi({'gs://walrus/foo': b''})

        self.assertEqual(self.fs.exists('gs://walrus/foo'), True)

        self.fs.rm('gs://walrus/foo')

        self.assertEqual(self.fs.exists('gs://walrus/foo'), False)

    def test_rm_dir(self):
        # rm() of gs://walrus/data removes the blobs stored beneath it
        self.put_gcs_multi({
            'gs://walrus/data/foo': b'',
            'gs://walrus/data/bar/baz': b'',
        })

        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), True)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar/baz'), True)

        self.fs.rm('gs://walrus/data')

        self.assertEqual(self.fs.exists('gs://walrus/data/foo'), False)
        self.assertEqual(self.fs.exists('gs://walrus/data/bar/baz'), False)