Esempio n. 1
0
    def fs(self):
        """Return this runner's filesystem, building it on first use.

        The first call creates a ``CompositeFilesystem`` and registers
        sub-filesystems on it in this order: S3 (only when
        ``boto3_installed`` is true), GCS (only when
        ``google_libs_installed`` is true), Hadoop, then local. The
        composite is stored on ``self._fs`` and returned as-is by later
        calls.
        """
        # Spark supports basically every filesystem there is

        if self._fs:
            return self._fs

        self._fs = CompositeFilesystem()
        opts = self._opts

        if boto3_installed:
            s3_fs = S3Filesystem(
                aws_access_key_id=opts['aws_access_key_id'],
                aws_secret_access_key=opts['aws_secret_access_key'],
                aws_session_token=opts['aws_session_token'],
                s3_endpoint=opts['s3_endpoint'],
                s3_region=opts['s3_region'],
            )
            self._fs.add_fs(
                's3', s3_fs, disable_if=_is_permanent_boto3_error)

        if google_libs_installed:
            gcs_fs = GCSFilesystem(
                project_id=opts['project_id'],
                location=opts['gcs_region'],
                object_ttl_days=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS,
            )
            self._fs.add_fs(
                'gcs', gcs_fs, disable_if=_is_permanent_google_error)

        # Hadoop FS is responsible for all URIs that fall through to it
        hadoop_fs = HadoopFilesystem(opts['hadoop_bin'])
        self._fs.add_fs('hadoop', hadoop_fs)

        self._fs.add_fs('local', LocalFilesystem())

        return self._fs
Esempio n. 2
0
    def _lock_contents(self, mock_cluster, steps_ahead=0):
        """Return the bytes of the lock object for *mock_cluster*, or None.

        The lock is read from ``s3://my_bucket/locks/<Id>/<n>``, where
        ``<Id>`` is ``mock_cluster['Id']`` and ``<n>`` is the length of
        ``mock_cluster['_Steps']`` plus *steps_ahead*. An empty object
        yields ``None`` rather than ``b''``.
        """
        s3_fs = S3Filesystem()

        cluster_id = mock_cluster['Id']
        step_num = len(mock_cluster['_Steps']) + steps_ahead
        lock_uri = 's3://my_bucket/locks/%s/%d' % (cluster_id, step_num)

        data = b''.join(s3_fs.cat(lock_uri))

        if data:
            return data
        return None
Esempio n. 3
0
    def test_endpoint_for_bucket_in_us_west_2(self):
        """A bucket located in us-west-2 connects to that region's host."""
        self.add_mock_s3_data({'walrus': {}}, location='us-west-2')

        walrus = S3Filesystem().get_bucket('walrus')

        self.assertEqual(walrus.connection.host, 's3-us-west-2.amazonaws.com')
Esempio n. 4
0
    def test_get_location_is_forbidden(self):
        """A 403 from GetBucketLocation falls back to the default endpoint."""
        self.add_mock_s3_data({'walrus': {}}, location='us-west-2')

        s3_fs = S3Filesystem()

        # error response shaped like the one botocore builds for a 403
        error_response = {
            'Error': {
                'Code': 'AccessDenied',
                'Message': 'Access Denied',
            },
            'ResponseMetadata': {
                'HTTPStatusCode': 403,
            },
        }
        access_denied_error = ClientError(error_response, 'GetBucketLocation')

        gbl_path = 'tests.mock_boto3.s3.MockS3Client.get_bucket_location'
        with patch(gbl_path, side_effect=access_denied_error):
            walrus = s3_fs.get_bucket('walrus')

        # the bucket should still be usable, through the default endpoint
        client_meta = walrus.meta.client.meta
        self.assertEqual(client_meta.endpoint_url, 'https://s3.amazonaws.com')
        self.assertEqual(client_meta.region_name, 'us-east-1')
Esempio n. 5
0
    def fs(self):
        """Return this runner's filesystem, building it on first use.

        The first call creates a ``CompositeFilesystem`` and registers
        sub-filesystems on it in this order: S3 (only when
        ``boto3_installed`` is true), GCS (only when
        ``google_libs_installed`` is true), Hadoop, then local. The
        composite is stored on ``self._fs`` and returned as-is by later
        calls.
        """
        # Spark supports basically every filesystem there is

        if self._fs:
            return self._fs

        self._fs = CompositeFilesystem()
        opts = self._opts

        if boto3_installed:
            s3_fs = S3Filesystem(
                aws_access_key_id=opts['aws_access_key_id'],
                aws_secret_access_key=opts['aws_secret_access_key'],
                aws_session_token=opts['aws_session_token'],
                s3_endpoint=opts['s3_endpoint'],
                s3_region=opts['s3_region'],
            )
            self._fs.add_fs(
                's3', s3_fs, disable_if=_is_permanent_boto3_error)

        if google_libs_installed:
            gcs_fs = GCSFilesystem(project_id=opts['google_project_id'])
            self._fs.add_fs(
                'gcs', gcs_fs, disable_if=_is_permanent_google_error)

        hadoop_fs = HadoopFilesystem(opts['hadoop_bin'])
        self._fs.add_fs('hadoop', hadoop_fs)

        self._fs.add_fs('local', LocalFilesystem())

        return self._fs
Esempio n. 6
0
    def _lock_contents(self, mock_cluster):
        """Return the bytes of the lock object for *mock_cluster*, or None.

        The lock is read from ``s3://my_bucket/locks/<Id>``, where ``<Id>``
        is ``mock_cluster['Id']``. An empty object yields ``None`` rather
        than ``b''``.
        """
        s3_fs = S3Filesystem()

        lock_uri = 's3://my_bucket/locks/%s' % mock_cluster['Id']
        data = b''.join(s3_fs.cat(lock_uri))

        if data:
            return data
        return None
Esempio n. 7
0
    def test_s3_ls(self):
        """Check prefix matching and the missing-bucket case in _s3_ls()."""
        self.add_mock_s3_data({
            'walrus': {'one': b'', 'two': b'', 'three': b''},
        })

        s3_fs = S3Filesystem()

        # bucket root: every key is listed
        everything = set(s3_fs._s3_ls('s3://walrus/'))
        self.assertEqual(
            everything,
            set(['s3://walrus/one', 's3://walrus/two', 's3://walrus/three']))

        # a bare prefix matches keys that start with it
        starts_with_t = set(s3_fs._s3_ls('s3://walrus/t'))
        self.assertEqual(
            starts_with_t,
            set(['s3://walrus/two', 's3://walrus/three']))

        # ...but with a trailing slash, nothing matches
        under_t_dir = set(s3_fs._s3_ls('s3://walrus/t/'))
        self.assertEqual(under_t_dir, set())

        # if we ask for a nonexistent bucket, we should get some sort
        # of exception (in practice, buckets with random names will
        # probably be owned by other people, and we'll get some sort
        # of permissions error)
        #
        # NOTE(review): the listing is created outside assertRaises(), so
        # this relies on _s3_ls() being lazy (presumably a generator) and
        # only failing once set() consumes it -- confirm
        missing_bucket_listing = s3_fs._s3_ls('s3://lolcat/')
        self.assertRaises(Exception, set, missing_bucket_listing)
Esempio n. 8
0
    def test_endpoint_for_bucket_in_us_west_1(self):
        """A bucket located in us-west-1 gets a client for that region."""
        self.add_mock_s3_data({'walrus': {}}, location='us-west-1')

        walrus = S3Filesystem().get_bucket('walrus')

        region = walrus.meta.client.meta.region_name
        self.assertEqual(region, 'us-west-1')
Esempio n. 9
0
    def test_endpoint_for_bucket_in_us_east_1(self):
        """A bucket with an empty location connects to the default host."""
        # location constraint for us-east-1 is '', not 'us-east-1'
        self.add_mock_s3_data({'walrus': {}}, location='')

        walrus = S3Filesystem().get_bucket('walrus')

        host = walrus.connection.host
        self.assertEqual(host, 's3.amazonaws.com')
Esempio n. 10
0
    def test_force_s3_endpoint_url(self):
        """A forced s3_endpoint URL is used by both client and resource."""
        s3_fs = S3Filesystem(s3_endpoint='https://myproxy:8080')

        s3_client = s3_fs.make_s3_client()
        client_url = s3_client.meta.endpoint_url
        self.assertEqual(client_url, 'https://myproxy:8080')

        s3_resource = s3_fs.make_s3_resource()
        resource_url = s3_resource.meta.client.meta.endpoint_url
        self.assertEqual(resource_url, 'https://myproxy:8080')
Esempio n. 11
0
    def test_default_endpoint(self):
        """With no options, client and resource use s3.amazonaws.com."""
        s3_fs = S3Filesystem()

        s3_client = s3_fs.make_s3_client()
        client_url = s3_client.meta.endpoint_url
        self.assertEqual(client_url, 'https://s3.amazonaws.com')

        s3_resource = s3_fs.make_s3_resource()
        resource_url = s3_resource.meta.client.meta.endpoint_url
        self.assertEqual(resource_url, 'https://s3.amazonaws.com')
Esempio n. 12
0
    def test_create_bucket_with_mkdir(self):
        """A bucket made via mkdir() lands in the filesystem's s3_region."""
        # mkdir() doesn't have a way to specify bucket location, so we
        # do it at init time
        s3_fs = S3Filesystem(s3_region='us-west-1')

        s3_fs.mkdir('s3://walrus/data')

        walrus = s3_fs.get_bucket('walrus')
        region = walrus.meta.client.meta.region_name
        self.assertEqual(region, 'us-west-1')
Esempio n. 13
0
    def test_get_location_other_error(self):
        """A non-403 error from get_location() propagates to the caller."""
        self.add_mock_s3_data({'walrus': {}}, location='us-west-2')

        s3_fs = S3Filesystem()

        response_error = boto.exception.S3ResponseError
        not_found = response_error(404, 'Not Found')

        get_location_path = 'tests.mockboto.MockBucket.get_location'
        with patch(get_location_path, side_effect=not_found):
            self.assertRaises(
                boto.exception.S3ResponseError, s3_fs.get_bucket, 'walrus')
Esempio n. 14
0
    def test_get_location_is_forbidden(self):
        """A 403 from get_location() falls back to the default host."""
        self.add_mock_s3_data({'walrus': {}}, location='us-west-2')

        s3_fs = S3Filesystem()

        forbidden = boto.exception.S3ResponseError(403, 'Forbidden')

        get_location_path = 'tests.mockboto.MockBucket.get_location'
        with patch(get_location_path, side_effect=forbidden):
            walrus = s3_fs.get_bucket('walrus')

        # the bucket is still returned, connected to the default host
        host = walrus.connection.host
        self.assertEqual(host, 's3.amazonaws.com')
Esempio n. 15
0
    def test_force_s3_endpoint_region(self):
        """Forcing s3_region selects that region's endpoint URL."""
        # this is the actual mrjob default region
        s3_fs = S3Filesystem(s3_region='us-west-2')

        client_meta = s3_fs.make_s3_client().meta
        self.assertEqual(
            client_meta.endpoint_url, 'https://s3-us-west-2.amazonaws.com')
        self.assertEqual(client_meta.region_name, 'us-west-2')

        resource_meta = s3_fs.make_s3_resource().meta.client.meta
        self.assertEqual(
            resource_meta.endpoint_url, 'https://s3-us-west-2.amazonaws.com')
        self.assertEqual(resource_meta.region_name, 'us-west-2')
Esempio n. 16
0
    def test_buckets_from_forced_s3_endpoint(self):
        """With a forced endpoint, a bucket in another region is refused."""
        self.add_mock_s3_data({'walrus-east': {}}, location='us-east-2')
        self.add_mock_s3_data({'walrus-west': {}}, location='us-west-2')

        s3_fs = S3Filesystem(s3_endpoint='s3-us-east-2.amazonaws.com')

        east_bucket = s3_fs.get_bucket('walrus-east')
        east_host = east_bucket.connection.host
        self.assertEqual(east_host, 's3-us-east-2.amazonaws.com')

        # can't access this bucket from wrong endpoint!
        response_error = boto.exception.S3ResponseError
        self.assertRaises(response_error, s3_fs.get_bucket, 'walrus-west')
Esempio n. 17
0
    def test_put_with_part_size(self):
        """put() passes part_size to TransferConfig as chunk and threshold."""
        self.add_mock_s3_data({'bar-files': {}})

        src_path = self.makefile('foo', contents=b'bar')
        dest_uri = 's3://bar-files/foo'

        uploader_fs = S3Filesystem(part_size=12345)
        uploader_fs.put(src_path, dest_uri)

        # NOTE: the upload is read back through the shared self.fs, not
        # through the filesystem that performed the upload
        uploaded = b''.join(self.fs.cat(dest_uri))
        self.assertEqual(uploaded, b'bar')

        # part_size should show up as both multipart settings
        self.TransferConfig.assert_called_once_with(
            multipart_chunksize=12345, multipart_threshold=12345)
Esempio n. 18
0
    def test_create_bucket_in_us_west_2(self):
        """create_bucket() with an explicit region creates the bucket there.

        Checks both the location constraint reported by the S3 client and
        the endpoint/region of the client behind the bucket that
        get_bucket() returns.
        """
        fs = S3Filesystem()

        fs.create_bucket('walrus', region='us-west-2')

        s3_client = fs.make_s3_client()
        # boto3 client methods only accept keyword arguments, so call
        # get_bucket_location() the way code using a real client has to
        location = s3_client.get_bucket_location(Bucket='walrus')
        self.assertEqual(location['LocationConstraint'], 'us-west-2')

        bucket = fs.get_bucket('walrus')

        self.assertEqual(bucket.meta.client.meta.endpoint_url,
                         'https://s3-us-west-2.amazonaws.com')
        self.assertEqual(bucket.meta.client.meta.region_name, 'us-west-2')
Esempio n. 19
0
    def test_create_bucket_with_no_region(self):
        """create_bucket() without a region leaves the location unset.

        Checks that the S3 client reports no location constraint and that
        get_bucket() returns a bucket whose client uses the default
        endpoint and the us-east-1 region.
        """
        fs = S3Filesystem()

        fs.create_bucket('walrus')

        s3_client = fs.make_s3_client()
        # boto3 client methods only accept keyword arguments, so call
        # get_bucket_location() the way code using a real client has to
        location = s3_client.get_bucket_location(Bucket='walrus')
        self.assertIsNone(location['LocationConstraint'])

        bucket = fs.get_bucket('walrus')

        self.assertEqual(bucket.meta.client.meta.endpoint_url,
                         'https://s3.amazonaws.com')
        self.assertEqual(bucket.meta.client.meta.region_name, 'us-east-1')
Esempio n. 20
0
    def test_buckets_from_forced_s3_endpoint(self):
        """A forced endpoint is used even for a bucket in another region."""
        self.add_mock_s3_data({'walrus-east': {}}, location='us-east-2')

        s3_fs = S3Filesystem(s3_endpoint='s3-us-west-2.amazonaws.com')

        # NOTE(review): the bucket is fetched before the patch below is
        # installed, so the mock can only observe lookups triggered by
        # reading the endpoint URL -- confirm that is the intent
        east_bucket = s3_fs.get_bucket('walrus-east')

        gbl_path = 'tests.mock_boto3.s3.MockS3Client.get_bucket_location'
        with patch(gbl_path) as mock_gbl:
            # won't actually be able to access this bucket from this endpoint,
            # but boto3 doesn't check that on bucket creation
            endpoint_url = east_bucket.meta.client.meta.endpoint_url
            self.assertEqual(
                endpoint_url, 'https://s3-us-west-2.amazonaws.com')
            # no reason to check bucket location if endpoint is forced
            self.assertFalse(mock_gbl.called)
Esempio n. 21
0
    def test_buckets_from_forced_s3_endpoint(self):
        """With a forced endpoint, a bucket in another region is refused."""
        self.add_mock_s3_data({'walrus-east': {}}, location='us-east-2')
        self.add_mock_s3_data({'walrus-west': {}}, location='us-west-2')

        s3_fs = S3Filesystem(s3_endpoint='s3-us-east-2.amazonaws.com')

        # NOTE(review): the bucket is fetched before the patch below is
        # installed, so the mock can only observe lookups triggered by
        # reading the connection host -- confirm that is the intent
        east_bucket = s3_fs.get_bucket('walrus-east')

        get_location_path = 'tests.mockboto.MockBucket.get_location'
        with patch(get_location_path) as mock_get_loc:
            east_host = east_bucket.connection.host
            self.assertEqual(east_host, 's3-us-east-2.amazonaws.com')
            # no reason to check bucket location if endpoint is forced
            self.assertFalse(mock_get_loc.called)

        # can't access this bucket from wrong endpoint!
        response_error = boto.exception.S3ResponseError
        self.assertRaises(response_error, s3_fs.get_bucket, 'walrus-west')
Esempio n. 22
0
 def setUp(self):
     """Run the base-class setup, then store a default S3Filesystem."""
     super(S3FSTestCase, self).setUp()

     # shared filesystem, built with no options, for tests to use
     default_fs = S3Filesystem()
     self.fs = default_fs
Esempio n. 23
0
 def setUp(self):
     """Sandbox boto for the test and store an S3Filesystem on self.fs."""
     self.sandbox_boto()
     # undo the sandboxing once the test finishes
     self.addCleanup(self.unsandbox_boto)

     # placeholder positional arguments for the filesystem under test
     fs_args = ('key_id', 'secret', 'nowhere')
     self.fs = S3Filesystem(*fs_args)
Esempio n. 24
0
    def test_force_s3_endpoint(self):
        """A forced s3_endpoint becomes the host of new S3 connections."""
        s3_fs = S3Filesystem(s3_endpoint='s3-us-west-1.amazonaws.com')

        host = s3_fs.make_s3_conn().host

        self.assertEqual(host, 's3-us-west-1.amazonaws.com')
Esempio n. 25
0
    def test_default_endpoint(self):
        """With no options, new S3 connections use s3.amazonaws.com."""
        s3_fs = S3Filesystem()

        host = s3_fs.make_s3_conn().host

        self.assertEqual(host, 's3.amazonaws.com')
Esempio n. 26
0
    def setUp(self):
        """Store a default S3Filesystem and a patched TransferConfig.

        ``self.TransferConfig`` holds whatever ``self.start()`` returns for
        the patcher -- presumably the mock standing in for
        ``boto3.s3.transfer.TransferConfig``; confirm against ``start()``.
        """
        super(S3FSTestCase, self).setUp()
        self.fs = S3Filesystem()

        transfer_config_patcher = patch('boto3.s3.transfer.TransferConfig')
        self.TransferConfig = self.start(transfer_config_patcher)
Esempio n. 27
0
    def test_bucket_does_not_exist(self):
        """get_bucket() raises ClientError for a bucket that doesn't exist."""
        s3_fs = S3Filesystem()

        # no mock S3 data was added, so there is no 'walrus' bucket
        with self.assertRaises(ClientError):
            s3_fs.get_bucket('walrus')