Ejemplo n.º 1
0
    def test_local_tmp_dir_is_deprecated_and_does_nothing(self):
        # passing local_tmp_dir should trigger a logged warning...
        gcs_fs = GCSFilesystem(local_tmp_dir=self.tmp_dir)
        self.assertTrue(self.log.warning.called)

        # ...while the client is still built with default project/credentials
        actual_client = gcs_fs.client
        expected_client = self.Client(project=None, credentials=None)
        self.assertEqual(actual_client, expected_client)

        # the value is not kept on the filesystem object
        self.assertFalse(hasattr(gcs_fs, '_local_tmp_dir'))
Ejemplo n.º 2
0
    def fs(self):
        """Return the composite filesystem, building it on first access.

        Sub-filesystems are registered in this order: ``s3`` (only when
        boto3 is installed), ``gcs`` (only when the Google libraries are
        installed), ``hadoop``, then ``local``.
        """
        # Spark supports basically every filesystem there is
        if self._fs:
            return self._fs

        opts = self._opts
        composite = CompositeFilesystem()
        # store it right away, exactly as before, so the attribute is set
        # before any sub-filesystem is constructed
        self._fs = composite

        if boto3_installed:
            s3_opt_names = (
                'aws_access_key_id',
                'aws_secret_access_key',
                'aws_session_token',
                's3_endpoint',
                's3_region',
            )
            # each S3Filesystem keyword is named after the option it reads
            s3_fs = S3Filesystem(**{name: opts[name] for name in s3_opt_names})
            composite.add_fs('s3', s3_fs, disable_if=_is_permanent_boto3_error)

        if google_libs_installed:
            gcs_fs = GCSFilesystem(
                project_id=opts['project_id'],
                location=opts['gcs_region'],
                object_ttl_days=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS,
            )
            composite.add_fs(
                'gcs', gcs_fs, disable_if=_is_permanent_google_error)

        # Hadoop FS is responsible for all URIs that fall through to it
        hadoop_fs = HadoopFilesystem(opts['hadoop_bin'])
        composite.add_fs('hadoop', hadoop_fs)

        composite.add_fs('local', LocalFilesystem())

        return self._fs
Ejemplo n.º 3
0
    def fs(self):
        """Return the composite filesystem, creating it the first time.

        Registers ``s3`` (if boto3 is installed), ``gcs`` (if the Google
        libraries are installed), ``hadoop`` and ``local``, in that order.
        """
        # Spark supports basically every filesystem there is
        if self._fs:
            return self._fs

        self._fs = CompositeFilesystem()
        register = self._fs.add_fs

        if boto3_installed:
            s3_fs = S3Filesystem(
                aws_access_key_id=self._opts['aws_access_key_id'],
                aws_secret_access_key=self._opts['aws_secret_access_key'],
                aws_session_token=self._opts['aws_session_token'],
                s3_endpoint=self._opts['s3_endpoint'],
                s3_region=self._opts['s3_region'],
            )
            register('s3', s3_fs, disable_if=_is_permanent_boto3_error)

        if google_libs_installed:
            gcs_fs = GCSFilesystem(
                project_id=self._opts['google_project_id'])
            register('gcs', gcs_fs, disable_if=_is_permanent_google_error)

        hadoop_fs = HadoopFilesystem(self._opts['hadoop_bin'])
        register('hadoop', hadoop_fs)

        register('local', LocalFilesystem())

        return self._fs
Ejemplo n.º 4
0
    def test_blank_out_object_ttl_days_set_at_init(self):
        # object_ttl_days=0 at create time should win over the value given
        # to the constructor, leaving the bucket without lifecycle rules
        gcs_fs = GCSFilesystem(object_ttl_days=234)
        gcs_fs.create_bucket('walrus', object_ttl_days=0)

        walrus = gcs_fs.get_bucket('walrus')
        rules = list(walrus.lifecycle_rules)

        self.assertEqual(rules, [])
Ejemplo n.º 5
0
    def test_override_location_set_at_init(self):
        # a location passed to create_bucket() should take precedence over
        # the one given to the constructor
        gcs_fs = GCSFilesystem(location='us-central1')
        gcs_fs.create_bucket('walrus', location='us-east1')

        walrus = gcs_fs.get_bucket('walrus')

        self.assertEqual(walrus.location, 'US-EAST1')
Ejemplo n.º 6
0
    def test_location_set_at_init(self):
        # with no location passed to create_bucket(), the constructor's
        # location should be used for the new bucket
        gcs_fs = GCSFilesystem(location='us-central1')
        gcs_fs.create_bucket('walrus')

        walrus = gcs_fs.get_bucket('walrus')

        self.assertEqual(walrus.location, 'US-CENTRAL1')
Ejemplo n.º 7
0
    def test_default(self):
        # no options anywhere: expect location 'US' and no lifecycle rules
        gcs_fs = GCSFilesystem()
        gcs_fs.create_bucket('walrus')

        walrus = gcs_fs.get_bucket('walrus')

        self.assertEqual(walrus.location, 'US')
        rules = list(walrus.lifecycle_rules)
        self.assertEqual(rules, [])
Ejemplo n.º 8
0
    def test_set_credentials_and_project_id(self):
        fake_credentials = Mock()
        project = 'alan-parsons'

        # supplying supported options should not log any warning
        gcs_fs = GCSFilesystem(
            credentials=fake_credentials, project_id=project)
        self.assertFalse(self.log.warning.called)

        # both values should be handed through to the client constructor
        actual_client = gcs_fs.client
        expected_client = self.Client(
            project=project, credentials=fake_credentials)
        self.assertEqual(actual_client, expected_client)
Ejemplo n.º 9
0
    def test_put_with_part_size(self):
        # set up a local file to upload and the bucket to upload it into
        src_path = self.makefile('foo', contents=b'bar')
        dest_uri = 'gs://bar-files/foo'
        self.storage_client().bucket('bar-files').create()

        gcs_fs = GCSFilesystem(part_size=12345)

        # part_size should reach _blob() as the chunk_size keyword
        with patch.object(GCSFilesystem, '_blob') as mock_blob:
            gcs_fs.put(src_path, dest_uri)
            mock_blob.assert_called_once_with(dest_uri, chunk_size=12345)
Ejemplo n.º 10
0
    def test_override_object_ttl_days_set_at_init(self):
        # the TTL passed to create_bucket() should replace the one given
        # to the constructor
        gcs_fs = GCSFilesystem(object_ttl_days=234)
        gcs_fs.create_bucket('walrus', object_ttl_days=123)

        walrus = gcs_fs.get_bucket('walrus')

        actual_rules = list(walrus.lifecycle_rules)
        expected_rules = [
            {'action': {'type': 'Delete'}, 'condition': {'age': 123}},
        ]
        self.assertEqual(actual_rules, expected_rules)
Ejemplo n.º 11
0
    def __init__(self, test_case):
        """Set up empty caches and the mock objects/buckets helpers.

        :param test_case: the owning test case; must be a
                          ``MockGoogleAPITestCase`` instance.
        """
        assert isinstance(test_case, MockGoogleAPITestCase)

        self._test_case = test_case
        self._fs = GCSFilesystem()

        # both caches start out empty
        self._cache_objects = {}
        self._cache_buckets = {}

        # the helper objects are handed this instance, so they are created
        # only after the attributes above exist
        self._client_objects = MockGCSClientObjects(self)
        self._client_buckets = MockGCSClientBuckets(self)
Ejemplo n.º 12
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` object combining GCS and
        the local filesystem. Built on first access, then reused.
        """
        if self._fs is None:
            # keep a direct handle on the GCS filesystem as well
            self._gcs_fs = GCSFilesystem()
            self._fs = CompositeFilesystem(self._gcs_fs, LocalFilesystem())

        return self._fs
Ejemplo n.º 13
0
    def test_mkdir_bucket(self):
        # mkdir() on a path in a bucket that has not been created should
        # produce the bucket using the constructor's location and TTL
        gcs_fs = GCSFilesystem(location='us-central1', object_ttl_days=123)
        gcs_fs.mkdir('gs://walrus/data')

        walrus = gcs_fs.get_bucket('walrus')

        self.assertEqual(walrus.location, 'US-CENTRAL1')

        actual_rules = list(walrus.lifecycle_rules)
        expected_rules = [
            {'action': {'type': 'Delete'}, 'condition': {'age': 123}},
        ]
        self.assertEqual(actual_rules, expected_rules)
Ejemplo n.º 14
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` object combining GCS and
        the local filesystem. Built on first access, then reused.
        """
        if self._fs is None:
            # the GCS filesystem gets this runner's credentials, local tmp
            # dir and project ID; a direct reference to it is kept too
            self._gcs_fs = GCSFilesystem(
                credentials=self._credentials,
                local_tmp_dir=self._get_local_tmp_dir(),
                project_id=self._project_id,
            )
            self._fs = CompositeFilesystem(self._gcs_fs, LocalFilesystem())

        return self._fs
Ejemplo n.º 15
0
    def setUp(self):
        """Create a GCSFilesystem whose ``_api_client`` is a MagicMock."""
        self.fs = GCSFilesystem()
        self.gcs_path = 'gs://walrus/data'

        # returned by the mocked objects().list()
        self.list_req_mock = mock.MagicMock()

        objects_api = mock.MagicMock()
        objects_api.list.return_value = self.list_req_mock
        # objects().get_media() returns an HttpRequest built from gcs_path
        media_request = google_http.HttpRequest(None, None, self.gcs_path)
        objects_api.get_media.return_value = media_request

        fake_api_client = mock.MagicMock()
        fake_api_client.objects.return_value = objects_api
        self.fs._api_client = fake_api_client

        # only created here, not started
        self.next_chunk_patch = patch.object(
            google_http.MediaIoBaseDownload, 'next_chunk')
Ejemplo n.º 16
0
    def fs(self):
        """:py:class:`~mrjob.fs.base.Filesystem` object combining GCS and
        the local filesystem. Built on first access, then reused.
        """
        if self._fs is not None:
            return self._fs

        self._fs = CompositeFilesystem()

        # use the region option when it is set; otherwise derive the
        # location from the zone option
        region = self._opts['region']
        if region:
            location = region
        else:
            location = _zone_to_region(self._opts['zone'])

        gcs_fs = GCSFilesystem(
            credentials=self._credentials,
            project_id=self._project_id,
            part_size=self._upload_part_size(),
            location=location,
            object_ttl_days=_DEFAULT_CLOUD_TMP_DIR_OBJECT_TTL_DAYS,
        )
        self._fs.add_fs('gcs', gcs_fs)

        self._fs.add_fs('local', LocalFilesystem())

        return self._fs
Ejemplo n.º 17
0
 def setUp(self):
     """Run the parent setUp(), then create the filesystem under test."""
     super(CatTestCase, self).setUp()

     gcs_fs = GCSFilesystem()
     self.fs = gcs_fs
Ejemplo n.º 18
0
 def _make_bucket(self, name, location=None):
     """Create bucket *name* through a fresh GCSFilesystem.

     *location* is passed straight through to ``create_bucket()``.
     """
     GCSFilesystem().create_bucket(name, location=location)
Ejemplo n.º 19
0
    def test_default(self):
        # constructing with no arguments should not log a warning
        gcs_fs = GCSFilesystem()
        self.assertFalse(self.log.warning.called)

        # and the client should be built with no project or credentials
        actual_client = gcs_fs.client
        expected_client = self.Client(project=None, credentials=None)
        self.assertEqual(actual_client, expected_client)