Esempio n. 1
0
 def test_empty_credentials_load(self, bad_credentials):
     """Expect ``DataSetError`` when loading with the ``bad_credentials`` fixture.

     The fixture value is passed as ``client_kwargs`` and the chained
     ``load().compute()`` call must fail with the "Failed while loading
     data" message.
     """
     expected_error = r"Failed while loading data from data set ParquetDataSet\(.+\)"
     credentials = {"client_kwargs": bad_credentials}
     data_set = ParquetDataSet(filepath=S3_PATH, credentials=credentials)

     with pytest.raises(DataSetError, match=expected_error):
         data_set.load().compute()
Esempio n. 2
0
    def test_pass_credentials(self, mocker):
        """Verify the AWS credentials reach the boto3 client constructor.

        ``Session.client`` is replaced with a mock. The load is expected to
        raise ``DataSetError``; afterwards the single recorded call on the
        mock is checked for the ``"s3"`` positional argument and for every
        entry of ``AWS_CREDENTIALS`` among its keyword arguments.
        """
        boto_client = mocker.patch("s3fs.core.boto3.Session.client")
        expected_error = r"Failed while loading data from data set ParquetDataSet\(.+\)"
        data_set = ParquetDataSet(
            filepath=S3_PATH,
            credentials={"client_kwargs": AWS_CREDENTIALS},
        )

        with pytest.raises(DataSetError, match=expected_error):
            data_set.load().compute()

        # Exactly one client must have been requested, for the S3 service.
        assert boto_client.call_count == 1
        call_args, call_kwargs = boto_client.call_args_list[0]
        assert call_args == ("s3",)
        for name, expected in AWS_CREDENTIALS.items():
            assert call_kwargs[name] == expected
Esempio n. 3
0
    def test_pass_credentials(self, mocker):
        """Verify the AWS key/secret reach botocore's ``create_client``.

        ``Session.create_client`` is replaced with a mock. The load is
        expected to raise ``DataSetError``; afterwards the single recorded
        call is checked for the ``"s3"`` positional argument and for the
        ``key``/``secret`` entries of ``AWS_CREDENTIALS`` arriving as
        ``aws_access_key_id``/``aws_secret_access_key``.
        """
        create_client = mocker.patch("botocore.session.Session.create_client")
        expected_error = r"Failed while loading data from data set ParquetDataSet\(.+\)"
        data_set = ParquetDataSet(filepath=S3_PATH, credentials=AWS_CREDENTIALS)

        with pytest.raises(DataSetError, match=expected_error):
            data_set.load().compute()

        # Exactly one client must have been created, for the S3 service.
        assert create_client.call_count == 1
        call_args, call_kwargs = create_client.call_args_list[0]
        assert call_args == ("s3",)
        assert call_kwargs["aws_access_key_id"] == AWS_CREDENTIALS["key"]
        assert call_kwargs["aws_secret_access_key"] == AWS_CREDENTIALS["secret"]
Esempio n. 4
0
def s3_data_set(load_args, save_args):
    """Build a ``ParquetDataSet`` pointing at ``S3_PATH``.

    ``AWS_CREDENTIALS`` is supplied as ``client_kwargs``; ``load_args`` and
    ``save_args`` are forwarded to the data set unchanged.
    """
    credentials = {"client_kwargs": AWS_CREDENTIALS}
    data_set = ParquetDataSet(
        filepath=S3_PATH,
        credentials=credentials,
        load_args=load_args,
        save_args=save_args,
    )
    return data_set
Esempio n. 5
0
 def test_incorrect_credentials_load(self):
     """Expect ``DataSetError`` when ``client_kwargs`` holds unsupported keys.

     The error message must mention "unexpected keyword argument".
     """
     bad_client_kwargs = {"access_token": "TOKEN", "access_key": "KEY"}
     credentials = {"client_kwargs": bad_client_kwargs}

     # Construction stays inside the context manager, as does the load, so a
     # failure at either step is captured by ``pytest.raises``.
     with pytest.raises(DataSetError, match=r"unexpected keyword argument"):
         data_set = ParquetDataSet(filepath=S3_PATH, credentials=credentials)
         data_set.load().compute()
Esempio n. 6
0
    def test_save_load_locally(self, tmp_path, dummy_dd_dataframe):
        """Save a dataframe to a local path and check it loads back unchanged.

        The target lives in a nested directory under ``tmp_path``. The data
        set must report ``exists()`` as false before saving and true after,
        and the computed loaded frame must equal the computed original.
        """
        file_path = str(tmp_path / "some" / "dir" / FILE_NAME)
        data_set = ParquetDataSet(filepath=file_path)

        assert not data_set.exists()
        data_set.save(dummy_dd_dataframe)
        assert data_set.exists()

        loaded_data = data_set.load()
        # The comparison result used to be discarded, so the round trip was
        # never actually verified; assert it explicitly.
        assert dummy_dd_dataframe.compute().equals(loaded_data.compute())
    