Ejemplo n.º 1
0
 def test_empty_credentials_load(self, bad_credentials):
     """Check that ``load()`` fails with ``DataSetError`` when the data set
     is built with the ``bad_credentials`` fixture."""
     expected_message = (
         r"Failed while loading data from data set ParquetS3DataSet\(.+\)"
     )
     data_set = ParquetS3DataSet(
         filepath=FILENAME,
         bucket_name=BUCKET_NAME,
         credentials=bad_credentials,
     )
     # Construction is expected to succeed; only the load call should raise.
     with pytest.raises(DataSetError, match=expected_message):
         data_set.load()
Ejemplo n.º 2
0
    def test_version_str_repr(self, load_version, save_version):
        """Check that ``str()`` of a data set mentions the version only when
        the instance was created with one."""
        shared_kwargs = dict(filepath=FILENAME, bucket_name=BUCKET_NAME)
        plain = ParquetS3DataSet(**shared_kwargs)
        versioned = ParquetS3DataSet(
            version=Version(load_version, save_version), **shared_kwargs
        )

        # Unversioned instance: file name present, no version information.
        plain_text = str(plain)
        assert FILENAME in plain_text
        assert "version" not in plain_text

        # Versioned instance: file name plus the exact Version(...) rendering.
        versioned_text = str(versioned)
        expected_version = "version=Version(load={}, save='{}')".format(
            load_version, save_version
        )
        assert FILENAME in versioned_text
        assert expected_version in versioned_text
Ejemplo n.º 3
0
 def test_incorrect_credentials_load(self):
     """Check that unrecognised credential keys make the constructor raise
     ``TypeError``."""
     wrong_keys = {"access_token": "TOKEN", "access_key": "KEY"}
     with pytest.raises(TypeError, match="unexpected keyword argument"):
         ParquetS3DataSet(
             filepath=FILENAME, bucket_name=BUCKET_NAME, credentials=wrong_keys
         )
Ejemplo n.º 4
0
def versioned_s3_data_set(load_version, save_version):
    """Return a ``ParquetS3DataSet`` for ``FILENAME`` in ``BUCKET_NAME`` that
    carries ``Version(load_version, save_version)`` and the hard-coded
    ``YOUR_KEY`` / ``YOUR SECRET`` credentials."""
    credentials = {
        "aws_access_key_id": "YOUR_KEY",
        "aws_secret_access_key": "YOUR SECRET",
    }
    requested_version = Version(load_version, save_version)
    return ParquetS3DataSet(
        filepath=FILENAME,
        bucket_name=BUCKET_NAME,
        credentials=credentials,
        version=requested_version,
    )
Ejemplo n.º 5
0
def s3_data_set(load_args, save_args):
    """Return a ``ParquetS3DataSet`` for ``FILENAME`` in ``BUCKET_NAME`` built
    with the given load/save arguments and the hard-coded ``YOUR_KEY`` /
    ``YOUR SECRET`` credentials."""
    init_kwargs = dict(
        filepath=FILENAME,
        bucket_name=BUCKET_NAME,
        credentials={
            "aws_access_key_id": "YOUR_KEY",
            "aws_secret_access_key": "YOUR SECRET",
        },
        load_args=load_args,
        save_args=save_args,
    )
    return ParquetS3DataSet(**init_kwargs)
Ejemplo n.º 6
0
 def test_load_with_protocol(self, dummy_dataframe, load_args, save_args):
     """Load through an ``s3://bucket/key`` filepath (no ``bucket_name``
     argument) and compare the result with ``dummy_dataframe``."""
     s3_url = "s3://{}/{}".format(BUCKET_NAME, FILENAME)
     credentials = {
         "aws_access_key_id": "YOUR_KEY",
         "aws_secret_access_key": "YOUR SECRET",
     }
     data_set = ParquetS3DataSet(
         filepath=s3_url,
         credentials=credentials,
         load_args=load_args,
         save_args=save_args,
     )
     assert_frame_equal(data_set.load(), dummy_dataframe)
Ejemplo n.º 7
0
    def test_pass_credentials(self, mocker):
        """Check that the entries of ``AWS_CREDENTIALS`` reach the single
        ``boto3.Session.client`` call made while loading."""
        mocker.patch("s3fs.core.boto3.Session.client")
        # Look the attribute up after patching so we hold the mock itself.
        client_mock = s3fs.core.boto3.Session.client

        data_set = ParquetS3DataSet(
            filepath=FILENAME,
            bucket_name=BUCKET_NAME,
            credentials=AWS_CREDENTIALS,
        )
        expected_message = (
            r"Failed while loading data from data set ParquetS3DataSet\(.+\)"
        )
        with pytest.raises(DataSetError, match=expected_message):
            data_set.load()

        assert client_mock.call_count == 1
        positional, keyword = client_mock.call_args_list[0]
        assert positional == ("s3",)
        for name, value in AWS_CREDENTIALS.items():
            assert keyword[name] == value
Ejemplo n.º 8
0
 def test_save_with_protocol(self, load_args, save_args):
     """Save a frame through an ``s3://bucket/key`` filepath, load it back,
     and check that the round trip yields an equal frame."""
     frame = pd.DataFrame(
         {"col1": ["a", "b"], "col2": ["c", "d"], "col3": ["e", "f"]}
     )
     s3_url = "s3://{}/{}".format(BUCKET_NAME, FILENAME)
     credentials = {
         "aws_access_key_id": "YOUR_KEY",
         "aws_secret_access_key": "YOUR SECRET",
     }
     data_set = ParquetS3DataSet(
         filepath=s3_url,
         credentials=credentials,
         load_args=load_args,
         save_args=save_args,
     )
     data_set.save(frame)
     assert_frame_equal(data_set.load(), frame)
Ejemplo n.º 9
0
 def test_s3fs_args_propagated(self, mocker):
     """Check that entries of ``s3fs_args`` are forwarded as keyword
     arguments to the patched ``S3FileSystem``."""
     filesystem_mock = mocker.patch(
         "kedro.contrib.io.parquet.parquet_s3.S3FileSystem"
     )
     extra_args = {"custom": 42}
     ParquetS3DataSet(
         FILENAME, BUCKET_NAME, AWS_CREDENTIALS, s3fs_args=extra_args
     )
     # client_kwargs may hold anything; only the custom entry is pinned.
     filesystem_mock.assert_called_with(client_kwargs=mocker.ANY, custom=42)
Ejemplo n.º 10
0
 def test_incomplete_credentials_load(self, bad_credentials):
     """Check that constructing the data set with the ``bad_credentials``
     fixture raises ``PartialCredentialsError``."""
     init_kwargs = dict(
         filepath=FILENAME,
         bucket_name=BUCKET_NAME,
         credentials=bad_credentials,
     )
     with pytest.raises(PartialCredentialsError):
         ParquetS3DataSet(**init_kwargs)