def test_s3fs_args_propagated(self, mocker, mocked_s3_object):
    """Check that ``s3fs_args`` entries reach the ``S3FileSystem`` call.

    ``S3FileSystem`` is patched under ``kedro.io.csv_s3``, so the call made
    while constructing the data set is recorded on the mock.
    ``mocked_s3_object`` is requested but never referenced in the body.
    """
    filesystem_cls = mocker.patch("kedro.io.csv_s3.S3FileSystem")
    extra_fs_args = {"custom": 42}

    CSVS3DataSet(FILENAME, BUCKET_NAME, AWS_CREDENTIALS, s3fs_args=extra_fs_args)

    # ``client_kwargs`` may hold any value; only the custom entry is pinned.
    filesystem_cls.assert_called_with(client_kwargs=mocker.ANY, custom=42)
def test_version_str_repr(self, load_version, save_version):
    """Check when version details appear in ``str()`` of the data set.

    The unversioned instance must mention the file name but not the word
    "version"; the versioned instance must mention the file name and the
    formatted ``Version`` value.
    """
    plain = CSVS3DataSet(filepath=FILENAME, bucket_name=BUCKET_NAME)
    versioned = CSVS3DataSet(
        filepath=FILENAME,
        bucket_name=BUCKET_NAME,
        version=Version(load_version, save_version),
    )

    plain_text = str(plain)
    assert FILENAME in plain_text
    assert "version" not in plain_text

    versioned_text = str(versioned)
    assert FILENAME in versioned_text
    # Only the save version is wrapped in quotes in the expected fragment.
    expected_fragment = "version=Version(load={}, save='{}')".format(
        load_version, save_version
    )
    assert expected_fragment in versioned_text
def test_empty_credentials_load(self, bad_credentials):
    """Check that loading with ``bad_credentials`` raises ``DataSetError``.

    The data set is constructed outside the ``raises`` block, so only the
    ``load()`` call is expected to fail.
    """
    data_set = CSVS3DataSet(
        filepath=FILENAME,
        bucket_name=BUCKET_NAME,
        credentials=bad_credentials,
    )
    expected_message = r"Failed while loading data from data set CSVS3DataSet\(.+\)"

    with pytest.raises(DataSetError, match=expected_message):
        data_set.load()
def test_load_args_propagated(self, mocker, mocked_s3_object):
    """Check that ``load_args`` become the keyword arguments of ``read_csv``.

    ``pd.read_csv`` is patched under ``kedro.io.csv_s3``; the keyword
    arguments of its first recorded call must equal the ``load_args`` given
    to the data set. ``mocked_s3_object`` is requested but never referenced
    in the body.
    """
    read_csv = mocker.patch("kedro.io.csv_s3.pd.read_csv")
    data_set = CSVS3DataSet(
        FILENAME, BUCKET_NAME, AWS_CREDENTIALS, load_args={"custom": 42}
    )

    data_set.load()

    # A recorded call unpacks as (positional args, keyword args).
    _, first_call_kwargs = read_csv.call_args_list[0]
    assert first_call_kwargs == {"custom": 42}
def s3_data_set(load_args, save_args):
    """Build an unversioned ``CSVS3DataSet`` for the test bucket and file.

    Args:
        load_args: Forwarded unchanged as the data set's ``load_args``.
        save_args: Forwarded unchanged as the data set's ``save_args``.

    Returns:
        A ``CSVS3DataSet`` configured with ``FILENAME``, ``BUCKET_NAME`` and
        ``AWS_CREDENTIALS``.
    """
    # NOTE(review): presumably registered as a pytest fixture; the decorator
    # is not visible in this part of the file.
    options = dict(
        filepath=FILENAME,
        bucket_name=BUCKET_NAME,
        credentials=AWS_CREDENTIALS,
        load_args=load_args,
        save_args=save_args,
    )
    return CSVS3DataSet(**options)
def versioned_s3_data_set(load_args, save_args, load_version, save_version):
    """Build a versioned ``CSVS3DataSet`` for the test bucket and file.

    Args:
        load_args: Forwarded unchanged as the data set's ``load_args``.
        save_args: Forwarded unchanged as the data set's ``save_args``.
        load_version: First argument given to ``Version``.
        save_version: Second argument given to ``Version``.

    Returns:
        A ``CSVS3DataSet`` configured with ``FILENAME``, ``BUCKET_NAME``,
        ``AWS_CREDENTIALS`` and the requested ``Version``.
    """
    # NOTE(review): presumably registered as a pytest fixture; the decorator
    # is not visible in this part of the file.
    options = dict(
        filepath=FILENAME,
        bucket_name=BUCKET_NAME,
        credentials=AWS_CREDENTIALS,
        load_args=load_args,
        save_args=save_args,
        version=Version(load_version, save_version),
    )
    return CSVS3DataSet(**options)
def test_incorrect_credentials_load(self):
    """Check that unsupported credential keys make construction fail.

    Building the data set with the keys ``access_token`` and ``access_key``
    must raise ``TypeError`` mentioning an unexpected keyword argument.
    """
    unsupported_credentials = {"access_token": "TOKEN", "access_key": "KEY"}

    with pytest.raises(TypeError, match="unexpected keyword argument"):
        CSVS3DataSet(
            filepath=FILENAME,
            bucket_name=BUCKET_NAME,
            credentials=unsupported_credentials,
        )
def test_load_with_protocol(self, dummy_dataframe, load_args, save_args):
    """Check loading when ``filepath`` is a full ``s3://bucket/key`` URL.

    No ``bucket_name`` is passed here; the bucket is part of the URL. The
    loaded frame must equal ``dummy_dataframe``.
    """
    s3_url = "s3://{}/{}".format(BUCKET_NAME, FILENAME)
    credentials = {
        "aws_access_key_id": "YOUR_KEY",
        "aws_secret_access_key": "YOUR SECRET",
    }
    data_set = CSVS3DataSet(
        filepath=s3_url,
        credentials=credentials,
        load_args=load_args,
        save_args=save_args,
    )

    assert_frame_equal(data_set.load(), dummy_dataframe)
def test_pass_credentials(self, mocker):
    """Check that the AWS credentials reach the boto3 client call.

    ``boto3.Session.client`` (as seen through ``s3fs.core``) is patched, and
    ``load()`` is then expected to fail with ``DataSetError``. The single
    recorded client call must be for ``"s3"`` and must carry every entry of
    ``AWS_CREDENTIALS`` as a keyword argument.
    """
    mocker.patch("s3fs.core.boto3.Session.client")
    # After patching, this attribute is the mock that records the calls.
    client_factory = s3fs.core.boto3.Session.client
    data_set = CSVS3DataSet(
        filepath=FILENAME,
        bucket_name=BUCKET_NAME,
        credentials=AWS_CREDENTIALS,
    )
    expected_message = r"Failed while loading data from data set CSVS3DataSet\(.+\)"

    with pytest.raises(DataSetError, match=expected_message):
        data_set.load()

    # These checks run after the failed load, outside the ``raises`` block.
    assert client_factory.call_count == 1
    positional, keywords = client_factory.call_args_list[0]
    assert positional == ("s3",)
    for name, expected in AWS_CREDENTIALS.items():
        assert keywords[name] == expected
def test_save_with_protocol(self, load_args, save_args):
    """Check a save/load round trip when ``filepath`` is an ``s3://`` URL.

    A small three-column frame is saved through the data set and read back
    from the same instance; the result must equal what was saved.
    """
    s3_url = "s3://{}/{}".format(BUCKET_NAME, FILENAME)
    credentials = {
        "aws_access_key_id": "YOUR_KEY",
        "aws_secret_access_key": "YOUR SECRET",
    }
    data_set = CSVS3DataSet(
        filepath=s3_url,
        credentials=credentials,
        load_args=load_args,
        save_args=save_args,
    )
    frame_to_save = pd.DataFrame(
        {"col1": ["a", "b"], "col2": ["c", "d"], "col3": ["e", "f"]}
    )

    data_set.save(frame_to_save)

    assert_frame_equal(data_set.load(), frame_to_save)
def test_incomplete_credentials_load(self, bad_credentials):
    """Check that incomplete credentials are rejected at construction time.

    Creating the data set with ``bad_credentials`` must raise
    ``PartialCredentialsError``; no ``load()`` call is involved.
    """
    init_kwargs = {
        "filepath": FILENAME,
        "bucket_name": BUCKET_NAME,
        "credentials": bad_credentials,
    }

    with pytest.raises(PartialCredentialsError):
        CSVS3DataSet(**init_kwargs)