def test_empty_credentials_load(self, bad_credentials):
    """Check that `load` raises `DataSetError` for the `bad_credentials` fixture.

    The data set is constructed successfully; only the subsequent `load`
    call is expected to fail with the wrapped loading error message.
    """
    expected_message = r"Failed while loading data from data set HDFS3DataSet\(.+\)"
    unauthorised_data_set = HDFS3DataSet(
        filepath=FILENAME,
        bucket_name=BUCKET_NAME,
        key="test_hdf",
        credentials=bad_credentials,
    )

    with pytest.raises(DataSetError, match=expected_message):
        unauthorised_data_set.load()
def test_incorrect_credentials_load(self):
    """Check that unrecognised credential keys stop the data set from being built.

    Construction itself is expected to raise `TypeError` complaining about
    an unexpected keyword argument.
    """
    unknown_key_credentials = {"access_token": "TOKEN", "access_key": "KEY"}

    with pytest.raises(TypeError, match="unexpected keyword argument"):
        HDFS3DataSet(
            filepath=FILENAME,
            bucket_name=BUCKET_NAME,
            key="test_hdf",
            credentials=unknown_key_credentials,
        )
def test_version_str_repr(self, load_version, save_version):
    """Check that `str()` of a data set mentions the version only when one is set.

    An unversioned instance must show the file name and no "version" text;
    a versioned instance must show the file name and the full `Version(...)`
    fragment built from the `load_version` / `save_version` fixtures.
    """
    unversioned = HDFS3DataSet(
        filepath=FILENAME, bucket_name=BUCKET_NAME, key="test_hdf"
    )
    requested_version = Version(load_version, save_version)
    versioned = HDFS3DataSet(
        filepath=FILENAME,
        bucket_name=BUCKET_NAME,
        credentials=AWS_CREDENTIALS,
        key="test_hdf",
        version=requested_version,
    )

    # Unversioned instance: file name present, no version information.
    assert FILENAME in str(unversioned)
    assert "version" not in str(unversioned)

    # Versioned instance: file name present, plus the exact version fragment.
    assert FILENAME in str(versioned)
    expected_fragment = "version=Version(load={}, save='{}')".format(
        load_version, save_version
    )
    assert expected_fragment in str(versioned)
def test_save_and_load_with_protocol(self, dummy_dataframe):
    """Check a save/load round trip when `filepath` is a full `s3://` URL.

    No `bucket_name` is passed here; the bucket is part of the URL built
    from `BUCKET_NAME` and `FILENAME`. The reloaded frame must equal the
    frame that was saved.
    """
    s3_url = "s3://{}/{}".format(BUCKET_NAME, FILENAME)
    placeholder_credentials = {
        "aws_access_key_id": "YOUR_KEY",
        "aws_secret_access_key": "YOUR SECRET",
    }
    url_data_set = HDFS3DataSet(
        filepath=s3_url,
        credentials=placeholder_credentials,
        key=HDF_KEY,
    )

    url_data_set.save(dummy_dataframe)

    assert_frame_equal(url_data_set.load(), dummy_dataframe)
def test_exists(self, hdf_data_set, dummy_dataframe):
    """Check `exists` before saving, after saving, and for a different key."""
    # Nothing has been saved through the fixture yet, so it must not exist.
    present_before_save = hdf_data_set.exists()
    assert not present_before_save

    # Once the frame is saved, both the file and the key are there.
    hdf_data_set.save(dummy_dataframe)
    present_after_save = hdf_data_set.exists()
    assert present_after_save

    # A data set pointing at FILENAME with a key that was never written
    # must report that it does not exist.
    other_key_data_set = HDFS3DataSet(
        filepath=FILENAME,
        bucket_name=BUCKET_NAME,
        key="test_hdf_different_key",
        credentials=AWS_CREDENTIALS,
    )
    assert not other_key_data_set.exists()