Exemple #1
0
 def test_empty_credentials_load(self, bad_credentials):
     """Check that ``load`` raises ``DataSetError`` when the data set is
     built with the credentials from the ``bad_credentials`` fixture."""
     expected = r"Failed while loading data from data set HDFS3DataSet\(.+\)"
     data_set = HDFS3DataSet(
         credentials=bad_credentials,
         key="test_hdf",
         bucket_name=BUCKET_NAME,
         filepath=FILENAME,
     )
     # Construction is expected to succeed; only the load call may fail.
     with pytest.raises(DataSetError, match=expected):
         data_set.load()
Exemple #2
0
 def test_incorrect_credentials_load(self):
     """Check that unrecognised credential keys make instantiation fail
     with a ``TypeError``."""
     wrong_credentials = {"access_token": "TOKEN", "access_key": "KEY"}
     with pytest.raises(TypeError, match="unexpected keyword argument"):
         HDFS3DataSet(
             credentials=wrong_credentials,
             key="test_hdf",
             bucket_name=BUCKET_NAME,
             filepath=FILENAME,
         )
Exemple #3
0
    def test_version_str_repr(self, load_version, save_version):
        """Check that ``str()`` of a data set contains the version only when
        a ``Version`` was passed to the constructor."""
        unversioned = HDFS3DataSet(
            key="test_hdf", bucket_name=BUCKET_NAME, filepath=FILENAME
        )
        version = Version(load_version, save_version)
        versioned = HDFS3DataSet(
            version=version,
            key="test_hdf",
            credentials=AWS_CREDENTIALS,
            bucket_name=BUCKET_NAME,
            filepath=FILENAME,
        )

        # Without a version, only the file path shows up.
        unversioned_text = str(unversioned)
        assert FILENAME in unversioned_text
        assert "version" not in unversioned_text

        # With a version, both the file path and the version show up.
        versioned_text = str(versioned)
        assert FILENAME in versioned_text
        expected_version = "version=Version(load={}, save='{}')".format(
            load_version, save_version
        )
        assert expected_version in versioned_text
Exemple #4
0
    def test_save_and_load_with_protocol(self, dummy_dataframe):
        """Check that a dataframe survives a save/load round trip when the
        filepath is given as a full ``s3://bucket/file`` path."""
        s3_path = "s3://{}/{}".format(BUCKET_NAME, FILENAME)
        credentials = {
            "aws_access_key_id": "YOUR_KEY",
            "aws_secret_access_key": "YOUR SECRET",
        }
        # No bucket_name is passed here; the bucket is part of the path.
        data_set = HDFS3DataSet(filepath=s3_path, credentials=credentials, key=HDF_KEY)

        data_set.save(dummy_dataframe)

        assert_frame_equal(data_set.load(), dummy_dataframe)
Exemple #5
0
    def test_exists(self, hdf_data_set, dummy_dataframe):
        """Check ``exists`` before saving, after saving, and for a data set
        that uses a different HDF key."""
        # Nothing has been saved yet, so the data set must not exist.
        assert not hdf_data_set.exists()

        # Once the dataframe is saved, both the file and the key are present.
        hdf_data_set.save(dummy_dataframe)
        assert hdf_data_set.exists()

        # A data set built from FILENAME/BUCKET_NAME with another key must
        # report that it does not exist (presumably the same file that the
        # fixture wrote to; confirm against the fixture definition).
        other_key_data_set = HDFS3DataSet(
            credentials=AWS_CREDENTIALS,
            key="test_hdf_different_key",
            bucket_name=BUCKET_NAME,
            filepath=FILENAME,
        )
        assert not other_key_data_set.exists()