Ejemplo n.º 1
0
 def test_s3fs_args_propagated(self, mocker, mocked_s3_object):
     """Check that ``s3fs_args`` are forwarded to the ``S3FileSystem`` call."""
     # ``mocked_s3_object`` is requested as a fixture only; it is not
     # referenced directly in this test.
     fs_class_mock = mocker.patch("kedro.io.csv_s3.S3FileSystem")
     extra_fs_args = {"custom": 42}
     CSVS3DataSet(
         FILENAME, BUCKET_NAME, AWS_CREDENTIALS, s3fs_args=extra_fs_args
     )
     # ``client_kwargs`` may hold anything; only ``custom`` is pinned.
     fs_class_mock.assert_called_with(client_kwargs=mocker.ANY, custom=42)
Ejemplo n.º 2
0
    def test_version_str_repr(self, load_version, save_version):
        """Check that ``str()`` of a data set mentions the version only when
        a version was supplied."""
        ds = CSVS3DataSet(filepath=FILENAME, bucket_name=BUCKET_NAME)
        ds_versioned = CSVS3DataSet(
            filepath=FILENAME,
            bucket_name=BUCKET_NAME,
            version=Version(load_version, save_version),
        )
        plain_text = str(ds)
        versioned_text = str(ds_versioned)
        expected_version = "version=Version(load={}, save='{}')".format(
            load_version, save_version
        )

        # Unversioned instance: file name present, no version information.
        assert FILENAME in plain_text
        assert "version" not in plain_text

        # Versioned instance: file name and the full version description.
        assert FILENAME in versioned_text
        assert expected_version in versioned_text
Ejemplo n.º 3
0
 def test_empty_credentials_load(self, bad_credentials):
     """Check that loading with ``bad_credentials`` raises ``DataSetError``."""
     error_re = r"Failed while loading data from data set CSVS3DataSet\(.+\)"
     data_set = CSVS3DataSet(
         filepath=FILENAME,
         bucket_name=BUCKET_NAME,
         credentials=bad_credentials,
     )
     with pytest.raises(DataSetError, match=error_re):
         data_set.load()
Ejemplo n.º 4
0
 def test_load_args_propagated(self, mocker, mocked_s3_object):
     """Check that ``load_args`` reach ``pd.read_csv`` as keyword arguments."""
     # ``mocked_s3_object`` is requested as a fixture only; it is not
     # referenced directly in this test.
     read_csv_mock = mocker.patch("kedro.io.csv_s3.pd.read_csv")
     data_set = CSVS3DataSet(
         FILENAME, BUCKET_NAME, AWS_CREDENTIALS, load_args={"custom": 42}
     )
     data_set.load()
     # A call record unpacks into (positional args, keyword args).
     _, first_call_kwargs = read_csv_mock.call_args_list[0]
     assert first_call_kwargs == {"custom": 42}
Ejemplo n.º 5
0
def s3_data_set(load_args, save_args):
    """Return a ``CSVS3DataSet`` for ``FILENAME`` in ``BUCKET_NAME``, created
    with ``AWS_CREDENTIALS`` and the given load/save arguments."""
    constructor_kwargs = {
        "filepath": FILENAME,
        "bucket_name": BUCKET_NAME,
        "credentials": AWS_CREDENTIALS,
        "load_args": load_args,
        "save_args": save_args,
    }
    return CSVS3DataSet(**constructor_kwargs)
Ejemplo n.º 6
0
def versioned_s3_data_set(load_args, save_args, load_version, save_version):
    """Return a ``CSVS3DataSet`` like the unversioned one, but carrying a
    ``Version`` built from ``load_version`` and ``save_version``."""
    version = Version(load_version, save_version)
    constructor_kwargs = {
        "filepath": FILENAME,
        "bucket_name": BUCKET_NAME,
        "credentials": AWS_CREDENTIALS,
        "load_args": load_args,
        "save_args": save_args,
        "version": version,
    }
    return CSVS3DataSet(**constructor_kwargs)
Ejemplo n.º 7
0
 def test_incorrect_credentials_load(self):
     """Check that unrecognised credential keys make the constructor raise
     ``TypeError``."""
     wrong_keys = {"access_token": "TOKEN", "access_key": "KEY"}
     with pytest.raises(TypeError, match="unexpected keyword argument"):
         CSVS3DataSet(
             filepath=FILENAME, bucket_name=BUCKET_NAME, credentials=wrong_keys
         )
Ejemplo n.º 8
0
 def test_load_with_protocol(self, dummy_dataframe, load_args, save_args):
     """Check that a data set addressed by a full ``s3://`` path loads the
     expected data frame."""
     # No ``bucket_name`` is passed: the bucket is part of the path itself.
     s3_url = "s3://{}/{}".format(BUCKET_NAME, FILENAME)
     credentials = {
         "aws_access_key_id": "YOUR_KEY",
         "aws_secret_access_key": "YOUR SECRET",
     }
     data_set = CSVS3DataSet(
         filepath=s3_url,
         credentials=credentials,
         load_args=load_args,
         save_args=save_args,
     )
     assert_frame_equal(data_set.load(), dummy_dataframe)
Ejemplo n.º 9
0
    def test_pass_credentials(self, mocker):
        """Check that the AWS credentials appear in the keyword arguments of
        the ``boto3`` ``Session.client`` call made while loading."""
        mocker.patch("s3fs.core.boto3.Session.client")
        data_set = CSVS3DataSet(
            filepath=FILENAME,
            bucket_name=BUCKET_NAME,
            credentials=AWS_CREDENTIALS,
        )
        error_re = r"Failed while loading data from data set CSVS3DataSet\(.+\)"
        # The load itself is expected to raise; only the recorded client
        # call is inspected afterwards.
        with pytest.raises(DataSetError, match=error_re):
            data_set.load()

        client_mock = s3fs.core.boto3.Session.client
        assert client_mock.call_count == 1
        call_args, call_kwargs = client_mock.call_args_list[0]
        assert call_args == ("s3",)
        for key, expected_value in AWS_CREDENTIALS.items():
            assert call_kwargs[key] == expected_value
Ejemplo n.º 10
0
 def test_save_with_protocol(self, load_args, save_args):
     """Check that data saved through a full ``s3://`` path loads back
     unchanged."""
     # No ``bucket_name`` is passed: the bucket is part of the path itself.
     s3_url = "s3://{}/{}".format(BUCKET_NAME, FILENAME)
     credentials = {
         "aws_access_key_id": "YOUR_KEY",
         "aws_secret_access_key": "YOUR SECRET",
     }
     data_set = CSVS3DataSet(
         filepath=s3_url,
         credentials=credentials,
         load_args=load_args,
         save_args=save_args,
     )
     expected = pd.DataFrame(
         {"col1": ["a", "b"], "col2": ["c", "d"], "col3": ["e", "f"]}
     )
     data_set.save(expected)
     assert_frame_equal(data_set.load(), expected)
Ejemplo n.º 11
0
 def test_incomplete_credentials_load(self, bad_credentials):
     """Check that incomplete credentials make the constructor raise
     ``PartialCredentialsError``."""
     constructor_kwargs = {
         "filepath": FILENAME,
         "bucket_name": BUCKET_NAME,
         "credentials": bad_credentials,
     }
     with pytest.raises(PartialCredentialsError):
         CSVS3DataSet(**constructor_kwargs)