Beispiel #1
0
def test_write_avro(setup_bucket_w_dfs, test_bucket, test_df, test_df_keys):
    """Tests that writing files stored as Parquet works properly.

    NOTE: despite the ``avro`` in the test name, the body exercises the
    Parquet path: it iterates the ``'pq'`` keys and reads the result back
    with ``pd.read_parquet``. The name is kept unchanged so existing test
    selections keep working.

    For every key in ``test_df_keys['pq']`` the frame is written with
    ``write``, downloaded again through a boto3 S3 client into a temporary
    file, parsed, and compared to ``test_df`` with ``DataFrame.equals``.

    ``setup_bucket_w_dfs`` is requested as a fixture but never referenced in
    the body.
    NOTE(review): the name suggests the bucket already holds dataframes; if
    they live under these same keys the round trip could pass without
    ``write`` doing anything -- confirm against the fixture definition.
    """
    s3 = boto3.client('s3')

    for key in test_df_keys['pq']:
        write(test_df, key, test_bucket)

        # Fetch the object that was just written and parse it from local disk.
        with NamedTemporaryFile() as tmpfile:
            s3.download_file(test_bucket, key, tmpfile.name)
            df = pd.read_parquet(tmpfile.name)
            assert df.equals(test_df)
Beispiel #2
0
def test_write_csv(setup_bucket_wo_contents, test_bucket, test_df,
                   test_df_keys):
    """Round-trip check for frames written out as CSV.

    Each key in ``test_df_keys['csv']`` is written with ``write``, pulled
    back down from ``test_bucket`` into a temporary file, parsed with
    ``pd.read_csv`` and compared against ``test_df``.

    ``setup_bucket_wo_contents`` is requested as a fixture only; the body
    never references it.
    """
    client = boto3.client('s3')

    for csv_key in test_df_keys['csv']:
        write(test_df, csv_key, test_bucket)

        with NamedTemporaryFile() as local_copy:
            client.download_file(test_bucket, csv_key, local_copy.name)
            # What comes back from S3 must parse to the frame we sent.
            assert pd.read_csv(local_copy.name).equals(test_df)
Beispiel #3
0
def test_write_pkl(setup_bucket_w_dfs, test_bucket, test_df, test_df_keys):
    """Round-trip check for frames written out as pickles.

    Each key in ``test_df_keys['pkl']`` is written with ``write``, downloaded
    from ``test_bucket`` into a temporary file, unpickled and compared
    against ``test_df``.

    ``setup_bucket_w_dfs`` is requested as a fixture only; the body never
    references it.
    """
    client = boto3.client('s3')

    for pkl_key in test_df_keys['pkl']:
        write(test_df, pkl_key, test_bucket)

        with NamedTemporaryFile() as local_copy:
            client.download_file(test_bucket, pkl_key, local_copy.name)
            # The download writes to the temp file by path, so the data is
            # read through a fresh handle opened by name after the download
            # rather than through the handle NamedTemporaryFile returned.
            with open(local_copy.name, 'rb') as pickled:
                restored = pickle.load(pickled)
            assert restored.equals(test_df)
Beispiel #4
0
def test_write_psv_xz(setup_bucket_wo_contents, test_bucket, test_df,
                      test_df_keys):
    """
    Round-trip check for frames written out as xz-compressed,
    pipe-separated files.

    Each key in ``test_df_keys['psv.xz']`` is written with ``write``,
    downloaded from ``test_bucket`` into a temporary file, parsed with
    ``pd.read_csv(..., sep='|')`` and compared against ``test_df``.

    ``setup_bucket_wo_contents`` is requested as a fixture only; the body
    never references it.
    """
    client = boto3.client('s3')

    for psv_key in test_df_keys['psv.xz']:
        write(test_df, psv_key, test_bucket)

        # The temp file keeps the ".psv.xz" suffix; read_csv is called
        # without an explicit compression argument, so it relies on the
        # file name to detect the xz compression.
        with NamedTemporaryFile(suffix=".psv.xz") as local_copy:
            client.download_file(test_bucket, psv_key, local_copy.name)
            assert pd.read_csv(local_copy.name, sep='|').equals(test_df)