def test_to_parquet_with_kms(
        bucket,
        database,
        kms_key,
):
    """Write a sample CSV as Parquet with KMS encryption args and read it back.

    A ``Session`` is created with ``ServerSideEncryption``/``SSEKMSKeyId``
    passed through ``s3_additional_kwargs``. The sample dataframe is written
    to ``s3://{bucket}/test/`` and then queried through Athena, polling up to
    10 times (one second apart) until the row count matches the source.

    Args:
        bucket: Name of the S3 bucket used to build the target path.
        database: Database name passed to both the write and the Athena read.
        kms_key: KMS key id forwarded as ``SSEKMSKeyId``.
    """
    session_inner = Session(s3_additional_kwargs={
        "ServerSideEncryption": "aws:kms",
        "SSEKMSKeyId": kms_key,
    })
    expected = pandas.read_csv("data_samples/nano.csv")
    session_inner.pandas.to_parquet(
        dataframe=expected,
        database=database,
        path=f"s3://{bucket}/test/",
        preserve_index=False,
        mode="overwrite",
        procs_cpu_bound=1,
    )

    # NOTE(review): the retry loop presumably absorbs read-after-write
    # delays between the write and Athena -- confirm.
    actual = None
    for _ in range(10):
        actual = session_inner.pandas.read_sql_athena(
            sql="select * from test",
            database=database,
        )
        if len(actual.index) == len(expected.index):
            break
        sleep(1)

    # Same number of rows and columns on both sides.
    assert len(expected.index) == len(actual.index)
    assert len(expected.columns) == len(actual.columns)

    # The row with id == 0 must carry the same name on both sides.
    expected_name = expected[expected["id"] == 0].iloc[0]["name"]
    actual_name = actual[actual["id"] == 0].iloc[0]["name"]
    assert expected_name == actual_name
Beispiel #2
0
def session():
    """Yield a ``Session`` backed by a Spark session tuned for S3A access.

    Gets or creates a ``SparkSession`` named "AWS Wrangler Test", sets the
    Spark log level to ``ERROR`` and applies three ``fs.s3a.*`` Hadoop
    configuration entries before yielding the wrapped ``Session``.

    NOTE(review): this generator is presumably registered as a pytest
    fixture; the decorator is not visible here -- confirm.
    """
    spark_session: SparkSession = SparkSession.builder.appName("AWS Wrangler Test").getOrCreate()
    spark_context = spark_session.sparkContext
    spark_context.setLogLevel("ERROR")

    # Hadoop options applied in the same order as listed.
    s3a_options = (
        ("fs.s3a.attempts.maximum", "200"),
        ("fs.s3a.retry.interval", "1000ms"),
        ("fs.s3a.retry.limit", "200"),
    )
    for option, value in s3a_options:
        spark_context._jsc.hadoopConfiguration().set(option, value)

    yield Session(spark_session=spark_session)
Beispiel #3
0
def session():
    """Yield a ``Session`` wrapping a Spark session named "AWS Wrangler Test".

    NOTE(review): presumably used as a pytest fixture; the decorator is not
    visible here -- confirm.
    """
    builder = SparkSession.builder.appName("AWS Wrangler Test")
    spark = builder.getOrCreate()
    yield Session(spark_session=spark)
Beispiel #4
0
def session():
    """Yield a ``Session`` constructed with no arguments.

    NOTE(review): presumably used as a pytest fixture; the decorator is not
    visible here -- confirm.
    """
    default = Session()
    yield default
def test_cpu_count():
    """Check a ``Session`` built with process and retry limits all set to 1.

    The session is handed to ``assert_account_id`` for verification.
    """
    limited = Session(
        procs_cpu_bound=1,
        procs_io_bound=1,
        botocore_max_retries=1,
    )
    assert_account_id(limited)
def test_from_boto3_region_name(default_session):
    """Check a ``Session`` built from the default session's region name.

    Args:
        default_session: Session object whose ``region_name`` is reused.
    """
    region = default_session.region_name
    assert_account_id(Session(region_name=region))
def test_from_boto3_keys(default_session):
    """Check a ``Session`` built from the default session's access keys.

    Args:
        default_session: Session object whose ``aws_access_key_id`` and
            ``aws_secret_access_key`` are reused.
    """
    credentials = {
        "aws_access_key_id": default_session.aws_access_key_id,
        "aws_secret_access_key": default_session.aws_secret_access_key,
    }
    assert_account_id(Session(**credentials))
def test_from_boto3_session(default_session):
    """Check a ``Session`` built around an existing boto3 session object.

    Args:
        default_session: Session object whose ``boto3_session`` is reused.
    """
    wrapped = Session(boto3_session=default_session.boto3_session)
    assert_account_id(wrapped)
def test_session_region():
    """Check a ``Session`` built with the explicit region "us-east-1"."""
    regional = Session(region_name="us-east-1")
    assert_account_id(regional)
def default_session():
    """Yield a ``Session`` constructed with no arguments.

    NOTE(review): presumably a pytest fixture consumed by the tests that take
    a ``default_session`` parameter; the decorator is not visible -- confirm.
    """
    baseline = Session()
    yield baseline