def test_to_parquet_with_kms(
        bucket,
        database,
        kms_key,
):
    """Write a sample CSV as Parquet with KMS settings and read it back.

    A Session is created with ``s3_additional_kwargs`` carrying the
    ``aws:kms`` server-side-encryption options for ``kms_key``. The sample
    data is written under ``s3://{bucket}/test/`` and then queried through
    Athena, retrying up to ten times (one second apart) until the row
    counts agree.
    """
    kms_session = Session(s3_additional_kwargs={
        "ServerSideEncryption": "aws:kms",
        "SSEKMSKeyId": kms_key,
    })
    expected = pandas.read_csv("data_samples/nano.csv")
    kms_session.pandas.to_parquet(
        dataframe=expected,
        database=database,
        path=f"s3://{bucket}/test/",
        preserve_index=False,
        mode="overwrite",
        procs_cpu_bound=1,
    )

    # Poll Athena until the row count matches or the attempts run out.
    actual = None
    for _ in range(10):
        actual = kms_session.pandas.read_sql_athena(
            sql="select * from test",
            database=database,
        )
        if len(actual.index) == len(expected.index):
            break
        sleep(1)

    assert len(expected.index) == len(actual.index)
    assert len(expected.columns) == len(actual.columns)

    # Compare the "name" value of the first row whose id is 0 on both sides.
    expected_name = expected[expected["id"] == 0].iloc[0]["name"]
    actual_name = actual[actual["id"] == 0].iloc[0]["name"]
    assert expected_name == actual_name
def session():
    """Yield a Session backed by a Spark session with S3A retry settings.

    The Spark log level is set to ``ERROR`` and three ``fs.s3a.*`` retry
    options are written into the Hadoop configuration before the Session
    is yielded.
    """
    spark_session: SparkSession = SparkSession.builder.appName(
        "AWS Wrangler Test").getOrCreate()
    spark_session.sparkContext.setLogLevel("ERROR")

    s3a_settings = (
        ("fs.s3a.attempts.maximum", "200"),
        ("fs.s3a.retry.interval", "1000ms"),
        ("fs.s3a.retry.limit", "200"),
    )
    for option, value in s3a_settings:
        spark_session.sparkContext._jsc.hadoopConfiguration().set(
            option, value)

    yield Session(spark_session=spark_session)
def session():
    """Yield a Session wrapping a Spark session named "AWS Wrangler Test"."""
    builder = SparkSession.builder.appName("AWS Wrangler Test")
    spark = builder.getOrCreate()
    wrangler_session = Session(spark_session=spark)
    yield wrangler_session
def session():
    """Yield a Session constructed with no arguments."""
    wrangler_session = Session()
    yield wrangler_session
def test_cpu_count():
    """Run assert_account_id on a Session with explicit procs/retry limits."""
    limited_session = Session(
        procs_cpu_bound=1,
        procs_io_bound=1,
        botocore_max_retries=1,
    )
    assert_account_id(limited_session)
def test_from_boto3_region_name(default_session):
    """Run assert_account_id on a Session reusing default_session's region."""
    region = default_session.region_name
    regional_session = Session(region_name=region)
    assert_account_id(regional_session)
def test_from_boto3_keys(default_session):
    """Run assert_account_id on a Session built from explicit access keys.

    The key pair is copied from ``default_session``.
    """
    access_key = default_session.aws_access_key_id
    secret_key = default_session.aws_secret_access_key
    keyed_session = Session(
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
    )
    assert_account_id(keyed_session)
def test_from_boto3_session(default_session):
    """Run assert_account_id on a Session wrapping an existing boto3 session."""
    boto3_session = default_session.boto3_session
    wrapped_session = Session(boto3_session=boto3_session)
    assert_account_id(wrapped_session)
def test_session_region():
    """Run assert_account_id on a Session pinned to the us-east-1 region."""
    regional_session = Session(region_name="us-east-1")
    assert_account_id(regional_session)
def default_session():
    """Yield a Session constructed with no arguments."""
    base_session = Session()
    yield base_session