def test_read_from_non_existent_table(self):
    """Loading from a table name that is not expected to exist raises ``DataSetError``.

    The data set points at ``default_1.table_doesnt_exist`` and ``load()``
    must fail with a message naming that fully-qualified table.
    """
    dataset = SparkHiveDataSet(
        database="default_1", table="table_doesnt_exist", write_mode="insert"
    )
    # ``match`` is applied as a regular expression (re.search), so the dot
    # separating database and table is escaped to match literally instead of
    # matching any character.
    with pytest.raises(
        DataSetError,
        match=r"requested table not found: default_1\.table_doesnt_exist",
    ):
        dataset.load()
def test_insert_to_non_existent_table(self):
    """Save with ``write_mode="insert"`` into ``table_not_yet_created`` and read it back.

    The loaded rows, ordered by ``name``, must equal the frame that was saved.
    """
    hive_dataset = SparkHiveDataSet(
        database="default_1",
        table="table_not_yet_created",
        write_mode="insert",
    )
    hive_dataset.save(_generate_spark_df_one())

    # Both sides are sorted on the same column before the comparison.
    loaded_sorted = hive_dataset.load().sort("name")
    expected_sorted = _generate_spark_df_one().sort("name")
    assert_df_equal(loaded_sorted, expected_sorted)
def test_overwrite_empty_table(self, spark_hive_session):
    """Overwrite a freshly created table and check the stored contents.

    A ``(name string, age integer)`` table is created through the session
    fixture, the data set saves into it with ``write_mode="overwrite"``, and
    the loaded frame must equal the saved one.
    """
    create_stmt = (
        "create table default_1.test_overwrite_empty_table (name string, age integer)"
    )
    # NOTE(review): take(1) presumably just forces the DDL statement to run.
    spark_hive_session.sql(create_stmt).take(1)

    hive_dataset = SparkHiveDataSet(
        database="default_1",
        table="test_overwrite_empty_table",
        write_mode="overwrite",
    )
    hive_dataset.save(_generate_spark_df_one())

    loaded = hive_dataset.load()
    expected = _generate_spark_df_one()
    assert_df_equal(loaded, expected)
def test_upsert_empty_table(self, spark_hive_session):
    """Upsert into a freshly created table and check the stored contents.

    A ``(name string, age integer)`` table is created through the Hive
    session fixture, the data set saves into it with ``write_mode="upsert"``
    keyed on ``name``, and the loaded rows (ordered by ``name``) must equal
    the saved frame.
    """
    # Uses the same Hive session fixture as test_overwrite_empty_table, which
    # performs the equivalent ``create table default_1...`` setup.
    # NOTE(review): take(1) presumably just forces the DDL statement to run.
    spark_hive_session.sql(
        "create table default_1.test_upsert_empty_table (name string, age integer)"
    ).take(1)
    dataset = SparkHiveDataSet(
        database="default_1",
        table="test_upsert_empty_table",
        write_mode="upsert",
        table_pk=["name"],
    )
    dataset.save(_generate_spark_df_one())
    assert_df_equal(
        dataset.load().sort("name"), _generate_spark_df_one().sort("name")
    )
def test_read_existing_table(self):
    """Loading ``default_1.table_1`` must yield the frame from ``_generate_spark_df_one``."""
    hive_dataset = SparkHiveDataSet(
        database="default_1",
        table="table_1",
        write_mode="overwrite",
    )
    # The expected frame is built before the load, as in the original call order.
    expected = _generate_spark_df_one()
    loaded = hive_dataset.load()
    assert_df_equal(expected, loaded)