def test_smoke_test_table_three():
    """Smoke-test computing ``TableThree`` from two single-row inputs.

    Builds one-row DataFrames for ``table_one`` (``num=1``) and
    ``table_two`` (``num=2``), passes them to ``invoke_compute`` and
    asserts that the result reports success and that the set of collected
    output rows equals ``{Row(num=1), Row(num=2)}``.
    """
    session = spark_session_from_config()
    table_inputs = {
        'table_one': session.createDataFrame([Row(num=1)]),
        'table_two': session.createDataFrame([Row(num=2)]),
    }

    result = invoke_compute(TableThree, inputs=table_inputs)

    assert result.success
    # Compare as sets so the check does not depend on row order.
    collected_rows = result.output_value().collect()
    assert set(collected_rows) == {Row(num=1), Row(num=2)}
def get_table(table_def):
    """Read the parquet data written for ``table_def`` and collect its rows.

    The location is ``temp_dir`` joined with ``table_def.metadata[FEATURE_AREA]``
    and ``table_def.name``.

    NOTE(review): ``temp_dir`` is not defined in this block; presumably it is
    supplied by an enclosing scope -- confirm against the caller.
    """
    session = spark_session_from_config()
    table_path = os.path.join(
        temp_dir,
        table_def.metadata[FEATURE_AREA],
        table_def.name,
    )
    parquet_frame = session.read.parquet(table_path)
    return parquet_frame.collect()
def get_table(name):
    """Read the CSV data stored under ``name`` and collect its rows.

    The location is ``temp_dir`` joined with ``name``. The file is read with
    ``header=True`` and ``inferSchema=True``.

    NOTE(review): ``temp_dir`` is not defined in this block; presumably it is
    supplied by an enclosing scope -- confirm against the caller.
    """
    session = spark_session_from_config()
    csv_path = os.path.join(temp_dir, name)
    csv_frame = session.read.csv(csv_path, header=True, inferSchema=True)
    return csv_frame.collect()