def test_validate_schema_when_struct_field_is_missing(self):
    """Run ``quinn.validate_schema`` against a DataFrame with ``name`` and ``age`` columns.

    The required schema lists exactly the two column names the DataFrame
    is created with, and the call is not wrapped in any exception check,
    so the test passes when ``validate_schema`` returns without raising.

    NOTE(review): the function name says "struct field is missing", but
    no field is missing from the required schema used here. A second
    definition with this same name appears later in this file; if both
    live in the same scope the later one replaces this one -- confirm
    and consider renaming.
    """
    # ``spark`` is not a parameter here; presumably a module-level session
    # or fixture -- TODO confirm.
    rows = [("jose", 1), ("li", 2), ("luisa", 3)]
    column_names = ["name", "age"]
    people_df = spark.createDataFrame(rows, column_names)

    expected_fields = [
        StructField("name", StringType(), True),
        StructField("age", LongType(), True),
    ]
    expected_schema = StructType(expected_fields)

    quinn.validate_schema(people_df, expected_schema)
def it_does_nothing_when_the_schema_matches(spark):
    """Check that ``quinn.validate_schema`` does not raise on a matching schema.

    Builds a three-row DataFrame with ``name`` and ``age`` columns and
    validates it against a required schema listing those same two fields.
    The test passes when the call returns without raising.

    Args:
        spark: object providing ``createDataFrame`` (presumably the
            SparkSession fixture -- verify against the test setup).
    """
    rows = [("jose", 1), ("li", 2), ("luisa", 3)]
    column_names = ["name", "age"]
    people_df = spark.createDataFrame(rows, column_names)

    expected_fields = [
        StructField("name", StringType(), True),
        StructField("age", LongType(), True),
    ]
    expected_schema = StructType(expected_fields)

    quinn.validate_schema(people_df, expected_schema)
def test_validate_schema_when_struct_field_is_missing(self):
    """Check that ``quinn.validate_schema`` raises when a required field is absent.

    The DataFrame is created with ``name`` and ``age`` columns, while the
    required schema asks for ``name`` and ``city``. The test expects
    ``quinn.DataFrameMissingStructFieldError`` and compares the first
    exception argument against the exact expected message.
    """
    # ``spark`` is not a parameter here; presumably a module-level session
    # or fixture -- TODO confirm.
    rows = [("jose", 1), ("li", 2), ("luisa", 3)]
    column_names = ["name", "age"]
    people_df = spark.createDataFrame(rows, column_names)

    expected_fields = [
        StructField("name", StringType(), True),
        StructField("city", StringType(), True),
    ]
    expected_schema = StructType(expected_fields)

    expected_message = "The [StructField(city,StringType,true)] StructFields are not included in the DataFrame with the following StructFields StructType(List(StructField(name,StringType,true),StructField(age,LongType,true)))"

    with pytest.raises(quinn.DataFrameMissingStructFieldError) as excinfo:
        quinn.validate_schema(people_df, expected_schema)

    # Inspect the captured exception only after the ``with`` block exits;
    # a statement placed after the raising call inside the block would
    # never execute.
    assert excinfo.value.args[0] == expected_message