from typing import Dict

from pyspark.sql import Column, DataFrame, SparkSession
from pyspark.sql.functions import col, lit

from spark_auto_mapper.automappers.automapper import AutoMapper
from spark_auto_mapper.helpers.automapper_helpers import AutoMapperHelpers as A

# assert_compare_expressions is a project test helper; the import path below is an
# assumption, so adjust it to wherever your test utilities define it.
from spark_auto_mapper.helpers.expression_comparer import assert_compare_expressions


def test_auto_mapper_boolean(spark_session: SparkSession) -> None:
    # Arrange
    spark_session.createDataFrame(
        [
            (1, "Qureshi", "Imran", "0"),
            (2, "Vidal", "Michael", "1"),
        ],
        ["member_id", "last_name", "first_name", "my_age"],
    ).createOrReplaceTempView("patients")

    source_df: DataFrame = spark_session.table("patients")

    df = source_df.select("member_id")
    df.createOrReplaceTempView("members")

    # Act
    mapper = AutoMapper(view="members",
                        source_view="patients",
                        keys=["member_id"]).columns(
                            age=A.boolean(A.column("my_age")),
                            is_active=A.boolean("False"),
                        )

    assert isinstance(mapper, AutoMapper)
    sql_expressions: Dict[str, Column] = mapper.get_column_specs(
        source_df=source_df)
    for column_name, sql_expression in sql_expressions.items():
        print(f"{column_name}: {sql_expression}")

    assert_compare_expressions(sql_expressions["age"],
                               col("b.my_age").cast("boolean").alias("age"))
    assert_compare_expressions(sql_expressions["is_active"],
                               lit("False").cast("boolean").alias("is_active"))

    result_df: DataFrame = mapper.transform(df=df)

    # Assert
    result_df.printSchema()
    result_df.show()

    assert result_df.where("member_id == 1").select(
        "age",
        "is_active",
    ).collect()[0][:] == (False, False)
    assert result_df.where("member_id == 2").select(
        "age",
        "is_active",
    ).collect()[0][:] == (True, False)

    assert dict(result_df.dtypes)["age"] == "boolean"
    assert dict(result_df.dtypes)["is_active"] == "boolean"
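
Note: the boolean values asserted above come from Spark's string-to-boolean cast semantics rather than anything AutoMapper adds: "1"/"true" (case-insensitive) cast to true, "0"/"false" cast to false, and unrecognized strings yield null under the default (non-ANSI) settings. A minimal standalone sketch of that cast behaviour using plain Spark SQL only (the helper name is just illustrative):

def demo_string_to_boolean_cast(spark_session: SparkSession) -> None:
    # "0" -> False, "1" -> True, "False" -> False (case-insensitive string match)
    row = spark_session.sql(
        "SELECT CAST('0' AS BOOLEAN) AS zero, "
        "CAST('1' AS BOOLEAN) AS one, "
        "CAST('False' AS BOOLEAN) AS false_str"
    ).collect()[0]
    print(row.zero, row.one, row.false_str)  # False True False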
Example #2
def test_auto_mapper_boolean(spark_session: SparkSession) -> None:
    # Arrange (uses the same imports as the first example above)
    spark_session.createDataFrame(
        [
            (1, "Qureshi", "Imran", "0"),
            (2, "Vidal", "Michael", "1"),
        ],
        ["member_id", "last_name", "first_name", "my_age"],
    ).createOrReplaceTempView("patients")

    source_df: DataFrame = spark_session.table("patients")

    df = source_df.select("member_id")
    df.createOrReplaceTempView("members")

    # Act
    mapper = AutoMapper(view="members",
                        source_view="patients",
                        keys=["member_id"]).columns(
                            age=A.boolean(A.column("my_age")))

    assert isinstance(mapper, AutoMapper)
    sql_expressions: Dict[str, Column] = mapper.get_column_specs(
        source_df=source_df)
    for column_name, sql_expression in sql_expressions.items():
        print(f"{column_name}: {sql_expression}")

    assert str(sql_expressions["age"]) == str(
        col("b.my_age").cast("boolean").alias("age"))

    result_df: DataFrame = mapper.transform(df=df)

    # Assert
    result_df.printSchema()
    result_df.show()

    assert result_df.where("member_id == 1").select(
        "age").collect()[0][0] is False
    assert result_df.where("member_id == 2").select(
        "age").collect()[0][0] is True

    assert dict(result_df.dtypes)["age"] == "boolean"
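
This variant compares column expressions by their string form instead of using the assert_compare_expressions helper from the first example. That is a common workaround because == on two PySpark Column objects builds a new comparison expression rather than returning a bool. A small sketch of the idea on plain PySpark columns, independent of AutoMapper:

from pyspark.sql import SparkSession
from pyspark.sql.functions import col

spark = SparkSession.builder.master("local[1]").getOrCreate()  # col() needs an active session

expected = col("b.my_age").cast("boolean").alias("age")
actual = col("b.my_age").cast("boolean").alias("age")

# `actual == expected` would itself be a Column (a comparison expression),
# so the test asserts on the string representation of each expression instead.
assert str(actual) == str(expected)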