예제 #1
0
def test_auto_mapper_with_column_expression(
        spark_session: SparkSession) -> None:
    """Check that ``A.expression`` maps a SQL expression onto a new column.

    Registers a two-row ``patients`` view and a ``members`` view that holds
    only ``member_id``, maps ``lname`` through ``SUBSTRING(last_name,1,3)``,
    and verifies both the generated column spec and the transformed rows.
    """
    # Arrange: register the source rows under the name the mapper reads from
    patient_rows = [
        (1, 'Qureshi', 'Imran'),
        (2, 'Vidal', 'Michael'),
    ]
    patient_columns = ['member_id', 'last_name', 'first_name']
    spark_session.createDataFrame(
        patient_rows, patient_columns).createOrReplaceTempView("patients")

    source_df: DataFrame = spark_session.table("patients")

    # The destination view starts out with just the key column
    members_df = source_df.select("member_id")
    members_df.createOrReplaceTempView("members")

    # Act
    base_mapper = AutoMapper(
        view="members",
        source_view="patients",
        keys=["member_id"],
    )
    mapper = base_mapper.columns(
        lname=A.expression("SUBSTRING(last_name,1,3)"))

    assert isinstance(mapper, AutoMapper)
    column_specs: Dict[str, Column] = mapper.get_column_specs(
        source_df=source_df)
    for spec_name, spec in column_specs.items():
        print(f"{spec_name}: {spec}")

    # Column specs are compared through their string rendering
    actual_spec_text = str(column_specs["lname"])
    expected_spec_text = str(
        expr("SUBSTRING(last_name,1,3)").alias("lname"))
    assert actual_spec_text == expected_spec_text

    result_df: DataFrame = mapper.transform(df=members_df)

    # Assert
    result_df.printSchema()
    result_df.show()

    # Each member's lname must be the first three letters of the last name
    for member_id, expected_lname in ((1, "Qur"), (2, "Vid")):
        lname_rows = result_df.where(
            f"member_id == {member_id}").select("lname").collect()
        assert lname_rows[0][0] == expected_lname
예제 #2
0
def test_automapper_first_valid_column(spark_session: SparkSession) -> None:
    """Check ``A.first_valid_column`` against a source lacking an ``age`` column.

    The ``patients`` view created here has a ``my_age`` column but no ``age``
    column, while the first candidate for both ``age`` and ``is_young``
    refers to ``age``. The assertions expect the ``my_age``-based candidate
    to be the one that ends up in the column spec and in the output rows.
    """
    # Arrange
    patient_rows = [
        (1, "Qureshi", "Imran", "54"),
        (2, "Vidal", "Michael", "33"),
    ]
    patient_columns = ["member_id", "last_name", "first_name", "my_age"]
    spark_session.createDataFrame(
        patient_rows, patient_columns
    ).createOrReplaceTempView("patients")

    source_df: DataFrame = spark_session.table("patients")

    members_df = source_df.select("member_id")
    members_df.createOrReplaceTempView("members")

    # Act
    base_mapper = AutoMapper(
        view="members",
        source_view="patients",
        keys=["member_id"],
        drop_key_columns=False,
    )

    last_name_spec = A.column("last_name")

    # Candidates are listed in priority order; the last one is a null text.
    age_candidates = [
        A.number(A.expression("CAST (age AS BIGINT)")),
        A.number(A.expression("CAST (my_age AS BIGINT)")),
        A.text(None),
    ]
    age_spec = A.first_valid_column(*age_candidates)

    # Same lookup table for both candidate columns; each A.map call gets its
    # own dict instance, as in a literal written out twice.
    age_labels = {
        "21": "yes",
        "33": "yes",
        "54": "no comment",
        None: "not provided",
    }
    is_young_candidates = [
        A.map(A.column("age"), dict(age_labels)),
        A.map(A.column("my_age"), dict(age_labels)),
    ]
    is_young_spec = A.first_valid_column(*is_young_candidates)

    mapper = base_mapper.columns(
        last_name=last_name_spec,
        age=age_spec,
        is_young=is_young_spec,
    )

    assert isinstance(mapper, AutoMapper)
    column_specs: Dict[str, Column] = mapper.get_column_specs(
        source_df=source_df)
    for spec_name, spec in column_specs.items():
        print(f"{spec_name}: {spec}")

    # The expected spec is built from my_age, not age
    expected_age_spec = col("my_age").cast("long").cast("long").alias("age")
    assert_compare_expressions(column_specs["age"], expected_age_spec)
    result_df: DataFrame = mapper.transform(df=members_df)

    # Assert
    result_df.printSchema()
    result_df.show()

    expected_by_member = {
        1: (54, "no comment"),
        2: (33, "yes"),
    }
    for member_id, expected_values in expected_by_member.items():
        selected_rows = result_df.where(
            f"member_id == {member_id}").select("age", "is_young").collect()
        # Slice the first row so it is compared as a plain sequence of values
        assert selected_rows[0][:] == expected_values
예제 #3
0
def mapping(parameters: Dict[str, Any]) -> AutoMapper:
    """Build the AutoMapper that turns provider feed rows into a Practitioner.

    :param parameters: must contain the key ``"view"``; its value is passed
        to ``AutoMapper`` as the ``view`` argument
    :return: whatever ``AutoMapper.complex`` returns for the ``Practitioner``
        definition assembled below
    """
    base_mapper = AutoMapper(
        view=parameters["view"],
        # the original note marks this source view name as the file name
        source_view="bwellProviderFeed_08122020",
        keys=["gecb_provider_number"],
    )

    def typed_identifier(use, value, type_code, system):
        # One Identifier whose type is a single-coding CodeableConcept
        return Identifier(
            use=use,
            value=value,
            type_=CodeableConcept(
                coding=FhirList(
                    [Coding(system=IdentifierUseCode.codeset, code=type_code)]
                )
            ),
            system=system,
        )

    resource_id = FhirId(A.column("b.gecb_provider_number"))

    # Meta: fixed source plus one "bwell" label per security system
    meta_source = A.text("http://www.icanbwell.com")
    security_labels = FhirList(
        [
            Coding(
                system=f"https://www.icanbwell.com/{label_kind}",
                code=AllSecurityLabelsCode(A.text("bwell")),
            )
            for label_kind in ("owner", "access", "vendor")
        ]
    )
    meta = Meta(source=meta_source, security=security_labels)

    # Identifiers: the provider number first, then the NPI
    provider_number_identifier = typed_identifier(
        IdentifierUseCodeValues.Usual,
        A.column("b.gecb_provider_number"),
        IdentifierTypeCodesCodeValues.ProviderNumber,
        "medstarhealth.org",
    )
    npi_identifier = typed_identifier(
        IdentifierUseCodeValues.Official,
        A.column("provider_npi"),
        IdentifierTypeCodesCodeValues.NationalProviderIdentifier,
        "http://hl7.org/fhir/sid/us-npi",
    )
    identifiers = FhirList([provider_number_identifier, npi_identifier])

    # Name: first and middle names as given names, the title as suffix
    given_names = FhirList(
        [
            A.column("provider_first_name"),
            A.column("provider_middle_name"),
        ]
    )
    human_name = HumanName(
        given=given_names,
        family=A.column("provider_last_name"),
        suffix=FhirList([A.column("provider_title")]),
        use=NameUseCodeValues.Usual,
        text=A.text(" "),
    )
    names = FhirList([human_name])

    # Gender: MALE/FEMALE map to lowercase codes, anything else to 'unknown'.
    # The whitespace inside this literal is kept exactly as it was.
    gender_sql = """
                    CASE
                        WHEN `provider_gender` = 'MALE'
                        THEN 'male'
                        WHEN `provider_gender` = 'FEMALE'
                        THEN 'female'
                        ELSE 'unknown'
                    END
                    """
    gender = AdministrativeGenderCode(A.expression(gender_sql))

    # No telecom entries are mapped
    telecom = FhirList([])

    # Address: a single work address built from the scheduling location
    address_lines = FhirList(
        [
            A.column("scheduling_location_address1"),
            A.column("scheduling_location_address2"),
        ]
    )
    work_address = Address(
        use=AddressUseCodeValues.Work,
        type_=AddressTypeCode(A.text("physical")),
        text=A.column("practice_name"),
        line=address_lines,
        city=A.column("scheduling_location_city"),
        district=A.text(" "),
        state=A.column("scheduling_location_state"),
        postalCode=A.column("scheduling_location_zip"),
        country=A.text("USA"),
    )
    addresses = FhirList([work_address])

    practitioner = Practitioner(
        id_=resource_id,
        meta=meta,
        identifier=identifiers,
        active=True,
        name=names,
        gender=gender,
        telecom=telecom,
        address=addresses,
    )
    return base_mapper.complex(practitioner)