def test_default_values(test_config, defaults, source_query, module_name):
    """Verify that every column listed in ``defaults`` holds its expected
    default value on every sampled row of the source table.
    """
    log.debug(f"module_name: {module_name}")

    config = test_config

    add_to_tested_list(module_name=module_name,
                       tested_fields=list(defaults.keys()))

    source_sample_df = get_data_from_query(query=source_query,
                                           config=config,
                                           sample=True)

    # An empty sample would make the per-column checks pass vacuously.
    assert source_sample_df.shape[0] > 0

    log.debug(
        f"Checking {source_sample_df.shape[0]} rows of data ({config.SAMPLE_PERCENTAGE}%) from table: {module_name} "
    )
    for k, v in defaults.items():
        # Broadcast the expected default alongside the real column, then
        # require every row to match.
        source_sample_df["compare_col"] = v
        matches = source_sample_df[k] == source_sample_df["compare_col"]
        total_matches = matches.sum()
        success = total_matches == source_sample_df.shape[0]
        # BUG FIX: the original called print(config.VERBOSE, ...), which wrote
        # the verbosity *number* to stdout; siblings log at that level instead.
        log.log(
            config.VERBOSE,
            f"checking {k} == {v}...."
            f" {'OK' if success else 'oh no'} ",
        )
        assert success
def test_round_column(
    test_config,
    round_columns,
    source_query,
    transformed_query,
    merge_columns,
    module_name,
):
    """Round each listed transformed column to 2 d.p. and assert the result
    is truthy (non-zero) on every merged row.
    """
    print(f"module_name: {module_name}")

    config = test_config
    add_to_tested_list(
        module_name=module_name,
        tested_fields=[y for x in round_columns.values() for y in x],
    )

    source_sample_df = get_data_from_query(query=source_query,
                                           config=config,
                                           sort_col=merge_columns["source"],
                                           sample=True)

    assert source_sample_df.shape[0] > 0

    sample_caserefs = get_merge_col_data_as_list(
        df=source_sample_df, column_name=merge_columns["source"])

    transformed_df = get_data_from_query(
        query=transformed_query,
        config=config,
        sort_col=merge_columns["transformed"],
        sample=False,
    )

    assert transformed_df.shape[0] > 0

    # Restrict the transformed data to the sampled source rows.
    transformed_sample_df = transformed_df[transformed_df[
        merge_columns["transformed"]].isin(sample_caserefs)]

    result_df = merge_source_and_transformed_df(
        source_df=source_sample_df,
        transformed_df=transformed_sample_df,
        merge_columns=merge_columns,
    )

    log.debug(
        f"Checking {result_df.shape[0]} rows of data ({config.SAMPLE_PERCENTAGE}%)  from table: {module_name}"
    )

    assert result_df.shape[0] > 0
    # Only the column lists are needed, not the mapping keys.
    for columns in round_columns.values():
        for col in columns:
            result_df["round2dp"] = result_df[col].apply(lambda x: round(x, 2))

            # NOTE(review): this only checks that every rounded value is
            # truthy (non-zero); it does not compare against a source column
            # — confirm that is the intended check.
            assert result_df["round2dp"].astype(bool).all()
def test_one_to_one_joins(
    test_config,
    join_columns,
    merge_columns,
    fk_child_query,
    fk_parent_query,
    module_name,
):
    """Verify a one-to-one FK relationship: the set of child join ids must
    equal the set of parent join ids for the sampled merge keys.
    """
    print(f"module_name: {module_name}")

    add_to_tested_list(module_name=module_name,
                       tested_fields=list(join_columns.keys()))

    config = test_config

    fk_child_df = get_data_from_query(
        query=fk_child_query,
        config=config,
        sample=True,
    )

    # Guard against an empty sample (mirrors the sibling tests); otherwise
    # the set comparison below would trivially pass on empty data.
    assert fk_child_df.shape[0] > 0

    sample_caserefs = get_merge_col_data_as_list(
        df=fk_child_df, column_name=merge_columns["fk_child"])

    fk_parent_df = get_data_from_query(
        query=fk_parent_query,
        config=config,
        sort_col=merge_columns["fk_parent"],
        sample=False,
    )
    print(fk_parent_query)
    fk_parent_sample_df = fk_parent_df[fk_parent_df[
        merge_columns["fk_parent"]].isin(sample_caserefs)]

    # First key of join_columns is the child-side join column; the first
    # value of its nested mapping is the parent-side join column.
    child_col = next(iter(join_columns))
    parent_col = [y for x in join_columns.values() for y in x.values()][0]

    fk_child_id_list = [int(x) for x in fk_child_df[child_col].tolist()]
    fk_parent_id_list = [
        int(x) for x in fk_parent_sample_df[parent_col].tolist()
    ]

    print(
        f"Checking {fk_parent_df.shape[0]} rows of data ({config.SAMPLE_PERCENTAGE}%) from table: {module_name} "
    )
    success = set(fk_child_id_list) == set(fk_parent_id_list)
    # BUG FIX: the original called print(config.VERBOSE, ...), writing the
    # verbosity *number* to stdout; log at that level like the sibling tests.
    log.log(
        config.VERBOSE,
        f"checking {child_col} == "
        f"{parent_col}.... "
        f"{'OK' if success is True else 'oh no'} ",
    )
    assert success is True
# Example #4
# 0
def test_calculated_values(test_config, calculated_fields, source_query,
                           module_name):
    """Verify that each calculated column equals its expected value on every
    sampled row of the source table.
    """
    log.debug(f"module_name: {module_name}")
    print(f"module_name: {module_name}")

    config = test_config

    add_to_tested_list(module_name=module_name,
                       tested_fields=list(calculated_fields.keys()))

    source_sample_df = get_data_from_query(query=source_query,
                                           config=config,
                                           sample=True)

    source_sample_df.info()
    # An empty sample would make the per-column checks pass vacuously.
    assert source_sample_df.shape[0] > 0

    log.debug(f"Checking {source_sample_df.shape[0]} rows of data ("
              f"{config.SAMPLE_PERCENTAGE}%) from table: {module_name}")
    for k, v in calculated_fields.items():
        print(f"k: {k}")
        print(f"v: {v}")
        # Broadcast the expected value alongside the real column, then
        # require every row to match.
        source_sample_df["compare_col"] = v
        matches = source_sample_df[k] == source_sample_df["compare_col"]
        print(f"source_sample_df[k]: {source_sample_df[k]}")
        print(
            f'source_sample_df["compare_col"]: {source_sample_df["compare_col"]}'
        )

        total_matches = matches.sum()
        success = total_matches == source_sample_df.shape[0]
        log.log(
            config.VERBOSE,
            f"checking {k} == {v}.... "
            f"{'OK' if success else 'oh no'} ",
        )
        assert success
def test_simple_transformations(
    test_config,
    simple_matches,
    merge_columns,
    source_query,
    transformed_query,
    module_name,
):
    """Verify that each source column equals its transformed counterpart(s)
    after normalising both to datetimes (when parseable) or strings.
    """
    log.debug(f"module_name: {module_name}")

    add_to_tested_list(
        module_name=module_name,
        tested_fields=[y for x in simple_matches.values() for y in x]
        + [merge_columns["transformed"]],
    )

    config = test_config

    source_sample_df = get_data_from_query(
        query=source_query, config=config, sort_col=merge_columns["source"], sample=True
    )

    assert source_sample_df.shape[0] > 0

    sample_caserefs = get_merge_col_data_as_list(
        df=source_sample_df, column_name=merge_columns["source"]
    )

    transformed_df = get_data_from_query(
        query=transformed_query,
        config=config,
        sort_col=merge_columns["transformed"],
        sample=False,
    )

    assert transformed_df.shape[0] > 0

    # Restrict the transformed data to the sampled source rows.
    transformed_sample_df = transformed_df[
        transformed_df[merge_columns["transformed"]].isin(sample_caserefs)
    ]

    result_df = merge_source_and_transformed_df(
        source_df=source_sample_df,
        transformed_df=transformed_sample_df,
        merge_columns=merge_columns,
    )

    log.debug(
        f"Checking {result_df.shape[0]} rows of data ({config.SAMPLE_PERCENTAGE}%) from table: {module_name} "
    )

    assert result_df.shape[0] > 0
    for k, v in simple_matches.items():
        for i in v:
            # Normalise both columns to the same representation before
            # comparing: datetimes when BOTH parse, plain strings otherwise.
            # BUG FIX: the original assigned result_df[k] inside the try, so
            # if column i then failed to parse, the except path stringified a
            # half-converted k (datetime repr vs original string) and the
            # comparison could spuriously fail. Convert first, assign only
            # when both succeed.
            try:
                k_as_dates = pd.to_datetime(result_df[k], format="%Y-%m-%d")
                i_as_dates = pd.to_datetime(result_df[i], format="%Y-%m-%d")
            except Exception:
                result_df[k] = result_df[k].apply(lambda x: f"{x}")
                result_df[i] = result_df[i].apply(lambda x: f"{x}")
            else:
                result_df[k] = k_as_dates
                result_df[i] = i_as_dates

            match = result_df[k].equals(result_df[i])

            print(
                f"checking {k} == {i}.... " f"{'OK' if match is True else 'oh no'} ",
            )

            assert match is True
def test_map_conditional_lookup_tables(
    test_config,
    lookup_fields,
    merge_columns,
    source_query,
    transformed_query,
    module_name,
):
    """Verify lookup-mapped columns: mapping the reference column through the
    lookup table must reproduce the transformed result column.
    """
    print(f"module_name: {module_name}")

    add_to_tested_list(
        module_name=module_name,
        tested_fields=list(lookup_fields.keys()) +
        [merge_columns["transformed"]],
    )

    config = test_config

    source_sample_df = get_data_from_query(query=source_query,
                                           config=config,
                                           sort_col=merge_columns["source"],
                                           sample=True)

    assert source_sample_df.shape[0] > 0

    sample_caserefs = get_merge_col_data_as_list(
        df=source_sample_df, column_name=merge_columns["source"])

    transformed_df = get_data_from_query(
        query=transformed_query,
        config=config,
        sort_col=merge_columns["transformed"],
        sample=False,
    )

    assert transformed_df.shape[0] > 0

    # Restrict the transformed data to the sampled source rows.
    transformed_sample_df = transformed_df[transformed_df[
        merge_columns["transformed"]].isin(sample_caserefs)]

    result_df = merge_source_and_transformed_df(
        source_df=source_sample_df,
        transformed_df=transformed_sample_df,
        merge_columns=merge_columns,
    )

    log.debug(
        f"Checking {result_df.shape[0]} rows of data ({config.SAMPLE_PERCENTAGE}%) from table: {module_name} "
    )

    assert result_df.shape[0] > 0
    for k, v in lookup_fields.items():
        # v is lookup_fields[k]; read the metadata from it directly.
        lookup_filename = v["lookup_def"]

        # NOTE(review): iterating v.items() repeats the identical check once
        # per metadata key of v — kept to preserve the original behaviour,
        # but confirm whether a single pass per field was intended.
        for i, j in v.items():
            result_col = v["cols"]["result"]

            reference_col = v["cols"]["reference"]

            lookup_dict = get_lookup_dict(file_name=lookup_filename.lower())

            result_df["mapped"] = result_df[reference_col].map(lookup_dict)

            # Only rows with a lookup hit are comparable.
            result_df = result_df.loc[result_df["mapped"].notnull()]

            # Normalise both sides to strings before the equality check.
            result_df[result_col] = result_df[result_col].apply(
                lambda x: f"{x}")
            result_df[k] = result_df[k].apply(lambda x: f"{x}")

            # The original computed and asserted this identical condition
            # twice; once is enough.
            match = result_df[result_col].fillna("").equals(
                result_df[k].fillna(""))

            print(
                f"checking {k} == {result_col}...."
                f""
                f" {'OK' if match is True else 'oh no'} ", )

            assert match is True
def test_squash_columns(
    test_config,
    squash_columns_fields,
    source_query,
    transformed_query,
    merge_columns,
    module_name,
):
    """Verify squashed (list-valued) columns: element i of the squashed
    column k must equal the original source column j at position i.
    """
    log.debug(f"module_name: {module_name}")

    config = test_config
    add_to_tested_list(module_name=module_name,
                       tested_fields=list(squash_columns_fields.keys()))

    source_sample_df = get_data_from_query(query=source_query,
                                           config=config,
                                           sort_col=merge_columns["source"],
                                           sample=True)

    assert source_sample_df.shape[0] > 0

    sample_caserefs = get_merge_col_data_as_list(
        df=source_sample_df, column_name=merge_columns["source"])

    transformed_df = get_data_from_query(
        query=transformed_query,
        config=config,
        sort_col=merge_columns["transformed"],
        sample=False,
    )

    assert transformed_df.shape[0] > 0

    # Restrict the transformed data to the sampled source rows.
    transformed_sample_df = transformed_df[transformed_df[
        merge_columns["transformed"]].isin(sample_caserefs)]

    result_df = merge_source_and_transformed_df(
        source_df=source_sample_df,
        transformed_df=transformed_sample_df,
        merge_columns=merge_columns,
    )

    log.debug(
        f"Checking {result_df.shape[0]} rows of data ({config.SAMPLE_PERCENTAGE}%) from table: {module_name} "
    )

    assert result_df.shape[0] > 0

    for k, v in squash_columns_fields.items():
        # Expand the squashed column into one "unsq_<i>" column per element.
        # Hoisted out of the inner loop: it depends only on k, and the
        # original recomputed it for every element of v.
        unsquashed = result_df[k].apply(pd.Series)
        unsquashed = unsquashed.rename(columns=lambda x: "unsq_" + str(x))

        for i, j in enumerate(v):
            match = result_df[j].equals(unsquashed[f"unsq_{i}"])

            log.log(
                config.VERBOSE,
                f"checking {j} == {k}:{i}...."
                f" {'OK' if match is True else 'oh no'} ",
            )

            assert match is True
# Example #8
# 0
def test_convert_to_bool(
    test_config,
    convert_to_bool_fields,
    source_query,
    transformed_query,
    merge_columns,
    module_name,
):
    """Verify boolean conversion: source rows with a truthy token must be
    True in the transformed column, and all other rows must be False.
    """
    log.debug(f"module_name: {module_name}")

    config = test_config
    add_to_tested_list(
        module_name=module_name,
        tested_fields=[y for x in convert_to_bool_fields.values() for y in x],
    )

    source_sample_df = get_data_from_query(
        query=source_query, config=config, sort_col=merge_columns["source"], sample=True
    )

    assert source_sample_df.shape[0] > 0

    sample_caserefs = get_merge_col_data_as_list(
        df=source_sample_df, column_name=merge_columns["source"]
    )

    transformed_df = get_data_from_query(
        query=transformed_query,
        config=config,
        sort_col=merge_columns["transformed"],
        sample=False,
    )

    assert transformed_df.shape[0] > 0

    # Restrict the transformed data to the sampled source rows.
    transformed_sample_df = transformed_df[
        transformed_df[merge_columns["transformed"]].isin(sample_caserefs)
    ]

    result_df = merge_source_and_transformed_df(
        source_df=source_sample_df,
        transformed_df=transformed_sample_df,
        merge_columns=merge_columns,
    )

    log.debug(
        f"Checking {result_df.shape[0]} rows of data ({config.SAMPLE_PERCENTAGE}%)  from table: {module_name}"
    )
    assert result_df.shape[0] > 0

    # Source-side tokens that should convert to boolean True (constant, so
    # hoisted out of the loops).
    true_values = ["true", "True", "t", "T", "1", "1.0"]

    for k, v in convert_to_bool_fields.items():
        for i in v:
            true_rows = result_df[result_df[k].isin(true_values)]
            total_true_rows = true_rows.shape[0]
            # BUG FIX: the original took true_rows[i].shape[0], which is the
            # same row count as true_rows by construction, so the assertion
            # could never fail. Count rows whose transformed value is True.
            # NOTE(review): assumes column i is boolean-like — confirm dtype.
            total_true_matches = int(true_rows[i].astype(bool).sum())

            t_match = total_true_rows == total_true_matches
            log.log(
                config.VERBOSE,
                f"checking True: {total_true_rows} {k} =="
                f" {total_true_matches} "
                f"{i}.... {'OK' if t_match is True else 'oh no'} ",
            )
            assert t_match

            false_rows = result_df[~result_df[k].isin(true_values)]
            total_false_rows = false_rows.shape[0]
            # Same fix on the False side: count rows whose transformed value
            # is False, instead of the unconditional row count.
            total_false_matches = int((~false_rows[i].astype(bool)).sum())

            f_match = total_false_rows == total_false_matches

            log.log(
                config.VERBOSE,
                f"checking False: {total_false_rows} {k} =="
                f" {total_false_matches} "
                f"{i}.... {'OK' if f_match is True else 'oh no'} ",
            )
            assert f_match