예제 #1
0
def test_custom_transformer():
    """Check that custom_transformer rewrites only the requested column.

    Every case applies one function to one column of the same input frame
    and compares the returned frame against the input in which just that
    column has been replaced by the expected values.
    """
    raw_columns = {
        'feat1': [1, 2, 3],
        'feat2': [math.e, math.e**2, math.e**3],
        'feat3': [1.5, 2.5, 3.5],
        'target': [1, 4, 9],
    }
    input_df = pd.DataFrame(raw_columns)

    def frame_with(**replaced):
        # Same columns, in the same order, as the input frame; only the
        # named columns receive new values.
        return pd.DataFrame(dict(raw_columns, **replaced))

    # (columns to transform, function, extra keyword arguments, expected frame)
    cases = [
        # square root of the target column
        (["target"], sqrt, {}, frame_with(target=[1.0, 2.0, 3.0])),
        # squared value of the feat1 column
        (["feat1"], lambda x: x**2, {}, frame_with(feat1=[1, 4, 9])),
        # ln of the feat2 column (e**k is expected to become k)
        (["feat2"], ln, {'is_vectorized': True},
         frame_with(feat2=[1.0, 2.0, 3.0])),
        # floor of the feat3 column
        (["feat3"], floor, {'is_vectorized': True},
         frame_with(feat3=[1.0, 2.0, 3.0])),
    ]

    for columns, function, options, expected_df in cases:
        # Only the transformed frame is checked; the returned function and
        # log are ignored by this test.
        _, transformed_df, _ = custom_transformer(input_df, columns, function,
                                                  **options)
        assert expected_df.equals(transformed_df)
예제 #2
0
def test_custom_transformer():
    """Check custom_transformer on a two-column frame, one column per call.

    The first call applies ``sqrt`` to ``target``; the second squares
    ``feat1``. Both calls receive the same ``input_df`` object, and the
    order of the calls is significant for the second expectation.
    """
    input_df = pd.DataFrame({'feat1': [1, 2, 3], 'target': [1, 4, 9]})

    after_sqrt = pd.DataFrame({'feat1': [1, 2, 3], 'target': [1.0, 2.0, 3.0]})

    # NOTE(review): target is still expected to hold the square-rooted values
    # here, which implies the first call's change carries over into input_df
    # -- confirm against the custom_transformer implementation in use.
    after_square = pd.DataFrame({'feat1': [1, 4, 9], 'target': [1.0, 2.0, 3.0]})

    # square root of the target column
    _, sqrt_result, _ = custom_transformer(input_df, ["target"], sqrt)
    assert after_sqrt.equals(sqrt_result)

    # squared value of the feat1 column
    _, square_result, _ = custom_transformer(input_df, ["feat1"],
                                             lambda x: x**2)
    assert after_square.equals(square_result)
예제 #3
0
def test_custom_transformer():
    """Check custom_transformer in its four output-naming variants.

    For each (column, function) pair the transformer is called:

    * with no naming option -- the result must equal the input frame with
      that column replaced by the transformed values;
    * with ``suffix="_suffix"``, ``prefix="prefix_"`` or a
      ``columns_mapping`` to ``"<column>_raw"`` -- the result must equal
      that same frame with one extra trailing column, named accordingly,
      holding the untouched input values.
    """
    raw_columns = {
        "feat1": [1, 2, 3],
        "feat2": [math.e, math.e ** 2, math.e ** 3],
        "feat3": [1.5, 2.5, 3.5],
        "target": [1, 4, 9],
    }
    input_df = pd.DataFrame(raw_columns)

    def check_variants(column, function, new_values, **options):
        # Frame expected from the plain call: input columns in their
        # original order, with only `column` replaced.
        replaced_df = pd.DataFrame(dict(raw_columns, **{column: new_values}))

        # Run all four variants first, then compare, as separate steps.
        _, plain, _ = custom_transformer(input_df, [column], function, **options)
        _, suffixed, _ = custom_transformer(
            input_df, [column], function, suffix="_suffix", **options
        )
        _, prefixed, _ = custom_transformer(
            input_df, [column], function, prefix="prefix_", **options
        )
        _, mapped, _ = custom_transformer(
            input_df,
            [column],
            function,
            columns_mapping={column: column + "_raw"},
            **options
        )

        assert replaced_df.equals(plain)

        # The renaming variants are compared against the replaced frame
        # plus the input's own values for `column`, read after the calls.
        untouched = input_df[[column]].copy()
        with_suffix = pd.concat(
            [replaced_df, untouched.add_suffix("_suffix")], axis=1
        )
        with_prefix = pd.concat(
            [replaced_df, untouched.add_prefix("prefix_")], axis=1
        )
        with_raw = pd.concat([replaced_df, untouched.add_suffix("_raw")], axis=1)

        assert with_suffix.equals(suffixed)
        assert with_prefix.equals(prefixed)
        assert with_raw.equals(mapped)

    # square root of the target column
    check_variants("target", sqrt, [1.0, 2.0, 3.0])

    # squared value of the feat1 column
    check_variants("feat1", lambda x: x ** 2, [1, 4, 9])

    # ln of the feat2 column (e ** k is expected to become k)
    check_variants("feat2", ln, [1.0, 2.0, 3.0], is_vectorized=True)

    # floor of the feat3 column
    check_variants("feat3", floor, [1.0, 2.0, 3.0], is_vectorized=True)