Beispiel #1
0
def test_capper():
    """Check capper on the frame it is fitted on and on a second frame.

    ``feat1`` is given a precomputed cap of 9.0.  ``feat2`` has no
    precomputed cap; the expected frames encode it being limited to 75,
    the largest non-null ``feat2`` value in the fitting frame.  Missing
    values are expected to stay missing.
    """
    fit_df = pd.DataFrame({"feat1": [10, 13, 50], "feat2": [50, 75, None]})
    new_df = pd.DataFrame({"feat1": [7, 15], "feat2": [200, None]})

    # Expected result for the frame capper was fitted on.
    capped_fit = pd.DataFrame({"feat1": [9, 9, 9], "feat2": [50, 75, None]})
    # Expected result when the returned function is applied to unseen rows.
    capped_new = pd.DataFrame({"feat1": [7, 9], "feat2": [75, None]})

    # The third element of the returned triple (the log) is not inspected.
    apply_caps, fitted, _ = capper(fit_df, ["feat1", "feat2"], {"feat1": 9.0})

    assert capped_fit.equals(fitted)

    assert capped_new.equals(apply_caps(new_df))
def test_capper():
    """Check capper with and without the column-preserving options.

    Four variants are fitted on the same frame: plain, ``suffix``,
    ``prefix`` and ``columns_mapping``.  For the last three the expected
    output is the capped frame followed by the untouched input columns
    under their renamed labels.
    """
    fit_df = pd.DataFrame({"feat1": [10, 13, 50], "feat2": [50, 75, None]})
    new_df = pd.DataFrame({"feat1": [7, 15], "feat2": [200, None]})

    capped_fit = pd.DataFrame({"feat1": [9, 9, 9], "feat2": [50, 75, None]})
    capped_new = pd.DataFrame({"feat1": [7, 9], "feat2": [75, None]})

    def fit(**options):
        # Fresh list/dict literals per call, exactly as separate call sites.
        return capper(fit_df, ["feat1", "feat2"], {"feat1": 9}, **options)

    def side_by_side(capped, renamed_raw):
        # Capped columns first, then the renamed raw columns.
        return pd.concat([capped, renamed_raw], axis=1)

    # Fit every variant up front; the returned logs are not inspected.
    plain_fn, plain_out, _ = fit()
    suffix_fn, suffix_out, _ = fit(suffix="_suffix")
    prefix_fn, prefix_out, _ = fit(prefix="prefix_")
    mapped_fn, mapped_out, _ = fit(
        columns_mapping={"feat1": "feat1_raw", "feat2": "feat2_raw"}
    )

    # Plain capping: columns are overwritten in place.
    assert capped_fit.equals(plain_out)
    assert capped_new.equals(plain_fn(new_df))

    # suffix: raw values are kept under "<name>_suffix".
    assert side_by_side(
        capped_fit, fit_df.copy().add_suffix("_suffix")
    ).equals(suffix_out)
    assert side_by_side(
        capped_new, new_df.copy().add_suffix("_suffix")
    ).equals(suffix_fn(new_df))

    # prefix: raw values are kept under "prefix_<name>".
    assert side_by_side(
        capped_fit, fit_df.copy().add_prefix("prefix_")
    ).equals(prefix_out)
    assert side_by_side(
        capped_new, new_df.copy().add_prefix("prefix_")
    ).equals(prefix_fn(new_df))

    # columns_mapping: raw values are kept under the mapped "<name>_raw" labels.
    assert side_by_side(
        capped_fit, fit_df.copy().add_suffix("_raw")
    ).equals(mapped_out)
    assert side_by_side(
        capped_new, new_df.copy().add_suffix("_raw")
    ).equals(mapped_fn(new_df))
Beispiel #3
0
# Label the single existing column, then add the regression target column.
df.columns = ["income"]
df["bill_amount"] = data_bill_amount * 10000
# Scale with vectorized Series arithmetic instead of a per-element lambda.
df["income"] = df["income"] * 1000
print(f"turned our test data into an income dataframe...\n {df.head()}")

# ----------------------------------------------------------------------------------------------------------------------
# Get to the actual work.

from fklearn.training.regression import linear_regression_learner
from fklearn.training.transformation import capper, floorer, prediction_ranger

# Initialize the three learner functions. Each call below supplies only the
# configuration; the dataframe is passed later, when the pipeline is run.
# 1. capper_fn: caps "income" with a precomputed cap of 500 so that outliers
#    are ignored.
# 2. regression_fn: an ordinary linear regression of "bill_amount" on "income".
# 3. ranger_fn: bounds the regression output using prediction_min=0.0 and
#    prediction_max=200.0.

capper_fn = capper(columns_to_cap=["income"], precomputed_caps={"income": 500})
regression_fn = linear_regression_learner(features=["income"],
                                          target="bill_amount")
ranger_fn = prediction_ranger(prediction_min=0.0, prediction_max=200.0)

# chain the three curried learners into a single pipeline...
from fklearn.training.pipeline import build_pipeline

# Combine capping, regression and prediction ranging into one learner.
learner = build_pipeline(capper_fn, regression_fn, ranger_fn)
# Run the pipeline on df. The result unpacks into three values, presumably
# fklearn's (prediction function, output dataframe, log) triple -- confirm.
# NOTE: df is rebound here to the dataframe returned by the pipeline.
p, df, log = learner(df)

print(
    f" the returned dataframe now contains our capped prediction:\n {df.head(5)}"
)