Esempio n. 1
0
def test_TargetEncoderRegressor(cv, noise_level):
    """Check TargetEncoderRegressor through fit + transform and through fit_transform.

    ``cv`` and ``noise_level`` are forwarded unchanged to the encoder
    constructor (presumably supplied by a pytest parametrization that is not
    visible here -- confirm against the test module).
    """
    encoded_name = "cat_col__target_mean"
    expected_columns = ["float_col", "int_col", "text_col", encoded_name]

    df = get_sample_df(100)
    # Derive a categorical column from the first three characters of the text.
    df["cat_col"] = df["text_col"].apply(lambda text: text[:3])
    # Fixed seed so the random regression target is reproducible.
    np.random.seed(123)
    y = np.random.randn(100)

    def check_encoding(fitted_encoder, encoded):
        """Assertions shared by both ways of producing the encoded frame."""
        assert fitted_encoder.get_feature_names() == expected_columns
        assert list(encoded.columns) == expected_columns
        assert encoded[encoded_name].isnull().sum() == 0
        assert (encoded.index == df.index).all()
        assert fitted_encoder._columns_informations["input_columns"] == ["cat_col"]

    # Path 1: fit, then transform the same frame.
    fitted_then_transformed = TargetEncoderRegressor(noise_level=noise_level, cv=cv)
    fitted_then_transformed.fit(df, y)
    transformed = fitted_then_transformed.transform(df)
    check_encoding(fitted_then_transformed, transformed)

    # Within one category, transform must yield a single encoded value,
    # i.e. the per-category standard deviation is exactly zero.
    per_category = pd.DataFrame(
        {"cat_col": df["cat_col"], "cat_col__target_mean": transformed[encoded_name]}
    )
    spread = per_category.groupby("cat_col")[encoded_name].std()
    assert spread.max() == 0

    # Path 2: fit_transform on a fresh encoder.
    fit_transformed_encoder = TargetEncoderRegressor(noise_level=noise_level, cv=cv)
    fit_transformed = fit_transformed_encoder.fit_transform(df, y)
    check_encoding(fit_transformed_encoder, fit_transformed)
Esempio n. 2
0
def test_TargetEncoderRegressor_is_picklable():
    """Check that a fitted TargetEncoderRegressor survives a pickle round-trip.

    The restored encoder must have the same type as the original and must
    produce an element-wise identical transform of the training frame.
    """
    df = get_sample_df(100)
    # Derive a categorical column from the first three characters of the text.
    df["cat_col"] = df["text_col"].apply(lambda text: text[:3])
    # Fixed seed so the random regression target is reproducible.
    np.random.seed(123)
    y = np.random.randn(100)

    original = TargetEncoderRegressor(cv=2)
    original.fit(df, y)

    # Serialize and immediately restore the fitted encoder.
    restored = pickle.loads(pickle.dumps(original))
    assert type(restored) is type(original)

    expected = original.transform(df)
    actual = restored.transform(df)

    assert expected.shape == actual.shape
    # Element-wise equality, reduced over rows and then over columns.
    assert (expected == actual).all().all()