예제 #1
0
def check_sk_pipeline():
    """Exercise a preprocessing pipeline followed by a model pipeline.

    Builds a pdpipe pipeline (shift the "ph" column down by one, then
    encode the "type" and "lbl" columns) and a second pipeline made of a
    ``FreqDrop`` stage plus ``LogisticRegression``. Both pipelines are
    printed, the model pipeline is fitted on the processed training frame,
    and predictions for the processed test frame are printed.

    Nothing is returned; all results are reported via ``print``.

    NOTE(review): ``make_pipeline`` and ``LogisticRegression`` are presumably
    scikit-learn's, and ``x_y_by_col_lbl`` presumably splits a frame into
    features and the named label column -- confirm against the imports and
    helpers defined elsewhere in this file.
    """
    preprocess = pdp.make_pdpipeline(
        pdp.ApplyByCols("ph", lambda value: value - 1),
        # Binning stage left disabled: pdp.Bin({"ph": [0, 3, 5, 12]})
        pdp.Encode(["type", "lbl"]),
    )
    print(preprocess)

    estimator = make_pipeline(pdp.FreqDrop(2, "lbl"), LogisticRegression())
    print(estimator)

    # Training pass: preprocess, split off the label column, fit.
    processed_train = preprocess(_train_df())
    print("Processed train set: {}".format(processed_train))
    train_features, train_labels = x_y_by_col_lbl(processed_train, "lbl")
    estimator = estimator.fit(train_features, train_labels)
    print("Fitted model pipeline: {}".format(estimator))

    # Test pass: same preprocessing, then predict (test labels are unused).
    processed_test = preprocess(_test_df())
    print("Processed test set: {}".format(processed_test))
    test_features, _ = x_y_by_col_lbl(processed_test, "lbl")
    print("predictions: {}".format(estimator.predict(test_features)))
예제 #2
0
def test_make_pdpipeline():
    """Check that make_pdpipeline chains two stages and applies both.

    Two ``SilentDropStage`` instances (for 'num1' and 'num2') are combined
    into one pipeline. The pipeline must report a length of 2, and after
    applying it to the test frame neither of those columns may remain while
    the 'char' column must still be present.
    """
    dropped_columns = ('num1', 'num2')
    stages = [SilentDropStage(column) for column in dropped_columns]
    pipeline = make_pdpipeline(*stages)
    assert len(pipeline) == 2

    remaining = pipeline.apply(_test_df(), verbose=True).columns
    for column in dropped_columns:
        assert column not in remaining
    assert 'char' in remaining