def check_sk_pipeline():
    """Manually exercise a pdpipe pipeline together with an sklearn pipeline.

    A pdpipe preprocessing pipeline is built and applied to the train and
    test frames; the label column ``"lbl"`` is then split off and the
    remaining data is used to fit, and predict with, an sklearn pipeline
    made of a ``pdp.FreqDrop`` stage followed by ``LogisticRegression``.

    Every intermediate object is printed; nothing is asserted or returned,
    so this is a smoke check meant to be inspected by eye.
    """
    # Preprocessing applied identically to both the train and test frames.
    preprocess = pdp.make_pdpipeline(
        pdp.ApplyByCols("ph", lambda x: x - 1),
        # pdp.Bin({"ph": [0, 3, 5, 12]}),
        pdp.Encode(["type", "lbl"]),
    )
    print(preprocess)

    # The estimator pipeline mixes a pdpipe stage with an sklearn model.
    estimator = make_pipeline(pdp.FreqDrop(2, "lbl"), LogisticRegression())
    print(estimator)

    # --- training phase ---
    train_frame = _train_df()
    train_processed = preprocess(train_frame)
    print("Processed train set: {}".format(train_processed))
    train_features, train_labels = x_y_by_col_lbl(train_processed, "lbl")
    fitted = estimator.fit(train_features, train_labels)
    print("Fitted model pipeline: {}".format(fitted))

    # --- prediction phase ---
    test_frame = _test_df()
    test_processed = preprocess(test_frame)
    print("Processed test set: {}".format(test_processed))
    # The test labels are split off but not compared against anything here.
    test_features, _test_labels = x_y_by_col_lbl(test_processed, "lbl")
    predicted = fitted.predict(test_features)
    print("predictions: {}".format(predicted))
def test_make_pdpipeline():
    """Check that make_pdpipeline chains the stages it is given.

    Two ``SilentDropStage`` instances (for ``'num1'`` and ``'num2'``) are
    combined; the resulting pipeline must report a length of 2 and, once
    applied to the test frame, must yield a frame without those two
    columns while still containing ``'char'``.
    """
    dropped_columns = ('num1', 'num2')
    stages = [SilentDropStage(column) for column in dropped_columns]
    combined = make_pdpipeline(*stages)
    assert len(combined) == 2

    source_frame = _test_df()
    result = combined.apply(source_frame, verbose=True)

    # Both targeted columns must be gone...
    for column in dropped_columns:
        assert column not in result.columns
    # ...and the untouched column must survive.
    assert 'char' in result.columns