def train_it(train, holdout_df, filename):
    """Train a GoldenLgb model on ``train`` and validate it on a holdout frame.

    The target is the ``"is_attributed"`` column and the features are the
    columns named by the module-level ``predict_col``. The data is split with
    ``test_size=0.2`` and ``random_state=99`` before being handed to
    ``do_train_sk``. After training, a ``HoldoutValidator`` is run against
    ``holdout_df`` and asked to write its predictions to ``filename``.

    NOTE(review): another ``train_it`` is defined further down in this file
    and rebinds the name, so this variant is shadowed once the whole module
    has been executed.

    Args:
        train: Frame holding both the feature columns and ``"is_attributed"``.
        holdout_df: Frame passed to ``HoldoutValidator`` for validation.
        filename: Destination passed to ``validator.output_prediction``.

    Returns:
        The model object returned by ``GoldenLgb.do_train_sk``.
    """
    labels = train["is_attributed"]
    features = train[predict_col]
    # Keep the four split parts together: [X_train, X_valid, y_train, y_valid].
    split_parts = model_selection.train_test_split(
        features,
        labels,
        test_size=0.2,
        random_state=99,
    )
    timer.time("prepare train in ")

    trainer = pocket_lgb.GoldenLgb()
    model = trainer.do_train_sk(*split_parts)
    trainer.show_feature_importance(model)

    # Drop the local references to the training data before validation so the
    # collector has a chance to reclaim them.
    del train, split_parts
    gc.collect()
    timer.time("end train in ")

    holdout_check = holdout_validator2.HoldoutValidator(
        model, holdout_df, predict_col
    )
    holdout_check.validate()
    holdout_check.output_prediction(filename)
    timer.time("done validation in ")

    return model
def train_it(train, pred_df, output_file):
    """Train a GoldenLgb model on ``train`` and write predictions for ``pred_df``.

    The target is the ``"is_attributed"`` column and the features are the
    columns named by the module-level ``predict_col``. The data is split with
    ``test_size=0.1`` and ``random_state=99`` before training. Predictions for
    ``pred_df`` are stored next to its ``"click_id"`` values (cast to int),
    summarised on stdout, and written to ``output_file`` as CSV without the
    index.

    Args:
        train: Frame holding both the feature columns and ``"is_attributed"``.
        pred_df: Frame to predict on; must contain ``"click_id"`` and the
            ``predict_col`` columns.
        output_file: Path (or buffer) handed to ``DataFrame.to_csv``.

    Returns:
        The model object returned by ``GoldenLgb.do_train_sk``.
    """
    target = train["is_attributed"]
    features = train[predict_col]
    fit_x, valid_x, fit_y, valid_y = model_selection.train_test_split(
        features,
        target,
        test_size=0.1,
        random_state=99,
    )
    timer.time("prepare train in ")

    trainer = pocket_lgb.GoldenLgb()
    model = trainer.do_train_sk(fit_x, valid_x, fit_y, valid_y)
    trainer.show_feature_importance(model)

    # Release the local references to the training data before predicting.
    del train, fit_x, valid_x, fit_y, valid_y
    gc.collect()
    timer.time("end train in ")

    click_ids = pred_df["click_id"].astype("int")
    submission = pd.DataFrame({"click_id": click_ids})
    submission["is_attributed"] = model.predict(pred_df[predict_col])
    print(submission.describe())
    timer.time("done prediction in ")

    submission.to_csv(output_file, index=False)
    timer.time("submission in ")

    return model
# Load the test frame from the feather file at OUTPUT_TEST.
test = pd.read_feather(OUTPUT_TEST)
timer.time("load csv in ")

# Stack the three training frames in one pass. DataFrame.append was removed in
# pandas 2.0, and chaining it copied the data twice; pd.concat produces the
# same rows and index with a single copy.
train = pd.concat([train7, train8, train9])
# DataFrame.info() prints its report itself and returns None, so it is called
# directly rather than wrapped in print() (which would also emit "None").
train.info()
test.info()
del train7, train8, train9
gc.collect()

# Split target and features with test_size=0.2 and a fixed random_state.
train_y = train["is_attributed"]
train_x = train[predict_col]
X_train, X_valid, y_train, y_valid = model_selection.train_test_split(
    train_x, train_y, test_size=0.2, random_state=99)
timer.time("prepare train in ")

lgb = pocket_lgb.GoldenLgb()
model = lgb.do_train_sk(X_train, X_valid, y_train, y_valid)
lgb.show_feature_importance(model)
timer.time("end train in ")
# Drop references to the training data before predicting on the test frame.
del train, X_train, X_valid, y_train, y_valid
gc.collect()

# Predictions are made on the *unfiltered* test frame; mamas_idx is then used
# below to pick positions out of that full prediction array.
y_pred = model.predict(test[predict_col])
mamas_idx = np.load(MAMAS_INDEX)

# Only rows with click_id >= 0 go into the submission.
# NOTE(review): y_pred[mamas_idx] must have exactly one entry per remaining
# row, in the same order — presumably mamas_idx encodes that mapping; verify
# against whatever produced MAMAS_INDEX.
test = test[test["click_id"] >= 0]
submission = pd.DataFrame({"click_id": test["click_id"].astype("int")})
submission["is_attributed"] = y_pred[mamas_idx]
print(submission.describe())
timer.time("done prediction in ")