warnings.filterwarnings("ignore")

if __name__ == '__main__':
    # Run mode switch: 0: FullTest / 1: run_rest (5000) / 2: train_sampler
    test_mode = 0

    # First-level learners fed into the stacking ensemble.
    learners = [
        MyLightGBM(param_pattern=0),   # deep / with bagging
        MyLightGBM(param_pattern=1),   # normal
        MyLightGBM(param_pattern=2),   # very shallow
        Kernels('Kernel_nffm'),
        Kernels('Kernel_xdeepfm'),
        #MySklearn('RandomForest'),
        #MySklearn('ExtraTrees'),
        #MyXGBoost(),
        #MyCatBoost(),
    ]

    # Second-level (meta) model that blends the base predictions.
    meta_model = LogisticRegression()
    stack = Ensemble(
        n_splits=3,
        n_stacks=5,
        stacker=meta_model,
        base_models=learners,
        use_rank=True,
        use_adv_train=False,
        corr_threshold=1.0,
        test_mode=test_mode,
    )

    # fit_predict returns validation score, out-of-fold preds, test preds,
    # and the test dataframe; the score is embedded in the submission name.
    val_score, oof, pred, df_test = stack.fit_predict()
    save_commits(df_test, pred, "./commits/stack_{}.csv".format(val_score))
# NOTE(review): this chunk begins mid-way through a parameter dict — the dict's
# opening brace and variable name are outside this view (presumably the
# xgb_params used below; confirm against the full file).
'max_depth': 6, 'n_estimators': 1000, 'learning_rate': 0.025, 'subsample': 0.9
}

# First-level models for the stacking ensemble, keyed by a short label.
# The labels are later joined to form the submission filename.
models = {
    "LGB-1": LGBMClassifier(**lgb_params),
    "XGB-1": XGBClassifier(**xgb_params),
    "LGB-2": LGBMClassifier(**lgb_params2),
    #"LGB-3": LGBMClassifier(**lgb_params3),
    "XGB-2": XGBClassifier(**xgb_params2),
    #"CAT": CatBoostClassifier(**cat_params),
    #"GBM": GradientBoostingClassifier(**gb_params),
    #"RF": RandomForestClassifier(**rf_params),
    #"ET": ExtraTreesClassifier(**et_params),
    #"ABC": AdaBoostClassifier(n_estimators=100),
}

start = time.time()
# 4-fold stacking; a logistic-loss SGD classifier blends the base predictions.
# NOTE(review): scikit-learn >= 1.3 renamed loss "log" to "log_loss" — confirm
# the pinned sklearn version still accepts "log".
stack = Ensemble(4, models.values(), stacker=SGDClassifier(loss="log", max_iter=1000))
y_pred = stack.fit_predict(X, y, X_test)
print("Finished ensembling in %.2f seconds" % (time.time() - start))

# Build the submission: one row per test id with the predicted target.
sub = pd.DataFrame()
sub['id'] = id_test
sub['target'] = y_pred
# Filename encodes which models participated, e.g. "LGB-1-XGB-1-....csv".
sub.to_csv("%s.csv" % ("-".join(models.keys())), index=False)
# NOTE(review): this chunk begins mid-way through a DataFrame-constructor dict
# inside a submit-style function — its `def` line and the opening of the
# `submission = pd.DataFrame({` call are outside this view.
"PassengerId": test["PassengerId"],
"Survived": pre
})
print("write submit file: *.csv")
# Timestamped submission file, e.g. ../sub/20240101_12_30.csv
submit_file = '../sub/{}.csv'.format(datetime.now().strftime('%Y%m%d_%H_%M'))
submission.to_csv(submit_file, encoding="utf-8", index=False)

if __name__ == '__main__':
    # Pre-engineered Titanic features produced by an upstream step.
    train = pd.read_csv('../data/feature_train.csv')
    test = pd.read_csv('../data/feature_test.csv')

    # First-level models (defined elsewhere in the file).
    # NOTE(review): `rf` appears twice in this list — confirm whether the
    # duplicate is intentional (extra weight) or a copy-paste slip.
    base_models = [svc, rf, xgb, lgb, rf, gbm]
    # NOTE(review): this `stack` object is never used below — `ensemble` is
    # what produces the prediction. Possibly dead code; verify before removing.
    stack = Ensemble(n_splits=8, stacker=DecisionTreeClassifier(), base_models=base_models)

    # Stacker score: 0.8239 LB: 0.779
    clfs = [DecisionTreeClassifier(),
            XGBClassifier(n_estimators=100, max_depth=4, min_child_weight=2),
            RandomForestClassifier(n_estimators=140, max_depth=4, min_samples_split=6, min_samples_leaf=4, n_jobs=4),
            GradientBoostingClassifier(n_estimators=140, max_depth=4, min_samples_split=6, min_samples_leaf=4)]
    # NOTE(review): this Ensemble call is positional with a different arity
    # than the keyword call above — presumably a second Ensemble class or an
    # older signature; confirm against its definition.
    ensemble = Ensemble(clfs, rf)
    prediction = ensemble.fit_predict(x_train, y_train, x_test)
    submit(prediction)