Exemple #1
0
# Suppress every warning for the duration of the run.
warnings.filterwarnings("ignore")

if __name__ == '__main__':
    # Run-mode flag forwarded to Ensemble.
    # 0: FullTest / 1: run_rest (5000) / 2: train_sampler
    test_mode = 0

    # First-level models fed to the stacker.
    # LightGBM param_pattern presets:
    #   0 -> deep trees / with bagging
    #   1 -> ordinary
    #   2 -> very shallow
    base_models = [MyLightGBM(param_pattern=pattern) for pattern in (0, 1, 2)]
    base_models += [
        Kernels(kernel_name)
        for kernel_name in ('Kernel_nffm', 'Kernel_xdeepfm')
    ]
    # Currently disabled candidates:
    #   MySklearn('RandomForest'), MySklearn('ExtraTrees'),
    #   MyXGBoost(), MyCatBoost()

    # Collect the ensemble settings in one place, then build the stack.
    ensemble_options = dict(
        n_splits=3,
        n_stacks=5,
        stacker=LogisticRegression(),
        base_models=base_models,
        use_rank=True,
        use_adv_train=False,
        corr_threshold=1.0,
        test_mode=test_mode,
    )
    stack = Ensemble(**ensemble_options)
    val_score, oof, pred, df_test = stack.fit_predict()

    # The validation score is embedded in the output file name.
    commit_path = "./commits/stack_{}.csv".format(val_score)
    save_commits(df_test, pred, commit_path)
Exemple #2
0
    'max_depth': 6,
    'n_estimators': 1000,
    'learning_rate': 0.025,
    'subsample': 0.9
}

# Named first-level classifiers. The keys double as the pieces of the
# submission file name, so insertion order matters.
# Disabled candidates: "LGB-3" (lgb_params3), "CAT" (cat_params),
# "GBM" (gb_params), "RF" (rf_params), "ET" (et_params),
# "ABC" (AdaBoostClassifier(n_estimators=100)).
models = {
    "LGB-1": LGBMClassifier(**lgb_params),
    "XGB-1": XGBClassifier(**xgb_params),
    "LGB-2": LGBMClassifier(**lgb_params2),
    "XGB-2": XGBClassifier(**xgb_params2),
}

# Time the whole fit/predict cycle of the stacked ensemble.
started_at = time.time()
# NOTE(review): newer scikit-learn releases spell this loss "log_loss";
# confirm against the installed version before upgrading.
meta_learner = SGDClassifier(loss="log", max_iter=1000)
stack = Ensemble(4, models.values(), stacker=meta_learner)
y_pred = stack.fit_predict(X, y, X_test)
elapsed = time.time() - started_at
print("Finished ensembling in %.2f seconds" % elapsed)

# Assemble the submission table: one id column, one prediction column.
sub = pd.DataFrame()
sub['id'] = id_test
sub['target'] = y_pred

# Iterating a dict yields its keys, so this joins the model names.
submission_stem = "-".join(models)
sub.to_csv("%s.csv" % submission_stem, index=False)
Exemple #3
0
        "PassengerId": test["PassengerId"],
        "Survived": pre
    })
    print("write submit file: *.csv")
    submit_file = '../sub/{}.csv'.format(datetime.now().strftime('%Y%m%d_%H_%M'))
    submission.to_csv(submit_file, encoding="utf-8", index=False)

if __name__ == '__main__':
    # Read the train and test feature CSV files, in that order.
    train, test = [
        pd.read_csv(csv_path)
        for csv_path in ('../data/feature_train.csv', '../data/feature_test.csv')
    ]

# Models handed to Ensemble as its base_models (all defined outside this excerpt).
# NOTE(review): `rf` appears twice in this list; confirm that is intentional.
base_models = [svc, rf, xgb, lgb, rf, gbm]

# Build the ensemble with a decision tree as the stacker.
stack = Ensemble(
    base_models=base_models,
    stacker=DecisionTreeClassifier(),
    n_splits=8,
)
# Recorded result -> Stacker score: 0.8239 LB: 0.779

    # Settings shared by the random forest and gradient boosting models.
    shared_tree_kwargs = dict(
        n_estimators=140,
        max_depth=4,
        min_samples_split=6,
        min_samples_leaf=4,
    )

    # Classifiers passed to Ensemble as its first argument.
    clfs = [
        DecisionTreeClassifier(),
        XGBClassifier(n_estimators=100, max_depth=4, min_child_weight=2),
        RandomForestClassifier(n_jobs=4, **shared_tree_kwargs),
        GradientBoostingClassifier(**shared_tree_kwargs),
    ]

    # `rf` comes from outside this excerpt and is passed as the second argument.
    ensemble = Ensemble(clfs, rf)
    prediction = ensemble.fit_predict(x_train, y_train, x_test)
    submit(prediction)