Example #1
import logging

from sklearn.model_selection import KFold

# X_train_all, X_test, target_name, config, and the helpers load_target,
# train_and_predict, log_best are assumed to be prepared earlier in the script.
y_train_all = load_target(target_name)
logging.debug(X_train_all.shape)

y_preds = []
models = []

lgbm_params = config['lgbm_params']

# shuffle=True is required by scikit-learn when random_state is given.
kf = KFold(n_splits=10, shuffle=True, random_state=0)
for train_index, valid_index in kf.split(X_train_all):
    X_train, X_valid = (X_train_all.iloc[train_index, :],
                        X_train_all.iloc[valid_index, :])
    y_train, y_valid = y_train_all[train_index], y_train_all[valid_index]

    # Run LightGBM
    y_pred, model = train_and_predict(X_train, X_valid, y_train, y_valid,
                                      X_test, lgbm_params)

    # Save the results
    y_preds.append(y_pred)
    models.append(model)

    # Per-fold score
    log_best(model, config['loss'])

# CV score
scores = [m.best_score['valid_0'][config['loss']] for m in models]
score = sum(scores) / len(scores)
print('===CV scores===')
print(scores)
print(score)
logging.debug('===CV scores===')
logging.debug(scores)
logging.debug(score)
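
The helper train_and_predict is project-specific and is not shown on this page; its signature also varies between the examples. A minimal sketch of what it might look like for Example #1, assuming the native lightgbm training API and that lgbm_params carries the objective and metric (all names and defaults here are illustrative):

import lightgbm as lgb


def train_and_predict(X_train, X_valid, y_train, y_valid, X_test, params):
    """Train one fold and predict on the test set (illustrative sketch)."""
    dtrain = lgb.Dataset(X_train, label=y_train)
    dvalid = lgb.Dataset(X_valid, label=y_valid, reference=dtrain)

    model = lgb.train(
        params,
        dtrain,
        valid_sets=[dvalid],  # a single valid set shows up as 'valid_0'
        num_boost_round=10000,
        callbacks=[lgb.early_stopping(100), lgb.log_evaluation(100)],
    )

    # Predict with the iteration chosen by early stopping.
    y_pred = model.predict(X_test, num_iteration=model.best_iteration)
    return y_pred, model

The 'valid_0' key produced by a single valid_sets entry is exactly the key that the CV-score computation above reads from model.best_score.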
Example #2

import json
import logging
import os

from sklearn.metrics import log_loss

logger = logging.getLogger(__name__)

# CUR_DIR, cv, X_train, y_train, X_test, y_test, valid_preds, test_preds,
# models, and the scores dict are assumed to be prepared earlier in the script.

# Load hyper-parameters
with open(os.path.join(CUR_DIR, "configs/default.json"), "r") as f:
    lgbm_params = json.load(f)["lgbm_params"]
    logger.debug(f"{lgbm_params=}")

for fold_id, (train_index,
              valid_index) in enumerate(cv.split(X_train, y_train)):
    X_tr = X_train.loc[train_index, :]
    X_val = X_train.loc[valid_index, :]
    y_tr = y_train.loc[train_index]
    y_val = y_train.loc[valid_index]

    logger.debug(f"{X_tr.shape=} {X_val.shape=} {X_test.shape=}")
    logger.debug(f"{y_tr.shape=} {y_val.shape=} {y_test.shape=}")

    pred_tr, pred_val, pred_test, model = train_and_predict(
        X_tr, X_val, X_test, y_tr, y_val, lgbm_params)
    models.append(model)

    valid_preds[valid_index] = pred_val
    test_preds[fold_id] = pred_test

    scores["logloss"]["train"].append(
        model.best_score["training"]["multi_logloss"])
    scores["logloss"]["valid"].append(
        model.best_score["valid_1"]["multi_logloss"])
    scores["logloss"]["test"].append(log_loss(y_test, pred_test))

    # Convert probabilities to hard class labels for each split.
    for pred, y, mode in zip([pred_tr, pred_val, pred_test],
                             [y_tr, y_val, y_test],
                             ["train", "valid", "test"]):
        pred = pred.argmax(axis=1)
        # The source is truncated here; presumably a per-split metric such as
        # accuracy is computed from the hard labels.
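
The containers valid_preds and test_preds are created before the loop in the original project. A minimal sketch of how they might be initialized and consumed after the loop, assuming a multi-class problem with n_classes classes and an n_splits-fold splitter cv (these names are assumptions):

import numpy as np
from sklearn.metrics import log_loss

n_splits = cv.get_n_splits()
n_classes = y_train.nunique()

# Out-of-fold probabilities for every training row, filled fold by fold.
valid_preds = np.zeros((len(X_train), n_classes))
# One probability matrix per fold for the fixed test set.
test_preds = np.zeros((n_splits, len(X_test), n_classes))

# ... after the CV loop ...
# Average the fold-wise test predictions and score the out-of-fold matrix.
test_pred_mean = test_preds.mean(axis=0)
cv_logloss = log_loss(y_train, valid_preds)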
Example #3
import numpy as np

# X_train_all, y_train_all, X_test, n_splits, folds (a frame with a 'fold_id'
# column), and the constants PARAMS, CAT, LOSS, IMP_PATH are assumed to be
# prepared earlier in the script.
y_preds = []
models = []
oof = np.zeros(len(X_train_all))

for fold_ in range(n_splits):
    print(f"=== fold{fold_} ===")
    with timer(f'fold{fold_}', logging):
        train_mask = folds['fold_id'] != fold_
        valid_mask = folds['fold_id'] == fold_
        X_train, X_valid = X_train_all[train_mask], X_train_all[valid_mask]
        y_train, y_valid = y_train_all[train_mask], y_train_all[valid_mask]

        y_pred, model, oof = train_and_predict(X_train, X_valid, y_train,
                                               y_valid, X_test, PARAMS,
                                               CAT, oof)

        # if CALIBRATION:
        #     y_pred = calibration(y_pred, list_sampling_rate[fold_])

        log_best(model, LOSS)

        y_preds.append(y_pred)
        models.append(model)

with timer('save importances', logging):
    save_importances(models, X_train.columns, IMP_PATH, logging)

with timer('calculate score', logging):
    scores = [round(m.best_score['valid_1'][LOSS], 3) for m in models]
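
save_importances is another project helper, shared by Examples #3 and #4 (note they pass different third arguments). A plausible sketch, assuming it averages LightGBM gain importances across the fold models and writes them to a CSV; the signature and file layout are assumptions:

import numpy as np
import pandas as pd


def save_importances(models, feature_names, out_path, logging=None):
    """Average feature importances over fold models and save them (sketch)."""
    imp = pd.DataFrame({
        'feature': feature_names,
        'importance': np.mean(
            [m.feature_importance(importance_type='gain') for m in models],
            axis=0,
        ),
    }).sort_values('importance', ascending=False)

    imp.to_csv(out_path, index=False)
    if logging is not None:
        logging.debug(imp.head(20))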
Example #4
import numpy as np
from sklearn.model_selection import StratifiedKFold

# X_train_all, y_train_all, X_test, st_params, config, now, and the helpers
# train_and_predict, log_best, save_importances are assumed to be prepared
# earlier in the script.
y_preds = []
models = []
oof = np.zeros(len(X_train_all))

folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=1000)

for fold_, (trn_idx, val_idx) in enumerate(
        folds.split(X_train_all, y_train_all.values)):
    print("===fold {}===".format(fold_ + 1))
    logging.debug("===fold {}===".format(fold_ + 1))
    X_train, X_valid = (
        X_train_all.iloc[trn_idx, :], X_train_all.iloc[val_idx, :]
    )
    y_train, y_valid = y_train_all[trn_idx], y_train_all[val_idx]

    # print("*DOWN SAMPLING*")
    # logging.debug("*DOWN SAMPLING*")
    # X_train, y_train = downsampling(X_train, y_train, fold_)

    y_pred, model, oof = train_and_predict(
        X_train, X_valid, y_train, y_valid, X_test, st_params, oof
    )

    y_preds.append(y_pred)
    models.append(model)

    log_best(model, config['loss'])

print("===Save importances===")
logging.debug('===Save importances===')
save_importances(models, X_train.columns, now)

scores = [
    round(m.best_score['valid_1'][config['loss']], 5) for m in models
]
score = sum(scores) / len(scores)
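
Each example accumulates one test-set prediction per fold in y_preds; a common final step is to average them into a single ensemble prediction and to score the out-of-fold vector. A minimal sketch, assuming every element of y_preds has the same shape and that config['loss'] is a log loss:

import numpy as np
from sklearn.metrics import log_loss

# Average the per-fold test predictions into one ensemble prediction.
y_pred_mean = np.mean(y_preds, axis=0)

# The oof vector filled in by train_and_predict gives a single overall CV
# estimate that complements the mean of the per-fold best scores.
oof_score = log_loss(y_train_all, oof)
print(f'CV (per-fold mean): {score:.5f}  OOF: {oof_score:.5f}')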