import logging

from sklearn.model_selection import KFold

y_train_all = load_target(target_name)
logging.debug(X_train_all.shape)

y_preds = []
models = []
lgbm_params = config['lgbm_params']

# NOTE: sklearn's KFold raises an error if random_state is set while
# shuffle is False, so shuffle=True is required here.
kf = KFold(n_splits=10, shuffle=True, random_state=0)
for train_index, valid_index in kf.split(X_train_all):
    X_train, X_valid = (X_train_all.iloc[train_index, :],
                        X_train_all.iloc[valid_index, :])
    y_train, y_valid = y_train_all[train_index], y_train_all[valid_index]

    # Run LightGBM on this fold
    y_pred, model = train_and_predict(
        X_train, X_valid, y_train, y_valid, X_test, lgbm_params)

    # Store the fold's results
    y_preds.append(y_pred)
    models.append(model)

    # Log the fold's best score
    log_best(model, config['loss'])

# CV score: average the best validation score across folds
scores = [m.best_score['valid_0'][config['loss']] for m in models]
score = sum(scores) / len(scores)

print('===CV scores===')
print(scores)
print(score)
logging.debug('===CV scores===')
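# Every snippet in this section calls train_and_predict() without showing it.
# Below is a minimal sketch of what such a helper could look like for the
# signature used above, assuming LightGBM's native train() API; the actual
# implementation is not part of the source.
import lightgbm as lgb


def train_and_predict(X_train, X_valid, y_train, y_valid, X_test, params):
    """Train one fold and return (test predictions, fitted booster)."""
    dtrain = lgb.Dataset(X_train, label=y_train)
    dvalid = lgb.Dataset(X_valid, label=y_valid, reference=dtrain)
    model = lgb.train(
        params,
        dtrain,
        valid_sets=[dvalid],  # a single unnamed set becomes 'valid_0'
        num_boost_round=10000,
        callbacks=[lgb.early_stopping(100), lgb.log_evaluation(100)],
    )
    y_pred = model.predict(X_test, num_iteration=model.best_iteration)
    return y_pred, model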
import json
import os

from sklearn.metrics import log_loss

# Load hyper-parameters
with open(os.path.join(CUR_DIR, "configs/default.json"), "r") as f:
    lgbm_params = json.load(f)["lgbm_params"]
logger.debug(f"{lgbm_params=}")

for fold_id, (train_index, valid_index) in enumerate(cv.split(X_train, y_train)):
    X_tr = X_train.loc[train_index, :]
    X_val = X_train.loc[valid_index, :]
    y_tr = y_train.loc[train_index]
    y_val = y_train.loc[valid_index]
    logger.debug(f"{X_tr.shape=} {X_val.shape=} {X_test.shape=}")
    logger.debug(f"{y_tr.shape=} {y_val.shape=} {y_test.shape=}")

    pred_tr, pred_val, pred_test, model = train_and_predict(
        X_tr, X_val, X_test, y_tr, y_val, lgbm_params)
    models.append(model)

    # Keep out-of-fold and per-fold test predictions
    valid_preds[valid_index] = pred_val
    test_preds[fold_id] = pred_test

    # Multi-class log loss for each split
    scores["logloss"]["train"].append(
        model.best_score["training"]["multi_logloss"])
    scores["logloss"]["valid"].append(
        model.best_score["valid_1"]["multi_logloss"])
    scores["logloss"]["test"].append(log_loss(y_test, pred_test))

    # Convert class probabilities to hard labels for each split
    for pred, y, mode in zip([pred_tr, pred_val, pred_test],
                             [y_tr, y_val, y_test],
                             ["train", "valid", "test"]):
        pred = pred.argmax(axis=1)
        # (snippet is truncated here; the original presumably appends
        # label-based metrics for each `mode` to `scores`)
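# The loop above relies on several containers the snippet never defines.
# One plausible setup for a multi-class problem is sketched here; n_classes
# and the StratifiedKFold settings are assumptions, not from the source.
from collections import defaultdict

import numpy as np
from sklearn.model_selection import StratifiedKFold

n_splits = 5
n_classes = y_train.nunique()

cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
models = []
valid_preds = np.zeros((len(X_train), n_classes))  # out-of-fold probabilities
test_preds = np.zeros((n_splits, len(X_test), n_classes))  # per-fold test probs
scores = defaultdict(lambda: defaultdict(list))  # e.g. scores["logloss"]["train"]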
import numpy as np

y_preds = []
models = []
oof = np.zeros(len(X_train_all))

for fold_ in range(n_splits):
    print(f"=== fold{fold_} ===")
    with timer(f'fold{fold_}', logging):
        # Split by a precomputed fold_id column rather than an index split
        X_train = X_train_all[folds['fold_id'] != fold_]
        X_valid = X_train_all[folds['fold_id'] == fold_]
        y_train = y_train_all[folds['fold_id'] != fold_]
        y_valid = y_train_all[folds['fold_id'] == fold_]

        y_pred, model, oof = train_and_predict(
            X_train, X_valid, y_train, y_valid, X_test, PARAMS, CAT, oof)

        # if CALIBRATION:
        #     y_pred = calibration(y_pred, list_sampling_rate[fold_])

        log_best(model, LOSS)
        y_preds.append(y_pred)
        models.append(model)

with timer('save importances', logging):
    save_importances(models, X_train.columns, IMP_PATH, logging)

with timer('calculate score', logging):
    scores = [round(m.best_score['valid_1'][LOSS], 3) for m in models]
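# The snippet above indexes with a precomputed folds['fold_id'] column.
# A minimal sketch of how such a column could be built; the StratifiedKFold
# choice and random_state are assumptions, since the source never shows
# this step.
import pandas as pd
from sklearn.model_selection import StratifiedKFold

folds = pd.DataFrame(index=X_train_all.index)
folds['fold_id'] = -1
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)
for fold_, (_, valid_index) in enumerate(skf.split(X_train_all, y_train_all)):
    folds.loc[X_train_all.index[valid_index], 'fold_id'] = fold_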
from sklearn.model_selection import StratifiedKFold

folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=1000)
for fold_, (trn_idx, val_idx) in enumerate(
        folds.split(X_train_all, y_train_all.values)):
    print("===fold {}===".format(fold_ + 1))
    logging.debug("===fold {}===".format(fold_ + 1))

    X_train, X_valid = (
        X_train_all.iloc[trn_idx, :],
        X_train_all.iloc[val_idx, :]
    )
    y_train, y_valid = y_train_all[trn_idx], y_train_all[val_idx]

    # print("*DOWN SAMPLING*")
    # logging.debug("*DOWN SAMPLING*")
    # X_train, y_train = downsampling(X_train, y_train, fold_)

    y_pred, model, oof = train_and_predict(
        X_train, X_valid, y_train, y_valid, X_test, st_params, oof
    )
    y_preds.append(y_pred)
    models.append(model)
    log_best(model, config['loss'])

print("===Save importances===")
logging.debug('===Save importances===')
save_importances(models, X_train.columns, now)

scores = [
    round(m.best_score['valid_1'][config['loss']], 5) for m in models
]
score = sum(scores) / len(scores)
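# None of the snippets show how the per-fold test predictions in y_preds are
# combined. A common final step (an assumption here, not shown in the source)
# is to average them into a single submission-ready prediction:
import numpy as np

y_pred_final = np.mean(y_preds, axis=0)  # mean over the 5 folds' predictions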