# NOTE(review): whitespace-collapsed fragment; it begins mid-call (the opening of
# the CV/eval invocation is outside this view) and original newlines/indentation
# are lost, so loop boundaries below are ambiguous. As far as visible:
#  - collect per-run OOF predictions (y_preds), accumulate trained models
#    (model_all) and best-round counts (nround_mean) — presumably inside a loop
#    whose header is not visible; TODO confirm.
#  - nround_mean is halved then scaled by 1.3 — looks like "average of 2 runs,
#    padded 30% for the final fit"; verify the divisor matches the actual loop count.
#  - report CV weighted logloss mean/std via print and utils.send_line.
#  - softmax each OOF prediction matrix, average them, and score with
#    utils_metric.akiyama_metric against y.
# NOTE(review): `n_class=True` passes a boolean where L3's analogous call passes
# `y.unique().shape[0]` (a class count) — confirm this is intentional.
shuffle=True, n_class=True) y_preds.append(y_pred) model_all += models nround_mean += len(ret['wloss-mean']) wloss_list.append(ret['wloss-mean'][-1]) nround_mean = int((nround_mean / 2) * 1.3) utils.send_line(f'nround_mean: {nround_mean}') result = f"CV wloss: {np.mean(wloss_list)} + {np.std(wloss_list)}" print(result) utils.send_line(result) for i, y_pred in enumerate(y_preds): y_pred = pd.DataFrame(utils_metric.softmax(y_pred.astype(float).values)) if i == 0: tmp = y_pred else: tmp += y_pred tmp /= len(y_preds) y_preds = tmp.copy().values.astype(float) a_score = utils_metric.akiyama_metric(y.values, y_preds) print(f'akiyama_metric: {a_score}') utils.send_line(f'akiyama_metric: {a_score}') # ============================================================================= # model # =============================================================================
# NOTE(review): whitespace-collapsed fragment; begins with the tail of a
# pd.concat(..., axis=1) call whose opening is outside this view. As far as visible:
#  - split the test matrix into galactic / extragalactic subsets using the
#    boolean mask loaded from te_is_gal.pkl, restricted to COL_gal / COL_exgal
#    feature columns, then free the full matrix.
#  - for each paired (gal, exgal) model, predict raw scores, softmax them, and
#    sum across models; finally divide both sums by int(LOOP) to average.
# NOTE(review): the averaging assumes len(model_all_gal) == LOOP — if the zip is
# shorter (unequal model lists), the division is wrong; verify upstream.
axis=1) gc.collect() is_gal = pd.read_pickle('../data/te_is_gal.pkl') X_test_gal = X_test[is_gal][COL_gal] X_test_exgal = X_test[~is_gal][COL_exgal] del X_test gc.collect() for i, (model_gal, model_exgal) in enumerate(zip(model_all_gal, model_all_exgal)): y_pred_gal = model_gal.predict(X_test_gal) y_pred_gal = utils_metric.softmax(y_pred_gal) y_pred_exgal = model_exgal.predict(X_test_exgal) y_pred_exgal = utils_metric.softmax(y_pred_exgal) if i == 0: y_pred_all_gal = y_pred_gal y_pred_all_exgal = y_pred_exgal else: y_pred_all_gal += y_pred_gal y_pred_all_exgal += y_pred_exgal y_pred_all_gal /= int(LOOP) y_pred_all_exgal /= int(LOOP) sub_gal = pd.read_pickle('../data/te_oid_gal.pkl') sub_exgal = pd.read_pickle('../data/te_oid_exgal.pkl')
# NOTE(review): whitespace-collapsed fragment; begins mid-call (verbose_eval/seed
# are trailing kwargs of an invocation opened outside this view). As far as visible:
#  - ex.eval_oob produces stratified, shuffled out-of-fold predictions over X[COL]
#    with n_class derived from the label cardinality (y.unique().shape[0]);
#    results are appended to y_preds, models accumulated into model_all, and
#    per-run best-iteration / final wloss recorded — presumably inside a loop
#    whose header is not visible; TODO confirm.
#  - the accumulated OOF matrices are softmaxed and averaged into y_preds_,
#    which is then scored with utils_metric.multi_weighted_logloss(y, y_preds_)
#    (return value discarded — likely relies on the function printing/logging).
#  - the trailing `def multi_weighted_logloss(...)` header is incomplete here:
#    its body lies outside this view, so nothing can be said about it.
verbose_eval=50, seed=SEED) y_pred = ex.eval_oob(X[COL], y.values, models, SEED, stratified=True, shuffle=True, n_class=y.unique().shape[0]) y_preds.append(y_pred) model_all += models nround_mean += len(ret['wloss-mean']) wloss_list.append(ret['wloss-mean'][-1]) for i, y_pred in enumerate(y_preds): y_pred = utils_metric.softmax(y_pred.astype(float).values) if i == 0: y_preds_ = y_pred else: y_preds_ += y_pred y_preds_ /= len(y_preds) # ============================================================================= # # ============================================================================= utils_metric.multi_weighted_logloss(y, y_preds_) def multi_weighted_logloss(y_true: np.array, y_preds: np.array):
# NOTE(review): whitespace-collapsed fragment; original indentation (loop nesting)
# is lost. As far as visible:
#  - build the test feature matrix by column-concatenating pickled feature files,
#    restricted to COL.
#  - for each feature set, predict with each model, softmax, and sum; divide by
#    int(LOOP * MOD_N) to average; assemble the submission frame and hand it to
#    utils.postprocess (class_99 handling, per the comment).
# NOTE(review): likely bug — `models = model_set[i]` and `col = feature_set[i]`
# are assigned, but the inner loop iterates `model_all` (not `models`) while
# predicting on X_test[col]; the per-feature-set model list appears unused.
# Confirm whether the inner loop should read `enumerate(tqdm(models))`.
# NOTE(review): `if i == 0` assumes feature_set's first key is 0; if feature_set
# is a dict with non-integer or non-zero-based keys, y_pred_all is never
# initialized (NameError) — verify feature_set's key scheme.
# ============================================================================= # test # ============================================================================= X_test = pd.concat([pd.read_pickle(f) for f in tqdm(files_te, mininterval=10)], axis=1)[COL] gc.collect() for i in feature_set: models = model_set[i] col = feature_set[i] for j, model in enumerate(tqdm(model_all)): gc.collect() y_pred = model.predict(X_test[col]) y_pred = utils_metric.softmax(y_pred) if i == 0: y_pred_all = y_pred else: y_pred_all += y_pred y_pred_all /= int(LOOP * MOD_N) sub = pd.read_csv('../input/sample_submission.csv.zip') df = pd.DataFrame(y_pred_all, columns=sub.columns[1:-1]) sub = pd.concat([sub[['object_id']], df], axis=1) # class_99 sub.to_pickle(f'../data/y_pred_raw_{__file__}.pkl') utils.postprocess(sub, method='oli')