def threshold_search(preds, ys, flat=True):
    if isinstance(preds, list):
        preds = np.concatenate(preds, axis=0)
        ys = np.concatenate(ys, axis=0)

    # grid search over a single global threshold in [0.30, 0.59]
    max_f1 = -1.0
    max_th = 0.5
    for delta in range(30, 60):
        th = delta / 100.
        f1 = get_f1_threshold(preds, ys, th)
        if max_f1 <= f1:
            max_f1 = f1
            max_th = th
    found = max_th

    if flat:
        return found

    # per-class refinement: perturb one class threshold at a time and
    # keep any change that improves the overall f1
    delta = [-0.2, -0.15, -0.1, -0.05, 0.05, 0.1, 0.15, 0.20, 0.25, 0.0]
    max_th = [max_th] * num_class()
    for cls in range(num_class()):
        before_changed = max_th.copy()
        for d in delta:
            new_th = before_changed.copy()
            if new_th[cls] + d <= 0.1:
                continue
            new_th[cls] += d
            f1 = get_f1_threshold(preds, ys, new_th)
            if max_f1 <= f1:
                max_f1 = f1
                max_th = new_th
    return max_th
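# get_f1_threshold is not defined in this section. A minimal sketch of
# one plausible implementation, assuming `preds` holds per-class
# probabilities of shape (N, C), `ys` holds 0/1 labels of the same
# shape, and `th` is a scalar or a per-class threshold vector:
import numpy as np

def get_f1_threshold_sketch(preds, ys, th):
    # binarize at the threshold(s); a per-class threshold vector
    # broadcasts across the batch dimension
    pred_bin = (preds > np.asarray(th)).astype(np.float64)
    tp = (pred_bin * ys).sum(axis=0)
    fp = (pred_bin * (1 - ys)).sum(axis=0)
    fn = ((1 - pred_bin) * ys).sum(axis=0)
    # macro F1 = mean of per-class F1 = 2*TP / (2*TP + FP + FN)
    f1 = 2 * tp / np.maximum(2 * tp + fp + fn, 1e-12)
    return f1.mean()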
best_w = sigmoid(best_w)
print()
with np.printoptions(precision=2, suppress=True):
    print(best_loss_v)
    for l in best_w:
        for e in l:
            print('%.2f' % e, end=' ')
        print()
    print()
    print(np.sum(best_w, axis=0))

# baseline: unweighted mean over models, full validation set
weighted_inp = np.mean(valid_merged, axis=1)
best_th = threshold_search(weighted_inp, valid_ohs)
__best_threshold = best_th
f1_best = get_f1_threshold(weighted_inp, valid_ohs, __best_threshold)
print(__best_threshold)
print('f1_best(all, naive valid)=%.4f' % f1_best)

# baseline: unweighted mean on the held-out validation split
weighted_inp = np.mean(valid_v, axis=1)
best_th = threshold_search(weighted_inp, valid_ohs_v)
__best_threshold = best_th
f1_best = get_f1_threshold(weighted_inp, valid_ohs_v, __best_threshold)
print(__best_threshold)
print('f1_best(naive valid)=%.4f' % f1_best)

# learned ensemble weights on the held-out validation split
weighted_valid = valid_v * best_w
weighted_valid = np.sum(weighted_valid, axis=1)
weighted_valid = np.clip(weighted_valid, 0.0, 1.0)
best_th = threshold_search(weighted_valid, valid_ohs_v)
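# Shape sketch for the weighted blend above (the sizes are toy
# assumptions inferred from the broadcasting, not from the source):
# with N samples, M models, and C classes, valid_v is (N, M, C) and
# best_w is a per-model, per-class weight matrix of shape (M, C).
import numpy as np

N, M, C = 4, 3, 28                      # illustrative sizes
valid_v_demo = np.random.rand(N, M, C)  # stand-in for model outputs
best_w_demo = np.random.rand(M, C)      # stand-in for learned weights

blended = np.clip((valid_v_demo * best_w_demo).sum(axis=1), 0.0, 1.0)
assert blended.shape == (N, C)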
# print('f1_best(naive valid)=%.4f' % f1_best)
#
# weighted_inp = np.mean(valid_v, axis=1)
# best_th = threshold_search(weighted_inp, valid_ohs_v)
# __best_threshold = best_th
# __best_threshold_naive = best_th
# f1_best = get_f1_threshold(weighted_inp, valid_ohs_v, __best_threshold)
# print(__best_threshold)
# print('f1_best(naive valid_v)=%.4f' % f1_best)

# evaluate the stacking model on the training split
out = eval_batch(train_v, train_logit_v)
out = out.detach().cpu().numpy()
best_th = threshold_search(out, train_ohs_v)
__best_threshold = best_th
f1_best = get_f1_threshold(out, train_ohs_v, __best_threshold)
print(__best_threshold)
print('f1_best(train_v)=', f1_best)

# evaluate on the full validation set with the train-derived threshold
out = eval_batch(valid_merged, valid_logit)
out = out.detach().cpu().numpy()
f1_best = get_f1_threshold(out, valid_ohs, __best_threshold)
print(__best_threshold)
print('f1_best(valid)=', f1_best)

# re-search the threshold directly on validation for comparison
best_th = threshold_search(out, valid_ohs)
f1_best = get_f1_threshold(out, valid_ohs, best_th)
def run_epoch(model, it_data, optimizer=None, title='', aug=False, bt_update=True):
    global __best_threshold, __f1_ths
    losses = []
    f1s = [[] for _ in range(len(__f1_ths))]
    t = it_data
    if not C.get()['eval']:
        t = tqdm(it_data)
    preds = []
    feats = []
    ys = []

    # loss_f = FocalLoss()
    if C.get()['loss'] == 'f1':
        loss_f = f1_loss
    elif C.get()['loss'] == 'bce':
        loss_f = BCELoss(reduction='mean')
    elif C.get()['loss'] == 'margin':
        loss_f = MultiLabelMarginLoss()
    else:
        raise Exception('invalid loss=%s' % C.get()['loss'])

    for cnt, (x, y) in enumerate(t):
        model_results = model(x.cuda())
        pred_y, feat = model_results['logit'], model_results['feat']
        if not aug:
            if len(pred_y.shape) < 2:
                pred_y = pred_y.unsqueeze(0)
            pred_y = pred_y.cuda().float()
        else:
            # average predictions over each group of test-time augmentations
            means = []
            targs = []
            for i in range(0, len(x), test_aug_sz):
                mean_y = torch.mean(pred_y[i:i + test_aug_sz], dim=0, keepdim=True)
                means.append(mean_y.squeeze())
                targs.append(y[i])
            pred_y = torch.stack(means, dim=0)
            y = torch.stack(targs, dim=0)
            # feat = torch.stack(feat.unsqueeze(0), dim=0)

        if C.get()['loss'] == 'margin':
            y = y.cuda().long()
        else:
            y = y.cuda().float()
        loss = loss_f(pred_y, y)

        lr_curr = 0.0
        if optimizer:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            lr_curr = optimizer.param_groups[0]['lr']

        losses.append(loss.item())
        preds.append(pred_y.detach().cpu().numpy())
        feats.append(feat.detach().cpu().numpy())
        ys.append(y.detach().cpu().numpy())

        # refresh the per-threshold f1 scores every 20 batches
        if title != 'test' and cnt % 20 == 0:
            preds_concat = np.concatenate(preds, axis=0)
            ys_concat = np.concatenate(ys, axis=0)
            for i, th in enumerate(__f1_ths):
                f1 = get_f1_threshold(preds_concat, ys_concat, th)
                f1s[i] = f1

        if not C.get()['eval']:
            desc = ['[%s]' % title]
            if title == 'test':
                if isinstance(__best_threshold, np.ndarray):
                    bt_str = ','.join(['%.1f' % t for t in __best_threshold])
                    desc.append(' best_th=%s' % bt_str)
                else:
                    desc.append(' best_th=%.3f' % __best_threshold)
            else:
                desc.append('loss=%.4f' % np.mean(losses))
                f1_desc = ' '.join(['%.3f@%.2f' % (f1, th)
                                    for th, f1 in zip(__f1_ths, f1s)])
                desc.append('f1(%s)' % f1_desc)
            if 'train' in title:
                desc.append(' lr=%.5f' % lr_curr)
            desc = ' '.join(desc)
            t.set_description(desc)

        del pred_y, loss

    # on validation, update the global threshold to the best candidate
    if title == 'valid' and bt_update:
        __best_threshold = __f1_ths[np.argmax(f1s)]

    if title != 'test':
        preds_concat = np.concatenate(preds, axis=0)
        ys_concat = np.concatenate(ys, axis=0)
        for i, th in enumerate(__f1_ths):
            f1 = get_f1_threshold(preds_concat, ys_concat, th)
            f1s[i] = f1
        stats = stats_by_class(preds_concat, ys_concat)
    else:
        stats = []

    return {
        'loss': np.mean(losses),
        'prediction': preds,
        'feature': feats,
        'labels': ys,
        'f1_scores': f1s,
        'stats': stats,
    }
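# A hedged usage sketch of run_epoch for a validation pass; `model`
# and `d_valid` stand in for objects created elsewhere in this file
# and are assumptions, not a verbatim call site.
model.eval()
with torch.no_grad():
    valid_result = run_epoch(model, d_valid, optimizer=None, title='valid')
print('valid loss=%.4f' % valid_result['loss'])
print('f1 at each candidate threshold:', valid_result['f1_scores'])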
# only for evaluation
tta = True
d_train, d_cvalid, d_tests = get_dataloaders_eval(tta)
model.eval()

# ----- train (sampled) -----
train_result = run_epoch(model, d_train, title='train', aug=tta)
best_th = threshold_search(train_result['prediction'], train_result['labels'])
__best_threshold = best_th
print('best_th(train)=', __best_threshold)
preds_concat = np.concatenate(train_result['prediction'], axis=0)
ys_concat = np.concatenate(train_result['labels'], axis=0)
f1_train = get_f1_threshold(preds_concat, ys_concat, __best_threshold)
print('f1(train)=', f1_train)

# ----- cvalid -----
cvalid_result = run_epoch(model, d_cvalid, title='valid', aug=tta)
best_th = threshold_search(cvalid_result['prediction'], cvalid_result['labels'])
__best_threshold = best_th
print('best_th(cvalid)=', __best_threshold)
preds_concat = np.concatenate(cvalid_result['prediction'], axis=0)
ys_concat = np.concatenate(cvalid_result['labels'], axis=0)
f1_valid = get_f1_threshold(preds_concat, ys_concat, __best_threshold)
print('f1(cvalid)=', f1_valid)

# ----- test -----
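# A plausible continuation for the test step, inferred from the
# pattern above and from run_epoch's signature; treating d_tests as an
# iterable of loaders, reusing save_pred, and the output path are all
# assumptions, not the author's code.
for d_test in d_tests:
    test_result = run_epoch(model, d_test, title='test', aug=tta,
                            bt_update=False)  # keep the cvalid threshold
    preds_concat = np.concatenate(test_result['prediction'], axis=0)
    save_pred(ids_test, preds_concat, th=__best_threshold,
              fname='asset/eval_test.csv')  # hypothetical output path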
        # keep the best weights; stop after 1000 steps without improvement
        best_w = w.squeeze().detach().numpy()
        not_improved_cnt = 0
    else:
        not_improved_cnt += 1
        if not_improved_cnt > 1000:
            break


def sigmoid(x, derivative=False):
    return x * (1 - x) if derivative else 1 / (1 + np.exp(-x))


best_w = sigmoid(best_w)
print(best_loss_v, best_w)

# reshape per-model weights to (1, M, 1) so they broadcast over
# (samples, models, classes)
best_w = np.expand_dims(np.expand_dims(best_w, 0), -1)
weighted_valid = valid_merged * best_w
weighted_valid = np.sum(weighted_valid, axis=1)
weighted_valid = np.clip(weighted_valid, 0.0, 1.0)

best_th = threshold_search(weighted_valid, valid_ohs)
__best_threshold = best_th
f1_best = get_f1_threshold(weighted_valid, valid_ohs, __best_threshold)
print(__best_threshold)
print('f1_best=', f1_best)

weighted_test = test_merged * best_w
weighted_test = np.sum(weighted_test, axis=1)
weighted_test = np.clip(weighted_test, 0.0, 1.0)
output = 'asset/ensemble_nn1.csv'
save_pred(ids_test, weighted_test, th=__best_threshold, fname=output)
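# A quick check of the derivative convention in sigmoid above: with
# derivative=True the argument must already be a sigmoid *output*, so
# sigmoid(s, derivative=True) with s = sigmoid(z) equals dσ/dz at z.
z = 0.3
s = sigmoid(z)
assert abs(sigmoid(s, derivative=True) - s * (1 - s)) < 1e-12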
print(cvr['train-merror-mean'].tail(1))
print(cvr['test-merror-mean'].tail(1))
print('-----')

print('----- train on subset')
xg_subset_t = xgb.DMatrix(valid_expand_t, label=valid_lbs_t)
xg_subset_v = xgb.DMatrix(valid_expand_v, label=valid_lbs_v)
bst = xgb.train(param, xg_subset_t, num_round,
                [(xg_subset_t, 'train'), (xg_subset_v, 'valid')],
                early_stopping_rounds=early_stop)

xgpred_val = bst.predict(xgb.DMatrix(valid_merged[:split_idx]))
best_th = threshold_search(xgpred_val, valid_ohs[:split_idx])
__best_threshold = best_th
f1_best = get_f1_threshold(xgpred_val, valid_ohs[:split_idx], __best_threshold)
print(__best_threshold)
print('f1_best=', f1_best)

sys.exit(0)

# print('----------- train')
# bst = xgb.train(param, xg_train, num_round, watchlist, early_stopping_rounds=early_stop)
#
# xgpred_val = bst.predict(xg_train2)
# best_th = threshold_search(xgpred_val, valid_ohs)
# __best_threshold = best_th
# f1_best = get_f1_threshold(xgpred_val, valid_ohs, __best_threshold)
# print(__best_threshold)
# print('f1_best=', f1_best)
#
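# `param`, `num_round`, and `early_stop` are defined outside this
# excerpt. The 'train-merror-mean' metric above implies a multiclass
# objective; a representative configuration might look like the
# following (all values are illustrative assumptions, not the
# author's settings):
param = {
    'objective': 'multi:softprob',
    'num_class': 28,        # hypothetical class count
    'max_depth': 6,
    'eta': 0.1,
    'eval_metric': 'merror',
}
num_round = 500
early_stop = 20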
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    # exponential moving average of the training loss (decay 0.9)
    ma_loss_t = ma_loss_t * 0.9 + loss.item() * 0.1

print()
# net.load_state_dict(best_model)
net.eval()

th_flat = True
weighted_inp = np.mean(train_merged, axis=1)
best_th = threshold_search(weighted_inp, train_ohs, flat=th_flat)
__best_threshold = best_th
f1_best = get_f1_threshold(weighted_inp, train_ohs, __best_threshold)
print(__best_threshold)
print('f1_best(naive train)=%.4f' % f1_best)

weighted_inp = np.mean(train_v, axis=1)
best_th = threshold_search(weighted_inp, train_ohs_v, flat=th_flat)
__best_threshold = best_th
f1_best = get_f1_threshold(weighted_inp, train_ohs_v, __best_threshold)
print(__best_threshold)
print('f1_best(naive train_v)=%.4f' % f1_best)

# apply the train-derived threshold to validation, then re-search
weighted_inp = np.mean(valid_merged, axis=1)
f1_best = get_f1_threshold(weighted_inp, valid_ohs, __best_threshold)
print('f1_best(naive valid)=%.4f' % f1_best)
best_th = threshold_search(weighted_inp, valid_ohs, flat=th_flat)
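# The running loss above is an exponential moving average with decay
# 0.9; an equivalent standalone helper, as a sketch (not from the
# original source):
def ema_update(ma, value, decay=0.9):
    return ma * decay + value * (1 - decay)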