import os
import time

import numpy as np
import torch

# ConfusionMatrix, compute_calibration_measures, and eval are project utilities
# assumed to be in scope in the original source.


def ensemble_aug_eval(n_iter, class_model, with_temp_scal=False):
    acc_test = 0
    w_acc_test = 0
    ens_preds = torch.zeros_like(class_model.calibration_variables[2][0])

    start_time = time.time()
    test_data_loader = class_model.test_data_loader

    # Accumulate n_iter (augmented) evaluation passes over the test set.
    for i in range(1, n_iter + 1):
        acc_test_temp, w_acc_test_temp, calibration_statistics, conf_matrix_temp, _ = \
            eval(class_model, test_data_loader,
                 *class_model.calibration_variables[2], with_temp_scal)
        acc_test += acc_test_temp
        w_acc_test += w_acc_test_temp

        _, preds, true_lab = calibration_statistics
        ens_preds += preds

    # Average the accumulated predictions to obtain the ensemble output.
    conf_matrix_test = ConfusionMatrix(class_model.num_classes)
    temp_ens_preds = ens_preds / n_iter
    # temp_ens_preds is already a tensor; .to('cuda') avoids the copy-construction
    # warning raised by torch.tensor(tensor).
    _, res = torch.max(temp_ens_preds.to('cuda'), 1)
    conf_matrix_test.update_matrix(res, torch.as_tensor(true_lab, device='cuda'))

    ens_acc, ens_w_acc = conf_matrix_test.get_metrics()
    ECE_test, MCE_test, BRIER_test, NNL_test = compute_calibration_measures(
        temp_ens_preds, true_lab, apply_softmax=False, bins=15)

    print("\n|| took {:.1f} minutes \n"
          "| Mean Accuracy statistics: weighted Acc test: {:.3f} Acc test: {:.3f} \n"
          "| Ensemble Accuracy statistics: weighted Acc test: {:.3f} Acc test: {:.3f} \n"
          "| Calibration test: ECE: {:.5f} MCE: {:.5f} BRIER: {:.5f} NNL: {:.5f}\n\n"
          .format((time.time() - start_time) / 60., w_acc_test / n_iter, acc_test / n_iter,
                  ens_w_acc, ens_acc, ECE_test * 100, MCE_test * 100, BRIER_test, NNL_test))
    print(conf_matrix_test.conf_matrix)
    return ens_acc, ens_w_acc, temp_ens_preds, true_lab
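# Usage sketch (hypothetical: `model` stands in for any trained wrapper exposing the
# attributes used above; this call is illustrative, not part of the original source):
#
#   ens_acc, ens_w_acc, ens_preds, true_lab = ensemble_aug_eval(
#       n_iter=10, class_model=model, with_temp_scal=True)
#
# Each of the n_iter passes re-runs eval() on the test loader, so when the loader
# applies stochastic augmentation the averaged ens_preds acts as a
# test-time-augmentation ensemble of the per-pass predictions.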
for idx, (x, t) in enumerate(test_loader):
    x, t = x.cuda(), t.cuda()
    out = net.forward_test(x)
    MC_test += net.classification_error(out, t)
    total_test += t.size(0)
    # Index by the actual batch size t.size(0) when writing into the preallocated
    # buffers, so a smaller final batch does not raise a shape-mismatch error.
    predictions_test[idx * batch_test:idx * batch_test + t.size(0), :] = out.detach().cpu()
    labels_test[idx * batch_test:idx * batch_test + t.size(0)] = t.detach().cpu()

''' Monitoring Calibration Error '''
ECEtrain, MCEtrain, BRIERtrain, NNLtrain = compute_calibration_measures(
    predictions_train, labels_train, apply_softmax=True, bins=bins_for_eval)
ECEtest, MCEtest, BRIERtest, NNLtest = compute_calibration_measures(
    predictions_test, labels_test, apply_softmax=True, bins=bins_for_eval)
ECEvalid, MCEvalid, BRIERvalid, NNLvalid = [0.0] * 4
if args.use_valid_set:
    ECEvalid, MCEvalid, BRIERvalid, NNLvalid = compute_calibration_measures(
        predictions_valid, labels_valid, apply_softmax=True, bins=bins_for_eval)

'''variables to display'''
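# The body of compute_calibration_measures is not shown in this section. For
# reference, a minimal sketch of the ECE term over equal-width confidence bins
# looks roughly like the function below (assumptions: `probs` holds softmax
# probabilities, `labels` holds integer class ids; the name ece_sketch is ours):
def ece_sketch(probs, labels, bins=15):
    probs = probs.detach().cpu()
    labels = labels.detach().cpu()
    conf, pred = torch.max(probs, dim=1)          # top-1 confidence and prediction
    correct = pred.eq(labels).float()
    edges = torch.linspace(0, 1, bins + 1)
    ece = torch.zeros(1)
    for lo, hi in zip(edges[:-1], edges[1:]):
        in_bin = (conf > lo) & (conf <= hi)       # samples falling in this bin
        if in_bin.any():
            # |accuracy - confidence| weighted by the bin's share of samples
            ece += in_bin.float().mean() * (correct[in_bin].mean() - conf[in_bin].mean()).abs()
    return ece.item()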
# (continuation of an elided constructor call)
              no_logs=True, optimize_temp_scal=True)

if net_opt.load_epoch != 0:
    n.load_mode_one(net_opt.load_epoch)

# Fit the temperature parameter on held-out data, then persist the calibrated model.
train_temperature_scaling_decoupled(n,
                                    temp_scal_lr=net_opt.temp_scal_learning_rate,
                                    temp_scal_epochs=net_opt.temp_scal_epochs)
n.save(net_opt.load_epoch)

pred_variable, labs_variable = n.calibration_variables[2]

# Evaluate once with and once without temperature scaling for comparison.
acc, w_acc, calib_stats_withTS, conf_matrix_test, _ = eval(
    n, n.test_data_loader, pred_variable, labs_variable, with_temp_scal=True)
_, preds, true_lab = calib_stats_withTS
ECE_test_calib, MCE_test_calib, BRIER_test_calib, NNL_test_calib = \
    compute_calibration_measures(preds, true_lab, apply_softmax=False, bins=15)

acc, w_acc, calib_stats_withNOTS, conf_matrix, _ = eval(
    n, n.test_data_loader, pred_variable, labs_variable, with_temp_scal=False)
_, preds, true_lab = calib_stats_withNOTS
ECE_test_NOcalib, MCE_test_NOcalib, BRIER_test_NOcalib, NNL_test_NOcalib = \
    compute_calibration_measures(preds, true_lab, apply_softmax=False, bins=15)

print("\n ----- FINISH ----- \n")
print("---------TEST------------")
print("\n|| took {:.1f} minutes \n"
      "| Accuracy statistics: weighted Acc test: {:.3f} Acc test: {:.3f} \n"
      "| Uncalibrated test: ECE: {:.5f} MCE: {:.5f} BRIER: {:.5f} NNL: {:.5f}\n"
      # (format string and its .format(...) call continue in elided code)
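# train_temperature_scaling_decoupled is project-specific and its body is not shown
# here. A minimal sketch of standard temperature scaling, fitting a single scalar T
# on held-out logits by minimizing NLL (function name and use of LBFGS are our
# assumptions, not the project's implementation):
def fit_temperature(logits, labels, max_iter=50):
    temp = torch.nn.Parameter(torch.ones(1, device=logits.device))
    opt = torch.optim.LBFGS([temp], lr=0.01, max_iter=max_iter)

    def closure():
        opt.zero_grad()
        # NLL of the temperature-scaled logits on the held-out set
        loss = torch.nn.functional.cross_entropy(logits / temp, labels)
        loss.backward()
        return loss

    opt.step(closure)
    return temp.item()   # calibrated probabilities: softmax(logits / T)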
def train(class_model, num_epochs, starting_e=0):
    for epoch in range(starting_e + 1, num_epochs):
        class_model.n.train()
        losses = []
        start_time = time.time()

        for idx, (x, target, _) in enumerate(class_model.data_loader):
            # measure data loading time
            # print("data time: " + str(time.time() - start_time))

            # compute output
            x = x.to('cuda')
            target = target.to('cuda', torch.long)
            output = torch.squeeze(class_model.n(x))
            loss = class_model.criterion(output, target)
            losses.append(loss.item())

            # compute gradient and do SGD step
            class_model.optimizer.zero_grad()
            loss.backward()
            class_model.optimizer.step()

        # Validation and test passes with calibration statistics.
        acc_valid, w_acc_valid, calibration_statistics, conf_matrix_valid, _ = eval(
            class_model, class_model.valid_data_loader,
            *class_model.calibration_variables[1], class_model.optimize_temp_scal)
        _, preds, true_lab = calibration_statistics
        ECE_valid, MCE_valid, BRIER_valid, NNL_valid = compute_calibration_measures(
            preds, true_lab, apply_softmax=False, bins=15)

        acc_test, w_acc_test, calibration_statistics, conf_matrix_test, _ = eval(
            class_model, class_model.test_data_loader,
            *class_model.calibration_variables[2], class_model.optimize_temp_scal)
        _, preds, true_lab = calibration_statistics
        ECE_test, MCE_test, BRIER_test, NNL_test = compute_calibration_measures(
            preds, true_lab, apply_softmax=False, bins=15)

        # Build the epoch report once and emit it through both the logger and stdout,
        # instead of duplicating the format string.
        report = (
            "\n|| Epoch {} took {:.1f} minutes \t LossCE {:.5f} \n"
            "| Accuracy statistics: weighted Acc valid: {:.3f} weighted Acc test: {:.3f} Acc valid: {:.3f} Acc test: {:.3f} \n"
            "| Calibration valid: ECE: {:.5f} MCE: {:.5f} BRIER: {:.5f} NNL: {:.5f} \n"
            "| Calibration test: ECE: {:.5f} MCE: {:.5f} BRIER: {:.5f} NNL: {:.5f}\n\n"
            .format(epoch, (time.time() - start_time) / 60., np.mean(losses),
                    w_acc_valid, w_acc_test, acc_valid, acc_test,
                    ECE_valid * 100., MCE_valid * 100., BRIER_valid, NNL_valid,
                    ECE_test * 100., MCE_test * 100., BRIER_test, NNL_test))
        class_model.logger.info(report)
        print(report)

        print(conf_matrix_valid)
        print('\n')
        print(conf_matrix_test)

        if (w_acc_valid > class_model.best_acc or epoch % 20 == 0) and epoch > 20:
            print("SAVING MODEL")
            class_model.save(epoch)
            # Only move best_acc upward: a periodic (epoch % 20) save must not
            # overwrite the best validation accuracy with a worse one.
            class_model.best_acc = max(class_model.best_acc, w_acc_valid)

        if class_model.schedname == 'plateau':
            class_model.scheduler.step(w_acc_valid)
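# Note: scheduler.step(w_acc_valid) passes a metric to *maximize*, which implies a
# ReduceLROnPlateau scheduler created with mode='max'. The setup below is a
# hypothetical example of a compatible configuration, not the project's actual one:
#
#   scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
#       optimizer, mode='max', factor=0.1, patience=10)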
# (continuation of an elided ensemble-evaluation loop)
    w_acc_test += w_acc

    if ens_preds is None:
        ens_preds = preds
    else:
        ens_preds += preds

conf_matrix_test = ConfusionMatrix(n.num_classes)
temp_ens_preds = ens_preds / counter

# temp_ens_preds is already a tensor; .to('cuda') avoids the copy-construction
# warning raised by torch.tensor(tensor).
_, res = torch.max(temp_ens_preds.to('cuda'), 1)
conf_matrix_test.update_matrix(res, torch.as_tensor(true_lab, device='cuda'))

ens_acc, ens_w_acc = conf_matrix_test.get_metrics()
ECE_test, MCE_test, BRIER_test, NNL_test = compute_calibration_measures(
    temp_ens_preds, true_lab, apply_softmax=False, bins=15)

print("\n ----- FINAL PRINT ----- \n")
print("\n|| took {:.1f} minutes \n"
      "| Mean Accuracy statistics: weighted Acc test: {:.3f} Acc test: {:.3f} \n"
      "| Ensemble Accuracy statistics: weighted Acc test: {:.3f} Acc test: {:.3f} \n"
      "| Calibration test: ECE: {:.5f} MCE: {:.5f} BRIER: {:.5f} NNL: {:.5f}\n\n"
      .format((time.time() - start_time) / 60., w_acc_test / counter, acc_test / counter,
              ens_w_acc, ens_acc, ECE_test * 100, MCE_test * 100, BRIER_test, NNL_test))
print(conf_matrix_test.conf_matrix)

avgname = os.path.basename(opt.avg)
fname = opt.dataset + "_" + os.path.splitext(avgname)[0]