Esempio n. 1
0
def ensemble_aug_eval(n_iter, class_model, with_temp_scal=False):
    """Run ``n_iter`` evaluation passes on the test loader and score their mean prediction.

    Each pass calls ``eval`` on ``class_model.test_data_loader`` with the
    buffers stored in ``class_model.calibration_variables[2]``. The per-sample
    predictions returned by the passes are summed and divided by ``n_iter``;
    the arg-max along dim 1 of that average feeds a fresh ``ConfusionMatrix``
    and the average itself is passed to ``compute_accuracy_metrics``. A
    summary line and the confusion matrix are printed.

    NOTE(review): presumably the test loader applies random augmentation so
    that passes differ, and yields samples in the same order on every pass
    (otherwise the row-wise sum of predictions would mix samples) -- confirm.

    Args:
        n_iter: Number of evaluation passes to average; must be >= 1.
        class_model: Project model wrapper. This function reads its
            ``calibration_variables``, ``test_data_loader`` and
            ``num_classes`` attributes.
        with_temp_scal: Forwarded unchanged to ``eval``.

    Returns:
        Tuple ``(ens_acc, ens_w_acc, conf_matrix_test, ens_acc, pr, rec,
        fscore, auc, temp_ens_preds, true_lab)`` where ``temp_ens_preds`` is
        the averaged prediction tensor and ``true_lab`` are the labels
        returned by the last pass.

    Raises:
        ValueError: If ``n_iter`` is smaller than 1.
    """
    # Fail early: with no pass there are no labels and the averages below
    # would divide by zero.
    if n_iter < 1:
        raise ValueError(
            "n_iter must be a positive integer, got {!r}".format(n_iter))

    acc_test = 0
    auc_test = 0
    ens_preds = torch.zeros_like(class_model.calibration_variables[2][0])

    start_time = time.time()
    test_data_loader = class_model.test_data_loader

    for _ in range(n_iter):
        # eval() returns a 10-tuple; only accuracy, AUC, predictions and
        # labels are used here.
        (acc_test_temp, _, _, _, _, _, _, auc_test_temp, preds,
         true_lab) = eval(class_model, test_data_loader,
                          *class_model.calibration_variables[2],
                          with_temp_scal)
        acc_test += acc_test_temp
        auc_test += auc_test_temp
        ens_preds += preds

    conf_matrix_test = ConfusionMatrix(class_model.num_classes)
    temp_ens_preds = ens_preds / n_iter

    # as_tensor moves existing tensors to the device without the
    # copy-construct warning that torch.tensor(<tensor>) emits.
    _, res = torch.max(torch.as_tensor(temp_ens_preds, device='cuda'), 1)
    conf_matrix_test.update_matrix(res,
                                   torch.as_tensor(true_lab, device='cuda'))

    ens_acc, ens_w_acc = conf_matrix_test.get_metrics()
    # NOTE(review): ens_acc_1 is computed but not returned; the return tuple
    # carries ens_acc in both the first and the fourth slot. Kept as-is so
    # callers see the same values -- confirm whether slot four was meant to
    # be ens_acc_1.
    ens_acc_1, pr, rec, fscore, auc = compute_accuracy_metrics(
        temp_ens_preds, true_lab)

    print(
        "\n|| took {:.1f} minutes \n"
        "| Mean Accuracy statistics: Acc: {:.3f} AUC: {:.3f} \n"
        "| Ensemble Accuracy statistics: Weighted Acc: {:.3f} AUC: {:.3f} Recall: {:.3f} Precision: {:.3f} Fscore: {:.3f} \n"
        .format((time.time() - start_time) / 60., acc_test / n_iter,
                auc_test / n_iter, ens_w_acc, auc, rec, pr, fscore))
    print(conf_matrix_test.conf_matrix)

    return ens_acc, ens_w_acc, conf_matrix_test, ens_acc, pr, rec, fscore, auc, temp_ens_preds, true_lab
Esempio n. 2
0
def ensemble_aug_eval(n_iter, class_model, with_temp_scal=False):
    """Run ``n_iter`` evaluation passes on the test loader and report accuracy and calibration of their mean.

    Each pass calls ``eval`` on ``class_model.test_data_loader`` with the
    buffers stored in ``class_model.calibration_variables[2]``. This variant
    expects ``eval`` to return five values whose third element unpacks to
    ``(_, preds, true_lab)``. Predictions are summed over the passes and
    divided by ``n_iter``; the arg-max along dim 1 of the average feeds a
    fresh ``ConfusionMatrix`` and the average itself is passed to
    ``compute_calibration_measures`` (``apply_softmax=False``, ``bins=15``).
    A summary and the confusion matrix are printed.

    NOTE(review): presumably the test loader applies random augmentation and
    yields samples in the same order on every pass -- confirm.

    Args:
        n_iter: Number of evaluation passes to average; must be >= 1.
        class_model: Project model wrapper. This function reads its
            ``calibration_variables``, ``test_data_loader`` and
            ``num_classes`` attributes.
        with_temp_scal: Forwarded unchanged to ``eval``.

    Returns:
        Tuple ``(ens_acc, ens_w_acc, mean_predictions, true_lab)`` where
        ``true_lab`` are the labels returned by the last pass.

    Raises:
        ValueError: If ``n_iter`` is smaller than 1.
    """
    # Fail early: with no pass there are no labels and the averages below
    # would divide by zero.
    if n_iter < 1:
        raise ValueError(
            "n_iter must be a positive integer, got {!r}".format(n_iter))

    acc_test = 0
    w_acc_test = 0
    ens_preds = torch.zeros_like(class_model.calibration_variables[2][0])

    start_time = time.time()
    test_data_loader = class_model.test_data_loader

    for _ in range(n_iter):
        acc_test_temp, w_acc_test_temp, calibration_statistics, _, _ = eval(
            class_model, test_data_loader,
            *class_model.calibration_variables[2], with_temp_scal)
        acc_test += acc_test_temp
        w_acc_test += w_acc_test_temp

        _, preds, true_lab = calibration_statistics
        ens_preds += preds

    conf_matrix_test = ConfusionMatrix(class_model.num_classes)
    temp_ens_preds = ens_preds / n_iter

    # as_tensor moves existing tensors to the device without the
    # copy-construct warning that torch.tensor(<tensor>) emits.
    _, res = torch.max(torch.as_tensor(temp_ens_preds, device='cuda'), 1)
    conf_matrix_test.update_matrix(res,
                                   torch.as_tensor(true_lab, device='cuda'))

    ens_acc, ens_w_acc = conf_matrix_test.get_metrics()
    ECE_test, MCE_test, BRIER_test, NNL_test = compute_calibration_measures(
        temp_ens_preds, true_lab, apply_softmax=False, bins=15)

    # ECE and MCE are printed scaled by 100; BRIER and NNL are printed raw.
    print(
        "\n|| took {:.1f} minutes \n"
        "| Mean Accuracy statistics: weighted Acc test: {:.3f} Acc test: {:.3f} \n"
        "| Ensemble Accuracy statistics: weighted Acc test: {:.3f} Acc test: {:.3f} \n"
        "| Calibration test: ECE: {:.5f} MCE: {:.5f} BRIER: {:.5f}  NNL: {:.5f}\n\n"
        .format((time.time() - start_time) / 60., w_acc_test / n_iter,
                acc_test / n_iter, ens_w_acc, ens_acc, ECE_test * 100,
                MCE_test * 100, BRIER_test, NNL_test))
    print(conf_matrix_test.conf_matrix)

    return ens_acc, ens_w_acc, temp_ens_preds, true_lab
Esempio n. 3
0
def eval(class_model,
         e_loader,
         predictions,
         labels,
         with_temp_scal=False,
         compute_separate_metrics_for_errors=False):
    """Run ``class_model.n`` over ``e_loader`` and collect predictions and metrics.

    The network is switched to eval mode and every batch is processed under
    ``torch.no_grad()``. Soft-max outputs and targets are written in place
    into the caller-supplied ``predictions`` and ``labels`` buffers at offset
    ``idx * class_model.batch_size``, so every batch except the last is
    assumed to hold exactly ``class_model.batch_size`` samples. A
    ``ConfusionMatrix`` is updated with the arg-max class of each batch.

    NOTE(review): this function shadows the ``eval`` builtin; the name is
    kept because callers use it.

    Args:
        class_model: Project model wrapper. Reads ``n`` (the network),
            ``num_classes``, ``batch_size`` and, when ``with_temp_scal`` is
            true, ``temp_scal_model``.
        e_loader: Iterable yielding ``(x, target, img_name)`` batches.
        predictions: Tensor buffer indexed as ``[sample, :]``; overwritten in
            place with the soft-max outputs.
        labels: Tensor buffer indexed as ``[sample]``; overwritten in place
            with the targets.
        with_temp_scal: If true, the network output is passed through
            ``class_model.temp_scal_model`` before the soft-max.
        compute_separate_metrics_for_errors: If true, entropy, predictions
            and labels are additionally gathered separately for correctly
            and incorrectly classified samples.

    Returns:
        Tuple ``(acc, w_acc, conf_matrix, acc_1, pr, rec, fscore, auc,
        predictions, labels)``. ``acc`` and ``w_acc`` come from
        ``ConfusionMatrix.get_metrics()``, ``conf_matrix`` is its
        ``conf_matrix`` attribute, the next five values come from
        ``compute_accuracy_metrics(predictions, labels)``, and the last two
        are the (mutated) input buffers.
    """
    with torch.no_grad():
        # NOTE(review): filled per batch below but never read or returned.
        entropy_of_predictions = torch.zeros_like(labels).float()

        # Buffers to measure statistics on correctly and incorrectly
        # classified samples separately.
        if compute_separate_metrics_for_errors:
            corr_entropy = torch.zeros_like(labels).float()
            incorr_entropy = torch.zeros_like(labels).float()

            corr_labels = torch.zeros_like(labels).float()
            incorr_labels = torch.zeros_like(labels).float()

            corr_predictions = torch.zeros_like(predictions).float()
            incorr_predictions = torch.zeros_like(predictions).float()

            # Plain Python ints: number of rows already written in each group.
            corr_count = 0
            incorr_count = 0

        class_model.n.eval()
        softmax = nn.Softmax(dim=-1)
        conf_matrix = ConfusionMatrix(class_model.num_classes)
        batch_size = class_model.batch_size

        for idx, (x, target, _img_name) in enumerate(e_loader):
            x = x.to('cuda')
            out = class_model.n(x)

            if with_temp_scal:
                out = class_model.temp_scal_model(out)

            target = target.to('cuda', torch.long)
            probs = softmax(out)
            _, res = torch.max(probs, -1)

            # Rows of the output buffers that belong to this batch.
            start = idx * batch_size
            stop = start + target.size(0)

            predictions[start:stop, :] = probs.detach().cpu()
            labels[start:stop] = target.detach().cpu()
            entropy_of_predictions[start:stop] = entropy_categorical(
                probs).cpu()

            # update the confusion matrix
            conf_matrix.update_matrix(res, target)

            if compute_separate_metrics_for_errors:
                # Split the batch into correctly / incorrectly classified
                # samples and append each group to its own buffers.
                pred_cls = probs.argmax(dim=1)
                corr_idx = pred_cls == target
                incorr_idx = pred_cls != target

                corr_samples_prob = probs[corr_idx, :]
                incorr_samples_prob = probs[incorr_idx, :]

                # int() keeps the running counters as host-side integers
                # instead of letting them turn into device tensors.
                corr_stop = corr_count + int(corr_idx.sum())
                incorr_stop = incorr_count + int(incorr_idx.sum())

                corr_entropy[corr_count:corr_stop] = entropy_categorical(
                    corr_samples_prob).cpu()
                incorr_entropy[incorr_count:incorr_stop] = entropy_categorical(
                    incorr_samples_prob).cpu()

                corr_predictions[corr_count:corr_stop] = \
                    corr_samples_prob.cpu()
                incorr_predictions[incorr_count:incorr_stop] = \
                    incorr_samples_prob.cpu()

                corr_labels[corr_count:corr_stop] = target[corr_idx].cpu()
                incorr_labels[incorr_count:incorr_stop] = \
                    target[incorr_idx].cpu()

                corr_count = corr_stop
                incorr_count = incorr_stop

        # Trim the unused (still zero) tail of the per-group buffers.
        # NOTE(review): per_samples_stats is built but not part of the return
        # value; kept so the flag still exercises the same code path --
        # confirm whether it should be returned or removed.
        per_samples_stats = None
        if compute_separate_metrics_for_errors:
            corr_entropy = corr_entropy[0:corr_count]
            incorr_entropy = incorr_entropy[0:incorr_count]

            corr_predictions = corr_predictions[0:corr_count]
            incorr_predictions = incorr_predictions[0:incorr_count]

            corr_labels = corr_labels[0:corr_count]
            incorr_labels = incorr_labels[0:incorr_count]

            per_samples_stats = {
                'corr': [corr_entropy, corr_predictions, corr_labels],
                'incorr': [incorr_entropy, incorr_predictions, incorr_labels]
            }

        acc, w_acc = conf_matrix.get_metrics()

        acc_1, pr, rec, fscore, auc = compute_accuracy_metrics(
            predictions, labels)

        return acc, w_acc, conf_matrix.conf_matrix, acc_1, pr, rec, fscore, auc, predictions, labels
Esempio n. 4
0
            w_acc_test += w_acc
            if ens_preds is None:
                ens_preds = preds
            else:
                ens_preds += preds
        except Exception as e:
            print(f'Exception {e}')

    conf_matrix_test = ConfusionMatrix(n.num_classes)
    temp_ens_preds = ens_preds / counter

    check_output, res = torch.max(torch.tensor(temp_ens_preds, device='cuda'),
                                  1)
    conf_matrix_test.update_matrix(res, torch.tensor(true_lab, device='cuda'))

    ens_acc, ens_w_acc = conf_matrix_test.get_metrics()
    ens_acc_1, pr, rec, fscore, auc = compute_accuracy_metrics(
        temp_ens_preds, true_lab)

    print("\n ----- FINAL PRINT ----- \n")

    print(
        "\n|| took {:.1f} minutes \n"
        "| Mean Accuracy statistics: Weighted Acc: {:.3f} AUC: {:.3f} \n"
        "| Ensemble Accuracy statistics: Weighted Acc: {:.3f} AUC: {:.3f} Recall: {:.3f} Precision: {:.3f} Fscore: {:.3f} \n"
        .format((time.time() - start_time) / 60., w_acc_test / counter,
                auc_test / counter, ens_w_acc, auc, rec, pr, fscore))
    print(conf_matrix_test.conf_matrix)

    np.save(os.path.join(output_path, "output_" + fname + ".npy"),
            temp_ens_preds)