Example #1
    def compute_val(self, model, valloader, device, **kwargs):
        """
        Logs we want to keep on the validation set
        Args:
            val_accuracy (float): accuracy on the validation set
            val_outputs (torch.Tensor): size = (number_of_tests, batch_size, number_of_classes):
            output of the evaluation on the validation set
        """
        self.validation_logging = True
        assert 'number_of_tests' in kwargs, \
            'Provide number_of_tests for the Bayesian evaluation'
        val_accuracy, val_outputs = eval_bayesian(
            model,
            valloader,
            number_of_tests=kwargs['number_of_tests'],
            device=device,
            verbose=False)

        if val_accuracy > self.max_val_acc:
            self.max_val_acc = val_accuracy
            # Clone the weights so the stored best state is not overwritten by further training
            self.max_weights = {key: weight.clone() for key, weight in model.state_dict().items()}

        self.logs['val_accuracy'] = val_accuracy
        (
            self.logs['val_uncertainty_vr'],
            self.logs['val_uncertainty_pe'],
            self.logs['val_uncertainty_mi'],
        ) = get_all_uncertainty_measures_bayesian(val_outputs)
        self.add_to_history([
            'val_accuracy',
            'val_uncertainty_vr',
            'val_uncertainty_pe',
            'val_uncertainty_mi',
        ])

        for key in [
                'val_uncertainty_vr', 'val_uncertainty_pe',
                'val_uncertainty_mi'
        ]:
            self.logs[key] = self.logs[key].mean()
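
For reference, the three measures returned by get_all_uncertainty_measures_bayesian above are the variation-ratio, the predictive entropy and the mutual information over the stochastic forward passes. The sketch below is based on their standard definitions, not on the project's actual implementation, and the helper name uncertainty_measures_sketch is hypothetical; it expects outputs of size (number_of_tests, batch_size, number_of_classes).

import torch


def uncertainty_measures_sketch(outputs):
    # outputs: (number_of_tests, batch_size, number_of_classes) softmax probabilities
    # Variation-ratio: 1 - frequency of the modal predicted class across the passes
    predictions = outputs.argmax(dim=2)                    # (number_of_tests, batch_size)
    modal_class = predictions.mode(dim=0).values           # (batch_size,)
    variation_ratio = 1 - (predictions == modal_class).float().mean(dim=0)

    # Predictive entropy: entropy of the mean softmax output
    mean_probs = outputs.mean(dim=0)                       # (batch_size, number_of_classes)
    predictive_entropy = -(mean_probs * torch.log(mean_probs + 1e-12)).sum(dim=1)

    # Mutual information: predictive entropy minus the expected per-pass entropy
    per_pass_entropy = -(outputs * torch.log(outputs + 1e-12)).sum(dim=2).mean(dim=0)
    mutual_information = predictive_entropy - per_pass_entropy

    return variation_ratio, predictive_entropy, mutual_information
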
    trainloader=trainloader,
    # valloader=valloader,
    # output_dir_tensorboard='./output',
    device=device,
    verbose=True,
)

true_train_labels, all_outputs_train = eval_bayesian(
    bay_net,
    trainloader,
    return_accuracy=False,
    number_of_tests=number_of_tests,
    device=device,
)

train_vr, train_pe, train_mi = get_all_uncertainty_measures_bayesian(
    all_outputs_train)

true_eval_labels, all_outputs_eval = eval_bayesian(
    bay_net,
    evalloader,
    return_accuracy=False,
    number_of_tests=number_of_tests,
    device=device,
)
eval_vr, eval_pe, eval_mi = get_all_uncertainty_measures_bayesian(
    all_outputs_eval)
eval_preds = get_predictions_from_multiple_tests(all_outputs_eval)
eval_correct_preds = (eval_preds.float() == true_eval_labels.float()).float()
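
get_predictions_from_multiple_tests is not defined in this excerpt; it is assumed to aggregate the stacked softmax outputs over the stochastic-pass dimension before taking the argmax, as in this minimal sketch (the helper name predictions_from_multiple_tests_sketch is hypothetical):

def predictions_from_multiple_tests_sketch(all_outputs):
    # all_outputs: (number_of_tests, batch_size, number_of_classes) softmax probabilities
    # Average over the passes, then pick the most probable class per sample.
    return all_outputs.mean(dim=0).argmax(dim=1)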

eval_acc_vrs = []
eval_acc_pes = []
    print(f'Eval acc: {round(100 * eval_acc, 2)} %, '
          f'Uncertainty Softmax: {eval_us.mean()}, '
          f'Predictive Entropy: {eval_pe.mean()}')
    print(f'Unseen: '
          f'Uncertainty Softmax: {unseen_us.mean()}, '
          f'Predictive Entropy: {unseen_pe.mean()}')
    res = pd.concat((res,
                     pd.DataFrame.from_dict({
                         'seen_uncertainty_us': [eval_us],
                         'seen_uncertainty_pe': [eval_pe],
                         'unseen_uncertainty_us': [unseen_us],
                         'unseen_uncertainty_pe': [unseen_pe],
                     })),
                    axis=1)
else:
    eval_vr, eval_pe, eval_mi = get_all_uncertainty_measures_bayesian(
        all_outputs_eval)
    unseen_vr, unseen_pe, unseen_mi = get_all_uncertainty_measures_bayesian(
        all_outputs_unseen)
    print(f'Eval acc: {round(100 * eval_acc, 2)} %, '
          f'Variation-Ratio: {eval_vr.mean()}, '
          f'Predictive Entropy: {eval_pe.mean()}, '
          f'Mutual Information: {eval_mi.mean()}')
    print(f'Unseen: '
          f'Variation-Ratio: {unseen_vr.mean()}, '
          f'Predictive Entropy: {unseen_pe.mean()}, '
          f'Mutual Information: {unseen_mi.mean()}')
    res = pd.concat((res,
                     pd.DataFrame.from_dict({
                         'sigma_initial': [log(1 + exp(rho))],
                         'seen_uncertainty_vr': [eval_vr],
                         'seen_uncertainty_pe': [eval_pe],
Example #4
    filename = filename / f'group{group_nb}{extra_info}'
elif exp_nb is not None:
    _, all_dirs = get_file_and_dir_path_in_dir(
        os.path.join(polyaxon_results_path, polyaxon_type, exp_nb), which_file)
    filename = filename / f'group{exp_nb}{extra_info}'

results = pd.DataFrame()
for dir_path in all_dirs:
    experiment = dir_path.split('/')[-1]
    dir_path = pathlib.Path(dir_path)
    result = pd.read_pickle(dir_path / 'results.pkl')
    result['experiment'] = experiment
    if type_of_test == 'random':
        softmax_output = torch.load(dir_path / 'softmax_outputs.pt',
                                    map_location='cpu')
        seen_vr, seen_pe, seen_mi = get_all_uncertainty_measures_bayesian(
            softmax_output)
        result['seen uncertainty vr'] = seen_vr
        result['seen uncertainty pe'] = seen_pe
        result['seen uncertainty mi'] = seen_mi

        random_output = torch.load(dir_path / 'random_outputs.pt',
                                   map_location='cpu')
        random_vr, random_pe, random_mi = get_all_uncertainty_measures_bayesian(
            random_output)
        result['random uncertainty vr'] = random_vr
        result['random uncertainty pe'] = random_pe
        result['random uncertainty mi'] = random_mi

    elif type_of_test in ['unseen_classes', 'unseen_dataset']:
        seen_output = torch.load(dir_path / 'softmax_outputs_eval_seen.pt',
                                 map_location='cpu')
Example #5
def main(
        exp_nbs=None,
        path_to_exps=path_to_exps,
        path_to_results=save_csv_path,
        nb_of_runs=nb_of_runs,
        number_of_tests=number_of_tests,
        rstars=rstars,
        delta=delta,
        recompute_outputs=recompute_outputs,
        verbose=verbose,
        save_csv=save_csv,
        do_save_animation=do_save_animation,
        device='cpu',
):
    """
    Performs selective classification given a trained network and a test set. Computes a different threshold
    for each accepted risk.
    Args:
        exp_nbs (list of int || str): numbers of the experiments to process
        path_to_exps (str): path to the experiment groups
        path_to_results (str): path where the results are saved
        nb_of_runs (int): number of times to repeat the same operation to get a confidence interval
        number_of_tests (int): number of stochastic forward passes for each prediction
        rstars (list of float): accepted risks
        delta (float): probability that the true risk is higher than the computed upper bound
        recompute_outputs (bool): whether to recompute the outputs on the train / test sets. Set to False if
                                  they are already computed and you don't want to lose time.
        verbose (bool): whether to show a progress bar
        save_csv (bool): whether to save the results as a csv
        do_save_animation (bool): whether to save the animation of the threshold search
        device (torch.device): gpu or cpu

    """
    if exp_nbs is None:
        exp_nbs = these_exp_nbs
    save_csv_path = pathlib.Path(path_to_results)
    save_fig_path = pathlib.Path(path_to_results)

    if not os.path.exists(save_csv_path / 'results_train.csv'):
        results_train = pd.DataFrame(
            columns=['exp', 'unc', 'threshold', 'risk', 'acc', 'coverage', 'time', 'number_of_tests'])
        results_eval = pd.DataFrame(
            columns=['exp', 'unc', 'threshold', 'risk', 'acc', 'coverage', 'time', 'number_of_tests'])
        if save_csv:
            save_csv_path.mkdir(exist_ok=True, parents=True)
            results_train.to_csv(save_csv_path / 'results_train.csv')
            results_eval.to_csv(save_csv_path / 'results_eval.csv')
    else:
        # Load existing results, drop pandas' auto-generated 'Unnamed' index columns, and keep a backup
        results_train = pd.read_csv(save_csv_path / 'results_train.csv')
        results_train = results_train.filter(regex=r'^(?!Unnamed)')
        results_train.to_csv(save_csv_path / 'results_train_backup.csv')
        results_eval = pd.read_csv(save_csv_path / 'results_eval.csv')
        results_eval = results_eval.filter(regex=r'^(?!Unnamed)')
        results_eval.to_csv(save_csv_path / 'results_eval_backup.csv')

    global_start = time.time()
    for _ in range(nb_of_runs):
        for exp_nb in exp_nbs:
            print(exp_nb)
            bay_net, arguments, _ = get_trained_model_and_args_and_groupnb(exp_nb, exp_path=path_to_exps)
            if recompute_outputs:

                split_labels = arguments.get('split_labels', 10)
                if arguments.get('trainset', 'mnist') == 'mnist':
                    get_trainset = get_mnist
                elif arguments.get('trainset', 'mnist') == 'cifar10':
                    get_trainset = get_cifar10
                else:
                    raise ValueError('trainset not recognized')

                trainloader_seen, _, evalloader_seen = get_trainset(
                    train_labels=range(split_labels),
                    eval_labels=range(split_labels),
                    batch_size=128,
                )

                bay_net.to(device)

                true_labels_train, all_outputs_train = eval_bayesian(
                    bay_net,
                    trainloader_seen,
                    number_of_tests=number_of_tests,
                    return_accuracy=False,
                    device=device,
                    verbose=True,
                )
                labels_predicted_train = get_predictions_from_multiple_tests(all_outputs_train).float()

                true_labels_eval, all_outputs_eval = eval_bayesian(
                    bay_net,
                    evalloader_seen,
                    number_of_tests=number_of_tests,
                    return_accuracy=False,
                    device=device,
                    verbose=True,
                )
                labels_predicted_eval = get_predictions_from_multiple_tests(all_outputs_eval).float()

            correct_preds_train = (labels_predicted_train == true_labels_train).float()
            residuals = 1 - correct_preds_train
            correct_preds_eval = (labels_predicted_eval == true_labels_eval).float()

            uncs_train = get_all_uncertainty_measures_bayesian(all_outputs_train)
            uncs_eval = get_all_uncertainty_measures_bayesian(all_outputs_eval)
            for idx_risk, rstar in enumerate(tqdm(rstars)):
                for unc_train, unc_eval, unc_name in zip(uncs_train, uncs_eval, ['vr', 'pe', 'mi']):
                    start = time.time()
                    thetas, bounds, risks, coverages = bound_animate(rstar, delta, -unc_train, residuals,
                                                                     verbose=verbose,
                                                                     max_iter=10,
                                                                     precision=1e-5, )
                    threshold = thetas[-1]
                    acc_train = correct_preds_train[
                        -unc_train > threshold].mean()  # .sum() / correct_preds_train.size(0)
                    coverage_train = (-unc_train >= threshold).sum().float() / unc_train.size(0)
                    new_res_train = pd.DataFrame.from_dict({
                        'exp': [exp_nb],
                        'unc': [unc_name],
                        'delta': [delta],
                        'threshold': [threshold],
                        'risk': [rstar],
                        'acc': [acc_train],
                        'coverage': [coverage_train],
                        'time': [time.time() - start],
                        'number_of_tests': [number_of_tests],
                        'loss_type': [arguments.get('loss_type', 'criterion')],
                    })
                    convert_tensor_to_float(new_res_train)
                    results_train = pd.concat([results_train, new_res_train], sort=True)

                    acc_eval = correct_preds_eval[-unc_eval > threshold].mean()
                    coverage_eval = (-unc_eval >= threshold).sum().float() / unc_eval.size(0)
                    new_res_eval = pd.DataFrame.from_dict({
                        'exp': [exp_nb],
                        'unc': [unc_name],
                        'delta': [delta],
                        'threshold': [threshold],
                        'risk': [rstar],
                        'acc': [acc_eval],
                        'coverage': [coverage_eval],
                        'time': [time.time() - start],
                        'number_of_tests': [number_of_tests],
                        'loss_type': [arguments.get('loss_type', 'criterion')],
                    })
                    convert_tensor_to_float(new_res_eval)
                    results_eval = pd.concat([results_eval, new_res_eval], sort=True)

                    if do_save_animation:
                        save_animation_path = save_fig_path / 'animation'
                        save_animation_path.mkdir(exist_ok=True, parents=True)
                        save_animation_path = save_animation_path / f'{exp_nb}_{unc_name}_{idx_risk}_' \
                            f'finding_threshold.gif'
                        save_animation(arguments, rstar, unc_train, correct_preds_train, risks, bounds, coverages,
                                       thetas, figsize,
                                       save_animation_path)

                if save_csv:
                    results_train.to_csv(save_csv_path / 'results_train.csv')
                    results_eval.to_csv(save_csv_path / 'results_eval.csv')
            print(f'Time since start: {time.time() - global_start}')
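
bound_animate is not defined in this excerpt. Under the assumption that it follows the usual selection-with-guaranteed-risk recipe, the hypothetical find_threshold_sketch below illustrates the kind of search it performs: a bisection over confidence thresholds that keeps the lowest threshold whose upper-bounded selective risk stays below the accepted risk rstar. The Hoeffding-style bound is only an illustrative stand-in for whatever bound the actual implementation uses.

import math


def find_threshold_sketch(rstar, delta, confidences, residuals, max_iter=10):
    # confidences: torch tensor, higher means more confident (the caller above passes -uncertainty)
    # residuals: torch tensor, 1.0 for a wrong prediction, 0.0 for a correct one
    low, high = confidences.min().item(), confidences.max().item()
    threshold = high
    for _ in range(max_iter):
        mid = (low + high) / 2
        selected = confidences > mid
        n_selected = int(selected.sum())
        if n_selected == 0:
            high = mid                  # nothing selected: lower the threshold
            continue
        empirical_risk = residuals[selected].mean().item()
        # Upper bound on the true selective risk, holding with probability 1 - delta
        risk_bound = empirical_risk + math.sqrt(math.log(1 / delta) / (2 * n_selected))
        if risk_bound <= rstar:
            threshold = mid
            high = mid                  # bound satisfied: try a lower threshold (more coverage)
        else:
            low = mid                   # bound violated: require more confidence
    return threshold
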
    fig.suptitle(f'Exp {exp}, Nb tests {number_of_tests} ', x=0.16, y=0.8)

    ax1.imshow(img_seen)
    ax1.set_axis_off()
    # ax2.imshow(img_unseen)
    sns.heatmap(sample_outputs_seen, ax=ax4)

    ax3.scatter(labels, densities_seen, marker='_')
    if is_determinist:
        sr_seen, pe_seen = um.get_all_uncertainty_measures_not_bayesian(
            sample_outputs_seen.unsqueeze(1))
        ax3.set_title(
            f'softmax output seen. SR: {round(sr_seen.item(), 4)}, PE: {round(pe_seen.item(), 4)}'
        )
    else:
        vr_seen, pe_seen, mi_seen = um.get_all_uncertainty_measures_bayesian(
            sample_outputs_seen.unsqueeze(1))
        vr_unseen, pe_unseen, mi_unseen = um.get_all_uncertainty_measures_bayesian(
            sample_outputs_unseen.unsqueeze(1))
        ax3.set_title(
            f'softmax output seen. VR: {round(vr_seen.item(), 4)}, PE: {round(pe_seen.item(), 4)}, MI: {round(mi_seen.item(), 4)}'
        )
    if is_cifar:
        ax1.set_title(
            f'True: {cifar_labels[target_seen]}. Prediction: {cifar_labels[prediction]}. Id: {img_index_seen}'
        )
        ax3.set_xticks(range(10))
        ax3.set_xticklabels(cifar_labels)
        ax3.tick_params(axis='x', rotation=45)
    else:
        ax1.set_title(
            f'True: {target_seen}. Prediction: {prediction}. Id: {img_index_seen}'
    nb_of_runs,
    3,
))
uncs2 = np.zeros((
    nb_of_runs,
    3,
))

for i in tqdm(range(nb_of_runs)):
    eval_acc1, eval_output1 = do_train_dirac_one_image(verbose)
    eval_acc2, eval_output2 = do_train_ce(verbose)
    accs1[i] = eval_acc1
    accs2[i] = eval_acc2
    uncs1[i] = np.array([
        unc.mean()
        for unc in get_all_uncertainty_measures_bayesian(eval_output1)
    ])
    uncs2[i] = np.array([
        unc.mean()
        for unc in get_all_uncertainty_measures_bayesian(eval_output2)
    ])

# Run statistical tests (t-test and chi-square) comparing the two training regimes
pvalues = pd.DataFrame(columns=['acc'] +
                       [f'unc_{i}' for i in range(uncs1.shape[1])],
                       index=['ttest', 'chisquare'])
for name, stat_test in zip(['ttest', 'chisquare'], [ttest_ind, chisquare]):
    pvalues.loc[name, 'acc'] = stat_test(accs1, accs2).pvalue
    for i in range(3):
        print(ttest_ind(uncs1[:, i], uncs2[:, i]))
        pvalues.loc[name, f'unc_{i}'] = stat_test(uncs1[:, i], uncs2[:,