def _rotation(model, loaders, device, ind_dataset, val_dataset, ood_datasets, num_classes):
    """Evaluate the rotation self-supervision OoD score, save the scores and report.

    A weight ``lamda`` is chosen from a fixed grid by comparing the first value
    returned by ``_score_npzs`` (named ``auc`` here) on the validation loaders.
    The winning validation scores are passed to ``_find_threshold``; the test
    loaders are then scored with the chosen ``lamda``, the four score arrays
    are written with ``np.savez`` and the metrics are handed to ``_verbose``.

    Args:
        model: Model forwarded to ``_predict_rotations``.
        loaders: Six loaders, in order: validation InD, test InD,
            validation OoD, test OoD 1, test OoD 2, test OoD 3.
        device: Forwarded to ``_predict_rotations``.
        ind_dataset: In-distribution dataset name (file names and report).
        val_dataset: Validation dataset name (file names and report).
        ood_datasets: Three OoD dataset names (file names and report).
        num_classes: Forwarded to ``_predict_rotations``.
    """
    ood_dataset_1, ood_dataset_2, ood_dataset_3 = ood_datasets
    val_ind_loader, test_ind_loader, val_ood_loader, test_ood_loader_1, test_ood_loader_2, test_ood_loader_3 = loaders

    val_kl_div_ind, val_rot_score_ind, _ = _predict_rotations(model, val_ind_loader, num_classes, device=device)
    val_kl_div_ood, val_rot_score_ood, _ = _predict_rotations(model, val_ood_loader, num_classes, device=device)

    # Start from "no candidate yet" so the first grid point is always accepted;
    # otherwise the best_* score variables would stay unbound whenever no
    # validation AUC exceeds the initial value.
    best_val_auc, best_lamda = None, 0.25
    for lamda in [0.25, 0.5, 0.75, 1]:
        anomaly_score_ind = val_kl_div_ind - lamda * val_rot_score_ind
        anomaly_score_ood = val_kl_div_ood - lamda * val_rot_score_ood
        auc, _, _ = _score_npzs(anomaly_score_ind, anomaly_score_ood)
        if best_val_auc is None or auc > best_val_auc:
            best_val_auc = auc
            # Remember the winning weight: the test-time predictions below
            # must use the same lamda the threshold was derived from.
            best_lamda = lamda
            best_anomaly_score_ind = anomaly_score_ind
            best_anomaly_score_ood = anomaly_score_ood

    _, threshold = _find_threshold(best_anomaly_score_ind, best_anomaly_score_ood)

    _, _, ind_full = _predict_rotations(model, test_ind_loader, num_classes, lamda=best_lamda, device=device)
    _, _, ood_full_1 = _predict_rotations(model, test_ood_loader_1, num_classes, lamda=best_lamda, device=device)
    _, _, ood_full_2 = _predict_rotations(model, test_ood_loader_2, num_classes, lamda=best_lamda, device=device)
    _, _, ood_full_3 = _predict_rotations(model, test_ood_loader_3, num_classes, lamda=best_lamda, device=device)

    ind_savefile_name_full = f'npzs/self_supervision_{ind_dataset}_ind_{ind_dataset}_val_{val_dataset}.npz'
    ood_savefile_name_full_1 = f'npzs/self_supervision_{ood_dataset_1}_ind_{ind_dataset}_val_{val_dataset}.npz'
    ood_savefile_name_full_2 = f'npzs/self_supervision_{ood_dataset_2}_ind_{ind_dataset}_val_{val_dataset}.npz'
    ood_savefile_name_full_3 = f'npzs/self_supervision_{ood_dataset_3}_ind_{ind_dataset}_val_{val_dataset}.npz'

    # One (auc, fpr, acc) triple per OoD test set, all against the same InD scores.
    aucs, fprs, accs = [], [], []
    for ood_full in (ood_full_1, ood_full_2, ood_full_3):
        auc, fpr, acc = _score_npzs(ind_full, ood_full, threshold)
        aucs.append(auc)
        fprs.append(fpr)
        accs.append(acc)

    np.savez(ind_savefile_name_full, ind_full)
    np.savez(ood_savefile_name_full_1, ood_full_1)
    np.savez(ood_savefile_name_full_2, ood_full_2)
    np.savez(ood_savefile_name_full_3, ood_full_3)

    print('###############################################')
    print()
    print(f'Succesfully stored in-distribution ood scores to {ind_savefile_name_full} and out-distribution ood scores to {ood_savefile_name_full_1}, {ood_savefile_name_full_2} and {ood_savefile_name_full_3}')
    print()
    print('###############################################')
    print()
    print('###############################################')
    print()
    print(f"InD dataset: {ind_dataset}")
    print(f"Validation dataset: {val_dataset}")
    method = "Self-Supervision "
    _verbose(method, ood_dataset_1, ood_dataset_2, ood_dataset_3, aucs, fprs, accs)
def _gen_odin_inference(model, loaders, device, ind_dataset, val_dataset, ood_datasets):
    """Run the Generalized-ODIN evaluation, save the test scores and report.

    The perturbation magnitude ``epsilon`` is chosen from a fixed grid by
    comparing the first value returned by ``_score_npzs`` (named ``auc`` here)
    on the validation loaders. The winning validation scores are passed to
    ``_find_threshold``; the test loaders are scored with the chosen epsilon,
    written with ``np.savez`` and the metrics are handed to ``_verbose``.

    Args:
        model: Model to evaluate; switched to eval mode here.
        loaders: Six loaders, in order: validation InD, test InD,
            validation OoD, test OoD 1, test OoD 2, test OoD 3.
        device: Forwarded to ``_process_gen_odin_loader``.
        ind_dataset: In-distribution dataset name (file names and report).
        val_dataset: Validation dataset name (file names and report).
        ood_datasets: Three OoD dataset names (file names and report).
    """
    model.eval()
    ood_dataset_1, ood_dataset_2, ood_dataset_3 = ood_datasets
    val_ind_loader, test_ind_loader, val_ood_loader, test_ood_loader_1, test_ood_loader_2, test_ood_loader_3 = loaders

    epsilons = [0, 0.0025, 0.005, 0.01, 0.02, 0.04, 0.08]

    # ``None`` means "no candidate yet": the first epsilon is always accepted,
    # so the best_* variables are bound even if no validation AUC exceeds 0.
    best_auc, best_epsilon = None, 0
    for epsilon in epsilons:
        val_ind_scores = _process_gen_odin_loader(model, val_ind_loader, device, epsilon)
        val_ood_scores = _process_gen_odin_loader(model, val_ood_loader, device, epsilon)
        auc, _, _ = _score_npzs(val_ind_scores, val_ood_scores, threshold=0)
        if best_auc is None or auc > best_auc:
            best_auc = auc
            best_epsilon = epsilon
            best_val_ind_scores = val_ind_scores
            best_val_ood_scores = val_ood_scores

    _, threshold = _find_threshold(best_val_ind_scores, best_val_ood_scores)

    test_ind_scores = _process_gen_odin_loader(model, test_ind_loader, device, best_epsilon)
    test_ood_scores_1 = _process_gen_odin_loader(model, test_ood_loader_1, device, best_epsilon)
    test_ood_scores_2 = _process_gen_odin_loader(model, test_ood_loader_2, device, best_epsilon)
    test_ood_scores_3 = _process_gen_odin_loader(model, test_ood_loader_3, device, best_epsilon)

    max_h_ind_savefile_name = f'npzs/max_h_gen_odin_{ind_dataset}_ind_{ind_dataset}_val_{val_dataset}.npz'
    max_h_ood_savefile_name_1 = f'npzs/max_h_gen_odin_{ind_dataset}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_dataset_1}.npz'
    max_h_ood_savefile_name_2 = f'npzs/max_h_gen_odin_{ind_dataset}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_dataset_2}.npz'
    max_h_ood_savefile_name_3 = f'npzs/max_h_gen_odin_{ind_dataset}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_dataset_3}.npz'

    np.savez(max_h_ind_savefile_name, test_ind_scores)
    np.savez(max_h_ood_savefile_name_1, test_ood_scores_1)
    np.savez(max_h_ood_savefile_name_2, test_ood_scores_2)
    np.savez(max_h_ood_savefile_name_3, test_ood_scores_3)

    # One (auc, fpr, acc) triple per OoD test set, all against the same InD scores.
    aucs, fprs, accs = [], [], []
    for test_ood_scores in (test_ood_scores_1, test_ood_scores_2, test_ood_scores_3):
        auc, fpr, acc = _score_npzs(test_ind_scores, test_ood_scores, threshold)
        aucs.append(auc)
        fprs.append(fpr)
        accs.append(acc)

    print('###############################################')
    print()
    print(f'Succesfully stored in-distribution ood scores for maximum h to {max_h_ind_savefile_name} and out-distribution ood scores to {max_h_ood_savefile_name_1}, {max_h_ood_savefile_name_2} and {max_h_ood_savefile_name_3}')
    print()
    method = "Generalized-Odin results (Cosine Similarity) "
    print('###############################################')
    print()
    print(f"InD dataset: {ind_dataset}")
    print(f"Validation dataset: {val_dataset}")
    _verbose(method, ood_dataset_1, ood_dataset_2, ood_dataset_3, aucs, fprs, accs)
def _ood_detection_performance(method, val_ind, val_ood, test_ind, test_ood):
    """Print the detection metrics of one method for a single OoD test set.

    The threshold is the second value returned by ``_find_threshold`` on the
    validation scores; ``_score_npzs`` is then evaluated on the test scores
    with that threshold and its three results are printed.

    Args:
        method: Label printed in the report.
        val_ind: Validation in-distribution scores.
        val_ood: Validation out-of-distribution scores.
        test_ind: Test in-distribution scores.
        test_ood: Test out-of-distribution scores.
    """
    _unused, cutoff = _find_threshold(val_ind, val_ood)
    auroc, false_positive_rate, detection_acc = _score_npzs(test_ind, test_ood, cutoff)

    print()
    print(f'Method: {method}')
    print(f'Area Under Receiver Operating Characteristic curve: {auroc}')
    # The report shows the complement of the second metric on a 0-100 scale.
    tnr_at_95_tpr = 100 - false_positive_rate
    print(f'True Negative Rate @ 95% True Positive Rate: {tnr_at_95_tpr}')
    print(f'Detection Accuracy: {detection_acc}')
def _rotation(model, loaders, device, num_classes):
    """Compute rotation self-supervision OoD scores for validation and test data.

    The weight ``lamda`` is picked from a fixed grid by comparing the first
    value returned by ``_score_npzs`` on the validation scores; the chosen
    value is printed and used for the test-time predictions.

    Args:
        model: Model forwarded to ``_predict_rotations``.
        loaders: Six loaders, in order: validation InD, test InD,
            validation OoD, test OoD 1, test OoD 2, test OoD 3.
        device: Forwarded to ``_predict_rotations``.
        num_classes: Forwarded to ``_predict_rotations``.

    Returns:
        A 6-tuple: best validation InD scores, best validation OoD scores,
        test InD scores, and the scores of the three OoD test sets.
    """
    (
        val_ind_loader,
        test_ind_loader,
        val_ood_loader,
        test_ood_loader_1,
        test_ood_loader_2,
        test_ood_loader_3,
    ) = loaders

    kl_ind, rot_ind, _ = _predict_rotations(model, val_ind_loader, num_classes, device=device)
    kl_ood, rot_ood, _ = _predict_rotations(model, val_ood_loader, num_classes, device=device)

    best_val_auc = 0
    best_lamda = 0.25
    for candidate in (0.25, 0.5, 0.75, 1):
        candidate_ind = kl_ind - candidate * rot_ind
        candidate_ood = kl_ood - candidate * rot_ood
        candidate_auc, _, _ = _score_npzs(candidate_ind, candidate_ood)
        if not candidate_auc > best_val_auc:
            continue
        best_val_auc = candidate_auc
        best_lamda = candidate
        best_anomaly_score_ind = candidate_ind
        best_anomaly_score_ood = candidate_ood

    print(f"Chosen lambda: {best_lamda}")
    # The threshold is not used in this function; the call is kept as in the
    # original so any effect of _find_threshold is preserved.
    _, _unused_threshold = _find_threshold(best_anomaly_score_ind, best_anomaly_score_ood)

    def _test_scores(loader):
        # Third element of the prediction triple, computed with the chosen lamda.
        _, _, full = _predict_rotations(model, loader, num_classes, lamda=best_lamda, device=device)
        return full

    ind_full = _test_scores(test_ind_loader)
    ood_full_1 = _test_scores(test_ood_loader_1)
    ood_full_2 = _test_scores(test_ood_loader_2)
    ood_full_3 = _test_scores(test_ood_loader_3)

    return (
        best_anomaly_score_ind,
        best_anomaly_score_ood,
        ind_full,
        ood_full_1,
        ood_full_2,
        ood_full_3,
    )
def _ood_detection_performance(method, val_ind, val_ood, test_ind, test_ood_1, test_ood_2, test_ood_3, ood_dataset_1, ood_dataset_2, ood_dataset_3):
    """Score one method against three OoD test sets and report via ``_verbose``.

    The threshold is the second value returned by ``_find_threshold`` on the
    validation scores. ``_score_npzs`` is called once per OoD test set with
    that threshold, and the three result triples are collected into parallel
    lists that are passed to ``_verbose``.

    Args:
        method: Label forwarded to ``_verbose``.
        val_ind: Validation in-distribution scores.
        val_ood: Validation out-of-distribution scores.
        test_ind: Test in-distribution scores.
        test_ood_1, test_ood_2, test_ood_3: Scores of the three OoD test sets.
        ood_dataset_1, ood_dataset_2, ood_dataset_3: Names of those sets.
    """
    _, cutoff = _find_threshold(val_ind, val_ood)

    aucs, fprs, accs = [], [], []
    for ood_scores in (test_ood_1, test_ood_2, test_ood_3):
        auc, fpr, acc = _score_npzs(test_ind, ood_scores, cutoff)
        aucs.append(auc)
        fprs.append(fpr)
        accs.append(acc)

    _verbose(method, ood_dataset_1, ood_dataset_2, ood_dataset_3, aucs, fprs, accs)
def _baseline(model, loaders, device, ind_dataset, val_dataset, ood_datasets, monte_carlo_steps=1):
    """Run the baseline OoD evaluation (optionally with MC dropout), save and report.

    The classification accuracy on the InD test loader is reported first via
    ``_score_classification_accuracy``. Validation scores are passed to
    ``_find_threshold``; the test loaders are scored, written with
    ``np.savez`` and the metrics are handed to ``_verbose``.

    Args:
        model: Model to evaluate; switched to eval mode here. When
            ``monte_carlo_steps > 1`` its ``_dropout`` submodule is put back
            into training mode.
        loaders: Six loaders, in order: validation InD, test InD,
            validation OoD, test OoD 1, test OoD 2, test OoD 3.
        device: Forwarded to the scoring helpers.
        ind_dataset: In-distribution dataset name (file names and report).
        val_dataset: Validation dataset name (file names and report).
        ood_datasets: Three OoD dataset names (file names and report).
        monte_carlo_steps: Forwarded to ``_get_baseline_scores``; values
            above 1 select the MC-dropout variant.
    """
    ood_dataset_1, ood_dataset_2, ood_dataset_3 = ood_datasets
    val_ind_loader, test_ind_loader, val_ood_loader, test_ood_loader_1, test_ood_loader_2, test_ood_loader_3 = loaders

    model.eval()
    _score_classification_accuracy(model, testloader=test_ind_loader, device=device, dataset=ind_dataset)

    use_mc_dropout = monte_carlo_steps > 1
    if use_mc_dropout:
        # Keep the rest of the model in eval mode but let dropout stay active.
        model._dropout.train()

    val_ind = _get_baseline_scores(model, val_ind_loader, device, monte_carlo_steps)
    val_ood = _get_baseline_scores(model, val_ood_loader, device, monte_carlo_steps)
    if use_mc_dropout:
        val_ind = val_ind / monte_carlo_steps
        val_ood = val_ood / monte_carlo_steps

    _, threshold = _find_threshold(val_ind, val_ood)

    test_ind = _get_baseline_scores(model, test_ind_loader, device, monte_carlo_steps)
    test_ood_1 = _get_baseline_scores(model, test_ood_loader_1, device, monte_carlo_steps)
    test_ood_2 = _get_baseline_scores(model, test_ood_loader_2, device, monte_carlo_steps)
    test_ood_3 = _get_baseline_scores(model, test_ood_loader_3, device, monte_carlo_steps)
    if use_mc_dropout:
        # The threshold was derived from validation scores divided by the
        # number of MC steps, so the test scores must be put on that scale too.
        # NOTE(review): assumes _get_baseline_scores returns scores accumulated
        # over the MC passes, as the validation handling implies -- confirm.
        test_ind = test_ind / monte_carlo_steps
        test_ood_1 = test_ood_1 / monte_carlo_steps
        test_ood_2 = test_ood_2 / monte_carlo_steps
        test_ood_3 = test_ood_3 / monte_carlo_steps

    if monte_carlo_steps == 1:
        ind_savefile_name = f'npzs/baseline_{ind_dataset}_ind_{ind_dataset}_val_{val_dataset}.npz'
        ood_savefile_name_1 = f'npzs/baseline_{ood_dataset_1}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_dataset_1}.npz'
        ood_savefile_name_2 = f'npzs/baseline_{ood_dataset_2}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_dataset_2}.npz'
        ood_savefile_name_3 = f'npzs/baseline_{ood_dataset_3}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_dataset_3}.npz'
    else:
        ind_savefile_name = f'npzs/baseline_{ind_dataset}_ind_{ind_dataset}_val_{val_dataset}_ood_monte_carlo_{monte_carlo_steps}.npz'
        ood_savefile_name_1 = f'npzs/baseline_{ood_dataset_1}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_dataset_1}_monte_carlo_{monte_carlo_steps}.npz'
        ood_savefile_name_2 = f'npzs/baseline_{ood_dataset_2}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_dataset_2}_monte_carlo_{monte_carlo_steps}.npz'
        ood_savefile_name_3 = f'npzs/baseline_{ood_dataset_3}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_dataset_3}_monte_carlo_{monte_carlo_steps}.npz'

    np.savez(ind_savefile_name, test_ind)
    np.savez(ood_savefile_name_1, test_ood_1)
    np.savez(ood_savefile_name_2, test_ood_2)
    np.savez(ood_savefile_name_3, test_ood_3)

    # One (auc, fpr, acc) triple per OoD test set, all against the same InD scores.
    aucs, fprs, accs = [], [], []
    for test_ood in (test_ood_1, test_ood_2, test_ood_3):
        auc, fpr, acc = _score_npzs(test_ind, test_ood, threshold)
        aucs.append(auc)
        fprs.append(fpr)
        accs.append(acc)

    print('###############################################')
    print()
    print(f'Succesfully stored in-distribution ood scores to {ind_savefile_name} and out-distribution ood scores to: {ood_savefile_name_1}, {ood_savefile_name_2} and {ood_savefile_name_3}')
    print()
    print('###############################################')
    print()
    print(f"InD dataset: {ind_dataset}")
    print(f"Validation dataset: {val_dataset}")

    if monte_carlo_steps == 1:
        method = "Baseline"
    else:
        method = f"Baseline results with MC dropout ({monte_carlo_steps} steps)"
    _verbose(method, ood_dataset_1, ood_dataset_2, ood_dataset_3, aucs, fprs, accs)
def _ensemble_inference(model_checkpoints, num_classes, loaders, device, ind_dataset, val_dataset, ood_datasets=None):
    """Run the ensemble OoD evaluation, save the test scores and report.

    One model is built per checkpoint. Temperature ``T`` and perturbation
    ``epsilon`` are chosen from fixed grids by comparing the first value
    returned by ``_score_npzs`` (named ``auc`` here) on the ensemble-averaged
    validation scores. The winning validation scores are passed to
    ``_find_threshold``; the test loaders are then scored with the chosen
    settings, averaged over the models, written with ``np.savez`` and the
    metrics are handed to ``_verbose``.

    Args:
        model_checkpoints: Checkpoints, one per ensemble member.
        num_classes: Indexable; ``num_classes[i]`` is passed as
            ``out_classes`` for the i-th checkpoint.
        loaders: Six loaders, in order: validation InD, test InD,
            validation OoD, test OoD 1, test OoD 2, test OoD 3.
        device: Forwarded to model construction and scoring.
        ind_dataset: In-distribution dataset name (file names and report).
        val_dataset: Validation dataset name (file names and report).
        ood_datasets: Three OoD dataset names. When omitted, the module-level
            names ``ood_dataset_1``, ``ood_dataset_2`` and ``ood_dataset_3``
            are used, which is what this function relied on before the
            parameter existed.

    Raises:
        ValueError: If ``ood_datasets`` is omitted and the module-level names
            are not defined, or if ``model_checkpoints`` is empty.
    """
    if ood_datasets is None:
        # Resolve the names up front so a missing definition fails before the
        # expensive grid search instead of after it.
        try:
            ood_datasets = (ood_dataset_1, ood_dataset_2, ood_dataset_3)
        except NameError as err:
            raise ValueError("ood_datasets must be given as three OoD dataset names") from err
    ood_name_1, ood_name_2, ood_name_3 = ood_datasets

    val_ind_loader, test_ind_loader, val_ood_loader, test_ood_loader_1, test_ood_loader_2, test_ood_loader_3 = loaders

    models = []
    for index, model_checkpoint in enumerate(model_checkpoints):
        model = build_model_with_checkpoint('eb0', model_checkpoint, device, out_classes=num_classes[index])
        model.eval()
        models.append(model)
    if not models:
        raise ValueError("model_checkpoints must contain at least one checkpoint")

    # The epsilon grid does not depend on T, so build it once.
    epsilons = np.arange(0, 0.004, 0.004/21, float).tolist()

    best_auc = -1e30
    for T in [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]:
        for epsilon in tqdm(epsilons):
            # Accumulate the per-model scores, then average over the ensemble.
            for index, model in enumerate(models):
                if index == 0:
                    val_ind = _get_odin_scores(model, val_ind_loader, T, epsilon, device=device, score_entropy=True)
                    val_ood = _get_odin_scores(model, val_ood_loader, T, epsilon, device=device, score_entropy=True)
                else:
                    val_ind += _get_odin_scores(model, val_ind_loader, T, epsilon, device=device, score_entropy=True)
                    val_ood += _get_odin_scores(model, val_ood_loader, T, epsilon, device=device, score_entropy=True)
            val_ind = val_ind / len(models)
            val_ood = val_ood / len(models)

            auc, _, _ = _score_npzs(val_ind, val_ood)
            if auc > best_auc:
                best_auc = auc
                best_T, best_epsilon = T, epsilon
                best_val_ind, best_val_ood = val_ind, val_ood

    _, threshold = _find_threshold(best_val_ind, best_val_ood)

    for index, model in enumerate(models):
        if index == 0:
            test_ind = _get_odin_scores(model, test_ind_loader, best_T, best_epsilon, device=device, score_entropy=True)
            test_ood_1 = _get_odin_scores(model, test_ood_loader_1, best_T, best_epsilon, device=device, score_entropy=True)
            test_ood_2 = _get_odin_scores(model, test_ood_loader_2, best_T, best_epsilon, device=device, score_entropy=True)
            test_ood_3 = _get_odin_scores(model, test_ood_loader_3, best_T, best_epsilon, device=device, score_entropy=True)
        else:
            test_ind += _get_odin_scores(model, test_ind_loader, best_T, best_epsilon, device=device, score_entropy=True)
            test_ood_1 += _get_odin_scores(model, test_ood_loader_1, best_T, best_epsilon, device=device, score_entropy=True)
            test_ood_2 += _get_odin_scores(model, test_ood_loader_2, best_T, best_epsilon, device=device, score_entropy=True)
            test_ood_3 += _get_odin_scores(model, test_ood_loader_3, best_T, best_epsilon, device=device, score_entropy=True)

    test_ind = test_ind / len(models)
    test_ood_1 = test_ood_1 / len(models)
    test_ood_2 = test_ood_2 / len(models)
    test_ood_3 = test_ood_3 / len(models)

    test_ind_savefile_name = f'npzs/ensemble_{ind_dataset}_ind_{ind_dataset}_val_{val_dataset}.npz'
    test_ood_savefile_name_1 = f'npzs/ensemble_{ind_dataset}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_name_1}.npz'
    test_ood_savefile_name_2 = f'npzs/ensemble_{ind_dataset}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_name_2}.npz'
    # NOTE(review): only this file name carries the 'ensemble_odin_' prefix;
    # left unchanged because existing outputs may be read back under it.
    test_ood_savefile_name_3 = f'npzs/ensemble_odin_{ind_dataset}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_name_3}.npz'

    np.savez(test_ind_savefile_name, test_ind)
    np.savez(test_ood_savefile_name_1, test_ood_1)
    np.savez(test_ood_savefile_name_2, test_ood_2)
    np.savez(test_ood_savefile_name_3, test_ood_3)

    # One (auc, fpr, acc) triple per OoD test set, all against the same InD scores.
    aucs, fprs, accs = [], [], []
    for test_ood in (test_ood_1, test_ood_2, test_ood_3):
        auc, fpr, acc = _score_npzs(test_ind, test_ood, threshold)
        aucs.append(auc)
        fprs.append(fpr)
        accs.append(acc)

    print('###############################################')
    print()
    print(f'Succesfully stored in-distribution ood scores for maximum h to {test_ind_savefile_name} and out-distribution ood scores to {test_ood_savefile_name_1}, {test_ood_savefile_name_2} and {test_ood_savefile_name_3}')
    print()
    method = "Self-Ensemble"
    print('###############################################')
    print()
    print(f"InD dataset: {ind_dataset}")
    print(f"Validation dataset: {val_dataset}")
    _verbose(method, ood_name_1, ood_name_2, ood_name_3, aucs, fprs, accs)
def _odin(model, loaders, device, ind_dataset, val_dataset, ood_datasets):
    """Run the ODIN evaluation, save the test scores and report.

    Temperature ``T`` and perturbation ``epsilon`` are chosen from fixed grids
    by comparing the first value returned by ``_score_npzs`` (named ``auc``
    here) on the validation loaders. The winning validation scores are passed
    to ``_find_threshold``; the test loaders are then scored with the chosen
    settings, written with ``np.savez`` and the metrics are handed to
    ``_verbose``.

    Args:
        model: Model to evaluate; switched to eval mode here.
        loaders: Six loaders, in order: validation InD, test InD,
            validation OoD, test OoD 1, test OoD 2, test OoD 3.
        device: Forwarded to ``_get_odin_scores``.
        ind_dataset: In-distribution dataset name (file names and report).
        val_dataset: Validation dataset name (file names and report).
        ood_datasets: Three OoD dataset names (file names and report).
    """
    model.eval()
    ood_dataset_1, ood_dataset_2, ood_dataset_3 = ood_datasets
    val_ind_loader, test_ind_loader, val_ood_loader, test_ood_loader_1, test_ood_loader_2, test_ood_loader_3 = loaders

    # The epsilon grid does not depend on T, so build it once.
    epsilons = np.arange(0, 0.004, 0.004/21, float).tolist()

    # ``None`` means "no candidate yet": the first (T, epsilon) pair is always
    # accepted, so the best_* variables are bound even if no AUC exceeds 0.
    best_auc = None
    for T in [1, 2, 5, 10, 20, 50, 100, 200, 500, 1000]:
        for epsilon in tqdm(epsilons):
            val_ind = _get_odin_scores(model, val_ind_loader, T, epsilon, device=device)
            val_ood = _get_odin_scores(model, val_ood_loader, T, epsilon, device=device)
            auc, _, _ = _score_npzs(val_ind, val_ood, threshold=0)
            if best_auc is None or auc > best_auc:
                best_auc = auc
                best_epsilon = epsilon
                best_T = T
                best_val_ind = val_ind
                best_val_ood = val_ood

    print('###############################################')
    print()
    print(f'Selected temperature: {best_T}, selected epsilon: {best_epsilon}')
    print()

    _, threshold = _find_threshold(best_val_ind, best_val_ood)

    test_ind = _get_odin_scores(model, test_ind_loader, best_T, best_epsilon, device=device)
    test_ood_1 = _get_odin_scores(model, test_ood_loader_1, best_T, best_epsilon, device=device)
    test_ood_2 = _get_odin_scores(model, test_ood_loader_2, best_T, best_epsilon, device=device)
    test_ood_3 = _get_odin_scores(model, test_ood_loader_3, best_T, best_epsilon, device=device)

    ind_savefile_name = f'npzs/odin_{ind_dataset}_ind_{ind_dataset}_val_{val_dataset}_temperature_{best_T}_epsilon{best_epsilon}.npz'
    ood_savefile_name_1 = f'npzs/odin_{ood_dataset_1}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_dataset_1}_temperature_{best_T}_epsilon{best_epsilon}.npz'
    ood_savefile_name_2 = f'npzs/odin_{ood_dataset_2}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_dataset_2}_temperature_{best_T}_epsilon{best_epsilon}.npz'
    ood_savefile_name_3 = f'npzs/odin_{ood_dataset_3}_ind_{ind_dataset}_val_{val_dataset}_ood_{ood_dataset_3}_temperature_{best_T}_epsilon{best_epsilon}.npz'

    np.savez(ind_savefile_name, test_ind)
    np.savez(ood_savefile_name_1, test_ood_1)
    np.savez(ood_savefile_name_2, test_ood_2)
    np.savez(ood_savefile_name_3, test_ood_3)

    # One (auc, fpr, acc) triple per OoD test set, all against the same InD scores.
    aucs, fprs, accs = [], [], []
    for test_ood in (test_ood_1, test_ood_2, test_ood_3):
        auc, fpr, acc = _score_npzs(test_ind, test_ood, threshold)
        aucs.append(auc)
        fprs.append(fpr)
        accs.append(acc)

    print('###############################################')
    print()
    print(f'Succesfully stored in-distribution ood scores to {ind_savefile_name} and out-distribution ood scores to: {ood_savefile_name_1}, {ood_savefile_name_2} and {ood_savefile_name_3}')
    print()
    print('###############################################')
    print()
    print(f"InD dataset: {ind_dataset}")
    print(f"Validation dataset: {val_dataset}")
    method = f"Odin results with chosen T={best_T}, epsilon={best_epsilon}"
    _verbose(method, ood_dataset_1, ood_dataset_2, ood_dataset_3, aucs, fprs, accs)