def validation(self, net, val_loader_in, val_loader_out):
    net.eval()
    device = list(net.parameters())[0].device
    unc_in, unc_out = [], []
    n_samples, running_loss, running_corrects = 0, 0, 0
    for (X_batch_in, y_batch_in), (X_batch_out, y_batch_out) in zip(val_loader_in, val_loader_out):
        X_batch_in, y_batch_in = X_batch_in.to(device), y_batch_in.to(device)
        X_batch_out, y_batch_out = X_batch_out.to(device), y_batch_out.to(device)
        with torch.no_grad():
            logits_in = net(X_batch_in)
            logits_out = net(X_batch_out)
            proba_in = F.softmax(logits_in, -1)
            proba_out = F.softmax(logits_out, -1)
            loss = F.cross_entropy(logits_in, y_batch_in)
        # Negative max softmax probability as the uncertainty score.
        unc_in.append(-proba_in.max(-1)[0])
        unc_out.append(-proba_out.max(-1)[0])
        batch_size = X_batch_in.size(0)
        n_samples += batch_size
        running_loss += loss * batch_size
        running_corrects += (logits_in.argmax(-1) == y_batch_in).float().sum()
    unc_in = torch.cat(unc_in).cpu()
    unc_out = torch.cat(unc_out).cpu()
    val_loss = running_loss / n_samples
    val_acc = running_corrects / n_samples
    # Logging
    self.history['val_loss'].append(val_loss.item())
    self.history['val_acc'].append(val_acc.item())
    self.history['val_auroc'].append(evaluation.get_AUROC_ood(unc_in, unc_out))
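# All of the validation methods in this section delegate the OOD metric to
# evaluation.get_AUROC_ood, which is not shown here. Below is a minimal
# sketch of such a helper, assuming the convention used throughout this
# code (higher score = more uncertain, OOD samples as the positive class);
# the actual implementation in the evaluation module may differ.
import torch
from sklearn.metrics import roc_auc_score

def get_AUROC_ood_sketch(unc_in, unc_out):
    # Label in-distribution samples 0 and OOD samples 1, then measure how
    # well the uncertainty score ranks OOD above in-distribution.
    scores = torch.cat([unc_in, unc_out]).numpy()
    labels = torch.cat([torch.zeros(len(unc_in)), torch.ones(len(unc_out))]).numpy()
    return roc_auc_score(labels, scores)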
def validation(self, val_loader_in, val_loader_out, i_epoch):
    device = list(self.net.parameters())[0].device
    self.net.eval()
    unc_in, unc_out = [], []
    n_samples, running_loss, running_corrects = 0, 0, 0
    for (X_batch_in, y_batch_in), (X_batch_out, y_batch_out) in zip(val_loader_in, val_loader_out):
        X_batch_in, y_batch_in = X_batch_in.to(device), y_batch_in.to(device)
        X_batch_out, y_batch_out = X_batch_out.to(device), y_batch_out.to(device)
        with torch.no_grad():
            logits_in = self.net(X_batch_in)
            logits_out = self.net(X_batch_out)
            loss = self._edl_loss(exp_evidence(logits_in), y_batch_in, epoch=i_epoch)
        unc_in.append(self.get_unc(logits_in))
        unc_out.append(self.get_unc(logits_out))
        batch_size = X_batch_in.size(0)
        n_samples += batch_size
        running_loss += loss * batch_size
        running_corrects += (logits_in.argmax(-1) == y_batch_in).float().sum()
    val_loss = running_loss / n_samples
    val_acc = running_corrects / n_samples
    unc_in = torch.cat(unc_in).cpu()
    unc_out = torch.cat(unc_out).cpu()
    # Logging
    self.history['val_loss'].append(val_loss.item())
    self.history['val_acc'].append(val_acc.item())
    self.history['val_auroc'].append(evaluation.get_AUROC_ood(unc_in, unc_out))
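# _edl_loss and exp_evidence are defined elsewhere. A minimal sketch under
# the usual evidential-deep-learning assumptions (Sensoy et al., 2018):
# evidence = exp(logits), alpha = evidence + 1, an expected-MSE data term
# under the Dirichlet, plus an annealed KL regularizer towards a flat
# Dirichlet. The clamp range and the 10-epoch annealing horizon are
# illustrative choices, not taken from this code.
import torch
import torch.nn.functional as F

def exp_evidence_sketch(logits):
    # Clamp for numerical stability before exponentiating.
    return torch.exp(logits.clamp(-10, 10))

def edl_mse_loss_sketch(evidence, targets, epoch, num_classes, anneal_epochs=10):
    alpha = evidence + 1
    S = alpha.sum(-1, keepdim=True)
    p = alpha / S
    y = F.one_hot(targets, num_classes).float()
    # Expected squared error under the Dirichlet plus its variance term.
    err = ((y - p) ** 2).sum(-1)
    var = (p * (1 - p) / (S + 1)).sum(-1)
    # KL(Dir(alpha_tilde) || Dir(1)) computed on the non-target evidence only.
    alpha_tilde = y + (1 - y) * alpha
    S_tilde = alpha_tilde.sum(-1, keepdim=True)
    kl = (torch.lgamma(S_tilde.squeeze(-1)) - torch.lgamma(alpha_tilde).sum(-1)
          - torch.lgamma(torch.tensor(float(num_classes)))
          + ((alpha_tilde - 1)
             * (torch.digamma(alpha_tilde) - torch.digamma(S_tilde))).sum(-1))
    anneal = min(1.0, epoch / anneal_epochs)
    return (err + var + anneal * kl).mean()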
def score(self, dataloader_in, dataloader_out):
    device = list(self.net.parameters())[0].device
    probas_in = []
    y_in = []
    for X_batch, y_batch in dataloader_in:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        with torch.no_grad():
            probas_in.append(self(X_batch))
        y_in.append(y_batch)
    probas_in = torch.cat(probas_in).cpu()
    y_in = torch.cat(y_in).cpu()
    probas_out = []
    for X_batch, _ in dataloader_out:
        X_batch = X_batch.to(device)
        with torch.no_grad():
            probas_out.append(self(X_batch))
    probas_out = torch.cat(probas_out).cpu()
    probas_in = probas_in.clamp(1e-8, 1 - 1e-8)
    probas_out = probas_out.clamp(1e-8, 1 - 1e-8)
    # Accuracy
    acc = (y_in == probas_in.argmax(-1)).float().mean().item()
    # Calibration Metrics
    criterion_ece = evaluation.ExpectedCalibrationError()
    criterion_nll = evaluation.NegativeLogLikelihood()
    criterion_bs = evaluation.BrierScore()
    criterion_cc = evaluation.CalibrationCurve()
    ece = criterion_ece(probas_in, y_in)
    nll = criterion_nll(probas_in, y_in)
    brier_score = criterion_bs(probas_in, y_in)
    calibration_curve = criterion_cc(probas_in, y_in)
    # OOD metrics: negative max probability as the uncertainty score.
    unc_in, unc_out = -probas_in.max(1)[0], -probas_out.max(1)[0]
    auroc = evaluation.get_AUROC_ood(unc_in, unc_out)
    results = {
        'accuracy': acc,
        # Calibration
        'ece': ece,
        'nll': nll,
        'brier_score': brier_score,
        'calibration_curve': calibration_curve,
        # OOD
        'auroc': auroc,
        'unc_in': unc_in,
        'unc_out': unc_out,
    }
    return results
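# score relies on several metric callables from the evaluation module. As
# one concrete example, here is a minimal sketch of an expected calibration
# error with equal-width confidence bins; the module's
# ExpectedCalibrationError may be parameterized differently (bin count,
# binning scheme, norm).
import torch

def expected_calibration_error_sketch(probas, targets, n_bins=10):
    conf, preds = probas.max(-1)
    correct = (preds == targets).float()
    ece = torch.zeros(())
    edges = torch.linspace(0, 1, n_bins + 1)
    for lo, hi in zip(edges[:-1], edges[1:]):
        in_bin = (conf > lo) & (conf <= hi)
        if in_bin.any():
            # Weight each bin's |accuracy - confidence| gap by its mass.
            ece += in_bin.float().mean() * (correct[in_bin].mean() - conf[in_bin].mean()).abs()
    return ece.item()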
def validation(self, val_loader_in, val_loader_out):
    self.net.eval()
    (preds, unc_in), lbls = evaluation.eval_on_dataloader(
        self.net, val_loader_in, (lambda x: x.argmax(-1), self.get_unc),
        return_labels=True)
    unc_out = evaluation.eval_on_dataloader(self.net, val_loader_out, self.get_unc)
    self.history['val_acc'].append((preds == lbls).float().mean(0).item())
    self.history['val_auroc'].append(evaluation.get_AUROC_ood(unc_in, unc_out))
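# eval_on_dataloader is a shared helper not shown in this section. The
# sketch below is inferred from the two call sites above and is only one
# plausible implementation: run the net over the loader without gradients,
# apply one or several functions to the logits, and concatenate the
# results, optionally returning the labels as well.
import torch

def eval_on_dataloader_sketch(net, dataloader, fns, return_labels=False):
    single = not isinstance(fns, (tuple, list))
    fns = (fns,) if single else tuple(fns)
    device = next(net.parameters()).device
    outputs = [[] for _ in fns]
    labels = []
    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            logits = net(X_batch.to(device))
            for out, fn in zip(outputs, fns):
                out.append(fn(logits).cpu())
            labels.append(y_batch)
    outputs = [torch.cat(out) for out in outputs]
    result = outputs[0] if single else tuple(outputs)
    return (result, torch.cat(labels)) if return_labels else result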
def validation(self, val_loader_in, val_loader_out):
    # Evaluation
    device = list(self.net.parameters())[0].device
    self.net.eval()
    n_samples, running_loss, running_corrects = 0, 0, 0
    unc_in, unc_out = [], []
    for (X_batch, y_batch), (X_ood, _) in zip(val_loader_in, val_loader_out):
        X_batch, y_batch, X_ood = X_batch.to(device), y_batch.to(device), X_ood.to(device)
        with torch.no_grad():
            logits_in = self.net(X_batch)
            logits_out = self.net(X_ood)
            alphas_in = torch.exp(logits_in)
            # In-distribution target: sharp Dirichlet concentrated on the true class.
            target_in = torch.zeros_like(alphas_in).scatter_(
                1, y_batch[:, None], self.precision - 1) + 1
            loss_in = torch.mean(dirichlet_reverse_kl_divergence(alphas_in, target_in))
            alphas_out = torch.exp(logits_out)
            # OOD target: flat Dirichlet.
            target_out = torch.ones_like(alphas_out)
            loss_out = torch.mean(dirichlet_reverse_kl_divergence(alphas_out, target_out))
            unc_in.append(dirichlet_prior_network_uncertainty(logits_in)['mutual_information'])
            unc_out.append(dirichlet_prior_network_uncertainty(logits_out)['mutual_information'])
        loss = loss_in + self.gamma * loss_out
        batch_size = X_batch.size(0)
        n_samples += batch_size
        running_loss += loss * batch_size
        running_corrects += (alphas_in.argmax(-1) == y_batch).float().sum()
    val_loss = running_loss / n_samples
    val_acc = running_corrects / n_samples
    unc_in = torch.cat(unc_in).cpu()
    unc_out = torch.cat(unc_out).cpu()
    # Logging
    self.history['val_loss'].append(val_loss.item())
    self.history['val_acc'].append(val_acc.item())
    self.history['val_auroc'].append(evaluation.get_AUROC_ood(unc_in, unc_out))
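# dirichlet_reverse_kl_divergence is imported from elsewhere. The sketch
# below computes a KL divergence between two Dirichlet distributions;
# "reverse" presumably refers to putting the model distribution in the
# first argument (cf. Malinin & Gales, 2019), which matches how the
# function is called above. The exact implementation may differ.
import torch

def dirichlet_kl_divergence_sketch(alphas, betas):
    # KL(Dir(alphas) || Dir(betas)), computed per sample over the last dim.
    a0 = alphas.sum(-1)
    b0 = betas.sum(-1)
    return (torch.lgamma(a0) - torch.lgamma(alphas).sum(-1)
            - torch.lgamma(b0) + torch.lgamma(betas).sum(-1)
            + ((alphas - betas)
               * (torch.digamma(alphas) - torch.digamma(a0).unsqueeze(-1))).sum(-1))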
def validation(self, val_loader_in, val_loader_out):
    self.net.eval()
    device = list(self.net.parameters())[0].device
    # Evaluation
    n_samples, running_loss, running_corrects = 0, 0, 0
    proba_in, proba_out = [], []
    for (X_batch_in, y_batch_in), (X_batch_out, y_batch_out) in zip(val_loader_in, val_loader_out):
        X_batch_in, y_batch_in = X_batch_in.to(device), y_batch_in.to(device)
        X_batch_out, y_batch_out = X_batch_out.to(device), y_batch_out.to(device)
        with torch.no_grad():
            mean_in, logvar_in = self.net(X_batch_in)
            proba_in.append(self.predic_proba(X_batch_in).clamp(1e-8, 1 - 1e-8))
            proba_out.append(self.predic_proba(X_batch_out).clamp(1e-8, 1 - 1e-8))
            std_in = torch.exp(.5 * logvar_in)
            # Monte Carlo estimate of the log-likelihood with sampled logits.
            loss = 0
            for _ in range(2):
                x_hat = mean_in + torch.randn_like(mean_in) * std_in  # Eq. 12
                loss += torch.exp(
                    x_hat.gather(1, y_batch_in.view(-1, 1))
                    - torch.log(torch.sum(torch.exp(x_hat), dim=-1, keepdim=True)))
            loss = torch.log(loss / 2)
            loss = -torch.sum(loss)
        batch_size = X_batch_in.size(0)
        n_samples += batch_size
        running_loss += loss
        running_corrects += (mean_in.argmax(-1) == y_batch_in).float().sum()
    proba_in = torch.cat(proba_in).cpu()
    proba_out = torch.cat(proba_out).cpu()
    # Predictive entropy as the uncertainty score.
    unc_in = -torch.sum(proba_in * proba_in.log(), -1)
    unc_out = -torch.sum(proba_out * proba_out.log(), -1)
    val_loss = running_loss / n_samples
    val_acc = running_corrects / n_samples
    # Logging
    self.history['val_loss'].append(val_loss.item())
    self.history['val_acc'].append(val_acc.item())
    self.history['val_auroc'].append(evaluation.get_AUROC_ood(unc_in, unc_out))
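# predic_proba is defined elsewhere on this class. Under the model above,
# where the network predicts a Gaussian over the logits (cf. Kendall &
# Gal, 2017), a natural sketch is a Monte Carlo average of softmax
# probabilities over sampled logits; the sample count is an illustrative
# choice, not taken from this code.
import torch
import torch.nn.functional as F

def predict_proba_mc_sketch(net, X, n_samples=32):
    with torch.no_grad():
        mean, logvar = net(X)
        std = torch.exp(0.5 * logvar)
        # Average softmax over logits sampled from N(mean, std^2).
        probas = torch.stack([
            F.softmax(mean + torch.randn_like(mean) * std, dim=-1)
            for _ in range(n_samples)
        ])
    return probas.mean(0)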
def score(self, dataloader_in, dataloader_out):
    self.eval()
    device = list(self.net.parameters())[0].device
    logits_in = []
    probas_in = []
    y_in = []
    for X_batch, y_batch in dataloader_in:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        with torch.no_grad():
            logits_in.append(self.net(X_batch))
            # Dirichlet mean as the predictive distribution.
            a = self.evidence_func(logits_in[-1]) + self.prior
            proba = a / a.sum(-1, keepdim=True)
            probas_in.append(proba)
        y_in.append(y_batch)
    logits_in = torch.cat(logits_in).cpu()
    probas_in = torch.cat(probas_in).cpu()
    y_in = torch.cat(y_in).cpu()
    logits_out = []
    probas_out = []
    for X_batch, _ in dataloader_out:
        X_batch = X_batch.to(device)
        with torch.no_grad():
            logits_out.append(self.net(X_batch))
            a = self.evidence_func(logits_out[-1]) + self.prior
            proba = a / a.sum(-1, keepdim=True)
            probas_out.append(proba)
    logits_out = torch.cat(logits_out).cpu()
    probas_out = torch.cat(probas_out).cpu()
    probas_in = probas_in.clamp(1e-8, 1 - 1e-8)
    probas_out = probas_out.clamp(1e-8, 1 - 1e-8)
    # Accuracy
    acc = (y_in == probas_in.argmax(-1)).float().mean().item()
    # Calibration Metrics
    criterion_ece = evaluation.ExpectedCalibrationError()
    criterion_nll = evaluation.NegativeLogLikelihood()
    criterion_bs = evaluation.BrierScore()
    criterion_cc = evaluation.CalibrationCurve()
    ece = criterion_ece(probas_in, y_in)
    nll = criterion_nll(probas_in, y_in)
    brier_score = criterion_bs(probas_in, y_in)
    calibration_curve = criterion_cc(probas_in, y_in)
    # OOD metrics
    entropy_in = -torch.sum(probas_in * probas_in.log(), dim=-1)
    entropy_out = -torch.sum(probas_out * probas_out.log(), dim=-1)
    unc_in, unc_out = self.get_unc(logits_in), self.get_unc(logits_out)
    auroc = evaluation.get_AUROC_ood(unc_in, unc_out)
    results = {
        'accuracy': acc,
        # Calibration
        'ece': ece,
        'nll': nll,
        'brier_score': brier_score,
        'calibration_curve': calibration_curve,
        # OOD
        'auroc': auroc,
        'entropy_in': entropy_in,
        'entropy_out': entropy_out,
        'unc_in': unc_in,
        'unc_out': unc_out,
    }
    self.train()
    return results
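# get_unc is defined elsewhere on this class. For evidential models of
# this form, one common choice is the Dirichlet vacuity u = K / sum(alpha),
# which grows as total evidence shrinks. The sketch below follows that
# convention; it is only one plausible reading, not necessarily what this
# class actually uses.
import torch

def get_unc_vacuity_sketch(logits, evidence_func=torch.exp, prior=1.0):
    alphas = evidence_func(logits) + prior
    num_classes = alphas.size(-1)
    # Vacuity: high when the total evidence sum(alpha) is low.
    return num_classes / alphas.sum(-1)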
def score(self, dataloader_in, dataloader_out):
    self.eval()
    device = list(self.net.parameters())[0].device
    logits_in, y_in = [], []
    for X_batch, y_batch in dataloader_in:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        with torch.no_grad():
            logits_in.append(self.net(X_batch))
        y_in.append(y_batch)
    logits_in = torch.cat(logits_in).cpu()
    y_in = torch.cat(y_in).cpu()
    alphas_in = torch.exp(logits_in)
    probas_in = alphas_in / alphas_in.sum(-1, keepdim=True)
    logits_out, y_out = [], []
    for X_batch, y_batch in dataloader_out:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)
        with torch.no_grad():
            logits_out.append(self.net(X_batch))
        y_out.append(y_batch)
    logits_out = torch.cat(logits_out).cpu()
    y_out = torch.cat(y_out).cpu()
    alphas_out = torch.exp(logits_out)
    probas_out = alphas_out / alphas_out.sum(-1, keepdim=True)
    uncertainty_in = dirichlet_prior_network_uncertainty(logits_in)
    uncertainty_out = dirichlet_prior_network_uncertainty(logits_out)
    probas_in = probas_in.clamp(1e-8, 1 - 1e-8)
    probas_out = probas_out.clamp(1e-8, 1 - 1e-8)
    # Accuracy
    acc = (y_in == probas_in.argmax(-1)).float().mean().item()
    # Calibration Metrics
    criterion_ece = evaluation.ExpectedCalibrationError()
    criterion_nll = evaluation.NegativeLogLikelihood()
    criterion_bs = evaluation.BrierScore()
    criterion_cc = evaluation.CalibrationCurve()
    ece = criterion_ece(probas_in, y_in)
    nll = criterion_nll(probas_in, y_in)
    brier_score = criterion_bs(probas_in, y_in)
    calibration_curve = criterion_cc(probas_in, y_in)
    # OOD metrics
    unc_in = uncertainty_in['mutual_information']
    unc_out = uncertainty_out['mutual_information']
    auroc = evaluation.get_AUROC_ood(unc_in, unc_out)
    entropy_in = uncertainty_in['entropy_of_expected']
    entropy_out = uncertainty_out['entropy_of_expected']
    self.train()
    results = {
        'accuracy': acc,
        # Calibration
        'ece': ece,
        'nll': nll,
        'brier_score': brier_score,
        'calibration_curve': calibration_curve,
        # OOD
        'auroc': auroc,
        'entropy_in': entropy_in,
        'entropy_out': entropy_out,
        'unc_in': unc_in,
        'unc_out': unc_out,
    }
    return results
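# dirichlet_prior_network_uncertainty is imported from elsewhere. Below is
# a minimal sketch of the usual Dirichlet uncertainty decomposition
# (Malinin & Gales, 2018), matching the two keys used above; the real
# helper may expose additional measures.
import torch

def dirichlet_prior_network_uncertainty_sketch(logits):
    alphas = torch.exp(logits)
    a0 = alphas.sum(-1, keepdim=True)
    probas = alphas / a0
    # Entropy of the expected categorical (total uncertainty).
    entropy_of_expected = -(probas * torch.log(probas + 1e-12)).sum(-1)
    # Expected entropy of sampled categoricals (data uncertainty).
    expected_entropy = -(probas * (torch.digamma(alphas + 1) - torch.digamma(a0 + 1))).sum(-1)
    # Mutual information (distributional uncertainty) is the difference.
    mutual_information = entropy_of_expected - expected_entropy
    return {
        'entropy_of_expected': entropy_of_expected,
        'mutual_information': mutual_information,
    }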