Beispiel #1
0
    def score(self, dataloader_in, dataloader_out):
        """Score the model on in-distribution and out-of-distribution data.

        The outputs of ``self(X_batch)`` are used as class probabilities
        (they are clamped to ``[1e-8, 1 - 1e-8]`` before the metrics are
        computed).

        Args:
            dataloader_in: Iterable yielding ``(X_batch, y_batch)`` pairs of
                in-distribution inputs and their labels.
            dataloader_out: Iterable yielding ``(X_batch, y_batch)`` pairs of
                out-of-distribution inputs; the labels are ignored.

        Returns:
            dict with the keys ``'accuracy'``, ``'ece'``, ``'nll'``,
            ``'brier_score'``, ``'calibration_curve'``, ``'auroc'``,
            ``'unc_in'`` and ``'unc_out'``.
        """
        # NOTE(review): the train/eval mode of the model is not touched here;
        # presumably the caller is responsible for it -- confirm.
        device = next(self.net.parameters()).device

        probas_in, y_in = [], []
        probas_out = []
        with torch.no_grad():
            for X_batch, y_batch in dataloader_in:
                # Move each prediction to the CPU right away so device memory
                # does not grow with the size of the dataset.
                probas_in.append(self(X_batch.to(device)).cpu())
                # Labels are only needed on the CPU for the metrics below.
                y_in.append(y_batch.cpu())
            for X_batch, _ in dataloader_out:
                probas_out.append(self(X_batch.to(device)).cpu())

        probas_in = torch.cat(probas_in).clamp(1e-8, 1 - 1e-8)
        probas_out = torch.cat(probas_out).clamp(1e-8, 1 - 1e-8)
        y_in = torch.cat(y_in)

        # Accuracy
        acc = (y_in == probas_in.argmax(-1)).float().mean().item()

        # Calibration metrics
        ece = evaluation.ExpectedCalibrationError()(probas_in, y_in)
        nll = evaluation.NegativeLogLikelihood()(probas_in, y_in)
        brier_score = evaluation.BrierScore()(probas_in, y_in)
        calibration_curve = evaluation.CalibrationCurve()(probas_in, y_in)

        # OOD metrics: the uncertainty score is the negative maximum
        # probability, so less confident predictions get a higher score.
        unc_in = -probas_in.max(1)[0]
        unc_out = -probas_out.max(1)[0]
        auroc = evaluation.get_AUROC_ood(unc_in, unc_out)

        results = {
            'accuracy': acc,
            # Calibration
            'ece': ece,
            'nll': nll,
            'brier_score': brier_score,
            'calibration_curve': calibration_curve,
            # OOD
            'auroc': auroc,
            'unc_in': unc_in,
            'unc_out': unc_out,
        }
        return results
Beispiel #2
0
    def score(self, dataloader_in, dataloader_out):
        """Score the model on in-distribution and out-of-distribution data.

        Class probabilities are obtained by normalizing
        ``self.evidence_func(logits) + self.prior`` along the last dimension.

        Args:
            dataloader_in: Iterable yielding ``(X_batch, y_batch)`` pairs of
                in-distribution inputs and their labels.
            dataloader_out: Iterable yielding ``(X_batch, y_batch)`` pairs of
                out-of-distribution inputs; the labels are ignored.

        Returns:
            dict with the keys ``'accuracy'``, ``'ece'``, ``'nll'``,
            ``'brier_score'``, ``'calibration_curve'``, ``'auroc'``,
            ``'entropy_in'``, ``'entropy_out'``, ``'unc_in'`` and
            ``'unc_out'``.
        """
        device = next(self.net.parameters()).device

        def expected_probas(logits):
            # Normalize evidence + prior so the last dimension sums to one.
            alpha = self.evidence_func(logits) + self.prior
            return alpha / alpha.sum(-1, keepdim=True)

        # Remember the current mode so it can be restored afterwards. If the
        # object exposes no ``training`` flag, fall back to the previous
        # behavior of always switching to training mode at the end.
        was_training = getattr(self, 'training', True)
        self.eval()
        try:
            logits_in, probas_in, y_in = [], [], []
            logits_out, probas_out = [], []
            with torch.no_grad():
                for X_batch, y_batch in dataloader_in:
                    logits = self.net(X_batch.to(device))
                    logits_in.append(logits.cpu())
                    probas_in.append(expected_probas(logits).cpu())
                    y_in.append(y_batch.cpu())
                for X_batch, _ in dataloader_out:
                    logits = self.net(X_batch.to(device))
                    logits_out.append(logits.cpu())
                    # The probabilities must come from the logits of this
                    # OOD batch, not from the in-distribution logits.
                    probas_out.append(expected_probas(logits).cpu())

            logits_in = torch.cat(logits_in)
            logits_out = torch.cat(logits_out)
            y_in = torch.cat(y_in)
            # Clamp so that the logarithms below stay finite.
            probas_in = torch.cat(probas_in).clamp(1e-8, 1 - 1e-8)
            probas_out = torch.cat(probas_out).clamp(1e-8, 1 - 1e-8)

            # Accuracy
            acc = (y_in == probas_in.argmax(-1)).float().mean().item()

            # Calibration metrics
            ece = evaluation.ExpectedCalibrationError()(probas_in, y_in)
            nll = evaluation.NegativeLogLikelihood()(probas_in, y_in)
            brier_score = evaluation.BrierScore()(probas_in, y_in)
            calibration_curve = evaluation.CalibrationCurve()(probas_in, y_in)

            # OOD metrics
            entropy_in = -torch.sum(probas_in * probas_in.log(), dim=-1)
            entropy_out = -torch.sum(probas_out * probas_out.log(), dim=-1)
            unc_in = self.get_unc(logits_in)
            unc_out = self.get_unc(logits_out)
            auroc = evaluation.get_AUROC_ood(unc_in, unc_out)

            results = {
                'accuracy': acc,
                # Calibration
                'ece': ece,
                'nll': nll,
                'brier_score': brier_score,
                'calibration_curve': calibration_curve,
                # OOD
                'auroc': auroc,
                'entropy_in': entropy_in,
                'entropy_out': entropy_out,
                'unc_in': unc_in,
                'unc_out': unc_out,
            }
        finally:
            # Restore training mode even if scoring raised.
            if was_training:
                self.train()
        return results
Beispiel #3
0
    def score(self, dataloader_in, dataloader_out):
        """Score the model on in-distribution and out-of-distribution data.

        Class probabilities are ``exp(logits)`` normalized along the last
        dimension, i.e. the softmax of the network outputs. Uncertainty
        measures come from ``dirichlet_prior_network_uncertainty``.

        Args:
            dataloader_in: Iterable yielding ``(X_batch, y_batch)`` pairs of
                in-distribution inputs and their labels.
            dataloader_out: Iterable yielding ``(X_batch, y_batch)`` pairs of
                out-of-distribution inputs; the labels are ignored.

        Returns:
            dict with the keys ``'accuracy'``, ``'ece'``, ``'nll'``,
            ``'brier_score'``, ``'calibration_curve'``, ``'auroc'``,
            ``'entropy_in'``, ``'entropy_out'``, ``'unc_in'`` and
            ``'unc_out'``.
        """
        device = next(self.net.parameters()).device

        # Remember the current mode so it can be restored afterwards. If the
        # object exposes no ``training`` flag, fall back to the previous
        # behavior of always switching to training mode at the end.
        was_training = getattr(self, 'training', True)
        self.eval()
        try:
            logits_in, y_in = [], []
            logits_out = []
            with torch.no_grad():
                for X_batch, y_batch in dataloader_in:
                    logits_in.append(self.net(X_batch.to(device)).cpu())
                    y_in.append(y_batch.cpu())
                for X_batch, _ in dataloader_out:
                    logits_out.append(self.net(X_batch.to(device)).cpu())

            logits_in = torch.cat(logits_in)
            logits_out = torch.cat(logits_out)
            y_in = torch.cat(y_in)

            # softmax(logits) equals exp(logits) / sum(exp(logits)) but does
            # not overflow to inf / inf = NaN for large logits.
            probas_in = torch.softmax(logits_in, dim=-1)
            probas_out = torch.softmax(logits_out, dim=-1)
            probas_in = probas_in.clamp(1e-8, 1 - 1e-8)
            probas_out = probas_out.clamp(1e-8, 1 - 1e-8)

            uncertainty_in = dirichlet_prior_network_uncertainty(logits_in)
            uncertainty_out = dirichlet_prior_network_uncertainty(logits_out)

            # Accuracy
            acc = (y_in == probas_in.argmax(-1)).float().mean().item()

            # Calibration metrics
            ece = evaluation.ExpectedCalibrationError()(probas_in, y_in)
            nll = evaluation.NegativeLogLikelihood()(probas_in, y_in)
            brier_score = evaluation.BrierScore()(probas_in, y_in)
            calibration_curve = evaluation.CalibrationCurve()(probas_in, y_in)

            # OOD metrics: mutual information is the score used for AUROC.
            unc_in = uncertainty_in['mutual_information']
            unc_out = uncertainty_out['mutual_information']
            auroc = evaluation.get_AUROC_ood(unc_in, unc_out)
            entropy_in = uncertainty_in['entropy_of_expected']
            entropy_out = uncertainty_out['entropy_of_expected']

            results = {
                'accuracy': acc,
                # Calibration
                'ece': ece,
                'nll': nll,
                'brier_score': brier_score,
                'calibration_curve': calibration_curve,
                # OOD
                'auroc': auroc,
                'entropy_in': entropy_in,
                'entropy_out': entropy_out,
                'unc_in': unc_in,
                'unc_out': unc_out,
            }
        finally:
            # Restore training mode even if scoring raised.
            if was_training:
                self.train()
        return results