Example No. 1
    def init_center_c(self, train_loader: DataLoader, net: BaseNet, eps=0.1):
        """Initialize hypersphere center c as the mean from an initial forward pass on the data."""
        n_samples = 0
        c = torch.zeros(net.rep_dim, device=self.device)

        net.eval()
        with torch.no_grad():
            for data in train_loader:
                # get the inputs of the batch
                inputs, _, _ = data
                inputs = inputs.to(self.device)
                outputs, _, _ = net(inputs)
                n_samples += outputs.shape[0]
                c += torch.sum(outputs, dim=0)

        c /= n_samples

        # If c_i is too close to 0, set to +-eps. Reason: a zero unit can be trivially matched with zero weights.
        c[(abs(c) < eps) & (c < 0)] = -eps
        c[(abs(c) < eps) & (c > 0)] = eps

        return c
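How the returned center is consumed: the later examples score a batch by its squared Euclidean distance to c. A minimal usage sketch (trainer, net, and inputs are assumed names, not taken from this source):

    # Hypothetical usage: the squared distance of each embedding to the
    # initialized center c is the Deep SVDD anomaly score.
    c = trainer.init_center_c(train_loader, net)
    with torch.no_grad():
        outputs, _, _ = net(inputs)          # same forward signature as above
        scores = torch.sum((outputs - c) ** 2, dim=1)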
Example No. 2
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Set device for network
        net = net.to(self.device)

        # Testing
        logger.info('Starting testing...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, semi_targets, idx = data

                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                semi_targets = semi_targets.to(self.device)
                print('Unique Semi Targets: ',
                      np.unique(semi_targets.data.cpu().numpy()))

                idx = idx.to(self.device)

                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                losses = torch.where(
                    semi_targets == 0, dist,
                    self.eta * ((dist + self.eps)**semi_targets.float()))
                loss = torch.mean(losses)
                scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time
        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.test_auc = roc_auc_score(labels, scores)

        # Log results
        logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
        logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Test Time: {:.3f}s'.format(self.test_time))
        logger.info('Finished testing.')
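The torch.where expression above implements the Deep SAD loss: unlabeled samples (semi_targets == 0) are penalized by their distance to the center, while labeled samples are weighted by eta and raised to the power of their target, so labeled anomalies (semi_targets == -1) receive an inverse-distance penalty that pushes them away from c. A small self-contained check of that behavior:

    import torch

    eta, eps = 1.0, 1e-6
    dist = torch.tensor([0.25, 0.25, 0.25])
    semi_targets = torch.tensor([0, 1, -1])   # unlabeled, labeled normal, labeled anomaly
    losses = torch.where(semi_targets == 0, dist,
                         eta * ((dist + eps) ** semi_targets.float()))
    # -> tensor([0.2500, 0.2500, 4.0000]): the labeled anomaly gets an
    #    inverse-distance penalty (~1 / dist), the others a direct one.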
Example No. 3
    def _get_output(self, loader, net: BaseNet):
        logger = logging.getLogger()
        epoch_loss = 0.0
        n_batches = 0

        # Set device for network
        net = net.to(self.device)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_output = []
        net.eval()
        with torch.no_grad():
            for data in loader:
                inputs, labels, idx, _ = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)

                # Save triples of (idx, label, output) in a list
                idx_label_output += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        outputs.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        _, labels, outputs = zip(*idx_label_output)
        labels = np.array(labels)
        outputs = np.array(outputs)

        return labels, outputs
Example No. 4
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Set device for network
        net = net.to(self.device)

        # Testing
        logger.info('Starting testing...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, semi_targets, idx = data

                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                semi_targets = semi_targets.to(self.device)
                idx = idx.to(self.device)

                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                losses = torch.where(
                    semi_targets == 0, dist,
                    self.eta * ((dist + self.eps)**semi_targets.float()))
                loss = torch.mean(losses)
                scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time
        self.test_scores = idx_label_score

        # Compute metrics
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        # AUC
        self.auc_roc = roc_auc_score(labels, scores)
        # PR-curve
        self.pr_curve = precision_recall_curve(labels, scores)
        precision, recall, thresholds = self.pr_curve
        self.auc_pr = auc(recall, precision)
        self.test_loss = epoch_loss / n_batches
Example No. 5
    def init_center_c_w(self, train_loader: DataLoader, net: BaseNet, eps=0.1):
        """Initialize hypersphere center c as the mean from an initial forward pass on the data."""
        n_samples = 0
        c = torch.zeros(net.rep_dim, device=self.device)

        net.eval()
        grad_max = torch.tensor([-np.inf], device=self.device)

        for data in train_loader:
            # get the inputs of the batch
            inputs, _, _ = data
            inputs = inputs.to(self.device)
            inputs.requires_grad_(True)
            outputs = net(inputs)
            if self.mode == 'weight':
                # grads = torch.autograd.grad(outputs=outputs.sum(), inputs=layer,
                #                             create_graph=True, retain_graph=True)[0]
                # grads = grads / (torch.sum(grads**2) + 1e-5)
                pass
            elif self.mode == 'input':
                grads = torch.autograd.grad(outputs=outputs.sum(), inputs=inputs,
                                            create_graph=False, retain_graph=False)[0]
                b = grads.shape[0]
                # per-sample squared gradient norms
                grads_norm = torch.sum(grads.view(b, -1) ** 2, dim=1) + 1e-5
                grad_max = torch.maximum(grad_max, grads_norm.max())

            inputs.requires_grad_(False)

        # with torch.no_grad():
        for data in train_loader:
            # get the inputs of the batch
            inputs, _, _ = data
            inputs = inputs.to(self.device)
            inputs.requires_grad_(True)
            outputs = net(inputs)
            if self.mode == 'weight':
                # grads = torch.autograd.grad(outputs=outputs.sum(), inputs=layer,
                #                             create_graph=True, retain_graph=True)[0]
                # grads = grads / (torch.sum(grads**2) + 1e-5)
                pass
            elif self.mode == 'input':
                grads = torch.autograd.grad(outputs=outputs.sum(), inputs=inputs,
                                            create_graph=False, retain_graph=False)[0]
                b = grads.shape[0]
                grads_norm = torch.sum(grads.view(b, -1) ** 2, dim=1) + 1e-5
                # down-weight each sample's embedding by its relative gradient norm
                outputs = (1 - grads_norm / grad_max).unsqueeze(1) * outputs
            inputs.requires_grad_(False)
            n_samples += outputs.shape[0]
            c += torch.sum(outputs.detach(), dim=0).squeeze()

        c /= n_samples

        # If c_i is too close to 0, set to +-eps. Reason: a zero unit can be trivially matched with zero weights.
        c[(abs(c) < eps) & (c < 0)] = -eps
        c[(abs(c) < eps) & (c > 0)] = eps

        return c
Example No. 6
    def test(self, dataset: BaseADDataset, vae: BaseNet):
        logger = logging.getLogger()

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Set device
        vae = vae.to(self.device)

        # Testing
        logger.info('Starting testing...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        vae.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, _, idx = data
                inputs, labels, idx = inputs.to(self.device), labels.to(
                    self.device), idx.to(self.device)

                inputs = inputs.view(inputs.size(0), -1)

                rec = vae(inputs)
                likelihood = -binary_cross_entropy(rec, inputs)
                scores = -likelihood  # negative likelihood as anomaly score

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                # Overall loss
                elbo = likelihood - vae.kl_divergence
                loss = -torch.mean(elbo)

                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.test_auc = roc_auc_score(labels, scores)

        # Log results
        logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
        logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Test Time: {:.3f}s'.format(self.test_time))
        logger.info('Finished testing variational autoencoder.')
Example No. 7
    def test(self, dataset: BaseADDataset, ae_net: BaseNet):
        logger = logging.getLogger()

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Set loss
        criterion = nn.MSELoss(reduction='none')

        # Set device for network
        ae_net = ae_net.to(self.device)
        criterion = criterion.to(self.device)

        # Testing
        logger.info('Testing autoencoder...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        ae_net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, _, idx = data
                inputs, labels, idx = inputs.to(self.device), labels.to(
                    self.device), idx.to(self.device)

                rec = ae_net(inputs)
                rec_loss = criterion(rec, inputs)
                scores = torch.mean(rec_loss, dim=tuple(range(1, rec.dim())))

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                loss = torch.mean(rec_loss)
                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.test_auc = roc_auc_score(labels, scores)

        # Log results
        logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
        logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Test Time: {:.3f}s'.format(self.test_time))
        logger.info('Finished testing autoencoder.')
Example No. 8
    def test(self, dataset: BaseADDataset, net: BaseNet, corner_cracks=True):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        if not corner_cracks:
            _, test_loader, _ = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        else:
            _, _, test_loader = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_auc = roc_auc_score(labels, scores)
        if not corner_cracks:
            logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
        else:
            logger.info('Test set AUC (corner): {:.2f}%'.format(100. *
                                                                self.test_auc))

        logger.info('Finished testing.')
Example No. 9
    def test(self, dataset: BaseADDataset, net1: BaseNet, net2: BaseNet):
        logger = logging.getLogger()
        print('R', self.R)
        print('c', self.c)

        # Set device for networks
        net1 = net1.to(self.device)
        net2 = net2.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net1.eval()
        net2.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                code, _ = net1(inputs.view(-1, 1, 9))
                outputs = net2(code)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_ftr, self.test_tpr, _ = roc_curve(labels, scores)

        self.test_score = scores
        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Finished testing.')
Example No. 10
    def test(self, dataset: BaseADDataset, ae_net: BaseNet, test_image):
        logger = logging.getLogger()

        # Set device for network
        ae_net = ae_net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Testing autoencoder...')
        loss_epoch = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        ae_net.eval()
        with torch.no_grad():
            for i, data in enumerate(test_loader):
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = ae_net(inputs)
                # check reconstruction quality on normal samples
                # (note: `labels == 0` assumes a batch size of 1)
                if labels == 0:
                    check_autoencoder_quality(inputs, test_image[i], outputs)
                scores = torch.sum((outputs - inputs)**2,
                                   dim=tuple(range(1, outputs.dim())))
                loss = torch.mean(scores)

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                loss_epoch += loss.item()
                n_batches += 1

        logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * auc))

        test_time = time.time() - start_time
        logger.info('Autoencoder testing time: %.3f' % test_time)
        logger.info('Finished testing autoencoder.')
Example No. 11
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)
        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)
        print('num of test_loader : {}'.format(len(test_loader)))
        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        print('deepSVDD test()---------------')
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                print(dist)
                print(labels)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist
                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)
        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Finished testing.')
        return str(self.test_auc)
Example No. 12
    def _test(self, loader, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)
        criterion = BCEWithLogitsLoss()

        # Testing
        logger.info('Starting testing...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in loader:
                inputs, labels, semi_targets, idx = data

                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                semi_targets = semi_targets.to(self.device)
                idx = idx.to(self.device)

                outputs = net(inputs)
                labels = labels.type_as(outputs)
                loss = criterion(outputs, labels.unsqueeze(1))

                scores = outputs.sigmoid()

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time
        self.test_scores = idx_label_score

        # Compute metrics
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        test_loss = epoch_loss / n_batches

        return labels, scores, test_loss
Example No. 13
    def test(self, dataset: BaseADDataset, ae_net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        ae_net = ae_net.to(self.device)

        # Get test data loader
        test_loader = dataset.test_set

        # Testing
        logger.info('Testing autoencoder...')
        loss_epoch = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        ae_net.eval()
        with torch.no_grad():
            for inputs, labels in test_loader:
                if len(inputs) == 32:  # process only full batches (batch size 32)
                    inputs = inputs.to(self.device)
                    inputs = inputs.unsqueeze(1)
                    outputs = ae_net(inputs.float())
                    scores = torch.sum((outputs.float() - inputs.float())**2,
                                       dim=tuple(range(1, outputs.dim())))
                    loss = torch.mean(scores)

                    # Save pairs of (label, score) in a list
                    idx_label_score += list(
                        zip(labels.cpu().data.numpy().tolist(),
                            scores.cpu().data.numpy().tolist()))

                    loss_epoch += loss.item()
                    n_batches += 1

        logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

        labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1)
        test_auc = auc(fpr, tpr)
        logger.info('Test set AUC: {:.2f}%'.format(100. * test_auc))

        test_time = time.time() - start_time
        logger.info('Autoencoder testing time: %.3f' % test_time)
        logger.info('Finished testing autoencoder.')
Example No. 14
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        test_loader = dataset.test_set

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs = inputs.to(self.device)
                inputs = inputs.unsqueeze(1)
                outputs = net(inputs.float())
                dist = torch.sum((outputs.float() - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save pairs of (label, score) in a list
                idx_label_score += list(
                    zip(labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1)
        self.test_auc = auc(fpr, tpr)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        logger.info('Finished testing.')
Example No. 15
    def _test(self, loader, net: BaseNet):
        logger = logging.getLogger()
        epoch_loss = 0.0
        n_batches = 0

        # Set device for network
        net = net.to(self.device)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in loader:
                inputs, labels, idx, _ = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                    loss = self.R**2 + (1 / self.nu) * torch.mean(
                        torch.max(torch.zeros_like(scores), scores))
                else:
                    loss = torch.mean(dist)
                    scores = dist

                epoch_loss += loss.item()
                n_batches += 1
                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        test_loss = epoch_loss / n_batches

        return labels, scores, test_loss
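For reference, the soft-boundary branch above is the soft-boundary Deep SVDD objective: the radius term R**2 plus the average hinge penalty for points falling outside the sphere, scaled by 1/nu. A toy calculation with assumed values:

    import torch

    R, nu = torch.tensor(1.0), 0.1
    dist = torch.tensor([0.5, 1.5, 2.0])     # squared distances to the center c
    scores = dist - R ** 2                   # positive -> outside the sphere
    loss = R ** 2 + (1 / nu) * torch.mean(
        torch.max(torch.zeros_like(scores), scores))
    # loss = 1.0 + 10 * mean([0.0, 0.5, 1.0]) = 6.0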
Example No. 16
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                scores = self.lastlay(outputs)

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        logger.info('Finished testing.')
Example No. 17
    def apply_model(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get apply_model data loader
        _, _, apply_loader = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Applying model
        logger.info('Starting Deep SVDD application.')
        start_time = time.time()
        idx_score = []
        net.eval()
        with torch.no_grad():
            for data in apply_loader:
                inputs, nolabels, idx = data  # nolabels are NaN
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save pairs of (idx, score) in a list
                idx_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.apply_time = time.time() - start_time
        logger.info('Deep SVDD application time: %.3f' % self.apply_time)

        ind, scores = zip(*idx_score)
        self.ind = np.array(ind)
        self.scores = np.array(scores)

        logger.info('Finished Deep SVDD application.')
Example No. 18
    def init_center_c_grad(self, train_loader: DataLoader, net: BaseNet, layer: torch.Tensor, eps=0.1):
        """Initialize hypersphere center c from gradients accumulated over an initial forward pass on the data."""
        n_samples = 0
        c = None

        net.eval()
        # with torch.no_grad():
        for data in train_loader:
            # get the inputs of the batch
            inputs, _, _ = data
            inputs = inputs.to(self.device)
            inputs.requires_grad_(True)
            outputs = net(inputs)
            n_samples += outputs.shape[0]
            if self.mode == 'weight':
                grads = torch.autograd.grad(outputs=outputs.sum(), inputs=layer,
                                            create_graph=True, retain_graph=True)[0]
                grads = grads / (torch.sum(grads ** 2) + 1e-5)
            elif self.mode == 'input':
                grads = torch.autograd.grad(outputs=outputs.sum(), inputs=inputs,
                                            create_graph=True, retain_graph=True)[0]
                if 'grad_norm' in self.add_params:
                    grads = grads / (torch.sqrt(torch.sum(
                        grads ** 2, dim=tuple(range(1, len(grads.shape))), keepdim=True)) + 1e-5)
                grads = torch.sum(grads, dim=0)
            inputs.requires_grad_(False)
            if c is None:
                c = torch.zeros_like(grads)
            c += grads.detach()
        c /= n_samples

        # If c_i is too close to 0, set to +-eps. Reason: a zero unit can be trivially matched with zero weights.
        c[(abs(c) < eps) & (c < 0)] = -eps
        c[(abs(c) < eps) & (c > 0)] = eps

        return c
Example No. 19
    def train(self, dataset: BaseADDataset, oe_dataset: BaseADDataset,
              net: BaseNet):
        logger = logging.getLogger()

        # Get train data loader
        if oe_dataset is not None:
            num_workers = int(self.n_jobs_dataloader / 2)
        else:
            num_workers = self.n_jobs_dataloader

        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=num_workers)
        if oe_dataset is not None:
            if oe_dataset.shuffle:
                if len(dataset.train_set) > len(oe_dataset.train_set):
                    oe_sampler = RandomSampler(oe_dataset.train_set,
                                               replacement=True,
                                               num_samples=len(
                                                   dataset.train_set))
                    oe_loader = DataLoader(dataset=oe_dataset.train_set,
                                           batch_size=self.batch_size,
                                           shuffle=False,
                                           sampler=oe_sampler,
                                           num_workers=num_workers,
                                           drop_last=True)
                else:
                    oe_loader = DataLoader(dataset=oe_dataset.train_set,
                                           batch_size=self.batch_size,
                                           shuffle=True,
                                           num_workers=num_workers,
                                           drop_last=True)

            else:
                oe_loader = DataLoader(dataset=oe_dataset.train_set,
                                       batch_size=self.batch_size,
                                       shuffle=False,
                                       num_workers=num_workers,
                                       drop_last=True)
            dataset_loader = zip(train_loader, oe_loader)
        else:
            dataset_loader = train_loader

        # Set loss
        if self.objective in ['bce', 'focal']:
            if self.objective == 'bce':
                criterion = nn.BCEWithLogitsLoss()
            if self.objective == 'focal':
                criterion = FocalLoss(gamma=self.focal_gamma)
            criterion = criterion.to(self.device)

        # Set device
        net = net.to(self.device)

        # Set optimizer
        optimizer = optim.Adam(net.parameters(),
                               lr=self.lr,
                               weight_decay=self.weight_decay)

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Training
        logger.info('Starting training...')
        net.train()
        start_time = time.time()

        for epoch in range(self.n_epochs + 1):
            epoch_loss = 0.0
            n_batches = 0
            idx_label_score = []
            epoch_start_time = time.time()

            # start at random point for the outlier exposure dataset in each epoch
            if (oe_dataset is not None) and (epoch < self.n_epochs):
                oe_loader.dataset.offset = np.random.randint(
                    len(oe_loader.dataset))
                if oe_loader.dataset.shuffle_idxs:
                    random.shuffle(oe_loader.dataset.idxs)
                dataset_loader = zip(train_loader, oe_loader)

            # only load samples from the original training set in the last epoch, for saving train scores
            if epoch == self.n_epochs:
                dataset_loader = train_loader
                net.eval()

            for data in dataset_loader:
                if (oe_dataset is not None) and (epoch < self.n_epochs):
                    inputs = torch.cat((data[0][0], data[1][0]), 0)
                    labels = torch.cat((data[0][1], data[1][1]), 0)
                    semi_targets = torch.cat((data[0][2], data[1][2]), 0)
                    idx = torch.cat((data[0][3], data[1][3]), 0)
                else:
                    inputs, labels, semi_targets, idx = data

                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                semi_targets = semi_targets.to(self.device)
                idx = idx.to(self.device)

                # Zero the network parameter gradients
                if epoch < self.n_epochs:
                    optimizer.zero_grad()

                # Update network parameters via backpropagation: forward + backward + optimize
                outputs = net(inputs)

                if self.objective == 'hsc':
                    if self.hsc_norm == 'l1':
                        dists = torch.norm(outputs, p=1, dim=1)
                    if self.hsc_norm == 'l2':
                        dists = torch.norm(outputs, p=2, dim=1)
                    if self.hsc_norm == 'l2_squared':
                        dists = torch.norm(outputs, p=2, dim=1)**2
                    if self.hsc_norm == 'l2_squared_linear':
                        dists = torch.sqrt(
                            torch.norm(outputs, p=2, dim=1)**2 + 1) - 1

                    scores = 1 - torch.exp(-dists)
                    losses = torch.where(semi_targets == 0, dists,
                                         -torch.log(scores + self.eps))
                    loss = torch.mean(losses)

                if self.objective == 'deepSAD':
                    dists = torch.norm(outputs, p=2, dim=1)**2
                    scores = dists
                    losses = torch.where(
                        semi_targets == 0, dists,
                        ((dists + self.eps)**semi_targets.float()))
                    loss = torch.mean(losses)

                if self.objective in ['bce', 'focal']:
                    targets = torch.zeros(inputs.size(0))
                    targets[semi_targets == -1] = 1
                    targets = targets.view(-1, 1).to(self.device)

                    scores = torch.sigmoid(outputs)
                    loss = criterion(outputs, targets)

                if epoch < self.n_epochs:
                    loss.backward()
                    optimizer.step()

                # save train scores in last epoch
                if epoch == self.n_epochs:
                    idx_label_score += list(
                        zip(idx.cpu().data.numpy().tolist(),
                            labels.cpu().data.numpy().tolist(),
                            scores.flatten().cpu().data.numpy().tolist()))

                epoch_loss += loss.item()
                n_batches += 1

            # Take learning rate scheduler step
            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' %
                            float(scheduler.get_last_lr()[0]))

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info(
                f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {epoch_train_time:.3f}s '
                f'| Train Loss: {epoch_loss / n_batches:.6f} |')

        self.train_time = time.time() - start_time
        self.train_scores = idx_label_score

        # Log results
        logger.info('Train Time: {:.3f}s'.format(self.train_time))
        logger.info('Train Loss: {:.6f}'.format(epoch_loss / n_batches))
        logger.info('Finished training.')

        return net
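FocalLoss is referenced above but not defined in these examples. A minimal binary focal loss on logits (Lin et al., 2017) that matches the way criterion(outputs, targets) is called here could look roughly like this; it is a sketch, not the original implementation:

    import torch
    import torch.nn as nn

    class FocalLoss(nn.Module):
        """Binary focal loss on raw logits: down-weights easy examples
        by (1 - p_t) ** gamma before averaging the BCE terms."""

        def __init__(self, gamma: float = 2.0):
            super().__init__()
            self.gamma = gamma

        def forward(self, logits: torch.Tensor, targets: torch.Tensor) -> torch.Tensor:
            bce = nn.functional.binary_cross_entropy_with_logits(
                logits, targets, reduction='none')
            p_t = torch.exp(-bce)  # model's probability of the true class
            return ((1 - p_t) ** self.gamma * bce).mean()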
Example No. 20
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Set loss
        if self.objective in ['bce', 'focal']:
            if self.objective == 'bce':
                criterion = nn.BCEWithLogitsLoss()
            if self.objective == 'focal':
                criterion = FocalLoss(gamma=self.focal_gamma)
            criterion = criterion.to(self.device)

        # Set device for network
        net = net.to(self.device)

        # Testing
        logger.info('Starting testing...')
        net.eval()
        epoch_loss = 0.0
        n_batches = 0
        idx_label_score = []
        start_time = time.time()

        with torch.no_grad():
            for data in test_loader:
                inputs, labels, semi_targets, idx = data

                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                semi_targets = semi_targets.to(self.device)
                idx = idx.to(self.device)

                outputs = net(inputs)

                if self.objective == 'hsc':
                    if self.hsc_norm == 'l1':
                        dists = torch.norm(outputs, p=1, dim=1)
                    if self.hsc_norm == 'l2':
                        dists = torch.norm(outputs, p=2, dim=1)
                    if self.hsc_norm == 'l2_squared':
                        dists = torch.norm(outputs, p=2, dim=1)**2
                    if self.hsc_norm == 'l2_squared_linear':
                        dists = torch.sqrt(
                            torch.norm(outputs, p=2, dim=1)**2 + 1) - 1

                    scores = 1 - torch.exp(-dists)
                    losses = torch.where(semi_targets == 0, dists,
                                         -torch.log(scores + self.eps))
                    loss = torch.mean(losses)

                if self.objective == 'deepSAD':
                    dists = torch.norm(outputs, p=2, dim=1)**2
                    scores = dists
                    losses = torch.where(
                        semi_targets == 0, dists,
                        ((dists + self.eps)**semi_targets.float()))
                    loss = torch.mean(losses)

                if self.objective in ['bce', 'focal']:
                    targets = torch.zeros(inputs.size(0))
                    targets[semi_targets == -1] = 1
                    targets = targets.view(-1, 1).to(self.device)

                    scores = torch.sigmoid(outputs)
                    loss = criterion(outputs, targets)

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.flatten().cpu().data.numpy().tolist()))

                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time
        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.test_auc = roc_auc_score(labels, scores)

        # Log results
        logger.info('Test Time: {:.3f}s'.format(self.test_time))
        logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
        logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Finished testing.')
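The four hsc_norm variants above differ only in how the embedding norm is turned into a distance. For reference, on a toy batch:

    import torch

    outputs = torch.randn(4, 8)
    l2 = torch.norm(outputs, p=2, dim=1)
    dists = {
        'l1': torch.norm(outputs, p=1, dim=1),
        'l2': l2,
        'l2_squared': l2 ** 2,
        # smooth variant: behaves like l2**2 near zero, like l2 for large norms
        'l2_squared_linear': torch.sqrt(l2 ** 2 + 1) - 1,
    }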
Example No. 21
    def test(self, dataset: BaseADDataset, net: BaseNet, epoch):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        # logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        # logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_auc = roc_auc_score(labels, scores)
        logger.info(
            '---------------------------------------------------------Test set AUC: {:.2f}%'
            .format(100. * self.test_auc))

        # logger.info('Finished testing.')

        # record the AUC to a text file
        with open('../log/mnist_test/get_param.txt', 'a') as f_get_para:
            f_get_para.write('Test set AUC: {:.2f}%. \r\n'.format(100. * self.test_auc))

        # record the test AUC after every 100 epochs
        if (epoch + 1) % 100 == 0:
            with open('../log/mnist_test/100_AUC.txt', 'a') as f_100_para:
                f_100_para.write('Test set AUC: {:.2f}%. \r\n'.format(100. * self.test_auc))

        self.Accuracy_list.append(100. * self.test_auc)
Example No. 22
    def test(self, dataset: BaseADDataset, ae_net: BaseNet, flg=0):
        """
            训练集 获取正常数据簇 -- 中心点,半径
            测试集 Kmeans 对数据进行预测,超过簇半径为异常数据,否则正常数据
        """

        logger = logging.getLogger()

        # Set device for networks
        ae_net = ae_net.to(self.device)

        # training set: flg == 1; test set: flg == 0
        if flg == 1:
            test_loader, _ = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        else:
            _, test_loader = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Testing ae...')
        loss_epoch = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        ae_net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = ae_net(inputs)
                scores = torch.sum((outputs - inputs)**2,
                                   dim=tuple(range(1, outputs.dim())))
                error = (outputs - inputs)**2
                loss = torch.mean(scores)

                # Save tuples of (idx, label, score, error) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist(),
                        error.cpu().data.numpy().tolist()))

                loss_epoch += loss.item()
                n_batches += 1

        logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

        _, labels, scores, error = zip(*idx_label_score)
        labels = np.array(labels)  # labels.shape: (97278,)
        scores = np.array(scores)  # scores.shape: (97278,)
        error = np.array(error)    # per-element squared errors, shape (97278, ...)

        if flg == 1:  # training set
            X = error
            self.kmeans = KMeans(n_clusters=self.clusters).fit(X)
            self.center = self.kmeans.cluster_centers_.tolist()
            self.radius = self.get_radius(X)
            print("roc_self.center", self.center)
            print("roc_self.radius", self.radius)
        else:  # test set
            Y = error
            pred_labels = []  # predicted labels
            pred_km = self.kmeans.predict(Y)
            print(pred_km.shape)
            print(pred_km)
            for i in range(len(pred_km)):
                dis = self.manhattan_distance(self.center[pred_km[i]],
                                              Y[i])  # dis: distance from the cluster center to the point, used to classify
                if dis > self.radius[pred_km[i]]:
                    pred_labels.append(1)
                else:
                    pred_labels.append(0)

            pred_labels = np.array(pred_labels)
            self.test_ftr, self.test_tpr, _ = roc_curve(labels, pred_labels)
            fpr, tpr, thresholds = roc_curve(labels, pred_labels)  # area under this curve serves as the accuracy measure
            print(fpr, tpr)
            self.test_auc = auc(fpr, tpr)
            self.test_mcc = matthews_corrcoef(labels, pred_labels)
            _, _, f_score, _ = precision_recall_fscore_support(labels,
                                                               pred_labels,
                                                               labels=[0, 1])
            self.test_f_score = f_score[1]

        print(len(labels))
        print(len(scores))

        self.test_time = time.time() - start_time
        if flg == 0:
            logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        logger.info('ae testing time: %.3f' % self.test_time)
        logger.info('Finished testing ae.')
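The example calls self.get_radius and self.manhattan_distance, which are not shown. Minimal sketches consistent with how they are used (one radius per k-means cluster, L1 distance to the cluster center); these are assumptions, not the original helpers:

    import numpy as np

    def manhattan_distance(self, center, x):
        """L1 distance between a cluster center and a sample (assumed helper)."""
        return np.sum(np.abs(np.asarray(center) - np.asarray(x)))

    def get_radius(self, X):
        """One plausible definition (assumed helper): per-cluster radius as the
        maximum L1 distance of the cluster's own training samples to its center."""
        assignments = self.kmeans.predict(X)
        return [max((self.manhattan_distance(self.center[k], x)
                     for x, a in zip(X, assignments) if a == k), default=0.0)
                for k in range(self.clusters)]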
Example No. 23
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Set device
        net = net.to(self.device)

        # Use importance weighted sampler (Burda et al., 2015) to get a better estimate on the log-likelihood.
        sampler = ImportanceWeightedSampler(mc=1, iw=1)
        elbo = SVI(net, likelihood=binary_cross_entropy, sampler=sampler)

        # Testing
        logger.info('Starting testing...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, _, idx = data
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                idx = idx.to(self.device)

                # All test data is considered unlabeled
                inputs = inputs.view(inputs.size(0), -1)
                u = inputs
                y = labels
                y_onehot = torch.Tensor(y.size(0), 2).to(
                    self.device)  # two labels: 0: normal, 1: outlier
                y_onehot.zero_()
                y_onehot.scatter_(1, y.view(-1, 1), 1)

                # Compute loss
                L = -elbo(u, y_onehot)
                U = -elbo(u)

                logits = net.classify(u)
                eps = 1e-8
                classification_loss = -torch.sum(
                    y_onehot * torch.log(logits + eps), dim=1).mean()

                loss = L + self.alpha * classification_loss + U  # J_alpha

                # Compute scores
                scores = logits[:, 1]  # likelihood/confidence for the anomalous class as the anomaly score

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time
        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.test_auc = roc_auc_score(labels, scores)

        # Log results
        logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
        logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Test Time: {:.3f}s'.format(self.test_time))
        logger.info('Finished testing.')
Example No. 24
    def test(self, dataset: BaseADDataset, net: BaseNet):

        self.n_components = net.cate_dense_2
        self.n_features = net.rep_dim
        self.mu_test = torch.tensor(np.float32(
            np.zeros([1, self.n_components, self.n_features])),
                                    device=self.device)
        self.cov_test = torch.tensor(np.float32(
            np.zeros([1, self.n_components, self.n_features,
                      self.n_features])),
                                     device=self.device)

        self.isTesting = True

        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        train_loader, test_loader = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        ssim_loss = pytorch_ssim.SSIM(window_size=11, size_average=False)
        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in train_loader:
                inputs, labels, idx = data

                inputs = inputs.to(self.device)
                outputs, category, resconstruction = net(inputs)

                if self.objective == 'deep-GMM':
                    phi, mu, cov = self.compute_gmm_params(outputs, category)

                    batch_gamma_sum = torch.sum(category, dim=0)

                    self.gamma_sum += batch_gamma_sum

                    self.mu_test += mu * batch_gamma_sum.unsqueeze(
                        -1)  # keep sums of the numerator only
                    self.cov_test += cov * batch_gamma_sum.unsqueeze(
                        -1).unsqueeze(-1)  # keep sums of the numerator only

                    self.iteration += inputs.size(0)

                    train_phi = self.gamma_sum / self.iteration
                    train_mu = self.mu_test / self.gamma_sum.unsqueeze(-1)
                    train_cov = self.cov_test / self.gamma_sum.unsqueeze(
                        -1).unsqueeze(-1)

                    train_cov = train_cov.squeeze(0)
                    train_mu = train_mu.squeeze(0)

            for data in test_loader:
                inputs, labels, idx = data

                inputs = inputs.to(self.device)
                outputs, category, resconstruction = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)

                if self.objective == 'deep-GMM':
                    phi, _, _ = self.compute_gmm_params(outputs, category)
                    sample_energy, cov_diag = self.compute_energy(
                        outputs,
                        phi=phi,
                        mu=train_mu,
                        cov=train_cov,
                        size_average=False)

                    if self.ae_loss_type == 'ssim':
                        rescon_error = -ssim_loss(inputs, resconstruction)
                        rescon_error = rescon_error * self.ssim_lambda
                    else:
                        rescon_error = torch.sum(
                            (resconstruction - inputs)**2,
                            dim=tuple(range(1, resconstruction.dim())))
                        rescon_error = rescon_error * self.l2_lambda

                    scores = sample_energy + rescon_error

                    #scores = rescon_error
                elif self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                elif self.objective == 'hybrid':
                    if self.ae_loss_type == 'ssim':
                        rescon_error = -ssim_loss(inputs, resconstruction)
                        rescon_error = rescon_error * self.ssim_lambda
                    else:
                        rescon_error = torch.sum(
                            (resconstruction - inputs)**2,
                            dim=tuple(range(1, resconstruction.dim())))
                        rescon_error = rescon_error * self.l2_lambda_test
                    sample_energy = dist
                    scores = dist + rescon_error

                else:
                    if self.ae_loss_type == 'ssim':
                        rescon_error = -ssim_loss(inputs, resconstruction)
                        rescon_error = rescon_error * self.ssim_lambda
                    else:
                        rescon_error = torch.sum(
                            (resconstruction - inputs)**2,
                            dim=tuple(range(1, resconstruction.dim())))
                        rescon_error = rescon_error * self.l2_lambda * 1.5
                    sample_energy = dist
                    scores = dist

                # Save tuples of (idx, label, score, energy, reconstruction error);
                # both dataset branches store the same fields
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist(),
                        sample_energy.cpu().data.numpy().tolist(),
                        rescon_error.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores, energy, rescon_error = zip(*idx_label_score)
        labels = np.array(labels)
        labels[labels > 0] = 1  # binarize: any nonzero label counts as an anomaly
        scores = np.array(scores)
        energy = np.array(energy)
        rescon_error = np.array(rescon_error)


        # Clip extreme scores so inf/NaN energies cannot break the AUC computation
        scores[scores > 100] = 100

        self.test_auc = roc_auc_score(labels, rescon_error)
        logger.info('Test set reconstruction AUC: {:.2f}%'.format(
            100. * self.test_auc))

        self.test_auc = roc_auc_score(labels, energy)
        logger.info('Test set one-class AUC: {:.2f}%'.format(100. * self.test_auc))

        # self.test_auc is overwritten above; the hybrid AUC below is the value
        # that persists after testing
        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set hybrid AUC: {:.2f}%'.format(100. * self.test_auc))

        logger.info('Finished testing.')
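    # `compute_gmm_params` and `compute_energy` are called above but not shown in
    # this snippet (`ssim_loss` is likewise assumed to come from an external SSIM
    # package). Minimal sketches below, assuming DAGMM-style estimation where
    # `gamma` holds the soft cluster assignments (n, k) and `z` the latent codes
    # (n, d); the exact signatures and shapes are assumptions, not the original
    # implementation.
    def compute_gmm_params(self, z, gamma):
        """Mixture weights phi (k,), means mu (k, d) and covariances cov (k, d, d)."""
        sum_gamma = torch.sum(gamma, dim=0)                                 # (k,)
        phi = sum_gamma / gamma.size(0)                                     # (k,)
        mu = torch.sum(gamma.unsqueeze(-1) * z.unsqueeze(1), dim=0) \
            / sum_gamma.unsqueeze(-1)                                       # (k, d)
        z_mu = z.unsqueeze(1) - mu.unsqueeze(0)                             # (n, k, d)
        outer = z_mu.unsqueeze(-1) * z_mu.unsqueeze(-2)                     # (n, k, d, d)
        cov = torch.sum(gamma[:, :, None, None] * outer, dim=0) \
            / sum_gamma[:, None, None]                                      # (k, d, d)
        return phi, mu, cov

    def compute_energy(self, z, phi, mu, cov, size_average=True):
        """Sample energy E(z) = -log sum_k phi_k N(z; mu_k, cov_k)."""
        import math
        d = mu.size(-1)
        z_mu = z.unsqueeze(1) - mu.unsqueeze(0)                             # (n, k, d)
        cov = cov + 1e-6 * torch.eye(d, device=z.device)                    # jitter for stability
        maha = torch.einsum('nkd,kde,nke->nk', z_mu, torch.linalg.inv(cov), z_mu)
        log_norm = -0.5 * (maha + torch.logdet(cov) + d * math.log(2 * math.pi))
        sample_energy = -torch.logsumexp(torch.log(phi + 1e-12) + log_norm, dim=1)
        cov_diag = torch.sum(1.0 / (torch.diagonal(cov, dim1=-2, dim2=-1) + 1e-12))
        if size_average:
            sample_energy = torch.mean(sample_energy)
        return sample_energy, cov_diag
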
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        output_data = []
        label_data = []
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                output_data.append(outputs)
                label_data.append(labels)
                # self.c is either a single center (dim 1, naive Deep SVDD) or a
                # (rep_dim, K) matrix of K cluster centers
                if self.c.dim() == 1:  # naive Deep SVDD, single hypersphere
                    centers = self.c
                    dist = torch.sum((outputs - self.c)**2, dim=1)
                else:
                    centers = torch.transpose(self.c, 0, 1)  # (K, rep_dim)
                    dist = torch.zeros(outputs.shape[0], device=self.device)
                    for i in range(outputs.shape[0]):
                        # Score each point by its distance to the nearest center
                        dist[i] = torch.sum((centers - outputs[i])**2,
                                            dim=1).min()
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        # UMAP plot on the testing data (reuses the UMAP model fit during
        # training); enable by uncommenting and passing anomaly_data=True:
        # kmeans_centers = np.load('centers.npy')
        # output_data = torch.cat(output_data)
        # label_data = torch.cat(label_data).numpy()
        # self.latent_UMAP(output_data, label_data, kmeans_centers, anomaly_data=True)

        # UMAP Plot (on training data)
        # Get train data loader
        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=self.n_jobs_dataloader)

        output_data = []
        label_data = []
        with torch.no_grad():
            for data in train_loader:
                # get the inputs of the batch; labels are used only for the UMAP plot
                inputs, labels, _ = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                output_data.append(outputs)
                label_data.append(labels)
        kmeans_centers = np.load('centers.npy')
        output_data = torch.cat(output_data)
        label_data = torch.cat(label_data).numpy()
        self.latent_UMAP(output_data,
                         label_data,
                         kmeans_centers,
                         anomaly_data=True)

        logger.info('Finished testing.')
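    # `latent_UMAP` is referenced above but not included in this snippet. A minimal
    # sketch, assuming the umap-learn and matplotlib packages; the signature and
    # plotting choices are assumptions rather than the original implementation.
    def latent_UMAP(self, output_data, label_data, centers, anomaly_data=False):
        """2-D UMAP scatter of the latent codes, with cluster centers overlaid."""
        import umap
        import matplotlib.pyplot as plt
        codes = output_data.detach().cpu().numpy()
        reducer = umap.UMAP(n_components=2, random_state=0)
        embedding = reducer.fit_transform(codes)                 # (n, 2)
        center_emb = reducer.transform(np.atleast_2d(centers))   # project the centers too
        plt.scatter(embedding[:, 0], embedding[:, 1], c=label_data, s=5, cmap='Spectral')
        plt.scatter(center_emb[:, 0], center_emb[:, 1], c='black', marker='x', s=100)
        plt.title('UMAP of latent codes ({} data)'.format('test' if anomaly_data else 'train'))
        plt.show()
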
Ejemplo n.º 26
0
    def test(self, dataset: BaseADDataset, svm_net: BaseNet):
        """ 测试 svm 模型 """
        logger = logging.getLogger()

        # Set device for networks
        svm_net = svm_net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Testing SVM network...')
        loss_epoch = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        svm_net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                labels = labels.to(self.device)

                outputs = svm_net(inputs)
                _, scores = torch.max(outputs, 1)  # predicted class indices serve as scores
                loss = self.hinge_loss(outputs, labels)

                scores = scores.float()
                labels = labels.float()
                # Save triple of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                loss_epoch += loss.item()
                n_batches += 1

        logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_score = scores

        self.test_auc = roc_auc_score(labels, scores)
        self.test_ftr, self.test_tpr, _ = roc_curve(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        self.test_mcc = matthews_corrcoef(labels, scores)
        _, _, f_score, _ = precision_recall_fscore_support(labels,
                                                           scores,
                                                           labels=[0, 1])
        self.test_f_score = f_score[1]
        self.test_time = time.time() - start_time
        logger.info('svm_trainer testing time: %.3f' % self.test_time)
        logger.info('Finished testing svm_trainer.')
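    # `hinge_loss` is used above but not defined in this snippet. A minimal sketch,
    # assuming a standard multi-class hinge objective; PyTorch's built-in
    # multi_margin_loss implements exactly this margin formulation.
    def hinge_loss(self, outputs, labels):
        """Mean over the batch and over j != y of max(0, margin - s_y + s_j)."""
        return torch.nn.functional.multi_margin_loss(outputs, labels, margin=1.0)
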
    def init_center_c(self, train_loader: DataLoader, net: BaseNet, eps=0.1):
        """Initialize hypersphere center c as the mean from an initial forward pass on the data."""
        logger = logging.getLogger()
        # TODO: incorporate naive Deep SVDD init_c if self.K == 1
        n_samples = 0  ### naive
        c = torch.zeros(net.rep_dim, device=self.device)  ### naive

        net.eval()
        with torch.no_grad():  ### naive
            for data in train_loader:  ### naive
                # get the inputs of the batch
                inputs, _, _ = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                n_samples += outputs.shape[0]
                c += torch.sum(outputs, dim=0)

        c /= n_samples  ### naive
        cen = c  ### naive

        ### NEW multi-center code
        ###logger.info("Initializing {} clusters".format(self.K))
        ###cen = torch.zeros(net.rep_dim, self.K, device=self.device)
        ###kmeans = KMeans(n_clusters=self.K,random_state=0,max_iter=10)
        output_data = []
        label_data = []
        with torch.no_grad():
            for data in train_loader:
                # get the inputs of the batch
                inputs, labels, _ = data  #labels are only for UMAP of hyperspheres
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                output_data.append(outputs)
                label_data.append(labels)

            output_data = torch.cat(output_data)
            ###kmeans = kmeans.fit(output_data)
            ###cluster_centers = torch.from_numpy(kmeans.cluster_centers_.T)
            ###cluster_centers = cluster_centers.type(torch.FloatTensor)
            ###cen = cluster_centers.to(self.device)
            ###dmat = scipy.spatial.distance.squareform(scipy.spatial.distance.pdist(cen.detach().cpu().numpy().T))
            ###logger.info(f"Distances between cluster centers: \n{dmat}")

            # Generate silhouette plot
            ## self.silhouette_plot(output_data)

            # UMAP Plot
            ###np.save('centers.npy',kmeans.cluster_centers_)
            np.save('centers.npy', cen.cpu().detach().numpy())
            label_data = torch.cat(label_data).numpy()
            ###self.latent_UMAP(output_data, label_data, kmeans.cluster_centers_)
            self.latent_UMAP(output_data, label_data,
                             cen.cpu().detach().numpy())
        ###
        # If c_i is too close to 0, set to +-eps. Reason: a zero unit can be trivially matched with zero weights.
        cen[(abs(cen) < eps) & (cen < 0)] = -eps
        cen[(abs(cen) < eps) & (cen > 0)] = eps

        return cen
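    # A sketch of how the `###`-commented multi-center path above could be
    # re-enabled with scikit-learn's KMeans. The method name and the
    # (rep_dim, K) center layout (matching the transposed use of self.c in the
    # test method) are assumptions, not a restoration of the original code.
    def init_centers_kmeans(self, output_data, K):
        """Cluster latent codes into K centers, returned as a (rep_dim, K) tensor."""
        from sklearn.cluster import KMeans
        kmeans = KMeans(n_clusters=K, random_state=0, max_iter=10)
        kmeans.fit(output_data.detach().cpu().numpy())
        np.save('centers.npy', kmeans.cluster_centers_)  # consumed by the UMAP plots
        centers = torch.from_numpy(kmeans.cluster_centers_.T).float()
        return centers.to(self.device)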
Ejemplo n.º 28
0
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs, category, resconstruction = net(inputs)
                dist = torch.sum((outputs - self.c) ** 2, dim=1)

                if self.objective == 'deep-GMM':
                    # (n,k) --> (n,k,1)
                    weights = category.unsqueeze(2)

                    # (n, k, 1) --> (1, k, 1)
                    n_k = torch.sum(weights, 0, keepdim=True)

                    # (n,d) ---> (n, k, d)
                    outputs = outputs.unsqueeze(1).expand(outputs.size(0), self.n_components, outputs.size(1))

                    # (n, k, d) --> (1, k, d)
                    mu = torch.div(torch.sum(weights * outputs, 0, keepdim=True), n_k + self.eps)
                    # (n, k, d) --> (1, k, d)
                    var = torch.div(torch.sum(weights * (outputs - mu) * (outputs - mu), 0, keepdim=True),
                                    n_k + self.eps)

                    # (1, k, d) --> (n, k, d)
                    mu = mu.expand(outputs.size(0), self.n_components, self.n_features)
                    var = var.expand(outputs.size(0), self.n_components, self.n_features)

                    # Optionally reuse statistics saved during training instead of
                    # the per-batch estimates:
                    # mu = self.mu_test
                    # var = self.var_test

                    # (n, k, d) --> (n, k, 1)
                    exponent = torch.exp(-.5 * torch.sum((outputs - mu) * (outputs - mu) / var, 2, keepdim=True))
                    # (n, k, d) --> (n, k, 1)
                    prefactor = torch.rsqrt(
                        ((2. * pi) ** self.n_features) * torch.prod(var, dim=2, keepdim=True) + self.eps)

                    # (n, k, 1)
                    logits_pre = torch.mean(weights, 0, keepdim=True) * prefactor * exponent

                    # (n, k, 1) --> (n, k)
                    logits_pre = torch.squeeze(logits_pre)

                    logits = -torch.log(torch.sum(logits_pre, 1) + self.eps)

                    rescon_error = torch.sum((resconstruction - inputs) ** 2, dim=tuple(range(1, resconstruction.dim())))

                    #scores = logits + rescon_error
                    scores = logits
                    #scores = rescon_error
                elif self.objective == 'soft-boundary':
                    scores = dist - self.R ** 2
                else:
                    scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(zip(idx.cpu().data.numpy().tolist(),
                                            labels.cpu().data.numpy().tolist(),
                                            scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        logger.info('Finished testing.')
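The deep-GMM branch above evaluates a diagonal-covariance Gaussian mixture over the latent codes in closed form, with the `category` output acting as soft assignments gamma_ik. As a transcription of the code (K = n_components, D = n_features, parameters estimated per batch):

    \hat{\phi}_k = \frac{1}{n}\sum_{i=1}^{n}\gamma_{ik}, \qquad
    \hat{\mu}_{kd} = \frac{\sum_i \gamma_{ik}\, z_{id}}{\sum_i \gamma_{ik}}, \qquad
    \hat{\sigma}_{kd}^{2} = \frac{\sum_i \gamma_{ik}\,(z_{id}-\hat{\mu}_{kd})^{2}}{\sum_i \gamma_{ik}}

and the anomaly score (`logits` in the code, with `self.eps` terms added for numerical stability) is the sample energy

    E(z) = -\log \sum_{k=1}^{K} \hat{\phi}_k\,
           \frac{\exp\!\left(-\tfrac{1}{2}\sum_{d=1}^{D}(z_d-\hat{\mu}_{kd})^{2}/\hat{\sigma}_{kd}^{2}\right)}
                {\sqrt{(2\pi)^{D}\prod_{d=1}^{D}\hat{\sigma}_{kd}^{2}}}.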
Ejemplo n.º 29
0
    def test(self, dataset: BaseADDataset, net: BaseNet, is_test=0):
        """
            dt_type:数据集的类型, 测试集 0 / 训练集 1
        """
        logger = logging.getLogger()

        # Set device for networks
        net = net.to(self.device)

        # Get test data loader
        if is_test == 0:  # test set loader
            _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        if is_test == 1:  # training set loader
            test_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Testing lstm_autoencoder...')
        loss_epoch = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)

                # collect the batch labels as a NumPy array (shape: (batch_size,))
                label = labels.numpy()

                if is_test == 0:
                    self.test_label.extend(label)
                if is_test == 1:
                    self.train_label.extend(label)

                code, outputs = net(inputs.view(-1, 1, self.n_features))
                code = code.detach().cpu().numpy()  # .cpu() so this also works on CUDA

                if is_test == 0:
                    self.test_code.extend(code)
                if is_test == 1:
                    self.train_code.extend(code)

                scores = torch.sum((outputs - inputs) ** 2, dim=tuple(range(1, outputs.dim())))
                loss = torch.mean(scores)

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(zip(idx.cpu().data.numpy().tolist(),
                                            labels.cpu().data.numpy().tolist(),
                                            scores.cpu().data.numpy().tolist()))

                loss_epoch += loss.item()
                n_batches += 1

        logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

        self.test_time = time.time() - start_time
        logger.info('lstm_autoencoder testing time: %.3f' % self.test_time)
        self.test_scores = idx_label_score

        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)


        """ 测试集 """
        if is_test == 0:
            self.test_auc = roc_auc_score(labels, scores)
            logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
            logger.info('Finished test lstm_autoencoder.')
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_auc = roc_auc_score(labels, scores)

        # confusion matrix
        true_positive = 0
        true_negative = 0
        false_positive = 0
        false_negative = 0
        number_of_anomalies = int(sum(labels))
        scores_indices = (-scores).argsort()[:number_of_anomalies]
        actual_indices = (-labels).argsort()[:number_of_anomalies]
        print("most anomalous samples that are actually anomalies: ",
              len(list(set(scores_indices) & set(actual_indices))))
        # Tally a confusion matrix at a fixed decision threshold of 1 on the raw scores
        for i in range(len(labels)):
            if labels[i] == 1 and scores[i] >= 1:
                true_positive += 1
            if labels[i] == 0 and scores[i] < 1:
                true_negative += 1
            if labels[i] == 1 and scores[i] < 1:
                false_negative += 1
            if labels[i] == 0 and scores[i] >= 1:
                false_positive += 1
        print("true_positive: ", true_positive)
        print("true_negative: ", true_negative)
        print("false_positive: ", false_positive)
        print("false_negative: ", false_negative)
        print("accuracy: ", ((true_positive + true_negative) / len(labels)))
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        fpr, tpr, threshold = roc_curve(labels, scores)
        roc_auc = auc(fpr, tpr)
        plt.title('Receiver Operating Characteristic')
        plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
        plt.legend(loc='lower right')
        plt.plot([0, 1], [0, 1], 'r--')
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.ylabel('True Positive Rate')
        plt.xlabel('False Positive Rate')
        plt.show()

        # Split scores into anomalous and normal samples for the histograms
        nonzero_indices = np.nonzero(labels)[0]
        zero_indices = np.where(labels == 0)[0]

        outliers = scores[nonzero_indices]
        normal_samples = scores[zero_indices]

        plt.hist(scores, color='green', density=False)
        plt.hist(outliers, color='blue', density=False)
        plt.yscale('log')
        plt.show()
        logger.info('Finished testing.')

        # Persist ROC data and scores for later comparison plots
        results_dir = "/home/liviu/Documents/Dev/Deep-SVDD-PyTorch/results/credit_fraud/"
        np.save(results_dir + "DeepSVDD" + "_fpr", fpr)
        np.save(results_dir + "DeepSVDD" + "_tpr", tpr)
        np.save(results_dir + "DeepSVDD" + "_scores", scores)
        np.save(results_dir + "DeepSVDD" + "_outliers", outliers)