Example #1
    def test(self, dataset: BaseADDataset, ae_net: BaseNet):
        logger = logging.getLogger()

        # Get test data loader (datasets may return two or three splits)
        try:
            _, _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        except ValueError:
            _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Set loss
        criterion = nn.MSELoss(reduction='none')

        # Set device for network
        ae_net = ae_net.to(self.device)
        criterion = criterion.to(self.device)

        # Testing
        logger.info('Testing autoencoder...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        ae_net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, _, idx = data
                inputs, labels, idx = inputs.to(self.device), labels.to(self.device), idx.to(self.device)

                rec = ae_net(inputs)
                rec_loss = criterion(rec, inputs)
                scores = torch.mean(rec_loss, dim=tuple(range(1, rec.dim())))

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(zip(idx.cpu().data.numpy().tolist(),
                                            labels.cpu().data.numpy().tolist(),
                                            scores.cpu().data.numpy().tolist()))

                loss = torch.mean(rec_loss)
                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.auc_roc = roc_auc_score(labels, scores)

        # Log results
        logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
        logger.info('Test AUC: {:.2f}%'.format(100. * self.auc_roc))
        logger.info('Test Time: {:.3f}s'.format(self.test_time))
        logger.info('Finished testing autoencoder.')
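
The try/except unpacking above exists because the datasets in these snippets expose either a two-way (train/test) or a three-way (train/val/test) split. A minimal sketch of the assumed loaders contract follows; the attribute names (train_set, val_set, test_set) are hypothetical illustrations, not the actual BaseADDataset fields:

# Minimal sketch (assumption, not the real BaseADDataset API): a three-split
# dataset returns three loaders, so 3-tuple unpacking succeeds; a two-split
# dataset returns (train_loader, test_loader), and the 3-tuple unpacking
# raises ValueError, which the callers above catch.
from torch.utils.data import DataLoader

class ThreeSplitDataset(BaseADDataset):
    def loaders(self, batch_size: int, num_workers: int = 0):
        train = DataLoader(self.train_set, batch_size=batch_size,
                           shuffle=True, num_workers=num_workers)
        val = DataLoader(self.val_set, batch_size=batch_size,
                         shuffle=False, num_workers=num_workers)
        test = DataLoader(self.test_set, batch_size=batch_size,
                          shuffle=False, num_workers=num_workers)
        return train, val, test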
    def get_output(self,
                   dataset: BaseADDataset,
                   net: BaseNet,
                   set_split="train"):
        try:
            _, _, test_loader = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        except ValueError:
            _, test_loader = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        return self._get_output(test_loader, net)
    def test(self, dataset: BaseADDataset, net: BaseNet, corner_cracks=True):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        if not corner_cracks:
            _, test_loader, _ = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        else:
            _, _, test_loader = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_auc = roc_auc_score(labels, scores)
        if not corner_cracks:
            logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
        else:
            logger.info('Test set AUC (corner): {:.2f}%'.format(100. *
                                                                self.test_auc))

        logger.info('Finished testing.')
Example #4
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Set device for network
        net = net.to(self.device)

        # Testing
        logger.info('Starting testing...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, semi_targets, idx = data

                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                semi_targets = semi_targets.to(self.device)
                logger.debug('Unique semi-targets: %s',
                             np.unique(semi_targets.data.cpu().numpy()))

                idx = idx.to(self.device)

                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                losses = torch.where(
                    semi_targets == 0, dist,
                    self.eta * ((dist + self.eps)**semi_targets.float()))
                loss = torch.mean(losses)
                scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time
        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.test_auc = roc_auc_score(labels, scores)

        # Log results
        logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
        logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Test Time: {:.3f}s'.format(self.test_time))
        logger.info('Finished testing.')
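
The torch.where expression in the loop above implements the per-sample Deep SAD objective as read from the code: unlabeled points (semi-target 0) are pulled toward the center c, while labeled points are scaled by eta and exponentiated by their semi-target, so labeled anomalies (semi-target -1) incur an inverse-distance penalty that pushes them away from c. With \phi the network and \tilde{y}_i the semi-target:

\ell_i =
\begin{cases}
\lVert \phi(x_i) - c \rVert^2, & \tilde{y}_i = 0, \\
\eta \left( \lVert \phi(x_i) - c \rVert^2 + \varepsilon \right)^{\tilde{y}_i}, & \tilde{y}_i \in \{-1, +1\},
\end{cases}

and the reported anomaly score is the plain squared distance, scores = dist.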
Example #5
    def train(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
        """Trains the OC-SVM model on the training data."""
        logger = logging.getLogger()

        train_loader, _ = dataset.loaders(batch_size=64, num_workers=n_jobs_dataloader)

        # Training
        logger.info('Starting training...')

        X = ()
        for data in train_loader:
            _, text, _, weights = data
            text, weights = text.to(device), weights.to(device)

            X_batch = self.embedding(text, weights)  # X_batch.shape = (batch_size, embedding_size)
            X += (X_batch.cpu().data.numpy(),)

        X = np.concatenate(X)

        # if rbf-kernel, re-initialize svm with gamma minimizing the numerical error
        if self.kernel == 'rbf':
            self.gamma = 1 / (np.max(pairwise_distances(X)) ** 2)
            self.model = OneClassSVM(kernel='rbf', nu=self.nu, gamma=self.gamma)

        start_time = time.time()
        self.model.fit(X)
        self.results['train_time'] = time.time() - start_time

        logger.info('Training Time: {:.3f}s'.format(self.results['train_time']))
        logger.info('Finished training.')
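
The rbf branch above re-initializes the SVM with a data-dependent bandwidth: gamma is the inverse of the squared maximum pairwise distance among the embedded training points, which bounds the kernel away from numerical underflow even for the farthest pair:

\gamma = \frac{1}{\max_{i,j} \lVert x_i - x_j \rVert^2},
\qquad
k(x_i, x_j) = \exp\!\left(-\gamma \lVert x_i - x_j \rVert^2\right) \ge e^{-1}.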
Example #6
    def pretrain(self, deepSVDD, cfg, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get train data loader
        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=self.n_jobs_dataloader)

        # Set optimizer (Adam optimizer for now)
        optimizer = optim.Adam(net.parameters(),
                               lr=self.lr,
                               weight_decay=self.weight_decay,
                               amsgrad=self.optimizer_name == 'amsgrad')

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Training
        logger.info('Starting pretraining...')
        start_time = time.time()
        net.train()
        best_score = 0
        for epoch in range(self.pre_training_epochs):

            loss_epoch = 0.0
            n_batches = 0
            epoch_start_time = time.time()

            for data in tqdm(train_loader):
                inputs, _, _ = data
                inputs = inputs.to(self.device)

                # Zero the network parameter gradients
                optimizer.zero_grad()
                # Update network parameters via backpropagation: forward + backward + optimize
                _, rec_images = net(inputs)
                loss = torch.mean(
                    torch.sum(torch.abs(rec_images - inputs),
                              dim=tuple(range(1, rec_images.dim()))))
                loss.backward()
                optimizer.step()

                loss_epoch += loss.item()
                n_batches += 1

            # log epoch statistics and step the LR scheduler (defined above
            # but otherwise never stepped)
            epoch_train_time = time.time() - epoch_start_time
            logger.info('  Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format(
                epoch + 1, self.pre_training_epochs, epoch_train_time,
                loss_epoch / n_batches))
            scheduler.step()

        self.train_time = time.time() - start_time
        logger.info('Training time: %.3f' % self.train_time)

        logger.info('Finished training.')

        return net
Example #7
    def train(self, dataset: BaseADDataset, svm_net: BaseNet):
        """ 训练 svm 模型 """
        logger = logging.getLogger()

        # Set device for networks
        svm_net = svm_net.to(self.device)

        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=self.n_jobs_dataloader)
        optimizer = optim.SGD(svm_net.parameters(),
                              lr=self.lr,
                              momentum=self.momentum)
        scheduler = optim.lr_scheduler.StepLR(optimizer,
                                              step_size=self.step_size,
                                              gamma=self.gamma)

        # Training
        logger.info('Starting train svm_trainer ...')
        start_time = time.time()
        svm_net.train()
        for epoch in range(self.n_epochs):

            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' %
                            float(scheduler.get_lr()[0]))

            loss_epoch = 0.0
            n_batches = 0
            epoch_start_time = time.time()
            for data in train_loader:
                inputs, labels, _ = data
                # labels must be on the same device as the network outputs
                # for the hinge loss below
                inputs, labels = inputs.to(self.device), labels.to(self.device)

                # Zero the networks parameter gradients
                optimizer.zero_grad()

                # Update networks parameters via back propagation: forward + backward + optimize
                outputs = svm_net(inputs)

                # get loss
                loss = self.hinge_loss(outputs, labels)
                loss.backward()
                optimizer.step()

                loss_epoch += loss.item()
                n_batches += 1

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info('  Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format(
                epoch + 1, self.n_epochs, epoch_train_time,
                loss_epoch / n_batches))

        pretrain_time = time.time() - start_time
        logger.info('svm_trainer train time: %.3f' % pretrain_time)
        logger.info('Finished train svm_trainer.')

        return svm_net
    def train(self, dataset: BaseADDataset, ae_net: BaseNet):
        logger = logging.getLogger()

        # Get train data loader
        train_loader, _, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Set loss
        criterion = nn.MSELoss(reduction='none')

        # Set device (inputs are moved to self.device below, so the network
        # and criterion must live there too)
        ae_net = ae_net.to(self.device)
        criterion = criterion.to(self.device)

        # Set optimizer (Adam optimizer for now)
        optimizer = optim.Adam(ae_net.parameters(), lr=self.lr, weight_decay=self.weight_decay)

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Training
        logger.info('Starting pretraining...')
        start_time = time.time()
        ae_net.train()
        for epoch in range(self.n_epochs):

            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0]))

            epoch_loss = 0.0
            n_batches = 0
            epoch_start_time = time.time()
            for data in train_loader:
                inputs, _, _, _ = data
                inputs = inputs.to(self.device)

                # Zero the network parameter gradients
                optimizer.zero_grad()

                # Update network parameters via backpropagation: forward + backward + optimize
                rec = ae_net(inputs)
                rec_loss = criterion(rec, inputs)
                loss = torch.mean(rec_loss)
                loss.backward()
                optimizer.step()

                epoch_loss += loss.item()
                n_batches += 1

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info(f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {epoch_train_time:.3f}s '
                        f'| Train Loss: {epoch_loss / n_batches:.6f} |')

        self.train_time = time.time() - start_time
        logger.info('Pretraining Time: {:.3f}s'.format(self.train_time))
        logger.info('Finished pretraining.')

        return ae_net
    def t_sne(self, dataset: BaseADDataset, net: BaseNet, data_path, xp_path):
        logger = logging.getLogger()

        center = np.array(self.c.cpu()).reshape(1, 100)

        save_path = xp_path
        with open(os.path.join(data_path, 'test_label.pickle'), 'rb') as f:
            test_class = pickle.load(f)
        test_class = np.array(test_class)
        test_class = np.append(test_class, 2)  # 2: center

        # Set device for network
        net = net.to(self.device)
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # t-SNE
        logger.info('Start plotting t-SNE')
        t_sne_array = np.empty((0, 100))
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                t_sne_array = np.append(t_sne_array,
                                        outputs.cpu().numpy(),
                                        axis=0)

        t_sne_array = np.append(t_sne_array, center, axis=0)

        tsne = TSNE(n_components=2, random_state=32)
        tsne_results = tsne.fit_transform(t_sne_array)
        plt.figure(figsize=(16, 10))

        normal_index = (test_class == 0)
        abnormal_index = (test_class == 1)
        plt.scatter(tsne_results[normal_index, 0],
                    tsne_results[normal_index, 1],
                    c='b',
                    label='normal',
                    s=1,
                    marker=',')
        plt.scatter(tsne_results[abnormal_index, 0],
                    tsne_results[abnormal_index, 1],
                    c='r',
                    label='abnormal',
                    s=1,
                    marker=',')
        plt.scatter(tsne_results[-1, 0],
                    tsne_results[-1, 1],
                    c='k',
                    label='center',
                    s=20,
                    marker='D')

        plt.legend()
        plt.savefig(os.path.join(save_path, 't_sne.png'))
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Set device for network
        net = net.to(self.device)

        # Testing
        logger.info('Starting testing...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, semi_targets, idx = data

                inputs = inputs.to(self.device)
                labels = labels.to(self.device)
                semi_targets = semi_targets.to(self.device)
                idx = idx.to(self.device)

                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                losses = torch.where(
                    semi_targets == 0, dist,
                    self.eta * ((dist + self.eps)**semi_targets.float()))
                loss = torch.mean(losses)
                scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time
        self.test_scores = idx_label_score

        # Compute metrics
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        # AUC
        self.auc_roc = roc_auc_score(labels, scores)
        # PR-curve
        self.pr_curve = precision_recall_curve(labels, scores)
        precision, recall, thresholds = self.pr_curve
        self.auc_pr = auc(recall, precision)
        self.test_loss = epoch_loss / n_batches
Example #11
    def test(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
        """Tests the OC-SVM model on the test data."""
        logger = logging.getLogger()

        _, test_loader = dataset.loaders(batch_size=128, num_workers=n_jobs_dataloader)

        # Get data from loader
        idx_label_score = []
        X = ()
        idxs = []
        labels = []
        for data in test_loader:
            inputs, label_batch, _, idx = data
            inputs, label_batch, idx = inputs.to(device), label_batch.to(device), idx.to(device)
            if self.hybrid:
                inputs = self.ae_net.encoder(inputs)  # in hybrid approach, take code representation of AE as features
            X_batch = inputs.view(inputs.size(0), -1)  # X_batch.shape = (batch_size, n_channels * height * width)
            X += (X_batch.cpu().data.numpy(),)
            idxs += idx.cpu().data.numpy().astype(np.int64).tolist()
            labels += label_batch.cpu().data.numpy().astype(np.int64).tolist()
        X = np.concatenate(X)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()

        scores = (-1.0) * self.model.decision_function(X)

        self.results['test_time'] = time.time() - start_time
        scores = scores.flatten()
        self.rho = -self.model.intercept_[0]

        # Save triples of (idx, label, score) in a list
        idx_label_score += list(zip(idxs, labels, scores.tolist()))
        self.results['test_scores'] = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.results['test_auc'] = roc_auc_score(labels, scores)

        # If hybrid, also test model with linear kernel
        if self.hybrid:
            start_time = time.time()
            scores_linear = (-1.0) * self.linear_model.decision_function(X)
            self.results['test_time_linear'] = time.time() - start_time
            scores_linear = scores_linear.flatten()
            self.results['test_auc_linear'] = roc_auc_score(labels, scores_linear)
            logger.info('Test AUC linear model: {:.2f}%'.format(100. * self.results['test_auc_linear']))
            logger.info('Test Time linear model: {:.3f}s'.format(self.results['test_time_linear']))

        # Log results
        logger.info('Test AUC: {:.2f}%'.format(100. * self.results['test_auc']))
        logger.info('Test Time: {:.3f}s'.format(self.results['test_time']))
        logger.info('Finished testing.')
Example #12
    def train(self, dataset: BaseADDataset, ae_net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        ae_net = ae_net.to(self.device)

        # Get train data loader
        train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Set optimizer (Adam optimizer for now)
        optimizer = optim.Adam(ae_net.parameters(), lr=self.lr, weight_decay=self.weight_decay,
                               amsgrad=self.optimizer_name == 'amsgrad')

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Training
        logger.info('Starting pretraining...')
        start_time = time.time()
        ae_net.train()
        for epoch in range(self.n_epochs):

            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0]))

            loss_epoch = 0.0
            n_batches = 0
            epoch_start_time = time.time()
            for data in train_loader:
                inputs, _, _ = data
                inputs = inputs.to(self.device)

                # Zero the network parameter gradients
                optimizer.zero_grad()

                # Update network parameters via backpropagation: forward + backward + optimize
                outputs = ae_net(inputs)
                scores = torch.sum((outputs.float() - inputs.float()) ** 2, dim=tuple(range(1, outputs.dim())))
                loss = torch.mean(scores)
                loss.backward()
                optimizer.step()

                loss_epoch += loss.item()
                n_batches += 1

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info('  Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'
                        .format(epoch + 1, self.n_epochs, epoch_train_time, loss_epoch / n_batches))

        pretrain_time = time.time() - start_time
        logger.info('Pretraining time: %.3f' % pretrain_time)
        logger.info('Finished pretraining.')

        return ae_net
Example #13
    def test(self, dataset: BaseADDataset, vae: BaseNet):
        logger = logging.getLogger()

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Set device
        vae = vae.to(self.device)

        # Testing
        logger.info('Starting testing...')
        epoch_loss = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        vae.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, _, idx = data
                inputs, labels, idx = inputs.to(self.device), labels.to(
                    self.device), idx.to(self.device)

                inputs = inputs.view(inputs.size(0), -1)

                rec = vae(inputs)
                likelihood = -binary_cross_entropy(rec, inputs)
                scores = -likelihood  # negative likelihood as anomaly score

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                # Overall loss
                elbo = likelihood - vae.kl_divergence
                loss = -torch.mean(elbo)

                epoch_loss += loss.item()
                n_batches += 1

        self.test_time = time.time() - start_time

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.test_auc = roc_auc_score(labels, scores)

        # Log results
        logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
        logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Test Time: {:.3f}s'.format(self.test_time))
        logger.info('Finished testing variational autoencoder.')
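
The loss assembled above is the negative ELBO, with binary cross-entropy standing in for the negative log-likelihood and vae.kl_divergence assumed to be cached by the forward pass. The anomaly score keeps only the reconstruction term:

\mathcal{L}(x) = \underbrace{\mathrm{BCE}(\hat{x}, x)}_{-\,\text{likelihood}} + \mathrm{KL}\!\left(q(z \mid x) \,\Vert\, p(z)\right),
\qquad
\text{score}(x) = \mathrm{BCE}(\hat{x}, x).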
Example #14
    def train(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for networks
        net = net.to(self.device)

        train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        optimizer = optim.RMSprop(net.parameters(), lr=self.lr, weight_decay=self.weight_decay, eps=self.epsilon,
                                  momentum=self.momentum)

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Training
        logger.info('Starting train lstm_autoencoder ...')
        start_time = time.time()
        net.train()
        for epoch in range(self.n_epochs):

            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0]))

            loss_epoch = 0.0
            n_batches = 0
            epoch_start_time = time.time()
            for data in train_loader:
                inputs, _, _ = data
                inputs = inputs.to(self.device)

                # Zero the networks parameter gradients
                optimizer.zero_grad()

                # Update networks parameters via back propagation: forward + backward + optimize
                _, outputs = net(inputs.view(-1, 1, self.n_features))
                scores = torch.sum((outputs - inputs) ** 2, dim=tuple(range(1, outputs.dim())))
                loss = torch.mean(scores)
                loss.backward()
                optimizer.step()

                loss_epoch += loss.item()
                n_batches += 1

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info('  Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'
                        .format(epoch + 1, self.n_epochs, epoch_train_time, loss_epoch / n_batches))

        self.train_time = time.time() - start_time
        logger.info('lstm_autoencoder train time: %.3f' % self.train_time)
        logger.info('Finished train lstm_autoencoder.')

        return net
    def test(self,
             dataset: BaseADDataset,
             device: str = 'cpu',
             n_jobs_dataloader: int = 0,
             set_split='test'):
        if set_split == "train":
            try:
                train_loader, _, _ = dataset.loaders(
                    batch_size=self.batch_size,
                    num_workers=self.n_jobs_dataloader)
            except ValueError:
                train_loader, _ = dataset.loaders(
                    batch_size=self.batch_size,
                    num_workers=self.n_jobs_dataloader)
            self.train_labels, self.train_scores, self.train_loss = self._test(
                train_loader)

        elif set_split == "val":
            try:
                _, val_loader, _ = dataset.loaders(
                    batch_size=self.batch_size,
                    num_workers=self.n_jobs_dataloader)

                self.val_labels, self.val_scores, self.val_loss = self._test(
                    val_loader)
            except ValueError:
                raise ValueError(
                    "The dataset does not support a validation DataLoader")
        else:
            try:
                _, _, test_loader = dataset.loaders(
                    batch_size=self.batch_size,
                    num_workers=self.n_jobs_dataloader)
            except ValueError:
                _, test_loader = dataset.loaders(
                    batch_size=self.batch_size,
                    num_workers=self.n_jobs_dataloader)

            self.test_labels, self.test_scores, self.test_loss = self._test(
                test_loader)
    def train(self,
              dataset: BaseADDataset,
              device: str = 'cpu',
              n_jobs_dataloader: int = 0):
        """Trains the Isolation Forest model on the training data."""
        logger = logging.getLogger()

        # Do not drop the last batch for non-SGD optimization (shallow SSAD)
        try:
            train_loader, _, _ = dataset.loaders(batch_size=128, num_workers=0)
        except ValueError:
            train_loader, _ = dataset.loaders(batch_size=128, num_workers=0)

        # Get data from loader
        X = ()
        for data in train_loader:
            inputs, _, _, _ = data
            inputs = inputs.to(device)
            if self.hybrid:
                inputs = self.ae_net.encoder(
                    inputs
                )  # in hybrid approach, take code representation of AE as features
            X_batch = inputs.view(
                inputs.size(0), -1
            )  # X_batch.shape = (batch_size, n_channels * height * width)
            X += (X_batch.cpu().data.numpy(), )
        X = np.concatenate(X)

        # Training
        logger.info('Starting training...')
        start_time = time.time()
        self.model.fit(X)
        train_time = time.time() - start_time
        self.results['train_time'] = train_time

        logger.info('Training Time: {:.3f}s'.format(
            self.results['train_time']))
        logger.info('Finished training.')
Example #17
    def test(self, dataset: BaseADDataset, net1: BaseNet, net2: BaseNet):
        logger = logging.getLogger()
        logger.debug('R: %s', self.R)
        logger.debug('c: %s', self.c)

        # Set device for networks
        net1 = net1.to(self.device)
        net2 = net2.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net1.eval()
        net2.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                code, _ = net1(inputs.view(-1, 1, 9))
                outputs = net2(code)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_ftr, self.test_tpr, _ = roc_curve(labels, scores)

        self.test_score = scores
        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Finished testing.')
    def test(self, dataset: BaseADDataset, ae_net: BaseNet, test_image):
        logger = logging.getLogger()

        # Set device for network
        ae_net = ae_net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Testing autoencoder...')
        loss_epoch = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        ae_net.eval()
        with torch.no_grad():
            for i, data in enumerate(test_loader):
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = ae_net(inputs)
                # Note: this tensor comparison assumes batch_size == 1
                if labels == 0:
                    check_autoencoder_quality(inputs, test_image[i], outputs)
                scores = torch.sum((outputs - inputs)**2,
                                   dim=tuple(range(1, outputs.dim())))
                loss = torch.mean(scores)

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

                loss_epoch += loss.item()
                n_batches += 1

        logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * auc))

        test_time = time.time() - start_time
        logger.info('Autoencoder testing time: %.3f' % test_time)
        logger.info('Finished testing autoencoder.')
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)
        logger.info('Number of test batches: {}'.format(len(test_loader)))
        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist
                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)
        self.test_scores = idx_label_score

        # Compute AUC (the original accuracy_score call compared labels
        # against a never-populated list and could not run)
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
        logger.info('Finished testing.')
        return str(self.test_auc)
    def test(self, dataset: BaseADDataset, net: BaseNet, val=False):
        if val:
            try:
                _, val_loader, _ = dataset.loaders(
                    batch_size=self.batch_size,
                    num_workers=self.n_jobs_dataloader)

                self.val_labels, self.val_scores, self.val_loss = self._test(
                    val_loader, net)
            except ValueError:
                raise ValueError(
                    "The dataset does not support a validation DataLoader")
        else:
            try:
                _, _, test_loader = dataset.loaders(
                    batch_size=self.batch_size,
                    num_workers=self.n_jobs_dataloader)
            except ValueError:
                _, test_loader = dataset.loaders(
                    batch_size=self.batch_size,
                    num_workers=self.n_jobs_dataloader)

            self.test_labels, self.test_scores, self.test_loss = self._test(
                test_loader, net)
Example #21
    def test(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
        """Tests the OC-SVM model on the test data."""
        logger = logging.getLogger()

        _, test_loader = dataset.loaders(batch_size=64, num_workers=n_jobs_dataloader)

        # Testing
        logger.info('Starting testing...')

        idx_label_score = []
        X = ()
        idxs = []
        labels = []
        for data in test_loader:
            idx, text, label_batch, weights = data
            text = text.to(device)
            label_batch = label_batch.to(device)
            weights = weights.to(device)

            X_batch = self.embedding(text, weights)  # X_batch.shape = (batch_size, embedding_size)
            X += (X_batch.cpu().data.numpy(),)
            idxs += idx
            labels += label_batch.cpu().data.numpy().astype(np.int64).tolist()

        X = np.concatenate(X)

        start_time = time.time()
        scores = (-1.0) * self.model.decision_function(X)
        self.results['test_time'] = time.time() - start_time

        scores = scores.flatten()
        self.rho = -self.model.intercept_[0]

        # Save triples of (idx, label, score) in a list
        idx_label_score += list(zip(idxs, labels, scores.tolist()))
        self.results['test_scores'] = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)
        self.results['test_auc'] = roc_auc_score(labels, scores)

        # Log results
        logger.info('Test AUC: {:.2f}%'.format(100. * self.results['test_auc']))
        logger.info('Test Time: {:.3f}s'.format(self.results['test_time']))
        logger.info('Finished testing.')
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                scores = self.lastlay(outputs)

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        logger.info('Finished testing.')
Example #23
def test(dataset: BaseADDataset, ae_net: BaseNet):
    # Set device for network
    ae_net = ae_net.to(device)

    # Get test data loader

    letter, labels = dataset.loaders(batch_size=batch_size,
                                     num_workers=0,
                                     shuffle_test=False,
                                     shuffle_train=False)

    ae_net.eval()
    with torch.no_grad():
        i = 0
        for data, label in zip(letter, labels):
            i += 1
            inputs, _ = data
            lab, _ = label
            inputs = inputs.to(device)
            lab = lab.to(device)
            # Forward pass (no gradients needed here)
            outputs = ae_net(inputs)
            plot_images_grid(inputs[0:16],
                             export_img='./log/test/input' + str(i),
                             title='Input ',
                             nrow=4,
                             padding=4)
            plot_images_grid(lab[0:16],
                             export_img='./log/test/label' + str(i),
                             title='Label ',
                             nrow=4,
                             padding=4)
            plot_images_grid(outputs[0:16],
                             export_img='./log/test/output' + str(i),
                             title='Output ',
                             nrow=4,
                             padding=4)
Example #24
    def apply_model(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get apply_model data loader
        _, _, apply_loader = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Applying model
        logger.info('Starting Deep SVDD application.')
        start_time = time.time()
        idx_score = []
        net.eval()
        with torch.no_grad():
            for data in apply_loader:
                inputs, nolabels, idx = data  # nolabels are NaN
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save pairs of (idx, score) in a list
                idx_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.apply_time = time.time() - start_time
        logger.info('Deep SVDD application time: %.3f' % self.apply_time)

        ind, scores = zip(*idx_score)
        self.ind = np.array(ind)
        self.scores = np.array(scores)

        logger.info('Finished Deep SVDD application.')
    def __init__(self, dataset: BaseADDataset, network: BaseNet, k: int,
                 lr: float, n_epochs: int, batch_size: int, rep_dim: int,
                 K: int, weight_decay: float, device: str,
                 n_jobs_dataloader: int, w_rec: float, w_feat: float, cfg):

        super().__init__(lr, n_epochs, batch_size, rep_dim, K, weight_decay,
                         device, n_jobs_dataloader, w_rec, w_feat)
        self.ae_net = network.to(self.device)
        self.train_loader, self.test_loader = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        self.optimizer = optim.Adam(self.ae_net.parameters(),
                                    lr=self.lr,
                                    weight_decay=self.weight_decay)
        self.rec_loss = torch.nn.L1Loss()
        self.feat_loss = torch.nn.MSELoss()
        self.best_score = 0
        self.min_loss = 1000
        self.k = k
        self.cfg = cfg
        self.logger = logging.getLogger()
        self.memory = torch.randn(size=(len(self.train_loader.dataset),
                                        self.rep_dim)).to(self.device)
Example #26
    def test(self, dataset: BaseADDataset, net: BaseNet, is_test=0):
        """
            dt_type:数据集的类型, 测试集 0 / 训练集 1
        """
        logger = logging.getLogger()

        # Set device for networks
        net = net.to(self.device)

        # Get test data loader
        if is_test == 0:  # test-set loader
            _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        if is_test == 1:  # training-set loader
            test_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Testing lstm_autoencoder...')
        loss_epoch = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)

                # get the LSTM test labels; label.shape == (batch_size,)
                label = labels.numpy()

                if is_test == 0:
                    for i in range(len(label)):
                        self.test_label.append(label[i])
                if is_test == 1:
                    for i in range(len(label)):
                        self.train_label.append(label[i])

                code, outputs = net(inputs.view(-1, 1, self.n_features))
                code = code.detach().cpu().numpy()

                if is_test == 0:
                    for i in range(len(code)):
                        self.test_code.append(code[i])
                if is_test == 1:
                    for i in range(len(code)):
                        self.train_code.append(code[i])

                scores = torch.sum((outputs - inputs) ** 2, dim=tuple(range(1, outputs.dim())))
                loss = torch.mean(scores)

                # Save triple of (idx, label, score) in a list
                idx_label_score += list(zip(idx.cpu().data.numpy().tolist(),
                                            labels.cpu().data.numpy().tolist(),
                                            scores.cpu().data.numpy().tolist()))

                loss_epoch += loss.item()
                n_batches += 1

        logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

        self.test_time = time.time() - start_time
        logger.info('lstm_autoencoder testing time: %.3f' % self.test_time)
        self.test_scores = idx_label_score

        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        """ 测试集 """
        if is_test == 0:
            self.test_auc = roc_auc_score(labels, scores)
            logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
            logger.info('Finished test lstm_autoencoder.')
Example #27
    def test(self, dataset: BaseADDataset, ae_net: BaseNet, flg=0):
        """
            训练集 获取正常数据簇 -- 中心点,半径
            测试集 Kmeans 对数据进行预测,超过簇半径为异常数据,否则正常数据
        """

        logger = logging.getLogger()

        # Set device for networks
        ae_net = ae_net.to(self.device)

        # flg == 1: training set, flg == 0: test set
        if flg == 1:
            test_loader, _ = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
        else:
            _, test_loader = dataset.loaders(
                batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Testing ae...')
        loss_epoch = 0.0
        n_batches = 0
        start_time = time.time()
        idx_label_score = []
        ae_net.eval()
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = ae_net(inputs)
                scores = torch.sum((outputs - inputs)**2,
                                   dim=tuple(range(1, outputs.dim())))
                error = (outputs - inputs)**2
                loss = torch.mean(scores)

                # Save tuples of (idx, label, score, error) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist(),
                        error.cpu().data.numpy().tolist()))

                loss_epoch += loss.item()
                n_batches += 1

        logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

        _, labels, scores, error = zip(*idx_label_score)
        labels = np.array(labels)  # labels.shape == (97278,)
        scores = np.array(scores)  # scores.shape == (97278,)
        error = np.array(error)  # error.shape == (97278, ...)

        if flg == 1:  # training set
            X = error
            self.kmeans = KMeans(n_clusters=self.clusters).fit(X)
            self.center = self.kmeans.cluster_centers_.tolist()
            self.radius = self.get_radius(X)
            print("roc_self.center", self.center)
            print("roc_self.radius", self.radius)
        else:  # test set
            Y = error
            pred_labels = []  # predicted labels
            pred_km = self.kmeans.predict(Y)
            for i in range(len(pred_km)):
                # dis: distance from the cluster center to the point,
                # used as the classification criterion
                dis = self.manhattan_distance(self.center[pred_km[i]], Y[i])
                if dis > self.radius[pred_km[i]]:
                    pred_labels.append(1)
                else:
                    pred_labels.append(0)

            pred_labels = np.array(pred_labels)
            self.test_ftr, self.test_tpr, _ = roc_curve(labels, pred_labels)
            # Area under the ROC curve serves as the accuracy measure
            fpr, tpr, thresholds = roc_curve(labels, pred_labels)
            self.test_auc = auc(fpr, tpr)
            self.test_mcc = matthews_corrcoef(labels, pred_labels)
            _, _, f_score, _ = precision_recall_fscore_support(labels,
                                                               pred_labels,
                                                               labels=[0, 1])
            self.test_f_score = f_score[1]

        self.test_time = time.time() - start_time
        if flg == 0:
            logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        logger.info('ae testing time: %.3f' % self.test_time)
        logger.info('Finished testing ae.')
    def train(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get train data loader
        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=self.n_jobs_dataloader)

        # Set optimizer (Adam optimizer for now)
        optimizer = optim.Adam(net.parameters(),
                               lr=self.lr,
                               weight_decay=self.weight_decay,
                               amsgrad=self.optimizer_name == 'amsgrad')

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Initialize hypersphere center c (if c not loaded)
        if self.c is None:
            logger.info('Initializing center c...')
            self.c = self.init_center_c(train_loader, net)
            logger.info('Center c initialized.')

        # Training
        logger.info('Starting training...')
        start_time = time.time()
        net.train()
        for epoch in range(self.n_epochs):

            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' %
                            float(scheduler.get_lr()[0]))

            loss_epoch = 0.0
            n_batches = 0
            epoch_start_time = time.time()
            for data in train_loader:
                inputs, _, _ = data
                inputs = inputs.to(self.device)

                # Zero the network parameter gradients
                optimizer.zero_grad()

                # Update network parameters via backpropagation: forward + backward + optimize
                outputs = net(inputs)
                # dist = torch.sum((outputs - self.c) ** 2, dim=1)

                ### NEW - get closest cluster center, take dist, sum/mean for loss
                centers = torch.transpose(self.c, 0, 1)
                dist = torch.zeros(outputs.shape[0], device=self.device)
                for i in range(outputs.shape[0]):
                    # Squared distance from each point to its closest cluster center
                    dist[i] = torch.sum((centers - outputs[i])**2, dim=1).min()
                ###
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                    loss = self.R**2 + (1 / self.nu) * torch.mean(
                        torch.max(torch.zeros_like(scores), scores))
                else:
                    loss = torch.mean(dist)
                loss.backward()
                optimizer.step()

                # Update hypersphere radius R on mini-batch distances
                if (self.objective == 'soft-boundary') and (
                        epoch >= self.warm_up_n_epochs):
                    self.R.data = torch.tensor(get_radius(dist, self.nu),
                                               device=self.device)

                loss_epoch += loss.item()
                n_batches += 1

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info('  Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format(
                epoch + 1, self.n_epochs, epoch_train_time,
                loss_epoch / n_batches))

        self.train_time = time.time() - start_time
        logger.info('Training time: %.3f' % self.train_time)

        logger.info('Finished training.')

        return net
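
get_radius is not defined in this section; in reference Deep SVDD implementations the soft-boundary radius update solves the line search for R in closed form as the (1 - nu)-quantile of the distances. A sketch under that assumption:

import numpy as np
import torch

def get_radius(dist: torch.Tensor, nu: float) -> float:
    # Closed-form radius update for the soft-boundary objective: the
    # (1 - nu)-quantile of the root squared distances leaves roughly a
    # nu-fraction of points outside the hypersphere.
    return np.quantile(np.sqrt(dist.clone().data.cpu().numpy()), 1 - nu)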
    def test(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Set device for network
        net = net.to(self.device)

        # Get test data loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

        # Testing
        logger.info('Starting testing...')
        start_time = time.time()
        idx_label_score = []
        net.eval()
        output_data = []
        label_data = []
        with torch.no_grad():
            for data in test_loader:
                inputs, labels, idx = data
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                output_data.append(outputs)
                label_data.append(labels)
                # dist = torch.sum((outputs - self.c) ** 2, dim=1)

                ### NEW
                if (self.c.dim() == 1):  # naive deep_svdd
                    centers = self.c
                    dist = torch.sum((outputs - self.c)**2, dim=1)
                else:
                    centers = torch.transpose(self.c, 0, 1)
                    dist = torch.zeros(outputs.shape[0], device=self.device)
                    for i in range(outputs.shape[0]):
                        # Squared distance from each point to its closest cluster center
                        dist[i] = torch.sum((centers - outputs[i])**2,
                                            dim=1).min()
                ###
                if self.objective == 'soft-boundary':
                    scores = dist - self.R**2
                else:
                    scores = dist

                # Save triples of (idx, label, score) in a list
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist()))

        self.test_time = time.time() - start_time
        logger.info('Testing time: %.3f' % self.test_time)

        self.test_scores = idx_label_score

        # Compute AUC
        _, labels, scores = zip(*idx_label_score)
        labels = np.array(labels)
        scores = np.array(scores)

        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

        # UMAP (same UMAP model as fit in training) - use anomaly_data=True
        # # UMAP Plot (on testing data)
        # kmeans_centers = np.load('centers.npy')
        # output_data = torch.cat(output_data)
        # label_data = torch.cat(label_data).numpy()
        # self.latent_UMAP(output_data, label_data, kmeans_centers, anomaly_data = True)
        # import pdb; pdb.set_trace()

        # UMAP Plot (on training data)
        # Get train data loader
        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=self.n_jobs_dataloader)

        output_data = []
        label_data = []
        with torch.no_grad():
            for data in train_loader:
                # get the inputs of the batch
                inputs, labels, _ = data  # labels are only for the UMAP plot of hyperspheres
                inputs = inputs.to(self.device)
                outputs = net(inputs)
                output_data.append(outputs)
                label_data.append(labels)
        kmeans_centers = np.load('centers.npy')
        output_data = torch.cat(output_data)
        label_data = torch.cat(label_data).numpy()
        self.latent_UMAP(output_data,
                         label_data,
                         kmeans_centers,
                         anomaly_data=True)

        logger.info('Finished testing.')
    def train(self, dataset: BaseADDataset, net: BaseNet):
        logger = logging.getLogger()

        # Get train data loader
        train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                          num_workers=self.n_jobs_dataloader)

        # Set device for network
        net = net.to(self.device)

        # Set optimizer (Adam optimizer for now)
        optimizer = optim.Adam(net.parameters(),
                               lr=self.lr,
                               weight_decay=self.weight_decay)

        # Set learning rate scheduler
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=self.lr_milestones, gamma=0.1)

        # Initialize hypersphere center c (if c not loaded)
        if self.c is None:
            logger.info('Initializing center c...')
            self.c = self.init_center_c(train_loader, net)
            logger.info('Center c initialized.')

        # Training
        logger.info('Starting training...')
        start_time = time.time()
        net.train()
        for epoch in range(self.n_epochs):

            scheduler.step()
            if epoch in self.lr_milestones:
                logger.info('  LR scheduler: new learning rate is %g' %
                            float(scheduler.get_lr()[0]))

            epoch_loss = 0.0
            n_batches = 0
            epoch_start_time = time.time()
            for data in train_loader:
                inputs, _, semi_targets, _ = data
                inputs, semi_targets = inputs.to(self.device), semi_targets.to(
                    self.device)

                # Zero the network parameter gradients
                optimizer.zero_grad()

                # Update network parameters via backpropagation: forward + backward + optimize
                outputs = net(inputs)
                dist = torch.sum((outputs - self.c)**2, dim=1)
                losses = torch.where(
                    semi_targets == 0, dist,
                    self.eta * ((dist + self.eps)**semi_targets.float()))
                loss = torch.mean(losses)
                loss.backward()
                optimizer.step()

                epoch_loss += loss.item()
                n_batches += 1

            # log epoch statistics
            epoch_train_time = time.time() - epoch_start_time
            logger.info(
                f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {epoch_train_time:.3f}s '
                f'| Train Loss: {epoch_loss / n_batches:.6f} |')

        self.train_time = time.time() - start_time
        logger.info('Training Time: {:.3f}s'.format(self.train_time))
        logger.info('Finished training.')

        return net