def test(self, dataset: BaseADDataset, ae_net: BaseNet):
    """Evaluate the autoencoder on the test set and log loss, AUC and time.

    Side effects: sets ``self.test_time`` and ``self.auc_roc``.
    """
    logger = logging.getLogger()

    # Get test data loader. Some datasets return (train, test), others
    # (train, val, test); a tuple-unpacking mismatch raises ValueError.
    # BUG FIX: the original used a bare ``except:``, which also swallowed
    # unrelated errors (e.g. a typo inside loaders()).
    try:
        _, _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
    except ValueError:
        _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Elementwise MSE so we can reduce to one score per sample below.
    criterion = nn.MSELoss(reduction='none')

    # Set device for network
    ae_net = ae_net.to(self.device)
    criterion = criterion.to(self.device)

    # Testing
    logger.info('Testing autoencoder...')
    epoch_loss = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    ae_net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, _, idx = data
            inputs, labels, idx = inputs.to(self.device), labels.to(self.device), idx.to(self.device)

            rec = ae_net(inputs)
            rec_loss = criterion(rec, inputs)
            # Mean reconstruction error over non-batch dims = anomaly score.
            scores = torch.mean(rec_loss, dim=tuple(range(1, rec.dim())))

            # Save triple of (idx, label, score) in a list
            idx_label_score += list(zip(idx.cpu().data.numpy().tolist(),
                                        labels.cpu().data.numpy().tolist(),
                                        scores.cpu().data.numpy().tolist()))

            loss = torch.mean(rec_loss)
            epoch_loss += loss.item()
            n_batches += 1

    self.test_time = time.time() - start_time

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.auc_roc = roc_auc_score(labels, scores)

    # Log results
    logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
    logger.info('Test AUC: {:.2f}%'.format(100. * self.auc_roc))
    logger.info('Test Time: {:.3f}s'.format(self.test_time))
    logger.info('Finished testing autoencoder.')
def get_output(self, dataset: BaseADDataset, net: BaseNet, set_split="train"):
    """Return ``self._get_output`` evaluated on the dataset's test loader.

    NOTE(review): ``set_split`` is accepted but unused — the test loader is
    always chosen; confirm against callers before relying on it.
    """
    # Datasets expose either (train, val, test) or (train, test) loaders;
    # an unpacking-arity mismatch raises ValueError.
    # BUG FIX: narrowed the original bare ``except:`` so unrelated errors
    # inside loaders() are no longer hidden.
    try:
        _, _, test_loader = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
    except ValueError:
        _, test_loader = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
    return self._get_output(test_loader, net)
def test(self, dataset: BaseADDataset, net: BaseNet, corner_cracks=True): logger = logging.getLogger() # Set device for network net = net.to(self.device) # Get test data loader if not corner_cracks: _, test_loader, _ = dataset.loaders( batch_size=self.batch_size, num_workers=self.n_jobs_dataloader) else: _, _, test_loader = dataset.loaders( batch_size=self.batch_size, num_workers=self.n_jobs_dataloader) # Testing logger.info('Starting testing...') start_time = time.time() idx_label_score = [] net.eval() with torch.no_grad(): for data in test_loader: inputs, labels, idx = data inputs = inputs.to(self.device) outputs = net(inputs) dist = torch.sum((outputs - self.c)**2, dim=1) if self.objective == 'soft-boundary': scores = dist - self.R**2 else: scores = dist # Save triples of (idx, label, score) in a list idx_label_score += list( zip(idx.cpu().data.numpy().tolist(), labels.cpu().data.numpy().tolist(), scores.cpu().data.numpy().tolist())) self.test_time = time.time() - start_time logger.info('Testing time: %.3f' % self.test_time) self.test_scores = idx_label_score # Compute AUC _, labels, scores = zip(*idx_label_score) labels = np.array(labels) scores = np.array(scores) self.test_auc = roc_auc_score(labels, scores) if not corner_cracks: logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc)) else: logger.info('Test set AUC (corner): {:.2f}%'.format(100. * self.test_auc)) logger.info('Finished testing.')
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate a Deep SAD network on the test set and log loss/AUC/time.

    Anomaly score is the squared distance to the center ``self.c``; the
    loss applies the semi-supervised weighting via ``semi_targets``.
    Side effects: sets ``self.test_time``, ``self.test_scores`` and
    ``self.test_auc``.
    """
    logger = logging.getLogger()

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Set device for network
    net = net.to(self.device)

    # Testing
    logger.info('Starting testing...')
    epoch_loss = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, semi_targets, idx = data

            inputs = inputs.to(self.device)
            labels = labels.to(self.device)
            semi_targets = semi_targets.to(self.device)
            # BUG FIX: the per-batch stdout print of unique semi-targets was
            # debug output flooding normal runs; demoted to debug logging.
            logger.debug('Unique Semi Targets: %s',
                         np.unique(semi_targets.data.cpu().numpy()))
            idx = idx.to(self.device)

            outputs = net(inputs)
            dist = torch.sum((outputs - self.c)**2, dim=1)
            # Deep SAD loss: plain distance for unlabeled samples (0),
            # eta-weighted (dist+eps)^(+-1) for labeled ones.
            losses = torch.where(
                semi_targets == 0, dist,
                self.eta * ((dist + self.eps)**semi_targets.float()))
            loss = torch.mean(losses)
            scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

            epoch_loss += loss.item()
            n_batches += 1

    self.test_time = time.time() - start_time
    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)

    # Log results
    logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
    logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Test Time: {:.3f}s'.format(self.test_time))
    logger.info('Finished testing.')
def train(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
    """Trains the OC-SVM model on the training data.

    Embeds every training batch via ``self.embedding``, concatenates the
    embeddings in memory, and fits ``self.model``. For the RBF kernel,
    gamma is first re-derived from the pairwise distances of the data.
    Side effects: may replace ``self.gamma``/``self.model``; records
    ``self.results['train_time']``.
    """
    logger = logging.getLogger()

    # Batch size 64 is hard-coded for the embedding-extraction pass.
    train_loader, _ = dataset.loaders(batch_size=64, num_workers=n_jobs_dataloader)

    # Training
    logger.info('Starting training...')

    # Collect embedded representations of the whole training set in memory.
    X = ()
    for data in train_loader:
        _, text, _, weights = data
        text, weights = text.to(device), weights.to(device)
        X_batch = self.embedding(text, weights)  # X_batch.shape = (batch_size, embedding_size)
        X += (X_batch.cpu().data.numpy(),)
    X = np.concatenate(X)

    # if rbf-kernel, re-initialize svm with gamma minimizing the numerical error
    if self.kernel == 'rbf':
        self.gamma = 1 / (np.max(pairwise_distances(X)) ** 2)
        self.model = OneClassSVM(kernel='rbf', nu=self.nu, gamma=self.gamma)

    start_time = time.time()
    self.model.fit(X)
    self.results['train_time'] = time.time() - start_time

    logger.info('Training Time: {:.3f}s'.format(self.results['train_time']))
    logger.info('Finished training.')
def pretrain(self, deepSVDD, cfg, dataset: BaseADDataset, net: BaseNet):
    """Pretrain ``net`` as an autoencoder with an L1 reconstruction loss.

    ``deepSVDD`` and ``cfg`` are part of the caller-facing signature but
    are not used here. Returns the pretrained network and records
    ``self.train_time``.
    """
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get train data loader
    train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                      num_workers=self.n_jobs_dataloader)

    # Set optimizer (Adam optimizer for now)
    optimizer = optim.Adam(net.parameters(),
                           lr=self.lr,
                           weight_decay=self.weight_decay,
                           amsgrad=self.optimizer_name == 'amsgrad')

    # NOTE(review): the original constructed a MultiStepLR scheduler and a
    # ``best_score`` counter but never used either; the dead locals were
    # removed. The learning rate stays constant throughout pretraining.

    # Training
    logger.info('Starting pretraining...')
    start_time = time.time()
    net.train()
    for epoch in range(self.pre_training_epochs):
        loss_epoch = 0.0
        n_batches = 0
        epoch_start_time = time.time()
        for data in tqdm(train_loader):
            inputs, _, _ = data
            inputs = inputs.to(self.device)

            # Zero the network parameter gradients
            optimizer.zero_grad()

            # Forward: net returns (code, reconstruction); L1 error summed
            # over all non-batch dims, then averaged over the batch.
            _, rec_images = net(inputs)
            loss = torch.mean(
                torch.sum(torch.abs(rec_images - inputs),
                          dim=tuple(range(1, rec_images.dim()))))
            loss.backward()
            optimizer.step()

            loss_epoch += loss.item()
            n_batches += 1

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(' Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format(
            epoch + 1, self.pre_training_epochs, epoch_train_time,
            loss_epoch / n_batches))

    self.train_time = time.time() - start_time
    logger.info('Training time: %.3f' % self.train_time)
    logger.info('Finished training.')

    return net
def train(self, dataset: BaseADDataset, svm_net: BaseNet):
    """Train the SVM network with a hinge loss and return it."""
    logger = logging.getLogger()

    # Set device for networks
    svm_net = svm_net.to(self.device)

    train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    optimizer = optim.SGD(svm_net.parameters(), lr=self.lr, momentum=self.momentum)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=self.step_size, gamma=self.gamma)

    # Training
    logger.info('Starting train svm_trainer ...')
    start_time = time.time()
    svm_net.train()
    for epoch in range(self.n_epochs):
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0]))

        loss_epoch = 0.0
        n_batches = 0
        epoch_start_time = time.time()
        for data in train_loader:
            inputs, labels, _ = data
            inputs = inputs.to(self.device)
            # BUG FIX: labels stayed on the CPU while the network output
            # lives on self.device; hinge_loss then mixes devices on GPU.
            labels = labels.to(self.device)

            # Zero the networks parameter gradients
            optimizer.zero_grad()

            # Update networks parameters via back propagation: forward + backward + optimize
            outputs = svm_net(inputs)
            # get loss
            loss = self.hinge_loss(outputs, labels)
            loss.backward()
            optimizer.step()

            loss_epoch += loss.item()
            n_batches += 1

        # BUG FIX: step the LR scheduler after the epoch's optimizer steps
        # (PyTorch >= 1.1 ordering); the original stepped it before any
        # optimizer.step(), skipping the initial learning rate.
        scheduler.step()

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(' Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format(
            epoch + 1, self.n_epochs, epoch_train_time, loss_epoch / n_batches))

    pretrain_time = time.time() - start_time
    logger.info('svm_trainer train time: %.3f' % pretrain_time)
    logger.info('Finished train svm_trainer.')
    return svm_net
def train(self, dataset: BaseADDataset, ae_net: BaseNet):
    """Pretrain the autoencoder with elementwise MSE reconstruction loss.

    Returns the trained network and records ``self.train_time``.
    """
    logger = logging.getLogger()

    # Get train data loader
    train_loader, _, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Set loss (elementwise; the batch mean is taken explicitly below)
    criterion = nn.MSELoss(reduction='none')

    # BUG FIX: the device transfer was commented out, so training crashed
    # whenever self.device was a GPU (inputs are moved below but the
    # network and criterion were not).
    ae_net = ae_net.to(self.device)
    criterion = criterion.to(self.device)

    # Set optimizer (Adam optimizer for now)
    optimizer = optim.Adam(ae_net.parameters(), lr=self.lr, weight_decay=self.weight_decay)

    # Set learning rate scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.lr_milestones, gamma=0.1)

    # Training
    logger.info('Starting pretraining...')
    start_time = time.time()
    ae_net.train()
    for epoch in range(self.n_epochs):
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0]))

        epoch_loss = 0.0
        n_batches = 0
        epoch_start_time = time.time()
        for data in train_loader:
            inputs, _, _, _ = data
            inputs = inputs.to(self.device)

            # Zero the network parameter gradients
            optimizer.zero_grad()

            # Update network parameters via backpropagation: forward + backward + optimize
            rec = ae_net(inputs)
            rec_loss = criterion(rec, inputs)
            loss = torch.mean(rec_loss)
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            n_batches += 1

        # BUG FIX: scheduler stepped after the epoch's optimizer updates
        # (PyTorch >= 1.1 ordering); stepping first skipped the initial LR.
        scheduler.step()

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {epoch_train_time:.3f}s '
                    f'| Train Loss: {epoch_loss / n_batches:.6f} |')

    self.train_time = time.time() - start_time
    logger.info('Pretraining Time: {:.3f}s'.format(self.train_time))
    logger.info('Finished pretraining.')

    return ae_net
def t_sne(self, dataset: BaseADDataset, net: BaseNet, data_path, xp_path):
    """Project test-set embeddings (plus the center ``self.c``) to 2D with
    t-SNE and save a scatter plot to ``xp_path``/t_sne.png.

    Expects ``data_path``/test_label.pickle to hold per-sample classes
    (0 = normal, 1 = abnormal); the center is appended with class 2.
    """
    logger = logging.getLogger()

    # GENERALIZED: infer the embedding dimension from the center instead of
    # the previously hard-coded 100, so any rep_dim works.
    center = np.array(self.c.cpu()).reshape(1, -1)
    rep_dim = center.shape[1]
    save_path = xp_path
    with open(os.path.join(data_path, 'test_label.pickle'), 'rb') as f:
        test_class = pickle.load(f)
    test_class = np.array(test_class)
    test_class = np.append(test_class, 2)  # 2: center

    # Set device for network
    net = net.to(self.device)

    _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # t_sne
    logger.info('Start plot t_sne')
    t_sne_array = np.empty((0, rep_dim))
    # BUG FIX: switch to eval mode so dropout/batch-norm behave
    # deterministically during this inference pass.
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            t_sne_array = np.append(t_sne_array, outputs.cpu().numpy(), axis=0)

    # Append the center as the last row so it can be highlighted below.
    t_sne_array = np.append(t_sne_array, center, axis=0)
    tsne = TSNE(n_components=2, random_state=32)
    tsne_results = tsne.fit_transform(t_sne_array)

    plt.figure(figsize=(16, 10))
    normal_index = (test_class == 0)
    abnormal_index = (test_class == 1)
    plt.scatter(tsne_results[normal_index, 0], tsne_results[normal_index, 1],
                c='b', label='normal', s=1, marker=',')
    plt.scatter(tsne_results[abnormal_index, 0], tsne_results[abnormal_index, 1],
                c='r', label='abnormal', s=1, marker=',')
    plt.scatter(tsne_results[-1, 0], tsne_results[-1, 1],
                c='k', label='center', s=20, marker='D')
    plt.legend()
    plt.savefig(os.path.join(save_path, 't_sne.png'))
def test(self, dataset: BaseADDataset, net: BaseNet): logger = logging.getLogger() # Get test data loader _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader) # Set device for network net = net.to(self.device) # Testing logger.info('Starting testing...') epoch_loss = 0.0 n_batches = 0 start_time = time.time() idx_label_score = [] net.eval() with torch.no_grad(): for data in test_loader: inputs, labels, semi_targets, idx = data inputs = inputs.to(self.device) labels = labels.to(self.device) semi_targets = semi_targets.to(self.device) idx = idx.to(self.device) outputs = net(inputs) dist = torch.sum((outputs - self.c)**2, dim=1) losses = torch.where( semi_targets == 0, dist, self.eta * ((dist + self.eps)**semi_targets.float())) loss = torch.mean(losses) scores = dist # Save triples of (idx, label, score) in a list idx_label_score += list( zip(idx.cpu().data.numpy().tolist(), labels.cpu().data.numpy().tolist(), scores.cpu().data.numpy().tolist())) epoch_loss += loss.item() n_batches += 1 self.test_time = time.time() - start_time self.test_scores = idx_label_score # Compute metrics _, labels, scores = zip(*idx_label_score) labels = np.array(labels) scores = np.array(scores) # AUC self.auc_roc = roc_auc_score(labels, scores) # PR-curve self.pr_curve = precision_recall_curve(labels, scores) precision, recall, thresholds = self.pr_curve self.auc_pr = auc(recall, precision) self.test_loss = epoch_loss / n_batches
def test(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
    """Tests the OC-SVM model on the test data.

    Flattens (or, in hybrid mode, AE-encodes) the whole test set into one
    matrix, scores it with the fitted one-class SVM (scores negated so
    larger = more anomalous), and records time/scores/AUC in
    ``self.results``. In hybrid mode a linear-kernel model is also scored.
    """
    logger = logging.getLogger()

    _, test_loader = dataset.loaders(batch_size=128, num_workers=n_jobs_dataloader)

    # Get data from loader
    idx_label_score = []
    X = ()
    idxs = []
    labels = []
    for data in test_loader:
        inputs, label_batch, _, idx = data
        inputs, label_batch, idx = inputs.to(device), label_batch.to(device), idx.to(device)
        if self.hybrid:
            inputs = self.ae_net.encoder(inputs)  # in hybrid approach, take code representation of AE as features
        X_batch = inputs.view(inputs.size(0), -1)  # X_batch.shape = (batch_size, n_channels * height * width)
        X += (X_batch.cpu().data.numpy(),)
        idxs += idx.cpu().data.numpy().astype(np.int64).tolist()
        labels += label_batch.cpu().data.numpy().astype(np.int64).tolist()
    X = np.concatenate(X)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()

    # Negate decision_function so that larger scores mean more anomalous.
    scores = (-1.0) * self.model.decision_function(X)

    self.results['test_time'] = time.time() - start_time
    scores = scores.flatten()
    self.rho = -self.model.intercept_[0]

    # Save triples of (idx, label, score) in a list
    idx_label_score += list(zip(idxs, labels, scores.tolist()))
    self.results['test_scores'] = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.results['test_auc'] = roc_auc_score(labels, scores)

    # If hybrid, also test model with linear kernel
    if self.hybrid:
        start_time = time.time()
        scores_linear = (-1.0) * self.linear_model.decision_function(X)
        self.results['test_time_linear'] = time.time() - start_time
        scores_linear = scores_linear.flatten()
        self.results['test_auc_linear'] = roc_auc_score(labels, scores_linear)
        logger.info('Test AUC linear model: {:.2f}%'.format(100. * self.results['test_auc_linear']))
        logger.info('Test Time linear model: {:.3f}s'.format(self.results['test_time_linear']))

    # Log results
    logger.info('Test AUC: {:.2f}%'.format(100. * self.results['test_auc']))
    logger.info('Test Time: {:.3f}s'.format(self.results['test_time']))
    logger.info('Finished testing.')
def train(self, dataset: BaseADDataset, ae_net: BaseNet):
    """Pretrain the autoencoder by minimizing the per-sample summed squared
    reconstruction error; returns the trained network.
    """
    logger = logging.getLogger()

    # Set device for network
    ae_net = ae_net.to(self.device)

    # Get train data loader
    train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Set optimizer (Adam optimizer for now)
    optimizer = optim.Adam(ae_net.parameters(), lr=self.lr, weight_decay=self.weight_decay,
                           amsgrad=self.optimizer_name == 'amsgrad')

    # Set learning rate scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.lr_milestones, gamma=0.1)

    # Training
    logger.info('Starting pretraining...')
    start_time = time.time()
    ae_net.train()
    for epoch in range(self.n_epochs):
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0]))

        loss_epoch = 0.0
        n_batches = 0
        epoch_start_time = time.time()
        for data in train_loader:
            inputs, _, _ = data
            inputs = inputs.to(self.device)

            # Zero the network parameter gradients
            optimizer.zero_grad()

            # Forward + backward + optimize; score = per-sample summed
            # squared reconstruction error over all non-batch dims.
            outputs = ae_net(inputs)
            scores = torch.sum((outputs.float() - inputs.float()) ** 2,
                               dim=tuple(range(1, outputs.dim())))
            loss = torch.mean(scores)
            loss.backward()
            optimizer.step()

            loss_epoch += loss.item()
            n_batches += 1

        # BUG FIX: scheduler stepped after the epoch's optimizer updates
        # (PyTorch >= 1.1 ordering); the original stepped before training,
        # which skipped the initial learning rate.
        scheduler.step()

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(' Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'
                    .format(epoch + 1, self.n_epochs, epoch_train_time, loss_epoch / n_batches))

    pretrain_time = time.time() - start_time
    logger.info('Pretraining time: %.3f' % pretrain_time)
    logger.info('Finished pretraining.')
    return ae_net
def test(self, dataset: BaseADDataset, vae: BaseNet): logger = logging.getLogger() # Get test data loader _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader) # Set device vae = vae.to(self.device) # Testing logger.info('Starting testing...') epoch_loss = 0.0 n_batches = 0 start_time = time.time() idx_label_score = [] vae.eval() with torch.no_grad(): for data in test_loader: inputs, labels, _, idx = data inputs, labels, idx = inputs.to(self.device), labels.to( self.device), idx.to(self.device) inputs = inputs.view(inputs.size(0), -1) rec = vae(inputs) likelihood = -binary_cross_entropy(rec, inputs) scores = -likelihood # negative likelihood as anomaly score # Save triple of (idx, label, score) in a list idx_label_score += list( zip(idx.cpu().data.numpy().tolist(), labels.cpu().data.numpy().tolist(), scores.cpu().data.numpy().tolist())) # Overall loss elbo = likelihood - vae.kl_divergence loss = -torch.mean(elbo) epoch_loss += loss.item() n_batches += 1 self.test_time = time.time() - start_time # Compute AUC _, labels, scores = zip(*idx_label_score) labels = np.array(labels) scores = np.array(scores) self.test_auc = roc_auc_score(labels, scores) # Log results logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches)) logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc)) logger.info('Test Time: {:.3f}s'.format(self.test_time)) logger.info('Finished testing variational autoencoder.')
def train(self, dataset: BaseADDataset, net: BaseNet):
    """Train the LSTM autoencoder on the train split and return it."""
    logger = logging.getLogger()

    # Set device for networks
    net = net.to(self.device)

    train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    optimizer = optim.RMSprop(net.parameters(), lr=self.lr, weight_decay=self.weight_decay,
                              eps=self.epsilon, momentum=self.momentum)

    # Set learning rate scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.lr_milestones, gamma=0.1)

    # Training
    logger.info('Starting train lstm_autoencoder ...')
    start_time = time.time()
    net.train()
    for epoch in range(self.n_epochs):
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0]))

        loss_epoch = 0.0
        n_batches = 0
        epoch_start_time = time.time()
        for data in train_loader:
            inputs, _, _ = data
            inputs = inputs.to(self.device)

            # Zero the networks parameter gradients
            optimizer.zero_grad()

            # Forward: net returns (code, reconstruction); input reshaped to
            # (batch, seq_len=1, n_features) for the LSTM.
            _, outputs = net(inputs.view(-1, 1, self.n_features))
            scores = torch.sum((outputs - inputs) ** 2, dim=tuple(range(1, outputs.dim())))
            loss = torch.mean(scores)
            loss.backward()
            optimizer.step()

            loss_epoch += loss.item()
            n_batches += 1

        # BUG FIX: scheduler stepped after the epoch's optimizer updates
        # (PyTorch >= 1.1 ordering); the original stepped it before any
        # optimizer.step(), skipping the initial learning rate.
        scheduler.step()

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(' Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'
                    .format(epoch + 1, self.n_epochs, epoch_train_time, loss_epoch / n_batches))

    self.train_time = time.time() - start_time
    logger.info('lstm_autoencoder train time: %.3f' % self.train_time)
    logger.info('Finished train lstm_autoencoder.')
    return net
def test(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0, set_split='test'):
    """Evaluate on the requested split ('train', 'val' or anything else =
    test) and store <split>_labels / <split>_scores / <split>_loss on self
    via ``self._test``.

    NOTE(review): ``device`` and ``n_jobs_dataloader`` are accepted but
    ignored — ``self.batch_size``/``self.n_jobs_dataloader`` are used
    instead; confirm with callers.
    """
    # Datasets expose either (train, val, test) or (train, test) loaders;
    # a wrong unpacking arity raises ValueError. BUG FIX: the original bare
    # ``except:`` clauses also swallowed unrelated errors from loaders()
    # and (in the val branch) from self._test itself.
    if set_split == "train":
        try:
            train_loader, _, _ = dataset.loaders(
                batch_size=self.batch_size,
                num_workers=self.n_jobs_dataloader)
        except ValueError:
            train_loader, _ = dataset.loaders(
                batch_size=self.batch_size,
                num_workers=self.n_jobs_dataloader)
        self.train_labels, self.train_scores, self.train_loss = self._test(
            train_loader)
    elif set_split == "val":
        try:
            _, val_loader, _ = dataset.loaders(
                batch_size=self.batch_size,
                num_workers=self.n_jobs_dataloader)
        except ValueError:
            raise ValueError(
                "The dataset does not support validation DataLoader")
        self.val_labels, self.val_scores, self.val_loss = self._test(
            val_loader)
    else:
        try:
            _, _, test_loader = dataset.loaders(
                batch_size=self.batch_size,
                num_workers=self.n_jobs_dataloader)
        except ValueError:
            _, test_loader = dataset.loaders(
                batch_size=self.batch_size,
                num_workers=self.n_jobs_dataloader)
        self.test_labels, self.test_scores, self.test_loss = self._test(
            test_loader)
def train(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
    """Trains the Isolation Forest model on the training data.

    Flattens (or, in hybrid mode, AE-encodes) the whole training set into
    one matrix and fits ``self.model``; records
    ``self.results['train_time']``.
    """
    logger = logging.getLogger()

    # do not drop last batch for non-SGD optimization shallow_ssad
    # Datasets expose either (train, val, test) or (train, test) loaders;
    # a wrong unpacking arity raises ValueError. BUG FIX: narrowed the
    # original bare ``except:`` so unrelated errors are not hidden.
    try:
        train_loader, _, _ = dataset.loaders(batch_size=128, num_workers=0)
    except ValueError:
        train_loader, _ = dataset.loaders(batch_size=128, num_workers=0)

    # Get data from loader
    X = ()
    for data in train_loader:
        inputs, _, _, _ = data
        inputs = inputs.to(device)
        if self.hybrid:
            inputs = self.ae_net.encoder(
                inputs
            )  # in hybrid approach, take code representation of AE as features
        X_batch = inputs.view(
            inputs.size(0), -1
        )  # X_batch.shape = (batch_size, n_channels * height * width)
        X += (X_batch.cpu().data.numpy(), )
    X = np.concatenate(X)

    # Training
    logger.info('Starting training...')
    start_time = time.time()
    self.model.fit(X)
    train_time = time.time() - start_time
    self.results['train_time'] = train_time

    logger.info('Training Time: {:.3f}s'.format(
        self.results['train_time']))
    logger.info('Finished training.')
def test(self, dataset: BaseADDataset, net1: BaseNet, net2: BaseNet):
    """Evaluate a two-stage model (encoder ``net1`` -> scorer ``net2``) on
    the test set.

    Side effects: sets ``self.test_time``, ``self.test_ftr``/``self.test_tpr``
    (ROC curve points; 'ftr' is presumably a typo for 'fpr', kept for
    caller compatibility), ``self.test_score`` and ``self.test_auc``.
    """
    logger = logging.getLogger()
    print('R', self.R)
    print('c', self.c)

    # Set device for networks
    net1 = net1.to(self.device)
    net2 = net2.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net1.eval()
    net2.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            # net1 returns (code, reconstruction); the reshape hard-codes 9
            # input features per step — assumes the dataset has exactly 9
            # features; TODO confirm.
            code, _ = net1(inputs.view(-1, 1, 9))
            outputs = net2(code)
            # Anomaly score = squared distance to the hypersphere center c.
            dist = torch.sum((outputs - self.c)**2, dim=1)
            if self.objective == 'soft-boundary':
                scores = dist - self.R**2
            else:
                scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)

    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_ftr, self.test_tpr, _ = roc_curve(labels, scores)
    self.test_score = scores
    self.test_auc = roc_auc_score(labels, scores)
    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished testing.')
def test(self, dataset: BaseADDataset, ae_net: BaseNet, test_image): logger = logging.getLogger() # Set device for network ae_net = ae_net.to(self.device) # Get test data loader _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader) # Testing logger.info('Testing autoencoder...') loss_epoch = 0.0 n_batches = 0 start_time = time.time() idx_label_score = [] ae_net.eval() with torch.no_grad(): for i, data in enumerate(test_loader): inputs, labels, idx = data inputs = inputs.to(self.device) outputs = ae_net(inputs) # import pdb;pdb.set_trace() if labels == 0: check_autoencoder_quality(inputs, test_image[i], outputs) scores = torch.sum((outputs - inputs)**2, dim=tuple(range(1, outputs.dim()))) loss = torch.mean(scores) # Save triple of (idx, label, score) in a list idx_label_score += list( zip(idx.cpu().data.numpy().tolist(), labels.cpu().data.numpy().tolist(), scores.cpu().data.numpy().tolist())) loss_epoch += loss.item() n_batches += 1 logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches)) _, labels, scores = zip(*idx_label_score) labels = np.array(labels) scores = np.array(scores) auc = roc_auc_score(labels, scores) logger.info('Test set AUC: {:.2f}%'.format(100. * auc)) test_time = time.time() - start_time logger.info('Autoencoder testing time: %.3f' % test_time) logger.info('Finished testing autoencoder.')
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate the Deep SVDD network on the test set; logs the AUC and
    returns the accuracy (as a string).

    NOTE(review): the original referenced ``self.test_auc`` without ever
    computing it (the roc_auc_score line was commented out) and passed an
    always-empty ``list_output`` to accuracy_score — both raise at runtime.
    The AUC computation is restored and the accuracy is guarded; NaN is
    returned until predictions are actually collected into ``list_output``.
    """
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)
    list_output = []  # NOTE(review): never populated anywhere in this method

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
    print('num of test_loader : {}'.format(len(test_loader)))

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net.eval()
    print('deepSVDD test()---------------')
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            dist = torch.sum((outputs - self.c)**2, dim=1)
            # Per-batch debug prints of dist/labels demoted to debug level
            # so test runs are not flooded; unused ``dist_`` local removed.
            logger.debug('dist: %s', dist)
            logger.debug('labels: %s', labels)
            if self.objective == 'soft-boundary':
                scores = dist - self.R**2
            else:
                scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)
    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    # BUG FIX: restore the AUC computation the log line below depends on.
    self.test_auc = roc_auc_score(labels, scores)
    # Guard: accuracy is only defined when one prediction per label exists.
    if len(list_output) == len(labels):
        test_acc = accuracy_score(labels, list_output)
    else:
        test_acc = float('nan')

    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished testing.')
    return str(test_acc)
def test(self, dataset: BaseADDataset, net: BaseNet, val=False):
    """Evaluate ``net`` on the validation split (``val=True``) or the test
    split, storing labels/scores/loss on self via ``self._test``.
    """
    if val:
        # Only 3-loader datasets have a validation split; a 2-loader
        # dataset makes the unpacking raise ValueError. BUG FIX: the
        # original bare ``except:`` also wrapped the self._test call, so
        # any evaluation error was misreported as a missing validation
        # loader; _test is now called outside the try.
        try:
            _, val_loader, _ = dataset.loaders(
                batch_size=self.batch_size,
                num_workers=self.n_jobs_dataloader)
        except ValueError:
            raise ValueError(
                "The dataset does not support validation DataLoader")
        self.val_labels, self.val_scores, self.val_loss = self._test(
            val_loader, net)
    else:
        try:
            _, _, test_loader = dataset.loaders(
                batch_size=self.batch_size,
                num_workers=self.n_jobs_dataloader)
        except ValueError:
            _, test_loader = dataset.loaders(
                batch_size=self.batch_size,
                num_workers=self.n_jobs_dataloader)
        self.test_labels, self.test_scores, self.test_loss = self._test(
            test_loader, net)
def test(self, dataset: BaseADDataset, device: str = 'cpu', n_jobs_dataloader: int = 0):
    """Tests the OC-SVM model on the test data.

    Embeds the whole test set via ``self.embedding``, scores it with the
    fitted one-class SVM (negated so larger = more anomalous), and records
    time/scores/AUC in ``self.results``.
    """
    logger = logging.getLogger()

    # Batch size 64 is hard-coded for the embedding-extraction pass.
    _, test_loader = dataset.loaders(batch_size=64, num_workers=n_jobs_dataloader)

    # Testing
    logger.info('Starting testing...')
    idx_label_score = []
    X = ()
    idxs = []
    labels = []
    for data in test_loader:
        idx, text, label_batch, weights = data
        text = text.to(device)
        label_batch = label_batch.to(device)
        weights = weights.to(device)

        X_batch = self.embedding(text, weights)  # X_batch.shape = (batch_size, embedding_size)
        X += (X_batch.cpu().data.numpy(),)
        # NOTE(review): idx is extended as-is (no .tolist() conversion) —
        # works when idx is an iterable of ids; confirm its actual type.
        idxs += idx
        labels += label_batch.cpu().data.numpy().astype(np.int64).tolist()
    X = np.concatenate(X)

    start_time = time.time()

    # Negate decision_function so that larger scores mean more anomalous.
    scores = (-1.0) * self.model.decision_function(X)

    self.results['test_time'] = time.time() - start_time
    scores = scores.flatten()
    self.rho = -self.model.intercept_[0]

    # Save triples of (idx, label, score) in a list
    idx_label_score += list(zip(idxs, labels, scores.tolist()))
    self.results['test_scores'] = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.results['test_auc'] = roc_auc_score(labels, scores)

    # Log results
    logger.info('Test AUC: {:.2f}%'.format(100. * self.results['test_auc']))
    logger.info('Test Time: {:.3f}s'.format(self.results['test_time']))
    logger.info('Finished testing.')
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Run the network over the test split, score each sample with
    ``self.lastlay``, and store timing, per-sample scores and the ROC AUC.
    """
    logger = logging.getLogger()

    net = net.to(self.device)

    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    logger.info('Starting testing...')
    started = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for batch in test_loader:
            x, y, sample_idx = batch
            x = x.to(self.device)
            # Forward pass, then map outputs to anomaly scores.
            score_batch = self.lastlay(net(x))

            # Collect (idx, label, score) triples for this batch.
            records.extend(
                zip(sample_idx.cpu().data.numpy().tolist(),
                    y.cpu().data.numpy().tolist(),
                    score_batch.cpu().data.numpy().tolist()))

    self.test_time = time.time() - started
    logger.info('Testing time: %.3f' % self.test_time)

    self.test_scores = records

    # Compute AUC over the whole split.
    _, y_true, y_score = zip(*records)
    self.test_auc = roc_auc_score(np.array(y_true), np.array(y_score))

    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished testing.')
def test(dataset: BaseADDataset, ae_net: BaseNet):
    """Render input/label/output image grids for each test batch to
    ./log/test/.

    NOTE(review): module-level function relying on names from the enclosing
    module (``device``, ``batch_size``, ``plot_images_grid``) — confirm
    they are defined where this is used. ``loss_epoch``, ``n_batches`` and
    ``start_time`` are initialized but never used afterwards.
    """
    # Set device for network
    ae_net = ae_net.to(device)

    # Get test data loader; loaders here presumably returns paired
    # input/label loaders (shuffling disabled to keep them aligned) —
    # TODO confirm against the dataset class.
    letter, labels = dataset.loaders(batch_size=batch_size, num_workers=0, shuffle_test=False, shuffle_train=False)

    loss_epoch = 0.0
    n_batches = 0
    start_time = time.time()

    with torch.no_grad():
        i = 0
        for data, label in zip(letter, labels):
            i += 1
            inputs, _ = data
            lab, _ = label
            inputs = inputs.to(device)
            lab = lab.to(device)

            # Forward pass only (no gradients needed under no_grad).
            outputs = ae_net(inputs)

            # Plot the first 16 images of each batch.
            plot_images_grid(inputs[0:16], export_img='./log/test/input' + str(i), title='Input ', nrow=4, padding=4)
            plot_images_grid(lab[0:16], export_img='./log/test/label' + str(i), title='Label ', nrow=4, padding=4)
            plot_images_grid(outputs[0:16], export_img='./log/test/output' + str(i), title='Output ', nrow=4, padding=4)
def apply_model(self, dataset: BaseADDataset, net: BaseNet): logger = logging.getLogger() # Set device for network net = net.to(self.device) # Get apply_model data loader _, _, apply_loader = dataset.loaders( batch_size=self.batch_size, num_workers=self.n_jobs_dataloader) # Applying model logger.info('Starting Deep SVDD application.') start_time = time.time() idx_score = [] net.eval() with torch.no_grad(): for data in apply_loader: inputs, nolabels, idx = data # nolables are NaN inputs = inputs.to(self.device) outputs = net(inputs) dist = torch.sum((outputs - self.c)**2, dim=1) if self.objective == 'soft-boundary': scores = dist - self.R**2 else: scores = dist # Save triples of (idx, label, score) in a list idx_score += list( zip(idx.cpu().data.numpy().tolist(), scores.cpu().data.numpy().tolist())) self.apply_time = time.time() - start_time logger.info('Deep SVDD application time: %.3f' % self.apply_time) ind, scores = zip(*idx_score) self.ind = np.array(ind) self.scores = np.array(scores) logger.info('Finished Deep SVDD application.')
def __init__(self, dataset: BaseADDataset, network: BaseNet, k: int, lr: float, n_epochs: int, batch_size: int, rep_dim: int, K: int, weight_decay: float, device: str, n_jobs_dataloader: int, w_rec: float, w_feat: float, cfg):
    """Trainer for a memory-augmented autoencoder.

    Builds the data loaders, an Adam optimizer, L1 reconstruction and MSE
    feature losses, and a random memory bank with one ``rep_dim``-dim slot
    per training sample.
    """
    super().__init__(lr, n_epochs, batch_size, rep_dim, K, weight_decay, device, n_jobs_dataloader, w_rec, w_feat)
    self.ae_net = network.to(self.device)
    self.train_loader, self.test_loader = dataset.loaders(
        batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
    self.optimizer = optim.Adam(self.ae_net.parameters(),
                                lr=self.lr,
                                weight_decay=self.weight_decay)
    self.rec_loss = torch.nn.L1Loss()    # reconstruction loss
    self.feat_loss = torch.nn.MSELoss()  # feature-matching loss
    self.best_score = 0                  # best evaluation score seen so far
    self.min_loss = 1000                 # sentinel for the lowest loss seen
    self.k = k
    self.cfg = cfg
    self.logger = logging.getLogger()
    # One random memory slot per training sample, kept on the device.
    self.memory = torch.randn(size=(len(self.train_loader.dataset),
                                    self.rep_dim)).to(self.device)
def test(self, dataset: BaseADDataset, net: BaseNet, is_test=0): """ dt_type:数据集的类型, 测试集 0 / 训练集 1 """ logger = logging.getLogger() # Set device for networks net = net.to(self.device) # Get test data loader if is_test == 0: # 测试集加载器 _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader) if is_test == 1: # 训练集加载器 test_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader) # Testing logger.info('Testing lstm_autoencoder...') loss_epoch = 0.0 n_batches = 0 start_time = time.time() idx_label_score = [] net.eval() with torch.no_grad(): for data in test_loader: inputs, labels, idx = data inputs = inputs.to(self.device) # get lstm test label,label.shape = (128,) label = labels.numpy() if is_test == 0: for i in range(len(label)): self.test_label.append(label[i]) if is_test == 1: for i in range(len(label)): self.train_label.append(label[i]) code, outputs = net(inputs.view(-1, 1, self.n_features)) code = code.detach().numpy() if is_test == 0: for i in range(len(code)): self.test_code.append(code[i]) if is_test == 1: for i in range(len(code)): self.train_code.append(code[i]) scores = torch.sum((outputs - inputs) ** 2, dim=tuple(range(1, outputs.dim()))) loss = torch.mean(scores) # Save triple of (idx, label, score) in a list idx_label_score += list(zip(idx.cpu().data.numpy().tolist(), labels.cpu().data.numpy().tolist(), scores.cpu().data.numpy().tolist())) loss_epoch += loss.item() n_batches += 1 logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches)) self.test_time = time.time() - start_time logger.info('lstm_autoencoder testing time: %.3f' % self.test_time) self.test_scores = idx_label_score _, labels, scores = zip(*idx_label_score) labels = np.array(labels) scores = np.array(scores) print(len(labels)) print(len(scores)) """ 测试集 """ if is_test == 0: self.test_auc = roc_auc_score(labels, scores) logger.info('Test set AUC: {:.2f}%'.format(100. 
* self.test_auc)) logger.info('Finished test lstm_autoencoder.')
def test(self, dataset: BaseADDataset, ae_net: BaseNet, flg=0):
    """
    Training set (flg == 1): fit KMeans on per-feature reconstruction
    errors to obtain clusters of normal data — centers and radii.
    Test set (flg == 0): assign each sample to a cluster; a sample whose
    distance to its cluster center exceeds that cluster's radius is
    classified anomalous (1), otherwise normal (0).
    """
    logger = logging.getLogger()

    # Set device for networks
    ae_net = ae_net.to(self.device)

    # flg == 1: training-set loader; flg == 0: test-set loader
    if flg == 1:
        test_loader, _ = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
    else:
        _, test_loader = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Testing ae...')
    loss_epoch = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    ae_net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = ae_net(inputs)
            # Scalar reconstruction error per sample (summed over all
            # non-batch dims) and the full per-feature squared error.
            scores = torch.sum((outputs - inputs)**2,
                               dim=tuple(range(1, outputs.dim())))
            error = (outputs - inputs)**2
            loss = torch.mean(scores)
            # Save triple of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist(),
                    error.cpu().data.numpy().tolist()),
            )
            loss_epoch += loss.item()
            n_batches += 1

    logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))
    _, labels, scores, error = zip(*idx_label_score)
    labels = np.array(labels)  # labels.shape(97278, )
    scores = np.array(scores)  # scores.shape(97278, )
    error = np.array(error)  # scores.shape(97278, )
    if flg == 1:
        # Training set: fit KMeans on the error vectors and cache the
        # resulting centers and per-cluster radii on self for later calls.
        X = error
        self.kmeans = KMeans(n_clusters=self.clusters).fit(X)
        self.center = self.kmeans.cluster_centers_.tolist()
        self.radius = self.get_radius(X)
        print("roc_self.center", self.center)
        print("roc_self.radius", self.radius)
    else:
        # Test set: predict clusters with the previously fitted KMeans.
        Y = error
        pred_labels = []  # predicted labels (1 = anomalous, 0 = normal)
        pred_km = self.kmeans.predict(Y)
        print(pred_km.shape)
        print(pred_km)
        for i in range(len(pred_km)):
            # Manhattan distance from the sample to its cluster center is
            # the classification criterion, compared against that
            # cluster's radius.
            dis = self.manhattan_distance(self.center[pred_km[i]], Y[i])
            if dis > self.radius[pred_km[i]]:
                pred_labels.append(1)
            else:
                pred_labels.append(0)
        pred_labels = np.array(pred_labels)
        self.test_ftr, self.test_tpr, _ = roc_curve(labels, pred_labels)
        # roc_self.test_auc = roc_auc_score(pred_labels, labels)
        fpr, tpr, thresholds = roc_curve(labels, pred_labels)
        # Area under the ROC curve is used as the accuracy metric.
        print(fpr, tpr)
        self.test_auc = auc(fpr, tpr)
        self.test_mcc = matthews_corrcoef(labels, pred_labels)
        _, _, f_score, _ = precision_recall_fscore_support(labels,
                                                           pred_labels,
                                                           labels=[0, 1])
        self.test_f_score = f_score[1]
        print(len(labels))
        print(len(scores))
    self.test_time = time.time() - start_time
    if flg == 0:
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('ae testing time: %.3f' % self.test_time)
    logger.info('Finished testing ae.')
def train(self, dataset: BaseADDataset, net: BaseNet): logger = logging.getLogger() # Set device for network net = net.to(self.device) # Get train data loader train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader) # Set optimizer (Adam optimizer for now) optimizer = optim.Adam(net.parameters(), lr=self.lr, weight_decay=self.weight_decay, amsgrad=self.optimizer_name == 'amsgrad') # Set learning rate scheduler scheduler = optim.lr_scheduler.MultiStepLR( optimizer, milestones=self.lr_milestones, gamma=0.1) # Initialize hypersphere center c (if c not loaded) if self.c is None: logger.info('Initializing center c...') self.c = self.init_center_c(train_loader, net) logger.info('Center c initialized.') # Training logger.info('Starting training...') start_time = time.time() net.train() for epoch in range(self.n_epochs): scheduler.step() if epoch in self.lr_milestones: logger.info(' LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0])) loss_epoch = 0.0 n_batches = 0 epoch_start_time = time.time() for data in train_loader: inputs, _, _ = data inputs = inputs.to(self.device) # Zero the network parameter gradients optimizer.zero_grad() # Update network parameters via backpropagation: forward + backward + optimize outputs = net(inputs) # dist = torch.sum((outputs - self.c) ** 2, dim=1) ### NEW - get closest cluster center, take dist, sum/mean for loss centers = torch.transpose(self.c, 0, 1) dist = torch.zeros(outputs.shape[0], device=self.device) for i in range(outputs.shape[0]): # Sum dists from each data point to its corresponding cluster dist[i] = torch.sum((centers - outputs[i])**2, dim=1).min() #import pdb; pdb.set_trace() ### if self.objective == 'soft-boundary': scores = dist - self.R**2 loss = self.R**2 + (1 / self.nu) * torch.mean( torch.max(torch.zeros_like(scores), scores)) else: loss = torch.mean(dist) loss.backward() optimizer.step() # Update hypersphere radius R on mini-batch distances if (self.objective 
== 'soft-boundary') and ( epoch >= self.warm_up_n_epochs): self.R.data = torch.tensor(get_radius(dist, self.nu), device=self.device) loss_epoch += loss.item() n_batches += 1 # log epoch statistics epoch_train_time = time.time() - epoch_start_time logger.info(' Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format( epoch + 1, self.n_epochs, epoch_train_time, loss_epoch / n_batches)) self.train_time = time.time() - start_time logger.info('Training time: %.3f' % self.train_time) logger.info('Finished training.') return net
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate (multi-center) Deep SVDD on the test split, then draw a
    UMAP plot of the training-set embeddings.

    Stores (idx, label, score) triples in ``self.test_scores`` and the
    ROC AUC in ``self.test_auc``.  Scores are squared distances to the
    (closest) center, minus R**2 for the soft-boundary objective.
    """
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net.eval()
    output_data = []
    label_data = []
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            # Kept for a potential test-set UMAP plot; overwritten below.
            output_data.append(outputs)
            label_data.append(labels)

            if self.c.dim() == 1:
                # Plain Deep SVDD: single center.
                dist = torch.sum((outputs - self.c)**2, dim=1)
            else:
                # Multi-center: distance to the closest center, vectorized
                # (replaces the previous per-sample Python loop; same values).
                centers = torch.transpose(self.c, 0, 1)  # (K, rep_dim)
                dist = torch.min(
                    torch.sum(
                        (outputs.unsqueeze(1) - centers.unsqueeze(0)) ** 2,
                        dim=2),
                    dim=1)[0]

            if self.objective == 'soft-boundary':
                scores = dist - self.R**2
            else:
                scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)
    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)
    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

    # UMAP plot of training-set embeddings (same UMAP model fit in training).
    # Get train data loader
    train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                      num_workers=self.n_jobs_dataloader)
    output_data = []
    label_data = []
    with torch.no_grad():
        for data in train_loader:
            # labels are only used for coloring the hypersphere UMAP plot
            inputs, labels, _ = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            output_data.append(outputs)
            label_data.append(labels)

    # Cluster centers computed during training are loaded from disk.
    kmeans_centers = np.load('centers.npy')
    output_data = torch.cat(output_data)
    label_data = torch.cat(label_data).numpy()
    self.latent_UMAP(output_data, label_data, kmeans_centers,
                     anomaly_data=True)

    logger.info('Finished testing.')
def train(self, dataset: BaseADDataset, net: BaseNet): logger = logging.getLogger() # Get train data loader train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader) # Set device for network net = net.to(self.device) # Set optimizer (Adam optimizer for now) optimizer = optim.Adam(net.parameters(), lr=self.lr, weight_decay=self.weight_decay) # Set learning rate scheduler scheduler = optim.lr_scheduler.MultiStepLR( optimizer, milestones=self.lr_milestones, gamma=0.1) # Initialize hypersphere center c (if c not loaded) if self.c is None: logger.info('Initializing center c...') self.c = self.init_center_c(train_loader, net) logger.info('Center c initialized.') # Training logger.info('Starting training...') start_time = time.time() net.train() for epoch in range(self.n_epochs): scheduler.step() if epoch in self.lr_milestones: logger.info(' LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0])) epoch_loss = 0.0 n_batches = 0 epoch_start_time = time.time() for data in train_loader: inputs, _, semi_targets, _ = data inputs, semi_targets = inputs.to(self.device), semi_targets.to( self.device) # Zero the network parameter gradients optimizer.zero_grad() # Update network parameters via backpropagation: forward + backward + optimize outputs = net(inputs) dist = torch.sum((outputs - self.c)**2, dim=1) losses = torch.where( semi_targets == 0, dist, self.eta * ((dist + self.eps)**semi_targets.float())) loss = torch.mean(losses) loss.backward() optimizer.step() epoch_loss += loss.item() n_batches += 1 # log epoch statistics epoch_train_time = time.time() - epoch_start_time logger.info( f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {epoch_train_time:.3f}s ' f'| Train Loss: {epoch_loss / n_batches:.6f} |') self.train_time = time.time() - start_time logger.info('Training Time: {:.3f}s'.format(self.train_time)) logger.info('Finished training.') return net