def test(self, dataset: BaseADDataset, net1: BaseNet, net2: BaseNet):
    """Evaluate the two-stage model (encoder ``net1`` -> ``net2``) on the test set.

    Per-sample anomaly score is the squared distance of ``net2``'s output to
    the hypersphere center ``self.c`` (minus ``R**2`` under the soft-boundary
    objective). Stores ROC curve data, scores and AUC on ``self``.
    """
    logger = logging.getLogger()
    # Fix: report hypersphere parameters via the logger at debug level
    # instead of bare print() calls to stdout.
    logger.debug('R: %s', self.R)
    logger.debug('c: %s', self.c)

    # Set device for networks
    net1 = net1.to(self.device)
    net2 = net2.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net1.eval()
    net2.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            # net1 expects (batch, 1, 9) sequences and returns (code, reconstruction).
            code, _ = net1(inputs.view(-1, 1, 9))
            outputs = net2(code)
            dist = torch.sum((outputs - self.c) ** 2, dim=1)
            if self.objective == 'soft-boundary':
                scores = dist - self.R ** 2
            else:
                scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)

    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    # NOTE: attribute name 'test_ftr' kept for backward compatibility (it holds the FPR).
    self.test_ftr, self.test_tpr, _ = roc_curve(labels, scores)
    self.test_score = scores
    self.test_auc = roc_auc_score(labels, scores)
    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished testing.')
def _get_output(self, loader, net: BaseNet):
    """Run ``net`` over ``loader`` and collect per-sample outputs.

    Returns:
        (labels, outputs): numpy arrays aligned by sample order as iterated
        (the index column from the loader is dropped).
    """
    logger = logging.getLogger()
    # Fix: removed unused locals epoch_loss / n_batches (dead code).

    # Set device for network
    net = net.to(self.device)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_output = []
    net.eval()
    with torch.no_grad():
        for data in loader:
            inputs, labels, idx, _ = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)

            # Save triples of (idx, label, output) in a list
            idx_label_output += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    outputs.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)

    _, labels, outputs = zip(*idx_label_output)
    labels = np.array(labels)
    outputs = np.array(outputs)
    return labels, outputs
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate a Deep SAD-style model on the test set.

    Scores are squared distances to center ``self.c``; the semi-supervised
    loss weights labelled samples via ``self.eta`` and the semi-target
    exponent. Stores scores, test time and AUC on ``self``.
    """
    logger = logging.getLogger()

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Set device for network
    net = net.to(self.device)

    # Testing
    logger.info('Starting testing...')
    epoch_loss = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, semi_targets, idx = data

            inputs = inputs.to(self.device)
            labels = labels.to(self.device)
            semi_targets = semi_targets.to(self.device)
            # Fix: per-batch diagnostic moved from print() to debug logging.
            logger.debug('Unique Semi Targets: %s',
                         np.unique(semi_targets.data.cpu().numpy()))
            idx = idx.to(self.device)

            outputs = net(inputs)
            dist = torch.sum((outputs - self.c) ** 2, dim=1)
            # Unlabelled samples (semi_target == 0) contribute dist directly;
            # labelled ones are weighted by eta and the semi-target exponent.
            losses = torch.where(
                semi_targets == 0, dist,
                self.eta * ((dist + self.eps) ** semi_targets.float()))
            loss = torch.mean(losses)
            scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

            epoch_loss += loss.item()
            n_batches += 1

    self.test_time = time.time() - start_time
    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)

    # Log results
    logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
    logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Test Time: {:.3f}s'.format(self.test_time))
    logger.info('Finished testing.')
def pretrain(self, deepSVDD, cfg, dataset: BaseADDataset, net: BaseNet):
    """Pretrain ``net`` (an autoencoder) with an L1 reconstruction loss.

    Args:
        deepSVDD, cfg: kept for interface compatibility (unused here).
        dataset: provides the training loader.
        net: autoencoder returning (code, reconstruction).

    Returns:
        The pretrained network.
    """
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get train data loader
    train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                      num_workers=self.n_jobs_dataloader)

    # Set optimizer (Adam optimizer for now)
    optimizer = optim.Adam(net.parameters(),
                           lr=self.lr,
                           weight_decay=self.weight_decay,
                           amsgrad=self.optimizer_name == 'amsgrad')

    # Set learning rate scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=self.lr_milestones, gamma=0.1)

    # Training
    logger.info('Starting pretraining...')
    start_time = time.time()
    net.train()
    for epoch in range(self.pre_training_epochs):

        loss_epoch = 0.0
        n_batches = 0
        epoch_start_time = time.time()
        for data in tqdm(train_loader):
            inputs, _, _ = data
            inputs = inputs.to(self.device)

            # Zero the network parameter gradients
            optimizer.zero_grad()

            # Update network parameters via backpropagation: forward + backward + optimize
            _, rec_images = net(inputs)
            # L1 reconstruction error, summed over all non-batch dimensions.
            loss = torch.mean(
                torch.sum(torch.abs(rec_images - inputs),
                          dim=tuple(range(1, rec_images.dim()))))
            loss.backward()
            optimizer.step()

            loss_epoch += loss.item()
            n_batches += 1

        # Fix: the scheduler was created but never stepped, so the configured
        # lr_milestones had no effect; step it once per epoch.
        scheduler.step()
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g'
                        % float(scheduler.get_lr()[0]))

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(' Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format(
            epoch + 1, self.pre_training_epochs, epoch_train_time,
            loss_epoch / n_batches))

    self.train_time = time.time() - start_time
    logger.info('Training time: %.3f' % self.train_time)
    logger.info('Finished training.')

    return net
def train(self, dataset: BaseADDataset, svm_net: BaseNet):
    """Train the SVM model with a hinge loss.

    Returns:
        The trained network.
    """
    logger = logging.getLogger()

    # Set device for networks
    svm_net = svm_net.to(self.device)

    train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                      num_workers=self.n_jobs_dataloader)

    optimizer = optim.SGD(svm_net.parameters(),
                          lr=self.lr,
                          momentum=self.momentum)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=self.step_size,
                                          gamma=self.gamma)

    # Training
    logger.info('Starting train svm_trainer ...')
    start_time = time.time()
    svm_net.train()
    for epoch in range(self.n_epochs):

        scheduler.step()
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g'
                        % float(scheduler.get_lr()[0]))

        loss_epoch = 0.0
        n_batches = 0
        epoch_start_time = time.time()
        for data in train_loader:
            inputs, labels, _ = data
            inputs = inputs.to(self.device)
            # Fix: labels must live on the same device as the network output
            # before computing the hinge loss (device mismatch on CUDA otherwise).
            labels = labels.to(self.device)

            # Zero the networks parameter gradients
            optimizer.zero_grad()

            # Update networks parameters via back propagation: forward + backward + optimize
            outputs = svm_net(inputs)
            # get loss
            loss = self.hinge_loss(outputs, labels)
            loss.backward()
            optimizer.step()

            loss_epoch += loss.item()
            n_batches += 1

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(' Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format(
            epoch + 1, self.n_epochs, epoch_train_time,
            loss_epoch / n_batches))

    pretrain_time = time.time() - start_time
    logger.info('svm_trainer train time: %.3f' % pretrain_time)
    logger.info('Finished train svm_trainer.')

    return svm_net
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate the network on the test split; record loss, AUC-ROC and AUC-PR."""
    logger = logging.getLogger()

    # Data loader for the test split.
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Move the model onto the configured device.
    net = net.to(self.device)

    logger.info('Starting testing...')
    total_loss, batch_count = 0.0, 0
    tic = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for batch in test_loader:
            inputs, labels, semi_targets, idx = batch

            inputs = inputs.to(self.device)
            labels = labels.to(self.device)
            semi_targets = semi_targets.to(self.device)
            idx = idx.to(self.device)

            outputs = net(inputs)
            dist = torch.sum((outputs - self.c) ** 2, dim=1)
            # Unlabelled samples (semi_target == 0) contribute dist directly;
            # labelled ones are weighted by eta and the semi-target exponent.
            losses = torch.where(
                semi_targets == 0, dist,
                self.eta * ((dist + self.eps) ** semi_targets.float()))

            records += list(zip(idx.cpu().data.numpy().tolist(),
                                labels.cpu().data.numpy().tolist(),
                                dist.cpu().data.numpy().tolist()))

            total_loss += torch.mean(losses).item()
            batch_count += 1

    self.test_time = time.time() - tic
    self.test_scores = records

    # Derive metrics from the collected (idx, label, score) records.
    _, labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)

    # AUC
    self.auc_roc = roc_auc_score(labels, scores)

    # PR-curve
    self.pr_curve = precision_recall_curve(labels, scores)
    precision, recall, thresholds = self.pr_curve
    self.auc_pr = auc(recall, precision)

    self.test_loss = total_loss / batch_count
def t_sne(self, dataset: BaseADDataset, net: BaseNet, data_path, xp_path):
    """Project test-set embeddings (plus the center ``self.c``) to 2D with
    t-SNE and save a scatter plot to ``xp_path``.

    Args:
        dataset: provides the test loader.
        net: embedding network mapping inputs to representation space.
        data_path: directory containing 'test_label.pickle' (0 normal, 1 abnormal).
        xp_path: directory where 't_sne.png' is written.
    """
    logger = logging.getLogger()
    # Generalized: reshape(1, -1) replaces the hard-coded 100-dim center.
    center = np.array(self.c.cpu()).reshape(1, -1)
    rep_dim = center.shape[1]
    save_path = xp_path
    with open(os.path.join(data_path, 'test_label.pickle'), 'rb') as f:
        test_class = pickle.load(f)
    test_class = np.array(test_class)
    test_class = np.append(test_class, 2)  # 2: center

    # Set device for network
    net = net.to(self.device)
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # t_sne
    logger.info('Start plot t_sne')
    t_sne_array = np.empty((0, rep_dim))
    # Fix: run the network in eval mode so BatchNorm/Dropout layers behave
    # deterministically during embedding extraction.
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, _, _ = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            t_sne_array = np.append(t_sne_array, outputs.cpu().numpy(), axis=0)
    # Append the hypersphere center as the last row so it can be plotted too.
    t_sne_array = np.append(t_sne_array, center, axis=0)

    tsne = TSNE(n_components=2, random_state=32)
    tsne_results = tsne.fit_transform(t_sne_array)

    plt.figure(figsize=(16, 10))
    normal_index = (test_class == 0)
    abnormal_index = (test_class == 1)
    plt.scatter(tsne_results[normal_index, 0],
                tsne_results[normal_index, 1],
                c='b', label='normal', s=1, marker=',')
    plt.scatter(tsne_results[abnormal_index, 0],
                tsne_results[abnormal_index, 1],
                c='r', label='abnormal', s=1, marker=',')
    plt.scatter(tsne_results[-1, 0], tsne_results[-1, 1],
                c='k', label='center', s=20, marker='D')
    plt.legend()
    plt.savefig(os.path.join(save_path, 't_sne.png'))
def train(self, dataset: BaseADDataset, ae_net: BaseNet):
    """Pretrain the autoencoder by minimizing the mean summed squared
    reconstruction error over the training set, and return it."""
    logger = logging.getLogger()

    # Move network to the configured device.
    ae_net = ae_net.to(self.device)

    # Training data loader.
    train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                      num_workers=self.n_jobs_dataloader)

    # Adam optimizer (amsgrad variant when configured).
    optimizer = optim.Adam(ae_net.parameters(), lr=self.lr,
                           weight_decay=self.weight_decay,
                           amsgrad=self.optimizer_name == 'amsgrad')

    # Piecewise-constant LR decay at the configured milestones.
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=self.lr_milestones,
                                               gamma=0.1)

    logger.info('Starting pretraining...')
    t0 = time.time()
    ae_net.train()
    for epoch in range(self.n_epochs):

        scheduler.step()
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g'
                        % float(scheduler.get_lr()[0]))

        running_loss = 0.0
        batch_count = 0
        epoch_t0 = time.time()
        for batch in train_loader:
            inputs, _, _ = batch
            inputs = inputs.to(self.device)

            # Forward + backward + parameter update.
            optimizer.zero_grad()
            outputs = ae_net(inputs)
            per_sample = torch.sum((outputs.float() - inputs.float()) ** 2,
                                   dim=tuple(range(1, outputs.dim())))
            batch_loss = torch.mean(per_sample)
            batch_loss.backward()
            optimizer.step()

            running_loss += batch_loss.item()
            batch_count += 1

        # Per-epoch statistics.
        logger.info(' Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'
                    .format(epoch + 1, self.n_epochs,
                            time.time() - epoch_t0,
                            running_loss / batch_count))

    pretrain_time = time.time() - t0
    logger.info('Pretraining time: %.3f' % pretrain_time)
    logger.info('Finished pretraining.')

    return ae_net
def test(self, dataset: BaseADDataset, vae: BaseNet): logger = logging.getLogger() # Get test data loader _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader) # Set device vae = vae.to(self.device) # Testing logger.info('Starting testing...') epoch_loss = 0.0 n_batches = 0 start_time = time.time() idx_label_score = [] vae.eval() with torch.no_grad(): for data in test_loader: inputs, labels, _, idx = data inputs, labels, idx = inputs.to(self.device), labels.to( self.device), idx.to(self.device) inputs = inputs.view(inputs.size(0), -1) rec = vae(inputs) likelihood = -binary_cross_entropy(rec, inputs) scores = -likelihood # negative likelihood as anomaly score # Save triple of (idx, label, score) in a list idx_label_score += list( zip(idx.cpu().data.numpy().tolist(), labels.cpu().data.numpy().tolist(), scores.cpu().data.numpy().tolist())) # Overall loss elbo = likelihood - vae.kl_divergence loss = -torch.mean(elbo) epoch_loss += loss.item() n_batches += 1 self.test_time = time.time() - start_time # Compute AUC _, labels, scores = zip(*idx_label_score) labels = np.array(labels) scores = np.array(scores) self.test_auc = roc_auc_score(labels, scores) # Log results logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches)) logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc)) logger.info('Test Time: {:.3f}s'.format(self.test_time)) logger.info('Finished testing variational autoencoder.')
def test(self, dataset: BaseADDataset, ae_net: BaseNet):
    """Score the test set by per-sample mean reconstruction error; log loss and AUC."""
    logger = logging.getLogger()

    # Test data loader.
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Per-element MSE so errors can be reduced per sample afterwards.
    criterion = nn.MSELoss(reduction='none')

    # Device placement.
    ae_net = ae_net.to(self.device)
    criterion = criterion.to(self.device)

    logger.info('Testing autoencoder...')
    total_loss, batch_count = 0.0, 0
    tic = time.time()
    records = []
    ae_net.eval()
    with torch.no_grad():
        for batch in test_loader:
            inputs, labels, _, idx = batch
            inputs, labels, idx = (inputs.to(self.device),
                                   labels.to(self.device),
                                   idx.to(self.device))

            rec = ae_net(inputs)
            rec_loss = criterion(rec, inputs)
            # Mean reconstruction error over non-batch dims is the anomaly score.
            sample_scores = torch.mean(rec_loss, dim=tuple(range(1, rec.dim())))

            records += list(zip(idx.cpu().data.numpy().tolist(),
                                labels.cpu().data.numpy().tolist(),
                                sample_scores.cpu().data.numpy().tolist()))

            total_loss += torch.mean(rec_loss).item()
            batch_count += 1

    self.test_time = time.time() - tic

    # AUC over the collected (idx, label, score) records.
    _, labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)

    logger.info('Test Loss: {:.6f}'.format(total_loss / batch_count))
    logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Test Time: {:.3f}s'.format(self.test_time))
    logger.info('Finished testing autoencoder.')
def train(self, dataset: BaseADDataset, net: BaseNet):
    """Train the LSTM autoencoder on reconstruction error and return it."""
    logger = logging.getLogger()

    # Device placement.
    net = net.to(self.device)

    train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                      num_workers=self.n_jobs_dataloader)

    optimizer = optim.RMSprop(net.parameters(),
                              lr=self.lr,
                              weight_decay=self.weight_decay,
                              eps=self.epsilon,
                              momentum=self.momentum)

    # Piecewise-constant LR decay at the configured milestones.
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=self.lr_milestones,
                                               gamma=0.1)

    logger.info('Starting train lstm_autoencoder ...')
    t0 = time.time()
    net.train()
    for epoch in range(self.n_epochs):

        scheduler.step()
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g'
                        % float(scheduler.get_lr()[0]))

        running_loss = 0.0
        batch_count = 0
        epoch_t0 = time.time()
        for batch in train_loader:
            inputs, _, _ = batch
            inputs = inputs.to(self.device)

            # Forward + backward + parameter update; the network consumes
            # inputs reshaped to (batch, 1, n_features) and returns (code, reconstruction).
            optimizer.zero_grad()
            _, outputs = net(inputs.view(-1, 1, self.n_features))
            per_sample = torch.sum((outputs - inputs) ** 2,
                                   dim=tuple(range(1, outputs.dim())))
            batch_loss = torch.mean(per_sample)
            batch_loss.backward()
            optimizer.step()

            running_loss += batch_loss.item()
            batch_count += 1

        logger.info(' Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'
                    .format(epoch + 1, self.n_epochs,
                            time.time() - epoch_t0,
                            running_loss / batch_count))

    self.train_time = time.time() - t0
    logger.info('lstm_autoencoder train time: %.3f' % self.train_time)
    logger.info('Finished train lstm_autoencoder.')

    return net
def test(self, dataset: BaseADDataset, net: BaseNet, corner_cracks=True):
    """Evaluate Deep SVDD on either the regular or the corner-cracks test split."""
    logger = logging.getLogger()

    # Device placement.
    net = net.to(self.device)

    # Pick the requested test loader (third loader = corner cracks).
    if not corner_cracks:
        _, test_loader, _ = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
    else:
        _, _, test_loader = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    logger.info('Starting testing...')
    tic = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for batch in test_loader:
            inputs, labels, idx = batch
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            dist = torch.sum((outputs - self.c) ** 2, dim=1)
            # Soft-boundary subtracts R^2 so positive scores lie outside the sphere.
            scores = dist - self.R ** 2 if self.objective == 'soft-boundary' else dist

            records += list(zip(idx.cpu().data.numpy().tolist(),
                                labels.cpu().data.numpy().tolist(),
                                scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - tic
    logger.info('Testing time: %.3f' % self.test_time)
    self.test_scores = records

    # Compute AUC
    _, labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)

    if not corner_cracks:
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    else:
        logger.info('Test set AUC (corner): {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished testing.')
def test(self, dataset: BaseADDataset, ae_net: BaseNet, test_image):
    """Test the autoencoder; for all-normal batches (label 0) also run a
    visual quality check against the corresponding reference image.
    """
    logger = logging.getLogger()

    # Set device for network
    ae_net = ae_net.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Testing autoencoder...')
    loss_epoch = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    ae_net.eval()
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = ae_net(inputs)
            # Fix: `labels == 0` is a tensor; truth-testing it raises a
            # RuntimeError for batches with more than one element. Using
            # .all() is well-defined for any batch size and identical for
            # batch size 1.
            if (labels == 0).all():
                check_autoencoder_quality(inputs, test_image[i], outputs)
            scores = torch.sum((outputs - inputs) ** 2,
                               dim=tuple(range(1, outputs.dim())))
            loss = torch.mean(scores)

            # Save triple of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

            loss_epoch += loss.item()
            n_batches += 1

    logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)

    auc = roc_auc_score(labels, scores)
    logger.info('Test set AUC: {:.2f}%'.format(100. * auc))

    test_time = time.time() - start_time
    logger.info('Autoencoder testing time: %.3f' % test_time)
    logger.info('Finished testing autoencoder.')
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate the Deep SVDD network on the test set.

    Stores (idx, label, score) triples, the test time and the ROC AUC on
    ``self``.

    Returns:
        str: stringified accuracy. Hard predictions are never collected in
        this implementation, so this is 0.0; the return is kept for
        interface compatibility with existing callers.
    """
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)
    list_output = []

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)
    logger.debug('num of test_loader : %d', len(test_loader))

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            dist = torch.sum((outputs - self.c) ** 2, dim=1)
            # Fix: removed per-batch print() debugging of dist/labels and the
            # unused `dist_` local.
            if self.objective == 'soft-boundary':
                scores = dist - self.R ** 2
            else:
                scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)

    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    # Fix: self.test_auc was logged without ever being computed (the
    # roc_auc_score call was commented out), which raised at runtime.
    self.test_auc = roc_auc_score(labels, scores)
    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

    # Fix: accuracy_score(labels, []) always raised because list_output is
    # never populated; only compute accuracy when predictions exist.
    if len(list_output) == len(labels) and len(list_output) > 0:
        test_acc = accuracy_score(labels, list_output)
        logger.info('Test set accuracy: {:.2f}%'.format(100. * test_acc))
    else:
        test_acc = 0.0

    logger.info('Finished testing.')
    return str(test_acc)
def _test(self, loader, net: BaseNet):
    """Evaluate a binary classifier with BCE-with-logits loss.

    Returns:
        (labels, scores, test_loss): numpy label/score arrays (scores are
        sigmoid probabilities) and the mean per-batch loss.
    """
    logger = logging.getLogger()

    # Device placement and loss.
    net = net.to(self.device)
    criterion = BCEWithLogitsLoss()

    logger.info('Starting testing...')
    total_loss, batch_count = 0.0, 0
    tic = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for batch in loader:
            inputs, labels, semi_targets, idx = batch

            inputs = inputs.to(self.device)
            labels = labels.to(self.device)
            semi_targets = semi_targets.to(self.device)
            idx = idx.to(self.device)

            outputs = net(inputs)
            # Cast labels to the output dtype before the loss.
            labels = labels.type_as(outputs)
            batch_loss = criterion(outputs, labels.unsqueeze(1))
            probs = outputs.sigmoid()

            records += list(zip(idx.cpu().data.numpy().tolist(),
                                labels.cpu().data.numpy().tolist(),
                                probs.cpu().data.numpy().tolist()))

            total_loss += batch_loss.item()
            batch_count += 1

    self.test_time = time.time() - tic
    self.test_scores = records

    # Compute metrics
    _, labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)
    test_loss = total_loss / batch_count
    return labels, scores, test_loss
def test(self, dataset: BaseADDataset, ae_net: BaseNet): logger = logging.getLogger() # Set device for network ae_net = ae_net.to(self.device) # Get test data loader test_loader = dataset.test_set # Testing logger.info('Testing autoencoder...') loss_epoch = 0.0 n_batches = 0 start_time = time.time() idx_label_score = [] ae_net.eval() with torch.no_grad(): for inputs, labels in test_loader: if len(inputs) == 32: inputs = inputs.to(self.device) inputs = inputs.unsqueeze(1) outputs = ae_net(inputs.float()) scores = torch.sum((outputs.float() - inputs.float())**2, dim=tuple(range(1, outputs.dim()))) loss = torch.mean(scores) # Save triple of (idx, label, score) in a list idx_label_score += list( zip(labels.cpu().data.numpy().tolist(), scores.cpu().data.numpy().tolist())) loss_epoch += loss.item() n_batches += 1 logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches)) labels, scores = zip(*idx_label_score) labels = np.array(labels) scores = np.array(scores) fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1) test_auc = auc(fpr, tpr) logger.info('Test set AUC: {:.2f}%'.format(100. * test_auc)) test_time = time.time() - start_time logger.info('Autoencoder testing time: %.3f' % test_time) logger.info('Finished testing autoencoder.')
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Score dataset.test_set with distance-to-center anomaly scores; log AUC."""
    logger = logging.getLogger()

    # Device placement.
    net = net.to(self.device)

    # The test split is iterated directly.
    test_loader = dataset.test_set

    logger.info('Starting testing...')
    tic = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(self.device)
            inputs = inputs.unsqueeze(1)
            outputs = net(inputs.float())
            dist = torch.sum((outputs.float() - self.c) ** 2, dim=1)
            # Soft-boundary subtracts R^2 so positive scores lie outside the sphere.
            scores = dist - self.R ** 2 if self.objective == 'soft-boundary' else dist

            records += list(zip(labels.cpu().data.numpy().tolist(),
                                scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - tic
    logger.info('Testing time: %.3f' % self.test_time)
    self.test_scores = records

    # Compute AUC
    labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)
    fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1)
    self.test_auc = auc(fpr, tpr)
    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished testing.')
def train_one_step(self, net: BaseNet, epoch: int):
    """Run one training epoch over ``self.train_loader``.

    Args:
        net: the classifier to train.
        epoch: zero-based epoch index (used for LR-milestone logging).

    Returns:
        dict with the epoch's mean training loss under 'train_loss'.
    """
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Training
    logger.info('Starting training...')
    start_time = time.time()
    net.train()
    epoch_loss = 0.0
    n_batches = 0
    epoch_start_time = time.time()
    for data in self.train_loader:
        inputs, targets, _, _ = data
        inputs, targets = inputs.to(self.device), targets.to(self.device)

        # Zero the network parameter gradients
        self.optimizer.zero_grad()

        # Update network parameters via backpropagation: forward + backward + optimize
        outputs = net(inputs)
        targets = targets.type_as(outputs)
        loss = self.criterion(outputs, targets.unsqueeze(1))
        loss.backward()
        self.optimizer.step()

        epoch_loss += loss.item()
        n_batches += 1

    self.scheduler.step()
    if epoch in self.lr_milestones:
        # Fix: referenced a bare `scheduler`, which is undefined in this
        # method (NameError); the scheduler lives on self.
        logger.info(' LR scheduler: new learning rate is %g'
                    % float(self.scheduler.get_lr()[0]))

    # log epoch statistics
    epoch_train_time = time.time() - epoch_start_time
    logger.info(
        f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {epoch_train_time:.3f}s '
        f'| Train Loss: {epoch_loss / n_batches:.6f} |')

    return {'train_loss': epoch_loss / n_batches}
def _test(self, loader, net: BaseNet):
    """Score ``loader`` with distance-to-center scores and the Deep SVDD loss.

    Returns:
        (labels, scores, test_loss): numpy arrays plus the mean batch loss.
    """
    logger = logging.getLogger()
    total_loss = 0.0
    batch_count = 0

    # Device placement.
    net = net.to(self.device)

    logger.info('Starting testing...')
    tic = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for batch in loader:
            inputs, labels, idx, _ = batch
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            dist = torch.sum((outputs - self.c) ** 2, dim=1)
            if self.objective == 'soft-boundary':
                # Scores are signed distances to the sphere boundary; the loss
                # penalizes only points outside the sphere, weighted by 1/nu.
                scores = dist - self.R ** 2
                batch_loss = self.R ** 2 + (1 / self.nu) * torch.mean(
                    torch.max(torch.zeros_like(scores), scores))
            else:
                batch_loss = torch.mean(dist)
                scores = dist
            total_loss += batch_loss.item()
            batch_count += 1

            records += list(zip(idx.cpu().data.numpy().tolist(),
                                labels.cpu().data.numpy().tolist(),
                                scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - tic
    logger.info('Testing time: %.3f' % self.test_time)

    _, labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)
    test_loss = total_loss / batch_count
    return labels, scores, test_loss
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Score the test split via ``self.lastlay`` over network outputs; log AUC."""
    logger = logging.getLogger()

    # Device placement.
    net = net.to(self.device)

    # Test data loader.
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    logger.info('Starting testing...')
    tic = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for batch in test_loader:
            inputs, labels, idx = batch
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            # The final scoring layer maps network outputs to anomaly scores.
            scores = self.lastlay(outputs)

            records += list(zip(idx.cpu().data.numpy().tolist(),
                                labels.cpu().data.numpy().tolist(),
                                scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - tic
    logger.info('Testing time: %.3f' % self.test_time)
    self.test_scores = records

    # Compute AUC
    _, labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)
    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished testing.')
def test(dataset: BaseADDataset, ae_net: BaseNet):
    """Run the autoencoder over paired (input, label) loaders and save image
    grids of inputs, labels and reconstructions for the first 16 samples of
    each batch under './log/test/'.

    Relies on module-level ``device``, ``batch_size`` and ``plot_images_grid``.
    """
    # Set device for network
    ae_net = ae_net.to(device)

    # Get test data loader
    letter, labels = dataset.loaders(batch_size=batch_size,
                                     num_workers=0,
                                     shuffle_test=False,
                                     shuffle_train=False)

    # Fix: removed unused locals (loss_epoch, n_batches, start_time) and a
    # stale "zero the gradients" comment — this function only does inference.
    with torch.no_grad():
        i = 0
        for data, label in zip(letter, labels):
            i += 1
            inputs, _ = data
            lab, _ = label
            inputs = inputs.to(device)
            lab = lab.to(device)

            outputs = ae_net(inputs)
            plot_images_grid(inputs[0:16],
                             export_img='./log/test/input' + str(i),
                             title='Input ', nrow=4, padding=4)
            plot_images_grid(lab[0:16],
                             export_img='./log/test/label' + str(i),
                             title='Label ', nrow=4, padding=4)
            plot_images_grid(outputs[0:16],
                             export_img='./log/test/output' + str(i),
                             title='Output ', nrow=4, padding=4)
def apply_model(self, dataset: BaseADDataset, net: BaseNet):
    """Apply the trained Deep SVDD model to the application split and store
    per-sample indices and anomaly scores on ``self``."""
    logger = logging.getLogger()

    # Device placement.
    net = net.to(self.device)

    # The third loader is the application split.
    _, _, apply_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

    logger.info('Starting Deep SVDD application.')
    tic = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for batch in apply_loader:
            inputs, nolabels, idx = batch  # nolabels are NaN
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            dist = torch.sum((outputs - self.c) ** 2, dim=1)
            # Soft-boundary subtracts R^2 so positive scores lie outside the sphere.
            scores = dist - self.R ** 2 if self.objective == 'soft-boundary' else dist

            records += list(zip(idx.cpu().data.numpy().tolist(),
                                scores.cpu().data.numpy().tolist()))

    self.apply_time = time.time() - tic
    logger.info('Deep SVDD application time: %.3f' % self.apply_time)

    ind, scores = zip(*records)
    self.ind = np.array(ind)
    self.scores = np.array(scores)

    logger.info('Finished Deep SVDD application.')
def __init__(self, dataset: BaseADDataset, network: BaseNet, k: int,
             lr: float, n_epochs: int, batch_size: int, rep_dim: int, K: int,
             weight_decay: float, device: str, n_jobs_dataloader: int,
             w_rec: float, w_feat: float, cfg):
    """Trainer holding the autoencoder, its optimizer, losses and a memory bank.

    Args:
        dataset: provides the train/test data loaders.
        network: autoencoder to train.
        k: neighborhood size used by this trainer.
        lr, n_epochs, batch_size, rep_dim, K, weight_decay, device,
        n_jobs_dataloader, w_rec, w_feat: forwarded to the base trainer.
        cfg: experiment configuration object.
    """
    super().__init__(lr, n_epochs, batch_size, rep_dim, K, weight_decay,
                     device, n_jobs_dataloader, w_rec, w_feat)
    self.ae_net = network.to(self.device)
    self.train_loader, self.test_loader = dataset.loaders(
        batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
    self.optimizer = optim.Adam(self.ae_net.parameters(),
                                lr=self.lr,
                                weight_decay=self.weight_decay)
    # Reconstruction is penalized with L1; feature matching with MSE.
    self.rec_loss = torch.nn.L1Loss()
    self.feat_loss = torch.nn.MSELoss()
    self.best_score = 0  # best evaluation score seen so far
    self.min_loss = 1000  # running minimum loss (large sentinel start value)
    self.k = k
    self.cfg = cfg
    self.logger = logging.getLogger()
    # One rep_dim-dimensional memory slot per training sample, randomly initialized.
    self.memory = torch.randn(size=(len(self.train_loader.dataset),
                                    self.rep_dim)).to(self.device)
def test(self, dataset: BaseADDataset, net: BaseNet, is_test=0):
    """Test the LSTM autoencoder and collect codes/labels for later use.

    Args:
        dataset: dataset providing train/test loaders.
        net: LSTM autoencoder returning (code, reconstruction).
        is_test: which split to evaluate — 0 for the test set, 1 for the
            training set (kept as the original's int flag for compatibility).
    """
    logger = logging.getLogger()

    # Set device for networks
    net = net.to(self.device)

    # Choose the loader: test split (is_test == 0) or train split (is_test == 1).
    if is_test == 0:
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)
    if is_test == 1:
        test_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Testing lstm_autoencoder...')
    loss_epoch = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)

            # Collect per-sample ground-truth labels for the chosen split.
            label = labels.numpy()
            if is_test == 0:
                for i in range(len(label)):
                    self.test_label.append(label[i])
            if is_test == 1:
                for i in range(len(label)):
                    self.train_label.append(label[i])

            # The network consumes (batch, 1, n_features) sequences and
            # returns (code, reconstruction).
            code, outputs = net(inputs.view(-1, 1, self.n_features))
            # Fix: Tensor.numpy() raises on CUDA tensors; move to CPU first.
            code = code.detach().cpu().numpy()
            if is_test == 0:
                for i in range(len(code)):
                    self.test_code.append(code[i])
            if is_test == 1:
                for i in range(len(code)):
                    self.train_code.append(code[i])

            scores = torch.sum((outputs - inputs) ** 2,
                               dim=tuple(range(1, outputs.dim())))
            loss = torch.mean(scores)

            # Save triple of (idx, label, score) in a list
            idx_label_score += list(zip(idx.cpu().data.numpy().tolist(),
                                        labels.cpu().data.numpy().tolist(),
                                        scores.cpu().data.numpy().tolist()))

            loss_epoch += loss.item()
            n_batches += 1

    logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

    self.test_time = time.time() - start_time
    logger.info('lstm_autoencoder testing time: %.3f' % self.test_time)
    self.test_scores = idx_label_score

    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    # Fix: replaced bare print() of the array lengths with debug logging.
    logger.debug('labels: %d, scores: %d', len(labels), len(scores))

    # Test split: report AUC.
    if is_test == 0:
        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished test lstm_autoencoder.')
def test(self, dataset: BaseADDataset, ae_net: BaseNet, flg=0):
    """Cluster-based anomaly scoring on autoencoder reconstruction errors.

    flg == 1 (training set): fit KMeans on the per-feature squared
    reconstruction errors of the (assumed normal) training data, then store
    the cluster centers and per-cluster radii on self.
    flg == 0 (test set): assign each sample to its nearest KMeans cluster;
    a sample is predicted anomalous (1) when its Manhattan distance to the
    cluster center exceeds that cluster's radius, normal (0) otherwise;
    AUC / MCC / F1 are then computed against the ground-truth labels.

    NOTE(review): assumes this was first called with flg == 1 so that
    self.kmeans / self.center / self.radius exist — verify call order.
    """
    logger = logging.getLogger()

    # Set device for network
    ae_net = ae_net.to(self.device)

    # flg == 1 -> training-set loader, flg == 0 -> test-set loader
    if flg == 1:
        test_loader, _ = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
    else:
        _, test_loader = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Testing ae...')
    loss_epoch = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    ae_net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = ae_net(inputs)
            # Scalar reconstruction score per sample (sum over feature dims).
            scores = torch.sum((outputs - inputs)**2,
                               dim=tuple(range(1, outputs.dim())))
            # Element-wise squared error, kept per feature for clustering.
            error = (outputs - inputs)**2
            loss = torch.mean(scores)

            # Save triple of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist(),
                    error.cpu().data.numpy().tolist()),
            )
            loss_epoch += loss.item()
            n_batches += 1

    logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

    _, labels, scores, error = zip(*idx_label_score)
    labels = np.array(labels)  # ground-truth labels, one per sample
    scores = np.array(scores)  # scalar reconstruction scores, one per sample
    error = np.array(error)    # per-sample, per-feature squared errors

    if flg == 1:
        # Training set: fit the cluster model of "normal" error patterns.
        X = error
        self.kmeans = KMeans(n_clusters=self.clusters).fit(X)
        self.center = self.kmeans.cluster_centers_.tolist()
        self.radius = self.get_radius(X)
        print("roc_self.center", self.center)
        print("roc_self.radius", self.radius)
    else:
        # Test set: predict via distance-to-center vs. cluster radius.
        Y = error
        pred_labels = []  # predicted labels (1 = anomaly, 0 = normal)
        pred_km = self.kmeans.predict(Y)
        print(pred_km.shape)
        print(pred_km)
        for i in range(len(pred_km)):
            # dis: distance from the assigned cluster center to the sample,
            # used as the classification criterion.
            dis = self.manhattan_distance(self.center[pred_km[i]], Y[i])
            if dis > self.radius[pred_km[i]]:
                pred_labels.append(1)
            else:
                pred_labels.append(0)
        pred_labels = np.array(pred_labels)
        self.test_ftr, self.test_tpr, _ = roc_curve(labels, pred_labels)
        # roc_self.test_auc = roc_auc_score(pred_labels, labels)
        # AUC of the binarized predictions serves as the accuracy measure.
        fpr, tpr, thresholds = roc_curve(labels, pred_labels)
        print(fpr, tpr)
        self.test_auc = auc(fpr, tpr)
        self.test_mcc = matthews_corrcoef(labels, pred_labels)
        _, _, f_score, _ = precision_recall_fscore_support(labels,
                                                           pred_labels,
                                                           labels=[0, 1])
        self.test_f_score = f_score[1]
    print(len(labels))
    print(len(scores))
    self.test_time = time.time() - start_time
    if flg == 0:
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('ae testing time: %.3f' % self.test_time)
    logger.info('Finished testing ae.')
def train(self, dataset: BaseADDataset, net: BaseNet):
    """Train *net* on the multi-center (k-cluster) Deep SVDD objective.

    Each sample is penalized by its squared distance to the *nearest* of
    the cluster centers stored in self.c; with the 'soft-boundary'
    objective the radius self.R is additionally updated from the
    mini-batch distance quantile after the warm-up epochs.

    Returns the trained network.
    """
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get train data loader
    train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                      num_workers=self.n_jobs_dataloader)

    # Set optimizer (Adam optimizer for now)
    optimizer = optim.Adam(net.parameters(),
                           lr=self.lr,
                           weight_decay=self.weight_decay,
                           amsgrad=self.optimizer_name == 'amsgrad')

    # Set learning rate scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=self.lr_milestones, gamma=0.1)

    # Initialize hypersphere center c (if c not loaded)
    if self.c is None:
        logger.info('Initializing center c...')
        self.c = self.init_center_c(train_loader, net)
        logger.info('Center c initialized.')

    # Training
    logger.info('Starting training...')
    start_time = time.time()
    net.train()

    # Hoisted out of the loops: self.c is fixed during training, so the
    # (n_clusters, rep_dim) center matrix never changes.
    centers = torch.transpose(self.c, 0, 1)

    for epoch in range(self.n_epochs):

        # NOTE: scheduler.step() at epoch start mirrors the reference
        # Deep-SVDD code; kept as-is to preserve the LR schedule.
        scheduler.step()
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g' %
                        float(scheduler.get_lr()[0]))

        loss_epoch = 0.0
        n_batches = 0
        epoch_start_time = time.time()
        for data in train_loader:
            inputs, _, _ = data
            inputs = inputs.to(self.device)

            # Zero the network parameter gradients
            optimizer.zero_grad()

            # Update network parameters via backpropagation: forward + backward + optimize
            outputs = net(inputs)

            # FIX: vectorized squared distance to the nearest cluster
            # center — one broadcasted (batch, n_clusters, rep_dim)
            # computation replaces the former per-sample Python loop.
            dist = torch.sum(
                (outputs.unsqueeze(1) - centers.unsqueeze(0)) ** 2,
                dim=2).min(dim=1)[0]

            if self.objective == 'soft-boundary':
                scores = dist - self.R ** 2
                loss = self.R ** 2 + (1 / self.nu) * torch.mean(
                    torch.max(torch.zeros_like(scores), scores))
            else:
                loss = torch.mean(dist)
            loss.backward()
            optimizer.step()

            # Update hypersphere radius R on mini-batch distances
            if (self.objective == 'soft-boundary') and (
                    epoch >= self.warm_up_n_epochs):
                self.R.data = torch.tensor(get_radius(dist, self.nu),
                                           device=self.device)

            loss_epoch += loss.item()
            n_batches += 1

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(' Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format(
            epoch + 1, self.n_epochs, epoch_train_time,
            loss_epoch / n_batches))

    self.train_time = time.time() - start_time
    logger.info('Training time: %.3f' % self.train_time)
    logger.info('Finished training.')

    return net
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate the (multi-center) Deep SVDD model on the test set.

    Scores each test sample by its squared distance to the hypersphere
    center (single-center case, self.c.dim() == 1) or to the nearest of
    the k cluster centers (multi-center case), logs the ROC AUC, and then
    renders a UMAP plot of the training-set embeddings against the
    k-means centers stored in 'centers.npy'.
    """
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net.eval()
    output_data = []
    label_data = []
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            output_data.append(outputs)
            label_data.append(labels)

            if (self.c.dim() == 1):
                # naive deep_svdd: a single hypersphere center
                dist = torch.sum((outputs - self.c)**2, dim=1)
            else:
                # Multi-center: squared distance to the *nearest* center.
                # FIX: vectorized over the batch — replaces the former
                # per-sample Python loop with one broadcasted computation.
                centers = torch.transpose(self.c, 0, 1)
                dist = torch.sum(
                    (outputs.unsqueeze(1) - centers.unsqueeze(0)) ** 2,
                    dim=2).min(dim=1)[0]

            if self.objective == 'soft-boundary':
                scores = dist - self.R**2
            else:
                scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)
    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)
    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

    # UMAP plot of the *training* embeddings, colored by label, against the
    # k-means centers.  (A commented-out UMAP-on-test variant that used to
    # live here was dead code and has been removed.)
    train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                      num_workers=self.n_jobs_dataloader)
    output_data = []
    label_data = []
    with torch.no_grad():
        for data in train_loader:
            # labels are only used to color the hypersphere UMAP plot
            inputs, labels, _ = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            output_data.append(outputs)
            label_data.append(labels)

    kmeans_centers = np.load('centers.npy')
    output_data = torch.cat(output_data)
    label_data = torch.cat(label_data).numpy()
    self.latent_UMAP(output_data, label_data, kmeans_centers,
                     anomaly_data=True)

    logger.info('Finished testing.')
def train(self, dataset: BaseADDataset, net: BaseNet):
    """Train *net* with the Deep SAD objective and return the trained net.

    Unlabeled samples (semi_target == 0) are pulled toward the hypersphere
    center c; labeled samples are weighted by eta with a semi_target-signed
    exponent on their (distance + eps).  Uses Adam with a MultiStepLR
    schedule; c is initialized from an initial forward pass when unset.
    """
    logger = logging.getLogger()

    # Loader for the training split.
    train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                      num_workers=self.n_jobs_dataloader)

    # Move the model onto the configured device.
    net = net.to(self.device)

    # Adam optimizer with step-wise LR decay at the configured milestones.
    optimizer = optim.Adam(net.parameters(),
                           lr=self.lr,
                           weight_decay=self.weight_decay)
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=self.lr_milestones, gamma=0.1)

    # Lazily initialize the hypersphere center c.
    if self.c is None:
        logger.info('Initializing center c...')
        self.c = self.init_center_c(train_loader, net)
        logger.info('Center c initialized.')

    logger.info('Starting training...')
    start_time = time.time()
    net.train()

    for epoch in range(self.n_epochs):
        scheduler.step()
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g' %
                        float(scheduler.get_lr()[0]))

        running_loss = 0.0
        batch_count = 0
        epoch_start = time.time()
        for inputs, _, semi_targets, _ in train_loader:
            inputs = inputs.to(self.device)
            semi_targets = semi_targets.to(self.device)

            # Reset gradients before the backward pass.
            optimizer.zero_grad()

            # Squared distance of each embedding to the center c.
            embeddings = net(inputs)
            dist = torch.sum((embeddings - self.c)**2, dim=1)

            # Deep SAD per-sample loss: plain distance for unlabeled data,
            # eta-weighted signed-exponent term for labeled data.
            per_sample = torch.where(
                semi_targets == 0, dist,
                self.eta * ((dist + self.eps)**semi_targets.float()))
            loss = torch.mean(per_sample)

            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            batch_count += 1

        elapsed = time.time() - epoch_start
        logger.info(
            f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {elapsed:.3f}s '
            f'| Train Loss: {running_loss / batch_count:.6f} |')

    self.train_time = time.time() - start_time
    logger.info('Training Time: {:.3f}s'.format(self.train_time))
    logger.info('Finished training.')

    return net
def train(self, dataset: BaseADDataset, net: BaseNet):
    """Train *net* with a positive-unlabeled (PU) logistic risk estimator.

    semi_targets is used as the positive indicator (1 = labeled positive,
    0 = unlabeled) and self.pi is the assumed class prior of positives.
    The risk is pi * R_p^+ + (R_u^- - pi * R_p^-), built from the logistic
    surrogate log(1 + exp(.)).

    Side effects: stores the loader in self.train_loader and the total
    training time in self.train_time.  Returns the trained network.
    """
    logger = logging.getLogger()

    # Get train data loader
    train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                      num_workers=self.n_jobs_dataloader)
    self.train_loader = train_loader

    # Set device for network
    net = net.to(self.device)

    # Set optimizer (Adam optimizer for now)
    optimizer = optim.Adam(net.parameters(),
                           lr=self.lr,
                           weight_decay=self.weight_decay)

    # Set learning rate scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=self.lr_milestones, gamma=0.1)

    # Training
    logger.info('Starting training...')
    start_time = time.time()
    net.train()
    for epoch in range(self.n_epochs):

        scheduler.step()
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g' %
                        float(scheduler.get_lr()[0]))

        epoch_loss = 0.0
        n_batches = 0
        epoch_start_time = time.time()
        for data in train_loader:
            inputs, _, semi_targets, _ = data
            inputs, semi_targets = inputs.to(self.device), semi_targets.to(
                self.device)

            # Zero the network parameter gradients
            optimizer.zero_grad()

            # Update network parameters via backpropagation: forward + backward + optimize
            outputs = net(inputs)

            # Positive indicator and its complement (unlabeled indicator).
            positive, unlabeled = semi_targets, 1 - semi_targets
            # Guard against an all-unlabeled / all-positive batch so the
            # normalizers never divide by zero.
            n_positive, n_unlabeled = max([1., torch.sum(positive)]), max(
                [1., torch.sum(unlabeled)])

            # FIX: log(1 + exp(x)) overflows to inf for large |x|;
            # softplus(x) computes the identical quantity in a
            # numerically stable way.
            gp = torch.t(torch.nn.functional.softplus(-outputs))
            gu = torch.t(torch.nn.functional.softplus(outputs))

            # Unbiased PU risk: pi * R_p^+ + (R_u^- - pi * R_p^-).
            loss_positive = self.pi * torch.sum(gp * positive) / n_positive
            loss_negative = torch.sum(
                gu * unlabeled) / n_unlabeled - self.pi * torch.sum(
                    gu * positive) / n_positive
            loss = loss_positive + loss_negative

            loss.backward()
            optimizer.step()

            epoch_loss += loss.item()
            n_batches += 1

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(
            f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {epoch_train_time:.3f}s '
            f'| Train Loss: {epoch_loss / n_batches:.6f} |')

    self.train_time = time.time() - start_time
    logger.info('Training Time: {:.3f}s'.format(self.train_time))
    logger.info('Finished training.')

    return net
def train(self, dataset: BaseADDataset, oe_dataset: BaseADDataset, net: BaseNet):
    """Train *net* on `dataset`, optionally mixed with an outlier-exposure
    (OE) dataset, under one of the objectives 'hsc', 'deepSAD', 'bce' or
    'focal'.

    Runs self.n_epochs training epochs plus one extra, evaluation-only
    epoch over the original training set to record per-sample train scores
    into self.train_scores.  Returns the trained network.

    NOTE(review): oe_loader.dataset.offset / .shuffle_idxs / .idxs are
    attributes of the project's OE dataset implementation — not visible
    here; confirm their semantics against that class.
    """
    logger = logging.getLogger()

    # Get train data loader
    # When an OE dataset is used, the worker budget is split between the
    # two loaders.
    if oe_dataset is not None:
        num_workers = int(self.n_jobs_dataloader / 2)
    else:
        num_workers = self.n_jobs_dataloader
    train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                      num_workers=num_workers)

    if oe_dataset is not None:
        if oe_dataset.shuffle:
            if len(dataset.train_set) > len(oe_dataset.train_set):
                # OE set is smaller: sample with replacement so each epoch
                # sees as many OE samples as in-distribution samples.
                oe_sampler = RandomSampler(oe_dataset.train_set,
                                           replacement=True,
                                           num_samples=len(
                                               dataset.train_set))
                oe_loader = DataLoader(dataset=oe_dataset.train_set,
                                       batch_size=self.batch_size,
                                       shuffle=False,
                                       sampler=oe_sampler,
                                       num_workers=num_workers,
                                       drop_last=True)
            else:
                oe_loader = DataLoader(dataset=oe_dataset.train_set,
                                       batch_size=self.batch_size,
                                       shuffle=True,
                                       num_workers=num_workers,
                                       drop_last=True)
        else:
            oe_loader = DataLoader(dataset=oe_dataset.train_set,
                                   batch_size=self.batch_size,
                                   shuffle=False,
                                   num_workers=num_workers,
                                   drop_last=True)
        # Pair each in-distribution batch with an OE batch.
        dataset_loader = zip(train_loader, oe_loader)
    else:
        dataset_loader = train_loader

    # Set loss
    if self.objective in ['bce', 'focal']:
        if self.objective == 'bce':
            criterion = nn.BCEWithLogitsLoss()
        if self.objective == 'focal':
            criterion = FocalLoss(gamma=self.focal_gamma)
        criterion = criterion.to(self.device)

    # Set device
    net = net.to(self.device)

    # Set optimizer
    optimizer = optim.Adam(net.parameters(),
                           lr=self.lr,
                           weight_decay=self.weight_decay)

    # Set learning rate scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=self.lr_milestones, gamma=0.1)

    # Training
    logger.info('Starting training...')
    net.train()
    start_time = time.time()
    # n_epochs training epochs + 1 final eval-only epoch (see below).
    for epoch in range(self.n_epochs + 1):

        epoch_loss = 0.0
        n_batches = 0
        idx_label_score = []
        epoch_start_time = time.time()

        # start at random point for the outlier exposure dataset in each epoch
        if (oe_dataset is not None) and (epoch < self.n_epochs):
            oe_loader.dataset.offset = np.random.randint(
                len(oe_loader.dataset))
            if oe_loader.dataset.shuffle_idxs:
                random.shuffle(oe_loader.dataset.idxs)
            # The zip from the previous epoch is exhausted; rebuild it.
            dataset_loader = zip(train_loader, oe_loader)

        # only load samples from the original training set in a last epoch for saving train scores
        if epoch == self.n_epochs:
            dataset_loader = train_loader
            net.eval()

        for data in dataset_loader:
            if (oe_dataset is not None) and (epoch < self.n_epochs):
                # Concatenate the in-distribution and OE halves of the batch.
                inputs = torch.cat((data[0][0], data[1][0]), 0)
                labels = torch.cat((data[0][1], data[1][1]), 0)
                semi_targets = torch.cat((data[0][2], data[1][2]), 0)
                idx = torch.cat((data[0][3], data[1][3]), 0)
            else:
                inputs, labels, semi_targets, idx = data

            inputs = inputs.to(self.device)
            labels = labels.to(self.device)
            semi_targets = semi_targets.to(self.device)
            idx = idx.to(self.device)

            # Zero the network parameter gradients
            # (skipped in the final eval-only epoch).
            if epoch < self.n_epochs:
                optimizer.zero_grad()

            # Update network parameters via backpropagation: forward + backward + optimize
            outputs = net(inputs)

            if self.objective == 'hsc':
                # Hypersphere classifier: distance of the embedding to the
                # origin under the configured norm.
                if self.hsc_norm == 'l1':
                    dists = torch.norm(outputs, p=1, dim=1)
                if self.hsc_norm == 'l2':
                    dists = torch.norm(outputs, p=2, dim=1)
                if self.hsc_norm == 'l2_squared':
                    dists = torch.norm(outputs, p=2, dim=1)**2
                if self.hsc_norm == 'l2_squared_linear':
                    dists = torch.sqrt(
                        torch.norm(outputs, p=2, dim=1)**2 + 1) - 1

                scores = 1 - torch.exp(-dists)
                # Unlabeled samples are pulled in; labeled anomalies are
                # pushed out via the -log term.
                losses = torch.where(semi_targets == 0, dists,
                                     -torch.log(scores + self.eps))
                loss = torch.mean(losses)

            if self.objective == 'deepSAD':
                dists = torch.norm(outputs, p=2, dim=1)**2
                scores = dists
                # semi_target-signed exponent flips the penalty direction
                # for labeled samples.
                losses = torch.where(
                    semi_targets == 0, dists,
                    ((dists + self.eps)**semi_targets.float()))
                loss = torch.mean(losses)

            if self.objective in ['bce', 'focal']:
                # Binary targets: semi_target == -1 marks known anomalies.
                targets = torch.zeros(inputs.size(0))
                targets[semi_targets == -1] = 1
                targets = targets.view(-1, 1).to(self.device)

                scores = torch.sigmoid(outputs)
                loss = criterion(outputs, targets)

            if epoch < self.n_epochs:
                loss.backward()
                optimizer.step()

            # save train scores in last epoch
            if epoch == self.n_epochs:
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.flatten().cpu().data.numpy().tolist()))

            epoch_loss += loss.item()
            n_batches += 1

        # Take learning rate scheduler step
        scheduler.step()
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g' %
                        float(scheduler.get_last_lr()[0]))

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(
            f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {epoch_train_time:.3f}s '
            f'| Train Loss: {epoch_loss / n_batches:.6f} |')

    self.train_time = time.time() - start_time
    self.train_scores = idx_label_score

    # Log results
    logger.info('Train Time: {:.3f}s'.format(self.train_time))
    logger.info('Train Loss: {:.6f}'.format(epoch_loss / n_batches))
    logger.info('Finished training.')

    return net