def init_center_c(self, train_loader: DataLoader, net: BaseNet, eps=0.1):
    """Initialize hypersphere center c as the mean from an initial forward pass on the data.

    Args:
        train_loader: loader yielding (inputs, _, _) batches.
        net: encoder network; must expose ``rep_dim`` (output feature size)
            and return a 3-tuple whose first element is the representation.
        eps: lower bound on ``|c_i|`` so no coordinate sits at exactly zero.

    Returns:
        torch.Tensor of shape (net.rep_dim,) on ``self.device``.
    """
    n_samples = 0
    c = torch.zeros(net.rep_dim, device=self.device)

    net.eval()
    with torch.no_grad():
        for data in train_loader:
            # get the inputs of the batch
            # FIX: the original branched on dataset_name ('object'/'texture'
            # vs. anything else) but both branches unpacked identically, so
            # the dead branch was removed.
            inputs, _, _ = data
            inputs = inputs.to(self.device)
            outputs, _, _ = net(inputs)
            n_samples += outputs.shape[0]
            c += torch.sum(outputs, dim=0)

    c /= n_samples

    # If c_i is too close to 0, set to +-eps. Reason: a zero unit can be
    # trivially matched with zero weights.
    c[(abs(c) < eps) & (c < 0)] = -eps
    c[(abs(c) < eps) & (c > 0)] = eps

    return c
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate `net` on the test split with the Deep SAD objective.

    Stores (idx, label, score) triples in ``self.test_scores``, the wall-clock
    time in ``self.test_time`` and the ROC AUC in ``self.test_auc``.
    """
    logger = logging.getLogger()

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Set device for network
    net = net.to(self.device)

    # Testing
    logger.info('Starting testing...')
    epoch_loss = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, semi_targets, idx = data

            inputs = inputs.to(self.device)
            labels = labels.to(self.device)
            semi_targets = semi_targets.to(self.device)
            # FIX: replaced a stray debug print() of the unique semi-targets
            # with a debug-level log message (lazy %-formatting).
            logger.debug('Unique Semi Targets: %s',
                         np.unique(semi_targets.data.cpu().numpy()))
            idx = idx.to(self.device)

            outputs = net(inputs)
            # Anomaly score: squared distance to the hypersphere center c.
            dist = torch.sum((outputs - self.c)**2, dim=1)
            # Deep SAD loss: unlabeled samples use the raw distance; labeled
            # ones get the eta-weighted exponentiated distance (the sign of
            # semi_targets pushes anomalies away from / normals toward c).
            losses = torch.where(
                semi_targets == 0, dist,
                self.eta * ((dist + self.eps)**semi_targets.float()))
            loss = torch.mean(losses)
            scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

            epoch_loss += loss.item()
            n_batches += 1

    self.test_time = time.time() - start_time
    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)

    # Log results
    logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
    logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Test Time: {:.3f}s'.format(self.test_time))
    logger.info('Finished testing.')
def _get_output(self, loader, net: BaseNet):
    """Run a forward pass over `loader` and return (labels, outputs) arrays.

    Also records the elapsed time in ``self.test_time``.  Each loader batch
    is expected to yield (inputs, labels, idx, _).
    """
    logger = logging.getLogger()
    # FIX: removed unused locals epoch_loss / n_batches — no loss is
    # computed in this method.

    # Set device for network
    net = net.to(self.device)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_output = []
    net.eval()
    with torch.no_grad():
        for data in loader:
            inputs, labels, idx, _ = data
            inputs = inputs.to(self.device)

            outputs = net(inputs)

            # Save triples of (idx, label, output) in a list
            idx_label_output += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    outputs.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)

    _, labels, outputs = zip(*idx_label_output)
    labels = np.array(labels)
    outputs = np.array(outputs)

    return labels, outputs
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate `net` on the test split and record loss, ROC-AUC and PR-AUC.

    Results land on the instance: ``test_time``, ``test_scores``,
    ``auc_roc``, ``pr_curve``, ``auc_pr`` and ``test_loss``.
    """
    logger = logging.getLogger()

    # Only the test loader is needed here.
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    net = net.to(self.device)

    logger.info('Starting testing...')
    total_loss = 0.0
    batch_count = 0
    start_time = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for batch in test_loader:
            inputs, labels, semi_targets, idx = batch

            inputs = inputs.to(self.device)
            labels = labels.to(self.device)
            semi_targets = semi_targets.to(self.device)
            idx = idx.to(self.device)

            outputs = net(inputs)
            # Squared Euclidean distance to the hypersphere center.
            distances = ((outputs - self.c) ** 2).sum(dim=1)
            # Unlabeled samples (semi_target == 0) contribute the raw
            # distance; labeled ones the eta-weighted exponentiated distance.
            batch_losses = torch.where(
                semi_targets == 0,
                distances,
                self.eta * ((distances + self.eps) ** semi_targets.float()))
            batch_loss = torch.mean(batch_losses)
            anomaly_scores = distances

            # Accumulate (idx, label, score) triples for the metrics below.
            records.extend(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    anomaly_scores.cpu().data.numpy().tolist()))

            total_loss += batch_loss.item()
            batch_count += 1

    self.test_time = time.time() - start_time
    self.test_scores = records

    # Aggregate labels/scores across all batches.
    _, labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)

    # ROC AUC
    self.auc_roc = roc_auc_score(labels, scores)

    # Precision-recall curve and its AUC.
    self.pr_curve = precision_recall_curve(labels, scores)
    precision, recall, thresholds = self.pr_curve
    self.auc_pr = auc(recall, precision)

    self.test_loss = total_loss / batch_count
def init_center_c_w(self, train_loader: DataLoader, net: BaseNet, eps=0.1):
    """Initialize hypersphere center c as a gradient-weighted mean of representations.

    Two passes over the data:
      1. find the maximum input-gradient norm (``grad_max``) over all batches;
      2. accumulate representations down-weighted by their relative gradient
         norm: ``(1 - grads_norm / grad_max) * outputs``.

    Args:
        train_loader: loader yielding (inputs, _, _) batches.
        net: encoder network; must expose ``rep_dim``.
        eps: lower bound on ``|c_i|`` so no coordinate sits at exactly zero.

    Returns:
        torch.Tensor of shape (net.rep_dim,) on ``self.device``.
    """
    n_samples = 0
    c = torch.zeros(net.rep_dim, device=self.device)
    net.eval()

    grad_max = torch.tensor([-np.inf], device=self.device)

    # Pass 1: compute grad_max only. FIX: n_samples is no longer incremented
    # here — the original counted samples in BOTH passes while accumulating
    # c only in the second, so the center was divided by twice the dataset
    # size.
    for data in train_loader:
        # get the inputs of the batch
        inputs, _, _ = data
        inputs = inputs.to(self.device)
        inputs.requires_grad_(True)
        outputs = net(inputs)
        if self.mode == 'weight':
            # TODO: weight-gradient variant not implemented for this pass.
            pass
        elif self.mode == 'input':
            grads = \
                torch.autograd.grad(outputs=outputs.sum(), inputs=inputs,
                                    create_graph=False, retain_graph=False)[0]
            b = grads.shape[0]
            # NOTE(review): this sums the squared gradients over the whole
            # batch (no per-sample dim=1 reduction) — confirm that a single
            # batch-level norm is intended here.
            grads_norm = (torch.sum(grads.view(b, -1) ** 2) + 1e-5)
            grad_max = torch.maximum(grad_max, grads_norm.max())
        inputs.requires_grad_(False)

    # Pass 2: accumulate gradient-weighted representations.
    for data in train_loader:
        # get the inputs of the batch
        inputs, _, _ = data
        inputs = inputs.to(self.device)
        inputs.requires_grad_(True)
        outputs = net(inputs)
        if self.mode == 'weight':
            # TODO: weight-gradient variant not implemented; outputs are
            # accumulated unweighted in this mode.
            pass
        elif self.mode == 'input':
            grads = \
                torch.autograd.grad(outputs=outputs.sum(), inputs=inputs,
                                    create_graph=False, retain_graph=False)[0]
            b = grads.shape[0]
            grads_norm = (torch.sum(grads.view(b, -1) ** 2) + 1e-5)
            # Batches with large gradient norm contribute less to c.
            outputs = (1 - grads_norm / grad_max) * outputs
        inputs.requires_grad_(False)
        n_samples += outputs.shape[0]
        c += torch.sum(outputs.detach(), dim=0).squeeze()

    c /= n_samples

    # If c_i is too close to 0, set to +-eps. Reason: a zero unit can be
    # trivially matched with zero weights.
    c[(abs(c) < eps) & (c < 0)] = -eps
    c[(abs(c) < eps) & (c > 0)] = eps

    return c
def test(self, dataset: BaseADDataset, vae: BaseNet): logger = logging.getLogger() # Get test data loader _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader) # Set device vae = vae.to(self.device) # Testing logger.info('Starting testing...') epoch_loss = 0.0 n_batches = 0 start_time = time.time() idx_label_score = [] vae.eval() with torch.no_grad(): for data in test_loader: inputs, labels, _, idx = data inputs, labels, idx = inputs.to(self.device), labels.to( self.device), idx.to(self.device) inputs = inputs.view(inputs.size(0), -1) rec = vae(inputs) likelihood = -binary_cross_entropy(rec, inputs) scores = -likelihood # negative likelihood as anomaly score # Save triple of (idx, label, score) in a list idx_label_score += list( zip(idx.cpu().data.numpy().tolist(), labels.cpu().data.numpy().tolist(), scores.cpu().data.numpy().tolist())) # Overall loss elbo = likelihood - vae.kl_divergence loss = -torch.mean(elbo) epoch_loss += loss.item() n_batches += 1 self.test_time = time.time() - start_time # Compute AUC _, labels, scores = zip(*idx_label_score) labels = np.array(labels) scores = np.array(scores) self.test_auc = roc_auc_score(labels, scores) # Log results logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches)) logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc)) logger.info('Test Time: {:.3f}s'.format(self.test_time)) logger.info('Finished testing variational autoencoder.')
def test(self, dataset: BaseADDataset, ae_net: BaseNet):
    """Score the test set by per-sample MSE reconstruction error and log the ROC AUC.

    Stores ``self.test_time`` and ``self.test_auc``.
    """
    logger = logging.getLogger()

    # Only the test loader is needed.
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Element-wise loss so we can reduce per-sample for the anomaly scores.
    criterion = nn.MSELoss(reduction='none').to(self.device)
    ae_net = ae_net.to(self.device)

    logger.info('Testing autoencoder...')
    total_loss = 0.0
    batch_count = 0
    start_time = time.time()
    records = []
    ae_net.eval()
    with torch.no_grad():
        for batch in test_loader:
            inputs, labels, _, idx = batch
            inputs, labels, idx = inputs.to(self.device), labels.to(
                self.device), idx.to(self.device)

            reconstruction = ae_net(inputs)
            elementwise_loss = criterion(reconstruction, inputs)
            # Mean over all non-batch dims -> one score per sample.
            sample_scores = torch.mean(
                elementwise_loss, dim=tuple(range(1, reconstruction.dim())))

            # Accumulate (idx, label, score) triples.
            records.extend(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    sample_scores.cpu().data.numpy().tolist()))

            total_loss += torch.mean(elementwise_loss).item()
            batch_count += 1

    self.test_time = time.time() - start_time

    # ROC AUC over the collected scores.
    _, labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)

    logger.info('Test Loss: {:.6f}'.format(total_loss / batch_count))
    logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Test Time: {:.3f}s'.format(self.test_time))
    logger.info('Finished testing autoencoder.')
def test(self, dataset: BaseADDataset, net: BaseNet, corner_cracks=True):
    """Evaluate Deep SVDD on either the regular or the corner-cracks test split.

    Stores (idx, label, score) triples in ``self.test_scores``,
    ``self.test_time`` and ``self.test_auc``.
    """
    logger = logging.getLogger()

    net = net.to(self.device)

    # dataset.loaders yields (train, regular-test, corner-test); pick the
    # split requested by the flag.
    if not corner_cracks:
        _, test_loader, _ = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
    else:
        _, _, test_loader = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    logger.info('Starting testing...')
    start_time = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for batch in test_loader:
            inputs, labels, idx = batch
            inputs = inputs.to(self.device)

            outputs = net(inputs)
            # Squared distance to the hypersphere center.
            distances = ((outputs - self.c) ** 2).sum(dim=1)
            if self.objective == 'soft-boundary':
                # Soft-boundary scores are relative to the sphere radius.
                anomaly_scores = distances - self.R ** 2
            else:
                anomaly_scores = distances

            # Accumulate (idx, label, score) triples.
            records.extend(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    anomaly_scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)

    self.test_scores = records

    # ROC AUC over the collected scores.
    _, labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)

    if not corner_cracks:
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    else:
        logger.info('Test set AUC (corner): {:.2f}%'.format(100. *
                                                            self.test_auc))
    logger.info('Finished testing.')
def test(self, dataset: BaseADDataset, net1: BaseNet, net2: BaseNet):
    """Evaluate a two-stage model (encoder net1 -> head net2) with Deep SVDD scoring.

    Stores ``self.test_ftr``/``self.test_tpr`` (ROC curve), ``self.test_score``
    and ``self.test_auc``.
    """
    logger = logging.getLogger()
    # FIX: replaced stray debug print() calls of R and c with debug-level
    # log messages (lazy %-formatting).
    logger.debug('R: %s', self.R)
    logger.debug('c: %s', self.c)

    # Set device for networks
    net1 = net1.to(self.device)
    net2 = net2.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net1.eval()
    net2.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            # net1 encodes fixed-length windows of 9 values; net2 maps the
            # code to the Deep SVDD representation.
            code, _ = net1(inputs.view(-1, 1, 9))
            outputs = net2(code)
            dist = torch.sum((outputs - self.c)**2, dim=1)
            if self.objective == 'soft-boundary':
                scores = dist - self.R**2
            else:
                scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)

    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    # NOTE(review): attribute name 'test_ftr' looks like a typo for the
    # false-positive rate ('test_fpr'); kept as-is since callers may read it.
    self.test_ftr, self.test_tpr, _ = roc_curve(labels, scores)
    self.test_score = scores
    self.test_auc = roc_auc_score(labels, scores)

    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished testing.')
def test(self, dataset: BaseADDataset, ae_net: BaseNet, test_image):
    """Evaluate the autoencoder; additionally visually check reconstructions of normal samples.

    Args:
        dataset: provides the test loader.
        ae_net: the autoencoder under test.
        test_image: per-batch reference images passed to
            check_autoencoder_quality for normal (label 0) samples.
    """
    logger = logging.getLogger()

    # Set device for network
    ae_net = ae_net.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Testing autoencoder...')
    loss_epoch = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    ae_net.eval()
    # FIX: removed a commented-out pdb breakpoint left over from debugging.
    with torch.no_grad():
        for i, data in enumerate(test_loader):
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = ae_net(inputs)
            # NOTE(review): `labels == 0` on a batch tensor is only valid
            # when the batch size is 1 — confirm the loader is built that way.
            if labels == 0:
                check_autoencoder_quality(inputs, test_image[i], outputs)
            # Per-sample squared reconstruction error as anomaly score.
            scores = torch.sum((outputs - inputs)**2,
                               dim=tuple(range(1, outputs.dim())))
            loss = torch.mean(scores)

            # Save triple of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

            loss_epoch += loss.item()
            n_batches += 1

    logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)

    # FIX: renamed the local from `auc` to `test_auc` so it no longer
    # shadows sklearn's auc() function used elsewhere in this file.
    test_auc = roc_auc_score(labels, scores)
    logger.info('Test set AUC: {:.2f}%'.format(100. * test_auc))

    test_time = time.time() - start_time
    logger.info('Autoencoder testing time: %.3f' % test_time)
    logger.info('Finished testing autoencoder.')
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate Deep SVDD on the test set and return the accuracy as a string.

    Stores (idx, label, score) triples in ``self.test_scores``,
    ``self.test_time`` and ``self.test_auc``.
    """
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)
    # NOTE(review): list_output is never populated anywhere in this method,
    # so no per-sample predictions exist for the accuracy computation below.
    list_output = []

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)
    logger.debug('num of test_loader : %d', len(test_loader))

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            # FIX: removed stray debug print()s of dist/labels and the
            # unused dist_ local.
            dist = torch.sum((outputs - self.c)**2, dim=1)
            if self.objective == 'soft-boundary':
                scores = dist - self.R**2
            else:
                scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)

    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)

    # FIX: the AUC computation had been commented out while self.test_auc
    # was still logged below, which raised AttributeError unless it was set
    # elsewhere — restored.
    self.test_auc = roc_auc_score(labels, scores)
    # FIX: accuracy_score() crashed on the always-empty list_output
    # (length mismatch with labels); guard and report NaN when there are
    # no predictions.
    test_acc = accuracy_score(labels, list_output) if list_output else float('nan')

    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished testing.')
    return str(test_acc)
def _test(self, loader, net: BaseNet):
    """Evaluate a binary classifier on `loader` with BCE-with-logits loss.

    Returns:
        (labels, scores, test_loss) — numpy arrays of labels and sigmoid
        scores, plus the mean batch loss.  Also stores ``self.test_time``
        and ``self.test_scores``.
    """
    logger = logging.getLogger()

    net = net.to(self.device)
    criterion = BCEWithLogitsLoss()

    logger.info('Starting testing...')
    total_loss = 0.0
    batch_count = 0
    start_time = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for batch in loader:
            inputs, labels, semi_targets, idx = batch
            inputs = inputs.to(self.device)
            labels = labels.to(self.device)
            semi_targets = semi_targets.to(self.device)
            idx = idx.to(self.device)

            logits = net(inputs)
            # Match the logits' dtype for the BCE target.
            labels = labels.type_as(logits)
            batch_loss = criterion(logits, labels.unsqueeze(1))
            probabilities = logits.sigmoid()

            # Accumulate (idx, label, score) triples.
            records.extend(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    probabilities.cpu().data.numpy().tolist()))

            total_loss += batch_loss.item()
            batch_count += 1

    self.test_time = time.time() - start_time
    self.test_scores = records

    # Split the accumulated triples into arrays for the caller.
    _, labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)
    test_loss = total_loss / batch_count

    return labels, scores, test_loss
def test(self, dataset: BaseADDataset, ae_net: BaseNet): logger = logging.getLogger() # Set device for network ae_net = ae_net.to(self.device) # Get test data loader test_loader = dataset.test_set # Testing logger.info('Testing autoencoder...') loss_epoch = 0.0 n_batches = 0 start_time = time.time() idx_label_score = [] ae_net.eval() with torch.no_grad(): for inputs, labels in test_loader: if len(inputs) == 32: inputs = inputs.to(self.device) inputs = inputs.unsqueeze(1) outputs = ae_net(inputs.float()) scores = torch.sum((outputs.float() - inputs.float())**2, dim=tuple(range(1, outputs.dim()))) loss = torch.mean(scores) # Save triple of (idx, label, score) in a list idx_label_score += list( zip(labels.cpu().data.numpy().tolist(), scores.cpu().data.numpy().tolist())) loss_epoch += loss.item() n_batches += 1 logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches)) labels, scores = zip(*idx_label_score) labels = np.array(labels) scores = np.array(scores) fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1) test_auc = auc(fpr, tpr) logger.info('Test set AUC: {:.2f}%'.format(100. * test_auc)) test_time = time.time() - start_time logger.info('Autoencoder testing time: %.3f' % test_time) logger.info('Finished testing autoencoder.')
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate Deep SVDD over dataset.test_set (iterated directly) and log the ROC AUC.

    Stores (label, score) pairs in ``self.test_scores``, plus
    ``self.test_time`` and ``self.test_auc``.
    """
    logger = logging.getLogger()

    net = net.to(self.device)

    # The dataset exposes its test split directly; iterate it as the loader.
    test_loader = dataset.test_set

    logger.info('Starting testing...')
    start_time = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.to(self.device)
            # Add a channel dimension expected by the network.
            inputs = inputs.unsqueeze(1)
            outputs = net(inputs.float())
            # Squared distance to the hypersphere center.
            distances = ((outputs.float() - self.c) ** 2).sum(dim=1)
            if self.objective == 'soft-boundary':
                anomaly_scores = distances - self.R ** 2
            else:
                anomaly_scores = distances

            # Accumulate (label, score) pairs — this loader yields no indices.
            records.extend(
                zip(labels.cpu().data.numpy().tolist(),
                    anomaly_scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)
    self.test_scores = records

    # AUC via explicit ROC curve; label 1 marks the anomalous class.
    labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)
    fpr, tpr, thresholds = roc_curve(labels, scores, pos_label=1)
    self.test_auc = auc(fpr, tpr)

    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished testing.')
def _test(self, loader, net: BaseNet):
    """Evaluate Deep SVDD over `loader` and return (labels, scores, test_loss).

    Soft-boundary objective scores relative to the radius R and uses the
    nu-weighted hinge loss; the one-class objective uses the raw distance.
    """
    logger = logging.getLogger()
    total_loss = 0.0
    batch_count = 0

    net = net.to(self.device)

    logger.info('Starting testing...')
    start_time = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for batch in loader:
            inputs, labels, idx, _ = batch
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            # Squared distance to the hypersphere center.
            distances = ((outputs - self.c) ** 2).sum(dim=1)
            if self.objective == 'soft-boundary':
                anomaly_scores = distances - self.R ** 2
                # Hinge on the positive part of (dist - R^2), nu-weighted.
                batch_loss = self.R ** 2 + (1 / self.nu) * torch.mean(
                    torch.clamp(anomaly_scores, min=0))
            else:
                batch_loss = torch.mean(distances)
                anomaly_scores = distances
            total_loss += batch_loss.item()
            batch_count += 1

            # Accumulate (idx, label, score) triples.
            records.extend(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    anomaly_scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)

    _, labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)
    test_loss = total_loss / batch_count

    return labels, scores, test_loss
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate `net` on the test set, scoring each sample via self.lastlay, and log the ROC AUC.

    Stores (idx, label, score) triples in ``self.test_scores``,
    ``self.test_time`` and ``self.test_auc``.
    """
    logger = logging.getLogger()

    net = net.to(self.device)

    # Only the test loader is needed.
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    logger.info('Starting testing...')
    start_time = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for batch in test_loader:
            inputs, labels, idx = batch
            inputs = inputs.to(self.device)
            features = net(inputs)
            # The configured output layer maps representations to scores.
            anomaly_scores = self.lastlay(features)

            # Accumulate (idx, label, score) triples.
            records.extend(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    anomaly_scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)
    self.test_scores = records

    # ROC AUC over the collected scores.
    _, labels, scores = zip(*records)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)

    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished testing.')
def apply_model(self, dataset: BaseADDataset, net: BaseNet):
    """Score unlabeled data with the trained Deep SVDD model.

    Uses the third loader returned by dataset.loaders (the application
    split, which carries NaN labels).  Stores the sample indices in
    ``self.ind``, the scores in ``self.scores`` and the elapsed time in
    ``self.apply_time``.
    """
    logger = logging.getLogger()

    net = net.to(self.device)

    # Third loader is the application split.
    _, _, apply_loader = dataset.loaders(
        batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    logger.info('Starting Deep SVDD application.')
    start_time = time.time()
    records = []
    net.eval()
    with torch.no_grad():
        for batch in apply_loader:
            inputs, nolabels, idx = batch  # nolables are NaN
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            # Squared distance to the hypersphere center.
            distances = ((outputs - self.c) ** 2).sum(dim=1)
            if self.objective == 'soft-boundary':
                anomaly_scores = distances - self.R ** 2
            else:
                anomaly_scores = distances

            # Accumulate (idx, score) pairs — no labels exist here.
            records.extend(
                zip(idx.cpu().data.numpy().tolist(),
                    anomaly_scores.cpu().data.numpy().tolist()))

    self.apply_time = time.time() - start_time
    logger.info('Deep SVDD application time: %.3f' % self.apply_time)

    ind, scores = zip(*records)
    self.ind = np.array(ind)
    self.scores = np.array(scores)

    logger.info('Finished Deep SVDD application.')
def init_center_c_grad(self, train_loader: DataLoader, net: BaseNet, layer: torch.nn.Module, eps=0.1):
    """Initialize hypersphere center c as the mean GRADIENT from an initial forward pass on the data.

    Unlike init_center_c, c is accumulated from gradients of the summed
    outputs — w.r.t. `layer` in 'weight' mode or w.r.t. the inputs in
    'input' mode — then averaged over the number of samples seen.
    """
    n_samples = 0
    # c is lazily shaped from the first gradient tensor, since the gradient
    # shape depends on self.mode.
    c = None
    net.eval()
    # Gradients are required here, so no torch.no_grad() context.
    for data in train_loader:
        # get the inputs of the batch
        inputs, _, _ = data
        inputs = inputs.to(self.device)
        inputs.requires_grad_(True)
        outputs = net(inputs)
        n_samples += outputs.shape[0]
        if self.mode == 'weight':
            # NOTE(review): torch.autograd.grad differentiates w.r.t.
            # tensors; `layer` is annotated as a Module — presumably a
            # parameter tensor is actually passed. Confirm at the call site.
            grads = torch.autograd.grad(outputs=outputs.sum(), inputs=layer, create_graph=True, retain_graph=True)[
                0]
            # Normalize by the total squared magnitude (epsilon-stabilized).
            grads = grads / (torch.sum(grads ** 2) + 1e-5)
        elif self.mode == 'input':
            grads = torch.autograd.grad(outputs=outputs.sum(), inputs=inputs, create_graph=True, retain_graph=True)[
                0]
            if 'grad_norm' in self.add_params:
                # Per-sample L2 normalization over all non-batch dims.
                grads = grads / (torch.sqrt(
                    torch.sum(grads ** 2, dim=tuple(range(1, len(grads.shape))), keepdim=True)) + 1e-5)
            # Collapse the batch dimension: one gradient vector per batch.
            grads = torch.sum(grads, dim=0)
        inputs.requires_grad_(False)
        if c is None:
            c = torch.zeros_like(grads)
        c += grads.detach()
    c /= n_samples
    # If c_i is too close to 0, set to +-eps. Reason: a zero unit can be trivially matched with zero weights.
    c[(abs(c) < eps) & (c < 0)] = -eps
    c[(abs(c) < eps) & (c > 0)] = eps
    return c
def train(self, dataset: BaseADDataset, oe_dataset: BaseADDataset, net: BaseNet):
    """Train `net` on `dataset`, optionally mixing in outlier-exposure (OE) batches.

    Supports four objectives: 'hsc' (hypersphere classifier), 'deepSAD',
    'bce' and 'focal'.  Runs self.n_epochs training epochs plus ONE extra
    evaluation-mode pass over the plain training set to record train scores
    (hence `range(self.n_epochs + 1)` below).  Returns the trained net.
    """
    logger = logging.getLogger()

    # Get train data loader; halve the workers when a second (OE) loader
    # will run alongside it.
    if oe_dataset is not None:
        num_workers = int(self.n_jobs_dataloader / 2)
    else:
        num_workers = self.n_jobs_dataloader
    train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                      num_workers=num_workers)
    if oe_dataset is not None:
        if oe_dataset.shuffle:
            if len(dataset.train_set) > len(oe_dataset.train_set):
                # OE set is smaller: sample it with replacement so that both
                # loaders yield the same number of batches for zip().
                oe_sampler = RandomSampler(oe_dataset.train_set,
                                           replacement=True,
                                           num_samples=len(
                                               dataset.train_set))
                oe_loader = DataLoader(dataset=oe_dataset.train_set,
                                       batch_size=self.batch_size,
                                       shuffle=False,
                                       sampler=oe_sampler,
                                       num_workers=num_workers,
                                       drop_last=True)
            else:
                oe_loader = DataLoader(dataset=oe_dataset.train_set,
                                       batch_size=self.batch_size,
                                       shuffle=True,
                                       num_workers=num_workers,
                                       drop_last=True)
        else:
            oe_loader = DataLoader(dataset=oe_dataset.train_set,
                                   batch_size=self.batch_size,
                                   shuffle=False,
                                   num_workers=num_workers,
                                   drop_last=True)
        dataset_loader = zip(train_loader, oe_loader)
    else:
        dataset_loader = train_loader

    # Set loss (only the classification objectives need a criterion; the
    # hsc/deepSAD losses are computed inline below).
    if self.objective in ['bce', 'focal']:
        if self.objective == 'bce':
            criterion = nn.BCEWithLogitsLoss()
        if self.objective == 'focal':
            criterion = FocalLoss(gamma=self.focal_gamma)
        criterion = criterion.to(self.device)

    # Set device
    net = net.to(self.device)

    # Set optimizer
    optimizer = optim.Adam(net.parameters(), lr=self.lr,
                           weight_decay=self.weight_decay)

    # Set learning rate scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=self.lr_milestones, gamma=0.1)

    # Training
    logger.info('Starting training...')
    net.train()
    start_time = time.time()
    # n_epochs + 1: the final iteration is a score-recording pass, not a
    # training epoch (see the `epoch == self.n_epochs` branches below).
    for epoch in range(self.n_epochs + 1):
        epoch_loss = 0.0
        n_batches = 0
        idx_label_score = []
        epoch_start_time = time.time()

        # start at random point for the outlier exposure dataset in each epoch
        if (oe_dataset is not None) and (epoch < self.n_epochs):
            oe_loader.dataset.offset = np.random.randint(
                len(oe_loader.dataset))
            if oe_loader.dataset.shuffle_idxs:
                random.shuffle(oe_loader.dataset.idxs)
            # zip() iterators are exhausted after one pass; rebuild per epoch.
            dataset_loader = zip(train_loader, oe_loader)

        # only load samples from the original training set in a last epoch for saving train scores
        if epoch == self.n_epochs:
            dataset_loader = train_loader
            net.eval()

        for data in dataset_loader:
            if (oe_dataset is not None) and (epoch < self.n_epochs):
                # Concatenate the normal batch with the OE batch.
                inputs = torch.cat((data[0][0], data[1][0]), 0)
                labels = torch.cat((data[0][1], data[1][1]), 0)
                semi_targets = torch.cat((data[0][2], data[1][2]), 0)
                idx = torch.cat((data[0][3], data[1][3]), 0)
            else:
                inputs, labels, semi_targets, idx = data

            inputs = inputs.to(self.device)
            labels = labels.to(self.device)
            semi_targets = semi_targets.to(self.device)
            idx = idx.to(self.device)

            # Zero the network parameter gradients
            if epoch < self.n_epochs:
                optimizer.zero_grad()

            # Update network parameters via backpropagation: forward + backward + optimize
            outputs = net(inputs)

            if self.objective == 'hsc':
                if self.hsc_norm == 'l1':
                    dists = torch.norm(outputs, p=1, dim=1)
                if self.hsc_norm == 'l2':
                    dists = torch.norm(outputs, p=2, dim=1)
                if self.hsc_norm == 'l2_squared':
                    dists = torch.norm(outputs, p=2, dim=1)**2
                if self.hsc_norm == 'l2_squared_linear':
                    # Pseudo-Huber: quadratic near 0, linear for large norms.
                    dists = torch.sqrt(
                        torch.norm(outputs, p=2, dim=1)**2 + 1) - 1
                scores = 1 - torch.exp(-dists)
                # Unlabeled samples minimize the distance; labeled anomalies
                # maximize it via -log of the score.
                losses = torch.where(semi_targets == 0, dists,
                                     -torch.log(scores + self.eps))
                loss = torch.mean(losses)

            if self.objective == 'deepSAD':
                dists = torch.norm(outputs, p=2, dim=1)**2
                scores = dists
                # NOTE(review): unlike other Deep SAD losses in this file,
                # no eta weighting is applied to the labeled term here —
                # confirm whether that is intentional.
                losses = torch.where(
                    semi_targets == 0, dists,
                    ((dists + self.eps)**semi_targets.float()))
                loss = torch.mean(losses)

            if self.objective in ['bce', 'focal']:
                # semi_target == -1 marks known anomalies -> positive class.
                targets = torch.zeros(inputs.size(0))
                targets[semi_targets == -1] = 1
                targets = targets.view(-1, 1).to(self.device)
                scores = torch.sigmoid(outputs)
                loss = criterion(outputs, targets)

            if epoch < self.n_epochs:
                loss.backward()
                optimizer.step()

            # save train scores in last epoch
            if epoch == self.n_epochs:
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.flatten().cpu().data.numpy().tolist()))

            epoch_loss += loss.item()
            n_batches += 1

        # Take learning rate scheduler step
        scheduler.step()
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g' %
                        float(scheduler.get_last_lr()[0]))

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(
            f'| Epoch: {epoch + 1:03}/{self.n_epochs:03} | Train Time: {epoch_train_time:.3f}s '
            f'| Train Loss: {epoch_loss / n_batches:.6f} |')

    self.train_time = time.time() - start_time
    self.train_scores = idx_label_score

    # Log results
    logger.info('Train Time: {:.3f}s'.format(self.train_time))
    logger.info('Train Loss: {:.6f}'.format(epoch_loss / n_batches))
    logger.info('Finished training.')

    return net
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate `net` on the test set under the configured objective ('hsc', 'deepSAD', 'bce' or 'focal').

    Stores (idx, label, score) triples in ``self.test_scores``,
    ``self.test_time`` and ``self.test_auc``; logs loss/AUC/time.
    """
    logger = logging.getLogger()

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Set loss (only the classification objectives need a criterion).
    if self.objective in ['bce', 'focal']:
        if self.objective == 'bce':
            criterion = nn.BCEWithLogitsLoss()
        if self.objective == 'focal':
            criterion = FocalLoss(gamma=self.focal_gamma)
        criterion = criterion.to(self.device)

    # Set device for network
    net = net.to(self.device)

    # Testing
    logger.info('Starting testing...')
    net.eval()
    epoch_loss = 0.0
    n_batches = 0
    idx_label_score = []
    start_time = time.time()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, semi_targets, idx = data

            inputs = inputs.to(self.device)
            labels = labels.to(self.device)
            semi_targets = semi_targets.to(self.device)
            idx = idx.to(self.device)

            outputs = net(inputs)

            if self.objective == 'hsc':
                if self.hsc_norm == 'l1':
                    dists = torch.norm(outputs, p=1, dim=1)
                if self.hsc_norm == 'l2':
                    dists = torch.norm(outputs, p=2, dim=1)
                if self.hsc_norm == 'l2_squared':
                    dists = torch.norm(outputs, p=2, dim=1)**2
                if self.hsc_norm == 'l2_squared_linear':
                    # Pseudo-Huber: quadratic near 0, linear for large norms.
                    dists = torch.sqrt(
                        torch.norm(outputs, p=2, dim=1)**2 + 1) - 1
                scores = 1 - torch.exp(-dists)
                # Unlabeled samples contribute the distance; labeled
                # anomalies contribute -log of the score.
                losses = torch.where(semi_targets == 0, dists,
                                     -torch.log(scores + self.eps))
                loss = torch.mean(losses)

            if self.objective == 'deepSAD':
                dists = torch.norm(outputs, p=2, dim=1)**2
                scores = dists
                # NOTE(review): no eta weighting on the labeled term here,
                # unlike other Deep SAD losses in this file — confirm
                # whether that is intentional (it mirrors train()).
                losses = torch.where(
                    semi_targets == 0, dists,
                    ((dists + self.eps)**semi_targets.float()))
                loss = torch.mean(losses)

            if self.objective in ['bce', 'focal']:
                # semi_target == -1 marks known anomalies -> positive class.
                targets = torch.zeros(inputs.size(0))
                targets[semi_targets == -1] = 1
                targets = targets.view(-1, 1).to(self.device)
                scores = torch.sigmoid(outputs)
                loss = criterion(outputs, targets)

            # Save triple of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.flatten().cpu().data.numpy().tolist()))

            epoch_loss += loss.item()
            n_batches += 1

    self.test_time = time.time() - start_time
    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)

    # Log results
    logger.info('Test Time: {:.3f}s'.format(self.test_time))
    logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
    logger.info('Test AUC: {:.2f}'.format(100. * self.test_auc))
    logger.info('Finished testing.')
def test(self, dataset: BaseADDataset, net: BaseNet, epoch):
    """Evaluate Deep SVDD on the test set and append the AUC to log files.

    Args:
        dataset: provides the test loader.
        net: the trained network.
        epoch: current epoch index; every 100th epoch the AUC is also
            appended to a separate file.

    Side effects: sets ``self.test_time``, ``self.test_scores``,
    ``self.test_auc``; appends to ``self.Accuracy_list`` and to two log
    files under ../log/mnist_test/.
    """
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            dist = torch.sum((outputs - self.c)**2, dim=1)
            if self.objective == 'soft-boundary':
                scores = dist - self.R**2
            else:
                scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)

    self.test_auc = roc_auc_score(labels, scores)
    logger.info(
        '---------------------------------------------------------Test set AUC: {:.2f}%'
        .format(100. * self.test_auc))

    # recording the auc to a txt
    # FIX: use context managers so the files are closed even if write()
    # raises, instead of manual open()/close().
    with open('../log/mnist_test/get_param.txt', 'a') as f_get_para:
        f_get_para.write('Test set AUC: {:.2f}%. \r\n'.format(100. *
                                                              self.test_auc))

    # record test AUC after each 100 epoch
    if (epoch + 1) % 100 == 0:
        with open('../log/mnist_test/100_AUC.txt', 'a') as f_100_para:
            f_100_para.write('Test set AUC: {:.2f}%. \r\n'.format(
                100. * self.test_auc))

    self.Accuracy_list.append(100. * self.test_auc)
def test(self, dataset: BaseADDataset, ae_net: BaseNet, flg=0):
    """Evaluate the autoencoder and classify samples via KMeans cluster radii.

    Training set (flg == 1): fit KMeans on the per-sample reconstruction
    errors and record the normal-data cluster centers and radii.
    Test set (flg == 0): predict the cluster of each sample; a sample is
    anomalous if its distance to its cluster center exceeds that cluster's
    radius.

    NOTE(review): the flg == 0 branch relies on self.kmeans / self.center /
    self.radius having been set by a prior flg == 1 call — verify call order.
    """
    logger = logging.getLogger()

    # Set device for networks
    ae_net = ae_net.to(self.device)

    # Training set when flg == 1, test set when flg == 0.
    if flg == 1:
        test_loader, _ = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)
    else:
        _, test_loader = dataset.loaders(
            batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Testing ae...')
    loss_epoch = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    ae_net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = ae_net(inputs)
            # Scalar reconstruction error per sample (summed over non-batch dims).
            scores = torch.sum((outputs - inputs)**2,
                               dim=tuple(range(1, outputs.dim())))
            # Element-wise squared error, kept per-feature for KMeans below.
            error = (outputs - inputs)**2
            loss = torch.mean(scores)

            # Save triple of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist(),
                    error.cpu().data.numpy().tolist()),
            )

            loss_epoch += loss.item()
            n_batches += 1

    logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))
    _, labels, scores, error = zip(*idx_label_score)
    labels = np.array(labels)  # labels.shape(97278, )
    scores = np.array(scores)  # scores.shape(97278, )
    error = np.array(error)  # scores.shape(97278, )

    if flg == 1:
        # Training set: fit KMeans on per-feature errors; derive radii.
        X = error
        self.kmeans = KMeans(n_clusters=self.clusters).fit(X)
        self.center = self.kmeans.cluster_centers_.tolist()
        self.radius = self.get_radius(X)
        print("roc_self.center", self.center)
        print("roc_self.radius", self.radius)
    else:
        # Test set: assign each sample to a cluster, compare distance to radius.
        Y = error
        pred_labels = []  # predicted labels
        pred_km = self.kmeans.predict(Y)
        print(pred_km.shape)
        print(pred_km)
        for i in range(len(pred_km)):
            # dis: distance from cluster center to the point — classification basis.
            dis = self.manhattan_distance(self.center[pred_km[i]], Y[i])
            if dis > self.radius[pred_km[i]]:
                pred_labels.append(1)
            else:
                pred_labels.append(0)
        pred_labels = np.array(pred_labels)
        self.test_ftr, self.test_tpr, _ = roc_curve(labels, pred_labels)
        # roc_self.test_auc = roc_auc_score(pred_labels, labels)
        # NOTE(review): roc_curve is computed twice on the same inputs here.
        fpr, tpr, thresholds = roc_curve(labels, pred_labels)
        # Area under the curve used as the accuracy metric.
        print(fpr, tpr)
        self.test_auc = auc(fpr, tpr)
        self.test_mcc = matthews_corrcoef(labels, pred_labels)
        _, _, f_score, _ = precision_recall_fscore_support(labels,
                                                           pred_labels,
                                                           labels=[0, 1])
        self.test_f_score = f_score[1]
        print(len(labels))
        print(len(scores))

    self.test_time = time.time() - start_time
    if flg == 0:
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('ae testing time: %.3f' % self.test_time)
    logger.info('Finished testing ae.')
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate a semi-supervised deep generative model on the test set.

    Uses an importance-weighted SVI estimator of the ELBO for the loss and
    the classifier's confidence for the anomalous class (logits[:, 1]) as
    the anomaly score; stores test time, scores, and ROC AUC on self.
    """
    logger = logging.getLogger()

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Set device
    net = net.to(self.device)

    # Use importance weighted sampler (Burda et al., 2015) to get a better estimate on the log-likelihood.
    sampler = ImportanceWeightedSampler(mc=1, iw=1)
    elbo = SVI(net, likelihood=binary_cross_entropy, sampler=sampler)

    # Testing
    logger.info('Starting testing...')
    epoch_loss = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, _, idx = data
            inputs = inputs.to(self.device)
            labels = labels.to(self.device)
            idx = idx.to(self.device)

            # All test data is considered unlabeled
            inputs = inputs.view(inputs.size(0), -1)
            u = inputs

            # One-hot encode the labels for the supervised ELBO term.
            y = labels
            y_onehot = torch.Tensor(y.size(0), 2).to(
                self.device)  # two labels: 0: normal, 1: outlier
            y_onehot.zero_()
            y_onehot.scatter_(1, y.view(-1, 1), 1)

            # Compute loss: labeled ELBO + alpha * classification loss + unlabeled ELBO.
            L = -elbo(u, y_onehot)
            U = -elbo(u)
            logits = net.classify(u)
            eps = 1e-8  # numerical floor inside the log
            classication_loss = -torch.sum(
                y_onehot * torch.log(logits + eps), dim=1).mean()
            loss = L + self.alpha * classication_loss + U  # J_alpha

            # Compute scores
            scores = logits[:, 1]  # likelihood/confidence for anomalous class as anomaly score

            # Save triple of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

            epoch_loss += loss.item()
            n_batches += 1

    self.test_time = time.time() - start_time
    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)

    # Log results
    logger.info('Test Loss: {:.6f}'.format(epoch_loss / n_batches))
    logger.info('Test AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Test Time: {:.3f}s'.format(self.test_time))
    logger.info('Finished testing.')
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate a deep-GMM / SVDD hybrid model on the test set.

    First pass over the TRAINING loader accumulates GMM parameters
    (mixture weights phi, means mu, covariances cov) from the network's
    soft assignments; second pass over the TEST loader scores each sample
    by GMM sample energy and/or reconstruction error depending on
    ``self.objective``. Logs reconstruction, one-class, and hybrid AUCs.

    NOTE(review): relies on self.gamma_sum / self.iteration being
    initialized (e.g. to zeros) before this call — verify on the trainer.
    """
    self.n_components = net.cate_dense_2
    self.n_features = net.rep_dim
    # Running numerators for GMM means/covariances, filled from the train pass.
    self.mu_test = torch.tensor(np.float32(
        np.zeros([1, self.n_components, self.n_features])),
                                device=self.device)
    self.cov_test = torch.tensor(np.float32(
        np.zeros([1, self.n_components, self.n_features, self.n_features])),
                                 device=self.device)
    self.isTesting = True
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get test data loader
    train_loader, test_loader = dataset.loaders(
        batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    ssim_loss = pytorch_ssim.SSIM(window_size=11, size_average=False)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        # Pass 1: accumulate GMM sufficient statistics over the training set.
        for data in train_loader:
            # NOTE(review): both branches unpack identically — dead conditional.
            if self.dataset_name == 'object' or self.dataset_name == 'texture':
                inputs, labels, idx = data
            else:
                inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs, category, resconstruction = net(inputs)
            if self.objective == 'deep-GMM':
                phi, mu, cov = self.compute_gmm_params(outputs, category)
                batch_gamma_sum = torch.sum(category, dim=0)
                self.gamma_sum += batch_gamma_sum
                self.mu_test += mu * batch_gamma_sum.unsqueeze(
                    -1)  # keep sums of the numerator only
                self.cov_test += cov * batch_gamma_sum.unsqueeze(
                    -1).unsqueeze(-1)  # keep sums of the numerator only
                self.iteration += inputs.size(0)

        # Normalize the accumulated numerators into final GMM parameters.
        train_phi = self.gamma_sum / self.iteration
        train_mu = self.mu_test / self.gamma_sum.unsqueeze(-1)
        train_cov = self.cov_test / self.gamma_sum.unsqueeze(
            -1).unsqueeze(-1)
        train_cov = train_cov.squeeze(0)
        train_mu = train_mu.squeeze(0)

        # Pass 2: score the test set.
        for data in test_loader:
            # NOTE(review): both branches unpack identically — dead conditional.
            if self.dataset_name == 'object' or self.dataset_name == 'texture':
                inputs, labels, idx = data
            else:
                inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs, category, resconstruction = net(inputs)
            # Squared distance to the hypersphere center (used by non-GMM objectives).
            dist = torch.sum((outputs - self.c)**2, dim=1)
            if self.objective == 'deep-GMM':
                phi, _, _ = self.compute_gmm_params(outputs, category)
                # Energy under the GMM fit on the training set.
                sample_energy, cov_diag = self.compute_energy(
                    outputs,
                    phi=phi,
                    mu=train_mu,
                    cov=train_cov,
                    size_average=False)
                if self.ae_loss_type == 'ssim':
                    # Negative SSIM: higher means worse reconstruction.
                    rescon_error = -ssim_loss(inputs, resconstruction)
                    rescon_error = rescon_error * self.ssim_lambda
                else:
                    rescon_error = torch.sum(
                        (resconstruction - inputs)**2,
                        dim=tuple(range(1, resconstruction.dim())))
                    rescon_error = rescon_error * self.l2_lambda
                scores = sample_energy + rescon_error
                #scores = rescon_error
            elif self.objective == 'soft-boundary':
                scores = dist - self.R**2
            elif self.objective == 'hybrid':
                if self.ae_loss_type == 'ssim':
                    rescon_error = -ssim_loss(inputs, resconstruction)
                    rescon_error = rescon_error * self.ssim_lambda
                else:
                    rescon_error = torch.sum(
                        (resconstruction - inputs)**2,
                        dim=tuple(range(1, resconstruction.dim())))
                    rescon_error = rescon_error * self.l2_lambda_test
                sample_energy = dist
                scores = dist + rescon_error
            else:
                if self.ae_loss_type == 'ssim':
                    rescon_error = -ssim_loss(inputs, resconstruction)
                    rescon_error = rescon_error * self.ssim_lambda
                else:
                    rescon_error = torch.sum(
                        (resconstruction - inputs)**2,
                        dim=tuple(range(1, resconstruction.dim())))
                    rescon_error = rescon_error * self.l2_lambda * 1.5
                sample_energy = dist
                scores = dist

            # Save triples of (idx, label, score) in a list
            # NOTE(review): both branches are identical — dead conditional.
            if self.dataset_name == 'object' or self.dataset_name == 'texture':
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist(),
                        sample_energy.cpu().data.numpy().tolist(),
                        rescon_error.cpu().data.numpy().tolist()))
            else:
                idx_label_score += list(
                    zip(idx.cpu().data.numpy().tolist(),
                        labels.cpu().data.numpy().tolist(),
                        scores.cpu().data.numpy().tolist(),
                        sample_energy.cpu().data.numpy().tolist(),
                        rescon_error.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)
    self.test_scores = idx_label_score

    # Compute AUC
    if self.dataset_name == 'object' or self.dataset_name == 'texture':
        _, labels, scores, energy, rescon_error = zip(*idx_label_score)
    else:
        _, labels, scores, energy, rescon_error = zip(*idx_label_score)
    labels = np.array(labels)
    # Collapse multi-class anomaly labels to binary (0 normal / 1 anomalous).
    labels[labels > 0] = 1
    scores = np.array(scores)
    energy = np.array(energy)
    rescon_error = np.array(rescon_error)
    # for i in range(3,40):
    #     if labels[i] == 0:
    #         print('--------------------------------------')
    #         print(' ')
    #     print('labels:', labels[i], 'scores: ', scores[i],'energy: ',self.energy_lambda*energy[i], 'reconstruction: ', rescon_error[i])
    #
    #     if labels[i] == 0:
    #         print(' ')
    #         print('--------------------------------------')
    #
    # for i in range(len(scores)):
    #     if np.isnan(scores[i]) or np.isinf(scores[i]):
    #         scores[i] = 100
    #         print(labels[i])

    # Clip extreme scores so a few huge energies don't distort the ranking.
    scores[scores > 100] = 100
    self.test_auc = roc_auc_score(labels, rescon_error)
    logger.info('Test set reconstruction AUC: {:.2f}%'.format(
        100. * self.test_auc))
    self.test_auc = roc_auc_score(labels, energy)
    logger.info('Test set one-class AUC: {:.2f}%'.format(100. *
                                                         self.test_auc))
    self.test_auc = roc_auc_score(labels, scores)
    logger.info('Test set hybrid AUC: {:.2f}%'.format(100. *
                                                      self.test_auc))
    logger.info('Finished testing.')
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate a (possibly multi-center) Deep SVDD model on the test set.

    If ``self.c`` is 1-D, scores are distances to the single center
    (classic Deep SVDD); if 2-D, each sample is scored by the distance to
    its NEAREST cluster center. Afterwards, re-embeds the training set and
    draws a UMAP plot using centers loaded from 'centers.npy'.
    """
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net.eval()
    output_data = []
    label_data = []
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            output_data.append(outputs)
            label_data.append(labels)

            # dist = torch.sum((outputs - self.c) ** 2, dim=1)
            ### NEW
            if (self.c.dim() == 1):  # naive deep_svdd: single center
                centers = self.c
                dist = torch.sum((outputs - self.c)**2, dim=1)
            else:
                # Multi-center: self.c is (rep_dim, K); transpose to (K, rep_dim).
                centers = torch.transpose(self.c, 0, 1)
                dist = torch.zeros(outputs.shape[0], device=self.device)
                for i in range(outputs.shape[0]):
                    # Sum dists from each data point to its corresponding cluster
                    dist[i] = torch.sum((centers - outputs[i])**2,
                                        dim=1).min()
            #import pdb; pdb.set_trace()
            ###
            if self.objective == 'soft-boundary':
                scores = dist - self.R**2
            else:
                scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)
    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)
    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

    #UMAP (same umap model fit in training) - use anomaly_data = True
    # # UMAP Plot (on testing data)
    # kmeans_centers = np.load('centers.npy')
    # output_data = torch.cat(output_data)
    # label_data = torch.cat(label_data).numpy()
    # self.latent_UMAP(output_data, label_data, kmeans_centers, anomaly_data = True)
    # import pdb; pdb.set_trace()

    # UMAP Plot (on training data)
    # Get train data loader
    train_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                      num_workers=self.n_jobs_dataloader)
    output_data = []
    label_data = []
    with torch.no_grad():
        for data in train_loader:
            # get the inputs of the batch
            inputs, labels, _ = data  #labels are only for UMAP of hyperspheres
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            output_data.append(outputs)
            label_data.append(labels)
    # NOTE(review): assumes 'centers.npy' was written by init_center_c in this cwd.
    kmeans_centers = np.load('centers.npy')
    output_data = torch.cat(output_data)
    label_data = torch.cat(label_data).numpy()
    self.latent_UMAP(output_data,
                     label_data,
                     kmeans_centers,
                     anomaly_data=True)
    logger.info('Finished testing.')
def test(self, dataset: BaseADDataset, svm_net: BaseNet):
    """Test the SVM model.

    Runs the network over the test set, takes the argmax class as the
    per-sample score, accumulates hinge loss, and records AUC, ROC curve,
    MCC and F1 (for the anomalous class) on self.
    """
    logger = logging.getLogger()

    # Set device for networks
    svm_net = svm_net.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Testing autoencoder...')
    loss_epoch = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    svm_net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            labels = labels.to(self.device)
            outputs = svm_net(inputs)
            # Predicted class index (0/1) used directly as the score.
            _, scores = torch.max(outputs, 1)
            loss = self.hinge_loss(outputs, labels)
            scores = scores.float()
            labels = labels.float()

            # Save triple of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

            loss_epoch += loss.item()
            n_batches += 1

    logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_score = scores
    print(len(labels))
    print(len(scores))
    # NOTE(review): scores are hard class predictions, so this AUC is computed
    # from a single operating point rather than a continuous ranking.
    self.test_auc = roc_auc_score(labels, scores)
    self.test_ftr, self.test_tpr, _ = roc_curve(labels, scores)
    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    self.test_mcc = matthews_corrcoef(labels, scores)
    _, _, f_score, _ = precision_recall_fscore_support(labels,
                                                       scores,
                                                       labels=[0, 1])
    self.test_f_score = f_score[1]
    self.test_time = time.time() - start_time
    logger.info('svm_trainer testing time: %.3f' % self.test_time)
    logger.info('Finished testing svm_trainer.')
def init_center_c(self, train_loader: DataLoader, net: BaseNet, eps=0.1):
    """Initialize hypersphere center c as the mean from an initial forward pass on the data."""
    logger = logging.getLogger()
    #TODO incorporate naive Deep SVDD init_c if self.K == 1
    n_samples = 0  ### naive
    c = torch.zeros(net.rep_dim, device=self.device)  ### naive
    net.eval()
    with torch.no_grad():  ### naive
        for data in train_loader:  ### naive
            # get the inputs of the batch
            inputs, _, _ = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            n_samples += outputs.shape[0]
            c += torch.sum(outputs, dim=0)
    c /= n_samples  ### naive
    cen = c  ### naive

    ### NEW multi-center code (disabled: kmeans clustering of embeddings)
    ###logger.info("Initializing {} clusters".format(self.K))
    ###cen = torch.zeros(net.rep_dim, self.K, device=self.device)
    ###kmeans = KMeans(n_clusters=self.K,random_state=0,max_iter=10)
    # Second pass: collect all embeddings (and labels, only for the UMAP plot).
    output_data = []
    label_data = []
    with torch.no_grad():
        for data in train_loader:
            # get the inputs of the batch
            inputs, labels, _ = data  #labels are only for UMAP of hyperspheres
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            output_data.append(outputs)
            label_data.append(labels)
    output_data = torch.cat(output_data)
    ###kmeans = kmeans.fit(output_data)
    ###cluster_centers = torch.from_numpy(kmeans.cluster_centers_.T)
    ###cluster_centers = cluster_centers.type(torch.FloatTensor)
    ###cen = cluster_centers.to(self.device)
    ###dmat = scipy.spatial.distance.squareform(scipy.spatial.distance.pdist(cen.detach().cpu().numpy().T))
    ###logger.info(f"Distances between cluster centers: \n{dmat}")

    # Generate silhouette plot
    ## self.silhouette_plot(output_data)

    # UMAP Plot
    ###np.save('centers.npy',kmeans.cluster_centers_)
    # Persist the center so test() can load it for its UMAP plot.
    np.save('centers.npy', cen.cpu().detach().numpy())
    label_data = torch.cat(label_data).numpy()
    ###self.latent_UMAP(output_data, label_data, kmeans.cluster_centers_)
    self.latent_UMAP(output_data, label_data, cen.cpu().detach().numpy())
    ###
    # NOTE(review): live debugger breakpoint left in — halts every run here;
    # remove for unattended training.
    import pdb
    pdb.set_trace()

    # If c_i is too close to 0, set to +-eps. Reason: a zero unit can be trivially matched with zero weights.
    cen[(abs(cen) < eps) & (cen < 0)] = -eps
    cen[(abs(cen) < eps) & (cen > 0)] = eps
    return cen
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate a deep-GMM variant whose GMM parameters are estimated
    inline from each test batch's soft assignments.

    For 'deep-GMM', the anomaly score is the negative log-likelihood of
    each embedding under a diagonal GMM fitted on the current batch;
    other objectives use distance to ``self.c`` (minus R^2 for
    soft-boundary). Stores test time, scores, and ROC AUC on self.
    """
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs, category, resconstruction = net(inputs)
            dist = torch.sum((outputs - self.c) ** 2, dim=1)
            if self.objective == 'deep-GMM':
                # Soft mixture responsibilities: (n,k) --> (n,k,1)
                weights = category.unsqueeze(2)

                # Per-component responsibility mass: (n, k, 1) --> (1, k, 1)
                n_k = torch.sum(weights, 0, keepdim=True)

                # Broadcast embeddings over components: (n,d) ---> (n, k, d)
                outputs = outputs.unsqueeze(1).expand(outputs.size(0), self.n_components, outputs.size(1))

                # Responsibility-weighted component means: (n, k, d) --> (1, k, d)
                mu = torch.div(torch.sum(weights * outputs, 0, keepdim=True), n_k + self.eps)

                # Responsibility-weighted diagonal variances: (n, k, d) --> (1, k, d)
                var = torch.div(torch.sum(weights * (outputs - mu) * (outputs - mu), 0, keepdim=True), n_k + self.eps)

                # (1, k, d) --> (n, k, d)
                mu = mu.expand(outputs.size(0), self.n_components, self.n_features)
                var = var.expand(outputs.size(0), self.n_components, self.n_features)

                #------------------save mu-?-----------------------
                # mu = self.mu_test
                # var = self.var_test
                #------------------------------------------

                # Gaussian exponent per component: (n, k, d) --> (n, k, 1)
                exponent = torch.exp(-.5 * torch.sum((outputs - mu) * (outputs - mu) / var, 2, keepdim=True))

                # Normalization constant 1/sqrt((2*pi)^d * |diag var|): (n, k, d) --> (n, k, 1)
                prefactor = torch.rsqrt(((2. * pi) ** self.n_features) * torch.prod(var, dim=2, keepdim=True) + self.eps)

                # Mixture-weighted component likelihoods: (n, k, 1)
                logits_pre = torch.mean(weights, 0, keepdim=True) * prefactor * exponent

                # (n, k, 1) --> (n, k)
                logits_pre = torch.squeeze(logits_pre)
                # Negative log-likelihood of each sample under the mixture.
                logits = -torch.log(torch.sum(logits_pre, 1) + self.eps)

                rescon_error = torch.sum((resconstruction - inputs) ** 2, dim=tuple(range(1, resconstruction.dim())))
                #scores = logits + rescon_error
                scores = logits
                #scores = rescon_error
            elif self.objective == 'soft-boundary':
                scores = dist - self.R ** 2
            else:
                scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(zip(idx.cpu().data.numpy().tolist(),
                                        labels.cpu().data.numpy().tolist(),
                                        scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)
    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)
    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

    logger.info('Finished testing.')
def test(self, dataset: BaseADDataset, net: BaseNet, is_test=0):
    """Evaluate the LSTM autoencoder and collect latent codes and labels.

    is_test selects the dataset: 0 = test set, 1 = training set. Latent
    codes and labels are appended to self.test_code/self.test_label or
    self.train_code/self.train_label respectively; reconstruction error
    is the anomaly score, and AUC is computed only for the test set.
    """
    logger = logging.getLogger()

    # Set device for networks
    net = net.to(self.device)

    # Get test data loader
    if is_test == 0:
        # Test-set loader
        _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)
    if is_test == 1:
        # Training-set loader
        test_loader, _ = dataset.loaders(batch_size=self.batch_size,
                                         num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Testing lstm_autoencoder...')
    loss_epoch = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)

            # get lstm test label, label.shape = (128,)
            label = labels.numpy()
            if is_test == 0:
                for i in range(len(label)):
                    self.test_label.append(label[i])
            if is_test == 1:
                for i in range(len(label)):
                    self.train_label.append(label[i])

            # Reshape to (seq_len, batch=1, n_features) for the LSTM.
            code, outputs = net(inputs.view(-1, 1, self.n_features))
            # NOTE(review): .numpy() assumes the code tensor lives on CPU.
            code = code.detach().numpy()
            if is_test == 0:
                for i in range(len(code)):
                    self.test_code.append(code[i])
            if is_test == 1:
                for i in range(len(code)):
                    self.train_code.append(code[i])

            # Per-sample reconstruction error as the anomaly score.
            scores = torch.sum((outputs - inputs) ** 2,
                               dim=tuple(range(1, outputs.dim())))
            loss = torch.mean(scores)

            # Save triple of (idx, label, score) in a list
            idx_label_score += list(zip(idx.cpu().data.numpy().tolist(),
                                        labels.cpu().data.numpy().tolist(),
                                        scores.cpu().data.numpy().tolist()))

            loss_epoch += loss.item()
            n_batches += 1

    logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))
    self.test_time = time.time() - start_time
    logger.info('lstm_autoencoder testing time: %.3f' % self.test_time)
    self.test_scores = idx_label_score
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    print(len(labels))
    print(len(scores))
    # Test set only: AUC is meaningful only against held-out labels.
    if is_test == 0:
        self.test_auc = roc_auc_score(labels, scores)
        logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished test lstm_autoencoder.')
def test(self, dataset: BaseADDataset, net: BaseNet):
    """Evaluate Deep SVDD on the test set with extra diagnostics.

    Scores each sample by squared distance to the center ``self.c``
    (minus ``R**2`` for soft-boundary), computes ROC AUC, prints a
    confusion matrix at a fixed score threshold of 1.0, shows ROC and
    score-histogram plots, and saves fpr/tpr/scores/outliers arrays.

    Args:
        dataset: provides the test data loader.
        net: trained network mapping inputs to the embedding space.
    """
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size,
                                     num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            dist = torch.sum((outputs - self.c)**2, dim=1)
            if self.objective == 'soft-boundary':
                scores = dist - self.R**2
            else:
                scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(
                zip(idx.cpu().data.numpy().tolist(),
                    labels.cpu().data.numpy().tolist(),
                    scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)
    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    self.test_auc = roc_auc_score(labels, scores)

    # Confusion matrix at a fixed score threshold of 1.0.
    true_positive = 0
    true_negative = 0
    false_positive = 0
    false_negative = 0
    number_of_anomalies = int(sum(labels))
    # Top-k overlap: how many of the k highest-scoring samples are true anomalies.
    scores_indices = (-scores).argsort()[:number_of_anomalies]
    actual_indices = (-labels).argsort()[:number_of_anomalies]
    # print(scores_indices)
    # print(actual_indices)
    print("most anomalous samples that are actually anomalies: ",
          len(list(set(scores_indices) & set(actual_indices))))
    for i in range(len(labels)):
        if labels[i] == 1 and scores[i] >= 1:
            true_positive += 1
        if labels[i] == 0 and scores[i] < 1:
            true_negative += 1
        if labels[i] == 1 and scores[i] < 1:
            false_negative += 1
        # FIX: was `scores[i] > 1`, which left a normal sample with score
        # exactly 1.0 counted as neither TN nor FP; >= 1 makes the four
        # cells a true partition of all samples.
        if labels[i] == 0 and scores[i] >= 1:
            false_positive += 1
    print("true_positive: ", true_positive)
    print("true_negative: ", true_negative)
    print("false_positive: ", false_positive)
    print("false_negative: ", false_negative)
    print("accuracy: ", ((true_positive + true_negative) / len(labels)))
    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))

    # ROC curve plot.
    fpr, tpr, threshold = roc_curve(labels, scores)
    roc_auc = auc(fpr, tpr)
    plt.title('Receiver Operating Characteristic')
    plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % roc_auc)
    plt.legend(loc='lower right')
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([0, 1])
    plt.ylim([0, 1])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()

    # Score histograms: all samples vs. anomalous samples (log-scaled counts).
    nonzero_indeces = np.nonzero(labels)[0]
    zero_indeces = np.where(labels == 0)[0]
    outliars = scores[nonzero_indeces]
    normal_samples = scores[zero_indeces]
    plt.hist(scores, color='green', density=False)
    # plt.show()
    plt.hist(outliars, color='blue', density=False)
    plt.yscale('log')
    # plt.plot(scores, scores)
    # plt.plot(outliars, outliars, color="red")
    plt.show()
    logger.info('Finished testing.')

    # FIX: removed leftover `import pdb; pdb.set_trace()` that halted every
    # run here and prevented the np.save calls below from executing
    # unattended.
    # TODO(review): machine-specific absolute output directory — make
    # configurable.
    np.save(
        "/home/liviu/Documents/Dev/Deep-SVDD-PyTorch/results/credit_fraud/"
        + "DeepSVDD" + "_fpr", fpr)
    np.save(
        "/home/liviu/Documents/Dev/Deep-SVDD-PyTorch/results/credit_fraud/"
        + "DeepSVDD" + "_tpr", tpr)
    np.save(
        "/home/liviu/Documents/Dev/Deep-SVDD-PyTorch/results/credit_fraud/"
        + "DeepSVDD" + "_scores", scores)
    np.save(
        "/home/liviu/Documents/Dev/Deep-SVDD-PyTorch/results/credit_fraud/"
        + "DeepSVDD" + "_outliars", outliars)