def test(self, dataset: BaseADDataset, ae_net: BaseNet):
    logger = logging.getLogger()

    # Set device for network
    ae_net = ae_net.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Testing autoencoder...')
    loss_epoch = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    ae_net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = ae_net(inputs)
            # Original per-sample squared reconstruction error, replaced by bidirectional_score below:
            # scores = torch.sum((outputs - inputs) ** 2, dim=tuple(range(1, outputs.dim())))
            scores = bidirectional_score(inputs, outputs)
            loss = torch.mean(scores)

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(zip(idx.cpu().data.numpy().tolist(),
                                        labels.cpu().data.numpy().tolist(),
                                        scores.cpu().data.numpy().tolist()))

            loss_epoch += loss.item()
            n_batches += 1

    logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

    # Compute AUC over the collected (idx, label, score) triples
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)
    auc = roc_auc_score(labels, scores)
    logger.info('Test set AUC: {:.2f}%'.format(100. * auc))

    test_time = time.time() - start_time
    logger.info('Autoencoder testing time: %.3f' % test_time)
    logger.info('Finished testing autoencoder.')
def train(self, dataset: BaseADDataset, ae_net: BaseNet):
    logger = logging.getLogger()

    # Set device for network
    ae_net = ae_net.to(self.device)

    # Get train data loader
    train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Set optimizer (Adam optimizer for now)
    optimizer = optim.Adam(ae_net.parameters(), lr=self.lr, weight_decay=self.weight_decay,
                           amsgrad=self.optimizer_name == 'amsgrad')

    # Set learning rate scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.lr_milestones, gamma=0.1)

    # Training
    logger.info('Starting pretraining...')
    start_time = time.time()
    ae_net.train()
    for epoch in range(self.n_epochs):

        scheduler.step()
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0]))

        loss_epoch = 0.0
        n_batches = 0
        epoch_start_time = time.time()
        for data in train_loader:
            inputs, _, _ = data
            inputs = inputs.to(self.device)

            # Zero the network parameter gradients
            optimizer.zero_grad()

            # Update network parameters via backpropagation: forward + backward + optimize
            outputs = ae_net(inputs)
            # scores = torch.sum((outputs - inputs) ** 2, dim=tuple(range(1, outputs.dim())))
            scores = bidirectional_score(inputs, outputs)
            loss = torch.mean(scores)
            loss.backward()
            optimizer.step()

            loss_epoch += loss.item()
            n_batches += 1

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(' Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'
                    .format(epoch + 1, self.n_epochs, epoch_train_time, loss_epoch / n_batches))

    pretrain_time = time.time() - start_time
    logger.info('Pretraining time: %.3f' % pretrain_time)
    logger.info('Finished pretraining.')

    return ae_net
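# NOTE: the autoencoder train/test methods above call bidirectional_score(inputs, outputs), which is
# defined elsewhere in the repository and not shown in this section. The sketch below is only a
# hypothetical stand-in that reproduces the commented-out per-sample squared reconstruction error so
# the code above is runnable; the real bidirectional_score may compute something different.
def bidirectional_score(inputs, outputs):
    # Placeholder: per-sample sum of squared reconstruction errors over all non-batch dimensions.
    return torch.sum((outputs - inputs) ** 2, dim=tuple(range(1, outputs.dim())))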
def test(self, dataset: BaseADDataset, net: BaseNet):
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            # The labels here are all 0s and 1s; about nine tenths are 1 and one tenth is 0, which
            # matches the setup. The test data is not shuffled, so samples arrive in index order
            # starting from 0.
            inputs, labels, idx = data
            inputs = inputs.to(self.device)

            # Explanation of the lines below:
            # outputs has shape [128, 32], where 128 is the batch size and 32 is the network output dimension.
            # self.c has shape [32].
            # After torch.sum, dist has shape [128]: each sample's squared distance to the center,
            # which is used as its anomaly score.
            outputs = net(inputs)
            dist = torch.sum((outputs - self.c) ** 2, dim=1)
            if self.objective == 'soft-boundary':
                scores = dist - self.R ** 2
            else:
                scores = dist

            # Collect the results as tuples
            # Save triples of (idx, label, score) in a list
            idx_label_score += list(zip(idx.cpu().data.numpy().tolist(),
                                        labels.cpu().data.numpy().tolist(),
                                        scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)

    # After aggregation, idx_label_score is ordered by sample index.
    self.test_scores = idx_label_score

    # Compute AUC
    # labels contains 0s and 1s; scores are the network's anomaly scores.
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)

    # roc_auc_score computes the area under the ROC curve; it does not threshold the scores at 0.5
    # (see my notes for details).
    self.test_auc = roc_auc_score(labels, scores)

    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished testing.')
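# As the comments above note, roc_auc_score measures ranking quality (the area under the ROC curve)
# rather than accuracy at a fixed 0.5 threshold: only the ordering of the scores matters, not their
# scale. The standalone example below uses made-up labels and scores purely to illustrate the call.
import numpy as np
from sklearn.metrics import roc_auc_score

toy_labels = np.array([0, 0, 1, 1])          # 0 = normal, 1 = anomalous
toy_scores = np.array([0.3, 6.0, 5.7, 9.4])  # higher score = farther from the center c
print(roc_auc_score(toy_labels, toy_scores))        # 0.75: one normal sample outranks one anomaly
print(roc_auc_score(toy_labels, toy_scores * 100))  # still 0.75: rescaling the scores changes nothing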
def test(self, dataset: BaseADDataset, ae_net: BaseNet):
    logger = logging.getLogger()

    # Set device for network
    ae_net = ae_net.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Testing autoencoder...')
    loss_epoch = 0.0
    n_batches = 0
    start_time = time.time()
    idx_label_score = []
    ae_net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = ae_net(inputs)
            scores = torch.sum((outputs - inputs) ** 2, dim=tuple(range(1, outputs.dim())))
            loss = torch.mean(scores)

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(zip(idx.cpu().data.numpy().tolist(),
                                        labels.cpu().data.numpy().tolist(),
                                        scores.cpu().data.numpy().tolist()))

            loss_epoch += loss.item()
            n_batches += 1

    logger.info('Test set Loss: {:.8f}'.format(loss_epoch / n_batches))

    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)

    auc = roc_auc_score(labels, scores)
    logger.info('Test set AUC: {:.2f}%'.format(100. * auc))

    test_time = time.time() - start_time
    logger.info('Autoencoder testing time: %.3f' % test_time)
    logger.info('Finished testing autoencoder.')
def test(self, dataset: BaseADDataset, net: BaseNet):
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get test data loader
    _, test_loader = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Testing
    logger.info('Starting testing...')
    start_time = time.time()
    idx_label_score = []
    net.eval()
    with torch.no_grad():
        for data in test_loader:
            inputs, labels, idx = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            dist = torch.sum((outputs - self.c) ** 2, dim=1)
            if self.objective == 'soft-boundary':
                scores = dist - self.R ** 2
            else:
                scores = dist

            # Save triples of (idx, label, score) in a list
            idx_label_score += list(zip(idx.cpu().data.numpy().tolist(),
                                        labels.cpu().data.numpy().tolist(),
                                        scores.cpu().data.numpy().tolist()))

    self.test_time = time.time() - start_time
    logger.info('Testing time: %.3f' % self.test_time)

    self.test_scores = idx_label_score

    # Compute AUC
    _, labels, scores = zip(*idx_label_score)
    labels = np.array(labels)
    scores = np.array(scores)

    self.test_auc = roc_auc_score(labels, scores)

    logger.info('Test set AUC: {:.2f}%'.format(100. * self.test_auc))
    logger.info('Finished testing.')
def train(self, dataset: BaseADDataset, net: BaseNet):
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get train data loader
    train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Set optimizer (Adam optimizer for now)
    optimizer = optim.Adam(net.parameters(), lr=self.lr, weight_decay=self.weight_decay,
                           amsgrad=self.optimizer_name == 'amsgrad')

    # Set learning rate scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.lr_milestones, gamma=0.1)

    # Initialize hypersphere center c (if c not loaded)
    if self.c is None:
        logger.info('Initializing center c...')
        # Note: c holds one value per unit of the network's final output layer.
        # Also, c is not updated with the network; it is computed only once here, which presumably
        # means the autoencoder pretraining step has to be run first.
        self.c = self.init_center_c(train_loader, net)
        logger.info('Center c initialized.')
        # In this run, c has size 32 and the batch size is 200.

    # Training
    logger.info('Starting training...')
    start_time = time.time()
    net.train()
    for epoch in range(self.n_epochs):

        scheduler.step()
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0]))

        loss_epoch = 0.0
        n_batches = 0
        epoch_start_time = time.time()
        for data in train_loader:
            # data contains the images, their labels (all 0 during training), and each image's
            # index in the dataset.
            inputs, _, _ = data
            inputs = inputs.to(self.device)

            # Zero the network parameter gradients
            optimizer.zero_grad()

            # Update network parameters via backpropagation: forward + backward + optimize
            outputs = net(inputs)
            # outputs has shape [200, 32], where 200 is the batch size.
            # dist has shape [200]: in the 32-dimensional output space, each sample's squared distance
            # to the center is the sum of squared per-dimension differences. Taking the mean of dist
            # averages over the n samples, which corresponds to the loss in the paper.
            dist = torch.sum((outputs - self.c) ** 2, dim=1)
            if self.objective == 'soft-boundary':
                scores = dist - self.R ** 2
                loss = self.R ** 2 + (1 / self.nu) * torch.mean(torch.max(torch.zeros_like(scores), scores))
            else:
                # The one-class objective (the case used here)
                loss = torch.mean(dist)
            loss.backward()
            optimizer.step()

            # Update hypersphere radius R on mini-batch distances
            if (self.objective == 'soft-boundary') and (epoch >= self.warm_up_n_epochs):
                self.R.data = torch.tensor(get_radius(dist, self.nu), device=self.device)

            loss_epoch += loss.item()
            n_batches += 1

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(' Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format(
            epoch + 1, self.n_epochs, epoch_train_time, loss_epoch / n_batches))

    self.train_time = time.time() - start_time
    logger.info('Training time: %.3f' % self.train_time)
    logger.info('Finished training.')

    return net
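# The training method above calls self.init_center_c(train_loader, net), which is not shown in this
# section. The sketch below follows the reference Deep SVDD implementation: c is the mean of the
# network outputs over one forward pass of the training data, with near-zero coordinates pushed to
# +/- eps so no unit can be trivially matched by all-zero weights. It assumes the network exposes
# its output dimensionality as net.rep_dim.
def init_center_c(self, train_loader, net: BaseNet, eps=0.1):
    """Initialize hypersphere center c as the mean of an initial forward pass on the data."""
    n_samples = 0
    c = torch.zeros(net.rep_dim, device=self.device)

    net.eval()
    with torch.no_grad():
        for data in train_loader:
            inputs, _, _ = data
            inputs = inputs.to(self.device)
            outputs = net(inputs)
            n_samples += outputs.shape[0]
            c += torch.sum(outputs, dim=0)

    c /= n_samples

    # If a coordinate of c is too close to 0, set it to +/- eps.
    c[(abs(c) < eps) & (c < 0)] = -eps
    c[(abs(c) < eps) & (c > 0)] = eps

    return c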
def train(self, dataset: BaseADDataset, net: BaseNet):
    logger = logging.getLogger()

    # Set device for network
    net = net.to(self.device)

    # Get train data loader
    train_loader, _ = dataset.loaders(batch_size=self.batch_size, num_workers=self.n_jobs_dataloader)

    # Set optimizer (Adam optimizer for now)
    optimizer = optim.Adam(net.parameters(), lr=self.lr, weight_decay=self.weight_decay,
                           amsgrad=self.optimizer_name == 'amsgrad')

    # Set learning rate scheduler
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=self.lr_milestones, gamma=0.1)

    # Initialize hypersphere center c (if c not loaded)
    if self.c is None:
        logger.info('Initializing center c...')
        self.c = self.init_center_c(train_loader, net)
        logger.info('Center c initialized.')

    # Training
    logger.info('Starting training...')
    start_time = time.time()
    net.train()
    for epoch in range(self.n_epochs):

        scheduler.step()
        if epoch in self.lr_milestones:
            logger.info(' LR scheduler: new learning rate is %g' % float(scheduler.get_lr()[0]))

        loss_epoch = 0.0
        n_batches = 0
        epoch_start_time = time.time()
        for data in train_loader:
            inputs, _, _ = data
            inputs = inputs.to(self.device)

            # Zero the network parameter gradients
            optimizer.zero_grad()

            # Update network parameters via backpropagation: forward + backward + optimize
            outputs = net(inputs)
            dist = torch.sum((outputs - self.c) ** 2, dim=1)
            if self.objective == 'soft-boundary':
                scores = dist - self.R ** 2
                loss = self.R ** 2 + (1 / self.nu) * torch.mean(torch.max(torch.zeros_like(scores), scores))
            else:
                loss = torch.mean(dist)
            loss.backward()
            optimizer.step()

            # Update hypersphere radius R on mini-batch distances
            if (self.objective == 'soft-boundary') and (epoch >= self.warm_up_n_epochs):
                self.R.data = torch.tensor(get_radius(dist, self.nu), device=self.device)

            loss_epoch += loss.item()
            n_batches += 1

        # log epoch statistics
        epoch_train_time = time.time() - epoch_start_time
        logger.info(' Epoch {}/{}\t Time: {:.3f}\t Loss: {:.8f}'.format(
            epoch + 1, self.n_epochs, epoch_train_time, loss_epoch / n_batches))

    self.train_time = time.time() - start_time
    logger.info('Training time: %.3f' % self.train_time)
    logger.info('Finished training.')

    return net
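# The soft-boundary branch above updates the radius via get_radius(dist, self.nu), which is also not
# shown in this section. In the reference Deep SVDD implementation, R is set to the (1 - nu)-quantile
# of the mini-batch distances, so roughly a nu-fraction of training points is allowed to fall outside
# the sphere. A sketch along those lines:
def get_radius(dist: torch.Tensor, nu: float):
    """Solve for radius R as the (1 - nu)-quantile of the square roots of the squared distances."""
    return np.quantile(np.sqrt(dist.clone().data.cpu().numpy()), 1 - nu)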