def __init__(self, args):
    """Create the main network and record device / backbone settings.

    Args:
        args: mapping that supplies the 'convnet_type' and 'device' entries.
    """
    super().__init__()
    backbone = args['convnet_type']
    self._network = IncrementalNet(backbone, False)
    self._device = args['device']
    self.convnet_type = backbone
    # The expert network is not created here; it starts out unset.
    self.expert = None
def _train_expert(self, train_loader, test_loader):
    """Fit a brand-new expert network on the classes of the current task.

    A fresh ``IncrementalNet`` whose classifier has ``self.task_size``
    outputs is stored in ``self.expert`` and trained with SGD under a
    MultiStepLR schedule. Labels are shifted down by
    ``self._known_classes`` so that they index the expert's own outputs.
    After every epoch the loss and accuracies are shown on the progress bar
    and written to the log.

    Args:
        train_loader: iterable of ``(_, inputs, targets)`` training batches.
        test_loader: loader used for the per-epoch test accuracy.
    """
    parallel = len(self._multiple_gpus) > 1

    self.expert = IncrementalNet(self.convnet_type, False)
    self.expert.update_fc(self.task_size)
    if parallel:
        self.expert = nn.DataParallel(self.expert, self._multiple_gpus)
    self.expert.to(self._device)

    sgd = optim.SGD(self.expert.parameters(),
                    lr=lrate_expert,
                    momentum=0.9,
                    weight_decay=weight_decay)
    lr_schedule = optim.lr_scheduler.MultiStepLR(
        sgd, milestones=milestones_expert, gamma=lrate_decay_expert)

    label_shift = self._known_classes
    report = 'Expert CNN => Epoch {}/{}, Loss {:.3f}, Train accy {:.2f}, Test accy {:.2f}'

    progress = tqdm(range(epochs_expert))
    for epoch in progress:
        self.expert.train()
        running_loss = 0.
        for _, images, labels in train_loader:
            images = images.to(self._device)
            labels = (labels - label_shift).to(self._device)

            batch_loss = F.cross_entropy(self.expert(images)['logits'], labels)
            running_loss += batch_loss.item()

            sgd.zero_grad()
            batch_loss.backward()
            sgd.step()

        # The learning-rate schedule advances once per epoch.
        lr_schedule.step()

        train_acc = self._compute_accuracy(self.expert, train_loader, label_shift)
        test_acc = self._compute_accuracy(self.expert, test_loader, label_shift)
        message = report.format(epoch + 1, epochs_expert,
                                running_loss / len(train_loader),
                                train_acc, test_acc)
        progress.set_description(message)
        logging.info(message)

    # Keep the bare module rather than the DataParallel wrapper.
    if parallel:
        self.expert = self.expert.module
def __init__(self, args):
    """Initialise the base learner, the network and the per-task class log.

    Args:
        args: mapping forwarded to the base class; must contain
            'convnet_type'.
    """
    super().__init__(args)
    backbone = args['convnet_type']
    self._network = IncrementalNet(backbone, False)
    # Starts empty; nothing in this constructor fills it.
    self._seen_classes = list()
class End2End(BaseLearner):
    """Incremental learner combining cross-entropy with a per-task distillation term.

    The first task is trained in a single phase. Every later task is trained
    on the new classes plus the exemplar memory and is then fine-tuned on
    the exemplar memory alone.
    """

    def __init__(self, args):
        """Build the network from args['convnet_type'] and start with no seen tasks."""
        super().__init__(args)
        self._network = IncrementalNet(args['convnet_type'], False)
        # Class count of each task seen so far, in task order. _run sums
        # prefixes of this list to locate every task's slice of the logits.
        self._seen_classes = []

    def after_task(self):
        """Snapshot the network as the frozen old network and advance the known-class count."""
        self._old_network = self._network.copy().freeze()
        self._known_classes = self._total_classes
        logging.info('Exemplar size: {}'.format(self.exemplar_size))

    def incremental_train(self, data_manager):
        """Run one incremental task: grow the classifier, build loaders, train, rebuild memory.

        Args:
            data_manager: object providing get_task_size() and get_dataset()
                (project type; its full contract is not visible here).
        """
        self._cur_task += 1
        self.task_size = data_manager.get_task_size(self._cur_task)
        self._total_classes = self._known_classes + self.task_size
        self._network.update_fc(self._total_classes)
        self._seen_classes.append(self.task_size)
        logging.info('Learning on {}-{}'.format(self._known_classes, self._total_classes))

        # Loader
        # Training data: the new class indices plus whatever _get_memory() returns.
        train_dataset = data_manager.get_dataset(np.arange(self._known_classes, self._total_classes),
                                                 source='train', mode='train',
                                                 appendent=self._get_memory())
        self.train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                       num_workers=num_workers)
        # Test data: every class index from 0 up to the current total.
        test_dataset = data_manager.get_dataset(np.arange(0, self._total_classes), source='test', mode='test')
        self.test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                                      num_workers=num_workers)

        # Procedure
        # Wrap for multi-GPU training and unwrap again once the task is done.
        if len(self._multiple_gpus) > 1:
            self._network = nn.DataParallel(self._network, self._multiple_gpus)
        self._train(data_manager, self.train_loader, self.test_loader)
        self.build_rehearsal_memory(data_manager, self.samples_per_class)
        if len(self._multiple_gpus) > 1:
            self._network = self._network.module

    def _train(self, data_manager, train_loader, test_loader):
        """Train the current task; for tasks after the first, add a fine-tuning phase.

        NOTE(review): the train_loader / test_loader arguments are not read;
        self.train_loader and self.test_loader are passed to _run instead.

        Args:
            data_manager: forwarded to the exemplar helpers and used to build
                the fine-tuning dataset.
            train_loader: unused (see note above).
            test_loader: unused (see note above).
        """
        self._network.to(self._device)
        if self._old_network is not None:
            self._old_network.to(self._device)

        # First task: one training phase with the *_init hyper-parameters, then stop.
        if self._cur_task == 0:
            optimizer = optim.SGD(self._network.parameters(), lr=lrate_init, momentum=0.9,
                                  weight_decay=weight_decay)
            scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=milestones_init,
                                                       gamma=lrate_decay)
            self._is_finetuning = False
            self._run(self.train_loader, self.test_loader, epochs_init, optimizer, scheduler,
                      'Training')
            return

        # New + exemplars
        optimizer = optim.SGD(self._network.parameters(), lr=lrate, momentum=0.9, weight_decay=weight_decay)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=milestones,
                                                   gamma=lrate_decay)
        self._is_finetuning = False
        self._run(self.train_loader, self.test_loader, epochs, optimizer, scheduler, 'Training')

        # Finetune
        # Rebuild the exemplar memory before fine-tuning. What the helpers
        # select is not visible here — presumably a fixed number of samples
        # per class; confirm against BaseLearner.
        if self._fixed_memory:
            finetune_samples_per_class = self._memory_per_class
            self._construct_exemplar_unified(data_manager, finetune_samples_per_class)
        else:
            finetune_samples_per_class = self._memory_size // self._known_classes
            self._reduce_exemplar(data_manager, finetune_samples_per_class)
            self._construct_exemplar(data_manager, finetune_samples_per_class)

        # The network as trained so far becomes the distillation reference
        # for the fine-tuning phase (unwrapped first when DataParallel is in use).
        if len(self._multiple_gpus) > 1:
            self._old_network = self._network.module.copy().freeze()
        else:
            self._old_network = self._network.copy().freeze()
        # No class indices are requested: the dataset comes only from the memory.
        finetune_train_dataset = data_manager.get_dataset([], source='train', mode='train',
                                                          appendent=self._get_memory())
        finetune_train_loader = DataLoader(finetune_train_dataset, batch_size=batch_size,
                                           shuffle=True, num_workers=num_workers)
        # Update all weights or only the weights of FC layer?
        # According to the experiment results, fine-tuning all weights is slightly better.
        optimizer = optim.SGD(self._network.parameters(), lr=lrate_finetune, momentum=0.9,
                              weight_decay=weight_decay)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=milestones_finetune,
                                                   gamma=lrate_decay)
        self._is_finetuning = True
        self._run(finetune_train_loader, self.test_loader, epochs_finetune, optimizer, scheduler, 'Finetuning')

        # Remove the temporary exemplars of new classes
        if self._fixed_memory:
            self._data_memory = self._data_memory[:-self._memory_per_class * self.task_size]
            self._targets_memory = self._targets_memory[:-self._memory_per_class * self.task_size]
            # Check
            # After trimming, every remaining target must be an already-known class.
            assert len(np.setdiff1d(self._targets_memory, np.arange(0, self._known_classes))) == 0, 'Exemplar error!'

    def _run(self, train_loader, test_loader, epochs_, optimizer, scheduler, process):
        """Run the epoch loop for one phase and log per-epoch statistics.

        Args:
            train_loader: iterable of (_, inputs, targets) batches.
            test_loader: loader passed to _compute_accuracy after each epoch.
            epochs_: number of epochs to run.
            optimizer: optimizer stepped once per batch.
            scheduler: learning-rate scheduler stepped once per epoch.
            process: label prefixed to each log line ('Training' / 'Finetuning').
        """
        prog_bar = tqdm(range(epochs_))
        # enumerate's counter is discarded; `epoch` is the 0-based range value.
        for _, epoch in enumerate(prog_bar, start=1):
            self._network.train()
            losses = 0.
            correct, total = 0, 0
            for i, (_, inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(self._device), targets.to(self._device)
                logits = self._network(inputs)['logits']

                # CELoss
                clf_loss = F.cross_entropy(logits, targets)

                if self._cur_task == 0:
                    # No old network yet: the distillation term is zero.
                    distill_loss = torch.zeros(1, device=self._device)
                else:
                    # While fine-tuning, the old network already covers the
                    # current task, so one more task slice is distilled.
                    finetuning_task = (self._cur_task + 1) if self._is_finetuning else self._cur_task
                    distill_loss = 0.
                    old_logits = self._old_network(inputs)['logits']
                    # NOTE: this loop reuses the name `i` from the batch loop
                    # above; the batch index is not read afterwards.
                    for i in range(1, finetuning_task + 1):
                        # [lo, hi) is the logit range belonging to task i.
                        lo = sum(self._seen_classes[:i - 1])
                        hi = sum(self._seen_classes[:i])

                        task_prob_new = F.softmax(logits[:, lo:hi], dim=1)
                        task_prob_old = F.softmax(old_logits[:, lo:hi], dim=1)

                        # Raise to the power 1/T, then renormalise each row to sum to 1.
                        task_prob_new = task_prob_new**(1 / T)
                        task_prob_old = task_prob_old**(1 / T)

                        task_prob_new = task_prob_new / task_prob_new.sum(1).view(-1, 1)
                        task_prob_old = task_prob_old / task_prob_old.sum(1).view(-1, 1)

                        distill_loss += F.binary_cross_entropy(task_prob_new, task_prob_old)

                    # Average the distillation term over the distilled tasks.
                    distill_loss *= 1 / finetuning_task

                loss = clf_loss + distill_loss
                losses += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # acc
                _, preds = torch.max(logits, dim=1)
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            # train_acc = self._compute_accuracy(self._network, train_loader)
            # Training accuracy comes from the predictions collected during the epoch.
            train_acc = np.around(tensor2numpy(correct) * 100 / total, decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info1 = '{} => '.format(process)
            info2 = 'Task {}, Epoch {}/{}, Loss {:.3f}, Train_accy {:.2f}, Test_accy {:.2f}'.format(
                self._cur_task, epoch + 1, epochs_, losses / len(train_loader), train_acc, test_acc)
            prog_bar.set_description(info1 + info2)
            logging.info(info1 + info2)
class iCaRL(BaseLearner):
    """Incremental learner trained with sigmoid/BCE targets.

    For tasks after the first, the old-class part of each target vector is
    replaced by the sigmoid outputs of the frozen previous network.
    """

    def __init__(self, args):
        """Build the network named by args['convnet_type']."""
        super().__init__(args)
        backbone = args['convnet_type']
        self._network = IncrementalNet(backbone, False)

    def after_task(self):
        """Keep a frozen copy of the network and mark all classes as known."""
        snapshot = self._network.copy().freeze()
        self._old_network = snapshot
        self._known_classes = self._total_classes
        logging.info('Exemplar size: {}'.format(self.exemplar_size))

    def incremental_train(self, data_manager):
        """Extend the classifier for the next task, train, and rebuild the memory.

        Args:
            data_manager: provides get_task_size() and get_dataset().
        """
        self._cur_task += 1
        added = data_manager.get_task_size(self._cur_task)
        self._total_classes = self._known_classes + added
        self._network.update_fc(self._total_classes)
        logging.info('Learning on {}-{}'.format(self._known_classes,
                                                self._total_classes))

        # Training set: new class indices plus the current memory.
        new_ids = np.arange(self._known_classes, self._total_classes)
        train_dataset = data_manager.get_dataset(new_ids,
                                                 source='train',
                                                 mode='train',
                                                 appendent=self._get_memory())
        self.train_loader = DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=num_workers)

        # Test set: every class index up to the current total.
        all_ids = np.arange(0, self._total_classes)
        test_dataset = data_manager.get_dataset(all_ids,
                                                source='test',
                                                mode='test')
        self.test_loader = DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=num_workers)

        # Wrap for multi-GPU while training, unwrap afterwards.
        multi_gpu = len(self._multiple_gpus) > 1
        if multi_gpu:
            self._network = nn.DataParallel(self._network,
                                            self._multiple_gpus)
        self._train(self.train_loader, self.test_loader)
        self.build_rehearsal_memory(data_manager, self.samples_per_class)
        if multi_gpu:
            self._network = self._network.module

    def _train(self, train_loader, test_loader):
        """Move networks to the device, create optimizer/scheduler and train."""
        device = self._device
        self._network.to(device)
        if self._old_network is not None:
            self._old_network.to(device)

        sgd = optim.SGD(self._network.parameters(),
                        lr=lrate,
                        momentum=0.9,
                        weight_decay=weight_decay)
        lr_schedule = optim.lr_scheduler.MultiStepLR(optimizer=sgd,
                                                     milestones=milestones,
                                                     gamma=lrate_decay)
        self._update_representation(train_loader, test_loader, sgd,
                                    lr_schedule)

    def _update_representation(self, train_loader, test_loader, optimizer,
                               scheduler):
        """Epoch loop: BCE-with-logits training plus per-epoch logging.

        Args:
            train_loader: iterable of (_, inputs, targets) batches.
            test_loader: loader used for the per-epoch test accuracy.
            optimizer: stepped once per batch.
            scheduler: stepped once per epoch.
        """
        template = 'Task {}, Epoch {}/{} => Loss {:.3f}, Train_accy {:.2f}, Test_accy {:.2f}'
        progress = tqdm(range(epochs))
        for epoch in progress:
            self._network.train()
            running_loss = 0.
            n_correct, n_seen = 0, 0
            for _, inputs, targets in train_loader:
                inputs = inputs.to(self._device)
                targets = targets.to(self._device)
                logits = self._network(inputs)['logits']

                bce_targets = target2onehot(targets, self._total_classes)
                if self._old_network is not None:
                    # Old-class slots take the previous network's sigmoid
                    # outputs instead of the hard 0/1 labels.
                    soft_old = torch.sigmoid(
                        self._old_network(inputs)['logits'].detach())
                    bce_targets = bce_targets.clone()
                    bce_targets[:, :self._known_classes] = soft_old
                loss = F.binary_cross_entropy_with_logits(logits, bce_targets)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

                # Running training accuracy from this batch's predictions.
                _, preds = torch.max(logits, dim=1)
                n_correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                n_seen += len(targets)

            scheduler.step()
            train_acc = np.around(tensor2numpy(n_correct) * 100 / n_seen,
                                  decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            message = template.format(self._cur_task, epoch + 1, epochs,
                                      running_loss / len(train_loader),
                                      train_acc, test_acc)
            progress.set_description(message)
            logging.info(message)
class End2End(BaseLearner):
    """Incremental learner using cross-entropy plus a per-task distillation term.

    Tasks after the first get a second, fine-tuning phase that trains on the
    exemplar memory only.
    """

    def __init__(self, args):
        """Build the network, record the device and start with no seen tasks."""
        super().__init__()
        self._network = IncrementalNet(args['convnet_type'], False)
        self._device = args['device']
        # Class count of each task seen so far, in task order; _run sums
        # prefixes of this list to locate each task's slice of the logits.
        self._seen_classes = []

    def after_task(self):
        """Snapshot the network as the frozen old network and advance the known-class count."""
        self._old_network = self._network.copy().freeze()
        self._known_classes = self._total_classes

    def incremental_train(self, data_manager):
        """Run one incremental task: grow the classifier, train, then update the exemplars.

        Args:
            data_manager: object providing get_task_size() and get_dataset()
                (project type; its full contract is not visible here).
        """
        self._cur_task += 1
        task_size = data_manager.get_task_size(self._cur_task)
        self._total_classes = self._known_classes + task_size
        self._network.update_fc(self._total_classes)
        self._seen_classes.append(task_size)
        logging.info('Learning on {}-{}'.format(self._known_classes, self._total_classes))

        # Loader
        # Training data: the new class indices plus whatever _get_memory() returns.
        train_dataset = data_manager.get_dataset(np.arange(self._known_classes, self._total_classes),
                                                 source='train', mode='train',
                                                 appendent=self._get_memory())
        self.train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
        # Test data: every class index from 0 up to the current total.
        test_dataset = data_manager.get_dataset(np.arange(0, self._total_classes), source='test', mode='test')
        self.test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

        # Procedure
        self._train(data_manager, self.train_loader, self.test_loader)
        # The per-class quota is the memory budget divided by the number of classes so far.
        self._reduce_exemplar(data_manager, memory_size // self._total_classes)
        self._construct_exemplar(data_manager, memory_size // self._total_classes)

    def _train(self, data_manager, train_loader, test_loader):
        """Train the current task; for tasks after the first, add a fine-tuning phase.

        NOTE(review): the train_loader / test_loader arguments are not read;
        self.train_loader and self.test_loader are passed to _run instead.

        Args:
            data_manager: forwarded to the exemplar helpers and used to build
                the fine-tuning dataset.
            train_loader: unused (see note above).
            test_loader: unused (see note above).
        """
        self._network.to(self._device)
        if self._old_network is not None:
            self._old_network.to(self._device)

        # First task: one training phase with the *_init hyper-parameters, then stop.
        if self._cur_task == 0:
            optimizer = optim.SGD(self._network.parameters(), lr=lrate_init, momentum=0.9, weight_decay=1e-3)
            scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=milestones_init,
                                                       gamma=lrate_decay)
            self._is_finetuning = False
            self._run(self.train_loader, self.test_loader, epochs_init, optimizer, scheduler, 'Training')
            return

        # New + exemplars
        optimizer = optim.SGD(self._network.parameters(), lr=lrate, momentum=0.9, weight_decay=1e-3)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=milestones,
                                                   gamma=lrate_decay)
        self._is_finetuning = False
        self._run(self.train_loader, self.test_loader, epochs, optimizer, scheduler, 'Training')

        # Finetune
        # Here the quota is computed from the classes known BEFORE this task.
        samples_per_class = memory_size // self._known_classes
        self._reduce_exemplar(data_manager, samples_per_class)
        self._construct_exemplar(data_manager, samples_per_class)
        # The network as trained so far becomes the distillation reference for fine-tuning.
        self._old_network = self._network.copy().freeze()
        # No class indices are requested: the dataset comes only from the memory.
        finetune_train_dataset = data_manager.get_dataset([], source='train', mode='train',
                                                          appendent=self._get_memory())
        finetune_train_loader = DataLoader(finetune_train_dataset, batch_size=batch_size,
                                           shuffle=True, num_workers=4)
        optimizer = optim.SGD(self._network.parameters(), lr=lrate_finetune, momentum=0.9, weight_decay=1e-3)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=milestones_finetune,
                                                   gamma=lrate_decay)
        self._is_finetuning = True
        self._run(finetune_train_loader, self.test_loader, epochs_finetune, optimizer, scheduler, 'Finetuning')

    def _run(self, train_loader, test_loader, epochs_, optimizer, scheduler, process):
        """Run the epoch loop for one phase and log per-epoch statistics.

        Args:
            train_loader: iterable of (_, inputs, targets) batches.
            test_loader: loader passed to _compute_accuracy after each epoch.
            epochs_: number of epochs to run.
            optimizer: optimizer stepped once per batch.
            scheduler: learning-rate scheduler stepped once per epoch.
            process: label prefixed to each log line ('Training' / 'Finetuning').
        """
        prog_bar = tqdm(range(epochs_))
        # enumerate's counter is discarded; `epoch` is the 0-based range value.
        for _, epoch in enumerate(prog_bar, start=1):
            self._network.train()
            losses = 0.
            correct, total = 0, 0
            for i, (_, inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(self._device), targets.to(self._device)
                # In this version the network call result is used directly as the logits.
                logits = self._network(inputs)

                # CELoss
                clf_loss = F.cross_entropy(logits, targets)

                if self._cur_task == 0:
                    # No old network yet: the distillation term is zero.
                    distill_loss = torch.zeros(1, device=self._device)
                else:
                    # While fine-tuning, the old network already covers the
                    # current task, so one more task slice is distilled.
                    finetuning_task = (self._cur_task + 1) if self._is_finetuning else self._cur_task
                    distill_loss = 0.
                    old_logits = self._old_network(inputs)
                    # NOTE: this loop reuses the name `i` from the batch loop
                    # above; the batch index is not read afterwards.
                    for i in range(1, finetuning_task + 1):
                        # [lo, hi) is the logit range belonging to task i.
                        lo = sum(self._seen_classes[:i - 1])
                        hi = sum(self._seen_classes[:i])
                        # Temperature-scaled softmax of both networks on this
                        # slice; the terms are summed over tasks, not averaged.
                        distill_loss += F.binary_cross_entropy(
                            F.softmax(logits[:, lo:hi] / T, dim=1),
                            F.softmax(old_logits[:, lo:hi] / T, dim=1))

                loss = clf_loss + distill_loss
                losses += loss.item()

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # acc
                _, preds = torch.max(logits, dim=1)
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            # train_acc = self._compute_accuracy(self._network, train_loader)
            # Training accuracy comes from the predictions collected during the epoch.
            train_acc = np.around(tensor2numpy(correct) * 100 / total, decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info1 = '{} => '.format(process)
            info2 = 'Task {}, Epoch {}/{}, Loss {:.3f}, Train_accy {:.2f}, Test_accy {:.2f}'.format(
                self._cur_task, epoch + 1, epochs_, losses / len(train_loader), train_acc, test_acc)
            prog_bar.set_description(info1 + info2)
            logging.info(info1 + info2)
def __init__(self, args):
    """Initialise the base learner and build a Grad-CAM-enabled network.

    Args:
        args: mapping forwarded to the base class; must contain
            'convnet_type'.
    """
    super().__init__(args)
    backbone = args['convnet_type']
    # gradcam=True is passed through to IncrementalNet.
    self._network = IncrementalNet(backbone, pretrained=False, gradcam=True)
class LwM(BaseLearner):
    """Incremental learner with logit distillation and Grad-CAM attention distillation.

    For tasks after the first, the loss is the sum of a classification term
    on the new-class logits, a knowledge-distillation term on the old-class
    logits and an attention term computed by gradcam_distillation().
    """

    def __init__(self, args):
        """Build a network with Grad-CAM support enabled (gradcam=True)."""
        super().__init__(args)
        self._network = IncrementalNet(args['convnet_type'], pretrained=False, gradcam=True)

    def after_task(self):
        """Copy the network into an eval-mode old network and re-attach Grad-CAM hooks on both."""
        self._network.zero_grad()
        # Hooks are removed before copying and set again on both networks
        # afterwards (presumably because hooks do not survive copy() — confirm).
        self._network.unset_gradcam_hook()
        self._old_network = self._network.copy().eval()
        self._network.set_gradcam_hook()
        self._old_network.set_gradcam_hook()

        self._known_classes = self._total_classes
        logging.info('Exemplar size: {}'.format(self.exemplar_size))

    def incremental_train(self, data_manager):
        """Run one incremental task: grow the classifier, build loaders and train.

        Unlike the sibling learners, no rehearsal memory is built here after
        training.

        Args:
            data_manager: object providing get_task_size() and get_dataset().
        """
        self._cur_task += 1
        self._total_classes = self._known_classes + data_manager.get_task_size(self._cur_task)
        self._network.update_fc(self._total_classes)
        logging.info('Learning on {}-{}'.format(self._known_classes, self._total_classes))

        # Loader
        train_dataset = data_manager.get_dataset(np.arange(self._known_classes, self._total_classes),
                                                 source='train', mode='train',
                                                 appendent=self._get_memory())
        self.train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                       num_workers=num_workers)
        test_dataset = data_manager.get_dataset(np.arange(0, self._total_classes), source='test', mode='test')
        self.test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                                      num_workers=num_workers)

        # Procedure
        # Wrap for multi-GPU training and unwrap again afterwards.
        if len(self._multiple_gpus) > 1:
            self._network = nn.DataParallel(self._network, self._multiple_gpus)
        self._train(self.train_loader, self.test_loader)
        if len(self._multiple_gpus) > 1:
            self._network = self._network.module

    def _train(self, train_loader, test_loader):
        """Move the networks to the device, create optimizer/scheduler and run training."""
        self._network.to(self._device)
        if self._old_network is not None:
            self._old_network.to(self._device)
        optimizer = optim.SGD(self._network.parameters(), lr=lrate, momentum=0.9, weight_decay=weight_decay)
        # optimizer = optim.Adam(self._network.parameters(), lr=lrate, weight_decay=1e-5)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer=optimizer, milestones=milestones,
                                                   gamma=lrate_decay)
        self._run(train_loader, test_loader, optimizer, scheduler)

    def _run(self, train_loader, test_loader, optimizer, scheduler):
        """Epoch loop combining classification, distillation and attention losses.

        Args:
            train_loader: iterable of (_, inputs, targets) batches.
            test_loader: loader passed to _compute_accuracy after each epoch.
            optimizer: optimizer stepped once per batch.
            scheduler: learning-rate scheduler stepped once per epoch.
        """
        for epoch in range(1, epochs + 1):
            self._network.train()
            clf_losses = 0.  # cross entropy
            distill_losses = 0.  # distillation
            attention_losses = 0.  # attention distillation
            correct, total = 0, 0
            for i, (_, inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(self._device), targets.to(self._device)
                outputs = self._network(inputs)
                logits = outputs['logits']
                optimizer.zero_grad()  # Same effect as nn.Module.zero_grad()
                if self._old_network is None:
                    # First task: plain cross-entropy over all logits.
                    clf_loss = F.cross_entropy(logits, targets)
                    clf_losses += clf_loss.item()
                    loss = clf_loss
                else:
                    self._old_network.zero_grad()
                    old_outputs = self._old_network(inputs)
                    old_logits = old_outputs['logits']

                    # Classification loss
                    # if no old samples saved, only calculate loss for new logits
                    clf_loss = F.cross_entropy(logits[:, self._known_classes:], targets - self._known_classes)
                    clf_losses += clf_loss.item()

                    # Distillation loss
                    # if no old samples saved, only calculate distillation loss for old logits
                    # The bare string below is a disabled alternative
                    # (sigmoid/BCE distillation) kept for reference; it has no effect.
                    ''' distill_loss = F.binary_cross_entropy_with_logits( logits[:, :self._known_classes], torch.sigmoid(old_logits.detach()) ) * distill_ratio '''
                    distill_loss = _KD_loss(logits[:, :self._known_classes], old_logits.detach(), T=2) * distill_ratio
                    distill_losses += distill_loss.item()

                    # Attention distillation loss
                    # Backpropagate a one-hot signal for the new network's
                    # top-scoring old class through BOTH networks. These
                    # backward passes must run before gradcam_distillation()
                    # reads 'gradcam_gradients' (presumably filled by the
                    # hooks — confirm in IncrementalNet).
                    top_base_indices = logits[:, :self._known_classes].argmax(dim=1)
                    onehot_top_base = target2onehot(top_base_indices, self._known_classes).to(self._device)

                    # retain_graph=True keeps the graph for the final loss.backward() below.
                    logits[:, :self._known_classes].backward(gradient=onehot_top_base, retain_graph=True)
                    old_logits.backward(gradient=onehot_top_base)

                    attention_loss = gradcam_distillation(outputs['gradcam_gradients'][0],
                                                          old_outputs['gradcam_gradients'][0].detach(),
                                                          outputs['gradcam_activations'][0],
                                                          old_outputs['gradcam_activations'][0].detach()) * attention_ratio
                    attention_losses += attention_loss.item()

                    # Integration
                    loss = clf_loss + distill_loss + attention_loss

                    # Discard the gradients accumulated by the two auxiliary
                    # backward passes above before the real update.
                    self._old_network.zero_grad()
                    self._network.zero_grad()

                optimizer.zero_grad()  # Same effect as nn.Module.zero_grad()
                loss.backward()
                optimizer.step()

                # acc
                _, preds = torch.max(logits, dim=1)
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            # train_acc = self._compute_accuracy(self._network, train_loader)
            train_acc = np.around(tensor2numpy(correct) * 100 / total, decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            # `i` is the last batch index, so (i + 1) is the number of batches
            # seen this epoch.
            info1 = 'Task {}, Epoch {}/{} => clf_loss {:.2f}, '.format(
                self._cur_task, epoch, epochs, clf_losses / (i + 1))
            info2 = 'distill_loss {:.2f}, attention_loss {:.2f}, Train_accy {:.2f}, Test_accy {:.2f}'.format(
                distill_losses / (i + 1), attention_losses / (i + 1), train_acc, test_acc)
            logging.info(info1 + info2)
class iCaRL(BaseLearner):
    """Incremental learner trained with sigmoid/BCE targets and evaluated via _eval_ncm.

    For tasks after the first, the old-class part of each target vector is
    replaced by the sigmoid outputs of the frozen previous network.
    """

    def __init__(self, args):
        """Build the network and remember the target device."""
        super().__init__()
        backbone = args['convnet_type']
        self._network = IncrementalNet(backbone, False)
        self._device = args['device']

    def after_task(self):
        """Keep a frozen copy of the network and mark all classes as known."""
        snapshot = self._network.copy().freeze()
        self._old_network = snapshot
        self._known_classes = self._total_classes

    def eval_task(self):
        """Evaluate on the test loader with _eval_ncm and return the accuracy result."""
        predicted, truth = self._eval_ncm(self.test_loader, self._class_means)
        return accuracy(predicted, truth, self._known_classes)

    def incremental_train(self, data_manager):
        """Extend the classifier for the next task, train, then update the exemplars.

        Args:
            data_manager: provides get_task_size() and get_dataset().
        """
        self._cur_task += 1
        added = data_manager.get_task_size(self._cur_task)
        self._total_classes = self._known_classes + added
        self._network.update_fc(self._total_classes)
        logging.info('Learning on {}-{}'.format(self._known_classes,
                                                self._total_classes))

        # Training set: new class indices plus the current memory.
        new_ids = np.arange(self._known_classes, self._total_classes)
        train_dataset = data_manager.get_dataset(new_ids,
                                                 source='train',
                                                 mode='train',
                                                 appendent=self._get_memory())
        self.train_loader = DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=4)

        # Test set: every class index up to the current total.
        all_ids = np.arange(0, self._total_classes)
        test_dataset = data_manager.get_dataset(all_ids,
                                                source='test',
                                                mode='test')
        self.test_loader = DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=4)

        # Train on the training loader, reporting accuracy on the test loader.
        self._train(self.train_loader, self.test_loader)

        # Shrink the stored exemplar sets, then build sets for the new
        # classes, both with the same per-class quota.
        quota = memory_size // self._total_classes
        self._reduce_exemplar(data_manager, quota)
        self._construct_exemplar(data_manager, quota)

    def _train(self, train_loader, test_loader):
        """Move the network to the device, create optimizer/scheduler and train."""
        self._network.to(self._device)
        sgd = optim.SGD(self._network.parameters(),
                        lr=lrate,
                        momentum=0.9,
                        weight_decay=1e-5)
        lr_schedule = optim.lr_scheduler.MultiStepLR(optimizer=sgd,
                                                     milestones=milestones,
                                                     gamma=lrate_decay)
        self._update_representation(train_loader, test_loader, sgd,
                                    lr_schedule)

    def _update_representation(self, train_loader, test_loader, optimizer,
                               scheduler):
        """Representation update: BCE-with-logits training plus per-epoch logging.

        Args:
            train_loader: iterable of (_, inputs, targets) batches.
            test_loader: loader used for the per-epoch test accuracy.
            optimizer: stepped once per batch.
            scheduler: stepped once per epoch.
        """
        template = 'Task {}, Epoch {}/{} => Loss {:.3f}, Train_accy {:.3f}, Test_accy {:.3f}'
        progress = tqdm(range(epochs))
        for epoch in progress:
            self._network.train()
            running_loss = 0.
            for _, inputs, targets in train_loader:
                inputs = inputs.to(self._device)
                targets = targets.to(self._device)
                # Forward pass; the call result is used directly as logits.
                logits = self._network(inputs)

                # Hard labels as one-hot rows over all current classes.
                bce_targets = target2onehot(targets, self._total_classes)
                if self._old_network is not None:
                    # Distillation: the old-class slots take the previous
                    # network's sigmoid outputs, while the new-class slots
                    # keep the ground-truth one-hot values.
                    soft_old = torch.sigmoid(
                        self._old_network(inputs).detach())
                    bce_targets = bce_targets.clone()
                    bce_targets[:, :self._known_classes] = soft_old
                loss = F.binary_cross_entropy_with_logits(logits, bce_targets)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

            scheduler.step()
            train_acc = self._compute_accuracy(self._network, train_loader)
            test_acc = self._compute_accuracy(self._network, test_loader)
            message = template.format(self._cur_task, epoch + 1, epochs,
                                      running_loss / len(train_loader),
                                      train_acc, test_acc)
            progress.set_description(message)
            logging.info(message)
class DR(BaseLearner):
    """Incremental learner that distils from a per-task expert and from the previous network.

    For every task a fresh expert network is trained on the new classes
    only. From the second task on, the main network is trained so that its
    new-class logits follow the expert and its old-class logits follow the
    frozen previous network.
    """

    def __init__(self, args):
        """Build the main network and remember the backbone type for later experts."""
        super().__init__(args)
        self._network = IncrementalNet(args['convnet_type'], False)
        self.convnet_type = args['convnet_type']
        # Set by _train_expert at the start of every task.
        self.expert = None

    def after_task(self):
        """Snapshot the network as the frozen old network and advance the known-class count."""
        self._old_network = self._network.copy().freeze()
        self._known_classes = self._total_classes
        logging.info('Exemplar size: {}'.format(self.exemplar_size))

    def incremental_train(self, data_manager):
        """Run one incremental task: train the expert, then (after task 0) the updated network.

        Args:
            data_manager: object providing get_task_size() and get_dataset().
        """
        self._cur_task += 1
        self.task_size = data_manager.get_task_size(self._cur_task)
        self._total_classes = self._known_classes + self.task_size
        self._network.update_fc(self._total_classes)
        logging.info('Learning on {}-{}'.format(self._known_classes, self._total_classes))

        # Loader
        # Main network: new class indices plus whatever _get_memory() returns.
        train_dataset = data_manager.get_dataset(np.arange(self._known_classes, self._total_classes),
                                                 source='train', mode='train',
                                                 appendent=self._get_memory())
        self.train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True,
                                       num_workers=num_workers)
        test_dataset = data_manager.get_dataset(np.arange(0, self._total_classes), source='test', mode='test')
        self.test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False,
                                      num_workers=num_workers)

        # Expert: new class indices only, with no memory appended.
        expert_train_dataset = data_manager.get_dataset(np.arange(self._known_classes, self._total_classes),
                                                        source='train', mode='train')
        self.expert_train_loader = DataLoader(expert_train_dataset, batch_size=batch_size,
                                              shuffle=True, num_workers=num_workers)
        expert_test_dataset = data_manager.get_dataset(np.arange(self._known_classes, self._total_classes),
                                                       source='test', mode='test')
        self.expert_test_loader = DataLoader(expert_test_dataset, batch_size=batch_size,
                                             shuffle=False, num_workers=num_workers)

        # Procedure
        logging.info('Training the expert CNN...')
        self._train_expert(self.expert_train_loader, self.expert_test_loader)
        if self._cur_task == 0:
            # First task: the expert itself becomes the main network.
            self._network = self.expert.copy()
        else:
            self.expert = self.expert.freeze()
            logging.info('Training the updated CNN...')
            if len(self._multiple_gpus) > 1:
                self._network = nn.DataParallel(self._network, self._multiple_gpus)
            self._train(self.train_loader, self.test_loader)
        self.build_rehearsal_memory(data_manager, self.samples_per_class)
        # The network is only wrapped in the else-branch above, so it is only
        # unwrapped when this is not the first task.
        if len(self._multiple_gpus) > 1 and self._cur_task > 0:
            self._network = self._network.module

    def _train(self, train_loader, test_loader):
        """Train the main network against the expert and the previous network.

        Args:
            train_loader: iterable of (_, inputs, targets) batches.
            test_loader: loader passed to _compute_accuracy after each epoch.
        """
        self._network.to(self._device)
        if self._old_network is not None:
            self._old_network.to(self._device)
        optimizer = optim.SGD(self._network.parameters(), lr=lrate, momentum=0.9, weight_decay=weight_decay)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=lrate_decay)

        prog_bar = tqdm(range(epochs))
        for _, epoch in enumerate(prog_bar):
            self._network.train()
            losses = 0.
            correct, total = 0, 0
            for i, (_, inputs, targets) in enumerate(train_loader):
                inputs, targets = inputs.to(self._device), targets.to(self._device)
                logits = self._network(inputs)['logits']
                exp_logits = self.expert(inputs)['logits']
                old_logits = self._old_network(inputs)['logits']

                # Distillation
                # New-class logits are matched to the expert's logits.
                dist_term = _KD_loss(logits[:, self._known_classes:], exp_logits, T1)
                # Retrospection
                # Old-class logits are matched to the previous network's logits.
                retr_term = _KD_loss(logits[:, :self._known_classes], old_logits, T2)

                # The ground-truth targets are not part of the loss; they
                # are only used for the accuracy count below.
                loss = dist_term + retr_term
                losses += loss.item()
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # acc
                _, preds = torch.max(logits, dim=1)
                correct += preds.eq(targets.expand_as(preds)).cpu().sum()
                total += len(targets)

            scheduler.step()
            # train_acc = self._compute_accuracy(self._network, train_loader)
            train_acc = np.around(tensor2numpy(correct) * 100 / total, decimals=2)
            test_acc = self._compute_accuracy(self._network, test_loader)
            info = 'Updated CNN => Epoch {}/{}, Loss {:.3f}, Train accy {:.2f}, Test accy {:.2f}'.format(
                epoch + 1, epochs, losses / len(train_loader), train_acc, test_acc)
            prog_bar.set_description(info)
            logging.info(info)

    def _train_expert(self, train_loader, test_loader):
        """Create and train a fresh expert network on the current task's classes.

        Args:
            train_loader: iterable of (_, inputs, targets) batches.
            test_loader: loader used for the per-epoch test accuracy.
        """
        self.expert = IncrementalNet(self.convnet_type, False)
        self.expert.update_fc(self.task_size)
        if len(self._multiple_gpus) > 1:
            self.expert = nn.DataParallel(self.expert, self._multiple_gpus)
        self.expert.to(self._device)
        optimizer = optim.SGD(self.expert.parameters(), lr=lrate_expert, momentum=0.9,
                              weight_decay=weight_decay)
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones_expert,
                                                   gamma=lrate_decay_expert)

        prog_bar = tqdm(range(epochs_expert))
        for _, epoch in enumerate(prog_bar):
            self.expert.train()
            losses = 0.
            for i, (_, inputs, targets) in enumerate(train_loader):
                # Labels are shifted by the number of known classes so that
                # they index the expert's task_size outputs.
                inputs, targets = inputs.to(self._device), (targets - self._known_classes).to(self._device)
                logits = self.expert(inputs)['logits']

                loss = F.cross_entropy(logits, targets)
                losses += loss.item()
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            scheduler.step()
            # The same label offset is applied inside _compute_accuracy.
            train_acc = self._compute_accuracy(self.expert, train_loader, self._known_classes)
            test_acc = self._compute_accuracy(self.expert, test_loader, self._known_classes)
            info = 'Expert CNN => Epoch {}/{}, Loss {:.3f}, Train accy {:.2f}, Test accy {:.2f}'.format(
                epoch + 1, epochs_expert, losses / len(train_loader), train_acc, test_acc)
            prog_bar.set_description(info)
            logging.info(info)
        # Keep the bare module rather than the DataParallel wrapper.
        if len(self._multiple_gpus) > 1:
            self.expert = self.expert.module

    def _compute_accuracy(self, model, loader, offset=0):
        """Return the accuracy of `model` on `loader` as a percentage rounded to 2 decimals.

        Args:
            model: network whose call result exposes a 'logits' entry.
            loader: iterable of (_, inputs, targets) batches.
            offset: value subtracted from the targets before comparison.
        """
        model.eval()
        correct, total = 0, 0
        for i, (_, inputs, targets) in enumerate(loader):
            inputs = inputs.to(self._device)
            # In-place subtraction: the batch's target tensor itself is modified.
            targets -= offset
            with torch.no_grad():
                outputs = model(inputs)['logits']
            predicts = torch.max(outputs, dim=1)[1]
            # Predictions are moved to the CPU to compare with the targets.
            correct += (predicts.cpu() == targets).sum()
            total += len(targets)

        return np.around(tensor2numpy(correct) * 100 / total, decimals=2)
class DR(BaseLearner):
    """Incremental learner that distils from a per-task expert and from the previous network.

    For every task a fresh expert is trained on the new classes only. From
    the second task on, the main network's new-class outputs are matched to
    the expert and its old-class outputs to the frozen previous network.
    """

    def __init__(self, args):
        """Build the main network and record device / backbone settings."""
        super().__init__()
        backbone = args['convnet_type']
        self._network = IncrementalNet(backbone, False)  # main network
        self._device = args['device']
        self.convnet_type = backbone
        self.expert = None  # expert network, created per task

    def after_task(self):
        """Keep a frozen copy of the network (the previous-task network)."""
        snapshot = self._network.copy().freeze()
        self._old_network = snapshot
        self._known_classes = self._total_classes

    def eval_task(self):
        """Evaluate on the test loader with _eval_ncm and return the accuracy result."""
        predicted, truth = self._eval_ncm(self.test_loader, self._class_means)
        return accuracy(predicted, truth, self._known_classes)

    def incremental_train(self, data_manager):
        """Run one task: train the expert, then (after task 0) the updated network.

        Args:
            data_manager: provides get_task_size() and get_dataset().
        """
        self._cur_task += 1
        self.task_size = data_manager.get_task_size(self._cur_task)
        self._total_classes = self._known_classes + self.task_size
        self._network.update_fc(self._total_classes)
        logging.info('Learning on {}-{}'.format(self._known_classes,
                                                self._total_classes))

        new_ids = np.arange(self._known_classes, self._total_classes)
        all_ids = np.arange(0, self._total_classes)

        # Main network: new classes plus memory for training, all classes for testing.
        train_dataset = data_manager.get_dataset(new_ids,
                                                 source='train',
                                                 mode='train',
                                                 appendent=self._get_memory())
        self.train_loader = DataLoader(train_dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=4)
        test_dataset = data_manager.get_dataset(all_ids,
                                                source='test',
                                                mode='test')
        self.test_loader = DataLoader(test_dataset,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=4)

        # Expert: new classes only, with no memory appended.
        expert_train_dataset = data_manager.get_dataset(
            np.arange(self._known_classes, self._total_classes),
            source='train',
            mode='train')
        self.expert_train_loader = DataLoader(expert_train_dataset,
                                              batch_size=batch_size,
                                              shuffle=True,
                                              num_workers=4)
        expert_test_dataset = data_manager.get_dataset(
            np.arange(self._known_classes, self._total_classes),
            source='test',
            mode='test')
        self.expert_test_loader = DataLoader(expert_test_dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=4)

        # The expert is retrained every task and only learns the new classes.
        logging.info('Training the expert CNN...')
        self._train_expert(self.expert_train_loader, self.expert_test_loader)

        if self._cur_task != 0:
            # Later tasks: freeze the expert and train the main network
            # against both the expert and the previous-task network.
            self.expert = self.expert.freeze()
            logging.info('Training the updated CNN...')
            self._train(self.train_loader, self.test_loader)
        else:
            # First task: the main network is simply a copy of the expert.
            self._network = self.expert.copy()

        # Shrink the stored exemplar sets, then build sets for the new classes.
        quota = memory_size // self._total_classes
        self._reduce_exemplar(data_manager, quota)
        self._construct_exemplar(data_manager, quota)

    def _train(self, train_loader, test_loader):
        """Train the main network with the distillation + retrospection loss.

        Args:
            train_loader: iterable of (_, inputs, targets) batches.
            test_loader: loader used for the per-epoch test accuracy.
        """
        self._network.to(self._device)
        sgd = optim.SGD(self._network.parameters(),
                        lr=lrate,
                        momentum=0.9,
                        weight_decay=1e-5)
        lr_schedule = optim.lr_scheduler.MultiStepLR(sgd,
                                                     milestones=milestones,
                                                     gamma=lrate_decay)

        template = 'Updated CNN => Epoch {}/{}, Loss {:.3f}, Train accy {:.3f}, Test accy {:.3f}'
        progress = tqdm(range(epochs))
        for epoch in progress:
            self._network.train()
            running_loss = 0.
            for _, inputs, targets in train_loader:
                inputs, targets = inputs.to(self._device), targets.to(
                    self._device)
                current_out = self._network(inputs)
                expert_out = self.expert(inputs)
                previous_out = self._old_network(inputs)

                # Distillation: new-class outputs follow the expert.
                dist_term = _KD_loss(current_out[:, self._known_classes:],
                                     expert_out, T1)
                # Retrospection: old-class outputs follow the previous network.
                retr_term = _KD_loss(current_out[:, :self._known_classes],
                                     previous_out, T2)

                # The loss is the sum of the two terms; targets are not used.
                loss = dist_term + retr_term
                running_loss += loss.item()
                sgd.zero_grad()
                loss.backward()
                sgd.step()

            lr_schedule.step()
            train_acc = self._compute_accuracy(self._network, train_loader)
            test_acc = self._compute_accuracy(self._network, test_loader)
            message = template.format(epoch + 1, epochs,
                                      running_loss / len(train_loader),
                                      train_acc, test_acc)
            progress.set_description(message)
            logging.info(message)

    def _train_expert(self, train_loader, test_loader):
        """Create and train a fresh expert network on the current task's classes.

        Args:
            train_loader: iterable of (_, inputs, targets) batches.
            test_loader: loader used for the per-epoch test accuracy.
        """
        self.expert = IncrementalNet(self.convnet_type, False)
        self.expert.update_fc(self.task_size)
        self.expert.to(self._device)
        sgd = optim.SGD(self.expert.parameters(),
                        lr=lrate_expert,
                        momentum=0.9,
                        weight_decay=1e-5)
        lr_schedule = optim.lr_scheduler.MultiStepLR(
            sgd, milestones=milestones_expert, gamma=lrate_decay_expert)

        label_shift = self._known_classes
        template = 'Expert CNN => Epoch {}/{}, Loss {:.3f}, Train accy {:.3f}, Test accy {:.3f}'
        progress = tqdm(range(epochs_expert))
        for epoch in progress:
            self.expert.train()
            running_loss = 0.
            for _, inputs, targets in train_loader:
                inputs = inputs.to(self._device)
                # The expert only has task_size outputs, so labels are shifted
                # down by the number of already-known classes.
                targets = (targets - label_shift).to(self._device)
                loss = F.cross_entropy(self.expert(inputs), targets)
                running_loss += loss.item()
                sgd.zero_grad()
                loss.backward()
                sgd.step()

            lr_schedule.step()
            train_acc = self._compute_accuracy(self.expert, train_loader,
                                               label_shift)
            test_acc = self._compute_accuracy(self.expert, test_loader,
                                              label_shift)
            message = template.format(epoch + 1, epochs_expert,
                                      running_loss / len(train_loader),
                                      train_acc, test_acc)
            progress.set_description(message)
            logging.info(message)

    def _compute_accuracy(self, model, loader, offset=0):
        """Return the fraction of correct predictions, rounded to 3 decimals.

        Args:
            model: network whose call result is used directly as class scores.
            loader: iterable of (_, inputs, targets) batches.
            offset: value subtracted (in place) from the targets before comparison.
        """
        model.eval()
        hits, seen = 0, 0
        for _, inputs, targets in loader:
            inputs = inputs.to(self._device)
            targets -= offset
            with torch.no_grad():
                scores = model(inputs)
            _, predicts = torch.max(scores, dim=1)
            hits += (predicts.cpu() == targets).sum()
            seen += len(targets)

        return np.around(tensor2numpy(hits) / seen, decimals=3)