import datetime
import json
import os
import time
from timeit import default_timer
from typing import Dict

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.nn import Module
from torch.nn.modules import loss
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Project-local names (settings, WarmUpLR, resnet50, HyperParameter, TLResNet,
# the DEFAULT_* constants, and the module-level `checkpoint`, `best_dict`, and
# `csv` results file) are assumed to be defined elsewhere in this repository.


def f(self, x, return_acc=False):
    # x = [index of the last layer to freeze, alpha]; append a default alpha
    # when the optimizer only proposes the layer index.
    if x.size == 1:
        x = np.append(x, 0.32)
    x = x.reshape(1, 2)
    target = int(x[0, 0])
    print("Start run ", target)
    start_time = default_timer()

    self.net = resnet50(60).cuda()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    if device == 'cuda':
        self.net = torch.nn.DataParallel(self.net)
        cudnn.benchmark = True
    self.net.load_state_dict(torch.load(checkpoint), strict=True)

    # Replace the classifier head with one sized for the incremental split.
    if self.inc_index == 1:
        self.net.module.fc = nn.Linear(512 * 4, 30).cuda()
    else:
        self.net.module.fc = nn.Linear(512 * 4, 10).cuda()
    self.net.train()

    # Freeze every Conv2d/BatchNorm2d up to module index `target` and count
    # the convolution weights that stay shared with the base model.
    cur_wc = 0
    count = 0
    for m in self.net.modules():
        if target == count:
            break
        elif isinstance(m, nn.Conv2d):
            for param in m.parameters():
                cur_wc += param.numel()
                param.requires_grad = False
        elif isinstance(m, nn.BatchNorm2d):
            for param in m.parameters():
                param.requires_grad = False
        count += 1

    BASE_DATA_ROOT = '/home/bbboming/HDD/Paper/datasets_object/ICIFAR100_60_30_10/BASE/'
    DATA_ROOT = '/home/bbboming/HDD/Paper/datasets_object/ICIFAR100_60_30_10/INC%d/' % self.inc_index

    train_transform = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(settings.CIFAR100_TRAIN_MEAN, settings.CIFAR100_TRAIN_STD),
    ])
    trainset = datasets.ImageFolder(os.path.join(DATA_ROOT, 'train'), train_transform)
    cifar100_training_loader = torch.utils.data.DataLoader(
        trainset,
        batch_size=self.batch_size,
        pin_memory=True,
        num_workers=4,
        shuffle=self.shuffle)

    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(settings.CIFAR100_TRAIN_MEAN, settings.CIFAR100_TRAIN_STD),
    ])
    testset = datasets.ImageFolder(os.path.join(DATA_ROOT, 'test'), test_transform)
    cifar100_test_loader = torch.utils.data.DataLoader(
        testset,
        batch_size=self.batch_size,
        pin_memory=True,
        num_workers=4,
        shuffle=False)

    base_testset = datasets.ImageFolder(os.path.join(BASE_DATA_ROOT, 'test'), test_transform)
    cifar100_base_test_loader = torch.utils.data.DataLoader(
        base_testset,
        batch_size=self.batch_size,
        pin_memory=True,
        num_workers=4,
        shuffle=False)

    loss_function = nn.CrossEntropyLoss()
    optimizer = optim.SGD(self.net.parameters(), lr=self.lr,
                          momentum=0.9, weight_decay=5e-4)
    train_scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    iter_per_epoch = len(cifar100_training_loader)
    warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * self.warm)

    checkpoint_path = os.path.join(settings.CHECKPOINT_PATH,
                                   'resnet50_inc%d' % self.inc_index,
                                   settings.TIME_NOW)
    # Create the checkpoint folder to save the model.
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)
    checkpoint_path = os.path.join(checkpoint_path, '{net}-{target}-{type}.pth')

    best_acc = 0.0
    best_base_acc = 0.0
    best_inc_acc = 0.0

    for epoch in range(1, settings.EPOCH + 1):
        # Train for one epoch on the incremental split.
        self.net.train()
        for batch_index, (images, labels) in enumerate(cifar100_training_loader):
            images = images.cuda()
            labels = labels.cuda()

            optimizer.zero_grad()
            outputs = self.net(images)
            loss = loss_function(outputs, labels)
            loss.backward()
            optimizer.step()

            if epoch <= self.warm:
                warmup_scheduler.step()

        # Evaluate: concatenate the base network's 60-way softmax with the
        # incremental head's softmax and take the argmax over all classes.
        self.net.eval()
        self.basenet.eval()
        soft_layer = nn.Softmax(dim=1).cuda()

        test_loss = 0.0  # cost function error
        correct = 0.0

        with torch.no_grad():
            # Incremental test set: labels are offset by the 60 base classes.
            for images, labels in cifar100_test_loader:
                images = images.cuda()
                labels = labels.cuda()

                base_outputs = self.basenet(images)
                outputs = self.net(images)
                loss = loss_function(outputs, labels)
                test_loss += loss.item()

                soft_base = soft_layer(base_outputs)
                soft_inc = soft_layer(outputs)
                softmax = torch.cat([soft_base, soft_inc], dim=1)
                labels_all = labels + 60
                _, preds = softmax.max(1)
                correct += preds.eq(labels_all).sum()

            # Base test set: labels already index the first 60 outputs.
            correct_base = 0.0
            for images, labels in cifar100_base_test_loader:
                images = images.cuda()
                labels = labels.cuda()

                base_outputs = self.basenet(images)
                outputs = self.net(images)

                soft_base = soft_layer(base_outputs)
                soft_inc = soft_layer(outputs)
                softmax = torch.cat([soft_base, soft_inc], dim=1)
                _, preds = softmax.max(1)
                correct_base += preds.eq(labels).sum()

        avg_loss = test_loss / len(cifar100_test_loader.dataset)
        base_acc = correct_base.float() / len(cifar100_base_test_loader.dataset)
        inc_acc = correct.float() / len(cifar100_test_loader.dataset)
        acc = (correct.float() + correct_base.float()) / (
            len(cifar100_test_loader.dataset) + len(cifar100_base_test_loader.dataset))
        print('Test set: Average loss: {:.4f}, Accuracy: {:.4f} (BaseAcc {:.4f} IncAcc {:.4f})'
              .format(avg_loss, acc, base_acc, inc_acc))

        train_scheduler.step(avg_loss)

        # Start saving the best-performing model after epoch 10.
        if epoch > 10 and best_acc < acc:
            torch.save(self.net.state_dict(),
                       checkpoint_path.format(target=target, net='resnet50', type='best'))
            best_acc = acc
            best_inc_acc = inc_acc
            best_base_acc = base_acc

    # share_ratio = target / self.count
    best_dict[str(target)] = best_acc.detach().cpu().item()

    memory_efficiency = cur_wc / self.total_wc
    obj_acc = best_acc.detach().cpu().item()
    alpha = x[:, 1].item()
    threshold = 0.02
    target_mem_eff = 0.70

    # Objective function: distance of the accuracy drop from the allowed threshold.
    obj_f = np.abs((self.max_acc - obj_acc) - threshold)

    print_str = (datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S") +
                 " x= {x}, alpha= {alpha} Memory_Efficiency= {memory_efficiency},"
                 " combined_classification_acc= {best_acc}, obj_acc= {obj_acc},"
                 " OBJ_F= {obj_f}".format(
                     x=target, alpha=alpha, best_acc=best_acc, obj_acc=obj_acc,
                     memory_efficiency=memory_efficiency, obj_f=obj_f))
    with open("history.log", "a") as f_hist:
        f_hist.write(print_str + "\n")
    print(print_str)

    if self.min_acc != 0:
        csv.write("%d, %d, %f, %f, %f, %f, %f\n" %
                  (self.iteration, target, obj_acc, threshold, obj_f,
                   self.min_acc, self.max_acc))
    self.iteration += 1

    end_time = default_timer()
    print("operation time: ", (end_time - start_time))

    if return_acc:
        return best_acc.detach().cpu().item()
    return obj_f
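
# The evaluation loop above classifies across old and new classes by
# concatenating the softmax of the 60-way base head with the softmax of the
# incremental head and taking the argmax over the joint vector. Below is a
# minimal, self-contained sketch of that decision rule; the function name and
# the tensor shapes in the usage note are illustrative, not part of this
# repository.
def combined_prediction(base_logits, inc_logits):
    """Predict over [0, num_base + num_inc) classes from two disjoint heads."""
    soft = nn.Softmax(dim=1)
    merged = torch.cat([soft(base_logits), soft(inc_logits)], dim=1)
    return merged.argmax(dim=1)

# Usage sketch: incremental ground-truth labels must be offset by the number
# of base classes before comparison, exactly as `labels + 60` does above.
#   preds = combined_prediction(torch.randn(8, 60), torch.randn(8, 30))
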
class Trainer:
    def __init__(self,
                 model: Module,
                 train_loader: DataLoader,
                 test_loader: DataLoader,
                 device=DEFAULT_DEVICE,
                 lr=DEFAULT_LR,
                 momentum=DEFAULT_MOMENTUM,
                 epochs=DEFAULT_EPOCHS,
                 batch_size=DEFAULT_BATCH_SIZE,
                 parallelism=DEFAULT_PARALLELISM,
                 milestones=MILESTONES,
                 gamma=0.2,
                 warm_phases=WARM_PHASES,
                 criterion=loss.CrossEntropyLoss()):
        print("initialize trainer")
        # Parameter pre-processing: wrap the model for multi-GPU training
        # when requested and more than one GPU is visible.
        self.test_loader = test_loader
        if torch.cuda.device_count() > 1 and parallelism:
            print(f"using {torch.cuda.device_count()} GPUs")
            self.model = nn.DataParallel(model)
        else:
            self.model = model
        self.model.to(device)

        # Optimize only the parameters that require gradients.
        optimizer = optim.SGD(
            filter(lambda p: p.requires_grad, self.model.parameters()),
            lr=lr,
            momentum=momentum,
            weight_decay=5e-4)
        train_scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer, milestones=milestones, gamma=gamma)

        # Warm up the learning rate during the first `warm_phases` epochs.
        self.warm_phases = warm_phases
        self.warmup_scheduler = WarmUpLR(optimizer, len(train_loader) * self.warm_phases)

        self.hp = HyperParameter(scheduler=train_scheduler,
                                 optimizer=optimizer,
                                 criterion=criterion,
                                 batch_size=batch_size,
                                 epochs=epochs,
                                 device=device)
        self.train_loader = train_loader

        print("initialize finished")
        print(f"hyper parameter: {self.hp}")

    def train(self, save_path, attack=False, attacker=None, params: Dict = None):
        self._init_attacker(attack, attacker, params)

        batch_number = len(self.train_loader)
        # Track the current learning rate so changes can be reported.
        now_lr = self.hp.optimizer.param_groups[0]["lr"]
        # Record the best test accuracy seen so far.
        best_acc = 0

        for ep in range(1, self.hp.epochs + 1):
            training_acc, running_loss = 0, .0
            start_time = time.process_time()

            for index, data in enumerate(self.train_loader):
                inputs, labels = data[0].to(self.hp.device), data[1].to(self.hp.device)

                self.hp.optimizer.zero_grad()
                if attack:
                    # Generate the adversarial batch first: the attacker
                    # backpropagates through the model, so the gradients must
                    # be zeroed again afterwards.
                    adv_inputs = self.attacker.calc_perturbation(inputs, labels)
                    self.hp.optimizer.zero_grad()

                    outputs = self.model(inputs)
                    adv_outputs = self.model(adv_inputs)
                    _loss = (self.hp.criterion(outputs, labels) +
                             self.hp.criterion(adv_outputs, labels))
                else:
                    outputs = self.model(inputs)
                    _loss = self.hp.criterion(outputs, labels)
                _loss.backward()
                self.hp.optimizer.step()

                training_acc += (outputs.argmax(dim=1) == labels).float().mean().item()

                # Warm up the learning rate.
                if ep <= self.warm_phases:
                    self.warmup_scheduler.step()

                # Detect and report learning rate changes.
                new_lr = self.hp.optimizer.param_groups[0]["lr"]
                if new_lr != now_lr:
                    now_lr = new_lr
                    print(f"learning rate changes to {now_lr:.6f}")

                running_loss += _loss.item()

                # Evaluate on the test set at the end of each epoch.
                if index % batch_number == batch_number - 1:
                    end_time = time.process_time()
                    acc = self.test(self.model, test_loader=self.test_loader,
                                    device=self.hp.device)
                    print(f"epoch: {ep} loss: {(running_loss / batch_number):.6f} "
                          f"train accuracy: {training_acc / batch_number} "
                          f"test accuracy: {acc} time: {end_time - start_time:.2f}s")

                    if best_acc < acc:
                        best_acc = acc
                        self._save_best_model(save_path, ep, acc)

            # Step the milestone scheduler once per epoch.
            self.hp.scheduler.step()

        torch.save(self.model.state_dict(), f"{save_path}-latest")
        print("finished training")
        print(f"best accuracy on test set: {best_acc}")

    @staticmethod
    def test(model: Module, test_loader, device, debug=False):
        correct = 0
        with torch.no_grad():
            for data in test_loader:
                inputs, labels = data[0].to(device), data[1].to(device)
                _, y_hats = model(inputs).max(1)
                correct += (y_hats == labels).sum().item()
        if debug:
            print(f"Testing: {len(test_loader.dataset)}")
            print(f"correct: {correct}")
            print(f"accuracy: {100 * correct / len(test_loader.dataset):.3f}%")
        return correct / len(test_loader.dataset)

    def _init_attacker(self, attack, attacker, params):
        self.attack = attack
        if attack:
            print(f"robustness training with {attacker.__name__}")
            self.attacker = attacker(self.model, **params)
            self.attacker.print_params()
        else:
            print("normal training")

    def _save_best_model(self, save_path, current_epochs, accuracy):
        """Save the best model together with its training info."""
        info = {
            "current_epochs": current_epochs,
            "total_epochs": self.hp.epochs,
            "accuracy": accuracy
        }
        if self.attack:
            info.update({
                "attack": self.attack,
                "attacker": type(self.attacker).__name__,
                "epsilons": self.attacker.epsilon,
            })
        with open(os.path.join(os.path.dirname(save_path), "info.json"),
                  "w", encoding="utf8") as f:
            json.dump(info, f)
        torch.save(self.model.state_dict(), f"{save_path}-best")

    @staticmethod
    def train_tl(origin_model_path, save_path, train_loader, test_loader,
                 device, choice="resnet50"):
        """Transfer learning: fine-tune a pretrained model on a new task."""
        print(f"transfer learning on model: {origin_model_path}")
        model = TLResNet.create_model(choice)
        model.load_model(origin_model_path)
        trainer = Trainer(model=model,
                          train_loader=train_loader,
                          test_loader=test_loader,
                          device=device)
        trainer.train(save_path)