def train(self, t, train, valid, args):
    """Run the training phase(s) for task ``t``, keeping the best weights of each phase.

    Task 0 runs only the ``'mcl'`` phase; any other task runs ``'ac'`` and then
    ``'mcl'``. Every phase starts again from ``self.lr`` / ``self.lr_patience``
    with a fresh optimizer from ``self._get_optimizer(lr, which_type)`` and
    ends by restoring the state that had the lowest validation loss.

    Args:
        t: Task index, forwarded to ``train_epoch`` and ``eval``.
        train: Training data; ``len(train)`` is used for the timing figures and
            the object is wrapped in a tqdm progress bar.
        valid: Validation data handed to ``self.eval``.
        args: Not used by this method.
    """
    phases = ['mcl'] if t == 0 else ['ac', 'mcl']

    for which_type in phases:
        print('Training Type: ', which_type)

        lowest_valid_loss = np.inf
        best_state = utils.get_model(self.model)
        cur_lr = self.lr
        strikes_left = self.lr_patience
        self.optimizer = self._get_optimizer(cur_lr, which_type)

        for epoch in range(self.nepochs):
            # Timed optimisation pass followed by a timed re-scoring of the training data.
            t_start = time.time()
            iter_bar = tqdm(train, desc='Train Iter (loss=X.XXX)')
            self.train_epoch(t, train, iter_bar, which_type)
            t_trained = time.time()
            train_loss, train_acc = self.eval(t, train, which_type)
            t_evaluated = time.time()

            train_ms = 1000 * self.sbatch * (t_trained - t_start) / len(train)
            eval_ms = 1000 * self.sbatch * (t_evaluated - t_trained) / len(train)
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                    epoch + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
                end='')

            valid_loss, valid_acc = self.eval(t, valid, which_type)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc),
                  end='')

            improved = valid_loss < lowest_valid_loss
            if improved:
                lowest_valid_loss = valid_loss
                best_state = utils.get_model(self.model)
                strikes_left = self.lr_patience
                print(' *', end='')
            else:
                strikes_left -= 1

            if not improved and strikes_left <= 0:
                # Patience used up: divide the learning rate, or stop once it is below lr_min.
                cur_lr /= self.lr_factor
                print(' lr={:.1e}'.format(cur_lr), end='')
                if cur_lr < self.lr_min:
                    print()
                    break
                strikes_left = self.lr_patience
                self.optimizer = self._get_optimizer(cur_lr, which_type)
            print()

        # Each phase finishes on its own best validation state.
        utils.set_model_(self.model, best_state)

    return
def train(self, t, xtrain, ytrain, xvalid, yvalid):
    """Train the column of task ``t`` and restore the lowest-validation-loss state.

    ``self.model.unfreeze_column(t)`` is called before the optimizer is built.
    The learning rate is divided by ``self.lr_factor`` whenever validation loss
    fails to improve for ``self.lr_patience`` consecutive epochs, and training
    stops early once it drops below ``self.lr_min``.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs/targets; ``xtrain.size(0)`` is used for
            the timing figures.
        xvalid, yvalid: Validation inputs/targets.
    """
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    cur_lr = self.lr
    strikes_left = self.lr_patience

    # Only the current task's column is trained; the optimizer is built afterwards.
    self.model.unfreeze_column(t)
    self.optimizer = self._get_optimizer(cur_lr)

    for epoch in range(self.nepochs):
        t_start = time.time()
        self.train_epoch(t, xtrain, ytrain)
        t_trained = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        t_evaluated = time.time()

        n_train = xtrain.size(0)
        train_ms = 1000 * self.sbatch * (t_trained - t_start) / n_train
        eval_ms = 1000 * self.sbatch * (t_evaluated - t_trained) / n_train
        print(
            "| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |".format(
                epoch + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
            end="")

        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(" Valid: loss={:.3f}, acc={:5.1f}% |".format(valid_loss, 100 * valid_acc),
              end="")

        improved = valid_loss < lowest_valid_loss
        if improved:
            lowest_valid_loss = valid_loss
            best_state = utils.get_model(self.model)
            strikes_left = self.lr_patience
            print(" *", end="")
        else:
            strikes_left -= 1

        if not improved and strikes_left <= 0:
            # Patience used up: divide the learning rate, or stop once it is below lr_min.
            cur_lr /= self.lr_factor
            print(" lr={:.1e}".format(cur_lr), end="")
            if cur_lr < self.lr_min:
                print()
                break
            strikes_left = self.lr_patience
            self.optimizer = self._get_optimizer(cur_lr)
        print()

    utils.set_model_(self.model, best_state)
    return
def train(self, t, train_data, valid_data, device='cuda'):
    """Train the column of task ``t`` under a cosine schedule and keep the best state.

    Builds shuffled/unshuffled ``DataLoader`` objects with batch size
    ``self.batch``, trains for ``self.epochs`` epochs, logs loss and accuracy
    to ``self.writer`` every epoch, and finally restores the state with the
    lowest validation loss.

    Args:
        t: Task index.
        train_data: Dataset wrapped in the shuffled training loader.
        valid_data: Dataset wrapped in the validation loader.
        device: Forwarded to ``train_epoch`` and ``eval``.
    """
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    base_lr = self.lr

    # Only the current task's column is trained.
    self.model.unfreeze_column(t)

    self.optimizer = self._get_optimizer(base_lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, self.epochs)

    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=self.batch, shuffle=True, num_workers=4, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_data, batch_size=self.batch, shuffle=False, num_workers=4, pin_memory=True)

    loss_tag = 'Train_Loss/Task: {}'.format(t)
    acc_tag = 'Train_Accuracy/Task: {}'.format(t)

    for epoch in range(self.epochs):
        self.train_epoch(t, train_loader, device=device)
        train_loss, train_acc = self.eval(t, train_loader, mode='train', device=device)
        valid_loss, valid_acc = self.eval(t, valid_loader, mode='train', device=device)

        print(
            '| Epoch {:3d} | Train: loss={:.3f}, acc={:5.1f}% | Valid: loss={:.3f}, acc={:5.1f}% |'
            .format(epoch, train_loss, 100 * train_acc, valid_loss, 100 * valid_acc))

        loss_scalars = {'train_loss': train_loss, 'valid_loss': valid_loss}
        acc_scalars = {'train_acc': train_acc * 100, 'valid_acc': valid_acc * 100}
        self.writer.add_scalars(loss_tag, loss_scalars, global_step=epoch)
        self.writer.add_scalars(acc_tag, acc_scalars, global_step=epoch)

        # The schedule advances once per epoch, before the best-state bookkeeping.
        scheduler.step()

        if valid_loss < lowest_valid_loss:
            lowest_valid_loss = valid_loss
            best_state = utils.get_model(self.model)

    utils.set_model_(self.model, best_state)
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size, taskcla):
    """Train on task ``t``, restore the best state, then update omega and the old model.

    Before the epoch loop, ``self.W`` and ``self.p_old`` are reset: for every
    parameter with ``requires_grad`` they hold a zeroed clone and a plain clone
    of the parameter data, keyed by the parameter name with ``.`` replaced by
    ``__``. After training, ``self.update_omega(self.W, self.epsilon)`` is
    called and ``self.model_old`` becomes a frozen deep copy of the model.

    Note that this loop never stops early: when the learning rate falls below
    ``self.lr_min`` only an extra newline is printed and training continues
    with a new optimizer.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs/targets.
        xvalid, yvalid: Validation inputs/targets.
        data, input_size, taskcla: Not used by this method.
    """
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    cur_lr = self.lr
    strikes_left = self.lr_patience
    self.optimizer = self._get_optimizer(cur_lr)

    self.W = {}
    self.p_old = {}
    for raw_name, param in self.model.named_parameters():
        if not param.requires_grad:
            continue
        key = raw_name.replace('.', '__')
        self.W[key] = param.data.clone().zero_()
        self.p_old[key] = param.data.clone()

    for epoch in range(self.nepochs):
        t_start = time.time()
        num_batch = xtrain.size(0)
        self.train_epoch(t, xtrain, ytrain)
        t_trained = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        t_evaluated = time.time()

        train_ms = 1000 * self.sbatch * (t_trained - t_start) / num_batch
        eval_ms = 1000 * self.sbatch * (t_evaluated - t_trained) / num_batch
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                epoch + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
            end='')

        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc),
              end='')
        # The epoch line is terminated here, so the markers below land on the next line.
        print()

        improved = valid_loss < lowest_valid_loss
        if improved:
            lowest_valid_loss = valid_loss
            best_state = utils.get_model(self.model)
            strikes_left = self.lr_patience
            print(' *', end='')
        else:
            strikes_left -= 1

        if not improved and strikes_left <= 0:
            cur_lr /= self.lr_factor
            print(' lr={:.1e}'.format(cur_lr), end='')
            if cur_lr < self.lr_min:
                # No early stop here: only a newline is emitted.
                print()
            strikes_left = self.lr_patience
            self.optimizer = self._get_optimizer(cur_lr)
        print()

    utils.set_model_(self.model, best_state)

    self.update_omega(self.W, self.epsilon)
    self.model_old = deepcopy(self.model)
    utils.freeze_model(self.model_old)
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid):
    """Train on task ``t``, snapshot the model, and refresh the merged Fisher diagonal.

    After the patience-driven training loop the best validation state is
    restored, ``self.model_old`` becomes a frozen deep copy in eval mode, and
    ``self.fisher`` is recomputed with ``utils.fisher_matrix_diag``. For
    ``t > 0`` the new values are merged with the previous ones as
    ``(new + old * t) / (t + 1)`` so only one set of diagonals is kept.
    Finally the model ``state_dict`` is written to ``'pretrain_ewc.pth'``.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs/targets (also used for the Fisher estimate).
        xvalid, yvalid: Validation inputs/targets.
    """
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    cur_lr = self.lr
    strikes_left = self.lr_patience
    self.optimizer = self._get_optimizer(cur_lr)

    for epoch in range(self.nepochs):
        t_start = time.time()
        self.train_epoch(t, xtrain, ytrain)
        t_trained = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        t_evaluated = time.time()

        n_train = xtrain.size(0)
        train_ms = 1000 * self.sbatch * (t_trained - t_start) / n_train
        eval_ms = 1000 * self.sbatch * (t_evaluated - t_trained) / n_train
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                epoch + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
            end='')

        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc),
              end='')

        improved = valid_loss < lowest_valid_loss
        if improved:
            lowest_valid_loss = valid_loss
            best_state = utils.get_model(self.model)
            strikes_left = self.lr_patience
            print(' *', end='')
        else:
            strikes_left -= 1

        if not improved and strikes_left <= 0:
            # Patience used up: divide the learning rate, or stop once it is below lr_min.
            cur_lr /= self.lr_factor
            print(' lr={:.1e}'.format(cur_lr), end='')
            if cur_lr < self.lr_min:
                print()
                break
            strikes_left = self.lr_patience
            self.optimizer = self._get_optimizer(cur_lr)
        print()

    utils.set_model_(self.model, best_state)

    # Snapshot of the freshly trained model.
    self.model_old = deepcopy(self.model)
    self.model_old.eval()
    utils.freeze_model(self.model_old)

    # The previous diagonals must be copied before self.fisher is overwritten.
    previous_fisher = None
    if t > 0:
        previous_fisher = {
            name: self.fisher[name].clone()
            for name, _ in self.model.named_parameters()
        }

    self.fisher = utils.fisher_matrix_diag(t, xtrain, ytrain, self.model, self.criterion)

    if previous_fisher is not None:
        # Running average over tasks, so a single set of diagonals is stored.
        for name, _ in self.model.named_parameters():
            self.fisher[name] = (self.fisher[name] + previous_fisher[name] * t) / (t + 1)

    torch.save(self.model.state_dict(), 'pretrain_ewc.pth')
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid):
    """Train on task ``t``, restore the best state, then freeze all non-``last`` parameters.

    After the patience-driven loop, every parameter whose name does not start
    with ``"last"`` gets ``requires_grad = False``.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs/targets.
        xvalid, yvalid: Validation inputs/targets.
    """
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    cur_lr = self.lr
    strikes_left = self.lr_patience
    self.optimizer = self._get_optimizer(cur_lr)

    for epoch in range(self.nepochs):
        t_start = time.time()
        self.train_epoch(t, xtrain, ytrain)
        t_trained = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        t_evaluated = time.time()

        n_train = xtrain.size(0)
        train_ms = 1000 * self.sbatch * (t_trained - t_start) / n_train
        eval_ms = 1000 * self.sbatch * (t_evaluated - t_trained) / n_train
        print(
            "| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |".format(
                epoch + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
            end="")

        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(" Valid: loss={:.3f}, acc={:5.1f}% |".format(valid_loss, 100 * valid_acc),
              end="")

        improved = valid_loss < lowest_valid_loss
        if improved:
            lowest_valid_loss = valid_loss
            best_state = utils.get_model(self.model)
            strikes_left = self.lr_patience
            print(" *", end="")
        else:
            strikes_left -= 1

        if not improved and strikes_left <= 0:
            # Patience used up: divide the learning rate, or stop once it is below lr_min.
            cur_lr /= self.lr_factor
            print(" lr={:.1e}".format(cur_lr), end="")
            if cur_lr < self.lr_min:
                print()
                break
            strikes_left = self.lr_patience
            self.optimizer = self._get_optimizer(cur_lr)
        print()

    utils.set_model_(self.model, best_state)

    # Everything except parameters named "last..." stops receiving gradients.
    for name, param in self.model.named_parameters():
        if name.startswith("last"):
            continue
        param.requires_grad = False
    return
def train(self, tasks, xtrain, ytrain, xvalid, yvalid):
    """Retrain from ``self.initial_model`` and restore the lowest-validation-loss state.

    The model is reset to a deep copy of ``self.initial_model`` first. Only a
    loss is tracked (via ``self.eval_validation``). A ``KeyboardInterrupt``
    during training ends the loop quietly; the best state seen so far is still
    restored.

    Args:
        tasks: Pair ``(task_t, task_v)``; the first is used for training and
            train scoring, the second for validation scoring.
        xtrain, ytrain: Training inputs/targets.
        xvalid, yvalid: Validation inputs/targets.
    """
    self.model = deepcopy(self.initial_model)
    task_t, task_v = tasks

    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    cur_lr = self.lr
    strikes_left = self.lr_patience
    self.optimizer = self._get_optimizer(cur_lr)

    try:
        for epoch in range(self.nepochs):
            t_start = time.time()
            self.train_epoch(task_t, xtrain, ytrain)
            t_trained = time.time()
            train_loss = self.eval_validation(task_t, xtrain, ytrain)
            t_evaluated = time.time()

            n_train = xtrain.size(0)
            train_ms = 1000 * self.sbatch * (t_trained - t_start) / n_train
            eval_ms = 1000 * self.sbatch * (t_evaluated - t_trained) / n_train
            print(
                "| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f} |".format(
                    epoch + 1, train_ms, eval_ms, train_loss),
                end="")

            valid_loss = self.eval_validation(task_v, xvalid, yvalid)
            print(" Valid: loss={:.3f} |".format(valid_loss), end="")

            improved = valid_loss < lowest_valid_loss
            if improved:
                lowest_valid_loss = valid_loss
                best_state = utils.get_model(self.model)
                strikes_left = self.lr_patience
                print(" *", end="")
            else:
                strikes_left -= 1

            if not improved and strikes_left <= 0:
                # Patience used up: divide the learning rate, or stop below lr_min.
                cur_lr /= self.lr_factor
                print(" lr={:.1e}".format(cur_lr), end="")
                if cur_lr < self.lr_min:
                    print()
                    break
                strikes_left = self.lr_patience
                self.optimizer = self._get_optimizer(cur_lr)
            print()
    except KeyboardInterrupt:
        # Manual interruption just ends training early.
        print()

    utils.set_model_(self.model, best_state)
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid, args, ac_pre_mask,
          pre_mask_back, pre_mask_pre, pre_mask, from_t):
    """Train on task ``t`` with externally supplied masks and restore the best state.

    The mask arguments are stored on ``self`` before training starts
    (``mask_pre``, ``mask_back``, ``ac_pre_mask``, ``pre_mask``, ``from_t``).
    A ``KeyboardInterrupt`` ends the loop quietly; the best validation state
    seen so far is still restored.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs/targets.
        xvalid, yvalid: Validation inputs/targets.
        args: Forwarded as ``args=`` to ``train_epoch`` and ``eval``.
        ac_pre_mask: Stored as ``self.ac_pre_mask``.
        pre_mask_back: Stored as ``self.mask_back``.
        pre_mask_pre: Stored as ``self.mask_pre``.
        pre_mask: Stored as ``self.pre_mask``.
        from_t: Stored as ``self.from_t``.
    """
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    cur_lr = self.lr
    strikes_left = self.lr_patience
    self.optimizer = self._get_optimizer(cur_lr)

    self.mask_pre = pre_mask_pre
    self.mask_back = pre_mask_back
    self.ac_pre_mask = ac_pre_mask
    self.pre_mask = pre_mask
    self.from_t = from_t

    try:
        for epoch in range(self.nepochs):
            t_start = time.time()
            self.train_epoch(t, xtrain, ytrain, args=args)
            t_trained = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain, args=args)
            t_evaluated = time.time()

            n_train = xtrain.size(0)
            train_ms = 1000 * self.sbatch * (t_trained - t_start) / n_train
            eval_ms = 1000 * self.sbatch * (t_evaluated - t_trained) / n_train
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(epoch + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
                end='')

            valid_loss, valid_acc = self.eval(t, xvalid, yvalid, args=args)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc),
                  end='')

            improved = valid_loss < lowest_valid_loss
            if improved:
                lowest_valid_loss = valid_loss
                best_state = utils.get_model(self.model)
                strikes_left = self.lr_patience
                print(' *', end='')
            else:
                strikes_left -= 1

            if not improved and strikes_left <= 0:
                # Patience used up: divide the learning rate, or stop below lr_min.
                cur_lr /= self.lr_factor
                print(' lr={:.1e}'.format(cur_lr), end='')
                if cur_lr < self.lr_min:
                    print()
                    break
                strikes_left = self.lr_patience
                self.optimizer = self._get_optimizer(cur_lr)
            print()
    except KeyboardInterrupt:
        # Manual interruption just ends training early.
        print()

    utils.set_model_(self.model, best_state)
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid):
    """Train on task ``t`` and merge the result with the previous model using Fisher weights.

    After the patience-driven loop the best validation state is restored.
    For ``t == 0`` the Fisher diagonal is simply stored. For later tasks each
    parameter becomes::

        (fisher_new * p + fisher_old * p_old) / (fisher_old + fisher_new)

    where entries with a zero accumulated Fisher value are divided by 1
    instead of 0. ``self.fisher`` accumulates the sum of the diagonals.
    Finally ``self.model_old`` becomes a frozen deep copy in eval mode.

    The merged value is written back into the model parameters. The previous
    implementation rebound the loop variable (``p = ...``), so the division
    ran on a temporary tensor and the model was left unmerged.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs/targets (also used for the Fisher estimate).
        xvalid, yvalid: Validation inputs/targets.
    """
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    # Loop epochs
    for e in range(self.nepochs):
        # Train
        clock0 = time.time()
        self.train_epoch(t, xtrain, ytrain)
        clock1 = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        clock2 = time.time()
        print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
            e + 1,
            1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
            1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
            train_loss, 100 * train_acc), end='')
        # Valid
        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc), end='')
        # Adapt lr
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(' lr={:.1e}'.format(lr), end='')
                if lr < self.lr_min:
                    print()
                    break
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best
    utils.set_model_(self.model, best_model)

    # Model update
    if t == 0:
        self.fisher = utils.fisher_matrix_diag(t, xtrain, ytrain, self.model, self.criterion)
    else:
        fisher_new = utils.fisher_matrix_diag(t, xtrain, ytrain, self.model, self.criterion)
        for (n, p), (_, p_old) in zip(self.model.named_parameters(),
                                      self.model_old.named_parameters()):
            merged = fisher_new[n] * p + self.fisher[n] * p_old
            self.fisher[n] += fisher_new[n]
            # Where the accumulated Fisher is exactly 0, divide by 1 instead.
            merged = merged / ((self.fisher[n] == 0).float() + self.fisher[n])
            # Write through .data so the parameter itself changes and autograd is bypassed.
            p.data.copy_(merged.data)

    # Old model save
    self.model_old = deepcopy(self.model)
    self.model_old.eval()
    utils.freeze_model(self.model_old)
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid, data):
    """Train on task ``t`` with per-epoch logging and rebuild ``self.model_old``.

    Each epoch the validation loss/accuracy of the current task and of every
    earlier task (taken from ``data[task]['valid']``, moved to CUDA) are added
    to ``self.logger``. After the best validation state is restored,
    ``self.model_old`` is created as a new ``Net`` with input size
    ``[1, 28, 28]`` and ten heads of ten outputs, loaded from the current
    ``state_dict``, put in eval mode and frozen; the logger is then saved.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs/targets.
        xvalid, yvalid: Validation inputs/targets of the current task.
        data: Mapping indexed by task id providing ``['valid']['x']`` and
            ``['valid']['y']`` tensors.
    """
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    cur_lr = self.lr
    strikes_left = self.lr_patience
    self.optimizer = self._get_optimizer(cur_lr)

    for epoch in range(self.nepochs):
        t_start = time.time()
        self.train_epoch(t, xtrain, ytrain)
        t_trained = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        t_evaluated = time.time()

        n_train = xtrain.size(0)
        train_ms = 1000 * self.sbatch * (t_trained - t_start) / n_train
        eval_ms = 1000 * self.sbatch * (t_evaluated - t_trained) / n_train
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                epoch + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
            end='')

        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc),
              end='')

        # Log the current task, then every earlier task, under one global epoch index.
        global_epoch = (t * self.nepochs) + epoch
        self.logger.add(epoch=global_epoch, task_num=t + 1,
                        valid_loss=valid_loss, valid_acc=valid_acc)
        for past_task in range(t):
            past_x = data[past_task]['valid']['x'].cuda()
            past_y = data[past_task]['valid']['y'].cuda()
            past_loss, past_acc = self.eval(past_task, past_x, past_y)
            self.logger.add(epoch=global_epoch, task_num=past_task + 1,
                            valid_loss=past_loss, valid_acc=past_acc)

        improved = valid_loss < lowest_valid_loss
        if improved:
            lowest_valid_loss = valid_loss
            best_state = utils.get_model(self.model)
            strikes_left = self.lr_patience
            print(' *', end='')
        else:
            strikes_left -= 1

        if not improved and strikes_left <= 0:
            # Patience used up: divide the learning rate, or stop once it is below lr_min.
            cur_lr /= self.lr_factor
            print(' lr={:.1e}'.format(cur_lr), end='')
            if cur_lr < self.lr_min:
                print()
                break
            strikes_left = self.lr_patience
            self.optimizer = self._get_optimizer(cur_lr)
        print()

    utils.set_model_(self.model, best_state)

    # The old model is a fresh Net with a hard-coded layout: heads 0..9, ten outputs each.
    head_layout = [(head, 10) for head in range(10)]
    self.model_old = Net([1, 28, 28], head_layout).cuda()
    self.model_old.load_state_dict(self.model.state_dict())
    self.model_old.eval()
    utils.freeze_model(self.model_old)

    self.logger.save()
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid, args):
    """Train on task ``t`` and restore the best state, printing ``fc1`` weights around it.

    ``self.model.fc1.weight`` is printed before training and again after the
    best validation state has been restored.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs/targets.
        xvalid, yvalid: Validation inputs/targets.
        args: Passed positionally to ``self.eval`` (not to ``train_epoch``).
    """
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    cur_lr = self.lr
    strikes_left = self.lr_patience
    self.optimizer = self._get_optimizer(cur_lr)

    print('before: ', self.model.fc1.weight)

    for epoch in range(self.nepochs):
        t_start = time.time()
        self.train_epoch(t, xtrain, ytrain)
        t_trained = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain, args)
        t_evaluated = time.time()

        n_train = xtrain.size(0)
        train_ms = 1000 * self.sbatch * (t_trained - t_start) / n_train
        eval_ms = 1000 * self.sbatch * (t_evaluated - t_trained) / n_train
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                epoch + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
            end='')

        valid_loss, valid_acc = self.eval(t, xvalid, yvalid, args)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc),
              end='')

        improved = valid_loss < lowest_valid_loss
        if improved:
            lowest_valid_loss = valid_loss
            best_state = utils.get_model(self.model)
            strikes_left = self.lr_patience
            print(' *', end='')
        else:
            strikes_left -= 1

        if not improved and strikes_left <= 0:
            # Patience used up: divide the learning rate, or stop once it is below lr_min.
            cur_lr /= self.lr_factor
            print(' lr={:.1e}'.format(cur_lr), end='')
            if cur_lr < self.lr_min:
                print()
                break
            strikes_left = self.lr_patience
            self.optimizer = self._get_optimizer(cur_lr)
        print()

    utils.set_model_(self.model, best_state)
    print('after: ', self.model.fc1.weight)
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid):
    """Train on task ``t``, then average the weights with the previous model.

    After the best validation state is restored, for ``t > 0`` every named
    parameter is replaced by ``(new + old * t) / (t + 1)`` using the states
    obtained from ``utils.get_model``. ``self.model_old`` then becomes a frozen
    deep copy of the model in eval mode.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs/targets.
        xvalid, yvalid: Validation inputs/targets.
    """
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    cur_lr = self.lr
    strikes_left = self.lr_patience
    self.optimizer = self._get_optimizer(cur_lr)

    for epoch in range(self.nepochs):
        t_start = time.time()
        self.train_epoch(t, xtrain, ytrain)
        t_trained = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        t_evaluated = time.time()

        n_train = xtrain.size(0)
        train_ms = 1000 * self.sbatch * (t_trained - t_start) / n_train
        eval_ms = 1000 * self.sbatch * (t_evaluated - t_trained) / n_train
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                epoch + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
            end='')

        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc),
              end='')

        improved = valid_loss < lowest_valid_loss
        if improved:
            lowest_valid_loss = valid_loss
            best_state = utils.get_model(self.model)
            strikes_left = self.lr_patience
            print(' *', end='')
        else:
            strikes_left -= 1

        if not improved and strikes_left <= 0:
            # Patience used up: divide the learning rate, or stop once it is below lr_min.
            cur_lr /= self.lr_factor
            print(' lr={:.1e}'.format(cur_lr), end='')
            if cur_lr < self.lr_min:
                print()
                break
            strikes_left = self.lr_patience
            self.optimizer = self._get_optimizer(cur_lr)
        print()

    utils.set_model_(self.model, best_state)

    if t > 0:
        # Running mean over tasks: the old model carries weight t, the new one weight 1.
        current_state = utils.get_model(self.model)
        previous_state = utils.get_model(self.model_old)
        for name, _ in self.model.named_parameters():
            current_state[name] = (current_state[name] + previous_state[name] * t) / (t + 1)
        utils.set_model_(self.model, current_state)

    self.model_old = deepcopy(self.model)
    utils.freeze_model(self.model_old)
    self.model_old.eval()
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid, data):
    """Train for ``self.nepochs`` epochs and keep the state with the best test accuracy.

    After every epoch the model is scored on ``data[5]['test']``. Whenever that
    accuracy exceeds ``self.test_max`` (which persists across calls), the
    attribute is raised and the current state is remembered. A
    ``KeyboardInterrupt`` ends training quietly. The remembered state is
    restored at the end.

    NOTE(review): model selection uses the test split of entry ``5`` and the
    progress line hard-codes ``/5`` — confirm both against the caller.

    Args:
        t: Task index; used for the optimizer and the progress line only.
        xtrain, ytrain: Training inputs/targets.
        xvalid, yvalid: Validation inputs/targets (reported, not used for selection).
        data: Mapping providing ``data[5]['test']['x']`` and ``['y']`` tensors.
    """
    best_model = utils.get_model(self.model)
    self.optimizer = self._get_optimizer(t, self.lr)
    nepochs = self.nepochs

    try:
        # The test tensors do not change between epochs, so move them to the GPU once.
        xtest = data[5]['test']['x'].cuda()
        ytest = data[5]['test']['y'].cuda()

        for e in range(nepochs):
            # Train
            self.train_epoch(xtrain, ytrain, cur_epoch=e, nepoch=nepochs)
            train_loss, train_acc = self.eval(xtrain, ytrain)
            print(
                '| [{:d}/5], Epoch {:d}/{:d}, | Train: loss={:.3f}, acc={:2.2f}% |'.format(
                    t + 1, e + 1, nepochs, train_loss, 100 * train_acc),
                end='')

            # Valid
            valid_loss, valid_acc = self.eval(xvalid, yvalid)
            print(' Valid: loss={:.3f}, acc={:5.2f}% |'.format(valid_loss, 100 * valid_acc),
                  end='')
            print()

            _, test_acc = self.eval(xtest, ytest)

            # self.test_max is the running maximum across all calls, not just this one.
            if test_acc > self.test_max:
                self.test_max = max(self.test_max, test_acc)
                best_model = utils.get_model(self.model)

            print(
                '>>> Test on All Task:->>> Max_acc : {:2.2f}% Curr_acc : {:2.2f}%<<<'.format(
                    100 * self.test_max, 100 * test_acc))
    except KeyboardInterrupt:
        # Manual interruption just ends training early.
        print()

    # Restore the remembered state.
    utils.set_model_(self.model, best_model)
    return
def search_network(self, t, train_data, valid_data, batch_size, epochs, device='cuda'):
    """Run the search stage for task ``t`` and restore the lowest-validation-loss state.

    ``train_data`` is split in half by index: the first half feeds the
    training loader and the second half the validation loader, each through a
    ``SubsetRandomSampler``. Two optimizers are created
    (``self.optimizer_oa`` and ``self.optimizer_o``); the cosine schedule is
    attached to ``self.optimizer_o`` only. Loss and accuracy are logged to
    ``self.writer`` each epoch.

    Args:
        t: Task index.
        train_data: Dataset that is split into the two halves described above.
        valid_data: Not used by this method.
        batch_size: Batch size of both loaders.
        epochs: Number of search epochs and the cosine schedule's ``T_max``.
        device: Forwarded to ``search_epoch`` and ``eval``.
    """
    print("Search Stage")
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    arch_lr = self.o_lr_a
    weight_lr = self.o_lr

    self.optimizer_oa = self._get_optimizer_oa(arch_lr)
    self.optimizer_o = self._get_optimizer_o(weight_lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer_o, epochs)

    # Split the training set 50/50 by index position.
    n_examples = len(train_data)
    all_indices = list(range(n_examples))
    half = int(np.floor(0.5 * n_examples))
    first_half = all_indices[:half]
    second_half = all_indices[half:n_examples]

    sampler_cls = torch.utils.data.sampler.SubsetRandomSampler
    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=batch_size, sampler=sampler_cls(first_half),
        num_workers=4, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        train_data, batch_size=batch_size, sampler=sampler_cls(second_half),
        num_workers=4, pin_memory=True)

    loss_tag = 'Search_Loss/Task: {}'.format(t)
    acc_tag = 'Search_Accuracy/Task: {}'.format(t)

    for epoch in range(epochs):
        self.search_epoch(t, train_loader, valid_loader, device)
        train_loss, train_acc = self.eval(t, train_loader, mode='search', device=device)
        valid_loss, valid_acc = self.eval(t, valid_loader, mode='search', device=device)

        print(
            '| Epoch {:3d} | Train: loss={:.3f}, acc={:5.1f}% | Valid: loss={:.3f}, acc={:5.1f}% |'
            .format(epoch, train_loss, 100 * train_acc, valid_loss, 100 * valid_acc))
        self.writer.add_scalars(
            loss_tag, {'train_loss': train_loss, 'valid_loss': valid_loss}, global_step=epoch)
        self.writer.add_scalars(
            acc_tag, {'train_acc': train_acc * 100, 'valid_acc': valid_acc * 100},
            global_step=epoch)

        scheduler.step()

        if valid_loss < lowest_valid_loss:
            lowest_valid_loss = valid_loss
            best_state = utils.get_model(self.model)

    utils.set_model_(self.model, best_state)
def post_train(self, t, xtrain, ytrain, xvalid, yvalid):
    """Average the model with the previous one (for ``t > 0``) and refresh ``self.model_old``.

    For ``t > 0`` every named parameter becomes ``(new + old * t) / (t + 1)``,
    computed on the states returned by ``utils.get_model``. Afterwards
    ``self.model_old`` is a frozen deep copy of the model in eval mode.

    Args:
        t: Task index; also the weight given to the old model in the average.
        xtrain, ytrain, xvalid, yvalid: Not used by this method.
    """
    if t > 0:
        current_state = utils.get_model(self.model)
        previous_state = utils.get_model(self.model_old)
        # Running mean over tasks: the old model carries weight t, the new one weight 1.
        for name, _ in self.model.named_parameters():
            blended = (current_state[name] + previous_state[name] * t) / (t + 1)
            current_state[name] = blended
        utils.set_model_(self.model, current_state)

    self.model_old = deepcopy(self.model)
    utils.freeze_model(self.model_old)
    self.model_old.eval()
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid):
    """Train on task ``t``, snapshot the result as ``self.model_old`` and save a checkpoint.

    After the best validation state is restored, ``self.model_old`` becomes a
    frozen deep copy in eval mode and the model ``state_dict`` is written to
    ``'pretrain_lwf.pth'`` in the current working directory.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs/targets.
        xvalid, yvalid: Validation inputs/targets.
    """
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    cur_lr = self.lr
    strikes_left = self.lr_patience
    self.optimizer = self._get_optimizer(cur_lr)

    for epoch in range(self.nepochs):
        t_start = time.time()
        self.train_epoch(t, xtrain, ytrain)
        t_trained = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        t_evaluated = time.time()

        n_train = xtrain.size(0)
        train_ms = 1000 * self.sbatch * (t_trained - t_start) / n_train
        eval_ms = 1000 * self.sbatch * (t_evaluated - t_trained) / n_train
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                epoch + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
            end='')

        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc),
              end='')

        improved = valid_loss < lowest_valid_loss
        if improved:
            lowest_valid_loss = valid_loss
            best_state = utils.get_model(self.model)
            strikes_left = self.lr_patience
            print(' *', end='')
        else:
            strikes_left -= 1

        if not improved and strikes_left <= 0:
            # Patience used up: divide the learning rate, or stop once it is below lr_min.
            cur_lr /= self.lr_factor
            print(' lr={:.1e}'.format(cur_lr), end='')
            if cur_lr < self.lr_min:
                print()
                break
            strikes_left = self.lr_patience
            self.optimizer = self._get_optimizer(cur_lr)
        print()

    utils.set_model_(self.model, best_state)

    snapshot = deepcopy(self.model)
    self.model_old = snapshot
    self.model_old.eval()
    utils.freeze_model(self.model_old)

    torch.save(self.model.state_dict(), 'pretrain_lwf.pth')
    return
def train_network(self, t, train_data, valid_data, batch_size, epochs, device='cuda'):
    """Train on task ``t`` under a cosine schedule and restore the best validation state.

    Builds a shuffled training loader and an unshuffled validation loader,
    trains for ``epochs`` epochs, logs loss and accuracy to ``self.writer``
    every epoch, and finally restores the state with the lowest validation
    loss.

    Args:
        t: Task index.
        train_data: Dataset for the training loader.
        valid_data: Dataset for the validation loader.
        batch_size: Batch size of both loaders.
        epochs: Number of epochs and the cosine schedule's ``T_max``.
        device: Forwarded to ``train_epoch`` and ``eval``.
    """
    print("Training Begin")
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    base_lr = self.lr

    self.optimizer = self._get_optimizer(base_lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, epochs)

    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=batch_size, shuffle=True, num_workers=4, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        valid_data, batch_size=batch_size, shuffle=False, num_workers=4, pin_memory=True)

    loss_tag = 'Train_Loss/Task: {}'.format(t)
    acc_tag = 'Train_Accuracy/Task: {}'.format(t)

    for epoch in range(epochs):
        self.train_epoch(t, train_loader, device=device)
        train_loss, train_acc = self.eval(t, train_loader, mode='train', device=device)
        valid_loss, valid_acc = self.eval(t, valid_loader, mode='train', device=device)

        print(
            '| Epoch {:3d} | Train: loss={:.3f}, acc={:5.1f}% | Valid: loss={:.3f}, acc={:5.1f}% |'
            .format(epoch, train_loss, 100 * train_acc, valid_loss, 100 * valid_acc))
        self.writer.add_scalars(
            loss_tag, {'train_loss': train_loss, 'valid_loss': valid_loss}, global_step=epoch)
        self.writer.add_scalars(
            acc_tag, {'train_acc': train_acc * 100, 'valid_acc': valid_acc * 100},
            global_step=epoch)

        # The schedule advances once per epoch, before the best-state bookkeeping.
        scheduler.step()

        if valid_loss < lowest_valid_loss:
            lowest_valid_loss = valid_loss
            best_state = utils.get_model(self.model)

    utils.set_model_(self.model, best_state)
def train(self, t, xtrain, ytrain, xvalid, yvalid):
    """Train on task ``t``, restore the best state, then update the cumulative masks.

    After training (which a ``KeyboardInterrupt`` may cut short), the mask of
    task ``t`` is taken from ``self.model.mask(task, s=self.smax)`` and
    detached. ``self.mask_pre`` becomes that mask for ``t == 0`` and otherwise
    the element-wise maximum of itself and the new mask. ``self.mask_back`` is
    rebuilt as ``1 - self.model.get_view_for(name, self.mask_pre)`` for every
    parameter for which that call returns something other than ``None``.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs/targets.
        xvalid, yvalid: Validation inputs/targets.
    """
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    cur_lr = self.lr
    strikes_left = self.lr_patience
    self.optimizer = self._get_optimizer(cur_lr)

    try:
        for epoch in range(self.nepochs):
            t_start = time.time()
            self.train_epoch(t, xtrain, ytrain)
            t_trained = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain)
            t_evaluated = time.time()

            n_train = xtrain.size(0)
            train_ms = 1000 * self.sbatch * (t_trained - t_start) / n_train
            eval_ms = 1000 * self.sbatch * (t_evaluated - t_trained) / n_train
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                .format(epoch + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
                end='')

            valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc),
                  end='')

            improved = valid_loss < lowest_valid_loss
            if improved:
                lowest_valid_loss = valid_loss
                best_state = utils.get_model(self.model)
                strikes_left = self.lr_patience
                print(' *', end='')
            else:
                strikes_left -= 1

            if not improved and strikes_left <= 0:
                # Patience used up: divide the learning rate, or stop below lr_min.
                cur_lr /= self.lr_factor
                print(' lr={:.1e}'.format(cur_lr), end='')
                if cur_lr < self.lr_min:
                    print()
                    break
                strikes_left = self.lr_patience
                self.optimizer = self._get_optimizer(cur_lr)
            print()
    except KeyboardInterrupt:
        # Manual interruption just ends training early.
        print()

    utils.set_model_(self.model, best_state)

    # Activation masks of the task just learned, cloned without gradient tracking.
    task = torch.autograd.Variable(torch.LongTensor([t]).cuda(), volatile=False)
    mask = self.model.mask(task, s=self.smax)
    for idx in range(len(mask)):
        detached = mask[idx].data.clone()
        mask[idx] = torch.autograd.Variable(detached, requires_grad=False)

    if t == 0:
        self.mask_pre = mask
    else:
        # Cumulative mask: element-wise maximum over all tasks seen so far.
        for idx in range(len(self.mask_pre)):
            self.mask_pre[idx] = torch.max(self.mask_pre[idx], mask[idx])

    # Per-parameter weight masks derived from the cumulative activation masks.
    self.mask_back = {}
    for name, _ in self.model.named_parameters():
        view = self.model.get_view_for(name, self.mask_pre)
        if view is None:
            continue
        self.mask_back[name] = 1 - view
    return
def train(self, t, train_data_loader, test_data_loader, val_data_loader):
    """Train on task ``t`` from data loaders, snapshot the model and merge Fisher diagonals.

    After the patience-driven loop the best validation state is restored,
    ``self.model_old`` becomes a frozen deep copy in eval mode, and
    ``self.fisher`` is recomputed with ``utils.fisher_matrix_diag_nlp``. For
    ``t > 0`` the new values are merged with the previous ones as
    ``(new + old * t) / (t + 1)``; the time spent cloning and merging is
    printed.

    NOTE(review): the figures printed as "Train" each epoch are computed on
    ``test_data_loader``, not on ``train_data_loader`` — confirm intended.

    Args:
        t: Task index.
        train_data_loader: Loader used for optimisation, for the Fisher
            estimate, and (via ``len``) for the timing figures.
        test_data_loader: Loader scored for the per-epoch "Train" line.
        val_data_loader: Loader whose loss drives model selection and the
            learning-rate schedule.
    """
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    # Loop epochs
    for e in range(self.nepochs):
        # Train
        clock0 = time.time()
        self.train_epochewc(t, train_data_loader)
        clock1 = time.time()
        train_loss, train_acc, train_recall, train_f1 = self.eval_withregx(
            t, test_data_loader)
        clock2 = time.time()
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e + 1,
                1000 * self.sbatch * (clock1 - clock0) / len(train_data_loader),
                1000 * self.sbatch * (clock2 - clock1) / len(train_data_loader),
                train_loss, 100 * train_acc),
            end='')

        # Valid
        valid_loss, valid_acc, valid_recall, valid_f1 = self.eval_withregx(
            t, val_data_loader)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc),
              end='')

        # Adapt lr
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(' lr={:.1e}'.format(lr), end='')
                if lr < self.lr_min:
                    print()
                    break
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best
    utils.set_model_(self.model, best_model)

    # Update old
    self.model_old = deepcopy(self.model)
    self.model_old.eval()
    utils.freeze_model(self.model_old)  # Freeze the weights

    # Fisher ops: the previous diagonals must be copied before self.fisher is overwritten.
    if t > 0:
        fisher_old = {}
        startDateTimeOldLast = datetime.now()
        for n, _ in self.model.named_parameters():
            fisher_old[n] = self.fisher[n].clone()
        print('DataTime OldLast', datetime.now() - startDateTimeOldLast)
        print("Analysis compute memory waste in Old Task")

    self.fisher = utils.fisher_matrix_diag_nlp(t, train_data_loader, self.model,
                                               self.criterion, opt=self.opt)

    if t > 0:
        # Watch out! We do not want to keep t models (or fisher diagonals) in
        # memory, therefore we have to merge fisher diagonals.
        startDateTime = datetime.now()
        for n, _ in self.model.named_parameters():
            self.fisher[n] = (self.fisher[n] + fisher_old[n] * t) / (t + 1)
        print("Analysis compute memory waste")
        print('DataTime OldLast', datetime.now() - startDateTime)
    return
def train(self, t, train_data_loader, test_data_loader, val_data_loader):
    """Train on task ``t`` from data loaders and snapshot the result as ``self.model_old``.

    After the patience-driven loop the best validation state is restored and
    ``self.model_old`` becomes a frozen deep copy in eval mode.

    NOTE(review): the figures printed as "Train" each epoch are computed on
    ``test_data_loader``, not on ``train_data_loader`` — confirm intended.

    Args:
        t: Task index.
        train_data_loader: Loader used for optimisation and (via ``len``) for
            the timing figures.
        test_data_loader: Loader scored for the per-epoch "Train" line.
        val_data_loader: Loader whose loss drives model selection and the
            learning-rate schedule.
    """
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    # Loop epochs
    print("Size of account ===> " + str(self.nepochs))
    for e in range(self.nepochs):
        # Train
        clock0 = time.time()
        self.train_epochlwf(t, train_data_loader)
        clock1 = time.time()
        train_loss, train_acc, train_recall, train_f1 = self.evallwf(
            t, test_data_loader)
        clock2 = time.time()
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e + 1,
                1000 * self.sbatch * (clock1 - clock0) / len(train_data_loader),
                1000 * self.sbatch * (clock2 - clock1) / len(train_data_loader),
                train_loss, 100 * train_acc),
            end='')

        # Valid
        valid_loss, valid_acc, valid_recall, valid_f1 = self.evallwf(
            t, val_data_loader)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc),
              end='')

        # Adapt lr
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(' lr={:.1e}'.format(lr), end='')
                if lr < self.lr_min:
                    print()
                    break
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best
    utils.set_model_(self.model, best_model)

    # Update old
    self.model_old = deepcopy(self.model)
    self.model_old.eval()
    utils.freeze_model(self.model_old)  # Freeze the weights
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size, taskcla):
    """Train on task ``t`` with two learning rates, log every epoch, and snapshot the model.

    ``self.epoch`` is incremented once per epoch. Both ``lr`` and ``lr_rho``
    are divided by ``self.lr_factor`` when patience runs out. This loop never
    stops early: below ``self.lr_min`` only an extra newline is printed.
    Validation results of the current and all earlier tasks are added to
    ``self.logger`` every epoch. At the end the best validation state is
    restored, ``self.model_old`` becomes a deep copy of the model,
    ``self.saved`` is set to ``1`` and the logger is saved.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs/targets.
        xvalid, yvalid: Validation inputs/targets of the current task.
        data: Mapping indexed by task id providing ``['valid']['x']`` and
            ``['valid']['y']`` tensors.
        input_size, taskcla: Not used by this method.
    """
    lowest_valid_loss = np.inf
    best_state = utils.get_model(self.model)
    cur_lr = self.lr
    cur_lr_rho = self.lr_rho
    strikes_left = self.lr_patience
    self.optimizer = self._get_optimizer(cur_lr, cur_lr_rho)

    for epoch in range(self.nepochs):
        self.epoch = self.epoch + 1

        t_start = time.time()
        num_batch = xtrain.size(0)
        self.train_epoch(t, xtrain, ytrain)
        t_trained = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        t_evaluated = time.time()

        train_ms = 1000 * self.sbatch * (t_trained - t_start) / num_batch
        eval_ms = 1000 * self.sbatch * (t_evaluated - t_trained) / num_batch
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                epoch + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
            end='')

        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc),
              end='')

        # Log the current task, then every earlier task, under one global epoch index.
        global_epoch = (t * self.nepochs) + epoch
        self.logger.add(epoch=global_epoch, task_num=t + 1,
                        valid_loss=valid_loss, valid_acc=valid_acc)
        for past_task in range(t):
            past_x = data[past_task]['valid']['x'].cuda()
            past_y = data[past_task]['valid']['y'].cuda()
            past_loss, past_acc = self.eval(past_task, past_x, past_y)
            self.logger.add(epoch=global_epoch, task_num=past_task + 1,
                            valid_loss=past_loss, valid_acc=past_acc)

        improved = valid_loss < lowest_valid_loss
        if improved:
            lowest_valid_loss = valid_loss
            best_state = utils.get_model(self.model)
            strikes_left = self.lr_patience
            print(' *', end='')
        else:
            strikes_left -= 1

        if not improved and strikes_left <= 0:
            cur_lr /= self.lr_factor
            cur_lr_rho /= self.lr_factor
            print(' lr={:.1e}'.format(cur_lr), end='')
            if cur_lr < self.lr_min:
                # No early stop here: only a newline is emitted.
                print()
            strikes_left = self.lr_patience
            self.optimizer = self._get_optimizer(cur_lr, cur_lr_rho)
        print()

        # NOTE(review): this freezes the model_old that existed before this call;
        # the deep copy made after the loop is not frozen here — confirm intended.
        utils.freeze_model(self.model_old)

    utils.set_model_(self.model, best_state)
    self.model_old = deepcopy(self.model)
    self.saved = 1

    self.logger.save()
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid, phase, args):
    """Train task ``t`` for the given ``phase`` with patience-based LR decay.

    ``phase`` and ``args`` are forwarded to ``_get_optimizer``,
    ``train_epoch``, ``eval`` and ``model.mask``. When ``phase == 'mcl'`` the
    ``mcl.fc1`` weights and the first task mask are printed before training
    and again after the best snapshot has been restored.

    Training stops early once the learning rate, divided by
    ``self.lr_factor`` each time patience runs out, drops below
    ``self.lr_min``.
    """

    def _dump_mcl_state(tag):
        # Debug output: fc1 weights plus the first mask for task t.
        print(tag, self.model.mcl.fc1.weight)
        task = torch.autograd.Variable(torch.LongTensor([t]).cuda(),
                                       volatile=False)
        first_mask = self.model.mask(task,
                                     phase=phase,
                                     smax=self.smax,
                                     args=args)[0]
        print(tag, first_mask)

    # N-CL
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr, phase)

    if phase == 'mcl':
        _dump_mcl_state('before: ')

    for e in range(self.nepochs):
        # Train, then measure on the training tensors.
        started = time.time()
        self.train_epoch(t, xtrain, ytrain, phase=phase, args=args)
        trained = time.time()
        train_loss, train_acc = self.eval(t,
                                          xtrain,
                                          ytrain,
                                          phase=phase,
                                          args=args)
        evaluated = time.time()

        train_ms = 1000 * self.sbatch * (trained - started) / xtrain.size(0)
        eval_ms = 1000 * self.sbatch * (evaluated - trained) / xtrain.size(0)
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
            .format(e + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
            end='')

        # Validation
        valid_loss, valid_acc = self.eval(t,
                                          xvalid,
                                          yvalid,
                                          phase=phase,
                                          args=args)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
            valid_loss, 100 * valid_acc),
              end='')

        # Improvement: remember the weights and reset patience.
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
            print()
            continue

        # No improvement: burn patience, decay the rate when it runs out.
        patience -= 1
        if patience > 0:
            print()
            continue

        lr /= self.lr_factor
        print(' lr={:.1e}'.format(lr), end='')
        if lr < self.lr_min:
            print()
            break
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr, phase)
        print()

    # Restore best
    utils.set_model_(self.model, best_model)

    if phase == 'mcl':
        _dump_mcl_state('after: ')

    return
def train(self, tasks, xtrain, ytrain, xvalid, yvalid, args):
    """Retrain from ``self.initial_model`` under a MultiStepLR schedule.

    The model is reset to a deep copy of ``self.initial_model``, trained for
    up to ``self.nepochs`` epochs, and the weights with the lowest validation
    loss are restored at the end and written to disk. The learning rate is
    multiplied by 0.2 at each of the three milestones
    ``int(nepochs * 0.9 ** k)`` for ``k = 1, 2, 3``. A ``KeyboardInterrupt``
    during training stops the loop but still restores and saves the best
    weights.

    Args:
        tasks: Pair ``(task_t, task_v)``; the first is used for training and
            training-set evaluation, the second for validation.
        xtrain, ytrain: Training inputs and targets.
        xvalid, yvalid: Validation inputs and targets.
        args: Unused; kept for interface compatibility.

    Returns:
        None.
    """
    self.model = deepcopy(self.initial_model)  # Restart model
    task_t, task_v = tasks
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    self.optimizer = self._get_optimizer(lr)

    milestones = sorted(int(self.nepochs * 0.9**(i + 1)) for i in range(3))
    print(milestones)
    train_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        self.optimizer, milestones=milestones, gamma=0.2)  # learning rate

    # Loop epochs
    try:
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()
            self.train_epoch(task_t, xtrain, ytrain)
            clock1 = time.time()
            train_loss, train_acc = self.eval_validation(
                task_t, xtrain, ytrain)
            clock2 = time.time()
            print(
                '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}%|'
                .format(
                    e + 1,
                    1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                    1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                    train_loss, 100 * train_acc),
                end='')

            # Valid
            valid_loss, valid_acc = self.eval_validation(
                task_v, xvalid, yvalid)
            print(' Valid: loss={:.3f}, acc={:5.1f}%|'.format(
                valid_loss, valid_acc * 100),
                  end='')

            # Adapt lr
            # NOTE(review): passing the epoch to step() is the legacy calling
            # convention; it is kept so the schedule stays exactly as before.
            train_scheduler.step(e)
            # Report the rate the optimizer actually holds. The local `lr`
            # is only the initial value and never changes once the scheduler
            # takes over, so printing it showed a constant.
            print(' lr={:.1e}'.format(self.optimizer.param_groups[0]['lr']),
                  end='')

            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
            print()
    except KeyboardInterrupt:
        print()

    # Restore best
    utils.set_model_(self.model, best_model)

    torch.save(self.model.state_dict(),
               'pretrain_cifar100_joint_lr0.1_without-clip.pth')
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size, taskcla):
    """Train task ``t``, then refresh the old-model copy and the Fisher diagonal.

    The epoch loop applies patience-based LR decay and stops once the rate
    falls below ``self.lr_min``. After every epoch the validation metrics of
    the current task and of each earlier task (from ``data[task]['valid']``)
    are added to ``self.logger``.

    After training the best snapshot is restored, the logger is saved, and
    ``self.model_old`` is rebuilt as a frozen ``Net(input_size, taskcla)``
    carrying the current weights. ``self.fisher`` is recomputed with
    ``utils.fisher_matrix_diag``; for ``t > 0`` it is merged with the
    previous value as ``(new + old * t) / (t + 1)``.
    """
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    for e in range(self.nepochs):
        # Training pass and timing.
        tic = time.time()
        self.train_epoch(t, xtrain, ytrain)
        mid = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        toc = time.time()
        train_ms = 1000 * self.sbatch * (mid - tic) / xtrain.size(0)
        eval_ms = 1000 * self.sbatch * (toc - mid) / xtrain.size(0)
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
            .format(e + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
            end='')

        # Validation on the current task.
        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
            valid_loss, 100 * valid_acc),
              end='')

        # Per-epoch log: current task first, then all earlier tasks.
        step = (t * self.nepochs) + e
        self.logger.add(epoch=step,
                        task_num=t + 1,
                        valid_loss=valid_loss,
                        valid_acc=valid_acc)
        for prev in range(t):
            xvalid_t = data[prev]['valid']['x'].cuda()
            yvalid_t = data[prev]['valid']['y'].cuda()
            valid_loss_t, valid_acc_t = self.eval(prev, xvalid_t, yvalid_t)
            self.logger.add(epoch=step,
                            task_num=prev + 1,
                            valid_loss=valid_loss_t,
                            valid_acc=valid_acc_t)

        # Improvement: keep the weights and reset patience.
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
            print()
            continue

        # Otherwise spend patience; decay the rate when none is left.
        patience -= 1
        if patience > 0:
            print()
            continue

        lr /= self.lr_factor
        print(' lr={:.1e}'.format(lr), end='')
        if lr < self.lr_min:
            print()
            break
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best
    utils.set_model_(self.model, best_model)

    self.logger.save()

    # Rebuild the frozen reference model from the current weights.
    old_net = Net(input_size, taskcla).cuda()
    old_net.load_state_dict(self.model.state_dict())
    old_net.eval()
    self.model_old = old_net
    utils.freeze_model(self.model_old)  # Freeze the weights

    # Keep the previous Fisher diagonal before it is overwritten.
    previous_fisher = None
    if t > 0:
        previous_fisher = {
            name: self.fisher[name].clone()
            for name, _ in self.model.named_parameters()
        }

    self.fisher = utils.fisher_matrix_diag(t, xtrain, ytrain, self.model,
                                           self.criterion)

    if t > 0:
        # Only one Fisher diagonal is kept in memory, so the old one is
        # folded into the new one as a task-count weighted average.
        for name, _ in self.model.named_parameters():
            self.fisher[name] = (self.fisher[name] +
                                 previous_fisher[name] * t) / (t + 1)

    return
def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size, taskcla):
    """Train task ``t`` with patience-based LR decay, then update omega.

    Each epoch trains once, evaluates on the training and validation tensors
    and prints the optimizer's current learning rate. When validation loss
    fails to improve for ``self.lr_patience`` epochs the rate is divided by
    ``self.lr_factor`` and the optimizer is rebuilt. The loop always runs
    all ``self.nepochs`` epochs; dropping below ``self.lr_min`` only emits a
    newline.

    Afterwards the best snapshot is restored, ``self.model_old`` becomes a
    frozen deep copy of the model, and ``self.omega_update(t, xtrain)`` runs.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs and targets.
        xvalid, yvalid: Validation inputs and targets.
        data, input_size, taskcla: Unused; kept for interface compatibility.

    Returns:
        None.
    """
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    # Previously `patience` was first assigned inside the loop, so a NaN/inf
    # validation loss on the very first epoch reached `patience -= 1` with
    # the name unbound and raised UnboundLocalError.
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    # Loop epochs
    for e in range(self.nepochs):
        # Train
        clock0 = time.time()
        num_batch = xtrain.size(0)
        self.train_epoch(t, xtrain, ytrain)
        clock1 = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        clock2 = time.time()
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
            .format(e + 1,
                    1000 * self.sbatch * (clock1 - clock0) / num_batch,
                    1000 * self.sbatch * (clock2 - clock1) / num_batch,
                    train_loss, 100 * train_acc),
            end='')

        # Valid
        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
            valid_loss, 100 * valid_acc),
              end='')
        print(' lr : {:.6f}'.format(self.optimizer.param_groups[0]['lr']))

        # Adapt lr
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(' lr={:.1e}'.format(lr), end='')
                if lr < self.lr_min:
                    # NOTE(review): no `break` here, unlike sibling trainers;
                    # training continues below lr_min. Left unchanged.
                    print()
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best
    utils.set_model_(self.model, best_model)

    # Update old
    self.model_old = deepcopy(self.model)
    utils.freeze_model(self.model_old)  # Freeze the weights
    self.omega_update(t, xtrain)

    return
def train(self, xtrain, ytrain, xvalid, yvalid, xtest, ytest):
    """Train with validation-driven LR decay, clamping the rate at ``lr_min``.

    Each epoch runs ``self.train_epoch`` and one validation pass
    (``self.eval(..., test=False)``), and appends the
    ``(valid_loss, valid_acc)`` pair to ``self.valid_rs``. When patience
    runs out the rate is divided by ``self.lr_factor``; if that takes it
    below ``self.lr_min`` it is clamped to ``self.lr_min`` rather than
    stopping, so all ``self.nepochs`` epochs always run. The best-validation
    weights are restored at the end.

    ``xtest`` and ``ytest`` are accepted but not used.
    """
    best_loss = np.inf
    best_acc = -np.inf  # never read; accuracy-based selection is disabled
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    for e in range(self.nepochs):
        # Optimisation pass.
        t_begin = time.time()
        num_batch = xtrain.size(0)
        self.train_epoch(xtrain, ytrain)
        t_trained = time.time()

        # Validation pass (the training-set evaluation is skipped here).
        valid_loss, valid_acc = self.eval(xvalid, yvalid, test=False)
        t_validated = time.time()

        train_ms = 1000 * self.sbatch * (t_trained - t_begin) / num_batch
        valid_ms = 1000 * self.sbatch * (t_validated -
                                         t_trained) / xvalid.size(0)
        print(
            'Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms, Valid: loss={:.3f}, acc={:5.1f}% |'
            .format(e + 1, train_ms, valid_ms, valid_loss, 100 * valid_acc),
            end='')

        self.valid_rs.append((valid_loss, valid_acc))

        # Model selection on validation loss, with patience-based decay.
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                # The value printed is the rate before clamping.
                print(' lr={:.1e}'.format(lr), end='')
                if lr < self.lr_min:
                    lr = self.lr_min
                    print()
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best
    utils.set_model_(self.model, best_model)

    return
def train(self, t, xtrain, ytrain, xvalid, yvalid):
    """Search for the best module path for task ``t`` and store it.

    A population of ``self.P`` candidate paths, each an ``(L, N)`` array of
    module indices drawn from ``range(self.M)``, is trained for up to
    ``self.generations`` generations. Within a generation each path whose
    learning rate is still at or above ``self.lr_min`` is trained for up to
    ``self.nepochs`` epochs with its own learning rate, patience counter and
    best loss. The path with the lowest validation loss seen so far is the
    winner: its weights are restored after each generation and every other
    path is replaced by a mutated copy of it.

    A ``KeyboardInterrupt`` stops the search; the current winner is still
    written to ``self.model.bestPath[t]``.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs and targets.
        xvalid, yvalid: Validation inputs and targets.

    Returns:
        None.
    """
    if t > 0:  #reinit modules not in bestpath with random, according to the paper
        layers = ['conv1', 'conv2', 'conv3', 'fc1', 'fc2']
        for (n, p), (m, q) in zip(self.model.named_parameters(),
                                  self.initial_model.named_parameters()):
            if n == m:
                # Unpacking requires every parameter name to have exactly
                # three dot-separated parts: layer, module index, parameter.
                layer, module, par = n.split(".")
                module = int(module)
                if layer in layers:
                    # Modules on an earlier task's best path keep their
                    # weights; the rest are reset from the initial model.
                    if module not in self.model.bestPath[
                            0:t, layers.index(layer)]:
                        p.data = deepcopy(q.data)

    #init path for this task
    # The random fill below is fully overwritten in the loop that follows;
    # it effectively only allocates the (P, L, N) integer array.
    Path = np.random.randint(0, self.M - 1, size=(self.P, self.L, self.N))
    guesses = list(range(self.M))
    # Per-path training state. Note that `p` now names a population index,
    # no longer a parameter tensor as in the loop above.
    lr = []
    patience = []
    best_loss = []
    for p in range(self.P):
        lr.append(self.lr)
        patience.append(self.lr_patience)
        best_loss.append(np.inf)
        for j in range(self.L):
            np.random.shuffle(guesses)
            Path[p, j, :] = guesses[:self.N]  #do not repeat modules

    winner = 0
    best_path_model = utils.get_model(self.model)
    best_loss_overall = np.inf

    try:
        for g in range(self.generations):
            # Stop once every path's learning rate is below the minimum.
            if np.max(lr) < self.lr_min:
                break

            for p in range(self.P):
                # Skip paths whose learning rate is already exhausted.
                if lr[p] < self.lr_min:
                    continue

                # train only the modules in the current path, minus the ones in the model.bestPath
                self.model.unfreeze_path(t, Path[p])

                # the optimizer trains solely the params for the current task
                self.optimizer = self._get_optimizer(lr[p])

                # Loop epochs
                for e in range(self.nepochs):
                    # Train
                    clock0 = time.time()
                    self.train_epoch(t, xtrain, ytrain, Path[p])
                    clock1 = time.time()
                    train_loss, train_acc, _ = self.eval(
                        t, xtrain, ytrain, Path[p])
                    clock2 = time.time()
                    print(
                        '| Generation {:3d} | Path {:3d} | Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
                        .format(
                            g + 1, p + 1, e + 1, 1000 * self.sbatch *
                            (clock1 - clock0) / xtrain.size(0),
                            1000 * self.sbatch * (clock2 - clock1) /
                            xtrain.size(0), train_loss, 100 * train_acc),
                        end='')
                    # Valid
                    valid_loss, valid_acc, _ = self.eval(
                        t, xvalid, yvalid, Path[p])
                    print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
                        valid_loss, 100 * valid_acc),
                          end='')

                    # Save the winner
                    if valid_loss < best_loss_overall:
                        best_loss_overall = valid_loss
                        best_path_model = utils.get_model(self.model)
                        winner = p
                        print(' B', end='')

                    # Adapt lr
                    # The optimizer is not rebuilt here; the decayed rate
                    # takes effect when this path is next picked up in a
                    # later generation.
                    if valid_loss < best_loss[p]:
                        best_loss[p] = valid_loss
                        patience[p] = self.lr_patience
                        print(' *', end='')
                    else:
                        patience[p] -= 1
                        if patience[p] <= 0:
                            lr[p] /= self.lr_factor
                            print(' lr={:.1e}'.format(lr[p]), end='')
                            if lr[p] < self.lr_min:
                                print()
                                break
                            patience[p] = self.lr_patience
                    print()

            # Restore winner model
            utils.set_model_(self.model, best_path_model)
            print('| Winning path: {:3d} | Best loss: {:.3f} |'.format(
                winner + 1, best_loss_overall))

            # Keep the winner and mutate it
            print('Mutating')
            probability = 1 / (self.N * self.L)  #probability to mutate
            for p in range(self.P):
                if p != winner:
                    # Losing paths restart from the winner's learning rate
                    # with fresh patience and no best loss.
                    best_loss[p] = np.inf
                    lr[p] = lr[winner]
                    patience[p] = self.lr_patience
                    for j in range(self.L):
                        for k in range(self.N):
                            Path[p, j, k] = Path[winner, j, k]
                            if np.random.rand() < probability:
                                Path[p, j, k] = (
                                    Path[p, j, k] + np.random.randint(-2, 3)
                                ) % self.M  # add int in [-2,2] to the path, this seems yet another hyperparam

    except KeyboardInterrupt:
        print()

    #save the best path into the model
    self.model.bestPath[t] = Path[winner]
    print(self.model.bestPath[t])

    return
def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size, taskcla):
    """Train with patience-based LR decay and per-epoch validation logging.

    ``self.train_epoch`` and ``self.eval`` are called without a task index;
    ``self.eval`` also receives ``self.sample``. After every epoch the
    validation metrics for the current task and for each earlier task (from
    ``data[task]['valid']``) are added to ``self.logger``, and
    ``self.model_old`` is re-frozen. Training stops once the rate drops below
    ``self.lr_min``; the best-validation weights are then restored and the
    logger is saved.

    ``input_size`` and ``taskcla`` are accepted but unused.
    """
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    for e in range(self.nepochs):
        t_begin = time.time()

        # (disabled) self.model.variance_init()  -- initialise the trainer
        # net with a large variance
        # 1. Train trainer_net, using saver_net's information for
        #    regularization.
        self.train_epoch(xtrain, ytrain)

        t_trained = time.time()
        train_loss, train_acc = self.eval(xtrain, ytrain, self.sample)
        t_evaluated = time.time()

        train_ms = 1000 * self.sbatch * (t_trained - t_begin) / xtrain.size(0)
        eval_ms = 1000 * self.sbatch * (t_evaluated -
                                        t_trained) / xtrain.size(0)
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
            .format(e + 1, train_ms, eval_ms, train_loss, 100 * train_acc),
            end='')

        # Validation on the current task.
        valid_loss, valid_acc = self.eval(xvalid, yvalid, self.sample)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
            valid_loss, 100 * valid_acc),
              end='')

        # Log the current task and every earlier task for this epoch.
        log_epoch = (t * self.nepochs) + e
        self.logger.add(epoch=log_epoch,
                        task_num=t + 1,
                        valid_loss=valid_loss,
                        valid_acc=valid_acc)
        for old_task in range(t):
            old_valid = data[old_task]['valid']
            xvalid_t = old_valid['x'].cuda()
            yvalid_t = old_valid['y'].cuda()
            valid_loss_t, valid_acc_t = self.eval(xvalid_t, yvalid_t,
                                                  self.sample)
            self.logger.add(epoch=log_epoch,
                            task_num=old_task + 1,
                            valid_loss=valid_loss_t,
                            valid_acc=valid_acc_t)

        # Model selection and learning-rate decay.
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(' lr={:.1e}'.format(lr), end='')
                if lr < self.lr_min:
                    print()
                    break
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

        utils.freeze_model(self.model_old)  # Freeze the weights

    # Restore best
    utils.set_model_(self.model, best_model)
    self.logger.save()

    return
def train(self,t,xtrain,ytrain,xvalid,yvalid):
    """Train task ``t`` while logging masks, then update the cumulative masks.

    Before training, the task's mask from ``self.model.mask`` (at
    ``s=self.smax``) is stored in ``self.logs['mask'][t][i][-1]`` and the
    previous cumulative mask (zeros for ``t == 0``) in
    ``self.logs['mask_pre'][t][i]``. The epoch loop runs only when
    ``self.single_task`` is false or ``t == 0``; it uses patience-based LR
    decay and records per-epoch losses, accuracies, regularisation values
    and masks. A ``KeyboardInterrupt`` ends training early.

    Afterwards the best-validation weights are restored, ``self.mask_pre``
    becomes the element-wise maximum of the old cumulative mask and the new
    task mask, and ``self.mask_back[n]`` is set to ``1 - view`` for every
    parameter for which ``self.model.get_view_for`` returns a view.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs and targets.
        xvalid, yvalid: Validation inputs and targets.

    Returns:
        None.
    """
    best_loss=np.inf
    best_model=utils.get_model(self.model)
    lr=self.lr
    patience=self.lr_patience
    self.optimizer=self._get_optimizer(lr)

    #log
    losses_train = []
    losses_valid = []
    acc_train = []
    acc_valid = []
    reg_train = []
    reg_valid = []

    # NOTE(review): self.logs is indexed unconditionally here, yet it is
    # checked against None further down; a None value would fail first here.
    self.logs['mask'][t]={}
    self.logs['mask_pre'][t]={}
    task=torch.autograd.Variable(torch.LongTensor([t]).cuda(),volatile=False)
    bmask=self.model.mask(task,s=self.smax)
    for i in range(len(bmask)):
        # Detached copy of the mask before any training; stored under key -1.
        bmask[i]=torch.autograd.Variable(bmask[i].data.clone(),requires_grad=False)
        self.logs['mask'][t][i]={}
        self.logs['mask'][t][i][-1]=deepcopy(bmask[i].data.cpu().numpy().astype(np.float32))
        if t==0:
            # No earlier task: log an all-zero mask of the same shape.
            self.logs['mask_pre'][t][i]=deepcopy((0*bmask[i]).data.cpu().numpy().astype(np.float32))
        else:
            self.logs['mask_pre'][t][i]=deepcopy(self.mask_pre[i].data.cpu().numpy().astype(np.float32))

    # In single-task mode only the first task is actually trained.
    if not self.single_task or (self.single_task and t==0):
        # Loop epochs
        try:
            for e in range(self.nepochs):
                # Train
                clock0=time.time()
                self.train_epoch(t,xtrain,ytrain)
                clock1=time.time()
                train_loss,train_acc,train_reg=self.eval_withreg(t,xtrain,ytrain)
                clock2=time.time()
                print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(e+1, 1000*self.sbatch*(clock1-clock0)/xtrain.size(0),1000*self.sbatch*(clock2-clock1)/xtrain.size(0),train_loss,100*train_acc),end='')
                # Valid
                valid_loss,valid_acc,valid_reg=self.eval_withreg(t,xvalid,yvalid)
                print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss,100*valid_acc),end='')

                #log
                losses_train.append(train_loss)
                acc_train.append(train_acc)
                reg_train.append(train_reg)
                losses_valid.append(valid_loss)
                acc_valid.append(valid_acc)
                reg_valid.append(valid_reg)

                # Adapt lr
                if valid_loss<best_loss:
                    best_loss=valid_loss
                    best_model=utils.get_model(self.model)
                    patience=self.lr_patience
                    print(' *',end='')
                else:
                    patience-=1
                    if patience<=0:
                        lr/=self.lr_factor
                        print(' lr={:.1e}'.format(lr),end='')
                        if lr<self.lr_min:
                            print()
                            break
                        patience=self.lr_patience
                        self.optimizer=self._get_optimizer(lr)
                print()

                # Log activations mask
                # Stored under the epoch index; the epoch that triggers the
                # early `break` above is not logged.
                task=torch.autograd.Variable(torch.LongTensor([t]).cuda(),volatile=False)
                bmask=self.model.mask(task,s=self.smax)
                for i in range(len(bmask)):
                    self.logs['mask'][t][i][e] = deepcopy(bmask[i].data.cpu().numpy().astype(np.float32))

            # Log losses
            if self.logs is not None:
                self.logs['train_loss'][t] = np.array(losses_train)
                self.logs['train_acc'][t] = np.array(acc_train)
                self.logs['train_reg'][t] = np.array(reg_train)
                self.logs['valid_loss'][t] = np.array(losses_valid)
                self.logs['valid_acc'][t] = np.array(acc_valid)
                self.logs['valid_reg'][t] = np.array(reg_valid)
        except KeyboardInterrupt:
            print()

    # Restore best validation model
    utils.set_model_(self.model,best_model)

    # Activations mask
    task=torch.autograd.Variable(torch.LongTensor([t]).cuda(),volatile=False)
    mask=self.model.mask(task,s=self.smax)
    for i in range(len(mask)):
        # Detach so the stored masks do not take part in autograd.
        mask[i]=torch.autograd.Variable(mask[i].data.clone(),requires_grad=False)
    if t==0:
        self.mask_pre=mask
    else:
        # Cumulative mask: element-wise max over all tasks seen so far.
        for i in range(len(self.mask_pre)):
            self.mask_pre[i]=torch.max(self.mask_pre[i],mask[i])

    # Weights mask
    self.mask_back={}
    for n,_ in self.model.named_parameters():
        vals=self.model.get_view_for(n,self.mask_pre)
        # Parameters without a view get no entry in mask_back.
        if vals is not None:
            self.mask_back[n]=1-vals

    return
def train(self, t, xtrain, ytrain, xvalid, yvalid):
    """Train task ``t`` and fold its parameter importance into ``self.OMEGA``.

    Before training, a frozen deep copy of the model is stored in
    ``self.model_old`` and, for every trainable parameter, ``self.omega`` and
    ``self.DELTA`` are zeroed and ``self.p_old`` is set to the current value.
    The epoch loop uses patience-based LR decay, stops once the rate drops
    below ``self.lr_min``, and logs train/valid accuracy and loss through
    ``self.logger.log_scalar``.

    After the best-validation weights are restored, each trainable parameter
    gets ``o = relu(omega) / (DELTA ** 2 + xi)`` and
    ``OMEGA = OMEGA * decay + o * (1 - decay)``.

    Args:
        t: Task index.
        xtrain, ytrain: Training inputs and targets.
        xvalid, yvalid: Validation inputs and targets.

    Returns:
        None.
    """
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    # Update old
    self.model_old = deepcopy(self.model)
    self.model_old.eval()
    utils.freeze_model(self.model_old)  # Freeze the weights

    # reset importance omega
    for n, p in self.model.named_parameters():
        if p.requires_grad:
            self.omega[n] = p.data.clone().zero_()
            self.DELTA[n] = p.data.clone().zero_()
            self.p_old[n] = p.data.clone()

    # Loop epochs
    for e in range(self.nepochs):
        # Train
        clock0 = time.time()
        self.train_epoch(t, xtrain, ytrain, e)
        clock1 = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        clock2 = time.time()
        print(
            '| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'
            .format(
                e + 1,
                1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                train_loss, 100 * train_acc),
            end='')

        # Valid
        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(
            valid_loss, 100 * valid_acc),
              end='')

        self.logger.log_scalar(str(t) + "_train acc", train_acc, e)
        self.logger.log_scalar(str(t) + "_valid acc", valid_acc, e)
        self.logger.log_scalar(str(t) + "_train loss", train_loss, e)
        self.logger.log_scalar(str(t) + "_valid loss", valid_loss, e)

        # Adapt lr
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(' lr={:.1e}'.format(lr), end='')
                if lr < self.lr_min:
                    print()
                    break
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best
    utils.set_model_(self.model, best_model)

    # Update task regularization OMEGA
    # The guard previously tested `p.requires_grad`, where `p` was the stale
    # loop variable from the reset loop above (always the last parameter).
    # It must test the parameter being visited, so that exactly the
    # parameters that received omega/DELTA entries are consolidated.
    # The frozen copy in self.model_old is no longer iterated because the
    # update never read its values.
    for n, param in self.model.named_parameters():
        if param.requires_grad:
            o = torch.nn.functional.relu(
                self.omega[n]) / (self.DELTA[n].pow(2) + self.xi)
            self.OMEGA[n] = self.OMEGA[n] * self.decay + o * (1 - self.decay)

    return