def train(self, t, xtrain, ytrain, xvalid, yvalid):
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience

    # Train only the column for the current task
    self.model.unfreeze_column(t)
    # The optimizer trains solely the params for the current task
    self.optimizer = self._get_optimizer(lr)

    # Loop epochs
    for e in range(self.nepochs):
        # Train
        clock0 = time.time()
        self.train_epoch(t, xtrain, ytrain)
        clock1 = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        clock2 = time.time()
        print(
            "| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |".format(
                e + 1,
                1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                train_loss,
                100 * train_acc,
            ),
            end="",
        )
        # Valid
        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(" Valid: loss={:.3f}, acc={:5.1f}% |".format(valid_loss, 100 * valid_acc), end="")
        # Adapt lr
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(" *", end="")
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(" lr={:.1e}".format(lr), end="")
                if lr < self.lr_min:
                    print()
                    break
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best
    utils.set_model_(self.model, best_model)

    return
def train(self, t, train, valid, args):
    # self.model=deepcopy(self.initial_model)  # Restart model: isolate
    if t == 0:
        which_types = ['mcl']
    else:
        which_types = ['ac', 'mcl']

    for which_type in which_types:
        print('Training Type: ', which_type)

        best_loss = np.inf
        best_model = utils.get_model(self.model)
        lr = self.lr
        patience = self.lr_patience
        self.optimizer = self._get_optimizer(lr, which_type)

        # Loop epochs
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()
            iter_bar = tqdm(train, desc='Train Iter (loss=X.XXX)')
            self.train_epoch(t, train, iter_bar, which_type)
            clock1 = time.time()
            train_loss, train_acc = self.eval(t, train, which_type)
            clock2 = time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e + 1,
                1000 * self.sbatch * (clock1 - clock0) / len(train),
                1000 * self.sbatch * (clock2 - clock1) / len(train),
                train_loss, 100 * train_acc), end='')
            # Valid
            valid_loss, valid_acc = self.eval(t, valid, which_type)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc), end='')
            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr, which_type)
            print()

        # Restore best
        utils.set_model_(self.model, best_model)

    return
def load_checkpoint(filepath, device):
    # Load on model: https://pytorch.org/tutorials/beginner/saving_loading_models.html#saving-loading-model-across-devices
    if device == torch.device('cuda'):
        checkpoint = torch.load(filepath)
    else:
        # Load on CPU
        checkpoint = torch.load(filepath, map_location='cpu')

    arch = checkpoint['model']

    # Load model
    model = eval(get_model(arch)['load_command'])

    # Freeze parameters
    for param in model.parameters():
        param.requires_grad = False

    # New classifier for the model
    model.classifier = nn.Sequential(
        nn.Linear(get_model(arch)['classifier_input'], checkpoint['hidden_units']),
        nn.ReLU(),
        nn.Dropout(0.3),
        nn.Linear(checkpoint['hidden_units'], checkpoint['output_size']),
        nn.LogSoftmax(dim=1))

    model.load_state_dict(checkpoint['model_state_dict'])
    model.class_to_idx = checkpoint['class_to_idx']

    if device == torch.device('cuda'):
        model.to(device)
    model.eval()  # set to eval mode for inference

    optimizer = get_optimizer(model, checkpoint['learnrate'], checkpoint['optimizer_state_dict'])

    return (model, optimizer)
def train(self, t, xtrain, ytrain, xvalid, yvalid, data, input_size, taskcla):
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    self.W = {}
    self.p_old = {}
    for n, p in self.model.named_parameters():
        if p.requires_grad:
            n = n.replace('.', '__')
            self.W[n] = p.data.clone().zero_()
            self.p_old[n] = p.data.clone()

    # Loop epochs
    for e in range(self.nepochs):
        # Train
        clock0 = time.time()
        num_batch = xtrain.size(0)
        self.train_epoch(t, xtrain, ytrain)
        clock1 = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        clock2 = time.time()
        print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
            e + 1, 1000 * self.sbatch * (clock1 - clock0) / num_batch,
            1000 * self.sbatch * (clock2 - clock1) / num_batch, train_loss, 100 * train_acc), end='')
        # Valid
        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc), end='')
        print()

        # Save log for current task & old tasks at every epoch
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(' lr={:.1e}'.format(lr), end='')
                if lr < self.lr_min:
                    print()
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best
    utils.set_model_(self.model, best_model)

    self.update_omega(self.W, self.epsilon)
    self.model_old = deepcopy(self.model)
    utils.freeze_model(self.model_old)  # Freeze the weights

    return
def train(self, t, train_data, valid_data, device='cuda'):
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr

    # Train only the column for the current task
    self.model.unfreeze_column(t)

    # 1 define the optimizer and scheduler
    self.optimizer = self._get_optimizer(lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, self.epochs)

    # 2 define the dataloaders
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=self.batch,
                                               shuffle=True, num_workers=4, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(valid_data, batch_size=self.batch,
                                               shuffle=False, num_workers=4, pin_memory=True)

    # Loop epochs
    for e in range(self.epochs):
        # Train
        self.train_epoch(t, train_loader, device=device)
        train_loss, train_acc = self.eval(t, train_loader, mode='train', device=device)
        # Valid
        valid_loss, valid_acc = self.eval(t, valid_loader, mode='train', device=device)
        print('| Epoch {:3d} | Train: loss={:.3f}, acc={:5.1f}% | Valid: loss={:.3f}, acc={:5.1f}% |'.format(
            e, train_loss, 100 * train_acc, valid_loss, 100 * valid_acc))
        self.writer.add_scalars('Train_Loss/Task: {}'.format(t),
                                {'train_loss': train_loss, 'valid_loss': valid_loss},
                                global_step=e)
        self.writer.add_scalars('Train_Accuracy/Task: {}'.format(t),
                                {'train_acc': train_acc * 100, 'valid_acc': valid_acc * 100},
                                global_step=e)
        # Adapt lr
        scheduler.step()
        # Update the best model
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)

    utils.set_model_(self.model, best_model)
    return
def main():
    if args.white_box_attack:
        # White-box attack
        logging.info('pgd white-box attack')
        checkpoint = torch.load(args.model_path)
        state_dict = checkpoint.get('state_dict', checkpoint)
        num_classes = checkpoint.get('num_classes', 10)
        normalize_input = checkpoint.get('normalize_input', False)
        model = get_model(args.model, num_classes=num_classes, normalize_input=normalize_input)
        if not all([k.startswith('module') for k in state_dict]):
            state_dict = {'module.' + k: v for k, v in state_dict.items()}
        if use_cuda:
            model = torch.nn.DataParallel(model).cuda()
            cudnn.benchmark = True
        model.load_state_dict(state_dict)
        eval_adv_test_whitebox(model, device, test_loader)
    else:
        # Black-box attack
        logging.info('pgd black-box attack')
        model = get_model(args.model, num_classes=10)
        if use_cuda:
            model_target = torch.nn.DataParallel(model).cuda()
            cudnn.benchmark = True
        model_target.load_state_dict(torch.load(args.target_model_path))
        model = get_model(args.model, num_classes=10)
        if use_cuda:
            model_source = torch.nn.DataParallel(model).cuda()
        model_source.load_state_dict(torch.load(args.source_model_path))
        eval_adv_test_blackbox(model_target, model_source, device, test_loader)
def train(self, t, xtrain, ytrain, xvalid, yvalid):
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    # Loop epochs
    for e in range(self.nepochs):
        # Train
        clock0 = time.time()
        self.train_epoch(t, xtrain, ytrain)
        clock1 = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        clock2 = time.time()
        print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
            e + 1, 1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
            1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0), train_loss, 100 * train_acc), end='')
        # Valid
        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc), end='')
        # Adapt lr
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(' lr={:.1e}'.format(lr), end='')
                if lr < self.lr_min:
                    print()
                    break
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best
    utils.set_model_(self.model, best_model)

    # Update old
    self.model_old = deepcopy(self.model)
    self.model_old.eval()
    utils.freeze_model(self.model_old)  # Freeze the weights

    # Fisher ops
    if t > 0:
        fisher_old = {}
        for n, _ in self.model.named_parameters():
            fisher_old[n] = self.fisher[n].clone()
    self.fisher = utils.fisher_matrix_diag(t, xtrain, ytrain, self.model, self.criterion)
    if t > 0:
        # Watch out! We do not want to keep t models (or fisher diagonals) in memory,
        # therefore we have to merge fisher diagonals
        for n, _ in self.model.named_parameters():
            self.fisher[n] = (self.fisher[n] + fisher_old[n] * t) / (t + 1)  # Checked: it is better than the other option
            # self.fisher[n]=0.5*(self.fisher[n]+fisher_old[n])

    torch.save(self.model.state_dict(), 'pretrain_ewc.pth')

    return
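# Hedged sketch (not part of the original code): the Fisher merge above folds the new task's
# diagonal estimate into a running mean, fisher = (fisher_new + fisher_old * t) / (t + 1),
# which equals the average over all t + 1 tasks without keeping t separate diagonals in memory.
# The tensors and task count below are hypothetical placeholders.
import torch

def merge_fisher(fisher_old, fisher_new, t):
    """Running mean of diagonal Fisher estimates over t + 1 tasks."""
    return {n: (fisher_new[n] + fisher_old[n] * t) / (t + 1) for n in fisher_new}

fisher_old = {'w': torch.tensor([1.0, 3.0])}   # mean over the first t tasks
fisher_new = {'w': torch.tensor([4.0, 0.0])}   # estimate on the current task
print(merge_fisher(fisher_old, fisher_new, t=2))  # {'w': tensor([2., 2.])}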
def train(self, t, xtrain, ytrain, xvalid, yvalid, args, ac_pre_mask, pre_mask_back,
          pre_mask_pre, pre_mask, from_t):
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    self.mask_pre = pre_mask_pre
    self.mask_back = pre_mask_back
    self.ac_pre_mask = ac_pre_mask
    self.pre_mask = pre_mask
    self.from_t = from_t

    # Loop epochs
    try:
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()
            self.train_epoch(t, xtrain, ytrain, args=args)
            clock1 = time.time()
            train_loss, train_acc = self.eval(t, xtrain, ytrain, args=args)
            clock2 = time.time()
            print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
                e + 1, 1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0), train_loss, 100 * train_acc), end='')
            # Valid
            valid_loss, valid_acc = self.eval(t, xvalid, yvalid, args=args)
            print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc), end='')
            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(' *', end='')
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(' lr={:.1e}'.format(lr), end='')
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()
    except KeyboardInterrupt:
        print()

    # Restore best validation model
    utils.set_model_(self.model, best_model)
    return
def train(self, tasks, xtrain, ytrain, xvalid, yvalid):
    self.model = deepcopy(self.initial_model)  # Restart model
    task_t, task_v = tasks
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    # Loop epochs
    try:
        for e in range(self.nepochs):
            # Train
            clock0 = time.time()
            self.train_epoch(task_t, xtrain, ytrain)
            clock1 = time.time()
            train_loss = self.eval_validation(task_t, xtrain, ytrain)
            clock2 = time.time()
            print("| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f} |".format(
                e + 1,
                1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                train_loss,
            ), end="")
            # Valid
            valid_loss = self.eval_validation(task_v, xvalid, yvalid)
            print(" Valid: loss={:.3f} |".format(valid_loss), end="")
            # Adapt lr
            if valid_loss < best_loss:
                best_loss = valid_loss
                best_model = utils.get_model(self.model)
                patience = self.lr_patience
                print(" *", end="")
            else:
                patience -= 1
                if patience <= 0:
                    lr /= self.lr_factor
                    print(" lr={:.1e}".format(lr), end="")
                    if lr < self.lr_min:
                        print()
                        break
                    patience = self.lr_patience
                    self.optimizer = self._get_optimizer(lr)
            print()
    except KeyboardInterrupt:
        print()

    # Restore best
    utils.set_model_(self.model, best_model)
    return
def train(self, t, xtrain, ytrain, xvalid, yvalid):
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    # Loop epochs
    for e in range(self.nepochs):
        # Train
        clock0 = time.time()
        self.train_epoch(t, xtrain, ytrain)
        clock1 = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        clock2 = time.time()
        print(
            "| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |".format(
                e + 1,
                1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
                1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0),
                train_loss,
                100 * train_acc,
            ),
            end="",
        )
        # Valid
        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(" Valid: loss={:.3f}, acc={:5.1f}% |".format(valid_loss, 100 * valid_acc), end="")
        # Adapt lr
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(" *", end="")
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(" lr={:.1e}".format(lr), end="")
                if lr < self.lr_min:
                    print()
                    break
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best & freeze
    utils.set_model_(self.model, best_model)
    for n, p in self.model.named_parameters():
        if not n.startswith("last"):
            p.requires_grad = False

    return
def save_all_models(self, task_id):
    print("Saving all models for task {} ...".format(task_id + 1))

    dis = utils.get_model(self.discriminator)
    torch.save({'model_state_dict': dis,
                }, os.path.join(self.checkpoint, 'discriminator_{}.pth.tar'.format(task_id)))

    model = utils.get_model(self.model)
    torch.save({'model_state_dict': model,
                }, os.path.join(self.checkpoint, 'model_{}.pth.tar'.format(task_id)))
def train(self, t, xtrain, ytrain, xvalid, yvalid):
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    # Loop epochs
    for e in range(self.nepochs):
        # Train
        clock0 = time.time()
        self.train_epoch(t, xtrain, ytrain)
        clock1 = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        clock2 = time.time()
        print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
            e + 1, 1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
            1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0), train_loss, 100 * train_acc), end='')
        # Valid
        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc), end='')
        # Adapt lr
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(' lr={:.1e}'.format(lr), end='')
                if lr < self.lr_min:
                    print()
                    break
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best
    utils.set_model_(self.model, best_model)

    # Model update
    if t == 0:
        self.fisher = utils.fisher_matrix_diag(t, xtrain, ytrain, self.model, self.criterion)
    else:
        fisher_new = utils.fisher_matrix_diag(t, xtrain, ytrain, self.model, self.criterion)
        for (n, p), (_, p_old) in zip(self.model.named_parameters(), self.model_old.named_parameters()):
            p = fisher_new[n] * p + self.fisher[n] * p_old
            self.fisher[n] += fisher_new[n]
            p /= ((self.fisher[n] == 0).float() + self.fisher[n])

    # Old model save
    self.model_old = deepcopy(self.model)
    self.model_old.eval()
    utils.freeze_model(self.model_old)

    return
def train(self, t, xtrain, ytrain, xvalid, yvalid, data):
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    # Loop epochs
    for e in range(self.nepochs):
        # Train
        clock0 = time.time()
        self.train_epoch(t, xtrain, ytrain)
        clock1 = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        clock2 = time.time()
        print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
            e + 1, 1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
            1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0), train_loss, 100 * train_acc), end='')
        # Valid
        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc), end='')

        # Save log for current task & old tasks at every epoch
        self.logger.add(epoch=(t * self.nepochs) + e, task_num=t + 1, valid_loss=valid_loss, valid_acc=valid_acc)
        for task in range(t):
            xvalid_t = data[task]['valid']['x'].cuda()
            yvalid_t = data[task]['valid']['y'].cuda()
            valid_loss_t, valid_acc_t = self.eval(task, xvalid_t, yvalid_t)
            self.logger.add(epoch=(t * self.nepochs) + e, task_num=task + 1,
                            valid_loss=valid_loss_t, valid_acc=valid_acc_t)

        # Adapt lr
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(' lr={:.1e}'.format(lr), end='')
                if lr < self.lr_min:
                    print()
                    break
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best and save model as old
    utils.set_model_(self.model, best_model)
    self.model_old = Net([1, 28, 28], [(0, 10), (1, 10), (2, 10), (3, 10), (4, 10),
                                       (5, 10), (6, 10), (7, 10), (8, 10), (9, 10)]).cuda()
    self.model_old.load_state_dict(self.model.state_dict())
    self.model_old.eval()
    utils.freeze_model(self.model_old)
    self.logger.save()

    return
def train(self, t, xtrain, ytrain, xvalid, yvalid, args):  # N-CL
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)
    print('before: ', self.model.fc1.weight)

    # Loop epochs
    for e in range(self.nepochs):
        # Train
        clock0 = time.time()
        self.train_epoch(t, xtrain, ytrain)
        clock1 = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain, args)
        clock2 = time.time()
        print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
            e + 1, 1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
            1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0), train_loss, 100 * train_acc), end='')
        # Valid
        valid_loss, valid_acc = self.eval(t, xvalid, yvalid, args)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc), end='')
        # Adapt lr
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(' lr={:.1e}'.format(lr), end='')
                if lr < self.lr_min:
                    print()
                    break
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best
    utils.set_model_(self.model, best_model)
    print('after: ', self.model.fc1.weight)

    return
def train(self, t, xtrain, ytrain, xvalid, yvalid):
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr = self.lr
    patience = self.lr_patience
    self.optimizer = self._get_optimizer(lr)

    # Loop epochs
    for e in range(self.nepochs):
        # Train
        clock0 = time.time()
        self.train_epoch(t, xtrain, ytrain)
        clock1 = time.time()
        train_loss, train_acc = self.eval(t, xtrain, ytrain)
        clock2 = time.time()
        print('| Epoch {:3d}, time={:5.1f}ms/{:5.1f}ms | Train: loss={:.3f}, acc={:5.1f}% |'.format(
            e + 1, 1000 * self.sbatch * (clock1 - clock0) / xtrain.size(0),
            1000 * self.sbatch * (clock2 - clock1) / xtrain.size(0), train_loss, 100 * train_acc), end='')
        # Valid
        valid_loss, valid_acc = self.eval(t, xvalid, yvalid)
        print(' Valid: loss={:.3f}, acc={:5.1f}% |'.format(valid_loss, 100 * valid_acc), end='')
        # Adapt lr
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)
            patience = self.lr_patience
            print(' *', end='')
        else:
            patience -= 1
            if patience <= 0:
                lr /= self.lr_factor
                print(' lr={:.1e}'.format(lr), end='')
                if lr < self.lr_min:
                    print()
                    break
                patience = self.lr_patience
                self.optimizer = self._get_optimizer(lr)
        print()

    # Restore best, save model as old
    utils.set_model_(self.model, best_model)
    if t > 0:
        model_state = utils.get_model(self.model)
        model_old_state = utils.get_model(self.model_old)
        for name, param in self.model.named_parameters():
            # model_state[name]=(1-self.alpha)*model_old_state[name]+self.alpha*model_state[name]
            model_state[name] = (model_state[name] + model_old_state[name] * t) / (t + 1)
        utils.set_model_(self.model, model_state)

    self.model_old = deepcopy(self.model)
    utils.freeze_model(self.model_old)
    self.model_old.eval()

    return
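# Hedged sketch (not part of the original code): the post-training merge above is a running
# mean of the weights over tasks, new = (current + old * t) / (t + 1), so the current task's
# weights receive weight 1/(t+1). The tensors below are hypothetical placeholders.
import torch

w_old = torch.tensor([0.0, 3.0])   # average weights after the first t tasks
w_cur = torch.tensor([3.0, 0.0])   # weights learned on the current task
t = 2
print((w_cur + w_old * t) / (t + 1))  # tensor([1., 2.])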
def train(self, t, xtrain, ytrain, xvalid, yvalid, data):
    best_loss = np.inf
    best_acc = 0
    best_model = utils.get_model(self.model)
    lr = self.lr
    # patience = self.lr_patience
    self.optimizer = self._get_optimizer(t, lr)
    nepochs = self.nepochs
    test_max = 0

    # Loop epochs
    try:
        for e in range(nepochs):
            # Train
            self.train_epoch(xtrain, ytrain, cur_epoch=e, nepoch=nepochs)
            train_loss, train_acc = self.eval(xtrain, ytrain)
            print('| [{:d}/5], Epoch {:d}/{:d}, | Train: loss={:.3f}, acc={:2.2f}% |'.format(
                t + 1, e + 1, nepochs, train_loss, 100 * train_acc), end='')
            # # Valid
            valid_loss, valid_acc = self.eval(xvalid, yvalid)
            print(' Valid: loss={:.3f}, acc={:5.2f}% |'.format(valid_loss, 100 * valid_acc), end='')
            print()

            xtest = data[5]['test']['x'].cuda()
            ytest = data[5]['test']['y'].cuda()
            _, test_acc = self.eval(xtest, ytest)

            # # Adapt lr
            # if valid_loss < best_loss:
            #     best_loss = min(best_loss,valid_loss)
            # if valid_acc > best_acc:
            #     best_acc = max(best_acc, valid_acc)
            if test_acc > self.test_max:
                self.test_max = max(self.test_max, test_acc)
                best_model = utils.get_model(self.model)

            print('>>> Test on All Task:->>> Max_acc : {:2.2f}% Curr_acc : {:2.2f}%<<<'.format(
                100 * self.test_max, 100 * test_acc))
    except KeyboardInterrupt:
        print()

    # Restore best validation model
    utils.set_model_(self.model, best_model)
    return
def __init__(self, args):
    self.args = args

    # Set up model
    self.device = args.device
    self.model = get_model(args.model, input_channels=args.input_channels, pretrained=args.pretrained)
    self.backward = get_model(args.Bmodel, input_channels=args.input_channels, pretrained=args.Bpretrained)
    self.model = self.model.to(self.device)
    self.backward = self.backward.to(self.device)
    if args.use_multiple_gpu:
        self.model = torch.nn.DataParallel(self.model)
        self.backward = torch.nn.DataParallel(self.backward)

    if args.mode == 'train':
        self.loss_function = MonodepthLoss(n=4, SSIM_w=0.85, disp_gradient_w=0.1, lr_w=1).to(self.device)
        self.optimizer = optim.Adam(self.model.parameters(), lr=args.learning_rate)
        self.Boptimizer = optim.Adam(self.backward.parameters(), lr=args.learning_rate)
        self.Goptimizer = optim.Adam(itertools.chain(self.model.parameters(), self.backward.parameters()),
                                     lr=args.learning_rate)
        self.val_n_img, self.val_loader = prepare_dataloader(
            args.val_data_dir, args.mode, args.augment_parameters, False,
            args.batch_size, (args.input_height, args.input_width), args.num_workers)
    else:
        self.model.load_state_dict(torch.load(args.model_path))
        # self.backward.load_state_dict(torch.load(args.Bmodel_path))
        args.augment_parameters = None
        args.do_augmentation = False
        args.batch_size = 1

    # Load data
    self.output_directory = args.output_directory
    self.input_height = args.input_height
    self.input_width = args.input_width

    self.n_img, self.loader = prepare_dataloader(
        args.data_dir, args.mode, args.augment_parameters, args.do_augmentation,
        args.batch_size, (args.input_height, args.input_width), args.num_workers)

    if 'cuda' in self.device:
        torch.cuda.synchronize()
def search_network(self, t, train_data, valid_data, batch_size, epochs, device='cuda'):
    # 0 prepare
    print("Search Stage")
    best_loss = np.inf
    best_model = utils.get_model(self.model)
    lr_a = self.o_lr_a
    lr = self.o_lr

    # 1 define optimizers
    self.optimizer_oa = self._get_optimizer_oa(lr_a)
    self.optimizer_o = self._get_optimizer_o(lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer_o, epochs)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer_o, patience=self.lr_patience,
    #                                                        factor=self.lr_factor)

    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(0.5 * num_train))
    train_loader = torch.utils.data.DataLoader(
        train_data, batch_size=batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        num_workers=4, pin_memory=True)
    valid_loader = torch.utils.data.DataLoader(
        train_data, batch_size=batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        num_workers=4, pin_memory=True)

    # 3 training the model
    for e in range(epochs):
        # 3.1 search
        self.search_epoch(t, train_loader, valid_loader, device)
        # 3.2 compute training loss
        train_loss, train_acc = self.eval(t, train_loader, mode='search', device=device)
        # 3.3 compute valid loss
        valid_loss, valid_acc = self.eval(t, valid_loader, mode='search', device=device)
        # 3.4 logging
        print('| Epoch {:3d} | Train: loss={:.3f}, acc={:5.1f}% | Valid: loss={:.3f}, acc={:5.1f}% |'.format(
            e, train_loss, 100 * train_acc, valid_loss, 100 * valid_acc))
        self.writer.add_scalars('Search_Loss/Task: {}'.format(t),
                                {'train_loss': train_loss, 'valid_loss': valid_loss},
                                global_step=e)
        self.writer.add_scalars('Search_Accuracy/Task: {}'.format(t),
                                {'train_acc': train_acc * 100, 'valid_acc': valid_acc * 100},
                                global_step=e)
        # 3.5 Adapt lr
        scheduler.step()
        if valid_loss < best_loss:
            best_loss = valid_loss
            best_model = utils.get_model(self.model)

    # 4 Restore best model
    utils.set_model_(self.model, best_model)
def __init__(self, clients, data, device, project_dir, model_name, num_of_clients, lr,
             drop_rate, stride, multiple_scale, clustering=False, clustering_method="finch",
             max_distance=2, n_cluster=2):
    self.project_dir = project_dir
    self.data = data
    self.device = device
    self.model_name = model_name
    self.clients = clients
    self.client_list = self.data.client_list
    self.num_of_clients = num_of_clients
    self.lr = lr
    self.multiple_scale = multiple_scale
    self.drop_rate = drop_rate
    self.stride = stride

    self.multiple_scale = []
    for s in multiple_scale.split(','):
        self.multiple_scale.append(math.sqrt(float(s)))

    self.federated_model = get_model(750, drop_rate, stride).to(device)
    self.federated_model.classifier.classifier = nn.Sequential()
    self.federated_model.eval()
    self.train_loss = []
    self.use_clustering = clustering
    self.clustering_group_for_kd = None
    self.cdw = None
    self.clients_using = None
    self.clients_weights = None
    self.clustering_method = clustering_method
    self.max_dis = max_distance
    self.n_cluster = n_cluster
def main():
    opt = parse_args()

    kwargs = {
        'ctx': [mx.cpu()],
        'pretrained': False,
        'classes': 1000,
        'ratio': opt.ratio
    }
    if opt.use_se:
        kwargs['use_se'] = True

    logging.info("get symbol ...")
    net = get_model(opt.model, **kwargs)

    # Option 1
    logging.info("option 1: print network ...")
    logging.info(net)

    # Option 2 (net must be HybridSequential if you want to plot the whole graph)
    logging.info("option 2: draw network ...")
    net.hybridize()
    net.collect_params().initialize()
    x = mx.sym.var('data')
    sym = net(x)
    digraph = mx.viz.plot_network(sym, shape={'data': (1, 3, 224, 224)}, save_format='png')
    digraph.view()
    digraph.render()

    keys = sorted(dict(net.collect_params()).keys())
    logging.info(json.dumps(keys, indent=4))
def __init__(self, cid, data, device, project_dir, model_name, local_epoch, lr, batch_size,
             drop_rate, stride, clustering=False):
    self.cid = cid
    self.project_dir = project_dir
    self.model_name = model_name
    self.data = data
    self.device = device
    self.local_epoch = local_epoch
    self.lr = lr
    self.batch_size = batch_size

    self.dataset_sizes = self.data.train_dataset_sizes[cid]
    self.train_loader = self.data.train_loaders[cid]

    self.model = get_model(self.data.train_class_sizes[cid], drop_rate, stride)
    self.classifier = copy.deepcopy(self.model.classifier.classifier)
    self.model.classifier.classifier = nn.Sequential()
    self.distance = 0
    self.optimization = Optimization(self.train_loader, self.device)
    self.use_clustering = clustering
def test():
    model_folder = '/home/stormlab/seg/LSTM-UNet-Outputs/Retrained/LSTMUNet/MyRun_SIM/2020-03-03_191130'
    with open(os.path.join(model_folder, 'model_params.pickle'), 'rb') as fobj:
        model_dict = pickle.load(fobj)
    model_cls = get_model(model_dict['name'])
    device = '/gpu:0'
    with tf.device(device):
        model = model_cls(*model_dict['params'], data_format='NHWC', pad_image=False)
        model.load_weights(os.path.join(model_folder, 'model.ckpt'))
        log_print("Restored from {}".format(os.path.join(model_folder, 'model')))

    image = cv2.imread(
        '/home/stormlab/seg/LSTM-UNet-Outputs/Retrained/LSTMUNet/MyRun_SIM/2020-03-03_191130/image7.png', -1)
    plt.imshow(image, cmap='gray')
    img = cv2.resize(image, (64, 64), interpolation=cv2.INTER_AREA)
    image = cv2.normalize(img.astype(np.float32), None, 0.0, 1.0, cv2.NORM_MINMAX)
    np_image = np.expand_dims(image, axis=0)  # Add another dimension for tensorflow
    np_image = np.expand_dims(np_image, axis=0)
    np_image = np.expand_dims(np_image, axis=-1)

    logits, pred = model(np_image, False)
    pred = np.squeeze(pred, (0, 1, 4))
    plt.imshow(pred, cmap='gray')
def eval_tf_model(model_name, lr, val_X, val_Y, checkpoint_path, logdir):
    '''
    Evaluate the performance of the model saved at checkpoint_path on the full dataset

    Arguments
    ---------
    model_name: str
        The name of the model to be imported from models.py
    lr: float
        The size of each update step made by the optimizer
    val_X: numpy.array
        The gene expression data to be held out to evaluate model performance
    val_Y: numpy.array
        The labels corresponding to whether each sample in val_X represents healthy
        or unhealthy gene expression
    checkpoint_path: str
        The path to the weights for the best performing iteration of the model
    logdir: str or Path or None
        The directory to save tensorboard logs to

    Returns
    -------
    val_acc: float
        The accuracy the model achieved in predicting val_Y from val_X
    val_auroc: float
        The area under the receiver operating characteristic curve based on the
        model's decision function on val_X
    val_aupr: float
        The area under the precision-recall curve based on the model's decision
        function on val_X
    '''
    # Load model
    model = utils.get_model(model_name, logdir, lr)
    model.load_weights(checkpoint_path)

    val_loss, val_acc, val_auroc, val_aupr = model.evaluate(val_X, val_Y)

    return val_acc, val_auroc, val_aupr
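# Hedged usage sketch for eval_tf_model above; the model name, checkpoint path, and the shapes
# of the held-out arrays are hypothetical placeholders that only illustrate the call signature.
import numpy as np

val_X = np.random.rand(32, 5000).astype(np.float32)   # 32 samples of gene expression
val_Y = np.random.randint(0, 2, size=(32,))            # binary healthy/unhealthy labels

val_acc, val_auroc, val_aupr = eval_tf_model(
    model_name='three_layer_net',                      # assumed to exist in models.py
    lr=1e-4,
    val_X=val_X,
    val_Y=val_Y,
    checkpoint_path='checkpoints/best_weights.ckpt',   # hypothetical path
    logdir=None,
)
print('acc={:.3f} auroc={:.3f} aupr={:.3f}'.format(val_acc, val_auroc, val_aupr))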
def feature_analysis(config):
    dataset = MarkerExpressionDataset(config)
    model = get_model(config)
    for marker in dataset.markers:
        all_features, all_labels, _ = dataset.get_all_data(marker, feature_selection=False, dup_reduce=True)
        model = base.clone(model)
        all_pred_labels = [0 for _ in range(len(all_labels))]
        all_pred_score = [0 for _ in range(len(all_labels))]
        for i in range(len(all_features)):
            # Leave-one-out: train on all samples except the i-th, then predict the i-th
            train_features = all_features.copy()
            train_labels = all_labels.copy()
            del train_features[i]
            del train_labels[i]
            # model.fit(all_features, all_labels)
            model.fit(train_features, train_labels)
            all_pred_score[i] = model.predict_proba([all_features[i]])[0]
            all_pred_labels[i] = model.predict([all_features[i]])[0]
        tps = sum([y_true == y_pred for y_true, y_pred in zip(all_labels, all_pred_labels)])
        acc = tps / len(all_features)
        results = eval_results(all_labels, all_pred_score, dataset.classes)
        print('marker %s: acc %1.2f' % (marker, 100 * acc))
        print(results)
def _get_model(self, args) -> torch.nn.Module:
    """
    Get the model. Automatically moves the model to the specified device(s).

    Args:
        args: Experiment args

    Returns:
        Model
    """
    # Determine device
    if args.cuda_device_ids[0] == -2:
        self.device = "cpu"
        logger.info("Running experiment on the CPU ...")
    else:
        self.device = f"cuda:{args.cuda_device_ids[0]}"

    # Get model
    self.model = get_model(
        model=args.model,
        n_input_channels=args.input_channels,
        pretrained=args.imagenet_pretrained,
        args=self.args,
    )
    logger.info("Training a {} model with {} parameters".format(
        args.model, sum(p.numel() for p in self.model.parameters())))

    # Load pretrained model
    if args.checkpoint:
        logging.info(f"Loading pretrained model from {args.checkpoint}")
        self.load(args.checkpoint)

    self.multi_gpu = len(args.cuda_device_ids) > 1 or args.cuda_device_ids[0] == -1

    # Check if multiple cuda devices are selected
    if self.multi_gpu:
        num_cuda_devices = torch.cuda.device_count()

        if args.cuda_device_ids[0] == -1:
            # Select all devices
            cuda_device_ids = list(range(num_cuda_devices))
        else:
            cuda_device_ids = args.cuda_device_ids

        # Check if multiple cuda devices are available
        if num_cuda_devices > 1:
            logger.info(f"Running experiment on the following GPUs: {cuda_device_ids}")

            # Transform model into a data parallel model on all selected cuda devices
            self.model = torch.nn.DataParallel(self.model, device_ids=cuda_device_ids)
        else:
            logger.warning(
                f"Attempted to run the experiment on multiple GPUs while only {num_cuda_devices} GPU was available"
            )

    logger.debug(f"Sending model to device: {self.device}")
    return self.model.to(self.device)
def post(self, request):
    """
    POST: Confirms and creates the TFGs, returning any errors

    :param request:
    :return:
    """
    try:
        params = utils.get_params(request)
        self.logger.info('INICIO WS - UPLOADFILECONFIRMVIEW POST del usuario: %s con parametros: %s' %
                         (request.user.email if hasattr(request.user, 'email') else request.user.username, params))
        if request.user.has_perm('tfgs.tfg.masivos') or request.user.is_admin:
            model = get_model(params.get('model'))
            load_tfgs = SUBIDAS.get(params.get('model'))()
            resul = load_tfgs.upload_file_confirm(params['list_tfg'])
            if resul['status']:
                resul_status = status.HTTP_200_OK
            else:
                resul = dict(message=resul['message'])
                resul_status = status.HTTP_400_BAD_REQUEST
        else:
            resul = dict(message="Sin privilegios")
            resul_status = status.HTTP_405_METHOD_NOT_ALLOWED
        self.logger.info('FIN WS - UPLOADFILECONFIRMVIEW POST del usuario: %s con resultado: %s' %
                         (request.user.email if hasattr(request.user, 'email') else request.user.username, resul))
        return Response(resul, status=resul_status)
    except Exception as e:
        resul = dict(status=False, message="Error en la llamada")
        self.logger.critical('UPLOADFILECONFIRMVIEW POST: %s %s' % (resul, e))
        return Response(resul, status=status.HTTP_400_BAD_REQUEST)
def post_train(self, t, xtrain, ytrain, xvalid, yvalid):
    # Restore best, save model as old
    if t > 0:
        model_state = utils.get_model(self.model)
        model_old_state = utils.get_model(self.model_old)
        for name, param in self.model.named_parameters():
            # model_state[name]=(1-self.alpha)*model_old_state[name]+self.alpha*model_state[name]
            model_state[name] = (model_state[name] + model_old_state[name] * t) / (t + 1)
        utils.set_model_(self.model, model_state)

    self.model_old = deepcopy(self.model)
    utils.freeze_model(self.model_old)
    self.model_old.eval()

    return
def initialize(config): model = utils.get_model(config["model"]) # Adapt model for distributed settings if configured model = idist.auto_model(model, find_unused_parameters=True) optimizer = optim.SGD( model.parameters(), lr=config["learning_rate"], momentum=config["momentum"], weight_decay=config["weight_decay"], nesterov=True, ) optimizer = idist.auto_optim(optimizer) criterion = nn.CrossEntropyLoss().to(idist.device()) le = config["num_iters_per_epoch"] milestones_values = [ (0, 0.0), (le * config["num_warmup_epochs"], config["learning_rate"]), (le * config["num_epochs"], 0.0), ] lr_scheduler = PiecewiseLinear(optimizer, param_name="lr", milestones_values=milestones_values) return model, optimizer, criterion, lr_scheduler
def initialize(config): model = utils.get_model(config["model"], config["model_dir"], config["dropout"], config["n_fc"], config["num_classes"]) config["learning_rate"] *= idist.get_world_size() # Adapt model for distributed settings if configured model = idist.auto_model(model) optimizer = optim.AdamW( model.parameters(), lr=config["learning_rate"], weight_decay=config["weight_decay"], ) optimizer = idist.auto_optim(optimizer) criterion = nn.BCEWithLogitsLoss() le = config["num_iters_per_epoch"] milestones_values = [ (0, 0.0), (le * config["num_warmup_epochs"], config["learning_rate"]), (le * config["num_epochs"], 0.0), ] lr_scheduler = PiecewiseLinear(optimizer, param_name="lr", milestones_values=milestones_values) return model, optimizer, criterion, lr_scheduler
def initialize(config): model = utils.get_model(config["model"]) # Adapt model for distributed backend if provided model = idist.auto_model(model) optimizer = utils.get_optimizer( config["optimizer"], model, learning_rate=config["learning_rate"], weight_decay=config["weight_decay"], ) # Adapt optimizer for distributed backend if provided optimizer = idist.auto_optim(optimizer) criterion = nn.CrossEntropyLoss().to(idist.device()) le = config["num_iters_per_epoch"] milestones_values = [ (0, 0.0), (le * config["num_warmup_epochs"], config["learning_rate"]), (le * config["num_epochs"], 0.0), ] lr_scheduler = PiecewiseLinear(optimizer, param_name="lr", milestones_values=milestones_values) return model, optimizer, criterion, lr_scheduler
def features_loading(net, dataset, device, features_model):
    """Loads the features of a given model and freezes them at training time, so only the classifier is fine-tuned."""
    conv_features, _ = get_model(model='Ablation', binary_layer=False, opu=False, n_epochs='_',
                                 dataset=dataset, opu_output=8000, opu_input=1024,
                                 sign_back=False, device=device)
    conv_features.load_state_dict(torch.load(path_to_folder + 'models/' + features_model + '.pt'))
    conv_features.eval()
    conv_features = conv_features.state_dict()

    for name, param in net.named_parameters():
        if name.split('.')[0] == 'features':
            param.data = conv_features[name]
            param.requires_grad = False

    print('- Robust features loaded!')
    return net
def get_forecast(query, url=None):
    """
    Fetches the forecast from the model provider and returns the forecast
    subset to the query domain.
    """
    if url is not None:
        warnings.warn('Forecast was extracted from %s'
                      ' which may be out of date.' % url)
    model = utils.get_model(query['model'])
    fcst = model.fetch(url=url)
    sub_fcst = subset.subset_dataset(fcst, query)
    return sub_fcst
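# Hedged usage sketch for get_forecast above; the model name and query keys are hypothetical
# placeholders, since the real query schema is defined elsewhere in the project.
query = {
    'model': 'gfs',                                     # assumed model key known to utils.get_model
    'domain': {'N': 45.0, 'S': 35.0, 'E': -120.0, 'W': -130.0},
    'variables': ['wind_speed'],
}
forecast = get_forecast(query)  # fetches the latest forecast and subsets it to the domain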
def main(): parser = argparse.ArgumentParser(description='Predict the testing set') parser.add_argument('--model_type', default='RandomForest') parser.add_argument('--test', action='store_true') args = parser.parse_args() model = get_model(args.model_type, args.test) print "Loaded Model: %s" % model print "Loading Training Data" training = load_training() print "Adding new features" training = add_features(training) print "Running Cross Validaton" cross_validate(training, model)
def main():
    parser = argparse.ArgumentParser(description='Predict the testing set')
    parser.add_argument('--model_type', default='RandomForest')
    parser.add_argument('--test', action='store_true')
    args = parser.parse_args()

    if args.test:
        suffix = 'test'
    else:
        suffix = time.strftime("%d_%m_%Y")

    model = get_model(args.model_type, args.test)
    print("Loaded Model: %s" % model)

    print("Loading Training Data")
    training = load_training()

    if not args.test:
        print("Adding new features")
        training = add_features(training)

    print("Training Model")
    classifier = train(training, model)

    print("Saving Classifier")
    output_dir = 'models/classifier_%s' % suffix
    try:
        os.mkdir(output_dir)
    except:
        # Directory may already exist
        pass
    joblib.dump(classifier, '%s/%s.pkl' % (output_dir, classifier.__class__.__name__))

    print("Loading testing set")
    testing = load_testing()

    if not args.test:
        print("Adding new features to testing set")
        testing = add_features(testing)

    print("Making predictions on testing set")
    predictions = predict(classifier, testing)
    output_predictions(predictions, threshold=0.7, filename='prediction_%s.csv' % suffix)
import tensorflow as tf

## Tools
import utils
## Parameters
import params  ## you can modify the content of params.py
import preprocess

img_height = params.img_height
img_width = params.img_width
img_channels = params.img_channels

## Test epoch
epoch_ids = [10]

## Load model
model = utils.get_model()


## Preprocess
def img_pre_process(img):
    """
    Processes the image and returns it
    :param img: The image to be processed
    :return: Returns the processed image
    """
    # Chop off 1/2 from the top and cut bottom 150px (which contains the head of the car)
    ratio = img_height / img_width
    h1, h2 = int(img.shape[0] / 2), img.shape[0] - 150
    w = (h2 - h1) / ratio
    padding = int(round((img.shape[1] - w) / 2))
    img = img[h1:h2, padding:-padding]
    ## Resize the image