def optimize(self, model, train_loader, val_loader=None, optimizer=None):
    """Run the full training loop, or a single evaluation pass.

    Args:
        model: network to optimize; moved to 'cuda:0' before training.
        train_loader: iterable of training batches.
        val_loader: optional iterable of validation batches.
        optimizer: optional optimizer; built via ``self.get_optimizer``
            when ``None`` (after the model is on the device, so optimizer
            state references the device parameters).

    Returns:
        Tuple ``(model, metrics)`` where ``metrics`` is a dict with
        per-epoch entries under the keys ``'train'`` and ``'val'``.
    """
    criterion = self.criterion
    metrics = {'train': [], 'val': []}
    first_epoch = 0
    model_path = None

    # Send model and criterion to the device BEFORE creating the optimizer.
    criterion = criterion.to('cuda:0')
    model = model.to('cuda:0')
    if optimizer is None:
        optimizer = self.get_optimizer(model)

    # Resume from checkpoint if a checkpoint directory is configured.
    if self.checkpoint_dir is not None:
        model_path = os.path.join(self.checkpoint_dir, 'model.pth')
        if self.resume:
            first_epoch, metrics = files.load_checkpoint(
                self.checkpoint_dir, model, optimizer)

    print(f"{self.__class__.__name__}:"
          f" epochs:{self.num_epochs}"
          f" momentum:{self.momentum}"
          f" weight_decay:{self.weight_decay}"
          f" nesterov:{self.nesterov}")

    if not self.validate_only:
        # BUGFIX: the old bound `1 if self.validate_only else
        # self.num_epochs` was dead code inside this branch — the
        # condition guarantees validate_only is falsy here.
        for epoch in range(first_epoch, self.num_epochs):
            print(optimizer)
            m = self.optimize_epoch(model, criterion, optimizer,
                                    train_loader, epoch,
                                    is_validation=False)
            metrics["train"].append(m)
            # Only validate after epoch 25 (skips early epochs).
            if epoch > 25 and val_loader:
                with torch.no_grad():
                    m = self.optimize_epoch(model, criterion, optimizer,
                                            val_loader, epoch,
                                            is_validation=True)
                metrics["val"].append(m)
            files.save_checkpoint(self.checkpoint_dir, model, optimizer,
                                  metrics, epoch)
    else:
        print('only evaluating!', flush=True)
        with torch.no_grad():
            m = self.optimize_epoch(model, criterion, optimizer,
                                    val_loader, 0, is_validation=True)
        metrics["val"].append(m)

    # BUGFIX: when checkpoint_dir is None, model_path stays None; the
    # original passed that None path to files.save_model unconditionally.
    if model_path is not None:
        print(f"Model optimization completed. Saving final model to {model_path}")
        files.save_model(model, model_path)
    return model, metrics
def validate(epoch):
    """Evaluate the model on ``val_loader``; log metrics and checkpoint.

    Reads module-level state: ``model``, ``val_loader``, ``criterion``,
    ``optimizer``, ``args``, ``writer``; updates the globals ``best_pred``,
    ``acclist_train`` and ``acclist_val``.

    Args:
        epoch: 1-based epoch number, used for TensorBoard step indexing
            and stored in the checkpoint.
    """
    model.eval()
    losses = AverageMeter()
    top1 = AverageMeter()
    top5 = AverageMeter()
    global best_pred, acclist_train, acclist_val
    is_best = False
    tbar = tqdm(val_loader, desc='\r')
    for batch_idx, (data, target) in enumerate(tbar):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        with torch.no_grad():
            output = model(data)
            loss = criterion(output, target)
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            top1.update(acc1[0], data.size(0))
            top5.update(acc5[0], data.size(0))
            losses.update(loss.item(), data.size(0))
        tbar.set_description('Top1: %.3f | Top5: %.3f' % (top1.avg, top5.avg))
        # Global step across epochs (assumes epoch is 1-based).
        niter = (epoch - 1) * len(val_loader) + batch_idx
        writer.add_scalar('Val/Loss', losses.avg, niter)
        writer.add_scalar('Val/Prec@1', top1.avg, niter)
        # BUGFIX: tag was 'Val/Pred@5' (typo), inconsistent with
        # 'Val/Prec@1' above; data landed under a misspelled tag.
        writer.add_scalar('Val/Prec@5', top5.avg, niter)

    if args.eval:
        # Evaluation-only run: report and skip checkpointing.
        print('Top1 Acc: %.3f | Top5 Acc: %.3f ' % (top1.avg, top5.avg))
        return

    # Record accuracy history and save a checkpoint, flagging a new best.
    acclist_val += [top1.avg]
    if top1.avg > best_pred:
        best_pred = top1.avg
        is_best = True
    save_checkpoint(
        {
            'epoch': epoch,
            'state_dict': model.module.state_dict(),
            'optimizer': optimizer.state_dict(),
            'best_pred': best_pred,
            'acclist_train': acclist_train,
            'acclist_val': acclist_val,
        },
        args=args, is_best=is_best)
def optimize(self, model, train_loader, val_loader=None, optimizer=None):
    """Run the full training loop, or a single evaluation pass.

    Args:
        model: network to optimize; moved to ``self.dev`` before training.
        train_loader: iterable of training batches.
        val_loader: optional iterable of validation batches.
        optimizer: optional optimizer; built via ``self.get_optimizer``
            when ``None`` (after the model is on the device, so optimizer
            state references the device parameters).

    Returns:
        Tuple ``(model, metrics)`` where ``metrics`` is a dict with
        per-epoch entries under the keys ``'train'`` and ``'val'``.
    """
    criterion = self.criterion
    metrics = {'train': [], 'val': []}
    first_epoch = 0
    model_path = None

    # Send model and criterion to the device BEFORE creating the optimizer.
    criterion = criterion.to(self.dev)
    model = model.to(self.dev)
    if optimizer is None:
        optimizer = self.get_optimizer(model)

    # Resume from checkpoint if a checkpoint directory is configured.
    if self.checkpoint_dir is not None:
        model_path = os.path.join(self.checkpoint_dir, 'model.pth')
        if self.resume:
            first_epoch, metrics = files.load_checkpoint(
                self.checkpoint_dir, model, optimizer)

    if not self.validate_only:
        # BUGFIX: the old bound `1 if self.validate_only else
        # self.num_epochs` was dead code inside this branch — the
        # condition guarantees validate_only is falsy here.
        for epoch in range(first_epoch, self.num_epochs):
            print(optimizer)
            m = self.optimize_epoch(model, criterion, optimizer,
                                    train_loader, epoch,
                                    is_validation=False)
            metrics["train"].append(m)
            # Validate during the last 20 epochs, or every 5th epoch.
            if ((epoch > (self.num_epochs - 20)) or (epoch % 5 == 0)) \
                    and val_loader:
                with torch.no_grad():
                    m = self.optimize_epoch(model, criterion, optimizer,
                                            val_loader, epoch,
                                            is_validation=True)
                metrics["val"].append(m)
            files.save_checkpoint(self.checkpoint_dir, model, optimizer,
                                  metrics, epoch)
            if epoch in [84, 126]:
                # Extra permanent snapshot at these epochs (presumably
                # tied to the LR schedule — TODO confirm).
                files.save_checkpoint(self.checkpoint_dir, model, optimizer,
                                      metrics, epoch, defsave=True)
    else:
        print('only evaluating!', flush=True)
        with torch.no_grad():
            m = self.optimize_epoch(model, criterion, optimizer,
                                    val_loader, 99, is_validation=True)
        metrics["val"].append(m)

    # BUGFIX: the original unconditionally joined self.checkpoint_dir,
    # raising TypeError when it is None; reuse the model_path computed
    # above and skip the final save when no checkpoint dir is set.
    if model_path is not None:
        torch.save(model, model_path)
    return model, metrics