# Example #1
# 0
    def optimize(self, model, train_loader, val_loader=None, optimizer=None):
        """Perform full optimization.

        Trains for ``self.num_epochs`` epochs (or runs a single validation
        pass when ``self.validate_only``), checkpointing every epoch and
        validating only after epoch 25.

        Args:
            model: network to optimize; moved onto the GPU in-place here.
            train_loader: iterable of training batches.
            val_loader: optional iterable of validation batches.
            optimizer: optional optimizer; built via ``self.get_optimizer``
                when not supplied (after the model is on-device).

        Returns:
            (model, metrics) where metrics is {'train': [...], 'val': [...]}.
        """
        criterion = self.criterion
        metrics = {'train': [], 'val': []}
        first_epoch = 0
        model_path = None

        # Send models to device.
        # TODO(review): device is hard-coded; the other optimize() variant
        # in this file uses ``self.dev`` — confirm and unify.
        criterion = criterion.to('cuda:0')
        model = model.to('cuda:0')

        # Build the optimizer only after parameters live on the device so
        # its state tensors end up there too.
        if optimizer is None:
            optimizer = self.get_optimizer(model)

        # Resume from checkpoint when one is configured.
        if self.checkpoint_dir is not None:
            model_path = os.path.join(self.checkpoint_dir, 'model.pth')
            if self.resume:
                first_epoch, metrics = files.load_checkpoint(self.checkpoint_dir, model, optimizer)

        print(f"{self.__class__.__name__}:"
            f" epochs:{self.num_epochs}"
            f" momentum:{self.momentum}"
            f" weight_decay:{self.weight_decay}"
            f" nesterov:{self.nesterov}")

        # Perform epochs
        if not self.validate_only:
            # We are inside `not self.validate_only`, so the epoch bound is
            # simply self.num_epochs (the old `1 if self.validate_only
            # else ...` expression was dead code).
            for epoch in range(first_epoch, self.num_epochs):
                print(optimizer)
                m = self.optimize_epoch(model, criterion, optimizer, train_loader, epoch, is_validation=False)
                metrics["train"].append(m)
                # Only validate late in training (after epoch 25) to save time.
                if epoch > 25 and val_loader:
                    with torch.no_grad():
                        m = self.optimize_epoch(model, criterion, optimizer, val_loader, epoch, is_validation=True)
                        metrics["val"].append(m)
                files.save_checkpoint(self.checkpoint_dir, model, optimizer, metrics, epoch)
        else:
            print('only evaluating!', flush=True)
            with torch.no_grad():
                m = self.optimize_epoch(model, criterion, optimizer, val_loader, 0, is_validation=True)
                metrics["val"].append(m)

        # Bug fix: only save when a checkpoint directory was configured —
        # previously files.save_model(model, None) was called unconditionally.
        if model_path is not None:
            print(f"Model optimization completed. Saving final model to {model_path}")
            files.save_model(model, model_path)

        return model, metrics
    def validate(epoch):
        """Run one validation pass over ``val_loader``, log metrics, and
        checkpoint (marking a new best when top-1 accuracy improves).

        NOTE(review): this function takes no ``self`` and reads ``model``,
        ``val_loader``, ``criterion``, ``optimizer``, ``args`` and
        ``writer`` from the enclosing scope — confirm it is intended as a
        script-level closure rather than a method.

        Args:
            epoch: 1-based epoch index used for TensorBoard step numbering.
        """
        model.eval()
        losses = AverageMeter()
        top1 = AverageMeter()
        top5 = AverageMeter()
        global best_pred, acclist_train, acclist_val
        is_best = False
        tbar = tqdm(val_loader, desc='\r')
        for batch_idx, (data, target) in enumerate(tbar):
            if args.cuda:
                data, target = data.cuda(), target.cuda()
            with torch.no_grad():
                output = model(data)
                loss = criterion(output, target)
                acc1, acc5 = accuracy(output, target, topk=(1, 5))
                top1.update(acc1[0], data.size(0))
                top5.update(acc5[0], data.size(0))
                losses.update(loss.item(), data.size(0))

            tbar.set_description('Top1: %.3f | Top5: %.3f' %
                                 (top1.avg, top5.avg))
            # Global step for TensorBoard; epochs are 1-based here.
            niter = (epoch - 1) * len(val_loader) + batch_idx
            writer.add_scalar('Val/Loss', losses.avg, niter)
            writer.add_scalar('Val/Prec@1', top1.avg, niter)
            # Bug fix: tag was 'Val/Pred@5', inconsistent with 'Val/Prec@1'.
            writer.add_scalar('Val/Prec@5', top5.avg, niter)

        if args.eval:
            # Evaluation-only run: report and skip checkpointing.
            print('Top1 Acc: %.3f | Top5 Acc: %.3f ' % (top1.avg, top5.avg))
            return
        # Track validation accuracy and save a checkpoint.
        acclist_val += [top1.avg]
        if top1.avg > best_pred:
            best_pred = top1.avg
            is_best = True
        save_checkpoint(
            {
                'epoch': epoch,
                'state_dict': model.module.state_dict(),
                'optimizer': optimizer.state_dict(),
                'best_pred': best_pred,
                'acclist_train': acclist_train,
                'acclist_val': acclist_val,
            },
            args=args,
            is_best=is_best)
# Example #3
# 0
    def optimize(self, model, train_loader, val_loader=None, optimizer=None):
        """Perform full optimization.

        Trains for ``self.num_epochs`` epochs (or runs a single validation
        pass when ``self.validate_only``), checkpointing every epoch and
        validating every 5th epoch plus every epoch in the final stretch.

        Args:
            model: network to optimize; moved onto ``self.dev`` in-place.
            train_loader: iterable of training batches.
            val_loader: optional iterable of validation batches.
            optimizer: optional optimizer; built via ``self.get_optimizer``
                when not supplied (after the model is on-device).

        Returns:
            (model, metrics) where metrics is {'train': [...], 'val': [...]}.
        """
        criterion = self.criterion
        metrics = {'train': [], 'val': []}
        first_epoch = 0

        # Send models to device
        criterion = criterion.to(self.dev)
        model = model.to(self.dev)

        # Build the optimizer only after parameters live on the device so
        # its state tensors end up there too.
        if optimizer is None:
            optimizer = self.get_optimizer(model)
        # Resume from checkpoint when one is configured.  (The unused
        # `model_path` local was removed; the final save recomputes it.)
        if self.checkpoint_dir is not None and self.resume:
            first_epoch, metrics = files.load_checkpoint(
                self.checkpoint_dir, model, optimizer)

        # Perform epochs
        if not self.validate_only:
            # We are inside `not self.validate_only`, so the epoch bound is
            # simply self.num_epochs (the old conditional was dead code).
            for epoch in range(first_epoch, self.num_epochs):
                print(optimizer)
                m = self.optimize_epoch(model,
                                        criterion,
                                        optimizer,
                                        train_loader,
                                        epoch,
                                        is_validation=False)
                metrics["train"].append(m)
                # Validate every 5th epoch, and every epoch in the last 20.
                if (epoch > (self.num_epochs - 20)) or (epoch % 5 == 0):
                    if val_loader:
                        with torch.no_grad():
                            m = self.optimize_epoch(model,
                                                    criterion,
                                                    optimizer,
                                                    val_loader,
                                                    epoch,
                                                    is_validation=True)
                            metrics["val"].append(m)
                files.save_checkpoint(self.checkpoint_dir, model, optimizer,
                                      metrics, epoch)
                # Permanent snapshots at these epochs — presumably schedule
                # milestones; TODO(review) confirm and lift into config.
                if epoch in [84, 126]:
                    files.save_checkpoint(self.checkpoint_dir,
                                          model,
                                          optimizer,
                                          metrics,
                                          epoch,
                                          defsave=True)
        else:
            print('only evaluating!', flush=True)
            with torch.no_grad():
                # Epoch 99 is a sentinel marking an evaluation-only run.
                m = self.optimize_epoch(model,
                                        criterion,
                                        optimizer,
                                        val_loader,
                                        99,
                                        is_validation=True)
                metrics["val"].append(m)

        # Bug fix: only save when a checkpoint directory is configured —
        # os.path.join(None, 'model.pth') raises TypeError, and the code
        # above explicitly allows checkpoint_dir to be None.
        if self.checkpoint_dir is not None:
            torch.save(model, os.path.join(self.checkpoint_dir, 'model.pth'))

        return model, metrics