Ejemplo n.º 1
0
    def train(self):
        objs = dutils.AverageMeter()
        top1 = dutils.AverageMeter()
        top5 = dutils.AverageMeter()

        self.model.train()

        for step, (input, target) in enumerate(self.train_queue):

            input = input.cuda(self.device, non_blocking=True)
            target = target.cuda(self.device, non_blocking=True)

            self.optimizer.zero_grad()
            logits, logits_aux = self.model(input)
            loss = self.criterion(logits, target)
            if self.args.auxiliary:
                loss_aux = self.criterion(logits_aux, target)
                loss += self.args.auxiliary_weight*loss_aux
            loss.backward()
            nn.utils.clip_grad_norm_(self.model.parameters(), self.args.grad_clip)
            self.optimizer.step()

            prec1, prec5 = dutils.accuracy(logits, target, topk=(1,5))
            n = input.size(0)
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)
            top5.update(prec5.item(), n)

            if step % args.report_freq == 0:
                self.logger.info('train %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)

        return top1.avg, objs.avg
Ejemplo n.º 2
0
def train(train_loader, model, criterion, optimizer, epoch):
    batch_time = dutils.AverageMeter()
    data_time = dutils.AverageMeter()
    losses = dutils.AverageMeter()
    top1 = dutils.AverageMeter()
    top5 = dutils.AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)

        if args.gpu is not None:
            input = input.cuda(args.gpu, non_blocking=True)

        # Host to GPU copies are much faster when they originate from pinned (page-locked) memory.
        # once you pin a tensor or storage, you can use asynchronous GPU copies
        target = target.cuda(args.gpu, non_blocking=True)

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec1, prec5 = dutils.accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), input.size(0))
        top1.update(prec1[0], input.size(0))
        top5.update(prec5[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                  'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                      epoch,
                      i,
                      len(train_loader),
                      batch_time=batch_time,
                      data_time=data_time,
                      loss=losses,
                      top1=top1,
                      top5=top5))
Ejemplo n.º 3
0
    def train(self):
        objs = dutils.AverageMeter()
        top1 = dutils.AverageMeter()

        for step, (input, target) in enumerate(self.train_queue):
            self.model.train()
            n = input.size(0)
            input = input.cuda(non_blocking=True)
            target = target.cuda(non_blocking=True)

            # Get a random minibatch from the search queue(validation set) with replacement
            input_search, target_search = next(iter(self.valid_queue))
            input_search = input_search.cuda(non_blocking=True)
            target_search = target_search.cuda(non_blocking=True)

            # Update the architecture parameters
            self.architect.step(input,
                                target,
                                input_search,
                                target_search,
                                self.lr,
                                self.optimizer,
                                unrolled=self.args.sec_approx)

            self.optimizer.zero_grad()

            logits = self.model(input)
            loss = self.criterion(logits, target)

            loss.backward()
            nn.utils.clip_grad_norm_(self.model.parameters(),
                                     self.args.grad_clip)

            # Update the network parameters
            self.optimizer.step()

            prec1 = dutils.accuracy(logits, target, topk=(1, ))[0]
            objs.update(loss.item(), n)
            top1.update(prec1.item(), n)

            if step % args.report_freq == 0:
                self.logger.info('model size: %f',
                                 dutils.calc_parameters_count(self.model))
                self.logger.info('train %03d loss: %e top1: %f', step,
                                 objs.avg, top1.avg)

        return top1.avg, objs.avg
Ejemplo n.º 4
0
def validate(val_loader, model, criterion):
    batch_time = dutils.AverageMeter()
    losses = dutils.AverageMeter()
    top1 = dutils.AverageMeter()
    top5 = dutils.AverageMeter()

    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        end = time.time()
        for i, (input, target) in enumerate(val_loader):
            if args.gpu is not None:
                input = input.cuda(args.gpu, non_blocking=True)
            target = target.cuda(args.gpu, non_blocking=True)

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec1, prec5 = dutils.accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), input.size(0))
            top1.update(prec1[0], input.size(0))
            top5.update(prec5[0], input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % args.print_freq == 0:
                print('Test: [{0}/{1}]\t'
                      'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                      'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                      'Prec@1 {top1.val:.3f} ({top1.avg:.3f})\t'
                      'Prec@5 {top5.val:.3f} ({top5.avg:.3f})'.format(
                          i,
                          len(val_loader),
                          batch_time=batch_time,
                          loss=losses,
                          top1=top1,
                          top5=top5))

        print(' * Prec@1 {top1.avg:.3f} Prec@5 {top5.avg:.3f}'.format(
            top1=top1, top5=top5))
Ejemplo n.º 5
0
    def infer(self):
        objs = dutils.AverageMeter()
        top1 = dutils.AverageMeter()
        top5 = dutils.AverageMeter()
        self.model.eval()
        with torch.no_grad():
            for step, (input, target) in enumerate(self.valid_queue):
                input = input.cuda(self.device, non_blocking=True)
                target = target.cuda(self.device, non_blocking=True)

                logits, _ = self.model(input)
                loss = self.criterion(logits, target)

                prec1, prec5 = dutils.accuracy(logits, target, topk=(1,5))
                n = input.size(0)
                objs.update(loss.item(), n)
                top1.update(prec1.item(), n)
                top5.update(prec5.item(), n)

                if step % args.report_freq == 0:
                    self.logger.info('valid %03d %e %f %f', step, objs.avg, top1.avg, top5.avg)
            return top1.avg, top5.avg, objs.avg