Ejemplo n.º 1
0
    def train_epoch(self, epoch, printer=print):
        """Run one training epoch over ``self.train_loader``.

        For every batch delivered by the prefetcher this runs the model
        (which returns ``(logits, aux_logits)``), adds the auxiliary loss
        weighted by ``self.config.aux_weight`` when ``self.use_aux`` is set,
        back-propagates (through ``amp.scale_loss`` unless ``self.opt_level``
        is ``'O0'``), clips the gradient norm to ``self.config.grad_clip`` and
        steps the optimizer. ``self.steps`` is incremented once per batch.

        Args:
            epoch: Epoch index; only used in the progress messages.
            printer: Callable that receives each formatted progress string.
        """
        top1 = AverageMeter()
        top5 = AverageMeter()
        losses = AverageMeter()

        # Read once; the same value is reported for every log line this epoch.
        cur_lr = self.optimizer.param_groups[0]['lr']

        self.model.train()
        num_batches = len(self.train_loader)
        prefetcher = data_prefetcher(self.train_loader)
        X, y = prefetcher.next()
        i = 0  # 1-based batch counter within this epoch
        while X is not None:
            i += 1
            N = X.size(0)
            self.steps += 1

            logits, aux_logits = self.model(X)
            loss = self.criterion(logits, y)

            if self.use_aux:
                loss += self.config.aux_weight * self.criterion(aux_logits, y)

            self.optimizer.zero_grad()
            if self.opt_level == 'O0':
                loss.backward()
            else:
                with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                    scaled_loss.backward()

            # NOTE(review): when amp is active the Apex docs recommend clipping
            # amp.master_params(self.optimizer) rather than the model
            # parameters -- confirm for the opt levels used here.
            nn.utils.clip_grad_norm_(self.model.parameters(),
                                     self.config.grad_clip)
            self.optimizer.step()

            prec1, prec5 = accuracy(logits, y, topk=(1, 5))
            # Convert to Python scalars once; each .item() call is reused below.
            loss_val = loss.item()
            prec1_val = prec1.item()
            prec5_val = prec5.item()
            losses.update(loss_val, N)
            top1.update(prec1_val, N)
            top5.update(prec5_val, N)

            if self.steps % self.log_step == 0 and self.rank == 0:
                self.writer.add_scalar('train/lr', round(cur_lr, 5),
                                       self.steps)
                self.writer.add_scalar('train/loss', loss_val, self.steps)
                self.writer.add_scalar('train/top1', prec1_val, self.steps)
                self.writer.add_scalar('train/top5', prec5_val, self.steps)

            # ``i`` runs from 1 to ``num_batches``, so the last batch is
            # ``i == num_batches`` (not ``num_batches - 1``).
            if self.gpu == 0 and (i % self.config.print_freq == 0
                                  or i == num_batches):
                printer(
                    f'Train: Epoch: [{epoch}][{i}/{num_batches}]\t'
                    f'Step {self.steps}\t'
                    f'lr {round(cur_lr, 5)}\t'
                    f'Loss {losses.val:.4f} ({losses.avg:.4f})\t'
                    f'Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})\t')

            X, y = prefetcher.next()

        if self.gpu == 0:
            printer("Train: [{:3d}/{}] Final Prec@1 {:.4%}".format(
                epoch, self.total_epochs - 1, top1.avg))
Ejemplo n.º 2
0
    def val_epoch(self, epoch, printer=print):
        """Evaluate the model on ``self.valid_loader`` without gradients.

        The model is switched to eval mode and every prefetched batch is run
        under ``torch.no_grad()``. Loss and top-1/top-5 precision are
        accumulated in meters; on rank 0 the epoch averages are written to
        ``self.writer`` and a summary line is printed.

        Args:
            epoch: Epoch index; only used in the progress messages.
            printer: Callable that receives each formatted progress string.
                Defaults to ``print``.

        Returns:
            ``top1.avg`` of the top-1 precision meter after the last batch.
        """
        top1 = AverageMeter()
        top5 = AverageMeter()
        losses = AverageMeter()

        self.model.eval()

        num_batches = len(self.valid_loader)
        prefetcher = data_prefetcher(self.valid_loader)
        X, y = prefetcher.next()
        i = 0  # 1-based batch counter

        with torch.no_grad():
            while X is not None:
                N = X.size(0)
                i += 1

                # The second model output is not used during validation.
                logits, _ = self.model(X)

                loss = self.criterion(logits, y)

                prec1, prec5 = accuracy(logits, y, topk=(1, 5))
                losses.update(loss.item(), N)
                top1.update(prec1.item(), N)
                top5.update(prec5.item(), N)

                # ``i`` runs from 1 to ``num_batches``; report the true last
                # batch rather than the second-to-last one.
                if self.rank == 0 and (i % self.config.print_freq == 0
                                       or i == num_batches):
                    printer(
                        f'Valid: Epoch: [{epoch}][{i}/{num_batches}]\t'
                        f'Step {self.steps}\t'
                        f'Loss {losses.avg:.4f}\t'
                        f'Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})')

                X, y = prefetcher.next()

        if self.rank == 0:
            self.writer.add_scalar('val/loss', losses.avg, self.steps)
            self.writer.add_scalar('val/top1', top1.avg, self.steps)
            self.writer.add_scalar('val/top5', top5.avg, self.steps)

            printer("Valid: [{:3d}/{}] Final Prec@1 {:.4%}".format(
                epoch, self.total_epochs - 1, top1.avg))

        return top1.avg
Ejemplo n.º 3
0
    def train_epoch(self, epoch, printer=print):
        """Run one training epoch on paired views from ``self.train_loader``.

        Every prefetched batch must be a 5-D tensor; indices 0 and 1 along
        its second axis are sliced out as the two views. Per iteration
        ``self.adjust_learning_rate`` and ``self.adjust_mm`` are called with
        the current step count, the model is run on both views together with
        ``self.mm``, ``self.forward_loss`` is back-propagated (through
        ``amp.scale_loss`` unless ``self.opt_level`` is ``'O0'``) and the
        optimizer is stepped. Separate meters track the wall-clock time spent
        on data preparation, forward, backward and logging.

        Args:
            epoch: Epoch index; forwarded to ``self.data_ins.set_epoch`` and
                shown in the progress line.
            printer: Callable that receives the formatted progress line.
        """
        # One wall-clock meter per phase of an iteration.
        batch_time = eval_util.AverageMeter()
        data_time = eval_util.AverageMeter()
        forward_time = eval_util.AverageMeter()
        backward_time = eval_util.AverageMeter()
        log_time = eval_util.AverageMeter()

        self.model.train()

        iter_start = time.time()
        self.data_ins.set_epoch(epoch)

        prefetcher = data_prefetcher(self.train_loader)
        i = 0
        images, _ = prefetcher.next()
        while images is not None:
            i += 1
            # Schedules are evaluated at the step count *before* it advances.
            self.adjust_learning_rate(self.steps)
            self.adjust_mm(self.steps)
            self.steps += 1

            assert images.dim() == 5, \
                f"Input must have 5 dims, got: {images.dim()}"
            view1, view2 = (images[:, k, ...].contiguous() for k in (0, 1))
            # Data time spans from the end of the previous iteration until
            # both views are ready.
            data_time.update(time.time() - iter_start)

            # Forward pass.
            phase_start = time.time()
            q, target_z = self.model(view1, view2, self.mm)
            forward_time.update(time.time() - phase_start)

            # Loss, backward pass and parameter update.
            phase_start = time.time()
            loss = self.forward_loss(q, target_z)

            self.optimizer.zero_grad()
            if self.opt_level != 'O0':
                with amp.scale_loss(loss, self.optimizer) as scaled_loss:
                    scaled_loss.backward()
            else:
                loss.backward()
            self.optimizer.step()
            backward_time.update(time.time() - phase_start)

            # Bookkeeping.
            phase_start = time.time()
            on_log_step = self.steps % self.log_step == 0
            if on_log_step:
                self.logger.update('steps', self.steps)
                current_lr = self.optimizer.param_groups[0]['lr']
                self.logger.update('lr', round(current_lr, 5))
                self.logger.update('mm', round(self.mm, 5))
                self.logger.update('loss', loss.item(), view1.size(0))

                if self.rank == 0:
                    loss_x = self.logger.get_key_val('steps')
                    loss_y = self.logger.get_key_val('loss')
                    self.loss_log_tool.update(loss_x, loss_y)
                    lr_x = self.logger.get_key_val('steps')
                    lr_y = self.logger.get_key_val('lr')
                    self.lr_log_tool.update(lr_x, lr_y)

                    # Curves are re-plotted and saved only every 100 steps.
                    if self.steps % 100 == 0:
                        curves = (
                            (self.loss_log_tool, self.loss_log_png, 'loss'),
                            (self.lr_log_tool, self.lr_log_png, 'lr'),
                        )
                        for tool, png_path, tag in curves:
                            tool.plot(png_path,
                                      x_label='steps',
                                      y_label=tag,
                                      label=tag)
                            tool.save_log()
            log_time.update(time.time() - phase_start)

            batch_time.update(time.time() - iter_start)
            iter_start = time.time()

            # Progress line, emitted by the process with gpu index 0 only.
            if self.gpu == 0 and on_log_step:
                message = (
                    f'Epoch: [{epoch}][{i}/{len(self.train_loader)}]\t{str(self.logger)}\t'
                    f'Batch Time {batch_time.val:.4f} ({batch_time.avg:.4f})\t'
                    f'Data Time {data_time.val:.4f} ({data_time.avg:.4f})\t'
                    f'forward Time {forward_time.val:.4f} ({forward_time.avg:.4f})\t'
                    f'backward Time {backward_time.val:.4f} ({backward_time.avg:.4f})\t'
                    f'Log Time {log_time.val:.4f} ({log_time.avg:.4f})\t')
                printer(message)

            images, _ = prefetcher.next()
Ejemplo n.º 4
0
    def train_epoch(self, epoch, printer=print):
        """Run one search epoch, alternating architecture and weight updates.

        For each training batch this first performs an architecture step
        (``self.architect.unrolled_backward`` followed by
        ``self.alpha_optim.step()``) using the current training and validation
        batches, then a weight step on the training batch with gradient-norm
        clipping at ``self.config.w_grad_clip``. ``self.steps`` is incremented
        once per batch.

        Args:
            epoch: Epoch index; only used in the progress messages.
            printer: Callable that receives each formatted progress string.
        """
        top1 = AverageMeter()
        top5 = AverageMeter()
        losses = AverageMeter()

        # Read once; the same value is used for every batch in this epoch.
        cur_lr = self.lr_scheduler.get_last_lr()[0]

        self.model.print_alphas(self.logger)
        self.model.train()

        num_batches = len(self.train_loader)
        prefetcher_trn = data_prefetcher(self.train_loader)
        prefetcher_val = data_prefetcher(self.valid_loader)
        trn_X, trn_y = prefetcher_trn.next()
        val_X, val_y = prefetcher_val.next()
        i = 0  # 1-based batch counter within this epoch
        while trn_X is not None:
            i += 1
            N = trn_X.size(0)
            self.steps += 1

            if val_X is None:
                # The validation prefetcher ran out before the training one;
                # restart it so the architecture step never receives None.
                prefetcher_val = data_prefetcher(self.valid_loader)
                val_X, val_y = prefetcher_val.next()

            # architect step (alpha)
            self.alpha_optim.zero_grad()
            self.architect.unrolled_backward(trn_X, trn_y, val_X, val_y,
                                             cur_lr, self.w_optim)
            self.alpha_optim.step()

            # child network step (w)
            self.w_optim.zero_grad()
            logits = self.model(trn_X)
            loss = self.model.criterion(logits, trn_y)
            loss.backward()
            nn.utils.clip_grad_norm_(self.model.weights(),
                                     self.config.w_grad_clip)
            self.w_optim.step()

            prec1, prec5 = accuracy(logits, trn_y, topk=(1, 5))
            # Convert to Python scalars once; each .item() call is reused below.
            loss_val = loss.item()
            prec1_val = prec1.item()
            prec5_val = prec5.item()
            losses.update(loss_val, N)
            top1.update(prec1_val, N)
            top5.update(prec5_val, N)

            if self.steps % self.log_step == 0:
                self.writer.add_scalar('train/lr', round(cur_lr, 5),
                                       self.steps)
                self.writer.add_scalar('train/loss', loss_val, self.steps)
                self.writer.add_scalar('train/top1', prec1_val, self.steps)
                self.writer.add_scalar('train/top5', prec5_val, self.steps)

            # ``i`` runs from 1 to ``num_batches``, so the last batch is
            # ``i == num_batches`` (not ``num_batches - 1``).
            if i % self.config.print_freq == 0 or i == num_batches:
                printer(
                    f'Train: Epoch: [{epoch}][{i}/{num_batches}]\t'
                    f'Step {self.steps}\t'
                    f'lr {round(cur_lr, 5)}\t'
                    f'Loss {losses.val:.4f} ({losses.avg:.4f})\t'
                    f'Prec@(1,5) ({top1.avg:.1%}, {top5.avg:.1%})\t')

            trn_X, trn_y = prefetcher_trn.next()
            val_X, val_y = prefetcher_val.next()

        printer("Train: [{:3d}/{}] Final Prec@1 {:.4%}".format(
            epoch, self.total_epochs - 1, top1.avg))