Example #1
    def validate(self, phase):
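        # Run one pass over the validation loader in eval mode, accumulate
        # timing/loss meters, and (on rank 0) log a visualization grid and
        # the averaged losses to TensorBoard.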
        btime_rec = utils.AverageMeter(0)
        dtime_rec = utils.AverageMeter(0)
        npts_rec = utils.AverageMeter(0)
        recorder = {}
        for rec in self.args.trainer['loss_record']:
            recorder[rec] = utils.AverageMeter(10)
   
        self.model.switch_to('eval')
    
        end = time.time()
        all_together = []
        for i, (image, sparse, mask, flow_target, rgb_target) in enumerate(self.val_loader):
            if ('val_iter' in self.args.trainer
                    and self.args.trainer['val_iter'] != -1
                    and i == self.args.trainer['val_iter']):
                break
    
            assert image.shape[0] > 0

            dtime_rec.update(time.time() - end)
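            # average number of non-zero mask entries per sample and channel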
            npts_rec.update(int(torch.sum(mask) / mask.size(0) / mask.size(1)))
    
            image = image.cuda()
            sparse = sparse.cuda()
            mask = mask.cuda()
            flow_target = flow_target.cuda()
            rgb_target = rgb_target.cuda()

            self.model.set_input(image, torch.cat([sparse, mask], dim=1), flow_target, rgb_target)
            tensor_dict, loss_dict = self.model.eval()
            for k in loss_dict.keys():
                recorder[k].update(utils.reduce_tensors(loss_dict[k]).item()) 
            btime_rec.update(time.time() - end)
            end = time.time()

            # tb visualize
            if self.rank == 0:
                if (i >= self.args.trainer['val_disp_start_iter']
                        and i < self.args.trainer['val_disp_end_iter']):
                    all_together.append(
                        utils.visualize_tensor(
                            image, mask, tensor_dict['flow_tensors'],
                            tensor_dict['common_tensors'],
                            tensor_dict['rgb_tensors'],
                            self.args.data['data_mean'],
                            self.args.data['data_div']))
                if (i == self.args.trainer['val_disp_end_iter']
                        and self.args.trainer['val_disp_end_iter']
                        > self.args.trainer['val_disp_start_iter']):
                    all_together = torch.cat(all_together, dim=2)
                    grid = vutils.make_grid(all_together, nrow=1, normalize=True,
                                            range=(0, 255), scale_each=False)
                    if self.tb_logger is not None:
                        self.tb_logger.add_image('Image_' + phase, grid, self.curr_step + 1)

        # logging
        if self.rank == 0:
            loss_str = ""
            for k in recorder.keys():
                if self.tb_logger is not None:
                    self.tb_logger.add_scalar('val_{}'.format(k), recorder[k].avg, self.curr_step + 1)
                loss_str += '{}: {loss.val:.4g} ({loss.avg:.4g})\t'.format(k, loss=recorder[k])

            self.logger.info('Validation Iter: [{0}]\t'.format(self.curr_step) +
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(batch_time=btime_rec) +
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(data_time=dtime_rec) +
                  loss_str +
                  'NPts {num_pts.val} ({num_pts.avg:.1f})\t'.format(num_pts=npts_rec))

        self.model.switch_to("train")
Example #2
    def evaluate(self, phase):
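        # Run the full evaluation loader in eval mode; each metric is reduced
        # across processes, divided by world_size, and logged on rank 0.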
        btime_rec = utils.AverageMeter(0)
        dtime_rec = utils.AverageMeter(0)
        recorder = {}
        for rec in self.args.trainer['eval_record']:
            recorder[rec] = utils.AverageMeter()
        self.model.switch_to('eval')
        end = time.time()
        for i, inputs in enumerate(
                self.eval_loader):  # padded samples will be evaluated twice.
            dtime_rec.update(time.time() - end)
            self.model.set_input(*inputs)

            eval_dict = self.model.evaluate()
            for k in eval_dict.keys():
                recorder[k].update(
                    utils.reduce_tensors(eval_dict[k]).item() /
                    self.world_size)

            btime_rec.update(time.time() - end)
            end = time.time()

        # logging
        if self.rank == 0:
            eval_str = ""
            for k in recorder.keys():
                if self.tb_logger is not None and phase == 'on_eval':
                    self.tb_logger.add_scalar('eval_{}'.format(k),
                                              recorder[k].avg, self.curr_step)
                eval_str += '{}: {value.avg:.5g}\t'.format(k,
                                                           value=recorder[k])

            self.logger.info(
                'Evaluation Iter: [{0}]\t'.format(self.curr_step) +
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                    batch_time=btime_rec) +
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    data_time=dtime_rec) + eval_str)

        self.model.switch_to('train')
Example #3
    def validate(self, phase):
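        # Validation loop with generic inputs: losses are reduced across
        # processes, and a visualization grid is sent to TensorBoard and also
        # written to disk as a PNG.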
        btime_rec = utils.AverageMeter(0)
        dtime_rec = utils.AverageMeter(0)
        recorder = {}
        for rec in self.args.trainer['loss_record']:
            recorder[rec] = utils.AverageMeter(10)

        self.model.switch_to('eval')

        end = time.time()
        all_together = []
        for i, inputs in enumerate(self.val_loader):
            if ('val_iter' in self.args.trainer
                    and self.args.trainer['val_iter'] != -1
                    and i == self.args.trainer['val_iter']):
                break

            dtime_rec.update(time.time() - end)

            self.model.set_input(*inputs)
            tensor_dict, loss_dict = self.model.forward()
            for k in loss_dict.keys():
                recorder[k].update(
                    utils.reduce_tensors(loss_dict[k]).item() /
                    self.world_size)
            btime_rec.update(time.time() - end)
            end = time.time()

            # tb visualize
            if self.rank == 0:
                disp_start = max(self.args.trainer['val_disp_start_iter'], 0)
                disp_end = min(self.args.trainer['val_disp_end_iter'],
                               len(self.val_loader))
                if (i >= disp_start and i < disp_end):
                    all_together.append(
                        utils.visualize_tensor(tensor_dict['common_tensors'],
                                               self.args.data['data_mean'],
                                               self.args.data['data_div']))
                if (i == disp_end - 1 and disp_end > disp_start):
                    all_together = torch.cat(all_together, dim=2)
                    grid = vutils.make_grid(all_together,
                                            nrow=1,
                                            normalize=True,
                                            range=(0, 255),
                                            scale_each=False)
                    if self.tb_logger is not None:
                        self.tb_logger.add_image('Image_' + phase, grid,
                                                 self.curr_step)

                    cv2.imwrite(
                        "{}/images/{}_{}.png".format(self.args.exp_path, phase,
                                                     self.curr_step),
                        grid.permute(1, 2, 0).numpy())

        # logging
        if self.rank == 0:
            loss_str = ""
            for k in recorder.keys():
                if self.tb_logger is not None and phase == 'on_val':
                    self.tb_logger.add_scalar('val_{}'.format(k),
                                              recorder[k].avg, self.curr_step)
                loss_str += '{}: {loss.val:.4g} ({loss.avg:.4g})\t'.format(
                    k, loss=recorder[k])

            self.logger.info(
                'Validation Iter: [{0}]\t'.format(self.curr_step) +
                'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(
                    batch_time=btime_rec) +
                'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                    data_time=dtime_rec) + loss_str)

        self.model.switch_to('train')
Example #4
    def train(self):
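        # Main training loop: advance the LR scheduler every iteration, call
        # model.step() on each batch, and periodically log, save checkpoints,
        # validate, and (optionally) evaluate.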

        btime_rec = utils.AverageMeter(10)
        dtime_rec = utils.AverageMeter(10)
        recorder = {}
        for rec in self.args.trainer['loss_record']:
            recorder[rec] = utils.AverageMeter(10)

        self.model.switch_to('train')

        end = time.time()
        for i, inputs in enumerate(self.train_loader):
            self.curr_step = self.start_iter + i
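            # advance the LR schedule to the current global step and read the LR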
            self.lr_scheduler.step(self.curr_step)
            curr_lr = self.lr_scheduler.get_lr()[0]

            # measure data loading time
            dtime_rec.update(time.time() - end)

            self.model.set_input(*inputs)
            loss_dict = self.model.step()
            for k in loss_dict.keys():
                recorder[k].update(
                    utils.reduce_tensors(loss_dict[k]).item() /
                    self.world_size)

            btime_rec.update(time.time() - end)
            end = time.time()

            self.curr_step += 1

            # logging
            if self.rank == 0 and self.curr_step % self.args.trainer[
                    'print_freq'] == 0:
                loss_str = ""
                if self.tb_logger is not None:
                    self.tb_logger.add_scalar('lr', curr_lr, self.curr_step)
                for k in recorder.keys():
                    if self.tb_logger is not None:
                        self.tb_logger.add_scalar('train_{}'.format(k),
                                                  recorder[k].avg,
                                                  self.curr_step)
                    loss_str += '{}: {loss.val:.4g} ({loss.avg:.4g})\t'.format(
                        k, loss=recorder[k])

                self.logger.info(
                    'Iter: [{0}/{1}]\t'.format(self.curr_step,
                                               len(self.train_loader)) +
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.
                    format(batch_time=btime_rec) +
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(
                        data_time=dtime_rec) + loss_str +
                    'lr {lr:.2g}'.format(lr=curr_lr))

            # save
            if (self.rank == 0
                    and (self.curr_step % self.args.trainer['save_freq'] == 0
                         or self.curr_step == self.args.model['total_iter'])):
                self.model.save_state(
                    "{}/checkpoints".format(self.args.exp_path),
                    self.curr_step)

            # validate
            if (self.curr_step % self.args.trainer['val_freq'] == 0
                    or self.curr_step == self.args.model['total_iter']):
                self.validate('on_val')

            if ((self.curr_step % self.args.trainer['eval_freq'] == 0
                 or self.curr_step == self.args.model['total_iter'])
                ) and self.args.trainer['eval']:
                self.evaluate('on_eval')
Example #5
    def train(self):
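        # Training loop variant with explicit batch unpacking and .cuda()
        # transfers; also tracks the number of valid sparse points (NPts).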
    
        btime_rec = utils.AverageMeter(10)
        dtime_rec = utils.AverageMeter(10)
        npts_rec = utils.AverageMeter(1000)
        recorder = {}
        for rec in self.args.trainer['loss_record']:
            recorder[rec] = utils.AverageMeter(10)
    
        self.model.switch_to('train')
    
        end = time.time()
        for i, (image, sparse, mask, flow_target, rgb_target) in enumerate(self.train_loader):
            self.curr_step = self.start_iter + i
            self.lr_scheduler.step(self.curr_step)
            curr_lr = self.lr_scheduler.get_lr()[0]

            # measure data loading time
            dtime_rec.update(time.time() - end)
            npts_rec.update(int(torch.sum(mask)/mask.size(0)/mask.size(1)))
    
            assert image.shape[0] > 0
            image = image.cuda()
            sparse = sparse.cuda()
            mask = mask.cuda()
            flow_target = flow_target.cuda()
            rgb_target = rgb_target.cuda()

            self.model.set_input(image, torch.cat([sparse, mask], dim=1), flow_target, rgb_target)
            loss_dict = self.model.step()
            for k in loss_dict.keys():
                recorder[k].update(utils.reduce_tensors(loss_dict[k]).item()) 

            btime_rec.update(time.time() - end)
            end = time.time()
    
            # logging
            if self.rank == 0 and self.curr_step % self.args.trainer['print_freq'] == 0:
                loss_str = ""
                if self.tb_logger is not None:
                    self.tb_logger.add_scalar('npts', npts_rec.avg, self.curr_step)
                    self.tb_logger.add_scalar('lr', curr_lr, self.curr_step)
                for k in recorder.keys():
                    if self.tb_logger is not None:
                        self.tb_logger.add_scalar('train_{}'.format(k), recorder[k].avg, self.curr_step + 1)
                    loss_str += '{}: {loss.val:.4g} ({loss.avg:.4g})\t'.format(k, loss=recorder[k])

                self.logger.info('Iter: [{0}/{1}]\t'.format(self.curr_step, len(self.train_loader)) +
                    'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'.format(batch_time=btime_rec) +
                    'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'.format(data_time=dtime_rec) +
                    loss_str +
                    'NPts {num_pts.val} ({num_pts.avg:.1f})\t'.format(num_pts=npts_rec) +
                    'lr {lr:.2g}'.format(lr=curr_lr))
    
            # validate
            if ((self.curr_step + 1) % self.args.trainer['val_freq'] == 0
                    or (self.curr_step + 1) == self.args.model['total_iter']):
                self.validate('on_val')
    
            # save
            if self.rank == 0 and (
                    (self.curr_step + 1) % self.args.trainer['save_freq'] == 0
                    or (self.curr_step + 1) == self.args.model['total_iter']):
                self.model.save_state(
                    "{}/checkpoints".format(self.args.exp_path),
                    self.curr_step + 1)
Example #6
    def train(self):
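        # Training loop variant that renders a console progress bar and prints
        # colored (Style/Fore) status lines instead of using the logger.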

        btime_rec = utils.AverageMeter(10)
        dtime_rec = utils.AverageMeter(10)
        recorder = {}
        for rec in self.args.trainer['loss_record']:
            recorder[rec] = utils.AverageMeter(10)

        self.model.switch_to('train')

        end = time.time()
        total = len(self.train_loader)
        bar = ProgressBar(total, max_width=80)
        running_loss = []
        for i, inputs in enumerate(self.train_loader):
            bar.numerator = i + 1
            if self.rank == 0:
                print(bar, end='\r')
            self.curr_step = self.start_iter + i
            self.lr_scheduler.step(self.curr_step)
            curr_lr = self.lr_scheduler.get_lr()[0]

            # measure data loading time
            dtime_rec.update(time.time() - end)

            self.model.set_input(*inputs)
            loss_dict = self.model.step()
            for k in loss_dict.keys():
                recorder[k].update(utils.reduce_tensors(loss_dict[k]).item())

            btime_rec.update(time.time() - end)
            end = time.time()

            self.curr_step += 1

            # logging
            if self.rank == 0 and self.curr_step % self.args.trainer[
                    'print_freq'] == 0:
                loss_str = ""
                if self.tb_logger is not None:
                    self.tb_logger.add_scalar('lr', curr_lr, self.curr_step)
                for k in recorder.keys():
                    if self.tb_logger is not None:
                        self.tb_logger.add_scalar('train_{}'.format(k),
                                                  recorder[k].avg,
                                                  self.curr_step)
                    loss_str += '{}: {loss.val:.4g} ({loss.avg:.4g})\t'.format(
                        k, loss=recorder[k])

                print(Style.BRIGHT + Fore.CYAN + 'Iter: [{0}/{1}]\t'.format(
                    self.curr_step, len(self.train_loader)) + loss_str +
                      'lr {lr:.2g}'.format(lr=curr_lr))

            # save
            if (self.rank == 0
                    and (self.curr_step % self.args.trainer['save_freq'] == 0
                         or self.curr_step == self.args.model['total_iter'])):
                self.model.save_state(
                    "{}/checkpoints".format(self.args.exp_path),
                    self.curr_step)

            if (self.curr_step == self.args.model['total_iter']):
                break
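
All of the examples above rely on a utils.AverageMeter helper that records the most recent value (.val) and a running average (.avg), and is constructed with an optional length argument (AverageMeter(0), AverageMeter(10), AverageMeter(1000)). The class below is only a minimal sketch of that interface, inferred from how the examples use it; it is not the project's actual implementation, and the constructor argument is assumed to be a moving-average window length, with 0 meaning an average over everything seen so far.

# Hypothetical sketch of the AverageMeter interface used above;
# the real utils.AverageMeter in the source project may differ.
from collections import deque


class AverageMeter(object):
    """Track the most recent value (.val) and a running average (.avg)."""

    def __init__(self, length=0):
        # length == 0: average over every update seen so far (assumption).
        # length > 0 : moving average over the last `length` updates.
        self.length = length
        self.history = deque(maxlen=length if length > 0 else None)
        self.val = 0.0
        self.sum = 0.0
        self.count = 0

    def update(self, value):
        self.val = value
        if self.length > 0:
            self.history.append(value)
        else:
            self.sum += value
            self.count += 1

    @property
    def avg(self):
        if self.length > 0:
            return sum(self.history) / max(len(self.history), 1)
        return self.sum / max(self.count, 1)

With this sketch, recorder[k].update(x) followed by '{loss.val:.4g} ({loss.avg:.4g})'.format(loss=recorder[k]) formats the same way as the logging lines in the examples.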