Beispiel #1
0
def evaluate(model: torch.nn.Module, criterion: torch.nn.Module, data_loader: Iterable, device: torch.device,
             epoch: int, summary: TensorboardSummary, save_output: bool):
    """
    Run one evaluation pass over ``data_loader`` and report averaged statistics.

    The model and criterion are switched to eval mode, every batch is pushed through
    ``forward_pass`` and the resulting statistics are written to tensorboard and to the
    text logger obtained from ``summary``.

    :param model: network to evaluate
    :param criterion: loss module handed to ``forward_pass``
    :param data_loader: iterable of batches; each batch is indexed by the keys 'left', 'right',
                        'disp' and 'occ_mask' when ``save_output`` is set
    :param device: device handed to ``forward_pass``
    :param epoch: epoch number used for the logger and the tensorboard summary
    :param summary: summary object providing ``config_logger`` and consumed by ``write_summary``
    :param save_output: if True, the first sample of every valid batch is collected and flushed with
                        ``save_and_clear`` once 50 samples are buffered, and once more after the loop
    :return: dict of statistics; 'epe' and 'iou' are divided by the number of valid batches and
             'px_error_rate' is 'error_px' / 'total_px'. These three entries are NaN when there was
             nothing to average over (no valid batch / no counted pixel).
    """
    model.eval()
    criterion.eval()

    # initialize stats
    # NOTE(review): the dict is passed to forward_pass, which presumably accumulates into it in place - confirm
    eval_stats = {'l1': 0.0, 'occ_be': 0.0, 'l1_raw': 0.0, 'iou': 0.0, 'rr': 0.0, 'epe': 0.0, 'error_px': 0.0,
                  'total_px': 0.0}
    # config text logger
    logger = summary.config_logger(epoch)
    # init output file
    if save_output:
        output_idx = 0
        output_file = {'left': [], 'right': [], 'disp': [], 'disp_pred': [], 'occ_mask': [], 'occ_pred': []}

    tbar = tqdm(data_loader)
    valid_samples = len(tbar)
    # nothing is back-propagated during evaluation, so do not record autograd graphs
    with torch.no_grad():
        for idx, data in enumerate(tbar):
            # forward pass
            outputs, losses, _ = forward_pass(model, data, device, criterion, eval_stats, idx, logger)

            # a batch without losses does not count towards the averages
            if losses is None:
                valid_samples -= 1
                continue

            # clear cache
            torch.cuda.empty_cache()

            # save output
            if save_output:
                output_file['left'].append(data['left'][0])
                output_file['right'].append(data['right'][0])
                output_file['disp'].append(data['disp'][0])
                output_file['occ_mask'].append(data['occ_mask'][0].cpu())
                output_file['disp_pred'].append(outputs['disp_pred'].data[0].cpu())
                output_file['occ_pred'].append(outputs['occ_pred'].data[0].cpu())

                # save to file
                if len(output_file['left']) >= 50:
                    output_idx = save_and_clear(output_idx, output_file)

    # save to file
    if save_output:
        save_and_clear(output_idx, output_file)

    # compute avg
    # NaN (instead of a ZeroDivisionError) marks averages that have no data behind them; NaN also
    # never compares as "better" than a real score
    undefined = float('nan')
    if valid_samples > 0:
        eval_stats['epe'] = eval_stats['epe'] / valid_samples
        eval_stats['iou'] = eval_stats['iou'] / valid_samples
    else:
        eval_stats['epe'] = undefined
        eval_stats['iou'] = undefined

    if eval_stats['total_px'] > 0:
        eval_stats['px_error_rate'] = eval_stats['error_px'] / eval_stats['total_px']
    else:
        eval_stats['px_error_rate'] = undefined

    # write to tensorboard
    write_summary(eval_stats, summary, epoch, 'eval')

    # log to text
    logger.info('Epoch %d, epe %.4f, iou %.4f, px error %.4f' %
                (epoch, eval_stats['epe'], eval_stats['iou'], eval_stats['px_error_rate']))
    print()

    return eval_stats
Beispiel #2
0
def train_one_epoch(model: torch.nn.Module, data_loader: Iterable, optimizer: torch.optim.Optimizer,
                    criterion: torch.nn.Module, device: torch.device, epoch: int, summary: TensorboardSummary,
                    max_norm: float = 0, amp: object = None):
    """
    train model for 1 epoch

    Every batch goes through ``forward_pass``; batches for which it returns no losses are skipped.
    The process is terminated with exit code 1 as soon as the aggregated loss is not finite.

    :param model: network to train (switched to train mode)
    :param data_loader: iterable of training batches
    :param optimizer: optimizer that is zeroed and stepped once per valid batch
    :param criterion: loss module handed to ``forward_pass`` (switched to train mode)
    :param device: device handed to ``forward_pass``
    :param epoch: epoch number used for the tensorboard summary
    :param summary: summary object consumed by ``write_summary``
    :param max_norm: gradient-norm clipping threshold; clipping is skipped unless it is > 0
    :param amp: optional mixed-precision helper exposing ``scale_loss(loss, optimizer)``;
                None runs a plain backward pass
    :return: None
    """
    model.train()
    criterion.train()

    # initialize stats
    # NOTE(review): the dict is passed to forward_pass, which presumably accumulates into it in place - confirm
    train_stats = {'l1': 0.0, 'occ_be': 0.0, 'l1_raw': 0.0, 'iou': 0.0, 'rr': 0.0, 'epe': 0.0, 'error_px': 0.0,
                   'total_px': 0.0}

    tbar = tqdm(data_loader)
    for data in tbar:
        # forward pass
        _, losses, _ = forward_pass(model, data, device, criterion, train_stats)

        if losses is None:
            continue

        # terminate training if exploded
        # read the scalar once; it is needed for both the check and the message
        loss_value = losses['aggregated'].item()
        if not math.isfinite(loss_value):
            print("Loss is {}, stopping training".format(loss_value))
            sys.exit(1)

        # backprop
        optimizer.zero_grad()
        if amp is not None:
            with amp.scale_loss(losses['aggregated'], optimizer) as scaled_loss:
                scaled_loss.backward()
        else:
            losses['aggregated'].backward()

        # clip norm
        # NOTE(review): if amp is NVIDIA apex, its docs suggest clipping amp.master_params(optimizer)
        # rather than model.parameters() - confirm before changing
        if max_norm > 0:
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm)

        # step optimizer
        optimizer.step()

        print('pixel_error', losses['error_px'] / losses['total_px'])

        # clear cache
        torch.cuda.empty_cache()

    # compute avg
    # total_px is still its initial 0.0 when every batch was skipped (or the loader was empty);
    # report NaN in that case instead of raising ZeroDivisionError
    if train_stats['total_px'] > 0:
        train_stats['px_error_rate'] = train_stats['error_px'] / train_stats['total_px']
    else:
        train_stats['px_error_rate'] = float('nan')

    # log to tensorboard
    write_summary(train_stats, summary, epoch, 'train')

    print('Training loss', train_stats['l1'], 'pixel error rate', train_stats['px_error_rate'])
    print('RR loss', train_stats['rr'])

    return