Example #1
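The examples below are training entry points from an NYU-Depth estimation project. They assume module-level imports and globals along these lines; the project-local import paths and the dtype global are assumptions inferred from usage, not the repository's actual layout:

import os
import torch
import torchvision
from torch.optim import lr_scheduler

# project-local names used by the examples (actual import paths unknown):
# from config import cfg
# from data import NYUDepthDataset, NyuDepthMat
# from models import ResDUCNet, DUCNet
# from losses import berHu, relativeloss
# from utils import Visualizer, validate, generate_relative_pos

# global tensor type used throughout both examples
dtype = torch.cuda.FloatTensor if torch.cuda.is_available() else torch.FloatTensor
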
def main():
    # load data
    train_loader = torch.utils.data.DataLoader(NYUDepthDataset(
        cfg.trainval_data_root,
        'train',
        sample_num=cfg.sample_num,
        superpixel=False,
        relative=False,
        transform=True),
                                               batch_size=cfg.batch_size,
                                               shuffle=True,
                                               num_workers=cfg.num_workers,
                                               drop_last=True)
    print('Train Batches:', len(train_loader))

    # val_loader = torch.utils.data.DataLoader(NYUDepthDataset(cfg.trainval_data_root, 'val', transform=True),
    #                                          batch_size=cfg.batch_size, shuffle=True,
    #                                          num_workers=cfg.num_workers, drop_last=True)
    # print('Validation Batches:', len(val_loader))

    test_set = NyuDepthMat(
        cfg.test_data_root,
        '/home/ans/PycharmProjects/SDFCN/data/testIdxs.txt')
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=cfg.batch_size,
                                              shuffle=True,
                                              drop_last=True)

    # train_set = NyuDepthMat(cfg.test_data_root, '/home/ans/PycharmProjects/SDFCN/data/trainIdxs.txt')
    # train_loader = torch.utils.data.DataLoader(train_set,
    #                                           batch_size=cfg.batch_size,
    #                                           shuffle=True, drop_last=True)
    # train_loader = test_loader
    #
    val_loader = test_loader
    # load model and weights
    # model = FCRN(cfg.batch_size)
    model = ResDUCNet(model=torchvision.models.resnet50(pretrained=False))
    init_upsample = False
    # print(model)

    loss_fn = berHu()

    if cfg.use_gpu:
        print('Use CUDA')
        model = model.cuda()
        # loss_fn = berHu().cuda()
        # loss_fn = torch.nn.MSELoss().cuda()
        loss_fn = torch.nn.L1Loss().cuda()
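        # note: this replaces the berHu loss created above with plain L1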

    start_epoch = 0
    best_val_err = 10e3

    if cfg.resume_from_file:
        if os.path.isfile(cfg.resume_file):
            print("=> loading checkpoint '{}'".format(cfg.resume_file))
            checkpoint = torch.load(cfg.resume_file)
            # start_epoch = checkpoint['epoch']
            start_epoch = 0
            # the key must match the one written by torch.save() below
            model.load_state_dict(checkpoint['state_dict'])
        else:
            print("=> no checkpoint found at '{}'".format(cfg.resume_file))
    # else:
    #     if init_upsample:
    #         print('Loading weights from ', cfg.weights_file)
    #         # bone_state_dict = load_weights(model, cfg.weights_file, dtype)
    #         model.load_state_dict(load_weights(model, cfg.weights_file, dtype))
    #     else:
    #         print('Loading weights from ', cfg.resnet50_file)
    #         pretrained_dict = torch.load(cfg.resnet50_file)
    #         model_dict = model.state_dict()
    #         pretrained_dict = {k: v for k, v in pretrained_dict.items() if k in model_dict}
    #         model_dict.update(pretrained_dict)
    #         model.load_state_dict(model_dict)
    #     print('Weights loaded.')

    # val_error, val_rmse = validate(val_loader, model, loss_fn)
    # print('before train: val_error %f, rmse: %f' % (val_error, val_rmse))

    vis = Visualizer(cfg.env)
    # optimizer and learning-rate schedule
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr)
    print("optimizer set.")
    scheduler = lr_scheduler.StepLR(optimizer,
                                    step_size=cfg.step,
                                    gamma=cfg.lr_decay)
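    # with StepLR, the learning rate after E epochs is
    #   cfg.lr * cfg.lr_decay ** (E // cfg.step)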

    for epoch in range(cfg.num_epochs):

        # NOTE: since PyTorch 1.1, scheduler.step() should be called after
        # optimizer.step(), i.e. at the end of each epoch, not here.
        scheduler.step()
        print('Starting train epoch %d / %d, lr=%f' %
              (start_epoch + epoch + 1, cfg.num_epochs,
               optimizer.param_groups[0]['lr']))

        model.train()
        running_loss = 0
        count = 0
        epoch_loss = 0

        for i_batch, sample_batched in enumerate(train_loader):
            # Variable is a no-op since PyTorch 0.4; tensors are used directly
            input_var = sample_batched['rgb'].type(dtype)
            depth_var = sample_batched['depth'].type(dtype)

            optimizer.zero_grad()
            output = model(input_var)
            loss = loss_fn(output, depth_var)

            if i_batch % cfg.print_freq == cfg.print_freq - 1:
                print('{0} batches, loss:{1}'.format(i_batch + 1, loss.item()))
                vis.plot('loss', loss.item())

            if i_batch % (cfg.print_freq * 10) == (cfg.print_freq * 10) - 1:
                vis.depth('pred', output)
                # vis.imshow('img', sample_batched['rgb'].type(dtype))
                vis.depth('depth', sample_batched['depth'].type(dtype))

            count += 1
            running_loss += loss.item()

            loss.backward()
            optimizer.step()

        epoch_loss = running_loss / count
        print('epoch loss:', epoch_loss)

        val_error, val_rmse = validate(val_loader, model, loss_fn, vis=vis)
        vis.plot('val_error', val_error)
        vis.plot('val_rmse', val_rmse)
        vis.log('epoch:{epoch},lr={lr},epoch_loss:{loss},val_error:{val_cm}'.
                format(epoch=start_epoch + epoch + 1,
                       loss=epoch_loss,
                       val_cm=val_error,
                       lr=optimizer.param_groups[0]['lr']))

        if val_error < best_val_err:
            best_val_err = val_error
            os.makedirs(cfg.checkpoint_dir, exist_ok=True)

            torch.save(
                {
                    'epoch': start_epoch + epoch + 1,
                    'state_dict': model.state_dict(),
                    # 'optimizer': optimizer.state_dict(),
                },
                os.path.join(
                    cfg.checkpoint_dir,
                    '{}_{}_epoch_{}_{}'.format(cfg.checkpoint, cfg.env,
                                               start_epoch + epoch + 1,
                                               cfg.checkpoint_postfix)))

    torch.save(
        {
            'epoch': start_epoch + epoch + 1,
            'state_dict': model.state_dict(),
            # 'optimizer': optimizer.state_dict(),
        },
        os.path.join(
            cfg.checkpoint_dir,
            '{}_{}_epoch_{}_{}'.format(cfg.checkpoint, cfg.env,
                                       start_epoch + epoch + 1,
                                       cfg.checkpoint_postfix)))
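
The helpers berHu() and validate() are used above but not defined in the snippet. Below is a minimal sketch of the reverse Huber (berHu) loss as defined in Laina et al. 2016, with the threshold c set to 20% of the largest absolute residual in the batch; treat it as an illustration, not the project's actual implementation:

class berHu(torch.nn.Module):
    """Reverse Huber loss: L1 below the threshold c, scaled L2 above it."""
    def forward(self, pred, target):
        diff = (pred - target).abs()
        c = 0.2 * diff.max().clamp(min=1e-6)  # batch-adaptive threshold
        l2 = (diff ** 2 + c ** 2) / (2 * c)   # quadratic branch for large errors
        return torch.where(diff <= c, diff, l2).mean()

And a sketch of validate() consistent with how it is called above, assuming it returns the mean loss and the RMSE over the loader (dtype as in the setup sketch):

def validate(val_loader, model, loss_fn, vis=None):
    """Return (mean validation loss, RMSE) over val_loader."""
    model.eval()
    total_loss, total_sq, n = 0.0, 0.0, 0
    with torch.no_grad():
        for batch in val_loader:
            pred = model(batch['rgb'].type(dtype))
            depth = batch['depth'].type(dtype)
            total_loss += loss_fn(pred, depth).item()
            total_sq += ((pred - depth) ** 2).mean().item()
            n += 1
    return total_loss / n, (total_sq / n) ** 0.5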
Example #2
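This variant assumes the same module-level setup sketched before Example #1; it additionally relies on the project-local DUCNet, relativeloss, and generate_relative_pos.
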
def main():
    # load data
    train_loader = torch.utils.data.DataLoader(NYUDepthDataset(
        cfg.trainval_data_root,
        'train',
        sample_num=cfg.sample_num,
        superpixel=False,
        relative=True,
        transform=True),
                                               batch_size=cfg.batch_size,
                                               shuffle=True,
                                               num_workers=cfg.num_workers,
                                               drop_last=True)
    print('Train Batches:', len(train_loader))

    # val_loader = torch.utils.data.DataLoader(NYUDepthDataset(cfg.trainval_data_root, 'val', transform=True),
    #                                          batch_size=cfg.batch_size, shuffle=True,
    #                                          num_workers=cfg.num_workers, drop_last=True)
    # print('Validation Batches:', len(val_loader))

    test_set = NyuDepthMat(
        cfg.test_data_root,
        '/home/ans/PycharmProjects/SDFCN/data/testIdxs.txt')
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=cfg.batch_size,
                                              shuffle=True,
                                              drop_last=True)

    # train_set = NyuDepthMat(cfg.test_data_root, '/home/ans/PycharmProjects/SDFCN/data/trainIdxs.txt')
    # train_loader = torch.utils.data.DataLoader(train_set,
    #                                           batch_size=cfg.batch_size,
    #                                           shuffle=True, drop_last=True)
    # train_loader = test_loader
    #
    val_loader = test_loader
    # load model and weights
    # model = FCRN(cfg.batch_size)
    model = DUCNet(model=torchvision.models.resnet50(pretrained=True))
    init_upsample = False
    # print(model)

    # loss_fn = berHu()

    if cfg.use_gpu:
        print('Use CUDA')
        model = model.cuda()
        berhu_loss = berHu().cuda()  # constructed but unused below
        rela_loss = relativeloss().cuda()
        loss_fn = torch.nn.MSELoss().cuda()
    else:
        print('This example requires a GPU; exiting.')
        exit(0)

    start_epoch = 0
    # resume_from_file = False
    best_val_err = 10e3

    vis = Visualizer(cfg.env)
    print('Created visdom environment:', cfg.env)
    # optimizer and learning-rate schedule
    optimizer = torch.optim.Adam(model.parameters(), lr=cfg.lr)
    print("optimizer set.")
    scheduler = lr_scheduler.StepLR(optimizer, step_size=cfg.step, gamma=0.1)
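    # decay is hard-coded to 0.1 here; Example #1 uses cfg.lr_decay instead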

    for epoch in range(cfg.num_epochs):

        # NOTE: since PyTorch 1.1, scheduler.step() should be called after
        # optimizer.step(), i.e. at the end of each epoch, not here.
        scheduler.step()
        print('Starting train epoch %d / %d, lr=%f' %
              (start_epoch + epoch + 1, cfg.num_epochs,
               optimizer.param_groups[0]['lr']))

        model.train()
        running_loss = 0
        count = 0
        epoch_loss = 0

        for i_batch, sample_batched in enumerate(train_loader):
            # Variable is a no-op since PyTorch 0.4; tensors are used directly
            input_var = sample_batched['rgb'].type(dtype)
            depth_var = sample_batched['depth'].type(dtype)

            optimizer.zero_grad()
            output = model(input_var)
            # loss = loss_fn(output, depth_var)
            loss1 = loss_fn(output, depth_var)
            Ah, Aw, Bh, Bw = generate_relative_pos(sample_batched['center'])
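            # (Ah, Aw), (Bh, Bw): pixel coordinates of each sampled point pair;
            # sample_batched['ord'] holds the ordinal label for each pair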

            loss2 = rela_loss(output[..., 0, Ah, Aw], output[..., 0, Bh, Bw],
                              sample_batched['ord'])
            loss = loss1 + loss2

            if i_batch % cfg.print_freq == cfg.print_freq - 1:
                # loss1 is the MSE term here, not berHu (see loss_fn above)
                print('{0} batches, loss:{1}, mse:{2}, relative:{3}'.format(
                    i_batch + 1, loss.item(), loss1.item(), loss2.item()))
                vis.plot('loss', loss.item())

            if i_batch % (cfg.print_freq * 10) == (cfg.print_freq * 10) - 1:
                vis.depth('pred', output)
                # vis.imshow('img', sample_batched['rgb'].type(dtype))
                vis.depth('depth', sample_batched['depth'].type(dtype))

            count += 1
            running_loss += loss.item()

            loss.backward()
            optimizer.step()

        epoch_loss = running_loss / count
        print('epoch loss:', epoch_loss)

        val_error, val_rmse = validate(val_loader, model, loss_fn, vis=vis)
        vis.plot('val_error', val_error)
        vis.plot('val_rmse', val_rmse)
        vis.log('epoch:{epoch},lr={lr},epoch_loss:{loss},val_error:{val_cm}'.
                format(epoch=start_epoch + epoch + 1,
                       loss=epoch_loss,
                       val_cm=val_error,
                       lr=optimizer.param_groups[0]['lr']))

        if val_error < best_val_err:
            best_val_err = val_error
            os.makedirs(cfg.checkpoint_dir, exist_ok=True)

            torch.save(
                {
                    'epoch': start_epoch + epoch + 1,
                    'state_dict': model.state_dict(),
                    # 'optimizer': optimizer.state_dict(),
                },
                os.path.join(
                    cfg.checkpoint_dir,
                    '{}_{}_epoch_{}_{}'.format(cfg.checkpoint, cfg.env,
                                               start_epoch + epoch + 1,
                                               cfg.checkpoint_postfix)))

    torch.save(
        {
            'epoch': start_epoch + epoch + 1,
            'state_dict': model.state_dict(),
            # 'optimizer': optimizer.state_dict(),
        },
        os.path.join(
            cfg.checkpoint_dir,
            '{}_{}_epoch_{}_{}'.format(cfg.checkpoint, cfg.env,
                                       start_epoch + epoch + 1,
                                       cfg.checkpoint_postfix)))
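
relativeloss() and generate_relative_pos() are project-local and not shown. Below is a minimal sketch of a pairwise ordinal (relative depth) loss in the spirit of Chen et al., NIPS 2016; the sign convention for ord (+1/-1 for ordered pairs, 0 for ties) is an assumption:

class relativeloss(torch.nn.Module):
    """Ranking loss on point pairs: log-loss for ordered pairs,
    squared difference for pairs labelled equal depth (ord == 0)."""
    def forward(self, z_A, z_B, ord):
        ord = ord.type_as(z_A)
        diff = z_A - z_B
        rank = torch.log(1 + torch.exp(-ord * diff))  # ordered pairs
        tie = diff ** 2                               # tied pairs
        return torch.where(ord == 0, tie, rank).mean()

generate_relative_pos() is assumed to unpack the sampled pair coordinates from sample_batched['center'] into the index tensors (Ah, Aw) and (Bh, Bw) used in the training loop.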