Ejemplo n.º 1
0
def load_model(input_args):
    """Build the HSM stereo network and its Adam optimizer, optionally
    restoring pretrained weights.

    Parameters
    ----------
    input_args : argparse.Namespace
        Must provide: ``seed`` (int RNG seed), ``maxdisp`` (max disparity),
        ``loadmodel`` (local path or ``s3://`` URI of a checkpoint, or None),
        and ``savemodel`` (local directory used to cache downloaded weights).

    Returns
    -------
    tuple
        ``(model, optimizer)`` — the CUDA-resident, ``nn.DataParallel``-wrapped
        network and its Adam optimizer.
    """
    import subprocess  # local import: only needed for the optional s3 download

    torch.manual_seed(input_args.seed)
    model = hsm(input_args.maxdisp, clean=False, level=1)
    model = nn.DataParallel(model)
    model.cuda()

    # load model
    if input_args.loadmodel is not None:
        base_weights = input_args.loadmodel
        if base_weights.startswith('s3://'):
            # Cache remote weights locally before loading.
            filename = os.path.basename(base_weights)
            # BUG FIX: the cache path previously ended in a literal
            # '(unknown)' placeholder and never used `filename`, so every
            # s3 checkpoint was written to (and read from) the same bogus file.
            model_path = f'{input_args.savemodel}/initial_weights/{filename}'
            if not os.path.exists(model_path):
                # Ensure the cache directory exists before `aws s3 cp`.
                os.makedirs(os.path.dirname(model_path), exist_ok=True)
                # Argument-list form (no shell) so paths containing spaces or
                # shell metacharacters cannot break or inject into the command.
                subprocess.run(['aws', 's3', 'cp', base_weights, model_path],
                               check=True)
            base_weights = model_path

        pretrained_dict = torch.load(base_weights)
        # Drop keys containing 'disp' — presumably the maxdisp-dependent
        # disparity-regression buffers, which may not match the current
        # model's size; strict=False tolerates the resulting missing keys.
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if ('disp' not in k)
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)

    print('Number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))
    optimizer = optim.Adam(model.parameters(), lr=0.1, betas=(0.9, 0.999))
    torch.manual_seed(input_args.seed)  # set again
    torch.cuda.manual_seed(input_args.seed)
    return model, optimizer
Ejemplo n.º 2
0
                    help='test time resolution ratio 0-x')
# Remaining CLI options for this evaluation script (the parser itself and the
# earlier options are constructed above this excerpt).
parser.add_argument('--max_disp', type=float, default=-1,
                    help='maximum disparity to search for')
parser.add_argument('--level', type=int, default=1,
                    help='output level of output, default is level 1 (stage 3),\
                          can also use level 2 (stage 2) or level 3 (stage 1)')
args = parser.parse_args()



# dataloader
from dataloader import listfiles as DA
test_left_img, test_right_img, _, _ = DA.dataloader(args.datapath)

# construct model
# Single-GPU DataParallel wrapper; max disparity is hard-coded to 128 here
# (it is re-derived per image elsewhere in this script family).
model = hsm(128,args.clean,level=args.level)
model = nn.DataParallel(model, device_ids=[0])
model.cuda()

# Optional warm start from a checkpoint. Keys containing 'disp' are dropped
# (presumably maxdisp-dependent disparity-regression buffers — confirm);
# strict=False lets the remaining weights load despite missing keys.
if args.loadmodel is not None:
    pretrained_dict = torch.load(args.loadmodel)
    pretrained_dict['state_dict'] =  {k:v for k,v in pretrained_dict['state_dict'].items() if 'disp' not in k}
    model.load_state_dict(pretrained_dict['state_dict'],strict=False)
else:
    print('run with random init')
print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()])))

# dry run
# Feed a fixed-size zero image pair once, presumably to warm up CUDA kernels
# before timing real inputs (the forward call follows below this excerpt).
multip = 48
imgL = np.zeros((1,3,24*multip,32*multip))
imgR = np.zeros((1,3,24*multip,32*multip))
Ejemplo n.º 3
0
# Training-script CLI (the ArgumentParser is constructed above this excerpt).
parser.add_argument('--database', default='/ssd//',
                    help='data path')
parser.add_argument('--epochs', type=int, default=10,
                    help='number of epochs to train')
parser.add_argument('--batchsize', type=int, default=24,
                    help='samples per batch')
parser.add_argument('--loadmodel', default=None,
                    help='weights path')
parser.add_argument('--savemodel', default='./',
                    help='save path')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
args = parser.parse_args()
torch.manual_seed(args.seed)

# Build the HSM network at the requested max disparity and move it to GPU.
model = hsm(args.maxdisp,clean=False,level=1)
model = nn.DataParallel(model)
model.cuda()

# load model
# Optional warm start: keys containing 'disp' are dropped (presumably
# maxdisp-dependent disparity-regression buffers — confirm), and
# strict=False tolerates the resulting missing keys.
if args.loadmodel is not None:
    pretrained_dict = torch.load(args.loadmodel)
    pretrained_dict['state_dict'] =  {k:v for k,v in list(pretrained_dict['state_dict'].items()) if ('disp' not in k) }
    model.load_state_dict(pretrained_dict['state_dict'],strict=False)

print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()])))

optimizer = optim.Adam(model.parameters(), lr=0.1, betas=(0.9, 0.999))

def _init_fn(worker_id):
    # DataLoader worker init: re-seed NumPy from OS entropy in each worker.
    # Without this, forked workers inherit the parent's RNG state and produce
    # identical "random" augmentations. `worker_id` appears deliberately
    # unused — seeding is random rather than per-worker deterministic
    # (NOTE(review): confirm nondeterminism is intended here).
    np.random.seed()
def main():
    """Overfit/finetune HSM-Net on pseudo-ground-truth KITTI-raw frames.

    Parses the CLI, builds train/val dataloaders (train data is a single
    KITTI-raw image replicated ``batch_size * 16`` times; val is a single
    KITTI-2015 image), constructs the model, then runs the epoch loop with
    periodic validation, TensorBoard-style logging, and checkpointing.
    """
    parser = argparse.ArgumentParser(description='HSM-Net')
    parser.add_argument('--maxdisp',
                        type=int,
                        default=384,
                        help='maxium disparity')
    parser.add_argument('--name', default='name')
    parser.add_argument('--database',
                        default='/data/private',
                        help='data path')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        help='number of epochs to train')
    parser.add_argument(
        '--batch_size',
        type=int,
        default=16,
        # when maxdisp is 768, 18 is the most you can fit in 2 V100s (with syncBN on)
        help='samples per batch')
    parser.add_argument(
        '--val_batch_size',
        type=int,
        default=4,
        # when maxdisp is 768, 18 is the most you can fit in 2 V100s (with syncBN on)
        help='samples per batch')
    parser.add_argument('--loadmodel', default=None, help='weights path')
    parser.add_argument('--log_dir',
                        default="/data/private/logs/high-res-stereo")
    # parser.add_argument('--savemodel', default=os.path.join(os.getcwd(),'/trained_model'),
    #                     help='save path')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--val_epoch', type=int, default=4)
    parser.add_argument('--save_epoch', type=int, default=10)
    parser.add_argument("--val", action="store_true", default=False)
    parser.add_argument("--save_numpy", action="store_true", default=False)
    parser.add_argument("--testres", type=float, default=1.8)
    parser.add_argument("--threshold", type=float, default=0.7)
    parser.add_argument("--use_pseudoGT", default=False, action="store_true")
    parser.add_argument("--lr", default=1e-3, type=float)
    parser.add_argument("--lr_decay", default=2, type=int)
    parser.add_argument("--gpu", default=[0], nargs="+")
    parser.add_argument("--no_aug", default=False, action="store_true")

    args = parser.parse_args()
    torch.manual_seed(args.seed)
    torch.manual_seed(args.seed)  # set again
    torch.cuda.manual_seed(args.seed)
    batch_size = args.batch_size
    scale_factor = args.maxdisp / 384.  # controls training resolution
    # Timestamp the run name so repeated runs get distinct log directories.
    args.name = args.name + "_" + time.strftime('%l:%M%p_%Y%b%d').strip(" ")
    # nargs="+" yields strings; coerce GPU ids to ints for device_ids below.
    gpu = []
    for i in args.gpu:
        gpu.append(int(i))
    args.gpu = gpu

    # Single KITTI-raw frame plus its precomputed pseudo-GT disparity/entropy.
    root_dir = "/data/private/KITTI_raw/2011_09_26/2011_09_26_drive_0013_sync"
    disp_dir = "final-768px_testres-3.3/disp"
    entp_dir = "final-768px_testres-3.3/entropy"
    mode = "image"
    image_name = "0000000040.npy"  #* this is the 4th image in the validation set
    train_left, train_right, train_disp, train_entp = kitti_raw_loader(
        root_dir, disp_dir, entp_dir, mode=mode, image_name=image_name)
    # Replicate the single sample so one "epoch" contains batch_size*16 items.
    train_left = train_left * args.batch_size * 16
    train_right = train_right * args.batch_size * 16
    train_disp = train_disp * args.batch_size * 16
    train_entp = train_entp * args.batch_size * 16

    all_left_img, all_right_img, all_left_disp, left_val, right_val, disp_val_L = lk15.dataloader(
        '%s/KITTI2015/data_scene_flow/training/' % args.database, val=args.val)

    # Validate on exactly one image (index 3 of the KITTI-2015 val split).
    left_val = [left_val[3]]
    right_val = [right_val[3]]
    disp_val_L = [disp_val_L[3]]

    loader_kitti15 = DA.myImageFloder(train_left,
                                      train_right,
                                      train_disp,
                                      rand_scale=[0.9, 2.4 * scale_factor],
                                      order=0,
                                      use_pseudoGT=args.use_pseudoGT,
                                      entropy_threshold=args.threshold,
                                      left_entropy=train_entp,
                                      no_aug=args.no_aug)
    val_loader_kitti15 = DA.myImageFloder(left_val,
                                          right_val,
                                          disp_val_L,
                                          is_validation=True,
                                          testres=args.testres)

    train_data_inuse = loader_kitti15
    val_data_inuse = val_loader_kitti15

    # ! For internal bug in Pytorch, if you are going to set num_workers >0 in one dataloader, it must also be set to
    # ! n >0 for the other data loader as well (ex. 1 for valLoader and 10 for trainLoader)
    ValImgLoader = torch.utils.data.DataLoader(
        val_data_inuse,
        drop_last=False,
        batch_size=args.val_batch_size,
        shuffle=False,
        worker_init_fn=_init_fn,
        num_workers=args.val_batch_size)  #

    TrainImgLoader = torch.utils.data.DataLoader(
        train_data_inuse,
        batch_size=batch_size,
        shuffle=True,
        drop_last=True,
        worker_init_fn=_init_fn,
        num_workers=args.batch_size)  # , , worker_init_fn=_init_fn
    print('%d batches per epoch' % (len(train_data_inuse) // batch_size))

    model = hsm(args.maxdisp, clean=False, level=1)

    # Multi-GPU runs convert BatchNorm to synchronized BatchNorm.
    if len(args.gpu) > 1:
        from sync_batchnorm.sync_batchnorm import convert_model
        model = nn.DataParallel(model, device_ids=args.gpu)
        model = convert_model(model)
    else:
        model = nn.DataParallel(model, device_ids=args.gpu)

    model.cuda()

    # load model
    # Optional warm start: 'disp' keys are dropped (presumably
    # maxdisp-dependent disparity-regression buffers); strict=False
    # tolerates the resulting missing keys.
    if args.loadmodel is not None:
        print("loading pretrained model: " + str(args.loadmodel))
        pretrained_dict = torch.load(args.loadmodel)
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if ('disp' not in k)
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)

    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))

    log = logger.Logger(args.log_dir, args.name, save_numpy=args.save_numpy)
    total_iters = 0
    val_sample_count = 0
    val_batch_count = 0
    save_path = os.path.join(args.log_dir,
                             os.path.join(args.name, "saved_model"))
    os.makedirs(save_path, exist_ok=True)

    for epoch in range(1, args.epochs + 1):
        total_train_loss = 0
        train_score_accum_dict = {
        }  # accumulates scores throughout a batch to get average score
        train_score_accum_dict["num_scored"] = 0
        adjust_learning_rate(optimizer,
                             args.lr,
                             args.lr_decay,
                             epoch,
                             args.epochs,
                             decay_rate=0.1)

        print('Epoch %d / %d' % (epoch, args.epochs))

        # SAVE
        # Periodic checkpoint (skipped on the first epoch).
        if epoch != 1 and epoch % args.save_epoch == 0:
            print("saving weights at epoch: " + str(epoch))
            savefilename = os.path.join(save_path,
                                        'ckpt_' + str(total_iters) + '.tar')

            torch.save(
                {
                    'iters': total_iters,
                    'state_dict': model.state_dict(),
                    'train_loss': total_train_loss / len(TrainImgLoader),
                    "optimizer": optimizer.state_dict()
                }, savefilename)

        ## val ##

        # Validate on epoch 1 and then every val_epoch epochs.
        if epoch == 1 or epoch % args.val_epoch == 0:
            print("validating at epoch: " + str(epoch))
            val_score_accum_dict = {}
            val_img_idx = 0
            for batch_idx, (imgL_crop, imgR_crop,
                            disp_crop_L) in enumerate(ValImgLoader):

                vis, scores_list, err_map_list = val_step(
                    model, imgL_crop, imgR_crop, disp_crop_L, args.maxdisp,
                    args.testres)

                # Log per-image scores/error maps and accumulate batch sums.
                for score, err_map in zip(scores_list, err_map_list):
                    for (score_tag,
                         score_val), (map_tag,
                                      map_val) in zip(score.items(),
                                                      err_map.items()):
                        log.scalar_summary(
                            "val/im_" + str(val_img_idx) + "/" + score_tag,
                            score_val, val_sample_count)
                        log.image_summary("val/" + map_tag, map_val,
                                          val_sample_count)

                        if score_tag not in val_score_accum_dict.keys():
                            val_score_accum_dict[score_tag] = 0
                        val_score_accum_dict[score_tag] += score_val
                    val_img_idx += 1
                    val_sample_count += 1

                log.image_summary('val/left', imgL_crop[0:1], val_sample_count)
                log.image_summary('val/right', imgR_crop[0:1],
                                  val_sample_count)
                log.disp_summary('val/gt0', disp_crop_L[0:1],
                                 val_sample_count)  # <-- GT disp
                log.entp_summary('val/entropy', vis['entropy'],
                                 val_sample_count)
                log.disp_summary('val/output3', vis['output3'][0],
                                 val_sample_count)

            # NOTE(review): these are accumulated sums, not averages, despite
            # the "_batch_avg" suffix — confirm whether division was intended.
            for score_tag, score_val in val_score_accum_dict.items():
                log.scalar_summary("val/" + score_tag + "_batch_avg",
                                   score_val, epoch)

        ## training ##
        for batch_idx, (imgL_crop, imgR_crop,
                        disp_crop_L) in enumerate(TrainImgLoader):
            print("training at epoch: " + str(epoch))

            # Only compute/log scores every 10 iterations (scoring is costly).
            is_scoring = total_iters % 10 == 0

            loss, vis, scores_list, maps = train_step(model,
                                                      optimizer,
                                                      imgL_crop,
                                                      imgR_crop,
                                                      disp_crop_L,
                                                      args.maxdisp,
                                                      is_scoring=is_scoring)

            total_train_loss += loss

            if is_scoring:
                log.scalar_summary('train/loss_batch', loss, total_iters)
                for score in scores_list:
                    for tag, val in score.items():
                        log.scalar_summary("train/" + tag + "_batch", val,
                                           total_iters)

                        if tag not in train_score_accum_dict.keys():
                            train_score_accum_dict[tag] = 0
                        train_score_accum_dict[tag] += val
                        # NOTE(review): incremented once per score tag, so
                        # "num_scored" over-counts by the number of tags —
                        # confirm whether it should sit outside this loop.
                        train_score_accum_dict[
                            "num_scored"] += imgL_crop.shape[0]

                for tag, err_map in maps[0].items():
                    log.image_summary("train/" + tag, err_map, total_iters)

            if total_iters % 10 == 0:
                log.image_summary('train/left', imgL_crop[0:1], total_iters)
                log.image_summary('train/right', imgR_crop[0:1], total_iters)
                log.disp_summary('train/gt0', disp_crop_L[0:1],
                                 total_iters)  # <-- GT disp
                log.entp_summary('train/entropy', vis['entropy'][0:1],
                                 total_iters)
                log.disp_summary('train/output3', vis['output3'][0:1],
                                 total_iters)

            total_iters += 1

        log.scalar_summary('train/loss',
                           total_train_loss / len(TrainImgLoader), epoch)
        # NOTE(review): this loop also iterates the "num_scored" entry itself,
        # logging a constant "train/num_scored_avg" of 1.
        for tag, val in train_score_accum_dict.items():
            log.scalar_summary("train/" + tag + "_avg",
                               val / train_score_accum_dict["num_scored"],
                               epoch)

        torch.cuda.empty_cache()
    # Save final checkpoint
    print("Finished training!\n Saving the last checkpoint...")
    savefilename = os.path.join(save_path, 'final' + '.tar')

    torch.save(
        {
            'iters': total_iters,
            'state_dict': model.state_dict(),
            'train_loss': total_train_loss / len(TrainImgLoader),
            "optimizer": optimizer.state_dict()
        }, savefilename)
Ejemplo n.º 5
0
# Weights & Biases run for the submission script; `args` is parsed above
# this excerpt.
wandb_logger = wandb.init(name="submission.py",
                          project="rvc_stereo",
                          save_code=True,
                          magic=True,
                          config=args)

# dataloader
from dataloader import listfiles as DA

# test_left_img, test_right_img, _, _ = DA.dataloader(args.datapath)
# print("total test images: " + str(len(test_left_img)))
# print("output path: " + args.outdir)

# construct model
# Single-GPU DataParallel wrapper; max_disp/clean/level come from the CLI.
model = hsm(args.max_disp, args.clean, level=args.level)
model = nn.DataParallel(model, device_ids=[0])
model.cuda()

# Optional warm start: keys containing 'disp' are dropped (presumably
# maxdisp-dependent disparity-regression buffers — confirm); strict=False
# tolerates the resulting missing keys.
if args.loadmodel is not None:
    pretrained_dict = torch.load(args.loadmodel)
    pretrained_dict['state_dict'] = {
        k: v
        for k, v in pretrained_dict['state_dict'].items() if 'disp' not in k
    }
    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
else:
    print('run with random init')
print('Number of model parameters: {}'.format(
    sum([p.data.nelement() for p in model.parameters()])))
Ejemplo n.º 6
0
def main():
    """Run HSM inference for RVC/KITTI submission and (optionally) score it.

    Parses the CLI, builds the model (converted for sync BatchNorm), loads
    optional weights, then for every stereo pair: adjusts the model's max
    disparity, predicts a disparity map and entropy, writes PNG/PFM/colormap
    outputs under ``output/<name>/``, and optionally accumulates metrics and
    prepares a KITTI submission.
    """
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument(
        '--datapath',
        default="/home/isaac/rvc_devkit/stereo/datasets_middlebury2014",
        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--name',
                        default='rvc_highres_output',
                        help='output dir')
    parser.add_argument('--clean',
                        type=float,
                        default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument(
        '--testres',
        type=float,
        default=0.5,  #default used to be 0.5
        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp',
                        type=float,
                        default=-1,
                        help='maximum disparity to search for')
    parser.add_argument(
        '--level',
        type=int,
        default=1,
        help='output level of output, default is level 1 (stage 3),\
                              can also use level 2 (stage 2) or level 3 (stage 1)'
    )
    parser.add_argument('--debug_image', type=str, default=None)
    parser.add_argument("--eth_testres", type=float, default=3.5)
    parser.add_argument("--score_results", action="store_true", default=False)
    parser.add_argument("--save_weights", action="store_true", default=False)
    parser.add_argument("--kitti", action="store_true", default=False)
    parser.add_argument("--eth", action="store_true", default=False)
    parser.add_argument("--mb", action="store_true", default=False)
    parser.add_argument("--all_data", action="store_true", default=False)
    parser.add_argument("--eval_train_only",
                        action="store_true",
                        default=False)
    parser.add_argument("--debug", action="store_true", default=False)
    parser.add_argument("--batchsize", type=int, default=16)
    parser.add_argument("--prepare_kitti", action="store_true", default=False)

    args = parser.parse_args()

    # wandb.init(name=args.name, project="high-res-stereo", save_code=True, magic=True, config=args)

    if not os.path.exists("output"):
        os.mkdir("output")

    # Per-dataset metric accumulators (note: "kitti_merics" typo is kept as-is
    # since the name is referenced again below).
    kitti_merics = {}
    eth_metrics = {}
    mb_metrics = {}

    # construct model
    model = hsm(128, args.clean, level=args.level)
    model = convert_model(model)
    # wandb.watch(model)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    # Optional warm start: keys containing 'disp' are dropped (presumably
    # maxdisp-dependent disparity-regression buffers — they are rebuilt per
    # image below); strict=False tolerates the resulting missing keys.
    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    model.eval()

    if not args.prepare_kitti:
        dataset = RVCDataset(args)
    if args.prepare_kitti:
        _, _, _, left_val, right_val, disp_val_L = lk15.dataloader(
            '/data/private/KITTI2015/data_scene_flow/training/',
            val=True)  # change to trainval when finetuning on KITTI

        dataset = DA.myImageFloder(left_val,
                                   right_val,
                                   disp_val_L,
                                   rand_scale=[1, 1],
                                   order=0)

    dataloader = DataLoader(dataset,
                            batch_size=args.batchsize,
                            shuffle=False,
                            num_workers=0)

    steps = 0
    # NOTE(review): max_disp / origianl_image_size / top_pad / left_pad /
    # dataset_type stay None unless set elsewhere — the loop signature that
    # populated them is commented out below, so e.g. int(top_pad[0]) and the
    # origianl_image_size resize would fail on the active code path. Confirm
    # which loader variant this was last run with.
    max_disp = None
    origianl_image_size = None
    top_pad = None
    left_pad = None
    testres = [args.testres]
    dataset_type = None
    data_path = [args.datapath]
    # for (imgL, imgR, gt_disp_raw, max_disp, origianl_image_size, top_pad, left_pad, testres, dataset_type , data_path) in dataloader:
    for (imgL, imgR, gt_disp_raw) in dataloader:
        # Todo: this is a hot fix. Must be fixed to handle batchsize greater than 1
        data_path = data_path[0]
        img_name = os.path.basename(os.path.normpath(data_path))
        testres = float(testres[0])
        gt_disp_raw = gt_disp_raw[0]

        # Pick the metric accumulator matching the dataset of this sample.
        cum_metrics = None
        if dataset_type == 0:
            cum_metrics = mb_metrics

        elif dataset_type == 1:
            cum_metrics = eth_metrics

        elif dataset_type == 2:
            cum_metrics = kitti_merics

        print(img_name)

        if args.max_disp > 0:
            max_disp = int(args.max_disp)

        ## change max disp
        # Round the scaled search range to a multiple of 64 (network stride),
        # rounding up when truncation would lose range.
        tmpdisp = int(max_disp * testres // 64 * 64)
        if (max_disp * testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64: model.module.maxdisp = 128
        # Rebuild the disparity-regression heads for the new maxdisp.
        model.module.disp_reg8 = disparityregression(model.module.maxdisp,
                                                     16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp,
                                                      16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp,
                                                      32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp,
                                                      64).cuda()
        print("    max disparity = " + str(model.module.maxdisp))

        # wandb.log({"imgL": wandb.Image(imgL, caption=img_name + ", " + str(tuple(imgL.shape))),
        #            "imgR": wandb.Image(imgR, caption=img_name + ", " + str(tuple(imgR.shape)))}, step=steps)

        # Timed forward pass; cuda.synchronize brackets make the wall-clock
        # timing meaningful despite asynchronous kernel launches.
        with torch.no_grad():
            torch.cuda.synchronize()
            start_time = time.time()

            # * output dimensions same as input dimensions
            # * (ex: imgL[1, 3, 704, 2240] then pred_disp[1, 704, 2240])
            pred_disp, entropy = model(imgL, imgR)

            torch.cuda.synchronize()
            ttime = (time.time() - start_time)

            print('    time = %.2f' % (ttime * 1000))

        # * squeeze (remove dimensions with size 1) (ex: pred_disp[1, 704, 2240] ->[704, 2240])
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # Strip the padding that was added to make the input stride-aligned.
        top_pad = int(top_pad[0])
        left_pad = int(left_pad[0])
        entropy = entropy[top_pad:, :pred_disp.shape[1] -
                          left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # save predictions
        idxname = img_name

        if not os.path.exists('output/%s/%s' % (args.name, idxname)):
            os.makedirs('output/%s/%s' % (args.name, idxname))

        idxname = '%s/disp0%s' % (idxname, args.name)

        # * shrink image back to the GT size (ex: pred_disp[675, 2236] -> [375, 1242])
        # ! we element-wise divide pred_disp by testres becasue the image is shrinking,
        # ! so the distance between pixels should also shrink by the same factor
        pred_disp_raw = cv2.resize(
            pred_disp / testres,
            (origianl_image_size[1], origianl_image_size[0]),
            interpolation=cv2.INTER_LINEAR)
        pred_disp = pred_disp_raw  # raw is to use for scoring

        gt_disp = gt_disp_raw.numpy()

        # * clip while keep inf
        # ? `pred_disp != pred_disp` is always true, right??
        # (NOTE(review): x != x is the standard NaN test — it is True only
        # for NaN entries, so this marks NaN and inf pixels as invalid.)
        # ? `pred_disp[pred_invalid] = np.inf` why do this?
        pred_invalid = np.logical_or(pred_disp == np.inf,
                                     pred_disp != pred_disp)
        pred_disp[pred_invalid] = np.inf

        # KITTI convention: disparities stored as uint16 scaled by 256.
        pred_disp_png = (pred_disp * 256).astype("uint16")

        gt_invalid = np.logical_or(gt_disp == np.inf, gt_disp != gt_disp)
        gt_disp[gt_invalid] = 0
        gt_disp_png = (gt_disp * 256).astype("uint16")
        entorpy_png = (entropy * 256).astype('uint16')

        # ! raw output to png
        pred_disp_path = 'output/%s/%s/disp.png' % (args.name,
                                                    idxname.split('/')[0])
        gt_disp_path = 'output/%s/%s/gt_disp.png' % (args.name,
                                                     idxname.split('/')[0])
        assert (cv2.imwrite(pred_disp_path, pred_disp_png))
        assert (cv2.imwrite(gt_disp_path, gt_disp_png))
        assert (cv2.imwrite(
            'output/%s/%s/ent.png' % (args.name, idxname.split('/')[0]),
            entorpy_png))

        # ! Experimental color maps
        gt_disp_color_path = 'output/%s/%s/gt_disp_color.png' % (
            args.name, idxname.split('/')[0])
        pred_disp_color_path = 'output/%s/%s/disp_color.png' % (
            args.name, idxname.split('/')[0])

        gt_colormap = convert_to_colormap(gt_disp_png)
        pred_colormap = convert_to_colormap(pred_disp_png)
        entropy_colormap = convert_to_colormap(entorpy_png)
        assert (cv2.imwrite(gt_disp_color_path, gt_colormap))
        assert (cv2.imwrite(pred_disp_color_path, pred_colormap))

        # ! diff colormaps
        diff_colormap_path = 'output/%s/%s/diff_color.png' % (
            args.name, idxname.split('/')[0])
        false_positive_path = 'output/%s/%s/false_positive_color.png' % (
            args.name, idxname.split('/')[0])
        false_negative_path = 'output/%s/%s/false_negative_color.png' % (
            args.name, idxname.split('/')[0])
        # Invalid GT pixels take the predicted value so they show zero diff;
        # cast to int32 so the subtraction below can go negative.
        gt_disp_png[gt_invalid] = pred_disp_png[gt_invalid]
        gt_disp_png = gt_disp_png.astype("int32")
        pred_disp_png = pred_disp_png.astype("int32")

        diff_colormap = convert_to_colormap(np.abs(gt_disp_png -
                                                   pred_disp_png))
        false_positive_colormap = convert_to_colormap(
            np.abs(np.clip(gt_disp_png - pred_disp_png, None, 0)))
        false_negative_colormap = convert_to_colormap(
            np.abs(np.clip(gt_disp_png - pred_disp_png, 0, None)))
        assert (cv2.imwrite(diff_colormap_path, diff_colormap))
        assert (cv2.imwrite(false_positive_path, false_positive_colormap))
        assert (cv2.imwrite(false_negative_path, false_negative_colormap))

        # PFM expects bottom-up row order, hence the vertical flip.
        out_pfm_path = 'output/%s/%s.pfm' % (args.name, idxname)
        with open(out_pfm_path, 'w') as f:
            save_pfm(f, pred_disp[::-1, :])
        with open(
                'output/%s/%s/time_%s.txt' %
            (args.name, idxname.split('/')[0], args.name), 'w') as f:
            f.write(str(ttime))
        print("    output = " + out_pfm_path)

        caption = img_name + ", " + str(
            tuple(pred_disp_png.shape)) + ", max disparity = " + str(
                int(max_disp[0])) + ", time = " + str(ttime)

        # read GT depthmap and upload as jpg

        # wandb.log({"disparity": wandb.Image(pred_colormap, caption=caption) , "gt": wandb.Image(gt_colormap), "entropy": wandb.Image(entropy_colormap, caption= str(entorpy_png.shape)),
        #            "diff":wandb.Image(diff_colormap), "false_positive":wandb.Image(false_positive_colormap), "false_negative":wandb.Image(false_negative_colormap)}, step=steps)

        torch.cuda.empty_cache()
        steps += 1

        # Todo: find out what mask0nocc does. It's probably not the same as KITTI's object map
        if dataset_type == 2:
            obj_map_path = os.path.join(data_path, "obj_map.png")
        else:
            obj_map_path = None

        if args.score_results:
            # Rescale the prediction (and its values) to the GT resolution
            # before scoring.
            if pred_disp_raw.shape != gt_disp_raw.shape:  # pred_disp_raw[375 x 1242] gt_disp_raw[675 x 2236]
                ratio = float(gt_disp_raw.shape[1]) / pred_disp_raw.shape[1]
                disp_resized = cv2.resize(
                    pred_disp_raw,
                    (gt_disp_raw.shape[1], gt_disp_raw.shape[0])) * ratio
                pred_disp_raw = disp_resized  # [675 x 2236]
            # if args.debug:
            #     out_resized_pfm_path = 'output/%s/%s/pred_scored.pfm' % (args.name, img_name)
            #     with open(out_resized_pfm_path, 'w') as f:
            #         save_pfm(f, pred_disp_raw)

            #     out_resized_gt_path = 'output/%s/%s/gt_scored.pfm' % (args.name, img_name)
            #     with open(out_resized_gt_path, 'w') as f:
            #         save_pfm(f, gt_disp_raw.numpy())

            metrics = score_rvc.get_metrics(
                pred_disp_raw,
                gt_disp_raw,
                int(max_disp[0]),
                dataset_type,
                ('output/%s/%s' % (args.name, idxname.split('/')[0])),
                disp_path=pred_disp_path,
                gt_path=gt_disp_path,
                obj_map_path=obj_map_path,
                debug=args.debug)

            # Track per-metric history and log running averages.
            avg_metrics = {}
            for (key, val) in metrics.items():
                if cum_metrics.get(key) == None:
                    cum_metrics[key] = []
                cum_metrics[key].append(val)
                avg_metrics["avg_" + key] = sum(cum_metrics[key]) / len(
                    cum_metrics[key])

            # wandb.log(metrics, step=steps)
            # wandb.log(avg_metrics, step=steps)

    # if args.save_weights and os.path.exists(args.loadmodel):
    #     wandb.save(args.loadmodel)

    if args.prepare_kitti and (args.all_data or args.kitti):
        in_path = 'output/%s' % (args.name)
        out_path = "/home/isaac/high-res-stereo/kitti_submission_output"
        out_path = prepare_kitti(in_path, out_path)
        subprocess.run(
            ["/home/isaac/KITTI2015_devkit/cpp/eval_scene_flow", out_path])
        print("KITTI submission evaluation saved to: " + out_path)
Ejemplo n.º 7
0
def main():
    """Validate an HSM-Net model on one fixed KITTI 2015 sample at several
    test-time resolutions, logging per-resolution scores and visualizations.

    Command-line driven; writes TensorBoard-style summaries via ``logger.Logger``.
    """
    parser = argparse.ArgumentParser(description='HSM-Net')
    parser.add_argument('--maxdisp', type=int, default=384,
                        help='maxium disparity')
    parser.add_argument('--name', default='name')
    parser.add_argument('--database', default='/data/private',
                        help='data path')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--val_batch_size', type=int, default=1,
                        help='samples per batch')
    parser.add_argument('--loadmodel', default=None,
                        help='weights path')
    parser.add_argument('--log_dir', default="/data/private/logs/high-res-stereo")
    parser.add_argument("--testres", default=[0], nargs="+")
    parser.add_argument("--no_aug", default=False, action="store_true")

    args = parser.parse_args()

    # Seed CPU and CUDA RNGs for reproducible validation.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    args.name = args.name + "_" + time.strftime('%l:%M%p_%Y%b%d').strip(" ")
    # --testres arrives as a list of strings (nargs="+"); convert to floats.
    args.testres = [float(r) for r in args.testres]

    all_left_img, all_right_img, all_left_disp, left_val, right_val, disp_val_L = lk15.dataloader(
        '%s/KITTI2015/data_scene_flow/training/' % args.database, val=True)

    # Validate on a single fixed sample (index 3).
    left_val = [left_val[3]]
    right_val = [right_val[3]]
    disp_val_L = [disp_val_L[3]]

    model = hsm(args.maxdisp, clean=False, level=1)
    model.cuda()
    model = nn.DataParallel(model, device_ids=[0])

    # BUGFIX: only load a checkpoint when one was actually given. --loadmodel
    # defaults to None and the original called torch.load(None) unconditionally,
    # crashing whenever the flag was omitted.
    if args.loadmodel is not None:
        print("loading pretrained model: " + str(args.loadmodel))
        pretrained_dict = torch.load(args.loadmodel)
        # Drop disparity-regression entries; their size depends on maxdisp and
        # they are re-created by the model, hence also strict=False below.
        pretrained_dict['state_dict'] = {k: v for k, v in pretrained_dict['state_dict'].items() if ('disp' not in k)}
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)

    name = "val_at_many_res" + "_" + datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    log = logger.Logger(args.log_dir, name)
    val_sample_count = 0
    for res in args.testres:

        val_loader_kitti15 = DA.myImageFloder(left_val, right_val, disp_val_L, is_validation=True, testres=res)
        ValImgLoader = torch.utils.data.DataLoader(val_loader_kitti15, drop_last=False, batch_size=args.val_batch_size,
                                                   shuffle=False, worker_init_fn=_init_fn,
                                                   num_workers=0)
        print("================ res: " + str(res) + " ============================")
        ## val ##
        val_score_accum_dict = {}  # accumulates scores across the resolution run
        val_img_idx = 0
        for batch_idx, (imgL_crop, imgR_crop, disp_crop_L) in enumerate(ValImgLoader):
            vis, scores_list, err_map_list = val_step(model, imgL_crop, imgR_crop, disp_crop_L, args.maxdisp, res)

            for score, err_map in zip(scores_list, err_map_list):
                for (score_tag, score_val), (map_tag, map_val) in zip(score.items(), err_map.items()):
                    log.scalar_summary("val/im_" + str(val_img_idx) + "/" + str(res) + "/" + score_tag, score_val, val_sample_count)
                    log.image_summary("val/" + str(res) + "/" + map_tag, map_val, val_sample_count)

                    if score_tag not in val_score_accum_dict.keys():
                        val_score_accum_dict[score_tag] = 0
                    val_score_accum_dict[score_tag] += score_val
                    print("res: " + str(res) + " " + score_tag + ": " + str(score_val))

                val_img_idx += 1
                val_sample_count += 1

                log.image_summary('val/left', imgL_crop[0:1], val_sample_count)
                # log.image_summary('val/right', imgR_crop[0:1], val_sample_count)
                log.disp_summary('val/gt0', disp_crop_L[0:1], val_sample_count)  # <-- GT disp
                log.entp_summary('val/entropy', vis['entropy'], val_sample_count)
                log.disp_summary('val/output3', vis['output3'][0], val_sample_count)
def main():
    """Run HSM stereo inference over a directory of rectified image pairs.

    Reads frames from <datapath>/stereo_front_left and
    <datapath>/stereo_front_right (paired by sorted filename order), predicts
    a disparity map and an entropy map per pair, and writes both as 16-bit
    PNGs under <datapath>/<name>/disp and <datapath>/<name>/entropy.
    """
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument("--name", required=True)
    parser.add_argument('--datapath', default='./data-mbtest/',
                        help='test data path')
    parser.add_argument('--loadmodel', default=None,
                        help='model path')
    parser.add_argument('--clean', type=float, default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument('--testres', type=float, default=1.8,  # Sometimes needs 2 ~ 3 for small images.
                        # For ETH3D we need to use a different resolution.
                        # 1 - no scaling, 0.5 halves the image, 2 doubles the size of the image.
                        # Middlebury: 1 (3000 x 3000)
                        # ETH3D: 3~4, since images are (1000 x 1000)
                        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp', type=int, default=2056,
                        help='maximum disparity to search for')
    parser.add_argument('--level', type=int, default=1,
                        help='output level of output, default is level 1 (stage 3),\
                            can also use level 2 (stage 2) or level 3 (stage 1)')
    args = parser.parse_args()
    args.max_disp = int(args.max_disp) # max_disp = 2056 * testres
    # Round max_disp down to a multiple of 16 (disparity-regression granularity).
    args.max_disp = 16 * math.floor(args.max_disp/16)

    # Timestamp the run name so repeated runs do not overwrite each other.
    args.name = args.name + "_"+ datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    # dataloader
    from dataloader import listfiles as DA

    # test_left_img, test_right_img, _, _ = DA.dataloader(args.datapath)
    # print("total test images: " + str(len(test_left_img)))
    # print("output path: " + args.outdir)

    # construct model
    model = hsm(args.max_disp, args.clean, level=args.level)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # Drop disparity-regression entries; they are rebuilt per image below
        # (hence also strict=False).
        pretrained_dict['state_dict'] = {k: v for k, v in pretrained_dict['state_dict'].items() if 'disp' not in k}
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()])))

    # dry run: one forward pass on zeros to warm up CUDA kernels/allocator
    multip = 48
    imgL = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgR = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgL = Variable(torch.FloatTensor(imgL).cuda())
    imgR = Variable(torch.FloatTensor(imgR).cuda())
    with torch.no_grad():
        model.eval()
        pred_disp, entropy = model(imgL, imgR)

    left_img_dir = os.path.join(args.datapath, "stereo_front_left")
    right_img_dir = os.path.join(args.datapath, "stereo_front_right")

    # Sorted listings pair left/right frames by filename order.
    left_img_path_list = os.listdir(left_img_dir)
    left_img_path_list.sort()
    right_img_path_list = os.listdir(right_img_dir)
    right_img_path_list.sort()

    processed = get_transform()
    model.eval()

    # save predictions
    out_path = os.path.join(args.datapath, args.name)
    if not os.path.exists(out_path):
        os.mkdir(out_path)

    disp_path = os.path.join(out_path, "disp")
    entp_path = os.path.join(out_path, "entropy")

    if not os.path.exists(disp_path):
        os.mkdir(disp_path)

    if not os.path.exists(entp_path):
        os.mkdir(entp_path)


    for (left_img_name, right_img_name) in zip(left_img_path_list, right_img_path_list):

        left_img_path = os.path.join(left_img_dir, left_img_name)
        right_img_path = os.path.join(right_img_dir, right_img_name)

        print(left_img_path)

        # Keep only the RGB channels; drops alpha if present.
        imgL_o = (skimage.io.imread(left_img_path).astype('float32'))[:, :, :3]
        imgR_o = (skimage.io.imread(right_img_path).astype('float32'))[:, :, :3]
        imgsize = imgL_o.shape[:2]

        max_disp = int(args.max_disp)

        ## change max disp
        # Scale the disparity search range by test resolution, rounded up to a
        # multiple of 64, then rebuild the regression heads to match.
        tmpdisp = int(max_disp * args.testres // 64 * 64)
        if (max_disp * args.testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64: model.module.maxdisp = 128
        model.module.disp_reg8 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp, 32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp, 64).cuda()

        # resize
        imgL_o = cv2.resize(imgL_o, None, fx=args.testres, fy=args.testres, interpolation=cv2.INTER_CUBIC)
        imgR_o = cv2.resize(imgR_o, None, fx=args.testres, fy=args.testres, interpolation=cv2.INTER_CUBIC)

        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()

        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        ##fast pad
        # Pad height/width up to multiples of 64 (network downsampling factor);
        # rows are padded on top, columns on the right.
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]: max_h += 64
        if max_w < imgL.shape[3]: max_w += 64

        top_pad = max_h - imgL.shape[2]
        left_pad = max_w - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)), mode='constant', constant_values=0)

        # test
        imgL = torch.FloatTensor(imgL)
        imgR = torch.FloatTensor(imgR)

        imgL = imgL.cuda()
        imgR = imgR.cuda()

        with torch.no_grad():
            torch.cuda.synchronize()

            pred_disp, entropy = model(imgL, imgR)
            torch.cuda.synchronize()

        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # Undo the top/right padding applied above.
        top_pad = max_h - imgL_o.shape[0]
        left_pad = max_w - imgL_o.shape[1]
        entropy = entropy[top_pad:, :pred_disp.shape[1] - left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # resize to highres
        # Disparity values scale with image width, so divide by testres when
        # resizing back to the original resolution.
        pred_disp = cv2.resize(pred_disp / args.testres, (imgsize[1], imgsize[0]), interpolation=cv2.INTER_LINEAR)

        # clip while keep inf
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        # NOTE(review): assumes every input file name has the same length as
        # "315970554564438888.jpg" and a ".jpg" extension — verify for other
        # datasets before reuse.
        out_file_name = left_img_path[len(left_img_path) - len("315970554564438888.jpg"): len(left_img_path) - len("jpg")]
        out_file_name = os.path.join(out_file_name + "png", )

        # KITTI-style 16-bit PNG encoding: disparity * 256.
        pred_disp_png = (pred_disp * 256).astype('uint16')
        cv2.imwrite(os.path.join(disp_path, out_file_name), pred_disp_png)
        entropy_png = (entropy* 256).astype('uint16')

        cv2.imwrite(os.path.join(entp_path, out_file_name), entropy_png)

        torch.cuda.empty_cache()
def main():
    """Evaluate HSM on a fixed KITTI 2015 validation image.

    Reads left-image paths from KITTI2015_val.txt (right paths derived by
    replacing image_2 with image_3), predicts disparity and entropy, and
    writes both as PNG and .npy under <dataset root>/<name>/{disp,entropy},
    also logging summaries via ``Logger``.
    """
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument('--name', required=True, type=str)
    parser.add_argument('--datapath',
                        default='/data/privateKITTI_raw/',
                        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--clean',
                        type=float,
                        default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument(
        '--testres',
        type=float,
        default=1.8,  # Sometimes needs 2 ~ 3 for small images.
        # For ETH3D we need to use a different resolution.
        # 1 - no scaling, 0.5 halves the image, 2 doubles the size of the image.
        # Middlebury: 1 (3000 x 3000)
        # ETH3D: 3~4, since images are (1000 x 1000)
        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp',
                        type=float,
                        default=384,
                        help='maximum disparity to search for')
    parser.add_argument(
        '--level',
        type=int,
        default=1,
        help='output level of output, default is level 1 (stage 3),\
                            can also use level 2 (stage 2) or level 3 (stage 1)'
    )
    parser.add_argument('--save_err', action="store_true")
    args = parser.parse_args()
    # args.name = args.name + "_" + datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    # Timestamped log name so repeated runs do not collide.
    name = "eval" + "_" + datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    logger = Logger("/data/private/logs/high-res-stereo", name)
    print("Saving log at: " + name)
    # construct model
    model = hsm(args.max_disp, args.clean, level=args.level)
    model = nn.DataParallel(model)
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # Drop disparity-regression entries; they are rebuilt per image below
        # (hence also strict=False).
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # dry run: one forward pass on zeros to warm up CUDA kernels/allocator
    multip = 48
    imgL = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgR = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgL = Variable(torch.FloatTensor(imgL).cuda())
    imgR = Variable(torch.FloatTensor(imgR).cuda())
    with torch.no_grad():
        model.eval()
        pred_disp, entropy = model(imgL, imgR)

    processed = get_transform()
    model.eval()

    # KITTI2015_val.txt lists left-image paths, one per line.
    with open("KITTI2015_val.txt") as file:
        lines = file.readlines()

    left_img_paths = [x.strip() for x in lines]
    right_img_paths = []
    for p in left_img_paths:
        # Right frames live in image_3, mirroring the image_2 left paths.
        right_img_paths.append(p.replace("image_2", "image_3"))
    # Evaluate only the single fixed sample at index 3.
    left_img_paths = [left_img_paths[3]]
    right_img_paths = [right_img_paths[3]]
    for i, (left_img_path,
            right_img_path) in enumerate(zip(left_img_paths, right_img_paths)):

        print(left_img_path)
        # Keep only the RGB channels; drops alpha if present.
        imgL_o = (skimage.io.imread(left_img_path).astype('float32'))[:, :, :3]
        imgR_o = (
            skimage.io.imread(right_img_path).astype('float32'))[:, :, :3]
        imgsize = imgL_o.shape[:2]

        max_disp = int(args.max_disp)

        ## change max disp
        # Scale the disparity search range by test resolution, rounded up to a
        # multiple of 64, then rebuild the regression heads to match.
        tmpdisp = int(max_disp * args.testres // 64 * 64)
        if (max_disp * args.testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64: model.module.maxdisp = 128
        model.module.disp_reg8 = disparityregression(model.module.maxdisp,
                                                     16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp,
                                                      16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp,
                                                      32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp,
                                                      64).cuda()

        # resize
        imgL_o = cv2.resize(imgL_o,
                            None,
                            fx=args.testres,
                            fy=args.testres,
                            interpolation=cv2.INTER_CUBIC)
        imgR_o = cv2.resize(imgR_o,
                            None,
                            fx=args.testres,
                            fy=args.testres,
                            interpolation=cv2.INTER_CUBIC)
        # torch.save(imgL_o, "/home/isaac/high-res-stereo/debug/my_submission/img1.pt")

        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        # torch.save(imgL, "/home/isaac/high-res-stereo/debug/my_submission/img2.pt")

        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])
        # torch.save(imgL, "/home/isaac/high-res-stereo/debug/my_submission/img3.pt")

        ##fast pad
        # Pad height/width up to multiples of 64 (network downsampling factor);
        # rows are padded on top, columns on the right.
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]: max_h += 64
        if max_w < imgL.shape[3]: max_w += 64

        top_pad = max_h - imgL.shape[2]
        left_pad = max_w - imgL.shape[3]
        imgL = np.lib.pad(imgL, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                          mode='constant',
                          constant_values=0)
        imgR = np.lib.pad(imgR, ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                          mode='constant',
                          constant_values=0)

        # test
        imgL = torch.FloatTensor(imgL)
        imgR = torch.FloatTensor(imgR)

        imgL = imgL.cuda()
        imgR = imgR.cuda()

        with torch.no_grad():
            torch.cuda.synchronize()

            pred_disp, entropy = model(imgL, imgR)
            torch.cuda.synchronize()

        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # Undo the top/right padding applied above.
        top_pad = max_h - imgL_o.shape[0]
        left_pad = max_w - imgL_o.shape[1]
        entropy = entropy[top_pad:, :pred_disp.shape[1] -
                          left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # resize to highres
        # Disparity values scale with image width, so divide by testres when
        # resizing back to the original resolution.
        pred_disp = cv2.resize(pred_disp / args.testres,
                               (imgsize[1], imgsize[0]),
                               interpolation=cv2.INTER_LINEAR)

        # clip while keep inf
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        # Output root: three directory levels above the left image, /<name>.
        out_base_path = left_img_path.split("/")[:-3]
        out_base_path = "/" + os.path.join(*out_base_path)
        out_base_path = os.path.join(out_base_path, args.name)
        # out_base_path = "/data/private/Middlebury/kitti_testres" + str(args.testres) + "_maxdisp" + str(int(args.max_disp))

        img_name = left_img_path.split("/")[-1][:-3] + "png"
        disp_path = os.path.join(out_base_path, "disp")

        os.makedirs(disp_path, exist_ok=True)
        # KITTI-style 16-bit PNG encoding: disparity * 256.
        pred_disp_png = (pred_disp * 256).astype("uint16")
        cv2.imwrite(os.path.join(disp_path, img_name), pred_disp_png)
        logger.disp_summary("disp" + "/" + img_name[:-4], pred_disp, i)
        # disp_map(pred_disp, os.path.join(disp_path, img_name))
        # logger.image_summary("poster", pred_disp, i)
        # i+=1
        np.save(os.path.join(disp_path, img_name[:-len(".png")]), pred_disp)

        entp_path = os.path.join(out_base_path, "entropy")
        os.makedirs(entp_path, exist_ok=True)
        # saving entropy as png
        # NOTE(review): entropy is normalized to [0, 256] but stays float
        # (no uint16 cast, unlike the disparity PNG) — confirm intended depth.
        entropy_png = ((entropy / entropy.max()) * 256)
        cv2.imwrite(os.path.join(entp_path, img_name), entropy_png)
        logger.disp_summary("entropy" + "/" + img_name[:-4], entropy, i)
        # save_disp_as_colormap(entropy, os.path.join(entp_path, img_name))
        np.save(os.path.join(entp_path, img_name[:-len(".png")]), entropy)
        torch.cuda.empty_cache()
Ejemplo n.º 10
0
def main():
    """Fine-tune HSM-Net on a single replicated Middlebury pair (optionally
    using pseudo ground truth filtered by an entropy threshold), validating
    every --val_epoch epochs and checkpointing every --save_epoch epochs.
    """
    parser = argparse.ArgumentParser(description='HSM-Net')
    parser.add_argument('--maxdisp',
                        type=int,
                        default=384,
                        help='maxium disparity')
    parser.add_argument('--name', default='name')
    parser.add_argument('--database',
                        default='/data/private',
                        help='data path')
    parser.add_argument('--epochs',
                        type=int,
                        default=10,
                        help='number of epochs to train')
    parser.add_argument(
        '--batch_size',
        type=int,
        default=18,
        # when maxdisp is 768, 18 is the most you can fit in 2 V100s (with syncBN on)
        help='samples per batch')
    parser.add_argument('--val_batch_size',
                        type=int,
                        default=2,
                        help='validation samples per batch')
    parser.add_argument('--loadmodel', default=None, help='weights path')
    parser.add_argument('--log_dir',
                        default="/data/private/logs/high-res-stereo")
    # parser.add_argument('--savemodel', default=os.path.join(os.getcwd(),'/trained_model'),
    #                     help='save path')
    parser.add_argument('--seed',
                        type=int,
                        default=1,
                        metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--val_epoch', type=int, default=2)
    parser.add_argument('--save_epoch', type=int, default=1)
    parser.add_argument("--val", action="store_true", default=False)
    parser.add_argument("--save_numpy", action="store_true", default=False)
    parser.add_argument("--testres", type=float, default=1.8)
    parser.add_argument("--threshold", type=float, default=0.7)
    parser.add_argument("--use_pseudoGT", default=False, action="store_true")
    parser.add_argument("--lr", default=1e-3, type=float)
    parser.add_argument("--lr_decay", default=2, type=int)
    parser.add_argument("--gpu", default=[0], nargs="+")

    args = parser.parse_args()
    # Seed CPU and CUDA RNGs for reproducibility.
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    scale_factor = args.maxdisp / 384.  # controls training resolution
    args.name = args.name + "_" + time.strftime('%l:%M%p_%Y%b%d').strip(" ")
    # --gpu arrives as a list of strings (nargs="+"); convert to ints.
    args.gpu = [int(g) for g in args.gpu]

    # Overfit on one Middlebury pair, replicated to fill 16 batches per epoch.
    all_left_img = [
        "/data/private/Middlebury/mb-ex/trainingF/Cable-perfect/im0.png"
    ] * args.batch_size * 16
    all_right_img = [
        "/data/private/Middlebury/mb-ex/trainingF/Cable-perfect/im1.png"
    ] * args.batch_size * 16
    all_left_disp = [
        "/data/private/Middlebury/kitti_testres1.15_maxdisp384/disp/Cable-perfect.npy"
    ] * args.batch_size * 16
    all_left_entp = [
        "/data/private/Middlebury/kitti_testres1.15_maxdisp384/entropy/Cable-perfect.npy"
    ] * args.batch_size * 16

    loader_mb = DA.myImageFloder(all_left_img,
                                 all_right_img,
                                 all_left_disp,
                                 rand_scale=[0.225, 0.6 * scale_factor],
                                 order=0,
                                 use_pseudoGT=args.use_pseudoGT,
                                 entropy_threshold=args.threshold,
                                 left_entropy=all_left_entp)

    # Validation uses the real Middlebury ground truth, not the pseudo GT.
    val_left_img = [
        "/data/private/Middlebury/mb-ex/trainingF/Cable-perfect/im0.png"
    ]
    val_right_img = [
        "/data/private/Middlebury/mb-ex/trainingF/Cable-perfect/im1.png"
    ]
    val_disp = [
        "/data/private/Middlebury/mb-ex/trainingF/Cable-perfect/disp0GT.pfm"
    ]
    val_loader_mb = DA.myImageFloder(val_left_img,
                                     val_right_img,
                                     val_disp,
                                     is_validation=True,
                                     testres=args.testres)

    TrainImgLoader = torch.utils.data.DataLoader(
        loader_mb,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True,
        worker_init_fn=_init_fn,
        num_workers=args.batch_size)  # , , worker_init_fn=_init_fn

    ValImgLoader = torch.utils.data.DataLoader(val_loader_mb,
                                               batch_size=1,
                                               shuffle=False,
                                               drop_last=False,
                                               worker_init_fn=_init_fn,
                                               num_workers=1)

    print('%d batches per epoch' % (len(loader_mb) // args.batch_size))

    model = hsm(args.maxdisp, clean=False, level=1)

    # BUGFIX: the device list was hard-coded to [0, 1], silently ignoring the
    # parsed --gpu argument; honor the argument instead.
    gpus = args.gpu
    if len(gpus) > 1:
        # Multi-GPU: replace BatchNorm with synchronized BatchNorm.
        from sync_batchnorm.sync_batchnorm import convert_model
        model = nn.DataParallel(model, device_ids=gpus)
        model = convert_model(model)
    else:
        model = nn.DataParallel(model, device_ids=gpus)

    model.cuda()

    # load model
    if args.loadmodel is not None:
        print("loading pretrained model: " + str(args.loadmodel))
        pretrained_dict = torch.load(args.loadmodel)
        # Drop disparity-regression entries; their size depends on maxdisp
        # (hence also strict=False).
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if ('disp' not in k)
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)

    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))

    log = logger.Logger(args.log_dir, args.name, save_numpy=args.save_numpy)
    total_iters = 0
    val_sample_count = 0
    val_batch_count = 0

    save_path = os.path.join(args.log_dir,
                             os.path.join(args.name, "saved_model"))
    os.makedirs(save_path, exist_ok=True)

    for epoch in range(1, args.epochs + 1):
        total_train_loss = 0
        train_score_accum_dict = {
        }  # accumulates scores throughout a batch to get average score
        train_score_accum_dict["num_scored"] = 0
        adjust_learning_rate(optimizer,
                             args.lr,
                             args.lr_decay,
                             epoch,
                             args.epochs,
                             decay_rate=0.1)

        print('Epoch %d / %d' % (epoch, args.epochs))

        # SAVE — checkpoint before training this epoch (skipped on epoch 1).
        if epoch != 1 and epoch % args.save_epoch == 0:
            print("saving weights at epoch: " + str(epoch))
            savefilename = os.path.join(save_path,
                                        'ckpt_' + str(total_iters) + '.tar')

            torch.save(
                {
                    'iters': total_iters,
                    'state_dict': model.state_dict(),
                    'train_loss': total_train_loss / len(TrainImgLoader),
                    "optimizer": optimizer.state_dict()
                }, savefilename)

        ## val ##
        if epoch % args.val_epoch == 0:
            print("validating at epoch: " + str(epoch))
            val_score_accum_dict = {
            }  # accumulates scores throughout a batch to get average score
            for batch_idx, (imgL_crop, imgR_crop,
                            disp_crop_L) in enumerate(ValImgLoader):

                vis, scores_list, err_map_list = val_step(
                    model, imgL_crop, imgR_crop, disp_crop_L, args.maxdisp,
                    args.testres)

                for score, err_map in zip(scores_list, err_map_list):
                    for (score_tag,
                         score_val), (map_tag,
                                      map_val) in zip(score.items(),
                                                      err_map.items()):
                        log.scalar_summary("val/" + score_tag, score_val,
                                           val_sample_count)
                        log.image_summary("val/" + map_tag, map_val,
                                          val_sample_count)

                        if score_tag not in val_score_accum_dict.keys():
                            val_score_accum_dict[score_tag] = 0
                        val_score_accum_dict[score_tag] += score_val
                    val_sample_count += 1

                log.image_summary('val/left', imgL_crop[0:1], val_sample_count)
                log.image_summary('val/right', imgR_crop[0:1],
                                  val_sample_count)
                log.disp_summary('val/gt0', disp_crop_L[0:1],
                                 val_sample_count)  # <-- GT disp
                log.entp_summary('val/entropy', vis['entropy'],
                                 val_sample_count)
                log.disp_summary('val/output3', vis['output3'][0],
                                 val_sample_count)

                for score_tag, score_val in val_score_accum_dict.items():
                    log.scalar_summary("val/" + score_tag + "_batch_avg",
                                       score_val, val_batch_count)
                val_batch_count += 1

        ## training ##
        for batch_idx, (imgL_crop, imgR_crop,
                        disp_crop_L) in enumerate(TrainImgLoader):
            print("training at epoch: " + str(epoch))

            # Score (and log images) only every 10th iteration to limit overhead.
            is_scoring = total_iters % 10 == 0

            loss, vis, scores_list, maps = train_step(model,
                                                      optimizer,
                                                      imgL_crop,
                                                      imgR_crop,
                                                      disp_crop_L,
                                                      args.maxdisp,
                                                      is_scoring=is_scoring)

            total_train_loss += loss

            if is_scoring:
                log.scalar_summary('train/loss_batch', loss, total_iters)
                for score in scores_list:
                    for tag, val in score.items():
                        log.scalar_summary("train/" + tag + "_batch", val,
                                           total_iters)

                        if tag not in train_score_accum_dict.keys():
                            train_score_accum_dict[tag] = 0
                        train_score_accum_dict[tag] += val
                        train_score_accum_dict[
                            "num_scored"] += imgL_crop.shape[0]

                for tag, err_map in maps[0].items():
                    log.image_summary("train/" + tag, err_map, total_iters)

            if total_iters % 10 == 0:
                log.image_summary('train/left', imgL_crop[0:1], total_iters)
                log.image_summary('train/right', imgR_crop[0:1], total_iters)
                log.disp_summary('train/gt0', disp_crop_L[0:1],
                                 total_iters)  # <-- GT disp
                log.entp_summary('train/entropy', vis['entropy'][0:1],
                                 total_iters)
                log.disp_summary('train/output3', vis['output3'][0:1],
                                 total_iters)

            total_iters += 1

        log.scalar_summary('train/loss',
                           total_train_loss / len(TrainImgLoader), epoch)
        for tag, val in train_score_accum_dict.items():
            log.scalar_summary("train/" + tag + "_avg",
                               val / train_score_accum_dict["num_scored"],
                               epoch)

        torch.cuda.empty_cache()
    # Save final checkpoint
    print("Finished training!\n Saving the last checkpoint...")
    savefilename = os.path.join(save_path, 'final' + '.tar')

    torch.save(
        {
            'iters': total_iters,
            'state_dict': model.state_dict(),
            'train_loss': total_train_loss / len(TrainImgLoader),
            "optimizer": optimizer.state_dict()
        }, savefilename)
Ejemplo n.º 11
0
def main():
    """Run HSM stereo inference on a Middlebury-2014-style dataset tree.

    Builds the HSM model (optionally initialised from --loadmodel, dropping
    'disp' weights so the disparity range can be re-configured per scene),
    performs a CUDA warm-up forward pass, then for each scene folder predicts
    disparity and entropy maps and writes .npy/.png/.pfm outputs plus a
    timing file into --outdir.
    """
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument('--datapath',
                        default='./data-mbtest/',
                        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--outdir', default='output', help='output dir')
    parser.add_argument('--clean',
                        type=float,
                        default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument('--testres',
                        type=float,
                        default=0.5,
                        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp',
                        type=float,
                        default=-1,
                        help='maximum disparity to search for')
    parser.add_argument(
        '--level',
        type=int,
        default=1,
        help='output level of output, default is level 1 (stage 3),\
                              can also use level 2 (stage 2) or level 3 (stage 1)'
    )
    parser.add_argument('--dtype', type=int)
    # BUGFIX: args.training_only is read below when deciding whether to also
    # process the 'test' split, but the flag was never declared, so that line
    # raised AttributeError. Declaring it (default False) restores the intent
    # of processing both splits unless asked otherwise.
    parser.add_argument('--training_only',
                        action='store_true',
                        help='process only the training split')
    args = parser.parse_args()

    # construct model (128 is a base maximum disparity; it is re-set per
    # scene below from the calibration file)
    model = hsm(128, args.clean, level=args.level)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # drop disparity-regression weights so they can be rebuilt for the
        # per-scene maxdisp chosen below
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # dry run: one throw-away forward pass to warm up CUDA so the per-scene
    # timing below is stable
    multip = 48
    imgL = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgR = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgL = Variable(torch.FloatTensor(imgL).cuda())
    imgR = Variable(torch.FloatTensor(imgR).cuda())
    with torch.no_grad():
        model.eval()
        pred_disp, entropy = model(imgL, imgR)

    # Get arguments.
    # NOTE(review): this takes the raw first CLI token as the method name,
    # which only works for a specific calling convention (first argument is
    # the method name, not an option) — confirm against the caller.
    method_name = sys.argv[1]

    # dataset-specific test-time resolution ratios
    if args.dtype == 0:  # KITTI
        args.testres = 1.8
    elif args.dtype == 1:  # Middlebury
        args.testres = 1
    elif args.dtype == 2:  # ETH
        args.testres = 3.5  # Gengsahn said it's between 3~4. Find with linear grid search

    processed = get_transform()
    model.eval()

    datasets_dir_path = "datasets_middlebury2014"

    folders = [os.path.join(datasets_dir_path, 'training')]
    if not args.training_only:
        folders.append(os.path.join(datasets_dir_path, 'test'))

    for folder in folders:
        datasets = [
            dataset for dataset in os.listdir(folder)
            if os.path.isdir(os.path.join(folder, dataset))
        ]

        for dataset_name in datasets:
            im0_path = os.path.join(folder, dataset_name, 'im0.png')
            im1_path = os.path.join(folder, dataset_name, 'im1.png')
            # (removed unused locals: a parsed-but-unused calib dict and an
            # unused output_dir_path; calib.txt is re-read below for ndisp)

            # keep only RGB channels; drop a possible alpha channel
            imgL_o = (skimage.io.imread(im0_path).astype('float32'))[:, :, :3]
            imgR_o = (skimage.io.imread(im1_path).astype('float32'))[:, :, :3]
            imgsize = imgL_o.shape[:2]

            if args.max_disp > 0:
                max_disp = int(args.max_disp)
            else:
                # read the scene's maximum disparity from calib.txt
                # (line 7 is expected to be 'ndisp=<value>')
                path_to_replace = os.path.basename(os.path.normpath(im0_path))
                with open(im0_path.replace(path_to_replace, 'calib.txt')) as f:
                    lines = f.readlines()
                    max_disp = int(int(lines[6].split('=')[-1]))

            ## change max disp: round the scaled search range up to the next
            ## multiple of 64 (the cost volume needs a /64-divisible range)
            tmpdisp = int(max_disp * args.testres // 64 * 64)
            if (max_disp * args.testres / 64 * 64) > tmpdisp:
                model.module.maxdisp = tmpdisp + 64
            else:
                model.module.maxdisp = tmpdisp
            if model.module.maxdisp == 64: model.module.maxdisp = 128
            # rebuild the disparity-regression heads for the new range
            model.module.disp_reg8 = disparityregression(
                model.module.maxdisp, 16).cuda()
            model.module.disp_reg16 = disparityregression(
                model.module.maxdisp, 16).cuda()
            model.module.disp_reg32 = disparityregression(
                model.module.maxdisp, 32).cuda()
            model.module.disp_reg64 = disparityregression(
                model.module.maxdisp, 64).cuda()
            print("max disparity = " + str(model.module.maxdisp))

            # resize to the test-time resolution
            imgL_o = cv2.resize(imgL_o,
                                None,
                                fx=args.testres,
                                fy=args.testres,
                                interpolation=cv2.INTER_CUBIC)
            imgR_o = cv2.resize(imgR_o,
                                None,
                                fx=args.testres,
                                fy=args.testres,
                                interpolation=cv2.INTER_CUBIC)
            imgL = processed(imgL_o).numpy()
            imgR = processed(imgR_o).numpy()

            imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
            imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

            ##fast pad: zero-pad top/left so H and W are multiples of 64
            max_h = int(imgL.shape[2] // 64 * 64)
            max_w = int(imgL.shape[3] // 64 * 64)
            if max_h < imgL.shape[2]: max_h += 64
            if max_w < imgL.shape[3]: max_w += 64

            top_pad = max_h - imgL.shape[2]
            left_pad = max_w - imgL.shape[3]
            imgL = np.lib.pad(imgL,
                              ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                              mode='constant',
                              constant_values=0)
            imgR = np.lib.pad(imgR,
                              ((0, 0), (0, 0), (top_pad, 0), (0, left_pad)),
                              mode='constant',
                              constant_values=0)

            # test
            imgL = Variable(torch.FloatTensor(imgL).cuda())
            imgR = Variable(torch.FloatTensor(imgR).cuda())
            with torch.no_grad():
                torch.cuda.synchronize()
                start_time = time.time()
                pred_disp, entropy = model(imgL, imgR)
                torch.cuda.synchronize()
                ttime = (time.time() - start_time)
                print('time = %.2f' % (ttime * 1000))
            pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

            # undo the padding (imgL_o was resized above, so its shape is the
            # unpadded network-input size)
            top_pad = max_h - imgL_o.shape[0]
            left_pad = max_w - imgL_o.shape[1]
            entropy = entropy[top_pad:, :pred_disp.shape[1] -
                              left_pad].cpu().numpy()
            pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

            # save predictions
            idxname = im0_path.split('/')[-2]
            if not os.path.exists('%s/%s' % (args.outdir, idxname)):
                os.makedirs('%s/%s' % (args.outdir, idxname))
            idxname = '%s/disp0%s' % (idxname, method_name)

            # resize back to the original (highres) image size; disparities
            # scale with the inverse of the test-time ratio
            pred_disp = cv2.resize(pred_disp / args.testres,
                                   (imgsize[1], imgsize[0]),
                                   interpolation=cv2.INTER_LINEAR)

            # clip while keep inf: mark inf and NaN pixels invalid
            invalid = np.logical_or(pred_disp == np.inf,
                                    pred_disp != pred_disp)
            pred_disp[invalid] = np.inf

            np.save('%s/%s-disp.npy' % (args.outdir, idxname.split('/')[0]),
                    (pred_disp))
            np.save('%s/%s-ent.npy' % (args.outdir, idxname.split('/')[0]),
                    (entropy))
            cv2.imwrite(
                '%s/%s-disp.png' % (args.outdir, idxname.split('/')[0]),
                pred_disp / pred_disp[~invalid].max() * 255)
            cv2.imwrite('%s/%s-ent.png' % (args.outdir, idxname.split('/')[0]),
                        entropy / entropy.max() * 255)

            with open('%s/%s.pfm' % (args.outdir, idxname), 'w') as f:
                save_pfm(f, pred_disp[::-1, :])
            with open(
                    '%s/%s/time%s.txt' %
                (args.outdir, idxname.split('/')[0], method_name), 'w') as f:
                f.write(str(ttime))

    torch.cuda.empty_cache()
# Ejemplo n.º 12 (0)
def main():
    """Run HSM stereo inference over an RVC dataset, logging to wandb.

    Loads the model (optionally from --loadmodel, dropping 'disp' weights so
    the disparity range can be reconfigured per image), iterates RVCDataset
    one pair at a time, adapts test resolution and maximum disparity per
    dataset type, and writes disparity/entropy outputs (.png/.pfm plus a
    timing file) under --name, mirroring inputs and outputs to wandb.
    """
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument(
        '--datapath',
        default="/home/isaac/rvc_devkit/stereo/datasets_middlebury2014",
        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--name',
                        default='rvc_highres_output',
                        help='output dir')
    parser.add_argument('--clean',
                        type=float,
                        default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument(
        '--testres',
        type=float,
        default=-1,  # -1 selects a per-dataset-type resolution below
        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp',
                        type=float,
                        default=-1,
                        help='maximum disparity to search for')
    parser.add_argument(
        '--level',
        type=int,
        default=1,
        help='output level of output, default is level 1 (stage 3),\
                              can also use level 2 (stage 2) or level 3 (stage 1)'
    )
    parser.add_argument('--debug_image', type=str, default=None)
    parser.add_argument("--eth_testres", type=int, default=3.5)
    args = parser.parse_args()

    wandb.init(name=args.name,
               project="rvc_stereo",
               save_code=True,
               magic=True,
               config=args)

    # testres == -1 means: pick the resolution ratio per dataset type in the
    # loop below
    use_adaptive_testres = False
    if args.testres == -1:
        use_adaptive_testres = True

    # construct model
    model = hsm(128, args.clean, level=args.level)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # drop disparity-regression weights; they are rebuilt per image below
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    model.eval()

    if args.testres > 0:
        dataset = RVCDataset(args.datapath, testres=args.testres)
    else:
        dataset = RVCDataset(args.datapath, eth_testres=args.eth_testres)
    dataloader = DataLoader(dataset,
                            batch_size=1,
                            shuffle=False,
                            num_workers=0)
    steps = 0
    for (imgL, imgR, max_disp, origianl_image_size, dataset_type,
         img_name) in dataloader:
        # Todo: this is a hot fix. Must be fixed to handle batchsize greater than 1
        img_name = img_name[0]

        # was `!= None` / `not ... in`; identity and containment tests are
        # the idiomatic forms
        if args.debug_image is not None and args.debug_image not in img_name:
            continue

        print(img_name)

        if use_adaptive_testres:
            if dataset_type == 0:  # Middlebury
                args.testres = 1
            elif dataset_type == 2:  # presumably KITTI (matches the 1.8 KITTI setting elsewhere) — TODO confirm
                args.testres = 1.8
            elif dataset_type == 1:  # ETH: Gengsahn said it's between 3~4. Find with linear grid search
                args.testres = 3.5
            else:
                raise ValueError(
                    "name of the folder does not contain any of: kitti, middlebury, eth3d"
                )

        if args.max_disp > 0:
            max_disp = int(args.max_disp)

        ## change max disp: round the scaled search range up to the next
        ## multiple of 64 (the cost volume needs a /64-divisible range)
        tmpdisp = int(max_disp * args.testres // 64 * 64)
        if (max_disp * args.testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64: model.module.maxdisp = 128
        # rebuild the disparity-regression heads for the new range
        model.module.disp_reg8 = disparityregression(model.module.maxdisp,
                                                     16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp,
                                                      16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp,
                                                      32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp,
                                                      64).cuda()
        print("    max disparity = " + str(model.module.maxdisp))

        ##fast pad: target H and W rounded up to multiples of 64
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]: max_h += 64
        if max_w < imgL.shape[3]: max_w += 64

        wandb.log(
            {
                "imgL":
                wandb.Image(imgL,
                            caption=img_name + ", " + str(tuple(imgL.shape))),
                "imgR":
                wandb.Image(imgR,
                            caption=img_name + ", " + str(tuple(imgR.shape)))
            },
            step=steps)

        with torch.no_grad():
            torch.cuda.synchronize()
            start_time = time.time()

            pred_disp, entropy = model(imgL, imgR)

            torch.cuda.synchronize()
            ttime = (time.time() - start_time)
            # BUGFIX: removed a leftover debug torch.save(pred_disp,
            # "/home/isaac/high-res-stereo/debug/rvc/out.pt") — the
            # hard-coded absolute path fails on any other machine.

            print('    time = %.2f' % (ttime * 1000))
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # undo padding; origianl_image_size appears to hold batched
        # (height, width) tensors — int() makes the pads plain Python ints
        top_pad = int(max_h - origianl_image_size[0][0])
        left_pad = int(max_w - origianl_image_size[1][0])
        entropy = entropy[top_pad:, :pred_disp.shape[1] -
                          left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # save predictions
        idxname = img_name
        if not os.path.exists('%s/%s' % (args.name, idxname)):
            os.makedirs('%s/%s' % (args.name, idxname))
        idxname = '%s/disp0%s' % (idxname, args.name)

        # resize to highres
        # BUGFIX: cv2.resize needs a (width, height) tuple of ints; the
        # elements of origianl_image_size are 1-element tensors (they are
        # indexed with [i][0] above), so convert explicitly.
        pred_disp = cv2.resize(
            pred_disp / args.testres,
            (int(origianl_image_size[1][0]), int(origianl_image_size[0][0])),
            interpolation=cv2.INTER_LINEAR)

        # clip while keep inf: mark inf and NaN pixels invalid
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        pred_disp_png = pred_disp / pred_disp[~invalid].max() * 255
        cv2.imwrite('%s/%s/disp.png' % (args.name, idxname.split('/')[0]),
                    pred_disp_png)
        entorpy_png = entropy / entropy.max() * 255
        cv2.imwrite('%s/%s/ent.png' % (args.name, idxname.split('/')[0]),
                    entorpy_png)

        out_pfm_path = '%s/%s.pfm' % (args.name, idxname)
        with open(out_pfm_path, 'w') as f:
            save_pfm(f, pred_disp[::-1, :])
        with open(
                '%s/%s/time%s.txt' %
            (args.name, idxname.split('/')[0], args.name), 'w') as f:
            f.write(str(ttime))
        print("    output = " + out_pfm_path)

        caption = img_name + ", " + str(tuple(
            pred_disp_png.shape)) + ", max disparity = " + str(
                max_disp) + ", time = " + str(ttime)
        wandb.log(
            {
                "disparity": wandb.Image(pred_disp_png, caption=caption),
                "entropy": wandb.Image(entorpy_png,
                                       caption=str(entorpy_png.shape))
            },
            step=steps)
        torch.cuda.empty_cache()
        steps += 1