def main():
    """Run the stereo model on every test pair and write the predictions.

    Relies on the module-level ``args``, ``model``, ``test_left_img`` and
    ``test_right_img``.  Each pair is rescaled by ``args.testres``,
    zero-padded so height and width are multiples of 64, and passed through
    the model.  The padding is then cropped off and the disparity is resized
    back to the input resolution (values divided by ``args.testres``).

    For a left image located at ``.../<scene>/im0.png`` the following files
    are written below ``args.outdir``: ``<scene>-disp.npy``,
    ``<scene>-ent.npy``, ``<scene>-disp.png``, ``<scene>-ent.png``,
    ``<scene>/disp0HSM.pfm`` and ``<scene>/timeHSM.txt``.
    """
    processed = get_transform()
    model.eval()
    for inx, left_path in enumerate(test_left_img):
        print(left_path)
        imgL_o = skimage.io.imread(left_path).astype('float32')[:, :, :3]
        imgR_o = skimage.io.imread(
            test_right_img[inx]).astype('float32')[:, :, :3]
        imgsize = imgL_o.shape[:2]

        if args.max_disp > 0:
            # Round a user-supplied search range down to a multiple of 16.
            if args.max_disp % 16 != 0:
                args.max_disp = 16 * math.floor(args.max_disp / 16)
            max_disp = int(args.max_disp)
        else:
            # Otherwise read it from the 7th line of the scene's calib.txt.
            with open(left_path.replace('im0.png', 'calib.txt')) as f:
                lines = f.readlines()
            max_disp = int(lines[6].split('=')[-1])

        ## change max disp: scale by testres and round up to a multiple of 64
        tmpdisp = int(max_disp * args.testres // 64 * 64)
        if (max_disp * args.testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64:
            model.module.maxdisp = 128
        # NOTE(review): disp_reg8 is built with divisor 16, the same as
        # disp_reg16 -- kept as in the original; confirm it is intended.
        model.module.disp_reg8 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp, 32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp, 64).cuda()
        print(model.module.maxdisp)

        # resize
        imgL_o = cv2.resize(imgL_o, None, fx=args.testres, fy=args.testres,
                            interpolation=cv2.INTER_CUBIC)
        imgR_o = cv2.resize(imgR_o, None, fx=args.testres, fy=args.testres,
                            interpolation=cv2.INTER_CUBIC)
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        ##fast pad: round H and W up to the next multiple of 64
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]:
            max_h += 64
        if max_w < imgL.shape[3]:
            max_w += 64
        top_pad = max_h - imgL.shape[2]
        # Despite its name, left_pad is applied to the right-hand edge.
        left_pad = max_w - imgL.shape[3]
        pad_width = ((0, 0), (0, 0), (top_pad, 0), (0, left_pad))
        # np.pad is the documented public function (np.lib.pad is an alias).
        imgL = np.pad(imgL, pad_width, mode='constant', constant_values=0)
        imgR = np.pad(imgR, pad_width, mode='constant', constant_values=0)

        # test
        imgL = Variable(torch.FloatTensor(imgL).cuda())
        imgR = Variable(torch.FloatTensor(imgR).cuda())
        with torch.no_grad():
            # Synchronize around the forward pass so the wall-clock time
            # includes the GPU work.
            torch.cuda.synchronize()
            start_time = time.time()
            pred_disp, entropy = model(imgL, imgR)
            torch.cuda.synchronize()
            ttime = (time.time() - start_time)
            print('time = %.2f' % (ttime * 1000))
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # Crop the padding off again (same top_pad / left_pad as above).
        # entropy is cropped first because it uses pred_disp's padded width.
        entropy = entropy[top_pad:, :pred_disp.shape[1] - left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # save predictions
        scene = left_path.split('/')[-2]
        os.makedirs('%s/%s' % (args.outdir, scene), exist_ok=True)

        # resize to highres; disparities shrink/grow with the image width
        pred_disp = cv2.resize(pred_disp / args.testres,
                               (imgsize[1], imgsize[0]),
                               interpolation=cv2.INTER_LINEAR)

        # clip while keep inf: mark inf and NaN (x != x) pixels as inf
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        np.save('%s/%s-disp.npy' % (args.outdir, scene), pred_disp)
        np.save('%s/%s-ent.npy' % (args.outdir, scene), entropy)
        cv2.imwrite('%s/%s-disp.png' % (args.outdir, scene),
                    pred_disp / pred_disp[~invalid].max() * 255)
        cv2.imwrite('%s/%s-ent.png' % (args.outdir, scene),
                    entropy / entropy.max() * 255)

        # The rows are flipped vertically before being handed to save_pfm.
        with open('%s/%s/disp0HSM.pfm' % (args.outdir, scene), 'w') as f:
            save_pfm(f, pred_disp[::-1, :])
        with open('%s/%s/timeHSM.txt' % (args.outdir, scene), 'w') as f:
            f.write(str(ttime))

        torch.cuda.empty_cache()
def main():
    """Evaluate the HSM model on an RVC / KITTI dataset and dump the results.

    Parses the command line, builds the model (optionally loading weights
    from ``--loadmodel``), iterates over the dataset and, per sample, writes
    below ``output/<name>/<image>/``: ``disp.png``, ``gt_disp.png``,
    ``ent.png``, colour-mapped versions, difference maps,
    ``disp0<name>.pfm`` and ``time_<name>.txt``.  With ``--score_results``
    the prediction is scored through ``score_rvc.get_metrics`` and running
    per-dataset averages are kept.  With ``--prepare_kitti`` (plus
    ``--all_data`` or ``--kitti``) the output is converted with
    ``prepare_kitti`` and the KITTI devkit evaluator is run.

    Raises:
        IOError: if one of the PNG outputs cannot be written.
    """
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument(
        '--datapath',
        default="/home/isaac/rvc_devkit/stereo/datasets_middlebury2014",
        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--name', default='rvc_highres_output',
                        help='output dir')
    parser.add_argument('--clean', type=float, default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument(
        '--testres',
        type=float,
        default=0.5,  # default used to be 0.5
        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp', type=float, default=-1,
                        help='maximum disparity to search for')
    parser.add_argument(
        '--level',
        type=int,
        default=1,
        help='output level of output, default is level 1 (stage 3),\
 can also use level 2 (stage 2) or level 3 (stage 1)')
    parser.add_argument('--debug_image', type=str, default=None)
    parser.add_argument("--eth_testres", type=float, default=3.5)
    parser.add_argument("--score_results", action="store_true", default=False)
    parser.add_argument("--save_weights", action="store_true", default=False)
    parser.add_argument("--kitti", action="store_true", default=False)
    parser.add_argument("--eth", action="store_true", default=False)
    parser.add_argument("--mb", action="store_true", default=False)
    parser.add_argument("--all_data", action="store_true", default=False)
    parser.add_argument("--eval_train_only", action="store_true", default=False)
    parser.add_argument("--debug", action="store_true", default=False)
    parser.add_argument("--batchsize", type=int, default=16)
    parser.add_argument("--prepare_kitti", action="store_true", default=False)

    args = parser.parse_args()

    # wandb.init(name=args.name, project="high-res-stereo", save_code=True, magic=True, config=args)

    os.makedirs("output", exist_ok=True)

    # Running lists of metric values, one dict per dataset family.
    kitti_metrics = {}
    eth_metrics = {}
    mb_metrics = {}
    other_metrics = {}  # used when the dataset type is unknown

    def _first(value, default=None):
        """Return the first element of a batched value; *default* for None."""
        if value is None:
            return default
        if isinstance(value, (str, bytes, int, float)):
            return value
        return value[0]

    def _write_image(path, image):
        """Write *image* with cv2.imwrite and raise if the write fails.

        The write must not live inside an ``assert``: asserts are removed
        under ``python -O``, which would silently skip writing the file.
        """
        if not cv2.imwrite(path, image):
            raise IOError('could not write image: %s' % path)

    # construct model
    model = hsm(128, args.clean, level=args.level)
    model = convert_model(model)
    # wandb.watch(model)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # Drop every entry whose key contains 'disp'; the regression modules
        # are rebuilt whenever the max disparity changes.
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    model.eval()

    if args.prepare_kitti:
        _, _, _, left_val, right_val, disp_val_L = lk15.dataloader(
            '/data/private/KITTI2015/data_scene_flow/training/',
            val=True)  # change to trainval when finetuning on KITTI
        dataset = DA.myImageFloder(left_val, right_val, disp_val_L,
                                   rand_scale=[1, 1], order=0)
    else:
        dataset = RVCDataset(args)
    dataloader = DataLoader(dataset, batch_size=args.batchsize,
                            shuffle=False, num_workers=0)

    steps = 0
    # Per-sample metadata.  The active loop header only unpacks the images
    # and the ground truth, so these keep the defaults below; they are read
    # through _first() and never overwritten, so every iteration sees the
    # same, valid values.  Swapping in the commented-out loop header makes
    # them batched values again, which _first() handles as well.
    max_disp = None
    origianl_image_size = None
    top_pad = None
    left_pad = None
    testres = [args.testres]
    dataset_type = None
    data_path = [args.datapath]
    # for (imgL, imgR, gt_disp_raw, max_disp, origianl_image_size, top_pad, left_pad, testres, dataset_type , data_path) in dataloader:
    for (imgL, imgR, gt_disp_raw) in dataloader:
        # Todo: this is a hot fix. Must be fixed to handle batchsize greater than 1
        sample_path = _first(data_path)
        img_name = os.path.basename(os.path.normpath(sample_path))
        sample_testres = float(_first(testres))
        gt_disp_raw = gt_disp_raw[0]
        sample_type = None if dataset_type is None else int(_first(dataset_type))

        if sample_type == 0:
            cum_metrics = mb_metrics
        elif sample_type == 1:
            cum_metrics = eth_metrics
        elif sample_type == 2:
            cum_metrics = kitti_metrics
        else:
            cum_metrics = other_metrics

        print(img_name)

        if args.max_disp > 0:
            sample_max_disp = int(args.max_disp)
        elif max_disp is not None:
            sample_max_disp = int(_first(max_disp))
        else:
            sample_max_disp = None

        if sample_max_disp is not None:
            ## change max disp: scale by testres, round up to a multiple of 64
            tmpdisp = int(sample_max_disp * sample_testres // 64 * 64)
            if (sample_max_disp * sample_testres / 64 * 64) > tmpdisp:
                model.module.maxdisp = tmpdisp + 64
            else:
                model.module.maxdisp = tmpdisp
            if model.module.maxdisp == 64:
                model.module.maxdisp = 128
            model.module.disp_reg8 = disparityregression(model.module.maxdisp, 16).cuda()
            model.module.disp_reg16 = disparityregression(model.module.maxdisp, 16).cuda()
            model.module.disp_reg32 = disparityregression(model.module.maxdisp, 32).cuda()
            model.module.disp_reg64 = disparityregression(model.module.maxdisp, 64).cuda()
        else:
            # NOTE(review): no max disparity is available for this sample, so
            # the model keeps its current range and that value is reported /
            # passed to scoring -- confirm this is the wanted fallback.
            sample_max_disp = int(model.module.maxdisp)
        print("    max disparity = " + str(model.module.maxdisp))

        # wandb.log({"imgL": wandb.Image(imgL, caption=img_name + ", " + str(tuple(imgL.shape))),
        #            "imgR": wandb.Image(imgR, caption=img_name + ", " + str(tuple(imgR.shape)))}, step=steps)

        with torch.no_grad():
            torch.cuda.synchronize()
            start_time = time.time()

            # * output dimensions same as input dimensions
            # * (ex: imgL[1, 3, 704, 2240] then pred_disp[1, 704, 2240])
            pred_disp, entropy = model(imgL, imgR)

            torch.cuda.synchronize()
            ttime = (time.time() - start_time)

            print('    time = %.2f' % (ttime * 1000))

        # * squeeze (remove dimensions with size 1) (ex: pred_disp[1, 704, 2240] ->[704, 2240])
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # Crop the padding; with no padding information nothing is cropped.
        pad_top = int(_first(top_pad, 0))
        pad_right = int(_first(left_pad, 0))
        # entropy is cropped first: it relies on pred_disp's uncropped width.
        entropy = entropy[pad_top:, :pred_disp.shape[1] - pad_right].cpu().numpy()
        pred_disp = pred_disp[pad_top:, :pred_disp.shape[1] - pad_right]

        # save predictions
        scene_dir = 'output/%s/%s' % (args.name, img_name)
        os.makedirs(scene_dir, exist_ok=True)

        if origianl_image_size is None:
            # NOTE(review): without an explicit original size the ground
            # truth's size is used as the target -- confirm.
            out_h, out_w = (int(d) for d in gt_disp_raw.shape[-2:])
        else:
            out_h = int(_first(origianl_image_size[0]))
            out_w = int(_first(origianl_image_size[1]))

        # * shrink image back to the GT size (ex: pred_disp[675, 2236] -> [375, 1242])
        # ! pred_disp is divided element-wise by testres because the image is
        # ! rescaled, so pixel distances change by the same factor
        pred_disp_raw = cv2.resize(pred_disp / sample_testres,
                                   (out_w, out_h),
                                   interpolation=cv2.INTER_LINEAR)
        # Same array object as pred_disp_raw (used for scoring below), so the
        # in-place inf marking is visible through both names.
        pred_disp = pred_disp_raw
        gt_disp = gt_disp_raw.numpy()

        # * clip while keep inf
        # `x != x` is True only where x is NaN, so this marks inf and NaN
        # pixels and stores them uniformly as inf.
        pred_invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[pred_invalid] = np.inf
        pred_disp_png = (pred_disp * 256).astype("uint16")

        gt_invalid = np.logical_or(gt_disp == np.inf, gt_disp != gt_disp)
        gt_disp[gt_invalid] = 0
        gt_disp_png = (gt_disp * 256).astype("uint16")
        entorpy_png = (entropy * 256).astype('uint16')

        # ! raw output to png
        pred_disp_path = '%s/disp.png' % scene_dir
        gt_disp_path = '%s/gt_disp.png' % scene_dir
        _write_image(pred_disp_path, pred_disp_png)
        _write_image(gt_disp_path, gt_disp_png)
        _write_image('%s/ent.png' % scene_dir, entorpy_png)

        # ! Experimental color maps
        gt_colormap = convert_to_colormap(gt_disp_png)
        pred_colormap = convert_to_colormap(pred_disp_png)
        entropy_colormap = convert_to_colormap(entorpy_png)  # only for the wandb log below
        _write_image('%s/gt_disp_color.png' % scene_dir, gt_colormap)
        _write_image('%s/disp_color.png' % scene_dir, pred_colormap)

        # ! diff colormaps
        # Invalid GT pixels take the prediction's value so they show zero error.
        gt_disp_png[gt_invalid] = pred_disp_png[gt_invalid]
        # Widen to a signed type so the subtraction below cannot wrap around.
        gt_disp_png = gt_disp_png.astype("int32")
        pred_disp_png = pred_disp_png.astype("int32")
        signed_diff = gt_disp_png - pred_disp_png

        diff_colormap = convert_to_colormap(np.abs(signed_diff))
        # "false positive": prediction larger than GT (negative difference)
        false_positive_colormap = convert_to_colormap(
            np.abs(np.clip(signed_diff, None, 0)))
        # "false negative": prediction smaller than GT (positive difference)
        false_negative_colormap = convert_to_colormap(
            np.abs(np.clip(signed_diff, 0, None)))
        _write_image('%s/diff_color.png' % scene_dir, diff_colormap)
        _write_image('%s/false_positive_color.png' % scene_dir,
                     false_positive_colormap)
        _write_image('%s/false_negative_color.png' % scene_dir,
                     false_negative_colormap)

        out_pfm_path = '%s/disp0%s.pfm' % (scene_dir, args.name)
        with open(out_pfm_path, 'w') as f:
            save_pfm(f, pred_disp[::-1, :])
        with open('%s/time_%s.txt' % (scene_dir, args.name), 'w') as f:
            f.write(str(ttime))
        print("    output = " + out_pfm_path)

        caption = (img_name + ", " + str(tuple(pred_disp_png.shape)) +
                   ", max disparity = " + str(sample_max_disp) +
                   ", time = " + str(ttime))
        # read GT depthmap and upload as jpg

        # wandb.log({"disparity": wandb.Image(pred_colormap, caption=caption) , "gt": wandb.Image(gt_colormap), "entropy": wandb.Image(entropy_colormap, caption= str(entorpy_png.shape)),
        #            "diff":wandb.Image(diff_colormap), "false_positive":wandb.Image(false_positive_colormap), "false_negative":wandb.Image(false_negative_colormap)}, step=steps)

        torch.cuda.empty_cache()
        steps += 1

        # Todo: find out what mask0nocc does. It's probably not the same as KITTI's object map
        if sample_type == 2:
            obj_map_path = os.path.join(sample_path, "obj_map.png")
        else:
            obj_map_path = None

        if args.score_results:
            if pred_disp_raw.shape != gt_disp_raw.shape:  # pred_disp_raw[375 x 1242] gt_disp_raw[675 x 2236]
                # Resize to the GT size and scale the disparities with the width.
                ratio = float(gt_disp_raw.shape[1]) / pred_disp_raw.shape[1]
                disp_resized = cv2.resize(
                    pred_disp_raw,
                    (gt_disp_raw.shape[1], gt_disp_raw.shape[0])) * ratio
                pred_disp_raw = disp_resized  # [675 x 2236]
            # if args.debug:
            #     out_resized_pfm_path = 'output/%s/%s/pred_scored.pfm' % (args.name, img_name)
            #     with open(out_resized_pfm_path, 'w') as f:
            #         save_pfm(f, pred_disp_raw)

            #     out_resized_gt_path = 'output/%s/%s/gt_scored.pfm' % (args.name, img_name)
            #     with open(out_resized_gt_path, 'w') as f:
            #         save_pfm(f, gt_disp_raw.numpy())

            metrics = score_rvc.get_metrics(
                pred_disp_raw,
                gt_disp_raw,
                sample_max_disp,
                sample_type,
                scene_dir,
                disp_path=pred_disp_path,
                gt_path=gt_disp_path,
                obj_map_path=obj_map_path,
                debug=args.debug)

            avg_metrics = {}
            for (key, val) in metrics.items():
                cum_metrics.setdefault(key, []).append(val)
                avg_metrics["avg_" + key] = sum(cum_metrics[key]) / len(
                    cum_metrics[key])

            # wandb.log(metrics, step=steps)
            # wandb.log(avg_metrics, step=steps)

    # if args.save_weights and os.path.exists(args.loadmodel):
    #     wandb.save(args.loadmodel)

    if args.prepare_kitti and (args.all_data or args.kitti):
        in_path = 'output/%s' % (args.name)
        out_path = "/home/isaac/high-res-stereo/kitti_submission_output"
        out_path = prepare_kitti(in_path, out_path)
        subprocess.run(
            ["/home/isaac/KITTI2015_devkit/cpp/eval_scene_flow", out_path])
        print("KITTI submission evaluation saved to: " + out_path)
def main():
    """Run the HSM model over the Middlebury-2014 style dataset folders.

    Parses the command line, builds the model (optionally loading weights
    from ``--loadmodel``), performs one dry-run forward pass on zero images,
    and then processes every sub-directory of
    ``datasets_middlebury2014/training`` (and ``.../test`` unless
    ``--training_only`` is given).  Each sub-directory is expected to hold
    ``im0.png``, ``im1.png`` and ``calib.txt``.

    Per scene the following files are written below ``args.outdir``:
    ``<scene>-disp.npy``, ``<scene>-ent.npy``, ``<scene>-disp.png``,
    ``<scene>-ent.png``, ``<scene>/disp0<method>.pfm`` and
    ``<scene>/time<method>.txt``.

    ``--dtype`` overrides ``--testres``: 0 -> 1.8 (KITTI), 1 -> 1
    (Middlebury), 2 -> 3.5 (ETH).
    """
    parser = argparse.ArgumentParser(description='HSM')
    # The method name used to be read from sys.argv[1], which clashes with
    # argparse: a bare first argument was rejected as unrecognized, and
    # otherwise argv[1] was an option string (or missing).  It is now an
    # optional positional argument.
    parser.add_argument('method_name', nargs='?', default='HSM',
                        help='method name used in the output file names')
    parser.add_argument('--datapath', default='./data-mbtest/',
                        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--outdir', default='output', help='output dir')
    parser.add_argument('--clean', type=float, default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument('--testres', type=float, default=0.5,
                        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp', type=float, default=-1,
                        help='maximum disparity to search for')
    parser.add_argument(
        '--level',
        type=int,
        default=1,
        help='output level of output, default is level 1 (stage 3),\
 can also use level 2 (stage 2) or level 3 (stage 1)')
    parser.add_argument('--dtype', type=int)
    # Read further down but previously never declared (AttributeError).
    parser.add_argument('--training_only', action='store_true', default=False,
                        help='process only the training folder')

    args = parser.parse_args()

    # construct model
    model = hsm(128, args.clean, level=args.level)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # Drop every entry whose key contains 'disp'; the regression modules
        # are rebuilt per scene below.
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # dry run: one forward pass on all-zero images before the timed runs
    multip = 48
    imgL = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgR = np.zeros((1, 3, 24 * multip, 32 * multip))
    imgL = Variable(torch.FloatTensor(imgL).cuda())
    imgR = Variable(torch.FloatTensor(imgR).cuda())
    with torch.no_grad():
        model.eval()
        pred_disp, entropy = model(imgL, imgR)

    # Get arguments.
    method_name = args.method_name

    if args.dtype == 0:  # KITTI
        args.testres = 1.8
    elif args.dtype == 1:  # Middlebury
        args.testres = 1
    elif args.dtype == 2:  # ETH
        args.testres = 3.5  # Gengsahn said it's between 3~4. Find with linear grid search

    processed = get_transform()
    model.eval()

    # NOTE(review): the dataset root is hard-coded; --datapath is parsed but
    # not used here -- confirm whether it should replace this constant.
    datasets_dir_path = "datasets_middlebury2014"
    folders = [os.path.join(datasets_dir_path, 'training')]
    if not args.training_only:
        folders.append(os.path.join(datasets_dir_path, 'test'))

    for folder in folders:
        datasets = [
            dataset for dataset in os.listdir(folder)
            if os.path.isdir(os.path.join(folder, dataset))
        ]

        for dataset_name in datasets:
            im0_path = os.path.join(folder, dataset_name, 'im0.png')
            im1_path = os.path.join(folder, dataset_name, 'im1.png')
            calib_path = os.path.join(folder, dataset_name, 'calib.txt')
            # The parsed result is unused below; the call is kept as in the
            # original (presumably it validates the file -- TODO confirm).
            calib = ReadMiddlebury2014CalibFile(calib_path)

            imgL_o = skimage.io.imread(im0_path).astype('float32')[:, :, :3]
            imgR_o = skimage.io.imread(im1_path).astype('float32')[:, :, :3]
            imgsize = imgL_o.shape[:2]

            if args.max_disp > 0:
                max_disp = int(args.max_disp)
            else:
                # The value after '=' on the 7th line of calib.txt.
                with open(calib_path) as f:
                    lines = f.readlines()
                max_disp = int(lines[6].split('=')[-1])

            ## change max disp: scale by testres, round up to a multiple of 64
            tmpdisp = int(max_disp * args.testres // 64 * 64)
            if (max_disp * args.testres / 64 * 64) > tmpdisp:
                model.module.maxdisp = tmpdisp + 64
            else:
                model.module.maxdisp = tmpdisp
            if model.module.maxdisp == 64:
                model.module.maxdisp = 128
            model.module.disp_reg8 = disparityregression(
                model.module.maxdisp, 16).cuda()
            model.module.disp_reg16 = disparityregression(
                model.module.maxdisp, 16).cuda()
            model.module.disp_reg32 = disparityregression(
                model.module.maxdisp, 32).cuda()
            model.module.disp_reg64 = disparityregression(
                model.module.maxdisp, 64).cuda()
            print("max disparity = " + str(model.module.maxdisp))

            # resize
            imgL_o = cv2.resize(imgL_o, None, fx=args.testres,
                                fy=args.testres,
                                interpolation=cv2.INTER_CUBIC)
            imgR_o = cv2.resize(imgR_o, None, fx=args.testres,
                                fy=args.testres,
                                interpolation=cv2.INTER_CUBIC)
            imgL = processed(imgL_o).numpy()
            imgR = processed(imgR_o).numpy()
            imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
            imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

            ##fast pad: round H and W up to the next multiple of 64
            max_h = int(imgL.shape[2] // 64 * 64)
            max_w = int(imgL.shape[3] // 64 * 64)
            if max_h < imgL.shape[2]:
                max_h += 64
            if max_w < imgL.shape[3]:
                max_w += 64
            top_pad = max_h - imgL.shape[2]
            # Despite its name, left_pad is applied to the right-hand edge.
            left_pad = max_w - imgL.shape[3]
            pad_width = ((0, 0), (0, 0), (top_pad, 0), (0, left_pad))
            # np.pad is the documented public function (np.lib.pad is an alias).
            imgL = np.pad(imgL, pad_width, mode='constant', constant_values=0)
            imgR = np.pad(imgR, pad_width, mode='constant', constant_values=0)

            # test
            imgL = Variable(torch.FloatTensor(imgL).cuda())
            imgR = Variable(torch.FloatTensor(imgR).cuda())
            with torch.no_grad():
                torch.cuda.synchronize()
                start_time = time.time()
                pred_disp, entropy = model(imgL, imgR)
                torch.cuda.synchronize()
                ttime = (time.time() - start_time)
                print('time = %.2f' % (ttime * 1000))
            pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

            # Crop the padding off again (same top_pad / left_pad as above).
            # entropy first: it relies on pred_disp's still-padded width.
            entropy = entropy[top_pad:, :pred_disp.shape[1] -
                              left_pad].cpu().numpy()
            pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

            # save predictions; the scene name is the dataset directory name
            scene = dataset_name
            os.makedirs('%s/%s' % (args.outdir, scene), exist_ok=True)

            # resize to highres
            pred_disp = cv2.resize(pred_disp / args.testres,
                                   (imgsize[1], imgsize[0]),
                                   interpolation=cv2.INTER_LINEAR)

            # clip while keep inf: mark inf and NaN (x != x) pixels as inf
            invalid = np.logical_or(pred_disp == np.inf,
                                    pred_disp != pred_disp)
            pred_disp[invalid] = np.inf

            np.save('%s/%s-disp.npy' % (args.outdir, scene), pred_disp)
            np.save('%s/%s-ent.npy' % (args.outdir, scene), entropy)
            cv2.imwrite('%s/%s-disp.png' % (args.outdir, scene),
                        pred_disp / pred_disp[~invalid].max() * 255)
            cv2.imwrite('%s/%s-ent.png' % (args.outdir, scene),
                        entropy / entropy.max() * 255)

            with open('%s/%s/disp0%s.pfm' % (args.outdir, scene, method_name),
                      'w') as f:
                save_pfm(f, pred_disp[::-1, :])
            with open('%s/%s/time%s.txt' % (args.outdir, scene, method_name),
                      'w') as f:
                f.write(str(ttime))

            torch.cuda.empty_cache()
def main():
    """Predict disparities for the validation pairs, save them and score them.

    Relies on the module-level ``args``, ``model``, ``left_val``,
    ``right_val``, ``disp_val_L`` and ``score_avg``.  Results go to
    ``./mb_submission_output/<args.name>``, which must not exist yet.

    For each pair the function writes ``<scene>/disp0HSM.pfm``,
    ``<scene>/timeHSM.txt`` and ``<scene>.png`` (disparity * 256 as uint16),
    logs images to wandb and appends the values returned by ``get_metrics``
    to ``score_avg``.  Afterwards every list in ``score_avg`` is replaced by
    its mean and the dict is printed and written to ``metrrics.txt``.

    Raises:
        FileExistsError: if the output directory already exists.
    """
    processed = get_transform()
    model.eval()

    # save predictions
    out_path = os.path.join("./mb_submission_output", args.name)
    # makedirs raises FileExistsError when the directory is already there
    # (same contract as before) and also creates a missing parent directory.
    os.makedirs(out_path)

    for (left_img_path, right_img_path, disp_path) in zip(left_val, right_val, disp_val_L):
        print(left_img_path)
        imgL_o = skimage.io.imread(left_img_path).astype('float32')[:, :, :3]
        imgR_o = skimage.io.imread(right_img_path).astype('float32')[:, :, :3]
        gt_o = readPFM(disp_path)[0]

        imgsize = imgL_o.shape[:2]

        # Dropping the last 7 characters of the path ('im0.png' has 7) leaves
        # the scene directory; max disparity is on the 7th line of calib.txt.
        with open(os.path.join(left_img_path[:-7], 'calib.txt')) as f:
            lines = f.readlines()
        max_disp = int(lines[6].split('=')[-1])

        ## change max disp: scale by testres, round up to a multiple of 64
        tmpdisp = int(max_disp * args.testres // 64 * 64)
        if (max_disp * args.testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64:
            model.module.maxdisp = 128
        model.module.disp_reg8 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp, 32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp, 64).cuda()

        # resize
        imgL_o = cv2.resize(imgL_o, None, fx=args.testres, fy=args.testres,
                            interpolation=cv2.INTER_CUBIC)
        imgR_o = cv2.resize(imgR_o, None, fx=args.testres, fy=args.testres,
                            interpolation=cv2.INTER_CUBIC)
        imgL = processed(imgL_o).numpy()
        imgR = processed(imgR_o).numpy()
        imgL = np.reshape(imgL, [1, 3, imgL.shape[1], imgL.shape[2]])
        imgR = np.reshape(imgR, [1, 3, imgR.shape[1], imgR.shape[2]])

        ##fast pad: round H and W up to the next multiple of 64
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]:
            max_h += 64
        if max_w < imgL.shape[3]:
            max_w += 64
        top_pad = max_h - imgL.shape[2]
        # Despite its name, left_pad is applied to the right-hand edge.
        left_pad = max_w - imgL.shape[3]
        pad_width = ((0, 0), (0, 0), (top_pad, 0), (0, left_pad))
        # np.pad is the documented public function (np.lib.pad is an alias).
        imgL = np.pad(imgL, pad_width, mode='constant', constant_values=0)
        imgR = np.pad(imgR, pad_width, mode='constant', constant_values=0)

        # test
        imgL = torch.FloatTensor(imgL)
        imgR = torch.FloatTensor(imgR)

        wandb.log(
            {"imgL": wandb.Image(imgL, caption=str(imgL.shape)),
             "imgR": wandb.Image(imgR, caption=str(imgR.shape))})

        imgL = imgL.cuda()
        imgR = imgR.cuda()

        with torch.no_grad():
            torch.cuda.synchronize()
            start_time = time.time()
            pred_disp, entropy = model(imgL, imgR)
            torch.cuda.synchronize()
            ttime = (time.time() - start_time)

        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # Crop the padding off again (same top_pad / left_pad as above).
        # entropy first: it relies on pred_disp's still-padded width.
        entropy = entropy[top_pad:, :pred_disp.shape[1] - left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        scene = left_img_path.split('/')[-2]
        os.makedirs('%s/%s' % (out_path, scene), exist_ok=True)

        # resize to highres
        pred_disp = cv2.resize(pred_disp / args.testres,
                               (imgsize[1], imgsize[0]),
                               interpolation=cv2.INTER_LINEAR)

        # clip while keep inf: mark inf and NaN (x != x) pixels as inf
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        # The PFM is written AFTER the resize so it holds the same
        # full-resolution, rescaled disparity that is scored below; it was
        # previously written from the cropped test-resolution prediction.
        with open('%s/%s/disp0HSM.pfm' % (out_path, scene), 'w') as f:
            save_pfm(f, pred_disp[::-1, :])
        with open('%s/%s/timeHSM.txt' % (out_path, scene), 'w') as f:
            f.write(str(ttime))

        pred_disp_png = (pred_disp * 256).astype('uint16')
        cv2.imwrite(os.path.join(out_path, scene + ".png"), pred_disp_png)
        entropy_png = (entropy * 256).astype('uint16')
        # cv2.imwrite(os.path.join(out_dir, img_name), entropy_png)

        wandb.log({"disp": wandb.Image(pred_disp_png, caption=str(pred_disp_png.shape)),
                   "entropy": wandb.Image(entropy_png, caption=str(entropy_png.shape))})

        metrics = get_metrics(gt_o, pred_disp, max_disp)

        for (key, val) in metrics.items():
            score_avg.setdefault(key, []).append(val)

        torch.cuda.empty_cache()

    # Collapse every list of per-image values into its mean.
    for key in list(score_avg):
        score_avg[key] = mean(score_avg[key])

    print(score_avg)

    # Must be opened for writing: the default read mode cannot be written to
    # and fails outright because the file does not exist yet.
    with open(os.path.join(out_path, "metrrics.txt"), 'w') as file:
        file.write(str(score_avg))
def main():
    """Run the HSM model over an ``RVCDataset`` and log the results to wandb.

    Parses the command line, builds the model (optionally loading weights
    from ``--loadmodel``) and iterates over the dataset with batch size 1.
    When ``--testres`` is left at -1 the rescale factor is chosen per sample
    from ``dataset_type``: 0 -> 1, 2 -> 1.8, 1 -> ``--eth_testres``
    (default 3.5).

    Per image the following files are written below ``<name>/<image>/``:
    ``disp.png``, ``ent.png``, ``disp0<name>.pfm`` and ``time<name>.txt``.

    Raises:
        ValueError: in adaptive mode when ``dataset_type`` is not 0, 1 or 2.
    """
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument(
        '--datapath',
        default="/home/isaac/rvc_devkit/stereo/datasets_middlebury2014",
        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--name', default='rvc_highres_output',
                        help='output dir')
    parser.add_argument('--clean', type=float, default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument(
        '--testres',
        type=float,
        default=-1,  # default used to be 0.5
        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp', type=float, default=-1,
                        help='maximum disparity to search for')
    parser.add_argument(
        '--level',
        type=int,
        default=1,
        help='output level of output, default is level 1 (stage 3),\
 can also use level 2 (stage 2) or level 3 (stage 1)')
    parser.add_argument('--debug_image', type=str, default=None)
    # Was type=int, which rejects "--eth_testres 3.5" although the default
    # itself is the float 3.5.
    parser.add_argument("--eth_testres", type=float, default=3.5)

    args = parser.parse_args()

    wandb.init(name=args.name, project="rvc_stereo", save_code=True,
               magic=True, config=args)

    use_adaptive_testres = args.testres == -1

    # construct model
    model = hsm(128, args.clean, level=args.level)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        # Drop every entry whose key contains 'disp'; the regression modules
        # are rebuilt per sample below.
        pretrained_dict['state_dict'] = {
            k: v
            for k, v in pretrained_dict['state_dict'].items()
            if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    model.eval()

    if args.testres > 0:
        dataset = RVCDataset(args.datapath, testres=args.testres)
    else:
        dataset = RVCDataset(args.datapath, eth_testres=args.eth_testres)
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False,
                            num_workers=0)
    steps = 0
    for (imgL, imgR, max_disp, origianl_image_size, dataset_type,
         img_name) in dataloader:
        # Todo: this is a hot fix. Must be fixed to handle batchsize greater than 1
        img_name = img_name[0]

        if args.debug_image is not None and args.debug_image not in img_name:
            continue

        print(img_name)

        if use_adaptive_testres:
            if dataset_type == 0:  # Middlebury
                testres = 1
            elif dataset_type == 2:
                testres = 1.8
            elif dataset_type == 1:
                # Gengsahn said it's between 3~4. Find with linear grid search.
                # Use the same value that was handed to the dataset instead
                # of a second hard-coded 3.5 (identical with the default).
                testres = args.eth_testres
            else:
                raise ValueError(
                    "name of the folder does not contain any of: kitti, middlebury, eth3d"
                )
        else:
            testres = args.testres

        if args.max_disp > 0:
            max_disp = int(args.max_disp)

        ## change max disp: scale by testres, round up to a multiple of 64
        tmpdisp = int(max_disp * testres // 64 * 64)
        if (max_disp * testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64:
            model.module.maxdisp = 128
        model.module.disp_reg8 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp, 32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp, 64).cuda()
        print("    max disparity = " + str(model.module.maxdisp))

        ##fast pad: H and W rounded up to the next multiple of 64
        max_h = int(imgL.shape[2] // 64 * 64)
        max_w = int(imgL.shape[3] // 64 * 64)
        if max_h < imgL.shape[2]:
            max_h += 64
        if max_w < imgL.shape[3]:
            max_w += 64

        wandb.log(
            {
                "imgL":
                wandb.Image(imgL,
                            caption=img_name + ", " + str(tuple(imgL.shape))),
                "imgR":
                wandb.Image(imgR,
                            caption=img_name + ", " + str(tuple(imgR.shape)))
            },
            step=steps)

        with torch.no_grad():
            torch.cuda.synchronize()
            start_time = time.time()

            pred_disp, entropy = model(imgL, imgR)

            torch.cuda.synchronize()
            ttime = (time.time() - start_time)

            # NOTE(review): hard-coded debug dump written on every
            # iteration; it fails if the directory is missing -- confirm it
            # is still wanted.
            torch.save(pred_disp,
                       "/home/isaac/high-res-stereo/debug/rvc/out.pt")
            print('    time = %.2f' % (ttime * 1000))

        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        # origianl_image_size holds batched [heights, widths]; take the first
        # sample as plain ints so that slicing and cv2.resize get integers.
        orig_h = int(origianl_image_size[0][0])
        orig_w = int(origianl_image_size[1][0])
        top_pad = max_h - orig_h
        left_pad = max_w - orig_w
        # entropy first: it relies on pred_disp's still-uncropped width.
        entropy = entropy[top_pad:, :pred_disp.shape[1] - left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # save predictions
        scene_dir = '%s/%s' % (args.name, img_name)
        os.makedirs(scene_dir, exist_ok=True)

        # resize to highres
        pred_disp = cv2.resize(pred_disp / testres, (orig_w, orig_h),
                               interpolation=cv2.INTER_LINEAR)

        # clip while keep inf: mark inf and NaN (x != x) pixels as inf
        invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[invalid] = np.inf

        pred_disp_png = pred_disp / pred_disp[~invalid].max() * 255
        cv2.imwrite('%s/disp.png' % scene_dir, pred_disp_png)
        entorpy_png = entropy / entropy.max() * 255
        cv2.imwrite('%s/ent.png' % scene_dir, entorpy_png)

        out_pfm_path = '%s/disp0%s.pfm' % (scene_dir, args.name)
        with open(out_pfm_path, 'w') as f:
            save_pfm(f, pred_disp[::-1, :])
        with open('%s/time%s.txt' % (scene_dir, args.name), 'w') as f:
            f.write(str(ttime))
        print("    output = " + out_pfm_path)

        caption = (img_name + ", " + str(tuple(pred_disp_png.shape)) +
                   ", max disparity = " + str(max_disp) + ", time = " +
                   str(ttime))
        wandb.log(
            {
                "disparity": wandb.Image(pred_disp_png, caption=caption),
                "entropy": wandb.Image(entorpy_png,
                                       caption=str(entorpy_png.shape))
            },
            step=steps)
        torch.cuda.empty_cache()
        steps += 1