import numpy as np
import matplotlib.pyplot as plt
from scipy.interpolate import RectBivariateSpline
# flow_to_image / read_flow are provided by the project's flowlib-style module.


def arrow_pic(field, fname):
    # Overlay a sparse quiver plot of the flow vectors on the flow color image.
    # `field` is H x W x C with u in channel 0 and v in channel 1.
    h, w = field.shape[:2]
    step = min(h, w) / 40.  # spacing so roughly 40 arrows span the short side
    ys = np.arange(0.5, h, step)
    xs = np.arange(0.5, w, step)
    # RectBivariateSpline expects z of shape (len(x_knots), len(y_knots)); the
    # flow is stored (row, col), so the knot axes are (y, x).
    ipu = RectBivariateSpline(np.arange(h), np.arange(w), field[..., 0])
    ipv = RectBivariateSpline(np.arange(h), np.arange(w), field[..., 1])
    u_mesh = ipu(ys, xs)  # grid evaluation -> shape (len(ys), len(xs))
    v_mesh = ipv(ys, xs)
    fig, ax = plt.subplots()
    ax.imshow(flow_to_image(field))
    ax.quiver(xs, ys, u_mesh, v_mesh, angles='xy')
    ax.axis('off')
    fig.tight_layout()
    fig.savefig(fname)
def point_vec(img, flow):
    meshgrid = np.meshgrid(range(img.shape[1]), range(img.shape[0]))
    dispimg = cv2.resize(img, None, fx=4, fy=4)
    colorflow = flow_to_image(flow).astype(int)
    for i in range(img.shape[1]):  # x
        for j in range(img.shape[0]):  # y
            if flow[j, i, 2] != 1:
                continue
            if j % 10 != 0 or i % 10 != 0:
                continue
            xend = int((meshgrid[0][j, i] + flow[j, i, 0]) * 4)
            yend = int((meshgrid[1][j, i] + flow[j, i, 1]) * 4)
            leng = np.linalg.norm(flow[j, i, :2])
            if leng < 1:
                continue
            dispimg = cv2.arrowedLine(
                dispimg,
                (meshgrid[0][j, i] * 4, meshgrid[1][j, i] * 4),
                (xend, yend),
                (int(colorflow[j, i, 2]), int(colorflow[j, i, 1]), int(colorflow[j, i, 0])),
                5, tipLength=8 / leng, line_type=cv2.LINE_AA)
    return dispimg
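# --- Hedged usage sketch (not part of the original scripts): driving the two
# visualizers above from a .flo file. `read_flow` is assumed to be the same
# flowlib-style loader used elsewhere in this repo; the file names below are
# placeholders.
def _demo_visualizers():
    flow = read_flow('example.flo')  # H x W x 2, or H x W x 3 with a validity mask
    if flow.shape[-1] == 2:
        # append an all-valid mask, since point_vec expects one in channel 2
        flow = np.concatenate(
            [flow, np.ones(flow.shape[:2] + (1,), dtype=flow.dtype)], axis=-1)
    arrow_pic(flow, 'example-vec.png')  # quiver overlay on the flow colormap
    img = cv2.imread('example-img1.png')
    cv2.imwrite('example-arrows.png', point_vec(img, flow))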
def showImage(self, slotNum, img, img_type='rgb', imgtitle='undef'):
    assert img_type in ['rgb', 'flow', 'seg']
    if type(img) is torch.Tensor:
        img: torch.Tensor
        if img.ndimension() == 4:
            assert img.shape[0] == 1
            img = img.squeeze(0).permute(1, 2, 0)
        if img.shape[0] < img.shape[1] and img.shape[0] < img.shape[2]:
            img = img.permute(1, 2, 0)
        img = img.detach().cpu().numpy()
    # winHorizSpace = 340
    winHorizSpace = img.shape[1]
    winVertSpace = img.shape[0] + 78
    # winLeftPos = (1920, 40)
    winLeftPos = (0, 0)
    nrWinsPerHoriz = 3
    winName = str(slotNum) + ' ' + imgtitle
    if img_type == 'seg':
        self.shownImages[winName] = segmentation_to_label(img)
    else:
        self.shownImages[winName] = img.copy()
    if winName not in self.windows:
        self.windows.add(winName)
        cv2.namedWindow(winName)
        cv2.moveWindow(
            winName,
            slotNum % nrWinsPerHoriz * winHorizSpace + winLeftPos[0],
            slotNum // nrWinsPerHoriz * winVertSpace + winLeftPos[1])
        cv2.setMouseCallback(winName, self.onClick)
    if img_type == 'rgb':
        img = img[:, :, ::-1]
        if img.dtype == np.float32:
            img = (img * 255).astype(np.uint8)
    if img_type == 'flow':
        img = flow_to_image(img)[:, :, ::-1]
    if img_type == 'seg':
        img = segmentation_to_image(img)[:, :, ::-1]
    cv2.imshow(winName, img)
def test_rain():
    rain_image_path = 'haze_rain'
    prediction_file = 'flownets-pred-0000000.flo'
    left_name_base = 'haze_rain_light/render_haze_left_beta'
    right_name_base = 'haze_rain_light/render_haze_right_beta'
    flow_file = 'haze_rain_light/flow_left.flo'
    result = open('result.txt', 'w')
    sum_error = 0
    for beta in range(0, 200, 5):
        for contrast in range(120, 201, 5):
            img_files = []
            left_name = left_name_base + str(beta) + 'contrast' + str(contrast) + '.png'
            right_name = right_name_base + str(beta) + 'contrast' + str(contrast) + '.png'
            img_files.append(right_name)
            img_files.append(left_name)
            # sanity check: remove any stale prediction before running the model
            if os.path.exists(prediction_file):
                os.remove(prediction_file)
            FlowNet.run(this_dir, img_files, './model_simple')
            epe = fl.evaluate_flow_file(flow_file, prediction_file)
            flow = fl.read_flow(prediction_file)
            flowpic = fl.flow_to_image(flow)
            flow_image = Image.fromarray(flowpic)
            flow_image.save('beta' + str(beta) + 'contrast' + str(contrast) + 'flow.png')
            sum_error += epe
            result.write('beta: ' + str(beta) + ' contrast: ' + str(contrast) +
                         ' epe: ' + str(epe) + '\n')
    print('sum of average end point error: ', sum_error)
    result.close()
def main():
    model.eval()
    ttime_all = []
    rmses = 0
    nrmses = 0
    flo = read_flow(args.flow)
    imgL_o = np.asarray(Image.open(args.left))
    imgR_o = np.asarray(Image.open(args.right))

    # resize
    maxh = imgL_o.shape[0] * args.testres
    maxw = imgL_o.shape[1] * args.testres
    max_h = int(maxh // 64 * 64)
    max_w = int(maxw // 64 * 64)
    if max_h < maxh:
        max_h += 64
    if max_w < maxw:
        max_w += 64

    input_size = imgL_o.shape
    imgL = cv2.resize(imgL_o, (max_w, max_h))
    imgR = cv2.resize(imgR_o, (max_w, max_h))

    # flip channel, subtract mean
    imgL = imgL[:, :, None].copy() / 255. - np.asarray(mean_L).mean(0)[np.newaxis, np.newaxis, :]
    imgR = imgR[:, :, None].copy() / 255. - np.asarray(mean_R).mean(0)[np.newaxis, np.newaxis, :]
    print(imgL.shape)
    imgL = np.transpose(imgL, [2, 0, 1])[np.newaxis]
    imgR = np.transpose(imgR, [2, 0, 1])[np.newaxis]

    # forward
    imgL = Variable(torch.FloatTensor(imgL).cuda())
    imgR = Variable(torch.FloatTensor(imgR).cuda())
    with torch.no_grad():
        imgLR = torch.cat([imgL, imgR], 0)
        model.eval()
        torch.cuda.synchronize()
        start_time = time.time()
        rts = model(imgLR)
        torch.cuda.synchronize()
        ttime = time.time() - start_time
        print('time = %.2f' % (ttime * 1000))
        ttime_all.append(ttime)
    pred_disp, entropy = rts

    # upsampling
    pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()
    pred_disp = cv2.resize(np.transpose(pred_disp, (1, 2, 0)), (input_size[1], input_size[0]))
    pred_disp[:, :, 0] *= input_size[1] / max_w
    pred_disp[:, :, 1] *= input_size[0] / max_h
    flow = np.ones([pred_disp.shape[0], pred_disp.shape[1], 3])
    flow[:, :, :2] = pred_disp

    rmse = np.sqrt((np.linalg.norm(flow[:, :, :2] - flo[:, :, :2], ord=2, axis=-1) ** 2).mean())
    rmses += rmse
    nrmses += rmse / np.sqrt((np.linalg.norm(flo[:, :, :2], ord=2, axis=-1) ** 2).mean())
    error = np.linalg.norm(flow[:, :, :2] - flo[:, :, :2], ord=2, axis=-1) ** 2
    error = 255 * error / error.max()

    entropy = torch.squeeze(entropy).data.cpu().numpy()
    entropy = cv2.resize(entropy, (input_size[1], input_size[0]))

    idxname = args.left.split('/')[-1]
    cv2.imwrite('%s.png' % idxname.rsplit('.', 1)[0], flow_to_image(flow)[:, :, ::-1])
    cv2.imwrite('%s-gt.png' % idxname.rsplit('.', 1)[0], flow_to_image(flo)[:, :, ::-1])
    arrow_pic(flo, '%s-vec-gt.png' % idxname.rsplit('.', 1)[0])
    arrow_pic(flow, '%s-vec.png' % idxname.rsplit('.', 1)[0])
    cv2.imwrite('%s-err.png' % idxname.rsplit('.', 1)[0], error)
    torch.cuda.empty_cache()
    print(ttime_all, rmses, nrmses)
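# The RMSE bookkeeping in main() above can be read as a standalone metric; this
# sketch restates it with the same formulas (the helper name is hypothetical):
# RMSE of the per-pixel endpoint error, and the same value normalized by the
# RMS magnitude of the ground-truth flow.
def flow_rmse(pred, gt):
    epe_sq = np.linalg.norm(pred[..., :2] - gt[..., :2], axis=-1) ** 2
    rmse = np.sqrt(epe_sq.mean())
    nrmse = rmse / np.sqrt((np.linalg.norm(gt[..., :2], axis=-1) ** 2).mean())
    return rmse, nrmse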
def main():
    model.eval()
    flowl0 = "/gpfs/gpfs0/y.maximov/kolya/piv/SQG/SQG_00001_flow.flo"
    iml0 = "/gpfs/gpfs0/y.maximov/kolya/piv/SQG/SQG_00001_img1.tif"
    iml1 = "/gpfs/gpfs0/y.maximov/kolya/piv/SQG/SQG_00001_img2.tif"
    iml0 = default_loader(iml0)
    iml1 = default_loader(iml1)
    iml1 = np.asarray(iml1) / 255.
    iml0 = np.asarray(iml0) / 255.
    iml0 = iml0[:, :, None].copy()  # iml0[:,:,::-1].copy()
    iml1 = iml1[:, :, None].copy()  # iml1[:,:,::-1].copy()
    flowl0 = read_flow(flowl0)
    # flowl0 = random_incompressible_flow(
    #     1,
    #     [256, 256],
    #     np.random.choice([30, 40, 50]),
    #     10. ** (2 * np.random.rand()),
    #     incompressible=False
    # )
    # iml0, iml1 = image_from_flow(
    #     ppp=np.random.uniform(0.008, 0.1),
    #     pip=np.random.uniform(0.95, 1.0),
    #     flow=flowl0,
    #     intensity_bounds=(0.8, 1),
    #     diameter_bounds=(0.35, 6)
    # )
    # iml0 = iml0.transpose(1, 2, 0).copy()
    # iml1 = iml1.transpose(1, 2, 0).copy()
    # flowl0 = flowl0[0]
    # flowl0 = np.concatenate([
    #     flowl0,
    #     np.ones(flowl0.shape[:-1] + (1,), dtype=flowl0.dtype)
    # ], axis=-1)
    flowl0 = np.ascontiguousarray(flowl0, dtype=np.float32)
    flowl0[np.isnan(flowl0)] = 1e6  # set to max
    cv2.imwrite('%s/%s/%s.png' % (args.outdir, "generated", "flow-orig"),
                flow_to_image(flowl0)[:, :, ::-1])

    schedule_aug_coeff = 1.0
    scl = None  # 0.2 * schedule_aug_coeff
    # if scl > 0:
    #     scl = [0.2 * schedule_aug_coeff, 0., 0.2 * schedule_aug_coeff]
    # else:
    #     scl = None
    rot = 0.17 * schedule_aug_coeff
    if rot > 0:
        rot = [0.17 * schedule_aug_coeff, 0.0]
    else:
        rot = None
    trans = 0.2 * schedule_aug_coeff
    if trans > 0:
        trans = [0.2 * schedule_aug_coeff, 0.0]
    else:
        trans = None
    co_transform = flow_transforms.Compose([
        # flow_transforms.Scale(1, order=0),
        flow_transforms.SpatialAug([256, 256],
                                   scale=scl,
                                   rot=rot,
                                   trans=trans,
                                   schedule_coeff=1,
                                   order=0,
                                   black=True),
        # flow_transforms.PCAAug(schedule_coeff=schedule_coeff),
        # flow_transforms.ChromaticAug(schedule_coeff=schedule_coeff, noise=self.noise),
    ])
    augmented, flowl0 = co_transform([iml0, iml1], flowl0)
    iml0 = augmented[0]
    iml1 = augmented[1]
    cv2.imwrite('%s/%s/%s.png' % (args.outdir, "generated", "flow"),
                flow_to_image(flowl0)[:, :, ::-1])
    cv2.imwrite('%s/%s/%s.png' % (args.outdir, "generated", "mask"),
                255 * flowl0[:, :, -1])
    cv2.imwrite('%s/%s/%s.png' % (args.outdir, "generated", "img1"),
                255 * iml0[:, :, ::-1])
    cv2.imwrite('%s/%s/%s.png' % (args.outdir, "generated", "img2"),
                255 * iml1[:, :, ::-1])
def calOpt(height=240, width=320, maxdisp=256, fac=1, modelpath='finetune_67999.tar'):
    # Calculate model hyperparameters.
    # Resize to a multiple of 64: integer-divide by 64, and if that truncated,
    # round up by one more block of 64.
    maxh = height
    maxw = width
    max_h = int(maxh // 64 * 64)
    max_w = int(maxw // 64 * 64)
    if max_h < maxh:
        max_h += 64
    if max_w < maxw:
        max_w += 64

    # load model
    if MODEL_OPTION == 'base':
        model = VCN([1, max_w, max_h],
                    md=[int(4 * (maxdisp / 256)), 4, 4, 4, 4], fac=fac)
    else:
        model = VCN_small([1, max_w, max_h],
                          md=[int(4 * (maxdisp / 256)), 4, 4, 4, 4], fac=fac)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    # load weights
    pretrained_dict = torch.load(modelpath)
    mean_L = pretrained_dict['mean_L']
    mean_R = pretrained_dict['mean_R']
    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    model.eval()
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    cap = cv2.VideoCapture('video.mp4')
    ret, old_frame = cap.read()
    while True:
        ret, frame = cap.read()
        input_size = old_frame.shape
        imgL = cv2.resize(old_frame, (max_w, max_h))
        imgR = cv2.resize(frame, (max_w, max_h))

        # For gray input images: the model expects RGB, i.e. 3 channels, so
        # repeat the H*W spatial values over the channel axis [H,W,1] -> [H,W,3]
        if len(old_frame.shape) == 2:
            old_frame = np.tile(old_frame[:, :, np.newaxis], (1, 1, 3))
            frame = np.tile(frame[:, :, np.newaxis], (1, 1, 3))

        # Flip channel, subtract mean.
        # The model expects inputs of format [C,H,W] instead of [H,W,C]
        imgL = imgL[:, :, ::-1].copy() / 255. - np.asarray(mean_L).mean(0)[np.newaxis, np.newaxis, :]
        imgR = imgR[:, :, ::-1].copy() / 255. - np.asarray(mean_R).mean(0)[np.newaxis, np.newaxis, :]
        imgL = np.transpose(imgL, [2, 0, 1])[np.newaxis]
        imgR = np.transpose(imgR, [2, 0, 1])[np.newaxis]

        # Image to Torch tensor
        imgL = torch.FloatTensor(imgL).cuda()
        imgR = torch.FloatTensor(imgR).cuda()

        # Forward
        with torch.no_grad():
            imgLR = torch.cat([imgL, imgR], 0)
            time1 = time.time()
            rts = model(imgLR)
            pred_disp, entropy = rts
            print(time.time() - time1)

        k = cv2.waitKey(25)
        if k == 27:  # ESC quits the loop
            break
        old_frame = frame.copy()

        # Upsampling: remove batch dimension, torch tensor to numpy ndarray,
        # resize to the original size, transpose from [C,H,W] -> [H,W,C]
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()
        pred_disp = cv2.resize(np.transpose(pred_disp, (1, 2, 0)),
                               (input_size[1], input_size[0]))
        pred_disp[:, :, 0] *= input_size[1] / max_w
        pred_disp[:, :, 1] *= input_size[0] / max_h
        flow = np.ones([pred_disp.shape[0], pred_disp.shape[1], 3])
        flow[:, :, :2] = pred_disp
        # entropy = torch.squeeze(entropy).data.cpu().numpy()
        # entropy = cv2.resize(entropy, (input_size[1], input_size[0]))
        cv2.imshow('frame', flow_to_image(flow))
def eval_f(fp):
    import warnings
    warnings.filterwarnings("ignore")

    # gts
    if '2015' in args.dataset:
        gt_disp0 = disparity_loader(disp0p[fp])
        gt_disp1 = disparity_loader(disp1p[fp])
        gt_flow = read_flow(flow_paths[fp]).astype(np.float32)
        ints = load_calib_cam_to_cam(calib[fp])
        K0 = ints['K_cam2']
        fl = K0[0, 0]
        bl = ints['b20'] - ints['b30']
    elif 'sintel' in args.dataset:
        gt_disp0 = disparity_read(disp0p[fp])
        gt_disp1 = disparity_read(disp1p[fp])
        gt_flow = read_flow(flow_paths[fp]).astype(np.float32)
        K0, _ = cam_read(calib[fp])
        fl = K0[0, 0]
        bl = 0.1
    d1mask = gt_disp0 > 0
    d2mask = gt_disp1 > 0
    flowmask = gt_flow[:, :, -1] == 1
    validmask = np.logical_and(np.logical_and(d1mask, d2mask), flowmask)
    if '2015' in args.dataset:
        fgmask = cv2.imread(flow_paths[fp].replace('flow_occ', 'obj_map'), 0) > 0
        fgmask = np.logical_and(fgmask, validmask)
    shape = gt_disp0.shape

    # pred
    idnum = expansionp[fp].split('/')[-1].split('.')[0]
    if args.method == 'ours':
        logdc = disparity_loader('%s/%s/mid-%s.pfm' % (args.path, args.dataset, idnum))
        pred_flow = read_flow('%s/%s/flo-%s.pfm' % (args.path, args.dataset, idnum))
        try:
            pred_disp = disparity_loader('%s/%s/%s_disp.pfm' % (args.path, args.dataset, idnum))
        except:
            try:
                pred_disp = disparity_loader('%s/%s/%s.png' % (args.path, args.dataset, idnum))
            except:
                try:
                    pred_disp = disparity_loader('%s/%s/disp-%s.pfm' % (args.path, args.dataset, idnum))
                except:
                    pred_disp = disparity_loader('%s/%s/exp-%s.pfm' % (args.path, args.dataset, idnum))
        pred_disp[pred_disp == np.inf] = pred_disp[pred_disp != np.inf].max()
        pred_disp[np.isnan(pred_disp)] = 1e-12
        pred_disp[pred_disp < 1e-12] = 1e-12
        pred_disp1 = pred_disp / np.exp(logdc)
        pred_flow = disparity_loader('%s/%s/flo-%s.pfm' % (args.path, args.dataset, idnum))
    elif args.method == 'monodepth2':
        pred_disp = disparity_loader('%s/%s/%s_disp.pfm' % (args.path, args.dataset, idnum))
    else:
        exit()

    #hom_p = np.stack((pred_disp.flatten(), np.ones(pred_disp.flatten().shape))).T[validmask.flatten()]
    #xx = np.linalg.inv(np.matmul(hom_p[:,:,np.newaxis],hom_p[:,np.newaxis,:]).sum(0))
    #yy = (hom_p[:,:,np.newaxis]*gt_disp0.flatten()[validmask.flatten(),np.newaxis,np.newaxis]).sum(0)
    #st = xx.dot(yy)
    #pred_disp = pred_disp*st[0] + st[1]
    scale_factor = np.median((gt_disp0 / pred_disp)[validmask])
    pred_disp = scale_factor * pred_disp
    pred_disp1 = scale_factor * pred_disp1

    # eval
    d1err = np.abs(pred_disp - gt_disp0)
    d1err_map = np.logical_and(d1err >= 3, d1err / gt_disp0 >= 0.05)
    d1err = d1err_map[validmask]
    d2err = np.abs(pred_disp1 - gt_disp1)
    d2err_map = np.logical_and(d2err >= 3, d2err / gt_disp1 >= 0.05)
    d2err = d2err_map[validmask]
    flow_epe = np.sqrt(np.power(gt_flow - pred_flow, 2).sum(-1))
    gt_flow_mag = np.linalg.norm(gt_flow[:, :, :2], 2, -1)
    flerr_map = np.logical_and(flow_epe > 3, flow_epe / gt_flow_mag > 0.05)
    flerr = flerr_map[validmask]
    flerr_map[~validmask] = False
    try:
        d1ferr = d1err_map[fgmask]
        d2ferr = d2err_map[fgmask]
        flferr = flerr_map[fgmask]
    except:
        d1ferr = np.zeros(1)
        d2ferr = np.zeros(1)
        flferr = np.zeros(1)
    sferr = np.logical_or(np.logical_or(d1err, d2err), flerr)
    sfferr = np.logical_or(np.logical_or(d1ferr, d2ferr), flferr)

    img = cv2.imread(test_left_img[fp])[:, :, ::-1]
    # cv2.imwrite('%s/%s/err-%s.png'%(args.path,args.dataset,idnum),np.vstack((gt_disp0,pred_disp,gt_disp1,pred_disp1,flerr_map.astype(float)*255)))
    if '2015' in args.dataset:
        flowvis = cv2.imread(test_left_img[fp].replace('image_2', 'viz_flow_occ'))[:, :, ::-1]
    else:
        flowvis = flow_to_image(gt_flow)
    pred_flow[:, :, -1] = 1
    cv2.imwrite(
        '%s/%s/err-%s.png' % (args.path, args.dataset, idnum),
        np.vstack((img, flowvis, 255 * visualize_flow(pred_flow, mode='RGB'),
                   np.tile(flerr_map.reshape(shape)[:, :, None], 3).astype(float) * 255))[:, :, ::-1])
    return d1err.mean(), d2err.mean(), flerr.mean(), sferr.mean(), \
        d1ferr.mean(), d2ferr.mean(), flferr.mean(), sfferr.mean(), gt_flow_mag.mean()
test_left_img = [
    test_left_img[i] for i, flag in enumerate(split) if flag == 2
]
test_right_img = [
    test_right_img[i] for i, flag in enumerate(split) if flag == 2
]
flow_paths = [flow_paths[i] for i, flag in enumerate(split) if flag == 2]

for i, gtflow_path in enumerate(flow_paths):
    num = gtflow_path.split('/')[-1].strip().replace('flow.flo', 'img1.png')
    if 'test' not in args.dataset and 'clip' not in args.dataset:
        gtflow = read_flow(gtflow_path)
    num = num.replace('jpg', 'png')
    flow = read_flow('%s/%s/flo-%s' % (args.path, args.dataset, num.replace('.png', '.pfm')))

    if args.vis == 'yes':
        flowimg = flow_to_image(flow) * np.linalg.norm(
            flow[:, :, :2], 2, 2)[:, :, np.newaxis] / 100. / 255.
        mkdir_p('%s/%s/flowimg' % (args.path, args.dataset))
        plt.imsave('%s/%s/flowimg/%s' % (args.path, args.dataset, num), flowimg)
        if 'test' in args.dataset or 'clip' in args.dataset:
            continue
        gtflowimg = flow_to_image(gtflow)
        mkdir_p('%s/%s/gtimg' % (args.path, args.dataset))
        plt.imsave('%s/%s/gtimg/%s' % (args.path, args.dataset, num), gtflowimg)

    mask = gtflow[:, :, 2] == 1
    gtflow = gtflow[:, :, :2]
    flow = flow[:, :, :2]
def main():
    model.eval()
    ttime_all = []
    rmses = 0
    nrmses = 0
    inx = test_left_img.index(args.image)
    print(test_left_img[inx])
    flo = read_flow(test_flow[inx])
    imgL_o = np.asarray(Image.open(test_left_img[inx]))
    imgR_o = np.asarray(Image.open(test_right_img[inx]))

    # resize
    maxh = imgL_o.shape[0] * args.testres
    maxw = imgL_o.shape[1] * args.testres
    max_h = int(maxh // 64 * 64)
    max_w = int(maxw // 64 * 64)
    if max_h < maxh:
        max_h += 64
    if max_w < maxw:
        max_w += 64

    input_size = imgL_o.shape
    imgL = cv2.resize(imgL_o, (max_w, max_h))
    imgR = cv2.resize(imgR_o, (max_w, max_h))

    # flip channel, subtract mean
    imgL = imgL[:, :, None].copy() / 255. - np.asarray(mean_L).mean(0)[np.newaxis, np.newaxis, :]
    imgR = imgR[:, :, None].copy() / 255. - np.asarray(mean_R).mean(0)[np.newaxis, np.newaxis, :]
    print(imgL.shape)
    imgL = np.transpose(imgL, [2, 0, 1])[np.newaxis]
    imgR = np.transpose(imgR, [2, 0, 1])[np.newaxis]

    # forward
    imgL = Variable(torch.FloatTensor(imgL).cuda())
    imgR = Variable(torch.FloatTensor(imgR).cuda())
    with torch.no_grad():
        imgLR = torch.cat([imgL, imgR], 0)
        model.eval()
        torch.cuda.synchronize()
        start_time = time.time()
        rts = model(imgLR)
        torch.cuda.synchronize()
        ttime = time.time() - start_time
        print('time = %.2f' % (ttime * 1000))
        ttime_all.append(ttime)
    pred_disp, entropy = rts

    # upsampling
    pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()
    pred_disp = cv2.resize(np.transpose(pred_disp, (1, 2, 0)), (input_size[1], input_size[0]))
    pred_disp[:, :, 0] *= input_size[1] / max_w
    pred_disp[:, :, 1] *= input_size[0] / max_h
    flow = np.ones([pred_disp.shape[0], pred_disp.shape[1], 3])
    flow[:, :, :2] = pred_disp

    rmse = np.sqrt((np.linalg.norm(flow[:, :, :2] - flo[:, :, :2], ord=2, axis=-1) ** 2).mean())
    rmses += rmse
    nrmses += rmse / np.sqrt((np.linalg.norm(flo[:, :, :2], ord=2, axis=-1) ** 2).mean())
    error = np.linalg.norm(flow[:, :, :2] - flo[:, :, :2], ord=2, axis=-1) ** 2
    error = 255 - 255 * error / error.max()
    entropy = torch.squeeze(entropy).data.cpu().numpy()
    entropy = cv2.resize(entropy, (input_size[1], input_size[0]))

    # save predictions
    if args.dataset == 'mbstereo':
        dirname = '%s/%s/%s' % (args.outdir, args.dataset, test_left_img[inx].split('/')[-2])
        mkdir_p(dirname)
        idxname = '%s/%s' % (dirname.rsplit('/', 1)[-1], test_left_img[inx].split('/')[-1])
    else:
        idxname = test_left_img[inx].split('/')[-1]

    if args.dataset == 'mbstereo':
        with open(test_left_img[inx].replace('im0.png', 'calib.txt')) as f:
            lines = f.readlines()
            # max_disp = int(int(lines[9].split('=')[-1]))
            max_disp = int(int(lines[6].split('=')[-1]))
        with open('%s/%s/%s' % (args.outdir, args.dataset, idxname.replace('im0.png', 'disp0IO.pfm')), 'w') as f:
            save_pfm(f, np.clip(-flow[::-1, :, 0].astype(np.float32), 0, max_disp))
        with open('%s/%s/%s/timeIO.txt' % (args.outdir, args.dataset, idxname.split('/')[0]), 'w') as f:
            f.write(str(ttime))
    elif args.dataset == 'k15stereo' or args.dataset == 'k12stereo':
        skimage.io.imsave('%s/%s/%s.png' % (args.outdir, args.dataset, idxname.split('.')[0]),
                          (-flow[:, :, 0].astype(np.float32) * 256).astype('uint16'))
    else:
        # write_flow('%s/%s/%s.png'% (args.outdir, args.dataset,idxname.rsplit('.',1)[0]), flow.copy())
        cv2.imwrite('%s/%s/%s.png' % (args.outdir, args.dataset, idxname.rsplit('.', 1)[0]),
                    flow_to_image(flow)[:, :, ::-1])
        cv2.imwrite('%s/%s/%s-gt.png' % (args.outdir, args.dataset, idxname.rsplit('.', 1)[0]),
                    flow_to_image(flo)[:, :, ::-1])
        arrow_pic(flo, '%s/%s/%s-vec-gt.png' % (args.outdir, args.dataset, idxname.rsplit('.', 1)[0]))
        arrow_pic(flow, '%s/%s/%s-vec.png' % (args.outdir, args.dataset, idxname.rsplit('.', 1)[0]))
        test_compressibility(flo, flow,
                             '%s/%s/%s-compr.png' % (args.outdir, args.dataset, idxname.rsplit('.', 1)[0]))
        test_energy_spectrum(flo, flow,
                             '%s/%s/%s-energy.png' % (args.outdir, args.dataset, idxname.rsplit('.', 1)[0]))
        test_intermittency_r(flo, flow,
                             '%s/%s/%s-interm-r.png' % (args.outdir, args.dataset, idxname.rsplit('.', 1)[0]))
        test_intermittency_n(flo, flow,
                             '%s/%s/%s-interm-n.png' % (args.outdir, args.dataset, idxname.rsplit('.', 1)[0]))
        cv2.imwrite('%s/%s/%s-err.png' % (args.outdir, args.dataset, idxname.rsplit('.', 1)[0]), error)
        # cv2.imwrite('%s/%s/ent-%s.png'% (args.outdir, args.dataset,idxname.rsplit('.',1)[0]), entropy*200)

    torch.cuda.empty_cache()
    rmses /= len(test_left_img)
    nrmses /= len(test_left_img)
    print(np.mean(ttime_all), rmses, nrmses)
def main():
    global args, logger
    args = get_parser().parse_args()
    logger = get_logger()
    logger.info(args)
    logger.info("=> creating model ...")

    # get input image size and save name list;
    # each line of data_list should contain image_0, image_1, (optional gt)
    with open(args.data_list, 'r') as f:
        fnames = f.readlines()
        assert len(fnames[0].strip().split(' ')) == 2 + args.evaluate
        names = [l.strip().split(' ')[0].split('/')[-1] for l in fnames]
        sub_folders = [
            l.strip().split(' ')[0][:-len(names[i])]
            for i, l in enumerate(fnames)
        ]
        names = [l.split('.')[0] for l in names]
        input_size = cv2.imread(join(args.data_root, fnames[0].split(' ')[0])).shape

    # transform
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    th, tw = get_target_size(input_size[0], input_size[1])
    val_transform = transforms.Compose(
        [transforms.ToTensor(),
         transforms.Normalize(mean=mean, std=std)])
    val_data = datasets.HD3Data(mode=args.task,
                                data_root=args.data_root,
                                data_list=args.data_list,
                                label_num=args.evaluate,
                                transform=val_transform,
                                out_size=True)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=args.workers,
                                             pin_memory=True)

    corr_range = [4, 4, 4, 4, 4, 4]
    if args.task == 'flow':
        corr_range = corr_range[:5]
    model = models.HD3Model(args.task, args.encoder, args.decoder,
                            corr_range, args.context).cuda()
    logger.info(model)
    model = torch.nn.DataParallel(model).cuda()
    cudnn.enabled = True
    cudnn.benchmark = True

    if os.path.isfile(args.model_path):
        logger.info("=> loading checkpoint '{}'".format(args.model_path))
        checkpoint = torch.load(args.model_path)
        model.load_state_dict(checkpoint['state_dict'], strict=True)
        logger.info("=> loaded checkpoint '{}'".format(args.model_path))
    else:
        raise RuntimeError("=> no checkpoint found at '{}'".format(args.model_path))

    vis_folder = os.path.join(args.save_folder, 'vis')
    vec_folder = os.path.join(args.save_folder, 'vec')
    check_makedirs(vis_folder)
    check_makedirs(vec_folder)

    # start testing
    logger.info('>>>>>>>>>>>>>>>> Start Test >>>>>>>>>>>>>>>>')
    data_time = AverageMeter()
    batch_time = AverageMeter()
    avg_epe = AverageMeter()
    model.eval()
    end = time.time()
    with torch.no_grad():
        for i, (img_list, label_list, img_size) in enumerate(val_loader):
            data_time.update(time.time() - end)
            img_size = img_size.cpu().numpy()
            img_list = [img.to(torch.device("cuda")) for img in img_list]
            label_list = [label.to(torch.device("cuda")) for label in label_list]

            # resize test
            resized_img_list = [
                F.interpolate(img, (th, tw), mode='bilinear', align_corners=True)
                for img in img_list
            ]
            output = model(img_list=resized_img_list,
                           label_list=label_list,
                           get_vect=True,
                           get_epe=args.evaluate)
            scale_factor = 1 / 2 ** (7 - len(corr_range))
            output['vect'] = resize_dense_vector(output['vect'] * scale_factor,
                                                 img_size[0, 1], img_size[0, 0])
            if args.evaluate:
                avg_epe.update(output['epe'].mean().data, img_list[0].size(0))
            batch_time.update(time.time() - end)
            end = time.time()

            if (i + 1) % 10 == 0:
                logger.info(
                    'Test: [{}/{}] '
                    'Data {data_time.val:.3f} ({data_time.avg:.3f}) '
                    'Batch {batch_time.val:.3f} ({batch_time.avg:.3f}).'.format(
                        i + 1, len(val_loader),
                        data_time=data_time,
                        batch_time=batch_time))

            pred_vect = output['vect'].data.cpu().numpy()
            pred_vect = np.transpose(pred_vect, (0, 2, 3, 1))
            curr_bs = pred_vect.shape[0]
            for idx in range(curr_bs):
                curr_idx = i * args.batch_size + idx
                curr_vect = pred_vect[idx]

                # make folders
                vis_sub_folder = join(vis_folder, sub_folders[curr_idx])
                vec_sub_folder = join(vec_folder, sub_folders[curr_idx])
                check_makedirs(vis_sub_folder)
                check_makedirs(vec_sub_folder)

                # save visualization (disparity transformed to flow here)
                vis_fn = join(vis_sub_folder, names[curr_idx] + '.png')
                if args.task == 'flow':
                    vis_flo = fl.flow_to_image(curr_vect)
                else:
                    vis_flo = fl.flow_to_image(fl.disp2flow(curr_vect))
                vis_flo = cv2.cvtColor(vis_flo, cv2.COLOR_RGB2BGR)
                cv2.imwrite(vis_fn, vis_flo)

                # save point estimates
                fn_suffix = 'png'
                if args.task == 'flow':
                    fn_suffix = args.flow_format
                vect_fn = join(vec_sub_folder, names[curr_idx] + '.' + fn_suffix)
                if args.task == 'flow':
                    if fn_suffix == 'png':
                        # save png format flow
                        mask_blob = np.ones((img_size[idx][1], img_size[idx][0]),
                                            dtype=np.uint16)
                        fl.write_kitti_png_file(vect_fn, curr_vect, mask_blob)
                    else:
                        # save flo format flow
                        fl.write_flow(curr_vect, vect_fn)
                else:
                    # save disparity map
                    cv2.imwrite(vect_fn, np.uint16(-curr_vect[:, :, 0] * 256.0))

    if args.evaluate:
        logger.info('Average End Point Error {avg_epe.avg:.2f}'.format(avg_epe=avg_epe))
    logger.info('<<<<<<<<<<<<<<<<< End Test <<<<<<<<<<<<<<<<<')
def compare_optical_flow(self, plt_name, im1, im2, fig, ax):
    clahe = cv2.createCLAHE(clipLimit=3.0, tileGridSize=(8, 8))
    im1_cv = skimage.img_as_ubyte(im1)
    im2_cv = skimage.img_as_ubyte(im2)
    im1_cv = clahe.apply(im1_cv)
    im2_cv = clahe.apply(im2_cv)

    cur_pts = cv2.goodFeaturesToTrack(im1_cv, 150, 0.01, 25)
    nxt_pts_klt, status, err = cv2.calcOpticalFlowPyrLK(
        im1_cv, im2_cv, cur_pts, None, winSize=(21, 21), maxLevel=3)
    cur_pts = cur_pts.squeeze()

    # nxt_pts_nn, flow = self.compute_nn_flow(im1_cv, im2_cv, cur_pts)
    nxt_pts_nn, flow, confidence = self.compute_flow_hd3(
        Image.fromarray(im1_cv).convert("RGB"),
        Image.fromarray(im2_cv).convert("RGB"), cur_pts)
    nxt_pts_lkr, status_lkr, err_lkr = cv2.calcOpticalFlowPyrLK(
        im1_cv, im2_cv, cur_pts, nxt_pts_nn.copy(),
        winSize=(21, 21), maxLevel=3, flags=cv2.OPTFLOW_USE_INITIAL_FLOW)

    nxt_pts_klt = nxt_pts_klt.squeeze()
    nxt_pts_klt = nxt_pts_klt[status.squeeze() == 1]
    cur_pts_filtered = cur_pts[status.squeeze() == 1]
    confidence_scaled = confidence * 255

    ax[0, 0].clear()
    ax[0, 0].imshow(im1_cv, cmap='gray')
    ax[0, 0].scatter(cur_pts[:, 0], cur_pts[:, 1], s=3, c="b")
    ax[0, 2].clear()
    ax[0, 2].imshow(confidence_scaled, cmap='gray')
    ax[0, 2].scatter(cur_pts[:, 0], cur_pts[:, 1], s=3, c="b")
    ax[0, 1].clear()
    ax[0, 1].imshow(im2_cv, cmap='gray')
    ax[0, 1].scatter(cur_pts[:, 0], cur_pts[:, 1], s=3, c="b")
    ax[0, 1].scatter(nxt_pts_klt[:, 0], nxt_pts_klt[:, 1], s=3, c="r")
    ax[0, 1].scatter(nxt_pts_nn[:, 0], nxt_pts_nn[:, 1], s=3, c="g")
    ax[0, 1].scatter(nxt_pts_lkr[:, 0], nxt_pts_lkr[:, 1], s=3, c="y")

    ax[1, 0].clear()
    ax[1, 0].imshow((im1_cv.astype(np.float32) * 0.5 +
                     im2_cv.astype(np.float32) * 0.5).astype(np.uint8), cmap='gray')
    for i in range(0, len(nxt_pts_klt)):
        ax[1, 0].annotate("", xy=nxt_pts_klt[i], xytext=cur_pts_filtered[i],
                          arrowprops=dict(arrowstyle="->", color="r", linewidth=1))
    for i in range(0, len(nxt_pts_nn)):
        ax[1, 0].annotate("", xy=nxt_pts_nn[i], xytext=cur_pts[i],
                          arrowprops=dict(arrowstyle="->", color="g", linewidth=1))
    for i in range(0, len(nxt_pts_lkr)):
        ax[1, 0].annotate("", xy=nxt_pts_lkr[i], xytext=cur_pts[i],
                          arrowprops=dict(arrowstyle="->", color="y", linewidth=1))

    ax[1, 1].clear()
    # ax[1, 1].imshow(visualize.flow_to_img(flow))
    # ax[1, 1].imshow(np.linalg.norm(flow, axis=2) * 10)
    ax[1, 1].imshow(fl.flow_to_image(flow))
    subsample_every_N = 20
    quiver_X = np.arange(0, im1_cv.shape[0], subsample_every_N)
    quiver_Y = np.arange(0, im1_cv.shape[1], subsample_every_N)
    # mesh = np.meshgrid(quiver_X, quiver_Y)
    flow_subsampled = flow[::subsample_every_N, ::subsample_every_N]
    quiver_U = flow_subsampled[:, :, 0]
    quiver_V = -flow_subsampled[:, :, 1]
    ax[1, 1].quiver(quiver_Y, quiver_X, quiver_U, quiver_V)
    ax[0, 0].set_xlim(0, flow.shape[1])
    ax[0, 0].set_ylim(0, flow.shape[0])
    ax[0, 0].invert_yaxis()

    plt.gcf().suptitle(plt_name)
    plt.draw()
    plt.pause(0.001)
    blocking = BlockingKeyInput(fig=plt.gcf())
    key = blocking(timeout=-1)
    return key
def plot_minibatch(i_batch, sample_batch, flow_gt, flow_pwc, depth_gt, mft_gt,
                   mfw_gt, mft_pred, mfw_pred, latent_activations):
    import cv2
    images = sample_batch['img_t']
    mst_mixed_inv = latent_activations.cpu().reshape(-1, 1, 25, 40)
    epe_batch = 0.0
    n_active_neurons = 0
    for im in range(images.size(0)):
        depth_gt_im = depth_gt[im, 0, :, :].cpu().numpy()
        depth_gt_im = cv2.resize(depth_gt_im, (mft_gt.size(3), mft_gt.size(2)),
                                 interpolation=cv2.INTER_AREA)
        depth_gt_im = torch.tensor(depth_gt_im).cuda()
        mf_gt = (mft_gt[im, :, :, :] / depth_gt_im) + mfw_gt[im, :, :, :]
        mf_gt[:, depth_gt_im < 1e-5] = 0.0
        mf_pred = (mft_pred[im, :, :, :] / depth_gt_im) + mfw_pred[im, :, :, :]
        mf_pred[:, depth_gt_im < 1e-5] = 0.0
        flow_gt_im = refresh_transforms.rescale_flow(flow_gt[im], mf_gt.size(1),
                                                     mf_gt.size(2)).to(device)
        invalid = torch.isnan(flow_gt_im[0, :, :]) + torch.isnan(flow_gt_im[1, :, :])
        obj_2d_gt = flow_gt_im - mf_gt
        obj_2d_pred = flow_gt_im - mf_pred
        th = 0.5
        object_mask_gt = object_motion_lib.find_object_mask_sintel(obj_2d_gt, invalid, th)
        object_scene_2d_gt = obj_2d_gt * torch.stack(
            (object_mask_gt.float(), object_mask_gt.float()))
        object_mask_pred = object_motion_lib.find_object_mask_sintel(obj_2d_pred, invalid, th)
        object_scene_2d_pred = obj_2d_pred * torch.stack(
            (object_mask_pred.float(), object_mask_pred.float()))

        plt.clf()
        plt.suptitle(str(i_batch * images.size(0) + im), fontsize=16)

        pl = plt.subplot(6, 4, 1)
        pl.imshow(printlib.cputensor2array(images[im, :, :, :]))
        pl.set_title("t")
        pl.set_xticks([])
        pl.set_yticks([])

        fl = plt.subplot(6, 4, 5)
        img = flowlib.flow_to_image(printlib.gputensor2array(flow_gt[im]))
        fl.imshow(img)
        fl.set_title("GT Flow")
        fl.set_xticks([])
        fl.set_yticks([])

        fl = plt.subplot(6, 4, 6)
        img = flowlib.flow_to_image(printlib.gputensor2array(flow_pwc[im] * 4))
        fl.imshow(img)
        fl.set_title("PWC Flow")
        fl.set_xticks([])
        fl.set_yticks([])

        dm = plt.subplot(6, 4, 9)
        # dm.imshow(printlib.gpudepth2invarray(depth_t[im, :, :]))
        dm.imshow(torch.squeeze(depth_gt[im, :, :]).cpu().numpy(), cmap='hot')
        # hist, bin_edges = np.histogram(torch.squeeze(depth_t[im, :, :]).cpu().numpy(), bins=100)
        # dm.bar(bin_edges[:-1], hist, width=1)
        # #mask = depthmap < 0.5
        # #dm.imshow(mask)
        dm.set_title("Depth map t")
        dm.set_xticks([])
        dm.set_yticks([])

        fl = plt.subplot(6, 4, 3)
        img = flowlib.flow_to_image(printlib.gputensor2array(mf_gt))
        fl.imshow(img)
        fl.set_title("GT MF")
        fl.set_xticks([])
        fl.set_yticks([])

        fl = plt.subplot(6, 4, 4)
        img = flowlib.flow_to_image(printlib.gputensor2array(mf_pred))
        fl.imshow(img)
        fl.set_title("Predicted MF")
        fl.set_xticks([])
        fl.set_yticks([])

        fl = plt.subplot(6, 4, 7)
        img = flowlib.flow_to_image(printlib.gputensor2array(mft_gt[im, :, :, :]))
        fl.imshow(img)
        fl.set_title("GT MF-t")
        fl.set_xticks([])
        fl.set_yticks([])

        fl = plt.subplot(6, 4, 8)
        img = flowlib.flow_to_image(printlib.gputensor2array(mft_pred[im, :, :, :]))
        fl.imshow(img)
        fl.set_title("Predicted MF-t")
        fl.set_xticks([])
        fl.set_yticks([])

        fl = plt.subplot(6, 4, 11)
        img = flowlib.flow_to_image(printlib.gputensor2array(mfw_gt[im, :, :, :]))
        fl.imshow(img)
        fl.set_title("GT MF-w")
        fl.set_xticks([])
        fl.set_yticks([])

        fl = plt.subplot(6, 4, 12)
        img = flowlib.flow_to_image(printlib.gputensor2array(mfw_pred[im, :, :, :]))
        fl.imshow(img)
        fl.set_title("Predicted MF-w")
        fl.set_xticks([])
        fl.set_yticks([])

        fl = plt.subplot(6, 4, 15)
        img = flowlib.flow_to_image(printlib.gputensor2array(obj_2d_gt))
        fl.imshow(img)
        fl.set_title("GT Object Res")
        fl.set_xticks([])
        fl.set_yticks([])

        fl = plt.subplot(6, 4, 16)
        img = flowlib.flow_to_image(printlib.gputensor2array(obj_2d_pred))
        fl.imshow(img)
        fl.set_title("Predicted Object Res")
        fl.set_xticks([])
        fl.set_yticks([])

        act = plt.subplot(6, 4, 18)
        img = np.squeeze(mst_mixed_inv[im, :, :, :])
        act.imshow(img, cmap='gray')
        act.set_title("Latent activations")
        act.set_xticks([])
        act.set_yticks([])
        n_active_neurons += round(torch.sum(img > 0.01).item())

        pl = plt.subplot(6, 4, 19)
        mask = object_mask_gt.cpu().numpy()
        masked = np.ma.masked_where(mask == 0, mask)
        img = printlib.cputensor2array(
            refresh_transforms.rescale_img(images[im, :, :, :], mask.shape[0], mask.shape[1]))
        pl.imshow(img, interpolation='none')
        pl.imshow(masked, 'hsv', interpolation='none', alpha=0.5)
        # pl.imshow(object_mask_gt, cmap='gray')
        pl.set_xticks([])
        pl.set_yticks([])
        pl.set_title("GT object mask")

        pl = plt.subplot(6, 4, 20)
        mask = object_mask_pred.cpu().numpy()
        masked = np.ma.masked_where(mask == 0, mask)
        img = printlib.cputensor2array(
            refresh_transforms.rescale_img(images[im, :, :, :], mask.shape[0], mask.shape[1]))
        pl.imshow(img, interpolation='none')
        pl.imshow(masked, 'hsv', interpolation='none', alpha=0.5)
        # pl.imshow(object_mask_gt, cmap='gray')
        pl.set_xticks([])
        pl.set_yticks([])
        pl.set_title("Pred object mask")

        fl = plt.subplot(6, 4, 23)
        img = flowlib.flow_to_image(printlib.gputensor2array(object_scene_2d_gt))
        fl.imshow(img)
        fl.set_title("GT scene object")
        fl.set_xticks([])
        fl.set_yticks([])

        fl = plt.subplot(6, 4, 24)
        img = flowlib.flow_to_image(printlib.gputensor2array(object_scene_2d_pred))
        fl.imshow(img)
        fl.set_title("Pred scene object")
        fl.set_xticks([])
        fl.set_yticks([])

        tu = object_scene_2d_gt[0].cpu().numpy()
        tv = object_scene_2d_gt[1].cpu().numpy()
        u = object_scene_2d_pred[0].cpu().numpy()
        v = object_scene_2d_pred[1].cpu().numpy()
        epe = flowlib.flow_error(tu, tv, u, v)
        # print('epe: ', epe, ' #active neurons: ', n_active_neurons)
        epe_batch += epe
        plt.draw()
        plt.pause(5)
    return epe_batch, n_active_neurons
def train(train_data_loader, flow_model, mf_model, args, optimizer, training_writer):
    global n_iter
    train_loss = 0
    mf_h = round(args.height / args.downscale_f)
    mf_w = round(args.width / args.downscale_f)
    planar_depth = torch.ones(1, 1, mf_h, mf_w).to(device)
    plt.rcParams.update({'font.size': 10})
    fig = plt.figure(1)

    # run training for one epoch
    for i_batch, sample_batch in enumerate(train_data_loader):
        if i_batch * args.batch_size < args.start:
            continue
        if i_batch * args.batch_size > args.stop:
            print('Stopped after', args.stop, 'frames!')
            break

        # we do not need RGB for training
        image_t = sample_batch['img_t'].to(device)

        # we need only flow for training, setting any nan entry to zero
        flow_gt = sample_batch['flow'].to(device)
        invalid = torch.isnan(flow_gt[:, 0, :, :]) + torch.isnan(flow_gt[:, 1, :, :])
        flow_gt[torch.stack((invalid, invalid), 1)] = 0.0
        image_tplus1 = sample_batch['img_tplus1'].to(device)
        flow_pwc = flow_model(image_t, image_tplus1)

        # we need the depth and pose GT only for validation
        depth_gt = sample_batch['depth'].to(device)
        pose_gt_r = sample_batch['pose_r'].to(device)
        pose_gt_t = sample_batch['pose_t'].to(device)

        # because pwc downscales by 4.0
        K = sample_batch['K'].float().to(device)
        fx = K[0, 0, 0] / args.downscale_f
        fy = K[0, 1, 1] / args.downscale_f

        inflow = flow_pwc.detach().clone()
        mfg_output = mf_model(inflow)
        pred_t_mf = mfg_output['t_mf']
        pred_r_mf = mfg_output['r_mf']
        latent_activations = torch.squeeze(mfg_output['out_conv5']).view(args.batch_size, -1)

        mft_gt = torch.zeros(args.batch_size, 2, mf_h, mf_w).cuda()
        mfw_gt = torch.zeros(args.batch_size, 2, mf_h, mf_w).cuda()
        mft_loss = torch.zeros(args.batch_size, 1)
        mfw_loss = torch.zeros(args.batch_size, 1)
        sparsity_loss = torch.zeros(args.batch_size, 1)
        scale_t_loss = torch.zeros(args.batch_size, 1)
        scale_w_loss = torch.zeros(args.batch_size, 1)
        A, B = motion_field.motion_mat_fxfy(mf_h, mf_w, fx, fy)
        for im in range(args.batch_size):
            t = pose_gt_t[im, :].contiguous().view(-1, 1)
            # Convert right hand to left hand coordinates
            w = pose_gt_r[im, :].contiguous().view(-1, 1)
            vtx, vty, vwx, vwy = motion_field.gen_motion_field(
                t, 1 * w, torch.squeeze(planar_depth), A, B)
            mft_gt[im, 0, :, :] = vtx * fx
            mft_gt[im, 1, :, :] = vty * fy
            mfw_gt[im, 0, :, :] = vwx * fx
            mfw_gt[im, 1, :, :] = vwy * fy

            # MF reconstruction scale factor
            scale_t_loss[im, 0] = (torch.norm(mfw_gt[im, :, :, :], 2) /
                                   torch.norm(mft_gt[im, :, :, :], 2)).clamp(min=1, max=100)
            scale_w_loss[im, 0] = (torch.norm(mft_gt[im, :, :, :], 2) /
                                   torch.norm(mfw_gt[im, :, :, :], 2)).clamp(min=1, max=100)

            # MF reconstruction loss
            mft_loss[im, 0] = loss_functions.mf_loss_fun(pred_t_mf[im, :, :, :],
                                                         mft_gt[im, :, :, :])
            mfw_loss[im, 0] = loss_functions.mf_loss_fun(pred_r_mf[im, :, :, :],
                                                         mfw_gt[im, :, :, :])

            # Sparsity constraint loss
            sparsity_loss[im, 0] = loss_functions.sparsity_loss_gen_sigmoid(
                latent_activations[im, :])

        total_loss = torch.sum(scale_t_loss * mft_loss + scale_w_loss * mfw_loss +
                               args.w_sparse * sparsity_loss)
        if torch.isnan(total_loss):
            print('loss is nan', n_iter)
            sys.exit()
        else:
            optimizer.zero_grad()
            total_loss.backward()
            optimizer.step()

        batch_sparsity_loss = torch.sum(sparsity_loss).item()
        batch_mft_loss = torch.sum(mft_loss).item()
        batch_mfw_loss = torch.sum(mfw_loss).item()
        batch_total_loss = torch.sum(total_loss).item()

        if args.log_freq > 0 and n_iter % args.log_freq == 0:
            training_writer.add_scalar('batch_translational_mf_loss', batch_mft_loss, n_iter)
            training_writer.add_scalar('batch_rotational_mf_loss', batch_mfw_loss, n_iter)
            training_writer.add_scalar('batch_sparsity_loss', batch_sparsity_loss, n_iter)

        if args.log_im_freq > 0 and n_iter % args.log_im_freq == 0:
            with torch.no_grad():
                true_mf_t_array = flowlib.flow_to_image(
                    printlib.gputensor2array(mft_gt[0, :, :, :])).transpose(2, 0, 1)
                true_mf_w_array = flowlib.flow_to_image(
                    printlib.gputensor2array(mfw_gt[0, :, :, :])).transpose(2, 0, 1)
                pred_mft_tplus1 = pred_t_mf
                pred_mfw_tplus1 = pred_r_mf
                pred_mf_t_array = flowlib.flow_to_image(
                    printlib.gputensor2array(pred_mft_tplus1[0, :, :, :])).transpose(2, 0, 1)
                pred_mf_w_array = flowlib.flow_to_image(
                    printlib.gputensor2array(pred_mfw_tplus1[0, :, :, :])).transpose(2, 0, 1)
                training_writer.add_image(
                    'Translational-mf: True v. Predicted',
                    torchvision.utils.make_grid([
                        torch.tensor(true_mf_t_array),
                        torch.tensor(pred_mf_t_array)
                    ]), n_iter)
                training_writer.add_image(
                    'Rotational-mf: True v. Predicted',
                    torchvision.utils.make_grid([
                        torch.tensor(true_mf_w_array),
                        torch.tensor(pred_mf_w_array)
                    ]), n_iter)
                flow_array = flowlib.flow_to_image(
                    printlib.gputensor2array(flow_gt[0, :, :, :])).transpose(2, 0, 1)
                inflow_array = flowlib.flow_to_image(
                    printlib.gputensor2array(inflow[0, :, :, :])).transpose(2, 0, 1)
                training_writer.add_image(
                    'GT flow',
                    torchvision.utils.make_grid([torch.tensor(flow_array)]), n_iter)
                training_writer.add_image(
                    'PWC flow',
                    torchvision.utils.make_grid([torch.tensor(inflow_array)]), n_iter)
                del pred_mft_tplus1, pred_mfw_tplus1

        del pred_t_mf, pred_r_mf, mft_gt, mfw_gt, depth_gt, mfg_output, flow_gt, flow_pwc
        train_loss += batch_total_loss
        n_iter += 1
    return train_loss
if args.number_gpus > 1:
    block.log('Parallelizing')
    model = torch.nn.DataParallel(model, device_ids=list(range(args.number_gpus)))
else:
    block.log('CUDA not being used')

model.eval()

# Prepare img pair: H x W x 3(RGB)
im1 = imread(args.input1)
im2 = imread(args.input2)
# B x 3(RGB) x 2(pair) x H x W
ims = np.array([[im1, im2]]).transpose((0, 4, 1, 2, 3)).astype(np.float32)
ims = torch.from_numpy(ims)
ims_v = Variable(ims, volatile=True).cuda()  # legacy pre-0.4 PyTorch inference mode

# B x 2 x H x W
pred_flow = model(ims_v).cpu().data
pred_flow = pred_flow[0].numpy().transpose((1, 2, 0))  # H x W x 2
flowlib.write_flow(pred_flow, os.path.join(args.save, 'output.flo'))
flow_im = flowlib.flow_to_image(pred_flow)

# Visualization
# plt.imshow(flow_im)
# plt.savefig(os.path.join(args.save, 'flow.png'), bbox_inches='tight')
plt.imsave(os.path.join(args.save, 'flow.png'), flow_im)
def get_batch_flow_images(flow_batch):
    # Convert a batch of flow tensors (B x 2 x H x W) into a stacked array of
    # color-coded flow images (B x H x W x 3).
    res = np.stack([
        flowlib.flow_to_image(flow.detach().cpu().numpy().transpose(1, 2, 0))
        for flow in flow_batch
    ])
    return res
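# Hedged usage note (not in the original code): get_batch_flow_images expects a
# batch shaped B x 2 x H x W and returns a B x H x W x 3 uint8 array, suitable
# for e.g. a TensorBoard image grid. A toy call with random data:
#
#   batch = torch.randn(4, 2, 64, 64)
#   grids = get_batch_flow_images(batch)   # -> (4, 64, 64, 3)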
def save_flow_visualization(flow, file_path, maxrad):
    """Saves visualization of optical flow field to file_path."""
    flow = flowlib.flow_to_image(flow, maxrad=maxrad)
    flow = cv2.cvtColor(flow, cv2.COLOR_RGB2BGR)
    cv2.imwrite(file_path, flow)
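# Hedged example of the function above: a synthetic radial flow field saved
# with a fixed saturation radius so several frames share one color scale. Only
# the toy field is invented; `maxrad` is the same parameter that
# save_flow_visualization forwards to flow_to_image.
def _demo_save_flow():
    h, w = 128, 160
    yy, xx = np.mgrid[0:h, 0:w].astype(np.float32)
    toy = np.stack([(xx - w / 2) / w, (yy - h / 2) / h], axis=-1)
    save_flow_visualization(toy, 'toy_flow.png', maxrad=1.0)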
selftrained_folder = "work/inference/run.epoch-0-flow-field_selftrained/"
combined_folder = "work/inference/combined/"
if not os.path.exists(combined_folder):
    os.makedirs(combined_folder)

# file_name = "000500"
for file_name in os.listdir(pretrained_folder):
    pretrained_flow_file = pretrained_folder + file_name
    selftrained_flow_file = selftrained_folder + file_name
    pretrained_flow = readFlow(pretrained_flow_file)
    pretrained_im = flow_to_image(pretrained_flow)
    selftrained_flow = readFlow(selftrained_flow_file)
    selftrained_im = flow_to_image(selftrained_flow)
    # side-by-side comparison: pretrained on the left, self-trained on the right
    combined = np.concatenate((pretrained_im, selftrained_im), axis=1)
    cv2.imwrite(combined_folder + file_name.split('.')[0] + '_predicted.png', combined)

img_list = os.listdir(combined_folder)
def calOpt(height=240, width=320, maxdisp=256, fac=1, modelpath='finetune_67999.tar'):
    # Calculate model hyperparameters.
    # Resize to a multiple of 64: integer-divide by 64, and if that truncated,
    # round up by one more block of 64.
    maxh = height
    maxw = width
    max_h = int(maxh // 64 * 64)
    max_w = int(maxw // 64 * 64)
    if max_h < maxh:
        max_h += 64
    if max_w < maxw:
        max_w += 64

    # load model
    model = VCN([1, max_w, max_h], md=[int(4 * (maxdisp / 256)), 4, 4, 4, 4], fac=fac)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    # load weights
    pretrained_dict = torch.load(modelpath)
    mean_L = pretrained_dict['mean_L']
    mean_R = pretrained_dict['mean_R']
    model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    model.eval()
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    start_time = time.time()

    # Load image and resize. The images are loaded as [H,W,C], i.e. [H,W,3];
    # some files include an alpha channel, so keep only the first 3 (RGB) channels.
    imgL_o = imageio.imread('image1.png')[:, :, :3]
    imgR_o = imageio.imread('image2.png')[:, :, :3]
    input_size = imgL_o.shape
    imgL = cv2.resize(imgL_o, (max_w, max_h))
    imgR = cv2.resize(imgR_o, (max_w, max_h))
    read_time = time.time()

    # For gray input images: the model expects RGB, i.e. 3 channels, so repeat
    # the H*W spatial values over the channel axis [H,W,1] -> [H,W,3]
    if len(imgL_o.shape) == 2:
        imgL_o = np.tile(imgL_o[:, :, np.newaxis], (1, 1, 3))
        imgR_o = np.tile(imgR_o[:, :, np.newaxis], (1, 1, 3))

    # Flip channel, subtract mean.
    # The model expects inputs of format [C,H,W] instead of [H,W,C]
    imgL = imgL[:, :, ::-1].copy() / 255. - np.asarray(mean_L).mean(0)[np.newaxis, np.newaxis, :]
    imgR = imgR[:, :, ::-1].copy() / 255. - np.asarray(mean_R).mean(0)[np.newaxis, np.newaxis, :]
    imgL = np.transpose(imgL, [2, 0, 1])[np.newaxis]
    imgR = np.transpose(imgR, [2, 0, 1])[np.newaxis]

    # Image to Torch tensor
    imgL = torch.FloatTensor(imgL).cuda()
    imgR = torch.FloatTensor(imgR).cuda()

    # Forward
    with torch.no_grad():
        imgLR = torch.cat([imgL, imgR], 0)
        time1 = time.time()
        rts = model(imgLR)
        pred_disp, entropy = rts
        time2 = time.time()
        print(time2 - time1)
    forward_time = time.time()

    # Upsampling: remove batch dimension, torch tensor to numpy ndarray,
    # resize to the original size, transpose from [C,H,W] -> [H,W,C]
    pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()
    pred_disp = cv2.resize(np.transpose(pred_disp, (1, 2, 0)), (input_size[1], input_size[0]))
    pred_disp[:, :, 0] *= input_size[1] / max_w
    pred_disp[:, :, 1] *= input_size[0] / max_h
    flow = np.ones([pred_disp.shape[0], pred_disp.shape[1], 3])
    flow[:, :, :2] = pred_disp
    entropy = torch.squeeze(entropy).data.cpu().numpy()
    entropy = cv2.resize(entropy, (input_size[1], input_size[0]))
    upsample_time = time.time()

    print("Read: {}s".format(read_time - start_time))
    print("Forward: {}s".format(forward_time - start_time))
    print("Upsample: {}s".format(upsample_time - start_time))

    # Show results
    showImage(flow_to_image(flow), "flow_to_image.png")
    showImage(point_vec(imgL_o, flow)[:, :, ::-1], "vector_on_image.png")
    showImage(entropy, "entropy.png")
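# A minimal check of the resize-to-64 arithmetic used in calOpt above
# (assumption: the intent is the smallest multiple of 64 that is >= the input
# size, since the network operates on 64-aligned resolutions). The helper name
# is hypothetical.
def _round_up_to_64(x):
    r = int(x // 64 * 64)
    return r if r >= x else r + 64

assert _round_up_to_64(240) == 256
assert _round_up_to_64(320) == 320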
def main():
    TrainImgLoader = torch.utils.data.DataLoader(
        data_inuse,
        batch_size=batch_size,
        shuffle=True,
        num_workers=worker_mul * batch_size,
        drop_last=True,
        worker_init_fn=_init_fn,
        pin_memory=True)
    log = logger.Logger(args.savemodel, name=args.logname)
    start_full_time = time.time()
    global total_iters

    for epoch in range(1, args.epochs + 1):
        total_train_loss = 0
        total_train_aepe = 0

        # training loop
        for batch_idx, (imgL_crop, imgR_crop, flowl0) in enumerate(TrainImgLoader):
            if batch_idx % 100 == 0:
                adjust_learning_rate(optimizer, total_iters)
            if total_iters < 1000:
                # subtract mean
                mean_L.append(np.asarray(imgL_crop.mean(0).mean(1).mean(1)))
                mean_R.append(np.asarray(imgR_crop.mean(0).mean(1).mean(1)))
            imgL_crop -= torch.from_numpy(
                np.asarray(mean_L).mean(0)[np.newaxis, :, np.newaxis, np.newaxis]).float()
            imgR_crop -= torch.from_numpy(
                np.asarray(mean_R).mean(0)[np.newaxis, :, np.newaxis, np.newaxis]).float()

            start_time = time.time()
            loss, vis = train(imgL_crop, imgR_crop, flowl0)
            print('Iter %d training loss = %.3f , time = %.2f' %
                  (batch_idx, loss, time.time() - start_time))
            total_train_loss += loss
            total_train_aepe += vis['AEPE']

            if total_iters % 10 == 0:
                log.scalar_summary('train/loss_batch', loss, total_iters)
                log.scalar_summary('train/aepe_batch', vis['AEPE'], total_iters)
            if total_iters % 100 == 0:
                log.image_summary('train/left', (imgL_crop[0:1].float() + torch.from_numpy(
                    np.asarray(mean_L).mean(0)[np.newaxis, :, np.newaxis, np.newaxis]).float()).squeeze(0) * 255, total_iters)
                log.image_summary('train/right', (imgR_crop[0:1].float() + torch.from_numpy(
                    np.asarray(mean_R).mean(0)[np.newaxis, :, np.newaxis, np.newaxis]).float()).squeeze(0) * 255, total_iters)
                log.histo_summary('train/pred_hist', vis['output2'], total_iters)
                if len(np.asarray(vis['gt'])) > 0:
                    log.histo_summary('train/gt_hist', np.asarray(vis['gt']), total_iters)
                    gu = vis['gt'][0, :, :, 0]
                    gv = vis['gt'][0, :, :, 1]
                    gu = gu * np.asarray(vis['mask'][0].float().cpu())
                    gv = gv * np.asarray(vis['mask'][0].float().cpu())
                    mask = vis['mask'][0].float().cpu()
                    log.image_summary('train/gt0', flow_to_image(
                        np.concatenate((gu[:, :, np.newaxis], gv[:, :, np.newaxis],
                                        mask[:, :, np.newaxis]), -1))[np.newaxis], total_iters)
                log.image_summary('train/output2', flow_to_image(vis['output2'][0].transpose((1, 2, 0)))[np.newaxis], total_iters)
                log.image_summary('train/output1', flow_to_image(vis['output1'][0].transpose((1, 2, 0)))[np.newaxis], total_iters)
                log.image_summary('train/output3', flow_to_image(vis['output3'][0].transpose((1, 2, 0)))[np.newaxis], total_iters)
                log.image_summary('train/output4', flow_to_image(vis['output4'][0].transpose((1, 2, 0)))[np.newaxis], total_iters)
                log.image_summary('train/output5', flow_to_image(vis['output5'][0].transpose((1, 2, 0)))[np.newaxis], total_iters)
                log.image_summary('train/output6', flow_to_image(vis['output6'][0].transpose((1, 2, 0)))[np.newaxis], total_iters)
                log.image_summary('train/oor', 255 * (np.clip(vis['oor'][np.newaxis], -1, 1) + 1) / 2, total_iters)
                torch.cuda.empty_cache()
            total_iters += 1

            # get global counts
            with open('./iter_counts-%d.txt' % int(args.logname.split('-')[-1]), 'w') as f:
                f.write('%d' % total_iters)

            if (total_iters + 1) % 2000 == 0:
                # SAVE
                savefilename = args.savemodel + '/' + args.logname + '/finetune_' + str(total_iters) + '.tar'
                save_dict = model.state_dict()
                # save_dict = collections.OrderedDict(
                #     {k: v for k, v in save_dict.items() if ('flow_reg' not in k or 'conv1' in k) and ('grid' not in k)})
                torch.save({
                    'iters': total_iters,
                    'state_dict': save_dict,
                    'train_loss': total_train_loss / len(TrainImgLoader),
                    'mean_L': mean_L,
                    'mean_R': mean_R,
                }, savefilename)

        log.scalar_summary('train/loss', total_train_loss / len(TrainImgLoader), epoch)
        log.scalar_summary('train/aepe', total_train_aepe / len(TrainImgLoader), epoch)
    print('full finetune time = %.2f HR' % ((time.time() - start_full_time) / 3600))
def main():
    TrainImgLoader = torch.utils.data.DataLoader(
        data_inuse,
        batch_size=batch_size,
        shuffle=True,
        num_workers=int(worker_mul * batch_size),
        drop_last=True,
        worker_init_fn=_init_fn,
        pin_memory=True)
    log = logger.Logger(args.savemodel, name=args.logname)
    start_full_time = time.time()
    global total_iters

    # training loop
    for batch_idx, databatch in enumerate(TrainImgLoader):
        if batch_idx > args.niter:
            break
        if 'expansion' in args.stage:
            imgL_crop, imgR_crop, flowl0, imgAux, intr, imgoL, imgoR, occp = databatch
        else:
            imgL_crop, imgR_crop, flowl0 = databatch
            imgAux, intr, imgoL, imgoR, occp = None, None, None, None, None
        if batch_idx % 100 == 0:
            adjust_learning_rate(optimizer, total_iters)
        if total_iters < 1000 and 'expansion' not in args.stage:
            # subtract mean
            mean_L.append(np.asarray(imgL_crop.mean(0).mean(1).mean(1)))
            mean_R.append(np.asarray(imgR_crop.mean(0).mean(1).mean(1)))
        imgL_crop -= torch.from_numpy(
            np.asarray(mean_L).mean(0)[np.newaxis, :, np.newaxis, np.newaxis]).float()
        imgR_crop -= torch.from_numpy(
            np.asarray(mean_R).mean(0)[np.newaxis, :, np.newaxis, np.newaxis]).float()

        start_time = time.time()
        loss, vis = train(imgL_crop, imgR_crop, flowl0, imgAux, intr, imgoL, imgoR, occp)
        print('Iter %d training loss = %.3f , time = %.2f' %
              (batch_idx, loss, time.time() - start_time))

        if total_iters % 10 == 0:
            log.scalar_summary('train/loss_batch', loss, total_iters)
            log.scalar_summary('train/aepe_batch', vis['AEPE'], total_iters)
        if total_iters % 100 == 0:
            log.image_summary('train/left', imgL_crop[0:1], total_iters)
            log.image_summary('train/right', imgR_crop[0:1], total_iters)
            if len(np.asarray(vis['gt'])) > 0:
                log.histo_summary(
                    'train/gt_hist',
                    np.asarray(vis['gt']).reshape(-1, 3)[np.asarray(vis['gt'])[:, :, :, -1].flatten().astype(bool)][:, :2],
                    total_iters)
                gu = vis['gt'][0, :, :, 0]
                gv = vis['gt'][0, :, :, 1]
                gu = gu * np.asarray(vis['mask'][0].float().cpu())
                gv = gv * np.asarray(vis['mask'][0].float().cpu())
                mask = vis['mask'][0].float().cpu()
                log.image_summary('train/gt0', flow_to_image(
                    np.concatenate((gu[:, :, np.newaxis], gv[:, :, np.newaxis],
                                    mask[:, :, np.newaxis]), -1))[np.newaxis], total_iters)
            log.image_summary('train/output2', flow_to_image(vis['output2'][0].transpose((1, 2, 0)))[np.newaxis], total_iters)
            log.image_summary('train/output3', flow_to_image(vis['output3'][0].transpose((1, 2, 0)))[np.newaxis], total_iters)
            log.image_summary('train/output4', flow_to_image(vis['output4'][0].transpose((1, 2, 0)))[np.newaxis], total_iters)
            log.image_summary('train/output5', flow_to_image(vis['output5'][0].transpose((1, 2, 0)))[np.newaxis], total_iters)
            log.image_summary('train/output6', flow_to_image(vis['output6'][0].transpose((1, 2, 0)))[np.newaxis], total_iters)
            if 'expansion' in args.stage:
                log.image_summary('train/mid_gt', (1 + imgAux[:1, :, :, 6] / imgAux[:1, :, :, 0]).log(), total_iters)
                log.image_summary('train/mid', vis['mid'][np.newaxis], total_iters)
                log.image_summary('train/exp', vis['exp'][np.newaxis], total_iters)
            torch.cuda.empty_cache()
        total_iters += 1

        # get global counts
        with open('%s/iter_counts-%d.txt' % (args.itersave, int(args.logname.split('-')[-1])), 'w') as f:
            f.write('%d' % total_iters)

        if (total_iters + 1) % 2000 == 0:
            # SAVE
            savefilename = args.savemodel + '/' + args.logname + '/finetune_' + str(total_iters) + '.pth'
            save_dict = model.state_dict()
            save_dict = collections.OrderedDict(
                {k: v for k, v in save_dict.items()
                 if ('reg_modules' not in k or 'conv1' in k) and ('grid' not in k) and ('flow_reg' not in k)})
            torch.save({
                'iters': total_iters,
                'state_dict': save_dict,
                'mean_L': mean_L,
                'mean_R': mean_R,
            }, savefilename)

    print('full finetune time = %.2f HR' % ((time.time() - start_full_time) / 3600))
    print(max_epo)