def __init__(self, img_size=[128, 416], smooth_term='lap'):
    """Build the sub-modules used by the direct-VO training kernel.

    Args:
        img_size: Two-element sequence read as ``[imH, imW]``; element 0 is
            passed to ``DirectVO`` as the image height and element 1 as the
            image width (also used by ``FlipLR``).
        smooth_term: Smoothness-term selector; stored on the instance
            unchanged.
    """
    super(LKVOKernel, self).__init__()
    self.img_size = img_size

    height, width = img_size[0], img_size[1]
    # The VO module and the single-channel pyramid share the same depth.
    num_levels = 5

    self.fliplr_func = FlipLR(imW=width, dim_w=3)
    self.vo = DirectVO(imH=height, imW=width, pyramid_layer_num=num_levels)
    self.depth_net = VggDepthEstimator(img_size)
    self.pyramid_func = ImagePyramidLayer(chan=1, pyramid_layer_num=num_levels)
    self.smooth_term = smooth_term
def __init__(self, img_size=[128, 416], smooth_term='lap', use_expl_mask=False):
    """Build the sub-modules used by the pose-network training kernel.

    Args:
        img_size: Two-element sequence read as ``[imH, imW]``; element 0 is
            passed to ``DirectVO`` as the image height and element 1 as the
            image width (also used by ``FlipLR``).
        smooth_term: Smoothness-term selector; stored on the instance
            unchanged.
        use_expl_mask: When true the pose network is a ``PoseExpNet``,
            otherwise a plain ``PoseNet``. The flag is also stored on the
            instance.
    """
    super(SfMKernel, self).__init__()
    self.img_size = img_size

    height, width = img_size[0], img_size[1]
    # The VO module and the single-channel pyramid share the same depth.
    num_levels = 4

    self.fliplr_func = FlipLR(imW=width, dim_w=3)
    self.vo = DirectVO(imH=height, imW=width, pyramid_layer_num=num_levels)
    self.depth_net = VggDepthEstimator(img_size)
    self.pose_net = PoseExpNet(3) if use_expl_mask else PoseNet(3)
    self.pyramid_func = ImagePyramidLayer(chan=1, pyramid_layer_num=num_levels)
    self.smooth_term = smooth_term
    self.use_expl_mask = use_expl_mask
default=False, action="store_true", help='use post processing') FLAGS = parser.parse_args() # dataset_root = "/newfoundland/chaoyang/kitti" # model_path = "/home/chaoyang/LKVOLearner/checkpoints_new/12_model.pth" # test_file_list = "/newfoundland/chaoyang/SfMLearner/data/kitti/test_files_eigen.txt" dataset_root = FLAGS.dataset_root model_path = FLAGS.ckpt_file test_file_list = FLAGS.test_file_list output_path = FLAGS.output_path img_size = [128, 416] vgg_depth_net = VggDepthEstimator(img_size) vgg_depth_net.load_state_dict(torch.load(model_path)) vgg_depth_net.cuda() fliplr = FlipLR(imW=img_size[1], dim_w=2).cuda() def read_text_lines(file_path): f = open(file_path, 'r') lines = f.readlines() f.close() lines = [l.rstrip() for l in lines] return lines test_files = read_text_lines(test_file_list)
class LKVOKernel(nn.Module):
    """Training kernel: depth CNN + pose CNN initialisation + direct-VO refinement.

    Only supports a single training instance per call (the leading batch
    dimension of ``frames`` must be 1).
    """

    def __init__(self, img_size=[128, 416], smooth_term='lap'):
        """Create the sub-modules.

        Args:
            img_size: Two-element sequence read as ``[imH, imW]``.
            smooth_term: Passed as ``type=`` to the smoothness cost of the
                VO module in ``forward``.
        """
        super(LKVOKernel, self).__init__()
        self.img_size = img_size
        self.fliplr_func = FlipLR(imW=img_size[1], dim_w=3)
        self.vo = DirectVO(imH=img_size[0], imW=img_size[1], pyramid_layer_num=4)
        self.pose_net = PoseNet(3)
        self.depth_net = VggDepthEstimator(img_size)
        self.pyramid_func = ImagePyramidLayer(chan=1, pyramid_layer_num=4)
        self.smooth_term = smooth_term

    def forward(self, frames, camparams, ref_frame_idx, lambda_S=.5,
                do_data_augment=True, use_ssim=True, max_lk_iter_num=10,
                lk_level=1):
        """Compute the training cost for one bundle of frames.

        Args:
            frames: 5-D tensor whose first dimension is 1 (asserted).
                Presumably laid out as (1, bundle, C, H, W) with 0-255 pixel
                values -- confirm against the caller.
            camparams: Camera parameters with a leading dimension of 1.
                After ``squeeze(0)`` index 0 is used as fx, 2 as cx, 4 as fy
                and 5 as cy. The caller's tensor is not modified.
            ref_frame_idx: Index of the reference frame inside the bundle;
                every other frame is treated as a source frame.
            lambda_S: Weight of the smoothness cost.
            do_data_augment: When true, the bundle is flipped with
                probability 0.5 using ``self.fliplr_func`` and cx is mirrored.
            use_ssim: Forwarded to the photometric loss.
            max_lk_iter_num: Maximum iteration count forwarded to the VO pose
                refinement.
            lk_level: Number of leading pyramid levels of the source frames
                handed to the VO pose refinement.

        Returns:
            Tuple ``(cost, photometric_cost, smoothness_cost, ref_frame,
            ref_inv_depth)`` where ``ref_frame`` is level 0 of the VO
            module's reference-frame pyramid and ``ref_inv_depth`` is level 0
            of the reference inverse-depth pyramid multiplied back by the
            normalisation factor.
        """
        assert (frames.size(0) == 1 and frames.dim() == 5)
        frames = frames.squeeze(0)
        camparams = camparams.squeeze(0).data
        if do_data_augment:
            if np.random.rand() > .5:
                frames = self.fliplr_func(frames)
                # squeeze(0).data shares storage with the caller's tensor;
                # clone before the in-place edit so the caller's intrinsics
                # are left untouched.
                camparams = camparams.clone()
                # Only cx is mirrored; cy is left as is.
                camparams[2] = self.img_size[1] - camparams[2]

        bundle_size = frames.size(0)
        src_frame_idx = tuple(range(0, ref_frame_idx)) \
            + tuple(range(ref_frame_idx + 1, bundle_size))

        frames_pyramid = self.vo.pyramid_func(frames)
        ref_frame_pyramid = [
            frame[ref_frame_idx, :, :, :] for frame in frames_pyramid]
        src_frames_pyramid = [
            frame[src_frame_idx, :, :, :] for frame in frames_pyramid]

        self.vo.setCamera(fx=camparams[0], cx=camparams[2],
                          fy=camparams[4], cy=camparams[5])

        inv_depth_pyramid = self.depth_net.forward((frames - 127) / 127)
        # Normalise all inverse depths by a tenth of the finest-level mean.
        inv_depth_mean_ten = inv_depth_pyramid[0].mean() * 0.1
        inv_depth_norm_pyramid = [
            depth / inv_depth_mean_ten for depth in inv_depth_pyramid]
        # Second pyramid built from the finest normalised prediction.
        inv_depth0_pyramid = self.pyramid_func(inv_depth_norm_pyramid[0],
                                               do_detach=False)
        ref_inv_depth_pyramid = [
            depth[ref_frame_idx, :, :] for depth in inv_depth_norm_pyramid]
        ref_inv_depth0_pyramid = [
            depth[ref_frame_idx, :, :] for depth in inv_depth0_pyramid]
        src_inv_depth_pyramid = [
            depth[src_frame_idx, :, :] for depth in inv_depth_norm_pyramid]

        self.vo.init(ref_frame_pyramid=ref_frame_pyramid,
                     inv_depth_pyramid=ref_inv_depth0_pyramid)

        # Initial pose from the pose CNN: columns 0:3 are converted to
        # rotation matrices, columns 3:6 are used as the translation.
        p = self.pose_net.forward(
            (frames.view(1, -1, frames.size(2), frames.size(3)) - 127) / 127)
        rot_mat_batch = self.vo.twist2mat_batch_func(p[0, :, 0:3]).contiguous()
        # NOTE: scaling the translation by inv_depth_mean_ten is disabled.
        trans_batch = p[0, :, 3:6].contiguous()

        # Fine-tune the pose with direct VO.
        rot_mat_batch, trans_batch = self.vo.update_with_init_pose(
            src_frames_pyramid[0:lk_level],
            max_itr_num=max_lk_iter_num,
            rot_mat_batch=rot_mat_batch,
            trans_batch=trans_batch)

        photometric_cost = self.vo.compute_phtometric_loss(
            self.vo.ref_frame_pyramid,
            src_frames_pyramid,
            ref_inv_depth_pyramid,
            src_inv_depth_pyramid,
            rot_mat_batch,
            trans_batch,
            levels=[0, 1, 2, 3],
            use_ssim=use_ssim)

        # Smoothness is evaluated on levels 2 and 3 of both pyramids.
        smoothness_cost = self.vo.multi_scale_image_aware_smoothness_cost(
            inv_depth0_pyramid, frames_pyramid, levels=[2, 3],
            type=self.smooth_term) \
            + self.vo.multi_scale_image_aware_smoothness_cost(
                inv_depth_norm_pyramid, frames_pyramid, levels=[2, 3],
                type=self.smooth_term)

        cost = photometric_cost + lambda_S * smoothness_cost
        return (cost, photometric_cost, smoothness_cost,
                self.vo.ref_frame_pyramid[0],
                ref_inv_depth0_pyramid[0] * inv_depth_mean_ten)
class SfMKernel(nn.Module):
    """Training kernel: depth CNN + pose CNN, without direct-VO pose refinement.

    Only supports a single training instance per call (the leading batch
    dimension of ``frames`` must be 1).
    """

    def __init__(self, img_size=[128, 416], smooth_term='lap', use_expl_mask=False):
        """Create the sub-modules.

        Args:
            img_size: Two-element sequence read as ``[imH, imW]``.
            smooth_term: Passed as ``type=`` to the smoothness cost of the
                VO module in ``forward``.
            use_expl_mask: When true the pose network is a ``PoseExpNet``
                (which also returns a mask pyramid), otherwise a ``PoseNet``.
        """
        super(SfMKernel, self).__init__()
        self.img_size = img_size
        self.fliplr_func = FlipLR(imW=img_size[1], dim_w=3)
        self.vo = DirectVO(imH=img_size[0], imW=img_size[1], pyramid_layer_num=4)
        self.depth_net = VggDepthEstimator(img_size)
        if use_expl_mask:
            self.pose_net = PoseExpNet(3)
        else:
            self.pose_net = PoseNet(3)
        self.pyramid_func = ImagePyramidLayer(chan=1, pyramid_layer_num=4)
        self.smooth_term = smooth_term
        self.use_expl_mask = use_expl_mask

    def forward(self, frames, camparams, ref_frame_idx, lambda_S=.5,
                lambda_E=.01, do_data_augment=True, use_ssim=True):
        """Compute the training cost for one bundle of frames.

        Args:
            frames: 5-D tensor whose first dimension is 1 (asserted).
                Presumably laid out as (1, bundle, C, H, W) with 0-255 pixel
                values -- confirm against the caller.
            camparams: Camera parameters with a leading dimension of 1.
                After ``squeeze(0)`` index 0 is used as fx, 2 as cx, 4 as fy
                and 5 as cy. The caller's tensor is not modified.
            ref_frame_idx: Index of the reference frame inside the bundle;
                every other frame is treated as a source frame.
            lambda_S: Weight of the smoothness cost.
            lambda_E: Weight of the mask regularisation term, which is
                subtracted from the total cost.
            do_data_augment: When true, the bundle is flipped with
                probability 0.5 using ``self.fliplr_func`` and cx is mirrored.
            use_ssim: Forwarded to the photometric loss.

        Returns:
            Tuple ``(cost, photometric_cost, smoothness_cost, ref_frame,
            ref_inv_depth, expl_mask)``. ``ref_frame`` is level 0 of the
            reference-frame pyramid, ``ref_inv_depth`` is level 0 of the
            normalised reference inverse depth multiplied back by the
            normalisation factor, and ``expl_mask`` is the level-0 reference
            mask or ``None`` when masks are disabled.
        """
        assert (frames.size(0) == 1 and frames.dim() == 5)
        frames = frames.squeeze(0)
        camparams = camparams.squeeze(0).data
        if do_data_augment:
            if np.random.rand() > .5:
                frames = self.fliplr_func(frames)
                # squeeze(0).data shares storage with the caller's tensor;
                # clone before the in-place edit so the caller's intrinsics
                # are left untouched.
                camparams = camparams.clone()
                camparams[2] = self.img_size[1] - camparams[2]

        bundle_size = frames.size(0)
        src_frame_idx = tuple(range(0, ref_frame_idx)) \
            + tuple(range(ref_frame_idx + 1, bundle_size))

        frames_pyramid = self.vo.pyramid_func(frames)
        ref_frame_pyramid = [
            frame[ref_frame_idx, :, :, :] for frame in frames_pyramid]
        src_frames_pyramid = [
            frame[src_frame_idx, :, :, :] for frame in frames_pyramid]

        self.vo.setCamera(fx=camparams[0], cx=camparams[2],
                          fy=camparams[4], cy=camparams[5])
        self.vo.init_xy_pyramid(ref_frame_pyramid)

        # All frames of the bundle are stacked along the channel dimension
        # before being fed to the pose network.
        pose_input = (frames.view(1, -1, frames.size(2), frames.size(3)) - 127) / 127
        if self.use_expl_mask:
            p, expl_mask_pyramid = self.pose_net.forward(pose_input)
            # Regulariser: sum over levels of the mean mask value.
            expl_mask_reg_cost = 0
            for mask in expl_mask_pyramid:
                expl_mask_reg_cost += mask.mean()
            ref_expl_mask_pyramid = [
                mask.squeeze(0)[ref_frame_idx, ...] for mask in expl_mask_pyramid]
            src_expl_mask_pyramid = [
                mask.squeeze(0)[src_frame_idx, ...] for mask in expl_mask_pyramid]
            expl_mask = ref_expl_mask_pyramid[0]
        else:
            p = self.pose_net.forward(pose_input)
            ref_expl_mask_pyramid = None
            src_expl_mask_pyramid = None
            expl_mask_reg_cost = 0
            expl_mask = None

        # Columns 0:3 are converted to rotation matrices, columns 3:6 are
        # used as the translation.
        rot_mat_batch = self.vo.twist2mat_batch_func(p[0, :, 0:3])
        trans_batch = p[0, :, 3:6]

        inv_depth_pyramid = self.depth_net.forward((frames - 127) / 127)
        # Normalise all inverse depths by a tenth of the finest-level mean.
        inv_depth_mean_ten = inv_depth_pyramid[0].mean() * 0.1
        # Uncomment this to also normalise the translation:
        # trans_batch = trans_batch*inv_depth_mean_ten
        inv_depth_norm_pyramid = [
            depth / inv_depth_mean_ten for depth in inv_depth_pyramid]
        ref_inv_depth_pyramid = [
            depth[ref_frame_idx, :, :] for depth in inv_depth_norm_pyramid]
        src_inv_depth_pyramid = [
            depth[src_frame_idx, :, :] for depth in inv_depth_norm_pyramid]

        photometric_cost = self.vo.compute_phtometric_loss(
            ref_frame_pyramid,
            src_frames_pyramid,
            ref_inv_depth_pyramid,
            src_inv_depth_pyramid,
            rot_mat_batch,
            trans_batch,
            levels=[0, 1, 2, 3],
            use_ssim=use_ssim,
            ref_expl_mask_pyramid=ref_expl_mask_pyramid,
            src_expl_mask_pyramid=src_expl_mask_pyramid)

        # Compute the smoothness loss. Instead of evaluating it directly on
        # the finest level, it is evaluated on downsampled levels (2 and 3)
        # of both the re-built pyramid and the network's own pyramid.
        inv_depth0_pyramid = self.pyramid_func(inv_depth_norm_pyramid[0],
                                               do_detach=False)
        smoothness_cost = self.vo.multi_scale_image_aware_smoothness_cost(
            inv_depth0_pyramid, frames_pyramid, levels=[2, 3],
            type=self.smooth_term) \
            + self.vo.multi_scale_image_aware_smoothness_cost(
                inv_depth_norm_pyramid, frames_pyramid, levels=[2, 3],
                type=self.smooth_term)

        cost = photometric_cost + lambda_S * smoothness_cost \
            - lambda_E * expl_mask_reg_cost
        return (cost, photometric_cost, smoothness_cost,
                ref_frame_pyramid[0],
                ref_inv_depth_pyramid[0] * inv_depth_mean_ten,
                expl_mask)
class LKVOKernel(nn.Module):
    """Training kernel: depth CNN with pose estimated by direct VO.

    Only supports a single training instance per call (the leading batch
    dimension of ``frames`` must be 1).
    """

    def __init__(self, img_size=[128, 416], smooth_term='lap'):
        """Create the sub-modules.

        Args:
            img_size: Two-element sequence read as ``[imH, imW]``.
            smooth_term: Passed as ``type=`` to the smoothness cost of the
                VO module in ``forward``.
        """
        super(LKVOKernel, self).__init__()
        self.img_size = img_size
        self.fliplr_func = FlipLR(imW=img_size[1], dim_w=3)
        self.vo = DirectVO(imH=img_size[0], imW=img_size[1], pyramid_layer_num=5)
        self.depth_net = VggDepthEstimator(img_size)
        self.pyramid_func = ImagePyramidLayer(chan=1, pyramid_layer_num=5)
        self.smooth_term = smooth_term

    def forward(self, frames, camparams, ref_frame_idx, lambda_S=.5,
                do_data_augment=True, use_ssim=True, max_lk_iter_num=10):
        """Compute the training cost for one bundle of frames.

        Args:
            frames: 5-D tensor whose first dimension is 1 (asserted).
                Presumably laid out as (1, bundle, C, H, W) with 0-255 pixel
                values -- confirm against the caller.
            camparams: Camera parameters with a leading dimension of 1.
                After ``squeeze(0)`` index 0 is used as fx, 2 as cx, 4 as fy
                and 5 as cy. The caller's tensor is not modified.
            ref_frame_idx: Index of the reference frame inside the bundle;
                every other frame is treated as a source frame.
            lambda_S: Weight of the smoothness cost.
            do_data_augment: When true, the bundle is flipped with
                probability 0.5 using ``self.fliplr_func`` and cx is mirrored.
            use_ssim: Forwarded to the photometric loss.
            max_lk_iter_num: Maximum iteration count forwarded to the VO
                module's pose estimation.

        Returns:
            Tuple ``(cost, photometric_cost, smoothness_cost, ref_frame,
            ref_inv_depth)`` where ``ref_frame`` is level 0 of the VO
            module's reference-frame pyramid and ``ref_inv_depth`` is level 0
            of the reference inverse-depth pyramid multiplied back by the
            normalisation factor.
        """
        assert (frames.size(0) == 1 and frames.dim() == 5)
        frames = frames.squeeze(0)
        camparams = camparams.squeeze(0).data
        if do_data_augment:
            if np.random.rand() > .5:
                frames = self.fliplr_func(frames)
                # squeeze(0).data shares storage with the caller's tensor;
                # clone before the in-place edit so the caller's intrinsics
                # are left untouched.
                camparams = camparams.clone()
                # Only cx is mirrored; cy is left as is.
                camparams[2] = self.img_size[1] - camparams[2]

        bundle_size = frames.size(0)
        src_frame_idx = tuple(range(0, ref_frame_idx)) \
            + tuple(range(ref_frame_idx + 1, bundle_size))

        frames_pyramid = self.vo.pyramid_func(frames)
        ref_frame_pyramid = [
            frame[ref_frame_idx, :, :, :] for frame in frames_pyramid]
        src_frames_pyramid = [
            frame[src_frame_idx, :, :, :] for frame in frames_pyramid]

        self.vo.setCamera(fx=camparams[0], cx=camparams[2],
                          fy=camparams[4], cy=camparams[5])

        inv_depth_pyramid = self.depth_net.forward((frames - 127) / 127)
        # Normalise all inverse depths by a tenth of the finest-level mean.
        inv_depth_mean_ten = inv_depth_pyramid[0].mean() * 0.1
        inv_depth_norm_pyramid = [
            depth / inv_depth_mean_ten for depth in inv_depth_pyramid]
        # Second pyramid built from the finest normalised prediction.
        inv_depth0_pyramid = self.pyramid_func(inv_depth_norm_pyramid[0],
                                               do_detach=False)
        ref_inv_depth_pyramid = [
            depth[ref_frame_idx, :, :] for depth in inv_depth_norm_pyramid]
        ref_inv_depth0_pyramid = [
            depth[ref_frame_idx, :, :] for depth in inv_depth0_pyramid]
        src_inv_depth_pyramid = [
            depth[src_frame_idx, :, :] for depth in inv_depth_norm_pyramid]

        # Pose comes entirely from the direct-VO module.
        rot_mat_batch, trans_batch = self.vo.forward(
            ref_frame_pyramid, src_frames_pyramid, ref_inv_depth0_pyramid,
            max_itr_num=max_lk_iter_num)

        photometric_cost = self.vo.compute_phtometric_loss(
            self.vo.ref_frame_pyramid,
            src_frames_pyramid,
            ref_inv_depth_pyramid,
            src_inv_depth_pyramid,
            rot_mat_batch,
            trans_batch,
            levels=[0, 1, 2, 3],
            use_ssim=use_ssim)

        # Smoothness is evaluated on levels 2 and 3 of both pyramids.
        smoothness_cost = self.vo.multi_scale_image_aware_smoothness_cost(
            inv_depth0_pyramid, frames_pyramid, levels=[2, 3],
            type=self.smooth_term) \
            + self.vo.multi_scale_image_aware_smoothness_cost(
                inv_depth_norm_pyramid, frames_pyramid, levels=[2, 3],
                type=self.smooth_term)

        cost = photometric_cost + lambda_S * smoothness_cost
        return (cost, photometric_cost, smoothness_cost,
                self.vo.ref_frame_pyramid[0],
                ref_inv_depth0_pyramid[0] * inv_depth_mean_ten)