コード例 #1
0
 def __init__(self, img_size=[128, 416], smooth_term = 'lap'):
     """Create the flip, direct-VO, depth and pyramid sub-modules.

     ``img_size`` is indexed as ``[height, width]``: element 0 is passed
     on as ``imH`` and element 1 as ``imW``. ``smooth_term`` is stored
     unchanged on the instance.
     """
     super(LKVOKernel, self).__init__()
     # The VO module and the single-channel pyramid use the same level count.
     num_levels = 5
     self.img_size = img_size
     im_h, im_w = img_size[0], img_size[1]
     # Sub-modules are created in the same order as before so that module
     # registration order is unchanged.
     self.fliplr_func = FlipLR(imW=im_w, dim_w=3)
     self.vo = DirectVO(imH=im_h, imW=im_w, pyramid_layer_num=num_levels)
     self.depth_net = VggDepthEstimator(img_size)
     self.pyramid_func = ImagePyramidLayer(chan=1,
                                           pyramid_layer_num=num_levels)
     self.smooth_term = smooth_term
コード例 #2
0
 def __init__(self, img_size=[128, 416], smooth_term = 'lap', use_expl_mask=False):
     """Create the flip, direct-VO, depth, pose and pyramid sub-modules.

     ``img_size`` is indexed as ``[height, width]``: element 0 is passed
     on as ``imH`` and element 1 as ``imW``. When ``use_expl_mask`` is
     truthy the pose network is a ``PoseExpNet``, otherwise a ``PoseNet``.
     ``smooth_term`` and ``use_expl_mask`` are stored on the instance.
     """
     super(SfMKernel, self).__init__()
     # The VO module and the single-channel pyramid use the same level count.
     num_levels = 4
     self.img_size = img_size
     im_h, im_w = img_size[0], img_size[1]
     # Sub-modules are created in the same order as before so that module
     # registration order is unchanged.
     self.fliplr_func = FlipLR(imW=im_w, dim_w=3)
     self.vo = DirectVO(imH=im_h, imW=im_w, pyramid_layer_num=num_levels)
     self.depth_net = VggDepthEstimator(img_size)
     pose_net_cls = PoseExpNet if use_expl_mask else PoseNet
     self.pose_net = pose_net_cls(3)
     self.pyramid_func = ImagePyramidLayer(chan=1,
                                           pyramid_layer_num=num_levels)
     self.smooth_term = smooth_term
     self.use_expl_mask = use_expl_mask
コード例 #3
0
                    default=False,
                    action="store_true",
                    help='use post processing')

# Parse the command-line options registered on `parser`.
FLAGS = parser.parse_args()

# Formerly hard-coded locations; the same names are now filled from FLAGS.
# dataset_root = "/newfoundland/chaoyang/kitti"
# model_path = "/home/chaoyang/LKVOLearner/checkpoints_new/12_model.pth"
# test_file_list = "/newfoundland/chaoyang/SfMLearner/data/kitti/test_files_eigen.txt"
dataset_root = FLAGS.dataset_root
model_path = FLAGS.ckpt_file
test_file_list = FLAGS.test_file_list
output_path = FLAGS.output_path

# Build the depth network for [128, 416] inputs, restore its weights from
# the checkpoint file and move it to the GPU.
img_size = [128, 416]
vgg_depth_net = VggDepthEstimator(img_size)
vgg_depth_net.load_state_dict(torch.load(model_path))
vgg_depth_net.cuda()

# Flip module on the GPU; img_size[1] is passed as imW and the flipped axis
# is dim 2 here (presumably because inputs are un-batched -- TODO confirm).
fliplr = FlipLR(imW=img_size[1], dim_w=2).cuda()


def read_text_lines(file_path):
    """Return the lines of a text file with trailing whitespace removed.

    Args:
        file_path: path of the text file to read.

    Returns:
        A list with one string per line of the file, in file order. The
        newline and any other trailing whitespace are stripped from each
        entry; blank lines are kept as empty strings.
    """
    # The context manager closes the file even if reading raises.
    with open(file_path, 'r') as f:
        return [line.rstrip() for line in f]


# One entry per line of the test list file, trailing whitespace stripped.
test_files = read_text_lines(test_file_list)
コード例 #4
0
class LKVOKernel(nn.Module):
    """Training kernel: depth net plus pose net, with the pose refined by direct VO.

    Only a single training instance is supported: ``forward`` asserts that
    the leading dimension of ``frames`` is 1.
    """
    def __init__(self, img_size=[128, 416], smooth_term='lap'):
        """Create the sub-modules.

        Args:
            img_size: ``[height, width]``; element 0 is passed on as ``imH``
                and element 1 as ``imW``. NOTE: the default list is shared
                between calls and stored by reference, so treat it as
                read-only.
            smooth_term: forwarded as ``type=`` to the image-aware
                smoothness cost in ``forward``.
        """
        super(LKVOKernel, self).__init__()
        self.img_size = img_size
        self.fliplr_func = FlipLR(imW=img_size[1], dim_w=3)
        self.vo = DirectVO(imH=img_size[0],
                           imW=img_size[1],
                           pyramid_layer_num=4)
        self.pose_net = PoseNet(3)
        self.depth_net = VggDepthEstimator(img_size)
        self.pyramid_func = ImagePyramidLayer(chan=1, pyramid_layer_num=4)
        self.smooth_term = smooth_term

    def forward(self,
                frames,
                camparams,
                ref_frame_idx,
                lambda_S=.5,
                do_data_augment=True,
                use_ssim=True,
                max_lk_iter_num=10,
                lk_level=1):
        """Compute the training cost for one bundle of frames.

        Args:
            frames: 5-D tensor whose first dimension is 1 (asserted);
                presumably (1, bundle, channel, H, W) -- confirm against
                the data loader.
            camparams: camera parameters with a leading dimension of 1;
                entries 0, 2, 4 and 5 are used as fx, cx, fy and cy. The
                caller's tensor is not modified.
            ref_frame_idx: index of the reference frame inside the bundle;
                all other frames are treated as source frames.
            lambda_S: weight of the smoothness cost in the total cost.
            do_data_augment: when true, the bundle is flipped with
                probability 0.5.
            use_ssim: forwarded to the photometric loss.
            max_lk_iter_num: forwarded as ``max_itr_num`` to the direct-VO
                pose refinement.
            lk_level: number of leading pyramid levels of the source
                frames handed to the pose refinement.

        Returns:
            Tuple ``(cost, photometric_cost, smoothness_cost, ref_frame,
            ref_inv_depth)`` where ``ref_frame`` is level 0 of the VO
            module's reference pyramid and ``ref_inv_depth`` is level 0 of
            the reference inverse depth with the normalisation undone.
        """
        assert (frames.size(0) == 1 and frames.dim() == 5)
        frames = frames.squeeze(0)
        # squeeze() and .data both share storage with the caller's tensor.
        # Clone so the flip augmentation below cannot write through to it.
        camparams = camparams.squeeze(0).data.clone()

        if do_data_augment and np.random.rand() > .5:
            frames = self.fliplr_func(frames)
            # camparams[2] is used as cx below; mirror it to match the flip.
            camparams[2] = self.img_size[1] - camparams[2]

        bundle_size = frames.size(0)
        # Every frame of the bundle except the reference one.
        src_frame_idx = tuple(range(0, ref_frame_idx)) + tuple(
            range(ref_frame_idx + 1, bundle_size))
        frames_pyramid = self.vo.pyramid_func(frames)
        ref_frame_pyramid = [
            frame[ref_frame_idx, :, :, :] for frame in frames_pyramid
        ]
        src_frames_pyramid = [
            frame[src_frame_idx, :, :, :] for frame in frames_pyramid
        ]

        self.vo.setCamera(fx=camparams[0],
                          cx=camparams[2],
                          fy=camparams[4],
                          cy=camparams[5])

        inv_depth_pyramid = self.depth_net.forward((frames - 127) / 127)
        # Dividing by 0.1 * mean rescales level 0 to a mean of 10.
        inv_depth_mean_ten = inv_depth_pyramid[0].mean() * 0.1

        inv_depth_norm_pyramid = [
            depth / inv_depth_mean_ten for depth in inv_depth_pyramid
        ]
        # Second pyramid, built by down-sampling level 0 of the prediction.
        inv_depth0_pyramid = self.pyramid_func(inv_depth_norm_pyramid[0],
                                               do_detach=False)
        ref_inv_depth_pyramid = [
            depth[ref_frame_idx, :, :] for depth in inv_depth_norm_pyramid
        ]
        ref_inv_depth0_pyramid = [
            depth[ref_frame_idx, :, :] for depth in inv_depth0_pyramid
        ]
        src_inv_depth_pyramid = [
            depth[src_frame_idx, :, :] for depth in inv_depth_norm_pyramid
        ]

        self.vo.init(ref_frame_pyramid=ref_frame_pyramid,
                     inv_depth_pyramid=ref_inv_depth0_pyramid)
        # Initial pose from the pose CNN: all frames of the bundle are
        # merged into one multi-channel input.
        p = self.pose_net.forward(
            (frames.view(1, -1, frames.size(2), frames.size(3)) - 127) / 127)
        rot_mat_batch = self.vo.twist2mat_batch_func(p[0, :, 0:3]).contiguous()
        trans_batch = p[0, :, 3:6].contiguous()
        # Fine-tune the pose with direct VO, starting from the CNN estimate.
        rot_mat_batch, trans_batch = self.vo.update_with_init_pose(
            src_frames_pyramid[0:lk_level],
            max_itr_num=max_lk_iter_num,
            rot_mat_batch=rot_mat_batch,
            trans_batch=trans_batch)

        photometric_cost = self.vo.compute_phtometric_loss(
            self.vo.ref_frame_pyramid,
            src_frames_pyramid,
            ref_inv_depth_pyramid,
            src_inv_depth_pyramid,
            rot_mat_batch,
            trans_batch,
            levels=[0, 1, 2, 3],
            use_ssim=use_ssim)
        # Smoothness is evaluated on both inverse-depth pyramids.
        smoothness_cost = self.vo.multi_scale_image_aware_smoothness_cost(inv_depth0_pyramid, frames_pyramid, levels=[2,3], type=self.smooth_term) \
                            + self.vo.multi_scale_image_aware_smoothness_cost(inv_depth_norm_pyramid, frames_pyramid, levels=[2,3], type=self.smooth_term)

        cost = photometric_cost + lambda_S * smoothness_cost
        # Multiply by inv_depth_mean_ten to undo the normalisation.
        return cost, photometric_cost, smoothness_cost, self.vo.ref_frame_pyramid[
            0], ref_inv_depth0_pyramid[0] * inv_depth_mean_ten
コード例 #5
0
class SfMKernel(nn.Module):
    """Training kernel: depth net plus pose net, optionally with explainability masks.

    Only a single training instance is supported: ``forward`` asserts that
    the leading dimension of ``frames`` is 1.
    """
    def __init__(self, img_size=[128, 416], smooth_term = 'lap', use_expl_mask=False):
        """Create the sub-modules.

        Args:
            img_size: ``[height, width]``; element 0 is passed on as ``imH``
                and element 1 as ``imW``. NOTE: the default list is shared
                between calls and stored by reference, so treat it as
                read-only.
            smooth_term: forwarded as ``type=`` to the image-aware
                smoothness cost in ``forward``.
            use_expl_mask: when true the pose network is a ``PoseExpNet``
                (which also returns a mask pyramid), otherwise a ``PoseNet``.
        """
        super(SfMKernel, self).__init__()
        self.img_size = img_size
        self.fliplr_func = FlipLR(imW=img_size[1], dim_w=3)
        self.vo = DirectVO(imH=img_size[0], imW=img_size[1], pyramid_layer_num=4)
        self.depth_net = VggDepthEstimator(img_size)
        if use_expl_mask:
            self.pose_net = PoseExpNet(3)
        else:
            self.pose_net = PoseNet(3)
        self.pyramid_func = ImagePyramidLayer(chan=1, pyramid_layer_num=4)
        self.smooth_term = smooth_term
        self.use_expl_mask = use_expl_mask

    def forward(self, frames, camparams, ref_frame_idx, lambda_S=.5, lambda_E=.01, do_data_augment=True, use_ssim=True):
        """Compute the training cost for one bundle of frames.

        Args:
            frames: 5-D tensor whose first dimension is 1 (asserted);
                presumably (1, bundle, channel, H, W) -- confirm against
                the data loader.
            camparams: camera parameters with a leading dimension of 1;
                entries 0, 2, 4 and 5 are used as fx, cx, fy and cy. The
                caller's tensor is not modified.
            ref_frame_idx: index of the reference frame inside the bundle;
                all other frames are treated as source frames.
            lambda_S: weight of the smoothness cost in the total cost.
            lambda_E: weight of the mask regulariser, which is subtracted
                from the total cost.
            do_data_augment: when true, the bundle is flipped with
                probability 0.5.
            use_ssim: forwarded to the photometric loss.

        Returns:
            Tuple ``(cost, photometric_cost, smoothness_cost, ref_frame,
            ref_inv_depth, expl_mask)``. ``ref_frame`` and ``ref_inv_depth``
            are level 0 of the reference pyramids (inverse depth with the
            normalisation undone). ``expl_mask`` is level 0 of the
            reference mask pyramid, or ``None`` when masks are disabled.
        """
        assert(frames.size(0) == 1 and frames.dim() == 5)
        frames = frames.squeeze(0)
        # squeeze() and .data both share storage with the caller's tensor.
        # Clone so the flip augmentation below cannot write through to it.
        camparams = camparams.squeeze(0).data.clone()

        if do_data_augment and np.random.rand() > .5:
            frames = self.fliplr_func(frames)
            # camparams[2] is used as cx below; mirror it to match the flip.
            camparams[2] = self.img_size[1] - camparams[2]

        bundle_size = frames.size(0)
        # Every frame of the bundle except the reference one.
        src_frame_idx = tuple(range(0,ref_frame_idx)) + tuple(range(ref_frame_idx+1,bundle_size))
        frames_pyramid = self.vo.pyramid_func(frames)
        ref_frame_pyramid = [frame[ref_frame_idx, :, :, :] for frame in frames_pyramid]
        src_frames_pyramid = [frame[src_frame_idx, :, :, :] for frame in frames_pyramid]

        self.vo.setCamera(fx=camparams[0], cx=camparams[2],
                            fy=camparams[4], cy=camparams[5])
        self.vo.init_xy_pyramid(ref_frame_pyramid)
        # The pose network sees all frames of the bundle merged into one
        # multi-channel input.
        if self.use_expl_mask:
            p, expl_mask_pyramid = self.pose_net.forward((frames.view(1, -1, frames.size(2), frames.size(3))-127) / 127)
            # Regulariser: sum over pyramid levels of the mean mask value.
            expl_mask_reg_cost = 0
            for mask in expl_mask_pyramid:
                expl_mask_reg_cost += mask.mean()
            ref_expl_mask_pyramid = [mask.squeeze(0)[ref_frame_idx, ...] for mask in expl_mask_pyramid]
            src_expl_mask_pyramid = [mask.squeeze(0)[src_frame_idx, ...] for mask in expl_mask_pyramid]
            expl_mask = ref_expl_mask_pyramid[0]
        else:
            p = self.pose_net.forward((frames.view(1, -1, frames.size(2), frames.size(3))-127) / 127)
            ref_expl_mask_pyramid = None
            src_expl_mask_pyramid = None
            expl_mask_reg_cost = 0
            expl_mask = None

        rot_mat_batch = self.vo.twist2mat_batch_func(p[0,:,0:3])
        trans_batch = p[0,:,3:6]

        inv_depth_pyramid = self.depth_net.forward((frames-127)/127)
        # Dividing by 0.1 * mean rescales level 0 to a mean of 10.
        inv_depth_mean_ten = inv_depth_pyramid[0].mean()*0.1
        inv_depth_norm_pyramid = [depth/inv_depth_mean_ten for depth in inv_depth_pyramid]

        ref_inv_depth_pyramid = [depth[ref_frame_idx, :, :] for depth in inv_depth_norm_pyramid]
        src_inv_depth_pyramid = [depth[src_frame_idx, :, :] for depth in inv_depth_norm_pyramid]

        photometric_cost = self.vo.compute_phtometric_loss(
                                                ref_frame_pyramid,
                                                src_frames_pyramid,
                                                ref_inv_depth_pyramid,
                                                src_inv_depth_pyramid,
                                                rot_mat_batch, trans_batch,
                                                levels=[0,1,2,3], use_ssim=use_ssim,
                                                ref_expl_mask_pyramid=ref_expl_mask_pyramid,
                                                src_expl_mask_pyramid=src_expl_mask_pyramid)
        # Smoothness is evaluated both on a pyramid built by down-sampling
        # level 0 of the prediction and on the network's own pyramid.
        inv_depth0_pyramid = self.pyramid_func(inv_depth_norm_pyramid[0], do_detach=False)
        smoothness_cost = self.vo.multi_scale_image_aware_smoothness_cost(inv_depth0_pyramid, frames_pyramid, levels=[2,3], type=self.smooth_term) \
                            + self.vo.multi_scale_image_aware_smoothness_cost(inv_depth_norm_pyramid, frames_pyramid, levels=[2,3], type=self.smooth_term)

        # The regulariser is subtracted, so larger mask means lower the cost.
        cost = photometric_cost + lambda_S*smoothness_cost - lambda_E*expl_mask_reg_cost

        # Multiply by inv_depth_mean_ten to undo the normalisation.
        return cost, photometric_cost, smoothness_cost, ref_frame_pyramid[0], ref_inv_depth_pyramid[0]*inv_depth_mean_ten, expl_mask
コード例 #6
0
class LKVOKernel(nn.Module):
    """Training kernel: depth net with the pose estimated by direct VO.

    Only a single training instance is supported: ``forward`` asserts that
    the leading dimension of ``frames`` is 1.
    """
    def __init__(self, img_size=[128, 416], smooth_term = 'lap'):
        """Create the sub-modules.

        Args:
            img_size: ``[height, width]``; element 0 is passed on as ``imH``
                and element 1 as ``imW``. NOTE: the default list is shared
                between calls and stored by reference, so treat it as
                read-only.
            smooth_term: forwarded as ``type=`` to the image-aware
                smoothness cost in ``forward``.
        """
        super(LKVOKernel, self).__init__()
        self.img_size = img_size
        self.fliplr_func = FlipLR(imW=img_size[1], dim_w=3)
        self.vo = DirectVO(imH=img_size[0], imW=img_size[1], pyramid_layer_num=5)
        self.depth_net = VggDepthEstimator(img_size)
        self.pyramid_func = ImagePyramidLayer(chan=1, pyramid_layer_num=5)
        self.smooth_term = smooth_term

    def forward(self, frames, camparams, ref_frame_idx, lambda_S=.5, do_data_augment=True, use_ssim=True, max_lk_iter_num=10):
        """Compute the training cost for one bundle of frames.

        Args:
            frames: 5-D tensor whose first dimension is 1 (asserted);
                presumably (1, bundle, channel, H, W) -- confirm against
                the data loader.
            camparams: camera parameters with a leading dimension of 1;
                entries 0, 2, 4 and 5 are used as fx, cx, fy and cy. The
                caller's tensor is not modified.
            ref_frame_idx: index of the reference frame inside the bundle;
                all other frames are treated as source frames.
            lambda_S: weight of the smoothness cost in the total cost.
            do_data_augment: when true, the bundle is flipped with
                probability 0.5.
            use_ssim: forwarded to the photometric loss.
            max_lk_iter_num: forwarded as ``max_itr_num`` to the direct-VO
                pose estimation.

        Returns:
            Tuple ``(cost, photometric_cost, smoothness_cost, ref_frame,
            ref_inv_depth)`` where ``ref_frame`` is level 0 of the VO
            module's reference pyramid and ``ref_inv_depth`` is level 0 of
            the reference inverse depth with the normalisation undone.
        """
        assert(frames.size(0) == 1 and frames.dim() == 5)
        frames = frames.squeeze(0)
        # squeeze() and .data both share storage with the caller's tensor.
        # Clone so the flip augmentation below cannot write through to it.
        camparams = camparams.squeeze(0).data.clone()

        if do_data_augment and np.random.rand() > .5:
            frames = self.fliplr_func(frames)
            # camparams[2] is used as cx below; mirror it to match the flip.
            camparams[2] = self.img_size[1] - camparams[2]

        bundle_size = frames.size(0)
        # Every frame of the bundle except the reference one.
        src_frame_idx = tuple(range(0,ref_frame_idx)) + tuple(range(ref_frame_idx+1,bundle_size))
        frames_pyramid = self.vo.pyramid_func(frames)
        ref_frame_pyramid = [frame[ref_frame_idx, :, :, :] for frame in frames_pyramid]
        src_frames_pyramid = [frame[src_frame_idx, :, :, :] for frame in frames_pyramid]

        self.vo.setCamera(fx=camparams[0], cx=camparams[2],
                            fy=camparams[4], cy=camparams[5])

        inv_depth_pyramid = self.depth_net.forward((frames-127)/127)
        # Dividing by 0.1 * mean rescales level 0 to a mean of 10.
        inv_depth_mean_ten = inv_depth_pyramid[0].mean()*0.1

        inv_depth_norm_pyramid = [depth/inv_depth_mean_ten for depth in inv_depth_pyramid]
        # Second pyramid, built by down-sampling level 0 of the prediction.
        inv_depth0_pyramid = self.pyramid_func(inv_depth_norm_pyramid[0], do_detach=False)
        ref_inv_depth_pyramid = [depth[ref_frame_idx, :, :] for depth in inv_depth_norm_pyramid]
        ref_inv_depth0_pyramid = [depth[ref_frame_idx, :, :] for depth in inv_depth0_pyramid]
        src_inv_depth_pyramid = [depth[src_frame_idx, :, :] for depth in inv_depth_norm_pyramid]

        # Pose comes from direct VO on the down-sampled reference depth.
        rot_mat_batch, trans_batch = \
            self.vo.forward(ref_frame_pyramid, src_frames_pyramid, ref_inv_depth0_pyramid, max_itr_num=max_lk_iter_num)

        photometric_cost = self.vo.compute_phtometric_loss(self.vo.ref_frame_pyramid, src_frames_pyramid, ref_inv_depth_pyramid, src_inv_depth_pyramid, rot_mat_batch, trans_batch, levels=[0,1,2,3], use_ssim=use_ssim)
        # Smoothness is evaluated on both inverse-depth pyramids.
        smoothness_cost = self.vo.multi_scale_image_aware_smoothness_cost(inv_depth0_pyramid, frames_pyramid, levels=[2,3], type=self.smooth_term) \
                            + self.vo.multi_scale_image_aware_smoothness_cost(inv_depth_norm_pyramid, frames_pyramid, levels=[2,3], type=self.smooth_term)

        cost = photometric_cost + lambda_S*smoothness_cost
        # Multiply by inv_depth_mean_ten to undo the normalisation.
        return cost, photometric_cost, smoothness_cost, self.vo.ref_frame_pyramid[0], ref_inv_depth0_pyramid[0]*inv_depth_mean_ten