Example 1
    def stereo_synthesize_loss(self,
                               source_img,
                               target_ms,
                               target_depth_ms,
                               pose_t2s,
                               intrinsic,
                               suffix=""):
        """
        synthesize image from source to target
        :param source_img: [batch, num_src*height, width, 3]
        :param target_ms: list of [batch, height/scale, width/scale, 3]
        :param target_depth_ms: list of [batch, height/scale, width/scale, 1]
        :param pose_t2s: [batch, num_src, 4, 4]
        :param intrinsic: [batch, num_src, 3, 3]
        :param suffix: "" if right to left, else "_R"
        """
        pose_stereo = cp.pose_matr2rvec_batch(tf.expand_dims(pose_t2s, 1))

        # synth_target_ms: list of [batch, 1, height/scale, width/scale, 3]
        synth_target_ms = SynthesizeMultiScale()(source_img, intrinsic,
                                                 target_depth_ms, pose_stereo)
        losses = []
        for i, (synth_img_sc,
                target_img_sc) in enumerate(zip(synth_target_ms, target_ms)):
            loss = layers.Lambda(
                lambda inputs: self.photometric_loss(inputs[0], inputs[1]),
                name=f"photo_loss_{i}" + suffix)([synth_img_sc, target_img_sc])
            losses.append(loss)
        return losses, synth_target_ms
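
Note: the `photometric_loss` wrapped by the Lambda layer above is not shown in this listing. Below is a minimal sketch of a masked L1 photometric loss that matches the shapes in the docstring; it is an assumption, not the repository's actual implementation.

import tensorflow as tf

def photometric_loss(synth_target, orig_target):
    # synth_target: [batch, num_src, height, width, 3]
    # orig_target:  [batch, height, width, 3]
    orig = tf.expand_dims(orig_target, axis=1)  # broadcast target over the source axis
    # ignore pixels the warp left black (projected outside the source view)
    valid = tf.cast(tf.reduce_sum(synth_target, axis=-1, keepdims=True) > 0., tf.float32)
    error = tf.abs(synth_target - orig) * valid
    # mean absolute error over valid pixels and channels -> [batch, num_src]
    return tf.reduce_sum(error, axis=[2, 3, 4]) / (tf.reduce_sum(valid, axis=[2, 3, 4]) * 3. + 1e-6)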
Example 2
def save_worst_views(frame, x, model, sample_inds, save_path, scale=1):
    if frame not in sample_inds['frame'].tolist():
        return

    colname = list(sample_inds)[-1]
    indices = sample_inds.loc[sample_inds['frame'] == frame, :].index.tolist()

    stacked_image = x['image']
    intrinsic = x['intrinsic']
    depth_gt = x['depth_gt']
    pose_gt = x['pose_gt']
    pose_gt = cp.pose_matr2rvec_batch(pose_gt)
    depth_gt_ms = uf.multi_scale_depths(depth_gt, [1, 2, 4, 8])
    source_image, target_image = uf.split_into_source_and_target(stacked_image)

    predictions = model(x['image'])
    disp_pred_ms = predictions['disp_ms']
    pose_pred = predictions['pose']
    depth_pred_ms = uf.safe_reciprocal_number_ms(disp_pred_ms)

    depth_pred_ms = [depth*scale for depth in depth_pred_ms]

    synthesizer = SynthesizeMultiScale()
    synth_target_pred_ms = synthesizer(source_image, intrinsic, depth_pred_ms, pose_pred)
    synth_target_gt_ms = synthesizer(source_image, intrinsic, depth_gt_ms, pose_gt)

    for ind in indices:
        srcidx = sample_inds.loc[ind, 'srcidx']
        view_imgs = {"target": target_image, "synthesized": synth_target_pred_ms[0][0, srcidx],
                     "depth": depth_pred_ms[0][0, srcidx], "synth_by_gt": synth_target_gt_ms[0][0, srcidx]}
        view = uf.stack_titled_images(view_imgs)
        filename = op.join(save_path, f"{colname[:3]}_{frame:04d}_{srcidx}.png")
        print("save file:", filename)
        cv2.imwrite(filename, view)
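
Note: `uf.safe_reciprocal_number_ms` above converts multi-scale disparity into depth. A minimal sketch, assuming near-zero disparities should map to zero depth rather than overflow:

import tensorflow as tf

def safe_reciprocal_number_ms(disp_ms, eps=1e-6):
    # disp_ms: list of [batch, height/scale, width/scale, 1] disparity maps
    # returns 1/disp where disp > eps and 0 elsewhere, avoiding division by zero
    return [tf.cast(disp > eps, tf.float32) / tf.maximum(disp, eps) for disp in disp_ms]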
Example 3
def compute_photo_loss(target_true, source_image, intrinsic, depth_pred_ms, pose_pred):
    # synthesize target image
    synth_target_ms = SynthesizeMultiScale()(source_image, intrinsic, depth_pred_ms, pose_pred)
    # use only the full-resolution scale for the photometric loss
    target_pred = synth_target_ms[0]
    # photometric loss: [batch, num_src]
    loss = photometric_loss(target_pred, target_true)
    return loss
Example 4
def test_synthesize_batch_multi_scale():
    """
    gt depth와 gt pose를 입력했을 때 스케일 별로 복원되는 이미지를 정성적으로 확인
    실제 target image와 복원된 "multi" scale target image를 눈으로 비교
    """
    print("===== start test_synthesize_batch_multi_scale")
    dataset = TfrecordGenerator(op.join(opts.DATAPATH_TFR,
                                        "kitti_raw_test")).get_generator()

    for i, features in enumerate(dataset):
        print("----- test_synthesize_batch_multi_scale")
        stacked_image = features['image']
        intrinsic = features['intrinsic']
        depth_gt = features['depth_gt']
        pose_gt = features['pose_gt']
        source_image, target_image = uf.split_into_source_and_target(
            stacked_image)
        depth_gt_ms = uf.multi_scale_depths(depth_gt, [1, 2, 4, 8])
        pose_gt_rvec = cp.pose_matr2rvec_batch(pose_gt)

        # EXECUTE
        synth_target_ms = SynthesizeMultiScale()(source_image, intrinsic,
                                                 depth_gt_ms, pose_gt_rvec)

        # compare the target image with the reconstructed images
        # synth_target_ms[scale][0, 0]: target reconstructed from the first source image
        target_image = uf.to_uint8_image(target_image).numpy()[0]
        source_image = uf.to_uint8_image(source_image).numpy()[
            0, 0:opts.IM_HEIGHT]
        recon_img0 = uf.to_uint8_image(synth_target_ms[0]).numpy()[0, 0]
        recon_img1 = uf.to_uint8_image(synth_target_ms[2]).numpy()[0, 0]
        recon_img1 = cv2.resize(recon_img1, (opts.IM_WIDTH, opts.IM_HEIGHT),
                                interpolation=cv2.INTER_NEAREST)
        view = np.concatenate(
            [source_image, target_image, recon_img0, recon_img1], axis=0)
        print("Check if all the images are the same")
        cv2.imshow("source, target, and reconstructed", view)
        cv2.waitKey(WAIT_KEY)
        if i >= 3:
            break

    cv2.destroyAllWindows()
    print("!!! test_synthesize_batch_multi_scale passed")
Example 5
    def augment_data(self, features, predictions, suffix=""):
        """
        gather additional data required to compute losses
        :param features: {image, intrinsic}
                image: stacked image snippet [batch, snippet_len*height, width, 3]
                intrinsic: camera projection matrix [batch, 3, 3]
        :param predictions: {disp_ms, pose}
                disp_ms: multi scale disparities, list of [batch, height/scale, width/scale, 1]
                pose: poses that transform points from target to source [batch, num_src, 6]
        :param suffix: suffix to keys
        :return augm_data: {depth_ms, source, target, target_ms, synth_target_ms}
                depth_ms: multi scale depth, list of [batch, height/scale, width/scale, 1]
                source: source frames [batch, num_src*height, width, 3]
                target: target frame [batch, height, width, 3]
                target_ms: multi scale target frame, list of [batch, height/scale, width/scale, 3]
                synth_target_ms: multi scale synthesized target frames generated from each source image,
                                list of [batch, num_src, height/scale, width/scale, 3]
        """
        augm_data = dict()
        pred_depth_ms = predictions["depth_ms" + suffix]
        pred_pose = predictions["pose" + suffix]

        stacked_image = features["image" + suffix]
        intrinsic = features["intrinsic" + suffix]
        source_image, target_image = uf.split_into_source_and_target(
            stacked_image)
        target_ms = uf.multi_scale_like(target_image, pred_depth_ms)
        augm_data["source" + suffix] = source_image
        augm_data["target" + suffix] = target_image
        augm_data["target_ms" + suffix] = target_ms

        synth_target_ms = SynthesizeMultiScale()(source_image, intrinsic,
                                                 pred_depth_ms, pred_pose)
        augm_data["synth_target_ms" + suffix] = synth_target_ms

        return augm_data
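
Note: `uf.multi_scale_like` resizes the full-resolution target image to each depth map's resolution. A plausible sketch; bilinear interpolation is an assumption:

import tensorflow as tf

def multi_scale_like(image, depth_ms):
    # image: [batch, height, width, 3] -> one resized copy per depth scale
    image_ms = []
    for depth in depth_ms:
        _, height, width, _ = depth.get_shape().as_list()
        image_ms.append(tf.image.resize(image, (height, width), method="bilinear"))
    return image_ms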
Example 6
def test_photo_loss(source_image, intrinsic, depth_gt_ms, pose_gt, target_ms):
    synth_target_ms = SynthesizeMultiScale()(source_image, intrinsic,
                                             depth_gt_ms, pose_gt)

    losses = []
    recon_image = 0
    for scale, synt_target, orig_target in zip([1, 2, 4, 8], synth_target_ms,
                                               target_ms):
        # EXECUTE
        loss = ls.photometric_loss_l1(synt_target, orig_target)
        losses.append(loss)
        if scale == 1:
            recon_target = uf.to_uint8_image(synt_target).numpy()
            recon_image = cv2.resize(recon_target[0, 0],
                                     (opts.IM_WIDTH, opts.IM_HEIGHT),
                                     interpolation=cv2.INTER_NEAREST)

    losses = tf.stack(losses, axis=2)  # [batch, num_src, num_scales]
    batch_loss = tf.reduce_sum(losses, axis=[1, 2])
    scale_loss = tf.reduce_sum(losses, axis=[0, 1])
    print("all photometric loss:", losses)
    print("batch mean photometric loss:", batch_loss)
    print("scale mean photometric loss:", scale_loss)
    return batch_loss, scale_loss, recon_image
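
A quick shape check of the two reductions above, using arbitrary dummy sizes (batch=2, num_src=4, num_scales=4):

import tensorflow as tf

losses = tf.stack([tf.random.uniform((2, 4)) for _ in range(4)], axis=2)  # [batch, num_src, num_scales]
print(tf.reduce_sum(losses, axis=[1, 2]).shape)  # (2,): one total per batch item
print(tf.reduce_sum(losses, axis=[0, 1]).shape)  # (4,): one total per scale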
Example 7
def make_reconstructed_views(model, dataset):
    recon_views = []
    next_idx = 0
    stride = 10
    stereo_loss = lm.StereoDepthLoss("L1")
    total_loss = lm.TotalLoss()

    for i, features in enumerate(dataset):
        if i < next_idx:
            continue
        if i // stride > 5:
            stride *= 10
        next_idx += stride

        predictions = model(features)
        augm_data = total_loss.augment_data(features, predictions)
        if opts.STEREO:
            augm_data_rig = total_loss.augment_data(features, predictions,
                                                    "_R")
            augm_data.update(augm_data_rig)

        synth_target_ms = SynthesizeMultiScale()(
            src_img_stacked=augm_data["source"],
            intrinsic=features["intrinsic"],
            pred_depth_ms=predictions["depth_ms"],
            pred_pose=predictions["pose"])

        scaleidx, batchidx, srcidx = 0, 0, 0
        target_depth = predictions["depth_ms"][0][batchidx]
        target_depth = tf.clip_by_value(target_depth, 0., 20.) / 10. - 1.
        time_source = augm_data["source"][
            batchidx, srcidx * opts.IM_HEIGHT:(srcidx + 1) * opts.IM_HEIGHT]
        view_imgs = {
            "left_target": augm_data["target"][0],
            "target_depth": target_depth,
            f"source_{srcidx}": time_source,
            f"synthesized_from_src{srcidx}": synth_target_ms[scaleidx][batchidx, srcidx],
        }
        view_imgs["time_diff"] = tf.abs(view_imgs["left_target"] -
                                        view_imgs[f"synthesized_from_src{srcidx}"])

        if opts.STEREO:
            loss_left, synth_left_ms = \
                stereo_loss.stereo_synthesize_loss(source_img=augm_data["target_R"],
                                                   target_ms=augm_data["target_ms"],
                                                   target_depth_ms=predictions["depth_ms"],
                                                   pose_t2s=tf.linalg.inv(features["stereo_T_LR"]),
                                                   intrinsic=features["intrinsic"])

            # print("stereo synth size", tf.size(synth_left_ms[scaleidx]).numpy())
            # zeromask = tf.cast(tf.math.equal(synth_left_ms[scaleidx], 0.), tf.int32)
            # print("stereo synth zero count", tf.reduce_sum(zeromask).numpy())
            print(f"saving synthesized image {i}, stereo loss R2L:",
                  tf.squeeze(loss_left[0]).numpy())
            view_imgs["right_source"] = augm_data["target_R"][batchidx]
            view_imgs["synthesized_from_right"] = synth_left_ms[scaleidx][
                batchidx, srcidx]
            view_imgs["stereo_diff"] = tf.abs(
                view_imgs["left_target"] - view_imgs["synthesized_from_right"])

        view1 = uf.stack_titled_images(view_imgs)
        recon_views.append(view1)

    return recon_views
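
Note: `uf.stack_titled_images` is not shown in this listing. A rough sketch that overlays each dict key as a title and stacks the panels vertically; the [-1, 1] float image range and the layout are assumptions:

import numpy as np
import cv2

def stack_titled_images(view_imgs):
    panels = []
    for title, image in view_imgs.items():
        img = np.asarray(image)
        if img.dtype != np.uint8:
            img = np.clip((img + 1.) * 127.5, 0, 255).astype(np.uint8)  # assume [-1, 1] floats
        if img.ndim == 2 or img.shape[-1] == 1:
            img = cv2.cvtColor(np.squeeze(img), cv2.COLOR_GRAY2BGR)  # depth map -> 3 channels
        img = np.ascontiguousarray(img)
        cv2.putText(img, title, (5, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 255), 1)
        panels.append(img)
    return np.concatenate(panels, axis=0)  # stack all panels vertically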
Example 8
def test_photometric_loss_quality(suffix=""):
    """
    gt depth와 gt pose를 입력했을 때 스케일 별로 복원되는 이미지를 정성적으로 확인하고
    복원된 이미지로부터 계산되는 photometric loss를 확인
    assert 없음
    """
    print("\n===== start test_photometric_loss_quality")
    dataset = TfrecordGenerator(op.join(opts.DATAPATH_TFR,
                                        "kitti_raw_test")).get_generator()

    for i, features in enumerate(dataset):
        print("\n--- fetch a batch data")
        stacked_image = features["image" + suffix]
        intrinsic = features["intrinsic" + suffix]
        depth_gt = features["depth_gt" + suffix]
        pose_gt = features["pose_gt" + suffix]

        # an identity pose produces NaN data, so verify no GT pose is the identity
        pose_gt_np = pose_gt.numpy()
        for pose_seq in pose_gt_np:
            for pose in pose_seq:
                assert not np.isclose(np.identity(4, dtype=float),
                                      pose).all()

        source_image, target_image = uf.split_into_source_and_target(
            stacked_image)
        depth_gt_ms = uf.multi_scale_depths(depth_gt, [1, 2, 4, 8])
        pose_gt = cp.pose_matr2rvec_batch(pose_gt)
        target_ms = uf.multi_scale_like(target_image, depth_gt_ms)
        batch, height, width, _ = target_image.get_shape().as_list()

        synth_target_ms = SynthesizeMultiScale()(source_image, intrinsic,
                                                 depth_gt_ms, pose_gt)

        srcimgs = uf.to_uint8_image(source_image).numpy()[0]
        srcimg0 = srcimgs[0:height]
        srcimg3 = srcimgs[height * 3:height * 4]

        losses = []
        for scale, synt_target, orig_target in zip([1, 2, 4, 8],
                                                   synth_target_ms, target_ms):
            # EXECUTE
            loss = ls.photometric_loss_l1(synt_target, orig_target)
            losses.append(loss)

            recon_target = uf.to_uint8_image(synt_target).numpy()
            recon0 = cv2.resize(recon_target[0, 0], (width, height),
                                interpolation=cv2.INTER_NEAREST)
            recon3 = cv2.resize(recon_target[0, 3], (width, height),
                                interpolation=cv2.INTER_NEAREST)
            target = uf.to_uint8_image(orig_target).numpy()[0]
            target = cv2.resize(target, (width, height),
                                interpolation=cv2.INTER_NEAREST)
            view = np.concatenate([target, srcimg0, recon0, srcimg3, recon3],
                                  axis=0)
            print(f"1/{scale} scale, photo loss:", tf.reduce_sum(loss, axis=1))
            cv2.imshow("photo loss", view)
            cv2.waitKey(WAIT_KEY)

        losses = tf.stack(losses, axis=2)  # [batch, num_src, num_scales]
        print("all photometric loss:", tf.reduce_sum(losses, axis=1))
        print("batch mean photometric loss:", tf.reduce_sum(losses,
                                                            axis=[1, 2]))
        print("scale mean photometric loss:", tf.reduce_sum(losses,
                                                            axis=[0, 1]))
        if i > 3:
            break

    cv2.destroyAllWindows()
    print("!!! test_photometric_loss_quality passed")