Example #1
def test_synthesize_batch_view():
    """
    gt depth와 gt pose를 입력했을 때 스케일 별로 복원되는 이미지를 정성적으로 확인
    실제 target image와 복원된 "single" scale target image를 눈으로 비교
    """
    dataset = TfrecordGenerator(op.join(opts.DATAPATH_TFR,
                                        "kitti_raw_test")).get_generator()

    print("\n===== start test_synthesize_batch_view")
    scale_idx = 1

    for i, features in enumerate(dataset):
        stacked_image = features['image']
        intrinsic = features['intrinsic']
        depth_gt = features['depth_gt']
        pose_gt = features['pose_gt']
        source_image, target_image = uf.split_into_source_and_target(
            stacked_image)
        depth_gt_ms = uf.multi_scale_depths(depth_gt, [1, 2, 4, 8])
        batch, height, width, _ = target_image.get_shape().as_list()

        # check only 1 scale
        depth_scaled = depth_gt_ms[scale_idx]
        width_ori = source_image.get_shape().as_list()[2]
        batch, height_sc, width_sc, _ = depth_scaled.get_shape().as_list()
        scale = int(width_ori // width_sc)
        # create synthesizer
        synthesizer = SynthesizeBatchBasic(shape=(batch, height_sc, width_sc),
                                           num_src=4,
                                           scale=scale)
        # adjust the intrinsic matrix to the current scale
        intrinsic_sc = layers.Lambda(
            lambda intrin: synthesizer.scale_intrinsic(intrin, scale),
            name=f"scale_intrin_sc{scale}")(intrinsic)
        # reorganize source images: [batch, 4, height, width, 3]
        srcimg_scaled = layers.Lambda(
            lambda image: synthesizer.reshape_source_images(image),
            name=f"reorder_source_sc{scale}")(source_image)

        # EXECUTE
        recon_image_sc = synthesizer.synthesize_batch_view(srcimg_scaled,
                                                           depth_scaled,
                                                           pose_gt,
                                                           intrinsic_sc,
                                                           suffix=f"sc{scale}")

        print("reconstructed image shape:", recon_image_sc.get_shape())
        # take a single target image from the batch and convert it to uint8
        target_image = uf.to_uint8_image(target_image[0]).numpy()
        recon_image = uf.to_uint8_image(recon_image_sc[0]).numpy()
        recon_image = recon_image.reshape((4 * height_sc, width_sc, 3))
        recon_image = cv2.resize(recon_image, (width, height * 4),
                                 interpolation=cv2.INTER_NEAREST)
        view = np.concatenate([target_image, recon_image], axis=0)
        cv2.imshow("synthesize_batch", view)
        cv2.waitKey(WAIT_KEY)
        if i >= 3:
            break

    cv2.destroyAllWindows()
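
For reference, scaling the intrinsic matrix to a downsampled resolution typically means dividing the focal lengths and principal point by the scale factor. Below is a minimal numpy sketch of what scale_intrinsic presumably does; the project's actual implementation may differ.

import numpy as np

def scale_intrinsic_sketch(intrinsic, scale):
    """Hypothetical sketch: adjust a [3, 3] camera matrix for an image
    downsampled by `scale` (fx, fy, cx, cy are divided by scale)."""
    scaled = intrinsic.copy().astype(np.float32)
    scaled[0, 0] /= scale   # fx
    scaled[1, 1] /= scale   # fy
    scaled[0, 2] /= scale   # cx
    scaled[1, 2] /= scale   # cy
    return scaled

K = np.array([[720., 0., 640.], [0., 720., 192.], [0., 0., 1.]])
print(scale_intrinsic_sketch(K, 2))
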
Example #2
def save_worst_views(frame, x, model, sample_inds, save_path, scale=1):
    if frame not in sample_inds['frame'].tolist():
        return

    colname = list(sample_inds)[-1]
    indices = sample_inds.loc[sample_inds['frame'] == frame, :].index.tolist()

    stacked_image = x['image']
    intrinsic = x['intrinsic']
    depth_gt = x['depth_gt']
    pose_gt = x['pose_gt']
    pose_gt = cp.pose_matr2rvec_batch(pose_gt)
    depth_gt_ms = uf.multi_scale_depths(depth_gt, [1, 2, 4, 8])
    source_image, target_image = uf.split_into_source_and_target(stacked_image)

    predictions = model(x['image'])
    disp_pred_ms = predictions['disp_ms']
    pose_pred = predictions['pose']
    depth_pred_ms = uf.safe_reciprocal_number_ms(disp_pred_ms)

    depth_pred_ms = [depth*scale for depth in depth_pred_ms]

    synthesizer = SynthesizeMultiScale()
    synth_target_pred_ms = synthesizer(source_image, intrinsic, depth_pred_ms, pose_pred)
    synth_target_gt_ms = synthesizer(source_image, intrinsic, depth_gt_ms, pose_gt)

    for ind in indices:
        srcidx = sample_inds.loc[ind, 'srcidx']
        view_imgs = {"target": target_image, "synthesized": synth_target_pred_ms[0][0, srcidx],
                     "depth": depth_pred_ms[0][0, srcidx], "synth_by_gt": synth_target_gt_ms[0][0, srcidx]}
        view = uf.stack_titled_images(view_imgs)
        filename = op.join(save_path, f"{colname[:3]}_{frame:04d}_{srcidx}.png")
        print("save file:", filename)
        cv2.imwrite(filename, view)
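
cp.pose_matr2rvec_batch above converts 4x4 transformation matrices into 6-DoF pose vectors. A hedged scipy sketch of that kind of conversion for a single matrix follows; the component ordering (translation first, then rotation vector) is an assumption and may differ from the project's convention.

import numpy as np
from scipy.spatial.transform import Rotation

def pose_matr2rvec_sketch(pose_mat):
    """Hypothetical sketch: convert one [4, 4] transformation matrix to a
    6-vector (translation, rotation vector); ordering is an assumption."""
    rvec = Rotation.from_matrix(pose_mat[:3, :3]).as_rotvec()
    tvec = pose_mat[:3, 3]
    return np.concatenate([tvec, rvec])

print(pose_matr2rvec_sketch(np.eye(4)))  # identity pose -> all zeros
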
Example #3
    def __call__(self):
        """
        In the code below, the 'n' in conv'n' or upconv'n' denotes the scale of the feature map:
        conv'n' means the feature map is downscaled by a factor of 1/2^n.
        """
        batch, snippet, height, width, channel = self.total_shape
        input_shape = (height * snippet, width, channel)
        input_tensor = layers.Input(shape=input_shape,
                                    batch_size=batch,
                                    name="depthnet_input")
        source_image, target_image = layers.Lambda(
            lambda image: uf.split_into_source_and_target(image),
            name="depthnet_split_image")(input_tensor)

        conv0 = self.conv2d_d(target_image, 32, 7, strides=1, name="dp_conv0b")
        conv1 = self.conv2d_d(conv0, 32, 7, strides=2, name="dp_conv1a")
        conv1 = self.conv2d_d(conv1, 64, 5, strides=1, name="dp_conv1b")
        conv2 = self.conv2d_d(conv1, 64, 5, strides=2, name="dp_conv2a")
        conv2 = self.conv2d_d(conv2, 128, 3, strides=1, name="dp_conv2b")
        conv3 = self.conv2d_d(conv2, 128, 3, strides=2, name="dp_conv3a")
        conv3 = self.conv2d_d(conv3, 256, 3, strides=1, name="dp_conv3b")
        conv4 = self.conv2d_d(conv3, 256, 3, strides=2, name="dp_conv4a")
        conv4 = self.conv2d_d(conv4, 512, 3, strides=1, name="dp_conv4b")
        conv5 = self.conv2d_d(conv4, 512, 3, strides=2, name="dp_conv5a")
        conv5 = self.conv2d_d(conv5, 512, 3, strides=1, name="dp_conv5b")
        conv6 = self.conv2d_d(conv5, 512, 3, strides=2, name="dp_conv6a")
        conv6 = self.conv2d_d(conv6, 512, 3, strides=1, name="dp_conv6b")
        conv7 = self.conv2d_d(conv6, 512, 3, strides=2, name="dp_conv7a")

        upconv6 = self.upconv_with_skip_connection(conv7, conv6, 512,
                                                   "dp_up6")  # 1/64
        upconv5 = self.upconv_with_skip_connection(upconv6, conv5, 512,
                                                   "dp_up5")  # 1/32
        upconv4 = self.upconv_with_skip_connection(upconv5, conv4, 256,
                                                   "dp_up4")  # 1/16
        upconv3 = self.upconv_with_skip_connection(upconv4, conv3, 128,
                                                   "dp_up3")  # 1/8
        depth3, dpconv2_up, dpconv3 = self.get_scaled_depth(
            upconv3, height // 4, width // 4, "dp_depth3")
        upconv2 = self.upconv_with_skip_connection(upconv3, conv2, 64,
                                                   "dp_up2", dpconv2_up)  # 1/4
        depth2, dpconv1_up, dpconv2 = self.get_scaled_depth(
            upconv2, height // 2, width // 2, "dp_depth2")
        upconv1 = self.upconv_with_skip_connection(upconv2, conv1, 32,
                                                   "dp_up1", dpconv1_up)  # 1/2
        depth1, dpconv0_up, dpconv1 = self.get_scaled_depth(
            upconv1, height, width, "dp_depth1")
        upconv0 = self.upconv_with_skip_connection(upconv1, dpconv0_up, 16,
                                                   "dp_up0")  # 1
        depth0, dpconvn1_up, dpconv0 = self.get_scaled_depth(
            upconv0, height, width, "dp_depth0")

        outputs = {
            "depth_ms": [depth0, depth1, depth2, depth3],
            "debug_out": [dpconv0, upconv0, dpconv3, upconv3]
        }
        depthnet = tf.keras.Model(inputs=input_tensor,
                                  outputs=outputs,
                                  name="depthnet")
        return depthnet
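
The decoder above repeatedly upsamples a feature map, concatenates it with the encoder feature of the same resolution, and convolves the result. Below is a minimal standalone sketch of such an upconv-with-skip block; the project's upconv_with_skip_connection may differ in details (e.g. the extra predicted-depth input used at some scales).

import tensorflow as tf
from tensorflow.keras import layers

def upconv_with_skip_sketch(bef_layer, skip_layer, out_channels, name):
    """Hypothetical sketch: 2x nearest-neighbor upsampling, skip concatenation,
    then a 3x3 convolution."""
    up = layers.UpSampling2D(size=(2, 2), interpolation="nearest",
                             name=name + "_up")(bef_layer)
    cat = layers.Concatenate(axis=-1, name=name + "_cat")([up, skip_layer])
    out = layers.Conv2D(out_channels, 3, padding="same", activation="relu",
                        name=name + "_conv")(cat)
    return out

x = tf.keras.Input(shape=(4, 8, 512))
skip = tf.keras.Input(shape=(8, 16, 512))
y = upconv_with_skip_sketch(x, skip, 256, "up_demo")
print(y.shape)  # (None, 8, 16, 256)
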
Example #4
def test_photometric_loss_quantity(suffix=""):
    """
    gt depth와 gt pose를 입력했을 때 나오는 photometric loss와
    gt pose에 노이즈를 추가하여 나오는 photometric loss를 비교
    두 가지 pose로 복원된 영상을 눈으로 확인하고 gt 데이터의 loss가 더 낮음을 확인 (assert)
    """
    print("\n===== start test_photometric_loss_quantity")
    dataset = TfrecordGenerator(op.join(opts.DATAPATH_TFR,
                                        "kitti_raw_test")).get_generator()

    for i, features in enumerate(dataset):
        print("\n--- fetch a batch data")
        stacked_image = features["image" + suffix]
        intrinsic = features["intrinsic" + suffix]
        depth_gt = features["depth_gt" + suffix]
        pose_gt = features["pose_gt" + suffix]
        source_image, target_image = uf.split_into_source_and_target(
            stacked_image)
        depth_gt_ms = uf.multi_scale_depths(depth_gt, [1, 2, 4, 8])
        pose_gt = cp.pose_matr2rvec_batch(pose_gt)
        target_ms = uf.multi_scale_like(target_image, depth_gt_ms)

        # EXECUTE
        batch_loss_right, scale_loss_right, recon_image_right = \
            test_photo_loss(source_image, intrinsic, depth_gt_ms, pose_gt, target_ms)

        print("\ncorrupt poses")
        pose_gt = pose_gt.numpy()
        pose_gt = pose_gt + np.random.uniform(-0.2, 0.2, pose_gt.shape)
        pose_gt = tf.constant(pose_gt, dtype=tf.float32)

        # EXECUTE
        batch_loss_wrong, scale_loss_wrong, recon_image_wrong = \
            test_photo_loss(source_image, intrinsic, depth_gt_ms, pose_gt, target_ms)

        # TEST
        print("loss diff: wrong - right =",
              batch_loss_wrong - batch_loss_right)
        # Due to randomness, allow a minority of frames to fail the test
        assert (np.sum(batch_loss_right.numpy() < batch_loss_wrong.numpy()) >
                opts.BATCH_SIZE // 4)
        assert (np.sum(scale_loss_right.numpy() < scale_loss_wrong.numpy()) >
                opts.BATCH_SIZE // 4)

        target = uf.to_uint8_image(target_image).numpy()[0]
        view = np.concatenate([target, recon_image_right, recon_image_wrong],
                              axis=0)
        cv2.imshow("pose corruption", view)
        cv2.waitKey(WAIT_KEY)
        if i > 3:
            break

    cv2.destroyAllWindows()
    print("!!! test_photometric_loss_quantity passed")
Example #5
    def __call__(self, features):
        predictions = dict()
        for netname, model in self.models.items():
            pred = model(features["image"])
            predictions.update(pred)
            preds_right = model(features["image_R"])
            preds_right = {
                key + "_R": value
                for key, value in preds_right.items()
            }
            predictions.update(preds_right)

        if "depth_ms" in predictions:
            predictions["disp_ms"] = uf.safe_reciprocal_number_ms(
                predictions["depth_ms"])
        if "depth_ms_R" in predictions:
            predictions["disp_ms_R"] = uf.safe_reciprocal_number_ms(
                predictions["depth_ms_R"])

        # predict the stereo extrinsic in both directions: left-to-right and right-to-left
        if "posenet" in self.models:
            posenet = self.models["posenet"]
            left_source, left_target = uf.split_into_source_and_target(
                features["image"])
            right_source, right_target = uf.split_into_source_and_target(
                features["image_R"])
            num_src = opts.SNIPPET_LEN - 1
            lr_input = layers.concatenate([right_target] * num_src +
                                          [left_target],
                                          axis=1)
            rl_input = layers.concatenate([left_target] * num_src +
                                          [right_target],
                                          axis=1)
            # pose that transforms points from right to left (T_LR)
            pose_lr = posenet(lr_input)
            # pose that transforms points from left to right (T_RL)
            pose_rl = posenet(rl_input)
            predictions["pose_LR"] = pose_lr["pose"]
            predictions["pose_RL"] = pose_rl["pose"]

        return predictions
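
To make the direction convention above concrete: with pose_LR denoting T_LR, a point expressed in the right-camera frame is mapped into the left-camera frame by T_LR, and back by its inverse T_RL. A tiny numpy illustration of that convention (the baseline value is made up):

import numpy as np

# T_LR maps points from the right-camera frame to the left-camera frame,
# here a pure translation along x by an assumed stereo baseline.
T_LR = np.eye(4)
T_LR[0, 3] = 0.54
p_right = np.array([1.0, 2.0, 10.0, 1.0])   # homogeneous point in the right frame
p_left = T_LR @ p_right                     # same point in the left frame
print(p_left[:3])                           # [1.54, 2.0, 10.0]
T_RL = np.linalg.inv(T_LR)                  # maps left-frame points back to the right frame
print((T_RL @ p_left)[:3])                  # recovers [1.0, 2.0, 10.0]
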
Example #6
    def __call__(self):
        batch, snippet, height, width, channel = self.total_shape
        input_shape = (height * snippet, width, channel)
        input_tensor = layers.Input(shape=input_shape,
                                    batch_size=batch,
                                    name="depthnet_input")
        source_image, target_image = layers.Lambda(
            lambda image: uf.split_into_source_and_target(image),
            name="depthnet_split_image")(input_tensor)

        features_ms = PretrainedModel(
            self.net_name, self.pretrained_weight).encode(target_image)
        outputs = self.decode(features_ms)
        depthnet = tf.keras.Model(inputs=input_tensor,
                                  outputs=outputs,
                                  name=self.net_name + "_base")
        return depthnet
Example #7
def test_synthesize_batch_multi_scale():
    """
    gt depth와 gt pose를 입력했을 때 스케일 별로 복원되는 이미지를 정성적으로 확인
    실제 target image와 복원된 "multi" scale target image를 눈으로 비교
    """
    print("===== start test_synthesize_batch_multi_scale")
    dataset = TfrecordGenerator(op.join(opts.DATAPATH_TFR,
                                        "kitti_raw_test")).get_generator()

    for i, features in enumerate(dataset):
        print("----- test_synthesize_batch_multi_scale")
        stacked_image = features['image']
        intrinsic = features['intrinsic']
        depth_gt = features['depth_gt']
        pose_gt = features['pose_gt']
        source_image, target_image = uf.split_into_source_and_target(
            stacked_image)
        depth_gt_ms = uf.multi_scale_depths(depth_gt, [1, 2, 4, 8])
        pred_pose = cp.pose_matr2rvec_batch(pose_gt)

        # EXECUTE
        synth_target_ms = SynthesizeMultiScale()(source_image, intrinsic,
                                                 depth_gt_ms, pred_pose)

        # compare target image and reconstructed images
        # recon_img0[0, 0]: target reconstructed from the first source image
        target_image = uf.to_uint8_image(target_image).numpy()[0]
        source_image = uf.to_uint8_image(source_image).numpy()[
            0, 0:opts.IM_HEIGHT]
        recon_img0 = uf.to_uint8_image(synth_target_ms[0]).numpy()[0, 0]
        recon_img1 = uf.to_uint8_image(synth_target_ms[2]).numpy()[0, 0]
        recon_img1 = cv2.resize(recon_img1, (opts.IM_WIDTH, opts.IM_HEIGHT),
                                interpolation=cv2.INTER_NEAREST)
        view = np.concatenate(
            [source_image, target_image, recon_img0, recon_img1], axis=0)
        print("Check if all the images are the same")
        cv2.imshow("source, target, and reconstructed", view)
        cv2.waitKey(WAIT_KEY)
        if i >= 3:
            break

    cv2.destroyAllWindows()
    print("!!! test_synthesize_batch_multi_scale passed")
Example #8
def test_reshape_source_images():
    """
    위 아래로 쌓인 원본 이미지를 batch 아래 한 차원을 더 만들어서 reshape이 잘 됐는지 확인(assert)
    """
    print("===== start test_reshape_source_images")
    dataset = TfrecordGenerator(op.join(opts.DATAPATH_TFR,
                                        "kitti_raw_test")).get_generator()
    dataset = iter(dataset)
    features = next(dataset)
    stacked_image = features['image']
    source_image, target_image = uf.split_into_source_and_target(stacked_image)
    print("batch source image shape", source_image.shape)
    # create synthesizer
    batch, height, width, _ = target_image.get_shape().as_list()
    synthesizer = SynthesizeBatchBasic(
        (batch, int(height / 2), int(width / 2)), 4, 2)

    # EXECUTE
    reshaped_image = synthesizer.reshape_source_images(source_image)

    print("reorganized source image shape",
          reshaped_image.get_shape().as_list())
    reshaped_image = uf.to_uint8_image(reshaped_image).numpy()
    imgidx = 2
    scsize = (int(opts.IM_HEIGHT / 2), int(opts.IM_WIDTH / 2))
    scaled_image = tf.image.resize(source_image,
                                   size=(scsize[0] * 4, scsize[1]),
                                   method="bilinear")
    scaled_image = uf.to_uint8_image(scaled_image).numpy()
    scaled_image = scaled_image[0, scsize[0] * imgidx:scsize[0] * (imgidx + 1)]
    # compare second image in the stacked images
    assert np.isclose(scaled_image, reshaped_image[0, imgidx]).all()

    view = np.concatenate([scaled_image, reshaped_image[0, 1]], axis=0)
    cv2.imshow("original and reshaped", view)
    cv2.waitKey(WAIT_KEY)
    print("!!! test_reshape_source_images passed")
    cv2.destroyAllWindows()
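
The reshape tested above turns source frames stacked along the height axis into a tensor with a separate source dimension at the synthesizer's resolution. A minimal sketch of that kind of reorganization is shown below; the project's reshape_source_images may resize or reorder differently.

import tensorflow as tf

def reshape_source_images_sketch(stacked_sources, num_src, height_sc, width_sc):
    """Hypothetical sketch: [batch, num_src*height, width, 3]
    -> resize -> [batch, num_src, height_sc, width_sc, 3]."""
    resized = tf.image.resize(stacked_sources,
                              size=(num_src * height_sc, width_sc),
                              method="bilinear")
    return tf.reshape(resized, (-1, num_src, height_sc, width_sc, 3))

src = tf.random.uniform((2, 4 * 128, 416, 3))
print(reshape_source_images_sketch(src, 4, 64, 208).shape)  # (2, 4, 64, 208, 3)
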
Example #9
def evaluate_batch(index, x, model):
    num_src = opts.SNIPPET_LEN - 1

    stacked_image = x['image']
    intrinsic = x['intrinsic']
    depth_true = x['depth_gt']
    pose_true_mat = x['pose_gt']
    source_image, target_image = uf.split_into_source_and_target(stacked_image)

    predictions = model(x['image'])
    disp_pred_ms = predictions['disp_ms']
    pose_pred = predictions['pose']
    depth_pred_ms = uf.safe_reciprocal_number_ms(disp_pred_ms)

    # evaluate depth from numpy arrays and take only the 'abs_rel' metric
    depth_err, scale = compute_depth_error(depth_pred_ms[0].numpy()[0], depth_true.numpy()[0])
    smooth_loss = compute_smooth_loss(disp_pred_ms[0], target_image)

    pose_pred_mat = cp.pose_rvec2matr_batch(pose_pred)
    # pose error output: [batch, num_src]
    trj_err, trj_len = compute_trajectory_error(pose_pred_mat, pose_true_mat, scale)
    rot_err = ef.calc_rotational_error_tensor(pose_pred_mat, pose_true_mat)

    # compute photometric loss: [batch, num_src]
    photo_loss = compute_photo_loss(target_image, source_image, intrinsic, depth_pred_ms, pose_pred)

    depth_res = [index, smooth_loss, depth_err]
    # pose_res: [num_src, -1]
    pose_res = np.stack([np.array([index] * 4), np.arange(num_src), photo_loss.numpy().reshape(-1),
                         trj_err.numpy().reshape(-1), trj_len.numpy().reshape(-1),
                         rot_err.numpy().reshape(-1)], axis=1)

    # to collect trajectory
    trajectory = np.concatenate([np.array([index] * 4)[:, np.newaxis], np.arange(num_src)[:, np.newaxis],
                                 pose_true_mat.numpy()[:, :, :3, 3].reshape((-1, 3)),
                                 pose_pred_mat.numpy()[:, :, :3, 3].reshape((-1, 3))*scale], axis=1)
    return depth_res, pose_res, trajectory
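
The depth evaluation above returns both an error and a scale. With monocular predictions, the scale is commonly the median ratio between GT and predicted depth, and 'abs_rel' is the mean absolute relative error over valid pixels. A hedged numpy sketch of that convention follows; it is not necessarily the exact compute_depth_error implementation.

import numpy as np

def compute_depth_error_sketch(depth_pred, depth_true, min_depth=1e-3, max_depth=80.0):
    """Hypothetical sketch: median-scale the prediction to the GT,
    then return (abs_rel, scale) over valid GT pixels."""
    mask = (depth_true > min_depth) & (depth_true < max_depth)
    gt, pred = depth_true[mask], depth_pred[mask]
    scale = np.median(gt) / np.median(pred)
    pred = np.clip(pred * scale, min_depth, max_depth)
    abs_rel = np.mean(np.abs(gt - pred) / gt)
    return abs_rel, scale

gt = np.random.uniform(1.0, 50.0, (128, 416))
pred = gt * 0.5 + np.random.uniform(0, 0.1, gt.shape)
print(compute_depth_error_sketch(pred, gt))
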
Example #10
    def augment_data(self, features, predictions, suffix=""):
        """
        gather additional data required to compute losses
        :param features: {image, intrinsic}
                image: stacked image snippet [batch, snippet_len*height, width, 3]
                intrinsic: camera projection matrix [batch, 3, 3]
        :param predictions: {disp_ms, pose}
                disp_ms: multi scale disparities, list of [batch, height/scale, width/scale, 1]
                pose: poses that transform points from target to source [batch, num_src, 6]
        :param suffix: suffix to keys
        :return augm_data: {depth_ms, source, target, target_ms, synth_target_ms}
                depth_ms: multi scale depth, list of [batch, height/scale, width/scale, 1]
                source: source frames [batch, num_src*height, width, 3]
                target: target frame [batch, height, width, 3]
                target_ms: multi scale target frame, list of [batch, height/scale, width/scale, 3]
                synth_target_ms: multi scale synthesized target frames generated from each source image,
                                list of [batch, num_src, height/scale, width/scale, 3]
        """
        augm_data = dict()
        pred_depth_ms = predictions["depth_ms" + suffix]
        pred_pose = predictions["pose" + suffix]

        stacked_image = features["image" + suffix]
        intrinsic = features["intrinsic" + suffix]
        source_image, target_image = uf.split_into_source_and_target(
            stacked_image)
        target_ms = uf.multi_scale_like(target_image, pred_depth_ms)
        augm_data["source" + suffix] = source_image
        augm_data["target" + suffix] = target_image
        augm_data["target_ms" + suffix] = target_ms

        synth_target_ms = SynthesizeMultiScale()(source_image, intrinsic,
                                                 pred_depth_ms, pred_pose)
        augm_data["synth_target_ms" + suffix] = synth_target_ms

        return augm_data
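
uf.multi_scale_like above presumably resizes the target frame to the spatial size of each multi-scale depth map, producing the target_ms list described in the docstring. A minimal sketch under that assumption:

import tensorflow as tf

def multi_scale_like_sketch(image, depth_ms):
    """Hypothetical sketch: resize `image` to the height/width of every
    tensor in `depth_ms` and return the resized images as a list."""
    image_ms = []
    for depth in depth_ms:
        _, height, width, _ = depth.get_shape().as_list()
        image_ms.append(tf.image.resize(image, size=(height, width),
                                        method="bilinear"))
    return image_ms

target = tf.random.uniform((2, 128, 416, 3))
depths = [tf.zeros((2, 128 // s, 416 // s, 1)) for s in (1, 2, 4, 8)]
print([img.shape for img in multi_scale_like_sketch(target, depths)])
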
Example #11
def test_smootheness_loss_quantity():
    """
    gt depth로부터 계산되는 smootheness loss 비교
    gt depth에 일부를 0으로 처리하여 전체적인 gradient를 높인 depth의 smootheness loss 비교
    두 가지 depth를 눈으로 확인하고 gt 데이터의 loss가 더 낮음을 확인 (assert)
    """
    print("\n===== start test_smootheness_loss_quantity")
    dataset = TfrecordGenerator(op.join(opts.DATAPATH_TFR,
                                        "kitti_raw_test")).get_generator()
    """
    gather additional data required to compute losses
    :param features: {image, intrinsic}
            image: stacked image snippet [batch, snippet_len*height, width, 3]
            intrinsic: camera projection matrix [batch, 3, 3]
    :param predictions: {disp_ms, pose}
            disp_ms: multi scale disparities, list of [batch, height/scale, width/scale, 1]
            pose: poses that transform points from target to source [batch, num_src, 6]
    :return augm_data: {depth_ms, source, target, target_ms, synth_target_ms}
            depth_ms: multi scale depth, list of [batch, height/scale, width/scale, 1]
            source: source frames [batch, num_src*height, width, 3]
            target: target frame [batch, height, width, 3]
            target_ms: multi scale target frame, list of [batch, height/scale, width/scale, 3]
            synth_target_ms: multi scale synthesized target frames generated from each source image,
                            list of [batch, num_src, height/scale, width/scale, 3]
    """

    for i, features in enumerate(dataset):
        print("\n--- fetch a batch data")

        stacked_image = features["image"]
        depth_gt = features["depth_gt"]
        # interpolate depth
        depth_gt = tf.image.resize(depth_gt,
                                   size=(int(opts.IM_HEIGHT / 2),
                                         int(opts.IM_WIDTH / 2)),
                                   method="bilinear")
        depth_gt = tf.image.resize(depth_gt,
                                   size=(opts.IM_HEIGHT, opts.IM_WIDTH),
                                   method="bilinear")
        # make multi-scale data
        source_image, target_image = uf.split_into_source_and_target(
            stacked_image)

        # EXECUTE
        batch_loss_right = tu_smootheness_loss(depth_gt, target_image)
        print("> batch photometric losses:", batch_loss_right)

        print("> corrupt depth to increase gradient of depth")
        depth_gt = depth_gt.numpy()
        depth_gt_right = np.copy(depth_gt)
        depth_gt_wrong = np.copy(depth_gt)
        depth_gt_wrong[:, 10:200:20] = 0
        depth_gt_wrong[:, 11:200:20] = 0
        depth_gt_wrong[:, 12:200:20] = 0
        depth_gt_wrong = tf.constant(depth_gt_wrong, dtype=tf.float32)

        # EXECUTE
        batch_loss_wrong = tu_smootheness_loss(depth_gt_wrong, target_image)

        # TEST
        print("> loss diff: wrong - right =",
              batch_loss_wrong - batch_loss_right)
        assert (batch_loss_right.numpy() <= batch_loss_wrong.numpy()).all()

        view = np.concatenate([depth_gt_right[0], depth_gt_wrong[0]], axis=0)
        cv2.imshow("target image corruption", view)
        cv2.waitKey(WAIT_KEY)
        if i > 3:
            break

    cv2.destroyAllWindows()
    print("!!! test_smootheness_loss_quantity passed")
Example #12
def test_photometric_loss_quality(suffix=""):
    """
    gt depth와 gt pose를 입력했을 때 스케일 별로 복원되는 이미지를 정성적으로 확인하고
    복원된 이미지로부터 계산되는 photometric loss를 확인
    assert 없음
    """
    print("\n===== start test_photometric_loss_quality")
    dataset = TfrecordGenerator(op.join(opts.DATAPATH_TFR,
                                        "kitti_raw_test")).get_generator()

    for i, features in enumerate(dataset):
        print("\n--- fetch a batch data")
        stacked_image = features["image" + suffix]
        intrinsic = features["intrinsic" + suffix]
        depth_gt = features["depth_gt" + suffix]
        pose_gt = features["pose_gt" + suffix]

        # an identity pose results in NaN data, so make sure no GT pose is the identity
        pose_gt_np = pose_gt.numpy()
        for pose_seq in pose_gt_np:
            for pose in pose_seq:
                assert not np.isclose(np.identity(4, dtype=float),
                                      pose).all()

        source_image, target_image = uf.split_into_source_and_target(
            stacked_image)
        depth_gt_ms = uf.multi_scale_depths(depth_gt, [1, 2, 4, 8])
        pose_gt = cp.pose_matr2rvec_batch(pose_gt)
        target_ms = uf.multi_scale_like(target_image, depth_gt_ms)
        batch, height, width, _ = target_image.get_shape().as_list()

        synth_target_ms = SynthesizeMultiScale()(source_image, intrinsic,
                                                 depth_gt_ms, pose_gt)

        srcimgs = uf.to_uint8_image(source_image).numpy()[0]
        srcimg0 = srcimgs[0:height]
        srcimg3 = srcimgs[height * 3:height * 4]

        losses = []
        for scale, synt_target, orig_target in zip([1, 2, 4, 8],
                                                   synth_target_ms, target_ms):
            # EXECUTE
            loss = ls.photometric_loss_l1(synt_target, orig_target)
            losses.append(loss)

            recon_target = uf.to_uint8_image(synt_target).numpy()
            recon0 = cv2.resize(recon_target[0, 0], (width, height),
                                interpolation=cv2.INTER_NEAREST)
            recon3 = cv2.resize(recon_target[0, 3], (width, height),
                                interpolation=cv2.INTER_NEAREST)
            target = uf.to_uint8_image(orig_target).numpy()[0]
            target = cv2.resize(target, (width, height),
                                interpolation=cv2.INTER_NEAREST)
            view = np.concatenate([target, srcimg0, recon0, srcimg3, recon3],
                                  axis=0)
            print(f"1/{scale} scale, photo loss:", tf.reduce_sum(loss, axis=1))
            cv2.imshow("photo loss", view)
            cv2.waitKey(WAIT_KEY)

        losses = tf.stack(losses, axis=2)  # [batch, num_src, num_scales]
        print("all photometric loss:", tf.reduce_sum(losses, axis=1))
        print("batch mean photometric loss:", tf.reduce_sum(losses,
                                                            axis=[1, 2]))
        print("scale mean photometric loss:", tf.reduce_sum(losses,
                                                            axis=[0, 1]))
        if i > 3:
            break

    cv2.destroyAllWindows()
    print("!!! test_photometric_loss_quality passed")