def __call__(self, features, predictions, augm_data):
    """
    Stereo pose loss: MSE between predicted and true stereo poses in both
    directions (left->right and right->left), averaged over axis 1.

    :param features: dict that provides "stereo_T_LR" (true left-to-right
        transformation matrix; presumably [batch, 4, 4] -- TODO confirm)
    :param predictions: dict that provides "pose_LR" and "pose_RL"
    :param augm_data: not read by this loss
    :return: loss tensor reduced over axis 1
    """
    predicted_lr = predictions["pose_LR"]
    predicted_rl = predictions["pose_RL"]
    # insert an axis at position 1 so the true pose lines up with the predictions
    true_lr_mat = tf.expand_dims(features["stereo_T_LR"], axis=1)
    # the right-to-left ground truth is the inverse of the left-to-right one
    true_rl_mat = tf.linalg.inv(true_lr_mat)
    true_lr = cp.pose_matr2rvec_batch(true_lr_mat)
    true_rl = cp.pose_matr2rvec_batch(true_rl_mat)
    # per-direction errors: [batch, num_src]
    error_lr = tf.keras.losses.MSE(true_lr, predicted_lr)
    error_rl = tf.keras.losses.MSE(true_rl, predicted_rl)
    # summed error averaged over axis 1: [batch]
    return tf.reduce_mean(error_lr + error_rl, axis=1)
def save_worst_views(frame, x, model, sample_inds, save_path, scale=1):
    """
    Save comparison images for `frame` if it is listed in `sample_inds`.

    For every row of `sample_inds` whose 'frame' equals `frame`, a titled
    stack of the target image, the image synthesized from predictions, the
    predicted depth and the image synthesized from ground truth is written
    to `save_path` as a png file.

    :param frame: frame index of the current sample
    :param x: dict that provides 'image', 'intrinsic', 'depth_gt', 'pose_gt'
    :param model: callable that maps x['image'] to a dict with 'disp_ms' and 'pose'
    :param sample_inds: DataFrame with 'frame' and 'srcidx' columns; the name
        of its last column is used as the file name prefix
    :param save_path: directory where images are written
    :param scale: factor multiplied to every predicted depth map
    """
    frame_column = sample_inds['frame']
    if frame not in frame_column.tolist():
        return

    colname = list(sample_inds)[-1]
    row_labels = sample_inds.loc[frame_column == frame, :].index.tolist()

    stacked_image, intrinsic, depth_gt, pose_gt_mat = (
        x[key] for key in ('image', 'intrinsic', 'depth_gt', 'pose_gt'))
    pose_gt_vec = cp.pose_matr2rvec_batch(pose_gt_mat)
    depth_gt_ms = uf.multi_scale_depths(depth_gt, [1, 2, 4, 8])
    source_image, target_image = uf.split_into_source_and_target(stacked_image)

    predictions = model(x['image'])
    disp_pred_ms, pose_pred = predictions['disp_ms'], predictions['pose']
    # disparity -> depth, then apply the given scale factor
    depth_pred_ms = [depth * scale
                     for depth in uf.safe_reciprocal_number_ms(disp_pred_ms)]

    # one synthesizer is used for both predicted and ground truth inputs
    synthesizer = SynthesizeMultiScale()
    synth_by_pred_ms = synthesizer(source_image, intrinsic, depth_pred_ms, pose_pred)
    synth_by_gt_ms = synthesizer(source_image, intrinsic, depth_gt_ms, pose_gt_vec)

    for label in row_labels:
        srcidx = sample_inds.loc[label, 'srcidx']
        # element [0, srcidx] of the first entry of each multi-scale list
        view_imgs = {
            "target": target_image,
            "synthesized": synth_by_pred_ms[0][0, srcidx],
            "depth": depth_pred_ms[0][0, srcidx],
            "synth_by_gt": synth_by_gt_ms[0][0, srcidx],
        }
        view = uf.stack_titled_images(view_imgs)
        filename = op.join(save_path, f"{colname[:3]}_{frame:04d}_{srcidx}.png")
        print("save file:", filename)
        cv2.imwrite(filename, view)
def stereo_synthesize_loss(self, source_img, target_ms, target_depth_ms, pose_t2s, intrinsic, suffix=""):
    """
    Synthesize the target view from the source image with a stereo pose and
    evaluate the photometric loss at every scale.

    :param source_img: [batch, num_src*height, width, 3]
    :param target_ms: list of [batch, height/scale, width/scale, 3]
    :param target_depth_ms: list of [batch, height/scale, width/scale, 1]
    :param pose_t2s: target-to-source transformation matrices; an axis is
        inserted at position 1 before conversion, so this is presumably
        [batch, 4, 4] -- TODO confirm against callers
    :param intrinsic: [batch, num_src, 3, 3]
    :param suffix: string appended to the loss layer names
        ("" if right to left, else "_R")
    :return: (list of per-scale photometric losses, list of synthesized targets)
    """
    expanded_pose = tf.expand_dims(pose_t2s, 1)
    pose_stereo = cp.pose_matr2rvec_batch(expanded_pose)
    synthesizer = SynthesizeMultiScale()
    # synth_target_ms: list of [batch, 1, height/scale, width/scale, 3]
    synth_target_ms = synthesizer(source_img, intrinsic, target_depth_ms, pose_stereo)

    # one named Lambda layer per scale wraps self.photometric_loss
    losses = [
        layers.Lambda(
            lambda inputs: self.photometric_loss(inputs[0], inputs[1]),
            name=f"photo_loss_{i}" + suffix,
        )([synth_img_sc, target_img_sc])
        for i, (synth_img_sc, target_img_sc) in enumerate(zip(synth_target_ms, target_ms))
    ]
    return losses, synth_target_ms
def test_photometric_loss_quantity(suffix=""):
    """
    Compare the photometric loss computed from gt depth and gt pose with the
    photometric loss computed after adding noise to the gt pose.
    The images reconstructed with both poses are shown for visual inspection
    and the loss from the gt data is asserted to be the lower one.

    :param suffix: string appended to the feature keys that are read
    """
    print("\n===== start test_photometric_loss_quantity")
    tfr_path = op.join(opts.DATAPATH_TFR, "kitti_raw_test")
    dataset = TfrecordGenerator(tfr_path).get_generator()

    for i, features in enumerate(dataset):
        print("\n--- fetch a batch data")
        stacked_image = features["image" + suffix]
        intrinsic = features["intrinsic" + suffix]
        depth_gt = features["depth_gt" + suffix]
        pose_gt_mat = features["pose_gt" + suffix]

        source_image, target_image = uf.split_into_source_and_target(stacked_image)
        depth_gt_ms = uf.multi_scale_depths(depth_gt, [1, 2, 4, 8])
        pose_right = cp.pose_matr2rvec_batch(pose_gt_mat)
        target_ms = uf.multi_scale_like(target_image, depth_gt_ms)

        # EXECUTE: loss with the true pose
        batch_loss_right, scale_loss_right, recon_image_right = test_photo_loss(
            source_image, intrinsic, depth_gt_ms, pose_right, target_ms)

        print("\ncorrupt poses")
        pose_np = pose_right.numpy()
        noisy_pose = pose_np + np.random.uniform(-0.2, 0.2, pose_np.shape)
        pose_wrong = tf.constant(noisy_pose, dtype=tf.float32)

        # EXECUTE: loss with the corrupted pose
        batch_loss_wrong, scale_loss_wrong, recon_image_wrong = test_photo_loss(
            source_image, intrinsic, depth_gt_ms, pose_wrong, target_ms)

        # TEST
        print("loss diff: wrong - right =", batch_loss_wrong - batch_loss_right)
        # the noise is random, so only require that more than BATCH_SIZE // 4
        # entries get a lower loss from the true pose
        min_better = opts.BATCH_SIZE // 4
        batch_better = (batch_loss_right.numpy() < batch_loss_wrong.numpy()).sum()
        scale_better = (scale_loss_right.numpy() < scale_loss_wrong.numpy()).sum()
        assert batch_better > min_better
        assert scale_better > min_better

        # show target, reconstruction by true pose, reconstruction by noisy pose
        target = uf.to_uint8_image(target_image).numpy()[0]
        view = np.concatenate([target, recon_image_right, recon_image_wrong], axis=0)
        cv2.imshow("pose corruption", view)
        cv2.waitKey(WAIT_KEY)
        if i > 3:
            break

    cv2.destroyAllWindows()
    print("!!! test_photometric_loss_quantity passed")
def tu_make_prediction(features, suffix="", const_depth=None):
    """
    Build a prediction dict out of ground truth features.

    :param features: dict that provides "depth_gt" + suffix and "pose_gt" + suffix
    :param suffix: string appended to the feature keys and the output keys
    :param const_depth: None to use the gt depth as it is; 0 to fill the depth
        map with the mean of the gt depth; any other value to fill the depth
        map with that value
    :return: dict with "pose" + suffix (gt pose converted by
        cp.pose_matr2rvec_batch) and "depth_ms" + suffix (multi-scale depths)
    """
    gt_depth = features["depth_gt" + suffix]
    if const_depth is None:
        depth = gt_depth
    else:
        fill_value = const_depth
        if fill_value == 0:
            # 0 means "use the mean of the gt depth" as the constant
            fill_value = tf.reduce_mean(gt_depth)
            print("mean depth", fill_value)
        depth_shape = gt_depth.get_shape().as_list()
        depth = tf.constant(fill_value, tf.float32, shape=depth_shape)

    depth_ms = uf.multi_scale_depths(depth, [1, 2, 4, 8])
    pose_vec = cp.pose_matr2rvec_batch(features["pose_gt" + suffix])
    return {"pose" + suffix: pose_vec, "depth_ms" + suffix: depth_ms}
def test_synthesize_batch_multi_scale():
    """
    Qualitatively check the images reconstructed at each scale when gt depth
    and gt pose are given.
    The real target image and the reconstructed multi-scale target images are
    shown together for visual comparison; nothing is asserted.
    """
    print("===== start test_synthesize_batch_multi_scale")
    dataset = TfrecordGenerator(op.join(opts.DATAPATH_TFR, "kitti_raw_test")).get_generator()

    for i, features in enumerate(dataset):
        print("----- test_synthesize_batch_multi_scale")
        stacked_image = features['image']
        intrinsic = features['intrinsic']
        depth_gt = features['depth_gt']
        pose_gt = features['pose_gt']
        source_image, target_image = uf.split_into_source_and_target(stacked_image)
        depth_gt_ms = uf.multi_scale_depths(depth_gt, [1, 2, 4, 8])
        pred_pose = cp.pose_matr2rvec_batch(pose_gt)

        # EXECUTE
        synth_target_ms = SynthesizeMultiScale()(source_image, intrinsic, depth_gt_ms, pred_pose)

        # compare target image and reconstructed images
        target_image = uf.to_uint8_image(target_image).numpy()[0]
        # first IM_HEIGHT rows of the stacked sources, i.e. the first source image
        source_image = uf.to_uint8_image(source_image).numpy()[0, 0:opts.IM_HEIGHT]
        # [0, 0]: first batch element, reconstructed from the first source image
        recon_img0 = uf.to_uint8_image(synth_target_ms[0]).numpy()[0, 0]
        recon_img1 = uf.to_uint8_image(synth_target_ms[2]).numpy()[0, 0]
        # The interpolation flag must be passed by keyword: the third positional
        # parameter of cv2.resize is `dst`, not `interpolation`.
        recon_img1 = cv2.resize(recon_img1, (opts.IM_WIDTH, opts.IM_HEIGHT),
                                interpolation=cv2.INTER_NEAREST)
        view = np.concatenate([source_image, target_image, recon_img0, recon_img1], axis=0)
        print("Check if all the images are the same")
        cv2.imshow("source, target, and reconstructed", view)
        cv2.waitKey(WAIT_KEY)
        if i >= 3:
            break

    cv2.destroyAllWindows()
    print("!!! test_synthesize_batch_multi_scale passed")
def test_photometric_loss_quality(suffix=""):
    """
    Qualitatively check the images reconstructed at each scale when gt depth
    and gt pose are given, and print the photometric loss computed from the
    reconstructed images.
    Loss values are not asserted; the only assertion is that no gt pose is an
    identity matrix.

    :param suffix: string appended to the feature keys that are read
    """
    print("\n===== start test_photometric_loss_quality")
    dataset = TfrecordGenerator(op.join(opts.DATAPATH_TFR, "kitti_raw_test")).get_generator()
    # np.float was removed from NumPy (1.24); np.float64 is the same dtype.
    identity_pose = np.identity(4, dtype=np.float64)

    for i, features in enumerate(dataset):
        print("\n--- fetch a batch data")
        stacked_image = features["image" + suffix]
        intrinsic = features["intrinsic" + suffix]
        depth_gt = features["depth_gt" + suffix]
        pose_gt = features["pose_gt" + suffix]

        # identity pose results in NaN data
        for pose_seq in pose_gt.numpy():
            for pose in pose_seq:
                assert not np.isclose(identity_pose, pose).all()

        source_image, target_image = uf.split_into_source_and_target(stacked_image)
        depth_gt_ms = uf.multi_scale_depths(depth_gt, [1, 2, 4, 8])
        pose_gt = cp.pose_matr2rvec_batch(pose_gt)
        target_ms = uf.multi_scale_like(target_image, depth_gt_ms)
        _, height, width, _ = target_image.get_shape().as_list()
        synth_target_ms = SynthesizeMultiScale()(source_image, intrinsic, depth_gt_ms, pose_gt)

        # source images of the first batch element are stacked along the rows;
        # cut out the source at index 0 and the source at index 3
        srcimgs = uf.to_uint8_image(source_image).numpy()[0]
        srcimg0 = srcimgs[0:height]
        srcimg3 = srcimgs[height * 3:height * 4]

        losses = []
        for scale, synt_target, orig_target in zip([1, 2, 4, 8], synth_target_ms, target_ms):
            # EXECUTE
            loss = ls.photometric_loss_l1(synt_target, orig_target)
            losses.append(loss)

            # resize everything back to the full resolution to stack them
            recon_target = uf.to_uint8_image(synt_target).numpy()
            recon0 = cv2.resize(recon_target[0, 0], (width, height),
                                interpolation=cv2.INTER_NEAREST)
            recon3 = cv2.resize(recon_target[0, 3], (width, height),
                                interpolation=cv2.INTER_NEAREST)
            target = uf.to_uint8_image(orig_target).numpy()[0]
            target = cv2.resize(target, (width, height), interpolation=cv2.INTER_NEAREST)
            view = np.concatenate([target, srcimg0, recon0, srcimg3, recon3], axis=0)
            print(f"1/{scale} scale, photo loss:", tf.reduce_sum(loss, axis=1))
            cv2.imshow("photo loss", view)
            cv2.waitKey(WAIT_KEY)

        losses = tf.stack(losses, axis=2)  # [batch, num_src, num_scales]
        # NOTE(review): the labels below say "mean" but the values are sums
        print("all photometric loss:", tf.reduce_sum(losses, axis=1))
        print("batch mean photometric loss:", tf.reduce_sum(losses, axis=[1, 2]))
        print("scale mean photometric loss:", tf.reduce_sum(losses, axis=[0, 1]))
        if i > 3:
            break

    cv2.destroyAllWindows()
    print("!!! test_photometric_loss_quality passed")