def define_pred_graph(self):
    """Prediction graph construction.

    Runs the source and target images through an encoder-decoder followed by
    the LDI predictor (the target pass is built with ``reuse=True``), scales
    element 2 of each prediction by ``opts.max_disp`` and stores the results
    on ``self``.

    Attributes written on ``self``:
      src_ldi_gt_disps, trg_ldi_gt_disps: float32 placeholders of shape
        [n_layers, batch_size, img_height, img_width, 1]; only created when
        ``opts.debug_synth_texture`` is set.
      focal_disps: always set to None.
      inv_rot_mat, inv_trans_mat: transform from the trg to the src frame.
      ldi_src, ldi_trg: outputs of ``nets.ldi_predictor``.
      disps_src, disps_trg: entry 0 of element 2 of each LDI prediction.
    """
    opts = self.opts

    if opts.debug_synth_texture:
        # Placeholders through which ground-truth layer disparities are fed.
        gt_disps_shape = [
            opts.n_layers, opts.batch_size,
            opts.img_height, opts.img_width, 1,
        ]
        self.src_ldi_gt_disps = tf.placeholder(
            tf.float32, gt_disps_shape, name='src_ldi_gt_disps')
        self.trg_ldi_gt_disps = tf.placeholder(
            tf.float32, gt_disps_shape, name='trg_ldi_gt_disps')

    self.focal_disps = None

    # Transform from trg to src frame.
    self.inv_rot_mat = nn_helpers.transpose(self.rot_mat)
    self.inv_trans_mat = -tf.matmul(self.inv_rot_mat, self.trans_mat)

    # Pick the encoder-decoder once; both images go through the same builder.
    if opts.use_unet:
        build_enc_dec = nets.encoder_decoder_unet
    else:
        build_enc_dec = nets.encoder_decoder_simple
    n_steps = opts.n_layerwise_steps

    _, feat_dec_src, skip_feat_src, _ = build_enc_dec(
        self.imgs_src, nl_diff_enc_dec=n_steps)
    _, feat_dec_trg, skip_feat_trg, _ = build_enc_dec(
        self.imgs_trg, reuse=True, nl_diff_enc_dec=n_steps)

    # Keyword arguments common to the source and target predictor calls.
    predictor_args = {
        'n_layers': opts.n_layers,
        'n_layerwise_steps': n_steps,
        'pred_masks': opts.pred_ldi_masks,
    }
    self.ldi_src = nets.ldi_predictor(
        feat_dec_src, skip_feat=skip_feat_src, **predictor_args)
    self.ldi_src[2] *= opts.max_disp
    self.ldi_trg = nets.ldi_predictor(
        feat_dec_trg, reuse=True, skip_feat=skip_feat_trg, **predictor_args)
    self.ldi_trg[2] *= opts.max_disp

    if opts.debug_synth_texture:
        # Substitute the fed ground-truth disparities; the prediction stays in
        # the expression with zero weight.
        self.ldi_src[2] = 0 * self.ldi_src[2] + 1 * self.src_ldi_gt_disps
        self.ldi_trg[2] = 0 * self.ldi_trg[2] + 1 * self.trg_ldi_gt_disps

    # Select inverse depths predicted for the 1st layer of the LDIs.
    self.disps_src = self.ldi_src[2][0]
    self.disps_trg = self.ldi_trg[2][0]
def transform_plane_eqns(rot, t, n_hat, a):
    """Transforms plane equations according to a frame transformation.

    Computes ``n_hat_t = n_hat * rot^T`` and
    ``a_t = a - n_hat * (rot^T * t)``.

    Args:
      rot: relative rotation, are [...] X 3 X 3 matrices
      t: [...] X 3 X 1, translations from source to target camera
      n_hat: [...] X 1 X 3, plane normal w.r.t source camera frame
      a: [...] X 1 X 1, plane equation displacement

    Returns:
      n_hat_t: [...] X 1 X 3, plane normal w.r.t target camera frame
      a_t: [...] X 1 X 1, plane equation displacement
    """
    with tf.name_scope('transform_plane_eqns'):
        rot_transposed = nn_helpers.transpose(rot)
        normal_in_target = tf.matmul(n_hat, rot_transposed)
        # rot^T * t, projected onto the source-frame normal below.
        rot_t_times_t = tf.matmul(rot_transposed, t)
        offset_in_target = a - tf.matmul(n_hat, rot_t_times_t)
    return normal_in_target, offset_in_target
def inverse_projection_matrix(k_s, k_t, rot, t):
    """Projection matrix for transforming trg pixel coordinates to src frame.

    The result is the product
    ``pad_intrinsic(k_s) * pad_extrinsic(rot^T, -rot^T * t) *
    pad_intrinsic(k_t^-1)``.

    Args:
      k_s: intrinsics for source cameras, are [...] X 3 X 3 matrices
      k_t: intrinsics for target cameras, are [...] X 3 X 3 matrices
      rot: relative rotation from source to target, are [...] X 3 X 3 matrices
      t: [...] X 3 X 1 translations from source to target camera

    Returns:
      transform: [...] X 4 X 4 projection matrix
    """
    with tf.name_scope('inverse_projection_matrix'):
        k_t_inverse = tf.matrix_inverse(k_t, name='k_t_inv')

        # Invert the rigid transform: rotation rot^T, translation -rot^T * t.
        rot_inverse = nn_helpers.transpose(rot)
        t_inverse = -1 * tf.matmul(rot_inverse, t)

        padded_k_s = pad_intrinsic(k_s)
        padded_extrinsic = pad_extrinsic(rot_inverse, t_inverse)
        padded_k_t_inverse = pad_intrinsic(k_t_inverse)

        trg_pixels_to_src_cam = tf.matmul(padded_extrinsic, padded_k_t_inverse)
        return tf.matmul(padded_k_s, trg_pixels_to_src_cam)
def inv_homography_dmat(k_t, rot, t, n_hat, a):
    """Computes M where M*(u,v,1) = d_t.

    Evaluates ``M = -(n_hat * rot^T * k_t^-1) / (a - n_hat * rot^T * t)``.
    The product ``n_hat * rot^T`` appears in both the numerator and the
    denominator, so it is built once and reused.

    Args:
      k_t: intrinsics for target cameras, are [...] X 3 X 3 matrices
      rot: relative rotation, are [...] X 3 X 3 matrices
      t: [...] X 3 X 1, translations from source to target camera
      n_hat: [...] X 1 X 3, plane normal w.r.t source camera frame
      a: [...] X 1 X 1, plane equation displacement

    Returns:
      d_mat: [...] X 1 X 3 matrices
    """
    # NOTE(review): the scope name matches the one used by inv_homography; it
    # is left untouched here so existing op names keep their prefix.
    with tf.name_scope('inv_homography'):
        rot_t = nn_helpers.transpose(rot)
        k_t_inv = tf.matrix_inverse(k_t, name='k_t_inv')

        # Shared by the numerator and the denominator.
        n_hat_rot_t = tf.matmul(n_hat, rot_t)

        denom = a - tf.matmul(n_hat_rot_t, t)
        numerator = -1 * tf.matmul(n_hat_rot_t, k_t_inv)
        # divide_safe is a project helper; presumably it guards against a zero
        # denominator -- confirm in nn_helpers.
        d_mat = nn_helpers.divide_safe(numerator, denom, name='dmat')
        return d_mat
def inv_homography(k_s, k_t, rot, t, n_hat, a):
    """Computes inverse homography matrix.

    Evaluates
    ``k_s * (rot^T + (rot^T * t * n_hat * rot^T) / (a - n_hat * rot^T * t))
    * k_t^-1``.

    Args:
      k_s: intrinsics for source cameras, are [...] X 3 X 3 matrices
      k_t: intrinsics for target cameras, are [...] X 3 X 3 matrices
      rot: relative rotation, are [...] X 3 X 3 matrices
      t: [...] X 3 X 1, translations from source to target camera
      n_hat: [...] X 1 X 3, plane normal w.r.t source camera frame
      a: [...] X 1 X 1, plane equation displacement

    Returns:
      homography: [...] X 3 X 3 inverse homography matrices
    """
    with tf.name_scope('inv_homography'):
        rot_transposed = nn_helpers.transpose(rot)
        k_t_inverse = tf.matrix_inverse(k_t, name='k_t_inv')

        # Denominator: a - n_hat * rot^T * t.
        normal_rot_t = tf.matmul(n_hat, rot_transposed)
        plane_denom = a - tf.matmul(normal_rot_t, t)

        # Numerator: (rot^T * t) * n_hat * rot^T.
        rot_t_times_t = tf.matmul(rot_transposed, t)
        outer_product = tf.matmul(rot_t_times_t, n_hat)
        plane_numer = tf.matmul(outer_product, rot_transposed)

        plane_term = nn_helpers.divide_safe(plane_numer, plane_denom)
        src_side = tf.matmul(k_s, rot_transposed + plane_term)
        return tf.matmul(src_side, k_t_inverse, name='inv_hom')
def define_pred_graph(self):
    """Prediction graph construction.

    Creates the dataset-specific ground-truth placeholders and disocclusion
    masks, then runs the source and target images through an encoder-decoder
    followed by the LDI predictor (the target pass is built with
    ``reuse=True``). Element 2 of each prediction is scaled by
    ``opts.max_disp``.

    Attributes written on ``self``:
      src_ldi_gt_disps, trg_ldi_gt_disps: float32 placeholders of shape
        [n_layers, batch_size, img_height, img_width, 1]; only when
        ``opts.debug_synth_texture`` is set.
      src_gt_disp, trg_gt_disp: [batch_size, img_height, img_width, 1]
        placeholders; only for the 'synthetic' and 'kitti' datasets.
      src_gt_disp_bg, trg_gt_disp_bg, src_gt_tex_bg, trg_gt_tex_bg:
        placeholders with 1 (disp) or 3 (tex) channels; 'synthetic' only.
      disocclusion_mask_src, disocclusion_mask_trg: from
        ``projection.disocclusion_mask`` for 'synthetic'; ``gt_disp == 0``
        for 'kitti'.
      focal_disps: always set to None.
      inv_rot_mat, inv_trans_mat: transform from the trg to the src frame.
      ldi_src, ldi_trg: outputs of ``nets.ldi_predictor``.
      disps_src, disps_trg: entry 0 of element 2 of each LDI prediction.
    """
    opts = self.opts

    def _image_placeholder(name, channels):
        """Float32 placeholder of shape [batch, height, width, channels]."""
        shape = [opts.batch_size, opts.img_height, opts.img_width, channels]
        return tf.placeholder(tf.float32, shape, name=name)

    if opts.debug_synth_texture:
        # Placeholders through which ground-truth layer disparities are fed.
        ldi_disps_shape = [
            opts.n_layers, opts.batch_size,
            opts.img_height, opts.img_width, 1,
        ]
        self.src_ldi_gt_disps = tf.placeholder(
            tf.float32, ldi_disps_shape, name='src_ldi_gt_disps')
        self.trg_ldi_gt_disps = tf.placeholder(
            tf.float32, ldi_disps_shape, name='trg_ldi_gt_disps')

    if opts.dataset == 'synthetic':
        self.src_gt_disp = _image_placeholder('src_gt_disp', 1)
        self.trg_gt_disp = _image_placeholder('trg_gt_disp', 1)
        self.src_gt_disp_bg = _image_placeholder('src_gt_disp_bg', 1)
        self.trg_gt_disp_bg = _image_placeholder('trg_gt_disp_bg', 1)
        # The texture placeholders previously reused the '*_gt_disp_bg' names
        # of the disparity placeholders above; name them after what they hold.
        self.src_gt_tex_bg = _image_placeholder('src_gt_tex_bg', 3)
        self.trg_gt_tex_bg = _image_placeholder('trg_gt_tex_bg', 3)

        src2trg_mat = projection.forward_projection_matrix(
            self.k_s, self.k_t, self.rot_mat, self.trans_mat)
        trg2src_mat = projection.inverse_projection_matrix(
            self.k_s, self.k_t, self.rot_mat, self.trans_mat)
        self.disocclusion_mask_src = projection.disocclusion_mask(
            self.src_gt_disp, self.trg_gt_disp, self.pixel_coords,
            src2trg_mat)
        self.disocclusion_mask_trg = projection.disocclusion_mask(
            self.trg_gt_disp, self.src_gt_disp, self.pixel_coords,
            trg2src_mat)

    if opts.dataset == 'kitti':
        self.src_gt_disp = _image_placeholder('src_gt_disp', 1)
        self.trg_gt_disp = _image_placeholder('trg_gt_disp', 1)
        # Mask is true wherever the fed ground-truth disparity equals zero.
        self.disocclusion_mask_src = tf.equal(self.src_gt_disp, 0)
        self.disocclusion_mask_trg = tf.equal(self.trg_gt_disp, 0)

    self.focal_disps = None

    # Transform from trg to src frame.
    self.inv_rot_mat = nn_helpers.transpose(self.rot_mat)
    self.inv_trans_mat = -tf.matmul(self.inv_rot_mat, self.trans_mat)

    # Pick the encoder-decoder once; both images go through the same builder.
    if opts.use_unet:
        build_enc_dec = nets.encoder_decoder_unet
    else:
        build_enc_dec = nets.encoder_decoder_simple
    n_steps = opts.n_layerwise_steps
    is_training = opts.batch_norm_training

    _, feat_dec_src, skip_feat_src, _ = build_enc_dec(
        self.imgs_src, nl_diff_enc_dec=n_steps, is_training=is_training)
    _, feat_dec_trg, skip_feat_trg, _ = build_enc_dec(
        self.imgs_trg, reuse=True, nl_diff_enc_dec=n_steps,
        is_training=is_training)

    # Keyword arguments common to the source and target predictor calls.
    predictor_args = {
        'n_layers': opts.n_layers,
        'n_layerwise_steps': n_steps,
        'pred_masks': opts.pred_ldi_masks,
        'is_training': is_training,
    }
    self.ldi_src = nets.ldi_predictor(
        feat_dec_src, skip_feat=skip_feat_src, **predictor_args)
    self.ldi_src[2] *= opts.max_disp
    self.ldi_trg = nets.ldi_predictor(
        feat_dec_trg, reuse=True, skip_feat=skip_feat_trg, **predictor_args)
    self.ldi_trg[2] *= opts.max_disp

    if opts.debug_synth_texture:
        # Substitute the fed ground-truth disparities; the prediction stays in
        # the expression with zero weight.
        self.ldi_src[2] = 0 * self.ldi_src[2] + 1 * self.src_ldi_gt_disps
        self.ldi_trg[2] = 0 * self.ldi_trg[2] + 1 * self.trg_ldi_gt_disps

    # Select inverse depths predicted for the 1st layer of the LDIs.
    self.disps_src = self.ldi_src[2][0]
    self.disps_trg = self.ldi_trg[2][0]