def infer_tgt_views(self, raw_src_image, RT, intrinsic):
    """Build the graph that predicts a target view from a single source image.

    The source image is encoded into a latent code of ``z_size`` channels.
    The first ``z_geo_size`` channels are viewed as rows of 4 values and
    multiplied by the inverse pose returned by ``self.reshape_posematrix``;
    the remaining channels are kept untouched as the appearance code.
    Three decoders then produce a depth map, a blending mask and a pixel
    prediction. The final image blends the pixel prediction with the source
    image warped through the predicted depth.

    Args:
        raw_src_image: 4-D image tensor whose static batch size is known;
            it is converted with ``self.image2tensor`` before encoding.
        RT: pose input passed to ``self.reshape_posematrix``. The raw value
            is also stored on ``self.manual_check`` as a side effect.
        intrinsic: forwarded unchanged to ``projective_inverse_warp``.

    Returns:
        dict with the tensors ``out_depth``, ``out_mask``, ``out_pixel``,
        ``warped_image`` and ``tgt_image``.

    Raises:
        ValueError: if ``self.data`` is not ``'car'``; the depth range
            constants are only defined for that dataset.
    """
    z_size = 856
    z_geo_size = 600  # leading channels of the latent code that get transformed

    # Unpacking four values also checks that the input is rank 4.
    b, _, _, _ = raw_src_image.get_shape().as_list()

    with tf.name_scope('preprocessing'):
        src_image = self.image2tensor(raw_src_image)
        self.manual_check = RT
        RT, inv_RT = self.reshape_posematrix(RT)

    with tf.name_scope('Encoder'):
        z_enc_out = Encoder(src_image, num_outputs=z_size)

        # Transform the geometry part of the latent vector with the pose.
        z_geo = tf.reshape(z_enc_out[:, :, :, :z_geo_size], [b, -1, 4])
        z_app = z_enc_out[:, :, :, z_geo_size:]
        z_geo_tf = tf.matmul(z_geo, inv_RT)
        # TODO: solving z_h and z_w values (a 1x1 spatial latent is assumed here)
        z_geo_tf = tf.reshape(z_geo_tf, [b, 1, 1, z_geo_size])
        z_tf = tf.concat([z_geo_tf, z_app], axis=3)

    with tf.name_scope('Depth'):
        # Previously any other dataset hit an UnboundLocalError further down.
        if self.data != 'car':
            raise ValueError(
                "Depth range is only defined for data == 'car', got %r"
                % (self.data,))
        depth_bias = 2
        depth_scale = 1.0
        depth_dec_out = Decoder(z_geo_tf, 1, variable_scope='Depth_Decoder')
        # tanh keeps the prediction within depth_bias +/- depth_scale.
        depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias

    with tf.name_scope('Mask'):
        mask_dec_out = Decoder(z_geo_tf, 1, variable_scope='Mask_Decoder')
        mask_pred = tf.nn.sigmoid(mask_dec_out)

    with tf.name_scope('Pixel'):
        pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder')
        pixel_pred = tf.nn.tanh(pixel_dec_out)

    with tf.name_scope('prediction'):
        # Squeeze only the channel axis: an axis-less squeeze would also
        # drop the batch dimension when the batch size is 1.
        # Assumes Decoder(x, 1) yields [b, H, W, 1] -- TODO confirm.
        depth_map = tf.squeeze(depth_pred, axis=3)
        warped_pred = projective_inverse_warp(
            src_image, depth_map, RT, intrinsic, ret_flows=False)
        # The mask selects the generated pixels; its complement selects the warp.
        fake_tgt = (tf.multiply(pixel_pred, mask_pred)
                    + tf.multiply(warped_pred, 1 - mask_pred))

    # Collect output tensors
    pred = {
        'out_depth': depth_pred,
        'out_mask': mask_pred,
        'out_pixel': pixel_pred,
        'warped_image': warped_pred,
        'tgt_image': fake_tgt,
    }
    return pred
def into_depth_and_rgb_block(self, raw_src_image, raw_src_depth, pose):
    """Build a pose-conditioned RGB-D encoder/decoder block.

    The source image and depth are concatenated along the channel axis and
    encoded into a latent code of ``z_size`` channels. The first
    ``z_geo_size`` channels are viewed as rows of 4 values and multiplied by
    ``pose``; the remaining channels are kept as the appearance code. A
    depth decoder and a pixel decoder are then applied. The encoder and both
    decoders are created with ``reuse_weights=tf.AUTO_REUSE``.

    Args:
        raw_src_image: 4-D image tensor whose static batch size is known;
            it is converted with ``self.image2tensor`` before use.
        raw_src_depth: depth tensor. If it is not already rank 4, a
            trailing channel axis is added.
        pose: right-hand operand of the ``tf.matmul`` applied to the
            geometry code (presumably a batch of 4x4 matrices -- confirm
            against the caller).

    Returns:
        dict with the tensors ``out_depth`` and ``out_pixel``.

    Raises:
        ValueError: if ``self.data`` is not ``'car'``; the depth range
            constants are only defined for that dataset.
    """
    z_size = 856
    z_geo_size = 600  # leading channels of the latent code that get transformed

    # Unpacking four values also checks that the input is rank 4.
    b, _, _, _ = raw_src_image.get_shape().as_list()

    with tf.name_scope('preprocessing'):
        src_image = self.image2tensor(raw_src_image)
        if len(raw_src_depth.get_shape()) != 4:
            # Add the channel axis so depth can be concatenated with RGB.
            src_depth = tf.expand_dims(raw_src_depth, axis=3)
        else:
            src_depth = raw_src_depth

    with tf.name_scope('concat_rgbd'):
        input_rgbd = tf.concat([src_image, src_depth], axis=3)

    with tf.name_scope('Encoder'):
        z_enc_out = Encoder(input_rgbd, num_outputs=z_size,
                            reuse_weights=tf.AUTO_REUSE)

        # Transform the geometry part of the latent vector with the pose.
        z_geo = tf.reshape(z_enc_out[:, :, :, :z_geo_size], [b, -1, 4])
        z_app = z_enc_out[:, :, :, z_geo_size:]
        z_geo_tf = tf.matmul(z_geo, pose)
        # TODO: solving z_h and z_w values (a 1x1 spatial latent is assumed here)
        # Use z_geo_size here too so the split and the reshape cannot diverge.
        z_geo_tf = tf.reshape(z_geo_tf, [b, 1, 1, z_geo_size])
        z_tf = tf.concat([z_geo_tf, z_app], axis=3)

    with tf.name_scope('Depth'):
        # Previously any other dataset hit an UnboundLocalError further down.
        if self.data != 'car':
            raise ValueError(
                "Depth range is only defined for data == 'car', got %r"
                % (self.data,))
        depth_bias = 2
        depth_scale = 1.0
        depth_dec_out = Decoder(z_geo_tf, 1, variable_scope='Depth_Decoder',
                                reuse_weights=tf.AUTO_REUSE)
        # tanh keeps the prediction within depth_bias +/- depth_scale.
        depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias

    with tf.name_scope('Pixel'):
        pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder',
                                reuse_weights=tf.AUTO_REUSE)
        pixel_pred = tf.nn.tanh(pixel_dec_out)

    # Collect output tensors
    pred = {
        'out_depth': depth_pred,
        'out_pixel': pixel_pred,
    }
    return pred
def build_train_graph(self, is_train=True):
    """Build the training graph: network, losses, summaries and train op.

    Reads ``self.src_image``, ``self.tgt_image``, ``self.RT``,
    ``self.inv_RT``, ``self.intrinsic``, ``self.batch_size``,
    ``self.loss_weight``, ``self.learning_rate``, ``self.beta1`` and
    ``self.data``.

    Sets ``self.depth_scale_vis``, ``self.depth_bias_vis``,
    ``self.eval_loss`` (dict with ``depth_loss``, ``pixel_loss``,
    ``mask_loss`` and ``total_loss``), ``self.total_loss``,
    ``self.optimizer`` and ``self.train_op``. Intermediate tensors are
    printed to stdout while the graph is being built.

    Args:
        is_train: accepted for interface compatibility; it is not used by
            this method.

    Raises:
        ValueError: if ``self.data`` is not ``'car'``; the depth range
            constants are only defined for that dataset.
    """
    z_size = 856
    z_geo_size = 600  # leading channels of the latent code that get transformed

    with tf.name_scope('Encoder'):
        z_enc_out = Encoder(self.src_image, num_outputs=z_size)
        print('encoder out', z_enc_out)

        # Transform the geometry part of the latent vector with the pose.
        z_geo = tf.reshape(z_enc_out[:, :, :, :z_geo_size],
                           [self.batch_size, -1, 4])
        z_app = z_enc_out[:, :, :, z_geo_size:]
        print('z geo', z_geo)
        print('z app', z_app)

        z_geo_tf = tf.matmul(z_geo, self.inv_RT)
        print('z geo tf', z_geo_tf)
        print('inv_RT', self.inv_RT)
        # TODO: solving z_h and z_w values (a 1x1 spatial latent is assumed here)
        z_geo_tf = tf.reshape(z_geo_tf, [self.batch_size, 1, 1, z_geo_size])
        z_tf = tf.concat([z_geo_tf, z_app], axis=3)
        print('z tf', z_tf)

    with tf.name_scope('Depth'):
        # Previously any other dataset hit an UnboundLocalError further down.
        if self.data != 'car':
            raise ValueError(
                "Depth range is only defined for data == 'car', got %r"
                % (self.data,))
        depth_bias = 2
        depth_scale = 1.0
        self.depth_scale_vis = 125. / depth_scale
        self.depth_bias_vis = depth_bias - depth_scale
        depth_dec_out = Decoder(z_geo_tf, 1, variable_scope='Depth_Decoder')
        # tanh keeps the prediction within depth_bias +/- depth_scale.
        depth_pred = depth_scale * tf.nn.tanh(depth_dec_out) + depth_bias

    with tf.name_scope('Mask'):
        mask_dec_out = Decoder(z_geo_tf, 1, variable_scope='Mask_Decoder')
        mask_pred = tf.nn.sigmoid(mask_dec_out)
        print('mask pred', mask_pred)

    with tf.name_scope('Pixel'):
        pixel_dec_out = Decoder(z_tf, 3, variable_scope='Pixel_Decoder')
        pixel_pred = tf.nn.tanh(pixel_dec_out)
        print('pixel pred', pixel_pred)

    with tf.name_scope('prediction'):
        # Squeeze only the channel axis: an axis-less squeeze would also
        # drop the batch dimension when the batch size is 1.
        # Assumes Decoder(x, 1) yields [b, H, W, 1] -- TODO confirm.
        depth_map = tf.squeeze(depth_pred, axis=3)
        warped_pred = projective_inverse_warp(
            self.src_image, depth_map, self.RT, self.intrinsic,
            ret_flows=False)
        print('warped pred', warped_pred)
        # The mask selects the generated pixels; its complement selects the warp.
        fake_tgt = (tf.multiply(pixel_pred, mask_pred)
                    + tf.multiply(warped_pred, 1 - mask_pred))

    with tf.name_scope('loss'):
        # Each term is a weighted mean absolute error against the target.
        # The depth and mask branches are supervised only through the
        # images they produce (the warp and the blend respectively).
        depth_loss = (tf.reduce_mean(tf.abs(self.tgt_image - warped_pred))
                      * self.loss_weight)
        pixel_loss = (tf.reduce_mean(tf.abs(self.tgt_image - pixel_pred))
                      * self.loss_weight)
        mask_loss = (tf.reduce_mean(tf.abs(self.tgt_image - fake_tgt))
                     * self.loss_weight)
        self.total_loss = depth_loss + pixel_loss + mask_loss

        self.eval_loss = {
            'depth_loss': depth_loss,
            'pixel_loss': pixel_loss,
            'mask_loss': mask_loss,
            'total_loss': self.total_loss,
        }

    # Summaries
    tf.summary.image('src_image', self.deprocess_image(self.src_image))
    tf.summary.image('tgt_image', self.deprocess_image(self.tgt_image))
    tf.summary.image('fake_tgt_image', self.deprocess_image(fake_tgt))
    tf.summary.image('pixel_pred_image', self.deprocess_image(pixel_pred))
    # NOTE(review): unlike the other images this one is logged without
    # deprocess_image -- confirm whether that is intentional.
    tf.summary.image('warped_pred_image', warped_pred)
    tf.summary.scalar('total_loss', self.total_loss)

    # Define optimizers
    with tf.name_scope('train_optimizers'):
        self.optimizer = tf.train.AdamOptimizer(self.learning_rate, self.beta1)
        # tf.trainable_variables() already returns a list; no copy is needed.
        train_vars = tf.trainable_variables()
        grads_and_vars = self.optimizer.compute_gradients(
            self.total_loss, var_list=train_vars)
        self.train_op = self.optimizer.apply_gradients(grads_and_vars)