def _test_random_data(self, dtype, inverse_depth, normalize_flow):
    """Round-trip check on random data: depth -> flow -> depth.

    A random depth map and camera pose are pushed through
    ops.depth_to_flow; the resulting flow is fed to ops.flow_to_depth2
    (with the rotation converted by angleaxis_to_quaternion) and the
    result is compared against the depth map that was used as input.

    dtype:
        numpy dtype used for all generated arrays

    inverse_depth: bool
        if True the generated depth values are inverted (1/depth) and both
        ops are called with inverse_depth=True

    normalize_flow: bool
        forwarded as normalize_flow to depth_to_flow and as normalized_flow
        to flow_to_depth2
    """
    # The random draws stay in the order depth, rotation, translation so a
    # seeded run produces the same sample as before.
    expected_depth = np.random.uniform(5, 10, (1, 1, 6, 12)).astype(dtype)
    expected_depth = 1/expected_depth if inverse_depth else expected_depth

    angleaxis = np.random.uniform(0.0, 0.05, (1, 3)).astype(dtype)

    translation_jitter = np.random.uniform(-0.2, 0.2, (1, 3))
    translation = (np.array([[1, 0, 0]]) + translation_jitter).astype(dtype)

    intrinsics = np.array([[1, 1, 0.5, 0.5]]).astype(dtype)

    flow_tensor = ops.depth_to_flow(
        depth=expected_depth,
        intrinsics=intrinsics,
        rotation=angleaxis,
        translation=translation,
        inverse_depth=inverse_depth,
        normalize_flow=normalize_flow,
    )
    flow = flow_tensor.eval()

    # the inverse op is exercised with the quaternion rotation format
    quaternion = angleaxis_to_quaternion(angleaxis[0])[np.newaxis, :]
    depth_tensor = ops.flow_to_depth2(
        flow=flow,
        intrinsics=intrinsics,
        rotation=quaternion,
        translation=translation,
        inverse_depth=inverse_depth,
        normalized_flow=normalize_flow,
        rotation_format='quaternion',
    )
    computed_depth = depth_tensor.eval()

    print('depth\n', expected_depth)
    print('computed_depth\n', computed_depth)
    self.assertAllClose(expected_depth, computed_depth, rtol=1e-4, atol=1e-4)
def prepare_ground_truth_tensors(depth, rotation, translation, intrinsics):
    """Builds the ground truth tensors for several resolution levels.

    Besides the downsampled depth maps this creates flow and normal tensors
    and scale invariant gradient (sig) images for some of them.

    depth: Tensor
        depth map with inverse depth values

    rotation: Tensor
        rotation in angle axis format with 3 elements

    translation: Tensor
        the camera translation

    intrinsics: Tensor
        Tensor with the intrinsic camera parameters

    Returns a dictionary with ground truth data for depth, normal and flow
    for different resolutions. The keys are 'depth0', 'depth0_sig',
    'depth2', 'depth2_sig', 'flow0', 'flow2', 'flow2_sig', 'flow5',
    'normal0' and 'normal2'.
    """
    # five downsampled maps are requested, only the 2nd and 5th are used
    _, depth_lvl2, _, _, depth_lvl5 = recursive_median_downsample(depth, 5)

    def flow_for(depth_map, op_name):
        # flow induced by the camera motion for the given (inverse) depth map
        return sops.depth_to_flow(depth_map, intrinsics, rotation, translation,
                                  inverse_depth=True, normalize_flow=True,
                                  name=op_name)

    def normals_for(depth_map):
        return sops.depth_to_normals(depth_map, intrinsics, inverse_depth=True)

    sig_settings = {
        'deltas': [1, 2, 4, 8, 16],
        'weights': [1, 1, 1, 1, 1],
        'epsilon': 0.001,
    }

    def sig_of(tensor):
        return scale_invariant_gradient(tensor, **sig_settings)

    # ops are created in the same order as before: flows, normals, sig images
    flow_lvl0 = flow_for(depth, 'DepthToFlow0')
    flow_lvl2 = flow_for(depth_lvl2, 'DepthToFlow2')
    flow_lvl5 = flow_for(depth_lvl5, 'DepthToFlow5')

    normal_lvl0 = normals_for(depth)
    normal_lvl2 = normals_for(depth_lvl2)

    depth_lvl0_sig = sig_of(depth)
    depth_lvl2_sig = sig_of(depth_lvl2)
    flow_lvl2_sig = sig_of(flow_lvl2)

    ground_truth = {}
    ground_truth['depth0'] = depth
    ground_truth['depth0_sig'] = depth_lvl0_sig
    ground_truth['depth2'] = depth_lvl2
    ground_truth['depth2_sig'] = depth_lvl2_sig
    ground_truth['flow0'] = flow_lvl0
    ground_truth['flow2'] = flow_lvl2
    ground_truth['flow2_sig'] = flow_lvl2_sig
    ground_truth['flow5'] = flow_lvl5
    ground_truth['normal0'] = normal_lvl0
    ground_truth['normal2'] = normal_lvl2
    return ground_truth
def flow_from_depth(inputs):
    """Computes the flow induced by a depth map and a camera motion.

    inputs: sequence
        inputs[0] is the depth tensor; it is transposed with the permutation
        [0, 3, 1, 2] before it is passed to depth_to_flow (presumably
        NHWC -> NCHW, confirm against the caller).
        inputs[1] is the motion tensor; columns 0-2 are used as the rotation
        (passed with rotation_format='angleaxis3') and columns 3-5 as the
        translation.

    The intrinsics are hard-coded in this function.

    Returns the flow tensor transposed with the permutation [0, 2, 3, 1].
    """
    depth_in, motion = inputs[0], inputs[1]
    fixed_intrinsics = [0.89115971, 1.18821287, 0.5, 0.5]

    depth_channels_first = tf.transpose(depth_in, perm=[0, 3, 1, 2])

    # split the motion vector into its rotation and translation parts
    rotation_part = tf.slice(motion, [0, 0], [-1, 3])
    translation_part = tf.slice(motion, [0, 3], [-1, 3])

    flow_channels_first = lmbspecialops.depth_to_flow(
        depth_channels_first,
        fixed_intrinsics,
        rotation_part,
        translation_part,
        rotation_format='angleaxis3',
        inverse_depth=True,
        normalize_flow=True,
    )
    return tf.transpose(flow_channels_first, perm=[0, 2, 3, 1])
def create_ground_truth_file(dataset, dataset_dir):
    """Creates a hdf5 file with the ground truth test data

    The file is named '<dataset>_ground_truth.h5' and is written to the
    current working directory. If a file with that name already exists it is
    returned as is. If writing fails, the partially written file is removed
    again so that a later call does not mistake it for a complete file.

    dataset: str
        name of the dataset

    dataset_dir: str
        path to the directory containing the datasets

    Returns the path to the created file
    """
    ds = dataset
    # destination file
    ground_truth_file = '{0}_ground_truth.h5'.format(ds)

    if os.path.isfile(ground_truth_file):
        return ground_truth_file  # skip existing files

    print('creating {0}'.format(ground_truth_file))

    # data types requested from the reader op
    data_tensors_keys = ('IMAGE_PAIR', 'MOTION', 'DEPTH', 'INTRINSICS')

    reader_params = {
        'batch_size': 1,
        'test_phase': True,  # deactivates randomization
        'builder_threads': 1,  # must be 1 in test phase
        'inverse_depth': True,
        'motion_format': 'ANGLEAXIS6',
        # True is also possible here. If set to True we store ground truth with
        # precomputed normalization. False keeps the original information.
        'norm_trans_scale_depth': False,
        # original data resolution
        'scaled_height': 480,
        'scaled_width': 640,
        'scene_pool_size': 5,
        # no augmentation
        'augment_rot180': 0,
        'augment_mirror_x': 0,
        'top_output': data_tensors_keys,
        'source': [{'path': os.path.join(dataset_dir, '{0}_test.h5'.format(ds))}],
    }

    reader_tensors = multi_vi_h5_data_reader(len(data_tensors_keys),
                                             json.dumps(reader_params))

    # create a dict to make the distinct data tensors accessible via keys
    data_dict = dict(zip(data_tensors_keys, reader_tensors[2]))
    info_tensor = reader_tensors[0]
    sample_ids_tensor = reader_tensors[1]

    # the motion vector holds the rotation in its first and the translation
    # in its second half
    rotation_tensor, translation_tensor = tf.split(data_dict['MOTION'], 2, axis=1)
    flow_tensor = sops.depth_to_flow(data_dict['DEPTH'],
                                     data_dict['INTRINSICS'],
                                     rotation_tensor,
                                     translation_tensor,
                                     inverse_depth=True,
                                     normalize_flow=True)

    gpu_options = tf.GPUOptions()
    gpu_options.per_process_gpu_memory_fraction = 0.8  # leave some memory to other processes
    session = tf.InteractiveSession(
        config=tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options))

    fetch_dict = {
        'INFO': info_tensor,
        'SAMPLE_IDS': sample_ids_tensor,
        'FLOW': flow_tensor,
    }
    fetch_dict.update(data_dict)

    completed = False
    try:
        # The mode is given explicitly: recent h5py versions open files
        # read-only by default, which cannot create the destination file.
        with h5py.File(ground_truth_file, 'w') as f:
            number_of_test_iterations = 1  # will be set to the correct value in the while loop
            iteration = 0
            while iteration < number_of_test_iterations:
                data = session.run(fetch_dict)

                # get number of iterations from the info vector
                number_of_test_iterations = int(data['INFO'][0])

                # write ground truth data to the file
                group = f.require_group(str(iteration))
                group['image_pair'] = data['IMAGE_PAIR'][0]
                group['depth'] = data['DEPTH'][0]
                group['motion'] = data['MOTION'][0]
                group['flow'] = data['FLOW'][0]
                group['intrinsics'] = data['INTRINSICS'][0]

                # save sample id as attribute of the group.
                # the evaluation code will use this to check if prediction and ground truth match.
                sample_id = (''.join(map(chr, data['SAMPLE_IDS']))).strip()
                # np.bytes_ is the same type as the former np.string_ alias
                group.attrs['sample_id'] = np.bytes_(sample_id)
                iteration += 1
        completed = True
    finally:
        # Close the session before resetting the graph; resetting the default
        # graph while a session is still active is undefined behavior.
        session.close()
        del session
        tf.reset_default_graph()
        # A partial file would be skipped as "existing" on the next call.
        if not completed and os.path.isfile(ground_truth_file):
            os.remove(ground_truth_file)

    return ground_truth_file
def flow_block_demon_original(image_pair, image2_2=None, intrinsics=None, prev_predictions=None, data_format='channels_first'):
    """Creates a flow network (encoder-decoder) using the caffe padding layers.

    image_pair: Tensor
        Image pair concatenated along the channel axis.

    image2_2: Tensor
        Second image at resolution level 2 (downsampled two times).
        Only used if prev_predictions is given.

    intrinsics: Tensor
        The normalized intrinsic parameters.
        Only used if prev_predictions is given.

    prev_predictions: dict of Tensor
        Predictions from the previous depth block. The keys 'predict_depth2',
        'predict_normal2', 'predict_rotation' and 'predict_translation' are
        read. If None the network only sees the image pair.

    data_format: str
        'channels_first' or 'channels_last'

    Returns a dict with the predictions 'predict_flowconf5' and
    'predict_flowconf2'. Several intermediate tensors are added to the
    'endpoints' collection.
    """
    is_channels_first = data_format == 'channels_first'
    channel_axis = 1 if is_channels_first else 3

    def conv(name, inputs, num_outputs, kernel_size, stride):
        # all convolutions of this block share the data format
        return convrelu2_caffe_padding(name=name,
                                       inputs=inputs,
                                       num_outputs=num_outputs,
                                       kernel_size=kernel_size,
                                       stride=stride,
                                       data_format=data_format)

    def add_endpoint(tensor):
        tf.add_to_collection('endpoints', tensor)

    # ---- contracting part ----
    conv1 = conv('conv1', image_pair, 32, 9, 2)

    if prev_predictions is None:
        conv2 = conv('conv2', conv1, 64, 7, 2)
        conv2_1 = conv('conv2_1', conv2, 64, 3, 1)
        add_endpoint(conv2_1)
    else:
        conv2 = conv('conv2', conv1, 32, 7, 2)

        # create the warped input; the special ops get channels first tensors
        prev_depth = prev_predictions['predict_depth2']
        if is_channels_first:
            prev_depth_nchw = prev_depth
        else:
            prev_depth_nchw = convert_NHWC_to_NCHW(prev_depth)

        raw_flow = sops.depth_to_flow(
            intrinsics=intrinsics,
            depth=prev_depth_nchw,
            rotation=prev_predictions['predict_rotation'],
            translation=prev_predictions['predict_translation'],
            inverse_depth=True,
            normalize_flow=True,
        )

        # Set flow vectors to zero if the motion is too large.
        # This also eliminates nan values which can be produced by very bad
        # camera parameters. The norm is duplicated to cover both flow
        # channels.
        flow_norm = tf.norm(raw_flow, axis=1, keep_dims=True)
        flow_norm_2ch = tf.concat((flow_norm, flow_norm), axis=1)
        zero_flow = tf.zeros_like(raw_flow, dtype=tf.float32)
        flow_from_depth_motion = tf.where(flow_norm_2ch < 1.0, raw_flow, zero_flow)

        if is_channels_first:
            image2_2_nchw = image2_2
        else:
            image2_2_nchw = convert_NHWC_to_NCHW(image2_2)
        image2_2_warped = sops.warp2d(
            input=image2_2_nchw,
            displacements=flow_from_depth_motion,
            normalized=True,
            border_mode='value',
        )

        # go back to the data format of the network
        if data_format == 'channels_last':
            flow_from_depth_motion = convert_NCHW_to_NHWC(flow_from_depth_motion)
            image2_2_warped = convert_NCHW_to_NHWC(image2_2_warped)

        extra_inputs = (
            image2_2_warped,
            flow_from_depth_motion,
            prev_predictions['predict_depth2'],
            prev_predictions['predict_normal2'],
        )

        # stop gradient here
        extra_inputs_concat = tf.stop_gradient(
            tf.concat(extra_inputs, axis=channel_axis))

        conv_extra_inputs = conv('conv2_extra_inputs', extra_inputs_concat, 32, 3, 1)
        conv2_concat = tf.concat((conv2, conv_extra_inputs), axis=channel_axis)
        conv2_1 = conv('conv2_1', conv2_concat, 64, 3, 1)

    conv3 = conv('conv3', conv2_1, 128, 5, 2)
    conv3_1 = conv('conv3_1', conv3, 128, 3, 1)
    add_endpoint(conv3_1)

    conv4 = conv('conv4', conv3_1, 256, 5, 2)
    conv4_1 = conv('conv4_1', conv4, 256, 3, 1)
    add_endpoint(conv4_1)

    conv5 = conv('conv5', conv4_1, 512, 5, 2)
    conv5_1 = conv('conv5_1', conv5, 512, 3, 1)
    add_endpoint(conv5_1)

    # ---- expanding part ----
    with tf.variable_scope('predict_flow5'):
        predict_flowconf5 = _predict_flow_caffe_padding(
            conv5_1, predict_confidence=True, data_format=data_format)
    add_endpoint(predict_flowconf5)

    with tf.variable_scope('upsample_flow5to4'):
        predict_flowconf5to4 = _upsample_prediction(
            predict_flowconf5, 2, data_format=data_format)

    with tf.variable_scope('refine4'):
        concat4 = _refine_caffe_padding(
            inp=conv5_1,
            num_outputs=256,
            upsampled_prediction=predict_flowconf5to4,
            features_direct=conv4_1,
            data_format=data_format,
        )

    with tf.variable_scope('refine3'):
        concat3 = _refine_caffe_padding(
            inp=concat4,
            num_outputs=128,
            features_direct=conv3_1,
            data_format=data_format,
        )

    with tf.variable_scope('refine2'):
        concat2 = _refine_caffe_padding(
            inp=concat3,
            num_outputs=64,
            features_direct=conv2_1,
            data_format=data_format,
        )

    with tf.variable_scope('predict_flow2'):
        predict_flowconf2 = _predict_flow_caffe_padding(
            concat2, predict_confidence=True, data_format=data_format)
    add_endpoint(predict_flowconf2)

    predictions = {}
    predictions['predict_flowconf5'] = predict_flowconf5
    predictions['predict_flowconf2'] = predict_flowconf2
    return predictions
def flow_block(image_pair, image2_2=None, intrinsics=None, prev_predictions=None, data_format='channels_first', kernel_regularizer=None):
    """Creates a flow network

    image_pair: Tensor
        Image pair concatenated along the channel axis.

    image2_2: Tensor
        Second image at resolution level 2 (downsampled two times).
        Only used if prev_predictions is given.

    intrinsics: Tensor
        The normalized intrinsic parameters.
        Only used if prev_predictions is given.

    prev_predictions: dict of Tensor
        Predictions from the previous depth block. The keys 'predict_depth2',
        'predict_normal2', 'predict_rotation' and 'predict_translation' are
        read. If None the network only sees the image pair.

    data_format: str
        'channels_first' or 'channels_last'

    kernel_regularizer:
        Regularizer passed to all convolutions and to the dense layer.

    The batch dimension of the inputs may be unknown at graph construction
    time. The remaining dimensions of the conv5_1 feature map must be known
    because they define the number of units of the dense layer.

    Returns a dict with the predictions 'predict_flowconf5' and
    'predict_flowconf2'.
    """
    conv_params = {
        'data_format': data_format,
        'kernel_regularizer': kernel_regularizer
    }
    channel_axis = 1 if data_format == 'channels_first' else 3

    # contracting part
    conv1 = convrelu2(name='conv1', inputs=image_pair, num_outputs=(24, 32),
                      kernel_size=9, stride=2, **conv_params)

    if prev_predictions is None:
        conv2 = convrelu2(name='conv2', inputs=conv1, num_outputs=(48, 64),
                          kernel_size=7, stride=2, **conv_params)
        conv2_1 = convrelu2(name='conv2_1', inputs=conv2, num_outputs=64,
                            kernel_size=3, stride=1, **conv_params)
    else:
        conv2 = convrelu2(name='conv2', inputs=conv1, num_outputs=32,
                          kernel_size=7, stride=2, **conv_params)

        # create warped input; the special ops get channels first tensors
        if data_format == 'channels_first':
            prev_depth_nchw = prev_predictions['predict_depth2']
        else:
            prev_depth_nchw = convert_NHWC_to_NCHW(
                prev_predictions['predict_depth2'])

        _flow_from_depth_motion = sops.depth_to_flow(
            intrinsics=intrinsics,
            depth=prev_depth_nchw,
            rotation=prev_predictions['predict_rotation'],
            translation=prev_predictions['predict_translation'],
            inverse_depth=True,
            normalize_flow=True,
        )

        # set flow vectors to zero if the motion is too large.
        # this also eliminates nan values which can be produced by very bad
        # camera parameters
        flow_from_depth_motion_norm = tf.norm(_flow_from_depth_motion,
                                              axis=1, keep_dims=True)
        # duplicate the norm so that the condition covers both flow channels
        flow_from_depth_motion_norm = tf.concat(
            (flow_from_depth_motion_norm, flow_from_depth_motion_norm), axis=1)
        tmp_zeros = tf.zeros_like(_flow_from_depth_motion, dtype=tf.float32)
        flow_from_depth_motion = tf.where(flow_from_depth_motion_norm < 1.0,
                                          _flow_from_depth_motion, tmp_zeros)

        image2_2_warped = sops.warp2d(
            input=image2_2 if data_format == 'channels_first' else
            convert_NHWC_to_NCHW(image2_2),
            displacements=flow_from_depth_motion,
            normalized=True,
            border_mode='value',
        )

        # go back to the data format of the network
        if data_format == 'channels_last':
            flow_from_depth_motion = convert_NCHW_to_NHWC(
                flow_from_depth_motion)
            image2_2_warped = convert_NCHW_to_NHWC(image2_2_warped)

        extra_inputs = (image2_2_warped, flow_from_depth_motion,
                        prev_predictions['predict_depth2'],
                        prev_predictions['predict_normal2'])

        # stop gradient here
        extra_inputs_concat = tf.stop_gradient(
            tf.concat(extra_inputs, axis=channel_axis))

        conv_extra_inputs = convrelu2(name='conv2_extra_inputs',
                                      inputs=extra_inputs_concat,
                                      num_outputs=32, kernel_size=3, stride=1,
                                      **conv_params)
        conv2_concat = tf.concat((conv2, conv_extra_inputs), axis=channel_axis)
        conv2_1 = convrelu2(name='conv2_1', inputs=conv2_concat,
                            num_outputs=64, kernel_size=3, stride=1,
                            **conv_params)

    conv3 = convrelu2(name='conv3', inputs=conv2_1, num_outputs=(96, 128),
                      kernel_size=5, stride=2, **conv_params)
    conv3_1 = convrelu2(name='conv3_1', inputs=conv3, num_outputs=128,
                        kernel_size=3, stride=1, **conv_params)

    conv4 = convrelu2(name='conv4', inputs=conv3_1, num_outputs=(192, 256),
                      kernel_size=5, stride=2, **conv_params)
    conv4_1 = convrelu2(name='conv4_1', inputs=conv4, num_outputs=256,
                        kernel_size=3, stride=1, **conv_params)

    conv5 = convrelu2(name='conv5', inputs=conv4_1, num_outputs=384,
                      kernel_size=5, stride=2, **conv_params)
    conv5_1 = convrelu2(name='conv5_1', inputs=conv5, num_outputs=384,
                        kernel_size=3, stride=1, **conv_params)

    # The dense layer works on the first 96 channels of conv5_1 and produces
    # a feature map with the same shape as that slice.
    dense_slice_shape = conv5_1.get_shape().as_list()
    if data_format == 'channels_first':
        dense_slice_shape[1] = 96
    else:
        dense_slice_shape[-1] = 96

    units = 1
    for dim in dense_slice_shape[1:]:
        units *= dim

    # Use -1 for the batch dimension. For tf.slice this selects all elements
    # starting at offset 0 and for tf.reshape the size is inferred, which
    # gives the same result as the static batch size but also works if the
    # batch size is unknown (None) when the graph is built.
    dense_slice_shape[0] = -1

    dense5 = tf.layers.dense(tf.contrib.layers.flatten(
        tf.slice(conv5_1, [0, 0, 0, 0], dense_slice_shape)),
                             units=units,
                             activation=myLeakyRelu,
                             kernel_initializer=default_weights_initializer(),
                             kernel_regularizer=kernel_regularizer,
                             name='dense5')
    print(dense5)
    conv5_1_dense5 = tf.concat(
        (conv5_1, tf.reshape(dense5, dense_slice_shape)), axis=channel_axis)

    # expanding part
    with tf.variable_scope('predict_flow5'):
        predict_flowconf5 = _predict_flow(conv5_1_dense5,
                                          predict_confidence=True,
                                          **conv_params)

    with tf.variable_scope('upsample_flow5to4'):
        predict_flowconf5to4 = _upsample_prediction(predict_flowconf5, 2,
                                                    **conv_params)

    with tf.variable_scope('refine4'):
        concat4 = _refine(
            inp=conv5_1_dense5,
            num_outputs=256,
            upsampled_prediction=predict_flowconf5to4,
            features_direct=conv4_1,
            **conv_params
        )

    with tf.variable_scope('refine3'):
        concat3 = _refine(
            inp=concat4,
            num_outputs=128,
            features_direct=conv3_1,
            **conv_params
        )

    with tf.variable_scope('refine2'):
        concat2 = _refine(
            inp=concat3,
            num_outputs=64,
            features_direct=conv2_1,
            **conv_params
        )

    with tf.variable_scope('predict_flow2'):
        predict_flowconf2 = _predict_flow(concat2,
                                          predict_confidence=True,
                                          **conv_params)

    return {
        'predict_flowconf5': predict_flowconf5,
        'predict_flowconf2': predict_flowconf2
    }
def create_depthsweep_images_tensor(image, rotation, translation, intrinsics, depth_values, border_radius=1, name=None):
    """Warps the image once for every depth label of a depth sweep.

    image: Tensor
        rank 4 tensor, converted to float32

    rotation: Tensor
        rank 2 tensor, converted to float32

    translation: Tensor
        rank 2 tensor, converted to float32

    intrinsics: Tensor
        rank 2 tensor, converted to float32

    depth_values: list of float or Tensor with shape NCHW
        Inverse depth values. For a list (or tuple or numpy array) each
        value becomes one constant channel. A Tensor is used as is.

    border_radius: int
        Passed to create_border_mask_for_image.

    name: str
        Optional name for the name scope.

    Returns a tuple with
    - the tensor of warped images in NDCHW format with
      D = number of depth labels and C = image channels,
    - a mask in NCHW format which is nonzero only for pixels where the warped
      border mask is nonzero for all depth labels,
    - the tensor with the depth values per pixel.
    """
    scope_values = [image, rotation, translation, intrinsics]
    with tf.name_scope(name, "createDepthsweepImagesTensor", scope_values):
        image = tf.convert_to_tensor(image, name='image', dtype=tf.float32)
        rotation = tf.convert_to_tensor(rotation, name='rotation', dtype=tf.float32)
        translation = tf.convert_to_tensor(translation, name='translation', dtype=tf.float32)
        intrinsics = tf.convert_to_tensor(intrinsics, name='intrinsics', dtype=tf.float32)

        # with_rank raises if the static rank does not match
        expected_ranks = ((image, 4), (rotation, 2), (translation, 2), (intrinsics, 2))
        for tensor, rank in expected_ranks:
            tensor.get_shape().with_rank(rank)

        if isinstance(depth_values, (list, tuple, np.ndarray)):
            # one constant single channel map per depth value
            label_shape = image.get_shape().as_list()
            label_shape[1] = 1
            label_maps = [
                tf.constant(value=value, shape=label_shape, dtype=tf.float32)
                for value in depth_values
            ]
            depths = tf.concat(label_maps, axis=1)
        else:
            # Tensor
            depths = depth_values

        depths_shape = depths.get_shape()
        depths_shape.with_rank(4)
        num_labels = depths_shape[1].value

        def repeat_per_label(tensor, rank):
            # insert a label axis after the batch axis and repeat along it
            multiples = [1, num_labels] + [1] * (rank - 1)
            return tf.tile(tf.expand_dims(tensor, axis=1), multiples)

        def merge_batch_and_labels(tensor):
            # N,D,... -> N*D,... ; also returns the shape before merging
            shape_with_labels = tensor.get_shape().as_list()
            merged_shape = list(shape_with_labels[1:])
            merged_shape[0] *= tensor.get_shape()[0].value
            return tf.reshape(tensor, merged_shape), shape_with_labels

        mask_orig = create_border_mask_for_image(border_radius, image)

        mask = repeat_per_label(mask_orig, 4)
        image = repeat_per_label(image, 4)
        rotation = repeat_per_label(rotation, 2)
        translation = repeat_per_label(translation, 2)
        intrinsics = repeat_per_label(intrinsics, 2)

        image, image_shape_NDCHW = merge_batch_and_labels(image)
        mask, mask_shape_NDCHW = merge_batch_and_labels(mask)

        flows = sops.depth_to_flow(
            depth=depths,
            intrinsics=intrinsics,
            rotation=rotation,
            translation=translation,
            inverse_depth=True,
            normalize_flow=False,
        )

        warp_settings = {'normalized': False, 'border_mode': 'value'}

        images_warped = tf.reshape(
            sops.warp2d_tf(image, flows, **warp_settings), image_shape_NDCHW)
        masks_warped = tf.reshape(
            sops.warp2d_tf(mask, flows, **warp_settings), mask_shape_NDCHW)

        # a pixel is kept only if its warped mask is nonzero for every label
        valid_for_all_labels = tf.reduce_all(tf.not_equal(masks_warped, 0.0), axis=1)
        masks_warped_all = mask_orig * tf.cast(valid_for_all_labels, dtype=tf.float32)

        return images_warped, masks_warped_all, depths