import os

import tensorflow as tf
import lmbspecialops
import lmbspecialops as sops


def prepare_ground_truth_tensors(depth, rotation, translation, intrinsics):
    """Computes ground truth tensors at lower resolutions and scale invariant
    gradient (sig) images of some ground truth tensors.

    depth: Tensor
        depth map with inverse depth values

    rotation: Tensor
        rotation in angle axis format with 3 elements

    translation: Tensor
        the camera translation

    intrinsics: Tensor
        Tensor with the intrinsic camera parameters

    Returns a dictionary with ground truth data for depth, normal and flow
    for different resolutions.
    """
    depth1, depth2, depth3, depth4, depth5 = recursive_median_downsample(depth, 5)

    flow0 = sops.depth_to_flow(depth, intrinsics, rotation, translation,
                               inverse_depth=True, normalize_flow=True,
                               name='DepthToFlow0')
    flow2 = sops.depth_to_flow(depth2, intrinsics, rotation, translation,
                               inverse_depth=True, normalize_flow=True,
                               name='DepthToFlow2')
    flow5 = sops.depth_to_flow(depth5, intrinsics, rotation, translation,
                               inverse_depth=True, normalize_flow=True,
                               name='DepthToFlow5')

    normal0 = sops.depth_to_normals(depth, intrinsics, inverse_depth=True)
    normal2 = sops.depth_to_normals(depth2, intrinsics, inverse_depth=True)

    sig_params = {'deltas': [1, 2, 4, 8, 16],
                  'weights': [1, 1, 1, 1, 1],
                  'epsilon': 0.001}

    depth0_sig = scale_invariant_gradient(depth, **sig_params)
    depth2_sig = scale_invariant_gradient(depth2, **sig_params)
    flow2_sig = scale_invariant_gradient(flow2, **sig_params)

    return {
        'depth0': depth,
        'depth0_sig': depth0_sig,
        'depth2': depth2,
        'depth2_sig': depth2_sig,
        'flow0': flow0,
        'flow2': flow2,
        'flow2_sig': flow2_sig,
        'flow5': flow5,
        'normal0': normal0,
        'normal2': normal2,
    }
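
# A minimal pure-TensorFlow sketch of the scale_invariant_gradient referenced
# above (the real op is expected to come from lmbspecialops). It assumes the
# DeMoN-style definition: for each spacing delta d, the horizontal component is
#   g_d[f](i, j) = (f(i, j + d) - f(i, j)) / (|f(i, j + d)| + |f(i, j)| + epsilon)
# and analogously for the vertical component, with per-delta results weighted and
# concatenated along the channel axis. The NCHW layout is an assumption, made to
# match the sops ops used in this file; this version is illustrative only.
def scale_invariant_gradient_sketch(f, deltas, weights, epsilon):
    components = []
    for d, w in zip(deltas, weights):
        # forward differences with spacing d, zero-padded back to the input size
        diff_x = f[:, :, :, d:] - f[:, :, :, :-d]
        norm_x = tf.abs(f[:, :, :, d:]) + tf.abs(f[:, :, :, :-d]) + epsilon
        gx = tf.pad(w * diff_x / norm_x, [[0, 0], [0, 0], [0, 0], [0, d]])
        diff_y = f[:, :, d:, :] - f[:, :, :-d, :]
        norm_y = tf.abs(f[:, :, d:, :]) + tf.abs(f[:, :, :-d, :]) + epsilon
        gy = tf.pad(w * diff_y / norm_y, [[0, 0], [0, 0], [0, d], [0, 0]])
        components.extend([gx, gy])
    # stack all gradient images along the channel axis (axis 1 in NCHW)
    return tf.concat(components, axis=1)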
def normals_loss_from_depth_gt(depth_true, normals_pred):
    # DeMoN normalized intrinsics; broadcast along the batch if needed, e.g.
    # np.broadcast_to(np.array([[0.89115971, 1.18821287, 0.5, 0.5]]), (batch_size, 4))
    intrinsics = tf.constant([[0.89115971, 1.18821287, 0.5, 0.5]])
    # depth_to_normals expects NCHW input
    depth_true_nchw = tf.transpose(depth_true, [0, 3, 1, 2])
    normals_nchw = lmbspecialops.depth_to_normals(depth_true_nchw,
                                                  intrinsics,
                                                  inverse_depth=True)
    # convert back to channels last
    normals = tf.transpose(normals_nchw, [0, 2, 3, 1])
    return euclidean_distance_loss(normals, normals_pred)
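
# Hedged sketch of the euclidean_distance_loss helper referenced above; it is
# not defined in this file. It is assumed to average the per-pixel Euclidean
# distance between the two channels-last normal maps; the epsilon inside the
# sqrt is an added assumption to keep the gradient finite where the maps agree.
def euclidean_distance_loss_sketch(y_true, y_pred, epsilon=1e-8):
    # per-pixel squared distance, summed over the (last) channel axis
    sq = tf.reduce_sum(tf.square(y_true - y_pred), axis=-1)
    return tf.reduce_mean(tf.sqrt(sq + epsilon))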
def svkitti_batch_demon(dataset, hyp, shuffle=True, center_crop=False,
                        include_poses=False):
    with tf.device('/cpu:0'):
        with open(dataset) as f:
            content = f.readlines()
        records = [hyp.dataset_location + line.strip() for line in content]
        nRecords = len(records)
        print('found %d records' % nRecords)
        # for record in records:
        #     assert os.path.isfile(record), 'Record at %s was not found' % record
        queue = tf.train.string_input_producer(records, shuffle=shuffle)
        (h, w, i1, i2, d1, d2, f12, f23, v1, v2,
         p1, p2, m1, m2) = read_and_decode_svkitti(queue)

        i1 = tf.cast(i1, tf.float32) * 1. / 255 - 0.5
        i2 = tf.cast(i2, tf.float32) * 1. / 255 - 0.5
        d1 = tf.cast(d1, tf.float32)
        d2 = tf.cast(d2, tf.float32)
        v1 = tf.cast(v1, tf.float32)  # 1 at non-sky pixels
        v2 = tf.cast(v2, tf.float32)
        # these are stored in [0,255], and 255 means moving.
        m1 = tf.cast(m1, tf.float32) * 1. / 255
        m2 = tf.cast(m2, tf.float32) * 1. / 255
        # d1 = d1*v1  # put 0 depth at invalid spots
        # d2 = d2*v2

        demon_height = 192
        demon_width = 256
        svkitti_height = 375
        svkitti_width = 1242
        demon_fx = 0.89115971
        demon_fy = 1.18821287
        svkitti_fx = 725. / svkitti_width
        svkitti_fy = 725. / svkitti_height
        # Calculate crop width/height given the (d_fx)(d_w) = (s_fx)(s_w)
        # relationship (same for height)
        crop_width = int(round((demon_fx * demon_width) / svkitti_fx))    # 390.8
        crop_height = int(round((demon_fy * demon_height) / svkitti_fy))  # 118.0

        # f12 /= [svkitti_width, svkitti_height]
        # f23 /= [svkitti_width, svkitti_height]

        # image tensors need to be cropped. we'll do them all at once.
        allCat = tf.concat(axis=2,
                           values=[i1, i2, d1, d2, f12, f23, v1, v2, m1, m2],
                           name="allCat")
        if center_crop:
            # integer division keeps the slice offsets int-typed
            off_h = (h - crop_height - 1) // 2
            off_w = (w - crop_width - 1) // 2
            allCat_crop = tf.slice(allCat, [off_h, off_w, 0],
                                   [crop_height, crop_width, -1],
                                   name="cropped_tensor")
        else:
            print_shape(allCat)
            allCat_crop, off_h, off_w = random_crop(allCat, crop_height,
                                                    crop_width, h, w)
            print_shape(allCat_crop)

        # We need to reshape the crop to match the demon dimensions of 256 x 192
        allCat_crop = tf.image.resize_images(allCat_crop,
                                             [demon_height, demon_width])

        # Split out each channel properly
        i1 = tf.slice(allCat_crop, [0, 0, 0], [-1, -1, 3], name="i1")
        i2 = tf.slice(allCat_crop, [0, 0, 3], [-1, -1, 3], name="i2")
        d1 = tf.slice(allCat_crop, [0, 0, 6], [-1, -1, 1], name="d1")
        d2 = tf.slice(allCat_crop, [0, 0, 7], [-1, -1, 1], name="d2")
        f12 = tf.slice(allCat_crop, [0, 0, 8], [-1, -1, 2], name="f12")
        f23 = tf.slice(allCat_crop, [0, 0, 10], [-1, -1, 2], name="f23")
        v1 = tf.slice(allCat_crop, [0, 0, 12], [-1, -1, 1], name="v1")
        v2 = tf.slice(allCat_crop, [0, 0, 13], [-1, -1, 1], name="v2")
        m1 = tf.slice(allCat_crop, [0, 0, 14], [-1, -1, 1], name="m1")
        m2 = tf.slice(allCat_crop, [0, 0, 15], [-1, -1, 1], name="m2")

        # Normalize flow so displacement by the image size corresponds to 1
        f12 = f12 / [demon_width, demon_height]
        f23 = f23 / [demon_width, demon_height]

        """ Calculate relative camera motion from pose 1 to pose 2 """
        # Note: We use negative poses as vkitti's coordinate system is not what
        # demon is trained on:
        #   vkitti: +x points right, +y points down, +z points forwards
        #   demon:  +x points left,  +y points down, +z points forwards
        # transformation = tf.constant([[-1., 0., 0., 0.],
        #                               [0., 1., 0., 0.],
        #                               [0., 0., 1., 0.],
        #                               [0., 0., 0., 1.]], dtype=tf.float32)
        # p1 = tf.matmul(tf.matmul(transformation, p1), transformation)
        # p2 = tf.matmul(tf.matmul(transformation, p2), transformation)
        rel_rt = ominus(tf.expand_dims(p2, 0), tf.expand_dims(p1, 0))[0, ...]
        rel_r = rel_rt[0:3, 0:3]
        rel_t = rel_rt[0:3, 3]

        # Important!! Convert from pose to extrinsic matrix
        rel_r = tf.matrix_transpose(rel_r)
        rel_t = tf.matmul(-rel_r, tf.expand_dims(rel_t, 1))[:, 0]

        # Convert rotation matrix to rotation about an axis
        # (the norm encodes the angle of rotation)
        rel_r = rotation_from_matrix(rel_r)
        rel_r.set_shape([3])

        # Normalize translation so ||t||_2 == 1
        t_norm = tf.norm(rel_t)
        rel_t = rel_t / t_norm
        # Which means we need to scale the depth by the same factor
        d1 = d1 / t_norm
        d2 = d2 / t_norm

        """ Encode depth (we want inverse depth) """
        d1 = encode_depth(d1, hyp.depth_encoding)
        d2 = encode_depth(d2, hyp.depth_encoding)

        """ Compute normals from gt depth """
        depth_resize = tf.image.resize_images(
            tf.expand_dims(d1, 0), [48, 64],
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        depth_nchw = tf.expand_dims(tf.transpose(d1, perm=[2, 0, 1]), 0)
        depth_resize_nchw = tf.transpose(depth_resize, perm=[0, 3, 1, 2])
        # Use intrinsics of demon
        intrinsics = tf.constant([[0.89115971, 1.18821287, 0.5, 0.5]],
                                 dtype=tf.float32)
        normals = sops.depth_to_normals(depth_nchw, intrinsics,
                                        inverse_depth=True)
        normals_from_downsampled = sops.depth_to_normals(depth_resize_nchw,
                                                         intrinsics,
                                                         inverse_depth=True)
        # unsure why, but ~2% of values are NaN
        normals = tf.where(tf.is_nan(normals), tf.zeros_like(normals), normals)
        normals_from_downsampled = tf.where(
            tf.is_nan(normals_from_downsampled),
            tf.zeros_like(normals_from_downsampled),
            normals_from_downsampled)
        # Remove batch dimension and transpose back to HWC
        normals = tf.transpose(tf.squeeze(normals), perm=[1, 2, 0])
        normals_from_downsampled = tf.transpose(
            tf.squeeze(normals_from_downsampled), perm=[1, 2, 0])

        """
        i1: image_1
        i2: image_2
        d1: depth_1
        f12: flow 1 -> 2
        v1: valid flow map 1
        rel_r: relative camera rotation from p1 to p2
        rel_t: relative camera translation from p1 to p2
        """
        batch = tf.train.batch([
            i1, i2, d1, d2, f12, f23, v1, normals, normals_from_downsampled,
            rel_r, rel_t, p1, p2, m1, off_h, off_w
        ], num_threads=16, batch_size=hyp.bs, dynamic_pad=True)
        return batch
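
# Hedged sketch of the ominus helper used above; it is not defined in this file.
# Following the TUM RGB-D benchmark tools, ominus(a, b) is assumed to compute the
# relative transform inv(a) @ b for batched 4x4 homogeneous pose matrices; the
# batching behaviour is an assumption based on how it is called here.
def ominus_sketch(a, b):
    # a, b: [batch, 4, 4] homogeneous pose matrices
    return tf.matmul(tf.matrix_inverse(a), b)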
def blendswap_batch(dataset, hyp, shuffle=True):
    with tf.device('/cpu:0'):
        with open(dataset) as f:
            content = f.readlines()
        records = [hyp.dataset_location + line.strip() for line in content]
        nRecords = len(records)
        print('found %d records' % nRecords)
        for record in records:
            assert os.path.isfile(record), 'Record at %s was not found' % record
        queue = tf.train.string_input_producer(records, shuffle=shuffle)
        (h, w, i1, i2, d1, f12,
         relativeTranslation1to2,
         relativeRotation1to2) = read_and_decode_blendswap(queue)

        i1 = tf.cast(i1, tf.float32) * 1. / 255 - 0.5
        i2 = tf.cast(i2, tf.float32) * 1. / 255 - 0.5
        d1 = tf.cast(d1, tf.float32)

        demon_height = 192
        demon_width = 256
        demon_fx = 0.89115971
        demon_fy = 1.18821287
        fx = 0.46875
        fy = 0.8333333333
        # Calculate crop width/height given the (d_fx)(d_w) = (s_fx)(s_w)
        # relationship (same for height)
        crop_width = int(round((demon_fx * demon_width) / fx))    # 487
        crop_height = int(round((demon_fy * demon_height) / fy))  # 274

        # image tensors need to be cropped. we'll do them all at once.
        allCat = tf.concat(axis=2, values=[i1, i2, d1, f12], name="allCat")
        print_shape(allCat)
        allCat_crop, off_h, off_w = random_crop(allCat, crop_height,
                                                crop_width, h, w)
        print_shape(allCat_crop)

        # We need to reshape the crop to match the demon dimensions of 256 x 192
        allCat_crop = tf.image.resize_images(allCat_crop,
                                             [demon_height, demon_width])

        # Split out each channel properly
        i1 = tf.slice(allCat_crop, [0, 0, 0], [-1, -1, 3], name="i1")
        i2 = tf.slice(allCat_crop, [0, 0, 3], [-1, -1, 3], name="i2")
        d1 = tf.slice(allCat_crop, [0, 0, 6], [-1, -1, 1], name="d1")
        f12 = tf.slice(allCat_crop, [0, 0, 7], [-1, -1, 2], name="f12")

        # Normalize flow so displacement by the image size corresponds to 1
        # f12 = f12 / [demon_width, demon_height]

        # Normalize translation so ||t||_2 == 1
        t_norm = tf.norm(relativeTranslation1to2)
        relativeTranslation1to2 = relativeTranslation1to2 / t_norm
        # Which means we need to scale the depth by the same factor
        d1 = d1 / t_norm

        # Encode depth (we want inverse depth)
        d1 = encode_depth(d1, hyp.depth_encoding)

        """ Compute normals from gt depth """
        depth_resize = tf.image.resize_images(
            tf.expand_dims(d1, 0), [48, 64],
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        depth_resize_nchw = tf.transpose(depth_resize, perm=[0, 3, 1, 2])
        # Use intrinsics of demon
        intrinsics = tf.constant([[0.89115971, 1.18821287, 0.5, 0.5]],
                                 dtype=tf.float32)
        normals_from_downsampled = sops.depth_to_normals(depth_resize_nchw,
                                                         intrinsics,
                                                         inverse_depth=True)
        # unsure why, but ~2% of values are NaN
        normals_from_downsampled = tf.where(
            tf.is_nan(normals_from_downsampled),
            tf.zeros_like(normals_from_downsampled),
            normals_from_downsampled)
        # Remove batch dimension and transpose back to HWC
        normals_from_downsampled = tf.transpose(
            tf.squeeze(normals_from_downsampled), perm=[1, 2, 0])

        """
        i1: image_1
        i2: image_2
        d1: depth_1
        f12: flow 1 -> 2
        relativeRotation1to2: relative camera rotation from frame 1 to frame 2
        relativeTranslation1to2: relative camera translation from frame 1 to frame 2
        """
        batch = tf.train.batch([
            i1, i2, d1, f12, normals_from_downsampled,
            relativeRotation1to2, relativeTranslation1to2, off_h, off_w
        ], num_threads=1, batch_size=hyp.bs, dynamic_pad=True)
        return batch
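
# Hedged sketch of the random_crop helper used by both batch functions above; it
# is not defined in this file. It is assumed to pick a uniformly random top-left
# offset, slice a crop_height x crop_width window out of an HWC tensor, and
# return the offsets alongside the crop, matching how the call sites unpack it.
def random_crop_sketch(tensor, crop_height, crop_width, h, w):
    # maxval is exclusive, so +1 keeps the largest valid offset reachable
    off_h = tf.random_uniform([], minval=0, maxval=h - crop_height + 1,
                              dtype=tf.int32)
    off_w = tf.random_uniform([], minval=0, maxval=w - crop_width + 1,
                              dtype=tf.int32)
    crop = tf.slice(tensor, [off_h, off_w, 0], [crop_height, crop_width, -1])
    return crop, off_h, off_w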
def prepare_gt(data,
               from_inverse_depth=False,
               to_inverse_depth=False,
               disparity_map=False,
               data_format='channels_last',  # TODO: channels_first is not supported yet
               focal_norm=False,
               compute_sig=True,
               img_keys=['image'],
               depth_keys=['depth'],
               normal_keys=[],
               compute_normals=True,  # TODO: support online normal estimation
               focal_factor=100.,
               downsampling_depth=5,
               sig_params=None,
               **kwargs):
    gt = {}
    if sig_params is None:
        sig_params = {'deltas': [1, 2, 4, 8, 16],
                      'weights': [1, 1, 1, 1, 1],
                      'epsilon': 0.001}

    K = data['intrinsics']
    w = data['depth'].shape[-2].value
    h = data['depth'].shape[-3].value
    focal_w = K[0, 0]
    focal_h = K[0, 1]
    ppw = K[0, 2]
    pph = K[0, 3]
    print([h, w])
    print([focal_w, focal_h, ppw, pph])
    print([focal_w / w, focal_h / h, ppw / w, pph / h])
    w, h = float(w), float(h)
    # Normalize the intrinsics by the image size
    intrinsics = tf.math.divide(K, tf.convert_to_tensor([[w, h, w, h]]))

    # Prepare depth images at different scales:
    norm_mul = 1.
    if focal_norm:
        focal_orig = (K[0, 0] + K[0, 1]) / 2.
        norm_mul = focal_factor / focal_orig
        if to_inverse_depth:
            norm_mul = tf.reciprocal(norm_mul)
    for dk in depth_keys:
        depth = data[dk] * norm_mul
        if from_inverse_depth ^ to_inverse_depth:  # XOR (true if the representations differ)
            depth = tf.reciprocal(depth)
        for i in range(downsampling_depth):
            gt[dk + str(i)] = depth
            if compute_sig:
                gt['sig_' + dk + str(i)] = scale_invariant_gradient(depth, **sig_params)
            if compute_normals:
                infn = lambda x: convert_NHWC_to_NCHW(tf.expand_dims(x, 0))
                outfn = lambda x: tf.squeeze(convert_NCHW_to_NHWC(x), 0)
                gt['norm_' + dk + str(i)] = outfn(
                    sops.depth_to_normals(infn(depth), intrinsics,
                                          inverse_depth=to_inverse_depth))
            depth = nn_downsampling(depth, 2)

    # Prepare normal images at different scales:
    for nk in normal_keys:
        normals = data[nk]
        for i in range(downsampling_depth):
            gt[nk + str(i)] = normals
            normals = nn_downsampling(normals, 2)

    data['gt'] = gt
    return data
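
# Hedged sketches of helpers referenced in prepare_gt but not defined in this
# file. nn_downsampling is assumed to be plain nearest-neighbour downsampling by
# an integer factor (consistent with the NEAREST_NEIGHBOR resizes used above),
# and the NHWC/NCHW converters are assumed to be simple transposes.
def nn_downsampling_sketch(t, factor):
    # t: [N, H, W, C] or [H, W, C]; negative indices keep this rank-agnostic
    shape = tf.shape(t)
    return tf.image.resize_images(
        t, [shape[-3] // factor, shape[-2] // factor],
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)


def convert_NHWC_to_NCHW_sketch(t):
    return tf.transpose(t, [0, 3, 1, 2])


def convert_NCHW_to_NHWC_sketch(t):
    return tf.transpose(t, [0, 2, 3, 1])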