Example #1
def prepare_ground_truth_tensors(depth, rotation, translation, intrinsics):
    """Computes ground truth tensors at lower resolution and scale invariant gradient (sig)
    images of some ground truth tensors.
    
    depth: Tensor
        depth map with inverse depth values
        
    rotation: Tensor
        rotation in angle axis format with 3 elements

    translation: Tensor
        the camera translation

    intrinsics: Tensor
        Tensor with the intrinsic camera parameters

    Returns a dictionary with ground truth data for depth, normals and flow
    at different resolutions.
    """
    depth1, depth2, depth3, depth4, depth5 = recursive_median_downsample(depth, 5)
    flow0 = sops.depth_to_flow(depth, intrinsics, rotation, translation, inverse_depth=True, normalize_flow=True, name='DepthToFlow0')
    flow2 = sops.depth_to_flow(depth2, intrinsics, rotation, translation, inverse_depth=True, normalize_flow=True, name='DepthToFlow2')
    flow5 = sops.depth_to_flow(depth5, intrinsics, rotation, translation, inverse_depth=True, normalize_flow=True, name='DepthToFlow5')
    
    normal0 = sops.depth_to_normals(depth, intrinsics, inverse_depth=True)
    normal2 = sops.depth_to_normals(depth2, intrinsics, inverse_depth=True)
    
    # five finite-difference spacings with equal weights, as in DeMoN's scale-invariant gradient loss
    sig_params = {'deltas': [1, 2, 4, 8, 16], 'weights': [1, 1, 1, 1, 1], 'epsilon': 0.001}

    depth0_sig = scale_invariant_gradient(depth, **sig_params)
    depth2_sig = scale_invariant_gradient(depth2, **sig_params)
    flow2_sig = scale_invariant_gradient(flow2, **sig_params)
    
    return {
            'depth0': depth, 
            'depth0_sig': depth0_sig, 
            'depth2': depth2, 
            'depth2_sig': depth2_sig, 
            'flow0': flow0, 
            'flow2': flow2, 
            'flow2_sig': flow2_sig, 
            'flow5': flow5, 
            'normal0': normal0,
            'normal2': normal2,
            }
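recursive_median_downsample and scale_invariant_gradient are helpers defined elsewhere in the repository. As a reading aid, here is a minimal NumPy sketch of a DeMoN-style scale-invariant gradient for a single 2D channel; the function name, boundary handling, and output layout are assumptions, not the repository's implementation:

import numpy as np

def scale_invariant_gradient_np(f, deltas=(1, 2, 4, 8, 16),
                                weights=(1, 1, 1, 1, 1), epsilon=0.001):
    # Hypothetical sketch: for each spacing h, a forward difference normalized
    # by the local magnitudes, g_h(i) = (f(i+h) - f(i)) / (|f(i+h)| + |f(i)| + eps).
    grads = []
    for h, wgt in zip(deltas, weights):
        gx = np.zeros_like(f)
        gy = np.zeros_like(f)
        gx[:, :-h] = wgt * (f[:, h:] - f[:, :-h]) / (
            np.abs(f[:, h:]) + np.abs(f[:, :-h]) + epsilon)
        gy[:-h, :] = wgt * (f[h:, :] - f[:-h, :]) / (
            np.abs(f[h:, :]) + np.abs(f[:-h, :]) + epsilon)
        grads.append(np.stack([gx, gy]))  # one (2, H, W) gradient image per delta
    return grads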
Example #2
def normals_loss_from_depth_gt(depth_true, normals_pred):

    # DeMoN's normalized intrinsics; broadcast across the batch if needed, e.g.
    # np.broadcast_to(np.array([[0.89115971, 1.18821287, 0.5, 0.5]]), (batch_size, 4))
    intrinsics = tf.constant([[0.89115971, 1.18821287, 0.5, 0.5]])
    # depth_to_normals expects NCHW input
    depth_true_nchw = tf.transpose(depth_true, [0, 3, 1, 2])

    normals_nchw = lmbspecialops.depth_to_normals(depth_true_nchw,
                                                  intrinsics,
                                                  inverse_depth=True)

    # convert to channels last
    normals = tf.transpose(normals_nchw, [0, 2, 3, 1])

    return euclidean_distance_loss(normals, normals_pred)
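euclidean_distance_loss is defined elsewhere in the repository; a plausible minimal version (the name, NHWC layout, and reduction are assumptions) averages the per-pixel L2 distance between the two normal maps:

def euclidean_distance_loss(y_true, y_pred):
    # Hypothetical sketch: per-pixel L2 distance over the channel axis,
    # averaged over batch and pixels; the small constant guards the sqrt gradient.
    return tf.reduce_mean(
        tf.sqrt(tf.reduce_sum(tf.square(y_true - y_pred), axis=-1) + 1e-8))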
Example #3
def svkitti_batch_demon(dataset,
                        hyp,
                        shuffle=True,
                        center_crop=False,
                        include_poses=False):
    with tf.device('/cpu:0'):
        with open(dataset) as f:
            content = f.readlines()
        records = [hyp.dataset_location + line.strip() for line in content]
        nRecords = len(records)
        print('found %d records' % nRecords)
        # for record in records:
        #     assert os.path.isfile(record), 'Record at %s was not found' % record

        queue = tf.train.string_input_producer(records, shuffle=shuffle)

        (h, w, i1, i2, d1, d2, f12, f23, v1, v2, p1, p2, m1,
         m2) = read_and_decode_svkitti(queue)

        # scale images from [0, 255] to [-0.5, 0.5]
        i1 = tf.cast(i1, tf.float32) * 1. / 255 - 0.5
        i2 = tf.cast(i2, tf.float32) * 1. / 255 - 0.5
        d1 = tf.cast(d1, tf.float32)
        d2 = tf.cast(d2, tf.float32)
        v1 = tf.cast(v1, tf.float32)  # 1 at non-sky pixels
        v2 = tf.cast(v2, tf.float32)
        # these are stored in [0,255], and 255 means moving.
        m1 = tf.cast(m1, tf.float32) * 1. / 255
        m2 = tf.cast(m2, tf.float32) * 1. / 255
        # d1 = d1*v1 # put 0 depth at invalid spots
        # d2 = d2*v2

        demon_height = 192
        demon_width = 256
        svkitti_height = 375
        svkitti_width = 1242
        demon_fx = 0.89115971
        demon_fy = 1.18821287
        svkitti_fx = 725. / svkitti_width
        svkitti_fy = 725. / svkitti_height

        # Calculate crop width/height given (d_fx)(d_w) = (s_x)(s_w) relationship (same for height)
        crop_width = int(round((demon_fx * demon_width) / svkitti_fx))  # 390.8
        crop_height = int(round(
            (demon_fy * demon_height) / svkitti_fy))  # 118.0
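        # Worked numbers: demon_fx * demon_width = 0.89115971 * 256 ~ 228.1 and
        # svkitti_fx = 725 / 1242 ~ 0.5837, so crop_width ~ 228.1 / 0.5837 ~ 390.8 -> 391;
        # likewise demon_fy * demon_height ~ 228.1 and svkitti_fy = 725 / 375 ~ 1.9333,
        # so crop_height ~ 228.1 / 1.9333 ~ 118.0 -> 118.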

        # f12 /= [svkitti_width, svkitti_height]
        # f23 /= [svkitti_width, svkitti_height]

        # image tensors need to be cropped. we'll do them all at once.
        allCat = tf.concat(axis=2,
                           values=[i1, i2, d1, d2, f12, f23, v1, v2, m1, m2],
                           name="allCat")

        # image tensors need to be cropped. we'll do them all at once.
        if center_crop:
            # use floor division so the offsets stay integers for tf.slice
            off_h = (h - crop_height - 1) // 2
            off_w = (w - crop_width - 1) // 2
            allCat_crop = tf.slice(allCat, [off_h, off_w, 0],
                                   [crop_height, crop_width, -1],
                                   name="cropped_tensor")
        else:
            print_shape(allCat)
            allCat_crop, off_h, off_w = random_crop(allCat, crop_height,
                                                    crop_width, h, w)
            print_shape(allCat_crop)

        # We need to reshape the crop to match the demon dimensions of 256 x 192
        allCat_crop = tf.image.resize_images(allCat_crop,
                                             [demon_height, demon_width])

        # Split out each channel properly
        i1 = tf.slice(allCat_crop, [0, 0, 0], [-1, -1, 3], name="i1")
        i2 = tf.slice(allCat_crop, [0, 0, 3], [-1, -1, 3], name="i2")
        d1 = tf.slice(allCat_crop, [0, 0, 6], [-1, -1, 1], name="d1")
        d2 = tf.slice(allCat_crop, [0, 0, 7], [-1, -1, 1], name="d2")
        f12 = tf.slice(allCat_crop, [0, 0, 8], [-1, -1, 2], name="f12")
        f23 = tf.slice(allCat_crop, [0, 0, 10], [-1, -1, 2], name="f23")
        v1 = tf.slice(allCat_crop, [0, 0, 12], [-1, -1, 1], name="v1")
        v2 = tf.slice(allCat_crop, [0, 0, 13], [-1, -1, 1], name="v2")
        m1 = tf.slice(allCat_crop, [0, 0, 14], [-1, -1, 1], name="m1")
        m2 = tf.slice(allCat_crop, [0, 0, 15], [-1, -1, 1], name="m2")

        # Normalize flow so displacement by the image size corresponds to 1
        f12 = f12 / [demon_width, demon_height]
        f23 = f23 / [demon_width, demon_height]
        """ Calculate relative camera motion from pose 1 to pose 2 """
        # Note: We use negative poses as the vkitti coordinate system is not what demon is trained on:
        # vkitti: +x points right, +y points down, +z points forwards
        # demon: +x points left, +y points down, +z points forwards
        # transformation = tf.constant([[-1., 0., 0., 0.,],
        #                               [0., 1., 0., 0.,],
        #                               [0., 0., 1., 0.,],
        #                               [0., 0., 0., 1.,]], dtype=tf.float32)
        # p1 = tf.matmul(tf.matmul(transformation, p1), transformation)
        # p2 = tf.matmul(tf.matmul(transformation, p2), transformation)
        rel_rt = ominus(tf.expand_dims(p2, 0), tf.expand_dims(p1, 0))[0, ...]
        rel_r = rel_rt[0:3, 0:3]
        rel_t = rel_rt[0:3, 3]

        # Important!! Convert from pose to extrinsic matrix
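        # (a pose maps camera -> world; the extrinsic maps world -> camera,
        # so R_ext = R_pose^T and t_ext = -R_ext @ t_pose, as computed below)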
        rel_r = tf.matrix_transpose(rel_r)
        rel_t = tf.matmul(-rel_r, tf.expand_dims(rel_t, 1))[:, 0]

        # Convert rotation matrix to rotation about axis (norm encodes angle of rotation)
        rel_r = rotation_from_matrix(rel_r)
        rel_r.set_shape([3])

        # Normalize translation so ||t||2 == 1
        t_norm = tf.norm(rel_t)
        rel_t = rel_t / t_norm

        # Which means we need to scale the depth by the same factor
        d1 = d1 / t_norm
        d2 = d2 / t_norm
        """ Encode depth (we want inverse depth) """
        d1 = encode_depth(d1, hyp.depth_encoding)
        d2 = encode_depth(d2, hyp.depth_encoding)
        """ Compute normals from gt depth """
        depth_resize = tf.image.resize_images(
            tf.expand_dims(d1, 0), [48, 64],
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        depth_nchw = tf.expand_dims(tf.transpose(d1, perm=[2, 0, 1]), 0)
        depth_resize_nchw = tf.transpose(depth_resize, perm=[0, 3, 1, 2])
        # Use intrinsics of demon
        intrinsics = tf.constant([[0.89115971, 1.18821287, 0.5, 0.5]],
                                 dtype=tf.float32)
        normals = sops.depth_to_normals(depth_nchw,
                                        intrinsics,
                                        inverse_depth=True)
        normals_from_downsampled = sops.depth_to_normals(depth_resize_nchw,
                                                         intrinsics,
                                                         inverse_depth=True)

        # unsure why, but ~2% of values are NaN
        normals = tf.where(tf.is_nan(normals), tf.zeros_like(normals), normals)
        normals_from_downsampled = tf.where(
            tf.is_nan(normals_from_downsampled),
            tf.zeros_like(normals_from_downsampled), normals_from_downsampled)

        # Remove the batch dimension and transpose back to HWC
        normals = tf.transpose(tf.squeeze(normals), perm=[1, 2, 0])
        normals_from_downsampled = tf.transpose(
            tf.squeeze(normals_from_downsampled), perm=[1, 2, 0])
        """
        i1: image_1
        i2: image_2
        d1: depth_1
        f12: flow 1 -> 2
        v1: valid flow map 1
        rel_r: relative camera rotation from p1 to p2
        rel_t: relative camera translation from p1 to p2
        """
        batch = tf.train.batch([
            i1, i2, d1, d2, f12, f23, v1, normals, normals_from_downsampled,
            rel_r, rel_t, p1, p2, m1, off_h, off_w
        ],
                               num_threads=16,
                               batch_size=hyp.bs,
                               dynamic_pad=True)
        return batch
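rotation_from_matrix converts the 3x3 rotation matrix to an angle-axis vector whose norm encodes the rotation angle. A minimal NumPy sketch of that standard conversion; the TF version used above is assumed to handle the degenerate angles near 0 and pi more carefully:

import numpy as np

def rotation_from_matrix_np(R):
    # Angle from the trace, axis from the skew-symmetric part of R.
    angle = np.arccos(np.clip((np.trace(R) - 1.0) / 2.0, -1.0, 1.0))
    if np.isclose(angle, 0.0):
        return np.zeros(3)
    axis = np.array([R[2, 1] - R[1, 2],
                     R[0, 2] - R[2, 0],
                     R[1, 0] - R[0, 1]]) / (2.0 * np.sin(angle))
    return angle * axis  # direction = rotation axis, norm = rotation angle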
Example #4
def blendswap_batch(dataset, hyp, shuffle=True):
    with tf.device('/cpu:0'):
        with open(dataset) as f:
            content = f.readlines()
        records = [hyp.dataset_location + line.strip() for line in content]
        nRecords = len(records)
        print('found %d records' % nRecords)
        for record in records:
            assert os.path.isfile(
                record), 'Record at %s was not found' % record

        queue = tf.train.string_input_producer(records, shuffle=shuffle)

        (h, w, i1, i2, d1, f12, relativeTranslation1to2,
         relativeRotation1to2) = read_and_decode_blendswap(queue)

        i1 = tf.cast(i1, tf.float32) * 1. / 255 - 0.5
        i2 = tf.cast(i2, tf.float32) * 1. / 255 - 0.5
        d1 = tf.cast(d1, tf.float32)

        demon_height = 192
        demon_width = 256
        demon_fx = 0.89115971
        demon_fy = 1.18821287
        fx = 0.46875
        fy = 0.8333333333

        # Calculate crop width/height given (d_fx)(d_w) = (s_x)(s_w) relationship (same for height)
        crop_width = int(round((demon_fx * demon_width) / fx))  # 487
        crop_height = int(round((demon_fy * demon_height) / fy))  # 274

        # image tensors need to be cropped. we'll do them all at once.
        allCat = tf.concat(axis=2, values=[i1, i2, d1, f12], name="allCat")

        # image tensors need to be cropped. we'll do them all at once.
        print_shape(allCat)
        allCat_crop, off_h, off_w = random_crop(allCat, crop_height,
                                                crop_width, h, w)
        print_shape(allCat_crop)

        # We need to reshape the crop to match the demon dimensions of 256 x 192
        allCat_crop = tf.image.resize_images(allCat_crop,
                                             [demon_height, demon_width])

        # Split out each channel properly
        i1 = tf.slice(allCat_crop, [0, 0, 0], [-1, -1, 3], name="i1")
        i2 = tf.slice(allCat_crop, [0, 0, 3], [-1, -1, 3], name="i2")
        d1 = tf.slice(allCat_crop, [0, 0, 6], [-1, -1, 1], name="d1")
        f12 = tf.slice(allCat_crop, [0, 0, 7], [-1, -1, 2], name="f12")

        # Normalize flow so displacement by the image size corresponds to 1
        # f12 = f12 / [demon_width, demon_height]

        # Normalize translation so ||t||2 == 1
        t_norm = tf.norm(relativeTranslation1to2)
        relativeTranslation1to2 = relativeTranslation1to2 / t_norm

        # Which means we need to scale the depth by the same factor
        d1 = d1 / t_norm

        # Encode depth (we want inverse depth)
        d1 = encode_depth(d1, hyp.depth_encoding)  # encode
        """ Compute normals from gt depth """
        depth_resize = tf.image.resize_images(
            tf.expand_dims(d1, 0), [48, 64],
            method=tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        depth_resize_nchw = tf.transpose(depth_resize, perm=[0, 3, 1, 2])
        # Use intrinsics of demon
        intrinsics = tf.constant([[0.89115971, 1.18821287, 0.5, 0.5]],
                                 dtype=tf.float32)
        normals_from_downsampled = sops.depth_to_normals(depth_resize_nchw,
                                                         intrinsics,
                                                         inverse_depth=True)

        # unsure why, but ~2% of values are NaN
        normals_from_downsampled = tf.where(
            tf.is_nan(normals_from_downsampled),
            tf.zeros_like(normals_from_downsampled), normals_from_downsampled)

        # Remove the batch dimension and transpose back to HWC
        normals_from_downsampled = tf.transpose(
            tf.squeeze(normals_from_downsampled), perm=[1, 2, 0])
        """
        i1: image_1
        i2: image_2
        d1: depth_1
        f12: flow 1 -> 2
        v1: valid flow map 1
        r_rel: relative camera rotation from p1 to p2
        t_rel: relative camera translation from p1 to p2
        """
        batch = tf.train.batch([
            i1, i2, d1, f12, normals_from_downsampled, relativeRotation1to2,
            relativeTranslation1to2, off_h, off_w
        ],
                               num_threads=1,
                               batch_size=hyp.bs,
                               dynamic_pad=True)
        return batch
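random_crop is a helper shared with Example #3; it returns the crop together with the offsets used. A minimal TF1 sketch under the assumption that it draws a uniform random top-left corner from an HWC tensor:

def random_crop(t, crop_h, crop_w, h, w):
    # Hypothetical sketch: uniform random top-left offset, then a fixed-size slice.
    off_h = tf.random_uniform([], 0, h - crop_h + 1, dtype=tf.int32)
    off_w = tf.random_uniform([], 0, w - crop_w + 1, dtype=tf.int32)
    crop = tf.slice(t, [off_h, off_w, 0], [crop_h, crop_w, -1])
    return crop, off_h, off_w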
Example #5
def prepare_gt(data,
    from_inverse_depth=False,
    to_inverse_depth=False,
    disparity_map=False,
    data_format='channels_last', # TODO: channels_first is not supported yet
    focal_norm=False,
    compute_sig=True,
    img_keys=['image'],
    depth_keys=['depth'], normal_keys=[],
    compute_normals=True, # TODO: Support online normal estimation
    focal_factor=100.,
    downsampling_depth=5,
    sig_params=None, **kwargs):
    
    gt={}
    if sig_params is None:
        sig_params = {'deltas':[1,2,4,8,16], 'weights':[1,1,1,1,1], 'epsilon': 0.001}
    
    K = data['intrinsics']
    # NHWC layout: height is dim -3, width is dim -2
    w = data['depth'].shape[-2].value
    h = data['depth'].shape[-3].value

    focal_w = K[0,0]
    focal_h = K[0,1]
    ppw = K[0,2]
    pph = K[0,3]
    print([h,w])
    print([focal_w,focal_h, ppw,pph])
    print([focal_w/w,focal_h/h, ppw/w,pph/h])
    w,h=float(w),float(h)
    intrinsics = tf.math.divide(K,tf.convert_to_tensor([[w,h,w,h]]))
    # Prepare depth images at different scales:
    norm_mul = 1.

    if focal_norm:
        focal_orig = ((K[0,0]+K[0,1])/2.)
        norm_mul = focal_factor/focal_orig
        if to_inverse_depth:
            norm_mul = tf.reciprocal(norm_mul)

    for dk in depth_keys:
        depth = data[dk]*norm_mul
        if (from_inverse_depth ^ to_inverse_depth): # XOR (true if different)
            depth=tf.reciprocal(depth)
        for i in range(downsampling_depth):
            gt[dk+str(i)]=depth
            if compute_sig:
                gt['sig_'+dk+str(i)]=scale_invariant_gradient(depth,**sig_params)
            if compute_normals:
                # depth_to_normals expects NCHW with a batch dimension
                infn = lambda x: convert_NHWC_to_NCHW(tf.expand_dims(x,0))
                outfn = lambda x: tf.squeeze(convert_NCHW_to_NHWC(x),0)
                gt['norm_'+dk+str(i)]=outfn(
                    sops.depth_to_normals(infn(depth),
                    intrinsics,inverse_depth=to_inverse_depth))
            depth = nn_downsampling(depth,2)
    # Prepare normal images at different scales:
    for nk in normal_keys:
        normals = data[nk]
        for i in range(downsampling_depth):
            gt[nk+str(i)]=normals
            normals = nn_downsampling(normals,2)

    data['gt']=gt
    return data
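A hypothetical call, assuming image and depth are existing NHWC tensors and the intrinsics tensor has shape [1, 4] holding fx, fy, cx, cy in pixels (as the K[0, i] indexing and the division by [w, h, w, h] above imply):

data = {
    'image': image,      # [N, H, W, 3], assumed placeholder
    'depth': depth,      # [N, H, W, 1], metric depth, assumed placeholder
    'intrinsics': tf.constant([[228.1, 228.1, 128., 96.]]),  # fx, fy, cx, cy in pixels
}
data = prepare_gt(data, from_inverse_depth=False, to_inverse_depth=True)
# data['gt'] now holds 'depth0'..'depth4', 'sig_depth0'.., and 'norm_depth0'..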