Example #1
def approximate_fill_rate_loss(gt, pred, fov="kinect"):
    """
    Calculates the approximate fill rate loss between two depth
    maps through squared error. Used during early development
    testing.

    :param gt: ground truth depth image (tf.float32 [b, h, w])
    :param pred: Predicted depth bins (tf.float32 [b, h, w, c])
    :param fov: a string literal that determines the type of camera used
    :return: the loss (tf.float32 scalar)
    """
    gt_xyz = depth_to_xyz(gt, fov=fov)
    pred_depth = bins_to_depth(pred)
    pred_xyz = depth_to_xyz(pred_depth, fov=fov)

    lim = tf.constant(cfg["fill_rate_loss_lim"], dtype=tf.float32)
    gt_mask = clip_by_border(gt_xyz, lim=lim)

    # The mask from clip_by_border is [b, h, w]; expand it so it broadcasts
    # over the xyz channel (matching actual_fill_rate_loss below)
    gt_xyz = tf.multiply(
        gt_xyz,
        tf.expand_dims(tf.cast(tf.logical_not(gt_mask), tf.float32), axis=-1))
    pred_xyz = tf.multiply(
        pred_xyz,
        tf.expand_dims(tf.cast(tf.logical_not(gt_mask), tf.float32), axis=-1))

    sq_diff = tf.square(tf.subtract(gt_xyz, pred_xyz))
    mse_fr_loss = tf.reduce_mean(sq_diff)

    return mse_fr_loss
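A minimal smoke test might look like the following, assuming the project helpers (bins_to_depth, depth_to_xyz, clip_by_border) and the cfg dict are in scope; the shapes follow the docstring:

import tensorflow as tf

# Sketch only: random inputs with the documented shapes
gt = tf.random.uniform((2, 224, 224), minval=0.25, maxval=3.0)       # [b, h, w]
pred = tf.nn.softmax(tf.random.normal((2, 224, 224, 150)), axis=-1)  # [b, h, w, c]
loss = approximate_fill_rate_loss(gt, pred, fov="kinect")
print(float(loss))  # non-negative scalar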
Example #2
def virtual_normal_loss(gt, pred, fov="kinect"):
    """
    Calculates the virtual normal loss between gt depth image and predicted depth bins

    :param gt: ground truth depth image of shape (b, h, w) or (b, h, w, 1)
    :param pred: predicted softmax depth bins from the neural net of shape (b, h, w, c)
    :param fov: a string literal that determines the type of camera used
    :return: virtual normal loss
    """
    if len(gt.shape) == 4:
        gt = gt[:, :, :, 0]

    gt_xyz = depth_to_xyz(gt, input_shape=gt.shape, fov=fov)
    pred_depth = bins_to_depth(pred)
    pred_xyz = depth_to_xyz(pred_depth, input_shape=pred_depth.shape, fov=fov)
    gt_p_groups, pred_p_groups = generate_random_p_groups(
        gt_xyz,
        pred_xyz,
        shape=gt_xyz.shape,
        sample_ratio=cfg["vnl_sample_ratio"])
    valid_mask = tf.logical_not(generate_invalid_mask(gt_p_groups))
    valid_mask = tf.expand_dims(valid_mask, axis=-1)
    gt_normals = generate_unit_normals(gt_p_groups)
    pred_normals = generate_unit_normals(pred_p_groups)
    normals_loss = tf.subtract(gt_normals, pred_normals)  # [b, n, 3xyz]
    normals_loss = tf.multiply(normals_loss, tf.cast(valid_mask, tf.float32))
    loss = tf.math.sqrt(
        tf.reduce_sum(tf.math.square(normals_loss), axis=-1) +
        1.0e-10)  # [b, n]
    loss = tf.reshape(loss, (-1, ))
    loss = tf.reduce_mean(loss)
    return loss
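A quick sanity check, assuming depth_to_bins quantizes depth into the same 150 bins that bins_to_depth decodes (as in the test further below): with a perfect prediction the sampled normals match and the loss collapses to roughly sqrt(1e-10).

# Sketch only: round-trip the ground truth through the bin encoding
gt = tf.random.uniform((2, 224, 224), minval=0.25, maxval=3.0)
one_hot = tf.one_hot(depth_to_bins(gt), 150)
print(float(virtual_normal_loss(bins_to_depth(one_hot), one_hot)))  # ~1e-5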
Example #3
def evaluate_model(model_path):
    """
    Evaluates a model using common metrics for comparison
    @param model_path: String path to the model
    @return: dictionary mapping each criterion name to its value accumulated over the dataset
    """
    model = load_model(model_path)
    ds = load_nyudv2(batch=4, shuffle=False, split='validation')
    criteria = {
        'err_absRel': 0,
        'err_squaRel': 0,
        'err_rms': 0,
        'err_silog': 0,
        'err_logRms': 0,
        'err_silog2': 0,
        'err_delta1': 0,
        'err_delta2': 0,
        'err_delta3': 0,
        'err_log10': 0,
        'err_whdr': 0,
        'n_pixels': 0
    }

    for rgb, d in ds:
        pred_bins = model.predict(rgb)
        pred = bins_to_depth(pred_bins)
        criteria = evaluate_error(d, pred, criteria)
    return criteria
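The dictionary accumulates values batch by batch; turning the sums into final averages is left to the caller. A hypothetical post-processing step (it assumes evaluate_error accumulates per-pixel sums, which this snippet cannot verify):

# Hypothetical: normalize accumulated sums by the pixel count
results = evaluate_model("models/depth_model.h5")  # hypothetical path
n = results.pop('n_pixels')
for name, value in sorted(results.items()):
    print(f"{name}: {value / n:.4f}")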
Example #4
def custom_accuracy(gt, pred):
    """
    Custom accuracy for evaluating performance during training and validation
    @param gt: Reshaped ground truth depth map, (224, 224, 1)
    @param pred: Predicted depth bins, (224, 224, 150), (output from model)
    @return: Accuracy as 1 / (1 + MSE), in the range (0, 1], where 1 is a perfect prediction
    """
    pred_depth = bins_to_depth(pred)
    return 1. / (1. +
                 tf.keras.metrics.MSE(gt, tf.expand_dims(pred_depth, axis=-1)))
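Since the signature matches Keras' (y_true, y_pred) convention, the function can be passed straight to compile(); a sketch for any Keras model whose output is the (h, w, 150) bin tensor, where the loss name is a stand-in for whatever bin-wise loss the project trains with:

# Sketch: report the custom accuracy during fit()
model.compile(optimizer='adam',
              loss=some_bin_loss,        # hypothetical bin-wise training loss
              metrics=[custom_accuracy])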
Example #5
def depth_model(shape=(224, 224, 3)):
    """
    Sets up an encoder-decoder model that only returns a depth map
    @param shape: Input shape to the model (h, w, c)
    @return: tf.keras.Model with depth map output
    """
    inputs = tf.keras.Input(shape=shape)
    [x, x_softmax] = full_model(shape)(inputs)
    depth = bins_to_depth(x_softmax)
    return tf.keras.Model(inputs=inputs, outputs=depth)
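Usage sketch, assuming full_model builds the encoder-decoder and returns [x, x_softmax] as above:

# Sketch: build the depth-only model and run one forward pass
model = depth_model(shape=(224, 224, 3))
rgb = tf.random.uniform((1, 224, 224, 3))
depth_map = model(rgb)
print(depth_map.shape)  # expected (1, 224, 224), since bins_to_depth collapses the bin axis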
Example #6
def test_model(rgb, d, model):
    """
    Runs a prediction with the model, and displays the input along with the estimation for visual comparison
    @param rgb: Input RGB image, (224, 224, 3)
    @param d: Ground truth depth map corresponding to rgb, (224, 224, 1)
    @param model: The model object to run the prediction on
    @return: None, displays images
    """
    print("Testing model...")
    rgb = tf.expand_dims(rgb, 0)  # Convert from [h, w, c] to [1, h, w, c]
    d_est = model.predict(rgb)
    d_est = bins_to_depth(d_est)
    display_images([rgb[0], d, d_est[0]])
    return None
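A sketch of calling it on one validation sample, assuming load_nyudv2 returns a tf.data.Dataset of (rgb, d) batches as in evaluate_model above:

# Sketch: visually inspect the first validation sample
model = load_model("models/depth_model.h5")  # hypothetical path
ds = load_nyudv2(batch=1, shuffle=False, split='validation')
for rgb, d in ds.take(1):
    test_model(rgb[0], d[0], model)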
Example #7
def test_run(self):
    d_file = "C:/Users/Victor/Documents/Github/garbage_view/data/train/data_0/000345.raw"
    with open(d_file, "rb") as file:
        d_img = file.read()
    # Unpack the raw uint16 depth frame (native byte order) into (480, 640, 1)
    d_img = np.array(struct.unpack("H" * 480 * 640, d_img),
                     dtype='uint16').reshape((480, 640, 1))
    d_img = tf.expand_dims(d_img, axis=0)
    _, gt_depth = resize_normalize(d_img, d_img)
    gt_depth = gt_depth[:, :, :, 0] / 1000  # millimetres to metres
    #gt_depth = tf.random.uniform(shape=(8, 224, 224), minval=0.25, maxval=3.)
    gt_bins = depth_to_bins(gt_depth)
    one_hot = tf.one_hot(gt_bins, 150)
    gt_depth = bins_to_depth(one_hot)
    no_loss = actual_fill_rate_loss(gt_depth, one_hot)
    print(no_loss)
    self.assertTrue(no_loss == 0.)
    pred_depth = gt_depth + 0.01
    pred_bins = depth_to_bins(pred_depth)
    one_hot = tf.one_hot(pred_bins, 150)
    some_loss = actual_fill_rate_loss(gt_depth, one_hot)
    print(some_loss)
    self.assertTrue(some_loss != 0.0)
Example #8
def actual_fill_rate_loss(gt, pred, fov="kinect", z_zero=1.3):
    """
    Calculates the fill rate loss between two depth maps through fill rate error

    :param gt: ground truth depth image (tf.float32 [b, h, w])
    :param pred: Predicted depth bins (tf.float32 [b, h, w, c])
    :param fov: a string literal that determines the type of camera used (string)
    :param z_zero: distance to top of container (float)
    :return: the loss (tf.float32 scalar)
    """
    # Ensure the dimensions are in order and convert to point clouds
    batch_dims = gt.shape[0]
    if not batch_dims:
        batch_dims = 1
    if len(gt.shape) == 4:
        gt = gt[:, :, :, 0]
    gt_xyz = depth_to_xyz(gt, fov=fov)
    pred_depth = bins_to_depth(pred)
    pred_xyz = depth_to_xyz(pred_depth, fov=fov)
    # Extract the region of interest and clip the point clouds accordingly
    lim = tf.constant(cfg["fill_rate_loss_lim"], dtype=tf.float32)
    gt_mask = clip_by_border(gt_xyz, lim=lim)

    x, y, z = tf.split(gt_xyz, num_or_size_splits=3, axis=-1)
    z = z_zero - z
    gt_xyz = tf.concat([x, y, z], axis=-1)
    x, y, z = tf.split(pred_xyz, num_or_size_splits=3, axis=-1)
    z = z_zero - z
    pred_xyz = tf.concat([x, y, z], axis=-1)

    gt_xyz = tf.multiply(
        gt_xyz,
        tf.expand_dims(tf.cast(tf.logical_not(gt_mask), tf.float32), axis=-1))
    pred_xyz = tf.multiply(
        pred_xyz,
        tf.expand_dims(tf.cast(tf.logical_not(gt_mask), tf.float32), axis=-1))
    # Extract indices for triangulation: two triangles per grid cell, each
    # vertex stored as a (row, col) offset
    indices = tf.constant([[[0, 0], [1, 0], [0, 1]], [[0, 1], [1, 0], [1, 1]]],
                          dtype=tf.int32)  # [2, 3, 2] int32
    # [2, 3, 2] -> [(224-1)*2, 3, 2]: one triangle pair per cell along the height
    indices = tf.tile(indices, (223, 1, 1))
    x = tf.constant([i // 2 for i in range(223 * 2)], dtype=tf.int32)  # [446]
    a = indices[:, :, 0] + tf.tile(tf.expand_dims(x, axis=-1),
                                   (1, 3))  # row indices: [446, 3] + [446, 3]
    a = tf.tile(a, (223, 1))  # repeat the row pattern for all 223 column cells

    indices = tf.tile(indices, (223, 1, 1))  # [(223*223*2), 3, 2]
    x = tf.constant([i // (223 * 2) for i in range(223 * 223 * 2)],
                    dtype=tf.int32)
    b = indices[:, :, 1] + tf.tile(tf.expand_dims(x, axis=-1), (1, 3))  # column indices

    indices = tf.stack([a, b], axis=-1)  # [(223*223*2), 3, 2] (row, col) per vertex
    indices = tf.tile(tf.expand_dims(indices, axis=0), (batch_dims, 1, 1, 1))

    # Construct triangles and get their area and average height to calculate volumes
    gt_triangles = tf.gather_nd(
        gt_xyz, indices, batch_dims=1)  # [b, (223*223*2), 3(points), 3(xyz)]
    pred_triangles = tf.gather_nd(
        pred_xyz, indices, batch_dims=1)  # [b, (223*223*2), 3(points), 3(xyz)]

    gt_heights = tf.reduce_mean(gt_triangles[:, :, :, 2],
                                axis=-1)  # [b, (223*223*2),]
    pred_heights = tf.reduce_mean(pred_triangles[:, :, :, 2],
                                  axis=-1)  # [b, (223*223*2),]

    # Triangle area = half the absolute 2D cross product of two edge vectors
    gt_areas = (((gt_triangles[:, :, 1, 0] - gt_triangles[:, :, 0, 0]) *
                 (gt_triangles[:, :, 2, 1] - gt_triangles[:, :, 0, 1])) -
                ((gt_triangles[:, :, 2, 0] - gt_triangles[:, :, 0, 0]) *
                 (gt_triangles[:, :, 1, 1] - gt_triangles[:, :, 0, 1])))
    gt_areas = tf.abs(0.5 * gt_areas)
    pred_areas = (((pred_triangles[:, :, 1, 0] - pred_triangles[:, :, 0, 0]) *
                   (pred_triangles[:, :, 2, 1] - pred_triangles[:, :, 0, 1])) -
                  ((pred_triangles[:, :, 2, 0] - pred_triangles[:, :, 0, 0]) *
                   (pred_triangles[:, :, 1, 1] - pred_triangles[:, :, 0, 1])))
    pred_areas = tf.abs(0.5 * pred_areas)
    gt_volumes = tf.multiply(gt_heights, gt_areas)
    pred_volumes = tf.multiply(pred_heights, pred_areas)
    # Loss returned is difference in volume
    return tf.abs(tf.reduce_sum(gt_volumes) - tf.reduce_sum(pred_volumes))
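The area expression above is the standard 2D cross-product (shoelace) formula; a tiny standalone check with no project helpers:

import tensorflow as tf

# Right triangle with legs 3 and 4 in the xy-plane; area should be 6
p0 = tf.constant([0.0, 0.0])
p1 = tf.constant([3.0, 0.0])
p2 = tf.constant([0.0, 4.0])
area = 0.5 * tf.abs((p1[0] - p0[0]) * (p2[1] - p0[1]) -
                    (p2[0] - p0[0]) * (p1[1] - p0[1]))
print(float(area))  # 6.0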