def approximate_fill_rate_loss(gt, pred, fov="kinect"):
    """
    Approximates the fill rate loss as the mean squared error between the
    ground truth and the predicted point clouds.
    Used during early development testing.

    :param gt: ground truth depth image (tf.float32 [b, h, w])
    :param pred: Predicted depth bins (tf.float32 [b, h, w, c])
    :param fov: a string literal that determines the type of camera used
    :return: the loss
    """
    # Turn both depth sources into xyz point clouds.
    truth_cloud = depth_to_xyz(gt, fov=fov)
    estimated_depth = bins_to_depth(pred)
    estimated_cloud = depth_to_xyz(estimated_depth, fov=fov)

    # Entries where the border mask is True are zeroed in BOTH clouds, so
    # they contribute nothing to the squared error. The mask is derived
    # from the ground truth only.
    border_limit = tf.constant(cfg["fill_rate_loss_lim"], dtype=tf.float32)
    discard_mask = clip_by_border(truth_cloud, lim=border_limit)
    keep = tf.cast(tf.logical_not(discard_mask), tf.float32)

    truth_cloud = tf.multiply(truth_cloud, keep)
    estimated_cloud = tf.multiply(estimated_cloud, keep)

    squared_error = tf.square(tf.subtract(truth_cloud, estimated_cloud))
    return tf.reduce_mean(squared_error)
def virtual_normal_loss(gt, pred, fov="kinect"):
    """
    Calculates the virtual normal loss between a ground truth depth image
    and predicted depth bins.

    :param gt: ground truth depth image of shape (b, h, w); a 4-dimensional
        input is reduced to its first channel
    :param pred: predicted softmax depth bins from the neural net of shape
        (b, h, w, c)
    :param fov: string literal forwarded to depth_to_xyz to select the camera
    :return: virtual normal loss
    """
    # Drop the trailing channel axis when one is present.
    if len(gt.shape) == 4:
        gt = gt[:, :, :, 0]

    truth_cloud = depth_to_xyz(gt, input_shape=gt.shape, fov=fov)
    estimated_depth = bins_to_depth(pred)
    estimated_cloud = depth_to_xyz(
        estimated_depth, input_shape=estimated_depth.shape, fov=fov)

    # The same random point groups are drawn from both clouds.
    truth_groups, estimated_groups = generate_random_p_groups(
        truth_cloud,
        estimated_cloud,
        shape=truth_cloud.shape,
        sample_ratio=cfg["vnl_sample_ratio"])

    # Groups flagged invalid on the ground truth side are zeroed out below.
    keep = tf.logical_not(generate_invalid_mask(truth_groups))
    keep = tf.cast(tf.expand_dims(keep, axis=-1), tf.float32)

    truth_normals = generate_unit_normals(truth_groups)
    estimated_normals = generate_unit_normals(estimated_groups)

    normal_diff = tf.subtract(truth_normals, estimated_normals)  # [b, n, 3xyz]
    normal_diff = tf.multiply(normal_diff, keep)

    # Euclidean length of every difference vector; the small epsilon is
    # added before the square root is taken.
    squared_length = tf.reduce_sum(tf.math.square(normal_diff), axis=-1)
    lengths = tf.math.sqrt(squared_length + 1.0e-10)  # [b, n]

    flat_lengths = tf.reshape(lengths, (-1, ))
    return tf.reduce_mean(flat_lengths)
def evaluate_model(model_path):
    """
    Evaluates a model using common metrics for comparison

    @param model_path: String path to the model
    @return: dictionary with the criteria as the key, and their values
    """
    model = load_model(model_path)
    ds = load_nyudv2(batch=4, shuffle=False, split='validation')

    # Every criterion starts at zero; evaluate_error hands back the updated
    # dictionary after each batch.
    criterion_names = (
        'err_absRel',
        'err_squaRel',
        'err_rms',
        'err_silog',
        'err_logRms',
        'err_silog2',
        'err_delta1',
        'err_delta2',
        'err_delta3',
        'err_log10',
        'err_whdr',
        'n_pixels',
    )
    criteria = dict.fromkeys(criterion_names, 0)

    for rgb, d in ds:
        depth_estimate = bins_to_depth(model.predict(rgb))
        criteria = evaluate_error(d, depth_estimate, criteria)
    return criteria
def custom_accuracy(gt, pred):
    """
    Custom accuracy for evaluating performance during training and validation

    @param gt: Reshaped ground truth depth map, (224, 224, 1)
    @param pred: Predicted depth bins, (224, 224, 150), (output from model)
    @return: Accuracy as inverse mean square error in range 0-1, where 1 is
        perfect accuracy
    """
    # Give the predicted depth a trailing axis before comparing it with gt.
    depth_estimate = tf.expand_dims(bins_to_depth(pred), axis=-1)
    mse = tf.keras.metrics.MSE(gt, depth_estimate)
    # An error of zero maps to 1; larger errors push the value towards 0.
    return 1. / (1. + mse)
def depth_model(shape=(224, 224, 3)):
    """
    Sets up an encoder-decoder model that only returns a depth map

    @param shape: Input shape to the model (h, w, c)
    @return: tf.keras.Model with depth map output
    """
    inputs = tf.keras.Input(shape=shape)
    # full_model yields two outputs; only the second one is converted to a
    # depth map, the first is not used here.
    _, bin_output = full_model(shape)(inputs)
    depth_output = bins_to_depth(bin_output)
    return tf.keras.Model(inputs=inputs, outputs=depth_output)
def test_model(rgb, d, model):
    """
    Runs a prediction with the model, and displays the input along with the
    estimation for visual comparison

    @param rgb: Input RGB image, (224, 224, 3)
    @param d: Ground truth depth map corresponding to rgb, (224, 224, 1)
    @param model: The model object to run the prediction on
    @return: None, displays images
    """
    print("Testing model...")
    # The model is fed a batch of one: [h, w, c] -> [1, h, w, c]
    batched_rgb = tf.expand_dims(rgb, 0)
    depth_estimate = bins_to_depth(model.predict(batched_rgb))
    # Show input, ground truth and estimate together.
    display_images([batched_rgb[0], d, depth_estimate[0]])
    return None
def test_run(self):
    """
    Checks that actual_fill_rate_loss is zero when the prediction encodes
    the ground truth itself, and non-zero when the predicted depth is
    offset by 0.01.
    """
    raw_path = "C:/Users/Victor/Documents/Github/garbage_view/data/train/data_0/000345.raw"
    with open(raw_path, "rb") as raw_file:
        raw_bytes = raw_file.read()

    # The file is decoded as 480 * 640 unsigned 16-bit values.
    depth_values = struct.unpack("H" * 480 * 640, raw_bytes)
    depth_image = np.array(depth_values, dtype='uint16').reshape((480, 640, 1))
    depth_image = tf.expand_dims(depth_image, axis=0)

    _, gt_depth = resize_normalize(depth_image, depth_image)
    # NOTE(review): the division by 1000 presumably converts mm to m - confirm.
    gt_depth = gt_depth[:, :, :, 0] / 1000
    # Synthetic alternative input that was used earlier:
    #   tf.random.uniform(shape=(8, 224, 224), minval=0.25, maxval=3.)

    # Quantise the ground truth to bins and back, so that the one-hot
    # "prediction" decodes to exactly the depth it is compared against.
    gt_one_hot = tf.one_hot(depth_to_bins(gt_depth), 150)
    gt_depth = bins_to_depth(gt_one_hot)

    no_loss = actual_fill_rate_loss(gt_depth, gt_one_hot)
    print(no_loss)
    self.assertTrue(no_loss == 0.)

    # Offset the depth before binning; the loss is expected to be non-zero.
    shifted_depth = gt_depth + 0.01
    shifted_one_hot = tf.one_hot(depth_to_bins(shifted_depth), 150)

    some_loss = actual_fill_rate_loss(gt_depth, shifted_one_hot)
    print(some_loss)
    self.assertTrue(some_loss != 0.0)
def _depth_to_height(xyz, z_zero):
    """
    Replaces the z channel of a point cloud with z_zero - z.

    :param xyz: point cloud whose last axis holds (x, y, z)
    :param z_zero: reference distance the z values are subtracted from (float)
    :return: point cloud of the same shape with the z channel replaced
    """
    x, y, z = tf.split(xyz, num_or_size_splits=3, axis=-1)
    return tf.concat([x, y, z_zero - z], axis=-1)


def _triangle_volumes(xyz, indices):
    """
    Gathers triangles from a point cloud and returns one volume per triangle:
    mean z of the three corners times the triangle area in the x/y plane.

    :param xyz: point cloud (tf.float32 [b, h, w, 3])
    :param indices: corner positions per triangle (tf.int32 [b, n, 3, 2])
    :return: volumes (tf.float32 [b, n])
    """
    triangles = tf.gather_nd(
        xyz, indices, batch_dims=1)  # [b, n, 3(points), 3(xyz)]
    heights = tf.reduce_mean(triangles[:, :, :, 2], axis=-1)  # [b, n]

    # Two edges leaving corner 0; half the magnitude of their 2D cross
    # product is the area of the triangle projected onto the x/y plane.
    edge_a_x = triangles[:, :, 1, 0] - triangles[:, :, 0, 0]
    edge_a_y = triangles[:, :, 1, 1] - triangles[:, :, 0, 1]
    edge_b_x = triangles[:, :, 2, 0] - triangles[:, :, 0, 0]
    edge_b_y = triangles[:, :, 2, 1] - triangles[:, :, 0, 1]
    areas = tf.abs(0.5 * (edge_a_x * edge_b_y - edge_b_x * edge_a_y))

    return tf.multiply(heights, areas)


def actual_fill_rate_loss(gt, pred, fov="kinect", z_zero=1.3):
    """
    Calculates the fill rate loss between two depth maps through fill rate
    error, i.e. the absolute difference of the summed volumes of both
    triangulated point clouds.

    :param gt: ground truth depth image (tf.float32 [b, h, w]); an input with
        a trailing channel axis ([b, h, w, 1]) is accepted as well
    :param pred: Predicted depth bins (tf.float32 [b, h, w, c])
    :param fov: a string literal that determines the type of camera used
        (string)
    :param z_zero: distance to top of container (float)
    :return: the loss (tf.float32 [,])
    """
    # Accept gt with or without a trailing channel axis, in the same way as
    # virtual_normal_loss does.
    if len(gt.shape) == 4:
        gt = gt[:, :, :, 0]

    gt_xyz = depth_to_xyz(gt, fov=fov)
    pred_depth = bins_to_depth(pred)
    pred_xyz = depth_to_xyz(pred_depth, fov=fov)

    # Extract the region of interest. The mask is derived from the ground
    # truth only; entries where it is True are zeroed in both clouds.
    lim = tf.constant(cfg["fill_rate_loss_lim"], dtype=tf.float32)
    gt_mask = clip_by_border(gt_xyz, lim=lim)
    keep = tf.expand_dims(
        tf.cast(tf.logical_not(gt_mask), tf.float32), axis=-1)

    # z is flipped to z_zero - z BEFORE masking, so masked points end up
    # with zero height instead of z_zero.
    gt_xyz = tf.multiply(_depth_to_height(gt_xyz, z_zero), keep)
    pred_xyz = tf.multiply(_depth_to_height(pred_xyz, z_zero), keep)

    # Number of grid cells along both spatial axes. Falls back to the
    # original fixed 224 x 224 layout when the static shape is unknown.
    default_size = 224
    rows = (gt.shape[1] or default_size) - 1
    cols = (gt.shape[2] or default_size) - 1

    # Every grid cell is split into two triangles. Corner offsets are given
    # as (axis 1, axis 2) positions relative to the cell origin.
    cell = tf.constant([[[0, 0], [1, 0], [0, 1]], [[0, 1], [1, 0], [1, 1]]],
                       dtype=tf.int32)  # [2, 3, 2]

    # One strip of cells along axis 1: [2, 3, 2] -> [rows*2, 3, 2]
    strip = tf.tile(cell, (rows, 1, 1))
    axis1_offset = tf.constant([i // 2 for i in range(rows * 2)],
                               dtype=tf.int32)  # [rows*2]
    axis1_idx = strip[:, :, 0] + tf.tile(
        tf.expand_dims(axis1_offset, axis=-1), (1, 3))  # [rows*2, 3]

    # Repeat the strip for every cell position along axis 2.
    axis1_idx = tf.tile(axis1_idx, (cols, 1))  # [rows*cols*2, 3]
    grid = tf.tile(strip, (cols, 1, 1))  # [rows*cols*2, 3, 2]
    axis2_offset = tf.constant(
        [i // (rows * 2) for i in range(rows * cols * 2)], dtype=tf.int32)
    axis2_idx = grid[:, :, 1] + tf.tile(
        tf.expand_dims(axis2_offset, axis=-1), (1, 3))  # [rows*cols*2, 3]

    indices = tf.stack([axis1_idx, axis2_idx], axis=-1)  # [rows*cols*2, 3, 2]

    # The batch size is taken from the dynamic shape, so the index tensor
    # matches the real batch even when the static batch dimension is unknown.
    batch_size = tf.shape(gt)[0]
    indices = tf.tile(
        tf.expand_dims(indices, axis=0), tf.stack([batch_size, 1, 1, 1]))

    # Construct triangles and get their area and average height to calculate
    # volumes
    gt_volumes = _triangle_volumes(gt_xyz, indices)
    pred_volumes = _triangle_volumes(pred_xyz, indices)

    # Loss returned is difference in volume
    return tf.abs(tf.reduce_sum(gt_volumes) - tf.reduce_sum(pred_volumes))