def create_gt_dict(n_max_instances):
    '''
    Returns gt_dict containing:
        - instance_per_point: BxN
        - normal_per_point: BxNx3
        - type_per_instance: BxK
        - points_per_instance: BxKxN'x3, sampled points on each instance
        - parameters: a dict, each entry is a BxKx... tensor
    '''
    gt_dict = {}
    gt_dict['instance_per_point'] = tf.placeholder(dtype=tf.int32, shape=[None, None])
    gt_dict['normal_per_point'] = tf.placeholder(dtype=tf.float32, shape=[None, None, 3])
    gt_dict['type_per_instance'] = tf.placeholder(
        dtype=tf.int32, shape=[None, n_max_instances])
    gt_dict['points_per_instance'] = tf.placeholder(
        dtype=tf.float32, shape=[None, n_max_instances, None, 3])
    gt_dict['parameters'] = {}
    for fitter_cls in fitter_factory.get_all_fitter_classes():
        fitter_cls.insert_gt_placeholders(gt_dict['parameters'],
                                          n_max_instances=n_max_instances)
    return gt_dict
def fill_gt_dict_with_batch_data(feed_dict, gt_dict, batch):
    feed_dict.update({
        gt_dict['points_per_instance']: batch['P_gt'],
        gt_dict['normal_per_point']: batch['normal_gt'],
        gt_dict['instance_per_point']: batch['I_gt'],
        gt_dict['type_per_instance']: batch['T_gt'],
    })
    for fitter_cls in fitter_factory.get_all_fitter_classes():
        fitter_cls.fill_gt_placeholders(feed_dict, gt_dict['parameters'], batch)
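# --- Example (sketch) ---
# A minimal smoke test for the two helpers above, assuming arbitrary sizes
# B=2, N=1024, N'=512, K=12. Any fitter-specific parameter entries expected
# by fill_gt_placeholders would also need to be present in `batch`; this
# sketch only covers the four fixed keys.
def _example_fill_gt_dict():
    import numpy as np
    n_max_instances = 12
    gt_dict = create_gt_dict(n_max_instances)
    batch = {
        'P_gt': np.zeros([2, n_max_instances, 512, 3], dtype=np.float32),  # BxKxN'x3
        'normal_gt': np.zeros([2, 1024, 3], dtype=np.float32),  # BxNx3
        'I_gt': np.zeros([2, 1024], dtype=np.int32),  # BxN
        'T_gt': np.zeros([2, n_max_instances], dtype=np.int32),  # BxK
    }
    feed_dict = {}
    fill_gt_dict_with_batch_data(feed_dict, gt_dict, batch)
    return feed_dict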
def get_param_dims_pair_list(n_instances_per_type):
    pred_ph = {}
    for fitter_cls in fitter_factory.get_all_fitter_classes():
        fitter_cls.insert_prediction_placeholders(pred_ph, n_instances_per_type)
    param_pair_list = []
    for key in pred_ph:
        ph = pred_ph[key]
        if len(ph.get_shape()) == 2:
            # a BxK placeholder carries one scalar parameter per instance
            param_pair_list.append((key, [n_instances_per_type, 1]))
        else:
            # a BxKxD placeholder carries a D-dimensional parameter per instance
            param_pair_list.append(
                (key, [n_instances_per_type, ph.get_shape()[2]]))
    return param_pair_list
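# For illustration only: with a hypothetical plane fitter that registers a
# BxKx3 placeholder 'plane_n' and a BxK placeholder 'plane_c', the returned
# list for n_instances_per_type=12 would be:
#     [('plane_n', [12, 3]), ('plane_c', [12, 1])]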
def get_per_point_model(scope, P, n_max_instances, is_training, bn_decay):
    '''
    Inputs:
        - P: BxNx3 tensor, the input point cloud
        - K := n_max_instances
    Outputs: a dict, containing
        - W: BxNxK, fractional segmentation instances
        - normal_per_point: BxNx3, per-point normal
        - type_per_point: BxNxT, per-point type. NOTE: this is before taking softmax!
        - parameters: a dict, each entry is a BxKx... tensor
    '''
    n_registered_primitives = fitter_factory.get_n_registered_primitives()
    with tf.variable_scope(scope):
        net_results = build_pointnet2_seg(
            'est_net',
            X=P,
            out_dims=[n_max_instances, 3, n_registered_primitives],
            is_training=is_training,
            bn_decay=bn_decay)
        W, normal_per_point, type_per_point = net_results
        W = tf.nn.softmax(W, axis=2)  # BxNxK
        normal_per_point = tf.nn.l2_normalize(normal_per_point, axis=2)  # BxNx3
        fitter_feed = {
            'P': P,
            'W': W,
            'normal_per_point': normal_per_point,
        }
        parameters = {}
        for fitter_cls in fitter_factory.get_all_fitter_classes():
            fitter_cls.compute_parameters(fitter_feed, parameters)
        return {
            'W': W,
            'normal_per_point': normal_per_point,
            'type_per_point': type_per_point,
            'parameters': parameters,
        }
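# --- Example (sketch) ---
# A minimal driver for the per-point model above; the scope name and K=12 are
# placeholders, and bn_decay=None assumes the underlying PointNet++ layers
# accept that default.
def _example_build_per_point_model():
    P = tf.placeholder(tf.float32, shape=[None, None, 3])  # BxNx3 input cloud
    is_training = tf.placeholder(tf.bool, shape=[])
    pred_dict = get_per_point_model(
        'spfn', P, n_max_instances=12, is_training=is_training, bn_decay=None)
    # pred_dict['W'] is BxNxK; pred_dict['type_per_point'] is the pre-softmax BxNxT.
    return pred_dict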
def evaluate(pred_dict, gt_dict, is_eval, is_nn, P_in=None):
    '''
    Input:
        pred_dict should contain:
            - W: BxNxK, segmentation instances. Zero rows indicate unassigned points.
            - normal_per_point: BxNx3, per-point normal
            - type_per_point: per-point type
                - a logit of shape BxNxT if is_eval=False, an actual value of shape BxN otherwise
                - can contain -1
            - parameters: a dict, each entry is a BxKx... tensor
        gt_dict should be obtained from calling create_gt_dict
        P_in: BxNx3, the input point cloud, used only when is_eval=True
    Returns: {loss_dict, matching_indices} + stats from calculate_eval_stats(), where
        loss_dict contains:
            - normal_loss: B, averaged over all N points
            - type_loss: B, averaged over all N points
                - cross-entropy loss during training, accuracy at test time
            - miou_loss: BxK, mean IoU loss for each matched instance
            - residue_loss: BxK, residue loss for each instance
            - parameter_loss: BxK, parameter loss for each instance
            - avg_miou_loss: B
            - avg_residue_loss: B
            - avg_parameter_loss: B
        matching_indices: BxK, where the (b, k)th ground truth primitive is matched
            with prediction (b, matching_indices[b, k])
    '''
    # dimension tensors
    W = pred_dict['W']
    batch_size = tf.shape(W)[0]
    n_points = tf.shape(W)[1]
    n_max_instances = W.get_shape()[2]  # n_max_instances should not be dynamic
    n_registered_primitives = fitter_factory.get_n_registered_primitives()

    if is_eval and is_nn:
        # at evaluation time, W should be binary and filtered (if it comes from a network)
        W = nn_filter_W(W)

    # shortcuts
    # note that I_gt can contain -1, indicating an instance of unknown primitive type
    I_gt = gt_dict['instance_per_point']  # BxN
    T_gt = gt_dict['type_per_instance']  # BxK

    # only count instances of known primitive type, as -1 entries are ignored
    n_instances_gt = tf.reduce_max(I_gt, axis=1) + 1
    # mask_gt[b, k] = 1 iff instance k is present in ground truth batch b
    mask_gt = tf.sequence_mask(n_instances_gt, maxlen=n_max_instances)  # BxK

    matching_indices = tf.stop_gradient(
        tf.py_func(hungarian_matching, [W, I_gt], Tout=tf.int32))  # BxK
    # losses below all have dimension BxK unless noted
    miou_loss = compute_miou_loss(W, I_gt, matching_indices)
    normal_loss = compute_normal_loss(
        pred_dict['normal_per_point'],
        gt_dict['normal_per_point'],
        angle_diff=is_eval)  # B
    per_point_type_loss = compute_per_point_type_loss(
        pred_dict['type_per_point'], I_gt, T_gt, is_eval=is_eval)  # B

    residue_losses = []  # a length-T list of BxK tensors
    parameter_losses = []  # a length-T list of BxK tensors
    residue_per_point_array = []  # a length-T list of BxKxN' tensors
    for fitter_cls in fitter_factory.get_all_fitter_classes():
        residue_per_point = fitter_cls.compute_residue_loss(
            pred_dict['parameters'],
            gt_dict['points_per_instance'],
            matching_indices)  # BxKxN'
        residue_per_point_array.append(residue_per_point)
        residue_losses.append(tf.reduce_mean(residue_per_point, axis=2))
        parameter_loss = fitter_cls.compute_parameter_loss(
            pred_dict['parameters'],
            gt_dict['parameters'],
            matching_indices,
            angle_diff=is_eval)
        if parameter_loss is None:
            parameter_loss = tf.zeros(dtype=tf.float32,
                                      shape=[batch_size, n_max_instances])
        parameter_losses.append(parameter_loss)
    residue_losses = tf.stack(residue_losses, axis=2)  # BxKxT
    parameter_losses = tf.stack(parameter_losses, axis=2)  # BxKxT
    residue_per_point_array = tf.stack(residue_per_point_array, axis=3)  # BxKxN'xT

    # aggregate losses across fitters according to the ground truth type
    residue_loss = aggregate_loss_from_stacked(residue_losses, T_gt)  # BxK
    parameter_loss = aggregate_loss_from_stacked(parameter_losses, T_gt)  # BxK

    loss_dict = {
        'normal_loss': normal_loss,
        'type_loss': per_point_type_loss,
        'miou_loss': miou_loss,
        'residue_loss': residue_loss,
        'parameter_loss': parameter_loss,
        'avg_miou_loss': reduce_mean_masked_instance(miou_loss, mask_gt),
        'avg_residue_loss': reduce_mean_masked_instance(residue_loss, mask_gt),
        'avg_parameter_loss': reduce_mean_masked_instance(parameter_loss, mask_gt),
    }

    result = {'loss_dict': loss_dict, 'matching_indices': matching_indices}
    if is_eval:
        result.update(
            calculate_eval_stats(
                W=W,
                matching_indices=matching_indices,
                mask_gt=mask_gt,
                P_in=P_in,
                type_per_point=pred_dict['type_per_point'],
                T_gt=T_gt,
                parameters=pred_dict['parameters'],
                residue_losses=residue_losses,
                parameter_loss=parameter_loss,
                residue_per_point_array=residue_per_point_array,
            ))
    return result
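# --- Sketch of the matching helper ---
# hungarian_matching runs on the host through tf.py_func. One plausible
# implementation for the (W, I_gt) call above, assuming SciPy is available;
# this is a sketch, not necessarily the repository's version, and it does not
# cover the other call site that passes a flattened residue matrix instead.
def hungarian_matching_sketch(W_pred, I_gt):
    import numpy as np
    from scipy.optimize import linear_sum_assignment
    # W_pred: BxNxK soft assignment; I_gt: BxN with -1 marking unknown-type points.
    # Returns BxK int32 so that gt instance k is matched to prediction matching[b, k].
    batch_size, n_points, n_max_instances = W_pred.shape
    matching = np.zeros([batch_size, n_max_instances], dtype=np.int32)
    for b in range(batch_size):
        # one-hot gt memberships, ignoring -1 entries
        W_gt = np.zeros([n_points, n_max_instances], dtype=np.float32)
        valid = I_gt[b] >= 0
        W_gt[np.arange(n_points)[valid], I_gt[b][valid]] = 1.0
        # negative column overlap as assignment cost: maximize total overlap
        cost = -np.matmul(W_gt.T, W_pred[b])  # KxK
        row_ind, col_ind = linear_sum_assignment(cost)
        matching[b, row_ind] = col_ind
    return matching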
def calculate_eval_stats(W, matching_indices, mask_gt, P_in, type_per_point,
                         T_gt, parameters, residue_losses, parameter_loss,
                         residue_per_point_array):
    '''
    Returns a dict containing:
        - stats: {
              per_instance_type_accuracy: B, average primitive type accuracy for a shape
              avg_residue_loss_without_gt: B, average residue loss using the predicted type
              parameter_loss_without_gt: B, average parameter loss using the predicted type
                  (over only primitives with matched type)
          }
        - null_mask: BxK, indicating which predicted primitives are null
        - mask_gt_nulled: BxK, indicating which ground truth primitives are not null
          and are matched with a non-null predicted primitive
        - instance_per_point: BxN, non-one-hot version of W
        - type_per_instance: BxK, type for predicted primitives
        - residue_gt_primitive: BxKxN', distance from sampled points on ground truth
          S_k to the predicted primitive matched with S_k
        - residue_to_closest: BxN, distance from each input point to the closest
          predicted primitive
    '''
    batch_size = tf.shape(W)[0]
    n_points = tf.shape(W)[1]
    n_max_instances = W.get_shape()[2]  # n_max_instances should not be dynamic
    n_registered_primitives = fitter_factory.get_n_registered_primitives()

    # null_mask indicates which predicted primitives are null
    null_mask = tf.reduce_sum(W, axis=1) < 0.5  # BxK
    # I can have -1 entries, indicating unassigned points, just like I_gt,
    # and tf.one_hot(I) == W
    I = tf.where(
        tf.reduce_sum(W, axis=2) > 0.5,
        tf.argmax(W, axis=2, output_type=tf.int32),
        tf.fill([batch_size, n_points], -1))  # BxN

    per_point_type_one_hot = tf.one_hot(
        type_per_point, depth=n_registered_primitives, dtype=tf.float32)  # BxNxT
    instance_type_prob = tf.reduce_sum(
        tf.expand_dims(W, axis=3) * tf.expand_dims(per_point_type_one_hot, axis=2),
        axis=1)  # BxKxT
    instance_type = tf.argmax(instance_type_prob, axis=2, output_type=tf.int32)  # BxK

    # null_mask_gt indicates which gt primitives are matched to a null prediction
    null_mask_gt = batched_gather(null_mask, matching_indices, axis=1)  # BxK
    # only count these gt primitives towards some metrics
    mask_gt_nulled = tf.logical_and(mask_gt, tf.logical_not(null_mask_gt))

    residue_loss_without_gt = aggregate_loss_from_stacked(
        residue_losses,
        batched_gather(instance_type, matching_indices, axis=1))  # BxK
    avg_residue_loss_without_gt = reduce_mean_masked_instance(
        residue_loss_without_gt, mask_gt_nulled)  # B
    # for parameter loss w/o gt, only count instances whose predicted type
    # matches the ground truth type
    instance_matched_mask = tf.equal(
        T_gt, batched_gather(instance_type, matching_indices, axis=1))  # BxK, boolean
    per_instance_type_accuracy = reduce_mean_masked_instance(
        tf.to_float(instance_matched_mask), mask_gt_nulled)  # B
    parameter_loss_without_gt = reduce_mean_masked_instance(
        parameter_loss, tf.logical_and(instance_matched_mask, mask_gt_nulled))

    result = {
        'stats': {
            'per_instance_type_accuracy': per_instance_type_accuracy,  # B
            'avg_residue_loss_without_gt': avg_residue_loss_without_gt,  # B
            'parameter_loss_without_gt': parameter_loss_without_gt,  # B
        },
    }

    residue_matrix = []
    identity_matching_indices = tf.tile(
        tf.expand_dims(tf.range(n_max_instances), axis=0), [batch_size, 1])  # BxK
    for fitter_cls in fitter_factory.get_all_fitter_classes():
        residue_per_point = fitter_cls.compute_residue_loss(
            parameters,
            tf.tile(tf.expand_dims(P_in, axis=1), [1, n_max_instances, 1, 1]),
            matching_indices=identity_matching_indices)  # BxKxN
        residue_matrix.append(residue_per_point)
    residue_matrix = tf.stack(residue_matrix, axis=3)  # BxKxNxT, this matrix might be large!

    # residue_to_primitive[b, n, k] = residue_matrix[b, k, n, instance_type[b, k]]
    indices_0 = tf.tile(
        tf.expand_dims(tf.expand_dims(tf.range(batch_size), axis=1), axis=2),
        [1, n_points, n_max_instances])  # BxNxK
    indices_1 = tf.tile(
        tf.expand_dims(tf.expand_dims(tf.range(n_max_instances), axis=0), axis=1),
        [batch_size, n_points, 1])  # BxNxK
    indices_2 = tf.tile(
        tf.expand_dims(tf.expand_dims(tf.range(n_points), axis=0), axis=2),
        [batch_size, 1, n_max_instances])  # BxNxK
    indices_3 = tf.tile(
        tf.expand_dims(instance_type, axis=1), [1, n_points, 1])  # BxNxK
    residue_to_primitive = tf.gather_nd(
        residue_matrix,
        indices=tf.stack([indices_0, indices_1, indices_2, indices_3], axis=3))  # BxNxK

    # set residues to null primitives to a large number
    null_mask_W_like = tf.tile(
        tf.expand_dims(null_mask, axis=1), [1, n_points, 1])  # BxNxK
    residue_to_primitive = tf.where(
        null_mask_W_like,
        1e8 * tf.ones_like(residue_to_primitive),
        residue_to_primitive)  # BxNxK
    residue_to_closest = tf.reduce_min(residue_to_primitive, axis=2)  # BxN

    residue_gt_primitive = aggregate_per_point_loss_from_stacked(
        residue_per_point_array,
        batched_gather(instance_type, matching_indices, axis=1))  # BxKxN', squared distance

    # save information for downstream analysis
    result['null_mask'] = null_mask  # BxK
    result['mask_gt_nulled'] = mask_gt_nulled  # BxK
    result['instance_per_point'] = I  # BxN
    result['type_per_instance'] = instance_type  # BxK
    result['residue_gt_primitive'] = tf.sqrt(residue_gt_primitive)  # BxKxN'
    result['residue_to_closest'] = tf.sqrt(residue_to_closest)  # BxN
    return result
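# --- Sketches of the small tensor helpers used above ---
# batched_gather, aggregate_loss_from_stacked, aggregate_per_point_loss_from_stacked
# and reduce_mean_masked_instance are provided elsewhere in the repository; the
# sketches below match only how they are called in this section (2-D indices,
# axis=1) and are assumptions, not the canonical implementations.
def batched_gather_sketch(data, indices, axis=1):
    # data: BxKx..., indices: BxM; returns out[b, m, ...] = data[b, indices[b, m], ...]
    assert axis == 1
    batch_size = tf.shape(indices)[0]
    m = tf.shape(indices)[1]
    batch_idx = tf.tile(tf.expand_dims(tf.range(batch_size), axis=1), [1, m])  # BxM
    return tf.gather_nd(data, tf.stack([batch_idx, indices], axis=2))

def aggregate_loss_from_stacked_sketch(loss_stacked, T):
    # loss_stacked: BxKxT per-type losses, T: BxK types; picks loss_stacked[b, k, T[b, k]]
    one_hot = tf.one_hot(T, depth=tf.shape(loss_stacked)[2], dtype=tf.float32)  # BxKxT
    return tf.reduce_sum(loss_stacked * one_hot, axis=2)  # BxK

def aggregate_per_point_loss_from_stacked_sketch(loss_stacked, T):
    # loss_stacked: BxKxN'xT, T: BxK; picks loss_stacked[b, k, :, T[b, k]]
    one_hot = tf.one_hot(T, depth=tf.shape(loss_stacked)[3], dtype=tf.float32)  # BxKxT
    return tf.reduce_sum(loss_stacked * tf.expand_dims(one_hot, axis=2), axis=3)  # BxKxN'

def reduce_mean_masked_instance_sketch(loss, mask):
    # loss: BxK, mask: BxK boolean; per-batch mean over masked-in instances
    mask_f = tf.to_float(mask)
    count = tf.reduce_sum(mask_f, axis=1)  # B
    total = tf.reduce_sum(loss * mask_f, axis=1)  # B
    return total / tf.maximum(count, 1.0)  # guard against empty masks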
def get_direct_regression_model(scope, P, n_max_instances, gt_dict, is_training, bn_decay):
    '''
    Inputs:
        - P: BxNx3 tensor, the input point cloud
        - K := n_max_instances
        - gt_dict: ground truth dictionary, needed since we are also computing the loss
    Outputs: (pred_dict, total_loss), where pred_dict contains
        - W: BxNxK, binary segmentation instances
        - normal_per_point: BxNx3, per-point normal; DPPN does not predict normals,
          so all normals are constant
        - type_per_point: BxNxT, per-point type, binary
        - parameters: a dict, each entry is a BxKx... tensor
    '''
    n_registered_primitives = fitter_factory.get_n_registered_primitives()
    batch_size = tf.shape(P)[0]
    n_points = tf.shape(P)[1]

    param_pair_list = get_param_dims_pair_list(n_max_instances)
    flattened_param_dims = [pr[1][0] * pr[1][1] for pr in param_pair_list]
    reg_result = build_pointnet2_cls(
        'direct_reg_net',
        point_cloud=P,
        out_dims=flattened_param_dims,
        is_training=is_training,
        bn_decay=bn_decay)
    parameters = {}
    for idx, cls_result in enumerate(reg_result):
        param_name, param_dim = param_pair_list[idx]
        if param_dim[1] == 1:
            parameters[param_name] = cls_result  # BxK
        else:
            parameters[param_name] = tf.reshape(cls_result, [-1, *param_dim])
    # normalize quantities
    for fitter_cls in fitter_factory.get_all_fitter_classes():
        fitter_cls.normalize_parameters(parameters)

    residue_losses = []
    for fitter_cls in fitter_factory.get_all_fitter_classes():
        residue_per_point = fitter_cls.compute_residue_loss_pairwise(
            parameters, gt_dict['points_per_instance'])  # BxKxKxN'
        # residue_avg[b, k1, k2] is roughly the distance between gt instance k1
        # and predicted instance k2
        residue_avg = tf.reduce_mean(residue_per_point, axis=3)  # BxKxK
        residue_losses.append(residue_avg)
    residue_matrix = tf.stack(residue_losses, axis=3)  # BxKxKxT
    residue_matrix_flattened = tf.reshape(
        residue_matrix, shape=[batch_size, n_max_instances, -1])  # BxKxKT

    n_instance_gt = tf.reduce_max(gt_dict['instance_per_point'], axis=1) + 1
    mask_gt = tf.sequence_mask(n_instance_gt, maxlen=n_max_instances)
    matching_indices = tf.stop_gradient(
        tf.py_func(hungarian_matching,
                   [residue_matrix_flattened, n_instance_gt],
                   Tout=tf.int32))  # BxK
    # only one element in matching_matrix[b, k, ..., ...] is nonzero
    matching_matrix = tf.reshape(
        tf.one_hot(matching_indices,
                   depth=n_max_instances * n_registered_primitives),
        [batch_size, n_max_instances, n_max_instances,
         n_registered_primitives])  # BxKxKxT
    direct_loss = tf.reduce_sum(matching_matrix * residue_matrix, axis=[2, 3])  # BxK
    direct_loss = tf.reduce_sum(direct_loss, axis=1) / tf.to_float(n_instance_gt)  # B

    # reorder parameters so that the kth instance has type matching_instance_type[b, k]
    # with parameters[b, k, ...]
    matching_instance_id = tf.argmax(
        tf.reduce_sum(matching_matrix, axis=3), axis=2, output_type=tf.int32)  # BxK
    matching_instance_type = tf.argmax(
        tf.reduce_sum(matching_matrix, axis=2), axis=2, output_type=tf.int32)  # BxK
    for param in parameters:
        parameters[param] = batched_gather(parameters[param],
                                           matching_instance_id, axis=1)

    # next construct W: BxNxK
    residue_per_point_matrix = []
    identity_matching_indices = tf.tile(
        tf.expand_dims(tf.range(n_max_instances), axis=0), [batch_size, 1])  # BxK
    for fitter_cls in fitter_factory.get_all_fitter_classes():
        residue_per_point = fitter_cls.compute_residue_loss(
            parameters,
            tf.tile(tf.expand_dims(P, axis=1), [1, n_max_instances, 1, 1]),
            matching_indices=identity_matching_indices)  # BxKxN
        residue_per_point_matrix.append(residue_per_point)
    residue_per_point_matrix = tf.stack(residue_per_point_matrix, axis=3)  # BxKxNxT

    # dist(P[b, n], instance k) = residue_per_point_matrix[b, k, n, matching_instance_type[b, k]]
    indices_0 = tf.tile(
        tf.expand_dims(tf.expand_dims(tf.range(batch_size), axis=1), axis=1),
        [1, n_points, n_max_instances])  # BxNxK
    indices_1 = tf.tile(
        tf.expand_dims(tf.expand_dims(tf.range(n_max_instances), axis=0), axis=0),
        [batch_size, n_points, 1])  # BxNxK
    indices_2 = tf.tile(
        tf.expand_dims(tf.expand_dims(tf.range(n_points), axis=0), axis=2),
        [batch_size, 1, n_max_instances])  # BxNxK
    indices_3 = tf.tile(
        tf.expand_dims(matching_instance_type, axis=1), [1, n_points, 1])  # BxNxK
    P_to_instance_dist = tf.gather_nd(
        residue_per_point_matrix,
        indices=tf.stack([indices_0, indices_1, indices_2, indices_3], axis=3))  # BxNxK

    instance_per_point = tf.argmin(P_to_instance_dist, axis=2, output_type=tf.int32)  # BxN
    W = tf.one_hot(instance_per_point, depth=n_max_instances)  # BxNxK
    type_per_point = batched_gather(matching_instance_type, instance_per_point, axis=1)

    # we do not predict normals, so emit a constant per-point normal
    normal_per_point = tf.tile(
        tf.expand_dims(
            tf.expand_dims(tf.constant([1, 0, 0], dtype=tf.float32), axis=0),
            axis=0),
        [batch_size, n_points, 1])  # BxNx3

    return {
        'W': W,
        'normal_per_point': normal_per_point,
        'type_per_point': tf.one_hot(type_per_point, depth=n_registered_primitives),
        'parameters': parameters,
    }, direct_loss
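# --- Example (sketch) ---
# Wiring the direct-regression baseline; unlike get_per_point_model, the gt
# dict is needed at graph-construction time because the matching loss is
# computed inside. The scope name, K=12, and the optimizer are placeholders.
def _example_build_direct_regression_model():
    P = tf.placeholder(tf.float32, shape=[None, None, 3])
    is_training = tf.placeholder(tf.bool, shape=[])
    gt_dict = create_gt_dict(n_max_instances=12)
    pred_dict, direct_loss = get_direct_regression_model(
        'dppn', P, n_max_instances=12, gt_dict=gt_dict,
        is_training=is_training, bn_decay=None)
    train_op = tf.train.AdamOptimizer(1e-3).minimize(tf.reduce_mean(direct_loss))
    return pred_dict, train_op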
bundle_dir = conf.get_bundle_dir()
if not os.path.exists(bundle_dir):
    os.makedirs(bundle_dir)

tf_conf = tf.ConfigProto()
tf_conf.allow_soft_placement = True
tf_conf.gpu_options.allow_growth = True

graph = tf.Graph()
with graph.as_default():
    pred_ph = {}
    pred_ph['W'] = tf.placeholder(shape=[None, None, n_max_instances], dtype=tf.float32)
    pred_ph['normal_per_point'] = tf.placeholder(shape=[None, None, 3], dtype=tf.float32)
    pred_ph['type_per_point'] = tf.placeholder(shape=[None, None], dtype=tf.int32)  # should be BxN in test
    pred_ph['parameters'] = {}
    for fitter_cls in fitter_factory.get_all_fitter_classes():
        fitter_cls.insert_prediction_placeholders(pred_ph['parameters'], n_max_instances)
    gt_ph = evaluation.create_gt_dict(n_max_instances)
    P_in = tf.placeholder(shape=[None, None, 3], dtype=tf.float32)
    eval_result_node = evaluation.evaluate(
        pred_ph, gt_ph, is_eval=True, is_nn=conf.is_nn(), P_in=P_in)

stats = {
    'total_miou_loss': 0.0,
    'total_normal_loss': 0.0,
    'total_type_loss': 0.0,
    'total_residue_loss': 0.0,
    'total_parameter_loss': 0.0,
    'per_instance_type_accuracy': 0.0,
    'avg_residue_loss_without_gt': 0.0,
    'parameter_loss_without_gt': 0.0,