예제 #1
0
def _filter_valid_objects(inputs):
  """Filters the per-object tensors of `inputs` down to the valid objects.

  An object is kept when its class is greater than 0 and, for each of the
  optional fields that is present, it is flagged as having 3d info and its
  difficulty is greater than 0. Every key of `_OBJECT_KEYS` found in `inputs`
  is overwritten in place with its masked tensor; nothing is returned. When
  the object class field is missing, `inputs` is left untouched.

  Args:
    inputs: A dictionary containing input tensors.
  """
  fields = standard_fields.InputDataFields
  if fields.objects_class not in inputs:
    return

  # Start from "class id is positive" and AND in each optional criterion.
  keep_mask = tf.reshape(tf.greater(inputs[fields.objects_class], 0), [-1])
  if fields.objects_has_3d_info in inputs:
    has_3d_info = tf.cast(inputs[fields.objects_has_3d_info], dtype=tf.bool)
    keep_mask = tf.logical_and(tf.reshape(has_3d_info, [-1]), keep_mask)
  if fields.objects_difficulty in inputs:
    difficulty = tf.reshape(inputs[fields.objects_difficulty], [-1])
    keep_mask = tf.logical_and(keep_mask, tf.greater(difficulty, 0))

  for key in _OBJECT_KEYS:
    if key not in inputs:
      continue
    inputs[key] = tf.boolean_mask(inputs[key], keep_mask)
예제 #2
0
def flip_randomly_points_and_normals_motions(points, normals, motions,
                                             is_training):
    """Randomly flips points, normals and motions during training.

    Two independent uniform samples pick the x and y factors: each factor is
    1.0 when its sample is greater than 0.5 and -1.0 otherwise. Both factors
    are handed to `flip_points_and_normals_motions`. Outside of training the
    arguments are returned unchanged.

    Args:
      points: A tf.float32 tensor of size [N, 3] containing points.
      normals: A tf.float32 tensor of size [N, 3] containing normals or None.
      motions: A tf.float32 tensor of size [N, 3] containing motion vectors
        or None.
      is_training: True if in training stage. Random flipping only takes
        place during training.

    Returns:
      A (points, normals, motions) tuple. In training mode these are the
      values produced by `flip_points_and_normals_motions`; otherwise they
      are the input arguments.
    """
    if not is_training:
        return points, normals, motions

    def _random_sign():
        """Returns a scalar tf.float32 tensor holding either 1.0 or -1.0."""
        sample = tf.random.uniform([],
                                   minval=0.0,
                                   maxval=1.0,
                                   dtype=tf.float32)
        return tf.cond(tf.greater(sample, 0.5),
                       lambda: tf.constant(1.0, dtype=tf.float32),
                       lambda: tf.constant(-1.0, dtype=tf.float32))

    # The x factor is sampled before the y factor.
    x_factor = _random_sign()
    y_factor = _random_sign()
    points, normals, motions = flip_points_and_normals_motions(
        points=points,
        normals=normals,
        motions=motions,
        x_rotate=x_factor,
        y_rotate=y_factor)
    return points, normals, motions
def _box_rotation_regression_loss(loss_type, is_balanced,
                                  input_boxes_rotation_matrix,
                                  input_boxes_instance_id,
                                  output_boxes_rotation_matrix, delta):
  """Computes a regression loss between target and predicted box rotations.

  Both rotation tensors are reshaped to rows of nine values, a per-row loss
  is computed, multiplied by the per-box weights and averaged.

  Args:
    loss_type: Either 'huber' or 'absolute_difference'.
    is_balanced: If True, the per-box weights come from
      `loss_utils.get_balanced_loss_weights_multiclass` applied to
      `input_boxes_instance_id`; otherwise every box has weight one.
    input_boxes_rotation_matrix: Tensor with the target rotation matrices.
    input_boxes_instance_id: Tensor with the instance id of each target box.
    output_boxes_rotation_matrix: Tensor with the predicted rotation matrices.
    delta: The `delta` argument of the Huber loss. Only used when `loss_type`
      is 'huber'.

  Returns:
    A tf.float32 scalar loss. It is 0.0 when the first dimension of either
    rotation tensor is zero.

  Raises:
    ValueError: If `loss_type` is not one of the supported values. The check
      lives in the non-empty branch, so it only fires when that branch is
      built or executed.
  """

  def _loss_for_nonempty_boxes():
    """Weighted mean loss; only valid when both tensors have rows."""
    if is_balanced:
      box_weights = loss_utils.get_balanced_loss_weights_multiclass(
          labels=input_boxes_instance_id)
    else:
      num_boxes = tf.shape(input_boxes_instance_id)[0]
      box_weights = tf.ones([num_boxes, 1], dtype=tf.float32)
    target = tf.reshape(input_boxes_rotation_matrix, [-1, 9])
    prediction = tf.reshape(output_boxes_rotation_matrix, [-1, 9])
    no_reduction = tf.keras.losses.Reduction.NONE
    if loss_type == 'huber':
      criterion = tf.keras.losses.Huber(delta=delta, reduction=no_reduction)
    elif loss_type == 'absolute_difference':
      criterion = tf.keras.losses.MeanAbsoluteError(reduction=no_reduction)
    else:
      raise ValueError(('Unknown loss type %s.' % loss_type))
    per_box_loss = criterion(y_true=target, y_pred=prediction)
    return tf.reduce_mean(per_box_loss * tf.reshape(box_weights, [-1]))

  has_targets = tf.greater(tf.shape(input_boxes_rotation_matrix)[0], 0)
  has_predictions = tf.greater(tf.shape(output_boxes_rotation_matrix)[0], 0)
  return tf.cond(
      tf.logical_and(has_targets, has_predictions),
      _loss_for_nonempty_boxes,
      lambda: tf.constant(0.0, dtype=tf.float32))
예제 #4
0
 def get_metric_dictionary(self):
   """Returns a dictionary of per-class and class-averaged metric tensors.

   For every class id in `self.class_range`, recall, precision and IoU are
   derived from the results of the true-positive, false-positive and
   false-negative metrics of that class. Precision and IoU are set to NaN
   when `tp + fn` is not greater than 0; recall is the plain ratio
   `tp / (tp + fn)`. The averaged entries are computed with `_non_nan_mean`
   over the per-class recall and IoU lists.

   Returns:
     A dictionary whose keys start with `self.eval_prefix`:
       `<prefix>_recall/<class_name>`, `<prefix>_precision/<class_name>`,
       `<prefix>_iou/<class_name>` for every class, plus
       `<prefix>_avg/mean_pixel_accuracy` and `<prefix>_avg/mean_iou`.
   """
   metrics_dict = {}
   class_recall_list = []  # used for calculating mean pixel accuracy.
   class_iou_list = []     # used for calculating mean iou.
   for c in self.class_range:
     tp = self.true_positive_metrics[c].result()
     fp = self.false_positive_metrics[c].result()
     fn = self.false_negative_metrics[c].result()
     class_recall = tp / (tp + fn)
     # `np.nan` is used instead of the `np.NaN` alias, which was removed in
     # NumPy 2.0 and raises AttributeError there.
     class_precision = tf.where(
         tf.greater(tp + fn, 0.0), _safe_div(tp, (tp + fp)),
         tf.constant(np.nan))
     class_iou = tf.where(
         tf.greater(tp + fn, 0.0), tp / (tp + fn + fp), tf.constant(np.nan))
     class_recall_list.append(class_recall)
     class_iou_list.append(class_iou)
     class_name = _get_class_name(class_id=c, label_map=self.label_map)
     metrics_dict[self.eval_prefix +
                  '_recall/{}'.format(class_name)] = class_recall
     metrics_dict[self.eval_prefix +
                  '_precision/{}'.format(class_name)] = class_precision
     metrics_dict[self.eval_prefix + '_iou/{}'.format(class_name)] = class_iou
   mean_pixel_accuracy = _non_nan_mean(class_recall_list)
   mean_iou = _non_nan_mean(class_iou_list)
   metrics_dict[self.eval_prefix +
                '_avg/mean_pixel_accuracy'] = mean_pixel_accuracy
   metrics_dict[self.eval_prefix + '_avg/mean_iou'] = mean_iou
   return metrics_dict
예제 #5
0
def randomly_crop_points(mesh_inputs,
                         view_indices_2d_inputs,
                         x_random_crop_size,
                         y_random_crop_size,
                         epsilon=1e-5):
  """Crops the points to a window centered at a randomly picked point.

  One point is drawn uniformly at random and used as the crop center. For
  each axis with a crop size, points are kept when their coordinate lies
  strictly inside `center -/+ (crop_size / 2 + epsilon)`. For an axis without
  a crop size the window spans the full coordinate range, widened by
  `epsilon`. Both dictionaries are updated in place; nothing is returned.

  Args:
    mesh_inputs: A dictionary containing input mesh (point) tensors. Every
      entry is masked along its first dimension.
    view_indices_2d_inputs: A dictionary containing input point to view
      correspondence tensors. Every entry is masked along its second
      dimension.
    x_random_crop_size: Size of the random crop in x dimension. If None,
      random crop will not take place on x dimension.
    y_random_crop_size: Size of the random crop in y dimension. If None,
      random crop will not take place on y dimension.
    epsilon: Epsilon (a very small value) used to add as a small margin to
      thresholds.
  """
  if x_random_crop_size is None and y_random_crop_size is None:
    return

  points = mesh_inputs[standard_fields.InputDataFields.point_positions]
  # A single random point provides the crop center for both axes.
  center_index = tf.random.uniform([],
                                   minval=0,
                                   maxval=tf.shape(points)[0],
                                   dtype=tf.int32)
  center_x = points[center_index, 0]
  center_y = points[center_index, 1]

  def _axis_mask(coords, center, crop_size):
    """Marks the coordinates lying strictly inside the window of one axis."""
    if crop_size is None:
      lower = tf.reduce_min(coords) - epsilon
      upper = tf.reduce_max(coords) + epsilon
    else:
      lower = center - crop_size / 2.0 - epsilon
      upper = center + crop_size / 2.0 + epsilon
    return tf.logical_and(tf.greater(coords, lower), tf.less(coords, upper))

  points_mask = tf.logical_and(
      _axis_mask(points[:, 0], center_x, x_random_crop_size),
      _axis_mask(points[:, 1], center_y, y_random_crop_size))

  for key in sorted(mesh_inputs):
    mesh_inputs[key] = tf.boolean_mask(mesh_inputs[key], points_mask)

  for key in sorted(view_indices_2d_inputs):
    # Swap the two leading axes so the mask applies to the first axis, then
    # swap them back.
    swapped = tf.transpose(view_indices_2d_inputs[key], [1, 0, 2])
    view_indices_2d_inputs[key] = tf.transpose(
        tf.boolean_mask(swapped, points_mask), [1, 0, 2])
def box_corner_distance_loss_on_object_tensors(inputs,
                                               outputs,
                                               loss_type,
                                               delta=1.0,
                                               is_balanced=False):
    """Computes the batch-averaged box corner regression loss.

    The batch is processed one example at a time. An example contributes the
    value of `_box_corner_distance_loss_on_object_tensors` when both its
    input and its output `objects_length` tensors have a non-zero first
    dimension, and 0.0 otherwise. The per-example values are averaged.

    Args:
      inputs: A dictionary of tf.Tensors with our input data.
      outputs: A dictionary of tf.Tensors with the network output.
      loss_type: Loss type.
      delta: float, the point where the huber loss function changes from a
        quadratic to linear.
      is_balanced: If True, the per-object losses are re-weighted to have
        equal total weight for each object instance.

    Returns:
      localization_loss: A tf.float32 scalar corresponding to localization
        loss.
    """
    input_fields = standard_fields.InputDataFields
    output_fields = standard_fields.DetectionResultFields

    def _example_loss(b):
        """Returns the loss of the b-th example of the batch."""
        inputs_1 = batch_utils.get_batch_size_1_input_objects(inputs=inputs,
                                                              b=b)
        outputs_1 = batch_utils.get_batch_size_1_output_objects(
            outputs=outputs, b=b)
        has_inputs = tf.greater(
            tf.shape(inputs_1[input_fields.objects_length])[0], 0)
        has_outputs = tf.greater(
            tf.shape(outputs_1[output_fields.objects_length])[0], 0)
        return tf.cond(
            tf.logical_and(has_inputs, has_outputs),
            lambda: _box_corner_distance_loss_on_object_tensors(
                inputs=inputs_1,
                outputs=outputs_1,
                loss_type=loss_type,
                delta=delta,
                is_balanced=is_balanced),
            lambda: tf.constant(0.0, dtype=tf.float32))

    batch_size = len(inputs[input_fields.objects_length])
    per_example_losses = [_example_loss(b) for b in range(batch_size)]
    return tf.reduce_mean(tf.stack(per_example_losses))
예제 #7
0
def change_intensity_range(intensities,
                           threshold=2.5,
                           normalization_factor1=2500.0,
                           normalization_factor2=12.0):
    """Compresses intensities above a threshold and rescales the result.

    After casting to tf.float32, values that are not greater than `threshold`
    are kept as they are, while larger values become
    `threshold + (value - threshold) / normalization_factor2`. The result is
    divided by `normalization_factor1` and shifted by -1.0.

    Args:
      intensities: A tensor containing intensity values. It is assumed it has
        a range of 0 to around 65000.
      threshold: Values above this are compressed.
      normalization_factor1: Divisor applied to the re-ranged values.
      normalization_factor2: Divisor applied to the part of a value that
        exceeds `threshold`.

    Returns:
      Tensor with re-ranged intensity values.
    """
    values = tf.cast(intensities, dtype=tf.float32)
    # 1.0 where the value exceeds the threshold, 0.0 elsewhere.
    is_large = tf.cast(tf.greater(values, threshold), dtype=tf.float32)
    kept_part = values * (1.0 - is_large)
    compressed = threshold + (values - threshold) / normalization_factor2
    compressed_part = compressed * is_large
    rescaled = (kept_part + compressed_part) / normalization_factor1
    return rescaled - 1.0
def _box_corner_distance_loss_on_object_tensors(inputs, outputs, loss_type,
                                                delta, is_balanced):
    """Computes the box corner loss over the valid objects of one example.

    An object is valid when both its class and its instance id are greater
    than 0. When at least one object is valid, the object fields present in
    `inputs` and `outputs` are replaced in place by their masked versions and
    the result of `_box_corner_distance_loss` on them is returned. Otherwise
    the loss is 0.0.

    Args:
      inputs: A dictionary of input object tensors.
      outputs: A dictionary of predicted object tensors.
      loss_type: Loss type, forwarded to `_box_corner_distance_loss`.
      delta: Forwarded to `_box_corner_distance_loss`.
      is_balanced: Forwarded to `_box_corner_distance_loss`.

    Returns:
      A tf.float32 scalar loss.
    """
    input_fields = standard_fields.InputDataFields
    output_fields = standard_fields.DetectionResultFields
    has_class = tf.greater(
        tf.reshape(inputs[input_fields.objects_class], [-1]), 0)
    has_instance = tf.greater(
        tf.reshape(inputs[input_fields.objects_instance_id], [-1]), 0)
    valid_mask = tf.logical_and(has_class, has_instance)

    def _loss_on_valid_objects():
        """Masks the object tensors in place and computes the corner loss."""
        field_sources = (
            (inputs, standard_fields.get_input_object_fields),
            (outputs, standard_fields.get_output_object_fields),
        )
        for tensors, list_fields in field_sources:
            for field in list_fields():
                if field in tensors:
                    tensors[field] = tf.boolean_mask(tensors[field],
                                                     valid_mask)
        return _box_corner_distance_loss(
            loss_type=loss_type,
            is_balanced=is_balanced,
            input_boxes_length=inputs[input_fields.objects_length],
            input_boxes_height=inputs[input_fields.objects_height],
            input_boxes_width=inputs[input_fields.objects_width],
            input_boxes_center=inputs[input_fields.objects_center],
            input_boxes_rotation_matrix=inputs[
                input_fields.objects_rotation_matrix],
            input_boxes_instance_id=inputs[input_fields.objects_instance_id],
            output_boxes_length=outputs[output_fields.objects_length],
            output_boxes_height=outputs[output_fields.objects_height],
            output_boxes_width=outputs[output_fields.objects_width],
            output_boxes_center=outputs[output_fields.objects_center],
            output_boxes_rotation_matrix=outputs[
                output_fields.objects_rotation_matrix],
            delta=delta)

    return tf.cond(tf.reduce_any(valid_mask), _loss_on_valid_objects,
                   lambda: tf.constant(0.0, dtype=tf.float32))
def classification_loss_using_mask_iou_func_unbatched(
        embeddings, instance_ids, sampled_embeddings, sampled_instance_ids,
        sampled_class_labels, sampled_logits, similarity_strategy,
        is_balanced):
    """Classification loss whose foreground targets are scaled by mask IoU.

    Soft masks are predicted from the sampled embeddings and thresholded at
    0.5. Their IoU with the ground-truth masks (points sharing the sample's
    instance id) scales the foreground columns of the one-hot class labels.
    The background column is set to one minus the sum of the scaled
    foreground columns, clipped below at 0. The resulting soft labels are
    passed to `classification_loss_fn` together with the sampled logits.

    Args:
      embeddings: A tf.float32 tensor of size [n, f].
      instance_ids: A tf.int32 tensor of size [n].
      sampled_embeddings: A tf.float32 tensor of size [num_samples, f].
      sampled_instance_ids: A tf.int32 tensor of size [num_samples].
      sampled_class_labels: A tf.int32 tensor of size [num_samples, 1].
      sampled_logits: A tf.float32 tensor of size [num_samples, num_classes].
      similarity_strategy: Defines the method for computing similarity
        between embedding vectors. Possible values are 'dotproduct' and
        'distance'.
      is_balanced: If True, weights obtained from
        `loss_utils.get_balanced_loss_weights_multiclass` on the sampled
        instance ids are passed to the classification loss.

    Returns:
      A tf.float32 loss scalar tensor.
    """
    soft_masks = metric_learning_utils.embedding_centers_to_soft_masks(
        embedding=embeddings,
        centers=sampled_embeddings,
        similarity_strategy=similarity_strategy)
    hard_masks = tf.cast(tf.greater(soft_masks, 0.5), dtype=tf.float32)
    same_instance = tf.equal(tf.expand_dims(sampled_instance_ids, axis=1),
                             tf.expand_dims(instance_ids, axis=0))
    gt_masks = tf.cast(same_instance, dtype=tf.float32)
    mask_iou = instance_segmentation_utils.points_mask_pairwise_iou(
        masks1=hard_masks, masks2=gt_masks)

    num_classes = sampled_logits.get_shape().as_list()[1]
    one_hot_labels = tf.one_hot(
        indices=tf.reshape(sampled_class_labels, [-1]), depth=num_classes)
    # Column 0 is background; the remaining columns are scaled by the IoU.
    iou_per_class = tf.tile(tf.reshape(mask_iou, [-1, 1]),
                            [1, num_classes - 1])
    foreground = one_hot_labels[:, 1:] * iou_per_class
    background = tf.maximum(
        1.0 - tf.math.reduce_sum(foreground, axis=1, keepdims=True), 0.0)
    soft_labels = tf.concat([background, foreground], axis=1)

    loss_kwargs = {}
    if is_balanced:
        loss_kwargs['weights'] = (
            loss_utils.get_balanced_loss_weights_multiclass(
                labels=tf.expand_dims(sampled_instance_ids, axis=1)))
    return classification_loss_fn(logits=sampled_logits,
                                  labels=soft_labels,
                                  **loss_kwargs)
def _box_size_regression_loss(loss_type, is_balanced, input_boxes_length,
                              input_boxes_height, input_boxes_width,
                              input_boxes_instance_id, output_boxes_length,
                              output_boxes_height, output_boxes_width, delta):
  """Computes a regression loss on predicted-to-target box size ratios.

  Each predicted dimension is divided by the matching target dimension and
  the (length, height, width) ratios are regressed towards one. The per-box
  losses are multiplied by the per-box weights and averaged.

  Args:
    loss_type: Either 'huber' or 'absolute_difference'.
    is_balanced: If True, the per-box weights come from
      `loss_utils.get_balanced_loss_weights_multiclass` applied to
      `input_boxes_instance_id`; otherwise every box has weight one.
    input_boxes_length: Tensor with the target box lengths.
    input_boxes_height: Tensor with the target box heights.
    input_boxes_width: Tensor with the target box widths.
    input_boxes_instance_id: Tensor with the instance id of each target box.
    output_boxes_length: Tensor with the predicted box lengths.
    output_boxes_height: Tensor with the predicted box heights.
    output_boxes_width: Tensor with the predicted box widths.
    delta: The `delta` argument of the Huber loss. Only used when `loss_type`
      is 'huber'.

  Returns:
    A tf.float32 scalar loss. It is 0.0 when the first dimension of either
    length tensor is zero.

  Raises:
    ValueError: If `loss_type` is not one of the supported values. The check
      lives in the non-empty branch, so it only fires when that branch is
      built or executed.
  """

  def _size_ratio(predicted, target):
    """Returns predicted / target, both reshaped to a single column."""
    target_column = tf.reshape(target, [-1, 1])
    return tf.reshape(predicted, [-1, 1]) / target_column

  def _loss_for_nonempty_boxes():
    """Weighted mean loss; only valid when both sides have boxes."""
    if is_balanced:
      box_weights = loss_utils.get_balanced_loss_weights_multiclass(
          labels=input_boxes_instance_id)
    else:
      num_boxes = tf.shape(input_boxes_instance_id)[0]
      box_weights = tf.ones([num_boxes, 1], dtype=tf.float32)
    size_ratios = tf.concat([
        _size_ratio(output_boxes_length, input_boxes_length),
        _size_ratio(output_boxes_height, input_boxes_height),
        _size_ratio(output_boxes_width, input_boxes_width),
    ], axis=1)
    # A perfect prediction has a ratio of exactly one on every dimension.
    target_ratios = tf.ones_like(size_ratios)
    no_reduction = tf.keras.losses.Reduction.NONE
    if loss_type == 'huber':
      criterion = tf.keras.losses.Huber(delta=delta, reduction=no_reduction)
    elif loss_type == 'absolute_difference':
      criterion = tf.keras.losses.MeanAbsoluteError(reduction=no_reduction)
    else:
      raise ValueError(('Unknown loss type %s.' % loss_type))
    per_box_loss = criterion(y_true=target_ratios, y_pred=size_ratios)
    return tf.reduce_mean(per_box_loss * tf.reshape(box_weights, [-1]))

  has_targets = tf.greater(tf.shape(input_boxes_length)[0], 0)
  has_predictions = tf.greater(tf.shape(output_boxes_length)[0], 0)
  return tf.cond(
      tf.logical_and(has_targets, has_predictions),
      _loss_for_nonempty_boxes,
      lambda: tf.constant(0.0, dtype=tf.float32))
def _voxel_hard_negative_classification_loss_unbatched(inputs_1, outputs_1,
                                                       is_intermediate, gamma):
    """Classification loss over background voxels of a batch-size-1 example.

    Only voxels whose label is 0 take part. Each of them is weighted by
    `(1 - softmax(logits)[:, 0]) ** gamma`, and the weights are rescaled so
    that they sum to the number of background voxels.

    Args:
      inputs_1: A dictionary of input tensors for one example.
      outputs_1: A dictionary of output tensors for one example.
      is_intermediate: If True, the intermediate semantic logits are used
        instead of the final ones.
      gamma: Exponent applied to the per-voxel weights.

    Returns:
      A tf.float32 scalar loss; 0.0 when there are no background voxels.

    Raises:
      ValueError: If the last dimension of the logits is not statically
        known. The check lives in the loss branch, so it only fires when
        that branch is built or executed.
    """
    inputs_1, outputs_1 = _get_voxels_valid_inputs_outputs(inputs_1=inputs_1,
                                                           outputs_1=outputs_1)
    output_fields = standard_fields.DetectionResultFields
    logits_key = (output_fields.intermediate_object_semantic_voxels
                  if is_intermediate else
                  output_fields.object_semantic_voxels)
    logits = outputs_1[logits_key]
    labels = tf.reshape(
        inputs_1[standard_fields.InputDataFields.object_class_voxels], [-1])
    background_mask = tf.equal(labels, 0)
    num_background_points = tf.reduce_sum(
        tf.cast(background_mask, dtype=tf.int32))

    def _background_loss():
        """Weighted classification loss restricted to background voxels."""
        num_classes = logits.get_shape().as_list()[-1]
        if num_classes is None:
            raise ValueError('Number of classes is unknown.')
        bg_logits = tf.boolean_mask(logits, background_mask)
        bg_probability = tf.reshape(tf.nn.softmax(bg_logits)[:, 0], [-1, 1])
        bg_weights = tf.pow(1.0 - bg_probability, gamma)
        # Rescale so that the weights sum to the number of masked voxels.
        num_masked = tf.cast(tf.shape(bg_logits)[0], dtype=tf.float32)
        bg_weights = bg_weights * num_masked / tf.reduce_sum(bg_weights)
        bg_labels_one_hot = tf.one_hot(
            indices=tf.boolean_mask(labels, background_mask),
            depth=num_classes)
        return classification_loss_fn(logits=bg_logits,
                                      labels=bg_labels_one_hot,
                                      weights=bg_weights)

    has_background = tf.greater(num_background_points, 0)
    has_voxels = tf.greater(tf.shape(labels)[0], 0)
    return tf.cond(tf.logical_and(has_background, has_voxels),
                   _background_loss,
                   lambda: tf.constant(0.0, dtype=tf.float32))
예제 #12
0
    def _build_train_op(self):
        """Builds a training op.

        The loss is the mean squared difference between the Q value of the
        clicked document and the (gradient-stopped) target Q value, taken
        over the replay rows whose slate has exactly one click. When the
        total click count of the batch is not greater than 0 the loss is the
        constant 0.

        Returns:
          An op performing one step of training from replay data.
        """
        # Shapes, with B the batch size, S the slate size and A the number
        # of actions:
        #   click_indicator: [B, S]
        #   q_values:        [B, A]
        #   actions:         [B, S]
        #   slate_q_values:  [B, S]
        #   replay_click_q:  [B]
        rewards = self._replay.rewards
        click_indicator = rewards[:, :, self._click_response_index]
        q_values = self._replay_net_outputs.q_values
        slate_actions = tf.cast(self._replay.actions, dtype=tf.int32)
        slate_q_values = tf.compat.v1.batch_gather(q_values, slate_actions)
        # Only get the Q from the clicked document.
        replay_click_q = tf.reduce_sum(
            input_tensor=slate_q_values * click_indicator,
            axis=1,
            name='replay_click_q')

        target = tf.stop_gradient(self._build_target_q_op())

        clicks_per_slate = tf.reduce_sum(input_tensor=click_indicator, axis=1)
        single_click_rows = tf.squeeze(
            tf.compat.v1.where(tf.equal(clicks_per_slate, 1)), axis=1)
        # single_click_rows is a vector, so tf.gather selects along the
        # batch dimension.
        q_clicked = tf.gather(replay_click_q, single_click_rows)
        target_clicked = tf.gather(target, single_click_rows)

        def _clicked_loss():
            """Mean squared error over the selected rows, with a summary."""
            squared_error = tf.square(q_clicked - target_clicked)
            mse = tf.reduce_mean(input_tensor=squared_error)
            if self.summary_writer is not None:
                with tf.compat.v1.variable_scope('Losses'):
                    tf.compat.v1.summary.scalar('Loss', mse)
            return mse

        any_click = tf.greater(tf.reduce_sum(input_tensor=clicks_per_slate),
                               0)
        loss = tf.cond(pred=any_click,
                       true_fn=_clicked_loss,
                       false_fn=lambda: tf.constant(0.),
                       name='')

        return self.optimizer.minimize(loss)
예제 #13
0
 def _body_fn(i, indices_range, indices):
   """Computes the indices of the i-th point feature in each segment.

   Reads `segment_ids`, `num_segments`, `n` and `num_samples_per_voxel` from
   the enclosing scope. The signature (state in, updated state out) suggests
   a loop body, e.g. for `tf.while_loop` -- TODO confirm at the call site.

   Args:
     i: Index of the column of `indices` filled in by this call.
     indices_range: Per-point integer values. Presumably 1-based point
       indices with 0 marking points already taken -- the `> 0` test and the
       `- 1` before scattering suggest this; confirm against the caller.
     indices: Accumulator that the padded column is added to.

   Returns:
     A tuple `(i + 1, indices_range, indices)` with the selected points
     zeroed out in `indices_range` and column `i` added to `indices`.
   """
   # Largest remaining value of `indices_range` within each segment.
   indices_i = tf.math.unsorted_segment_max(
       data=indices_range, segment_ids=segment_ids, num_segments=num_segments)
   # Only segments whose maximum is positive contribute a point this round.
   indices_i_positive_mask = tf.greater(indices_i, 0)
   indices_i_positive = tf.boolean_mask(indices_i, indices_i_positive_mask)
   # Length-n vector holding ones at the positions selected in this round.
   boolean_mask = tf.scatter_nd(
       indices=tf.cast(
           tf.expand_dims(indices_i_positive - 1, axis=1), dtype=tf.int64),
       updates=tf.ones_like(indices_i_positive, dtype=tf.int32),
       shape=(n,))
   # Zero out the selected points so they are not picked again.
   indices_range *= (1 - boolean_mask)
   # Replace non-positive segment maxima by 0.
   indices_i *= tf.cast(indices_i_positive_mask, dtype=tf.int32)
   # Place the values in column i of a num_samples_per_voxel-wide matrix.
   indices_i = tf.pad(
       tf.expand_dims(indices_i, axis=1),
       paddings=[[0, 0], [i, num_samples_per_voxel - i - 1]])
   indices += indices_i
   i = i + 1
   return i, indices_range, indices
예제 #14
0
def _get_class_labels_and_predictions(labels, logits, num_classes,
                                      multi_label):
    """Returns list of per-class-labels and list of per-class-predictions.

    Args:
      labels: A `Tensor` of size [n, k]. In the multi-label case, values are
        either 0 or 1 and k = num_classes. Otherwise, k = 1 and values are
        in [0, num_classes).
      logits: A `Tensor` of size [n, `num_classes`] representing the logits
        of each pixel and semantic class.
      num_classes: Number of classes.
      multi_label: Boolean which defines if we are in a multi_label setting,
        where pixels can have multiple labels, or not.

    Returns:
      class_labels: List of size num_classes. In the multi-label case each
        entry is the matching column of `labels` as produced by `tf.split`.
        Otherwise each entry is a tf.float32 tensor shaped like `labels`
        that is 1 where the label equals the class and 0 elsewhere.
      class_predictions: List of size num_classes of tf.float32 tensors with
        values of 0 or 1. In the multi-label case an entry is 1 where the
        logit column of the class is greater than 0. Otherwise it is 1 where
        the argmax over the logits equals the class.
    """
    if multi_label:
        class_labels = tf.split(labels, num_or_size_splits=num_classes, axis=1)
        class_logits = tf.split(logits, num_or_size_splits=num_classes, axis=1)
        class_predictions = [
            tf.cast(tf.greater(class_logits[c], 0), dtype=tf.float32)
            for c in range(num_classes)
        ]
        return class_labels, class_predictions

    predicted_class = tf.argmax(logits, 1)
    class_labels = []
    class_predictions = []
    for c in range(num_classes):
        class_labels.append(tf.cast(tf.equal(labels, c), dtype=tf.float32))
        class_predictions.append(
            tf.cast(tf.equal(predicted_class, c), dtype=tf.float32))
    return class_labels, class_predictions
예제 #15
0
def precision(y, y_hat, name='precision'):
    """Returns tp / (tp + fp), with 0 wherever tp + fp is not positive.

    Args:
      y: Forwarded as the first argument of `true_p` and `false_p`.
      y_hat: Forwarded as the second argument of `true_p` and `false_p`.
      name: Name given to the resulting `tf.where` op.
    """
    true_positives = true_p(y, y_hat)
    false_positives = false_p(y, y_hat)
    predicted_positives = true_positives + false_positives
    has_predictions = tf.greater(predicted_positives, 0)
    ratio = tf.div(true_positives, predicted_positives)
    return tf.where(has_predictions, ratio, 0, name)
예제 #16
0
def recall(y, y_hat, name='recall'):
    """Returns tp / (tp + fn), with 0 wherever tp + fn is not positive.

    NOTE(review): `fn` below is computed with `false_p`, the helper that
    `precision` uses for its false positives, so as written this function
    evaluates the same expression as `precision`. Recall is conventionally
    defined with false negatives; confirm whether a false-negative helper
    should be called here instead.

    Args:
      y: Forwarded as the first argument of `true_p` and `false_p`.
      y_hat: Forwarded as the second argument of `true_p` and `false_p`.
      name: Name given to the resulting `tf.where` op.
    """
    tp = true_p(y, y_hat)
    fn = false_p(y, y_hat)
    return tf.where(
        tf.greater(tp + fn, 0),
        tf.div(tp, tp + fn), 0, name)
def _box_corner_distance_loss(
    loss_type, is_balanced, input_boxes_length, input_boxes_height,
    input_boxes_width, input_boxes_center, input_boxes_rotation_matrix,
    input_boxes_instance_id, output_boxes_length, output_boxes_height,
    output_boxes_width, output_boxes_center, output_boxes_rotation_matrix,
    delta):
  """Computes regression loss on object corner locations.

  The eight corners of the target and predicted boxes are obtained with
  `box_utils.get_box_corners_3d` and compared corner by corner. For the
  'normalized_*' loss types the predicted sizes are first divided by
  `target_size / 5.0` and the target sizes are replaced by 5.0, so the
  corner error is measured relative to the target box size.

  Args:
    loss_type: One of 'huber', 'normalized_huber', 'absolute_difference' or
      'normalized_absolute_difference'. 'normalized_euclidean' is still
      recognised by the normalization step but has no loss attached to it,
      so it raises ValueError like any other unknown type.
    is_balanced: If True, the per-box weights come from
      `loss_utils.get_balanced_loss_weights_multiclass` applied to
      `input_boxes_instance_id`; otherwise every box has weight one.
    input_boxes_length: Tensor with the target box lengths.
    input_boxes_height: Tensor with the target box heights.
    input_boxes_width: Tensor with the target box widths.
    input_boxes_center: Tensor with the target box centers.
    input_boxes_rotation_matrix: Tensor with the target rotation matrices.
    input_boxes_instance_id: Tensor with the instance id of each target box.
    output_boxes_length: Tensor with the predicted box lengths.
    output_boxes_height: Tensor with the predicted box heights.
    output_boxes_width: Tensor with the predicted box widths.
    output_boxes_center: Tensor with the predicted box centers.
    output_boxes_rotation_matrix: Tensor with the predicted rotation
      matrices.
    delta: The `delta` argument of the Huber loss. Only used for the huber
      loss types.

  Returns:
    A tf.float32 scalar loss. It is 0.0 when the first dimension of either
    length tensor is zero.

  Raises:
    ValueError: If no loss is defined for `loss_type`. The check lives in
      the non-empty branch, so it only fires when that branch is built or
      executed.
  """
  # 'normalized_absolute_difference' is accepted by the loss selection
  # below, so it has to be normalized as well; it used to be skipped here.
  normalized_loss_types = ('normalized_huber', 'normalized_euclidean',
                           'normalized_absolute_difference')

  def fn():
    """Loss function for when number of input and output boxes is positive."""
    if is_balanced:
      weights = loss_utils.get_balanced_loss_weights_multiclass(
          labels=input_boxes_instance_id)
    else:
      weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1],
                        dtype=tf.float32)
    normalized_box_size = 5.0
    predicted_boxes_length = output_boxes_length
    predicted_boxes_height = output_boxes_height
    predicted_boxes_width = output_boxes_width
    gt_boxes_length = input_boxes_length
    gt_boxes_height = input_boxes_height
    gt_boxes_width = input_boxes_width
    if loss_type in normalized_loss_types:
      # Express predicted sizes relative to the target sizes, then pin the
      # target sizes to the common normalized size.
      predicted_boxes_length /= (gt_boxes_length / normalized_box_size)
      predicted_boxes_height /= (gt_boxes_height / normalized_box_size)
      predicted_boxes_width /= (gt_boxes_width / normalized_box_size)
      gt_boxes_length = tf.ones_like(
          gt_boxes_length, dtype=tf.float32) * normalized_box_size
      gt_boxes_height = tf.ones_like(
          gt_boxes_height, dtype=tf.float32) * normalized_box_size
      gt_boxes_width = tf.ones_like(
          gt_boxes_width, dtype=tf.float32) * normalized_box_size
    gt_box_corners = box_utils.get_box_corners_3d(
        boxes_length=gt_boxes_length,
        boxes_height=gt_boxes_height,
        boxes_width=gt_boxes_width,
        boxes_rotation_matrix=input_boxes_rotation_matrix,
        boxes_center=input_boxes_center)
    predicted_box_corners = box_utils.get_box_corners_3d(
        boxes_length=predicted_boxes_length,
        boxes_height=predicted_boxes_height,
        boxes_width=predicted_boxes_width,
        boxes_rotation_matrix=output_boxes_rotation_matrix,
        boxes_center=output_boxes_center)
    # Every box contributes eight corners, so its weight is repeated eight
    # times.
    corner_weights = tf.tile(weights, [1, 8])
    if loss_type in ['huber', 'normalized_huber']:
      loss_fn = tf.keras.losses.Huber(
          delta=delta, reduction=tf.keras.losses.Reduction.NONE)
    elif loss_type in ['normalized_absolute_difference', 'absolute_difference']:
      loss_fn = tf.keras.losses.MeanAbsoluteError(
          reduction=tf.keras.losses.Reduction.NONE)
    else:
      raise ValueError(('Unknown loss type %s.' % loss_type))
    box_corner_losses = loss_fn(
        y_true=tf.reshape(gt_box_corners, [-1, 3]),
        y_pred=tf.reshape(predicted_box_corners, [-1, 3]))
    return tf.reduce_mean(box_corner_losses * tf.reshape(corner_weights, [-1]))

  cond_input = tf.greater(tf.shape(input_boxes_length)[0], 0)
  cond_output = tf.greater(tf.shape(output_boxes_length)[0], 0)
  cond = tf.logical_and(cond_input, cond_output)
  return tf.cond(cond, fn, lambda: tf.constant(0.0, dtype=tf.float32))
def _box_classification_using_center_distance_loss_unbatched(
        inputs_1, outputs_1, is_intermediate, is_balanced,
        max_positive_normalized_distance):
    """Loss function for input and outputs of batch size 1.

    The one-hot foreground labels of every voxel are scaled by a coefficient
    that decreases linearly with the normalized distance between the
    predicted and the target box, reaching 0 at
    `max_positive_normalized_distance`. The background label is one minus
    the sum of the scaled foreground labels. The resulting soft labels are
    passed to `classification_loss_fn` together with the semantic logits.

    NOTE(review): the function name and the local variable talk about a
    "center distance", but the value comes from
    `loss_utils.get_normalized_corner_distances` -- confirm which distance
    is intended.

    Args:
      inputs_1: A dictionary of input tensors for one example.
      outputs_1: A dictionary of output tensors for one example.
      is_intermediate: If True, the intermediate box and semantic outputs
        are used instead of the final ones.
      is_balanced: If True, weights obtained from
        `loss_utils.get_balanced_loss_weights_multiclass` on the voxel
        instance ids are passed to the classification loss.
      max_positive_normalized_distance: Normalized distance at which the
        foreground coefficient reaches 0.

    Returns:
      A tf.float32 scalar loss; 0.0 when there are no voxel labels.

    Raises:
      ValueError: If the last dimension of the logits is not statically
        known. The check lives in the loss branch, so it only fires when
        that branch is built or executed.
    """
    inputs_1, outputs_1 = _get_voxels_valid_inputs_outputs(inputs_1=inputs_1,
                                                           outputs_1=outputs_1)
    # Pick either the intermediate or the final set of predictions.
    if is_intermediate:
        output_object_centers = outputs_1[standard_fields.DetectionResultFields
                                          .intermediate_object_center_voxels]
        output_object_length = outputs_1[standard_fields.DetectionResultFields.
                                         intermediate_object_length_voxels]
        output_object_height = outputs_1[standard_fields.DetectionResultFields.
                                         intermediate_object_height_voxels]
        output_object_width = outputs_1[standard_fields.DetectionResultFields.
                                        intermediate_object_width_voxels]
        output_object_rotation_matrix = outputs_1[
            standard_fields.DetectionResultFields.
            intermediate_object_rotation_matrix_voxels]
        logits = outputs_1[standard_fields.DetectionResultFields.
                           intermediate_object_semantic_voxels]
    else:
        output_object_centers = outputs_1[
            standard_fields.DetectionResultFields.object_center_voxels]
        output_object_length = outputs_1[
            standard_fields.DetectionResultFields.object_length_voxels]
        output_object_height = outputs_1[
            standard_fields.DetectionResultFields.object_height_voxels]
        output_object_width = outputs_1[
            standard_fields.DetectionResultFields.object_width_voxels]
        output_object_rotation_matrix = outputs_1[
            standard_fields.DetectionResultFields.
            object_rotation_matrix_voxels]
        logits = outputs_1[
            standard_fields.DetectionResultFields.object_semantic_voxels]
    normalized_center_distance = loss_utils.get_normalized_corner_distances(
        predicted_boxes_center=output_object_centers,
        predicted_boxes_length=output_object_length,
        predicted_boxes_height=output_object_height,
        predicted_boxes_width=output_object_width,
        predicted_boxes_rotation_matrix=output_object_rotation_matrix,
        gt_boxes_center=inputs_1[
            standard_fields.InputDataFields.object_center_voxels],
        gt_boxes_length=inputs_1[
            standard_fields.InputDataFields.object_length_voxels],
        gt_boxes_height=inputs_1[
            standard_fields.InputDataFields.object_height_voxels],
        gt_boxes_width=inputs_1[
            standard_fields.InputDataFields.object_width_voxels],
        gt_boxes_rotation_matrix=inputs_1[
            standard_fields.InputDataFields.object_rotation_matrix_voxels])
    labels = tf.reshape(
        inputs_1[standard_fields.InputDataFields.object_class_voxels], [-1])
    instances = tf.reshape(
        inputs_1[standard_fields.InputDataFields.object_instance_id_voxels],
        [-1])
    # Optional keyword arguments forwarded to the classification loss.
    params = {}
    if is_balanced:
        weights = loss_utils.get_balanced_loss_weights_multiclass(
            labels=tf.expand_dims(instances, axis=1))
        params['weights'] = weights

    def loss_fn():
        """Loss function."""
        num_classes = logits.get_shape().as_list()[-1]
        if num_classes is None:
            raise ValueError('Number of classes is unknown.')
        # One-hot over the foreground classes only: label c maps to column
        # c - 1 of a (num_classes - 1)-wide encoding.
        labels_one_hot = tf.one_hot(indices=(labels - 1),
                                    depth=(num_classes - 1))
        # 1 - distance / max_distance, clipped to the range [0, 1].
        inverse_distance_coef = tf.maximum(
            tf.minimum(
                1.0 -
                normalized_center_distance / max_positive_normalized_distance,
                1.0), 0.0)
        labels_one_hot = tf.reshape(inverse_distance_coef,
                                    [-1, 1]) * labels_one_hot
        # Whatever is not assigned to a foreground class goes to background.
        background_label = 1.0 - tf.math.reduce_sum(
            labels_one_hot, axis=1, keepdims=True)
        labels_one_hot = tf.concat([background_label, labels_one_hot], axis=1)
        loss = classification_loss_fn(logits=logits,
                                      labels=labels_one_hot,
                                      **params)
        return loss

    return tf.cond(tf.greater(tf.shape(labels)[0], 0), loss_fn,
                   lambda: tf.constant(0.0, dtype=tf.float32))
예제 #19
0
def preprocess(inputs,
               output_keys=None,
               is_training=False,
               using_sequence_dataset=False,
               num_frame_to_load=1,
               transform_points_fn=None,
               image_preprocess_fn_dic=None,
               images_points_correspondence_fn=None,
               compute_semantic_labels_fn=None,
               compute_motion_labels_fn=None,
               view_names=(),
               points_key='points',
               colors_key='colors',
               normals_key='normals',
               intensities_key='intensities',
               elongations_key='elongations',
               semantic_labels_key='semantic_labels',
               motion_labels_key='motion_labels',
               spin_coords_key=None,
               points_in_image_frame_key=None,
               num_points_to_randomly_sample=None,
               x_min_degree_rotation=None,
               x_max_degree_rotation=None,
               y_min_degree_rotation=None,
               y_max_degree_rotation=None,
               z_min_degree_rotation=None,
               z_max_degree_rotation=None,
               points_pad_or_clip_size=None,
               voxels_pad_or_clip_size=None,
               voxel_grid_cell_size=(0.1, 0.1, 0.1),
               num_offset_bins_x=4,
               num_offset_bins_y=4,
               num_offset_bins_z=4,
               point_feature_keys=('point_offsets', ),
               point_to_voxel_segment_func=tf.math.unsorted_segment_mean,
               x_random_crop_size=None,
               y_random_crop_size=None,
               min_scale_ratio=None,
               max_scale_ratio=None,
               semantic_labels_offset=0,
               ignore_labels=(),
               remove_unlabeled_images_and_points=False,
               labeled_view_name=None,
               only_keep_first_return_lidar_points=False):
    """Preprocesses a dictionary of `Tensor` inputs.

  The steps, in order, are: casting float64 / int64 tensors to float32 /
  int32, aliasing the user supplied keys to the standard field names,
  optional image / point correspondence computation, optional multi-frame
  point concatenation, optional label computation, optional first-return
  filtering, point sampling, loss-weight creation, intensity / elongation
  normalization, optional random scaling, cropping, image preprocessing,
  mean subtraction, rotation, flipping, color normalization, padding or
  clipping, and finally voxelization.

  The rotation helper is called regardless of `is_training`; the flipping
  helper receives `is_training`. Normals and motion vectors, when present,
  are passed through the same rotation and flipping helpers as the points.

  Args:
    inputs: A dictionary of inputs. Each value must be a `Tensor`. When
      `using_sequence_dataset` is True it must instead contain the keys
      'scene', 'frame0', 'frame1' and 'frame_start_index' (plus 'frame<i>' for
      every additional frame that is loaded).
    output_keys: Either None, or a list of strings containing the keys in the
      dictionary that is returned by the preprocess function.
    is_training: Whether we're training or testing.
    using_sequence_dataset: if true, the inputs will contain scene and multiple
      frames data.
    num_frame_to_load: If greater than 1, load multiframe point cloud point
      positions and its correspondence. Requires `using_sequence_dataset`,
      `images_points_correspondence_fn` and `transform_points_fn`.
    transform_points_fn: Fn to transform other frames to a specific frame's
      coordinate.
    image_preprocess_fn_dic: Image preprocessing function. Maps view names to
      their image preprocessing functions. Set it to None, if there are no
      images to preprocess or you are not interested in preprocessing images.
    images_points_correspondence_fn: The function that computes correspondence
      between images and points.
    compute_semantic_labels_fn: If not None, semantic labels will be computed
      using this function.
    compute_motion_labels_fn: If not None, motion labels will be computed using
      this function. Requires `using_sequence_dataset`.
    view_names: Names corresponding to 2d views of the scene.
    points_key: The key used for `points` in the inputs.
    colors_key: The key used for `colors` in the inputs.
    normals_key: The key used for 'normals' in the inputs.
    intensities_key: The key used for 'intensities' in the inputs.
    elongations_key: The key used for 'elongations' in the inputs.
    semantic_labels_key: The key used for 'semantic_labels' in the inputs.
    motion_labels_key: The key used for 'motion_labels' in the inputs.
    spin_coords_key: The key used for 'spin_coords' in the inputs. In Waymo
      data, spin_coords is a [num_points, 3] tensor that contains scan_index,
      shot_index, return_index. In Waymo data, return_index of the first return
      points is 0.
    points_in_image_frame_key: A string that identifies the tensor that contains
      the points_in_image_frame tensor. If None, it won't be used.
    num_points_to_randomly_sample: Number of points to randomly sample. If None,
      it will keep the original points and does not perform sampling.
    x_min_degree_rotation: Min degree of rotation around the x axis.
    x_max_degree_rotation: Max degree of rotation around the x axis.
    y_min_degree_rotation: Min degree of rotation around the y axis.
    y_max_degree_rotation: Max degree of rotation around the y axis.
    z_min_degree_rotation: Min degree of rotation around the z axis.
    z_max_degree_rotation: Max degree of rotation around the z axis.
    points_pad_or_clip_size: Number of target points to pad or clip to. If None,
      it will not perform the point padding.
    voxels_pad_or_clip_size: Number of target voxels to pad or clip to. If None,
      it will not perform the voxel padding.
    voxel_grid_cell_size: A three dimensional tuple determining the voxel grid
      size.
    num_offset_bins_x: Number of bins for point offsets in x direction.
    num_offset_bins_y: Number of bins for point offsets in y direction.
    num_offset_bins_z: Number of bins for point offsets in z direction.
    point_feature_keys: The keys used to form the voxel features.
    point_to_voxel_segment_func: The function used to aggregate the features
      of the points that fall in the same voxel.
    x_random_crop_size: Size of the random crop in x dimension. If None, random
      crop will not take place on x dimension.
    y_random_crop_size: Size of the random crop in y dimension. If None, random
      crop will not take place on y dimension.
    min_scale_ratio: Minimum scale ratio. Used for scaling point cloud.
    max_scale_ratio: Maximum scale ratio. Used for scaling point cloud.
    semantic_labels_offset: An integer offset that will be added to labels.
    ignore_labels: A tuple containing labels that should be ignored when
      computing the loss and metrics.
    remove_unlabeled_images_and_points: If True, removes the images that are not
      labeled and also removes the points that are associated with those images.
    labeled_view_name: The name of the view that is labeled, otherwise None.
    only_keep_first_return_lidar_points: If True, we only keep the first return
      lidar points.

  Returns:
    A dictionary holding the processed point tensors, the inputs that are not
    point tensors, the per-view '<key>/features' and '<key>/indices_2d'
    entries, and whatever the voxelization helpers add to it. If `output_keys`
    is not None, only the entries whose key is in `output_keys` are returned.

  Raises:
    ValueError: if `num_frame_to_load` > 1 or `compute_motion_labels_fn` is set
      while `using_sequence_dataset` is False.
    ValueError: if `num_frame_to_load` > 1 and either
      `images_points_correspondence_fn` or `transform_points_fn` is None.
    ValueError: if `num_frame_to_load` > 1 and `num_points_to_randomly_sample`
      is not None.
    ValueError: if the point positions are missing, are not of rank 2 or do
      not have 3 columns.
    ValueError: if a view image does not have rank 4 or a view indices_2d
      tensor does not have rank 3.
    ValueError: if `points_in_image_frame` does not have rank 3.
  """
    inputs = dict(inputs)

    # The scene, the neighbouring frames and the start index only exist in
    # sequence inputs. Fail with a clear message instead of hitting an
    # unassigned local further down.
    if not using_sequence_dataset:
        if num_frame_to_load > 1:
            raise ValueError(
                'num_frame_to_load > 1 requires using_sequence_dataset=True.')
        if compute_motion_labels_fn is not None:
            raise ValueError(
                'compute_motion_labels_fn requires using_sequence_dataset=True.'
            )

    if using_sequence_dataset:
        all_frame_inputs = inputs
        scene = all_frame_inputs['scene']
        frame1 = all_frame_inputs['frame1']
        frame_start_index = all_frame_inputs['frame_start_index']
        inputs = dict(
            all_frame_inputs['frame0']
        )  # so that the following processing code can be unchanged.

    # Initializing empty dictionary for mesh, image, indices_2d and non tensor
    # inputs.
    non_tensor_inputs = {}
    view_image_inputs = {}
    view_indices_2d_inputs = {}
    mesh_inputs = {}

    if image_preprocess_fn_dic is None:
        image_preprocess_fn_dic = {}

    # Convert all float64 to float32 and all int64 to int32.
    for key in sorted(inputs):
        if isinstance(inputs[key], tf.Tensor):
            if inputs[key].dtype == tf.float64:
                inputs[key] = tf.cast(inputs[key], dtype=tf.float32)
            if inputs[key].dtype == tf.int64:
                inputs[key] = tf.cast(inputs[key], dtype=tf.int32)

    # Alias the user supplied keys to the standard field names.
    if points_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_positions] = inputs[points_key]
    if colors_key is not None and colors_key in inputs:
        inputs[
            standard_fields.InputDataFields.point_colors] = inputs[colors_key]
    if normals_key is not None and normals_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_normals] = inputs[normals_key]
    if intensities_key is not None and intensities_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_intensities] = inputs[intensities_key]
    if elongations_key is not None and elongations_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_elongations] = inputs[elongations_key]
    if semantic_labels_key is not None and semantic_labels_key in inputs:
        inputs[standard_fields.InputDataFields.
               object_class_points] = inputs[semantic_labels_key]
    if motion_labels_key is not None and motion_labels_key in inputs:
        inputs[standard_fields.InputDataFields.
               object_flow_points] = inputs[motion_labels_key]
    if spin_coords_key is not None and spin_coords_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_spin_coordinates] = inputs[spin_coords_key]

    # Acquire point / image correspondences.
    if images_points_correspondence_fn is not None:
        fn_outputs = images_points_correspondence_fn(inputs)
        if 'points_position' in fn_outputs:
            inputs[standard_fields.InputDataFields.
                   point_positions] = fn_outputs['points_position']
        if 'points_intensity' in fn_outputs and intensities_key is not None:
            inputs[standard_fields.InputDataFields.
                   point_intensities] = fn_outputs['points_intensity']
        if 'points_elongation' in fn_outputs and elongations_key is not None:
            inputs[standard_fields.InputDataFields.
                   point_elongations] = fn_outputs['points_elongation']
        if 'points_label' in fn_outputs and semantic_labels_key is not None:
            inputs[standard_fields.InputDataFields.
                   object_class_points] = fn_outputs['points_label']
        if 'view_images' in fn_outputs:
            for key in sorted(fn_outputs['view_images']):
                if len(fn_outputs['view_images'][key].shape) != 4:
                    raise ValueError(('%s image should have rank 4.' % key))
            view_image_inputs = fn_outputs['view_images']
        if 'view_indices_2d' in fn_outputs:
            for key in sorted(fn_outputs['view_indices_2d']):
                if len(fn_outputs['view_indices_2d'][key].shape) != 3:
                    raise ValueError(
                        ('%s indices_2d should have rank 3.' % key))
            view_indices_2d_inputs = fn_outputs['view_indices_2d']
    else:
        if points_in_image_frame_key is not None:
            inputs['rgb_view/features'] = inputs['image']
            inputs['rgb_view/indices_2d'] = inputs[points_in_image_frame_key]
            if len(inputs['rgb_view/indices_2d'].shape) != 3:
                raise ValueError('`points_in_image_frame` should have rank 3.')

    # Snapshot of the single-frame inputs, taken before the points of the
    # other frames are concatenated below.
    frame0 = inputs.copy()
    if num_frame_to_load > 1:
        # Both callables are needed for every additional frame, so validate
        # them once before the loop.
        if images_points_correspondence_fn is None:
            raise ValueError(
                'images_points_correspondence_fn is needed for loading multi-frame pointclouds.'
            )
        if transform_points_fn is None:
            raise ValueError(
                'transform_points_fn is needed for loading multi-frame pointclouds.'
            )
        point_positions_list = [
            frame0[standard_fields.InputDataFields.point_positions]
        ]
        if view_indices_2d_inputs:
            view_indices_2d_list = [view_indices_2d_inputs[view_names[0]]]
        # Per-point index of the frame each point comes from (0 for frame0).
        frame_source_list = [
            tf.zeros([
                tf.shape(
                    frame0[standard_fields.InputDataFields.point_positions])[0]
            ], tf.int32)
        ]
        for i in range(1, num_frame_to_load):
            target_frame_key = 'frame' + str(i)
            frame_i = images_points_correspondence_fn(
                all_frame_inputs[target_frame_key])
            transformed_point_positions = transform_points_fn(
                scene, frame_i['points_position'], frame_start_index,
                i + frame_start_index)
            point_positions_list.append(transformed_point_positions)
            if view_indices_2d_inputs:
                view_indices_2d_list.append(
                    frame_i['view_indices_2d'][view_names[0]])
            frame_source_list.append(
                tf.ones([tf.shape(transformed_point_positions)[0]], tf.int32) *
                i)

        # add multi-frame info to override inputs and view_indices_2d_inputs
        inputs[standard_fields.InputDataFields.
               point_frame_index] = tf.expand_dims(tf.concat(frame_source_list,
                                                             axis=0),
                                                   axis=1)
        inputs[standard_fields.InputDataFields.point_positions] = tf.concat(
            point_positions_list, axis=0)
        if view_indices_2d_inputs:
            view_indices_2d_inputs[view_names[0]] = tf.concat(
                view_indices_2d_list, axis=1)

    # Validate inputs.
    if standard_fields.InputDataFields.point_positions not in inputs:
        raise ValueError('`inputs` must contain a point_positions')
    if inputs[
            standard_fields.InputDataFields.point_positions].shape.ndims != 2:
        raise ValueError('points must be of rank 2.')
    if inputs[standard_fields.InputDataFields.point_positions].shape[1] != 3:
        raise ValueError('point should be 3 dimensional.')

    # Remove normal nans.
    if standard_fields.InputDataFields.point_normals in inputs:
        inputs[standard_fields.InputDataFields.point_normals] = tf.where(
            tf.math.is_nan(
                inputs[standard_fields.InputDataFields.point_normals]),
            tf.zeros_like(
                inputs[standard_fields.InputDataFields.point_normals]),
            inputs[standard_fields.InputDataFields.point_normals])

    # Compute semantic labels if compute_semantic_labels_fn is not None
    # An example is when the ground-truth contains 3d object boxes and not per
    # point labels. This would be a function that infers point labels from boxes.
    if compute_semantic_labels_fn is not None:
        inputs[standard_fields.InputDataFields.
               object_class_points] = compute_semantic_labels_fn(
                   inputs=frame0,
                   points_key=standard_fields.InputDataFields.point_positions)
    # `scene`, `frame1` and `frame_start_index` are guaranteed to be set here
    # by the using_sequence_dataset check at the top of the function.
    if compute_motion_labels_fn is not None:
        inputs[standard_fields.InputDataFields.
               object_flow_points] = compute_motion_labels_fn(
                   scene=scene,
                   frame0=frame0,
                   frame1=frame1,
                   frame_start_index=frame_start_index,
                   points_key=standard_fields.InputDataFields.point_positions)

    # Splitting inputs to {view_image_inputs,
    #                      view_indices_2d_inputs,
    #                      mesh_inputs,
    #                      non_tensor_inputs}
    mesh_keys = []
    for key in [
            standard_fields.InputDataFields.point_positions,
            standard_fields.InputDataFields.point_colors,
            standard_fields.InputDataFields.point_normals,
            standard_fields.InputDataFields.point_intensities,
            standard_fields.InputDataFields.point_elongations,
            standard_fields.InputDataFields.object_class_points,
            standard_fields.InputDataFields.point_spin_coordinates,
            standard_fields.InputDataFields.object_flow_points,
            standard_fields.InputDataFields.point_frame_index,
    ]:
        if key is not None and key in inputs:
            mesh_keys.append(key)
    view_image_names = [('%s/features' % key) for key in view_names]
    view_indices_2d_names = [('%s/indices_2d' % key) for key in view_names]

    # Additional key collecting
    for k, v in six.iteritems(inputs):
        if k in view_image_names:
            view_image_inputs[k] = v
        elif k in view_indices_2d_names:
            view_indices_2d_inputs[k] = v
        elif k in mesh_keys:
            if num_frame_to_load > 1:
                # Per-point tensors that only cover frame0 are padded along
                # the first axis up to the concatenated multi-frame point
                # count; labels are padded with -1, everything else with 0.
                pad_size = tf.shape(
                    inputs[standard_fields.InputDataFields.
                           point_positions])[0] - tf.shape(v)[0]
                if k == standard_fields.InputDataFields.object_class_points:
                    pad_value = -1
                else:
                    pad_value = 0
                v = tf.pad(v, [[0, pad_size], [0, 0]],
                           constant_values=pad_value)
            mesh_inputs[k] = v
        else:
            non_tensor_inputs[k] = v

    # Remove points that are not in the lidar first return (optional)
    if only_keep_first_return_lidar_points:
        _remove_second_return_lidar_points(
            mesh_inputs=mesh_inputs,
            view_indices_2d_inputs=view_indices_2d_inputs)

    # Randomly sample points
    preprocessor_utils.randomly_sample_points(
        mesh_inputs=mesh_inputs,
        view_indices_2d_inputs=view_indices_2d_inputs,
        target_num_points=num_points_to_randomly_sample)

    # Add weights if it does not exist in inputs. The weight of the points with
    # label in `ignore_labels` is set to 0. This helps the loss and metrics to
    # ignore those labels.
    # NOTE(review): when only object_flow_points is present, the lookups of
    # object_class_points below raise KeyError, and `weights` is only stored
    # by the object_class_points branch further down. Confirm the intended
    # handling of flow-only inputs.
    use_weights = (
        standard_fields.InputDataFields.object_class_points in mesh_inputs
        or standard_fields.InputDataFields.object_flow_points in mesh_inputs)
    if use_weights:
        if num_frame_to_load > 1:
            # Only the points of frame0 get a non-zero weight.
            num_valid_points_frame0 = tf.shape(
                frame0[standard_fields.InputDataFields.point_positions])[0]
            num_additional_frame_points = tf.shape(
                mesh_inputs[standard_fields.InputDataFields.
                            object_class_points])[0] - num_valid_points_frame0
            weights = tf.concat([
                tf.ones([num_valid_points_frame0, 1], tf.float32),
                tf.zeros([num_additional_frame_points, 1], tf.float32)
            ],
                                axis=0)
        else:
            weights = tf.ones_like(mesh_inputs[
                standard_fields.InputDataFields.object_class_points],
                                   dtype=tf.float32)

    if standard_fields.InputDataFields.object_class_points in mesh_inputs:
        mesh_inputs[
            standard_fields.InputDataFields.object_class_points] = tf.cast(
                mesh_inputs[
                    standard_fields.InputDataFields.object_class_points],
                dtype=tf.int32)
        # The ignore labels are compared before the label offset is applied.
        for ignore_label in ignore_labels:
            weights *= tf.cast(tf.not_equal(
                mesh_inputs[
                    standard_fields.InputDataFields.object_class_points],
                ignore_label),
                               dtype=tf.float32)
        mesh_inputs[
            standard_fields.InputDataFields.point_loss_weights] = weights
        mesh_inputs[standard_fields.InputDataFields.
                    object_class_points] += semantic_labels_offset

    # We normalize the intensities and elongations to be in a smaller range.
    if standard_fields.InputDataFields.point_intensities in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.
                    point_intensities] = change_intensity_range(
                        intensities=mesh_inputs[
                            standard_fields.InputDataFields.point_intensities])
    if standard_fields.InputDataFields.point_elongations in mesh_inputs:
        mesh_inputs[
            standard_fields.InputDataFields.point_elongations] = (tf.cast(
                mesh_inputs[standard_fields.InputDataFields.point_elongations],
                dtype=tf.float32) * 2.0 / 255.0) - 1.0

    # Random scale the points.
    if min_scale_ratio is not None and max_scale_ratio is not None:
        scale_ratio = tf.random.uniform([],
                                        minval=min_scale_ratio,
                                        maxval=max_scale_ratio,
                                        dtype=tf.float32)
        mesh_inputs[
            standard_fields.InputDataFields.point_positions] *= scale_ratio
        # Motion vectors are scaled by the same ratio as the positions.
        if standard_fields.InputDataFields.object_flow_points in mesh_inputs:
            mesh_inputs[standard_fields.InputDataFields.
                        object_flow_points] *= scale_ratio

    # Random crop the points.
    randomly_crop_points(mesh_inputs=mesh_inputs,
                         view_indices_2d_inputs=view_indices_2d_inputs,
                         x_random_crop_size=x_random_crop_size,
                         y_random_crop_size=y_random_crop_size)

    # If training, pick the best labeled image and points that project to it.
    # In many datasets, only one image is labeled anyways.
    if remove_unlabeled_images_and_points:
        pick_labeled_image(mesh_inputs=mesh_inputs,
                           view_image_inputs=view_image_inputs,
                           view_indices_2d_inputs=view_indices_2d_inputs,
                           view_name=labeled_view_name)

    # Process images.
    preprocessor_utils.preprocess_images(
        view_image_inputs=view_image_inputs,
        view_indices_2d_inputs=view_indices_2d_inputs,
        image_preprocess_fn_dic=image_preprocess_fn_dic,
        is_training=is_training)

    # Record the original points.
    original_points = mesh_inputs[
        standard_fields.InputDataFields.point_positions]
    if standard_fields.InputDataFields.point_colors in mesh_inputs:
        original_colors = mesh_inputs[
            standard_fields.InputDataFields.point_colors]
    if standard_fields.InputDataFields.point_normals in mesh_inputs:
        original_normals = mesh_inputs[
            standard_fields.InputDataFields.point_normals]

    # Update feature visibility count.
    if 'feature_visibility_count' in mesh_inputs:
        # Clamp the count to at least 1 before it is used as a divisor.
        mesh_inputs['feature_visibility_count'] = tf.maximum(
            mesh_inputs['feature_visibility_count'], 1)
        mesh_inputs['features'] /= tf.cast(
            mesh_inputs['feature_visibility_count'], dtype=tf.float32)

    # Subtract mean from points.
    mean_points = tf.reduce_mean(
        mesh_inputs[standard_fields.InputDataFields.point_positions], axis=0)
    mesh_inputs[
        standard_fields.InputDataFields.point_positions] -= tf.expand_dims(
            mean_points, axis=0)

    # Rotate points randomly.
    if standard_fields.InputDataFields.point_normals in mesh_inputs:
        normals = mesh_inputs[standard_fields.InputDataFields.point_normals]
    else:
        normals = None

    if standard_fields.InputDataFields.object_flow_points in mesh_inputs:
        motions = mesh_inputs[
            standard_fields.InputDataFields.object_flow_points]
    else:
        motions = None

    (mesh_inputs[standard_fields.InputDataFields.point_positions],
     rotated_normals, rotated_motions) = rotate_randomly(
         points=mesh_inputs[standard_fields.InputDataFields.point_positions],
         normals=normals,
         motions=motions,
         x_min_degree_rotation=x_min_degree_rotation,
         x_max_degree_rotation=x_max_degree_rotation,
         y_min_degree_rotation=y_min_degree_rotation,
         y_max_degree_rotation=y_max_degree_rotation,
         z_min_degree_rotation=z_min_degree_rotation,
         z_max_degree_rotation=z_max_degree_rotation)

    # Random flipping in x and y directions.
    (mesh_inputs[standard_fields.InputDataFields.point_positions],
     flipped_normals,
     flipped_motions) = flip_randomly_points_and_normals_motions(
         points=mesh_inputs[standard_fields.InputDataFields.point_positions],
         normals=rotated_normals,
         motions=rotated_motions,
         is_training=is_training)
    if standard_fields.InputDataFields.point_normals in mesh_inputs:
        mesh_inputs[
            standard_fields.InputDataFields.point_normals] = flipped_normals
    if standard_fields.InputDataFields.object_flow_points in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.
                    object_flow_points] = flipped_motions
    # Normalize RGB to [-1.0, 1.0].
    if standard_fields.InputDataFields.point_colors in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.point_colors] = tf.cast(
            mesh_inputs[standard_fields.InputDataFields.point_colors],
            dtype=tf.float32)
        mesh_inputs[standard_fields.InputDataFields.point_colors] *= (2.0 /
                                                                      255.0)
        mesh_inputs[standard_fields.InputDataFields.point_colors] -= 1.0

    # Add original points to mesh inputs.
    mesh_inputs[standard_fields.InputDataFields.
                point_positions_original] = original_points
    if standard_fields.InputDataFields.point_colors in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.
                    point_colors_original] = original_colors
    if standard_fields.InputDataFields.point_normals in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.
                    point_normals_original] = original_normals

    # Pad or clip the point tensors.
    pad_or_clip(mesh_inputs=mesh_inputs,
                view_indices_2d_inputs=view_indices_2d_inputs,
                pad_or_clip_size=points_pad_or_clip_size)
    if num_frame_to_load > 1:
        # Note: num_valid_points is the sum of 'num_points_per_frame' for now.
        # num_points_per_frame is each frame's valid num of points.
        # TODO(huangrui): if random sampling is called earlier, the count here
        # is not guaranteed to be in order. need sorting.
        if num_points_to_randomly_sample is not None:
            raise ValueError(
                'randomly sample is not compatible with padding multi frame point clouds yet!'
            )
        _, _, mesh_inputs[standard_fields.InputDataFields.
                          num_valid_points_per_frame] = tf.unique_with_counts(
                              tf.reshape(
                                  mesh_inputs[standard_fields.InputDataFields.
                                              point_frame_index], [-1]))
        if points_pad_or_clip_size is not None:
            # Number of padded entries: max(pad_or_clip_size - valid, 0).
            padded_points = tf.where_v2(
                tf.greater(
                    points_pad_or_clip_size, mesh_inputs[
                        standard_fields.InputDataFields.num_valid_points]),
                points_pad_or_clip_size -
                mesh_inputs[standard_fields.InputDataFields.num_valid_points],
                0)

            # Correct the potential unique count error from optionally padded 0s point
            # frame index.
            mesh_inputs[
                standard_fields.InputDataFields.
                num_valid_points_per_frame] -= tf.pad(
                    tf.expand_dims(padded_points, 0), [[
                        0,
                        tf.shape(mesh_inputs[standard_fields.InputDataFields.
                                             num_valid_points_per_frame])[0] -
                        1
                    ]])

    # Putting back the dictionaries together
    processed_inputs = mesh_inputs.copy()
    processed_inputs.update(non_tensor_inputs)
    for key in sorted(view_image_inputs):
        processed_inputs[('%s/features' % key)] = view_image_inputs[key]
    for key in sorted(view_indices_2d_inputs):
        processed_inputs[('%s/indices_2d' % key)] = view_indices_2d_inputs[key]

    # Create features that do not exist
    if 'point_offsets' in point_feature_keys:
        preprocessor_utils.add_point_offsets(
            inputs=processed_inputs, voxel_grid_cell_size=voxel_grid_cell_size)
    if 'point_offset_bins' in point_feature_keys:
        preprocessor_utils.add_point_offset_bins(
            inputs=processed_inputs,
            voxel_grid_cell_size=voxel_grid_cell_size,
            num_bins_x=num_offset_bins_x,
            num_bins_y=num_offset_bins_y,
            num_bins_z=num_offset_bins_z)

    # Voxelize point features
    preprocessor_utils.voxelize_point_features(
        inputs=processed_inputs,
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size,
        point_feature_keys=point_feature_keys,
        point_to_voxel_segment_func=point_to_voxel_segment_func,
        num_frame_to_load=num_frame_to_load)

    # Voxelize point / image correspondence indices
    preprocessor_utils.voxelize_point_to_view_correspondences(
        inputs=processed_inputs,
        view_indices_2d_inputs=view_indices_2d_inputs,
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size)

    # Voxelizing the semantic labels
    preprocessor_utils.voxelize_semantic_labels(
        inputs=processed_inputs,
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size)

    # Voxelizing the loss weights
    preprocessor_utils.voxelize_property_tensor(
        inputs=processed_inputs,
        point_tensor_key=standard_fields.InputDataFields.point_loss_weights,
        corresponding_voxel_tensor_key=standard_fields.InputDataFields.
        voxel_loss_weights,
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size,
        segment_func=tf.math.unsorted_segment_max)

    # Voxelizing the object flow
    if standard_fields.InputDataFields.object_flow_points in processed_inputs:
        # The voxel flow is stored as the sum of the per-voxel max and min
        # aggregates of the point flow.
        preprocessor_utils.voxelize_property_tensor(
            inputs=processed_inputs,
            point_tensor_key=standard_fields.InputDataFields.
            object_flow_points,
            corresponding_voxel_tensor_key='object_flow_voxels_max',
            voxels_pad_or_clip_size=voxels_pad_or_clip_size,
            voxel_grid_cell_size=voxel_grid_cell_size,
            segment_func=tf.math.unsorted_segment_max)
        preprocessor_utils.voxelize_property_tensor(
            inputs=processed_inputs,
            point_tensor_key=standard_fields.InputDataFields.
            object_flow_points,
            corresponding_voxel_tensor_key='object_flow_voxels_min',
            voxels_pad_or_clip_size=voxels_pad_or_clip_size,
            voxel_grid_cell_size=voxel_grid_cell_size,
            segment_func=tf.math.unsorted_segment_min)
        processed_inputs[standard_fields.InputDataFields.
                         object_flow_voxels] = processed_inputs[
                             'object_flow_voxels_max'] + processed_inputs[
                                 'object_flow_voxels_min']

    # NOTE(review): this writes to `mesh_inputs` after `processed_inputs` was
    # copied from it, so the returned dictionary is not affected by this
    # assignment. Confirm whether `processed_inputs` was the intended target.
    if num_frame_to_load > 1:
        mesh_inputs[
            standard_fields.InputDataFields.num_valid_points] = mesh_inputs[
                standard_fields.InputDataFields.num_valid_points_per_frame][0]

    # Filter preprocessed_inputs by output_keys if it is not None.
    if output_keys is not None:
        processed_inputs = {
            k: v
            for k, v in six.iteritems(processed_inputs) if k in output_keys
        }
    return processed_inputs