Example #1
        def fwd_fn(query_queries_fwd, query_values_fwd, support_keys_fwd,
                   support_values_fwd, labels_fwd):
            """CrossTransformer forward, using a while loop to save memory."""
            initial = (0,
                       tf.zeros([tf.reduce_max(labels) + 1, zero_dim],
                                dtype=query_queries_fwd.dtype))

            def loop_body(idx, dist):
                dist_new = self._get_dist(query_queries_fwd[idx:idx + 1],
                                          query_values_fwd[idx:idx + 1],
                                          support_keys_fwd, support_values_fwd,
                                          labels_fwd)
                dist = tf.concat([dist, dist_new], axis=1)
                return (idx + 1, dist)

            _, res = tf.while_loop(
                lambda x, _: x < tf.shape(query_queries_fwd)[0],
                loop_body,
                initial,
                parallel_iterations=1)
            return res
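
The nested function above relies on `labels`, `zero_dim`, and `self._get_dist` from its enclosing scope. Below is a minimal, self-contained sketch of the same memory-saving pattern with made-up shapes and a stand-in for the distance computation; `shape_invariants` is assumed here so the accumulator can grow across iterations.

import tensorflow as tf

x = tf.random.normal([16, 8])            # 16 "queries" with 8 features (illustrative).
acc0 = tf.zeros([8, 0], dtype=x.dtype)   # Accumulator starts with zero columns.

def loop_body(idx, acc):
  col = tf.reshape(x[idx], [8, 1])       # Stand-in for self._get_dist(...).
  return idx + 1, tf.concat([acc, col], axis=1)

_, result = tf.while_loop(
    lambda idx, _: idx < tf.shape(x)[0],
    loop_body,
    (0, acc0),
    shape_invariants=(tf.TensorShape([]), tf.TensorShape([8, None])),
    parallel_iterations=1)               # One iteration in flight at a time saves memory.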
Example #2
def _voxel_hard_negative_classification_loss_unbatched(inputs_1, outputs_1,
                                                       is_intermediate, gamma):
    """Loss function for input and outputs of batch size 1."""
    inputs_1, outputs_1 = _get_voxels_valid_inputs_outputs(inputs_1=inputs_1,
                                                           outputs_1=outputs_1)
    if is_intermediate:
        logits = outputs_1[standard_fields.DetectionResultFields.
                           intermediate_object_semantic_voxels]
    else:
        logits = outputs_1[
            standard_fields.DetectionResultFields.object_semantic_voxels]
    labels = tf.reshape(
        inputs_1[standard_fields.InputDataFields.object_class_voxels], [-1])
    background_mask = tf.equal(labels, 0)
    num_background_points = tf.reduce_sum(
        tf.cast(background_mask, dtype=tf.int32))

    def loss_fn():
        """Loss function."""
        num_classes = logits.get_shape().as_list()[-1]
        if num_classes is None:
            raise ValueError('Number of classes is unknown.')
        masked_logits = tf.boolean_mask(logits, background_mask)
        masked_weights = tf.pow(
            1.0 - tf.reshape(tf.nn.softmax(masked_logits)[:, 0], [-1, 1]),
            gamma)
        num_points = tf.shape(masked_logits)[0]
        masked_weights = masked_weights * tf.cast(
            num_points, dtype=tf.float32) / tf.reduce_sum(masked_weights)
        masked_labels_one_hot = tf.one_hot(indices=tf.boolean_mask(
            labels, background_mask),
                                           depth=num_classes)
        loss = classification_loss_fn(logits=masked_logits,
                                      labels=masked_labels_one_hot,
                                      weights=masked_weights)
        return loss

    cond = tf.logical_and(tf.greater(num_background_points, 0),
                          tf.greater(tf.shape(labels)[0], 0))
    return tf.cond(cond, loss_fn, lambda: tf.constant(0.0, dtype=tf.float32))
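
As a rough, stand-alone illustration of the weighting used inside `loss_fn` above (the logits and gamma below are made up): background voxels that the model already classifies as background get a weight near zero, while hard negatives keep a weight near one.

import tensorflow as tf

gamma = 2.0
background_logits = tf.constant([[4., 0., 0.],    # confidently background
                                 [0., 3., 0.]])   # hard negative
weights = tf.pow(1.0 - tf.nn.softmax(background_logits)[:, 0:1], gamma)
# weights -> approximately [[0.0012], [0.91]]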
Example #3
def identity_knn_graph(points, num_valid_points, k):  # pylint: disable=unused-argument
    """Returns each points as its own neighbor k times.

  Args:
    points: A tf.float32 tensor of size [num_batches, N, D] where D is the point
      dimensions.
    num_valid_points: A tf.int32 tensor of size [num_batches] containing the
      number of valid points in each batch example.
    k: Number of neighbors for each point.

  Returns:
    distances: A tf.float32 tensor of [num_batches, N, k]. All distances are
      zero since each point is returned as its own neighbor.
    indices: A tf.int32 tensor of [num_batches, N, k]. Each row will contain
      values that are identical to the index of that row.
  """
    num_batches = points.get_shape()[0]
    num_points = tf.shape(points)[1]
    indices = tf.expand_dims(tf.range(num_points), axis=1)
    indices = tf.tile(tf.expand_dims(indices, axis=0), [num_batches, 1, k])
    distances = tf.zeros([num_batches, num_points, k], dtype=tf.float32)
    return distances, indices
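
A small usage sketch with illustrative shapes, assuming `identity_knn_graph` as defined above:

import tensorflow as tf

points = tf.random.normal([2, 5, 3])              # [num_batches, N, D]
num_valid_points = tf.constant([5, 3], tf.int32)  # unused by this function
distances, indices = identity_knn_graph(points, num_valid_points, k=4)
# distances -> zeros of shape [2, 5, 4]; indices[b, i, :] == i for every b and i.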
Example #4
def sparse_voxel_grid_to_pointcloud(voxel_features, segment_ids,
                                    num_valid_voxels, num_valid_points):
    """Convert voxel features back to points given their segment ids.

  Args:
    voxel_features: A tf.float32 tensor of size [batch_size, N', F].
    segment_ids: A size [batch_size, N] tf.int32 tensor of IDs for each point
      indicating which (flattened) voxel cell its data was mapped to.
    num_valid_voxels: A tf.int32 tensor of size [batch_size] containing the
      number of valid voxels in each batch example.
    num_valid_points: A tf.int32 tensor of size [batch_size] containing the
      number of valid points in each batch example.

  Returns:
    point_features: A tf.float32 tensor of size [batch_size, N, F].

  Raises:
    ValueError: If batch_size is unknown at graph construction time.
  """
    batch_size = voxel_features.shape[0]
    if batch_size is None:
        raise ValueError('batch_size is unknown at graph construction time.')
    num_points = tf.shape(segment_ids)[1]

    def fn(i):
        num_valid_voxels_i = num_valid_voxels[i]
        num_valid_points_i = num_valid_points[i]
        voxel_features_i = voxel_features[i, :num_valid_voxels_i, :]
        segment_ids_i = segment_ids[i, :num_valid_points_i]
        point_features = tf.gather(voxel_features_i, segment_ids_i)
        point_features_rank = len(point_features.get_shape().as_list())
        point_features_paddings = [[0, num_points - num_valid_points_i]]
        for _ in range(point_features_rank - 1):
            point_features_paddings.append([0, 0])
        point_features = tf.pad(point_features,
                                paddings=point_features_paddings)
        return point_features

    return tf.map_fn(fn=fn, elems=tf.range(batch_size), dtype=tf.float32)
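
A small usage sketch with made-up values (one batch example, three voxels, four points):

import tensorflow as tf

voxel_features = tf.constant([[[1., 1.], [2., 2.], [3., 3.]]])  # [1, N'=3, F=2]
segment_ids = tf.constant([[0, 2, 2, 1]], dtype=tf.int32)       # [1, N=4]
num_valid_voxels = tf.constant([3], dtype=tf.int32)
num_valid_points = tf.constant([4], dtype=tf.int32)
point_features = sparse_voxel_grid_to_pointcloud(
    voxel_features, segment_ids, num_valid_voxels, num_valid_points)
# point_features -> [[[1., 1.], [3., 3.], [3., 3.], [2., 2.]]], shape [1, 4, 2].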
Example #5
 def fn():
   """Loss function for when number of input and output boxes is positive."""
   if is_balanced:
     weights = loss_utils.get_balanced_loss_weights_multiclass(
         labels=input_boxes_instance_id)
   else:
     weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1],
                       dtype=tf.float32)
   gt_rotation_matrix = tf.reshape(input_boxes_rotation_matrix, [-1, 9])
   predicted_rotation_matrix = tf.reshape(output_boxes_rotation_matrix,
                                          [-1, 9])
   if loss_type == 'huber':
     loss_fn = tf.keras.losses.Huber(
         delta=delta, reduction=tf.keras.losses.Reduction.NONE)
   elif loss_type == 'absolute_difference':
     loss_fn = tf.keras.losses.MeanAbsoluteError(
         reduction=tf.keras.losses.Reduction.NONE)
   else:
     raise ValueError(('Unknown loss type %s.' % loss_type))
   rotation_losses = loss_fn(
       y_true=gt_rotation_matrix, y_pred=predicted_rotation_matrix)
   return tf.reduce_mean(rotation_losses * tf.reshape(weights, [-1]))
Example #6
def knn_graph_from_points_unbatched(points, k, distance_upper_bound, mask=None):
  """Returns the distances and indices of the neighbors of each point.

  Note that each point will have at least k neighbors unless the number of
  points is less than k. In that case, the python function that is wrapped in
  py_function will raise a value error.

  Args:
    points: A tf.float32 tensor of size [N, D] where D is the point dimensions.
    k: Number of neighbors for each point.
    distance_upper_bound: Only build the graph using points that are closer than
      this distance.
    mask: A tf.bool tensor of size [N], or None. If None, it is ignored. If not
      None, knn is applied only to points where the mask is True; the points
      where the mask is False will have themselves as their neighbors.

  Returns:
    distances: A tf.float32 tensor of size [N, k].
    indices: A tf.int32 tensor of [N, k].
  """

  def fn(np_points, np_mask):
    return np_knn_graph_from_points_unbatched(
        points=np_points,
        k=k,
        distance_upper_bound=distance_upper_bound,
        mask=np_mask)

  num_points = tf.shape(points)[0]
  if mask is None:
    mask = tf.cast(tf.ones([num_points], dtype=tf.int32), dtype=tf.bool)
  else:
    mask = tf.reshape(mask, [num_points])
  distances, indices = tf.compat.v1.py_func(fn, [points, mask],
                                            [tf.float32, tf.int32])
  distances = tf.reshape(distances, [num_points, k])
  indices = tf.reshape(indices, [num_points, k])
  return distances, indices
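
A hypothetical usage sketch; it assumes `np_knn_graph_from_points_unbatched`, the NumPy helper wrapped above, is defined elsewhere in the module.

import tensorflow as tf

points = tf.random.normal([10, 3])
mask = tf.constant([True] * 8 + [False] * 2)
distances, indices = knn_graph_from_points_unbatched(
    points, k=3, distance_upper_bound=2.0, mask=mask)
# distances: [10, 3] tf.float32; indices: [10, 3] tf.int32. The two masked-out
# points keep themselves as their only neighbors.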
Example #7
 def fn(i):
   """Map function."""
   num_valid_points_i = num_valid_points[i]
   points_i = points[i, :num_valid_points_i, :]
   features_i = features[i, :num_valid_points_i, :]
   voxel_features_i, voxel_indices_i, segment_ids_i, voxel_start_location_i = (
       pointcloud_to_sparse_voxel_grid_unbatched(
           points=points_i,
           features=features_i,
           grid_cell_size=grid_cell_size,
           segment_func=segment_func))
   num_valid_voxels_i = tf.shape(voxel_features_i)[0]
   (voxel_features_i, voxel_indices_i, num_valid_voxels_i,
    segment_ids_i) = _pad_or_clip_voxels(
        voxel_features=voxel_features_i,
        voxel_indices=voxel_indices_i,
        num_valid_voxels=num_valid_voxels_i,
        segment_ids=segment_ids_i,
        voxels_pad_or_clip_size=voxels_pad_or_clip_size)
   segment_ids_i = tf.pad(
       segment_ids_i, paddings=[[0, num_points - num_valid_points_i]])
   return (voxel_features_i, voxel_indices_i, num_valid_voxels_i,
           segment_ids_i, voxel_start_location_i)
Example #8
def select_slate_greedy(slate_size, s_no_click, s, q):
  """Selects the slate using the adaptive greedy algorithm.

  This algorithm corresponds to the method "GS" in
  Ie et al. https://arxiv.org/abs/1905.12767.

  Args:
    slate_size: int, the size of the recommendation slate.
    s_no_click: float tensor, the score for not clicking any document.
    s: [num_of_documents] tensor, the scores for clicking documents.
    q: [num_of_documents] tensor, the predicted q values for documents.

  Returns:
    [slate_size] tensor, the selected slate.
  """

  def argmax(v, mask):
    return tf.argmax(
        input=(v - tf.reduce_min(input_tensor=v) + 1) * mask, axis=0)

  numerator = tf.constant(0.)
  denominator = tf.constant(0.) + s_no_click
  mask = tf.ones(tf.shape(input=q)[0])

  def set_element(v, i, x):
    mask = tf.one_hot(i, tf.shape(input=v)[0])
    v_new = tf.ones_like(v) * x
    return tf.where(tf.equal(mask, 1), v_new, v)

  for _ in range(slate_size):
    k = argmax((numerator + s * q) / (denominator + s), mask)
    mask = set_element(mask, k, 0)
    numerator = numerator + tf.gather(s * q, k)
    denominator = denominator + tf.gather(s, k)

  output_slate = tf.where(tf.equal(mask, 0))
  return output_slate
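
A small usage sketch with made-up scores and q-values:

import tensorflow as tf

s_no_click = tf.constant(1.0)
s = tf.constant([0.5, 2.0, 1.0, 0.1])   # click scores per document
q = tf.constant([0.3, 0.9, 0.4, 0.2])   # predicted q-values per document
slate = select_slate_greedy(slate_size=2, s_no_click=s_no_click, s=s, q=q)
# slate -> indices of the two greedily selected documents. Note that tf.where
# returns them with shape [slate_size, 1].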
Example #9
    def _build_train_op(self):
        """Builds the training op for Rainbow.

    Returns:
      train_op: An op performing one step of training.
    """
        target_distribution = tf.stop_gradient(
            self._build_target_distribution())

        # size of indices: batch_size x 1.
        indices = tf.range(tf.shape(self._replay_logits)[0])[:, None]
        # size of reshaped_actions: batch_size x 2.
        reshaped_actions = tf.concat([indices, self._replay.actions[:, None]],
                                     1)
        # For each element of the batch, fetch the logits for its selected action.
        chosen_action_logits = tf.gather_nd(self._replay_logits,
                                            reshaped_actions)

        loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=target_distribution, logits=chosen_action_logits)

        optimizer = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                           epsilon=self.optimizer_epsilon)

        update_priorities_op = self._replay.tf_set_priority(
            self._replay.indices, tf.sqrt(loss + 1e-10))

        target_priorities = self._replay.tf_get_priority(self._replay.indices)
        target_priorities = tf.math.add(target_priorities, 1e-10)
        target_priorities = 1.0 / tf.sqrt(target_priorities)
        target_priorities /= tf.reduce_max(target_priorities)

        weighted_loss = target_priorities * loss

        with tf.control_dependencies([update_priorities_op]):
            return optimizer.minimize(
                tf.reduce_mean(weighted_loss)), weighted_loss
Example #10
    def decode_with_label(self, example_string):
        """Processes a single example string.

    Extracts and processes the image and its label. We assume that the
    image has three channels.

    Args:
      example_string: str, an Example protocol buffer.

    Returns:
      image_rescaled: the image, resized to `image_size x image_size` and
        rescaled to [-1, 1]. Note that Gaussian data augmentation may cause
        values to go beyond this range.
      label: tf.int32, the example's label.
    """
        ex_decoded = read_example_and_parse_image(example_string)
        image_decoded = ex_decoded['image']
        image_resized = tf.image.resize_images(
            image_decoded, [self.image_size, self.image_size],
            method=tf.image.ResizeMethod.BILINEAR,
            align_corners=True)
        image_resized = tf.cast(image_resized, tf.float32)
        image = 2 * (image_resized / 255.0 - 0.5)  # Rescale to [-1, 1].

        if self.data_augmentation is not None:
            if self.data_augmentation.enable_gaussian_noise:
                image = image + tf.random_normal(tf.shape(
                    image)) * self.data_augmentation.gaussian_noise_std

            if self.data_augmentation.enable_jitter:
                j = self.data_augmentation.jitter_amount
                paddings = tf.constant([[j, j], [j, j], [0, 0]])
                image = tf.pad(image, paddings, 'REFLECT')
                image = tf.image.random_crop(
                    image, [self.image_size, self.image_size, 3])
        return image, tf.cast(ex_decoded['label'], dtype=tf.int32)
Example #11
 def set_element(v, i, x):
     mask = tf.one_hot(i, tf.shape(input=v)[0])
     v_new = tf.ones_like(v) * x
     return tf.where(tf.equal(mask, 1), v_new, v)
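
A tiny usage sketch: `set_element` functionally sets v[i] = x without in-place assignment.

import tensorflow as tf

v = tf.constant([1., 2., 3.])
print(set_element(v, i=1, x=9.))  # -> [1., 9., 3.]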
Example #12
def classification_loss_using_mask_iou(inputs,
                                       outputs,
                                       num_samples,
                                       max_instance_id=None,
                                       similarity_strategy='distance',
                                       is_balanced=True,
                                       is_intermediate=False):
    """Classification loss with an iou threshold.

  Args:
    inputs: A dictionary that contains
      num_valid_voxels - A tf.int32 tensor of size [batch_size].
      instance_ids - A tf.int32 tensor of size [batch_size, n].
      class_labels - A tf.int32 tensor of size [batch_size, n]. It is assumed
        that the background voxels are assigned to class 0.
    outputs: A dictionary that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
      logits - A tf.float32 tensor of size [batch_size, n, num_classes]. It is
        assumed that background is class 0.
    num_samples: An int determining the number of samples.
    max_instance_id: If set, instance ids larger than that value will be
      ignored. If not set, it will be computed from instance_ids tensor.
    similarity_strategy: Defines the method for computing similarity between
                         embedding vectors. Possible values are 'dotproduct' and
                         'distance'.
    is_balanced: If True, the per-voxel losses are re-weighted to have equal
      total weight for foreground vs. background voxels.
    is_intermediate: True if applied to intermediate predictions;
      otherwise, False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
    instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
    class_labels_key = standard_fields.InputDataFields.object_class_voxels
    num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
    if is_intermediate:
        embedding_key = (standard_fields.DetectionResultFields.
                         intermediate_instance_embedding_voxels)
        logits_key = (standard_fields.DetectionResultFields.
                      intermediate_object_semantic_voxels)
    else:
        embedding_key = (
            standard_fields.DetectionResultFields.instance_embedding_voxels)
        logits_key = standard_fields.DetectionResultFields.object_semantic_voxels
    if instance_ids_key not in inputs:
        raise ValueError('instance_ids is missing in inputs.')
    if class_labels_key not in inputs:
        raise ValueError('class_labels is missing in inputs.')
    if num_voxels_key not in inputs:
        raise ValueError('num_voxels is missing in inputs.')
    if embedding_key not in outputs:
        raise ValueError('embedding is missing in outputs.')
    if logits_key not in outputs:
        raise ValueError('logits is missing in outputs.')
    batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
    if batch_size is None:
        raise ValueError(
            'batch_size is not defined at graph construction time.')
    num_valid_voxels = inputs[num_voxels_key]
    num_voxels = tf.shape(inputs[instance_ids_key])[1]
    valid_mask = tf.less(
        tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
        tf.expand_dims(num_valid_voxels, axis=1))
    return classification_loss_using_mask_iou_func(
        embeddings=outputs[embedding_key],
        logits=outputs[logits_key],
        instance_ids=tf.reshape(inputs[instance_ids_key], [batch_size, -1]),
        class_labels=inputs[class_labels_key],
        num_samples=num_samples,
        valid_mask=valid_mask,
        max_instance_id=max_instance_id,
        similarity_strategy=similarity_strategy,
        is_balanced=is_balanced)
Example #13
def _box_corner_distance_loss(
    loss_type, is_balanced, input_boxes_length, input_boxes_height,
    input_boxes_width, input_boxes_center, input_boxes_rotation_matrix,
    input_boxes_instance_id, output_boxes_length, output_boxes_height,
    output_boxes_width, output_boxes_center, output_boxes_rotation_matrix,
    delta):
  """Computes regression loss on object corner locations."""

  def fn():
    """Loss function for when number of input and output boxes is positive."""
    if is_balanced:
      weights = loss_utils.get_balanced_loss_weights_multiclass(
          labels=input_boxes_instance_id)
    else:
      weights = tf.ones([tf.shape(input_boxes_instance_id)[0], 1],
                        dtype=tf.float32)
    normalized_box_size = 5.0
    predicted_boxes_length = output_boxes_length
    predicted_boxes_height = output_boxes_height
    predicted_boxes_width = output_boxes_width
    predicted_boxes_center = output_boxes_center
    predicted_boxes_rotation_matrix = output_boxes_rotation_matrix
    gt_boxes_length = input_boxes_length
    gt_boxes_height = input_boxes_height
    gt_boxes_width = input_boxes_width
    gt_boxes_center = input_boxes_center
    gt_boxes_rotation_matrix = input_boxes_rotation_matrix
    if loss_type in ['normalized_huber', 'normalized_euclidean']:
      predicted_boxes_length /= (gt_boxes_length / normalized_box_size)
      predicted_boxes_height /= (gt_boxes_height / normalized_box_size)
      predicted_boxes_width /= (gt_boxes_width / normalized_box_size)
      gt_boxes_length = tf.ones_like(
          gt_boxes_length, dtype=tf.float32) * normalized_box_size
      gt_boxes_height = tf.ones_like(
          gt_boxes_height, dtype=tf.float32) * normalized_box_size
      gt_boxes_width = tf.ones_like(
          gt_boxes_width, dtype=tf.float32) * normalized_box_size
    gt_box_corners = box_utils.get_box_corners_3d(
        boxes_length=gt_boxes_length,
        boxes_height=gt_boxes_height,
        boxes_width=gt_boxes_width,
        boxes_rotation_matrix=gt_boxes_rotation_matrix,
        boxes_center=gt_boxes_center)
    predicted_box_corners = box_utils.get_box_corners_3d(
        boxes_length=predicted_boxes_length,
        boxes_height=predicted_boxes_height,
        boxes_width=predicted_boxes_width,
        boxes_rotation_matrix=predicted_boxes_rotation_matrix,
        boxes_center=predicted_boxes_center)
    corner_weights = tf.tile(weights, [1, 8])
    if loss_type in ['huber', 'normalized_huber']:
      loss_fn = tf.keras.losses.Huber(
          delta=delta, reduction=tf.keras.losses.Reduction.NONE)
    elif loss_type in ['normalized_absolute_difference', 'absolute_difference']:
      loss_fn = tf.keras.losses.MeanAbsoluteError(
          reduction=tf.keras.losses.Reduction.NONE)
    else:
      raise ValueError(('Unknown loss type %s.' % loss_type))
    box_corner_losses = loss_fn(
        y_true=tf.reshape(gt_box_corners, [-1, 3]),
        y_pred=tf.reshape(predicted_box_corners, [-1, 3]))
    return tf.reduce_mean(box_corner_losses * tf.reshape(corner_weights, [-1]))

  cond_input = tf.greater(tf.shape(input_boxes_length)[0], 0)
  cond_output = tf.greater(tf.shape(output_boxes_length)[0], 0)
  cond = tf.logical_and(cond_input, cond_output)
  return tf.cond(cond, fn, lambda: tf.constant(0.0, dtype=tf.float32))
Example #14
def compute_motion_labels(scene,
                          frame0,
                          frame1,
                          frame_start_index,
                          points_key,
                          box_margin=0.1):
  """Compute motion label for each point.

  Args:
    scene: dict of tensor containing scene.
    frame0: dict of tensor containing points and objects.
    frame1: dict of tensor containing points and objects.
    frame_start_index: starting frame index.
    points_key:  A string corresponding to the tensor of point positions in
      inputs.
    box_margin: A margin value to enlarge box, so that surrounding points are
      included.

  Returns:
    A motion tensor of [N, 3] shape.

  """
  point_positions = frame0[points_key]
  frame0_object_names = frame0['objects/name']
  frame1_object_names = frame1['objects/name']
  bool_matrix = tf.math.equal(
      tf.expand_dims(frame0_object_names, axis=1),
      tf.expand_dims(frame1_object_names, axis=0))
  match_indices = tf.where(bool_matrix)

  # object box level
  box_dimension = tf.gather(
      frame0['objects/shape/dimension'], match_indices[:, 0], axis=0)
  boxes_length = box_dimension[:, 0:1]
  boxes_width = box_dimension[:, 1:2]
  boxes_height = box_dimension[:, 2:3]
  boxes_rotation_matrix = tf.gather(
      frame0['objects/pose/R'], match_indices[:, 0], axis=0)
  boxes_center = tf.gather(
      frame0['objects/pose/t'], match_indices[:, 0], axis=0)
  frame1_box_rotation_matrix = tf.gather(
      frame1['objects/pose/R'], match_indices[:, 1], axis=0)
  frame1_box_center = tf.gather(
      frame1['objects/pose/t'], match_indices[:, 1], axis=0)

  # frame level
  frame0_rotation = scene['frames/pose/R'][frame_start_index]
  frame1_rotation = scene['frames/pose/R'][frame_start_index + 1]
  frame0_translation = scene['frames/pose/t'][frame_start_index]
  frame1_translation = scene['frames/pose/t'][frame_start_index + 1]

  frame1_box_center_global = tf.tensordot(
      frame1_box_center, frame1_rotation, axes=(1, 1)) + frame1_translation
  frame1_box_center_in_frame0 = tf.tensordot(
      frame1_box_center_global - frame0_translation,
      frame0_rotation,
      axes=(1, 0))

  # only find index on boxes that are matched between two frames
  points_box_index = box_utils.map_points_to_boxes(
      points=point_positions,
      boxes_length=boxes_length,
      boxes_height=boxes_height,
      boxes_width=boxes_width,
      boxes_rotation_matrix=boxes_rotation_matrix,
      boxes_center=boxes_center,
      box_margin=box_margin)

  # TODO(huangrui): disappeared object boxes have 0 motion.
  # Consider setting them to nan or to an ignore_label instead.

  # 1. gather points in surviving matched box only,
  #    and replicate rotation/t to same length;
  # 2. get points in box frame, apply new rotation/t per point;
  # 3. new location minus old location -> motion vector;
  # 4. scatter it to a larger motion_vector with 0 for
  #    points outside of matched boxes.

  # The boxes must be limited to the matched boxes; otherwise
  # points_box_index would contain indices of unmatched (useless) boxes.

  # index in all point array, of points that are inside the box.
  points_inside_box_index = tf.where(points_box_index + 1)[:, 0]
  box_index = tf.gather(points_box_index, points_inside_box_index)
  points_inside_box = tf.gather(point_positions, points_inside_box_index)
  box_rotation_per_point = tf.gather(boxes_rotation_matrix, box_index)
  box_center_per_point = tf.gather(boxes_center, box_index)
  # Tensors of shape [N, 3, 3] and [N, 3]. Note that the points are transformed
  # with the transposed (inverse) rotation.
  points_in_box_frame = tf.einsum('ikj,ik->ij', box_rotation_per_point,
                                  points_inside_box - box_center_per_point)

  # Transform the box rotation from frame1 coordinates to frame0 coordinates.
  # Note that the transpose is implemented by changing the summation axis.
  frame1_box_rotation_matrix_global = tf.transpose(
      tf.tensordot(frame1_rotation, frame1_box_rotation_matrix, axes=(1, 1)),
      perm=(1, 0, 2))
  frame1_box_rotation_matrix_in_frame0 = tf.transpose(
      tf.tensordot(
          frame0_rotation, frame1_box_rotation_matrix_global, axes=(0, 1)),
      perm=(1, 0, 2))

  # This gives each point's position after following the frame1 box's motion.
  frame1_box_rotation_in_frame0_per_point = tf.gather(
      frame1_box_rotation_matrix_in_frame0, box_index)
  frame1_box_center_in_frame0_per_point = tf.gather(frame1_box_center_in_frame0,
                                                    box_index)

  points_in_box_frame1 = tf.einsum(
      'ijk,ik->ij', frame1_box_rotation_in_frame0_per_point,
      points_in_box_frame) + frame1_box_center_in_frame0_per_point
  motion_vector = points_in_box_frame1 - points_inside_box

  scattered_vector = tf.scatter_nd(
      indices=tf.expand_dims(points_inside_box_index, axis=1),
      updates=motion_vector,
      shape=tf.shape(point_positions, out_type=tf.dtypes.int64))

  return scattered_vector
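
A minimal check, with an illustrative rotation, that `tf.einsum('ikj,ik->ij', R, p)` as used above applies the transposed (for rotations, inverse) matrices to the points:

import tensorflow as tf

R = tf.constant([[[0., -1., 0.],
                  [1., 0., 0.],
                  [0., 0., 1.]]])             # [1, 3, 3], a 90-degree rotation about z.
p = tf.constant([[1., 0., 0.]])               # [1, 3]
p_in_box_frame = tf.einsum('ikj,ik->ij', R, p)
# p_in_box_frame -> [[0., -1., 0.]], i.e. R^T applied to p.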
Example #15
def _box_classification_using_center_distance_loss_unbatched(
        inputs_1, outputs_1, is_intermediate, is_balanced,
        max_positive_normalized_distance):
    """Loss function for input and outputs of batch size 1."""
    inputs_1, outputs_1 = _get_voxels_valid_inputs_outputs(inputs_1=inputs_1,
                                                           outputs_1=outputs_1)
    if is_intermediate:
        output_object_centers = outputs_1[standard_fields.DetectionResultFields
                                          .intermediate_object_center_voxels]
        output_object_length = outputs_1[standard_fields.DetectionResultFields.
                                         intermediate_object_length_voxels]
        output_object_height = outputs_1[standard_fields.DetectionResultFields.
                                         intermediate_object_height_voxels]
        output_object_width = outputs_1[standard_fields.DetectionResultFields.
                                        intermediate_object_width_voxels]
        output_object_rotation_matrix = outputs_1[
            standard_fields.DetectionResultFields.
            intermediate_object_rotation_matrix_voxels]
        logits = outputs_1[standard_fields.DetectionResultFields.
                           intermediate_object_semantic_voxels]
    else:
        output_object_centers = outputs_1[
            standard_fields.DetectionResultFields.object_center_voxels]
        output_object_length = outputs_1[
            standard_fields.DetectionResultFields.object_length_voxels]
        output_object_height = outputs_1[
            standard_fields.DetectionResultFields.object_height_voxels]
        output_object_width = outputs_1[
            standard_fields.DetectionResultFields.object_width_voxels]
        output_object_rotation_matrix = outputs_1[
            standard_fields.DetectionResultFields.
            object_rotation_matrix_voxels]
        logits = outputs_1[
            standard_fields.DetectionResultFields.object_semantic_voxels]
    normalized_center_distance = loss_utils.get_normalized_corner_distances(
        predicted_boxes_center=output_object_centers,
        predicted_boxes_length=output_object_length,
        predicted_boxes_height=output_object_height,
        predicted_boxes_width=output_object_width,
        predicted_boxes_rotation_matrix=output_object_rotation_matrix,
        gt_boxes_center=inputs_1[
            standard_fields.InputDataFields.object_center_voxels],
        gt_boxes_length=inputs_1[
            standard_fields.InputDataFields.object_length_voxels],
        gt_boxes_height=inputs_1[
            standard_fields.InputDataFields.object_height_voxels],
        gt_boxes_width=inputs_1[
            standard_fields.InputDataFields.object_width_voxels],
        gt_boxes_rotation_matrix=inputs_1[
            standard_fields.InputDataFields.object_rotation_matrix_voxels])
    labels = tf.reshape(
        inputs_1[standard_fields.InputDataFields.object_class_voxels], [-1])
    instances = tf.reshape(
        inputs_1[standard_fields.InputDataFields.object_instance_id_voxels],
        [-1])
    params = {}
    if is_balanced:
        weights = loss_utils.get_balanced_loss_weights_multiclass(
            labels=tf.expand_dims(instances, axis=1))
        params['weights'] = weights

    def loss_fn():
        """Loss function."""
        num_classes = logits.get_shape().as_list()[-1]
        if num_classes is None:
            raise ValueError('Number of classes is unknown.')
        labels_one_hot = tf.one_hot(indices=(labels - 1),
                                    depth=(num_classes - 1))
        inverse_distance_coef = tf.maximum(
            tf.minimum(
                1.0 -
                normalized_center_distance / max_positive_normalized_distance,
                1.0), 0.0)
        labels_one_hot = tf.reshape(inverse_distance_coef,
                                    [-1, 1]) * labels_one_hot
        background_label = 1.0 - tf.math.reduce_sum(
            labels_one_hot, axis=1, keepdims=True)
        labels_one_hot = tf.concat([background_label, labels_one_hot], axis=1)
        loss = classification_loss_fn(logits=logits,
                                      labels=labels_one_hot,
                                      **params)
        return loss

    return tf.cond(tf.greater(tf.shape(labels)[0], 0), loss_fn,
                   lambda: tf.constant(0.0, dtype=tf.float32))
Example #16
def pointcloud_to_sparse_voxel_grid(points, features, num_valid_points,
                                    grid_cell_size, voxels_pad_or_clip_size,
                                    segment_func):
  """Converts a pointcloud into a voxel grid.

  This function calls the `pointcloud_to_sparse_voxel_grid_unbatched`
  function above in a while loop to map a batch of points to a batch of voxels.

  Args:
    points: A tf.float32 tensor of size [batch_size, N, 3].
    features: A tf.float32 tensor of size [batch_size, N, F].
    num_valid_points: A tf.int32 tensor of size [num_batches] containing the
      number of valid points in each batch example.
    grid_cell_size: A tf.float32 tensor of size [3].
    voxels_pad_or_clip_size: Number of target voxels to pad or clip to. If None,
      it will not perform the padding.
    segment_func: A tensorflow function that operates on segments. Examples are
      one of tf.math.unsorted_segment_{min/max/mean/prod/sum}.

  Returns:
    voxel_features: A tf.float32 tensor of size [batch_size, N', F]
      or [batch_size, N', G, F] where G is the number of points sampled per
      voxel.
    voxel_indices: A tf.int32 tensor of size [batch_size, N', 3].
    num_valid_voxels: A tf.int32 tensor of size [batch_size].
    segment_ids: A size [batch_size, N] tf.int32 tensor of IDs for each point
      indicating which (flattened) voxel cell its data was mapped to.
    voxel_start_location: A size [batch_size, 3] tf.float32 tensor of voxel
      start locations.

  Raises:
    ValueError: If pooling method is unknown.
  """
  batch_size = points.get_shape().as_list()[0]
  if batch_size is None:
    batch_size = tf.shape(points)[0]
  num_points = tf.shape(points)[1]

  def fn(i):
    """Map function."""
    num_valid_points_i = num_valid_points[i]
    points_i = points[i, :num_valid_points_i, :]
    features_i = features[i, :num_valid_points_i, :]
    voxel_features_i, voxel_indices_i, segment_ids_i, voxel_start_location_i = (
        pointcloud_to_sparse_voxel_grid_unbatched(
            points=points_i,
            features=features_i,
            grid_cell_size=grid_cell_size,
            segment_func=segment_func))
    num_valid_voxels_i = tf.shape(voxel_features_i)[0]
    (voxel_features_i, voxel_indices_i, num_valid_voxels_i,
     segment_ids_i) = _pad_or_clip_voxels(
         voxel_features=voxel_features_i,
         voxel_indices=voxel_indices_i,
         num_valid_voxels=num_valid_voxels_i,
         segment_ids=segment_ids_i,
         voxels_pad_or_clip_size=voxels_pad_or_clip_size)
    segment_ids_i = tf.pad(
        segment_ids_i, paddings=[[0, num_points - num_valid_points_i]])
    return (voxel_features_i, voxel_indices_i, num_valid_voxels_i,
            segment_ids_i, voxel_start_location_i)

  return tf.map_fn(
      fn=fn,
      elems=tf.range(batch_size),
      dtype=(tf.float32, tf.int32, tf.int32, tf.int32, tf.float32))
Example #17
  def _build_train_op(self, optimizer):
    """Build the TensorFlow graph used to learn the bisimulation metric.

    Args:
      optimizer: a tf.train optimizer.
    Returns:
      A TensorFlow op to minimize the bisimulation loss.
    """
    self.online_network = tf.make_template('Online',
                                           self._network_template)
    self.target_network = tf.make_template('Target',
                                           self._network_template)
    self.s1_ph = tf.placeholder(tf.float64, (self.batch_size, 2),
                                name='s1_ph')
    self.s2_ph = tf.placeholder(tf.float64, (self.batch_size, 2),
                                name='s2_ph')
    self.s1_online_distances = self.online_network(
        self._concat_states(self.s1_ph))
    self.s1_target_distances = self.target_network(
        self._concat_states(self.s1_ph))
    self.s2_target_distances = self.target_network(
        self._concat_states(self.s2_ph))
    self.action_ph = tf.placeholder(tf.int32, (self.batch_size,))
    self.rewards_ph = tf.placeholder(tf.float64, (self.batch_size,))
    # We use an expanding horizon for computing the distances.
    self.bisim_horizon_ph = tf.placeholder(tf.float64, ())
    # bisimulation_target_1 = rew_diff + gamma * next_distance.
    bisimulation_target_1 = tf.stop_gradient(self._build_bisimulation_target())
    # bisimulation_target_2 = curr_distance.
    bisimulation_target_2 = tf.stop_gradient(self.s1_target_distances)
    # We slowly taper in the maximum according to the bisim horizon.
    bisimulation_target = tf.maximum(
        bisimulation_target_1, bisimulation_target_2 * self.bisim_horizon_ph)
    # We zero-out diagonal entries, since those are estimating the distance
    # between a state and itself, which we know to be 0.
    diagonal_mask = 1.0 - tf.diag(tf.ones(self.batch_size, dtype=tf.float64))
    diagonal_mask = tf.reshape(diagonal_mask, (self.batch_size**2, 1))
    bisimulation_target *= diagonal_mask
    bisimulation_estimate = self.s1_online_distances
    # We start with a mask that includes everything.
    loss_mask = tf.ones(tf.shape(bisimulation_estimate))
    # We have to enforce that states are only compared when they were paired
    # with the same action.
    indicators = self.action_ph
    indicators = tf.cast(indicators, tf.float64)
    # indicators will initially have shape [batch_size], we first tile it:
    square_ids = tf.tile([indicators], [self.batch_size, 1])
    # We subtract square_ids from its transpose:
    square_ids = square_ids - tf.transpose(square_ids)
    # At this point all zero-entries are the ones with equal IDs.
    # Now we would like to convert the zeros in this matrix to 1s, and make
    # everything else a 0. We can do this with the following operation:
    loss_mask = 1 - tf.abs(tf.sign(square_ids))
    # Now reshape to match the shapes of the estimate and target.
    loss_mask = tf.reshape(loss_mask, (self.batch_size**2, 1))
    larger_targets = bisimulation_target - bisimulation_estimate
    larger_targets_count = tf.reduce_sum(
        tf.cast(larger_targets > 0., tf.float64))
    tf.summary.scalar('Learning/LargerTargets', larger_targets_count)
    tf.summary.scalar('Learning/NumUpdates', tf.count_nonzero(loss_mask))
    tf.summary.scalar('Learning/BisimHorizon', self.bisim_horizon_ph)
    bisimulation_loss = tf.losses.mean_squared_error(
        bisimulation_target,
        bisimulation_estimate,
        weights=loss_mask)
    tf.summary.scalar('Learning/loss', bisimulation_loss)
    # Plot average distance between sampled representations.
    average_distance = tf.reduce_mean(bisimulation_estimate)
    tf.summary.scalar('Approx/AverageDistance', average_distance)
    return optimizer.minimize(bisimulation_loss)
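
A small standalone sketch (with made-up action ids) of the equal-action masking trick used above: zero entries of `square_ids` mark pairs of states that used the same action.

import tensorflow as tf

actions = tf.constant([0., 1., 0.], dtype=tf.float64)  # action ids for a batch of 3
square_ids = tf.tile([actions], [3, 1])
square_ids = square_ids - tf.transpose(square_ids)
loss_mask = 1 - tf.abs(tf.sign(square_ids))
# loss_mask -> [[1., 0., 1.],
#               [0., 1., 0.],
#               [1., 0., 1.]]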
Example #18
def random_eraser(images,
                  min_size,
                  max_size,
                  probability,
                  max_operations,
                  probability_additional_operations,
                  augment_entire_batch = False):
  """Earses a random rectangle shaped areas in the second image or image batch.

  Args:
    images: Stacked image pair that should be augmented with shape
      [2, height, width, 3] or a batch of images that should be augmented with
      shape [batch, height, width, 3].
    min_size: Minimum size of erased rectangle.
    max_size: Maximum size of erased rectangle.
    probability: Probability of applying this augmentation function.
    max_operations: Maximum total number of areas to erase.
    probability_additional_operations: Probability for each additional area to
      be erased if augmentation is applied.
    augment_entire_batch: If true, the input is treated as a batch of images to
      which the augmentation should be applied.

  Returns:
    Possibly augmented images.
  """
  perform_erase = tf.less(tf.random.uniform([]), probability)
  height = tf.shape(images)[-3]
  width = tf.shape(images)[-2]

  # Returns augmented images.
  def true_fn(images):
    if augment_entire_batch:
      image_2 = images
      mean_color = tf.reduce_mean(image_2, axis=[1, 2], keepdims=True)
      print(mean_color.shape)
    else:
      image_1, image_2 = tf.unstack(images)
      mean_color = tf.reduce_mean(image_2, axis=[0, 1], keepdims=True)
    def body(var_img, mean_color):
      x0 = tf.random.uniform([], 0, width, dtype=tf.int32)
      y0 = tf.random.uniform([], 0, height, dtype=tf.int32)
      dx = tf.random.uniform([], min_size, max_size, dtype=tf.int32)
      dy = tf.random.uniform([], min_size, max_size, dtype=tf.int32)
      x = tf.range(width)
      x_mask = (x0 <= x) & (x < x0+dx)
      y = tf.range(height)
      y_mask = (y0 <= y) & (y < y0+dy)
      mask = x_mask & y_mask[:, tf.newaxis]
      mask = tf.cast(mask[:, :, tf.newaxis], image_2.dtype)
      result = var_img * (1 - mask) + mean_color * mask
      return result
    # Perform at least one erase operation.
    image_2 = body(image_2, mean_color)
    # Perform additional erase operations.
    for _ in range(max_operations - 1):
      perform_erase = tf.less(
          tf.random.uniform([]), probability_additional_operations)
      image_2 = tf.cond(perform_erase, lambda: body(image_2, mean_color),
                        lambda: image_2)
    if augment_entire_batch:
      images = image_2
    else:
      images = tf.stack([image_1, image_2])
    return images

  # Returns unaugmented images.
  def false_fn(images):
    return images

  return tf.cond(perform_erase,
                 lambda: true_fn(images),
                 lambda: false_fn(images))
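
A usage sketch with an illustrative image pair (sizes and parameter values are arbitrary):

import tensorflow as tf

images = tf.random.uniform([2, 64, 64, 3])   # stacked image pair
augmented = random_eraser(
    images,
    min_size=4,
    max_size=16,
    probability=0.5,
    max_operations=2,
    probability_additional_operations=0.5)
# `augmented` has the same shape; with probability 0.5, one or two rectangles in
# the second image are replaced by that image's mean color.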
Example #19
def random_crop(images,
                flow,
                mask,
                crop_height,
                crop_width,
                relative_offset,
                probability_crop_offset):
  """Performs a random crop with the given height and width."""
  # early return if crop_height or crop_width is not specified
  if crop_height is None or crop_width is None:
    return images, flow, mask

  orig_height = tf.shape(images)[-3]
  orig_width = tf.shape(images)[-2]

  # check if crop size fits the image size
  scale = 1.0
  ratio = tf.cast(crop_height, tf.float32) / tf.cast(orig_height, tf.float32)
  scale = tf.math.maximum(scale, ratio)
  ratio = tf.cast(crop_width, tf.float32) / tf.cast(orig_width, tf.float32)
  scale = tf.math.maximum(scale, ratio)
  # compute minimum required height
  new_height = tf.cast(
      tf.math.ceil(tf.cast(orig_height, tf.float32) * scale), tf.int32)
  new_width = tf.cast(
      tf.math.ceil(tf.cast(orig_width, tf.float32) * scale), tf.int32)
  # perform resize (scales with 1 if not required)
  images = smurf_utils.resize(images, new_height, new_width, is_flow=False)

  # compute joint offset
  max_offset_h = new_height - tf.cast(crop_height, dtype=tf.int32)
  max_offset_w = new_width - tf.cast(crop_width, dtype=tf.int32)
  joint_offset_h = tf.random.uniform([], maxval=max_offset_h+1, dtype=tf.int32)
  joint_offset_w = tf.random.uniform([], maxval=max_offset_w+1, dtype=tf.int32)

  # compute relative offset
  min_relative_offset_h = tf.math.maximum(
      joint_offset_h - relative_offset, 0)
  max_relative_offset_h = tf.math.minimum(
      joint_offset_h + relative_offset, max_offset_h)
  min_relative_offset_w = tf.math.maximum(
      joint_offset_w - relative_offset, 0)
  max_relative_offset_w = tf.math.minimum(
      joint_offset_w + relative_offset, max_offset_w)

  relative_offset_h = tf.random.uniform(
      [], minval=min_relative_offset_h, maxval=max_relative_offset_h+1,
      dtype=tf.int32)
  relative_offset_w = tf.random.uniform(
      [], minval=min_relative_offset_w, maxval=max_relative_offset_w+1,
      dtype=tf.int32)

  set_crop_offset = tf.random.uniform([]) < probability_crop_offset
  relative_offset_h = tf.cond(
      set_crop_offset, lambda: relative_offset_h, lambda: joint_offset_h)
  relative_offset_w = tf.cond(
      set_crop_offset, lambda: relative_offset_w, lambda: joint_offset_w)

  # crop both images
  image_1, image_2 = tf.unstack(images)
  image_1 = tf.image.crop_to_bounding_box(
      image_1, offset_height=joint_offset_h, offset_width=joint_offset_w,
      target_height=crop_height, target_width=crop_width)
  image_2 = tf.image.crop_to_bounding_box(
      image_2, offset_height=relative_offset_h, offset_width=relative_offset_w,
      target_height=crop_height, target_width=crop_width)
  images = tf.stack([image_1, image_2])

  if flow is not None:
    # perform resize (scales with 1 if not required)
    flow, mask = smurf_utils.resize(
        flow, new_height, new_width, is_flow=True, mask=mask)

    # crop flow and mask
    flow = tf.image.crop_to_bounding_box(
        flow,
        offset_height=joint_offset_h,
        offset_width=joint_offset_w,
        target_height=crop_height,
        target_width=crop_width)
    mask = tf.image.crop_to_bounding_box(
        mask,
        offset_height=joint_offset_h,
        offset_width=joint_offset_w,
        target_height=crop_height,
        target_width=crop_width)

    # correct flow for relative shift (/crop)
    flow_delta = tf.stack(
        [tf.cast(relative_offset_h - joint_offset_h, tf.float32),
         tf.cast(relative_offset_w - joint_offset_w, tf.float32)])
    flow = (flow - flow_delta) * mask
  return images, flow, mask, joint_offset_h, joint_offset_w
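
A hypothetical usage sketch; it assumes `smurf_utils.resize` from the surrounding module, and the shapes are illustrative.

import tensorflow as tf

images = tf.random.uniform([2, 100, 120, 3])   # stacked image pair
flow = tf.random.uniform([100, 120, 2])
mask = tf.ones([100, 120, 1])
images, flow, mask, offset_h, offset_w = random_crop(
    images, flow, mask, crop_height=64, crop_width=64,
    relative_offset=5, probability_crop_offset=0.5)
# images -> [2, 64, 64, 3]; flow and mask are cropped consistently with image_1,
# and the flow is corrected for the relative shift applied to image_2.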
Example #20
def project_distribution(supports, weights, target_support,
                         validate_args=False):
  """Projects a batch of (support, weights) onto target_support.

  Based on equation (7) in (Bellemare et al., 2017):
    https://arxiv.org/abs/1707.06887
  In the rest of the comments we will refer to this equation simply as Eq7.

  This code is not easy to digest, so we will use a running example to clarify
  what is going on, with the following sample inputs:
    * supports =       [[0, 2, 4, 6, 8],
                        [1, 3, 4, 5, 6]]
    * weights =        [[0.1, 0.6, 0.1, 0.1, 0.1],
                        [0.1, 0.2, 0.5, 0.1, 0.1]]
    * target_support = [4, 5, 6, 7, 8]
  In the code below, comments preceded with 'Ex:' will be referencing the above
  values.

  Args:
    supports: Tensor of shape (batch_size, num_dims) defining supports for the
      distribution.
    weights: Tensor of shape (batch_size, num_dims) defining weights on the
      original support points. Although for the CategoricalDQN agent these
      weights are probabilities, it is not required that they are.
    target_support: Tensor of shape (num_dims) defining support of the projected
      distribution. The values must be monotonically increasing. Vmin and Vmax
      will be inferred from the first and last elements of this tensor,
      respectively. The values in this tensor must be equally spaced.
    validate_args: Whether we will verify the contents of the
      target_support parameter.

  Returns:
    A Tensor of shape (batch_size, num_dims) with the projection of a batch of
    (support, weights) onto target_support.

  Raises:
    ValueError: If target_support has no dimensions, or if shapes of supports,
      weights, and target_support are incompatible.
  """
  target_support_deltas = target_support[1:] - target_support[:-1]
  # delta_z = `\Delta z` in Eq7.
  delta_z = target_support_deltas[0]
  validate_deps = []
  supports.shape.assert_is_compatible_with(weights.shape)
  supports[0].shape.assert_is_compatible_with(target_support.shape)
  target_support.shape.assert_has_rank(1)
  if validate_args:
    # Assert that supports and weights have the same shapes.
    validate_deps.append(
        tf.Assert(
            tf.reduce_all(tf.equal(tf.shape(supports), tf.shape(weights))),
            [supports, weights]))
    # Assert that elements of supports and target_support have the same shape.
    validate_deps.append(
        tf.Assert(
            tf.reduce_all(
                tf.equal(tf.shape(supports)[1], tf.shape(target_support))),
            [supports, target_support]))
    # Assert that target_support has a single dimension.
    validate_deps.append(
        tf.Assert(
            tf.equal(tf.size(tf.shape(target_support)), 1), [target_support]))
    # Assert that the target_support is monotonically increasing.
    validate_deps.append(
        tf.Assert(tf.reduce_all(target_support_deltas > 0), [target_support]))
    # Assert that the values in target_support are equally spaced.
    validate_deps.append(
        tf.Assert(
            tf.reduce_all(tf.equal(target_support_deltas, delta_z)),
            [target_support]))

  with tf.control_dependencies(validate_deps):
    # Ex: `v_min, v_max = 4, 8`.
    v_min, v_max = target_support[0], target_support[-1]
    # Ex: `batch_size = 2`.
    batch_size = tf.shape(supports)[0]
    # `N` in Eq7.
    # Ex: `num_dims = 5`.
    num_dims = tf.shape(target_support)[0]
    # clipped_support = `[\hat{T}_{z_j}]^{V_max}_{V_min}` in Eq7.
    # Ex: `clipped_support = [[[ 4.  4.  4.  6.  8.]]
    #                         [[ 4.  4.  4.  5.  6.]]]`.
    clipped_support = tf.clip_by_value(supports, v_min, v_max)[:, None, :]
    # Ex: `tiled_support = [[[[ 4.  4.  4.  6.  8.]
    #                         [ 4.  4.  4.  6.  8.]
    #                         [ 4.  4.  4.  6.  8.]
    #                         [ 4.  4.  4.  6.  8.]
    #                         [ 4.  4.  4.  6.  8.]]
    #                        [[ 4.  4.  4.  5.  6.]
    #                         [ 4.  4.  4.  5.  6.]
    #                         [ 4.  4.  4.  5.  6.]
    #                         [ 4.  4.  4.  5.  6.]
    #                         [ 4.  4.  4.  5.  6.]]]]`.
    tiled_support = tf.tile([clipped_support], [1, 1, num_dims, 1])
    # Ex: `reshaped_target_support = [[[ 4.]
    #                                  [ 5.]
    #                                  [ 6.]
    #                                  [ 7.]
    #                                  [ 8.]]
    #                                 [[ 4.]
    #                                  [ 5.]
    #                                  [ 6.]
    #                                  [ 7.]
    #                                  [ 8.]]]`.
    reshaped_target_support = tf.tile(target_support[:, None], [batch_size, 1])
    reshaped_target_support = tf.reshape(reshaped_target_support,
                                         [batch_size, num_dims, 1])
    # numerator = `|clipped_support - z_i|` in Eq7.
    # Ex: `numerator = [[[[ 0.  0.  0.  2.  4.]
    #                     [ 1.  1.  1.  1.  3.]
    #                     [ 2.  2.  2.  0.  2.]
    #                     [ 3.  3.  3.  1.  1.]
    #                     [ 4.  4.  4.  2.  0.]]
    #                    [[ 0.  0.  0.  1.  2.]
    #                     [ 1.  1.  1.  0.  1.]
    #                     [ 2.  2.  2.  1.  0.]
    #                     [ 3.  3.  3.  2.  1.]
    #                     [ 4.  4.  4.  3.  2.]]]]`.
    numerator = tf.abs(tiled_support - reshaped_target_support)
    quotient = 1 - (numerator / delta_z)
    # clipped_quotient = `[1 - numerator / (\Delta z)]_0^1` in Eq7.
    # Ex: `clipped_quotient = [[[[ 1.  1.  1.  0.  0.]
    #                            [ 0.  0.  0.  0.  0.]
    #                            [ 0.  0.  0.  1.  0.]
    #                            [ 0.  0.  0.  0.  0.]
    #                            [ 0.  0.  0.  0.  1.]]
    #                           [[ 1.  1.  1.  0.  0.]
    #                            [ 0.  0.  0.  1.  0.]
    #                            [ 0.  0.  0.  0.  1.]
    #                            [ 0.  0.  0.  0.  0.]
    #                            [ 0.  0.  0.  0.  0.]]]]`.
    clipped_quotient = tf.clip_by_value(quotient, 0, 1)
    # Ex: `weights = [[ 0.1  0.6  0.1  0.1  0.1]
    #                 [ 0.1  0.2  0.5  0.1  0.1]]`.
    weights = weights[:, None, :]
    # inner_prod = `\sum_{j=0}^{N-1} clipped_quotient * p_j(x', \pi(x'))`
    # in Eq7.
    # Ex: `inner_prod = [[[[ 0.1  0.6  0.1  0.  0. ]
    #                      [ 0.   0.   0.   0.  0. ]
    #                      [ 0.   0.   0.   0.1 0. ]
    #                      [ 0.   0.   0.   0.  0. ]
    #                      [ 0.   0.   0.   0.  0.1]]
    #                     [[ 0.1  0.2  0.5  0.  0. ]
    #                      [ 0.   0.   0.   0.1 0. ]
    #                      [ 0.   0.   0.   0.  0.1]
    #                      [ 0.   0.   0.   0.  0. ]
    #                      [ 0.   0.   0.   0.  0. ]]]]`.
    inner_prod = clipped_quotient * weights
    # Ex: `projection = [[ 0.8 0.0 0.1 0.0 0.1]
    #                    [ 0.8 0.1 0.1 0.0 0.0]]`.
    projection = tf.reduce_sum(inner_prod, 3)
    projection = tf.reshape(projection, [batch_size, num_dims])
    return projection
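
A usage sketch that reproduces the running example from the docstring and the 'Ex:' comments above:

import tensorflow as tf

supports = tf.constant([[0., 2., 4., 6., 8.],
                        [1., 3., 4., 5., 6.]])
weights = tf.constant([[0.1, 0.6, 0.1, 0.1, 0.1],
                       [0.1, 0.2, 0.5, 0.1, 0.1]])
target_support = tf.constant([4., 5., 6., 7., 8.])
projection = project_distribution(supports, weights, target_support)
# projection -> [[0.8, 0.0, 0.1, 0.0, 0.1],
#                [0.8, 0.1, 0.1, 0.0, 0.0]]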
Example #21
def knn_graph_from_points(points,
                          num_valid_points,
                          k,
                          distance_upper_bound,
                          mask=None):
    """Returns the distances and indices of the neighbors of each point.

  Note that each point will have at least k neighbors unless the number of
  points is less than k. In that case, the python function that is wrapped in
  py_function will raise a value error.

  Args:
    points: A tf.float32 tensor of size [batch_size, N, D] where D is the point
      dimensions.
    num_valid_points: A tf.int32 tensor of size [batch_size] containing the
      number of valid points in each batch example.
    k: Number of neighbors for each point.
    distance_upper_bound: Only build the graph using points that are closer than
      this distance.
    mask: A tf.bool tensor of size [batch_size, N], or None. If None, it is
      ignored. If not None, knn is applied only to points where the mask is
      True; the points where the mask is False will have themselves as their
      neighbors.

  Returns:
    distances: A tf.float32 tensor of size [batch_size, N, k].
    indices: A tf.int32 tensor of size [batch_size, N, k].

  Raises:
    ValueError: If batch_size is unknown.
  """
    if points.get_shape().as_list()[0] is None:
        raise ValueError('Batch size is unknown.')
    batch_size = points.get_shape().as_list()[0]
    num_points = tf.shape(points)[1]

    def fn_knn_graph_from_points_unbatched(i):
        """Computes knn graph for example i in the batch."""
        num_valid_points_i = num_valid_points[i]
        points_i = points[i, :num_valid_points_i, :]
        if mask is None:
            mask_i = None
        else:
            mask_i = mask[i, :num_valid_points_i]
        distances_i, indices_i = knn_graph_from_points_unbatched(
            points=points_i,
            k=k,
            distance_upper_bound=distance_upper_bound,
            mask=mask_i)
        distances_i = tf.pad(distances_i,
                             paddings=[[0, num_points - num_valid_points_i],
                                       [0, 0]])
        indices_i = tf.pad(indices_i,
                           paddings=[[0, num_points - num_valid_points_i],
                                     [0, 0]])
        return distances_i, indices_i

    distances, indices = tf.map_fn(fn=fn_knn_graph_from_points_unbatched,
                                   elems=tf.range(batch_size),
                                   dtype=(tf.float32, tf.int32))

    return distances, indices
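
A hypothetical usage sketch for the batched wrapper; it relies on `knn_graph_from_points_unbatched` above (and its NumPy helper), so the shapes are purely illustrative.

import tensorflow as tf

points = tf.random.normal([2, 6, 3])                   # [batch_size, N, D]
num_valid_points = tf.constant([6, 4], dtype=tf.int32)
distances, indices = knn_graph_from_points(
    points, num_valid_points, k=3, distance_upper_bound=1.5)
# distances: [2, 6, 3] tf.float32; indices: [2, 6, 3] tf.int32. Rows past the
# valid count in each batch example are zero padded.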
Example #22
def preprocess(inputs,
               output_keys=None,
               is_training=False,
               using_sequence_dataset=False,
               num_frame_to_load=1,
               transform_points_fn=None,
               image_preprocess_fn_dic=None,
               images_points_correspondence_fn=None,
               compute_semantic_labels_fn=None,
               compute_motion_labels_fn=None,
               view_names=(),
               points_key='points',
               colors_key='colors',
               normals_key='normals',
               intensities_key='intensities',
               elongations_key='elongations',
               semantic_labels_key='semantic_labels',
               motion_labels_key='motion_labels',
               spin_coords_key=None,
               points_in_image_frame_key=None,
               num_points_to_randomly_sample=None,
               x_min_degree_rotation=None,
               x_max_degree_rotation=None,
               y_min_degree_rotation=None,
               y_max_degree_rotation=None,
               z_min_degree_rotation=None,
               z_max_degree_rotation=None,
               points_pad_or_clip_size=None,
               voxels_pad_or_clip_size=None,
               voxel_grid_cell_size=(0.1, 0.1, 0.1),
               num_offset_bins_x=4,
               num_offset_bins_y=4,
               num_offset_bins_z=4,
               point_feature_keys=('point_offsets', ),
               point_to_voxel_segment_func=tf.math.unsorted_segment_mean,
               x_random_crop_size=None,
               y_random_crop_size=None,
               min_scale_ratio=None,
               max_scale_ratio=None,
               semantic_labels_offset=0,
               ignore_labels=(),
               remove_unlabeled_images_and_points=False,
               labeled_view_name=None,
               only_keep_first_return_lidar_points=False):
    """Preprocesses a dictionary of `Tensor` inputs.

  If is_training=True, it will randomly rotate the points around the z axis,
  and will randomly flip the points with respect to x and/or y axis.

  Note that the preprocessor function does not correct normal vectors if they
  exist in the inputs.
  Note that the preprocessing affects all values of `inputs` that are `Tensors`.

  Args:
    inputs: A dictionary of inputs. Each value must be a `Tensor`.
    output_keys: Either None, or a list of strings containing the keys in the
      dictionary that is returned by the preprocess function.
    is_training: Whether we're training or testing.
    using_sequence_dataset: If true, the inputs will contain scene data and
      data from multiple frames.
    num_frame_to_load: If greater than 1, load multi-frame point cloud point
      positions and their correspondences.
    transform_points_fn: Fn to transform other frames to a specific frame's
      coordinate.
    image_preprocess_fn_dic: Image preprocessing functions. Maps view names to
      their image preprocessing functions. Set it to None if there are no
      images to preprocess or you are not interested in preprocessing images.
    images_points_correspondence_fn: The function that computes correspondence
      between images and points.
    compute_semantic_labels_fn: If not None, semantic labels will be computed
      using this function.
    compute_motion_labels_fn: If not None, motion labels will be computed using
      this function.
    view_names: Names corresponding to 2d views of the scene.
    points_key: The key used for `points` in the inputs.
    colors_key: The key used for `colors` in the inputs.
    normals_key: The key used for 'normals' in the inputs.
    intensities_key: The key used for 'intensities' in the inputs.
    elongations_key: The key used for 'elongations' in the inputs.
    semantic_labels_key: The key used for 'semantic_labels' in the inputs.
    motion_labels_key: The key used for 'motion_labels' in the inputs.
    spin_coords_key: The key used for 'spin_coords' in the inputs. In Waymo
      data, spin_coords is a [num_points, 3] tensor that contains scan_index,
      shot_index, return_index. In Waymo data, return_index of the first return
      points is 0.
    points_in_image_frame_key: The key that identifies the
      points_in_image_frame tensor in the inputs. If None, it won't be used.
    num_points_to_randomly_sample: Number of points to randomly sample. If None,
      it will keep the original points and will not perform sampling.
    x_min_degree_rotation: Min degree of rotation around the x axis.
    x_max_degree_rotation: Max degree of rotation around the x axis.
    y_min_degree_rotation: Min degree of rotation around the y axis.
    y_max_degree_rotation: Max degree of rotation around the y axis.
    z_min_degree_rotation: Min degree of rotation around the z axis.
    z_max_degree_rotation: Max degree of rotation around the z axis.
    points_pad_or_clip_size: Number of target points to pad or clip to. If None,
      it will not perform the point padding.
    voxels_pad_or_clip_size: Number of target voxels to pad or clip to. If None,
      it will not perform the voxel padding.
    voxel_grid_cell_size: A three dimensional tuple determining the voxel grid
      size.
    num_offset_bins_x: Number of bins for point offsets in x direction.
    num_offset_bins_y: Number of bins for point offsets in y direction.
    num_offset_bins_z: Number of bins for point offsets in z direction.
    point_feature_keys: The keys used to form the voxel features.
    point_to_voxel_segment_func: The function used to aggregate the features
      of the points that fall in the same voxel.
    x_random_crop_size: Size of the random crop in x dimension. If None, random
      crop will not take place on x dimension.
    y_random_crop_size: Size of the random crop in y dimension. If None, random
      crop will not take place on y dimension.
    min_scale_ratio: Minimum scale ratio. Used for scaling point cloud.
    max_scale_ratio: Maximum scale ratio. Used for scaling point cloud.
    semantic_labels_offset: An integer offset that will be added to labels.
    ignore_labels: A tuple containing labels that should be ignored when
      computing the loss and metrics.
    remove_unlabeled_images_and_points: If True, removes the images that are not
      labeled and also removes the points that are associated with those images.
    labeled_view_name: The name of the view that is labeled, otherwise None.
    only_keep_first_return_lidar_points: If True, we only keep the first return
      lidar points.

  Returns:
    A dictionary of processed input `Tensor`s (for example the mean-subtracted,
    optionally rotated and flipped points and their voxelized features),
    filtered by `output_keys` if it is not None.

  Raises:
    ValueError: if `inputs` doesn't contain the points_key.
    ValueError: if `points_in_image_frame` does not have rank 3.
  """
    inputs = dict(inputs)

    if using_sequence_dataset:
        all_frame_inputs = inputs
        scene = all_frame_inputs['scene']
        frame1 = all_frame_inputs['frame1']
        frame_start_index = all_frame_inputs['frame_start_index']
        inputs = dict(
            all_frame_inputs['frame0']
        )  # so that the following processing code can be unchanged.

    # Initializing empty dictionary for mesh, image, indices_2d and non tensor
    # inputs.
    non_tensor_inputs = {}
    view_image_inputs = {}
    view_indices_2d_inputs = {}
    mesh_inputs = {}

    if image_preprocess_fn_dic is None:
        image_preprocess_fn_dic = {}

    # Convert all float64 to float32 and all int64 to int32.
    for key in sorted(inputs):
        if isinstance(inputs[key], tf.Tensor):
            if inputs[key].dtype == tf.float64:
                inputs[key] = tf.cast(inputs[key], dtype=tf.float32)
            if inputs[key].dtype == tf.int64:
                inputs[key] = tf.cast(inputs[key], dtype=tf.int32)

    if points_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_positions] = inputs[points_key]
    if colors_key is not None and colors_key in inputs:
        inputs[
            standard_fields.InputDataFields.point_colors] = inputs[colors_key]
    if normals_key is not None and normals_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_normals] = inputs[normals_key]
    if intensities_key is not None and intensities_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_intensities] = inputs[intensities_key]
    if elongations_key is not None and elongations_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_elongations] = inputs[elongations_key]
    if semantic_labels_key is not None and semantic_labels_key in inputs:
        inputs[standard_fields.InputDataFields.
               object_class_points] = inputs[semantic_labels_key]
    if motion_labels_key is not None and motion_labels_key in inputs:
        inputs[standard_fields.InputDataFields.
               object_flow_points] = inputs[motion_labels_key]
    if spin_coords_key is not None and spin_coords_key in inputs:
        inputs[standard_fields.InputDataFields.
               point_spin_coordinates] = inputs[spin_coords_key]

    # Acquire point / image correspondences.
    if images_points_correspondence_fn is not None:
        fn_outputs = images_points_correspondence_fn(inputs)
        if 'points_position' in fn_outputs:
            inputs[standard_fields.InputDataFields.
                   point_positions] = fn_outputs['points_position']
        if 'points_intensity' in fn_outputs and intensities_key is not None:
            inputs[standard_fields.InputDataFields.
                   point_intensities] = fn_outputs['points_intensity']
        if 'points_elongation' in fn_outputs and elongations_key is not None:
            inputs[standard_fields.InputDataFields.
                   point_elongations] = fn_outputs['points_elongation']
        if 'points_label' in fn_outputs and semantic_labels_key is not None:
            inputs[standard_fields.InputDataFields.
                   object_class_points] = fn_outputs['points_label']
        if 'view_images' in fn_outputs:
            for key in sorted(fn_outputs['view_images']):
                if len(fn_outputs['view_images'][key].shape) != 4:
                    raise ValueError(('%s image should have rank 4.' % key))
            view_image_inputs = fn_outputs['view_images']
        if 'view_indices_2d' in fn_outputs:
            for key in sorted(fn_outputs['view_indices_2d']):
                if len(fn_outputs['view_indices_2d'][key].shape) != 3:
                    raise ValueError(
                        ('%s indices_2d should have rank 3.' % key))
            view_indices_2d_inputs = fn_outputs['view_indices_2d']
    else:
        if points_in_image_frame_key is not None:
            inputs['rgb_view/features'] = inputs['image']
            inputs['rgb_view/indices_2d'] = inputs[points_in_image_frame_key]
            if len(inputs['rgb_view/indices_2d'].shape) != 3:
                raise ValueError('`points_in_image_frame` should have rank 3.')

    frame0 = inputs.copy()
    if num_frame_to_load > 1:
        point_positions_list = [
            frame0[standard_fields.InputDataFields.point_positions]
        ]
        if view_indices_2d_inputs:
            view_indices_2d_list = [view_indices_2d_inputs[view_names[0]]]
        frame_source_list = [
            tf.zeros([
                tf.shape(
                    frame0[standard_fields.InputDataFields.point_positions])[0]
            ], tf.int32)
        ]
        for i in range(1, num_frame_to_load):
            target_frame_key = 'frame' + str(i)
            if images_points_correspondence_fn is not None:
                frame_i = images_points_correspondence_fn(
                    all_frame_inputs[target_frame_key])
            else:
                raise ValueError(
                    'images_points_correspondence_fn is needed for loading '
                    'multi-frame point clouds.')
            transformed_point_positions = transform_points_fn(
                scene, frame_i['points_position'], frame_start_index,
                i + frame_start_index)
            point_positions_list.append(transformed_point_positions)
            if view_indices_2d_inputs:
                view_indices_2d_list.append(
                    frame_i['view_indices_2d'][view_names[0]])
            frame_source_list.append(
                tf.ones([tf.shape(transformed_point_positions)[0]], tf.int32) *
                i)

        # add multi-frame info to override inputs and view_indices_2d_inputs
        inputs[standard_fields.InputDataFields.
               point_frame_index] = tf.expand_dims(tf.concat(frame_source_list,
                                                             axis=0),
                                                   axis=1)
        inputs[standard_fields.InputDataFields.point_positions] = tf.concat(
            point_positions_list, axis=0)
        if view_indices_2d_inputs:
            view_indices_2d_inputs[view_names[0]] = tf.concat(
                view_indices_2d_list, axis=1)

    # Validate inputs.
    if standard_fields.InputDataFields.point_positions not in inputs:
        raise ValueError('`inputs` must contain point_positions.')
    if inputs[
            standard_fields.InputDataFields.point_positions].shape.ndims != 2:
        raise ValueError('points must be of rank 2.')
    if inputs[standard_fields.InputDataFields.point_positions].shape[1] != 3:
        raise ValueError('points should be 3 dimensional.')

    # Remove normal nans.
    if standard_fields.InputDataFields.point_normals in inputs:
        inputs[standard_fields.InputDataFields.point_normals] = tf.where(
            tf.math.is_nan(
                inputs[standard_fields.InputDataFields.point_normals]),
            tf.zeros_like(
                inputs[standard_fields.InputDataFields.point_normals]),
            inputs[standard_fields.InputDataFields.point_normals])

    # Compute semantic labels if compute_semantic_labels_fn is not None
    # An example is when the ground-truth contains 3d object boxes and not per
    # point labels. This would be a function that infers point labels from boxes.
    if compute_semantic_labels_fn is not None:
        inputs[standard_fields.InputDataFields.
               object_class_points] = compute_semantic_labels_fn(
                   inputs=frame0,
                   points_key=standard_fields.InputDataFields.point_positions)
    if compute_motion_labels_fn is not None:
        inputs[standard_fields.InputDataFields.
               object_flow_points] = compute_motion_labels_fn(
                   scene=scene,
                   frame0=frame0,
                   frame1=frame1,
                   frame_start_index=frame_start_index,
                   points_key=standard_fields.InputDataFields.point_positions)

    # Splitting inputs to {view_image_inputs,
    #                      view_indices_2d_inputs,
    #                      mesh_inputs,
    #                      non_tensor_inputs}
    mesh_keys = []
    for key in [
            standard_fields.InputDataFields.point_positions,
            standard_fields.InputDataFields.point_colors,
            standard_fields.InputDataFields.point_normals,
            standard_fields.InputDataFields.point_intensities,
            standard_fields.InputDataFields.point_elongations,
            standard_fields.InputDataFields.object_class_points,
            standard_fields.InputDataFields.point_spin_coordinates,
            standard_fields.InputDataFields.object_flow_points,
            standard_fields.InputDataFields.point_frame_index,
    ]:
        if key is not None and key in inputs:
            mesh_keys.append(key)
    view_image_names = [('%s/features' % key) for key in view_names]
    view_indices_2d_names = [('%s/indices_2d' % key) for key in view_names]

    # Additional key collecting
    for k, v in six.iteritems(inputs):
        if k in view_image_names:
            view_image_inputs[k] = v
        elif k in view_indices_2d_names:
            view_indices_2d_inputs[k] = v
        elif k in mesh_keys:
            if num_frame_to_load > 1:
                pad_size = tf.shape(
                    inputs[standard_fields.InputDataFields.
                           point_positions])[0] - tf.shape(v)[0]
                if k == standard_fields.InputDataFields.object_class_points:
                    pad_value = -1
                else:
                    pad_value = 0
                v = tf.pad(v, [[0, pad_size], [0, 0]],
                           constant_values=pad_value)
            mesh_inputs[k] = v
        else:
            non_tensor_inputs[k] = v

    # Remove points that are not in the lidar first return (optional)
    if only_keep_first_return_lidar_points:
        _remove_second_return_lidar_points(
            mesh_inputs=mesh_inputs,
            view_indices_2d_inputs=view_indices_2d_inputs)

    # Randomly sample points
    preprocessor_utils.randomly_sample_points(
        mesh_inputs=mesh_inputs,
        view_indices_2d_inputs=view_indices_2d_inputs,
        target_num_points=num_points_to_randomly_sample)

    # Add weights if it does not exist in inputs. The weight of the points with
    # label in `ignore_labels` is set to 0. This helps the loss and metrics to
    # ignore those labels.
    use_weights = (
        standard_fields.InputDataFields.object_class_points in mesh_inputs
        or standard_fields.InputDataFields.object_flow_points in mesh_inputs)
    if use_weights:
        if num_frame_to_load > 1:
            num_valid_points_frame0 = tf.shape(
                frame0[standard_fields.InputDataFields.point_positions])[0]
            num_additional_frame_points = tf.shape(
                mesh_inputs[standard_fields.InputDataFields.
                            object_class_points])[0] - num_valid_points_frame0
            weights = tf.concat([
                tf.ones([num_valid_points_frame0, 1], tf.float32),
                tf.zeros([num_additional_frame_points, 1], tf.float32)
            ],
                                axis=0)
        else:
            weights = tf.ones_like(mesh_inputs[
                standard_fields.InputDataFields.object_class_points],
                                   dtype=tf.float32)

    if standard_fields.InputDataFields.object_class_points in mesh_inputs:
        mesh_inputs[
            standard_fields.InputDataFields.object_class_points] = tf.cast(
                mesh_inputs[
                    standard_fields.InputDataFields.object_class_points],
                dtype=tf.int32)
        for ignore_label in ignore_labels:
            weights *= tf.cast(tf.not_equal(
                mesh_inputs[
                    standard_fields.InputDataFields.object_class_points],
                ignore_label),
                               dtype=tf.float32)
        mesh_inputs[
            standard_fields.InputDataFields.point_loss_weights] = weights
        mesh_inputs[standard_fields.InputDataFields.
                    object_class_points] += semantic_labels_offset

    # We normalize the intensities and elongations to be in a smaller range.
    if standard_fields.InputDataFields.point_intensities in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.
                    point_intensities] = change_intensity_range(
                        intensities=mesh_inputs[
                            standard_fields.InputDataFields.point_intensities])
    if standard_fields.InputDataFields.point_elongations in mesh_inputs:
        mesh_inputs[
            standard_fields.InputDataFields.point_elongations] = (tf.cast(
                mesh_inputs[standard_fields.InputDataFields.point_elongations],
                dtype=tf.float32) * 2.0 / 255.0) - 1.0

    # Random scale the points.
    if min_scale_ratio is not None and max_scale_ratio is not None:
        scale_ratio = tf.random.uniform([],
                                        minval=min_scale_ratio,
                                        maxval=max_scale_ratio,
                                        dtype=tf.float32)
        mesh_inputs[
            standard_fields.InputDataFields.point_positions] *= scale_ratio
        if standard_fields.InputDataFields.object_flow_points in mesh_inputs:
            mesh_inputs[standard_fields.InputDataFields.
                        object_flow_points] *= scale_ratio

    # Random crop the points.
    randomly_crop_points(mesh_inputs=mesh_inputs,
                         view_indices_2d_inputs=view_indices_2d_inputs,
                         x_random_crop_size=x_random_crop_size,
                         y_random_crop_size=y_random_crop_size)

    # If training, pick the best labeled image and points that project to it.
    # In many datasets, only one image is labeled anyways.
    if remove_unlabeled_images_and_points:
        pick_labeled_image(mesh_inputs=mesh_inputs,
                           view_image_inputs=view_image_inputs,
                           view_indices_2d_inputs=view_indices_2d_inputs,
                           view_name=labeled_view_name)

    # Process images.
    preprocessor_utils.preprocess_images(
        view_image_inputs=view_image_inputs,
        view_indices_2d_inputs=view_indices_2d_inputs,
        image_preprocess_fn_dic=image_preprocess_fn_dic,
        is_training=is_training)

    # Record the original points.
    original_points = mesh_inputs[
        standard_fields.InputDataFields.point_positions]
    if standard_fields.InputDataFields.point_colors in mesh_inputs:
        original_colors = mesh_inputs[
            standard_fields.InputDataFields.point_colors]
    if standard_fields.InputDataFields.point_normals in mesh_inputs:
        original_normals = mesh_inputs[
            standard_fields.InputDataFields.point_normals]

    # Update feature visibility count.
    if 'feature_visibility_count' in mesh_inputs:
        mesh_inputs['feature_visibility_count'] = tf.maximum(
            mesh_inputs['feature_visibility_count'], 1)
        mesh_inputs['features'] /= tf.cast(
            mesh_inputs['feature_visibility_count'], dtype=tf.float32)

    # Subtract mean from points.
    mean_points = tf.reduce_mean(
        mesh_inputs[standard_fields.InputDataFields.point_positions], axis=0)
    mesh_inputs[
        standard_fields.InputDataFields.point_positions] -= tf.expand_dims(
            mean_points, axis=0)

    # Rotate points randomly.
    if standard_fields.InputDataFields.point_normals in mesh_inputs:
        normals = mesh_inputs[standard_fields.InputDataFields.point_normals]
    else:
        normals = None

    if standard_fields.InputDataFields.object_flow_points in mesh_inputs:
        motions = mesh_inputs[
            standard_fields.InputDataFields.object_flow_points]
    else:
        motions = None

    (mesh_inputs[standard_fields.InputDataFields.point_positions],
     rotated_normals, rotated_motions) = rotate_randomly(
         points=mesh_inputs[standard_fields.InputDataFields.point_positions],
         normals=normals,
         motions=motions,
         x_min_degree_rotation=x_min_degree_rotation,
         x_max_degree_rotation=x_max_degree_rotation,
         y_min_degree_rotation=y_min_degree_rotation,
         y_max_degree_rotation=y_max_degree_rotation,
         z_min_degree_rotation=z_min_degree_rotation,
         z_max_degree_rotation=z_max_degree_rotation)

    # Random flipping in x and y directions.
    (mesh_inputs[standard_fields.InputDataFields.point_positions],
     flipped_normals,
     flipped_motions) = flip_randomly_points_and_normals_motions(
         points=mesh_inputs[standard_fields.InputDataFields.point_positions],
         normals=rotated_normals,
         motions=rotated_motions,
         is_training=is_training)
    if standard_fields.InputDataFields.point_normals in mesh_inputs:
        mesh_inputs[
            standard_fields.InputDataFields.point_normals] = flipped_normals
    if standard_fields.InputDataFields.object_flow_points in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.
                    object_flow_points] = flipped_motions
    # Normalize RGB to [-1.0, 1.0].
    if standard_fields.InputDataFields.point_colors in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.point_colors] = tf.cast(
            mesh_inputs[standard_fields.InputDataFields.point_colors],
            dtype=tf.float32)
        mesh_inputs[standard_fields.InputDataFields.point_colors] *= (2.0 /
                                                                      255.0)
        mesh_inputs[standard_fields.InputDataFields.point_colors] -= 1.0

    # Add original points to mesh inputs.
    mesh_inputs[standard_fields.InputDataFields.
                point_positions_original] = original_points
    if standard_fields.InputDataFields.point_colors in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.
                    point_colors_original] = original_colors
    if standard_fields.InputDataFields.point_normals in mesh_inputs:
        mesh_inputs[standard_fields.InputDataFields.
                    point_normals_original] = original_normals

    # Pad or clip the point tensors.
    pad_or_clip(mesh_inputs=mesh_inputs,
                view_indices_2d_inputs=view_indices_2d_inputs,
                pad_or_clip_size=points_pad_or_clip_size)
    if num_frame_to_load > 1:
        # Note: num_valid_points is the sum of 'num_points_per_frame' for now.
        # num_points_per_frame is each frame's valid number of points.
        # TODO(huangrui): if random sampling is called earlier, the count here
        # is not guaranteed to be in order; it needs sorting.
        if num_points_to_randomly_sample is not None:
            raise ValueError(
                'Random sampling is not compatible with padding multi-frame '
                'point clouds yet.')
        _, _, mesh_inputs[standard_fields.InputDataFields.
                          num_valid_points_per_frame] = tf.unique_with_counts(
                              tf.reshape(
                                  mesh_inputs[standard_fields.InputDataFields.
                                              point_frame_index], [-1]))
        if points_pad_or_clip_size is not None:
            padded_points = tf.where_v2(
                tf.greater(
                    points_pad_or_clip_size, mesh_inputs[
                        standard_fields.InputDataFields.num_valid_points]),
                points_pad_or_clip_size -
                mesh_inputs[standard_fields.InputDataFields.num_valid_points],
                0)

            # Correct the potential unique-count error caused by the optionally
            # padded zeros in the point frame index.
            mesh_inputs[
                standard_fields.InputDataFields.
                num_valid_points_per_frame] -= tf.pad(
                    tf.expand_dims(padded_points, 0), [[
                        0,
                        tf.shape(mesh_inputs[standard_fields.InputDataFields.
                                             num_valid_points_per_frame])[0] -
                        1
                    ]])

    # Put the dictionaries back together.
    processed_inputs = mesh_inputs.copy()
    processed_inputs.update(non_tensor_inputs)
    for key in sorted(view_image_inputs):
        processed_inputs[('%s/features' % key)] = view_image_inputs[key]
    for key in sorted(view_indices_2d_inputs):
        processed_inputs[('%s/indices_2d' % key)] = view_indices_2d_inputs[key]

    # Create features that do not exist
    if 'point_offsets' in point_feature_keys:
        preprocessor_utils.add_point_offsets(
            inputs=processed_inputs, voxel_grid_cell_size=voxel_grid_cell_size)
    if 'point_offset_bins' in point_feature_keys:
        preprocessor_utils.add_point_offset_bins(
            inputs=processed_inputs,
            voxel_grid_cell_size=voxel_grid_cell_size,
            num_bins_x=num_offset_bins_x,
            num_bins_y=num_offset_bins_y,
            num_bins_z=num_offset_bins_z)

    # Voxelize point features
    preprocessor_utils.voxelize_point_features(
        inputs=processed_inputs,
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size,
        point_feature_keys=point_feature_keys,
        point_to_voxel_segment_func=point_to_voxel_segment_func,
        num_frame_to_load=num_frame_to_load)

    # Voxelize point / image correspondence indices
    preprocessor_utils.voxelize_point_to_view_correspondences(
        inputs=processed_inputs,
        view_indices_2d_inputs=view_indices_2d_inputs,
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size)

    # Voxelizing the semantic labels
    preprocessor_utils.voxelize_semantic_labels(
        inputs=processed_inputs,
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size)

    # Voxelizing the loss weights
    preprocessor_utils.voxelize_property_tensor(
        inputs=processed_inputs,
        point_tensor_key=standard_fields.InputDataFields.point_loss_weights,
        corresponding_voxel_tensor_key=standard_fields.InputDataFields.
        voxel_loss_weights,
        voxels_pad_or_clip_size=voxels_pad_or_clip_size,
        voxel_grid_cell_size=voxel_grid_cell_size,
        segment_func=tf.math.unsorted_segment_max)

    # Voxelizing the object flow
    if standard_fields.InputDataFields.object_flow_points in processed_inputs:
        preprocessor_utils.voxelize_property_tensor(
            inputs=processed_inputs,
            point_tensor_key=standard_fields.InputDataFields.
            object_flow_points,
            corresponding_voxel_tensor_key='object_flow_voxels_max',
            voxels_pad_or_clip_size=voxels_pad_or_clip_size,
            voxel_grid_cell_size=voxel_grid_cell_size,
            segment_func=tf.math.unsorted_segment_max)
        preprocessor_utils.voxelize_property_tensor(
            inputs=processed_inputs,
            point_tensor_key=standard_fields.InputDataFields.
            object_flow_points,
            corresponding_voxel_tensor_key='object_flow_voxels_min',
            voxels_pad_or_clip_size=voxels_pad_or_clip_size,
            voxel_grid_cell_size=voxel_grid_cell_size,
            segment_func=tf.math.unsorted_segment_min)
        processed_inputs[standard_fields.InputDataFields.
                         object_flow_voxels] = processed_inputs[
                             'object_flow_voxels_max'] + processed_inputs[
                                 'object_flow_voxels_min']

    if num_frame_to_load > 1:
        mesh_inputs[
            standard_fields.InputDataFields.num_valid_points] = mesh_inputs[
                standard_fields.InputDataFields.num_valid_points_per_frame][0]

    # Filter preprocessed_inputs by output_keys if it is not None.
    if output_keys is not None:
        processed_inputs = {
            k: v
            for k, v in six.iteritems(processed_inputs) if k in output_keys
        }
    return processed_inputs
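
One of the simpler steps inside `preprocess` is the construction of `point_loss_weights` in the single-frame case: weights start at 1.0 and are zeroed for every label listed in `ignore_labels`, so ignored classes drop out of the loss and metrics. A standalone sketch of just that step, with a hypothetical helper name and a plain tensor instead of the `standard_fields` keys:

import tensorflow as tf

def build_point_loss_weights(object_class_points, ignore_labels=(0,)):
    """Per-point loss weights: 1.0 everywhere, 0.0 for ignored labels."""
    object_class_points = tf.cast(object_class_points, tf.int32)
    weights = tf.ones_like(object_class_points, dtype=tf.float32)
    for ignore_label in ignore_labels:
        weights *= tf.cast(tf.not_equal(object_class_points, ignore_label),
                           dtype=tf.float32)
    return weights

labels = tf.constant([[1], [0], [2], [0]])
print(build_point_loss_weights(labels, ignore_labels=(0,)).numpy().ravel())
# -> [1. 0. 1. 0.]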
Ejemplo n.º 23
0
def randomly_crop_points(mesh_inputs,
                         view_indices_2d_inputs,
                         x_random_crop_size,
                         y_random_crop_size,
                         epsilon=1e-5):
    """Randomly crops points.

  Args:
    mesh_inputs: A dictionary containing input mesh (point) tensors.
    view_indices_2d_inputs: A dictionary containing input point to view
      correspondence tensors.
    x_random_crop_size: Size of the random crop in x dimension. If None, random
      crop will not take place on x dimension.
    y_random_crop_size: Size of the random crop in y dimension. If None, random
      crop will not take place on y dimension.
    epsilon: Epsilon (a very small value) used to add as a small margin to
      thresholds.
  """
    if x_random_crop_size is None and y_random_crop_size is None:
        return

    points = mesh_inputs[standard_fields.InputDataFields.point_positions]
    num_points = tf.shape(points)[0]
    # Pick a random point
    if x_random_crop_size is not None or y_random_crop_size is not None:
        random_index = tf.random.uniform([],
                                         minval=0,
                                         maxval=num_points,
                                         dtype=tf.int32)
        center_x = points[random_index, 0]
        center_y = points[random_index, 1]

    points_x = points[:, 0]
    points_y = points[:, 1]
    min_x = tf.reduce_min(points_x) - epsilon
    max_x = tf.reduce_max(points_x) + epsilon
    min_y = tf.reduce_min(points_y) - epsilon
    max_y = tf.reduce_max(points_y) + epsilon

    if x_random_crop_size is not None:
        min_x = center_x - x_random_crop_size / 2.0 - epsilon
        max_x = center_x + x_random_crop_size / 2.0 + epsilon

    if y_random_crop_size is not None:
        min_y = center_y - y_random_crop_size / 2.0 - epsilon
        max_y = center_y + y_random_crop_size / 2.0 + epsilon

    x_mask = tf.logical_and(tf.greater(points_x, min_x),
                            tf.less(points_x, max_x))
    y_mask = tf.logical_and(tf.greater(points_y, min_y),
                            tf.less(points_y, max_y))
    points_mask = tf.logical_and(x_mask, y_mask)

    for key in sorted(mesh_inputs):
        mesh_inputs[key] = tf.boolean_mask(mesh_inputs[key], points_mask)

    for key in sorted(view_indices_2d_inputs):
        view_indices_2d_inputs[key] = tf.transpose(
            tf.boolean_mask(
                tf.transpose(view_indices_2d_inputs[key], [1, 0, 2]),
                points_mask), [1, 0, 2])
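
The crop itself boils down to picking a random center point and keeping only the points whose x/y coordinates fall inside the crop window (with a small epsilon margin). A self-contained sketch of just that masking step, without the `view_indices_2d` bookkeeping; `crop_mask` is an illustrative name, not from the original module:

import tensorflow as tf

def crop_mask(points, x_crop_size, y_crop_size, epsilon=1e-5):
    """Returns a boolean mask for points inside a random crop window."""
    num_points = tf.shape(points)[0]
    idx = tf.random.uniform([], minval=0, maxval=num_points, dtype=tf.int32)
    center_x, center_y = points[idx, 0], points[idx, 1]
    x_mask = tf.logical_and(
        points[:, 0] > center_x - x_crop_size / 2.0 - epsilon,
        points[:, 0] < center_x + x_crop_size / 2.0 + epsilon)
    y_mask = tf.logical_and(
        points[:, 1] > center_y - y_crop_size / 2.0 - epsilon,
        points[:, 1] < center_y + y_crop_size / 2.0 + epsilon)
    return tf.logical_and(x_mask, y_mask)

points = tf.random.uniform([1000, 3], minval=-50.0, maxval=50.0)
cropped = tf.boolean_mask(points, crop_mask(points, 20.0, 20.0))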
Ejemplo n.º 24
0
def fit_gaussian_mixture(embeddings,
                         responsibilities,
                         damping=1e-7,
                         full_covariance=False):
  """Fits a unimodal Gaussian distribution `embeddings`.

  Args:
    embeddings: A [batch_size, embedding_dim] tf.Tensor of embeddings.
    responsibilities: The per-component responsibilities.
    damping: The scale of the covariance damping coefficient.
    full_covariance: Whether to use a full or diagonal covariance.

  Returns:
    Parameter estimates for a Gaussian mixture model.
  """

  num, dim = tf.split(tf.shape(input=embeddings), num_or_size_splits=2)
  num, dim = tf.squeeze(num), tf.squeeze(dim)
  num_classes = responsibilities.shape[1]

  mixing_proportion = tf.einsum('jk->k', responsibilities)
  mixing_proportion /= tf.cast(num, dtype=tf.float32)
  mixing_logits = tf.math.log(mixing_proportion)

  sample_mean = tf.einsum('ij,ik->jk', responsibilities, embeddings)
  sample_mean /= tf.reduce_sum(
      input_tensor=responsibilities, axis=0)[:, tf.newaxis]
  centered_embeddings = (
      embeddings[:, tf.newaxis, :] - sample_mean[tf.newaxis, :, :])

  if full_covariance:
    sample_covariance = tf.einsum('ijk,ijl->ijkl', centered_embeddings,
                                  centered_embeddings)  # Outer product.
    sample_covariance += damping * tf.eye(dim)  # Positive definiteness.
    weighted_covariance = tf.einsum('ij,ijkl->jkl', responsibilities,
                                    sample_covariance)
    weighted_covariance /= tf.reduce_sum(
        input_tensor=responsibilities, axis=0)[:, tf.newaxis, tf.newaxis]

    return (
        _split_and_squeeze(sample_mean, num_splits=num_classes),
        _split_and_squeeze(weighted_covariance, num_splits=num_classes),
        [mixing_logits],
    )
  else:
    avg_x_squared = (
        tf.matmul(responsibilities, embeddings**2, transpose_a=True) /
        tf.reduce_sum(input_tensor=responsibilities, axis=0)[:, tf.newaxis])
    avg_means_squared = sample_mean**2
    avg_x_means = (
        sample_mean *
        tf.matmul(responsibilities, embeddings, transpose_a=True) /
        tf.reduce_sum(input_tensor=responsibilities, axis=0)[:, tf.newaxis])
    sample_variances = (
        avg_x_squared - 2 * avg_x_means + avg_means_squared +
        damping * tf.ones(dim))
    log_variances = tf.math.log(sample_variances)
    return (
        _split_and_squeeze(sample_mean, num_splits=num_classes),
        _split_and_squeeze(log_variances, num_splits=num_classes),
        [mixing_logits],
    )
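
In the diagonal-covariance branch, `avg_x_squared - 2 * avg_x_means + avg_means_squared` is the expanded form of the responsibility-weighted second central moment E_r[(x - mu)^2]. A small numerical sanity check of that identity (a standalone sketch with random data, not part of the code above):

import tensorflow as tf

x = tf.random.normal([100, 4])                    # [num, dim]
r = tf.nn.softmax(tf.random.normal([100, 2]), 1)  # [num, num_classes]
norm = tf.reduce_sum(r, axis=0)[:, tf.newaxis]    # [num_classes, 1]
mu = tf.matmul(r, x, transpose_a=True) / norm
avg_x_sq = tf.matmul(r, x**2, transpose_a=True) / norm
# E[x^2] - 2 * mu * E[x] + mu^2, as in the diagonal branch.
var_a = avg_x_sq - 2.0 * mu * (tf.matmul(r, x, transpose_a=True) / norm) + mu**2
# Direct responsibility-weighted variance for comparison.
centered_sq = (x[:, tf.newaxis, :] - mu[tf.newaxis, :, :])**2
var_b = tf.einsum('ij,ijk->jk', r, centered_sq) / norm
print(tf.reduce_max(tf.abs(var_a - var_b)).numpy())  # ~0 up to float error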
Ejemplo n.º 25
0
def geometric_augmentation(images,
                           flow=None,
                           mask=None,
                           crop_height=640,
                           crop_width=640,
                           probability_flip_left_right=0.5,
                           probability_flip_up_down=0.1,
                           probability_scale=0.8,
                           probability_relative_scale=0.,
                           probability_stretch=0.8,
                           probability_rotation=0.0,
                           probability_relative_rotation=0.0,
                           probability_crop_offset=0.0,
                           min_bound_scale=-0.2,
                           max_bound_scale=0.6,
                           max_strech_scale=0.2,
                           min_bound_relative_scale=-0.1,
                           max_bound_relative_scale=0.1,
                           max_rotation_deg=15,
                           max_relative_rotation_deg=3,
                           max_relative_crop_offset=5,
                           return_full_scale=False):
  """Applies geometric augmentations to an image pair and corresponding flow.

  Args:
    images: Image pair of shape [2, height, width, channels].
    flow: Corresponding forward flow field of shape [height, width, 2].
    mask: Mask of shape [height, width, 1] indicating which positions in the
      flow field hold valid flow vectors. Non-valid positions are encoded with
      0, valid positions with 1.
    crop_height: Height of the final augmented output.
    crop_width: Width of the final augmented output.
    probability_flip_left_right: Probability of applying left/right flip.
    probability_flip_up_down: Probability of applying up/down flip.
    probability_scale: Probability of applying scale augmentation.
    probability_relative_scale: Probability of applying scale augmentation to
      only the second frame of the image pair.
    probability_stretch: Probability of applying stretch augmentation (scale
      without keeping the aspect ratio).
    probability_rotation: Probability of applying rotation augmentation.
    probability_relative_rotation: Probability of applying rotation augmentation
      to only the second frame of the image pair.
    probability_crop_offset: Probability of applying a relative offset while
      cropping.
    min_bound_scale: Defines the smallest possible scaling factor as
      2**min_bound_scale.
    max_bound_scale: Defines the largest possible scaling factor as
      2**max_bound_scale.
    max_strech_scale: Defines the smallest and largest possible stretching
      factor as 2**-max_strech_scale and 2**max_strech_scale.
    min_bound_relative_scale: Defines the smallest possible scaling factor for
      the relative scaling as 2**min_bound_relative_scale.
    max_bound_relative_scale: Defines the largest possible scaling factor for
      the relative scaling as 2**max_bound_relative_scale.
    max_rotation_deg: Defines the maximum angle of rotation in degrees.
    max_relative_rotation_deg: Defines the maximum angle of rotation in degrees
      for the relative rotation.
    max_relative_crop_offset: Defines the maximum relative offset in pixels for
      cropping.
    return_full_scale: bool. If True, the full-size images will be returned in
      addition to the geometrically augmented (cropped and/or resized) images,
      together with the crop height, crop width, and any padding that was
      applied.

  Returns:
    if return_full_scale is False:
      Augmented images, flow and mask (if not None).
    if return_full_scale is True:
      Augmented images, flow, mask, full_size_images, crop_h, crop_w, pad_h,
       and pad_w.
  """

  # apply geometric augmentation
  if probability_flip_left_right > 0:
    images, flow, mask = random_flip_left_right(
        images, flow, mask, probability_flip_left_right)

  if probability_flip_up_down > 0:
    images, flow, mask = random_flip_up_down(
        images, flow, mask, probability_flip_up_down)

  if probability_scale > 0 or probability_stretch > 0:
    images, flow, mask = random_scale(
        images,
        flow,
        mask,
        min_scale=min_bound_scale,
        max_scale=max_bound_scale,
        max_strech=max_strech_scale,
        probability_scale=probability_scale,
        probability_strech=probability_stretch)

  if probability_relative_scale > 0:
    images, flow, mask = random_scale_second(
        images, flow, mask,
        min_scale=min_bound_relative_scale,
        max_scale=max_bound_relative_scale,
        probability_scale=probability_relative_scale)

  if probability_rotation > 0:
    images, flow, mask = random_rotation(
        images, flow, mask,
        probability=probability_rotation,
        max_rotation=max_rotation_deg, not_empty_crop=True)

  if probability_relative_rotation > 0:
    images, flow, mask = random_rotation_second(
        images, flow, mask,
        probability=probability_relative_rotation,
        max_rotation=max_relative_rotation_deg, not_empty_crop=True)

  images_uncropped = images
  images, flow, mask, offset_h, offset_w = random_crop(
      images, flow, mask, crop_height, crop_width,
      relative_offset=max_relative_crop_offset,
      probability_crop_offset=probability_crop_offset)
  # Add 100 / 200 pixels on each side of the crop height / width (200 / 400 in
  # total) for the full-scale warp.
  pad_to_size_h = crop_height + 200
  pad_to_size_w = crop_width + 400
  if return_full_scale:
    if pad_to_size_w:
      uncropped_shape = tf.shape(images_uncropped)
      if images.shape[1] > uncropped_shape[1] or images.shape[
          2] > uncropped_shape[2]:
        images_uncropped = images
        uncropped_shape = tf.shape(images_uncropped)
        offset_h = tf.zeros_like(offset_h)
        offset_w = tf.zeros_like(offset_w)

      if uncropped_shape[1] > pad_to_size_h:
        crop_ht = offset_h - (200 // 2)
        crop_hb = offset_h + crop_height + (200 // 2)
        crop_hb += tf.maximum(0, -crop_ht)
        crop_ht -= tf.maximum(0, -(uncropped_shape[1] - crop_hb))
        crop_ht = tf.maximum(crop_ht, 0)
        crop_hb = tf.minimum(crop_hb, uncropped_shape[1])
        offset_h -= crop_ht
        images_uncropped = images_uncropped[:, crop_ht:crop_hb, :, :]

      if uncropped_shape[2] > pad_to_size_w:
        crop_wt = offset_w - (400 // 2)
        crop_wb = offset_w + crop_width + (400 // 2)
        crop_wb += tf.maximum(0, -crop_wt)
        crop_wt -= tf.maximum(0, -(uncropped_shape[2] - crop_wb))
        crop_wt = tf.maximum(crop_wt, 0)
        crop_wb = tf.minimum(crop_wb, uncropped_shape[2])
        offset_w -= crop_wt
        images_uncropped = images_uncropped[:, :, crop_wt:crop_wb, :]

      uncropped_shape = tf.shape(images_uncropped)
      # Pad the images up to the fixed full-scale size (padding is added at the
      # top and left).
      pad_h = pad_to_size_h - uncropped_shape[1]
      pad_w = pad_to_size_w - uncropped_shape[2]
      with tf.control_dependencies([
          tf.compat.v1.assert_greater_equal(pad_h, 0),
          tf.compat.v1.assert_greater_equal(pad_w, 0)
      ]):
        images_uncropped = tf.pad(images_uncropped,
                                  [[0, 0], [pad_h, 0], [pad_w, 0], [0, 0]])
      images_uncropped = tf.ensure_shape(images_uncropped,
                                         [2, pad_to_size_h, pad_to_size_w, 3])
    return images, flow, mask, images_uncropped, offset_h, offset_w, pad_h, pad_w

  return images, flow, mask
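
When `return_full_scale` is set, the function finishes by padding the (possibly re-cropped) uncropped images up to a fixed size, guarded by assertions that the padding is non-negative. The same pad-and-ensure-shape idiom in isolation, with a hypothetical helper name and target sizes:

import tensorflow as tf

def pad_to_fixed_size(images, target_h, target_w):
    """Pads a [2, h, w, 3] image pair at the top/left up to a fixed size."""
    shape = tf.shape(images)
    pad_h = target_h - shape[1]
    pad_w = target_w - shape[2]
    with tf.control_dependencies([
        tf.debugging.assert_greater_equal(pad_h, 0),
        tf.debugging.assert_greater_equal(pad_w, 0)
    ]):
        images = tf.pad(images, [[0, 0], [pad_h, 0], [pad_w, 0], [0, 0]])
    return tf.ensure_shape(images, [2, target_h, target_w, 3])

padded = pad_to_fixed_size(tf.zeros([2, 600, 800, 3]), 840, 1040)
print(padded.shape)  # (2, 840, 1040, 3)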
Ejemplo n.º 26
0
    def build_graph(self):
        """Builds the neural network graph."""

        # define graph
        self.g = tf.Graph()
        with self.g.as_default():

            # create and store a new session for the graph
            self.sess = tf.Session()

            # define placeholders
            self.x = tf.placeholder(shape=[None, self.dim_input],
                                    dtype=tf.float32)
            self.y = tf.placeholder(shape=[None, self.num_classes],
                                    dtype=tf.float32)

            # linear layer(WX + b)
            with tf.variable_scope('last_layer/dense') as scope:
                weights = tf.get_variable('kernel',
                                          [self.dim_input, self.num_classes],
                                          dtype=tf.float32)
                biases = tf.get_variable('bias', [self.num_classes],
                                         dtype=tf.float32)
                wb = tf.concat([weights, tf.expand_dims(biases, axis=0)], 0)
                wb_renorm = tf.matmul(self.sigma_half_inv, wb)
                weights_renorm = wb_renorm[:self.dim_input, :]
                biases_renorm = wb_renorm[-1, :]
                self.z = tf.add(tf.matmul(self.x, weights_renorm),
                                biases_renorm,
                                name=scope.name)

            # Gaussian prior
            # prior = tf.nn.l2_loss(weights) + tf.nn.l2_loss(biases)

            # Non normalized loss, because of the preconditioning
            self.loss = self.n * tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.y,
                                                           logits=self.z))

            # Bayesian loss
            self.bayesian_loss = self.loss  # + prior

            self.output_probs = tf.nn.softmax(self.z)

            # Variables of the last layer
            self.ll_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            self.ll_vars_concat = tf.concat(
                [self.ll_vars[0],
                 tf.expand_dims(self.ll_vars[1], axis=0)], 0)

            # Summary
            _variable_summaries(self.ll_vars_concat)

            # saving the weights of last layer when running SGLD/SGD/MCMC algorithm
            self.saver = tf.train.Saver(var_list=self.ll_vars,
                                        max_to_keep=self.num_samples)

            self.gd_opt = tf.train.GradientDescentOptimizer(self.step_size)
            # SGLD optimizer for the last layer
            if self.sampler in ['sgld', 'lmc']:
                grads_vars = self.gd_opt.compute_gradients(self.bayesian_loss)
                grads_vars_sgld = []

                for g, v in grads_vars:
                    if g is not None:
                        s = list(v.name)
                        s[v.name.rindex(':')] = '_'
                        # Adding Gaussian noise to the gradient
                        gaussian_noise = (np.sqrt(2. / self.step_size) *
                                          tf.random_normal(tf.shape(g)))
                        g_sgld = g + gaussian_noise
                        tf.summary.histogram(''.join(s) + '/grad_hist_mcmc', g)
                        tf.summary.histogram(
                            ''.join(s) + '/gaussian_noise_hist_mcmc',
                            gaussian_noise)
                        tf.summary.histogram(
                            ''.join(s) + '/grad_total_hist_mcmc', g_sgld)
                        grads_vars_sgld.append((g_sgld, v))

                self.train_op = self.gd_opt.apply_gradients(grads_vars_sgld)

            # SGD optimizer for the last layer
            if self.sampler == 'sgd':
                grads_vars_sgd = self.gd_opt.compute_gradients(self.loss)
                self.train_op = self.gd_opt.apply_gradients(grads_vars_sgd)

                for g, v in grads_vars_sgd:
                    if g is not None:
                        s = list(v.name)
                        s[v.name.rindex(':')] = '_'
                        tf.summary.histogram(''.join(s) + '/grad_hist_sgd', g)

            # Merge all the summaries and write them out
            self.all_summaries = tf.summary.merge_all()
            location = os.path.join(self.working_dir, 'logs')
            self.writer = tf.summary.FileWriter(location, graph=self.g)

            saver_network = tf.train.Saver(var_list=self.ll_vars)
            print('loading the network ...')
            # Restores from checkpoint
            saver_network.restore(self.sess, self.model_dir)
            print('Graph successfully loaded.')
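
The SGLD branch above perturbs every gradient with Gaussian noise of standard deviation sqrt(2 / step_size), so a plain gradient-descent step with learning rate step_size approximates the Langevin update theta <- theta - eps * grad + N(0, 2 * eps * I). A minimal TF2-style sketch of the same noise injection, using a hypothetical last layer and loss rather than the class above:

import numpy as np
import tensorflow as tf

def sgld_step(variables, grads, step_size):
    """One SGLD update: theta <- theta - eps * grad + N(0, 2 * eps)."""
    for v, g in zip(variables, grads):
        noise = tf.random.normal(tf.shape(g)) * np.sqrt(2.0 / step_size)
        # eps * (g + sqrt(2 / eps) * N) = eps * g + sqrt(2 * eps) * N.
        v.assign_sub(step_size * (g + noise))

# Hypothetical last layer: a single dense transformation.
w = tf.Variable(tf.zeros([8, 3]))
b = tf.Variable(tf.zeros([3]))
x = tf.random.normal([32, 8])
y = tf.one_hot(tf.random.uniform([32], maxval=3, dtype=tf.int32), depth=3)
with tf.GradientTape() as tape:
    logits = tf.matmul(x, w) + b
    # Scale by the number of examples, like `self.n * tf.reduce_mean(...)`.
    loss = 32.0 * tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=logits))
grads = tape.gradient(loss, [w, b])
sgld_step([w, b], grads, step_size=1e-3)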
Ejemplo n.º 27
0
  def true_fn(images, flow, mask):
    angle_radian = tf.random.uniform(
        [], minval=-max_rotation, maxval=max_rotation,
        dtype=tf.float32) * math.pi / 180.0

    image_1, image_2 = tf.unstack(images)
    image_2 = rotate(image_2, angle_radian, is_flow=False, mask=None)
    images = tf.stack([image_1, image_2])

    if not_empty_crop:
      orig_height = tf.shape(images)[-3]
      orig_width = tf.shape(images)[-2]
      # introduce abbreviations for shorter notation
      cos = tf.math.cos(angle_radian % math.pi)
      sin = tf.math.sin(angle_radian % math.pi)
      h = tf.cast(orig_height, tf.float32)
      w = tf.cast(orig_width, tf.float32)

      # compute required scale factor
      scale = tf.cond(tf.math.less(angle_radian % math.pi, math.pi/2.0),
                      lambda: tf.math.maximum((w/h)*sin+cos, (h/w)*sin+cos),
                      lambda: tf.math.maximum((w/h)*sin-cos, (h/w)*sin-cos))
      new_height = tf.math.floor(h / scale)
      new_width = tf.math.floor(w / scale)

      # crop image again to original size
      offset_height = tf.cast((h-new_height)/2, tf.int32)
      offset_width = tf.cast((w-new_width)/2, tf.int32)
      images = tf.image.crop_to_bounding_box(
          images,
          offset_height=offset_height,
          offset_width=offset_width,
          target_height=tf.cast(new_height, tf.int32),
          target_width=tf.cast(new_width, tf.int32))

    if flow is not None:
      # get current locations (with the origin in the image center)
      positions = _positions_center_origin(orig_height, orig_width)

      # compute augmented flow (multiply by mask to zero invalid flow locations)
      cos = tf.math.cos(angle_radian)
      sin = tf.math.sin(angle_radian)
      rotation_matrix = tf.reshape([cos, sin, -sin, cos], [2, 2])
      flow = (tf.linalg.matmul(
          (positions + flow), rotation_matrix) - positions) * mask

      if not_empty_crop:
        # crop flow and mask again to original size
        flow = tf.image.crop_to_bounding_box(
            flow,
            offset_height=offset_height,
            offset_width=offset_width,
            target_height=tf.cast(new_height, tf.int32),
            target_width=tf.cast(new_width, tf.int32))
        mask = tf.image.crop_to_bounding_box(
            mask,
            offset_height=offset_height,
            offset_width=offset_width,
            target_height=tf.cast(new_height, tf.int32),
            target_width=tf.cast(new_width, tf.int32))
    return images, flow, mask
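
Rotating the flow field amounts to rotating the target positions p + f with the 2x2 rotation matrix and subtracting the unrotated source positions p, i.e. f' = (p + f) R - p for row vectors. A standalone sketch of that step for a flat list of positions (the function above applies it per pixel and multiplies by the validity mask); `rotate_flow` is an illustrative name:

import math
import tensorflow as tf

def rotate_flow(positions, flow, angle_radian):
    """Rotates flow targets: new_flow = (p + f) @ R - p, for row vectors p."""
    cos = tf.math.cos(angle_radian)
    sin = tf.math.sin(angle_radian)
    rotation_matrix = tf.reshape([cos, sin, -sin, cos], [2, 2])
    return tf.linalg.matmul(positions + flow, rotation_matrix) - positions

positions = tf.constant([[0.0, 0.0], [1.0, 0.0]])
flow = tf.constant([[1.0, 0.0], [0.0, 1.0]])
rotated = rotate_flow(positions, flow, angle_radian=math.pi / 2.0)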
Ejemplo n.º 28
0
def prepare_lidar_images_and_correspondences(
    inputs,
    resized_image_height,
    resized_image_width,
    camera_names=('front', 'front_left', 'front_right', 'side_left',
                  'side_right'),
    lidar_names=('top', 'front', 'side_left', 'side_right', 'rear')):
  """Integrates and returns the lidars, cameras and their correspondences.

  Args:
    inputs: A dictionary containing the images and point / pixel
      correspondences.
    resized_image_height: Target height of the images.
    resized_image_width: Target width of the images.
    camera_names: List of cameras to include images from.
    lidar_names: List of lidars to include point clouds from.

  Returns:
    A tf.float32 tensor of size [num_points, 3] containing point positions.
    A tf.float32 tensor of size [num_points, 1] containing point intensities.
    A tf.float32 tensor of size [num_points, 1] containing point elongations.
    A tf.float32 tensor of size [num_points, 3] containing point normals.
    A tf.float32 tensor of size [num_images, resized_image_height,
      resized_image_width, 3].
    A tf.int32 tensor of size [num_images, num_points, 2].

  Raises:
    ValueError: If camera_names or lidar_names are empty lists.
  """
  if not camera_names:
    raise ValueError('camera_names should contain at least one name.')
  if not lidar_names:
    raise ValueError('lidar_names should contain at least one name.')

  (points_position, points_intensity, points_elongation, points_normal,
   points_in_image_frame_yx, points_in_image_frame_id) = _prepare_lidar_points(
       inputs=inputs, lidar_names=lidar_names)

  images = []
  points_in_image_frame = []

  for camera_name in camera_names:
    image_key = ('cameras/%s/image' % camera_name)
    image_height = tf.shape(inputs[image_key])[0]
    image_width = tf.shape(inputs[image_key])[1]
    height_ratio = tf.cast(
        resized_image_height, dtype=tf.float32) / tf.cast(
            image_height, dtype=tf.float32)
    width_ratio = tf.cast(
        resized_image_width, dtype=tf.float32) / tf.cast(
            image_width, dtype=tf.float32)
    if tf.executing_eagerly():
      resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    else:
      resize_method = tf.image.ResizeMethod.BILINEAR
      if inputs[image_key].dtype in [
          tf.int8, tf.uint8, tf.int16, tf.uint16, tf.int32, tf.int64
      ]:
        resize_method = tf.image.ResizeMethod.NEAREST_NEIGHBOR
    images.append(
        tf.image.resize(
            images=inputs[image_key],
            size=[resized_image_height, resized_image_width],
            method=resize_method,
            antialias=True))
    camera_id = tf.cast(inputs[('cameras/%s/id' % camera_name)], dtype=tf.int32)
    valid_points = tf.equal(points_in_image_frame_id, camera_id)
    valid_points = tf.tile(valid_points, [1, 2])
    point_coords = tf.cast(
        tf.cast(points_in_image_frame_yx, dtype=tf.float32) *
        tf.stack([height_ratio, width_ratio]),
        dtype=tf.int32)
    points_in_image_frame_camera = tf.where(
        valid_points, point_coords, -tf.ones_like(valid_points, dtype=tf.int32))
    points_in_image_frame.append(points_in_image_frame_camera)
  num_images = len(images)
  images = tf.stack(images, axis=0)
  images.set_shape([num_images, resized_image_height, resized_image_width, 3])
  points_in_image_frame = tf.stack(points_in_image_frame, axis=0)
  return {
      'points_position': points_position,
      'points_intensity': points_intensity,
      'points_elongation': points_elongation,
      'points_normal': points_normal,
      'view_images': {'rgb_view': images},
      'view_indices_2d': {'rgb_view': points_in_image_frame}
  }
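
Because the camera images are resized, each point's (y, x) pixel coordinates must be rescaled by the same height/width ratios, and points that do not project into the current camera are marked with -1. That rescaling step in isolation, with hypothetical inputs and helper name:

import tensorflow as tf

def rescale_correspondences(points_yx, points_camera_id, camera_id,
                            orig_hw, resized_hw):
    """Rescales per-point (y, x) pixel coords; invalid points become -1."""
    ratios = tf.cast(resized_hw, tf.float32) / tf.cast(orig_hw, tf.float32)
    coords = tf.cast(tf.cast(points_yx, tf.float32) * ratios, tf.int32)
    valid = tf.tile(tf.equal(points_camera_id, camera_id), [1, 2])
    return tf.where(valid, coords, -tf.ones_like(coords))

points_yx = tf.constant([[100, 200], [40, 50]])
camera_ids = tf.constant([[1], [2]])
print(rescale_correspondences(points_yx, camera_ids, camera_id=1,
                              orig_hw=(480, 640), resized_hw=(240, 320)))
# -> [[50, 100], [-1, -1]]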
Ejemplo n.º 29
0
def points_to_normals_unbatched(points,
                                k,
                                distance_upper_bound,
                                viewpoint=None,
                                noise_magnitude=1e-4,
                                method='pca'):
    """Computes normals for the points in a point cloud.

  Args:
    points: A tf.float32 tensor of size [N, 3].
    k: An integer determining the size of the neighborhood.
    distance_upper_bound: Maximum distance of the neighbor points. If None, it
      will not add a cap on the distance.
    viewpoint: A tf.float32 tensor of size [3]. Normals will be flipped to point
      towards view point. If None, it won't be used.
    noise_magnitude: Noise magnitude to be added to the input of svd. If None,
      it won't add noise.
    method: The normal prediction method, options are `pca` and `cross` (cross
      product).

  Returns:
    normals: A tf.float32 tensor of size [N, 3].
  """
    if method == 'pca':
        if k <= 3:
            raise ValueError(
                'At least 3 neighbors are required for computing PCA.')
    elif method == 'cross':
        if k <= 2:
            raise ValueError(
                'At least 2 neighbors are required for computing cross.')
    else:
        raise ValueError(('Unknown method of normal prediction %s' % method))
    n = tf.shape(points)[0]
    d = points.get_shape().as_list()[1]
    if d != 3:
        raise ValueError('Points dimension is not 3.')
    _, knn_adjacencies = knn_graph_from_points_unbatched(
        points=points, k=k, distance_upper_bound=distance_upper_bound)
    knn_adjacencies = knn_adjacencies[:, 1:]
    knn_adjacencies = tf.reshape(knn_adjacencies, [n * (k - 1)])
    adjacency_points = tf.gather(points, indices=knn_adjacencies)
    adjacency_points = tf.reshape(adjacency_points, [n, (k - 1), d])
    if method == 'pca':
        adjacency_relative_points = adjacency_points - tf.expand_dims(points,
                                                                      axis=1)
        if noise_magnitude is not None:
            adjacency_relative_points += tf.random.uniform(
                tf.shape(adjacency_relative_points),
                minval=-noise_magnitude,
                maxval=noise_magnitude,
                dtype=tf.float32)
        _, _, v = tf.linalg.svd(adjacency_relative_points)
        normals = v[:, 2, :]
    elif method == 'cross':
        v1 = adjacency_points[:, 0, :] - points
        v2 = adjacency_points[:, 1, :] - points
        normals = tf.linalg.cross(v1, v2)
        normals_length = tf.expand_dims(tf.norm(normals, axis=1), axis=1)
        if noise_magnitude is not None:
            normals_length += noise_magnitude
        normals /= normals_length
    else:
        raise ValueError(('Unknown method of normal prediction %s' % method))
    if viewpoint is not None:
        normals = flip_normals_towards_viewpoint(points=points,
                                                 normals=normals,
                                                 viewpoint=viewpoint)
    return normals
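
For the `pca` branch, the normal of a point is the direction of least variance of its centered neighborhood; the code obtains it from the SVD of the stacked neighbor offsets. An equivalent standalone sketch using the eigendecomposition of the neighborhood covariance matrix instead (a small variation for illustration, not the original implementation):

import tensorflow as tf

def pca_normal(neighbor_points, center):
    """Estimates a surface normal as the direction of least variance."""
    centered = neighbor_points - center[tf.newaxis, :]      # [k, 3]
    cov = tf.matmul(centered, centered, transpose_a=True)   # [3, 3]
    # tf.linalg.eigh returns eigenvalues in ascending order, so the first
    # eigenvector spans the direction of least variance, i.e. the normal.
    _, eigenvectors = tf.linalg.eigh(cov)
    return eigenvectors[:, 0]

# Noisy points near the z = 0 plane; the estimated normal is ~(0, 0, +-1).
neighbors = tf.concat(
    [tf.random.uniform([20, 2], -1.0, 1.0),
     tf.random.uniform([20, 1], -1e-3, 1e-3)], axis=1)
normal = pca_normal(neighbors, center=tf.zeros([3]))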
Ejemplo n.º 30
0
    def build_graph(self):
        """Builds the neural network graph."""

        # define graph
        self.g = tf.Graph()
        with self.g.as_default():

            # create and store a new session for the graph
            self.sess = tf.Session()

            # define placeholders
            self.x = tf.placeholder(shape=[None, self.dim_input],
                                    dtype=tf.float32)
            self.y = tf.placeholder(shape=[None, self.num_classes],
                                    dtype=tf.float32)

            # define simple model
            with tf.variable_scope('last_layer'):
                self.z = tf.layers.dense(inputs=self.x, units=self.num_classes)

            self.loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits_v2(labels=self.y,
                                                           logits=self.z))

            self.output_probs = tf.nn.softmax(self.z)

            # Variables of the last layer
            self.ll_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
            self.ll_vars_concat = tf.concat(
                [self.ll_vars[0],
                 tf.expand_dims(self.ll_vars[1], axis=0)], 0)

            # Summary
            _variable_summaries(self.ll_vars_concat)

            # add regularization that acts as a unit Gaussian prior on the last layer
            regularizer = tf.contrib.layers.l2_regularizer(1.0)

            # regularization
            prior = tf.contrib.layers.apply_regularization(
                regularizer, self.ll_vars)
            self.bayesian_loss = self.n * self.loss + prior

            # saving the weights of last layer when running SGLD/SGD/MCMC algorithm
            self.saver = tf.train.Saver(var_list=self.ll_vars,
                                        max_to_keep=self.num_samples)

            # SGLD optimizer for the last layer
            if self.sampler in ['sgld', 'lmc']:
                step = self.step_size / self.n
                gd_opt = tf.train.GradientDescentOptimizer(step)
                grads_vars = gd_opt.compute_gradients(self.bayesian_loss)
                grads_vars_sgld = []

                for g, v in grads_vars:
                    if g is not None:
                        s = list(v.name)
                        s[v.name.rindex(':')] = '_'
                        # Adding Gaussian noise to the gradient
                        gaussian_noise = (np.sqrt(2. / step) *
                                          tf.random_normal(tf.shape(g)))
                        g_sgld = g + gaussian_noise
                        tf.summary.histogram(''.join(s) + '/grad_hist_mcmc',
                                             g / self.n)
                        tf.summary.histogram(
                            ''.join(s) + '/gaussian_noise_hist_mcmc',
                            gaussian_noise / self.n)
                        tf.summary.histogram(
                            ''.join(s) + '/grad_total_hist_mcmc',
                            g_sgld / self.n)
                        grads_vars_sgld.append((g_sgld, v))

                self.train_op = gd_opt.apply_gradients(grads_vars_sgld)

            # SGD optimizer for the last layer
            if self.sampler == 'sgd':
                gd_opt = tf.train.GradientDescentOptimizer(self.step_size)
                grads_vars_sgd = gd_opt.compute_gradients(self.loss)
                self.train_op = gd_opt.apply_gradients(grads_vars_sgd)

                for g, v in grads_vars_sgd:
                    if g is not None:
                        s = list(v.name)
                        s[v.name.rindex(':')] = '_'
                        tf.summary.histogram(''.join(s) + '/grad_hist_sgd', g)

            # Merge all the summaries and write them out
            self.all_summaries = tf.summary.merge_all()
            location = os.path.join(self.working_dir, 'logs')
            self.writer = tf.summary.FileWriter(location, graph=self.g)

            saver_network = tf.train.Saver(var_list=self.ll_vars)
            print('loading the network ...')
            # Restores from checkpoint
            # self.sess.run(tf.global_variables_initializer())
            saver_network.restore(self.sess, self.model_dir)
            print('Graph successfully loaded.')