Example #1
def pick_labeled_image(mesh_inputs, view_image_inputs, view_indices_2d_inputs,
                       view_name):
    """Pick the image with most number of labeled points projecting to it."""
    if view_name not in view_image_inputs:
        return
    if view_name not in view_indices_2d_inputs:
        return
    if standard_fields.InputDataFields.point_loss_weights not in mesh_inputs:
        raise ValueError(
            'The key `point_loss_weights` is missing from mesh_inputs.')
    height = tf.shape(view_image_inputs[view_name])[1]
    width = tf.shape(view_image_inputs[view_name])[2]
    valid_points_y = tf.logical_and(
        tf.greater_equal(view_indices_2d_inputs[view_name][:, :, 0], 0),
        tf.less(view_indices_2d_inputs[view_name][:, :, 0], height))
    valid_points_x = tf.logical_and(
        tf.greater_equal(view_indices_2d_inputs[view_name][:, :, 1], 0),
        tf.less(view_indices_2d_inputs[view_name][:, :, 1], width))
    valid_points = tf.logical_and(valid_points_y, valid_points_x)
    image_total_weights = tf.reduce_sum(
        tf.cast(valid_points, dtype=tf.float32) * tf.squeeze(
            mesh_inputs[standard_fields.InputDataFields.point_loss_weights],
            axis=1),
        axis=1)
    image_total_weights = tf.cond(
        tf.equal(tf.reduce_sum(image_total_weights), 0),
        lambda: tf.reduce_sum(tf.cast(valid_points, dtype=tf.float32), axis=1),
        lambda: image_total_weights)
    best_image = tf.math.argmax(image_total_weights)
    view_image_inputs[view_name] = view_image_inputs[view_name][
        best_image:best_image + 1, :, :, :]
    view_indices_2d_inputs[view_name] = view_indices_2d_inputs[view_name][
        best_image:best_image + 1, :, :]
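
A minimal usage sketch for pick_labeled_image, assuming the surrounding module's standard_fields import is available; the shapes follow how the function indexes its inputs (images [num_views, height, width, channels], 2D indices [num_views, num_points, 2], loss weights [num_points, 1]):

import tensorflow as tf

# Hypothetical inputs: 4 candidate views of a 32x32 image, 100 mesh points.
mesh_inputs = {
    standard_fields.InputDataFields.point_loss_weights:
        tf.random.uniform([100, 1])
}
view_image_inputs = {'rgb_view': tf.zeros([4, 32, 32, 3])}
view_indices_2d_inputs = {
    'rgb_view': tf.random.uniform([4, 100, 2], minval=-5, maxval=40,
                                  dtype=tf.int32)
}
pick_labeled_image(mesh_inputs, view_image_inputs, view_indices_2d_inputs,
                   view_name='rgb_view')
# Both dictionaries now hold only the single best view:
# view_image_inputs['rgb_view'].shape == (1, 32, 32, 3)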
Example #2
def randomly_crop_points(mesh_inputs,
                         view_indices_2d_inputs,
                         x_random_crop_size,
                         y_random_crop_size,
                         epsilon=1e-5):
  """Randomly crops points.

  Args:
    mesh_inputs: A dictionary containing input mesh (point) tensors.
    view_indices_2d_inputs: A dictionary containing input point to view
      correspondence tensors.
    x_random_crop_size: Size of the random crop in x dimension. If None, random
      crop will not take place on x dimension.
    y_random_crop_size: Size of the random crop in y dimension. If None, random
      crop will not take place on y dimension.
    epsilon: Epsilon (a very small value) used to add as a small margin to
      thresholds.
  """
  if x_random_crop_size is None and y_random_crop_size is None:
    return

  points = mesh_inputs[standard_fields.InputDataFields.point_positions]
  num_points = tf.shape(points)[0]
  # Pick a random point to center the crop on. At least one crop size is set
  # here, since the all-None case already returned above.
  random_index = tf.random.uniform([],
                                   minval=0,
                                   maxval=num_points,
                                   dtype=tf.int32)
  center_x = points[random_index, 0]
  center_y = points[random_index, 1]

  points_x = points[:, 0]
  points_y = points[:, 1]
  min_x = tf.reduce_min(points_x) - epsilon
  max_x = tf.reduce_max(points_x) + epsilon
  min_y = tf.reduce_min(points_y) - epsilon
  max_y = tf.reduce_max(points_y) + epsilon

  if x_random_crop_size is not None:
    min_x = center_x - x_random_crop_size / 2.0 - epsilon
    max_x = center_x + x_random_crop_size / 2.0 + epsilon

  if y_random_crop_size is not None:
    min_y = center_y - y_random_crop_size / 2.0 - epsilon
    max_y = center_y + y_random_crop_size / 2.0 + epsilon

  x_mask = tf.logical_and(tf.greater(points_x, min_x), tf.less(points_x, max_x))
  y_mask = tf.logical_and(tf.greater(points_y, min_y), tf.less(points_y, max_y))
  points_mask = tf.logical_and(x_mask, y_mask)

  for key in sorted(mesh_inputs):
    mesh_inputs[key] = tf.boolean_mask(mesh_inputs[key], points_mask)

  for key in sorted(view_indices_2d_inputs):
    view_indices_2d_inputs[key] = tf.transpose(
        tf.boolean_mask(
            tf.transpose(view_indices_2d_inputs[key], [1, 0, 2]), points_mask),
        [1, 0, 2])
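
A usage sketch for randomly_crop_points, again assuming the module's standard_fields import; the shapes and values are illustrative:

import tensorflow as tf

# Hypothetical inputs: 1000 points with matching per-point view indices.
mesh_inputs = {
    standard_fields.InputDataFields.point_positions:
        tf.random.uniform([1000, 3], minval=-10.0, maxval=10.0)
}
view_indices_2d_inputs = {'rgb_view': tf.zeros([2, 1000, 2], dtype=tf.int32)}
randomly_crop_points(mesh_inputs, view_indices_2d_inputs,
                     x_random_crop_size=4.0, y_random_crop_size=4.0)
# Every tensor in both dictionaries is now masked down to the points that
# fall inside the random 4x4 crop window.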
Example #3
def _pad_or_clip_voxels(voxel_features, voxel_indices, num_valid_voxels,
                        segment_ids, voxels_pad_or_clip_size):
  """Pads or clips voxels."""
  if voxels_pad_or_clip_size:
    num_valid_voxels = tf.minimum(num_valid_voxels, voxels_pad_or_clip_size)
    num_channels = voxel_features.get_shape().as_list()[-1]
    if len(voxel_features.shape.as_list()) == 2:
      output_shape = [voxels_pad_or_clip_size, num_channels]
    elif len(voxel_features.shape.as_list()) == 3:
      num_samples_per_voxel = voxel_features.get_shape().as_list()[1]
      if num_samples_per_voxel is None:
        num_samples_per_voxel = tf.shape(voxel_features)[1]
      output_shape = [
          voxels_pad_or_clip_size, num_samples_per_voxel, num_channels
      ]
    else:
      raise ValueError('voxel_features should be either rank 2 or 3.')
    voxel_features = shape_utils.pad_or_clip_nd(
        tensor=voxel_features, output_shape=output_shape)
    voxel_indices = shape_utils.pad_or_clip_nd(
        tensor=voxel_indices, output_shape=[voxels_pad_or_clip_size, 3])
    valid_segment_ids_mask = tf.cast(
        tf.less(segment_ids, num_valid_voxels), dtype=tf.int32)
    segment_ids *= valid_segment_ids_mask
  return voxel_features, voxel_indices, num_valid_voxels, segment_ids
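
shape_utils.pad_or_clip_nd comes from the surrounding codebase. A simplified stand-in that matches how it is used above, assuming output_shape holds static Python ints (a sketch, not the library's actual implementation):

import tensorflow as tf

def pad_or_clip_nd_sketch(tensor, output_shape):
  """Clips, then zero-pads, every dimension of `tensor` to `output_shape`."""
  # Clip: keep at most `size` entries along each dimension.
  clipped = tensor[tuple(slice(0, size) for size in output_shape)]
  # Pad: append zeros so every dimension reaches exactly `size`.
  paddings = [[0, size - tf.shape(clipped)[i]]
              for i, size in enumerate(output_shape)]
  return tf.pad(clipped, paddings)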
Example #5
def embedding_regularization_loss(inputs,
                                  outputs,
                                  lambda_coef=0.0001,
                                  regularization_type='unit_length',
                                  is_intermediate=False):
  """Classification loss with an iou threshold.

  Args:
    inputs: A dictionary that contains
      num_valid_voxels - A tf.int32 tensor of size [batch_size].
      instance_ids - A tf.int32 tensor of size [batch_size, n].
    outputs: A dictionary that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
    lambda_coef: Regularization loss coefficient.
    regularization_type: Regularization loss type. Supported values are 'msq'
      and 'unit_length'. 'msq' stands for 'mean square' which penalizes the
      embedding vectors if they have a length far from zero. 'unit_length'
      penalizes the embedding vectors if they have a length far from one.
    is_intermediate: True if applied to intermediate predictions;
      otherwise, False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
  instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
  num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
  if is_intermediate:
    embedding_key = (
        standard_fields.DetectionResultFields
        .intermediate_instance_embedding_voxels)
  else:
    embedding_key = (
        standard_fields.DetectionResultFields.instance_embedding_voxels)
  if instance_ids_key not in inputs:
    raise ValueError('instance_ids is missing in inputs.')
  if embedding_key not in outputs:
    raise ValueError('embedding is missing in outputs.')
  if num_voxels_key not in inputs:
    raise ValueError('num_voxels is missing in inputs.')
  batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
  if batch_size is None:
    raise ValueError('batch_size is not defined at graph construction time.')
  num_valid_voxels = inputs[num_voxels_key]
  num_voxels = tf.shape(inputs[instance_ids_key])[1]
  valid_mask = tf.less(
      tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
      tf.expand_dims(num_valid_voxels, axis=1))
  valid_mask = tf.reshape(valid_mask, [-1])
  embedding_dims = outputs[embedding_key].get_shape().as_list()[-1]
  if embedding_dims is None:
    raise ValueError(
        'Embedding dimension is unknown at graph construction time.')
  embedding = tf.reshape(outputs[embedding_key], [-1, embedding_dims])
  embedding = tf.boolean_mask(embedding, valid_mask)
  return metric_learning_losses.regularization_loss(
      embedding=embedding,
      lambda_coef=lambda_coef,
      regularization_type=regularization_type)
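
metric_learning_losses.regularization_loss is defined elsewhere in the codebase; the sketch below only illustrates the two behaviors the docstring describes, under the assumption that the loss is a coefficient-scaled mean over per-embedding penalties:

import tensorflow as tf

def regularization_loss_sketch(embedding, lambda_coef,
                               regularization_type='unit_length'):
  """Sketch of the 'msq' and 'unit_length' penalties described above."""
  squared_norms = tf.reduce_sum(tf.square(embedding), axis=1)
  if regularization_type == 'msq':
    # Penalize embeddings whose length is far from zero.
    per_example = squared_norms
  elif regularization_type == 'unit_length':
    # Penalize embeddings whose length is far from one.
    per_example = tf.square(tf.sqrt(squared_norms) - 1.0)
  else:
    raise ValueError('Unknown regularization type.')
  return lambda_coef * tf.reduce_mean(per_example)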
Example #6
def _box_classification_loss_unbatched(inputs_1, outputs_1, is_intermediate,
                                       is_balanced, mine_hard_negatives,
                                       hard_negative_score_threshold):
    """Loss function for input and outputs of batch size 1."""
    valid_mask = _get_voxels_valid_mask(inputs_1=inputs_1)
    if is_intermediate:
        logits = outputs_1[standard_fields.DetectionResultFields.
                           intermediate_object_semantic_voxels]
    else:
        logits = outputs_1[
            standard_fields.DetectionResultFields.object_semantic_voxels]
    num_classes = logits.get_shape().as_list()[-1]
    if num_classes is None:
        raise ValueError('Number of classes is unknown.')
    logits = tf.boolean_mask(tf.reshape(logits, [-1, num_classes]), valid_mask)
    labels = tf.boolean_mask(
        tf.reshape(
            inputs_1[standard_fields.InputDataFields.object_class_voxels],
            [-1, 1]), valid_mask)
    if mine_hard_negatives or is_balanced:
        instances = tf.boolean_mask(
            tf.reshape(
                inputs_1[
                    standard_fields.InputDataFields.object_instance_id_voxels],
                [-1]), valid_mask)
    params = {}
    if mine_hard_negatives:
        negative_scores = tf.reshape(tf.nn.softmax(logits)[:, 0], [-1])
        hard_negative_mask = tf.logical_and(
            tf.less(negative_scores, hard_negative_score_threshold),
            tf.equal(tf.reshape(labels, [-1]), 0))
        hard_negative_labels = tf.boolean_mask(labels, hard_negative_mask)
        hard_negative_logits = tf.boolean_mask(logits, hard_negative_mask)
        hard_negative_instances = tf.boolean_mask(
            tf.ones_like(instances) * (tf.reduce_max(instances) + 1),
            hard_negative_mask)
        logits = tf.concat([logits, hard_negative_logits], axis=0)
        instances = tf.concat([instances, hard_negative_instances], axis=0)
        labels = tf.concat([labels, hard_negative_labels], axis=0)
    if is_balanced:
        weights = loss_utils.get_balanced_loss_weights_multiclass(
            labels=tf.expand_dims(instances, axis=1))
        params['weights'] = weights
    return classification_loss_fn(logits=logits, labels=labels, **params)
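
(classification_loss_fn and _get_voxels_valid_mask are defined in the enclosing scope of the original source.) For intuition, the hard-negative branch keeps background voxels (label 0) whose predicted background score falls below hard_negative_score_threshold and appends them a second time, effectively up-weighting confusing negatives. A tiny standalone illustration of that selection, with made-up values:

import tensorflow as tf

logits = tf.constant([[2.0, 0.1], [-1.0, 3.0], [0.5, 0.4]])
labels = tf.constant([[0], [0], [1]])
# Softmax score assigned to the background class (column 0).
negative_scores = tf.nn.softmax(logits)[:, 0]
# Background voxels that the model is not confident are background.
hard_negative_mask = tf.logical_and(
    tf.less(negative_scores, 0.5),
    tf.equal(tf.reshape(labels, [-1]), 0))
# Only the second voxel qualifies: label 0 but background score ~0.02.
print(hard_negative_mask.numpy())  # [False  True False]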
Example #7
def random_flip_left_right(images, flow, mask, probability):
    """Performs a random left/right flip."""
    perform_flip = tf.less(tf.random.uniform([]), probability)
    # apply flip
    images = tf.cond(pred=perform_flip,
                     true_fn=lambda: tf.reverse(images, axis=[-2]),
                     false_fn=lambda: images)
    if flow is not None:
        flow = tf.cond(pred=perform_flip,
                       true_fn=lambda: tf.reverse(flow, axis=[-2]),
                       false_fn=lambda: flow)
        mask = tf.cond(pred=perform_flip,
                       true_fn=lambda: tf.reverse(mask, axis=[-2]),
                       false_fn=lambda: mask)
        # correct sign of flow
        sign_correction = tf.reshape([1.0, -1.0], [1, 1, 2])
        flow = tf.cond(pred=perform_flip,
                       true_fn=lambda: flow * sign_correction,
                       false_fn=lambda: flow)
    return images, flow, mask
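
Reversing axis=-2 mirrors the width dimension, so a pixel that moved right in the original now moves left and the horizontal flow component must be negated. Judging from the two sign-correction constants here and in the up/down variant below, the flow channels are ordered (vertical, horizontal). A quick check on a single flow vector (values are illustrative):

import tensorflow as tf

# One-pixel flow field with displacement (dy=2, dx=3), channel order (y, x).
flow = tf.constant([[[2.0, 3.0]]])
flipped = tf.reverse(flow, axis=[-2]) * tf.reshape([1.0, -1.0], [1, 1, 2])
print(flipped.numpy())  # [[[ 2. -3.]]] -- horizontal component negated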
Example #8
def random_flip_up_down(images, flow, mask, probability):
    """Performs a random up/down flip."""
    # Decide whether to flip, based on the given probability.
    perform_flip = tf.less(tf.random.uniform([]), probability)
    # apply flip
    images = tf.cond(pred=perform_flip,
                     true_fn=lambda: tf.reverse(images, axis=[-3]),
                     false_fn=lambda: images)
    if flow is not None:
        flow = tf.cond(pred=perform_flip,
                       true_fn=lambda: tf.reverse(flow, axis=[-3]),
                       false_fn=lambda: flow)
        mask = tf.cond(pred=perform_flip,
                       true_fn=lambda: tf.reverse(mask, axis=[-3]),
                       false_fn=lambda: mask)
        # correct sign of flow
        sign_correction = tf.reshape([-1.0, 1.0], [1, 1, 2])
        flow = tf.cond(pred=perform_flip,
                       true_fn=lambda: flow * sign_correction,
                       false_fn=lambda: flow)
    return images, flow, mask
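
A usage sketch for the two flip helpers above; the shapes follow the convention the functions imply (a stacked image pair [2, height, width, channels], flow as [height, width, 2] in (vertical, horizontal) order, and a [height, width, 1] validity mask):

import tensorflow as tf

images = tf.random.uniform([2, 64, 64, 3])  # stacked image pair
flow = tf.random.uniform([64, 64, 2])       # (vertical, horizontal) flow
mask = tf.ones([64, 64, 1])                 # flow validity mask
images, flow, mask = random_flip_left_right(images, flow, mask,
                                            probability=0.5)
images, flow, mask = random_flip_up_down(images, flow, mask,
                                         probability=0.5)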
Example #9
def random_eraser(images,
                  min_size,
                  max_size,
                  probability,
                  max_operations,
                  probability_additional_operations,
                  augment_entire_batch=False):
  """Erases random rectangle-shaped areas in the second image or image batch.

  Args:
    images: Stacked image pair that should be augmented with shape
      [2, height, width, 3] or a batch of images that should be augmented with
      shape [batch, height, width, 3].
    min_size: Minimum size of erased rectangle.
    max_size: Maximum size of erased rectangle.
    probability: Probability of applying this augmentation function.
    max_operations: Maximum total number of areas to erase.
    probability_additional_operations: Probability for each additional area to
      be erased if the augmentation is applied.
    augment_entire_batch: If True, the input is treated as a batch of images to
      which the augmentation should be applied.

  Returns:
    Possibly augmented images.
  """
  perform_erase = tf.less(tf.random.uniform([]), probability)
  height = tf.shape(images)[-3]
  width = tf.shape(images)[-2]

  # Returns augmented images.
  def true_fn(images):
    if augment_entire_batch:
      image_2 = images
      mean_color = tf.reduce_mean(image_2, axis=[1, 2], keepdims=True)
    else:
      image_1, image_2 = tf.unstack(images)
      mean_color = tf.reduce_mean(image_2, axis=[0, 1], keepdims=True)
    def body(var_img, mean_color):
      x0 = tf.random.uniform([], 0, width, dtype=tf.int32)
      y0 = tf.random.uniform([], 0, height, dtype=tf.int32)
      dx = tf.random.uniform([], min_size, max_size, dtype=tf.int32)
      dy = tf.random.uniform([], min_size, max_size, dtype=tf.int32)
      x = tf.range(width)
      x_mask = (x0 <= x) & (x < x0+dx)
      y = tf.range(height)
      y_mask = (y0 <= y) & (y < y0+dy)
      mask = x_mask & y_mask[:, tf.newaxis]
      mask = tf.cast(mask[:, :, tf.newaxis], image_2.dtype)
      result = var_img * (1 - mask) + mean_color * mask
      return result
    # Perform at least one erase operation.
    image_2 = body(image_2, mean_color)
    # Perform additional erase operations.
    for _ in range(max_operations - 1):
      # A distinct name avoids shadowing the outer `perform_erase`.
      perform_additional = tf.less(
          tf.random.uniform([]), probability_additional_operations)
      image_2 = tf.cond(perform_additional,
                        lambda: body(image_2, mean_color),
                        lambda: image_2)
    if augment_entire_batch:
      images = image_2
    else:
      images = tf.stack([image_1, image_2])
    return images

  # Returns unaugmented images.
  def false_fn(images):
    return images

  return tf.cond(perform_erase,
                 lambda: true_fn(images),
                 lambda: false_fn(images))
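
A usage sketch for random_eraser; the min_size/max_size values are illustrative and should stay below the image height/width so the sampled rectangles are valid:

import tensorflow as tf

image_pair = tf.random.uniform([2, 128, 128, 3])
augmented = random_eraser(
    image_pair,
    min_size=10,
    max_size=40,
    probability=0.5,
    max_operations=3,
    probability_additional_operations=0.5)
# With probability 0.5 the second image now contains 1 to 3 rectangles
# filled with its mean color; otherwise the pair is returned unchanged.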
Example #10
def classification_loss_using_mask_iou(inputs,
                                       outputs,
                                       num_samples,
                                       max_instance_id=None,
                                       similarity_strategy='distance',
                                       is_balanced=True,
                                       is_intermediate=False):
    """Classification loss with an iou threshold.

  Args:
    inputs: A dictionary that contains
      num_valid_voxels - A tf.int32 tensor of size [batch_size].
      instance_ids - A tf.int32 tensor of size [batch_size, n].
      class_labels - A tf.int32 tensor of size [batch_size, n]. It is assumed
        that the background voxels are assigned to class 0.
    outputs: A dictionart that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
      logits - A tf.float32 tensor of size [batch_size, n, num_classes]. It is
        assumed that background is class 0.
    num_samples: An int determining the number of samples.
    max_instance_id: If set, instance ids larger than that value will be
      ignored. If not set, it will be computed from instance_ids tensor.
    similarity_strategy: Defines the method for computing similarity between
                         embedding vectors. Possible values are 'dotproduct' and
                         'distance'.
    is_balanced: If True, the per-voxel losses are re-weighted to have equal
      total weight for foreground vs. background voxels.
    is_intermediate: True if applied to intermediate predictions;
      otherwise, False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
    instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
    class_labels_key = standard_fields.InputDataFields.object_class_voxels
    num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
    if is_intermediate:
        embedding_key = (standard_fields.DetectionResultFields.
                         intermediate_instance_embedding_voxels)
        logits_key = (standard_fields.DetectionResultFields.
                      intermediate_object_semantic_voxels)
    else:
        embedding_key = (
            standard_fields.DetectionResultFields.instance_embedding_voxels)
        logits_key = standard_fields.DetectionResultFields.object_semantic_voxels
    if instance_ids_key not in inputs:
        raise ValueError('instance_ids is missing in inputs.')
    if class_labels_key not in inputs:
        raise ValueError('class_labels is missing in inputs.')
    if num_voxels_key not in inputs:
        raise ValueError('num_voxels is missing in inputs.')
    if embedding_key not in outputs:
        raise ValueError('embedding is missing in outputs.')
    if logits_key not in outputs:
        raise ValueError('logits is missing in outputs.')
    batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
    if batch_size is None:
        raise ValueError(
            'batch_size is not defined at graph construction time.')
    num_valid_voxels = inputs[num_voxels_key]
    num_voxels = tf.shape(inputs[instance_ids_key])[1]
    valid_mask = tf.less(
        tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
        tf.expand_dims(num_valid_voxels, axis=1))
    return classification_loss_using_mask_iou_func(
        embeddings=outputs[embedding_key],
        logits=outputs[logits_key],
        instance_ids=tf.reshape(inputs[instance_ids_key], [batch_size, -1]),
        class_labels=inputs[class_labels_key],
        num_samples=num_samples,
        valid_mask=valid_mask,
        max_instance_id=max_instance_id,
        similarity_strategy=similarity_strategy,
        is_balanced=is_balanced)
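
As an aside, the valid_mask construction used here (and in the other voxel losses above) is equivalent to tf.sequence_mask, which makes the intent explicit; a quick check:

import tensorflow as tf

num_valid_voxels = tf.constant([2, 4])  # per-example valid counts
num_voxels = 5                          # padded length n
valid_mask = tf.less(
    tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [2, 1]),
    tf.expand_dims(num_valid_voxels, axis=1))
same_mask = tf.sequence_mask(num_valid_voxels, maxlen=num_voxels)
assert bool(tf.reduce_all(tf.equal(valid_mask, same_mask)))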
Example #11
def npair_loss(inputs,
               outputs,
               num_samples,
               max_instance_id=None,
               similarity_strategy='distance',
               loss_strategy='softmax',
               is_intermediate=False):
  """N-pair metric learning loss for learning feature embeddings.

  Args:
    inputs: A dictionary that contains
      num_valid_voxels - A tf.int32 tensor of size [batch_size].
      instance_ids - A tf.int32 tensor of size [batch_size, n].
    outputs: A dictionary that contains
      embeddings - A tf.float32 tensor of size [batch_size, n, f].
    num_samples: An int determining the number of samples.
    max_instance_id: If set, instance ids larger than that value will be
      ignored. If not set, it will be computed from instance_ids tensor.
    similarity_strategy: Defines the method for computing similarity between
                         embedding vectors. Possible values are 'dotproduct' and
                         'distance'.
    loss_strategy: Defines the type of loss including 'softmax' or 'sigmoid'.
    is_intermediate: True if applied to intermediate predictions;
      otherwise, False.

  Returns:
    A tf.float32 scalar loss tensor.
  """
  instance_ids_key = standard_fields.InputDataFields.object_instance_id_voxels
  num_voxels_key = standard_fields.InputDataFields.num_valid_voxels
  if is_intermediate:
    embedding_key = (
        standard_fields.DetectionResultFields
        .intermediate_instance_embedding_voxels)
  else:
    embedding_key = (
        standard_fields.DetectionResultFields.instance_embedding_voxels)
  if instance_ids_key not in inputs:
    raise ValueError('object_instance_id_voxels is missing in inputs.')
  if num_voxels_key not in inputs:
    raise ValueError('num_voxels is missing in inputs.')
  if embedding_key not in outputs:
    raise ValueError('embedding key is missing in outputs.')
  batch_size = inputs[num_voxels_key].get_shape().as_list()[0]
  if batch_size is None:
    raise ValueError('batch_size is not defined at graph construction time.')
  num_valid_voxels = inputs[num_voxels_key]
  num_voxels = tf.shape(inputs[instance_ids_key])[1]
  valid_mask = tf.less(
      tf.tile(tf.expand_dims(tf.range(num_voxels), axis=0), [batch_size, 1]),
      tf.expand_dims(num_valid_voxels, axis=1))
  return npair_loss_func(
      embeddings=outputs[embedding_key],
      instance_ids=tf.reshape(inputs[instance_ids_key], [batch_size, -1]),
      num_samples=num_samples,
      valid_mask=valid_mask,
      max_instance_id=max_instance_id,
      similarity_strategy=similarity_strategy,
      loss_strategy=loss_strategy)
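
npair_loss_func is internal to the codebase, but the two similarity_strategy values correspond to standard pairwise similarities between embedding vectors; a sketch of what each option computes:

import tensorflow as tf

def pairwise_similarity_sketch(embeddings, similarity_strategy):
  """Pairwise similarity matrix for a [k, f] batch of embeddings."""
  if similarity_strategy == 'dotproduct':
    return tf.matmul(embeddings, embeddings, transpose_b=True)
  elif similarity_strategy == 'distance':
    # Negative squared euclidean distance: larger means more similar.
    squared_norms = tf.reduce_sum(tf.square(embeddings), axis=1, keepdims=True)
    dots = tf.matmul(embeddings, embeddings, transpose_b=True)
    return -(squared_norms - 2.0 * dots + tf.transpose(squared_norms))
  else:
    raise ValueError('Unknown similarity strategy.')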
Example #12
def _safe_div(a, b):
  """Divides two numbers, returns 0 if denominator is (close to) 0."""
  return tf.where(tf.less(tf.abs(b), 1e-10), 0.0, a / b)
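
One caveat: `a / b` is still evaluated for the masked entries, so the forward value is safe but gradients through the division can become NaN when b is exactly zero. A common variant substitutes a safe denominator before dividing; a sketch (the helper name is illustrative):

import tensorflow as tf

def _safe_div_grad_safe(a, b, eps=1e-10):
  """Like _safe_div, but also keeps gradients finite when b == 0."""
  denominator_is_zero = tf.less(tf.abs(b), eps)
  # Replace near-zero denominators with 1.0 before dividing so the division
  # itself never produces inf/NaN (which would poison gradients).
  safe_b = tf.where(denominator_is_zero, tf.ones_like(b), b)
  return tf.where(denominator_is_zero, tf.zeros_like(a), a / safe_b)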
Example #13
def prepare_waymo_open_dataset(inputs,
                               valid_object_classes=None,
                               max_object_distance_from_source=74.88):
  """Maps the fields from loaded input to standard fields.

  Args:
    inputs: A dictionary of input tensors.
    valid_object_classes: List of valid object classes. If None, it is ignored.
    max_object_distance_from_source: Maximum distance of objects from source. It
      will be ignored if None.

  Returns:
    A dictionary of input tensors with standard field names.
  """
  prepared_inputs = {}
  if standard_fields.InputDataFields.point_positions in inputs:
    prepared_inputs[standard_fields.InputDataFields.point_positions] = inputs[
        standard_fields.InputDataFields.point_positions]
  if standard_fields.InputDataFields.point_intensities in inputs:
    prepared_inputs[standard_fields.InputDataFields.point_intensities] = inputs[
        standard_fields.InputDataFields.point_intensities]
  if standard_fields.InputDataFields.point_elongations in inputs:
    prepared_inputs[standard_fields.InputDataFields.point_elongations] = inputs[
        standard_fields.InputDataFields.point_elongations]
  if standard_fields.InputDataFields.point_normals in inputs:
    prepared_inputs[standard_fields.InputDataFields.point_normals] = inputs[
        standard_fields.InputDataFields.point_normals]
  if 'cameras/front/intrinsics/K' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .camera_intrinsics] = inputs['cameras/front/intrinsics/K']
  if 'cameras/front/extrinsics/R' in inputs:
    prepared_inputs[
        standard_fields.InputDataFields
        .camera_rotation_matrix] = inputs['cameras/front/extrinsics/R']
  if 'cameras/front/extrinsics/t' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .camera_translation] = inputs['cameras/front/extrinsics/t']
  if 'cameras/front/image' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .camera_image] = inputs['cameras/front/image']
    prepared_inputs[standard_fields.InputDataFields
                    .camera_raw_image] = inputs['cameras/front/image']
    prepared_inputs[standard_fields.InputDataFields
                    .camera_original_image] = inputs['cameras/front/image']
  if 'scene_name' in inputs and 'frame_name' in inputs:
    prepared_inputs[
        standard_fields.InputDataFields.camera_image_name] = tf.strings.join(
            [inputs['scene_name'], inputs['frame_name']], separator='_')
  if 'objects/pose/R' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .objects_rotation_matrix] = inputs['objects/pose/R']
  if 'objects/pose/t' in inputs:
    prepared_inputs[standard_fields.InputDataFields
                    .objects_center] = inputs['objects/pose/t']
  if 'objects/shape/dimension' in inputs:
    prepared_inputs[
        standard_fields.InputDataFields.objects_length] = tf.reshape(
            inputs['objects/shape/dimension'][:, 0], [-1, 1])
    prepared_inputs[standard_fields.InputDataFields.objects_width] = tf.reshape(
        inputs['objects/shape/dimension'][:, 1], [-1, 1])
    prepared_inputs[
        standard_fields.InputDataFields.objects_height] = tf.reshape(
            inputs['objects/shape/dimension'][:, 2], [-1, 1])
  if 'objects/category/label' in inputs:
    prepared_inputs[standard_fields.InputDataFields.objects_class] = tf.reshape(
        inputs['objects/category/label'], [-1, 1])
  if valid_object_classes is not None:
    valid_objects_mask = tf.cast(
        tf.zeros_like(
            prepared_inputs[standard_fields.InputDataFields.objects_class],
            dtype=tf.int32),
        dtype=tf.bool)
    for object_class in valid_object_classes:
      valid_objects_mask = tf.logical_or(
          valid_objects_mask,
          tf.equal(
              prepared_inputs[standard_fields.InputDataFields.objects_class],
              object_class))
    valid_objects_mask = tf.reshape(valid_objects_mask, [-1])
    for key in standard_fields.get_input_object_fields():
      if key in prepared_inputs:
        prepared_inputs[key] = tf.boolean_mask(prepared_inputs[key],
                                               valid_objects_mask)

  if max_object_distance_from_source is not None:
    if standard_fields.InputDataFields.objects_center in prepared_inputs:
      object_distances = tf.norm(
          prepared_inputs[standard_fields.InputDataFields.objects_center][:,
                                                                          0:2],
          axis=1)
      valid_mask = tf.less(object_distances, max_object_distance_from_source)
      for key in standard_fields.get_input_object_fields():
        if key in prepared_inputs:
          prepared_inputs[key] = tf.boolean_mask(prepared_inputs[key],
                                                 valid_mask)

  return prepared_inputs
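
A minimal usage sketch; only a subset of the optional keys is populated, shapes follow what the function's slicing implies (object dimensions as [num_objects, 3], labels as [num_objects]), and it assumes standard_fields.get_input_object_fields() lists the objects_* keys as the filtering code implies:

import tensorflow as tf

inputs = {
    'objects/pose/t': tf.constant([[10.0, 2.0, 0.5], [90.0, 0.0, 0.5]]),
    'objects/shape/dimension': tf.constant([[4.5, 1.8, 1.6], [4.2, 1.8, 1.5]]),
    'objects/category/label': tf.constant([1, 1]),
}
prepared = prepare_waymo_open_dataset(inputs, valid_object_classes=[1])
# The second object sits ~90m from the sensor, beyond the default 74.88m
# cutoff, so only the first object survives the distance filter.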
Example #14
def _sample(a, ou):
    return tf.cond(
        tf.less(tf.random.uniform((), 0, 1), epsilon_greedy),
        lambda: a + ou(), lambda: a)
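
This fragment implements epsilon-greedy exploration: with probability epsilon_greedy (a variable from the enclosing scope of the original source) noise from ou(), presumably an Ornstein-Uhlenbeck process, is added to the action a. A self-contained sketch with the free variables made explicit (names are illustrative):

import tensorflow as tf

def sample_action(action, noise_fn, epsilon_greedy=0.1):
  """Adds exploration noise to `action` with probability `epsilon_greedy`."""
  return tf.cond(
      tf.less(tf.random.uniform((), 0, 1), epsilon_greedy),
      lambda: action + noise_fn(),
      lambda: action)

# Example: zero-mean gaussian noise standing in for the OU process.
noisy = sample_action(tf.constant([0.2, -0.4]),
                      lambda: tf.random.normal([2], stddev=0.05))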