# Example 1
 def test_make_objects_axis_aligned(self):
     """Checks that boxes get identity rotations and enlarged dimensions.

     Two boxes are fed in: one rotated -30 degrees around x, one with an
     axis-permutation rotation. After the op, both rotation matrices must be
     identity and each box's dimensions must cover the rotated extent.
     """
     fields = standard_fields.InputDataFields
     rot_around_x = rotation_matrix.from_rotation_around_x(-math.pi / 6.0)
     rot_permute_axes = tf.constant(
         [[0.0, 0.0, 1.0], [1.0, 0.0, 0.0], [0.0, 1.0, 0.0]],
         dtype=tf.float32)
     centers = tf.constant([[1.0, 1.0, 1.0], [-1.0, 2.0, 1.0]])
     inputs = {
         fields.objects_length:
             tf.constant([[1.0], [2.0]], dtype=tf.float32),
         fields.objects_width:
             tf.constant([[3.0], [3.0]], dtype=tf.float32),
         fields.objects_height:
             tf.constant([[2.0], [1.0]], dtype=tf.float32),
         fields.objects_center: centers,
         fields.objects_rotation_matrix:
             tf.stack([rot_around_x, rot_permute_axes], axis=0),
     }
     # All output rotations are expected to collapse to the identity.
     identity_rotations = tf.tile(
         tf.expand_dims(tf.eye(3, dtype=tf.float32), axis=0), [2, 1, 1])
     expected_inputs = {
         fields.objects_length:
             tf.constant([[1.0], [1.0]], dtype=tf.float32),
         fields.objects_width:
             tf.constant([[3.5980759], [2.0]], dtype=tf.float32),
         fields.objects_height:
             tf.constant([[3.232051], [3.0]], dtype=tf.float32),
         fields.objects_center: centers,
         fields.objects_rotation_matrix: identity_rotations,
     }
     preprocessor_utils.make_objects_axis_aligned(inputs)
     for key, expected_value in expected_inputs.items():
         self.assertAllClose(inputs[key].numpy(), expected_value.numpy())
# Example 2
def preprocess(inputs,
               output_keys=None,
               is_training=False,
               input_field_mapping_fn=None,
               image_preprocess_fn_dic=None,
               images_points_correspondence_fn=None,
               points_pad_or_clip_size=None,
               voxels_pad_or_clip_size=None,
               voxel_grid_cell_size=(0.1, 0.1, 0.1),
               num_offset_bins_x=4,
               num_offset_bins_y=4,
               num_offset_bins_z=4,
               point_feature_keys=('point_offset_bins',),
               point_to_voxel_segment_func=tf.math.unsorted_segment_mean,
               x_min_degree_rotation=None,
               x_max_degree_rotation=None,
               y_min_degree_rotation=None,
               y_max_degree_rotation=None,
               z_min_degree_rotation=None,
               z_max_degree_rotation=None,
               rotation_center=(0.0, 0.0, 0.0),
               min_scale_ratio=None,
               max_scale_ratio=None,
               translation_range=None,
               points_within_box_margin=0.0,
               num_points_to_randomly_sample=None,
               crop_points_around_random_seed_point=False,
               crop_num_points=None,
               crop_radius=None,
               crop_num_background_points=None,
               make_objects_axis_aligned=False,
               min_num_points_in_objects=0,
               fit_objects_to_instance_id_points=False,
               voxel_density_threshold=None,
               voxel_density_grid_cell_size=None):
  """Preprocesses data before running 3D object detection.

  Args:
    inputs: A dictionary of inputs. Each value must be a `Tensor`.
    output_keys: Either None, or a list of strings containing the keys in the
      dictionary that is returned by the preprocess function.
    is_training: Whether at training stage or not.
    input_field_mapping_fn: A function that maps the input fields to the
      fields expected by object detection pipeline.
    image_preprocess_fn_dic: Image preprocessing function. Maps view names to
      their image preprocessing functions. Set it to None, if there are no
      images to preprocess or you are not interested in preprocessing images.
    images_points_correspondence_fn: The function that computes correspondence
      between images and points.
    points_pad_or_clip_size: Number of target points to pad or clip to. If None,
      it will not perform the padding.
    voxels_pad_or_clip_size: Number of target voxels to pad or clip to. If None,
      it will not perform the voxel padding.
    voxel_grid_cell_size: A three dimensional tuple determining the voxel grid
      size.
    num_offset_bins_x: Number of bins for point offsets in x direction.
    num_offset_bins_y: Number of bins for point offsets in y direction.
    num_offset_bins_z: Number of bins for point offsets in z direction.
    point_feature_keys: The keys used to form the voxel features.
    point_to_voxel_segment_func: The function used to aggregate the features
      of the points that fall in the same voxel.
    x_min_degree_rotation: Min degree of rotation around the x axis.
    x_max_degree_rotation: Max degree of rotation around the x axis.
    y_min_degree_rotation: Min degree of rotation around the y axis.
    y_max_degree_rotation: Max degree of rotation around the y axis.
    z_min_degree_rotation: Min degree of rotation around the z axis.
    z_max_degree_rotation: Max degree of rotation around the z axis.
    rotation_center: Center of rotation.
    min_scale_ratio: Minimum scale ratio.
    max_scale_ratio: Maximum scale ratio.
    translation_range: A float value corresponding to the range of random
      translation in x, y, z directions. If None, no translation would happen.
    points_within_box_margin: A margin to add to box radius when deciding which
      points fall inside each box.
    num_points_to_randomly_sample: Number of points to randomly sample. If None,
      it will keep the original points and does not perform sampling.
    crop_points_around_random_seed_point: If True, randomly samples a seed
      point and crops the closest `points_pad_or_clip_size` points to the seed
      point. The random seed point selection is based on the following
      procedure. First an object box is randomly selected. Then a random point
      from the random box is selected. Note that the random seed point could be
      sampled from background as well.
    crop_num_points: Number of points to crop.
    crop_radius: The maximum distance of the cropped points from the randomly
      sampled point. If None, it won't be used.
    crop_num_background_points: Minimum number of background points in crop. If
      None, it won't get applied.
    make_objects_axis_aligned: If True, the objects will become axis aligned,
      meaning that they will have identity rotation matrix.
    min_num_points_in_objects: Remove objects that have less number of points
      in them than this value.
    fit_objects_to_instance_id_points: If True, it will fit objects to points
      based on their instance ids.
    voxel_density_threshold: Points that belong to a voxel with a density lower
      than this will be removed.
    voxel_density_grid_cell_size: Voxel grid size for removing noise based on
      voxel density threshold.

  Returns:
    inputs: The inputs processed according to our configuration.

  Raises:
    ValueError: If input dictionary is missing any of the required keys.
  """
  inputs = dict(inputs)

  # Convert all float64 to float32 and all int64 to int32, except the
  # 'timestamp' field which must keep its int64 precision.
  for key in sorted(inputs):
    value = inputs[key]
    if not isinstance(value, tf.Tensor):
      continue
    if value.dtype == tf.float64:
      inputs[key] = tf.cast(value, dtype=tf.float32)
    elif value.dtype == tf.int64 and key != 'timestamp':
      inputs[key] = tf.cast(value, dtype=tf.int32)

  (view_image_inputs, view_indices_2d_inputs, mesh_inputs, object_inputs,
   non_tensor_inputs) = split_inputs(
       inputs=inputs,
       input_field_mapping_fn=input_field_mapping_fn,
       image_preprocess_fn_dic=image_preprocess_fn_dic,
       images_points_correspondence_fn=images_points_correspondence_fn)

  if standard_fields.InputDataFields.point_positions not in mesh_inputs:
    raise ValueError('Key %s is missing' %
                     standard_fields.InputDataFields.point_positions)

  # Randomly sample points (optional).
  preprocessor_utils.randomly_sample_points(
      mesh_inputs=mesh_inputs,
      view_indices_2d_inputs=view_indices_2d_inputs,
      target_num_points=num_points_to_randomly_sample)

  # Remove low density points.
  if voxel_density_threshold is not None:
    preprocessor_utils.remove_pointcloud_noise(
        mesh_inputs=mesh_inputs,
        view_indices_2d_inputs=view_indices_2d_inputs,
        voxel_grid_cell_size=voxel_density_grid_cell_size,
        voxel_density_threshold=voxel_density_threshold)

  rotation_center = tf.convert_to_tensor(rotation_center, dtype=tf.float32)

  # Remove objects that do not have 3d info.
  _filter_valid_objects(inputs=object_inputs)

  # Cast the objects_class to tf.int32.
  _cast_objects_class(inputs=object_inputs)

  # Remove objects that have less than a certain number of points.
  if min_num_points_in_objects > 0:
    preprocessor_utils.remove_objects_by_num_points(
        mesh_inputs=mesh_inputs,
        object_inputs=object_inputs,
        min_num_points_in_objects=min_num_points_in_objects)

  # Set point box ids.
  preprocessor_utils.set_point_instance_ids(
      mesh_inputs=mesh_inputs,
      object_inputs=object_inputs,
      points_within_box_margin=points_within_box_margin)

  # Process images.
  preprocessor_utils.preprocess_images(
      view_image_inputs=view_image_inputs,
      view_indices_2d_inputs=view_indices_2d_inputs,
      image_preprocess_fn_dic=image_preprocess_fn_dic,
      is_training=is_training)

  # Randomly transform points and boxes.
  _randomly_transform_points_boxes(
      mesh_inputs=mesh_inputs,
      object_inputs=object_inputs,
      x_min_degree_rotation=x_min_degree_rotation,
      x_max_degree_rotation=x_max_degree_rotation,
      y_min_degree_rotation=y_min_degree_rotation,
      y_max_degree_rotation=y_max_degree_rotation,
      z_min_degree_rotation=z_min_degree_rotation,
      z_max_degree_rotation=z_max_degree_rotation,
      rotation_center=rotation_center,
      min_scale_ratio=min_scale_ratio,
      max_scale_ratio=max_scale_ratio,
      translation_range=translation_range)

  # Randomly crop points around a random seed point.
  if crop_points_around_random_seed_point:
    preprocessor_utils.crop_points_around_random_seed_point(
        mesh_inputs=mesh_inputs,
        view_indices_2d_inputs=view_indices_2d_inputs,
        num_closest_points=crop_num_points,
        max_distance=crop_radius,
        num_background_points=crop_num_background_points)

  if fit_objects_to_instance_id_points:
    preprocessor_utils.fit_objects_to_instance_id_points(
        mesh_inputs=mesh_inputs, object_inputs=object_inputs)

  if make_objects_axis_aligned:
    preprocessor_utils.make_objects_axis_aligned(object_inputs=object_inputs)

  # Putting back the dictionaries together.
  inputs = mesh_inputs.copy()
  inputs.update(object_inputs)
  inputs.update(non_tensor_inputs)
  for key in sorted(view_image_inputs):
    inputs[('%s/features' % key)] = view_image_inputs[key]
  for key in sorted(view_indices_2d_inputs):
    inputs[('%s/indices_2d' % key)] = view_indices_2d_inputs[key]

  # Transfer object properties to points, and randomly rotate the points around
  # y axis at training time.
  _transfer_object_properties_to_points(inputs=inputs)

  # Pad or clip points and their properties.
  _pad_or_clip_point_properties(
      inputs=inputs, pad_or_clip_size=points_pad_or_clip_size)

  # Create features that do not exist.
  preprocessor_utils.add_point_offsets(
      inputs=inputs, voxel_grid_cell_size=voxel_grid_cell_size)
  preprocessor_utils.add_point_offset_bins(
      inputs=inputs,
      voxel_grid_cell_size=voxel_grid_cell_size,
      num_bins_x=num_offset_bins_x,
      num_bins_y=num_offset_bins_y,
      num_bins_z=num_offset_bins_z)

  # Voxelize point features.
  preprocessor_utils.voxelize_point_features(
      inputs=inputs,
      voxels_pad_or_clip_size=voxels_pad_or_clip_size,
      voxel_grid_cell_size=voxel_grid_cell_size,
      point_feature_keys=point_feature_keys,
      point_to_voxel_segment_func=point_to_voxel_segment_func)

  # Voxelizing the semantic labels.
  preprocessor_utils.voxelize_semantic_labels(
      inputs=inputs,
      voxels_pad_or_clip_size=voxels_pad_or_clip_size,
      voxel_grid_cell_size=voxel_grid_cell_size)

  # Voxelizing the instance labels.
  preprocessor_utils.voxelize_instance_labels(
      inputs=inputs,
      voxels_pad_or_clip_size=voxels_pad_or_clip_size,
      voxel_grid_cell_size=voxel_grid_cell_size)

  # Voxelize the object properties.
  preprocessor_utils.voxelize_object_properties(
      inputs=inputs,
      voxels_pad_or_clip_size=voxels_pad_or_clip_size,
      voxel_grid_cell_size=voxel_grid_cell_size)

  # Keep only the requested output keys, if a filter was provided.
  if output_keys is not None:
    inputs = {key: value for key, value in inputs.items()
              if key in output_keys}

  return inputs