def test_compute_inclination(self):
    inclinations = range_image_utils.compute_inclination(
        tf.constant([-1.0, 1.0]), 4)
    with self.test_session() as sess:
        incl = sess.run(inclinations)
        self.assertAllClose(incl, [
            -1.0 + 0.5 * 2 / 4, -1.0 + 1.5 * 2 / 4, -1.0 + 2.5 * 2 / 4,
            -1.0 + 3.5 * 2 / 4
        ])
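The expected values in this test follow directly from a uniform-spacing formula: each beam inclination sits half a step above the minimum, with (max - min) / height between rows. Below is a minimal NumPy sketch of that formula; uniform_inclinations is a hypothetical helper for illustration, not part of range_image_utils.

import numpy as np

def uniform_inclinations(inclination_min, inclination_max, height):
    # Hypothetical helper: each beam sits at the center of its row, offset by
    # half a step from the minimum, with (max - min) / height between rows.
    step = (inclination_max - inclination_min) / height
    return inclination_min + (np.arange(height) + 0.5) * step

print(uniform_inclinations(-1.0, 1.0, 4))  # [-0.75 -0.25  0.25  0.75]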
Example n. 2
def convert_range_image_to_point_cloud(frame,
                                       range_images,
                                       camera_projections,
                                       range_image_top_pose,
                                       ri_index=0):
    """Convert range images to point cloud."""
    calibrations = sorted(frame.context.laser_calibrations,
                          key=lambda c: c.name)
    points = []
    cp_points = []

    frame_pose = tf.convert_to_tensor(
        np.reshape(np.array(frame.pose.transform), [4, 4]))
    # [H, W, 6]
    range_image_top_pose_tensor = tf.reshape(
        tf.convert_to_tensor(range_image_top_pose.data),
        range_image_top_pose.shape.dims)
    # [H, W, 3, 3]
    range_image_top_pose_tensor_rotation = transform_utils.get_rotation_matrix(
        range_image_top_pose_tensor[..., 0],
        range_image_top_pose_tensor[..., 1],
        range_image_top_pose_tensor[..., 2])
    range_image_top_pose_tensor_translation = range_image_top_pose_tensor[..., 3:]
    range_image_top_pose_tensor = transform_utils.get_transform(
        range_image_top_pose_tensor_rotation,
        range_image_top_pose_tensor_translation)
    for c in calibrations:
        range_image = range_images[c.name][ri_index]

        if len(c.beam_inclinations) == 0:
            beam_inclinations = range_image_utils.compute_inclination(
                tf.constant([c.beam_inclination_min, c.beam_inclination_max]),
                height=range_image.shape.dims[0])
        else:
            beam_inclinations = tf.constant(c.beam_inclinations)

        beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])
        extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4])

        range_image_tensor = tf.reshape(
            tf.convert_to_tensor(range_image.data), range_image.shape.dims)
        pixel_pose_local = None
        frame_pose_local = None

        if c.name == open_dataset.LaserName.TOP:
            pixel_pose_local = range_image_top_pose_tensor
            pixel_pose_local = tf.expand_dims(pixel_pose_local, axis=0)
            frame_pose_local = tf.expand_dims(frame_pose, axis=0)
        range_image_mask = range_image_tensor[..., 0] > 0
        range_image_cartesian = range_image_utils.extract_point_cloud_from_range_image(
            tf.expand_dims(range_image_tensor[..., 0], axis=0),
            tf.expand_dims(extrinsic, axis=0),
            tf.expand_dims(tf.convert_to_tensor(beam_inclinations), axis=0),
            pixel_pose=pixel_pose_local,
            frame_pose=frame_pose_local)

        range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)
        points_tensor = tf.gather_nd(range_image_cartesian,
                                     tf.where(range_image_mask))

        cp = camera_projections[c.name][0]
        cp_tensor = tf.reshape(tf.convert_to_tensor(cp.data), cp.shape.dims)
        cp_points_tensor = tf.gather_nd(cp_tensor, tf.where(range_image_mask))
        points.append(points_tensor.numpy())
        cp_points.append(cp_points_tensor.numpy())

    return points, cp_points
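A hedged usage sketch for the function above. It assumes frames read from a Waymo perception TFRecord and the frame_utils.parse_range_image_and_camera_projection helper (newer library versions may return additional values from that helper); the segment filename is a placeholder.

import numpy as np
import tensorflow as tf
from waymo_open_dataset import dataset_pb2
from waymo_open_dataset.utils import frame_utils

# 'segment-xxxx.tfrecord' is a placeholder path, not a real file name.
dataset = tf.data.TFRecordDataset('segment-xxxx.tfrecord', compression_type='')
for data in dataset:
    frame = dataset_pb2.Frame()
    frame.ParseFromString(bytearray(data.numpy()))
    range_images, camera_projections, range_image_top_pose = (
        frame_utils.parse_range_image_and_camera_projection(frame))
    points, cp_points = convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose)
    # One [N, 3] array per lidar; concatenate for a single cloud.
    all_points = np.concatenate(points, axis=0)
    break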
Example n. 3
def convert_range_image_to_point_cloud(frame,
                                       range_images,
                                       camera_projections,
                                       range_image_top_pose,
                                       ri_index=0):
    """Convert range images to point cloud.

  Args:
    frame: open dataset frame
     range_images: A dict of {laser_name, [range_image_first_return,
       range_image_second_return]}.
     camera_projections: A dict of {laser_name,
       [camera_projection_from_first_return,
       camera_projection_from_second_return]}.
    range_image_top_pose: range image pixel pose for top lidar.
    ri_index: 0 for the first return, 1 for the second return.

  Returns:
    points: {[N, 3]} list of 3d lidar points of length 5 (number of lidars).
    cp_points: {[N, 6]} list of camera projections of length 5
      (number of lidars).
  """
    calibrations = sorted(frame.context.laser_calibrations,
                          key=lambda c: c.name)
    points = []
    cp_points = []

    frame_pose = tf.convert_to_tensor(
        np.reshape(np.array(frame.pose.transform), [4, 4]))
    # [H, W, 6]
    range_image_top_pose_tensor = tf.reshape(
        tf.convert_to_tensor(range_image_top_pose.data),
        range_image_top_pose.shape.dims)
    # [H, W, 3, 3]
    range_image_top_pose_tensor_rotation = transform_utils.get_rotation_matrix(
        range_image_top_pose_tensor[..., 0],
        range_image_top_pose_tensor[..., 1],
        range_image_top_pose_tensor[..., 2])
    range_image_top_pose_tensor_translation = range_image_top_pose_tensor[..., 3:]
    range_image_top_pose_tensor = transform_utils.get_transform(
        range_image_top_pose_tensor_rotation,
        range_image_top_pose_tensor_translation)
    for c in calibrations:
        range_image = range_images[c.name][ri_index]
        if len(c.beam_inclinations) == 0:  # pylint: disable=g-explicit-length-test
            beam_inclinations = range_image_utils.compute_inclination(
                tf.constant([c.beam_inclination_min, c.beam_inclination_max]),
                height=range_image.shape.dims[0])
        else:
            beam_inclinations = tf.constant(c.beam_inclinations)

        beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])
        extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4])

        range_image_tensor = tf.reshape(tf.convert_to_tensor(range_image.data),
                                        range_image.shape.dims)
        pixel_pose_local = None
        frame_pose_local = None
        if c.name == dataset_pb2.LaserName.TOP:
            pixel_pose_local = range_image_top_pose_tensor
            pixel_pose_local = tf.expand_dims(pixel_pose_local, axis=0)
            frame_pose_local = tf.expand_dims(frame_pose, axis=0)
        range_image_mask = range_image_tensor[..., 0] > 0
        range_image_cartesian = range_image_utils.extract_point_cloud_from_range_image(
            tf.expand_dims(range_image_tensor[..., 0], axis=0),
            tf.expand_dims(extrinsic, axis=0),
            tf.expand_dims(tf.convert_to_tensor(beam_inclinations), axis=0),
            pixel_pose=pixel_pose_local,
            frame_pose=frame_pose_local)

        range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)
        points_tensor = tf.gather_nd(range_image_cartesian,
                                     tf.where(range_image_mask))

        cp = camera_projections[c.name][0]
        cp_tensor = tf.reshape(tf.convert_to_tensor(cp.data), cp.shape.dims)
        cp_points_tensor = tf.gather_nd(cp_tensor, tf.where(range_image_mask))
        points.append(points_tensor.numpy())
        cp_points.append(cp_points_tensor.numpy())

    return points, cp_points
Example n. 4
    def add_point_cloud(self, feature, laser_names, range_image_pose):
        """Convert the range images in `feature` to 3D point clouds.

    Adds the point cloud data to the tf.Example feature map.

    Args:
      feature: A tf.Example feature map.
      laser_names: A list of laser names (e.g., 'TOP', 'REAR', 'SIDE_LEFT').
      range_image_pose: A range image pose Tensor for the GBR.
    """
        for laser_name in laser_names:
            beam_inclinations = np.array(
                feature['%s_beam_inclinations' %
                        laser_name].float_list.value[:])
            # beam_inclinations will be populated if there is a non-uniform
            # beam configuration (e.g., for the TOP lasers).  Others that have
            # uniform beam inclinations are only parameterized by the min and max.
            # We use these min and max if the beam_inclinations are not present,
            # and turn them into a uniform inclinations array.
            if beam_inclinations.size == 0:
                beam_inclination_min = feature['%s_beam_inclination_min' %
                                               laser_name].float_list.value[:]
                beam_inclination_max = feature['%s_beam_inclination_max' %
                                               laser_name].float_list.value[:]

                laser_ri_name = '%s_ri1' % laser_name
                range_image_shape = feature[laser_ri_name +
                                            '_shape'].int64_list.value[:]
                height = tf.cast(range_image_shape[0], tf.float32)

                beam_inclinations = tf.constant(
                    [beam_inclination_min[0], beam_inclination_max[0]])
                beam_inclinations = range_image_utils.compute_inclination(
                    beam_inclinations, height)

            beam_extrinsics = np.array(
                feature['%s_extrinsics' %
                        laser_name].float_list.value[:]).reshape(4, 4)

            for ri_type in ['ri1', 'ri2']:
                laser_ri_name = '%s_%s' % (laser_name, ri_type)
                # For each of the 4 features of the lasers:
                range_image = np.array(
                    feature[laser_ri_name].float_list.value[:])
                range_image_shape = feature[laser_ri_name +
                                            '_shape'].int64_list.value[:]
                range_image = range_image.reshape(range_image_shape)
                # Compute mask.  At the moment, invalid values in the range image
                # representation are indicated via a -1. entry.  Callers are expected
                # to create this mask when passing into the conversion function below.
                range_image_mask = range_image[..., 0] >= 0

                # Get the 'range' feature from the range images.
                range_image_range = range_image[..., 0]

                # Call utility to convert point cloud to cartesian coordinates.
                #
                # API expects a batch dimension for all inputs.
                batched_pixel_pose = None
                batched_frame_pose = None
                # At the moment, only the GBR has per-pixel pose.
                if laser_name == 'TOP':
                    batched_pixel_pose = range_image_pose[tf.newaxis, ...]
                    batched_frame_pose = self.frame_pose[tf.newaxis, ...]

                batched_range_image_range = tf.convert_to_tensor(
                    range_image_range[np.newaxis, ...], dtype=tf.float32)
                batched_extrinsics = tf.convert_to_tensor(
                    beam_extrinsics[np.newaxis, ...], dtype=tf.float32)
                batched_inclinations = tf.convert_to_tensor(
                    beam_inclinations[np.newaxis, ...], dtype=tf.float32)

                batched_inclinations = tf.reverse(batched_inclinations,
                                                  axis=[-1])

                range_image_cartesian = (
                    range_image_utils.extract_point_cloud_from_range_image(
                        batched_range_image_range,
                        batched_extrinsics,
                        batched_inclinations,
                        pixel_pose=batched_pixel_pose,
                        frame_pose=batched_frame_pose))

                points_xyz = tf.gather_nd(range_image_cartesian[0],
                                          tf.where(range_image_mask))

                # Fetch the features corresponding to each xyz coordinate and
                # concatenate them together.
                points_features = tf.cast(
                    tf.gather_nd(range_image[..., 1:],
                                 tf.where(range_image_mask)), tf.float32)
                points_data = tf.concat([points_xyz, points_features], axis=-1)

                # Add laser feature to output.
                #
                # Skip embedding shape since we assume that all points have six features
                # and so we can reconstruct the number of points.
                points_list = list(points_data.numpy().reshape([-1]))
                feature['laser_%s' %
                        laser_ri_name].float_list.value[:] = points_list
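Since the points are flattened into a plain float list with a fixed six values per point, they can be recovered without a stored shape, as the comment above notes. A minimal sketch, assuming the feature map produced by add_point_cloud; decode_laser_points is a hypothetical helper.

import numpy as np

def decode_laser_points(feature, laser_ri_name):
    # Hypothetical helper: six values per point (x, y, z plus the three
    # remaining range-image features), so a reshape recovers [N, 6].
    flat = np.array(feature['laser_%s' % laser_ri_name].float_list.value[:],
                    dtype=np.float32)
    return flat.reshape([-1, 6])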
Example n. 5
def extract_points_from_range_image(laser, calibration, frame_pose):
    """Decode points from lidar."""
    if laser.name != calibration.name:
        raise ValueError('Laser and calibration do not match')
    if laser.name == dataset_pb2.LaserName.TOP:
        frame_pose = tf.convert_to_tensor(
            np.reshape(np.array(frame_pose.transform), [4, 4]))
        range_image_top_pose = dataset_pb2.MatrixFloat.FromString(
            zlib.decompress(laser.ri_return1.range_image_pose_compressed))
        # [H, W, 6]
        range_image_top_pose_tensor = tf.reshape(
            tf.convert_to_tensor(range_image_top_pose.data),
            range_image_top_pose.shape.dims)
        # [H, W, 3, 3]
        range_image_top_pose_tensor_rotation = transform_utils.get_rotation_matrix(
            range_image_top_pose_tensor[..., 0],
            range_image_top_pose_tensor[..., 1],
            range_image_top_pose_tensor[..., 2])
        range_image_top_pose_tensor_translation = range_image_top_pose_tensor[
            ..., 3:]
        range_image_top_pose_tensor = transform_utils.get_transform(
            range_image_top_pose_tensor_rotation,
            range_image_top_pose_tensor_translation)
        frame_pose = tf.expand_dims(frame_pose, axis=0)
        pixel_pose = tf.expand_dims(range_image_top_pose_tensor, axis=0)
    else:
        pixel_pose = None
        frame_pose = None
    first_return = zlib.decompress(laser.ri_return1.range_image_compressed)
    second_return = zlib.decompress(laser.ri_return2.range_image_compressed)
    points_list = []
    for range_image_str in [first_return, second_return]:
        range_image = dataset_pb2.MatrixFloat.FromString(range_image_str)
        if not calibration.beam_inclinations:
            beam_inclinations = range_image_utils.compute_inclination(
                tf.constant([
                    calibration.beam_inclination_min,
                    calibration.beam_inclination_max
                ]),
                height=range_image.shape.dims[0])
        else:
            beam_inclinations = tf.constant(calibration.beam_inclinations)
        beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])
        extrinsic = np.reshape(np.array(calibration.extrinsic.transform),
                               [4, 4])
        range_image_tensor = tf.reshape(tf.convert_to_tensor(range_image.data),
                                        range_image.shape.dims)
        range_image_mask = range_image_tensor[..., 0] > 0
        range_image_cartesian = (
            range_image_utils.extract_point_cloud_from_range_image(
                tf.expand_dims(range_image_tensor[..., 0], axis=0),
                tf.expand_dims(extrinsic, axis=0),
                tf.expand_dims(tf.convert_to_tensor(beam_inclinations),
                               axis=0),
                pixel_pose=pixel_pose,
                frame_pose=frame_pose))
        range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)
        points_tensor = tf.gather_nd(
            tf.concat([range_image_cartesian, range_image_tensor[..., 1:4]],
                      axis=-1), tf.where(range_image_mask))
        points_list.append(points_tensor.numpy())
    return points_list
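A hedged usage sketch for extract_points_from_range_image: lasers and calibrations are matched by sorting on name, which the name check inside the function requires; frame is assumed to be a parsed dataset_pb2.Frame.

lasers = sorted(frame.lasers, key=lambda laser: laser.name)
calibrations = sorted(frame.context.laser_calibrations, key=lambda c: c.name)
points_by_laser = {}
for laser, calibration in zip(lasers, calibrations):
    # Each entry holds [first_return_points, second_return_points],
    # both [N, 6] arrays (x, y, z, intensity, elongation, is_in_nlz).
    points_by_laser[laser.name] = extract_points_from_range_image(
        laser, calibration, frame.pose)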
def convert_range_image_to_point_cloud(frame,
                                       range_images,
                                       range_image_top_pose,
                                       tfp,
                                       ri_index=0):
  """Convert range images to point cloud.

  Args:
    frame: open dataset frame
    range_images: A dict of {laser_name, [range_image_first_return,
      range_image_second_return]}.
    range_image_top_pose: range image pixel pose for top lidar.
    tfp: the TensorFlow module used for tensor ops (e.g., `tf`).
    ri_index: 0 for the first return, 1 for the second return.

  The range image has 4 channels:
    * channel 0: range
    * channel 1: intensity
    * channel 2: elongation
    * channel 3: is in any no label zone.

  Returns:
    points: {[N, 5]} list of lidar points of length 5 (number of lidars),
      with channels (x, y, z, intensity, elongation).
  """
  from waymo_open_dataset.utils import transform_utils, range_image_utils
  calibrations = sorted(frame.context.laser_calibrations, key=lambda c: c.name)
  points = []
  cp_points = []
  frame_pose = tfp.convert_to_tensor(
      value=np.reshape(np.array(frame.pose.transform), [4, 4]))
  # [H, W, 6]
  range_image_top_pose_tensor = tfp.reshape(
      tfp.convert_to_tensor(value=range_image_top_pose.data),
      range_image_top_pose.shape.dims)
  # [H, W, 3, 3]
  range_image_top_pose_tensor_rotation = transform_utils.get_rotation_matrix(
      range_image_top_pose_tensor[..., 0], range_image_top_pose_tensor[..., 1],
      range_image_top_pose_tensor[..., 2])
  range_image_top_pose_tensor_translation = range_image_top_pose_tensor[..., 3:]
  range_image_top_pose_tensor = transform_utils.get_transform(
      range_image_top_pose_tensor_rotation,
      range_image_top_pose_tensor_translation)
  for c in calibrations:
    range_image = range_images[c.name][ri_index]
    if len(c.beam_inclinations) == 0:  # pylint: disable=g-explicit-length-test
      beam_inclinations = range_image_utils.compute_inclination(
          tfp.constant([c.beam_inclination_min, c.beam_inclination_max]),
          height=range_image.shape.dims[0])
    else:
      beam_inclinations = tfp.constant(c.beam_inclinations)

    beam_inclinations = tfp.reverse(beam_inclinations, axis=[-1])
    extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4])

    range_image_tensor = tfp.reshape(
        tfp.convert_to_tensor(value=range_image.data), range_image.shape.dims)
    pixel_pose_local = None
    frame_pose_local = None
    if c.name == open_dataset.LaserName.TOP:
      pixel_pose_local = range_image_top_pose_tensor
      pixel_pose_local = tfp.expand_dims(pixel_pose_local, axis=0)
      frame_pose_local = tfp.expand_dims(frame_pose, axis=0)
    range_image_mask = range_image_tensor[..., 0] > 0
    range_image_cartesian = range_image_utils.extract_point_cloud_from_range_image(
        tfp.expand_dims(range_image_tensor[..., 0], axis=0),
        tfp.expand_dims(extrinsic, axis=0),
        tfp.expand_dims(tfp.convert_to_tensor(value=beam_inclinations), axis=0),
        pixel_pose=pixel_pose_local,
        frame_pose=frame_pose_local)

    range_image_cartesian = tfp.squeeze(range_image_cartesian, axis=0)
    points_tensor = tfp.gather_nd(range_image_cartesian,
                                  tfp.compat.v1.where(range_image_mask))
    intensity_tensor = tfp.gather_nd(
        tfp.expand_dims(range_image_tensor[..., 1], axis=2),
        tfp.compat.v1.where(range_image_mask))
    elongation_tensor = tfp.gather_nd(
        tfp.expand_dims(range_image_tensor[..., 2], axis=2),
        tfp.compat.v1.where(range_image_mask))
    stacked_tensor = np.hstack((points_tensor.numpy(), intensity_tensor.numpy(),
                                elongation_tensor.numpy()))
    points.append(stacked_tensor)

  return points
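A hedged usage sketch for the tfp variant above: passing tf as tfp runs it with TensorFlow, and each returned entry is an [N, 5] array of (x, y, z, intensity, elongation).

points = convert_range_image_to_point_cloud(
    frame, range_images, range_image_top_pose, tf)
top_xyz = points[0][:, :3]  # lidars are sorted by name; TOP (== 1) comes first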
    def convert_range_image_to_point_cloud(self,
                                           frame,
                                           range_images,
                                           camera_projections,
                                           range_image_top_pose,
                                           ri_index=0):
        """Convert range images to point cloud.

        Args:
            frame (:obj:`Frame`): Open dataset frame.
            range_images (dict): Mapping from laser_name to list of two
                range images corresponding with two returns.
            camera_projections (dict): Mapping from laser_name to list of two
                camera projections corresponding with two returns.
            range_image_top_pose (:obj:`Transform`): Range image pixel pose for
                top lidar.
            ri_index (int): 0 for the first return, 1 for the second return.
                Default: 0.

        Returns:
            tuple[list[np.ndarray]]: (List of points with shape [N, 3],
                camera projections of points with shape [N, 6], intensity
                with shape [N, 1], elongation with shape [N, 1]). All the
                lists have the length of lidar numbers (5).
        """
        calibrations = sorted(frame.context.laser_calibrations,
                              key=lambda c: c.name)
        points = []
        cp_points = []
        intensity = []
        elongation = []

        frame_pose = tf.convert_to_tensor(
            value=np.reshape(np.array(frame.pose.transform), [4, 4]))
        # [H, W, 6]
        range_image_top_pose_tensor = tf.reshape(
            tf.convert_to_tensor(value=range_image_top_pose.data),
            range_image_top_pose.shape.dims)
        # [H, W, 3, 3]
        range_image_top_pose_tensor_rotation = \
            transform_utils.get_rotation_matrix(
                range_image_top_pose_tensor[..., 0],
                range_image_top_pose_tensor[..., 1],
                range_image_top_pose_tensor[..., 2])
        range_image_top_pose_tensor_translation = \
            range_image_top_pose_tensor[..., 3:]
        range_image_top_pose_tensor = transform_utils.get_transform(
            range_image_top_pose_tensor_rotation,
            range_image_top_pose_tensor_translation)
        for c in calibrations:
            range_image = range_images[c.name][ri_index]
            if len(c.beam_inclinations) == 0:
                beam_inclinations = range_image_utils.compute_inclination(
                    tf.constant(
                        [c.beam_inclination_min, c.beam_inclination_max]),
                    height=range_image.shape.dims[0])
            else:
                beam_inclinations = tf.constant(c.beam_inclinations)

            beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])
            extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4])

            range_image_tensor = tf.reshape(
                tf.convert_to_tensor(value=range_image.data),
                range_image.shape.dims)
            pixel_pose_local = None
            frame_pose_local = None
            if c.name == dataset_pb2.LaserName.TOP:
                pixel_pose_local = range_image_top_pose_tensor
                pixel_pose_local = tf.expand_dims(pixel_pose_local, axis=0)
                frame_pose_local = tf.expand_dims(frame_pose, axis=0)
            range_image_mask = range_image_tensor[..., 0] > 0

            if self.filter_no_label_zone_points:
                nlz_mask = range_image_tensor[..., 3] != 1.0  # 1.0: in NLZ
                range_image_mask = range_image_mask & nlz_mask

            range_image_cartesian = \
                range_image_utils.extract_point_cloud_from_range_image(
                    tf.expand_dims(range_image_tensor[..., 0], axis=0),
                    tf.expand_dims(extrinsic, axis=0),
                    tf.expand_dims(tf.convert_to_tensor(
                        value=beam_inclinations), axis=0),
                    pixel_pose=pixel_pose_local,
                    frame_pose=frame_pose_local)

            range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)
            points_tensor = tf.gather_nd(range_image_cartesian,
                                         tf.compat.v1.where(range_image_mask))

            cp = camera_projections[c.name][ri_index]
            cp_tensor = tf.reshape(tf.convert_to_tensor(value=cp.data),
                                   cp.shape.dims)
            cp_points_tensor = tf.gather_nd(
                cp_tensor, tf.compat.v1.where(range_image_mask))
            points.append(points_tensor.numpy())
            cp_points.append(cp_points_tensor.numpy())

            intensity_tensor = tf.gather_nd(range_image_tensor[..., 1],
                                            tf.where(range_image_mask))
            intensity.append(intensity_tensor.numpy())

            elongation_tensor = tf.gather_nd(range_image_tensor[..., 2],
                                             tf.where(range_image_mask))
            elongation.append(elongation_tensor.numpy())

        return points, cp_points, intensity, elongation
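A hedged usage sketch for the method above; converter stands for a hypothetical instance of the enclosing class, and the per-lidar lists are merged into flat arrays.

import numpy as np

# `converter` is a hypothetical instance of the class this method belongs to.
points, cp_points, intensity, elongation = (
    converter.convert_range_image_to_point_cloud(
        frame, range_images, camera_projections, range_image_top_pose))
points_all = np.concatenate(points, axis=0)        # [N_total, 3]
intensity_all = np.concatenate(intensity, axis=0)  # [N_total]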
Example n. 8
def convert_range_image_to_cartesian(frame,
                                     range_images,
                                     range_image_top_pose,
                                     ri_index=0,
                                     keep_polar_features=False):
  """Convert range images from polar coordinates to Cartesian coordinates.

  Args:
    frame: open dataset frame
    range_images: A dict of {laser_name, [range_image_first_return,
       range_image_second_return]}.
    range_image_top_pose: range image pixel pose for top lidar.
    ri_index: 0 for the first return, 1 for the second return.
    keep_polar_features: If true, keep the features from the polar range image
      (i.e. range, intensity, and elongation) as the first features in the
      output range image.

  Returns:
    dict of {laser_name, (H, W, D)} range images in Cartesian coordinates. D
      will be 3 if keep_polar_features is False (x, y, z) and 6 if
      keep_polar_features is True (range, intensity, elongation, x, y, z).
  """
  cartesian_range_images = {}
  frame_pose = tf.convert_to_tensor(
      value=np.reshape(np.array(frame.pose.transform), [4, 4]))

  # [H, W, 6]
  range_image_top_pose_tensor = tf.reshape(
      tf.convert_to_tensor(value=range_image_top_pose.data),
      range_image_top_pose.shape.dims)
  # [H, W, 3, 3]
  range_image_top_pose_tensor_rotation = transform_utils.get_rotation_matrix(
      range_image_top_pose_tensor[..., 0], range_image_top_pose_tensor[..., 1],
      range_image_top_pose_tensor[..., 2])
  range_image_top_pose_tensor_translation = range_image_top_pose_tensor[..., 3:]
  range_image_top_pose_tensor = transform_utils.get_transform(
      range_image_top_pose_tensor_rotation,
      range_image_top_pose_tensor_translation)

  for c in frame.context.laser_calibrations:
    range_image = range_images[c.name][ri_index]
    if len(c.beam_inclinations) == 0:  # pylint: disable=g-explicit-length-test
      beam_inclinations = range_image_utils.compute_inclination(
          tf.constant([c.beam_inclination_min, c.beam_inclination_max]),
          height=range_image.shape.dims[0])
    else:
      beam_inclinations = tf.constant(c.beam_inclinations)

    beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])
    extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4])

    range_image_tensor = tf.reshape(
        tf.convert_to_tensor(value=range_image.data), range_image.shape.dims)
    pixel_pose_local = None
    frame_pose_local = None
    if c.name == dataset_pb2.LaserName.TOP:
      pixel_pose_local = range_image_top_pose_tensor
      pixel_pose_local = tf.expand_dims(pixel_pose_local, axis=0)
      frame_pose_local = tf.expand_dims(frame_pose, axis=0)
    range_image_cartesian = range_image_utils.extract_point_cloud_from_range_image(
        tf.expand_dims(range_image_tensor[..., 0], axis=0),
        tf.expand_dims(extrinsic, axis=0),
        tf.expand_dims(tf.convert_to_tensor(value=beam_inclinations), axis=0),
        pixel_pose=pixel_pose_local,
        frame_pose=frame_pose_local)

    range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)

    if keep_polar_features:
      # If we want to keep the polar coordinate features of range, intensity,
      # and elongation, concatenate them to be the initial dimensions of the
      # returned Cartesian range image.
      range_image_cartesian = tf.concat(
          [range_image_tensor[..., 0:3], range_image_cartesian], axis=-1)

    cartesian_range_images[c.name] = range_image_cartesian

  return cartesian_range_images
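A hedged follow-up sketch: recovering a masked point cloud from one of the Cartesian range images returned above. With keep_polar_features=True, channel 0 is range and channels 3:6 are x, y, z.

cartesian = convert_range_image_to_cartesian(
    frame, range_images, range_image_top_pose, ri_index=0,
    keep_polar_features=True)
top_image = cartesian[dataset_pb2.LaserName.TOP]  # [H, W, 6]
mask = top_image[..., 0] > 0                      # valid pixels have range > 0
points_xyz = tf.boolean_mask(top_image[..., 3:6], mask).numpy()  # [N, 3]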
Example n. 9
def convert_frame_to_dict(frame):
  """Convert the frame proto into a dict of numpy arrays.

  The keys, shapes, and data types are:
    For each lidar:
      <LIDAR_NAME>_BEAM_INCLINATION: H float32 array
      <LIDAR_NAME>_LIDAR_EXTRINSIC: 4x4 float32 array
      <LIDAR_NAME>_RANGE_IMAGE_FIRST_RETURN: HxWx6 float32 array
      <LIDAR_NAME>_RANGE_IMAGE_SECOND_RETURN: HxWx6 float32 array
      <LIDAR_NAME>_CAM_PROJ_FIRST_RETURN: HxWx6 int64 array
      <LIDAR_NAME>_CAM_PROJ_SECOND_RETURN: HxWx6 float32 array
      (top lidar only) TOP_RANGE_IMAGE_POSE: HxWx6 float32 array

    For each camera:
      <CAMERA_NAME>_IMAGE: HxWx3 uint8 array
      <CAMERA_NAME>_INTRINSIC: 9 float32 array
      <CAMERA_NAME>_EXTRINSIC: 4x4 float32 array
      <CAMERA_NAME>_WIDTH: int64 scalar
      <CAMERA_NAME>_HEIGHT: int64 scalar

  NOTE: This function only works in eager mode for now.

  See the LaserName.Name and CameraName.Name enums in dataset.proto for the
  valid lidar and camera name strings that will be present in the returned
  dictionaries.

  Args:
    frame: open dataset frame

  Returns:
    Dict from string field name to numpy ndarray.
  """
  range_images, camera_projection_protos, range_image_top_pose = (
      parse_range_image_and_camera_projection(frame))
  first_return_cartesian_range_images = convert_range_image_to_cartesian(
      frame, range_images, range_image_top_pose, ri_index=0,
      keep_polar_features=True)
  second_return_cartesian_range_images = convert_range_image_to_cartesian(
      frame, range_images, range_image_top_pose, ri_index=1,
      keep_polar_features=True)

  data_dict = {}

  # Save the beam inclinations, extrinsic matrices, first/second return range
  # images, and first/second return camera projections for each lidar.
  for c in frame.context.laser_calibrations:
    laser_name_str = dataset_pb2.LaserName.Name.Name(c.name)

    beam_inclination_key = f'{laser_name_str}_BEAM_INCLINATION'
    if len(c.beam_inclinations) == 0:  # pylint: disable=g-explicit-length-test
      data_dict[beam_inclination_key] = range_image_utils.compute_inclination(
          tf.constant([c.beam_inclination_min, c.beam_inclination_max]),
          height=range_images[c.name][0].shape.dims[0]).numpy()
    else:
      data_dict[beam_inclination_key] = np.array(
          c.beam_inclinations, np.float32)

    data_dict[f'{laser_name_str}_LIDAR_EXTRINSIC'] = np.reshape(
        np.array(c.extrinsic.transform, np.float32), [4, 4])

    data_dict[f'{laser_name_str}_RANGE_IMAGE_FIRST_RETURN'] = (
        first_return_cartesian_range_images[c.name].numpy())
    data_dict[f'{laser_name_str}_RANGE_IMAGE_SECOND_RETURN'] = (
        second_return_cartesian_range_images[c.name].numpy())

    first_return_cp = camera_projection_protos[c.name][0]
    data_dict[f'{laser_name_str}_CAM_PROJ_FIRST_RETURN'] = np.reshape(
        np.array(first_return_cp.data), first_return_cp.shape.dims)

    second_return_cp = camera_projection_protos[c.name][1]
    data_dict[f'{laser_name_str}_CAM_PROJ_SECOND_RETURN'] = np.reshape(
        np.array(second_return_cp.data), second_return_cp.shape.dims)

  # Save the H x W x 3 RGB image for each camera, extracted from JPEG.
  for im in frame.images:
    cam_name_str = dataset_pb2.CameraName.Name.Name(im.name)
    data_dict[f'{cam_name_str}_IMAGE'] = tf.io.decode_jpeg(im.image).numpy()

  # Save the intrinsics, 4x4 extrinsic matrix, width, and height of each camera.
  for c in frame.context.camera_calibrations:
    cam_name_str = dataset_pb2.CameraName.Name.Name(c.name)
    data_dict[f'{cam_name_str}_INTRINSIC'] = np.array(c.intrinsic, np.float32)
    data_dict[f'{cam_name_str}_EXTRINSIC'] = np.reshape(
        np.array(c.extrinsic.transform, np.float32), [4, 4])
    data_dict[f'{cam_name_str}_WIDTH'] = np.array(c.width)
    data_dict[f'{cam_name_str}_HEIGHT'] = np.array(c.height)

  # Save the range image pixel pose for the top lidar.
  data_dict['TOP_RANGE_IMAGE_POSE'] = np.reshape(
      np.array(range_image_top_pose.data, np.float32),
      range_image_top_pose.shape.dims)

  return data_dict
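A hedged usage sketch for convert_frame_to_dict; the key names follow the docstring above, with 'TOP' and 'FRONT' taken from the LaserName and CameraName enums.

data_dict = convert_frame_to_dict(frame)
print(data_dict['TOP_RANGE_IMAGE_FIRST_RETURN'].shape)  # (H, W, 6)
print(data_dict['TOP_BEAM_INCLINATION'].shape)          # (H,)
print(data_dict['FRONT_IMAGE'].dtype)                   # uint8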
def convert_frame_to_dict(frame):
    """Convert the frame proto into a dict of numpy arrays.
    The keys, shapes, and data types are:
      POSE: 4x4 float32 array
      For each lidar:
        <LIDAR_NAME>_BEAM_INCLINATION: H float32 array
        <LIDAR_NAME>_LIDAR_EXTRINSIC: 4x4 float32 array
        <LIDAR_NAME>_RANGE_IMAGE_FIRST_RETURN: HxWx6 float32 array
        <LIDAR_NAME>_RANGE_IMAGE_SECOND_RETURN: HxWx6 float32 array
        <LIDAR_NAME>_CAM_PROJ_FIRST_RETURN: HxWx6 int64 array
        <LIDAR_NAME>_CAM_PROJ_SECOND_RETURN: HxWx6 float32 array
        (top lidar only) TOP_RANGE_IMAGE_POSE: HxWx6 float32 array
      For each camera:
        <CAMERA_NAME>_IMAGE: HxWx3 uint8 array
        <CAMERA_NAME>_INTRINSIC: 9 float32 array
        <CAMERA_NAME>_EXTRINSIC: 4x4 float32 array
        <CAMERA_NAME>_WIDTH: int64 scalar
        <CAMERA_NAME>_HEIGHT: int64 scalar
    NOTE: This function only works in eager mode for now.
    See the LaserName.Name and CameraName.Name enums in dataset.proto for the
    valid lidar and camera name strings that will be present in the returned
    dictionaries.
    Args:
      frame: open dataset frame
    Returns:
      Dict from string field name to numpy ndarray.
    """
    # Laser name definition (see
    # https://github.com/waymo-research/waymo-open-dataset/blob/master/waymo_open_dataset/dataset.proto):
    # TOP = 1; FRONT = 2; SIDE_LEFT = 3; SIDE_RIGHT = 4; REAR = 5.
    # The dataset contains data from five lidars: one mid-range lidar (top) and
    # four short-range lidars (front, side left, side right, and rear), ref:
    # https://waymo.com/open/data/perception/
    # The point cloud of each lidar is encoded as a range image. Two range
    # images are provided for each lidar, one for each of the two strongest
    # returns. Each has 4 channels:
    #   channel 0: range (see spherical coordinate system definition)
    #   channel 1: lidar intensity
    #   channel 2: lidar elongation
    #   channel 3: is_in_nlz (1 = in, -1 = not in)
    range_images, camera_projection_protos, range_image_top_pose = (
        parse_range_image_and_camera_projection(frame))

    # Convert range images from polar coordinates to Cartesian coordinates
    # dict of {laser_name, (H, W, D)} range images in Cartesian coordinates. D
    # will be 3 if keep_polar_features is False (x, y, z) and 6 if
    # keep_polar_features is True (range, intensity, elongation, x, y, z).
    first_return_cartesian_range_images = convert_range_image_to_cartesian(
        frame,
        range_images,
        range_image_top_pose,
        ri_index=0,
        keep_polar_features=True)

    second_return_cartesian_range_images = convert_range_image_to_cartesian(
        frame,
        range_images,
        range_image_top_pose,
        ri_index=1,
        keep_polar_features=True)

    data_dict = {}

    # Save the beam inclinations, extrinsic matrices, first/second return range
    # images, and first/second return camera projections for each lidar.
    for c in frame.context.laser_calibrations:
        laser_name_str = dataset_pb2.LaserName.Name.Name(c.name)

        beam_inclination_key = f'{laser_name_str}_BEAM_INCLINATION'
        if len(c.beam_inclinations) == 0:  # pylint: disable=g-explicit-length-test
            data_dict[
                beam_inclination_key] = range_image_utils.compute_inclination(
                    tf.constant(
                        [c.beam_inclination_min, c.beam_inclination_max]),
                    height=range_images[c.name][0].shape.dims[0]).numpy()
        else:
            data_dict[beam_inclination_key] = np.array(c.beam_inclinations,
                                                       np.float32)

        data_dict[f'{laser_name_str}_LIDAR_EXTRINSIC'] = np.reshape(
            np.array(c.extrinsic.transform, np.float32), [4, 4])

        data_dict[f'{laser_name_str}_RANGE_IMAGE_FIRST_RETURN'] = (
            first_return_cartesian_range_images[c.name].numpy())
        data_dict[f'{laser_name_str}_RANGE_IMAGE_SECOND_RETURN'] = (
            second_return_cartesian_range_images[c.name].numpy())

        first_return_cp = camera_projection_protos[c.name][0]
        data_dict[f'{laser_name_str}_CAM_PROJ_FIRST_RETURN'] = np.reshape(
            np.array(first_return_cp.data), first_return_cp.shape.dims)

        second_return_cp = camera_projection_protos[c.name][1]
        data_dict[f'{laser_name_str}_CAM_PROJ_SECOND_RETURN'] = np.reshape(
            np.array(second_return_cp.data), second_return_cp.shape.dims)

    # Save the H x W x 3 RGB image for each camera, extracted from JPEG.
    for im in frame.images:
        cam_name_str = dataset_pb2.CameraName.Name.Name(im.name)
        data_dict[f'{cam_name_str}_IMAGE'] = tf.io.decode_jpeg(
            im.image).numpy()

    # Save the intrinsics, 4x4 extrinsic matrix, width, and height of each camera.
    for c in frame.context.camera_calibrations:
        cam_name_str = dataset_pb2.CameraName.Name.Name(c.name)
        data_dict[f'{cam_name_str}_INTRINSIC'] = np.array(
            c.intrinsic, np.float32)
        data_dict[f'{cam_name_str}_EXTRINSIC'] = np.reshape(
            np.array(c.extrinsic.transform, np.float32), [4, 4])
        data_dict[f'{cam_name_str}_WIDTH'] = np.array(c.width)
        data_dict[f'{cam_name_str}_HEIGHT'] = np.array(c.height)

    # Save the range image pixel pose for the top lidar.
    data_dict['TOP_RANGE_IMAGE_POSE'] = np.reshape(
        np.array(range_image_top_pose.data, np.float32),
        range_image_top_pose.shape.dims)

    data_dict['POSE'] = np.reshape(np.array(frame.pose.transform, np.float32),
                                   (4, 4))

    return data_dict
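A hedged follow-up sketch using the POSE entry added by this variant: frame.pose is the vehicle-to-world transform, so homogeneous lidar points can be mapped into the world frame. Invalid pixels (range <= 0) should be masked first, as in the earlier examples; this sketch skips that step for brevity.

import numpy as np

data_dict = convert_frame_to_dict(frame)
pose = data_dict['POSE']  # [4, 4] vehicle-to-world transform
xyz = data_dict['TOP_RANGE_IMAGE_FIRST_RETURN'][..., 3:6].reshape(-1, 3)
xyz_h = np.concatenate([xyz, np.ones_like(xyz[:, :1])], axis=-1)  # [N, 4]
xyz_world = (xyz_h @ pose.T)[:, :3]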
def yzl_convert_range_image_to_point_cloud(frame,
                                           range_images,
                                           camera_projections,
                                           range_image_top_pose,
                                           ri_index=0):
  """Convert range images to point cloud.

  Args:
    frame: open dataset frame
     range_images: A dict of {laser_name, [range_image_first_return,
       range_image_second_return]}.
     camera_projections: A dict of {laser_name,
       [camera_projection_from_first_return,
       camera_projection_from_second_return]}.
    range_image_top_pose: range image pixel pose for top lidar.
    ri_index: 0 for the first return, 1 for the second return.

  Returns:
    points: {[N, 12]} list of 3d lidar points of length 5 (number of lidars).
    x y z intensity elongation is_in_nlz
    channel 6: camera name
    channel 7: x (axis along image width)
    channel 8: y (axis along image height)
    channel 9: camera name of 2nd projection (set to UNKNOWN if no projection)
    channel 10: x (axis along image width)
    channel 11: y (axis along image height)

  """
  calibrations = sorted(frame.context.laser_calibrations, key=lambda c: c.name)
  points = []
  cp_points = []

  frame_pose = tf.convert_to_tensor(
      value=np.reshape(np.array(frame.pose.transform), [4, 4]))
  # [H, W, 6]
  range_image_top_pose_tensor = tf.reshape(
      tf.convert_to_tensor(value=range_image_top_pose.data),
      range_image_top_pose.shape.dims)
  # [H, W, 3, 3]
  range_image_top_pose_tensor_rotation = transform_utils.get_rotation_matrix(
      range_image_top_pose_tensor[..., 0], range_image_top_pose_tensor[..., 1],
      range_image_top_pose_tensor[..., 2])
  range_image_top_pose_tensor_translation = range_image_top_pose_tensor[..., 3:]
  range_image_top_pose_tensor = transform_utils.get_transform(
      range_image_top_pose_tensor_rotation,
      range_image_top_pose_tensor_translation)
  for c in calibrations:
    range_image = range_images[c.name][ri_index]
    if len(c.beam_inclinations) == 0:  # pylint: disable=g-explicit-length-test
      beam_inclinations = range_image_utils.compute_inclination(
          tf.constant([c.beam_inclination_min, c.beam_inclination_max]),
          height=range_image.shape.dims[0])
    else:
      beam_inclinations = tf.constant(c.beam_inclinations)

    beam_inclinations = tf.reverse(beam_inclinations, axis=[-1])
    extrinsic = np.reshape(np.array(c.extrinsic.transform), [4, 4])

    range_image_tensor = tf.reshape(
        tf.convert_to_tensor(value=range_image.data), range_image.shape.dims)
    pixel_pose_local = None
    frame_pose_local = None
    if c.name == open_dataset.LaserName.TOP:
      pixel_pose_local = range_image_top_pose_tensor
      pixel_pose_local = tf.expand_dims(pixel_pose_local, axis=0)
      frame_pose_local = tf.expand_dims(frame_pose, axis=0)
    range_image_mask = range_image_tensor[..., 0] > 0 #tf.ones_like(range_image_tensor[..., 0])
    range_image_cartesian = range_image_utils.extract_point_cloud_from_range_image(
        tf.expand_dims(range_image_tensor[..., 0], axis=0),
        tf.expand_dims(extrinsic, axis=0),
        tf.expand_dims(tf.convert_to_tensor(value=beam_inclinations), axis=0),
        pixel_pose=pixel_pose_local,
        frame_pose=frame_pose_local)

    range_image_cartesian = tf.squeeze(range_image_cartesian, axis=0)
    range_image_cartesian = tf.concat(
        [range_image_cartesian, range_image_tensor[..., 1:]], axis=2)

    points_tensor = tf.gather_nd(range_image_cartesian,
                                 tf.compat.v1.where(range_image_mask))

    cp = camera_projections[c.name][0]
    cp_tensor = tf.reshape(
        tf.convert_to_tensor(value=cp.data, dtype=tf.float32), cp.shape.dims)
    cp_points_tensor = tf.gather_nd(cp_tensor,
                                    tf.compat.v1.where(range_image_mask))

    points_tensor = tf.concat([points_tensor, cp_points_tensor], axis=1)

    points.append(points_tensor.numpy())

  return points
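A hedged usage sketch for the yzl variant: each per-lidar array packs geometry, the remaining range-image features, and both camera projections into 12 channels, as listed in the docstring above.

points = yzl_convert_range_image_to_point_cloud(
    frame, range_images, camera_projections, range_image_top_pose)
top_points = points[0]                # lidars sorted by name; TOP comes first
xyz = top_points[:, 0:3]
intensity = top_points[:, 3]
first_cam_proj = top_points[:, 6:9]   # camera name, image x, image y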