Code example #1
    def CreateDecoderMetrics(self):
        """Decoder metrics for WaymoOpenDataset."""
        p = self.params

        waymo_metric_p = p.ap_metric.Copy().Set(
            cls=waymo_ap_metric.WaymoAPMetrics)
        waymo_metrics = waymo_metric_p.Instantiate()
        class_names = waymo_metrics.metadata.ClassNames()

        # TODO(bencaine,vrv): There's some code smell with this ap_metrics params
        # usage. We create local copies of the params to then instantiate them.
        # Failing to do this risks users editing the params after construction of
        # the object, making each object method call have the potential for side
        # effects.
        # Create a new dictionary with copies of the params converted to objects
        # so we can then add these to the decoder metrics.
        extra_ap_metrics = {}
        for k, metric_p in p.extra_ap_metrics.items():
            extra_ap_metrics[k] = metric_p.Instantiate()

        waymo_metric_bev_p = waymo_metric_p.Copy()
        waymo_metric_bev_p.box_type = '2d'
        waymo_metrics_bev = waymo_metric_bev_p.Instantiate()
        # Convert the list of class names to a dictionary mapping class_id -> name.
        class_id_to_name = dict(enumerate(class_names))

        # TODO(vrv): This uses the same top down transform as for KITTI;
        # re-visit these settings since detections can happen all around
        # the car.
        top_down_transform = transform_util.MakeCarToImageTransform(
            pixels_per_meter=32.,
            image_ref_x=512.,
            image_ref_y=1408.,
            flip_axes=True)
        decoder_metrics = py_utils.NestedMap({
            'top_down_visualization':
            (detection_3d_metrics.TopDownVisualizationMetric(
                top_down_transform,
                image_height=1536,
                image_width=1024,
                class_id_to_name=class_id_to_name)),
            'num_samples_in_batch':
            metrics.AverageMetric(),
            'waymo_metrics':
            waymo_metrics,
            'waymo_metrics_bev':
            waymo_metrics_bev,
        })
        self._update_metrics_class_keys = [
            'waymo_metrics_bev', 'waymo_metrics'
        ]
        for k, metric in extra_ap_metrics.items():
            decoder_metrics[k] = metric
            self._update_metrics_class_keys.append(k)

        decoder_metrics.mesh = detection_3d_metrics.WorldViewer()
        return decoder_metrics
Code example #2
File: transform_util_test.py  Project: wzhang1/lingvo
  def testMakeCarToImageTransformFlipAxesTrue(self):
    transform = transform_util.MakeCarToImageTransform(
        pixels_per_meter=10.0, image_ref_x=250, image_ref_y=750, flip_axes=True)
    # pyformat: disable
    self.assertAllClose(
        np.matrix([[0., -10., 0., 250.,],
                   [-10., 0., 0., 750.,],
                   [0., 0., 1., 0.,],
                   [0., 0., 0., 1.,]]),
        transform)
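
The assertions above pin down the matrix this function returns. As a minimal sketch (an illustration consistent with these tests, not the library's own code), the same 4x4 homogeneous matrix can be built with plain numpy; flip_axes=True swaps and negates the car's x/y axes so the car's forward axis points up in the image:

import numpy as np

def make_car_to_image_transform(pixels_per_meter, image_ref_x, image_ref_y,
                                flip_axes):
  # Sketch: 4x4 homogeneous car-to-image transform matching the test above.
  if flip_axes:
    # image_x = -ppm * car_y + image_ref_x; image_y = -ppm * car_x + image_ref_y.
    return np.array([[0., -pixels_per_meter, 0., image_ref_x],
                     [-pixels_per_meter, 0., 0., image_ref_y],
                     [0., 0., 1., 0.],
                     [0., 0., 0., 1.]])
  # Without flipping: a uniform scale by pixels_per_meter plus a translation.
  return np.array([[pixels_per_meter, 0., 0., image_ref_x],
                   [0., pixels_per_meter, 0., image_ref_y],
                   [0., 0., 1., 0.],
                   [0., 0., 0., 1.]])
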
Code example #3
File: transform_util_test.py  Project: wzhang1/lingvo
  def testTransformPoint(self):
    transform = transform_util.MakeCarToImageTransform(
        pixels_per_meter=10.0,
        image_ref_x=250,
        image_ref_y=750,
        flip_axes=False)
    tx, ty, tz = transform_util.TransformPoint(transform, 0.0, 1.0, 0.0)
    # X gets translated.
    self.assertEqual(250., tx)
    # Y gets translated and scaled by pixels_per_meter.
    self.assertEqual(760., ty)
    self.assertEqual(0., tz)
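
TransformPoint behaves like a homogeneous matrix-vector product. A minimal sketch that reproduces the assertions above (an illustration, not the project's implementation):

import numpy as np

def transform_point(transform, x, y, z):
  # Sketch: apply a 4x4 homogeneous transform to the point (x, y, z).
  tx, ty, tz, _ = np.asarray(transform) @ np.array([x, y, z, 1.])
  return tx, ty, tz

# With the flip_axes=False transform above, (0, 1, 0) maps to (250, 760, 0):
# x only picks up the 250-pixel reference offset, while y is scaled by 10
# pixels per meter and offset by 750.
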
Code example #4
File: transform_util_test.py  Project: wzhang1/lingvo
  def testCopyTransform(self):
    # Same transform as above.
    transform = transform_util.MakeCarToImageTransform(
        pixels_per_meter=10.0,
        image_ref_x=250,
        image_ref_y=750,
        flip_axes=False)
    # Test that copying the transform yields the same result.
    copy_transform = transform_util.CopyTransform(transform)
    tx, ty, tz = transform_util.TransformPoint(copy_transform, 0.0, 1.0, 0.0)
    self.assertEqual(250., tx)
    self.assertEqual(760., ty)
    self.assertEqual(0., tz)
Code example #5
File: waymo_decoder.py  Project: thzll2001/lingvo
    def CreateDecoderMetrics(self):
        """Decoder metrics for WaymoOpenDataset."""
        p = self.params

        waymo_metric_p = p.ap_metric.Copy().Set(
            cls=waymo_ap_metric.WaymoAPMetrics)
        waymo_metrics = waymo_metric_p.Instantiate()
        class_names = waymo_metrics.metadata.ClassNames()

        # Unlike the version in code example #1, this instantiates the extra
        # AP metrics in place, mutating p.extra_ap_metrics.
        for k, v in p.extra_ap_metrics.items():
            p.extra_ap_metrics[k] = v.Instantiate()

        waymo_metric_bev_p = waymo_metric_p.Copy()
        waymo_metric_bev_p.box_type = '2d'
        waymo_metrics_bev = waymo_metric_bev_p.Instantiate()
        # Convert the list of class names to a dictionary mapping class_id -> name.
        class_id_to_name = dict(enumerate(class_names))

        # TODO(vrv): This uses the same top down transform as for KITTI;
        # re-visit these settings since detections can happen all around
        # the car.
        top_down_transform = transform_util.MakeCarToImageTransform(
            pixels_per_meter=32.,
            image_ref_x=512.,
            image_ref_y=1408.,
            flip_axes=True)
        decoder_metrics = py_utils.NestedMap({
            'top_down_visualization':
            (detection_3d_metrics.TopDownVisualizationMetric(
                top_down_transform,
                image_height=1536,
                image_width=1024,
                class_id_to_name=class_id_to_name)),
            'num_samples_in_batch':
            metrics.AverageMetric(),
            'waymo_metrics':
            waymo_metrics,
            'waymo_metrics_bev':
            waymo_metrics_bev,
        })
        self._update_metrics_class_keys = [
            'waymo_metrics_bev', 'waymo_metrics'
        ]
        for k, v in p.extra_ap_metrics.items():
            decoder_metrics[k] = v
            self._update_metrics_class_keys.append(k)

        decoder_metrics.mesh = detection_3d_metrics.WorldViewer()
        return decoder_metrics
Code example #6
File: transform_util_test.py  Project: wzhang1/lingvo
  def testBox2DTransform(self):
    # Take the box from above and apply a car-image transform.
    box = transform_util.Box2D(1.0, 1.0, 2., 1., 0.)
    transform = transform_util.MakeCarToImageTransform(
        pixels_per_meter=10.0, image_ref_x=250, image_ref_y=750, flip_axes=True)
    new_box = box.Apply(transform)

    # The center flips across the x=y axis to -1, -1.  After the scaling and
    # translation, the box should be centered (240, 740).  Because the box flips
    # across the axis, the width and length get flipped from 2, 1 to [10, 20].
    #
    # The flip axes should cause the heading to go from 0. to -pi/2.
    self.assertAllClose([240., 740., 10., 20., -np.pi / 2.], new_box.AsNumpy())

    # Check ymin/xmin/ymax/xmax: the rectangle is now longer in the y-dimension
    # than the x-dimension.
    self.assertAllClose((730., 235., 750., 245.), new_box.Extrema())
Code example #7
File: transform_util_test.py  Project: wzhang1/lingvo
  def testTransformHeading(self):
    transform = transform_util.MakeCarToImageTransform(
        pixels_per_meter=1.0, image_ref_x=123, image_ref_y=455, flip_axes=True)

    # Ray (0, 1): 90 degrees becomes -180 degrees.
    self.assertAllClose(-np.pi,
                        transform_util.TransformHeading(transform, np.pi / 2.))

    # Ray (1, 0): 0 degrees becomes -90
    self.assertAllClose(-np.pi / 2.,
                        transform_util.TransformHeading(transform, 0.))

    # (-1, 0) becomes (0, 1) or np.pi / 2.
    self.assertAllClose(np.pi / 2.,
                        transform_util.TransformHeading(transform, np.pi))

    # (0, -1) becomes (1, 0) or 0
    self.assertAllClose(0.,
                        transform_util.TransformHeading(transform, 1.5 * np.pi))
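
One way to see why the headings map this way: transform the origin and a point one unit along the heading ray, then measure the direction of the image-space difference vector. A hedged sketch of that idea (the project's actual TransformHeading may be implemented differently):

import numpy as np

def transform_heading(transform, heading):
  # Sketch: transform the origin and a unit-ray endpoint, then take the
  # angle of the image-space direction between them.
  m = np.asarray(transform)
  x0, y0 = (m @ [0., 0., 0., 1.])[:2]
  x1, y1 = (m @ [np.cos(heading), np.sin(heading), 0., 1.])[:2]
  return np.arctan2(y1 - y0, x1 - x0)

With flip_axes=True the rotation part maps the ray (cos h, sin h) to (-sin h, -cos h), which reproduces all four assertions above.
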
Code example #8
    def testTopDownVisualizationMetric(self):
        top_down_transform = transform_util.MakeCarToImageTransform(
            pixels_per_meter=32.,
            image_ref_x=512.,
            image_ref_y=1408.,
            flip_axes=True)
        metric = detection_3d_metrics.TopDownVisualizationMetric(
            top_down_transform)

        batch_size = 4
        num_preds = 10
        num_gt = 12
        num_points = 128

        visualization_labels = np.random.randint(0, 2, (batch_size, num_preds))
        predicted_bboxes = np.random.rand(batch_size, num_preds, 5)
        visualization_weights = np.abs(np.random.rand(batch_size, num_preds))

        labels = np.random.randint(0, 2, (batch_size, num_gt))
        gt_bboxes_2d = np.random.rand(batch_size, num_gt, 5)
        gt_bboxes_2d_weights = np.abs(np.random.rand(batch_size, num_gt))
        difficulties = np.random.randint(0, 3, (batch_size, num_gt))

        points_xyz = np.random.rand(batch_size, num_points, 3)
        points_padding = np.random.randint(0, 2, (batch_size, num_points))
        source_ids = np.full([batch_size], '012346')

        metric.Update(
            py_utils.NestedMap({
                'visualization_labels': visualization_labels,
                'predicted_bboxes': predicted_bboxes,
                'visualization_weights': visualization_weights,
                'labels': labels,
                'gt_bboxes_2d': gt_bboxes_2d,
                'gt_bboxes_2d_weights': gt_bboxes_2d_weights,
                'points_xyz': points_xyz,
                'points_padding': points_padding,
                'difficulties': difficulties,
                'source_ids': source_ids,
            }))

        _ = metric.Summary('test')
Code example #9
    def CreateDecoderMetrics(self):
        """Decoder metrics for KITTI."""
        p = self.params

        kitti_metric_p = p.ap_metric.Copy().Set(
            cls=kitti_ap_metric.KITTIAPMetrics)
        apm = kitti_metric_p.Instantiate()
        class_names = apm.metadata.ClassNames()

        # Convert the list of class names to a dictionary mapping class_id -> name.
        class_id_to_name = dict(enumerate(class_names))

        top_down_transform = transform_util.MakeCarToImageTransform(
            pixels_per_meter=32.,
            image_ref_x=512.,
            image_ref_y=1408.,
            flip_axes=True)

        decoder_metrics = py_utils.NestedMap({
            'top_down_visualization':
            (detection_3d_metrics.TopDownVisualizationMetric(
                top_down_transform,
                image_height=1536,
                image_width=1024,
                class_id_to_name=class_id_to_name)),
            'num_samples_in_batch':
            metrics.AverageMetric(),
            'kitti_AP_v2':
            apm,
        })

        decoder_metrics.mesh = detection_3d_metrics.WorldViewer()

        if p.summarize_boxes_on_image:
            decoder_metrics.camera_visualization = (
                detection_3d_metrics.CameraVisualization(
                    bbox_score_threshold=p.visualization_classification_threshold))

        return decoder_metrics
Code example #10
def DrawTrajectory(image, bboxes, masks, labels, is_groundtruth):
    """Draw the trajectory of bounding boxes on 'image'.

  Args:
    image: The uint8 image array to draw on.  Assumes [1000, 500, 3] input with
      RGB value ranges.
    bboxes: A [num_steps, num_objects, 5] float array containing the bounding
      box information over a sequence of steps.  bboxes are expected to be in
      car coordinates.
    masks: A [num_steps, num_objects] integer array indicating whether the
      corresponding bbox entry in bboxes is present (1 = present).
    labels: A [num_steps, num_objects] integer label indicating which class is
      being predicted.  Used for colorizing based on labels.
    is_groundtruth: True if the scene is the groundtruth vs. the predicted.

  Returns:
    The updated image array.
  """
    image = Image.fromarray(np.uint8(image)).convert('RGB')
    draw = ImageDraw.Draw(image)

    try:
        font = ImageFont.truetype('arial.ttf', 20)
    except IOError:
        font = ImageFont.load_default()

    pixels_per_meter = 10.
    image_ref_x = 250.
    image_ref_y = 750.
    car_to_image_transform = transform_util.MakeCarToImageTransform(
        pixels_per_meter=pixels_per_meter,
        image_ref_x=image_ref_x,
        image_ref_y=image_ref_y,
        flip_axes=True)

    # Iterate over each object and produce the series of visualized trajectories
    # over time.
    for object_idx in range(bboxes.shape[1]):
        # Extract the object's class label (used to pick a consistent color).
        label = labels[0, object_idx]

        # Choose a label-consistent color.
        color = PIL_COLOR_LIST[label % len(PIL_COLOR_LIST)]
        # Make predictions white.
        if not is_groundtruth:
            color = 'white'

        # Convert string color name to RGB so we can manipulate it.
        color_rgb = ImageColor.getrgb(color)

        # For each sample, extract the data, transform to image coordinates, and
        # store in centroids.
        centroids = []
        for time in range(bboxes.shape[0]):
            if masks[time, object_idx] == 0:
                continue

            center_x, center_y, width, height, heading = bboxes[time,
                                                                object_idx, :]

            # Compute the new heading.
            heading = transform_util.TransformHeading(car_to_image_transform,
                                                      heading)

            # Transform from car to image coords.
            x, y, _ = transform_util.TransformPoint(car_to_image_transform,
                                                    center_x, center_y, 0.0)

            # Hack to scale from meters to pixels.
            width *= pixels_per_meter
            height *= pixels_per_meter

            # Collect the centroids of all of the points.
            centroids.append((x, y, heading))

            # Draw the groundtruth bounding box at the first timestep.
            if is_groundtruth and time == 0:
                # Draw a rectangle
                rect = MakeRectangle(height, width, heading, offset=(x, y))
                rect += [rect[0]]
                draw.line(rect, fill=color_rgb, width=4)

                delta = 20

                # Annotate the box with the object index
                draw.text((x + delta, y + delta),
                          str(object_idx),
                          fill='white',
                          font=font)

                # Draw a callout
                draw.line([(x, y), (x + delta, y + delta)],
                          fill='white',
                          width=1)

        # Extract the point pairs from centroids and draw a line through them.
        point_pairs = []
        for (x, y, heading) in centroids:
            point_pairs.append((x, y))
        if point_pairs:
            draw.line(point_pairs, width=4, fill=color_rgb)

        # Draw the centroids.
        triangle_color_rgb = color_rgb
        for i, (x, y, heading) in enumerate(centroids):
            if i == 0:
                # Draw the heading for the first timestep.
                scale = 25 if is_groundtruth else 15
                DrawHeadingTriangle(draw, x, y, heading, triangle_color_rgb,
                                    scale)
            else:
                # Draw a circle for the centroids of other timesteps.
                outline_color = color_rgb
                circle_size = 5 if is_groundtruth else 3
                DrawCircle(draw,
                           x,
                           y,
                           fill=triangle_color_rgb,
                           outline=outline_color,
                           circle_size=circle_size)

            # Desaturate the color with every timestep.
            increment = 45  # Allow this to be modified?
            triangle_color_rgb = (triangle_color_rgb[0] - increment,
                                  triangle_color_rgb[1] - increment,
                                  triangle_color_rgb[2] - increment)

    return np.array(image)
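
DrawTrajectory relies on helpers that are not shown on this page (PIL_COLOR_LIST, MakeRectangle, DrawHeadingTriangle, DrawCircle). For orientation, here is a hypothetical sketch of what a MakeRectangle helper could look like; the name and signature are inferred from the call site above, not taken from the project:

import numpy as np

def MakeRectangle(length, width, yaw, offset=(0., 0.)):
    # Hypothetical sketch: the four corners of a length x width rectangle,
    # rotated by yaw radians and centered at offset, as (x, y) tuples in a
    # form that PIL's draw.line() accepts.
    c, s = np.cos(yaw), np.sin(yaw)
    corners = [(-length / 2., -width / 2.), (length / 2., -width / 2.),
               (length / 2., width / 2.), (-length / 2., width / 2.)]
    return [(offset[0] + c * x - s * y, offset[1] + s * x + c * y)
            for (x, y) in corners]
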
Code example #11
def _CarToImageTransform():
    return transform_util.MakeCarToImageTransform(pixels_per_meter=10.,
                                                  image_ref_x=250,
                                                  image_ref_y=750,
                                                  flip_axes=True)
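
For reference, a short hypothetical usage of this helper (not from the project): mapping a car-frame point into pixel coordinates.

transform = _CarToImageTransform()
# A point 5 m ahead (+x) and 2 m to the left (+y) of the car. With
# flip_axes=True: image_x = -10 * 2 + 250 = 230, image_y = -10 * 5 + 750 = 700.
px, py, _ = transform_util.TransformPoint(transform, 5.0, 2.0, 0.0)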