def CreateDecoderMetrics(self):
  """Builds the decoder metrics used for the Waymo Open Dataset.

  Besides returning the metrics, this also (re)sets
  `self._update_metrics_class_keys` to the keys of the AP metrics stored in
  the returned map: the two Waymo AP metrics followed by every key of
  `p.extra_ap_metrics`.

  Returns:
    A `py_utils.NestedMap` holding the top-down visualization metric, the
    `num_samples_in_batch` average, the Waymo AP metric, its '2d' box-type
    (BEV) counterpart, one instantiated metric per `p.extra_ap_metrics`
    entry, and a `mesh` world viewer.
  """
  params = self.params

  ap_params = params.ap_metric.Copy().Set(cls=waymo_ap_metric.WaymoAPMetrics)
  ap_metrics = ap_params.Instantiate()
  # Map class_id -> class name from the ordered list of class names.
  class_id_to_name = dict(enumerate(ap_metrics.metadata.ClassNames()))

  # TODO(bencaine,vrv): There's some code smell with this ap_metrics params
  # usage. We create local copies of the params to then instantiate them.
  # Failing to do this risks users editing the params after construction of
  # the object, making each object method call have the potential for side
  # effects.
  # The instantiated objects live in a fresh dictionary so that the params
  # themselves are left untouched.
  extra_metrics = {
      name: extra_params.Instantiate()
      for name, extra_params in params.extra_ap_metrics.items()
  }

  # The BEV metric shares every setting with the main one except box_type.
  bev_params = ap_params.Copy()
  bev_params.box_type = '2d'
  bev_metrics = bev_params.Instantiate()

  # TODO(vrv): This uses the same top down transform as for KITTI;
  # re-visit these settings since detections can happen all around
  # the car.
  car_to_image = transform_util.MakeCarToImageTransform(
      pixels_per_meter=32.,
      image_ref_x=512.,
      image_ref_y=1408.,
      flip_axes=True)
  top_down_metric = detection_3d_metrics.TopDownVisualizationMetric(
      car_to_image,
      image_height=1536,
      image_width=1024,
      class_id_to_name=class_id_to_name)

  decoder_metrics = py_utils.NestedMap({
      'top_down_visualization': top_down_metric,
      'num_samples_in_batch': metrics.AverageMetric(),
      'waymo_metrics': ap_metrics,
      'waymo_metrics_bev': bev_metrics,
  })

  self._update_metrics_class_keys = ['waymo_metrics_bev', 'waymo_metrics']
  for name, extra_metric in extra_metrics.items():
    decoder_metrics[name] = extra_metric
    self._update_metrics_class_keys.append(name)

  decoder_metrics.mesh = detection_3d_metrics.WorldViewer()
  return decoder_metrics
def testMakeCarToImageTransformFlipAxesTrue(self):
  """Checks the 4x4 matrix returned by MakeCarToImageTransform(flip_axes=True)."""
  # pyformat: disable
  expected = np.matrix([[0., -10., 0., 250.],
                        [-10., 0., 0., 750.],
                        [0., 0., 1., 0.],
                        [0., 0., 0., 1.]])
  actual = transform_util.MakeCarToImageTransform(
      pixels_per_meter=10.0,
      image_ref_x=250,
      image_ref_y=750,
      flip_axes=True)
  self.assertAllClose(expected, actual)
def testTransformPoint(self):
  """Checks TransformPoint on the point (0, 1, 0) without axis flipping."""
  car_to_image = transform_util.MakeCarToImageTransform(
      pixels_per_meter=10.0,
      image_ref_x=250,
      image_ref_y=750,
      flip_axes=False)
  image_x, image_y, image_z = transform_util.TransformPoint(
      car_to_image, 0.0, 1.0, 0.0)

  # x is 0, so only the translation (image_ref_x) shows up.
  self.assertEqual(250., image_x)
  # y is scaled by pixels_per_meter and then translated by image_ref_y.
  self.assertEqual(760., image_y)
  self.assertEqual(0., image_z)
def testCopyTransform(self):
  """Checks that a copied transform maps a point like the original would."""
  source_transform = transform_util.MakeCarToImageTransform(
      pixels_per_meter=10.0,
      image_ref_x=250,
      image_ref_y=750,
      flip_axes=False)

  # The copy must transform (0, 1, 0) to the same image coordinates that the
  # source transform is expected to produce.
  duplicate = transform_util.CopyTransform(source_transform)
  image_x, image_y, image_z = transform_util.TransformPoint(
      duplicate, 0.0, 1.0, 0.0)

  self.assertEqual(250., image_x)
  self.assertEqual(760., image_y)
  self.assertEqual(0., image_z)
def CreateDecoderMetrics(self):
  """Decoder metrics for WaymoOpenDataset.

  As a side effect, `self._update_metrics_class_keys` is reset to the keys of
  the AP metrics in the returned map: the two Waymo AP metrics followed by
  every key of `p.extra_ap_metrics`.

  `self.params` is only read, never modified, so this method can be called
  more than once.

  Returns:
    A `py_utils.NestedMap` with the top-down visualization metric, the
    `num_samples_in_batch` average, the Waymo AP metric, its '2d' box-type
    (BEV) variant, one instantiated metric per `p.extra_ap_metrics` entry,
    and a `mesh` world viewer.
  """
  p = self.params

  waymo_metric_p = p.ap_metric.Copy().Set(
      cls=waymo_ap_metric.WaymoAPMetrics)
  waymo_metrics = waymo_metric_p.Instantiate()
  class_names = waymo_metrics.metadata.ClassNames()

  # Instantiate the extra AP metrics into a local dictionary. Writing the
  # instantiated objects back into p.extra_ap_metrics would replace the params
  # with metric objects, so a later call would try to Instantiate() an
  # already-built metric and any other reader of the params would be affected.
  extra_ap_metrics = {
      k: metric_p.Instantiate() for k, metric_p in p.extra_ap_metrics.items()
  }

  # The BEV metric reuses the Waymo metric params with a '2d' box type.
  waymo_metric_bev_p = waymo_metric_p.Copy()
  waymo_metric_bev_p.box_type = '2d'
  waymo_metrics_bev = waymo_metric_bev_p.Instantiate()

  # Convert the list of class names to a dictionary mapping class_id -> name.
  class_id_to_name = dict(enumerate(class_names))

  # TODO(vrv): This uses the same top down transform as for KITTI;
  # re-visit these settings since detections can happen all around
  # the car.
  top_down_transform = transform_util.MakeCarToImageTransform(
      pixels_per_meter=32.,
      image_ref_x=512.,
      image_ref_y=1408.,
      flip_axes=True)

  decoder_metrics = py_utils.NestedMap({
      'top_down_visualization':
          (detection_3d_metrics.TopDownVisualizationMetric(
              top_down_transform,
              image_height=1536,
              image_width=1024,
              class_id_to_name=class_id_to_name)),
      'num_samples_in_batch': metrics.AverageMetric(),
      'waymo_metrics': waymo_metrics,
      'waymo_metrics_bev': waymo_metrics_bev,
  })

  self._update_metrics_class_keys = ['waymo_metrics_bev', 'waymo_metrics']
  for k, metric in extra_ap_metrics.items():
    decoder_metrics[k] = metric
    self._update_metrics_class_keys.append(k)

  decoder_metrics.mesh = detection_3d_metrics.WorldViewer()
  return decoder_metrics
def testBox2DTransform(self):
  """Checks Box2D.Apply with an axis-flipping car-to-image transform."""
  car_box = transform_util.Box2D(1.0, 1.0, 2., 1., 0.)
  car_to_image = transform_util.MakeCarToImageTransform(
      pixels_per_meter=10.0,
      image_ref_x=250,
      image_ref_y=750,
      flip_axes=True)
  image_box = car_box.Apply(car_to_image)

  # The center flips across the x=y axis to -1, -1. After the scaling and
  # translation, the box should be centered at (240, 740). Because the box
  # flips across the axis, the width and length get flipped from 2, 1 to
  # [10, 20].
  #
  # The flipped axes should also turn the heading from 0. into -pi/2.
  expected_box = [240., 740., 10., 20., -np.pi / 2.]
  self.assertAllClose(expected_box, image_box.AsNumpy())

  # Extrema are ordered ymin/xmin/ymax/xmax: the rectangle is now longer in
  # the y-dimension than in the x-dimension.
  expected_extrema = (730., 235., 750., 245.)
  self.assertAllClose(expected_extrema, image_box.Extrema())
def testTransformHeading(self):
  """Checks TransformHeading for the four axis-aligned headings."""
  car_to_image = transform_util.MakeCarToImageTransform(
      pixels_per_meter=1.0, image_ref_x=123, image_ref_y=455, flip_axes=True)

  # Each entry is (heading in car coordinates, expected transformed heading).
  heading_cases = [
      # Ray (0, 1): 90 degrees becomes -180 degrees.
      (np.pi / 2., -np.pi),
      # Ray (1, 0): 0 degrees becomes -90.
      (0., -np.pi / 2.),
      # (-1, 0) becomes (0, 1) or np.pi / 2.
      (np.pi, np.pi / 2.),
      # (0, -1) becomes (1, 0) or 0.
      (1.5 * np.pi, 0.),
  ]
  for car_heading, expected_heading in heading_cases:
    self.assertAllClose(
        expected_heading,
        transform_util.TransformHeading(car_to_image, car_heading))
def testTopDownVisualizationMetric(self):
  """Smoke test: Update() and Summary() run on randomly generated inputs."""
  car_to_image = transform_util.MakeCarToImageTransform(
      pixels_per_meter=32.,
      image_ref_x=512.,
      image_ref_y=1408.,
      flip_axes=True)
  viz_metric = detection_3d_metrics.TopDownVisualizationMetric(car_to_image)

  batch_size, num_preds, num_gt, num_points = 4, 10, 12, 128
  pred_shape = (batch_size, num_preds)
  gt_shape = (batch_size, num_gt)

  # Predictions.
  visualization_labels = np.random.randint(0, 2, pred_shape)
  predicted_bboxes = np.random.rand(batch_size, num_preds, 5)
  visualization_weights = np.abs(np.random.rand(*pred_shape))

  # Groundtruth.
  labels = np.random.randint(0, 2, gt_shape)
  gt_bboxes_2d = np.random.rand(batch_size, num_gt, 5)
  gt_bboxes_2d_weights = np.abs(np.random.rand(*gt_shape))
  difficulties = np.random.randint(0, 3, gt_shape)

  # Point cloud and example ids.
  points_xyz = np.random.rand(batch_size, num_points, 3)
  points_padding = np.random.randint(0, 2, (batch_size, num_points))
  source_ids = np.full([batch_size], '012346')

  decoded_outputs = py_utils.NestedMap({
      'visualization_labels': visualization_labels,
      'predicted_bboxes': predicted_bboxes,
      'visualization_weights': visualization_weights,
      'labels': labels,
      'gt_bboxes_2d': gt_bboxes_2d,
      'gt_bboxes_2d_weights': gt_bboxes_2d_weights,
      'points_xyz': points_xyz,
      'points_padding': points_padding,
      'difficulties': difficulties,
      'source_ids': source_ids,
  })
  viz_metric.Update(decoded_outputs)

  # Only checks that producing the summary does not raise.
  viz_metric.Summary('test')
def CreateDecoderMetrics(self):
  """Builds the decoder metrics used for KITTI.

  Returns:
    A `py_utils.NestedMap` with the top-down visualization metric, the
    `num_samples_in_batch` average, the KITTI AP metric (`kitti_AP_v2`), a
    `mesh` world viewer and, when `p.summarize_boxes_on_image` is set, a
    `camera_visualization` metric.
  """
  params = self.params

  ap_params = params.ap_metric.Copy().Set(cls=kitti_ap_metric.KITTIAPMetrics)
  ap_metric = ap_params.Instantiate()
  # Map class_id -> class name from the ordered list of class names.
  class_id_to_name = dict(enumerate(ap_metric.metadata.ClassNames()))

  car_to_image = transform_util.MakeCarToImageTransform(
      pixels_per_meter=32.,
      image_ref_x=512.,
      image_ref_y=1408.,
      flip_axes=True)
  top_down_metric = detection_3d_metrics.TopDownVisualizationMetric(
      car_to_image,
      image_height=1536,
      image_width=1024,
      class_id_to_name=class_id_to_name)

  decoder_metrics = py_utils.NestedMap({
      'top_down_visualization': top_down_metric,
      'num_samples_in_batch': metrics.AverageMetric(),
      'kitti_AP_v2': ap_metric,
  })
  decoder_metrics.mesh = detection_3d_metrics.WorldViewer()

  # Camera-image box summaries are opt-in.
  if params.summarize_boxes_on_image:
    score_threshold = params.visualization_classification_threshold
    decoder_metrics.camera_visualization = (
        detection_3d_metrics.CameraVisualization(
            bbox_score_threshold=score_threshold))

  return decoder_metrics
def DrawTrajectory(image, bboxes, masks, labels, is_groundtruth):
  """Draw the trajectory of bounding boxes on 'image'.

  For each object, the box centers of all unmasked steps are transformed from
  car to image coordinates, connected with a line, and marked with a heading
  triangle (first collected centroid) or a circle (later centroids). For
  groundtruth, the box at step 0 is additionally outlined and annotated with
  the object index.

  Args:
    image: The uint8 image array to draw on. Assumes [1000, 500, 3] input with
      RGB value ranges.
    bboxes: A [num_steps, num_objects, 5] float array containing the bounding
      box information over a sequence of steps. bboxes are expected to be in
      car coordinates. The last dimension is unpacked as (center_x, center_y,
      width, height, heading).
    masks: A [num_steps, num_objects] integer array indicating whether the
      corresponding bbox entry in bboxes is present (1 = present). Entries
      equal to 0 are skipped.
    labels: A [num_steps, num_objects] integer label indicating which class is
      being predicted. Used for colorizing based on labels; only the labels of
      step 0 are read.
    is_groundtruth: True if the scene is the groundtruth vs. the predicted.

  Returns:
    The updated image array.
  """
  image = Image.fromarray(np.uint8(image)).convert('RGB')
  draw = ImageDraw.Draw(image)

  # Fall back to PIL's default font if 'arial.ttf' cannot be loaded.
  try:
    font = ImageFont.truetype('arial.ttf', 20)
  except IOError:
    font = ImageFont.load_default()

  # Fixed car-to-image mapping used for this visualization.
  pixels_per_meter = 10.
  image_ref_x = 250.
  image_ref_y = 750.
  car_to_image_transform = transform_util.MakeCarToImageTransform(
      pixels_per_meter=pixels_per_meter,
      image_ref_x=image_ref_x,
      image_ref_y=image_ref_y,
      flip_axes=True)

  # Iterate over each object and produce the series of visualized trajectories
  # over time.
  for object_idx in range(bboxes.shape[1]):
    # Annotate the box with the class type. The label of step 0 is used for
    # the whole trajectory, regardless of the mask at step 0.
    label = labels[0, object_idx]

    # Choose a label_consistent color.
    color = PIL_COLOR_LIST[label % len(PIL_COLOR_LIST)]

    # Make predictions white.
    if not is_groundtruth:
      color = 'white'

    # Convert string color name to RGB so we can manipulate it.
    color_rgb = ImageColor.getrgb(color)

    # For each sample, extract the data, transform to image coordinates, and
    # store in centroids.
    centroids = []
    for time in range(bboxes.shape[0]):
      # Skip steps where this object is not present.
      if masks[time, object_idx] == 0:
        continue

      center_x, center_y, width, height, heading = bboxes[time, object_idx, :]

      # Compute the new heading.
      heading = transform_util.TransformHeading(car_to_image_transform, heading)

      # Transform from car to image coords.
      x, y, _ = transform_util.TransformPoint(car_to_image_transform, center_x,
                                              center_y, 0.0)

      # Hack to scale from meters to pixels.
      width *= pixels_per_meter
      height *= pixels_per_meter

      # Collect the centroids of all of the points.
      centroids.append((x, y, heading))

      # Draw the groundtruth bounding box at the first timestep.
      if is_groundtruth and time == 0:
        # Draw a rectangle; the first corner is appended again so that the
        # polyline drawn below is closed.
        rect = MakeRectangle(height, width, heading, offset=(x, y))
        rect += [rect[0]]
        draw.line(rect, fill=color_rgb, width=4)

        # Pixel offset of the index annotation from the box center.
        delta = 20

        # Annotate the box with the object index
        draw.text((x + delta, y + delta),
                  str(object_idx),
                  fill='white',
                  font=font)

        # Draw a callout
        draw.line([(x, y), (x + delta, y + delta)], fill='white', width=1)

    # Extract the point pairs from centroids and draw a line through them.
    point_pairs = []
    for (x, y, heading) in centroids:
      point_pairs.append((x, y))
    if point_pairs:
      draw.line(point_pairs, width=4, fill=color_rgb)

    # Draw the centroids.
    triangle_color_rgb = color_rgb
    for i, (x, y, heading) in enumerate(centroids):
      if i == 0:
        # Draw the heading for the first timestep.
        scale = 25 if is_groundtruth else 15
        DrawHeadingTriangle(draw, x, y, heading, triangle_color_rgb, scale)
      else:
        # Draw a circle for the centroids of other timesteps.
        outline_color = color_rgb
        circle_size = 5 if is_groundtruth else 3
        DrawCircle(
            draw,
            x,
            y,
            fill=triangle_color_rgb,
            outline=outline_color,
            circle_size=circle_size)

      # Darken the marker color with every timestep by subtracting a fixed
      # increment from each RGB channel.
      # NOTE(review): channels can drop below 0 after a few steps; presumably
      # PIL clamps out-of-range values when drawing -- confirm.
      increment = 45  # Allow this to be modified?
      triangle_color_rgb = (triangle_color_rgb[0] - increment,
                            triangle_color_rgb[1] - increment,
                            triangle_color_rgb[2] - increment)

  return np.array(image)
def _CarToImageTransform():
  """Returns an axis-flipping car-to-image transform.

  The transform is built with 10 pixels per meter and an image reference
  point of (250, 750).
  """
  car_to_image = transform_util.MakeCarToImageTransform(
      pixels_per_meter=10.,
      image_ref_x=250,
      image_ref_y=750,
      flip_axes=True)
  return car_to_image