def get_ground_truth_and_pred_box():
    """Build one ground-truth and one prediction ``Box3D`` from JSON files.

    The file paths come from the module-level names
    ``gt_file_with_all_classes`` and ``true_res_with_all_classes``. Only the
    first record (index 0) of each loaded JSON document is used.

    Returns:
        tuple: ``(ground_truth_box, prediction_box)``.
    """
    gt_path, pred_path = gt_file_with_all_classes, true_res_with_all_classes
    box_fields = ("sample_token", "translation", "size", "rotation", "name")

    def first_record_as_box(path):
        # Read the file and turn its first record into a Box3D.
        with open(path, "r") as handle:
            record = json.load(handle)[0]
        return Box3D(**{field: record[field] for field in box_fields})

    ground_truth_box = first_record_as_box(gt_path)
    prediction_box = first_record_as_box(pred_path)
    return ground_truth_box, prediction_box
def load_groundtruth_boxes(level5data, sample_tokens):
    """Collect the annotations of the given samples as ``Box3D`` objects.

    Args:
        level5data: Dataset handle; only its ``get(table, token)`` lookup is
            used here.
        sample_tokens: Iterable of sample tokens whose annotations are loaded.

    Returns:
        list: One ``Box3D`` per annotation, in the order the samples are given
        and, within a sample, in the order of its ``anns`` list.
    """
    gt_box3ds = []

    for sample_token in tqdm(sample_tokens):
        sample = level5data.get('sample', sample_token)

        # Annotation fields are used exactly as stored, so no lidar or
        # ego-pose record needs to be fetched for the sample.
        for sample_annotation_token in sample['anns']:
            sample_annotation = level5data.get('sample_annotation', sample_annotation_token)
            gt_box3ds.append(
                Box3D(
                    sample_token=sample_token,
                    translation=sample_annotation['translation'],
                    size=sample_annotation['size'],
                    rotation=sample_annotation['rotation'],
                    name=sample_annotation['category_name'],
                )
            )

    return gt_box3ds
def box_to_box3D(box: Box, sample_token: str):
    """Convert a ``Box`` into a ``Box3D`` tagged with ``sample_token``.

    Field mapping: ``center`` -> ``translation``, ``wlh`` -> ``size``,
    ``orientation.q`` -> ``rotation``; ``name`` and ``score`` are copied.
    """
    fields = {
        "sample_token": sample_token,
        "translation": box.center,
        "size": box.wlh,
        "rotation": box.orientation.q,
        "name": box.name,
        "score": box.score,
    }
    return Box3D(**fields)
def test_intersection(target_box, intersection):
    """Check the intersection of a fixed reference box with ``target_box``."""
    reference_box = Box3D(
        sample_token="",
        name="car",
        translation=[0, 0, 1.5],
        size=[2, 4, 3],
        rotation=[1, 0, 0, 0],
    )

    computed = reference_box.get_intersection(target_box, iou_threshold=0.5)

    assert np.isclose(computed, intersection, rtol=1e-05, atol=1e-08, equal_nan=False)
def modify_prediction_and_get_box(translation=(0, 0, 0), size=(1, 1, 1), rotation=(0, 0, 0, 1)):
    """Return a fixed ground-truth box and a prediction box with custom geometry.

    Both boxes share the same sample token and the class name ``"animal"``.
    The ground truth uses translation ``[0, 0, 0]``, size ``[1, 1, 1]`` and
    rotation ``[0, 0, 0, 1]``; the prediction uses the arguments given.

    Returns:
        tuple: ``(ground_truth_box, prediction_box)``.
    """
    shared = {
        "sample_token": "a3b278456a7ee38322388eda31378d0c91a48645fba18b8",
        "name": "animal",
    }

    ground_truth_box = Box3D(translation=[0, 0, 0], size=[1, 1, 1], rotation=[0, 0, 0, 1], **shared)
    prediction_box = Box3D(translation=translation, size=size, rotation=rotation, **shared)

    return ground_truth_box, prediction_box
def test_fround_area(rotation):
    """Check that the box volume equals the product of its size for ``rotation``."""
    center_z = 1.5
    # width, length, height -- the height is twice the centre's z coordinate.
    dimensions = 2, 4, center_z * 2

    box = Box3D(
        sample_token="",
        name="car",
        translation=[0, 0, center_z],
        size=dimensions,
        rotation=rotation,
    )

    assert box.volume == np.prod(dimensions)
def test_iou(target_box, iou):
    """Check the IoU of a fixed reference box with ``target_box``."""
    reference_box = Box3D(
        sample_token="",
        name="car",
        translation=[0, 0, 1.5],
        size=[2, 4, 3],
        rotation=[1, 0, 0, 0],
    )

    computed = reference_box.get_iou(target_box)

    assert np.isclose(computed, iou, rtol=1e-05, atol=1e-08, equal_nan=False)
def test_ground_area(rotation):
    """Check volume and ground-footprint area of a box for ``rotation``."""
    center_z = 1.5
    # width, length, height -- the height is twice the centre's z coordinate.
    dimensions = 2, 4, center_z * 2

    box = Box3D(
        sample_token="",
        name="car",
        translation=[0, 0, center_z],
        size=dimensions,
        rotation=rotation,
    )

    assert box.volume == np.prod(dimensions)

    footprint_area = box.ground_bbox_coords.area
    expected_area = box.length * box.width
    assert np.isclose(footprint_area, expected_area, rtol=1e-05, atol=1e-08, equal_nan=False)
def test_self():
    """A box compared with itself must overlap completely (IoU of 1)."""
    dim_w, dim_l, dim_h = 2.064, 5.488, 2.053
    box = Box3D(
        sample_token="",
        translation=[2680.2830359779, 698.1969292853, -18.0477669237],
        size=[dim_w, dim_l, dim_h],
        rotation=[0.2654919368, 0, 0, 0.9641130802],
        name="car",
    )

    assert box.volume == np.prod([dim_w, dim_l, dim_h])

    # Ground footprint and its self-intersection.
    assert np.isclose(box.ground_bbox_coords.area, dim_w * dim_l)
    assert np.isclose(box.get_area_intersection(box), dim_w * dim_l)

    # Vertical overlap, full 3D intersection and the resulting IoU.
    assert np.isclose(box.get_height_intersection(box), dim_h)
    assert np.isclose(box.get_intersection(box), box.volume)
    assert np.isclose(box.get_iou(box), 1)
def parse_string_to_box(ps, with_score=True, output_type="box", sample_token=None) -> List[Box]:
    """Parse a whitespace-separated prediction string into box objects.

    Each object takes 9 tokens (``score x y z w l h yaw class``) when
    ``with_score`` is true, otherwise 8 tokens (``x y z w l h yaw class``)
    and the score is set to 1.0. Trailing tokens that do not form a complete
    object are ignored. Objects whose width, length or height is not
    strictly positive are skipped with a warning.

    Args:
        ps: The string to parse.
        with_score: Whether each object starts with a score token.
        output_type: ``"box"`` for ``Box``, ``"3dbox"`` for ``Box3D`` or
            ``"dict"`` for a plain dictionary per object.
        sample_token: Stored on ``Box3D`` and dict outputs.

    Returns:
        list: The parsed objects in input order.

    Raises:
        ValueError: If an object is parsed and ``output_type`` is not one of
            the supported values.
    """
    col_num = 9 if with_score else 8
    tokens = ps.split()
    n_complete = len(tokens) // col_num

    boxes = []
    for start in range(0, n_complete * col_num, col_num):
        row = tuple(tokens[start:start + col_num])
        if with_score:
            score, x, y, z, w, l, h, yaw, c = row
            score = float(score)
        else:
            x, y, z, w, l, h, yaw, c = row
            score = 1.0  # assume ground truth

        # all() short-circuits on the first non-positive dimension.
        if not all(float(dim) > 0 for dim in (w, l, h)):
            warnings.warn("wrong wlh value")
            continue

        # yaw is applied as a rotation about the z axis.
        orient_q = Quaternion(axis=[0, 0, 1], angle=float(yaw))
        center_pos = [float(x), float(y), float(z)]
        wlh = [float(w), float(l), float(h)]

        if output_type == "box":
            parsed = Box(center=center_pos, size=wlh, orientation=orient_q, name=c, score=score)
        elif output_type == "3dbox":
            parsed = Box3D(translation=center_pos, size=wlh, rotation=orient_q.q, name=c,
                           score=score, sample_token=sample_token)
        elif output_type == "dict":
            parsed = {'translation': center_pos, 'size': wlh, 'rotation': orient_q.q,
                      'name': c, 'score': score, 'sample_token': sample_token}
        else:
            raise ValueError("output_type must be either 3dbox, box, or dict")
        boxes.append(parsed)

    return boxes
def make_prediction_boxes(model, dataloader, num_inputs, batch_size, level5data):
    """Run the segmentation model and convert its output into ``Box3D`` predictions.

    Pipeline:
      1. Run ``model`` on every batch and store the softmax probabilities
         quantized to uint8 (0..255).
      2. Threshold the non-background probability and apply a morphological
         opening to it.
      3. Fit a minimum-area rectangle to every external contour and score it,
         per class, with the class probability at the rectangle centre.
      4. Transform the rectangle corners from voxel space to the global frame
         and build one ``Box3D`` per (rectangle, class) candidate.

    Args:
        model: Network called as ``model(X)``; softmax is taken over dim 1.
        dataloader: Yields ``(X, target, batch_sample_tokens)`` batches.
        num_inputs: Number of rows allocated in the prediction buffer.
        batch_size: Nominal batch size, used to compute each batch's offset
            in the prediction buffer.
        level5data: Dataset handle providing ``get(table, token)``.

    Returns:
        list: ``Box3D`` predictions. Scores are on the quantized 0..255 scale.
    """
    classes = cfg.DATA.CLASSES
    bev_shape = cfg.DATA.BEV_SHAPE
    height, width = bev_shape[0], bev_shape[1]
    device = cfg.TRAIN.DEVICE
    voxel_size = cfg.DATA.VOXEL_SIZE
    z_offset = cfg.DATA.Z_OFFSET
    height_dict = cfg.DATA.AVG_CAT_HEIGHT

    # --- 1. Inference -------------------------------------------------------
    # we quantize to uint8 here to conserve memory. we're allocating >20GB of memory otherwise.
    predictions = np.zeros((num_inputs, 1 + len(classes), height, width), dtype=np.uint8)  # [N,C,H,W]
    sample_tokens = []

    with torch.no_grad():
        model.eval()
        for ii, (X, _target, batch_sample_tokens) in enumerate(tqdm(dataloader)):
            offset = ii * batch_size
            sample_tokens.extend(batch_sample_tokens)

            X = X.to(device)
            prediction = F.softmax(model(X), dim=1)
            prediction_cpu = prediction.cpu().numpy()

            # Size the slice by the actual batch length so a partial final
            # batch is written correctly.
            predictions[offset:offset + len(prediction_cpu)] = np.round(
                prediction_cpu * 255).astype(np.uint8)

    # --- 2. Foreground mask -------------------------------------------------
    predictions_non_class0 = 255 - predictions[:, 0]  # [N,H,W]
    background_threshold = 255 // 2
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))

    predictions_opened = np.zeros(predictions_non_class0.shape, dtype=np.uint8)  # [N,H,W]
    for i, p in enumerate(tqdm(predictions_non_class0)):
        thresholded_p = (p > background_threshold).astype(np.uint8)
        predictions_opened[i] = cv2.morphologyEx(thresholded_p, cv2.MORPH_OPEN, kernel)  # [H,W]

    # --- 3. Rectangle candidates per sample ---------------------------------
    detection_boxes = []
    detection_scores = []
    detection_classes = []

    for i in tqdm(range(len(predictions))):
        prediction_opened = predictions_opened[i]  # [H,W]
        class_probability = predictions[i]  # [C,H,W]

        sample_boxes = []
        sample_detection_scores = []
        sample_detection_classes = []

        contours, _hierarchy = cv2.findContours(prediction_opened, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)

        for cnt in contours:
            box = cv2.boxPoints(cv2.minAreaRect(cnt))

            # Let's take the center pixel value as the confidence value.
            # np.int0 was removed in NumPy 2.0; astype(np.intp) is the same cast.
            box_center_index = np.mean(box, axis=0).astype(np.intp)

            for class_index, class_name in enumerate(classes):
                # Channel 0 is the background, hence the +1.
                box_center_value = class_probability[class_index + 1, box_center_index[1], box_center_index[0]]

                # Let's remove candidates with very low probability.
                # NOTE(review): values are on the 0..255 scale here, so this
                # threshold only drops exact zeros -- confirm that is intended.
                if box_center_value < 0.01:
                    continue

                sample_detection_classes.append(class_name)
                sample_detection_scores.append(box_center_value)
                sample_boxes.append(box)

        detection_boxes.append(np.array(sample_boxes))
        detection_scores.append(sample_detection_scores)
        detection_classes.append(sample_detection_classes)

    # --- 4. Voxel space -> global frame, build Box3D ------------------------
    # The voxel transform depends only on the configuration, so invert it once.
    car_from_voxel = np.linalg.inv(
        create_transformation_matrix_to_voxel_space(bev_shape, voxel_size, (0, 0, z_offset)))

    # Rotation.from_dcm was renamed to from_matrix and removed in SciPy 1.6;
    # prefer the new name and fall back for old SciPy versions.
    rotation_from_matrix = getattr(R, "from_matrix", None) or R.from_dcm

    pred_box3ds = []

    for (sample_token, sample_boxes, sample_detection_scores, sample_detection_class) in tqdm(
            zip(sample_tokens, detection_boxes, detection_scores, detection_classes),
            total=len(sample_tokens)):
        sample_boxes = sample_boxes.reshape(-1, 2)  # (N, 4, 2) -> (N*4, 2)
        sample_boxes = sample_boxes.transpose(1, 0)  # (N*4, 2) -> (2, N*4)

        # Add Z dimension
        sample_boxes = np.vstack((
            sample_boxes,
            np.zeros(sample_boxes.shape[1]),
        ))  # (2, N*4) -> (3, N*4)

        sample = level5data.get("sample", sample_token)
        sample_lidar_token = sample["data"]["LIDAR_TOP"]
        lidar_data = level5data.get("sample_data", sample_lidar_token)
        ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])

        global_from_car = transform_matrix(ego_pose['translation'],
                                           Quaternion(ego_pose['rotation']), inverse=False)
        global_from_voxel = np.dot(global_from_car, car_from_voxel)
        sample_boxes = transform_points(sample_boxes, global_from_voxel)

        # We don't know at where the boxes are in the scene on the z-axis (up-down),
        # let's assume all of them are at the same height as the ego vehicle.
        sample_boxes[2, :] = ego_pose["translation"][2]

        # (3, N*4) -> (N, 4, 3)
        sample_boxes = sample_boxes.transpose(1, 0).reshape(-1, 4, 3)

        # Must be an ndarray: a plain list cannot be divided by 2 below.
        box_height = np.array([height_dict[name] for name in sample_detection_class], dtype=float)

        # Note: Each of these boxes describes the ground corners of a 3D box.
        # To get the center of the box in 3D, we'll have to add half the height to it.
        sample_boxes_centers = sample_boxes.mean(axis=1)
        sample_boxes_centers[:, 2] += box_height / 2

        # Width and length are arbitrary - we don't know what way the vehicles are pointing
        # from our prediction segmentation. It doesn't matter for evaluation.
        # Note: the targets were scaled down, so we adjust for that here.
        # NOTE(review): box_scale is not defined in this function; it must be
        # available at module level -- confirm.
        sample_lengths = np.linalg.norm(
            sample_boxes[:, 0, :] - sample_boxes[:, 1, :], axis=1) / box_scale
        sample_widths = np.linalg.norm(
            sample_boxes[:, 1, :] - sample_boxes[:, 2, :], axis=1) / box_scale

        sample_boxes_dimensions = np.zeros_like(sample_boxes_centers)
        sample_boxes_dimensions[:, 0] = sample_widths
        sample_boxes_dimensions[:, 1] = sample_lengths
        sample_boxes_dimensions[:, 2] = box_height

        for i in range(len(sample_boxes)):
            # Determine the rotation of the box from its first edge.
            v = sample_boxes[i, 0] - sample_boxes[i, 1]
            v /= np.linalg.norm(v)
            r = rotation_from_matrix([
                [v[0], -v[1], 0],
                [v[1], v[0], 0],
                [0, 0, 1],
            ])
            # XYZW -> WXYZ order of elements
            quat = r.as_quat()[[3, 0, 1, 2]]

            pred_box3ds.append(
                Box3D(
                    sample_token=sample_token,
                    translation=list(sample_boxes_centers[i]),
                    size=list(sample_boxes_dimensions[i]),
                    rotation=list(quat),
                    name=sample_detection_class[i],
                    score=float(sample_detection_scores[i]),
                )
            )

    return pred_box3ds
v = (sample_boxes[i, 0] - sample_boxes[i, 1]) v /= np.linalg.norm(v) r = R.from_dcm([ [v[0], -v[1], 0], [v[1], v[0], 0], [0, 0, 1], ]) quat = r.as_quat() # XYZW -> WXYZ order of elements quat = quat[[3, 0, 1, 2]] detection_score = float(sample_detection_scores[i]) box3d = Box3D(sample_token=sample_token, translation=list(translation), size=list(size), rotation=list(quat), name=class_name, score=detection_score) pred_box3ds.append(box3d) sub = {} for i in tqdm_notebook(range(len(pred_box3ds))): # yaw = -np.arctan2(pred_box3ds[i].rotation[2], pred_box3ds[i].rotation[0]) yaw = 2 * np.arccos(pred_box3ds[i].rotation[0]) pred = str(pred_box3ds[i].score / 255) + ' ' + str(pred_box3ds[i].center_x) + ' ' + \ str(pred_box3ds[i].center_y) + ' ' + str(pred_box3ds[i].center_z) + ' ' + \ str(pred_box3ds[i].width) + ' ' \ + str(pred_box3ds[i].length) + ' ' + str(pred_box3ds[i].height) + ' ' + str(yaw) + ' ' \ + str(pred_box3ds[i].name) + ' ' if pred_box3ds[i].sample_token in sub.keys(): sub[pred_box3ds[i].sample_token] += pred
def get_single_class_aps(gt, predictions, iou_thresholds):
    """Compute recall, precision and AP for every IoU threshold.

    Adapted from LyftDatasetDevkit. Predictions are visited in order of
    decreasing score. Each prediction is compared with the ground-truth boxes
    of its own sample; at a given threshold it is a true positive when its
    best IoU exceeds the threshold and that ground-truth box has not yet been
    matched at this threshold, and a false positive otherwise.

    Args:
        gt (list[dict]): Ground-truth records; each has a ``sample_token`` key.
        predictions (list[dict]): Prediction records; each has ``score`` and
            ``sample_token`` keys and is passed to ``Box3D`` as keyword
            arguments.
        iou_thresholds (list[float]): IoU thresholds used to decide TP / FP.

    Returns:
        tuple[np.ndarray]: ``(recalls, precisions, aps)``. ``recalls`` and
        ``precisions`` have one row per prediction and one column per
        threshold; ``aps`` has one entry per threshold.
    """
    num_gts = len(gt)
    num_thresholds = len(iou_thresholds)

    image_gts = wrap_in_box(group_by_key(gt, 'sample_token'))

    # Per sample: one row per ground-truth box, one column per threshold,
    # set to 1 once the box has been matched at that threshold.
    sample_gt_checked = {
        token: np.zeros((len(boxes), num_thresholds))
        for token, boxes in image_gts.items()
    }

    ranked = sorted(predictions, key=lambda x: x['score'], reverse=True)

    # go down dets and mark TPs and FPs
    tps = np.zeros((len(ranked), num_thresholds))
    fps = np.zeros((len(ranked), num_thresholds))

    for row, prediction in enumerate(ranked):
        predicted_box = Box3D(**prediction)
        token = prediction['sample_token']

        gt_boxes = image_gts.get(token, [])  # gt_boxes per sample
        gt_checked = sample_gt_checked.get(token)  # gt flags per sample

        best_iou = -np.inf
        best_gt = -1
        if len(gt_boxes) > 0:
            overlaps = get_ious(gt_boxes, predicted_box)
            best_iou = np.max(overlaps)
            best_gt = np.argmax(overlaps)

        for col, threshold in enumerate(iou_thresholds):
            # `and` short-circuits, so the flags are read only on a real match.
            if best_iou > threshold and gt_checked[best_gt, col] == 0:
                tps[row, col] = 1.0
                gt_checked[best_gt, col] = 1
            else:
                fps[row, col] = 1.0

    # compute precision recall
    fps = np.cumsum(fps, axis=0)
    tps = np.cumsum(tps, axis=0)

    recalls = tps / float(num_gts)
    # The eps floor avoids a divide by zero when tps + fps is 0.
    precisions = tps / np.maximum(tps + fps, np.finfo(np.float64).eps)

    aps = []
    for col in range(num_thresholds):
        recall = recalls[:, col]
        precision = precisions[:, col]
        assert np.all(0 <= recall) & np.all(recall <= 1)
        assert np.all(0 <= precision) & np.all(precision <= 1)
        aps.append(get_ap(recall, precision))

    return recalls, precisions, np.array(aps)
assert b.volume == np.prod(size) assert np.isclose(b.ground_bbox_coords.area, b.length * b.width, rtol=1e-05, atol=1e-08, equal_nan=False) @pytest.mark.parametrize( ["target_box", "intersection"], [ (Box3D(translation=[0, 0, 1.5], size=[2, 4, 3], rotation=[1, 0, 0, 0], name="car", sample_token=""), 24), (Box3D(translation=[4, 0, 1.5], size=[2, 4, 3], rotation=[1, 0, 0, 0], name="car", sample_token=""), 0), (Box3D(translation=[0, 0, 1.5], size=[2, 2, 3], rotation=[1, 0, 0, 0], name="car", sample_token=""), 12), (Box3D(translation=[0, 0, 1.5], size=[2, 4, 3], rotation=[0, 0, 0, 1],