Example no. 1
def get_ground_truth_and_pred_box():
    gt_file = gt_file_with_all_classes
    pred_file = true_res_with_all_classes

    with open(gt_file, "r") as ground_truth:
        data = json.load(ground_truth)

    ground_truth_box = Box3D(
        sample_token=data[0]["sample_token"],
        translation=data[0]["translation"],
        size=data[0]["size"],
        rotation=data[0]["rotation"],
        name=data[0]["name"],
    )

    with open(pred_file, "r") as prediction_file:
        data = json.load(prediction_file)

    prediction_box = Box3D(
        sample_token=data[0]["sample_token"],
        translation=data[0]["translation"],
        size=data[0]["size"],
        rotation=data[0]["rotation"],
        name=data[0]["name"],
    )

    return ground_truth_box, prediction_box
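For reference, a sketch of the record layout both JSON fixtures are assumed to share; the field names mirror the Box3D keyword arguments read above, and the values shown are illustrative placeholders.

# Hypothetical record; each field maps onto a Box3D keyword argument.
example_record = {
    "sample_token": "<sample token>",
    "translation": [2680.28, 698.20, -18.05],  # box center x, y, z
    "size": [2.064, 5.488, 2.053],             # width, length, height
    "rotation": [0.265, 0.0, 0.0, 0.964],      # quaternion, WXYZ order
    "name": "car",
}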
Example no. 2
def load_groundtruth_boxes(level5data, sample_tokens):
    gt_box3ds = []

    # Load the ground-truth annotations for each sample.
    for sample_token in tqdm(sample_tokens):

        sample = level5data.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']

        sample_lidar_token = sample["data"]["LIDAR_TOP"]
        lidar_data = level5data.get("sample_data", sample_lidar_token)
        ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])
        ego_translation = np.array(ego_pose['translation'])  # note: not used below

        for sample_annotation_token in sample_annotation_tokens:
            sample_annotation = level5data.get('sample_annotation',
                                               sample_annotation_token)
            sample_annotation_translation = sample_annotation['translation']

            class_name = sample_annotation['category_name']

            box3d = Box3D(sample_token=sample_token,
                          translation=sample_annotation_translation,
                          size=sample_annotation['size'],
                          rotation=sample_annotation['rotation'],
                          name=class_name)
            gt_box3ds.append(box3d)

    return gt_box3ds
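A minimal usage sketch, assuming a LyftDataset instance from lyft_dataset_sdk; the paths are placeholders for a local copy of the Level 5 dataset.

from lyft_dataset_sdk.lyftdataset import LyftDataset

level5data = LyftDataset(data_path=".", json_path="train_data", verbose=True)  # placeholder paths
sample_tokens = [s["token"] for s in level5data.sample]
gt_box3ds = load_groundtruth_boxes(level5data, sample_tokens)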
Example no. 3
def box_to_box3D(box: Box, sample_token: str):
    box3d = Box3D(sample_token=sample_token,
                  translation=box.center,
                  size=box.wlh,
                  rotation=box.orientation.q,
                  name=box.name,
                  score=box.score)

    return box3d
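A minimal usage sketch, assuming the Box class from lyft_dataset_sdk and a pyquaternion orientation; all values are illustrative.

from pyquaternion import Quaternion
from lyft_dataset_sdk.utils.data_classes import Box

box = Box(center=[10.0, 5.0, 1.0],
          size=[2.0, 4.5, 1.8],  # width, length, height
          orientation=Quaternion(axis=[0, 0, 1], angle=0.5),
          name="car",
          score=0.9)
box3d = box_to_box3D(box, sample_token="<sample token>")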
Example no. 4
def test_intersection(target_box, intersection):
    original_box = Box3D(translation=[0, 0, 1.5], size=[2, 4, 3], rotation=[1, 0, 0, 0], name="car", sample_token="")
    assert np.isclose(
        original_box.get_intersection(target_box),
        intersection,
        rtol=1e-05,
        atol=1e-08,
        equal_nan=False,
    )
Example no. 5
def modify_prediction_and_get_box(translation=(0, 0, 0), size=(1, 1, 1), rotation=(0, 0, 0, 1)):
    ground_truth_box = Box3D(
        sample_token="a3b278456a7ee38322388eda31378d0c91a48645fba18b8",
        translation=[0, 0, 0],
        size=[1, 1, 1],
        rotation=[0, 0, 0, 1],
        name="animal",
    )

    prediction_box = Box3D(
        sample_token="a3b278456a7ee38322388eda31378d0c91a48645fba18b8",
        translation=translation,
        size=size,
        rotation=rotation,
        name="animal",
    )

    return ground_truth_box, prediction_box
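This helper makes IoU expectations easy to derive by hand. For instance, shifting the unit cube by half its width along x leaves an intersection of 0.5 and a union of 1.5, so the expected IoU is 1/3; a quick sketch:

gt_box, pred_box = modify_prediction_and_get_box(translation=(0.5, 0, 0))
assert np.isclose(gt_box.get_iou(pred_box), 1 / 3)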
Example no. 6
def test_ground_area(rotation):
    translation = [0, 0, 1.5]
    size = 2, 4, translation[2] * 2  # width, length, height

    sample_token = ""
    name = "car"

    b = Box3D(translation=translation, rotation=rotation, size=size, sample_token=sample_token, name=name)

    assert b.volume == np.prod(size)
Example no. 7
def test_iou(target_box, iou):
    original_box = Box3D(translation=[0, 0, 1.5],
                         size=[2, 4, 3],
                         rotation=[1, 0, 0, 0],
                         name="car",
                         sample_token="")
    assert np.isclose(original_box.get_iou(target_box),
                      iou,
                      rtol=1e-05,
                      atol=1e-08,
                      equal_nan=False)
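A sketch of a parametrization that could drive test_iou; regardless of the devkit's internal geometry, the identical box must score IoU 1 and a non-overlapping far-away box IoU 0.

@pytest.mark.parametrize(
    ["target_box", "iou"],
    [
        # Identical box: perfect overlap.
        (Box3D(translation=[0, 0, 1.5], size=[2, 4, 3],
               rotation=[1, 0, 0, 0], name="car", sample_token=""), 1.0),
        # Box far outside the original footprint: no overlap.
        (Box3D(translation=[100, 0, 1.5], size=[2, 4, 3],
               rotation=[1, 0, 0, 0], name="car", sample_token=""), 0.0),
    ],
)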
Example no. 8
def test_ground_area(rotation):
    translation = [0, 0, 1.5]
    size = 2, 4, translation[2] * 2  # width, length, height

    sample_token = ""
    name = "car"

    b = Box3D(translation=translation, rotation=rotation, size=size, sample_token=sample_token, name=name)

    assert b.volume == np.prod(size)

    assert np.isclose(b.ground_bbox_coords.area, b.length * b.width, rtol=1e-05, atol=1e-08, equal_nan=False)
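The footprint area of a cuboid is invariant under yaw, so a rotation parametrization along these lines (identity plus a 90-degree yaw, quaternions in WXYZ order) would satisfy both assertions; this is a sketch, not the original fixture.

from pyquaternion import Quaternion

@pytest.mark.parametrize("rotation", [
    [1, 0, 0, 0],                                         # identity
    list(Quaternion(axis=[0, 0, 1], angle=np.pi / 2).q),  # 90-degree yaw
])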
Example no. 9
def test_self():
    original_box = Box3D(
        sample_token="",
        translation=[2680.2830359779, 698.1969292853, -18.0477669237],
        size=[2.064, 5.488, 2.053],
        rotation=[0.2654919368, 0, 0, 0.9641130802],
        name="car",
    )

    assert original_box.volume == np.prod([2.064, 5.488, 2.053])
    assert np.isclose(original_box.ground_bbox_coords.area, 2.064 * 5.488)

    assert np.isclose(original_box.get_area_intersection(original_box), 2.064 * 5.488)
    assert np.isclose(original_box.get_height_intersection(original_box), 2.053)
    assert np.isclose(original_box.get_intersection(original_box), original_box.volume)
    assert np.isclose(original_box.get_iou(original_box), 1)
Example no. 10
def parse_string_to_box(ps, with_score=True, output_type="box", sample_token=None) -> List[Union[Box, Box3D, dict]]:
    boxes = []

    col_num = 8
    if with_score:
        col_num = 9

    object_params = ps.split()
    n_objects = len(object_params)
    for i in range(n_objects // col_num):
        fields = object_params[i * col_num: (i + 1) * col_num]
        if with_score:
            score, x, y, z, w, l, h, yaw, c = fields
            score = float(score)
        else:
            x, y, z, w, l, h, yaw, c = fields
            score = 1.0  # assume ground truth

        if not (float(w) > 0 and float(l) > 0 and float(h) > 0):
            warnings.warn("non-positive width/length/height value; skipping box")
            continue

        orient_q = Quaternion(axis=[0, 0, 1], angle=float(yaw))
        center_pos = [float(x), float(y), float(z)]
        wlh = [float(w), float(l), float(h)]
        obj_name = c
        if output_type == "3dbox":
            boxes.append(Box3D(translation=center_pos, size=wlh, rotation=orient_q.q, name=obj_name, score=score,
                               sample_token=sample_token))
        elif output_type == "box":
            boxes.append(Box(center=center_pos, size=wlh, orientation=orient_q, name=obj_name, score=score))
        elif output_type == "dict":
            boxes.append({'translation': center_pos, 'size': wlh, 'rotation': orient_q.q,
                          'name': obj_name, 'score': score, 'sample_token': sample_token})
        else:
            raise ValueError("output_type must be either 3dbox, box, or dict")

    return boxes
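A usage sketch of the space-separated layout the parser expects, namely "score x y z w l h yaw class" per object when with_score=True (drop the leading score otherwise); the two detections below are made up.

ps = ("0.90 10.0 5.0 1.0 2.0 4.5 1.8 0.30 car "
      "0.75 -3.0 8.0 0.9 0.8 0.8 1.7 1.60 pedestrian")
boxes = parse_string_to_box(ps, with_score=True, output_type="dict",
                            sample_token="<sample token>")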
Example no. 11
def make_prediction_boxes(model, dataloader, num_inputs, batch_size,
                          level5data):

    classes = cfg.DATA.CLASSES
    height = cfg.DATA.BEV_SHAPE[0]
    width = cfg.DATA.BEV_SHAPE[1]
    device = cfg.TRAIN.DEVICE
    bev_shape = cfg.DATA.BEV_SHAPE
    voxel_size = cfg.DATA.VOXEL_SIZE
    z_offset = cfg.DATA.Z_OFFSET

    # we quantize to uint8 here to conserve memory. we're allocating >20GB of memory otherwise.
    predictions = np.zeros((num_inputs, 1 + len(classes), height, width),
                           dtype=np.uint8)  # [N,C,H,W]

    sample_tokens = []
    progress_bar = tqdm(dataloader)

    # evaluate samples with loaded model - predictions are gathered in 'predictions'
    with torch.no_grad():
        model.eval()
        for ii, (X, target, batch_sample_tokens) in enumerate(progress_bar):

            offset = ii * batch_size
            sample_tokens.extend(batch_sample_tokens)

            X = X.to(device)  # [N, 1, H, W]
            prediction = model(X)  # [N, 1+C, H, W]

            prediction = F.softmax(prediction, dim=1)

            prediction_cpu = prediction.cpu().numpy()
            predictions[offset:offset + batch_size] = np.round(
                prediction_cpu * 255).astype(np.uint8)

    predictions_non_class0 = 255 - predictions[:, 0]  # [N,H,W]
    background_threshold = 255 // 2

    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (3, 3))
    predictions_opened = np.zeros((predictions_non_class0.shape),
                                  dtype=np.uint8)  # [N,H,W]

    for i, p in enumerate(tqdm(predictions_non_class0)):
        thresholded_p = (p > background_threshold).astype(np.uint8)
        predictions_opened[i] = cv2.morphologyEx(thresholded_p, cv2.MORPH_OPEN,
                                                 kernel)  # [H,W]

    detection_boxes = []
    detection_scores = []
    detection_classes = []

    for i in tqdm(range(len(predictions))):
        prediction_opened = predictions_opened[i]  # [H,W]
        probability_non_class0 = predictions_non_class0[i]  # [H,W]
        class_probability = predictions[i]  # [C,H,W]

        sample_boxes = []
        sample_detection_scores = []
        sample_detection_classes = []

        contours, hierarchy = cv2.findContours(prediction_opened,
                                               cv2.RETR_EXTERNAL,
                                               cv2.CHAIN_APPROX_NONE)

        for cnt in contours:
            rect = cv2.minAreaRect(cnt)
            box = cv2.boxPoints(rect)

            # Let's take the center pixel value as the confidence value
            box_center_index = np.mean(box, axis=0).astype(int)  # np.int0 was removed in NumPy 2.0

            for class_index in range(len(classes)):
                box_center_value = class_probability[class_index + 1,
                                                     box_center_index[1],
                                                     box_center_index[0]]

                # Remove candidates with (near-)zero probability. Note the
                # predictions were quantized to uint8, so values below 0.01
                # can only be exactly 0 here.
                if box_center_value < 0.01:
                    continue

                box_center_class = classes[class_index]

                box_detection_score = box_center_value
                sample_detection_classes.append(box_center_class)
                sample_detection_scores.append(box_detection_score)
                sample_boxes.append(box)

        detection_boxes.append(np.array(sample_boxes))
        detection_scores.append(sample_detection_scores)
        detection_classes.append(sample_detection_classes)

    pred_box3ds = []
    height_dict = cfg.DATA.AVG_CAT_HEIGHT

    # This could use some refactoring..
    for (sample_token, sample_boxes, sample_detection_scores,
         sample_detection_class) in tqdm(zip(sample_tokens, detection_boxes,
                                             detection_scores,
                                             detection_classes),
                                         total=len(sample_tokens)):
        sample_boxes = sample_boxes.reshape(-1, 2)  # (N, 4, 2) -> (N*4, 2)
        sample_boxes = sample_boxes.transpose(1, 0)  # (N*4, 2) -> (2, N*4)

        # Add Z dimension
        sample_boxes = np.vstack((
            sample_boxes,
            np.zeros(sample_boxes.shape[1]),
        ))  # (2, N*4) -> (3, N*4)

        sample = level5data.get("sample", sample_token)
        sample_lidar_token = sample["data"]["LIDAR_TOP"]
        lidar_data = level5data.get("sample_data", sample_lidar_token)
        lidar_filepath = level5data.get_sample_data_path(sample_lidar_token)
        ego_pose = level5data.get("ego_pose", lidar_data["ego_pose_token"])
        ego_translation = np.array(ego_pose['translation'])

        global_from_car = transform_matrix(ego_pose['translation'],
                                           Quaternion(ego_pose['rotation']),
                                           inverse=False)

        car_from_voxel = np.linalg.inv(
            create_transformation_matrix_to_voxel_space(
                bev_shape, voxel_size, (0, 0, z_offset)))

        global_from_voxel = np.dot(global_from_car, car_from_voxel)
        sample_boxes = transform_points(sample_boxes, global_from_voxel)

        # We don't know where the boxes are in the scene on the z-axis (up-down);
        # assume all of them are at the same height as the ego vehicle.
        sample_boxes[2, :] = ego_pose["translation"][2]

        # (3, N*4) -> (N, 4, 3)
        sample_boxes = sample_boxes.transpose(1, 0).reshape(-1, 4, 3)

        # As an array, so the element-wise division and assignment below work.
        box_height = np.array(
            [height_dict[name] for name in sample_detection_class])

        # Note: Each of these boxes describes the ground corners of a 3D box.
        # To get the center of the box in 3D, we'll have to add half the height to it.
        sample_boxes_centers = sample_boxes.mean(axis=1)
        sample_boxes_centers[:, 2] += box_height / 2

        # Width and length are arbitrary here - the segmentation does not tell us
        # which way the vehicles are pointing, and it does not matter for evaluation.
        # Note: the targets were scaled to 0.8x the actual size, so adjust for that.
        sample_lengths = np.linalg.norm(
            sample_boxes[:, 0, :] - sample_boxes[:, 1, :], axis=1) / box_scale
        sample_widths = np.linalg.norm(
            sample_boxes[:, 1, :] - sample_boxes[:, 2, :], axis=1) / box_scale

        sample_boxes_dimensions = np.zeros_like(sample_boxes_centers)
        sample_boxes_dimensions[:, 0] = sample_widths
        sample_boxes_dimensions[:, 1] = sample_lengths
        sample_boxes_dimensions[:, 2] = box_height

        for i in range(len(sample_boxes)):
            translation = sample_boxes_centers[i]
            size = sample_boxes_dimensions[i]
            class_name = sample_detection_class[i]
            ego_distance = float(np.linalg.norm(ego_translation - translation))

            # Determine the rotation of the box: v is the unit direction of one
            # ground edge, and the matrix below is the pure-yaw rotation that
            # aligns the x-axis with v.
            v = (sample_boxes[i, 0] - sample_boxes[i, 1])
            v /= np.linalg.norm(v)
            r = R.from_matrix([  # SciPy >= 1.4; formerly R.from_dcm
                [v[0], -v[1], 0],
                [v[1], v[0], 0],
                [0, 0, 1],
            ])
            quat = r.as_quat()
            # XYZW -> WXYZ order of elements
            quat = quat[[3, 0, 1, 2]]

            detection_score = float(sample_detection_scores[i])

            box3d = Box3D(sample_token=sample_token,
                          translation=list(translation),
                          size=list(size),
                          rotation=list(quat),
                          name=class_name,
                          score=detection_score)
            pred_box3ds.append(box3d)

    return pred_box3ds
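make_prediction_boxes relies on two helpers that are not shown here. The sketch below follows the versions in the Lyft reference kernel and should be read as an assumption about their behavior rather than a verbatim copy.

def create_transformation_matrix_to_voxel_space(shape, voxel_size, offset):
    """Build a 4x4 matrix mapping car-frame coordinates (meters) to voxel
    indices, with the origin shifted to the center of the voxel grid."""
    shape, voxel_size, offset = np.array(shape), np.array(voxel_size), np.array(offset)
    tm = np.eye(4, dtype=np.float32)
    translation = shape / 2 + offset / voxel_size
    tm = tm * np.array(np.hstack((1 / voxel_size, [1])))
    tm[:3, 3] = np.transpose(translation)
    return tm


def transform_points(points, transf_matrix):
    """Apply a 4x4 homogeneous transform to a (3, N) array of points."""
    return transf_matrix.dot(
        np.vstack((points[:3, :], np.ones(points.shape[1]))))[:3, :]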
Example no. 12
        v = (sample_boxes[i, 0] - sample_boxes[i, 1])
        v /= np.linalg.norm(v)
        r = R.from_matrix([  # SciPy >= 1.4; formerly R.from_dcm
            [v[0], -v[1], 0],
            [v[1], v[0], 0],
            [0, 0, 1],
        ])
        quat = r.as_quat()
        # XYZW -> WXYZ order of elements
        quat = quat[[3, 0, 1, 2]]

        detection_score = float(sample_detection_scores[i])

        box3d = Box3D(sample_token=sample_token,
                      translation=list(translation),
                      size=list(size),
                      rotation=list(quat),
                      name=class_name,
                      score=detection_score)
        pred_box3ds.append(box3d)
sub = {}
for i in tqdm_notebook(range(len(pred_box3ds))):
    #     yaw = -np.arctan2(pred_box3ds[i].rotation[2], pred_box3ds[i].rotation[0])
    yaw = 2 * np.arccos(pred_box3ds[i].rotation[0])
    pred = " ".join(map(str, [
        pred_box3ds[i].score / 255,
        pred_box3ds[i].center_x, pred_box3ds[i].center_y, pred_box3ds[i].center_z,
        pred_box3ds[i].width, pred_box3ds[i].length, pred_box3ds[i].height,
        yaw, pred_box3ds[i].name,
    ])) + " "

    if pred_box3ds[i].sample_token in sub:
        sub[pred_box3ds[i].sample_token] += pred
    else:
        sub[pred_box3ds[i].sample_token] = pred
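A sketch of how the accumulated per-token strings might be written out, assuming the Kaggle-style "Id,PredictionString" submission layout.

import pandas as pd

sub_df = pd.DataFrame(list(sub.items()), columns=["Id", "PredictionString"])
sub_df.to_csv("submission.csv", index=False)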
Example no. 13
def get_single_class_aps(gt, predictions, iou_thresholds):
    """Compute recall and precision for all iou thresholds. Adapted from
    LyftDatasetDevkit.

    Args:
        gt (list[dict]): list of ground-truth dictionaries whose keys match \
            the Box3D constructor arguments.
        predictions (list[dict]): list of prediction dictionaries in the \
            same format.
        iou_thresholds (list[float]): IOU thresholds used to calculate \
            TP / FN.

    Returns:
        tuple[np.ndarray]: Returns (recalls, precisions, average precisions)
            for each class.
    """
    num_gts = len(gt)
    image_gts = group_by_key(gt, 'sample_token')
    image_gts = wrap_in_box(image_gts)

    sample_gt_checked = {
        sample_token: np.zeros((len(boxes), len(iou_thresholds)))
        for sample_token, boxes in image_gts.items()
    }

    predictions = sorted(predictions, key=lambda x: x['score'], reverse=True)

    # go down dets and mark TPs and FPs
    num_predictions = len(predictions)
    tps = np.zeros((num_predictions, len(iou_thresholds)))
    fps = np.zeros((num_predictions, len(iou_thresholds)))

    for prediction_index, prediction in enumerate(predictions):
        predicted_box = Box3D(**prediction)

        sample_token = prediction['sample_token']

        max_overlap = -np.inf
        jmax = -1

        if sample_token in image_gts:
            gt_boxes = image_gts[sample_token]
            # gt_boxes per sample
            gt_checked = sample_gt_checked[sample_token]
            # gt flags per sample
        else:
            gt_boxes = []
            gt_checked = None

        if len(gt_boxes) > 0:
            overlaps = get_ious(gt_boxes, predicted_box)

            max_overlap = np.max(overlaps)

            jmax = np.argmax(overlaps)

        for i, iou_threshold in enumerate(iou_thresholds):
            if max_overlap > iou_threshold:
                if gt_checked[jmax, i] == 0:
                    tps[prediction_index, i] = 1.0
                    gt_checked[jmax, i] = 1
                else:
                    fps[prediction_index, i] = 1.0
            else:
                fps[prediction_index, i] = 1.0

    # compute precision recall
    fps = np.cumsum(fps, axis=0)
    tps = np.cumsum(tps, axis=0)

    recalls = tps / float(num_gts)
    # avoid divide by zero in case the first detection
    # matches a difficult ground truth
    precisions = tps / np.maximum(tps + fps, np.finfo(np.float64).eps)

    aps = []
    for i in range(len(iou_thresholds)):
        recall = recalls[:, i]
        precision = precisions[:, i]
        assert np.all(0 <= recall) & np.all(recall <= 1)
        assert np.all(0 <= precision) & np.all(precision <= 1)
        ap = get_ap(recall, precision)
        aps.append(ap)

    aps = np.array(aps)

    return recalls, precisions, aps
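get_single_class_aps defers to a get_ap helper that is not shown. A sketch of the VOC-style all-point interpolated AP it presumably computes (matching the Lyft devkit and mmdetection3d implementations):

def get_ap(recalls, precisions):
    # Pad the curve so it spans recall 0..1 and precision ends at 0.
    recalls = np.concatenate(([0.0], recalls, [1.0]))
    precisions = np.concatenate(([0.0], precisions, [0.0]))

    # Enforce a monotonically decreasing precision envelope, right to left.
    for i in range(len(precisions) - 1, 0, -1):
        precisions[i - 1] = np.maximum(precisions[i - 1], precisions[i])

    # Sum the rectangles where recall changes.
    indices = np.where(recalls[1:] != recalls[:-1])[0]
    return np.sum(
        (recalls[indices + 1] - recalls[indices]) * precisions[indices + 1])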
Example no. 14
    assert b.volume == np.prod(size)

    assert np.isclose(b.ground_bbox_coords.area,
                      b.length * b.width,
                      rtol=1e-05,
                      atol=1e-08,
                      equal_nan=False)


@pytest.mark.parametrize(
    ["target_box", "intersection"],
    [
        (Box3D(translation=[0, 0, 1.5],
               size=[2, 4, 3],
               rotation=[1, 0, 0, 0],
               name="car",
               sample_token=""), 24),
        (Box3D(translation=[4, 0, 1.5],
               size=[2, 4, 3],
               rotation=[1, 0, 0, 0],
               name="car",
               sample_token=""), 0),
        (Box3D(translation=[0, 0, 1.5],
               size=[2, 2, 3],
               rotation=[1, 0, 0, 0],
               name="car",
               sample_token=""), 12),
        (Box3D(translation=[0, 0, 1.5],
               size=[2, 4, 3],
               rotation=[0, 0, 0, 1],