Example #1
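The snippets below share a set of module-level dependencies that are not shown on this page. A minimal sketch of what they assume (the exact project import paths are assumptions):

import os
import pickle
from pathlib import Path

import numpy as np
from tqdm import tqdm

# Project-specific names used below (assumed to be provided by the surrounding module):
#   box_np_ops        - rotated-box/point helpers providing points_in_rbbox(points, boxes)
#   get_dataset       - returns the dataset class for a dataset_class_name such as "KITTI" or "NUSC"
#   dataset_name_map  - maps dataset_class_name to the key expected by LoadPointCloudFromFile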
def create_groundtruth_database(
    dataset_class_name,
    data_path,
    info_path=None,
    used_classes=None,
    db_path=None,
    dbinfo_path=None,
    relative_path=True,
    add_rgb=False,
    lidar_only=False,
    bev_only=False,
    coors_range=None,
    gt_aug_with_context=-1.0,
    **kwargs,
):
    pipeline = [
        {
            "type": "LoadPointCloudFromFile",
            "dataset": dataset_name_map[dataset_class_name],
        },
        {
            "type": "LoadPointCloudAnnotations",
            "with_bbox": True,
            "enable_difficulty_level": True
        },
    ]
    # get the dataset instance (e.g. KittiDataset); its pipeline loads points and annotations.
    dataset = get_dataset(dataset_class_name)(info_path=info_path,
                                              root_path=data_path,
                                              test_mode=True,
                                              pipeline=pipeline)

    # prepare dbinfo_path and db_path.
    root_path = Path(data_path)
    if db_path is None:
        db_path = root_path / "gt_database"
    if dbinfo_path is None:
        dbinfo_path = root_path / "dbinfos_train.pkl"
    db_path.mkdir(parents=True, exist_ok=True)

    all_db_infos = {}
    group_counter = 0

    for index in tqdm(range(len(dataset))):
        image_idx = index
        sensor_data = dataset.get_sensor_data(index)  # see loading.py; the pipeline above has already been applied.
        if "image_idx" in sensor_data["metadata"]:  # true for KITTI: image_idx is the sample file stem (e.g. 000001).
            image_idx = sensor_data["metadata"]["image_idx"]

        points = sensor_data["lidar"]["points"]
        annos = sensor_data["lidar"]["annotations"]
        gt_boxes = annos["boxes"]  # gt boxes of all classes; DontCare boxes were already removed by LoadPointCloudAnnotations.
        names = annos["names"]  # gt class names.
        group_dict = {}
        group_ids = np.full([gt_boxes.shape[0]], -1, dtype=np.int64)

        if "group_ids" in annos:  # False
            group_ids = annos["group_ids"]
        else:
            group_ids = np.arange(gt_boxes.shape[0], dtype=np.int64)

        difficulty = np.zeros(gt_boxes.shape[0], dtype=np.int32)
        if "difficulty" in annos:  # False
            difficulty = annos["difficulty"]

        num_obj = gt_boxes.shape[0]

        # TODO: maybe we need to add some contextual points here.
        offset = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]  # [x, y, z, w, l, h, ry]
        if gt_aug_with_context > 0.0:
            offset = [
                0.0, 0.0, 0.0, gt_aug_with_context, gt_aug_with_context, 0.0,
                0.0
            ]
            db_path = root_path / "gt_enlarged_database"
            dbinfo_path = root_path / "dbinfos_enlarged_train.pkl"
            db_path.mkdir(parents=True, exist_ok=True)

        point_indices_for_num = box_np_ops.points_in_rbbox(points, gt_boxes)
        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes + offset)
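        # Both results are boolean masks of shape (num_points, num_obj); column i selects the
        # points inside box i (point_indices uses the boxes enlarged by `offset` for cropping,
        # point_indices_for_num counts points inside the original boxes).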
        for i in range(num_obj):  # in a single scene.
            filename = f"{image_idx}_{names[i]}_{i}.bin"
            filepath = db_path / filename
            gt_points = points[point_indices[:, i]]
            num_points_in_gt = point_indices_for_num[:, i].sum()
            gt_points[:, :3] -= gt_boxes[i, :3]  # only record coordinates relative to the box center.
            with open(filepath, "wb") as f:  # db: the gt points inside each gt box get their own file.
                gt_points[:, :4].tofile(f)

            if (used_classes is None) or names[i] in used_classes:
                if relative_path:
                    db_dump_path = str(db_path.stem + "/" + filename)
                else:
                    db_dump_path = str(filepath)

                db_info = {
                    "name": names[i],
                    "path": db_dump_path,
                    "image_idx": image_idx,
                    "gt_idx": i,
                    "box3d_lidar": gt_boxes[i],
                    "num_points_in_gt": num_points_in_gt,
                    "difficulty":
                    difficulty[i]  # todo: not accurate, all are set as 0.
                }
                local_group_id = group_ids[i]
                if local_group_id not in group_dict:
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info["group_id"] = group_dict[
                    local_group_id]  # count from 0 to total_num_of_specific_class[like 13442 for car]
                if "score" in annos:  # False
                    db_info["score"] = annos["score"][i]
                if names[i] in all_db_infos:  # all_db_infos is keyed by class name (car, pedestrian, cyclist, ...).
                    all_db_infos[names[i]].append(db_info)
                else:
                    all_db_infos[names[i]] = [db_info]

    print("dataset length: ", len(dataset))
    for k, v in all_db_infos.items():
        print(f"load {len(v)} {k} database infos")

    with open(dbinfo_path, "wb") as f:
        pickle.dump(all_db_infos, f)
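Example #1 writes one .bin crop per annotated object and a single dbinfos pickle. A minimal sketch of how it might be invoked (the "KITTI" dataset key, the info file name, and the class names are assumptions based on the KITTI-style comments above):

# Hypothetical invocation; adjust paths and names to your setup.
create_groundtruth_database(
    dataset_class_name="KITTI",
    data_path="data/kitti",
    info_path="data/kitti/kitti_infos_train.pkl",
    used_classes=["Car", "Pedestrian", "Cyclist"],
    relative_path=True,
)
# Writes data/kitti/gt_database/<image_idx>_<name>_<i>.bin for every kept object
# and data/kitti/dbinfos_train.pkl with per-class lists of db_info dicts.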
Example #2
def create_groundtruth_database(
    dataset_class_name,
    data_path,
    info_path=None,
    used_classes=None,
    db_path=None,
    dbinfo_path=None,
    relative_path=True,
    add_rgb=False,
    lidar_only=False,
    bev_only=False,
    coors_range=None,
    **kwargs,
):
    pipeline = [
        {
            "type": "LoadPointCloudFromFile",
            "dataset": dataset_name_map[dataset_class_name],
        },
        {
            "type": "LoadPointCloudAnnotations",
            "with_bbox": True
        },
    ]

    if "nsweeps" in kwargs:
        dataset = get_dataset(dataset_class_name)(
            info_path=info_path,
            root_path=data_path,
            pipeline=pipeline,
            test_mode=True,
            nsweeps=kwargs["nsweeps"],
        )
        nsweeps = dataset.nsweeps
    else:
        dataset = get_dataset(dataset_class_name)(info_path=info_path,
                                                  root_path=data_path,
                                                  test_mode=True,
                                                  pipeline=pipeline)
        nsweeps = 1

    root_path = Path(data_path)

    if dataset_class_name == "NUSC":
        if db_path is None:
            db_path = root_path / f"gt_database_{nsweeps}sweeps_withvelo"
        if dbinfo_path is None:
            dbinfo_path = root_path / f"dbinfos_train_{nsweeps}sweeps_withvelo.pkl"
    else:
        if db_path is None:
            db_path = root_path / "gt_database"
        if dbinfo_path is None:
            dbinfo_path = root_path / "dbinfos_train.pkl"
    if dataset_class_name == "NUSC" or dataset_class_name == "LYFT":
        point_features = 5
    elif dataset_class_name == "KITTI":
        point_features = 4

    db_path.mkdir(parents=True, exist_ok=True)

    all_db_infos = {}
    group_counter = 0

    # def prepare_single_data(index):
    for index in tqdm(range(len(dataset))):
        image_idx = index
        # modified for nuScenes
        sensor_data = dataset.get_sensor_data(index)
        # for nsweep, sensor_data in enumerate(sensor_datas):
        if "image_idx" in sensor_data["metadata"]:
            image_idx = sensor_data["metadata"]["image_idx"]

        if dataset_class_name == "NUSC":
            points = sensor_data["lidar"]["combined"]
        elif dataset_class_name == "KITTI":
            points = sensor_data["lidar"]["points"]
        elif dataset_class_name == "LYFT":
            points = sensor_data["lidar"]["points"]

        print(points.shape)
        annos = sensor_data["lidar"]["annotations"]
        gt_boxes = annos["boxes"]
        names = annos["names"]
        group_dict = {}
        group_ids = np.full([gt_boxes.shape[0]], -1, dtype=np.int64)
        if "group_ids" in annos:
            group_ids = annos["group_ids"]
        else:
            group_ids = np.arange(gt_boxes.shape[0], dtype=np.int64)
        difficulty = np.zeros(gt_boxes.shape[0], dtype=np.int32)
        if "difficulty" in annos:
            difficulty = annos["difficulty"]

        num_obj = gt_boxes.shape[0]
        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes)
        for i in range(num_obj):
            filename = f"{image_idx}_{names[i]}_{i}.bin"
            filepath = db_path / filename
            gt_points = points[point_indices[:, i]]
            gt_points[:, :3] -= gt_boxes[i, :3]
            with open(filepath, "w") as f:
                gt_points[:, :point_features].tofile(f)

            if (used_classes is None) or names[i] in used_classes:
                if relative_path:
                    db_dump_path = str(db_path.stem + "/" + filename)
                else:
                    db_dump_path = str(filepath)

                db_info = {
                    "name": names[i],
                    "path": db_dump_path,
                    "image_idx": image_idx,
                    "gt_idx": i,
                    "box3d_lidar": gt_boxes[i],
                    "num_points_in_gt": gt_points.shape[0],
                    "difficulty": difficulty[i],
                    # "group_id": -1,
                    # "bbox": bboxes[i],
                }
                local_group_id = group_ids[i]
                # if local_group_id >= 0:
                if local_group_id not in group_dict:
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info["group_id"] = group_dict[local_group_id]
                if "score" in annos:
                    db_info["score"] = annos["score"][i]
                if names[i] in all_db_infos:
                    all_db_infos[names[i]].append(db_info)
                else:
                    all_db_infos[names[i]] = [db_info]
        # print(f"Finish {index}th sample")

    print("dataset length: ", len(dataset))
    for k, v in all_db_infos.items():
        print(f"load {len(v)} {k} database infos")

    with open(dbinfo_path, "wb") as f:
        pickle.dump(all_db_infos, f)
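Each dumped .bin file is just the first point_features columns of the object's points, written as a flat binary blob with coordinates relative to the box center. A sketch of reading one back, assuming the points were stored as float32 (the usual dtype in these LiDAR pipelines; verify against your data):

import numpy as np

point_features = 4  # 4 for KITTI, 5 for NUSC/LYFT, as selected above
# Hypothetical path; file names follow the f"{image_idx}_{names[i]}_{i}.bin" pattern used above.
gt_points = np.fromfile("gt_database/000001_Car_0.bin", dtype=np.float32)
gt_points = gt_points.reshape(-1, point_features)  # xyz columns are box-centered offsets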
Example #3
def create_groundtruth_database(
    dataset_class_name,
    data_path,
    info_path=None,
    used_classes=None,
    db_path=None,
    dbinfo_path=None,
    relative_path=True,
    **kwargs,
):
    pipeline = [
        {
            "type": "LoadPointCloudFromFile",
            "dataset": dataset_name_map[dataset_class_name],
        },
        {
            "type": "LoadPointCloudAnnotations",
            "with_bbox": True
        },
    ]

    if "nsweeps" in kwargs:
        dataset = get_dataset(dataset_class_name)(
            info_path=info_path,
            root_path=data_path,
            pipeline=pipeline,
            test_mode=True,
            nsweeps=kwargs["nsweeps"],
        )
        nsweeps = dataset.nsweeps
    else:
        dataset = get_dataset(dataset_class_name)(info_path=info_path,
                                                  root_path=data_path,
                                                  test_mode=True,
                                                  pipeline=pipeline)
        nsweeps = 1

    root_path = Path(data_path)

    if dataset_class_name in ["WAYMO", "NUSC"]:
        if db_path is None:
            db_path = root_path / f"gt_database_{nsweeps}sweeps_withvelo"
        if dbinfo_path is None:
            dbinfo_path = root_path / f"dbinfos_train_{nsweeps}sweeps_withvelo.pkl"
    else:
        raise NotImplementedError()

    if dataset_class_name == "NUSC":
        point_features = 5
    elif dataset_class_name == "WAYMO":
        point_features = 5 if nsweeps == 1 else 6
    else:
        raise NotImplementedError()

    db_path.mkdir(parents=True, exist_ok=True)

    all_db_infos = {}
    group_counter = 0

    for index in tqdm(range(len(dataset))):
        image_idx = index
        # modified for nuScenes
        sensor_data = dataset.get_sensor_data(index)
        if "image_idx" in sensor_data["metadata"]:
            image_idx = sensor_data["metadata"]["image_idx"]

        if nsweeps > 1:
            points = sensor_data["lidar"]["combined"]
        else:
            points = sensor_data["lidar"]["points"]

        annos = sensor_data["lidar"]["annotations"]
        gt_boxes = annos["boxes"]
        names = annos["names"]

        if dataset_class_name == "WAYMO":
            # The Waymo dataset contains millions of objects, so it is not practical to store
            # all of them in a single folder. We randomly subsample objects for GT augmentation
            # and keep all cyclists, since they are rare.
            if index % 4 != 0:
                mask = (names == 'VEHICLE')
                mask = np.logical_not(mask)
                names = names[mask]
                gt_boxes = gt_boxes[mask]

            if index % 2 != 0:
                mask = (names == 'PEDESTRIAN')
                mask = np.logical_not(mask)
                names = names[mask]
                gt_boxes = gt_boxes[mask]

        group_dict = {}
        group_ids = np.full([gt_boxes.shape[0]], -1, dtype=np.int64)
        if "group_ids" in annos:
            group_ids = annos["group_ids"]
        else:
            group_ids = np.arange(gt_boxes.shape[0], dtype=np.int64)
        difficulty = np.zeros(gt_boxes.shape[0], dtype=np.int32)
        if "difficulty" in annos:
            difficulty = annos["difficulty"]

        num_obj = gt_boxes.shape[0]
        if num_obj == 0:
            continue
        point_indices = box_np_ops.points_in_rbbox(points, gt_boxes)
        for i in range(num_obj):
            if (used_classes is None) or names[i] in used_classes:
                filename = f"{image_idx}_{names[i]}_{i}.bin"
                dirpath = os.path.join(str(db_path), names[i])
                os.makedirs(dirpath, exist_ok=True)

                filepath = os.path.join(str(db_path), names[i], filename)
                gt_points = points[point_indices[:, i]]
                gt_points[:, :3] -= gt_boxes[i, :3]
                with open(filepath, "w") as f:
                    try:
                        gt_points[:, :point_features].tofile(f)
                    except:
                        print("process {} files".format(index))
                        break

            if (used_classes is None) or names[i] in used_classes:
                if relative_path:
                    db_dump_path = os.path.join(db_path.stem, names[i],
                                                filename)
                else:
                    db_dump_path = str(filepath)

                db_info = {
                    "name": names[i],
                    "path": db_dump_path,
                    "image_idx": image_idx,
                    "gt_idx": i,
                    "box3d_lidar": gt_boxes[i],
                    "num_points_in_gt": gt_points.shape[0],
                    "difficulty": difficulty[i],
                    # "group_id": -1,
                    # "bbox": bboxes[i],
                }
                local_group_id = group_ids[i]
                # if local_group_id >= 0:
                if local_group_id not in group_dict:
                    group_dict[local_group_id] = group_counter
                    group_counter += 1
                db_info["group_id"] = group_dict[local_group_id]
                if "score" in annos:
                    db_info["score"] = annos["score"][i]
                if names[i] in all_db_infos:
                    all_db_infos[names[i]].append(db_info)
                else:
                    all_db_infos[names[i]] = [db_info]

    print("dataset length: ", len(dataset))
    for k, v in all_db_infos.items():
        print(f"load {len(v)} {k} database infos")

    with open(dbinfo_path, "wb") as f:
        pickle.dump(all_db_infos, f)
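The resulting dbinfos pickle maps each class name to a list of db_info dicts with the keys built above (name, path, image_idx, gt_idx, box3d_lidar, num_points_in_gt, difficulty, group_id, and optionally score). A minimal sketch of inspecting it, with the file name taken from the nuScenes/Waymo branch above and nsweeps assumed to be 1:

import pickle

with open("dbinfos_train_1sweeps_withvelo.pkl", "rb") as f:
    all_db_infos = pickle.load(f)

for name, infos in all_db_infos.items():
    print(name, len(infos))

# Each entry records where the cropped points live and the box used to paste them back
# during ground-truth augmentation.
sample = all_db_infos[next(iter(all_db_infos))][0]
print(sample["path"], sample["num_points_in_gt"], sample["box3d_lidar"])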