Example #1
    def draw_3d_bbox(self,
                     frame,
                     points_camera,
                     cam_calib,
                     cam_pose,
                     cam_near_clip: float = 0.15,
                     line_color: tuple = (0, 255, 0),
                     line_width: int = 3,
                     corner_info: Optional[str] = None):
        # Clip each box edge against the near plane and project to the image
        projpoints = tu.get_3d_bbox_vertex(cam_calib, cam_pose, points_camera,
                                           cam_near_clip)

        for p1, p2 in projpoints:
            cv2.line(frame, (int(p1[0]), int(p1[1])), (int(p2[0]), int(p2[1])),
                     line_color, line_width)

        if corner_info is not None:
            # Draw the label only if the first vertex projects into the image
            # and lies in front of the near clip plane
            is_before = False
            cp1 = tu.cameratoimage(points_camera[0:1], cam_calib)[0]

            if cp1 is not None:
                is_before = tu.is_before_clip_plane_camera(
                    points_camera[0:1], cam_near_clip)[0]

            if is_before:
                x1 = int(cp1[0])
                y1 = int(cp1[1])

                frame = self.draw_corner_info(frame, x1, y1, corner_info,
                                              line_color)

        return frame
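A minimal call sketch, not part of the original code: the shapes below are inferred from how the method indexes its inputs (an (8, 3) array of box corners in camera space, a 3x4 projection matrix), and the `visualizer` instance, pose format, and intrinsic values are illustrative assumptions.

import numpy as np

frame = np.zeros((375, 1242, 3), dtype=np.uint8)  # blank KITTI-sized canvas
corners_cam = np.random.rand(8, 3) + [0.0, 0.0, 10.0]  # 8 corners ~10 m ahead
cam_calib = np.array([[721.5, 0.0, 609.6, 0.0],
                      [0.0, 721.5, 172.9, 0.0],
                      [0.0, 0.0, 1.0, 0.0]])  # KITTI-like 3x4 projection
cam_pose = np.eye(4)  # identity ego pose (exact format depends on tu)
frame = visualizer.draw_3d_bbox(frame, corners_cam, cam_calib, cam_pose,
                                corner_info='ID 7')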
Example #2
    @staticmethod
    def draw_3d_traj(frame,
                     points_hist,
                     cam_calib,
                     cam_pose,
                     line_color=(0, 255, 0)):

        # Plot the center history; alpha grows with recency (floored at 0.5)
        # so the trajectory fades toward older points
        for index, wt in enumerate(points_hist):
            ct = tu.worldtocamera(wt, cam_pose)
            pt = tu.cameratoimage(ct, cam_calib)
            alpha = int(max(index / len(points_hist), 0.5) * 255)
            rgba = line_color + (alpha,)
            cv2.circle(frame, (int(pt[0, 0]), int(pt[0, 1])),
                       3,
                       rgba,
                       thickness=-1)

        return frame
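A matching sketch for the trajectory drawer, reusing the assumed inputs from the previous example; each history entry is taken as a (1, 3) world-space point, consistent with the `pt[0, 0]` indexing:

points_hist = [np.array([[x, 0.9, 20.0]]) for x in np.linspace(-2.0, 2.0, 10)]
frame = visualizer.draw_3d_traj(frame, points_hist, cam_calib, cam_pose)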
Example #3
def convert_track(data_dir, mode=None, adjust_center=True):
    kitti = defaultdict(list)

    img_dir = osp.join(data_dir, 'image_02')
    label_dir = osp.join(data_dir, 'label_02')
    cali_dir = osp.join(data_dir, 'calib')
    oxt_dir = osp.join(data_dir, 'oxts')

    if not osp.exists(img_dir):
        print(f"Folder {img_dir} is not found")
        return None

    if not osp.exists(label_dir):
        label_dir = None

    vid_names = sorted(os.listdir(img_dir))
    print(f"{data_dir} with {len(vid_names)} sequences")

    for k, v in cats_mapping.items():
        kitti['categories'].append(dict(id=v, name=k))

    img_id = 0
    global_track_id = 0
    ann_id = 0

    for vid_id, vid_name in enumerate(vid_names):
        if mode == 'train':
            if vid_name in val_sets:
                continue
        elif mode == 'val':
            if vid_name not in val_sets:
                continue
        elif mode == 'mini':
            if vid_name not in mini_sets:
                continue
        print("VID ID: {}".format(vid_id))
        ind2id = dict()
        trackid_maps = dict()
        img_names = sorted([
            f.path for f in os.scandir(osp.join(img_dir, vid_name))
            if f.is_file() and f.name.endswith('png')
        ])
        vid_info = dict(id=vid_id, name=vid_name, n_frames=len(img_names))
        kitti['videos'].append(vid_info)

        projection = ku.read_calib(cali_dir, vid_id)

        # Read the sequence's ego poses once, rather than once per frame
        fields = ku.read_oxts(oxt_dir, vid_id)
        poses = [ku.KittiPoseParser(fields[i]) for i in range(len(fields))]

        for fr, img_name in enumerate(img_names):
            img = mmcv.imread(img_name)

            rotation = R.from_matrix(poses[fr].rotation).as_euler('xyz')
            position = poses[fr].position - poses[0].position
            pose_dict = dict(rotation=rotation.tolist(),
                             position=position.tolist())

            height, width, _ = img.shape
            index = fr
            img_info = dict(file_name=img_name,
                            cali=projection.tolist(),
                            pose=pose_dict,
                            height=height,
                            width=width,
                            fov=60,
                            near_clip=0.15,
                            id=img_id,
                            video_id=vid_id,
                            index=index)
            kitti['images'].append(img_info)
            ind2id[index] = img_id
            img_id += 1

        if label_dir:
            label_file = osp.join(label_dir, '{}.txt'.format(vid_name))
            labels = mmcv.list_from_file(label_file)
            for label in labels:
                label = label.split()
                cat = label[2]
                if cat in ['DontCare']:
                    continue
                # if cat not in kitti_cats.keys():
                #     continue
                image_id = ind2id[int(label[0])]
                if label[1] in trackid_maps.keys():
                    track_id = trackid_maps[label[1]]
                else:
                    track_id = global_track_id
                    trackid_maps[label[1]] = track_id
                    global_track_id += 1
                x1, y1, x2, y2 = float(label[6]), float(label[7]), float(
                    label[8]), float(label[9])

                if adjust_center:
                    # KITTI GT places the 3D location at the bottom center of
                    # the box (x, y_bottom, z), while predictions use the box
                    # center (x, y, z). The camera y-axis points down, so
                    # subtracting h / 2 shifts the GT point up to the center.
                    y_cen_adjust = float(label[10]) / 2.0
                else:
                    y_cen_adjust = 0.0
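                # Worked example with illustrative numbers: a car of height
                # label[10] = 1.52 m whose GT y is 1.80 m (bottom center)
                # gets translation y = 1.80 - 0.76 = 1.04 m, the box center.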

                center_2d = tu.cameratoimage(
                    np.array([[
                        float(label[13]),
                        float(label[14]) - y_cen_adjust,
                        float(label[15])
                    ]]), projection).flatten().tolist()

                ann = dict(id=ann_id,
                           image_id=image_id,
                           category_id=cats_mapping[kitti_cats[cat]],
                           instance_id=track_id,
                           alpha=float(label[5]),
                           roty=float(label[16]),
                           dimension=[
                               float(label[10]),
                               float(label[11]),
                               float(label[12])
                           ],
                           translation=[
                               float(label[13]),
                               float(label[14]) - y_cen_adjust,
                               float(label[15])
                           ],
                           is_occluded=int(label[4]),
                           is_truncated=float(label[3]),
                           center_2d=center_2d,
                           delta_2d=[
                               center_2d[0] - (x1 + x2) / 2.0,
                               center_2d[1] - (y1 + y2) / 2.0
                           ],
                           bbox=[x1, y1, x2 - x1, y2 - y1],
                           area=(x2 - x1) * (y2 - y1),
                           iscrowd=False,
                           ignore=False,
                           segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]])
                kitti['annotations'].append(ann)
                ann_id += 1
    return kitti
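A minimal usage sketch; the data path and output file name here are assumptions, and `mmcv.dump` picks the JSON serializer from the file extension:

anno = convert_track('data/KITTI/tracking/training', mode='train')
if anno is not None:
    mmcv.dump(anno, 'kitti_track_train.json')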
Example #4
def convert_det(data_dir, mode=None, adjust_center=True):
    kitti = defaultdict(list)

    img_dir = osp.join(data_dir, 'image_2')
    label_dir = osp.join(data_dir, 'label_2')
    cali_dir = osp.join(data_dir, 'calib')

    if not osp.exists(img_dir):
        print(f"Folder {img_dir} is not found")
        return None

    if not osp.exists(label_dir):
        label_dir = None

    img_names = sorted(os.listdir(img_dir))
    for k, v in cats_mapping.items():
        kitti['categories'].append(dict(id=v, name=k))

    pose_dict = dict(rotation=[0, 0, 0], position=[0, 0, 0])
    ann_id = 0
    vid_info = dict(id=0, name='', n_frames=len(img_names))
    kitti['videos'].append(vid_info)
    for img_id, img_name in enumerate(img_names):
        if mode == 'train':
            if osp.splitext(img_name)[0] in det_val_sets:
                continue
        elif mode == 'val':
            if osp.splitext(img_name)[0] not in det_val_sets:
                continue
        print(f'DET ID: {img_id}')
        img = mmcv.imread(osp.join(img_dir, img_name))
        height, width, _ = img.shape
        projection = ku.read_calib_det(cali_dir, img_id)

        index = int(osp.splitext(img_name)[0])
        img_info = dict(file_name=osp.join(img_dir, img_name),
                        cali=projection.tolist(),
                        pose=pose_dict,
                        height=height,
                        width=width,
                        fov=60,
                        near_clip=0.15,
                        id=img_id,
                        video_id=0,
                        index=index)
        kitti['images'].append(img_info)

        if label_dir:
            label_file = osp.join(label_dir,
                                  '{}.txt'.format(img_name.split('.')[0]))
            labels = mmcv.list_from_file(label_file)
            track_id = 0
            for label in labels:
                label = label.split()
                cat = label[0]
                if cat in ['DontCare']:
                    continue
                # if cat not in kitti_cats.keys():
                #     continue
                x1, y1, x2, y2 = float(label[4]), float(label[5]), float(
                    label[6]), float(label[7])

                if adjust_center:
                    # KITTI GT places the 3D location at the bottom center of
                    # the box (x, y_bottom, z), while predictions use the box
                    # center (x, y, z). The camera y-axis points down, so
                    # subtracting h / 2 shifts the GT point up to the center.
                    y_cen_adjust = float(label[8]) / 2.0
                else:
                    y_cen_adjust = 0.0

                center_2d = tu.cameratoimage(
                    np.array([[
                        float(label[11]),
                        float(label[12]) - y_cen_adjust,
                        float(label[13])
                    ]]), projection).flatten().tolist()

                ann = dict(id=ann_id,
                           image_id=img_id,
                           instance_id=track_id,
                           category_id=cats_mapping[kitti_cats[cat]],
                           alpha=float(label[3]),
                           roty=float(label[14]),
                           dimension=[
                               float(label[8]),
                               float(label[9]),
                               float(label[10])
                           ],
                           translation=[
                               float(label[11]),
                               float(label[12]) - y_cen_adjust,
                               float(label[13])
                           ],
                           is_occluded=int(label[2]),
                           is_truncated=float(label[1]),
                           center_2d=center_2d,
                           delta_2d=[
                               center_2d[0] - (x1 + x2) / 2.0,
                               center_2d[1] - (y1 + y2) / 2.0
                           ],
                           bbox=[x1, y1, x2 - x1, y2 - y1],
                           area=(x2 - x1) * (y2 - y1),
                           iscrowd=False,
                           ignore=False,
                           segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]])
                kitti['annotations'].append(ann)
                ann_id += 1
                track_id += 1
    return kitti
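The same dump pattern covers the detection converter; a sketch producing both splits (paths and file names are again assumptions):

for split in ('train', 'val'):
    anno = convert_det('data/KITTI/detection/training', mode=split)
    if anno is not None:
        mmcv.dump(anno, f'kitti_det_{split}.json')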
Example #5
def convert_track(data_dir, subset: str):
    gta_anno = defaultdict(list)

    set_size = data_set_dict[subset]['amount']
    subset_folder = data_set_dict[subset]['folder']
    img_dir = osp.join(data_dir, subset_folder, 'image')
    label_dir = osp.join(data_dir, subset_folder, 'label')

    if not osp.exists(img_dir):
        print(f"Folder {img_dir} not found")
        return None

    if not osp.exists(label_dir):
        # The labels are required here: the video list itself is scanned
        # from the label folder
        print(f"Folder {label_dir} not found")
        return None

    vid_names = sorted([f.path for f in os.scandir(label_dir) if f.is_dir()])

    # Uniformly subsample the sequences down to set_size videos
    if set_size < len(vid_names):
        stride = len(vid_names) // set_size
        vid_names = vid_names[::stride][:set_size]

    # Collect information at the box level: one dict per box, not per image
    print(f"{subset} with {len(vid_names)} sequences")

    for k, v in cats_mapping.items():
        gta_anno['categories'].append(dict(id=v, name=k))

    img_id = 0
    global_track_id = 0
    ann_id = 0

    for vid_id, vid_name in enumerate(vid_names):
        print(f"VID {vid_id} ID: {vid_name}")

        ind2id = dict()
        trackid_maps = dict()

        # Skip a known-problematic sequence before scanning its frames
        if vid_name == osp.join(
                data_dir,
                'train/label/rec_10090618_snow_10h14m_x-493y-1796tox-1884y1790'
        ):
            print(f"Skipping {vid_name}")
            continue

        fr_names = sorted([
            f.path for f in os.scandir(vid_name)
            if f.is_file() and f.name.endswith('final.json')
        ])

        vid_info = dict(id=vid_id, name=vid_name, n_frames=len(fr_names))
        gta_anno['videos'].append(vid_info)

        init_position = bu.load_json(fr_names[0])['extrinsics']['location']

        for fr_idx, fr_name in enumerate(fr_names):
            frame = bu.load_json(fr_name)

            img_name = fr_name.replace('label', 'image').replace('json', 'jpg')
            height = frame['resolution']['height']
            width = frame['resolution']['width']
            rot_angle = np.array(frame['extrinsics']['rotation'])
            rot_matrix = tu.angle2rot(rot_angle)
            # Compose with the fixed GPS-to-camera axis rotation
            gps_to_camera = tu.angle2rot(np.array([np.pi / 2, 0, 0]),
                                         inverse=True)
            rot_matrix = rot_matrix.dot(gps_to_camera)
            rotation = R.from_matrix(rot_matrix).as_euler('xyz')
            # Ego position relative to the first frame of the sequence
            position = [
                float(p_t) - float(p_0) for (
                    p_t,
                    p_0) in zip(frame['extrinsics']['location'], init_position)
            ]
            pose_dict = dict(rotation=rotation.tolist(), position=position)

            projection = np.array(frame['intrinsics']['cali'])

            index = fr_idx
            img_info = dict(file_name=img_name,
                            cali=projection.tolist(),
                            pose=pose_dict,
                            height=height,
                            width=width,
                            fov=60,
                            near_clip=0.15,
                            timestamp=frame['timestamp'],
                            id=img_id,
                            video_id=vid_id,
                            index=index)

            gta_anno['images'].append(img_info)

            ind2id[index] = img_id
            img_id += 1

            for label in frame['labels']:
                cat = label['category']
                if cat in ['DontCare']:
                    continue
                image_id = ind2id[index]
                if label['id'] in trackid_maps.keys():
                    track_id = trackid_maps[label['id']]
                else:
                    track_id = global_track_id
                    trackid_maps[label['id']] = track_id
                    global_track_id += 1
                x1 = float(label['box2d']['x1'])
                y1 = float(label['box2d']['y1'])
                x2 = float(label['box2d']['x2'])
                y2 = float(label['box2d']['y2'])
                location = bu.get_label_array([label], ['box3d', 'location'],
                                              (0, 3)).astype(float)
                center_2d = tu.cameratoimage(location,
                                             projection).flatten().tolist()
                ann = dict(id=ann_id,
                           image_id=image_id,
                           category_id=cats_mapping[gta_merge_maps[cat]],
                           instance_id=track_id,
                           alpha=float(label['box3d']['alpha']),
                           roty=float(label['box3d']['orientation']),
                           dimension=[
                               float(dim)
                               for dim in label['box3d']['dimension']
                           ],
                           translation=[
                               float(loc) for loc in label['box3d']['location']
                           ],
                           is_occluded=int(label['attributes']['occluded']),
                           is_truncated=int(label['attributes']['truncated']),
                           center_2d=center_2d,
                           delta_2d=[
                               center_2d[0] - (x1 + x2) / 2.0,
                               center_2d[1] - (y1 + y2) / 2.0
                           ],
                           bbox=[x1, y1, x2 - x1, y2 - y1],
                           area=(x2 - x1) * (y2 - y1),
                           iscrowd=False,
                           ignore=label['attributes']['ignore'],
                           segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]])
                gta_anno['annotations'].append(ann)
                ann_id += 1
    return gta_anno
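The converter reads its subset configuration from a module-level `data_set_dict` not shown in this excerpt. A minimal sketch of the shape the code assumes; the folder names and amounts below are illustrative, not the repository's actual values:

data_set_dict = {
    # 'folder' is the subdirectory under data_dir; 'amount' is the target
    # number of sequences to keep after uniform subsampling
    'train': dict(folder='train', amount=100),
    'val': dict(folder='val', amount=20),
}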