def draw_3d_bbox(self, frame, points_camera, cam_calib, cam_pose, cam_near_clip: float = 0.15, line_color: tuple = (0, 255, 0), line_width: int = 3, corner_info: str = None):
    """Draw a 3D bounding box, projected into the image plane, onto ``frame``.

    Args:
        frame: BGR image to draw on (modified in place by cv2 calls).
        points_camera: 3D box vertices in camera coordinates; the first
            vertex anchors the optional corner label.
        cam_calib: camera calibration used for projection.
        cam_pose: camera pose used by the vertex-projection helper.
        cam_near_clip: near clipping plane distance for edge clipping.
        line_color: BGR color of the box edges.
        line_width: thickness of the box edges.
        corner_info: optional text rendered at the first box corner.

    Returns:
        The frame with the box (and optional corner text) drawn.
    """
    edges = tu.get_3d_bbox_vertex(cam_calib, cam_pose, points_camera, cam_near_clip)
    for start, end in edges:
        cv2.line(frame,
                 (int(start[0]), int(start[1])),
                 (int(end[0]), int(end[1])),
                 line_color,
                 line_width)

    if corner_info is None:
        return frame

    # Only label the corner when the first vertex projects into the image
    # and lies in front of the near clipping plane.
    first_corner = tu.cameratoimage(points_camera[0:1], cam_calib)[0]
    corner_visible = False
    if first_corner is not None:
        corner_visible = tu.is_before_clip_plane_camera(
            points_camera[0:1], cam_near_clip)[0]
    if corner_visible:
        frame = self.draw_corner_info(frame,
                                      int(first_corner[0]),
                                      int(first_corner[1]),
                                      corner_info,
                                      line_color)
    return frame
def draw_3d_traj(frame, points_hist, cam_calib, cam_pose, line_color=(0, 255, 0)):
    """Draw a trajectory of 3D world points as filled circles on ``frame``.

    Each history point is transformed world -> camera -> image and drawn
    with an alpha that scales with its position in the history (clamped to
    at least 50% opacity).
    """
    n_points = len(points_hist)
    for idx, world_pt in enumerate(points_hist):
        cam_pt = tu.worldtocamera(world_pt, cam_pose)
        img_pt = tu.cameratoimage(cam_pt, cam_calib)
        # Later points get higher alpha, but never below half opacity.
        alpha = int(max(float(idx) / n_points, 0.5) * 255)
        cv2.circle(frame,
                   (int(img_pt[0, 0]), int(img_pt[0, 1])),
                   3,
                   line_color + (alpha,),
                   thickness=-1)
    return frame
def convert_track(data_dir, mode=None, adjust_center=True):
    """Convert a KITTI tracking split into a COCO-style annotation dict.

    Args:
        data_dir: root folder containing ``image_02``, ``label_02``,
            ``calib`` and ``oxts`` subfolders.
        mode: 'train' excludes ``val_sets`` videos, 'val' keeps only
            ``val_sets`` videos, 'mini' keeps only ``mini_sets`` videos;
            any other value keeps all videos.
        adjust_center: if True, shift the 3D y-coordinate by half the box
            height so the GT bottom-center convention matches the
            prediction box-center convention.

    Returns:
        A ``defaultdict(list)`` with 'categories', 'videos', 'images' and
        'annotations' entries, or None if the image folder is missing.
    """
    kitti = defaultdict(list)
    img_dir = osp.join(data_dir, 'image_02')
    label_dir = osp.join(data_dir, 'label_02')
    cali_dir = osp.join(data_dir, 'calib')
    oxt_dir = osp.join(data_dir, 'oxts')
    if not osp.exists(img_dir):
        print(f"Folder {img_dir} is not found")
        return None
    if not osp.exists(label_dir):
        label_dir = None
    vid_names = sorted(os.listdir(img_dir))
    print(f"{data_dir} with {len(vid_names)} sequences")
    for k, v in cats_mapping.items():
        kitti['categories'].append(dict(id=v, name=k))
    img_id = 0
    global_track_id = 0
    ann_id = 0
    for vid_id, vid_name in enumerate(vid_names):
        if mode == 'train':
            if vid_name in val_sets:
                continue
        elif mode == 'val':
            if vid_name not in val_sets:
                continue
        elif mode == 'mini':
            if vid_name not in mini_sets:
                continue
        print("VID ID: {}".format(vid_id))
        ind2id = dict()
        trackid_maps = dict()
        img_names = sorted([
            f.path for f in os.scandir(osp.join(img_dir, vid_name))
            if f.is_file() and f.name.endswith('png')
        ])
        vid_info = dict(id=vid_id, name=vid_name, n_frames=len(img_names))
        kitti['videos'].append(vid_info)
        projection = ku.read_calib(cali_dir, vid_id)
        # BUGFIX: the OXTS file was re-read and every pose re-parsed once
        # per frame (O(frames^2) work per sequence). The data only depends
        # on the video, so read and parse it once per sequence.
        fields = ku.read_oxts(oxt_dir, vid_id)
        poses = [ku.KittiPoseParser(fields[i]) for i in range(len(fields))]
        # img_names is already sorted above; no need to re-sort per frame.
        for fr, img_name in enumerate(img_names):
            img = mmcv.imread(img_name)
            rotation = R.from_matrix(poses[fr].rotation).as_euler('xyz')
            # Positions are reported relative to the first frame's pose.
            position = poses[fr].position - poses[0].position
            pose_dict = dict(rotation=rotation.tolist(),
                             position=position.tolist())
            height, width, _ = img.shape
            index = fr
            img_info = dict(file_name=img_name,
                            cali=projection.tolist(),
                            pose=pose_dict,
                            height=height,
                            width=width,
                            fov=60,
                            near_clip=0.15,
                            id=img_id,
                            video_id=vid_id,
                            index=index)
            kitti['images'].append(img_info)
            ind2id[index] = img_id
            img_id += 1
        if label_dir:
            label_file = osp.join(label_dir, '{}.txt'.format(vid_name))
            labels = mmcv.list_from_file(label_file)
            for label in labels:
                label = label.split()
                cat = label[2]
                if cat in ['DontCare']:
                    continue
                image_id = ind2id[int(label[0])]
                if label[1] in trackid_maps.keys():
                    track_id = trackid_maps[label[1]]
                else:
                    track_id = global_track_id
                    trackid_maps[label[1]] = track_id
                    global_track_id += 1
                x1, y1, x2, y2 = float(label[6]), float(label[7]), float(
                    label[8]), float(label[9])
                if adjust_center:
                    # KITTI GT uses the bottom of the car as center (x, 0, z).
                    # Prediction uses center of the bbox as center (x, y, z).
                    # So we align them to the bottom center as GT does
                    y_cen_adjust = float(label[10]) / 2.0
                else:
                    y_cen_adjust = 0.0
                center_2d = tu.cameratoimage(
                    np.array([[
                        float(label[13]),
                        float(label[14]) - y_cen_adjust,
                        float(label[15])
                    ]]), projection).flatten().tolist()
                ann = dict(id=ann_id,
                           image_id=image_id,
                           category_id=cats_mapping[kitti_cats[cat]],
                           instance_id=track_id,
                           alpha=float(label[5]),
                           roty=float(label[16]),
                           dimension=[
                               float(label[10]),
                               float(label[11]),
                               float(label[12])
                           ],
                           translation=[
                               float(label[13]),
                               float(label[14]) - y_cen_adjust,
                               float(label[15])
                           ],
                           is_occluded=int(label[4]),
                           is_truncated=float(label[3]),
                           center_2d=center_2d,
                           delta_2d=[
                               center_2d[0] - (x1 + x2) / 2.0,
                               center_2d[1] - (y1 + y2) / 2.0
                           ],
                           bbox=[x1, y1, x2 - x1, y2 - y1],
                           area=(x2 - x1) * (y2 - y1),
                           iscrowd=False,
                           ignore=False,
                           segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]])
                kitti['annotations'].append(ann)
                ann_id += 1
    return kitti
def convert_det(data_dir, mode=None, adjust_center=True):
    """Convert the KITTI detection split into a COCO-style annotation dict.

    Args:
        data_dir: root folder containing ``image_2``, ``label_2`` and
            ``calib`` subfolders.
        mode: 'train' excludes images in ``det_val_sets``, 'val' keeps only
            those images; any other value keeps all images.
        adjust_center: if True, shift the 3D y-coordinate by half the box
            height so the GT bottom-center convention matches the
            prediction box-center convention.

    Returns:
        A ``defaultdict(list)`` with 'categories', 'videos', 'images' and
        'annotations' entries, or None if the image folder is missing.
    """
    kitti = defaultdict(list)
    img_dir = osp.join(data_dir, 'image_2')
    label_dir = osp.join(data_dir, 'label_2')
    cali_dir = osp.join(data_dir, 'calib')
    if not osp.exists(img_dir):
        print(f"Folder {img_dir} is not found")
        return None
    if not osp.exists(label_dir):
        label_dir = None
    img_names = sorted(os.listdir(img_dir))
    for cat_name, cat_id in cats_mapping.items():
        kitti['categories'].append(dict(id=cat_id, name=cat_name))
    # Detection images are static: identity pose, single dummy video.
    pose_dict = dict(rotation=[0, 0, 0], position=[0, 0, 0])
    ann_id = 0
    kitti['videos'].append(dict(id=0, name='', n_frames=len(img_names)))
    for img_id, img_name in enumerate(img_names):
        stem = osp.splitext(img_name)[0]
        if mode == 'train' and stem in det_val_sets:
            continue
        if mode == 'val' and stem not in det_val_sets:
            continue
        print('DET ID: {}'.format(img_id))
        img = mmcv.imread(osp.join(img_dir, img_name))
        height, width, _ = img.shape
        projection = ku.read_calib_det(cali_dir, img_id)
        name_prefix = img_name.split('.')[0]
        img_info = dict(file_name=osp.join(img_dir, img_name),
                        cali=projection.tolist(),
                        pose=pose_dict,
                        height=height,
                        width=width,
                        fov=60,
                        near_clip=0.15,
                        id=img_id,
                        video_id=0,
                        index=int(name_prefix))
        kitti['images'].append(img_info)
        if not label_dir:
            continue
        label_file = osp.join(label_dir, '{}.txt'.format(name_prefix))
        track_id = 0
        for raw_line in mmcv.list_from_file(label_file):
            parts = raw_line.split()
            cat = parts[0]
            if cat in ['DontCare']:
                continue
            x1, y1, x2, y2 = float(parts[4]), float(parts[5]), float(
                parts[6]), float(parts[7])
            # KITTI GT uses the bottom of the car as center (x, 0, z) while
            # predictions use the bbox center (x, y, z); align to GT bottom.
            y_cen_adjust = float(parts[8]) / 2.0 if adjust_center else 0.0
            translation = [
                float(parts[11]),
                float(parts[12]) - y_cen_adjust,
                float(parts[13])
            ]
            center_2d = tu.cameratoimage(np.array([translation]),
                                         projection).flatten().tolist()
            ann = dict(id=ann_id,
                       image_id=img_id,
                       instance_id=track_id,
                       category_id=cats_mapping[kitti_cats[cat]],
                       alpha=float(parts[3]),
                       roty=float(parts[14]),
                       dimension=[
                           float(parts[8]),
                           float(parts[9]),
                           float(parts[10])
                       ],
                       translation=translation,
                       is_occluded=int(parts[2]),
                       is_truncated=float(parts[1]),
                       center_2d=center_2d,
                       delta_2d=[
                           center_2d[0] - (x1 + x2) / 2.0,
                           center_2d[1] - (y1 + y2) / 2.0
                       ],
                       bbox=[x1, y1, x2 - x1, y2 - y1],
                       area=(x2 - x1) * (y2 - y1),
                       iscrowd=False,
                       ignore=False,
                       segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]])
            kitti['annotations'].append(ann)
            ann_id += 1
            track_id += 1
    return kitti
def convert_track(data_dir, subset: str):
    """Convert a GTA subset into a COCO-style annotation dict.

    Args:
        data_dir: dataset root; the subset folder (from ``data_set_dict``)
            must contain ``image`` and ``label`` subfolders.
        subset: key into ``data_set_dict`` giving the folder name and the
            target number of videos ('amount') to sample.

    Returns:
        A ``defaultdict(list)`` with 'categories', 'videos', 'images' and
        'annotations' entries, or None if the image or label folder is
        missing.
    """
    gta_anno = defaultdict(list)
    set_size = data_set_dict[subset]['amount']
    subset_folder = data_set_dict[subset]['folder']
    img_dir = os.path.join(data_dir, subset_folder, 'image')
    label_dir = os.path.join(data_dir, subset_folder, 'label')
    if not osp.exists(img_dir):
        print(f"Folder {img_dir} is not found")
        return None
    if not os.path.exists(label_dir):
        # BUGFIX: the original set label_dir = None here and then called
        # os.scandir(label_dir), which raises TypeError. Without labels
        # there is nothing to convert, so bail out like the image case.
        print(f"Folder {label_dir} is not found")
        return None
    vid_names = sorted([f.path for f in os.scandir(label_dir) if f.is_dir()])
    # Uniformly sample roughly ``set_size`` videos.
    # BUGFIX: the original used vid_names[::set_size], which keeps
    # len(vid_names) / set_size videos instead of ~set_size; the stride
    # must be len(vid_names) // set_size.
    if set_size < len(vid_names):
        stride = max(len(vid_names) // set_size, 1)
        vid_names = vid_names[::stride]
    # get information at boxes level. Collect dict. per box, not image.
    print(f"{subset} with {len(vid_names)} sequences")
    for k, v in cats_mapping.items():
        gta_anno['categories'].append(dict(id=v, name=k))
    img_id = 0
    global_track_id = 0
    ann_id = 0
    for vid_id, vid_name in enumerate(vid_names):
        print(f"VID {vid_id} ID: {vid_name}")
        ind2id = dict()
        trackid_maps = dict()
        fr_names = sorted([
            f.path for f in os.scandir(vid_name)
            if f.is_file() and f.name.endswith('final.json')
        ])
        # Known-bad sequence; skip it entirely.
        if vid_name == osp.join(
                data_dir,
                'train/label/rec_10090618_snow_10h14m_x-493y-1796tox-1884y1790'
        ):
            print('Bump!')
            continue
        vid_info = dict(id=vid_id, name=vid_name, n_frames=len(fr_names))
        gta_anno['videos'].append(vid_info)
        # Positions are reported relative to the first frame's GPS location.
        init_position = bu.load_json(fr_names[0])['extrinsics']['location']
        for fr_idx, fr_name in enumerate(fr_names):
            frame = bu.load_json(fr_name)
            img_name = fr_name.replace('label', 'image').replace('json', 'jpg')
            height = frame['resolution']['height']
            width = frame['resolution']['width']
            # Compose the annotated rotation with the fixed GPS-to-camera
            # frame change before converting to Euler angles.
            rot_angle = np.array(frame['extrinsics']['rotation'])
            rot_matrix = tu.angle2rot(rot_angle)
            gps_to_camera = tu.angle2rot(np.array([np.pi / 2, 0, 0]),
                                         inverse=True)
            rot_matrix = rot_matrix.dot(gps_to_camera)
            rotation = R.from_matrix(rot_matrix).as_euler('xyz')
            position = [
                float(p_t) - float(p_0)
                for (p_t,
                     p_0) in zip(frame['extrinsics']['location'], init_position)
            ]
            pose_dict = dict(rotation=rotation.tolist(), position=position)
            projection = np.array(frame['intrinsics']['cali'])
            index = fr_idx
            img_info = dict(file_name=img_name,
                            cali=projection.tolist(),
                            pose=pose_dict,
                            height=height,
                            width=width,
                            fov=60,
                            near_clip=0.15,
                            timestamp=frame['timestamp'],
                            id=img_id,
                            video_id=vid_id,
                            index=index)
            gta_anno['images'].append(img_info)
            ind2id[index] = img_id
            img_id += 1
            for label in frame['labels']:
                cat = label['category']
                if cat in ['DontCare']:
                    continue
                image_id = ind2id[index]
                if label['id'] in trackid_maps.keys():
                    track_id = trackid_maps[label['id']]
                else:
                    track_id = global_track_id
                    trackid_maps[label['id']] = track_id
                    global_track_id += 1
                x1, y1, x2, y2 = float(label['box2d']['x1']), float(
                    label['box2d']['y1']), float(label['box2d']['x2']), float(
                        label['box2d']['y2'])
                location = bu.get_label_array([label], ['box3d', 'location'],
                                              (0, 3)).astype(float)
                center_2d = tu.cameratoimage(location,
                                             projection).flatten().tolist()
                ann = dict(id=ann_id,
                           image_id=image_id,
                           category_id=cats_mapping[gta_merge_maps[cat]],
                           instance_id=track_id,
                           alpha=float(label['box3d']['alpha']),
                           roty=float(label['box3d']['orientation']),
                           dimension=[
                               float(dim) for dim in label['box3d']['dimension']
                           ],
                           translation=[
                               float(loc) for loc in label['box3d']['location']
                           ],
                           is_occluded=int(label['attributes']['occluded']),
                           is_truncated=int(label['attributes']['truncated']),
                           center_2d=center_2d,
                           delta_2d=[
                               center_2d[0] - (x1 + x2) / 2.0,
                               center_2d[1] - (y1 + y2) / 2.0
                           ],
                           bbox=[x1, y1, x2 - x1, y2 - y1],
                           area=(x2 - x1) * (y2 - y1),
                           iscrowd=False,
                           ignore=label['attributes']['ignore'],
                           segmentation=[[x1, y1, x1, y2, x2, y2, x2, y1]])
                gta_anno['annotations'].append(ann)
                ann_id += 1
    return gta_anno