def __getitem__(self, index):
    # for debug purposes, uncomment one of these to fix the frame:
    # index = 0
    # index = 300
    # for index, info in enumerate(self.kitti_infos):
    #     if info['point_cloud']['lidar_idx'] == '100001284':
    #         break
    # print(self.kitti_infos[index]['point_cloud']['lidar_idx'])
    info = copy.deepcopy(self.kitti_infos[index])
    sample_idx = info['point_cloud']['lidar_idx']
    if cfg.TAG_PTS_WITH_RGB:
        points = self.get_colored_lidar(sample_idx)
    else:
        points = self.get_lidar(sample_idx)
    calib = self.get_calib(sample_idx)
    img_shape = info['image']['image_shape']

    if cfg.DATA_CONFIG.FOV_POINTS_ONLY:
        # keep only the points that fall inside the camera field of view
        pts_rect = calib.lidar_to_rect(points[:, 0:3])
        fov_flag = self.get_fov_flag(pts_rect, img_shape, calib)
        points = points[fov_flag]

    input_dict = {
        'points': points,
        'sample_idx': sample_idx,
        'calib': calib,
    }

    if 'annos' in info:
        annos = info['annos']
        annos = common_utils.drop_info_with_name(annos, name='DontCare')
        loc, dims, rots = annos['location'], annos['dimensions'], annos['rotation_y']
        gt_names = annos['name']
        bbox = annos['bbox']
        gt_boxes = np.concatenate(
            [loc, dims, rots[..., np.newaxis]], axis=1).astype(np.float32)
        if 'gt_boxes_lidar' in annos:
            gt_boxes_lidar = annos['gt_boxes_lidar']
        else:
            gt_boxes_lidar = box_utils.boxes3d_camera_to_lidar(gt_boxes, calib)

        input_dict.update({
            'gt_boxes': gt_boxes,
            'gt_names': gt_names,
            'gt_box2d': bbox,
            'gt_boxes_lidar': gt_boxes_lidar
        })

        # for debugging the model: here we cheat by tagging whether each point
        # lies inside a ground-truth bbox
        if cfg.TAG_PTS_IF_IN_GT_BBOXES:
            points[:, 3] = 0
            corners_lidar = box_utils.boxes3d_to_corners3d_lidar(gt_boxes_lidar)
            for k in range(len(gt_boxes_lidar)):
                if gt_names[k] == 'Car':
                    flag = box_utils.in_hull(points[:, 0:3], corners_lidar[k])
                    points[flag, 3] = 1
            input_dict['points'] = points

    example = self.prepare_data(input_dict=input_dict, has_label='annos' in info)
    example['sample_idx'] = sample_idx
    example['image_shape'] = img_shape

    # add the BEV maps if an HD map is available (e.g., Argoverse)
    if 'bev' in cfg.MODE:
        bev = self.get_bev(sample_idx)
        example['bev'] = bev

    if cfg.INJECT_SEMANTICS or cfg.USE_PSEUDOLIDAR:
        # TODO: check the ordering of the ImageNet mean and std. Is it RGB or BGR?
        img = self.get_image(sample_idx)

        # Preprocess the image so it works for HRNet, which was trained on
        # Cityscapes. On KITTI (unlike Argoverse), images can differ in size,
        # so we must scale them all to a common size; during voxelization they
        # are rescaled to the right size so that projecting lidar points onto
        # the image plane works. If the height is not set, scale according to
        # the width only.
        if cfg.INJECT_SEMANTICS_HEIGHT == 0:
            img = image_resize(img, cfg.INJECT_SEMANTICS_WIDTH)
        else:
            img = cv2.resize(
                img,
                (cfg.INJECT_SEMANTICS_WIDTH, cfg.INJECT_SEMANTICS_HEIGHT),
                interpolation=cv2.INTER_LINEAR)
        mean = np.array([0.485, 0.456, 0.406])
        std = np.array([0.229, 0.224, 0.225])
        img = input_transform(img, mean, std)  # normalize w.r.t. ImageNet stats
        img = img.transpose((2, 0, 1))  # change dim ordering to (c, h, w) for torch
        example['img'] = img
        example['pseudolidar_image_shape'] = img.shape[-2:]

        # if using pseudolidar from depth estimation, we need to crop out the
        # sky (top margin) and the road (bottom margin)
        if cfg.USE_PSEUDOLIDAR:
            # first scale down, then crop
            oh, ow = img.shape[-2:]
            bottom_p, top_p = .15, cfg.DEPTH_MAP_TOP_MARGIN_PCT
            top = int(oh * top_p)
            bottom = int(oh - oh * bottom_p)
            img_cropped = img[:, top:bottom, :]
            example['img'] = img_cropped

    return example
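# `image_resize` and `input_transform` are called above but defined elsewhere in
# the repo. The sketches below are assumptions about their behavior, not the
# actual implementations: `image_resize` is assumed to scale to a target width
# while preserving the aspect ratio, and `input_transform` is assumed to apply
# ImageNet-style channel-wise normalization.
import cv2
import numpy as np

def image_resize(img, width, inter=cv2.INTER_LINEAR):
    # resize to the given width, scaling the height to keep the aspect ratio
    h, w = img.shape[:2]
    return cv2.resize(img, (width, int(h * width / w)), interpolation=inter)

def input_transform(img, mean, std):
    # scale pixels to [0, 1], then normalize each channel (assumed behavior)
    img = img.astype(np.float32) / 255.0
    return (img - mean) / std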
def process_single_scene(sample_idx):
    print('%s sample_idx: %s' % (self.split, sample_idx))
    # the info of a scene is represented as a nested dict
    info = {}

    # point cloud info dict
    pc_info = {'num_features': 4, 'lidar_idx': sample_idx}
    info['point_cloud'] = pc_info

    # image info dict
    image_info = {
        'image_idx': sample_idx,
        'image_shape': self.get_image_shape(sample_idx)
    }
    info['image'] = image_info

    # calibration matrices info dict: pad the calib matrices to 4x4 so that we
    # can use them in homogeneous coordinates
    calib = self.get_calib(sample_idx)
    P2 = np.concatenate([calib.P2, np.array([[0., 0., 0., 1.]])], axis=0)
    R0_4x4 = np.zeros([4, 4], dtype=calib.R0.dtype)
    R0_4x4[3, 3] = 1.
    R0_4x4[:3, :3] = calib.R0
    V2C_4x4 = np.concatenate([calib.V2C, np.array([[0., 0., 0., 1.]])], axis=0)
    calib_info = {
        'P2': P2,
        'R0_rect': R0_4x4,
        'Tr_velo_to_cam': V2C_4x4
    }
    info['calib'] = calib_info

    if has_label:
        obj_list = self.get_label(sample_idx)
        annotations = {}
        annotations['name'] = np.array([obj.cls_type for obj in obj_list])
        annotations['truncated'] = np.array([obj.truncation for obj in obj_list])
        annotations['occluded'] = np.array([obj.occlusion for obj in obj_list])
        annotations['alpha'] = np.array([obj.alpha for obj in obj_list])
        annotations['bbox'] = np.concatenate(
            [obj.box2d.reshape(1, 4) for obj in obj_list],
            axis=0)  # this is only the 2d image bbox
        annotations['dimensions'] = np.array(
            [[obj.l, obj.h, obj.w] for obj in obj_list])  # lhw (camera) format
        annotations['location'] = np.concatenate(
            [obj.loc.reshape(1, 3) for obj in obj_list], axis=0)
        annotations['rotation_y'] = np.array([obj.ry for obj in obj_list])
        annotations['score'] = np.array([obj.score for obj in obj_list])
        annotations['difficulty'] = np.array(
            [obj.level for obj in obj_list], np.int32)

        # DontCare objects are listed last in KITTI labels; index is the
        # object's position in the scene, and -1 marks DontCare
        num_objects = len(
            [obj.cls_type for obj in obj_list if obj.cls_type != 'DontCare'])
        num_gt = len(annotations['name'])
        index = list(range(num_objects)) + [-1] * (num_gt - num_objects)
        annotations['index'] = np.array(index, dtype=np.int32)

        # convert the 3d bbox annotations from rectified camera coordinates to
        # velodyne lidar coordinates
        loc = annotations['location'][:num_objects]
        dims = annotations['dimensions'][:num_objects]
        rots = annotations['rotation_y'][:num_objects]
        loc_lidar = calib.rect_to_lidar(loc)
        l, h, w = dims[:, 0:1], dims[:, 1:2], dims[:, 2:3]
        gt_boxes_lidar = np.concatenate(
            [loc_lidar, w, l, h, rots[..., np.newaxis]], axis=1)
        annotations['gt_boxes_lidar'] = gt_boxes_lidar

        info['annos'] = annotations

        if count_inside_pts:
            # count how many in-FOV lidar points fall inside each gt box
            points = self.get_lidar(sample_idx)
            calib = self.get_calib(sample_idx)
            pts_rect = calib.lidar_to_rect(points[:, 0:3])
            fov_flag = self.get_fov_flag(
                pts_rect, info['image']['image_shape'], calib)
            pts_fov = points[fov_flag]
            corners_lidar = box_utils.boxes3d_to_corners3d_lidar(gt_boxes_lidar)
            num_points_in_gt = -np.ones(num_gt, dtype=np.int32)
            for k in range(num_objects):
                flag = box_utils.in_hull(pts_fov[:, 0:3], corners_lidar[k])
                num_points_in_gt[k] = flag.sum()
            annotations['num_points_in_gt'] = num_points_in_gt

    return info
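# Why the calib matrices are padded to 4x4 above: in homogeneous coordinates the
# full velodyne -> image projection collapses into a single chain of matrix
# products, x_img ~ P2 @ R0_rect @ Tr_velo_to_cam @ x_velo (the standard KITTI
# convention). A minimal sketch of that use; `project_lidar_to_image` is a
# hypothetical helper, and `calib_info` is the dict built by
# process_single_scene above.
import numpy as np

def project_lidar_to_image(pts_lidar, calib_info):
    # pts_lidar: (N, 3) points in velodyne coordinates
    proj = calib_info['P2'] @ calib_info['R0_rect'] @ calib_info['Tr_velo_to_cam']
    pts_hom = np.hstack([pts_lidar, np.ones((pts_lidar.shape[0], 1))])  # (N, 4)
    pts_img = pts_hom @ proj.T  # each row is (u * z, v * z, z, 1)
    return pts_img[:, :2] / pts_img[:, 2:3]  # perspective divide -> (u, v)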