Example #1
def main():
  SCENE_SPLITS['mini-val'] = SCENE_SPLITS['val']
  if not os.path.exists(OUT_PATH):
    os.mkdir(OUT_PATH)
  for split in SPLITS:
    data_path = DATA_PATH + '{}/'.format(SPLITS[split])
    nusc = NuScenes(
      version=SPLITS[split], dataroot=data_path, verbose=True)
    out_path = OUT_PATH + '{}.json'.format(split)
    categories_info = [{'name': CATS[i], 'id': i + 1} for i in range(len(CATS))]
    ret = {'images': [], 'annotations': [], 'categories': categories_info, 
           'videos': [], 'attributes': ATTRIBUTE_TO_ID}
    num_images = 0
    num_anns = 0
    num_videos = 0

    # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
    for sample in nusc.sample:
      scene_name = nusc.get('scene', sample['scene_token'])['name']
      if not (split in ['mini', 'test']) and \
        not (scene_name in SCENE_SPLITS[split]):
        continue
      if sample['prev'] == '':
        print('scene_name', scene_name)
        num_videos += 1
        ret['videos'].append({'id': num_videos, 'file_name': scene_name})
        frame_ids = {k: 0 for k in sample['data']}
        track_ids = {}
      # We decompose a sample into 6 images in our case.
      for sensor_name in sample['data']:
        if sensor_name in USED_SENSOR:
          image_token = sample['data'][sensor_name]
          image_data = nusc.get('sample_data', image_token)
          num_images += 1

          # Complex coordinate transform. This will take time to understand.
          sd_record = nusc.get('sample_data', image_token)
          cs_record = nusc.get(
            'calibrated_sensor', sd_record['calibrated_sensor_token'])
          pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
          global_from_car = transform_matrix(pose_record['translation'],
            Quaternion(pose_record['rotation']), inverse=False)
          car_from_sensor = transform_matrix(
            cs_record['translation'], Quaternion(cs_record['rotation']),
            inverse=False)
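          # Compose the two rigid transforms: a point in camera coordinates is
          # first mapped into the ego-vehicle frame, then into the global frame.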
          trans_matrix = np.dot(global_from_car, car_from_sensor)
          _, boxes, camera_intrinsic = nusc.get_sample_data(
            image_token, box_vis_level=BoxVisibility.ANY)
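          # Pack the 3x3 camera intrinsics into a 3x4 KITTI-style projection
          # matrix, which is the shape project_to_image below works with.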
          calib = np.eye(4, dtype=np.float32)
          calib[:3, :3] = camera_intrinsic
          calib = calib[:3]
          frame_ids[sensor_name] += 1

          # image information in COCO format
          image_info = {'id': num_images,
                        'file_name': image_data['filename'],
                        'calib': calib.tolist(), 
                        'video_id': num_videos,
                        'frame_id': frame_ids[sensor_name],
                        'sensor_id': SENSOR_ID[sensor_name],
                        'sample_token': sample['token'],
                        'trans_matrix': trans_matrix.tolist(),
                        'width': sd_record['width'],
                        'height': sd_record['height'],
                        'pose_record_trans': pose_record['translation'],
                        'pose_record_rot': pose_record['rotation'],
                        'cs_record_trans': cs_record['translation'],
                        'cs_record_rot': cs_record['rotation']}
          ret['images'].append(image_info)
          anns = []
          for box in boxes:
            det_name = category_to_detection_name(box.name)
            if det_name is None:
              continue
            num_anns += 1
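            # Heading: rotate the x-axis by the box rotation and read the yaw
            # around the camera y-axis; then shift the box center down to the
            # bottom face (KITTI-style location).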
            v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
            yaw = -np.arctan2(v[2], v[0])
            box.translate(np.array([0, box.wlh[2] / 2, 0]))
            category_id = CAT_IDS[det_name]

            amodel_center = project_to_image(
              np.array([box.center[0], box.center[1] - box.wlh[2] / 2, box.center[2]], 
                np.float32).reshape(1, 3), calib)[0].tolist()
            sample_ann = nusc.get(
              'sample_annotation', box.token)
            instance_token = sample_ann['instance_token']
            if not (instance_token in track_ids):
              track_ids[instance_token] = len(track_ids) + 1
            attribute_tokens = sample_ann['attribute_tokens']
            attributes = [nusc.get('attribute', att_token)['name'] \
              for att_token in attribute_tokens]
            att = '' if len(attributes) == 0 else attributes[0]
            if len(attributes) > 1:
              print(attributes)
              import pdb; pdb.set_trace()
            track_id = track_ids[instance_token]
            vel = nusc.box_velocity(box.token) # global frame
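            # Rotate the global-frame velocity into the camera frame; the
            # homogeneous component is 0, so the translation part is ignored.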
            vel = np.dot(np.linalg.inv(trans_matrix), 
              np.array([vel[0], vel[1], vel[2], 0], np.float32)).tolist()
            
            # instance information in COCO format
            ann = {
              'id': num_anns,
              'image_id': num_images,
              'category_id': category_id,
              'dim': [box.wlh[2], box.wlh[0], box.wlh[1]],
              'location': [box.center[0], box.center[1], box.center[2]],
              'depth': box.center[2],
              'occluded': 0,
              'truncated': 0,
              'rotation_y': yaw,
              'amodel_center': amodel_center,
              'iscrowd': 0,
              'track_id': track_id,
              'attributes': ATTRIBUTE_TO_ID[att],
              'velocity': vel
            }

            bbox = KittiDB.project_kitti_box_to_image(
              copy.deepcopy(box), camera_intrinsic, imsize=(1600, 900))
            alpha = _rot_y2alpha(yaw, (bbox[0] + bbox[2]) / 2, 
                                 camera_intrinsic[0, 2], camera_intrinsic[0, 0])
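            # Store the box in COCO [x, y, width, height] form; alpha is the
            # viewpoint-dependent observation angle from the KITTI convention.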
            ann['bbox'] = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
            ann['area'] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            ann['alpha'] = alpha
            anns.append(ann)

          # Filter out boxes that are entirely hidden behind a closer box
          # (farther in depth and with a 2D box contained in the closer one)
          visable_anns = []
          for i in range(len(anns)):
            vis = True
            for j in range(len(anns)):
              if anns[i]['depth'] - min(anns[i]['dim']) / 2 > \
                 anns[j]['depth'] + max(anns[j]['dim']) / 2 and \
                _bbox_inside(anns[i]['bbox'], anns[j]['bbox']):
                vis = False
                break
            if vis:
              visable_anns.append(anns[i])
            else:
              pass

          for ann in visable_anns:
            ret['annotations'].append(ann)
          if DEBUG:
            img_path = data_path + image_info['file_name']
            img = cv2.imread(img_path)
            img_3d = img.copy()
            for ann in visable_anns:
              bbox = ann['bbox']
              cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 
                            (int(bbox[2] + bbox[0]), int(bbox[3] + bbox[1])), 
                            (0, 0, 255), 3, lineType=cv2.LINE_AA)
              box_3d = compute_box_3d(ann['dim'], ann['location'], ann['rotation_y'])
              box_2d = project_to_image(box_3d, calib)
              img_3d = draw_box_3d(img_3d, box_2d)

              pt_3d = unproject_2d_to_3d(ann['amodel_center'], ann['depth'], calib)
              pt_3d[1] += ann['dim'][0] / 2
              print('location', ann['location'])
              print('loc model', pt_3d)
              pt_2d = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                                dtype=np.float32)
              pt_3d = unproject_2d_to_3d(pt_2d, ann['depth'], calib)
              pt_3d[1] += ann['dim'][0] / 2
              print('loc      ', pt_3d)
            cv2.imshow('img', img)
            cv2.imshow('img_3d', img_3d)
            cv2.waitKey()
            nusc.render_sample_data(image_token)
            plt.show()
    print('reordering images')
    images = ret['images']
    video_sensor_to_images = {}
    for image_info in images:
      tmp_seq_id = image_info['video_id'] * 20 + image_info['sensor_id']
      if tmp_seq_id in video_sensor_to_images:
        video_sensor_to_images[tmp_seq_id].append(image_info)
      else:
        video_sensor_to_images[tmp_seq_id] = [image_info]
    ret['images'] = []
    for tmp_seq_id in sorted(video_sensor_to_images):
      ret['images'] = ret['images'] + video_sensor_to_images[tmp_seq_id]

    print('{} {} images {} boxes'.format(
      split, len(ret['images']), len(ret['annotations'])))
    print('out_path', out_path)
    json.dump(ret, open(out_path, 'w'))
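
The converter above relies on several module-level constants (DATA_PATH, OUT_PATH, SPLITS, SCENE_SPLITS, CATS, CAT_IDS, USED_SENSOR, SENSOR_ID, ATTRIBUTE_TO_ID, DEBUG) that are defined elsewhere in the script. A minimal sketch of the shapes they are expected to take; the values below are illustrative placeholders, not the original definitions:

# Illustrative placeholders only -- the real script defines these constants itself.
DATA_PATH = 'data/nuscenes/'
OUT_PATH = DATA_PATH + 'annotations/'
SPLITS = {'train': 'v1.0-trainval', 'val': 'v1.0-trainval', 'test': 'v1.0-test'}
CATS = ['car', 'truck', 'bus', 'trailer', 'construction_vehicle',
        'pedestrian', 'motorcycle', 'bicycle', 'traffic_cone', 'barrier']
CAT_IDS = {cat: i + 1 for i, cat in enumerate(CATS)}
USED_SENSOR = ['CAM_FRONT', 'CAM_FRONT_RIGHT', 'CAM_BACK_RIGHT',
               'CAM_BACK', 'CAM_BACK_LEFT', 'CAM_FRONT_LEFT']
SENSOR_ID = {sensor: i + 1 for i, sensor in enumerate(USED_SENSOR)}
ATTRIBUTE_TO_ID = {'': 0, 'cycle.with_rider': 1, 'cycle.without_rider': 2,
                   'pedestrian.moving': 3, 'pedestrian.standing': 4,
                   'pedestrian.sitting_lying_down': 5, 'vehicle.moving': 6,
                   'vehicle.parked': 7, 'vehicle.stopped': 8}
SCENE_SPLITS = {'train': [], 'val': []}  # scene names per split, from the devkit
DEBUG = False
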
Example #2
class NuScenesDataset(Dataset):
    """
    NuScenes dataset loader and producer
    """
    def __init__(self,
                 mode,
                 split='training',
                 img_list='trainval',
                 is_training=True,
                 workers_num=1):
        """
        mode: 'loading', 'preprocessing'
        """
        self.mode = mode
        self.dataset_dir = os.path.join(cfg.ROOT_DIR,
                                        cfg.DATASET.KITTI.BASE_DIR_PATH)
        self.max_sweeps = cfg.DATASET.NUSCENES.NSWEEPS
        self.is_training = is_training
        self.img_list = img_list
        self.workers_num = workers_num

        # cast labels from NuScenes name to useful name
        self.useful_cls_dict = {
            'animal': 'ignore',
            'human.pedestrian.personal_mobility': 'ignore',
            'human.pedestrian.stroller': 'ignore',
            'human.pedestrian.wheelchair': 'ignore',
            'movable_object.debris': 'ignore',
            'movable_object.pushable_pullable': 'ignore',
            'static_object.bicycle_rack': 'ignore',
            'vehicle.emergency.ambulance': 'ignore',
            'vehicle.emergency.police': 'ignore',
            'movable_object.barrier': 'barrier',
            'vehicle.bicycle': 'bicycle',
            'vehicle.bus.bendy': 'bus',
            'vehicle.bus.rigid': 'bus',
            'vehicle.car': 'car',
            'vehicle.construction': 'construction_vehicle',
            'vehicle.motorcycle': 'motorcycle',
            'human.pedestrian.adult': 'pedestrian',
            'human.pedestrian.child': 'pedestrian',
            'human.pedestrian.construction_worker': 'pedestrian',
            'human.pedestrian.police_officer': 'pedestrian',
            'movable_object.trafficcone': 'traffic_cone',
            'vehicle.trailer': 'trailer',
            'vehicle.truck': 'truck'
        }
        # cast attribute to index
        self.attribute_idx_list = {
            'vehicle.moving': 0,
            'vehicle.stopped': 1,
            'vehicle.parked': 2,
            'cycle.with_rider': 3,
            'cycle.without_rider': 4,
            'pedestrian.sitting_lying_down': 5,
            'pedestrian.standing': 6,
            'pedestrian.moving': 7,
            'default': -1,
        }
        self.idx_attribute_list = dict([
            (v, k) for k, v in self.attribute_idx_list.items()
        ])
        self.AttributeIdxLabelMapping = {
            "car": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "truck": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "bus": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "trailer": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "construction_vehicle":
            ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "pedestrian": [
                'pedestrian.sitting_lying_down', 'pedestrian.standing',
                'pedestrian.moving'
            ],
            "motorcycle": ['cycle.with_rider', 'cycle.without_rider', ''],
            "bicycle": ['cycle.with_rider', 'cycle.without_rider', ''],
            "traffic_cone": ['', '', ''],
            "barrier": ['', '', ''],
        }

        self.DefaultAttribute = {
            "car": "vehicle.parked",
            "pedestrian": "pedestrian.moving",
            "trailer": "vehicle.parked",
            "truck": "vehicle.parked",
            "bus": "vehicle.parked",
            "motorcycle": "cycle.without_rider",
            "construction_vehicle": "vehicle.parked",
            "bicycle": "cycle.without_rider",
            "barrier": "",
            "traffic_cone": "",
        }

        self.cls_list = cfg.DATASET.KITTI.CLS_LIST
        self.idx2cls_dict = dict([(idx + 1, cls)
                                  for idx, cls in enumerate(self.cls_list)])
        self.cls2idx_dict = dict([(cls, idx + 1)
                                  for idx, cls in enumerate(self.cls_list)])

        self.sv_npy_path = os.path.join(
            cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH, 'NuScenes',
            '{}_{}'.format(img_list, self.max_sweeps))
        self.train_list = os.path.join(self.sv_npy_path, 'infos.pkl')

        self.voxel_generator = VoxelGenerator()

        self.test_mode = cfg.TEST.TEST_MODE
        if self.test_mode == 'mAP':
            self.evaluation = self.evaluate_map
            self.logger_and_select_best = self.logger_and_select_best_map
        elif self.test_mode == 'Recall':
            self.evaluation = self.evaluate_recall
            self.logger_and_select_best = self.logger_and_select_best_recall
        else:
            raise Exception('No other evaluation mode.')

        if mode == 'loading':
            # data loader
            with open(self.train_list, 'rb') as f:
                self.train_npy_list = pickle.load(f)
            self.sample_num = len(self.train_npy_list)
            if self.is_training:
                self.data_augmentor = DataAugmentor(
                    'NuScenes', workers_num=self.workers_num)

        elif mode == 'preprocessing':
            # preprocess raw data
            if img_list == 'train':
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-trainval')
                self.scenes = [
                    scene for scene in self.nusc.scene
                    if scene['name'] in train_scene
                ]
            elif img_list == 'val':
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-trainval')
                self.scenes = [
                    scene for scene in self.nusc.scene
                    if scene['name'] in val_scene
                ]
            else:  # test
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-test')
                self.scenes = self.nusc.scene

            self.sample_data_token_list = OrderedDict()
            sample_num = 0
            for scene in self.scenes:
                # count the samples and record every LIDAR_TOP sample_data token
                self.sample_data_token_list[scene['token']] = []
                all_sample = self.nusc.field2token('sample', 'scene_token',
                                                   scene['token'])
                sample_num += len(all_sample)
                for sample in all_sample:  # all sample token
                    sample = self.nusc.get('sample', sample)
                    cur_token = sample['token']
                    cur_data_token = sample['data']['LIDAR_TOP']
                    self.sample_data_token_list[scene['token']].append(
                        cur_data_token)

            self.sample_num = sample_num

            self.extents = cfg.DATASET.POINT_CLOUD_RANGE
            self.extents = np.reshape(self.extents, [3, 2])
            if not os.path.exists(self.sv_npy_path):
                os.makedirs(self.sv_npy_path)

            # also calculate the mean size here
            self.cls_size_dict = dict([(cls,
                                        np.array([0, 0, 0], dtype=np.float32))
                                       for cls in self.cls_list])
            self.cls_num_dict = dict([(cls, 0) for cls in self.cls_list])

            # the save path for MixupDB
            if self.img_list in [
                    'train', 'val', 'trainval'
            ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
                self.mixup_db_cls_path = dict()
                self.mixup_db_trainlist_path = dict()
                self.mixup_db_class = cfg.TRAIN.AUGMENTATIONS.MIXUP.CLASS
                for cls in self.mixup_db_class:
                    mixup_db_cls_path = os.path.join(
                        cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH,
                        cfg.TRAIN.AUGMENTATIONS.MIXUP.SAVE_NUMPY_PATH,
                        cfg.TRAIN.AUGMENTATIONS.MIXUP.PC_LIST,
                        '{}'.format(cls))
                    mixup_db_trainlist_path = os.path.join(
                        mixup_db_cls_path, 'train_list.txt')
                    if not os.path.exists(mixup_db_cls_path):
                        os.makedirs(mixup_db_cls_path)
                    self.mixup_db_cls_path[cls] = mixup_db_cls_path
                    self.mixup_db_trainlist_path[cls] = mixup_db_trainlist_path

    def __len__(self):
        return self.sample_num

    def load_samples(self, sample_idx, pipename):
        """ load data per thread """
        pipename = int(pipename)
        biggest_label_num = 0
        sample_dict = self.train_npy_list[sample_idx]

        points_path = sample_dict[maps_dict.KEY_POINT_CLOUD]
        sweeps = sample_dict[maps_dict.KEY_SWEEPS]
        sample_name = sample_dict[maps_dict.KEY_SAMPLE_NAME]
        cur_transformation_matrix = sample_dict[
            maps_dict.KEY_TRANSFORMRATION_MATRIX]
        ts = sample_dict[maps_dict.KEY_TIMESTAMPS] / 1e6

        # first read the key-frame points, then stack points from the earlier sweeps
        points = np.fromfile(points_path, dtype=np.float32)
        points = points.reshape((-1, 5))
        points = cast_points_to_kitti(points)
        points[:, 3] /= 255
        points[:, 4] = 0
        sweep_points_list = [points]
        original_cur_sweep_points = points
        cur_sweep_points_num = points.shape[0]
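        # Each earlier sweep is rotated/translated into the current lidar frame
        # (sweep2lidar_*), and column 4 stores the time lag in seconds between
        # the sweep and the key frame.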
        for sweep in sweeps:
            points_sweep = np.fromfile(sweep['lidar_path'], dtype=np.float32)
            points_sweep = points_sweep.reshape((-1, 5))
            sweep_ts = sweep['timestamp'] / 1e6
            points_sweep[:, 3] /= 255
            points_sweep[:, :3] = points_sweep[:, :3] @ sweep[
                'sweep2lidar_rotation'].T
            points_sweep[:, :3] += sweep['sweep2lidar_translation']
            points_sweep[:, 4] = ts - sweep_ts
            points_sweep = cast_points_to_kitti(points_sweep)
            sweep_points_list.append(points_sweep)
        if cfg.DATASET.NUSCENES.INPUT_FEATURE_CHANNEL == 4:
            points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]]
        else:
            points = np.concatenate(sweep_points_list, axis=0)

        # then read the ground-truth labels, if available
        if self.is_training or cfg.TEST.WITH_GT:
            label_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D]
            label_boxes_3d = cast_box_3d_to_kitti_format(label_boxes_3d)

            label_classes_name = sample_dict[maps_dict.KEY_LABEL_CLASSES]
            label_classes = np.array([
                self.cls2idx_dict[label_class]
                for label_class in label_classes_name
            ])

            label_attributes = sample_dict[maps_dict.KEY_LABEL_ATTRIBUTES]
            label_velocity = sample_dict[
                maps_dict.KEY_LABEL_VELOCITY]  # [-1, 2]

            ry_cls_label, residual_angle = encode_angle2class_np(
                label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM)
        else:  # not is_training and no_gt
            label_boxes_3d = np.zeros([1, 7], np.float32)
            label_classes = np.zeros([1], np.int32)
            label_attributes = np.zeros([1], np.int32)
            label_velocity = np.zeros([1, 2], np.float32)
            ry_cls_label = np.zeros([1], np.int32)
            residual_angle = np.zeros([1], np.float32)

        if self.is_training:  # data augmentation
            points, label_boxes_3d, label_classes, label_attributes, label_velocity, cur_sweep_points_num = self.data_augmentor.nuscenes_forward(
                points, label_boxes_3d, label_classes, pipename,
                label_attributes, label_velocity, cur_sweep_points_num)
            ry_cls_label, residual_angle = encode_angle2class_np(
                label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM)
        cur_label_num = len(label_boxes_3d)

        # then randomly choose some points
        cur_sweep_points = points[:cur_sweep_points_num, :]  # [-1, 4]
        other_sweep_points = points[cur_sweep_points_num:, :]  # [-1, 4]
        if len(other_sweep_points) == 0:
            other_sweep_points = cur_sweep_points.copy()
        np.random.shuffle(cur_sweep_points)
        np.random.shuffle(other_sweep_points)

        input_sample_points, num_points_per_voxel = self.voxel_generator.generate_nusc(
            cur_sweep_points, other_sweep_points,
            cfg.DATASET.NUSCENE.MAX_CUR_SAMPLE_POINTS_NUM
        )  # points, [num_voxels, num_points, 5], sem_labels, [num_voxels, num_points]
        cur_sample_points = input_sample_points[
            :cfg.DATASET.NUSCENE.MAX_CUR_SAMPLE_POINTS_NUM]
        other_sample_points = input_sample_points[
            cfg.DATASET.NUSCENE.MAX_CUR_SAMPLE_POINTS_NUM:]

        biggest_label_num = max(biggest_label_num, cur_label_num)
        return biggest_label_num, input_sample_points, cur_sample_points, other_sample_points, label_boxes_3d, ry_cls_label, residual_angle, label_classes, label_attributes, label_velocity, sample_name, cur_transformation_matrix, sweeps, original_cur_sweep_points

    def load_batch(self, batch_size):
        perm = np.arange(
            self.sample_num).tolist()  # one index per sample
        dp = DataFromList(perm,
                          is_train=self.is_training,
                          shuffle=self.is_training)
        dp = MultiProcessMapData(dp, self.load_samples, self.workers_num)

        use_list = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]
        use_concat = [0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0]

        dp = BatchDataNuscenes(dp,
                               batch_size,
                               use_concat=use_concat,
                               use_list=use_list)
        dp.reset_state()
        dp = dp.get_data()
        return dp
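
    # A sketch of how the loading mode is typically driven (the surrounding cfg
    # and pipeline settings are assumptions, not part of this class):
    #   dataset = NuScenesDataset(mode='loading', img_list='train',
    #                             is_training=True, workers_num=4)
    #   batch_iter = dataset.load_batch(batch_size=2)
    #   for batch in batch_iter:
    #       ...  # feed the batch to the network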

    # Preprocess data
    def preprocess_samples(self, cur_scene_key, sample_data_token):
        sample_dicts = []
        biggest_label_num = 0

        cur_sample_data = self.nusc.get('sample_data', sample_data_token)
        cur_sample_token = cur_sample_data['sample_token']
        cur_sample = self.nusc.get('sample', cur_sample_token)

        ego_pose = self.nusc.get('ego_pose', cur_sample_data['ego_pose_token'])
        calibrated_sensor = self.nusc.get(
            'calibrated_sensor', cur_sample_data['calibrated_sensor_token'])

        l2e_r = calibrated_sensor['rotation']
        l2e_t = calibrated_sensor['translation']
        e2g_r = ego_pose['rotation']
        e2g_t = ego_pose['translation']
        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
        e2g_r_mat = Quaternion(e2g_r).rotation_matrix
        cur_timestamp = cur_sample['timestamp']

        cur_transformation_matrix = {
            'lidar2ego_translation': l2e_t,
            'lidar2ego_rotation': l2e_r,
            'ego2global_translation': e2g_t,
            'ego2global_rotation': e2g_r,
        }

        # gather the lidar sweeps from the preceding 0.5 s
        sweeps = []
        while len(sweeps) < self.max_sweeps:
            if cur_sample_data['prev'] != '':
                # a previous frame exists
                cur_sample_data = self.nusc.get('sample_data',
                                                cur_sample_data['prev'])
                cur_ego_pose = self.nusc.get('ego_pose',
                                             cur_sample_data['ego_pose_token'])
                cur_calibrated_sensor = self.nusc.get(
                    'calibrated_sensor',
                    cur_sample_data['calibrated_sensor_token'])
                cur_lidar_path, cur_sweep_boxes, _ = self.nusc.get_sample_data(
                    cur_sample_data['token'])
                sweep = {
                    "lidar_path": cur_lidar_path,
                    "sample_data_token": cur_sample_data['token'],
                    "lidar2ego_translation":
                    cur_calibrated_sensor['translation'],
                    "lidar2ego_rotation": cur_calibrated_sensor['rotation'],
                    "ego2global_translation": cur_ego_pose['translation'],
                    "ego2global_rotation": cur_ego_pose['rotation'],
                    "timestamp": cur_sample_data["timestamp"]
                }
                l2e_r_s = sweep["lidar2ego_rotation"]
                l2e_t_s = sweep["lidar2ego_translation"]
                e2g_r_s = sweep["ego2global_rotation"]
                e2g_t_s = sweep["ego2global_translation"]
                # sweep->ego->global->ego'->lidar
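                # (map sweep-lidar points through the sweep ego pose into the
                # global frame, then back through the current ego pose into the
                # current lidar frame; rotations act on row vectors here, hence
                # the transposed matrices, and points map as points @ R.T + T)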
                l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
                e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix

                R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
                    np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
                T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
                    np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
                T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                    l2e_r_mat).T) + l2e_t @ np.linalg.inv(l2e_r_mat).T

                sweep["sweep2lidar_rotation"] = R.T  # points @ R.T + T
                sweep["sweep2lidar_translation"] = T
                sweeps.append(sweep)
            else:  # prev is none
                break

        # then load gt_boxes_3d
        if self.img_list in ['train', 'val'] and cfg.TEST.WITH_GT:
            cur_data_path, all_boxes, _ = self.nusc.get_sample_data(
                sample_data_token)

            # then first parse boxes labels
            locs = np.array([box.center for box in all_boxes]).reshape(-1, 3)
            sizes = np.array([box.wlh for box in all_boxes]).reshape(-1, 3)
            rots = np.array([
                box.orientation.yaw_pitch_roll[0] for box in all_boxes
            ]).reshape(-1, 1)
            all_boxes_3d = np.concatenate([locs, sizes, -rots], axis=-1)

            annos_tokens = cur_sample['anns']
            all_velocity = np.array([
                self.nusc.box_velocity(ann_token)[:2]
                for ann_token in annos_tokens
            ])  # [-1, 2]
            for i in range(len(all_boxes)):
                velo = np.array([*all_velocity[i], 0.0])
                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                    l2e_r_mat).T
                all_velocity[i] = velo[:2]  # [-1, 2]

            attribute_tokens = [
                self.nusc.get('sample_annotation',
                              ann_token)['attribute_tokens']
                for ann_token in annos_tokens
            ]
            all_attribute = []
            for attribute_token in attribute_tokens:
                if len(attribute_token) == 0:
                    all_attribute.append([])
                else:
                    all_attribute.append(
                        self.nusc.get('attribute', attribute_token[0])['name'])
            # then filter these ignore labels
            categories = np.array([box.name for box in all_boxes])
            if self.img_list == 'train':
                useful_idx = [
                    index for index, category in enumerate(categories)
                    if self.useful_cls_dict[category] != 'ignore'
                ]
            else:
                useful_idx = [
                    index for index, category in enumerate(categories)
                ]
            if len(useful_idx) == 0:
                if self.img_list == 'train':
                    return None, biggest_label_num
                else:
                    all_boxes_3d = np.ones([1, 7], dtype=np.float32)
                    all_boxes_classes = np.array(['ignore'])
                    all_attribute = np.array([-1])
                    all_velocity = np.array([[0, 0]], dtype=np.float32)
            else:
                all_boxes_3d = all_boxes_3d[useful_idx]

                categories = categories[useful_idx]
                all_boxes_classes = np.array(
                    [self.useful_cls_dict[cate] for cate in categories])
                # now calculate the mean size of each box
                for tmp_idx, all_boxes_class in enumerate(all_boxes_classes):
                    cur_mean_size = self.cls_size_dict[
                        all_boxes_class] * self.cls_num_dict[all_boxes_class]
                    cur_cls_num = self.cls_num_dict[all_boxes_class] + 1
                    cur_total_size = cur_mean_size + all_boxes_3d[tmp_idx, [
                        4, 5, 3
                    ]]  # [l, w, h]
                    cur_mean_size = cur_total_size / cur_cls_num
                    self.cls_size_dict[all_boxes_class] = cur_mean_size
                    self.cls_num_dict[all_boxes_class] = cur_cls_num

                all_attribute = [
                    all_attribute[tmp_idx] for tmp_idx in useful_idx
                ]
                tmp_attribute = []
                for attr in all_attribute:
                    if attr == []: tmp_attribute.append(-1)
                    else:
                        tmp_attribute.append(self.attribute_idx_list[attr])
                all_attribute = tmp_attribute
                all_attribute = np.array(all_attribute, dtype=np.int32)
                all_velocity = [
                    all_velocity[tmp_idx] for tmp_idx in useful_idx
                ]
                all_velocity = np.array(all_velocity, dtype=np.float32)
        else:
            cur_data_path = self.nusc.get_sample_data_path(sample_data_token)

        # then assemble the per-sample dictionary
        if self.img_list in ['train', 'val', 'trainval'] and cfg.TEST.WITH_GT:
            sample_dict = {
                maps_dict.KEY_LABEL_BOXES_3D: all_boxes_3d,
                maps_dict.KEY_LABEL_CLASSES: all_boxes_classes,
                maps_dict.KEY_LABEL_ATTRIBUTES: all_attribute,
                maps_dict.KEY_LABEL_VELOCITY: all_velocity,
                maps_dict.KEY_LABEL_NUM: len(all_boxes_3d),
                maps_dict.KEY_POINT_CLOUD: cur_data_path,
                maps_dict.KEY_TRANSFORMRATION_MATRIX: cur_transformation_matrix,
                maps_dict.KEY_SAMPLE_NAME: '{}/{}/{}'.format(
                    cur_scene_key, cur_sample_token, sample_data_token),
                maps_dict.KEY_SWEEPS: sweeps,
                maps_dict.KEY_TIMESTAMPS: cur_timestamp,
            }
            biggest_label_num = max(len(all_boxes_3d), biggest_label_num)
        else:
            # img_list is test
            sample_dict = {
                maps_dict.KEY_POINT_CLOUD: cur_data_path,
                maps_dict.KEY_SAMPLE_NAME: '{}/{}/{}'.format(
                    cur_scene_key, cur_sample_token, sample_data_token),
                maps_dict.KEY_TRANSFORMRATION_MATRIX: cur_transformation_matrix,
                maps_dict.KEY_SWEEPS: sweeps,
                maps_dict.KEY_TIMESTAMPS: cur_timestamp,
            }
        return sample_dict, biggest_label_num

    def preprocess_batch(self):
        # if the mixup database is enabled, also collect the per-box point clouds
        if cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:  # also save mixup database
            mixup_label_dict = dict([(cls, []) for cls in self.mixup_db_class])

        sample_dicts_list = []
        for scene_key, v in tqdm.tqdm(self.sample_data_token_list.items()):
            for sample_data_token in v:
                sample_dict, tmp_biggest_label_num = self.preprocess_samples(
                    scene_key, sample_data_token)
                if sample_dict is None:
                    continue
                # else save the result
                sample_dicts_list.append(sample_dict)

                # create_gt_dataset
                if self.img_list in [
                        'train', 'val', 'trainval'
                ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
                    mixup_sample_dicts = self.generate_mixup_sample(
                        sample_dict)
                    if mixup_sample_dicts is None: continue
                    for mixup_sample_dict in mixup_sample_dicts:
                        cur_cls = mixup_sample_dict[
                            maps_dict.KEY_SAMPLED_GT_CLSES]
                        mixup_label_dict[cur_cls].append(mixup_sample_dict)

        # save preprocessed data
        with open(self.train_list, 'wb') as f:
            pickle.dump(sample_dicts_list, f)
        for k, v in self.cls_num_dict.items():
            print('class name: %s / class num: %d / mean size: (%f, %f, %f)' %
                  (k, v, self.cls_size_dict[k][0], self.cls_size_dict[k][1],
                   self.cls_size_dict[k][2]))  # [l, w, h]

        if self.img_list in [
                'train', 'val', 'trainval'
        ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
            print('**** Generating groundtruth database ****')
            for cur_cls_name, mixup_sample_dict in mixup_label_dict.items():
                cur_mixup_db_cls_path = self.mixup_db_cls_path[cur_cls_name]
                cur_mixup_db_trainlist_path = self.mixup_db_trainlist_path[
                    cur_cls_name]
                print('**** Class %s ****' % cur_cls_name)
                with open(cur_mixup_db_trainlist_path, 'w') as f:
                    for tmp_idx, tmp_cur_mixup_sample_dict in tqdm.tqdm(
                            enumerate(mixup_sample_dict)):
                        f.write('%06d.npy\n' % tmp_idx)
                        np.save(
                            os.path.join(cur_mixup_db_cls_path,
                                         '%06d.npy' % tmp_idx),
                            tmp_cur_mixup_sample_dict)
        print('Ending of the preprocess !!!')
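
    # Preprocessing is typically run once per split before training (a sketch;
    # the cfg paths are assumptions):
    #   dataset = NuScenesDataset(mode='preprocessing', img_list='train',
    #                             is_training=False)
    #   dataset.preprocess_batch()  # writes infos.pkl and the optional mixup DB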

    def generate_mixup_sample(self, sample_dict):
        """ Generate the per-box entries used to build the mixup (GT-sampling) database """
        all_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D]
        all_boxes_classes = sample_dict[maps_dict.KEY_LABEL_CLASSES]
        point_cloud_path = sample_dict[maps_dict.KEY_POINT_CLOUD]

        # then we first cast all_boxes_3d to kitti format
        all_boxes_3d = cast_box_3d_to_kitti_format(all_boxes_3d)

        # load points
        points = np.fromfile(point_cloud_path, dtype=np.float32).reshape(
            (-1, 5))
        points = cast_points_to_kitti(points)
        points[:, 3] /= 255
        points[:, 4] = 0  # timestamp is zero

        points_mask = check_inside_points(points,
                                          all_boxes_3d)  # [pts_num, gt_num]
        points_masks_num = np.sum(points_mask, axis=0)  # [gt_num]
        valid_box_idx = np.where(
            points_masks_num >= cfg.DATASET.MIN_POINTS_NUM)[0]

        if len(valid_box_idx) == 0:
            return None

        valid_label_boxes_3d = all_boxes_3d[valid_box_idx]
        valid_label_classes = all_boxes_classes[valid_box_idx]

        sample_dicts = []
        for index, i in enumerate(valid_box_idx):
            cur_points_mask = points_mask[:, i]
            cur_points_idx = np.where(cur_points_mask)[0]
            cur_inside_points = points[cur_points_idx, :]
            sample_dict = {
                # points inside the box, in KITTI format (timestamp 0, reflectance / 255)
                maps_dict.KEY_SAMPLED_GT_POINTS: cur_inside_points,
                maps_dict.KEY_SAMPLED_GT_LABELS_3D: valid_label_boxes_3d[index],
                maps_dict.KEY_SAMPLED_GT_CLSES: valid_label_classes[index],
            }
            sample_dicts.append(sample_dict)
        return sample_dicts

    # Evaluation
    def set_evaluation_tensor(self, model):
        # get prediction results, bs = 1
        pred_bbox_3d = tf.squeeze(model.output[maps_dict.PRED_3D_BBOX][-1],
                                  axis=0)
        pred_cls_score = tf.squeeze(model.output[maps_dict.PRED_3D_SCORE][-1],
                                    axis=0)
        pred_cls_category = tf.squeeze(
            model.output[maps_dict.PRED_3D_CLS_CATEGORY][-1], axis=0)
        pred_list = [pred_bbox_3d, pred_cls_score, pred_cls_category]

        if len(model.output[maps_dict.PRED_3D_ATTRIBUTE]) > 0:
            pred_attribute = tf.squeeze(
                model.output[maps_dict.PRED_3D_ATTRIBUTE][-1], axis=0)
            pred_velocity = tf.squeeze(
                model.output[maps_dict.PRED_3D_VELOCITY][-1], axis=0)
            pred_list.extend([pred_attribute, pred_velocity])
        return pred_list

    def evaluate_map(self,
                     sess,
                     feeddict_producer,
                     pred_list,
                     val_size,
                     cls_thresh,
                     log_dir,
                     placeholders=None):
        submissions = {}
        submissions['meta'] = dict()
        submissions['meta']['use_camera'] = False
        submissions['meta']['use_lidar'] = True
        submissions['meta']['use_radar'] = False
        submissions['meta']['use_map'] = False
        submissions['meta']['use_external'] = False

        submissions_results = dict()
        pred_attr_velo = (len(pred_list) == 5)

        for i in tqdm.tqdm(range(val_size)):
            feed_dict = feeddict_producer.create_feed_dict()

            if pred_attr_velo:
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op, pred_attr_op, pred_velo_op = sess.run(
                    pred_list, feed_dict=feed_dict)
            else:
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op = sess.run(
                    pred_list, feed_dict=feed_dict)
            pred_cls_category_op += 1  # label from 1 to n

            sample_name, cur_transformation_matrix, sweeps = feeddict_producer.info
            sample_name = sample_name[0]
            cur_transformation_matrix = cur_transformation_matrix[0]
            sweeps = sweeps[0]
            cur_scene_key, cur_sample_token, cur_sample_data_token = sample_name.split(
                '/')

            select_idx = np.where(pred_cls_score_op >= cls_thresh)[0]
            pred_cls_score_op = pred_cls_score_op[select_idx]
            pred_cls_category_op = pred_cls_category_op[select_idx]
            pred_bbox_3d_op = pred_bbox_3d_op[select_idx]
            if pred_attr_velo:
                pred_attr_op = pred_attr_op[select_idx]
                pred_velo_op = pred_velo_op[select_idx]
            else:
                pred_attr_op, pred_velo_op = None, None

            if len(pred_bbox_3d_op) > 500:
                arg_sort_idx = np.argsort(pred_cls_score_op)[::-1]
                arg_sort_idx = arg_sort_idx[:500]
                pred_cls_score_op = pred_cls_score_op[arg_sort_idx]
                pred_cls_category_op = pred_cls_category_op[arg_sort_idx]
                pred_bbox_3d_op = pred_bbox_3d_op[arg_sort_idx]
                if pred_attr_velo:
                    pred_attr_op = pred_attr_op[arg_sort_idx]
                    pred_velo_op = pred_velo_op[arg_sort_idx]

            # then transform pred_bbox_op to nuscenes_box
            boxes = cast_kitti_format_to_nusc_box_3d(
                pred_bbox_3d_op,
                pred_cls_score_op,
                pred_cls_category_op,
                cur_attribute=pred_attr_op,
                cur_velocity=pred_velo_op,
                classes=self.idx2cls_dict)
            for box in boxes:
                velocity = box.velocity[:2].tolist()
                if len(sweeps) == 0:
                    velocity = (np.nan, np.nan)
                box.velocity = np.array([*velocity, 0.0])
            # then cast the boxes from the lidar frame to the global frame
            boxes = _lidar_nusc_box_to_global(cur_transformation_matrix,
                                              boxes,
                                              self.idx2cls_dict,
                                              eval_version='cvpr_2019')

            annos = []
            for box in boxes:
                name = self.idx2cls_dict[box.label]
                if box.name == -1:
                    attr = self.DefaultAttribute[name]
                else:
                    attr = self.AttributeIdxLabelMapping[name][box.name]
                velocity = box.velocity[:2].tolist()
                nusc_anno = {
                    "sample_token": cur_sample_token,
                    "translation": box.center.tolist(),
                    "size": box.wlh.tolist(),
                    "rotation": box.orientation.elements.tolist(),
                    "velocity": velocity,
                    "detection_name": name,
                    "detection_score": box.score,
                    "attribute_name": attr,
                }
                annos.append(nusc_anno)
            submissions_results[cur_sample_token] = annos

        submissions['results'] = submissions_results

        res_path = os.path.join(log_dir, "results_nusc_1.json")
        with open(res_path, "w") as f:
            json.dump(submissions, f)
        eval_main_file = os.path.join(cfg.ROOT_DIR, 'lib/core/nusc_eval.py')
        root_path = self.dataset_dir
        cmd = f"python3 {str(eval_main_file)} --root_path=\"{str(root_path)}\""
        cmd += f" --version={'v1.0-trainval'} --eval_version={'cvpr_2019'}"
        cmd += f" --res_path=\"{str(res_path)}\" --eval_set={'val'}"
        cmd += f" --output_dir=\"{LOG_FOUT_DIR}\""
        # running the evaluation in a subprocess releases all nuScenes memory afterwards
        subprocess.check_output(cmd, shell=True)
        os.system('rm \"%s\"' % res_path)  # remove former result file

        with open(os.path.join(log_dir, "metrics_summary.json"), "r") as f:
            metrics = json.load(f)
        return metrics

    def evaluate_recall(self,
                        sess,
                        feeddict_producer,
                        pred_list,
                        val_size,
                        cls_thresh,
                        log_dir,
                        placeholders=None):
        pass

    def logger_and_select_best_map(self, metrics, log_string):
        detail = {}
        result = f"Nusc v1.0-trainval Evaluation\n"
        final_score = []
        for name in self.cls_list:
            detail[name] = {}
            for k, v in metrics["label_aps"][name].items():
                detail[name][f"dist@{k}"] = v
            tp_errs = []
            tp_names = []
            for k, v in metrics["label_tp_errors"][name].items():
                detail[name][k] = v
                tp_errs.append(f"{v:.4f}")
                tp_names.append(k)
            threshs = ', '.join(list(metrics["label_aps"][name].keys()))
            scores = list(metrics["label_aps"][name].values())
            final_score.append(np.mean(scores))
            scores = ', '.join([f"{s * 100:.2f}" for s in scores])
            result += f"{name} Nusc dist AP@{threshs} and TP errors\n"
            result += scores
            result += "\n"
            result += "mAP: %0.2f\n" % (
                np.mean(list(metrics["label_aps"][name].values())) * 100)
            result += ', '.join(tp_names) + ": " + ', '.join(tp_errs)
            result += "\n"
        result += 'NDS score: %0.2f\n' % (metrics['nd_score'] * 100)
        log_string(result)

        cur_result = metrics['nd_score']
        return cur_result

    def logger_and_select_best_recall(self, metrics, log_string):
        pass

    # save prediction results
    def save_predictions(self,
                         sess,
                         feeddict_producer,
                         pred_list,
                         val_size,
                         cls_thresh,
                         log_dir,
                         placeholders=None):
        pass
Example #3
def main():
    SCENE_SPLITS["mini-val"] = SCENE_SPLITS["val"]
    if not os.path.exists(DATA_PATH):
        os.mkdir(DATA_PATH)
    if not os.path.exists(OUT_PATH):
        os.mkdir(OUT_PATH)
    for split in SPLITS:
        data_path = DATA_PATH  # + '{}/'.format(SPLITS[split])
        nusc = NuScenes(version=SPLITS[split],
                        dataroot=data_path,
                        verbose=True)
        out_path = OUT_PATH + "{}.json".format(split)
        categories_info = [{"name": CATS[i], "id": i + 1}
                           for i in range(len(CATS))]
        ret = {
            "images": [],
            "annotations": [],
            "categories": categories_info,
            "videos": [],
            "attributes": ATTRIBUTE_TO_ID,
        }
        num_images = 0
        num_anns = 0
        num_videos = 0

        # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
        for sample in nusc.sample:
            scene_name = nusc.get("scene", sample["scene_token"])["name"]
            if (split not in ["mini", "test"]
                    and scene_name not in SCENE_SPLITS[split]):
                continue
            if sample["prev"] == "":
                print("scene_name", scene_name)
                num_videos += 1
                ret["videos"].append({
                    "id": num_videos,
                    "file_name": scene_name
                })
                frame_ids = {k: 0 for k in sample["data"]}
                track_ids = {}
            # We decompose a sample into 6 images in our case.
            for sensor_name in sample["data"]:
                if sensor_name in USED_SENSOR:
                    image_token = sample["data"][sensor_name]
                    image_data = nusc.get("sample_data", image_token)
                    num_images += 1

                    # Complex coordinate transform. This will take time to understand.
                    sd_record = nusc.get("sample_data", image_token)
                    cs_record = nusc.get("calibrated_sensor",
                                         sd_record["calibrated_sensor_token"])
                    pose_record = nusc.get("ego_pose",
                                           sd_record["ego_pose_token"])
                    global_from_car = transform_matrix(
                        pose_record["translation"],
                        Quaternion(pose_record["rotation"]),
                        inverse=False,
                    )
                    car_from_sensor = transform_matrix(
                        cs_record["translation"],
                        Quaternion(cs_record["rotation"]),
                        inverse=False,
                    )
                    trans_matrix = np.dot(global_from_car, car_from_sensor)
                    _, boxes, camera_intrinsic = nusc.get_sample_data(
                        image_token, box_vis_level=BoxVisibility.ANY)
                    calib = np.eye(4, dtype=np.float32)
                    calib[:3, :3] = camera_intrinsic
                    calib = calib[:3]
                    frame_ids[sensor_name] += 1

                    # image information in COCO format
                    image_info = {
                        "id": num_images,
                        "file_name": image_data["filename"],
                        "calib": calib.tolist(),
                        "video_id": num_videos,
                        "frame_id": frame_ids[sensor_name],
                        "sensor_id": SENSOR_ID[sensor_name],
                        "sample_token": sample["token"],
                        "trans_matrix": trans_matrix.tolist(),
                        "width": sd_record["width"],
                        "height": sd_record["height"],
                        "pose_record_trans": pose_record["translation"],
                        "pose_record_rot": pose_record["rotation"],
                        "cs_record_trans": cs_record["translation"],
                        "cs_record_rot": cs_record["rotation"],
                    }
                    ret["images"].append(image_info)
                    anns = []
                    for box in boxes:
                        det_name = category_to_detection_name(box.name)
                        if det_name is None:
                            continue
                        num_anns += 1
                        v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
                        yaw = -np.arctan2(v[2], v[0])
                        box.translate(np.array([0, box.wlh[2] / 2, 0]))
                        category_id = CAT_IDS[det_name]

                        amodel_center = project_to_image(
                            np.array(
                                [
                                    box.center[0],
                                    box.center[1] - box.wlh[2] / 2,
                                    box.center[2],
                                ],
                                np.float32,
                            ).reshape(1, 3),
                            calib,
                        )[0].tolist()
                        sample_ann = nusc.get("sample_annotation", box.token)
                        instance_token = sample_ann["instance_token"]
                        if not (instance_token in track_ids):
                            track_ids[instance_token] = len(track_ids) + 1
                        attribute_tokens = sample_ann["attribute_tokens"]
                        attributes = [
                            nusc.get("attribute", att_token)["name"]
                            for att_token in attribute_tokens
                        ]
                        att = "" if len(attributes) == 0 else attributes[0]
                        if len(attributes) > 1:
                            print(attributes)
                            import pdb

                            pdb.set_trace()
                        track_id = track_ids[instance_token]
                        vel = nusc.box_velocity(box.token)  # global frame
                        vel = np.dot(
                            np.linalg.inv(trans_matrix),
                            np.array([vel[0], vel[1], vel[2], 0], np.float32),
                        ).tolist()

                        # instance information in COCO format
                        ann = {
                            "id": num_anns,
                            "image_id": num_images,
                            "category_id": category_id,
                            "dim": [box.wlh[2], box.wlh[0], box.wlh[1]],
                            "location": [box.center[0], box.center[1],
                                         box.center[2]],
                            "depth": box.center[2],
                            "occluded": 0,
                            "truncated": 0,
                            "rotation_y": yaw,
                            "amodel_center": amodel_center,
                            "iscrowd": 0,
                            "track_id": track_id,
                            "attributes": ATTRIBUTE_TO_ID[att],
                            "velocity": vel,
                        }

                        bbox = KittiDB.project_kitti_box_to_image(
                            copy.deepcopy(box),
                            camera_intrinsic,
                            imsize=(1600, 900))
                        alpha = _rot_y2alpha(
                            yaw,
                            (bbox[0] + bbox[2]) / 2,
                            camera_intrinsic[0, 2],
                            camera_intrinsic[0, 0],
                        )
                        ann["bbox"] = [
                            bbox[0],
                            bbox[1],
                            bbox[2] - bbox[0],
                            bbox[3] - bbox[1],
                        ]
                        ann["area"] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                        ann["alpha"] = alpha
                        anns.append(ann)

                    # Filter out boxes that are entirely hidden behind a closer
                    # box (farther in depth, 2D box contained in the closer one)
                    visable_anns = []
                    for i in range(len(anns)):
                        vis = True
                        for j in range(len(anns)):
                            if (anns[i]["depth"] - min(anns[i]["dim"]) / 2 >
                                    anns[j]["depth"] + max(anns[j]["dim"]) / 2
                                    and _bbox_inside(anns[i]["bbox"],
                                                     anns[j]["bbox"])):
                                vis = False
                                break
                        if vis:
                            visable_anns.append(anns[i])
                        else:
                            pass

                    for ann in visable_anns:
                        ret["annotations"].append(ann)
                    if DEBUG:
                        img_path = data_path + image_info["file_name"]
                        img = cv2.imread(img_path)
                        img_3d = img.copy()
                        for ann in visable_anns:
                            bbox = ann["bbox"]
                            cv2.rectangle(
                                img,
                                (int(bbox[0]), int(bbox[1])),
                                (int(bbox[2] + bbox[0]),
                                 int(bbox[3] + bbox[1])),
                                (0, 0, 255),
                                3,
                                lineType=cv2.LINE_AA,
                            )
                            box_3d = compute_box_3d(ann["dim"],
                                                    ann["location"],
                                                    ann["rotation_y"])
                            box_2d = project_to_image(box_3d, calib)
                            img_3d = draw_box_3d(img_3d, box_2d)

                            pt_3d = unproject_2d_to_3d(ann["amodel_center"],
                                                       ann["depth"], calib)
                            pt_3d[1] += ann["dim"][0] / 2
                            print("location", ann["location"])
                            print("loc model", pt_3d)
                            pt_2d = np.array(
                                [(bbox[0] + bbox[2]) / 2,
                                 (bbox[1] + bbox[3]) / 2],
                                dtype=np.float32,
                            )
                            pt_3d = unproject_2d_to_3d(pt_2d, ann["depth"],
                                                       calib)
                            pt_3d[1] += ann["dim"][0] / 2
                            print("loc      ", pt_3d)
                        cv2.imshow("img", img)
                        cv2.imshow("img_3d", img_3d)
                        cv2.waitKey()
                        nusc.render_sample_data(image_token)
                        plt.show()
        print("reordering images")
        images = ret["images"]
        video_sensor_to_images = {}
        for image_info in images:
            tmp_seq_id = image_info["video_id"] * 20 + image_info["sensor_id"]
            if tmp_seq_id in video_sensor_to_images:
                video_sensor_to_images[tmp_seq_id].append(image_info)
            else:
                video_sensor_to_images[tmp_seq_id] = [image_info]
        ret["images"] = []
        for tmp_seq_id in sorted(video_sensor_to_images):
            ret["images"] = ret["images"] + video_sensor_to_images[tmp_seq_id]

        print("{} {} images {} boxes".format(split, len(ret["images"]),
                                             len(ret["annotations"])))
        print("out_path", out_path)
        json.dump(ret, open(out_path, "w"))
Example #4
class NuScenesPrepare():
    NameMapping = {
        'movable_object.barrier': 'barrier',
        'movable_object.trafficcone': 'barrier',
        'vehicle.bicycle': 'cyclist',
        'vehicle.motorcycle': 'cyclist',
        'vehicle.bus.bendy': 'vehicle',
        'vehicle.bus.rigid': 'vehicle',
        'vehicle.car': 'vehicle',
        'vehicle.trailer': 'vehicle',
        'vehicle.truck': 'vehicle',
        'vehicle.construction': 'vehicle',
        'human.pedestrian.adult': 'pedestrian',
        'human.pedestrian.child': 'pedestrian',
        'human.pedestrian.construction_worker': 'pedestrian',
        'human.pedestrian.police_officer': 'pedestrian',
    }
    labelmapping=LabelEncoder()
    labelmapping.fit(['barrier','cyclist','pedestrian','vehicle'])
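    # LabelEncoder assigns integer ids by sorted class name:
    # barrier -> 0, cyclist -> 1, pedestrian -> 2, vehicle -> 3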
    DefaultAttribute = {
        "car": "vehicle.parked",
        "pedestrian": "pedestrian.moving",
        "trailer": "vehicle.parked",
        "truck": "vehicle.parked",
        "bus": "vehicle.parked",
        "motorcycle": "cycle.without_rider",
        "construction_vehicle": "vehicle.parked",
        "bicycle": "cycle.without_rider",
        "barrier": "",
        "traffic_cone": "",
    }

    def __init__(self,args):
        self.args_dp=args['DataPrepare']
        self.args_vg=args['VoxelGenerator']
        self.cache_path=self.args_dp.data_root+'/'+self.args_dp.cache_name

        if os.path.exists(self.cache_path):
            with open(self.cache_path, 'rb') as f:
                self._Data_frags = pickle.load(f)
        else:
            self.nusc = NuScenes(version=self.args_dp.version, dataroot=self.args_dp.data_root, verbose=self.args_dp.verbose)
            self._Data_frags = self.getFragAnnotations()
            with open(self.cache_path, 'wb') as f:
                pickle.dump(self._Data_frags, f)

        if True:
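            # flatten the per-scene fragment lists into one flat list indexed by __getitem__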
            self._Data_frags=[item for scene_data in self._Data_frags for item in scene_data]

    def __len__(self):
        return len(self._Data_frags)

    def __getitem__(self, idx):
        c_frag = self._Data_frags[idx]
        res = {
            "lidar": {
                "type": "lidar",
                "points": None,
            },
            "gt_boxes": {
                "obs": None,
                "pred": None,
            },
            "metadata": {
                "token": c_frag['Data_frags'][self.args_dp.obs_length-1] # token of last observed frame
            },
        }
        sweep_voxels,sweep_coords,sweep_num_voxels,sweep_num_points=[],[],[],[]
        bev_imgs, cam_imgs = [], []
        gt_boxes=[]
        #ts = c_frag['Data_frags'][self.args.obs_length-1]["timestamp"] / 1e6

        #get BEV sweeps
        for fi,c_frame in enumerate(c_frag['Data_frags']):
            if fi < self.args_dp.obs_length:
                # get Annotations
                gt_boxes.append(c_frame['boxes'])
                # load lidar points
                lidar_path = c_frame['lidar_path']
                points = np.fromfile(
                    str(Path(self.args_dp.data_root)/lidar_path), dtype=np.float32, count=-1).reshape([-1, 5])
                points[:, 3] /= 255
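                # nuScenes lidar points are (x, y, z, intensity, ring index); intensity (0-255 in the
                # raw files) is rescaled above, and the precomputed relative pose R_cl2rl/T_cl2rl then
                # expresses this sweep in the reference (last observed key-frame) lidar frame.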
                points[:, :3] = points[:, :3] @ c_frame["R_cl2rl"].T
                points[:, :3] += c_frame["T_cl2rl"]
                # generate voxel bev
                voxel_res=self.getVoxel(points)
                if 'bev_img' in self.args_vg.bev_data:
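                    # Occupancy-style BEV: each cell stores the point count of its voxel; the generator
                    # emits coordinates in (z, y, x) order, hence the reversed indexing below.
                    # Illustrative sizing (hypothetical values): point_cloud_range = [-50, -50, -3, 50, 50, 1]
                    # with voxel_size = [0.25, 0.25, 4] gives bev_img_size = [400, 400, 1].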
                    bev_img_size = [int(np.ceil((self.args_vg.point_cloud_range[3 + i] - self.args_vg.point_cloud_range[i])
                                       / self.args_vg.voxel_size[i])) for i in range(3)]
                    bev_img = np.zeros(bev_img_size)
                    bev_img[voxel_res['coordinates'][:, 2], voxel_res['coordinates'][:, 1], voxel_res['coordinates'][:, 0]]\
                    =voxel_res["num_points_per_voxel"]
                    bev_imgs.append(bev_img)
                if 'bev_index' in self.args_vg.bev_data:
                    sweep_voxels.append(voxel_res['voxels'])
                    sweep_coords.append(voxel_res['coordinates'])
                    sweep_num_voxels.append(np.array([voxel_res['voxels'].shape[0]], dtype=np.int64))
                    sweep_num_points.append( voxel_res["num_points_per_voxel"])
                # Load image
                if self.args_dp.use_image == 'last_image': # only use the image of the last observed frame
                    load_image = fi == self.args_dp.obs_length - 1
                elif self.args_dp.use_image == 'key_images': # use the images of all key frames
                    load_image = 'cam_path' in c_frame.keys()
                else: # unrecognised setting: skip images rather than leave load_image unbound
                    load_image = False
                if load_image:
                    if Path(c_frame['cam_path']).exists():
                        with open(str(c_frame['cam_path']), 'rb') as f:
                            image_str = f.read()
                    else:
                        image_str=None
                    cam_imgs.append(image_str)

        res["lidar"]["voxel_sweeps"] =  np.concatenate(sweep_voxels, axis=0)
        res["lidar"]["bev_imgs"] =  np.stack(bev_imgs, axis=0)
        res["lidar"]["coordinates"] =  np.concatenate(sweep_coords, axis=0)
        res["lidar"]["num_voxels"] =  np.concatenate(sweep_num_voxels, axis=0)
        res["lidar"]["num_points"] =  np.concatenate(sweep_num_points, axis=0)
        res["cam"] = {
            "type": "camera",
            "data": image_str,
            "datatype": Path(c_frag['Data_frags'][self.args_dp.obs_length-1]['cam_path']).suffix[1:],
        }
        gt_boxes=np.stack(gt_boxes,axis=0)
        res["gt_boxes"]["obs"]=gt_boxes[:self.args_dp.obs_length]
        res["gt_boxes"]["pred"] =gt_boxes[self.args_dp.obs_length:]
        res['cls_label']=self.labelmapping.fit_transform(c_frag['names'])

        return res
        #Ground Truth (by instance)
        #res["GT"]["obs"]=
        #image of last
    def getVoxel(self,points):
        max_voxels=100000
        voxel_generator = VoxelGeneratorV2(
            voxel_size=list(self.args_vg.voxel_size),
            point_cloud_range=list(self.args_vg.point_cloud_range),
            max_num_points=self.args_vg.max_number_of_points_per_voxel,
            max_voxels=max_voxels,
            full_mean=self.args_vg.full_empty_part_with_mean,
            block_filtering=self.args_vg.block_filtering,
            block_factor=self.args_vg.block_factor,
            block_size=self.args_vg.block_size,
            height_threshold=self.args_vg.height_threshold)
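        # generate() (called below) returns a dict holding at least 'voxels', 'coordinates' and
        # 'num_points_per_voxel', which is exactly what __getitem__ consumes above.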

        res = voxel_generator.generate(
            points, max_voxels)
        return res
    def getSamplebyFrame(self):
        ################# collect every lidar sweep (sample_data frame) for each scene
        scene_all=[]
        for si,scene in enumerate(self.nusc.scene):
            sample_interp_all = []
            first_sample = self.nusc.get('sample', scene['first_sample_token'])
            sd_rec = self.nusc.get('sample_data', first_sample['data']["LIDAR_TOP"])
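            # walk the LIDAR_TOP sample_data linked list via 'next' to collect every sweep in the
            # scene, key frames and intermediate (non-annotated) sweeps alike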
            sample_interp_all.append(sd_rec)
            while sd_rec['next'] != '':
                sd_rec = self.nusc.get('sample_data', sd_rec['next'])
                sample_interp_all.append(sd_rec)
            scene_all.append(sample_interp_all)
        return scene_all
    def getFragAnnotations(self):

        scene_frames = self.getSamplebyFrame()
        Data_frags=[]
        key_slide_window=int(self.args_dp.obs_length*2) #find key frame in this time window
        si_start=0
        if os.path.exists(self.cache_path):
            with open(self.cache_path, 'rb') as f:
                Data_frags = pickle.load(f)
            si_start = len(Data_frags)
        print('-------------Preparing fragments--------------')
        for si in range(si_start,len(scene_frames)):
            scene_data=scene_frames[si]
            start = time.time()
            scene_frags = []
            for di,sample_data in enumerate(scene_data):
                frag_info={}
                if sample_data['is_key_frame']:
                    if di <= self.args_dp.obs_length or di >= len(scene_data)-self.args_dp.pred_length*self.args_dp.interval:
                        continue
                    cur_frag_index=[i+1 for i in range(di-self.args_dp.obs_length,di+self.args_dp.pred_length)]#the fragment index
                    if di != cur_frag_index[self.args_dp.obs_length-1]:
                        print('error: fragment index misaligned at scene {} frame {}'.format(si, di))
                    start_key=max(0,min(di-self.args_dp.obs_length,di - key_slide_window))
                    end_key=min(len(scene_data)-1,max(di+self.args_dp.pred_length,di + key_slide_window))
                    cur_key_index = [i+1 for i in range(start_key,end_key)]#find key frame in this index
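                    # Illustrative example (hypothetical obs_length=5, pred_length=6, interval=1): a key
                    # frame at di=40 gives cur_frag_index=[36..46] (frames 36-40 observed, 41-46 predicted,
                    # di itself being the last observed frame), while cur_key_index spans the wider
                    # key_slide_window used only to locate neighbouring key frames.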

                    ## Get reference coordinates
                    refer_frame = sample_data
                    refer_cs_rec = self.nusc.get('calibrated_sensor', refer_frame['calibrated_sensor_token'])
                    refer_pos_rec = self.nusc.get('ego_pose', refer_frame['ego_pose_token'])
                    R_rl2re, T_rl2re = refer_cs_rec['rotation'], refer_cs_rec['translation']
                    R_re2g, T_re2g = refer_pos_rec['rotation'], refer_pos_rec['translation']
                    R_rl2re_mat = Quaternion(R_rl2re).rotation_matrix
                    R_re2g_mat = Quaternion(R_re2g).rotation_matrix

                    # get key frame location
                    key_frame_flag = np.zeros((len(cur_key_index),), dtype='bool')
                    key_frame_index = []
                    for i, d in enumerate(cur_key_index):
                        try:
                            if scene_data[d]['is_key_frame']:
                                key_frame_index.append(i)
                                key_frame_flag[i] = True
                        except IndexError:
                            print('error: key-frame index {} out of range at scene {}'.format(d, si))
                    key_frames = np.array(scene_data[cur_key_index[0]:(cur_key_index[-1]+1)])[key_frame_flag]

                    # only key frames carry annotations, so first gather the key-frame info
                    key_sample, key_sample_token, key_instances, key_annotations, key_velocity = [], [], [], [], []
                    for k, key_frame in enumerate(key_frames):
                        sample_token = key_frame['sample_token']
                        sample = self.nusc.get('sample', sample_token)
                        annotations = [
                            self.nusc.get('sample_annotation', token)
                            for token in sample['anns']
                        ]
                        velocity = np.array(
                            [self.nusc.box_velocity(token)[:2] for token in sample['anns']])
                        key_sample_token.append(sample_token)
                        key_instances.append([anno['instance_token'] for anno in annotations])
                        key_sample.append(sample)
                        key_annotations.append(annotations)
                        key_velocity.append(velocity)

                    # get the instance tokens present in every key frame of the candidate fragment
                    instances_intersect = list(set.intersection(*[set(i) for i in key_instances]))
                    #instances_union = list(set.union(*[set(i) for i in key_instances]))

                    # flags marking the instances that are present throughout the fragment
                    valid_inst_flags = [np.zeros(len(kinst), dtype='bool') for kinst in key_instances]
                    for kinst, key_inst in enumerate(key_instances):
                        for vkinst, valid_inst in enumerate(key_inst):
                            if valid_inst in instances_intersect:
                                valid_inst_flags[kinst][vkinst] = True

                    ##########################################
                    ##             Prepare fragments Database
                    ##########################################
                    cur_key_frame_index = []
                    for i, d in enumerate(cur_frag_index):
                        if scene_data[d]['is_key_frame']:
                            cur_key_frame_index.append(i)

                    frag_info['Data_frags'] = []
                    frag_info['instance_token'] = instances_intersect
                    frag_info['key_frame_index'] = cur_key_frame_index
                    frag_info['last_obs_frame']=di
                    frag_info['scene_No'] = si
                    for i, d in enumerate(cur_frag_index):

                        frag_data = {}
                        sample_data=scene_data[d]
                        sample_token = sample_data['sample_token']
                        sample = self.nusc.get('sample', sample_token)

                        # find the key sample this frame data belongs to
                        try:
                            key_sample_ind = key_sample_token.index(sample_token)
                        except ValueError:
                            print('cannot find the corresponding key frame at scene {} frame {}'.format(si, d))

                        valid_inst_flag = valid_inst_flags[key_sample_ind]

                        ## Pose matrix: lidar2ego and ego2global
                        s_cs_rec = self.nusc.get('calibrated_sensor', sample_data['calibrated_sensor_token'])
                        s_pos_rec = self.nusc.get('ego_pose', sample_data['ego_pose_token'])

                        R_cl2ce, T_cl2ce = s_cs_rec['rotation'], s_cs_rec['translation']
                        R_ce2g, T_ce2g = s_pos_rec['rotation'], s_pos_rec['translation']

                        R_cl2ce_mat = Quaternion(R_cl2ce).rotation_matrix
                        R_ce2g_mat = Quaternion(R_ce2g).rotation_matrix

                        # Data_frag['Info_frags']['T_l2e'],Data_frag['Info_frags']['R_l2e'] = cs_record['translation'],cs_record['rotation']
                        # Data_frag['Info_frags']['T_e2g'],Data_frag['Info_frags']['R_e2g'] = pose_record['translation'], pose_record['rotation']

                        ## Get Relative Pose: R_cl2rl,T_cl2rl, based on R/T_rl2re, R/T_re2g, R/T_cl2ce, R/T_ce2g
                        # r: reference, c: current, l: lidar, e: ego, g: global
                        # Attention: R_b2a = inv(R_a2b), T_b2a = -T_a2b * inv(R_a2b)

                        # R_cl2rl = R_cl2ce * R_ce2g * [R_g2rl]
                        # R_g2rl = R_g2re * R_re2rl = inv(R_re2g) * inv(R_rl2re)
                        R_cl2rl = (R_cl2ce_mat.T @ R_ce2g_mat.T) @ (
                                np.linalg.inv(R_re2g_mat).T @ np.linalg.inv(R_rl2re_mat).T)

                        # T_cl2rl = (T_cl2ce * R_ce2g + T_ce2g) * [R_g2rl] + [T_g2rl]
                        # T_g2rl = T_g2re * R_re2rl + T_re2rl = -T_re2g * inv(R_re2g) * inv(R_rl2re) - T_rl2re * inv(R_rl2re)
                        T_cl2rl = (T_cl2ce @ R_ce2g_mat.T + T_ce2g) @ (
                                    np.linalg.inv(R_re2g_mat).T @ np.linalg.inv(R_rl2re_mat).T) \
                                  - T_re2g @ (np.linalg.inv(R_re2g_mat).T @ np.linalg.inv(
                            R_rl2re_mat).T) - T_rl2re @ np.linalg.inv(R_rl2re_mat).T

                        frag_data['R_cl2rl'], frag_data['T_cl2rl'] = R_cl2rl, T_cl2rl
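                        # Alternative sketch (assumption, not used by this class): the same relative pose can be
                        # composed from 4x4 homogeneous transforms in the column-vector convention of
                        # nuscenes.utils.geometry_utils.transform_matrix:
                        #   M_cl2g  = transform_matrix(T_ce2g, Quaternion(R_ce2g)) @ transform_matrix(T_cl2ce, Quaternion(R_cl2ce))
                        #   M_g2rl  = transform_matrix(T_rl2re, Quaternion(R_rl2re), inverse=True) @ transform_matrix(T_re2g, Quaternion(R_re2g), inverse=True)
                        #   M_cl2rl = M_g2rl @ M_cl2g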

                        ### Get valid boxes, then transform them to the reference coordinates
                        boxes = self.nusc.get_boxes(sample_data['token'])  # At global coordinate
                        for box in boxes:
                            # Move box to the reference coord system
                            box.translate(-np.array(refer_pos_rec['translation']))
                            box.rotate(Quaternion(refer_pos_rec['rotation']).inverse)
                            box.translate(-np.array(refer_cs_rec['translation']))
                            box.rotate(Quaternion(refer_cs_rec['rotation']).inverse)

                        boxes = np.array(boxes)  # At reference coordinate
                        try:
                            valid_boxes = boxes[valid_inst_flag]
                        except IndexError:
                            print('cannot find valid boxes at scene {} frame {}'.format(si, d))
                        ## Transform Boxes to [location,dimension,rotation]
                        locs = np.array([b.center for b in valid_boxes]).reshape(-1, 3)
                        dims = np.array([b.wlh for b in valid_boxes]).reshape(-1, 3)
                        rots = np.array([b.orientation.yaw_pitch_roll[0]
                                         for b in valid_boxes]).reshape(-1, 1)
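                        # 7-DoF box encoding [x, y, z, w, l, h, yaw]; the -yaw - pi/2 shift below follows the
                        # heading convention of second.pytorch-style nuScenes converters (an assumption about
                        # the downstream detector, not something this file enforces).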
                        gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1)
                        frag_data['boxes'] = gt_boxes

                        ## Datapath
                        if i<self.args_dp.obs_length:
                            frag_data['lidar_path'] = sample_data['filename']
                            if sample_data['is_key_frame']:
                                cam_front_token = sample["data"]["CAM_FRONT"]
                                cam_path, _, _ = self.nusc.get_sample_data(cam_front_token)
                                frag_data['cam_path'] = cam_path

                        ## Object name
                        if 'names' not in frag_info.keys():
                            names = [b.name for b in valid_boxes]
                            for ni in range(len(names)):
                                if names[ni] in self.NameMapping:
                                    names[ni] = self.NameMapping[names[ni]]
                            names = np.array(names)
                            frag_info['names'] = names

                        ## Velocity (without interpolation)
                        valid_velo = key_velocity[key_sample_ind][valid_inst_flag]
                        # convert velo from global to current lidar
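                        # velocities are direction vectors, so only the inverse rotations (global -> ego -> lidar)
                        # are applied; no translation is needed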
                        for bi in range(len(valid_boxes)):
                            velo = np.array([*valid_velo[bi], 0.0])
                            velo = velo @ np.linalg.inv(R_ce2g_mat).T @ np.linalg.inv(R_cl2ce_mat).T
                            valid_velo[bi] = velo[:2]
                        frag_data['Velocity'] = valid_velo
                        frag_data['FrameNo.'] = d
                        frag_data['Token'] = sample_data['token']
                        frag_data['timestamp'] = sample_data['timestamp']
                        frag_info['Data_frags'].append(frag_data)
                    scene_frags.append(frag_info)
            Data_frags.append(scene_frags)
            end = time.time()
            print('scene {}/{}: total frags: {} time: {} '.format(si, len(scene_frames), len(scene_frags), end - start))
            if si % 200 == 0 and si > 0:
                with open(self.cache_path, 'wb') as f:
                    pickle.dump(Data_frags, f)
        return Data_frags
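
# Hedged usage sketch (illustrative only): the real configuration objects come from the project's
# own config files; SimpleNamespace stands in for them here, and every value below is a placeholder
# assumption rather than a recommended setting.
if __name__ == '__main__':
    from types import SimpleNamespace

    args = {
        'DataPrepare': SimpleNamespace(
            data_root='data/nuscenes', cache_name='frag_cache.pkl',
            version='v1.0-mini', verbose=True,
            obs_length=5, pred_length=6, interval=1, use_image='last_image'),
        'VoxelGenerator': SimpleNamespace(
            voxel_size=[0.25, 0.25, 8.0],
            point_cloud_range=[-50.0, -50.0, -5.0, 50.0, 50.0, 3.0],
            max_number_of_points_per_voxel=30,
            full_empty_part_with_mean=False, block_filtering=False,
            block_factor=1, block_size=8, height_threshold=0.2,
            bev_data=['bev_img', 'bev_index']),
    }
    dataset = NuScenesPrepare(args)
    sample = dataset[0]
    print(len(dataset), sample['gt_boxes']['obs'].shape)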