Example no. 1
def main(args):
    args.path = "/home/jo/training_data/nuscenes/nuscenes-v1.0"
    args.resize = [640, 256, 0]

    client = MongoClient(args.conn)
    collection = client[args.db][args.collection]

    nusc = NuScenes(version="v1.0-mini", dataroot=args.path, verbose=True)
    nusc.list_scenes()

    for scene in tqdm(nusc.scene):
        next_sample_token = scene["first_sample_token"]

        while True:
            sample = nusc.get('sample', next_sample_token)
            next_sample_token = sample["next"]
            
            sample_data_token = sample["data"]["CAM_FRONT"]
            sample_data = nusc.get_sample_data(sample_data_token)
            cam_front_data = nusc.get('sample_data', sample_data_token)

            # Create image data
            img_path = sample_data[0]
            if os.path.exists(img_path):
                img = cv2.imread(img_path)
                depth_map = map_pointcloud_to_image(nusc, sample['token'], pointsensor_channel='LIDAR_TOP', camera_channel='CAM_FRONT')
                # Not sure about radar data, it often does not seem to align with the lidar data, leaving it out for now
                # depth_map = map_pointcloud_to_image(nusc, sample['token'], pointsensor_channel='RADAR_FRONT', camera_channel='CAM_FRONT', depth_map=depth_map)
                roi = Roi()
                if args.resize:
                    img, roi = resize_img(img, args.resize[0], args.resize[1], args.resize[2])
                    depth_map, _ = resize_img(depth_map, args.resize[0], args.resize[1], args.resize[2], cv2.INTER_NEAREST)
                img_bytes = cv2.imencode('.jpeg', img)[1].tobytes()
                depth_bytes = cv2.imencode(".png", depth_map)[1].tobytes()
                content_type = "image/jpeg"
            else:
                print("WARNING: file not found: " + img_path + ", continuing with next image")
                if next_sample_token == "":
                    break
                continue

            # Get sensor extrinsics. Not sure why roll and yaw seem to be PI/2 off compared to the nuImages calibrated sensor
            sensor = nusc.get('calibrated_sensor', cam_front_data['calibrated_sensor_token'])
            q = MyQuaternion(sensor["rotation"][0], sensor["rotation"][1], sensor["rotation"][2], sensor["rotation"][3])
            roll  = math.atan2(2.0 * (q.z * q.y + q.w * q.x) , 1.0 - 2.0 * (q.x * q.x + q.y * q.y)) + math.pi * 0.5
            pitch = math.asin(2.0 * (q.y * q.w - q.z * q.x)) 
            yaw   = math.atan2(2.0 * (q.z * q.w + q.x * q.y) , - 1.0 + 2.0 * (q.w * q.w + q.x * q.x)) + math.pi * 0.5
            # print(sensor["translation"])
            # print(f"Pitch: {pitch*57.2} Yaw: {yaw*57.2} Roll: {roll*57.2}")

            # Sensor calibration is static, pose would be dynamic. TODO: Somehow also add some sort of cam to cam motion to be learned
            # ego_pose = nusc.get("ego_pose", cam_front_data["ego_pose_token"])
            # q = MyQuaternion(ego_pose["rotation"][0], ego_pose["rotation"][1], ego_pose["rotation"][2], ego_pose["rotation"][3])
            # roll  = math.atan2(2.0 * (q.z * q.y + q.w * q.x) , 1.0 - 2.0 * (q.x * q.x + q.y * q.y)) + math.pi * 0.5
            # pitch = math.asin(2.0 * (q.y * q.w - q.z * q.x)) 
            # yaw   = math.atan2(2.0 * (q.z * q.w + q.x * q.y) , - 1.0 + 2.0 * (q.w * q.w + q.x * q.x)) + math.pi * 0.5
            # print(ego_pose["translation"])
            # print(f"Pitch: {pitch*57.2} Yaw: {yaw*57.2} Roll: {roll*57.2}")

            entry = Entry(
                img=img_bytes,
                content_type=content_type,
                depth=depth_bytes,
                org_source="nuscenes",
                org_id=img_path,
                objects=[],
                ignore=[],
                has_3D_info=True,
                has_track_info=True,
                sensor_valid=True,
                yaw=wrap_angle(yaw),
                roll=wrap_angle(roll),
                pitch=wrap_angle(pitch),
                translation=sensor["translation"],
                scene_token=sample["scene_token"],
                timestamp=sample["timestamp"]
            )

            labels = sample_data[1]
            idx_counter = 0
            for box in labels:
                if box.name in CLASS_MAP.keys():
                    box.translate(np.array([0, box.wlh[2] / 2, 0])) # translate from box center to bottom center
                    pos_3d = np.array([*box.center, 1.0])
                    cam_mat = np.hstack((sample_data[2], np.zeros((3, 1))))
                    cam_mat[0][2] += roi.offset_left
                    cam_mat[1][2] += roi.offset_top
                    cam_mat *= roi.scale
                    cam_mat[2][2] = 1.0
                    # create rotation matrix; somehow using the rotation matrix directly from nuscenes does not work,
                    # so instead we calculate the rotation as in KITTI and reuse the same code
                    v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
                    yaw = -np.arctan2(v[2], v[0])
                    rot_angle = wrap_angle(float(yaw) + math.pi * 0.5) # because parallel to optical view of camera = 90 deg
                    rot_mat = np.array([
                        [math.cos(rot_angle), 0.0, math.sin(rot_angle), 0.0],
                        [0.0, 1.0, 0.0, 0.0],
                        [-math.sin(rot_angle), 0.0, math.cos(rot_angle), 0.0],
                        [0.0, 0.0, 0.0, 1.0],
                    ])
                    pos_2d = np.matmul(cam_mat, pos_3d)
                    pos_2d /= pos_2d[2]
                    box3d = calc_cuboid_from_3d(pos_3d, cam_mat, rot_mat, box.wlh[0], box.wlh[2], box.wlh[1])
                    box2d = bbox_from_cuboid(box3d)

                    annotation = nusc.get("sample_annotation", box.token)
                    instance_token = annotation["instance_token"]

                    entry.objects.append(Object(
                        obj_class=CLASS_MAP[box.name],
                        box2d=box2d,
                        box3d=box3d,
                        box3d_valid=True,
                        instance_token=instance_token,
                        truncated=None,
                        occluded=None,
                        width=box.wlh[0],
                        length=box.wlh[1],
                        height=box.wlh[2],
                        orientation=rot_angle,
                        # converted to autosar coordinate system
                        x=pos_3d[2],
                        y=-pos_3d[0],
                        z=-pos_3d[1]
                    ))

                    # For debugging show the data in the image
                    box2d = list(map(int, box2d))
                    cv2.circle(img, (int(pos_2d[0]), int(pos_2d[1])), 3, (255, 0, 0))
                    cv2.rectangle(img, (box2d[0], box2d[1]), (box2d[0] + box2d[2], box2d[1] + box2d[3]), (255, 255, 0), 1)
                    cv2.putText(img, f"{idx_counter}", (box2d[0], box2d[1] + int(box2d[3])), 0, 0.4, (255, 255, 255))
                    print(f"{idx_counter}: {math.degrees(rot_angle)}")
                    idx_counter += 1

            f, (ax1, ax2) = plt.subplots(2, 1)
            ax1.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            max_val = np.amax(depth_map)
            ax2.imshow(depth_map, cmap="gray", vmin=1, vmax=10000)
            plt.show()

            # collection.insert_one(entry.get_dict())

            if next_sample_token == "":
                break
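
The example above walks every scene from first_sample_token and follows sample["next"] until it is empty; the same traversal recurs in most of the examples below. A minimal, self-contained sketch of just that pattern, assuming only the official nuscenes-devkit and a local copy of the v1.0-mini split (the dataroot below is a placeholder):

from nuscenes.nuscenes import NuScenes

nusc = NuScenes(version="v1.0-mini", dataroot="/data/nuscenes", verbose=False)

for scene in nusc.scene:
    token = scene["first_sample_token"]
    while token != "":
        sample = nusc.get("sample", token)
        cam_token = sample["data"]["CAM_FRONT"]
        # data path, boxes (in the camera frame) and 3x3 intrinsic for the key frame
        data_path, boxes, camera_intrinsic = nusc.get_sample_data(cam_token)
        token = sample["next"]
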
Example no. 2
def create_dataset(dataroot,
                   save_dir,
                   width=672,
                   height=672,
                   grid_range=70.,
                   nusc_version='v1.0-mini',
                   use_constant_feature=True,
                   use_intensity_feature=True,
                   end_id=None):

    os.makedirs(os.path.join(save_dir, 'in_feature'), exist_ok=True)
    os.makedirs(os.path.join(save_dir, 'out_feature'), exist_ok=True)

    nusc = NuScenes(version=nusc_version, dataroot=dataroot, verbose=True)
    ref_chan = 'LIDAR_TOP'

    if width == height:
        size = width
    else:
        raise Exception(
            'Currently only supported if width and height are equal')

    grid_length = 2. * grid_range / size
    label_half_length = 10

    data_id = 0
    grid_ticks = np.arange(-grid_range, grid_range + grid_length, grid_length)
    grid_centers \
        = (grid_ticks + grid_length / 2)[:len(grid_ticks) - 1]

    for my_scene in nusc.scene:
        first_sample_token = my_scene['first_sample_token']
        token = first_sample_token

        while (token != ''):
            print('--- {} '.format(data_id) + token + ' ---')
            my_sample = nusc.get('sample', token)
            sd_record = nusc.get('sample_data', my_sample['data'][ref_chan])
            sample_rec = nusc.get('sample', sd_record['sample_token'])
            chan = sd_record['channel']

            pc, times = LidarPointCloud.from_file_multisweep(nusc,
                                                             sample_rec,
                                                             chan,
                                                             ref_chan,
                                                             nsweeps=10)
            _, boxes, _ = nusc.get_sample_data(sd_record['token'],
                                               box_vis_level=0)

            out_feature = np.zeros((size, size, 7), dtype=np.float32)
            for box_idx, box in enumerate(boxes):
                label = 0
                if box.name.split('.')[0] == 'vehicle':
                    if box.name.split('.')[1] == 'car':
                        label = 1
                    elif (box.name.split('.')[1]
                          in ['bus', 'construction', 'trailer', 'truck']):
                        label = 2
                    elif box.name.split('.')[1] == 'emergency':
                        if box.name.split('.')[2] == 'ambulance':
                            label = 2
                        # police
                        else:
                            label = 1
                    elif box.name.split('.')[1] in ['bicycle', 'motorcycle']:
                        label = 3
                elif box.name.split('.')[0] == 'human':
                    label = 4
                elif (box.name.split('.')[0]
                      in ['animal', 'movable_object', 'static_object']):
                    label = 5
                else:
                    continue

                height_pt = np.linalg.norm(box.corners().T[0] -
                                           box.corners().T[3])
                corners2d = box.corners()[:2, :]
                box2d = corners2d.T[[2, 3, 7, 6]]

                box2d_center = box2d.mean(axis=0)
                generate_out_feature(width, height, size, grid_centers, box2d,
                                     box2d_center, height_pt, label,
                                     label_half_length, out_feature)

            out_feature = out_feature.astype(np.float16)
            feature_generator = fg.Feature_generator(grid_range, width, height,
                                                     use_constant_feature,
                                                     use_intensity_feature)
            feature_generator.generate(pc.points.T)
            in_feature = feature_generator.feature
            if use_constant_feature and use_intensity_feature:
                in_feature = in_feature.reshape(size, size, 8)
            elif use_constant_feature or use_intensity_feature:
                in_feature = in_feature.reshape(size, size, 6)
            else:
                in_feature = in_feature.reshape(size, size, 4)

            # flip the feature map and negate the instance offset channels accordingly
            out_feature = np.flip(np.flip(out_feature, axis=0), axis=1)
            out_feature[:, :, 1:3] *= -1

            np.save(os.path.join(save_dir, 'in_feature/{:05}'.format(data_id)),
                    in_feature)
            np.save(
                os.path.join(save_dir, 'out_feature/{:05}'.format(data_id)),
                out_feature)

            token = my_sample['next']
            data_id += 1
            if data_id == end_id:
                return
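
A standalone worked check of the grid arithmetic above, using the default arguments grid_range=70.0 and width=height=672:

import numpy as np

grid_range, size = 70.0, 672
grid_length = 2.0 * grid_range / size                       # ~0.2083 m per BEV cell
grid_ticks = np.arange(-grid_range, grid_range + grid_length, grid_length)
grid_centers = (grid_ticks + grid_length / 2)[:len(grid_ticks) - 1]
print(grid_length, len(grid_centers))      # cell size in metres and number of cell centers
print(grid_centers[0], grid_centers[-1])   # centers span roughly -70 m .. +70 m
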
Example no. 3
def train():

    net.train()
    current_lr = config['learning_rate']
    print('Loading Dataset...')

    dataset = TrainDataset(
        args.nuscenes_data_root,
        SSJAugmentation(
            config['image_size'],
            means,
        ),
    )
    print('length = ', len(dataset))
    epoch_size = len(dataset) // args.batch_size
    step_index = 0

    batch_iterator = None
    batch_size = 1
    data_loader = data.DataLoader(
        dataset,
        batch_size,
        num_workers=args.num_workers,
        shuffle=True,
        collate_fn=collate_fn,
        pin_memory=False,
    )
    if args.Joint:
        nusc = NuScenes(
            version='v1.0-trainval',
            verbose=True,
            dataroot=args.nuscenes_root,
        )
    for iteration in range(0, 380000):
        print(iteration)
        if (not batch_iterator) or (iteration % epoch_size == 0):
            # create batch iterator
            batch_iterator = iter(data_loader)
            all_epoch_loss = []
        print('pass 0')
        if iteration in stepvalues:
            step_index += 1
            current_lr = current_lr * 0.5
            for param_group in optimizer.param_groups:
                param_group['lr'] = current_lr

        # load train data
        img_pre, img_next, boxes_pre, boxes_next, labels, valid_pre, valid_next, current_poses, next_poses, pre_bbox, next_bbox, img_org_pre, img_org_next, current_tokens, next_tokens =\
            next(batch_iterator)

        if args.Joint:
            current_token = current_tokens[0]
            next_token = next_tokens[0]
            first_token = current_tokens[1]

            current_cam_record = nusc.get('sample_data', current_token)
            current_cam_path = nusc.get_sample_data_path(current_token)
            current_cam_path, boxes, current_camera_intrinsic = nusc.get_sample_data(
                current_cam_record['token'], )
            current_cs_record = nusc.get(
                'calibrated_sensor',
                current_cam_record['calibrated_sensor_token'],
            )
            current_poserecord = nusc.get(
                'ego_pose',
                current_cam_record['ego_pose_token'],
            )

            next_cam_record = nusc.get('sample_data', next_token)
            next_cam_path = nusc.get_sample_data_path(next_token)
            next_cam_path, boxes, next_camera_intrinsic = nusc.get_sample_data(
                next_cam_record['token'], )
            next_cs_record = nusc.get(
                'calibrated_sensor',
                next_cam_record['calibrated_sensor_token'],
            )
            next_poserecord = nusc.get(
                'ego_pose',
                next_cam_record['ego_pose_token'],
            )

            first_cam_record = nusc.get('sample_data', first_token)
            first_cam_path = nusc.get_sample_data_path(first_token)
            first_cam_path, boxes, first_camera_intrinsic = nusc.get_sample_data(
                first_cam_record['token'], )
            first_cs_record = nusc.get(
                'calibrated_sensor',
                first_cam_record['calibrated_sensor_token'],
            )
            first_poserecord = nusc.get(
                'ego_pose',
                first_cam_record['ego_pose_token'],
            )

        if args.cuda:
            img_pre = (img_pre.cuda())
            img_next = (img_next.cuda())
            boxes_pre = (boxes_pre.cuda())
            boxes_next = (boxes_next.cuda())
            valid_pre = (valid_pre.cuda())
            valid_next = (valid_next.cuda())
            labels = (labels.cuda())
            current_poses = (current_poses.cuda())
            next_poses = (next_poses.cuda())
            pre_bbox = (pre_bbox.cuda())
            next_bbox = (next_bbox.cuda())
            img_org_pre = (img_org_pre.cuda())
            img_org_next = (img_org_next.cuda())

        # forward
        if args.Joint:
            out, pose_loss = net(
                args.Joint,
                img_pre,
                img_next,
                boxes_pre,
                boxes_next,
                valid_pre,
                valid_next,
                current_poses,
                next_poses,
                pre_bbox,
                next_bbox,
                img_org_pre,
                img_org_next,
                current_cs_record,
                current_poserecord,
                next_cs_record,
                next_poserecord,
                first_cs_record,
                first_poserecord,
                first_camera_intrinsic,
            )

        else:
            out = net(
                img_pre,
                img_next,
                boxes_pre,
                boxes_next,
                valid_pre,
                valid_next,
            )

        optimizer.zero_grad()
        loss_pre, loss_next, loss_similarity, loss, accuracy_pre, accuracy_next, accuracy, predict_indexes = criterion(
            out,
            labels,
            valid_pre,
            valid_next,
        )
        if args.Joint:
            total_loss = loss + 0.1 * pose_loss
        else:
            total_loss = loss
        total_loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        all_epoch_loss += [loss.data.cpu()]

        print(
            'iter ' + repr(iteration) + ', ' + repr(epoch_size) +
            ' || epoch: %.4f ' % (iteration / (float)(epoch_size)) +
            ' || Loss: %.4f ||' % (loss.data.item()),
            end=' ',
        )

        if iteration % 1000 == 0:

            result_image = show_batch_circle_image(
                img_pre,
                img_next,
                boxes_pre,
                boxes_next,
                valid_pre,
                valid_next,
                predict_indexes,
                iteration,
            )

        if iteration % 1000 == 0:
            print('Saving state, iter:', iteration)
            torch.save(
                net.state_dict(),
                os.path.join(
                    args.save_folder,
                    'model_' + repr(iteration) + '.pth',
                ),
            )
Example no. 4
def get_nuscenes_dicts(path="./", version='v1.0-mini', categories=None):
    """
    This is a helper function that creates dicts from nuScenes in detectron2 format.
    nuScenes annotations use 3D bounding boxes, but for detectron2 we need 2D bounding boxes.
    The simplest solution is to take the max x, min x, max y and min y of the projected
    3D box corners and build a 2D box from them. We lose some accuracy, but this is not critical.
    :param path: <string>. Path to the nuScenes dataset.
    :param version: <string>. nuScenes dataset version name.
    :param categories: <list<string>>. List of selected categories for detection.
        Get from https://www.nuscenes.org/data-annotation
        Categories names:
            ['human.pedestrian.adult',
             'human.pedestrian.child',
             'human.pedestrian.wheelchair',
             'human.pedestrian.stroller',
             'human.pedestrian.personal_mobility',
             'human.pedestrian.police_officer',
             'human.pedestrian.construction_worker',
             'animal',
             'vehicle.car',
             'vehicle.motorcycle',
             'vehicle.bicycle',
             'vehicle.bus.bendy',
             'vehicle.bus.rigid',
             'vehicle.truck',
             'vehicle.construction',
             'vehicle.emergency.ambulance',
             'vehicle.emergency.police',
             'vehicle.trailer',
             'movable_object.barrier',
             'movable_object.trafficcone',
             'movable_object.pushable_pullable',
             'movable_object.debris',
             'static_object.bicycle_rack']
    :return: <list<dict>>. List of dicts with data annotations in detectron2 format.
    """
    logging.info('your nuscenes root: {}'.format(path))
    # assert(path[-1] == "/"), "Insert '/' in the end of path"
    path = os.path.abspath(path) + '/'
    nusc = NuScenes(version=version, dataroot=path, verbose=False)
    logging.info('nuScenes object loaded.')

    # Select all categories if not set
    if categories is None:
        categories = [data["name"] for data in nusc.category]
    assert (isinstance(categories, list)), "Categories type must be list"

    dataset_dicts = []
    cache_f = os.path.join(path, 'ct_nuscenes.cache')
    if os.path.exists(cache_f):
        dataset_dicts = pickle.load(open(cache_f, 'rb'))
        logging.info('resumed nuscenes cache file: {}'.format(cache_f))
        return dataset_dicts
    idx = 0
    logging.info('start parsing nuscenes data to train dict format...')
    for i in tqdm(range(0, len(nusc.scene))):
        scene = nusc.scene[i]
        scene_rec = nusc.get('scene', scene['token'])
        sample_rec_cur = nusc.get('sample', scene_rec['first_sample_token'])

        # Go through all frames in the current scene
        while True:
            # maybe add CAM_LEFT as well later
            data = nusc.get('sample_data', sample_rec_cur['data']["CAM_FRONT"])

            record = {}
            record["file_name"] = path + data["filename"]
            if os.path.exists(record['file_name']):
                record["image_id"] = idx
                record["height"] = data["height"]
                record["width"] = data["width"]
                idx += 1

                # Get boxes from front camera
                _, boxes, camera_intrinsic = nusc.get_sample_data(
                    sample_rec_cur['data']["CAM_FRONT"], BoxVisibility.ANY)
                # Get only necessary boxes
                boxes = [box for box in boxes if box.name in categories]
                # Go through all bounding boxes
                objs = []
                for box in boxes:
                    corners = view_points(box.corners(),
                                          camera_intrinsic,
                                          normalize=True)[:2, :]
                    max_x = int(max(corners[0]))
                    min_x = int(min(corners[0]))
                    max_y = int(max(corners[1]))
                    min_y = int(min(corners[1]))
                    obj = {
                        "bbox": [min_x, min_y, max_x, max_y],
                        "bbox_mode": BoxMode.XYXY_ABS,
                        "category_id": categories.index(box.name),
                        "iscrowd": 0
                    }
                    objs.append(obj)

                record["annotations"] = objs
                dataset_dicts.append(record)

                # Get next frame
                sample_rec_cur = nusc.get('sample', sample_rec_cur['next'])
                # End of scene condition
                if (sample_rec_cur["next"] == ""):
                    break
            else:
                logging.info('pass record: {} since image not found!'.format(
                    record['file_name']))
                sample_rec_cur = nusc.get('sample', sample_rec_cur['next'])
                if (sample_rec_cur["next"] == ""):
                    break
                continue
    logging.info('data conversion done! total samples: {}'.format(
        len(dataset_dicts)))
    if not os.path.exists(cache_f):
        os.makedirs(os.path.dirname(cache_f), exist_ok=True)
        pickle.dump(dataset_dicts, open(cache_f, 'wb'))
        logging.info('nuscenes cache saved into: {}'.format(cache_f))
    return dataset_dicts
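
The dicts returned by get_nuscenes_dicts follow detectron2's dataset-dict format, so a typical way to consume the helper is to register it with detectron2's catalogs. A sketch, assuming detectron2 is installed; the dataset name, path and category subset below are placeholders:

from detectron2.data import DatasetCatalog, MetadataCatalog

selected = ['vehicle.car', 'human.pedestrian.adult']
DatasetCatalog.register(
    "nuscenes_mini_cam_front",
    lambda: get_nuscenes_dicts(path="/data/nuscenes/", version="v1.0-mini",
                               categories=selected))
MetadataCatalog.get("nuscenes_mini_cam_front").set(thing_classes=selected)
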
Example no. 5
                                                 Quaternion(calib_sensor_prev['rotation']))
            lidar_to_global = np.dot(ego_to_global, lidar_to_ego)
            lidar_to_global_prev = np.dot(ego_to_global_prev, lidar_to_ego_prev)
            delta = inv(lidar_to_global_prev).dot(lidar_to_global)
            y_translation.append(delta[1, 3])
            delta_angles = rotationMatrixToEulerAngles(delta[0:3, 0:3]) * 180 / np.pi
            yaw.append(delta_angles[2])

        for annotation_token in sample['anns']:
            annotation_metadata = nusc.get('sample_annotation', annotation_token)
            if annotation_metadata['num_lidar_pts'] == 0:
                continue
            detection_name = category_to_detection_name(annotation_metadata['category_name'])
            if detection_name is None:
                continue
            _, box_lidar, _ = nusc.get_sample_data(sample['data'][sensor], box_vis_level=BoxVisibility.NONE,
                                                   selected_anntokens=[annotation_token])
            box_lidar = box_lidar[0]
            box_lidar.rotate(nu_to_kitti_lidar)
            if x_positions[detection_name] is None:
                x_positions[detection_name] = [box_lidar.center[0]]
                y_positions[detection_name] = [box_lidar.center[1]]
                length[detection_name] = [box_lidar.wlh[1]]
                width[detection_name] = [box_lidar.wlh[0]]
                num_lidar_points[detection_name] = [annotation_metadata['num_lidar_pts']]
            else:
                x_positions[detection_name].append(box_lidar.center[0])
                y_positions[detection_name].append(box_lidar.center[1])
                length[detection_name].append(box_lidar.wlh[1])
                width[detection_name].append(box_lidar.wlh[0])
                num_lidar_points[detection_name].append(annotation_metadata['num_lidar_pts'])
        current_sample_token = sample['next']
Example no. 6
num_samples = len(nusc.sample)

for ind, sample in enumerate(nusc.sample):
    sample_json = dict()
    category = []
    box_3d = []
    rotations = []
    my_sample = nusc.get('sample', sample['token'])
    sample_data = nusc.get('sample_data', my_sample['data'][SENSOR])
    filename = os.path.basename(sample_data['filename'])

    my_annotation_token_list = my_sample['anns']
    sensor_modality = sample_data['sensor_modality']
    assert sensor_modality == 'camera'
    data_path, boxes, camera_intrinsic = nusc.get_sample_data(
        my_sample['data'][SENSOR])

    # for anno_token in my_annotation_token_list:
    # my_annotation_metadata = nusc.get('sample_annotation', anno_token)
    # # class
    # cat_name = my_annotation_metadata['category_name']
    for box in boxes:
        cat_name = box.name
        if classes_map.get(cat_name) is None:
            continue
        else:
            category.append(classes_map[cat_name])
        # xyz (forward, left, up)
        trans = box.center
        # wlh (yxz) (left, forward, up)
        dim = box.wlh
Example no. 7
                'rotation']

            c2e_t_mat = np.array(cam2ego_translation).reshape(3, 1)
            e2g_t_mat = np.array(ego2global_translation).reshape(3, 1)
            c2e_r_mat = Quaternion(cam2ego_rotation).rotation_matrix  # (3, 3)
            e2g_r_mat = Quaternion(ego2global_rotation).rotation_matrix  # (3, 3)
            calib[present_sensor+'_'+'cam2ego_translation'] = c2e_t_mat
            calib[present_sensor+'_'+'cam2ego_rotation'] = c2e_r_mat
            calib[present_sensor+'_'+'ego2global_translation'] = e2g_t_mat
            calib[present_sensor+'_'+'ego2global_rotation'] = e2g_r_mat

            sensor_img_output_dir = os.path.join(img_output_root, 'image_' + present_sensor)
            sensor_label_output_dir = os.path.join(label_output_root, 'label_' + present_sensor)
            # each sensor_data corresponds to one specific image in the dataset
            sensor_data = nusc.get('sample_data', img_token)
            data_path, box_list, cam_intrinsic = nusc.get_sample_data(img_token, getattr(BoxVisibility,truncation_level))
            """
            get_sample_data:        
                Returns the data path as well as all annotations related to that sample_data.
                Note that the boxes are transformed into the current sensor's coordinate frame.
            """
            calib[present_sensor] = cam_intrinsic
            img_file = data_root + sensor_data['filename']
            seqname = str(frame_counter).zfill(6)
            output_label_file = sensor_label_output_dir  +'/' + seqname + '.txt'
            with open(output_label_file, 'a') as output_f:
                for box in box_list:
                    # obtain the visibility level of each 3D box
                    present_visibility_token = nusc.get('sample_annotation', box.token)['visibility_token']
                    if int(present_visibility_token) > min_visibility_level:
                        if not (category_reflection[box.name] == 'DontCare' and delete_dontcare_objects):
Example no. 8
root_dir = '../v1.0-trainval01'
nusc = NuScenes(version='v1.0-trainval', dataroot=root_dir, verbose=False)
all_category_db = list(NameMapping.keys())
all_category = list(NameMapping.values())

my_scene = nusc.scene[15]
sample_token = my_scene['first_sample_token']
end_token = my_scene['last_sample_token']
my_sample = nusc.get('sample', sample_token)

i = 0
while sample_token != end_token:
    my_sample = nusc.get('sample', my_sample['next'])

    img = get_image(my_sample, root_dir)
    sample_data = nusc.get_sample_data(my_sample['data']['CAM_FRONT'])
    labels = sample_data[1]
    # this label is according to camera, what we have is lidar
    # converts top lidar
    intrinsic = sample_data[2]
    for label in labels:
        c2d = project_cam_coords_to_pixel([label.center], intrinsic)[0]
        cv2.circle(img, (int(c2d[0]), int(c2d[1])), 3, (0, 255, 255), -1)
        # convert center and wlh to 3d box
        box = Box(center=label.center,
                  size=label.wlh,
                  orientation=label.orientation)
        corners3d = box.corners()
        corners3d_2d = project_cam_coords_to_pixel(corners3d, intrinsic)

        idx = all_category_db.index(label.name)
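
project_cam_coords_to_pixel in this example is project-specific; an equivalent projection of camera-frame 3D points to pixel coordinates can be sketched with the devkit's view_points helper, assuming (as in the example) that the boxes returned by get_sample_data are already in the camera frame:

import numpy as np
from nuscenes.utils.geometry_utils import view_points

def cam_points_to_pixels(points_3d, camera_intrinsic):
    """points_3d: (3, N) array in the camera frame -> (2, N) pixel coordinates."""
    points_3d = np.asarray(points_3d, dtype=float).reshape(3, -1)
    return view_points(points_3d, np.asarray(camera_intrinsic), normalize=True)[:2, :]

# e.g. for one box from get_sample_data:
#   center_px  = cam_points_to_pixels(label.center, intrinsic)[:, 0]
#   corners_px = cam_points_to_pixels(box.corners(), intrinsic)
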
Example no. 9
class NuScenesDataset(Dataset):
    """
    NuScenes dataset loader and producer
    """
    def __init__(self,
                 mode,
                 split='training',
                 img_list='trainval',
                 is_training=True,
                 workers_num=1):
        """
        mode: 'loading', 'preprocessing'
        """
        self.mode = mode
        self.dataset_dir = os.path.join(cfg.ROOT_DIR,
                                        cfg.DATASET.KITTI.BASE_DIR_PATH)
        self.max_sweeps = cfg.DATASET.NUSCENES.NSWEEPS
        self.is_training = is_training
        self.img_list = img_list
        self.workers_num = workers_num

        # cast labels from NuScenes name to useful name
        self.useful_cls_dict = {
            'animal': 'ignore',
            'human.pedestrian.personal_mobility': 'ignore',
            'human.pedestrian.stroller': 'ignore',
            'human.pedestrian.wheelchair': 'ignore',
            'movable_object.debris': 'ignore',
            'movable_object.pushable_pullable': 'ignore',
            'static_object.bicycle_rack': 'ignore',
            'vehicle.emergency.ambulance': 'ignore',
            'vehicle.emergency.police': 'ignore',
            'movable_object.barrier': 'barrier',
            'vehicle.bicycle': 'bicycle',
            'vehicle.bus.bendy': 'bus',
            'vehicle.bus.rigid': 'bus',
            'vehicle.car': 'car',
            'vehicle.construction': 'construction_vehicle',
            'vehicle.motorcycle': 'motorcycle',
            'human.pedestrian.adult': 'pedestrian',
            'human.pedestrian.child': 'pedestrian',
            'human.pedestrian.construction_worker': 'pedestrian',
            'human.pedestrian.police_officer': 'pedestrian',
            'movable_object.trafficcone': 'traffic_cone',
            'vehicle.trailer': 'trailer',
            'vehicle.truck': 'truck'
        }
        # cast attribute to index
        self.attribute_idx_list = {
            'vehicle.moving': 0,
            'vehicle.stopped': 1,
            'vehicle.parked': 2,
            'cycle.with_rider': 3,
            'cycle.without_rider': 4,
            'pedestrian.sitting_lying_down': 5,
            'pedestrian.standing': 6,
            'pedestrian.moving': 7,
            'default': -1,
        }
        self.idx_attribute_list = dict([
            (v, k) for k, v in self.attribute_idx_list.items()
        ])
        self.AttributeIdxLabelMapping = {
            "car": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "truck": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "bus": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "trailer": ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "construction_vehicle":
            ['vehicle.moving', 'vehicle.stopped', 'vehicle.parked'],
            "pedestrian": [
                'pedestrian.sitting_lying_down', 'pedestrian.standing',
                'pedestrian.moving'
            ],
            "motorcycle": ['cycle.with_rider', 'cycle.without_rider', ''],
            "bicycle": ['cycle.with_rider', 'cycle.without_rider', ''],
            "traffic_cone": ['', '', ''],
            "barrier": ['', '', ''],
        }

        self.DefaultAttribute = {
            "car": "vehicle.parked",
            "pedestrian": "pedestrian.moving",
            "trailer": "vehicle.parked",
            "truck": "vehicle.parked",
            "bus": "vehicle.parked",
            "motorcycle": "cycle.without_rider",
            "construction_vehicle": "vehicle.parked",
            "bicycle": "cycle.without_rider",
            "barrier": "",
            "traffic_cone": "",
        }

        self.cls_list = cfg.DATASET.KITTI.CLS_LIST
        self.idx2cls_dict = dict([(idx + 1, cls)
                                  for idx, cls in enumerate(self.cls_list)])
        self.cls2idx_dict = dict([(cls, idx + 1)
                                  for idx, cls in enumerate(self.cls_list)])

        self.sv_npy_path = os.path.join(
            cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH, 'NuScenes',
            '{}_{}'.format(img_list, self.max_sweeps))
        self.train_list = os.path.join(self.sv_npy_path, 'infos.pkl')

        self.voxel_generator = VoxelGenerator()

        self.test_mode = cfg.TEST.TEST_MODE
        if self.test_mode == 'mAP':
            self.evaluation = self.evaluate_map
            self.logger_and_select_best = self.logger_and_select_best_map
        elif self.test_mode == 'Recall':
            self.evaluation = self.evaluate_recall
            self.logger_and_select_best = self.logger_and_select_best_recall
        else:
            raise Exception('No other evaluation mode.')

        if mode == 'loading':
            # data loader
            with open(self.train_list, 'rb') as f:
                self.train_npy_list = pickle.load(f)
            self.sample_num = len(self.train_npy_list)
            if self.is_training:
                self.data_augmentor = DataAugmentor(
                    'NuScenes', workers_num=self.workers_num)

        elif mode == 'preprocessing':
            # preprocess raw data
            if img_list == 'train':
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-trainval')
                self.scenes = [
                    scene for scene in self.nusc.scene
                    if scene['name'] in train_scene
                ]
            elif img_list == 'val':
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-trainval')
                self.scenes = [
                    scene for scene in self.nusc.scene
                    if scene['name'] in val_scene
                ]
            else:  # test
                self.nusc = NuScenes(dataroot=self.dataset_dir,
                                     version='v1.0-test')
                self.scenes = self.nusc.scene

            self.sample_data_token_list = OrderedDict()
            sample_num = 0
            for scene in self.scenes:
                # count the samples and save all sample_data tokens
                self.sample_data_token_list[scene['token']] = []
                all_sample = self.nusc.field2token('sample', 'scene_token',
                                                   scene['token'])
                sample_num += len(all_sample)
                for sample in all_sample:  # all sample token
                    sample = self.nusc.get('sample', sample)
                    cur_token = sample['token']
                    cur_data_token = sample['data']['LIDAR_TOP']
                    self.sample_data_token_list[scene['token']].append(
                        cur_data_token)

            self.sample_num = sample_num

            self.extents = cfg.DATASET.POINT_CLOUD_RANGE
            self.extents = np.reshape(self.extents, [3, 2])
            if not os.path.exists(self.sv_npy_path):
                os.makedirs(self.sv_npy_path)

            # also calculate the mean size here
            self.cls_size_dict = dict([(cls,
                                        np.array([0, 0, 0], dtype=np.float32))
                                       for cls in self.cls_list])
            self.cls_num_dict = dict([(cls, 0) for cls in self.cls_list])

            # the save path for MixupDB
            if self.img_list in [
                    'train', 'val', 'trainval'
            ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
                self.mixup_db_cls_path = dict()
                self.mixup_db_trainlist_path = dict()
                self.mixup_db_class = cfg.TRAIN.AUGMENTATIONS.MIXUP.CLASS
                for cls in self.mixup_db_class:
                    mixup_db_cls_path = os.path.join(
                        cfg.ROOT_DIR, cfg.DATASET.KITTI.SAVE_NUMPY_PATH,
                        cfg.TRAIN.AUGMENTATIONS.MIXUP.SAVE_NUMPY_PATH,
                        cfg.TRAIN.AUGMENTATIONS.MIXUP.PC_LIST,
                        '{}'.format(cls))
                    mixup_db_trainlist_path = os.path.join(
                        mixup_db_cls_path, 'train_list.txt')
                    if not os.path.exists(mixup_db_cls_path):
                        os.makedirs(mixup_db_cls_path)
                    self.mixup_db_cls_path[cls] = mixup_db_cls_path
                    self.mixup_db_trainlist_path[cls] = mixup_db_trainlist_path

    def __len__(self):
        return self.sample_num

    def load_samples(self, sample_idx, pipename):
        """ load data per thread """
        pipename = int(pipename)
        biggest_label_num = 0
        sample_dict = self.train_npy_list[sample_idx]

        points_path = sample_dict[maps_dict.KEY_POINT_CLOUD]
        sweeps = sample_dict[maps_dict.KEY_SWEEPS]
        sample_name = sample_dict[maps_dict.KEY_SAMPLE_NAME]
        cur_transformation_matrix = sample_dict[
            maps_dict.KEY_TRANSFORMRATION_MATRIX]
        ts = sample_dict[maps_dict.KEY_TIMESTAMPS] / 1e6

        # first read the points, then stack points from multiple frames
        points = np.fromfile(points_path, dtype=np.float32)
        points = points.reshape((-1, 5))
        points = cast_points_to_kitti(points)
        points[:, 3] /= 255
        points[:, 4] = 0
        sweep_points_list = [points]
        original_cur_sweep_points = points
        cur_sweep_points_num = points.shape[0]
        for sweep in sweeps:
            points_sweep = np.fromfile(sweep['lidar_path'], dtype=np.float32)
            points_sweep = points_sweep.reshape((-1, 5))
            sweep_ts = sweep['timestamp'] / 1e6
            points_sweep[:, 3] /= 255
            points_sweep[:, :3] = points_sweep[:, :3] @ sweep[
                'sweep2lidar_rotation'].T
            points_sweep[:, :3] += sweep['sweep2lidar_translation']
            points_sweep[:, 4] = ts - sweep_ts
            points_sweep = cast_points_to_kitti(points_sweep)
            sweep_points_list.append(points_sweep)
        if cfg.DATASET.NUSCENES.INPUT_FEATURE_CHANNEL == 4:
            points = np.concatenate(sweep_points_list, axis=0)[:, [0, 1, 2, 4]]
        else:
            points = np.concatenate(sweep_points_list, axis=0)

        # then read the ground truth labels if available
        if self.is_training or cfg.TEST.WITH_GT:
            label_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D]
            label_boxes_3d = cast_box_3d_to_kitti_format(label_boxes_3d)

            label_classes_name = sample_dict[maps_dict.KEY_LABEL_CLASSES]
            label_classes = np.array([
                self.cls2idx_dict[label_class]
                for label_class in label_classes_name
            ])

            label_attributes = sample_dict[maps_dict.KEY_LABEL_ATTRIBUTES]
            label_velocity = sample_dict[
                maps_dict.KEY_LABEL_VELOCITY]  # [-1, 2]

            ry_cls_label, residual_angle = encode_angle2class_np(
                label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM)
        else:  # not is_training and no_gt
            label_boxes_3d = np.zeros([1, 7], np.float32)
            label_classes = np.zeros([1], np.int32)
            label_attributes = np.zeros([1], np.int32)
            label_velocity = np.zeros([1, 2], np.float32)
            ry_cls_label = np.zeros([1], np.int32)
            residual_angle = np.zeros([1], np.float32)

        if self.is_training:  # data augmentation
            points, label_boxes_3d, label_classes, label_attributes, label_velocity, cur_sweep_points_num = self.data_augmentor.nuscenes_forward(
                points, label_boxes_3d, label_classes, pipename,
                label_attributes, label_velocity, cur_sweep_points_num)
            ry_cls_label, residual_angle = encode_angle2class_np(
                label_boxes_3d[:, -1], cfg.MODEL.ANGLE_CLS_NUM)
        cur_label_num = len(label_boxes_3d)

        # then randomly choose some points
        cur_sweep_points = points[:cur_sweep_points_num, :]  # [-1, 4]
        other_sweep_points = points[cur_sweep_points_num:, :]  # [-1, 4]
        if len(other_sweep_points) == 0:
            other_sweep_points = cur_sweep_points.copy()
        np.random.shuffle(cur_sweep_points)
        np.random.shuffle(other_sweep_points)

        input_sample_points, num_points_per_voxel = self.voxel_generator.generate_nusc(
            cur_sweep_points, other_sweep_points,
            cfg.DATASET.NUSCENE.MAX_CUR_SAMPLE_POINTS_NUM
        )  # points, [num_voxels, num_points, 5], sem_labels, [num_voxels, num_points]
        cur_sample_points = input_sample_points[:cfg.DATASET.NUSCENE.
                                                MAX_CUR_SAMPLE_POINTS_NUM]
        other_sample_points = input_sample_points[cfg.DATASET.NUSCENE.
                                                  MAX_CUR_SAMPLE_POINTS_NUM:]

        biggest_label_num = max(biggest_label_num, cur_label_num)
        return biggest_label_num, input_sample_points, cur_sample_points, other_sample_points, label_boxes_3d, ry_cls_label, residual_angle, label_classes, label_attributes, label_velocity, sample_name, cur_transformation_matrix, sweeps, original_cur_sweep_points

    def load_batch(self, batch_size):
        perm = np.arange(self.sample_num).tolist()  # one index per sample
        dp = DataFromList(perm,
                          is_train=self.is_training,
                          shuffle=self.is_training)
        dp = MultiProcessMapData(dp, self.load_samples, self.workers_num)

        use_list = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1]
        use_concat = [0, 0, 0, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0]

        dp = BatchDataNuscenes(dp,
                               batch_size,
                               use_concat=use_concat,
                               use_list=use_list)
        dp.reset_state()
        dp = dp.get_data()
        return dp

    # Preprocess data
    def preprocess_samples(self, cur_scene_key, sample_data_token):
        sample_dicts = []
        biggest_label_num = 0

        cur_sample_data = self.nusc.get('sample_data', sample_data_token)
        cur_sample_token = cur_sample_data['sample_token']
        cur_sample = self.nusc.get('sample', cur_sample_token)

        ego_pose = self.nusc.get('ego_pose', cur_sample_data['ego_pose_token'])
        calibrated_sensor = self.nusc.get(
            'calibrated_sensor', cur_sample_data['calibrated_sensor_token'])

        l2e_r = calibrated_sensor['rotation']
        l2e_t = calibrated_sensor['translation']
        e2g_r = ego_pose['rotation']
        e2g_t = ego_pose['translation']
        l2e_r_mat = Quaternion(l2e_r).rotation_matrix
        e2g_r_mat = Quaternion(e2g_r).rotation_matrix
        cur_timestamp = cur_sample['timestamp']

        cur_transformation_matrix = {
            'lidar2ego_translation': l2e_t,
            'lidar2ego_rotation': l2e_r,
            'ego2global_translation': e2g_t,
            'ego2global_rotation': e2g_r,
        }

        # gather the point clouds from the preceding 0.5 seconds
        sweeps = []
        while len(sweeps) < self.max_sweeps:
            if cur_sample_data['prev'] != '':
                # has a previous frame
                cur_sample_data = self.nusc.get('sample_data',
                                                cur_sample_data['prev'])
                cur_ego_pose = self.nusc.get('ego_pose',
                                             cur_sample_data['ego_pose_token'])
                cur_calibrated_sensor = self.nusc.get(
                    'calibrated_sensor',
                    cur_sample_data['calibrated_sensor_token'])
                cur_lidar_path, cur_sweep_boxes, _ = self.nusc.get_sample_data(
                    cur_sample_data['token'])
                sweep = {
                    "lidar_path": cur_lidar_path,
                    "sample_data_token": cur_sample_data['token'],
                    "lidar2ego_translation":
                    cur_calibrated_sensor['translation'],
                    "lidar2ego_rotation": cur_calibrated_sensor['rotation'],
                    "ego2global_translation": cur_ego_pose['translation'],
                    "ego2global_rotation": cur_ego_pose['rotation'],
                    "timestamp": cur_sample_data["timestamp"]
                }
                l2e_r_s = sweep["lidar2ego_rotation"]
                l2e_t_s = sweep["lidar2ego_translation"]
                e2g_r_s = sweep["ego2global_rotation"]
                e2g_t_s = sweep["ego2global_translation"]
                # sweep->ego->global->ego'->lidar
                l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
                e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix

                R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
                    np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
                T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
                    np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T)
                T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                    l2e_r_mat).T) + l2e_t @ np.linalg.inv(l2e_r_mat).T

                sweep["sweep2lidar_rotation"] = R.T  # points @ R.T + T
                sweep["sweep2lidar_translation"] = T
                sweeps.append(sweep)
            else:  # prev is none
                break

        # then load gt_boxes_3d
        if self.img_list in ['train', 'val'] and cfg.TEST.WITH_GT:
            cur_data_path, all_boxes, _ = self.nusc.get_sample_data(
                sample_data_token)

            # then first parse boxes labels
            locs = np.array([box.center for box in all_boxes]).reshape(-1, 3)
            sizes = np.array([box.wlh for box in all_boxes]).reshape(-1, 3)
            rots = np.array([
                box.orientation.yaw_pitch_roll[0] for box in all_boxes
            ]).reshape(-1, 1)
            all_boxes_3d = np.concatenate([locs, sizes, -rots], axis=-1)

            annos_tokens = cur_sample['anns']
            all_velocity = np.array([
                self.nusc.box_velocity(ann_token)[:2]
                for ann_token in annos_tokens
            ])  # [-1, 2]
            for i in range(len(all_boxes)):
                velo = np.array([*all_velocity[i], 0.0])
                velo = velo @ np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                    l2e_r_mat).T
                all_velocity[i] = velo[:2]  # [-1, 2]

            attribute_tokens = [
                self.nusc.get('sample_annotation',
                              ann_token)['attribute_tokens']
                for ann_token in annos_tokens
            ]
            all_attribute = []
            for attribute_token in attribute_tokens:
                if len(attribute_token) == 0:
                    all_attribute.append([])
                else:
                    all_attribute.append(
                        self.nusc.get('attribute', attribute_token[0])['name'])
            # then filter these ignore labels
            categories = np.array([box.name for box in all_boxes])
            if self.img_list == 'train':
                useful_idx = [
                    index for index, category in enumerate(categories)
                    if self.useful_cls_dict[category] != 'ignore'
                ]
            else:
                useful_idx = [
                    index for index, category in enumerate(categories)
                ]
            if len(useful_idx) == 0:
                if self.img_list == 'train':
                    return None, biggest_label_num
                else:
                    all_boxes_3d = np.ones([1, 7], dtype=np.float32)
                    all_boxes_classes = np.array(['ignore'])
                    all_attribute = np.array([-1])
                    all_velocity = np.array([[0, 0]], dtype=np.float32)
            else:
                all_boxes_3d = all_boxes_3d[useful_idx]

                categories = categories[useful_idx]
                all_boxes_classes = np.array(
                    [self.useful_cls_dict[cate] for cate in categories])
                # now calculate the mean size of each box
                for tmp_idx, all_boxes_class in enumerate(all_boxes_classes):
                    cur_mean_size = self.cls_size_dict[
                        all_boxes_class] * self.cls_num_dict[all_boxes_class]
                    cur_cls_num = self.cls_num_dict[all_boxes_class] + 1
                    cur_total_size = cur_mean_size + all_boxes_3d[tmp_idx, [
                        4, 5, 3
                    ]]  # [l, w, h]
                    cur_mean_size = cur_total_size / cur_cls_num
                    self.cls_size_dict[all_boxes_class] = cur_mean_size
                    self.cls_num_dict[all_boxes_class] = cur_cls_num

                all_attribute = [
                    all_attribute[tmp_idx] for tmp_idx in useful_idx
                ]
                tmp_attribute = []
                for attr in all_attribute:
                    if attr == []: tmp_attribute.append(-1)
                    else:
                        tmp_attribute.append(self.attribute_idx_list[attr])
                all_attribute = tmp_attribute
                all_attribute = np.array(all_attribute, dtype=np.int32)
                all_velocity = [
                    all_velocity[tmp_idx] for tmp_idx in useful_idx
                ]
                all_velocity = np.array(all_velocity, dtype=np.float32)
        else:
            cur_data_path = self.nusc.get_sample_data_path(sample_data_token)

        # then generate the bev_maps
        if self.img_list in ['train', 'val', 'trainval'] and cfg.TEST.WITH_GT:
            sample_dict = {
                maps_dict.KEY_LABEL_BOXES_3D:
                all_boxes_3d,
                maps_dict.KEY_LABEL_CLASSES:
                all_boxes_classes,
                maps_dict.KEY_LABEL_ATTRIBUTES:
                all_attribute,
                maps_dict.KEY_LABEL_VELOCITY:
                all_velocity,
                maps_dict.KEY_LABEL_NUM:
                len(all_boxes_3d),
                maps_dict.KEY_POINT_CLOUD:
                cur_data_path,
                maps_dict.KEY_TRANSFORMRATION_MATRIX:
                cur_transformation_matrix,
                maps_dict.KEY_SAMPLE_NAME:
                '{}/{}/{}'.format(cur_scene_key, cur_sample_token,
                                  sample_data_token),
                maps_dict.KEY_SWEEPS:
                sweeps,
                maps_dict.KEY_TIMESTAMPS:
                cur_timestamp,
            }
            biggest_label_num = max(len(all_boxes_3d), biggest_label_num)
        else:
            # img_list is test
            sample_dict = {
                maps_dict.KEY_POINT_CLOUD:
                cur_data_path,
                maps_dict.KEY_SAMPLE_NAME:
                '{}/{}/{}'.format(cur_scene_key, cur_sample_token,
                                  sample_data_token),
                maps_dict.KEY_TRANSFORMRATION_MATRIX:
                cur_transformation_matrix,
                maps_dict.KEY_SWEEPS:
                sweeps,
                maps_dict.KEY_TIMESTAMPS:
                cur_timestamp,
            }
        return sample_dict, biggest_label_num

    def preprocess_batch(self):
        # if create_gt_dataset, then also create a boxes_numpy, saving all points
        if cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:  # also save mixup database
            mixup_label_dict = dict([(cls, []) for cls in self.mixup_db_class])

        sample_dicts_list = []
        for scene_key, v in tqdm.tqdm(self.sample_data_token_list.items()):
            for sample_data_token in v:
                sample_dict, tmp_biggest_label_num = self.preprocess_samples(
                    scene_key, sample_data_token)
                if sample_dict is None:
                    continue
                # else save the result
                sample_dicts_list.append(sample_dict)

                # create_gt_dataset
                if self.img_list in [
                        'train', 'val', 'trainval'
                ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
                    mixup_sample_dicts = self.generate_mixup_sample(
                        sample_dict)
                    if mixup_sample_dicts is None: continue
                    for mixup_sample_dict in mixup_sample_dicts:
                        cur_cls = mixup_sample_dict[
                            maps_dict.KEY_SAMPLED_GT_CLSES]
                        mixup_label_dict[cur_cls].append(mixup_sample_dict)

        # save preprocessed data
        with open(self.train_list, 'wb') as f:
            pickle.dump(sample_dicts_list, f)
        for k, v in self.cls_num_dict.items():
            print('class name: %s / class num: %d / mean size: (%f, %f, %f)' %
                  (k, v, self.cls_size_dict[k][0], self.cls_size_dict[k][1],
                   self.cls_size_dict[k][2]))  # [l, w, h]

        if self.img_list in [
                'train', 'val', 'trainval'
        ] and cfg.TEST.WITH_GT and cfg.TRAIN.AUGMENTATIONS.MIXUP.OPEN:
            print('**** Generating groundtruth database ****')
            for cur_cls_name, mixup_sample_dict in mixup_label_dict.items():
                cur_mixup_db_cls_path = self.mixup_db_cls_path[cur_cls_name]
                cur_mixup_db_trainlist_path = self.mixup_db_trainlist_path[
                    cur_cls_name]
                print('**** Class %s ****' % cur_cls_name)
                with open(cur_mixup_db_trainlist_path, 'w') as f:
                    for tmp_idx, tmp_cur_mixup_sample_dict in tqdm.tqdm(
                            enumerate(mixup_sample_dict)):
                        f.write('%06d.npy\n' % tmp_idx)
                        np.save(
                            os.path.join(cur_mixup_db_cls_path,
                                         '%06d.npy' % tmp_idx),
                            tmp_cur_mixup_sample_dict)
        print('Preprocessing finished!')

    def generate_mixup_sample(self, sample_dict):
        """ This function is bound for generating mixup dataset """
        all_boxes_3d = sample_dict[maps_dict.KEY_LABEL_BOXES_3D]
        all_boxes_classes = sample_dict[maps_dict.KEY_LABEL_CLASSES]
        point_cloud_path = sample_dict[maps_dict.KEY_POINT_CLOUD]

        # then we first cast all_boxes_3d to kitti format
        all_boxes_3d = cast_box_3d_to_kitti_format(all_boxes_3d)

        # load points
        points = np.fromfile(point_cloud_path, dtype=np.float32).reshape(
            (-1, 5))
        points = cast_points_to_kitti(points)
        points[:, 3] /= 255
        points[:, 4] = 0  # timestamp is zero

        points_mask = check_inside_points(points,
                                          all_boxes_3d)  # [pts_num, gt_num]
        points_masks_num = np.sum(points_mask, axis=0)  # [gt_num]
        valid_box_idx = np.where(
            points_masks_num >= cfg.DATASET.MIN_POINTS_NUM)[0]

        if len(valid_box_idx) == 0:
            return None

        valid_label_boxes_3d = all_boxes_3d[valid_box_idx]
        valid_label_classes = all_boxes_classes[valid_box_idx]

        sample_dicts = []
        for index, i in enumerate(valid_box_idx):
            cur_points_mask = points_mask[:, i]
            cur_points_idx = np.where(cur_points_mask)[0]
            cur_inside_points = points[cur_points_idx, :]
            sample_dict = {
                # 0 timestamp and /255 reflectance
                maps_dict.KEY_SAMPLED_GT_POINTS:
                cur_inside_points,  # kitti format points
                maps_dict.KEY_SAMPLED_GT_LABELS_3D:
                valid_label_boxes_3d[index],
                maps_dict.KEY_SAMPLED_GT_CLSES:
                valid_label_classes[index],
            }
            sample_dicts.append(sample_dict)
        return sample_dicts

    # Evaluation
    def set_evaluation_tensor(self, model):
        # get prediction results, bs = 1
        pred_bbox_3d = tf.squeeze(model.output[maps_dict.PRED_3D_BBOX][-1],
                                  axis=0)
        pred_cls_score = tf.squeeze(model.output[maps_dict.PRED_3D_SCORE][-1],
                                    axis=0)
        pred_cls_category = tf.squeeze(
            model.output[maps_dict.PRED_3D_CLS_CATEGORY][-1], axis=0)
        pred_list = [pred_bbox_3d, pred_cls_score, pred_cls_category]

        if len(model.output[maps_dict.PRED_3D_ATTRIBUTE]) > 0:
            pred_attribute = tf.squeeze(
                model.output[maps_dict.PRED_3D_ATTRIBUTE][-1], axis=0)
            pred_velocity = tf.squeeze(
                model.output[maps_dict.PRED_3D_VELOCITY][-1], axis=0)
            pred_list.extend([pred_attribute, pred_velocity])
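            # NOTE: evaluate_map below unpacks pred_list positionally, so the
            # order [bbox, score, category, attribute, velocity] must be kept.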
        return pred_list

    def evaluate_map(self,
                     sess,
                     feeddict_producer,
                     pred_list,
                     val_size,
                     cls_thresh,
                     log_dir,
                     placeholders=None):
        submissions = {}
        submissions['meta'] = dict()
        submissions['meta']['use_camera'] = False
        submissions['meta']['use_lidar'] = True
        submissions['meta']['use_radar'] = False
        submissions['meta']['use_map'] = False
        submissions['meta']['use_external'] = False
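        # This mirrors the nuScenes detection submission layout: a top-level
        # "meta" block describing the modalities used, plus a "results" dict
        # mapping each sample_token to its list of predicted boxes (filled in
        # below).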

        submissions_results = dict()
        pred_attr_velo = (len(pred_list) == 5)

        for i in tqdm.tqdm(range(val_size)):
            feed_dict = feeddict_producer.create_feed_dict()

            if pred_attr_velo:
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op, pred_attr_op, pred_velo_op = sess.run(
                    pred_list, feed_dict=feed_dict)
            else:
                pred_bbox_3d_op, pred_cls_score_op, pred_cls_category_op = sess.run(
                    pred_list, feed_dict=feed_dict)
            pred_cls_category_op += 1  # label from 1 to n

            sample_name, cur_transformation_matrix, sweeps = feeddict_producer.info
            sample_name = sample_name[0]
            cur_transformation_matrix = cur_transformation_matrix[0]
            sweeps = sweeps[0]
            cur_scene_key, cur_sample_token, cur_sample_data_token = sample_name.split(
                '/')

            select_idx = np.where(pred_cls_score_op >= cls_thresh)[0]
            pred_cls_score_op = pred_cls_score_op[select_idx]
            pred_cls_category_op = pred_cls_category_op[select_idx]
            pred_bbox_3d_op = pred_bbox_3d_op[select_idx]
            if pred_attr_velo:
                pred_attr_op = pred_attr_op[select_idx]
                pred_velo_op = pred_velo_op[select_idx]
            else:
                pred_attr_op, pred_velo_op = None, None

            if len(pred_bbox_3d_op) > 500:
                arg_sort_idx = np.argsort(pred_cls_score_op)[::-1]
                arg_sort_idx = arg_sort_idx[:500]
                pred_cls_score_op = pred_cls_score_op[arg_sort_idx]
                pred_cls_category_op = pred_cls_category_op[arg_sort_idx]
                pred_bbox_3d_op = pred_bbox_3d_op[arg_sort_idx]
                if pred_attr_velo:
                    pred_attr_op = pred_attr_op[arg_sort_idx]
                    pred_velo_op = pred_velo_op[arg_sort_idx]

            # then transform pred_bbox_op to nuscenes_box
            boxes = cast_kitti_format_to_nusc_box_3d(
                pred_bbox_3d_op,
                pred_cls_score_op,
                pred_cls_category_op,
                cur_attribute=pred_attr_op,
                cur_velocity=pred_velo_op,
                classes=self.idx2cls_dict)
            for box in boxes:
                velocity = box.velocity[:2].tolist()
                if len(sweeps) == 0:
                    velocity = (np.nan, np.nan)
                box.velocity = np.array([*velocity, 0.0])
            # then cast the box from ego to global
            boxes = _lidar_nusc_box_to_global(cur_transformation_matrix,
                                              boxes,
                                              self.idx2cls_dict,
                                              eval_version='cvpr_2019')

            annos = []
            for box in boxes:
                name = self.idx2cls_dict[box.label]
                if box.name == -1:
                    attr = self.DefaultAttribute[name]
                else:
                    attr = self.AttributeIdxLabelMapping[name][box.name]
                velocity = box.velocity[:2].tolist()
                nusc_anno = {
                    "sample_token": cur_sample_token,
                    "translation": box.center.tolist(),
                    "size": box.wlh.tolist(),
                    "rotation": box.orientation.elements.tolist(),
                    "velocity": velocity,
                    "detection_name": name,
                    "detection_score": box.score,
                    "attribute_name": attr,
                }
                annos.append(nusc_anno)
            submissions_results[cur_sample_token] = annos

        submissions['results'] = submissions_results

        res_path = os.path.join(log_dir, "results_nusc_1.json")
        with open(res_path, "w") as f:
            json.dump(submissions, f)
        eval_main_file = os.path.join(cfg.ROOT_DIR, 'lib/core/nusc_eval.py')
        root_path = self.dataset_dir
        cmd = f"python3 {str(eval_main_file)} --root_path=\"{str(root_path)}\""
        cmd += f" --version={'v1.0-trainval'} --eval_version={'cvpr_2019'}"
        cmd += f" --res_path=\"{str(res_path)}\" --eval_set={'val'}"
        cmd += f" --output_dir=\"{LOG_FOUT_DIR}\""
        # use subprocess can release all nusc memory after evaluation
        subprocess.check_output(cmd, shell=True)
        os.system('rm \"%s\"' % res_path)  # remove former result file

        with open(os.path.join(log_dir, "metrics_summary.json"), "r") as f:
            metrics = json.load(f)
        return metrics

    def evaluate_recall(self,
                        sess,
                        feeddict_producer,
                        pred_list,
                        val_size,
                        cls_thresh,
                        log_dir,
                        placeholders=None):
        pass

    def logger_and_select_best_map(self, metrics, log_string):
        detail = {}
        result = f"Nusc v1.0-trainval Evaluation\n"
        final_score = []
        for name in self.cls_list:
            detail[name] = {}
            for k, v in metrics["label_aps"][name].items():
                detail[name][f"dist@{k}"] = v
            tp_errs = []
            tp_names = []
            for k, v in metrics["label_tp_errors"][name].items():
                detail[name][k] = v
                tp_errs.append(f"{v:.4f}")
                tp_names.append(k)
            threshs = ', '.join(list(metrics["label_aps"][name].keys()))
            scores = list(metrics["label_aps"][name].values())
            final_score.append(np.mean(scores))
            scores = ', '.join([f"{s * 100:.2f}" for s in scores])
            result += f"{name} Nusc dist AP@{threshs} and TP errors\n"
            result += scores
            result += "\n"
            result += "mAP: %0.2f\n" % (
                np.mean(list(metrics["label_aps"][name].values())) * 100)
            result += ', '.join(tp_names) + ": " + ', '.join(tp_errs)
            result += "\n"
        result += 'NDS score: %0.2f\n' % (metrics['nd_score'] * 100)
        log_string(result)

        cur_result = metrics['nd_score']
        return cur_result

    def logger_and_select_best_recall(self, metrics, log_string):
        pass

    # save prediction results
    def save_predictions(self,
                         sess,
                         feeddict_producer,
                         pred_list,
                         val_size,
                         cls_thresh,
                         log_dir,
                         placeholders=None):
        pass
class LogConverter:
    __radar_reader = scan_readers.RadarScanReader()
    __lidar_reader = scan_readers.LidarScanReader()
    __vision_reader = scan_readers.VisionScanReader()
    __host_data_reader = scan_readers.HostDataReader()

    def __init__(self, data_set_name, data_set_path, output_path=''):
        self.__scan_data = {}
        self.__writers = {}

        if os.path.exists(data_set_path):
            self.__nusc = NuScenes(version=data_set_name,
                                   dataroot=data_set_path,
                                   verbose=True)
        else:
            print('Given path: {}, does not exist'.format(data_set_path))

        self.__output_path = output_path
        if not (self.__output_path == ''):
            if not os.path.exists(self.__output_path):
                os.makedirs(self.__output_path)

    def convertAllScenes(self):
        for scene in self.__nusc.scene:
            self.__scene_name = scene['name']
            self.convertScene(scene)

    def convertScene(self, scene):
        first_sample_token = scene['first_sample_token']
        sample = self.__nusc.get('sample', first_sample_token)
        while sample['next']:
            sample = self.__nusc.get('sample', sample['next'])
            for sensor in sample['data']:
                token = sample['data'][sensor]
                self.__sensor_data = self.__nusc.get('sample_data', token)
                self.readHostData()
                self.readSensorSampleData(sensor, token)
            self.__writeScan()
        self.__writeScene()

    def readHostData(self):
        ego_pose_raw = self.__nusc.get('ego_pose',
                                       self.__sensor_data['ego_pose_token'])
        self.__scan_data['HOST_DATA'] = self.__host_data_reader.readHostData(
            ego_pose_raw)

    def readSensorSampleData(self, sensor, token):
        data_file_path = self.__nusc.dataroot + '/' + self.__sensor_data[
            'filename']
        calibrated_sensor_raw = self.__nusc.get(
            'calibrated_sensor', self.__sensor_data['calibrated_sensor_token'])
        ts = self.__sensor_data['timestamp']

        if 'RADAR' in sensor:
            self.__scan_data[sensor] = self.__radar_reader.readScan(
                data_file_path, ts, calibrated_sensor_raw)
        elif 'LIDAR' in sensor:
            self.__scan_data[sensor] = self.__lidar_reader.readScan(
                data_file_path, ts, calibrated_sensor_raw)
        elif 'CAM' in sensor:
            _, boxes, _ = self.__nusc.get_sample_data(token)
            self.__scan_data[sensor] = self.__vision_reader.readScan(
                data_file_path, ts, calibrated_sensor_raw, boxes)
        else:
            print('Sensor unknown')
            self.__scan_data = None

    def __writeScan(self):
        for sensor_name, sensor_data in self.__scan_data.items():
            if not (sensor_name in self.__writers):
                self.__writers[sensor_name] = self.__createWriter(sensor_name)

            self.__writers[sensor_name].writeScan(sensor_data)

    def __createWriter(self, sensor_key):
        writer = None
        ext = None

        if 'RADAR' in sensor_key:
            writer = writers.ProtobufSceneWriter('radar')
            ext = '.rad'
        elif 'LIDAR' in sensor_key:
            writer = writers.ProtobufSceneWriter('lidar')
            ext = '.lid'
        elif 'CAM' in sensor_key:
            writer = writers.ProtobufSceneWriter('camera')
            ext = '.cam'
        elif 'HOST' in sensor_key:
            writer = writers.ProtobufSceneWriter('host')
            ext = '.host'
        else:
            writer = None

        if writer:
            file_name = self.__scene_name + '_' + sensor_key + ext
            writer.createFile(self.__createFilePath(file_name))
        return writer

    def __createFilePath(self, file_name):
        if self.__output_path == '':
            return file_name
        else:
            return self.__output_path + '/' + file_name

    def __writeScene(self):
        for writer in self.__writers:
            self.__writers[writer].closeFile()
        self.__writers = {}
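
# A minimal usage sketch for the LogConverter above. The dataset version,
# dataroot and output directory are placeholders (assumptions), not values
# taken from the original code.
if __name__ == '__main__':
    converter = LogConverter('v1.0-mini', '/data/sets/nuscenes',
                             output_path='converted_scenes')
    converter.convertAllScenes()
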
def create_dataset(dataroot,
                   save_dir,
                   width=672,
                   height=672,
                   grid_range=70.,
                   nusc_version='v1.0-mini',
                   use_constant_feature=False,
                   use_intensity_feature=True,
                   end_id=None,
                   augmentation_num=0,
                   add_noise=False):
    """Create a learning dataset from Nuscens

    Parameters
    ----------
    dataroot : str
        Nuscenes dataroot path.
    save_dir : str
        Dataset save directory.
    width : int, optional
        feature map width, by default 672
    height : int, optional
        feature map height, by default 672
    grid_range : float, optional
        feature map range, by default 70.
    nusc_version : str, optional
        Nuscenes version. v1.0-mini or v1.0-trainval, by default 'v1.0-mini'
    use_constant_feature : bool, optional
        Whether to use constant feature, by default False
    use_intensity_feature : bool, optional
        Whether to use intensity feature, by default True
    end_id : int, optional
        How many data to generate. If None, all data, by default None
    augmentation_num : int, optional
        How many data augmentations for one sample, by default 0
    add_noise : bool, optional
        Whether to add noise to the point cloud, by default False

    Raises
    ------
    Exception
        Width and height are not equal

    """
    os.makedirs(os.path.join(save_dir, 'in_feature'), exist_ok=True)
    os.makedirs(os.path.join(save_dir, 'out_feature'), exist_ok=True)

    nusc = NuScenes(version=nusc_version, dataroot=dataroot, verbose=True)
    ref_chan = 'LIDAR_TOP'

    if width == height:
        size = width
    else:
        raise Exception(
            'Currently only supported if width and height are equal')

    grid_length = 2. * grid_range / size
    z_trans_range = 0.5
    sample_id = 0
    data_id = 0
    grid_ticks = np.arange(-grid_range, grid_range + grid_length, grid_length)
    grid_centers \
        = (grid_ticks + grid_length / 2)[:len(grid_ticks) - 1]
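    # grid_ticks holds the size + 1 cell edges spanning [-grid_range, grid_range];
    # shifting by half a cell and dropping the last entry gives the `size`
    # cell-center coordinates used below when rasterising boxes and points.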

    for my_scene in nusc.scene:
        first_sample_token = my_scene['first_sample_token']
        token = first_sample_token

        # try:
        while (token != ''):
            print('sample:{} {} created_data={}'.format(
                sample_id, token, data_id))
            my_sample = nusc.get('sample', token)
            sd_record = nusc.get('sample_data', my_sample['data'][ref_chan])
            sample_rec = nusc.get('sample', sd_record['sample_token'])
            chan = sd_record['channel']
            pc_raw, _ = LidarPointCloud.from_file_multisweep(nusc,
                                                             sample_rec,
                                                             chan,
                                                             ref_chan,
                                                             nsweeps=1)

            _, boxes_raw, _ = nusc.get_sample_data(sd_record['token'],
                                                   box_vis_level=0)

            z_trans = 0
            q = Quaternion()
            for augmentation_idx in range(augmentation_num + 1):
                pc = copy.copy(pc_raw)
                if add_noise:
                    pc.points = add_noise_points(pc.points.T).T
                boxes = copy.copy(boxes_raw)
                if augmentation_idx > 0:
                    z_trans = (np.random.rand() - 0.5) * 2 * z_trans_range
                    pc.translate([0, 0, z_trans])

                    z_rad = np.random.rand() * np.pi * 2
                    q = Quaternion(axis=[0, 0, 1], radians=z_rad)
                    pc.rotate(q.rotation_matrix)

                pc_points = pc.points.astype(np.float32)

                out_feature = np.zeros((size, size, 8), dtype=np.float32)
                for box_idx, box in enumerate(boxes):
                    if augmentation_idx > 0:
                        box.translate([0, 0, z_trans])
                        box.rotate(q)

                    label = 0
                    if box.name.split('.')[0] == 'vehicle':
                        if box.name.split('.')[1] == 'car':
                            label = 1
                        elif box.name.split('.')[1] == 'bus':
                            label = 2
                        elif box.name.split('.')[1] == 'truck':
                            label = 2
                        elif box.name.split('.')[1] == 'construction':
                            label = 2
                        elif box.name.split('.')[1] == 'emergency':
                            label = 2
                        elif box.name.split('.')[1] == 'trailer':
                            label = 2
                        elif box.name.split('.')[1] == 'bicycle':
                            label = 3
                        elif box.name.split('.')[1] == 'motorcycle':
                            label = 3
                    elif box.name.split('.')[0] == 'human':
                        label = 4
                    # elif box.name.split('.')[0] == 'movable_object':
                    #     label = 1
                    # elif box.name.split('.')[0] == 'static_object':
                    #     label = 1
                    else:
                        continue
                    height_pt = np.linalg.norm(box.corners().T[0] -
                                               box.corners().T[3])
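                    # In the nuScenes devkit's Box.corners() ordering, corners 0
                    # and 3 differ only along z, so height_pt is the box height;
                    # the bottom corners [2, 3, 7, 6] taken below form the box
                    # footprint (bird's-eye-view rectangle) used as box2d.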
                    box_corners = box.corners().astype(np.float32)
                    corners2d = box_corners[:2, :]
                    box2d = corners2d.T[[2, 3, 7, 6]]
                    box2d_center = box2d.mean(axis=0)
                    yaw, pitch, roll = box.orientation.yaw_pitch_roll
                    out_feature = generate_out_feature(size, grid_centers,
                                                       box_corners, box2d,
                                                       box2d_center, pc_points,
                                                       height_pt, label, yaw,
                                                       out_feature)

                # feature_generator = fg.FeatureGenerator(
                #     grid_range, width, height,
                #     use_constant_feature, use_intensity_feature)
                feature_generator = fgpb.FeatureGenerator(
                    grid_range, size, size)
                in_feature = feature_generator.generate(
                    pc_points.T, use_constant_feature, use_intensity_feature)

                if use_constant_feature and use_intensity_feature:
                    channels = 8
                elif use_constant_feature or use_intensity_feature:
                    channels = 6
                else:
                    channels = 4

                in_feature = np.array(in_feature).reshape(
                    channels, size, size).astype(np.float16)
                in_feature = in_feature.transpose(1, 2, 0)
                np.save(
                    os.path.join(save_dir, 'in_feature/{:05}'.format(data_id)),
                    in_feature)
                np.save(
                    os.path.join(save_dir,
                                 'out_feature/{:05}'.format(data_id)),
                    out_feature)
                token = my_sample['next']
                data_id += 1
                if data_id == end_id:
                    return
            sample_id += 1
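

# A minimal usage sketch for create_dataset above; the dataroot and save_dir
# paths are placeholders (assumptions), not values from the original code.
if __name__ == '__main__':
    create_dataset(dataroot='/data/sets/nuscenes',
                   save_dir='./bev_dataset',
                   nusc_version='v1.0-mini',
                   augmentation_num=2,
                   add_noise=False)
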
def test(estimator=None):

    if args.Joint:
        nusc = NuScenes(
            version='v1.0-trainval', verbose=True,
            dataroot=args.nuscenes_root,
        )

    dataset_image_folder_format = os.path.join(
        args.nuscenes_data_root, args.type+'/'+'{}/img1',
    )
    detection_file_name_format = os.path.join(
        args.nuscenes_data_root, args.type+'/'+'{}/gt/gt.txt',
    )

    if not os.path.exists(args.log_folder):
        os.mkdir(args.log_folder)

    save_folder = args.log_folder
    if not os.path.exists(save_folder):
        os.mkdir(save_folder)

    saved_file_name_format = os.path.join(save_folder, 'Video'+'{}.txt')
    save_video_name_format = os.path.join(save_folder, 'Video'+'{}.avi')
    test_dataset_index = os.listdir(
        os.path.join(args.nuscenes_data_root, args.type),
    )

    def f(format_str):
        return [format_str.format(index) for index in test_dataset_index]

    for image_folder, detection_file_name, saved_file_name, save_video_name in zip(
            f(dataset_image_folder_format), f(detection_file_name_format),
            f(saved_file_name_format), f(save_video_name_format)):
        if path.exists(image_folder) and os.path.getsize(detection_file_name) > 0:

            tracker = Tracker(estimator)
            reader = DataReader(
                image_folder=image_folder,
                detection_file_name=detection_file_name,
            )
            result = list()
            first_run = True
            for i, item in enumerate(reader):

                if i > len(reader):
                    break

                if item is None:
                    print('item is none')
                    continue

                img = item[0]
                det = item[1]
                if img is None or det is None or len(det) == 0:
                    continue
                if len(det) > config['max_object']:
                    det = det[:config['max_object'], :]

                h, w, _ = img.shape

                if first_run:
                    vw = cv2.VideoWriter(
                        save_video_name, cv2.VideoWriter_fourcc(
                            'M', 'J', 'P', 'G',
                        ), 10, (w, h),
                    )
                    first_run = False

                features = det[:, 6:12].astype(float)
                tokens = det[:, 12:]

                if args.Joint:
                    tokens = tokens[0][1:]
                    frame_token = tokens[0]
                    first_frame_token = tokens[1]

                    current_cam_record = nusc.get('sample_data', frame_token)
                    current_cam_path = nusc.get_sample_data_path(frame_token)
                    current_cam_path, boxes, current_camera_intrinsic = nusc.get_sample_data(
                        current_cam_record['token'],
                    )
                    current_cs_record = nusc.get(
                        'calibrated_sensor', current_cam_record['calibrated_sensor_token'],
                    )
                    current_poserecord = nusc.get(
                        'ego_pose', current_cam_record['ego_pose_token'],
                    )

                    first_cam_record = nusc.get(
                        'sample_data', first_frame_token,
                    )
                    first_cam_path = nusc.get_sample_data_path(
                        first_frame_token,
                    )
                    first_cam_path, boxes, first_camera_intrinsic = nusc.get_sample_data(
                        first_cam_record['token'],
                    )
                    first_cs_record = nusc.get(
                        'calibrated_sensor', first_cam_record['calibrated_sensor_token'],
                    )
                    first_poserecord = nusc.get(
                        'ego_pose', first_cam_record['ego_pose_token'],
                    )

                det[:, [2, 4]] /= float(w)
                det[:, [3, 5]] /= float(h)
                if args.Joint:
                    image_org = tracker.update(
                        args.Joint, img, det[
                            :,
                            2:6
                        ], args.show_image, i, features, tokens, False,
                        current_cs_record, current_poserecord, first_cs_record, first_poserecord, first_camera_intrinsic,
                    )
                else:
                    image_org = tracker.update(
                        args.Joint, img, det[:, 2:6], args.show_image, i,
                    )

                vw.write(image_org)

                # save result
                for t in tracker.tracks:
                    n = t.nodes[-1]
                    if t.age == 1:
                        b = n.get_box(tracker.frame_index-1, tracker.recorder)
#                         print('n.pose ',n.pose.cpu().data.numpy())
                        if args.Joint:
                            result.append(
                                [i] + [t.id] + [
                                    b[0]*w, b[1]*h, b[2]*w, b[3]
                                    * h,
                                ] + list(n.pose.cpu().data.numpy()),
                            )
                        else:
                            result.append(
                                [i] + [t.id] + [b[0]*w, b[1]*h, b[2]*w, b[3]*h],
                            )
            # save data
            np.savetxt(saved_file_name, np.array(result), fmt='%10f')
Example no. 13
0
    def create_annotations(self, version, max_sweeps):
        from nuscenes.nuscenes import NuScenes
        nusc = NuScenes(version=version, dataroot=self.cfg.DATA.ROOTDIR, verbose=True)
        from nuscenes.utils import splits
        available_vers = ["v1.0-trainval", "v1.0-test", "v1.0-mini"]
        assert version in available_vers
        if version == "v1.0-trainval":
            train_scenes = splits.train
            val_scenes = splits.val
        elif version == "v1.0-test":
            train_scenes = splits.test
            val_scenes = []
        elif version == "v1.0-mini":
            train_scenes = splits.mini_train
            val_scenes = splits.mini_val
        else:
            raise ValueError("unknown")
        root_path = Path(self.cfg.DATA.ROOTDIR)
        available_scenes = self._get_available_scenes(nusc)
        available_scene_names = [s["name"] for s in available_scenes]
        train_scenes = list(filter(lambda x: x in available_scene_names, train_scenes))
        val_scenes = list(filter(lambda x: x in available_scene_names, val_scenes))
        train_scenes = set([
            available_scenes[available_scene_names.index(s)]["token"]
            for s in train_scenes
        ])
        val_scenes = set([
            available_scenes[available_scene_names.index(s)]["token"]
            for s in val_scenes
        ])
        print(
            f"train scene: {len(train_scenes)}, val scene: {len(val_scenes)}")
        self.train_infos = dict()
        self.val_infos = dict()
        train_index = 0
        val_index = 0
        
        for sample in tqdm(nusc.sample, desc="Generating train infos..."):
            lidar_token = sample["data"]["LIDAR_TOP"]
            sd_rec = nusc.get('sample_data', lidar_token)
            cs_record = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
            pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
            lidar_path, boxes, _ = nusc.get_sample_data(lidar_token)
            assert Path(lidar_path).exists(), f"lidar file is missing: {lidar_path}"
            item = {
                "lidar_path": lidar_path,
                "token": sample["token"],
                "sweeps": [],
                "calib": cs_record,
                "ego_pose": pose_record,
                "timestamp": sample["timestamp"]
            }
            l2e_t = cs_record['translation']
            l2e_r = cs_record['rotation']
            e2g_t = pose_record['translation']
            e2g_r = pose_record['rotation']
            
            l2e_r_mat = Quaternion(l2e_r).rotation_matrix
            e2g_r_mat = Quaternion(e2g_r).rotation_matrix

            sweeps = []
            while len(sweeps) < max_sweeps:
                if not sd_rec['prev'] == "":
                    sd_rec = nusc.get('sample_data', sd_rec['prev'])
                    cs_record = nusc.get('calibrated_sensor', sd_rec['calibrated_sensor_token'])
                    pose_record = nusc.get('ego_pose', sd_rec['ego_pose_token'])
                    lidar_path = nusc.get_sample_data_path(sd_rec['token'])
                    sweep = {
                        "lidar_path": lidar_path,
                        "sample_data_token": sd_rec['token'],
                        "timestamp": sd_rec["timestamp"]
                    }
                    l2e_r_s = cs_record["rotation"]
                    l2e_t_s = cs_record["translation"]
                    e2g_r_s = pose_record["rotation"]
                    e2g_t_s = pose_record["translation"]
                    ## sweep->ego->global->ego'->lidar
                    l2e_r_s_mat = Quaternion(l2e_r_s).rotation_matrix
                    e2g_r_s_mat = Quaternion(e2g_r_s).rotation_matrix
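                    # R and T below compose (sweep lidar -> ego -> global at the
                    # sweep timestamp) with the inverse of (lidar -> ego ->
                    # global at the keyframe timestamp), so the sweep's points
                    # can be expressed in the reference keyframe's lidar frame
                    # before aggregation.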

                    R = (l2e_r_s_mat.T @ e2g_r_s_mat.T) @ (
                        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
                    )
                    T = (l2e_t_s @ e2g_r_s_mat.T + e2g_t_s) @ (
                        np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(l2e_r_mat).T
                    )
                    T -= e2g_t @ (np.linalg.inv(e2g_r_mat).T @ np.linalg.inv(
                        l2e_r_mat).T) + l2e_t @ np.linalg.inv(l2e_r_mat).T 
                    sweep["sweep2lidar_rotation"] = R.T 
                    sweep["sweep2lidar_translation"] = T
                    sweeps.append(sweep)
                else:
                    break
            item["sweeps"] = sweeps

            annotations = [
                nusc.get('sample_annotation', token)
                for token in sample['anns']
            ]
            locs = np.array([b.center for b in boxes]).reshape(-1, 3)
            dims = np.array([b.wlh for b in boxes]).reshape(-1, 3)
            rots = np.array([b.orientation.yaw_pitch_roll[0] 
                            for b in boxes]).reshape(-1, 1)
            names = [b.name for b in boxes]
            class_idx = []
            for i in range(len(names)):
                if names[i] in NuscenesDataset.NameMapping:
                    names[i] = NuscenesDataset.NameMapping[names[i]]
                    class_idx.append(nuscenes_class_name_to_idx(names[i]))
            names = np.array(names)
            class_idx = np.array(class_idx)

            gt_boxes = np.concatenate([locs, dims, rots], axis=1)
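            # gt_boxes is (N, 7): box center (x, y, z), size (w, l, h) and yaw,
            # all in the lidar frame returned by get_sample_data above.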

            ## In the nuScenes dataset we need to filter out some rare classes
            mask = np.array([NuscenesDataset.Nuscenes_classes.count(s) > 0 for s in names], dtype=np.bool_)
            gt_boxes = gt_boxes[mask]
            names = names[mask]
            
            # assert len(gt_boxes) == len(
            #     annotations), f"{len(gt_boxes)}, {len(annotations)}."
            assert len(gt_boxes) == len(names), f"{len(gt_boxes)}, {len(names)}"
            
            assert len(gt_boxes) == len(class_idx), f"{len(gt_boxes)}, {len(class_idx)}"

            item["boxes"] = gt_boxes
            item["class_idx"] = class_idx
            item["names"] = names
            item["num_lidar_pts"] = np.array(
                [a["num_lidar_pts"] for a in annotations]
            )
            item["num_radar_pts"] = np.array(
                [a["num_radar_pts"] for a in annotations]
            )
            if sample["scene_token"] in train_scenes:
                self.train_infos[train_index] = item
                train_index += 1
            else:
                self.val_infos[val_index] = item
                val_index += 1
Example no. 14
0
def create_dataset(dataroot,
                   save_dir,
                   pretrained_model,
                   width=672,
                   height=672,
                   grid_range=70.,
                   nusc_version='v1.0-mini',
                   use_constant_feature=True,
                   use_intensity_feature=True,
                   end_id=None):

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    bcnn_model = BCNN(in_channels=6, n_class=5).to(device)
    # bcnn_model = torch.nn.DataParallel(bcnn_model)  # multi gpu
    if os.path.exists(pretrained_model):
        print('Use pretrained model')
        bcnn_model.load_state_dict(
            fix_model_state_dict(torch.load(pretrained_model)))
        bcnn_model.eval()
    else:
        return

    os.makedirs(os.path.join(save_dir, 'in_feature'), exist_ok=True)
    os.makedirs(os.path.join(save_dir, 'out_feature'), exist_ok=True)
    os.makedirs(os.path.join(save_dir, 'inference_feature'), exist_ok=True)

    nusc = NuScenes(version=nusc_version, dataroot=dataroot, verbose=True)
    ref_chan = 'LIDAR_TOP'

    if width == height:
        size = width
    else:
        raise Exception(
            'Currently only supported if width and height are equal')

    grid_length = 2. * grid_range / size
    label_half_length = 0

    data_id = 0
    grid_ticks = np.arange(-grid_range, grid_range + grid_length, grid_length)
    grid_centers \
        = (grid_ticks + grid_length / 2)[:len(grid_ticks) - 1]

    for my_scene in nusc.scene:
        first_sample_token = my_scene['first_sample_token']
        token = first_sample_token

        # try:
        while (token != ''):
            print('--- {} '.format(data_id) + token + ' ---')
            my_sample = nusc.get('sample', token)
            sd_record = nusc.get('sample_data', my_sample['data'][ref_chan])
            sample_rec = nusc.get('sample', sd_record['sample_token'])
            chan = sd_record['channel']
            pc, times = LidarPointCloud.from_file_multisweep(nusc,
                                                             sample_rec,
                                                             chan,
                                                             ref_chan,
                                                             nsweeps=1)
            _, boxes, _ = nusc.get_sample_data(sd_record['token'],
                                               box_vis_level=0)
            out_feature = np.zeros((size, size, 8), dtype=np.float32)
            for box_idx, box in enumerate(boxes):
                label = 0
                if box.name.split('.')[0] == 'vehicle':
                    if box.name.split('.')[1] == 'car':
                        label = 1
                    elif box.name.split('.')[1] == 'bus':
                        label = 1
                    elif box.name.split('.')[1] == 'truck':
                        label = 1
                    elif box.name.split('.')[1] == 'construction':
                        label = 1
                    elif box.name.split('.')[1] == 'emergency':
                        label = 1
                    elif box.name.split('.')[1] == 'trailer':
                        label = 1
                    elif box.name.split('.')[1] == 'bicycle':
                        label = 2
                    elif box.name.split('.')[1] == 'motorcycle':
                        label = 2
                elif box.name.split('.')[0] == 'human':
                    label = 3
                # elif box.name.split('.')[0] == 'movable_object':
                #     label = 1
                # elif box.name.split('.')[0] == 'static_object':
                #     label = 1
                else:
                    continue
                height_pt = np.linalg.norm(box.corners().T[0] -
                                           box.corners().T[3])
                corners2d = box.corners()[:2, :]
                box2d = corners2d.T[[2, 3, 7, 6]]
                box2d_center = box2d.mean(axis=0)
                yaw, pitch, roll = box.orientation.yaw_pitch_roll
                generate_out_feature(width, height, size, grid_centers, box2d,
                                     box2d_center, height_pt, label,
                                     label_half_length, yaw, out_feature)
                # import pdb; pdb.set_trace()
            out_feature = out_feature.astype(np.float16)
            in_feature_generator = Feature_generator(grid_range, width, height,
                                                     use_constant_feature,
                                                     use_intensity_feature)
            in_feature_generator.generate(pc.points.T)
            in_feature = in_feature_generator.feature
            if use_constant_feature and use_intensity_feature:
                in_feature = in_feature.reshape(size, size, 8)
            elif use_constant_feature or use_intensity_feature:
                in_feature = in_feature.reshape(size, size, 6)
            else:
                in_feature = in_feature.reshape(size, size, 4)
            # instance_pt is flipped due to flip
            # out_feature = np.flip(np.flip(out_feature, axis=0), axis=1)
            # out_feature[:, :, 1:3] *= -1
            np.save(os.path.join(save_dir, 'in_feature/{:05}'.format(data_id)),
                    in_feature)
            np.save(
                os.path.join(save_dir, 'out_feature/{:05}'.format(data_id)),
                out_feature)

            # inference feature
            # in_feature = torch.from_numpy(in_feature)
            in_feature = in_feature.astype(np.float32)
            transform = transforms.Compose([transforms.ToTensor()])
            in_feature = transform(in_feature)
            in_feature = in_feature.to(device)
            in_feature = torch.unsqueeze(in_feature, 0)
            print(in_feature.size())
            inference_feature = bcnn_model(in_feature)
            np.save(
                os.path.join(save_dir,
                             'inference_feature/{:05}'.format(data_id)),
                inference_feature.cpu().detach().numpy())
            token = my_sample['next']
            data_id += 1
            if data_id == end_id:
                return
Example no. 15
0
def main():
  SCENE_SPLITS['mini-val'] = SCENE_SPLITS['val']
  if not os.path.exists(OUT_PATH):
    os.mkdir(OUT_PATH)
  for split in SPLITS:
    data_path = DATA_PATH + '{}/'.format(SPLITS[split])
    nusc = NuScenes(
      version=SPLITS[split], dataroot=data_path, verbose=True)
    out_path = OUT_PATH + '{}.json'.format(split)
    categories_info = [{'name': CATS[i], 'id': i + 1} for i in range(len(CATS))]
    ret = {'images': [], 'annotations': [], 'categories': categories_info, 
           'videos': [], 'attributes': ATTRIBUTE_TO_ID}
    num_images = 0
    num_anns = 0
    num_videos = 0

    # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
    for sample in nusc.sample:
      scene_name = nusc.get('scene', sample['scene_token'])['name']
      if not (split in ['mini', 'test']) and \
        not (scene_name in SCENE_SPLITS[split]):
        continue
      if sample['prev'] == '':
        print('scene_name', scene_name)
        num_videos += 1
        ret['videos'].append({'id': num_videos, 'file_name': scene_name})
        frame_ids = {k: 0 for k in sample['data']}
        track_ids = {}
      # We decompose a sample into 6 images in our case.
      for sensor_name in sample['data']:
        if sensor_name in USED_SENSOR:
          image_token = sample['data'][sensor_name]
          image_data = nusc.get('sample_data', image_token)
          num_images += 1

          # Complex coordinate transform. This will take time to understand.
          sd_record = nusc.get('sample_data', image_token)
          cs_record = nusc.get(
            'calibrated_sensor', sd_record['calibrated_sensor_token'])
          pose_record = nusc.get('ego_pose', sd_record['ego_pose_token'])
          global_from_car = transform_matrix(pose_record['translation'],
            Quaternion(pose_record['rotation']), inverse=False)
          car_from_sensor = transform_matrix(
            cs_record['translation'], Quaternion(cs_record['rotation']),
            inverse=False)
          trans_matrix = np.dot(global_from_car, car_from_sensor)
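          # trans_matrix maps homogeneous points from the camera frame, via the
          # ego vehicle frame at this timestamp, into the global frame; its
          # inverse is used further down to bring the global-frame box velocity
          # back into the camera frame.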
          _, boxes, camera_intrinsic = nusc.get_sample_data(
            image_token, box_vis_level=BoxVisibility.ANY)
          calib = np.eye(4, dtype=np.float32)
          calib[:3, :3] = camera_intrinsic
          calib = calib[:3]
          frame_ids[sensor_name] += 1

          # image information in COCO format
          image_info = {'id': num_images,
                        'file_name': image_data['filename'],
                        'calib': calib.tolist(), 
                        'video_id': num_videos,
                        'frame_id': frame_ids[sensor_name],
                        'sensor_id': SENSOR_ID[sensor_name],
                        'sample_token': sample['token'],
                        'trans_matrix': trans_matrix.tolist(),
                        'width': sd_record['width'],
                        'height': sd_record['height'],
                        'pose_record_trans': pose_record['translation'],
                        'pose_record_rot': pose_record['rotation'],
                        'cs_record_trans': cs_record['translation'],
                        'cs_record_rot': cs_record['rotation']}
          ret['images'].append(image_info)
          anns = []
          for box in boxes:
            det_name = category_to_detection_name(box.name)
            if det_name is None:
              continue
            num_anns += 1
            v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
            yaw = -np.arctan2(v[2], v[0])
            box.translate(np.array([0, box.wlh[2] / 2, 0]))
            category_id = CAT_IDS[det_name]

            amodel_center = project_to_image(
              np.array([box.center[0], box.center[1] - box.wlh[2] / 2, box.center[2]], 
                np.float32).reshape(1, 3), calib)[0].tolist()
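            # amodel_center is the geometric 3D box center (undoing the
            # half-height shift applied to `box` above) projected into the
            # image with the camera calibration.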
            sample_ann = nusc.get(
              'sample_annotation', box.token)
            instance_token = sample_ann['instance_token']
            if not (instance_token in track_ids):
              track_ids[instance_token] = len(track_ids) + 1
            attribute_tokens = sample_ann['attribute_tokens']
            attributes = [nusc.get('attribute', att_token)['name'] \
              for att_token in attribute_tokens]
            att = '' if len(attributes) == 0 else attributes[0]
            if len(attributes) > 1:
              print(attributes)
              import pdb; pdb.set_trace()
            track_id = track_ids[instance_token]
            vel = nusc.box_velocity(box.token) # global frame
            vel = np.dot(np.linalg.inv(trans_matrix), 
              np.array([vel[0], vel[1], vel[2], 0], np.float32)).tolist()
            
            # instance information in COCO format
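            # 'dim' follows the KITTI [h, w, l] convention, 'location' is the
            # box bottom-center in camera coordinates (after the half-height
            # shift above), and 'rotation_y' is the yaw about the camera y-axis.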
            ann = {
              'id': num_anns,
              'image_id': num_images,
              'category_id': category_id,
              'dim': [box.wlh[2], box.wlh[0], box.wlh[1]],
              'location': [box.center[0], box.center[1], box.center[2]],
              'depth': box.center[2],
              'occluded': 0,
              'truncated': 0,
              'rotation_y': yaw,
              'amodel_center': amodel_center,
              'iscrowd': 0,
              'track_id': track_id,
              'attributes': ATTRIBUTE_TO_ID[att],
              'velocity': vel
            }

            bbox = KittiDB.project_kitti_box_to_image(
              copy.deepcopy(box), camera_intrinsic, imsize=(1600, 900))
            alpha = _rot_y2alpha(yaw, (bbox[0] + bbox[2]) / 2, 
                                 camera_intrinsic[0, 2], camera_intrinsic[0, 0])
            ann['bbox'] = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
            ann['area'] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
            ann['alpha'] = alpha
            anns.append(ann)

          # Filter out boxes that are fully occluded by a nearer box
          # (their 2D bbox lies entirely inside a closer box's bbox).
          visable_anns = []
          for i in range(len(anns)):
            vis = True
            for j in range(len(anns)):
              if anns[i]['depth'] - min(anns[i]['dim']) / 2 > \
                 anns[j]['depth'] + max(anns[j]['dim']) / 2 and \
                _bbox_inside(anns[i]['bbox'], anns[j]['bbox']):
                vis = False
                break
            if vis:
              visable_anns.append(anns[i])
            else:
              pass

          for ann in visable_anns:
            ret['annotations'].append(ann)
          if DEBUG:
            img_path = data_path + image_info['file_name']
            img = cv2.imread(img_path)
            img_3d = img.copy()
            for ann in visable_anns:
              bbox = ann['bbox']
              cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), 
                            (int(bbox[2] + bbox[0]), int(bbox[3] + bbox[1])), 
                            (0, 0, 255), 3, lineType=cv2.LINE_AA)
              box_3d = compute_box_3d(ann['dim'], ann['location'], ann['rotation_y'])
              box_2d = project_to_image(box_3d, calib)
              img_3d = draw_box_3d(img_3d, box_2d)

              pt_3d = unproject_2d_to_3d(ann['amodel_center'], ann['depth'], calib)
              pt_3d[1] += ann['dim'][0] / 2
              print('location', ann['location'])
              print('loc model', pt_3d)
              pt_2d = np.array([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
                                dtype=np.float32)
              pt_3d = unproject_2d_to_3d(pt_2d, ann['depth'], calib)
              pt_3d[1] += ann['dim'][0] / 2
              print('loc      ', pt_3d)
            cv2.imshow('img', img)
            cv2.imshow('img_3d', img_3d)
            cv2.waitKey()
            nusc.render_sample_data(image_token)
            plt.show()
    print('reordering images')
    images = ret['images']
    video_sensor_to_images = {}
    for image_info in images:
      tmp_seq_id = image_info['video_id'] * 20 + image_info['sensor_id']
      if tmp_seq_id in video_sensor_to_images:
        video_sensor_to_images[tmp_seq_id].append(image_info)
      else:
        video_sensor_to_images[tmp_seq_id] = [image_info]
    ret['images'] = []
    for tmp_seq_id in sorted(video_sensor_to_images):
      ret['images'] = ret['images'] + video_sensor_to_images[tmp_seq_id]

    print('{} {} images {} boxes'.format(
      split, len(ret['images']), len(ret['annotations'])))
    print('out_path', out_path)
    json.dump(ret, open(out_path, 'w'))
Example no. 16
0
class BaseNuScenesDataset(DatasetTemplate):
    NameMapping = {
        'movable_object.barrier': 'Barrier',
        'vehicle.bicycle': 'Bicycle',
        'vehicle.bus.bendy': 'Bus',
        'vehicle.bus.rigid': 'Bus',
        'vehicle.car': 'Car',
        'vehicle.construction': 'Construction_vehicle',
        'vehicle.motorcycle': 'Motorcycle',
        'human.pedestrian.adult': 'Pedestrian',
        'human.pedestrian.child': 'Pedestrian',
        'human.pedestrian.construction_worker': 'Pedestrian',
        'human.pedestrian.police_officer': 'Pedestrian',
        'movable_object.trafficcone': 'Traffic_cone',
        'vehicle.trailer': 'Trailer',
        'vehicle.truck': 'Truck'
    }
    DefaultAttribute = {
        "car": "vehicle.parked",
        "pedestrian": "pedestrian.moving",
        "trailer": "vehicle.parked",
        "truck": "vehicle.parked",
        "bus": "vehicle.parked",
        "motorcycle": "cycle.without_rider",
        "construction_vehicle": "vehicle.parked",
        "bicycle": "cycle.without_rider",
        "barrier": "",
        "traffic_cone": "",
    }

    def __init__(self, root_path, split='train', init_nusc=True):
        super().__init__()
        self.root_path = root_path
        self.split = split
        if (init_nusc):
            self.nusc = NuScenes(version='v1.0-trainval',
                                 dataroot=root_path,
                                 verbose=True)

        splits = create_splits_scenes()
        split_scenes = splits[split]
        all_scene_names = [scene['name'] for scene in self.nusc.scene]
        split_scene_tokens = [
            self.nusc.scene[all_scene_names.index(scene_name)]['token']
            for scene_name in split_scenes
        ]

        self.sample_id_list = self.get_sample_tokens_from_scenes(
            split_scene_tokens)

    def get_sample_tokens_from_scenes(self, scene_tokens):
        """
        :param scene_tokens: List of scene tokens
        :ret: List of sample tokens
        """
        sample_tokens = []
        for token in scene_tokens:
            sample_tokens.append(
                self.nusc.get('scene', token)['first_sample_token'])
            sample = self.nusc.get('sample', sample_tokens[-1])
            while (sample['next'] != ''):
                sample_tokens.append(sample['next'])
                sample = self.nusc.get('sample', sample_tokens[-1])

        return sample_tokens

    def set_split(self, split):
        self.__init__(self.root_path, split, False)

    def get_lidar(self, sample_token):
        lidar_token = self.nusc.get('sample',
                                    sample_token)['data']['LIDAR_TOP']
        lidar_file = os.path.join(
            self.root_path,
            self.nusc.get('sample_data', lidar_token)['filename'])
        assert os.path.exists(lidar_file)
        points = np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
        points[:, 3] /= 255
        #points[:, 4] = 0
        return points

    def get_image_shape(self, sample_token):
        img_token = self.nusc.get('sample', sample_token)['data']['CAM_FRONT']
        img_file = os.path.join(
            self.root_path,
            self.nusc.get('sample_data', img_token)['filename'])
        assert os.path.exists(img_file)
        return np.array(io.imread(img_file).shape[:2], dtype=np.int32)

    def get_label(self, sample_token, sensor='LIDAR_TOP'):
        sensor_token = self.nusc.get('sample', sample_token)['data'][sensor]
        data_path, boxes, camera_intrinsic = self.nusc.get_sample_data(
            sensor_token)
        return boxes

    def get_calib(self, sample_token):
        cam_token = self.nusc.get('sample', sample_token)['data']['CAM_FRONT']
        lidar_token = self.nusc.get('sample',
                                    sample_token)['data']['LIDAR_TOP']
        cam_calibrated_token = self.nusc.get(
            'sample_data', cam_token)['calibrated_sensor_token']
        lidar_calibrated_token = self.nusc.get(
            'sample_data', lidar_token)['calibrated_sensor_token']
        ego_pose_token = self.nusc.get('sample_data',
                                       cam_token)['ego_pose_token']

        cam_calibrated = self.nusc.get('calibrated_sensor',
                                       cam_calibrated_token)
        lidar_calibrated = self.nusc.get('calibrated_sensor',
                                         lidar_calibrated_token)
        ego_pose = self.nusc.get('ego_pose', ego_pose_token)

        return nuscenes_calibration.Calibration(ego_pose, cam_calibrated,
                                                lidar_calibrated)

    def get_road_plane(self, sample_token):
        """
        plane_file = os.path.join(self.root_path, 'planes', '%s.txt' % idx)
        with open(plane_file, 'r') as f:
            lines = f.readlines()
        lines = [float(i) for i in lines[3].split()]
        plane = np.asarray(lines)

        # Ensure normal is always facing up, this is in the rectified camera coordinate
        if plane[1] > 0:
            plane = -plane

        norm = np.linalg.norm(plane[0:3])
        plane = plane / norm
        return plane
        """
        # Currently unsupported in NuScenes
        raise NotImplementedError

    def get_annotation_from_label(self, calib, sample_token):
        box_list = self.get_label(sample_token, sensor='LIDAR_TOP')
        if (len(box_list) == 0):
            annotations = {}
            annotations['name'] = annotations['num_points_in_gt'] = annotations['gt_boxes_lidar'] = \
                annotations['token'] = annotations['location'] = annotations['rotation_y'] = \
                annotations['dimensions'] = annotations['score'] = annotations['difficulty'] = \
                annotations['truncated'] = annotations['occluded'] = annotations['alpha'] = annotations['bbox'] = np.array([])
            return None

        annotations = {}
        gt_names = np.array([
            self.NameMapping[box.name]
            if box.name in self.NameMapping else 'DontCare' for box in box_list
        ])
        num_points_in_gt = np.array([
            self.nusc.get('sample_annotation', box.token)['num_lidar_pts']
            for box in box_list
        ])

        loc_lidar = np.array([box.center for box in box_list])
        dims = np.array([box.wlh for box in box_list])
        #loc_lidar[:,2] -= dims[:,2] / 2 # Translate true center to bottom middle coordinate
        rots = np.array(
            [box.orientation.yaw_pitch_roll[0] for box in box_list])
        gt_boxes_lidar = np.concatenate(
            [loc_lidar, dims, rots[..., np.newaxis]], axis=1)

        annotations['name'] = gt_names
        annotations['num_points_in_gt'] = num_points_in_gt
        annotations['gt_boxes_lidar'] = gt_boxes_lidar
        annotations['token'] = np.array([box.token for box in box_list])

        # in CAM_FRONT frame. Probably meaningless as most objects aren't in frame.
        annotations['location'] = calib.lidar_to_rect(loc_lidar)
        annotations['rotation_y'] = rots
        annotations['dimensions'] = np.array(
            [[box.wlh[1], box.wlh[2], box.wlh[0]]
             for box in box_list])  # lhw format

        occluded = np.zeros([num_points_in_gt.shape[0]])
        easy_mask = num_points_in_gt > 15
        moderate_mask = num_points_in_gt > 7
        occluded[:] = 2
        occluded[moderate_mask] = 1
        occluded[easy_mask] = 0

        gt_boxes_camera = box_utils.boxes3d_lidar_to_camera(
            gt_boxes_lidar, calib)
        assert len(gt_boxes_camera) == len(gt_boxes_lidar) == len(box_list)
        # Currently unused for NuScenes, and doesn't make much sense as we primarily use 360-degree 3D LIDAR boxes.
        annotations['score'] = np.array([1 for _ in box_list])
        annotations['difficulty'] = np.array([0 for _ in box_list], np.int32)
        annotations['truncated'] = np.array([0 for _ in box_list])
        annotations['occluded'] = occluded
        annotations['alpha'] = np.array([
            -np.arctan2(-gt_boxes_lidar[i][1], gt_boxes_lidar[i][0]) +
            gt_boxes_camera[i][6] for i in range(len(gt_boxes_camera))
        ])
        annotations['bbox'] = gt_boxes_camera

        return annotations

    @staticmethod
    def get_fov_flag(pts_rect, img_shape, calib):
        '''
        Valid point should be in the image (and in the PC_AREA_SCOPE)
        :param pts_rect:
        :param img_shape:
        :return:
        '''
        pts_img, pts_rect_depth = calib.rect_to_img(pts_rect)
        val_flag_1 = np.logical_and(pts_img[:, 0] >= 0,
                                    pts_img[:, 0] < img_shape[1])
        val_flag_2 = np.logical_and(pts_img[:, 1] >= 0,
                                    pts_img[:, 1] < img_shape[0])
        val_flag_merge = np.logical_and(val_flag_1, val_flag_2)
        pts_valid_flag = np.logical_and(val_flag_merge, pts_rect_depth >= 0)

        return pts_valid_flag

    def get_infos(self,
                  num_workers=4,
                  has_label=True,
                  count_inside_pts=True,
                  sample_id_list=None):
        import concurrent.futures as futures

        def process_single_scene(sample_token):

            print('%s sample_token: %s ' % (self.split, sample_token))
            info = {}
            pc_info = {'num_features': 4, 'lidar_idx': sample_token}
            info['point_cloud'] = pc_info

            image_info = {
                'image_idx': sample_token,
                'image_shape': self.get_image_shape(sample_token)
            }
            info['image'] = image_info
            calib = self.get_calib(sample_token)

            calib_info = {
                'T_IMG_CAM': calib.t_img_cam,
                'T_CAR_CAM': calib.t_car_cam,
                'T_CAR_LIDAR': calib.t_car_lidar,
                'T_GLOBAL_CAR': calib.t_global_car
            }
            info['calib'] = calib_info

            if has_label:
                annotations = self.get_annotation_from_label(
                    calib, sample_token)
                if annotations is None:
                    return None
                info['annos'] = annotations
            return info

        # temp = process_single_scene(self.sample_id_list[0])
        sample_id_list = sample_id_list if sample_id_list is not None else self.sample_id_list
        with futures.ThreadPoolExecutor(num_workers) as executor:
            infos = executor.map(process_single_scene, sample_id_list)
        # Remove samples with no gt boxes
        infos = [sample for sample in infos if sample]

        return list(infos)

    def create_groundtruth_database(self,
                                    info_path=None,
                                    used_classes=None,
                                    split='train'):
        database_save_path = Path(
            self.root_path) / ('gt_database' if split == 'train' else
                               ('gt_database_%s' % split))
        db_info_save_path = Path(
            self.root_path) / ('nuscenes_dbinfos_%s.pkl' % split)

        database_save_path.mkdir(parents=True, exist_ok=True)
        all_db_infos = {}

        with open(info_path, 'rb') as f:
            infos = pickle.load(f)

        for k in range(len(infos)):
            print('gt_database sample: %d/%d' % (k + 1, len(infos)))
            info = infos[k]
            sample_idx = info['point_cloud']['lidar_idx']
            points = self.get_lidar(sample_idx)
            annos = info['annos']
            names = annos['name']
            difficulty = annos['difficulty']
            bbox = annos['bbox']
            gt_boxes = annos['gt_boxes_lidar']

            if (len(gt_boxes) == 0):
                continue

            num_obj = gt_boxes.shape[0]
            point_indices = roiaware_pool3d_utils.points_in_boxes_cpu(
                torch.from_numpy(points[:, 0:3]),
                torch.from_numpy(gt_boxes)).numpy()  # (nboxes, npoints)

            for i in range(num_obj):
                filename = '%s_%s_%d.bin' % (sample_idx, names[i], i)
                filepath = database_save_path / filename
                gt_points = points[point_indices[i] > 0]

                gt_points[:, :3] -= gt_boxes[i, :3]
                with open(filepath, 'wb') as f:
                    gt_points.tofile(f)

                if (used_classes is None) or names[i] in used_classes:
                    db_path = str(filepath.relative_to(
                        self.root_path))  # gt_database/xxxxx.bin
                    db_info = {
                        'name': names[i],
                        'path': db_path,
                        'image_idx': sample_idx,
                        'gt_idx': i,
                        'box3d_lidar': gt_boxes[i],
                        'num_points_in_gt': gt_points.shape[0],
                        'difficulty': difficulty[i],
                        'bbox': bbox[i],
                        'score': annos['score'][i]
                    }
                    if names[i] in all_db_infos:
                        all_db_infos[names[i]].append(db_info)
                    else:
                        all_db_infos[names[i]] = [db_info]
        for k, v in all_db_infos.items():
            print('Database %s: %d' % (k, len(v)))

        with open(db_info_save_path, 'wb') as f:
            pickle.dump(all_db_infos, f)

    @staticmethod
    def generate_prediction_dict(input_dict, index, record_dict):
        # finally generate predictions.
        sample_idx = input_dict['sample_idx'][
            index] if 'sample_idx' in input_dict else -1
        boxes3d_lidar_preds = record_dict['boxes'].cpu().numpy()

        if boxes3d_lidar_preds.shape[0] == 0:
            return {'sample_idx': sample_idx}

        calib = input_dict['calib'][index]
        image_shape = input_dict['image_shape'][index]

        boxes3d_camera_preds = box_utils.boxes3d_lidar_to_camera(
            boxes3d_lidar_preds, calib)
        boxes2d_image_preds = box_utils.boxes3d_camera_to_imageboxes(
            boxes3d_camera_preds, calib, image_shape=image_shape)
        # predictions
        predictions_dict = {
            'bbox': boxes2d_image_preds,
            'box3d_camera': boxes3d_camera_preds,
            'box3d_lidar': boxes3d_lidar_preds,
            'scores': record_dict['scores'].cpu().numpy(),
            'label_preds': record_dict['labels'].cpu().numpy(),
            'sample_idx': sample_idx,
        }
        return predictions_dict

    @staticmethod
    def generate_annotations(input_dict,
                             pred_dicts,
                             class_names,
                             save_to_file=False,
                             output_dir=None):
        def get_empty_prediction():
            ret_dict = {
                'name': np.array([]),
                'truncated': np.array([]),
                'occluded': np.array([]),
                'alpha': np.array([]),
                'bbox': np.zeros([0, 4]),
                'dimensions': np.zeros([0, 3]),
                'location': np.zeros([0, 3]),
                'rotation_y': np.array([]),
                'score': np.array([]),
                'boxes_lidar': np.zeros([0, 7])
            }
            return ret_dict

        def generate_single_anno(idx, box_dict):
            num_example = 0
            if 'bbox' not in box_dict:
                return get_empty_prediction(), num_example

            sample_idx = box_dict['sample_idx']
            box_preds_image = box_dict['bbox']
            box_preds_camera = box_dict['box3d_camera']
            box_preds_lidar = box_dict['box3d_lidar']
            scores = box_dict['scores']
            label_preds = box_dict['label_preds']

            anno = {
                'name': [],
                'truncated': [],
                'occluded': [],
                'alpha': [],
                'bbox': [],
                'dimensions': [],
                'location': [],
                'rotation_y': [],
                'score': [],
                'boxes_lidar': []
            }

            for box_camera, box_lidar, bbox, score, label in zip(
                    box_preds_camera, box_preds_lidar, box_preds_image, scores,
                    label_preds):

                if not (np.all(box_lidar[3:6] > -0.1)):
                    print('Invalid size(sample %s): ' % str(sample_idx),
                          box_lidar)
                    continue

                anno['name'].append(class_names[int(label - 1)])
                anno['truncated'].append(0.0)
                anno['occluded'].append(0)
                anno['alpha'].append(-np.arctan2(-box_lidar[1], box_lidar[0]) +
                                     box_camera[6])
                anno['bbox'].append(bbox)
                anno['dimensions'].append(box_camera[3:6])
                anno['location'].append(box_camera[:3])
                anno['rotation_y'].append(box_camera[6])
                anno['score'].append(score)
                anno['boxes_lidar'].append(box_lidar)

                num_example += 1

            if num_example != 0:
                anno = {k: np.stack(v) for k, v in anno.items()}
            else:
                anno = get_empty_prediction()

            return anno, num_example

        annos = []
        for i, box_dict in enumerate(pred_dicts):
            sample_idx = box_dict['sample_idx']
            single_anno, num_example = generate_single_anno(i, box_dict)
            single_anno['num_example'] = num_example
            single_anno['sample_idx'] = np.array([sample_idx] * num_example)
            annos.append(single_anno)
            if save_to_file:
                cur_det_file = os.path.join(output_dir,
                                            '%s.json' % (sample_idx))
                boxes_lidar = single_anno['boxes_lidar']  # x y z w l h yaw
                pred_json = {}
                pred_json['cuboids'] = []
                for idx in range(len(boxes_lidar)):
                    pred_json['cuboids'].append({
                        'label': single_anno['name'][idx],
                        'position': {
                            'x': boxes_lidar[idx][0],
                            'y': boxes_lidar[idx][1],
                            'z': boxes_lidar[idx][2],
                        },
                        'dimension': {
                            'x': boxes_lidar[idx][3],
                            'y': boxes_lidar[idx][4],
                            'z': boxes_lidar[idx][5],
                        },
                        "yaw": boxes_lidar[idx][6],
                        "score": single_anno['score'][idx]
                    })
                with open(cur_det_file, 'w') as f:
                    json.dump(pred_json, f)

        return annos

    def evaluation(self, det_annos, class_names, **kwargs):

        eval_det_annos = copy.deepcopy(det_annos)

        # Create NuScenes JSON output file
        nusc_annos = {}
        for sample in eval_det_annos:
            try:
                sample_idx = sample['sample_idx'][0]
            except (KeyError, IndexError):
                continue

            sample_results = []

            calib = self.get_calib(sample_idx)

            sample['boxes_lidar'] = np.array(sample['boxes_lidar'])
            positions = sample['boxes_lidar'][:, :3]
            dimensions = sample['boxes_lidar'][:, 3:6]
            rotations = sample['boxes_lidar'][:, 6]

            for center, dimension, yaw, label, score in zip(
                    positions, dimensions, rotations, sample['name'],
                    sample['score']):

                quaternion = Quaternion(axis=[0, 0, 1], radians=yaw)

                box = Box(center, dimension, quaternion)
                # Move box to ego vehicle coord system
                box.rotate(Quaternion(calib.lidar_calibrated['rotation']))
                box.translate(np.array(calib.lidar_calibrated['translation']))
                # Move box to global coord system
                box.rotate(Quaternion(calib.ego_pose['rotation']))
                box.translate(np.array(calib.ego_pose['translation']))

                if (float(score) < 0):
                    score = 0
                if (float(score) > 1):
                    score = 1
                if (label == 'Cyclist'):
                    label = 'bicycle'
                sample_results.append({
                    "sample_token":
                    sample_idx,
                    "translation":
                    box.center.tolist(),
                    "size":
                    box.wlh.tolist(),
                    "rotation":
                    box.orientation.elements.tolist(),
                    "lidar_yaw":
                    float(yaw),
                    "velocity": (0, 0),
                    "detection_name":
                    label.lower(),
                    "detection_score":
                    float(score),
                    "attribute_name":
                    self.DefaultAttribute[label.lower()],
                })

            nusc_annos[sample_idx] = sample_results

        for sample_id in self.sample_id_list:
            if sample_id not in nusc_annos:
                nusc_annos[sample_id] = []

        nusc_submission = {
            "meta": {
                "use_camera": False,
                "use_lidar": True,
                "use_radar": False,
                "use_map": False,
                "use_external": False,
            },
            "results": nusc_annos,
        }
        eval_file = os.path.join(kwargs['output_dir'], 'nusc_results.json')
        with open(eval_file, "w") as f:
            json.dump(nusc_submission, f, indent=2)

        # Call NuScenes evaluation
        cfg = config_factory('detection_cvpr_2019')
        nusc_eval = DetectionEval(self.nusc,
                                  config=cfg,
                                  result_path=eval_file,
                                  eval_set=self.split,
                                  output_dir=kwargs['output_dir'],
                                  verbose=True)
        metric_summary = nusc_eval.main(plot_examples=10, render_curves=True)

        # Reformat the metrics summary a bit for the tensorboard logger
        err_name_mapping = {
            'trans_err': 'mATE',
            'scale_err': 'mASE',
            'orient_err': 'mAOE',
            'vel_err': 'mAVE',
            'attr_err': 'mAAE'
        }
        result = {}
        result['mean_ap'] = metric_summary['mean_ap']
        for tp_name, tp_val in metric_summary['tp_errors'].items():
            result[tp_name] = tp_val

        class_aps = metric_summary['mean_dist_aps']
        class_tps = metric_summary['label_tp_errors']
        for class_name in class_aps.keys():
            result['mAP_' + class_name] = class_aps[class_name]
            for key, val in err_name_mapping.items():
                result[val + '_' + class_name] = class_tps[class_name][key]

        return str(result), result

class KittiConverter:
    def __init__(self,
                 nusc_kitti_dir: str = '/home/developer/nuscenes/nusc_kitti',
                 cam_name: str = 'CAM_FRONT',
                 lidar_name: str = 'LIDAR_TOP',
                 image_count: int = 10,
                 nusc_version: str = 'v1.0-mini',
                 split: str = 'mini_train'):
        """
        :param nusc_kitti_dir: Where to write the KITTI-style annotations.
        :param cam_name: Name of the camera to export. Note that only one camera is allowed in KITTI.
        :param lidar_name: Name of the lidar sensor.
        :param image_count: Number of images to convert.
        :param nusc_version: nuScenes version to use.
        :param split: Dataset split to use.
        """
        self.nusc_kitti_dir = os.path.expanduser(nusc_kitti_dir)
        self.cam_name = cam_name
        self.lidar_name = lidar_name
        self.image_count = image_count
        self.nusc_version = nusc_version
        self.split = split

        # Create nusc_kitti_dir.
        if not os.path.isdir(self.nusc_kitti_dir):
            os.makedirs(self.nusc_kitti_dir)

        # Select subset of the data to look at.
        self.nusc = NuScenes(version=nusc_version,
                             dataroot='/home/developer/nuscenes')

    def nuscenes_gt_to_kitti(self) -> None:
        """
        Converts nuScenes GT annotations to KITTI format.
        """
        kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
        kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
        imsize = (1600, 900)

        token_idx = 0  # Start tokens from 0.

        # Get assignment of scenes to splits.
        split_logs = create_splits_logs(self.split, self.nusc)

        # Create output folders.
        label_folder = os.path.join(self.nusc_kitti_dir, self.split, 'label_2')
        calib_folder = os.path.join(self.nusc_kitti_dir, self.split, 'calib')
        image_folder = os.path.join(self.nusc_kitti_dir, self.split, 'image_2')
        lidar_folder = os.path.join(self.nusc_kitti_dir, self.split,
                                    'velodyne')
        for folder in [label_folder, calib_folder, image_folder, lidar_folder]:
            if not os.path.isdir(folder):
                os.makedirs(folder)

        # Use only the samples from the current split.
        sample_tokens = self._split_to_samples(split_logs)
        sample_tokens = sample_tokens[:self.image_count]

        tokens = []
        for sample_token in sample_tokens:

            # Get sample data.
            sample = self.nusc.get('sample', sample_token)
            sample_annotation_tokens = sample['anns']
            cam_front_token = sample['data'][self.cam_name]
            lidar_token = sample['data'][self.lidar_name]

            # Retrieve sensor records.
            sd_record_cam = self.nusc.get('sample_data', cam_front_token)
            sd_record_lid = self.nusc.get('sample_data', lidar_token)
            cs_record_cam = self.nusc.get(
                'calibrated_sensor', sd_record_cam['calibrated_sensor_token'])
            cs_record_lid = self.nusc.get(
                'calibrated_sensor', sd_record_lid['calibrated_sensor_token'])

            # Combine transformations and convert to KITTI format.
            # Note: cam uses same conventions in KITTI and nuScenes.
            lid_to_ego = transform_matrix(cs_record_lid['translation'],
                                          Quaternion(
                                              cs_record_lid['rotation']),
                                          inverse=False)
            ego_to_cam = transform_matrix(cs_record_cam['translation'],
                                          Quaternion(
                                              cs_record_cam['rotation']),
                                          inverse=True)
            velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

            # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
            velo_to_cam_kitti = np.dot(velo_to_cam,
                                       kitti_to_nu_lidar.transformation_matrix)

            # Currently not used.
            imu_to_velo_kitti = np.zeros((3, 4))  # Dummy values.
            r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

            # Projection matrix.
            p_left_kitti = np.zeros((3, 4))
            p_left_kitti[:3, :3] = cs_record_cam[
                'camera_intrinsic']  # Cameras are always rectified.

            # Create KITTI style transforms.
            velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
            velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

            # Check that the rotation has the same format as in KITTI.
            assert (velo_to_cam_rot.round(0) == np.array([[0, -1,
                                                           0], [0, 0, -1],
                                                          [1, 0, 0]])).all()
            assert (velo_to_cam_trans[1:3] < 0).all()

            # Retrieve the token from the lidar.
            # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
            # not the camera.
            filename_cam_full = sd_record_cam['filename']
            filename_lid_full = sd_record_lid['filename']
            # token = '%06d' % token_idx # Alternative to use KITTI names.
            token_idx += 1

            # Convert image (jpg to png).
            src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
            dst_im_path = os.path.join(image_folder, sample_token + '.png')
            if not os.path.exists(dst_im_path):
                im = Image.open(src_im_path)
                im.save(dst_im_path, "PNG")

            # Convert lidar.
            # Note that we are only using a single sweep, instead of the commonly used n sweeps.
            src_lid_path = os.path.join(self.nusc.dataroot, filename_lid_full)
            dst_lid_path = os.path.join(lidar_folder, sample_token + '.bin')
            assert not dst_lid_path.endswith('.pcd.bin')
            pcl = LidarPointCloud.from_file(src_lid_path)
            pcl.rotate(
                kitti_to_nu_lidar_inv.rotation_matrix)  # In KITTI lidar frame.
            with open(dst_lid_path, "w") as lid_file:
                pcl.points.T.tofile(lid_file)

            # Add to tokens.
            tokens.append(sample_token)

            # Create calibration file.
            kitti_transforms = dict()
            kitti_transforms['P0'] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms['P1'] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms['P2'] = p_left_kitti  # Left camera transform.
            kitti_transforms['P3'] = np.zeros((3, 4))  # Dummy values.
            kitti_transforms[
                'R0_rect'] = r0_rect.rotation_matrix  # Cameras are already rectified.
            kitti_transforms['Tr_velo_to_cam'] = np.hstack(
                (velo_to_cam_rot, velo_to_cam_trans.reshape(3, 1)))
            kitti_transforms['Tr_imu_to_velo'] = imu_to_velo_kitti
            calib_path = os.path.join(calib_folder, sample_token + '.txt')
            with open(calib_path, "w") as calib_file:
                for (key, val) in kitti_transforms.items():
                    val = val.flatten()
                    val_str = '%.12e' % val[0]
                    for v in val[1:]:
                        val_str += ' %.12e' % v
                    calib_file.write('%s: %s\n' % (key, val_str))

            # Write label file.
            label_path = os.path.join(label_folder, sample_token + '.txt')
            if os.path.exists(label_path):
                print('Skipping existing file: %s' % label_path)
                continue
            else:
                print('Writing file: %s' % label_path)
            with open(label_path, "w") as label_file:
                for sample_annotation_token in sample_annotation_tokens:
                    sample_annotation = self.nusc.get('sample_annotation',
                                                      sample_annotation_token)

                    # Get box in LIDAR frame.
                    _, box_lidar_nusc, _ = self.nusc.get_sample_data(
                        lidar_token,
                        box_vis_level=BoxVisibility.NONE,
                        selected_anntokens=[sample_annotation_token])
                    box_lidar_nusc = box_lidar_nusc[0]

                    # Truncated: Set all objects to 0 which means untruncated.
                    truncated = 0.0

                    # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
                    occluded = 0

                    # Convert nuScenes category to nuScenes detection challenge category.
                    detection_name = category_to_detection_name(
                        sample_annotation['category_name'])

                    # Skip categories that are not part of the nuScenes detection challenge.
                    if detection_name is None:
                        continue

                    # Convert from nuScenes to KITTI box format.
                    box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                        box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot),
                        velo_to_cam_trans, r0_rect)

                    # Project 3d box to 2d box in image, ignore box if it does not fall inside.
                    bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti,
                                                                 p_left_kitti,
                                                                 imsize=imsize)
                    if bbox_2d is None:
                        continue

                    # Set dummy score so we can use this file as result.
                    box_cam_kitti.score = 0

                    # Convert box to output string format.
                    output = KittiDB.box_to_string(name=detection_name,
                                                   box=box_cam_kitti,
                                                   bbox_2d=bbox_2d,
                                                   truncation=truncated,
                                                   occlusion=occluded)

                    # Write to disk.
                    label_file.write(output + '\n')

    def render_kitti(self, render_2d: bool) -> None:
        """
        Renders the annotations in the KITTI dataset from a lidar and a camera view.
        :param render_2d: Whether to render 2d boxes (only works for camera data).
        """
        if render_2d:
            print('Rendering 2d boxes from KITTI format')
        else:
            print('Rendering 3d boxes projected from 3d KITTI format')

        # Load the KITTI dataset.
        kitti = KittiDB(root=self.nusc_kitti_dir, splits=(self.split, ))

        # Create output folder.
        render_dir = os.path.join(self.nusc_kitti_dir, 'render')
        if not os.path.isdir(render_dir):
            os.mkdir(render_dir)

        # Render each image.
        for token in kitti.tokens[:self.image_count]:
            for sensor in ['lidar', 'camera']:
                out_path = os.path.join(render_dir,
                                        '%s_%s.png' % (token, sensor))
                print('Rendering file to disk: %s' % out_path)
                kitti.render_sample_data(token,
                                         sensor_modality=sensor,
                                         out_path=out_path,
                                         render_2d=render_2d)
                plt.close()  # Close the figure to avoid a warning about too many open figures.

    def kitti_res_to_nuscenes(self, meta: Dict[str, bool] = None) -> None:
        """
        Converts a KITTI detection result to the nuScenes detection results format.
        :param meta: Meta data describing the method used to generate the result. See nuscenes.org/object-detection.
        """
        # Dummy meta data, please adjust accordingly.
        if meta is None:
            meta = {
                'use_camera': False,
                'use_lidar': True,
                'use_radar': False,
                'use_map': False,
                'use_external': False,
            }

        # Init.
        results = {}

        # Load the KITTI dataset.
        kitti = KittiDB(root=self.nusc_kitti_dir, splits=(self.split, ))

        # Get assignment of scenes to splits.
        split_logs = create_splits_logs(self.split, self.nusc)

        # Use only the samples from the current split.
        sample_tokens = self._split_to_samples(split_logs)
        sample_tokens = sample_tokens[:self.image_count]

        for sample_token in sample_tokens:
            # Get the KITTI boxes we just generated in LIDAR frame.
            kitti_token = '%s_%s' % (self.split, sample_token)
            boxes = kitti.get_boxes(token=kitti_token)

            # Convert KITTI boxes to nuScenes detection challenge result format.
            sample_results = [
                self._box_to_sample_result(sample_token, box) for box in boxes
            ]

            # Store all results for this image.
            results[sample_token] = sample_results

        # Store submission file to disk.
        submission = {'meta': meta, 'results': results}
        submission_path = os.path.join(self.nusc_kitti_dir, 'submission.json')
        print('Writing submission to: %s' % submission_path)
        with open(submission_path, 'w') as f:
            json.dump(submission, f, indent=2)

    def _box_to_sample_result(self,
                              sample_token: str,
                              box: Box,
                              attribute_name: str = '') -> Dict[str, Any]:
        # Prepare data
        translation = box.center
        size = box.wlh
        rotation = box.orientation.q
        velocity = box.velocity
        detection_name = box.name
        detection_score = box.score

        # Create result dict
        sample_result = dict()
        sample_result['sample_token'] = sample_token
        sample_result['translation'] = translation.tolist()
        sample_result['size'] = size.tolist()
        sample_result['rotation'] = rotation.tolist()
        sample_result['velocity'] = velocity.tolist()[:2]  # Only need vx, vy.
        sample_result['detection_name'] = detection_name
        sample_result['detection_score'] = detection_score
        sample_result['attribute_name'] = attribute_name

        return sample_result

    def _split_to_samples(self, split_logs: List[str]) -> List[str]:
        """
        Convenience function to get the samples in a particular split.
        :param split_logs: A list of the log names in this split.
        :return: The list of samples.
        """
        samples = []
        for sample in self.nusc.sample:
            scene = self.nusc.get('scene', sample['scene_token'])
            log = self.nusc.get('log', scene['log_token'])
            logfile = log['logfile']
            if logfile in split_logs:
                samples.append(sample['token'])
        return samples
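
A minimal usage sketch for KittiConverter (added for illustration; it only uses the constructor parameters and methods defined above, and assumes the nuScenes mini split is available under the hard-coded dataroot):

if __name__ == '__main__':
    converter = KittiConverter(nusc_kitti_dir='~/nuscenes/nusc_kitti',
                               image_count=10,
                               nusc_version='v1.0-mini',
                               split='mini_train')
    converter.nuscenes_gt_to_kitti()        # write KITTI-style image_2, velodyne, calib, label_2
    converter.render_kitti(render_2d=True)  # render the exported boxes for a visual check
    converter.kitti_res_to_nuscenes()       # convert KITTI-style results back to a nuScenes submission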
Example no. 18
0
def create_dataset(dataroot,
                   save_dir,
                   width=672,
                   height=672,
                   grid_range=70.,
                   nusc_version='v1.0-mini',
                   use_constant_feature=True,
                   use_intensity_feature=True,
                   end_id=None):

    os.makedirs(os.path.join(save_dir, 'in_feature'), exist_ok=True)
    os.makedirs(os.path.join(save_dir, 'out_feature'), exist_ok=True)

    nusc = NuScenes(version=nusc_version, dataroot=dataroot, verbose=True)
    ref_chan = 'LIDAR_TOP'

    if width == height:
        size = width
    else:
        raise Exception(
            'Currently only supported if width and height are equal')

    grid_length = 2. * grid_range / size
    label_half_length = 0

    data_id = 0
    grid_ticks = np.arange(-grid_range, grid_range + grid_length, grid_length)
    grid_centers \
        = (grid_ticks + grid_length / 2)[:len(grid_ticks) - 1]
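    # Worked example (illustrative, not in the original): with the defaults
    # width = height = 672 and grid_range = 70.0, each BEV cell is
    # grid_length = 2 * 70 / 672 ≈ 0.208 m wide, and grid_centers holds the 672
    # cell-center coordinates spanning roughly [-70, +70] m.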

    for my_scene in nusc.scene:
        first_sample_token = my_scene['first_sample_token']
        token = first_sample_token

        # try:
        while (token != ''):
            print('--- {} '.format(data_id) + token + ' ---')

            my_sample = nusc.get('sample', token)
            sd_record = nusc.get('sample_data', my_sample['data'][ref_chan])
            sample_rec = nusc.get('sample', sd_record['sample_token'])
            chan = sd_record['channel']
            pc, times = LidarPointCloud.from_file_multisweep(nusc,
                                                             sample_rec,
                                                             chan,
                                                             ref_chan,
                                                             nsweeps=1)
            _, boxes, _ = nusc.get_sample_data(sd_record['token'],
                                               box_vis_level=0)
            out_feature = np.zeros((size, size, 8), dtype=np.float32)
            start = time.time()
            for box_idx, box in enumerate(boxes):
                label = 0
                if box.name.split('.')[0] == 'vehicle':
                    if box.name.split('.')[1] == 'car':
                        label = 1
                    elif box.name.split('.')[1] == 'bus':
                        label = 2
                    elif box.name.split('.')[1] == 'truck':
                        label = 2
                    elif box.name.split('.')[1] == 'construction':
                        label = 2
                    elif box.name.split('.')[1] == 'emergency':
                        label = 2
                    elif box.name.split('.')[1] == 'trailer':
                        label = 2
                    elif box.name.split('.')[1] == 'bicycle':
                        label = 3
                    elif box.name.split('.')[1] == 'motorcycle':
                        label = 3
                elif box.name.split('.')[0] == 'human':
                    label = 4
                # elif box.name.split('.')[0] == 'movable_object':
                #     label = 1
                # elif box.name.split('.')[0] == 'static_object':
                #     label = 1
                else:
                    continue
                height_pt = np.linalg.norm(box.corners().T[0] -
                                           box.corners().T[3])
                box_corners = box.corners().astype(np.float32)
                corners2d = box_corners[:2, :]
                # corners2d = box.corners()[:2, :].astype(np.float32)
                box2d = corners2d.T[[2, 3, 7, 6]]
                box2d_center = box2d.mean(axis=0)
                yaw, pitch, roll = box.orientation.yaw_pitch_roll
                # print('--')
                # print(box.name)
                p1_reshape = box_corners
                out_feature = generate_out_feature(
                    width, height, size, grid_centers,
                    box_corners, box2d, box2d_center,
                    pc.points.astype(np.float32), height_pt, label,
                    label_half_length, yaw, out_feature)

                # generate_out_feature(width, height, size, grid_centers, pc.points,
                #                      box.corners(), height_pt,
                #                      label, label_half_length, yaw,
                #                      out_feature)
                # out_feature = out_feature.astype(np.float32)

            out_end = time.time()
            # feature_generator = fg.FeatureGenerator(
            #     grid_range, width, height,
            #     use_constant_feature, use_intensity_feature)

            feature_generator = fgpb.FeatureGenerator(grid_range, width,
                                                      height)
            in_feature = feature_generator.generate(pc.points.T,
                                                    use_constant_feature,
                                                    use_intensity_feature)
            in_end = time.time()

            print('time total {} out {} in {}'.format(in_end - start,
                                                      out_end - start,
                                                      in_end - out_end))
            if use_constant_feature and use_intensity_feature:
                channels = 8
            elif use_constant_feature or use_intensity_feature:
                channels = 6
            else:
                channels = 4

            in_feature = np.array(in_feature).reshape(channels, size,
                                                      size).astype(np.float16)
            in_feature = in_feature.transpose(1, 2, 0)
            # instance_pt is flipped due to flip
            # out_feature = np.flip(np.flip(out_feature, axis=0), axis=1)
            # out_feature[:, :, 1:3] *= -1
            np.save(os.path.join(save_dir, 'in_feature/{:05}'.format(data_id)),
                    in_feature)
            np.save(
                os.path.join(save_dir, 'out_feature/{:05}'.format(data_id)),
                out_feature)
            token = my_sample['next']
            data_id += 1
            if data_id == end_id:
                return
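
A sketch of how create_dataset might be called (added for illustration; the paths are placeholders, the remaining arguments are the defaults from the signature):

if __name__ == '__main__':
    create_dataset(dataroot='/path/to/nuscenes',   # placeholder path
                   save_dir='/path/to/output',     # placeholder path
                   width=672,
                   height=672,
                   grid_range=70.,
                   nusc_version='v1.0-mini',
                   use_constant_feature=True,
                   use_intensity_feature=True,
                   end_id=None)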
Example no. 19
0
class nuScenesDataset(Dataset):
    def __init__(self,
                 cfg,
                 cam_name: str = 'CAM_FRONT',
                 lidar_name: str = 'LIDAR_TOP',
                 nusc_version: str = 'v1.0-trainval',
                 is_train=True,
                 transforms=None):
        super(nuScenesDataset, self).__init__()

        self.split = cfg.DATASETS.TRAIN_SPLIT if is_train else cfg.DATASETS.TEST_SPLIT

        if self.split == "train_detect":
            self.image_count = 14059
        elif self.split == "val":
            self.image_count = 6019

        self.cam_name = cam_name
        self.lidar_name = lidar_name
        self.nusc_version = nusc_version
        # Select subset of the data to look at.
        self.nusc = NuScenes(version=nusc_version)

        self.is_train = is_train
        self.transforms = transforms
        self.classes = cfg.DATASETS.DETECT_CLASSES
        self.flip_prob = cfg.INPUT.FLIP_PROB_TRAIN if is_train else 0
        self.aug_prob = cfg.INPUT.SHIFT_SCALE_PROB_TRAIN if is_train else 0
        self.shift_scale = cfg.INPUT.SHIFT_SCALE_TRAIN
        self.num_classes = len(self.classes)

        self.input_width = cfg.INPUT.WIDTH_TRAIN
        self.input_height = cfg.INPUT.HEIGHT_TRAIN
        self.output_width = self.input_width // cfg.MODEL.BACKBONE.DOWN_RATIO
        self.output_height = self.input_height // cfg.MODEL.BACKBONE.DOWN_RATIO
        self.max_objs = cfg.DATASETS.MAX_OBJECTS

        self.logger = logging.getLogger(__name__)
        self.logger.info(
            "Initializing nuScenes {} set with {} files loaded".format(
                self.split, self.image_count))

        # Get assignment of scenes to splits.
        self.split_logs = create_splits_logs(self.split, self.nusc)
        self.sample_tokens = self._split_to_samples(self.split_logs)

    def __len__(self):
        return self.image_count

    def __getitem__(self, idx):
        sample_token = self.sample_tokens[idx]
        sample = self.nusc.get('sample', sample_token)

        cam_front_token = sample['data'][self.cam_name]
        sd_record_cam = self.nusc.get('sample_data', cam_front_token)

        filename_cam_full = sd_record_cam['filename']
        src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)

        img = Image.open(src_im_path)
        anns, K = self.load_annotations(idx)

        center = np.array([i / 2 for i in img.size], dtype=np.float32)
        size = np.array([i for i in img.size], dtype=np.float32)
        """
        resize, horizontal flip, and affine augmentation are performed here.
        since it is complicated to compute heatmap w.r.t transform.
        """
        flipped = False
        if (self.is_train) and (random.random() < self.flip_prob):
            flipped = True
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            center[0] = size[0] - center[0] - 1
            K[0, 2] = size[0] - K[0, 2] - 1

        affine = False
        if (self.is_train) and (random.random() < self.aug_prob):
            affine = True
            shift, scale = self.shift_scale[0], self.shift_scale[1]
            shift_ranges = np.arange(-shift, shift + 0.1, 0.1)
            center[0] += size[0] * random.choice(shift_ranges)
            center[1] += size[1] * random.choice(shift_ranges)

            scale_ranges = np.arange(1 - scale, 1 + scale + 0.1, 0.1)
            size *= random.choice(scale_ranges)

        center_size = [center, size]
        trans_affine = get_transfrom_matrix(
            center_size, [self.input_width, self.input_height])
        trans_affine_inv = np.linalg.inv(trans_affine)
        img = img.transform(
            (self.input_width, self.input_height),
            method=Image.AFFINE,
            data=trans_affine_inv.flatten()[:6],
            resample=Image.BILINEAR,
        )

        trans_mat = get_transfrom_matrix(
            center_size, [self.output_width, self.output_height])

        if not self.is_train:
            # for inference we parametrize with original size
            target = ParamsList(image_size=size, is_train=self.is_train)
            target.add_field("trans_mat", trans_mat)
            target.add_field("K", K)
            if self.transforms is not None:
                img, target = self.transforms(img, target)

            return img, target, sample_token

        heat_map = np.zeros(
            [self.num_classes, self.output_height, self.output_width],
            dtype=np.float32)
        regression = np.zeros([self.max_objs, 3, 8], dtype=np.float32)
        cls_ids = np.zeros([self.max_objs], dtype=np.int32)
        proj_points = np.zeros([self.max_objs, 2], dtype=np.int32)
        p_offsets = np.zeros([self.max_objs, 2], dtype=np.float32)
        dimensions = np.zeros([self.max_objs, 3], dtype=np.float32)
        locations = np.zeros([self.max_objs, 3], dtype=np.float32)
        rotys = np.zeros([self.max_objs], dtype=np.float32)
        reg_mask = np.zeros([self.max_objs], dtype=np.uint8)
        flip_mask = np.zeros([self.max_objs], dtype=np.uint8)

        for i, a in enumerate(anns):
            a = a.copy()
            cls = a["label"]

            locs = np.array(a["locations"])
            rot_y = np.array(a["rot_y"])
            if flipped:
                locs[0] *= -1
                rot_y *= -1

            point, box2d, box3d = encode_label(K, rot_y, a["dimensions"], locs)
            point = affine_transform(point, trans_mat)
            box2d[:2] = affine_transform(box2d[:2], trans_mat)
            box2d[2:] = affine_transform(box2d[2:], trans_mat)
            box2d[[0, 2]] = box2d[[0, 2]].clip(0, self.output_width - 1)
            box2d[[1, 3]] = box2d[[1, 3]].clip(0, self.output_height - 1)
            h, w = box2d[3] - box2d[1], box2d[2] - box2d[0]

            if (0 < point[0] < self.output_width) and (0 < point[1] <
                                                       self.output_height):
                point_int = point.astype(np.int32)
                p_offset = point - point_int
                radius = gaussian_radius(h, w)
                radius = max(0, int(radius))
                heat_map[cls] = draw_umich_gaussian(heat_map[cls], point_int,
                                                    radius)

                cls_ids[i] = cls
                regression[i] = box3d
                proj_points[i] = point_int
                p_offsets[i] = p_offset
                dimensions[i] = np.array(a["dimensions"])
                locations[i] = locs
                rotys[i] = rot_y
                reg_mask[i] = 1 if not affine else 0
                flip_mask[i] = 1 if not affine and flipped else 0

        target = ParamsList(image_size=img.size, is_train=self.is_train)
        target.add_field("hm", heat_map)
        target.add_field("reg", regression)
        target.add_field("cls_ids", cls_ids)
        target.add_field("proj_p", proj_points)
        target.add_field("dimensions", dimensions)
        target.add_field("locations", locations)
        target.add_field("rotys", rotys)
        target.add_field("trans_mat", trans_mat)
        target.add_field("K", K)
        target.add_field("reg_mask", reg_mask)
        target.add_field("flip_mask", flip_mask)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target, sample_token

    def load_annotations(self, idx):
        annotations = []
        """
        Converts nuScenes GT annotations to KITTI format.
        """
        kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
        kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse

        sample_token = self.sample_tokens[idx]
        sample = self.nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']

        cam_front_token = sample['data'][self.cam_name]
        lidar_token = sample['data'][self.lidar_name]

        # Retrieve sensor records.
        sd_record_cam = self.nusc.get('sample_data', cam_front_token)
        sd_record_lid = self.nusc.get('sample_data', lidar_token)
        cs_record_cam = self.nusc.get('calibrated_sensor',
                                      sd_record_cam['calibrated_sensor_token'])
        cs_record_lid = self.nusc.get('calibrated_sensor',
                                      sd_record_lid['calibrated_sensor_token'])

        # Combine transformations and convert to KITTI format.
        # Note: cam uses same conventions in KITTI and nuScenes.
        lid_to_ego = transform_matrix(cs_record_lid['translation'],
                                      Quaternion(cs_record_lid['rotation']),
                                      inverse=False)
        ego_to_cam = transform_matrix(cs_record_cam['translation'],
                                      Quaternion(cs_record_cam['rotation']),
                                      inverse=True)
        velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

        # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
        velo_to_cam_kitti = np.dot(velo_to_cam,
                                   kitti_to_nu_lidar.transformation_matrix)

        # Currently not used.
        imu_to_velo_kitti = np.zeros((3, 4), dtype=np.float32)  # Dummy values.
        r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

        # Projection matrix.
        p_left_kitti = np.zeros((3, 4), dtype=np.float32)
        p_left_kitti[:3, :3] = cs_record_cam[
            'camera_intrinsic']  # Cameras are always rectified.
        p_left_kitti = p_left_kitti[:3, :3]

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

        # Check that the rotation has the same format as in KITTI.
        assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1],
                                                      [1, 0, 0]])).all()
        assert (velo_to_cam_trans[1:3] < 0).all()

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
        # not the camera.
        filename_cam_full = sd_record_cam['filename']
        filename_lid_full = sd_record_lid['filename']

        if self.is_train:
            for sample_annotation_token in sample_annotation_tokens:

                sample_annotation = self.nusc.get('sample_annotation',
                                                  sample_annotation_token)
                # Convert nuScenes category to nuScenes detection challenge category.
                detection_name = category_to_detection_name(
                    sample_annotation['category_name'])
                if detection_name in self.classes:
                    # Get box in LIDAR frame.
                    _, box_lidar_nusc, _ = self.nusc.get_sample_data(
                        lidar_token,
                        box_vis_level=BoxVisibility.NONE,
                        selected_anntokens=[sample_annotation_token])
                    box_lidar_nusc = box_lidar_nusc[0]

                    # Convert from nuScenes to KITTI box format.
                    box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                        box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot),
                        velo_to_cam_trans, r0_rect)

                    # Convert quaternion to yaw angle.
                    v = np.dot(box_cam_kitti.rotation_matrix,
                               np.array([1, 0, 0]))
                    yaw = -np.arctan2(v[2], v[0])
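                    # Illustrative note (not from the original): v is the box
                    # x-axis expressed in camera coordinates; yaw is its heading
                    # in the camera x-z plane (an identity rotation gives yaw = 0).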

                    annotations.append({
                        "class":
                        detection_name,
                        "label":
                        TYPE_ID_CONVERSION[detection_name],
                        "dimensions": [
                            float(box_cam_kitti.wlh[2]),
                            float(box_cam_kitti.wlh[0]),
                            float(box_cam_kitti.wlh[1])
                        ],
                        "locations": [
                            float(box_cam_kitti.center[0]),
                            float(box_cam_kitti.center[1]),
                            float(box_cam_kitti.center[2])
                        ],
                        "rot_y":
                        float(yaw)
                    })

        return annotations, p_left_kitti

    def _split_to_samples(self, split_logs: List[str]) -> List[str]:
        """
        Convenience function to get the samples in a particular split.
        :param split_logs: A list of the log names in this split.
        :return: The list of samples.
        """
        samples = []
        for sample in self.nusc.sample:
            scene = self.nusc.get('scene', sample['scene_token'])
            log = self.nusc.get('log', scene['log_token'])
            logfile = log['logfile']
            if logfile in split_logs:
                samples.append(sample['token'])
        return samples
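
A minimal sketch of how the dataset above could be indexed (added for illustration; cfg is a hypothetical config object that must provide the DATASETS.*, INPUT.* and MODEL.BACKBONE.DOWN_RATIO fields read in __init__):

dataset = nuScenesDataset(cfg, is_train=True)  # cfg: hypothetical config, see note above
img, target, sample_token = dataset[0]         # transformed image, ParamsList target, nuScenes sample token
print(len(dataset), sample_token)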
Example no. 20
0
# NOTE: the opening lines of this example were cut off; the loop header below is an
# assumed reconstruction from the loop body (a NuScenes instance `nusc` is assumed
# to already exist).
asdf = []
for i, file in enumerate(
        os.listdir(
            '/home/marcel/Documents/Datasets/NuScenes/samples/CAM_FRONT')):
    if i < 3:
        asdf.append([
            sd for sd in nusc.sample_data
            if sd['filename'] == os.path.join('samples/CAM_FRONT', file)
        ][0])
    #sd2_im = [sd for sd in nusc.sample_data if sd['filename'] == 'samples/CAM_FRONT/n015-2018-07-18-11-18-34+0800__CAM_FRONT__1531884168362460.jpg'][0]
sd1_im = asdf[0]
s1 = nusc.get('sample', sd1_im['sample_token'])
#s2 = nusc.get('sample', sd2_im['sample_token'])

# Camera
sd1 = sd1_im
#sd2 = sd2_im
_, boxes1, intrinsics = nusc.get_sample_data(sd1['token'], box_vis_level=0)
#_, boxes2, _ = nusc.get_sample_data(sd2['token'], box_vis_level=0)
box1 = boxes1[0]
#box2 = boxes2[3]
box2d = box1.box_2d(intrinsics)
#yaw2 = quaternion_yaw(box2.orientation) / np.pi * 180
#yaw_diff = yaw1 - yaw2
print(box2d)
#print(yaw2)
#print(yaw_diff)
nusc.render_sample_data(sd1['token'], box_vis_level=0)
rect = patches.Rectangle((box2d[0], box2d[1]),
                         box2d[2] - box2d[0],
                         box2d[3] - box2d[1],
                         linewidth=1,
                         edgecolor='r',
                         facecolor='none')  # closing argument is an assumed reconstruction; the original snippet is truncated here

class NuscenesReader:

    scene_split_lists = {"train": set(split_train), "val": set(split_val)}
    sample_id_template = "ns_{}_{:02d}"
    np_str_to_cat = np.vectorize(nuscenes_category_to_object_class,
                                 otypes=[np.int64])
    np_str_to_sem = np.vectorize(nuscenes_category_to_semantic_class,
                                 otypes=[np.int64])

    # Nuscenes LiDAR has x-axis to vehicle right and y-axis to front.
    # Turn this 90 degrees to have x-axis facing the front
    turn_matrix = np.linalg.inv(
        np.asarray([[0, 1, 0, 0], [-1, 0, 0, 0], [0, 0, 1, 0], [0, 0, 0, 1]],
                   dtype=np.float64))
    turn_quaternion = Quaternion(axis=(0.0, 0.0, 1.0), radians=-np.pi / 2.0)

    def __init__(self,
                 nuscenes_root,
                 version="v1.0-trainval",
                 max_scenes=None,
                 *,
                 read_radar: bool = True,
                 read_camera: bool = True,
                 read_semantics: bool = True,
                 read_bounding_boxes: bool = True):
        self.nusc = NuScenes(version=version,
                             dataroot=nuscenes_root,
                             verbose=False)
        self.root = pathlib.Path(nuscenes_root)

        # global counter to sanity-check if we calculate the same number of points
        # within boxes as the dataset authors
        self.ns_lidar_pts_to_calculated_diff = 0

        # flags defining the data entries to return from 'read'
        self.read_radar = read_radar
        self.read_camera = read_camera
        self.read_semantics = read_semantics
        self.read_bounding_boxes = read_bounding_boxes

        if self.read_semantics and not hasattr(self.nusc, "lidarseg"):
            raise RuntimeError("Error: nuScenes-lidarseg not installed!")

        # assert that the training targets range from 0 - (|mapping| - 1)
        assert len(set(
            NUSCENES_SEM_CLASSES.values())) == len(NUSCENES_SEM_CLASSES)
        assert all(a == b
                   for a, b in zip(sorted(NUSCENES_SEM_CLASSES.values()),
                                   range(len(NUSCENES_SEM_CLASSES))))

        split_name = {
            "v1.0-trainval": "nuscenes_default",
            "v1.0-mini": "nuscenes_mini",
        }.get(version, "nuscenes_{}".format(version))

        # create split dict
        self.split = {
            "name": split_name,
            "data": {k: []
                     for k in self.scene_split_lists.keys()},
        }
        for i, scene in enumerate(self.nusc.scene):
            if max_scenes is not None and i > max_scenes:
                break
            name = scene["name"]
            for k, v in self.scene_split_lists.items():
                if name in v:
                    split_list = self.split["data"][k]
                    split_list.extend([
                        self.sample_id_template.format(name, i)
                        for i in range(0, scene["nbr_samples"])
                    ])
                    break
            else:
                raise RuntimeError(
                    "Found scene that is not in a split: {}".format(name))

    def _make_path(self, filename: str):
        return str(self.root / filename)

    def __iter__(self):
        self._scene_iter = self.nusc.scene.__iter__()
        self._next_scene()
        return self

    def __next__(self) -> typing.Tuple[dict, int]:
        try:
            return self._sample_iter.__next__()
        except StopIteration:
            self._next_scene()
            return self._sample_iter.__next__()

    def make_sample_id(self, sample: typing.Tuple[dict, int]) -> str:
        scene = self.nusc.get("scene", sample[0]["scene_token"])
        return self.sample_id_template.format(scene["name"], sample[1])

    def _next_scene(self):
        self._current_scene = self._scene_iter.__next__()

        class SampleIteration:
            """Iterate over nuscenes samples of a scene.
            Add an additional sample index.
            """
            def __init__(self, scene, nusc):
                self.nusc = nusc
                self._pos = scene["first_sample_token"]
                self._index = itertools.count().__iter__()

            def __next__(self):
                if not self._pos:
                    raise StopIteration()
                sample = self.nusc.get("sample", self._pos)
                self._pos = sample["next"]
                return sample, self._index.__next__()

        self._sample_iter = SampleIteration(self._current_scene, self.nusc)

    def read(self, sample: typing.Tuple[dict, int], _sample_id: str):
        d = self._read_sample(sample)

        def convert(entry):
            """Convert data types to be compatible with tfrecords features."""
            if isinstance(entry, str):
                return entry.encode("utf-8")
            if isinstance(entry, np.ndarray):
                entry = entry.flatten()
                if not np.issubdtype(entry.dtype, np.number):
                    entry = tf.io.serialize_tensor(tf.convert_to_tensor(entry))
                elif entry.dtype == np.float64:
                    entry = entry.astype(np.float32)
                return entry
            return entry

        return {k: convert(v) for k, v in d.items()}

    def _read_sample(self, sample: typing.Tuple[dict, int]):

        radars = ["RADAR_FRONT", "RADAR_FRONT_LEFT", "RADAR_FRONT_RIGHT"]
        cameras = ["CAM_FRONT"]

        sample_id = self.make_sample_id(sample)
        sample, sample_index = sample
        sample_data_token: str = sample["data"]["LIDAR_TOP"]

        lidar_sample_data = self.nusc.get("sample_data", sample_data_token)
        assert lidar_sample_data["is_key_frame"]

        data_path, box_list, cam_intrinsic = self.nusc.get_sample_data(
            sample_data_token)

        # POINT CLOUD [(x y z I) x P]. Intensity from 0.0 to 255.0
        point_cloud_orig = LidarPointCloud.from_file(data_path)
        if point_cloud_orig.points.dtype != np.float32:
            raise RuntimeError("Point cloud has wrong data type.")
        # -> [P x (x y z I)]
        point_cloud_orig = point_cloud_orig.points.transpose()

        camera_data = {}
        radar_data = {}
        box_data = {}
        lidarseg_data = {}

        if self.read_semantics:
            lidarseg_data = self._read_lidarseg_data(sample_data_token,
                                                     point_cloud_orig)

        if self.read_bounding_boxes:
            box_data = self._read_bounding_boxes(box_list, point_cloud_orig,
                                                 sample)

        # rotate point cloud 90 degrees around z-axis,
        # so that x-axis faces front of vehicle
        x = point_cloud_orig[:, 0:1]
        y = point_cloud_orig[:, 1:2]
        point_cloud = np.concatenate((y, -x, point_cloud_orig[:, 2:4]),
                                     axis=-1)
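        # Illustrative check (not in the original): a point one metre in front of
        # the sensor, (x, y) = (0, 1) in nuScenes lidar coords, becomes (1, 0)
        # after this swap, so the new x-axis points to the vehicle front.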

        # LiDAR extrinsic calibration
        calibration_lidar = self.nusc.get(
            "calibrated_sensor", lidar_sample_data["calibrated_sensor_token"])
        tf_lidar = transform_matrix(calibration_lidar["translation"],
                                    Quaternion(calibration_lidar["rotation"]))
        # vehicle -> point cloud coords (turned by 90 degrees)
        tf_vehicle_pc = np.linalg.inv(
            np.dot(tf_lidar, NuscenesReader.turn_matrix))
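        # tf_vehicle_pc maps homogeneous vehicle (ego) coordinates into this rotated
        # point-cloud frame; camera/radar extrinsics are pre-multiplied with it below
        # so they are expressed relative to the point cloud.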

        if self.read_camera:
            # CAMERAS
            camera_data: typing.List[typing.Dict[str, typing.Any]] = [
                self._read_sensor_data_and_extrinsics(sample, cam_name,
                                                      tf_vehicle_pc,
                                                      self._read_camera_data)
                for cam_name in cameras
            ]
            camera_data = {k: v for d in camera_data for k, v in d.items()}

        if self.read_radar:
            # RADARS
            radar_data: typing.List[typing.Dict[str, typing.Any]] = [
                self._read_sensor_data_and_extrinsics(sample, radar_name,
                                                      tf_vehicle_pc,
                                                      self._read_radar_data)
                for radar_name in radars
            ]
            radar_data = {k: v for d in radar_data for k, v in d.items()}

        assert box_data.keys().isdisjoint(camera_data.keys())
        assert box_data.keys().isdisjoint(radar_data.keys())
        assert box_data.keys().isdisjoint(lidarseg_data.keys())
        # return feature array. Add sample ID.
        d = {
            "sample_id": sample_id,
            "point_cloud": point_cloud,
            **box_data,
            **camera_data,
            **radar_data,
            **lidarseg_data,
        }
        return d

    def _read_lidarseg_data(self, sample_data_token: str,
                            point_cloud_orig: np.ndarray):
        # self.nusc.lidarseg_idx2name_mapping

        lidarseg_labels_filename = (
            pathlib.Path(self.nusc.dataroot) /
            self.nusc.get("lidarseg", sample_data_token)["filename"])
        # Load labels from .bin file.
        points_label = np.fromfile(str(lidarseg_labels_filename),
                                   dtype=np.uint8)  # [num_points]

        if points_label.shape[0] != point_cloud_orig.shape[0]:
            raise ValueError("Semantic labels do not match point cloud.")

        return {"semantic_labels": tf.io.serialize_tensor(points_label)}

    def _read_bounding_boxes(self, box_list, point_cloud_orig, sample):
        # create transform matrices for all boxes
        dt = np.float32
        if box_list:
            r = np.asarray([x.rotation_matrix for x in box_list])
            c = np.asarray([x.center for x in box_list])
            center_pos = np.asarray([x.center for x in box_list], dtype=dt)
            # NuScenes format: width, length, height. Reorder to l w h
            box_dims_lwh = np.asarray([x.wlh for x in box_list],
                                      dtype=np.float64)[:, [1, 0, 2]]
            box_rot = np.asarray([(self.turn_quaternion * x.orientation).q
                                  for x in box_list],
                                 dtype=dt)
        else:
            r = np.zeros(shape=[0, 3, 3], dtype=np.float64)
            c = np.zeros(shape=[0, 3], dtype=np.float64)
            center_pos = np.zeros(shape=[0, 3], dtype=np.float64)
            box_dims_lwh = np.zeros(shape=[0, 3], dtype=np.float64)
            box_rot = np.zeros(shape=[0, 4], dtype=np.float64)

        rc = np.concatenate((r, c[:, :, None]), axis=-1)
        tfs = np.concatenate(
            (
                rc,
                np.broadcast_to(
                    np.array([0, 0, 0, 1], dtype=rc.dtype)[None, None, :],
                    [rc.shape[0], 1, 4],
                ),
            ),
            axis=1,
        )

        total_points_per_box, mapping = assign_point_cloud_to_bounding_boxes(
            point_cloud_orig,
            bounding_boxes_tfs=tfs,
            bounding_boxes_dims=box_dims_lwh,
        )

        # 3D BOXES IN LIDAR COORDS [(x y z) (w x y z) (l w h)]
        bboxes_spatial = np.empty(shape=(len(box_list), 10), dtype=dt)
        bboxes_spatial[:, 0] = center_pos[:, 1]
        bboxes_spatial[:, 1] = -center_pos[:, 0]
        bboxes_spatial[:, 2] = center_pos[:, 2]
        bboxes_spatial[:, 3:7] = box_rot
        bboxes_spatial[:, 7:10] = box_dims_lwh.astype(dt)

        object_str = np.array([x.name for x in box_list], dtype=np.str_)
        object_category = NuscenesReader.np_str_to_cat(object_str).astype(
            np.int64)
        class_value = NuscenesReader.np_str_to_sem(object_str).astype(np.int64)
        lidar_pts = np.asarray([
            x["num_lidar_pts"] for x in [
                self.nusc.get("sample_annotation", ann_token)
                for ann_token in sample["anns"]
            ]
        ])
        # rotate box transforms (same as bboxes_spatial)
        tfs = np.matmul(np.linalg.inv(self.turn_matrix)[None, ...], tfs)

        # track to see if box calculation gives the same results as the
        # lidar points counter from the nuscenes dataset
        diff = np.sum(np.abs(lidar_pts - total_points_per_box))
        self.ns_lidar_pts_to_calculated_diff += diff

        return {
            "bounding_boxes_3d_spatial": bboxes_spatial,
            "bounding_boxes_3d_transforms": tfs,
            "bounding_boxes_class": class_value,
            "bounding_boxes_category": object_category,
            "bounding_boxes_class_str": object_str,
            "bounding_boxes_point_counter":
            total_points_per_box.astype(np.int64),
            "bounding_boxes_point_mapping": mapping,
        }

    @staticmethod
    def _read_sensor_data_and_extrinsics(sample, sensor_name: str,
                                         coord_transform: np.ndarray,
                                         reader_func):
        data, extrinsics = reader_func(sample["data"][sensor_name])
        extrinsics = np.matmul(coord_transform, extrinsics)
        return {
            **{k.format(sensor_name.lower()): v
               for k, v in data.items()},
            "{}_extrinsics".format(sensor_name.lower()): extrinsics,
        }

    def _read_camera_data(self, sample_data_token):
        sd_record = self.nusc.get("sample_data", sample_data_token)
        cs_record = self.nusc.get("calibrated_sensor",
                                  sd_record["calibrated_sensor_token"])
        sensor_record = self.nusc.get("sensor", cs_record["sensor_token"])
        assert sensor_record["modality"] == "camera"

        # currently using only keyframes
        assert sd_record["is_key_frame"]

        data_path = self.nusc.get_sample_data_path(sample_data_token)
        imsize = (sd_record["width"], sd_record["height"])

        cam_intrinsic = np.array(cs_record["camera_intrinsic"])
        cam_extrinsics = transform_matrix(cs_record["translation"],
                                          Quaternion(cs_record["rotation"]))

        with open(data_path, "rb") as in_file:
            img_bytes_jpg = in_file.read()

        return (
            {
                "{}_jpg": img_bytes_jpg,
                "{}_size": np.asarray(imsize, dtype=np.int64),
                "{}_intrinsics": cam_intrinsic.astype(np.float32),
            },
            cam_extrinsics,
        )

    def _read_radar_data(self, sample_data_token):
        sd_record = self.nusc.get("sample_data", sample_data_token)
        cs_record = self.nusc.get("calibrated_sensor",
                                  sd_record["calibrated_sensor_token"])
        sensor_record = self.nusc.get("sensor", cs_record["sensor_token"])
        assert sensor_record["modality"] == "radar"

        # currently using only keyframes
        assert sd_record["is_key_frame"]

        data_path = self.nusc.get_sample_data_path(sample_data_token)
        radar_point_cloud = RadarPointCloud.from_file(data_path)
        points = tf.convert_to_tensor(radar_point_cloud.points.transpose())

        radar_extrinsics = transform_matrix(cs_record["translation"],
                                            Quaternion(cs_record["rotation"]))

        return {"{}_points": tf.io.serialize_tensor(points)}, radar_extrinsics
Esempio n. 22
0
def main():
    SCENE_SPLITS["mini-val"] = SCENE_SPLITS["val"]
    if not os.path.exists(DATA_PATH):
        os.mkdir(DATA_PATH)
    if not os.path.exists(OUT_PATH):
        os.mkdir(OUT_PATH)
    for split in SPLITS:
        data_path = DATA_PATH  # + '{}/'.format(SPLITS[split])
        nusc = NuScenes(version=SPLITS[split],
                        dataroot=data_path,
                        verbose=True)
        out_path = OUT_PATH + "{}.json".format(split)
        categories_info = [{
            "name": CATS[i],
            "id": i + 1
        } for i in range(len(CATS))]
        ret = {
            "images": [],
            "annotations": [],
            "categories": categories_info,
            "videos": [],
            "attributes": ATTRIBUTE_TO_ID,
        }
        num_images = 0
        num_anns = 0
        num_videos = 0

        # A "sample" in nuScenes refers to a timestamp with 6 cameras and 1 LIDAR.
        for sample in nusc.sample:
            scene_name = nusc.get("scene", sample["scene_token"])["name"]
            if split not in ["mini", "test"] and scene_name not in SCENE_SPLITS[split]:
                continue
            if sample["prev"] == "":
                print("scene_name", scene_name)
                num_videos += 1
                ret["videos"].append({
                    "id": num_videos,
                    "file_name": scene_name
                })
                frame_ids = {k: 0 for k in sample["data"]}
                track_ids = {}
            # We decompose a sample into 6 images in our case.
            for sensor_name in sample["data"]:
                if sensor_name in USED_SENSOR:
                    image_token = sample["data"][sensor_name]
                    image_data = nusc.get("sample_data", image_token)
                    num_images += 1

                    # Complex coordinate transform. This will take time to understand.
                    sd_record = nusc.get("sample_data", image_token)
                    cs_record = nusc.get("calibrated_sensor",
                                         sd_record["calibrated_sensor_token"])
                    pose_record = nusc.get("ego_pose",
                                           sd_record["ego_pose_token"])
                    global_from_car = transform_matrix(
                        pose_record["translation"],
                        Quaternion(pose_record["rotation"]),
                        inverse=False,
                    )
                    car_from_sensor = transform_matrix(
                        cs_record["translation"],
                        Quaternion(cs_record["rotation"]),
                        inverse=False,
                    )
                    trans_matrix = np.dot(global_from_car, car_from_sensor)
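                    # trans_matrix composes sensor->ego (car_from_sensor) with
                    # ego->global (global_from_car), i.e. it maps homogeneous
                    # camera-frame points into the global frame at this timestamp.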
                    _, boxes, camera_intrinsic = nusc.get_sample_data(
                        image_token, box_vis_level=BoxVisibility.ANY)
                    calib = np.eye(4, dtype=np.float32)
                    calib[:3, :3] = camera_intrinsic
                    calib = calib[:3]
                    frame_ids[sensor_name] += 1

                    # image information in COCO format
                    image_info = {
                        "id": num_images,
                        "file_name": image_data["filename"],
                        "calib": calib.tolist(),
                        "video_id": num_videos,
                        "frame_id": frame_ids[sensor_name],
                        "sensor_id": SENSOR_ID[sensor_name],
                        "sample_token": sample["token"],
                        "trans_matrix": trans_matrix.tolist(),
                        "width": sd_record["width"],
                        "height": sd_record["height"],
                        "pose_record_trans": pose_record["translation"],
                        "pose_record_rot": pose_record["rotation"],
                        "cs_record_trans": cs_record["translation"],
                        "cs_record_rot": cs_record["rotation"],
                    }
                    ret["images"].append(image_info)
                    anns = []
                    for box in boxes:
                        det_name = category_to_detection_name(box.name)
                        if det_name is None:
                            continue
                        num_anns += 1
                        v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
                        yaw = -np.arctan2(v[2], v[0])
                        box.translate(np.array([0, box.wlh[2] / 2, 0]))
                        category_id = CAT_IDS[det_name]

                        amodel_center = project_to_image(
                            np.array(
                                [
                                    box.center[0],
                                    box.center[1] - box.wlh[2] / 2,
                                    box.center[2],
                                ],
                                np.float32,
                            ).reshape(1, 3),
                            calib,
                        )[0].tolist()
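                        # amodel_center: image projection of the amodal 3D box center;
                        # box.center was shifted to the bottom face above, so half the
                        # height is subtracted back before projecting.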
                        sample_ann = nusc.get("sample_annotation", box.token)
                        instance_token = sample_ann["instance_token"]
                        if instance_token not in track_ids:
                            track_ids[instance_token] = len(track_ids) + 1
                        attribute_tokens = sample_ann["attribute_tokens"]
                        attributes = [
                            nusc.get("attribute", att_token)["name"]
                            for att_token in attribute_tokens
                        ]
                        att = "" if len(attributes) == 0 else attributes[0]
                        if len(attributes) > 1:
                            print(attributes)
                            import pdb

                            pdb.set_trace()
                        track_id = track_ids[instance_token]
                        vel = nusc.box_velocity(box.token)  # global frame
                        vel = np.dot(
                            np.linalg.inv(trans_matrix),
                            np.array([vel[0], vel[1], vel[2], 0], np.float32),
                        ).tolist()

                        # instance information in COCO format
                        ann = {
                            "id": num_anns,
                            "image_id": num_images,
                            "category_id": category_id,
                            "dim": [box.wlh[2], box.wlh[0], box.wlh[1]],
                            "location": [box.center[0], box.center[1], box.center[2]],
                            "depth": box.center[2],
                            "occluded": 0,
                            "truncated": 0,
                            "rotation_y": yaw,
                            "amodel_center": amodel_center,
                            "iscrowd": 0,
                            "track_id": track_id,
                            "attributes": ATTRIBUTE_TO_ID[att],
                            "velocity": vel,
                        }

                        bbox = KittiDB.project_kitti_box_to_image(
                            copy.deepcopy(box),
                            camera_intrinsic,
                            imsize=(1600, 900))
                        alpha = _rot_y2alpha(
                            yaw,
                            (bbox[0] + bbox[2]) / 2,
                            camera_intrinsic[0, 2],
                            camera_intrinsic[0, 0],
                        )
                        ann["bbox"] = [
                            bbox[0],
                            bbox[1],
                            bbox[2] - bbox[0],
                            bbox[3] - bbox[1],
                        ]
                        ann["area"] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
                        ann["alpha"] = alpha
                        anns.append(ann)

                    # Filter out bounding boxes that are fully occluded by a nearer box
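                    # criterion: drop box i when another box j lies entirely in front
                    # of it (j's far face is closer than i's near face) and _bbox_inside
                    # indicates i's 2D bbox falls within j's 2D bbox.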
                    visable_anns = []
                    for i in range(len(anns)):
                        vis = True
                        for j in range(len(anns)):
                            if (anns[i]["depth"] - min(anns[i]["dim"]) / 2 >
                                    anns[j]["depth"] + max(anns[j]["dim"]) / 2 and
                                    _bbox_inside(anns[i]["bbox"], anns[j]["bbox"])):
                                vis = False
                                break
                        if vis:
                            visable_anns.append(anns[i])

                    for ann in visable_anns:
                        ret["annotations"].append(ann)
                    if DEBUG:
                        img_path = data_path + image_info["file_name"]
                        img = cv2.imread(img_path)
                        img_3d = img.copy()
                        for ann in visable_anns:
                            bbox = ann["bbox"]
                            cv2.rectangle(
                                img,
                                (int(bbox[0]), int(bbox[1])),
                                (int(bbox[2] + bbox[0]),
                                 int(bbox[3] + bbox[1])),
                                (0, 0, 255),
                                3,
                                lineType=cv2.LINE_AA,
                            )
                            box_3d = compute_box_3d(ann["dim"],
                                                    ann["location"],
                                                    ann["rotation_y"])
                            box_2d = project_to_image(box_3d, calib)
                            img_3d = draw_box_3d(img_3d, box_2d)

                            pt_3d = unproject_2d_to_3d(ann["amodel_center"],
                                                       ann["depth"], calib)
                            pt_3d[1] += ann["dim"][0] / 2
                            print("location", ann["location"])
                            print("loc model", pt_3d)
                            pt_2d = np.array(
                                [(bbox[0] + bbox[2]) / 2,
                                 (bbox[1] + bbox[3]) / 2],
                                dtype=np.float32,
                            )
                            pt_3d = unproject_2d_to_3d(pt_2d, ann["depth"],
                                                       calib)
                            pt_3d[1] += ann["dim"][0] / 2
                            print("loc      ", pt_3d)
                        cv2.imshow("img", img)
                        cv2.imshow("img_3d", img_3d)
                        cv2.waitKey()
                        nusc.render_sample_data(image_token)
                        plt.show()
        print("reordering images")
        images = ret["images"]
        video_sensor_to_images = {}
        for image_info in images:
            tmp_seq_id = image_info["video_id"] * 20 + image_info["sensor_id"]
            if tmp_seq_id in video_sensor_to_images:
                video_sensor_to_images[tmp_seq_id].append(image_info)
            else:
                video_sensor_to_images[tmp_seq_id] = [image_info]
        ret["images"] = []
        for tmp_seq_id in sorted(video_sensor_to_images):
            ret["images"] = ret["images"] + video_sensor_to_images[tmp_seq_id]

        print("{} {} images {} boxes".format(split, len(ret["images"]),
                                             len(ret["annotations"])))
        print("out_path", out_path)
        with open(out_path, "w") as f:
            json.dump(ret, f)
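_rot_y2alpha is called above but not defined in this snippet. A plausible sketch of the usual rotation_y-to-observation-angle conversion, consistent with how it is called with the 2D box center, cx and fx (hypothetical, not the original implementation):

import numpy as np

def _rot_y2alpha(rot_y, x, cx, fx):
    # observation angle alpha = global yaw minus the viewing-ray angle of the
    # box center column x, wrapped to [-pi, pi)
    alpha = rot_y - np.arctan2(x - cx, fx)
    if alpha > np.pi:
        alpha -= 2 * np.pi
    if alpha < -np.pi:
        alpha += 2 * np.pi
    return alpha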
            )
            current_time = first_sample_rec['timestamp']
            while current_time < last_sample_rec['timestamp']:

                current_time += time_step
                for channel, sd_rec in current_recs.items():
                    while sd_rec['timestamp'] < current_time and sd_rec[
                            'next'] != '':
                        sd_rec = nusc.get('sample_data', sd_rec['next'])
                        current_recs[channel] = sd_rec

                count_frames = count_frames + 1
                cam_token = sd_rec['token']
                cam_record = nusc.get('sample_data', cam_token)
                cam_path = nusc.get_sample_data_path(cam_token)
                cam_path, boxes, camera_intrinsic = nusc.get_sample_data(
                    sd_rec['token'], )
                im = Image.open(cam_path)
                im.save(
                    data_directory + spl + str(scene_id) + camera_channel +
                    '/img1/' + str(count_frames) + '.jpg',
                    'JPEG',
                )
                TL_found = False
                bb = []
                patch_radius = 700
                sample_record = nusc.get('sample', sample_token)
                log_record = nusc.get('log', scene_record['log_token'])
                log_location = log_record['location']
                nusc_map = NuScenesMap(
                    dataroot=opt.nuscenes_root,
                    map_name=log_location,
    
    for present_sample in nusc.sample:
        # converting image data from 6 cameras (in the sensor list)
        for idx, present_sensor in enumerate(sensor_list):
            img_output_root = img_i_output_root.format(idx)
            label_output_root = label_i_output_root.format(idx)
            print('SENSOR: {}, img out: {}, label out: {}, idx: {}'.format(
                present_sensor, img_output_root, label_output_root, idx))

            os.makedirs(img_output_root, exist_ok=True)
            os.makedirs(label_output_root, exist_ok=True)

            # each sensor_data corresponds to one specific image in the dataset
            sensor_data = nusc.get('sample_data', present_sample['data'][present_sensor])
            data_path, box_list, cam_intrinsic = nusc.get_sample_data(present_sample['data'][present_sensor], BoxVisibility.ALL)

            img_file = os.path.join(data_root, sensor_data['filename']) 
            seqname = str(frame_counter).zfill(6)
            output_label_file = os.path.join(label_output_root, seqname + '.txt')

            with open(output_label_file, 'a') as output_f:
                for box in box_list:
                    # obtaining visibility level of each 3D box
                    present_visibility_token = nusc.get('sample_annotation', box.token)['visibility_token']
                    # visibility_token is stored as a string in nuScenes; cast before comparing
                    if int(present_visibility_token) > min_visibility_level:
                        if not (category_reflection[box.name] == 'DontCare' and delete_dontcare_objects):
                            w, l, h = box.wlh
                            x, y, z = box.center
                            yaw, pitch, roll = box.orientation.yaw_pitch_roll
                            yaw = -yaw
                            alpha = yaw - np.arctan2(x, z)
Esempio n. 25
0
class NuScenesDataset(Dataset):
    def __init__(self, cfg, root, is_train=True, transforms=None):
        super(NuScenesDataset, self).__init__()
        self.root = root
        self.image_dir = os.path.join(root, "image_2")
        self.label_dir = os.path.join(root, "label_2")
        self.calib_dir = os.path.join(root, "calib")

        self.split = cfg.DATASETS.TRAIN_SPLIT if is_train else cfg.DATASETS.TEST_SPLIT
        self.is_train = is_train
        self.transforms = transforms

        if self.split == "train":
            imageset_txt = os.path.join(root, "ImageSets", "train.txt")
        elif self.split == "val":
            imageset_txt = os.path.join(root, "ImageSets", "val.txt")
        elif self.split == "trainval":
            imageset_txt = os.path.join(root, "ImageSets", "trainval.txt")
        elif self.split == "test":
            imageset_txt = os.path.join(root, "ImageSets", "test.txt")
        else:
            raise ValueError("Invalid split!")

        image_files = []
        for line in open(imageset_txt, "r"):
            base_name = line.replace("\n", "")
            image_name = base_name + ".png"
            image_files.append(image_name)
        self.image_files = image_files
        self.label_files = [
            i.replace(".png", ".txt") for i in self.image_files
        ]
        self.num_samples = len(self.image_files)
        self.classes = cfg.DATASETS.DETECT_CLASSES

        self.flip_prob = cfg.INPUT.FLIP_PROB_TRAIN if is_train else 0
        self.aug_prob = cfg.INPUT.SHIFT_SCALE_PROB_TRAIN if is_train else 0
        self.shift_scale = cfg.INPUT.SHIFT_SCALE_TRAIN
        self.num_classes = len(self.classes)

        self.input_width = cfg.INPUT.WIDTH_TRAIN
        self.input_height = cfg.INPUT.HEIGHT_TRAIN
        self.output_width = self.input_width // cfg.MODEL.BACKBONE.DOWN_RATIO
        self.output_height = self.input_height // cfg.MODEL.BACKBONE.DOWN_RATIO
        self.max_objs = cfg.DATASETS.MAX_OBJECTS

        self.logger = logging.getLogger(__name__)
        self.logger.info(
            "Initializing KITTI {} set with {} files loaded".format(
                self.split, self.num_samples))

        # NuScenes specific stuff here
        # KITTI unwanted lines not yet deleted above
        # Select subset of the data to look at.
        nusc_version = 'v1.0'
        self.nusc = NuScenes(version=nusc_version)
        # sensor channels used by __getitem__
        self.cam_name = 'CAM_FRONT'
        self.lidar_name = 'LIDAR_TOP'

        # Get assignment of scenes to splits.
        self.split_logs = create_split_logs(self.split, self.nusc)

        # Use only the samples from the current split.
        self.sample_tokens = self._split_to_samples(self.split_logs)
        self.image_count = len(self.sample_tokens)
        self.image_count = 100  # hard-coded cap: only the first 100 samples are used
        self.sample_tokens = self.sample_tokens[:self.image_count]

    def __len__(self):
        return self.image_count

    def _split_to_samples(self, split_logs: List[str]) -> List[str]:
        """
        Convenience function to get the samples in a particular split.
        :param split_logs: A list of the log names in this split.
        :return: The list of samples.
        """
        samples = []
        for sample in self.nusc.sample:
            scene = self.nusc.get('scene', sample['scene_token'])
            log = self.nusc.get('log', scene['log_token'])
            logfile = log['logfile']
            if logfile in split_logs:
                samples.append(sample['token'])
        return samples

    def __getitem__(self, idx):
        # set defaults here
        kitti_to_nu_lidar = Quaternion(axis=(0, 0, 1), angle=np.pi / 2)
        kitti_to_nu_lidar_inv = kitti_to_nu_lidar.inverse
        imsize = (1600, 900)

        # sample_token based on index
        sample_token = self.sample_tokens[idx]

        # Get sample data.
        sample = self.nusc.get('sample', sample_token)
        sample_annotation_tokens = sample['anns']
        cam_front_token = sample['data'][self.cam_name]
        lidar_token = sample['data'][self.lidar_name]

        # Retrieve sensor records.
        sd_record_cam = self.nusc.get('sample_data', cam_front_token)
        sd_record_lid = self.nusc.get('sample_data', lidar_token)
        cs_record_cam = self.nusc.get('calibrated_sensor',
                                      sd_record_cam['calibrated_sensor_token'])
        cs_record_lid = self.nusc.get('calibrated_sensor',
                                      sd_record_lid['calibrated_sensor_token'])

        # Combine transformations and convert to KITTI format.
        # Note: cam uses same conventions in KITTI and nuScenes.
        lid_to_ego = transform_matrix(cs_record_lid['translation'],
                                      Quaternion(cs_record_lid['rotation']),
                                      inverse=False)
        ego_to_cam = transform_matrix(cs_record_cam['translation'],
                                      Quaternion(cs_record_cam['rotation']),
                                      inverse=True)
        velo_to_cam = np.dot(ego_to_cam, lid_to_ego)

        # Convert from KITTI to nuScenes LIDAR coordinates, where we apply velo_to_cam.
        velo_to_cam_kitti = np.dot(velo_to_cam,
                                   kitti_to_nu_lidar.transformation_matrix)
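        # velo_to_cam: nuScenes lidar -> ego (lid_to_ego), then ego -> camera
        # (ego_to_cam). Right-multiplying by kitti_to_nu_lidar makes the result
        # accept points given in KITTI lidar axes instead of nuScenes lidar axes.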

        r0_rect = Quaternion(axis=[1, 0, 0], angle=0)  # Dummy values.

        # Projection matrix.
        p_left_kitti = np.zeros((3, 4))
        # Cameras are always rectified.
        p_left_kitti[:3, :3] = cs_record_cam['camera_intrinsic']

        # Create KITTI style transforms.
        velo_to_cam_rot = velo_to_cam_kitti[:3, :3]
        velo_to_cam_trans = velo_to_cam_kitti[:3, 3]

        # Check that the rotation has the same format as in KITTI.
        assert (velo_to_cam_rot.round(0) == np.array([[0, -1, 0], [0, 0, -1],
                                                      [1, 0, 0]])).all()
        assert (velo_to_cam_trans[1:3] < 0).all()

        # Retrieve the token from the lidar.
        # Note that this may be confusing as the filename of the camera will include the timestamp of the lidar,
        # not the camera.
        filename_cam_full = sd_record_cam['filename']

        # set the img variable to its data here
        src_im_path = os.path.join(self.nusc.dataroot, filename_cam_full)
        img = Image.open(src_im_path)

        # Create calibration matrix.
        K = p_left_kitti.astype(np.float32)[:3, :3]

        # populate the list of object annotations for this sample
        anns = []
        for sample_annotation_token in sample_annotation_tokens:
            sample_annotation = self.nusc.get('sample_annotation',
                                              sample_annotation_token)

            # Get box in LIDAR frame.
            _, box_lidar_nusc, _ = self.nusc.get_sample_data(
                lidar_token,
                box_vis_level=BoxVisibility.NONE,
                selected_anntokens=[sample_annotation_token])
            box_lidar_nusc = box_lidar_nusc[0]

            # Truncated: Set all objects to 0 which means untruncated.
            truncated = 0.0

            # Occluded: Set all objects to full visibility as this information is not available in nuScenes.
            occluded = 0

            # Convert nuScenes category to nuScenes detection challenge category.
            detection_name = category_to_detection_name(
                sample_annotation['category_name'])

            # Skip categories that are not part of the nuScenes detection challenge.
            if detection_name is None:
                continue

            # Convert from nuScenes to KITTI box format.
            box_cam_kitti = KittiDB.box_nuscenes_to_kitti(
                box_lidar_nusc, Quaternion(matrix=velo_to_cam_rot),
                velo_to_cam_trans, r0_rect)

            # Project 3d box to 2d box in image, ignore box if it does not fall inside.
            bbox_2d = KittiDB.project_kitti_box_to_image(box_cam_kitti,
                                                         p_left_kitti,
                                                         imsize=imsize)
            if bbox_2d is None:
                continue

            # Set dummy score so we can use this file as result.
            box_cam_kitti.score = 0

            # Convert box to output string format.
            output = KittiDB.box_to_string(name=detection_name,
                                           box=box_cam_kitti,
                                           bbox_2d=bbox_2d,
                                           truncation=truncated,
                                           occlusion=occluded)
            fieldnames = [
                'type', 'truncated', 'occluded', 'alpha', 'xmin', 'ymin',
                'xmax', 'ymax', 'dh', 'dw', 'dl', 'lx', 'ly', 'lz', 'ry'
            ]
            if self.is_train:
                f = StringIO(output)
                reader = csv.DictReader(f,
                                        delimiter=' ',
                                        fieldnames=fieldnames)
                for line, row in enumerate(reader):
                    if row["type"] in self.classes:
                        anns.append({
                            "class":
                            row["type"],
                            "label":
                            TYPE_ID_CONVERSION[row["type"]],
                            "truncation":
                            float(row["truncated"]),
                            "occlusion":
                            float(row["occluded"]),
                            "alpha":
                            float(row["alpha"]),
                            "dimensions": [
                                float(row['dl']),
                                float(row['dh']),
                                float(row['dw'])
                            ],
                            "locations": [
                                float(row['lx']),
                                float(row['ly']),
                                float(row['lz'])
                            ],
                            "rot_y":
                            float(row["ry"])
                        })

        center = np.array([i / 2 for i in img.size], dtype=np.float32)
        size = np.array([i for i in img.size], dtype=np.float32)
        """
        resize, horizontal flip, and affine augmentation are performed here.
        since it is complicated to compute heatmap w.r.t transform.
        """
        flipped = False
        if (self.is_train) and (random.random() < self.flip_prob):
            flipped = True
            img = img.transpose(Image.FLIP_LEFT_RIGHT)
            center[0] = size[0] - center[0] - 1
            K[0, 2] = size[0] - K[0, 2] - 1

        affine = False
        if (self.is_train) and (random.random() < self.aug_prob):
            affine = True
            shift, scale = self.shift_scale[0], self.shift_scale[1]
            shift_ranges = np.arange(-shift, shift + 0.1, 0.1)
            center[0] += size[0] * random.choice(shift_ranges)
            center[1] += size[1] * random.choice(shift_ranges)

            scale_ranges = np.arange(1 - scale, 1 + scale + 0.1, 0.1)
            size *= random.choice(scale_ranges)

        center_size = [center, size]
        trans_affine = get_transfrom_matrix(
            center_size, [self.input_width, self.input_height])
        trans_affine_inv = np.linalg.inv(trans_affine)
        img = img.transform(
            (self.input_width, self.input_height),
            method=Image.AFFINE,
            data=trans_affine_inv.flatten()[:6],
            resample=Image.BILINEAR,
        )

        trans_mat = get_transfrom_matrix(
            center_size, [self.output_width, self.output_height])

        if not self.is_train:
            # for inference we parametrize with original size
            target = ParamsList(image_size=size, is_train=self.is_train)
            target.add_field("trans_mat", trans_mat)
            target.add_field("K", K)
            if self.transforms is not None:
                img, target = self.transforms(img, target)

            return img, target, idx

        heat_map = np.zeros(
            [self.num_classes, self.output_height, self.output_width],
            dtype=np.float32)
        regression = np.zeros([self.max_objs, 3, 8], dtype=np.float32)
        cls_ids = np.zeros([self.max_objs], dtype=np.int32)
        proj_points = np.zeros([self.max_objs, 2], dtype=np.int32)
        p_offsets = np.zeros([self.max_objs, 2], dtype=np.float32)
        dimensions = np.zeros([self.max_objs, 3], dtype=np.float32)
        locations = np.zeros([self.max_objs, 3], dtype=np.float32)
        rotys = np.zeros([self.max_objs], dtype=np.float32)
        reg_mask = np.zeros([self.max_objs], dtype=np.uint8)
        flip_mask = np.zeros([self.max_objs], dtype=np.uint8)

        for i, a in enumerate(anns):
            a = a.copy()
            cls = a["label"]

            locs = np.array(a["locations"])
            rot_y = np.array(a["rot_y"])
            if flipped:
                locs[0] *= -1
                rot_y *= -1

            point, box2d, box3d = encode_label(K, rot_y, a["dimensions"], locs)
            point = affine_transform(point, trans_mat)
            box2d[:2] = affine_transform(box2d[:2], trans_mat)
            box2d[2:] = affine_transform(box2d[2:], trans_mat)
            box2d[[0, 2]] = box2d[[0, 2]].clip(0, self.output_width - 1)
            box2d[[1, 3]] = box2d[[1, 3]].clip(0, self.output_height - 1)
            h, w = box2d[3] - box2d[1], box2d[2] - box2d[0]

            if (0 < point[0] < self.output_width) and (0 < point[1] <
                                                       self.output_height):
                point_int = point.astype(np.int32)
                p_offset = point - point_int
                radius = gaussian_radius(h, w)
                radius = max(0, int(radius))
                heat_map[cls] = draw_umich_gaussian(heat_map[cls], point_int,
                                                    radius)
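                # CenterNet-style target: a Gaussian peak is splatted on the class
                # heatmap at the integer center, with a radius derived from the
                # projected 2D box size (gaussian_radius).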

                cls_ids[i] = cls
                regression[i] = box3d
                proj_points[i] = point_int
                p_offsets[i] = p_offset
                dimensions[i] = np.array(a["dimensions"])
                locations[i] = locs
                rotys[i] = rot_y
                reg_mask[i] = 1 if not affine else 0
                flip_mask[i] = 1 if not affine and flipped else 0

        target = ParamsList(image_size=img.size, is_train=self.is_train)
        target.add_field("hm", heat_map)
        target.add_field("reg", regression)
        target.add_field("cls_ids", cls_ids)
        target.add_field("proj_p", proj_points)
        target.add_field("dimensions", dimensions)
        target.add_field("locations", locations)
        target.add_field("rotys", rotys)
        target.add_field("trans_mat", trans_mat)
        target.add_field("K", K)
        target.add_field("reg_mask", reg_mask)
        target.add_field("flip_mask", flip_mask)

        if self.transforms is not None:
            img, target = self.transforms(img, target)

        return img, target, idx
nbr_samples = nusc.scene[scene_index]['nbr_samples']
first_sample_token = my_scene['first_sample_token']
curr_sample = nusc.get('sample', first_sample_token)
prog = 0
for _ in range(nbr_samples):
    sample_tokens[prog] = curr_sample['token']
    if curr_sample['next']:
        next_token = curr_sample['next']
        curr_sample = nusc.get('sample', next_token)
    prog += 1
my_sample_token = sample_tokens[sample_index]
my_sample = nusc.get('sample', my_sample_token)
sensor_channel = "RADAR_FRONT"
my_sample_data = nusc.get('sample_data', my_sample['data'][sensor_channel])
radar_data = get_sensor_sample_data(nusc, my_sample, sensor_channel)
my_annotation_token = my_sample['anns'][18]
# note that the annotation here is in the global coordinate system
my_annotation_metadata = nusc.get('sample_annotation', my_annotation_token)
# see the Box class in nuscenes-devkit:
# get_sample_data transforms the boxes from the global frame to the sensor frame
_, boxes, _ = nusc.get_sample_data(my_sample_data['token'])
box_list = []

for box in boxes:
    mask = points_in_box(box, radar_data[0:3, :])
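    # points_in_box (nuscenes.utils.geometry_utils) returns a boolean mask over
    # the N columns of radar_data[0:3, :], marking the points inside the box.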
    # calculate the number of points contained in the bounding box
    num_of_pts = np.count_nonzero(mask)
    if num_of_pts != 0:
        box_list.append((box.name, num_of_pts))
print(box_list)
    'static_object.bicycle_rack': 'ignore',
}

for sample in nusc.sample:
    cam_token = sample['data']['CAM_FRONT']
    pc_token = sample['data']['LIDAR_TOP']
    pointsensor = nusc.get('sample_data', pc_token)
    # pcl_path = osp.join(nusc.dataroot, pointsensor['filename'])

    cam = nusc.get('sample_data', cam_token)
    # print(cam)

    # im = Image.open(osp.join(nusc.dataroot, cam['filename']))
    # print(im.width, im.height)

    data_path, box_list, cam_intrinsic = nusc.get_sample_data(pc_token)

    # print(data_path)
    # print(pcl_path)

    # fname =  pcl_path.split('/')[-1].split('.')[0] + '.txt'
    # print(fname)

    # if sample['scene_token'] in train_scenes
    if sample['scene_token'] in val_scenes:
        fout = open(
            osp.join(os.getcwd(), 'validation', 'label_2', pc_token + '.txt'),
            'w')

        # pc = LidarPointCloud.from_file(pcl_path)
Esempio n. 28
0
class NuScenesPrepare():
    NameMapping = {
        'movable_object.barrier': 'barrier',
        'movable_object.trafficcone': 'barrier',
        'vehicle.bicycle': 'cyclist',
        'vehicle.motorcycle': 'cyclist',
        'vehicle.bus.bendy': 'vehicle',
        'vehicle.bus.rigid': 'vehicle',
        'vehicle.car': 'vehicle',
        'vehicle.trailer': 'vehicle',
        'vehicle.truck': 'vehicle',
        'vehicle.construction': 'vehicle',
        'human.pedestrian.adult': 'pedestrian',
        'human.pedestrian.child': 'pedestrian',
        'human.pedestrian.construction_worker': 'pedestrian',
        'human.pedestrian.police_officer': 'pedestrian',
    }
    labelmapping=LabelEncoder()
    labelmapping.fit(['barrier','cyclist','pedestrian','vehicle'])
    DefaultAttribute = {
        "car": "vehicle.parked",
        "pedestrian": "pedestrian.moving",
        "trailer": "vehicle.parked",
        "truck": "vehicle.parked",
        "bus": "vehicle.parked",
        "motorcycle": "cycle.without_rider",
        "construction_vehicle": "vehicle.parked",
        "bicycle": "cycle.without_rider",
        "barrier": "",
        "traffic_cone": "",
    }

    def __init__(self,args):
        self.args_dp=args['DataPrepare']
        self.args_vg=args['VoxelGenerator']
        self.cache_path=self.args_dp.data_root+'/'+self.args_dp.cache_name

        if os.path.exists(self.cache_path):
            self._Data_frags=pickle.load(open(self.cache_path, 'rb'))
        else:
            self.nusc = NuScenes(version=self.args_dp.version, dataroot=self.args_dp.data_root, verbose=self.args_dp.verbose)
            self._Data_frags=self.getFragAnnotations()
            pickle.dump(self._Data_frags,open(self.cache_path, 'wb'))

        if True:
            self._Data_frags=[item for scene_data in self._Data_frags for item in scene_data]

    def __len__(self):
        return len(self._Data_frags)

    def __getitem__(self, idx):
        c_frag = self._Data_frags[idx]
        res = {
            "lidar": {
                "type": "lidar",
                "points": None,
            },
            "gt_boxes": {
                "obs": None,
                "pred": None,
            },
            "metadata": {
                "token": c_frag['Data_frags'][self.args_dp.obs_length-1] # token of last observed frame
            },
        }
        sweep_voxels,sweep_coords,sweep_num_voxels,sweep_num_points=[],[],[],[]
        bev_imgs, cam_imgs = [], []
        gt_boxes=[]
        #ts = c_frag['Data_frags'][self.args.obs_length-1]["timestamp"] / 1e6

        #get BEV sweeps
        for fi,c_frame in enumerate(c_frag['Data_frags']):
            if fi < self.args_dp.obs_length:
                # get Annotations
                gt_boxes.append(c_frame['boxes'])
                # load lidar points
                lidar_path = c_frame['lidar_path']
                points = np.fromfile(
                    str(Path(self.args_dp.data_root)/lidar_path), dtype=np.float32, count=-1).reshape([-1, 5])
                points[:, 3] /= 255
                points[:, :3] = points[:, :3] @ c_frame["R_cl2rl"].T
                points[:, :3] += c_frame["T_cl2rl"]
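                # R_cl2rl / T_cl2rl (built in getFragAnnotations) move this sweep's
                # points from its own lidar frame into the lidar frame of the
                # reference (last observed) key frame, so all sweeps of a fragment
                # share one coordinate system.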
                # generate voxel bev
                voxel_res=self.getVoxel(points)
                if 'bev_img' in self.args_vg.bev_data:
                    bev_img_size = [int(np.ceil((self.args_vg.point_cloud_range[3 + i] - self.args_vg.point_cloud_range[i])
                                       / self.args_vg.voxel_size[i])) for i in range(3)]
                    bev_img = np.zeros(bev_img_size)
                    coords = voxel_res['coordinates']
                    bev_img[coords[:, 2], coords[:, 1], coords[:, 0]] = \
                        voxel_res["num_points_per_voxel"]
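                    # VoxelGeneratorV2 reports voxel coordinates in (z, y, x) order
                    # (assumption); indexing them reversed fills bev_img[x, y, z]
                    # with the per-voxel point count.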
                    bev_imgs.append(bev_img)
                if 'bev_index' in self.args_vg.bev_data:
                    sweep_voxels.append(voxel_res['voxels'])
                    sweep_coords.append(voxel_res['coordinates'])
                    sweep_num_voxels.append(np.array([voxel_res['voxels'].shape[0]], dtype=np.int64))
                    sweep_num_points.append( voxel_res["num_points_per_voxel"])
                # Load image
                if self.args_dp.use_image == 'last_image':  # only use image of the last observed frame
                    load_image = fi == self.args_dp.obs_length - 1
                elif self.args_dp.use_image == 'key_images':  # use image of all key frames
                    load_image = 'cam_path' in c_frame.keys()
                else:
                    load_image = False
                if load_image:
                    if Path(c_frame['cam_path']).exists():
                        with open(str(c_frame['cam_path']), 'rb') as f:
                            image_str = f.read()
                    else:
                        image_str=None
                    cam_imgs.append(image_str)

        res["lidar"]["voxel_sweeps"] =  np.concatenate(sweep_voxels, axis=0)
        res["lidar"]["bev_imgs"] =  np.stack(bev_imgs, axis=0)
        res["lidar"]["coordinates"] =  np.concatenate(sweep_coords, axis=0)
        res["lidar"]["num_voxels"] =  np.concatenate(sweep_num_voxels, axis=0)
        res["lidar"]["num_points"] =  np.concatenate(sweep_num_points, axis=0)
        res["cam"] = {
            "type": "camera",
            "data": image_str,
            "datatype": Path(c_frag['Data_frags'][self.args_dp.obs_length-1]['cam_path']).suffix[1:],
        }
        gt_boxes=np.stack(gt_boxes,axis=0)
        res["gt_boxes"]["obs"]=gt_boxes[:self.args_dp.obs_length]
        res["gt_boxes"]["pred"] =gt_boxes[self.args_dp.obs_length:]
        res['cls_label'] = self.labelmapping.transform(c_frag['names'])  # encoder was already fitted on the full class list

        return res
        #Ground Truth (by instance)
        #res["GT"]["obs"]=
        #image of last
    def getVoxel(self,points):
        max_voxels=100000
        voxel_generator = VoxelGeneratorV2(
            voxel_size=list(self.args_vg.voxel_size),
            point_cloud_range=list(self.args_vg.point_cloud_range),
            max_num_points=self.args_vg.max_number_of_points_per_voxel,
            max_voxels=max_voxels,
            full_mean=self.args_vg.full_empty_part_with_mean,
            block_filtering=self.args_vg.block_filtering,
            block_factor=self.args_vg.block_factor,
            block_size=self.args_vg.block_size,
            height_threshold=self.args_vg.height_threshold)

        res = voxel_generator.generate(
            points, max_voxels)
        return res
    def getSamplebyFrame(self):
        # Collect all LIDAR_TOP sample_data frames (keyframes and intermediate sweeps) for every scene.
        scene_all=[]
        for si,scene in enumerate(self.nusc.scene):
            sample_interp_all = []
            first_sample = self.nusc.get('sample', scene['first_sample_token'])
            sd_rec = self.nusc.get('sample_data', first_sample['data']["LIDAR_TOP"])
            sample_interp_all.append(sd_rec)
            while sd_rec['next'] != '':
                sd_rec = self.nusc.get('sample_data', sd_rec['next'])
                sample_interp_all.append(sd_rec)
            scene_all.append(sample_interp_all)
        return scene_all
    def getFragAnnotations(self):

        scene_frames = self.getSamplebyFrame()
        Data_frags=[]
        key_slide_window=int(self.args_dp.obs_length*2) #find key frame in this time window
        si_start=0
        if os.path.exists(self.cache_path):
            Data_frags=pickle.load(open(self.cache_path, 'rb'))
            si_start=len(Data_frags)
        print('-------------Preparing fragments--------------')
        for si in range(si_start,len(scene_frames)):
            scene_data=scene_frames[si]
            start = time.time()
            scene_frags = []
            for di,sample_data in enumerate(scene_data):
                frag_info={}
                if sample_data['is_key_frame']:
                    if di <= self.args_dp.obs_length or di >= len(scene_data)-self.args_dp.pred_length*self.args_dp.interval:
                        continue
                    cur_frag_index=[i+1 for i in range(di-self.args_dp.obs_length,di+self.args_dp.pred_length)]#the fragment index
                    if di !=cur_frag_index[self.args_dp.obs_length-1]:
                        print('error')
                    start_key=max(0,min(di-self.args_dp.obs_length,di - key_slide_window))
                    end_key=min(len(scene_data)-1,max(di+self.args_dp.pred_length,di + key_slide_window))
                    cur_key_index = [i+1 for i in range(start_key,end_key)]#find key frame in this index

                    ## Get reference coordinates
                    refer_frame = sample_data
                    refer_cs_rec = self.nusc.get('calibrated_sensor', refer_frame['calibrated_sensor_token'])
                    refer_pos_rec = self.nusc.get('ego_pose', refer_frame['ego_pose_token'])
                    R_rl2re, T_rl2re = refer_cs_rec['rotation'], refer_cs_rec['translation']
                    R_re2g, T_re2g = refer_pos_rec['rotation'], refer_pos_rec['translation']
                    R_rl2re_mat = Quaternion(R_rl2re).rotation_matrix
                    R_re2g_mat = Quaternion(R_re2g).rotation_matrix

                    # get key frame location
                    key_frame_flag = np.zeros((len(cur_key_index),), dtype='bool')
                    key_frame_index = []
                    for i, d in enumerate(cur_key_index):
                        try:
                            if scene_data[d]['is_key_frame'] == True:
                                key_frame_index.append(i)
                                key_frame_flag[i] = True
                        except:
                            print('error')
                    key_frames = np.array(scene_data[cur_key_index[0]:(cur_key_index[-1]+1)])[key_frame_flag]

                    # only key frame has annotations, so firstly get key frame infos
                    key_sample, key_sample_token, key_instances, key_annotations, key_velocity = [], [], [], [], []
                    for k, key_frame in enumerate(key_frames):
                        sample_token = key_frame['sample_token']
                        sample = self.nusc.get('sample', sample_token)
                        annotations = [
                            self.nusc.get('sample_annotation', token)
                            for token in sample['anns']
                        ]
                        velocity = np.array(
                            [self.nusc.box_velocity(token)[:2] for token in sample['anns']])
                        key_sample_token.append(sample_token)
                        key_instances.append([anno['instance_token'] for anno in annotations])
                        key_sample.append(sample)
                        key_annotations.append(annotations)
                        key_velocity.append(velocity)

                    # get full presented instance token in the candidate fragments
                    instances_intersect = list(set.intersection(*[set(i) for i in key_instances]))
                    #instances_union = list(set.union(*[set(i) for i in key_instances]))

                    # full presented instance flags
                    valid_inst_flags = [np.zeros(len(kinst), dtype='bool') for kinst in key_instances]
                    for kinst, key_inst in enumerate(key_instances):
                        for vkinst, valid_inst in enumerate(key_inst):
                            if valid_inst in instances_intersect:
                                valid_inst_flags[kinst][vkinst] = True

                    ##########################################
                    ##             Prepare fragments Database
                    ##########################################
                    cur_key_frame_index = []
                    for i, d in enumerate(cur_frag_index):
                        if scene_data[d]['is_key_frame'] == True:
                            cur_key_frame_index.append(i)

                    frag_info['Data_frags'] = []
                    frag_info['instance_token'] = instances_intersect
                    frag_info['key_frame_index'] = cur_key_frame_index
                    frag_info['last_obs_frame']=di
                    frag_info['scene_No'] = si
                    for i, d in enumerate(cur_frag_index):

                        frag_data = {}
                        sample_data=scene_data[d]
                        sample_token = sample_data['sample_token']
                        sample = self.nusc.get('sample', sample_token)

                        # find the key sample this frame data belongs to
                        try:
                            key_sample_ind = key_sample_token.index(sample_token)
                        except:
                            print('cannot find corresponding key frame at scene {} frame {}'.format(si, d))

                        valid_inst_flag = valid_inst_flags[key_sample_ind]

                        ## Pose matrix: lidar2ego and ego2global
                        s_cs_rec = self.nusc.get('calibrated_sensor', sample_data['calibrated_sensor_token'])
                        s_pos_rec = self.nusc.get('ego_pose', sample_data['ego_pose_token'])

                        R_cl2ce, T_cl2ce = s_cs_rec['rotation'], s_cs_rec['translation']
                        R_ce2g, T_ce2g = s_pos_rec['rotation'], s_pos_rec['translation']

                        R_cl2ce_mat = Quaternion(R_cl2ce).rotation_matrix
                        R_ce2g_mat = Quaternion(R_ce2g).rotation_matrix

                        # Data_frag['Info_frags']['T_l2e'],Data_frag['Info_frags']['R_l2e'] = cs_record['translation'],cs_record['rotation']
                        # Data_frag['Info_frags']['T_e2g'],Data_frag['Info_frags']['R_e2g'] = pose_record['translation'], pose_record['rotation']

                        ## Get Relative Pose: R_cl2rl,T_cl2rl, based on R/T_rl2re, R/T_re2g, R/T_cl2ce, R/T_ce2g
                        # r: reference, c: current, l: lidar, e: ego, g: global
                        # Attention: R_b2a = inv(R_a2b), T_b2a = - T_b2a * inv(R_b2a),

                        # R_cl2rl = R_cl2ce * R_ce2g * [R_g2rl]
                        # R_g2rl= R_g2re * R_re2rl =  inv(R_re2g) * inv(R_rl2re)
                        R_cl2rl = (R_cl2ce_mat.T @ R_ce2g_mat.T) @ (
                                np.linalg.inv(R_re2g_mat).T @ np.linalg.inv(R_rl2re_mat).T)

                        # T_cl2rl = (T_cl2ce * R_ce2g + T_ce2g) * [R_g2rl] + [T_g2rl]
                        # T_g2rl = (T_g2re * R_re2rl + T_re2rl) = - T_re2g * inv(R_re2g) - T_rl2re * inv(R_rl2re)
                        T_cl2rl = (T_cl2ce @ R_ce2g_mat.T + T_ce2g) @ (
                                    np.linalg.inv(R_re2g_mat).T @ np.linalg.inv(R_rl2re_mat).T) \
                                  - T_re2g @ (np.linalg.inv(R_re2g_mat).T @ np.linalg.inv(
                            R_rl2re_mat).T) - T_rl2re @ np.linalg.inv(R_rl2re_mat).T

                        frag_data['R_cl2rl'], frag_data['T_cl2rl'] = R_cl2rl, T_cl2rl

                        ### Get valid boxes.Then Transform to the reference coordinates
                        boxes = self.nusc.get_boxes(sample_data['token'])  # At global coordinate
                        for box in boxes:
                            # Move box to referred coord system
                            box.translate(-np.array(refer_pos_rec['translation']))
                            box.rotate(Quaternion(refer_pos_rec['rotation']).inverse)
                            box.translate(-np.array(refer_cs_rec['translation']))
                            box.rotate(Quaternion(refer_cs_rec['rotation']).inverse)

                        boxes = np.array(boxes)  # At reference coordinate
                        try:
                            valid_boxes = boxes[valid_inst_flag]
                        except:
                            print('can not find valid box at scene {} frame {}'.format(si, d))
                        ## Transform Boxes to [location,dimension,rotation]
                        locs = np.array([b.center for b in valid_boxes]).reshape(-1, 3)
                        dims = np.array([b.wlh for b in valid_boxes]).reshape(-1, 3)
                        rots = np.array([b.orientation.yaw_pitch_roll[0]
                                         for b in valid_boxes]).reshape(-1, 1)
                        gt_boxes = np.concatenate([locs, dims, -rots - np.pi / 2], axis=1)
                        frag_data['boxes'] = gt_boxes

                        ## Datapath
                        if i<self.args_dp.obs_length:
                            frag_data['lidar_path'] = sample_data['filename']
                            if sample_data['is_key_frame']:
                                cam_front_token = sample["data"]["CAM_FRONT"]
                                cam_path, _, _ = self.nusc.get_sample_data(cam_front_token)
                                frag_data['cam_path'] = cam_path

                        ## Object name
                        if 'names' not in frag_info.keys():
                            names = [b.name for b in valid_boxes]
                            for ni in range(len(names)):
                                if names[ni] in self.NameMapping:
                                    names[ni] = self.NameMapping[names[ni]]
                            names = np.array(names)
                            frag_info['names'] = names

                        ## Velocity (without interpolation)
                        valid_velo = key_velocity[key_sample_ind][valid_inst_flag]
                        # convert velo from global to current lidar
                        for bi in range(len(valid_boxes)):
                            velo = np.array([*valid_velo[bi], 0.0])
                            velo = velo @ np.linalg.inv(R_ce2g_mat).T @ np.linalg.inv(R_cl2ce_mat).T
                            valid_velo[bi] = velo[:2]
                        frag_data['Velocity'] = valid_velo
                        frag_data['FrameNo.'] = d
                        frag_data['Token'] = sample_data['token']
                        frag_data['timestamp'] = sample_data['timestamp']
                        frag_info['Data_frags'].append(frag_data)
                    scene_frags.append(frag_info)
            Data_frags.append(scene_frags)
            end = time.time()
            print('scene {}/{}: total frags: {} time: {} '.format(si, len(scene_frames), len(scene_frags), end - start))
            if si%200==0 and si>0:
                pickle.dump(Data_frags, open(self.cache_path, 'wb'))
        return Data_frags