Code Example #1
def extract_datapoints(root_dir, test_set=False):
    argoverse_loader = ArgoverseTrackingLoader(root_dir=root_dir)

    data = []
    for log_id in argoverse_loader.log_list:
        # print(f"Processing log: {os.path.join(root_dir, log_id)}", end="\r")
        argoverse_data = argoverse_loader.get(log_id=log_id)
        # calibL = argoverse_data.get_calibration(camera=camL, log_id=log_id)
        # calibR = argoverse_data.get_calibration(camera=camR, log_id=log_id)
        calibs = {}
        for cam in STEREO_CAMERA_LIST + RING_CAMERA_LIST:
            calibs[cam] = argoverse_data.get_calibration(camera=cam,
                                                         log_id=log_id)

        count = 0
        for lidar_timestamp in argoverse_data.lidar_timestamp_list:
            data_point = dict()
            data_point["log_id"] = log_id
            data_point["frame_id"] = count
            count += 1
            for cam in STEREO_CAMERA_LIST + RING_CAMERA_LIST:
                cam_timestamp = argoverse_loader.sync.get_closest_cam_channel_timestamp(
                    lidar_timestamp=lidar_timestamp,
                    camera_name=cam,
                    log_id=log_id)
                if cam_timestamp is not None:
                    data_point[cam] = argoverse_loader.get_image_at_timestamp(
                        timestamp=cam_timestamp,
                        camera=cam,
                        log_id=log_id,
                        load=False)
                else:
                    data_point[cam] = None
            data_point["timestamp"] = lidar_timestamp
            data_point["lidar"] = argoverse_loader.timestamp_lidar_dict[
                lidar_timestamp]
            data_point["calibs"] = calibs
            pose = argoverse_data.get_pose(
                argoverse_data.get_idx_from_timestamp(lidar_timestamp))
            d = pose.translation
            # NOTE: Rotation.from_dcm was renamed to from_matrix in newer SciPy releases.
            r = Rotation.from_dcm(pose.rotation)
            data_point["pose"] = (d, r.as_euler('xyz'))
            if not test_set:
                data_point["labels"] = argoverse_loader.get_label_object(
                    idx=argoverse_loader.lidar_timestamp_list.index(
                        lidar_timestamp),
                    log_id=log_id)
            data.append(data_point)
    return data
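
A minimal usage sketch for the extractor above; the dataset path is a placeholder, and the printed keys are the ones populated inside the loop.

# Hedged usage sketch: the path below is hypothetical.
train_points = extract_datapoints("/path/to/argoverse-tracking/train1",
                                  test_set=False)
print(f"collected {len(train_points)} lidar-keyed frames")

sample = train_points[0]
print(sample["log_id"], sample["frame_id"], sample["timestamp"])
print(sample["pose"][0])        # ego translation at this sweep
print(len(sample["labels"]))    # annotated objects for this sweep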
Code Example #2
class ArgoDataset(Dataset):
    def __init__(self, args, transform=None):
        self.args = args
        self.transform = transform
        self.argoverse_loader = ArgoverseTrackingLoader(args.root_dir)
        self.image_list = []
        for log_id in self.argoverse_loader.log_list:
            argoverse_data = self.argoverse_loader.get(log_id)
            for idx in range(
                    len(argoverse_data.image_list_sync['ring_front_center'])):
                self.image_list.append(str(log_id) + '@' + str(idx))
        self.camera_name = [
            'ring_front_center', 'ring_side_left', 'ring_side_right',
            'ring_front_left', 'ring_front_right', 'ring_rear_right',
            'ring_rear_left'
        ]

    def __len__(self):
        return len(self.image_list)

    def __getitem__(self, index):
        log_id, lidar_index = self.image_list[index].split('@')
        lidar_index = int(lidar_index)
        argoverse_data = self.argoverse_loader.get(log_id)
        image_7 = []
        for camera in self.camera_name:
            img = argoverse_data.get_image_sync(lidar_index, camera=camera)
            img = Image.fromarray(img)
            img = self.transform(img) if self.transform is not None else img
            image_7.append(img)
        sample = {
            'image_7': image_7,
            'video_idxs': lidar_index,
            'image_names': log_id
        }
        return sample
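
A sketch of driving this dataset with a standard PyTorch DataLoader; `args.root_dir` and the transform are assumptions, not part of the snippet.

from types import SimpleNamespace

import torchvision.transforms as T
from torch.utils.data import DataLoader

# Hypothetical arguments object; only root_dir is read by __init__.
args = SimpleNamespace(root_dir="/path/to/argoverse-tracking/train1")
transform = T.Compose([T.Resize((224, 224)), T.ToTensor()])

dataset = ArgoDataset(args, transform=transform)
loader = DataLoader(dataset, batch_size=2, shuffle=True, num_workers=0)

batch = next(iter(loader))
# 'image_7' collates to a list of 7 tensors, one per ring camera, each (B, 3, 224, 224).
print(len(batch['image_7']), batch['image_7'][0].shape)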
Code Example #3
    def __init__(self, root_dir, split='train'):
        self.split = split
        is_test = (self.split == 'test')
        self.lidar_pathlist = []
        self.label_pathlist = []

        if split == 'train':
            for i in np.arange(1, 5):
                self.imageset_dir = os.path.join(root_dir, split + str(i))
                data_loader = ArgoverseTrackingLoader(
                    os.path.join(root_dir, split + str(i)))
                self.log_list = data_loader.log_list
                for log in self.log_list:
                    self.lidar_pathlist.extend(data_loader.get(log).lidar_list)
                    self.label_pathlist.extend(data_loader.get(log).label_list)
                print(len(self.lidar_pathlist))
        else:
            self.imageset_dir = os.path.join(root_dir, split)
            data_loader = ArgoverseTrackingLoader(os.path.join(
                root_dir, split))
            self.lidar_pathlist.extend(data_loader.lidar_list)
            self.label_pathlist.extend(data_loader.label_list)

        self.calib_file = data_loader.calib_filename

        assert len(self.lidar_pathlist) == len(self.label_pathlist)
        #z = list(zip(self.lidar_pathlist, self.label_pathlist))
        #random.shuffle(z)
        #self.lidar_pathlist[:], self.label_pathlist[:] = zip(*z)

        self.num_sample = len(self.lidar_pathlist)
        self.image_idx_list = np.arange(self.num_sample)

        self.argo_to_kitti = np.array(
            [[6.927964e-03, -9.999722e-01, -2.757829e-03],
             [-1.162982e-03, 2.749836e-03, -9.999955e-01],
             [9.999753e-01, 6.931141e-03, -1.143899e-03]])

        self.ground_removal = True

        self.image_dir = os.path.join('/data/')
        self.lidar_dir = os.path.join('/data/')
        self.calib_dir = os.path.join('/data/')
        self.label_dir = os.path.join('/data/')
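
A quick numeric sanity check of the hard-coded argo_to_kitti matrix (the "ideal" permutation below is my own reference, not taken from the source): up to small calibration terms it is the rotation that maps the Argoverse ego frame (x forward, y left, z up) onto the KITTI camera frame (x right, y down, z forward).

import numpy as np

argo_to_kitti = np.array(
    [[6.927964e-03, -9.999722e-01, -2.757829e-03],
     [-1.162982e-03, 2.749836e-03, -9.999955e-01],
     [9.999753e-01, 6.931141e-03, -1.143899e-03]])

# Pure axis permutation: x_kitti = -y_argo, y_kitti = -z_argo, z_kitti = x_argo.
ideal = np.array([[0., -1., 0.],
                  [0., 0., -1.],
                  [1., 0., 0.]])

print(np.allclose(argo_to_kitti @ argo_to_kitti.T, np.eye(3), atol=1e-4))  # ~orthonormal
print(np.abs(argo_to_kitti - ideal).max())  # ~7e-3 deviation from the pure permutation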
Code Example #4
def generate_weak_static(dataset_dir="", log_id="", output_dir=""):

    argoverse_loader = ArgoverseTrackingLoader(dataset_dir)
    argoverse_data = argoverse_loader.get(log_id)
    camera = argoverse_loader.CAMERA_LIST[7]
    calib = argoverse_data.get_calibration(camera)
    ply_fpath = os.path.join(dataset_dir, log_id, 'lidar')
    ply_locs = []
    for idx, ply_name in enumerate(os.listdir(ply_fpath)):
        ply_loc = np.array([idx, int(ply_name.split('.')[0].split('_')[-1])])
        ply_locs.append(ply_loc)
    ply_locs = np.array(ply_locs)
    lidar_timestamps = sorted(ply_locs[:, 1])

    calib_path = f"{dataset_dir}/{log_id}/vehicle_calibration_info.json"
    calib_data = read_json_file(calib_path)
    ind = 0
    for i in range(9):
        if calib_data['camera_data_'][i]['key'] == \
                                    'image_raw_stereo_front_left':
            ind = i
            break
    rotation = np.array(calib_data['camera_data_'][ind]['value']
                        ['vehicle_SE3_camera_']['rotation']['coefficients'])
    translation = np.array(calib_data['camera_data_'][ind]['value']
                           ['vehicle_SE3_camera_']['translation'])
    sparse_road_bev_loc = os.path.join(output_dir, log_id, 'sparse_road_bev')
    os.makedirs(sparse_road_bev_loc, exist_ok=True)
    dense_city_road_pts = []

    for idx in range(len(lidar_timestamps)):

        occupancy_map = np.zeros((256, 256))
        lidar_timestamp = lidar_timestamps[idx]
        try:
            img_loc = argoverse_data.get_image_list_sync(camera=camera)[idx]
        except BaseException:
            continue
        cam_timestamp = int(img_loc.split('.')[0].split('_')[-1])
        segment_loc = os.path.join(
            dataset_dir, log_id, 'segment',
            'stereo_front_left_' + str(cam_timestamp) + '.png')
        segment_img = cv2.imread(segment_loc, 0)

        pc = argoverse_data.get_lidar(idx)
        uv = calib.project_ego_to_image(pc).T
        idx_ = np.where(
            np.logical_and.reduce(
                (uv[0, :] >= 0.0, uv[0, :] < np.shape(segment_img)[1] - 1.0,
                 uv[1, :] >= 0.0, uv[1, :] < np.shape(segment_img)[0] - 1.0,
                 uv[2, :] > 0)))
        idx_ = idx_[0]
        uv1 = uv[:, idx_]
        uv1 = uv1.T

        x = uv1[:, 0].astype(np.int32)
        y = uv1[:, 1].astype(np.int32)
        col = segment_img[y, x]
        filt = np.logical_or(col == 13, col == 24)  # keep pixels labelled as road classes
        road_uv1 = uv1[filt, :]

        lidar_road_pts = calib.project_image_to_ego(road_uv1)

        cam_road_pts = calib.project_ego_to_cam(lidar_road_pts)
        X = cam_road_pts[:, 0]
        Z = cam_road_pts[:, 2]
        filt2 = np.logical_and(X > -20, X < 20)
        filt2 = np.logical_and(filt2, np.logical_and(Z > 0, Z < 40))
        y_img = (-Z[filt2] / res).astype(np.int32)
        x_img = (X[filt2] / res).astype(np.int32)
        x_img -= int(np.floor(-20 / res))
        y_img += int(np.floor(40 / res))
        occupancy_map[y_img, x_img] = 255
        occ_map_loc = os.path.join(
            sparse_road_bev_loc,
            'stereo_front_left_' + str(cam_timestamp) + '.png')
        cv2.imwrite(occ_map_loc, occupancy_map)
        pose_fpath = os.path.join(
            dataset_dir, log_id, "poses",
            "city_SE3_egovehicle_" + str(lidar_timestamp) + ".json")
        if not Path(pose_fpath).exists():
            print("not a apth")
            continue
        pose_data = read_json_file(pose_fpath)
        rotation = np.array(pose_data["rotation"])
        translation = np.array(pose_data["translation"])
        city_to_egovehicle_se3 = SE3(rotation=quat2rotmat(rotation),
                                     translation=translation)
        sparse_city_road_pts = city_to_egovehicle_se3.transform_point_cloud(
            lidar_road_pts)
        try:
            dense_city_road_pts = np.concatenate(
                (dense_city_road_pts, sparse_city_road_pts), axis=0)
        except BaseException:
            dense_city_road_pts = sparse_city_road_pts

    dense_road_bev_loc = os.path.join(output_dir, log_id, 'dense_road_bev')
    os.makedirs(dense_road_bev_loc, exist_ok=True)

    for idx in range(len(lidar_timestamps)):

        occupancy_map = np.zeros((256, 256))
        lidar_timestamp = lidar_timestamps[idx]
        try:
            img_loc = argoverse_data.get_image_list_sync(camera=camera)[idx]
        except BaseException:
            continue
        cam_timestamp = int(img_loc.split('.')[0].split('_')[-1])
        pose_fpath = os.path.join(
            dataset_dir, log_id, "poses",
            "city_SE3_egovehicle_" + str(lidar_timestamp) + ".json")
        if not Path(pose_fpath).exists():
            print("not a path")
            continue
        pose_data = read_json_file(pose_fpath)
        rotation = np.array(pose_data["rotation"])
        translation = np.array(pose_data["translation"])
        city_to_egovehicle_se3 = SE3(rotation=quat2rotmat(rotation),
                                     translation=translation)
        current_ego_frame_road_pts =\
            city_to_egovehicle_se3\
            .inverse_transform_point_cloud(dense_city_road_pts)
        current_cam_frame_road_pts = calib.project_ego_to_cam(
            current_ego_frame_road_pts)
        X = current_cam_frame_road_pts[:, 0]
        Z = current_cam_frame_road_pts[:, 2]
        filt2 = np.logical_and(X > -20, X < 20)
        filt2 = np.logical_and(filt2, np.logical_and(Z > 0, Z < 40))
        y_img = (-Z[filt2] / res).astype(np.int32)
        x_img = (X[filt2] / res).astype(np.int32)
        x_img -= int(np.floor(-20 / res))
        y_img += int(np.floor(40 / res)) - 1

        occupancy_map[y_img, x_img] = 255
        occ_map_loc = os.path.join(
            dense_road_bev_loc,
            'stereo_front_left_' + str(cam_timestamp) + '.png')
        cv2.imwrite(occ_map_loc, occupancy_map)
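
The function relies on a module-level `res` that is not shown in the snippet. Given the 256x256 occupancy grid and the X in (-20, 20), Z in (0, 40) metre crop, it is presumably the BEV resolution in metres per pixel; a plausible definition (an assumption, not taken from the source) is:

# Assumed module-level constant: 40 m of extent mapped onto 256 BEV pixels.
res = 40.0 / 256  # 0.15625 m per pixel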
Code Example #5
        ]

        data_loader = ArgoverseTrackingLoader(os.path.join(root_dir, split))
        log_list = data_loader.log_list
        path_count = 0

        actual_idx_list = []
        logidx_to_count_map = {}
        log_to_count_map = {}
        map_objectid_bool = {}
        map_saveid_objectid = {}
        map_class_saveid_count = {}
        for log_id, log in enumerate(log_list):
            print("converting log {} {}".format(log, log_id))

            argoverse_data = data_loader.get(log)
            city_name = argoverse_data.city_name

            lidar_lst = data_loader.get(log).lidar_list
            label_lst = data_loader.get(log).label_list
            assert len(lidar_lst) == len(label_lst)
            pbar = tqdm(lidar_lst)
            for idx, each_path in enumerate(pbar):

                ide = log_id + idx
                # lidar_pts = argoverse_data.get_lidar(ide)
                lidar_pts = argoverse_data.get_lidar_ring(ide)

                if (GROUND_REMOVED):
                    city_to_egovehicle_se3 = argoverse_data.get_pose(ide)
                    roi_area_pts = city_to_egovehicle_se3.transform_point_cloud(
Code Example #6
class ArgoDataset(Dataset):
    def __init__(self, question_h5, image_feature_h5_path, lidar_feature_h5_path,
                 vocab, load_lidar=True, npoint=1024, normal_channel=True,
                 uniform=False, cache_size=15000, drivable_area=False,
                 mode='prefix', image_h5=None, lidar_h5=None, max_samples=None,
                 question_families=None, image_idx_start_from=None):

        # Read the whole question_h5 file into memory.
        self.all_questions = question_h5['questions'][:]
        self.all_answers = get_answer_classes(question_h5['answers'][:], vocab)
        self.all_image_idxs = question_h5['image_idxs'][:]
        self.all_video_names = (question_h5['video_names'][:]).astype(str)
        self.questions_length = question_h5['question_length'][:]
        self.image_feature_h5 = image_feature_h5_path
        self.load_lidar = load_lidar

        # In-memory cache of filtered point sets queried in __getitem__;
        # initialising it here keeps the lookup below from raising AttributeError.
        self.cache = {}
        self.cache_size = cache_size

        # LiDAR loader and map, only needed when point clouds are requested.
        if self.load_lidar:
            self.argoverse_loader = ArgoverseTrackingLoader(lidar_feature_h5_path)
            self.am = ArgoverseMap()
            self.drivable_area = drivable_area

    def __len__(self):
        return self.all_questions.shape[0]

    def __getitem__(self, index):
        question = self.all_questions[index]
        question_length = self.questions_length[index]
        answer = self.all_answers[index]
        image_name = self.all_video_names[index] + '@' + str(self.all_image_idxs[index])

        # Image feature, read lazily from the HDF5 feature file.
        with h5py.File(self.image_feature_h5, 'r') as img_feat_file:
            image_feature = img_feat_file[image_name][:]

        # LiDAR feature: load the sweep and keep only region-of-interest points.
        if self.load_lidar:
            if index in self.cache:
                point_set = self.cache[index]
            else:
                lidar_index = self.all_image_idxs[index]
                log_id = self.all_video_names[index]
                argoverse_data = self.argoverse_loader.get(log_id)

                lidar_pts = argoverse_data.get_lidar(lidar_index, load=True)
                city_to_egovehicle_se3 = argoverse_data.get_pose(lidar_index)
                city_name = argoverse_data.city_name
                

                roi_area_pts = copy.deepcopy(lidar_pts)
                roi_area_pts = city_to_egovehicle_se3.transform_point_cloud(roi_area_pts)  # put into city coords
                roi_area_pts = self.am.remove_non_roi_points(roi_area_pts, city_name)
                roi_area_pts = self.am.remove_ground_surface(roi_area_pts, city_name)
                roi_area_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(roi_area_pts)



                if self.drivable_area:
                    driveable_area_pts = copy.deepcopy(roi_area_pts)
                    driveable_area_pts = city_to_egovehicle_se3.transform_point_cloud(driveable_area_pts)  # put into city coords
                    driveable_area_pts = self.am.remove_non_driveable_area_points(driveable_area_pts, city_name)
                    driveable_area_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(driveable_area_pts)
                    point_set = driveable_area_pts
                else:
                    point_set = roi_area_pts
                del lidar_pts
                # Remember the filtered points so repeated accesses skip the
                # ROI/ground-removal work (assumed bound: cache_size entries).
                if len(self.cache) < self.cache_size:
                    self.cache[index] = point_set

        else:
            point_set = 5  # dummy placeholder returned when LiDAR is not loaded
        return (question, image_feature, question_length, point_set, answer, image_name)
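
A hedged sketch of wiring the loader together; the file names and the vocab object are placeholders, and only the dataset names read in __init__ above are assumed to exist in the question HDF5 file.

import h5py
from torch.utils.data import DataLoader

vocab = {}  # hypothetical vocabulary consumed by get_answer_classes
with h5py.File("train_questions.h5", "r") as question_h5:
    dataset = ArgoDataset(
        question_h5,
        image_feature_h5_path="train_image_features.h5",              # per-frame features
        lidar_feature_h5_path="/path/to/argoverse-tracking/train1",   # log directory
        vocab=vocab,
        load_lidar=False)                                             # skip ROI filtering
loader = DataLoader(dataset, batch_size=8, shuffle=True)

Because __init__ copies the question arrays into memory with [:], the question HDF5 file can be closed as soon as the dataset is built.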
Code Example #7
def main():

    # set root_dir to the correct path to your dataset folder
    root_dir = core.data_dir() + '/datasets/argoverse/argoverse-tracking/sample/'

    vtk_window_size = (1280, 720)

    argoverse_loader = ArgoverseTrackingLoader(root_dir)

    print('Total number of logs:', len(argoverse_loader))
    argoverse_loader.print_all()

    for i, argoverse_data in enumerate(argoverse_loader):
        if i >= 3:
            break
        print(argoverse_data)

    argoverse_data = argoverse_loader[0]
    print(argoverse_data)

    argoverse_data = argoverse_loader.get(
        'c6911883-1843-3727-8eaa-41dc8cda8993')
    print(argoverse_data)

    log_id = 'c6911883-1843-3727-8eaa-41dc8cda8993'  # argoverse_loader.log_list[55]
    frame_idx = 150
    camera = argoverse_loader.CAMERA_LIST[0]

    argoverse_data = argoverse_loader.get(log_id)

    city_name = argoverse_data.city_name

    print('-------------------------------------------------------')
    print(f'Log: {log_id}, \n\tframe: {frame_idx}, camera: {camera}')
    print('-------------------------------------------------------\n')

    lidar_points = argoverse_data.get_lidar(frame_idx)
    img = argoverse_data.get_image_sync(frame_idx, camera=camera)
    objects = argoverse_data.get_label_object(frame_idx)
    calib = argoverse_data.get_calibration(camera)

    # TODO: Calculate point colours

    vtk_pc = VtkPointCloudGlyph()
    vtk_pc.set_points(lidar_points)
    vtk_pc.set_point_size(2)

    vtk_renderer = demo_utils.setup_vtk_renderer()
    vtk_render_window = demo_utils.setup_vtk_render_window(
        'Argoverse Demo', vtk_window_size, vtk_renderer)

    vtk_axes = vtk.vtkAxesActor()
    vtk_axes.SetTotalLength(2, 2, 2)
    vtk_renderer.AddActor(vtk_axes)
    vtk_renderer.AddActor(vtk_pc.vtk_actor)

    current_cam = vtk_renderer.GetActiveCamera()
    current_cam.SetViewUp(0, 0, 1)
    current_cam.SetPosition(-50, 0, 15)
    current_cam.SetFocalPoint(30, 0, 0)

    vtk_interactor = demo_utils.setup_vtk_interactor(vtk_render_window)
    vtk_interactor.Start()
Code Example #8
    def __init__(self, root_dir, split='train'):
        self.split = split
        is_test = (self.split == 'test')
        self.lidar_pathlist = []
        self.label_pathlist = []

        print("[LOG] Data dir is ", root_dir)
        self.lidar_dir = os.path.join(root_dir)

        if os.path.exists("lidar_pathlist.npy"):
            self.lidar_pathlist = np.load("lidar_pathlist.npy").tolist()
        if not is_test and os.path.exists("label_pathlist.npy"):
            self.label_pathlist = np.load("label_pathlist.npy").tolist()

        if not self.lidar_pathlist and not self.label_pathlist:
            data_loader = ArgoverseTrackingLoader(os.path.join(root_dir))
            for each in data_loader.log_list:
                data = data_loader.get(each)
                self.lidar_pathlist.extend(data.lidar_list)
                if not is_test:
                    self.label_pathlist.extend(data.label_list)
            if not is_test:
                # Prune frames: always keep frames containing at least one
                # pedestrian (cls_id == 1); keep the remaining frames with probability 0.5.
                pruned_lidar_pathlist = []
                pruned_label_pathlist = []
                classs, local_classs = 0, 0
                t = tqdm.tqdm(range(len(self.label_pathlist)))
                for each in t:
                    rand = random.uniform(0, 1)
                    objs = self.get_label(each)
                    local_classs = 0
                    for obj in objs:
                        if obj.cls_id == 1:
                            classs += 1
                            local_classs += 1
                    if local_classs > 0 or rand > 0.5:
                        pruned_lidar_pathlist.append(self.lidar_pathlist[each])
                        pruned_label_pathlist.append(self.label_pathlist[each])
                    t.set_description(f" Peds: {classs}")
                np.save("lidar_pathlist", pruned_lidar_pathlist)
                np.save("label_pathlist", pruned_label_pathlist)
                print(
                    f"[LOG] selected {len(pruned_lidar_pathlist)} out of {len(self.lidar_pathlist)}"
                )
                self.lidar_pathlist = pruned_lidar_pathlist
                self.label_pathlist = pruned_label_pathlist
        # import pdb; pdb.set_trace()

        #self.calib_file = data_loader.calib_filename
        self.lidar_filename = [
            x.split('.')[0].rsplit('/', 1)[1] for x in self.lidar_pathlist
        ]

        #         assert len(self.lidar_pathlist) == len(self.label_pathlist)

        self.num_sample = len(self.lidar_pathlist)
        self.lidar_idx_list = ['%06d' % l for l in range(self.num_sample)]

        self.lidar_idx_table = dict(
            zip(self.lidar_idx_list, self.lidar_filename))
        self.argo_to_kitti = np.array([[0, -1, 0], [0, 0, -1], [1, 0, 0]])

        self.ground_removal = False

        self.lidar_dir = os.path.join('/data/')
        self.label_dir = os.path.join('/data/')
Code Example #9
def main():
    args = parser.parse_args()
    if not (args.output_disp or args.output_depth):
        print('You must at least output one value !')
        return

    argoverse_loader = ArgoverseTrackingLoader(args.argoverse_data_path)
    camera = argoverse_loader.CAMERA_LIST[0]
    argoverse_data = argoverse_loader.get(args.argoverse_log)
    num_lidar = len(argoverse_data.get_image_list_sync(camera))

    disp_net = DispResNet(args.resnet_layers, False).to(device)
    weights = torch.load(args.pretrained, map_location=device)
    disp_net.load_state_dict(weights['state_dict'])
    disp_net.eval()

    #dataset_dir = Path(args.dataset_dir)
    output_dir = Path(args.output_dir)
    output_dir.makedirs_p()

    # if args.dataset_list is not None:
    #     with open(args.dataset_list, 'r') as f:
    #         test_files = [dataset_dir/file for file in f.read().splitlines()]
    # else:
    #     test_files = sum([dataset_dir.files('*.{}'.format(ext)) for ext in args.img_exts], [])

    # print('{} files to test'.format(len(test_files)))

    for frame in tqdm(range(0, num_lidar - 1)):

        file = argoverse_data.get_image_sync(frame, camera, load=False)

        img = imread(file).astype(np.float32)

        h, w, _ = img.shape
        if (not args.no_resize) and (h != args.img_height
                                     or w != args.img_width):
            img = imresize(img, (args.img_height, args.img_width)).astype(
                np.float32)
        img = np.transpose(img, (2, 0, 1))

        tensor_img = torch.from_numpy(img).unsqueeze(0)
        tensor_img = ((tensor_img / 255 - 0.45) / 0.225).to(device)

        output = disp_net(tensor_img)[0]

        file_path, file_ext = file.relpath(args.dataset_dir).splitext()
        file_name = '-'.join(file_path.splitall())

        if args.output_disp:
            disp = (255 * tensor2array(output, max_value=None,
                                       colormap='bone')).astype(np.uint8)
            imsave(output_dir / '{}_disp{}'.format(file_name, file_ext),
                   np.transpose(disp, (1, 2, 0)))
        if args.output_depth:
            depth = 1 / output
            depth = (255 * tensor2array(
                depth, max_value=10, colormap='rainbow')).astype(np.uint8)
            imsave(output_dir / '{}_depth{}'.format(file_name, file_ext),
                   np.transpose(depth, (1, 2, 0)))
class Lidar2Depth:
    """
    Convert 360 degree LiDAR point cloud to depth map corresponding to each ring camera
    for monocular depth estimation.

    To use:
    >>> input_log_dir = "path/to/3d20ae25-5b29-320d-8bae-f03e9dc177b9/"
    >>> output_save_path = "path/to/depth_dataset/"
    >>> Lidar2Depth(input_log_dir, output_save_path)  
    """
    def __init__(self, input_log_dir: str, output_save_path: str) -> None:

        self.input_log_dir = input_log_dir
        self.output_save_path = output_save_path
        self.log_id = os.path.basename(input_log_dir)
        print("Log ID ", self.log_id)

        # Load Argo data
        dataset = os.path.dirname(self.input_log_dir)
        self.argoverse_loader = ArgoverseTrackingLoader(dataset)
        self.argoverse_data = self.argoverse_loader.get(self.log_id)

        # Count the number of LiDAR ply files in the log dir
        self.lidar_frame_counter = len(
            glob.glob1(os.path.join(self.input_log_dir, "lidar"), "*.ply"))

        # Setup depth dataset dir
        self.depth_data_dir_setup()

        # Extract depth data and ring camera frames
        self.depth_extraction()

    def depth_data_dir_setup(self) -> None:
        """
        Depth dataset structure
        +-- train/val/test
        |   +-- depth
        |   |   +-- 00c561b9-2057-358d-82c6-5b06d76cebcf
        |   |   |   +-- ring_front_center
        |   |   |   |   +-- 1.png
        |   |   |   |   +-- 2.png
        |   |   |   |   +--   .
        |   |   |   |   +--   .
        |   |   |   |   +-- n.png
        |   |   |   +-- ring_front_left
        |   |   |   +--        .
        |   |   |   +--        .
        |   |   |   +-- ring_side_right
        |   |   +-- 0ef28d5c-ae34-370b-99e7-6709e1c4b929
        |   |   |   +-- ring_front_center
        |   |   |   +--        .
        |   |   |   +--        .
        |   |   |   +-- ring_side_right
        |   |   +--            .
        |   |   +--            .
        |   +-- rgb
        |   |   +-- 00c561b9-2057-358d-82c6-5b06d76cebcf
        |   |   |   +-- ring_front_center
        |   |   |   |   +-- 1.png
        |   |   |   |   +--   .
        |   |   |   |   +-- n.png
        |   |   |   +-- ring_front_left
        |   |   |   +--        .
        |   |   |   +-- ring_side_right
        |   |   +-- 0ef28d5c-ae34-370b-99e7-6709e1c4b929
        |   |   |   +-- ring_front_center
        |   |   |   +-- ring_front_left
        |   |   |   +--        .
        |   |   |   +-- ring_side_right
        |   |   +--            .
        |   |   +--            .
        """
        if fnmatch.fnmatchcase(self.input_log_dir, "*" + "train" + "*"):
            self.save_name = os.path.join(self.output_save_path, "train")
            self.logid_type = "train"

        elif fnmatch.fnmatchcase(self.input_log_dir, "*" + "val" + "*"):
            self.save_name = os.path.join(self.output_save_path, "val")
            self.logid_type = "val"

        elif fnmatch.fnmatchcase(self.input_log_dir, "*" + "test" + "*"):
            self.save_name = os.path.join(self.output_save_path, "test")
            self.logid_type = "test"

        for camera_name in RING_CAMERA_LIST:
            paths = [
                os.path.join(self.save_name, "depth", self.log_id,
                             camera_name),
                os.path.join(self.save_name, "rgb", self.log_id, camera_name),
            ]
            for sub_path in paths:
                if not os.path.exists(sub_path):
                    os.makedirs(sub_path)

    def extract_lidar_image_pair(
            self, camera_ID: int,
            lidar_frame_idx: str) -> Optional[Tuple[np.ndarray, np.ndarray]]:
        """
        For the provided camera_ID and LiDAR ply file,
        extract rgb image and corresponding LiDAR points in the fov.
        """
        img = self.argoverse_data.get_image_sync(lidar_frame_idx,
                                                 camera=camera_ID)
        self.calib = self.argoverse_data.get_calibration(camera_ID)
        pc = self.argoverse_data.get_lidar(lidar_frame_idx)
        uv = self.calib.project_ego_to_image(pc).T
        lidar_frame_idx_ = np.where(
            np.logical_and.reduce((
                uv[0, :] >= 0.0,
                uv[0, :] < np.shape(img)[1] - 1.0,
                uv[1, :] >= 0.0,
                uv[1, :] < np.shape(img)[0] - 1.0,
                uv[2, :] > 0,
            )))
        lidar_image_projection_points = uv[:, lidar_frame_idx_]
        if lidar_image_projection_points is None:
            print("No point image projection")
            return np.array(img), None
        else:
            return np.array(img), lidar_image_projection_points

    def save_image_pair(
        self,
        camera_ID: int,
        img: np.ndarray,
        lidar_frame_idx: str,
        lidar_image_projection_points: np.ndarray,
    ) -> None:
        """
        Save the depth images and camera frame to the created dataset dir.
        """
        x_values = np.round(lidar_image_projection_points[0], 0).astype(int)
        y_values = np.round(lidar_image_projection_points[1], 0).astype(int)
        lidar_depth_val = lidar_image_projection_points[2]

        # Create a blank image to place lidar points as pixels with depth information
        sparse_depth_img = np.zeros(
            [img.shape[0],
             img.shape[1]])  # keeping it float to maintain precision
        sparse_depth_img[y_values, x_values] = lidar_depth_val

        # Multiply by 256 to preserve precision in the uint16 PNG; divide by 256
        # again when loading the depth maps for training.
        # NOTE: 0 denotes a null value (no LiDAR return), not actual zero depth.
        depth_rescaled = sparse_depth_img * 256.0
        depth_scaled = depth_rescaled.astype(np.uint16)
        depth_scaled = Image.fromarray(depth_scaled)
        raw_depth_path = os.path.join(
            self.save_name,
            "depth",
            self.log_id,
            str(camera_ID),
            str(lidar_frame_idx) + ".png",
        )
        depth_scaled.save(raw_depth_path)  # Save Depth image

        img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        raw_img_path = os.path.join(
            self.save_name,
            "rgb",
            self.log_id,
            str(camera_ID),
            str(lidar_frame_idx) + ".png",
        )
        cv2.imwrite(raw_img_path, img_rgb,
                    [cv2.IMWRITE_PNG_COMPRESSION, 0])  # Save RGB image

    def frame2depth_mapping(self, camera_ID: int,
                            lidar_frame_idx: str) -> None:
        """
        For your training dataloader, you will likely find it helpful to read image paths
        from a .txt file. We explicitly write to a .txt file all rgb image paths that have
        a corresponding sparse ground truth depth file along with focal length.
        """
        mapping_path = os.path.join(
            self.output_save_path,
            "argo_" + self.logid_type + "_files_with_gt.txt")
        file_path = os.path.join(str(self.log_id), camera_ID,
                                 str(lidar_frame_idx) + ".png")
        gt_string = file_path + " " + file_path + " " + str(
            np.round(self.calib.fv, 4))
        # Append one "rgb depth focal_length" line per saved frame and close the
        # file after each write instead of leaving the handle open.
        with open(mapping_path, "a") as mapping_file:
            mapping_file.write(gt_string + "\n")

    def depth_extraction(self) -> None:
        """
        For every lidar file, extract ring camera frames and store it in the save dir
        along with depth map
        """
        for lidar_frame_idx in tqdm(range(self.lidar_frame_counter)):
            for camera_ID in RING_CAMERA_LIST:
                # Extract camera frames and associated lidar points
                img, lidar_image_projection_points = self.extract_lidar_image_pair(
                    camera_ID, lidar_frame_idx)
                # Save image and depth map if LiDAR projection points exist
                if lidar_image_projection_points is not None:
                    # Save the above extracted images
                    self.save_image_pair(camera_ID, img, lidar_frame_idx,
                                         lidar_image_projection_points)
                    # Write path of rgb image, depth image along with focal length
                    # in a txt file for data loader
                    self.frame2depth_mapping(camera_ID, lidar_frame_idx)
                else:
                    continue
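
The depth maps written by save_image_pair are 16-bit PNGs scaled by 256, with 0 marking pixels that received no LiDAR return. A minimal read-back sketch (the path is hypothetical, using the log id from the directory-layout docstring):

import numpy as np
from PIL import Image

depth_path = "depth_dataset/train/depth/00c561b9-2057-358d-82c6-5b06d76cebcf/ring_front_center/0.png"
depth_m = np.asarray(Image.open(depth_path), dtype=np.float32) / 256.0  # back to metres
valid = depth_m > 0
print(valid.sum(), "pixels with depth, max", depth_m[valid].max(), "m")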
Code Example #11
def make_att_files(root_dir: str) -> None:
    """Write a .pkl file with difficulty attributes per track"""
    path_output_vis = "vis_output"
    filename_output = "att_file.npy"

    if not os.path.exists(path_output_vis):
        os.mkdir(path_output_vis)

    list_folders = ["test"]
    list_name_class = ["VEHICLE", "PEDESTRIAN"]
    count_track = 0
    dict_att_all: Dict[str, Any] = {}

    for name_folder in list_folders:

        dict_att_all[name_folder] = {}
        list_log_folders = glob.glob(os.path.join(root_dir, name_folder, "*"))
        for ind_log, path_log in enumerate(list_log_folders):

            id_log = f"{Path(path_log).name}"
            print("%s %s %d/%d" %
                  (name_folder, id_log, ind_log, len(list_log_folders)))

            if check_track_label_folder:
                list_path_label_persweep = glob.glob(
                    os.path.join(path_log, "per_sweep_annotations_amodal",
                                 "*"))
                list_path_label_persweep.sort()

                dict_track_labels: Dict[str, Any] = {}
                for path_label_persweep in list_path_label_persweep:
                    data = read_json_file(path_label_persweep)
                    for data_obj in data:
                        id_obj = data_obj["track_label_uuid"]

                        if id_obj not in dict_track_labels.keys():
                            dict_track_labels[id_obj] = []
                        dict_track_labels[id_obj].append(data_obj)

                data_amodal: Dict[str, Any] = {}
                for key in dict_track_labels.keys():
                    dict_amodal: Dict[str, Any] = {}
                    data_amodal[key] = dict_amodal
                    data_amodal[key]["label_class"] = dict_track_labels[key][
                        0]["label_class"]
                    data_amodal[key]["uuid"] = dict_track_labels[key][0][
                        "track_label_uuid"]
                    data_amodal[key]["log_id"] = id_log
                    data_amodal[key]["track_label_frames"] = dict_track_labels[
                        key]

            argoverse_loader = ArgoverseTrackingLoader(
                os.path.join(root_dir, name_folder))
            data_log = argoverse_loader.get(id_log)
            list_lidar_timestamp = data_log.lidar_timestamp_list

            dict_tracks: Dict[str, Any] = {}
            for id_track in data_amodal.keys():

                data = data_amodal[id_track]
                if data["label_class"] not in list_name_class:
                    continue

                data_per_frame = data["track_label_frames"]

                dict_per_track: Dict[str, Any] = {}
                dict_tracks[id_track] = dict_per_track
                dict_tracks[id_track]["ind_lidar_min"] = -1
                dict_tracks[id_track]["ind_lidar_max"] = -1
                length_log = len(list_lidar_timestamp)
                dict_tracks[id_track]["list_city_se3"] = [None] * length_log
                dict_tracks[id_track]["list_bbox"] = [None] * length_log
                count_track += 1

                dict_tracks[id_track]["list_center"] = np.full([length_log, 3],
                                                               np.nan)
                dict_tracks[id_track]["list_center_w"] = np.full(
                    [length_log, 3], np.nan)
                dict_tracks[id_track]["list_dist"] = np.full([length_log],
                                                             np.nan)
                dict_tracks[id_track]["exists"] = np.full([length_log], False)

                for box in data_per_frame:

                    if box["timestamp"] in list_lidar_timestamp:
                        ind_lidar = list_lidar_timestamp.index(
                            box["timestamp"])
                    else:
                        continue

                    if dict_tracks[id_track]["ind_lidar_min"] == -1:
                        dict_tracks[id_track]["ind_lidar_min"] = ind_lidar

                    dict_tracks[id_track]["ind_lidar_max"] = max(
                        ind_lidar, dict_tracks[id_track]["ind_lidar_max"])

                    center = np.array([
                        box["center"]["x"], box["center"]["y"],
                        box["center"]["z"]
                    ])
                    city_SE3_egovehicle = argoverse_loader.get_pose(
                        ind_lidar, id_log)
                    if city_SE3_egovehicle is None:
                        print("Pose not found!")
                        continue
                    center_w = city_SE3_egovehicle.transform_point_cloud(
                        center[np.newaxis, :])[0]

                    dict_tracks[id_track]["list_center"][ind_lidar] = center
                    dict_tracks[id_track]["list_center_w"][
                        ind_lidar] = center_w
                    dict_tracks[id_track]["list_dist"][
                        ind_lidar] = np.linalg.norm(center[0:2])
                    dict_tracks[id_track]["exists"][ind_lidar] = True
                    dict_tracks[id_track]["list_city_se3"][
                        ind_lidar] = city_SE3_egovehicle
                    dict_tracks[id_track]["list_bbox"][ind_lidar] = box

                length_track = dict_tracks[id_track][
                    "ind_lidar_max"] - dict_tracks[id_track][
                        "ind_lidar_min"] + 1

                assert not (dict_tracks[id_track]["ind_lidar_max"] == -1
                            and dict_tracks[id_track]["ind_lidar_min"]
                            == -1), "zero-length track"
                dict_tracks[id_track]["length_track"] = length_track

                (
                    dict_tracks[id_track]["list_vel"],
                    dict_tracks[id_track]["list_acc"],
                ) = compute_v_a(dict_tracks[id_track]["list_center_w"])
                dict_tracks[id_track]["num_missing"] = (
                    dict_tracks[id_track]["length_track"] -
                    dict_tracks[id_track]["exists"].sum())
                dict_tracks[id_track]["difficult_att"] = []
                # get scalar velocity per timestamp as 2-norm of (vx, vy)
                vel_abs = np.linalg.norm(
                    dict_tracks[id_track]["list_vel"][:, 0:2], axis=1)
                acc_abs = np.linalg.norm(
                    dict_tracks[id_track]["list_acc"][:, 0:2], axis=1)

                ind_valid = np.nonzero(
                    1 - np.isnan(dict_tracks[id_track]["list_dist"]))[0]
                ind_close = np.nonzero(dict_tracks[id_track]["list_dist"]
                                       [ind_valid] < NEAR_DISTANCE_THRESH)[0]

                if len(ind_close) > 0:
                    ind_close_max = ind_close.max() + 1
                    ind_close_min = ind_close.min()

                # Only compute "fast" and "occluded" tags for near objects
                # The thresholds are not very meaningful for faraway objects, since they are usually pretty short.
                if dict_tracks[id_track]["list_dist"][ind_valid].min(
                ) > NEAR_DISTANCE_THRESH:
                    dict_tracks[id_track]["difficult_att"].append("far")
                else:
                    is_short_len_track1 = dict_tracks[id_track][
                        "length_track"] < SHORT_TRACK_LENGTH_THRESH
                    is_short_len_track2 = dict_tracks[id_track]["exists"].sum(
                    ) < SHORT_TRACK_COUNT_THRESH
                    if is_short_len_track1 or is_short_len_track2:
                        dict_tracks[id_track]["difficult_att"].append("short")
                    else:
                        if (ind_close_max -
                                ind_close_min) - dict_tracks[id_track][
                                    "exists"][ind_close_min:ind_close_max].sum(
                                    ) > MAX_OCCLUSION_PCT:
                            dict_tracks[id_track]["difficult_att"].append(
                                "occ")

                        if np.quantile(vel_abs[ind_valid][ind_close],
                                       0.9) > FAST_TRACK_THRESH:
                            dict_tracks[id_track]["difficult_att"].append(
                                "fast")

                if len(dict_tracks[id_track]["difficult_att"]) == 0:
                    dict_tracks[id_track]["difficult_att"].append("easy")

            if visualize:
                for ind_lidar, timestamp_lidar in enumerate(
                        list_lidar_timestamp):

                    list_bboxes = []
                    list_difficulty_att = []

                    for id_track in dict_tracks.keys():
                        if dict_tracks[id_track]["exists"][ind_lidar]:
                            list_bboxes.append(
                                dict_tracks[id_track]["list_bbox"][ind_lidar])
                            list_difficulty_att.append(
                                dict_tracks[id_track]["difficult_att"])

                    path_lidar = os.path.join(path_log, "lidar",
                                              "PC_%s.ply" % timestamp_lidar)
                    pc = np.asarray(o3d.io.read_point_cloud(path_lidar).points)
                    list_lidar_timestamp = data_log.lidar_timestamp_list
                    save_bev_img(
                        path_output_vis,
                        list_bboxes,
                        list_difficulty_att,
                        "argoverse_%s" % name_folder,
                        id_log,
                        timestamp_lidar,
                        pc,
                    )

            for id_track in dict_tracks.keys():
                list_key = list(dict_tracks[id_track].keys()).copy()
                for key in list_key:
                    if key != "difficult_att":
                        del dict_tracks[id_track][key]

            dict_att_all[name_folder][id_log] = dict_tracks

    save_pkl_dictionary(filename_output, dict_att_all)
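
The per-track tags end up under dict_att_all[split][log_id][track_uuid]["difficult_att"]. Assuming save_pkl_dictionary is a thin pickle wrapper, the file can be read back roughly like this:

import pickle

with open("att_file.npy", "rb") as f:   # name taken from filename_output above
    dict_att_all = pickle.load(f)

for log_id, tracks in dict_att_all["test"].items():
    occluded = [uuid for uuid, d in tracks.items() if "occ" in d["difficult_att"]]
    print(log_id, f"{len(occluded)}/{len(tracks)} occluded tracks")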
Code Example #12
def process_a_split(data_dir,
                    target_data_dir,
                    split_file_path,
                    bev_bnds,
                    bev_meter_per_pixel,
                    bev_wanted_classes,
                    offset=0):
    """
    Args:
        data_dir: directory that contains data corresponding to A SPECIFIC
            SPLIT (train, validation, test)
        target_data_dir: the dir to write to. Contains data corresponding to
            A SPECIFIC SPLIT in KITTI (training, testing)
        split_file_path: location to write all the sample_idx of a split
        bev_bnds: [4] containing [x_min, x_max, y_min, y_max] in meter for the bev
        bev_meter_per_pixel: number of meters in a pixel in bev, most often fractional
        bev_wanted_classes: [VEHICLE, BICYCLIST, PEDESTRIAN, ...] the classes to be
            for bev
        offset: first sample_idx to start counting from, needed to differentiate
            training and validation sample_idx because in KITTI they are under
            the same dir
    """
    print("Processing", data_dir)

    target_velodyne_dir = os.path.join(target_data_dir, 'velodyne')
    target_velodyne_reduced_dir = os.path.join(target_data_dir,
                                               'velodyne_reduced')
    target_calib_dir = os.path.join(target_data_dir, 'calib')
    target_image_2_dir = os.path.join(target_data_dir, 'image_2')
    if 'test' not in split_file_path:
        target_label_2_dir = os.path.join(target_data_dir, 'label_2')

    os.makedirs(target_velodyne_dir, exist_ok=True)
    os.makedirs(target_velodyne_reduced_dir, exist_ok=True)
    os.makedirs(target_calib_dir, exist_ok=True)
    os.makedirs(target_image_2_dir, exist_ok=True)
    if 'test' not in split_file_path:
        os.makedirs(target_label_2_dir, exist_ok=True)

    ############################## for saving BEV segmentation masks paths
    target_bev_drivable_dir = os.path.join(target_data_dir, 'bev_DRIVABLE')
    os.makedirs(target_bev_drivable_dir, exist_ok=True)

    target_bev_fov_dir = os.path.join(target_data_dir, 'bev_FOV')
    os.makedirs(target_bev_fov_dir, exist_ok=True)

    target_bev_cls_dirs = []
    for wanted_cls in bev_wanted_classes:
        target_bev_cls_dir = os.path.join(target_data_dir,
                                          'bev_{}'.format(wanted_cls))
        os.makedirs(target_bev_cls_dir, exist_ok=True)
        target_bev_cls_dirs.append(target_bev_cls_dir)
    ############################## end for saving BEV segmentation masks paths

    # Check the number of logs, defined as one continuous trajectory
    argoverse_loader = ArgoverseTrackingLoader(data_dir)
    print('Total number of logs:', len(argoverse_loader))
    argoverse_loader.print_all()
    print('\n')

    cams = [
        'ring_front_center', 'ring_front_left', 'ring_front_right',
        'ring_rear_left', 'ring_rear_right', 'ring_side_left',
        'ring_side_right'
    ]

    # count total number of files
    total_number = 0
    for q in argoverse_loader.log_list:
        log_lidar_path = os.path.join(data_dir, q, 'lidar')
        path, dirs, files = next(os.walk(log_lidar_path))
        total_number = total_number + len(files)

    total_number = total_number * len(cams)

    print('Now write sample indices to split file.')
    write_split_file(split_file_path, offset, total_number)

    bar = progressbar.ProgressBar(maxval=total_number, \
                                  widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])

    print('Total number of files: {}. Translation starts...'.format(
        total_number))
    print('Progress:')
    bar.start()

    i = 0
    kitti_to_argo_mapping = {}
    for log_id in sorted(argoverse_loader.log_list):
        argoverse_data = argoverse_loader.get(log_id)
        from argoverse.map_representation.map_api import ArgoverseMap
        argoverse_map = ArgoverseMap()
        for cam in cams:

            ############################## Calibration for this whole log ##############################
            log_calib_path = os.path.join(data_dir, log_id,
                                          'vehicle_calibration_info.json')
            calibration_data = calibration.load_calib(log_calib_path)[cam]
            calib_file_content = construct_calib_str(calibration_data,
                                                     pts_in_cam_ego_coord=True)

            log_lidar_dir = os.path.join(data_dir, log_id, 'lidar')

            # Loop through the each lidar frame (10Hz) to rename, copy, and adapt
            # all images, lidars, calibration files, and label files.
            for frame_idx, timestamp in enumerate(
                    sorted(argoverse_data.lidar_timestamp_list)):

                idx = str(i + offset).zfill(9)

                # recording the mapping from kitti to argo
                # (log index, the lidar frame index) uniquely identify a sample
                kitti_to_argo_mapping[idx] = (log_id, frame_idx)

                i += 1
                if i < total_number:
                    bar.update(i + 1)

                ############################## Lidar ##############################

                # Save lidar file into .bin format under the new directory
                lidar_file_path = os.path.join(
                    log_lidar_dir, 'PC_{}.ply'.format(str(timestamp)))
                target_lidar_file_path = os.path.join(target_velodyne_dir,
                                                      idx + '.bin')

                lidar_data = load_ply(lidar_file_path)

                # run this block of code only if we are using the cam coord instead of the ego coord
                if True:
                    in_cam_coord = calibration_data.project_ego_to_cam(
                        lidar_data)
                    # Reorder xyz -> zxy and negate two axes so the points follow
                    # the KITTI velodyne convention (x forward, y left, z up).
                    in_cam_coord = in_cam_coord[:, [2, 0, 1]]
                    in_cam_coord[:, [1, 2]] *= -1
                    lidar_data = in_cam_coord

                lidar_data_augmented = np.concatenate(
                    (lidar_data, np.zeros([lidar_data.shape[0], 1])), axis=1)
                lidar_data_augmented = lidar_data_augmented.astype('float32')
                lidar_data_augmented.tofile(target_lidar_file_path)

                ############################## Image ##############################

                # Save the image file into .png format under the new directory
                cam_file_path = argoverse_data.image_list_sync[cam][frame_idx]
                target_cam_file_path = os.path.join(target_image_2_dir,
                                                    idx + '.png')
                copyfile(cam_file_path, target_cam_file_path)

                target_calib_file_path = os.path.join(target_calib_dir,
                                                      idx + '.txt')
                file = open(target_calib_file_path, 'w+')
                file.write(calib_file_content)
                file.close()

                ############################## BEV binary masks ##############################
                bev_drivable_save_path = os.path.join(target_bev_drivable_dir,
                                                      idx + '.png')
                bev_wanted_save_paths = [
                    os.path.join(cls_dir, idx + '.png')
                    for cls_dir in target_bev_cls_dirs
                ]
                bev_fov_save_path = os.path.join(target_bev_fov_dir,
                                                 idx + '.png')

                get_bev(argoverse_data,
                        argoverse_map,
                        log_id,
                        frame_idx,
                        bev_bnds,
                        bev_meter_per_pixel,
                        bev_drivable_save_path,
                        bev_wanted_classes,
                        bev_wanted_save_paths,
                        bev_fov_save_path,
                        visualize=False,
                        camera_calib=calibration_data)

                ############################## Labels ##############################

                if 'test' in split_file_path:
                    continue
                label_object_list = argoverse_data.get_label_object(frame_idx)
                target_label_2_file_path = os.path.join(
                    target_label_2_dir, idx + '.txt')
                file = open(target_label_2_file_path, 'w+')

                # DontCare objects must appear at the end as per KITTI label files
                object_lines = []
                dontcare_lines = []

                for detected_object in label_object_list:
                    classes = detected_object.label_class
                    classes = OBJ_CLASS_MAPPING_DICT[
                        classes]  # map class type from argoverse to KITTI
                    occulusion = round(detected_object.occlusion / 25)
                    height = detected_object.height
                    length = detected_object.length
                    width = detected_object.width
                    truncated = 0

                    center = detected_object.translation  # in ego frame

                    corners_ego_frame = detected_object.as_3d_bbox(
                    )  # all eight points in ego frame
                    corners_cam_frame = calibration_data.project_ego_to_cam(
                        corners_ego_frame
                    )  # all eight points in the camera frame
                    image_corners = calibration_data.project_ego_to_image(
                        corners_ego_frame)
                    image_bbox = [
                        min(image_corners[:, 0]),
                        min(image_corners[:, 1]),
                        max(image_corners[:, 0]),
                        max(image_corners[:, 1])
                    ]
                    # the four coordinates we need for KITTI
                    image_bbox = [round(x) for x in image_bbox]

                    center_cam_frame = calibration_data.project_ego_to_cam(
                        np.array([center]))

                    # if 0 < center_cam_frame[0][2] < max_d and 0 < image_bbox[0] < 1920 and 0 < image_bbox[1] < 1200 and 0 < \
                    #     image_bbox[2] < 1920 and 0 < image_bbox[3] < 1200:
                    if True:
                        # the center coordinates in cam frame we need for KITTI

                        # for the orientation, we choose point 1 and point 5 for application
                        p1 = corners_cam_frame[1]
                        p5 = corners_cam_frame[5]
                        # dz = p1[2] - p5[2]
                        # dx = p1[0] - p5[0]
                        # angle = math.atan2(dz, dx)

                        angle_vec = p1 - p5
                        # norm vec along the x axis, points to the right in KITTI cam rect coordinate
                        origin_vec = np.array([1, 0, 0])
                        import vg
                        angle = vg.signed_angle(origin_vec,
                                                angle_vec,
                                                look=vg.basis.y,
                                                units='rad')

                        # the orientation angle of the car
                        beta = math.atan2(center_cam_frame[0][2],
                                          center_cam_frame[0][0])
                        alpha = angle + beta - math.pi / 2
                        line = classes + ' {} {} {} {} {} {} {} {} {} {} {} {} {} {} \n'.format(
                            round(truncated, 2), occulusion, round(alpha, 2),
                            round(image_bbox[0], 2), round(image_bbox[1], 2),
                            round(image_bbox[2], 2), round(image_bbox[3], 2),
                            round(height, 2), round(width, 2), round(
                                length, 2), round(center_cam_frame[0][0], 2),
                            round(center_cam_frame[0][1], 2) + height / 2,
                            round(center_cam_frame[0][2], 2), round(angle, 2))

                        # separate the object lines so we can move all the dontcare lines at the end
                        if classes == DONT_CARE:
                            dontcare_lines.append(line)
                        else:
                            object_lines.append(line)

                for line in object_lines:
                    file.write(line)
                for line in dontcare_lines:
                    file.write(line)
                file.close()

    # write kitti_to_argo_mapping to a file
    split_dir = os.path.dirname(split_file_path)
    split_name = os.path.basename(split_file_path)
    split_name = os.path.splitext(split_name)[0]
    prefix = 'kitti_to_argo_mapping'
    kitti_to_argo_mapping_path = os.path.join(
        split_dir, "{}_{}.json".format(prefix, split_name))
    file_handle = open(kitti_to_argo_mapping_path, 'w')
    json.dump(kitti_to_argo_mapping, file_handle)
    file_handle.close()

    bar.finish()
    print('Translation finished, processed {} files'.format(i))
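
Each label line written above follows the 15-column KITTI object format: class, truncated, occluded, alpha, the 2D box (left, top, right, bottom), the 3D size (height, width, length), the 3D centre (x, y, z) in the camera frame (the y value is shifted by height/2 so it sits at the box bottom, as KITTI expects), and rotation_y. A small parsing sketch with made-up values:

# Field order matches the format string used above; the numbers are illustrative only.
line = "Car 0 1 -1.57 612 176 705 232 1.53 1.62 3.89 1.84 1.47 8.41 1.62"
fields = line.split()
obj_type = fields[0]
truncated, occluded, alpha = float(fields[1]), int(fields[2]), float(fields[3])
bbox = [float(v) for v in fields[4:8]]       # left, top, right, bottom in pixels
h, w, l = (float(v) for v in fields[8:11])   # 3D size in metres
x, y, z = (float(v) for v in fields[11:14])  # camera-frame centre in metres
rotation_y = float(fields[14])
print(obj_type, bbox, (h, w, l), (x, y, z), rotation_y)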
Code Example #13
class Argo_MonoDataset(data.Dataset):
    """Superclass for monocular dataloaders

    Args:
        data_path
        filenames
        height
        width
        frame_idxs
        num_scales
        is_train
        img_ext
    """
    def __init__(self,
                 data_path,
                 filenames,
                 height,
                 width,
                 frame_idxs,
                 num_scales,
                 is_train=False,
                 img_ext='.jpg'):
        super(Argo_MonoDataset, self).__init__()

        self.data_path = data_path
        self.filenames = filenames
        self.height = height  #2056
        self.width = width  #2464
        self.num_scales = num_scales
        # Image.ANTIALIAS is an alias of Image.LANCZOS and is removed in Pillow >= 10.
        self.interp = Image.ANTIALIAS

        self.frame_idxs = frame_idxs

        self.is_train = is_train
        self.img_ext = img_ext

        self.loader = argoverse_load_image
        self.argoverse_loader = ArgoverseTrackingLoader(self.data_path)
        self.camera = self.argoverse_loader.CAMERA_LIST[0]
        self.argoverse_data = None  # set per sample in __getitem__

        self.to_tensor = transforms.ToTensor()

        # We need to specify augmentations differently in newer versions of torchvision.
        # We first try the newer tuple version; if this fails we fall back to scalars
        try:
            self.brightness = (0.8, 1.2)
            self.contrast = (0.8, 1.2)
            self.saturation = (0.8, 1.2)
            self.hue = (-0.1, 0.1)
            transforms.ColorJitter.get_params(self.brightness, self.contrast,
                                              self.saturation, self.hue)
        except TypeError:
            self.brightness = 0.2
            self.contrast = 0.2
            self.saturation = 0.2
            self.hue = 0.1

        self.resize = {}
        for i in range(self.num_scales):
            s = 2**i
            self.resize[i] = transforms.Resize(
                (self.height // s, self.width // s), interpolation=self.interp)

        self.load_depth = self.check_depth()

    def preprocess(self, inputs, color_aug):
        """Resize colour images to the required scales and augment if required

        We create the color_aug object in advance and apply the same augmentation to all
        images in this item. This ensures that all images input to the pose network receive the
        same augmentation.
        """
        for k in list(inputs):
            frame = inputs[k]
            if "color" in k:
                n, im, i = k
                for i in range(self.num_scales):
                    inputs[(n, im, i)] = self.resize[i](inputs[(n, im, i - 1)])

        for k in list(inputs):
            f = inputs[k]
            if "color" in k:
                n, im, i = k
                inputs[(n, im, i)] = self.to_tensor(f)
                inputs[(n + "_aug", im, i)] = self.to_tensor(color_aug(f))

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, index):
        """Returns a single training item from the dataset as a dictionary.

        Values correspond to torch tensors.
        Keys in the dictionary are either strings or tuples:

            ("color", <frame_id>, <scale>)          for raw colour images,
            ("color_aug", <frame_id>, <scale>)      for augmented colour images,
            ("K", scale) or ("inv_K", scale)        for camera intrinsics,
            "stereo_T"                              for camera extrinsics, and
            "depth_gt"                              for ground truth depth maps.

        <frame_id> is either:
            an integer (e.g. 0, -1, or 1) representing the temporal step relative to 'index',
        or
            "s" for the opposite image in the stereo pair.

        <scale> is an integer representing the scale of the image relative to the fullsize image:
            -1      images at native resolution as loaded from disk
            0       images resized to (self.width,      self.height     )
            1       images resized to (self.width // 2, self.height // 2)
            2       images resized to (self.width // 4, self.height // 4)
            3       images resized to (self.width // 8, self.height // 8)
        """
        inputs = {}

        do_color_aug = self.is_train and random.random() > 0.5
        do_flip = self.is_train and random.random() > 0.5

        line = self.filenames[index].split()
        folder = line[0]
        self.argoverse_data = self.argoverse_loader.get(folder)
        last_row = np.array([[0.0, 0.0, 0.0, 1.0]])
        K_ = self.argoverse_data.get_calibration(self.camera).K
        K = np.vstack((K_, last_row))
        H = 2056
        W = 2464
        K[0] = K[0] / W
        K[1] = K[1] / H
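        # Note: the intrinsics are divided by the full image size here, so K is in
        # normalized image coordinates; it is scaled back to pixel units for each
        # pyramid level in the loop over scales below.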

        if len(line) == 3:
            frame_index = int(line[1])
        else:
            frame_index = 0

        if len(line) == 3:
            side = line[2]
        else:
            side = None

        for i in self.frame_idxs:
            if i == "s":
                other_side = {"r": "l", "l": "r"}[side]
                inputs[("color", i,
                        -1)] = self.get_color(folder, frame_index, other_side,
                                              do_flip)
            else:
                inputs[("color", i,
                        -1)] = self.get_color(folder, frame_index + i, side,
                                              do_flip)

        # adjusting intrinsics to match each scale in the pyramid; copy K so the
        # in-place scaling of one level does not leak into the next
        for scale in range(self.num_scales):
            K_scale = K.copy()
            K_scale[0, :] *= self.width // (2**scale)
            K_scale[1, :] *= self.height // (2**scale)

            inv_K = np.linalg.pinv(K_scale)

            inputs[("K", scale)] = torch.from_numpy(K_scale).type(torch.float32)
            inputs[("inv_K",
                    scale)] = torch.from_numpy(inv_K).type(torch.float32)

        if do_color_aug:
            color_aug = transforms.ColorJitter.get_params(
                self.brightness, self.contrast, self.saturation, self.hue)
        else:
            color_aug = (lambda x: x)

        self.preprocess(inputs, color_aug)

        for i in self.frame_idxs:
            del inputs[("color", i, -1)]
            del inputs[("color_aug", i, -1)]

        if self.load_depth:
            depth_gt = self.get_depth(folder, frame_index, side, do_flip)
            inputs["depth_gt"] = np.expand_dims(depth_gt, 0)
            inputs["depth_gt"] = torch.from_numpy(inputs["depth_gt"].astype(
                np.float32))

        if "s" in self.frame_idxs:
            stereo_T = np.eye(4, dtype=np.float32)
            baseline_sign = -1 if do_flip else 1
            side_sign = -1 if side == "l" else 1
            stereo_T[0, 3] = side_sign * baseline_sign * 0.1

            inputs["stereo_T"] = torch.from_numpy(stereo_T)

        return inputs

    def get_color(self, folder, frame_index, side, do_flip):
        raise NotImplementedError

    def check_depth(self):
        raise NotImplementedError

    def get_depth(self, folder, frame_index, side, do_flip):
        raise NotImplementedError
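Argo_MonoDataset is abstract: get_color, check_depth and get_depth must come from a subclass. The rough sketch below is mine, not taken from the original code; the subclass name, the use of get_image_sync with load=False as an image-path lookup, and the assumption that no ground-truth depth is available are all assumptions.

from torch.utils.data import DataLoader


class ArgoRawDataset(Argo_MonoDataset):
    # Hypothetical concrete subclass: images only, no ground-truth depth.

    def get_color(self, folder, frame_index, side, do_flip):
        # get_image_sync(..., load=False) is assumed to return the image path,
        # which the argoverse_load_image loader then opens as a PIL image.
        path = self.argoverse_data.get_image_sync(frame_index,
                                                  camera=self.camera,
                                                  load=False)
        color = self.loader(path)
        if do_flip:
            color = color.transpose(Image.FLIP_LEFT_RIGHT)
        return color

    def check_depth(self):
        return False

    def get_depth(self, folder, frame_index, side, do_flip):
        raise NotImplementedError


# filenames hold lines like "<log_id> <frame_index> <side>", as parsed in
# __getitem__ above; the sizes below are just the native-resolution constants.
# dataset = ArgoRawDataset(data_path, filenames, height=2056, width=2464,
#                          frame_idxs=[0, -1, 1], num_scales=4, is_train=True)
# loader = DataLoader(dataset, batch_size=2, shuffle=True)
# batch = next(iter(loader))
# batch[("color_aug", 0, 0)]   # augmented frame at the training resolution
# batch[("K", 1)]              # intrinsics for the half-resolution scale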
Code Example #14
    actual_idx_list = []
    logidx_to_count_map = {}
    log_to_count_map = {}

    print("____________SPLIT IS : {} ______________".format(split))
    if split == 'train':
        imageset_dir = os.path.join(root_dir, split)
        splitname = lambda x: [
            x[len(imageset_dir + "/"):-4].split("/")[0], x[len(
                imageset_dir + "/"):-4].split("/")[2].split("_")[1]
        ]
        data_loader = ArgoverseTrackingLoader(os.path.join(root_dir, split))
        log_list = data_loader.log_list
        path_count = 0
        for log_id, log in enumerate(log_list):
            lidar_lst = data_loader.get(log).lidar_list
            lidar_pathlist.extend(lidar_lst)
            label_pathlist.extend(data_loader.get(log).label_list)
            actual_idx_list.extend([splitname(each) for each in lidar_lst])
            logidx_to_count_map[log_id] = np.arange(
                path_count, path_count + len(lidar_lst))
            log_to_count_map[log] = np.arange(path_count,
                                              path_count + len(lidar_lst))
            path_count += len(lidar_lst)
        assert len(lidar_pathlist) == len(label_pathlist)

    elif split == 'test':
        imageset_dir = os.path.join(root_dir, split)
        splitname = lambda x: [
            x[len(imageset_dir + "/"):-4].split("/")[0], x[len(
                imageset_dir + "/"):-4].split("/")[2].split("_")[1]
Code Example #15
                data_amodel: Dict[str, Any] = {}
                for key in dict_track_labels.keys():
                    dict_amodal: Dict[str, Any] = {}
                    data_amodel[key] = dict_amodal
                    data_amodel[key]["label_class"] = dict_track_labels[key][
                        0]["label_class"]
                    data_amodel[key]["uuid"] = dict_track_labels[key][0][
                        "track_label_uuid"]
                    data_amodel[key]["log_id"] = id_log
                    data_amodel[key]["track_label_frames"] = dict_track_labels[
                        key]

            argoverse_loader = ArgoverseTrackingLoader(
                os.path.join(root_dir, name_folder))
            data_log = argoverse_loader.get(id_log)
            list_lidar_timestamp = data_log.lidar_timestamp_list

            dict_tracks: Dict[str, Any] = {}
            for id_track in data_amodel.keys():

                data = data_amodel[id_track]
                if data["label_class"] not in list_name_class:
                    continue

                data_per_frame = data["track_label_frames"]

                dict_per_track: Dict[str, Any] = {}
                dict_tracks[id_track] = dict_per_track
                dict_tracks[id_track]["ind_lidar_min"] = -1
                dict_tracks[id_track]["ind_lidar_max"] = -1
Code Example #16
File: data.py Project: temporaryprojectpage/AUTO-QA
class ArgoDataset(Dataset):
    def __init__(self,
                 question_h5,
                 image_feature_h5_path,
                 lidar_feature_h5_path,
                 vocab,
                 load_lidar=True,
                 npoint=1024,
                 normal_channel=True,
                 uniform=False,
                 cache_size=15000,
                 driveable_area=False,
                 mode='prefix',
                 image_h5=None,
                 lidar_h5=None,
                 max_samples=None,
                 question_families=None,
                 image_idx_start_from=None):

        #############read whole question_h5 file in memory#############################
        self.all_questions = question_h5['questions'][:]
        self.all_answers = get_answer_classes(question_h5['answers'][:], vocab)
        self.all_image_idxs = question_h5['image_idxs'][:]
        self.all_video_names = (question_h5['video_names'][:]).astype(str)
        self.questions_length = question_h5['question_length'][:]
        self.image_feature_h5 = image_feature_h5_path
        self.load_lidar = load_lidar

        ###########for lidar##########################################################
        if self.load_lidar:
            self.lidar_feature_h5 = lidar_feature_h5_path
            self.argoverse_loader = ArgoverseTrackingLoader(
                self.lidar_feature_h5)
            self.am = ArgoverseMap()
            self.driveable_area = driveable_area
            #############################################################################

            self.npoints = npoint
            self.uniform = uniform
            self.normal_channel = normal_channel
            self.cache_size = cache_size  # how many data points to cache in memory
            self.cache = {}  # from index to (point_set, cls) tuple

    def __len__(self):
        # print(self.all_questions.shape[0])  # debug print; noisy on every len() call
        return self.all_questions.shape[0]

    def __getitem__(self, index):
        question = self.all_questions[index]
        question_length = self.questions_length[index]
        answer = self.all_answers[index]
        image_name = self.all_video_names[index] + '@' + str(
            self.all_image_idxs[index])

        #####image feature ###############################################
        with h5py.File(self.image_feature_h5, 'r') as img_feat_file:
            image_feature = img_feat_file[image_name][:]

        if self.load_lidar:
            # if index in self.cache:
            #     point_set = self.cache[index]
            # else:
            lidar_index = self.all_image_idxs[index]
            log_id = self.all_video_names[index]
            argoverse_data = self.argoverse_loader.get(log_id)

            lidar_pts = argoverse_data.get_lidar(lidar_index, load=True)
            city_to_egovehicle_se3 = argoverse_data.get_pose(lidar_index)
            city_name = argoverse_data.city_name

            roi_area_pts = copy.deepcopy(lidar_pts)
            roi_area_pts = city_to_egovehicle_se3.transform_point_cloud(
                roi_area_pts)  # put into city coords
            roi_area_pts = self.am.remove_non_roi_points(
                roi_area_pts, city_name)
            roi_area_pts = self.am.remove_ground_surface(
                roi_area_pts, city_name)
            roi_area_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(
                roi_area_pts)

            if self.driveable_area:
                driveable_area_pts = roi_area_pts
                driveable_area_pts = city_to_egovehicle_se3.transform_point_cloud(
                    driveable_area_pts)  # put into city coords
                driveable_area_pts = self.am.remove_non_driveable_area_points(
                    driveable_area_pts, city_name)
                driveable_area_pts = city_to_egovehicle_se3.inverse_transform_point_cloud(
                    driveable_area_pts)
                point_set = driveable_area_pts
            else:
                point_set = roi_area_pts
            del lidar_pts
            if self.uniform:
                point_set = furthest_point_sample(point_set, self.npoints)
            else:
                point_set = point_set[0:self.npoints, :]

                point_set[:, 0:3] = pc_normalize(point_set[:, 0:3])

                if not self.normal_channel:
                    point_set = point_set[:, 0:3]
                if len(self.cache) < self.cache_size:
                    self.cache[index] = point_set
                # print(point_set.shape,type(point_set))
                point_set = point_set.T

        else:
            point_set = torch.randn(5)

        # image

        # image here means all 7 image
        # question feature like pretrained bert or word2vec or learned embeddings

        return (question, image_feature, question_length, point_set, answer)
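A rough sketch of how this dataset might be instantiated and iterated. The file paths, the batch size, and the way the vocab is loaded are assumptions, not taken from the project.

import h5py
from torch.utils.data import DataLoader

# Hypothetical paths; the real pipeline creates these files elsewhere.
question_h5 = h5py.File('data/val_questions.h5', 'r')
vocab = load_vocab('data/vocab.json')

dataset = ArgoDataset(question_h5,
                      image_feature_h5_path='data/val_image_features.h5',
                      lidar_feature_h5_path='data/argoverse-tracking/val',
                      vocab=vocab,
                      load_lidar=True,
                      npoint=1024)
loader = DataLoader(dataset, batch_size=8, shuffle=False)
question, image_feature, question_length, point_set, answer = next(iter(loader))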
Code Example #17
import argoverse
from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader
import pdb
pdb.set_trace()
root_dir = '/ssd_scratch/cvit/raghava.modhugu/argoverse-tracking/'
argoverse_loader = ArgoverseTrackingLoader(root_dir)
camera = 'ring_front_center'
with open('splits/argoverse/val_files.txt') as f:
    for cnt, line in enumerate(f):
        log, index, _ = line.split()
        print(log, index)
        print(
            argoverse_loader.get(log).get_image(int(index),
                                                camera=camera,
                                                load=False))
Code Example #18
def generate_road_bev(dataset_dir="", log_id="", output_dir=""):

    argoverse_loader = ArgoverseTrackingLoader(dataset_dir)
    argoverse_data = argoverse_loader.get(log_id)
    city_name = argoverse_data.city_name
    avm = ArgoverseMap()
    sdb = SynchronizationDB(dataset_dir, collect_single_log_id=log_id)
    # Remove any stale output for this log and recreate the road_gt directory
    import shutil
    road_gt_dir = os.path.join(output_dir, log_id, 'road_gt')
    if os.path.isdir(road_gt_dir):
        shutil.rmtree(road_gt_dir)
    os.makedirs(road_gt_dir, exist_ok=True)

    ply_fpath = os.path.join(dataset_dir, log_id, 'lidar')

    ply_locs = []
    for idx, ply_name in enumerate(os.listdir(ply_fpath)):
        ply_loc = np.array([idx, int(ply_name.split('.')[0].split('_')[-1])])
        ply_locs.append(ply_loc)
    ply_locs = np.array(ply_locs)
    lidar_timestamps = sorted(ply_locs[:, 1])

    calib_path = f"{dataset_dir}/{log_id}/vehicle_calibration_info.json"
    calib_data = read_json_file(calib_path)
    ind = 0
    for i in range(9):
        if calib_data['camera_data_'][i]['key'] ==\
                'image_raw_stereo_front_left':
            ind = i
            break
    rotation = np.array(calib_data['camera_data_'][ind]['value']
                        ['vehicle_SE3_camera_']['rotation']['coefficients'])
    translation = np.array(calib_data['camera_data_'][ind]['value']
                           ['vehicle_SE3_camera_']['translation'])
    egovehicle_SE3_cam = SE3(rotation=quat2rotmat(rotation),
                             translation=translation)

    for idx in range(len(lidar_timestamps)):
        lidar_timestamp = lidar_timestamps[idx]
        cam_timestamp = sdb.get_closest_cam_channel_timestamp(
            lidar_timestamp, "stereo_front_left", str(log_id))
        occupancy_map = np.zeros((256, 256))
        pose_fpath = os.path.join(
            dataset_dir, log_id, "poses",
            "city_SE3_egovehicle_" + str(lidar_timestamp) + ".json")
        if not Path(pose_fpath).exists():
            continue
        pose_data = read_json_file(pose_fpath)
        rotation = np.array(pose_data["rotation"])
        translation = np.array(pose_data["translation"])
        xcenter = translation[0]
        ycenter = translation[1]
        city_to_egovehicle_se3 = SE3(rotation=quat2rotmat(rotation),
                                     translation=translation)
        ego_car_nearby_lane_ids = avm.get_lane_ids_in_xy_bbox(
            xcenter, ycenter, city_name, 50.0)
        occupancy_map = get_lane_bev(ego_car_nearby_lane_ids, avm, city_name,
                                     city_to_egovehicle_se3,
                                     egovehicle_SE3_cam, occupancy_map, res,
                                     255)
        output_loc = os.path.join(
            output_dir, log_id, 'road_gt',
            'stereo_front_left_' + str(cam_timestamp) + '.png')
        cv2.imwrite(output_loc, occupancy_map)
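A small driver sketch (mine) for rendering the road BEV ground truth for every log in a split; the directory paths are placeholders.

# Hypothetical driver loop over all logs in a split.
dataset_dir = '/path/to/argoverse-tracking/val'   # placeholder path
output_dir = '/path/to/bev_output'                # placeholder path
for log_id in ArgoverseTrackingLoader(dataset_dir).log_list:
    generate_road_bev(dataset_dir=dataset_dir, log_id=log_id, output_dir=output_dir)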
Code Example #19
    path, dirs, files = next(os.walk(data_dir + q + '/lidar'))
    total_number = total_number + len(files)

total_number = total_number * 7

bar = progressbar.ProgressBar(maxval=total_number, \
    widgets=[progressbar.Bar('=', '[', ']'), ' ', progressbar.Percentage()])

print('Total number of files: {}. Translation starts...'.format(total_number))
print('Progress:')
bar.start()

i = 0

for log_id in argoverse_loader.log_list:
    argoverse_data = argoverse_loader.get(log_id)
    for cam in cams:
        # Recreate the calibration file content
        calibration_data = calibration.load_calib(
            data_dir + log_id + '/vehicle_calibration_info.json')[cam]
        # P2: the 3x4 camera projection matrix, flattened row-major
        L3 = 'P2: '
        for j in calibration_data.K.reshape(1, 12)[0]:
            L3 = L3 + str(j) + ' '
        L3 = L3[:-1]

        # Tr_velo_to_cam: first three rows of the 4x4 extrinsic that maps
        # ego-vehicle (LiDAR) frame points into the camera frame
        L6 = 'Tr_velo_to_cam: '
        for k in calibration_data.extrinsic.reshape(1, 16)[0][0:12]:
            L6 = L6 + str(k) + ' '
        L6 = L6[:-1]

        L1 = 'P0: 0 0 0 0 0 0 0 0 0 0 0 0'
Code Example #20
import argoverse
from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader
import argoverse.visualization.visualization_utils as viz_util
import matplotlib.pyplot as plt  # plotting
import cv2  #opencv
from PIL import Image

##set root_dir to the correct path to your dataset folder
root_dir = '/home/iago/Documents/argoverse/data/tracking/sample/sample'

argoverse_loader = ArgoverseTrackingLoader(root_dir)

argoverse_data = argoverse_loader[0]
print(argoverse_data)

argoverse_data = argoverse_loader.get('c6911883-1843-3727-8eaa-41dc8cda8993')
print(argoverse_data)

camera = "ring_front_center"
num_imgs = len(argoverse_data.image_list[camera])

for idx in range(0, num_imgs):

    img = argoverse_data.get_image_sync(idx, camera=camera)
    objects = argoverse_data.get_label_object(idx)
    calib = argoverse_data.get_calibration(camera)
    img_vis = Image.fromarray(
        viz_util.show_image_with_boxes(img, objects, calib))

    plt.imshow(img_vis)
    plt.title("Ring Front Center")
    plt.show()
Code Example #21
def test_data_loader_get(data_loader: ArgoverseTrackingLoader) -> None:
    data_0 = data_loader[0].current_log
    data_1 = data_loader.get("1").current_log
    assert data_0 == data_1
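The test above expects a data_loader fixture. A sketch of what such a pytest fixture could look like is below; the path to the miniature test log (containing a log with id "1") is an assumption.

import pytest


@pytest.fixture
def data_loader() -> ArgoverseTrackingLoader:
    # Assumed location of a tiny tracking log used only by the tests
    return ArgoverseTrackingLoader("tests/test_data/tracking")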
Code Example #22
class BagConverter:
    def __init__(self, args):
        # Setup Argoverse loader
        self.dataset_dir = args.dataset_dir
        self.log_id = args.log_id
        self.argoverse_loader = ArgoverseTrackingLoader(self.dataset_dir)
        self.argoverse_data = self.argoverse_loader.get(self.log_id)
        print(self.argoverse_data)

        # List of cameras to publish
        self.cameras_list = [camera for camera in self.argoverse_loader.CAMERA_LIST
                             if camera in args.cameras]
        self.load_camera_info()

        # Images timestamps
        self.image_files_dict = {
            camera: self.argoverse_data.timestamp_image_dict[camera]
            for camera in self.cameras_list}
        self.image_timestamps = {
            camera: list(self.image_files_dict[camera].keys())
            for camera in self.cameras_list}

        # Save image files dict to be added into the bag file later using Python2
        with open('/tmp/image_data_%s.json' % self.log_id, 'w') as f:
            data = {'image_files_dict': self.image_files_dict,
                    'image_timestamps': self.image_timestamps,
                    'cameras_list': self.cameras_list}
            json.dump(data, f, indent=4)

        # LiDAR timestamps
        self.lidar_files_dict = self.argoverse_data.timestamp_lidar_dict
        self.lidar_timestamps = list(self.lidar_files_dict.keys())

        # ROSBAG output path
        if not os.path.isdir(args.output_dir): os.makedirs(args.output_dir)
        self.output_filename = os.path.join(args.output_dir, '%s_no_images.bag' % self.log_id)
        self.bag = rosbag.Bag(self.output_filename, 'w')

        # Topic names
        self.lidar_topic = '/argoverse/lidar/pointcloud'
        self.camera_info_topic_template = '/argoverse/%s/camera_info'

        # TF frames
        self.map_frame = 'city'
        self.vehicle_frame = 'egovehicle'
        self.load_camera_static_tf()

    def load_camera_info(self):
        # Make a dictionary of CameraInfo messages for all cameras listed
        self.camera_info_dict = {
            camera: utils.make_camera_info_message(
                self.argoverse_data.get_calibration(camera))
            for camera in self.cameras_list}

    def load_camera_static_tf(self):
        # Make a dictionary of static TF messages for all cameras listed
        self.camera_static_tf_dict = {
            camera: utils.make_transform_stamped_message(self.vehicle_frame,
                camera, self.argoverse_data.get_calibration(
                    camera).calib_data['value']['vehicle_SE3_camera_'])
            for camera in self.argoverse_loader.CAMERA_LIST}

    def get_camera_info_message(self, camera, timestamp):
        # Make camera info message
        camera_info_msg = self.camera_info_dict[camera]
        camera_info_msg.header.stamp = utils.ros_time_from_nsecs(timestamp)
        return camera_info_msg

    def get_lidar_message(self, timestamp):
        # Load ply file
        filename = self.lidar_files_dict[timestamp]
        with open(filename, 'rb') as f:
            plydata = PlyData.read(f)

        # Make header message
        header = Header()
        header.frame_id = self.vehicle_frame  # LiDAR frame is ego vehicle frame
        header.stamp = utils.ros_time_from_nsecs(timestamp)

        # Make point cloud message (both LiDARs combined - 64 rings)
        pcl_msg = utils.make_pointcloud_message(header, plydata)
        return pcl_msg

    def convert(self):
        # Keep first timestamp for publishing static TF message
        static_timestamp = self.lidar_timestamps[0]

        # Publish LiDAR PointCloud messages
        for timestamp in tqdm(self.lidar_timestamps, desc='LiDAR, Pose'):
            lidar_msg = self.get_lidar_message(timestamp)
            self.bag.write(self.lidar_topic, lidar_msg,
                utils.ros_time_from_nsecs(timestamp))

            # Publish ego vehicle pose
            tf_msg = utils.argoverse_pose_to_transform_message(self.dataset_dir,
                self.log_id, self.map_frame, self.vehicle_frame, timestamp, read_json_file)
            self.bag.write('/tf', tf_msg, utils.ros_time_from_nsecs(timestamp))

        # Publish camera messages
        for camera in tqdm(self.cameras_list, desc='CameraInfo'):
            static_timestamp = min(static_timestamp, self.image_timestamps[camera][0])
            for timestamp in tqdm(self.image_timestamps[camera], desc=camera, leave=False):
                camera_info_msg = self.get_camera_info_message(camera, timestamp)
                self.bag.write(self.camera_info_topic_template % camera, camera_info_msg,
                    utils.ros_time_from_nsecs(timestamp))

        # Publish static TF messages
        tf_msg = TFMessage()
        for camera in tqdm(self.camera_static_tf_dict, desc='Static TF'):
            msg = self.camera_static_tf_dict[camera]
            msg.header.stamp = utils.ros_time_from_nsecs(static_timestamp)
            tf_msg.transforms.append(msg)
        self.bag.write('/tf_static', tf_msg, utils.ros_time_from_nsecs(static_timestamp))

        # Close rosbag file
        self.bag.close()
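A possible entry point for the converter, sketched here as an assumption: the argument names mirror the attributes BagConverter reads from args, but the defaults are mine.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Convert an Argoverse log into a rosbag')
    parser.add_argument('--dataset_dir', required=True)
    parser.add_argument('--log_id', required=True)
    parser.add_argument('--cameras', nargs='+', default=['ring_front_center'])
    parser.add_argument('--output_dir', default='bags')
    args = parser.parse_args()

    BagConverter(args).convert()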
Code Example #23
def generate_vehicle_bev(dataset_dir="", log_id="", output_dir=""):

    argoverse_loader = ArgoverseTrackingLoader(dataset_dir)
    argoverse_data = argoverse_loader.get(log_id)
    camera = argoverse_loader.CAMERA_LIST[7]
    calib = argoverse_data.get_calibration(camera)
    sdb = SynchronizationDB(dataset_dir, collect_single_log_id=log_id)

    calib_path = f"{dataset_dir}/{log_id}/vehicle_calibration_info.json"
    calib_data = read_json_file(calib_path)
    ind = 0
    for i in range(9):
        if calib_data['camera_data_'][i]['key'] == \
                'image_raw_stereo_front_left':
            ind = i
            break
    rotation = np.array(calib_data['camera_data_'][ind]['value']
                        ['vehicle_SE3_camera_']['rotation']['coefficients'])
    translation = np.array(calib_data['camera_data_'][ind]['value']
                           ['vehicle_SE3_camera_']['translation'])
    egovehicle_SE3_cam = SE3(rotation=quat2rotmat(rotation),
                             translation=translation)

    if not os.path.exists(os.path.join(output_dir, log_id, "car_bev_gt")):
        os.makedirs(os.path.join(output_dir, log_id, "car_bev_gt"))

    lidar_dir = os.path.join(dataset_dir, log_id, "lidar")
    ply_list = os.listdir(lidar_dir)

    pfa = PerFrameLabelAccumulator(dataset_dir,
                                   dataset_dir,
                                   "argoverse_bev_viz",
                                   save=False,
                                   bboxes_3d=True)
    pfa.accumulate_per_log_data(log_id)
    log_timestamp_dict = pfa.log_timestamp_dict
    for i, ply_name in enumerate(ply_list):
        lidar_timestamp = ply_name.split('.')[0].split('_')[1]
        lidar_timestamp = int(lidar_timestamp)

        cam_timestamp = sdb.get_closest_cam_channel_timestamp(
            lidar_timestamp, "stereo_front_left", str(log_id))
        image_path = os.path.join(
            output_dir, str(log_id), "car_bev_gt",
            "stereo_front_left_" + str(cam_timestamp) + ".jpg")
        objects = log_timestamp_dict[log_id][lidar_timestamp]
        top_view = np.zeros((256, 256))

        all_occluded = True
        for frame_rec in objects:
            if frame_rec.occlusion_val != IS_OCCLUDED_FLAG:
                all_occluded = False

        if not all_occluded:
            for frame_rec in objects:
                bbox_ego_frame = frame_rec.bbox_ego_frame
                uv = calib.project_ego_to_image(bbox_ego_frame).T
                idx_ = np.all(
                    np.logical_and(
                        np.logical_and(
                            np.logical_and(uv[0, :] >= 0.0,
                                           uv[0, :] < size[1] - 1.0),
                            np.logical_and(uv[1, :] >= 0.0,
                                           uv[1, :] < size[0] - 1.0)),
                        uv[2, :] > 0))
                if not idx_:
                    continue
                bbox_cam_fr = egovehicle_SE3_cam.inverse().\
                    transform_point_cloud(bbox_ego_frame)
                X = bbox_cam_fr[:, 0]
                Z = bbox_cam_fr[:, 2]

                if (frame_rec.occlusion_val != IS_OCCLUDED_FLAG
                        and frame_rec.obj_class_str == "VEHICLE"):
                    # Convert camera-frame X (right) / Z (forward) in metres to
                    # BEV pixel indices; `res` is metres per pixel and the offsets
                    # shift the ego-centred crop into the 256x256 canvas
                    y_img = (-Z / res).astype(np.int32)
                    x_img = (X / res).astype(np.int32)
                    x_img -= int(np.floor(-20 / res))
                    y_img += int(np.floor(40 / res))
                    box = np.array([x_img[2], y_img[2]])
                    box = np.vstack((box, [x_img[6], y_img[6]]))
                    box = np.vstack((box, [x_img[7], y_img[7]]))
                    box = np.vstack((box, [x_img[3], y_img[3]]))
                    cv2.drawContours(top_view, [box], 0, 255, -1)

        cv2.imwrite(image_path, top_view)
Code Example #24
def visualize_loop(args, val_loader):

    image_feature_size = 512
    lidar_feature_size = 1024

    if args.model_type == 'SAN':
        question_feat_size = 512
        model = SAN(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')
    if args.model_type == 'MCB':
        question_feat_size = 512
        model = MCB(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')
    if args.model_type == 'MFB':
        question_feat_size = 512
        # image_feature_size=512
        model = MFB(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')
    if args.model_type == 'MLB':
        question_feat_size = 1024
        image_feature_size = 512
        model = MLB(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')
    if args.model_type == 'MUTAN':
        question_feat_size = 1024
        image_feature_size = 512
        model = MUTAN(args,
                      question_feat_size,
                      image_feature_size,
                      lidar_feature_size,
                      num_classes=34,
                      qa=None,
                      encoder=args.encoder_type,
                      method='hierarchical')
    if args.model_type == 'DAN':
        question_feat_size = 512
        model = DAN(args,
                    question_feat_size,
                    image_feature_size,
                    lidar_feature_size,
                    num_classes=34,
                    qa=None,
                    encoder=args.encoder_type,
                    method='hierarchical')

    data = load_weights(args, model, optimizer=None)
    if isinstance(data, list):
        model, optimizer, start_epoch, loss, accuracy = data
        print("Loaded weights")
        print("Epoch: %d, loss: %.3f, Accuracy: %.4f " %
              (start_epoch, loss, accuracy),
              flush=True)
    else:
        print("Error occurred while loading model; training freshly")
        model = data
        return

    ###########################################################################multiple GPU use#
    # if torch.cuda.device_count() > 1:
    #     print("Using ", torch.cuda.device_count(), "GPUs!")
    #     model = nn.DataParallel(model)

    model.to(device=args.device)
    model.eval()

    import argoverse
    from argoverse.data_loading.argoverse_tracking_loader import ArgoverseTrackingLoader
    from argoverse.utils.json_utils import read_json_file
    from argoverse.map_representation.map_api import ArgoverseMap

    vocab = load_vocab(os.path.join(args.input_base, args.vocab))
    argoverse_loader = ArgoverseTrackingLoader(
        '../../../Data/train/argoverse-tracking')

    k = 1
    with torch.no_grad():
        for data in tqdm(val_loader):
            question, image_feature, ques_lengths, point_set, answer, image_name = data
            question = question.to(device=args.device)
            ques_lengths = ques_lengths.to(device=args.device)
            image_feature = image_feature.to(device=args.device)
            point_set = point_set.to(device=args.device)

            pred, wgt, energies = model(question, image_feature, ques_lengths,
                                        point_set)

            question = question.cpu().data.numpy()
            answer = answer.cpu().data.numpy()
            pred = F.softmax(pred, dim=1)
            pred = torch.argmax(pred, dim=1)
            pred = np.asarray(pred.cpu().data)
            wgt = wgt.cpu().data.numpy()
            energies = energies.squeeze(1).cpu().data.numpy()
            ques_lengths = ques_lengths.cpu().data.numpy()
            pat = re.compile(r'(.*)@(.*)')
            _, keep = np.where([answer == pred])
            temp_batch_size = question.shape[0]
            for b in range(temp_batch_size):
                q = get_ques(question[b], ques_lengths[b], vocab)
                ans = get_ans(answer[b])
                pred_ans = get_ans(pred[b])
                # print(q,ans)
                c = list(re.findall(pat, image_name[b]))[0]
                log_id = c[0]
                idx = int(c[1])
                print(k)
                argoverse_data = argoverse_loader.get(log_id)
                if args.model_type == 'SAN':
                    plot_att(argoverse_data, idx, wgt[b, :, 1, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'MCB':
                    plot_att(argoverse_data, idx, wgt[b], energies[b], q, ans,
                             args.save_dir, k, pred_ans)
                if args.model_type == 'MFB':
                    plot_att(argoverse_data, idx, wgt[b, :, :, 1], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'MLB':
                    plot_att(argoverse_data, idx, wgt[b, :, 3, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'MUTAN':  #only two glimpses
                    plot_att(argoverse_data, idx, wgt[b, :, 1, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)
                if args.model_type == 'DAN':  #only two memory
                    plot_att(argoverse_data, idx, wgt[b, :, 1, :], energies[b],
                             q, ans, args.save_dir, k, pred_ans)

                k = k + 1