コード例 #1
0
def prepareVideoSample(info, images, cameras, bboxes, subject = 'S1', imageShape = [384, 384], scaleBox = 1.0, crop = True, normImage = False):
    """Build a multi-view sample dict from pre-loaded per-camera data.

    Args:
        info: dict with at least a 'camera_names' list (and 'subject_names').
        images: mapping camera_name -> image array (H x W x C).
        cameras: mapping camera_name -> dict with 'R', 't', 'K', 'dist'.
        bboxes: mapping camera_name -> bounding box in TLBR order.
        subject: subject name; kept for interface compatibility (not used
            in the computation below).
        imageShape: target shape to resize each view to, or None to skip
            resizing.  NOTE: a mutable default is kept for compatibility;
            it is only read, never mutated.
        scaleBox: scale factor applied to each bounding box.
        crop: if True, crop each image to its (scaled) bbox and update the
            camera accordingly.
        normImage: if True, normalize each image with normalize_image().

    Returns:
        dict with parallel lists under 'images', 'cameras' and
        'proj_matrices'.  Views whose bbox is empty are skipped by
        convention, so the lists may be shorter than the camera count.
    """
    sample = defaultdict(list)  # return value
    for camera_idx, camera_name in enumerate(info['camera_names']):
        bbox = bboxes[camera_name][[1, 0, 3, 2]]  # TLBR to LTRB
        bbox_height = bbox[2] - bbox[0]

        if bbox_height == 0:
            # convention: if the bbox is empty, then this view is missing
            continue

        # scale the bounding box
        bbox = scale_bbox(bbox, scaleBox)

        # load camera and image for this view
        shot_camera = cameras[camera_name]
        image = images[camera_name]
        retval_camera = Camera(shot_camera['R'], shot_camera['t'], shot_camera['K'], shot_camera['dist'], camera_name)

        if crop:
            # crop image and keep camera intrinsics consistent with the crop
            image = crop_image(image, bbox)
            retval_camera.update_after_crop(bbox)

        if imageShape is not None:
            # resize and update the camera intrinsics accordingly
            image_shape_before_resize = image.shape[:2]
            image = resize_image(image, imageShape)
            retval_camera.update_after_resize(image_shape_before_resize, imageShape)

        if normImage:
            # fix: normImage was previously accepted but never applied
            # (sibling loaders in this file do normalize when requested)
            image = normalize_image(image)

        sample['images'].append(image)
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)

    # freeze the defaultdict so missing keys raise instead of silently
    # creating empty lists downstream
    sample.default_factory = None
    return sample
コード例 #2
0
    def __getitem__(self, idx):
        """Assemble one multi-view sample for dataset index *idx*.

        For every camera that is not in ``self.ignore_cameras`` and whose
        bounding box is non-empty, this loads the frame image from disk,
        optionally crops / resizes / normalizes it, and collects the
        per-view image, detection, Camera object and projection matrix.
        The shot's 3D keypoints (padded with dummy confidence 1.0) and the
        sample index are attached once per sample.
        """
        sample = defaultdict(list)  # return value
        shot = self.labels['table'][idx]

        subject = self.labels['subject_names'][shot['subject_idx']]
        action = self.labels['action_names'][shot['action_idx']]
        frame_idx = shot['frame_idx']

        for camera_idx, camera_name in enumerate(self.labels['camera_names']):
            if camera_idx in self.ignore_cameras:
                continue

            # load bounding box
            bbox = shot['bbox_by_camera_tlbr'][camera_idx][[1, 0, 3,
                                                            2]]  # TLBR to LTRB
            bbox_height = bbox[2] - bbox[0]
            if bbox_height == 0:
                # convention: if the bbox is empty, then this view is missing
                continue

            # scale the bounding box
            bbox = scale_bbox(bbox, self.scale_bbox)

            # load image (frame indices on disk are 1-based, hence the +1)
            image_path = os.path.join(
                self.h36m_root, subject, action,
                'imageSequence' + '-undistorted' * self.undistort_images,
                camera_name, 'img_%06d.jpg' % (frame_idx + 1))
            assert os.path.isfile(image_path), '%s doesn\'t exist' % image_path
            image = cv2.imread(image_path)

            # load camera parameters for this (subject, camera) pair
            shot_camera = self.labels['cameras'][shot['subject_idx'],
                                                 camera_idx]
            retval_camera = Camera(shot_camera['R'], shot_camera['t'],
                                   shot_camera['K'], shot_camera['dist'],
                                   camera_name)

            if self.crop:
                # crop image and keep camera intrinsics consistent with the crop
                image = crop_image(image, bbox)
                retval_camera.update_after_crop(bbox)

            if self.image_shape is not None:
                # resize and update the camera intrinsics accordingly
                image_shape_before_resize = image.shape[:2]
                image = resize_image(image, self.image_shape)
                retval_camera.update_after_resize(image_shape_before_resize,
                                                  self.image_shape)

                sample['image_shapes_before_resize'].append(
                    image_shape_before_resize)

            if self.norm_image:
                image = normalize_image(image)

            sample['images'].append(image)
            sample['detections'].append(bbox +
                                        (1.0, ))  # TODO add real confidences
            sample['cameras'].append(retval_camera)
            sample['proj_matrices'].append(retval_camera.projection)

        # 3D keypoints for the shot, truncated to num_keypoints and padded
        # with a dummy confidence column of 1.0
        sample['keypoints_3d'] = np.pad(shot['keypoints'][:self.num_keypoints],
                                        ((0, 0), (0, 1)),
                                        'constant',
                                        constant_values=1.0)

        # build cuboid
        # base_point = sample['keypoints_3d'][6, :3]
        # sides = np.array([self.cuboid_side, self.cuboid_side, self.cuboid_side])
        # position = base_point - sides / 2
        # sample['cuboids'] = volumetric.Cuboid3D(position, sides)

        # save sample's index
        sample['indexes'] = idx

        if self.keypoints_3d_pred is not None:
            sample['pred_keypoints_3d'] = self.keypoints_3d_pred[idx]

        # freeze the defaultdict so missing keys raise instead of silently
        # creating empty lists downstream
        sample.default_factory = None
        return sample
コード例 #3
0
def prepareSample(idx, labels, human36mRoot, keyPoint3d = None , imageShape = None, scaleBox = 1.0, crop = True, normImage = False):
    """Load one Human3.6M sample (all cameras) for dataset index *idx*.

    Args:
        idx: index into ``labels['table']``.
        labels: Human3.6M label structure with 'table', 'subject_names',
            'action_names', 'camera_names' and 'cameras'.
        human36mRoot: root directory of the processed Human3.6M images.
        keyPoint3d: optional array of predicted 3D keypoints indexed by
            ``idx``; stored under 'pred_keypoints_3d' when given.
        imageShape: target shape to resize each view to, or None to skip.
        scaleBox: scale factor applied to each bounding box.
        crop: if True, crop each image to its (scaled) bbox.
        normImage: if True, normalize each image with normalize_image().

    Returns:
        dict with per-view lists ('images', 'detections', 'cameras',
        'proj_matrices', 'action', 'subject', 'frameId') plus per-sample
        entries ('keypoints_3d', 'indexes', optionally
        'pred_keypoints_3d').  Views with an empty bbox are skipped.

    Raises:
        AssertionError: if an expected image file does not exist.
    """
    sample = defaultdict(list)  # return value
    shot = labels['table'][idx]
    subject = labels['subject_names'][shot['subject_idx']]
    action = labels['action_names'][shot['action_idx']]
    frame_idx = shot['frame_idx']

    for camera_idx, camera_name in enumerate(labels['camera_names']):
        bbox = shot['bbox_by_camera_tlbr'][camera_idx][[1, 0, 3, 2]]  # TLBR to LTRB
        bbox_height = bbox[2] - bbox[0]

        if bbox_height == 0:
            # convention: if the bbox is empty, then this view is missing
            continue

        # scale the bounding box
        bbox = scale_bbox(bbox, scaleBox)

        # load image (frame indices on disk are 1-based, hence the +1)
        image_path = os.path.join(human36mRoot, subject, action, 'imageSequence', camera_name, 'img_%06d.jpg' % (frame_idx + 1))
        assert os.path.isfile(image_path), '%s doesn\'t exist' % image_path
        image = cv2.imread(image_path)

        # load camera parameters for this (subject, camera) pair
        shot_camera = labels['cameras'][shot['subject_idx'], camera_idx]
        retval_camera = Camera(shot_camera['R'], shot_camera['t'], shot_camera['K'], shot_camera['dist'], camera_name)

        if crop:
            # crop image and keep camera intrinsics consistent with the crop
            image = crop_image(image, bbox)
            retval_camera.update_after_crop(bbox)

        if imageShape is not None:
            # resize and update the camera intrinsics accordingly
            image_shape_before_resize = image.shape[:2]
            image = resize_image(image, imageShape)
            retval_camera.update_after_resize(image_shape_before_resize, imageShape)

            sample['image_shapes_before_resize'].append(image_shape_before_resize)

        if normImage:
            image = normalize_image(image)

        sample['images'].append(image)
        sample['detections'].append(bbox + (1.0,))  # TODO add real confidences
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)
        sample["action"].append(action)
        sample["subject"].append(subject)
        sample["frameId"].append(frame_idx)

    # 3D keypoints (first 17 joints) with a dummy confidence column of 1.0.
    # fix: this was previously recomputed inside the per-camera loop and was
    # missing entirely when every view was skipped; it is per-sample data,
    # so compute it once after the loop (matching the other loaders here).
    sample['keypoints_3d'] = np.pad(
        shot['keypoints'][:17],
        ((0, 0), (0, 1)), 'constant', constant_values=1.0)

    # build cuboid
    # base_point = sample['keypoints_3d'][6, :3]
    # sides = np.array([self.cuboid_side, self.cuboid_side, self.cuboid_side])
    # position = base_point - sides / 2
    # sample['cuboids'] = volumetric.Cuboid3D(position, sides)

    # save sample's index
    sample['indexes'] = idx

    if keyPoint3d is not None:
        sample['pred_keypoints_3d'] = keyPoint3d[idx]

    # freeze the defaultdict so missing keys raise instead of silently
    # creating empty lists downstream
    sample.default_factory = None
    return sample
コード例 #4
0
    def __getitem__(self, idx):
        """Assemble a single-view sample for dataset index *idx*.

        Unlike the multi-view loaders, the camera is pre-selected via
        ``self.camera_idxes[idx]``; the single-element loop below keeps the
        body structurally identical to the multi-view version.  Supports
        reading images from a directory tree or from a zip archive
        (``self.data_format == 'zip'``), and optional random-erasing of
        selected joints for augmentation.
        """
        camera_idx = self.camera_idxes[idx]

        sample = defaultdict(list)  # return value
        shot = self.labels['table'][idx]

        subject = self.labels['subject_names'][shot['subject_idx']]
        sample['subject'] = subject
        action = self.labels['action_names'][shot['action_idx']]
        frame_idx = shot['frame_idx']

        # single-element loop: only the pre-selected camera is processed,
        # but the body mirrors the multi-view loader for consistency
        for camera_idx, camera_name in [[
                camera_idx, self.labels['camera_names'][camera_idx]
        ]]:
            if camera_idx in self.ignore_cameras:
                continue

            # load bounding box
            bbox = shot['bbox_by_camera_tlbr'][camera_idx][[1, 0, 3,
                                                            2]]  # TLBR to LTRB
            bbox_height = bbox[2] - bbox[0]
            if bbox_height == 0:
                # convention: if the bbox is empty, then this view is missing
                continue

            # scale the bounding box; 'scale' normalizes the bbox size by a
            # 150-pixel reference (presumably a model convention — confirm)
            bbox = scale_bbox(bbox, self.scale_bbox)
            scale = ((bbox[2] - bbox[0]) / 150.0, (bbox[3] - bbox[1]) / 150.0)

            # load image (frame indices on disk are 1-based, hence the +1);
            # for zip data the 'processed' path segment is rewritten so the
            # zip reader can address files inside the archive
            image_dir = self.root.replace(
                'processed',
                'processed.zip@') if self.data_format == 'zip' else self.root
            image_path = os.path.join(
                image_dir, subject, action,
                'imageSequence' + '-undistorted' * self.undistort_images,
                camera_name, 'img_%06d.jpg' % (frame_idx + 1))
            if self.data_format == 'zip':
                from mvn.datasets.utils import zipreader_imread
                image = zipreader_imread(image_path)
            else:
                image = cv2.imread(image_path)
            if image is None:
                # NOTE(review): this assert only fires when the file is
                # missing; an existing-but-unreadable file leaves image as
                # None and fails later in crop_image — confirm intended
                assert os.path.isfile(
                    image_path), '%s doesn\'t exist' % image_path

            # load camera parameters for this (subject, camera) pair
            shot_camera = self.labels['cameras'][shot['subject_idx'],
                                                 camera_idx]
            retval_camera = Camera(shot_camera['R'], shot_camera['t'],
                                   shot_camera['K'], shot_camera['dist'],
                                   camera_name)

            if self.crop:
                # crop image and keep camera intrinsics consistent with the crop
                image = crop_image(image, bbox)
                retval_camera.update_after_crop(bbox)

            if self.image_shape is not None:
                # resize and update the camera intrinsics accordingly
                image_shape_before_resize = image.shape[:2]
                image = resize_image(image, self.image_shape)
                retval_camera.update_after_resize(image_shape_before_resize,
                                                  self.image_shape)

                sample['image_shapes_before_resize'].append(
                    image_shape_before_resize)

            if self.norm_image:
                image = normalize_image(image)

            if self.erase:
                # erase image regions around selected projected GT joints
                # (augmentation); joint indices are dataset-specific
                keypoints_3d_gt = shot['keypoints'][:self.num_keypoints]
                keypoints_2d_gt = project_3d_points_to_image_plane_without_distortion(
                    retval_camera.projection, keypoints_3d_gt)
                erase_joints = [6, 1, 4, 11, 14]
                image = erase_image(
                    image, [keypoints_2d_gt[joint] for joint in erase_joints])

            sample['images'].append(image)
            sample['detections'].append(bbox +
                                        (1.0, ))  # TODO add real confidences
            sample['scale'].append(scale)
            sample['cameras'].append(retval_camera)
            sample['proj_matrices'].append(retval_camera.projection)

        # 3D keypoints for the shot, truncated to num_keypoints and padded
        # with a dummy confidence column of 1.0
        sample['keypoints_3d'] = np.pad(shot['keypoints'][:self.num_keypoints],
                                        ((0, 0), (0, 1)),
                                        'constant',
                                        constant_values=1.0)

        # save sample's index
        sample['indexes'] = idx

        if self.keypoints_3d_pred is not None:
            sample['pred_keypoints_3d'] = self.keypoints_3d_pred[idx]

        # freeze the defaultdict so missing keys raise instead of silently
        # creating empty lists downstream
        sample.default_factory = None
        return sample
コード例 #5
0
def makeNViewImagesAndCameras():
    """Build a multi-view sample dict from hard-coded test photographs.

    Pipeline: estimate camera intrinsics from a set of ChArUco-board
    calibration photos, then for each capture photo estimate the camera
    pose from the board, detect the first human (Detectron2) to get a
    bounding box, crop/resize/normalize the image, and collect per-view
    images, Camera objects and projection matrices.

    Returns:
        defaultdict with parallel lists under 'images', 'cameras' and
        'proj_matrices', plus a zero-initialized (17, 3)
        'pred_keypoints_3d' placeholder.  Views where the board is not
        detected are skipped.  NOTE: unlike the other loaders here,
        default_factory is NOT reset to None before returning.
    """
    sample = defaultdict(list)  # return value

    calib = ArucoCalibrator()
    # calibration-board photos used to estimate the shared intrinsics
    image_paths_test = [
        "testdata/IMG_20210208_135527.jpg", "testdata/IMG_20210208_135532.jpg",
        "testdata/IMG_20210209_114242.jpg", "testdata/IMG_20210209_114246.jpg",
        "testdata/IMG_20210209_114249.jpg", "testdata/IMG_20210209_114255.jpg",
        "testdata/IMG_20210209_114300.jpg", "testdata/IMG_20210209_114305.jpg",
        "testdata/IMG_20210209_114311.jpg", "testdata/IMG_20210209_114318.jpg",
        "testdata/IMG_20210209_114323.jpg"
    ]
    calibration, cameraMatrix, distCoeffs, nView_rvecs, nView_tvecs = calib.getCameraIntrinsicFromImages(
        image_paths_test)

    print(cameraMatrix)
    print(distCoeffs)

    # the actual multi-view capture (board must be visible for pose estimation)
    image_paths_test = [
        "testdata/IMG_20210210_202957.jpg", "testdata/IMG_20210210_203002.jpg",
        "testdata/IMG_20210210_203017.jpg"
    ]

    # NOTE(review): a large commented-out block of hard-coded Human3.6M S9
    # test data (image paths, bboxes, and per-camera R/t/K/dist arrays) was
    # removed from here; recover it from version control if needed.

    for i, image_path in enumerate(image_paths_test):

        image = cv2.imread(image_paths_test[i])

        # estimate extrinsics (R, t) from the ChArUco board in this view
        retval, rmat, tvecs = calib.getRmatAndTvecFromImgWithCharuco(
            image, cameraMatrix, distCoeffs)
        if not retval:
            # board not detected in this photo: skip the view
            continue
        retval_camera = Camera(rmat, tvecs, cameraMatrix, distCoeffs,
                               "asaba_cell_photo")  # R, t, K, dist
        ############################
        # debug visualization of the world box (display calls disabled)
        rvecs, jacob = cv2.Rodrigues(cv2.UMat(np.array(rmat)))
        tempimg = calib.drawWorldBox(image, rvecs, tvecs, cameraMatrix,
                                     distCoeffs)
        #print(rvecs.get())
        #print(tvecs)
        #print(retval_camera.projection)
        #cv2.imshow('img',tempimg)
        #cv2.waitKey(0) & 0xff
        ############################

        # Get bbox of the first detected person
        det2u = Detectron2util()
        bbox = det2u.getBboxOfFirstHuman(image)

        # crop and keep camera intrinsics consistent with the crop
        image = crop_image(
            image, bbox)  # 2nd arg is a tuple of (left, upper, right, lower)
        retval_camera.update_after_crop(bbox)

        # resize to the network input size and update intrinsics accordingly
        image_shape_before_resize = image.shape[:2]
        image = resize_image(image, [384, 384])
        retval_camera.update_after_resize(image_shape_before_resize,
                                          [384, 384])

        # Normalize
        image = normalize_image(image)

        sample['images'].append(image)
        sample['cameras'].append(retval_camera)
        sample['proj_matrices'].append(retval_camera.projection)

    # placeholder predicted keypoints (17 joints, all zeros)
    sample['pred_keypoints_3d'] = np.zeros([17, 3])

    return sample
コード例 #6
0
    def __getitem__(self, idx):
        """Assemble one multi-view sample for dataset index *idx*.

        Template loader (CMU-Panoptic-style paths): for each chosen,
        non-ignored camera with a non-empty bbox, loads the frame image,
        optionally squares/scales the bbox, crops/resizes/normalizes the
        image, and collects per-view images, detections and Camera
        objects.  Ground-truth 3D keypoints are attached only when present
        in the shot.
        """
        # TOOD: Change according to naming conventions
        sample = defaultdict(list)  # return value
        shot = self.labels['table'][idx]

        person = shot['person_id']

        action_idx = shot['action_idx']
        action = self.labels['action_names'][action_idx]

        frame_idx = shot['frame_name']

        for camera_idx, camera_name in enumerate(self.labels['camera_names']):
            if camera_idx not in self.choose_cameras or camera_idx in self.ignore_cameras:
                continue

            # load bounding box
            # NOTE(review): the key name says TLBR but the unpacking names
            # assume (left, top, right, bottom) order — confirm the stored
            # ordering; the other loaders here reorder with [[1, 0, 3, 2]]
            left, top, right, bottom, bbox_confidence = shot[
                'bbox_by_camera_tlbr'][camera_idx]
            bbox = (left, top, right, bottom)

            if top - bottom == 0 or left - right == 0:
                # convention: if the bbox is empty, then this view is missing
                continue

            # square and scale the bounding box
            if self.square_bbox:
                bbox = get_square_bbox(bbox)

            bbox = scale_bbox(bbox, self.scale_bbox)

            # TODO: Change according to dataset paths
            # load image
            # $DIR_ROOT/[action_NAME]/hdImgs/[VIEW_ID]/[VIEW_ID]_[FRAME_ID].jpg
            # NOTE: pad with 0s using {frame_idx:08}
            image_path = os.path.join(self.example_root, action, 'hdImgs',
                                      camera_name,
                                      f'{camera_name}_{frame_idx:08}.jpg')
            assert os.path.isfile(image_path), '%s doesn\'t exist' % image_path
            image = cv2.imread(image_path)

            # load camera parameters for this (action, camera) pair
            shot_camera = self.labels['cameras'][action_idx, camera_idx]
            retval_camera = Camera(shot_camera['R'], shot_camera['t'],
                                   shot_camera['K'], shot_camera['dist'],
                                   camera_name)

            if self.crop:
                # crop image and keep camera intrinsics consistent with the crop
                image = crop_image(image, bbox)
                retval_camera.update_after_crop(bbox)

            if self.image_shape is not None:
                # rescale_size: resize and update the camera intrinsics
                image_shape_before_resize = image.shape[:2]
                image = resize_image(image, self.image_shape)
                retval_camera.update_after_resize(image_shape_before_resize,
                                                  self.image_shape)

                sample['image_shapes_before_resize'].append(
                    image_shape_before_resize)

            if self.norm_image:
                image = normalize_image(image)

            sample['images'].append(image)
            sample['detections'].append(bbox)
            sample['cameras'].append(retval_camera)
            # Maybe remove to save space?
            # sample['proj_matrices'].append(retval_camera.projection)

        # TODO: Can remove or modify depending on whether your dataset has ground truth
        # 3D keypoints (with real confidences, cos CMU)
        if 'keypoints' in shot:
            sample['keypoints_3d'] = np.array(
                shot['keypoints'][:self.num_keypoints])

        # build cuboid
        # base_point = sample['keypoints_3d'][6, :3]
        # sides = np.array([self.cuboid_side, self.cuboid_side, self.cuboid_side])
        # position = base_point - sides / 2
        # sample['cuboids'] = volumetric.Cuboid3D(position, sides)

        # save sample's index
        sample['indexes'] = idx

        # TODO: Check this? Keypoints are different
        if self.keypoints_3d_pred is not None:
            sample['pred_keypoints_3d'] = self.keypoints_3d_pred[idx]

        # freeze the defaultdict so missing keys raise instead of silently
        # creating empty lists downstream
        sample.default_factory = None
        return sample