Exemple #1
0
def get_person_keypoints(model, image, center, scale):
    """Run the pose network on one person and return its keypoints.

    The region described by ``center`` and ``scale`` is warped to the
    configured network input shape, converted to a normalised tensor and fed
    through ``model`` in evaluation mode with gradients disabled. The network
    output is handed to ``get_results`` together with the box parameters.

    Args:
        model: Pose network; it is switched to eval mode by this call.
        image: Source image array passed to ``cv.warpAffine``.
        center: Box centre, forwarded to ``get_affine_transform``.
        scale: Box scale, forwarded to ``get_affine_transform``.

    Returns:
        Tuple ``(preds, maxvals)``: both outputs of ``get_results`` with
        their singleton dimensions squeezed away.
    """
    # No rotation is applied when building the crop.
    warp_matrix = get_affine_transform(center, scale, 0, cfg.INPUT_SHAPE)
    out_size = (int(cfg.INPUT_SHAPE[1]), int(cfg.INPUT_SHAPE[0]))
    crop = cv.warpAffine(image, warp_matrix, out_size, flags=cv.INTER_LINEAR)

    preprocess = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=cfg.INPUT.MEANS, std=cfg.INPUT.STDS),
    ])
    # Add the leading batch dimension expected by the network call below.
    batch = preprocess(crop).unsqueeze(0)

    model.eval()
    with torch.no_grad():
        heatmaps = model(batch)
        keypoints, confidences = get_results(
            heatmaps.clone().cpu().numpy(),
            np.asarray([center]),
            np.asarray([scale]),
            cfg.TEST.GAUSSIAN_KERNEL,
            cfg.TEST.SHIFT_RATIOS,
        )

    return keypoints.squeeze(), confidences.squeeze()
    def __getitem__(self, index):
        """Load image ``index`` and return its warped person crop.

        The image is read from ``self.imgs[index]``, converted BGR->RGB, and
        the box from ``self.bboxes[index]`` is enlarged by the test-time
        extension ratios and adjusted to ``self.w_h_ratio`` before warping to
        ``self.input_shape``.

        Args:
            index: Position of the sample in ``self.imgs`` / ``self.bboxes`` /
                ``self.ids``.

        Returns:
            Tuple ``(img, score, center, scale, img_id)``; ``score`` is always
            the constant 1.

        Raises:
            ValueError: If the image file cannot be read.
        """
        path = self.imgs[index]
        bgr = cv2.imread(path, cv2.IMREAD_COLOR)
        if bgr is None:
            raise ValueError('fail to read {}'.format(path))
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)

        center, scale = self._bbox_to_center_and_scale(self.bboxes[index])

        # Enlarge the box by the test-time extension ratios.
        scale[0] *= (1 + self.test_x_ext)
        scale[1] *= (1 + self.test_y_ext)

        # Grow one side so that scale[0] / scale[1] equals self.w_h_ratio.
        if scale[0] > self.w_h_ratio * scale[1]:
            scale[1] = scale[0] * 1.0 / self.w_h_ratio
        else:
            scale[0] = scale[1] * 1.0 * self.w_h_ratio

        # Rotation is fixed to 0 for this dataset.
        warp_matrix = get_affine_transform(center, scale, 0, self.input_shape)
        out_size = (int(self.input_shape[1]), int(self.input_shape[0]))
        img = cv2.warpAffine(
            rgb, warp_matrix, out_size, flags=cv2.INTER_LINEAR)

        if self.transform:
            img = self.transform(img)

        return img, 1, center, scale, self.ids[index]
    def __getitem__(self, idx):
        """Return the clean and occluded network inputs for record ``idx``.

        The record is deep-copied from ``self.db`` so the augmentation below
        never modifies the stored annotation. The image is read, optionally
        converted BGR->RGB, augmented when ``self.is_train`` is set (half-body
        crop, scale/rotation jitter, horizontal flip), warped to
        ``self.image_size`` and then run through ``self.transform`` in up to
        three variants: unmodified, keypoint-occluded and part-occluded.

        Args:
            idx: Index into ``self.db``.

        Returns:
            Tuple ``(input_combined, target, target_weight, meta)``.
            ``input_combined`` maps ``'normal'``, ``'keypoints_occluded'`` and
            ``'parts_occluded'`` to the respective images; ``target`` and
            ``target_weight`` are tensors made from the output of
            ``self.generate_target`` on the un-occluded joints; ``meta`` holds
            the file name, joints, visibility, centre, scale, rotation and
            score used for this sample.

        Raises:
            ValueError: If the image cannot be read.
        """
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(image_file, read_flags)
        else:
            data_numpy = cv2.imread(image_file, read_flags)

        # Check the read result BEFORE any further OpenCV call: converting a
        # missing image would raise an opaque cv2.error and skip the logged,
        # descriptive ValueError below.
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
            # Half-body augmentation: only attempted when enough joints are
            # visible, and only with probability self.prob_half_body.
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            # Random scale jitter clipped to [1 - sf, 1 + sf]; random rotation
            # clipped to [-2 * rf, 2 * rf], applied 60% of the time.
            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn() * rf, -rf * 2, rf * 2) \
                if random.random() <= 0.6 else 0

            # Horizontal flip: mirror the pixels, the joints and the centre.
            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        warped = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        # Map every visible joint into the warped image's coordinates.
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        # Independent copies for each occlusion variant so one variant's
        # transform cannot leak into another.
        keypoint_input = copy.deepcopy(warped)
        keypoint_joints = copy.deepcopy(joints)
        keypoint_joints_vis = copy.deepcopy(joints_vis)
        parts_input = copy.deepcopy(warped)
        parts_joints = copy.deepcopy(joints)
        parts_joints_vis = copy.deepcopy(joints_vis)

        if self.transform:
            if len(self.occlusion_transforms_keypoint) > 0:
                sample = {
                    'image': keypoint_input,
                    'target': keypoint_joints,
                    'joints_vis': keypoint_joints_vis
                }
                # One keypoint-occlusion transform is picked at random.
                choose_transform = random.choice(
                    self.occlusion_transforms_keypoint)
                sample = choose_transform(sample)
                sample = self.transform(sample)
                keypoint_input = sample['image']
                keypoint_joints = sample['target']
                keypoint_joints_vis = sample['joints_vis']

            if len(self.occlusion_transforms_parts) > 0:
                sample = {
                    'image': parts_input,
                    'target': parts_joints,
                    'joints_vis': parts_joints_vis
                }
                # One part-occlusion transform is picked at random.
                choose_transform = random.choice(
                    self.occlusion_transforms_parts)
                sample = choose_transform(sample)
                sample = self.transform(sample)
                parts_input = sample['image']
                parts_joints = sample['target']
                parts_joints_vis = sample['joints_vis']

            # Regular (un-occluded) image transform.
            sample = {
                'image': warped,
                'target': joints,
                'joints_vis': joints_vis
            }
            sample = self.transform(sample)
            warped = sample['image']
            joints = sample['target']
            joints_vis = sample['joints_vis']

        # The training target is built from the un-occluded joints only.
        target, target_weight = self.generate_target(joints, joints_vis)
        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        input_combined = {
            'normal': warped,
            'keypoints_occluded': keypoint_input,
            'parts_occluded': parts_input
        }
        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'parts_joints_vis': parts_joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input_combined, target, target_weight, meta
    def __getitem__(self, idx):
        """Return one sample, shaped according to ``self.stage``.

        The record is deep-copied from ``self.data``; its image is read and
        optionally converted BGR->RGB. The box scale is extended and fitted to
        ``self.w_h_ratio``. In the ``'train'`` stage the sample is additionally
        augmented (half-body crop, random scale, rotation and flip) and
        heatmap labels are generated for every entry of
        ``self.gaussian_kernels``.

        Args:
            idx: Index into ``self.data``.

        Returns:
            ``(img, valid, labels)`` when ``self.stage == 'train'``, otherwise
            ``(img, score, center, scale, img_id)``.

        Raises:
            ValueError: If the image file cannot be read.
        """
        record = copy.deepcopy(self.data[idx])
        img_id = record['img_id']
        img_path = record['img_path']

        data_numpy = cv2.imread(img_path, cv2.IMREAD_COLOR)
        if data_numpy is None:
            raise ValueError('fail to read {}'.format(img_path))
        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        # The last column of the joints array is used as the visibility flag.
        joints = record['joints'][:, :2]
        joints_vis = record['joints'][:, -1].reshape((-1, 1))

        center = record['center']
        scale = record['scale']
        score = record['score'] if 'score' in record else 1
        rotation = 0

        training = self.stage == 'train'

        # Box extension: basic plus (optionally random) per-axis extension
        # during training, fixed extension otherwise.
        if training:
            scale[0] *= (1 + self.basic_ext)
            scale[1] *= (1 + self.basic_ext)
            x_rand = np.random.rand() if self.rand_ext else 1.0
            scale[0] *= (1 + x_rand * self.x_ext)
            y_rand = np.random.rand() if self.rand_ext else 1.0
            scale[1] *= (1 + y_rand * self.y_ext)
        else:
            scale[0] *= (1 + self.test_x_ext)
            scale[1] *= (1 + self.test_y_ext)

        # Grow one side so that scale[0] / scale[1] equals self.w_h_ratio.
        if scale[0] > self.w_h_ratio * scale[1]:
            scale[1] = scale[0] * 1.0 / self.w_h_ratio
        else:
            scale[0] = scale[1] * 1.0 * self.w_h_ratio

        if training:
            # Half-body crop: needs enough visible joints and a lucky draw.
            enough_visible = (np.sum(joints_vis[:, 0] > 0)
                              > self.num_keypoints_half_body)
            if enough_visible and np.random.rand() < self.prob_half_body:
                half_center, half_scale = self.half_body_transform(
                    joints, joints_vis)
                if half_center is not None and half_scale is not None:
                    center, scale = half_center, half_scale

            # Random rescale of the box.
            jitter = random.uniform(1 + self.scale_factor_low,
                                    1 + self.scale_factor_high)
            scale *= self.scale_shrink_ratio * jitter

            # Random rotation.
            if random.random() <= self.prob_rotation:
                rotation = random.uniform(-self.rotation_factor,
                                          self.rotation_factor)

            # Random horizontal flip of pixels, joints and centre.
            if random.random() <= self.prob_flip:
                data_numpy = data_numpy[:, ::-1, :]
                img_width = data_numpy.shape[1]
                joints, joints_vis = flip_joints(
                    joints, joints_vis, img_width, self.flip_pairs)
                center[0] = img_width - center[0] - 1

        trans = get_affine_transform(center, scale, rotation, self.input_shape)
        out_size = (int(self.input_shape[1]), int(self.input_shape[0]))
        img = cv2.warpAffine(
            data_numpy, trans, out_size, flags=cv2.INTER_LINEAR)

        if self.transform:
            img = self.transform(img)

        if not training:
            return img, score, center, scale, img_id

        # Move visible joints into crop coordinates; any joint that lands
        # outside the crop is marked as not visible.
        for k in range(self.keypoint_num):
            if joints_vis[k, 0] > 0:
                joints[k, 0:2] = affine_transform(joints[k, 0:2], trans)
                if joints[k, 0] < 0 \
                        or joints[k, 0] > self.input_shape[1] - 1 \
                        or joints[k, 1] < 0 \
                        or joints[k, 1] > self.input_shape[0] - 1:
                    joints_vis[k, 0] = 0
        valid = torch.from_numpy(joints_vis).float()

        # One stack of heatmaps per configured Gaussian kernel.
        labels_num = len(self.gaussian_kernels)
        labels = np.zeros(
            (labels_num, self.keypoint_num, *self.output_shape))
        for k in range(labels_num):
            labels[k] = self.generate_heatmap(
                joints, valid, kernel=self.gaussian_kernels[k])
        labels = torch.from_numpy(labels).float()

        return img, valid, labels
    :return:
    """
    trans = torch.as_tensor(trans, device=pts.device, dtype=torch.float32)
    xy1 = torch.stack((pts[0], pts[1], torch.ones_like(pts[0]))).contiguous()
    return torch.mm(trans, xy1)


if __name__ == '__main__':
    # Smoke test: compare this module's affine helpers against the ones in
    # lib.utils.transforms by printing both results side by side.
    import lib.utils.transforms as trans
    center = np.array([100., 100.], dtype=np.float32)
    scale = np.array([100., 120.], dtype=np.float32) / 200
    patch_size = [50, 60]

    trans_1 = trans.get_affine_transform(center=center,
                                         scale=scale,
                                         rot=0,
                                         inv=0,
                                         output_size=patch_size)
    trans_2 = get_affine_transform(center=center,
                                   scale=scale,
                                   inv=0,
                                   patch_size=patch_size)
    print(trans_1, trans_2)
    print(trans_1.shape, trans_1.dtype, trans_2.dtype)
    # Element-wise agreement of the two matrices.
    print(np.isclose(trans_1, trans_2, atol=1e-7))

    cords = torch.randn(2, 10)
    print(trans.affine_transform_pts(cords.numpy().T, trans_1).T)
    print(affine_transform_pts(cords, trans_1))
    # Repeat on the GPU only when one is present, so the smoke test still
    # completes on CPU-only hosts instead of crashing in .cuda().
    if torch.cuda.is_available():
        cords = cords.cuda()
        print(affine_transform_pts(cords, trans_1))
    def __getitem__(self, idx):
        """Return the network input, target and metadata for record ``idx``.

        The record is deep-copied from ``self.db`` so that augmentation does
        not touch the stored annotation. When ``self.is_train`` is set the box
        scale and rotation are jittered and the sample may be flipped
        horizontally. The image is then warped to ``self.image_size`` and the
        visible joints are mapped through the same affine matrix.

        Args:
            idx: Index into ``self.db``.

        Returns:
            Tuple ``(input, target, target_weight, meta)``; ``target`` and
            ``target_weight`` are tensors made from the output of
            ``self.generate_target`` and ``meta`` is a dict describing the
            sample (file, joints, centre, scale, rotation, score).

        Raises:
            ValueError: If the image cannot be read.
        """
        record = copy.deepcopy(self.db[idx])

        image_file = record['image']
        filename = record.get('filename', '')
        imgnum = record.get('imgnum', '')

        read_flags = cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION
        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(image_file, read_flags)
        else:
            data_numpy = cv2.imread(image_file, read_flags)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        joints = record['joints_3d']
        joints_vis = record['joints_3d_vis']

        center = record['center']
        scale = record['scale']
        score = record.get('score', 1)
        rotation = 0

        if self.is_train:
            sf = self.scale_factor
            rf = self.rotation_factor

            # Scale jitter clipped to [1 - sf, 1 + sf].
            scale = scale * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)

            # Rotation clipped to [-2 * rf, 2 * rf], applied 60% of the time.
            if random.random() <= 0.6:
                rotation = np.clip(np.random.randn() * rf, -rf * 2, rf * 2)
            else:
                rotation = 0

            # Horizontal flip of pixels, joints and box centre.
            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                img_width = data_numpy.shape[1]
                joints, joints_vis = fliplr_joints(
                    joints, joints_vis, img_width, self.flip_pairs)
                center[0] = img_width - center[0] - 1

        trans = get_affine_transform(center, scale, rotation, self.image_size)
        out_size = (int(self.image_size[0]), int(self.image_size[1]))
        warped = cv2.warpAffine(
            data_numpy, trans, out_size, flags=cv2.INTER_LINEAR)

        if self.transform:
            warped = self.transform(warped)

        # Only joints flagged visible are moved into warped coordinates.
        for k in range(self.num_joints):
            if joints_vis[k, 0] > 0.0:
                joints[k, 0:2] = affine_transform(joints[k, 0:2], trans)

        heatmaps, weights = self.generate_target(joints, joints_vis)
        target = torch.from_numpy(heatmaps)
        target_weight = torch.from_numpy(weights)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': center,
            'scale': scale,
            'rotation': rotation,
            'score': score
        }

        return warped, target, target_weight, meta