Example #1
import numpy as np

# get_affine_transform and affine_transform_modified are project-local helpers
def test_transformation(meta_data, image_size=(288, 384)):
    joints_original = meta_data['j_original'].squeeze().data.cpu().numpy()
    # coordinates in the 384*288 image box
    joints = meta_data['joints'].squeeze().data.cpu().numpy()
    center, scale = meta_data['center'], meta_data['scale']
    center = center.data.cpu().numpy().reshape(2)
    scale = scale.data.cpu().numpy().reshape(2)
    trans = get_affine_transform(center=center,
                                 scale=scale,
                                 rot=0.0,
                                 output_size=image_size,
                                 inv=0)
    trans_inv = get_affine_transform(center=center,
                                     scale=scale,
                                     rot=0.0,
                                     output_size=image_size,
                                     inv=1)
    # calculate the distance in terms of pixels
    transformed_coordinates = affine_transform_modified(joints_original, trans)
    transformed_coordinates2 = affine_transform_modified(joints, trans_inv)
    dif1 = joints - transformed_coordinates
    dif2 = joints_original - transformed_coordinates2
    # compute the inverse matrix by hand: [A | t]^-1 = [A^-1 | -A^-1 t]
    inv_compute = np.zeros(trans.shape, trans.dtype)
    inv_compute[:2, :2] = np.linalg.inv(trans[:2, :2])
    inv_compute[:, 2] = -inv_compute[:2, :2].dot(trans[:, 2])
    transformed_coordinates3 = affine_transform_modified(joints, inv_compute)
    dif3 = joints_original - transformed_coordinates3
    print(dif1, dif2, dif3)
    return
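The hand-computed inverse above uses the identity [A | t]^-1 = [A^-1 | -A^-1 t] for a 2x3 affine matrix. A minimal standalone check of that identity in plain NumPy, independent of the project helpers:

import numpy as np

# an arbitrary 2x3 affine matrix [A | t]
A = np.array([[0.8, -0.2], [0.1, 1.1]])
t = np.array([5.0, -3.0])
trans = np.hstack([A, t[:, None]])

# its inverse: [A^-1 | -A^-1 t]
A_inv = np.linalg.inv(A)
trans_inv = np.hstack([A_inv, (-A_inv @ t)[:, None]])

# applying trans and then trans_inv must return the original points
pts = np.random.rand(4, 2)
warped = pts @ trans[:, :2].T + trans[:, 2]
restored = warped @ trans_inv[:, :2].T + trans_inv[:, 2]
assert np.allclose(restored, pts)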
Example #2
import os

import cv2
import torch
import torchvision.transforms as transforms

# xywh2cs and get_affine_transform are project-local helpers
def gather_inputs(args, logger, image_size=(288, 384)):
    root = args.data_path
    img_names = os.listdir(root)
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])
    inputs = []
    # these test images were cropped from videos of subjects 9 and 11
    for name in img_names:
        image_file = os.path.join(root, name)
        data_numpy = cv2.imread(
            image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))
        data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        c, s = xywh2cs(0, 0, data_numpy.shape[1], data_numpy.shape[0])
        r = 0
        trans = get_affine_transform(c, s, r, image_size)
        input = cv2.warpAffine(data_numpy,
                               trans, (image_size[0], image_size[1]),
                               flags=cv2.INTER_LINEAR)
        inputs.append(transform(input).unsqueeze(0))
    return torch.cat(inputs)
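xywh2cs above is a project-local helper whose definition is not shown. In HRNet-style codebases it typically converts a pixel box (x, y, w, h) into the center/scale convention that get_affine_transform expects, with scale measured in units of 200 px and the box padded to the network input's aspect ratio. A hypothetical sketch under those assumptions (names and constants may differ from the actual project):

import numpy as np

def xywh2cs(x, y, w, h, aspect_ratio=288.0 / 384.0, pixel_std=200.0):
    # center of the box
    center = np.array([x + w * 0.5, y + h * 0.5], dtype=np.float32)
    # pad the box to the target aspect ratio
    if w > aspect_ratio * h:
        h = w * 1.0 / aspect_ratio
    elif w < aspect_ratio * h:
        w = h * aspect_ratio
    # scale in multiples of pixel_std, slightly enlarged for context
    scale = np.array([w / pixel_std, h / pixel_std], dtype=np.float32) * 1.25
    return center, scale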
Example #3
import numpy as np

# get_max_preds, get_max_preds_soft, get_max_preds_soft_pt, get_distance,
# get_affine_transform and affine_transform_modified are project-local helpers
def accuracy_pixel(output,
                   meta_data,
                   image_size=(288.0, 384.0),
                   arg_max='hard'):
    '''
    Report errors in terms of pixels in the original image plane.
    '''
    if arg_max == 'soft':
        if isinstance(output, np.ndarray):
            pred, max_vals = get_max_preds_soft(output)
        else:
            pred, max_vals = get_max_preds_soft_pt(output)
    elif arg_max == 'hard':
        if not isinstance(output, np.ndarray):
            output = output.data.cpu().numpy()
        pred, max_vals = get_max_preds(output)
    else:
        raise NotImplementedError

    # multiply by the down-sample ratio (assumes equal ratio in x and y)
    if not isinstance(pred, np.ndarray):
        pred = pred.data.cpu().numpy()
        max_vals = max_vals.data.cpu().numpy()
    pred *= image_size[0] / output.shape[3]
    # inverse transform and compare pixel distance
    centers = meta_data['center']
    scales = meta_data['scale']
    rots = meta_data['rotation']
    centers = centers.data.cpu().numpy()
    scales = scales.data.cpu().numpy()
    rots = rots.data.cpu().numpy()
    joints_original_batch = meta_data['j_original'].data.cpu().numpy()

    distance_list = []
    all_src_coordinates = []
    for sample_idx in range(len(pred)):
        trans_inv = get_affine_transform(centers[sample_idx],
                                         scales[sample_idx],
                                         rots[sample_idx],
                                         image_size,
                                         inv=1)
        joints_original = joints_original_batch[sample_idx]
        pred_src_coordinates = affine_transform_modified(
            pred[sample_idx], trans_inv)
        all_src_coordinates.append(
            pred_src_coordinates.reshape(1, len(pred_src_coordinates), 2))
        distance_list.append(
            get_distance(joints_original, pred_src_coordinates))
    all_distance = np.hstack(distance_list)
    acc = all_distance
    avg_acc = all_distance.mean()
    cnt = len(all_distance)  # total number of joint predictions evaluated
    return acc, avg_acc, cnt, np.concatenate(all_src_coordinates,
                                             axis=0), pred, max_vals
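get_max_preds_soft / get_max_preds_soft_pt above decode heatmaps with a soft-argmax: the expected pixel coordinate under a softmax of each heatmap, which is differentiable, unlike the hard argmax of get_max_preds. A minimal NumPy sketch of the idea; the project's exact normalization and temperature may differ:

import numpy as np

def soft_argmax_2d(heatmaps):
    # heatmaps: (N, K, H, W) -> coords (N, K, 2) as (x, y) plus peak values
    n, k, h, w = heatmaps.shape
    flat = heatmaps.reshape(n, k, -1)
    # softmax over each heatmap so it forms a probability distribution
    flat = flat - flat.max(axis=2, keepdims=True)
    prob = np.exp(flat) / np.exp(flat).sum(axis=2, keepdims=True)
    prob = prob.reshape(n, k, h, w)
    # expected coordinates under that distribution
    x = (prob.sum(axis=2) * np.arange(w)).sum(axis=2)
    y = (prob.sum(axis=3) * np.arange(h)).sum(axis=2)
    coords = np.stack([x, y], axis=2)
    max_vals = heatmaps.reshape(n, k, -1).max(axis=2, keepdims=True)
    return coords, max_vals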
Example #4
    # __getitem__ of a JointsDataset-style class; copy, cv2, random,
    # numpy as np, torch and the project's transform helpers are
    # assumed imported at module level
    def __getitem__(self, idx):
        db_rec = copy.deepcopy(self.db[idx])

        image_file = db_rec['image']
        filename = db_rec['filename'] if 'filename' in db_rec else ''
        imgnum = db_rec['imgnum'] if 'imgnum' in db_rec else ''

        if self.data_format == 'zip':
            from utils import zipreader
            data_numpy = zipreader.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
        else:
            data_numpy = cv2.imread(
                image_file, cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)

        if data_numpy is None:
            logger.error('=> fail to read {}'.format(image_file))
            raise ValueError('Fail to read {}'.format(image_file))

        if self.color_rgb:
            data_numpy = cv2.cvtColor(data_numpy, cv2.COLOR_BGR2RGB)

        joints = db_rec['joints_3d']
        joints_vis = db_rec['joints_3d_vis']

        c = db_rec['center']
        s = db_rec['scale']
        score = db_rec['score'] if 'score' in db_rec else 1
        r = 0

        if self.is_train:
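            # train-time augmentation: optional half-body crop, random
            # scale/rotation jitter, and random horizontal flip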
            if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body
                    and np.random.rand() < self.prob_half_body):
                c_half_body, s_half_body = self.half_body_transform(
                    joints, joints_vis)

                if c_half_body is not None and s_half_body is not None:
                    c, s = c_half_body, s_half_body

            sf = self.scale_factor
            rf = self.rotation_factor
            s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
            r = np.clip(np.random.randn()*rf, -rf*2, rf*2) \
                if random.random() <= 0.6 else 0

            if self.flip and random.random() <= 0.5:
                data_numpy = data_numpy[:, ::-1, :]
                joints, joints_vis = fliplr_joints(joints, joints_vis,
                                                   data_numpy.shape[1],
                                                   self.flip_pairs)
                c[0] = data_numpy.shape[1] - c[0] - 1

        trans = get_affine_transform(c, s, r, self.image_size)
        input = cv2.warpAffine(
            data_numpy,
            trans, (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)

        if self.transform:
            input = self.transform(input)

        # map ground-truth joints into the warped input frame
        for i in range(self.num_joints):
            if joints_vis[i, 0] > 0.0:
                joints[i, 0:2] = affine_transform(joints[i, 0:2], trans)

        target, target_weight = self.generate_target(joints, joints_vis)

        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)

        meta = {
            'image': image_file,
            'filename': filename,
            'imgnum': imgnum,
            'joints': joints,
            'joints_vis': joints_vis,
            'center': c,
            'scale': s,
            'rotation': r,
            'score': score
        }

        return input, target, target_weight, meta
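All four examples call get_affine_transform. For reference, a self-contained sketch of the widely used HRNet/Simple-Baselines formulation of this function; the project's actual version may differ in details (e.g. an extra shift argument or the 200 px scale convention):

import cv2
import numpy as np

def get_dir(src_point, rot_rad):
    # rotate a 2D point by rot_rad radians
    sn, cs = np.sin(rot_rad), np.cos(rot_rad)
    return np.array([src_point[0] * cs - src_point[1] * sn,
                     src_point[0] * sn + src_point[1] * cs], dtype=np.float32)

def get_3rd_point(a, b):
    # third point completing a right angle at b
    direct = a - b
    return b + np.array([-direct[1], direct[0]], dtype=np.float32)

def get_affine_transform(center, scale, rot, output_size, inv=0):
    scale_tmp = np.array(scale, dtype=np.float32) * 200.0  # scale in 200 px units
    src_w = scale_tmp[0]
    dst_w, dst_h = output_size[0], output_size[1]

    rot_rad = np.pi * rot / 180.0
    src_dir = get_dir([0, src_w * -0.5], rot_rad)
    dst_dir = np.array([0, dst_w * -0.5], dtype=np.float32)

    # three corresponding point pairs fully determine the affine map
    src = np.zeros((3, 2), dtype=np.float32)
    dst = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center
    src[1, :] = np.array(center, dtype=np.float32) + src_dir
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    src[2, :] = get_3rd_point(src[0, :], src[1, :])
    dst[2, :] = get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        return cv2.getAffineTransform(dst, src)
    return cv2.getAffineTransform(src, dst)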