Example #1
def do_validation_step(model, input, target, target_weight=None, flip=False):
    assert not model.training, 'model must be in evaluation mode.'
    assert len(input) == len(
        target), 'input and target must contain the same number of examples.'

    # Forward pass and loss calculation.
    output = model(input)
    loss = sum(joints_mse_loss(o, target, target_weight) for o in output)

    # Get the heatmaps.
    if flip:
        # If `flip` is true, perform horizontally flipped inference as well. This should
        # result in more robust predictions at the expense of additional compute.
        flip_input = fliplr(input.clone().cpu().numpy())
        flip_input = torch.as_tensor(flip_input,
                                     dtype=torch.float32,
                                     device=device)
        flip_output = model(flip_input)
        flip_output = flip_output[-1].cpu()
        flip_output = flip_back(flip_output)
        heatmaps = (output[-1].cpu() + flip_output) / 2
    else:
        heatmaps = output[-1].cpu()

    return heatmaps, loss.item()
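For context, a helper like the one in Example #1 is normally driven from an evaluation loop. Below is a minimal sketch of such a loop; the driver function, loader variable names, and the (input, target, target_weight) batch layout are assumptions for illustration, and `device` is taken to be the module-level device that the function above also references when rebuilding the flipped input.
import torch

def run_validation(model, loader, device, flip=True):
    # Hypothetical driver for do_validation_step; names are illustrative only.
    model.eval()  # do_validation_step asserts the model is not in training mode
    total_loss = 0.0
    total_examples = 0
    all_heatmaps = []
    with torch.no_grad():  # gradients are not needed during validation
        for input, target, target_weight in loader:
            input = input.to(device)
            target = target.to(device)
            target_weight = target_weight.to(device)
            heatmaps, loss = do_validation_step(
                model, input, target, target_weight, flip=flip)
            all_heatmaps.append(heatmaps)
            total_loss += loss * len(input)
            total_examples += len(input)
    return torch.cat(all_heatmaps), total_loss / total_examples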
Example #2
def do_validation_step(model,
                       input,
                       target,
                       data_info,
                       target_weight=None,
                       flip=False):
    assert not model.training, 'model must be in evaluation mode.'
    assert len(input) == len(
        target), 'input and target must contain the same number of examples.'

    # Forward pass and loss calculation.
    start = time.time()
    output = model(input)
    inference_time = (time.time() - start) * 1000
    loss = sum(joints_mse_loss(o, target, target_weight) for o in output)

    # Get the heatmaps.
    if flip:
        # If `flip` is true, perform horizontally flipped inference as well. This should
        # result in more robust predictions at the expense of additional compute.
        flip_input = fliplr(input)
        flip_output = model(flip_input)
        flip_output = flip_output[-1].cpu()
        flip_output = flip_back(flip_output.detach(), data_info.hflip_indices)
        heatmaps = (output[-1].cpu() + flip_output) / 2
    else:
        heatmaps = output[-1].cpu()

    return heatmaps, loss.item(), inference_time
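One caveat about the timing added in Example #2: CUDA kernels are launched asynchronously, so bracketing `model(input)` with `time.time()` calls can measure little more than launch overhead when the model runs on a GPU. A hedged sketch of a more faithful measurement follows; the helper name and structure are assumptions, not part of the original code.
import time

import torch

def timed_forward(model, input):
    # Synchronize before and after the forward pass so the wall-clock interval
    # covers kernel execution rather than just kernel launch (no-op on CPU).
    if input.is_cuda:
        torch.cuda.synchronize(input.device)
    start = time.perf_counter()
    output = model(input)
    if input.is_cuda:
        torch.cuda.synchronize(input.device)
    inference_time_ms = (time.perf_counter() - start) * 1000
    return output, inference_time_ms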
Example #3
def test_fliplr(device):
    tensor = torch.as_tensor([[
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]],
                             dtype=torch.float32)
    expected = torch.as_tensor([[
        [3, 2, 1],
        [6, 5, 4],
        [9, 8, 7],
    ]],
                               dtype=torch.float32)
    actual = fliplr(tensor)
    assert_allclose(actual, expected)
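The test above pins down what `fliplr` is expected to do: mirror its input along the last (width) axis. A minimal tensor-based sketch consistent with that test is shown below; the actual helper in these repositories may instead operate on NumPy arrays, as the `.numpy()` round-trips in Examples #1 and #5 suggest.
import torch

def fliplr(tensor):
    # Mirror a ...xHxW tensor horizontally by reversing its last dimension.
    return torch.flip(tensor, dims=[-1])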
Example #4
    def estimate_heatmaps(self, images, flip=False):
        is_batched = _check_batched(images)
        raw_images = images if is_batched else images.unsqueeze(0)
        input_tensor = torch.empty((len(raw_images), 3, *self.input_shape),
                                   device=self.device,
                                   dtype=torch.float32)
        for i, raw_image in enumerate(raw_images):
            input_tensor[i] = self.prepare_image(raw_image)
        heatmaps = self.do_forward(input_tensor)[-1].cpu()
        if flip:
            flip_input = fliplr(input_tensor)
            flip_heatmaps = self.do_forward(flip_input)[-1].cpu()
            heatmaps += flip_back(flip_heatmaps, self.data_info.hflip_indices)
            heatmaps /= 2
        if is_batched:
            return heatmaps
        else:
            return heatmaps[0]
Example #5
    def estimate_heatmaps(self, images, mean, stddev, flip=False):
        is_batched = _check_batched(images)
        raw_images = images if is_batched else images.unsqueeze(0)
        input_tensor = torch.empty((len(raw_images), 3, 256, 256),
                                   device=self.device,
                                   dtype=torch.float32)
        for i, raw_image in enumerate(raw_images):
            input_tensor[i] = self.prepare_image(raw_image, mean, stddev)
        heatmaps = self.do_forward(input_tensor)[-1].cpu()
        if flip:
            flip_input = fliplr(input_tensor.cpu().clone().numpy())
            flip_input = torch.as_tensor(flip_input,
                                         device=self.device,
                                         dtype=torch.float32)
            flip_heatmaps = self.do_forward(flip_input)[-1].cpu()
            heatmaps += flip_back(flip_heatmaps)
            heatmaps /= 2
        if is_batched:
            return heatmaps
        else:
            return heatmaps[0]
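Both `estimate_heatmaps` variants (and Example #2) rely on `flip_back` to map heatmaps predicted on a mirrored image back into the original frame: the maps must be mirrored spatially and the channels of left/right joint pairs swapped. Below is a sketch of that idea, assuming `hflip_indices` is the permutation of joint indices that exchanges each left joint with its right counterpart; this is an illustration, not the repositories' exact implementation.
import torch

def flip_back_sketch(flipped_heatmaps, hflip_indices):
    # Undo horizontal-flip augmentation on an NxJxHxW heatmap batch:
    # mirror along the width, then reorder channels so that, e.g., the
    # "left elbow" map again refers to the subject's left elbow.
    heatmaps = torch.flip(flipped_heatmaps, dims=[-1])
    return heatmaps[:, hflip_indices]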
Example #6
    def __getitem__(self, index):
        sf = self.scale_factor
        rf = self.rot_factor
        if self.is_train:
            a = self.anno[self.train_list[index]]
        else:
            a = self.anno[self.valid_list[index]]

        img_path = os.path.join(self.img_folder, a['img_paths'])
        pts = torch.Tensor(a['joint_self'])
        # pts[:, 0:2] -= 1  # Convert pts to zero based

        # c = torch.Tensor(a['objpos']) - 1
        c = torch.Tensor(a['objpos'])
        s = a['scale_provided']

        # Adjust center/scale slightly to avoid cropping limbs
        if c[0] != -1:
            c[1] = c[1] + 15 * s
            s = s * 1.25

        # For single-person pose estimation with a centered/scaled figure
        nparts = pts.size(0)
        img = load_image(img_path)  # CxHxW

        r = 0
        if self.is_train:
            s = s*torch.randn(1).mul_(sf).add_(1).clamp(1-sf, 1+sf)[0]
            r = torch.randn(1).mul_(rf).clamp(-2*rf, 2*rf)[0] if random.random() <= 0.6 else 0

            # Flip
            if random.random() <= 0.5:
                img = torch.from_numpy(fliplr(img.numpy())).float()
                pts = shufflelr(pts, width=img.size(2), dataset='mpii')
                c[0] = img.size(2) - c[0]

            # Color
            img[0, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
            img[1, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
            img[2, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)

        # Prepare image and groundtruth map
        inp = crop(img, c, s, [self.inp_res, self.inp_res], rot=r)
        inp = color_normalize(inp, self.mean, self.std)

        # Generate ground truth
        tpts = pts.clone()
        target = torch.zeros(nparts, self.out_res, self.out_res)
        target_weight = tpts[:, 2].clone().view(nparts, 1)

        for i in range(nparts):
            # if tpts[i, 2] > 0: # This is evil!!
            if tpts[i, 1] > 0:
                tpts[i, 0:2] = to_torch(transform(tpts[i, 0:2]+1, c, s, [self.out_res, self.out_res], rot=r))
                target[i], vis = draw_labelmap(target[i], tpts[i]-1, self.sigma, type=self.label_type)
                target_weight[i, 0] *= vis

        # Meta info
        meta = {'index': index, 'center': c, 'scale': s,
                'pts': pts, 'tpts': tpts, 'target_weight': target_weight}

        return inp, target, meta
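Example #6's flip branch calls `shufflelr` to keep the joint annotations consistent with the mirrored image, and Example #7 below leaves the same step as a TODO. The essential idea is to reflect the x-coordinates and then swap left/right joint rows; a rough sketch under the assumption that a left/right index permutation (like the `hflip_indices` of Examples #2 and #4) is available:
def shufflelr_sketch(pts, width, hflip_indices):
    # Mirror Nx3 joint annotations (x, y, visibility) after a horizontal flip:
    # reflect x about the image width, then swap each left/right joint pair.
    pts = pts.clone()
    pts[:, 0] = width - pts[:, 0]
    return pts[hflip_indices]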
Example #7
    def __getitem__(self, index):
        """Get an image referenced by index."""
        sf = self.scale_factor  # Generally from 0 to 0.25
        rf = self.rot_factor
        if self.is_train:
            a = self.train_list.iloc[index]
        else:
            a = self.valid_list.iloc[index]

        img_path = a['img_paths']
        # cv2 based image transformations
        img = cv2.imread(img_path, cv2.IMREAD_COLOR)  # HxWxC
        rows, cols, colors = img.shape
        # Joint label positions
        pts = torch.Tensor(a['joint_self'])
        # pts[:, 0:2] -= 1  # Convert pts to zero based
        c = tuple(a['objpos'])
        s = a['scale_provided']
        # In MPII, scale_provided is the size of the person bounding box relative to 200 px
        # Depending on the flag "crop", we can decide to either:
        #   True: Crop to crop_size around obj_pos
        #   False: Keep original res
        # Then we downsize to inp_res
        if s == -1:  # Yogi data scale_provided is initialized to -1
            if self.crop:
                # If crop, then crop crop_size x crop_size around obj_pos
                s = self.crop_size / 200
                # Move the center away from the joint by a random distance < max_dist pixels
                max_dist = 64
                c = (int(torch.randn(1).clamp(-1, 1).mul(max_dist)
                         .add(c[0]).clamp(0, cols - 1)),
                     int(torch.randn(1).clamp(-1, 1).mul(max_dist)
                         .add(c[1]).clamp(0, rows - 1)))
            else:
                # If no crop, then use the entire image
                s = rows / 200
                # Use the center of the image to rotate
                c = (int(cols / 2), int(rows / 2))

        # # Adjust scale slightly to avoid cropping limbs
        # if c[0] != -1:
        #     c[1] = c[1] + 15 * s
        #     s = s * 1.25

        # For pose estimation with a centered/scaled figure
        nparts = pts.size(0)
        r = 0
        if self.is_train:
            # Given sf, choose scale from [1-sf, 1+sf]
            # For sf = 0.25, scale is chosen from [0.75, 1.25]
            s = torch.randn(1).mul_(sf).add_(1).clamp(1 - sf, 1 + sf)[0]
            # Given rf, choose rotation from [-rf, rf]
            # For rf = 30, rotation is chosen from [-30, 30]
            r = torch.randn(1).mul_(rf).clamp(
                -rf, rf)[0] if random.random() <= 0.6 else 0
        if self.mode == 'original':
            img = load_image(img_path)  # CxHxW
            c = torch.Tensor(c)
            if self.is_train:
                # Flip
                if self.fliplr and random.random() <= 0.5:
                    img = torch.from_numpy(fliplr(img.numpy())).float()
                    pts = shufflelr(pts, width=img.size(2),
                                    dataset='yogi')  # TODO
                    c[0] = img.size(2) - c[0]

                # Color
                # img[0, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
                # img[1, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
                # img[2, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
            # Prepare image and groundtruth map
            inp = crop(img, c, s, [self.inp_res, self.inp_res], rot=r)
            inp = color_normalize(inp, self.mean, self.std)
            t = None
        else:
            if self.is_train:
                # Flip
                if self.fliplr and random.random() <= 0.5:
                    img = cv2.flip(img, 1)
                    pts = torch.Tensor([[cols - x[0] - 1, x[1], x[2]]
                                        for x in pts])  # keep the visibility flag
                # TODO: Shuffle left and right labels

            # Rotate, scale and crop image using inp_res
            # And get transformation matrix
            img, t_inp = cv2_crop(img,
                                  c,
                                  s, (self.inp_res, self.inp_res),
                                  rot=r,
                                  crop=self.crop,
                                  crop_size=self.crop_size)
            # Get transformation matrix for resizing from inp_res to out_res
            # No other changes, i.e. new_center is center, no cropping, etc.
            # Please note scaling to out_res has to be done before
            _, t_resize = cv2_resize(img, (self.out_res, self.out_res))
            t = combine_transformations(t_resize, t_inp)
            # TODO Update color normalize
            inp = img_normalize(img, self.mean, self.std)
            # if self.is_train:
            #     # Color
            #     inp[0, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
            #     inp[1, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)
            #     inp[2, :, :].mul_(random.uniform(0.8, 1.2)).clamp_(0, 1)

        # Generate ground truth
        tpts = pts.clone()
        target = torch.zeros(nparts, self.out_res, self.out_res)
        target_weight = tpts[:, 2].clone().view(nparts, 1)

        for i in range(nparts):
            if tpts[i, 2] > 0:  # This is evil!! # if tpts[i, 1] > 0:
                # Hack: Change later -
                # The + 1 and -1 wrt tpts is there in the original code
                # Using int(self.mode == 'original') to do the + 1, -1
                tpts[i, 0:2] = to_torch(
                    transform(tpts[i, 0:2] + int(self.mode == 'original'),
                              c,
                              s, [self.out_res, self.out_res],
                              rot=r,
                              t=t))
                target[i], vis = draw_labelmap(target[i],
                                               tpts[i] -
                                               int(self.mode == 'original'),
                                               self.sigma,
                                               type=self.label_type)
                target_weight[i, 0] *= vis

        # Meta info
        meta = {
            'index': index,
            'center': c,
            'scale': s,
            'pts': pts,
            'tpts': tpts,
            'target_weight': target_weight,
            'inp_res': self.inp_res,
            'out_res': self.out_res,
            'rot': r,
            'img_paths': img_path
        }

        return inp, target, meta
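Both `__getitem__` implementations build their training targets with `draw_labelmap`, which stamps a 2-D Gaussian of width `sigma` onto the joint's heatmap and reports whether the joint actually lands on the map (the returned `vis` is multiplied into `target_weight`). A rough sketch of that Gaussian case is given below; the name `draw_gaussian_labelmap` and the exact truncation radius are assumptions, and the repositories' own function also handles other `label_type` values and details not reproduced here.
import torch

def draw_gaussian_labelmap(heatmap, pt, sigma):
    # Stamp an unnormalized 2-D Gaussian centred at pt = (x, y) onto heatmap.
    # Returns (heatmap, vis); vis is 0 when the Gaussian lies entirely outside
    # the map, so the caller can zero out that joint's target_weight.
    h, w = heatmap.shape
    x, y = int(pt[0]), int(pt[1])
    radius = int(3 * sigma)  # truncate the Gaussian at three sigma
    if x - radius >= w or y - radius >= h or x + radius < 0 or y + radius < 0:
        return heatmap, 0
    # Evaluate the Gaussian on a (2*radius + 1) x (2*radius + 1) patch.
    size = 2 * radius + 1
    xs = torch.arange(size, dtype=torch.float32)
    ys = xs.unsqueeze(1)
    x0 = y0 = size // 2
    g = torch.exp(-((xs - x0) ** 2 + (ys - y0) ** 2) / (2 * sigma ** 2))
    # Intersect the patch with the heatmap and merge via element-wise maximum.
    gx = max(0, radius - x), min(x + radius + 1, w) - (x - radius)
    gy = max(0, radius - y), min(y + radius + 1, h) - (y - radius)
    hx = max(0, x - radius), min(x + radius + 1, w)
    hy = max(0, y - radius), min(y + radius + 1, h)
    heatmap[hy[0]:hy[1], hx[0]:hx[1]] = torch.maximum(
        heatmap[hy[0]:hy[1], hx[0]:hx[1]], g[gy[0]:gy[1], gx[0]:gx[1]])
    return heatmap, 1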