Example #1
    def __call__(self, args):
        """
            img (PIL Image): Image to be transformed.

        Returns:
            PIL Image: Affine transformed image.
        """
        img, M = args
        ret = self.get_params(self.degrees, self.translate, self.scale,
                              self.shear, img.size)

        angle, translate, scale, shear = ret

        if not F._is_pil_image(img):
            raise TypeError('img should be PIL Image. Got {}'.format(
                type(img)))

        assert isinstance(translate, (tuple, list)) and len(translate) == 2, \
            "Argument translate should be a list or tuple of length 2"

        assert scale > 0.0, "Argument scale should be positive"

        output_size = img.size
        center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5)
        matrix = F._get_inverse_affine_matrix(center, angle, translate, scale,
                                              shear)
        kwargs = {"fillcolor": self.fillcolor}
        img = img.transform(output_size, Image.AFFINE, matrix, self.resample,
                            **kwargs)

        # Update transformation matrix
        inv_M = np.array([*matrix, 0, 0, 1]).reshape(3, 3)
        M = np.linalg.inv(inv_M) @ M
        return img, M
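
The last three lines above lift the six coefficients returned by _get_inverse_affine_matrix (an output-to-input mapping, as PIL's Image.transform expects) into a 3x3 matrix and invert it to accumulate the forward transform in M. A minimal standalone sketch of that bookkeeping; the center/angle/translate/scale/shear values are made up, and the private helper's signature varies slightly across torchvision versions (recent versions take shear as a two-element list):

import numpy as np
from torchvision.transforms.functional import _get_inverse_affine_matrix

center = (64.5, 64.5)                            # hypothetical image center
coeffs = _get_inverse_affine_matrix(center, angle=10.0, translate=[2.0, -3.0],
                                    scale=1.1, shear=[5.0, 0.0])
inv_M = np.array([*coeffs, 0.0, 0.0, 1.0]).reshape(3, 3)   # output -> input mapping
M = np.linalg.inv(inv_M)                                    # input -> output mapping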
Example #2
def affine_image_tensor(
    img: torch.Tensor,
    angle: float,
    translate: List[float],
    scale: float,
    shear: List[float],
    interpolation: InterpolationMode = InterpolationMode.NEAREST,
    fill: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
) -> torch.Tensor:
    angle, translate, shear, center = _affine_parse_args(
        angle, translate, scale, shear, interpolation, center)

    center_f = [0.0, 0.0]
    if center is not None:
        _, height, width = get_dimensions_image_tensor(img)
        # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
        center_f = [
            1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])
        ]

    translate_f = [1.0 * t for t in translate]
    matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale,
                                        shear)

    return _FT.affine(img,
                      matrix,
                      interpolation=interpolation.value,
                      fill=fill)
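
The center_f computation above re-expresses a pixel-coordinate center relative to the image midpoint, which is the convention the low-level tensor kernel expects. A quick worked example with a hypothetical 200x100 (width x height) image and a requested center of [50, 50]:

width, height = 200, 100
center = [50.0, 50.0]
center_f = [1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])]
# center_f == [-50.0, 0.0]: 50 px left of the image midpoint, vertically centered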
Example #3
 def __call__(self, file_a, file_b, label, mask):
     angle = float(
         torch.empty(1).uniform_(float(self.degrees[0]),
                                 float(self.degrees[1])).item())
     # print('angle: ', angle)
     center_f = [0.0, 0.0]
     matrix = tf._get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0],
                                            1.0, [0.0, 0.0])
     return (F_t.rotate(file_a,
                        matrix=matrix,
                        resample=self.resample,
                        expand=self.expand,
                        fill=self.fill),
             F_t.rotate(file_b,
                        matrix=matrix,
                        resample=self.resample,
                        expand=self.expand,
                        fill=self.fill),
             F_t.rotate(label.unsqueeze(0),
                        matrix=matrix,
                        resample=self.resample,
                        expand=self.expand,
                        fill=self.fill).squeeze(0),
             F_t.rotate(mask.unsqueeze(0),
                        matrix=matrix,
                        resample=self.resample,
                        expand=self.expand,
                        fill=self.fill).squeeze(0))
Example #4
def affine_image_pil(
    img: PIL.Image.Image,
    angle: float,
    translate: List[float],
    scale: float,
    shear: List[float],
    interpolation: InterpolationMode = InterpolationMode.NEAREST,
    fill: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
) -> PIL.Image.Image:
    angle, translate, shear, center = _affine_parse_args(
        angle, translate, scale, shear, interpolation, center)

    # center = (img_size[0] * 0.5 + 0.5, img_size[1] * 0.5 + 0.5)
    # it is visually better to estimate the center without 0.5 offset
    # otherwise image rotated by 90 degrees is shifted vs output image of torch.rot90 or F_t.affine
    if center is None:
        _, height, width = get_dimensions_image_pil(img)
        center = [width * 0.5, height * 0.5]
    matrix = _get_inverse_affine_matrix(center, angle, translate, scale, shear)

    return _FP.affine(img,
                      matrix,
                      interpolation=pil_modes_mapping[interpolation],
                      fill=fill)
Example #5
    def __call__(self, sample):
        image, boxes, labels = sample
        height = image.height
        width = image.width
        angle, translate, scale, shear = self.get_params(height, width)

        center = (width * 0.5 + 0.5, height * 0.5 + 0.5)
        coeffs = F._get_inverse_affine_matrix(center, angle, translate, scale, shear)
        inverse_affine_matrix = np.eye(3)
        inverse_affine_matrix[:2] = np.array(coeffs).reshape(2, 3)     # Fill-in first 2 rows of an affine transformation matrix

        if np.random.rand() < self.p_hflip:
            # Post-apply horizontal flip
            # Pre-multiply by the [ [-1, 0, width-1], [0, 1, 0], [0, 0, 1] ] flip matrix
            flip_matrix = np.eye(3)
            flip_matrix[0, 0] = -1
            flip_matrix[0, 2] = width-1
            # For the inverse affine matrix, pre-multiply by the inverse flip matrix (a flip matrix is its own inverse)
            inverse_affine_matrix = flip_matrix @ inverse_affine_matrix

        image = image.transform((width, height), Image.AFFINE, inverse_affine_matrix[:2].reshape(6), Image.BILINEAR)

        # Compute affine transform matrix and apply it to keypoints
        affine_matrix = np.linalg.pinv(inverse_affine_matrix)
        boxes, labels = apply_transform_and_clip(boxes, labels, affine_matrix, (width, height))

        return image, boxes, labels
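
apply_transform_and_clip is defined elsewhere in that project; purely as an illustrative sketch (not the project's actual helper), mapping XYXY boxes through the forward 3x3 matrix and re-boxing the warped corners could look like this:

import numpy as np

def transform_boxes_sketch(boxes, affine_matrix):
    # boxes: (N, 4) array in XYXY order; affine_matrix: forward 3x3 matrix
    boxes = np.asarray(boxes, dtype=np.float64)
    corners = boxes[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].reshape(-1, 2)   # (N*4, 2)
    corners = np.hstack([corners, np.ones((corners.shape[0], 1))])        # homogeneous coords
    warped = (corners @ affine_matrix.T)[:, :2].reshape(-1, 4, 2)
    mins, maxs = warped.min(axis=1), warped.max(axis=1)
    return np.concatenate([mins, maxs], axis=1)                           # axis-aligned XYXY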
Example #6
def rotate_image_tensor(
    img: torch.Tensor,
    angle: float,
    interpolation: InterpolationMode = InterpolationMode.NEAREST,
    expand: bool = False,
    fill: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
) -> torch.Tensor:
    center_f = [0.0, 0.0]
    if center is not None:
        if expand:
            warnings.warn(
                "The provided center argument has no effect on the result if expand is True"
            )
        else:
            _, height, width = get_dimensions_image_tensor(img)
            # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
            center_f = [
                1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])
            ]

    # due to current incoherence of rotation angle direction between affine and rotate implementations
    # we need to set -angle.
    matrix = _get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0,
                                        [0.0, 0.0])
    return _FT.rotate(img,
                      matrix,
                      interpolation=interpolation.value,
                      expand=expand,
                      fill=fill)
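
A minimal usage sketch of the function above; the tensor contents and angle are arbitrary, and passing a center together with expand=True would only trigger the warning and fall back to the image center:

img = torch.rand(3, 64, 64)                              # CHW float tensor
rotated = rotate_image_tensor(img, angle=30.0, expand=True)
rotated_about_corner = rotate_image_tensor(img, angle=30.0, center=[0.0, 0.0])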
Example #7
    def __call__(self, sample):
        """
        Args:
            img (PIL Image): Image to be transformed.

        Returns:
            PIL Image: Affine transformed image.
        """
        img, labels = sample['image'], sample['labels']
        warp_boxes = sample['warp_boxes']

        ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img.size)
        img = TF.affine(img, *ret, resample=self.resample, fillcolor=self.fillcolor)
        labels = TF.affine(labels, *ret, resample=self.resample, fillcolor=self.fillcolor)
        orig_box = warp_boxes * 256. + 256.

        # Affine boxes
        center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5)
        matrix = np.array(TF._get_inverse_affine_matrix(center, *ret)).reshape(2, 3)
        matrix = np.vstack([matrix, np.eye(3)[2]])
        assert matrix.shape == (3, 3)
        affine_trans = trans.AffineTransform(matrix=matrix)
        new_boxes = affine_trans.inverse(orig_box.reshape(-1, 2)) * (1. / 256.) - 1
        new_boxes = torch.from_numpy(new_boxes.reshape(-1, 4).astype(np.float32))

        sample.update({'image': img,
                       'labels': labels,
                       'warp_boxes': new_boxes
                       })
        return sample
Example #8
def affine_image_tensor(
    img: torch.Tensor,
    angle: float,
    translate: List[float],
    scale: float,
    shear: List[float],
    interpolation: InterpolationMode = InterpolationMode.NEAREST,
    fill: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
) -> torch.Tensor:
    num_channels, height, width = img.shape[-3:]
    extra_dims = img.shape[:-3]
    img = img.view(-1, num_channels, height, width)

    angle, translate, shear, center = _affine_parse_args(
        angle, translate, scale, shear, interpolation, center)

    center_f = [0.0, 0.0]
    if center is not None:
        # Center values should be in pixel coordinates but translated such that (0, 0) corresponds to image center.
        center_f = [
            1.0 * (c - s * 0.5) for c, s in zip(center, [width, height])
        ]

    translate_f = [1.0 * t for t in translate]
    matrix = _get_inverse_affine_matrix(center_f, angle, translate_f, scale,
                                        shear)

    output = _FT.affine(img,
                        matrix,
                        interpolation=interpolation.value,
                        fill=fill)
    return output.view(extra_dims + (num_channels, height, width))
Example #9
File: _geometry.py  Project: evdcush/vision
def affine_bounding_box(
    bounding_box: torch.Tensor,
    format: features.BoundingBoxFormat,
    image_size: Tuple[int, int],
    angle: float,
    translate: List[float],
    scale: float,
    shear: List[float],
    center: Optional[List[float]] = None,
) -> torch.Tensor:
    original_shape = bounding_box.shape
    bounding_box = convert_bounding_box_format(
        bounding_box,
        old_format=format,
        new_format=features.BoundingBoxFormat.XYXY).view(-1, 4)

    dtype = bounding_box.dtype if torch.is_floating_point(
        bounding_box) else torch.float32
    device = bounding_box.device

    if center is None:
        height, width = image_size
        center_f = [width * 0.5, height * 0.5]
    else:
        center_f = [float(c) for c in center]

    translate_f = [float(t) for t in translate]
    affine_matrix = torch.tensor(
        _get_inverse_affine_matrix(center_f,
                                   angle,
                                   translate_f,
                                   scale,
                                   shear,
                                   inverted=False),
        dtype=dtype,
        device=device,
    ).view(2, 3)
    # 1) Let's transform bboxes into a tensor of 4 points (top-left, top-right, bottom-left, bottom-right corners).
    # Tensor of points has shape (N * 4, 3), where N is the number of bboxes
    # Single point structure is similar to
    # [(xmin, ymin, 1), (xmax, ymin, 1), (xmax, ymax, 1), (xmin, ymax, 1)]
    points = bounding_box[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].view(-1, 2)
    points = torch.cat(
        [points, torch.ones(points.shape[0], 1, device=points.device)], dim=-1)
    # 2) Now let's transform the points using affine matrix
    transformed_points = torch.matmul(points, affine_matrix.T)
    # 3) Reshape transformed points to [N boxes, 4 points, x/y coords]
    # and compute bounding box from 4 transformed points:
    transformed_points = transformed_points.view(-1, 4, 2)
    out_bbox_mins, _ = torch.min(transformed_points, dim=1)
    out_bbox_maxs, _ = torch.max(transformed_points, dim=1)
    out_bboxes = torch.cat([out_bbox_mins, out_bbox_maxs], dim=1)
    # out_bboxes should be of shape [N boxes, 4]

    return convert_bounding_box_format(
        out_bboxes,
        old_format=features.BoundingBoxFormat.XYXY,
        new_format=format,
        copy=False).view(original_shape)
Example #10
 def __call__(self, file):
     file = torch.from_numpy(file)
     angle = float(torch.empty(1).uniform_(float(self.degrees[0]), float(self.degrees[1])).item())
     # print('angle: ', angle)
     center_f = [0.0, 0.0]
     matrix = tf._get_inverse_affine_matrix(center_f, -angle, [0.0, 0.0], 1.0, [0.0, 0.0])
     return F_t.rotate(file, matrix=matrix, resample=self.resample,
                         expand=self.expand, fill=self.fill)
Example #11
  def __init__(self, imageSize, shear=(0,0), angle=0, translate=(0,0), scale=0.9):
    center = (imageSize[0] * 0.5 + 0.5, imageSize[1] * 0.5 + 0.5)
    # shear = (np.random.uniform(-8,8),np.random.uniform(-8,8))
    # angle = np.random.uniform(-30,30)

    self.invAffMat = _get_inverse_affine_matrix(center=center, angle=angle, translate=translate, scale=scale, shear=shear)

    invAffM = np.mat([ [self.invAffMat[0],self.invAffMat[1],self.invAffMat[2] ],
                       [self.invAffMat[3],self.invAffMat[4],self.invAffMat[5] ],
                       [0        ,0        ,1                  ] ])
    affMat = np.linalg.inv(invAffM)
    self.affMat = affMat.item(0),affMat.item(1),affMat.item(2),affMat.item(3),affMat.item(4),affMat.item(5)
Example #12
    def __call__(self, item):
        img = item[0]

        tx = _get_inverse_affine_matrix((img.shape[0] // 2, img.shape[1] // 2),
                                        self.angle, (0, 0), self.scale,
                                        self.shear)
        M = np.array(tx)
        M = np.reshape(M, (2, 3))

        return [
            cv2.warpAffine(x, M, dsize=(img.shape[0], img.shape[1]))
            for x in item
        ]
Example #13
    def __call__(self, img):
        """
        Args:
            img (PIL Image): Image to be transformed.

        Returns:
            PIL Image: Affine transformed image.
        """
        params = self.get_params(self.degrees, self.translate, self.scale,
                                 self.shear, img.size)
        center = (img.size[0] * 0.5 + 0.5, img.size[1] * 0.5 + 0.5)
        self._matrix = _get_inverse_affine_matrix(center, *params)
        return F.affine(img,
                        *params,
                        resample=self.resample,
                        fillcolor=self.fillcolor)
Example #14
    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        if self.train:
            img1, target = self.train_data[index], self.train_labels[index]
        else:
            img1, target = self.test_data[index], self.test_labels[index]

        # doing this so that it is consistent with all other datasets
        # to return a PIL Image
        img1 = Image.fromarray(img1)
        if self.transform_pre is not None:
            img1 = self.transform_pre(img1)
        
        # affine transformation on image2
        ret = self.get_params(self.degrees, self.translate, self.scale, self.shear, img1.size)
        output_size = img1.size
        center = (img1.size[0] * 0.5 + 0.5, img1.size[1] * 0.5 + 0.5)
        matrix = _get_inverse_affine_matrix(center, *ret)
        kwargs = {"fillcolor": self.fillcolor} if PILLOW_VERSION[0] == '5' else {}
        img2 = img1.transform(output_size, Image.AFFINE, matrix, self.resample, **kwargs)

        if self.transform is not None:
            img1 = self.transform(img1)
            img2 = self.transform(img2)

        if self.target_transform is not None:
            target = self.target_transform(target)
        
        aff_para = [math.cos(math.radians(ret[0])),
                    math.sin(math.radians(ret[0])),
                    ret[1][0]/self.translate[0]/output_size[0],
                    ret[1][1]/self.translate[1]/output_size[1],
                    ret[2]*2./(self.scale[1]-self.scale[0])-(self.scale[0]+self.scale[1])/(self.scale[1]-self.scale[0]),
                    ret[3]*2./(self.shear[1]-self.shear[0])-(self.shear[0]+self.shear[1])/(self.shear[1]-self.shear[0])]
                    
        aff_para = torch.from_numpy(np.array(aff_para, np.float32, copy=False))

        return img1, img2, aff_para, target
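
The last four aff_para entries rescale the sampled affine parameters into roughly [-1, 1]. As a quick check with a hypothetical scale range of (0.8, 1.2), the formula maps the lower bound to -1, the midpoint to 0, and the upper bound to 1:

scale = (0.8, 1.2)   # hypothetical self.scale range
def norm(s):
    return s * 2. / (scale[1] - scale[0]) - (scale[0] + scale[1]) / (scale[1] - scale[0])

print([round(norm(s), 6) for s in (0.8, 1.0, 1.2)])   # [-1.0, 0.0, 1.0]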
Example #15
 def _compute_extended_patch_size(
         w: float, h: float, rotation: float, scale: float,
         shear: List[float]) -> Tuple[float, float]:
     transform = np.concatenate([
         np.array(
             _get_inverse_affine_matrix(
                 center=(0.5, 0.5),
                 angle=rotation,
                 translate=(0, 0),
                 scale=scale,
                 shear=shear,
             )).reshape(2, -1),
         np.array([[0.0, 0.0, 1.0]]),
     ])
     corners = np.array([[0, 0, 1], [0, h, 1], [w, 0, 1], [w, h, 1]])
     inv_corners = transform @ np.transpose(corners)
     xmax, ymax = inv_corners[:2].max(1)
     xmin, ymin = inv_corners[:2].min(1)
     return xmax - xmin, ymax - ymin
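
For a pure rotation (scale 1, no shear) the bounding box of the transformed corners of a w x h patch is w|cos a| + h|sin a| by w|sin a| + h|cos a|, so the helper above simply measures that spread. A worked example with a hypothetical 100 x 100 patch rotated by 45 degrees, calling the helper as a plain function:

ext_w, ext_h = _compute_extended_patch_size(
    w=100.0, h=100.0, rotation=45.0, scale=1.0, shear=[0.0, 0.0])
# ext_w and ext_h are both about 141.42 (100 * sqrt(2))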
Example #16
        def _test_transformation(a, t, s, sh):
            a_rad = math.radians(a)
            s_rad = math.radians(sh)
            # 1) Check transformation matrix:
            c_matrix = np.array([[1.0, 0.0, cnt[0]], [0.0, 1.0, cnt[1]],
                                 [0.0, 0.0, 1.0]])
            c_inv_matrix = np.linalg.inv(c_matrix)
            t_matrix = np.array([[1.0, 0.0, t[0]], [0.0, 1.0, t[1]],
                                 [0.0, 0.0, 1.0]])
            r_matrix = np.array(
                [[s * math.cos(a_rad), -s * math.sin(a_rad + s_rad), 0.0],
                 [s * math.sin(a_rad), s * math.cos(a_rad + s_rad), 0.0],
                 [0.0, 0.0, 1.0]])
            true_matrix = np.dot(
                t_matrix, np.dot(c_matrix, np.dot(r_matrix, c_inv_matrix)))
            result_matrix = _to_3x3_inv(
                F._get_inverse_affine_matrix(center=cnt,
                                             angle=a,
                                             translate=t,
                                             scale=s,
                                             shear=sh))
            assert np.sum(np.abs(true_matrix - result_matrix)) < 1e-10
            # 2) Perform inverse mapping:
            true_result = np.zeros((200, 200, 3), dtype=np.uint8)
            inv_true_matrix = np.linalg.inv(true_matrix)
            for y in range(true_result.shape[0]):
                for x in range(true_result.shape[1]):
                    res = np.dot(inv_true_matrix, [x, y, 1])
                    _x = int(res[0] + 0.5)
                    _y = int(res[1] + 0.5)
                    if 0 <= _x < input_img.shape[
                            1] and 0 <= _y < input_img.shape[0]:
                        true_result[y, x, :] = input_img[_y, _x, :]

            result = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh)
            assert result.size == pil_img.size
            # Compute number of different pixels:
            np_result = np.array(result)
            n_diff_pixels = np.sum(np_result != true_result) / 3
            # Accept 3 wrong pixels
            assert n_diff_pixels < 3, \
                "a={}, t={}, s={}, sh={}\n".format(a, t, s, sh) +\
                "n diff pixels={}\n".format(np.sum(np.array(result)[:, :, 0] != true_result[:, :, 0]))
Example #17
        def _test_transformation(a, t, s, sh):
            a_rad = math.radians(a)
            s_rad = math.radians(sh)
            # 1) Check transformation matrix:
            c_matrix = np.array([[1.0, 0.0, cnt[0]], [0.0, 1.0, cnt[1]], [0.0, 0.0, 1.0]])
            c_inv_matrix = np.linalg.inv(c_matrix)
            t_matrix = np.array([[1.0, 0.0, t[0]],
                                 [0.0, 1.0, t[1]],
                                 [0.0, 0.0, 1.0]])
            r_matrix = np.array([[s * math.cos(a_rad), -s * math.sin(a_rad + s_rad), 0.0],
                                 [s * math.sin(a_rad), s * math.cos(a_rad + s_rad), 0.0],
                                 [0.0, 0.0, 1.0]])
            true_matrix = np.dot(t_matrix, np.dot(c_matrix, np.dot(r_matrix, c_inv_matrix)))
            result_matrix = _to_3x3_inv(F._get_inverse_affine_matrix(center=cnt, angle=a,
                                                                     translate=t, scale=s, shear=sh))
            assert np.sum(np.abs(true_matrix - result_matrix)) < 1e-10
            # 2) Perform inverse mapping:
            true_result = np.zeros((200, 200, 3), dtype=np.uint8)
            inv_true_matrix = np.linalg.inv(true_matrix)
            for y in range(true_result.shape[0]):
                for x in range(true_result.shape[1]):
                    res = np.dot(inv_true_matrix, [x, y, 1])
                    _x = int(res[0] + 0.5)
                    _y = int(res[1] + 0.5)
                    if 0 <= _x < input_img.shape[1] and 0 <= _y < input_img.shape[0]:
                        true_result[y, x, :] = input_img[_y, _x, :]

            result = F.affine(pil_img, angle=a, translate=t, scale=s, shear=sh)
            assert result.size == pil_img.size
            # Compute number of different pixels:
            np_result = np.array(result)
            n_diff_pixels = np.sum(np_result != true_result) / 3
            # Accept 3 wrong pixels
            assert n_diff_pixels < 3, \
                "a={}, t={}, s={}, sh={}\n".format(a, t, s, sh) +\
                "n diff pixels={}\n".format(np.sum(np.array(result)[:, :, 0] != true_result[:, :, 0]))
Example #18
def _affine_bounding_box_xyxy(
    bounding_box: torch.Tensor,
    image_size: Tuple[int, int],
    angle: float,
    translate: Optional[List[float]] = None,
    scale: Optional[float] = None,
    shear: Optional[List[float]] = None,
    center: Optional[List[float]] = None,
    expand: bool = False,
) -> torch.Tensor:
    dtype = bounding_box.dtype if torch.is_floating_point(
        bounding_box) else torch.float32
    device = bounding_box.device

    if translate is None:
        translate = [0.0, 0.0]

    if scale is None:
        scale = 1.0

    if shear is None:
        shear = [0.0, 0.0]

    if center is None:
        height, width = image_size
        center_f = [width * 0.5, height * 0.5]
    else:
        center_f = [float(c) for c in center]

    translate_f = [float(t) for t in translate]
    affine_matrix = torch.tensor(
        _get_inverse_affine_matrix(center_f,
                                   angle,
                                   translate_f,
                                   scale,
                                   shear,
                                   inverted=False),
        dtype=dtype,
        device=device,
    ).view(2, 3)
    # 1) Let's transform bboxes into a tensor of 4 points (top-left, top-right, bottom-left, bottom-right corners).
    # Tensor of points has shape (N * 4, 3), where N is the number of bboxes
    # Single point structure is similar to
    # [(xmin, ymin, 1), (xmax, ymin, 1), (xmax, ymax, 1), (xmin, ymax, 1)]
    points = bounding_box[:, [[0, 1], [2, 1], [2, 3], [0, 3]]].view(-1, 2)
    points = torch.cat(
        [points, torch.ones(points.shape[0], 1, device=points.device)], dim=-1)
    # 2) Now let's transform the points using affine matrix
    transformed_points = torch.matmul(points, affine_matrix.T)
    # 3) Reshape transformed points to [N boxes, 4 points, x/y coords]
    # and compute bounding box from 4 transformed points:
    transformed_points = transformed_points.view(-1, 4, 2)
    out_bbox_mins, _ = torch.min(transformed_points, dim=1)
    out_bbox_maxs, _ = torch.max(transformed_points, dim=1)
    out_bboxes = torch.cat([out_bbox_mins, out_bbox_maxs], dim=1)

    if expand:
        # Compute minimum point for transformed image frame:
        # Points are Top-Left, Top-Right, Bottom-Left, Bottom-Right points.
        height, width = image_size
        points = torch.tensor(
            [
                [0.0, 0.0, 1.0],
                [0.0, 1.0 * height, 1.0],
                [1.0 * width, 1.0 * height, 1.0],
                [1.0 * width, 0.0, 1.0],
            ],
            dtype=dtype,
            device=device,
        )
        new_points = torch.matmul(points, affine_matrix.T)
        tr, _ = torch.min(new_points, dim=0, keepdim=True)
        # Translate bounding boxes
        out_bboxes[:, 0::2] = out_bboxes[:, 0::2] - tr[:, 0]
        out_bboxes[:, 1::2] = out_bboxes[:, 1::2] - tr[:, 1]

    return out_bboxes
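
A hedged usage sketch of the helper above, rotating one XYXY box by 90 degrees about the center of a hypothetical image of height 100 and width 200 (image_size is passed as (height, width)):

box = torch.tensor([[10.0, 10.0, 30.0, 20.0]])
out = _affine_bounding_box_xyxy(box, image_size=(100, 200), angle=90.0)
# out is the axis-aligned XYXY box enclosing the four rotated corners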
Example #19
    def __getitem__(self, idx):
        # pylint: disable=too-many-locals

        # Sample a random transformation
        rotation = np.random.uniform(-self._max_rotation_jitter,
                                     self._max_rotation_jitter)
        scale = np.exp(
            np.random.uniform(-self._max_scale_jitter, self._max_scale_jitter))
        shear = np.random.uniform(-self._max_shear_jitter,
                                  self._max_shear_jitter,
                                  size=2)

        # Compute the "extended" patch size. This is the size of the patch that
        # we will first transform and then center crop to the final size.
        extpatch_w, extpatch_h = self._compute_extended_patch_size(
            w=self._patch_w,
            h=self._patch_h,
            rotation=rotation,
            scale=scale,
            shear=shear,
        )

        # The slide may not be large enough for the extended patch size. In
        # this case, we will downscale the target patch size until the extended
        # patch size fits.
        adjmul = min(1.0, self._slide.W / extpatch_w,
                     self._slide.H / extpatch_h)
        extpatch_w = min(int(np.ceil(extpatch_w * adjmul)), self._slide.W)
        extpatch_h = min(int(np.ceil(extpatch_h * adjmul)), self._slide.H)
        patch_w = int(self._patch_w * adjmul)
        patch_h = int(self._patch_h * adjmul)

        # Extract the extended patch by sampling uniformly from the size of the
        # slide
        x, y = [
            np.random.randint(a - b + 1)
            for a, b in zip((self._slide.W, self._slide.H), (extpatch_w,
                                                             extpatch_h))
        ]
        image = self._slide.image[y:y + extpatch_h, x:x + extpatch_w]
        image = (255 * (image + 1) / 2).astype(np.uint8)
        image = to_pil_image(image)
        label = to_pil_image(self._slide.label[y:y + extpatch_h,
                                               x:x + extpatch_w])

        # Apply augmentations
        output_size = (max(extpatch_w, patch_w), max(extpatch_h, patch_h))
        transformation = _get_inverse_affine_matrix(
            center=(image.size[0] * 0.5, image.size[1] * 0.5),
            angle=rotation,
            translate=[(a - b) / 2 for a, b in zip(output_size, image.size)],
            scale=scale,
            shear=shear,
        )
        image = self.image_augmentation(image)
        image = np.array(
            image.transform(
                output_size,
                Image.AFFINE,
                transformation,
                resample=Image.BILINEAR,
            ))
        image = center_crop(image, (patch_h, patch_w))
        label = np.array(
            label.transform(
                output_size,
                Image.AFFINE,
                transformation,
                resample=Image.NEAREST,
            ))
        label = center_crop(label, (patch_h, patch_w))
        if np.random.rand() < 0.5:
            image = np.flip(image, 0).copy()
            label = np.flip(label, 0).copy()

        # Convert image to the correct data format (float32 in [-1, 1] and in
        # CHW order)
        image = 2 * image.astype(np.float32) / 255 - 1
        image = image.transpose(2, 0, 1)

        return self._slide.prepare_data(image, label)
Example #20
    def __getitem__(self, index):
        """
        Args:
            index (int): Index

        Returns:
            tuple: (image, target) where target is index of the target class.
        """
        train_path = self.trainlist[index]
        aim_path = self.aimlist[index]
        img1 = np.load(train_path)
        target = np.load(aim_path)

        #if self.unlabel_Data:

        # doing this so that it is consistent with all other datasets
        # to return a PIL Image
        #print (img1.shape)
        #img1_transform = img1.copy()

        img1 = img1.transpose((1, 2, 0))  #change 3*32*32 to 32*32*3

        #print(img1.shape)
        #print(img1)
        img1 = self.normalise(img1, self.dataset_mean,
                              self.dataset_std)  #normalize#channel last format
        #print(img1.shape)
        #print(img1)
        #exit()
        img1 = img1.transpose((2, 0, 1))
        #change back to 3*32*32
        #print(img1.shape)
        #exit()

        if self.transform_pre is not None:
            #operation should be channel first
            #if self.unlabel_Data:#remove unlabel data label to save a dataloader to free computer cpu usage
            self.transform_now = TransformTwice(self.transform_pre)
            img1, img1_another = self.transform_now(img1)
        # if self.train_label == False and self.valid_size != 0:  # for the labeled dataset
        #     if self.transform is not None:
        #         img1 = self.transform(img1)
        #     if self.target_transform is not None:
        #         target = self.target_transform(target)
        #     img1 = torch.from_numpy(img1)
        #     return img1,target
        #else:
        #img1 = self.transform_pre(img1)
        #print(img1)
        #exit()
        #if self.transform_pre is not None:
        #    img1_transform=self.transform_pre(img1_transform)#get img1 from the tranform result
        #revoke bake img1 to make
        #print(img1)
        #print(img1.shape)
        img1_transform = img1.transpose((1, 2, 0))
        #print(img1_transform.shape)
        img1_transform = self.denormalize(img1_transform, self.dataset_mean,
                                          self.dataset_std)
        #print(img1_transform)
        #print(img1_transform.shape)
        #img1_transform = img1.transpose((2,0,1))
        #exit()
        #print(img1.shape())
        #exit()
        img1_transform = Image.fromarray(img1_transform.astype(np.uint8))
        # projective transformation on image2
        width, height = img1_transform.size
        center = (img1_transform.size[0] * 0.5 + 0.5,
                  img1_transform.size[1] * 0.5 + 0.5)
        shift = [
            float(random.randint(-int(self.shift), int(self.shift)))
            for ii in range(8)
        ]
        scale = random.uniform(self.scale[0], self.scale[1])
        rotation = random.randint(0, 3)

        pts = [((0 - center[0]) * scale + center[0],
                (0 - center[1]) * scale + center[1]),
               ((width - center[0]) * scale + center[0],
                (0 - center[1]) * scale + center[1]),
               ((width - center[0]) * scale + center[0],
                (height - center[1]) * scale + center[1]),
               ((0 - center[0]) * scale + center[0],
                (height - center[1]) * scale + center[1])]
        pts = [pts[(ii + rotation) % 4] for ii in range(4)]
        pts = [(pts[ii][0] + shift[2 * ii], pts[ii][1] + shift[2 * ii + 1])
               for ii in range(4)]

        coeffs = self.find_coeffs(pts, [(0, 0), (width, 0), (width, height),
                                        (0, height)])

        kwargs = {
            "fillcolor": self.fillcolor
        } if PILLOW_VERSION[0] == '5' else {}
        img2 = img1_transform.transform((width, height), Image.PERSPECTIVE,
                                        coeffs, self.resample, **kwargs)
        #img1_transform = np.array(img1_transform).astype('float32')
        img2 = np.array(img2).astype('float32')
        img2 = self.normalise(img2, self.dataset_mean, self.dataset_std)
        img2 = img2.transpose((2, 0, 1))
        #apply affine transformation here
        ret = self.get_params(self.degrees, self.translate, self.scale,
                              self.shear, img1_transform.size)
        output_size = img1_transform.size  # 32*32
        center = (img1_transform.size[0] * 0.5 + 0.5,
                  img1_transform.size[1] * 0.5 + 0.5)
        matrix = _get_inverse_affine_matrix(center, *ret)
        kwargs = {
            "fillcolor": self.fillcolor
        } if PILLOW_VERSION[0] == '5' else {}
        img3 = img1_transform.transform(output_size, Image.AFFINE, matrix,
                                        self.resample, **kwargs)
        img3 = np.array(img3).astype('float32')
        img3 = self.normalise(img3, self.dataset_mean, self.dataset_std)
        img3 = img3.transpose((2, 0, 1))
        aff_para = [
            math.cos(math.radians(ret[0])),  # degree cos
            math.sin(math.radians(ret[0])),  # degree sin
            ret[1][0] / self.translate[0] / output_size[0],  # translate x
            ret[1][1] / self.translate[1] / output_size[1],  # translate y
            ret[2] * 2. / (self.scale[1] - self.scale[0]) -
            (self.scale[0] + self.scale[1]) /
            (self.scale[1] - self.scale[0]),  # scale
            ret[3] * 2. / (self.shear[1] - self.shear[0]) -
            (self.shear[0] + self.shear[1]) / (self.shear[1] - self.shear[0])
        ]  # shear

        aff_para = torch.from_numpy(np.array(
            aff_para, np.float32, copy=False))  # affine transform parameter
        #apply similarity transformation
        matrix = _get_inverse_affine_matrix(center, ret[0], ret[1], ret[2], 0)
        kwargs = {
            "fillcolor": self.fillcolor
        } if PILLOW_VERSION[0] == '5' else {}
        img4 = img1_transform.transform(output_size, Image.AFFINE, matrix,
                                        self.resample, **kwargs)
        img4 = np.array(img4).astype('float32')
        img4 = self.normalise(img4, self.dataset_mean, self.dataset_std)
        img4 = img4.transpose((2, 0, 1))
        #apply eculidean transfomration
        matrix = _get_inverse_affine_matrix(center, ret[0], ret[1], 1.0, 0)
        kwargs = {
            "fillcolor": self.fillcolor
        } if PILLOW_VERSION[0] == '5' else {}
        img5 = img1_transform.transform(output_size, Image.AFFINE, matrix,
                                        self.resample, **kwargs)
        img5 = np.array(img5).astype('float32')
        img5 = self.normalise(img5, self.dataset_mean, self.dataset_std)
        img5 = img5.transpose((2, 0, 1))
        #apply the colorize, contrast, brightness, sharpeness to the image
        img6, oper_params = self.operate_CCBS(img1_transform)
        img6 = np.array(img6).astype('float32')
        img6 = self.normalise(img6, self.dataset_mean, self.dataset_std)
        img6 = img6.transpose((2, 0, 1))
        #add another image with cutout

        img7 = np.array(img1_transform).astype('float32')
        img7 = self.normalise(img7, self.dataset_mean, self.dataset_std)
        img7 = img7.transpose((2, 0, 1))
        img7 = self.cut_out(img7)
        img1_transform = np.array(img1_transform).astype('float32')
        if self.transform is not None:
            img1 = self.transform(img1)
            img1_transform = self.transform(img1_transform)
            img2 = self.transform(img2)
            img3 = self.transform(img3)
            img4 = self.transform(img4)
            img5 = self.transform(img5)
            img6 = self.transform(img6)
            img7 = self.transform(img7)
        if self.target_transform is not None:
            target = self.target_transform(target)

        img1 = torch.from_numpy(img1)
        img2 = torch.from_numpy(img2)
        img3 = torch.from_numpy(img3)
        img4 = torch.from_numpy(img4)
        img5 = torch.from_numpy(img5)
        img6 = torch.from_numpy(img6)
        img7 = torch.from_numpy(img7)
        img1_transform = torch.from_numpy(img1_transform)
        coeffs = torch.from_numpy(np.array(coeffs, np.float32,
                                           copy=False)).view(8, 1, 1)
        oper_params = torch.from_numpy(oper_params)
        if self.matrix_transform is not None:
            coeffs = self.matrix_transform(coeffs)
        #if self.unlabel_Data:
        #img1_another = np.array(img1_another).astype('float32')
        #img1_another = self.normalise(img1_another)
        #img1_another = img1_another.transpose((2, 0, 1))
        if self.transform is not None:
            img1_another = self.transform(img1_another)
        if self.transform_pre is not None:
            img1_another = torch.from_numpy(img1_another)
            #print(img1)
            #print(img1_another)
            #exit()
            return (
                img1, img1_another
            ), img2, img3, img4, img5, img6, img7, aff_para, coeffs, oper_params, target
        else:
            return (
                img1, img1
            ), img2, img3, img4, img5, img6, img7, aff_para, coeffs, oper_params, target
Example #21
    def __getitem__(self, i):
        assert (type(i) is int)

        p = self.projs[i, :, :, :]

        s = None
        if self.segs is not None:
            s = self.segs[i, :, :, :]

        cur_lands = None
        if self.lands is not None:
            # we need a deep copy here because of possible data aug
            cur_lands = self.lands[i, :, :].clone()

        need_to_pad_proj = self.extra_pad > 0

        if (self.prob_of_aug > 0) and (random.random() < self.prob_of_aug):
            #print('augmenting...')

            if self.do_invert and (random.random() < 0.5):
                #print('  inversion...')

                p_max = p.max()
                #p_min = p.min()
                p = p_max - p

                if self.print_aug_info:
                    print('inverting')

            if self.do_noise:
                # normalize to [0,1] to apply noise
                p_min = p.min()
                p_max = p.max()

                p = (p - p_min) / (p_max - p_min)

                cur_noise_sigma = random.uniform(0.005, 0.01)
                p += torch.randn(p.shape) * cur_noise_sigma

                p = (p * (p_max - p_min)) + p_min

                if self.print_aug_info:
                    print('noise sigma: {:.3f}'.format(cur_noise_sigma))

            if self.do_gamma:
                # normalize to [0,1] to apply gamma
                p_min = p.min()
                p_max = p.max()

                p = (p - p_min) / (p_max - p_min)

                gamma = random.uniform(0.7, 1.3)
                p.pow_(gamma)

                p = (p * (p_max - p_min)) + p_min

                if self.print_aug_info:
                    print('gamma = {:.2f}'.format(gamma))

            if self.do_affine:
                # data needs to be in [0,1] for PIL functions
                p_min = p.min()
                p_max = p.max()

                p = (p - p_min) / (p_max - p_min)

                orig_p_shape = p.shape
                if self.pad_data_for_affine:
                    pad1 = int(math.ceil(orig_p_shape[1] / 2.0))
                    pad2 = int(math.ceil(orig_p_shape[2] / 2.0))
                    if need_to_pad_proj:
                        pad1 += self.extra_pad
                        pad2 += self.extra_pad
                        need_to_pad_proj = False

                    p = torch.from_numpy(
                        np.pad(p.numpy(), ((0, 0), (pad1, pad1), (pad2, pad2)),
                               'reflect'))

                p_il = TF.to_pil_image(p)

                # this uniformly samples the direction
                rand_trans = torch.randn(2)
                rand_trans /= rand_trans.norm()

                # now uniformly sample the magnitude
                rand_trans *= random.random() * 20

                rot_ang = random.uniform(-5, 5)
                trans_x = rand_trans[0]
                trans_y = rand_trans[1]
                shear = random.uniform(-2, 2)

                scale_factor = random.uniform(0.9, 1.1)

                if self.print_aug_info:
                    print('Rot: {:.2f}'.format(rot_ang))
                    print('Trans X: {:.2f} , Trans Y: {:.2f}'.format(
                        trans_x, trans_y))
                    print('Shear: {:.2f}'.format(shear))
                    print('Scale: {:.2f}'.format(scale_factor))

                p = TF.to_tensor(
                    TF.affine(TF.to_pil_image(p),
                              rot_ang, (trans_x, trans_y),
                              scale_factor,
                              shear,
                              resample=PIL.Image.BILINEAR))

                if self.pad_data_for_affine:
                    # pad can be zero
                    pad_shape = (orig_p_shape[-2] + (2 * self.extra_pad),
                                 orig_p_shape[-1] + (2 * self.extra_pad))
                    p = center_crop(p, pad_shape)

                p = (p * (p_max - p_min)) + p_min

                if s is not None:
                    orig_s_shape = s.shape
                    if self.pad_data_for_affine:
                        pad1 = int(math.ceil(orig_s_shape[1] / 2.0))
                        pad2 = int(math.ceil(orig_s_shape[2] / 2.0))
                        s = torch.from_numpy(
                            np.pad(s.numpy(),
                                   ((0, 0), (pad1, pad1), (pad2, pad2)),
                                   'reflect'))

                    # warp each class separately, I don't want any wacky color
                    # spaces assumed by PIL
                    for c in range(s.shape[0]):
                        s[c, :, :] = TF.to_tensor(
                            TF.affine(TF.to_pil_image(s[c, :, :]), rot_ang,
                                      (trans_x, trans_y), scale_factor, shear))
                    if self.pad_data_for_affine:
                        s = center_crop(s, orig_s_shape)

                if cur_lands is not None:
                    shape_for_center_of_rot = s.shape if s is not None else p.shape

                    center_of_rot = ((shape_for_center_of_rot[-2] / 2.0) + 0.5,
                                     (shape_for_center_of_rot[-1] / 2.0) + 0.5)

                    A_inv = TF._get_inverse_affine_matrix(
                        center_of_rot, rot_ang, (trans_x, trans_y),
                        scale_factor, shear)
                    A = np.matrix([[A_inv[0], A_inv[1], A_inv[2]],
                                   [A_inv[3], A_inv[4], A_inv[5]], [0, 0,
                                                                    1]]).I

                    for pt_idx in range(cur_lands.shape[-1]):
                        cur_land = cur_lands[:, pt_idx]
                        if (not math.isinf(cur_land[0])) and (not math.isinf(
                                cur_land[1])):
                            tmp_pt = A * np.asmatrix(
                                np.pad(cur_land.numpy(), (0, 1),
                                       mode='constant',
                                       constant_values=1).reshape(3, 1))
                            xform_l = torch.from_numpy(
                                np.squeeze(np.asarray(tmp_pt))[0:2])
                            if (s is not None) and \
                               ((xform_l[0] < 0) or (xform_l[0] > (orig_s_shape[1] - 1)) or \
                                (xform_l[1] < 0) or (xform_l[1] > (orig_s_shape[0] - 1))):
                                xform_l[0] = math.inf
                                xform_l[1] = math.inf

                            cur_lands[:, pt_idx] = xform_l

            if self.do_erase and (random.random() < self.erase_prob):
                #print('  box noise/erase...')

                p_2d_shape = [p.shape[-2], p.shape[-1]]
                box_mean_dim = torch.Tensor(
                    [p_2d_shape[0] * 0.15, p_2d_shape[1] * 0.15])

                num_boxes = random.randint(1, 5)

                if self.print_aug_info:
                    print('  Random Corrupt: num. boxes: {}'.format(num_boxes))

                for box_idx in range(num_boxes):
                    box_valid = False

                    while not box_valid:
                        # First sample box dims
                        box_dims = torch.round((torch.randn(2) *
                                                (box_mean_dim)) +
                                               box_mean_dim).long()

                        if (box_dims[0] > 0) and (box_dims[1] > 0) and \
                                (box_dims[0] <= p_2d_shape[0]) and (box_dims[1] <= p_2d_shape[1]):
                            # Next sample box location
                            start_row = random.randint(
                                0, p_2d_shape[0] - box_dims[0])
                            start_col = random.randint(
                                0, p_2d_shape[1] - box_dims[1])

                            box_valid = True

                    p_roi = p[0, start_row:(start_row + box_dims[0]),
                              start_col:(start_col + box_dims[1])]

                    sigma_noise = (p_roi.max() - p_roi.min()) * 0.2

                    p_roi += torch.randn(p_roi.shape) * sigma_noise

        # end data aug

        if need_to_pad_proj:
            p = torch.from_numpy(
                np.pad(p.numpy(), ((0, 0), (self.extra_pad, self.extra_pad),
                                   (self.extra_pad, self.extra_pad)),
                       'reflect'))

        if self.do_norm_01_scale:
            p = (p - p.mean()) / p.std()

        h = None
        if self.include_heat_map:
            assert (s is not None)
            assert (cur_lands is not None)

            num_lands = cur_lands.shape[-1]

            h = torch.zeros(num_lands, 1, s.shape[-2], s.shape[-1])

            # "FH-l", "FH-r", "GSN-l", "GSN-r", "IOF-l", "IOF-r", "MOF-l", "MOF-r", "SPS-l", "SPS-r", "IPS-l", "IPS-r"
            #sigma_lut = [ 2.5, 2.5, 7.5, 7.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5, 2.5]
            sigma_lut = torch.full([num_lands], 2.5)

            (Y, X) = torch.meshgrid(torch.arange(0, s.shape[-2]),
                                    torch.arange(0, s.shape[-1]))
            Y = Y.float()
            X = X.float()

            for land_idx in range(num_lands):
                sigma = sigma_lut[land_idx]

                cur_land = cur_lands[:, land_idx]

                mu_x = cur_land[0]
                mu_y = cur_land[1]

                if not math.isinf(mu_x) and not math.isinf(mu_y):
                    pdf = torch.exp(
                        ((X - mu_x).pow(2) + (Y - mu_y).pow(2)) /
                        (sigma * sigma * -2)) / (2 * math.pi * sigma * sigma)
                    #pdf /= pdf.sum() # normalize to sum of 1
                    h[land_idx, 0, :, :] = pdf
            #assert(torch.all(torch.isfinite(h)))

        return (p, s, cur_lands, h)
Example #22
    def __getitem__(self, index):
        clip_name, label = self.objects[index]
        clip_path = self.clip_name2path_dict[clip_name][0]
        cap = CV2VideoCapture(clip_path)
        trg_people_channel_num = 1 if self.players_in_same_channel else 2
        frames = np.zeros((self.filtered_seq_len, trg_people_channel_num, 128, 256, 3), dtype=np.uint8)
        flow = np.zeros((self.filtered_seq_len, trg_people_channel_num, 128, 256, 2), dtype=np.float32)
        angle, translate, scale, shear = 0.0, 0.0, 1.0, 0.0

        if self.mode == 'train':
            flip = random.choice([0, 1])
            angle, translate, scale, shear = self.get_augmentation_params(angle_max=15, translate_max=((-10, 10), (-20, 10)),
                                                                   scale_range=(0.75, 1.15), shear_max=10)
            center = (128 * 0.5 + 0.5, 256 * 0.5 + 0.5)
            affine_matrix = np.eye(3)
            affine_matrix[:2] = np.array(_get_inverse_affine_matrix(center, angle, translate, scale, shear=shear)).reshape(2, 3)
            #print(affine_matrix)

        seqs_to_count = [i for i in range(self.seq_len) if
                         (i >= 0 and i <= 52 and i % self.filtered_seq_step_size == 0)]
        seqs_to_count = seqs_to_count[-self.filtered_seq_len:]  # filter sequence
        seqs_to_count.sort()

        for seq_ind in range(self.seq_len):
            for p in [0, 1]:
                curr_frame_img = cap.read()

                if seq_ind in seqs_to_count:
                    if self.mode == 'train':
                        img = Image.fromarray(curr_frame_img)
                        # augmentations
                        img = torchvision.transforms.functional.affine(img, angle=angle, translate=translate,
                                                                       scale=scale, shear=shear, resample=0, fillcolor=0)
                        curr_frame_img = np.array(img)
                    frames[seqs_to_count.index(seq_ind), p % trg_people_channel_num] += curr_frame_img

        if self.mode == 'train' and flip:
            label = flip_label(label)
            frames = frames[:, :, :, ::-1, :]

        if self.use_optical_flow:
            for i in range(self.filtered_seq_len - 1):
                for p in range(trg_people_channel_num):
                    flow[i, p] = self.calculate_optical_flow(frames[i, p], frames[i + 1, p])

        if self.use_pose_optical_flow:
            pose = self.poses_dict[clip_name]

            def get_y_lim(curr_poses):
                all_y = curr_poses[:, :, :, :, 1]
                all_y_non_zero = all_y[all_y > 0]
                if len(all_y_non_zero) > 0:
                    y_max = np.max(all_y_non_zero)
                    y_min = np.min(all_y_non_zero)
                else:
                    y_max = 0
                    y_min = 0
                assert not np.isinf(y_max)
                assert not np.isinf(y_min)
                return y_min, y_max

            y_min, y_max = get_y_lim(pose)
            #print('pose max/min:',y_max, y_min)

            pose[:, :, :, :, 0] = np.minimum(pose[:, :, :, :, 0] / 5.0, 1279//5)
            pose[:, :, :, :, 1] = np.minimum(pose[:, :, :, :, 1] / 2.0, 719//2)
            for i in range(self.filtered_seq_len - 1):
                for p in [0, 1]:
                    curr_flow = self.calculate_pose_optical_flow(pose[i, p], pose[i + 1, p]) #.transpose([1,0,2])
                    curr_flow = curr_flow[max(0, int(y_min / 2) - padding):min(720 // 2,int(y_max / 2) + padding), :, :]
                    #print('flow shape', curr_flow.shape)
                    curr_flow = zoom(curr_flow, np.divide((128, 256, 2), curr_flow.shape), order=0)

                    if self.mode == 'train':
                        #print(curr_flow.shape)
                        flow_img = torchvision.transforms.functional.affine(Image.fromarray(draw_hsv(curr_flow)), angle=angle, translate=translate,
                                                                       scale=scale, shear=shear, resample=0, fillcolor=0)
                        #curr_flow = affine_transform(curr_flow, affine_matrix, order=0)
                        flow_arr = np.array(flow_img)
                        curr_flow = np.stack([flow_arr[:,:,0], flow_arr[:,:,2]], axis=2)
                    flow[i, p % trg_people_channel_num] += curr_flow#.transpose([1,0,2])

        frames = frames.astype(np.float32)
        frames = frames / 255.0
        # [0, 1] => [-1, 1]
        frames = (frames * 2) - 1

        return torch.from_numpy(frames.copy()).unsqueeze(2).transpose(2, -1).squeeze(-1), \
               torch.from_numpy(flow).unsqueeze(2).transpose(2, -1).squeeze(-1), \
               label, clip_name
Example #23
kwargs = {"fillcolor": fillcolor} if PILLOW_VERSION[0] == '5' else {}
resample = PIL.Image.BILINEAR
img2 = img1_transform.transform((width, height), Image.PERSPECTIVE, coeffs,
                                resample, **kwargs)
tmp_path = os.path.join(root_path, 'projective.png')
img2.save(tmp_path)
listIm.append(tmp_path)
degrees = (-180, 180)
translate = (-0.2, 0.2)
scale = (0.8, 1.2)
shear = (-30, 30)
ret = get_params(degrees, translate, scale, shear, img1_transform.size)
output_size = img1_transform.size
center = (img1_transform.size[0] * 0.5 + 0.5,
          img1_transform.size[1] * 0.5 + 0.5)
matrix = _get_inverse_affine_matrix(center, *ret)
kwargs = {"fillcolor": fillcolor} if PILLOW_VERSION[0] == '5' else {}
img3 = img1_transform.transform(output_size, Image.AFFINE, matrix, resample,
                                **kwargs)
tmp_path = os.path.join(root_path, 'affine.png')
img3.save(tmp_path)
listIm.append(tmp_path)
matrix = _get_inverse_affine_matrix(center, ret[0], ret[1], ret[2], 0)
kwargs = {"fillcolor": fillcolor} if PILLOW_VERSION[0] == '5' else {}
img4 = img1_transform.transform(output_size, Image.AFFINE, matrix, resample,
                                **kwargs)
tmp_path = os.path.join(root_path, 'similarity.png')
img4.save(tmp_path)
listIm.append(tmp_path)

matrix = _get_inverse_affine_matrix(center, ret[0], ret[1], 1.0, 0)
Example #24
def main(_):
    writer = SummaryWriter(log_dir=opts.tb_log_dir + str(opts.alpha) + '/' +
                           opts.exp_name)

    torch.manual_seed(0)
    if opts.category in ['horse', 'tiger']:
        dataset = tf_final.TigDogDataset_Final(opts.root_dir,
                                               opts.category,
                                               transforms=None,
                                               normalize=False,
                                               max_length=None,
                                               remove_neck_kp=False,
                                               split='train',
                                               img_size=opts.img_size,
                                               mirror=False,
                                               scale=False,
                                               crop=False)

        collate_fn = tf_final.TigDog_collate

    directory = opts.tmp_dir + '/' + opts.category + '/'
    if not osp.exists(directory):
        os.makedirs(directory)

    save_counter = 0
    sample_to_vid = {}
    samples_per_vid = {}
    print('Number of videos for ', opts.category, '-', len(dataset))
    i_sample = 0
    for i_sample, sample in enumerate(dataset):
        num_frames = sample['video'].shape[0]
        for i in range(num_frames):
            new_sample = {}
            for k in sample.keys():
                if k in [
                        'video', 'sfm_poses', 'landmarks', 'segmentations',
                        'bboxes'
                ]:
                    new_sample[k] = sample[k][i]

            pkl.dump(new_sample,
                     open(directory + str(save_counter) + '.pkl', 'wb'))
            sample_to_vid[save_counter] = i_sample
            if i_sample in samples_per_vid:
                samples_per_vid[i_sample].append(save_counter)
            else:
                samples_per_vid[i_sample] = [save_counter]
            save_counter += 1
        # if i >= 5:  # 35:  # TODO:fix this
        # break
        #if i_sample >= 3:  # TODO:fix this
        # break

    training_samples = save_counter
    print('Training samples (frames):', training_samples)
    dataset = tigdog_mf.TigDogDataset_MultiFrame(
        opts.tmp_dir,
        opts.category,
        num_frames=opts.num_frames,
        sample_to_vid=sample_to_vid,
        samples_per_vid=samples_per_vid,
        normalize=True,
        transforms=True,
        remove_neck_kp=True,
        split='train',
        img_size=opts.img_size,
        mirror=True,
        scale=True,
        crop=True,
        v2_crop=True,
        tight_bboxes=True)
    collate_fn = tigdog_mf.TigDog_collate

    dataloader = DataLoader(dataset,
                            opts.batch_size,
                            drop_last=True,
                            shuffle=True,
                            collate_fn=collate_fn,
                            num_workers=2)
    print('Dataloader:', len(dataloader))

    IMM_Model = IMM(dim=opts.num_kps,
                    heatmap_std=opts.std,
                    in_channel=3,
                    h_channel=32).cuda()
    loss_fn_vgg = lpips.LPIPS(net='vgg').cuda()
    loss_mse = torch.nn.MSELoss()
    optimizer = optim.Adam(IMM_Model.parameters(), lr=opts.lr)
    lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                              factor=0.1,
                                                              patience=0,
                                                              threshold=1e-5)
    n_iter = 0
    n_batch = 0
    for epoch in range(opts.epochs):
        avg_loss = 0
        for sample in dataloader:
            input_img_tensor = sample['img'].type(
                torch.FloatTensor).clone().cuda()
            mask_3channels = torch.unsqueeze(sample['mask'], 2)
            mask_3channels = mask_3channels.repeat(1, 1, 3, 1,
                                                   1).clone().cuda()
            input_img_tensor *= mask_3channels
            frame1 = input_img_tensor[:, 0]
            frame2 = input_img_tensor[:, 1]
            source = frame1
            target = frame2
            target_mask = mask_3channels[:, 1].cpu()
            #mask_edt = np.stack([compute_dt(m) for m in target_mask])
            total_loss_affine = 0

            #reconstruct image, get result_kps
            reconstruct, result_kps, gauss, pose_val = IMM_Model(
                source, target)
            reconstruct_display = torch.clamp(reconstruct, 0, 1)
            result_kps_vis = torch.cat(
                [result_kps, torch.ones_like(result_kps[:, :, :1])], dim=-1)
            #edts_barrier = torch.tensor(mask_edt).float().unsqueeze(1).cuda()
            #loss_mask = texture_dt_loss_v(result_kps, edts_barrier)
            loss_reconstruction = loss_fn_vgg.forward(reconstruct,
                                                      target).mean()

            for i in range(4):

                #transform target to target_affine
                rand_angle = np.random.uniform(0, 50)
                rand_shear = np.random.uniform(0, 50)
                target_affine = affine(target, rand_angle, [0.0, 0.0], 1.0,
                                       [0.0, rand_shear])  #transform image
                matrix = _get_inverse_affine_matrix(
                    [0.0, 0.0], rand_angle, [0.0, 0.0], 1.0, [0.0, rand_shear]
                )  #keep track of matrix used for affine, need to transform kps
                transformation_matrix = torch.tensor(
                    [[matrix[0], matrix[1], matrix[2]],
                     [matrix[3], matrix[4], matrix[5]], [0, 0, 1]]).cuda()

                #get predicted keypoints of affine image
                _, affine_kps, _, _ = IMM_Model(source, target_affine)

                #set the true affine keypoints = matrix @ predicted kps in original img
                true_affine_kps = torch.zeros(opts.batch_size, opts.num_kps,
                                              2).cuda()
                for batch in range(opts.batch_size):
                    for n in range(opts.num_kps):
                        result_xyz = torch.tensor([
                            result_kps[batch, n, 0], result_kps[batch, n, 1], 1
                        ]).cuda()
                        t = torch.matmul(torch.inverse(transformation_matrix),
                                         result_xyz)
                        true_affine_kps[batch, n] = t[:2]

                true_affine_kps_vis = torch.stack([
                    true_affine_kps[:, :, 0], true_affine_kps[:, :, 1],
                    torch.ones_like(true_affine_kps[:, :, 1])
                ],
                                                  dim=-1)
                pred_affine_kps_vis = torch.stack([
                    affine_kps[:, :, 0], affine_kps[:, :, 1],
                    torch.ones_like(affine_kps[:, :, 1])
                ],
                                                  dim=-1)
                loss_affine = loss_mse(affine_kps, true_affine_kps)
                total_loss_affine += loss_affine

                if n_batch % opts.vis_every == 0:
                    kp_img = utils.kp2im(
                        result_kps_vis[0].detach().cpu().numpy(),
                        target[0].cpu().numpy(),
                        radius=2) / 255
                    kp_img = torch.from_numpy(kp_img).permute(2, 0, 1)[None]
                    kp_img = kp_img.to(source.device)
                    kp_affine = utils.kp2im(
                        true_affine_kps_vis[0].detach().cpu().numpy(),
                        target_affine[0].cpu().numpy(),
                        radius=2) / 255
                    kp_affine = torch.from_numpy(kp_affine).permute(2, 0,
                                                                    1)[None]
                    kp_affine = kp_affine.to(source.device)
                    kp_affine_p = utils.kp2im(
                        pred_affine_kps_vis[0].detach().cpu().numpy(),
                        target_affine[0].cpu().numpy(),
                        radius=2) / 255
                    kp_affine_p = torch.from_numpy(kp_affine_p).permute(
                        2, 0, 1)[None]
                    kp_affine_p = kp_affine_p.to(source.device)
                    kp_mask = utils.kp2im(
                        result_kps_vis[0].detach().cpu().numpy(),
                        mask_3channels[0, 1].cpu().numpy(),
                        radius=2) / 255
                    kp_mask = torch.from_numpy(kp_mask).permute(2, 0, 1)[None]
                    kp_mask = kp_mask.to(source.device)
                    grid = torch.cat([
                        source[:1], target[:1], kp_img[:1], kp_affine[:1],
                        kp_affine_p[:1], kp_mask, reconstruct_display[:1]
                    ],
                                     dim=3)[0]
                    writer.add_image(
                        'iter {n} of image {i} (reconstruction, affine) = ({r},{a}) '
                        .format(r=loss_reconstruction,
                                a=loss_affine,
                                i=i,
                                n=str(n_iter)), grid, n_iter)

            n_batch += 1
            if (epoch == 20):  #reset learning rate scheduler
                optimizer.param_groups[0]['lr'] = opts.lr
                lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                    optimizer, factor=0.1, patience=0, threshold=1e-5)

            if (epoch < 20):
                alpha = 0.0
            else:
                alpha = opts.alpha
            loss = loss_reconstruction + (alpha * total_loss_affine)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            avg_loss += loss.item()
            writer.add_scalar('Loss : ', loss, n_iter)
            writer.add_scalar('Reconstruction : ', loss_reconstruction, n_iter)
            writer.add_scalar('Affine : ', total_loss_affine, n_iter)
            n_iter += 1

        avg_loss = avg_loss / len(dataloader)
        lr_scheduler.step(avg_loss)
        print('Epoch ', epoch, ' average loss ', avg_loss, ' learning rate ',
              optimizer.param_groups[0]['lr'])