Exemple #1
0
    def test_pad_with_non_constant_padding_modes(self):
        """Check edge, reflect and symmetric padding on a PIL image."""
        source = torch.zeros(3, 27, 27).byte()
        source[:, :, 0] = 1  # constant value written to the leftmost column
        source = transforms.ToPILImage()(source)
        # Surround the image with a 1-pixel constant border of value 200.
        source = F.pad(source, 1, (200, 200, 200))

        def leading_values(pil_img):
            # First 6 values (channel 0) of row 17, i.e. the left end of a row
            # taken from the middle of the image.
            return np.asarray(pil_img).transpose(2, 0, 1)[0][17][:6]

        def tensor_size(pil_img):
            return transforms.ToTensor()(pil_img).size()

        # Pad 3 to all sides.
        # Expected order: edge_pad x3, constant_pad, leftmost-column value, 0
        edge_padded = F.pad(source, 3, padding_mode='edge')
        assert np.all(leading_values(edge_padded) == np.asarray([200, 200, 200, 200, 1, 0]))
        assert tensor_size(edge_padded) == (3, 35, 35)

        # Pad 3 to left/right, 2 to top/bottom.
        # Expected order: reflect_pad x3, constant_pad, leftmost-column value, 0
        reflect_padded = F.pad(source, (3, 2), padding_mode='reflect')
        assert np.all(leading_values(reflect_padded) == np.asarray([0, 0, 1, 200, 1, 0]))
        assert tensor_size(reflect_padded) == (3, 33, 35)

        # Pad 3 to left, 2 to top, 2 to right, 1 to bottom.
        # Expected order: sym_pad x3, constant_pad, leftmost-column value, 0
        symmetric_padded = F.pad(source, (3, 2, 2, 1), padding_mode='symmetric')
        assert np.all(leading_values(symmetric_padded) == np.asarray([0, 1, 200, 200, 1, 0]))
        assert tensor_size(symmetric_padded) == (3, 32, 34)
Exemple #2
0
    def __call__(self, img, mask):
        """Apply the same random integer translation to ``img`` and ``mask``.

        The image is shifted by cropping one side and reflect-padding the
        opposite side; the mask is shifted with an affine translation whose
        uncovered area is filled with 250.

        Returns:
            tuple: (translated image, translated mask).
        """
        assert img.size == mask.size
        # One random draw per axis, scaled by self.offset and truncated to int.
        x_offset = int(2 * (random.random() - 0.5) * self.offset[0])
        y_offset = int(2 * (random.random() - 0.5) * self.offset[1])

        # A positive offset removes pixels from the left/top; a negative one
        # keeps the origin and removes pixels from the right/bottom instead.
        crop_left = max(x_offset, 0)
        crop_top = max(y_offset, 0)
        cropped_img = tf.crop(
            img,
            crop_top,
            crop_left,
            img.size[1] - abs(y_offset),
            img.size[0] - abs(x_offset),
        )

        # (left, top, right, bottom): give back the removed pixels on the
        # side opposite to the one that was cropped.
        padding_tuple = (
            max(-x_offset, 0),
            max(-y_offset, 0),
            max(x_offset, 0),
            max(y_offset, 0),
        )

        shifted_img = tf.pad(cropped_img, padding_tuple, padding_mode="reflect")
        shifted_mask = tf.affine(
            mask,
            translate=(-x_offset, -y_offset),
            scale=1.0,
            angle=0.0,
            shear=0.0,
            fillcolor=250,
        )
        return (shifted_img, shifted_mask)
Exemple #3
0
 def pad_with_mask(self, image, target, param):
     """Pad ``image`` and update ``target`` (boxes, masks, size, padding mask).

     Args:
         image: image to pad; its ``size`` attribute is read as
             ``(width, height)`` (PIL convention).
         target: dict of annotations or None. A shallow copy is updated and
             returned; the caller's dict is not modified.
         param: padding as ``(left, top, right, bottom)``.

     Returns:
         tuple: ``(padded_image, target)``; ``target`` is None when no
         target was given.
     """
     # assumes that we only pad on the bottom right corners
     padded_image = F.pad(image, param)
     assert padded_image.size == self.target_size
     if target is None:
         return padded_image, None
     target = target.copy()
     # ``size`` is (width, height); the mask below is indexed [row, column],
     # so it has to be built as (height, width).
     w, h = padded_image.size
     # True over padded pixels, False over the original image content.
     mask = torch.ones(size=(h, w), dtype=torch.bool)
     mask[param[1]:h - param[3], param[0]:w - param[2]] = 0
     # should we do something wrt the original size?
     target.update({"nest_mask": mask})
     target["size"] = torch.tensor(padded_image.size[::-1])
     # Shift xyxy boxes by the left/top padding (out of place, so the
     # caller's tensor is left untouched).
     target["boxes"] = target["boxes"] + torch.tensor(
         [param[0], param[1], param[0], param[1]])
     if "masks" in target:
         # torch.nn.functional.pad takes (left, right, top, bottom)
         _param = (param[0], param[2], param[1], param[3])
         target['masks'] = torch.nn.functional.pad(target['masks'], _param)
     return padded_image, target
Exemple #4
0
    def forward(
        self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
    ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
        """Randomly place the image on a larger canvas and shift its boxes.

        With probability ``1 - self.p`` the inputs are returned unchanged.
        Otherwise a canvas scale is drawn from ``self.side_range``, the image
        is padded to that canvas at a random position, and
        ``target["boxes"]`` is translated by the left/top padding in place.
        """
        if isinstance(image, torch.Tensor):
            ndim = image.ndimension()
            if ndim not in {2, 3}:
                raise ValueError(f"image should be 2/3 dimensional. Got {image.ndimension()} dimensions.")
            elif ndim == 2:
                image = image.unsqueeze(0)

        if torch.rand(1) >= self.p:
            return image, target

        _, orig_h, orig_w = F.get_dimensions(image)

        # Canvas size: original size scaled by a random factor.
        zoom = self.side_range[0] + torch.rand(1) * (self.side_range[1] - self.side_range[0])
        canvas_width = int(orig_w * zoom)
        canvas_height = int(orig_h * zoom)

        # Random position of the original image inside the canvas.
        offsets = torch.rand(2)
        left = int((canvas_width - orig_w) * offsets[0])
        top = int((canvas_height - orig_h) * offsets[1])
        right = canvas_width - (left + orig_w)
        bottom = canvas_height - (top + orig_h)

        if torch.jit.is_scripting():
            fill = 0
        else:
            fill = self._get_fill_value(F._is_pil_image(image))

        image = F.pad(image, [left, top, right, bottom], fill=fill)
        if isinstance(image, torch.Tensor):
            # PyTorch's pad supports only integers on fill. So we need to overwrite the colour
            v = torch.tensor(self.fill, device=image.device, dtype=image.dtype).view(-1, 1, 1)
            image[..., :top, :] = v
            image[..., :, :left] = v
            image[..., (top + orig_h) :, :] = v
            image[..., :, (left + orig_w) :] = v

        if target is not None:
            target["boxes"][:, 0::2] += left
            target["boxes"][:, 1::2] += top

        return image, target
Exemple #5
0
    def __call__(self, image):
        """Convert ``image`` to RGB and pad its shorter axis with white so it is square.

        The padding is split between both sides of the shorter axis; when the
        difference is odd the extra pixel goes to the bottom/right.
        """
        width, height = image.size
        diff = width - height

        left_pad = top_pad = right_pad = bottom_pad = 0
        if diff > 0:
            # Wider than tall: pad top and bottom.
            top_pad = diff // 2
            bottom_pad = diff - top_pad
        elif diff < 0:
            # Taller than wide: pad left and right.
            left_pad = -diff // 2 if diff % 2 == 0 else (-diff - 1) // 2
            right_pad = -diff - left_pad

        rgb_image = image.convert('RGB')
        return pad(rgb_image, (left_pad, top_pad, right_pad, bottom_pad),
                   fill=(255, 255, 255))
Exemple #6
0
def pad(
    img: torch.Tensor,
    new_size: Union[int, Tuple[int, int]],
) -> torch.Tensor:
    """torchscript-compatible implementation of pad.

    The image is padded (edge mode) so that it is centred in the new size.
    Axes that are already at least as large as requested are left unchanged.

    Args:
        img (torch.Tensor): image with shape [..., height, width] to pad
        new_size (Union[int, Tuple[int, int]]): size to pad to, as (height, width).
            If int, pads to a square image of that size.

    Returns:
        torch.Tensor: padded image of size [..., size[0], size[1]] or [..., size, size] if size is int.
    """
    new_size = to_tuple(new_size)
    old_size = img.shape[-2:]
    # Half of the (height, width) difference; fractional when the difference is odd.
    pad_size = (torch.tensor(new_size) - torch.tensor(old_size)) / 2
    # F.pad expects [left, top, right, bottom], i.e. the width amount comes
    # before the height amount, so reorder (height, width) -> (width, height).
    pad_size = pad_size.flip(0)
    # floor on left/top and ceil on right/bottom so the two sides sum to the full difference.
    padding = torch.cat((torch.floor(pad_size), torch.ceil(pad_size)))
    padding[padding < 0] = 0
    padding = [int(x) for x in padding]
    return F.pad(img, padding=padding, padding_mode="edge")
    def __call__(self, img, target):
        """Resize ``img`` to ``self.size`` and rescale ``target['boxes']`` accordingly.

        When ``self.size`` is a (height, width) pair and
        ``self.scale_with_padding`` is set, the image is first padded to the
        target aspect ratio and the boxes are shifted by that padding.
        Box coordinates are updated in place and floored after scaling.
        """
        w, h = img.size
        if isinstance(self.size, int):
            w_ratio = h_ratio = self.size / min(w, h)
        else:
            out_h, out_w = self.size[0], self.size[1]
            if w / h != out_w / out_h and self.scale_with_padding:
                if w / h < out_w / out_h:
                    # Too narrow: pad left and right.
                    border = (int((h * out_w / out_h - w) / 2), 0)
                else:
                    # Too wide: pad top and bottom.
                    border = (0, int((w * out_h / out_w - h) / 2))
                img = F.pad(img, border)
                target['boxes'][:, (0, 2)] = target['boxes'][:, (0, 2)] + border[0]
                target['boxes'][:, (1, 3)] = target['boxes'][:, (1, 3)] + border[1]

            # Ratios are taken against the (possibly padded) image size.
            current_w, current_h = img.size[0], img.size[1]
            w_ratio, h_ratio = out_w / current_w, out_h / current_h

        img = F.resize(img, self.size)
        target['boxes'][:, (0, 2)] = (target['boxes'][:, (0, 2)] * w_ratio).floor()
        target['boxes'][:, (1, 3)] = (target['boxes'][:, (1, 3)] * h_ratio).floor()
        return img, target
Exemple #8
0
    def __call__(self, img):
        """Pad ``img`` to a square along its shorter axis, then resize it to ``self.size``.

        When the difference between the sides is odd, the extra pixel goes to
        the top (or left).
        """
        width, height = img.size

        left = right = top = bottom = 0
        if width > height:
            half, extra = divmod(width - height, 2)
            top, bottom = half + extra, half
        elif height > width:
            half, extra = divmod(height - width, 2)
            left, right = half + extra, half

        squared = pad(img, (left, top, right, bottom),
                      fill=self.fill,
                      padding_mode=self.padding_mode)
        return resize(squared, size=self.size, interpolation=self.interpolation)
Exemple #9
0
    def undo_transform(self, sample):
        """Pad every entry of ``sample['input']`` using the stored per-item parameters.

        For each item, ``self.get_params(sample)`` yields ``(fh, fw, w, h)``;
        the item is padded by ``fw``/``fh`` on the left/top and by the
        remainder up to ``w``/``h`` (given the ``self.size`` of (th, tw)) on
        the right/bottom. The sample is updated in place and returned.
        """
        input_data = sample['input']
        params = self.get_params(sample)
        th, tw = self.size

        for idx, item in enumerate(input_data):
            fh, fw, w, h = params[idx]
            # (left, top, right, bottom)
            padding = (fw, fh, w - fw - tw, h - fh - th)
            input_data[idx] = F.pad(item, padding)

        sample.update({'input': input_data})
        return sample
    def __call__(self, input_image, target_image):
        """Apply the same padding and the same random crop to both images.

        Args:
            input_image: image to crop.
            target_image: paired image that receives identical padding and
                crop coordinates.

        Returns:
            tuple: (cropped input image, cropped target image).
        """
        if self.padding is not None:
            input_image = F.pad(input_image, self.padding, self.fill, self.padding_mode)
            target_image = F.pad(target_image, self.padding, self.fill, self.padding_mode)
        # pad the width if needed
        if self.pad_if_needed and input_image.size[0] < self.size[1]:
            input_image = F.pad(input_image, (self.size[1] - input_image.size[0], 0), self.fill, self.padding_mode)
            # Pad the target from its own size (the original referenced an
            # undefined name here).
            target_image = F.pad(target_image, (self.size[1] - target_image.size[0], 0), self.fill, self.padding_mode)
        # pad the height if needed
        if self.pad_if_needed and input_image.size[1] < self.size[0]:
            input_image = F.pad(input_image, (0, self.size[0] - input_image.size[1]), self.fill, self.padding_mode)
            target_image = F.pad(target_image, (0, self.size[0] - target_image.size[1]), self.fill, self.padding_mode)

        # Crop parameters are drawn once and shared so both crops stay aligned.
        i, j, h, w = self.get_params(input_image, self.size)
        return F.crop(input_image, i, j, h, w), F.crop(target_image, i, j, h, w)
Exemple #11
0
    def __getitem__(self, index):
        """Load the image/label pair at ``index`` and apply the transforms.

        When neither transform is configured, a default pair of transforms
        (256x256 crop + tensor conversion) is built and stored on ``self``.

        Returns:
            tuple: (transformed image, transformed label).
        """
        filename = self.filenames[index]

        with open(image_path(self.images_root, filename, '.jpg'), 'rb') as f:
            image = load_image(f).convert('RGB')
        with open(image_path(self.labels_root, filename, '.png'), 'rb') as f:
            label = load_image(f).convert('P')

        if self.input_transform is None and self.target_transform is None:
            tw, th = 256, 256

            # Make sure the image is at least as large as the crop window.
            # NOTE(review): only the image is padded here, not the label —
            # confirm whether the label should receive the same padding.
            padding = (max(0, tw - image.size[0]), max(0, th - image.size[1]))
            image = F.pad(image, padding)

            iw, ih = image.size[0], image.size[1]

            # No offset to draw when the image already matches the window in
            # both dimensions (the original compared tw with th instead of
            # ih with th).
            if iw == tw and ih == th:
                bi, bj = 0, 0
            else:
                bi = random.randint(0, ih - th)
                bj = random.randint(0, iw - tw)

            # NOTE(review): these transforms are stored on self, so the crop
            # offsets drawn here are reused on later calls — confirm intended.
            self.input_transform = Compose([
                Crop(bi, bj, th, tw),
                ToTensor(),
                Normalize([.485, .456, .406], [.229, .224, .225]),
            ])
            self.target_transform = Compose([
                Crop(bi, bj, th, tw),
                ToLabel(),
                Relabel(255, 0),
            ])

        if self.input_transform is not None:
            image = self.input_transform(image)
        if self.target_transform is not None:
            label = self.target_transform(label)

        return image, label
Exemple #12
0
    def __getitem__(self, index):
        """Return a randomly rotated face image and its keypoint image.

        Both images are resized to 224x224 and rotated by the same random
        angle in [-5, 5] degrees. The face image is edge-padded before the
        rotation and centre-cropped back to 224x224 afterwards; the keypoint
        image is rotated directly. Both go through ``self.transform``.
        """
        dataset = self.test_dataset if self.mode != 'train' else self.train_dataset
        facefile, keypointfile = dataset[index]
        faceimage = Image.open(os.path.join(self.face_dir, facefile))
        keypointimage = Image.open(
            os.path.join(self.keypoints_dir, keypointfile))

        angle = random.randint(-5, 5)

        # Face: resize -> edge pad -> rotate -> centre crop.
        face = TF.resize(faceimage, (224, 224))
        face = TF.pad(face, padding=(62, 62), padding_mode='edge')
        face = TF.rotate(face, angle)
        face = TF.center_crop(face, (224, 224))

        # Keypoints: resize -> rotate.
        keypoints = TF.resize(keypointimage, (224, 224))
        keypoints = TF.rotate(keypoints, angle)

        return self.transform(face), self.transform(keypoints)
def pad(image, target, padding):
    """Pad ``image`` and update the annotations in ``target`` to match.

    Args:
        image: image to pad; its ``size`` attribute is read as (width, height).
        target: dict of annotations or None. A shallow copy is updated and
            returned; tensors owned by the caller are not modified.
        padding: (pad_left, pad_top, pad_right, pad_bottom).

    Returns:
        tuple: ``(padded_image, target)``; ``target`` is None when no target
        was given.
    """
    # pad_left, pad_top, pad_right, pad_bottom
    padded_image = F.pad(image, padding)
    if target is None:
        return padded_image, None
    target = target.copy()
    # should we do something wrt the original size?
    w, h = padded_image.size

    if "boxes" in target:
        # Shift xyxy boxes by the left/top padding. The addition is done out
        # of place: ``target`` is only a shallow copy, so an in-place ``+=``
        # would also change the tensor held by the caller.
        target["boxes"] = target["boxes"] + torch.tensor(
            [padding[0], padding[1], padding[0], padding[1]])

    target["size"] = torch.tensor([h, w])
    if "masks" in target:
        # torch.nn.functional.pad order: left, right, top, bottom
        target['masks'] = torch.nn.functional.pad(
            target['masks'],
            (padding[0], padding[2], padding[1], padding[3]))
    return padded_image, target
Exemple #14
0
    def __call__(self, x):
        """Return a ``self.size`` x ``self.size`` crop of ``x``, zero-padding if needed.

        Axes shorter than ``self.size`` are zero-padded up to it (split as
        evenly as possible, the odd pixel going to the left/top). Axes longer
        than ``self.size`` are cropped at a random offset.
        """
        w, h = x.size

        # Total padding needed per axis (0 when the axis is already large enough).
        pad_w = max(0, self.size - w)
        pad_h = max(0, self.size - h)

        # Right/bottom get the floor half, left/top get the remainder, so the
        # padded axis is exactly self.size long.
        hp, vp = pad_w // 2, pad_h // 2
        padding = (pad_w - hp, pad_h - vp, hp, vp)

        res = tvF.pad(x, padding, 0, 'constant')

        # tvF.crop takes (top, left, height, width): the vertical offset is
        # bounded by the surplus height and the horizontal one by the surplus
        # width.
        top = randint(0, max(0, h - self.size))
        left = randint(0, max(0, w - self.size))
        res = tvF.crop(res, top, left, self.size, self.size)

        return res
Exemple #15
0
    def __call__(self, img, expand=False, pad=False):  # pad = False CIFAR
        """Rotate ``img`` by ``self.angle`` degrees.

        When ``pad`` is set and the angle is not a multiple of 90, the image
        is reflect-padded before the rotation; the result is not cropped
        back, and ``expand`` is not used on this path. Otherwise the image is
        rotated directly, honouring ``expand``.
        """
        if not pad or self.angle % 90 == 0:
            return F.rotate(img, self.angle, expand=expand, fill=(0, ))

        w, h = img.size
        rad_angle = np.deg2rad(self.angle)
        # Padding per axis: |ceil(side * cos(angle) * sin(angle))|.
        factor = np.cos(rad_angle) * np.sin(rad_angle)
        dw = np.abs(np.ceil(w * factor)).astype(int)
        dh = np.abs(np.ceil(h * factor)).astype(int)
        padded = F.pad(img, padding=(dw, dh), padding_mode='reflect')

        # actual rotation
        return F.rotate(padded, self.angle, fill=(0, ))
Exemple #16
0
    def __getitem__(self, index):
        """Load, size-normalise and transform the image at ``index``.

        Returns:
            tuple: (transformed image, label, image path).
        """
        image_path = self.images[index]
        image = Image.open(image_path)
        if not self.useRGB:
            # The three RGB channels hold equal values, so keep only the first.
            image = Image.fromarray(np.asarray(image)[:, :, 0])

        if self.padding:
            # Resize so the longer side is 224 (logic taken from
            # torchvision.transforms.functional.resize).
            size = 224
            w, h = image.size
            if max(w, h) == size:
                ow, oh = w, h
            else:
                if w < h:
                    ow, oh = int(size * w / h), size
                else:
                    ow, oh = size, int(size * h / w)
                image = image.resize((ow, oh), resample=Image.BILINEAR)

            # Zero-pad the shorter side up to 224; the odd pixel goes to the
            # right/bottom.
            gap_w, gap_h = size - ow, size - oh
            pad_left, pad_top = gap_w // 2, gap_h // 2
            image = functional.pad(image, fill=0, padding_mode='constant',
                                   padding=(pad_left, pad_top,
                                            gap_w - pad_left, gap_h - pad_top))
        else:
            # Plain resize to 224x224.
            image = functional.resize(image, (224, 224))

        image = self.trans(image)
        label = self.labels[index]

        return image, label, image_path
    def __call__(self, sample):
        """Edge-pad and rotate the image, updating the polygon to match.

        The rotation angle is the direction (in degrees) of the longer of the
        polygon's first two edges. The polygon array is modified in place.

        :param sample: dict with keys "image", "polygon" and "labels"
        :type sample: dict
        :return: dict with keys 'image', 'polygon' and 'labels'
        :rtype: dict
        """
        image, polygon, labels = sample["image"], sample["polygon"], sample[
            "labels"]

        w, h = image.size
        # Edge vectors: vertex 0 -> 1 and vertex 1 -> 2.
        y1, x1 = polygon[1, 1] - polygon[0, 1], polygon[1, 0] - polygon[0, 0]
        y2, x2 = polygon[2, 1] - polygon[1, 1], polygon[2, 0] - polygon[1, 0]
        sq_len1 = x1 * x1 + y1 * y1
        sq_len2 = x2 * x2 + y2 * y2
        object_w = max((np.sqrt(sq_len1)), (np.sqrt(sq_len2)))

        # Use the direction of the longer edge.
        if sq_len1 > sq_len2:
            edge_y, edge_x = y1, x1
        else:
            edge_y, edge_x = y2, x2
        angle = np.arctan2(edge_y, edge_x) * 180 / np.pi

        pad = max(0, int(10 - (w - object_w) / 2))
        image = functional.pad(img=image, padding=[pad], padding_mode='edge')
        # Shift every vertex by the padding added on the left/top.
        for vertex in polygon:
            vertex[0] = vertex[0] + pad
            vertex[1] = vertex[1] + pad

        w, h = image.size
        image = functional.rotate(img=image, angle=angle)
        angle_rad = np.pi * angle / 180
        cos_a, sin_a = np.cos(angle_rad), np.sin(angle_rad)
        # Rotation centre in a frame where y points up (hence the sign flips).
        w_r, h_r = w / 2, -h / 2
        for vertex in polygon:
            x0, y0 = vertex[0], -vertex[1]
            vertex[0] = (x0 - w_r) * cos_a - (
                y0 - h_r) * sin_a + w_r
            vertex[1] = -((x0 - w_r) * sin_a +
                          (y0 - h_r) * cos_a + h_r)
        return {'image': image, 'polygon': polygon, 'labels': labels}
Exemple #18
0
    def __call__(self, sample):
        """Pad (optionally) and randomly crop the image, adjusting the landmarks.

        Args:
            sample (dict): 'image' (PIL Image) to be cropped and 'landmarks'
                whose first two columns are x and y expressed as fractions of
                the original image width/height. The landmarks array is
                modified in place.
        Returns:
            dict: 'image' (cropped PIL Image) and 'landmarks' (rescaled to
            the crop).
        """
        image, landmarks = sample['image'], sample['landmarks']

        orig_w, orig_h = image.size

        # Work out the left/top padding for both int and sequence paddings.
        # Comparing a sequence to 0 directly raises TypeError on Python 3, so
        # the "is there any padding" test is done per element.
        padding = self.padding
        if isinstance(padding, int):
            left = top = padding
            has_padding = padding > 0
        else:
            # Sequences are (left/right, top/bottom) or
            # (left, top, right, bottom); a length-1 sequence applies to all
            # sides.
            left = padding[0]
            top = padding[1] if len(padding) > 1 else padding[0]
            has_padding = any(p > 0 for p in padding)

        if has_padding:
            image = F.pad(image, padding)

            # Only the left/top padding moves the landmark coordinates.
            landmarks[:, 0] += left / orig_w
            landmarks[:, 1] += top / orig_h

        # i: upper pixel coordinate
        # j: left pixel coordinate
        i, j, h, w = self.get_params(image, self.size)

        # Move the origin to the crop corner, then renormalise to the crop size.
        landmarks -= [j / orig_w, i / orig_h]

        landmarks *= [orig_w / w, orig_h / h]

        image = F.crop(image, i, j, h, w)

        return {'image': image, 'landmarks': landmarks}
    def __call__(self, img):
        """Randomly resize the given PIL image, 0-pad it and crop a window.

        Parameters
        ----------
        img PIL.Image : input image.

        Returns
        -------
        img PIL.Image : transformed image, cropped to (self.size, self.size).
        """
        # Draw the resize target between the image width and self.size.
        target_edge = random.randint(img.width, self.size)
        resized_img = F.resize(img, target_edge)

        # 0-pad by the remaining margin on the left, right, top and bottom.
        margin = self.size - target_edge
        padded_img = F.pad(resized_img, margin, fill=0)

        # Pick a random window position inside the padded image.
        pos_top = random.randint(0, margin)
        pos_left = random.randint(0, margin)
        return F.crop(padded_img, pos_top, pos_left, self.size, self.size)
Exemple #20
0
 def __call__(self, img):
     """Resize the longer side of ``img`` to ``self.size`` and symmetric-pad the rest.

     The aspect ratio is kept; the shorter side is padded on both ends
     (extra pixel on the right/bottom) up to ``self.size``.
     """
     w, h = img.size
     if h > w:
         # Portrait: height becomes self.size.
         ratio = self.size / float(h)
         new_w, new_h = int(w * ratio), self.size
     else:
         # Landscape or square: width becomes self.size.
         ratio = self.size / float(w)
         new_w, new_h = self.size, int(h * ratio)
     img = img.resize((new_w, new_h))

     # The axis that was resized to self.size has a gap of 0.
     gap_x = self.size - new_w
     gap_y = self.size - new_h
     if h > w:
         left, right = int(gap_x / 2.), gap_x - int(gap_x / 2.)
         top, bottom = 0, 0
     else:
         left, right = 0, 0
         top, bottom = int(gap_y / 2.), gap_y - int(gap_y / 2.)
     return TF.pad(img, padding=(left, top, right, bottom),
                   padding_mode='symmetric')
    def __call__(self, img, target=None, mask=None):
        """Pad as configured and take the same random crop from every input.

        Args:
            img (PIL Image): Image to be cropped.
            target (PIL Image): (optional) Target to be cropped
            mask (PIL Image): (optional) Mask to be cropped; only returned
                when ``target`` is also given.

        Returns:
            PIL Images: Cropped image(s): ``img`` alone, ``(img, target)`` or
            ``(img, target, mask)``.
        """
        if self.padding > 0:
            img = F.pad(img, self.padding)
            if target is not None:
                target = F.pad(target, self.padding)
            if mask is not None:
                # Keep the mask aligned with the padded image.
                mask = F.pad(mask, self.padding)

        # pad the width if needed
        if self.pad_if_needed and img.size[0] < self.size[1]:
            img = F.pad(img, (int((1 + self.size[1] - img.size[0]) / 2), 0))
            if target is not None:
                target = F.pad(target, (int((1 + self.size[1] - target.size[0]) / 2), 0))
            if mask is not None:
                # The padded mask must be stored back into ``mask`` (the
                # original overwrote ``target`` with it).
                mask = F.pad(mask, (int((1 + self.size[1] - mask.size[0]) / 2), 0))
        # pad the height if needed
        if self.pad_if_needed and img.size[1] < self.size[0]:
            img = F.pad(img, (0, int((1 + self.size[0] - img.size[1]) / 2)))
            if target is not None:
                target = F.pad(target, (0, int((1 + self.size[0] - target.size[1]) / 2)))
            if mask is not None:
                mask = F.pad(mask, (0, int((1 + self.size[0] - mask.size[1]) / 2)))

        # One set of crop parameters is shared by all inputs.
        i, j, h, w = self.get_params(img, self.size)

        if target is not None and mask is None:
            return F.crop(img, i, j, h, w), F.crop(target, i, j, h, w)
        if target is not None and mask is not None:
            return F.crop(img, i, j, h, w), F.crop(target, i, j, h, w), F.crop(mask, i, j, h, w)
        return F.crop(img, i, j, h, w)
Exemple #22
0
    def __call__(self, img, lab):
        """Pad as configured and take the same random crop from image and label.

        Args:
            img: image to crop; ``img.size`` is read as (width, height).
            lab: label, either a numpy array indexed (h, w, c) or an image
                accepted by ``F.pad`` / ``F.crop``.

        Returns:
            tuple: (cropped image, cropped label).
        """
        if self.padding is not None:
            img = F.pad(img, self.padding, self.fill, self.padding_mode)
            if isinstance(lab, np.ndarray):
                # NOTE(review): this indexing needs self.padding to be a
                # 4-sequence (left, top, right, bottom) — confirm callers.
                lab = np.pad(lab, ((self.padding[1], self.padding[3]),
                                   (self.padding[0], self.padding[2]), (0, 0)),
                             mode='constant')
            else:
                lab = F.pad(lab, self.padding, self.label_fill,
                            self.padding_mode)

        # pad the width if needed
        if self.pad_if_needed and img.size[0] < self.size[1]:
            # Measure the deficit before img is padded; afterwards img.size
            # no longer reflects it and the label pad width would be negative.
            pad_w = self.size[1] - img.size[0]
            img = F.pad(img, (pad_w, 0), self.fill, self.padding_mode)
            if isinstance(lab, np.ndarray):
                # Same amount on the left and right, matching F.pad's 2-tuple form.
                lab = np.pad(lab, ((0, 0), (pad_w, pad_w), (0, 0)),
                             mode='constant')
            else:
                lab = F.pad(lab, (self.size[1] - lab.size[0], 0),
                            self.label_fill, self.padding_mode)

        # pad the height if needed
        if self.pad_if_needed and img.size[1] < self.size[0]:
            pad_h = self.size[0] - img.size[1]
            img = F.pad(img, (0, pad_h), self.fill, self.padding_mode)
            if isinstance(lab, np.ndarray):
                lab = np.pad(lab, ((pad_h, pad_h), (0, 0), (0, 0)),
                             mode='constant')
            else:
                lab = F.pad(lab, (0, self.size[0] - lab.size[1]),
                            self.label_fill, self.padding_mode)

        i, j, h, w = self.get_params(img, self.size)
        img = F.crop(img, i, j, h, w)
        if isinstance(lab, np.ndarray):
            # assert the shape of label is in the order of (h, w, c)
            lab = lab[i:i + h, j:j + w, :]
        else:
            lab = F.crop(lab, i, j, h, w)
        return img, lab
Exemple #23
0
def resize_image(image,
                 desired_width=768,
                 desired_height=384,
                 random_pad=False):
    """Resize an image to fit the desired size, then pad it to exactly that size.

    The image is scaled by the smaller of the width/height ratios and then
    padded, either centred or at a random position when ``random_pad`` is set.

    Returns:
    image: the resized and padded image
    window: [x1, y1, x2, y2], the coordinates of the image part of the
        returned image (excluding the padding). The x2, y2 pixels are not
        included.
    scale: The scale factor used to resize the image
    padding: Padding added to the image (left, top, right, bottom)
    """
    w, h = image.size
    scale = min(desired_width / w, desired_height / h)

    if scale != 1:
        image = functional.resize(image, (round(h * scale), round(w * scale)))
    w, h = image.size

    # One of the two is expected to be 0: the axis that limited the scale.
    y_pad = desired_height - h
    x_pad = desired_width - w
    if random_pad:
        top_pad = random.randint(0, y_pad)
        left_pad = random.randint(0, x_pad)
    else:
        top_pad = y_pad // 2
        left_pad = x_pad // 2

    # (left, top, right, bottom)
    padding = (left_pad, top_pad, x_pad - left_pad, y_pad - top_pad)
    assert all(amount >= 0 for amount in padding)
    image = functional.pad(image, padding)
    window = [left_pad, top_pad, w + left_pad, h + top_pad]

    return image, window, scale, padding
Exemple #24
0
 def __call__(self, image, target):
     """Randomly crop ``image`` and ``target`` to at most ``self.crop_size``, padding up if smaller.

     Crop positions are re-drawn until the cropped target is non-empty.
     When the image is smaller than ``self.crop_size`` along an axis, the
     crop is padded on the right/bottom up to ``self.crop_size``.
     """
     w, h = image.size
     new_h = min(h, self.crop_size)
     new_w = min(w, self.crop_size)

     # Attention: If Densebox does not support empty targets, random crop
     # should not provide empty targets, so keep sampling until one is found.
     while True:
         top = np.random.randint(0, h - new_h + 1)
         left = np.random.randint(0, w - new_w + 1)
         # should make sure target crop method does not modify itself
         candidate = target.crop((left, top, left + new_w, top + new_h),
                                 remove_empty=True)
         if len(candidate) > 0:
             break
     target = candidate

     image = F.crop(image, top, left, new_h, new_w)
     if new_h < self.crop_size or new_w < self.crop_size:
         # (left, top, right, bottom): only right/bottom are padded.
         padding = (0, 0, (self.crop_size - new_w),
                    (self.crop_size - new_h))
         image = F.pad(image, padding=padding)
         target = target.pad(padding)
     return image, target
    def __getitem__(self, idx):
        """Load an image with its two label maps and apply the configured augmentations.

        Random scaling, mirroring and cropping (each optional) are applied
        identically to the image and both labels.

        Returns:
            tuple: (normalized image tensor, label_1 as long, label_2 as float).
        """
        image = Image.open(self.image_list[idx])
        label_1 = Image.open(self.label_list_1[idx])
        label_2 = Image.open(self.label_list_2[idx])
        w, h = image.size

        if self.random_scale:
            # Shorter side scaled by a random factor: 0.5 + one uniform draw.
            scale = int(min(w, h) * (np.random.uniform() + 0.5))
            resize_bl = transforms.Resize(size=scale, interpolation=PIL.Image.BILINEAR)
            resize_nn = transforms.Resize(size=scale, interpolation=PIL.Image.NEAREST)
            image = resize_bl(image)
            label_1 = resize_nn(label_1)
            label_2 = resize_nn(label_2)

        if self.random_mirror and np.random.uniform() < 0.5:
            image = TF.hflip(image)
            label_1 = TF.hflip(label_1)
            label_2 = TF.hflip(label_2)

        if self.random_crop:
            out_h, out_w = self.output_size[0], self.output_size[1]
            # pad the width if needed
            if image.size[0] < out_w:
                image = TF.pad(image, (out_w - image.size[0], 0))
                label_1 = TF.pad(label_1, (out_w - label_1.size[0], 0), self.ignore_label, 'constant')
                label_2 = TF.pad(label_2, (out_w - label_2.size[0], 0),
                                 tuple([self.ignore_label] * 3), 'constant')
            # pad the height if needed
            if image.size[1] < out_h:
                image = TF.pad(image, (0, out_h - image.size[1]))
                label_1 = TF.pad(label_1, (0, out_h - label_1.size[1]), self.ignore_label, 'constant')
                label_2 = TF.pad(label_2, (0, out_h - label_2.size[1]),
                                 tuple([self.ignore_label] * 3), 'constant')

            # One crop window shared by the image and both labels.
            top, left, crop_h, crop_w = transforms.RandomCrop.get_params(
                image, output_size=self.output_size)
            image = TF.crop(image, top, left, crop_h, crop_w)
            label_1 = TF.crop(label_1, top, left, crop_h, crop_w)
            label_2 = TF.crop(label_2, top, left, crop_h, crop_w)

        # The arrays are shifted by -255 before to_tensor and by +255 after.
        image = self.normalize(self.to_tensor(np.array(image) - 255.).float() + 255.)
        label_1 = self.to_tensor(np.array(label_1) - 255.) + 255.
        label_2 = self.to_tensor(np.array(label_2) - 255.) + 255.

        return image, label_1.long(), label_2.float()
Exemple #26
0
    def __call__(self, x):
        """Resize ``x`` so its longer side is ``self._size`` and pad it to a square.

        ``x`` is read as [..., height, width]. The aspect ratio is kept and
        the padding is centred, with the odd pixel going to the right/bottom.
        """
        height, width = x.shape[-2:]
        aspect_ratio = width / height

        if width > height:
            new_width = self._size
            new_height = round(new_width / aspect_ratio)
        else:
            new_height = self._size
            new_width = round(aspect_ratio * new_height)

        # NOTE(review): `FileCheck.resize` is kept as found; confirm that
        # `FileCheck` is the intended resize provider in this file.
        resized = FileCheck.resize(x, [new_height, new_width],
                                   interpolation=self._interpolation)

        pad_left, extra_w = divmod(self._size - new_width, 2)
        pad_top, extra_h = divmod(self._size - new_height, 2)
        pad_right = pad_left + extra_w
        pad_bottom = pad_top + extra_h

        return F.pad(resized, [pad_left, pad_top, pad_right, pad_bottom])
Exemple #27
0
    def __call__(self, img):
        """
        Pad the image.

        When ``self.padding`` is greater than 100 it is treated as a target
        side length and the image is padded, centred, up to that size in both
        dimensions; otherwise ``self.padding`` is passed to ``F.pad`` as is.

        Args:
            img (PIL Image): Image to be padded.

        Returns:
            PIL Image: Padded image.

        """
        if self.padding > 100:
            width, height = img.size[0], img.size[1]
            delta_w = self.padding - width
            delta_h = self.padding - height

            # The odd pixel, if any, goes to the right/bottom.
            left, top = delta_w // 2, delta_h // 2
            pad = (left, top, delta_w - left, delta_h - top)
        else:
            pad = self.padding

        return F.pad(img, pad, self.fill, self.padding_mode)
Exemple #28
0
def image_to_tensor(image,
                    resolution=None,
                    paddingval=None,
                    padding_mode='constant',
                    do_imagenet_norm=True,
                    do_padding=True):
    """Convert an image (or image path) into a square, optionally padded tensor.

    Steps, in order: load from disk if ``image`` is a path, center-crop to a
    square, pad, resize, convert to a tensor, and apply ``imagenet_norm``.

    Args:
        image: An image object exposing ``width``/``height``, or a ``str``
            path that is opened and converted to RGB.
        resolution: If not ``None``, passed to ``tr.resize`` as the target size.
        paddingval: Border size passed to ``tr.pad``. ``None`` skips padding.
        padding_mode: Padding mode forwarded to ``tr.pad``.
        do_imagenet_norm: Whether to apply ``imagenet_norm`` to the tensor.
        do_padding: Whether to pad at all (only effective when ``paddingval``
            is given).

    Returns:
        The tensor produced by ``tr.to_tensor`` (normalized if requested).
    """
    if isinstance(image, str):
        # convert() returns an independent, fully loaded copy, so the file
        # handle can be released as soon as the conversion is done.
        with Image.open(image) as opened:
            image = opened.convert('RGB')
    if image.width != image.height:
        # Not square: crop the longer side's edges so both sides match.
        shorter_side = min(image.width, image.height)
        image = tr.center_crop(image, (shorter_side, shorter_side))
    # The default paddingval (None) is not a usable padding spec, so padding
    # only happens when a value was actually supplied.
    if do_padding and paddingval is not None:
        image = tr.pad(image,
                       padding=paddingval,
                       padding_mode=padding_mode,
                       fill=0)
    if resolution is not None:
        image = tr.resize(image, resolution)
    image = tr.to_tensor(image)
    if do_imagenet_norm:
        image = imagenet_norm(image)
    return image
Exemple #29
0
    def _transform_image(self, x):
        """Apply ``self.affine_transform`` to a tensor via a PIL round trip.

        The tensor is min-max scaled into [0, 1], converted to a PIL image,
        padded by ``self.padding``, transformed, cropped back to
        ``(self.bank_height, self.bank_width)``, and finally mapped back to
        the original value range.

        Args:
            x: Input tensor. Presumably a single-channel image accepted by
                ``tf.to_pil_image`` -- confirm against callers.

        Returns:
            The transformed tensor, rescaled to the input's value range.
        """
        min_x = torch.min(x)
        x_transformed = x - min_x
        # Scale by the value range (max - min), not by max(x): dividing by
        # max(x) leaves values above 1 whenever min(x) is negative, and those
        # overflow in the 8-bit PIL conversion.
        value_range = torch.max(x_transformed)
        if value_range == 0:
            # Constant input: avoid 0/0; the shifted tensor is all zeros anyway.
            value_range = torch.ones_like(value_range)
        x_transformed /= value_range

        # Fill value chosen so that, once converted back to the original
        # range, the filled pixels map to 0.
        fillcolor = int(-min_x / value_range * 255)
        self.affine_transform.fillcolor = fillcolor

        # Conversion to a PIL image loses precision because the values are
        # quantized to a 0-255 gray scale.
        x_pil = tf.to_pil_image(x_transformed)
        padded = tf.pad(x_pil, self.padding, fill=fillcolor)
        x_pil = tf.crop(self.affine_transform(padded),
                        self.padding, self.padding,
                        self.bank_height, self.bank_width)

        # Undo the min-max scaling.
        x_transformed = tf.to_tensor(x_pil)
        x_transformed *= value_range
        x_transformed += min_x

        return x_transformed
def extend_mnist(Xtr, Ytr, N=1000, degrees=15, scale=(.85, 1.11), shear=15):
    """Augment a 28x28 image dataset with ``N`` randomly warped samples.

    Each new sample is drawn uniformly (with replacement) from ``Xtr``,
    padded by a small border, passed through a ``RandomAffine`` transform,
    and cropped back to 28x28. The augmented samples are appended to the
    originals.

    Args:
        Xtr: Array of images; each sample must hold 28*28 values (e.g. shape
            ``(M, 28, 28)`` or ``(M, 784)``).
        Ytr: Array of labels aligned with ``Xtr``.
        N: Number of augmented samples to generate.
        degrees: Rotation range forwarded to ``RandomAffine``.
        scale: Scale range forwarded to ``RandomAffine``.
        shear: Shear range forwarded to ``RandomAffine``.

    Returns:
        Tuple ``(X, Y)`` of the original data followed by the ``N`` augmented
        samples and their labels.
    """
    border = 3  # margin added before warping and cropped away afterwards
    # Keep the per-sample layout of the input so the final concatenate works
    # for flat inputs as well as (M, 28, 28) inputs.
    sample_shape = Xtr.shape[1:]

    Xtr_torch = torch.from_numpy(Xtr).reshape((-1, 1, 28, 28))
    affine = RandomAffine(degrees=degrees, scale=scale, shear=shear)

    ex_Xtr = np.zeros((N,) + sample_shape, dtype=Xtr.dtype)
    ex_Ytr = np.zeros((N,), dtype=Ytr.dtype)
    for i in range(N):
        idx = np.random.randint(Xtr.shape[0])
        padded = tf.pad(tf.to_pil_image(Xtr_torch[idx]), border)
        warped = tf.crop(affine(padded), border, border, 28, 28)
        pixels = tf.to_tensor(warped).numpy()
        if Xtr.dtype == np.uint8:
            # to_tensor yields floats in [0, 1]; round after rescaling so
            # float round-off is not truncated to the next lower grey level.
            pixels = np.rint(pixels * 255)
        ex_Xtr[i] = pixels.reshape(sample_shape)
        ex_Ytr[i] = Ytr[idx]

    return np.concatenate((Xtr, ex_Xtr)), np.concatenate((Ytr, ex_Ytr))
    def __call__(self, img):
        """
        Fit the image inside ``self.size`` keeping its aspect ratio, then pad
        the leftover space so the result matches the target size.

        Args:
            img (PIL Image): Image to be scaled.

        Returns:
            PIL Image: Rescaled image.
        """
        src_w, src_h = img.size
        target_h, target_w = self.size

        if src_w / src_h > target_w / target_h:
            # Source is relatively wider than the target: the width fills the
            # target and the leftover height is split between the 2nd and 4th
            # border entries.
            new_w = target_w
            new_h = int(src_h / src_w * new_w)
            slack = target_h - new_h
            before = int(slack / 2)
            borders = (0, before, 0, slack - before)
        else:
            # Otherwise the height fills the target and the leftover width is
            # split between the 1st and 3rd border entries.
            new_h = target_h
            new_w = int(src_w / src_h * new_h)
            slack = target_w - new_w
            before = int(slack / 2)
            borders = (before, 0, slack - before, 0)

        scaled = F.resize(img, (new_h, new_w), self.interpolation)
        return F.pad(scaled, borders)
 def torchvision(self, img):
     """Reflect-pad ``img`` so that neither side is shorter than 512.

     Each dimension is handled separately: when a side is below 512, half of
     the shortfall (rounded up) is added on both ends of that side.

     Args:
         img: Image exposing ``size`` as ``(size[0], size[1])``.

     Returns:
         The (possibly) padded image.
     """
     target = 512

     def margin(side):
         # Half of the missing pixels, rounded up, applied to both ends.
         return int((1 + target - side) / 2)

     if img.size[0] < target:
         img = torchvision.pad(img, (margin(img.size[0]), 0), padding_mode='reflect')
     if img.size[1] < target:
         img = torchvision.pad(img, (0, margin(img.size[1])), padding_mode='reflect')
     return img