def center_crop(img, output_size):
    """Same as the ``center_crop`` in ``functional``, but also returns the
    parameters used for the crop as ``(top, left, height, width)``."""
    if isinstance(output_size, numbers.Number):
        output_size = (int(output_size), int(output_size))
    elif isinstance(output_size, (tuple, list)) and len(output_size) == 1:
        output_size = (output_size[0], output_size[0])

    image_width, image_height = F._get_image_size(img)
    crop_height, crop_width = output_size

    if crop_width > image_width or crop_height > image_height:
        padding_ltrb = [
            (crop_width - image_width) // 2 if crop_width > image_width else 0,
            (crop_height - image_height) // 2 if crop_height > image_height else 0,
            (crop_width - image_width + 1) // 2 if crop_width > image_width else 0,
            (crop_height - image_height + 1) // 2 if crop_height > image_height else 0,
        ]
        img = F.pad(img, padding_ltrb, fill=0)  # PIL uses fill value 0
        image_width, image_height = F._get_image_size(img)
        if crop_width == image_width and crop_height == image_height:
            # The padded image already has the target size; the crop is a no-op,
            # but the parameters are still returned as the docstring promises.
            return img, (0, 0, crop_height, crop_width)

    crop_top = int(round((image_height - crop_height) / 2.))
    crop_left = int(round((image_width - crop_width) / 2.))
    return F.crop(img, crop_top, crop_left, crop_height, crop_width), \
        (crop_top, crop_left, crop_height, crop_width)
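A minimal usage sketch (not from the source): the second return value can be reused to apply the identical window to an aligned mask via F.crop; the sizes and variable names here are placeholders.

import torchvision.transforms.functional as F
from PIL import Image

img = Image.new('L', (640, 480))
mask = Image.new('L', (640, 480))

cropped, (top, left, h, w) = center_crop(img, 224)
cropped_mask = F.crop(mask, top, left, h, w)  # same window as the image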
def __call__(self, sample):
    if self.degrees == 0:
        return sample
    img, vertebrae = sample['image'], sample['vertebrae']
    fill = self.fill
    if isinstance(img, torch.Tensor):
        if isinstance(fill, (int, float)):
            fill = [float(fill)] * F._get_image_num_channels(img)
        else:
            fill = [float(f) for f in fill]
    angle = self.get_params(self.degrees)
    img = F.rotate(img, angle, self.resample, self.expand, self.center, fill)
    vertebrae[:, 1:3] = self.rotate_coord(img, angle, vertebrae[:, 1:3])
    # Discard vertebrae whose rotated centres land outside the image.
    width, height = F._get_image_size(img)
    x_check = torch.logical_or(vertebrae[:, 1] < 0, vertebrae[:, 1] >= width)
    y_check = torch.logical_or(vertebrae[:, 2] < 0, vertebrae[:, 2] >= height)
    xy_check = torch.logical_or(x_check, y_check)
    return {
        'image': img,
        'vertebrae': vertebrae[torch.logical_not(xy_check)],
        'info': sample['info']
    }
def __call__(self, sample):
    img, vertebrae = sample['image'], sample['vertebrae']
    if torch.rand(1) < self.p:
        width, height = F._get_image_size(img)
        img = F.hflip(img)
        vertebrae[:, 1] = width - vertebrae[:, 1]
    return {'image': img, 'vertebrae': vertebrae, 'info': sample['info']}
def get_params(img, scale, ratio):
    width, height = F._get_image_size(img)
    area = height * width

    for _ in range(10):
        target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item()
        log_ratio = torch.log(torch.tensor(ratio))
        aspect_ratio = torch.exp(
            torch.empty(1).uniform_(log_ratio[0], log_ratio[1])).item()

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))

        if 0 < w <= width and 0 < h <= height:
            i = torch.randint(0, height - h + 1, size=(1, )).item()
            j = torch.randint(0, width - w + 1, size=(1, )).item()
            return i, j, h, w

    # Fallback to central crop
    in_ratio = float(width) / float(height)
    if in_ratio < min(ratio):
        w = width
        h = int(round(w / min(ratio)))
    elif in_ratio > max(ratio):
        h = height
        w = int(round(h * max(ratio)))
    else:  # whole image
        w = width
        h = height
    i = (height - h) // 2
    j = (width - w) // 2
    return i, j, h, w
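A hedged usage sketch for the sampler above, assuming it is exposed as a plain function (in torchvision it is a static method of RandomResizedCrop); scale and ratio follow the usual RandomResizedCrop defaults.

import torch
import torchvision.transforms.functional as F

img = torch.rand(3, 480, 640)
i, j, h, w = get_params(img, scale=[0.08, 1.0], ratio=[3 / 4, 4 / 3])
out = F.resized_crop(img, i, j, h, w, size=[224, 224])
assert out.shape[-2:] == (224, 224)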
def __call__(self, img, dst):
    if isinstance(self.transform, T.Compose):
        for t in self.transform.transforms:
            img, dst = t(img, dst)
    elif any(isinstance(self.transform, t) for t in self.image_only_transforms):
        img, dst = self.transform(img), dst
    elif isinstance(self.transform, T.RandomAffine):
        # Sample the affine parameters once and apply them to both tensors.
        img_size = F._get_image_size(img)
        ret = self.transform.get_params(self.transform.degrees,
                                        self.transform.translate,
                                        self.transform.scale,
                                        self.transform.shear, img_size)
        img = F.affine(img, *ret,
                       interpolation=T.InterpolationMode.BILINEAR,
                       fill=self.transform.fill)
        dst = F.affine(dst, *ret,
                       interpolation=T.InterpolationMode.NEAREST,
                       fill=self.transform.fill)
    elif isinstance(self.transform, T.RandomHorizontalFlip):
        if torch.rand(1) < self.transform.p:
            img, dst = F.hflip(img), F.hflip(dst)
    elif isinstance(self.transform, T.RandomVerticalFlip):
        if torch.rand(1) < self.transform.p:
            img, dst = F.vflip(img), F.vflip(dst)
    elif isinstance(self.transform, T.Resize):
        w, h = self.transform.size
        scale = self.transform.mask_scale if isinstance(self.transform, Resize) else 1
        dst = F.resize(dst, [w // scale, h // scale],
                       interpolation=T.InterpolationMode.NEAREST)
        img = self.transform(img)
    else:
        img, dst = self.transform(img), self.transform(dst)
    return img, dst
def forward(self, img, img_type): """ Args: img (PIL Image or Tensor): Image to be cropped. img_type: Input type Returns: PIL Image or Tensor: Cropped image. """ if img_type in ['flow', 'flow_fwd', 'flow_bwd']: raise NotImplementedError( "We don't support cropping normalized flow yet!") fill = self.fill_map[img_type] width, height = TF._get_image_size(img) # pad the width if needed if self.pad_if_needed and width < self.size[1]: padding = [self.size[1] - width, 0] img = TF.pad(img, padding, fill, self.padding_mode) # pad the height if needed if self.pad_if_needed and height < self.size[0]: padding = [0, self.size[0] - height] img = TF.pad(img, padding, fill, self.padding_mode) i, j, h, w = self.get_params(img, self.size) return TF.crop(img, i, j, h, w)
def __call__(self, sample): """ Args: img (PIL Image or Tensor): Image to be cropped. Returns: PIL Image or Tensor: Cropped image. """ img, target = sample['image'], sample['target'] if self.padding is not None: img = F.pad(img, self.padding, self.fill, self.padding_mode) target = F.pad(target, self.padding, self.ignore_label, self.padding_mode) width, height = F._get_image_size(img) # pad the width if needed if self.pad_if_needed and width < self.size[1]: padding = [self.size[1] - width, 0] img = F.pad(img, padding, self.fill, self.padding_mode) target = F.pad(target, padding, self.ignore_label, self.padding_mode) # pad the height if needed if self.pad_if_needed and height < self.size[0]: padding = [0, self.size[0] - height] img = F.pad(img, padding, self.fill, self.padding_mode) target = F.pad(target, padding, self.ignore_label, self.padding_mode) i, j, h, w = self.get_params(img, self.size) return {'image': F.crop(img, i, j, h, w), 'target': F.crop(target, i, j, h, w)}
def forward(self, sample):
    img, vertebrae = sample['image'], sample['vertebrae']
    if self.padding is not None:
        img = F.pad(img, self.padding, self.fill, self.padding_mode)
    width, height = F._get_image_size(img)
    # pad the width if needed
    if self.pad_if_needed and width < self.size[1]:
        padding = [self.size[1] - width, 0]
        img = F.pad(img, padding, self.fill, self.padding_mode)
    # pad the height if needed
    if self.pad_if_needed and height < self.size[0]:
        padding = [0, self.size[0] - height]
        img = F.pad(img, padding, self.fill, self.padding_mode)

    top, left, h, w = self.get_params(img, self.size)
    cropped_img = F.crop(img, top, left, h, w)
    # Shift vertebra coordinates into the crop frame and drop the ones
    # that fall outside the cropped window.
    vertebrae[:, 1] -= left
    vertebrae[:, 2] -= top
    left_check = torch.logical_and(vertebrae[:, 1] < w, vertebrae[:, 1] >= 0)
    top_check = torch.logical_and(vertebrae[:, 2] < h, vertebrae[:, 2] >= 0)
    correct_vertebrae = torch.logical_and(top_check, left_check)
    vertebrae = vertebrae[correct_vertebrae]
    return {
        'image': cropped_img,
        'vertebrae': vertebrae,
        'info': sample['info']
    }
def forward(self, image, mask):
    if torch.rand(1) < self.p:
        width, height = F._get_image_size(image)
        startpoints, endpoints = self.get_params(width, height,
                                                 self.distortion_scale)
        return (F.perspective(image, startpoints, endpoints,
                              self.interpolation, self.fill),
                F.perspective(mask, startpoints, endpoints,
                              self.interpolation, self.fill))
    return image, mask
def forward(self, img, img_type):
    new_scale = self.get_params(self.scale_range, self.scale_step)
    w, h = TF._get_image_size(img)
    new_h, new_w = int(h * new_scale), int(w * new_scale)
    # Semantic maps must not be interpolated across class boundaries.
    if img_type == 'semantic':
        img = TF.resize(img, (new_h, new_w), PIL.Image.NEAREST)
    else:
        img = TF.resize(img, (new_h, new_w), PIL.Image.BILINEAR)
    return img
def _pad_image_to(image, output_size, pad_value):
    """Pad the input image to ``output_size`` on the right and bottom borders."""
    width, height = TF._get_image_size(image)
    height_out, width_out = output_size
    assert height_out >= height and width_out >= width, \
        "output_size must be larger than input size!"
    padding = [0, 0, width_out - width, height_out - height]
    image = TF.pad(image, padding, pad_value, 'constant')
    return image
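A short usage sketch under the convention above that output_size is (height, width) while PIL reports (width, height):

from PIL import Image

img = Image.new('RGB', (500, 300))
padded = _pad_image_to(img, output_size=(512, 512), pad_value=0)
assert padded.size == (512, 512)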
def forward(self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
            ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
    if torch.rand(1) < self.p:
        image = F.hflip(image)
        if target is not None:
            width, _ = F._get_image_size(image)
            target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]]
            if "masks" in target:
                target["masks"] = target["masks"].flip(-1)
    return image, target
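The box-flip line is worth a worked example (plain arithmetic, not part of the class): mirroring x to width - x swaps which coordinate is smaller, so reading the columns back in reversed order keeps x1 <= x2.

import torch

width = 100
boxes = torch.tensor([[10., 20., 30., 40.]])  # x1, y1, x2, y2
boxes[:, [0, 2]] = width - boxes[:, [2, 0]]
assert boxes.tolist() == [[70., 20., 90., 40.]]  # x1 <= x2 still holds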
def forward(self, img): """ Args: img (PIL Image or Tensor): Image to be cropped and resized. Returns: PIL Image or Tensor: Randomly cropped and resized image. """ ori_w, ori_h = F._get_image_size(img) i, j, h, w = self.get_params(ori_w, ori_h) return F.resized_crop(img, i, j, h, w, (ori_h, ori_w), self.interpolation)
def forward(self, image, mask):
    img_size = F._get_image_size(image)
    ret = self.get_params(self.degrees, self.translate, self.scale,
                          self.shear, img_size)
    return (F.affine(image, *ret, resample=self.resample,
                     fillcolor=self.fillcolor),
            F.affine(mask, *ret, resample=self.resample,
                     fillcolor=self.fillcolor))
def spine_class(img, predicted):
    assert isinstance(img, Image.Image)
    predicted = torch.clone(predicted)
    predicted = torch.clamp(predicted, 0, 1)
    w, h = F._get_image_size(img)
    rect_w = h // len(predicted)
    rect_h = h // len(predicted)
    draw = ImageDraw.Draw(img)
    # print_column(predicted[:, 0], draw, w - (rect_w * 2), rect_w, rect_h)
    print_column(predicted[:, 0], draw, w - rect_w, rect_w, rect_h)
    return img
def get_params(image, output_size):
    w, h = F._get_image_size(image)
    th, tw = output_size
    if (h + 1 < th) or (w + 1 < tw):
        raise ValueError(
            'Required crop size {} is larger than input image size {}'.format(
                (th, tw), (h, w)))

    if w == tw and h == th:
        return 0, 0, h, w

    i = torch.randint(0, h - th + 1, size=(1, )).item()
    j = torch.randint(0, w - tw + 1, size=(1, )).item()
    return i, j, th, tw
def forward(self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
            ) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
    if torch.rand(1) < self.p:
        image = F.hflip(image)
        if target is not None:
            width, _ = F._get_image_size(image)
            target["boxes"][:, [0, 2]] = width - target["boxes"][:, [2, 0]]
            if "masks" in target:
                target["masks"] = target["masks"].flip(-1)
            if "keypoints" in target:
                keypoints = target["keypoints"]
                keypoints = _flip_coco_person_keypoints(keypoints, width)
                target["keypoints"] = keypoints
    return image, target
def forward(
    self, image: Tensor, target: Optional[Dict[str, Tensor]] = None
) -> Tuple[Tensor, Optional[Dict[str, Tensor]]]:
    if isinstance(image, torch.Tensor):
        if image.ndimension() not in {2, 3}:
            raise ValueError(
                "image should be 2/3 dimensional. Got {} dimensions.".format(
                    image.ndimension()))
        elif image.ndimension() == 2:
            image = image.unsqueeze(0)

    if torch.rand(1) < self.p:
        return image, target

    # Place the original image at a random offset on a canvas up to
    # side_range[1] times larger.
    orig_w, orig_h = F._get_image_size(image)

    r = self.side_range[0] + torch.rand(1) * (
        self.side_range[1] - self.side_range[0])
    canvas_width = int(orig_w * r)
    canvas_height = int(orig_h * r)

    r = torch.rand(2)
    left = int((canvas_width - orig_w) * r[0])
    top = int((canvas_height - orig_h) * r[1])
    right = canvas_width - (left + orig_w)
    bottom = canvas_height - (top + orig_h)

    if torch.jit.is_scripting():
        fill = 0
    else:
        fill = self._get_fill_value(F._is_pil_image(image))

    image = F.pad(image, [left, top, right, bottom], fill=fill)
    if isinstance(image, torch.Tensor):
        # Tensor padding only supports a scalar fill, so overwrite the
        # padded borders with the configured fill colour.
        v = torch.tensor(self.fill, device=image.device,
                         dtype=image.dtype).view(-1, 1, 1)
        image[..., :top, :] = image[..., :, :left] = \
            image[..., (top + orig_h):, :] = image[..., :, (left + orig_w):] = v

    if target is not None:
        target["boxes"][:, 0::2] += left
        target["boxes"][:, 1::2] += top

    return image, target
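The canvas geometry above in isolation (a self-contained sketch of the arithmetic, with placeholder sizes): the paddings always sum back to the canvas size, so shifting boxes by (left, top) is enough to keep them aligned.

import torch

orig_w, orig_h, r = 640, 480, 1.5
canvas_w, canvas_h = int(orig_w * r), int(orig_h * r)
rand = torch.rand(2)
left = int((canvas_w - orig_w) * rand[0])
top = int((canvas_h - orig_h) * rand[1])
right = canvas_w - (left + orig_w)
bottom = canvas_h - (top + orig_h)
assert left + orig_w + right == canvas_w
assert top + orig_h + bottom == canvas_h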
def __call__(self, sample): """ Args: img (PIL Image or Tensor): Image to be Perspectively transformed. Returns: PIL Image or Tensor: Randomly transformed image. """ img, target = sample['image'], sample['target'] if torch.rand(1) < self.p: width, height = F._get_image_size(img) startpoints, endpoints = self.get_params(width, height, self.distortion_scale) return {'image': F.perspective(img, startpoints, endpoints, self.interpolation, self.fill), 'target': F.perspective(img, startpoints, endpoints, Image.NEAREST, self.ignore_label)} return {'image': img,'target': target}
def rotate_coord(img, angle, coords):
    # Rotate (x, y) coordinates about the image centre by `angle` degrees,
    # matching the rotation applied to the image itself.
    x, y = coords.T
    width, height = F._get_image_size(img)
    cx, cy = width // 2, height // 2
    x -= cx
    y -= cy
    angle = (angle * np.pi) / 180
    r, theta = to_polar(x, y)
    theta -= angle
    x, y = to_cartesian(r, theta)
    x += cx
    y += cy
    return torch.stack([x, y]).T.int()
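to_polar and to_cartesian are not shown in this section; a minimal sketch of what they are assumed to compute (hypothetical implementations, named to match the calls above):

import torch

def to_polar(x, y):
    # Hypothetical helper: radius and angle of each (x, y) pair.
    r = torch.sqrt(x.float() ** 2 + y.float() ** 2)
    theta = torch.atan2(y.float(), x.float())
    return r, theta

def to_cartesian(r, theta):
    # Hypothetical inverse of to_polar.
    return r * torch.cos(theta), r * torch.sin(theta)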
def batch_gen(img_path, stride=None, max_batch_size=32, resize=224):
    image = io.imread(img_path)
    image = np.interp(image, (image.min(), image.max()), (0, 255))
    image = np.stack((image, ) * 3, axis=-1)
    image = Image.fromarray(np.uint8(image))
    # TODO resize image to fixed short edge like 512
    window_size = args.rand_crop
    stride = window_size // 2 if stride is None else stride
    width, height = F._get_image_size(image)
    # Sliding-window top-left corners, plus edge-aligned windows so the
    # right and bottom borders are always covered.
    w_list = [x for x in range(0, width, stride) if (x + window_size) <= width]
    h_list = [
        y for y in range(0, height, stride) if (y + window_size) <= height
    ]
    w_list.append(width - window_size)
    h_list.append(height - window_size)
    wh_list = [(w, h) for w in w_list for h in h_list]
    images = []
    for x, y in wh_list:
        transform = transforms.Compose([
            FixedCrop((x, y), window_size),
            Resize(resize),
            ToTensor(),
            ScaleCenters(),
            Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
        sample = {
            'image': image,
            'vertebrae': torch.zeros([1, 4]).float(),
            'info': {}
        }
        sample = transform(sample)
        images.append(sample['image'])
        if len(images) >= max_batch_size:
            batch = torch.stack(images)
            yield batch, wh_list, image
            images = []
    if len(images) > 0:
        batch = torch.stack(images)
        yield batch, wh_list, image
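A hedged sketch of consuming the generator, assuming a trained model is in scope and args.rand_crop is set; each yielded batch holds up to max_batch_size window crops, and wh_list gives the top-left corner of every window.

import torch

model.eval()
with torch.no_grad():
    for batch, wh_list, full_image in batch_gen('scan.png'):
        preds = model(batch)  # one prediction per window in the batch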
def get_params(img: Tensor,
               output_size: Tuple[int, int]) -> Tuple[int, int, int, int]:
    w, h = functional._get_image_size(img)
    th, tw = output_size
    if h + 1 < th or w + 1 < tw:
        raise ValueError(
            "Required crop size {} is larger than input image size {}".format(
                (th, tw), (h, w)))

    if w == tw and h == th:
        return 0, 0, h, w

    i = torch.randint(0, h - th + 1, size=(1, )).item()
    j = torch.randint(0, w - tw + 1, size=(1, )).item()
    return i, j, th, tw
def forward(
    self, image: Union[Image.Image, Tensor], quads: np.ndarray,
    texts: np.ndarray
) -> Tuple[Union[Image.Image, Tensor], np.ndarray, np.ndarray]:
    """
    Args:
        image: image to be cropped
        quads: array of quadrilateral vertices, one quad per text region
        texts: array of text labels aligned with ``quads``

    Returns:
        The cropped image together with the quads and texts that remain
        fully inside the crop, with quad coordinates shifted into the
        crop frame.
    """
    if self.padding is not None:
        image = F.pad(image, self.padding, self.fill, self.padding_mode)

    width, height = F._get_image_size(image)
    # pad the width if needed
    if self.pad_if_needed and width < self.size[1]:
        padding = [self.size[1] - width, 0]
        image = F.pad(image, padding, self.fill, self.padding_mode)
    # pad the height if needed
    if self.pad_if_needed and height < self.size[0]:
        padding = [0, self.size[0] - height]
        image = F.pad(image, padding, self.fill, self.padding_mode)

    top, left, height, width = self.get_params(image, self.size)
    image = F.crop(image, top, left, height, width)
    bottom, right = top + height, left + width

    indices = []
    for i in range(len(quads)):
        quads[i] = sort_vertices(quads[i])
        quad_left, quad_top = quads[i][0]
        quad_right, quad_bottom = quads[i][2]
        # Only quadrilaterals that lie fully inside the cropped image are kept.
        if (left < quad_left and top < quad_top
                and quad_right < right and quad_bottom < bottom):
            # quads[i, :, 0] = np.minimum(quads[i, :, 0], right)
            # quads[i, :, 0] = np.maximum(quads[i, :, 0], left)
            # quads[i, :, 1] = np.minimum(quads[i, :, 1], bottom)
            # quads[i, :, 1] = np.maximum(quads[i, :, 1], top)
            indices.append(i)

    quads = quads[indices] - (left, top)
    texts = texts[indices]
    return image, quads, texts
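The inclusion test in isolation, as a worked example with concrete numbers (sort_vertices is assumed to order corners top-left first and bottom-right third, as the indexing above implies):

import numpy as np

left, top, right, bottom = 10, 10, 110, 110
quad = np.array([[20, 20], [90, 20], [90, 60], [20, 60]])  # already sorted
quad_left, quad_top = quad[0]
quad_right, quad_bottom = quad[2]
keep = (left < quad_left and top < quad_top
        and quad_right < right and quad_bottom < bottom)
assert keep
shifted = quad - (left, top)  # coordinates relative to the crop window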
def get_params(img: Tensor, scale: List[float],
               ratio: List[float]) -> Tuple[int, int, int, int]:
    """Get parameters for ``crop`` for a random sized crop.

    Args:
        img (PIL Image or Tensor): Input image.
        scale (list): range of scale of the origin size cropped
        ratio (list): range of aspect ratio of the origin aspect ratio cropped

    Returns:
        tuple: params (i, j, h, w) to be passed to ``crop`` for a random
        sized crop.
    """
    width, height = F._get_image_size(img)
    area = height * width

    for _ in range(10):
        target_area = area * torch.empty(1).uniform_(scale[0], scale[1]).item()
        log_ratio = torch.log(torch.tensor(ratio))
        aspect_ratio = torch.exp(
            torch.empty(1).uniform_(log_ratio[0], log_ratio[1])).item()

        w = int(round(math.sqrt(target_area * aspect_ratio)))
        h = int(round(math.sqrt(target_area / aspect_ratio)))

        if 0 < w <= width and 0 < h <= height:
            i = torch.randint(0, height - h + 1, size=(1, )).item()
            j = torch.randint(0, width - w + 1, size=(1, )).item()
            return i, j, h, w

    # Fallback to central crop
    in_ratio = float(width) / float(height)
    if in_ratio < min(ratio):
        w = width
        h = int(round(w / min(ratio)))
    elif in_ratio > max(ratio):
        h = height
        w = int(round(h * max(ratio)))
    else:  # whole image
        w = width
        h = height
    i = (height - h) // 2
    j = (width - w) // 2
    return i, j, h, w
def __call__(
    self, input: torch.Tensor, target: Optional[torch.Tensor] = None
) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
    if self.padding is not None:
        input = VF.pad(input, self.padding, self.fill, self.padding_mode)
    if self.pad_if_needed:
        w, h = VF._get_image_size(input)
        eh, ew = self.size
        pw, ph = max(ew - w, 0), max(eh - h, 0)
        if pw > 0 or ph > 0:
            input = VF.pad(input, [0, 0, pw, ph], fill=self.fill)
            if self.target_type == "segmentation":
                target = VF.pad(target, [0, 0, pw, ph], fill=self.mask_fill)
    return super().__call__(input, target)
def forward(self, sample):
    assert isinstance(self.size, int), 'Tuple not supported yet'
    img, vertebrae = sample['image'], sample['vertebrae']
    width, height = F._get_image_size(img)
    # Resize so the shorter side equals self.size, keeping the aspect ratio.
    if width < height:
        out_h = int(self.size * height / width)
        out_w = self.size
    else:
        out_w = int(self.size * width / height)
        out_h = self.size
    # Rescale vertebra coordinates by the same factors.
    vertebrae[:, 1] = (vertebrae[:, 1] / width) * out_w
    vertebrae[:, 2] = (vertebrae[:, 2] / height) * out_h
    return {
        'image': F.resize(img, (out_h, out_w), self.interpolation),
        'vertebrae': vertebrae,
        'info': sample['info']
    }
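The short-side arithmetic, worked through with placeholder numbers: a 640x480 image with size=224 resizes the height (the shorter side) to 224 and scales the width and the keypoints by the same factors.

width, height, size = 640, 480, 224
out_w = int(size * width / height)  # 298
out_h = size                        # 224
x, y = 320.0, 240.0                 # a keypoint at the image centre
new_x = (x / width) * out_w         # 149.0
new_y = (y / height) * out_h        # 112.0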
def forward(self, image, mask):
    if self.padding is not None:
        image = F.pad(image, self.padding, self.fill, self.padding_mode)
        mask = F.pad(mask, self.padding, self.fill, self.padding_mode)
    width, height = F._get_image_size(image)
    # pad if needed
    if self.pad_if_needed and (width < self.size[1]):
        padding = [self.size[1] - width, 0]
        image = F.pad(image, padding, self.fill, self.padding_mode)
        mask = F.pad(mask, padding, self.fill, self.padding_mode)
    if self.pad_if_needed and (height < self.size[0]):
        padding = [0, self.size[0] - height]
        image = F.pad(image, padding, self.fill, self.padding_mode)
        mask = F.pad(mask, padding, self.fill, self.padding_mode)
    i, j, h, w = self.get_params(image, self.size)
    return F.crop(image, i, j, h, w), F.crop(mask, i, j, h, w)
def __call__(self, sample):
    img, vertebrae = sample['image'], sample['vertebrae']
    top, left, h, w = self.y, self.x, self.size, self.size
    cropped_img = F.crop(img, top, left, h, w)
    # Shift vertebra coordinates into the crop frame and keep only those
    # inside the window.
    vertebrae[:, 1] -= left
    vertebrae[:, 2] -= top
    left_check = torch.logical_and(vertebrae[:, 1] < w, vertebrae[:, 1] >= 0)
    top_check = torch.logical_and(vertebrae[:, 2] < h, vertebrae[:, 2] >= 0)
    correct_vertebrae = torch.logical_and(top_check, left_check)
    vertebrae = vertebrae[correct_vertebrae]
    return {
        'image': cropped_img,
        'vertebrae': vertebrae,
        'info': sample['info']
    }
def __call__(
    self, input: torch.Tensor, target: Optional[torch.Tensor] = None
) -> Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]]:
    input = self.ensure_tensor(input, True)
    original_size = VF._get_image_size(input)
    if target is not None:
        target = self.ensure_tensor(target, False)
    params = self.get_params(input)
    input = self.apply_image(input, params)
    if target is None:
        if self.target_type is not None:
            warnings.warn(
                f"target is None, but target_type=={self.target_type}")
        return input
    if self.target_type == "bbox":
        target = self.apply_bbox(target, params, original_size)
    elif self.target_type == "mask":
        target = self.apply_mask(target, params)
    return input, target
def get_params(img, output_size): """Get parameters for ``crop`` for a random crop. Args: img (PIL Image or Tensor): Image to be cropped. output_size (tuple): Expected output size of the crop. Returns: tuple: params (i, j, h, w) to be passed to ``crop`` for random crop. """ w, h = F._get_image_size(img) th, tw = output_size if h + 1 < th or w + 1 < tw: raise ValueError( "Required crop size {} is larger then input image size {}".format((th, tw), (h, w)) ) if w == tw and h == th: return 0, 0, h, w i = torch.randint(0, h - th + 1, size=(1, )).item() j = torch.randint(0, w - tw + 1, size=(1, )).item() return i, j, th, tw