def forward(self, img, seg):
    """Randomly zoom into an image/segmentation pair while keeping its size.

    A scale factor is drawn uniformly from ``[self.min_scale, self.max_scale]``.
    When it is greater than 1.0, one randomly placed window covering
    ``1 / scale_factor`` of each side is cut from both inputs and resized
    back to the original size. Otherwise both inputs are returned unchanged.

    Args:
        img: PIL image or tensor.
        seg: Segmentation map of the same type and spatial size as ``img``.

    Returns:
        tuple: ``(img, seg)`` after the optional zoom.
    """
    assert any(isinstance(img, x) and isinstance(seg, x) for x in [Image.Image, t.Tensor]), \
        "BUG CHECK: 'img' and 'seg' must be of the same type."
    # all(...) is required here: a bare generator expression is always truthy,
    # so without it this check could never fail.
    assert all(isinstance(x, (Image.Image, t.Tensor)) for x in [img, seg]), \
        "BUG CHECK: 'img' and 'seg' must be of either Image or Tensor type."
    assert (img.shape[-2:] == seg.shape[-2:]) if isinstance(img, t.Tensor) else (img.size[-2:] == seg.size[-2:]), \
        "BUG CHECK: 'img' and 'seg' must be of same dimensions."

    # CAUTION: For Image, size is in (W, H) order and (H, W) for Tensor
    org_size = tuple(img.shape[-2:]) if isinstance(img, t.Tensor) else img.size

    scale_factor = t.empty(1).uniform_(self.min_scale, self.max_scale).item()

    if scale_factor > 1.0:
        # CAUTION: Interpolation mode must be 'nearest' for 'seg'
        # NOTE(review): the crop offset is drawn from [0, (size - crop) // 2],
        # i.e. only half of the available slack -- confirm this is intended.
        if isinstance(img, Image.Image):
            crop_width = int(1.0 / scale_factor * org_size[0])
            crop_height = int(1.0 / scale_factor * org_size[1])
            crop_x = int(t.empty(1).uniform_(0., (org_size[0] - crop_width) // 2).item())
            crop_y = int(t.empty(1).uniform_(0., (org_size[1] - crop_height) // 2).item())
            # PIL box is (left, upper, right, lower).
            crop_box = [crop_x,
                        crop_y,
                        crop_x + crop_width,
                        crop_y + crop_height]
            img = img.resize(size=org_size, resample=Image.BILINEAR, box=crop_box)
            seg = seg.resize(size=org_size, resample=Image.NEAREST, box=crop_box)
        else:
            crop_width = int(1.0 / scale_factor * org_size[1])
            crop_height = int(1.0 / scale_factor * org_size[0])
            # Order is (top, left, height, width); the two offsets are drawn
            # in that order so the random stream is consumed as before.
            crop_box = [int(t.empty(1).uniform_(0., (org_size[0] - crop_height) // 2).item()),
                        int(t.empty(1).uniform_(0., (org_size[1] - crop_width) // 2).item()),
                        crop_height,
                        crop_width]
            img = F.resized_crop(img, *crop_box, size=org_size, interpolation=Image.BILINEAR)
            # 'seg' gets a temporary leading dimension for the crop, which is
            # removed again afterwards.
            seg = t.squeeze(
                F.resized_crop(t.unsqueeze(seg, dim=0), *crop_box, size=org_size, interpolation=Image.NEAREST),
                dim=0)

    return img, seg
def test_resized_crop(self):
    """Check the values returned by ``F.resized_crop`` in two known cases."""
    # Case 1: crop the full frame and resize to the same size -> identity.
    source, _ = self._create_data(26, 36)
    for mode in (0, 2, 3):
        result = F.resized_crop(
            source, top=0, left=0, height=26, width=36, size=[26, 36], interpolation=mode
        )
        self.assertTrue(
            source.equal(result),
            msg="{} vs {}".format(result[0, :5, :5], source[0, :5, :5]),
        )

    # Case 2: crop the top-left corner and halve it with nearest interpolation;
    # the expected result is every second pixel of that corner.
    source, _ = self._create_data(26, 36)
    result = F.resized_crop(
        source, top=0, left=0, height=20, width=30, size=[10, 15], interpolation=0
    )
    expected = source[:, :20:2, :30:2]
    self.assertTrue(
        expected.equal(result),
        msg="{} vs {}".format(expected[0, :10, :10], result[0, :10, :10]),
    )
def __getitem__(self, index):
    """Load one image/mask pair and return both as tensors.

    When ``self.split`` contains ``"training"``, the pair receives the same
    random resized crop (to 224x224) and the same random horizontal and
    vertical flips. Otherwise both are resized with ``Resize(256)`` and
    center-cropped to 224.

    Args:
        index: Position in ``self.imgs`` / ``self.masks``.

    Returns:
        tuple: ``(image, mask)``; only the image goes through
        ``self.normalize``.
    """
    image = Image.open(self.imgs[index])
    mask = Image.open(self.masks[index])

    if "training" not in self.split:
        image = transforms.CenterCrop(224)(transforms.Resize(256)(image))
        mask = transforms.CenterCrop(224)(transforms.Resize(256)(mask))
    else:
        top, left, height, width = transforms.RandomResizedCrop.get_params(
            image, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.])
        # NOTE(review): the mask is resampled with the library's default
        # interpolation, same as the image -- confirm that is acceptable
        # for this mask type.
        image = resized_crop(image, top, left, height, width, [224, 224])
        mask = resized_crop(mask, top, left, height, width, [224, 224])

        # One coin toss per flip direction, applied to both inputs.
        if random.random() > 0.5:
            image, mask = hflip(image), hflip(mask)
        if random.random() > 0.5:
            image, mask = vflip(image), vflip(mask)

    image = to_tensor(image)
    mask = to_tensor(mask)
    return self.normalize(image), mask
def __call__(self, sample):
    """
    Apply a random resized crop to every image and to its matching labels.

    Args:
        sample (dict): ``'image'`` holds a sequence of images; ``'labels'``
            maps each of ``'eye1'``, ``'eye2'``, ``'nose'`` and ``'mouth'``
            to a sequence of label images. Each label sequence is assumed
            to be aligned index-by-index with the image sequence.

    Returns:
        dict: New sample with the cropped ``'image'`` list and ``'labels'``
        dict.
    """
    img, labels = sample['image'], sample['labels']

    # Draw one set of crop parameters per image and remember it, so that
    # the labels belonging to image r are cut with exactly the same window.
    # (Previously every label was cropped with the parameters of the last
    # image only.)
    crop_params = []
    croped_img = []
    for r in range(len(img)):
        i, j, h, w = self.get_params(img[r], self.scale, self.ratio)
        crop_params.append((i, j, h, w))
        croped_img.append(
            TF.resized_crop(img[r], i, j, h, w, self.size,
                            self.interpolation))

    croped_labels = {
        x: [
            TF.resized_crop(labels[x][r], *crop_params[r], self.size,
                            self.interpolation)
            for r in range(len(labels[x]))
        ]
        for x in ['eye1', 'eye2', 'nose', 'mouth']
    }

    sample = {'image': croped_img, 'labels': croped_labels}
    return sample
def __call__(self, sample):
    """
    Apply one random resized crop to an image and all of its labels.

    Args:
        sample (dict): Must provide ``'image'``, ``'labels'`` (a sequence
            of label images) and ``'index'``.

    Returns:
        dict: ``'image'`` and ``'labels'`` cropped with the same window,
        plus the untouched ``'index'``.
    """
    picture = sample['image']
    label_seq = sample['labels']
    top, left, height, width = self.get_params(picture, self.scale, self.ratio)

    def crop(item):
        # Same window, output size and interpolation for every input.
        return TF.resized_crop(item, top, left, height, width, self.size,
                               self.interpolation)

    return {
        'image': crop(picture),
        'labels': [crop(one_label) for one_label in label_seq],
        'index': sample['index'],
    }
def __getitem__(self, idx):
    """Build one (input, rotated target, index) training sample.

    The wrapped dataset's sample is converted to a PIL image, optionally
    passed through ``self.transform_chain``, and rotated by
    ``self.rotation_angle`` to form the target. Both versions are reduced to
    the central 16x16 window and resized to 32x32, then normalized with the
    mean/std of the unrotated image. Two position channels are appended to
    the input.

    Returns:
        tuple: ``(full_inputs, rotated_image_data, idx)`` where ``idx`` is
        the second value returned by the wrapped dataset.
    """
    image_data, idx = self.dataset[idx]
    image_data = torchvision.transforms.functional.to_pil_image(image_data)
    if self.transform_chain is not None:
        image_data = self.transform_chain(image_data)

    # Rotated copy serves as the target.
    rotated_image_data = TF.rotate(image_data, angle=self.rotation_angle, resample=PIL.Image.BILINEAR)

    # Crop and resize to get rid of unknown image parts introduced by rotation.
    window = dict(top=8, left=8, height=16, width=16, size=32)
    image_data = TF.resized_crop(image_data, **window)
    rotated_image_data = TF.resized_crop(rotated_image_data, **window)

    # Work in float32 from here on.
    image_data = np.asarray(image_data, dtype=np.float32)
    rotated_image_data = np.asarray(rotated_image_data, dtype=np.float32)

    # Per-sample normalization; the statistics come from the unrotated image
    # and are applied to both arrays.
    mean = image_data.mean()
    std = image_data.std()
    for array in (image_data, rotated_image_data):
        array -= mean
        array /= std

    # Channel 0 is the image, channels 1 and 2 encode the position along the
    # two image axes in [-1, 1] (position information helps the CNN).
    full_inputs = np.zeros(shape=(*image_data.shape, 3), dtype=image_data.dtype)
    full_inputs[..., 0] = image_data
    full_inputs[..., 1] = np.linspace(start=-1, stop=1, num=full_inputs.shape[1])
    full_inputs[..., 2] = full_inputs[..., 1].T

    # Convert numpy arrays to tensors.
    return TF.to_tensor(full_inputs), TF.to_tensor(rotated_image_data), idx
def augment(im, mask):
    """Apply the same random crop, rotation and flip to an image and mask.

    The image is converted to a tensor first; the mask is used as given.
    The image is resampled bilinearly, the mask with nearest neighbour.

    Returns:
        tuple: ``(im, mask)`` where ``im`` is a squeezed NumPy array and
        ``mask`` is the transformed mask.
    """
    im = transforms.ToTensor()(im)

    # Random square window covering 95-100% of the area, resized to 512x512.
    top, left, height, width = transforms.RandomResizedCrop.get_params(
        im, [0.95, 1.0], [1.0, 1.0])
    im = F.resized_crop(im, top, left, height, width, [512, 512],
                        interpolation=Image.BILINEAR)
    mask = F.resized_crop(mask, top, left, height, width, [512, 512],
                          interpolation=Image.NEAREST)

    # Small random rotation within +/- 5 degrees.
    angle = transforms.RandomRotation.get_params([-5, 5])
    im = F.rotate(im, angle, interpolation=Image.BILINEAR)
    mask = F.rotate(mask, angle, interpolation=Image.NEAREST)

    # Horizontal flip with probability 0.4.
    if torch.rand(1) < 0.4:
        im, mask = F.hflip(im), F.hflip(mask)

    return torch.squeeze(im).numpy(), mask
def transform(self, image, label):
    """Randomly apply a shared square resized crop to an image/label pair.

    With probability 0.5 the inputs are returned untouched. Otherwise both
    arrays are converted to PIL images, cropped with the same random square
    window (50-100% of the area) and resized to 512 with bicubic
    interpolation.

    Returns:
        tuple: ``(image, label)``; the original objects when skipped,
        NumPy arrays otherwise.
    """
    if random.random() < 0.5:
        return image, label

    pil_image = Image.fromarray(image)
    pil_label = Image.fromarray(label)

    top, left, height, width = transforms.RandomResizedCrop.get_params(
        pil_image, scale=(0.5, 1.0), ratio=(1, 1))
    pil_image = TF.resized_crop(pil_image, top, left, height, width, 512,
                                interpolation=Image.BICUBIC)
    pil_label = TF.resized_crop(pil_label, top, left, height, width, 512,
                                interpolation=Image.BICUBIC)

    return np.asarray(pil_image), np.asarray(pil_label)
def __getitem__(self, index):
    """Load one image/label pair from ``.npy`` files and optionally augment it.

    The image is loaded channel-last with its channel order reversed. When
    both ``self.train`` and ``self.augment`` are set, each of the following
    is applied with probability 0.5, channel by channel, identically to
    image and label: a +/-90 degree rotation, a horizontal flip, a vertical
    flip, and a random half-size crop resized back to the full size.

    Args:
        index: Position in ``self.img_list`` / ``self.label_list``.

    Returns:
        tuple: ``(image, label)`` as NumPy arrays.
    """
    image = np.load(os.path.join(self.file_path, self.img_list[index]))[:, :, ::-1]
    label = np.load(os.path.join(self.file_path, self.label_list[index]))

    # Arrays are indexed as [:, :, channel] below, so the spatial size is the
    # first two axes and the label channel count is the third axis.
    height, width = image.shape[:2]
    ch_label = label.shape[2]

    def per_channel(array, channels, op):
        # Apply a PIL-level operation to every channel and restack the result.
        return np.dstack([op(_np2pil(array[:, :, c])) for c in range(channels)])

    if self.train and self.augment:
        # random rotations
        if np.random.randint(2) == 0:
            ang = np.random.choice([90, -90])
            image = per_channel(image, 3, lambda pil: F.rotate(pil, ang))
            label = per_channel(label, ch_label, lambda pil: F.rotate(pil, ang))

        # random h-flips
        if np.random.randint(2) == 0:
            image = per_channel(image, 3, F.hflip)
            label = per_channel(label, ch_label, F.hflip)

        # random v-flips
        if np.random.randint(2) == 0:
            image = per_channel(image, 3, F.vflip)
            label = per_channel(label, ch_label, F.vflip)

        # random crops
        if np.random.randint(2) == 0:
            i, j, h, w = transforms.RandomCrop.get_params(
                _np2pil(label), output_size=(height // 2, width // 2))
            image = per_channel(
                image, 3, lambda pil: F.resized_crop(pil, i, j, h, w, (height, width)))
            label = per_channel(
                label, ch_label, lambda pil: F.resized_crop(pil, i, j, h, w, (height, width)))

    return image, label
def scale(self, img, seg, do, prob, scale_factor):
    """Optionally zoom into the centre of an image/segmentation pair.

    When ``do`` is truthy, with probability ``prob`` a zoom ratio is drawn
    uniformly from ``[1, 1 + scale_factor]``. A centred square window of side
    ``size / ratio`` is then cut from each input and resized back to that
    input's original side length.

    Args:
        img: Image exposing ``size``; ``size[0]`` is used as the side length.
        seg: Segmentation exposing ``size``; may differ in size from ``img``.
        do: Master switch for the augmentation.
        prob: Probability of applying the zoom when ``do`` is set.
        scale_factor: Maximum extra zoom on top of 1.0.

    Returns:
        tuple: ``(img, seg)``, unchanged when the zoom is skipped.
    """
    img_size = img.size[0]
    seg_size = seg.size[0]

    if do and random.random() < prob:
        scale_ratio = random.uniform(1, 1 + scale_factor)

        img_cs = int(img_size / scale_ratio)
        img_off = (img_size - img_cs) // 2
        seg_cs = int(seg_size / scale_ratio)
        seg_off = (seg_size - seg_cs) // 2

        # The window arguments are passed positionally: torchvision renamed
        # them from i/j/h/w to top/left/height/width, so keywords would only
        # work on one side of that rename. Top and left offsets are equal
        # because the window is centred and square.
        # NOTE(review): 'seg' is resampled with BICUBIC as well; if it holds
        # discrete class labels, nearest-neighbour would be expected --
        # confirm.
        img = tff.resized_crop(img, img_off, img_off, img_cs, img_cs, img_size,
                               interpolation=PIL.Image.BICUBIC)
        seg = tff.resized_crop(seg, seg_off, seg_off, seg_cs, seg_cs, seg_size,
                               interpolation=PIL.Image.BICUBIC)

    return img, seg
def __getitem__(self, idx):
    """Return one sample with image, segmentation, label and name.

    Flips (if ``self.perform_flips``) and the random resized crop (if
    ``self.perform_crop``) use the same random decisions for the image and
    its segmentation. The segmentation is resampled with nearest neighbour,
    the image bilinearly.

    Returns:
        dict: Keys ``'image'`` (normalized tensor), ``'label'``, ``'segm'``,
        ``'name'`` and ``'no_norm_image'`` (copy taken before normalization).
    """
    img = self.pil_images[idx]
    segm = self.pil_images_segm[idx]

    # Same random transformations for image and mask.
    if self.perform_flips:
        for flip in (TF.hflip, TF.vflip):
            if random.random() > 0.5:
                img, segm = flip(img), flip(segm)

    if self.perform_crop:
        top, left, height, width = transforms.RandomResizedCrop.get_params(
            img, (0.08, 1.0), (3. / 4., 4. / 3.))
        out_size = (self.size0, self.size0)
        img = TF.resized_crop(img, top, left, height, width, out_size, Image.BILINEAR)
        segm = TF.resized_crop(segm, top, left, height, width, out_size, Image.NEAREST)

    if self.transform:
        img = self.transform(img)
        segm = self.transform(segm)

    img = self.to_tensor(img)
    # Keep an un-normalized copy of the image tensor.
    no_norm_image = img.detach().clone()
    img = self.normalize(img)
    segm = self.to_tensor(segm)

    name = self.image_names[idx]
    label = label_to_tensor(self.image_to_onehot[name])

    return {
        'image': img,
        'label': label,
        'segm': segm,
        'name': name,
        'no_norm_image': no_norm_image,
    }
def __call__(self, img, gt=None):
    """Cut every window from ``self.getCropTuples(img)`` out of the inputs.

    Each window is resized to ``self.expected_blob_size``; the image uses
    bicubic and the ground truth nearest-neighbour interpolation.

    Args:
        img: Image to crop.
        gt: Optional ground truth cropped with the same windows.

    Returns:
        tuple: ``(crop_img_list, crop_gt_list)`` of NumPy arrays;
        ``crop_gt_list`` is ``None`` when ``gt`` is ``None``.
    """
    has_gt = gt is not None
    crop_img_list = []
    crop_gt_list = [] if has_gt else None

    for window in self.getCropTuples(img):
        top, left, height, width = window[0], window[1], window[2], window[3]
        patch = TF.resized_crop(img, top, left, height, width,
                                self.expected_blob_size,
                                interpolation=PIL.Image.BICUBIC)
        if has_gt:
            gt_patch = TF.resized_crop(gt, top, left, height, width,
                                       self.expected_blob_size,
                                       interpolation=PIL.Image.NEAREST)
        crop_img_list.append(np.array(patch))
        if has_gt:
            crop_gt_list.append(np.array(gt_patch))

    return crop_img_list, crop_gt_list
def process_images(self, raw, clean):
    """Crop a raw/clean pair identically and derive a dilated mask.

    Both images get the same random resized crop (bicubic) to
    ``self.img_size``. The mask from ``self.get_mask`` is binarized and
    dilated with a 5x5 max-pool.

    Returns:
        tuple: ``(transformed raw image, 1 - mask, transformed clean image)``.
    """
    window = RandomResizedCrop.get_params(raw, scale=(0.5, 2.0),
                                          ratio=(3. / 4., 4. / 3.))
    top, left, height, width = window
    raw_img = resized_crop(raw, top, left, height, width,
                           size=self.img_size, interpolation=Image.BICUBIC)
    clean_img = resized_crop(clean, top, left, height, width,
                             self.img_size, interpolation=Image.BICUBIC)

    # The mask must be computed before any further image augmentation.
    mask = self.get_mask(raw_img, clean_img)
    binary = (to_tensor(mask) > 0).float()
    dilated = torch.nn.functional.max_pool2d(binary, kernel_size=5,
                                             stride=1, padding=2)

    # The raw image is replaced by the difference between mask and clean image.
    raw_img = ImageChops.difference(mask, clean_img)

    raw_out = self.transformer(raw_img)
    inverted_mask = 1 - dilated
    clean_out = self.transformer(clean_img)
    return raw_out, inverted_mask, clean_out
def process_images(self, raw, clean):
    """Crop a raw/clean pair identically and mask out the clean image.

    Both images get the same random resized crop (bicubic) to
    ``self.img_size``. The inverted mask from ``self.get_mask`` is expanded
    to three channels and multiplied onto the transformed clean image.

    Returns:
        tuple: ``(corrupted_img, binary_mask, clean_img)``.
    """
    top, left, height, width = RandomResizedCrop.get_params(
        raw, scale=(0.5, 2.0), ratio=(3. / 4., 4. / 3.))
    raw_img = resized_crop(raw, top, left, height, width,
                           size=self.img_size, interpolation=Image.BICUBIC)
    clean_img = resized_crop(clean, top, left, height, width,
                             self.img_size, interpolation=Image.BICUBIC)

    # The mask must be computed before any further image augmentation.
    mask_t = to_tensor(self.get_mask(raw_img, clean_img))
    binary_mask = (1 - mask_t).expand(3, -1, -1)

    clean_img = self.transformer(clean_img)
    return clean_img * binary_mask, binary_mask, clean_img
def pairedTransformations(self, img, gtImg, config):
    """Apply matching geometric transforms to an image and its ground truth.

    Both inputs are converted to PIL, resized to a square of side
    ``config["imgSize"]``, and then optionally rotated and random-resized
    cropped with shared parameters. The image uses interpolation mode 2,
    the ground truth mode 0.

    Args:
        img: Input image accepted by ``TF.to_pil_image``.
        gtImg: Ground truth accepted by ``TF.to_pil_image``.
        config: Mapping with ``"imgSize"``, ``"rotate"``, ``"crop"``,
            ``"normalize"`` and ``"normVals"`` (mean, std).

    Returns:
        tuple: ``(img, gtImg)`` where ``img`` is a (possibly normalized)
        tensor and ``gtImg`` a tensor with a leading singleton dimension.
    """
    target = (config["imgSize"], config["imgSize"])

    # Convert to PIL images and bring both to the target size.
    img = TF.resize(TF.to_pil_image(img), size=target, interpolation=2)
    gtImg = TF.resize(TF.to_pil_image(gtImg), size=target, interpolation=0)

    # Rotation: the coin is always tossed, then combined with the switch.
    if config["rotate"] & random.choice([True, False]):
        angle = random.randint(-10, 10)
        img, gtImg = TF.rotate(img, angle), TF.rotate(gtImg, angle)

    # Random resized crop with one shared window.
    if config["crop"] & random.choice([True, False]):
        top, left, height, width = transforms.RandomResizedCrop.get_params(
            img, scale=(0.8, 1), ratio=(0.75, 1))
        img = TF.resized_crop(img, top, left, height, width, size=target, interpolation=2)
        gtImg = TF.resized_crop(gtImg, top, left, height, width, size=target, interpolation=0)

    img = TF.to_tensor(img)
    if config["normalize"]:
        mean_val, std_val = config["normVals"][0], config["normVals"][1]
        img = TF.normalize(img, mean=[mean_val], std=[std_val])

    gtImg = torch.from_numpy(np.expand_dims(np.array(gtImg), 0))
    return img, gtImg
def __getitem__(self, index):
    """Load one input/target pair (and optionally its mask) as tensors.

    In ``"train"`` mode the target (and mask) path is taken from the target
    list when the input and target lists have equal length; otherwise it is
    derived from the input file name. Training samples get a shared random
    crop covering 60-100% of each side, resized to 448x448, and a shared
    random horizontal flip. In ``"test"`` mode files are loaded as listed.

    Args:
        index: Sample index; wrapped modulo the length of the file lists.

    Returns:
        dict: ``"A_input"``, ``"A_exptC"``, ``"input_name"`` and, when
        ``self.use_mask`` is set, ``"mask"``.
    """
    if self.mode == "train":
        input_path = self.train_input_files[index % len(self.train_input_files)]
        img_name = os.path.split(input_path)[-1]
        img_input = cv2.imread(input_path, -1)
        if len(self.train_input_files) == len(self.train_target_files):
            img_exptC = Image.open(self.train_target_files[index % len(self.train_target_files)])
            if self.use_mask:
                img_mask = Image.open(os.path.join(self.root, "train/masks/" + img_name[:-4] + ".png"))
        else:
            split_name = img_name.split('_')
            if len(split_name) == 2:
                img_exptC = Image.open(
                    os.path.join(self.root, "train/target_" + self.retoucher + '/' + img_name))
                if self.use_mask:
                    img_mask = Image.open(os.path.join(self.root, "train/masks/" + img_name[:-4] + ".png"))
            else:
                # Only the first two name components identify the target/mask.
                img_exptC = Image.open(
                    os.path.join(self.root,
                                 "train/target_" + self.retoucher + '/' + split_name[0] + "_" + split_name[1] + ".tif"))
                if self.use_mask:
                    img_mask = Image.open(
                        os.path.join(self.root, "train/masks/" + split_name[0] + "_" + split_name[1] + ".png"))
    elif self.mode == "test":
        input_path = self.test_input_files[index % len(self.test_input_files)]
        img_name = os.path.split(input_path)[-1]
        img_input = cv2.imread(input_path, -1)
        img_exptC = Image.open(self.test_target_files[index % len(self.test_target_files)])
        if self.use_mask:
            img_mask = Image.open(self.test_mask_files[index % len(self.test_mask_files)])

    img_input = np.array(img_input)
    # Reverse the channel order of the cv2-loaded input.
    img_input = img_input[:, :, [2, 1, 0]]

    if self.mode == "train":
        ratio_H = np.random.uniform(0.6, 1.0)
        ratio_W = np.random.uniform(0.6, 1.0)
        # Public PIL attribute; size is (width, height).
        W, H = img_exptC.size
        crop_h = round(H * ratio_H)
        crop_w = round(W * ratio_W)
        i, j, h, w = transforms.RandomCrop.get_params(img_exptC, output_size=(crop_h, crop_w))
        img_input = TF_x.resized_crop(img_input, i, j, h, w, (448, 448))
        img_exptC = TF.resized_crop(img_exptC, i, j, h, w, (448, 448))
        if self.use_mask:
            img_mask = TF.resized_crop(img_mask, i, j, h, w, (448, 448))

        if np.random.random() > 0.5:
            img_input = TF_x.hflip(img_input)
            img_exptC = TF.hflip(img_exptC)
            if self.use_mask:
                img_mask = TF.hflip(img_mask)

    img_input = TF_x.to_tensor(img_input)
    img_exptC = TF.to_tensor(img_exptC)
    if self.use_mask:
        img_mask = TF.to_tensor(img_mask)

    if self.use_mask:
        return {"A_input": img_input, "A_exptC": img_exptC, "input_name": img_name, "mask": img_mask}
    else:
        return {"A_input": img_input, "A_exptC": img_exptC, "input_name": img_name}
def __call__(self, img, lbl):
    """
    Pad (if configured) and random-resized-crop an image/label pair.

    Args:
        img (PIL Image): Image to be cropped.
        lbl (PIL Image): Label processed with the same padding settings
            and the same crop window as ``img``.

    Returns:
        tuple: ``(img, lbl)`` cropped and resized to ``self.size``.
    """
    fill, mode = self.fill, self.padding_mode

    if self.padding is not None:
        img = functional.pad(img, self.padding, fill, mode)
        lbl = functional.pad(lbl, self.padding, fill, mode)

    # ``self.size`` is indexed as (height, width); PIL ``size`` is
    # (width, height). Each input is padded by its own shortfall.
    # pad the width if needed
    if self.pad_if_needed and img.size[0] < self.size[1]:
        img = functional.pad(img, (self.size[1] - img.size[0], 0), fill, mode)
        lbl = functional.pad(lbl, (self.size[1] - lbl.size[0], 0), fill, mode)

    # pad the height if needed
    if self.pad_if_needed and img.size[1] < self.size[0]:
        img = functional.pad(img, (0, self.size[0] - img.size[1]), fill, mode)
        lbl = functional.pad(lbl, (0, self.size[0] - lbl.size[1]), fill, mode)

    top, left, height, width = self.get_params(img, self.scale, self.ratio)
    cropped_img = functional.resized_crop(img, top, left, height, width,
                                          self.size, self.interpolation)
    cropped_lbl = functional.resized_crop(lbl, top, left, height, width,
                                          self.size, self.interpolation)
    return cropped_img, cropped_lbl
def __getitem__(self, idx):
    """Return the observation and environment image for a flat index.

    ``idx`` is split into a trajectory index and a step within it. Both
    48x48 RGB images get the same crop window and the same colour jitter
    before being flattened and normalized.

    Returns:
        dict: ``'x_t'`` (observation) and ``'env'`` (environment image).
    """
    traj_i, trans_i = divmod(idx, self.traj_length)

    obs_pixels = self.data['observations'][traj_i, trans_i].reshape(48, 48, 3)
    env_pixels = self.data['env'][traj_i].reshape(48, 48, 3)
    x = Image.fromarray(obs_pixels, mode='RGB')
    c = Image.fromarray(env_pixels, mode='RGB')

    # Upsampling gives bad images, so the random resizing params are fixed
    # to 1 for now.
    top, left, height, width = self.crop.get_params(c, (1, 1), (1, 1))
    jitter = self.jitter.get_params(0.5, 0.1, 0.1, 0.1)

    x = jitter(F.resized_crop(x, top, left, height, width, (48, 48), Image.BICUBIC))
    c = jitter(F.resized_crop(c, top, left, height, width, (48, 48), Image.BICUBIC))

    return {
        'x_t': normalize_image(np.array(x).flatten()).squeeze(),
        'env': normalize_image(np.array(c).flatten()).squeeze(),
    }
def segment_transform(self, image, mask):
    """Cut the same random 512x512 window from image and mask and tensorize.

    The window's top-left corner is drawn from ``[0, 568) x [0, 1408)`` and
    the crop is resized to ``(self.image_height, self.image_width)``. Only
    the image is normalized.

    Args:
        image: Input image.
        mask: Segmentation mask cropped with the same window.

    Returns:
        tuple: ``(image, mask)`` as tensors.
    """
    # NOTE(review): the offset ranges look sized for 1080x1920 inputs
    # (1080 - 512 and 1920 - 512) -- confirm against the dataset.
    seed_top = np.random.randint(0, 568)
    seed_left = np.random.randint(0, 1408)

    out_size = (self.image_height, self.image_width)
    image = TF.resized_crop(image, seed_top, seed_left, 512, 512, out_size)
    # NOTE(review): the mask is resized with the default interpolation;
    # nearest-neighbour may be required for discrete labels -- confirm.
    mask = TF.resized_crop(mask, seed_top, seed_left, 512, 512, out_size)

    # Transform to tensor
    image = TF.to_tensor(image)
    mask = TF.to_tensor(mask)

    # normalize() returns a new tensor by default, so the result has to be
    # assigned; previously it was discarded and the image stayed unnormalized.
    image = TF.normalize(image, (0.485, 0.456, 0.406), (0.229, 0.224, 0.225))
    return image, mask
def __call__(self, img, ldmrk, heatmap=None):
    """
    Random-resized-crop an image and move its landmarks accordingly.

    Args:
        img (PIL Image): Image to be cropped and resized.
        ldmrk: Landmark array; the first 68 entries are treated as
            x coordinates and the remaining entries as y coordinates.
        heatmap (PIL Image, optional): Cropped with the same window.

    Returns:
        tuple: ``(img2, ldmrk2, heatmap2)`` where ``heatmap2`` is the
        cropped heatmap, or ``None`` when no heatmap was given.
    """
    i, j, h, w = self.get_params(img, self.scale, self.ratio)

    img2 = F.resized_crop(img, i, j, h, w, self.size, self.interpolation)

    # Return the cropped heatmap, not the untouched input.
    htmp = None
    if heatmap is not None:
        htmp = F.resized_crop(heatmap, i, j, h, w, self.size, self.interpolation)

    # Shift landmarks into the crop window: x by the left offset, y by the top.
    ldmrk2 = ldmrk.copy()
    ldmrk2[:68] -= j
    ldmrk2[68:] -= i

    # Rescale. PIL reports size as (width, height).
    # NOTE(review): the scale maps the crop back to the original image size,
    # which matches the output only if self.size equals that size -- confirm.
    imWidth, imHeight = img.size
    sH = truediv(imHeight, h)
    sW = truediv(imWidth, w)

    ldmrk2[:68] *= sW
    ldmrk2[68:] *= sH

    return img2, ldmrk2, htmp
def __call__(self, sample):
    """Random-resized-crop ``sample['image']`` and ``sample['label']`` together.

    Crop parameters are cached on the instance. They are drawn again when
    none are stored yet or when ``self.image_mode`` is truthy; otherwise the
    stored window is reused.

    Returns:
        dict: The same ``sample`` with both entries replaced.
    """
    image = sample['image']
    label = sample['label']

    if self.i is None or self.image_mode:
        params = transforms.RandomResizedCrop.get_params(image, self.scale, self.ratio)
        self.i, self.j, self.h, self.w = params

    window = (self.i, self.j, self.h, self.w)
    sample['image'] = F.resized_crop(image, *window, self.size, Image.BILINEAR)
    sample['label'] = F.resized_crop(label, *window, self.size, Image.BILINEAR)
    return sample
def __call__(self, image, target):
    """Apply one random resized crop to an image and its target.

    The image is resampled bilinearly and the target with nearest
    neighbour; both are resized to ``self.size``.

    Returns:
        tuple: ``(image, target)``.
    """
    top, left, height, width = T.RandomResizedCrop.get_params(
        image, self.scale, self.ratio)
    cropped_image = F.resized_crop(
        image, top, left, height, width, self.size, InterpolationMode.BILINEAR)
    cropped_target = F.resized_crop(
        target, top, left, height, width, self.size, InterpolationMode.NEAREST)
    return cropped_image, cropped_target
def __call__(self, image, labels):
    """Apply one random resized crop to an image and its labels.

    The window comes from ``self.get_params(image)`` and the output size
    from ``self.rnd_crop.size``. The image is resampled bicubically and the
    labels with nearest neighbour.

    Returns:
        tuple: ``(image, labels)``.
    """
    i, j, h, w = self.get_params(image)
    # Image.CUBIC was a deprecated alias of Image.BICUBIC and no longer
    # exists in Pillow 10+; BICUBIC selects the same filter.
    image = F.resized_crop(image, i, j, h, w, self.rnd_crop.size, Image.BICUBIC)
    labels = F.resized_crop(labels, i, j, h, w, self.rnd_crop.size, Image.NEAREST)
    return image, labels
def __call__(self, image, ground_truth):
    """Augment (train) or center-crop (eval) a pair and return tensors.

    In training mode the pair goes through shared random flips, a random
    rotation, a center crop to 1.5x the output size, either a random resized
    crop (probability 0.8) or a plain random crop, and shared colour
    adjustments. Outside training only a center crop is applied.

    Returns:
        tuple: ``(image, ground_truth)`` as normalized tensors; the image is
        normalized with 4 channel statistics, the ground truth with 3.
    """
    if not self.train:
        crop = transforms.CenterCrop(self.output_size)
        image, ground_truth = crop(image), crop(ground_truth)
    else:
        # --- shape augmentations ---
        # left-right, then up-down mirroring, one coin toss each
        for mirror in (TF.hflip, TF.vflip):
            if np.random.random() > 0.5:
                image, ground_truth = mirror(image), mirror(ground_truth)

        # random rotation over the full circle, enlarging the canvas
        angle = np.random.uniform(-180, 180)
        image = TF.rotate(image, angle, expand=True)
        ground_truth = TF.rotate(ground_truth, angle, expand=True)

        # center crop to 1.5x the final size
        crop = transforms.CenterCrop(int(self.output_size * 1.5))
        image, ground_truth = crop(image), crop(ground_truth)

        if np.random.random() > 0.2:
            # random square window, resized to the output size
            top, left, height, width = transforms.RandomResizedCrop.get_params(
                image, scale=(0.2, 0.9), ratio=(1, 1))
            side = int(self.output_size)
            image = TF.resized_crop(image, top, left, height, width, size=side)
            ground_truth = TF.resized_crop(ground_truth, top, left, height, width, size=side)
        else:
            # random crop without resize
            top, left, height, width = transforms.RandomCrop.get_params(
                image, output_size=(self.output_size, self.output_size))
            image = TF.crop(image, top, left, height, width)
            ground_truth = TF.crop(ground_truth, top, left, height, width)

        # --- color augmentations, applied to both inputs ---
        adjusters = [TF.adjust_contrast, TF.adjust_brightness,
                     TF.adjust_saturation, TF.adjust_gamma]
        for adjust in adjusters:
            if np.random.random() > 0.5:
                factor = np.random.uniform(0.5, 1.5)
                image = adjust(image, factor)
                ground_truth = adjust(ground_truth, factor)

        if np.random.random() > 0.5:
            hue_shift = np.random.uniform(-0.15, 0.15)
            image = TF.adjust_hue(image, hue_shift)
            ground_truth = TF.adjust_hue(ground_truth, hue_shift)

    # Convert to tensors and normalize.
    ground_truth = TF.normalize(TF.to_tensor(ground_truth), [0.5] * 3, [0.25] * 3)
    image = TF.normalize(TF.to_tensor(image), [0.5] * 4, [0.25] * 4)
    return image, ground_truth
def __call__(self, input_img, target_img):
    """
    Apply one random resized crop to an input image and its target.

    Args:
        input_img (PIL Image): Image to be cropped and resized with
            ``self.interpolation``.
        target_img (PIL Image): Target cropped with the same window and
            resized with nearest-neighbour interpolation.

    Returns:
        tuple: ``(cropped input, cropped target)``.
    """
    top, left, height, width = self.get_params(input_img, self.scale, self.ratio)
    cropped_input = F.resized_crop(
        input_img, top, left, height, width, self.size, self.interpolation)
    cropped_target = F.resized_crop(
        target_img, top, left, height, width, self.size, Image.NEAREST)
    return cropped_input, cropped_target
def process_images(self, raw, clean):
    """Crop a raw/clean pair identically and return both as tensors.

    Both images get the same random resized crop (bicubic) to
    ``self.img_size``; only the raw image passes through
    ``self.transformer``.

    Returns:
        tuple: ``(raw tensor, mask tensor)``.
    """
    top, left, height, width = RandomResizedCrop.get_params(
        raw, scale=(0.1, 2), ratio=(3. / 4., 4. / 3.))

    raw_img = resized_crop(raw, top, left, height, width,
                           size=self.img_size, interpolation=Image.BICUBIC)
    raw_img = self.transformer(raw_img)

    mask_img = resized_crop(clean, top, left, height, width,
                            self.img_size, interpolation=Image.BICUBIC)

    raw_tensor = to_tensor(raw_img)
    mask_tensor = to_tensor(mask_img)
    return raw_tensor, mask_tensor
def __getitem__(self, index):
    """Sample three ordered frames from one action segment.

    Three offsets are drawn within the segment and sorted; the frames at the
    first, second and (third + 1) offsets are loaded. All three share one
    crop window and one colour-jitter transform. ``yt`` is the relative
    position of the middle frame between the outer two, scaled to
    ``self.output_classes`` and truncated to an int.

    Returns:
        dict: ``x0``, ``xt``, ``xT`` (tensors) and ``yt`` (int).
    """
    row = self.dataset[index]
    participant_id, video_id = row[1], row[2]
    start_frame, end_frame = int(row[6]), int(row[7])
    traj_length = end_frame - start_frame

    d0, dt, dT = sorted(np.random.randint(0, traj_length, 3))

    frames = [
        self.load_frame(participant_id, video_id, start_frame + offset)
        for offset in (d0, dt, dT + 1)
    ]

    yt = int((dt - d0) / (dT + 1 - d0) * self.output_classes)

    # NOTE(review): ``scale`` is passed for both trailing arguments --
    # confirm against get_random_crop_params whether the second was meant
    # to be something else.
    top, left, height, width = get_random_crop_params(
        frames[0],
        self.t_random_resize.scale,
        self.t_random_resize.scale,
    )

    jitter_cfg = self.t_color_jitter
    t_color_jitter = jitter_cfg.get_params(
        jitter_cfg.brightness,
        jitter_cfg.contrast,
        jitter_cfg.saturation,
        jitter_cfg.hue,
    )

    out_size = (CROP_HEIGHT, CROP_WIDTH,)
    processed = []
    for frame in frames:
        frame = TF.resized_crop(frame, top, left, height, width, out_size,
                                self.t_random_resize.interpolation)
        frame = t_color_jitter(frame)
        processed.append(self.t_to_tensor(frame))
    x0, xt, xT = processed

    return dict(x0=x0, xt=xt, xT=xT, yt=yt)
def __call__(self, sample):
    """Random-resized-crop ``sample['image']`` and ``sample['sal']`` together.

    Both entries share the crop window but use their own interpolation
    (``self.interpolation_img`` and ``self.interpolation_sal``).

    Returns:
        dict: The same ``sample`` with both entries replaced.
    """
    picture, saliency = sample['image'], sample['sal']
    top, left, height, width = self.get_params(picture, self.scale, self.ratio)

    sample['image'] = F.resized_crop(
        picture, top, left, height, width, self.size, self.interpolation_img)
    sample['sal'] = F.resized_crop(
        saliency, top, left, height, width, self.size, self.interpolation_sal)
    return sample
def __call__(self, img1, img2):
    """Random-resized-crop two images with independently drawn windows.

    Each image gets its own crop parameters; both are resized to
    ``self.size`` with ``self.interpolation``.

    Returns:
        tuple: ``(img1, img2)``.
    """
    views = []
    for picture in (img1, img2):
        # Parameters are drawn separately for each image, in order.
        top, left, height, width = self.get_params(picture, self.scale, self.ratio)
        views.append(
            F.resized_crop(picture, top, left, height, width, self.size, self.interpolation))
    return views[0], views[1]
def process_images(self, raw, clean):
    """Crop a raw/clean pair identically and return raw image plus mask.

    Both images get the same random resized crop (bicubic) to
    ``self.img_size``. The mask comes from ``self.get_mask`` on the cropped
    pair; only the raw image passes through ``self.transformer``.

    Returns:
        tuple: ``(transformed raw image, mask tensor)``.
    """
    top, left, height, width = RandomResizedCrop.get_params(
        raw, scale=(0.5, 2.0), ratio=(3. / 4., 4. / 3.))
    raw_img = resized_crop(raw, top, left, height, width,
                           size=self.img_size, interpolation=Image.BICUBIC)
    clean_img = resized_crop(clean, top, left, height, width,
                             self.img_size, interpolation=Image.BICUBIC)

    # The mask must be computed before any further image augmentation.
    mask_tensor = self.get_mask(raw_img, clean_img)

    return self.transformer(raw_img), mask_tensor