def transform_d(self, image, mask):
    """Jointly augment an image/mask pair for training.

    Applies the same random crop and random flips to both inputs, then
    downsamples only the image by ``self.upscale_factor`` (the mask keeps
    the crop resolution — the usual super-resolution target layout).

    Args:
        image: PIL image to be cropped, downsampled, and flipped.
        mask: PIL image spatially aligned with ``image``.

    Returns:
        ``(image, mask)`` as tensors; the image is
        ``crop_size // upscale_factor`` square, the mask ``crop_size`` square.
    """
    # Random crop — identical window for both inputs.
    i, j, h, w = RandomCrop.get_params(
        image, output_size=(self.crop_size, self.crop_size))
    image = TF.crop(image, i, j, h, w)
    mask = TF.crop(mask, i, j, h, w)

    # Downsample only the image (removed the dead "image = image" statement).
    image = TF.resize(
        image,
        (self.crop_size // self.upscale_factor,
         self.crop_size // self.upscale_factor))

    # Random horizontal flip — same decision applied to both.
    if random.random() > 0.5:
        image = TF.hflip(image)
        mask = TF.hflip(mask)

    # Random vertical flip — same decision applied to both.
    if random.random() > 0.5:
        image = TF.vflip(image)
        mask = TF.vflip(mask)

    # Convert to tensors.
    image = TF.to_tensor(image)
    mask = TF.to_tensor(mask)
    return image, mask
def __getitem__(self, idx):
    """Return ``{'color': ..., 'depth': ...}`` tensors for sample ``idx``.

    On first access the color/height images are loaded, resized to twice
    the target size, and memoized in ``self.cache_triples``; every access
    then takes a fresh random crop shared by both modalities.
    """
    cached = self.cache_triples[idx]
    if cached is not None:
        color_image, depth_image = cached[0], cached[1]
    else:
        names = self.file_triples[idx]
        color_path = os.path.join(self.data_dir, 'color', names[0])
        depth_path = os.path.join(self.data_dir, 'height', names[2])
        color_image = Image.open(color_path)
        depth_image = TF.to_grayscale(Image.open(depth_path))
        # Cache at 2x target size so later random crops still have room.
        color_image = TF.resize(color_image, self.size * 2)
        depth_image = TF.resize(depth_image, self.size * 2)
        self.cache_triples[idx] = (color_image, depth_image)

    # One crop window, applied to both modalities to keep them aligned.
    crop_window = RandomCrop.get_params(color_image,
                                        output_size=(self.size, self.size))
    color_image = TF.crop(color_image, *crop_window)
    depth_image = TF.crop(depth_image, *crop_window)

    return {
        'color': TF.to_tensor(color_image).to(self.device),
        'depth': TF.to_tensor(depth_image).to(self.device),
    }
def cache_func(self, i):
    """Cache chunk ``i`` of DIV2K crops (custom hook for HDF5Cache).

    Loads ``cache_size // mult`` HR/LR image pairs starting at image index
    ``i * cache_size // mult``, takes ``mult`` aligned random crops from
    each pair, and stores the stacked float32 arrays in the cache.
    """
    lr_images = []
    hr_images = []
    offset = i * self.cache.cache_size // self.mult
    for idx in range(self.cache.cache_size // self.mult):
        # DIV2K's train split has 800 images; wrap past the end.
        if offset + idx + 1 > 800:
            idx -= self.cache.cache_size
        img_hr_name = ("./datasets/saved/DIV2K_train_HR/" +
                       str(offset + idx + 1).zfill(4) + ".png")
        img_lr_name = (
            f"./datasets/saved/DIV2K_train_LR_bicubic/X{self.factor}/" +
            str(offset + idx + 1).zfill(4) + f"x{self.factor}.png")
        # C,H,W
        img_hr = Image.open(img_hr_name)
        img_lr = Image.open(img_lr_name)
        hr_size = self.size * self.factor
        f = self.factor
        # BUGFIX(shadowing): the crop's top coordinate used to be unpacked
        # into the inner loop variable ``j``, clobbering it each iteration;
        # use descriptive names and a throwaway loop counter instead.
        for _ in range(self.mult):
            top, left, height, width = RandomCrop.get_params(
                img_hr, (hr_size, hr_size))
            hr_crop = TF.crop(img_hr, top, left, height, width)
            # The LR crop is the same window scaled down by the SR factor.
            lr_crop = TF.crop(img_lr, top // f, left // f,
                              height // f, width // f)
            lr_images.append(ToTensor()(lr_crop))
            hr_images.append(ToTensor()(hr_crop))
    lr_type = np.stack(lr_images).astype(np.float32)
    hr_type = np.stack(hr_images).astype(np.float32)
    self.cache.cache_images(i, lr_type, hr_type)
def random_resize_crop_synth(augment_targets, size):
    """Take one shared square crop from all four synth maps, then resize.

    A random window with side ``min(image.size)`` is sampled once and
    applied identically to the image, region score, affinity score, and
    confidence mask; each result is resized to ``size`` x ``size``.
    Scores use bicubic resampling; the confidence mask uses nearest
    neighbour so its values are not interpolated.
    """
    pil_targets = [Image.fromarray(t) for t in augment_targets]
    image = pil_targets[0]

    # A square window no larger than the short side always fits.
    short_side = min(image.size)
    top, left, height, width = RandomCrop.get_params(
        image, output_size=(short_side, short_side))

    modes = [
        InterpolationMode.BICUBIC,  # image
        InterpolationMode.BICUBIC,  # region score
        InterpolationMode.BICUBIC,  # affinity score
        InterpolationMode.NEAREST,  # confidence mask
    ]
    return [
        np.array(
            resized_crop(target, top, left, height, width,
                         size=(size, size), interpolation=mode))
        for target, mode in zip(pil_targets, modes)
    ]
def __getitem__(self, index):
    """Load an (image, edge) pair and apply joint augmentations.

    Returns ``(normalized image, inverted edge map, deteriorated edge
    map)`` as tensors; geometric augmentations use the same parameters
    for both images so they stay aligned.
    """
    edge = pil_loader(
        os.path.join(os.getcwd(), self.datapath_lines, self.lines[index]))
    img = pil_loader(
        os.path.join(os.getcwd(), self.datapath_imgs, self.imgs[index]))

    # Joint random scaling/rotation.
    scale = random.random() + 0.5
    angle = random.random() * 0.25
    affine_kwargs = dict(angle=angle, scale=scale, translate=(0, 0), shear=0)
    edge = TF.affine(edge, **affine_kwargs)
    img = TF.affine(img, **affine_kwargs)

    # Joint horizontal flip.
    if random.random() > 0.5:
        img = TF.hflip(img)
        edge = TF.hflip(edge)

    # Joint random crop.
    top, left, height, width = RandomCrop.get_params(
        edge, output_size=(384, 384))
    edge = TF.crop(edge, top, left, height, width)
    img = TF.crop(img, top, left, height, width)

    # Photometric jitter on the image only.
    img = ColorJitter(brightness=0.5, contrast=0.5,
                      saturation=0.5, hue=0.5)(img)

    # Deterioration (90% of the time) works on the PIL edge BEFORE
    # inversion; "1 -" inverts because BIPED edges are white-on-black.
    use_deteriorated = random.random() > 0.1
    if use_deteriorated:
        edge_d = TF.to_tensor(CustomDataset.deteriorate(edge))
    edge = 1 - TF.to_tensor(edge)
    if not use_deteriorated:
        edge_d = edge.detach()

    img = TF.to_tensor(img)
    return self.normalize(img), edge, edge_d
def train_transform(self, hr, lr, crop_size, upscale_factor):
    """Produce an aligned (lr, hr) training crop pair.

    The HR image gets a random ``crop_size`` square crop; the LR image
    gets the matching region, downsampled to
    ``crop_size / upscale_factor`` per side.

    Returns:
        ``(lr_train, hr_train)`` as tensors.
    """
    hr_width, hr_height = hr.size
    lr_side = int(crop_size / upscale_factor)

    crop_params = RandomCrop.get_params(
        hr, output_size=(crop_size, crop_size))
    hr_train = TF.crop(hr, *crop_params)

    if upscale_factor == 1:
        lr_train = TF.crop(lr, *crop_params)
    else:
        # Upscale LR to HR resolution so the same crop window applies,
        # then shrink the crop back down to the LR training size.
        upsampled = lr.resize((hr_width, hr_height), Image.BICUBIC)
        lr_train = TF.crop(upsampled, *crop_params)
        lr_train = lr_train.resize((lr_side, lr_side), Image.BICUBIC)

    return TF.to_tensor(lr_train), TF.to_tensor(hr_train)
def random_crop(augment_targets, size):
    """Apply one shared ``size`` x ``size`` random crop to all four maps.

    The window is sampled once from the image so the image, region score,
    affinity score, and confidence mask stay spatially aligned.
    """
    pil_targets = [Image.fromarray(t) for t in augment_targets]
    window = RandomCrop.get_params(pil_targets[0], output_size=(size, size))
    return [np.array(crop(target, *window)) for target in pil_targets]
def __getitem__(self, index):
    """Return an ((audio, video), target, target_audio) training sample.

    Picks a random center frame for the entity, loads the surrounding AV
    clip, and optionally applies a shared random flip + random crop to
    every video frame.
    """
    # Get meta-data for the requested speaker entity.
    video_id, entity_id = self.entity_list[index]
    entity_metadata = self.entity_data[video_id][entity_id]
    audio_offset = float(entity_metadata[0][1])

    # Random center frame; its last field is the activity label.
    mid_index = random.randint(0, len(entity_metadata) - 1)
    midone = entity_metadata[mid_index]
    target = int(midone[-1])
    target_audio = self.speech_data[video_id][midone[1]]

    clip_meta_data = cu.generate_clip_meta(entity_metadata, mid_index,
                                           self.half_clip_length)
    video_data, audio_data = io.load_av_clip_from_metadata(
        clip_meta_data, self.video_root, self.audio_root, audio_offset,
        self.target_size)

    if self.do_video_augment:
        # Random horizontal flip, applied to the whole clip.
        if bool(random.getrandbits(1)):
            video_data = [
                s.transpose(Image.FLIP_LEFT_RIGHT) for s in video_data
            ]

        # Random crop shared across frames. get_params returns
        # (top, left, height, width) while PIL's crop box is
        # (left, upper, right, lower).
        width, height = video_data[0].size
        f = random.uniform(0.5, 1)
        i, j, h, w = RandomCrop.get_params(
            video_data[0], output_size=(int(height * f), int(width * f)))
        # BUGFIX: the box used to be (j, i, w, h), treating the crop SIZE
        # as the bottom-right corner; convert to absolute coordinates.
        video_data = [s.crop(box=(j, i, j + w, i + h)) for s in video_data]

    if self.video_transform is not None:
        video_data = [self.video_transform(vd) for vd in video_data]

    video_data = torch.cat(video_data, dim=0)
    return (np.float32(audio_data), video_data), target, target_audio
def crop_imgs(*pil_imgs, size):
    """Lazily yield the same random ``size`` crop from every given image.

    The crop window is sampled once from the first image, so all yielded
    crops are spatially aligned.
    """
    top, left, height, width = RandomCrop.get_params(pil_imgs[0], size)
    for picture in pil_imgs:
        yield F.crop(picture, top, left, height, width)