model = sft.SFT_Net()
model.load_state_dict(torch.load(model_path), strict=True)
model.eval()
model = model.cuda()

print('sftgan testing...')

idx = 0
for path in glob.glob(test_img_folder + '/*'):
    idx += 1
    basename = os.path.basename(path)
    base = os.path.splitext(basename)[0]
    print(idx, base)
    # read image
    img = cv2.imread(path, cv2.IMREAD_UNCHANGED)
    img = modcrop(img, 8)
    img = img * 1.0 / 255
    if img.ndim == 2:
        img = np.expand_dims(img, axis=2)
    img = torch.from_numpy(np.transpose(img[:, :, [2, 1, 0]], (2, 0, 1))).float()
    # matlab imresize
    img_LR = imresize(img, 1 / 4, antialiasing=True)
    img_LR = img_LR.unsqueeze(0)
    img_LR = img_LR.cuda()
    # read seg
    seg = torch.load(os.path.join(test_prob_path, base + '_bic.pth'))
    seg = seg.unsqueeze(0)
    # change probability
    # seg.fill_(0)
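
# The test script above assumes a `modcrop` helper that crops height and width
# down to exact multiples of `scale` (8 here), so the x8 segmentation maps and
# x4 downscales line up exactly. A minimal sketch of such a helper, assuming an
# HW or HWC numpy image (the repo's own util.modcrop may differ in details):
def modcrop(img, scale):
    h, w = img.shape[0], img.shape[1]
    return img[:h - h % scale, :w - w % scale]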

def __getitem__(self, index):
    HR_path, LR_path = None, None
    data_type = self.opt.get('data_type', 'img')
    scale = self.opt['scale']
    HR_size = self.opt['HR_size']

    # get HR image
    if self.opt['phase'] == 'train' and \
            random.choice(list(range(self.ratio))) == 0:  # read background images
        bg_index = random.randint(0, len(self.paths_HR_bg) - 1)
        HR_path = self.paths_HR_bg[bg_index]
        img_HR = util.read_img(env=data_type, path=HR_path, lmdb_env=self.HR_env_bg)
        seg = torch.FloatTensor(8, img_HR.shape[0], img_HR.shape[1]).fill_(0)
        seg[0, :, :] = 1  # background
    else:
        HR_path = self.paths_HR[index]
        img_HR = util.read_img(env=data_type, path=HR_path, lmdb_env=self.HR_env)
        # read segmentation files; adjust this to your own settings
        seg = torch.load(HR_path.replace('/img/', '/bicseg/').replace('.png', '.pth'))

    # modcrop in the validation / test phase
    if self.opt['phase'] != 'train':
        img_HR = util.modcrop(img_HR, 8)

    seg = np.transpose(seg.numpy(), (1, 2, 0))

    # get LR image
    if self.paths_LR:
        LR_path = self.paths_LR[index]
        img_LR = util.read_img(env=data_type, path=LR_path, lmdb_env=self.LR_env)
    else:  # down-sampling on-the-fly
        # randomly scale during training
        if self.opt['phase'] == 'train':
            random_scale = random.choice(self.random_scale_list)
            H_s, W_s, _ = seg.shape

            def _mod(n, random_scale, scale, thres):
                rlt = int(n * random_scale)
                rlt = (rlt // scale) * scale
                return thres if rlt < thres else rlt

            H_s = _mod(H_s, random_scale, scale, HR_size)
            W_s = _mod(W_s, random_scale, scale, HR_size)
            img_HR = cv2.resize(np.copy(img_HR), (W_s, H_s), interpolation=cv2.INTER_LINEAR)
            seg = cv2.resize(np.copy(seg), (W_s, H_s), interpolation=cv2.INTER_NEAREST)

        H, W, _ = img_HR.shape
        # using matlab imresize
        img_LR = imresize(img_HR, 1 / scale, antialiasing=True)
        if img_LR.ndim == 2:
            img_LR = np.expand_dims(img_LR, axis=2)

    H, W, C = img_LR.shape
    if self.opt['phase'] == 'train':
        LR_size = HR_size // scale

        # randomly crop
        rnd_h = random.randint(0, max(0, H - LR_size))
        rnd_w = random.randint(0, max(0, W - LR_size))
        img_LR = img_LR[rnd_h:rnd_h + LR_size, rnd_w:rnd_w + LR_size, :]
        rnd_h_HR, rnd_w_HR = int(rnd_h * scale), int(rnd_w * scale)
        img_HR = img_HR[rnd_h_HR:rnd_h_HR + HR_size, rnd_w_HR:rnd_w_HR + HR_size, :]
        seg = seg[rnd_h_HR:rnd_h_HR + HR_size, rnd_w_HR:rnd_w_HR + HR_size, :]

        # augmentation - flip, rotate
        img_LR, img_HR, seg = util.augment([img_LR, img_HR, seg],
                                           self.opt['use_flip'], self.opt['use_rot'])

        # category
        if 'building' in HR_path:
            category = 1
        elif 'plant' in HR_path:
            category = 2
        elif 'mountain' in HR_path:
            category = 3
        elif 'water' in HR_path:
            category = 4
        elif 'sky' in HR_path:
            category = 5
        elif 'grass' in HR_path:
            category = 6
        elif 'animal' in HR_path:
            category = 7
        else:
            category = 0  # background
    else:
        category = -1  # unused during val

    # BGR to RGB, HWC to CHW, numpy to tensor
    img_HR = util.np2tensor(img_HR, normalize=self.znorm, add_batch=False)
    img_LR = util.np2tensor(img_LR, normalize=self.znorm, add_batch=False)
    seg = util.np2tensor(seg, normalize=self.znorm, add_batch=False)

    if LR_path is None:
        LR_path = HR_path
    return {'LR': img_LR, 'HR': img_HR, 'seg': seg, 'category': category,
            'LR_path': LR_path, 'HR_path': HR_path}
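
# The category if/elif chain above is a first-match substring lookup over the HR
# path. An equivalent table-driven sketch (names and order mirror the original
# chain; order matters because the first match wins, and 0 is background):
SEG_CATEGORIES = ['building', 'plant', 'mountain', 'water', 'sky', 'grass', 'animal']

def path_to_category(hr_path):
    for i, name in enumerate(SEG_CATEGORIES, start=1):
        if name in hr_path:
            return i
    return 0  # background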

def __getitem__(self, index):
    HR_path, LR_path = None, None
    scale = self.opt.get('scale', 4)
    HR_size = self.opt.get('HR_size', 128)
    if HR_size:
        LR_size = HR_size // scale

    # Default case: tensor will result in the [0,1] range
    # Alternative: tensor will be z-normalized to the [-1,1] range
    znorm = self.opt.get('znorm', False)

    ######## Read the images ########
    #TODO: check cases where the default of 3 channels will be troublesome
    image_channels = self.opt.get('image_channels', 3)

    # Check if an LR path is provided
    if self.paths_LR:
        # If LR is provided, check if 'rand_flip_LR_HR' is enabled
        if self.opt.get('rand_flip_LR_HR', None) and self.opt['phase'] == 'train':
            LRHRchance = random.uniform(0, 1)
            flip_chance = self.opt.get('flip_chance', 0.05)
        else:  # normal case, no flipping
            LRHRchance = 0.
            flip_chance = 0.

        # get the HR and LR images; if enabled, there is a random chance
        # that the LR and HR pair is swapped ("flipped")
        if LRHRchance < (1 - flip_chance):
            # normal case, no flipping; if img_LR (LR_path) doesn't exist, use img_HR (HR_path)
            HR_path = self.paths_HR[index]
            LR_path = self.paths_LR[index]
            if LR_path is None:
                LR_path = HR_path
        else:
            # flipped case; if img_HR (HR_path) doesn't exist, use img_LR (LR_path)
            HR_path = self.paths_LR[index]
            LR_path = self.paths_HR[index]
            if HR_path is None:
                HR_path = LR_path

        # Read the LR and HR images from the provided paths
        img_LR = util.read_img(self.LR_env, LR_path, out_nc=image_channels)
        img_HR = util.read_img(self.HR_env, HR_path, out_nc=image_channels)

        # Even if an LR dataset is provided, force an 'aug_downscale' fraction of
        # on-the-fly downscales from HR; the code will later make sure img_LR has
        # the correct size
        if self.opt.get('aug_downscale', None):
            if np.random.rand() < self.opt['aug_downscale']:
                img_LR = img_HR
    else:
        # If LR is not provided, use HR and modify on the fly
        HR_path = self.paths_HR[index]
        img_HR = util.read_img(self.HR_env, HR_path, out_nc=image_channels)
        img_LR = img_HR
        LR_path = HR_path

    # tmp_vis(img_HR, False)
    # tmp_vis(img_LR, False)

    ######## Modify the images ########

    # HR modcrop in the validation / test phase
    if self.opt['phase'] != 'train':
        img_HR = util.modcrop(img_HR, scale)

    # Change color space if necessary
    # Note: changing the LR colorspace here can introduce colors during the later
    # augmentations (e.g. with Gaussian or speckle noise). That may be useful if the
    # model should learn to remove color noise from grayscale images; otherwise,
    # move this to just before converting to tensors.
    # self.opt['color'] applies to both LR and HR, as in the original code (kept for compatibility)
    # self.opt['color_HR'] and self.opt['color_LR'] allow independent control
    if self.opt.get('color', None):  # change both
        img_HR = util.channel_convert(img_HR.shape[2], self.opt['color'], [img_HR])[0]
        img_LR = util.channel_convert(img_LR.shape[2], self.opt['color'], [img_LR])[0]
    if self.opt.get('color_HR', None):  # only change HR
        img_HR = util.channel_convert(img_HR.shape[2], self.opt['color_HR'], [img_HR])[0]
    if self.opt.get('color_LR', None):  # only change LR
        img_LR = util.channel_convert(img_LR.shape[2], self.opt['color_LR'], [img_LR])[0]

    ######## Augmentations ########

    # Augmentations during training
    if self.opt['phase'] == 'train':
        # HR downscale
        # Note: this is NOT recommended; HR should not be exposed to degradation, as it
        # will degrade the model's results. Only added because it exists as an option in
        # downstream forks.
        if self.opt.get('hr_downscale', None):
            ds_algo = self.opt.get('hr_downscale_types', 777)
            hr_downscale_amt = self.opt.get('hr_downscale_amt', 2)
            if isinstance(hr_downscale_amt, list):
                hr_downscale_amt = random.choice(hr_downscale_amt)
            # skip if the amount is 1 or if the result would be smaller than the HR size
            if hr_downscale_amt > 1 and img_HR.shape[0] // hr_downscale_amt >= HR_size \
                    and img_HR.shape[1] // hr_downscale_amt >= HR_size:
                img_HR, _ = Scale(img=img_HR, scale=hr_downscale_amt, algo=ds_algo)
                # downscale LR to match the new HR size if the scale no longer matches
                if img_LR is not None and (img_HR.shape[0] // scale != img_LR.shape[0]
                                           or img_HR.shape[1] // scale != img_LR.shape[1]):
                    img_LR, _ = Scale(img=img_LR, scale=hr_downscale_amt, algo=ds_algo)

        # Validate there's an img_LR; if not, use img_HR
        if img_LR is None:
            img_LR = img_HR
            print("Image LR: ", LR_path,
                  "was not loaded correctly, using HR pair to downscale on the fly.")

        # Check that HR and LR have the same dimensions ratio, else use the
        # 'shape_change' option to decide how to process them
        shape_change = self.opt.get('shape_change', 'reshape_hr')  # or "reshape_lr"
        if img_HR.shape[0] // img_LR.shape[0] != img_HR.shape[1] // img_LR.shape[1]:
            if shape_change == "reshape_lr":
                img_LR = transforms.Resize(
                    (int(img_HR.shape[0] / scale), int(img_HR.shape[1] / scale)),
                    interpolation="BILINEAR")(np.copy(img_LR))
            elif shape_change == "reshape_hr":
                # resize HR to the mean aspect ratio of the LR pair, then fit LR to scale
                #TODO: temporary change to test contextual loss with unaligned LR-HR
                # pairs, forcing them to have the correct scale
                nh = img_HR.shape[0] * (2 * img_LR.shape[1]) // (img_LR.shape[0] + img_LR.shape[1])
                nw = img_HR.shape[1] * (2 * img_LR.shape[0]) // (img_LR.shape[0] + img_LR.shape[1])
                img_HR = transforms.Resize((nh, nw), interpolation="BILINEAR")(np.copy(img_HR))
                img_LR = transforms.Resize(
                    (int(img_HR.shape[0] / scale), int(img_HR.shape[1] / scale)),
                    interpolation="BILINEAR")(np.copy(img_LR))
            else:
                # generate a new LR from HR
                # (dimensions-ratio mismatch warning disabled while testing cx loss)
                img_LR = img_HR

        # center crop image to a square (both branches of the original code crop
        # to the shorter side)
        if self.opt.get('center_crop', False):
            crop_size = min(img_HR.shape[0], img_HR.shape[1])
            img_HR = transforms.CenterCrop((crop_size, crop_size))(np.copy(img_HR))
            img_LR = transforms.CenterCrop((crop_size, crop_size))(np.copy(img_LR))

        # resize LR and HR images to the given HR dimension
        if self.opt.get('resize_HR_dimension', False):
            img_HR = transforms.Resize((self.opt['HR_size'], self.opt['HR_size']),
                                       interpolation="BILINEAR")(np.copy(img_HR))
            img_LR = transforms.Resize((self.opt['HR_size'], self.opt['HR_size']),
                                       interpolation="BILINEAR")(np.copy(img_LR))

        # Random crop (reduces computing cost and adjusts images to the correct size first)
        if img_HR.shape[0] > HR_size or img_HR.shape[1] > HR_size:
            # here the scale is relative to the image pair itself, not the training
            # scale (in case the images are being scaled on the fly)
            scaleor = img_HR.shape[0] // img_LR.shape[0]
            img_HR, img_LR = augmentations.random_crop_pairs(img_HR, img_LR, HR_size, scaleor)

        # Or, if the HR images are too small, resize HR to HR_size and fit the LR pair to LR_size
        dim_change = self.opt.get('dim_change', 'pad')
        if img_HR.shape[0] < HR_size or img_HR.shape[1] < HR_size:
            if dim_change == "resize":
                # rescale HR image to the HR_size
                img_HR = transforms.Resize((HR_size, HR_size),
                                           interpolation="BILINEAR")(np.copy(img_HR))
                # rescale LR image to the LR_size (the original code discarded img_LR
                # and generated a new one on the fly from img_HR)
                img_LR = transforms.Resize((LR_size, LR_size),
                                           interpolation="BILINEAR")(np.copy(img_LR))
            elif dim_change == "pad":
                # if img_LR is img_HR, padding would be wrong; downscale LR before padding
                if img_LR.shape[0] != img_HR.shape[0] // scale \
                        or img_LR.shape[1] != img_HR.shape[1] // scale:
                    ds_algo = 777  # default to matlab-like bicubic downscale
                    if self.opt.get('lr_downscale', None):
                        # if manually set and scale algorithms are provided, use them
                        ds_algo = self.opt.get('lr_downscale_types', 777)
                    if self.opt.get('lr_downscale', None) and self.opt.get('dataroot_kernels', None) \
                            and 999 in self.opt["lr_downscale_types"]:
                        ds_kernel = self.ds_kernels
                    else:
                        ds_kernel = None
                    img_LR, _ = Scale(img=img_LR, scale=scale, algo=ds_algo, ds_kernel=ds_kernel)

                HR_pad, fill = get_pad(img_HR, HR_size, fill='random',
                                       padding_mode=self.opt.get('pad_mode', 'constant'))
                img_HR = HR_pad(np.copy(img_HR))
                LR_pad, _ = get_pad(img_LR, HR_size // scale, fill=fill,
                                    padding_mode=self.opt.get('pad_mode', 'constant'))
                img_LR = LR_pad(np.copy(img_LR))

        # (Randomly) scale LR (from HR) during training if:
        # - the LR dataset is not provided
        # - the LR dataset is not at the correct scale
        # - img_LR was replaced by img_HR earlier and is not at the correct scale
        if img_LR.shape[0] != LR_size or img_LR.shape[1] != LR_size:
            ds_algo = 777  # default to matlab-like bicubic downscale
            if self.opt.get('lr_downscale', None):
                # if manually set and scale algorithms are provided, use them
                ds_algo = self.opt.get('lr_downscale_types', 777)
            else:
                # if for some reason img_LR is too large, default to matlab-like
                # bicubic downscale (only warn when not being forced to use HR
                # images instead of the LR dataset, which is a known case)
                print("LR image is too large, auto generating new LR for: ", LR_path)
            if self.opt.get('lr_downscale', None) and self.opt.get('dataroot_kernels', None) \
                    and 999 in self.opt["lr_downscale_types"]:
                ds_kernel = self.ds_kernels
            else:
                ds_kernel = None
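            # note (assumption from the surrounding code, not a documented API): algo
            # code 777 mimics matlab's bicubic imresize throughout this file, while 999
            # appears to select pre-estimated downscaling kernels loaded via the
            # 'dataroot_kernels' option (self.ds_kernels), passed to Scale() when set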
            img_LR, _ = Scale(img=img_LR, scale=scale, algo=ds_algo, ds_kernel=ds_kernel)

        # Rotations: 'use_flip' = mirror flips, 'use_rot' = 90-degree rotations,
        # 'hr_rrot' = random rotations of +-45 degrees
        if (self.opt['use_flip'] or self.opt['use_rot']) and self.opt.get('hr_rrot', None):
            if np.random.rand() > 0.5:
                img_LR, img_HR = util.augment([img_LR, img_HR],
                                              self.opt['use_flip'], self.opt['use_rot'])
            else:
                # randomize the random rotations, so half the images keep the original orientation
                if np.random.rand() > 0.5:
                    img_HR, img_LR = augmentations.random_rotate_pairs(img_HR, img_LR, HR_size, scale)
        elif (self.opt['use_flip'] or self.opt['use_rot']) and not self.opt.get('hr_rrot', None):
            # augmentation - flip, rotate
            img_LR, img_HR = util.augment([img_LR, img_HR],
                                          self.opt['use_flip'], self.opt['use_rot'])
        elif self.opt.get('hr_rrot', None):
            # randomize the random rotations, so half the images keep the original orientation
            if np.random.rand() > 0.5:
                img_HR, img_LR = augmentations.random_rotate_pairs(img_HR, img_LR, HR_size, scale)

        # Final size checks: if the resulting HR image is too large or too small (or the
        # resulting LR does not have the correct dimensions), resize HR to the correct
        # size and downscale it to generate a new LR pair on the fly
        if img_HR.shape[0] != HR_size or img_HR.shape[1] != HR_size \
                or img_LR.shape[0] != LR_size or img_LR.shape[1] != LR_size:
            #TODO: HR/LR size-mismatch warnings were disabled while testing; re-enable for debugging
            # rescale HR image to HR_size (should not be needed in the LR case, but
            # something went wrong before; kept as a sanity fallback)
            img_HR = transforms.Resize((HR_size, HR_size),
                                       interpolation="BILINEAR")(np.copy(img_HR))
            # if scale algorithms were manually provided use them, else default to
            # matlab-like imresize to generate the LR pair
            ds_algo = self.opt.get('lr_downscale_types', 777)
            img_LR, _ = Scale(img_HR, scale, algo=ds_algo)

        # Below are the on-the-fly augmentations

        # Apply "auto levels" to images
        rand_levels = self.opt.get('rand_auto_levels', 0)  # randomize for augmentation
        if self.opt.get('auto_levels', None) in ('HR', 'Both'):
            #TODO: images are now processed in the [0,255] range
            img_HR = transforms.FilterColorBalance(p=rand_levels, percent=10,
                                                   random_params=True)(img_HR)
        if self.opt.get('auto_levels', None) in ('LR', 'Both'):
            img_LR = transforms.FilterColorBalance(p=rand_levels, percent=10,
                                                   random_params=True)(img_LR)

        # Apply unsharpening mask to LR images
        if self.opt.get('lr_unsharp_mask', None):
            lr_rand_unsharp = self.opt.get('lr_rand_unsharp', 0)
            img_LR = transforms.FilterUnsharp(p=lr_rand_unsharp)(img_LR)

        # Apply unsharpening mask to HR images
        if self.opt.get('hr_unsharp_mask', None):
            hr_rand_unsharp = self.opt.get('hr_rand_unsharp', 0)
            img_HR = transforms.FilterUnsharp(p=hr_rand_unsharp)(img_HR)

        # Add noise to HR if enabled AND noise types are provided (for noise2noise and similar)
        if self.opt.get('hr_noise', None):
            noise_option = get_noise(self.opt.get('hr_noise_types', None))
            if noise_option:
                img_HR = noise_option(img_HR)

        # Create color fringes
        # Caution: can easily destabilize a model
        # Only applied to a small % of the images; around 20% to 50% appears to be stable
        if self.opt.get('lr_fringes', None):
            lr_fringes_chance = self.opt['lr_fringes_chance'] if self.opt['lr_fringes_chance'] else 0.4
            if np.random.rand() > (1. - lr_fringes_chance):
                img_LR = augmentations.translate_chan(img_LR)

        # LR blur, if enabled AND blur types are provided, else skip
        if self.opt.get('lr_blur', None):
            blur_option = get_blur(self.opt.get('lr_blur_types', None))
            if blur_option:
                img_LR = blur_option(img_LR)

        # LR primary noise: add noise to LR if enabled AND noise types are provided, else skip
        if self.opt.get('lr_noise', None):
            noise_option = get_noise(self.opt.get('lr_noise_types', None), self.noise_patches)
            if noise_option:
                img_LR = noise_option(img_LR)

        # LR secondary noise: add additional noise to LR if enabled AND noise types are provided, else skip
        if self.opt.get('lr_noise2', None):
            noise_option = get_noise(self.opt.get('lr_noise_types2', None), self.noise_patches)
            if noise_option:
                img_LR = noise_option(img_LR)

        # LR cutout / LR random erasing (for inpainting/classification tests)
        if self.opt.get('lr_cutout', None) and not self.opt.get('lr_erasing', None):
            img_LR = transforms.Cutout(p=1, mask_size=img_LR.shape[0] // 2)(img_LR)
        elif self.opt.get('lr_erasing', None) and not self.opt.get('lr_cutout', None):
            img_LR = transforms.RandomErasing(p=1)(img_LR, mode=[3])
        elif self.opt.get('lr_cutout', None) and self.opt.get('lr_erasing', None):
            # only do cutout or erasing, not both at the same time
            if np.random.rand() > 0.5:
                img_LR = transforms.Cutout(p=1, mask_size=img_LR.shape[0] // 2)(img_LR)
            else:
                img_LR = transforms.RandomErasing(p=1)(img_LR, mode=[3])

    # For testing and validation
    if self.opt['phase'] != 'train':
        # Downscale LR if enabled (defaults to matlab-like bicubic if no types are provided)
        if self.opt['lr_downscale']:
            img_LR, _ = Scale(img_LR, scale, algo=self.opt.get('lr_downscale_types', 777))

    # Alternative position for changing the colorspace of LR:
    # if self.opt['color_LR']:  # only change LR
    #     img_LR = util.channel_convert(img_LR.shape[2], self.opt['color'], [img_LR])[0]

    # Debug: save img_LR and img_HR to a directory to visualize the result of the
    # on-the-fly augmentations. DO NOT LEAVE ON DURING REAL TRAINING.
    #TODO: use the debugging functions to visualize or save images instead
    # self.output_sample_imgs = True
    if self.opt['phase'] == 'train':
        if self.output_sample_imgs:
            import os
            import uuid
            HR_dir, im_name = os.path.split(HR_path)
            debugpath = os.path.join('D:/tmp_test', 'sampleOTFimgs')  # hardcoded debug path
            if not os.path.exists(debugpath):
                os.makedirs(debugpath)
            rand_hex = uuid.uuid4().hex  # random name to save
            cv2.imwrite(os.path.join(debugpath, im_name + rand_hex + '_LR.png'), img_LR)
            cv2.imwrite(os.path.join(debugpath, im_name + rand_hex + '_HR.png'), img_HR)

    ######## Convert images to PyTorch Tensors ########

    # for debugging, describe_numpy(img_HR/img_LR) can be used here to inspect
    # value ranges outside [-1, 1] or [0, 1]

    # check for grayscale images
    #TODO: should not be needed anymore
    if len(img_HR.shape) < 3:
        img_HR = img_HR[..., np.newaxis]
    if len(img_LR.shape) < 3:
        img_LR = img_LR[..., np.newaxis]

    # tmp_vis(img_HR, False)
    # tmp_vis(img_LR, False)

    # edge-connect
    if self.opt.get('training_with_canny', None):
        img_HR_gray = cv2.cvtColor(img_HR, cv2.COLOR_BGR2GRAY)
        img_HR_canny = cv2.Canny(img_HR_gray, 100, 150)
        img_HR_gray = torch.from_numpy(img_HR_gray).unsqueeze(0)
        img_HR_canny = torch.from_numpy(img_HR_canny).unsqueeze(0)

    if self.opt.get('training_with_canny_SR', None):
        img_LR_gray = cv2.cvtColor(img_LR, cv2.COLOR_BGR2GRAY)
        img_LR_canny = cv2.Canny(img_LR_gray, 100, 150)
        img_LR_canny = torch.from_numpy(img_LR_canny).unsqueeze(0)

    if self.opt.get('noise_estimation', None):
        ds_kernel = torch.from_numpy(getattr(ds_kernel, 'used_kernel')).unsqueeze(0).float()
        noise_est = noise_estimate(img_LR, 4)
        sigma = torch.tensor(noise_est).float().view([1, 1, 1])

    img_HR = util.np2tensor(img_HR, normalize=znorm, add_batch=False)
    img_LR = util.np2tensor(img_LR, normalize=znorm, add_batch=False)

    if LR_path is None:
        LR_path = HR_path

    if self.opt.get('training_with_canny', None):
        return {'LR': img_LR, 'HR': img_HR, 'LR_path': LR_path, 'HR_path': HR_path,
                'img_HR_gray': img_HR_gray, 'img_HR_canny': img_HR_canny}
    elif self.opt.get('training_with_canny_SR', None):
        return {'LR': img_LR, 'HR': img_HR, 'LR_path': LR_path, 'HR_path': HR_path,
                'img_LR_canny': img_LR_canny}
    elif self.opt.get('noise_estimation', None):
        return {'LR': img_LR, 'HR': img_HR, 'LR_path': LR_path, 'HR_path': HR_path,
                'ds_kernel': ds_kernel, 'sigma': sigma}
    else:
        return {'LR': img_LR, 'HR': img_HR, 'LR_path': LR_path, 'HR_path': HR_path}
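
# For orientation, a minimal sketch of how a dataset with a __getitem__ like the
# one above plugs into a PyTorch DataLoader. The toy class below is a stand-in
# that only mimics the return contract ('LR'/'HR' tensors plus paths); the real
# dataset classes and option parsing live elsewhere in the repo.
import torch
from torch.utils.data import Dataset, DataLoader

class _ToyLRHRDataset(Dataset):
    def __init__(self, n=8, scale=4, hr_size=128):
        self.n, self.scale, self.hr_size = n, scale, hr_size

    def __len__(self):
        return self.n

    def __getitem__(self, index):
        hr = torch.rand(3, self.hr_size, self.hr_size)
        lr = torch.rand(3, self.hr_size // self.scale, self.hr_size // self.scale)
        return {'LR': lr, 'HR': hr, 'LR_path': str(index), 'HR_path': str(index)}

if __name__ == '__main__':
    loader = DataLoader(_ToyLRHRDataset(), batch_size=4, shuffle=True)
    batch = next(iter(loader))
    print(batch['LR'].shape, batch['HR'].shape)  # [4, 3, 32, 32], [4, 3, 128, 128]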

def __getitem__(self, idx):
    scale = self.opt.get('scale', 4)
    HR_size = self.opt.get('HR_size', 128)
    LR_size = HR_size // scale
    idx_center = (self.num_frames - 1) // 2
    ds_kernel = None

    # Default case: tensor will result in the [0,1] range
    # Alternative: tensor will be z-normalized to the [-1,1] range
    znorm = self.opt.get('znorm', False)

    if self.opt['phase'] == 'train':
        if self.opt.get('lr_downscale', None) and self.opt.get('dataroot_kernels', None) \
                and 999 in self.opt["lr_downscale_types"]:
            ds_kernel = self.ds_kernels
        # get a random video directory
        idx_video = random.randint(0, len(self.video_list) - 1)
        video_dir = self.video_list[idx_video]
    else:
        # only one video, and paths_LR/paths_HR is already the video dir
        video_dir = ""

    # list the frames in the directory
    paths_HR = util.get_image_paths(self.opt['data_type'],
                                    os.path.join(self.paths_HR, video_dir))

    if self.opt['phase'] == 'train':
        # random reverse augmentation
        random_reverse = self.opt.get('random_reverse', False)

        # skip intermediate frames to learn from low-FPS videos (augmentation):
        # use a random frameskip of up to 'max_frameskip' frames
        max_frameskip = self.opt.get('max_frameskip', 0)
        if max_frameskip > 0:
            max_frameskip = min(max_frameskip, len(paths_HR) // (self.num_frames - 1))
            frameskip = random.randint(1, max_frameskip)
        else:
            frameskip = 1

        assert ((self.num_frames - 1) * frameskip) <= (len(paths_HR) - 1), (
            f'(num_frames - 1) * frameskip must not exceed the number of frames per video, check {video_dir}')

        # e.g. if a training video has 31 frames and num_frames = 3 with frameskip = 1,
        # the last valid start index is 31 - 3 = 28
        idx_frame = random.randint(0, (len(paths_HR) - 1) - ((self.num_frames - 1) * frameskip))
    else:
        frameskip = 1
        idx_frame = idx

    ''' List-based frames loading '''
    if self.paths_LR:
        paths_LR = util.get_image_paths(self.opt['data_type'],
                                        os.path.join(self.paths_LR, video_dir))
    else:
        paths_LR = paths_HR

    ds_algo = 777  # default to matlab-like bicubic downscale
    if self.opt.get('lr_downscale', None):
        # if manually set and scale algorithms are provided, use them
        ds_algo = self.opt.get('lr_downscale_types', 777)

    # get the video directory
    HR_dir, _ = os.path.split(paths_HR[idx_frame])
    LR_dir, _ = os.path.split(paths_HR[idx_frame])

    # read HR & LR frames
    HR_list = []
    LR_list = []
    resize_type = None
    LR_bicubic = None
    HR_center = None

    for i_frame in range(self.num_frames):
        HR_img = util.read_img(None, paths_HR[int(idx_frame) + (frameskip * i_frame)],
                               out_nc=self.image_channels)
        HR_img = util.modcrop(HR_img, scale)

        if self.opt['phase'] == 'train':
            ''' If using individual image augmentations, get cropping parameters for reuse '''
            if self.otf_noise and i_frame == 0:
                # only needs to be calculated once, from the first frame; the cropping
                # parameters are then reused for all LR and HR frames
                hr_crop_params, lr_crop_params = get_crop_params(HR_img, LR_size, scale)
                if self.opt.get('lr_noise', None):
                    # reuse the same noise type for all the frames
                    noise_option = get_noise(self.opt.get('lr_noise_types', None), self.noise_patches)
                if self.opt.get('lr_blur', None):
                    # reuse the same blur type for all the frames
                    blur_option = get_blur(self.opt.get('lr_blur_types', None))

        if self.paths_LR:
            # LR images are provided at the correct scale
            LR_img = util.read_img(None, paths_LR[int(idx_frame) + (frameskip * i_frame)],
                                   out_nc=self.image_channels)
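            # guard against LR frames accidentally saved at HR resolution: if the
            # loaded LR frame has the exact same shape as HR, regenerate it by
            # downscaling the HR frame on the fly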
            if LR_img.shape == HR_img.shape:
                LR_img, resize_type = Scale(img=HR_img, scale=scale, algo=ds_algo,
                                            ds_kernel=ds_kernel, resize_type=resize_type)
        else:
            # generate LR images on the fly
            LR_img, resize_type = Scale(img=HR_img, scale=scale, algo=ds_algo,
                                        ds_kernel=ds_kernel, resize_type=resize_type)

        # get the bicubic upscale of the center frame to concatenate for SR
        if self.y_only and self.srcolors and i_frame == idx_center:
            LR_bicubic, _ = Scale(img=LR_img, scale=1 / scale, algo=777)  # bicubic upscale
            HR_center = HR_img
            # tmp_vis(LR_bicubic, False)
            # tmp_vis(HR_center, False)

        if self.y_only:
            # extract Y channel from frames (normal path, only Y for both)
            HR_img = util.bgr2ycbcr(HR_img, only_y=True)
            LR_img = util.bgr2ycbcr(LR_img, only_y=True)

        # crop patches randomly if using otf noise
        #TODO: make a BasicSR composable random_crop
        #TODO: the original crop would go here, cropping after loading each image, but it
        # could be much simpler to crop after concatenating. Check the speed difference.
        if self.otf_noise and self.opt['phase'] == 'train':
            HR_img, LR_img = apply_crop_params(HR_img, LR_img, hr_crop_params, lr_crop_params)
            if self.y_only and self.srcolors and i_frame == idx_center:
                LR_bicubic, _ = apply_crop_params(LR_bicubic, None, hr_crop_params, None)
                HR_center, _ = apply_crop_params(HR_center, None, hr_crop_params, None)

        # expand Y images to add the channel dimension (normal path, only Y for both)
        if self.y_only:
            HR_img = util.fix_img_channels(HR_img, 1)
            LR_img = util.fix_img_channels(LR_img, 1)

        if self.opt['phase'] == 'train':
            # single-frame augmentations (noise, blur, etc.); only efficient if
            # patches are cropped inside this loop
            if self.opt.get('lr_blur', None):
                if blur_option:
                    LR_img = blur_option(LR_img)
            if self.opt.get('lr_noise', None):
                if noise_option:
                    LR_img = noise_option(LR_img)

            # expand LR images to add the channel dimension again if needed
            # (blur removes the grayscale channel)
            #TODO: add an if condition; can compare to ndim before the augs, or move
            # inside the aug condition above
            if self.y_only:
                LR_img = util.fix_img_channels(LR_img, 1)

        HR_list.append(HR_img)  # h, w, c
        LR_list.append(LR_img)  # h, w, c

    if self.opt['phase'] == 'train':
        # random reverse sequence augmentation
        if random_reverse and random.random() < 0.5:
            HR_list.reverse()
            LR_list.reverse()

    if not self.y_only:
        t = self.num_frames
        HR = [np.asarray(GT) for GT in HR_list]  # list -> numpy, input: list (contains numpy [H,W,C])
        HR = np.asarray(HR)  # numpy, [T,H,W,C]
        h_HR, w_HR, c = HR_img.shape  #TODO: check, may be risky
        HR = HR.transpose(1, 2, 3, 0).reshape(h_HR, w_HR, -1)  # numpy, [H',W',CT]
        LR = [np.asarray(LT) for LT in LR_list]  # list -> numpy, input: list (contains numpy [H,W,C])
        LR = np.asarray(LR)  # numpy, [T,H,W,C]
        LR = LR.transpose(1, 2, 3, 0).reshape(h_HR // scale, w_HR // scale, -1)  # numpy, [Hl',Wl',CT]
    else:
        HR = np.concatenate(HR_list, axis=2)  # h, w, t
        LR = np.concatenate(LR_list, axis=2)  # h, w, t

    if self.opt['phase'] == 'train':
        ''' If not using individual image augmentations, this cropping should be faster, only once '''
        # crop patches randomly; if not using otf noise, crop all concatenated images
        if not self.otf_noise:
            HR, LR, hr_crop_params, _ = random_crop_mod(HR, LR, LR_size, scale)
            if self.y_only and self.srcolors:
                LR_bicubic, _, _, _ = random_crop_mod(LR_bicubic, _, LR_size, scale, hr_crop_params)
                HR_center, _, _, _ = random_crop_mod(HR_center, _, LR_size, scale, hr_crop_params)

        # data augmentation
        #TODO: use BasicSR augmentations
        #TODO: use variables from config
        LR, HR, LR_bicubic, HR_center = augmentation()([LR, HR, LR_bicubic, HR_center])

    # tmp_vis(HR, False)
    # tmp_vis(LR, False)

    if self.y_only:
        HR = util.np2tensor(HR, normalize=znorm, bgr2rgb=False, add_batch=False)  # Tensor, [CT',H',W'] or [T,H,W]
        LR = util.np2tensor(LR, normalize=znorm, bgr2rgb=False, add_batch=False)  # Tensor, [CT',H',W'] or [T,H,W]
    else:
        HR = util.np2tensor(HR, normalize=znorm, bgr2rgb=True, add_batch=False)  # Tensor, [CT',H',W'] or [T,H,W]
        LR = util.np2tensor(LR, normalize=znorm, bgr2rgb=True, add_batch=False)  # Tensor, [CT',H',W'] or [T,H,W]

    if not self.y_only:
        HR = HR.view(c, t, HR_size, HR_size)  # Tensor, [C,T,H,W]
        LR = LR.view(c, t, LR_size, LR_size)  # Tensor, [C,T,H,W]
        if self.shape == 'TCHW':
            HR = HR.transpose(0, 1)  # Tensor, [T,C,H,W]
            LR = LR.transpose(0, 1)  # Tensor, [T,C,H,W]

    # generate Cr, Cb channels using bicubic interpolation
    #TODO: check; it might be easier to return the whole image and separate it later when needed
    if self.y_only and self.srcolors:
        LR_bicubic = util.bgr2ycbcr(LR_bicubic, only_y=False)
        LR_bicubic = util.np2tensor(LR_bicubic, normalize=znorm, bgr2rgb=False, add_batch=False)
        # the rgb image is used directly instead of converting HR_center to ycbcr
        HR_center = util.np2tensor(HR_center, normalize=znorm, bgr2rgb=True, add_batch=False)
    elif self.y_only and not self.srcolors:
        LR_bicubic = []
        HR_center = []
    else:
        HR_center = HR[:, idx_center, :, :] if self.shape == 'CTHW' else HR[idx_center, :, :, :]
        LR_bicubic = []

    return {'LR': LR, 'HR': HR, 'LR_path': LR_dir, 'HR_path': HR_dir,
            'LR_bicubic': LR_bicubic, 'HR_center': HR_center}
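
# The frameskip sampling in the training branch above picks a stride, then a
# start index such that all `num_frames` strided frames stay inside the clip.
# A standalone sketch of that index math (illustrative names, assumes
# num_frames >= 2):
import random

def sample_frame_indices(n_frames_in_clip, num_frames, max_frameskip=0):
    if max_frameskip > 0:
        # cap the stride the same way the loader does, then pick one at random
        max_frameskip = min(max_frameskip, n_frames_in_clip // (num_frames - 1))
        frameskip = random.randint(1, max_frameskip)
    else:
        frameskip = 1
    # the last valid start index satisfies start + (num_frames - 1) * frameskip <= n - 1
    start = random.randint(0, (n_frames_in_clip - 1) - (num_frames - 1) * frameskip)
    return [start + i * frameskip for i in range(num_frames)]

# e.g. sample_frame_indices(31, 3, max_frameskip=5) could return [7, 12, 17]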

def __getitem__(self, idx):
    scale = self.opt.get('scale', 4)
    idx_center = (self.num_frames - 1) // 2
    h_LR = None
    w_LR = None

    # Default case: tensor will result in the [0,1] range
    # Alternative: tensor will be z-normalized to the [-1,1] range
    znorm = self.opt.get('znorm', False)

    # only one video, and paths_LR/paths_HR is already the video dir
    video_dir = ""

    ''' List-based frames loading '''
    # list the frames in the directory
    paths_LR = util.get_image_paths(self.opt['data_type'],
                                    os.path.join(self.paths_LR, video_dir))

    assert self.num_frames <= len(paths_LR), (
        f'num_frames must not exceed the number of frames per video, check {video_dir}')

    idx_frame = idx
    LR_name = paths_LR[idx_frame + 1]  # center frame

    # read LR frames
    LR_list = []
    resize_type = None
    LR_bicubic = None
    for i_frame in range(self.num_frames):
        if idx_frame == len(self.video_list) - 2 and self.num_frames == 3:
            # second-to-last window: repeat the last frame once
            if i_frame == 0:
                LR_img = util.read_img(None, paths_LR[int(idx_frame)], out_nc=self.image_channels)
            else:
                LR_img = util.read_img(None, paths_LR[int(idx_frame) + 1], out_nc=self.image_channels)
        elif idx_frame == len(self.video_list) - 1 and self.num_frames == 3:
            # last window: repeat the last frame for the whole window
            LR_img = util.read_img(None, paths_LR[int(idx_frame)], out_nc=self.image_channels)
        else:
            # every other internal frame
            LR_img = util.read_img(None, paths_LR[int(idx_frame) + i_frame], out_nc=self.image_channels)

        #TODO: check if this is necessary
        LR_img = util.modcrop(LR_img, scale)

        # get the bicubic upscale of the center frame to concatenate for SR
        if not self.y_only and self.srcolors and i_frame == idx_center:
            if self.opt.get('denoise_LRbic', False):
                LR_bicubic = transforms.RandomAverageBlur(p=1, kernel_size=3)(LR_img)
                # LR_bicubic = transforms.RandomBoxBlur(p=1, kernel_size=3)(LR_img)
            else:
                LR_bicubic = LR_img
            LR_bicubic, _ = Scale(img=LR_bicubic, scale=1 / scale, algo=777)  # bicubic upscale
            # tmp_vis(LR_bicubic, False)

        if self.y_only:
            # extract Y channel from frames
            LR_img = util.bgr2ycbcr(LR_img, only_y=True)
            # expand Y images to add the channel dimension
            LR_img = util.fix_img_channels(LR_img, 1)

        LR_list.append(LR_img)  # h, w, c

        if not self.y_only and (not h_LR or not w_LR):
            h_LR, w_LR, c = LR_img.shape

    if not self.y_only:
        t = self.num_frames
        LR = [np.asarray(LT) for LT in LR_list]  # list -> numpy, input: list (contains numpy [H,W,C])
        LR = np.asarray(LR)  # numpy, [T,H,W,C]
        LR = LR.transpose(1, 2, 3, 0).reshape(h_LR, w_LR, -1)  # numpy, [Hl',Wl',CT]
    else:
        LR = np.concatenate(LR_list, axis=2)  # h, w, t

    if self.y_only:
        LR = util.np2tensor(LR, normalize=znorm, bgr2rgb=False, add_batch=False)  # Tensor, [CT',H',W'] or [T,H,W]
    else:
        LR = util.np2tensor(LR, normalize=znorm, bgr2rgb=True, add_batch=False)  # Tensor, [CT',H',W'] or [T,H,W]
        LR = LR.view(c, t, h_LR, w_LR)  # Tensor, [C,T,H,W]
        if self.shape == 'TCHW':
            LR = LR.transpose(0, 1)  # Tensor, [T,C,H,W]

    if self.y_only and self.srcolors:
        # generate Cr, Cb channels using bicubic interpolation
        LR_bicubic = util.bgr2ycbcr(LR_bicubic, only_y=False)
        LR_bicubic = util.np2tensor(LR_bicubic, normalize=znorm, bgr2rgb=False, add_batch=False)
        HR_center = []
    else:
        LR_bicubic = []
        HR_center = []

    return {'LR': LR, 'LR_path': LR_name, 'LR_bicubic': LR_bicubic, 'HR_center': HR_center}
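
# The test-time loader above special-cases the last two windows for
# num_frames == 3 by repeating edge frames. A generic alternative (a common
# pattern, not what the code above does) is to clamp every index into the valid
# range, which repeats edge frames for any window size:
def clamped_window(center, num_frames, n_frames_in_clip):
    half = (num_frames - 1) // 2
    return [min(max(center + i - half, 0), n_frames_in_clip - 1)
            for i in range(num_frames)]

# e.g. clamped_window(0, 3, 10) -> [0, 0, 1]; clamped_window(9, 3, 10) -> [8, 9, 9]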

def __getitem__(self, index):
    HR_path, LR_path = None, None
    scale = self.opt['scale']
    HR_size = self.opt['HR_size']

    # get HR image
    HR_path = self.paths_HR[index]
    img_HR = util.read_img(self.HR_env, HR_path)
    # modcrop in the validation / test phase
    if self.opt['phase'] != 'train':
        img_HR = util.modcrop(img_HR, scale)
    # change color space if necessary
    if self.opt['color']:
        img_HR = util.channel_convert(img_HR.shape[2], self.opt['color'], [img_HR])[0]

    # get LR image
    if self.paths_LR:
        LR_path = self.paths_LR[index]
        img_LR = util.read_img(self.LR_env, LR_path)
    else:  # down-sampling on-the-fly
        # randomly scale during training
        if self.opt['phase'] == 'train':
            random_scale = random.choice(self.random_scale_list)
            H_s, W_s, _ = img_HR.shape

            def _mod(n, random_scale, scale, thres):
                rlt = int(n * random_scale)
                rlt = (rlt // scale) * scale
                return thres if rlt < thres else rlt

            H_s = _mod(H_s, random_scale, scale, HR_size)
            W_s = _mod(W_s, random_scale, scale, HR_size)
            img_HR = cv2.resize(np.copy(img_HR), (W_s, H_s), interpolation=cv2.INTER_LINEAR)
            # force to 3 channels
            if img_HR.ndim == 2:
                img_HR = cv2.cvtColor(img_HR, cv2.COLOR_GRAY2BGR)

        H, W, _ = img_HR.shape
        # using matlab imresize
        img_LR = util.imresize_np(img_HR, 1 / scale, True)
        if img_LR.ndim == 2:
            img_LR = np.expand_dims(img_LR, axis=2)

    if self.opt['phase'] == 'train':
        # if the image size is too small
        H, W, _ = img_HR.shape
        if H < HR_size or W < HR_size:
            img_HR = cv2.resize(np.copy(img_HR), (HR_size, HR_size), interpolation=cv2.INTER_LINEAR)
            # using matlab imresize
            img_LR = util.imresize_np(img_HR, 1 / scale, True)
            if img_LR.ndim == 2:
                img_LR = np.expand_dims(img_LR, axis=2)

        H, W, C = img_LR.shape
        LR_size = HR_size // scale

        # randomly crop
        rnd_h = random.randint(0, max(0, H - LR_size))
        rnd_w = random.randint(0, max(0, W - LR_size))
        img_LR = img_LR[rnd_h:rnd_h + LR_size, rnd_w:rnd_w + LR_size, :]
        rnd_h_HR, rnd_w_HR = int(rnd_h * scale), int(rnd_w * scale)
        img_HR = img_HR[rnd_h_HR:rnd_h_HR + HR_size, rnd_w_HR:rnd_w_HR + HR_size, :]

        # augmentation - flip, rotate
        img_LR, img_HR = util.augment([img_LR, img_HR],
                                      self.opt['use_flip'], self.opt['use_rot'])

        # change color space if necessary
        if self.opt['color']:
            img_LR = util.channel_convert(C, self.opt['color'], [img_LR])[0]  #TODO: not defined during val

    # BGR to RGB, HWC to CHW, numpy to tensor
    if img_HR.shape[2] == 3:
        img_HR = img_HR[:, :, [2, 1, 0]]
        img_LR = img_LR[:, :, [2, 1, 0]]
    elif img_LR.shape[2] == 4:
        img_HR = img_HR[:, :, [2, 1, 0, 3]]
        img_LR = img_LR[:, :, [2, 1, 0, 3]]
    img_HR = torch.from_numpy(np.ascontiguousarray(np.transpose(img_HR, (2, 0, 1)))).float()
    img_LR = torch.from_numpy(np.ascontiguousarray(np.transpose(img_LR, (2, 0, 1)))).float()

    if LR_path is None:
        LR_path = HR_path
    return {'LR': img_LR, 'HR': img_HR, 'LR_path': LR_path, 'HR_path': HR_path}
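
# All of the loaders above end by converting BGR HWC numpy images into RGB CHW
# float tensors, optionally z-normalizing into [-1, 1] (the `znorm` option). A
# minimal sketch of what a util.np2tensor-style helper is assumed to do for the
# common 3-channel case (the repo's helper also handles 4 channels and batching):
import numpy as np
import torch

def np2tensor_sketch(img, normalize=False):
    if img.ndim == 2:  # add a channel dimension for grayscale images
        img = img[..., np.newaxis]
    if img.shape[2] == 3:
        img = img[:, :, [2, 1, 0]]  # BGR -> RGB
    t = torch.from_numpy(np.ascontiguousarray(img.transpose(2, 0, 1))).float()
    if normalize:
        t = t * 2.0 - 1.0  # [0, 1] -> [-1, 1]
    return t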