def __getitem__(self, index):
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)

    scale = self.opt['scale']

    # Load gt and lq images. Dimension order: HWC; channel order: BGR;
    # image range: [0, 1], float32.
    gt_path = self.paths[index]['gt_path']
    img_bytes = self.file_client.get(gt_path, 'gt')
    img_gt = imfrombytes(img_bytes, float32=True)
    lq_path = self.paths[index]['lq_path']
    img_bytes = self.file_client.get(lq_path, 'lq')
    img_lq = imfrombytes(img_bytes, float32=True)

    # augmentation for training
    if self.opt['phase'] == 'train':
        gt_size = self.opt['gt_size']
        # random crop
        img_gt, img_lq = paired_random_crop(img_gt, img_lq, gt_size, scale, gt_path)
        # flip, rotation
        img_gt, img_lq = augment([img_gt, img_lq], self.opt['use_flip'], self.opt['use_rot'])

    # TODO: color space transform
    # BGR to RGB, HWC to CHW, numpy to tensor
    img_gt, img_lq = img2tensor([img_gt, img_lq], bgr2rgb=True, float32=True)
    # normalize
    if self.mean is not None or self.std is not None:
        normalize(img_lq, self.mean, self.std, inplace=True)
        normalize(img_gt, self.mean, self.std, inplace=True)

    return {'lq': img_lq, 'gt': img_gt, 'lq_path': lq_path, 'gt_path': gt_path}
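# ---------------------------------------------------------------------------
# paired_random_crop is a BasicSR-style helper; below is a minimal,
# single-image sketch of the behavior assumed above (the real helper also
# accepts lists of images and uses the path argument for error messages).
# Assumption: crop a gt_size patch from GT and the spatially aligned
# (gt_size // scale) patch from LQ.
import random

def paired_random_crop_sketch(img_gt, img_lq, gt_size, scale):
    lq_size = gt_size // scale
    h_lq, w_lq = img_lq.shape[:2]
    # pick a random top-left corner in LQ coordinates
    top = random.randint(0, h_lq - lq_size)
    left = random.randint(0, w_lq - lq_size)
    img_lq = img_lq[top:top + lq_size, left:left + lq_size, ...]
    # crop the corresponding GT patch at scale-multiplied coordinates
    top_gt, left_gt = top * scale, left * scale
    img_gt = img_gt[top_gt:top_gt + gt_size, left_gt:left_gt + gt_size, ...]
    return img_gt, img_lq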
def __getitem__(self, index):
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)

    # random reverse
    if self.random_reverse and random.random() < 0.5:
        self.neighbor_list.reverse()

    scale = self.opt['scale']
    gt_size = self.opt['gt_size']
    key = self.keys[index]
    clip, seq = key.split('/')  # key example: 00001/0001

    # get the neighboring LQ and GT frames
    img_lqs = []
    img_gts = []
    for neighbor in self.neighbor_list:
        if self.is_lmdb:
            img_lq_path = f'{clip}/{seq}/im{neighbor}'
            img_gt_path = f'{clip}/{seq}/im{neighbor}'
        else:
            img_lq_path = self.lq_root / clip / seq / f'im{neighbor}.png'
            img_gt_path = self.gt_root / clip / seq / f'im{neighbor}.png'
        # LQ
        img_bytes = self.file_client.get(img_lq_path, 'lq')
        img_lq = imfrombytes(img_bytes, float32=True)
        # GT
        img_bytes = self.file_client.get(img_gt_path, 'gt')
        img_gt = imfrombytes(img_bytes, float32=True)
        img_lqs.append(img_lq)
        img_gts.append(img_gt)

    # randomly crop
    img_gts, img_lqs = paired_random_crop(img_gts, img_lqs, gt_size, scale, img_gt_path)

    # augmentation - flip, rotate
    img_lqs.extend(img_gts)
    img_results = augment(img_lqs, self.opt['use_flip'], self.opt['use_rot'])

    img_results = img2tensor(img_results)
    # 7 frames per Vimeo90K septuplet: first 7 are LQ, last 7 are GT
    img_lqs = torch.stack(img_results[:7], dim=0)
    img_gts = torch.stack(img_results[7:], dim=0)

    if self.flip_sequence:  # flip the sequence: 7 frames to 14 frames
        img_lqs = torch.cat([img_lqs, img_lqs.flip(0)], dim=0)
        img_gts = torch.cat([img_gts, img_gts.flip(0)], dim=0)

    # img_lqs: (t, c, h, w)
    # img_gts: (t, c, h, w)
    # key: str
    return {'lq': img_lqs, 'gt': img_gts, 'key': key}
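# ---------------------------------------------------------------------------
# img2tensor (and the totensor variant used elsewhere in this section) is a
# BasicSR-style helper; a minimal single-image sketch of the assumed
# behavior: HWC float32 numpy -> CHW torch tensor, optional BGR->RGB swap.
import numpy as np
import torch

def img2tensor_sketch(img, bgr2rgb=True, float32=True):
    if bgr2rgb:
        img = img[..., ::-1]  # swap the channel order
    tensor = torch.from_numpy(np.ascontiguousarray(img.transpose(2, 0, 1)))
    return tensor.float() if float32 else tensor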
def __getitem__(self, index):
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)

    scale = self.opt['scale']

    # Load gt and lq images. Dimension order: HWC; channel order: BGR;
    # image range: [0, 1], float32.
    gt_path = self.paths[index]['gt_path']
    img_bytes = self.file_client.get(gt_path, 'gt')
    img_gt = imfrombytes(img_bytes, float32=True)
    lq_path = self.paths[index]['lq_path']
    img_bytes = self.file_client.get(lq_path, 'lq')
    img_lq = imfrombytes(img_bytes, float32=True)

    # augmentation for training
    if self.opt['phase'] == 'train':
        gt_size = self.opt['gt_size']
        # random crop
        img_gt, img_lq = paired_random_crop(img_gt, img_lq, gt_size, scale, gt_path)
        # flip, rotation
        img_gt, img_lq = augment([img_gt, img_lq], self.opt['use_hflip'], self.opt['use_rot'])

    # color space transform: the images are still BGR at this point, so use
    # the BGR variant of the YCbCr conversion
    if 'color' in self.opt and self.opt['color'] == 'y':
        img_gt = bgr2ycbcr(img_gt, y_only=True)[..., None]
        img_lq = bgr2ycbcr(img_lq, y_only=True)[..., None]

    # crop the unmatched GT images during validation or testing, especially for SR benchmark datasets
    # TODO: It is better to update the datasets, rather than force to crop
    if self.opt['phase'] != 'train':
        img_gt = img_gt[0:img_lq.shape[0] * scale, 0:img_lq.shape[1] * scale, :]

    # BGR to RGB, HWC to CHW, numpy to tensor
    img_gt, img_lq = img2tensor([img_gt, img_lq], bgr2rgb=True, float32=True)
    # normalize
    if self.mean is not None or self.std is not None:
        normalize(img_lq, self.mean, self.std, inplace=True)
        normalize(img_gt, self.mean, self.std, inplace=True)

    return {
        'lq': img_lq,
        'gt': img_gt,
        'lq_path': lq_path,
        'gt_path': gt_path
    }
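# ---------------------------------------------------------------------------
# A hedged sketch of the y-only color transform used above, assuming the
# ITU-R BT.601 convention that BasicSR's color utilities follow: for float
# input in [0, 1], Y lands in [16/255, 235/255].
import numpy as np

def bgr2ycbcr_y_only_sketch(img):
    # img: HWC float32 in [0, 1], channel order BGR
    b, g, r = img[..., 0], img[..., 1], img[..., 2]
    y = (65.481 * r + 128.553 * g + 24.966 * b + 16.0) / 255.0
    return y.astype(np.float32)[..., None]  # keep the HWC layout with c=1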
def __getitem__(self, index):
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)

    # random reverse
    if self.random_reverse and random.random() < 0.5:
        self.neighbor_list.reverse()

    scale = self.opt['scale']
    gt_size = self.opt['gt_size']
    key = self.keys[index]
    clip, seq = key.split('/')  # key example: 00001/0001

    # get the GT frame (im4.png)
    if self.is_lmdb:
        img_gt_path = f'{key}/im4'
    else:
        img_gt_path = self.gt_root / clip / seq / 'im4.png'
    img_bytes = self.file_client.get(img_gt_path, 'gt')
    img_gt = mmcv.imfrombytes(img_bytes).astype(np.float32) / 255.

    # get the neighboring LQ frames
    img_lqs = []
    for neighbor in self.neighbor_list:
        if self.is_lmdb:
            img_lq_path = f'{clip}/{seq}/im{neighbor}'
        else:
            img_lq_path = self.lq_root / clip / seq / f'im{neighbor}.png'
        img_bytes = self.file_client.get(img_lq_path, 'lq')
        img_lq = mmcv.imfrombytes(img_bytes).astype(np.float32) / 255.
        img_lqs.append(img_lq)

    # randomly crop
    img_gt, img_lqs = paired_random_crop(img_gt, img_lqs, gt_size, scale, img_gt_path)

    # augmentation - flip, rotate
    img_lqs.append(img_gt)
    img_results = augment(img_lqs, self.opt['use_flip'], self.opt['use_rot'])

    img_results = totensor(img_results)
    img_lqs = torch.stack(img_results[0:-1], dim=0)
    img_gt = img_results[-1]

    # img_lqs: (t, c, h, w)
    # img_gt: (c, h, w)
    # key: str
    return {'lq': img_lqs, 'gt': img_gt, 'key': key}
def __getitem__(self, index):
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)

    scale = self.opt['scale']
    lq_map_type = self.opt['lq_map_type']
    gt_map_type = self.opt['gt_map_type']

    # Load gt and lq images. Dimension order: HWC; channel order: RGGB;
    # HDR image range: [0, +inf], float32.
    gt_path = self.paths[index]['gt_path']
    lq_path = self.paths[index]['lq_path']
    img_gt = self.file_client.get(gt_path)
    img_lq = self.file_client.get(lq_path)

    # tone mapping
    img_lq = self._tonemap(img_lq, type=lq_map_type)
    img_gt = self._tonemap(img_gt, type=gt_map_type)

    # expand dimension
    img_gt = self._expand_dim(img_gt)
    img_lq = self._expand_dim(img_lq)

    # augmentation
    if self.opt['phase'] == 'train':
        gt_size = self.opt['gt_size']
        # random crop
        img_gt, img_lq = paired_random_crop(img_gt, img_lq, gt_size, scale, gt_path)
        # flip, rotation
        img_gt, img_lq = augment([img_gt, img_lq], self.opt['use_flip'], self.opt['use_rot'])

    # TODO: color space transform
    # HWC to CHW, numpy to tensor (no BGR-to-RGB swap for RGGB raw data)
    img_gt, img_lq = totensor([img_gt, img_lq], bgr2rgb=False, float32=True)

    return {
        'lq': img_lq,
        'gt': img_gt,
        'lq_path': lq_path,
        'gt_path': gt_path
    }
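# ---------------------------------------------------------------------------
# self._tonemap is repo-specific and not shown in this section; a minimal
# sketch under assumed options ('none', a Reinhard-style curve, mu-law
# compression). The option names and the mu value are assumptions.
import numpy as np

def tonemap_sketch(x, type='mu_law', mu=5000.0):
    if type == 'none':
        return x
    if type == 'reinhard':
        return x / (1.0 + x)  # simple global Reinhard curve
    if type == 'mu_law':
        return np.log(1.0 + mu * x) / np.log(1.0 + mu)  # HDR mu-law compression
    raise ValueError(f'unsupported tone-mapping type: {type}')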
def feed_data(self, data):
    """Accept data from dataloader, and then add two-order degradations to obtain LQ images."""
    if self.is_train and self.opt.get('high_order_degradation', True):
        # training data synthesis
        self.gt = data['gt'].to(self.device)
        self.gt_usm = self.usm_sharpener(self.gt)

        self.kernel1 = data['kernel1'].to(self.device)
        self.kernel2 = data['kernel2'].to(self.device)
        self.sinc_kernel = data['sinc_kernel'].to(self.device)

        ori_h, ori_w = self.gt.size()[2:4]

        # ----------------------- The first degradation process ----------------------- #
        # blur
        out = filter2D(self.gt_usm, self.kernel1)
        # random resize
        updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob'])[0]
        if updown_type == 'up':
            scale = np.random.uniform(1, self.opt['resize_range'][1])
        elif updown_type == 'down':
            scale = np.random.uniform(self.opt['resize_range'][0], 1)
        else:
            scale = 1
        mode = random.choice(['area', 'bilinear', 'bicubic'])
        out = F.interpolate(out, scale_factor=scale, mode=mode)
        # add noise
        gray_noise_prob = self.opt['gray_noise_prob']
        if np.random.uniform() < self.opt['gaussian_noise_prob']:
            out = random_add_gaussian_noise_pt(
                out, sigma_range=self.opt['noise_range'], clip=True, rounds=False, gray_prob=gray_noise_prob)
        else:
            out = random_add_poisson_noise_pt(
                out,
                scale_range=self.opt['poisson_scale_range'],
                gray_prob=gray_noise_prob,
                clip=True,
                rounds=False)
        # JPEG compression
        jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range'])
        out = torch.clamp(out, 0, 1)  # clamp to [0, 1], otherwise JPEGer will result in unpleasant artifacts
        out = self.jpeger(out, quality=jpeg_p)

        # ----------------------- The second degradation process ----------------------- #
        # blur
        if np.random.uniform() < self.opt['second_blur_prob']:
            out = filter2D(out, self.kernel2)
        # random resize
        updown_type = random.choices(['up', 'down', 'keep'], self.opt['resize_prob2'])[0]
        if updown_type == 'up':
            scale = np.random.uniform(1, self.opt['resize_range2'][1])
        elif updown_type == 'down':
            scale = np.random.uniform(self.opt['resize_range2'][0], 1)
        else:
            scale = 1
        mode = random.choice(['area', 'bilinear', 'bicubic'])
        out = F.interpolate(
            out, size=(int(ori_h / self.opt['scale'] * scale), int(ori_w / self.opt['scale'] * scale)), mode=mode)
        # add noise
        gray_noise_prob = self.opt['gray_noise_prob2']
        if np.random.uniform() < self.opt['gaussian_noise_prob2']:
            out = random_add_gaussian_noise_pt(
                out, sigma_range=self.opt['noise_range2'], clip=True, rounds=False, gray_prob=gray_noise_prob)
        else:
            out = random_add_poisson_noise_pt(
                out,
                scale_range=self.opt['poisson_scale_range2'],
                gray_prob=gray_noise_prob,
                clip=True,
                rounds=False)

        # JPEG compression + the final sinc filter
        # We also need to resize images to desired sizes. We group [resize back + sinc filter] together
        # as one operation.
        # We consider two orders:
        #   1. [resize back + sinc filter] + JPEG compression
        #   2. JPEG compression + [resize back + sinc filter]
        # Empirically, we find other combinations (sinc + JPEG + Resize) will introduce twisted lines.
        if np.random.uniform() < 0.5:
            # resize back + the final sinc filter
            mode = random.choice(['area', 'bilinear', 'bicubic'])
            out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
            out = filter2D(out, self.sinc_kernel)
            # JPEG compression
            jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
            out = torch.clamp(out, 0, 1)
            out = self.jpeger(out, quality=jpeg_p)
        else:
            # JPEG compression
            jpeg_p = out.new_zeros(out.size(0)).uniform_(*self.opt['jpeg_range2'])
            out = torch.clamp(out, 0, 1)
            out = self.jpeger(out, quality=jpeg_p)
            # resize back + the final sinc filter
            mode = random.choice(['area', 'bilinear', 'bicubic'])
            out = F.interpolate(out, size=(ori_h // self.opt['scale'], ori_w // self.opt['scale']), mode=mode)
            out = filter2D(out, self.sinc_kernel)

        # clamp and round
        self.lq = torch.clamp((out * 255.0).round(), 0, 255) / 255.

        # random crop
        gt_size = self.opt['gt_size']
        (self.gt, self.gt_usm), self.lq = paired_random_crop([self.gt, self.gt_usm], self.lq, gt_size,
                                                             self.opt['scale'])

        # training pair pool
        self._dequeue_and_enqueue()
        # sharpen self.gt again, as we have changed the self.gt with self._dequeue_and_enqueue
        self.gt_usm = self.usm_sharpener(self.gt)
        self.lq = self.lq.contiguous()  # for the warning: grad and param do not obey the gradient layout contract
    else:
        # for paired training or validation
        self.lq = data['lq'].to(self.device)
        if 'gt' in data:
            self.gt = data['gt'].to(self.device)
            self.gt_usm = self.usm_sharpener(self.gt)
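# ---------------------------------------------------------------------------
# A hedged sketch of the batched filtering assumed above: filter2D applies
# one (k x k) kernel per batch sample. Implemented here as a grouped
# convolution with reflection padding; an odd kernel size is assumed.
import torch
import torch.nn.functional as F

def filter2D_sketch(img, kernel):
    # img: (b, c, h, w); kernel: (b, k, k) with odd k
    b, c, h, w = img.size()
    k = kernel.size(-1)
    img = F.pad(img, (k // 2, k // 2, k // 2, k // 2), mode='reflect')
    # fold the batch into channels so each sample gets its own kernel
    img = img.view(1, b * c, h + k - 1, w + k - 1)
    weight = kernel.view(b, 1, k, k).repeat(1, c, 1, 1).view(b * c, 1, k, k)
    out = F.conv2d(img, weight, groups=b * c)
    return out.view(b, c, h, w)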
def __getitem__(self, index):
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)

    scale = self.opt['scale']
    gt_size = self.opt['gt_size']
    key = self.keys[index]
    clip_name, frame_name = key.split('/')  # key example: 000/00000000
    center_frame_idx = int(frame_name)

    # determine the neighboring frames
    interval = random.choice(self.interval_list)

    # ensure not exceeding the borders
    start_frame_idx = center_frame_idx - self.num_half_frames * interval
    end_frame_idx = center_frame_idx + self.num_half_frames * interval
    # each clip has 100 frames starting from 0 to 99
    while (start_frame_idx < 0) or (end_frame_idx > 99):
        center_frame_idx = random.randint(0, 99)
        start_frame_idx = center_frame_idx - self.num_half_frames * interval
        end_frame_idx = center_frame_idx + self.num_half_frames * interval
    frame_name = f'{center_frame_idx:08d}'
    neighbor_list = list(
        range(center_frame_idx - self.num_half_frames * interval,
              center_frame_idx + self.num_half_frames * interval + 1, interval))
    # random reverse
    if self.random_reverse and random.random() < 0.5:
        neighbor_list.reverse()

    assert len(neighbor_list) == self.num_frame, (f'Wrong length of neighbor list: {len(neighbor_list)}')

    # get the GT frame (as the center frame)
    if self.is_lmdb:
        img_gt_path = f'{clip_name}/{frame_name}'
    else:
        img_gt_path = self.gt_root / clip_name / f'{frame_name}.png'
    img_bytes = self.file_client.get(img_gt_path, 'gt')
    img_gt = imfrombytes(img_bytes, float32=True)

    # get the neighboring LQ frames
    img_lqs = []
    for neighbor in neighbor_list:
        if self.is_lmdb:
            img_lq_path = f'{clip_name}/{neighbor:08d}'
        else:
            img_lq_path = self.lq_root / clip_name / f'{neighbor:08d}.png'
        img_bytes = self.file_client.get(img_lq_path, 'lq')
        img_lq = imfrombytes(img_bytes, float32=True)
        img_lqs.append(img_lq)

    # get flows
    if self.flow_root is not None:
        img_flows = []
        # read previous flows
        for i in range(self.num_half_frames, 0, -1):
            if self.is_lmdb:
                flow_path = f'{clip_name}/{frame_name}_p{i}'
            else:
                flow_path = self.flow_root / clip_name / f'{frame_name}_p{i}.png'
            img_bytes = self.file_client.get(flow_path, 'flow')
            cat_flow = imfrombytes(img_bytes, flag='grayscale', float32=False)  # uint8, [0, 255]
            dx, dy = np.split(cat_flow, 2, axis=0)
            flow = dequantize_flow(dx, dy, max_val=20, denorm=False)  # we use max_val 20 here.
            img_flows.append(flow)
        # read next flows
        for i in range(1, self.num_half_frames + 1):
            if self.is_lmdb:
                flow_path = f'{clip_name}/{frame_name}_n{i}'
            else:
                flow_path = self.flow_root / clip_name / f'{frame_name}_n{i}.png'
            img_bytes = self.file_client.get(flow_path, 'flow')
            cat_flow = imfrombytes(img_bytes, flag='grayscale', float32=False)  # uint8, [0, 255]
            dx, dy = np.split(cat_flow, 2, axis=0)
            flow = dequantize_flow(dx, dy, max_val=20, denorm=False)  # we use max_val 20 here.
            img_flows.append(flow)

        # for random crop, here, img_flows and img_lqs have the same spatial size
        img_lqs.extend(img_flows)

    # randomly crop
    img_gt, img_lqs = paired_random_crop(img_gt, img_lqs, gt_size, scale, img_gt_path)
    if self.flow_root is not None:
        img_lqs, img_flows = img_lqs[:self.num_frame], img_lqs[self.num_frame:]

    # augmentation - flip, rotate
    img_lqs.append(img_gt)
    if self.flow_root is not None:
        img_results, img_flows = augment(img_lqs, self.opt['use_flip'], self.opt['use_rot'], img_flows)
    else:
        img_results = augment(img_lqs, self.opt['use_flip'], self.opt['use_rot'])

    img_results = img2tensor(img_results)
    img_lqs = torch.stack(img_results[0:-1], dim=0)
    img_gt = img_results[-1]

    if self.flow_root is not None:
        img_flows = img2tensor(img_flows)
        # add the zero center flow
        img_flows.insert(self.num_half_frames, torch.zeros_like(img_flows[0]))
        img_flows = torch.stack(img_flows, dim=0)

    # img_lqs: (t, c, h, w)
    # img_flows: (t, 2, h, w)
    # img_gt: (c, h, w)
    # key: str
    if self.flow_root is not None:
        return {'lq': img_lqs, 'flow': img_flows, 'gt': img_gt, 'key': key}
    else:
        return {'lq': img_lqs, 'gt': img_gt, 'key': key}
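# ---------------------------------------------------------------------------
# dequantize_flow comes from mmcv; a hedged sketch of the assumed inverse
# quantization (uint8 [0, 255] -> [-max_val, max_val]; denorm would rescale
# dx/dy by image width/height). Not the exact mmcv implementation.
import numpy as np

def dequantize_flow_sketch(dx, dy, max_val=20.0, denorm=False):
    dx = dx.astype(np.float32) * 2 * max_val / 255.0 - max_val
    dy = dy.astype(np.float32) * 2 * max_val / 255.0 - max_val
    if denorm:
        dx *= dx.shape[1]  # rescale by width
        dy *= dy.shape[0]  # rescale by height
    return np.dstack((dx, dy))  # (h, w, 2) flow map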
def __getitem__(self, index):
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)

    scale = self.opt['scale']
    gt_size = self.opt.get('gt_size', None)
    key = self.keys[index]
    clip_name, frame_name = key.split('/')  # key example: 000/00000000
    center_frame_idx = int(frame_name)

    # determine the neighboring frames
    interval = random.choice(self.interval_list)

    # ensure not exceeding the borders
    start_frame_idx = center_frame_idx - self.num_half_frames * interval
    end_frame_idx = start_frame_idx + (self.num_frame - 1) * interval
    # each clip has 100 frames starting from 0 to 99
    while (start_frame_idx < 0) or (end_frame_idx > 99):
        center_frame_idx = random.randint(self.num_half_frames * interval, 99 - self.num_half_frames * interval)
        start_frame_idx = center_frame_idx - self.num_half_frames * interval
        end_frame_idx = start_frame_idx + (self.num_frame - 1) * interval
    frame_name = f'{center_frame_idx:08d}'
    frame_list = list(range(start_frame_idx, end_frame_idx + 1, interval))
    # random reverse
    if self.random_reverse and random.random() < 0.5:
        frame_list.reverse()

    assert len(frame_list) == self.num_frame, (f'Wrong length of frame list: {len(frame_list)}')

    # get the GT frames
    img_gts = []
    for frame in frame_list:
        if self.is_lmdb:
            img_gt_path = f'{clip_name}/{frame:08d}'
        else:
            img_gt_path = self.gt_root / clip_name / f'{frame:08d}.png'
        img_bytes = self.file_client.get(img_gt_path, 'gt')
        img_gt = imfrombytes(img_bytes, float32=True)
        img_gts.append(img_gt)

    # get the LQ frames
    img_lqs = []
    for frame in frame_list:
        if self.is_lmdb:
            img_lq_path = f'{clip_name}/{frame:08d}'
        else:
            img_lq_path = self.lq_root / clip_name / f'{frame:08d}.png'
        img_bytes = self.file_client.get(img_lq_path, 'lq')
        img_lq = imfrombytes(img_bytes, float32=True)
        img_lqs.append(img_lq)

    # randomly crop
    if self.is_train:
        img_gts, img_lqs = paired_random_crop(img_gts, img_lqs, gt_size, scale, clip_name)

    # augmentation - flip, rotate
    img_lqs.extend(img_gts)
    if self.is_train:
        img_lqs = augment(img_lqs, self.opt['use_flip'], self.opt['use_rot'])

    img_results = img2tensor(img_lqs)
    img_lqs = torch.stack(img_results[:self.num_frame], dim=0)
    img_gts = torch.stack(img_results[self.num_frame:], dim=0)

    # img_lqs: (t, c, h, w)
    # img_gts: (t, c, h, w)
    # key: str
    return {'lq': img_lqs, 'gt': img_gts, 'key': key, 'frame_list': frame_list}
def __getitem__(self, index):
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)

    # random reverse
    if self.random_reverse and random.random() < 0.5:
        self.neighbor_list.reverse()

    scale = self.opt['scale']
    gt_size = self.opt['gt_size']
    key = self.keys[index]
    clip, seq = key.split('/')  # key example: 00001/0001

    # get the GT frame (im4.png)
    if self.is_lmdb:
        img_gt_path = f'{key}/im4'
    else:
        img_gt_path = self.gt_root / clip / seq / 'im4.png'
    img_bytes = self.file_client.get(img_gt_path, 'gt')
    img_gt = mmcv.imfrombytes(img_bytes).astype(np.float32) / 255.

    # get the extra HR frame (im4_hr.png)
    img_160_path = self.lq_root / clip / seq / 'im4_hr.png'
    img_bytes_160 = self.file_client.get(img_160_path, 'gt')
    img_160 = mmcv.imfrombytes(img_bytes_160).astype(np.float32) / 255.

    # pair the GT frame with the extra HR frame so they are cropped together
    img_gt_160 = [img_gt, img_160]

    # get the neighboring LQ frames
    img_lqs = []
    for neighbor in self.neighbor_list:
        if self.is_lmdb:
            img_lq_path = f'{clip}/{seq}/im{neighbor}'
        else:
            img_lq_path = self.lq_root / clip / seq / f'im{neighbor}.png'
        img_bytes = self.file_client.get(img_lq_path, 'lq')
        img_lq = mmcv.imfrombytes(img_bytes).astype(np.float32) / 255.
        img_lqs.append(img_lq)

    # randomly crop
    img_gt, img_lqs = paired_random_crop(img_gt_160, img_lqs, gt_size, scale, img_gt_path)
    img_160_input = img_gt[1]
    img_gt = img_gt[0]

    # augmentation - flip, rotate
    img_lqs.append(img_160_input)
    img_lqs.append(img_gt)
    img_results = augment(img_lqs, self.opt['use_flip'], self.opt['use_rot'])

    img_results = totensor(img_results)
    hr_3d = img_results[-2]
    img_lqs = torch.stack(img_results[0:-2], dim=0)
    img_gt = img_results[-1]

    # img_lqs: (t, c, h, w)
    # img_gt: (c, h, w)
    # key: str
    return {'lq': img_lqs, 'gt': img_gt, 'hr_3d': hr_3d, 'key': key}
def __getitem__(self, index):
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)

    scale = self.opt['scale']
    gt_size = self.opt['gt_size']
    key = self.keys[index]
    clip_name, frame_name = key.split('/')  # key example: 000/00000000

    # determine the neighboring frames
    interval = random.choice(self.interval_list)

    # ensure not exceeding the borders
    start_frame_idx = int(frame_name)
    if start_frame_idx > 100 - self.num_frame * interval:
        start_frame_idx = random.randint(0, 100 - self.num_frame * interval)
    end_frame_idx = start_frame_idx + self.num_frame * interval

    neighbor_list = list(range(start_frame_idx, end_frame_idx, interval))

    # random reverse
    if self.random_reverse and random.random() < 0.5:
        neighbor_list.reverse()

    # get the neighboring LQ and GT frames
    img_lqs = []
    img_gts = []
    for neighbor in neighbor_list:
        if self.is_lmdb:
            img_lq_path = f'{clip_name}/{neighbor:08d}'
            img_gt_path = f'{clip_name}/{neighbor:08d}'
        else:
            img_lq_path = self.lq_root / clip_name / f'{neighbor:08d}.png'
            img_gt_path = self.gt_root / clip_name / f'{neighbor:08d}.png'

        # get LQ
        img_bytes = self.file_client.get(img_lq_path, 'lq')
        img_lq = imfrombytes(img_bytes, float32=True)
        img_lqs.append(img_lq)

        # get GT
        img_bytes = self.file_client.get(img_gt_path, 'gt')
        img_gt = imfrombytes(img_bytes, float32=True)
        img_gts.append(img_gt)

    # randomly crop
    img_gts, img_lqs = paired_random_crop(img_gts, img_lqs, gt_size, scale, img_gt_path)

    # augmentation - flip, rotate
    img_lqs.extend(img_gts)
    img_results = augment(img_lqs, self.opt['use_flip'], self.opt['use_rot'])

    img_results = img2tensor(img_results)
    # the first half of img_results are the LQ frames, the second half the GT frames
    img_gts = torch.stack(img_results[len(img_results) // 2:], dim=0)
    img_lqs = torch.stack(img_results[:len(img_results) // 2], dim=0)

    # img_lqs: (t, c, h, w)
    # img_gts: (t, c, h, w)
    # key: str
    return {'lq': img_lqs, 'gt': img_gts, 'key': key}
def __getitem__(self, index):
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)

    scale = self.opt['scale']
    lq_map_type = self.opt['lq_map_type']
    gt_map_type = self.opt['gt_map_type']
    crop_scale = self.opt.get('crop_scale', None)

    # Load gt and lq images. Dimension order: HWC; channel order: RGGB;
    # HDR image range: [0, +inf], float32.
    gt_path = self.paths[index]['gt_path']
    lq_path = self.paths[index]['lq_path']
    psf_path = self.paths[index]['psf_path']
    img_gt = self.file_client.get(gt_path)
    img_lq = self.file_client.get(lq_path)
    psf_code = self.file_client.get(psf_path)

    # tone mapping
    img_lq = self._tonemap(img_lq, type=lq_map_type)
    img_gt = self._tonemap(img_gt, type=gt_map_type)

    # expand dimension
    img_gt = self._expand_dim(img_gt)
    img_lq = self._expand_dim(img_lq)

    # rescale for random crop
    if crop_scale is not None:
        h, w, _ = img_lq.shape
        img_lq = cv2.resize(img_lq, (int(w * crop_scale), int(h * crop_scale)), interpolation=cv2.INTER_LINEAR)
        img_gt = cv2.resize(img_gt, (int(w * crop_scale), int(h * crop_scale)), interpolation=cv2.INTER_LINEAR)

    # augmentation
    if self.opt['phase'] == 'train':
        gt_size = self.opt['gt_size']
        # random crop
        img_gt, img_lq = paired_random_crop(img_gt, img_lq, gt_size, scale, gt_path)
        # flip, rotation
        img_gt, img_lq = augment([img_gt, img_lq], self.opt['use_flip'], self.opt['use_rot'])

    # TODO: color space transform
    # HWC to CHW, numpy to tensor (no BGR-to-RGB swap for RGGB raw data)
    img_gt, img_lq = totensor([img_gt, img_lq], bgr2rgb=False, float32=True)
    # reshape the PSF code to (d, 1, 1) so it can broadcast over feature maps
    psf_code = torch.from_numpy(psf_code)[..., None, None]

    return {
        'lq': img_lq,
        'gt': img_gt,
        'psf_code': psf_code,
        'lq_path': lq_path,
        'gt_path': gt_path,
        'psf_path': psf_path,
    }
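# ---------------------------------------------------------------------------
# The trailing [..., None, None] reshapes the PSF code vector to (d, 1, 1).
# A sketch of the typical way such a code is broadcast and concatenated onto
# a feature map downstream; all shapes here are hypothetical, not from this
# repo.
import torch

psf_code = torch.randn(8)[..., None, None]               # (8, 1, 1)
feat = torch.randn(64, 32, 32)                           # (c, h, w) feature map
cond = psf_code.expand(-1, feat.size(1), feat.size(2))   # (8, 32, 32)
fused = torch.cat([feat, cond], dim=0)                   # (72, 32, 32)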
def __getitem__(self, index):
    if self.file_client is None:
        self.file_client = FileClient(self.io_backend_opt.pop('type'), **self.io_backend_opt)

    scale = self.opt['scale']

    # Load gt and lq images. Dimension order: HWC; channel order: BGR;
    # image range: [0, 1], float32.
    gt_path = self.paths[index]['gt_path']
    img_bytes = self.file_client.get(gt_path, 'gt')
    img_gt = imfrombytes(img_bytes, float32=True)
    lq_path = self.paths[index]['lq_path']
    img_bytes = self.file_client.get(lq_path, 'lq')
    img_lq = imfrombytes(img_bytes, float32=True)

    # augmentation for training
    if self.opt['phase'] == 'train':
        gt_size = self.opt['gt_size']
        # random crop
        img_gt, img_lq = paired_random_crop(img_gt, img_lq, gt_size, scale, gt_path)
        # flip, rotation
        img_gt, img_lq = augment([img_gt, img_lq], self.opt['use_flip'], self.opt['use_rot'])

        # create a pyramid for img_gt: downsample by 2/4/8/16, then resize back to gt_size
        img_re1 = cv2.resize(cv2.resize(img_gt, (gt_size // 2, gt_size // 2), interpolation=cv2.INTER_LINEAR),
                             (gt_size, gt_size), interpolation=cv2.INTER_LINEAR)
        img_re2 = cv2.resize(cv2.resize(img_gt, (gt_size // 4, gt_size // 4), interpolation=cv2.INTER_LINEAR),
                             (gt_size, gt_size), interpolation=cv2.INTER_LINEAR)
        img_re3 = cv2.resize(cv2.resize(img_gt, (gt_size // 8, gt_size // 8), interpolation=cv2.INTER_LINEAR),
                             (gt_size, gt_size), interpolation=cv2.INTER_LINEAR)
        img_re4 = cv2.resize(cv2.resize(img_gt, (gt_size // 16, gt_size // 16), interpolation=cv2.INTER_LINEAR),
                             (gt_size, gt_size), interpolation=cv2.INTER_LINEAR)

        # TODO: color space transform
        # BGR to RGB, HWC to CHW, numpy to tensor
        img_gt, img_lq, img_re1, img_re2, img_re3, img_re4 = img2tensor(
            [img_gt, img_lq, img_re1, img_re2, img_re3, img_re4], bgr2rgb=True, float32=True)
        # normalize
        if self.mean is not None or self.std is not None:
            for img in (img_lq, img_gt, img_re1, img_re2, img_re3, img_re4):
                normalize(img, self.mean, self.std, inplace=True)

        return {
            'lq': img_lq,
            'gt': torch.cat((img_gt, img_re1, img_re2, img_re3, img_re4), 0),
            'lq_path': lq_path,
            'gt_path': gt_path
        }
    elif self.opt['phase'] == 'val':
        h, w, c = img_lq.shape
        if h % 16 != 0 or w % 16 != 0:
            h = h // 16 * 16
            w = w // 16 * 16
        # cv2.resize expects dsize as (width, height)
        img_lq = cv2.resize(img_lq, (w, h), interpolation=cv2.INTER_LINEAR)
        img_gt = cv2.resize(img_gt, (2 * w, 2 * h), interpolation=cv2.INTER_LINEAR)

        # TODO: color space transform
        # BGR to RGB, HWC to CHW, numpy to tensor
        img_gt, img_lq = img2tensor([img_gt, img_lq], bgr2rgb=True, float32=True)
        # normalize
        if self.mean is not None or self.std is not None:
            normalize(img_lq, self.mean, self.std, inplace=True)
            normalize(img_gt, self.mean, self.std, inplace=True)

        return {
            'lq': img_lq,
            'gt': img_gt,
            'lq_path': lq_path,
            'gt_path': gt_path
        }
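# ---------------------------------------------------------------------------
# A compact, hypothetical helper equivalent to the four resize pairs above:
# build a low-pass pyramid by downsampling img_gt by powers of two and
# resizing each level back to gt_size.
import cv2

def gt_pyramid_sketch(img_gt, gt_size, levels=4):
    pyramid = []
    for i in range(1, levels + 1):
        s = gt_size // (2 ** i)
        small = cv2.resize(img_gt, (s, s), interpolation=cv2.INTER_LINEAR)
        pyramid.append(cv2.resize(small, (gt_size, gt_size), interpolation=cv2.INTER_LINEAR))
    return pyramid  # corresponds to [img_re1, img_re2, img_re3, img_re4]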