def train_transform(rgb, depth):
    """Training-time augmentation for an (rgb, depth) pair.

    Applies a shared geometric pipeline (resize, small rotation, random
    scale, center crop, optional horizontal flip) to both modalities, plus
    color jitter on the RGB image only.

    Args:
        rgb: input color image (array-like accepted by the custom transforms).
        depth: corresponding depth map.

    Returns:
        Tuple ``(rgb_np, depth_np)`` — RGB as float array scaled to [0, 1],
        depth rescaled by the inverse of the random zoom factor.
    """
    s = np.random.uniform(1.0, 1.5)  # random scaling
    # Dividing depth by the zoom factor keeps the depth values metrically
    # consistent with the spatially rescaled image.
    depth_np = depth / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        # Shrink first for computational efficiency, since rotation is slow.
        transforms.Resize(250.0 / iheight),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb_np = transform(rgb)
    # random color jittering (RGB only; geometry must stay aligned with depth)
    rgb_np = color_jitter(rgb_np)
    # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit float
    # dtype is the equivalent, forward-compatible spelling.
    rgb_np = np.asarray(rgb_np, dtype=np.float64) / 255
    depth_np = transform(depth_np)

    return rgb_np, depth_np
def train_transform(rgb, depth):
    """Training-time augmentation for an (rgb, depth) pair with NaN masking.

    Invalid (zero) depth readings are marked NaN before the geometric
    transforms so interpolation does not smear valid values into holes,
    then restored to zero afterwards.

    Args:
        rgb: input color image.
        depth: corresponding depth map; zeros denote missing measurements.

    Returns:
        Tuple ``(rgb_np, depth_np)`` — RGB normalized to [-1, 1], depth
        divided by 10.0 (dataset-specific normalization).
    """
    s = np.random.uniform(1.0, 1.5)  # random scaling
    depth_np = depth / s  # keep depth consistent with the rescaled image
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # Set zeros in depth to NaN so resampling treats them as missing data.
    depth_np[depth_np == 0] = np.nan

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        # Shrink first for computational efficiency, since rotation is slow.
        transforms.Resize(float(image_size) / iheight),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip),
    ])
    rgb_np = transform(rgb)
    # random color jittering (RGB only)
    rgb_np = color_jitter(rgb_np)
    # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit float
    # dtype is the equivalent, forward-compatible spelling.
    rgb_np = np.asarray(rgb_np, dtype=np.float64) / 255
    rgb_np = normalize(rgb_np)  # from [0,1] to [-1,1]
    depth_np = transform(depth_np)
    depth_np[np.isnan(depth_np)] = 0  # restore missing markers to zero
    depth_np = depth_np / 10.0

    return rgb_np, depth_np
def train_transform(self, rgb: np.ndarray, depth_raw: np.ndarray,
                    depth_fix: np.ndarray) -> TNpData:
    """Training-time augmentation for RGB plus raw and inpainted depth maps.

    The same randomly-parameterized geometric pipeline is applied to all
    three inputs so they stay pixel-aligned; color jitter touches RGB only.

    Args:
        rgb: color image.
        depth_raw: raw (possibly sparse) depth map.
        depth_fix: completed/fixed depth map.

    Returns:
        Tuple ``(rgb, depth_raw, depth_fix)`` after augmentation, with RGB
        scaled to [0, 1].
    """
    s = np.random.uniform(1.0, 1.5)  # random scaling
    # Divide by the zoom factor so depth stays metrically consistent.
    depth_raw = depth_raw / s
    depth_fix = depth_fix / s
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        # Shrink first for computational efficiency, since rotation is slow.
        transforms.Resize(250.0 / self.iheight),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((self.oheight, self.owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb = transform(rgb)
    # random color jittering (RGB only)
    rgb = color_jitter(rgb)
    # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit float
    # dtype is the equivalent, forward-compatible spelling.
    rgb = np.asarray(rgb, dtype=np.float64) / 255
    depth_raw = transform(depth_raw)
    depth_fix = transform(depth_fix)

    return rgb, depth_raw, depth_fix
def train(args):
    """Train a human-segmentation model selected by ``args.model_type``."""
    # Augmented pipeline for training; evaluation skips the random flip.
    train_transforms = transforms.Compose([
        transforms.Resize(args.image_shape),
        transforms.RandomHorizontalFlip(),
        transforms.Normalize()
    ])
    eval_transforms = transforms.Compose([
        transforms.Resize(args.image_shape),
        transforms.Normalize()
    ])

    def make_dataset(file_list, tfms, shuffle):
        # Both splits share identical loader settings except shuffling.
        return Dataset(
            data_dir=args.data_dir,
            file_list=file_list,
            transforms=tfms,
            num_workers='auto',
            buffer_size=100,
            parallel_method='thread',
            shuffle=shuffle)

    train_dataset = make_dataset(args.train_list, train_transforms, True)
    eval_dataset = (make_dataset(args.val_list, eval_transforms, False)
                    if args.val_list is not None else None)

    # Dispatch table replaces the if/elif chain over model types.
    model_classes = {
        'HumanSegMobile': HumanSegMobile,
        'HumanSegLite': HumanSegLite,
        'HumanSegServer': HumanSegServer,
    }
    try:
        model = model_classes[args.model_type](num_classes=2)
    except KeyError:
        raise ValueError(
            "--model_type: {} is set wrong, it shold be one of ('HumanSegMobile', "
            "'HumanSegLite', 'HumanSegServer')".format(args.model_type))

    model.train(
        num_epochs=args.num_epochs,
        train_dataset=train_dataset,
        train_batch_size=args.batch_size,
        eval_dataset=eval_dataset,
        save_interval_epochs=args.save_interval_epochs,
        save_dir=args.save_dir,
        pretrained_weights=args.pretrained_weights,
        resume_weights=args.resume_weights,
        learning_rate=args.learning_rate,
        use_vdl=args.use_vdl)
def build_model(self):
    # Builds the full U-GAT-IT training graph: data loaders, the two
    # generators and four discriminators, losses, optimizers and the rho
    # clipper. Called once before training starts.
    """ DataLoader """
    # Training pipeline: flip, enlarge by 30px, then random-crop back to
    # img_size — a standard jitter-crop augmentation. Test pipeline only
    # resizes. Both normalize to roughly [-1, 1] via mean=0.5, std=0.5.
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.Resize((self.img_size + 30, self.img_size + 30)),
        transforms.RandomCrop(self.img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=0.5, std=0.5)
    ])
    test_transform = transforms.Compose([
        transforms.Resize((self.img_size, self.img_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=0.5, std=0.5)
    ])
    # paddle.batch(...) returns a reader factory; the trailing () creates
    # the batched reader itself.
    self.trainA_loader = paddle.batch(
        a_reader(shuffle=True, transforms=train_transform),
        self.batch_size)()
    self.trainB_loader = paddle.batch(
        b_reader(shuffle=True, transforms=train_transform),
        self.batch_size)()
    self.testA_loader = a_test_reader(transforms=test_transform)
    self.testB_loader = b_test_reader(transforms=test_transform)
    """ Define Generator, Discriminator """
    # Two generators for the A<->B translation directions.
    self.genA2B = ResnetGenerator(input_nc=3, output_nc=3, ngf=self.ch,
                                  n_blocks=self.n_res,
                                  img_size=self.img_size,
                                  light=self.light)
    self.genB2A = ResnetGenerator(input_nc=3, output_nc=3, ngf=self.ch,
                                  n_blocks=self.n_res,
                                  img_size=self.img_size,
                                  light=self.light)
    # Global (7-layer) and local (5-layer) discriminators per domain.
    self.disGA = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
    self.disGB = Discriminator(input_nc=3, ndf=self.ch, n_layers=7)
    self.disLA = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
    self.disLB = Discriminator(input_nc=3, ndf=self.ch, n_layers=5)
    """ Define Loss """
    self.L1_loss = L1Loss()
    self.MSE_loss = MSELoss()
    self.BCE_loss = BCEWithLogitsLoss()
    """ Trainer """
    # One optimizer over both generators, one over all four discriminators.
    self.G_optim = self.optimizer_setting(self.genA2B.parameters() +
                                          self.genB2A.parameters())
    self.D_optim = self.optimizer_setting(self.disGA.parameters() +
                                          self.disGB.parameters() +
                                          self.disLA.parameters() +
                                          self.disLB.parameters())
    """ Define Rho clipper to constraint the value of rho in AdaILN and ILN"""
    self.Rho_clipper = RhoClipper(0, 1)
def build_dataset(path_to_imgs, path_to_json_train, path_to_json_test):
    """Build train/val/test splits for TID2013.

    Ground-truth distributions only ship with a train-test split, so the
    original train data is re-split: 76% stays for training, the remainder
    is pooled with the original test data and that pool is divided 75/25
    into validation and test. Overall the ~3k TID2013 images end up in
    roughly a 70-20-10 train-val-test ratio.
    """
    per_phase_transforms = {
        'train': transforms.Compose([
            transforms.Resize(256),
            transforms.CustomCrop(),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ]),
        'val': transforms.Compose([
            transforms.Resize(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406],
                                 [0.229, 0.224, 0.225])
        ])
    }

    tid_train = TIDLoader(path_to_imgs, path_to_json_train,
                          per_phase_transforms['train'])
    tid_test = TIDLoader(path_to_imgs, path_to_json_test,
                         per_phase_transforms['val'])

    # Shuffle train indices once, then carve off the first 76% for training.
    n_train = len(tid_train)
    shuffled = list(range(n_train))
    random.shuffle(shuffled)
    cut = int(0.76 * n_train)
    train_split = Subset(tid_train, shuffled[:cut])
    leftover = Subset(tid_train, shuffled[cut:])

    # Pool the leftover train images with the original test set, then split
    # that pool 75/25 into validation and test.
    pool = ConcatDataset([leftover, tid_test])
    pool_indices = list(range(len(pool)))
    random.shuffle(pool_indices)
    pool_cut = int(0.75 * len(pool))
    final_val_split = Subset(pool, pool_indices[:pool_cut])
    test_split = Subset(pool, pool_indices[pool_cut:])

    return train_split, final_val_split, test_split
def __init__(self, img_size: int, original_size: int, mean: float = 0,
             std: float = 1, brightness: float = 0.3, contrast: float = 0.5,
             saturation: float = 0.5, hue: float = 0.3,
             rotation_degree: int = 10, hflip: float = 0.5,
             debug: bool = False):
    """Pre-build the augmentation/preprocessing transforms used later.

    Args:
        img_size: target side length images are resized to.
        original_size: source image size (stored for reference by callers).
        mean, std: normalization statistics passed to ``Normalize``.
        brightness, contrast, saturation, hue: ``ColorJitter`` ranges.
        rotation_degree: max absolute random-rotation angle in degrees.
        hflip: probability of a random horizontal flip.
        debug: flag stored for use elsewhere in the class.
    """
    self.original_size = original_size
    self.target_size = img_size
    self.to_pil = transforms.ToPILImage()
    self.color_jitter = transforms.ColorJitter(brightness=brightness,
                                               contrast=contrast,
                                               saturation=saturation,
                                               hue=hue)
    self.resize = transforms.Resize(img_size)
    self.to_tensor = transforms.ToTensor()
    self.normalize = transforms.Normalize(mean, std)
    # Custom flip/rotation wrappers (project-local), parameterized here so
    # every sample sees the same augmentation configuration.
    self.r_horizontal_flip = RandomHorizontalFlip(p=hflip)
    self.r_rotation = RandomRotation(rotation_degree)
    self.debug = debug
def train_transform(rgb, depth):
    """Training-time augmentation for an (rgb, depth) pair (KITTI-style crop).

    A fixed crop removes sky/hood regions before the shared geometric
    pipeline; color jitter is applied to RGB only.

    Args:
        rgb: input color image.
        depth: corresponding depth map.

    Returns:
        Tuple ``(rgb_np, depth_np)`` — RGB as float array in [0, 1], depth
        rescaled by the inverse zoom factor.
    """
    s = np.random.uniform(1.0, 1.5)  # random scaling
    depth_np = depth / s  # keep depth consistent with the rescaled image
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Crop(130, 10, 240, 1200),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb_np = transform(rgb)
    # random color jittering (RGB only)
    rgb_np = color_jitter(rgb_np)
    # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit float
    # dtype is the equivalent, forward-compatible spelling.
    rgb_np = np.asarray(rgb_np, dtype=np.float64) / 255
    # Scipy affine_transform produced RuntimeError when the depth map was
    # given as a 'numpy.ndarray' — convert to float32 first.
    depth_np = np.asarray(depth_np, dtype=np.float32)
    depth_np = transform(depth_np)

    return rgb_np, depth_np
def get_transform(train):
    """Return the detection preprocessing pipeline.

    Always resizes to 800x800 and converts to tensor; training mode adds a
    random horizontal flip with probability 0.5.
    """
    ops = [T.Resize((800, 800)), T.ToTensor()]
    if train:
        ops.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(ops)
def get(args):
    """ Entry point. Call this function to get all Charades dataloaders """
    # Optical-flow stacks are single-channel; normalize per channel and
    # concatenate the frame stack into one tensor at the end.
    normalize = arraytransforms.Normalize(mean=[0.502], std=[1.0])
    stack_frames = transforms.Lambda(lambda x: torch.cat(x))

    train_transforms = transforms.Compose([
        arraytransforms.RandomResizedCrop(224),
        arraytransforms.ToTensor(),
        normalize,
        stack_frames,
    ])
    val_transforms = transforms.Compose([
        arraytransforms.Resize(256),
        arraytransforms.CenterCrop(224),
        arraytransforms.ToTensor(),
        normalize,
        stack_frames,
    ])

    train_dataset = Charadesflow(args.data, 'train', args.train_file,
                                 args.cache, transform=train_transforms)
    val_dataset = Charadesflow(args.data, 'val', args.val_file,
                               args.cache, transform=val_transforms)
    valvideo_dataset = Charadesflow(args.data, 'val_video', args.val_file,
                                    args.cache, transform=val_transforms)
    return train_dataset, val_dataset, valvideo_dataset
def create_transforms(self):
    """Assemble ``self.transforms`` for the current training mode.

    Each mode prepends its own crop/augmentation stage; every mode ends
    with a resize to the patch size and tensor conversion.
    """
    mode_stages = {
        'pretrain_tnet': [
            transforms.RandomCrop(400),
            transforms.RandomRotation(180),
            transforms.RandomHorizontalFlip(),
        ],
        'pretrain_mnet': [
            transforms.RandomCrop(320),
        ],
        'end_to_end': [
            transforms.RandomCrop(800),
        ],
    }
    pipeline = list(mode_stages.get(self.mode, []))
    pipeline += [
        transforms.Resize((self.patch_size, self.patch_size)),
        transforms.ToTensor(),
    ]
    self.transforms = transforms.Compose(pipeline)
def get_data(data_dir, source, target, height, width, batch_size, re=0,
             workers=8):
    """Build the IU X-RAY dataset, its MeSH vocabulary and three loaders.

    The same resize/normalize pipeline is used for all splits; the MeSH
    term list is restricted to terms present in both train and test.
    """
    dataset = IU_X_RAY('./dataset/')

    pipeline = T.Compose([
        T.Resize((height, width), interpolation=3),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
    ])

    train_sampler = MRG_sampler(dataset.source_train,
                                root=dataset.train_images_dir,
                                transform=pipeline)
    val_sampler = MRG_sampler(dataset.source_valid,
                              root=dataset.valid_images_dir,
                              transform=pipeline)
    test_sampler = MRG_sampler(dataset.source_test,
                               root=dataset.test_images_dir,
                               transform=pipeline)

    # Keep only MeSH terms seen in both train and test splits.
    mesh_term_list = list(set(train_sampler.mesh_term_list) &
                          set(test_sampler.mesh_term_list))
    train_sampler.mesh_term_list = mesh_term_list
    test_sampler.mesh_term_list = mesh_term_list
    print('mesh_term:' + str(len(mesh_term_list)))

    def make_loader(sampler):
        # All three loaders share identical settings (including shuffle).
        return DataLoader(sampler,
                          batch_size=batch_size,
                          num_workers=workers,
                          shuffle=True,
                          pin_memory=True,
                          drop_last=True)

    train_dataloader = make_loader(train_sampler)
    val_dataloader = make_loader(val_sampler)
    test_dataloader = make_loader(test_sampler)

    return (dataset, mesh_term_list, train_dataloader, val_dataloader,
            test_dataloader)
def get_transform(train, resolution):
    """Build a segmentation transform pipeline for the given resolution.

    Square resolutions get scale-jitter plus (in training) random crop;
    non-square resolutions are resized directly. Training adds one random
    horizontal flip; everything ends with tensor conversion and ImageNet
    normalization.

    Fix: the original appended RandomHorizontalFlip(0.5) twice on the
    square+train path (once inside the square branch and once in the
    common training section), flipping each image through two independent
    coin tosses. A single flip is kept in the common section.
    """
    ops = []

    # if square resolution, perform some aspect cropping
    # otherwise, resize to the resolution as specified
    if resolution[0] == resolution[1]:
        base_size = resolution[0] + 32  # e.g. 520 for 488-class setups
        crop_size = resolution[0]       # e.g. 480
        # Training jitters scale in [0.5, 2.0] x base; eval keeps it fixed.
        min_size = int((0.5 if train else 1.0) * base_size)
        max_size = int((2.0 if train else 1.0) * base_size)
        ops.append(T.RandomResize(min_size, max_size))
        if train:
            ops.append(T.RandomCrop(crop_size))
    else:
        ops.append(T.Resize(resolution))

    # during training mode, perform some data randomization
    if train:
        ops.append(T.RandomHorizontalFlip(0.5))

    ops.append(T.ToTensor())
    ops.append(T.Normalize(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225]))
    return T.Compose(ops)
def get_transform(train):
    """Resize to (imsize, imsize) and tensorize; training adds a random flip."""
    ops = [T.Resize((imsize, imsize)), T.ToTensor()]
    if train:
        ops.append(T.RandomHorizontalFlip(0.5))
    return T.Compose(ops)
def _setup_dataloaders(root_dir, return_dataset=False):
    """Setup dataloaders.

    Train and validation share the identical preprocessing pipeline; only
    the dataset split and worker count differ.
    """
    pipeline = [
        aug.NormalizeBboxes(cfg.grid_size),
        aug.Bboxes2Matrices(cfg.grid_size, cfg.num_classes),
        aug.Resize(cfg.target_size),
        aug.Normalize(cfg.mean, cfg.std, 1. / 255),
        aug.ToTensor(),
    ]

    ds_train = VOCDataset(root_dir, image_set="train")
    ds_val = VOCDataset(root_dir, image_set="val")
    dl_train = get_dataloader(ds_train, pipeline, cfg.batch_size,
                              num_workers=4)
    dl_val = get_dataloader(ds_val, pipeline, cfg.batch_size)

    if return_dataset:
        return dl_train, dl_val, ds_train, ds_val
    return dl_train, dl_val
def get_iterator(mode):
    """Return the MURA data loader: train split when ``mode`` is truthy,
    otherwise the test split (no augmentation, no shuffling)."""
    # CIFAR-style channel statistics, expressed in [0, 1] range.
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])
    loader_kwargs = {'num_workers': 4, 'pin_memory': True}

    # Augmented pipeline: random crop/rotation/flip before tensorizing.
    transform_augment = transforms.Compose([
        transforms.RandomResizedCrop(size),
        transforms.RandomRotation(15),  # rotate within +/-15 degrees
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # Deterministic pipeline used for evaluation (and train when augment
    # is disabled).
    transform = transforms.Compose([
        transforms.Resize((size, size)),
        transforms.ToTensor(),
        normalize,
    ])

    if mode:
        train_tfm = transform_augment if augment else transform
        dataset = Dataset.MURA(split="train", transform=train_tfm, type=type)
        return torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                           shuffle=True, **loader_kwargs)

    dataset = Dataset.MURA(split="test", transform=transform, type=type)
    return torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                       shuffle=False, **loader_kwargs)
def get_data_loaders(train_batch_size, val_batch_size):
    """Build the train and validation DataLoaders for the DWT dataset.

    Training uses crop/flip/noise augmentation; validation only resizes.
    Both share the same single-channel normalization.
    """
    normalize = transforms.Normalize(mean=torch.Tensor([0.5]),
                                     std=torch.Tensor([0.2]))

    augmented = transforms.Compose([
        transforms.RandomResizedCrop(256),
        transforms.RandomHorizontalFlip(),
        transforms.RandomVerticalFlip(),
        transforms.ToTensor(),
        transforms.MultiplicativeGaussianNoise(1, 0.01),
        normalize,
    ])
    deterministic = transforms.Compose([
        transforms.Resize((512, 512)),
        transforms.ToTensor(),
        normalize,
    ])

    train_loader = DataLoader(
        DWTDataset('dataset', split='train', transform=augmented),
        batch_size=train_batch_size, shuffle=True)
    val_loader = DataLoader(
        DWTDataset('dataset', split='valid', transform=deterministic),
        batch_size=val_batch_size, shuffle=False)
    return train_loader, val_loader
def test(valdir, bs, sz, rect_val=False):
    """Build the validation data reader.

    With ``rect_val`` the images are sorted by aspect ratio and cropped
    with per-batch aspect-aware crops; otherwise a standard center-crop
    ImageFolder pipeline is used.
    """
    if rect_val:
        # Sort by aspect ratio so each batch shares a similar crop shape.
        idx_ar_sorted = sort_ar(valdir)
        idx_sorted, _ = zip(*idx_ar_sorted)
        idx2ar = map_idx2ar(idx_ar_sorted, bs)
        ar_tfms = [transforms.Resize(int(sz * 1.14)),
                   CropArTfm(idx2ar, sz)]
        val_dataset = ValDataset(valdir, transform=ar_tfms)
        loader = PaddleDataLoader(val_dataset, concurrent=1,
                                  indices=idx_sorted, shuffle=False)
        return loader.reader()

    # Default path: resize ~14% larger then take a center crop.
    val_tfms = [transforms.Resize(int(sz * 1.14)),
                transforms.CenterCrop(sz)]
    val_dataset = datasets.ImageFolder(valdir, transforms.Compose(val_tfms))
    return PaddleDataLoader(val_dataset).reader()
def __init__(self, masked_paths: list, unmasked_paths: list,
             tgt_size: tuple):
    """Store image path lists and build the shared preprocessing pipeline.

    Args:
        masked_paths: paths to the masked (input) images.
        unmasked_paths: paths to the corresponding unmasked targets.
        tgt_size: (height, width) the tensors are resized to.
    """
    self.masked_paths = masked_paths
    self.unmasked_paths = unmasked_paths
    # NOTE(review): Resize is applied AFTER Normalize here, i.e. on the
    # already-normalized tensor. This works with tensor-capable Resize
    # implementations, but confirm the ordering is intentional — the
    # conventional order is Resize before ToTensor/Normalize.
    self.transform = T.Compose([
        T.ToPILImage(),
        T.ToTensor(),
        T.Normalize(mean=[0.485, 0.456, 0.406],
                    std=[0.229, 0.224, 0.225]),
        T.Resize(tgt_size)
    ])
def get(args):
    """ Entry point. Call this function to get all Charades dataloaders """
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    # Training: random crop + color jitter + flip (PCA lighting jitter
    # from the original recipe is still missing).
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(args.inputsize),
        transforms.ColorJitter(brightness=0.4, contrast=0.4,
                               saturation=0.4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])
    # Evaluation: deterministic resize (keeping the 256/224 ratio) and
    # center crop; shared by both validation datasets.
    val_transform = transforms.Compose([
        transforms.Resize(int(256. / 224 * args.inputsize)),
        transforms.CenterCrop(args.inputsize),
        transforms.ToTensor(),
        normalize,
    ])

    train_dataset = Charadesflow = None  # placeholder removed below
    train_dataset = Charades(args.data, 'train', args.train_file,
                             args.cache, transform=train_transform)
    val_dataset = Charades(args.data, 'val', args.val_file,
                           args.cache, transform=val_transform)
    valvideo_dataset = Charades(args.data, 'val_video', args.val_file,
                                args.cache, transform=val_transform)
    return train_dataset, val_dataset, valvideo_dataset
def _to_torch(image: np.ndarray) -> _t.Tuple[_torch.Tensor, tuple]:
    """Convert an image array to a CUDA float tensor with batch dimension.

    Returns the prepared tensor and the original image shape (needed to
    undo the resize later).
    """
    # Remember the input shape before the 320-resize destroys it.
    shape = image.shape
    transform = _torchvision.transforms.Compose(
        [_transforms.Resize(320), _transforms.ToTensor()]
    )
    image = transform(image)
    image.unsqueeze_(0)  # add batch dimension in place
    image = image.type(_torch.FloatTensor)
    # NOTE(review): `Variable` is deprecated (tensors are autograd-aware
    # since PyTorch 0.4) and `.cuda()` hard-requires a GPU — confirm a
    # CUDA device is guaranteed at every call site.
    image = Variable(image.cuda())
    return image, shape
def get_transform(train):
    """Resize-and-tensorize pipeline; training inserts a random flip
    before tensor conversion."""
    # Grayscale statistics kept for reference (normalization not applied).
    mean = [0.3297]
    std = [0.2566]
    ops = [T.Resize(IMAGE_SIZE)]
    if train:
        ops.append(T.RandomHorizontalFlip(0.5))
    ops.append(T.ToTensor())
    return T.Compose(ops)
def val_transform(self, rgb: np.ndarray, depth_raw: np.ndarray,
                  depth_fix: np.ndarray) -> TNpData:
    """Deterministic validation-time preprocessing for RGB + depth maps.

    Resizes and center-crops all three inputs identically so they stay
    pixel-aligned; no random augmentation is applied.

    Returns:
        Tuple ``(rgb, depth_raw, depth_fix)`` with RGB scaled to [0, 1].
    """
    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Resize(240.0 / self.iheight),
        transforms.CenterCrop((self.oheight, self.owidth)),
    ])
    rgb = transform(rgb)
    # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit
    # float dtype is the equivalent, forward-compatible spelling.
    rgb = np.asarray(rgb, dtype=np.float64) / 255
    depth_raw = transform(depth_raw)
    depth_fix = transform(depth_fix)
    return rgb, depth_raw, depth_fix
def get_imgs(img_path, imsize, transform=None, normalize_img=None):
    """Load an image and produce one normalized copy per GAN tree branch.

    Args:
        img_path: path to the image file.
        imsize: sequence of target sizes, one per branch.
        transform: optional transform applied once before the per-branch
            resizes.
        normalize_img: per-branch normalization callable.

    Returns:
        List of normalized images, one per ``cfg.TREE.BRANCH_NUM`` branch.

    Raises:
        ValueError: if ``cfg.IMAGE_CHANNEL`` is neither 3 nor 1.
    """
    if cfg.IMAGE_CHANNEL == 3:
        img = Image.open(img_path).convert('RGB')
    elif cfg.IMAGE_CHANNEL == 1:
        img = Image.open(img_path).convert('L')
    else:
        # Previously an unsupported channel count fell through and later
        # crashed with an opaque NameError on `img`; fail fast instead.
        raise ValueError(
            'Unsupported cfg.IMAGE_CHANNEL: {}'.format(cfg.IMAGE_CHANNEL))
    if transform is not None:
        img = transform(img)

    ret = []
    for i in range(cfg.TREE.BRANCH_NUM):
        re_img = transforms.Resize(imsize[i])(img)
        ret.append(normalize_img(re_img))
    return ret
def load_data_transformers(resize_reso=512, crop_reso=448, swap_num=[7, 7]):
    """Build the named transform pipelines used by the DCL training code.

    Args:
        resize_reso: side length for the initial resize.
        crop_reso: side length of the (random/center) crop fed to the net.
        swap_num: grid shape for the destruction ('swap') augmentation.

    Returns:
        Dict mapping pipeline names ('swap', 'common_aug', 'train_totensor',
        'val_totensor', 'test_totensor', 'None') to composed transforms.

    Note: the unused locals `center_resize` and a never-referenced
    `Normalize` binding from the original were removed; behavior is
    unchanged.
    """
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]
    data_transforms = {
        # Patch-shuffling augmentation applied separately from the rest.
        'swap': transforms.Compose([
            transforms.Randomswap((swap_num[0], swap_num[1])),
        ]),
        # Shared geometric augmentation for training images.
        'common_aug': transforms.Compose([
            transforms.Resize((resize_reso, resize_reso)),
            transforms.RandomRotation(degrees=15),
            transforms.RandomCrop((crop_reso, crop_reso)),
            transforms.RandomHorizontalFlip(),
        ]),
        'train_totensor': transforms.Compose([
            transforms.Resize((crop_reso, crop_reso)),
            transforms.ToTensor(),
            transforms.Normalize(imagenet_mean, imagenet_std),
        ]),
        'val_totensor': transforms.Compose([
            transforms.Resize((crop_reso, crop_reso)),
            transforms.ToTensor(),
            transforms.Normalize(imagenet_mean, imagenet_std),
        ]),
        # Test keeps the full resize then center-crops to the model input.
        'test_totensor': transforms.Compose([
            transforms.Resize((resize_reso, resize_reso)),
            transforms.CenterCrop((crop_reso, crop_reso)),
            transforms.ToTensor(),
            transforms.Normalize(imagenet_mean, imagenet_std),
        ]),
        'None': None,
    }
    return data_transforms
def val_transform(rgb, depth):
    """Deterministic validation-time preprocessing for an (rgb, depth) pair.

    Resizes and center-crops both inputs identically; no randomness.

    Returns:
        Tuple ``(rgb_np, depth_np)`` with RGB scaled to [0, 1].
    """
    depth_np = depth
    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Resize(240.0 / iheight),
        transforms.CenterCrop((oheight, owidth)),
    ])
    rgb_np = transform(rgb)
    # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit
    # float dtype is the equivalent, forward-compatible spelling.
    rgb_np = np.asarray(rgb_np, dtype=np.float64) / 255
    depth_np = transform(depth_np)

    return rgb_np, depth_np
def get_transform(train):
    """Resize-and-tensorize pipeline; training adds a random horizontal
    flip of both image and ground truth for augmentation.

    Normalization is deliberately omitted: Faster-RCNN applies its own
    normalization internally, so normalizing here would apply it twice.
    """
    # Grayscale statistics kept for reference (normalization not applied).
    mean = [0.3297]
    std = [0.2566]
    ops = [T.Resize(IMAGE_SIZE)]
    if train:
        ops.append(T.RandomHorizontalFlip(0.5))
    ops.append(T.ToTensor())
    return T.Compose(ops)
def val_transform(rgb, depth):
    """Deterministic validation-time preprocessing for an (rgb, depth) pair.

    Resizes and center-crops both inputs identically; RGB is normalized to
    [-1, 1] and depth divided by 10.0 (dataset-specific normalization).
    """
    # perform 1st part of data augmentation
    transform = transforms.Compose([
        transforms.Resize(float(image_size) / iheight),
        transforms.CenterCrop((oheight, owidth)),
    ])
    rgb_np = transform(rgb)
    # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit
    # float dtype is the equivalent, forward-compatible spelling.
    rgb_np = np.asarray(rgb_np, dtype=np.float64) / 255
    rgb_np = normalize(rgb_np)  # from [0,1] to [-1,1]
    depth_np = transform(depth)
    depth_np = depth_np / 10.0

    return rgb_np, depth_np
def train_transform(rgb, depth):
    """Training-time augmentation producing fixed 512x512 outputs.

    Applies the shared geometric pipeline to both modalities, then resizes
    both to 512x512 with nearest-neighbor interpolation (nearest avoids
    inventing depth values between valid measurements).

    Returns:
        Tuple ``(rgb_np, depth_np)`` — RGB as float array in [0, 1], depth
        rescaled by the inverse zoom factor, both 512x512.
    """
    s = np.random.uniform(1.0, 1.5)  # random scaling
    depth_np = depth / s  # keep depth consistent with the rescaled image
    angle = np.random.uniform(-5.0, 5.0)  # random rotation degrees
    do_flip = np.random.uniform(0.0, 1.0) < 0.5  # random horizontal flip

    # perform 1st part of data augmentation
    transform = transforms.Compose([
        # Shrink first for computational efficiency, since rotation is slow.
        transforms.Resize(250 / iheight),
        transforms.Rotate(angle),
        transforms.Resize(s),
        transforms.CenterCrop((oheight, owidth)),
        transforms.HorizontalFlip(do_flip)
    ])
    rgb_np = transform(rgb)
    # Added: force the RGB output to a fixed 512x512 resolution.
    rgb_np = cv2.resize(rgb_np, (512, 512), interpolation=cv2.INTER_NEAREST)
    # random color jittering (RGB only)
    rgb_np = color_jitter(rgb_np)
    # np.asfarray was removed in NumPy 2.0; np.asarray with an explicit
    # float dtype is the equivalent, forward-compatible spelling.
    rgb_np = np.asarray(rgb_np, dtype=np.float64) / 255
    depth_np = transform(depth_np)
    # Added: resize depth the same way (nearest keeps depth values exact).
    depth_np = cv2.resize(depth_np, (512, 512),
                          interpolation=cv2.INTER_NEAREST)

    return rgb_np, depth_np
def get_imgs_test(img_path, imsize, transform=None, normalize_img=None):
    """Load an image and return a single normalized copy at the final
    (largest) branch size — the test-time counterpart of ``get_imgs``.

    Args:
        img_path: path to the image file.
        imsize: sequence of branch sizes; only the last entry is used.
        transform: optional transform applied before the resize.
        normalize_img: normalization callable applied to the resized image.

    Returns:
        Single-element list containing the normalized image.

    Raises:
        ValueError: if ``cfg.IMAGE_CHANNEL`` is neither 3 nor 1.
    """
    if cfg.IMAGE_CHANNEL == 3:
        img = Image.open(img_path).convert('RGB')
    elif cfg.IMAGE_CHANNEL == 1:
        img = Image.open(img_path).convert('L')
    else:
        # Previously an unsupported channel count fell through and later
        # crashed with an opaque NameError on `img`; fail fast instead.
        raise ValueError(
            'Unsupported cfg.IMAGE_CHANNEL: {}'.format(cfg.IMAGE_CHANNEL))
    # (Removed an unused `width, height = img.size` unpack.)
    if transform is not None:
        img = transform(img)

    ret = []
    re_img = transforms.Resize(imsize[-1])(img)
    ret.append(normalize_img(re_img))
    return ret