def __init__(self, image_size, mean_std):
    self.transform1 = T.Compose([
        T.RandomResizedCrop(image_size, scale=(0.08, 1.0),
                            ratio=(3.0 / 4.0, 4.0 / 3.0),
                            interpolation=Image.BICUBIC),
        T.RandomHorizontalFlip(p=0.5),
        T.RandomApply([T.ColorJitter(0.4, 0.4, 0.2, 0.1)], p=0.8),
        T.RandomGrayscale(p=0.2),
        # The SimCLR paper gives the kernel size; torchvision requires an odd
        # positive kernel size, hence image_size // 20 * 2 + 1.
        T.GaussianBlur(kernel_size=image_size // 20 * 2 + 1, sigma=(0.1, 2.0)),
        T.ToTensor(),
        T.Normalize(*mean_std)
    ])
    self.transform2 = T.Compose([
        T.RandomResizedCrop(image_size, scale=(0.08, 1.0),
                            ratio=(3.0 / 4.0, 4.0 / 3.0),
                            interpolation=Image.BICUBIC),
        T.RandomHorizontalFlip(p=0.5),
        T.RandomApply([T.ColorJitter(0.4, 0.4, 0.2, 0.1)], p=0.8),
        T.RandomGrayscale(p=0.2),
        # transforms.RandomApply([GaussianBlur(kernel_size=int(0.1 * image_size))], p=0.1),
        T.RandomApply([
            T.GaussianBlur(kernel_size=image_size // 20 * 2 + 1, sigma=(0.1, 2.0))
        ], p=0.1),
        T.RandomApply([Solarization()], p=0.2),
        T.ToTensor(),
        T.Normalize(*mean_std)
    ])
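# Hedged sanity check (not from the original source): torchvision's
# GaussianBlur requires an odd, positive kernel size, and the expression
# image_size // 20 * 2 + 1 used above always produces one.
for demo_size in (32, 84, 96, 224):
    demo_kernel = demo_size // 20 * 2 + 1
    assert demo_kernel > 0 and demo_kernel % 2 == 1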
def _build_model(self, blur, brightness, need_ray, need_speed):
    assert self.obs_shapes[0] == (84, 84, 3)
    assert self.obs_shapes[1] == (2,)  # ray
    assert self.obs_shapes[2] == (8,)  # vector

    self.need_ray = need_ray
    self.need_speed = need_speed

    self.conv = m.ConvLayers(84, 84, 3, 'simple',
                             out_dense_n=64, out_dense_depth=2)
    self.dense = m.LinearLayers(self.conv.output_size,
                                dense_n=64, dense_depth=1)
    self.rnn = m.GRU(64 + self.c_action_size, 64, 1)

    if blur != 0:
        # NOTE: torchvision's GaussianBlur requires an odd kernel size,
        # so `blur` is expected to be an odd integer here.
        self.blurrer = m.Transform(T.GaussianBlur(blur, sigma=blur))
    else:
        self.blurrer = None
    self.brightness = m.Transform(T.ColorJitter(brightness=(brightness, brightness)))

    cropper = torch.nn.Sequential(
        T.RandomCrop(size=(50, 50)),
        T.Resize(size=(84, 84))
    )
    self.random_transformers = T.RandomChoice([
        m.Transform(SaltAndPepperNoise(0.2, 0.5)),
        m.Transform(GaussianNoise()),
        m.Transform(T.GaussianBlur(9, sigma=9)),
        m.Transform(cropper)
    ])
def __init__(self, capacity=10000, first_trans=None, second_trans=None):
    self.images = []
    self.capacity = capacity
    self.first_trans = first_trans
    self.second_trans = second_trans
    if self.first_trans is None:
        self.first_trans = transforms.Compose([
            transforms.RandomResizedCrop(320),
            transforms.RandomApply(
                [transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)], p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.GaussianBlur(33),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
    if self.second_trans is None:
        self.second_trans = transforms.Compose([
            transforms.RandomResizedCrop(320),
            transforms.RandomApply(
                [transforms.ColorJitter(0.8, 0.8, 0.8, 0.2)], p=0.8),
            transforms.RandomGrayscale(p=0.2),
            transforms.GaussianBlur(33),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
def _build_model(self, blur, brightness, ray_random, need_speed):
    assert self.obs_shapes[0] == (84, 84, 3)
    assert self.obs_shapes[1] == (1442, )  # ray: (1 + 360 + 360) * 2
    assert self.obs_shapes[2] == (6, )  # vector

    self.ray_random = ray_random
    self.need_speed = need_speed

    if blur != 0:
        self.blurrer = m.Transform(T.GaussianBlur(blur, sigma=blur))
    else:
        self.blurrer = None
    self.brightness = m.Transform(
        T.ColorJitter(brightness=(brightness, brightness)))

    # Build an index that reorders the flattened ray observations.
    self.ray_index = []
    for i in reversed(range(RAY_SIZE // 2)):
        self.ray_index.append((i * 2 + 1) * 2)
        self.ray_index.append((i * 2 + 1) * 2 + 1)
    for i in range(RAY_SIZE // 2):
        self.ray_index.append((i * 2 + 2) * 2)
        self.ray_index.append((i * 2 + 2) * 2 + 1)

    self.conv = m.ConvLayers(84, 84, 3, 'simple',
                             out_dense_n=64, out_dense_depth=2)
    self.ray_conv = m.Conv1dLayers(RAY_SIZE, 2, 'default',
                                   out_dense_n=64, out_dense_depth=2)
    self.vis_ray_dense = m.LinearLayers(self.conv.output_size + self.ray_conv.output_size,
                                        dense_n=64, dense_depth=1)
    self.rnn = m.GRU(64 + self.c_action_size, 64, 1)

    cropper = torch.nn.Sequential(T.RandomCrop(size=(50, 50)),
                                  T.Resize(size=(84, 84)))
    self.random_transformers = T.RandomChoice([
        m.Transform(SaltAndPepperNoise(0.2, 0.5)),
        m.Transform(GaussianNoise()),
        m.Transform(T.GaussianBlur(9, sigma=9)),
        m.Transform(cropper)
    ])
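# Illustrative check (not from the original source): for a small RAY_SIZE the
# index list built above can be printed to see how the flattened ray pairs are
# reordered.
RAY_SIZE_DEMO = 4
demo_index = []
for i in reversed(range(RAY_SIZE_DEMO // 2)):
    demo_index.append((i * 2 + 1) * 2)
    demo_index.append((i * 2 + 1) * 2 + 1)
for i in range(RAY_SIZE_DEMO // 2):
    demo_index.append((i * 2 + 2) * 2)
    demo_index.append((i * 2 + 2) * 2 + 1)
print(demo_index)  # [6, 7, 2, 3, 4, 5, 8, 9]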
def augment(size, dir):
    assert dir is not None, 'Augment requires directory argument'
    labels = os.listdir(dir)
    train_augment = transforms.Compose([
        transforms.ColorJitter(0.5, 0.5, 0.5),
        transforms.RandomAffine(degrees=10),
        transforms.GaussianBlur(5),
        transforms.RandomPerspective(distortion_scale=0.2)
    ])
    for label in labels:
        subdir = os.path.join(dir, label)
        output = os.path.join(subdir, 'augmented')
        if os.path.isdir(output):
            shutil.rmtree(output)
        images = glob(os.path.join(subdir, '*'))
        num_images = len(images)
        if num_images >= size:
            print(f"'{subdir}' already has enough images ({num_images})")
            continue
        os.makedirs(output)
        for i in tqdm(range(size - num_images)):
            img_name = random.choice(images)
            image = Image.open(img_name)
            transformed = train_augment(image)
            transformed = transformed.resize((224, 224))
            transformed.save(
                os.path.join(output, f'{i}_{img_name.split(os.sep)[-1]}'))
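# Minimal usage sketch (the directory layout is an assumption): augment()
# expects dir/<label>/* images and fills dir/<label>/augmented/ until each
# class has `size` images.
#
#     augment(size=500, dir='data/train')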
def __init__(self, input_size):
    self.training = transforms.Compose([
        transforms.Resize([input_size, input_size]),
        transforms.ColorJitter(brightness=0.25, contrast=0.25,
                               saturation=0.25, hue=0.1),
        # NOTE: `resample` and `fillcolor` are the legacy torchvision argument
        # names; newer releases use `interpolation` and `fill` instead.
        transforms.RandomAffine(
            degrees=15,
            translate=None,
            scale=None,
            shear=15,
            resample=0,
            fillcolor=0,
        ),
        transforms.GaussianBlur(kernel_size=3, sigma=(0.001, 2.0)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    self.validation = transforms.Compose([
        transforms.Resize([input_size, input_size]),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
def get_transform_1(train):
    # NOTE: the `train` argument is currently unused.
    transforms_list = []
    transforms_list.append(transforms.RandomApply(
        torch.nn.ModuleList([transforms.GaussianBlur(3)]), p=0.5))
    transforms_list.append(transforms.RandomApply(
        torch.nn.ModuleList([transforms.ColorJitter(brightness=0.1)]), p=0.5))
    transforms_list.append(transforms.RandomApply(
        torch.nn.ModuleList([transforms.ColorJitter(contrast=0.5)]), p=0.5))
    return transforms.Compose(transforms_list)
def __init__(self, train=False, seed=1, resize=510, centerCrop=448):
    self.transformations = transforms.Compose([
        transforms.Resize(resize),
        transforms.CenterCrop(centerCrop),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    self.random_transformations = transforms.Compose([
        transforms.RandomRotation(90, expand=False, fill=None),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomVerticalFlip(p=0.5),
        transforms.GaussianBlur(11, sigma=(0.1, 2.0)),
        torch.nn.Dropout2d(0.1),
        transforms.RandomErasing(p=0.25)
    ])
    self.train = train
    # pandas dataframe that stores image names and labels
    self.df = pd.read_csv('./data/train.csv').sample(frac=1, random_state=seed)
    test_section = int(0.8 * len(self.df))
    if train:
        self.df = self.df.iloc[:test_section]
        self.df_transformed = self.df.copy()
        self.df = pd.concat([self.df, self.df_transformed])
    else:
        self.df = self.df.iloc[test_section:]
def __getitem__(self, idx):
    (path, label) = self.data[idx]
    temp_img = Image.open(path)
    picture_h_w = self.image_size
    if self.data_augumentation:
        result = transforms.Compose([
            transforms.CenterCrop((self.crop_size, self.crop_size)),
            # transforms.RandomResizedCrop(self.crop_size, scale=(0.1, 1), ratio=(0.5, 2)),
            transforms.Resize((picture_h_w, picture_h_w)),
            transforms.RandomHorizontalFlip(),
            transforms.ColorJitter(brightness=0.5, hue=0.5, contrast=0.5),
            transforms.RandomRotation(20),
            transforms.GaussianBlur((5, 5), sigma=(0.1, 2.0)),
            transforms.ToTensor(),
            transforms.Normalize([0.7906623], [0.16963087])
        ])(temp_img)
    else:
        result = transforms.Compose([
            transforms.CenterCrop((self.crop_size, self.crop_size)),
            transforms.Resize((picture_h_w, picture_h_w)),
            transforms.ToTensor(),
            transforms.Normalize([0.7906623], [0.16963087])
        ])(temp_img)
    return {'result': result, 'label': torch.LongTensor([label])}
def __init__(self, df, image_dir, targets=['Type1'], is_train=True):
    # for t in targets:
    #     assert t in ['Type1', 'Type2']
    self.df = df
    self.image_dir = image_dir
    self.targets = targets
    self.transform = T.Compose([
        T.RandomApply(
            [
                # transforms.RandomCrop(32, padding=4),
                # T.RandomHorizontalFlip(p=0.5),
                # T.RandomRotation(degrees, interpolation=InterpolationMode.NEAREST, expand=False, center=None, fill=0),
                # T.RandomResizedCrop(size, scale=(0.08, 1.0), ratio=(0.75, 4 / 3), interpolation=InterpolationMode.BILINEAR),
                T.ColorJitter(brightness=0.1, contrast=0.1,
                              saturation=0.1, hue=0),
                T.GaussianBlur(11, sigma=(0.1, 2.0))
                # T.ToTensor(),
            ],
            p=0.9),
        T.RandomHorizontalFlip(),
        # T.RandomErasing(p=0.5, scale=(0.02, 0.07), ratio=(0.3, 3.3), value=255, inplace=False),
        T.RandomRotation(20, expand=False, center=None, fill=255)
        # T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    self.is_train = is_train
def __init__(self, args):
    if args.encoder == "clip":
        normalize = transforms.Normalize(
            mean=[0.48145466, 0.4578275, 0.40821073],
            std=[0.26862954, 0.26130258, 0.27577711])
    else:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

    randcrop = transforms.RandomResizedCrop(224)
    randflip = transforms.RandomHorizontalFlip()
    jitter = transforms.ColorJitter(0.4, 0.4, 0.2, 0.1)
    randjitter = transforms.RandomApply([jitter], p=0.5)
    blur = transforms.GaussianBlur(kernel_size=23, sigma=[1, 5])
    randblur = transforms.RandomApply([blur], p=0.4)
    noise = GaussianNoise(std=[0.1, 0.5], fixed=False)
    randnoise = transforms.RandomApply([noise], p=0.4)
    mask = RandomMask(percent_missing=[0.25, 0.50], fixed=False)
    randmask = transforms.RandomApply([mask], p=0.1)

    self.transform_common = transforms.Compose([randcrop, randflip])
    self.transform_clean = transforms.Compose(
        [transforms.ToTensor(), normalize])
    self.transform_distort = transforms.Compose([
        randjitter,
        transforms.ToTensor(),
        randblur,
        randnoise,
        randmask,
        normalize
    ])
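# Hedged usage sketch (the class and call flow here are assumptions, not from
# the original source): the three pipelines are typically combined so the
# clean and distorted views share the same random crop and flip.
class CleanDistortPairSketch:
    def __init__(self, common, clean, distort):
        self.common = common
        self.clean = clean
        self.distort = distort

    def __call__(self, img):
        img = self.common(img)  # shared geometric augmentation
        return self.clean(img), self.distort(img)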
def build_data_aug(size, mode, resnet=False, resizepad=False):
    if resnet:
        norm_tfm = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                        std=[0.229, 0.224, 0.225])
    else:
        norm_tfm = transforms.Normalize(0.5, 0.5)
    if resizepad:
        resize_tfm = ResizePad(imgH=size[0], imgW=size[1])
    else:
        resize_tfm = transforms.Resize(size,
                                       interpolation=InterpolationMode.BICUBIC)
    if mode == 'train':
        return transforms.Compose([
            WeightedRandomChoice([
                # transforms.RandomHorizontalFlip(p=1),
                transforms.RandomRotation(degrees=(-10, 10), expand=True, fill=255),
                transforms.GaussianBlur(3),
                Dilation(3),
                Erosion(3),
                transforms.Resize((size[0] // 3, size[1] // 3),
                                  interpolation=InterpolationMode.NEAREST),
                Underline(),
                KeepOriginal(),
            ]),
            resize_tfm,
            transforms.ToTensor(),
            norm_tfm
        ])
    else:
        return transforms.Compose(
            [resize_tfm, transforms.ToTensor(), norm_tfm])
def __init__(
        self,
        input_height: int = 224,
        resize_height: Optional[Union[int, str]] = 'default',
        gaussian_blur: bool = True,
        jitter_strength: float = 1.,
        normalize: Optional[Callable[[Tensor], Tensor]] = None) -> None:
    self.jitter_strength = jitter_strength
    self.input_height = input_height
    self.gaussian_blur = gaussian_blur
    self.normalize = normalize

    if resize_height is None:
        self.resize_height = self.input_height
    elif resize_height == 'default':
        self.resize_height = int(self.input_height + 0.1 * self.input_height)
    else:
        assert isinstance(resize_height, int)
        self.resize_height = resize_height

    self.color_jitter = transforms.ColorJitter(0.8 * self.jitter_strength,
                                               0.8 * self.jitter_strength,
                                               0.8 * self.jitter_strength,
                                               0.2 * self.jitter_strength)

    data_transforms = [
        transforms.RandomResizedCrop(size=self.input_height),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomApply([self.color_jitter], p=0.8),
        transforms.RandomGrayscale(p=0.2)
    ]

    if self.gaussian_blur:
        # torchvision's GaussianBlur requires an odd kernel size
        kernel_size = int(0.1 * self.input_height)
        if kernel_size % 2 == 0:
            kernel_size += 1
        data_transforms.append(
            transforms.RandomApply(
                [transforms.GaussianBlur(kernel_size=kernel_size)], p=0.5))

    data_transforms = transforms.Compose(data_transforms)

    self.online_eval_transform = transforms.Compose([
        transforms.Resize(self.resize_height),
        transforms.CenterCrop(self.input_height),
    ])

    if normalize is None:
        self.final_transform = transforms.ToTensor()
    else:
        self.final_transform = transforms.Compose(
            [transforms.ToTensor(), normalize])

    self.simclr_transform = transforms.Compose(
        [data_transforms, self.final_transform])
    self.online_eval_transform = transforms.Compose(
        [self.online_eval_transform, self.final_transform])
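# Hedged usage sketch (the enclosing class name is an assumption): drawing the
# two SimCLR views and the online-eval view from one PIL image.
#
#     t = SimCLRTransformSketch(input_height=224)
#     view1, view2 = t.simclr_transform(img), t.simclr_transform(img)
#     eval_view = t.online_eval_transform(img)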
def __init__(self, args, fixed_distortion=None, epoch=None):
    if args.encoder == "clip":
        normalize = transforms.Normalize(
            mean=[0.48145466, 0.4578275, 0.40821073],
            std=[0.26862954, 0.26130258, 0.27577711])
    else:
        normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                         std=[0.229, 0.224, 0.225])

    # NOTE: an unrecognized args.distortion leaves `distortion` unbound and
    # raises a NameError below.
    if fixed_distortion is not None:
        distortion = fixed_distortion
    elif args.distortion == "squaremask":
        distortion = SquareMask(length=args.length,
                                offset=args.offset,
                                fixed=args.fixed_mask)
    elif args.distortion == "randommask":
        distortion = RandomMask(percent_missing=convnoise(args.percent_missing, epoch),
                                fixed=args.fixed_mask)
    elif args.distortion == "gaussiannoise":
        distortion = GaussianNoise(std=convnoise(args.std, epoch),
                                   fixed=args.fixed_mask)
    elif args.distortion == "gaussianblur":
        distortion = transforms.GaussianBlur(kernel_size=args.kernel_size,
                                             sigma=args.sigma)

    self.transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        distortion,
        normalize
    ])
    self.distortion = distortion
def create_cell_dataset(
    folders,
    scale=0.125,
    crop=(128, 128),
    n_streams=12,
    batch_size=16,
    cell_prominence_min=0.4,
    cell_prominence_max=float('inf'),
):
    def validate_crop(tensor):
        mean = tensor[1:].sum(dim=0).mean()
        if cell_prominence_min < mean < cell_prominence_max:
            return tensor[:1]
        return None

    images = Dataset.ImageIterableDataset(
        folders,
        transforms.Compose((
            transforms.GaussianBlur(3, (.01, 1.)),
            data_aug.flip_scale_pipeline(scale),
            # data_aug.pipeline(scale, degrees=0, noise_p=0.01),
        )),
        Dataset.CropGenerator(crop, validate_crop=validate_crop),
        n_streams=n_streams,  # large memory impact
        indices=[*range(0, 204), *range(306, 2526)],
    )
    return DataLoader(images, batch_size=batch_size, drop_last=True)
def load_dataset(args, mode):
    if mode == 'val':
        data_transforms = transforms.Compose([
            transforms.Resize((512, 512)),
            transforms.ToTensor(),
            # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    else:
        data_transforms = transforms.Compose([
            transforms.ColorJitter(0.5, 0.5, 0.5, 0.5),
            transforms.RandomGrayscale(p=0.05),
            transforms.RandomApply([transforms.GaussianBlur(kernel_size=3)], p=0.5),
            transforms.RandomRotation((-90, 90)),
            transforms.RandomHorizontalFlip(),
            transforms.Resize((512, 512)),
            transforms.ToTensor(),
            # transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
    dataset = datasets.ImageFolder(os.path.join(args.data_dir, mode),
                                   data_transforms)
    dataloader = DataLoader(dataset,
                            batch_size=args.train_batch_size,
                            shuffle=True,
                            num_workers=16)
    data_set_size = len(dataset)
    return dataloader, data_set_size
def __init__(self, batch_size):
    super().__init__()
    self.batch_size = batch_size
    self.test_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # random crop, color jitter etc.
    self.train_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
        transforms.RandomApply(
            [transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)],  # not strengthened
            p=0.8),
        transforms.RandomGrayscale(p=0.2),
        # perhaps this blur is too much (note: a 1x1 kernel is effectively a no-op)
        transforms.RandomApply([transforms.GaussianBlur([1, 1])], p=0.5),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.491, 0.482, 0.446],
                             std=[0.247, 0.243, 0.261]),
    ])
    self.train_dataset = MSCOCO(train=True, image_transforms=self.train_transform)
    self.test_dataset = MSCOCO(train=False, image_transforms=self.test_transform)
def get_reduced_transform(resize, size, blur, means, stds, adaptive_thresholding):
    """Reduced transforms applied to original inputs

    Arguments:
        resize {int} -- resize before cropping
        size {int} -- expected size
        blur {float} -- sigma of the gaussian blur
        means {list} -- pixel-wise means
        stds {list} -- pixel-wise stds
        adaptive_thresholding {bool} -- apply adaptive Gaussian thresholding
    """
    tfs = []
    tfs.append(transforms.Resize(size=resize))
    tfs.append(transforms.RandomCrop(size))
    # gaussian blur
    if blur is not None:
        tfs.append(transforms.GaussianBlur(5, blur))
    if adaptive_thresholding:
        tfs.append(AdaptiveGaussianThreshold())
    tfs.append(transforms.ToTensor())
    if means is not None and stds is not None:
        tfs.append(transforms.Normalize(means, stds))
    return transforms.Compose(tfs)
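# Example call (argument values are illustrative assumptions): resize to 40,
# random-crop to 32x32, blur with sigma 1.0, normalize single-channel stats.
#
#     tf = get_reduced_transform(resize=40, size=32, blur=1.0,
#                                means=[0.5], stds=[0.5],
#                                adaptive_thresholding=False)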
def create_dataloaders(self):
    self.data_val = self.dataset_val
    examples_to_discard = len(self.data_val) - self.size_experiment
    self.data_val_without_blur, subdataset_original_val_to_discard = \
        random_split(self.data_val,
                     (self.size_experiment, examples_to_discard))
    self.data_val_with_blur = deepcopy(self.data_val_without_blur)
    new_transform = transforms.Compose([
        transforms.Resize((32, 32), Image.BILINEAR),
        transforms.GaussianBlur(13, sigma=(10, 20)),
        transforms.ToTensor(),
        transforms.Normalize(0.5, 0.5)
    ])
    self.data_val_with_blur.dataset.dataset.transform = new_transform
    self.data_val_without_blur = DataLoader(
        dataset=self.data_val_without_blur,
        batch_size=self.config.batch_size,
        num_workers=self.config.NUM_WORKERS,
        pin_memory=True,
        shuffle=False,
    )
    self.data_val_with_blur = DataLoader(
        dataset=self.data_val_with_blur,
        batch_size=self.config.batch_size,
        num_workers=self.config.NUM_WORKERS,
        pin_memory=True,
        shuffle=False,
    )
def noise_pipeline(p, sigma):
    return random_apply(
        [
            transforms.GaussianBlur(3, sigma=sigma),
            GaussianNoise(0.0, (0.02, 0.08))
        ],
        p=p,
    )
def all_transforms():
    return transforms.RandomApply(
        [
            # transforms.RandomHorizontalFlip(p=1.0),
            transforms.GaussianBlur(kernel_size=5, sigma=10.0),
            # CustomRescalingTransform([-5, 5], [0.85, 0.95], [0.85, 0.95])
        ],
        p=1.0)
def get_transforms(baseSize=(256, 256), targetSize=(224, 224), advance=False):
    if not advance:
        train_transformations = transforms.Compose([
            transforms.Resize(baseSize),
            # transforms.RandomResizedCrop(targetSize),
            transforms.RandomCrop(targetSize),
            # transforms.RandomChoice([
            #     transforms.Resize(targetSize),
            #     transforms.RandomResizedCrop(targetSize),
            #     transforms.RandomCrop(targetSize),
            #     transforms.CenterCrop(targetSize)
            # ]),
            transforms.RandomApply([
                transforms.ColorJitter(0.05, 0.05, 0.05, 0.05),
                transforms.GaussianBlur(5),
            ]),
            transforms.RandomHorizontalFlip(),
            # transforms.RandomRotation(10),
            transforms.RandomAffine(10),
            transforms.RandomGrayscale(),
            # Imgaug(),
            transforms.ToTensor(),  # scales to [0, 1]
            # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # scales to [-1, 1]
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
            transforms.RandomErasing()
        ])
    else:
        train_transformations = transforms.Compose([
            transforms.Resize(baseSize),
            # transforms.RandomResizedCrop(targetSize),
            transforms.RandomCrop(targetSize),
            Imgaug(),
            transforms.ToTensor(),  # scales to [0, 1]
            # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),  # scales to [-1, 1]
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225]),
        ])
    test_transformations = transforms.Compose([
        transforms.Resize(baseSize),
        transforms.CenterCrop(targetSize),  # 0.936
        # transforms.Resize(targetSize),  # 0.941
        transforms.ToTensor(),  # scales to [0, 1]
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225])
    ])
    return train_transformations, test_transformations
def __call__(self, data):
    label, input = data['label'], data['input']
    t = transforms.GaussianBlur(5, sigma=(0.1, 2.0))
    if random.random() < 0.5:
        input = t(input)
    data = {'label': label, 'input': input}
    return data
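# Equivalent sketch (an assumption, not the original code): hoisting the blur
# into __init__ and using RandomApply avoids rebuilding the transform on every
# call while keeping the 50% application probability.
#
#     self.blur = transforms.RandomApply(
#         [transforms.GaussianBlur(5, sigma=(0.1, 2.0))], p=0.5)
#     ...
#     data['input'] = self.blur(data['input'])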
def __call__(self, image, bboxes):
    if isinstance(image, torch.Tensor):
        image = T.ToPILImage()(image)
    # Image.Image is the PIL image class; Image alone is the module.
    assert isinstance(image, (JpegImageFile, Image.Image)), \
        'GAUSSIAN BLUR: problem with the type of the image: {}'.format(type(image))
    # TODO: what about contrast and hue?
    image = T.GaussianBlur(kernel_size=3, sigma=(0.1, 2.0))(image)
    return image, bboxes
def __init__(self, batch_size, val_split=0.2):
    super().__init__()
    self.batch_size = batch_size
    self.val_split = val_split
    self.test_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])
    # random crop, color jitter etc.
    self.train_transform = transforms.Compose([
        transforms.Resize(256),
        transforms.RandomResizedCrop(224, scale=(0.2, 1.)),
        transforms.RandomApply(
            [transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)],  # not strengthened
            p=0.8),
        transforms.RandomGrayscale(p=0.2),
        transforms.RandomApply([transforms.GaussianBlur([1, 1])], p=0.5),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.491, 0.482, 0.446],
                             std=[0.247, 0.243, 0.261]),
    ])
    # train and test transforms needed
    self.questions_file = "/data5/shashank2000/final_json/OpenEnded_mscoco_train2014_questions.json"
    self.answers_file = "/data5/shashank2000/final_json/mscoco_train2014_annotations.json"
    self.coco_loc = "/mnt/fs0/datasets/mscoco/train2014"
    # Enter paths for questions_file, answers_file and the directory of the
    # COCO images that correspond to them:
    # self.questions_file = "../../datasets/questions.json"
    # self.answers_file = "../../datasets/answers.json"  # copy and then unarchive instead?
    # self.coco_loc = "../../datasets/train2014"
    # do something about self.dims?
    self.train_dataset = JeopardyDataset(self.questions_file, self.answers_file,
                                         self.coco_loc, self.train_transform,
                                         train=True)
    self.test_dataset = JeopardyDataset(self.questions_file, self.answers_file,
                                        self.coco_loc, self.test_transform,
                                        word2idx=self.train_dataset.word2idx,
                                        train=False)
    self.vl = self.get_vocab_length()
def __init__(self, args, mode='train', downstream=False):
    if mode == 'train':
        data_list = self.train_list
    else:
        data_list = self.test_list

    self.targets = []
    self.data = []
    self.args = args
    self.downstream = downstream

    for file_name, checksum in data_list:
        file_path = os.path.join(args.base_dir, file_name)
        with open(file_path, 'rb') as f:
            entry = pickle.load(f, encoding='latin1')
            self.data.append(entry['data'])
            if 'labels' in entry:
                self.targets.extend(entry['labels'])
            else:
                self.targets.extend(entry['fine_labels'])

    self.data = np.vstack(self.data).reshape(-1, 3, 32, 32)
    self.data = self.data.transpose((0, 2, 3, 1))  # convert to HWC

    self.transform1 = transforms.Compose([
        transforms.RandomResizedCrop(self.args.img_size, scale=(0.2, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
        transforms.RandomGrayscale(0.2),
        # NOTE: int(img_size * 0.1) must be odd for torchvision's GaussianBlur
        transforms.GaussianBlur(kernel_size=int(self.args.img_size * 0.1),
                                sigma=(0.1, 2.0)),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
    self.transform2 = transforms.Compose([
        transforms.RandomResizedCrop(self.args.img_size, scale=(0.2, 1.0)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomApply([transforms.ColorJitter(0.4, 0.4, 0.4, 0.1)], p=0.8),
        transforms.RandomGrayscale(0.2),
        transforms.GaussianBlur(kernel_size=int(self.args.img_size * 0.1),
                                sigma=(0.1, 2.0)),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])
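# Hedged sketch of the matching __getitem__ (an assumption; SimCLR-style
# datasets typically return two independently augmented views per image):
#
#     def __getitem__(self, idx):
#         img = Image.fromarray(self.data[idx])
#         return self.transform1(img), self.transform2(img), self.targets[idx]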
def _make_default_aug(image_size):
    # default SimCLR augmentation
    return torch.nn.Sequential(
        RandomApply(T.ColorJitter(0.8, 0.8, 0.8, 0.2), p=0.3),
        T.RandomGrayscale(p=0.2),
        T.RandomHorizontalFlip(),
        RandomApply(T.GaussianBlur((3, 3), (1.0, 2.0)), p=0.2),
        T.RandomResizedCrop((image_size, image_size)),
        T.Normalize(mean=torch.tensor([0.485, 0.456, 0.406]),
                    std=torch.tensor([0.229, 0.224, 0.225])),
    )
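# Usage sketch (shapes are illustrative): since the pipeline is a
# torch.nn.Sequential of tensor transforms, it can run on an image batch.
#
#     aug = _make_default_aug(image_size=96)
#     views = aug(torch.rand(8, 3, 96, 96))  # -> (8, 3, 96, 96)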
def train(img_size=224, target_frame=56):
    return transforms.Compose([
        ReduceFrame(target_frame=target_frame),
        transforms.RandomChoice([
            transforms.GaussianBlur(kernel_size=3),
            transforms.ColorJitter(brightness=0.5, contrast=0.5,
                                   saturation=0.5, hue=0.5)
        ]),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.RandomResizedCrop(img_size, scale=(0.66, 1.0),
                                     ratio=(3. / 4., 4. / 3.)),
        transforms.Normalize(mean=[0.471, 0.448, 0.408],
                             std=[0.234, 0.239, 0.242]),
        Expandframe(target_frame=target_frame),
    ])
def __init__(self, config: dict, from_timm=True, from_th_vision=False):
    super(Model, self).__init__()
    try:
        self.save_hyperparameters(config)
    except Exception:
        pass

    self.train_transforms = nn.Sequential(
        transforms.Resize(size=(Config.resize, Config.resize)),
        transforms.RandomHorizontalFlip(p=.7),
        transforms.RandomVerticalFlip(p=.3),
        transforms.RandomRotation(degrees=25),
        transforms.CenterCrop(size=(Config.img_h, Config.img_w)),
        transforms.ColorJitter(brightness=(0.4, 1), contrast=.2,
                               saturation=0, hue=0),
        transforms.GaussianBlur(kernel_size=3))
    self.validation_transforms = nn.Sequential(
        transforms.Resize(size=(Config.resize, Config.resize)),
        transforms.RandomRotation(degrees=25),
        transforms.CenterCrop(size=(Config.img_h, Config.img_w)),
        transforms.ColorJitter(brightness=(0.45, 1), contrast=.1,
                               saturation=.1, hue=0.1),
        transforms.GaussianBlur(kernel_size=3))

    # get backbone
    if from_timm:
        self.encoder = create_model(model_name=self.hparams.base_model,
                                    pretrained=True)
    else:
        self.encoder = getattr(torchvision.models,
                               self.hparams.base_model)(pretrained=True)

    # create classification layer
    self.classifier = th.nn.Linear(in_features=1000, out_features=5)
    self.dropout = th.nn.Dropout(p=.35)
def __init__(self, n_detectors, embed_size, pretrained_alex, net, div_transform):
    super(CNN_layers, self).__init__()
    self.n_detectors = n_detectors
    # conv nets
    self.conv = nn.ModuleList(retrieve_convnets(self.n_detectors, embed_size,
                                                pretrained_alex, net=net))
    self.transforms = [to_hsv(),
                       kornia.filters.Sobel(),
                       remove_channel(0),
                       transforms.Grayscale(num_output_channels=3),
                       transforms.GaussianBlur(5, sigma=(0.1, 2.0)),
                       transforms.ColorJitter(brightness=0.25, contrast=0.25,
                                              saturation=0.25, hue=0.4),
                       identity()]
    self.div_transform = div_transform