def get_train_dataset(self, iteration):
    """Lazily build and cache the ImageNet-100 training dataset.

    :param iteration: unused; kept for interface compatibility with callers.
    :return: the cached HDF5-backed training dataset.
    """
    if self.train_dataset is None:
        # Build the pipeline once and splice AutoAugment in when enabled,
        # instead of duplicating the whole transform list for each branch.
        transform_list = [
            RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
        ]
        if self.use_auto_augment:
            transform_list.append(aa.ImageNetPolicy())
        transform_list += [
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225],
                                 inplace=True),
        ]
        transform = transforms.Compose(transforms=transform_list)
        self.train_dataset = HDF5Dataset(
            hdf5_file=os.path.expanduser("~/nta/data/imagenet/imagenet.hdf5"),
            root="train",
            classes=IMAGENET_CLASS_SUBSETS[100],
            transform=transform)
    return self.train_dataset
def get_transforms(augment):
    """Build ImageNet-style train/validation transform pipelines.

    :param augment: ``False`` (no train augmentation; train == validation),
        ``True`` (full augmentation incl. color jitter and PCA lighting),
        ``"torchvision"`` (crop+flip only), or ``"torchvision2"``
        (resize + random crop + flip).
    :return: dict with ``'training'`` and ``'validation'`` Compose pipelines.
    :raises ValueError: if ``augment`` is not one of the supported values.
    """
    valid_t = Compose([Resize(256), CenterCrop(224), ToTensor(),
                       Normalize(**_ImageNet['Normalize'])])
    # Equality (not identity) comparisons kept so 0/1 behave like the original.
    if augment == False:  # noqa: E712
        train_t = valid_t
    elif augment == True:  # noqa: E712
        train_t = Compose([RandomResizedCrop(224), RandomHorizontalFlip(),
                           ToTensor(), ColorJitter(),
                           Lighting(_ImageNet['PCA']),
                           Normalize(**_ImageNet['Normalize'])])
    elif augment == "torchvision":
        train_t = Compose([RandomResizedCrop(224), RandomHorizontalFlip(),
                           ToTensor(), Normalize(**_ImageNet['Normalize'])])
    elif augment == "torchvision2":
        train_t = Compose([Resize(256), RandomCrop(224),
                           RandomHorizontalFlip(), ToTensor(),
                           Normalize(**_ImageNet['Normalize'])])
    else:
        # assert(False) would be stripped under `python -O`; raise instead.
        raise ValueError(f"Unknown augment option: {augment!r}")
    transforms = {
        'training': train_t,
        'validation': valid_t
    }
    return transforms
def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.), interpolation=Image.BILINEAR):
    # Cooperative super() is deliberately bypassed: each base __init__ is
    # called explicitly so RandomResizedCrop sets up the crop parameters
    # first, then DynamicSize is initialised from the resulting
    # ``self.size`` (RandomResizedCrop normalises ``size`` internally).
    RandomResizedCrop.__init__(self, size, scale=scale, ratio=ratio, interpolation=interpolation)
    DynamicSize.__init__(self, self.size)
def __init__(self, data, info=None):
    """Wrap a trajectory dataset of uint8 image observations.

    :param data: dict whose ``'observations'`` array has shape
        (num_trajectories, trajectory_length, ...) with dtype uint8.
    :param info: optional metadata, stored as-is.
    """
    assert data['observations'].dtype == np.uint8
    self.size = data['observations'].shape[0]         # number of trajectories
    self.traj_length = data['observations'].shape[1]  # steps per trajectory
    self.data = data
    self.info = info
    # Augmentations for observations: mild color jitter plus a crop that
    # keeps ~90% of the image and resizes to 48x48.
    self.jitter = ColorJitter((0.5, 1.5), (0.9, 1.1), (0.9, 1.1), (-0.1, 0.1))
    self.crop = RandomResizedCrop((48, 48), (0.9, 0.9), (1, 1))
    # If no environment state was stored, fall back to each trajectory's
    # first observation as a stand-in.
    if 'env' not in self.data:
        self.data['env'] = self.data['observations'][:, 0, :]
def test_single_transform(self):
    """Smoke-test SequentialWrapper on one ACDC sample and display the
    result for visual inspection (requires a display; not a pure unit test).
    """
    from torchvision.transforms import Compose, RandomCrop, RandomRotation, ColorJitter, ToTensor, RandomResizedCrop
    # com_transform is applied jointly to image and target so the geometry
    # stays aligned; image_transform only perturbs image intensities.
    transforms = SequentialWrapper(
        com_transform=Compose([
            RandomRotation(45),
            RandomCrop(224),
            RandomResizedCrop(size=192, scale=(0.8, 1.2))
        ]),
        image_transform=Compose([
            ColorJitter(brightness=[0.8, 1.2], contrast=[0.8, 1.2], saturation=1),
            ToTensor()
        ]),
        target_transform=ToLabel())
    dataset = ACDCDataset(
        root_dir=self._root,
        mode="train",
        transforms=transforms,
    )
    # Fixed index keeps the sample choice deterministic.
    (image, target), filename = dataset[4]
    from deepclustering3.viewer import multi_slice_viewer_debug
    import matplotlib.pyplot as plt
    multi_slice_viewer_debug(image, target, no_contour=True)
    plt.show()
def build_transforms(cfg, split='train'):
    """Build the transform pipeline named in ``cfg['input']['transforms']``.

    :param cfg: config dict with an ``'input'`` section containing
        ``'width'``, ``'height'`` and a list of transform names.
    :param split: ``'train'`` enables random scale/ratio; any other split
        uses identity scale/ratio (the "crop" becomes a plain resize).
    :return: a torchvision ``Compose`` ending in ToTensor + Normalize.
    :raises NotImplementedError: for an unrecognised transform name.
    """
    is_train = split == 'train'
    input_cfg = cfg['input']
    width = input_cfg['width']
    height = input_cfg['height']
    transforms = []
    for transform in input_cfg['transforms']:
        if transform == 'random_resized_crop':
            # Eval uses identity scale/ratio so output is deterministic.
            scale = (0.5, 1.2) if is_train else (1.0, 1.0)
            ratio = (0.75, 1.3) if is_train else (1.0, 1.0)
            transforms.append(
                RandomResizedCrop(
                    (width, height),
                    scale=scale,
                    ratio=ratio,
                )
            )
        elif transform == 'random_rotate':
            transforms.append(RandomRotate())
        else:
            # Name the offending entry so config errors are diagnosable.
            raise NotImplementedError(f"Unsupported transform: {transform!r}")
    transforms += [
        ToTensor(),
        Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]
    return Compose(transforms)
def get_transform(
    target_size=(288, 288),
    transform_list='random_crop, horizontal_flip',  # random_crop | keep_aspect
    augment_ratio=0.5,
    is_train=True,
):
    """Build a transform pipeline from a comma-separated list of names.

    Resize-type transforms ('random_crop', 'keep_aspect') are always applied;
    the remaining augmentations are wrapped in RandomApply(p=augment_ratio).

    :param target_size: output (H, W).
    :param transform_list: names separated by ', '.
    :param augment_ratio: probability that the augmentation group is applied.
    :param is_train: training mode widens crop scale and jitter strength.
    :return: a torchvision ``Compose``.
    """
    transform = list()
    transform_list = transform_list.split(', ')
    augments = list()
    for transform_name in transform_list:
        if transform_name == 'random_crop':
            # BUG FIX: the split-dependent scale was computed but the call
            # passed a hard-coded (0.8, 1.0); use the computed value.
            scale = (0.5, 1.0) if is_train else (0.8, 1.0)
            transform.append(RandomResizedCrop(target_size, scale=scale))
        elif transform_name == 'keep_aspect':
            transform.append(KeepAsepctResize(target_size))
        elif transform_name == 'horizontal_flip':
            augments.append(RandomHorizontalFlip())
        elif transform_name == 'vertical_flip':
            augments.append(RandomVerticalFlip())
        elif transform_name == 'random_rotate':
            augments.append(RandomRotate())
        elif transform_name == 'color_jitter':
            brightness = 0.1 if is_train else 0.05
            contrast = 0.1 if is_train else 0.05
            augments.append(
                ColorJitter(
                    brightness=brightness,
                    contrast=contrast,
                    saturation=0,
                    hue=0,
                ))
    transform.append(RandomApply(augments, p=augment_ratio))
    return Compose(transform)
def CropResize(options):
    """Standard training pipeline: random resized crop, horizontal flip,
    tensor conversion, and per-channel normalisation from ``options``."""
    steps = [
        RandomResizedCrop(size=options.image_size),
        RandomHorizontalFlip(0.5),
        ToTensor(),
        Normalize(options.image_mean, options.image_std),
    ]
    return Compose(steps)
def get_dataloader(
        setting: DataSetSetting, root_dir: str,
        batch_size: int) -> Tuple[DataLoader, DataLoader, Dataset, Dataset]:
    """Build train/test dataloaders and datasets for the configured dataset.

    :param setting: dataset configuration (type and image size).
    :param root_dir: download/cache root for torchvision datasets.
    :param batch_size: batch size for both loaders.
    :return: (train_loader, test_loader, train_dataset, test_dataset)
    """
    train_transforms = Compose([
        Resize(setting.size),
        RandomResizedCrop(size=setting.size, scale=(0.3, 0.3)),
        ToTensor()
    ])
    test_transforms = Compose([Resize(setting.size), ToTensor()])
    train_dataset = None
    test_dataset = None
    # TODO: register additional datasets here.
    if setting.dataset_type == DATASET_STL:
        train_dataset = torchvision.datasets.STL10(
            root=root_dir, download=True, split="train",
            transform=train_transforms)
        test_dataset = torchvision.datasets.STL10(
            root=root_dir, download=True, split="test",
            transform=test_transforms)
    elif setting.dataset_type == DATASET_CIFAR:
        train_dataset = torchvision.datasets.CIFAR10(
            root=root_dir, download=True, train=True,
            transform=train_transforms)
        test_dataset = torchvision.datasets.CIFAR10(
            root=root_dir, download=True, train=False,
            transform=test_transforms)
    assert train_dataset is not None and test_dataset is not None, f"Not supported setting: {setting.dataset_type}"
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=True)
    return train_loader, test_loader, train_dataset, test_dataset
def imgreader_test(file_path, dimension=256, transform='val', grey_scale=False):
    """Load one image from disk and run the named preprocessing pipeline.

    :param file_path: path to the image file.
    :param dimension: square output size.
    :param transform: 'train' (random crop) or 'val' (deterministic resize).
    :param grey_scale: convert to single-channel 'L' instead of 'RGB'.
    :return: the preprocessed tensor.
    """
    normalize = Normalize(mean=[0.485, 0.456, 0.406],
                          std=[0.229, 0.224, 0.225])
    pipelines = {
        'train': Compose([
            RandomResizedCrop(size=(dimension, dimension)),
            ToTensor(),
            normalize
        ]),
        'val': Compose([
            Resize(size=(dimension, dimension)),
            ToTensor(),
            normalize
        ])
    }
    # The pipeline runs while the file is still open, because PIL loads
    # pixel data lazily.
    with open(file_path, 'rb') as f:
        raw = Image.open(f)
        img = raw.convert('L' if grey_scale else 'RGB')
        return pipelines[transform](img)
def get_data_loaders(train_files, val_files, img_size=224):
    """Create train/val DataLoaders for MaskDataset.

    Training uses heavy augmentation (jitter, crop, affine, rotation, flip);
    validation only resizes. Removed a stale commented-out duplicate of the
    train transform.

    :param train_files: files for the training split.
    :param val_files: files for the validation split.
    :param img_size: square output size for both splits.
    :return: (train_loader, val_loader)
    """
    train_transform = Compose([
        ColorJitter(0.3, 0.3, 0.3, 0.3),
        RandomResizedCrop(img_size, scale=(0.8, 1.2)),
        RandomAffine(10.),
        RandomRotation(13.),
        RandomHorizontalFlip(),
        ToTensor(),
    ])
    val_transform = Compose([
        Resize((img_size, img_size)),
        ToTensor(),
    ])
    train_loader = DataLoader(MaskDataset(train_files, train_transform),
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              pin_memory=True,
                              num_workers=4)
    val_loader = DataLoader(MaskDataset(val_files, val_transform),
                            batch_size=BATCH_SIZE,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=4)
    return train_loader, val_loader
def load_dataset(batch_size=64):
    """Create train/validation/test loaders for the autism_faces folders.

    Bug fixes: random augmentation (crop/flip/rotation) was previously also
    applied to validation and test data, and those loaders were shuffled;
    evaluation now uses a deterministic resize and no shuffling so metrics
    are stable across runs.

    :param batch_size: batch size for the train and test loaders
        (validation keeps its original fixed batch size of 100).
    :return: (train_loader, validation_loader, test_loader)
    """
    data_path = 'autism_faces/train/'
    valid_path = 'autism_faces/valid/'
    test_path = 'autism_faces/test/'
    train_transform = transforms.Compose([
        RandomResizedCrop(224),
        RandomHorizontalFlip(),
        RandomRotation(10),
        ToTensor()
    ])
    # Deterministic pipeline for the evaluation splits.
    eval_transform = transforms.Compose([
        transforms.Resize((224, 224)),
        ToTensor()
    ])
    train_dataset = torchvision.datasets.ImageFolder(root=data_path,
                                                     transform=train_transform)
    validation_dataset = torchvision.datasets.ImageFolder(
        root=valid_path, transform=eval_transform)
    test_dataset = torchvision.datasets.ImageFolder(root=test_path,
                                                    transform=eval_transform)
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               num_workers=0,
                                               shuffle=True)
    validation_loader = torch.utils.data.DataLoader(validation_dataset,
                                                    batch_size=100,
                                                    num_workers=0,
                                                    shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_dataset,
                                              batch_size=batch_size,
                                              num_workers=0,
                                              shuffle=False)
    return train_loader, validation_loader, test_loader
def __init__(self, data_dir, out_size=800):
    """Prepare background-compositing assets.

    Locates the sibling 'background' directory next to the 'train'/'test'
    folder referenced by ``data_dir``, eagerly loads every background image
    as RGB, and sets up the crop/color augmentations used when compositing.

    :param data_dir: path whose name contains 'train' or 'test'.
    :param out_size: output size of the random background crop.
    """
    # The background folder lives beside the split folder, e.g.
    # ".../train" -> ".../background".
    if 'train' in data_dir:
        self.bg_dir = data_dir[0:data_dir.find('train')] + 'background'
    else:
        self.bg_dir = data_dir[0:data_dir.find('test')] + 'background'
    self.bg_name = [
        name for name in os.listdir(self.bg_dir)
        if os.path.isfile(os.path.join(self.bg_dir, name))
    ]
    self.num_bg = len(self.bg_name)
    # All backgrounds are kept in memory for the dataset's lifetime.
    self.bg_object = [
        open_img(os.path.join(self.bg_dir, bg_name)).convert('RGB')
        for bg_name in self.bg_name
    ]
    self.bg_scale = (0.5, 1.0)          # crop keeps 50-100% of the background
    self.obj_scale = (0.2, 0.4, 0.6, 0.8, 1)  # discrete object scale choices
    self.bg_size = out_size
    self.rrc = RandomResizedCrop(size=out_size, scale=self.bg_scale)
    self.rrc_obj = self.class_resize(min_size=500, max_size=(600, 575, 550))
    self.color_jig = ColorJitter()
def MedT_preprocess_image_v3(img, train, mean=None, std=None) -> torch.Tensor:
    """Preprocess one image array for MedT.

    :param img: numpy image array (converted with ``Image.fromarray``).
    :param train: truthy enables the augmented training pipeline.
    :param mean: per-channel normalisation means; defaults to [0.5]*3.
    :param std: per-channel normalisation stds; defaults to [0.5]*3.
    :return: (tensor of shape (1, C, H, W), augmented PIL image or None).
    """
    if std is None:
        std = [0.5, 0.5, 0.5]
    if mean is None:
        mean = [0.5, 0.5, 0.5]
    if train:  # was `train == True`; plain truthiness is the intended check
        augment = Compose([
            Image.fromarray,
            # Near-square crop keeping 88-100% of the area.
            RandomResizedCrop(224, scale=(0.88, 1.0), ratio=(0.999, 1.001)),
            RandomHorizontalFlip(),
            RandomVerticalFlip(),
            RandomRot90()
        ])
        normalize = Compose([ToTensor(), Normalize(mean=mean, std=std)])
        augmented = augment(img)
        preprocessed = normalize(augmented).unsqueeze(0)
        return preprocessed, augmented
    preprocessing = Compose([
        Image.fromarray,
        Resize(size=224),
        ToTensor(),
        Normalize(mean=mean, std=std)
    ])
    return preprocessing(img).unsqueeze(0), None
def __getitem__(self, i):
    """Return one sample: augmented image tensor in [0, 1] (CHW), class id,
    and bounding box as (cx, cy, w, h)."""
    record = self.selection.iloc[i]
    imgpath = os.path.join(self.rootdir, record['imgpath'])
    _img = Image.open(imgpath)
    # Resize so the longer side equals ``allowed_size`` while matching the
    # precomputed target aspect ratio for this sample.
    if self.target_aspect[i] < 1:
        height = self.allowed_size
        width = int(height * self.target_aspect[i])
    else:
        width = self.allowed_size
        height = int(width / self.target_aspect[i])
    _img = _img.convert('RGB').resize((width, height), Image.BILINEAR)
    # Square random crop at the short side plus mild color jitter.
    _resizer = RandomResizedCrop(min(width, height))
    _jitterer = ColorJitter(0.1, 0.1, 0.1, 0.05)
    _img = _jitterer(_resizer(_img))
    img = T.FloatTensor(np.array(_img)).permute(2, 0, 1) / 255.  # HWC -> CHW
    _img.close()
    # NOTE(review): the box values come straight from the record and are not
    # rescaled alongside the image -- confirm callers expect coordinates in
    # the record's original space.
    return img, \
        T.LongTensor([record.cat_id]), \
        T.FloatTensor([[(record.xmin + record.xmax) / 2,
                        (record.ymin + record.ymax) / 2,
                        (record.xmax - record.xmin),
                        (record.ymax - record.ymin), ]])
def __init__(self, base_dataset, img_size=(32, 32), color_dist_strength=0.5):
    """Wrap ``base_dataset`` with a SimCLR-style augmentation pipeline.

    Parameters
    ----------
    base_dataset : Dataset
    img_size : tuple
    color_dist_strength : float
        Scales the color-distortion jitter (arXiv:2002.05709).
    """
    self.base_dataset = base_dataset
    # Color distortion per the SimCLR paper's PyTorch pseudocode:
    # brightness/contrast/saturation at 0.8*s, hue at 0.2*s, applied
    # with probability 0.8, followed by random grayscale at p=0.2.
    s = color_dist_strength
    jitter = ColorJitter(0.8 * s, 0.8 * s, 0.8 * s, 0.2 * s)
    self.transform = Compose([
        RandomResizedCrop(img_size),
        RandomApply([jitter], p=0.8),
        RandomGrayscale(p=0.2),
        ToTensor()
    ])
def get_data_loaders(train_files, val_files, img_size=224):
    """Create train/val DataLoaders for HairStyleDataset.

    Bug fix: the validation loader was shuffled; evaluation order is now
    deterministic (shuffle=False), matching standard practice. Also removed
    a stale commented-out ColorJitter line.

    :param train_files: files for the training split.
    :param val_files: files for the validation split.
    :param img_size: square output size for both splits.
    :return: (train_loader, val_loader)
    """
    train_transform = Compose([
        RandomResizedCrop(img_size, scale=(0.8, 1.2)),
        RandomAffine(10.),
        RandomRotation(13.),
        RandomHorizontalFlip(),
        ToTensor(),
        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    val_transform = Compose([
        Resize((img_size, img_size)),
        ToTensor(),
        Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    train_loader = DataLoader(HairStyleDataset(train_files, train_transform),
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              pin_memory=True,
                              num_workers=4)
    val_loader = DataLoader(HairStyleDataset(val_files, val_transform),
                            batch_size=BATCH_SIZE,
                            shuffle=False,
                            pin_memory=True,
                            num_workers=4)
    return train_loader, val_loader
def __getitem__(self, index):
    """Load a grayscale slice, optionally augment it, and return
    (target, downsampled image, mask) to match the fastMRI dataset API."""
    x = cv2.imread(
        os.path.join(self.root, self.image_set, self.ids[index]),
        cv2.IMREAD_GRAYSCALE)
    # Every slice is normalised to 256x256 before augmentation.
    if x.shape != (256, 256):
        x = cv2.resize(x, (256, 256))
    # data augmentation
    if self.transform:
        transformations = Compose([
            ToPILImage(),
            RandomRotation(degrees=10, resample=PIL.Image.BICUBIC),
            RandomHorizontalFlip(),
            RandomResizedCrop(size=256, scale=(0.90, 1), ratio=(0.95, 1.05), interpolation=PIL.Image.BICUBIC),
            ToTensor(),
        ])
        # ToPILImage expects HWC, so add a channel axis; multiply by 255
        # because ToTensor rescales intensities to [0, 1].
        x = x[..., np.newaxis]
        x = transformations(x).float().numpy() * 255
        x = x[0]  # drop the channel axis again
    # NOTE(review): Downsample presumably applies the sampling mask
    # (fastMRI-style undersampling) -- confirm against its definition.
    image, _, _ = Downsample(x, self.mask)
    x = x / 255.
    image = image / 255.
    target = torch.from_numpy(x).float().unsqueeze(0)
    image = torch.from_numpy(image).float().unsqueeze(0)
    mask = [0]  # return something to be compatible with fastMRI dataset
    return target, image, mask
def process_images(self, raw, clean):
    """Apply one shared random resized crop to both images, derive the hole
    mask from their difference, and return (corrupted, keep-mask, clean)."""
    top, left, height, width = RandomResizedCrop.get_params(
        raw, scale=(0.5, 2.0), ratio=(3. / 4., 4. / 3.))
    raw_img = resized_crop(raw, top, left, height, width,
                           size=self.img_size, interpolation=Image.BICUBIC)
    clean_img = resized_crop(clean, top, left, height, width,
                             self.img_size, interpolation=Image.BICUBIC)
    # The mask must be computed before any further augmentation alters
    # the pixel values.
    hole_mask = to_tensor(self.get_mask(raw_img, clean_img))
    keep_mask = (1 - hole_mask).expand(3, -1, -1)
    clean_img = self.transformer(clean_img)
    corrupted_img = clean_img * keep_mask
    return corrupted_img, keep_mask, clean_img
def set_epoch(self, epoch):
    """Update the training crop size per the progressive-resize schedule."""
    # Locate the schedule entry whose start epoch is the largest one <= epoch.
    start_epoch = self.epochs[bisect(self.epochs, epoch) - 1]
    new_size = self.progressive_resize[start_epoch]
    # Rebuild the crop transform only when the target size actually changes.
    if new_size != self.image_size:
        self.resize = RandomResizedCrop(new_size)
        self.image_size = new_size
def __init__(self, filepath, split='train', data_augmentation=False, img_size=None):
    """Open one split of an HDF5 image dataset and prepare augmentations.

    :param filepath: path to the HDF5 file.
    :param split: top-level group name to read (e.g. 'train').
    :param data_augmentation: when True, build the augmentation transforms.
    :param img_size: observation size override; defaults to the stored shape.
    """
    self.filepath = filepath
    self.split = split
    # open the dataset file (kept open for the object's lifetime)
    self.file = h5py.File(self.filepath, "r")
    self.data_group = self.file[self.split]
    self.n_images = self.data_group['observations'].shape[0]
    self.has_labels = 'labels' in self.data_group
    if img_size is not None:
        self.img_size = img_size
    else:
        # Fall back to the shape of the first stored observation.
        self.img_size = self.data_group['observations'][0].shape
    self.data_augmentation = data_augmentation
    if self.data_augmentation:
        # Pad so a rotated image never exposes empty corners:
        # max rotation needs padding of [sqrt(2*128^2)-128 = 53.01].
        radius = max(self.img_size[0], self.img_size[1]) / 2
        padding_size = int(np.sqrt(2 * np.power(radius, 2)) - 128)
        self.spheric_pad = SphericPad(padding_size=padding_size)
        self.random_horizontal_flip = RandomHorizontalFlip(0.2)
        self.random_vertical_flip = RandomVerticalFlip(0.2)
        self.random_resized_crop = RandomResizedCrop(size=self.img_size)
        self.random_rotation = RandomRotation(40)
        self.center_crop = CenterCrop(self.img_size)
        # Cyclic shifts along image axes (shift set at call time).
        self.roll_y = Roll(shift=0, dim=1)
        self.roll_x = Roll(shift=0, dim=2)
def process_images(self, raw, clean):
    """Crop raw/clean with a shared random window, build a dilated binary
    mask of their differences, and return (difference image, inverted mask,
    transformed clean image)."""
    i, j, h, w = RandomResizedCrop.get_params(raw, scale=(0.5, 2.0), ratio=(3. / 4., 4. / 3.))
    raw_img = resized_crop(raw, i, j, h, w, size=self.img_size, interpolation=Image.BICUBIC)
    clean_img = resized_crop(clean, i, j, h, w, self.img_size, interpolation=Image.BICUBIC)
    # get mask before further image augment
    mask = self.get_mask(raw_img, clean_img)
    mask_t = to_tensor(mask)
    # Binarise, then dilate by 2px via max-pooling so thin strokes are
    # fully covered by the mask.
    mask_t = (mask_t > 0).float()
    mask_t = torch.nn.functional.max_pool2d(mask_t, kernel_size=5, stride=1, padding=2)
    # mask_t = mask_t.byte()
    # NOTE(review): ImageChops.difference is applied between the PIL mask
    # and the clean image -- confirm get_mask returns a PIL image of the
    # same mode/size as clean_img.
    raw_img = ImageChops.difference(mask, clean_img)
    return self.transformer(raw_img), 1 - mask_t, self.transformer(
        clean_img)
def build_datasets(
        conf) -> Tuple[Mapping[str, PcDataset], Mapping[str, Metadata]]:
    """Build train/val (and optional test) PcDatasets from the config.

    Accepts either a single "trainval" entry (randomly split by
    ``conf.trainval.split``) or explicit "train" and "val" entries; a
    "test" entry is honoured in both cases.

    :param conf: dataset configuration.
    :return: (datasets, metadata) dicts keyed by split name.
    :raises ValueError: if neither "trainval" nor "train"+"val" is present.
    """
    register_datasets(conf.folder)
    datasets = {}
    metadata = {}
    if "trainval" in conf:
        # A single catalog entry serves both splits; metadata is shared.
        metadata["train"] = metadata["val"] = MetadataCatalog.get(
            conf.trainval.name)
        data_dicts = DatasetCatalog.get(conf.trainval.name)
        data_dicts = data_dicts
        # NOTE(review): both halves of the random split share the training
        # augmentation (RandomResizedCrop) -- the val half is augmented too;
        # confirm this is intentional.
        datasets["train"], datasets["val"] = torch.utils.data.random_split(
            PcDataset(data_dicts, metadata["train"], [RandomResizedCrop(224)]),
            [
                int(conf.trainval.split * len(data_dicts)),
                len(data_dicts) - int(conf.trainval.split * len(data_dicts)),
            ],
        )
    elif "train" in conf and "val" in conf:
        metadata["train"] = MetadataCatalog.get(conf.train.name)
        train_data_dicts = DatasetCatalog.get(conf.train.name)
        datasets["train"] = PcDataset(train_data_dicts, metadata["train"],
                                      [RandomResizedCrop(224)])
        metadata["val"] = MetadataCatalog.get(conf.val.name)
        val_data_dicts = DatasetCatalog.get(conf.val.name)
        # Validation uses deterministic resize + center crop.
        datasets["val"] = PcDataset(val_data_dicts, metadata["val"],
                                    [Resize(224), CenterCrop(224)])
    else:
        raise ValueError(f"Invalid data specification:\n{conf.pretty()}")
    logger.info(
        f"Data split: {len(datasets['train'])} train, {len(datasets['val'])} val ("
        f"{100 * len(datasets['train']) / (len(datasets['train']) + len(datasets['val'])):.1f}/"
        f"{100 * len(datasets['val']) / (len(datasets['train']) + len(datasets['val'])):.1f}%)"
    )
    if "test" in conf:
        metadata["test"] = MetadataCatalog.get(conf.test.name)
        test_data_dicts = DatasetCatalog.get(conf.test.name)
        datasets["test"] = PcDataset(
            test_data_dicts, metadata["test"], [Resize(224), CenterCrop(224)])
        logger.info(f"Test split: {len(datasets['test'])} test")
    return datasets, metadata
def __init__(self, *, root: str, num_classes: int = 1000, transforms: Optional[Dict[str, Callable]] = None) -> None:
    """ImageNet wrapper restricted to an evenly spaced subset of classes.

    Default train/test transforms are supplied when not given. After the
    underlying datasets are built, samples/targets/classes are filtered to
    the selected classes and labels are remapped to 0..num_classes-1.

    :param root: ImageNet root directory.
    :param num_classes: number of classes to keep (every
        (1000 // num_classes)-th original class).
    :param transforms: optional per-split transform dict ('train'/'test').
    """
    if transforms is None:
        transforms = dict()
    if 'train' not in transforms:
        transforms['train'] = Compose([
            RandomResizedCrop(224),
            RandomHorizontalFlip(),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406],
                      std=[0.229, 0.224, 0.225])
        ])
    if 'test' not in transforms:
        transforms['test'] = Compose([
            Resize(256),
            CenterCrop(224),
            ToTensor(),
            Normalize(mean=[0.485, 0.456, 0.406],
                      std=[0.229, 0.224, 0.225])
        ])
    super().__init__({
        split: ImageNetDataset(root=root, split=split,
                               transform=transforms[split])
        for split in ['train', 'test']
    })
    # Map each kept original class index to a new contiguous index.
    indices = dict()
    for k in range(num_classes):
        indices[k * (1000 // num_classes)] = k
    for dataset in self.values():
        # Keep only samples of the selected classes, remapping labels.
        samples = []
        for x, c in dataset.samples:
            if c in indices:
                samples.append((x, indices[c]))
        dataset.samples = samples
        targets = []
        for c in dataset.targets:
            if c in indices:
                targets.append(indices[c])
        dataset.targets = targets
        classes = []
        for c, x in enumerate(dataset.classes):
            if c in indices:
                classes.append(x)
        dataset.classes = classes
        # NOTE(review): class_to_idx keeps the ORIGINAL index ``c`` instead
        # of the remapped ``indices[c]`` used for samples/targets above --
        # confirm this asymmetry is intentional.
        class_to_idx = {}
        for x, c in dataset.class_to_idx.items():
            if c in indices:
                class_to_idx[x] = c
        dataset.class_to_idx = class_to_idx
def get_image_transformer(self):
    """Build the stochastic image pipeline: with probability 0.7 apply the
    jitter/grayscale/rotation/crop group, then convert to a tensor."""
    augmentation_group = RandomApply([
        ColorJitter(brightness=0.3, contrast=0.3, saturation=0.1, hue=0.1),
        RandomGrayscale(p=0.5),
        RandomRotation(degrees=[0., 45.]),
        RandomResizedCrop(size=[224, 224], scale=(0.3, 1.0)),
    ], p=0.7)
    return Compose([augmentation_group, ToTensor()])
def process_images(self, raw, clean):
    """Crop both images with the same random window and return them as a
    (transformed raw tensor, mask tensor) pair."""
    top, left, height, width = RandomResizedCrop.get_params(
        raw, scale=(0.1, 2), ratio=(3. / 4., 4. / 3.))
    cropped_raw = resized_crop(raw, top, left, height, width,
                               size=self.img_size,
                               interpolation=Image.BICUBIC)
    cropped_raw = self.transformer(cropped_raw)
    cropped_mask = resized_crop(clean, top, left, height, width,
                                self.img_size,
                                interpolation=Image.BICUBIC)
    return to_tensor(cropped_raw), to_tensor(cropped_mask)
def crop_and_resize_aug(image_batch):
    """Apply an independent random resized crop to each image in the batch
    (via a CPU PIL round-trip), moving the result to GPU when enabled."""
    crop = RandomResizedCrop(size=FLAGS.image_size)
    to_pil = ToPILImage(mode='RGB')
    to_tensor = ToTensor()
    aug = torch.zeros_like(image_batch)
    for idx in range(image_batch.shape[0]):
        aug[idx] = to_tensor(crop(to_pil(image_batch[idx].cpu())))
    if FLAGS.cuda:
        aug = aug.cuda()
    return aug
def apply_random_crop(device, data, size=32):
    """Random-resized-crop every image in ``data`` and stack the results.

    :param device: accepted for interface compatibility; unused here.
    :param data: iterable of image tensors.
    :param size: output crop size.
    :return: stacked tensor of cropped images.
    """
    cropper = Compose([RandomResizedCrop(size=size)])
    cropped = [ToTensor()(cropper(ToPILImage()(item))) for item in data]
    return torch.stack(cropped)
def train():
    """Train UNet on the tiny Cityscapes segmentation folder.

    Bug fixes relative to the original:
    - the loss called the ``nn.BCELoss`` constructor with tensors
      (``nn.BCELoss(pred, target)``) instead of instantiating the criterion
      and calling it;
    - checkpoints were only saved when ``save_dir`` already existed (the
      first qualifying epoch created the directory and skipped the save);
    - the epoch average divided by the last batch index ``i`` (off-by-one,
      and a ZeroDivisionError on an empty loader) instead of the batch count;
    - deprecated ``F.sigmoid`` replaced by ``torch.sigmoid``.
    """
    transforms = Compose([RandomResizedCrop(size=target_size), ToTensor()])
    train_data = SegFolderDataset(
        data_root=
        '/media/jintian/netac/permanent/datasets/Cityscapes/tiny_cityscapes',
        transforms=transforms)
    data_loader = DataLoader(dataset=train_data,
                             batch_size=batch_size,
                             num_workers=1)
    # Unet input may not be 512, it must be some other input
    model = UNet(colordim=3).to(device)
    optimizer = optim.SGD(model.parameters(),
                          momentum=0.9,
                          weight_decay=0.0005,
                          lr=0.0001)
    criterion = nn.BCELoss()
    for epoch in range(epochs):
        epoch_loss = 0
        num_batches = 0
        for i, batch_data in enumerate(data_loader):
            img, seg_mask = batch_data
            seg_mask_predict = model(img)
            seg_mask_probs = torch.sigmoid(seg_mask_predict)
            loss = criterion(seg_mask_probs.view(-1), seg_mask.view(-1))
            epoch_loss += loss.item()
            num_batches += 1
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            print('Epoch: {}, batch: {}, loss: {}'.format(epoch, i, loss))
        avg_loss = epoch_loss / max(num_batches, 1)
        print('Epoch {} finished. Average loss: {}'.format(epoch, avg_loss))
        if epoch % save_per_epoch == 0 and epoch != 0:
            # Create the directory if needed, then always save.
            os.makedirs(save_dir, exist_ok=True)
            torch.save(
                model.state_dict(),
                os.path.join(save_dir,
                             'seg_{}_{}.pth'.format(epoch, avg_loss)))
            print('Model has been saved.')
def process_images(self, raw, clean):
    """Crop raw/clean with one shared random window and return the
    (augmented raw image, difference mask) pair."""
    top, left, height, width = RandomResizedCrop.get_params(
        raw, scale=(0.5, 2.0), ratio=(3. / 4., 4. / 3.))
    raw_crop = resized_crop(raw, top, left, height, width,
                            size=self.img_size, interpolation=Image.BICUBIC)
    clean_crop = resized_crop(clean, top, left, height, width,
                              self.img_size, interpolation=Image.BICUBIC)
    # Derive the mask before the transformer perturbs the raw pixels.
    mask_tensor = self.get_mask(raw_crop, clean_crop)
    return self.transformer(raw_crop), mask_tensor