def get_celeba_data(split='train', num_samples=None, w=128, attr_num=None, attr_value=None, shuffle=True):
    """Load the CelebA attribute dataset and hand it to ``load_data``.

    Images are cropped to a fixed 128x128 face region, resized to ``w``
    and converted to tensors.  Construction is tried first against the
    local cache; on failure it retries with ``download=True``.

    Fix: the transform pipeline and constructor arguments were duplicated
    verbatim in both the ``try`` and ``except`` paths; they are now built
    once and only the ``download`` flag differs.

    Args:
        split: CelebA split name ('train', 'valid', 'test', 'all').
        num_samples: optional sample cap, forwarded to ``load_data``.
        w: output image side length after resizing.
        attr_num, attr_value: kept for interface compatibility; unused here.
        shuffle: forwarded to ``load_data``.

    Returns:
        Whatever ``load_data`` returns (called with ``has_cls=True``).
    """
    transform = transforms.Compose([
        CropTransform((25, 50, 25 + 128, 50 + 128)),
        transforms.Resize(w),
        transforms.ToTensor(),
    ])
    try:
        # Prefer the locally cached copy.
        celeba = CelebA(root=celeba_root, split=split, download=False,
                        target_type='attr', transform=transform)
    except Exception:
        # Cache missing or corrupt: fall back to downloading.
        celeba = CelebA(root=celeba_root, split=split, download=True,
                        target_type='attr', transform=transform)
    return load_data(celeba, num_samples, w, shuffle, has_cls=True)
def get_celeba_dataset(cfg, mode):
    """Build CelebA datasets and dataloaders for ``mode``.

    ``mode`` selects between plain attribute classification ('cls') and
    the disentanglement variant ('disent').  Returns ``(data, loader)``
    edicts; ``data`` has tr/val/te/all splits, ``loader`` wraps tr/val/te.
    """
    root = cfg[mode].dataset.root
    split_names = (('tr', 'train'), ('val', 'valid'), ('te', 'test'), ('all', 'all'))
    data = edict()
    if mode == 'cls':
        batch_size = cfg.cls.batch_size
        transform = th_transforms.Compose([th_transforms.ToTensor()])
        for key, split in split_names:
            data[key] = CelebA(root, split=split, transform=transform)
    elif mode == "disent":
        batch_size = cfg.disent.batch_size
        N = cfg.disent.N
        for key, split in split_names:
            data[key] = DisentCelebAv1(root, N, split=split)
    else:
        raise ValueError(f"Unknown CelebA mode {mode}")
    loader_kwargs = {'batch_size': batch_size, 'shuffle': True,
                     'drop_last': True, 'num_workers': cfg[mode].workers}
    loader = edict()
    # Note: no loader is created for the 'all' split (matches usage).
    for key in ('tr', 'val', 'te'):
        loader[key] = DataLoader(data[key], **loader_kwargs)
    return data, loader
def load_celeba(path, transforms=None):
    """Return the CelebA attribute dataset.

    Tries ``path`` first without downloading; if that fails for any
    reason, falls back to downloading into the current directory.
    """
    try:
        return CelebA(path, transform=transforms, target_type='attr',
                      download=False)
    except Exception:
        # Local copy unusable: download into './' instead.
        return CelebA('./', transform=transforms, target_type='attr',
                      download=True)
def val_dataloader(self):
    """Validation loader: CelebA test split or an ImageNet-style folder."""
    transform = self.data_transforms()
    imagenet_dir = os.path.join(self.params['data_path'], 'imagenet')
    testdir = os.path.join(imagenet_dir, 'test')
    chosen = self.params['dataset']
    if chosen == 'celeba':
        dataset_test = CelebA(root=self.params['data_path'], split="test",
                              transform=transform, download=False)
    elif chosen == 'imagenet':
        dataset_test = ImageFolder(testdir, transform)
    else:
        raise ValueError('Undefined dataset type')
    self.sample_dataloader = DataLoader(dataset_test, batch_size=144,
                                        shuffle=True, drop_last=True)
    # Number of validation *batches*, since len() of a DataLoader counts batches.
    self.num_val_imgs = len(self.sample_dataloader)
    return self.sample_dataloader
def train_dataloader(self):
    """Training loader.

    'celeba' uses the official train split; the folder-based datasets
    carve out a train subset with a SubsetRandomSampler, holding out the
    first ``test_ratio`` fraction of indices.
    """
    transform = self.data_transforms()
    chosen = self.params['dataset']
    if chosen == 'celeba':
        dataset = CelebA(root=self.params['data_path'], split="train",
                         transform=transform, download=False)
    elif chosen in ['my_celeba', 'satellite_hill', 'satellite_rgb']:
        dataset = datasets.ImageFolder(root=self.params['data_path'],
                                       transform=transform)
        num_train = len(dataset)
        cut = int(np.floor(self.params['test_ratio'] * num_train))
        # Indices before the cut are reserved for testing.
        train_idx = list(range(num_train))[cut:]
        train_sampler = SubsetRandomSampler(train_idx)
        self.num_train_imgs = len(train_idx)
        return DataLoader(dataset,
                          batch_size=self.params['batch_size'],
                          sampler=train_sampler,
                          drop_last=True,
                          num_workers=self.params['num_workers'])
    else:
        raise ValueError('Undefined dataset type')
    self.num_train_imgs = len(dataset)
    return DataLoader(dataset,
                      batch_size=self.params['batch_size'],
                      shuffle=True,
                      drop_last=True,
                      num_workers=self.params['num_workers'])
def val_dataloader(self):
    """Validation loader for celeba / concrete-cracks / SDNET2018."""
    transform = self.data_transforms()
    chosen = self.params['dataset']
    if chosen == 'celeba':
        dataset = CelebA(root=self.params['data_path'], split="test",
                         transform=transform, download=False)
    elif chosen == 'concrete-cracks':
        dataset = ConcreteCracksDataset(root_dir=self.params['data_path'],
                                        split="val", abnormal_data=False,
                                        transform=transform)
    elif chosen == 'SDNET2018':
        dataset = SDNet2018(root_dir=self.params['data_path'], split="val",
                            abnormal_data=False, transform=transform)
    else:
        raise ValueError('Undefined dataset type')
    self.sample_dataloader = DataLoader(dataset,
                                        batch_size=self.params['batch_size'],
                                        shuffle=True,
                                        drop_last=True,
                                        **self.additional_dataloader_args)
    self.num_val_imgs = len(self.sample_dataloader)
    return self.sample_dataloader
def train_dataloader(self):
    """Training loader: CelebA train split, or MNIST from .npy dumps."""
    transform = self.data_transforms()
    chosen = self.params['dataset']
    if chosen == 'celeba':
        dataset = CelebA(root=self.params['data_path'], split="train",
                         transform=transform, download=False)
    elif chosen == "mnist":
        base = self.params['data_path']
        images = torch.from_numpy(np.load(base + "/mnist_train.npy")).float()
        labels = torch.from_numpy(np.load(base + "/mnist_train_target.npy")).long()
        images = images / 255
        # Pad 28x28 digits to 32x32 and add a channel dimension.
        images = torch.nn.functional.pad(images, (2, 2, 2, 2), 'constant')
        images = images.unsqueeze(dim=1)
        print("train", images.size())
        dataset = torch.utils.data.TensorDataset(images, labels)
    else:
        raise ValueError('Undefined dataset type')
    self.num_train_imgs = len(dataset)
    return DataLoader(dataset, batch_size=self.params['batch_size'],
                      shuffle=True, drop_last=True)
def val_dataloader(self):
    """Validation loader for celeba / user image folder / multicam."""
    print("Inside val dataloader ....")
    transform = self.data_transforms()
    chosen = self.params['dataset']
    if chosen == 'celeba':
        dataset = CelebA(root=self.params['data_path'], split="test",
                         transform=transform, download=True)
    elif chosen == 'user':
        # User-provided images laid out as an ImageFolder under train/.
        dataset = torchvision.datasets.ImageFolder(
            root=self.params['data_path'] + 'train/', transform=transform)
    elif chosen == 'multicam':
        dataset = MultipleCameraDataset(csv_file='images.csv',
                                        root_dir=self.params['data_path'],
                                        transform=transform,
                                        num_cam=self.params['num_cam'])
    else:
        raise ValueError('Undefined dataset type')
    # Here num_val_imgs counts samples (len of the dataset, not the loader).
    self.num_val_imgs = len(dataset)
    self.sample_dataloader = DataLoader(dataset,
                                        batch_size=self.params['batch_size'],
                                        shuffle=True, drop_last=True)
    return self.sample_dataloader
def get_celebA(batch_size=256):
    """Return a shuffled DataLoader over the CelebA train split.

    Fix: the original passed the undefined name ``d`` to ``DataLoader``
    (a typo for the dataset variable ``ds``), which raised ``NameError``
    on every call.
    """
    ds = CelebA(root="data/CelebA", split='train', transform=myt,
                download=True)
    return DataLoader(ds, batch_size=batch_size, shuffle=True)
def val_dataloader(self):
    """Validation loader: CelebA test split or CIFAR10."""
    transform = self.data_transforms()
    chosen = self.params['dataset']
    if chosen == 'celeba':
        dataset = CelebA(root=self.params['data_path'], split="test",
                         transform=transform, download=False)
    elif chosen == 'cifar10':
        # NOTE(review): train=True here, so validation runs over the
        # CIFAR10 *training* set — preserved as-is; confirm intent.
        dataset = dset.CIFAR10(root='datasets/raw/cifar10', train=True,
                               transform=transform, download=False)
    else:
        raise ValueError('Undefined dataset type')
    self.sample_dataloader = DataLoader(dataset, batch_size=144,
                                        shuffle=True, drop_last=True)
    self.num_val_imgs = len(self.sample_dataloader)
    return self.sample_dataloader
def val_dataloader(self):
    """Validation loader for celeba / nisuyGavia / allData.

    Fix: ``transform = self.data_transforms()`` had been commented out
    while the celeba branch still referenced ``transform``, so selecting
    'celeba' raised ``NameError``.  The transform is now computed inside
    that branch only, leaving the other branches untouched.
    """
    if self.params['dataset'] == 'celeba':
        transform = self.data_transforms()
        self.sample_dataloader = DataLoader(
            CelebA(root=self.params['data_path'], split="test",
                   transform=transform, download=False),
            batch_size=144, shuffle=True, drop_last=True)
    elif self.params['dataset'] == 'nisuyGavia':
        self.sample_dataloader = DataLoader(
            self.val_origin_dataset,
            batch_size=self.params['batch_size'],
            num_workers=64, shuffle=True, drop_last=True)
    elif self.params['dataset'] == 'allData':
        # Validation subset is selected by a sampler, so shuffle stays off.
        self.sample_dataloader = DataLoader(
            self.train_dataset,
            batch_size=self.params['batch_size'],
            num_workers=64, shuffle=False,
            sampler=self.valid_sampler, drop_last=True)
    else:
        raise ValueError('Undefined dataset type')
    self.num_val_imgs = len(self.sample_dataloader)
    return self.sample_dataloader
def train_dataloader(self):
    """Training loader for celeba / mnist / mvtec / cycif datasets."""
    transform = self.data_transforms()
    chosen = self.params['dataset']
    if chosen == 'celeba':
        dataset = CelebA(root=self.params['data_path'], split="train",
                         transform=transform, download=True)
    elif chosen == 'mnist':
        dataset = MNIST(root=self.params['data_path'], train=True,
                        transform=transform, download=False)
    elif chosen == 'mvtec':
        dataset = MVTec_Dataset(root=self.params['data_path'],
                                obj=self.params['object'], split="train")
    elif chosen == 'cycif':
        dataset = CyCIF_Dataset(root=self.params['data_path'], split="train")
    else:
        raise ValueError('Undefined dataset type')
    self.num_train_imgs = len(dataset)
    return DataLoader(dataset, batch_size=self.params['batch_size'],
                      num_workers=8, shuffle=True, drop_last=True)
def val_dataloader(self):
    """Validation loader for celeba / cosmo / dummy datasets."""
    transform = self.data_transforms()
    chosen = self.params['dataset']
    if chosen == 'celeba':
        self.sample_dataloader = DataLoader(
            CelebA(root=self.params['data_path'], split="test",
                   transform=transform, download=False),
            batch_size=144, shuffle=True, drop_last=True)
    elif chosen == 'cosmo':
        # Cosmo batches need the custom collate_fn and pinned memory.
        self.sample_dataloader = DataLoader(
            CosmoData(train='val', load_every=self.params['load_every'],
                      exp=self.params['exp']),
            batch_size=self.params['batch_size'],
            num_workers=self.params['num_workers'],
            shuffle=False, pin_memory=True, collate_fn=batchify)
    elif chosen == 'dummy':
        # Dummy dataset used for quick pipeline tests.
        self.sample_dataloader = DataLoader(
            DummyData(),
            batch_size=self.params['batch_size'],
            num_workers=self.params['num_workers'],
            shuffle=False)
    else:
        raise ValueError('Undefined dataset type')
    self.num_val_imgs = len(self.sample_dataloader)
    return self.sample_dataloader
def train_dataloader(self):
    """Training loader for celeba / cosmo / dummy datasets."""
    transform = self.data_transforms()
    chosen = self.params['dataset']
    if chosen == 'celeba':
        dataset = CelebA(root=self.params['data_path'], split="train",
                         transform=transform, download=False)
    elif chosen == 'cosmo':
        dataset = CosmoData(train='train',
                            load_every=self.params['load_every'],
                            exp=self.params['exp'])
        self.num_train_imgs = len(dataset)
        # Cosmo returns early: it needs the custom collate_fn and pinned memory.
        return DataLoader(dataset,
                          batch_size=self.params['batch_size'],
                          num_workers=self.params['num_workers'],
                          shuffle=True, drop_last=True,
                          pin_memory=True, collate_fn=batchify)
    elif chosen == 'dummy':
        # Dummy dataset used for quick pipeline tests.
        dataset = DummyData()
    else:
        raise ValueError('Undefined dataset type')
    self.num_train_imgs = len(dataset)
    return DataLoader(dataset,
                      batch_size=self.params['batch_size'],
                      num_workers=self.params['num_workers'],
                      shuffle=True, drop_last=True)
def train_dataloader(self):
    """Training loader for mnist / cifar10 / celeba / wikiart."""
    transform = self.data_transforms()
    chosen = self.params['dataset']
    if chosen == 'mnist':
        dataset = MNIST(root=self.params['data_path'], train=True,
                        transform=transform, download=True)
    elif chosen == 'cifar10':
        dataset = CIFAR10(root=self.params['data_path'], train=True,
                          transform=transform, download=True)
    elif chosen == 'celeba':
        dataset = CelebA(root=self.params['data_path'], split="train",
                         transform=transform, download=False)
    elif chosen == 'wikiart':
        main_dataset = datasets.ImageFolder(root=self.params['data_path'],
                                            transform=transform)
        # Deterministic 80/20 split; the fixed seed keeps this split
        # consistent with the validation loader's split.
        train_size = int(0.8 * len(main_dataset))
        test_size = len(main_dataset) - train_size
        dataset, _ = torch.utils.data.random_split(
            main_dataset, [train_size, test_size],
            generator=torch.Generator().manual_seed(42))
    else:
        raise ValueError('Undefined dataset type')
    self.num_train_imgs = len(dataset)
    return DataLoader(dataset, batch_size=self.params['batch_size'],
                      num_workers=12, shuffle=True, drop_last=True)
def train_dataloader(self):
    """Training loader for celeba / mnist / currents tile datasets."""
    transform = self.data_transforms()
    chosen = self.params['dataset']
    if chosen == 'celeba':
        dataset = CelebA(root=self.params['data_path'], split="train",
                         transform=transform, download=True)
    elif chosen == 'mnist':
        dataset = MNIST(root=self.params['data_path'], train=True,
                        transform=transform, download=True)
    elif chosen == 'currents':
        dataset = CSDataset(
            root_dir=self.params['data_path'] + '/training/tiles_32',
            transform=transform)
    else:
        raise ValueError('Undefined dataset type')
    self.num_train_imgs = len(dataset)
    print(self.num_train_imgs)
    return DataLoader(dataset, batch_size=self.params['batch_size'],
                      shuffle=True, drop_last=True)
def get_dataloader_celebA(batch_size, transform):
    """Shuffled DataLoader over the CelebA train split (downloads if needed)."""
    celeba_train = CelebA(root=str(data_path), split='train',
                          download=True, transform=transform)
    return DataLoader(celeba_train, batch_size=batch_size, shuffle=True)
def get_celeba_loaders(data_path, img_ext, crop_size, img_size, batch_size, download):
    """Return a shuffled training DataLoader over CelebA images.

    The local ``celebA`` dataset is always built first (it defines the
    transforms); when ``download`` is truthy the torchvision ``CelebA``
    dataset is substituted, reusing those transforms.
    """
    ds = celebA(data_path, img_ext, crop_size, img_size)
    if download:
        # Swap in the torchvision dataset but keep the local transforms.
        ds = CelebA('celebA', transform=ds.transforms, download=True)
    return DataLoader(ds, batch_size=batch_size, shuffle=True, num_workers=28)
def celeba(male, tfm=None):
    """Image-paths dataset of CelebA faces filtered by the 'Male' attribute.

    ``male=True`` keeps male faces, ``male=False`` keeps the rest;
    ``tfm`` is an optional transform passed to the paths dataset.
    """
    from torchvision.datasets import CelebA
    ds = CelebA('~/.torch/celeba', download=True, target_type=[])
    male_idx = ds.attr_names.index('Male')
    wanted = 1 if male else 0
    files = [
        f'~/.torch/celeba/celeba/img_align_celeba/{ds.filename[i]}'
        for i in range(len(ds))
        if ds.attr[i, male_idx] == wanted
    ]
    return tch.datasets.pix2pix.ImagesPaths(files, tfm)
def __init__(self, data_dir, batch_size=1, split='train', train_transform=None,
             val_transform=None, shuffle=True, validation_split=0.,
             num_workers=1, collate_fn=collate.drop_target, download=False,
             drop_last=True, sampler=None, seed=SelfSupervisedLoader.SEED):
    """Build the CelebA attribute dataset and initialise the base loader."""
    self.data_dir = data_dir
    self.batch_size = batch_size
    # target_type='attr' yields the binary face-attribute labels.
    self.dataset = CelebA(root=data_dir, split=split, target_type='attr',
                          download=download)
    super(CelebALoader, self).__init__(
        dataset=self.dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        train_transform=train_transform,
        val_transform=val_transform,
        validation_split=validation_split,
        num_workers=num_workers,
        sampler=sampler,
        collate_fn=collate_fn,
        drop_last=drop_last,
        seed=seed)
def val_dataloader(self):
    """Validation loader for mnist / cifar10 / celeba / wikiart.

    Every branch used identical loader settings (batch 144, 12 workers,
    shuffled, drop_last), so the dataset is selected first and wrapped
    in a single DataLoader afterwards.
    """
    transform = self.data_transforms()
    chosen = self.params['dataset']
    if chosen == 'mnist':
        # NOTE(review): train=True, so validation runs over the MNIST
        # *training* set — preserved as-is; confirm intent.
        dataset = MNIST(root=self.params['data_path'], train=True,
                        transform=transform, download=True)
    elif chosen == 'cifar10':
        dataset = CIFAR10(root=self.params['data_path'], train=False,
                          transform=transform, download=True)
    elif chosen == 'celeba':
        dataset = CelebA(root=self.params['data_path'], split="test",
                         transform=transform, download=False)
    elif chosen == 'wikiart':
        main_dataset = datasets.ImageFolder(
            root=self.params['data_path'] + 'wikiart', transform=transform)
        # Deterministic 80/20 split; the fixed seed keeps this split
        # consistent with the training loader's split.
        train_size = int(0.8 * len(main_dataset))
        test_size = len(main_dataset) - train_size
        _, dataset = torch.utils.data.random_split(
            main_dataset, [train_size, test_size],
            generator=torch.Generator().manual_seed(42))
    else:
        raise ValueError('Undefined dataset type')
    self.sample_dataloader = DataLoader(dataset, num_workers=12,
                                        batch_size=144, shuffle=True,
                                        drop_last=True)
    self.num_val_imgs = len(self.sample_dataloader)
    return self.sample_dataloader
def train_dataloader(self):
    """Shuffled training loader over the CelebA train split."""
    transform = self.data_transforms()
    celeba_train = CelebA(root=self.params['data_path'], split='train',
                          transform=transform, download=True)
    self.num_train_imgs = len(celeba_train)
    return DataLoader(celeba_train, batch_size=self.params['batch_size'],
                      shuffle=True, drop_last=True)
def val_dataloader(self):
    """Validation loader over the CelebA test split.

    Fix: the keyword argument was misspelled ``downalod=True``, which
    made the CelebA constructor raise ``TypeError`` on every call;
    corrected to ``download=True``.
    """
    transform = self.data_transforms()
    dataset = CelebA(root=self.params['data_path'], split='test',
                     transform=transform, download=True)
    self.sample_dataloader = DataLoader(dataset, batch_size=144,
                                        shuffle=True, drop_last=True)
    self.num_val_imgs = len(self.sample_dataloader)
    return self.sample_dataloader
def _get_dataset():
    """CelebA dataset preprocessed for 299x299 (Inception-sized) input."""
    image_size = 299
    preprocess = transforms.Compose([
        transforms.Resize(image_size),
        transforms.CenterCrop(image_size),
        transforms.ToTensor(),
        # Map [0, 1] tensors to [-1, 1].
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    return CelebA(".", download=True, transform=preprocess)
def val_dataloader(self):
    """Validation loader; only the 'celeba' dataset is supported."""
    transform = self.data_transforms()
    if self.params['dataset'] != 'celeba':
        raise ValueError('Undefined dataset type')
    celeba_test = CelebA(root=self.params['data_path'], split="test",
                         transform=transform, download=False)
    self.sample_dataloader = DataLoader(celeba_test, batch_size=144,
                                        shuffle=True, drop_last=True)
    return self.sample_dataloader
def __get_dataloader(self, download):
    """Shuffled loader over the full ('all') CelebA split.

    Images are center-cropped to 178, downscaled to 16x16 and
    normalised to [-1, 1].
    """
    transform = transforms.Compose([
        torchvision.transforms.CenterCrop(178),
        torchvision.transforms.Resize(16),
        transforms.ToTensor(),
        transforms.Normalize([0.5] * 3, [0.5] * 3),
    ])
    full_split = CelebA(self.hparams.data_root, split='all',
                        download=download, transform=transform)
    return DataLoader(full_split, batch_size=self.hparams.batch_size,
                      shuffle=True, num_workers=32)
def train_dataloader(self):
    """Shuffled training loader; only 'celeba' is supported."""
    transform = self.data_transforms()
    if self.params['dataset'] != 'celeba':
        raise ValueError('Undefined dataset type')
    dataset = CelebA(root=self.params['data_path'], split="train",
                     transform=transform, download=True)
    self.num_train_imgs = len(dataset)
    return DataLoader(dataset, batch_size=self.params['batch_size'],
                      shuffle=True, drop_last=True)
def train_sequential_dataloader(self):
    """CelebA train-split loader in dataset order (no shuffle, keeps the tail)."""
    transform = self.data_transforms()
    if self.params['dataset'] != 'celeba':
        raise ValueError('Undefined dataset type')
    # If a not-a-zip-file error appears, fetch the archive manually from
    # the source Google Drive.
    dataset = CelebA(root=self.params['data_path'], split="train",
                     transform=transform, download=True)
    self.num_train_imgs = len(dataset)
    return DataLoader(dataset, batch_size=self.params['batch_size'],
                      shuffle=False, drop_last=False)
class CelebALoader(SelfSupervisedLoader):
    """Self-supervised loader around the torchvision CelebA dataset.

    Args:
        data_dir (str): dataset root directory.
        batch_size (int): samples per batch (default: 1).
        split (str): one of 'train', 'valid', 'test', 'all'.
        train_transform (callable): PIL transform for the train set.
        val_transform (callable): PIL transform for the validation set.
        shuffle (bool): whether to shuffle samples.
        validation_split (int or float): validation sample count, or a
            fraction in ]0, 1[.
        num_workers (int): loader worker processes.
        collate_fn (callable): merges samples into a mini-batch.
        download (bool): if True, download and unzip into data_dir.
    """

    def __init__(self, data_dir, batch_size=1, split='train',
                 train_transform=None, val_transform=None, shuffle=True,
                 validation_split=0., num_workers=1,
                 collate_fn=collate.drop_target, download=False,
                 drop_last=True, sampler=None,
                 seed=SelfSupervisedLoader.SEED):
        self.data_dir = data_dir
        self.batch_size = batch_size
        # target_type='attr' yields the binary face-attribute labels.
        self.dataset = CelebA(root=data_dir, split=split,
                              target_type='attr', download=download)
        super(CelebALoader, self).__init__(
            dataset=self.dataset,
            batch_size=batch_size,
            shuffle=shuffle,
            train_transform=train_transform,
            val_transform=val_transform,
            validation_split=validation_split,
            num_workers=num_workers,
            sampler=sampler,
            collate_fn=collate_fn,
            drop_last=drop_last,
            seed=seed)

    def __repr__(self):
        base_repr = super(CelebALoader, self).__repr__()
        return "\n".join([self.dataset.__repr__(), base_repr])

    def set_transform(self, dataset, transform):
        # Work on a deep copy so the caller's dataset keeps its transform.
        clone = copy.deepcopy(dataset)
        clone.transform = transform
        return clone

    @property
    def split(self):
        return self.dataset.split
def val_dataloader(self):
    """Unshuffled CelebA test-split loader with prefetching workers."""
    transform = self.data_transforms()
    if self.params['dataset'] != 'celeba':
        raise ValueError('Undefined dataset type')
    celeba_test = CelebA(root=self.params['data_path'], split="test",
                         transform=transform, download=False)
    self.sample_dataloader = DataLoader(celeba_test, batch_size=144,
                                        shuffle=False, drop_last=True,
                                        prefetch_factor=8, num_workers=3,
                                        pin_memory=False)
    self.num_val_imgs = len(self.sample_dataloader)
    return self.sample_dataloader