def get_celeba_data(split='train',
                    num_samples=None,
                    w=128,
                    attr_num=None,
                    attr_value=None,
                    shuffle=True):
    """Load a CelebA split cropped to the face region and resized to ``w``.

    Tries the local copy first; if that fails (files missing or corrupt),
    retries with ``download=True``.  The transform pipeline was previously
    duplicated in both branches; it is now built once.

    Args:
        split: CelebA split name ('train', 'valid', 'test', 'all').
        num_samples: forwarded to ``load_data`` (presumably a subsample
            count — confirm against ``load_data``).
        w: output image side length in pixels.
        attr_num, attr_value: accepted but unused here (kept for
            interface compatibility).
        shuffle: forwarded to ``load_data``.

    Returns:
        Whatever ``load_data`` returns for the wrapped dataset.
    """
    transform = transforms.Compose([
        # Fixed 128x128 face crop before resizing.
        CropTransform((25, 50, 25 + 128, 50 + 128)),
        transforms.Resize(w),
        transforms.ToTensor()
    ])
    try:
        celeba = CelebA(root=celeba_root,
                        split=split,
                        download=False,
                        target_type='attr',
                        transform=transform)
    except Exception:
        # Local dataset unavailable -- fall back to downloading it.
        celeba = CelebA(root=celeba_root,
                        split=split,
                        download=True,
                        target_type='attr',
                        transform=transform)
    return load_data(celeba, num_samples, w, shuffle, has_cls=True)
Esempio n. 2
0
def get_celeba_dataset(cfg,mode):
    """Build CelebA datasets and loaders for the given mode.

    Args:
        cfg: config object; ``cfg[mode].dataset.root`` gives the data root.
        mode: 'cls' (plain CelebA) or 'disent' (DisentCelebAv1).

    Returns:
        (data, loader) edicts; ``data`` holds tr/val/te/all datasets and
        ``loader`` holds tr/val/te DataLoaders.
    """
    root = cfg[mode].dataset.root
    data = edict()
    # field name -> torchvision split name
    split_map = {'tr': 'train', 'val': 'valid', 'te': 'test', 'all': 'all'}
    if mode == 'cls':
        batch_size = cfg.cls.batch_size
        transform = th_transforms.Compose([th_transforms.ToTensor()])
        for field, split in split_map.items():
            setattr(data, field, CelebA(root, split=split, transform=transform))
    elif mode == "disent":
        batch_size = cfg.disent.batch_size
        N = cfg.disent.N
        for field, split in split_map.items():
            setattr(data, field, DisentCelebAv1(root, N, split=split))
    else:
        raise ValueError(f"Unknown CelebA mode {mode}")
    loader = edict()
    loader_kwargs = {'batch_size': batch_size,
                     'shuffle': True, 'drop_last': True,
                     'num_workers': cfg[mode].workers}
    # Note: no loader is built for data.all (matches original behavior).
    for field in ('tr', 'val', 'te'):
        setattr(loader, field, DataLoader(getattr(data, field), **loader_kwargs))
    return data, loader
Esempio n. 3
0
def load_celeba(path, transforms=None):
    """Load the CelebA attribute dataset from *path*.

    Falls back to downloading into the current directory when the local
    copy at *path* cannot be loaded.

    Args:
        path: dataset root for the local copy.
        transforms: optional transform applied to each image.  NOTE: the
            parameter name shadows the torchvision ``transforms`` module;
            kept for interface compatibility.

    Returns:
        A ``CelebA`` dataset with ``target_type='attr'``.
    """
    try:
        data = CelebA(path,
                      transform=transforms,
                      target_type='attr',
                      download=False)
    except Exception:
        # Fix: exception was previously bound to an unused name `e`.
        # Fallback deliberately uses './' rather than *path*.
        data = CelebA('./',
                      transform=transforms,
                      target_type='attr',
                      download=True)

    return data
Esempio n. 4
0
    def val_dataloader(self):
        """Return the validation DataLoader for the configured dataset.

        Supports 'celeba' (torchvision test split) and 'imagenet'
        (ImageFolder over <data_path>/imagenet/test); raises ValueError
        otherwise.  Fixes: the local name ``dir`` shadowed the builtin,
        and the ImageNet test path was computed even for CelebA.
        """
        transform = self.data_transforms()

        if self.params['dataset'] == 'celeba':
            dataset_test = CelebA(root=self.params['data_path'],
                                  split="test",
                                  transform=transform,
                                  download=False)
        elif self.params['dataset'] == 'imagenet':
            # Build the ImageNet test path only when it is needed.
            testdir = os.path.join(self.params['data_path'], 'imagenet', 'test')
            dataset_test = ImageFolder(
                testdir,
                transform,
            )
        else:
            raise ValueError('Undefined dataset type')

        self.sample_dataloader = DataLoader(dataset_test,
                                            batch_size=144,
                                            shuffle=True,
                                            drop_last=True)
        # Number of batches, not images (len of the loader).
        self.num_val_imgs = len(self.sample_dataloader)

        return self.sample_dataloader
Esempio n. 5
0
    def train_dataloader(self):
        """Return the training DataLoader for the configured dataset.

        'celeba' uses the torchvision train split with shuffling; the
        ImageFolder-based datasets carve off a train subset via
        SubsetRandomSampler using ``params['test_ratio']``.
        """
        transform = self.data_transforms()
        name = self.params['dataset']

        if name == 'celeba':
            dataset = CelebA(root=self.params['data_path'],
                             split="train",
                             transform=transform,
                             download=False)
            self.num_train_imgs = len(dataset)
            return DataLoader(dataset,
                              batch_size=self.params['batch_size'],
                              shuffle=True,
                              drop_last=True,
                              num_workers=self.params['num_workers'])

        if name in ['my_celeba', 'satellite_hill', 'satellite_rgb']:
            dataset = datasets.ImageFolder(root=self.params['data_path'],
                                           transform=transform)
            # Everything after the first test_ratio fraction is training data.
            total = len(dataset)
            cut = int(np.floor(self.params['test_ratio'] * total))
            train_idx = list(range(total))[cut:]
            self.num_train_imgs = len(train_idx)
            return DataLoader(dataset,
                              batch_size=self.params['batch_size'],
                              sampler=SubsetRandomSampler(train_idx),
                              drop_last=True,
                              num_workers=self.params['num_workers'])

        raise ValueError('Undefined dataset type')
Esempio n. 6
0
    def val_dataloader(self):
        """Return the validation DataLoader for the configured dataset.

        Supports 'celeba', 'concrete-cracks' and 'SDNET2018'; raises
        ValueError otherwise.
        """
        transform = self.data_transforms()
        name = self.params['dataset']

        if name == 'celeba':
            dataset = CelebA(root=self.params['data_path'],
                             split="test",
                             transform=transform,
                             download=False)
        elif name == 'concrete-cracks':
            # Normal (non-defect) validation images only.
            dataset = ConcreteCracksDataset(root_dir=self.params['data_path'],
                                            split="val",
                                            abnormal_data=False,
                                            transform=transform)
        elif name == 'SDNET2018':
            dataset = SDNet2018(root_dir=self.params['data_path'],
                                split="val",
                                abnormal_data=False,
                                transform=transform)
        else:
            raise ValueError('Undefined dataset type')

        self.sample_dataloader = DataLoader(dataset,
                                            batch_size=self.params['batch_size'],
                                            shuffle=True,
                                            drop_last=True,
                                            **self.additional_dataloader_args)
        # Number of batches, not images.
        self.num_val_imgs = len(self.sample_dataloader)
        return self.sample_dataloader
Esempio n. 7
0
    def train_dataloader(self):
        """Return the training DataLoader for 'celeba' or 'mnist'.

        For MNIST, raw .npy arrays are loaded, scaled to [0, 1],
        zero-padded from 28x28 to 32x32 and given a channel dimension.
        """
        transform = self.data_transforms()
        name = self.params['dataset']
        root = self.params['data_path']

        if name == 'celeba':
            dataset = CelebA(root=root,
                             split="train",
                             transform=transform,
                             download=False)
        elif name == "mnist":
            images = torch.from_numpy(np.load(root + "/mnist_train.npy")).float()
            labels = torch.from_numpy(np.load(root + "/mnist_train_target.npy")).long()

            images = images / 255
            # Pad 2px on each side: 28x28 -> 32x32.
            images = torch.nn.functional.pad(images, (2, 2, 2, 2), 'constant')
            images = images.unsqueeze(dim=1)

            print("train", images.size())
            dataset = torch.utils.data.TensorDataset(images, labels)
        else:
            raise ValueError('Undefined dataset type')

        self.num_train_imgs = len(dataset)
        return DataLoader(dataset,
                          batch_size=self.params['batch_size'],
                          shuffle=True,
                          drop_last=True)
Esempio n. 8
0
    def val_dataloader(self):
        """Return the validation DataLoader for the configured dataset.

        Supports 'celeba', 'user' (ImageFolder over <data_path>train/),
        and 'multicam'; raises ValueError otherwise.
        """
        print("Inside val dataloader ....")
        transform = self.data_transforms()
        name = self.params['dataset']

        if name == 'celeba':
            dataset = CelebA(root=self.params['data_path'],
                             split="test",
                             transform=transform,
                             download=True)
        elif name == 'user':
            dataset = torchvision.datasets.ImageFolder(
                root=self.params['data_path'] + 'train/', transform=transform)
        elif name == 'multicam':
            dataset = MultipleCameraDataset(csv_file='images.csv',
                                            root_dir=self.params['data_path'],
                                            transform=transform,
                                            num_cam=self.params['num_cam'])
        else:
            raise ValueError('Undefined dataset type')

        # Here num_val_imgs is the dataset size (not the batch count).
        self.num_val_imgs = len(dataset)
        self.sample_dataloader = DataLoader(dataset,
                                            batch_size=self.params['batch_size'],
                                            shuffle=True,
                                            drop_last=True)
        return self.sample_dataloader
Esempio n. 9
0
def get_celebA(batch_size = 256):
    """Return a shuffled DataLoader over the CelebA train split.

    Bug fix: the DataLoader previously wrapped an undefined name ``d``
    instead of the dataset ``ds``, raising NameError at call time.

    Args:
        batch_size: mini-batch size (default 256).
    """
    ds = CelebA(
        root = "data/CelebA",
        split = 'train',
        transform = myt,
        download = True)
    return DataLoader(ds, batch_size = batch_size, shuffle = True)
Esempio n. 10
0
    def val_dataloader(self):
        """Return the validation DataLoader for 'celeba' or 'cifar10'.

        Both branches use batch_size=144 with shuffling and drop_last.
        Note the cifar10 branch loads train=True (kept as-is).
        """
        transform = self.data_transforms()
        name = self.params['dataset']

        if name == 'celeba':
            dataset = CelebA(root=self.params['data_path'],
                             split="test",
                             transform=transform,
                             download=False)
        elif name == 'cifar10':
            dataset = dset.CIFAR10(root='datasets/raw/cifar10',
                                   train=True,
                                   transform=transform,
                                   download=False)
        else:
            raise ValueError('Undefined dataset type')

        self.sample_dataloader = DataLoader(dataset,
                                            batch_size=144,
                                            shuffle=True,
                                            drop_last=True)
        self.num_val_imgs = len(self.sample_dataloader)
        return self.sample_dataloader
Esempio n. 11
0
    def val_dataloader(self):
        """Return the validation DataLoader for the configured dataset.

        Bug fix: the 'celeba' branch referenced an undefined ``transform``
        because the ``self.data_transforms()`` call had been commented
        out; the transform is now built in the branch that needs it.
        """
        if self.params['dataset'] == 'celeba':
            transform = self.data_transforms()
            self.sample_dataloader = DataLoader(CelebA(
                root=self.params['data_path'],
                split="test",
                transform=transform,
                download=False),
                                                batch_size=144,
                                                shuffle=True,
                                                drop_last=True)

        elif self.params['dataset'] == 'nisuyGavia':
            self.sample_dataloader = DataLoader(
                self.val_origin_dataset,
                batch_size=self.params['batch_size'],
                num_workers=64,
                shuffle=True,
                drop_last=True)
        elif self.params['dataset'] == 'allData':
            # Validation is a sampled subset of the training dataset.
            self.sample_dataloader = DataLoader(
                self.train_dataset,
                batch_size=self.params['batch_size'],
                num_workers=64,
                shuffle=False,
                sampler=self.valid_sampler,
                drop_last=True)
        else:
            raise ValueError('Undefined dataset type')
        self.num_val_imgs = len(self.sample_dataloader)
        return self.sample_dataloader
Esempio n. 12
0
    def train_dataloader(self):
        """Return the training DataLoader for the configured dataset.

        Supports 'celeba', 'mnist', 'mvtec' and 'cycif'; raises
        ValueError otherwise.
        """
        transform = self.data_transforms()
        name = self.params['dataset']
        root = self.params['data_path']

        if name == 'celeba':
            dataset = CelebA(root=root,
                             split="train",
                             transform=transform,
                             download=True)
        elif name == 'mnist':
            dataset = MNIST(root=root,
                            train=True,
                            transform=transform,
                            download=False)
        elif name == 'mvtec':
            # MVTec is object-specific; the object name comes from params.
            dataset = MVTec_Dataset(root=root,
                                    obj=self.params['object'],
                                    split="train")
        elif name == 'cycif':
            dataset = CyCIF_Dataset(root=root, split="train")
        else:
            raise ValueError('Undefined dataset type')

        self.num_train_imgs = len(dataset)
        return DataLoader(dataset,
                          batch_size=self.params['batch_size'],
                          num_workers=8,
                          shuffle=True,
                          drop_last=True)
Esempio n. 13
0
    def val_dataloader(self):
        """Return the validation DataLoader for the configured dataset.

        Each branch picks a dataset plus its loader kwargs; the loader
        itself is built once at the end.
        """
        transform = self.data_transforms()
        name = self.params['dataset']

        if name == 'celeba':
            dataset = CelebA(root=self.params['data_path'],
                             split="test",
                             transform=transform,
                             download=False)
            kwargs = dict(batch_size=144, shuffle=True, drop_last=True)
        elif name == 'cosmo':
            dataset = CosmoData(train='val',
                                load_every=self.params['load_every'],
                                exp=self.params['exp'])
            kwargs = dict(batch_size=self.params['batch_size'],
                          num_workers=self.params['num_workers'],
                          shuffle=False,
                          pin_memory=True,
                          collate_fn=batchify)
        elif name == 'dummy':
            # Dummy dataset used for quick tests.
            dataset = DummyData()
            kwargs = dict(batch_size=self.params['batch_size'],
                          num_workers=self.params['num_workers'],
                          shuffle=False)
        else:
            raise ValueError('Undefined dataset type')

        self.sample_dataloader = DataLoader(dataset, **kwargs)
        self.num_val_imgs = len(self.sample_dataloader)
        return self.sample_dataloader
Esempio n. 14
0
    def train_dataloader(self):
        """Return the training DataLoader for the configured dataset.

        All branches share the base loader kwargs; 'cosmo' additionally
        pins memory and uses the ``batchify`` collate function.
        """
        transform = self.data_transforms()
        name = self.params['dataset']
        kwargs = dict(batch_size=self.params['batch_size'],
                      num_workers=self.params['num_workers'],
                      shuffle=True,
                      drop_last=True)

        if name == 'celeba':
            dataset = CelebA(root=self.params['data_path'],
                             split="train",
                             transform=transform,
                             download=False)
        elif name == 'cosmo':
            dataset = CosmoData(train='train',
                                load_every=self.params['load_every'],
                                exp=self.params['exp'])
            kwargs.update(pin_memory=True, collate_fn=batchify)
        elif name == 'dummy':
            # Dummy dataset used for quick tests.
            dataset = DummyData()
        else:
            raise ValueError('Undefined dataset type')

        self.num_train_imgs = len(dataset)
        return DataLoader(dataset, **kwargs)
Esempio n. 15
0
    def train_dataloader(self):
        """Return the training DataLoader for the configured dataset.

        'wikiart' carves a reproducible 80/20 train split out of an
        ImageFolder with a fixed random seed.
        """
        transform = self.data_transforms()
        name = self.params['dataset']
        root = self.params['data_path']

        if name == 'mnist':
            dataset = MNIST(root=root, train=True,
                            transform=transform, download=True)
        elif name == 'cifar10':
            dataset = CIFAR10(root=root, train=True,
                              transform=transform, download=True)
        elif name == 'celeba':
            dataset = CelebA(root=root, split="train",
                             transform=transform, download=False)
        elif name == 'wikiart':
            full = datasets.ImageFolder(root=root, transform=transform)
            # Seeded generator keeps the split identical across runs.
            n_train = int(0.8 * len(full))
            dataset, _ = torch.utils.data.random_split(
                full, [n_train, len(full) - n_train],
                generator=torch.Generator().manual_seed(42))
        else:
            raise ValueError('Undefined dataset type')

        self.num_train_imgs = len(dataset)
        return DataLoader(dataset,
                          batch_size=self.params['batch_size'],
                          num_workers=12,
                          shuffle=True,
                          drop_last=True)
Esempio n. 16
0
    def train_dataloader(self):
        """Return the training DataLoader for 'celeba', 'mnist' or 'currents'."""
        transform = self.data_transforms()
        name = self.params['dataset']

        if name == 'celeba':
            dataset = CelebA(root=self.params['data_path'],
                             split="train",
                             transform=transform,
                             download=True)
        elif name == 'mnist':
            dataset = MNIST(root=self.params['data_path'],
                            train=True,
                            transform=transform,
                            download=True)
        elif name == 'currents':
            dataset = CSDataset(
                root_dir=self.params['data_path'] + '/training/tiles_32',
                transform=transform)
        else:
            raise ValueError('Undefined dataset type')

        self.num_train_imgs = len(dataset)
        print(self.num_train_imgs)
        return DataLoader(dataset,
                          batch_size=self.params['batch_size'],
                          shuffle=True,
                          drop_last=True)
Esempio n. 17
0
def get_dataloader_celebA(batch_size, transform):
    """Return a shuffled DataLoader over the CelebA train split.

    Downloads the dataset into the module-level ``data_path`` if needed.
    """
    dataset = CelebA(root=str(data_path),
                     split='train',
                     download=True,
                     transform=transform)
    return DataLoader(dataset, batch_size=batch_size, shuffle=True)
Esempio n. 18
0
def get_celeba_loaders(data_path, img_ext, crop_size, img_size, batch_size,
                       download):
    """Return a shuffled CelebA DataLoader.

    Builds the local ``celebA`` wrapper first; when ``download`` is set,
    its transforms are reused on a freshly downloaded torchvision CelebA
    rooted at 'celebA'.
    """
    base = celebA(data_path, img_ext, crop_size, img_size)
    dataset = base
    if download:
        # Reuse the wrapper's transform pipeline on the downloaded dataset.
        dataset = CelebA('celebA', transform=base.transforms, download=True)
    return DataLoader(dataset,
                      batch_size=batch_size,
                      shuffle=True,
                      num_workers=28)
Esempio n. 19
0
def celeba(male, tfm=None):
    """Return an ImagesPaths dataset of CelebA faces filtered by the Male attribute.

    Args:
        male: if truthy, keep images whose 'Male' attribute equals 1;
            otherwise keep those equal to 0.
        tfm: optional transform forwarded to ImagesPaths.
    """
    from torchvision.datasets import CelebA
    ds = CelebA('~/.torch/celeba', download=True, target_type=[])
    male_idx = ds.attr_names.index('Male')
    wanted = 1 if male else 0
    files = []
    for i in range(len(ds)):
        if ds.attr[i, male_idx] == wanted:
            files.append(f'~/.torch/celeba/celeba/img_align_celeba/{ds.filename[i]}')
    return tch.datasets.pix2pix.ImagesPaths(files, tfm)
Esempio n. 20
0
 def __init__(self, data_dir, batch_size=1, split='train', train_transform=None,
              val_transform=None, shuffle=True, validation_split=0.,
              num_workers=1, collate_fn=collate.drop_target, download=False,
              drop_last=True, sampler=None, seed=SelfSupervisedLoader.SEED):
     """Create the CelebA-backed loader and initialize the parent loader."""
     self.data_dir = data_dir
     self.batch_size = batch_size
     # Attribute targets; download only when explicitly requested.
     self.dataset = CelebA(root=data_dir, split=split, target_type='attr',
                           download=download)
     parent_kwargs = dict(dataset=self.dataset,
                          batch_size=batch_size,
                          shuffle=shuffle,
                          train_transform=train_transform,
                          val_transform=val_transform,
                          validation_split=validation_split,
                          num_workers=num_workers,
                          sampler=sampler,
                          collate_fn=collate_fn,
                          drop_last=drop_last,
                          seed=seed)
     super(CelebALoader, self).__init__(**parent_kwargs)
Esempio n. 21
0
    def val_dataloader(self):
        """Return the validation DataLoader for the configured dataset.

        All branches share the same loader kwargs (batch 144, 12 workers,
        shuffled, drop_last); only the dataset differs.  Note: the mnist
        branch loads train=True (kept as-is).
        """
        transform = self.data_transforms()
        name = self.params['dataset']
        loader_kwargs = dict(batch_size=144,
                             num_workers=12,
                             shuffle=True,
                             drop_last=True)

        if name == 'mnist':
            dataset = MNIST(root=self.params['data_path'],
                            train=True,
                            transform=transform,
                            download=True)
        elif name == 'cifar10':
            dataset = CIFAR10(root=self.params['data_path'],
                              train=False,
                              transform=transform,
                              download=True)
        elif name == 'celeba':
            dataset = CelebA(root=self.params['data_path'],
                             split="test",
                             transform=transform,
                             download=False)
        elif name == 'wikiart':
            full = datasets.ImageFolder(root=self.params['data_path'] +
                                        'wikiart',
                                        transform=transform)
            # Seeded 80/20 split; validation gets the 20% remainder.
            n_train = int(0.8 * len(full))
            _, dataset = torch.utils.data.random_split(
                full, [n_train, len(full) - n_train],
                generator=torch.Generator().manual_seed(42))
        else:
            raise ValueError('Undefined dataset type')

        self.sample_dataloader = DataLoader(dataset, **loader_kwargs)
        self.num_val_imgs = len(self.sample_dataloader)
        return self.sample_dataloader
Esempio n. 22
0
 def train_dataloader(self):
     """Return a shuffled DataLoader over the CelebA train split."""
     dataset = CelebA(root=self.params['data_path'],
                      split='train',
                      transform=self.data_transforms(),
                      download=True)
     self.num_train_imgs = len(dataset)
     return DataLoader(dataset,
                       batch_size=self.params['batch_size'],
                       shuffle=True,
                       drop_last=True)
Esempio n. 23
0
 def val_dataloader(self):
     """Return the validation DataLoader over the CelebA test split.

     Bug fix: the dataset was created with the misspelled keyword
     ``downalod=True``, which raises TypeError; corrected to
     ``download=True``.
     """
     transform = self.data_transforms()
     dataset = CelebA(root=self.params['data_path'],
                      split='test',
                      transform=transform,
                      download=True)
     self.sample_dataloader = DataLoader(dataset,
                                         batch_size=144,
                                         shuffle=True,
                                         drop_last=True)
     # Number of batches, not images.
     self.num_val_imgs = len(self.sample_dataloader)
     return self.sample_dataloader
Esempio n. 24
0
def _get_dataset():
    """Return CelebA resized/center-cropped to 299x299, normalized to [-1, 1]."""
    side = 299
    transform = transforms.Compose([
        transforms.Resize(side),
        transforms.CenterCrop(side),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    return CelebA(".", download=True, transform=transform)
Esempio n. 25
0
    def val_dataloader(self):
        """Return the validation DataLoader over the CelebA test split.

        Only 'celeba' is supported; raises ValueError otherwise.
        """
        transform = self.data_transforms()
        if self.params['dataset'] != 'celeba':
            raise ValueError('Undefined dataset type')
        dataset = CelebA(root=self.params['data_path'],
                         split="test",
                         transform=transform,
                         download=False)
        self.sample_dataloader = DataLoader(dataset,
                                            batch_size=144,
                                            shuffle=True,
                                            drop_last=True)
        return self.sample_dataloader
Esempio n. 26
0
 def __get_dataloader(self, download):
     """Build a shuffled DataLoader over the full CelebA dataset at 16x16.

     Images are center-cropped to 178px, resized to 16px, and normalized
     to [-1, 1] per channel.
     """
     mean = [0.5] * 3
     std = [0.5] * 3
     transform = transforms.Compose([
         torchvision.transforms.CenterCrop(178),
         torchvision.transforms.Resize(16),
         transforms.ToTensor(),
         transforms.Normalize(mean, std)
     ])
     dataset = CelebA(self.hparams.data_root,
                      split='all',
                      download=download,
                      transform=transform)
     return DataLoader(dataset,
                       batch_size=self.hparams.batch_size,
                       shuffle=True,
                       num_workers=32)
Esempio n. 27
0
    def train_dataloader(self):
        """Return a shuffled DataLoader over the CelebA train split.

        Only 'celeba' is supported; raises ValueError otherwise.
        """
        transform = self.data_transforms()
        if self.params['dataset'] != 'celeba':
            raise ValueError('Undefined dataset type')
        dataset = CelebA(root=self.params['data_path'],
                         split="train",
                         transform=transform,
                         download=True)
        self.num_train_imgs = len(dataset)
        return DataLoader(dataset,
                          batch_size=self.params['batch_size'],
                          shuffle=True,
                          drop_last=True)
Esempio n. 28
0
    def train_sequential_dataloader(self):
        """Return an unshuffled, non-dropping DataLoader over the CelebA train split.

        Only 'celeba' is supported; raises ValueError otherwise.
        """
        transform = self.data_transforms()
        if self.params['dataset'] != 'celeba':
            raise ValueError('Undefined dataset type')
        # If seeing a not-a-zip-file error, download the corresponding
        # zip file from the source Google Drive manually.
        dataset = CelebA(root=self.params['data_path'],
                         split="train",
                         transform=transform,
                         download=True)
        self.num_train_imgs = len(dataset)
        return DataLoader(dataset,
                          batch_size=self.params['batch_size'],
                          shuffle=False,
                          drop_last=False)
Esempio n. 29
0
class CelebALoader(SelfSupervisedLoader):
    """Self-supervised dataloader around torchvision's CelebA dataset.

    Args:
        data_dir (str): dataset root directory
        batch_size (int): samples per batch to load (default: 1)
        split (str): one of {'train', 'valid', 'test', 'all'}
        train_transform (callable): transform for train-set PIL images
        val_transform (callable): transform for validation-set PIL images
        shuffle (bool): whether to shuffle samples
        validation_split (int, float): number of validation samples, or a
            fraction in ]0, 1[
        num_workers (int): dataloader worker count
        collate_fn (callable): merges a list of samples into a mini-batch
        download (bool): if True, dataset is downloaded and unzipped in data_dir
    """
    def __init__(self, data_dir, batch_size=1, split='train', train_transform=None,
                 val_transform=None, shuffle=True, validation_split=0.,
                 num_workers=1, collate_fn=collate.drop_target, download=False,
                 drop_last=True, sampler=None, seed=SelfSupervisedLoader.SEED):
        self.data_dir = data_dir
        self.batch_size = batch_size
        # Attribute targets; download only when explicitly requested.
        self.dataset = CelebA(root=data_dir, split=split, target_type='attr',
                              download=download)
        parent_kwargs = dict(dataset=self.dataset,
                             batch_size=batch_size,
                             shuffle=shuffle,
                             train_transform=train_transform,
                             val_transform=val_transform,
                             validation_split=validation_split,
                             num_workers=num_workers,
                             sampler=sampler,
                             collate_fn=collate_fn,
                             drop_last=drop_last,
                             seed=seed)
        super(CelebALoader, self).__init__(**parent_kwargs)

    def __repr__(self):
        parent_repr = super(CelebALoader, self).__repr__()
        return "\n".join([repr(self.dataset), parent_repr])

    def set_transform(self, dataset, transform):
        # Deep-copy so the original dataset keeps its own transform.
        clone = copy.deepcopy(dataset)
        clone.transform = transform
        return clone

    @property
    def split(self):
        return self.dataset.split
0
    def val_dataloader(self):
        """Return the validation DataLoader over the CelebA test split.

        Unshuffled, batch size 144, with prefetching workers; only
        'celeba' is supported.
        """
        transform = self.data_transforms()
        if self.params['dataset'] != 'celeba':
            raise ValueError('Undefined dataset type')
        dataset = CelebA(root=self.params['data_path'],
                         split="test",
                         transform=transform,
                         download=False)
        self.sample_dataloader = DataLoader(dataset,
                                            batch_size=144,
                                            shuffle=False,
                                            drop_last=True,
                                            prefetch_factor=8,
                                            num_workers=3,
                                            pin_memory=False)
        self.num_val_imgs = len(self.sample_dataloader)
        return self.sample_dataloader