def torchvision_dataset(transform=None, train=True, subset=None):
    """Creates a dataset from torchvision, configured using command line arguments.

    Args:
        transform (callable, optional): A function that transforms an image
            (default None).
        train (bool, optional): Training set or validation - if applicable
            (default True).
        subset (string, optional): Specifies the subset of the relevant
            categories, if any of them was split (default None).

    Relevant Command Line Arguments:

        - **dataset**: `--data`, `--torchvision_dataset`.

    Note:
        Settings are automatically acquired from a call to :func:`dlt.config.parse`
        from the built-in ones. If :func:`dlt.config.parse` was not called in the
        main script, this function will call it.

    Warning:
        Unlike the torchvision datasets, this function returns a dataset that
        uses NumPy arrays instead of PIL Images.
    """
    opts = fetch_opts(['dataset'], subset)
    if opts.torchvision_dataset is None:
        apnd = '_' + subset if subset is not None else ''
        raise ValueError('No value given for --torchvision_dataset{0}.'.format(apnd))
    if opts.torchvision_dataset == 'mnist':
        from torchvision.datasets import MNIST
        MNIST.__getitem__ = _custom_get_item
        ret_dataset = MNIST(opts.data, train=train, download=True, transform=transform)
        # Add channel dimension for consistency
        if train:
            ret_dataset.train_data = ret_dataset.train_data.unsqueeze(3)
        else:
            ret_dataset.test_data = ret_dataset.test_data.unsqueeze(3)
    elif opts.torchvision_dataset == 'fashionmnist':
        from torchvision.datasets import FashionMNIST
        FashionMNIST.__getitem__ = _custom_get_item
        ret_dataset = FashionMNIST(opts.data, train=train, download=True, transform=transform)
        # Add channel dimension for consistency
        if train:
            ret_dataset.train_data = ret_dataset.train_data.unsqueeze(3)
        else:
            ret_dataset.test_data = ret_dataset.test_data.unsqueeze(3)
    elif opts.torchvision_dataset == 'cifar10':
        from torchvision.datasets import CIFAR10
        CIFAR10.__getitem__ = _custom_get_item
        ret_dataset = CIFAR10(opts.data, train=train, download=True, transform=transform)
    elif opts.torchvision_dataset == 'cifar100':
        from torchvision.datasets import CIFAR100
        CIFAR100.__getitem__ = _custom_get_item
        ret_dataset = CIFAR100(opts.data, train=train, download=True, transform=transform)
    else:
        # Avoid returning an unbound name for unrecognized dataset values.
        raise ValueError('Unsupported --torchvision_dataset: {0}'.format(opts.torchvision_dataset))
    return ret_dataset
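# Example usage (a sketch; assumes dlt's option parsing supplies
# `--data ./data --torchvision_dataset mnist` on the command line):
#
#   dset = torchvision_dataset(transform=None, train=True)
#   img, label = dset[0]   # img is a NumPy array (H, W, C), not a PIL Image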
def get_dataloaders(data='mnist', train_bs=128, test_bs=500, root='./data',
                    ohe_labels=False, train_fraction=1.):
    to_tensor = transforms.ToTensor()
    if data == 'mnist':
        trainset = MNIST(root, train=True, download=True, transform=to_tensor)
        if train_fraction < 1.:
            # Stratified subsample of the training set.
            imgs, _, labels, _ = train_test_split(
                trainset.train_data.numpy(),
                trainset.train_labels.numpy(),
                stratify=trainset.train_labels.numpy(),
                train_size=train_fraction)
            trainset.train_data = torch.ByteTensor(imgs)
            trainset.train_labels = torch.LongTensor(labels)
        # Keep only the digit-0 samples.
        idx = torch.LongTensor(np.where(trainset.train_labels.numpy() == 0)[0])
        trainset.train_data = trainset.train_data[idx]
        trainset.train_labels = trainset.train_labels[idx]
        if ohe_labels:
            # One-hot encode the training labels.
            x = trainset.train_labels.numpy()
            ohe = np.zeros((len(x), 10))
            ohe[np.arange(ohe.shape[0]), x] = 1
            trainset.train_labels = torch.from_numpy(ohe.astype(np.float32))
        testset = MNIST(root, train=False, download=True, transform=to_tensor)
        if ohe_labels:
            # One-hot encode the test labels.
            x = testset.test_labels.numpy()
            ohe = np.zeros((len(x), 10))
            ohe[np.arange(ohe.shape[0]), x] = 1
            testset.test_labels = torch.from_numpy(ohe.astype(np.float32))
    elif data == 'not-mnist':
        trainset = MNIST(root=os.path.join(root, 'not-mnist'), train=False,
                         download=True, transform=to_tensor)
        testset = MNIST(root=os.path.join(root, 'not-mnist'), train=False,
                        download=True, transform=to_tensor)
    else:
        raise NotImplementedError
    trainloader = torch.utils.data.DataLoader(trainset, batch_size=train_bs)
    testloader = torch.utils.data.DataLoader(testset, batch_size=test_bs)
    return trainloader, testloader
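# Example usage (a sketch; note that the 'mnist' branch above keeps only
# digit-0 samples in the training loader):
#
#   train_loader, test_loader = get_dataloaders(data='mnist', train_bs=128,
#                                               test_bs=500, train_fraction=0.5)
#   images, labels = next(iter(train_loader))   # images: [batch, 1, 28, 28]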
def fetch_dataloader(args, train=True, download=True, mini_size=128):
    # Load the dataset and wrap it in a DataLoader.
    transforms = T.Compose([T.ToTensor()])
    dataset = MNIST(root=args.data_dir, train=train, download=download,
                    transform=transforms)
    if args.mini_data:
        # Optionally truncate to a small subset for quick debugging.
        if train:
            dataset.train_data = dataset.train_data[:mini_size]
            dataset.train_labels = dataset.train_labels[:mini_size]
        else:
            dataset.test_data = dataset.test_data[:mini_size]
            dataset.test_labels = dataset.test_labels[:mini_size]
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.device.type == 'cuda' else {}
    dl = DataLoader(dataset, batch_size=args.batch_size, shuffle=train,
                    drop_last=True, **kwargs)
    return dl
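# Example usage (a sketch; `args` here is a hypothetical stand-in for the
# project's parsed arguments):
#
#   from argparse import Namespace
#   args = Namespace(data_dir='./data', mini_data=True, batch_size=32,
#                    device=torch.device('cpu'))
#   dl = fetch_dataloader(args, train=True)   # 128-sample debug loader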
def get_loaders(nb_labelled, batch_size, unlab_rat, lab_inds=None):
    transform_train, transform_test = augment()
    trainset_l = MNIST(root='./data', train=True, download=True,
                       transform=transform_train)
    test_set = MNIST(root='./data', train=False, download=True,
                     transform=transform_test)
    nb_class = 10
    print('total training dataset - ')
    print(trainset_l.train_data.shape, len(trainset_l.train_labels))
    # Avoid a mutable default argument: build the labelled indices if none given.
    if lab_inds is None or len(lab_inds) == 0:
        # Sample an equal number of labelled indices per class.
        lab_inds = []
        for i in range(nb_class):
            labels = np.array(trainset_l.train_labels)
            inds_i = np.where(labels == i)[0]
            inds_i = np.random.permutation(inds_i)
            lab_inds.extend(inds_i[0:int(nb_labelled / nb_class)].tolist())
    lab_inds = np.array(lab_inds)
    all_inds = np.arange(len(trainset_l.train_labels))
    unlab_inds = np.setdiff1d(all_inds, lab_inds)
    trainset_u = copy.deepcopy(trainset_l)
    trainset_u.train_data = torch.tensor(np.array(trainset_u.train_data)[unlab_inds])
    trainset_u.train_labels = torch.tensor(np.array(trainset_u.train_labels)[unlab_inds])
    trainloader_u = DataLoader(trainset_u, batch_size=batch_size, shuffle=False)
    print('unlabelled part of training dataset - ')
    print(trainset_u.train_data.shape, len(trainset_u.train_labels))
    trainset_l.train_data = torch.tensor(np.array(trainset_l.train_data)[lab_inds])
    trainset_l.train_labels = torch.tensor(np.array(trainset_l.train_labels)[lab_inds])
    print('labelled part of training dataset - ')
    print(trainset_l.train_data.shape, len(trainset_l.train_labels))
    trainloader_l = DataLoader(trainset_l, batch_size=batch_size, shuffle=True)
    testloader = DataLoader(test_set, batch_size=batch_size, shuffle=False)
    loaders = {
        "trainloader_l": trainloader_l,
        "testloader": testloader,
        "trainloader_u": trainloader_u,
        "trainset_l": trainset_l,
        "test_set": test_set,
        "trainset_u": trainset_u,
        "lab_inds": lab_inds,
    }
    return loaders
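# Example usage (a sketch): draw 100 labelled examples (10 per class) and
# treat the rest of the training set as unlabelled.
#
#   loaders = get_loaders(nb_labelled=100, batch_size=64, unlab_rat=1.0)
#   x_l, y_l = next(iter(loaders['trainloader_l']))
#   x_u, _ = next(iter(loaders['trainloader_u']))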
def fetch_dataloader(params, train=True, mini_size=128):
    # Load the dataset and wrap it in a DataLoader.
    transforms = T.Compose([T.ToTensor()])
    dataset = MNIST(root=params.data_dir, train=train, download=True,
                    transform=transforms)
    if params.dict.get('mini_data'):
        # Optionally truncate to a small subset for quick debugging.
        if train:
            dataset.train_data = dataset.train_data[:mini_size]
            dataset.train_labels = dataset.train_labels[:mini_size]
        else:
            dataset.test_data = dataset.test_data[:mini_size]
            dataset.test_labels = dataset.test_labels[:mini_size]
    if params.dict.get('mini_ones'):
        # Keep only digit-1 samples from the first 2000 examples.
        if train:
            labels = dataset.train_labels[:2000]
            mask = labels == 1
            dataset.train_labels = labels[mask][:mini_size]
            dataset.train_data = dataset.train_data[:2000][mask][:mini_size]
        else:
            labels = dataset.test_labels[:2000]
            mask = labels == 1
            dataset.test_labels = labels[mask][:mini_size]
            dataset.test_data = dataset.test_data[:2000][mask][:mini_size]
    kwargs = ({'num_workers': 1, 'pin_memory': True}
              if torch.cuda.is_available() and params.device.type == 'cuda' else {})
    return DataLoader(dataset, batch_size=params.batch_size, shuffle=True,
                      drop_last=True, **kwargs)
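# Example usage (a sketch; `params` is a hypothetical object mirroring the
# attributes read above, including the `.dict` mapping):
#
#   params.data_dir = './data'
#   params.batch_size = 32
#   params.device = torch.device('cpu')
#   params.dict = {'mini_ones': True}          # only digit-1 samples
#   dl = fetch_dataloader(params, train=True)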
def load_data(opt):
    """ Load Data

    Args:
        opt (Namespace): parsed command line options

    Raises:
        IOError: Cannot Load Dataset

    Returns:
        dict: a dataloader for each split
    """
    ##
    # LOAD DATA SET
    if opt.dataroot == '':
        opt.dataroot = './data/{}'.format(opt.dataset)

    if opt.dataset in ['cifar10']:
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': False}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        classes = {
            'plane': 0, 'car': 1, 'bird': 2, 'cat': 3, 'deer': 4,
            'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9
        }
        dataset = {}
        dataset['train'] = CIFAR10(root='./data', train=True, download=True, transform=transform)
        dataset['test'] = CIFAR10(root='./data', train=False, download=True, transform=transform)
        dataset['train'].train_data, dataset['train'].train_labels, \
            dataset['test'].test_data, dataset['test'].test_labels = get_cifar_anomaly_dataset(
                trn_img=dataset['train'].train_data,
                trn_lbl=dataset['train'].train_labels,
                tst_img=dataset['test'].test_data,
                tst_lbl=dataset['test'].test_labels,
                abn_cls_idx=classes[opt.anomaly_class]
            )
        dataloader = {
            x: torch.utils.data.DataLoader(dataset=dataset[x],
                                           batch_size=opt.batchsize,
                                           shuffle=shuffle[x],
                                           num_workers=int(opt.workers),
                                           drop_last=drop_last_batch[x])
            for x in splits
        }
        return dataloader

    elif opt.dataset in ['mnist']:
        opt.anomaly_class = int(opt.anomaly_class)
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset = {}
        dataset['train'] = MNIST(root='./data', train=True, download=True, transform=transform)
        dataset['test'] = MNIST(root='./data', train=False, download=True, transform=transform)
        dataset['train'].train_data, dataset['train'].train_labels, \
            dataset['test'].test_data, dataset['test'].test_labels = get_mnist_anomaly_dataset(
                trn_img=dataset['train'].train_data,
                trn_lbl=dataset['train'].train_labels,
                tst_img=dataset['test'].test_data,
                tst_lbl=dataset['test'].test_labels,
                abn_cls_idx=opt.anomaly_class
            )
        dataloader = {
            x: torch.utils.data.DataLoader(dataset=dataset[x],
                                           batch_size=opt.batchsize,
                                           shuffle=shuffle[x],
                                           num_workers=int(opt.workers),
                                           drop_last=drop_last_batch[x])
            for x in splits
        }
        return dataloader

    elif opt.dataset in ['mnist2']:
        opt.anomaly_class = int(opt.anomaly_class)
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset = {}
        dataset['train'] = MNIST(root='./data', train=True, download=True, transform=transform)
        dataset['test'] = MNIST(root='./data', train=False, download=True, transform=transform)
        dataset['train'].train_data, dataset['train'].train_labels, \
            dataset['test'].test_data, dataset['test'].test_labels = get_mnist2_anomaly_dataset(
                trn_img=dataset['train'].train_data,
                trn_lbl=dataset['train'].train_labels,
                tst_img=dataset['test'].test_data,
                tst_lbl=dataset['test'].test_labels,
                nrm_cls_idx=opt.anomaly_class,
                proportion=opt.proportion
            )
        dataloader = {
            x: torch.utils.data.DataLoader(dataset=dataset[x],
                                           batch_size=opt.batchsize,
                                           shuffle=shuffle[x],
                                           num_workers=int(opt.workers),
                                           drop_last=drop_last_batch[x])
            for x in splits
        }
        return dataloader

    else:
        # Fall back to an ImageFolder layout under opt.dataroot/{train,test}.
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.CenterCrop(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        dataset = {x: ImageFolder(os.path.join(opt.dataroot, x), transform) for x in splits}
        dataloader = {
            x: torch.utils.data.DataLoader(dataset=dataset[x],
                                           batch_size=opt.batchsize,
                                           shuffle=shuffle[x],
                                           num_workers=int(opt.workers),
                                           drop_last=drop_last_batch[x])
            for x in splits
        }
        return dataloader
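# Example usage (a sketch; the option names mirror those read above):
#
#   from argparse import Namespace
#   opt = Namespace(dataroot='', dataset='mnist', isize=32, anomaly_class='1',
#                   batchsize=64, workers=2)
#   loaders = load_data(opt)
#   x, y = next(iter(loaders['train']))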
def load_data(opt):
    """
    Args:
        opt (Namespace): parsed command line options

    Raises:
        IOError: Cannot Load Dataset

    Returns:
        dict: a dataloader for each split
    """
    ##
    # LOAD DATA SET
    if opt.dataroot == '':
        opt.dataroot = './data/{}'.format(opt.dataset)

    if opt.dataset in ['disk']:
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}
        # transforms.Compose chains the transforms together.
        transform = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize(32),
            transforms.ToTensor(),  # range [0, 255] -> [0.0, 1.0]
        ])
        good_sample = LoadDiskDataset(opt, 'good_sample', transform)
        failed_sample = LoadDiskDataset(opt, 'failed_sample', transform)
        # Build a dict holding the train/test splits.
        dataset = {}
        dataset['train'], dataset['test'] = get_disk_anomaly_dataset(good_sample, failed_sample, 1)
        # DataLoader's `shuffle` flag defaults to False. Shuffling makes the
        # samples more independent, but it must not be used when the data has a
        # sequential structure. For disk failure prediction the order cannot be
        # shuffled, because the SMART records are time-ordered.
        dataloader = {
            x: torch.utils.data.DataLoader(dataset=dataset[x],
                                           batch_size=opt.batchsize,
                                           shuffle=shuffle[x],
                                           num_workers=int(opt.workers),
                                           drop_last=drop_last_batch[x])
            for x in splits
        }
        return dataloader
    ##
    elif opt.dataset in ['mnist']:
        opt.anomaly_class = int(opt.anomaly_class)
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset = {}
        dataset['train'] = MNIST(root='./data', train=True, download=True, transform=transform)
        dataset['test'] = MNIST(root='./data', train=False, download=True, transform=transform)
        dataset['train'].train_data, dataset['train'].train_labels, \
            dataset['test'].test_data, dataset['test'].test_labels = get_mnist_anomaly_dataset(
                trn_img=dataset['train'].train_data,
                trn_lbl=dataset['train'].train_labels,
                tst_img=dataset['test'].test_data,
                tst_lbl=dataset['test'].test_labels,
                abn_cls_idx=opt.anomaly_class
            )
        dataloader = {
            x: torch.utils.data.DataLoader(dataset=dataset[x],
                                           batch_size=opt.batchsize,
                                           shuffle=shuffle[x],
                                           num_workers=int(opt.workers),
                                           drop_last=drop_last_batch[x])
            for x in splits
        }
        return dataloader
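# Example usage (a sketch; the 'disk' path expects LoadDiskDataset to find the
# SMART samples under opt.dataroot):
#
#   from argparse import Namespace
#   opt = Namespace(dataroot='', dataset='disk', batchsize=128, workers=0)
#   loaders = load_data(opt)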
def load_data(opt):
    """ Load Data

    Args:
        opt (Namespace): parsed command line options

    Raises:
        IOError: Cannot Load Dataset

    Returns:
        dict: a dataloader for each split
    """
    ##
    # LOAD DATA SET
    if opt.dataroot == '':
        opt.dataroot = './data/{}'.format(opt.dataset)

    if opt.dataset in ['cifar10']:
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': False}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        classes = {
            'plane': 0, 'car': 1, 'bird': 2, 'cat': 3, 'deer': 4,
            'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9
        }
        dataset = {}
        dataset['train'] = CIFAR10(root='./data', train=True, download=True, transform=transform)
        dataset['test'] = CIFAR10(root='./data', train=False, download=True, transform=transform)
        if opt.task == 'anomaly_detect':
            dataset['train'].train_data, dataset['train'].train_labels, \
                dataset['test'].test_data, dataset['test'].test_labels = get_cifar_anomaly_dataset(
                    trn_img=dataset['train'].train_data,
                    trn_lbl=dataset['train'].train_labels,
                    tst_img=dataset['test'].test_data,
                    tst_lbl=dataset['test'].test_labels,
                    abn_cls_idx=classes[opt.anomaly_class]
                )
        elif opt.task == 'random_walk':
            dataset['train'].train_data, dataset['train'].train_labels, \
                dataset['test'].test_data, dataset['test'].test_labels = get_sub_cifar10_dataset(
                    trn_img=dataset['train'].train_data,
                    trn_lbl=dataset['train'].train_labels,
                    tst_img=dataset['test'].test_data,
                    tst_lbl=dataset['test'].test_labels,
                )
        elif opt.task == 'llk_trend':
            dataset['train'].train_data, dataset['train'].train_labels, \
                dataset['test'].test_data, dataset['test'].test_labels = get_sub_cifar10_dataset(
                    trn_img=dataset['train'].train_data,
                    trn_lbl=dataset['train'].train_labels,
                    tst_img=dataset['test'].test_data,
                    tst_lbl=dataset['test'].test_labels,
                    abn_cls_idx=[0]
                )
        elif opt.task == 'rw_llk':
            # For simplification, handle random walk and likelihood together.
            dataset['train'].train_data, dataset['train'].train_labels, \
                dataset['test'].test_data, dataset['test'].test_labels = get_cifar_rwllk_dataset(
                    trn_img=dataset['train'].train_data,
                    trn_lbl=dataset['train'].train_labels,
                    tst_img=dataset['test'].test_data,
                    tst_lbl=dataset['test'].test_labels,
                    abn_cls_idx=classes[opt.anomaly_class]
                )
        dataloader = {
            x: torch.utils.data.DataLoader(dataset=dataset[x],
                                           batch_size=opt.batch_size,
                                           shuffle=shuffle[x],
                                           num_workers=int(opt.workers),
                                           drop_last=drop_last_batch[x])
            for x in splits
        }
        return dataloader

    elif opt.dataset in ['mnist']:
        opt.anomaly_class = int(opt.anomaly_class)
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}
        # This transform works well for mnist.
        transform = transforms.Compose([
            transforms.Resize((opt.isize, opt.isize)),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])
        dataset = {}
        dataset['train'] = MNIST(root='./data', train=True, download=True, transform=transform)
        dataset['test'] = MNIST(root='./data', train=False, download=True, transform=transform)
        if opt.task == 'anomaly_detect':
            dataset['train'].train_data, dataset['train'].train_labels, \
                dataset['test'].test_data, dataset['test'].test_labels = get_mnist_anomaly_dataset(
                    trn_img=dataset['train'].train_data,
                    trn_lbl=dataset['train'].train_labels,
                    tst_img=dataset['test'].test_data,
                    tst_lbl=dataset['test'].test_labels,
                    abn_cls_idx=opt.anomaly_class
                )
        elif opt.task == 'random_walk':
            dataset['train'].train_data, dataset['train'].train_labels, \
                dataset['test'].test_data, dataset['test'].test_labels = get_sub_mnist_dataset(
                    trn_img=dataset['train'].train_data,
                    trn_lbl=dataset['train'].train_labels,
                    tst_img=dataset['test'].test_data,
                    tst_lbl=dataset['test'].test_labels,
                )
        elif opt.task == 'llk_trend':
            dataset['train'].train_data, dataset['train'].train_labels, \
                dataset['test'].test_data, dataset['test'].test_labels = get_sub_mnist_dataset(
                    trn_img=dataset['train'].train_data,
                    trn_lbl=dataset['train'].train_labels,
                    tst_img=dataset['test'].test_data,
                    tst_lbl=dataset['test'].test_labels,
                    abn_cls_idx=[0]
                )
        elif opt.task == 'rw_llk':
            # For simplification, handle random walk and likelihood together.
            dataset['train'].train_data, dataset['train'].train_labels, \
                dataset['test'].test_data, dataset['test'].test_labels = get_mnist_rwllk_dataset(
                    trn_img=dataset['train'].train_data,
                    trn_lbl=dataset['train'].train_labels,
                    tst_img=dataset['test'].test_data,
                    tst_lbl=dataset['test'].test_labels,
                    abn_cls_idx=opt.anomaly_class
                )
        dataloader = {
            x: torch.utils.data.DataLoader(dataset=dataset[x],
                                           batch_size=opt.batch_size,
                                           shuffle=shuffle[x],
                                           num_workers=int(opt.workers),
                                           drop_last=drop_last_batch[x])
            for x in splits
        }
        return dataloader

    elif opt.dataset in ['svhn']:
        opt.anomaly_class = int(opt.anomaly_class)
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}
        # The same transform can be shared by cifar10 and mnist; the
        # MNIST-style normalization ((0.1307,), (0.3081,)) did not work well here.
        transform = transforms.Compose([
            transforms.Resize((opt.isize, opt.isize)),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        ])
        dataset = {}
        dataset['train'] = SVHN(root='./data', split='train', download=True, transform=transform)
        dataset['test'] = SVHN(root='./data', split='test', download=True, transform=transform)
        if opt.task == 'anomaly_detect':
            # Reuses get_cifar_anomaly_dataset; a dedicated
            # get_svhn_anomaly_dataset may not be needed yet.
            dataset['train'].train_data, dataset['train'].train_labels, \
                dataset['test'].test_data, dataset['test'].test_labels = get_cifar_anomaly_dataset(
                    trn_img=dataset['train'].data,
                    trn_lbl=dataset['train'].labels,
                    tst_img=dataset['test'].data,
                    tst_lbl=dataset['test'].labels,
                    abn_cls_idx=opt.anomaly_class
                )
        dataloader = {
            x: torch.utils.data.DataLoader(dataset=dataset[x],
                                           batch_size=opt.batch_size,
                                           shuffle=shuffle[x],
                                           num_workers=int(opt.workers),
                                           drop_last=drop_last_batch[x])
            for x in splits
        }
        return dataloader

    elif opt.dataset in ['mnist2']:
        opt.anomaly_class = int(opt.anomaly_class)
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset = {}
        dataset['train'] = MNIST(root='./data', train=True, download=True, transform=transform)
        dataset['test'] = MNIST(root='./data', train=False, download=True, transform=transform)
        if opt.task == 'anomaly_detect':
            dataset['train'].train_data, dataset['train'].train_labels, \
                dataset['test'].test_data, dataset['test'].test_labels = get_mnist2_anomaly_dataset(
                    trn_img=dataset['train'].train_data,
                    trn_lbl=dataset['train'].train_labels,
                    tst_img=dataset['test'].test_data,
                    tst_lbl=dataset['test'].test_labels,
                    nrm_cls_idx=opt.anomaly_class,
                    proportion=opt.proportion
                )
        dataloader = {
            x: torch.utils.data.DataLoader(dataset=dataset[x],
                                           batch_size=opt.batch_size,
                                           shuffle=shuffle[x],
                                           num_workers=int(opt.workers),
                                           drop_last=drop_last_batch[x])
            for x in splits
        }
        return dataloader

    elif opt.dataset in ['celebA']:
        # Not for anomaly detection, and not for classification either.
        # celebA is loaded through the project's get_loader helper rather than
        # the ImageFolder-based pipeline used for the other datasets.
        dataloader = get_loader('./data/celebA', 'train', opt.batch_size, opt.isize)
        return dataloader
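# Example usage (a sketch; `task` selects how the splits are rebuilt above):
#
#   from argparse import Namespace
#   opt = Namespace(dataroot='', dataset='cifar10', isize=32,
#                   task='anomaly_detect', anomaly_class='car',
#                   batch_size=64, workers=2)
#   loaders = load_data(opt)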
def load_data(opt):
    """ Load Data

    Args:
        opt (Namespace): parsed command line options

    Raises:
        IOError: Cannot Load Dataset

    Returns:
        dict: a dataloader for each split
    """
    ##
    # LOAD DATA SET
    if opt.dataroot == '':
        opt.dataroot = './data/{}'.format(opt.dataset)

    if opt.dataset in ['cifar10']:
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': False}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        classes = {
            'plane': 0, 'car': 1, 'bird': 2, 'cat': 3, 'deer': 4,
            'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9
        }
        dataset = {}
        dataset['train'] = CIFAR10(root='./data', train=True, download=True, transform=transform)
        dataset['test'] = CIFAR10(root='./data', train=False, download=True, transform=transform)
        dataset['train'].train_data, dataset['train'].train_labels, \
            dataset['test'].test_data, dataset['test'].test_labels = get_cifar_anomaly_dataset(
                trn_img=dataset['train'].train_data,
                trn_lbl=dataset['train'].train_labels,
                tst_img=dataset['test'].test_data,
                tst_lbl=dataset['test'].test_labels,
                abn_cls_idx=classes[opt.anomaly_class]
            )
        dataloader = {
            x: torch.utils.data.DataLoader(dataset=dataset[x],
                                           batch_size=opt.batchsize,
                                           shuffle=shuffle[x],
                                           num_workers=int(opt.workers),
                                           drop_last=drop_last_batch[x])
            for x in splits
        }
        return dataloader

    elif opt.dataset in ['mnist']:
        opt.anomaly_class = int(opt.anomaly_class)
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset = {}
        dataset['train'] = MNIST(root='./data', train=True, download=True, transform=transform)
        dataset['test'] = MNIST(root='./data', train=False, download=True, transform=transform)
        dataset['train'].train_data, dataset['train'].train_labels, \
            dataset['test'].test_data, dataset['test'].test_labels = get_mnist_anomaly_dataset(
                trn_img=dataset['train'].train_data,
                trn_lbl=dataset['train'].train_labels,
                tst_img=dataset['test'].test_data,
                tst_lbl=dataset['test'].test_labels,
                abn_cls_idx=opt.anomaly_class
            )
        dataloader = {
            x: torch.utils.data.DataLoader(dataset=dataset[x],
                                           batch_size=opt.batchsize,
                                           shuffle=shuffle[x],
                                           num_workers=int(opt.workers),
                                           drop_last=drop_last_batch[x])
            for x in splits
        }
        return dataloader

    else:
        splits = ['train', 'val', 'test']
        drop_last_batch = {'train': True, 'val': False, 'test': False}
        shuffle = {'train': True, 'val': True, 'test': True}
        # ToTensor gives values in [0, 1]; normalizing maps them to [-1, 1].
        # Several augmentations were tried and left disabled: Resize, Grayscale,
        # RandomHorizontalFlip, RandomRotation, RandomAffine (rotate/translate/
        # shear), and the custom MyCrop / MyTransformation helpers.
        transform = transforms.Compose([
            # Convert a PIL image in the range [0, 255] to a tensor in [0, 1].
            transforms.ToTensor(),
            # For grayscale (1 channel) use transforms.Normalize((0.5,), (0.5,)).
            # Normalize the tensor: subtract the mean and divide by the std.
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        # ImageFolderWithPaths also returns each image's file path.
        dataset = {
            x: ImageFolderWithPaths(os.path.join(opt.dataroot, x), transform)
            for x in splits
        }
        dataloader = {
            x: torch.utils.data.DataLoader(dataset=dataset[x],
                                           batch_size=opt.batchsize,
                                           shuffle=shuffle[x],
                                           num_workers=int(opt.workers),
                                           drop_last=drop_last_batch[x])
            for x in splits
        }
        return dataloader
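# Example usage (a sketch; the fall-through branch expects
# opt.dataroot/{train,val,test} folders laid out for ImageFolder):
#
#   from argparse import Namespace
#   opt = Namespace(dataroot='./data/custom', dataset='custom', isize=32,
#                   batchsize=16, workers=2)
#   loaders = load_data(opt)
#   x, y, paths = next(iter(loaders['train']))   # paths from ImageFolderWithPaths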
def get_datasets(args):
    if args.dataset == 'mnist':
        # Each class mixture draws from two components with weights (bias, 1 - bias).
        biased_weights = [(args.bias, 1 - args.bias) for _ in range(args.n_classes)]
        balanced_weights = [(1, 1) for _ in range(args.n_classes)]
        override = sample_n(range(10),
                            [10 // args.n_classes for _ in range(args.n_classes)])
        train_dataset = MNIST(root='./data/mnist', train=True, download=True)
        test_dataset = MNIST(root='./data/mnist', train=False, download=True)
        # Add a channel dimension and convert to numpy for MixtureDataset.
        train_dataset.train_data = train_dataset.train_data.unsqueeze(-1).numpy()
        test_dataset.test_data = test_dataset.test_data.unsqueeze(-1).numpy()
        transform = Compose([ToPILImage(), RandomCrop(28, padding=4), ToTensor()])
    elif args.dataset == 'cifar10':
        biased_weights = [(args.bias, 1 - args.bias) for _ in range(args.n_classes)]
        balanced_weights = [(1, 1) for _ in range(args.n_classes)]
        override = sample_n(range(10),
                            [10 // args.n_classes for _ in range(args.n_classes)])
        train_dataset = CIFAR10(root='./data/cifar10', train=True, download=True)
        test_dataset = CIFAR10(root='./data/cifar10', train=False, download=True)
        transform = Compose([ToPILImage(), RandomCrop(32, padding=4),
                             RandomHorizontalFlip(), ToTensor()])
    elif args.dataset == 'cifar100':
        # Uses predefined coarse labels: each tuple groups the five fine labels
        # that share a CIFAR-100 superclass.
        coarse_labels = [(4, 30, 55, 72, 95), (1, 32, 67, 73, 91),
                         (54, 62, 70, 82, 92), (9, 10, 16, 28, 61),
                         (0, 51, 53, 57, 83), (22, 39, 40, 86, 87),
                         (5, 20, 25, 84, 94), (6, 7, 14, 18, 24),
                         (3, 42, 43, 88, 97), (12, 17, 37, 68, 76),
                         (23, 33, 49, 60, 71), (15, 19, 21, 31, 38),
                         (34, 63, 64, 66, 75), (26, 45, 77, 79, 99),
                         (2, 11, 35, 46, 98), (27, 29, 44, 78, 93),
                         (36, 50, 65, 74, 80), (47, 52, 56, 59, 96),
                         (8, 13, 48, 58, 90), (41, 69, 81, 85, 89)]
        biased_weights = [(args.bias, (1 - args.bias) / 4, (1 - args.bias) / 4,
                           (1 - args.bias) / 4, (1 - args.bias) / 4)
                          for _ in range(args.n_classes)]
        balanced_weights = [(1, 1, 1, 1, 1) for _ in range(args.n_classes)]
        override = random.sample(
            [random.sample(coarse_label, 5) for coarse_label in coarse_labels],
            args.n_classes)
        train_dataset = CIFAR100(root='./data/cifar100', train=True, download=True)
        test_dataset = CIFAR100(root='./data/cifar100', train=False, download=True)
        transform = Compose([ToPILImage(), RandomCrop(32, padding=4),
                             RandomHorizontalFlip(), ToTensor()])
    # Hold out the last n_valid training examples as a balanced validation set.
    train_mixture = MixtureDataset(train_dataset.train_data[:-args.n_valid],
                                   train_dataset.train_labels[:-args.n_valid],
                                   mixture_weights=biased_weights,
                                   mixture_override=override,
                                   transform=transform)
    valid_mixture = MixtureDataset(train_dataset.train_data[-args.n_valid:],
                                   train_dataset.train_labels[-args.n_valid:],
                                   mixture_weights=balanced_weights,
                                   mixture_override=override,
                                   transform=transform)
    test_mixture = MixtureDataset(test_dataset.test_data,
                                  test_dataset.test_labels,
                                  mixture_weights=balanced_weights,
                                  mixture_override=override,
                                  transform=transform)
    return train_mixture, valid_mixture, test_mixture
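# Example usage (a sketch; MixtureDataset and sample_n are project helpers
# assumed to be importable alongside this function):
#
#   from argparse import Namespace
#   args = Namespace(dataset='cifar10', n_classes=5, bias=0.9, n_valid=5000)
#   train_set, valid_set, test_set = get_datasets(args)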