def __init__(self, args):
    super(Dataloader, self).__init__()
    self.args = args
    self.dataset_test_name = args.dataset_test
    self.dataset_train_name = args.dataset_train
    self.dataroot = args.dataroot
    self.batch_size = args.batch_size

    # This project's custom datasets module returns (dataset, length) pairs.
    if self.dataset_train_name == "CELEBA":
        self.dataset_train, self.dataset_train_len = datasets.ImageFolder(root=self.dataroot + "/train")
    elif self.dataset_train_name == "MNIST":
        self.dataset_train, self.dataset_train_len = datasets.MNIST(self.dataroot).train()
    else:
        raise ValueError("Unknown dataset: %s" % self.dataset_train_name)

    if self.dataset_test_name == "CELEBA":
        self.dataset_test, self.dataset_test_len = datasets.ImageFolder(root=self.dataroot + "/test")
    elif self.dataset_test_name == "MNIST":
        self.dataset_test, self.dataset_test_len = datasets.MNIST(self.dataroot).test()
    else:
        raise ValueError("Unknown dataset: %s" % self.dataset_test_name)
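# Hedged usage sketch for the Dataloader above. The surrounding class and its
# custom `datasets` module are not shown, so the argument names below are taken
# from the attributes read in __init__; the argparse wiring itself is an
# illustrative assumption, not part of the original snippet.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--dataset-train', dest='dataset_train', default='MNIST')
parser.add_argument('--dataset-test', dest='dataset_test', default='MNIST')
parser.add_argument('--dataroot', default='./data')
parser.add_argument('--batch-size', dest='batch_size', type=int, default=64)
args = parser.parse_args()

loader = Dataloader(args)  # raises ValueError for any other dataset name
print(loader.dataset_train_len, loader.dataset_test_len)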
def fetch_dataset(data_name, subset):
    dataset = {}
    print('fetching data {}...'.format(data_name))
    root = './data/{}'.format(data_name)
    if data_name == 'MNIST':
        dataset['train'] = datasets.MNIST(
            root=root, split='train', subset=subset,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,))
            ]))
        dataset['test'] = datasets.MNIST(
            root=root, split='test', subset=subset,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.1307,), (0.3081,))
            ]))
    elif data_name == 'CIFAR10':
        dataset['train'] = datasets.CIFAR10(
            root=root, split='train', subset=subset,
            transform=transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            ]))
        dataset['test'] = datasets.CIFAR10(
            root=root, split='test', subset=subset,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
            ]))
    elif data_name in ['PennTreebank', 'WikiText2', 'WikiText103']:
        # getattr replaces the original eval() calls; it performs the same
        # dynamic dispatch on data_name without evaluating strings.
        dataset['train'] = getattr(datasets, data_name)(root=root, split='train')
        dataset['test'] = getattr(datasets, data_name)(root=root, split='test')
    else:
        raise ValueError('Not valid dataset name')
    print('data ready')
    return dataset
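# Minimal usage sketch for fetch_dataset, assuming the custom `datasets`
# package accepts the `subset` keyword as in the calls above and that its
# datasets implement __len__ (subset=None is an assumption for illustration).
dataset = fetch_dataset('MNIST', subset=None)
print(len(dataset['train']), len(dataset['test']))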
def test_module_import():
    import datasets
    m = datasets.MNIST()
    # m2 = torchvision.datasets.MNIST()
    # assert len(m) == len(m2)
    assert len(m) == 60000  # size of the MNIST training split
def _get_maf_original(data_name):
    warnings.warn(
        "This function should generally not be called because it requires "
        "special setup, but it is kept here in order to reproduce the data if "
        "needed.")
    if sys.version_info < (3,):
        # Load MNIST from MAF code
        maf_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "..", "..", "maf")
        sys.path.append(maf_path)
        # noinspection PyPackageRequirements
        import datasets  # maf/datasets/*
        # Reset datasets root directory relative to this file
        datasets.root = os.path.join(maf_path, "data") + "/"
        # Copied from maf/experiments.py
        if data_name == "mnist":
            data = datasets.MNIST(logit=True, dequantize=True)
        elif data_name == "bsds300":
            data = datasets.BSDS300()
        elif data_name == "cifar10":
            data = datasets.CIFAR10(logit=True, flip=True, dequantize=True)
        elif data_name == "power":
            data = datasets.POWER()
        elif data_name == "gas":
            data = datasets.GAS()
        elif data_name == "hepmass":
            data = datasets.HEPMASS()
        elif data_name == "miniboone":
            data = datasets.MINIBOONE()
        else:
            raise ValueError("Unknown dataset")
        # Make a dictionary instead of a pickled object for better compatibility
        if hasattr(data.trn, "labels"):
            data_dict = dict(
                X_train=data.trn.x, y_train=data.trn.labels,
                X_validation=data.val.x, y_validation=data.val.labels,
                X_test=data.tst.x, y_test=data.tst.labels,
                data_name=data_name,
            )
        else:
            data_dict = dict(
                X_train=data.trn.x,
                X_validation=data.val.x,
                X_test=data.tst.x,
                data_name=data_name,
            )
    else:
        raise RuntimeError(
            "Must create data using Python 2 to load data since MAF is written "
            "for Python 2")
    return data_dict
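# Hedged example of consuming the dictionary returned by _get_maf_original.
# It only touches keys the function above is guaranteed to set; running it
# requires the Python 2 + MAF checkout described in the RuntimeError.
data_dict = _get_maf_original("power")  # POWER has no labels, so only X_* keys
print(data_dict["data_name"], data_dict["X_train"].shape)
print(data_dict["X_validation"].shape, data_dict["X_test"].shape)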
def __init__(self):
    self.config = Config()
    self.gstep = tf.Variable(0, dtype=tf.int32, trainable=False, name='global_step')
    self.num_epochs = self.config.num_epochs
    self.batch_size = self.config.batch_size
    self.isTraining = self.config.isTraining
    self.isVisualize = self.config.isVisualize
    self.isAnimate = self.config.isAnimate
    self.dataset = datasets.MNIST(self.config)
def load_data(name):
    """
    Loads the dataset. Has to be called before anything else.

    :param name: string, the dataset's name
    """
    assert isinstance(name, str), 'Name must be a string'
    datasets.root = root_data
    global data, data_name
    if data_name == name:
        return
    if name == 'mnist':
        data = datasets.MNIST(logit=True, dequantize=True)
    elif name == 'bsds300':
        data = datasets.BSDS300()
    elif name == 'cifar10':
        data = datasets.CIFAR10(logit=True, flip=True, dequantize=True)
    elif name == 'power':
        # was a bare POWER() in the original; qualified for consistency
        data = datasets.POWER()
    elif name == 'gas':
        data = datasets.GAS()
    elif name == 'hepmass':
        data = datasets.HEPMASS()
    elif name == 'miniboone':
        data = datasets.MINIBOONE()
    else:
        raise ValueError('Unknown dataset')
    # Hoisted out of each branch; the ValueError above prevents reaching this
    # line for unknown names, so behavior is unchanged.
    data_name = name
def load_data(name, logit=False, dequantize=False, flip=False):
    """
    Loads the dataset. Has to be called before anything else.

    :param name: string, the dataset's name
    """
    assert isinstance(name, str), 'Name must be a string'
    if name == 'mnist':
        data = datasets.MNIST(logit=logit, dequantize=dequantize)
    elif name == 'bsds300':
        data = datasets.BSDS300()
    elif name == 'cifar10':
        data = datasets.CIFAR10(logit=logit, flip=flip, dequantize=dequantize)
    elif name == 'power':
        data = datasets.POWER()
    elif name == 'gas':
        data = datasets.GAS()
    elif name == 'hepmass':
        data = datasets.HEPMASS()
    elif name == 'miniboone':
        data = datasets.MINIBOONE()
    else:
        raise ValueError('Unknown dataset')

    # Get the data splits and convert them to float32
    X_train = data.trn.x.astype(np.float32)
    X_val = data.val.x.astype(np.float32)
    X_test = data.tst.x.astype(np.float32)
    return data, X_train, X_val, X_test
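# Usage sketch for the second load_data variant, which returns the splits
# directly instead of caching them in module globals.
data, X_train, X_val, X_test = load_data('mnist', logit=True, dequantize=True)
print(X_train.dtype, X_train.shape, X_val.shape, X_test.shape)  # float32 arrays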
elif arguments.descriptor == 'BRISK':
    globals.descriptor = features.BRISK()
elif arguments.descriptor == 'AKAZE':
    globals.descriptor = features.AKAZE()
elif arguments.descriptor == 'FREAK':
    globals.descriptor = features.FREAK()

# Print descriptor
features.printDescriptor()

# Path of dataset
if arguments.dataset == 'MNIST':
    globals.data_train_images, globals.data_test_images = datasets.MNIST()
elif arguments.dataset == 'JAFFE':
    globals.data_train_images, globals.data_test_images = datasets.JAFFE()
elif arguments.dataset == 'Extended-CK+':
    globals.data_train_images, globals.data_test_images = datasets.extendedCK()
elif arguments.dataset == 'FEI':
    globals.data_train_images, globals.data_test_images = datasets.FEI()
elif arguments.dataset == 'CIFAR-10':
    globals.data_train_images, globals.data_test_images = datasets.CIFAR10()
elif arguments.dataset == 'FER-2013':
    globals.data_train_images, globals.data_test_images = datasets.FER2013()
import argparse

import torch
import torchvision

import datasets

batch_size = 4

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Taming VAEs experiments')
    parser.add_argument('--data', dest='dataset', default=None, help='Dataset to be used')
    args = parser.parse_args()

    if args.dataset.lower() == 'mnist':
        data_set = datasets.MNIST('./data/mnist/', download=True,
                                  transform=torchvision.transforms.ToTensor())
    elif args.dataset.lower() == 'cifar10':
        data_set = datasets.CIFAR10('./data/cifar10/', download=True,
                                    transform=torchvision.transforms.ToTensor())
    elif args.dataset.lower() == 'celeba':
        data_set = datasets.CELEBA('./data/celeba/',
                                   transform=torchvision.transforms.ToTensor())

    loader = torch.utils.data.DataLoader(data_set, batch_size=batch_size,
                                         shuffle=True, drop_last=True)
    for batch in loader:
        ...  # loop body elided in the original snippet
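# A plausible body for the truncated loop above, assuming the dataset yields
# (image, label) pairs as torchvision-style datasets do; the original body is
# not shown, so this is illustrative only.
for images, labels in loader:
    print(images.shape, labels.shape)  # e.g. torch.Size([4, 1, 28, 28]) for MNIST
    break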
# NOTE: this snippet is Python 2 / TF r0.x code (print statements,
# tf.placeholder, tf.initialize_all_variables); it is kept in that dialect.
def main(args):
    args.update_exclusive(default_args)
    directory = os.path.join(config.RESULTSDIR, 'ws', args.directory)
    if not os.path.exists(directory):
        os.makedirs(directory)
    if not args.restore:
        map(os.remove, (os.path.join(directory, f) for f in os.listdir(directory)
                        if f.endswith(".txt") or f.endswith(".png")))
    with open(os.path.join(directory, 'params.txt'), 'w') as f:
        f.write(repr(args))
    print directory

    dataset = datasets.MNIST(binary=True)
    x = tf.placeholder(np.float32, shape=(None, dataset.get_data_dim()))
    model = Model(x, args.latent_units, q_units=args.q_units, p_units=args.p_units,
                  sleep=args.sleep_type, batch_norm=args.bn)
    examples_per_epoch = dataset.data['train'][0].shape[0]
    num_updates = args.n_epochs * examples_per_epoch / args.mb_size
    step = tf.Variable(0, trainable=False)
    lr = tf.placeholder(tf.float32)
    train_op = args.optimizer(lr).minimize(model.loss, global_step=step)
    with tf.control_dependencies([train_op]):
        # Linearly anneal alpha from 0 to 1 over the first N/2 epochs, then
        # train for the remaining N/2 epochs with alpha = 1.
        train_op = tf.assign(
            model.alpha,
            tf.minimum(1., tf.maximum(0., tf.cast(step, tf.float32) / num_updates * 2)))
    init_op = tf.initialize_all_variables()
    saver = tf.train.Saver(max_to_keep=1)
    with tf.Session() as sess:
        if not args.restore:
            sess.run(init_op)
        else:
            saver.restore(sess, tf.train.latest_checkpoint(os.path.join(directory)))
            print "restored"
            measure_test_log_likelihood(sess, model, dataset, directory)
            import ipdb
            ipdb.set_trace()
        for x_np, _ in dataset.random_minibatches('train', args.mb_size, num_updates):
            i, _ = sess.run([step, train_op], feed_dict={x: x_np, lr: args.lr})
            if i % 1000 == 1 or i == num_updates - 1:
                visualize(sess, model, dataset, directory,
                          float(i) * args.mb_size / examples_per_epoch)
            if i % 10000 == 1 or i == num_updates - 1:
                saver.save(sess, os.path.join(directory, 'model.chk'), global_step=step)
            if i % 10000 == 1:
                print directory
def main(cf):
    print(f"\nStarting divisive normalization experiment {cf.logdir}: "
          f"--seed {cf.seed} --device {utils.DEVICE}")
    pprint.pprint(cf)
    os.makedirs(cf.logdir, exist_ok=True)
    utils.seed(cf.seed)
    utils.save_json({k: str(v) for (k, v) in cf.items()}, cf.logdir + "config.json")

    train_dataset = datasets.MNIST(train=True, scale=cf.label_scale,
                                   size=cf.train_size, normalize=cf.normalize)
    test_dataset = datasets.MNIST(train=False, scale=cf.label_scale,
                                  size=cf.test_size, normalize=cf.normalize)
    train_loader = datasets.get_dataloader(train_dataset, cf.batch_size)
    test_loader = datasets.get_dataloader(test_dataset, cf.batch_size)
    print(f"Loaded data [train batches: {len(train_loader)} "
          f"test batches: {len(test_loader)}]")

    model = PCModel(nodes=cf.nodes, mu_dt=cf.mu_dt, act_fn=cf.act_fn,
                    use_bias=cf.use_bias, kaiming_init=cf.kaiming_init,
                    pe_fn=cf.pe_fn, pe_fn_inverse=cf.pe_fn_inverse)
    optimizer = optim.get_optim(
        model.params, cf.optim, cf.lr,
        batch_scale=cf.batch_scale, grad_clip=cf.grad_clip,
        weight_decay=cf.weight_decay,
    )

    with torch.no_grad():
        metrics = {"acc": []}
        for epoch in range(1, cf.n_epochs + 1):
            print(f"\nTrain @ epoch {epoch} ({len(train_loader)} batches)")
            for batch_id, (img_batch, label_batch) in enumerate(train_loader):
                model.train_batch_supervised(img_batch, label_batch, cf.n_train_iters,
                                             fixed_preds=cf.fixed_preds_train)
                optimizer.step(
                    curr_epoch=epoch, curr_batch=batch_id,
                    n_batches=len(train_loader), batch_size=img_batch.size(0),
                )
            if epoch % cf.test_every == 0:
                acc = 0
                for _, (img_batch, label_batch) in enumerate(test_loader):
                    label_preds = model.test_batch_supervised(img_batch)
                    acc += datasets.accuracy(label_preds, label_batch)
                metrics["acc"].append(acc / len(test_loader))
                print("\nTest @ epoch {} / Accuracy: {:.4f}".format(
                    epoch, acc / len(test_loader)))
    utils.save_json(metrics, cf.logdir + "metrics.json")
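# Sketch of a config for main(cf) above. Every key mirrors an attribute the
# function reads; the concrete values are illustrative assumptions. `cf` is
# presumed to be an attribute-style dict (an AttrDict-like Config supporting
# both cf.logdir and cf.items()); a plain dict is shown only to list fields.
example_cf = dict(
    logdir="results/divisive_norm/", seed=0, label_scale=1.0,
    train_size=None, test_size=None, normalize=True, batch_size=64,
    nodes=[784, 300, 100, 10], mu_dt=0.01, act_fn="tanh", use_bias=True,
    kaiming_init=False, pe_fn=None, pe_fn_inverse=None,
    optim="Adam", lr=1e-4, batch_scale=True, grad_clip=None, weight_decay=None,
    n_epochs=10, n_train_iters=50, fixed_preds_train=False, test_every=1,
)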
if Args.cuda:
    print('Using GPU.')

# %% Dataset ID
datasets_id = {1: 'MNIST'}
try:
    datasets_name = datasets_id[Args.dataset]
except KeyError:
    print('Unknown dataset id; falling back to the default MNIST dataset.')
    datasets_name = 'MNIST'

# %% Main function
if __name__ == '__main__':
    if datasets_name == 'MNIST':
        print('Using the MNIST dataset.')
        data_train, data_test = datasets.MNIST(True)
        data_loader_train = torch.utils.data.DataLoader(dataset=data_train,
                                                        batch_size=Args.batch_size,
                                                        shuffle=True)
        data_loader_test = torch.utils.data.DataLoader(dataset=data_test,
                                                       batch_size=Args.batch_size,
                                                       shuffle=True)
    cnn = models.CNN_MNIST1().cuda() if Args.cuda else models.CNN_MNIST1()
    loss_func = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(cnn.parameters(), lr=Args.learn_rate)
    if Args.show:
        plt.ion()
        figure = plt.figure(1)
    Accuracy = []
    print('==> Start training.')
    for epoch in range(Args.epochs):
        for step, (x, y) in enumerate(data_loader_train):
            ...  # training step elided in the original snippet
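# A plausible body for the truncated training loop above, following the usual
# supervised-classification pattern (forward pass, cross-entropy loss,
# backward pass, optimizer step); the original step is not shown.
x = x.cuda() if Args.cuda else x
y = y.cuda() if Args.cuda else y
out = cnn(x)
loss = loss_func(out, y)
optimizer.zero_grad()
loss.backward()
optimizer.step()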
def get_data(args, train_flag=True, transform=None):
    if train_flag:
        # p = 0.5  # Invert 50% only randomly
        p = 1.0  # Invert all
    else:
        p = -1.0  # disable random transformations for the test set

    if not transform:
        # if args.nc == 1:
        #     transform = transforms.Compose([
        #         transforms.Resize(args.image_size),
        #         transforms.Grayscale(),
        #         transforms.CenterCrop(args.image_size),
        #         transforms.ToTensor(),
        #         transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        #     ])
        # else:
        if args.dataset in {'usps', 'mnist'} and args.exp == 'mnist_svhn':
            transform = transforms.Compose([
                transforms.Grayscale(num_output_channels=args.nc),
                transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                transforms.Resize(args.image_size),
                transforms.CenterCrop(args.image_size),
                transforms.ToTensor(),
                Normalize_RandomInvert_pixels(p=p, nc=args.nc),
                RandomClampTensors(min_margin=0, max_margin=0.3),
            ])
        elif args.dataset in {'usps', 'mnist'} and args.exp == 'svhn_mnist':
            transform = transforms.Compose([
                transforms.Grayscale(num_output_channels=args.nc),
                # transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                transforms.Resize(args.image_size),
                transforms.CenterCrop(args.image_size),
                transforms.ToTensor(),
                Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                # RandomClampTensors(min_margin=0, max_margin=0.0),
            ])
        elif args.dataset in {'usps', 'mnist'}:
            transform = transforms.Compose([
                transforms.Grayscale(num_output_channels=args.nc),
                # transforms.ColorJitter(brightness=0.4, contrast=0.4, saturation=0.4),
                transforms.Resize(args.image_size),
                transforms.CenterCrop(args.image_size),
                transforms.ToTensor(),
                Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
            ])
        elif args.dataset in {'svhn', 'syndigits'} and args.exp == 'mnist_svhn':
            if args.nc == 1:
                transform = transforms.Compose([
                    transforms.Resize(args.image_size),
                    transforms.Grayscale(num_output_channels=args.nc),
                    transforms.CenterCrop(args.image_size),
                    transforms.ToTensor(),
                    Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                    # RandomClampTensors(min_margin=0, max_margin=0.3),
                ])
            else:
                transform = transforms.Compose([
                    transforms.Grayscale(num_output_channels=args.nc),
                    transforms.Resize(args.image_size),
                    transforms.CenterCrop(args.image_size),
                    # transforms.RandomResizedCrop(args.image_size),
                    transforms.ToTensor(),
                    Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                ])
        elif args.dataset in {'svhn', 'syndigits'} and args.exp == 'svhn_mnist':
            if args.nc == 1:
                transform = transforms.Compose([
                    transforms.Resize(args.image_size),
                    transforms.Grayscale(num_output_channels=args.nc),
                    transforms.CenterCrop(args.image_size),
                    transforms.ToTensor(),
                    Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                    # RandomClampTensors(min_margin=0, max_margin=0.2),
                ])
            else:
                transform = transforms.Compose([
                    transforms.Grayscale(num_output_channels=args.nc),
                    transforms.Resize(args.image_size),
                    transforms.CenterCrop(args.image_size),
                    # transforms.RandomResizedCrop(args.image_size),
                    transforms.ToTensor(),
                    Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                ])
        elif args.dataset in {'svhn', 'syndigits'}:
            if args.nc == 1:
                transform = transforms.Compose([
                    transforms.Resize(args.image_size),
                    transforms.Grayscale(num_output_channels=args.nc),
                    transforms.CenterCrop(args.image_size),
                    transforms.ToTensor(),
                    Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                    # RandomClampTensors(min_margin=0, max_margin=0.2),
                ])
            else:
                transform = transforms.Compose([
                    # transforms.Grayscale(num_output_channels=args.nc),
                    transforms.Resize(args.image_size),
                    transforms.CenterCrop(args.image_size),
                    # transforms.RandomResizedCrop(args.image_size),
                    transforms.ToTensor(),
                    Normalize_RandomInvert_pixels(p=-1, nc=args.nc),
                ])
        elif args.dataset in {'cifar9', 'stl9'}:
            transform = transforms.Compose([
                transforms.Resize(args.image_size),
                transforms.CenterCrop(args.image_size),
                # transforms.RandomHorizontalFlip(p=p),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            ])
        elif args.dataset in {'amazon', 'dslr', 'webcam'}:
            transform = transforms.Compose([
                transforms.RandomResizedCrop(args.image_size),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225]),
            ])
        else:
            transform = transforms.Compose([
                transforms.Resize(args.image_size),
                transforms.CenterCrop(args.image_size),
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            ])

    if args.dataset == 'usps':
        train_dataset = mydset.USPS(root=args.dataroot, download=False, train=True, transform=transform)
        test_dataset = mydset.USPS(root=args.dataroot, download=False, train=False, transform=transform)
    elif args.dataset == 'mnist':
        train_dataset = mydset.MNIST(root=args.dataroot, download=False, train=True, transform=transform)
        test_dataset = mydset.MNIST(root=args.dataroot, download=False, train=False, transform=transform)
    elif args.dataset == 'svhn':
        train_dataset = mydset.SVHN(root=args.dataroot, download=False, train=True, transform=transform)
        test_dataset = mydset.SVHN(root=args.dataroot, download=False, train=False, transform=transform)
    elif args.dataset == 'syndigits':
        train_dataset = mydset.SYNDIGITS(root=args.dataroot, download=False, train=True, transform=transform)
        test_dataset = mydset.SYNDIGITS(root=args.dataroot, download=False, train=False, transform=transform)
    elif args.dataset == 'cifar9':
        train_dataset = mydset.CIFAR9(root=args.dataroot, download=False, train=True, transform=transform)
        test_dataset = mydset.CIFAR9(root=args.dataroot, download=False, train=False, transform=transform)
    elif args.dataset == 'stl9':
        train_dataset = mydset.STL9(root=args.dataroot, download=False, train=True, transform=transform)
        test_dataset = mydset.STL9(root=args.dataroot, download=False, train=False, transform=transform)
    elif args.dataset == 'gtsrb':
        train_dataset = mydset.GTSRB(root=args.dataroot, train=True, transform=transform)
        test_dataset = mydset.GTSRB(root=args.dataroot, train=False, transform=transform)
    elif args.dataset == 'amazon':
        # dataset = dset.ImageFolder(root=root_path + dir, transform=transform_dict[phase])
        # train_size = int(0.8 * len(data))
        # test_size = len(data) - train_size
        # data_train, data_val = torch.utils.data.random_split(data, [train_size, test_size])
        dataset_path = os.path.join(args.dataroot, 'office', 'amazon', 'images')
        train_dataset = mydset.OFFICE(root=dataset_path, train=True, transform=transform)
        test_dataset = mydset.OFFICE(root=dataset_path, train=False, transform=transform)
    elif args.dataset == 'dslr':
        dataset_path = os.path.join(args.dataroot, 'office', 'dslr', 'images')
        train_dataset = mydset.OFFICE(root=dataset_path, train=True, transform=transform)
        test_dataset = mydset.OFFICE(root=dataset_path, train=False, transform=transform)
    elif args.dataset == 'webcam':
        dataset_path = os.path.join(args.dataroot, 'office', 'webcam', 'images')
        train_dataset = mydset.OFFICE(root=dataset_path, train=True, transform=transform)
        test_dataset = mydset.OFFICE(root=dataset_path, train=False, transform=transform)
    elif args.dataset == 'synsigns':
        train_dataset = mydset.SYNSIGNS(root=args.dataroot, train=True, transform=transform)
        test_dataset = mydset.SYNSIGNS(root=args.dataroot, train=False, transform=transform)
    # Commented-out branches kept from the original for reference:
    # elif args.dataset == 'celeba':
    #     imdir = 'train' if train_flag else 'val'
    #     dataroot = os.path.join(args.dataroot, imdir)
    #     if args.image_size != 64:
    #         raise ValueError('the image size for the CelebA dataset needs to be 64!')
    #     dataset = FolderWithImages(
    #         root=dataroot,
    #         input_transform=transforms.Compose([
    #             ALICropAndScale(),
    #             transforms.ToTensor(),
    #             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    #         ]),
    #         target_transform=transforms.ToTensor())
    # elif args.dataset in ['imagenet', 'folder', 'lfw']:
    #     dataset = dset.ImageFolder(root=args.dataroot, transform=transform)
    # elif args.dataset == 'lsun':
    #     dataset = dset.LSUN(db_path=args.dataroot, classes=['bedroom_train'],
    #                         transform=transform)
    # elif args.dataset == 'cifar10':
    #     dataset = dset.CIFAR10(root=args.dataroot, download=False,
    #                            train=train_flag, transform=transform)
    # elif args.dataset == 'cifar100':
    #     dataset = dset.CIFAR100(root=args.dataroot, download=False,
    #                             train=train_flag, transform=transform)
    else:
        raise ValueError("Unknown dataset %s" % args.dataset)

    print('{}: train: count={}, X.shape={}, X.min={}, X.max={}, '
          'test: count={}, X.shape={}, X.min={}, X.max={}'.format(
              args.dataset.upper(), len(train_dataset), train_dataset[0][0].shape,
              train_dataset[0][0].min(), train_dataset[0][0].max(),
              len(test_dataset), test_dataset[0][0].shape,
              test_dataset[0][0].min(), test_dataset[0][0].max()))
    return train_dataset, test_dataset
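# Usage sketch for get_data. `args` is assumed to be an argparse namespace
# carrying the fields referenced above (dataset, exp, nc, image_size,
# dataroot); the values and the local data layout are illustrative, and the
# custom transforms (Normalize_RandomInvert_pixels, RandomClampTensors) and
# `mydset` module come from the surrounding project.
from types import SimpleNamespace

args = SimpleNamespace(dataset='mnist', exp='mnist_svhn', nc=1,
                       image_size=32, dataroot='./data')
train_dataset, test_dataset = get_data(args, train_flag=True)
print(len(train_dataset), len(test_dataset))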
def fetch_dataset(data_name, branch=None, normalize=True):
    # NOTE: `branch` and `normalize` were free variables in the original
    # snippet; they are surfaced as parameters so the function is self-contained.
    print('fetching data {}...'.format(data_name))
    if data_name == 'MNIST':
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/test'.format(data_name)
        train_dataset = datasets.MNIST(root=train_dir, train=True, download=True,
                                       transform=transforms.ToTensor())
        if normalize:
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([transforms.ToTensor(),
                                                  transforms.Normalize(stats)])
            test_transform = transforms.Compose([transforms.ToTensor(),
                                                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([transforms.ToTensor()])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.MNIST(root=test_dir, train=False, download=True,
                                      transform=test_transform)
    elif data_name in ('EMNIST', 'EMNIST_byclass', 'EMNIST_bymerge',
                       'EMNIST_balanced', 'EMNIST_letters', 'EMNIST_digits',
                       'EMNIST_mnist'):
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/test'.format(data_name.split('_')[0])
        transform = transforms.Compose([transforms.ToTensor()])
        split = 'balanced' if len(data_name.split('_')) == 1 else data_name.split('_')[1]
        train_dataset = datasets.EMNIST(root=train_dir, split=split, branch=branch,
                                        train=True, download=True, transform=transform)
        test_dataset = datasets.EMNIST(root=test_dir, split=split, branch=branch,
                                       train=False, download=True, transform=transform)
    elif data_name == 'FashionMNIST':
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/test'.format(data_name)
        transform = transforms.Compose([transforms.ToTensor()])
        train_dataset = datasets.FashionMNIST(root=train_dir, train=True,
                                              download=True, transform=transform)
        test_dataset = datasets.FashionMNIST(root=test_dir, train=False,
                                             download=True, transform=transform)
    elif data_name == 'CIFAR10':
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.CIFAR10(train_dir, train=True,
                                         transform=transforms.ToTensor(), download=True)
        if normalize:
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([transforms.ToTensor(),
                                                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CIFAR10(test_dir, train=False,
                                        transform=test_transform, download=True)
    elif data_name == 'CIFAR100':
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.CIFAR100(train_dir, branch=branch, train=True,
                                          transform=transforms.ToTensor(), download=True)
        if normalize:
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([transforms.ToTensor(),
                                                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CIFAR100(test_dir, branch=branch, train=False,
                                         transform=test_transform, download=True)
    elif data_name == 'SVHN':
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.SVHN(train_dir, split='train',
                                      transform=transforms.ToTensor(), download=True)
        if normalize:
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([transforms.ToTensor(),
                                                  transforms.Normalize(stats)])
            test_transform = transforms.Compose([transforms.ToTensor(),
                                                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([transforms.ToTensor()])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.SVHN(test_dir, split='test',
                                     transform=test_transform, download=True)
    elif data_name == 'ImageNet':
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.ImageFolder(train_dir, transform=transforms.ToTensor())
        if normalize:
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([transforms.Resize((224, 224)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(stats)])
            test_transform = transforms.Compose([transforms.Resize((224, 224)),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([transforms.Resize((224, 224)),
                                                  transforms.ToTensor()])
            test_transform = transforms.Compose([transforms.Resize((224, 224)),
                                                 transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.ImageFolder(test_dir, transform=test_transform)
    elif data_name == 'CUB2011':
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/validation'.format(data_name.split('_')[0])
        train_dataset = datasets.CUB2011(
            train_dir,
            transform=transforms.Compose([transforms.Resize((224, 224)),
                                          transforms.ToTensor()]),
            download=True)
        if normalize:
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([transforms.Resize((224, 224)),
                                                  transforms.ToTensor(),
                                                  transforms.Normalize(stats)])
            test_transform = transforms.Compose([transforms.Resize((224, 224)),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([transforms.Resize((224, 224)),
                                                  transforms.ToTensor()])
            test_transform = transforms.Compose([transforms.Resize((224, 224)),
                                                 transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CUB2011(test_dir, transform=test_transform, download=True)
    elif data_name in ('WheatImage', 'WheatImage_binary', 'WheatImage_six'):
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/validation'.format(data_name.split('_')[0])
        label_mode = 'six' if len(data_name.split('_')) == 1 else data_name.split('_')[1]
        train_dataset = datasets.WheatImage(
            train_dir, label_mode=label_mode,
            transform=transforms.Compose([transforms.Resize((224, 288)),
                                          transforms.ToTensor()]))
        if normalize:
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([transforms.Resize((224, 288)),
                                                 transforms.ToTensor(),
                                                 transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose([transforms.Resize((224, 288)),
                                                 transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.WheatImage(test_dir, label_mode=label_mode,
                                           transform=test_transform)
    elif data_name == 'CocoDetection':
        train_dir = './data/Coco/train2017'
        train_ann = './data/Coco/annotations/instances_train2017.json'
        test_dir = './data/Coco/val2017'
        test_ann = './data/Coco/annotations/instances_val2017.json'
        transform = transforms.Compose([transforms.Resize((224, 224)),
                                        transforms.ToTensor()])
        train_dataset = datasets.CocoDetection(train_dir, train_ann, transform=transform)
        test_dataset = datasets.CocoDetection(test_dir, test_ann, transform=transform)
    elif data_name == 'CocoCaptions':
        train_dir = './data/Coco/train2017'
        train_ann = './data/Coco/annotations/captions_train2017.json'
        test_dir = './data/Coco/val2017'
        test_ann = './data/Coco/annotations/captions_val2017.json'
        transform = transforms.Compose([transforms.Resize((224, 224)),
                                        transforms.ToTensor()])
        train_dataset = datasets.CocoCaptions(train_dir, train_ann, transform=transform)
        test_dataset = datasets.CocoCaptions(test_dir, test_ann, transform=transform)
    elif data_name == 'VOCDetection':
        train_dir = './data/VOC/VOCdevkit'
        test_dir = './data/VOC/VOCdevkit'
        transform = transforms.Compose([transforms.Resize((224, 224)),
                                        transforms.ToTensor()])
        train_dataset = datasets.VOCDetection(train_dir, 'trainval', transform=transform)
        test_dataset = datasets.VOCDetection(test_dir, 'test', transform=transform)
    elif data_name == 'VOCSegmentation':
        train_dir = './data/VOC/VOCdevkit'
        test_dir = './data/VOC/VOCdevkit'
        transform = transforms.Compose([transforms.Resize((224, 224)),
                                        transforms.ToTensor()])
        train_dataset = datasets.VOCSegmentation(train_dir, 'trainval', transform=transform)
        test_dataset = datasets.VOCSegmentation(test_dir, 'test', transform=transform)
    elif data_name in ('MOSI', 'MOSI_binary', 'MOSI_five', 'MOSI_seven',
                       'MOSI_regression'):
        train_dir = './data/{}'.format(data_name.split('_')[0])
        test_dir = './data/{}'.format(data_name.split('_')[0])
        label_mode = 'five' if len(data_name.split('_')) == 1 else data_name.split('_')[1]
        train_dataset = datasets.MOSI(train_dir, split='trainval',
                                      label_mode=label_mode, download=True)
        stats = make_stats(train_dataset, batch_size=1)
        train_transform = transforms.Compose([transforms.Normalize(stats)])
        test_transform = transforms.Compose([transforms.Normalize(stats)])
        train_dataset.transform = train_transform
        test_dataset = datasets.MOSI(test_dir, split='test', label_mode=label_mode,
                                     download=True, transform=test_transform)
    elif data_name == 'Kodak':
        # Kodak and UCID are evaluation-only; the same folder backs both splits.
        transform = transforms.Compose([transforms.ToTensor()])
        test_dir = './data/{}'.format(data_name)
        train_dataset = datasets.ImageFolder(test_dir, transform)
        test_dataset = datasets.ImageFolder(test_dir, transform)
    elif data_name == 'UCID':
        transform = transforms.Compose([transforms.ToTensor()])
        test_dir = './data/{}'.format(data_name)
        train_dataset = datasets.ImageFolder(test_dir, transform)
        test_dataset = datasets.ImageFolder(test_dir, transform)
    else:
        raise ValueError('Not valid dataset name')
    print('data ready')
    return train_dataset, test_dataset
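# Usage sketch for this fetch_dataset variant. The `branch`/`normalize`
# parameters surfaced in the signature above replace what were free variables
# in the original snippet; the datasets are assumed to implement __len__.
train_dataset, test_dataset = fetch_dataset('CIFAR10', normalize=True)
print(len(train_dataset), len(test_dataset))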
def load_datasets(name, root, batch_size):
    # The four MNIST-style branches in the original were identical except for
    # the dataset class, so they are dispatched through a lookup table; the
    # transforms and loader settings are unchanged.
    mnist_like = {
        "mnist": datasets.MNIST,
        "fmnist": datasets.FashionMNIST,
        "kmnist": datasets.KMNIST,
        "qmnist": datasets.QMNIST,
    }
    if name in mnist_like:
        cls = mnist_like[name]
        train_transform = transforms.Compose([
            transforms.Resize(28),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5]),
        ])
        test_transform = transforms.Compose([
            transforms.Resize(28),
            transforms.ToTensor(),
            transforms.Normalize([0.5], [0.5]),
        ])
        train_dataset = cls(root=root, download=True, train=True,
                            transform=train_transform)
        # QMNIST evaluates on the extended 50k test split, as in the original
        test_kwargs = {"what": "test50k"} if name == "qmnist" else {}
        test_dataset = cls(root=root, download=True, train=False,
                           transform=test_transform, **test_kwargs)
    elif name in ("cifar10", "cifar100"):
        cls = datasets.CIFAR10 if name == "cifar10" else datasets.CIFAR100
        train_transform = transforms.Compose([
            transforms.Resize(32),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        test_transform = transforms.Compose([
            transforms.Resize(32),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        train_dataset = cls(root=root, download=True, train=True,
                            transform=train_transform)
        test_dataset = cls(root=root, download=True, train=False,
                           transform=test_transform)
    else:
        # The original fell through and implicitly returned None here
        raise ValueError("Unknown dataset %s" % name)

    train_dataloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                                   shuffle=True, num_workers=8)
    test_dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size,
                                                  shuffle=False, num_workers=8)
    return train_dataloader, test_dataloader
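# Usage sketch for load_datasets; the root directory and batch size are
# illustrative values, not part of the original snippet.
train_loader, test_loader = load_datasets("cifar10", root="./data", batch_size=128)
images, labels = next(iter(train_loader))
print(images.shape, labels.shape)  # torch.Size([128, 3, 32, 32]), torch.Size([128])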