def load_emnist(val_size=10000, seed=None):
    """Return train, val and test datasets for EMNIST letters.

    The validation set (val_size samples) is drawn at random from the
    original training set; the official test set is returned unchanged.
    """
    image_transform = transforms.Compose([
        # EMNIST images are flipped and rotated by default; fix this here.
        transforms.RandomHorizontalFlip(1),
        transforms.RandomRotation((90, 90)),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    target_transform = lambda x: x - 1  # make labels start at 0 instead of 1
    raw_train_dataset = datasets.EMNIST('data/emnist', split='letters',
                                        train=True, download=True,
                                        transform=image_transform,
                                        target_transform=target_transform)
    test_dataset = datasets.EMNIST('data/emnist', split='letters',
                                   train=False, download=True,
                                   transform=image_transform,
                                   target_transform=target_transform)
    # Split val_size samples from the train dataset for validation
    # (similar to Sacramento et al. 2018).
    utils.seed_torch(seed)
    train_dataset, val_dataset = torch.utils.data.random_split(
        raw_train_dataset, (len(raw_train_dataset) - val_size, val_size))
    return train_dataset, val_dataset, test_dataset
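# Hedged usage sketch (not part of the original source): wrapping the three
# splits returned by load_emnist above in DataLoaders. The batch size of 128
# and the shuffle settings are assumptions for illustration only.
def example_emnist_loaders(batch_size=128):
    train_ds, val_ds, test_ds = load_emnist(val_size=10000, seed=0)
    train_loader = torch.utils.data.DataLoader(train_ds,
                                               batch_size=batch_size,
                                               shuffle=True)
    val_loader = torch.utils.data.DataLoader(val_ds, batch_size=batch_size,
                                             shuffle=False)
    test_loader = torch.utils.data.DataLoader(test_ds, batch_size=batch_size,
                                              shuffle=False)
    return train_loader, val_loader, test_loader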
def _load(self):
    """Load the dataset.

    :return: train and test data loaders
    :rtype: Tuple[DataLoader, DataLoader]
    """
    transformations = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1722,), (0.3310,))
    ])
    train_loader = DataLoader(datasets.EMNIST(self.data_path,
                                              split="byclass",
                                              train=True,
                                              download=True,
                                              transform=transformations),
                              batch_size=self.batch_size,
                              shuffle=True,
                              pin_memory=True)
    test_loader = DataLoader(datasets.EMNIST(self.data_path,
                                             split="byclass",
                                             train=False,
                                             download=False,
                                             transform=transformations),
                             batch_size=self.batch_size,
                             shuffle=True,
                             pin_memory=True)
    dataset_test_len = len(test_loader.dataset)
    dataset_train_len = len(train_loader.dataset)
    print("Training dataset length: {}\nTest dataset length: {}".format(
        dataset_train_len, dataset_test_len))
    return train_loader, test_loader
def load_emnist_dataset():
    import numpy as np
    import torchvision.datasets as datasets
    emnist_train = datasets.EMNIST(root='../data/emnist', split='balanced',
                                   train=True, download=True, transform=None)
    emnist_test = datasets.EMNIST(root='../data/emnist', split='balanced',
                                  train=False, download=True, transform=None)
    # With transform=None each sample is a (PIL.Image, int) pair, so the
    # labels are plain ints, not tensors.
    test_labels = np.array(
        [emnist_test[i][1] for i in range(len(emnist_test))], dtype=np.int64)
    train_labels = np.array(
        [emnist_train[i][1] for i in range(len(emnist_train))],
        dtype=np.int64)
    test = np.array([
        np.asarray(emnist_test[i][0]).reshape(28 * 28)
        for i in range(len(emnist_test))
    ], dtype=np.float64)
    train = np.array([
        np.asarray(emnist_train[i][0]).reshape(28 * 28)
        for i in range(len(emnist_train))
    ], dtype=np.float64)
    train /= 255.  # normalize data to be in range [0, 1]
    test /= 255.
    return train, train_labels, test, test_labels, [28, 28]
def __init__(self):
    super().__init__()
    _path = Config().data.data_path
    train_transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomAffine(degrees=10,
                                translate=(0.2, 0.2),
                                scale=(0.8, 1.2)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])
    test_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])
    self.trainset = datasets.EMNIST(root=_path,
                                    split='balanced',
                                    train=True,
                                    download=True,
                                    transform=train_transform)
    self.testset = datasets.EMNIST(root=_path,
                                   split='balanced',
                                   train=False,
                                   download=True,
                                   transform=test_transform)
def get_dataset(name, subset=None):
    if name == 'EMNIST':
        dataset = datasets.EMNIST('./data/EMNIST', train=True, download=True,
                                  split='byclass',
                                  transform=transforms.ToTensor())
    elif name == 'MNIST':
        dataset = datasets.MNIST('./data/MNIST', train=True, download=True,
                                 transform=transforms.ToTensor())
    elif name == 'CIFAR10':
        dataset = datasets.CIFAR10('./data/CIFAR10', train=True,
                                   download=True,
                                   transform=transforms.ToTensor())
    elif name == 'SVHN':
        dataset = datasets.SVHN('./data/SVHN', split='train', download=True,
                                transform=transforms.ToTensor())
    else:
        raise ValueError('Unknown dataset: {}'.format(name))
    if (subset is None) or (subset >= len(dataset)):
        return dataset
    else:
        split = (subset, len(dataset) - subset)
        subset, _ = torch.utils.data.random_split(dataset, split)
        return subset
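# Illustrative sketch (an assumption, not in the original code): drawing a
# small random subset via get_dataset above for quick smoke tests. The
# subset size and batch size are arbitrary.
def example_emnist_subset():
    small = get_dataset('EMNIST', subset=5000)
    loader = torch.utils.data.DataLoader(small, batch_size=64, shuffle=True)
    images, labels = next(iter(loader))
    print(images.shape, labels.shape)  # e.g. torch.Size([64, 1, 28, 28])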
def main():
    # curl https://www.itl.nist.gov/iaui/vip/cs_links/EMNIST/gzip.zip -o ../data/EMNIST/raw/emnist.zip
    train_loader = torch.utils.data.DataLoader(
        datasets.EMNIST('../data', 'balanced', train=True,
                        transform=transforms.Compose([
                            transforms.ToTensor(),
                            # transforms.Normalize((0.1307,), (0.3081,))
                        ])),
        batch_size=300, shuffle=False)
    test_loader = torch.utils.data.DataLoader(
        datasets.EMNIST('../data', 'balanced', train=False,
                        transform=transforms.Compose([
                            transforms.ToTensor(),
                            # transforms.Normalize((0.1307,), (0.3081,))
                        ])),
        batch_size=500, shuffle=False)
    model = Net()
    optimizer = optim.SGD(model.parameters(), lr=0.1)
    for epoch in range(3):
        train(model, train_loader, optimizer, epoch)
        test(model, test_loader)
    torch.save(model.state_dict(), "emnist.pt")
def load_emnist(batch_size):
    transformations = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1722,), (0.3310,))
    ])
    train_loader = torch.utils.data.DataLoader(
        datasets.EMNIST(DATAPATHS['EMNIST'],
                        split="byclass",
                        train=True,
                        download=True,
                        transform=transformations),
        batch_size=batch_size, shuffle=True, pin_memory=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.EMNIST(DATAPATHS['EMNIST'],
                        split="byclass",
                        train=False,
                        download=False,
                        transform=transformations),
        batch_size=batch_size, shuffle=True, pin_memory=True)
    dataset_test_len = len(test_loader.dataset)
    dataset_train_len = len(train_loader.dataset)
    print("Training dataset length: {}\nTest dataset length: {}".format(
        dataset_train_len, dataset_test_len))
    return train_loader, test_loader
def get_data(self, data_filepath, val_set_percentage, random_split_seed,
             download=False):
    train_set = datasets.EMNIST(
        root=data_filepath,
        split="balanced",
        train=True,
        download=download,
        transform=self.transform_train,
    )
    num_training_items = int(len(train_set) * (1.0 - val_set_percentage))
    num_val_items = len(train_set) - num_training_items
    train_set, val_set = torch.utils.data.random_split(
        train_set,
        [num_training_items, num_val_items],
        generator=torch.Generator().manual_seed(random_split_seed),
    )
    test_set = datasets.EMNIST(
        root=data_filepath,
        split="balanced",
        train=False,
        transform=self.transform_validate,
    )
    num_labels = 47
    return train_set, val_set, test_set, num_labels
def __init__(self, dataset_name: str, root_dir: Path) -> None:
    self.root_dir = root_dir
    self.dataset_name = dataset_name
    if self.dataset_name == "MNIST":
        ## Reference: https://stackoverflow.com/a/66816284
        new_mnist_mirror = 'https://ossci-datasets.s3.amazonaws.com/mnist'
        datasets.MNIST.resources = [
            ('/'.join([new_mnist_mirror, url.split('/')[-1]]), md5)
            for url, md5 in datasets.MNIST.resources
        ]
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,)),
            transforms.Lambda(
                lambda x: torch.flatten(x, start_dim=1).squeeze())
        ])
        self.train_data = datasets.MNIST(root_dir / "raw/",
                                         train=True,
                                         download=True,
                                         transform=transform)
        self.test_data = datasets.MNIST(root_dir / "raw/",
                                        train=False,
                                        download=True,
                                        transform=transform)
        self.num_train_data = len(self.train_data)
        self.num_classes = 10
    elif self.dataset_name == "EMNIST":
        self.train_data = datasets.EMNIST(root_dir / "raw/",
                                          split="letters",
                                          train=True,
                                          download=True)
        self.test_data = datasets.EMNIST(root_dir / "raw/",
                                         split="letters",
                                         train=False,
                                         download=True)
        # The 'letters' split has 26 classes; set the bookkeeping attributes
        # here so this branch matches the other two.
        self.num_train_data = len(self.train_data)
        self.num_classes = 26
    elif self.dataset_name == "CIFAR10":
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        self.train_data = datasets.CIFAR10(root_dir / "raw/",
                                           train=True,
                                           download=True,
                                           transform=transform)
        self.test_data = datasets.CIFAR10(root_dir / "raw/",
                                          train=False,
                                          download=True,
                                          transform=transform)
        self.num_train_data = len(self.train_data)
        self.num_classes = 10
    else:
        raise ValueError("Unknown dataset_name")
def download_EMNIST(split='letters'):
    """
    Download the EMNIST dataset and save it into the data folder.

    :param split: one of ['balanced', 'byclass', 'bymerge', 'digits',
        'letters', 'mnist']
    """
    data_folder = os.path.join(ROOT_DIR, DATA_DIR)
    dsets.EMNIST(root=data_folder,
                 train=True,
                 transform=transforms.ToTensor(),
                 download=True,
                 split=split)
    dsets.EMNIST(root=data_folder,
                 train=False,
                 transform=transforms.ToTensor(),
                 download=True,
                 split=split)
def get_data_loader():
    # Get data
    root = './data'
    transform_labeled = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(size=28,
                              padding=int(28 * 0.125),
                              padding_mode='reflect'),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,))
    ])
    transform_val = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    base_dataset = datasets.EMNIST(root, train=True, split='balanced',
                                   download=True)
    label_size = int(args.basicLabelRatio * len(base_dataset))
    train_labeled_idxs, train_unlabeled_idxs = x_u_split(
        base_dataset.targets, label_size, args.k_img, 7 * args.k_img,
        num_classes=47)
    labeled_dataset = EMNISTSSL(root, train_labeled_idxs, train=True,
                                transform=transform_labeled)
    unlabeled_dataset = EMNISTSSL(root, train_unlabeled_idxs, train=True,
                                  transform=TransformFix(mean=(0.1307,),
                                                         std=(0.3081,),
                                                         size=28))
    test_dataset = datasets.EMNIST(root, train=False, split='balanced',
                                   transform=transform_val, download=True)
    labeled_loader = DataLoader(labeled_dataset, args.bs, num_workers=4,
                                pin_memory=True, shuffle=True)
    unlabeled_loader = DataLoader(unlabeled_dataset, args.bs * 7,
                                  num_workers=4, pin_memory=True,
                                  shuffle=True)
    test_loader = DataLoader(test_dataset, args.bs, shuffle=True,
                             num_workers=4, pin_memory=True)
    return labeled_loader, unlabeled_loader, test_loader
def getdata_emnist(batch_size):
    # EMNIST images are single-channel, so normalize with one mean/std pair
    # (a 3-channel Normalize would raise a runtime error here).
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    data_train = datasets.EMNIST("./data/emnist", split='balanced',
                                 transform=transform, train=True,
                                 download=True)
    data_test = datasets.EMNIST("./data/emnist", split='balanced',
                                transform=transform, train=False,
                                download=True)
    data_loader_train = torch.utils.data.DataLoader(dataset=data_train,
                                                    batch_size=batch_size,
                                                    shuffle=True)
    data_loader_test = torch.utils.data.DataLoader(dataset=data_test,
                                                   batch_size=batch_size,
                                                   shuffle=True)
    return data_loader_train, data_loader_test
def __init__(self, args):
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    self.train_loader = torch.utils.data.DataLoader(
        datasets.EMNIST('data/emnist', train=True, download=True,
                        split='byclass', transform=transforms.ToTensor()),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    self.test_loader = torch.utils.data.DataLoader(
        datasets.EMNIST('data/emnist', train=False, split='byclass',
                        transform=transforms.ToTensor()),
        batch_size=args.batch_size, shuffle=True, **kwargs)
def foo(Model, Name):
    print(Name)
    args = Args()
    torch.manual_seed(args.seed)
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    kwargs = {'num_workers': 16, 'pin_memory': True} if use_cuda else {}
    transformer = transforms.Compose([
        transforms.Lambda(to_tensor),
    ])
    model = Model().to(device)
    optimizer = optim.SGD(model.parameters(), lr=args.lr,
                          momentum=args.momentum)
    test_loader = torch.utils.data.DataLoader(
        datasets.EMNIST(args.data_path, split='mnist', train=False,
                        transform=transformer, download=True),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)
    if len(sys.argv) > 1 and sys.argv[1] == 'test':
        model.load_state_dict(
            torch.load(args.save_path + Name,
                       map_location=lambda storage, loc: storage))
        failed = {}
        test(args, model, device, test_loader, failed)
        for k, v in failed.items():
            im = Image.new(mode='L', size=(len(v) * 28, 28))
            for i, f in enumerate(map(lambda h: hashmap.get(h, None), v)):
                if f is not None:
                    im.paste(f, (i * 28, 0))
            im.save('./result/' + k + '.png')
    else:
        train_loader = torch.utils.data.DataLoader(
            datasets.EMNIST(args.data_path, split='mnist', train=True,
                            transform=transformer, download=True),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        accuracies = []
        for epoch in range(args.epochs):
            train(args, model, device, train_loader, optimizer, epoch)
            test(args, model, device, test_loader, accuracies)
        torch.save(model.state_dict(), args.save_path + Name)
        accuracieses.append((accuracies, Name))
        plt.plot(accuracies)
        plt.xlabel('epoch')
        plt.ylabel('accuracy')
        # plt.show()
        plt.title(Name)
        plt.savefig(Name)
        plt.clf()
def get_emnist_semi(root, num_expand_x, num_expand_u, device_ids,
                    server_idxs):
    transform_labeled = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(size=28,
                              padding=int(28 * 0.125),
                              padding_mode='reflect'),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    transform_val = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    base_dataset = datasets.EMNIST(root, train=True, split='balanced',
                                   download=True)
    train_labeled_idxs, train_unlabeled_idxs = x_u_split_semi(
        base_dataset.targets, num_expand_x, num_expand_u, device_ids,
        server_idxs)
    train_unlabeled_dataset_list = []
    train_labeled_dataset_list = []
    for id in range(len(train_unlabeled_idxs)):
        train_unlabeled_dataset = EMNIST(root, train_unlabeled_idxs[id],
                                         train=True,
                                         transform=TransformFix(
                                             size=28,
                                             mean=(0.1307,),
                                             std=(0.3081,)))
        train_unlabeled_dataset_list.append(train_unlabeled_dataset)
        train_labeled_dataset = EMNIST(root, train_labeled_idxs[id],
                                       train=True,
                                       transform=transform_labeled)
        train_labeled_dataset_list.append(train_labeled_dataset)
    test_dataset = datasets.EMNIST(root, train=False, split='balanced',
                                   transform=transform_val, download=True)
    return (train_labeled_dataset_list, train_unlabeled_dataset_list,
            test_dataset)
def load_mnist_dataset(dataset, data_dir, training_data_ratio=1):
    # data_dir = '../data/mnist/'
    apply_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    if dataset == "mnist":
        train_dataset = datasets.MNIST(data_dir, train=True, download=True,
                                       transform=apply_transform)
        test_dataset = datasets.MNIST(data_dir, train=False, download=True,
                                      transform=apply_transform)
    elif dataset == "fmnist":
        train_dataset = datasets.FashionMNIST(data_dir, train=True,
                                              download=True,
                                              transform=apply_transform)
        test_dataset = datasets.FashionMNIST(data_dir, train=False,
                                             download=True,
                                             transform=apply_transform)
    elif dataset == "emnist":
        train_dataset = datasets.EMNIST(data_dir, train=True, download=True,
                                        transform=apply_transform,
                                        split="balanced")
        test_dataset = datasets.EMNIST(data_dir, train=False, download=True,
                                       transform=apply_transform,
                                       split="balanced")
    else:
        raise NotImplementedError
    if training_data_ratio != 1:
        select_len = int(len(train_dataset) * training_data_ratio)
        train_dataset.data = train_dataset.data[:select_len]
        train_dataset.targets = train_dataset.targets[:select_len]
    return train_dataset, test_dataset
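# Hedged example (not from the original source): load_mnist_dataset above
# truncates the underlying .data/.targets tensors in place, so a ratio of
# 0.5 halves the EMNIST 'balanced' training set while leaving the test set
# untouched. The data directory is an assumption.
def example_half_emnist(data_dir='./data'):
    train_ds, test_ds = load_mnist_dataset('emnist', data_dir,
                                           training_data_ratio=0.5)
    print(len(train_ds), len(test_ds))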
def EMNIST(train=False, batch_size=None, augm_flag=False, val_size=None):
    if batch_size is None:
        batch_size = train_batch_size if train else test_batch_size
    # EMNIST is rotated 90 degrees from MNIST.
    transform_base = [transforms.ToTensor(), pre.Transpose()]
    transform_train = transforms.Compose([
        transforms.RandomCrop(28, padding=4),
    ] + transform_base)
    transform_test = transforms.Compose(transform_base)
    transform_train = transforms.RandomChoice([transform_train,
                                               transform_test])
    transform = transform_train if (augm_flag and train) else transform_test
    dataset = datasets.EMNIST(path, split='letters', train=train,
                              transform=transform, download=True)
    if train or val_size is None:
        loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=train, num_workers=1)
        return loader
    else:
        # Split into val and test sets (train is False in this branch, so
        # neither loader shuffles).
        test_size = len(dataset) - val_size
        dataset_val, dataset_test = data_utils.random_split(
            dataset, (val_size, test_size))
        val_loader = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 num_workers=1)
        test_loader = torch.utils.data.DataLoader(dataset_test,
                                                  batch_size=batch_size,
                                                  shuffle=False,
                                                  num_workers=1)
        return val_loader, test_loader
def EMNIST(train=False, batch_size=None, augm_flag=False):
    if batch_size is None:
        batch_size = train_batch_size if train else test_batch_size
    # EMNIST is rotated 90 degrees from MNIST.
    transform_base = [transforms.ToTensor(), pre.Transpose()]
    transform_train = transforms.Compose([
        transforms.RandomCrop(28, padding=4),
    ] + transform_base)
    transform_test = transforms.Compose(transform_base)
    transform_train = transforms.RandomChoice([transform_train,
                                               transform_test])
    transform = transform_train if (augm_flag and train) else transform_test
    dataset = datasets.EMNIST(path, split='letters', train=train,
                              transform=transform, download=True)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         shuffle=train, num_workers=1)
    return loader
def __init__(self, batch_size, augm_flag):
    super().__init__(batch_size, augm_flag)
    # EMNIST 'letters' split: 124800 train / 20800 test images, 26 classes.
    self.n_train, self.n_test = 124800, 20800
    self.n_classes = 26
    self.height, self.width, self.n_colors = 28, 28, 1
    self.data_dir = self.base_path + 'emnist/'
    transform_base = [transforms.Lambda(lambda x: np.array(x).T / 255.0)]
    transform_train = transforms.Compose([
        transforms.RandomCrop(self.height, padding=4),
    ] + transform_base)
    transform_test = transforms.Compose(transform_base)
    transform_train = transform_train if self.augm_flag else transform_test
    self.train_dataset = datasets.EMNIST(self.data_dir,
                                         split='letters',
                                         train=True,
                                         transform=transform_train,
                                         download=True)
    self.test_dataset = datasets.EMNIST(self.data_dir,
                                        split='letters',
                                        train=False,
                                        transform=transform_test,
                                        download=True)
def get_dataset(data_name, data_root, image_size, train):
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor()
    ])
    if data_name == "mnist":
        dataset = datasets.MNIST(root=data_root, train=train,
                                 transform=transform, download=True)
    elif data_name == "fashion-mnist":
        dataset = datasets.FashionMNIST(root=data_root, train=train,
                                        transform=transform, download=True)
    elif data_name == "kmnist":
        dataset = datasets.KMNIST(root=data_root, train=train,
                                  transform=transform, download=True)
    elif data_name == "emnist":
        dataset = datasets.EMNIST(root=data_root, split="byclass",
                                  train=train, transform=transform,
                                  download=True)
    else:
        dataset = None
    return dataset
def get_loader(_dir, _batch_size=1, _train=True, _portion=None,
               _download=False, **_args):
    dataset = datasets.EMNIST(
        _dir,
        split='letters',
        train=_train,
        download=_download,
        transform=transforms.ToTensor(),
        # Necessary because labels are mapped 1 to 26 instead of 0 to 25.
        target_transform=transforms.Lambda(lambda x: x - 1)
    )
    indices = list(torch.randperm(len(dataset)))
    if _portion is not None:
        indices = indices[0:math.floor(_portion * len(dataset))]
    sampler = torch.utils.data.SubsetRandomSampler(indices)
    batch_sampler = torch.utils.data.BatchSampler(sampler,
                                                  batch_size=_batch_size,
                                                  drop_last=False)
    loader = torch.utils.data.DataLoader(dataset,
                                         batch_sampler=batch_sampler,
                                         **_args)
    return loader
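# Hedged usage sketch (an assumption, not in the original repo): get_loader
# above samples a random _portion of EMNIST letters through a BatchSampler,
# so a portion of 0.1 yields roughly 10% of the training set per epoch. The
# data directory and batch size are illustrative.
def example_portion_loader():
    loader = get_loader('./data', _batch_size=32, _train=True,
                        _portion=0.1, _download=True)
    images, labels = next(iter(loader))
    print(images.shape, labels.min().item(), labels.max().item())  # 0..25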
def rnn_train():
    train_data = dsets.EMNIST(
        root='./mnist',
        split='mnist',
        train=True,
        transform=transforms.ToTensor(),
        download=DOWNLOADS
    )
    train_load = Data.DataLoader(dataset=train_data, batch_size=BATCH_SIZE,
                                 shuffle=True, num_workers=2)
    rnn = RNN()
    optimizer = op.Adam(params=rnn.parameters(), lr=LR)
    loss_fun = nn.CrossEntropyLoss()
    for epoch in range(EPOCH):
        for step, (t_x, t_y) in enumerate(train_load):
            y = rnn(t_x)
            loss = loss_fun(y, t_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            # Print the scalar loss value rather than the tensor object.
            print("step", step, "| loss =", loss.item())
    torch.save(rnn, "./rnn.pkl")
def raw_dataset(self, data_dir: str, download: bool, train: bool, transform):
    if self.split == 'letters':
        # The 'letters' labels start at 1; shift them to start at 0.
        target_transform = (lambda x: x - 1)
    else:
        target_transform = None
    return datasets.EMNIST(data_dir,
                           split=self.split,
                           download=download,
                           train=train,
                           transform=transform,
                           target_transform=target_transform)
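# Minimal check (illustrative, not from the original source) that the
# target_transform in raw_dataset above shifts the 'letters' labels from
# 1..26 down to 0..25, which loss functions like CrossEntropyLoss expect.
# The dataset is constructed directly here for brevity.
def example_letters_label_shift(data_dir='./data'):
    ds = datasets.EMNIST(data_dir, split='letters', train=False,
                         download=True, transform=transforms.ToTensor(),
                         target_transform=lambda x: x - 1)
    _, label = ds[0]
    assert 0 <= label <= 25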
def __init__(self, options):
    transform_list = []
    if options.image_size is not None:
        transform_list.append(
            transforms.Resize((options.image_size, options.image_size)))
        # transform_list.append(transforms.CenterCrop(options.image_size))
    transform_list.append(transforms.ToTensor())
    if options.image_colors == 1:
        transform_list.append(transforms.Normalize(mean=[0.5], std=[0.5]))
    elif options.image_colors == 3:
        transform_list.append(
            transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]))
    transform = transforms.Compose(transform_list)
    if options.dataset == 'mnist':
        dataset = datasets.MNIST(options.data_dir, train=True,
                                 download=True, transform=transform)
    elif options.dataset == 'emnist':
        # Updated URL from
        # https://www.westernsydney.edu.au/bens/home/reproducible_research/emnist
        datasets.EMNIST.url = 'https://cloudstor.aarnet.edu.au/plus/s/ZNmuFiuQTqZlu9W/download'
        dataset = datasets.EMNIST(options.data_dir,
                                  split=options.image_class,
                                  train=True, download=True,
                                  transform=transform)
    elif options.dataset == 'fashion-mnist':
        dataset = datasets.FashionMNIST(options.data_dir, train=True,
                                        download=True, transform=transform)
    elif options.dataset == 'lsun':
        training_class = options.image_class + '_train'
        dataset = datasets.LSUN(options.data_dir, classes=[training_class],
                                transform=transform)
    elif options.dataset == 'cifar10':
        dataset = datasets.CIFAR10(options.data_dir, train=True,
                                   download=True, transform=transform)
    elif options.dataset == 'cifar100':
        dataset = datasets.CIFAR100(options.data_dir, train=True,
                                    download=True, transform=transform)
    else:
        dataset = datasets.ImageFolder(root=options.data_dir,
                                       transform=transform)
    self.dataloader = DataLoader(dataset,
                                 batch_size=options.batch_size,
                                 num_workers=options.loader_workers,
                                 shuffle=True,
                                 drop_last=True,
                                 pin_memory=options.pin_memory)
    self.iterator = iter(self.dataloader)
def main():
    train_batch_size = 100
    dataset = datasets.EMNIST(
        '../dataEMNIST',
        split='balanced',
        train=True,
        download=True,
        transform=transforms.ToTensor()
    )
    shuffle_dataset = True
    val_split = 0.002
    dataset_size = len(dataset)
    indices = list(range(dataset_size))
    split = int(np.floor(val_split * dataset_size))
    if shuffle_dataset:
        np.random.seed(2)
        np.random.shuffle(indices)
    train_indices, val_indices = indices[split:], indices[:split]
    train_sampler = SubsetRandomSampler(train_indices)
    val_sampler = SubsetRandomSampler(val_indices)
    train_loader = torch.utils.data.DataLoader(dataset,
                                               sampler=train_sampler,
                                               batch_size=train_batch_size)
    val_loader = torch.utils.data.DataLoader(dataset,
                                             sampler=val_sampler,
                                             batch_size=train_batch_size)
    device = torch.device("cuda")
    model = model_root().to(device)
    model.load_state_dict(torch.load('root_emnist.pth'))
    model_one = model_1().to(device)
    model_one.load_state_dict(torch.load('emnist_1.pth'))
    model_one_zero = model_1_0().to(device)
    model_one_zero.load_state_dict(torch.load('emnist_1_0.pth'))
    learning_rate = 0.0001
    optimizer = torch.optim.Adam(model_one_zero.parameters(),
                                 lr=learning_rate)
    loss_fn = nn.CrossEntropyLoss()
    # Track the best accuracy explicitly instead of shadowing builtin max().
    best_acc = train(model, model_one, model_one_zero, optimizer, loss_fn,
                     train_loader, val_loader, device, 0)
    for i in range(2):
        model_one_zero.load_state_dict(torch.load('emnist_1_0.pth'))
        learning_rate /= 10
        optimizer = torch.optim.Adam(model_one_zero.parameters(),
                                     lr=learning_rate, weight_decay=5e-4)
        loss_fn = nn.CrossEntropyLoss()
        best_acc = train(model, model_one, model_one_zero, optimizer,
                         loss_fn, train_loader, val_loader, device, best_acc)
def __init__(self):
    super(CustomDataset, self).__init__()
    self.trans = transforms.Compose([
        transforms.Resize(64),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5], std=[0.5])
    ])
    self.letter_images = datasets.EMNIST('data_letters',
                                         'letters',
                                         train=True,
                                         download=False,
                                         transform=self.trans)
def get_train_test_queues(args, train_transform, valid_transform):
    print("Getting", args.dataset, "data")
    if args.dataset == 'cifar10':
        print("Using CIFAR10")
        train_data = dset.CIFAR10(root=args.data, train=True, download=True,
                                  transform=train_transform)
        valid_data = dset.CIFAR10(root=args.data, train=False, download=True,
                                  transform=valid_transform)
    elif args.dataset == 'mnist':
        print("Using MNIST")
        train_data = dset.MNIST(root=args.data, train=True, download=True,
                                transform=train_transform)
        valid_data = dset.MNIST(root=args.data, train=False, download=True,
                                transform=valid_transform)
    elif args.dataset == 'emnist':
        print("Using EMNIST")
        train_data = dset.EMNIST(root=args.data, split='balanced',
                                 train=True, download=True,
                                 transform=train_transform)
        valid_data = dset.EMNIST(root=args.data, split='balanced',
                                 train=False, download=True,
                                 transform=valid_transform)
    elif args.dataset == 'fashion':
        print("Using Fashion")
        train_data = dset.FashionMNIST(root=args.data, train=True,
                                       download=True,
                                       transform=train_transform)
        valid_data = dset.FashionMNIST(root=args.data, train=False,
                                       download=True,
                                       transform=valid_transform)
    elif args.dataset == 'svhn':
        print("Using SVHN")
        train_data = dset.SVHN(root=args.data, split='train', download=True,
                               transform=train_transform)
        valid_data = dset.SVHN(root=args.data, split='test', download=True,
                               transform=valid_transform)
    elif args.dataset == 'stl10':
        print("Using STL10")
        train_data = dset.STL10(root=args.data, split='train', download=True,
                                transform=train_transform)
        valid_data = dset.STL10(root=args.data, split='test', download=True,
                                transform=valid_transform)
    elif args.dataset == 'devanagari':
        print("Using DEVANAGARI")
        # Ensure the dataset is present in args.data; auto download is not
        # supported.
        print(args.data)
        train_data = dset.ImageFolder(root=os.path.join(args.data, "Train"),
                                      transform=train_transform,
                                      loader=grey_pil_loader)
        valid_data = dset.ImageFolder(root=os.path.join(args.data, "Test"),
                                      transform=valid_transform,
                                      loader=grey_pil_loader)
    else:
        assert False, "Cannot get training queue for dataset"
    train_queue = torch.utils.data.DataLoader(train_data,
                                              batch_size=args.batch_size,
                                              shuffle=True,
                                              pin_memory=True,
                                              num_workers=1)
    valid_queue = torch.utils.data.DataLoader(valid_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              pin_memory=True,
                                              num_workers=1)
    return train_queue, valid_queue
def get_test_loader(data_dir, batch_size, num_workers=4, pin_memory=False):
    """
    Utility function for loading and returning a multi-process test iterator
    over the EMNIST letters dataset.

    If using CUDA, num_workers should be set to 1 and pin_memory to True.

    Args
    ----
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it
      to True if using GPU.

    Returns
    -------
    - data_loader: test set iterator.
    """
    emnist_dir = "data"
    # Load the test split. EMNIST images are stored rotated and flipped
    # relative to MNIST, so undo that here.
    dataset = datasets.EMNIST(
        emnist_dir,
        download=True,
        split='letters',
        train=False,
        transform=transforms.Compose([
            lambda img: transforms.functional.rotate(img, -90),
            lambda img: transforms.functional.hflip(img),
            transforms.ToTensor()
        ]))
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )
    return data_loader
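# Hedged usage sketch (an assumption, not in the original source): pulling
# one batch from get_test_loader above to confirm the orientation fix yields
# upright single-channel 28x28 letter images.
def example_test_batch():
    loader = get_test_loader('data', batch_size=16)
    images, labels = next(iter(loader))
    print(images.shape)  # torch.Size([16, 1, 28, 28])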
def __init__(self,
             train: bool = True,
             max_seq_len: int = 1,
             blank_label: int = 11,
             pad_label: int = 10,
             img_size: Tuple[int, int] = (28, 28)):  # requires `from typing import Tuple`
    self.dataset = datasets.EMNIST(root='./data', split='mnist',
                                   train=train, download=True)
    self.max_seq_len = max_seq_len
    self.img_size = img_size
def make_dataset(dataset, dataroot, imageSize):
    """
    :param dataset: must be in 'cifar10 | lsun | imagenet | folder | lfw |
        emnist | celeba'
    :return: pytorch dataset for DataLoader to utilize
    """
    if dataset in ['imagenet', 'folder', 'lfw']:
        # folder dataset
        dataset = dset.ImageFolder(root=dataroot,
                                   transform=transforms.Compose([
                                       transforms.Resize(imageSize),
                                       transforms.CenterCrop(imageSize),
                                       transforms.ToTensor(),
                                       transforms.Normalize(
                                           (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                   ]))
    elif dataset == 'lsun':
        dataset = dset.LSUN(db_path=dataroot,
                            classes=['bedroom_train'],
                            transform=transforms.Compose([
                                transforms.Resize(imageSize),
                                transforms.CenterCrop(imageSize),
                                transforms.ToTensor(),
                                transforms.Normalize(
                                    (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                            ]))
    elif dataset == 'cifar10':
        dataset = dset.CIFAR10(root=dataroot,
                               download=True,
                               transform=transforms.Compose([
                                   transforms.Resize(imageSize),
                                   transforms.CenterCrop(imageSize),
                                   transforms.ToTensor(),
                                   transforms.Normalize(
                                       (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                               ]))
    elif dataset == 'emnist':
        # EMNIST is single-channel; normalize with one mean/std pair (a
        # 3-channel Normalize would fail at runtime).
        dataset = dset.EMNIST(root=dataroot,
                              download=True,
                              split='letters',
                              transform=transforms.Compose([
                                  transforms.Resize(imageSize),
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.5,), (0.5,)),
                              ]))
    elif dataset == 'celeba':
        dataset = dset.ImageFolder(root=dataroot,
                                   transform=transforms.Compose([
                                       transforms.CenterCrop(138),
                                       transforms.Resize(imageSize),
                                       transforms.ToTensor(),
                                       transforms.Normalize(
                                           (0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
                                   ]))
    else:
        raise Exception('--dataset must be in cifar10 | lsun | imagenet | '
                        'folder | lfw | emnist | celeba')
    assert dataset
    return dataset