def celeb_faces_model():
    """Train and evaluate a CelebFacesNeuralNetwork on the USPS digit dataset.

    NOTE(review): despite the function name, this trains on USPS, not a
    celebrity-faces dataset — confirm the intended dataset with the author.
    """
    model = CelebFacesNeuralNetwork()
    training_data = datasets.USPS(root="data", train=True, download=True,
                                  transform=ToTensor())
    test_data = datasets.USPS(root="data", train=False, download=True,
                              transform=ToTensor())
    # Sanity-check one sample's tensor shape (idiomatic indexing instead of
    # calling __getitem__ directly).
    img, tar = training_data[1]
    print(img.size())

    learning_rate = 1e-3  # how much to update the model's parameters at each batch/epoch
    batch_size = 64       # the number of data samples seen per mini-batch
    epochs = 10           # full passes over the training set

    train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)
    test_dataloader = DataLoader(test_data, batch_size=batch_size, shuffle=True)

    loss_fn = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

    for t in range(epochs):
        print(f"Epoch {t + 1}\n-------------------------------")
        train_loop(train_dataloader, model, loss_fn, optimizer)
        test_loop(test_dataloader, model, loss_fn)
    print("Done!")
def get_usps(train, download=True, drop_last=True, get_pseudo=False):
    """Build a DataLoader over USPS, or over its pseudo-labelled .mat variant.

    train: select the train (True) or eval (False) split.
    get_pseudo: load the pseudo-labelled .mat file instead of torchvision USPS.
    """
    # Shared image pre-processing pipeline for the torchvision dataset.
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Resize((32, 32)),
        transforms.Lambda(gray2rgb),
        transforms.Normalize(mean=(params.dataset_mean, ),
                             std=(params.dataset_std, )),
    ])

    if not get_pseudo:
        dataset = datasets.USPS(root=params.data_root,
                                train=train,
                                transform=preprocessing,
                                download=download)
    else:
        # Pseudo-labelled data ships as .mat files; only ToTensor is applied.
        if train:
            mat_path = 'data/usps_train_pseudo.mat'
        else:
            mat_path = 'data/usps_eval_pseudo.mat'
        dataset = CustomDataset(mat_path, transforms.ToTensor())

    return torch.utils.data.DataLoader(
        dataset=dataset,
        batch_size=params.batch_size,
        shuffle=False,
        num_workers=params.num_worker,
        drop_last=drop_last,
        pin_memory=True)
def __init__(self, labeled_data, unlabeled_data, test_data, batch_size):
    """Store the data splits and register per-name dataset factories.

    Each factory takes a `train` flag and returns the corresponding
    torchvision dataset (grayscale, resized to 32, converted to tensor).
    """
    super().__init__()
    shared_transform = transforms.Compose([
        transforms.Grayscale(),
        transforms.Resize(32),
        transforms.ToTensor()
    ])

    def make_mnist(train):
        # MNIST ships as grayscale already; Grayscale() is a no-op here.
        return datasets.MNIST("./data/mnist", train=train,
                              transform=shared_transform, download=True)

    def make_usps(train):
        return datasets.USPS("./data/usps", train=train,
                             transform=shared_transform, download=True)

    def make_svhn(train):
        # SVHN selects splits by name rather than a boolean flag.
        return datasets.SVHN("./data/svhn", transform=shared_transform,
                             download=True, split="train" if train else "test")

    self.dataset_by_name = {
        "MNIST": make_mnist,
        "USPS": make_usps,
        "SVHN": make_svhn,
    }
    self.labeled_data = labeled_data
    self.unlabeled_data = unlabeled_data
    self.test_data = test_data
    self.batch_size = batch_size
def get_data(data_name, transform, is_train=True):
    """Return a shuffled DataLoader for "mnist" or "usps".

    Fix: an unrecognized data_name previously fell through to an opaque
    NameError on the undefined local; it now raises ValueError up front.
    """
    if data_name == "mnist":
        dataset = datasets.MNIST(root=paramsTrans.dataset_root, train=is_train,
                                 download=True, transform=transform)
    elif data_name == "usps":
        dataset = datasets.USPS(root=paramsTrans.dataset_root, train=is_train,
                                download=True, transform=transform)
    else:
        raise ValueError(f"Unsupported dataset name: {data_name!r}")
    return DataLoader(dataset=dataset,
                      batch_size=paramsTrans.batch_size,
                      shuffle=True,
                      num_workers=paramsTrans.num_workers)
def usps():
    """Yield download configs for both USPS splits.

    Fix: the factory lambda closed over the comprehension variable `train`
    (late binding) — if collect_download_configs invokes it lazily, every
    factory would see the last value. Binding `train` as a default argument
    freezes each iteration's value; calling the lambda with no arguments
    still works, so the interface is unchanged.
    """
    return itertools.chain(*[
        collect_download_configs(
            lambda train=train: datasets.USPS(ROOT, train=train, download=True),
            name=f"USPS, {'train' if train else 'test'}",
            file="usps",
        )
        for train in (True, False)
    ])
def get_vision_dset(dset_name,device,x_only=False):
    # Fetch a vision dataset by name and wrap it in a TransformDataset.
    # Returns images only when x_only is True, otherwise an (images, labels)
    # pair. `device` is only used for datasets stored as numpy (USPS, CIFAR10).
    dirpath = f'~/unsupervised_object_learning/{dset_name}/data'
    if dset_name in ['MNISTfull', 'MNISTtest']:
        dtest=tdatasets.MNIST(root=dirpath,train=False,download=True)
        x, y = dtest.data, dtest.targets
        if dset_name == 'MNISTfull':
            # 'full' = train + test concatenated, train portion first
            dtrain=tdatasets.MNIST(root=dirpath,train=True,download=True)
            x = torch.cat([dtrain.data,x])
            y = torch.cat([dtrain.targets,y])
        data = x if x_only else (x,y)
    elif dset_name == 'MNISTtrain':
        dtrain=tdatasets.MNIST(root=dirpath,train=True,download=True)
        data = dtrain.data if x_only else (dtrain.data,dtrain.targets)
    elif dset_name == 'FashionMNIST':
        dtrain=tdatasets.FashionMNIST(root=dirpath,train=True,download=True)
        dtest=tdatasets.FashionMNIST(root=dirpath,train=False,download=True)
        x = torch.cat([dtrain.data,dtest.data])
        y = torch.cat([dtrain.targets,dtest.targets])
        data = x if x_only else (x,y)
    elif dset_name == 'USPS':
        # USPS stores .data/.targets as numpy, so they are converted to
        # tensors on the requested device before concatenating.
        dtrain=tdatasets.USPS(root=dirpath,train=True,download=True)
        dtest=tdatasets.USPS(root=dirpath,train=False,download=True)
        train_data = torch.tensor(dtrain.data,device=device)
        test_data = torch.tensor(dtest.data,device=device)
        train_targets = torch.tensor(dtrain.targets,device=device)
        test_targets = torch.tensor(dtest.targets,device=device)
        x = torch.cat([train_data,test_data])
        y = torch.cat([train_targets,test_targets])
        data = x if x_only else (x,y)
    elif dset_name == 'CIFAR10':
        # Train split only (unlike the MNIST/FashionMNIST/USPS branches).
        d=tdatasets.CIFAR10(root=dirpath,train=True,download=True)
        data = torch.tensor(d.data,device=device) if x_only else (torch.tensor(d.data,device=device), torch.tensor(d.targets,device=device))
    elif dset_name == 'coil-100':
        data = load_coil100(x_only)
    elif dset_name == 'letterAJ':
        data = load_letterAJ(x_only)
        # NOTE(review): placement reconstructed from a flattened source —
        # this early return appears to belong to the letterAJ branch only,
        # skipping to_float_tensor (presumably load_letterAJ already yields
        # float data); confirm against the original file's indentation.
        return TransformDataset(data,[add_colour_dimension],x_only,device=device)
    # Default path: cast to float and add a colour/channel dimension.
    return TransformDataset(data,[to_float_tensor,add_colour_dimension],x_only,device=device)
def get_loader(dataset, opts):
    """Return (train_loader, test_loader) for 'USPS' or 'MNIST'.

    Fixes: DataLoader was being passed invalid `train=` / `Train=` keyword
    arguments, which raises TypeError at runtime; an unrecognized dataset
    name now raises ValueError instead of dying with UnboundLocalError.
    """
    # NOTE(review): transforms.Scale is deprecated/removed in modern
    # torchvision (use transforms.Resize); kept for the pinned version here.
    transform = transforms.Compose([
        transforms.Scale(opts.image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))])

    if dataset == 'USPS':
        train_set = datasets.USPS(root='/content/CycleGan/data/USPS', train=True,
                                  download=True, transform=transform)
        test_set = datasets.USPS(root='/content/CycleGan/data/USPS', train=False,
                                 download=True, transform=transform)
    elif dataset == 'MNIST':
        train_set = datasets.MNIST(root='/content/CycleGan/data/MNIST', train=True,
                                   download=True, transform=transform)
        test_set = datasets.MNIST(root='/content/CycleGan/data/MNIST', train=False,
                                  download=True, transform=transform)
    else:
        raise ValueError(f"Unsupported dataset: {dataset!r}")

    train_dloader = DataLoader(dataset=train_set, batch_size=opts.batch_size,
                               shuffle=True, num_workers=opts.num_workers)
    test_dloader = DataLoader(dataset=test_set, batch_size=opts.batch_size,
                              shuffle=True, num_workers=opts.num_workers)
    return train_dloader, test_dloader
def __init__(self, path):
    """Load the train splits of MNIST, SVHN and USPS rooted under `path`."""
    shared_transform = get_transform()
    mnist_root = os.path.join(path, "MNIST")
    svhn_root = os.path.join(path, "SVHN")
    usps_root = os.path.join(path, "USPS")
    self.MNIST_dataset = datasets.MNIST(root=mnist_root,
                                        transform=shared_transform,
                                        train=True, download=True)
    # SVHN selects its split by name rather than a boolean flag.
    self.SVHN_dataset = datasets.SVHN(root=svhn_root,
                                      transform=shared_transform,
                                      split='train', download=True)
    self.USPS_dataset = datasets.USPS(root=usps_root,
                                      transform=shared_transform,
                                      train=True, download=True)
def get_usps(train, batch_size=1024):
    """Return a shuffled DataLoader over USPS (resized to 32, normalized)."""
    # Image pre-processing: upscale 16x16 digits to 32x32, then map
    # pixel values into [-1, 1].
    pipeline = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize((0.5, ), (0.5, )),
    ])
    dataset = datasets.USPS(root='cache/data/',
                            train=train,
                            transform=pipeline,
                            download=True)
    return torch.utils.data.DataLoader(dataset=dataset,
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=8)
def get_usps(train, adp=False):
    """Return a shuffled, drop-last DataLoader over USPS.

    adp: use the adaptation batch size instead of the default one.
    """
    # Replicate the single grey channel into 3 channels so the images
    # match RGB model inputs.
    to_three_channels = transforms.Lambda(lambda t: t.repeat(3, 1, 1))
    pipeline = transforms.Compose([
        transforms.Resize(params.image_size),
        transforms.ToTensor(),
        to_three_channels,
    ])
    dataset = datasets.USPS(root=params.usps_dataset_root,
                            train=train,
                            transform=pipeline,
                            download=True)
    if adp:
        effective_batch_size = params.adp_batch_size
    else:
        effective_batch_size = params.batch_size
    return torch.utils.data.DataLoader(dataset=dataset,
                                       batch_size=effective_batch_size,
                                       shuffle=True,
                                       drop_last=True)
transforms.Normalize([0.5], [0.5]) ]) if DB == 'MNIST': dataset = datasets.MNIST(db_path, train=True, download=True, transform=transform) elif DB == 'FashionMNIST': dataset = datasets.FashionMNIST(db_path, train=True, download=True, transform=transform) elif DB == 'USPS': dataset = datasets.USPS(db_path, train=True, download=True, transform=transform) elif DB == 'SVHN': dataset = datasets.SVHN(db_path, split='train', download=True, transform=transform) elif DB == 'CIFAR10': dataset = datasets.CIFAR10(db_path, train=True, download=True, transform=transform) data_loader = torch.utils.data.DataLoader(dataset=dataset, batch_size=BATCH_SIZE, shuffle=True,
def __init__(self, data_path, dataset_name="mnist", valid_size=0.2,
             transformations=None, shuffle=False, **kwargs):
    """
    Initializer of the classification dataset object.

    Loads train/test splits for the requested dataset, enforces
    ToTensor + Normalize in the transformation pipeline, and optionally
    carves out a validation split.

    Fixes: (1) the assertion whitelist omitted "svhn" and "coil-100" even
    though both have handling branches below, making them unreachable;
    (2) `transforms.ToTensor() not in transformations` compared by object
    identity and was therefore always True, so a caller-supplied ToTensor
    was duplicated — the check is now by type.
    """
    # checking valid values for the parameters
    valid_names = ["mnist", "fashion_mnist", "usps", "mnist-test", "svhn", "coil-100"]
    assert dataset_name in valid_names, \
        f"Dataset name: {dataset_name} is not a correct value. " \
        f"Choose one from {valid_names}"
    assert (valid_size >= 0 and valid_size < 1), f"Valid size must be in range [0,1)"

    self.data_path = data_path
    self.dataset_name = dataset_name
    self.valid_size = valid_size
    self.shuffle = shuffle

    # enforcing ToTensor (by type, see docstring) and Normalize in the transforms
    if transformations is None:
        transformations = []
    if not any(isinstance(t, transforms.ToTensor) for t in transformations):
        transformations.append(transforms.ToTensor())
    transformations.append(transforms.Normalize((0.5,), (0.5,)))
    transformations = transforms.Compose(transformations)

    # loading the corresponding data
    if dataset_name == "mnist":
        train_set = datasets.MNIST(self.data_path, train=True, download=True, transform=transformations)
        test_set = datasets.MNIST(self.data_path, train=False, download=True, transform=transformations)
    elif dataset_name == "mnist-test":
        # test-only variant: no training split
        train_set = None
        test_set = datasets.MNIST(self.data_path, train=False, download=True, transform=transformations)
    elif dataset_name == "svhn":
        train_set = datasets.SVHN(self.data_path, split='train', download=True, transform=transformations)
        test_set = datasets.SVHN(self.data_path, split='test', download=True, transform=transformations)
        # SVHN stores labels in `.labels`; mirror them to `.targets` for uniformity
        train_set.targets, test_set.targets = train_set.labels, test_set.labels
    elif dataset_name == "fashion_mnist":
        train_set = datasets.FashionMNIST(self.data_path, train=True, download=True, transform=transformations)
        test_set = datasets.FashionMNIST(self.data_path, train=False, download=True, transform=transformations)
    elif dataset_name == "usps":
        train_set = datasets.USPS(self.data_path, train=True, download=True, transform=transformations)
        test_set = datasets.USPS(self.data_path, train=False, download=True, transform=transformations)
    elif dataset_name == "coil-100":
        data_path = os.path.join(self.data_path, "coil-100", "coil-100")
        get_lbl = lambda name: int(name.split("_")[0][3:])  # 'objNN_...' -> NN
        train_set = None
        test_set = CustomDataset(root=data_path, transform=transformations,
                                 get_lbl=get_lbl, downscale=(32, 32), **kwargs)

    if train_set is not None:
        self.train_data, self.train_labels = train_set.data, train_set.targets
    self.test_data, self.test_labels = test_set.data, test_set.targets
    self.train_set = train_set
    self.test_set = test_set

    if self.valid_size > 0 and self.train_set is not None:
        self._get_train_validation_split()
    return
target_transform=None, download=True) svhn_test_image = osp.join(outdir, 'svhn_test_image') os.system("mkdir -p " + svhn_test_image) svhn_labels = svhn_dataset_test.labels.flatten() with open(osp.join(outdir, 'svhn_test.txt'), 'w') as label_file: for i in range(len(svhn_test_image)): img = Image.fromarray(svhn_dataset_test.data[i].transpose(1, 2, 0)) img.save(osp.join(svhn_test_image, '{:d}.png'.format(i))) label_file.write(svhn_test_image + '/{:d}.png {:d}\n'.format(i, svhn_labels[i])) outdir = '/data/usps' usps_dataset_train = datasets.USPS('/data/usps', train=True, transform=None, target_transform=None, download=True) usps_train_image = osp.join(outdir, 'usps_tain_image') os.system("mkdir -p " + usps_train_image) usps_labels = usps_dataset_train.targets # .labels.flatten() with open(osp.join(outdir, 'usps_train.txt'), 'w') as label_file: for i in range(len(usps_dataset_train)): img = Image.fromarray(usps_dataset_train.data[i]) img.save(osp.join(usps_train_image, '{:d}.png'.format(i))) label_file.write(usps_train_image + '/{:d}.png {:d}\n'.format(i, usps_labels[i])) usps_dataset_test = datasets.USPS('/data/usps', train=False, transform=None,
download=True) # CIFAR 10 train3_dataset = dsets.CIFAR10(root='./data', train=True, transform=transforms.ToTensor(), download=True) test3_dataset = dsets.CIFAR10(root='./data', train=False, transform=transforms.ToTensor(), download=True) #USPS train4_dataset = dsets.USPS(root='./data', train=True, transform=transforms.ToTensor(), download=True) test4_dataset = dsets.USPS(root='./data', train=False, transform=transforms.ToTensor(), download=True) train_dataset = train1_dataset test_dataset = test1_dataset batch_size = 100 n_iters = 2000 num_epochs = n_iters / (len(train_dataset) / batch_size) num_epochs = int(num_epochs)
def get_digit_five_train_loader(d_name, batch_size=16, num_workers=1,
                                pin_memory=False, drop_last=False, resize=28):
    """Return (train_loader, test_loader) for one Digit-Five dataset.

    d_name: one of "MNIST", "MNIST-M", "SVHN", "USPS", "SY".

    Fixes: the identical grayscale->3-channel transform was duplicated
    verbatim in three branches (hoisted once); an unrecognized d_name now
    raises ValueError instead of failing later with UnboundLocalError.
    """
    # RGB sources (SVHN, synthetic digits) use this pipeline as-is.
    rgb_transforms = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    # Grayscale(3) replicates single-channel sources to 3 channels so all
    # domains share a 3-channel input format.
    gray_transforms = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.Grayscale(3),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    if d_name == "MNIST":
        trainset = datasets.MNIST('./digits_data/', download=True, train=True, transform=gray_transforms)
        valset = datasets.MNIST('./digits_data/', download=True, train=False, transform=gray_transforms)
    elif d_name == "MNIST-M":
        # NOTE(review): the original also applied Grayscale(3) to MNIST-M
        # (an RGB dataset) — behavior preserved; confirm this is intended.
        trainset = MNISTM.MNISTM_dataset('./digits_data/', download=True, train=True, transform=gray_transforms)
        valset = MNISTM.MNISTM_dataset('./digits_data/', download=True, train=False, transform=gray_transforms)
    elif d_name == 'SVHN':
        trainset = datasets.SVHN('./digits_data/', download=True, split="train", transform=rgb_transforms)
        valset = datasets.SVHN('./digits_data/', download=True, split="test", transform=rgb_transforms)
    elif d_name == 'USPS':
        trainset = datasets.USPS('./digits_data/', download=True, train=True, transform=gray_transforms)
        valset = datasets.USPS('./digits_data/', download=True, train=False, transform=gray_transforms)
    elif d_name == 'SY':
        trainset = datasets.ImageFolder(
            './digits_data/synthetic_digits/imgs_train', transform=rgb_transforms)
        valset = datasets.ImageFolder(
            './digits_data/synthetic_digits/imgs_valid', transform=rgb_transforms)
    else:
        raise ValueError(f"Unknown dataset name: {d_name!r}")

    # All Digit-Five domains have 10 classes.
    trainset.num_classes = 10
    valset.num_classes = 10

    train_loader = data.DataLoader(trainset, batch_size=batch_size, shuffle=True,
                                   num_workers=num_workers, pin_memory=pin_memory,
                                   drop_last=drop_last)
    # NOTE: evaluation runs with batch_size=1, as in the original.
    test_loader = data.DataLoader(valset, batch_size=1, shuffle=True,
                                  num_workers=num_workers, pin_memory=pin_memory,
                                  drop_last=drop_last)
    return train_loader, test_loader
def __init__(self, dataset, root_folder="raw-datasets/", flatten=False, coil20_unprocessed=False, debug=False, n_samples=100):
    # Dataset factory: loads the named dataset into self.train_data /
    # self.test_data and records its geometry:
    #   dim_flatten - number of scalar features per flattened sample
    #   d_in        - number of input channels
    #   hw_in       - input height/width (square images; 1 for 'colors')
    # flatten: append a ReshapeTransform so samples come out as 1-D vectors.
    # debug:   truncate both splits to the first n_samples examples.
    super(Datasets, self).__init__()
    transform_train_list = []
    transform_test_list = []
    if dataset == "mnist":
        transform_train_list.append(transforms.ToTensor())
        # MNIST per-channel mean/std
        transform_train_list.append(
            transforms.Normalize((0.1307, ), (0.3081, )))
        if flatten:
            transform_train_list.append(ReshapeTransform((-1, )))
        transform_train = transforms.Compose(transform_train_list)
        # no augmentation, so train and test pipelines are identical
        transform_test = transform_train
        self.train_data = datasets.MNIST(root=root_folder, train=True, download=True, transform=transform_train)
        self.test_data = datasets.MNIST(root=root_folder, train=False, download=True, transform=transform_test)
        if debug:
            self.train_data.data = self.train_data.data[:n_samples]
            self.test_data.data = self.test_data.data[:n_samples]
        self.dim_flatten = self.train_data.data.size(1) * self.train_data.data.size(2)
        self.d_in = 1
        self.hw_in = 28
    elif dataset == "fashion":
        transform_train_list.append(transforms.ToTensor())
        transform_train_list.append(transforms.Normalize((0.5, ), (0.5, )))
        if flatten:
            transform_train_list.append(ReshapeTransform((-1, )))
        transform_train = transforms.Compose(transform_train_list)
        transform_test = transform_train
        self.train_data = datasets.FashionMNIST(root=root_folder, train=True, download=True, transform=transform_train)
        self.test_data = datasets.FashionMNIST(root=root_folder, train=False, download=True, transform=transform_test)
        if debug:
            self.train_data.data = self.train_data.data[:n_samples]
            self.test_data.data = self.test_data.data[:n_samples]
        self.dim_flatten = self.train_data.data.size(1) * self.train_data.data.size(2)
        self.d_in = 1
        self.hw_in = 28
    elif dataset == "cifar10":
        # train-time augmentation (crop + flip); test uses ToTensor+Normalize only
        transform_train_list.append(transforms.RandomCrop(32, padding=4))
        transform_train_list.append(transforms.RandomHorizontalFlip())
        transform_train_list.append(transforms.ToTensor())
        transform_train_list.append(
            transforms.Normalize(mean=[0.491, 0.482, 0.447],
                                 std=[0.247, 0.243, 0.262]))
        transform_test_list.append(transforms.ToTensor())
        transform_test_list.append(
            transforms.Normalize(mean=[0.491, 0.482, 0.447],
                                 std=[0.247, 0.243, 0.262]))
        if flatten:
            transform_train_list.append(ReshapeTransform((-1, )))
            transform_test_list.append(ReshapeTransform((-1, )))
        transform_train = transforms.Compose(transform_train_list)
        transform_test = transforms.Compose(transform_test_list)
        self.train_data = datasets.CIFAR10(root=root_folder, train=True, download=True, transform=transform_train)
        self.test_data = datasets.CIFAR10(root=root_folder, train=False, download=True, transform=transform_test)
        if debug:
            self.train_data.data = self.train_data.data[:n_samples]
            self.test_data.data = self.test_data.data[:n_samples]
        # CIFAR .data is a numpy array of shape (N, H, W, C)
        data_shape = self.train_data.data.shape
        self.dim_flatten = data_shape[1] * data_shape[2] * data_shape[3]
        self.d_in = 3
        self.hw_in = 32
    elif dataset == "cifar100":
        transform_train_list.append(transforms.RandomCrop(32, padding=4))
        transform_train_list.append(transforms.RandomHorizontalFlip())
        transform_train_list.append(transforms.ToTensor())
        transform_train_list.append(
            transforms.Normalize(mean=[0.507, 0.487, 0.441],
                                 std=[0.267, 0.256, 0.276]))
        transform_test_list.append(transforms.ToTensor())
        transform_test_list.append(
            transforms.Normalize(mean=[0.507, 0.487, 0.441],
                                 std=[0.267, 0.256, 0.276]))
        if flatten:
            transform_train_list.append(ReshapeTransform((-1, )))
            transform_test_list.append(ReshapeTransform((-1, )))
        transform_train = transforms.Compose(transform_train_list)
        transform_test = transforms.Compose(transform_test_list)
        self.train_data = datasets.CIFAR100(root=root_folder, train=True, download=True, transform=transform_train)
        self.test_data = datasets.CIFAR100(root=root_folder, train=False, download=True, transform=transform_test)
        if debug:
            self.train_data.data = self.train_data.data[:n_samples]
            self.test_data.data = self.test_data.data[:n_samples]
        data_shape = self.train_data.data.shape
        self.dim_flatten = data_shape[1] * data_shape[2] * data_shape[3]
        self.d_in = 3
        self.hw_in = 32
    elif dataset == "svhn":
        # no normalization here, unlike the other image datasets
        transform_train_list.append(transforms.ToTensor())
        if flatten:
            transform_train_list.append(ReshapeTransform((-1, )))
        transform_train = transforms.Compose(transform_train_list)
        transform_test = transform_train
        self.train_data = datasets.SVHN(root=root_folder, split='train', download=True, transform=transform_train)
        self.test_data = datasets.SVHN(root=root_folder, split='test', download=True, transform=transform_test)
        if debug:
            self.train_data.data = self.train_data.data[:n_samples]
            self.test_data.data = self.test_data.data[:n_samples]
        data_shape = self.train_data.data.shape
        self.dim_flatten = data_shape[1] * data_shape[2] * data_shape[3]
        self.d_in = 3
        self.hw_in = 32
    elif dataset == "usps":
        transform_train_list.append(transforms.ToTensor())
        if flatten:
            transform_train_list.append(ReshapeTransform((-1, )))
        transform_train = transforms.Compose(transform_train_list)
        transform_test = transform_train
        self.train_data = datasets.USPS(root=root_folder, train=True, download=True, transform=transform_train)
        self.test_data = datasets.USPS(root=root_folder, train=False, download=True, transform=transform_test)
        if debug:
            self.train_data.data = self.train_data.data[:n_samples]
            self.test_data.data = self.test_data.data[:n_samples]
        # USPS images are 16x16 single-channel
        data_shape = self.train_data.data.shape
        self.dim_flatten = data_shape[1] * data_shape[2]
        self.d_in = 1
        self.hw_in = 16
    elif dataset == "coil20":
        transform_train_list.append(transforms.ToTensor())
        if flatten:
            transform_train_list.append(ReshapeTransform((-1, )))
        transform_train = transforms.Compose(transform_train_list)
        transform_test = transform_train
        # COIL20 has no train/test split: the same data backs both attributes
        self.train_data = COIL20(root=root_folder, processed=not coil20_unprocessed, download=True, transform=transform_train)
        self.test_data = COIL20(root=root_folder, processed=not coil20_unprocessed, download=True, transform=transform_test)
        if debug:
            self.train_data.data = self.train_data.data[:n_samples]
            self.test_data.data = self.test_data.data[:n_samples]
        data_shape = self.train_data.data.shape
        self.dim_flatten = data_shape[1] * data_shape[2]
        self.d_in = 1
        self.hw_in = 32
    elif dataset == "colors":
        transform_train_list.append(transforms.ToTensor())
        if flatten:
            transform_train_list.append(ReshapeTransform((-1, )))
        transform_train = transforms.Compose(transform_train_list)
        transform_test = transform_train
        self.train_data = COLORS(root=root_folder, transform=transform_train)
        self.test_data = COLORS(root=root_folder, transform=transform_test)
        # 'colors' samples are flat feature vectors, not images
        data_shape = self.train_data.data.shape
        self.dim_flatten = data_shape[1]
        self.d_in = 3
        self.hw_in = 1
    else:
        # fallback: load a custom dataset from disk by name; note that
        # d_in / hw_in are NOT set on this path — callers must not rely on them
        self.train_data = CustomDataset(load_path=join(root_folder, dataset), norm="minmax")
        self.test_data = self.train_data
        self.dim_flatten = self.train_data.data.shape[1]
def get_digits_loaders_concat(targets, batch_size=16, num_workers=1,
                              pin_memory=False, drop_last=False, resize=28):
    """Concatenate several digit datasets and return (trainloader, valloader).

    targets: iterable of dataset names from
             {"MNIST", "MNIST-M", "SVHN", "USPS", "SY"}.

    Fixes: an unrecognized name previously fell through the if/elif chain and
    silently re-appended the previous iteration's datasets (duplicating them
    in the concatenation) — it now raises ValueError; the triplicated
    grayscale transform is hoisted out of the loop.
    """
    # RGB sources (SVHN, synthetic digits) use this pipeline as-is.
    rgb_transforms = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    # Grayscale(3) replicates single-channel sources to 3 channels so all
    # domains share a 3-channel input format.
    gray_transforms = transforms.Compose([
        transforms.Resize((resize, resize)),
        transforms.Grayscale(3),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    trains = []
    vals = []
    for d_name in targets:
        if d_name == "MNIST":
            trainset = datasets.MNIST('./digits_data/', download=True, train=True, transform=gray_transforms)
            valset = datasets.MNIST('./digits_data/', download=True, train=False, transform=gray_transforms)
        elif d_name == "MNIST-M":
            # NOTE(review): the original also applied Grayscale(3) to MNIST-M
            # (an RGB dataset) — behavior preserved; confirm this is intended.
            trainset = MNISTM.MNISTM_dataset('./digits_data/', download=True, train=True, transform=gray_transforms)
            valset = MNISTM.MNISTM_dataset('./digits_data/', download=True, train=False, transform=gray_transforms)
        elif d_name == 'SVHN':
            trainset = datasets.SVHN('./digits_data/', download=True, split="train", transform=rgb_transforms)
            valset = datasets.SVHN('./digits_data/', download=True, split="test", transform=rgb_transforms)
        elif d_name == 'USPS':
            trainset = datasets.USPS('./digits_data/', download=True, train=True, transform=gray_transforms)
            valset = datasets.USPS('./digits_data/', download=True, train=False, transform=gray_transforms)
        elif d_name == 'SY':
            trainset = datasets.ImageFolder(
                './digits_data/synthetic_digits/imgs_train', transform=rgb_transforms)
            valset = datasets.ImageFolder(
                './digits_data/synthetic_digits/imgs_valid', transform=rgb_transforms)
        else:
            raise ValueError(f"Unknown dataset name: {d_name!r}")
        trains.append(trainset)
        vals.append(valset)

    train_sets = torch.utils.data.ConcatDataset(trains)
    val_sets = torch.utils.data.ConcatDataset(vals)
    # All digit domains share the same 10 classes.
    train_sets.num_classes = 10
    val_sets.num_classes = 10

    trainloader = data.DataLoader(train_sets, batch_size=batch_size, shuffle=True,
                                  num_workers=num_workers, pin_memory=pin_memory,
                                  drop_last=drop_last)
    valloader = data.DataLoader(val_sets, batch_size=batch_size, shuffle=True,
                                num_workers=num_workers, pin_memory=pin_memory,
                                drop_last=drop_last)
    return trainloader, valloader
def load_usps(args, **kwargs):
    # Load USPS, flatten it to 256-d vectors, split off a validation set,
    # and return (train_loader, val_loader, test_loader, args).
    # NOTE: `args` is mutated in place (input_size, pseudoinputs_* etc.).

    # set args: USPS digits are 16x16 grayscale
    args.input_size = [1, 16, 16]
    args.input_type = 'gray'
    args.dynamic_binarization = False

    # start processing
    from torchvision import datasets, transforms
    train_loader = torch.utils.data.DataLoader(datasets.USPS(
        os.path.join('datasets', "usps"), train=True, download=True,
        transform=transforms.Compose([transforms.ToTensor()])),
        batch_size=args.batch_size, shuffle=True)
    test_loader = torch.utils.data.DataLoader(datasets.USPS(
        os.path.join('datasets', "usps"), train=False, download=True,
        transform=transforms.Compose([transforms.ToTensor()])),
        batch_size=args.batch_size, shuffle=True)

    # preparing data: read the raw uint8 arrays off the datasets, scale to
    # [0, 1], and flatten each image to a 256-vector
    x_train = train_loader.dataset.data.astype('float32')
    x_train = x_train / 255.
    x_train = np.reshape(
        x_train, (x_train.shape[0], x_train.shape[1] * x_train.shape[2]))
    y_train = np.array(train_loader.dataset.targets, dtype=int)
    x_test = test_loader.dataset.data.astype('float32')
    x_test = x_test / 255.
    x_test = np.reshape(x_test,
                        (x_test.shape[0], x_test.shape[1] * x_test.shape[2]))
    y_test = np.array(test_loader.dataset.targets, dtype=int)

    # validation set; 7291 data points in training dataset; 2007 in test dataset
    # (first 6000 train examples kept for training, remainder for validation)
    x_val = x_train[6000:]
    y_val = np.array(y_train[6000:], dtype=int)
    x_train = x_train[0:6000]
    y_train = np.array(y_train[0:6000], dtype=int)

    # pytorch data loaders over the in-memory tensors (replacing the
    # torchvision loaders built above, which were only used to fetch data)
    train = data_utils.TensorDataset(torch.from_numpy(x_train),
                                     torch.from_numpy(y_train))
    train_loader = data_utils.DataLoader(train, batch_size=args.batch_size,
                                         shuffle=True, **kwargs)
    validation = data_utils.TensorDataset(
        torch.from_numpy(x_val).float(), torch.from_numpy(y_val))
    val_loader = data_utils.DataLoader(validation,
                                       batch_size=args.test_batch_size,
                                       shuffle=False, **kwargs)
    test = data_utils.TensorDataset(
        torch.from_numpy(x_test).float(), torch.from_numpy(y_test))
    test_loader = data_utils.DataLoader(test,
                                        batch_size=args.test_batch_size,
                                        shuffle=False, **kwargs)

    # setting pseudo-inputs inits (presumably for a VampPrior-style model —
    # confirm against the consumer of pseudoinputs_mean/std)
    if args.use_training_data_init == 1:
        args.pseudoinputs_std = 0.01
        # seed pseudo-inputs from the first training examples, perturbed
        # by Gaussian noise of scale pseudoinputs_std
        init = x_train[0:args.number_components_input].T
        args.pseudoinputs_mean = torch.from_numpy(
            init + args.pseudoinputs_std * np.random.randn(np.prod(args.input_size),
                                                           args.number_components_input)).float()
    else:
        args.pseudoinputs_mean = 0.05
        args.pseudoinputs_std = 0.01

    return train_loader, val_loader, test_loader, args