def load_dataset(img_transform, dataset_name, limit=None):
    if dataset_name == mnist:
        dataset = datasets.MNIST(root=mnist_image_root, train=True,
                                 transform=img_transform, download=True)
    elif dataset_name == svhn:
        dataset = datasets.SVHN(root=os.path.join('dataset', 'svhn'),
                                transform=img_transform, download=True)
    elif dataset_name == mnist_m:
        train_list = os.path.join(mnist_m_image_root, 'mnist_m_train_labels.txt')
        dataset = GetLoader(data_root=os.path.join(mnist_m_image_root, 'mnist_m_train'),
                            data_list=train_list, transform=img_transform)
    elif dataset_name == synth:
        train_mat = os.path.join(synth_image_root, 'synth_train_32x32.mat')
        dataset = GetSynthDigits(data_root=synth_image_root, data_mat=train_mat,
                                 transform=img_transform)
    elif dataset_name == usps:
        data_file = "usps_28x28.pkl"
        dataset = GetUSPS(data_root=usps_image_root, data_file=data_file,
                          transform=img_transform)
    elif dataset_name == amazon:
        dataset = datasets.ImageFolder('dataset/amazon', transform=img_transform)
    elif dataset_name == dslr:
        dataset = datasets.ImageFolder('dataset/dslr', transform=img_transform)
    elif dataset_name == webcam:
        dataset = datasets.ImageFolder('dataset/webcam', transform=img_transform)
    elif isinstance(dataset_name, list):
        # A list of names is loaded recursively and concatenated.
        return ConcatDataset([load_dataset(img_transform, dset, limit)
                              for dset in dataset_name])
    else:
        raise ValueError('Unknown dataset: {}'.format(dataset_name))
    if limit:
        # Cache the subset indices so repeated calls yield the same random subset.
        indices = index_cache.get((dataset_name, limit), None)
        if indices is None:
            indices = torch.randperm(len(dataset))[:limit]
            index_cache[(dataset_name, limit)] = indices
        dataset = Subset(dataset, indices)
    return RgbWrapper(dataset)
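# Illustrative usage sketch for load_dataset (assumes the module-level
# dataset-name constants and the transforms import from the surrounding code;
# the helper name below is hypothetical):
def _load_dataset_example():
    tfm = transforms.Compose([transforms.Resize(32), transforms.ToTensor()])
    # a single name, capped at a cached random 10000-sample subset
    source = load_dataset(tfm, mnist, limit=10000)
    # a list of names is loaded recursively and wrapped in a ConcatDataset
    combined = load_dataset(tfm, [mnist, svhn])
    return source, combined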
def __init__(self, path):
    transform = get_transform()
    self.MNIST_dataset = datasets.MNIST(root=os.path.join(path, "MNIST"),
                                        transform=transform, train=True, download=True)
    self.SVHN_dataset = datasets.SVHN(root=os.path.join(path, "SVHN"),
                                      transform=transform, split='train', download=True)
    self.USPS_dataset = datasets.USPS(root=os.path.join(path, "USPS"),
                                      transform=transform, train=True, download=True)
def train_dataset(data_dir, transform=cifar_transform_train, split_size=10000, **kwargs):
    train_data = datasets.SVHN(root=data_dir, split='train', transform=transform,
                               download=True, **kwargs)
    length = len(train_data)
    print('SVHN train split size:', length)
    # Carve a fixed-size training subset; the seed keeps the split reproducible.
    train_size, validate_size = split_size, length - split_size
    train_set, _ = torch.utils.data.random_split(
        dataset=train_data,
        lengths=[train_size, validate_size],
        generator=torch.Generator().manual_seed(42))
    return train_set
def get_SVHN(split='train', shuffle=None, batch_size=None, augm_type='none',
             size=32, num_workers=4, config_dict=None):
    if batch_size is None:
        batch_size = DEFAULT_TRAIN_BATCHSIZE if split in ['train', 'extra'] else DEFAULT_TEST_BATCHSIZE
    if shuffle is None:
        shuffle = split in ['train', 'extra']
    augm_config = {}
    transform = get_SVHN_augmentation(augm_type, out_size=size, config_dict=augm_config)
    path = get_svhn_path()
    if split == 'svhn_train_extra':
        dataset = SVHNTrainExtraCombo(transform)
    else:
        dataset = datasets.SVHN(path, split=split, transform=transform, download=True)
    loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                         shuffle=shuffle, num_workers=num_workers)
    if config_dict is not None:
        config_dict['Dataset'] = 'SVHN'
        config_dict['SVHN Split'] = split
        config_dict['Batch size'] = batch_size
        config_dict['Augmentation'] = augm_config
    return loader
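# Illustrative usage sketch for get_SVHN: passing a dict as config_dict records
# the dataset, split, batch size, and augmentation settings for experiment
# logging (the helper name is hypothetical):
def _get_SVHN_example():
    config = {}
    loader = get_SVHN(split='train', augm_type='none', config_dict=config)
    # config now holds e.g. {'Dataset': 'SVHN', 'SVHN Split': 'train', ...}
    return loader, config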
def train_loader(data, data_directory='/home/sungwonlyu/data', batch_size=128):
    if data == 'mnist':
        train_dataloader = DataLoader(
            datasets.MNIST(data_directory + data + '/', train=True, download=True,
                           transform=transforms.ToTensor()),
            batch_size=batch_size, shuffle=True, **kwargs)
    elif data == 'svhn':
        # torchvision's SVHN takes `split`, not a `train` flag.
        train_dataloader = DataLoader(
            datasets.SVHN(data_directory + data + '/', split='train', download=True,
                          transform=transforms.ToTensor()),
            batch_size=batch_size, shuffle=True, **kwargs)
    elif data == 'cifar10':
        train_dataloader = DataLoader(
            datasets.CIFAR10(data_directory + data + '/', train=True, download=True,
                             transform=transforms.ToTensor()),
            batch_size=batch_size, shuffle=True, **kwargs)
    elif data == 'celeba':
        train_dataloader = DataLoader(
            CelebA(data_directory + data + '/', 'Male', train=True,
                   transform=transforms.ToTensor()),
            batch_size=batch_size, shuffle=True, **kwargs)
    elif data == 'alphachu':
        train_dataloader = DataLoader(
            AlphachuDataset(data_directory + data + '/', train=True,
                            transform=transforms.ToTensor()),
            batch_size=batch_size, shuffle=True, **kwargs)
    return train_dataloader
def get_svhn(train, split=None):
    """Get SVHN dataset loader."""
    # image pre-processing
    pre_process = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize((0.437, 0.4437, 0.4728),
                             (0.1980, 0.2010, 0.1970))
    ])
    # derive the split from `train` unless one is given explicitly
    if split is None:
        split = 'train' if train else 'test'
    svhn_dataset = datasets.SVHN(root=params.data_root,
                                 split=split,
                                 transform=pre_process,
                                 download=True)
    svhn_data_loader = torch.utils.data.DataLoader(
        dataset=svhn_dataset,
        batch_size=params.batch_size,
        shuffle=True)
    return svhn_data_loader
def __new__(cls, root, train=True, transform=None, download=False):
    if train:
        td = OriginalSVHN(root, train=True, transform=transform, download=download)
        ed = VD.SVHN(root, split='extra', transform=transform, download=download)
        # `data` and `labels` are numpy arrays, so concatenate them; in-place
        # `+=` would attempt elementwise addition instead of appending.
        td.data = np.concatenate([td.data, ed.data])
        td.targets = np.concatenate([td.targets, ed.labels])
        return td
    return OriginalSVHN(root, train=False, transform=transform, download=download)
def get_train_val_loaders(self):
    if self.args.dataset == 'cifar10':
        train_transform, valid_transform = utils._data_transforms_cifar10(self.args)
        train_data = dset.CIFAR10(root=self.args.data, train=True,
                                  download=True, transform=train_transform)
    elif self.args.dataset == 'cifar100':
        train_transform, valid_transform = utils._data_transforms_cifar100(self.args)
        train_data = dset.CIFAR100(root=self.args.data, train=True,
                                   download=True, transform=train_transform)
    elif self.args.dataset == 'svhn':
        train_transform, valid_transform = utils._data_transforms_svhn(self.args)
        train_data = dset.SVHN(root=self.args.data, split='train',
                               download=True, transform=train_transform)
    else:
        raise ValueError('Unknown dataset: {}'.format(self.args.dataset))
    num_train = len(train_data)
    indices = list(range(num_train))
    split = int(np.floor(self.args.train_portion * num_train))
    train_queue = torch.utils.data.DataLoader(
        train_data, batch_size=self.args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[:split]),
        pin_memory=True, num_workers=2)
    valid_queue = torch.utils.data.DataLoader(
        train_data, batch_size=self.args.batch_size,
        sampler=torch.utils.data.sampler.SubsetRandomSampler(indices[split:num_train]),
        pin_memory=True, num_workers=2)
    return train_queue, valid_queue, train_transform, valid_transform
def get_test_loader(batch_size, shuffle=True, num_workers=4, pin_memory=False):
    """
    Utility function for loading and returning a multi-process
    test iterator over the SVHN dataset.

    If using CUDA, num_workers should be set to 1 and pin_memory to True.

    Params
    ------
    - batch_size: how many samples per batch to load.
    - shuffle: whether to shuffle the dataset after every epoch.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.

    Returns
    -------
    - data_loader: test set iterator.
    """
    # note: these are the usual CIFAR-10 statistics, not SVHN's
    normalize = transforms.Normalize(
        mean=[0.4914, 0.4822, 0.4465],
        std=[0.2023, 0.1994, 0.2010],
    )
    # define transform
    transform = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])
    data_dir = './data'
    dataset = datasets.SVHN(
        root=data_dir,
        split='test',
        download=True,
        transform=transform,
    )
    data_loader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )
    return data_loader
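# Illustrative usage sketch for get_test_loader above; per its docstring, with
# CUDA use num_workers=1 and pin_memory=True (the helper name is hypothetical):
def _svhn_test_loader_example():
    loader = get_test_loader(batch_size=256, shuffle=False,
                             num_workers=1, pin_memory=True)
    images, labels = next(iter(loader))
    print(images.shape)  # torch.Size([256, 3, 32, 32])
    return loader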
def load_datasets(path, train=True):
    """
    Load datasets for either training or evaluating the model.
    Downloads the datasets if they're not on disk.

    Parameters:
    - path (str): Path to the datasets
    - train (bool, default=True): Gets either train or test datasets

    Returns:
    - A dict with datasets for both source and target
    """
    # Resize both datasets' samples to 32x32x3
    img_size = 32
    # MNIST: resize, color-jitter, expand to 3 channels, then normalize
    transform = transforms.Compose([
        transforms.Resize(img_size),
        transforms.ColorJitter(.1, 1, .75, 0),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.expand([3, -1, -1])),
        transforms.Normalize(mean=(0.1307, 0.1307, 0.1307),
                             std=(0.3081, 0.3081, 0.3081))
    ])
    mnist = datasets.MNIST(path, train=train, download=True, transform=transform)
    # SVHN: resize and normalize
    transform = transforms.Compose([
        transforms.Resize(img_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.45, 0.45, 0.45),
                             std=(0.199, 0.199, 0.199))
    ])
    svhn = datasets.SVHN(path, split='train' if train else 'test',
                         download=True, transform=transform)
    return {'mnist': mnist, 'svhn': svhn}
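# Illustrative usage sketch for load_datasets: both datasets come back as
# 32x32 three-channel tensors, so one network can consume either domain
# (the helper name is hypothetical):
def _domain_adaptation_loaders_example():
    data_dict = load_datasets('./data', train=True)
    mnist_loader = torch.utils.data.DataLoader(data_dict['mnist'], batch_size=64, shuffle=True)
    svhn_loader = torch.utils.data.DataLoader(data_dict['svhn'], batch_size=64, shuffle=True)
    return mnist_loader, svhn_loader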
def __init__(self, dataset_name, indexed=False):
    self.data_root = os.path.join(data_dir, dataset_name)
    self.indexed = indexed
    if dataset_name == 'mnist':
        self.data = datasets.MNIST(
            root=self.data_root, train=False, download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                # transforms.Normalize((0.5,), (0.5,)),
            ]))
    elif dataset_name == 'cifar10':
        self.data = datasets.CIFAR10(
            root=self.data_root, train=False, download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                # transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            ]))
    elif dataset_name == 'cifar100':
        # CIFAR100_TRAIN_MEAN = (0.5070751592371323, 0.48654887331495095, 0.4409178433670343)
        # CIFAR100_TRAIN_STD = (0.2673342858792401, 0.2564384629170883, 0.27615047132568404)
        transform_test = transforms.Compose([
            transforms.ToTensor(),
            # transforms.Normalize(CIFAR100_TRAIN_MEAN, CIFAR100_TRAIN_STD)
        ])
        self.data = torchvision.datasets.CIFAR100(root=self.data_root, train=False,
                                                  download=True, transform=transform_test)
    elif dataset_name == 'svhn':
        self.data = datasets.SVHN(
            root=self.data_root, split='test', download=True,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
            ]))
    else:
        raise ValueError('Unknown dataset: {}'.format(dataset_name))
def binary_SVHN(cls1, cls2, train=True, batch_size=None, augm_flag=True, val_size=None):
    if batch_size is None:
        batch_size = train_batch_size if train else test_batch_size
    split = 'train' if train else 'test'
    transform_base = [transforms.ToTensor()]
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4, padding_mode='edge'),
    ] + transform_base)
    transform_test = transforms.Compose(transform_base)
    # Randomly apply either the augmented or the plain pipeline per sample.
    transform_train = transforms.RandomChoice([transform_train, transform_test])
    transform = transform_train if (augm_flag and train) else transform_test
    dataset = datasets.SVHN(path, split=split, transform=transform, download=False)
    # Keep only the two requested classes and remap their labels to 0/1.
    labels = np.array(dataset.labels)
    masks = np.logical_or(labels == cls1, labels == cls2)
    idxs = np.where(masks)[0]
    dataset.data = dataset.data[idxs]
    dataset.labels = labels[idxs]
    dataset.labels = np.where(dataset.labels == cls1, 0., dataset.labels)
    dataset.labels = np.where(dataset.labels == cls2, 1., dataset.labels)
    if train or val_size is None:
        loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=train, num_workers=4)
        return loader
    # Split into val and test sets
    test_size = len(dataset) - val_size
    dataset_val, dataset_test = data_utils.random_split(dataset, (val_size, test_size))
    val_loader = torch.utils.data.DataLoader(dataset_val, batch_size=batch_size,
                                             shuffle=False, num_workers=4)
    test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=batch_size,
                                              shuffle=False, num_workers=4)
    return val_loader, test_loader
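# Illustrative usage sketch for binary_SVHN: train mode returns a single
# loader; test mode with val_size returns a (validation, test) pair, with
# labels remapped to 0.0 for cls1 and 1.0 for cls2 (helper name hypothetical):
def _binary_svhn_example():
    train_loader = binary_SVHN(cls1=3, cls2=8, train=True)
    val_loader, test_loader = binary_SVHN(cls1=3, cls2=8, train=False, val_size=2000)
    return train_loader, val_loader, test_loader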
def __init__(self, batchsize, train=True):
    Dataset.__init__(self)
    data_root = join(dirname(realpath(__file__)), 'SVHN_data')
    self.name = "svhn"
    self.range = [0.0, 1.0]
    self.data_dims = [3, 32, 32]
    self.batchsize = batchsize
    if train:
        self.data = dsets.SVHN(root=data_root, download=True, split="train",
                               transform=transforms.Compose([transforms.ToTensor()]))
        self.dataloader = tdata.DataLoader(self.data, self.batchsize, shuffle=True)
        self.iter = iter(self.dataloader)
        self._index = 0
def get_val_data_loader(dataset_name, dataset_dir, batch_size, num_workers):
    mean_rgb = (0.4914, 0.4822, 0.4465)
    std_rgb = (0.2023, 0.1994, 0.2010)
    transform_val = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=mean_rgb, std=std_rgb),
    ])
    if dataset_name == "CIFAR10":
        dataset = datasets.CIFAR10(root=dataset_dir, train=False,
                                   transform=transform_val, download=True)
    elif dataset_name == "CIFAR100":
        dataset = datasets.CIFAR100(root=dataset_dir, train=False,
                                    transform=transform_val, download=True)
    elif dataset_name == "SVHN":
        dataset = datasets.SVHN(root=dataset_dir, split="test",
                                transform=transform_val, download=True)
    else:
        raise Exception('Unrecognized dataset: {}'.format(dataset_name))
    val_loader = torch.utils.data.DataLoader(dataset=dataset,
                                             batch_size=batch_size,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             pin_memory=True)
    return val_loader
def get_svhn(train, get_dataset=False, batch_size=cfg.batch_size):
    """Get SVHN dataset loader."""
    # image pre-processing: resize, normalize in RGB, then collapse to a single
    # luminance channel (ITU-R BT.601 weights)
    convert_to_gray = transforms.Lambda(
        lambda x: (x[0, ...] * 0.299 + x[1, ...] * 0.587 + x[2, ...] * 0.114).unsqueeze(0))
    pre_process = transforms.Compose([
        transforms.Resize(cfg.image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        convert_to_gray
    ])
    # dataset and data loader
    svhn_dataset = datasets.SVHN(root=cfg.data_root,
                                 split='train' if train else 'test',
                                 transform=pre_process,
                                 download=False)
    if get_dataset:
        return svhn_dataset
    svhn_data_loader = torch.utils.data.DataLoader(dataset=svhn_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
    return svhn_data_loader
def get_test_loader(data_dir,
                    dataset,
                    batch_size,
                    exp='azimuth',       # smallnorb only
                    familiar=True,       # smallnorb only
                    num_workers=4,
                    pin_memory=False):
    data_dir = data_dir + '/' + dataset
    if dataset == "cifar10":
        trans = [transforms.ToTensor(),
                 transforms.Normalize((0.4914, 0.4822, 0.4465),
                                      (0.2023, 0.1994, 0.2010))]
        dataset = datasets.CIFAR10(data_dir, train=False, download=False,
                                   transform=transforms.Compose(trans))
    elif dataset == "svhn":
        normalize = transforms.Normalize(mean=[x / 255.0 for x in [109.9, 109.7, 113.8]],
                                         std=[x / 255.0 for x in [50.1, 50.6, 50.8]])
        trans = [transforms.ToTensor(), normalize]
        dataset = datasets.SVHN(data_dir, split='test', download=True,
                                transform=transforms.Compose(trans))
    elif dataset == "smallnorb":
        trans = [transforms.Resize(48),
                 transforms.CenterCrop(32),
                 transforms.ToTensor(),
                 # transforms.Normalize((0.7199,), (0.117,))
                 ]
        if exp in VIEWPOINT_EXPS:
            dataset = smallNORBViewPoint(data_dir, exp=exp, familiar=familiar, train=False,
                                         download=True, transform=transforms.Compose(trans))
        elif exp == "full":
            dataset = smallNORB(data_dir, train=False, download=True,
                                transform=transforms.Compose(trans))
    else:
        raise ValueError('Unrecognized dataset: {}'.format(dataset))
    data_loader = torch.utils.data.DataLoader(
        dataset, batch_size=batch_size, shuffle=False,
        num_workers=num_workers, pin_memory=pin_memory,
    )
    return data_loader
def get_dataloader(args):
    svhn_transform = transforms.Compose([
        transforms.Resize((args.load_size, args.load_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    mnist_transform = transforms.Compose([
        transforms.Resize((args.load_size, args.load_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])
    if args.mode == 'train' or args.mode == 'continue_train':
        train, shuffle, split = True, True, 'train'
    else:
        train, shuffle, split = False, False, 'test'
    svhn = datasets.SVHN(root=args.dataset_path, split=split, download=True,
                         transform=svhn_transform)
    mnist = datasets.MNIST(root=args.dataset_path, train=train, download=True,
                           transform=mnist_transform)
    svhn_loader = torch.utils.data.DataLoader(dataset=svhn, batch_size=args.batch_size,
                                              shuffle=shuffle, num_workers=args.num_workers)
    mnist_loader = torch.utils.data.DataLoader(dataset=mnist, batch_size=args.batch_size,
                                               shuffle=shuffle, num_workers=args.num_workers)
    return mnist_loader, svhn_loader
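# Illustrative usage sketch for get_dataloader: the two domain loaders can be
# iterated in lockstep, e.g. for MNIST-to-SVHN translation (assumes an
# argparse-style args object; the helper name is hypothetical):
def _paired_domain_example(args):
    mnist_loader, svhn_loader = get_dataloader(args)
    for (x_mnist, _), (x_svhn, _) in zip(mnist_loader, svhn_loader):
        print(x_mnist.shape, x_svhn.shape)
        break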
def get_loader(config):
    """Builds and returns Dataloader for MNIST and SVHN dataset."""
    transform1 = transforms.Compose([
        transforms.Resize(config.image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    # MNIST is single-channel: normalize with 1-channel stats, then repeat to 3 channels.
    transform2 = transforms.Compose([
        transforms.Resize(config.image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
        transforms.Lambda(lambda x: x.repeat(3, 1, 1))
    ])
    mnistm = MNISTM("./", train=True, transform=transform1, download=True)
    syn_dig = Loadsyn()
    svhn = datasets.SVHN(root=config.svhn_path, download=True, transform=transform1)
    mnist = datasets.MNIST(root=config.mnist_path, download=True, transform=transform2)
    dataset_dic = {1: mnist, 2: svhn, 3: mnistm, 4: syn_dig}
    split_source, split_target = spilit_dataset(dataset_dic[config.source_idx])
    output = []
    for dataset in [split_source, split_target,
                    dataset_dic[config.t1_idx], dataset_dic[config.t2_idx]]:
        output.append(data.DataLoader(dataset=dataset,
                                      batch_size=config.batch_size,
                                      shuffle=True,
                                      num_workers=config.num_workers))
    return output
def get_loader(mode):
    """Builds and returns Dataloader for MNIST and SVHN dataset."""
    config = Config
    transform_list = []
    is_train = mode == "train"
    # Only augment the training split.
    if is_train and config.model.use_augmentation:
        transform_list.append(transforms.RandomHorizontalFlip())
        transform_list.append(transforms.RandomRotation(0.1))
    loader = None
    # MNIST is single-channel, SVHN three-channel; pick matching normalization.
    if config.model.dataset == "mnist":
        normalize = transforms.Normalize((0.5,), (0.5,))
    else:
        normalize = transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    transform = transforms.Compose(transform_list + [
        transforms.Resize(config.data.image_size),
        transforms.ToTensor(),
        normalize
    ])
    if config.model.dataset == "mnist":
        mnist = datasets.MNIST(root=config.data.mnist_path, download=True,
                               transform=transform, train=is_train)
        loader = torch.utils.data.DataLoader(dataset=mnist,
                                             batch_size=config.train.batch_size,
                                             shuffle=config.train.shuffle,
                                             num_workers=config.data.num_workers)
    if config.model.dataset == "svhn":
        svhn = datasets.SVHN(root=config.data.svhn_path, download=True,
                             transform=transform, split=mode)
        loader = torch.utils.data.DataLoader(dataset=svhn,
                                             batch_size=config.train.batch_size,
                                             shuffle=config.train.shuffle,
                                             num_workers=config.data.num_workers)
    ## preparing for AC custom dataset
    # train_size = int(0.8 * len(full_dataset))
    # test_size = len(full_dataset) - train_size
    # train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [train_size, test_size])
    return loader
def main():
    logger.info('Loading SVHN test data')
    # note: these are the usual CIFAR-10 normalization statistics
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])
    inv_transform = transforms.Normalize(
        (-0.4914 / 0.2023, -0.4822 / 0.1994, -0.4465 / 0.2010),
        (1 / 0.2023, 1 / 0.1994, 1 / 0.2010))
    dataset = datasets.SVHN(root='data/', split='test', download=True,
                            transform=transform)
    dataloader = data.DataLoader(dataset, batch_size=1000, shuffle=False,
                                 num_workers=4)
    logger.info('Loading model')
    model = resnet(num_classes=10, depth=152)
    model = torch.nn.DataParallel(model).cuda()
    # checkpoint = torch.load('resnet-110/model_best.pth.tar')
    checkpoint = torch.load('checkpoint/model_best.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])
    model.eval()
    index = 0
    print('Index Correct Predicted Confidence')
    for inputs, targets in dataloader:
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            logits = model(inputs)
        probs = torch.softmax(logits, dim=-1)
        confidences, predictions = torch.max(probs, 1)
        for target, prediction, confidence in zip(targets, predictions, confidences):
            correct = int(prediction.item() == target.item())
            print('%i %i %i %0.8f' % (index, correct, prediction.item(), confidence.item()))
            index += 1
def load_data():
    # tensor transform
    transform = transforms.ToTensor()
    # SVHN training dataset
    svhn_train = datasets.SVHN(root='data/', split='train', download=True,
                               transform=transform)
    batch_size = 128
    num_workers = 0
    # build DataLoader for the SVHN dataset
    train_loader = torch.utils.data.DataLoader(dataset=svhn_train,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)
    return train_loader, batch_size, num_workers
def get_test_loader(data_dir='./data/svhn',
                    batch_size=64,
                    shuffle=False,
                    num_workers=1,
                    pin_memory=True,
                    target_transform=None):
    """
    Utility function for loading and returning a multi-process
    test iterator over the SVHN dataset.

    If using CUDA, num_workers should be set to 1 and pin_memory to True.

    Params
    ------
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - shuffle: whether to shuffle the dataset after every epoch.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: whether to copy tensors into CUDA pinned memory. Set it to
      True if using GPU.
    - target_transform: optional transform applied to the labels.

    Returns
    -------
    - data_loader: test set iterator.
    """
    # define transform
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
    ])
    dataset = datasets.SVHN(root=data_dir,
                            split='test',
                            download=False,
                            transform=transform,
                            target_transform=target_transform)
    data_loader = torch.utils.data.DataLoader(dataset,
                                              batch_size=batch_size,
                                              shuffle=shuffle,
                                              num_workers=num_workers,
                                              pin_memory=pin_memory)
    return data_loader
def get_svhn(train, get_dataset=False, batch_size=cfg.batch_size):
    """Get SVHN dataset loader."""
    # image pre-processing
    pre_process = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=cfg.dataset_mean, std=cfg.dataset_std)
    ])
    # dataset and data loader
    svhn_dataset = datasets.SVHN(root=cfg.data_root,
                                 split='train' if train else 'test',
                                 transform=pre_process,
                                 download=True)
    if get_dataset:
        return svhn_dataset
    svhn_data_loader = torch.utils.data.DataLoader(dataset=svhn_dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
    return svhn_data_loader
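# Illustrative usage sketch for get_svhn above (assumes the module-level cfg;
# the helper name is hypothetical): get_dataset=True returns the raw dataset,
# otherwise a shuffling DataLoader.
def _svhn_train_test_example():
    train_loader = get_svhn(train=True)
    test_set = get_svhn(train=False, get_dataset=True)
    print(len(test_set))  # the SVHN test split has 26032 images
    return train_loader, test_set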
def SVHNUnlabel():
    raw_dataset = datasets.SVHN('../data', split="train", download=True,
                                transform=transforms.Compose([transforms.ToTensor()]))
    data = []
    labels = []
    # keep at most 70000 samples
    for i in range(min(len(raw_dataset), 70000)):
        datum, label = raw_dataset[i]
        data.append(datum.numpy())
        labels.append(label)
    dataset = TensorDataset(torch.FloatTensor(np.array(data)),
                            torch.LongTensor(np.array(labels)))
    print(len(data))
    return dataset
def get_loader(config):
    """Builds and returns Dataloader for MNIST and SVHN dataset."""
    svhn_transform = transforms.Compose([
        transforms.Resize(config.image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    # MNIST images are single-channel, so normalize with 1-channel stats.
    mnist_transform = transforms.Compose([
        transforms.Resize(config.image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,))
    ])
    svhn = datasets.SVHN(root=config.svhn_path, download=True, transform=svhn_transform)
    mnist = datasets.MNIST(root=config.mnist_path, download=True, transform=mnist_transform)
    svhn_loader = torch.utils.data.DataLoader(dataset=svhn,
                                              batch_size=config.batch_size,
                                              shuffle=True,
                                              num_workers=config.num_workers)
    mnist_loader = torch.utils.data.DataLoader(dataset=mnist,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.num_workers)
    return svhn_loader, mnist_loader
def create_svhn_dataloaders(data_dir: str = './data', batch_size: int = 128, num_workers: int = 4):
    """
    Create pytorch dataloaders with a CIFAR-10 train set and an SVHN test set
    (e.g. for out-of-distribution evaluation).
    :param data_dir: the folder that will contain the data
    :param batch_size: the size of the batch for test and train loaders
    :param num_workers: number of cpu workers which loads the GPU with the dataset
    :return: train and test loaders along with mapping between labels and class names
    """
    # `normalize` is assumed to be a module-level transforms.Normalize instance.
    trainset = datasets.CIFAR10(root=data_dir, train=True, download=True,
                                transform=transforms.Compose([transforms.ToTensor(), normalize]))
    trainloader = data.DataLoader(trainset, batch_size=batch_size, shuffle=False,
                                  num_workers=num_workers)
    data_dir = os.path.join(data_dir, 'svhn')
    testset = datasets.SVHN(root=data_dir, split='test', download=True,
                            transform=transforms.Compose([transforms.ToTensor(), normalize]))
    # Align attribute names with the CIFAR10 dataset
    testset.test_data = testset.data
    testset.test_labels = testset.labels
    testloader = data.DataLoader(testset, batch_size=batch_size, shuffle=False,
                                 num_workers=num_workers)
    # Class names
    classes_cifar10 = ('plane', 'car', 'bird', 'cat', 'deer',
                       'dog', 'frog', 'horse', 'ship', 'truck')
    classes_svhn = ('1', '2', '3', '4', '5', '6', '7', '8', '9', '0')
    return trainloader, testloader, classes_svhn, classes_cifar10
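# Illustrative usage sketch for create_svhn_dataloaders: CIFAR-10 is the
# in-distribution training set and SVHN the test set, a common
# out-of-distribution evaluation pairing (the helper name is hypothetical):
def _ood_loaders_example():
    trainloader, testloader, classes_svhn, classes_cifar10 = create_svhn_dataloaders(
        data_dir='./data', batch_size=128)
    print(len(trainloader.dataset), len(testloader.dataset))  # 50000 26032
    return trainloader, testloader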
def get_ood(data_path, batch_size, transform):
    if data_path == 'SVHN':
        def target_transform(target):
            return int(target)
        data_root = os.path.expanduser(os.path.join('~/.advertorch/data/', 'svhn-data'))
        ood_dataset = datasets.SVHN(root=data_root,
                                    split='train',
                                    download=True,
                                    transform=transform,
                                    target_transform=target_transform)
    else:
        ood_dataset = ImageFolder(data_path, transform=transform)
    ood_loader = torch.utils.data.DataLoader(ood_dataset, batch_size=batch_size,
                                             num_workers=8, shuffle=True)
    return ood_loader
def get_svhn_loaders(cuda_flag, dataset_path, val=False, validation_size=5000,
                     batch_size=64, test_batch_size=1000):
    kwargs = {'num_workers': 1, 'pin_memory': True} if cuda_flag and torch.cuda.is_available() else {}
    train_dataset = datasets.SVHN(dataset_path,
                                  split='train' if not val else 'test',
                                  download=True,
                                  transform=transforms.Compose([
                                      transforms.ToTensor(),
                                      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]))
    indices = list(range(len(train_dataset)))
    np.random.shuffle(indices)
    # Hold out the last validation_size shuffled indices when val is requested.
    if not val:
        ptr = len(train_dataset)
    else:
        ptr = len(train_dataset) - validation_size
    train_sampler, val_sampler = SubsetRandomSampler(indices[:ptr]), SubsetRandomSampler(indices[ptr:])
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                               sampler=train_sampler, **kwargs)
    if val:
        val_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                                 sampler=val_sampler, **kwargs)
    # The test loader draws from CIFAR-10, e.g. for out-of-distribution evaluation.
    test_loader = torch.utils.data.DataLoader(
        datasets.CIFAR10(dataset_path, train=False,
                         transform=transforms.Compose([
                             transforms.ToTensor(),
                             transforms.Normalize((0.4914, 0.4822, 0.4465),
                                                  (0.247, 0.243, 0.261))])),
        batch_size=test_batch_size, shuffle=True, **kwargs)
    logger.info('Using SVHN dataset for this experiment')
    if not val:
        return train_loader, test_loader
    return train_loader, val_loader, test_loader
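# Illustrative usage sketch for get_svhn_loaders: with val=True a
# validation_size chunk of the shuffled indices is held out via
# SubsetRandomSampler (the helper name is hypothetical):
def _svhn_loaders_example():
    train_loader, val_loader, test_loader = get_svhn_loaders(
        torch.cuda.is_available(), './data', val=True, validation_size=5000)
    return train_loader, val_loader, test_loader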
def create_test_dataset(dataset, dataset_dir, transform, target_transform=None):
    if dataset == 'cifar10':
        test_dataset = datasets.CIFAR10(root=dataset_dir, train=False, download=True,
                                        transform=transform, target_transform=target_transform)
    elif dataset == 'cifar100':
        test_dataset = datasets.CIFAR100(root=dataset_dir, train=False, download=True,
                                         transform=transform, target_transform=target_transform)
    elif dataset == 'cifar20':
        test_dataset = CoarseCIFAR100(root=dataset_dir, train=False, download=True,
                                      transform=transform, target_transform=target_transform)
    elif dataset == 'svhn' or dataset == 'svhn+extra':
        # Both variants evaluate on the same SVHN test split.
        test_dataset = datasets.SVHN(root=dataset_dir, split='test', download=True,
                                     transform=transform, target_transform=target_transform)
    else:
        raise ValueError('Unknown dataset: {}'.format(dataset))
    return test_dataset
def __init__(self, dataset, datasets_path, train=False):
    dataset_path = os.path.join(datasets_path, dataset)
    if dataset == 'CIFAR10':
        mean, std = (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)
        transform_test = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize(mean, std)])
        self.dataset = datasets.CIFAR10(root=dataset_path, train=train,
                                        download=True, transform=transform_test)
    elif dataset == 'SVHN':
        mean, std = (0.4377, 0.4438, 0.4728), (0.1980, 0.2010, 0.1970)
        transform_test = transforms.Compose(
            [transforms.ToTensor(), transforms.Normalize(mean, std)])
        self.dataset = datasets.SVHN(root=dataset_path,
                                     split='train' if train else 'test',
                                     download=True, transform=transform_test)
    else:
        raise NotImplementedError