Exemple #1
0
def load_dataset(img_transform, dataset_name, limit=None):
    """Build the training dataset selected by ``dataset_name``.

    Args:
        img_transform: Transform handed to the underlying dataset constructor.
        dataset_name: One of the module-level dataset identifiers (``mnist``,
            ``svhn``, ``mnist_m``, ``synth``, ``usps``, ``amazon``, ``dslr``,
            ``webcam``) or a list of such identifiers.
        limit: When truthy, keep only ``limit`` randomly drawn samples. The
            drawn indices are stored in ``index_cache`` under
            ``(dataset_name, limit)`` and reused on later calls.

    Returns:
        For a single identifier, the (optionally subsampled) dataset wrapped
        in ``RgbWrapper``. For a list, a ``ConcatDataset`` of the results of
        calling this function on every element.

    Raises:
        ValueError: If ``dataset_name`` is not a known identifier or a list.
    """
    if dataset_name == mnist:
        dataset = datasets.MNIST(root=mnist_image_root,
                                 train=True,
                                 transform=img_transform,
                                 download=True)
    elif dataset_name == svhn:
        dataset = datasets.SVHN(root=os.path.join('dataset', 'svhn'),
                                transform=img_transform,
                                download=True)
    elif dataset_name == mnist_m:
        train_list = os.path.join(mnist_m_image_root,
                                  'mnist_m_train_labels.txt')
        dataset = GetLoader(data_root=os.path.join(mnist_m_image_root,
                                                   'mnist_m_train'),
                            data_list=train_list,
                            transform=img_transform)
    elif dataset_name == synth:
        train_mat = os.path.join(synth_image_root, 'synth_train_32x32.mat')
        dataset = GetSynthDigits(data_root=synth_image_root,
                                 data_mat=train_mat,
                                 transform=img_transform)
    elif dataset_name == usps:
        data_file = "usps_28x28.pkl"
        dataset = GetUSPS(data_root=usps_image_root,
                          data_file=data_file,
                          transform=img_transform)
    elif dataset_name == amazon:
        dataset = datasets.ImageFolder('dataset/amazon',
                                       transform=img_transform)
    elif dataset_name == dslr:
        dataset = datasets.ImageFolder('dataset/dslr', transform=img_transform)
    elif dataset_name == webcam:
        dataset = datasets.ImageFolder('dataset/webcam',
                                       transform=img_transform)
    elif isinstance(dataset_name, list):
        # Each element is limited and wrapped by its own recursive call.
        return ConcatDataset([
            load_dataset(img_transform, dset, limit) for dset in dataset_name
        ])
    else:
        # Previously fell through to an UnboundLocalError on ``dataset``.
        raise ValueError('Unknown dataset: {!r}'.format(dataset_name))

    if limit:
        cache_key = (dataset_name, limit)
        indices = index_cache.get(cache_key)
        if indices is None:
            # Draw once and remember, so repeated calls see the same subset.
            indices = torch.randperm(len(dataset))[:limit]
            index_cache[cache_key] = indices
        dataset = Subset(dataset, indices)
    return RgbWrapper(dataset)
Exemple #2
0
    def __init__(self, path):
        """Create the MNIST, SVHN and USPS training datasets under ``path``.

        Each dataset is rooted in its own sub-directory of ``path``
        (``MNIST``, ``SVHN``, ``USPS``), is requested with ``download=True``
        and shares the transform returned by ``get_transform()``.
        """
        shared_transform = get_transform()

        mnist_root = os.path.join(path, "MNIST")
        self.MNIST_dataset = datasets.MNIST(
            root=mnist_root, transform=shared_transform, train=True,
            download=True)

        svhn_root = os.path.join(path, "SVHN")
        self.SVHN_dataset = datasets.SVHN(
            root=svhn_root, transform=shared_transform, split='train',
            download=True)

        usps_root = os.path.join(path, "USPS")
        self.USPS_dataset = datasets.USPS(
            root=usps_root, transform=shared_transform, train=True,
            download=True)
def train_dataset(data_dir,
                  transform=cifar_transform_train,
                  split_size=10000,
                  **kwargs):
    """Return a fixed random subset of the SVHN ``train`` split.

    Args:
        data_dir: Root directory for the SVHN files (downloaded if needed).
        transform: Transform applied to every image.
        split_size: Number of samples in the returned subset.
        **kwargs: Forwarded unchanged to ``datasets.SVHN``.

    Returns:
        The first part of a ``random_split`` seeded with 42; the remaining
        samples are discarded.
    """
    full_train = datasets.SVHN(root=data_dir,
                               split='train',
                               transform=transform,
                               download=True,
                               **kwargs)
    total = len(full_train)
    print(total)

    # A fixed seed keeps the selected subset identical across runs.
    seeded = torch.Generator().manual_seed(42)
    parts = torch.utils.data.random_split(
        dataset=full_train,
        lengths=[split_size, total - split_size],
        generator=seeded)
    kept, _discarded = parts
    return kept
Exemple #4
0
def get_SVHN(split='train',
             shuffle=None,
             batch_size=None,
             augm_type='none',
             size=32,
             num_workers=4,
             config_dict=None):
    """Build a ``DataLoader`` over an SVHN split.

    Args:
        split: SVHN split name passed to ``datasets.SVHN``, or
            ``'svhn_train_extra'`` to use ``SVHNTrainExtraCombo`` instead.
        shuffle: Whether to shuffle; ``None`` means shuffle only for the
            ``'train'`` and ``'extra'`` splits.
        batch_size: Batch size; ``None`` selects ``DEFAULT_TRAIN_BATCHSIZE``
            for ``'train'``/``'extra'`` and ``DEFAULT_TEST_BATCHSIZE``
            otherwise.
        augm_type: Augmentation name forwarded to ``get_SVHN_augmentation``.
        size: Output size forwarded to ``get_SVHN_augmentation``.
        num_workers: Number of loader worker processes.
        config_dict: Optional dict that receives a description of the loader.

    Returns:
        The constructed ``torch.utils.data.DataLoader``.
    """
    # NOTE(review): 'svhn_train_extra' is not in this list, so it receives the
    # test defaults (test batch size, no shuffling) -- confirm that is wanted.
    is_train_split = split in ['train', 'extra']

    if batch_size is None:
        batch_size = (DEFAULT_TRAIN_BATCHSIZE
                      if is_train_split else DEFAULT_TEST_BATCHSIZE)

    if shuffle is None:
        shuffle = is_train_split

    # Handed to the augmentation factory and later stored in config_dict;
    # presumably the factory records its settings in it -- verify.
    augm_config = {}
    transform = get_SVHN_augmentation(augm_type,
                                      out_size=size,
                                      config_dict=augm_config)

    path = get_svhn_path()
    if split == 'svhn_train_extra':
        dataset = SVHNTrainExtraCombo(transform)
    else:
        dataset = datasets.SVHN(path,
                                split=split,
                                transform=transform,
                                download=True)

    loader = torch.utils.data.DataLoader(dataset,
                                         batch_size=batch_size,
                                         shuffle=shuffle,
                                         num_workers=num_workers)

    if config_dict is not None:
        config_dict['Dataset'] = 'SVHN'
        config_dict['SVHN Split'] = split
        config_dict['Batch out_size'] = batch_size
        config_dict['Augmentation'] = augm_config

    return loader
Exemple #5
0
def train_loader(data, data_directory='/home/sungwonlyu/data', batch_size=128):
    """Return a shuffled training ``DataLoader`` for the named dataset.

    Args:
        data: One of ``'mnist'``, ``'svhn'``, ``'cifar10'``, ``'celeba'`` or
            ``'alphachu'``.
        data_directory: Parent directory; the dataset is rooted at
            ``<data_directory>/<data>/``.
        batch_size: Number of samples per batch.

    Returns:
        A ``DataLoader`` over the training split, built with the module-level
        ``kwargs`` as extra loader options.

    Raises:
        ValueError: If ``data`` is not one of the supported names.
    """
    import os

    supported = ('mnist', 'svhn', 'cifar10', 'celeba', 'alphachu')
    if data not in supported:
        # Previously fell through to an UnboundLocalError at the return.
        raise ValueError(
            'Unsupported dataset {!r}; expected one of {}'.format(
                data, ', '.join(supported)))

    # Plain string concatenation produced e.g. '/home/.../datamnist/' when
    # data_directory had no trailing slash; join inserts the separator and the
    # trailing '' keeps the final '/' that the original paths carried.
    root = os.path.join(data_directory, data, '')
    to_tensor = transforms.ToTensor()

    if data == 'mnist':
        dataset = datasets.MNIST(root,
                                 train=True,
                                 download=True,
                                 transform=to_tensor)
    elif data == 'svhn':
        # torchvision's SVHN selects its subset with ``split``; it has no
        # ``train`` keyword, so the former call raised TypeError.
        dataset = datasets.SVHN(root,
                                split='train',
                                download=True,
                                transform=to_tensor)
    elif data == 'cifar10':
        dataset = datasets.CIFAR10(root,
                                   train=True,
                                   download=True,
                                   transform=to_tensor)
    elif data == 'celeba':
        dataset = CelebA(root, 'Male', train=True, transform=to_tensor)
    else:  # 'alphachu'
        dataset = AlphachuDataset(root, train=True, transform=to_tensor)

    return DataLoader(dataset,
                      batch_size=batch_size,
                      shuffle=True,
                      **kwargs)
def get_svhn(train, split='train'):
    """Get SVHN dataset loader.

    Args:
        train: Unused by this function; kept so existing call sites work.
        split: SVHN split to load (passed to ``datasets.SVHN``).

    Returns:
        A shuffled ``DataLoader`` over the requested split using
        ``params.batch_size``.
    """
    # image pre-processing
    pre_process = transforms.Compose([
        transforms.Resize(32),
        transforms.ToTensor(),
        transforms.Normalize((0.437, 0.4437, 0.4728), (0.1980, 0.2010, 0.1970))
    ])

    # Honour the ``split`` argument; it used to be ignored in favour of a
    # hard-coded 'train'.
    svhn_dataset = datasets.SVHN(root=params.data_root,
                                 split=split,
                                 transform=pre_process,
                                 download=True)

    svhn_data_loader = torch.utils.data.DataLoader(
        dataset=svhn_dataset, batch_size=params.batch_size, shuffle=True)

    return svhn_data_loader
 def __new__(cls, root, train=True, transform=None, download=False):
     """Return an ``OriginalSVHN`` instance instead of an instance of ``cls``.

     With ``train`` false the plain test-mode ``OriginalSVHN`` is returned.
     With ``train`` true the training ``OriginalSVHN`` is returned after the
     ``'extra'`` split's data and labels have been added onto it in place.
     """
     if not train:
         return OriginalSVHN(root,
                             train=False,
                             transform=transform,
                             download=download)

     combined = OriginalSVHN(root,
                             train=True,
                             transform=transform,
                             download=download)
     extra = VD.SVHN(root,
                     split='extra',
                     transform=transform,
                     download=download)
     # NOTE(review): ``+=`` only concatenates if ``data``/``targets`` are
     # list-like; on numpy arrays it is element-wise addition -- confirm the
     # container type used by OriginalSVHN.
     combined.data += extra.data
     combined.targets += extra.labels
     return combined
    def get_train_val_loaders(self):
        """Split the training set of ``self.args.dataset`` into two loaders.

        The first ``train_portion`` fraction of sample indices feeds the
        training queue and the remainder feeds the validation queue. Both
        queues read from the same dataset object, so both use
        ``train_transform``; ``valid_transform`` is only returned.

        Returns:
            ``(train_queue, valid_queue, train_transform, valid_transform)``.

        Raises:
            ValueError: If ``self.args.dataset`` is not ``'cifar10'``,
                ``'cifar100'`` or ``'svhn'``.
        """
        name = self.args.dataset
        if name == 'cifar10':
            train_transform, valid_transform = utils._data_transforms_cifar10(
                self.args)
            train_data = dset.CIFAR10(root=self.args.data,
                                      train=True,
                                      download=True,
                                      transform=train_transform)
        elif name == 'cifar100':
            train_transform, valid_transform = utils._data_transforms_cifar100(
                self.args)
            train_data = dset.CIFAR100(root=self.args.data,
                                       train=True,
                                       download=True,
                                       transform=train_transform)
        elif name == 'svhn':
            train_transform, valid_transform = utils._data_transforms_svhn(
                self.args)
            train_data = dset.SVHN(root=self.args.data,
                                   split='train',
                                   download=True,
                                   transform=train_transform)
        else:
            # Previously surfaced as an UnboundLocalError on ``train_data``.
            raise ValueError('Unsupported dataset: {!r}'.format(name))

        num_train = len(train_data)
        indices = list(range(num_train))
        split = int(np.floor(self.args.train_portion * num_train))

        train_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=self.args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                indices[:split]),
            pin_memory=True,
            num_workers=2)

        valid_queue = torch.utils.data.DataLoader(
            train_data,
            batch_size=self.args.batch_size,
            sampler=torch.utils.data.sampler.SubsetRandomSampler(
                indices[split:num_train]),
            pin_memory=True,
            num_workers=2)

        return train_queue, valid_queue, train_transform, valid_transform
def get_test_loader(batch_size, shuffle=True, num_workers=4, pin_memory=False):
    """
    Build a multi-process iterator over the SVHN test split stored in
    ``./data`` (downloaded if missing).
    Params
    ------
    - batch_size: how many samples per batch to load.
    - shuffle: whether the loader shuffles the samples.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: forwarded to the DataLoader's ``pin_memory`` option.
    Returns
    -------
    - data_loader: test set iterator.
    """
    # NOTE(review): these constants look like the commonly used CIFAR-10
    # channel statistics -- confirm they are intended for SVHN.
    channel_mean = [0.4914, 0.4822, 0.4465]
    channel_std = [0.2023, 0.1994, 0.2010]
    pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ])

    test_set = datasets.SVHN(
        root='./data',
        split='test',
        download=True,
        transform=pipeline,
    )

    return torch.utils.data.DataLoader(
        test_set,
        batch_size=batch_size,
        shuffle=shuffle,
        num_workers=num_workers,
        pin_memory=pin_memory,
    )
def load_datasets(path, train=True):
    """
    Load the MNIST and SVHN datasets for training or evaluation.

    Both datasets are requested with ``download=True``.

    Parameters:
    -path (str): Path to the datasets
    -train (bool, default=True): Selects the train split when true, the test
     split otherwise

    Returns:
    -A dict with keys 'mnist' and 'svhn' holding the two datasets
    """
    side = 32  # both datasets are resized to this size

    # MNIST: resize, colour jitter, then expand to three channels so it can be
    # normalised with three-channel statistics.
    mnist_pipeline = transforms.Compose([
        transforms.Resize(side),
        transforms.ColorJitter(.1, 1, .75, 0),
        transforms.ToTensor(),
        transforms.Lambda(lambda x: x.expand([3, -1, -1])),
        transforms.Normalize(mean=(0.1307, 0.1307, 0.1307),
                             std=(0.3081, 0.3081, 0.3081))
    ])
    mnist_set = datasets.MNIST(path,
                               train=train,
                               download=True,
                               transform=mnist_pipeline)

    # SVHN: resize and normalise only.
    svhn_pipeline = transforms.Compose([
        transforms.Resize(side),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.45, 0.45, 0.45),
                             std=(0.199, 0.199, 0.199))
    ])
    svhn_split = 'train' if train else 'test'
    svhn_set = datasets.SVHN(path,
                             split=svhn_split,
                             download=True,
                             transform=svhn_pipeline)

    return {'mnist': mnist_set, 'svhn': svhn_set}
Exemple #11
0
    def __init__(self, dataset_name, indexed=False):
        """Load the test split of ``dataset_name`` into ``self.data``.

        The data is rooted at ``data_dir/<dataset_name>`` and downloaded if
        missing. ``'mnist'``, ``'cifar10'`` and ``'cifar100'`` are only
        converted to tensors; ``'svhn'`` is additionally normalised with
        mean and std 0.5 per channel. For any other name ``self.data`` is
        not set by this code.
        """
        self.data_root = os.path.join(data_dir, dataset_name)
        self.indexed = indexed
        root = self.data_root

        if dataset_name == 'mnist':
            # Normalisation is intentionally not applied here.
            tensor_only = transforms.Compose([transforms.ToTensor()])
            self.data = datasets.MNIST(root=root,
                                       train=False,
                                       download=True,
                                       transform=tensor_only)
        elif dataset_name == 'cifar10':
            # Normalisation is intentionally not applied here.
            tensor_only = transforms.Compose([transforms.ToTensor()])
            self.data = datasets.CIFAR10(root=root,
                                         train=False,
                                         download=True,
                                         transform=tensor_only)
        elif dataset_name == 'cifar100':
            # Normalisation is intentionally not applied here.
            transform_test = transforms.Compose([transforms.ToTensor()])
            self.data = torchvision.datasets.CIFAR100(
                root=root,
                train=False,
                download=True,
                transform=transform_test)
        elif dataset_name == 'svhn':
            half = (0.5, 0.5, 0.5)
            normalised = transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize(half, half),
            ])
            self.data = datasets.SVHN(root=root,
                                      split='test',
                                      download=True,
                                      transform=normalised)
Exemple #12
0
def binary_SVHN(cls1, cls2, train=True, batch_size=None, augm_flag=True, val_size=None):
    """Build loader(s) over the SVHN samples of two classes, relabelled 0/1.

    Args:
        cls1: Original label that becomes target ``0.``.
        cls2: Original label that becomes target ``1.``.
        train: Use the ``'train'`` split (and shuffle) when true, else
            ``'test'``.
        batch_size: Batch size; ``None`` selects the module-level
            ``train_batch_size`` or ``test_batch_size``.
        augm_flag: When true (and ``train``), each sample is randomly either
            edge-padded and cropped or left unaugmented.
        val_size: For the test split only: number of samples to split off
            into a validation loader.

    Returns:
        A single ``DataLoader`` when ``train`` is true or ``val_size`` is
        ``None``; otherwise ``(val_loader, test_loader)``.
    """
    if batch_size is None:
        batch_size = train_batch_size if train else test_batch_size

    split = 'train' if train else 'test'

    transform_base = [transforms.ToTensor()]
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4, padding_mode='edge'),
    ] + transform_base)
    transform_test = transforms.Compose(transform_base)
    transform_train = transforms.RandomChoice([transform_train, transform_test])
    transform = transform_train if (augm_flag and train) else transform_test

    dataset = datasets.SVHN(path, split=split, transform=transform, download=False)
    labels = np.array(dataset.labels)
    idxs = np.flatnonzero(np.logical_or(labels == cls1, labels == cls2))

    dataset.data = dataset.data[idxs]
    kept = labels[idxs]
    # Map in a single step. The former two-step remap (cls1 -> 0, then
    # cls2 -> 1) turned every label into 1 whenever cls2 == 0, because the
    # freshly written zeros matched cls2 in the second step.
    dataset.labels = np.where(kept == cls1, 0., 1.)

    if train or val_size is None:
        loader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                                             shuffle=train, num_workers=4)
        return loader

    # Split into val and test sets
    test_size = len(dataset) - val_size
    dataset_val, dataset_test = data_utils.random_split(dataset, (val_size, test_size))
    val_loader = torch.utils.data.DataLoader(dataset_val, batch_size=batch_size,
                                             shuffle=train, num_workers=4)
    test_loader = torch.utils.data.DataLoader(dataset_test, batch_size=batch_size,
                                              shuffle=train, num_workers=4)
    return val_loader, test_loader
Exemple #13
0
 def __init__(self, batchsize, train=True):
     """Set up an SVHN dataset, its shuffled loader and an iterator over it.

     Args:
         batchsize: Batch size used by the data loader.
         train: Load the ``"train"`` split when true, ``"test"`` otherwise.
     """
     Dataset.__init__(self)
     data_root = join(dirname(realpath(__file__)), 'SVHN_data')
     self.name = "svhn"
     self.range = [0.0, 1.0]
     self.data_dims = [3, 32, 32]
     self.batchsize = batchsize
     # The split used to be hard-coded to "train", so ``train=False`` still
     # loaded training data; select it from the flag instead.
     split = "train" if train else "test"
     self.data = dsets.SVHN(root=data_root,
                            download=True,
                            split=split,
                            transform=transforms.Compose(
                                [transforms.ToTensor()]))
     # Attribute name (including its spelling) is kept for existing users.
     self.dataloder = tdata.DataLoader(self.data,
                                       self.batchsize,
                                       shuffle=True)
     self.iter = iter(self.dataloder)
     self._index = 0
def get_val_data_loader(dataset_name,
                        dataset_dir,
                        batch_size,
                        num_workers):
    """Return an unshuffled loader over a dataset's held-out split.

    Args:
        dataset_name: ``"CIFAR10"``, ``"CIFAR100"`` or ``"SVHN"``.
        dataset_dir: Root directory of the dataset (downloaded if missing).
        batch_size: Number of samples per batch.
        num_workers: Number of loader worker processes.

    Returns:
        A ``DataLoader`` with ``shuffle=False`` and ``pin_memory=True``.

    Raises:
        Exception: If ``dataset_name`` is not one of the supported names.
    """
    normalise = transforms.Normalize(mean=(0.4914, 0.4822, 0.4465),
                                     std=(0.2023, 0.1994, 0.2010))
    transform_val = transforms.Compose([transforms.ToTensor(), normalise])

    # Constructor arguments shared by every supported dataset.
    shared = dict(root=dataset_dir, transform=transform_val, download=True)

    if dataset_name == "CIFAR10":
        held_out = datasets.CIFAR10(train=False, **shared)
    elif dataset_name == "CIFAR100":
        held_out = datasets.CIFAR100(train=False, **shared)
    elif dataset_name == "SVHN":
        held_out = datasets.SVHN(split="test", **shared)
    else:
        raise Exception('Unrecognized dataset: {}'.format(dataset_name))

    return torch.utils.data.DataLoader(
        dataset=held_out,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True)
Exemple #15
0
def get_svhn(train, get_dataset=False, batch_size=cfg.batch_size):
    """Get SVHN as a single-channel (grayscale) dataset or loader.

    Args:
        train: Load the ``'train'`` split when true, ``'test'`` otherwise.
        get_dataset: Return the dataset itself instead of a loader.
        batch_size: Batch size of the returned loader.

    Returns:
        The ``datasets.SVHN`` object when ``get_dataset`` is true, otherwise
        a shuffled ``DataLoader`` over it.
    """

    def _weighted_gray(x):
        # Weighted sum of the three channels, keeping a leading channel axis.
        return (x[0, ...] * 0.299 + x[1, ...] * 0.587 +
                x[2, ...] * 0.114).unsqueeze(0)

    # Resize, normalise the three channels, then collapse them to one.
    pre_process = transforms.Compose([
        transforms.Resize(cfg.image_size),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
        transforms.Lambda(_weighted_gray),
    ])

    which = 'train' if train else 'test'
    svhn_dataset = datasets.SVHN(root=cfg.data_root,
                                 split=which,
                                 transform=pre_process,
                                 download=False)

    if get_dataset:
        return svhn_dataset
    return torch.utils.data.DataLoader(dataset=svhn_dataset,
                                       batch_size=batch_size,
                                       shuffle=True)
Exemple #16
0
def get_test_loader(data_dir,
                    dataset,
                    batch_size,
                    exp='azimuth', # smallnorb only
                    familiar=True, # smallnorb only
                    num_workers=4,
                    pin_memory=False):
    """Return an unshuffled test ``DataLoader`` for the named dataset.

    Args:
        data_dir: Parent directory; the data is read from
            ``data_dir + '/' + dataset``.
        dataset: ``"cifar10"``, ``"svhn"`` or ``"smallnorb"``.
        batch_size: Number of samples per batch.
        exp: smallnorb only -- a member of ``VIEWPOINT_EXPS`` or ``"full"``.
        familiar: smallnorb viewpoint experiments only; forwarded to
            ``smallNORBViewPoint``.
        num_workers: Number of loader worker processes.
        pin_memory: Forwarded to the DataLoader's ``pin_memory`` option.

    Returns:
        The constructed ``torch.utils.data.DataLoader``.

    Raises:
        ValueError: If ``dataset`` (or, for smallnorb, ``exp``) is not
            supported. Previously the dataset *name string* was silently
            handed to the DataLoader in that case.
    """
    data_dir = data_dir + '/' + dataset

    if dataset == "cifar10":
        trans = [transforms.ToTensor(),
                 transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))]
        test_set = datasets.CIFAR10(data_dir, train=False, download=False,
                                    transform=transforms.Compose(trans))

    elif dataset == "svhn":
        normalize = transforms.Normalize(mean=[x / 255.0 for x in [109.9, 109.7, 113.8]],
                                         std=[x / 255.0 for x in [50.1, 50.6, 50.8]])
        trans = [transforms.ToTensor(),
                 normalize]
        test_set = datasets.SVHN(data_dir, split='test', download=True,
                                 transform=transforms.Compose(trans))

    elif dataset == "smallnorb":
        trans = [transforms.Resize(48),
                 transforms.CenterCrop(32),
                 transforms.ToTensor(),
                 ]
        if exp in VIEWPOINT_EXPS:
            test_set = smallNORBViewPoint(data_dir, exp=exp, familiar=familiar, train=False, download=True,
                                          transform=transforms.Compose(trans))
        elif exp == "full":
            test_set = smallNORB(data_dir, train=False, download=True,
                                 transform=transforms.Compose(trans))
        else:
            raise ValueError('Unsupported smallnorb experiment: {!r}'.format(exp))

    else:
        raise ValueError('Unsupported dataset: {!r}'.format(dataset))

    data_loader = torch.utils.data.DataLoader(
        test_set, batch_size=batch_size, shuffle=False,
        num_workers=num_workers, pin_memory=pin_memory,
    )

    return data_loader
Exemple #17
0
def get_dataloader(args):
    """Build matching MNIST and SVHN loaders for the current run mode.

    ``args.mode`` of ``'train'`` or ``'continue_train'`` selects the training
    splits with shuffling; any other mode selects the test splits without
    shuffling.

    Returns:
        ``(mnist_loader, svhn_loader)``.
    """
    target_size = (args.load_size, args.load_size)

    svhn_transform = transforms.Compose([
        transforms.Resize(target_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    mnist_transform = transforms.Compose([
        transforms.Resize(target_size),
        transforms.ToTensor(),
        transforms.Normalize((0.1307, ), (0.3081, ))
    ])

    training = args.mode in ('train', 'continue_train')
    split = 'train' if training else 'test'

    svhn = datasets.SVHN(root=args.dataset_path,
                         split=split,
                         download=True,
                         transform=svhn_transform)
    mnist = datasets.MNIST(root=args.dataset_path,
                           train=training,
                           download=True,
                           transform=mnist_transform)

    def _wrap(dataset):
        # Both loaders share batch size, worker count and shuffle policy.
        return torch.utils.data.DataLoader(dataset=dataset,
                                           batch_size=args.batch_size,
                                           shuffle=training,
                                           num_workers=args.num_workers)

    svhn_loader = _wrap(svhn)
    mnist_loader = _wrap(mnist)
    return mnist_loader, svhn_loader
Exemple #18
0
def get_loader(config):
    """Builds and returns DataLoaders for the source and target datasets.

    Datasets are indexed 1 = MNIST, 2 = SVHN, 3 = MNIST-M, 4 = synthetic
    digits. The dataset selected by ``config.source_idx`` is passed through
    ``spilit_dataset`` to obtain two parts.

    Returns:
        A list of four shuffled loaders, in order: the two parts of the
        source dataset, the dataset at ``config.t1_idx`` and the dataset at
        ``config.t2_idx``.
    """
    # ``transforms.Scale`` has been removed from torchvision; ``Resize`` is
    # its direct replacement.
    transform1 = transforms.Compose([
        transforms.Resize(config.image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    # Single-channel input: normalise, then repeat to three channels.
    transform2 = transforms.Compose([
        transforms.Resize(config.image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
        transforms.Lambda(lambda x: x.repeat(3, 1, 1))
    ])

    mnistm = MNISTM("./", train=True, transform=transform1, download=True)
    syn_dig = Loadsyn()
    svhn = datasets.SVHN(root=config.svhn_path,
                         download=True,
                         transform=transform1)
    mnist = datasets.MNIST(root=config.mnist_path,
                           download=True,
                           transform=transform2)

    dataset_dic = {1: mnist, 2: svhn, 3: mnistm, 4: syn_dig}

    split_source, split_target = spilit_dataset(dataset_dic[config.source_idx])

    selected = [
        split_source, split_target, dataset_dic[config.t1_idx],
        dataset_dic[config.t2_idx]
    ]
    return [
        data.DataLoader(dataset=dataset,
                        batch_size=config.batch_size,
                        shuffle=True,
                        num_workers=config.num_workers)
        for dataset in selected
    ]
Exemple #19
0
def get_loader(mode):
    """Builds and returns a DataLoader for the configured MNIST or SVHN set.

    Args:
        mode: ``"train"`` selects MNIST's training set; for SVHN the value
            is passed straight through as the ``split`` name.

    Returns:
        The loader, or ``None`` when ``Config.model.dataset`` is neither
        ``"mnist"`` nor ``"svhn"``.
    """
    config = Config
    use_train_set = mode == "train"

    # NOTE(review): this list is built but never passed to a dataset, so the
    # augmentation setting currently has no effect -- confirm intent.
    augmentations = []
    if config.model.use_augmentation:
        augmentations.append(transforms.RandomHorizontalFlip())
        augmentations.append(transforms.RandomRotation(0.1))

    # NOTE(review): the three-channel Normalize is also applied to MNIST --
    # verify this works with the torchvision version in use.
    transform = transforms.Compose([
        transforms.Resize(config.data.image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    chosen = config.model.dataset
    if chosen == "mnist":
        source = datasets.MNIST(root=config.data.mnist_path,
                                download=True,
                                transform=transform,
                                train=use_train_set)
    elif chosen == "svhn":
        source = datasets.SVHN(root=config.data.svhn_path,
                               download=True,
                               transform=transform,
                               split=mode)
    else:
        return None

    return torch.utils.data.DataLoader(
        dataset=source,
        batch_size=config.train.batch_size,
        shuffle=config.train.shuffle,
        num_workers=config.data.num_workers)
Exemple #20
0
def main():
    """Print the target and raw logits of every SVHN test sample.

    Loads the SVHN test split, restores a depth-152 ``resnet`` from
    ``checkpoint/model_best.pth.tar`` onto CUDA, and prints one line per
    sample: the integer target followed by the model's logits formatted to
    eight decimals.
    """
    logger.info('Loading SVHN test data')
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ])

    dataset = datasets.SVHN(root='data/',
                            split='test',
                            download=True,
                            transform=transform)
    dataloader = data.DataLoader(dataset,
                                 batch_size=1000,
                                 shuffle=False,
                                 num_workers=4)

    logger.info('Loading model')
    model = resnet(num_classes=10, depth=152)
    model = torch.nn.DataParallel(model).cuda()
    checkpoint = torch.load('checkpoint/model_best.pth.tar')
    model.load_state_dict(checkpoint['state_dict'])

    model.eval()

    # NOTE(review): the header names four columns, but each row holds the
    # target followed by the logits -- confirm which format consumers expect.
    print('Index Correct Predicted Confidence')
    for inputs, targets in dataloader:
        inputs, targets = inputs.cuda(), targets.cuda()
        with torch.no_grad():
            logits = model(inputs)
        for target, logit in zip(targets, logits):
            tgt_string = '%i ' % target.item()
            prediction_strings = ['%0.8f' % x for x in logit.tolist()]
            print(tgt_string + ' '.join(prediction_strings))
Exemple #21
0
def load_data():
    """Create a shuffled loader over the SVHN training split in ``data/``.

    Returns:
        ``(train_loader, batch_size, num_workers)`` where the batch size is
        128 and the worker count is 0.
    """
    batch_size, num_workers = 128, 0

    # Images are only converted to tensors.
    svhn_train = datasets.SVHN(root='data/',
                               split='train',
                               download=True,
                               transform=transforms.ToTensor())

    train_loader = torch.utils.data.DataLoader(dataset=svhn_train,
                                               batch_size=batch_size,
                                               shuffle=True,
                                               num_workers=num_workers)
    return train_loader, batch_size, num_workers
Exemple #22
0
def get_test_loader(data_dir='./data/svhn',
                    batch_size=64,
                    shuffle=False,
                    num_workers=1,
                    pin_memory=True):
    """
    Utility function for loading and returning a multi-process
    test iterator over the SVHN dataset (not downloaded automatically).
    Params
    ------
    - data_dir: path directory to the dataset.
    - batch_size: how many samples per batch to load.
    - shuffle: whether the loader shuffles the samples.
    - num_workers: number of subprocesses to use when loading the dataset.
    - pin_memory: forwarded to the DataLoader's ``pin_memory`` option.
    Returns
    -------
    - data_loader: test set iterator.
    """
    half = [0.5, 0.5, 0.5]
    image_pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=half, std=half)
    ])

    # ``target_transform`` is a name taken from the enclosing module scope.
    test_set = datasets.SVHN(root=data_dir,
                             split='test',
                             download=False,
                             transform=image_pipeline,
                             target_transform=target_transform)

    return torch.utils.data.DataLoader(test_set,
                                       batch_size=batch_size,
                                       shuffle=shuffle,
                                       num_workers=num_workers,
                                       pin_memory=pin_memory)
Exemple #23
0
def get_svhn(train, get_dataset=False, batch_size=cfg.batch_size):
    """Return the SVHN dataset, or a shuffling DataLoader wrapped around it.

    Args:
        train: truthy selects the ``'train'`` split, falsy the ``'test'`` split.
        get_dataset: when truthy, return the dataset object itself instead of
            a DataLoader.
        batch_size: batch size of the returned DataLoader (ignored when
            ``get_dataset`` is truthy). The default is read from
            ``cfg.batch_size`` once, when the function is defined.

    Returns:
        The ``datasets.SVHN`` instance, or a ``DataLoader`` over it with
        ``shuffle=True``.
    """
    # Pre-processing: tensor conversion followed by normalization with the
    # statistics configured in ``cfg``.
    pipeline = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=cfg.dataset_mean, std=cfg.dataset_std),
    ])

    split_name = 'train' if train else 'test'
    dataset = datasets.SVHN(root=cfg.data_root,
                            split=split_name,
                            transform=pipeline,
                            download=True)

    if get_dataset:
        return dataset

    return torch.utils.data.DataLoader(dataset=dataset,
                                       batch_size=batch_size,
                                       shuffle=True)
Exemple #24
0
def SVHNUnlabel(max_samples=70000):
    """Materialize the first samples of the SVHN train split as a TensorDataset.

    The SVHN ``train`` split is downloaded to ``'../data'`` if necessary, and
    the leading ``max_samples`` items (or all of them, if the split is
    smaller) are converted to tensors and copied into memory.

    Despite the function name, the labels are kept: the returned dataset
    yields ``(image, label)`` pairs.

    Args:
        max_samples: upper bound on the number of samples to keep. Defaults
            to 70000, the cap that was previously hard-coded.

    Returns:
        A ``TensorDataset`` holding a float tensor of images and a long
        tensor of labels.

    Side effects:
        Prints the number of samples kept.
    """
    raw_dataset = datasets.SVHN('../data',
                                split="train",
                                download=True,
                                transform=transforms.Compose(
                                    [transforms.ToTensor()]))

    # Bound the range up front instead of counting and breaking in the loop.
    sample_count = min(len(raw_dataset), max_samples)

    data = []
    labels = []
    for index in range(sample_count):
        datum, label = raw_dataset[index]
        data.append(datum.numpy())
        labels.append(label)

    dataset = TensorDataset(torch.FloatTensor(np.array(data)),
                            torch.LongTensor(np.array(labels)))
    print(len(data))
    return dataset
Exemple #25
0
def get_loader(config):
    """Builds and returns DataLoaders for the SVHN and MNIST datasets.

    Both datasets are downloaded if missing. Images are resized to
    ``config.image_size``, converted to tensors and normalized with mean 0.5
    and std 0.5 per channel.

    SVHN images have three channels while MNIST images have one, so each
    dataset gets a normalization with the matching number of channels.
    Applying the three-channel statistics to the single-channel MNIST tensors
    raises a broadcast-shape error on current torchvision releases.

    Args:
        config: object providing ``image_size``, ``svhn_path``,
            ``mnist_path``, ``batch_size`` and ``num_workers``.

    Returns:
        Tuple ``(svhn_loader, mnist_loader)``; both loaders shuffle.
    """
    # Three-channel pipeline for the RGB SVHN images.
    svhn_transform = transforms.Compose([
                    transforms.Resize(config.image_size),
                    transforms.ToTensor(),
                    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    # Single-channel pipeline for the grayscale MNIST images.
    mnist_transform = transforms.Compose([
                    transforms.Resize(config.image_size),
                    transforms.ToTensor(),
                    transforms.Normalize((0.5,), (0.5,))])

    svhn = datasets.SVHN(root=config.svhn_path, download=True, transform=svhn_transform)
    mnist = datasets.MNIST(root=config.mnist_path, download=True, transform=mnist_transform)

    svhn_loader = torch.utils.data.DataLoader(dataset=svhn,
                                              batch_size=config.batch_size,
                                              shuffle=True,
                                              num_workers=config.num_workers)

    mnist_loader = torch.utils.data.DataLoader(dataset=mnist,
                                               batch_size=config.batch_size,
                                               shuffle=True,
                                               num_workers=config.num_workers)
    return svhn_loader, mnist_loader
def create_svhn_dataloaders(data_dir: str = './data', batch_size: int = 128, num_workers: int = 4):
    """
    Create a CIFAR10 train loader and an SVHN test loader.

    Despite the function name, the training set is the CIFAR10 train split;
    only the test set is SVHN (its ``test`` split, stored under
    ``<data_dir>/svhn``). Both datasets are downloaded if missing and use the
    module-level ``normalize`` transform after tensor conversion. Neither
    loader shuffles.

    :param data_dir: the folder that will contain the data
    :param batch_size: the size of the batch for test and train loaders
    :param num_workers: number of worker subprocesses used by each loader
    :return: ``(trainloader, testloader, classes_svhn, classes_cifar10)``,
        where each ``classes_*`` tuple maps an integer label (used as the
        index) to its class name
    """

    trainset = datasets.CIFAR10(root=data_dir,
                                train=True,
                                download=True,
                                transform=transforms.Compose([transforms.ToTensor(),
                                                              normalize]))
    trainloader = data.DataLoader(trainset,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  num_workers=num_workers)

    # Use a separate local instead of overwriting the ``data_dir`` parameter.
    svhn_dir = os.path.join(data_dir, 'svhn')
    testset = datasets.SVHN(root=svhn_dir,
                            split='test',
                            download=True,
                            transform=transforms.Compose([transforms.ToTensor(),
                                                          normalize]))

    # Expose the SVHN arrays under CIFAR10-style attribute names as well.
    testset.test_data = testset.data
    testset.test_labels = testset.labels

    testloader = data.DataLoader(testset,
                                 batch_size=batch_size,
                                 shuffle=False,
                                 num_workers=num_workers)

    # Class names, indexed by integer label.
    classes_cifar10 = ('plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck')
    # torchvision's SVHN remaps the raw label 10 to 0, so label i is digit i.
    classes_svhn = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')

    return trainloader, testloader, classes_svhn, classes_cifar10
Exemple #27
0
def get_ood(data_path, batch_size, transform):
    """Return a shuffling DataLoader over an out-of-distribution dataset.

    Args:
        data_path: the literal string ``'SVHN'`` selects the SVHN train split
            (downloaded to ``~/.advertorch/data/svhn-data`` if missing); any
            other value is passed to ``ImageFolder`` as the root directory.
        batch_size: batch size of the returned loader.
        transform: image transform applied to every sample.

    Returns:
        A ``DataLoader`` with ``shuffle=True`` and 8 worker processes.
    """
    if data_path == 'SVHN':
        data_root = os.path.expanduser(
            os.path.join('~/.advertorch/data/', 'svhn-data'))
        ood_dataset = datasets.SVHN(
            root=data_root,
            split='train',
            download=True,
            transform=transform,
            # Targets are cast with the builtin ``int``. A function defined
            # inside this one cannot be pickled, which breaks the worker
            # processes on platforms that start them with "spawn".
            target_transform=int,
        )
    else:
        ood_dataset = ImageFolder(data_path, transform=transform)
    ood_loader = torch.utils.data.DataLoader(ood_dataset,
                                             batch_size=batch_size,
                                             num_workers=8,
                                             shuffle=True)
    return ood_loader
def get_svhn_loaders(cuda_flag, dataset_path, val=False, validation_size=5000, batch_size=64, test_batch_size=1000):
    """Build the train / (optional) validation / test loaders for the SVHN experiment.

    Args:
        cuda_flag: when truthy and CUDA is available, loaders are created
            with ``num_workers=1`` and ``pin_memory=True``.
        dataset_path: root directory passed to both dataset constructors.
        val: when truthy, the last ``validation_size`` shuffled indices are
            held out and a validation loader is returned as well.
        validation_size: number of held-out samples when ``val`` is truthy.
        batch_size: batch size of the train and validation loaders.
        test_batch_size: batch size of the test loader.

    Returns:
        ``(train_loader, test_loader)`` when ``val`` is falsy, otherwise
        ``(train_loader, val_loader, test_loader)``.

    Side effects:
        Shuffles indices with the global NumPy RNG and logs an info message.
    """
    use_cuda = cuda_flag and torch.cuda.is_available()
    kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

    svhn_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
    # NOTE(review): with val=True the SVHN 'test' split is used as the source
    # of both train and validation samples -- confirm this is intended.
    split = 'test' if val else 'train'
    train_dataset = datasets.SVHN(dataset_path, split=split, download=True,
                                  transform=svhn_transform)

    total = len(train_dataset)
    indices = list(range(total))
    np.random.shuffle(indices)

    # Everything before ``ptr`` is used for training, the rest for validation.
    ptr = total - validation_size if val else total
    train_sampler = SubsetRandomSampler(indices[:ptr])
    val_sampler = SubsetRandomSampler(indices[ptr:])

    loaders = [
        torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                    sampler=train_sampler, **kwargs)
    ]
    if val:
        loaders.append(
            torch.utils.data.DataLoader(train_dataset, batch_size=batch_size,
                                        sampler=val_sampler, **kwargs))

    # NOTE(review): the test loader reads the CIFAR10 test set (with CIFAR10
    # statistics and no download), not SVHN -- confirm this is intended.
    cifar_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.247, 0.243, 0.261)),
    ])
    cifar_test = datasets.CIFAR10(dataset_path, train=False,
                                  transform=cifar_transform)
    loaders.append(
        torch.utils.data.DataLoader(cifar_test, batch_size=test_batch_size,
                                    shuffle=True, **kwargs))

    logger.info('Using SVHN dataset for this experiment')

    return tuple(loaders)
Exemple #29
0
def create_test_dataset(dataset, dataset_dir, transform,
                        target_transform=None):
    """Create the test split of the dataset identified by ``dataset``.

    Args:
        dataset: one of ``'cifar10'``, ``'cifar100'``, ``'cifar20'``,
            ``'svhn'`` or ``'svhn+extra'``. Both SVHN names map to the same
            SVHN ``test`` split.
        dataset_dir: root directory of the dataset; data is downloaded there
            if missing.
        transform: transform applied to every image.
        target_transform: optional transform applied to every target.

    Returns:
        The constructed test dataset.

    Raises:
        ValueError: if ``dataset`` is not one of the supported names. This
            replaces the unbound-local error the function used to hit.
    """
    if dataset == 'cifar10':
        dataset_cls = datasets.CIFAR10
    elif dataset == 'cifar100':
        dataset_cls = datasets.CIFAR100
    elif dataset == 'cifar20':
        dataset_cls = CoarseCIFAR100
    elif dataset in ('svhn', 'svhn+extra'):
        # SVHN selects its split by name rather than with a ``train`` flag.
        return datasets.SVHN(root=dataset_dir, split='test',
                             download=True,
                             transform=transform,
                             target_transform=target_transform)
    else:
        raise ValueError('Unsupported dataset: {!r}'.format(dataset))

    return dataset_cls(root=dataset_dir, train=False,
                       download=True,
                       transform=transform,
                       target_transform=target_transform)
Exemple #30
0
 def __init__(self, dataset, datasets_path, train=False):
     """Create the requested torchvision dataset and store it on ``self.dataset``.

     Args:
         dataset: ``'CIFAR10'`` or ``'SVHN'``; any other value raises
             ``NotImplementedError``.
         datasets_path: parent directory; the data lives in (and is
             downloaded to) ``<datasets_path>/<dataset>``.
         train: selects the train split when truthy, the test split
             otherwise.
     """
     root = os.path.join(datasets_path, dataset)

     # Per-channel (mean, std) used to normalize each supported dataset.
     if dataset == 'CIFAR10':
         channel_stats = ((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010))
     elif dataset == 'SVHN':
         channel_stats = ((0.4377, 0.4438, 0.4728), (0.1980, 0.2010, 0.1970))
     else:
         raise NotImplementedError

     pipeline = transforms.Compose([
         transforms.ToTensor(),
         transforms.Normalize(*channel_stats),
     ])

     if dataset == 'CIFAR10':
         self.dataset = datasets.CIFAR10(root=root,
                                         train=train,
                                         download=True,
                                         transform=pipeline)
     else:
         # SVHN selects its split by name rather than with a boolean flag.
         self.dataset = datasets.SVHN(root=root,
                                      split='train' if train else 'test',
                                      download=True,
                                      transform=pipeline)