Example no. 1
0
def torchvision_dataset(transform=None, train=True, subset=None):
    """Creates a dataset from torchvision, configured using Command Line Arguments.

    Args:
        transform (callable, optional): A function that transforms an image (default None).
        train (bool, optional): Training set or validation - if applicable (default True).
        subset (string, optional): Specifies the subset of the relevant
            categories, if any of them was split (default, None).

    Relevant Command Line Arguments:

        - **dataset**: `--data`, `--torchvision_dataset`.

    Raises:
        ValueError: If ``--torchvision_dataset`` was not given or names an
            unsupported dataset.

    Note:
        Settings are automatically acquired from a call to :func:`dlt.config.parse`
        from the built-in ones. If :func:`dlt.config.parse` was not called in the 
        main script, this function will call it.

    Warning:
        Unlike the torchvision datasets, this function returns a dataset that
        uses NumPy Arrays instead of a PIL Images.
    """
    opts = fetch_opts(['dataset'], subset)

    if opts.torchvision_dataset is None:
        apnd = '_' + subset if subset is not None else ''
        raise ValueError('No value given for --torchvision_dataset{0}.'.format(apnd))

    name = opts.torchvision_dataset
    if name == 'mnist':
        from torchvision.datasets import MNIST as dataset_cls
    elif name == 'fashionmnist':
        from torchvision.datasets import FashionMNIST as dataset_cls
    elif name == 'cifar10':
        from torchvision.datasets import CIFAR10 as dataset_cls
    elif name == 'cifar100':
        from torchvision.datasets import CIFAR100 as dataset_cls
    else:
        # BUG FIX: an unrecognized name previously fell through every branch
        # and raised UnboundLocalError on `ret_dataset`; fail clearly instead.
        raise ValueError('Unsupported --torchvision_dataset: {0}'.format(name))

    # Patch __getitem__ so samples come back as NumPy arrays instead of PIL
    # images (see the Warning in the docstring).
    dataset_cls.__getitem__ = _custom_get_item
    ret_dataset = dataset_cls(opts.data, train=train, download=True,
                              transform=transform)

    if name in ('mnist', 'fashionmnist'):
        # MNIST-style images are HxW; add a trailing channel dimension for
        # consistency with the HxWxC CIFAR layout.
        if train:
            ret_dataset.train_data = ret_dataset.train_data.unsqueeze(3)
        else:
            ret_dataset.test_data = ret_dataset.test_data.unsqueeze(3)
    return ret_dataset
Example no. 2
0
def fetch_dataloader(args, train=True, download=True, mini_size=128):
    """Build a DataLoader over MNIST, optionally truncated for quick runs.

    Args:
        args: Namespace with ``data_dir``, ``mini_data``, ``device`` and
            ``batch_size`` attributes.
        train (bool, optional): Load the training split if True, else the
            test split (default True).
        download (bool, optional): Download the dataset if missing (default True).
        mini_size (int, optional): Number of samples kept when
            ``args.mini_data`` is set (default 128).

    Returns:
        torch.utils.data.DataLoader: Shuffled during training, with the last
        incomplete batch dropped.
    """
    transforms = T.Compose([T.ToTensor()])
    dataset = MNIST(root=args.data_dir,
                    train=train,
                    download=download,
                    transform=transforms)

    # Truncate to a small subset for fast debugging runs.
    if args.mini_data:
        if train:
            dataset.train_data = dataset.train_data[:mini_size]
            dataset.train_labels = dataset.train_labels[:mini_size]
        else:
            dataset.test_data = dataset.test_data[:mini_size]
            dataset.test_labels = dataset.test_labels[:mini_size]

    # BUG FIX: the original used `is 'cuda'`, comparing object identity with a
    # string literal -- implementation-dependent and a SyntaxWarning on modern
    # CPython. Equality is the correct test.
    kwargs = {
        'num_workers': 1,
        'pin_memory': True
    } if args.device.type == 'cuda' else {}

    dl = DataLoader(dataset,
                    batch_size=args.batch_size,
                    shuffle=train,
                    drop_last=True,
                    **kwargs)

    return dl
def fetch_dataloader(params, train=True, mini_size=128):
    """Build a DataLoader over MNIST with optional debugging subsets.

    Args:
        params: Params object with ``data_dir``, ``device``, ``batch_size``
            attributes and a ``dict`` mapping that may contain ``mini_data``
            and ``mini_ones`` flags.
        train (bool, optional): Load the training split if True, else the
            test split (default True).
        mini_size (int, optional): Number of samples kept when a mini flag is
            set (default 128).

    Returns:
        torch.utils.data.DataLoader: Shuffled, last incomplete batch dropped.
    """
    transforms = T.Compose([T.ToTensor()])
    dataset = MNIST(root=params.data_dir,
                    train=train,
                    download=True,
                    transform=transforms)

    # Truncate to a small subset for fast debugging runs.
    if params.dict.get('mini_data'):
        if train:
            dataset.train_data = dataset.train_data[:mini_size]
            dataset.train_labels = dataset.train_labels[:mini_size]
        else:
            dataset.test_data = dataset.test_data[:mini_size]
            dataset.test_labels = dataset.test_labels[:mini_size]

    # Keep only digit-1 samples (drawn from the first 2000) for ablations.
    if params.dict.get('mini_ones'):
        if train:
            labels = dataset.train_labels[:2000]
            mask = labels == 1
            dataset.train_labels = labels[mask][:mini_size]
            dataset.train_data = dataset.train_data[:2000][mask][:mini_size]
        else:
            labels = dataset.test_labels[:2000]
            mask = labels == 1
            dataset.test_labels = labels[mask][:mini_size]
            dataset.test_data = dataset.test_data[:2000][mask][:mini_size]

    # BUG FIX: the original used `is 'cuda'`, comparing object identity with a
    # string literal -- implementation-dependent and a SyntaxWarning on modern
    # CPython. Equality is the correct test.
    kwargs = {
        'num_workers': 1,
        'pin_memory': True
    } if torch.cuda.is_available() and params.device.type == 'cuda' else {}

    return DataLoader(dataset,
                      batch_size=params.batch_size,
                      shuffle=True,
                      drop_last=True,
                      **kwargs)
Example no. 4
0
def get_datasets(args):
    """Build biased train / balanced valid / balanced test mixture datasets.

    Args:
        args: Namespace with ``dataset`` ('mnist', 'cifar10' or 'cifar100'),
            ``bias``, ``n_classes`` and ``n_valid`` attributes.

    Returns:
        tuple: ``(train_mixture, valid_mixture, test_mixture)`` MixtureDataset
        instances. The training mixture is class-biased by ``args.bias``; the
        validation and test mixtures use balanced weights.

    Raises:
        ValueError: If ``args.dataset`` is not one of the supported names.
    """
    if args.dataset == 'mnist':
        biased_weights = [(args.bias, 1 - args.bias)
                          for _ in range(args.n_classes)]
        balanced_weights = [(1, 1) for _ in range(args.n_classes)]
        override = sample_n(
            range(10), [10 // args.n_classes for _ in range(args.n_classes)])
        train_dataset = MNIST(root='./data/mnist', train=True, download=True)
        test_dataset = MNIST(root='./data/mnist', train=False, download=True)
        # MNIST tensors are HxW; append a channel axis and convert to NumPy so
        # they match the HxWxC NumPy layout MixtureDataset expects.
        train_dataset.train_data = train_dataset.train_data.unsqueeze(
            -1).numpy()
        test_dataset.test_data = test_dataset.test_data.unsqueeze(-1).numpy()
        transform = Compose(
            [ToPILImage(), RandomCrop(28, padding=4),
             ToTensor()])

    elif args.dataset == 'cifar10':
        biased_weights = [(args.bias, 1 - args.bias)
                          for _ in range(args.n_classes)]
        balanced_weights = [(1, 1) for _ in range(args.n_classes)]
        override = sample_n(
            range(10), [10 // args.n_classes for _ in range(args.n_classes)])
        train_dataset = CIFAR10(root='./data/cifar10',
                                train=True,
                                download=True)
        test_dataset = CIFAR10(root='./data/cifar10',
                               train=False,
                               download=True)
        transform = Compose([
            ToPILImage(),
            RandomCrop(32, padding=4),
            RandomHorizontalFlip(),
            ToTensor()
        ])

    elif args.dataset == 'cifar100':  # uses predefined coarse labels
        # Fine-label groups for each of CIFAR-100's 20 coarse categories.
        coarse_labels = [(4, 30, 55, 72, 95), (1, 32, 67, 73, 91),
                         (54, 62, 70, 82, 92), (9, 10, 16, 28, 61),
                         (0, 51, 53, 57, 83), (22, 39, 40, 86, 87),
                         (5, 20, 25, 84, 94), (6, 7, 14, 18, 24),
                         (3, 42, 43, 88, 97), (12, 17, 37, 68, 76),
                         (23, 33, 49, 60, 71), (15, 19, 21, 31, 38),
                         (34, 63, 64, 66, 75), (26, 45, 77, 79, 99),
                         (2, 11, 35, 46, 98), (27, 29, 44, 78, 93),
                         (36, 50, 65, 74, 80), (47, 52, 56, 59, 96),
                         (8, 13, 48, 58, 90), (41, 69, 81, 85, 89)]
        # One dominant fine label per class; the remaining weight is split
        # evenly over the other four fine labels in the coarse group.
        biased_weights = [(args.bias, (1 - args.bias) / 4, (1 - args.bias) / 4,
                           (1 - args.bias) / 4, (1 - args.bias) / 4)
                          for _ in range(args.n_classes)]
        balanced_weights = [(1, 1, 1, 1, 1) for _ in range(args.n_classes)]
        override = random.sample(
            [random.sample(coarse_label, 5) for coarse_label in coarse_labels],
            args.n_classes)
        train_dataset = CIFAR100(root='./data/cifar100',
                                 train=True,
                                 download=True)
        test_dataset = CIFAR100(root='./data/cifar100',
                                train=False,
                                download=True)
        transform = Compose([
            ToPILImage(),
            RandomCrop(32, padding=4),
            RandomHorizontalFlip(),
            ToTensor()
        ])

    else:
        # BUG FIX: an unknown dataset name previously fell through and raised
        # NameError on `train_dataset`/`transform` below; fail clearly instead.
        raise ValueError('Unsupported dataset: {0}'.format(args.dataset))

    # Hold out the last n_valid training samples as a balanced validation set.
    train_mixture = MixtureDataset(train_dataset.train_data[:-args.n_valid],
                                   train_dataset.train_labels[:-args.n_valid],
                                   mixture_weights=biased_weights,
                                   mixture_override=override,
                                   transform=transform)

    valid_mixture = MixtureDataset(train_dataset.train_data[-args.n_valid:],
                                   train_dataset.train_labels[-args.n_valid:],
                                   mixture_weights=balanced_weights,
                                   mixture_override=override,
                                   transform=transform)

    test_mixture = MixtureDataset(test_dataset.test_data,
                                  test_dataset.test_labels,
                                  mixture_weights=balanced_weights,
                                  mixture_override=override,
                                  transform=transform)

    return train_mixture, valid_mixture, test_mixture
Example no. 5
0
# Add Gaussian noise to a single test image and display it.
# NOTE(review): `img_test`, `coeff_bruit`, `mnist_test`, `mnist_train`,
# `batch_size`, `learning_rate`, `num_epochs` and `autoencoder` are assumed
# to be defined earlier in the script -- confirm against the full file.
bruit = np.random.normal(loc=0.0, scale=1.0, size=img_test.shape)
img_test = img_test.astype(float) / 255. # uint8 to float in [0., 1.]
img_bruitee = img_test + coeff_bruit * bruit # add the noise to the image
img_bruitee = np.clip(img_bruitee, 0., 1.) # clamp values to [0., 1.]
img_bruitee = (img_bruitee * 255).astype(np.uint8) # back to uint8
plt.imshow(img_bruitee, interpolation='none', cmap=plt.cm.gray)
plt.waitforbuttonpress()

# Add Gaussian noise to the whole test set ####################################
test_data_numpy = mnist_test.test_data.numpy()
test_data_numpy = test_data_numpy.astype(float) / 255.
bruit = np.random.normal(loc=0.0, scale=1.0, size=test_data_numpy.shape)
test_data_numpy += coeff_bruit * bruit
test_data_numpy = np.clip(test_data_numpy, 0., 1.)
test_data_numpy = (test_data_numpy * 255).astype(np.uint8)
mnist_test.test_data = torch.from_numpy(test_data_numpy)
plt.imshow(mnist_test.test_data[0,:,:], interpolation='none', cmap=plt.cm.gray)
plt.waitforbuttonpress()

# Train the model on the (non-noisy!) training images #####################
trainloader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True)

print('Entrainement...')
model = autoencoder().cpu() # load the model on the CPU
criterion = nn.MSELoss() # loss (cost) function
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-5) # optimizer choice

for epoch in range(num_epochs):
    print('debut epoch {0}'.format(epoch))
    for data in trainloader:
        img, _ = data