def torchvision_dataset(transform=None, train=True, subset=None):
    """Creates a dataset from torchvision, configured using Command Line Arguments.

    Args:
        transform (callable, optional): A function that transforms an image (default None).
        train (bool, optional): Training set or validation - if applicable (default True).
        subset (string, optional): Specifies the subset of the relevant categories,
            if any of them was split (default, None).

    Relevant Command Line Arguments:

        - **dataset**: `--data`, `--torchvision_dataset`.

    Note:
        Settings are automatically acquired from a call to :func:`dlt.config.parse`
        from the built-in ones. If :func:`dlt.config.parse` was not called in the
        main script, this function will call it.

    Warning:
        Unlike the torchvision datasets, this function returns a dataset that
        uses NumPy Arrays instead of a PIL Images.

    Raises:
        ValueError: If no dataset name was given, or the name is not one of
            'mnist', 'fashionmnist', 'cifar10', 'cifar100'.
    """
    opts = fetch_opts(['dataset'], subset)
    if opts.torchvision_dataset is None:
        apnd = '_' + subset if subset is not None else ''
        raise ValueError('No value given for --torchvision_dataset{0}.'.format(apnd))

    name = opts.torchvision_dataset
    # Dispatch on the dataset name; `needs_channel_dim` marks the grayscale
    # HxW datasets that must gain a trailing channel axis for consistency.
    if name == 'mnist':
        from torchvision.datasets import MNIST as dataset_cls
        needs_channel_dim = True
    elif name == 'fashionmnist':
        from torchvision.datasets import FashionMNIST as dataset_cls
        needs_channel_dim = True
    elif name == 'cifar10':
        from torchvision.datasets import CIFAR10 as dataset_cls
        needs_channel_dim = False
    elif name == 'cifar100':
        from torchvision.datasets import CIFAR100 as dataset_cls
        needs_channel_dim = False
    else:
        # Previously an unrecognized name fell through to an UnboundLocalError
        # on `ret_dataset`; fail with a clear message instead.
        raise ValueError('Unknown value for --torchvision_dataset: {0}'.format(name))

    # Patch __getitem__ so the dataset yields NumPy arrays instead of PIL images
    # (see the Warning in the docstring). NOTE: this mutates the class globally.
    dataset_cls.__getitem__ = _custom_get_item
    ret_dataset = dataset_cls(opts.data, train=train, download=True, transform=transform)

    if needs_channel_dim:
        # Add channel dimension for consistency with the RGB datasets.
        if train:
            ret_dataset.train_data = ret_dataset.train_data.unsqueeze(3)
        else:
            ret_dataset.test_data = ret_dataset.test_data.unsqueeze(3)
    return ret_dataset
def fetch_dataloader(args, train=True, download=True, mini_size=128):
    """Build a DataLoader over the MNIST dataset.

    Args:
        args: Options object with `data_dir`, `mini_data`, `device` (a
            torch.device), and `batch_size` attributes.
        train (bool, optional): Load the training split if True, else the
            test split (default True).
        download (bool, optional): Download the dataset if missing (default True).
        mini_size (int, optional): Number of examples kept when
            `args.mini_data` is truthy (default 128).

    Returns:
        torch.utils.data.DataLoader over the (possibly truncated) dataset;
        shuffles only the training split and drops the last partial batch.
    """
    # load dataset and init in the dataloader
    transforms = T.Compose([T.ToTensor()])
    dataset = MNIST(root=args.data_dir, train=train, download=download,
                    transform=transforms)

    # Optionally truncate to a small subset for quick debugging runs.
    if args.mini_data:
        if train:
            dataset.train_data = dataset.train_data[:mini_size]
            dataset.train_labels = dataset.train_labels[:mini_size]
        else:
            dataset.test_data = dataset.test_data[:mini_size]
            dataset.test_labels = dataset.test_labels[:mini_size]

    # BUG FIX: the original compared `args.device.type is 'cuda'` — an identity
    # test on a string literal that only works by CPython interning accident
    # (and emits a SyntaxWarning). Use equality.
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.device.type == 'cuda' else {}

    dl = DataLoader(dataset, batch_size=args.batch_size, shuffle=train,
                    drop_last=True, **kwargs)
    return dl
def fetch_dataloader(params, train=True, mini_size=128):
    """Build a DataLoader over the MNIST dataset, with optional mini subsets.

    Args:
        params: Options object with `data_dir`, `batch_size`, `device`
            attributes and a `dict` mapping that may contain the flags
            `mini_data` (truncate to `mini_size` examples) and `mini_ones`
            (keep only digit-1 examples from the first 2000).
        train (bool, optional): Load the training split if True, else the
            test split (default True).
        mini_size (int, optional): Size of the mini subset (default 128).

    Returns:
        torch.utils.data.DataLoader over the (possibly filtered) dataset;
        always shuffled, dropping the last partial batch.
    """
    # load dataset and init in the dataloader
    transforms = T.Compose([T.ToTensor()])
    dataset = MNIST(root=params.data_dir, train=train, download=True,
                    transform=transforms)

    # Optionally truncate to a small subset for quick debugging runs.
    if params.dict.get('mini_data'):
        if train:
            dataset.train_data = dataset.train_data[:mini_size]
            dataset.train_labels = dataset.train_labels[:mini_size]
        else:
            dataset.test_data = dataset.test_data[:mini_size]
            dataset.test_labels = dataset.test_labels[:mini_size]

    # Optionally keep only examples of the digit "1" (searched within the
    # first 2000 examples, capped at mini_size).
    if params.dict.get('mini_ones'):
        if train:
            labels = dataset.train_labels[:2000]
            mask = labels == 1
            dataset.train_labels = labels[mask][:mini_size]
            dataset.train_data = dataset.train_data[:2000][mask][:mini_size]
        else:
            labels = dataset.test_labels[:2000]
            mask = labels == 1
            dataset.test_labels = labels[mask][:mini_size]
            dataset.test_data = dataset.test_data[:2000][mask][:mini_size]

    # BUG FIX: the original compared `params.device.type is 'cuda'` — an
    # identity test on a string literal that only works by CPython interning
    # accident (and emits a SyntaxWarning). Use equality.
    kwargs = ({'num_workers': 1, 'pin_memory': True}
              if torch.cuda.is_available() and params.device.type == 'cuda' else {})

    return DataLoader(dataset, batch_size=params.batch_size, shuffle=True,
                      drop_last=True, **kwargs)
def get_datasets(args):
    """Build biased train / balanced valid / balanced test MixtureDatasets.

    Args:
        args: Options object with `dataset` ('mnist' | 'cifar10' | 'cifar100'),
            `bias` (float weight of the dominant component), `n_classes`
            (number of mixture classes), and `n_valid` (number of training
            examples held out for validation).

    Returns:
        tuple: `(train_mixture, valid_mixture, test_mixture)` MixtureDatasets;
        the training mixture uses the biased weights, the other two are balanced.

    Raises:
        ValueError: If `args.dataset` is not a supported name.
    """
    if args.dataset == 'mnist':
        biased_weights = [(args.bias, 1 - args.bias)
                          for _ in range(args.n_classes)]
        balanced_weights = [(1, 1) for _ in range(args.n_classes)]
        # Partition the 10 digit labels evenly across the mixture classes.
        override = sample_n(
            range(10), [10 // args.n_classes for _ in range(args.n_classes)])
        train_dataset = MNIST(root='./data/mnist', train=True, download=True)
        test_dataset = MNIST(root='./data/mnist', train=False, download=True)
        # MNIST tensors are HxW; add a trailing channel axis and convert to
        # NumPy — presumably the layout MixtureDataset expects (TODO confirm).
        train_dataset.train_data = train_dataset.train_data.unsqueeze(
            -1).numpy()
        test_dataset.test_data = test_dataset.test_data.unsqueeze(-1).numpy()
        transform = Compose(
            [ToPILImage(), RandomCrop(28, padding=4), ToTensor()])
    elif args.dataset == 'cifar10':
        biased_weights = [(args.bias, 1 - args.bias)
                          for _ in range(args.n_classes)]
        balanced_weights = [(1, 1) for _ in range(args.n_classes)]
        override = sample_n(
            range(10), [10 // args.n_classes for _ in range(args.n_classes)])
        train_dataset = CIFAR10(root='./data/cifar10', train=True,
                                download=True)
        test_dataset = CIFAR10(root='./data/cifar10', train=False,
                               download=True)
        transform = Compose([
            ToPILImage(),
            RandomCrop(32, padding=4),
            RandomHorizontalFlip(),
            ToTensor()
        ])
    elif args.dataset == 'cifar100':
        # uses predefined coarse labels: each tuple lists the 5 fine labels
        # belonging to one CIFAR-100 superclass.
        coarse_labels = [(4, 30, 55, 72, 95), (1, 32, 67, 73, 91),
                         (54, 62, 70, 82, 92), (9, 10, 16, 28, 61),
                         (0, 51, 53, 57, 83), (22, 39, 40, 86, 87),
                         (5, 20, 25, 84, 94), (6, 7, 14, 18, 24),
                         (3, 42, 43, 88, 97), (12, 17, 37, 68, 76),
                         (23, 33, 49, 60, 71), (15, 19, 21, 31, 38),
                         (34, 63, 64, 66, 75), (26, 45, 77, 79, 99),
                         (2, 11, 35, 46, 98), (27, 29, 44, 78, 93),
                         (36, 50, 65, 74, 80), (47, 52, 56, 59, 96),
                         (8, 13, 48, 58, 90), (41, 69, 81, 85, 89)]
        # Dominant fine label gets `bias`; the other 4 split the remainder.
        biased_weights = [(args.bias, (1 - args.bias) / 4,
                           (1 - args.bias) / 4, (1 - args.bias) / 4,
                           (1 - args.bias) / 4)
                          for _ in range(args.n_classes)]
        balanced_weights = [(1, 1, 1, 1, 1) for _ in range(args.n_classes)]
        # Shuffle each superclass's fine labels, then pick n_classes of them.
        override = random.sample(
            [random.sample(coarse_label, 5) for coarse_label in coarse_labels],
            args.n_classes)
        train_dataset = CIFAR100(root='./data/cifar100', train=True,
                                 download=True)
        test_dataset = CIFAR100(root='./data/cifar100', train=False,
                                download=True)
        transform = Compose([
            ToPILImage(),
            RandomCrop(32, padding=4),
            RandomHorizontalFlip(),
            ToTensor()
        ])
    else:
        # Previously an unknown name fell through to a NameError on
        # `biased_weights`; fail with a clear message instead.
        raise ValueError('Unknown dataset: {0}'.format(args.dataset))

    # Last n_valid training examples are held out as a balanced validation set.
    train_mixture = MixtureDataset(train_dataset.train_data[:-args.n_valid],
                                   train_dataset.train_labels[:-args.n_valid],
                                   mixture_weights=biased_weights,
                                   mixture_override=override,
                                   transform=transform)
    valid_mixture = MixtureDataset(train_dataset.train_data[-args.n_valid:],
                                   train_dataset.train_labels[-args.n_valid:],
                                   mixture_weights=balanced_weights,
                                   mixture_override=override,
                                   transform=transform)
    test_mixture = MixtureDataset(test_dataset.test_data,
                                  test_dataset.test_labels,
                                  mixture_weights=balanced_weights,
                                  mixture_override=override,
                                  transform=transform)
    return train_mixture, valid_mixture, test_mixture
# NOTE(review): this fragment starts mid-script — `img_test`, `coeff_bruit`,
# `mnist_test`, `mnist_train`, `batch_size`, `learning_rate`, `num_epochs` and
# `autoencoder` are presumably defined earlier in the file; the training loop
# continues past this view. Verify against the full script.
bruit = np.random.normal(loc=0.0, scale=1.0, size=img_test.shape)
img_test = img_test.astype(float) / 255.  # uint8 to float in [0., 1.]
img_bruitee = img_test + coeff_bruit * bruit  # add the noise to the image
img_bruitee = np.clip(img_bruitee, 0., 1.)  # clamp values to [0., 1.]
img_bruitee = (img_bruitee * 255).astype(np.uint8)  # back to uint8
plt.imshow(img_bruitee, interpolation='none', cmap=plt.cm.gray)
plt.waitforbuttonpress()

# Add Gaussian noise to the test images ########################
test_data_numpy = mnist_test.test_data.numpy()
test_data_numpy = test_data_numpy.astype(float) / 255.
bruit = np.random.normal(loc=0.0, scale=1.0, size=test_data_numpy.shape)
test_data_numpy += coeff_bruit * bruit
test_data_numpy = np.clip(test_data_numpy, 0., 1.)
test_data_numpy = (test_data_numpy * 255).astype(np.uint8)
# Write the noisy images back into the dataset as a torch tensor.
mnist_test.test_data = torch.from_numpy(test_data_numpy)
plt.imshow(mnist_test.test_data[0,:,:], interpolation='none', cmap=plt.cm.gray)
plt.waitforbuttonpress()

# Train the model on the train images (NOT noisy!) ###########
trainloader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
print('Entrainement...')
model = autoencoder().cpu()  # load the model on the CPU
criterion = nn.MSELoss()  # loss function
optimizer = torch.optim.Adam(model.parameters(),
                             lr=learning_rate,
                             weight_decay=1e-5)  # choice of optimizer
for epoch in range(num_epochs):
    print('debut epoch {0}'.format(epoch))
    for data in trainloader:
        img, _ = data