def load_data():
    """Build EMNIST (letters) and AgirEcole transfer-learning DataLoaders.

    Reads module-level globals: ``args`` (data_path, data_transfer,
    batch_size, test_batch_size, cuda), ``torch``, ``datasets``,
    ``transforms``.

    Returns:
        tuple: (train_loader, test_loader, train_transfer_loader,
        test_transfer_loader)
    """
    print("Loading data...")
    # DataLoader options that only make sense on CUDA.
    kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
    # Single shared transform; the original repeated this Compose four
    # times (and left an unused `PIL_imgs = []` local behind).
    # NOTE(review): the stats were marked #CHECK in the original —
    # confirm the EMNIST mean/std also suit the AgirEcole data.
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1722,), (0.3309,)),
    ])
    train_loader = torch.utils.data.DataLoader(
        datasets.EMNIST(args.data_path, 'letters', train=True,
                        download=False, transform=transform),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    test_loader = torch.utils.data.DataLoader(
        datasets.EMNIST(args.data_path, 'letters', train=False,
                        transform=transform),
        batch_size=args.test_batch_size, shuffle=True, **kwargs)
    # Transfer loaders: small fixed batch size and no CUDA kwargs,
    # exactly as in the original.
    train_transfer_loader = torch.utils.data.DataLoader(
        datasets.AgirEcole(args.data_transfer, 'train', train=True,
                           download=False, transform=transform),
        batch_size=4, shuffle=True)
    test_transfer_loader = torch.utils.data.DataLoader(
        datasets.AgirEcole(args.data_transfer, 'dev', train=False,
                           transform=transform),
        batch_size=4, shuffle=True)
    print("Done")
    return train_loader, test_loader, train_transfer_loader, test_transfer_loader
# NOTE(review): truncated script fragment — it begins in the middle of a
# parser.add_argument(...) call and ends in the middle of a
# DataLoader(...) call; the surrounding code is outside this view.
# Kept byte-identical: seeds torch (and CUDA when available) and builds
# EMNIST 'letters' train/test loaders with the (0.1722, 0.3309) stats.
type=int, default=10, metavar='N', help='how many batches to wait before logging training status') args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} train_loader = torch.utils.data.DataLoader(datasets.EMNIST( 'data', 'letters', train=True, download=True, transform=transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.1722, ), (0.3309, ))])), batch_size=args.batch_size, shuffle=True, **kwargs) test_loader = torch.utils.data.DataLoader(datasets.EMNIST( 'data', 'letters', train=False, transform=transforms.Compose( [transforms.ToTensor(), transforms.Normalize((0.1722, ), (0.3309, ))])), batch_size=args.test_batch_size, shuffle=True,
# NOTE(review): truncated script fragment — it begins in the middle of a
# parser.add_argument(...) call whose opening is outside this view.
# Kept byte-identical: parses args, seeds torch/CUDA, builds EMNIST
# 'letters' train/test loaders, then instantiates models.Net() and
# moves it to the GPU when args.cuda is set.
help='random seed (default: 1)') parser.add_argument('--log-interval', type=int, default=10, metavar='N', help='how many batches to wait before logging training status') args = parser.parse_args() args.cuda = not args.no_cuda and torch.cuda.is_available() torch.manual_seed(args.seed) if args.cuda: torch.cuda.manual_seed(args.seed) kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {} train_loader = torch.utils.data.DataLoader( datasets.EMNIST('data', 'letters', train=True, download=True, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1722,), (0.3309,)) ])), batch_size=args.batch_size, shuffle=True, **kwargs) test_loader = torch.utils.data.DataLoader( datasets.EMNIST('data', 'letters', train=False, transform=transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.1722,), (0.3309,)) ])), batch_size=args.test_batch_size, shuffle=True, **kwargs) model = models.Net() if args.cuda: model.cuda()
def fetch_dataset(data_name, subset):
    """Build train/test datasets for *data_name* and register their
    transforms in ``config.PARAM['transform']``.

    Args:
        data_name: one of 'MNIST', 'FashionMNIST', 'SVHN', 'EMNIST',
            'CIFAR10', 'CIFAR100', 'ImageNet', 'Kodak'.
        subset: forwarded to the dataset constructor where supported.

    Returns:
        dict: {'train': dataset, 'test': dataset}.

    Raises:
        ValueError: if *data_name* is not recognised.
    """
    dataset = {}
    print('fetching data {}...'.format(data_name))
    root = './data/{}'.format(data_name)
    # Placeholder transform used at construction time; the final
    # transform from config.PARAM is re-assigned just before returning.
    to_tensor = datasets.Compose([transforms.ToTensor()])
    if data_name in ['MNIST', 'FashionMNIST', 'SVHN']:
        # getattr() replaces the original eval()-assembled constructor
        # strings — same call, no dynamic code execution.
        dataset_cls = getattr(datasets, data_name)
        dataset['train'] = dataset_cls(root=root, split='train',
                                       subset=subset, transform=to_tensor)
        dataset['test'] = dataset_cls(root=root, split='test',
                                      subset=subset, transform=to_tensor)
        config.PARAM['transform'] = {
            'train': datasets.Compose(
                [transforms.Resize((32, 32)), transforms.ToTensor()]),
            'test': datasets.Compose(
                [transforms.Resize((32, 32)), transforms.ToTensor()])
        }
    elif data_name == 'EMNIST':
        dataset['train'] = datasets.EMNIST(root=root, split='train',
                                           subset=subset,
                                           transform=to_tensor)
        dataset['test'] = datasets.EMNIST(root=root, split='test',
                                          subset=subset,
                                          transform=to_tensor)
        config.PARAM['transform'] = {
            'train': datasets.Compose([transforms.ToTensor()]),
            'test': datasets.Compose([transforms.ToTensor()])
        }
    elif data_name in ['CIFAR10', 'CIFAR100']:
        dataset_cls = getattr(datasets, data_name)
        dataset['train'] = dataset_cls(root=root, split='train',
                                       subset=subset, transform=to_tensor)
        dataset['test'] = dataset_cls(root=root, split='test',
                                      subset=subset, transform=to_tensor)
        config.PARAM['transform'] = {
            'train': datasets.Compose([transforms.ToTensor()]),
            'test': datasets.Compose([transforms.ToTensor()])
        }
    elif data_name == 'ImageNet':
        # ImageNet takes root positionally in this project.
        dataset['train'] = datasets.ImageNet(root, split='train',
                                             subset=subset,
                                             transform=to_tensor)
        dataset['test'] = datasets.ImageNet(root, split='test',
                                            subset=subset,
                                            transform=to_tensor)
        config.PARAM['transform'] = {
            'train': datasets.Compose(
                [transforms.Resize((224, 224)), transforms.ToTensor()]),
            'test': datasets.Compose(
                [transforms.Resize((224, 224)), transforms.ToTensor()])
        }
    elif data_name == 'Kodak':
        # Kodak has no train/test split; one folder backs both.
        dataset['train'] = datasets.ImageFolder(root, transform=to_tensor)
        dataset['test'] = datasets.ImageFolder(root, transform=to_tensor)
        config.PARAM['transform'] = {
            'train': datasets.Compose([transforms.ToTensor()]),
            'test': datasets.Compose([transforms.ToTensor()])
        }
    else:
        raise ValueError('Not valid dataset name')
    # Swap in the transforms selected above for the chosen dataset.
    dataset['train'].transform = config.PARAM['transform']['train']
    dataset['test'].transform = config.PARAM['transform']['test']
    print('data ready')
    return dataset
def fetch_dataset(data_name):
    """Build ``(train_dataset, test_dataset)`` for *data_name*.

    Variants are encoded as suffixes, e.g. 'EMNIST_letters',
    'WheatImage_binary', 'MOSI_regression'.

    NOTE(review): reads module-level globals ``normalize``, ``branch``
    and ``make_stats`` that are not defined in this block — confirm
    they exist at module scope.

    Returns:
        tuple: (train_dataset, test_dataset).

    Raises:
        ValueError: if *data_name* is not recognised.
    """
    print('fetching data {}...'.format(data_name))
    if data_name == 'MNIST':
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/test'.format(data_name)
        train_dataset = datasets.MNIST(root=train_dir, train=True,
                                       download=True,
                                       transform=transforms.ToTensor())
        if normalize:
            # Dataset-wide stats computed from the training split.
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize(stats)])
            test_transform = transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([transforms.ToTensor()])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.MNIST(root=test_dir, train=False,
                                      download=True,
                                      transform=test_transform)
    elif data_name in ('EMNIST', 'EMNIST_byclass', 'EMNIST_bymerge',
                       'EMNIST_balanced', 'EMNIST_letters',
                       'EMNIST_digits', 'EMNIST_mnist'):
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/test'.format(data_name.split('_')[0])
        transform = transforms.Compose([transforms.ToTensor()])
        # Bare 'EMNIST' defaults to the 'balanced' split.
        split = ('balanced' if len(data_name.split('_')) == 1
                 else data_name.split('_')[1])
        train_dataset = datasets.EMNIST(root=train_dir, split=split,
                                        branch=branch, train=True,
                                        download=True, transform=transform)
        test_dataset = datasets.EMNIST(root=test_dir, split=split,
                                       branch=branch, train=False,
                                       download=True, transform=transform)
    elif data_name == 'FashionMNIST':
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/test'.format(data_name)
        transform = transforms.Compose([transforms.ToTensor()])
        train_dataset = datasets.FashionMNIST(root=train_dir, train=True,
                                              download=True,
                                              transform=transform)
        test_dataset = datasets.FashionMNIST(root=test_dir, train=False,
                                             download=True,
                                             transform=transform)
    elif data_name == 'CIFAR10':
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.CIFAR10(train_dir, train=True,
                                         transform=transforms.ToTensor(),
                                         download=True)
        if normalize:
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CIFAR10(test_dir, train=False,
                                        transform=test_transform,
                                        download=True)
    elif data_name == 'CIFAR100':
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.CIFAR100(train_dir, branch=branch,
                                          train=True,
                                          transform=transforms.ToTensor(),
                                          download=True)
        if normalize:
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CIFAR100(test_dir, branch=branch,
                                         train=False,
                                         transform=test_transform,
                                         download=True)
    elif data_name == 'SVHN':
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.SVHN(train_dir, split='train',
                                      transform=transforms.ToTensor(),
                                      download=True)
        if normalize:
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize(stats)])
            test_transform = transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize(stats)])
        else:
            train_transform = transforms.Compose([transforms.ToTensor()])
            test_transform = transforms.Compose([transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.SVHN(test_dir, split='test',
                                     transform=test_transform,
                                     download=True)
    elif data_name == 'ImageNet':
        train_dir = './data/{}/train'.format(data_name)
        test_dir = './data/{}/validation'.format(data_name)
        train_dataset = datasets.ImageFolder(
            train_dir, transform=transforms.ToTensor())
        if normalize:
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
        else:
            train_transform = transforms.Compose(
                [transforms.Resize((224, 224)), transforms.ToTensor()])
            test_transform = transforms.Compose(
                [transforms.Resize((224, 224)), transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.ImageFolder(test_dir,
                                            transform=test_transform)
    elif data_name == 'CUB2011':
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/validation'.format(data_name.split('_')[0])
        train_dataset = datasets.CUB2011(train_dir,
                                         transform=transforms.Compose([
                                             transforms.Resize((224, 224)),
                                             transforms.ToTensor()
                                         ]),
                                         download=True)
        if normalize:
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([
                transforms.Resize((224, 224)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
        else:
            train_transform = transforms.Compose(
                [transforms.Resize((224, 224)), transforms.ToTensor()])
            test_transform = transforms.Compose(
                [transforms.Resize((224, 224)), transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.CUB2011(test_dir, transform=test_transform,
                                        download=True)
    elif data_name in ('WheatImage', 'WheatImage_binary', 'WheatImage_six'):
        train_dir = './data/{}/train'.format(data_name.split('_')[0])
        test_dir = './data/{}/validation'.format(data_name.split('_')[0])
        # Bare 'WheatImage' defaults to six-class labels.
        label_mode = ('six' if len(data_name.split('_')) == 1
                      else data_name.split('_')[1])
        train_dataset = datasets.WheatImage(
            train_dir, label_mode=label_mode,
            transform=transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.ToTensor()
            ]))
        if normalize:
            stats = make_stats(train_dataset, batch_size=128)
            train_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
            test_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.ToTensor(),
                transforms.Normalize(stats)
            ])
        else:
            train_transform = transforms.Compose([
                transforms.Resize((224, 288)),
                transforms.RandomHorizontalFlip(),
                transforms.RandomVerticalFlip(),
                transforms.ToTensor()
            ])
            test_transform = transforms.Compose(
                [transforms.Resize((224, 288)), transforms.ToTensor()])
        train_dataset.transform = train_transform
        test_dataset = datasets.WheatImage(test_dir, label_mode=label_mode,
                                           transform=test_transform)
    elif data_name == 'CocoDetection':
        train_dir = './data/Coco/train2017'
        train_ann = './data/Coco/annotations/instances_train2017.json'
        test_dir = './data/Coco/val2017'
        test_ann = './data/Coco/annotations/instances_val2017.json'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)), transforms.ToTensor()])
        train_dataset = datasets.CocoDetection(train_dir, train_ann,
                                               transform=transform)
        test_dataset = datasets.CocoDetection(test_dir, test_ann,
                                              transform=transform)
    elif data_name == 'CocoCaptions':
        train_dir = './data/Coco/train2017'
        train_ann = './data/Coco/annotations/captions_train2017.json'
        test_dir = './data/Coco/val2017'
        test_ann = './data/Coco/annotations/captions_val2017.json'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)), transforms.ToTensor()])
        train_dataset = datasets.CocoCaptions(train_dir, train_ann,
                                              transform=transform)
        test_dataset = datasets.CocoCaptions(test_dir, test_ann,
                                             transform=transform)
    elif data_name == 'VOCDetection':
        train_dir = './data/VOC/VOCdevkit'
        test_dir = './data/VOC/VOCdevkit'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)), transforms.ToTensor()])
        train_dataset = datasets.VOCDetection(train_dir, 'trainval',
                                              transform=transform)
        test_dataset = datasets.VOCDetection(test_dir, 'test',
                                             transform=transform)
    elif data_name == 'VOCSegmentation':
        train_dir = './data/VOC/VOCdevkit'
        test_dir = './data/VOC/VOCdevkit'
        transform = transforms.Compose(
            [transforms.Resize((224, 224)), transforms.ToTensor()])
        train_dataset = datasets.VOCSegmentation(train_dir, 'trainval',
                                                 transform=transform)
        test_dataset = datasets.VOCSegmentation(test_dir, 'test',
                                                transform=transform)
    elif data_name in ('MOSI', 'MOSI_binary', 'MOSI_five', 'MOSI_seven',
                       'MOSI_regression'):
        train_dir = './data/{}'.format(data_name.split('_')[0])
        test_dir = './data/{}'.format(data_name.split('_')[0])
        # Bare 'MOSI' defaults to five-way labels.
        label_mode = ('five' if len(data_name.split('_')) == 1
                      else data_name.split('_')[1])
        train_dataset = datasets.MOSI(train_dir, split='trainval',
                                      label_mode=label_mode, download=True)
        # MOSI always normalizes, regardless of the `normalize` flag.
        stats = make_stats(train_dataset, batch_size=1)
        train_transform = transforms.Compose([transforms.Normalize(stats)])
        test_transform = transforms.Compose([transforms.Normalize(stats)])
        train_dataset.transform = train_transform
        test_dataset = datasets.MOSI(test_dir, split='test',
                                     label_mode=label_mode, download=True,
                                     transform=test_transform)
    elif data_name in ('Kodak', 'UCID'):
        # Evaluation-only image folders: one directory backs both splits.
        # (Merged the two byte-identical branches; dropped the dead
        # `train_dataset = None` assignment they each contained.)
        transform = transforms.Compose([transforms.ToTensor()])
        test_dir = './data/{}'.format(data_name)
        train_dataset = datasets.ImageFolder(test_dir, transform)
        test_dataset = datasets.ImageFolder(test_dir, transform)
    else:
        raise ValueError('Not valid dataset name')
    print('data ready')
    return train_dataset, test_dataset