import copy
import os

import numpy as np
import torch
from google_drive_downloader import GoogleDriveDownloader as gdd
from scipy.io import loadmat
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import CIFAR10, FashionMNIST, ImageFolder, MNIST, SVHN

# The transform builders (augment_mnist_rgb, no_augment_mnist_rgb,
# augment_affine_svhn, augment_mean_svhn, noaug_SVHN), convert_mnist_images,
# and the anomaly-split helpers (get_cifar_anomaly_dataset,
# get_mnist_anomaly_dataset, get_mnist2_anomaly_dataset) are defined
# elsewhere in this repository.


def get_loader_mnist_rgb(batchsize):
    """Serve MNIST converted to 3-channel 32x32 images through SVHN objects."""
    transform_train = augment_mnist_rgb()
    trainset_m = MNIST(root='./data', train=True, download=True,
                       transform=transform_train)
    testset_m = MNIST(root='./data', train=False, download=True,
                      transform=no_augment_mnist_rgb())

    # The SVHN dataset object stores its images as a numpy array, so the
    # MNIST tensors are converted to numpy. Note: train_data/train_labels
    # and test_data/test_labels are the old torchvision attribute names;
    # recent releases expose them as .data/.targets instead.
    train_data = trainset_m.train_data.numpy()
    test_data = testset_m.test_data.numpy()
    train_labels = trainset_m.train_labels.numpy()
    test_labels = testset_m.test_labels.numpy()
    print("Original MNIST")
    print(train_data.shape, len(train_labels))
    print(test_data.shape, len(test_labels))

    # Reuse SVHN dataset objects to serve the RGB MNIST data: their data
    # and labels are simply overwritten below.
    trainset = SVHN(root='./data', split='train', download=True,
                    transform=transform_train)
    testset = SVHN(root='./data', split='test', download=True,
                   transform=no_augment_mnist_rgb())
    trainset.data = convert_mnist_images(trainset_m.train_data)
    testset.data = convert_mnist_images(testset_m.test_data)
    trainset.labels = trainset_m.train_labels
    testset.labels = testset_m.test_labels
    print("RGB MNIST")
    print(trainset.data.shape, len(trainset.labels))
    print(testset.data.shape, len(testset.labels))

    trainloader = DataLoader(trainset, batch_size=batchsize, shuffle=True,
                             num_workers=0)
    testloader = DataLoader(testset, batch_size=batchsize, shuffle=False,
                            num_workers=0)
    print("MNIST train min=%f, max=%f" % (trainset.data.min(), trainset.data.max()))
    print("MNIST test min=%f, max=%f" % (testset.data.min(), testset.data.max()))
    return trainloader, testloader
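# A minimal sketch of what convert_mnist_images (defined elsewhere in this
# repo) is assumed to do: zero-pad the 28x28 grayscale digits to SVHN's
# 32x32 resolution and replicate the single channel three times so the
# result matches SVHN's (N, 3, 32, 32) uint8 layout. Illustrative only;
# the repo's actual helper may differ.
def convert_mnist_images_sketch(images):
    imgs = images.numpy() if hasattr(images, 'numpy') else np.asarray(images)
    imgs = np.pad(imgs, ((0, 0), (2, 2), (2, 2)), mode='constant')  # 28x28 -> 32x32
    imgs = np.repeat(imgs[:, np.newaxis, :, :], 3, axis=1)          # 1 -> 3 channels
    return imgs.astype(np.uint8)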
def get_loader_digit_rgb(batchsize):
    """Serve Ganin's synthetic digits (SynthDigits) through SVHN objects."""
    transform_train = no_augment_mnist_rgb()

    # Download the synthetic-digits data from Ganin's Google Drive.
    gdd.download_file_from_google_drive(
        file_id='0B9Z4d7lAwbnTSVR1dEFSRUFxOUU',
        dest_path='data/SynthDigits.zip',
        unzip=True)

    folder_name = "data/"
    file_train = 'synth_train_32x32.mat'
    train_data = loadmat(folder_name + file_train)
    # The .mat files store images as (H, W, C, N); roll the sample axis to
    # the front and the channel axis after it to get (N, C, H, W).
    train_x = train_data["X"]
    train_x = np.rollaxis(train_x, 3, 0)
    train_x = np.rollaxis(train_x, 3, 1)
    train_y = train_data["y"]
    print(train_x.shape)
    print(train_y.shape)

    file_test = 'synth_test_32x32.mat'
    test_data = loadmat(folder_name + file_test)
    test_x = test_data["X"]
    test_x = np.rollaxis(test_x, 3, 0)
    test_x = np.rollaxis(test_x, 3, 1)
    test_y = test_data["y"]
    print(test_x.shape)
    print(test_y.shape)

    # Reuse SVHN dataset objects to serve the synthetic digits.
    trainset = SVHN(root='./data', split='train', download=True,
                    transform=transform_train)
    testset = SVHN(root='./data', split='test', download=True,
                   transform=no_augment_mnist_rgb())
    trainset.data = train_x
    testset.data = test_x
    trainset.labels = train_y
    testset.labels = test_y
    print(trainset.data.shape, len(trainset.labels))
    print(testset.data.shape, len(testset.labels))

    trainloader = DataLoader(trainset, batch_size=batchsize, shuffle=True,
                             num_workers=0)
    testloader = DataLoader(testset, batch_size=batchsize, shuffle=False,
                            num_workers=0)
    print("synth train min=%f, max=%f" % (trainset.data.min(), trainset.data.max()))
    print("synth test min=%f, max=%f" % (testset.data.min(), testset.data.max()))
    return trainloader, testloader
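# The two rollaxis calls above are equivalent to one transpose from the
# .mat layout (H, W, C, N) to (N, C, H, W):
#
#     x = train_data["X"]        # (32, 32, 3, N)
#     x = np.rollaxis(x, 3, 0)   # (N, 32, 32, 3)
#     x = np.rollaxis(x, 3, 1)   # (N, 3, 32, 32)
#     # i.e. x.transpose(3, 2, 0, 1) in a single step.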
def get_loaders(nb_labelled, batch_size, unlab_rat, augment_type,
                lab_inds=[], is_balanced=True):
    """Split SVHN into labelled/unlabelled train loaders plus a test loader."""
    if augment_type == "affine":
        transform_train, transform_test = augment_affine_svhn()
    elif augment_type == "mean":
        transform_train, transform_test = augment_mean_svhn()
    elif augment_type == "no":
        transform_train, transform_test = noaug_SVHN()
    else:
        raise ValueError("unknown augment_type: %s" % augment_type)

    trainset_l = SVHN(root='./data', split='train', download=True,
                      transform=transform_train)
    test_set = SVHN(root='./data', split='test', download=True,
                    transform=transform_test)
    print(trainset_l.data.shape, len(trainset_l.labels))

    if len(lab_inds) == 0:
        if is_balanced:
            # Draw nb_labelled / 10 random indices per class so the
            # labelled subset is class-balanced.
            lab_inds = []
            for i in range(10):
                labels = np.array(trainset_l.labels)
                inds_i = np.where(labels == i)[0]
                inds_i = np.random.permutation(inds_i)
                lab_inds.extend(inds_i[0:int(nb_labelled / 10)].tolist())
            lab_inds = np.array(lab_inds)
        else:
            lab_inds = np.arange(0, nb_labelled)

    # Everything outside the labelled indices forms the unlabelled pool,
    # optionally subsampled by unlab_rat.
    all_inds = np.arange(len(trainset_l.labels))
    unlab_inds = np.setdiff1d(all_inds, lab_inds)
    trainset_u = copy.deepcopy(trainset_l)
    unlab_inds = unlab_inds[0:int(unlab_rat * len(unlab_inds))]
    trainset_u.data = np.array(trainset_u.data)[unlab_inds]
    trainset_u.labels = np.array(trainset_u.labels)[unlab_inds]
    trainloader_u = DataLoader(trainset_u, batch_size=batch_size,
                               shuffle=False, num_workers=1)
    print(trainset_u.data.shape, len(trainset_u.labels))

    trainset_l.data = np.array(trainset_l.data)[lab_inds]
    trainset_l.labels = np.array(trainset_l.labels)[lab_inds]
    print(trainset_l.data.shape, len(trainset_l.labels))
    trainloader_l = DataLoader(trainset_l, batch_size=batch_size,
                               shuffle=True, num_workers=1)
    testloader = DataLoader(test_set, batch_size=batch_size, shuffle=False,
                            num_workers=1)

    loaders = {
        "trainloader_l": trainloader_l,
        "testloader": testloader,
        "trainloader_u": trainloader_u,
        "trainset_l": trainset_l,
        "test_set": test_set,
        "trainset_u": trainset_u,
        "lab_inds": lab_inds,
    }
    return loaders
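# Example usage (sketch; the values are hypothetical): a balanced labelled
# subset of 1000 images, the whole remainder as the unlabelled pool, and
# affine augmentation on the labelled loader.
#
#     loaders = get_loaders(nb_labelled=1000, batch_size=64, unlab_rat=1.0,
#                           augment_type="affine")
#     x_l, y_l = next(iter(loaders["trainloader_l"]))  # labelled batch
#     x_u, y_u = next(iter(loaders["trainloader_u"]))  # unlabelled batch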
def load_data(opt):
    """Build train/test dataloaders for the dataset named by ``opt.dataset``.

    Args:
        opt (argparse.Namespace): parsed command-line options.

    Returns:
        dict: DataLoaders keyed by split ('train' and 'test').
    """
    if opt.dataroot == '':
        opt.dataroot = './data/{}'.format(opt.dataset)

    def make_dataloaders(dataset, splits, shuffle, drop_last_batch):
        # Shared DataLoader construction for every dataset branch. When a
        # manual seed is given, each worker is re-seeded deterministically.
        return {
            x: torch.utils.data.DataLoader(
                dataset=dataset[x],
                batch_size=opt.batchsize,
                shuffle=shuffle[x],
                num_workers=int(opt.workers),
                drop_last=drop_last_batch[x],
                worker_init_fn=(None if opt.manualseed == -1 else
                                lambda worker_id: np.random.seed(opt.manualseed)))
            for x in splits
        }

    if opt.dataset in ['cifar10']:
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': False}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        classes = {
            'plane': 0, 'car': 1, 'bird': 2, 'cat': 3, 'deer': 4,
            'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9
        }
        dataset = {}
        dataset['train'] = CIFAR10(root='./data', train=True, download=True,
                                   transform=transform)
        dataset['test'] = CIFAR10(root='./data', train=False, download=True,
                                  transform=transform)
        dataset['train'].data, dataset['train'].targets, \
            dataset['test'].data, dataset['test'].targets = get_cifar_anomaly_dataset(
                trn_img=dataset['train'].data,
                trn_lbl=dataset['train'].targets,
                tst_img=dataset['test'].data,
                tst_lbl=dataset['test'].targets,
                abn_cls_idx=classes[opt.abnormal_class],
                manualseed=opt.manualseed,
                perc_outlier=opt.perc_outlier)
        return make_dataloaders(dataset, splits, shuffle, drop_last_batch)

    elif opt.dataset in ['svhn']:
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': False}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            # transforms.Normalize((0.45141584, 0.45141453, 0.45142587),
            #                      (0.19929032, 0.1992932, 0.19929022))
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        dataset = {}
        dataset['train'] = SVHN(root='./data', split='train', download=True,
                                transform=transform)
        dataset['test'] = SVHN(root='./data', split='test', download=True,
                               transform=transform)
        # SVHN stores data as (N, 3, 32, 32); reshape (not transpose) it to
        # the CIFAR-style (N, 32, 32, 3) layout the split helper expects.
        # Pixels within each image are scrambled by this view, which is
        # harmless as long as the helper only reorders whole samples; the
        # inverse reshape below restores the original layout exactly.
        dataset['train'].data = dataset['train'].data.reshape(-1, 32, 32, 3)
        dataset['test'].data = dataset['test'].data.reshape(-1, 32, 32, 3)
        dataset['train'].data, dataset['train'].labels, \
            dataset['test'].data, dataset['test'].labels = get_cifar_anomaly_dataset(
                trn_img=dataset['train'].data,
                trn_lbl=dataset['train'].labels,
                tst_img=dataset['test'].data,
                tst_lbl=dataset['test'].labels,
                abn_cls_idx=int(opt.abnormal_class),
                manualseed=opt.manualseed)
        dataset['train'].data = dataset['train'].data.reshape(-1, 3, 32, 32)
        dataset['test'].data = dataset['test'].data.reshape(-1, 3, 32, 32)
        return make_dataloaders(dataset, splits, shuffle, drop_last_batch)

    elif opt.dataset in ['fashionmnist']:
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': False}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.2860402,), (0.3530239,))
        ])
        classes = {
            'tshirt': 0, 'trouser': 1, 'pullover': 2, 'dress': 3, 'coat': 4,
            'sandal': 5, 'shirt': 6, 'sneacker': 7, 'bag': 8, 'boot': 9
            # 'sneacker' (sic) is kept as-is: it is the key the CLI passes.
        }
        dataset = {}
        dataset['train'] = FashionMNIST(root='./data', train=True,
                                        download=True, transform=transform)
        dataset['test'] = FashionMNIST(root='./data', train=False,
                                       download=True, transform=transform)
        dataset['train'].data, dataset['train'].targets, \
            dataset['test'].data, dataset['test'].targets = get_mnist_anomaly_dataset(
                trn_img=dataset['train'].data,
                trn_lbl=dataset['train'].targets,
                tst_img=dataset['test'].data,
                tst_lbl=dataset['test'].targets,
                abn_cls_idx=classes[opt.abnormal_class],
                manualseed=opt.manualseed,
                perc_outlier=opt.perc_outlier)
        return make_dataloaders(dataset, splits, shuffle, drop_last_batch)

    elif opt.dataset in ['mnist']:
        opt.abnormal_class = int(opt.abnormal_class)
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset = {}
        dataset['train'] = MNIST(root='./data', train=True, download=True,
                                 transform=transform)
        dataset['test'] = MNIST(root='./data', train=False, download=True,
                                transform=transform)
        dataset['train'].data, dataset['train'].targets, \
            dataset['test'].data, dataset['test'].targets = get_mnist_anomaly_dataset(
                trn_img=dataset['train'].data,
                trn_lbl=dataset['train'].targets,
                tst_img=dataset['test'].data,
                tst_lbl=dataset['test'].targets,
                abn_cls_idx=opt.abnormal_class,
                manualseed=opt.manualseed,
                perc_outlier=opt.perc_outlier)
        return make_dataloaders(dataset, splits, shuffle, drop_last_batch)

    elif opt.dataset in ['mnist2']:
        opt.abnormal_class = int(opt.abnormal_class)
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])
        dataset = {}
        dataset['train'] = MNIST(root='./data', train=True, download=True,
                                 transform=transform)
        dataset['test'] = MNIST(root='./data', train=False, download=True,
                                transform=transform)
        dataset['train'].data, dataset['train'].targets, \
            dataset['test'].data, dataset['test'].targets = get_mnist2_anomaly_dataset(
                trn_img=dataset['train'].data,
                trn_lbl=dataset['train'].targets,
                tst_img=dataset['test'].data,
                tst_lbl=dataset['test'].targets,
                nrm_cls_idx=opt.abnormal_class,
                proportion=opt.proportion,
                manualseed=opt.manualseed)
        return make_dataloaders(dataset, splits, shuffle, drop_last_batch)

    else:
        # Fall back to an on-disk ImageFolder layout under opt.dataroot.
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.CenterCrop(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
        dataset = {
            x: ImageFolder(os.path.join(opt.dataroot, x), transform)
            for x in splits
        }
        return make_dataloaders(dataset, splits, shuffle, drop_last_batch)
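if __name__ == '__main__':
    # Minimal smoke test (a sketch): `opt` is assumed to be an
    # argparse.Namespace carrying the fields load_data reads. The field
    # names below mirror this file's usage; the values are hypothetical
    # and depend on how the training script defines its flags.
    import argparse
    opt = argparse.Namespace(dataroot='', dataset='mnist', isize=32,
                             abnormal_class='0', manualseed=-1,
                             perc_outlier=0.0, proportion=0.1,
                             batchsize=64, workers=2)
    dataloader = load_data(opt)
    images, targets = next(iter(dataloader['train']))
    print(images.shape, targets.shape)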