def odd_even_dataset(dataset, limit, args):
    """Build train/test loaders for a binary odd-vs-even digit task.

    Samples with an original class label >= `limit` are dropped; remaining
    even digits are relabeled 1 and odd digits 0.

    Args:
        dataset: 'MNIST' selects MNIST; any other value selects Fashion-MNIST.
        limit: keep only samples whose original class label is < limit.
        args: namespace providing batch_size_train.

    Returns:
        (trainloader, testloader, train_label, test_label) where the label
        arrays are the remapped targets as numpy arrays.
    """
    transform = transforms.ToTensor()
    if dataset == 'MNIST':
        print("Loading odd vs even MNIST dataset...")
        data_train = MNIST(root='./data', train=True, download=True, transform=transform)
        data_test = MNIST(root='./data', train=False, download=True, transform=transform)
    else:
        print("Loading odd vs even Fashion-MNIST dataset...")
        data_train = FashionMNIST(root='./data', train=True, download=True, transform=transform)
        data_test = FashionMNIST(root='./data', train=False, download=True, transform=transform)

    def _binarize(split):
        # Restrict to classes below `limit`, then map even -> 1, odd -> 0.
        keep = split.targets < limit
        split.targets = split.targets[keep]
        split.data = split.data[keep]
        # BUG FIX: the parity mask must be computed once, before mutation.
        # The original code recomputed (targets % 2) after overwriting
        # targets with a -1 sentinel; since -1 % 2 == 1 in PyTorch, every
        # sample ended up with label 1.
        even = (split.targets % 2) == 0
        split.targets[even] = 1
        split.targets[~even] = 0
        return split.targets.cpu().detach().numpy()

    # train batch
    train_label = _binarize(data_train)
    trainloader = DataLoader(data_train, batch_size=args.batch_size_train, shuffle=False)
    # test batch
    test_label = _binarize(data_test)
    # NOTE(review): the test loader reuses batch_size_train, matching the
    # original behavior -- confirm this is intentional.
    testloader = DataLoader(data_test, batch_size=args.batch_size_train, shuffle=False)
    return trainloader, testloader, train_label, test_label
def full_class_dataset(dataset, limit, class_object, args):
    """Build train/test loaders over the full label set of a dataset.

    For 'MNIST' / 'fMNIST', samples with a class label >= `limit` are
    dropped.  For any other value of `dataset`, QuickDraw! bitmaps are read
    from local .npy files: the first 5000 samples of each class in
    `class_object` go to train, the next 1000 to test, labeled by class
    index.

    Args:
        dataset: 'MNIST', 'fMNIST', or anything else for QuickDraw!.
        limit: class-label cutoff for the (F)MNIST branches.
        class_object: sequence of QuickDraw! class names (npy file stems).
        args: namespace providing batch_size_train and batch_size_test.

    Returns:
        (trainloader, testloader, train_label, test_label).
    """
    if (dataset == 'MNIST'):
        print("Loading full MNIST dataset...")
        # NOTE(review): `transform` is not defined in this function; it is
        # presumably a module-level transform shared by these loaders -- confirm.
        data_train = MNIST(root='./data', train=True, download=True, transform=transform)
        data_test = MNIST(root='./data', train=False, download=True, transform=transform)
    elif (dataset == 'fMNIST'):
        print("Loading full Fashion-MNIST dataset...")
        data_train = FashionMNIST(root='./data', train=True, download=True, transform=transform)
        data_test = FashionMNIST(root='./data', train=False, download=True, transform=transform)
    else:
        print("Loading full QuickDraw! dataset...")
        train_data = []
        train_label = []
        test_data = []
        test_label = []
        for i in range(len(class_object)):
            # Load the npy bitmap file for class i and append its samples.
            ob = np.load('./data/quickdraw/full_numpy_bitmap_' + class_object[i] + '.npy')
            # Fixed split: 5000 train samples, 1000 test samples per class.
            train = ob[0:5000, ]
            test = ob[5000:6000, ]
            train_label = np.concatenate((train_label, i * np.ones(train.shape[0])), axis=0)
            test_label = np.concatenate((test_label, i * np.ones(test.shape[0])), axis=0)
            if i == 0:
                train_data = train
                test_data = test
            else:
                train_data = np.concatenate((train_data, train), axis=0)
                test_data = np.concatenate((test_data, test), axis=0)
        # Wrap the raw arrays in the project's feature dataset and loaders.
        trainset = feature_Dataset(train_data, train_label, transform)
        trainloader = DataLoader(trainset, batch_size=args.batch_size_train, shuffle=True)
        testset = feature_Dataset(test_data, test_label, transform)
        testloader = DataLoader(testset, batch_size=args.batch_size_test, shuffle=False)
    if (dataset == 'MNIST' or dataset == 'fMNIST'):
        # train batch: keep only classes below the cutoff
        idx = (data_train.targets < limit)
        data_train.targets = data_train.targets[idx]
        data_train.data = data_train.data[idx]
        train_label = data_train.targets.cpu().detach().numpy()
        trainloader = DataLoader(data_train, batch_size=args.batch_size_train, shuffle=False)
        # test batch: same class cutoff
        idx = (data_test.targets < limit)
        data_test.targets = data_test.targets[idx]
        data_test.data = data_test.data[idx]
        test_label = data_test.targets.cpu().detach().numpy()
        # BUG FIX: the test loader previously used args.batch_size_train;
        # use the test batch size, consistent with the QuickDraw! branch
        # of this same function.
        testloader = DataLoader(data_test, batch_size=args.batch_size_test, shuffle=False)
    return trainloader, testloader, train_label, test_label
def _filter_(dataset: MNIST):
    """In place: keep only samples whose label is in args.filter_labels,
    remapping the i-th requested label to i.

    BUG FIX: match masks are computed against a frozen copy of the original
    targets.  The original code matched against the partially rewritten
    tensor, so a remapped index could collide with a later entry of
    args.filter_labels (e.g. filter_labels=[3, 0] rewrote 3 -> 0 and then
    matched those same samples again as label 0).
    """
    original = dataset.targets.clone()
    final_mask = torch.zeros_like(original).bool()
    for index, label in enumerate(args.filter_labels):
        mask = original == label
        dataset.targets[mask] = index
        final_mask |= mask
    dataset.data = dataset.data[final_mask]
    dataset.targets = dataset.targets[final_mask]
def __init__(self, args):
    """Build MNIST train/valid/test iterators for novelty detection.

    Train/validation contain only "normal" digits (every class except
    args.abnormal_class).  The test set mixes all abnormal samples with an
    equally sized random draw of normal samples, i.e. a 50:50 split.
    """
    super(loader, self).__init__()
    mnist_transform = transforms.Compose([transforms.ToTensor()])
    download_root = 'D:/2020-2/비즈니스애널리틱스/논문리뷰/Stacked Convolutional Auto-Encoders for Hierarchical Feature Extraction/MNIST_DATASET'

    # Training pool: drop every sample of the abnormal class.
    dataset = MNIST(download_root, transform=mnist_transform, train=True, download=True)
    keep = dataset.targets != args.abnormal_class  # args.abnormal_class is the "zero" class
    dataset.targets = dataset.targets[keep]
    dataset.data = dataset.data[keep]

    # 80% train / 20% validation split of the normal-only pool.
    n_train = int(len(dataset) * 0.8)
    train_dataset, valid_dataset = random_split(
        dataset, [n_train, len(dataset) - n_train])

    # Test set: all abnormal samples plus an equal number of random normals.
    test_dataset = MNIST(download_root, transform=mnist_transform, train=False, download=True)
    normal_idx = torch.where(test_dataset.targets != args.abnormal_class)[0].numpy()
    novelty_idx = torch.where(test_dataset.targets == args.abnormal_class)[0].numpy()
    sampled_normal_idx = np.random.choice(normal_idx, size=len(novelty_idx))
    chosen = np.concatenate([novelty_idx, sampled_normal_idx])
    test_dataset.targets = test_dataset.targets[chosen]
    test_dataset.data = test_dataset.data[chosen]

    self.batch_size = args.batch_size
    self.train_iter = DataLoader(dataset=train_dataset, batch_size=self.batch_size, shuffle=True)
    self.valid_iter = DataLoader(dataset=valid_dataset, batch_size=self.batch_size, shuffle=True)
    self.test_iter = DataLoader(dataset=test_dataset, batch_size=self.batch_size, shuffle=True)
def _filter_classes(dataset: MNIST, classes_to_keep: Sequence[int]) -> Subset:
    """Keep only samples whose label is in `classes_to_keep`, remapping the
    i-th kept class to label i, and return a Subset over those samples.

    Mutates dataset.targets (replaced by a numpy array of remapped labels).

    BUG FIX: match masks are computed against a frozen copy of the original
    targets.  The original code matched against the progressively rewritten
    array, so a remapped index could collide with a later entry of
    `classes_to_keep` (e.g. classes_to_keep=[3, 0] rewrote 3 -> 0 and then
    matched those same samples again as label 0).
    """
    targets: np.ndarray = dataset.targets.numpy()
    original = targets.copy()
    final_mask = np.zeros_like(original, dtype=np.bool_)
    for index, label in enumerate(classes_to_keep):
        mask = original == label
        targets = np.where(mask, index, targets)
        final_mask |= mask
    dataset.targets = targets
    inds = final_mask.nonzero()[0].tolist()
    return Subset(dataset, inds)
def load_single(self, data_path="data", download=True, **kwargs):
    """Return train/test loaders that both iterate over a single MNIST
    sample (the first training image), batch size 1.

    Useful for overfitting sanity checks.
    """
    dataset = MNIST(data_path, train=True, download=download,
                    transform=self.train_transform)
    # Keep only the first image/label pair, preserving the leading
    # batch dimension.
    dataset.data = dataset.data[:1]
    dataset.targets = dataset.targets[:1]
    train_loader = DataLoader(dataset, batch_size=1, shuffle=True)
    test_loader = DataLoader(dataset, batch_size=1, shuffle=False)
    return train_loader, test_loader
def setup(self, stage: str = None):
    """Prepare train/val/test MNIST datasets restricted to self.digits.

    Normalizes with (self.x_mean, self.x_std) and splits the training data
    according to self.train_val_split.
    """
    mnist_transforms = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize([self.x_mean], [self.x_std]),
    ])

    def _restrict_to_digits(ds):
        # Keep only samples whose label is one of self.digits.
        mask = torch.zeros_like(ds.targets, dtype=torch.bool)
        for digit in self.digits:
            mask |= ds.targets == digit
        ds.targets = ds.targets[mask]
        ds.data = ds.data[mask]

    # Train / validation
    train_val = MNIST(
        os.path.dirname(__file__),
        download=True,
        train=True,
        transform=mnist_transforms,
    )
    _restrict_to_digits(train_val)
    train_length = int(len(train_val) * self.train_val_split)
    self.train_dataset, self.val_dataset = random_split(
        train_val, [train_length, len(train_val) - train_length])

    # Test
    self.test_dataset = MNIST(
        os.path.dirname(__file__),
        download=True,
        train=False,
        transform=mnist_transforms,
    )
    _restrict_to_digits(self.test_dataset)
def load_mnist_datasets(
        limit_train_samples_to: int = 10_000) -> Tuple[Dataset, Dataset]:
    """Load MNIST train/test datasets with one-hot encoded targets.

    The training split is randomly subsampled to at most
    `limit_train_samples_to` images so training stays fast.
    """
    train_dataset = MNIST(
        "data",  # folder where data should be saved
        download=True,
        transform=ToTensor(),  # convert images to torch tensors
        target_transform=IntegerToOneHotConverter(10),  # labels -> one-hot
    )
    test_dataset = MNIST(
        "data",  # folder where data should be saved
        download=True,
        train=False,
        transform=ToTensor(),
        target_transform=IntegerToOneHotConverter(10),
    )
    # Randomly permute and truncate the training split.
    chosen = np.random.permutation(
        len(train_dataset.data))[:limit_train_samples_to]
    train_dataset.data = train_dataset.data[chosen]
    train_dataset.targets = train_dataset.targets[chosen]
    return train_dataset, test_dataset
def define_dataloader(opt: Namespace, split: str = 'train'):
    """Construct a DataLoader for the configured dataset and split.

    Args:
        opt: options namespace (dataset, data_path, batch_size, sampler,
            num_workers, corrupt_p, train_path/val_path for imagefolder).
        split: 'train' or anything else for evaluation behavior.

    Returns:
        A DataLoader; training loaders shuffle (unless a sampler is used)
        and drop the last partial batch, eval loaders use batch size 100.

    Raises:
        ValueError: for an unrecognized opt.dataset.
    """
    transform = define_transform(opt=opt, split=split)
    print(split, transform)
    is_train = split == 'train'
    if opt.dataset == 'mnist':
        print("Loading MNIST...")
        dataset = MNIST(root=opt.data_path,
                        train=is_train,
                        transform=transform,
                        download=True)
    elif opt.dataset == 'cifar10':
        print("Loading CIFAR10...")
        dataset = CIFAR10(root=opt.data_path,
                          train=is_train,
                          transform=transform,
                          download=True)
        if opt.corrupt_p > 0:
            # Optionally corrupt a fraction of labels for noise experiments.
            dataset.targets = corrupt_labels(dataset.targets, p=opt.corrupt_p)
    elif opt.dataset == 'imagefolder':
        dataset = ImageFolder(
            root=opt.train_path if is_train else opt.val_path,
            transform=transform)
    else:
        raise ValueError('Undefined dataset type')
    sampler = ImbalancedDatasetSampler(dataset) if (
        is_train and opt.sampler == 'imbalanced') else None
    # BUG FIX: DataLoader raises ValueError when shuffle=True is combined
    # with a custom sampler, so shuffling is disabled whenever the
    # imbalanced sampler is active -- the sampler then defines the order.
    do_shuffle = is_train and sampler is None
    batch_size = opt.batch_size if is_train else 100
    data_loader = DataLoader(dataset=dataset,
                             sampler=sampler,
                             batch_size=batch_size,
                             num_workers=opt.num_workers,
                             shuffle=do_shuffle,
                             drop_last=is_train)
    return data_loader
num_datapoints = 5000

# Build one fixed-size dataset per digit (0-9), each holding a random
# subset of `num_datapoints` images of that digit.
print('Making each number training set')
datasets = []
for digit in range(10):
    mnist_full = MNIST('./data', transform=img_transform, download=True, train=True)
    # Restrict to this digit only.
    keep = mnist_full.targets == digit
    mnist_full.targets = mnist_full.targets[keep]
    mnist_full.data = mnist_full.data[keep]
    # Take the first split of a random partition as the fixed-size subset.
    subset = torch.utils.data.random_split(
        mnist_full, [num_datapoints, len(mnist_full) - num_datapoints])[0]
    datasets.append(subset)

# Pair every digit's dataset with the zeros dataset and wrap each pair in a
# shuffled DataLoader for VAE training.
print('Making each number combined with 0 VAE set')
VAE_dataloaders_w_zeros = []
for digit in range(10):
    combined = ConcatDataset([datasets[0], datasets[digit]])
    VAE_dataloaders_w_zeros.append(
        DataLoader(combined, batch_size=batch_size, shuffle=True))
split_model.train()

# ----- Data -----
# Standard MNIST normalization constants, as in the PyTorch examples repo:
# https://github.com/pytorch/examples/blob/master/mnist/main.py
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307, ), (0.3081, )),
])

train_data = MNIST(data_dir, download=True, train=True, transform=data_transform)
# Restrict training to a small subset so the model can be forced to overfit.
train_data.data = train_data.data[:args.n_train_data]
train_data.targets = train_data.targets[:args.n_train_data]
train_loader = torch.utils.data.DataLoader(train_data, batch_size=args.batch_size)

# Test data (full split, large eval batches)
test_data = MNIST(data_dir, download=True, train=False, transform=data_transform)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=1024)

# ----- Train -----
n_epochs = args.epochs
best_accuracy = 0.0
def indicator_dataset(dataset, num, limit, class_object, args):
    """Build loaders for a binary "is class `num`" indicator task.

    (F)MNIST branch: keeps classes < `limit`, swaps labels 1 and 6, maps
    the classes listed in `num` to positive label 1 and everything else to
    0, then truncates the negatives so positives and negatives are
    balanced.  QuickDraw! branch: loads per-class .npy bitmaps (5000 train
    / 1000 test per class) and binarizes labels against `num`.

    NOTE(review): the (F)MNIST branch iterates `for i in num` (num as a
    collection), while the QuickDraw! branch compares `label == num`
    (num as a scalar) -- confirm the intended type of `num`.

    Returns:
        (trainloader, testloader, train_label, test_label).
    """
    if (dataset == 'MNIST'):
        print("Loading {}-indicator for MNIST dataset...".format(num))
        # NOTE(review): `transform` is not defined in this function;
        # presumably a module-level transform -- confirm.
        data_train = MNIST(root='./data', train=True, download=True,
                           transform=transform)
        data_test = MNIST(root='./data', train=False, download=True,
                          transform=transform)
    elif (dataset == 'fMNIST'):
        print("Loading full Fashion-MNIST dataset...")
        data_train = FashionMNIST(root='./data', train=True, download=True,
                                  transform=transform)
        data_test = FashionMNIST(root='./data', train=False, download=True,
                                 transform=transform)
    else:
        print("Loading full QuickDraw! dataset...")
        train_data = []
        train_label = []
        test_data = []
        test_label = []
        for i in range(len(class_object)):
            # load npy file and concatenate data
            ob = np.load('./data/quickdraw/full_numpy_bitmap_' +
                         class_object[i] + '.npy')
            # choose train size and test size (fixed 5000 / 1000 per class)
            train = ob[0:5000, ]
            test = ob[5000:6000, ]
            train_label = np.concatenate(
                (train_label, i * np.ones(train.shape[0])), axis=0)
            test_label = np.concatenate(
                (test_label, i * np.ones(test.shape[0])), axis=0)
            if i == 0:
                train_data = train
                test_data = test
            else:
                train_data = np.concatenate((train_data, train), axis=0)
                test_data = np.concatenate((test_data, test), axis=0)
        # Binarize: class `num` -> 1, everything else -> 0 (a -1 sentinel
        # avoids clobbering the `num` values mid-rewrite).
        train_label[train_label != num] = -1
        train_label[train_label == num] = 1
        train_label[train_label == -1] = 0
        test_label[test_label != num] = -1
        test_label[test_label == num] = 1
        test_label[test_label == -1] = 0
        # generate dataloader
        trainset = feature_Dataset(train_data, train_label.astype(int),
                                   transform)
        trainloader = DataLoader(trainset, batch_size=args.batch_size_train,
                                 shuffle=True)
        testset = feature_Dataset(test_data, test_label.astype(int),
                                  transform)
        testloader = DataLoader(testset, batch_size=args.batch_size_test,
                                shuffle=False)
    if (dataset == 'MNIST' or dataset == 'fMNIST'):
        # train batch: keep only classes below the cutoff
        idx = (data_train.targets < limit)
        data_train.targets = data_train.targets[idx]
        data_train.data = data_train.data[idx]
        print("Changing label...")
        # Swap labels 1 and 6 (via temporary value 10) before binarizing.
        data_train.targets[data_train.targets == 1] = 10
        data_train.targets[data_train.targets == 6] = 1
        data_train.targets[data_train.targets == 10] = 6
        # Mark every class listed in `num` with sentinel 10, then map
        # sentinel -> 1 (positive) and everything else -> 0 (negative).
        for i in num:
            data_train.targets[data_train.targets == i] = 10
        data_train.targets[data_train.targets != 10] = 0
        data_train.targets[data_train.targets == 10] = 1
        # Balance classes: keep only as many leading negatives as there are
        # positives; discard the remaining negatives.
        idx_0 = (data_train.targets == 0)
        idx_1 = (data_train.targets == 1)
        sum_idx_0 = 0
        total = sum(idx_1)
        for i in range(len(idx_0)):
            sum_idx_0 += idx_0[i]
            if sum_idx_0 == total:
                idx_0[i + 1:] = False
                break
        idx = idx_0 + idx_1
        print(sum(idx))
        data_train.targets = data_train.targets[idx]
        data_train.data = data_train.data[idx]
        train_label = data_train.targets.cpu().detach().numpy()
        trainloader = DataLoader(data_train, batch_size=args.batch_size_train,
                                 shuffle=True)
        # test batch: same pipeline as the train batch
        idx = (data_test.targets < limit)
        data_test.targets = data_test.targets[idx]
        data_test.data = data_test.data[idx]
        print("Changing label...")
        data_test.targets[data_test.targets == 1] = 10
        data_test.targets[data_test.targets == 6] = 1
        data_test.targets[data_test.targets == 10] = 6
        for i in num:
            data_test.targets[data_test.targets == i] = 10
        data_test.targets[data_test.targets != 10] = 0
        data_test.targets[data_test.targets == 10] = 1
        idx_0 = (data_test.targets == 0)
        idx_1 = (data_test.targets == 1)
        sum_idx_0 = 0
        print(sum(idx_1))
        # total = sum(idx_1)
        # NOTE(review): hard-coded negative count (the commented-out line
        # above computed it from the data) -- confirm 1042 is correct for
        # this dataset/limit.
        total = 1042
        for i in range(len(idx_0)):
            sum_idx_0 += idx_0[i]
            if sum_idx_0 == total:
                idx_0[i + 1:] = False
                break
        idx = idx_0 + idx_1
        print(sum(idx))
        data_test.targets = data_test.targets[idx]
        data_test.data = data_test.data[idx]
        test_label = data_test.targets.cpu().detach().numpy()
        testloader = DataLoader(data_test, batch_size=args.batch_size_test,
                                shuffle=False)
    return trainloader, testloader, train_label, test_label
def main():
    """Train the hashing autoencoder on binarized MNIST.

    Relies on module-level configuration: batch_size, encode_length,
    learning_rate, num_epochs, Alpha, name, and helpers
    min_max_normalization, tensor_round, autoencoder, adjust_learning_rate,
    compress, calculate_map, plot_latent_variable3d.
    """
    # Images are scaled to [0, 1] and rounded to binary pixel values.
    img_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda tensor: min_max_normalization(tensor, 0, 1)),
        transforms.Lambda(lambda tensor: tensor_round(tensor))
    ])
    dataset = MNIST('./data', train=True, transform=img_transform, download=True)
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    testset = MNIST('./data', train=False, transform=img_transform, download=True)
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=True)
    # visualize the distributions of the continuous feature U over 5,000
    # images: 500 per digit, selected with a fixed seed for reproducibility
    visuadata = MNIST('./data', train=False, transform=img_transform, download=True)
    X = dataset.data
    L = np.array(dataset.targets)
    first = True
    for label in range(10):
        index = np.where(L == label)[0]
        N = index.shape[0]
        np.random.seed(0)  # fixed seed so the visualized subset is stable
        perm = np.random.permutation(N)
        index = index[perm]
        data = X[index[0:500]]
        labels = L[index[0:500]]
        if first:
            visualization_L = labels
            visualization_data = data
        else:
            visualization_L = np.concatenate((visualization_L, labels))
            visualization_data = torch.cat((visualization_data, data))
        first = False
    visuadata.data = visualization_data
    visuadata.targets = visualization_L
    # Data Loader
    visualization_loader = DataLoader(dataset=visuadata,
                                      batch_size=batch_size,
                                      shuffle=False,
                                      num_workers=0)
    model = autoencoder(encode_length=encode_length)
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=1e-5)
    for epoch in range(num_epochs):
        print('--------training epoch {}--------'.format(epoch))
        adjust_learning_rate(optimizer, epoch)
        # train the model using SGD
        for i, (img, _) in enumerate(train_loader):
            img = img.view(img.size(0), -1)  # flatten to (batch, 784)
            img = Variable(img)
            # ===================forward=====================
            output, h, b = model(img)
            # Reconstruction loss on the binarized pixels.
            loss_BCE = criterion(output, img)
            # Regularizer: squared per-bit batch mean of the codes b
            # (Tcode.mm(onesvec)/batch is the mean of each code bit).
            onesvec = Variable(torch.ones(h.size(0), 1))
            Tcode = torch.transpose(b, 1, 0)
            loss_reg = torch.mean(torch.pow(Tcode.mm(onesvec) / h.size(0), 2)) / 2
            loss = loss_BCE + Alpha * loss_reg
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Test the Model using testset (runs every epoch since % 1 == 0)
        if (epoch + 1) % 1 == 0:
            ''' Calculate the mAP over test set '''
            retrievalB, retrievalL, queryB, queryL = compress(
                train_loader, testloader, model)
            result_map = calculate_map(qB=queryB, rB=retrievalB,
                                       queryL=queryL, retrievalL=retrievalL)
            print('---{}_mAP: {}---'.format(name, result_map))
            ''' visulization of latent variable over 5,000 images In this setting, we set encode_length = 3 '''
            if encode_length == 3:
                z_buf = list([])
                label_buf = list([])
                for ii, (img, labelb) in enumerate(visualization_loader):
                    img = img.view(img.size(0), -1)
                    img = Variable(img)
                    # ===================forward=====================
                    _, qz, _ = model(img)
                    z_buf.extend(qz.cpu().data.numpy())
                    label_buf.append(labelb)
                X = np.vstack(z_buf)
                Y = np.hstack(label_buf)
                plot_latent_variable3d(X, Y, epoch, name)
def fetch_dataloaders(args):
    """Build train/valid dataloaders for mnist / cifar10 / colored-mnist.

    Images are quantized to args.n_bits and mapped into model space [-1, 1]
    via `preprocess`; labels are one-hot encoded when args.n_cond_classes
    is set.  Sets args.image_dims as a side effect, and writes a sample
    image grid both to the module-level `writer` and to args.output_dir.

    Raises:
        RuntimeError: for an unrecognized args.dataset.
    """
    # preprocessing transforms
    transform = T.Compose([
        T.ToTensor(),  # tensor in [0,1]
        lambda x: x.mul(255).div(2**(8 - args.n_bits)).floor(),  # lower bits
        partial(preprocess, n_bits=args.n_bits)
    ])  # to model space [-1,1]
    target_transform = (lambda y: torch.eye(args.n_cond_classes)[y]
                        ) if args.n_cond_classes else None
    if args.dataset == 'mnist':
        args.image_dims = (1, 28, 28)
        train_dataset = MNIST(args.data_path,
                              train=True,
                              transform=transform,
                              target_transform=target_transform)
        valid_dataset = MNIST(args.data_path,
                              train=False,
                              transform=transform,
                              target_transform=target_transform)
    elif args.dataset == 'cifar10':
        args.image_dims = (3, 32, 32)
        train_dataset = CIFAR10(args.data_path,
                                train=True,
                                transform=transform,
                                target_transform=target_transform)
        valid_dataset = CIFAR10(args.data_path,
                                train=False,
                                transform=transform,
                                target_transform=target_transform)
    elif args.dataset == 'colored-mnist':
        args.image_dims = (3, 28, 28)
        # NOTE -- data is quantized to 2 bits and in (N,H,W,C) format
        with open(args.data_path, 'rb'
                  ) as f:  # return dict {'train': np array; 'test': np array}
            data = pickle.load(f)
        # quantize to n_bits to match the transforms for other datasets and
        # construct tensors in shape N,C,H,W
        train_data = torch.from_numpy(
            np.floor(data['train'].astype(np.float32) /
                     (2**(2 - args.n_bits)))).permute(0, 3, 1, 2)
        valid_data = torch.from_numpy(
            np.floor(data['test'].astype(np.float32) /
                     (2**(2 - args.n_bits)))).permute(0, 3, 1, 2)
        # preprocess to [-1,1] and setup datasets -- NOTE using 0s for
        # labels to have a symmetric dataloader
        train_dataset = TensorDataset(preprocess(train_data, args.n_bits),
                                      torch.zeros(train_data.shape[0]))
        valid_dataset = TensorDataset(preprocess(valid_data, args.n_bits),
                                      torch.zeros(valid_data.shape[0]))
    else:
        raise RuntimeError('Dataset not recognized')
    if args.mini_data:  # shrink the dataset to a single batch
        if args.dataset == 'colored-mnist':
            # NOTE(review): this replaces the dataset with a raw image
            # tensor (labels are dropped), unlike the else branch --
            # confirm downstream code tolerates that.
            train_dataset = train_dataset.tensors[0][:args.batch_size]
        else:
            train_dataset.data = train_dataset.data[:args.batch_size]
            train_dataset.targets = train_dataset.targets[:args.batch_size]
        valid_dataset = train_dataset
    print(
        'Dataset {}\n\ttrain len: {}\n\tvalid len: {}\n\tshape: {}\n\troot: {}'
        .format(args.dataset, len(train_dataset), len(valid_dataset),
                train_dataset[0][0].shape, args.data_path))
    train_dataloader = DataLoader(train_dataset,
                                  args.batch_size,
                                  shuffle=True,
                                  pin_memory=(args.device.type == 'cuda'),
                                  num_workers=4)
    valid_dataloader = DataLoader(valid_dataset,
                                  args.batch_size,
                                  shuffle=False,
                                  pin_memory=(args.device.type == 'cuda'),
                                  num_workers=4)
    # save a sample grid for inspection
    data_sample = next(iter(train_dataloader))[0]
    writer.add_image('data_sample',
                     make_grid(data_sample, normalize=True, scale_each=True),
                     args.step)
    save_image(data_sample,
               os.path.join(args.output_dir, 'data_sample.png'),
               normalize=True,
               scale_each=True)
    return train_dataloader, valid_dataloader
# Make cuDNN deterministic so runs are reproducible.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(seed)  # `seed` is expected to be defined earlier in the file
device = "cuda:0" if torch.cuda.is_available() else "cpu"
# flatten 28*28 images to a 784 vector for each image
transform = transforms.Compose([
    transforms.ToTensor(),  # convert to tensor
    transforms.Lambda(lambda x: x.view(-1))  # flatten into vector
])
trainset = MNIST(".", train=True, download=True, transform=transform)
testset = MNIST(".", train=False, download=True, transform=transform)
# split the data: keep only the first 27105 training samples
# NOTE(review): 27105 is a magic count -- confirm where it comes from.
trainset.data = trainset.data[0:27105]
trainset.targets = trainset.targets[0:27105]
# # print(trainset.targets[0:11905])
# Loaders used for the distillation experiments (batch size 1000).
trainloaderforDistillation = DataLoader(trainset, batch_size=1000, shuffle=True)
testloaderforDistillation = DataLoader(testset, batch_size=1000, shuffle=True)


class BetterCNNforDistillation(nn.Module):
    """CNN used for distillation: two conv layers followed by three FC layers."""

    def __init__(self):
        super(BetterCNNforDistillation, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, (5, 5), padding=0)
        self.conv2 = nn.Conv2d(32, 64, (3, 3), padding=0)
        # 64 channels at an assumed 5x5 spatial size after the conv stack.
        # NOTE(review): the 5x5 size depends on the forward pass (not
        # visible here) -- confirm.
        self.fc1 = nn.Linear(64 * 5**2, 1024)
        self.fc2 = nn.Linear(1024, 50)
        self.fc3 = nn.Linear(50, 10)
def multi_indicator_dataset(dataset, num, limit, class_object, args):
    """Build loaders for a multi-way indicator task over the classes in `num`.

    Classes below `limit` are kept; the k-th class listed in `num` is
    relabeled k (1-based) and every other class becomes 0.  The 0-class is
    then truncated so its size matches the average size of the positive
    classes.  Only the MNIST / fMNIST branches are implemented.

    NOTE(review): if `dataset` is neither 'MNIST' nor 'fMNIST', no loaders
    are built (the QuickDraw! branch only prints) and the final return
    raises NameError.  `class_object` is unused here.

    Returns:
        (trainloader, testloader, train_label, test_label).
    """
    if (dataset == 'MNIST'):
        print("Loading {}-multi-indicator for MNIST dataset...".format(num))
        # NOTE(review): `transform` is not defined in this function;
        # presumably a module-level transform -- confirm.
        data_train = MNIST(root='./data', train=True, download=True,
                           transform=transform)
        data_test = MNIST(root='./data', train=False, download=True,
                          transform=transform)
    elif (dataset == 'fMNIST'):
        print("Loading full Fashion-MNIST dataset...")
        data_train = FashionMNIST(root='./data', train=True, download=True,
                                  transform=transform)
        data_test = FashionMNIST(root='./data', train=False, download=True,
                                 transform=transform)
    else:
        print("Loading full QuickDraw! dataset...")
    if (dataset == 'MNIST' or dataset == 'fMNIST'):
        # train batch: keep only classes below the cutoff
        idx = (data_train.targets < limit)
        data_train.targets = data_train.targets[idx]
        data_train.data = data_train.data[idx]
        # Mark the k-th class in `num` with sentinel 10+k so the original
        # values are not clobbered mid-rewrite; everything else becomes 0,
        # then the sentinels are shifted down to 1..len(num).
        idx = 1
        for i in num:
            data_train.targets[data_train.targets == i] = 10 + idx
            print('adding...')
            idx += 1
        data_train.targets[data_train.targets < 10] = 0
        data_train.targets[data_train.targets > 10] -= 10
        # Balance: keep only as many leading 0-class samples as the average
        # positive-class size; discard the remaining 0-class samples.
        idx_0 = (data_train.targets == 0)
        idx_1 = (data_train.targets != 0)
        sum_idx_0 = 0
        total = sum(idx_1) // len(num)
        for i in range(len(idx_0)):
            sum_idx_0 += idx_0[i]
            if sum_idx_0 == total:
                idx_0[i + 1:] = False
                break
        idx = idx_0 + idx_1
        print(sum(idx))
        data_train.targets = data_train.targets[idx]
        data_train.data = data_train.data[idx]
        train_label = data_train.targets.cpu().detach().numpy()
        trainloader = DataLoader(data_train, batch_size=args.batch_size_train,
                                 shuffle=True)
        # test batch: same pipeline as the train batch
        idx = (data_test.targets < limit)
        data_test.targets = data_test.targets[idx]
        data_test.data = data_test.data[idx]
        idx = 1
        for i in num:
            data_test.targets[data_test.targets == i] = 10 + idx
            idx += 1
        data_test.targets[data_test.targets < 10] = 0
        data_test.targets[data_test.targets > 10] -= 10
        idx_0 = (data_test.targets == 0)
        idx_1 = (data_test.targets != 0)
        sum_idx_0 = 0
        total = sum(idx_1) // len(num)
        print(sum(idx_1))
        # total = 843
        for i in range(len(idx_0)):
            sum_idx_0 += idx_0[i]
            if sum_idx_0 == total:
                idx_0[i + 1:] = False
                break
        idx = idx_0 + idx_1
        print(sum(idx))
        data_test.targets = data_test.targets[idx]
        data_test.data = data_test.data[idx]
        test_label = data_test.targets.cpu().detach().numpy()
        testloader = DataLoader(data_test, batch_size=args.batch_size_test,
                                shuffle=False)
    return trainloader, testloader, train_label, test_label
for run in range(args.runs): train_set = MNIST("/home/jeff/datasets", download=True, train=True, transform=tx) test_set = MNIST("/home/jeff/datasets", download=True, train=False, transform=tx) val_set = copy.deepcopy(train_set) perm = torch.randperm(train_set.data.size(0)) train_set.data = train_set.data[perm[:55000]] train_set.targets = train_set.targets[perm[:55000]] val_set.data = val_set.data[perm[55000:]] val_set.targets = val_set.targets[perm[55000:]] train = DataLoader(train_set, shuffle=True, batch_size=args.batch_size, num_workers=4) val = DataLoader(val_set, shuffle=True, batch_size=args.batch_size, num_workers=4) test = DataLoader(test_set, shuffle=False, batch_size=args.batch_size,
def load_data(config):
    """Build train/val/test dataloaders for the configured dataset.

    Supported dataset_name values: cifar10, mnist, fashionmnist,
    brain_tumor, head_ct, coil100, MVTec.  For the torchvision datasets the
    train split is restricted to the single `normal_class` and 851 samples
    are held out for validation.  For the folder-based datasets a single
    loader batch is materialized into a TensorDataset.

    Returns:
        (train_dataloader, val_dataloader, test_dataloader).
    """
    normal_class = config['normal_class']
    batch_size = config['batch_size']
    img_size = config['image_size']
    if config['dataset_name'] in ['cifar10']:
        img_transform = transforms.Compose([
            transforms.ToTensor(),
        ])
        os.makedirs("./train/CIFAR10", exist_ok=True)
        dataset = CIFAR10('./train/CIFAR10', train=True, download=True,
                          transform=img_transform)
        # Keep only the normal class; relabel all kept samples accordingly.
        dataset.data = dataset.data[np.array(dataset.targets) == normal_class]
        dataset.targets = [normal_class] * dataset.data.shape[0]
        # NOTE(review): 851 is a magic validation size reused for all three
        # torchvision datasets -- confirm its origin.
        train_set, val_set = torch.utils.data.random_split(
            dataset, [dataset.data.shape[0] - 851, 851])
        os.makedirs("./test/CIFAR10", exist_ok=True)
        test_set = CIFAR10("./test/CIFAR10", train=False, download=True,
                           transform=img_transform)
    elif config['dataset_name'] in ['mnist']:
        img_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
        ])
        os.makedirs("./train/MNIST", exist_ok=True)
        dataset = MNIST('./train/MNIST', train=True, download=True,
                        transform=img_transform)
        dataset.data = dataset.data[np.array(dataset.targets) == normal_class]
        dataset.targets = [normal_class] * dataset.data.shape[0]
        train_set, val_set = torch.utils.data.random_split(
            dataset, [dataset.data.shape[0] - 851, 851])
        os.makedirs("./test/MNIST", exist_ok=True)
        test_set = MNIST("./test/MNIST", train=False, download=True,
                         transform=img_transform)
    elif config['dataset_name'] in ['fashionmnist']:
        img_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
        ])
        os.makedirs("./train/FashionMNIST", exist_ok=True)
        dataset = FashionMNIST('./train/FashionMNIST', train=True,
                               download=True, transform=img_transform)
        dataset.data = dataset.data[np.array(dataset.targets) == normal_class]
        dataset.targets = [normal_class] * dataset.data.shape[0]
        train_set, val_set = torch.utils.data.random_split(
            dataset, [dataset.data.shape[0] - 851, 851])
        os.makedirs("./test/FashionMNIST", exist_ok=True)
        test_set = FashionMNIST("./test/FashionMNIST", train=False,
                                download=True, transform=img_transform)
    elif config['dataset_name'] in ['brain_tumor', 'head_ct']:
        img_transform = transforms.Compose([
            transforms.Resize([img_size, img_size]),
            transforms.Grayscale(num_output_channels=1),
            transforms.ToTensor()
        ])
        root_path = 'Dataset/medical/' + config['dataset_name']
        train_data_path = root_path + '/train'
        test_data_path = root_path + '/test'
        dataset = ImageFolder(root=train_data_path, transform=img_transform)
        load_dataset = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        # NOTE(review): only the FIRST shuffled batch is materialized here,
        # so the effective training pool is at most `batch_size` images --
        # confirm this is intentional.
        train_dataset_array = next(iter(load_dataset))[0]
        my_dataset = TensorDataset(train_dataset_array)
        train_set, val_set = torch.utils.data.random_split(
            my_dataset, [train_dataset_array.shape[0] - 5, 5])
        test_set = ImageFolder(root=test_data_path, transform=img_transform)
    elif config['dataset_name'] in ['coil100']:
        img_transform = transforms.Compose([transforms.ToTensor()])
        root_path = 'Dataset/coil100/' + config['dataset_name']
        train_data_path = root_path + '/train'
        test_data_path = root_path + '/test'
        dataset = ImageFolder(root=train_data_path, transform=img_transform)
        load_dataset = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        # Same single-batch materialization as the medical branch.
        train_dataset_array = next(iter(load_dataset))[0]
        my_dataset = TensorDataset(train_dataset_array)
        train_set, val_set = torch.utils.data.random_split(
            my_dataset, [train_dataset_array.shape[0] - 5, 5])
        test_set = ImageFolder(root=test_data_path, transform=img_transform)
    elif config['dataset_name'] in ['MVTec']:
        data_path = 'Dataset/MVTec/' + normal_class + '/train'
        data_list = []
        orig_transform = transforms.Compose(
            [transforms.Resize(img_size),
             transforms.ToTensor()])
        orig_dataset = ImageFolder(root=data_path, transform=orig_transform)
        train_orig, val_set = torch.utils.data.random_split(
            orig_dataset, [len(orig_dataset) - 25, 25])
        data_list.append(train_orig)
        # Augment the training pool with three randomly rescaled copies.
        for i in range(3):
            img_transform = transforms.Compose([
                transforms.Resize(img_size),
                transforms.RandomAffine(0, scale=(1.05, 1.2)),
                transforms.ToTensor()
            ])
            dataset = ImageFolder(root=data_path, transform=img_transform)
            data_list.append(dataset)
        dataset = ConcatDataset(data_list)
        train_loader = torch.utils.data.DataLoader(dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
        train_dataset_array = next(iter(train_loader))[0]
        train_set = TensorDataset(train_dataset_array)
        test_data_path = 'Dataset/MVTec/' + normal_class + '/test'
        test_set = ImageFolder(root=test_data_path, transform=orig_transform)
    train_dataloader = torch.utils.data.DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=True,
    )
    val_dataloader = torch.utils.data.DataLoader(
        val_set,
        batch_size=batch_size,
        shuffle=True,
    )
    test_dataloader = torch.utils.data.DataLoader(
        test_set,
        batch_size=batch_size,
        shuffle=True,
    )
    return train_dataloader, val_dataloader, test_dataloader
def load_data(config):
    """Build train/test dataloaders for the configured dataset.

    Supported dataset_name values: cifar10, mnist, fashionmnist, mvtec,
    retina.  For the torchvision datasets the train split is restricted to
    the single `normal_class`; the test split is left untouched.

    Returns:
        (train_dataloader, test_dataloader).

    Raises:
        Exception: for an unrecognized dataset_name.
    """
    normal_class = config['normal_class']
    batch_size = config['batch_size']
    if config['dataset_name'] in ['cifar10']:
        # NOTE(review): Image.ANTIALIAS is deprecated/removed in newer
        # Pillow releases (use Image.LANCZOS) -- confirm the pinned version.
        img_transform = transforms.Compose([
            transforms.Resize((256, 256), Image.ANTIALIAS),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225))
        ])
        os.makedirs("./Dataset/CIFAR10/train", exist_ok=True)
        dataset = CIFAR10('./Dataset/CIFAR10/train', train=True,
                          download=True, transform=img_transform)
        print("Cifar10 DataLoader Called...")
        print("All Train Data: ", dataset.data.shape)
        # Keep only the normal class; relabel all kept samples accordingly.
        dataset.data = dataset.data[np.array(dataset.targets) == normal_class]
        dataset.targets = [normal_class] * dataset.data.shape[0]
        print("Normal Train Data: ", dataset.data.shape)
        os.makedirs("./Dataset/CIFAR10/test", exist_ok=True)
        test_set = CIFAR10("./Dataset/CIFAR10/test", train=False,
                           download=True, transform=img_transform)
        print("Test Train Data:", test_set.data.shape)
    elif config['dataset_name'] in ['mnist']:
        img_transform = transforms.Compose(
            [transforms.Resize((32, 32)),
             transforms.ToTensor()])
        os.makedirs("./Dataset/MNIST/train", exist_ok=True)
        dataset = MNIST('./Dataset/MNIST/train', train=True, download=True,
                        transform=img_transform)
        print("MNIST DataLoader Called...")
        print("All Train Data: ", dataset.data.shape)
        dataset.data = dataset.data[np.array(dataset.targets) == normal_class]
        dataset.targets = [normal_class] * dataset.data.shape[0]
        print("Normal Train Data: ", dataset.data.shape)
        os.makedirs("./Dataset/MNIST/test", exist_ok=True)
        test_set = MNIST("./Dataset/MNIST/test", train=False, download=True,
                         transform=img_transform)
        print("Test Train Data:", test_set.data.shape)
    elif config['dataset_name'] in ['fashionmnist']:
        img_transform = transforms.Compose(
            [transforms.Resize((32, 32)),
             transforms.ToTensor()])
        os.makedirs("./Dataset/FashionMNIST/train", exist_ok=True)
        dataset = FashionMNIST('./Dataset/FashionMNIST/train', train=True,
                               download=True, transform=img_transform)
        print("FashionMNIST DataLoader Called...")
        print("All Train Data: ", dataset.data.shape)
        dataset.data = dataset.data[np.array(dataset.targets) == normal_class]
        dataset.targets = [normal_class] * dataset.data.shape[0]
        print("Normal Train Data: ", dataset.data.shape)
        os.makedirs("./Dataset/FashionMNIST/test", exist_ok=True)
        test_set = FashionMNIST("./Dataset/FashionMNIST/test", train=False,
                                download=True, transform=img_transform)
        print("Test Train Data:", test_set.data.shape)
    elif config['dataset_name'] in ['mvtec']:
        # Folder-based datasets: train folder already contains only the
        # normal class, so no target filtering is needed.
        data_path = 'Dataset/MVTec/' + normal_class + '/train'
        mvtec_img_size = config['mvtec_img_size']
        orig_transform = transforms.Compose([
            transforms.Resize([mvtec_img_size, mvtec_img_size]),
            transforms.ToTensor()
        ])
        dataset = ImageFolder(root=data_path, transform=orig_transform)
        test_data_path = 'Dataset/MVTec/' + normal_class + '/test'
        test_set = ImageFolder(root=test_data_path, transform=orig_transform)
    elif config['dataset_name'] in ['retina']:
        data_path = 'Dataset/OCT2017/train'
        orig_transform = transforms.Compose(
            [transforms.Resize([128, 128]),
             transforms.ToTensor()])
        dataset = ImageFolder(root=data_path, transform=orig_transform)
        test_data_path = 'Dataset/OCT2017/test'
        test_set = ImageFolder(root=test_data_path, transform=orig_transform)
    else:
        raise Exception(
            "You enter {} as dataset, which is not a valid dataset for this repository!"
            .format(config['dataset_name']))
    train_dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
    )
    test_dataloader = torch.utils.data.DataLoader(
        test_set,
        batch_size=batch_size,
        shuffle=False,
    )
    return train_dataloader, test_dataloader
def get_dataset(args,
                config,
                test=False,
                rev=False,
                one_hot=True,
                subset=False,
                shuffle=True):
    """Build a dataloader for (Fashion)MNIST / CIFAR10 / CIFAR100.

    When the configured dataset name is one of the '_transferbaseline'
    variants, the flags are overridden (rev=True, test=False, subset=True,
    reduce_labels=True).  With rev=False labels [0, n_labels) are kept;
    with rev=True labels [n_labels, total_labels).  Labels are optionally
    one-hot encoded via the project's single_one_hot_encode helpers.

    Returns:
        (dataloader, dataset, cond_size) where cond_size is the number of
        label values kept.

    Raises:
        ValueError: for an unrecognized dataset name.
    """
    total_labels = 10 if config.data.dataset.lower().split(
        '_')[0] != 'cifar100' else 100
    reduce_labels = total_labels != config.n_labels
    if config.data.dataset.lower() in [
            'mnist_transferbaseline', 'cifar10_transferbaseline',
            'fashionmnist_transferbaseline', 'cifar100_transferbaseline'
    ]:
        print('loading baseline transfer dataset')
        rev = True
        test = False
        subset = True
        reduce_labels = True
    # Random horizontal flip is applied only to training splits, and only
    # when enabled in the config.
    if config.data.random_flip is False:
        transform = transforms.Compose(
            [transforms.Resize(config.data.image_size),
             transforms.ToTensor()])
    else:
        if not test:
            transform = transforms.Compose([
                transforms.Resize(config.data.image_size),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.ToTensor()
            ])
        else:
            transform = transforms.Compose([
                transforms.Resize(config.data.image_size),
                transforms.ToTensor()
            ])
    if config.data.dataset.lower().split('_')[0] == 'mnist':
        dataset = MNIST(os.path.join(args.run, 'datasets'),
                        train=not test,
                        download=True,
                        transform=transform)
    elif config.data.dataset.lower().split('_')[0] in [
            'fashionmnist', 'fmnist'
    ]:
        dataset = FashionMNIST(os.path.join(args.run, 'datasets'),
                               train=not test,
                               download=True,
                               transform=transform)
    elif config.data.dataset.lower().split('_')[0] == 'cifar10':
        dataset = CIFAR10(os.path.join(args.run, 'datasets'),
                          train=not test,
                          download=True,
                          transform=transform)
    elif config.data.dataset.lower().split('_')[0] == 'cifar100':
        dataset = CIFAR100(os.path.join(args.run, 'datasets'),
                           train=not test,
                           download=True,
                           transform=transform)
    else:
        raise ValueError('Unknown config dataset {}'.format(
            config.data.dataset))
    if type(dataset.targets) is list:
        # CIFAR10 and CIFAR100 store targets as list, unlike (F)MNIST which
        # uses torch.Tensor
        dataset.targets = np.array(dataset.targets)
    if not rev:
        labels_to_consider = np.arange(config.n_labels)
        target_transform = lambda label: single_one_hot_encode(
            label, n_labels=config.n_labels)
        cond_size = config.n_labels
    else:
        labels_to_consider = np.arange(config.n_labels, total_labels)
        target_transform = lambda label: single_one_hot_encode_rev(
            label, start_label=config.n_labels, n_labels=total_labels)
        cond_size = total_labels - config.n_labels
    if reduce_labels:
        # Keep only samples whose label is in labels_to_consider.
        idx = np.any(
            [np.array(dataset.targets) == i for i in labels_to_consider],
            axis=0).nonzero()
        dataset.targets = dataset.targets[idx]
        dataset.data = dataset.data[idx]
    if one_hot:
        dataset.target_transform = target_transform
    if subset and args.subset_size != 0:
        # Take a deterministic prefix of the (filtered) dataset.
        dataset = torch.utils.data.Subset(dataset, np.arange(args.subset_size))
    dataloader = DataLoader(dataset,
                            batch_size=config.training.batch_size,
                            shuffle=shuffle,
                            num_workers=0)
    return dataloader, dataset, cond_size