def __init__(self, args):
    """Build train/valid/test DataLoaders over a random subset of MNIST.

    args must provide:
      - data_size: number of MNIST training images to sample
      - batch_size: mini-batch size used by all three loaders
    Downloads MNIST into ./MNIST_DATASET on first use.
    """
    super(c_loader, self).__init__()
    mnist_transform = transforms.Compose([transforms.ToTensor()])
    download_root = './MNIST_DATASET'
    dataset = MNIST(download_root, transform=mnist_transform, train=True, download=True)
    # Restrict training data to a random sample of args.data_size images
    # (sampled without replacement).
    dataset = Subset(dataset, random.sample(range(len(dataset)), args.data_size))
    # 80/20 train/validation split; the subtraction avoids losing a sample
    # to rounding.
    train_size = int(len(dataset) * 0.8)
    train_dataset, valid_dataset = random_split(dataset, [train_size, len(dataset) - train_size])
    test_dataset = MNIST(download_root, transform=mnist_transform, train=False, download=True)
    del dataset  # the split subsets hold their own references
    self.batch_size = args.batch_size
    self.train_iter = DataLoader(dataset=train_dataset, batch_size=self.batch_size, shuffle=True)
    self.valid_iter = DataLoader(dataset=valid_dataset, batch_size=self.batch_size, shuffle=True)
    self.test_iter = DataLoader(dataset=test_dataset, batch_size=self.batch_size, shuffle=True)
class MNISTClassification(Dataset):
    """(Fashion-)MNIST wrapped as a flat-vector classification dataset.

    Images are normalized with the standard MNIST statistics and flattened
    to 1-D; labels are returned as long tensors inside a ``Batch``.
    """

    def __init__(self, cfg: CfgNode, train: bool):
        super().__init__(cfg)
        self.train = train
        call_args = {
            "root": cfg.dataset.root,
            "train": self.train,
            "download": True,
            # (0.1307, 0.3081) are the conventional MNIST pixel mean/std.
            "transform": transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
            ),
        }
        if cfg.dataset.name == "fashion":
            self.mnist = FashionMNIST(**call_args)
        elif cfg.dataset.name == "digit":
            self.mnist = MNIST(**call_args)
        else:
            # Fail fast: the original fell through silently, leaving
            # self.mnist unset and crashing later with an opaque
            # AttributeError on first use.
            raise ValueError(
                "unknown dataset name: {!r} (expected 'fashion' or 'digit')".format(cfg.dataset.name)
            )

    def __len__(self) -> int:
        return len(self.mnist)

    def __getitem__(self, item: int) -> Batch:
        img, target = self.mnist[item]
        # Flatten the image to a vector; label becomes a 0-d long tensor.
        return Batch(x=img.view(-1), y=torch.tensor(target).long())

    def collate_fn(self, items: List[Batch]) -> Batch:
        return Batch.default_collate(items)
def __init__(self, noise_level=noise_level):
    """Expose the MNIST training set plus a configurable noise level.

    The default for ``noise_level`` is captured from the enclosing module
    at function-definition time (Python evaluates defaults once).
    """
    MNIST_db = MNIST(root=ROOT_MNIST, train=True, download=True,
                     transform=torchvision.transforms.ToTensor())
    # Delegate item access directly to the underlying torchvision dataset.
    self.getitem = MNIST_db.__getitem__
    self.len = len(MNIST_db)
    self.noise_level = noise_level
def __init__(self, args):
    """Build one-class (novelty-detection) MNIST loaders.

    Train/validation contain only "normal" digits — everything except
    args.abnormal_class. The test set mixes every abnormal-class image
    with an equally sized random sample of normal images (50:50 ratio).

    args must provide: abnormal_class (held-out digit) and batch_size.
    """
    super(loader, self).__init__()
    mnist_transform = transforms.Compose([transforms.ToTensor()])
    download_root = 'D:/2020-2/비즈니스애널리틱스/논문리뷰/Stacked Convolutional Auto-Encoders for Hierarchical Feature Extraction/MNIST_DATASET'
    dataset = MNIST(download_root, transform=mnist_transform, train=True, download=True)
    # Keep only the normal classes for training (args.abnormal_class is
    # the held-out "novelty" digit, e.g. zero).
    normal_class_idx = dataset.targets != args.abnormal_class
    dataset.targets = dataset.targets[normal_class_idx]
    dataset.data = dataset.data[normal_class_idx]
    # 80% train / 20% validation split over the normal-only data.
    train_size = int(len(dataset) * 0.8)
    train_dataset, valid_dataset = random_split(dataset, [train_size, len(dataset) - train_size])
    test_dataset = MNIST(download_root, transform=mnist_transform, train=False, download=True)
    normal_class_idx = torch.where(
        test_dataset.targets != args.abnormal_class)[0].numpy()
    novelty_class_idx = torch.where(
        test_dataset.targets == args.abnormal_class)[0].numpy()
    # Sample exactly as many normal test images as there are novelty
    # images, so the test set is balanced 50:50 normal vs. abnormal.
    temp_idx = np.random.choice(normal_class_idx, size=len(novelty_class_idx))
    test_idx = np.concatenate([novelty_class_idx, temp_idx])
    test_dataset.targets = test_dataset.targets[test_idx]
    test_dataset.data = test_dataset.data[test_idx]
    self.batch_size = args.batch_size
    self.train_iter = DataLoader(dataset=train_dataset, batch_size=self.batch_size, shuffle=True)
    self.valid_iter = DataLoader(dataset=valid_dataset, batch_size=self.batch_size, shuffle=True)
    self.test_iter = DataLoader(dataset=test_dataset, batch_size=self.batch_size, shuffle=True)
class MyDataset(Dataset):
    """MNIST training set served as {'image': flat tensor, 'label': 1-elem float tensor}."""

    def __init__(self, data_path, transform):
        super(MyDataset, self).__init__()
        # Downloads MNIST into data_path on first use.
        self.aggr_dataset = MNIST(data_path, download=True, train=True, transform=transform)

    def __len__(self):
        return len(self.aggr_dataset)

    def __getitem__(self, idx):
        X, y = self.aggr_dataset[idx]
        # Flatten the image; wrap the integer label as a size-1 float tensor.
        X = X.view(-1)
        y = torch.FloatTensor(1).fill_(y)
        return {'image' : X, 'label' : y}
def build_mnist(val_ratio, batch_size, num_workers, transform):
    """Return (train, val, test) DataLoaders for MNIST.

    val_ratio: fraction of the 60k training images held out for validation.
    Data is downloaded into <cwd>/data/ on first use.

    NOTE(review): none of the loaders shuffles — confirm that is intended
    for the training loader; training without shuffling is unusual.
    """
    data_dir = os.path.join(os.getcwd(), "data/")
    train_dataset = MNIST(data_dir, train=True, download=True, transform=transform)
    test_dataset = MNIST(data_dir, train=False, download=True, transform=transform)
    val_size = int(len(train_dataset) * val_ratio)  # number of validation samples
    train, val = random_split(train_dataset, [len(train_dataset) - val_size, val_size])
    train = DataLoader(train, num_workers=num_workers, batch_size=batch_size)
    val = DataLoader(val, num_workers=num_workers, batch_size=batch_size)
    test = DataLoader(test_dataset, num_workers=num_workers, batch_size=batch_size)
    return train, val, test
def load_dataset_mnist(root, train, download, batch_size):
    """Load MNIST and return a shuffled DataLoader, printing a summary banner.

    root/train/download are forwarded to torchvision's MNIST constructor.
    """
    data_transforms = Compose([ToTensor()])
    mnist_trainset = MNIST(root=root, train=train, download=download, transform=data_transforms)
    dataloader_mnist_train = torch.utils.data.DataLoader(mnist_trainset, batch_size=batch_size, shuffle=True)
    # Summary banner (user-facing messages intentionally left in Spanish).
    print("\n*******************************")
    print("Dataset MNIST cargado")
    print("Tamaño: " + str(len(mnist_trainset)))
    print("Batchsize: " + str(batch_size))
    print("Batches: " + str(len(dataloader_mnist_train)))
    print("Clases: " + str(mnist_trainset.classes))
    print("Shape: " + str(mnist_trainset[0][0].shape))
    print("*******************************")
    return dataloader_mnist_train
class MNISTSummation(Dataset):
    """Random sets of MNIST digits paired with the sum of their labels.

    Each of the dataset_len items is a set whose size is drawn uniformly
    from [min_len, max_len]; the set members are MNIST indices sampled
    with replacement.
    """

    def __init__(self, min_len: int, max_len: int, dataset_len: int, train: bool = True, transform: Compose = None):
        self.min_len = min_len
        self.max_len = max_len
        self.dataset_len = dataset_len
        self.train = train
        self.transform = transform

        self.mnist = MNIST(DATA_ROOT, train=self.train, transform=self.transform, download=True)
        mnist_items_range = np.arange(0, len(self.mnist))

        # One random size per set, then the set members (with replacement).
        items_len_range = np.arange(self.min_len, self.max_len + 1)
        items_len = np.random.choice(items_len_range, size=self.dataset_len, replace=True)
        self.mnist_items = [
            np.random.choice(mnist_items_range, size=items_len[i], replace=True)
            for i in range(self.dataset_len)
        ]

    def __len__(self) -> int:
        return self.dataset_len

    def __getitem__(self, item: int) -> Tuple[FloatTensor, FloatTensor]:
        mnist_items = self.mnist_items[item]
        the_sum = 0
        images = []
        for mi in mnist_items:
            img, target = self.mnist[mi]
            the_sum += target
            images.append(img)
        # Returns a (set_size, C, H, W) image stack and the scalar label sum.
        return torch.stack(images, dim=0), torch.FloatTensor([the_sum])
class MNISTSummation(Dataset):
    """MNIST images grouped into fixed-size, single-digit sets.

    For every digit 0-9, its images are partitioned into consecutive
    chunks of set_size indices; each chunk is one dataset item whose label
    is the shared digit. Leftover images that do not fill a whole chunk
    are dropped.
    """

    def __init__(self, set_size: int, train: bool = True, transform: Compose = None):
        self.set_size = set_size
        self.train = train
        self.transform = transform

        self.mnist = MNIST("../../data/mnist", train=self.train, transform=self.transform, download=True)
        mnist_items_range = np.arange(0, len(self.mnist))

        self.mnist_items = []
        for digit in range(10):
            ids = self.mnist.targets == digit
            digit_indices = mnist_items_range[ids]
            # Integer division drops the tail so every set is exactly set_size.
            for i in range(len(digit_indices) // set_size):
                self.mnist_items.append(digit_indices[i * set_size:(i + 1) * set_size])

    def __len__(self) -> int:
        return len(self.mnist_items)

    def __getitem__(self, item: int) -> Tuple[FloatTensor, FloatTensor]:
        mnist_items = self.mnist_items[item]
        images1 = []
        for mi in mnist_items:
            img, target = self.mnist[mi]
            images1.append(img)
        # All members of a set share one digit, so the last target read in
        # the loop is the set's label.
        return torch.stack(images1, dim=0), torch.LongTensor([target])
plt.show() # Data loader for easy mini-bacth return in training, the image batch shape will be (50,1,28,28) train_loader = DataLoader(dataset=train_data, batch_size=BATCH_SIZE, shuffle=True) print(train_loader.__len__()) # 60000 / bacth size = 1200 # pick 2000 samples to speed up testing test_data = MNIST(root='./mnist', train=False) # shape from (2000, 28, 28) to (2000, 1, 28, 28) test_x = torch.unsqueeze(test_data.test_data, dim=1).type( torch.FloatTensor)[:2000] / 255 test_y = test_data.test_labels[:2000] # (2000, 1, 28, 28) print(test_data.__len__(), test_x.size()) class CNN(nn.Module): def __init__(self): super(CNN, self).__init__() self.conv1 = nn.Sequential( # input shape (1, 28 ,28) nn.Conv2d( in_channels=1, # input height out_channels=16, # n_filters kernel_size=5, # filter(kernel) size stride=1, # filter movement/step padding= 2, # if want same width and length of this image after Conv2d, padding=(kernel_size-1)/2 if stride ), # output shape (16 ,28, 28) nn.ReLU(), # activation
# Experiment configuration taken from CLI arguments.
n_epochs = args.epochs
noise_level = args.noise
mkimage = True
ROOT_MNIST = './dataset'
LOSS_PATH = '../results'
join = os.path.join

MNIST_db = MNIST(root=ROOT_MNIST, train=True, download=True,
                 transform=torchvision.transforms.ToTensor())
train_loader = Data.DataLoader(dataset=MNIST_db, batch_size=BATCH_SIZE, shuffle=True)
total = MNIST_db.__len__()

# Output filename encodes the run's hyperparameters.
name = 'VAE_Loss_RL_lr' + str(lr) + '_e' + str(n_epochs) + '_bs' + str(
    BATCH_SIZE) + '_n' + str(noise_level) + '.png'

# Plot loss
# Recreate a fresh results directory for this run (name[:-4] strips '.png').
# NOTE(review): statement grouping reconstructed from a collapsed source —
# confirm os.mkdir sits outside the if, so the directory is always created.
if os.path.exists(join(LOSS_PATH, name[:-4])):
    os.system('rm -r ' + join(LOSS_PATH, name[:-4]))
os.mkdir(join(LOSS_PATH, name[:-4]))


class Noisy_MNIST():
    def __init__(self, noise_level=noise_level):
        MNIST_db = MNIST(root=ROOT_MNIST, train=True, download=True,
                         transform=torchvision.transforms.ToTensor())
        # NOTE(review): method body continues beyond this chunk.
transform=transform)  # NOTE(review): tail of a dataset constructor whose opening precedes this chunk
test_loader = DataLoader(dataset=test_data, batch_size=batch_size, shuffle=True)

# Classifier trained with BCE over one-hot targets.
classifier = Net(num_classes).to(device)
optimizer = optim.Adam(classifier.parameters(), betas=(0.5, 0.999), lr=0.0002)
criterion = nn.BCELoss().to(device)

train_loss = []
train_accuracy = []
test_accuracy = []
steps_per_epoch = int(np.ceil(train_data.__len__() / batch_size))

for epoch_i in range(1, epochs + 1):
    running_loss = 0
    for i, (img, label) in enumerate(train_loader):
        img = img.to(device)
        onehot_label = to_onehot(label, num_classes).to(device)
        predict = classifier(img)
        loss = criterion(predict, onehot_label)
        # NOTE(review): accumulating the loss tensor (not loss.item())
        # keeps each step's autograd graph alive — verify memory use.
        running_loss += loss
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # NOTE(review): the loop/epoch body continues beyond this chunk.
class DigitSumImageLoader(Dataset):
    """Random sets of MNIST images labelled with the sum of their digits.

    Each of the dataset_size items is a set of [min_size, max_size] MNIST
    images sampled with replacement; its label is the sum of the digit
    labels, returned as a (1, 1) tensor.
    """

    def __init__(self, data_dir, split, min_size, max_size, dataset_size, train=True, custom_transforms=None):
        self.data_dir = data_dir
        self.split = split
        self.min_size = min_size
        self.max_size = max_size
        self.dataset_size = dataset_size
        self.train = train
        self.transforms = self.get_transforms(custom_transforms)

        # Downloads MNIST into data_dir if it is not already present.
        self.mnist = MNIST(data_dir, train=self.train, transform=self.transforms, download=True)
        mnist_items_range = np.arange(0, len(self.mnist))

        # Draw a random size in [min_size, max_size] for every set, then
        # sample that many MNIST indices (with replacement) per set.
        set_sizes_range = np.arange(self.min_size, self.max_size + 1)
        set_sizes = np.random.choice(set_sizes_range, self.dataset_size, replace=True)
        self.mnist_items = [
            np.random.choice(mnist_items_range, size, replace=True)
            for size in set_sizes
        ]

        # Eagerly materialize every item (image stacks + sums).
        self.data = [self[item] for item in range(len(self))]

    def __len__(self):
        return self.dataset_size

    def __getitem__(self, item):
        set_items = self.mnist_items[item]
        digit_sum = 0  # renamed from `sum`, which shadowed the builtin
        img_list = []
        for mnist_item in set_items:
            img, label = self.mnist[mnist_item]
            digit_sum += label
            img_list.append(img)
        out = torch.zeros(1, 1)
        out[0][0] = digit_sum
        return torch.stack(img_list, dim=0), out

    def get_transforms(self, transforms):
        # Fall back to the module-level default when no custom transforms
        # are supplied.
        return transforms if transforms else MNIST_TRANSFORM

    def read_images(self, item):
        """Display all images of set `item` via show_images."""
        set_items = self[item][0]
        img_list = [set_items[i].numpy()[0] for i in range(set_items.size()[0])]
        show_images(img_list)

    def get_label(self, item):
        """Return the (1, 1) digit-sum tensor for set `item`."""
        return self[item][1]