import numpy as np
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.datasets import MNIST, FashionMNIST


def odd_even_dataset(dataset, limit, args):
    transform = transforms.ToTensor()
    if (dataset == 'MNIST'):
        print("Loading odd vs even MNIST dataset...")
        data_train = MNIST(root='./data', train=True, download=True, transform=transform)
        data_test = MNIST(root='./data', train=False, download=True, transform=transform)
    else:
        print("Loading odd vs even Fashion-MNIST dataset...")
        data_train = FashionMNIST(root='./data', train=True, download=True, transform=transform)
        data_test = FashionMNIST(root='./data', train=False, download=True, transform=transform)
    
    # train batch
    idx = (data_train.targets < limit)
    data_train.targets = data_train.targets[idx]
    data_train.data = data_train.data[idx]
    # binary relabel: even digits -> 1, odd digits -> 0
    data_train.targets = (data_train.targets % 2 == 0).long()
    train_label = data_train.targets.cpu().detach().numpy()
    trainloader = DataLoader(data_train, batch_size=args.batch_size_train, shuffle=False)
    # test batch
    idx = (data_test.targets < limit)
    data_test.targets = data_test.targets[idx]
    data_test.data = data_test.data[idx]
    # binary relabel: even digits -> 1, odd digits -> 0
    data_test.targets = (data_test.targets % 2 == 0).long()
    test_label = data_test.targets.cpu().detach().numpy()
    testloader = DataLoader(data_test, batch_size=args.batch_size_test, shuffle=False)
    
    return trainloader, testloader, train_label, test_label
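
For reference, these helpers expect an argparse-style `args` object exposing `batch_size_train` and `batch_size_test`; a minimal, hypothetical invocation of the function above could look like this:

from argparse import Namespace

args = Namespace(batch_size_train=64, batch_size_test=256)
trainloader, testloader, train_label, test_label = odd_even_dataset('MNIST', limit=10, args=args)
print(train_label[:10])  # binary odd/even labels
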
def full_class_dataset(dataset, limit, class_object, args):
    transform = transforms.ToTensor()
    if (dataset == 'MNIST'):
        print("Loading full MNIST dataset...")
        data_train = MNIST(root='./data', train=True, download=True, transform=transform)
        data_test = MNIST(root='./data', train=False, download=True, transform=transform)
    elif (dataset == 'fMNIST'):
        print("Loading full Fashion-MNIST dataset...")
        data_train = FashionMNIST(root='./data', train=True, download=True, transform=transform)
        data_test = FashionMNIST(root='./data', train=False, download=True, transform=transform)
    else:
        print("Loading full QuickDraw! dataset...")
        train_data = []
        train_label = []
        test_data = []
        test_label = []
        for i in range(len(class_object)):
            # load npy file and concatenate data
            ob = np.load('./data/quickdraw/full_numpy_bitmap_'+ class_object[i] +'.npy')
            # choose train size and test size
            train = ob[0:5000,]
            test = ob[5000:6000,]
            train_label = np.concatenate((train_label, i * np.ones(train.shape[0])), axis=0)
            test_label = np.concatenate((test_label, i * np.ones(test.shape[0])), axis=0)
            
            if i == 0:
                train_data = train
                test_data = test
            else:
                train_data = np.concatenate((train_data, train), axis=0)
                test_data = np.concatenate((test_data, test), axis=0)
        
        # generate dataloader
        trainset = feature_Dataset(train_data, train_label, transform)
        trainloader = DataLoader(trainset, batch_size=args.batch_size_train, shuffle=True)
        
        testset = feature_Dataset(test_data, test_label, transform)
        testloader = DataLoader(testset, batch_size=args.batch_size_test, shuffle=False)
                    
        
    if (dataset == 'MNIST' or dataset == 'fMNIST'):
        # train batch
        idx = (data_train.targets < limit)
        data_train.targets = data_train.targets[idx]
        data_train.data = data_train.data[idx]
        train_label = data_train.targets.cpu().detach().numpy()
        trainloader = DataLoader(data_train, batch_size=args.batch_size_train, shuffle=False)
        # test batch
        idx = (data_test.targets < limit)
        data_test.targets = data_test.targets[idx]
        data_test.data = data_test.data[idx]
        test_label = data_test.targets.cpu().detach().numpy()
        testloader = DataLoader(data_test, batch_size=args.batch_size_test, shuffle=False)
    
    return trainloader, testloader, train_label, test_label
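
`feature_Dataset` is referenced above but not defined in these snippets; given how it is called (raw QuickDraw bitmaps as numpy arrays, integer labels, a torchvision transform), it is presumably a small Dataset wrapper. A minimal sketch under that assumption:

# Assumed helper, not part of the original snippets.
import numpy as np
import torch
from torch.utils.data import Dataset


class feature_Dataset(Dataset):
    def __init__(self, data, labels, transform=None):
        # QuickDraw bitmaps are flat 784-vectors; reshape to 28x28 for the transform
        self.data = data.reshape(-1, 28, 28).astype(np.uint8)
        self.labels = torch.as_tensor(labels, dtype=torch.long)
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, index):
        img = self.data[index]
        if self.transform is not None:
            img = self.transform(img)  # e.g. transforms.ToTensor()
        return img, self.labels[index]
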
Example #3
def _filter_(dataset: MNIST):
    # args.filter_labels comes from the enclosing scope; kept classes are remapped to 0..len-1
    final_mask = torch.zeros_like(dataset.targets).bool()
    for index, label in enumerate(args.filter_labels):
        mask = dataset.targets == label
        dataset.targets[mask] = index
        final_mask |= mask
    dataset.data = dataset.data[final_mask]
    dataset.targets = dataset.targets[final_mask]
    # __init__ of a `loader` class (the class statement itself is not included in this snippet)
    def __init__(self, args):
        super(loader, self).__init__()

        mnist_transform = transforms.Compose([transforms.ToTensor()])
        download_root = 'D:/2020-2/비즈니스애널리틱스/논문리뷰/Stacked Convolutional Auto-Encoders for Hierarchical Feature Extraction/MNIST_DATASET'

        dataset = MNIST(download_root,
                        transform=mnist_transform,
                        train=True,
                        download=True)
        normal_class_idx = dataset.targets != args.abnormal_class  # args.abnormal_class is zero class
        dataset.targets = dataset.targets[normal_class_idx]
        dataset.data = dataset.data[normal_class_idx]
        '''The training set contains only normal data (digits 1 through 9).'''

        train_dataset, valid_dataset = random_split(dataset, [
            int(len(dataset) * 0.8),
            len(dataset) - int(len(dataset) * 0.8)
        ])
        '''train 80% , validation 20% split'''

        test_dataset = MNIST(download_root,
                             transform=mnist_transform,
                             train=False,
                             download=True)
        normal_class_idx = torch.where(
            test_dataset.targets != args.abnormal_class)[0].numpy()
        novelty_class_idx = torch.where(
            test_dataset.targets == args.abnormal_class)[0].numpy()
        temp_idx = np.random.choice(normal_class_idx,
                                    size=novelty_class_idx.__len__())
        test_idx = np.concatenate([novelty_class_idx, temp_idx])
        '''The test data consists of the abnormal class (0) and normal digits 1 through 9, with a 50:50 abnormal-to-normal ratio.'''
        test_dataset.targets = test_dataset.targets[test_idx]
        test_dataset.data = test_dataset.data[test_idx]

        self.batch_size = args.batch_size
        self.train_iter = DataLoader(dataset=train_dataset,
                                     batch_size=self.batch_size,
                                     shuffle=True)
        self.valid_iter = DataLoader(dataset=valid_dataset,
                                     batch_size=self.batch_size,
                                     shuffle=True)
        self.test_iter = DataLoader(dataset=test_dataset,
                                    batch_size=self.batch_size,
                                    shuffle=True)
Example #5
def _filter_classes(dataset: MNIST, classes_to_keep: Sequence[int]) -> Subset:
    targets: np.ndarray = dataset.targets.numpy()
    final_mask = np.zeros_like(targets, dtype=np.bool_)
    for index, label in enumerate(classes_to_keep):
        mask = targets == label
        targets = np.where(mask, index, targets)
        final_mask |= mask
    dataset.targets = targets
    inds = final_mask.nonzero()[0].tolist()

    return Subset(dataset, inds)
Example #6
    def load_single(self, data_path="data", download=True, **kwargs):
        batch_size = 1
        dataset = MNIST(data_path,
                        train=True,
                        download=download,
                        transform=self.train_transform)
        dataset.data = dataset.data[0][None]
        dataset.targets = dataset.targets[0][None]
        train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        test_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

        return train_loader, test_loader
Example #7
    def setup(self, stage: str = None):
        # Transforms
        mnist_transforms = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([self.x_mean], [self.x_std])
        ])

        # Train
        train_val = MNIST(
            os.path.dirname(__file__),
            download=True,
            train=True,
            transform=mnist_transforms,
        )

        idx = torch.cat(
            [train_val.targets[:, None] == digit for digit in self.digits],
            dim=1).any(dim=1)
        train_val.targets = train_val.targets[idx]
        train_val.data = train_val.data[idx]

        train_length = int(len(train_val) * self.train_val_split)
        val_length = len(train_val) - train_length
        self.train_dataset, self.val_dataset = random_split(
            train_val, [train_length, val_length])

        # Test
        self.test_dataset = MNIST(
            os.path.dirname(__file__),
            download=True,
            train=False,
            transform=mnist_transforms,
        )
        idx = torch.cat(
            [
                self.test_dataset.targets[:, None] == digit
                for digit in self.digits
            ],
            dim=1,
        ).any(dim=1)
        self.test_dataset.targets = self.test_dataset.targets[idx]
        self.test_dataset.data = self.test_dataset.data[idx]
def load_mnist_datasets(
        limit_train_samples_to: int = 10_000) -> Tuple[Dataset, Dataset]:
    train_dataset = MNIST(
        "data",  # folder where data should be saved
        download=True,
        transform=ToTensor(),  # transform to convert images to torch tensors
        target_transform=IntegerToOneHotConverter(10),
    )
    test_dataset = MNIST(
        "data",  # folder where data should be saved
        download=True,
        train=False,
        transform=ToTensor(),  # transform to convert to torch tensors
        target_transform=IntegerToOneHotConverter(10),
    )

    # limiting for faster training
    indices = np.random.permutation(len(
        train_dataset.data))[:limit_train_samples_to]
    train_dataset.data = train_dataset.data[indices]
    train_dataset.targets = train_dataset.targets[indices]
    return train_dataset, test_dataset
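
`IntegerToOneHotConverter` is not shown in the snippet; judging from its use as a `target_transform` constructed with 10, it presumably maps an integer label to a one-hot vector. A possible sketch, assuming that behaviour:

# Assumed target transform, not part of the original snippet.
import torch


class IntegerToOneHotConverter:
    def __init__(self, num_classes):
        self.num_classes = num_classes

    def __call__(self, label):
        return torch.nn.functional.one_hot(
            torch.tensor(label), num_classes=self.num_classes).float()
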
def define_dataloader(opt: Namespace, split: str = 'train'):
    transform = define_transform(opt=opt, split=split)
    print(split, transform)

    if opt.dataset == 'mnist':
        print("Loading MNIST...")
        dataset = MNIST(root=opt.data_path,
                        train=True if split == 'train' else False,
                        transform=transform,
                        download=True)
    elif opt.dataset == 'cifar10':
        print("Loading CIFAR10...")
        dataset = CIFAR10(root=opt.data_path,
                          train=True if split == 'train' else False,
                          transform=transform,
                          download=True)
        if opt.corrupt_p > 0:
            dataset.targets = corrupt_labels(dataset.targets, p=opt.corrupt_p)

    elif opt.dataset == 'imagefolder':
        dataset = ImageFolder(
            root=opt.train_path if split == 'train' else opt.val_path,
            transform=transform)
    else:
        raise ValueError('Undefined dataset type')

    do_shuffle = (split == 'train')
    batch_size = opt.batch_size if split == 'train' else 100
    sampler = ImbalancedDatasetSampler(dataset) if (
        split == 'train' and opt.sampler == 'imbalanced') else None
    data_loader = DataLoader(dataset=dataset,
                             sampler=sampler,
                             batch_size=batch_size,
                             num_workers=opt.num_workers,
                             # a custom sampler is mutually exclusive with shuffle=True
                             shuffle=do_shuffle and sampler is None,
                             drop_last=(split == 'train'))
    return data_loader
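
`corrupt_labels` is referenced above but not defined; it presumably replaces a fraction `p` of the labels with random classes. One plausible sketch under that assumption:

# Assumed label-corruption helper, not part of the original snippet.
import numpy as np


def corrupt_labels(targets, p, num_classes=10, seed=0):
    targets = np.array(targets)
    rng = np.random.RandomState(seed)
    corrupt_mask = rng.rand(len(targets)) < p
    random_labels = rng.randint(0, num_classes, size=len(targets))
    targets[corrupt_mask] = random_labels[corrupt_mask]
    return targets.tolist()  # CIFAR10 stores targets as a Python list
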
num_datapoints = 5000

#dataloaderzeros = DataLoader(
#    MNIST('./data', train=True, download=True, transform=img_transform),
#    batch_size=batch_size, shuffle=True, collate_fn = my_collate)
print('Making each number training set')
datasets = []
for i in range(10):
    dataset = MNIST('./data',
                    transform=img_transform,
                    download=True,
                    train=True)
    #print(dataset)
    #[0:5851]
    idx = dataset.targets == i
    dataset.targets = dataset.targets[idx]
    dataset.data = dataset.data[idx]
    dataset = torch.utils.data.random_split(
        dataset,
        [num_datapoints, len(dataset) - num_datapoints])[0]
    #dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    datasets.append(dataset)
    #print(len(dataset))

print('Making each number combined with 0 VAE set')
VAE_dataloaders_w_zeros = []
for i in range(10):
    datasetandzero = [datasets[0], datasets[i]]
    combined = ConcatDataset(datasetandzero)
    dataloader = DataLoader(combined, batch_size=batch_size, shuffle=True)
    VAE_dataloaders_w_zeros.append(dataloader)
Example #11
    split_model.train()

    # ----- Data -----
    data_transform = transforms.Compose([
        transforms.ToTensor(),
        # PyTorch examples; https://github.com/pytorch/examples/blob/master/mnist/main.py
        transforms.Normalize((0.1307, ), (0.3081, )),
    ])
    train_data = MNIST(data_dir,
                       download=True,
                       train=True,
                       transform=data_transform)

    # We only want to use a subset of the data to force overfitting
    train_data.data = train_data.data[:args.n_train_data]
    train_data.targets = train_data.targets[:args.n_train_data]

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size)

    # Test data
    test_data = MNIST(data_dir,
                      download=True,
                      train=False,
                      transform=data_transform)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size=1024)

    # ----- Train -----
    n_epochs = args.epochs

    best_accuracy = 0.0
Example #12
def indicator_dataset(dataset, num, limit, class_object, args):
    transform = transforms.ToTensor()
    if (dataset == 'MNIST'):
        print("Loading {}-indicator for MNIST dataset...".format(num))
        data_train = MNIST(root='./data',
                           train=True,
                           download=True,
                           transform=transform)
        data_test = MNIST(root='./data',
                          train=False,
                          download=True,
                          transform=transform)
    elif (dataset == 'fMNIST'):
        print("Loading full Fashion-MNIST dataset...")
        data_train = FashionMNIST(root='./data',
                                  train=True,
                                  download=True,
                                  transform=transform)
        data_test = FashionMNIST(root='./data',
                                 train=False,
                                 download=True,
                                 transform=transform)
    else:
        print("Loading full QuickDraw! dataset...")
        train_data = []
        train_label = []
        test_data = []
        test_label = []
        for i in range(len(class_object)):
            # load npy file and concatenate data
            ob = np.load('./data/quickdraw/full_numpy_bitmap_' +
                         class_object[i] + '.npy')
            # choose train size and test size
            train = ob[0:5000, ]
            test = ob[5000:6000, ]
            train_label = np.concatenate(
                (train_label, i * np.ones(train.shape[0])), axis=0)
            test_label = np.concatenate(
                (test_label, i * np.ones(test.shape[0])), axis=0)

            if i == 0:
                train_data = train
                test_data = test
            else:
                train_data = np.concatenate((train_data, train), axis=0)
                test_data = np.concatenate((test_data, test), axis=0)

        train_label[train_label != num] = -1
        train_label[train_label == num] = 1
        train_label[train_label == -1] = 0

        test_label[test_label != num] = -1
        test_label[test_label == num] = 1
        test_label[test_label == -1] = 0

        # generate dataloader
        trainset = feature_Dataset(train_data, train_label.astype(int),
                                   transform)
        trainloader = DataLoader(trainset,
                                 batch_size=args.batch_size_train,
                                 shuffle=True)

        testset = feature_Dataset(test_data, test_label.astype(int), transform)
        testloader = DataLoader(testset,
                                batch_size=args.batch_size_test,
                                shuffle=False)

    if (dataset == 'MNIST' or dataset == 'fMNIST'):
        # train batch
        idx = (data_train.targets < limit)
        data_train.targets = data_train.targets[idx]
        data_train.data = data_train.data[idx]

        print("Changing label...")
        data_train.targets[data_train.targets == 1] = 10
        data_train.targets[data_train.targets == 6] = 1
        data_train.targets[data_train.targets == 10] = 6

        for i in num:
            data_train.targets[data_train.targets == i] = 10

        data_train.targets[data_train.targets != 10] = 0
        data_train.targets[data_train.targets == 10] = 1

        # balance the classes: keep only as many 0-labelled samples as there are 1-labelled samples
        idx_0 = (data_train.targets == 0)
        idx_1 = (data_train.targets == 1)
        sum_idx_0 = 0
        total = idx_1.sum()

        for i in range(len(idx_0)):
            sum_idx_0 += idx_0[i]

            if sum_idx_0 == total:
                idx_0[i + 1:] = False
                break

        idx = idx_0 | idx_1
        print(sum(idx))
        data_train.targets = data_train.targets[idx]
        data_train.data = data_train.data[idx]

        train_label = data_train.targets.cpu().detach().numpy()
        trainloader = DataLoader(data_train,
                                 batch_size=args.batch_size_train,
                                 shuffle=True)

        # test batch
        idx = (data_test.targets < limit)
        data_test.targets = data_test.targets[idx]
        data_test.data = data_test.data[idx]

        print("Changing label...")
        data_test.targets[data_test.targets == 1] = 10
        data_test.targets[data_test.targets == 6] = 1
        data_test.targets[data_test.targets == 10] = 6

        for i in num:
            data_test.targets[data_test.targets == i] = 10

        data_test.targets[data_test.targets != 10] = 0
        data_test.targets[data_test.targets == 10] = 1

        idx_0 = (data_test.targets == 0)
        idx_1 = (data_test.targets == 1)
        sum_idx_0 = 0
        print(sum(idx_1))
        # total = sum(idx_1)
        total = 1042

        for i in range(len(idx_0)):
            sum_idx_0 += idx_0[i]

            if sum_idx_0 == total:
                idx_0[i + 1:] = False
                break

        idx = idx_0 | idx_1
        print(sum(idx))
        data_test.targets = data_test.targets[idx]
        data_test.data = data_test.data[idx]

        test_label = data_test.targets.cpu().detach().numpy()
        testloader = DataLoader(data_test,
                                batch_size=args.batch_size_test,
                                shuffle=False)

    return trainloader, testloader, train_label, test_label
def main():

    img_transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Lambda(lambda tensor:min_max_normalization(tensor, 0, 1)),
        transforms.Lambda(lambda tensor:tensor_round(tensor))
    ])
    

    dataset = MNIST('./data', train=True, transform=img_transform, download=True)
    train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    testset = MNIST('./data', train=False, transform=img_transform, download=True)
    testloader = DataLoader(testset, batch_size=batch_size, shuffle=True)
    
    # visualize the distributions of the continuous feature U over 5,000 images
    visuadata =  MNIST('./data', train=False, transform=img_transform, download=True)
    X = dataset.data
    L = np.array(dataset.targets)
    
    first = True
    
    for label in range(10):
        index = np.where(L == label)[0]
    
        N = index.shape[0]
        np.random.seed(0)
        perm = np.random.permutation(N)
        index = index[perm]
    
        data = X[index[0:500]]
        labels = L[index[0:500]]
        if first:
            visualization_L = labels
            visualization_data = data
        else:
            visualization_L = np.concatenate((visualization_L, labels))
            visualization_data = torch.cat((visualization_data, data))
    
    
        first = False
    
        visuadata.data = visualization_data
        visuadata.targets = visualization_L
    
    # Data Loader
    visualization_loader = DataLoader(dataset=visuadata,
                                            batch_size=batch_size,
                                            shuffle=False,
                                            num_workers = 0)      
        
    
    
    model = autoencoder(encode_length=encode_length)
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(
        model.parameters(), lr=learning_rate, weight_decay=1e-5)

    
    for epoch in range(num_epochs):
        print('--------training epoch {}--------'.format(epoch))        
        adjust_learning_rate(optimizer, epoch)    
        
        # train the model using SGD        
        for i, (img, _) in enumerate(train_loader):   
            img = img.view(img.size(0), -1)
            img = Variable(img)
  
            # ===================forward=====================
            output, h, b = model(img)
            loss_BCE = criterion(output, img)
            onesvec  =  Variable(torch.ones(h.size(0), 1))  
            Tcode  = torch.transpose(b, 1, 0)
            loss_reg = torch.mean(torch.pow(Tcode.mm(onesvec)/h.size(0), 2))/2
            loss = loss_BCE + Alpha*loss_reg
            # ===================backward====================
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
        # Test the Model using testset            
        if (epoch + 1) % 1 == 0:  # evaluate after every epoch


            '''
            Calculate the mAP over test set            
            '''             

            retrievalB, retrievalL, queryB, queryL = compress(train_loader, testloader, model)            
            result_map = calculate_map(qB=queryB, rB=retrievalB, queryL=queryL, retrievalL=retrievalL)
            print('---{}_mAP: {}---'.format(name, result_map))  
            
          
            
            '''
            Visualization of the latent variables over 5,000 images.
            In this setting, we set encode_length = 3.
            '''
            if encode_length ==3:
                z_buf = list([])
                label_buf = list([])
                for ii, (img, labelb) in enumerate(visualization_loader):
                    img = img.view(img.size(0), -1)
                    img = Variable(img)
                    # ===================forward=====================
                    _, qz, _ = model(img)        
                    z_buf.extend(qz.cpu().data.numpy())
                    label_buf.append(labelb)
                X = np.vstack(z_buf)
                Y = np.hstack(label_buf)
                plot_latent_variable3d(X, Y, epoch, name)   
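
`min_max_normalization` and `tensor_round` are used in the transform pipeline above but not defined here; since the model is trained with `nn.BCELoss` on reconstructed images, they plausibly rescale each tensor and round it to {0, 1}. A sketch under that assumption:

# Assumed preprocessing helpers, not part of the original snippet.
import torch


def min_max_normalization(tensor, min_value, max_value):
    # rescale so the tensor's minimum maps to min_value and its maximum to max_value
    tensor = tensor - tensor.min()
    tensor = tensor / tensor.max()
    return tensor * (max_value - min_value) + min_value


def tensor_round(tensor):
    # binarise pixel values for the BCE reconstruction objective
    return torch.round(tensor)
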
def fetch_dataloaders(args):
    # preprocessing transforms
    transform = T.Compose([
        T.ToTensor(),  # tensor in [0,1]
        lambda x: x.mul(255).div(2**(8 - args.n_bits)).floor(),  # lower bits
        partial(preprocess, n_bits=args.n_bits)
    ])  # to model space [-1,1]
    target_transform = (lambda y: torch.eye(args.n_cond_classes)[y]
                        ) if args.n_cond_classes else None

    if args.dataset == 'mnist':
        args.image_dims = (1, 28, 28)
        train_dataset = MNIST(args.data_path,
                              train=True,
                              transform=transform,
                              target_transform=target_transform)
        valid_dataset = MNIST(args.data_path,
                              train=False,
                              transform=transform,
                              target_transform=target_transform)
    elif args.dataset == 'cifar10':
        args.image_dims = (3, 32, 32)
        train_dataset = CIFAR10(args.data_path,
                                train=True,
                                transform=transform,
                                target_transform=target_transform)
        valid_dataset = CIFAR10(args.data_path,
                                train=False,
                                transform=transform,
                                target_transform=target_transform)
    elif args.dataset == 'colored-mnist':
        args.image_dims = (3, 28, 28)
        # NOTE -- data is quantized to 2 bits and in (N,H,W,C) format
        with open(args.data_path, 'rb'
                  ) as f:  # return dict {'train': np array; 'test': np array}
            data = pickle.load(f)
        # quantize to n_bits to match the transforms for other datasets and construct tensors in shape N,C,H,W
        train_data = torch.from_numpy(
            np.floor(data['train'].astype(np.float32) /
                     (2**(2 - args.n_bits)))).permute(0, 3, 1, 2)
        valid_data = torch.from_numpy(
            np.floor(data['test'].astype(np.float32) /
                     (2**(2 - args.n_bits)))).permute(0, 3, 1, 2)
        # preprocess to [-1,1] and setup datasets -- NOTE using 0s for labels to have a symmetric dataloader
        train_dataset = TensorDataset(preprocess(train_data, args.n_bits),
                                      torch.zeros(train_data.shape[0]))
        valid_dataset = TensorDataset(preprocess(valid_data, args.n_bits),
                                      torch.zeros(valid_data.shape[0]))
    else:
        raise RuntimeError('Dataset not recognized')

    if args.mini_data:  # reduce the dataset to a single batch
        if args.dataset == 'colored-mnist':
            train_dataset = train_dataset.tensors[0][:args.batch_size]
        else:
            train_dataset.data = train_dataset.data[:args.batch_size]
            train_dataset.targets = train_dataset.targets[:args.batch_size]
        valid_dataset = train_dataset

    print(
        'Dataset {}\n\ttrain len: {}\n\tvalid len: {}\n\tshape: {}\n\troot: {}'
        .format(args.dataset, len(train_dataset), len(valid_dataset),
                train_dataset[0][0].shape, args.data_path))

    train_dataloader = DataLoader(train_dataset,
                                  args.batch_size,
                                  shuffle=True,
                                  pin_memory=(args.device.type == 'cuda'),
                                  num_workers=4)
    valid_dataloader = DataLoader(valid_dataset,
                                  args.batch_size,
                                  shuffle=False,
                                  pin_memory=(args.device.type == 'cuda'),
                                  num_workers=4)

    # save a sample
    data_sample = next(iter(train_dataloader))[0]
    writer.add_image('data_sample',
                     make_grid(data_sample, normalize=True, scale_each=True),
                     args.step)
    save_image(data_sample,
               os.path.join(args.output_dir, 'data_sample.png'),
               normalize=True,
               scale_each=True)

    return train_dataloader, valid_dataloader
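
`preprocess` is referenced in the transform and in the colored-mnist branch but not defined above; the comments say it maps `n_bits`-quantised pixel values to model space [-1, 1]. One plausible definition consistent with that (an assumption, not the original code):

def preprocess(x, n_bits):
    # x holds quantised pixel values in [0, 2**n_bits - 1]; map them linearly to [-1, 1]
    return x.div(2 ** n_bits - 1).mul(2).sub(1)
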
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(seed)
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# flatten 28*28 images to a 784 vector for each image
transform = transforms.Compose([
    transforms.ToTensor(),  # convert to tensor
    transforms.Lambda(lambda x: x.view(-1))  # flatten into vector
])

trainset = MNIST(".", train=True, download=True, transform=transform)
testset = MNIST(".", train=False, download=True, transform=transform)
#split the data
trainset.data = trainset.data[0:27105]
trainset.targets = trainset.targets[0:27105]
# # print(trainset.targets[0:11905])
#for distillation
trainloaderforDistillation = DataLoader(trainset, batch_size=1000, shuffle=True)
testloaderforDistillation = DataLoader(testset, batch_size=1000, shuffle=True)

class BetterCNNforDistillation(nn.Module):
    def __init__(self):
        super(BetterCNNforDistillation, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, (5, 5), padding=0)
        self.conv2 = nn.Conv2d(32, 64, (3, 3), padding=0)
        self.fc1 = nn.Linear(64 * 5**2, 1024)
        self.fc2 = nn.Linear(1024, 50)
        self.fc3 = nn.Linear(50, 10)
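
    # NOTE: the snippet ends before a forward pass is defined. Assuming the usual
    # `import torch.nn.functional as F`, a forward consistent with the layer shapes
    # above (28x28 input, two 2x2 max-pools) could look like this (a sketch, not the
    # original author's code):
    def forward(self, x):
        out = F.relu(self.conv1(x))      # 1x28x28 -> 32x24x24
        out = F.max_pool2d(out, 2)       # -> 32x12x12
        out = F.relu(self.conv2(out))    # -> 64x10x10
        out = F.max_pool2d(out, 2)       # -> 64x5x5
        out = out.view(out.size(0), -1)  # flatten to 64 * 5 * 5
        out = F.relu(self.fc1(out))
        out = F.relu(self.fc2(out))
        return self.fc3(out)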

            
Example #16
def multi_indicator_dataset(dataset, num, limit, class_object, args):
    transform = transforms.ToTensor()
    if (dataset == 'MNIST'):
        print("Loading {}-multi-indicator for MNIST dataset...".format(num))
        data_train = MNIST(root='./data',
                           train=True,
                           download=True,
                           transform=transform)
        data_test = MNIST(root='./data',
                          train=False,
                          download=True,
                          transform=transform)
    elif (dataset == 'fMNIST'):
        print("Loading full Fashion-MNIST dataset...")
        data_train = FashionMNIST(root='./data',
                                  train=True,
                                  download=True,
                                  transform=transform)
        data_test = FashionMNIST(root='./data',
                                 train=False,
                                 download=True,
                                 transform=transform)
    else:
        # NOTE: unlike the functions above, no QuickDraw branch is implemented here
        print("Loading full QuickDraw! dataset...")

    if (dataset == 'MNIST' or dataset == 'fMNIST'):
        # train batch
        idx = (data_train.targets < limit)
        data_train.targets = data_train.targets[idx]
        data_train.data = data_train.data[idx]

        idx = 1
        for i in num:
            data_train.targets[data_train.targets == i] = 10 + idx
            print('adding...')
            idx += 1

        data_train.targets[data_train.targets < 10] = 0
        data_train.targets[data_train.targets > 10] -= 10

        idx_0 = (data_train.targets == 0)
        idx_1 = (data_train.targets != 0)
        sum_idx_0 = 0
        total = sum(idx_1) // len(num)

        for i in range(len(idx_0)):
            sum_idx_0 += idx_0[i]

            if sum_idx_0 == total:
                idx_0[i + 1:] = False
                break

        idx = idx_0 | idx_1
        print(sum(idx))
        data_train.targets = data_train.targets[idx]
        data_train.data = data_train.data[idx]

        train_label = data_train.targets.cpu().detach().numpy()
        trainloader = DataLoader(data_train,
                                 batch_size=args.batch_size_train,
                                 shuffle=True)

        # test batch
        idx = (data_test.targets < limit)
        data_test.targets = data_test.targets[idx]
        data_test.data = data_test.data[idx]

        idx = 1
        for i in num:
            data_test.targets[data_test.targets == i] = 10 + idx
            idx += 1

        data_test.targets[data_test.targets < 10] = 0
        data_test.targets[data_test.targets > 10] -= 10

        idx_0 = (data_test.targets == 0)
        idx_1 = (data_test.targets != 0)
        sum_idx_0 = 0
        total = sum(idx_1) // len(num)
        print(sum(idx_1))
        # total = 843

        for i in range(len(idx_0)):
            sum_idx_0 += idx_0[i]

            if sum_idx_0 == total:
                idx_0[i + 1:] = False
                break

        idx = idx_0 | idx_1
        print(sum(idx))
        data_test.targets = data_test.targets[idx]
        data_test.data = data_test.data[idx]

        test_label = data_test.targets.cpu().detach().numpy()
        testloader = DataLoader(data_test,
                                batch_size=args.batch_size_test,
                                shuffle=False)

    return trainloader, testloader, train_label, test_label
Example #17
    for run in range(args.runs):

        train_set = MNIST("/home/jeff/datasets",
                          download=True,
                          train=True,
                          transform=tx)
        test_set = MNIST("/home/jeff/datasets",
                         download=True,
                         train=False,
                         transform=tx)

        val_set = copy.deepcopy(train_set)

        perm = torch.randperm(train_set.data.size(0))
        train_set.data = train_set.data[perm[:55000]]
        train_set.targets = train_set.targets[perm[:55000]]

        val_set.data = val_set.data[perm[55000:]]
        val_set.targets = val_set.targets[perm[55000:]]

        train = DataLoader(train_set,
                           shuffle=True,
                           batch_size=args.batch_size,
                           num_workers=4)
        val = DataLoader(val_set,
                         shuffle=True,
                         batch_size=args.batch_size,
                         num_workers=4)
        test = DataLoader(test_set,
                          shuffle=False,
                          batch_size=args.batch_size,
                          num_workers=4)
def load_data(config):
    normal_class = config['normal_class']
    batch_size = config['batch_size']
    img_size = config['image_size']

    if config['dataset_name'] in ['cifar10']:
        img_transform = transforms.Compose([
            transforms.ToTensor(),
        ])

        os.makedirs("./train/CIFAR10", exist_ok=True)
        dataset = CIFAR10('./train/CIFAR10',
                          train=True,
                          download=True,
                          transform=img_transform)
        dataset.data = dataset.data[np.array(dataset.targets) == normal_class]
        dataset.targets = [normal_class] * dataset.data.shape[0]

        train_set, val_set = torch.utils.data.random_split(
            dataset, [dataset.data.shape[0] - 851, 851])

        os.makedirs("./test/CIFAR10", exist_ok=True)
        test_set = CIFAR10("./test/CIFAR10",
                           train=False,
                           download=True,
                           transform=img_transform)

    elif config['dataset_name'] in ['mnist']:
        img_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
        ])

        os.makedirs("./train/MNIST", exist_ok=True)
        dataset = MNIST('./train/MNIST',
                        train=True,
                        download=True,
                        transform=img_transform)
        dataset.data = dataset.data[np.array(dataset.targets) == normal_class]
        dataset.targets = [normal_class] * dataset.data.shape[0]

        train_set, val_set = torch.utils.data.random_split(
            dataset, [dataset.data.shape[0] - 851, 851])

        os.makedirs("./test/MNIST", exist_ok=True)
        test_set = MNIST("./test/MNIST",
                         train=False,
                         download=True,
                         transform=img_transform)

    elif config['dataset_name'] in ['fashionmnist']:
        img_transform = transforms.Compose([
            transforms.Resize(img_size),
            transforms.ToTensor(),
        ])

        os.makedirs("./train/FashionMNIST", exist_ok=True)
        dataset = FashionMNIST('./train/FashionMNIST',
                               train=True,
                               download=True,
                               transform=img_transform)
        dataset.data = dataset.data[np.array(dataset.targets) == normal_class]
        dataset.targets = [normal_class] * dataset.data.shape[0]

        train_set, val_set = torch.utils.data.random_split(
            dataset, [dataset.data.shape[0] - 851, 851])

        os.makedirs("./test/FashionMNIST", exist_ok=True)
        test_set = FashionMNIST("./test/FashionMNIST",
                                train=False,
                                download=True,
                                transform=img_transform)

    elif config['dataset_name'] in ['brain_tumor', 'head_ct']:
        img_transform = transforms.Compose([
            transforms.Resize([img_size, img_size]),
            transforms.Grayscale(num_output_channels=1),
            transforms.ToTensor()
        ])

        root_path = 'Dataset/medical/' + config['dataset_name']
        train_data_path = root_path + '/train'
        test_data_path = root_path + '/test'
        dataset = ImageFolder(root=train_data_path, transform=img_transform)
        load_dataset = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        train_dataset_array = next(iter(load_dataset))[0]
        my_dataset = TensorDataset(train_dataset_array)
        train_set, val_set = torch.utils.data.random_split(
            my_dataset, [train_dataset_array.shape[0] - 5, 5])

        test_set = ImageFolder(root=test_data_path, transform=img_transform)

    elif config['dataset_name'] in ['coil100']:
        img_transform = transforms.Compose([transforms.ToTensor()])

        root_path = 'Dataset/coil100/' + config['dataset_name']
        train_data_path = root_path + '/train'
        test_data_path = root_path + '/test'
        dataset = ImageFolder(root=train_data_path, transform=img_transform)
        load_dataset = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        train_dataset_array = next(iter(load_dataset))[0]
        my_dataset = TensorDataset(train_dataset_array)
        train_set, val_set = torch.utils.data.random_split(
            my_dataset, [train_dataset_array.shape[0] - 5, 5])

        test_set = ImageFolder(root=test_data_path, transform=img_transform)

    elif config['dataset_name'] in ['MVTec']:
        data_path = 'Dataset/MVTec/' + normal_class + '/train'
        data_list = []

        orig_transform = transforms.Compose(
            [transforms.Resize(img_size),
             transforms.ToTensor()])

        orig_dataset = ImageFolder(root=data_path, transform=orig_transform)

        train_orig, val_set = torch.utils.data.random_split(
            orig_dataset, [len(orig_dataset) - 25, 25])
        data_list.append(train_orig)

        for i in range(3):
            img_transform = transforms.Compose([
                transforms.Resize(img_size),
                transforms.RandomAffine(0, scale=(1.05, 1.2)),
                transforms.ToTensor()
            ])

            dataset = ImageFolder(root=data_path, transform=img_transform)
            data_list.append(dataset)

        dataset = ConcatDataset(data_list)

        train_loader = torch.utils.data.DataLoader(dataset,
                                                   batch_size=batch_size,
                                                   shuffle=True)
        train_dataset_array = next(iter(train_loader))[0]
        train_set = TensorDataset(train_dataset_array)

        test_data_path = 'Dataset/MVTec/' + normal_class + '/test'
        test_set = ImageFolder(root=test_data_path, transform=orig_transform)

    train_dataloader = torch.utils.data.DataLoader(
        train_set,
        batch_size=batch_size,
        shuffle=True,
    )

    val_dataloader = torch.utils.data.DataLoader(
        val_set,
        batch_size=batch_size,
        shuffle=True,
    )

    test_dataloader = torch.utils.data.DataLoader(
        test_set,
        batch_size=batch_size,
        shuffle=True,
    )

    return train_dataloader, val_dataloader, test_dataloader
def load_data(config):
    normal_class = config['normal_class']
    batch_size = config['batch_size']

    if config['dataset_name'] in ['cifar10']:
        img_transform = transforms.Compose([
            transforms.Resize((256, 256), Image.ANTIALIAS),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=(0.485, 0.456, 0.406),
                                 std=(0.229, 0.224, 0.225))
        ])

        os.makedirs("./Dataset/CIFAR10/train", exist_ok=True)
        dataset = CIFAR10('./Dataset/CIFAR10/train',
                          train=True,
                          download=True,
                          transform=img_transform)
        print("Cifar10 DataLoader Called...")
        print("All Train Data: ", dataset.data.shape)
        dataset.data = dataset.data[np.array(dataset.targets) == normal_class]
        dataset.targets = [normal_class] * dataset.data.shape[0]
        print("Normal Train Data: ", dataset.data.shape)

        os.makedirs("./Dataset/CIFAR10/test", exist_ok=True)
        test_set = CIFAR10("./Dataset/CIFAR10/test",
                           train=False,
                           download=True,
                           transform=img_transform)
        print("Test Train Data:", test_set.data.shape)

    elif config['dataset_name'] in ['mnist']:
        img_transform = transforms.Compose(
            [transforms.Resize((32, 32)),
             transforms.ToTensor()])

        os.makedirs("./Dataset/MNIST/train", exist_ok=True)
        dataset = MNIST('./Dataset/MNIST/train',
                        train=True,
                        download=True,
                        transform=img_transform)
        print("MNIST DataLoader Called...")
        print("All Train Data: ", dataset.data.shape)
        dataset.data = dataset.data[np.array(dataset.targets) == normal_class]
        dataset.targets = [normal_class] * dataset.data.shape[0]
        print("Normal Train Data: ", dataset.data.shape)

        os.makedirs("./Dataset/MNIST/test", exist_ok=True)
        test_set = MNIST("./Dataset/MNIST/test",
                         train=False,
                         download=True,
                         transform=img_transform)
        print("Test Train Data:", test_set.data.shape)

    elif config['dataset_name'] in ['fashionmnist']:
        img_transform = transforms.Compose(
            [transforms.Resize((32, 32)),
             transforms.ToTensor()])

        os.makedirs("./Dataset/FashionMNIST/train", exist_ok=True)
        dataset = FashionMNIST('./Dataset/FashionMNIST/train',
                               train=True,
                               download=True,
                               transform=img_transform)
        print("FashionMNIST DataLoader Called...")
        print("All Train Data: ", dataset.data.shape)
        dataset.data = dataset.data[np.array(dataset.targets) == normal_class]
        dataset.targets = [normal_class] * dataset.data.shape[0]
        print("Normal Train Data: ", dataset.data.shape)

        os.makedirs("./Dataset/FashionMNIST/test", exist_ok=True)
        test_set = FashionMNIST("./Dataset/FashionMNIST/test",
                                train=False,
                                download=True,
                                transform=img_transform)
        print("Test Train Data:", test_set.data.shape)

    elif config['dataset_name'] in ['mvtec']:
        data_path = 'Dataset/MVTec/' + normal_class + '/train'
        mvtec_img_size = config['mvtec_img_size']

        orig_transform = transforms.Compose([
            transforms.Resize([mvtec_img_size, mvtec_img_size]),
            transforms.ToTensor()
        ])

        dataset = ImageFolder(root=data_path, transform=orig_transform)

        test_data_path = 'Dataset/MVTec/' + normal_class + '/test'
        test_set = ImageFolder(root=test_data_path, transform=orig_transform)

    elif config['dataset_name'] in ['retina']:
        data_path = 'Dataset/OCT2017/train'

        orig_transform = transforms.Compose(
            [transforms.Resize([128, 128]),
             transforms.ToTensor()])

        dataset = ImageFolder(root=data_path, transform=orig_transform)

        test_data_path = 'Dataset/OCT2017/test'
        test_set = ImageFolder(root=test_data_path, transform=orig_transform)

    else:
        raise Exception(
            "You entered {} as the dataset, which is not a valid dataset for this repository!"
            .format(config['dataset_name']))

    train_dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=True,
    )
    test_dataloader = torch.utils.data.DataLoader(
        test_set,
        batch_size=batch_size,
        shuffle=False,
    )

    return train_dataloader, test_dataloader
Example #20
def get_dataset(args,
                config,
                test=False,
                rev=False,
                one_hot=True,
                subset=False,
                shuffle=True):
    total_labels = 10 if config.data.dataset.lower().split(
        '_')[0] != 'cifar100' else 100
    reduce_labels = total_labels != config.n_labels
    if config.data.dataset.lower() in [
            'mnist_transferbaseline', 'cifar10_transferbaseline',
            'fashionmnist_transferbaseline', 'cifar100_transferbaseline'
    ]:
        print('loading baseline transfer dataset')
        rev = True
        test = False
        subset = True
        reduce_labels = True

    if config.data.random_flip is False:
        transform = transforms.Compose(
            [transforms.Resize(config.data.image_size),
             transforms.ToTensor()])
    else:
        if not test:
            transform = transforms.Compose([
                transforms.Resize(config.data.image_size),
                transforms.RandomHorizontalFlip(p=0.5),
                transforms.ToTensor()
            ])
        else:
            transform = transforms.Compose([
                transforms.Resize(config.data.image_size),
                transforms.ToTensor()
            ])

    if config.data.dataset.lower().split('_')[0] == 'mnist':
        dataset = MNIST(os.path.join(args.run, 'datasets'),
                        train=not test,
                        download=True,
                        transform=transform)
    elif config.data.dataset.lower().split('_')[0] in [
            'fashionmnist', 'fmnist'
    ]:
        dataset = FashionMNIST(os.path.join(args.run, 'datasets'),
                               train=not test,
                               download=True,
                               transform=transform)
    elif config.data.dataset.lower().split('_')[0] == 'cifar10':
        dataset = CIFAR10(os.path.join(args.run, 'datasets'),
                          train=not test,
                          download=True,
                          transform=transform)
    elif config.data.dataset.lower().split('_')[0] == 'cifar100':
        dataset = CIFAR100(os.path.join(args.run, 'datasets'),
                           train=not test,
                           download=True,
                           transform=transform)
    else:
        raise ValueError('Unknown config dataset {}'.format(
            config.data.dataset))

    if type(dataset.targets) is list:
        # CIFAR10 and CIFAR100 store targets as list, unlike (F)MNIST which uses torch.Tensor
        dataset.targets = np.array(dataset.targets)

    if not rev:
        labels_to_consider = np.arange(config.n_labels)
        target_transform = lambda label: single_one_hot_encode(
            label, n_labels=config.n_labels)
        cond_size = config.n_labels

    else:
        labels_to_consider = np.arange(config.n_labels, total_labels)
        target_transform = lambda label: single_one_hot_encode_rev(
            label, start_label=config.n_labels, n_labels=total_labels)
        cond_size = total_labels - config.n_labels
    if reduce_labels:
        idx = np.any(
            [np.array(dataset.targets) == i for i in labels_to_consider],
            axis=0).nonzero()
        dataset.targets = dataset.targets[idx]
        dataset.data = dataset.data[idx]
    if one_hot:
        dataset.target_transform = target_transform
    if subset and args.subset_size != 0:
        dataset = torch.utils.data.Subset(dataset, np.arange(args.subset_size))
    dataloader = DataLoader(dataset,
                            batch_size=config.training.batch_size,
                            shuffle=shuffle,
                            num_workers=0)

    return dataloader, dataset, cond_size