Example #1
# Note: the imports below are assumed; augment_mnist_rgb, no_augment_mnist_rgb
# and convert_mnist_images are project-local helpers defined elsewhere in the repo.
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST, SVHN


def get_loader_mnist_rgb(batchsize):
    transform_train = augment_mnist_rgb()

    trainset_m = MNIST(root='./data', train=True, download=True, transform=transform_train)
    testset_m = MNIST(root='./data', train=False, download=True, transform=no_augment_mnist_rgb())
    # The SVHN dataset stores its samples as NumPy arrays, so convert the
    # MNIST tensors (.data/.targets supersede the deprecated
    # .train_data/.train_labels attributes):
    train_data = trainset_m.data.numpy()
    test_data = testset_m.data.numpy()
    train_labels = trainset_m.targets.numpy()
    test_labels = testset_m.targets.numpy()
    print("Original MNIST")
    print(train_data.shape, len(train_labels))
    print(test_data.shape, len(test_labels))

    ### use SVHN object to load MNIST RGB data
    trainset = SVHN(root='./data', split='train', download=True, transform=transform_train)
    testset = SVHN(root='./data', split='test', download=True, transform=no_augment_mnist_rgb())
    trainset.data = convert_mnist_images(trainset_m.data)
    testset.data = convert_mnist_images(testset_m.data)
    trainset.labels = train_labels  # the NumPy arrays computed above
    testset.labels = test_labels
    print("RGB MNIST")
    print(trainset.data.shape, len(trainset.labels))
    print(testset.data.shape, len(testset.labels))

    trainloader = DataLoader(trainset, batch_size=batchsize, shuffle=True, num_workers=0)
    testloader = DataLoader(testset, batch_size=batchsize, shuffle=False, num_workers=0)

    print("MNIST train min=%f, max=%f" % (trainset.data.min(), trainset.data.max()))
    print("MNIST test min=%f, max=%f" % (testset.data.min(), testset.data.max()))

    return trainloader, testloader
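
A minimal usage sketch (assuming the function above and its helper transforms are importable; the batch size is illustrative, and the printed shape depends on what convert_mnist_images emits):

trainloader, testloader = get_loader_mnist_rgb(batchsize=128)
images, labels = next(iter(trainloader))
print(images.shape, labels.shape)  # e.g. torch.Size([128, 3, 32, 32]) for 32x32 RGB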
Example #2
# Note: the imports below are assumed; no_augment_mnist_rgb is a project-local
# helper, and gdd is GoogleDriveDownloader from the google_drive_downloader package.
import numpy as np
from scipy.io import loadmat
from torch.utils.data import DataLoader
from torchvision.datasets import SVHN
from google_drive_downloader import GoogleDriveDownloader as gdd


def get_loader_digit_rgb(batchsize):
    transform_train = no_augment_mnist_rgb()

    # Download the synthetic-digits data from Ganin's Google Drive.
    gdd.download_file_from_google_drive(file_id='0B9Z4d7lAwbnTSVR1dEFSRUFxOUU',
                                        dest_path='data/SynthDigits.zip',
                                        unzip=True)

    folder_name = "data/"
    file_train = 'synth_train_32x32.mat'
    train_data = loadmat(folder_name+file_train)
    train_x = train_data["X"]
    train_x = np.rollaxis(train_x, 3, 0)
    train_x = np.rollaxis(train_x, 3, 1)
    train_y = train_data["y"]
    print(train_x.shape)
    print(train_y.shape)

    file_test = 'synth_test_32x32.mat'
    test_data = loadmat(folder_name+file_test)
    test_x = test_data["X"]
    test_x = np.rollaxis(test_x, 3, 0)
    test_x = np.rollaxis(test_x, 3, 1)
    test_y = test_data["y"]
    print(test_x.shape)
    print(test_y.shape)

    trainset = SVHN(root='./data', split='train', download=True, transform=transform_train)
    testset = SVHN(root='./data', split='test', download=True, transform=no_augment_mnist_rgb())

    trainset.data = train_x
    testset.data = test_x
    trainset.labels = train_y
    testset.labels = test_y

    print(trainset.data.shape, len(trainset.labels))
    print(testset.data.shape, len(testset.labels))

    trainloader = DataLoader(trainset, batch_size=batchsize, shuffle=True, num_workers=0)
    testloader = DataLoader(testset, batch_size=batchsize, shuffle=False, num_workers=0)

    print("synth train min=%f, max=%f" % (trainset.data.min(), trainset.data.max()))
    print("synth test min=%f, max=%f" % (testset.data.min(), testset.data.max()))

    return trainloader, testloader
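
The two np.rollaxis calls above turn the .mat layout (H, W, C, N) into the (N, C, H, W) layout that the SVHN dataset stores internally. A self-contained check on a synthetic array (not the real data) makes the axis bookkeeping explicit:

import numpy as np

x = np.zeros((32, 32, 3, 10))  # (H, W, C, N), as stored in the .mat file
x = np.rollaxis(x, 3, 0)       # -> (N, H, W, C)
x = np.rollaxis(x, 3, 1)       # -> (N, C, H, W)
assert x.shape == (10, 3, 32, 32)
# the same result in one step: np.transpose(original, (3, 2, 0, 1))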
Example #3
# Note: the imports below are assumed; augment_affine_svhn, augment_mean_svhn
# and noaug_SVHN are project-local helpers defined elsewhere in the repo.
import copy

import numpy as np
from torch.utils.data import DataLoader
from torchvision.datasets import SVHN


def get_loaders(nb_labelled,
                batch_size,
                unlab_rat,
                augment_type,
                lab_inds=[],
                is_balanced=True):

    if augment_type == "affine":
        transform_train, transform_test = augment_affine_svhn()
    elif augment_type == "mean":
        transform_train, transform_test = augment_mean_svhn()
    elif augment_type == "no":
        transform_train, transform_test = noaug_SVHN()

    trainset_l = SVHN(root='./data',
                      split='train',
                      download=True,
                      transform=transform_train)
    test_set = SVHN(root='./data',
                    split='test',
                    download=True,
                    transform=transform_test)
    print(trainset_l.data.shape, len(trainset_l.labels))
    if len(lab_inds) == 0:
        if is_balanced:
            lab_inds = []
            labels = np.array(trainset_l.labels)
            for i in range(10):
                # draw nb_labelled/10 random indices from each class
                inds_i = np.random.permutation(np.where(labels == i)[0])
                lab_inds.extend(inds_i[:nb_labelled // 10].tolist())
            lab_inds = np.array(lab_inds)
        else:
            lab_inds = np.arange(0, nb_labelled)

    all_inds = np.arange(len(trainset_l.labels))
    unlab_inds = np.setdiff1d(all_inds, lab_inds)

    trainset_u = copy.deepcopy(trainset_l)
    unlab_inds = unlab_inds[0:int(unlab_rat * len(unlab_inds))]
    trainset_u.data = np.array(trainset_u.data)[unlab_inds]
    trainset_u.labels = np.array(trainset_u.labels)[unlab_inds]
    trainloader_u = DataLoader(trainset_u,
                               batch_size=batch_size,
                               shuffle=False,
                               num_workers=1)
    print(trainset_u.data.shape, len(trainset_u.labels))

    trainset_l.data = np.array(trainset_l.data)[lab_inds]
    trainset_l.labels = np.array(trainset_l.labels)[lab_inds]

    print(trainset_l.data.shape, len(trainset_l.labels))
    trainloader_l = DataLoader(trainset_l,
                               batch_size=batch_size,
                               shuffle=True,
                               num_workers=1)

    testloader = DataLoader(test_set,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=1)

    loaders = {
        "trainloader_l": trainloader_l,
        "testloader": testloader,
        "trainloader_u": trainloader_u,
        "trainset_l": trainset_l,
        "test_set": test_set,
        "trainset_u": trainset_u,
        "lab_inds": lab_inds
    }
    return loaders
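
The balanced branch draws nb_labelled / 10 random indices from every class. The same sampling step in isolation, with random labels standing in for trainset_l.labels (a self-contained sketch, not the function above):

import numpy as np

nb_labelled = 1000
labels = np.random.randint(0, 10, size=73257)  # stand-in for the SVHN train labels
lab_inds = np.concatenate([
    np.random.permutation(np.where(labels == i)[0])[:nb_labelled // 10]
    for i in range(10)
])
unlab_inds = np.setdiff1d(np.arange(len(labels)), lab_inds)
print(lab_inds.shape, unlab_inds.shape)  # (1000,) (72257,)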
Example #4
# Note: the imports below are assumed; get_cifar_anomaly_dataset,
# get_mnist_anomaly_dataset and get_mnist2_anomaly_dataset are project-local
# helpers defined elsewhere in the repo.
import os

import numpy as np
import torch
from torchvision import transforms
from torchvision.datasets import CIFAR10, SVHN, FashionMNIST, MNIST, ImageFolder


def load_data(opt):
    """ Load Data

    Args:
        opt (argparse.Namespace): parsed command-line options

    Raises:
        IOError: Cannot Load Dataset

    Returns:
        dict: a 'train' and a 'test' DataLoader
    """

    ##
    # LOAD DATA SET
    if opt.dataroot == '':
        opt.dataroot = './data/{}'.format(opt.dataset)

    if opt.dataset in ['cifar10']:
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': False}

        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])

        classes = {
            'plane': 0,
            'car': 1,
            'bird': 2,
            'cat': 3,
            'deer': 4,
            'dog': 5,
            'frog': 6,
            'horse': 7,
            'ship': 8,
            'truck': 9
        }

        dataset = {}
        dataset['train'] = CIFAR10(root='./data',
                                   train=True,
                                   download=True,
                                   transform=transform)
        dataset['test'] = CIFAR10(root='./data',
                                  train=False,
                                  download=True,
                                  transform=transform)


        dataset['train'].data, dataset['train'].targets, \
        dataset['test'].data, dataset['test'].targets = get_cifar_anomaly_dataset(
            trn_img=dataset['train'].data,
            trn_lbl=dataset['train'].targets,
            tst_img=dataset['test'].data,
            tst_lbl=dataset['test'].targets,
            abn_cls_idx=classes[opt.abnormal_class],
            manualseed=opt.manualseed,
            perc_outlier=opt.perc_outlier
        )

        dataloader = {
            x: torch.utils.data.DataLoader(
                dataset=dataset[x],
                batch_size=opt.batchsize,
                shuffle=shuffle[x],
                num_workers=int(opt.workers),
                drop_last=drop_last_batch[x],
                worker_init_fn=(None if opt.manualseed == -1 else
                                lambda worker_id: np.random.seed(opt.manualseed)))
            for x in splits
        }

        return dataloader
    elif opt.dataset in ['svhn']:
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': False}

        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            #transforms.Normalize((0.45141584, 0.45141453, 0.45142587), (0.19929032, 0.1992932,  0.19929022))
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])

        dataset = {}
        dataset['train'] = SVHN(root='./data',
                                split='train',
                                download=True,
                                transform=transform)
        dataset['test'] = SVHN(root='./data',
                               split='test',
                               download=True,
                               transform=transform)

        # SVHN stores its data as (N, 3, 32, 32); relabel it as (N, 32, 32, 3)
        # so the CIFAR-style anomaly split (which only indexes along axis 0)
        # can consume it, then undo the reshape below. Note this is a reshape,
        # not a transpose: each image's bytes are left untouched.
        dataset['train'].data = dataset['train'].data.reshape(-1, 32, 32, 3)
        dataset['test'].data = dataset['test'].data.reshape(-1, 32, 32, 3)
        dataset['train'].data, dataset['train'].labels, \
        dataset['test'].data, dataset['test'].labels = get_cifar_anomaly_dataset(
            trn_img=dataset['train'].data,
            trn_lbl=dataset['train'].labels,
            tst_img=dataset['test'].data,
            tst_lbl=dataset['test'].labels,
            abn_cls_idx=int(opt.abnormal_class),
            manualseed=opt.manualseed
        )
        dataset['train'].data = dataset['train'].data.reshape(-1, 3, 32, 32)
        dataset['test'].data = dataset['test'].data.reshape(-1, 3, 32, 32)

        dataloader = {
            x: torch.utils.data.DataLoader(
                dataset=dataset[x],
                batch_size=opt.batchsize,
                shuffle=shuffle[x],
                num_workers=int(opt.workers),
                drop_last=drop_last_batch[x],
                worker_init_fn=(None if opt.manualseed == -1 else
                                lambda worker_id: np.random.seed(opt.manualseed)))
            for x in splits
        }

        return dataloader

    elif opt.dataset in ['fashionmnist']:
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': False}

        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.2860402, ), (0.3530239, ))
        ])

        classes = {
            'tshirt': 0,
            'trouser': 1,
            'pullover': 2,
            'dress': 3,
            'coat': 4,
            'sandal': 5,
            'shirt': 6,
            'sneaker': 7,
            'bag': 8,
            'boot': 9
        }

        dataset = {}
        dataset['train'] = FashionMNIST(root='./data',
                                        train=True,
                                        download=True,
                                        transform=transform)
        dataset['test'] = FashionMNIST(root='./data',
                                       train=False,
                                       download=True,
                                       transform=transform)

        dataset['train'].data, dataset['train'].targets, \
        dataset['test'].data, dataset['test'].targets = get_mnist_anomaly_dataset(
            trn_img=dataset['train'].data,
            trn_lbl=dataset['train'].targets,
            tst_img=dataset['test'].data,
            tst_lbl=dataset['test'].targets,
            abn_cls_idx=classes[opt.abnormal_class],
            manualseed=opt.manualseed,
            perc_outlier=opt.perc_outlier
        )

        dataloader = {
            x: torch.utils.data.DataLoader(
                dataset=dataset[x],
                batch_size=opt.batchsize,
                shuffle=shuffle[x],
                num_workers=int(opt.workers),
                drop_last=drop_last_batch[x],
                worker_init_fn=(None if opt.manualseed == -1 else
                                lambda worker_id: np.random.seed(opt.manualseed)))
            for x in splits
        }
        return dataloader

    elif opt.dataset in ['mnist']:
        opt.abnormal_class = int(opt.abnormal_class)

        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}

        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])

        dataset = {}
        dataset['train'] = MNIST(root='./data',
                                 train=True,
                                 download=True,
                                 transform=transform)
        dataset['test'] = MNIST(root='./data',
                                train=False,
                                download=True,
                                transform=transform)

        dataset['train'].data, dataset['train'].targets, \
        dataset['test'].data, dataset['test'].targets = get_mnist_anomaly_dataset(
            trn_img=dataset['train'].data,
            trn_lbl=dataset['train'].targets,
            tst_img=dataset['test'].data,
            tst_lbl=dataset['test'].targets,
            abn_cls_idx=opt.abnormal_class,
            manualseed=opt.manualseed,
            perc_outlier=opt.perc_outlier
        )

        dataloader = {
            x: torch.utils.data.DataLoader(
                dataset=dataset[x],
                batch_size=opt.batchsize,
                shuffle=shuffle[x],
                num_workers=int(opt.workers),
                drop_last=drop_last_batch[x],
                worker_init_fn=(None if opt.manualseed == -1 else
                                lambda worker_id: np.random.seed(opt.manualseed)))
            for x in splits
        }
        return dataloader

    elif opt.dataset in ['mnist2']:
        opt.abnormal_class = int(opt.abnormal_class)

        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}

        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.1307, ), (0.3081, ))
        ])

        dataset = {}
        dataset['train'] = MNIST(root='./data',
                                 train=True,
                                 download=True,
                                 transform=transform)
        dataset['test'] = MNIST(root='./data',
                                train=False,
                                download=True,
                                transform=transform)

        dataset['train'].data, dataset['train'].targets, \
        dataset['test'].data, dataset['test'].targets = get_mnist2_anomaly_dataset(
            trn_img=dataset['train'].data,
            trn_lbl=dataset['train'].targets,
            tst_img=dataset['test'].data,
            tst_lbl=dataset['test'].targets,
            nrm_cls_idx=opt.abnormal_class,
            proportion=opt.proportion,
            manualseed=opt.manualseed
        )

        dataloader = {
            x: torch.utils.data.DataLoader(
                dataset=dataset[x],
                batch_size=opt.batchsize,
                shuffle=shuffle[x],
                num_workers=int(opt.workers),
                drop_last=drop_last_batch[x],
                worker_init_fn=(None if opt.manualseed == -1 else
                                lambda worker_id: np.random.seed(opt.manualseed)))
            for x in splits
        }
        return dataloader

    else:
        splits = ['train', 'test']
        drop_last_batch = {'train': True, 'test': False}
        shuffle = {'train': True, 'test': True}
        transform = transforms.Compose([
            transforms.Resize(opt.isize),
            transforms.CenterCrop(opt.isize),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

        dataset = {
            x: ImageFolder(os.path.join(opt.dataroot, x), transform)
            for x in splits
        }
        dataloader = {
            x: torch.utils.data.DataLoader(
                dataset=dataset[x],
                batch_size=opt.batchsize,
                shuffle=shuffle[x],
                num_workers=int(opt.workers),
                drop_last=drop_last_batch[x],
                worker_init_fn=(None if opt.manualseed == -1 else
                                lambda worker_id: np.random.seed(opt.manualseed)))
            for x in splits
        }
        return dataloader
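
A minimal usage sketch for the MNIST branch (assuming load_data and the anomaly-split helpers are importable; the option values are illustrative, and SimpleNamespace stands in for the real argparse result):

from types import SimpleNamespace

opt = SimpleNamespace(dataroot='', dataset='mnist', isize=32, batchsize=64,
                      workers=2, abnormal_class='0', manualseed=-1,
                      perc_outlier=0.0)
dataloader = load_data(opt)
images, targets = next(iter(dataloader['train']))
print(images.shape)  # e.g. torch.Size([64, 1, 32, 32])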