Example #1
def get_loader_mnist_rgb(batchsize):
    transform_train = augment_mnist_rgb()

    trainset_m = MNIST(root='./data', train=True, download=True, transform=transform_train)
    testset_m = MNIST(root='./data', train=False, download=True, transform=no_augment_mnist_rgb())
    # The SVHN dataset object stores its images and labels as NumPy arrays, so
    # pull the MNIST tensors out as NumPy first (newer torchvision exposes these
    # as .data / .targets instead of train_data / train_labels):
    train_data = trainset_m.train_data.numpy()
    test_data = testset_m.test_data.numpy()
    train_labels = trainset_m.train_labels.numpy()
    test_labels = testset_m.test_labels.numpy()
    print("Original MNIST")
    print(train_data.shape, len(train_labels))
    print(test_data.shape, len(test_labels))

    ### reuse an SVHN dataset object as a container for the RGB MNIST data
    trainset = SVHN(root='./data', split='train', download=True, transform=transform_train)
    testset = SVHN(root='./data', split='test', download=True, transform=no_augment_mnist_rgb())
    # swap in the RGB-converted MNIST images and the original MNIST labels
    trainset.data = convert_mnist_images(trainset_m.train_data)
    testset.data = convert_mnist_images(testset_m.test_data)
    trainset.labels = trainset_m.train_labels
    testset.labels = testset_m.test_labels
    print("RGB MNIST")
    print(trainset.data.shape, len(trainset.labels))
    print(testset.data.shape, len(testset.labels))

    trainloader = DataLoader(trainset, batch_size=batchsize, shuffle=True, num_workers=0)
    testloader = DataLoader(testset, batch_size=batchsize, shuffle=False, num_workers=0)

    print("MNIST train min=%f, max=%f" % (trainset.data.min(), trainset.data.max()))
    print("MNIST test min=%f, max=%f" % (testset.data.min(), testset.data.max()))

    return trainloader, testloader
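
A minimal usage sketch for this loader; the call below and the batch size of 128 are illustrative assumptions, and it relies on the project-level helpers (augment_mnist_rgb, no_augment_mnist_rgb, convert_mnist_images) being importable:

# Hypothetical usage: build the RGB-MNIST loaders and inspect one training batch.
trainloader, testloader = get_loader_mnist_rgb(batchsize=128)

for images, labels in trainloader:
    # with a transform ending in ToTensor(), images are float tensors of shape (B, 3, H, W)
    print(images.shape, labels.shape)
    break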
Example #2
def get_loader_digit_rgb(batchsize):
    transform_train = no_augment_mnist_rgb()

    ########## download synth data from Ganin's Google Drive ####################
    gdd.download_file_from_google_drive(file_id='0B9Z4d7lAwbnTSVR1dEFSRUFxOUU', dest_path='data/SynthDigits.zip', unzip=True)

    folder_name = "data/"
    file_train = 'synth_train_32x32.mat'
    train_data = loadmat(folder_name+file_train)
    train_x = train_data["X"]
    # reorder from the .mat layout (H, W, C, N) to (N, C, H, W), which is what
    # the SVHN dataset object stores internally
    train_x = np.rollaxis(train_x, 3, 0)
    train_x = np.rollaxis(train_x, 3, 1)
    train_y = train_data["y"]
    print(train_x.shape)
    print(train_y.shape)

    file_test = 'synth_test_32x32.mat'
    test_data = loadmat(folder_name+file_test)
    test_x = test_data["X"]
    # same (H, W, C, N) -> (N, C, H, W) reordering for the test split
    test_x = np.rollaxis(test_x, 3, 0)
    test_x = np.rollaxis(test_x, 3, 1)
    test_y = test_data["y"]
    print(test_x.shape)
    print(test_y.shape)

    trainset = SVHN(root='./data', split='train', download=True, transform=transform_train)
    testset = SVHN(root='./data', split='test', download=True, transform=no_augment_mnist_rgb())

    trainset.data = train_x
    testset.data = test_x
    trainset.labels = train_y
    testset.labels = test_y

    print(trainset.data.shape, len(trainset.labels))
    print(testset.data.shape, len(testset.labels))

    trainloader = DataLoader(trainset, batch_size=batchsize, shuffle=True, num_workers=0)
    testloader = DataLoader(testset, batch_size=batchsize, shuffle=False, num_workers=0)

    print("synth train min=%f, max=%f" % (trainset.data.min(), trainset.data.max()))
    print("synth test min=%f, max=%f" % (testset.data.min(), testset.data.max()))

    return trainloader, testloader
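
A similar usage sketch for the SynthDigits loaders; the batch size of 64 and the shape check below are illustrative assumptions, not part of the original code:

# Hypothetical usage: build the SynthDigits loaders and report their sizes.
trainloader, testloader = get_loader_digit_rgb(batchsize=64)

print("train batches:", len(trainloader))
print("test batches:", len(testloader))

images, labels = next(iter(testloader))
print(images.shape, labels.shape)  # expected: (64, 3, 32, 32) images with one label each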
Example #3
def get_loaders(nb_labelled,
                batch_size,
                unlab_rat,
                augment_type,
                lab_inds=[],
                is_balanced=True):

    if augment_type == "affine":
        transform_train, transform_test = augment_affine_svhn()
    elif augment_type == "mean":
        transform_train, transform_test = augment_mean_svhn()
    elif augment_type == "no":
        transform_train, transform_test = noaug_SVHN()
    else:
        raise ValueError("unknown augment_type: %s" % augment_type)

    trainset_l = SVHN(root='./data',
                      split='train',
                      download=True,
                      transform=transform_train)
    test_set = SVHN(root='./data',
                    split='test',
                    download=True,
                    transform=transform_test)
    print(trainset_l.data.shape, len(trainset_l.labels))
    if len(lab_inds) == 0:
        if is_balanced:
            # draw nb_labelled / 10 labelled examples per class so the labelled
            # subset is class-balanced
            lab_inds = []
            labels = np.array(trainset_l.labels)
            for i in range(10):
                inds_i = np.where(labels == i)[0]
                inds_i = np.random.permutation(inds_i)
                lab_inds.extend(inds_i[0:int(nb_labelled / 10)].tolist())
            lab_inds = np.array(lab_inds)
        else:
            lab_inds = np.arange(0, nb_labelled)

    # everything not selected as labelled forms the unlabelled pool
    all_inds = np.arange(len(trainset_l.labels))
    unlab_inds = np.setdiff1d(all_inds, lab_inds)

    # copy the full train set and keep only a fraction unlab_rat of the unlabelled pool
    trainset_u = copy.deepcopy(trainset_l)
    unlab_inds = unlab_inds[0:int(unlab_rat * len(unlab_inds))]
    trainset_u.data = np.array(trainset_u.data)[unlab_inds]
    trainset_u.labels = np.array(trainset_u.labels)[unlab_inds]
    trainloader_u = DataLoader(trainset_u,
                               batch_size=batch_size,
                               shuffle=False,
                               num_workers=1)
    print(trainset_u.data.shape, len(trainset_u.labels))

    # restrict the labelled train set to the chosen labelled indices
    trainset_l.data = np.array(trainset_l.data)[lab_inds]
    trainset_l.labels = np.array(trainset_l.labels)[lab_inds]

    print(trainset_l.data.shape, len(trainset_l.labels))
    trainloader_l = DataLoader(trainset_l,
                               batch_size=batch_size,
                               shuffle=True,
                               num_workers=1)

    testloader = DataLoader(test_set,
                            batch_size=batch_size,
                            shuffle=False,
                            num_workers=1)

    loaders = {
        "trainloader_l": trainloader_l,
        "testloader": testloader,
        "trainloader_u": trainloader_u,
        "trainset_l": trainset_l,
        "test_set": test_set,
        "trainset_u": trainset_u,
        "lab_inds": lab_inds
    }
    return loaders
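
A sketch of how the returned dictionary might be consumed; the argument values and the second call that reuses lab_inds are illustrative assumptions, not part of the original code:

# Hypothetical usage: a balanced split with 1000 labelled samples and the full unlabelled pool.
loaders = get_loaders(nb_labelled=1000, batch_size=128, unlab_rat=1.0,
                      augment_type="affine")

trainloader_l = loaders["trainloader_l"]   # labelled training batches (shuffled)
trainloader_u = loaders["trainloader_u"]   # unlabelled training batches (not shuffled)
testloader = loaders["testloader"]

# Passing lab_inds back in keeps the labelled subset identical across calls,
# e.g. to rebuild the loaders with a different augmentation policy.
loaders_noaug = get_loaders(nb_labelled=1000, batch_size=128, unlab_rat=1.0,
                            augment_type="no", lab_inds=loaders["lab_inds"])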