Example #1
0
def return_dataset(data, scale=False, usps=False, all_use='no', directory="."):
    """Return ``(train_image, train_label, test_image, test_label)`` for a dataset.

    Args:
        data: dataset key; one of ``'svhn'``, ``'mnist'``, ``'mnistm'``,
            ``'usps'``, ``'synth'``, ``'gtsrb'``, ``'syn'``.
        scale, usps, all_use: accepted for signature compatibility with the
            sibling loaders in this file; unused by this variant.
        directory: root directory forwarded to the per-dataset loader.

    Returns:
        Tuple of (train_image, train_label, test_image, test_label).

    Raises:
        ValueError: if ``data`` is not a recognized key.  (The original fell
            through every branch and crashed with UnboundLocalError instead.)
    """
    # Exclusive dispatch; loader names are only resolved on the taken branch.
    if data == 'svhn':
        loader = load_svhn
    elif data == 'mnist':
        loader = load_mnist
    elif data == 'mnistm':
        loader = load_mnistm
    elif data == 'usps':
        loader = load_usps
    elif data == 'synth':
        loader = load_syntraffic
    elif data == 'gtsrb':
        loader = load_gtsrb
    elif data == 'syn':
        loader = load_syn
    else:
        raise ValueError("unknown dataset: {!r}".format(data))

    train_image, train_label, test_image, test_label = loader(directory)
    return train_image, train_label, test_image, test_label
def return_dataset(data, scale=False, usps=False, all_use='no'):
    """Load a dataset's train/test split and print its sizes.

    Args:
        data: dataset key; one of ``'svhn'``, ``'mnist'``, ``'usps'``,
            ``'synth'``, ``'gtsrb'``.
        scale, usps, all_use: forwarded to ``load_mnist`` / ``load_usps``
            where those loaders accept them; ignored otherwise.

    Returns:
        Tuple of (train_image, train_label, test_image, test_label).

    Raises:
        ValueError: if ``data`` is not a recognized key.  (The original fell
            through every branch and crashed with UnboundLocalError instead.)
    """
    if data == 'svhn':
        train_image, train_label, test_image, test_label = load_svhn()
    elif data == 'mnist':
        train_image, train_label, test_image, test_label = load_mnist(
            scale=scale, usps=usps, all_use=all_use)
    elif data == 'usps':
        train_image, train_label, test_image, test_label = load_usps(
            all_use=all_use)
    elif data == 'synth':
        train_image, train_label, test_image, test_label = load_syntraffic()
    elif data == 'gtsrb':
        train_image, train_label, test_image, test_label = load_gtsrb()
    else:
        raise ValueError("unknown dataset: {!r}".format(data))

    # Single shared size report (the original duplicated this print in
    # every branch with the identical message).
    print('The size of {} training dataset: {} and testing dataset: {}'.
          format(data, train_image.shape, test_image.shape))

    return train_image, train_label, test_image, test_label
Example #3
0
def return_dataset(data, scale=False, usps=False, all_use=False):
    """Load the train/test split for 'svhn', 'mnist', or 'usps'.

    Args:
        data: dataset key; one of ``'svhn'``, ``'mnist'``, ``'usps'``.
        scale, usps, all_use: forwarded to ``load_mnist`` / ``load_usps``
            where those loaders accept them; ignored for svhn.

    Returns:
        Tuple of (train_image, train_label, test_image, test_label).

    Raises:
        ValueError: if ``data`` is not a recognized key.  (The original fell
            through every branch and crashed with UnboundLocalError instead.)
    """
    if data == 'svhn':
        train_image, train_label, test_image, test_label = load_svhn()
    elif data == 'mnist':
        train_image, train_label, test_image, test_label = load_mnist(
            scale=scale, usps=usps, all_use=all_use)
        print(train_image.shape)  # debug: report the mnist training shape
    elif data == 'usps':
        train_image, train_label, test_image, test_label = load_usps(
            all_use=all_use)
    else:
        raise ValueError("unknown dataset: {!r}".format(data))
    return train_image, train_label, test_image, test_label
Example #4
0
    def get_ndarray(self):
        """Fetch the SVHN arrays and return them as train/test pairs.

        Labels are still integer-encoded at this point; one-hot conversion
        is deliberately left to the caller.

        Returns:
            ``((x_train, y_train), (x_test, y_test))`` exactly as produced
            by ``load_svhn(True)``.
        """
        train_pair, test_pair = load_svhn(True)
        x_train, y_train = train_pair
        x_test, y_test = test_pair

        # Debug output: a small sample of the inputs, then every shape in
        # train-x / train-y / test-x / test-y order.
        print(x_train[:2])
        for arr in (x_train, y_train, x_test, y_test):
            print(arr.shape)

        return (x_train, y_train), (x_test, y_test)
Example #5
0
def return_dataset(data, scale=False, usps=False, all_use='no'):
    """Load the train/test split for one of five supported datasets.

    Args:
        data: dataset key; one of ``'svhn'``, ``'mnist'``, ``'usps'``,
            ``'synth'``, ``'gtsrb'``.
        scale, usps, all_use: forwarded to ``load_mnist`` / ``load_usps``
            where those loaders accept them; ignored otherwise.

    Returns:
        Tuple of (train_image, train_label, test_image, test_label).

    Raises:
        ValueError: if ``data`` is not a recognized key.  (The original fell
            through every branch and crashed with UnboundLocalError instead.)
    """
    if data == 'svhn':
        train_image, train_label, test_image, test_label = load_svhn()
    elif data == 'mnist':
        train_image, train_label, test_image, test_label = load_mnist(
            scale=scale, usps=usps, all_use=all_use)
    elif data == 'usps':
        train_image, train_label, test_image, test_label = load_usps(
            all_use=all_use)
    elif data == 'synth':
        train_image, train_label, test_image, test_label = load_syntraffic()
    elif data == 'gtsrb':
        train_image, train_label, test_image, test_label = load_gtsrb()
    else:
        raise ValueError("unknown dataset: {!r}".format(data))

    return train_image, train_label, test_image, test_label
Example #6
0
    def __init__(self, dataset, batch_size):
        """Load `dataset` and expose train/test arrays plus batch bookkeeping.

        Args:
            dataset: one of "MNIST", "CIFAR10", "SVHN", "KaggleBreastHistology",
                "BreaKHis", "Kyoto2006".  Any other value terminates the
                process via sys.exit.
            batch_size: mini-batch size; for most datasets the example count
                is truncated to a multiple of this.

        Side effects: sets self.h/w/c (image geometry), self.l (label count),
        self.img_size, self.batch_size, self.data_*/self.labels_* arrays
        (labels one-hot encoded), self.n_examples_* and self.n_batches_*.
        """
        self.dataset = dataset

        # image width,height
        # First pass: per-dataset geometry (_h, _w, _c) and class count (_l).
        if self.dataset == "MNIST":
            from tensorflow.examples.tutorials import mnist
            _h, _w, _c = 28,28,1
            img_size = _h*_w*_c # the canvas size
            _l = 10
        elif self.dataset == "CIFAR10":
            _h, _w, _c = 32,32,3
            img_size = _h*_w*_c
            _l = 10
        elif self.dataset == "SVHN":
            #import loadSVHNKingma as svhn
            #PCA_DIM = 768
            PCA_DIM = -1    # no compressed raw data
            #img_size = PCA_DIM # PCA
            _h, _w, _c = 32,32,3
            img_size = _h*_w*_c
            _l = 10
        elif self.dataset == "KaggleBreastHistology":
            _h, _w, _c = 50,50,3
            img_size = _h*_w*_c
            _l = 2
        elif self.dataset == "BreaKHis":
            _h, _w, _c = 460,700,3
            img_size = _h*_w*_c
            _l = 2
        elif self.dataset == "Kyoto2006":
            # Tabular (non-image) data; geometry is patched up after loading.
            from loadKyoto2006 import loadKyoto2006
            _h, _w, _c = None,None,None
            img_size = None # dummy
            _l = 2
        else: sys.exit("invalid dataset")

        self.h = _h
        self.w = _w
        self.c = _c
        self.l = _l
        self.img_size   = img_size
        self.batch_size = batch_size


        # Second pass: actually load the data for the chosen dataset.
        if self.dataset == "MNIST":
            PATH_OF_MNIST = "D:/data/img/MNIST/"  # NOTE(review): hard-coded Windows path
            data_directory = PATH_OF_MNIST
            if not os.path.exists(data_directory): os.makedirs(data_directory)
            mnist_datasets = mnist.input_data.read_data_sets(data_directory, one_hot=True)
            dataset_train, dataset_test = mnist_datasets.train, mnist_datasets.test  # binarized (0-1) mnist data

            n_examples_train = dataset_train.images.shape[0]
            n_examples_test = dataset_test.images.shape[0]

        elif self.dataset == "CIFAR10":
            #from cifar10 import load_cifar10
            from keras.datasets import cifar10
            (data_train, labels_train), (data_test, labels_test) = cifar10.load_data() # [0-255] integer
            # Scale pixel values into [0, 1].
            data_train = data_train / 255.
            data_test  = data_test  / 255.

            # IS_NHWC_or_1D is a module-level flag (defined outside this block).
            if IS_NHWC_or_1D == '1D':
                data_train,   data_test    = data_train.reshape((-1, img_size)), data_test.reshape((-1, img_size)) # NHWC to 1d
            data_train,   data_test    = data_train.astype(np.float32), data_test.astype(np.float32)
            labels_train, labels_test  = labels_train.reshape((-1, )), labels_test.reshape((-1, ))  # flatten

            # if normalized or zca-ed one is preferable,
            #data_train, labels_train, data_test, labels_test = cifar10.loadCIFAR10( PATH_OF_CIFAR10, use_cache=True)
            labels_train = self._one_hot_encoded(labels_train, 10)
            labels_test  = self._one_hot_encoded(labels_test,  10)
            n_examples_train = len(data_train)
            n_examples_test  = len(data_test)

        elif self.dataset == "SVHN":
            from svhn import load_svhn, NUM_EXAMPLES_TRAIN, NUM_EXAMPLES_TEST
            # data_train.shape is (604388,3072) w/ extra and (73257,3072) w/o extra
            #data_train, labels_train, data_test, labels_test = svhn.loadSVHN(cutoffdim=PCA_DIM, use_cache=False, use_extra=False)
            (data_train, labels_train), (data_test, labels_test) = load_svhn()
            labels_train = self._one_hot_encoded(labels_train, 10)
            labels_test  = self._one_hot_encoded(labels_test,  10)
            # Dead alternative kept below as an (unused) string literal:
            """
            n_examples_train = NUM_EXAMPLES_TRAIN
            n_examples_test  = NUM_EXAMPLES_TEST

            """
            n_examples_train = (data_train.shape[0]//self.batch_size) * self.batch_size  # discard residual
            data_train, labels_train = data_train[0:n_examples_train, :], labels_train[0:n_examples_train, :]

            n_examples_test = data_test.shape[0]//self.batch_size * self.batch_size
            data_test, labels_test = data_test[0:n_examples_test, :], labels_test[0:n_examples_test, :]

        elif self.dataset == "KaggleBreastHistology":
            from HandleIIDDataTFRecord import HandleIIDDataTFRecord
            K = 10            # number of cross-validation folds
            TEST_IDXES = [9]  # fold index reserved for the test split
            PATHS = ( ['D:/data/img/KaggleBreastHistology'], None)
            d = HandleIIDDataTFRecord( self.dataset, self.batch_size, K, PATHS, is_debug=False)

            (data_train, labels_train), (data_test, labels_test) = d.get_ndarrays(TEST_IDXES)
            labels_train = self._one_hot_encoded(labels_train, self.l)
            labels_test  = self._one_hot_encoded(labels_test,  self.l)


            #print('x:', data_train[0])
            #print('y:', labels_train[0])
            #sys.exit('kokomade')  # ("kokomade" is Japanese for "up to here")
            n_examples_train = len(data_train)
            n_examples_test  = len(data_test)

            # Truncate the counts (but, unlike SVHN, not the arrays themselves)
            # to batch multiples.
            n_examples_train = (data_train.shape[0]//self.batch_size) * self.batch_size  # discard residual
            n_examples_test = data_test.shape[0]//self.batch_size * self.batch_size

        elif self.dataset == "BreaKHis":
            from HandleIIDDataTFRecord import HandleIIDDataTFRecord
            K = 10            # number of cross-validation folds
            TEST_IDXES = [9]  # fold index reserved for the test split
            PATHS = ( ['D:/data/img/BreaKHis/BreaKHis_v1/histology_slides/breast'], None)
            d = HandleIIDDataTFRecord( self.dataset, self.batch_size, K, PATHS, is_debug=False)

            (data_train, labels_train), (data_test, labels_test) = d.get_ndarrays(TEST_IDXES)
            labels_train = self._one_hot_encoded(labels_train, self.l)
            labels_test  = self._one_hot_encoded(labels_test,  self.l)

            # NOTE(review): counts are NOT truncated to batch multiples here
            # (the truncation below is commented out), so the divisibility
            # asserts further down can fire for this dataset.
            n_examples_train = len(data_train)
            n_examples_test  = len(data_test)

            #n_examples_train = (data_train.shape[0]//self.batch_size) * self.batch_size  # discard residual
            #data_train, labels_train = data_train[0:n_examples_train, :], labels_train[0:n_examples_train, :]

            #n_examples_test = data_test.shape[0]//self.batch_size * self.batch_size
            #data_test, labels_test = data_test[0:n_examples_test, :], labels_test[0:n_examples_test, :]

        elif self.dataset == "Kyoto2006":
            data_train, labels_train  = loadKyoto2006('train', use_sval=False, use_cache=True, as_onehot=True)
            data_test, labels_test    = loadKyoto2006( 'test', use_sval=False, use_cache=True, as_onehot=True)

            print(data_train.shape, labels_train.shape)
            n_examples_train = (data_train.shape[0]//self.batch_size) * self.batch_size  # discard residual
            data_train, labels_train = data_train[0:n_examples_train, :], labels_train[0:n_examples_train, :]

            n_examples_test = data_test.shape[0]//self.batch_size * self.batch_size
            data_test, labels_test = data_test[0:n_examples_test, :], labels_test[0:n_examples_test, :]

            # Feature dimension only known after loading; treat the vector as
            # a 1x1x(img_size) "image".
            self.img_size = data_train.shape[1]
            # ugly workaround for ImageInterface
            self.h, self.w, self.c = 1,1,self.img_size

        # Sanity check: example counts must divide evenly into batches.
        # NOTE(review): SVHN is exempted even though it was truncated above;
        # presumably historical -- confirm before relying on it.
        if self.dataset == "SVHN":
            pass
        else:
            assert(n_examples_train%self.batch_size ==0)
            assert(n_examples_test%self.batch_size ==0)

        if self.dataset == "MNIST":
            # following two properties are for MNIST.
            self.dataset_train  = dataset_train
            self.dataset_test   = dataset_test

            # below is just trial for crafting adv examples in eval.py
            # (next_batch(55000)/(10000) materializes the full splits at once)
            self.data_train, self.labels_train = dataset_train.next_batch(55000) # x: (BATCH_SIZE x img_size)
            self.data_test,  self.labels_test  = dataset_test.next_batch(10000) # x: (BATCH_SIZE x img_size)

        else:
            self.data_train   = data_train
            self.labels_train = labels_train
            self.data_test    = data_test
            self.labels_test  = labels_test

        #if IS_NHWC_or_1D == 'NHWC':
        #    self.dataset_train = np.reshape( self.dataset_train, (self.batch_size, self.h, self.w, self.c))
        #    self.dataset_test  = np.reshape( self.dataset_test,  (self.batch_size, self.h, self.w, self.c))

        # Batch bookkeeping used by the training loop.
        self.n_examples_train = n_examples_train
        self.n_examples_test  = n_examples_test

        self.n_batches_train = int( self.n_examples_train/self.batch_size )
        self.n_batches_test  = int( self.n_examples_test/self.batch_size  )
        print('n_examples_train:%d, n_batches_train:%d, n_batches_test:%d' % \
                (self.n_examples_train, self.n_batches_train, self.n_batches_test))
#!/usr/bin/env python3
"""
Implementation of self-ensembling for visual domain adaptation
"""
import tensorflow as tf

from mnist import load_mnist
from usps import load_usps
from svhn import load_svhn
from load_data import load_dataset

if __name__ == "__main__":
    # Smoke test: load each small-image dataset and wrap its raw arrays with
    # load_dataset into (train, test) pairs.
    # Note: "It is worth noting that only the training sets of the small image
    # datasets were used during training; the test sets used for reporting scores
    # only." -- so, only use *_test for evaluation.
    # Note 2: "The USPS images were up-scaled using bilinear interpolation from
    # 16×16 to 28×28 resolution to match that of MNIST."
    # Note 3: "The MNIST images were padded to 32×32 resolution and converted
    # to RGB by replicating the greyscale channel into the three RGB channels
    # to match the format of SVHN."
    usps_train, usps_test = load_dataset(*load_usps())
    mnist_train, mnist_test = load_dataset(*load_mnist())
    svhn_train, svhn_test = load_dataset(*load_svhn())