def __init__(self, which_set='train', n_datapoints=None, fname="mnist.pkl.gz", preproc=[]):
        """Load one split of the pickled MNIST data into ``self.X`` / ``self.Y``.

        Parameters
        ----------
        which_set : str
            'train', 'valid' or 'test' select the corresponding pickled split.
            'salakhutdinov_train' uses train and valid concatenated;
            'salakhutdinov_valid' uses the same concatenation in reversed order.
        n_datapoints : forwarded unchanged to ``self.prepare``
            (presumably a limit on the number of examples -- confirm against
            ``prepare``).
        fname : str
            File name, resolved through ``datapath``. A name ending in ".gz"
            is read through ``gzip``; anything else is read as a plain file.
        preproc : forwarded to the base-class constructor.

        Raises
        ------
        ValueError
            If ``which_set`` is not one of the names listed above.
        """
        super(MNIST, self).__init__(preproc)

        _logger.info("Loading MNIST data")
        fname = datapath(fname)

        # Transparently support gzip-compressed and uncompressed pickles.
        open_func = gzip.open if fname.endswith(".gz") else open

        # Pickles are binary data: the plain ``open`` default is text mode,
        # which breaks unpickling, so request binary mode explicitly.
        # NOTE(security): unpickling can execute arbitrary code -- only load
        # dataset files from a trusted source.
        with open_func(fname, 'rb') as f:
            (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = pickle.load(f)

        if which_set == 'train':
            x, y = train_x, train_y
        elif which_set == 'valid':
            x, y = valid_x, valid_y
        elif which_set == 'test':
            x, y = test_x, test_y
        elif which_set == 'salakhutdinov_train':
            x = np.concatenate([train_x, valid_x])
            y = np.concatenate([train_y, valid_y])
        elif which_set == 'salakhutdinov_valid':
            # Same pool as 'salakhutdinov_train', but reversed so that the
            # original validation examples come first.
            x = np.concatenate([train_x, valid_x])[::-1]
            y = np.concatenate([train_y, valid_y])[::-1]
        else:
            raise ValueError("Unknown dataset %s" % which_set)

        self.X, self.Y = self.prepare(x, y, n_datapoints)
        self.n_datapoints = self.X.shape[0]
Beispiel #2
0
    def __init__(self, which_set='train', fname="chardata.mat", shuffle_seed=123, n_used_for_validation=1345, preproc=[]):
        """Load one split of the 28x28 Omniglot data into ``self.X``.

        The 'data' array of the .mat file is shuffled with a fixed seed and
        its last ``n_used_for_validation`` rows form the validation split;
        the remaining rows form the training split. The 'testdata' array is
        used as-is for the test split.

        Parameters
        ----------
        which_set : str
            'train', 'valid' or 'test'.
        fname : str
            .mat file name, resolved through ``datapath``.
        shuffle_seed : int
            Seed of the ``RandomState`` used to permute the training rows.
        n_used_for_validation : int
            Number of shuffled training rows held out for validation.
        preproc : forwarded to the base-class constructor.

        Raises
        ------
        ValueError
            If ``which_set`` is not 'train', 'valid' or 'test'.

        ``self.Y`` is set to an all-zero array of shape (n_datapoints, 2).
        """
        super(Omniglot, self).__init__(preproc)

        def reshape_data(data):
            # View each row as a 28x28 image, then flatten it again in
            # Fortran (column-major) order. NumPy expects the single-letter
            # code 'F'; longer spellings such as 'fortran' are rejected by
            # recent releases.
            return data.reshape((-1, 28, 28)).reshape((-1, 28*28), order='F')

        _logger.info("Loading Omniglot data (28x28)")
        fname = datapath(fname)

        omni_raw = scipy.io.loadmat(fname)

        train_data = reshape_data(omni_raw['data'].T.astype('float32'))
        test_data = reshape_data(omni_raw['testdata'].T.astype('float32'))

        n_total = train_data.shape[0]
        permutation = np.random.RandomState(seed=shuffle_seed).permutation(n_total)
        train_data = train_data[permutation]

        # Use an explicit split index rather than negative slicing: with
        # n_used_for_validation == 0, ``[:-0]`` would be empty and ``[-0:]``
        # would be the whole array, i.e. the splits would be swapped.
        n_train = max(0, n_total - n_used_for_validation)

        if which_set == 'train':
            self.X = train_data[:n_train]
        elif which_set == 'valid':
            self.X = train_data[n_train:]
        elif which_set == 'test':
            self.X = test_data
        else:
            raise ValueError("Unknown dataset %s" % which_set)

        self.n_datapoints = self.X.shape[0]

        # No labels are read from the file; Y is an all-zero placeholder.
        self.Y = np.zeros((self.n_datapoints, 2), dtype=floatX)
Beispiel #3
0
    def __init__(self, which_set='train', n_datapoints=-1, path="caltech-silhouettes", preproc=[]):
        """Load one split of the CalTech 101 Silhouettes .npy files.

        Parameters
        ----------
        which_set : str
            'train', 'valid' or 'test'; selects the ``train_*``, ``val_*`` or
            ``test_*`` pair of ``*_data.npy`` / ``*_labels.npy`` files.
        n_datapoints : int or None
            If positive, keep only the first ``n_datapoints`` rows. Any other
            value (including the default -1, or None) keeps all rows.
        path : str
            Directory holding the .npy files, resolved through ``datapath``.
        preproc : forwarded to the base-class constructor.

        Raises
        ------
        ValueError
            If ``which_set`` is not 'train', 'valid' or 'test'.
        """
        super(CalTechSilhouettes, self).__init__(preproc)

        _logger.info("Loading CalTech 101 Silhouettes data (28x28)")
        path = datapath(path)

        if which_set == 'train':
            prefix = "train"
        elif which_set == 'valid':
            prefix = "val"
        elif which_set == 'test':
            prefix = "test"
        else:
            raise ValueError("Unknown dataset %s" % which_set)

        # Load only the requested split; nothing else is needed.
        X = np.load(path + "/" + prefix + "_data.npy")
        Y = np.load(path + "/" + prefix + "_labels.npy")

        if n_datapoints is not None and n_datapoints > 0:
            X = X[:n_datapoints]
            Y = Y[:n_datapoints]

        self.X = X.astype(floatX)
        self.Y = Y
        # Derive the count from the data itself so it stays correct when
        # more rows were requested than the split contains.
        self.n_datapoints = self.X.shape[0]
Beispiel #4
0
    def __init__(self, which_set='train', n_datapoints=None, fname="mnist.pkl.gz", preproc=[]):
        """Load one split of the pickled MNIST data into ``self.X`` / ``self.Y``.

        Parameters
        ----------
        which_set : str
            'train', 'valid' or 'test' select the corresponding pickled split.
            'salakhutdinov_train' uses train and valid concatenated;
            'salakhutdinov_valid' uses the same concatenation in reversed order.
        n_datapoints : forwarded unchanged to ``self.prepare``
            (presumably a limit on the number of examples -- confirm against
            ``prepare``).
        fname : str
            File name, resolved through ``datapath``. A name ending in ".gz"
            is read through ``gzip``; anything else is read as a plain file.
        preproc : forwarded to the base-class constructor.

        Raises
        ------
        ValueError
            If ``which_set`` is not one of the names listed above.
        """
        super(MNIST, self).__init__(preproc)

        _logger.info("Loading MNIST data")
        fname = datapath(fname)

        # Transparently support gzip-compressed and uncompressed pickles.
        open_func = gzip.open if fname.endswith(".gz") else open

        # Pickles are binary data: the plain ``open`` default is text mode,
        # which breaks unpickling, so request binary mode explicitly.
        # NOTE(security): unpickling can execute arbitrary code -- only load
        # dataset files from a trusted source.
        with open_func(fname, 'rb') as f:
            (train_x, train_y), (valid_x, valid_y), (test_x, test_y) = pickle.load(f)

        if which_set == 'train':
            x, y = train_x, train_y
        elif which_set == 'valid':
            x, y = valid_x, valid_y
        elif which_set == 'test':
            x, y = test_x, test_y
        elif which_set == 'salakhutdinov_train':
            x = np.concatenate([train_x, valid_x])
            y = np.concatenate([train_y, valid_y])
        elif which_set == 'salakhutdinov_valid':
            # Same pool as 'salakhutdinov_train', but reversed so that the
            # original validation examples come first.
            x = np.concatenate([train_x, valid_x])[::-1]
            y = np.concatenate([train_y, valid_y])[::-1]
        else:
            raise ValueError("Unknown dataset %s" % which_set)

        self.X, self.Y = self.prepare(x, y, n_datapoints)
        self.n_datapoints = self.X.shape[0]
Beispiel #5
0
    def __init__(self,
                 which_set='train',
                 size=48,
                 fold=0,
                 n_datapoints=-1,
                 path="TFD",
                 preproc=[]):
        """Load one split of the Toronto Face Dataset into ``self.X``.

        Parameters
        ----------
        which_set : str
            'unlabeled', 'train', 'unlabeled+train', 'valid' or 'test'.
            Rows are selected by the code stored in column ``fold`` of the
            file's 'folds' array (0, 1, 0-or-1, 2 and 3 respectively).
        size : int
            48 or 96; selects ``TFD_48x48.mat`` or ``TFD_96x96.mat``.
        fold : int
            Column of the 'folds' array to use; must be in 0..4.
        n_datapoints : int or None
            If positive, keep only the first ``n_datapoints`` selected images.
            Any other value (including the default -1, or None) keeps all.
        path : str
            Directory holding the .mat files, resolved through ``datapath``.
        preproc : forwarded to the base-class constructor.

        Raises
        ------
        ValueError
            If ``size`` or ``which_set`` is not one of the accepted values.
        AssertionError
            If ``fold`` is outside 0..4.

        Labels are not loaded; ``self.Y`` is always ``None``.
        """
        super(TorontoFaceDataset, self).__init__(preproc)

        _logger.info("Loading Toronto Face Dataset (48x48)")

        fname = datapath(path)

        if size == 48:
            fname += "/TFD_48x48.mat"
        elif size == 96:
            fname += "/TFD_96x96.mat"
        else:
            raise ValueError("Unknown size %s. Allowerd options 48 or 96." %
                             size)

        assert 0 <= fold <= 4

        # Load dataset
        data = loadmat(fname)

        fold_codes = data['folds'][:, fold]

        if which_set == 'unlabeled':
            idx = (fold_codes == 0)
        elif which_set == 'train':
            idx = (fold_codes == 1)
        elif which_set == 'unlabeled+train':
            # Union of the two boolean masks.
            idx = (fold_codes == 0) | (fold_codes == 1)
        elif which_set == 'valid':
            idx = (fold_codes == 2)
        elif which_set == 'test':
            idx = (fold_codes == 3)
        else:
            raise ValueError("Unknown dataset %s" % which_set)

        X = data['images'][idx, :, :]

        # Only X is truncated: no label array is loaded here, so there is
        # nothing else to slice.
        if n_datapoints is not None and n_datapoints > 0:
            X = X[:n_datapoints]

        # Use the real row count so the reshape below stays valid even when
        # more rows were requested than the split contains.
        n_datapoints = X.shape[0]

        # Normalize to 0..1
        X = (X / 255.).astype(floatX)

        # Flatten images
        X = X.reshape([n_datapoints, -1])

        self.n_datapoints = n_datapoints
        self.X = X
        self.Y = None
Beispiel #6
0
    def __init__(self, which_set='train', size=48, fold=0, n_datapoints=-1, path="TFD", preproc=[]):
        """Load one split of the Toronto Face Dataset into ``self.X``.

        Parameters
        ----------
        which_set : str
            'unlabeled', 'train', 'unlabeled+train', 'valid' or 'test'.
            Rows are selected by the code stored in column ``fold`` of the
            file's 'folds' array (0, 1, 0-or-1, 2 and 3 respectively).
        size : int
            48 or 96; selects ``TFD_48x48.mat`` or ``TFD_96x96.mat``.
        fold : int
            Column of the 'folds' array to use; must be in 0..4.
        n_datapoints : int or None
            If positive, keep only the first ``n_datapoints`` selected images.
            Any other value (including the default -1, or None) keeps all.
        path : str
            Directory holding the .mat files, resolved through ``datapath``.
        preproc : forwarded to the base-class constructor.

        Raises
        ------
        ValueError
            If ``size`` or ``which_set`` is not one of the accepted values.
        AssertionError
            If ``fold`` is outside 0..4.

        Labels are not loaded; ``self.Y`` is always ``None``.
        """
        super(TorontoFaceDataset, self).__init__(preproc)

        _logger.info("Loading Toronto Face Dataset (48x48)")

        fname = datapath(path)

        if size == 48:
            fname += "/TFD_48x48.mat"
        elif size == 96:
            fname += "/TFD_96x96.mat"
        else:
            raise ValueError("Unknown size %s. Allowerd options 48 or 96." % size)

        assert 0 <= fold <= 4

        # Load dataset
        data = loadmat(fname)

        fold_codes = data['folds'][:, fold]

        if which_set == 'unlabeled':
            idx = (fold_codes == 0)
        elif which_set == 'train':
            idx = (fold_codes == 1)
        elif which_set == 'unlabeled+train':
            # Union of the two boolean masks.
            idx = (fold_codes == 0) | (fold_codes == 1)
        elif which_set == 'valid':
            idx = (fold_codes == 2)
        elif which_set == 'test':
            idx = (fold_codes == 3)
        else:
            raise ValueError("Unknown dataset %s" % which_set)

        X = data['images'][idx, :, :]

        # Only X is truncated: no label array is loaded here, so there is
        # nothing else to slice.
        if n_datapoints is not None and n_datapoints > 0:
            X = X[:n_datapoints]

        # Use the real row count so the reshape below stays valid even when
        # more rows were requested than the split contains.
        n_datapoints = X.shape[0]

        # Normalize to 0..1
        X = (X / 255.).astype(floatX)

        # Flatten images
        X = X.reshape([n_datapoints, -1])

        self.n_datapoints = n_datapoints
        self.X = X
        self.Y = None
Beispiel #7
0
    def __init__(self, which_set='train', fname="caltech101_silhouettes_28_split1.mat", preproc=[]):
        """Read one split of the CalTech 101 Silhouettes .mat file.

        ``which_set`` is 'train', 'valid' or 'test' and selects the
        'train_data', 'val_data' or 'test_data' entry of the file; any other
        value raises ``ValueError``. ``fname`` is resolved through
        ``datapath`` and ``preproc`` goes to the base-class constructor.
        The selected array is stored as ``self.X`` (cast to ``floatX``) and
        ``self.Y`` is an all-zero array of shape (n_datapoints, 2).
        """
        super(CalTech101Silhouettes, self).__init__(preproc)

        _logger.info("Loading CalTech 101 Silhouettes data (28x28)")
        fname = datapath(fname)

        # Pick the .mat entry first, then read the file exactly once.
        if which_set == 'train':
            mat_key = 'train_data'
        elif which_set == 'valid':
            mat_key = 'val_data'
        elif which_set == 'test':
            mat_key = 'test_data'
        else:
            raise ValueError("Unknown dataset %s" % which_set)

        contents = scipy.io.loadmat(fname)
        self.X = contents[mat_key].astype(floatX)

        n_rows = self.X.shape[0]
        self.n_datapoints = n_rows

        # No labels are read from the file; Y is an all-zero placeholder.
        self.Y = np.zeros((n_rows, 2), dtype=floatX)
Beispiel #8
0
    def __init__(self, data_name, which_set='train', preproc=[]):
        """Load one split of a UCI binary dataset from its HDF5 file into ``self.X``.

        Parameters
        ----------
        data_name : str
            One of 'adult', 'connect4', 'dna', 'mushrooms', 'nips',
            'ocrletters', 'rcv1' or 'web'; mapped to an .h5 file name that is
            resolved through ``datapath``.
        which_set : str
            'train', 'valid' or 'test'; also the name of the HDF5 dataset
            that is read.
        preproc : forwarded to the base-class constructor.

        Raises
        ------
        AssertionError
            If ``data_name`` is not a known dataset.
        ValueError
            If ``which_set`` is not 'train', 'valid' or 'test'.

        A ``KeyError`` while reading the file is logged and terminates the
        process via ``exit(1)``.
        """
        super(UCIBinary, self).__init__(preproc)

        UCIdatasets = {
            'adult': 'adult.h5',
            'connect4': 'connect4.h5',
            'dna': 'dna.h5',
            'mushrooms': 'mushrooms.h5',
            'nips': 'nips.h5',
            'ocrletters': 'ocr_letters.h5',
            'rcv1': 'rcv1.h5',
            'web': 'web.h5'
        }
        assert data_name in UCIdatasets

        _logger.info("Loading %s data" % data_name)
        fname = datapath(UCIdatasets[data_name])

        try:
            with h5py.File(fname, "r") as h5:
                if which_set not in ('train', 'valid', 'test'):
                    raise ValueError("Unknown dataset %s" % which_set)

                # The HDF5 dataset names coincide with the accepted
                # which_set values, so the split name is the lookup key.
                self.X = np.array(h5[which_set]).astype(floatX)

        except KeyError as e:
            # ``as`` is valid on Python 2.6+ and 3; the module-level logger
            # used throughout this method is ``_logger``.
            _logger.info("Failed to read data from %s: %s" % (fname, e))
            exit(1)