Example #1
def load_datasets(self):
    from fuel.datasets.mnist import MNIST
    # Carve a 10,000-example validation split off the end of MNIST's
    # 60,000-example training set.
    return dict(train=MNIST(which_sets=["train"],
                            subset=slice(None, 50000)),
                valid=MNIST(which_sets=["train"],
                            subset=slice(50000, None)),
                test=MNIST(which_sets=["test"]))
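A snippet like this is typically consumed by wrapping each split in a Fuel DataStream. A minimal sketch of that step, assuming Fuel is installed and its MNIST file has been downloaded (the batch size of 128 is arbitrary):

from fuel.datasets.mnist import MNIST
from fuel.schemes import ShuffledScheme
from fuel.streams import DataStream

train = MNIST(which_sets=["train"], subset=slice(None, 50000))
stream = DataStream.default_stream(
    train, iteration_scheme=ShuffledScheme(train.num_examples, 128))
for batch in stream.get_epoch_iterator():
    pass  # each batch is a tuple of (features, targets) arrays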
Example #2
    def datasets(self, train_subset_indices=None):
        # Optionally restrict training to a subset of the stored indices.
        train_set_indices = self.train_set_indices
        if train_subset_indices is not None:
            train_set_indices = self.train_set_indices[train_subset_indices]

        train = MNIST(which_sets=('train',), subset=list(train_set_indices),
                      load_in_memory=True)

        return train, self.val, self.test
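The subset argument accepts an explicit list of indices as well as a slice, which is what lets the method above re-index its stored training indices. A small sketch of the index-list form (the particular indices are arbitrary):

import numpy as np
from fuel.datasets.mnist import MNIST

indices = np.arange(0, 1000, 2)  # every second example among the first 1000
train = MNIST(which_sets=('train',), subset=list(indices), load_in_memory=True)
print(train.num_examples)  # 500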
Example #3
    def __init__(self):
        # 50,000/10,000 train/validation split of the MNIST training set.
        self.train = MNIST(which_sets=['train'], subset=slice(0, 50000), load_in_memory=True)
        self.val = MNIST(which_sets=['train'], subset=slice(50000, None), load_in_memory=True)
        self.test = MNIST(which_sets=['test'], load_in_memory=True)

        self.train_set_indices = np.arange(50000)
        # Pull all training targets into memory as a plain array.
        d_y = MNIST(which_sets=['train'], sources=['targets'], subset=slice(0, 50000),
                    load_in_memory=True)
        self.train_y = d_y.get_data(d_y.open(), slice(None))[0]
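The last line uses Fuel's low-level handle API: open() returns a state object, and get_data(state, request) returns one array per requested source. The same pattern in isolation, assuming the MNIST file is available:

from fuel.datasets.mnist import MNIST

d = MNIST(which_sets=['train'], sources=['targets'], load_in_memory=True)
state = d.open()
targets, = d.get_data(state, slice(0, 10))  # labels of the first ten examples
d.close(state)
print(targets.ravel())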
Example #4
import sys
import logging

import numpy as np
import theano

from fuel.datasets.mnist import MNIST
from fuel.schemes import ShuffledScheme
from fuel.streams import DataStream

logging.basicConfig()

# VAModel, Model and load_parameter_values come from the surrounding project.
m = VAModel()

# Load trained parameters from the checkpoint path given on the command line.
model = Model(m.variational_cost)
print("loading params")
params = load_parameter_values(sys.argv[1])
model.set_param_values(params)

test_dataset = MNIST(which_sets=['test'], sources=['features'])
test_scheme = ShuffledScheme(test_dataset.num_examples, 128)
test_stream = DataStream(test_dataset, iteration_scheme=test_scheme)

# Compile functions mapping inputs to the noisy codes and reconstructions.
_func_noisy = theano.function([m.X], m.noisy)
_func_produced = theano.function([m.X], m.produced)

batch = next(test_stream.get_epoch_iterator())[0]
out_noise = _func_noisy(batch)
out_produced = _func_produced(batch)

import cv2

print(out_noise.shape)
for k in range(10):
    img = np.reshape(out_noise[k, :], (28, 28))
    img = cv2.resize(img, (500, 500), interpolation=cv2.INTER_NEAREST)
    cv2.imshow('img', img)
    cv2.waitKey(0)  # without a waitKey call the window never actually renders
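On a headless machine cv2.imshow has no display to attach to; writing the images to disk is a drop-in alternative. A sketch under the assumption that the model outputs lie in [0, 1] (the filename pattern is arbitrary):

import cv2
import numpy as np

def save_images(images, prefix, n=10):
    # images: array of flattened 28x28 examples, e.g. out_noise above
    for k in range(n):
        img = np.reshape(images[k, :], (28, 28))
        img = cv2.resize(img, (500, 500), interpolation=cv2.INTER_NEAREST)
        cv2.imwrite('%s_%02d.png' % (prefix, k), (img * 255).astype('uint8'))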
Example #5
def get_data(data_name):
    # Return (x_dim, data_train, data_valid, data_test) for a named dataset.
    if data_name == 'bmnist':
        from fuel.datasets.binarized_mnist import BinarizedMNIST

        x_dim = 28 * 28

        data_train = BinarizedMNIST(which_sets=['train'], sources=['features'])
        data_valid = BinarizedMNIST(which_sets=['valid'], sources=['features'])
        data_test = BinarizedMNIST(which_sets=['test'], sources=['features'])
    elif data_name == 'mnist':
        from fuel.datasets.mnist import MNIST

        x_dim = 28 * 28

        data_train = MNIST(which_sets=['train'], sources=['features'])
        # MNIST ships with no validation split, so the test set is reused here.
        data_valid = MNIST(which_sets=['test'], sources=['features'])
        data_test = MNIST(which_sets=['test'], sources=['features'])
    elif data_name == 'silhouettes':
        from fuel.datasets.caltech101_silhouettes import CalTech101Silhouettes

        size = 28
        x_dim = size * size

        data_train = CalTech101Silhouettes(which_sets=['train'],
                                           size=size,
                                           sources=['features'])
        data_valid = CalTech101Silhouettes(which_sets=['valid'],
                                           size=size,
                                           sources=['features'])
        data_test = CalTech101Silhouettes(which_sets=['test'],
                                          size=size,
                                          sources=['features'])
    elif data_name == 'tfd':
        from fuel.datasets.toronto_face_database import TorontoFaceDatabase

        size = 48
        x_dim = size * size

        data_train = TorontoFaceDatabase(which_sets=['unlabeled'],
                                         size=size,
                                         sources=['features'])
        data_valid = TorontoFaceDatabase(which_sets=['valid'],
                                         size=size,
                                         sources=['features'])
        data_test = TorontoFaceDatabase(which_sets=['test'],
                                        size=size,
                                        sources=['features'])
    elif data_name == 'bars':
        from bars_data import Bars

        width = 4
        x_dim = width * width

        data_train = Bars(num_examples=5000, width=width, sources=['features'])
        data_valid = Bars(num_examples=5000, width=width, sources=['features'])
        data_test = Bars(num_examples=5000, width=width, sources=['features'])
    elif data_name in local_datasets:
        from fuel.datasets.hdf5 import H5PYDataset

        # local_datasets is defined elsewhere in the original module.
        fname = "data/" + data_name + ".hdf5"

        data_train = H5PYDataset(fname,
                                 which_sets=["train"],
                                 sources=['features'],
                                 load_in_memory=True)
        data_valid = H5PYDataset(fname,
                                 which_sets=["valid"],
                                 sources=['features'],
                                 load_in_memory=True)
        data_test = H5PYDataset(fname,
                                which_sets=["test"],
                                sources=['features'],
                                load_in_memory=True)

        # Probe 100 examples to infer the flattened feature dimensionality.
        some_features = data_train.get_data(None, slice(0, 100))[0]
        assert some_features.shape[0] == 100

        some_features = some_features.reshape([100, -1])
        x_dim = some_features.shape[1]
    else:
        raise ValueError("Unknown dataset %s" % data_name)

    return x_dim, data_train, data_valid, data_test
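Calling the helper is then a one-liner, and the returned x_dim feeds straight into model construction. For instance, with the standard MNIST file in place:

x_dim, data_train, data_valid, data_test = get_data('mnist')
print(x_dim, data_train.num_examples)  # 784 60000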
Example #6
    # GenerateNegtiveSample [sic] is a project-specific monitor; MNIST,
    # DataStream, SequentialScheme, Flatten, IsotropicGaussian, Constant
    # and T (theano.tensor) are imported elsewhere in the original script.
    from monitors import GenerateNegtiveSample

    # sys.setrecursionlimit(100000)

    logger = logging.getLogger(__name__)
    FORMAT = '[%(asctime)s] %(name)s %(message)s'
    DATEFMT = "%H:%M:%S"
    logging.basicConfig(format=FORMAT, datefmt=DATEFMT, level=logging.DEBUG)

    inits = {
        'weights_init': IsotropicGaussian(0.01),
        'biases_init': Constant(0.)
    }

    batch_size = 100
    data_train = MNIST(which_sets=['train'], sources=['features'])

    train_stream = Flatten(
        DataStream.default_stream(data_train,
                                  iteration_scheme=SequentialScheme(
                                      data_train.num_examples, batch_size)))

    features_size = 28 * 28 * 1

    inputs = T.matrix('features')

    # A random batch standing in for real features, used to smoke-test
    # compiled functions.
    test_data = {
        inputs:
        255 * np.random.normal(size=(batch_size, 28 * 28)).astype('float32')
    }
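The Flatten transformer above reshapes each (batch, 1, 28, 28) image batch into (batch, 784), matching the T.matrix('features') input. A quick sanity check of the stream, as a sketch:

for (features,) in train_stream.get_epoch_iterator():
    assert features.shape == (batch_size, 28 * 28)
    break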
Example #7
test_misclass = MisclassificationRate().apply(flat_y, test_out)
test_misclass.name = 'misclass'

model = Model(loss)

######################
# Data
######################
import numpy
from fuel.datasets.mnist import MNIST
from fuel.transformers import ScaleAndShift, ForceFloatX
from fuel.streams import DataStream
from fuel.schemes import ShuffledScheme

mnist_train = MNIST(which_sets=['train'])
mnist_test = MNIST(which_sets=['test'])

batch_size = 100  # Batch size for training
batch_size_mon = 2000  # Batch size for monitoring and batch normalization
n_batches = int(numpy.ceil(float(mnist_train.num_examples) / batch_size_mon))

# Index lists for a 50,000/10,000 train/validation split.
ind = list(range(mnist_train.num_examples))
train_ind = ind[:50000]
val_ind = ind[50000:]


def preprocessing(data_stream):
    # Scale pixel values from [0, 255] to [0, 1] and cast to floatX.
    # The original snippet is cut off after the scale argument; a shift
    # of 0 is the assumed completion.
    return ForceFloatX(ScaleAndShift(data_stream,
                                     1 / 255.0, 0))
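With preprocessing in place, a training stream over the split defined above can be built by composing it with a scheme over the index list. A minimal sketch, assuming the rest of the script follows the usual Fuel pattern:

train_stream = preprocessing(
    DataStream(mnist_train,
               iteration_scheme=ShuffledScheme(train_ind, batch_size)))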