Example #1
def get_stream(batch_size, input_size, test=False):
    from fuel.datasets.dogs_vs_cats import DogsVsCats
    from fuel.streams import DataStream
    from fuel.schemes import ShuffledScheme, SequentialScheme, SequentialExampleScheme
    from fuel.transformers.image import RandomFixedSizeCrop
    from fuel.transformers import Flatten  #, ForceFloatX
    from ScikitResize import ScikitResize
    from fuel.transformers import Cast
    # Load the training set
    if test:
        train = DogsVsCats(('train', ), subset=slice(0, 30))
        valid = DogsVsCats(('train', ), subset=slice(19980, 20000))
        test = DogsVsCats(('test', ), subset=slice(0, 4))
    else:
        train = DogsVsCats(('train', ), subset=slice(0, 22000))
        valid = DogsVsCats(('train', ), subset=slice(22000, 25000))
        test = DogsVsCats(('test', ))
    # Generate the streams
    train_stream = DataStream.default_stream(train,
                                             iteration_scheme=ShuffledScheme(
                                                 train.num_examples,
                                                 batch_size))

    valid_stream = DataStream.default_stream(valid,
                                             iteration_scheme=ShuffledScheme(
                                                 valid.num_examples,
                                                 batch_size))
    test_stream = DataStream.default_stream(
        test,
        iteration_scheme=SequentialScheme(test.num_examples, 1)
        #        iteration_scheme=SequentialExampleScheme(test.num_examples)
    )
    # Reshaping: crop and resize to the desired square shape
    train_stream = ScikitResize(train_stream,
                                input_size,
                                which_sources=('image_features', ))
    valid_stream = ScikitResize(valid_stream,
                                input_size,
                                which_sources=('image_features', ))
    test_stream = ScikitResize(test_stream,
                               input_size,
                               which_sources=('image_features', ))

    # ForceFloatX would also guard against dtype bugs:
    #train_stream = ForceFloatX(train_stream)
    #valid_stream = ForceFloatX(valid_stream)
    #test_stream = ForceFloatX(test_stream)

    # Cast is used here instead of ForceFloatX:
    train_stream = Cast(train_stream,
                        dtype='float32',
                        which_sources=('image_features', ))
    valid_stream = Cast(valid_stream,
                        dtype='float32',
                        which_sources=('image_features', ))
    test_stream = Cast(test_stream,
                       dtype='float32',
                       which_sources=('image_features', ))
    return train_stream, valid_stream, test_stream
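
A minimal usage sketch for get_stream (a sketch, not from the original source: it assumes Fuel's DogsVsCats HDF5 file has been downloaded and converted, and the batch_size/input_size values are illustrative):

train_stream, valid_stream, test_stream = get_stream(
    batch_size=32, input_size=(128, 128), test=True)
# Each batch is an (image_features, targets) tuple; image_features is
# float32 with shape (batch, channel, height, width) after ScikitResize.
images, targets = next(train_stream.get_epoch_iterator())
print(images.shape, images.dtype)
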
Example #2
from collections import OrderedDict
import numpy
from numpy.testing import assert_equal  # assumed; Fuel's test suite takes assert_equal from numpy.testing
from fuel.datasets import IterableDataset
from fuel.streams import DataStream
from fuel.transformers import Cast

def test_cast():
    stream = DataStream(
        IterableDataset(OrderedDict([("features", numpy.array([1, 2, 3]).astype("float64")), ("targets", [0, 1, 0])]))
    )
    wrapper = Cast(stream, "float32", which_sources=("features",))
    assert_equal(list(wrapper.get_epoch_iterator()), [(numpy.array(1), 0), (numpy.array(2), 1), (numpy.array(3), 0)])
    assert all(f.dtype == "float32" for f, t in wrapper.get_epoch_iterator())
Example #3
import theano
from fuel.streams import DataStream
from fuel.schemes import ShuffledScheme, SequentialScheme
from fuel.transformers import Cast, ScaleAndShift

def batch_iterator(dataset, batchsize, shuffle=False):
    if shuffle:
        train_scheme = ShuffledScheme(examples=dataset.num_examples, batch_size=batchsize)
    else:
        train_scheme = SequentialScheme(examples=dataset.num_examples, batch_size=batchsize)
    stream = DataStream.default_stream(dataset=dataset, iteration_scheme=train_scheme)
    # Rescale pixel values into [0, 1), then cast to Theano's floatX
    stream_scale = ScaleAndShift(stream, 1./256.0, 0, which_sources=('features',))
    stream_data = Cast(stream_scale, dtype=theano.config.floatX, which_sources=('features',))
    return stream_data.get_epoch_iterator()
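
A usage sketch for batch_iterator (hedged: MNIST stands in for any Fuel dataset with a 'features' source, and assumes the MNIST HDF5 file is available locally):

from fuel.datasets import MNIST

for features, targets in batch_iterator(MNIST(('train',)), batchsize=128, shuffle=True):
    print(features.dtype, features.min(), features.max())  # floatX, values in [0, 1)
    break
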
Example #4
# (setUp method excerpted from a TestCast class; see Example #10 for the full class.)
    def setUp(self):
        dataset = IterableDataset(
            OrderedDict([
                ('features', numpy.array([1, 2, 3]).astype('float64')),
                ('targets', [0, 1, 0])]),
            axis_labels={'features': ('batch',), 'targets': ('batch',)})
        self.stream = DataStream(dataset)
        self.wrapper = Cast(
            self.stream, 'float32', which_sources=('features',))
Example #5
def test_cast():
    stream = DataStream(
        IterableDataset({'features': numpy.array([1, 2, 3]).astype('float64'),
                         'targets': [0, 1, 0]}))
    wrapper = Cast(stream, 'float32', which_sources=('features',))
    assert_equal(
        list(wrapper.get_epoch_iterator()),
        [(numpy.array(1), 0), (numpy.array(2), 1), (numpy.array(3), 0)])
    assert all(f.dtype == 'float32' for f, t in wrapper.get_epoch_iterator())
Example #6
def test_cast():
    stream = DataStream(
        IterableDataset(
            OrderedDict([
                ('features', numpy.array([1, 2, 3]).astype('float64')),
                ('targets', [0, 1, 0])])))
    wrapper = Cast(stream, 'float32', which_sources=('features',))
    assert_equal(
        list(wrapper.get_epoch_iterator()),
        [(numpy.array(1), 0), (numpy.array(2), 1), (numpy.array(3), 0)])
    assert all(f.dtype == 'float32' for f, t in wrapper.get_epoch_iterator())
Example #7
def create_data(data, size, batch_size, _port):
    if data == "train":
        cats = DogsVsCats(('train', ), subset=slice(0, 20000))
        port = _port + 2
    elif data == "valid":
        cats = DogsVsCats(('train', ), subset=slice(20000, 25000))
        port = _port + 3
    print('port', port)
    stream = DataStream.default_stream(cats,
                                       iteration_scheme=ShuffledScheme(
                                           cats.num_examples, batch_size))
    stream_downscale = MinimumImageDimensions(
        stream, size, which_sources=('image_features', ))
    # FlipAsYouCan is a custom transformer from the source project.
    stream_rotate = FlipAsYouCan(stream_downscale)
    stream_max = ScikitResize(stream_rotate,
                              image_size,  # assumed: a module-level global in the source project
                              which_sources=('image_features', ))
    stream_scale = ScaleAndShift(stream_max,
                                 1. / 255,
                                 0,
                                 which_sources=('image_features', ))
    stream_data = Cast(stream_scale,
                       dtype='float32',
                       which_sources=('image_features', ))
    start_server(stream_data, port=port)
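
start_server publishes the stream over ZeroMQ; a training process consumes it with Fuel's ServerDataStream. A sketch of the consumer side (the port value is illustrative and must match the _port arithmetic above):

from fuel.streams import ServerDataStream

train_stream = ServerDataStream(('image_features', 'targets'),
                                produces_examples=False,
                                port=5559)  # e.g. _port + 2 for the "train" stream
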
Example #8
def create_data(data):
    # Note: batch_size and image_size are module-level globals in the source project.
    stream = DataStream(data,
                        iteration_scheme=ShuffledScheme(
                            data.num_examples, batch_size))

    # Data Augmentation
    stream = MinimumImageDimensions(stream,
                                    image_size,
                                    which_sources=('image_features', ))
    stream = MaximumImageDimensions(stream,
                                    image_size,
                                    which_sources=('image_features', ))
    stream = RandomHorizontalSwap(stream, which_sources=('image_features', ))
    stream = Random2DRotation(stream, which_sources=('image_features', ))
    #stream = ScikitResize(stream, image_size, which_sources=('image_features',))

    # Data Preprocessing

    # Data Transformation
    stream = ScaleAndShift(stream,
                           1. / 255,
                           0,
                           which_sources=('image_features', ))
    stream = Cast(stream, dtype='float32', which_sources=('image_features', ))
    return stream
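
A usage sketch for this create_data variant (hedged: batch_size and image_size are globals in the source project, and the DogsVsCats subsets below are illustrative):

from fuel.datasets.dogs_vs_cats import DogsVsCats

train_stream = create_data(DogsVsCats(('train', ), subset=slice(0, 20000)))
valid_stream = create_data(DogsVsCats(('train', ), subset=slice(20000, 25000)))
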
Example #9
# (setUp method excerpted from a TestCast class; see Example #10 for the full class.)
    def setUp(self):
        dataset = IterableDataset(
            OrderedDict([
                ('features', numpy.array([1, 2, 3]).astype('float64')),
                ('targets', [0, 1, 0])]),
            axis_labels={'features': ('batch',), 'targets': ('batch',)})
        self.stream = DataStream(dataset)
        self.wrapper = Cast(
            self.stream, 'float32', which_sources=('features',))
Example #10
class TestCast(object):
    def setUp(self):
        dataset = IterableDataset(
            OrderedDict([
                ('features', numpy.array([1, 2, 3]).astype('float64')),
                ('targets', [0, 1, 0])]),
            axis_labels={'features': ('batch',), 'targets': ('batch',)})
        self.stream = DataStream(dataset)
        self.wrapper = Cast(
            self.stream, 'float32', which_sources=('features',))

    def test_cast(self):
        assert_equal(
            list(self.wrapper.get_epoch_iterator()),
            [(numpy.array(1), 0), (numpy.array(2), 1), (numpy.array(3), 0)])
        assert all(
            f.dtype == 'float32' for f, t in self.wrapper.get_epoch_iterator())

    def test_axis_labels_are_passed_through(self):
        assert_equal(self.wrapper.axis_labels, self.stream.axis_labels)
Example #11
class TestCast(object):
    def setUp(self):
        dataset = IterableDataset(
            OrderedDict([
                ('features', numpy.array([1, 2, 3]).astype('float64')),
                ('targets', [0, 1, 0])]),
            axis_labels={'features': ('batch',), 'targets': ('batch',)})
        self.stream = DataStream(dataset)
        self.wrapper = Cast(
            self.stream, 'float32', which_sources=('features',))

    def test_cast(self):
        assert_equal(
            list(self.wrapper.get_epoch_iterator()),
            [(numpy.array(1), 0), (numpy.array(2), 1), (numpy.array(3), 0)])
        assert all(
            f.dtype == 'float32' for f, t in self.wrapper.get_epoch_iterator())

    def test_axis_labels_are_passed_through(self):
        assert_equal(self.wrapper.axis_labels, self.stream.axis_labels)
Example #12
def build_stream(data, batch_size, columns=None):
    # 'data' is a pandas DataFrame; add_axes is a helper from the source project.
    if columns is None:
        columns = ['Open', 'High', 'Low', 'Close', 'Qty', 'Vol']
    dataset = IndexableDataset(indexables=OrderedDict([(
        'x', data[columns].values), ('y', data['CloseTarget'].values)]))
    size = len(dataset.indexables[0])
    stream = DataStream(dataset=dataset,
                        iteration_scheme=SequentialScheme(
                            examples=range(size), batch_size=batch_size))

    stream = Mapping(stream, add_axes)
    stream = Cast(stream, theano.config.floatX)

    return stream
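
A usage sketch for build_stream (hedged: it assumes a pandas DataFrame with the OHLC columns listed above plus a 'CloseTarget' column; the file name is hypothetical and add_axes comes from the source project):

import pandas

frame = pandas.read_csv('quotes.csv')  # hypothetical file with the expected columns
stream = build_stream(frame, batch_size=16)
for x, y in stream.get_epoch_iterator():
    break  # x and y are floatX batches, with axes added by add_axes
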
Example #13
def create_data(data):
    stream = DataStream.default_stream(data,
                                       iteration_scheme=ShuffledScheme(
                                           data.num_examples, batch_size))
    stream_downscale = MinimumImageDimensions(
        stream, image_size, which_sources=('image_features', ))
    #stream_rotate = Random2DRotation(stream_downscale, which_sources=('image_features',))
    stream_max = ScikitResize(stream_downscale,
                              image_size,
                              which_sources=('image_features', ))
    stream_scale = ScaleAndShift(stream_max,
                                 1. / 255,
                                 0,
                                 which_sources=('image_features', ))
    stream_cast = Cast(stream_scale,
                       dtype='float32',
                       which_sources=('image_features', ))
    #stream_flat = Flatten(stream_scale, which_sources=('image_features',))
    return stream_cast
Example #14
# downscale_test_stream is defined earlier in the source script (not shown here).
upscale_test_stream = MaximumImageDimensions(
    data_stream=downscale_test_stream,
    maximum_shape=image_size,
    which_sources=('image_features',))

scaled_test_stream = ScaleAndShift(
    data_stream=upscale_test_stream,
    scale=1. / 255,
    shift=0,
    which_sources=('image_features',))

data_test_stream = Cast(
    data_stream=scaled_test_stream,
    dtype='float32',
    which_sources=('image_features',))
# top_mlp and conv_sequence are the trained model components from the source script.
test_x = tensor.tensor4('image_features')
predict_function = theano.function(
    inputs=[test_x],
    outputs=top_mlp.apply(Flattener().apply(conv_sequence.apply(test_x))))

import csv
# Open in text mode with newline='' (Python 3); the original used 'wb' (Python 2).
csvfile = csv.writer(open("test_pred_overfeat.csv", 'w', newline=''))
for i, test_image in enumerate(data_test_stream.get_epoch_iterator()):
    prediction = predict_function(test_image[0])[0]
    isadog = numpy.argmax(prediction)
    csvfile.writerow([str(i + 1), str(isadog)])


Example #15
batch_size = 32
num_train_example = slice_train.stop - slice_train.start
num_valid_example = slice_valid.stop - slice_valid.start
num_test_example = slice_test.stop - slice_test.start

train_dataset = CIFAR10(('train', ), subset=slice_train)
train_stream = DataStream.default_stream(train_dataset,
                                         iteration_scheme=SequentialScheme(
                                             train_dataset.num_examples,
                                             batch_size))
# OneHotEncode10 and RandomHorizontalFlip are custom transformers from the source project.
train_stream = OneHotEncode10(train_stream, which_sources=('targets', ))
train_stream = RandomHorizontalFlip(train_stream, which_sources=('features', ))
train_stream = MinimumImageDimensions(train_stream, (224, 224),
                                      which_sources=('features', ))
train_stream = ScaleAndShift(train_stream, 1., 0, which_sources=('features', ))
train_stream = Cast(train_stream, 'floatX', which_sources=('features', ))

valid_dataset = CIFAR10(('train', ), subset=slice_valid)
valid_stream = DataStream.default_stream(valid_dataset,
                                         iteration_scheme=SequentialScheme(
                                             valid_dataset.num_examples,
                                             batch_size))
valid_stream = OneHotEncode10(valid_stream, which_sources=('targets', ))
valid_stream = MinimumImageDimensions(valid_stream, (224, 224),
                                      which_sources=('features', ))
valid_stream = ScaleAndShift(valid_stream, 1., 0, which_sources=('features', ))
valid_stream = Cast(valid_stream, 'floatX', which_sources=('features', ))

test_dataset = CIFAR10(('train', ), subset=slice_test)
test_stream = DataStream.default_stream(test_dataset,
                                        iteration_scheme=SequentialScheme(
                                            test_dataset.num_examples,
                                            batch_size))
Example #16
# Enlarge images that are too small
downscale_stream = MinimumImageDimensions(stream, (64, 64),
                                          which_sources=('image_features', ))

# Our images are of different sizes, so we'll use a Fuel transformer
# to take random crops of size (32 x 32) from each image
cropped_stream = RandomFixedSizeCrop(downscale_stream, (32, 32),
                                     which_sources=('image_features', ))

# We'll use a simple MLP, so we need to flatten the images
# from (channel, width, height) to simply (features,)
flattened_stream = Flatten(cropped_stream, which_sources=('image_features', ))

# Note: the cast wraps cropped_stream, so flattened_stream above is left unused
# (the snippet below switched from a flat MLP input to a 4D conv input).
stream_data = Cast(cropped_stream,
                   dtype='float32',
                   which_sources=('image_features', ))

# Create the Theano MLP
import theano
from theano import tensor
import numpy

#X = tensor.matrix('image_features')
X = tensor.tensor4('image_features')
T = tensor.lmatrix('targets')
"""
W = theano.shared(
    numpy.random.uniform(low=-0.01, high=0.01, size=(3072, 500)), 'W')
b = theano.shared(numpy.zeros(500))
V = theano.shared(
Example #17
from fuel.datasets.dogs_vs_cats import DogsVsCats
from fuel.streams import DataStream
from fuel.schemes import SequentialExampleScheme
from fuel.transformers.image import RandomFixedSizeCrop, MinimumImageDimensions, MaximumImageDimensions, Random2DRotation
from fuel.transformers import Flatten, Cast, ScaleAndShift
import numpy as np
# (Model, function and load_parameter_values below come from Blocks/Theano in the source script.)
size = (128, 128)
cats = DogsVsCats(('test', ))
stream = DataStream.default_stream(cats,
                                   iteration_scheme=SequentialExampleScheme(
                                       cats.num_examples))
stream_upscale = MaximumImageDimensions(stream,
                                        size,
                                        which_sources=('image_features', ))
stream_scale = ScaleAndShift(stream_upscale,
                             1. / 255,
                             0,
                             which_sources=('image_features', ))
stream_data = Cast(stream_scale,
                   dtype='float32',
                   which_sources=('image_features', ))

#Load the parameters of the model
params = load_parameter_values('convnet_parameters.pkl')
mo = Model(predict)
mo.set_parameter_values(params)
#Create the forward propagation function
fprop = function(mo.inputs, mo.outputs[0], allow_input_downcast=True)
tab = []
i = 1
#Get the prediction for each example of the test set
for data in stream_data.get_epoch_iterator():
    predict = np.argmax(fprop(data))
    tab.append([i, predict])
    print(str(i) + "," + str(predict))
    i = i + 1
Example #18
stream = DataStream.default_stream(train,
                                   iteration_scheme=SequentialScheme(
                                       train.num_examples, batch_size))

# upscaled_stream = MinimumImageDimensions(stream, (100, 100), which_sources=('image_features',))
# DownscaleMinDimension is a custom transformer from the source project.
downscaled_stream = DownscaleMinDimension(stream,
                                          100,
                                          which_sources=('image_features', ))

# Our images are of different sizes, so we'll use a Fuel transformer
# to take random crops of size (32 x 32) from each image
cropped_stream = RandomFixedSizeCrop(downscaled_stream, (100, 100),
                                     which_sources=('image_features', ))

rotated_stream = Random2DRotation(cropped_stream,
                                  math.pi / 6,
                                  which_sources=('image_features', ))
flipped_stream = RandomHorizontalFlip(rotated_stream,
                                      which_sources=('image_features', ))

# Rescale pixel values from [0, 255] to [0, 1], then cast to float32
float_stream = ScaleAndShift(flipped_stream,
                             1. / 255,
                             0,
                             which_sources=('image_features', ))
float32_stream = Cast(float_stream,
                      numpy.float32,
                      which_sources=('image_features', ))

start_server(float32_stream, port=port)
Example #19
def sample_transformations(thestream):
    cast_stream = Cast(data_stream=thestream,
                       dtype='float32',
                       which_sources=('features', ))
    return cast_stream
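
A minimal usage sketch (the one-source dataset is illustrative; any Fuel stream with a 'features' source can be wrapped):

import numpy
from fuel.datasets import IterableDataset
from fuel.streams import DataStream

stream = DataStream(IterableDataset({'features': numpy.arange(4, dtype='float64')}))
for (features,) in sample_transformations(stream).get_epoch_iterator():
    assert features.dtype == numpy.float32
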
Example #20
#Add the Softmax function
out = Flattener().apply(conv_sequence.apply(x))
predict = NDimensionalSoftmax().apply(out)

#get the test stream
from fuel.datasets.dogs_vs_cats import DogsVsCats
from fuel.streams import DataStream, ServerDataStream
from fuel.schemes import ShuffledScheme, SequentialExampleScheme
from fuel.transformers.image import RandomFixedSizeCrop, MinimumImageDimensions, MaximumImageDimensions, Random2DRotation
from fuel.transformers import Flatten, Cast, ScaleAndShift
size = (128,128)
cats = DogsVsCats(('test',))
stream = DataStream.default_stream(cats, iteration_scheme=SequentialExampleScheme(cats.num_examples))
stream_upscale = MaximumImageDimensions(stream, size, which_sources=('image_features',))
stream_scale = ScaleAndShift(stream_upscale, 1./255, 0, which_sources=('image_features',))
stream_data = Cast(stream_scale, dtype='float32', which_sources=('image_features',))

#Load the parameters of the model
params = load_parameter_values('convnet_parameters.pkl')
mo = Model(predict)
mo.set_parameter_values(params)
#Create the forward propagation function
fprop = function(mo.inputs, mo.outputs[0], allow_input_downcast=True)
tab = []
i = 1
#Get the prediction for each example of the test set
for data in stream_data.get_epoch_iterator():
    predict = np.argmax(fprop(data))
    tab.append([i, predict])
    print(str(i) + "," + str(predict))
    i = i + 1
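
The (index, prediction) pairs collected in tab can then be dumped as a Kaggle-style submission file (a sketch; the file name and header row are illustrative):

import csv

with open('submission.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['id', 'label'])
    writer.writerows(tab)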