def get_stream(batch_size, input_size, test=False):
    from fuel.datasets.dogs_vs_cats import DogsVsCats
    from fuel.streams import DataStream
    from fuel.schemes import ShuffledScheme, SequentialScheme, SequentialExampleScheme
    from fuel.transformers.image import RandomFixedSizeCrop
    from fuel.transformers import Flatten  # , ForceFloatX
    from ScikitResize import ScikitResize
    from fuel.transformers import Cast

    # Load the training set
    if test:
        train = DogsVsCats(('train',), subset=slice(0, 30))
        valid = DogsVsCats(('train',), subset=slice(19980, 20000))
        test = DogsVsCats(('test',), subset=slice(0, 4))
    else:
        train = DogsVsCats(('train',), subset=slice(0, 22000))
        valid = DogsVsCats(('train',), subset=slice(22000, 25000))
        test = DogsVsCats(('test',))

    # Generate the streams
    train_stream = DataStream.default_stream(
        train, iteration_scheme=ShuffledScheme(train.num_examples, batch_size))
    valid_stream = DataStream.default_stream(
        valid, iteration_scheme=ShuffledScheme(valid.num_examples, batch_size))
    test_stream = DataStream.default_stream(
        test, iteration_scheme=SequentialScheme(test.num_examples, 1)
        # iteration_scheme=SequentialExampleScheme(test.num_examples)
    )

    # Reshaping: crop and resize to the desired square shape
    train_stream = ScikitResize(train_stream, input_size, which_sources=('image_features',))
    valid_stream = ScikitResize(valid_stream, input_size, which_sources=('image_features',))
    test_stream = ScikitResize(test_stream, input_size, which_sources=('image_features',))

    # ForceFloatX, to spare you from possible bugs
    # train_stream = ForceFloatX(train_stream)
    # valid_stream = ForceFloatX(valid_stream)
    # test_stream = ForceFloatX(test_stream)

    # Cast instead of ForceFloatX
    train_stream = Cast(train_stream, dtype='float32', which_sources=('image_features',))
    valid_stream = Cast(valid_stream, dtype='float32', which_sources=('image_features',))
    test_stream = Cast(test_stream, dtype='float32', which_sources=('image_features',))
    return train_stream, valid_stream, test_stream
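# Usage sketch for the function above (a hedged example, not part of the
# original): assumes the DogsVsCats HDF5 file is reachable on Fuel's data
# path; the batch_size and input_size values are illustrative.
train_stream, valid_stream, test_stream = get_stream(
    batch_size=32, input_size=(128, 128), test=True)
for batch in train_stream.get_epoch_iterator():
    images = batch[0]  # float32 array, resized to input_size by ScikitResize
    break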
def test_cast():
    stream = DataStream(
        IterableDataset(
            OrderedDict([
                ("features", numpy.array([1, 2, 3]).astype("float64")),
                ("targets", [0, 1, 0])])))
    wrapper = Cast(stream, "float32", which_sources=("features",))
    assert_equal(
        list(wrapper.get_epoch_iterator()),
        [(numpy.array(1), 0), (numpy.array(2), 1), (numpy.array(3), 0)])
    assert all(f.dtype == "float32" for f, t in wrapper.get_epoch_iterator())
def batch_iterator(dataset, batchsize, shuffle=False):
    if shuffle:
        train_scheme = ShuffledScheme(examples=dataset.num_examples, batch_size=batchsize)
    else:
        train_scheme = SequentialScheme(examples=dataset.num_examples, batch_size=batchsize)
    stream = DataStream.default_stream(dataset=dataset, iteration_scheme=train_scheme)
    # Scale pixel values to [0, 1), then cast to the Theano float type
    stream_scale = ScaleAndShift(stream, 1. / 256.0, 0, which_sources=('features',))
    stream_data = Cast(stream_scale, dtype=theano.config.floatX, which_sources=('features',))
    return stream_data.get_epoch_iterator()
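# Minimal usage sketch (MNIST here is an assumption for illustration; any
# Fuel dataset with a 'features' source would do):
from fuel.datasets import MNIST

mnist = MNIST(('train',))
for x_batch, y_batch in batch_iterator(mnist, 128, shuffle=True):
    # x_batch has been scaled to [0, 1) and cast to theano.config.floatX
    break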
def setUp(self):
    dataset = IterableDataset(
        OrderedDict([
            ('features', numpy.array([1, 2, 3]).astype('float64')),
            ('targets', [0, 1, 0])]),
        # Note the trailing commas: axis labels are tuples of axis names
        axis_labels={'features': ('batch',), 'targets': ('batch',)})
    self.stream = DataStream(dataset)
    self.wrapper = Cast(
        self.stream, 'float32', which_sources=('features',))
def test_cast():
    stream = DataStream(
        IterableDataset({'features': numpy.array([1, 2, 3]).astype('float64'),
                         'targets': [0, 1, 0]}))
    wrapper = Cast(stream, 'float32', which_sources=('features',))
    assert_equal(
        list(wrapper.get_epoch_iterator()),
        [(numpy.array(1), 0), (numpy.array(2), 1), (numpy.array(3), 0)])
    assert all(f.dtype == 'float32' for f, t in wrapper.get_epoch_iterator())
def test_cast():
    stream = DataStream(
        IterableDataset(
            OrderedDict([
                ('features', numpy.array([1, 2, 3]).astype('float64')),
                ('targets', [0, 1, 0])])))
    wrapper = Cast(stream, 'float32', which_sources=('features',))
    assert_equal(
        list(wrapper.get_epoch_iterator()),
        [(numpy.array(1), 0), (numpy.array(2), 1), (numpy.array(3), 0)])
    assert all(f.dtype == 'float32' for f, t in wrapper.get_epoch_iterator())
def create_data(data, size, batch_size, _port):
    if data == "train":
        cats = DogsVsCats(('train',), subset=slice(0, 20000))
        port = _port + 2
    elif data == "valid":
        cats = DogsVsCats(('train',), subset=slice(20000, 25000))
        port = _port + 3
    print('port', port)
    stream = DataStream.default_stream(
        cats, iteration_scheme=ShuffledScheme(cats.num_examples, batch_size))
    # Upscale undersized images, flip, resize, rescale, and cast to float32
    stream_downscale = MinimumImageDimensions(
        stream, size, which_sources=('image_features',))
    stream_rotate = FlipAsYouCan(stream_downscale)
    stream_max = ScikitResize(stream_rotate, image_size, which_sources=('image_features',))
    stream_scale = ScaleAndShift(stream_max, 1. / 255, 0, which_sources=('image_features',))
    stream_data = Cast(stream_scale, dtype='float32', which_sources=('image_features',))
    start_server(stream_data, port=port)
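# Client-side counterpart (a hedged sketch, not part of the original):
# batches published by start_server above can be consumed with Fuel's
# ServerDataStream; the port must match `_port + 2` (train) or
# `_port + 3` (valid) used by create_data.
from fuel.streams import ServerDataStream

train_data = ServerDataStream(('image_features', 'targets'),
                              produces_examples=False,
                              port=5559)  # illustrative port value
for batch in train_data.get_epoch_iterator():
    images, targets = batch
    break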
def create_data(data):
    stream = DataStream(data, iteration_scheme=ShuffledScheme(
        data.num_examples, batch_size))
    # Data Augmentation
    stream = MinimumImageDimensions(stream, image_size, which_sources=('image_features',))
    stream = MaximumImageDimensions(stream, image_size, which_sources=('image_features',))
    stream = RandomHorizontalSwap(stream, which_sources=('image_features',))
    stream = Random2DRotation(stream, which_sources=('image_features',))
    # stream = ScikitResize(stream, image_size, which_sources=('image_features',))
    # Data Preprocessing / Transformation
    stream = ScaleAndShift(stream, 1. / 255, 0, which_sources=('image_features',))
    stream = Cast(stream, dtype='float32', which_sources=('image_features',))
    return stream
class TestCast(object):
    def setUp(self):
        dataset = IterableDataset(
            OrderedDict([
                ('features', numpy.array([1, 2, 3]).astype('float64')),
                ('targets', [0, 1, 0])]),
            # Axis labels are tuples of axis names, hence the trailing commas
            axis_labels={'features': ('batch',), 'targets': ('batch',)})
        self.stream = DataStream(dataset)
        self.wrapper = Cast(
            self.stream, 'float32', which_sources=('features',))

    def test_cast(self):
        assert_equal(
            list(self.wrapper.get_epoch_iterator()),
            [(numpy.array(1), 0), (numpy.array(2), 1), (numpy.array(3), 0)])
        assert all(
            f.dtype == 'float32'
            for f, t in self.wrapper.get_epoch_iterator())

    def test_axis_labels_are_passed_through(self):
        assert_equal(self.wrapper.axis_labels, self.stream.axis_labels)
def build_stream(data, batch_size, columns=None):
    if columns is None:
        columns = ['Open', 'High', 'Low', 'Close', 'Qty', 'Vol']
    dataset = IndexableDataset(indexables=OrderedDict([
        ('x', data[columns].values),
        ('y', data['CloseTarget'].values)]))
    size = len(dataset.indexables[0])
    stream = DataStream(dataset=dataset, iteration_scheme=SequentialScheme(
        examples=range(size), batch_size=batch_size))
    stream = Mapping(stream, add_axes)
    stream = Cast(stream, theano.config.floatX)
    return stream
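# `add_axes` is not defined in the snippet above; this is one plausible
# implementation (a hypothetical helper, not the original code). Fuel's
# Mapping passes each batch as a tuple of sources, so here we prepend a
# length-1 time axis to both 'x' and 'y', as a recurrent model might expect.
import numpy

def add_axes(batch):
    x, y = batch
    return (x[numpy.newaxis, ...], y[numpy.newaxis, ...])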
def create_data(data):
    stream = DataStream.default_stream(data, iteration_scheme=ShuffledScheme(
        data.num_examples, batch_size))
    stream_downscale = MinimumImageDimensions(
        stream, image_size, which_sources=('image_features',))
    # stream_rotate = Random2DRotation(stream_downscale, which_sources=('image_features',))
    stream_max = ScikitResize(stream_downscale, image_size, which_sources=('image_features',))
    stream_scale = ScaleAndShift(stream_max, 1. / 255, 0, which_sources=('image_features',))
    stream_cast = Cast(stream_scale, dtype='float32', which_sources=('image_features',))
    # stream_flat = Flatten(stream_scale, which_sources=('image_features',))
    return stream_cast
upscale_test_stream = MaximumImageDimensions(
    data_stream=downscale_test_stream,
    maximum_shape=image_size,
    which_sources=('image_features',))
scaled_test_stream = ScaleAndShift(
    data_stream=upscale_test_stream,
    scale=1. / 255,
    shift=0,
    which_sources=('image_features',))
data_test_stream = Cast(
    data_stream=scaled_test_stream,
    dtype='float32',
    which_sources=('image_features',))

test_x = tensor.tensor4('image_features')
predict_function = theano.function(
    inputs=[test_x],
    outputs=top_mlp.apply(Flattener().apply(conv_sequence.apply(test_x))))

# Write one prediction per test example to a CSV file
import csv
csvfile = csv.writer(open("test_pred_overfeat.csv", 'w'))
for i, test_image in enumerate(data_test_stream.get_epoch_iterator()):
    prediction = predict_function(test_image[0])[0]
    isadog = numpy.argmax(prediction)
    csvfile.writerow([str(i + 1), str(isadog)])
batch_size = 32
num_train_example = slice_train.stop - slice_train.start
num_valid_example = slice_valid.stop - slice_valid.start
num_test_example = slice_test.stop - slice_test.start

train_dataset = CIFAR10(('train',), subset=slice_train)
train_stream = DataStream.default_stream(train_dataset, iteration_scheme=SequentialScheme(
    train_dataset.num_examples, batch_size))
train_stream = OneHotEncode10(train_stream, which_sources=('targets',))
train_stream = RandomHorizontalFlip(train_stream, which_sources=('features',))
train_stream = MinimumImageDimensions(train_stream, (224, 224), which_sources=('features',))
train_stream = ScaleAndShift(train_stream, 1., 0, which_sources=('features',))
train_stream = Cast(train_stream, 'floatX', which_sources=('features',))

valid_dataset = CIFAR10(('train',), subset=slice_valid)
valid_stream = DataStream.default_stream(valid_dataset, iteration_scheme=SequentialScheme(
    valid_dataset.num_examples, batch_size))
valid_stream = OneHotEncode10(valid_stream, which_sources=('targets',))
valid_stream = MinimumImageDimensions(valid_stream, (224, 224), which_sources=('features',))
valid_stream = ScaleAndShift(valid_stream, 1., 0, which_sources=('features',))
valid_stream = Cast(valid_stream, 'floatX', which_sources=('features',))

test_dataset = CIFAR10(('train',), subset=slice_test)
test_stream = DataStream.default_stream(test_dataset, iteration_scheme=SequentialScheme(
    test_dataset.num_examples, batch_size))
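# Note on the 'floatX' string above: Fuel's Cast treats 'floatX' specially,
# resolving it to fuel.config.floatX (normally kept in sync with
# theano.config.floatX), so the casts are equivalent to an explicit dtype:
# train_stream = Cast(train_stream, fuel.config.floatX, which_sources=('features',))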
# Enlarge images that are too small
downscale_stream = MinimumImageDimensions(stream, (64, 64),
                                          which_sources=('image_features',))

# Our images are of different sizes, so we'll use a Fuel transformer
# to take random crops of size (32 x 32) from each image
cropped_stream = RandomFixedSizeCrop(downscale_stream, (32, 32),
                                     which_sources=('image_features',))

# We'll use a simple MLP, so we need to flatten the images
# from (channel, width, height) to simply (features,)
flattened_stream = Flatten(cropped_stream, which_sources=('image_features',))

# Note: the cast below is applied to the cropped (unflattened) stream,
# matching the tensor4 input variable defined further down
stream_data = Cast(cropped_stream, dtype='float32',
                   which_sources=('image_features',))

# Create the Theano MLP
import theano
from theano import tensor
import numpy

# X = tensor.matrix('image_features')
X = tensor.tensor4('image_features')
T = tensor.lmatrix('targets')

# Commented-out shared parameters for the flat MLP variant
# (32 x 32 x 3 = 3072 inputs):
# W = theano.shared(
#     numpy.random.uniform(low=-0.01, high=0.01, size=(3072, 500)), 'W')
# b = theano.shared(numpy.zeros(500))
# V = theano.shared(
from fuel.transformers.image import RandomFixedSizeCrop, MinimumImageDimensions, MaximumImageDimensions, Random2DRotation
from fuel.transformers import Flatten, Cast, ScaleAndShift

size = (128, 128)
cats = DogsVsCats(('test',))
stream = DataStream.default_stream(cats, iteration_scheme=SequentialExampleScheme(
    cats.num_examples))
stream_upscale = MaximumImageDimensions(stream, size, which_sources=('image_features',))
stream_scale = ScaleAndShift(stream_upscale, 1. / 255, 0, which_sources=('image_features',))
stream_data = Cast(stream_scale, dtype='float32', which_sources=('image_features',))

# Load the parameters of the model
params = load_parameter_values('convnet_parameters.pkl')
mo = Model(predict)
mo.set_parameter_values(params)

# Create the forward propagation function
fprop = function(mo.inputs, mo.outputs[0], allow_input_downcast=True)
tab = []
i = 1

# Get the prediction for each example of the test set
for data in stream_data.get_epoch_iterator():
    predict = np.argmax(fprop(data))
    tab.append([i, predict])
    print(str(i) + "," + str(predict))
    i = i + 1
stream = DataStream.default_stream(train, iteration_scheme=SequentialScheme(
    train.num_examples, batch_size))
# upscaled_stream = MinimumImageDimensions(stream, (100, 100), which_sources=('image_features',))
downscaled_stream = DownscaleMinDimension(stream, 100, which_sources=('image_features',))

# Our images are of different sizes, so we'll use a Fuel transformer
# to take random crops of size (100 x 100) from each image
cropped_stream = RandomFixedSizeCrop(downscaled_stream, (100, 100),
                                     which_sources=('image_features',))
rotated_stream = Random2DRotation(cropped_stream, math.pi / 6,
                                  which_sources=('image_features',))
flipped_stream = RandomHorizontalFlip(rotated_stream, which_sources=('image_features',))

# Scale pixel values to [0, 1] and cast to float32 before serving batches
float_stream = ScaleAndShift(flipped_stream, 1. / 255, 0, which_sources=('image_features',))
float32_stream = Cast(float_stream, numpy.float32, which_sources=('image_features',))

start_server(float32_stream, port=port)
def sample_transformations(thestream):
    cast_stream = Cast(data_stream=thestream, dtype='float32',
                       which_sources=('features',))
    return cast_stream
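# Usage sketch (the dataset and stream construction are assumptions, not
# part of the original snippet):
base_stream = DataStream(IterableDataset(
    {'features': numpy.array([1, 2, 3]).astype('float64')}))
float_stream = sample_transformations(base_stream)
assert all(f.dtype == 'float32' for (f,) in float_stream.get_epoch_iterator())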
# Add the Softmax function
out = Flattener().apply(conv_sequence.apply(x))
predict = NDimensionalSoftmax().apply(out)

# Get the test stream
from fuel.datasets.dogs_vs_cats import DogsVsCats
from fuel.streams import DataStream, ServerDataStream
from fuel.schemes import ShuffledScheme, SequentialExampleScheme
from fuel.transformers.image import RandomFixedSizeCrop, MinimumImageDimensions, MaximumImageDimensions, Random2DRotation
from fuel.transformers import Flatten, Cast, ScaleAndShift

size = (128, 128)
cats = DogsVsCats(('test',))
stream = DataStream.default_stream(cats, iteration_scheme=SequentialExampleScheme(cats.num_examples))
stream_upscale = MaximumImageDimensions(stream, size, which_sources=('image_features',))
stream_scale = ScaleAndShift(stream_upscale, 1. / 255, 0, which_sources=('image_features',))
stream_data = Cast(stream_scale, dtype='float32', which_sources=('image_features',))

# Load the parameters of the model
params = load_parameter_values('convnet_parameters.pkl')
mo = Model(predict)
mo.set_parameter_values(params)

# Create the forward propagation function
fprop = function(mo.inputs, mo.outputs[0], allow_input_downcast=True)
tab = []
i = 1

# Get the prediction for each example of the test set
for data in stream_data.get_epoch_iterator():
    predict = np.argmax(fprop(data))
    tab.append([i, predict])
    print(str(i) + "," + str(predict))
    i = i + 1