def test_format_exceptions(self):
    # Every call below is expected to raise ValueError: the transformer
    # is handed 2-D arrays, once as a single example and twice wrapped
    # in a one-element list standing in for a batch.
    window = (5, 4)
    sources = ('source2',)
    example_cropper = RandomFixedSizeCrop(self.example_stream, window,
                                          which_sources=sources)
    batch_cropper = RandomFixedSizeCrop(self.batch_stream, window,
                                        which_sources=sources)

    # A lone 2-D array given as an example.
    assert_raises(ValueError, example_cropper.transform_source_example,
                  numpy.empty((5, 6)), 'source2')

    # A list batch whose only element is a 2-D array, for two row counts.
    for rows in (7, 8):
        assert_raises(ValueError, batch_cropper.transform_source_batch,
                      [numpy.empty((rows, 6))], 'source2')
def test_window_too_big_exceptions(self):
    # A (5, 4) window is requested while the inputs end in dimensions
    # (4, 2); both the example path and the batch path are expected to
    # raise ValueError.
    # NOTE(review): presumably the trailing two axes are the image
    # dimensions the window is compared against -- confirm in the
    # transformer.
    window = (5, 4)

    example_cropper = RandomFixedSizeCrop(self.example_stream, window,
                                          which_sources=('source2',))
    small_example = numpy.empty((3, 4, 2))
    assert_raises(ValueError, example_cropper.transform_source_example,
                  small_example, 'source2')

    batch_cropper = RandomFixedSizeCrop(self.batch_stream, window,
                                        which_sources=('source1',))
    small_batch = numpy.empty((5, 3, 4, 2))
    assert_raises(ValueError, batch_cropper.transform_source_batch,
                  small_batch, 'source1')
def test_ndarray_batch_source(self):
    # Run up to 30 epochs of random (5, 4) crops over 'source1' and
    # collect every distinct value that appears; the test passes as soon
    # as 3 * 7 * 5 distinct values have been observed.
    cropper = RandomFixedSizeCrop(self.batch_stream, (5, 4),
                                  which_sources=('source1',))
    expected_distinct = 3 * 7 * 5
    observed = numpy.array([], dtype='uint8')
    for _ in range(30):
        for batch in cropper.get_epoch_iterator():
            images = batch[0]
            # Each cropped batch holds 1 or 2 items of shape (3, 5, 4).
            assert images.shape[1:] == (3, 5, 4)
            assert images.shape[0] in (1, 2)
            observed = numpy.union1d(observed, images.flatten())
        if len(observed) == expected_distinct:
            break
    else:
        # The epoch budget ran out before every value was sampled.
        assert False
def test_ndarray_batch_source(self):
    # With enough epochs the random crops should touch every value:
    # keep the running set of distinct values seen in the first batch
    # entry and stop once it reaches 3 * 7 * 5 elements.
    crop_stream = RandomFixedSizeCrop(self.batch_stream, (5, 4),
                                      which_sources=('source1',))
    values_seen = numpy.array([], dtype='uint8')
    everything_sampled = False
    for _epoch in range(30):
        for batch in crop_stream.get_epoch_iterator():
            cropped = batch[0]
            assert cropped.shape[1:] == (3, 5, 4)
            assert cropped.shape[0] in (1, 2)
            values_seen = numpy.union1d(values_seen, cropped.flatten())
        # Checked once per epoch, after all of its batches were merged.
        if 3 * 7 * 5 == len(values_seen):
            everything_sampled = True
            break
    # Fails only when all 30 epochs finished without full coverage.
    assert everything_sampled
def test_list_batch_source(self):
    # Run up to 30 epochs of random (5, 4) crops over 'source2', whose
    # batches are iterated example by example, and gather the distinct
    # values seen; stop once their count equals self.source2_biggest.
    cropper = RandomFixedSizeCrop(self.batch_stream, (5, 4),
                                  which_sources=('source2',))
    observed = numpy.array([], dtype='uint8')
    for _ in range(30):
        for batch in cropper.get_epoch_iterator():
            examples = batch[1]
            for example in examples:
                # Every cropped example has shape (2, 5, 4).
                assert example.shape == (2, 5, 4)
                observed = numpy.union1d(observed, example.flatten())
            assert len(examples) in (1, 2)
        if len(observed) == self.source2_biggest:
            break
    else:
        # The epoch budget ran out before every value was sampled.
        assert False
def test_list_batch_source(self):
    # With enough epochs the random crops should touch every value:
    # merge the values of each cropped 'source2' example into a running
    # set and stop once it has self.source2_biggest elements.
    crop_stream = RandomFixedSizeCrop(self.batch_stream, (5, 4),
                                      which_sources=('source2',))
    values_seen = numpy.array([], dtype='uint8')
    everything_sampled = False
    for _epoch in range(30):
        for batch in crop_stream.get_epoch_iterator():
            cropped_examples = batch[1]
            for cropped in cropped_examples:
                assert cropped.shape == (2, 5, 4)
                values_seen = numpy.union1d(values_seen, cropped.flatten())
            # Batches carry either one or two examples.
            assert len(cropped_examples) in (1, 2)
        # Checked once per epoch, after all of its batches were merged.
        if self.source2_biggest == len(values_seen):
            everything_sampled = True
            break
    # Fails only when all 30 epochs finished without full coverage.
    assert everything_sampled
train = DogsVsCats(('train',), subset=slice(0, 20000))

# Build a "stream" over the dataset that yields shuffled batches of 128
# examples. Going through DataStream.default_stream turns the 8-bit
# images into floating-point values in [0, 1].
stream = DataStream.default_stream(
    train,
    iteration_scheme=ShuffledScheme(train.num_examples, 128),
)

# Enlarge images that are too small.
downnscale_stream = MinimumImageDimensions(
    stream,
    (64, 64),
    which_sources=('image_features',),
)

# The images come in different sizes, so a Fuel transformer takes a
# random (32 x 32) crop out of each one.
cropped_stream = RandomFixedSizeCrop(
    downnscale_stream,
    (32, 32),
    which_sources=('image_features',),
)

# The model is a simple MLP, so each image is flattened from
# (channel, width, height) to plain (features,).
flattened_stream = Flatten(
    cropped_stream,
    which_sources=('image_features',),
)

# Create the Theano MLP.
import theano
from theano import tensor
import numpy

X = tensor.matrix('image_features')
T = tensor.lmatrix('targets')

# 3072 input rows == 3 * 32 * 32 (presumably 3 channels of a 32 x 32
# crop -- confirm against the dataset), 500 output columns.
W = theano.shared(
    numpy.random.uniform(-0.01, 0.01, (3 * 32 * 32, 500)),
    name='W',
)
# We now create a "stream" over the dataset which will return shuffled batches # of size 128. Using the DataStream.default_stream constructor will turn our # 8-bit images into floating-point decimals in [0, 1]. stream = DataStream.default_stream(train, iteration_scheme=SequentialScheme( train.num_examples, batch_size)) # upscaled_stream = MinimumImageDimensions(stream, (100, 100), which_sources=('image_features',)) downscaled_stream = DownscaleMinDimension(stream, 100, which_sources=('image_features', )) # Our images are of different sizes, so we'll use a Fuel transformer # to take random crops of size (32 x 32) from each image cropped_stream = RandomFixedSizeCrop(downscaled_stream, (100, 100), which_sources=('image_features', )) rotated_stream = Random2DRotation(cropped_stream, math.pi / 6, which_sources=('image_features', )) flipped_stream = RandomHorizontalFlip(rotated_stream, which_sources=('image_features', )) # We'll use a simple MLP, so we need to flatten the images # from (channel, width, height) to simply (features,) float_stream = ScaleAndShift(flipped_stream, 1. / 255, 0, which_sources=('image_features', )) float32_stream = Cast(float_stream, numpy.float32,