def test_in_memory():
    skip_if_not_available(datasets=['mnist.hdf5'])

    # Load MNIST and get two batches
    mnist = MNIST(('train',), load_in_memory=True)
    data_stream = DataStream(mnist, iteration_scheme=SequentialScheme(
        examples=mnist.num_examples, batch_size=256))
    epoch = data_stream.get_epoch_iterator()
    for i, (features, targets) in enumerate(epoch):
        if i == 1:
            break
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(256, 512))
    mnist.close(handle)
    assert numpy.all(features == known_features)

    # Pickle the epoch and make sure that the data wasn't dumped
    with tempfile.NamedTemporaryFile(delete=False) as f:
        filename = f.name
        cPickle.dump(epoch, f)
    assert os.path.getsize(filename) < 1024 * 1024  # Less than 1MB

    # Reload the epoch and make sure that the state was maintained
    del epoch
    with open(filename, 'rb') as f:
        epoch = cPickle.load(f)
    features, targets = next(epoch)
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(512, 768))
    mnist.close(handle)
    assert numpy.all(features == known_features)

def test_mnist():
    skip_if_not_available(datasets=['mnist'])

    mnist_train = MNIST('train', start=20000)
    assert len(mnist_train.features) == 40000
    assert len(mnist_train.targets) == 40000
    assert mnist_train.num_examples == 40000
    mnist_test = MNIST('test', sources=('targets',))
    assert len(mnist_test.targets) == 10000
    assert mnist_test.num_examples == 10000

    first_feature, first_target = mnist_train.get_data(request=[0])
    assert first_feature.shape == (1, 784)
    assert first_feature.dtype.kind == 'f'
    assert first_target.shape == (1, 1)
    assert first_target.dtype is numpy.dtype('uint8')

    first_target, = mnist_test.get_data(request=[0, 1])
    assert first_target.shape == (2, 1)

    binary_mnist = MNIST('test', binary=True, sources=('features',))
    first_feature, = binary_mnist.get_data(request=[0])
    assert first_feature.dtype.kind == 'b'

    assert_raises(ValueError, MNIST, 'valid')

    mnist_train = cPickle.loads(cPickle.dumps(mnist_train))
    assert len(mnist_train.features) == 40000

    mnist_test_unflattened = MNIST('test', flatten=False)
    assert mnist_test_unflattened.features.shape == (10000, 28, 28)

def test_mnist_test():
    skip_if_not_available(datasets=['mnist.hdf5'])

    dataset = MNIST(('test',), load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == 'uint8'
    assert data.shape == (10, 1, 28, 28)
    assert labels.shape == (10, 1)
    known = numpy.array([0, 0, 0, 0, 0, 0, 84, 185, 159, 151, 60, 36, 0, 0,
                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    assert_allclose(data[0][0][7], known)
    assert labels[0][0] == 7
    assert dataset.num_examples == 10000
    dataset.close(handle)

    stream = DataStream.default_stream(
        dataset, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX

def test_mnist_train():
    skip_if_not_available(datasets=['mnist.hdf5'])

    dataset = MNIST(('train',), load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == 'uint8'
    assert data.shape == (10, 1, 28, 28)
    assert labels.shape == (10, 1)
    known = numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253,
                         253, 253, 253, 253, 225, 172, 253, 242, 195, 64, 0,
                         0, 0, 0])
    assert_allclose(data[0][0][6], known)
    assert labels[0][0] == 5
    assert dataset.num_examples == 60000
    dataset.close(handle)

    stream = DataStream.default_stream(
        dataset, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX

from fuel.schemes import SequentialScheme
from fuel.transformers import Mapping, Flatten
from blocks.graph import ComputationGraph
from blocks.monitoring import aggregation
from blocks.extensions import FinishAfter, Timing, Printing
from blocks.extensions.monitoring import (DataStreamMonitoring,
                                          TrainingDataMonitoring)
from blocks.main_loop import MainLoop
from blocks_contrib.extensions import DataStreamMonitoringAndSaving

floatX = theano.config.floatX

mnist = MNIST('train', sources=['features'])
handle = mnist.open()
data = mnist.get_data(handle, slice(0, 50000))[0]
means = data.reshape((50000, 784)).mean(axis=0)


def autocorrentropy2(X, ksize=np.inf):
    # For each lag j, average the interaction between each sequence and a
    # j-shifted copy of itself: plain products (autocorrelation) when no
    # finite kernel size is given, Gaussian-kernel similarities
    # (correntropy) otherwise.
    b, t, d = X.shape
    V = np.zeros((b, t, d))
    for i in range(b):
        for j in range(t):
            if ksize in (np.inf, 0., np.nan):
                V[i, j, :] = (X[i, :(t - j), :] *
                              X[i, j:, :]).sum(axis=0) / (t - j)
            else:
                V[i, j, :] = np.exp(
                    -ksize * (X[i, :(t - j), :] -
                              X[i, j:, :]) ** 2).sum(axis=0) / (t - j)
    return V

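
# A minimal usage sketch of autocorrentropy2 on random data; the shapes
# below are illustrative assumptions, not values from the original code.
X_demo = np.random.randn(4, 10, 8)  # 4 sequences, 10 time steps, 8 dims
V_corr = autocorrentropy2(X_demo)             # np.inf: autocorrelation branch
V_kern = autocorrentropy2(X_demo, ksize=0.5)  # finite ksize: correntropy branch
assert V_corr.shape == V_kern.shape == X_demo.shape
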
class BucketVisualizer:
    def __init__(self, save_to, act_table):
        self.mnist_test = MNIST(('test',), sources=['features', 'targets'])
        self.table = self.load_act_table(save_to, act_table)

    def all_match(self, index, the_set, positive):
        if the_set is None or len(the_set) == 0:
            return True
        selected = self.table[index, the_set]
        if positive:
            matched = selected > 0
        else:
            matched = selected <= 0
        return matched.sum() == len(the_set)

    def activations_for_sample(self, index):
        return self.table[index, :]

    def positive_for_sample(self, index):
        return numpy.where(self.activations_for_sample(index) > 0)[0]

    def negative_for_sample(self, index):
        return numpy.where(self.activations_for_sample(index) <= 0)[0]

    def prediction_for_sample(self, index):
        return self.table[index, :10].argmax()

    def label_for_sample(self, index):
        return self.mnist_test.get_data(request=index)[1][0]

    def filter_image_bytes(self, positive_set=None, negative_set=None,
                           sort_by=None, columns=100, limit=None,
                           ulimit=None, descending=False):
        include_indexes = [
            ind for ind in range(self.table.shape[0])
            if (self.all_match(ind, positive_set, True) and
                self.all_match(ind, negative_set, False))]
        if sort_by:
            include_indexes.sort(key=lambda x: self.table[x, sort_by].sum())
            if descending:
                include_indexes.reverse()
        # Trim to the lower/upper slices unless the two limits together
        # already cover everything (parenthesized to make the intended
        # grouping explicit).
        if (limit or ulimit) and not (
                limit and ulimit and
                limit + ulimit >= len(include_indexes)):
            lower = include_indexes[:limit] if limit else []
            upper = include_indexes[-ulimit:] if ulimit else []
            include_indexes = lower + upper
        count = max(1, len(include_indexes))
        grid_shape = ((count - 1) // columns + 1, min(columns, count))
        filmstrip = Filmstrip(image_shape=(28, 28), grid_shape=grid_shape)
        for i, index in enumerate(include_indexes):
            filmstrip.set_image((i // columns, i % columns),
                                self.mnist_test.get_data(request=index)[0])
        return filmstrip.save_bytes()

    def example_count(self):
        return self.table.shape[0]

    def unit_count(self):
        return self.table.shape[1]

    def load_act_table(self, save_to, act_table):
        try:
            return pickle.load(open(act_table, 'rb'))
        except FileNotFoundError:
            return self.create_act_table(save_to, act_table)

    def create_act_table(self, save_to, act_table):
        batch_size = 500
        image_size = (28, 28)
        output_size = 10
        convnet = create_lenet_5()
        layers = convnet.layers

        x = tensor.tensor4('features')
        y = tensor.lmatrix('targets')

        # Normalize input and apply the convnet
        probs = convnet.apply(x)
        cg = ComputationGraph([probs])

        def full_brick_name(brick):
            return '/'.join([''] + [b.name for b in brick.get_unique_path()])

        # Find layer outputs to probe
        outmap = OrderedDict(
            (full_brick_name(get_brick(out)), out)
            for out in VariableFilter(
                roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables))
        # Generate pics for biases
        biases = VariableFilter(roles=[BIAS])(cg.parameters)
        # Generate parallel array, in the same order, for outputs
        outs = [outmap[full_brick_name(get_brick(b))] for b in biases]

        # Figure work count
        error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                      .copy(name='error_rate'))
        max_activation_table = (MaxActivationTable().apply(outs)
                                .copy(name='max_activation_table'))
        max_activation_table.tag.aggregation_scheme = (
            Concatenate(max_activation_table))
        model = Model([error_rate, max_activation_table])

        # Load it with trained parameters
        params = load_parameters(open(save_to, 'rb'))
        model.set_parameter_values(params)

        mnist_test_stream = DataStream.default_stream(
            self.mnist_test,
            iteration_scheme=SequentialScheme(
                self.mnist_test.num_examples, batch_size))
        evaluator = DatasetEvaluator([error_rate, max_activation_table])
        results = evaluator.evaluate(mnist_test_stream)
        table = results['max_activation_table']
        pickle.dump(table, open(act_table, 'wb'))
        return table

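
# A hypothetical usage sketch of BucketVisualizer; both file names are
# placeholders, not paths from the original code.
viz = BucketVisualizer('trained_params.tar', 'act_table.pkl')
print(viz.example_count(), viz.unit_count())
# Render the test examples on which units 12 and 40 fire but unit 7 does not.
png_bytes = viz.filter_image_bytes(positive_set=[12, 40], negative_set=[7])
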
train(x, y)
print(w.get_value())  # something around 2

# https://raw.githubusercontent.com/Newmu/Theano-Tutorials/master/2_logistic_regression.py
import theano
from theano import tensor as T
import numpy as np
from fuel.datasets import MNIST
from matplotlib import pyplot, cm

dataset = MNIST(('train',), sources=('features',))
state = dataset.open()
image, = dataset.get_data(state=state, request=[1234])
pyplot.imshow(image.reshape((28, 28)), cmap=cm.Greys_r,
              interpolation='nearest')
pyplot.show()
dataset.close(state)


def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)


def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))


def model(X, w):
    return T.nnet.softmax(T.dot(X, w))


# `mnist` here is the tutorial's data-loading helper, not the Fuel MNIST
# class imported above.
trX, teX, trY, teY = mnist(onehot=True)

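
# The snippet above stops after loading the data; a sketch of the training
# step the referenced tutorial builds next (the 0.05 learning rate and
# batch size of 128 follow the tutorial; treat this as a reconstruction,
# not the original code).
X = T.fmatrix()
Y = T.fmatrix()

w = init_weights((784, 10))
py_x = model(X, w)
y_pred = T.argmax(py_x, axis=1)

cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
update = [[w, w - T.grad(cost=cost, wrt=w) * 0.05]]

train = theano.function(inputs=[X, Y], outputs=cost,
                        updates=update, allow_input_downcast=True)
predict = theano.function(inputs=[X], outputs=y_pred,
                          allow_input_downcast=True)

for start in range(0, len(trX), 128):
    cost = train(trX[start:start + 128], trY[start:start + 128])
print(np.mean(np.argmax(teY, axis=1) == predict(teX)))
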
# In[3]:

mnist.num_examples


# In[4]:

mnist.sources


# In[5]:

handle = mnist.open()
data_sample = mnist.get_data(handle, [0, 1, 2])  # (ndarray, ndarray)


# In[6]:

data_sample[0].shape  # features


# In[7]:

data_sample[1].shape  # targets


# ## DataStream

# In[8]:
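
# The In[8] cell body is not present in this export; a minimal sketch of
# what a DataStream cell could contain, assuming a SequentialScheme and an
# arbitrary batch size of 64.
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme

stream = DataStream(mnist, iteration_scheme=SequentialScheme(
    examples=mnist.num_examples, batch_size=64))
features, targets = next(stream.get_epoch_iterator())
features.shape  # one batch of 64 feature arrays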