Code Example #1
File: test_serialization.py  Project: Afrik/fuel
def test_in_memory():
    skip_if_not_available(datasets=['mnist.hdf5'])
    # Load MNIST and get two batches
    mnist = MNIST(('train',), load_in_memory=True)
    data_stream = DataStream(mnist, iteration_scheme=SequentialScheme(
        examples=mnist.num_examples, batch_size=256))
    epoch = data_stream.get_epoch_iterator()
    for i, (features, targets) in enumerate(epoch):
        if i == 1:
            break
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(256, 512))
    mnist.close(handle)
    assert numpy.all(features == known_features)

    # Pickle the epoch and make sure that the data wasn't dumped
    with tempfile.NamedTemporaryFile(delete=False) as f:
        filename = f.name
        cPickle.dump(epoch, f)
    assert os.path.getsize(filename) < 1024 * 1024  # Less than 1MB

    # Reload the epoch and make sure that the state was maintained
    del epoch
    with open(filename, 'rb') as f:
        epoch = cPickle.load(f)
    features, targets = next(epoch)
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(512, 768))
    mnist.close(handle)
    assert numpy.all(features == known_features)
Code Example #2
File: test_serialization.py  Project: xiaoyexixi/fuel
def test_in_memory():
    skip_if_not_available(datasets=['mnist.hdf5'])
    # Load MNIST and get two batches
    mnist = MNIST('train', load_in_memory=True)
    data_stream = DataStream(mnist,
                             iteration_scheme=SequentialScheme(
                                 examples=mnist.num_examples, batch_size=256))
    epoch = data_stream.get_epoch_iterator()
    for i, (features, targets) in enumerate(epoch):
        if i == 1:
            break
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(256, 512))
    mnist.close(handle)
    assert numpy.all(features == known_features)

    # Pickle the epoch and make sure that the data wasn't dumped
    with tempfile.NamedTemporaryFile(delete=False) as f:
        filename = f.name
        cPickle.dump(epoch, f)
    assert os.path.getsize(filename) < 1024 * 1024  # Less than 1MB

    # Reload the epoch and make sure that the state was maintained
    del epoch
    with open(filename, 'rb') as f:
        epoch = cPickle.load(f)
    features, targets = next(epoch)
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(512, 768))
    mnist.close(handle)
    assert numpy.all(features == known_features)
Code Example #3
File: test_mnist.py  Project: jfsantos/fuel
def test_mnist():
    skip_if_not_available(datasets=['mnist'])
    mnist_train = MNIST('train', start=20000)
    assert len(mnist_train.features) == 40000
    assert len(mnist_train.targets) == 40000
    assert mnist_train.num_examples == 40000
    mnist_test = MNIST('test', sources=('targets',))
    assert len(mnist_test.targets) == 10000
    assert mnist_test.num_examples == 10000

    first_feature, first_target = mnist_train.get_data(request=[0])
    assert first_feature.shape == (1, 784)
    assert first_feature.dtype.kind == 'f'
    assert first_target.shape == (1, 1)
    assert first_target.dtype is numpy.dtype('uint8')

    first_target, = mnist_test.get_data(request=[0, 1])
    assert first_target.shape == (2, 1)

    binary_mnist = MNIST('test', binary=True, sources=('features',))
    first_feature, = binary_mnist.get_data(request=[0])
    assert first_feature.dtype.kind == 'b'
    assert_raises(ValueError, MNIST, 'valid')

    mnist_train = cPickle.loads(cPickle.dumps(mnist_train))
    assert len(mnist_train.features) == 40000

    mnist_test_unflattened = MNIST('test', flatten=False)
    assert mnist_test_unflattened.features.shape == (10000, 28, 28)
Code Example #4
File: test_mnist.py  Project: nagyist/fuel
def test_mnist():
    skip_if_not_available(datasets=['mnist'])
    mnist_train = MNIST('train', start=20000)
    assert len(mnist_train.features) == 40000
    assert len(mnist_train.targets) == 40000
    assert mnist_train.num_examples == 40000
    mnist_test = MNIST('test', sources=('targets', ))
    assert len(mnist_test.targets) == 10000
    assert mnist_test.num_examples == 10000

    first_feature, first_target = mnist_train.get_data(request=[0])
    assert first_feature.shape == (1, 784)
    assert first_feature.dtype.kind == 'f'
    assert first_target.shape == (1, 1)
    assert first_target.dtype is numpy.dtype('uint8')

    first_target, = mnist_test.get_data(request=[0, 1])
    assert first_target.shape == (2, 1)

    binary_mnist = MNIST('test', binary=True, sources=('features', ))
    first_feature, = binary_mnist.get_data(request=[0])
    assert first_feature.dtype.kind == 'b'
    assert_raises(ValueError, MNIST, 'valid')

    mnist_train = cPickle.loads(cPickle.dumps(mnist_train))
    assert len(mnist_train.features) == 40000

    mnist_test_unflattened = MNIST('test', flatten=False)
    assert mnist_test_unflattened.features.shape == (10000, 28, 28)
Code Example #5
def test_mnist_test():
    skip_if_not_available(datasets=["mnist.hdf5"])

    dataset = MNIST(("test",), load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == "uint8"
    assert data.shape == (10, 1, 28, 28)
    assert labels.shape == (10, 1)
    known = numpy.array([0, 0, 0, 0, 0, 0, 84, 185, 159, 151, 60, 36, 0, 0,
                         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    assert_allclose(data[0][0][7], known)
    assert labels[0][0] == 7
    assert dataset.num_examples == 10000
    dataset.close(handle)

    stream = DataStream.default_stream(dataset, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX
Code Example #6
File: test_mnist.py  Project: DavidDJChen/fuel
def test_mnist_train():
    skip_if_not_available(datasets=['mnist.hdf5'])

    dataset = MNIST('train', load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == 'uint8'
    assert data.shape == (10, 1, 28, 28)
    assert labels.shape == (10, 1)
    known = numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 30, 36, 94, 154, 170, 253,
                         253, 253, 253, 253, 225, 172, 253, 242, 195,  64, 0,
                         0, 0, 0])
    assert_allclose(data[0][0][6], known)
    assert labels[0][0] == 5
    assert dataset.num_examples == 60000
    dataset.close(handle)

    stream = DataStream.default_stream(
        dataset, iteration_scheme=SequentialScheme(10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX
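
The last three asserts exercise the scaling and casting that default_stream applies through the dataset's default transformers. A minimal equivalent pipeline, as a sketch that assumes those defaults are fuel's ScaleAndShift and Cast transformers, would be:

# Sketch of an explicit pipeline roughly equivalent to default_stream for MNIST.
from fuel.streams import DataStream
from fuel.transformers import ScaleAndShift, Cast

plain_stream = DataStream(dataset, iteration_scheme=SequentialScheme(10, 10))
scaled_stream = ScaleAndShift(plain_stream, scale=1 / 255.0, shift=0,
                              which_sources=('features',))
float_stream = Cast(scaled_stream, dtype=config.floatX,
                    which_sources=('features',))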
Code Example #7
def test_mnist_test():
    skip_if_not_available(datasets=['mnist.hdf5'])

    dataset = MNIST('test', load_in_memory=False)
    handle = dataset.open()
    data, labels = dataset.get_data(handle, slice(0, 10))
    assert data.dtype == 'uint8'
    assert data.shape == (10, 1, 28, 28)
    assert labels.shape == (10, 1)
    known = numpy.array([
        0, 0, 0, 0, 0, 0, 84, 185, 159, 151, 60, 36, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0
    ])
    assert_allclose(data[0][0][7], known)
    assert labels[0][0] == 7
    assert dataset.num_examples == 10000
    dataset.close(handle)

    stream = DataStream.default_stream(dataset,
                                       iteration_scheme=SequentialScheme(
                                           10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX
Code Example #8
File: rnn.py  Project: EderSantana/autocorrentropy
import numpy as np
import theano

from fuel.datasets import MNIST
from fuel.schemes import SequentialScheme
from fuel.transformers import Mapping, Flatten
from blocks.graph import ComputationGraph
from blocks.monitoring import aggregation
from blocks.extensions import FinishAfter, Timing, Printing
from blocks.extensions.monitoring import (DataStreamMonitoring,
                                          TrainingDataMonitoring)
from blocks.main_loop import MainLoop

from blocks_contrib.extensions import DataStreamMonitoringAndSaving
floatX = theano.config.floatX


mnist = MNIST('train', sources=['features'])
handle = mnist.open()
data = mnist.get_data(handle, slice(0, 50000))[0]
means = data.reshape((50000, 784)).mean(axis=0)


def autocorrentropy2(X, ksize=np.inf):
    b, t, d = X.shape
    V = np.zeros((b, t, d))
    for i in range(b):
        for j in range(t):
            if ksize in (np.inf, 0., np.nan):
                V[i, j, :] = (X[i, :(t-j), :] * X[i, j:, :]).sum(axis=0) / (t-j)
            else:
                V[i, j, :] = np.exp((-ksize * (X[i, :(t-j), :]-X[i, j:, :])**2)).sum(axis=0) / (t-j)
    return V
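
A hypothetical usage of autocorrentropy2, treating a few mean-subtracted MNIST images as (batch, time, dim) sequences (the batch size and ksize values below are illustrative, not taken from the original script):

# Hypothetical usage: a few MNIST rows reshaped into (batch, time, dim) sequences.
seqs = data[:8].reshape((8, 784, 1)).astype(floatX)
seqs -= means.reshape((1, 784, 1)).astype(floatX)
V_corr = autocorrentropy2(seqs, ksize=np.inf)   # autocorrelation branch
V_kern = autocorrentropy2(seqs, ksize=2.0)      # kernelized branch
print(V_corr.shape, V_kern.shape)               # both (8, 784, 1)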

Code Example #9
class BucketVisualizer:
    def __init__(self, save_to, act_table):
        self.mnist_test = MNIST(("test", ), sources=['features', 'targets'])
        self.table = self.load_act_table(save_to, act_table)

    def all_match(self, index, the_set, positive):
        if the_set is None or len(the_set) == 0:
            return True
        selected = self.table[index, the_set]
        if positive:
            matched = selected > 0
        else:
            matched = selected <= 0
        return matched.sum() == len(the_set)

    def activations_for_sample(self, index):
        return self.table[index, :]

    def positive_for_sample(self, index):
        return numpy.where(self.activations_for_sample(index) > 0)[0]

    def negative_for_sample(self, index):
        return numpy.where(self.activations_for_sample(index) <= 0)[0]

    def prediction_for_sample(self, index):
        return self.table[index, :10].argmax()

    def label_for_sample(self, index):
        return self.mnist_test.get_data(request=index)[1][0]

    def filter_image_bytes(self,
                           positive_set=None,
                           negative_set=None,
                           sort_by=None,
                           columns=100,
                           limit=None,
                           ulimit=None,
                           descending=False):
        include_indexes = [
            ind for ind in range(self.table.shape[0])
            if (self.all_match(ind, positive_set, True)
                and self.all_match(ind, negative_set, False))
        ]
        if sort_by:
            include_indexes.sort(key=lambda x: self.table[x, sort_by].sum())
        if descending:
            include_indexes.reverse()
        if limit or ulimit and not (limit and ulimit and
                                    limit + ulimit >= len(include_indexes)):
            lower = include_indexes[:limit] if limit else []
            upper = include_indexes[-ulimit:] if ulimit else []
            include_indexes = lower + upper
        count = max(1, len(include_indexes))
        grid_shape = (((count - 1) // columns + 1), min(columns, count))

        filmstrip = Filmstrip(image_shape=(28, 28), grid_shape=grid_shape)
        for i, index in enumerate(include_indexes):
            filmstrip.set_image((i // columns, i % columns),
                                self.mnist_test.get_data(request=index)[0])
        return filmstrip.save_bytes()

    def example_count(self):
        return self.table.shape[0]

    def unit_count(self):
        return self.table.shape[1]

    def load_act_table(self, save_to, act_table):
        try:
            return pickle.load(open(act_table, 'rb'))
        except FileNotFoundError:
            return self.create_act_table(save_to, act_table)

    def create_act_table(self, save_to, act_table):
        batch_size = 500
        image_size = (28, 28)
        output_size = 10
        convnet = create_lenet_5()
        layers = convnet.layers

        x = tensor.tensor4('features')
        y = tensor.lmatrix('targets')

        # Normalize input and apply the convnet
        probs = convnet.apply(x)
        cg = ComputationGraph([probs])

        def full_brick_name(brick):
            return '/'.join([''] + [b.name for b in brick.get_unique_path()])

        # Find layer outputs to probe
        outmap = OrderedDict(
            (full_brick_name(get_brick(out)), out) for out in VariableFilter(
                roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables))
        # Generate pics for biases
        biases = VariableFilter(roles=[BIAS])(cg.parameters)

        # Generate parallel array, in the same order, for outputs
        outs = [outmap[full_brick_name(get_brick(b))] for b in biases]

        # Figure work count
        error_rate = (MisclassificationRate().apply(
            y.flatten(), probs).copy(name='error_rate'))
        max_activation_table = (MaxActivationTable().apply(outs).copy(
            name='max_activation_table'))
        max_activation_table.tag.aggregation_scheme = (
            Concatenate(max_activation_table))

        model = Model([error_rate, max_activation_table])

        # Load it with trained parameters
        params = load_parameters(open(save_to, 'rb'))
        model.set_parameter_values(params)

        mnist_test_stream = DataStream.default_stream(
            self.mnist_test,
            iteration_scheme=SequentialScheme(self.mnist_test.num_examples,
                                              batch_size))

        evaluator = DatasetEvaluator([error_rate, max_activation_table])
        results = evaluator.evaluate(mnist_test_stream)
        table = results['max_activation_table']
        pickle.dump(table, open(act_table, 'wb'))
        return table
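
A hypothetical usage sketch for BucketVisualizer (both file paths below are placeholders, not taken from the project):

# Hypothetical usage; 'trained_params.tar' and 'act_table.pkl' are placeholder paths.
viz = BucketVisualizer('trained_params.tar', 'act_table.pkl')
print(viz.example_count(), viz.unit_count())
print(viz.label_for_sample(0), viz.prediction_for_sample(0))
# Image bytes for the test digits whose activation on unit 3 is positive.
img_bytes = viz.filter_image_bytes(positive_set=[3], columns=50, limit=100)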
Code Example #10
File: tutorial.py  Project: uredkar/mymlstat
# Tail of the preceding regression example in the same tutorial file
# (`train` and the shared variable `w` are defined earlier in the script,
# outside this excerpt):
        train(x, y)

print(w.get_value())  # something around 2


#https://raw.githubusercontent.com/Newmu/Theano-Tutorials/master/2_logistic_regression.py

import theano
from theano import tensor as T
import numpy as np
from fuel.datasets import MNIST
from matplotlib import pyplot, cm

dataset = MNIST(('train',), sources=('features',))
state = dataset.open()
image, = dataset.get_data(state=state, request=[1234])
pyplot.imshow(image.reshape((28, 28)), cmap=cm.Greys_r, interpolation='nearest')
pyplot.show()
dataset.close(state)

def floatX(X):
    return np.asarray(X, dtype=theano.config.floatX)

def init_weights(shape):
    return theano.shared(floatX(np.random.randn(*shape) * 0.01))

def model(X, w):
    return T.nnet.softmax(T.dot(X, w))

# `mnist` here is the data loader from the linked Theano-Tutorials repository
# (its load.py), not fuel's MNIST class; it is not imported in this excerpt.
trX, teX, trY, teY = mnist(onehot=True)
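
The final line depends on that external loader; a rough fuel-based substitute (a sketch with a hand-rolled one-hot encoding; load_mnist_fuel is a hypothetical helper, not part of the original script) could look like:

# Sketch: build trX/teX/trY/teY from fuel's MNIST instead of the tutorial's loader.
def load_mnist_fuel(onehot=True):
    splits = []
    for which in ('train', 'test'):
        ds = MNIST((which,), sources=('features', 'targets'), load_in_memory=True)
        handle = ds.open()
        X, y = ds.get_data(handle, slice(0, ds.num_examples))
        ds.close(handle)
        X = floatX(X.reshape((X.shape[0], 784)) / 255.0)  # scale pixels to [0, 1]
        y = y.flatten()
        if onehot:
            y = floatX(np.eye(10)[y])                     # one-hot encode targets
        splits.append((X, y))
    (trX, trY), (teX, teY) = splits
    return trX, teX, trY, teY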
Code Example #11

# In[3]:

mnist.num_examples


# In[4]:

mnist.sources


# In[5]:

handle = mnist.open()
data_sample = mnist.get_data(handle, [0, 1, 2])  # (ndarray, ndarray)


# In[6]:

data_sample[0].shape  # features


# In[7]:

data_sample[1].shape  # targets


# ## DataStream

# In[8]:
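
The excerpt stops at the empty In[8] cell under the "DataStream" heading; a plausible continuation (hypothetical, not recovered from the original notebook) would be:

# In[8] (hypothetical continuation):
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme

data_stream = DataStream(mnist, iteration_scheme=SequentialScheme(
    examples=mnist.num_examples, batch_size=256))
next(data_stream.get_epoch_iterator())[0].shape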
Code Example #12
File: bucket.py  Project: davidbau/net-intent
class BucketVisualizer:
    def __init__(self, save_to, act_table):
        self.mnist_test = MNIST(("test",), sources=['features', 'targets'])
        self.table = self.load_act_table(save_to, act_table)

    def all_match(self, index, the_set, positive):
        if the_set is None or len(the_set) == 0:
            return True
        selected = self.table[index, the_set]
        if positive:
            matched = selected > 0
        else:
            matched = selected <= 0
        return matched.sum() == len(the_set)

    def activations_for_sample(self, index):
        return self.table[index, :]

    def positive_for_sample(self, index):
        return numpy.where(self.activations_for_sample(index) > 0)[0]

    def negative_for_sample(self, index):
        return numpy.where(self.activations_for_sample(index) <= 0)[0]

    def prediction_for_sample(self, index):
        return self.table[index, :10].argmax()

    def label_for_sample(self, index):
        return self.mnist_test.get_data(request=index)[1][0]

    def filter_image_bytes(self,
            positive_set=None, negative_set=None, sort_by=None,
            columns=100, limit=None, ulimit=None, descending=False):
        include_indexes = [ind for ind in range(self.table.shape[0])
                if (self.all_match(ind, positive_set, True) and
                    self.all_match(ind, negative_set, False))]
        if sort_by:
            include_indexes.sort(key=lambda x: self.table[x, sort_by].sum())
        if descending:
            include_indexes.reverse()
        if limit or ulimit and not(
                limit and ulimit and limit + ulimit >= len(include_indexes)):
            lower = include_indexes[:limit] if limit else []
            upper = include_indexes[-ulimit:] if ulimit else []
            include_indexes = lower + upper
        count = max(1, len(include_indexes))
        grid_shape = (((count - 1) // columns + 1), min(columns, count))

        filmstrip = Filmstrip(image_shape=(28, 28), grid_shape=grid_shape)
        for i, index in enumerate(include_indexes):
            filmstrip.set_image((i // columns, i % columns),
                        self.mnist_test.get_data(request=index)[0])
        return filmstrip.save_bytes()

    def example_count(self):
        return self.table.shape[0]

    def unit_count(self):
        return self.table.shape[1]

    def load_act_table(self, save_to, act_table):
        try:
            return pickle.load(open(act_table, 'rb'))
        except FileNotFoundError:
            return self.create_act_table(save_to, act_table)

    def create_act_table(self, save_to, act_table):
        batch_size = 500
        image_size = (28, 28)
        output_size = 10
        convnet = create_lenet_5()
        layers = convnet.layers

        x = tensor.tensor4('features')
        y = tensor.lmatrix('targets')

        # Normalize input and apply the convnet
        probs = convnet.apply(x)
        cg = ComputationGraph([probs])

        def full_brick_name(brick):
            return '/'.join([''] + [b.name for b in brick.get_unique_path()])

        # Find layer outputs to probe
        outmap = OrderedDict((full_brick_name(get_brick(out)), out)
                for out in VariableFilter(
                    roles=[OUTPUT], bricks=[Convolutional, Linear])(
                        cg.variables))
        # Generate pics for biases
        biases = VariableFilter(roles=[BIAS])(cg.parameters)

        # Generate parallel array, in the same order, for outputs
        outs = [outmap[full_brick_name(get_brick(b))] for b in biases]

        # Figure work count
        error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                      .copy(name='error_rate'))
        max_activation_table = (MaxActivationTable().apply(
                outs).copy(name='max_activation_table'))
        max_activation_table.tag.aggregation_scheme = (
                Concatenate(max_activation_table))

        model = Model([
            error_rate,
            max_activation_table])

        # Load it with trained parameters
        params = load_parameters(open(save_to, 'rb'))
        model.set_parameter_values(params)

        mnist_test_stream = DataStream.default_stream(
            self.mnist_test,
            iteration_scheme=SequentialScheme(
                self.mnist_test.num_examples, batch_size))

        evaluator = DatasetEvaluator([
            error_rate,
            max_activation_table
            ])
        results = evaluator.evaluate(mnist_test_stream)
        table = results['max_activation_table']
        pickle.dump(table, open(act_table, 'wb'))
        return table