Ejemplo n.º 1
0
def get_data(data_name):
    """Load the train/validation/test splits of a named dataset.

    Only the ``'features'`` source is requested from each dataset.  The
    module providing a dataset is imported inside the matching branch, so
    it is only needed when that dataset is actually selected.

    Parameters
    ----------
    data_name : str
        One of ``'mnist'``, ``'bmnist'``, ``'silhouettes'``, ``'tfd'`` or
        ``'speech'``.

    Returns
    -------
    tuple
        ``(img_size, data_train, data_valid, data_test)``; ``img_size`` is
        ``(28, 28)`` for every supported dataset.

    Raises
    ------
    ValueError
        If ``data_name`` is not one of the supported names.
    """
    if data_name == 'mnist':
        from fuel.datasets import MNIST

        # The validation split is taken from the "test" set in this branch.
        train, valid, test = [
            MNIST(which_set=split, sources=['features'])
            for split in ("train", "test", "test")
        ]
        return (28, 28), train, valid, test

    if data_name == 'bmnist':
        from fuel.datasets.binarized_mnist import BinarizedMNIST

        train, valid, test = [
            BinarizedMNIST(which_set=split, sources=['features'])
            for split in ('train', 'valid', 'test')
        ]
        return (28, 28), train, valid, test

    if data_name == 'silhouettes':
        from fuel.datasets.caltech101_silhouettes import CalTech101Silhouettes

        side = 28
        train, valid, test = [
            CalTech101Silhouettes(which_set=[split],
                                  size=side,
                                  sources=['features'])
            for split in ('train', 'valid', 'test')
        ]
        return (side, side), train, valid, test

    if data_name == 'tfd':
        from fuel.datasets.toronto_face_database import TorontoFaceDatabase

        side = 28
        # Training uses the 'unlabeled' split of the face database.
        train, valid, test = [
            TorontoFaceDatabase(which_set=[split],
                                size=side,
                                sources=['features'])
            for split in ('unlabeled', 'valid', 'test')
        ]
        return (side, side), train, valid, test

    if data_name == 'speech':
        from SynthesisTaskData import SynthesisTaskData

        # Three separate instances are built with identical arguments.
        train, valid, test = [
            SynthesisTaskData(sources=['features']) for _ in range(3)
        ]
        return (28, 28), train, valid, test

    raise ValueError("Unknown dataset %s" % data_name)
Ejemplo n.º 2
0
def main(save_to, num_epochs):
    """Train a small MLP classifier on MNIST.

    Builds a 784-100-10 network (tanh hidden layer, softmax output), adds
    an L2 penalty on both weight matrices to the cross-entropy cost and
    trains with gradient descent at a fixed learning rate of 0.1.  The test
    set is monitored, training statistics are reported after each epoch and
    the main loop is checkpointed to ``save_to``.

    Parameters
    ----------
    save_to : str
        Destination handed to the ``Checkpoint`` extension.
    num_epochs : int
        Number of epochs after which training finishes.
    """
    def flat_sequential_stream(dataset, batch_size):
        # Sequential pass over the whole dataset with flattened features.
        scheme = SequentialScheme(dataset.num_examples, batch_size)
        return Flatten(
            DataStream.default_stream(dataset, iteration_scheme=scheme),
            which_sources=('features', ))

    classifier = MLP([Tanh(), Softmax()], [784, 100, 10],
                     weights_init=IsotropicGaussian(0.01),
                     biases_init=Constant(0))
    classifier.initialize()

    x = tensor.matrix('features')
    y = tensor.lmatrix('targets')
    probs = classifier.apply(tensor.flatten(x, outdim=2))
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    error_rate = MisclassificationRate().apply(y.flatten(), probs)

    # The graph is captured from the unregularised cost; the L2 penalty on
    # the two weight matrices is added to the cost afterwards.
    cg = ComputationGraph([cost])
    first_weights, second_weights = VariableFilter(roles=[WEIGHT])(
        cg.variables)
    l2_coefficient = .00005
    cost = (cost + l2_coefficient * (first_weights**2).sum() +
            l2_coefficient * (second_weights**2).sum())
    cost.name = 'final_cost'

    mnist_train = MNIST(("train", ))
    mnist_test = MNIST(("test", ))

    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))

    test_monitor = DataStreamMonitoring(
        [cost, error_rate],
        flat_sequential_stream(mnist_test, 500),
        prefix="test")
    train_monitor = TrainingDataMonitoring(
        [cost, error_rate,
         aggregation.mean(algorithm.total_gradient_norm)],
        prefix="train",
        after_epoch=True)

    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        test_monitor,
        train_monitor,
        Checkpoint(save_to),
        Printing(),
    ]

    # Live plotting is optional and only enabled when the extras exist.
    if BLOCKS_EXTRAS_AVAILABLE:
        plot_channels = [
            ['test_final_cost',
             'test_misclassificationrate_apply_error_rate'],
            ['train_total_gradient_norm'],
        ]
        extensions.append(Plot('MNIST example', channels=plot_channels))

    main_loop = MainLoop(algorithm,
                         flat_sequential_stream(mnist_train, 50),
                         model=Model(cost),
                         extensions=extensions)
    main_loop.run()
Ejemplo n.º 3
0
def get_streams(num_train_examples, batch_size, use_test=True):
    """Build flattened, shuffled MNIST streams for train/validation/test.

    The MNIST training set is permuted with a fixed seed and split: the
    first ``num_train_examples`` indices feed the training stream and the
    remainder feed the validation stream.

    Parameters
    ----------
    num_train_examples : int
        Number of (shuffled) training-set examples used for training.
    batch_size : int
        Batch size used by every stream.
    use_test : bool, optional
        When true (the default) a stream over the MNIST test set is built
        as well.

    Returns
    -------
    tuple
        ``(train_stream, valid_stream, test_stream)``.  ``valid_stream`` is
        ``None`` when no examples are left for validation and
        ``test_stream`` is ``None`` when ``use_test`` is false.
    """
    dataset = MNIST(("train", ))
    all_ind = numpy.arange(dataset.num_examples)
    rng = numpy.random.RandomState(seed=1)
    rng.shuffle(all_ind)

    # Both slices are views of ``all_ind``; it must not be modified again.
    indices_train = all_ind[:num_train_examples]
    indices_valid = all_ind[num_train_examples:]

    train_stream = Flatten(
        DataStream.default_stream(dataset,
                                  iteration_scheme=ShuffledScheme(
                                      indices_train, batch_size)))

    valid_stream = None
    if len(indices_valid) != 0:
        valid_stream = Flatten(
            DataStream.default_stream(dataset,
                                      iteration_scheme=ShuffledScheme(
                                          indices_valid, batch_size)))

    test_stream = None
    if use_test:
        test_dataset = MNIST(("test", ))
        test_ind = numpy.arange(test_dataset.num_examples)
        # Shuffle the test indices themselves.  Shuffling ``all_ind`` here
        # would leave the test indices in order and permute the
        # train/validation index arrays in place, since those are views.
        numpy.random.RandomState(seed=1).shuffle(test_ind)

        test_stream = Flatten(
            DataStream.default_stream(test_dataset,
                                      iteration_scheme=ShuffledScheme(
                                          test_ind, batch_size)))

    return train_stream, valid_stream, test_stream
Ejemplo n.º 4
0
def test_mnist():
    """Check sizes, shapes, dtypes and options of the MNIST dataset class.

    Covers the ``start`` offset on the training set, source selection on
    the test set, single and multi-example requests, the ``binary`` and
    ``flatten`` options, rejection of an unknown split and a pickle
    round-trip.  The test is skipped when the MNIST data is unavailable.
    """
    skip_if_not_available(datasets=['mnist'])
    mnist_train = MNIST('train', start=20000)
    assert len(mnist_train.features) == 40000
    assert len(mnist_train.targets) == 40000
    assert mnist_train.num_examples == 40000
    mnist_test = MNIST('test', sources=('targets', ))
    assert len(mnist_test.targets) == 10000
    assert mnist_test.num_examples == 10000

    first_feature, first_target = mnist_train.get_data(request=[0])
    assert first_feature.shape == (1, 784)
    assert first_feature.dtype.kind == 'f'
    assert first_target.shape == (1, 1)
    # Compare dtypes by equality; identity of dtype objects is an
    # implementation detail and not something the test should rely on.
    assert first_target.dtype == numpy.dtype('uint8')

    first_target, = mnist_test.get_data(request=[0, 1])
    assert first_target.shape == (2, 1)

    binary_mnist = MNIST('test', binary=True, sources=('features', ))
    first_feature, = binary_mnist.get_data(request=[0])
    assert first_feature.dtype.kind == 'b'
    # There is no 'valid' split for this dataset class.
    assert_raises(ValueError, MNIST, 'valid')

    # The dataset must survive a pickle round-trip with its data intact.
    mnist_train = cPickle.loads(cPickle.dumps(mnist_train))
    assert len(mnist_train.features) == 40000

    mnist_test_unflattened = MNIST('test', flatten=False)
    assert mnist_test_unflattened.features.shape == (10000, 28, 28)
Ejemplo n.º 5
0
def get_mnist():
    """Return flattened, shuffled streams over the MNIST train and test sets.

    Returns
    -------
    tuple
        ``(train_stream, test_stream)``; both iterate their whole dataset in
        shuffled batches of 256 examples.
    """
    def shuffled_flat_stream(dataset):
        # Shuffled batches of 256 over every example of ``dataset``.
        scheme = ShuffledScheme(dataset.num_examples, batch_size=256)
        stream = DataStream.default_stream(dataset, iteration_scheme=scheme)
        return Flatten(stream)

    train_set = MNIST(("train", ))
    test_set = MNIST(("test", ))
    return shuffled_flat_stream(train_set), shuffled_flat_stream(test_set)
Ejemplo n.º 6
0
def test_in_memory():
    """Check that pickling an epoch iterator keeps its state, not its data.

    An in-memory MNIST dataset is iterated for two batches, the epoch
    iterator is pickled to a temporary file (which must stay below 1MB) and
    reloaded; the next batch from the reloaded iterator must be the third
    batch of the dataset.  The temporary file is removed afterwards.
    """
    skip_if_not_available(datasets=['mnist.hdf5'])
    # Load MNIST and get two batches
    mnist = MNIST('train', load_in_memory=True)
    data_stream = DataStream(mnist,
                             iteration_scheme=SequentialScheme(
                                 examples=mnist.num_examples, batch_size=256))
    epoch = data_stream.get_epoch_iterator()
    for i, (features, targets) in enumerate(epoch):
        if i == 1:
            break
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(256, 512))
    mnist.close(handle)
    assert numpy.all(features == known_features)

    # Pickle the epoch and make sure that the data wasn't dumped.  The file
    # is created with delete=False so it can be reopened by name, which
    # means it has to be removed explicitly once we are done with it.
    f = tempfile.NamedTemporaryFile(delete=False)
    filename = f.name
    try:
        with f:
            cPickle.dump(epoch, f)
        assert os.path.getsize(filename) < 1024 * 1024  # Less than 1MB

        # Reload the epoch and make sure that the state was maintained
        del epoch
        with open(filename, 'rb') as f:
            epoch = cPickle.load(f)
    finally:
        os.remove(filename)
    features, targets = next(epoch)
    handle = mnist.open()
    known_features, _ = mnist.get_data(handle, slice(512, 768))
    mnist.close(handle)
    assert numpy.all(features == known_features)
Ejemplo n.º 7
0
def get_mnist(split, sources, load_in_memory):
    """Return an MNIST dataset for a train/validation/test style split.

    When ``'test'`` is not in ``split``, the data comes from the MNIST
    training set: examples 0-49999 if ``'train'`` is in ``split``, otherwise
    examples 50000-59999.  When ``'test'`` is in ``split``, ``split`` is
    passed through unchanged and no subset is applied.

    Parameters
    ----------
    split : container of str
        Requested split; membership of ``'test'`` and ``'train'`` is tested.
    sources : sequence of str
        Sources forwarded to ``MNIST``.
    load_in_memory : bool
        Forwarded to ``MNIST``.
    """
    from fuel.datasets import MNIST

    subset = None
    if 'test' not in split:
        if 'train' in split:
            subset = slice(0, 50000)
        else:
            subset = slice(50000, 60000)
        # Both non-test splits are carved out of the training set.
        split = ('train', )

    return MNIST(split,
                 sources=sources,
                 subset=subset,
                 load_in_memory=load_in_memory)
def build_2d_datasets(dataset_name, n_train=20):
    """Build small two-dimensional binary classification datasets.

    Parameters
    ----------
    dataset_name : str
        ``'xor'`` (four Gaussian clusters labelled in an XOR pattern),
        ``'sklearn'`` (``make_classification`` with two informative
        features) or ``'mnist'`` (digits 3 and 5 projected on the last two
        eigenvectors of the data covariance returned by ``eigh``).
    n_train : int, optional
        The first ``2 * n_train`` rows form the training set.  For
        ``'mnist'`` these are ``n_train`` examples of each class.

    Returns
    -------
    tuple
        ``(train_dataset, test_dataset)`` as ``IndexableDataset`` objects
        with ``'features'`` and ``'targets'`` sources.

    Raises
    ------
    ValueError
        If ``dataset_name`` is not supported.
    """
    if dataset_name not in ['mnist', 'sklearn', 'xor']:
        raise ValueError('This dataset is not supported')

    if dataset_name == 'xor':
        data_x = numpy.random.normal(size=(5000,
                                           2)).astype(dtype=fuel.config.floatX)
        which_cluster = (numpy.random.uniform(size=(data_x.shape[0], 2)) > .5)
        # Shift every point by +/-2 along each axis to form four clusters.
        data_x += 2. * (2 * which_cluster - 1)
        # The label is 1 when both shifts have the same sign, else 0.
        data_y = (2 * which_cluster - 1).prod(axis=1) * .5 + .5
        data_y = data_y.astype(dtype='int32').reshape((-1, 1))
    elif dataset_name == 'sklearn':
        data_x, data_y = make_classification(n_samples=1000,
                                             n_features=2,
                                             n_informative=2,
                                             n_redundant=0,
                                             n_classes=2)
        data_y = data_y.astype(dtype='int32').reshape((-1, 1))
    else:
        dataset = MNIST('train')
        data_mean, data_cov = build_mean_covariance(dataset, 256)
        _, eigvec = numpy.linalg.eigh(data_cov)
        features = (dataset.indexables[0] - data_mean).dot(eigvec[:, -2:])
        labels = dataset.indexables[1][:, 0]
        features_pos = features[labels == 3]
        features_neg = features[labels == 5]

        n_pos = features_pos.shape[0]
        n_neg = features_neg.shape[0]
        # Row layout: [pos train | neg train | remaining pos | remaining neg].
        # The boundary between the two "remaining" parts is expressed as a
        # positive index; a negative bound of -(n_neg - n_train) would
        # collapse to -0 (i.e. 0) when n_train equals n_neg.
        rest_boundary = n_train + n_pos

        data_x = numpy.zeros((n_pos + n_neg, 2))
        data_x[:n_train] = features_pos[:n_train]
        data_x[n_train:(2 * n_train)] = features_neg[:n_train]
        data_x[(2 * n_train):rest_boundary] = features_pos[n_train:]
        data_x[rest_boundary:] = features_neg[n_train:]

        # Targets start at 0 (negative class); only positives are marked.
        data_y = numpy.zeros((n_pos + n_neg, 1))
        data_y[:n_train] = 1
        data_y[(2 * n_train):rest_boundary] = 1

    train_dataset = IndexableDataset({
        'features': data_x[:(2 * n_train)],
        'targets': data_y[:(2 * n_train)]
    })
    test_dataset = IndexableDataset({
        'features': data_x[(2 * n_train):],
        'targets': data_y[(2 * n_train):]
    })

    return train_dataset, test_dataset
Ejemplo n.º 9
0
def get_mnist_streams(num_train_examples, batch_size):
    """Split the MNIST training set into train and validation streams.

    The example indices are permuted with a fixed seed; the first
    ``num_train_examples`` go to the training stream and the rest to the
    validation stream.  Both streams are shuffled and flatten only the
    ``'features'`` source.

    Parameters
    ----------
    num_train_examples : int
        Number of examples assigned to the training stream.
    batch_size : int
        Batch size of both streams.

    Returns
    -------
    tuple
        ``(train_stream, valid_stream)``.
    """
    from fuel.datasets import MNIST
    dataset = MNIST(("train", ))

    permutation = numpy.arange(dataset.num_examples)
    numpy.random.RandomState(seed=1).shuffle(permutation)

    def flat_shuffled_stream(indices):
        # Shuffled batches restricted to ``indices`` of the training set.
        scheme = ShuffledScheme(indices, batch_size)
        return Flatten(
            DataStream.default_stream(dataset, iteration_scheme=scheme),
            which_sources=('features', ))

    train_stream = flat_shuffled_stream(permutation[:num_train_examples])
    valid_stream = flat_shuffled_stream(permutation[num_train_examples:])
    return train_stream, valid_stream
Ejemplo n.º 10
0
def test_mnist_test():
    """Check the raw contents and the default stream of the MNIST test set.

    The first ten examples are read through an explicit handle and compared
    against known shapes, dtype, pixel values and label; the default stream
    is then checked to yield ``floatX`` data scaled into ``[0, 1]``.  The
    test is skipped when ``mnist.hdf5`` is unavailable.
    """
    skip_if_not_available(datasets=['mnist.hdf5'])

    dataset = MNIST('test', load_in_memory=False)
    handle = dataset.open()
    # Close the handle even when one of the assertions below fails.
    try:
        data, labels = dataset.get_data(handle, slice(0, 10))
        assert data.dtype == 'uint8'
        assert data.shape == (10, 1, 28, 28)
        assert labels.shape == (10, 1)
        # Expected pixel values of row 7 of the first test image.
        known = numpy.array([
            0, 0, 0, 0, 0, 0, 84, 185, 159, 151, 60, 36, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0
        ])
        assert_allclose(data[0][0][7], known)
        assert labels[0][0] == 7
        assert dataset.num_examples == 10000
    finally:
        dataset.close(handle)

    stream = DataStream.default_stream(dataset,
                                       iteration_scheme=SequentialScheme(
                                           10, 10))
    data = next(stream.get_epoch_iterator())[0]
    assert data.min() >= 0.0 and data.max() <= 1.0
    assert data.dtype == config.floatX
Ejemplo n.º 11
0
def main(save_to):
    """Render the MNIST test images that maximally activate each unit.

    A LeNet-style convnet is rebuilt, loaded with the parameters stored in
    ``save_to`` and run once over the unshuffled MNIST test set with
    ``MaximumActivationSearch``.  For every probed output one film strip
    image named ``<layer name>_maxact.jpg`` is written.

    Parameters
    ----------
    save_to : str
        Path of the file holding the trained parameters.
    """
    batch_size = 365
    feature_maps = [6, 16]
    mlp_hiddens = [120, 84]
    conv_sizes = [5, 5]
    pool_sizes = [2, 2]
    image_size = (28, 28)
    output_size = 10

    # The above are from LeCun's paper. The blocks example had:
    #    feature_maps = [20, 50]
    #    mlp_hiddens = [500]

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    # The size pairs are materialised as lists: on Python 3 ``zip`` returns
    # a one-shot iterator, which would be empty on a second traversal.
    convnet = LeNet(conv_activations, 1, image_size,
                    filter_sizes=list(zip(conv_sizes, conv_sizes)),
                    feature_maps=feature_maps,
                    pooling_sizes=list(zip(pool_sizes, pool_sizes)),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='valid',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))
    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()
    logging.info("Input dim: {} {} {}".format(
        *convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        if isinstance(layer, Activation):
            logging.info("Layer {} ({})".format(
                i, layer.__class__.__name__))
        else:
            logging.info("Layer {} ({}) dim: {} {} {}".format(
                i, layer.__class__.__name__, *layer.get_dim('output')))

    x = tensor.tensor4('features')

    # Apply the convnet and collect the outputs of every convolutional and
    # linear brick; these are the activations that get probed.
    probs = convnet.apply(x)
    cg = ComputationGraph([probs])
    outs = VariableFilter(
            roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables)

    # Create an interior activation model
    model = Model([probs] + outs)

    # Load it with trained parameters; the file is closed once read.
    with open(save_to, 'rb') as param_file:
        params = load_parameters(param_file)
    model.set_parameter_values(params)

    algorithm = MaximumActivationSearch(outputs=outs)

    # Use the mnist test set, unshuffled
    mnist_test = MNIST(("test",), sources=['features'])
    mnist_test_stream = DataStream.default_stream(
        mnist_test,
        iteration_scheme=SequentialScheme(
            mnist_test.num_examples, batch_size))

    extensions = [Timing(),
                  FinishAfter(after_n_epochs=1),
                  DataStreamMonitoring(
                      [],
                      mnist_test_stream,
                      prefix="test"),
                  Checkpoint("maxact.tar"),
                  ProgressBar(),
                  Printing()]

    main_loop = MainLoop(
        algorithm,
        mnist_test_stream,
        model=model,
        extensions=extensions)

    main_loop.run()

    # Render one film strip per probed output from the recorded maxima.
    examples = mnist_test.get_example_stream()
    example = examples.get_data(0)[0]
    layers = convnet.layers
    for output, record in algorithm.maximum_activations.items():
        layer = get_brick(output)
        activations, indices, snapshots = (
                r.get_value() if r else None for r in record[1:])
        filmstrip = Filmstrip(
            example.shape[-2:], (indices.shape[1], indices.shape[0]),
            background='blue')
        # NOTE(review): the loops below assume 100 recorded entries per
        # unit; presumably this equals indices.shape[0] -- confirm.
        if layer in layers:
            # Layers of the convolutional sequence get a mask computed from
            # the field map of all layers up to and including this one.
            fieldmap = layerarray_fieldmap(layers[0:layers.index(layer) + 1])
            for unit in range(indices.shape[1]):
                for index in range(100):
                    mask = make_mask(example.shape[-2:], fieldmap, numpy.clip(
                        snapshots[index, unit, :, :], 0, numpy.inf))
                    imagenum = indices[index, unit, 0]
                    filmstrip.set_image((unit, index),
                            examples.get_data(imagenum)[0], mask)
        else:
            # Other bricks: show the whole image without a mask.
            for unit in range(indices.shape[1]):
                for index in range(100):
                    imagenum = indices[index, unit]
                    filmstrip.set_image((unit, index),
                            examples.get_data(imagenum)[0])
        filmstrip.save(layer.name + '_maxact.jpg')
Ejemplo n.º 12
0
    def __init__(self, save_to):
        """Build the evaluation model of a trained LeNet-5 and its baselines.

        The network from ``create_lenet_5`` is applied symbolically, the
        outputs of its convolutional and linear bricks are collected, and
        the parameters stored in ``save_to`` are loaded.  Baseline
        evaluation results on the MNIST test set, a sample of the target
        layer and a copy of the next layer's parameter value are stored on
        the instance.

        Parameters
        ----------
        save_to : str
            Path of the file holding the trained parameters.
        """
        batch_size = 500
        convnet = create_lenet_5()

        logging.info("Input dim: {} {} {}".format(
            *convnet.children[0].get_dim('input_')))
        for i, layer in enumerate(convnet.layers):
            if isinstance(layer, Activation):
                logging.info("Layer {} ({})".format(
                    i, layer.__class__.__name__))
            else:
                logging.info("Layer {} ({}) dim: {} {} {}".format(
                    i, layer.__class__.__name__, *layer.get_dim('output')))

        mnist_test = MNIST(("test",), sources=['features', 'targets'])
        # NOTE(review): ``basis`` is not used below; the call is kept in
        # case ``create_fair_basis`` is relied on for side effects.
        basis = create_fair_basis(mnist_test, 10, 10)

        x = tensor.tensor4('features')
        y = tensor.lmatrix('targets')

        # Apply the convnet
        probs = convnet.apply(x)
        cg = ComputationGraph([probs])

        def full_brick_name(brick):
            # Slash-separated path of brick names, with a leading slash.
            return '/'.join([''] + [b.name for b in brick.get_unique_path()])

        # Find layer outputs to probe
        outs = OrderedDict((full_brick_name(get_brick(out)), out)
                for out in VariableFilter(
                    roles=[OUTPUT], bricks=[Convolutional, Linear])(
                        cg.variables))

        # Evaluation quantities; the two confusion statistics are summed
        # across batches.
        error_rate = (MisclassificationRate().apply(y.flatten(), probs)
                      .copy(name='error_rate'))
        confusion = (ConfusionMatrix().apply(y.flatten(), probs)
                      .copy(name='confusion'))
        confusion.tag.aggregation_scheme = Sum(confusion)
        confusion_image = (ConfusionImage().apply(y.flatten(), probs, x)
                      .copy(name='confusion_image'))
        confusion_image.tag.aggregation_scheme = Sum(confusion_image)

        model = Model(
                [error_rate, confusion, confusion_image] + list(outs.values()))

        # Load it with trained parameters; the file is closed once read.
        with open(save_to, 'rb') as param_file:
            params = load_parameters(param_file)
        model.set_parameter_values(params)

        # The evaluation stream uses the dataset's default sources.
        mnist_test = MNIST(("test",))
        mnist_test_stream = DataStream.default_stream(
            mnist_test,
            iteration_scheme=SequentialScheme(
                mnist_test.num_examples, batch_size))

        self.model = model
        self.mnist_test_stream = mnist_test_stream
        self.evaluator = DatasetEvaluator(
                [error_rate, confusion, confusion_image])
        self.base_results = self.evaluator.evaluate(mnist_test_stream)

        # TODO: allow target layer to be parameterized
        self.target_layer = '/lenet/mlp/linear_0'
        self.next_layer_param = '/lenet/mlp/linear_1.W'
        self.base_sample = extract_sample(
                outs[self.target_layer], mnist_test_stream)
        # Copy the value so later changes to the parameter do not alias it.
        self.base_param_value = (
            model.get_parameter_dict()[
                self.next_layer_param].get_value().copy())
Ejemplo n.º 13
0
from blocks.main_loop import MainLoop
from fuel.datasets import MNIST
from fuel.streams import DataStream
from fuel.schemes import SequentialScheme

from blocks.monitoring.evaluators import DatasetEvaluator
from variance_aggregation import MeanAndVariance
from blocks.utils import shared_floatx_nans, shared_floatx_zeros

import theano
floatX = theano.config.floatX

from numpy import sqrt
from theano.tensor import cast

# MNIST train and test datasets, created at import time.
mnist_train = MNIST(['train'])
mnist_test = MNIST(['test'])

# Sequential pass over the whole training set in batches of 100.
stream_train = DataStream(mnist_train,
    iteration_scheme=SequentialScheme(mnist_train.num_examples, 100))

# Normalization mode read by ``normalize``; exactly one line is active.
# NOTE(review): only 'off' is handled in the visible part of ``normalize``;
# the meaning of 'bn1' and 'bn2' is not visible here.
#normalization = 'bn1'
normalization = 'bn2'
#normalization = 'off'

def normalize(input_, output_dim):
    """Normalize ``input_`` according to the module-level ``normalization``.

    Only the ``'off'`` mode is handled in the code visible here: the input
    is returned unchanged together with two ``None`` placeholders.  For any
    other mode the visible body falls through and returns ``None``.

    NOTE(review): ``output_dim`` is unused in the visible body and no
    branch for the other modes is present; the function looks truncated --
    confirm against the full source.
    """

    if normalization == 'off':
        return input_, None, None

    #normed = tensor.clip(normed, -3., 3.)
Ejemplo n.º 14
0
def main(save_to):
    """Render, per convolutional layer, inputs built from a basis of digits.

    A LeNet-style convnet is rebuilt and, for each of its convolutional
    layers, an input is formed as a normalised, clipped combination of
    shifted basis images.  The network is loaded with the parameters stored
    in ``save_to``, the combination coefficients are updated repeatedly and
    the resulting inputs are written to ``<layer name>_stroke.jpg``.

    Parameters
    ----------
    save_to : str
        Path of the file holding the trained parameters.
    """
    batch_size = 365
    feature_maps = [6, 16]
    mlp_hiddens = [120, 84]
    conv_sizes = [5, 5]
    pool_sizes = [2, 2]
    image_size = (28, 28)
    output_size = 10

    # The above are from LeCun's paper. The blocks example had:
    #    feature_maps = [20, 50]
    #    mlp_hiddens = [500]

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    # The size pairs are materialised as lists: on Python 3 ``zip`` returns
    # a one-shot iterator, which would be empty on a second traversal.
    convnet = LeNet(conv_activations,
                    1,
                    image_size,
                    filter_sizes=list(zip(conv_sizes, conv_sizes)),
                    feature_maps=feature_maps,
                    pooling_sizes=list(zip(pool_sizes, pool_sizes)),
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='valid',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))
    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()
    logging.info(
        "Input dim: {} {} {}".format(*convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        if isinstance(layer, Activation):
            logging.info("Layer {} ({})".format(i, layer.__class__.__name__))
        else:
            logging.info("Layer {} ({}) dim: {} {} {}".format(
                i, layer.__class__.__name__, *layer.get_dim('output')))

    # Only the spatial shape of ``random_init`` is used (film strip size).
    random_init = (numpy.random.rand(100, 1, 28, 28) * 128).astype('float32')
    layers = [l for l in convnet.layers if isinstance(l, Convolutional)]
    mnist_test = MNIST(("test", ), sources=['features', 'targets'])
    basis_init = create_fair_basis(mnist_test, 10, 50)
    basis_set = make_shifted_basis(basis_init, convnet, layers)

    for layer, basis in zip(layers, basis_set):
        # basis is 5d:
        # (probed_units, base_cases, 1-c, 28-y, 28-x)
        b = shared_floatx(basis)
        # coefficients is 2d:
        # (probed_units, base_cases)
        coefficients = shared_floatx(
            numpy.ones(basis.shape[0:2], dtype=theano.config.floatX))
        # prod is 5d: (probed_units, base_cases, 1-c, 28-y, 28-x)
        prod = tensor.shape_padright(coefficients, 3) * b
        # x is 4d: (probed_units, 1-c, 28-y, 28-x)
        ux = prod.sum(axis=1)
        # Scale each probed input by its own maximum, then clip to [0, 1].
        x = tensor.clip(
            ux / tensor.shape_padright(ux.flatten(ndim=2).max(axis=1), 3), 0,
            1)

        # Apply the convnet and pick the outputs of the current layer
        probs = convnet.apply(x)
        cg = ComputationGraph([probs])
        outs = VariableFilter(roles=[OUTPUT], bricks=[layer])(cg.variables)

        # Create an interior activation model
        model = Model([probs] + outs)

        # Load it with trained parameters; the file is closed once read.
        with open(save_to, 'rb') as param_file:
            params = load_parameters(param_file)
        model.set_parameter_values(params)

        learning_rate = shared_floatx(0.03, 'learning_rate')
        # We will try to do all units at once.
        # unit = shared_floatx(0, 'unit', dtype='int64')
        # But we are only doing one layer at once.
        output = outs[0]
        dims = layer.get_dims(['output'])[0]
        if isinstance(dims, numbers.Integral):
            # FC case: output is 2d: (probed_units, units)
            dims = (dims, )
            unitrange = tensor.arange(dims[0])
            # Fixed: this used the undefined name ``unitrage``.
            costvec = -tensor.log(
                tensor.nnet.softmax(output)[unitrange, unitrange].flatten())
        else:
            # Conv case: output is 4d: (probed_units, units, y, x)
            unitrange = tensor.arange(dims[0])
            print('dims is', dims)
            costvec = -tensor.log(
                tensor.nnet.softmax(output[unitrange, unitrange, dims[1] // 2,
                                           dims[2] // 2]).flatten())
        cost = costvec.sum()
        # grad is dims (probed_units, basis_size)
        # NOTE(review): the gradient step is commented out below, so the
        # update only renormalises the coefficients and neither ``grad``
        # nor ``learning_rate`` enters it -- confirm this is intended.
        grad = gradient.grad(cost, coefficients)
        stepc = coefficients  # - learning_rate * grad
        newc = stepc / tensor.shape_padright(stepc.mean(axis=1))
        fn = theano.function([], [cost, x], updates=[(coefficients, newc)])
        filmstrip = Filmstrip(random_init.shape[-2:], (dims[0], 1),
                              background='red')
        layer = get_brick(output)
        learning_rate.set_value(0.1)
        for index in range(20000):
            c, result = fn()
            if index % 1000 == 0:
                learning_rate.set_value(numpy.cast[theano.config.floatX](
                    learning_rate.get_value() * 0.8))
                print('cost', c)
                for u in range(dims[0]):
                    filmstrip.set_image((u, 0), result[u, :, :, :])
                # Saved once after all units are drawn, not once per unit;
                # the file written is the same.
                filmstrip.save(layer.name + '_stroke.jpg')
            # NOTE(review): the strip is also redrawn and saved on every
            # iteration; this may have been meant to run once after the
            # loop -- confirm against the original indentation.
            for u in range(dims[0]):
                filmstrip.set_image((u, 0), result[u, :, :, :])
            filmstrip.save(layer.name + '_stroke.jpg')
Ejemplo n.º 15
0
def get_stream():
    """Return a sequential stream over the MNIST training set.

    The iteration scheme is ``SequentialScheme(1500, 500)``.
    """
    train_set = MNIST(('train', ))
    scheme = SequentialScheme(1500, 500)
    return DataStream(train_set, iteration_scheme=scheme)
Ejemplo n.º 16
0
def main(save_to, num_epochs,
         regularization=0.0003, subset=None, num_batches=None,
         histogram=None, resume=False):
    """Train LeNet-5 on MNIST with L2 regularisation and AdaDelta.

    Parameters
    ----------
    save_to : str
        Destination handed to the ``Checkpoint`` extension (and to ``Load``
        when ``resume`` is set).
    num_epochs : int
        Number of epochs after which training finishes.
    regularization : float, optional
        Coefficient of the summed squared weights added to the cost.
    subset : int, optional
        When truthy, train only on ``subset`` consecutive training examples
        centred on index 30000.
    num_batches : int, optional
        Forwarded to ``FinishAfter`` as ``after_n_batches``.
    histogram : str, optional
        When truthy, an ``AttributionExtension`` is run after every batch
        and its result is saved under this file name after training.
    resume : bool, optional
        When true, a ``Load`` extension for ``save_to`` is appended.

    Notes
    -----
    The main-loop log is written to ``execution-log.json`` once training has
    finished.
    """
    batch_size = 500
    output_size = 10
    convnet = create_lenet_5()
    layers = convnet.layers  # not referenced again in this function

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Apply the convnet and derive the monitored quantities.
    probs = convnet.apply(x)
    cost = CategoricalCrossEntropy().apply(y.flatten(), probs)
    cost = cost.copy(name='cost')
    components = ComponentwiseCrossEntropy().apply(y.flatten(), probs)
    components = components.copy(name='components')
    error_rate = MisclassificationRate().apply(y.flatten(), probs)
    error_rate = error_rate.copy(name='error_rate')
    confusion = ConfusionMatrix().apply(y.flatten(), probs)
    confusion = confusion.copy(name='confusion')
    # The confusion matrix is summed over batches.
    confusion.tag.aggregation_scheme = Sum(confusion)

    cg = ComputationGraph([cost, error_rate, components])

    # Add the L2 penalty over every weight matrix to the cost.
    weights = VariableFilter(roles=[WEIGHT])(cg.variables)
    l2_norm = sum((W ** 2).sum() for W in weights)
    l2_norm.name = 'l2_norm'
    cost = cost + regularization * l2_norm
    cost.name = 'cost_with_regularization'

    def shuffled_stream(dataset):
        # Shuffled batches over every example of ``dataset``.
        scheme = ShuffledScheme(dataset.num_examples, batch_size)
        return DataStream.default_stream(dataset, iteration_scheme=scheme)

    train_options = {}
    if subset:
        first = 30000 - subset // 2
        train_options['subset'] = slice(first, first + subset)
    mnist_train = MNIST(("train",), **train_options)
    mnist_train_stream = shuffled_stream(mnist_train)

    mnist_test = MNIST(("test",))
    mnist_test_stream = shuffled_stream(mnist_test)

    # Gradient descent whose step rule is AdaDelta.
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=AdaDelta(decay_rate=0.99))

    test_monitor = DataStreamMonitoring(
        [cost, error_rate, confusion],
        mnist_test_stream,
        prefix="test")
    train_monitor = TrainingDataMonitoring(
        [cost, error_rate, l2_norm,
         aggregation.mean(algorithm.total_gradient_norm)],
        prefix="train",
        after_epoch=True)

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs,
                    after_n_batches=num_batches),
        test_monitor,
        train_monitor,
        Checkpoint(save_to),
        ProgressBar(),
        Printing(),
    ]

    if histogram:
        # Placed first so it runs ahead of the other extensions.
        attribution = AttributionExtension(
            components=components,
            parameters=cg.parameters,
            components_size=output_size,
            after_batch=True)
        extensions.insert(0, attribution)

    if resume:
        extensions.append(Load(save_to, True, True))

    main_loop = MainLoop(
        algorithm,
        mnist_train_stream,
        model=Model(cost),
        extensions=extensions)
    main_loop.run()

    if histogram:
        save_attributions(attribution, filename=histogram)

    with open('execution-log.json', 'w') as outfile:
        json.dump(main_loop.log, outfile, cls=NumpyEncoder)
Ejemplo n.º 17
0
def test_mnist_data_path():
    """Check that MNIST resolves its data file under ``config.data_path``.

    The test is skipped when ``mnist.hdf5`` is unavailable.
    """
    skip_if_not_available(datasets=['mnist.hdf5'])

    actual_path = MNIST('train').data_path
    expected_path = os.path.join(config.data_path, 'mnist.hdf5')
    assert actual_path == expected_path
Ejemplo n.º 18
0
def create_MNIST_data_streams():
    """Return in-memory MNIST train, validation and test datasets.

    The first 50000 examples of the MNIST training set form the training
    split and examples 50000-59999 the validation split.  Despite the
    function's name, the returned objects are the ``MNIST`` datasets
    themselves, not data streams.

    Returns
    -------
    tuple
        ``(train_set, valid_set, test_set)``.
    """
    shared_options = dict(sources=('features', 'targets'),
                          load_in_memory=True)

    train_set = MNIST(('train',), subset=slice(0, 50000), **shared_options)
    valid_set = MNIST(('train',), subset=slice(50000, 60000),
                      **shared_options)
    test_set = MNIST(('test',), **shared_options)
    return train_set, valid_set, test_set
Ejemplo n.º 19
0
def main(num_epochs=1000):
    """Train a softmax regressor on MNIST with monitoring and live plots.

    Builds the symbolic cost and misclassification rate, wires them to a
    momentum gradient-descent algorithm, attaches monitoring, plotting and
    printing extensions, and runs the main loop.

    Args:
        num_epochs: number of epochs after which ``FinishAfter`` stops
            training.
    """
    # Symbolic inputs, named after the data sources that feed them.
    features = tensor.matrix('features')
    targets = tensor.lmatrix('targets')

    regressor = SoftmaxRegressor(input_dim=784, n_classes=10)
    probs = regressor.get_probs(features=features)
    params = regressor.get_params()
    weights = regressor.get_weights()

    cost = regressor.get_cost(probs=probs, targets=targets).mean()
    cost.name = 'cost'
    misclassification = regressor.get_misclassification(
        probs=probs, targets=targets).mean()
    misclassification.name = 'misclassification'

    train_dataset = MNIST('train')
    test_dataset = MNIST('test')

    step_rule = Momentum(learning_rate=0.1, momentum=0.1)
    algorithm = GradientDescent(cost=cost, params=params, step_rule=step_rule)

    def float_stream(dataset, scheme_cls, batch_size):
        # Iterate over the whole dataset with the given scheme and wrap the
        # resulting stream in ForceFloatX.
        scheme = scheme_cls(examples=dataset.num_examples,
                            batch_size=batch_size)
        return ForceFloatX(
            data_stream=DataStream(dataset=dataset, iteration_scheme=scheme))

    train_data_stream = float_stream(train_dataset, ShuffledScheme, 100)
    test_data_stream = float_stream(test_dataset, SequentialScheme, 1000)

    model = Model(cost)

    # The same two quantities are monitored on the test and training data;
    # each monitor receives its own list object.
    monitored = (cost, misclassification)
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring(list(monitored), test_data_stream,
                             prefix='test'),
        TrainingDataMonitoring(list(monitored), prefix='train',
                               after_epoch=True),
    ]

    # All four monitored channels are drawn on a single 'Costs' plot.
    curve_plotter = Plotter(
        channels=[['test_cost', 'test_misclassification',
                   'train_cost', 'train_misclassification']],
        titles=['Costs'])

    # Display settings shared by the example images and the weight images.
    image_options = dict(image_shape=(28, 28, 1),
                         axes=(0, 1, 'c'),
                         shift=-0.5,
                         rescale=2.)
    # The display works on a deep copy of the training stream rather than
    # on the stream handed to the main loop.
    display_train = ImageDataStreamDisplay(
        data_stream=copy.deepcopy(train_data_stream), **image_options)
    weight_display = WeightDisplay(weights=weights,
                                   transpose=(1, 0),
                                   grid_shape=(1, 10),
                                   **image_options)
    images_displayer = DisplayImage(
        image_getters=[display_train, weight_display],
        titles=['Training examples', 'Softmax weights'])
    plotters = [curve_plotter, images_displayer]

    extensions.append(
        PlotManager('MNIST softmax examples',
                    plotters=plotters,
                    after_epoch=False,
                    every_n_epochs=10,
                    after_training=True))
    extensions.append(Printing())

    main_loop = MainLoop(model=model,
                         data_stream=train_data_stream,
                         algorithm=algorithm,
                         extensions=extensions)
    main_loop.run()
Ejemplo n.º 20
0
def train(args, model_args):
    # Train the walkback model and periodically write diagnostic plots
    # (corrupted batches, reconstructions, samples from noise).
    #
    # Parameters:
    #   args       -- option namespace. Only the log-directory creation and
    #                 `args.lr` read the caller's value; `args` is rebound by
    #                 `parse_args()` further down.
    #   model_args -- never read here; it is rebound by `parse_args()` too.
    #
    # Returns 1. as soon as the averaged training cost is NaN or inf.
    # Otherwise the function ends in `ipdb.set_trace()` once `max_epochs`
    # epochs have run.

    #model_id = '/data/lisatmp4/lambalex/lsun_walkback/walkback_'

    # Two log locations: a hard-coded absolute directory for parameters and
    # plots, and a relative `logs/` directory for the mimir training log.
    model_id = '/data/lisatmp4/anirudhg/cifar_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    # NOTE(review): this discards the `args` / `model_args` passed by the
    # caller; everything below uses the freshly parsed values, while `lrate`
    # and the log directories above came from the caller's `args`.
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    # NOTE(review): there is no `return` after `continue_training`, so the
    # rest of this function still runs once that call comes back.
    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    # Each branch is expected to set `dataset_train`, `dataset_test`,
    # `n_colors` and `spatial_width`; the 'Spiral' branch sets only
    # `dataset_train`.
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features', ))
        dataset_test = MNIST(['test'], sources=('features', ))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features', ))
        dataset_test = CIFAR10(['test'], sources=('features', ))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == "lsun" or args.dataset == "lsunsmall":

        print "loading lsun class!"

        from load_lsun import load_lsun

        print "loading lsun data!"

        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=True)
            spatial_width = 32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=False)
            spatial_width = 64

        n_colors = 3

    elif args.dataset == "celeba":

        print "loading celeba data"

        from fuel.datasets.celeba import CelebA

        dataset_train = CelebA(which_sets=['train'],
                               which_format="64",
                               sources=('features', ),
                               load_in_memory=False)
        dataset_test = CelebA(which_sets=['test'],
                              which_format="64",
                              sources=('features', ),
                              load_in_memory=False)

        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples,
                                     batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples,
                                     batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train,
                                                 iteration_scheme=tr_scheme)
        test_stream = DataStream.default_stream(dataset_test,
                                                iteration_scheme=ts_scheme)

        # From here on the names `dataset_train` / `dataset_test` hold
        # streams rather than datasets for CelebA.
        dataset_train = train_stream
        dataset_test = test_stream

        #epoch_it = train_stream.get_epoch_iterator()

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=100000,
                           classes=1,
                           cycles=2.,
                           noise=0.01,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))

    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    # Snapshot of every local defined so far (args, lrate, logger, datasets,
    # ...), handed to the model-building helpers as their options.
    model_options = locals().copy()

    # Datasets get a shuffled, flattened stream restricted to a whole number
    # of batches, so no partial batch is produced.
    # NOTE(review): 'lsunsmall' and 'Spiral' also take this branch although
    # the Spiral branch above already stored a DataStream in `dataset_train`
    # (what `load_lsun` returns is not visible here) -- confirm this is
    # intended.
    if args.dataset != 'lsun' and args.dataset != 'celeba':
        train_stream = Flatten(
            DataStream.default_stream(
                dataset_train,
                iteration_scheme=ShuffledScheme(
                    examples=dataset_train.num_examples -
                    (dataset_train.num_examples % args.batch_size),
                    batch_size=args.batch_size)))
    else:
        train_stream = dataset_train
        test_stream = dataset_test

    # NOTE(review): `spatial_width` is not assigned in the 'Spiral' branch,
    # so this print fails for that dataset. `WIDTH` is not defined in this
    # function; presumably a module-level constant.
    print "Width", WIDTH, spatial_width

    # `shp` is not read again in this function.
    shp = next(train_stream.get_epoch_iterator())[0].shape

    print "got epoch iterator"

    # Compute the scale/shift that would make one minibatch zero-mean with
    # unit variance. The stream transforms below are commented out, so
    # `scl` / `shft` are only passed to `reverse_time` during sampling.
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)

    print 'Building model'
    params = init_params(model_options)
    # Optionally overwrite the fresh parameters with saved ones, but only if
    # the file exists; otherwise training silently starts from scratch.
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams
    # The string literal below is disabled debugging code kept as a no-op
    # expression statement.
    '''
    x = T.matrix('x', dtype='float32')
    temp  = T.scalar('temp', dtype='float32')
    f=transition_operator(tparams, model_options, x, temp)

    for data in train_stream.get_epoch_iterator():
        print data[0]
        a = f([data[0], 1.0, 1])
        #ipdb.set_trace()
    '''
    x, cost, start_temperature = build_model(tparams, model_options)
    inps = [x, start_temperature]

    # Symbolic inputs for the forward (corruption) step. Both `temperature`
    # and `cost` are rebound to plain numbers inside the training loop.
    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    #print 'Building f_cost...',
    #f_cost = theano.function(inps, cost)
    #print 'Done'
    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    #get_grads = theano.function(inps, grads)

    # Replace NaN gradient entries with zeros so one bad value does not
    # poison the parameter update.
    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    # The optimizer is looked up by name in the `optimizers` module and
    # returns two callables: one taking the inputs and producing the cost,
    # one applying the update for a given learning rate.
    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    for param in tparams:
        print param
        print tparams[param].get_value().shape

    print 'Buiding Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'

    # `estop` and `bad_counter` are assigned but never read in this function.
    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 1
    print 'Number of steps....'
    print args.num_steps
    print "Number of metasteps...."
    print args.meta_steps
    print 'Done'
    count_sample = 1
    for eidx in xrange(max_epochs):
        # Checkpoint the parameters every 20 epochs (including epoch 0).
        if eidx % 20 == 0:
            params = unzip(tparams)
            save_params(params,
                        model_dir + '/' + 'params_' + str(eidx) + '.npz')
        n_samples = 0
        print 'Starting Next Epoch ', eidx
        for data in train_stream.get_epoch_iterator():

            # NOTE(review): `data_use` is only assigned in this CIFAR10
            # branch; for any other dataset the uses below hit an unbound
            # local on the first batch. Partial CIFAR10 batches are skipped.
            if args.dataset == 'CIFAR10':
                if data[0].shape[0] == args.batch_size:
                    data_use = (data[0].reshape(args.batch_size,
                                                3 * 32 * 32), )
                else:
                    continue
            t0 = time.time()
            batch_index += 1
            n_samples += len(data_use[0])
            uidx += 1
            # NOTE(review): `len(data_use[0])` above would already have
            # raised for a None batch, so this guard cannot trigger.
            if data_use[0] is None:
                print 'No data '
                uidx -= 1
                continue
            ud_start = time.time()

            t1 = time.time()

            # One optimizer step per meta step. With more than one meta
            # step, the batch is pushed through the forward diffusion and
            # the temperature is multiplied by `temperature_factor` before
            # the next step.
            data_run = data_use[0]
            temperature_forward = args.temperature
            meta_cost = []
            for meta_step in range(0, args.meta_steps):
                meta_cost.append(f_grad_shared(data_run, temperature_forward))
                f_update(lrate)
                if args.meta_steps > 1:
                    data_run, sigma, _, _ = forward_diffusion(
                        [data_run, temperature_forward, 1])
                    temperature_forward *= args.temperature_factor
            # Average cost over the meta steps; rebinds `cost`, which held
            # the symbolic cost until now.
            cost = sum(meta_cost) / len(meta_cost)

            # `ud` and the `t0` / `t1` / `t5` timestamps are not read again
            # outside commented-out code.
            ud = time.time() - ud_start

            #gradient_updates_ = get_grads(data_use[0],args.temperature)

            # Abort the whole training run on a non-finite cost.
            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1.
            t1 = time.time()
            #print time.time() - t1, "time to get grads"
            t1 = time.time()
            logger.log({
                'epoch': eidx,
                'batch_index': batch_index,
                'uidx': uidx,
                'training_error': cost
            })
            #'Norm_1': np.linalg.norm(gradient_updates_[0]),
            #'Norm_2': np.linalg.norm(gradient_updates_[1]),
            #'Norm_3': np.linalg.norm(gradient_updates_[2]),
            #'Norm_4': np.linalg.norm(gradient_updates_[3])})
            #print time.time() - t1, "time to log"

            #print time.time() - t0, "total time in batch"
            t5 = time.time()

            if batch_index % 20 == 0:
                print batch_index, "cost", cost

            # Every 200 batches: corrupt the current batch step by step,
            # walk it back with the sampler, then sample from pure noise,
            # plotting each stage.
            if batch_index % 200 == 0:
                count_sample += 1
                # This value is not read before it is recomputed after the
                # forward loop below.
                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))
                temperature_forward = args.temperature

                # Forward pass: step 0 starts from the data batch, later
                # steps continue from the previous corrupted batch.
                # NOTE(review): the step-0 file name has no underscore
                # before 'epoch_', unlike the names used for later steps.
                for num_step in range(args.num_steps * args.meta_steps):
                    print "Forward temperature", temperature_forward
                    if num_step == 0:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            [data_use[0], temperature_forward, 1])
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(
                            x_temp, model_dir + '/' + "batch_" +
                            str(batch_index) + '_corrupted' + 'epoch_' +
                            str(count_sample) + '_time_step_' + str(num_step))
                    else:
                        x_data, sampled, sampled_activation, sampled_preactivation = forward_diffusion(
                            [x_data, temperature_forward, 1])
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        x_temp = x_data.reshape(args.batch_size, n_colors,
                                                WIDTH, WIDTH)
                        plot_images(
                            x_temp, model_dir + '/batch_' + str(batch_index) +
                            '_corrupted' + '_epoch_' + str(count_sample) +
                            '_time_step_' + str(num_step))

                    temperature_forward = temperature_forward * args.temperature_factor

                # Plot the uncorrupted batch for reference.
                x_temp2 = data_use[0].reshape(args.batch_size, n_colors, WIDTH,
                                              WIDTH)
                plot_images(
                    x_temp2, model_dir + '/' + 'orig_' + 'epoch_' + str(eidx) +
                    '_batch_index_' + str(batch_index))

                # Start the backward walk at the highest temperature reached
                # by the forward pass.
                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))

                # Backward pass from the fully corrupted batch. The
                # temperature is divided by `temperature_factor` each step
                # until it compares equal to `args.temperature` (exact float
                # comparison), after which it is left unchanged.
                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        [x_data, temperature, 0])
                    print 'On backward step number, using temperature', i, temperature
                    reverse_time(
                        scl, shft, x_data, model_dir + '/' + "batch_" +
                        str(batch_index) + '_samples_backward_' + 'epoch_' +
                        str(count_sample) + '_time_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor

                # Draw the starting point for sampling from noise.
                # NOTE(review): only the gaussian branch assigns `x_sampled`;
                # the other branch assigns `s`, so `x_sampled` below is
                # unbound (or stale from an earlier round) for any other
                # `args.noise` value.
                if args.noise == "gaussian":
                    x_sampled = np.random.normal(
                        0.5, 2.0,
                        size=(args.batch_size, INPUT_SIZE)).clip(0.0, 1.0)
                else:
                    s = np.random.binomial(1, 0.5, INPUT_SIZE)

                temperature = args.temperature * (args.temperature_factor**(
                    args.num_steps * args.meta_steps - 1))

                # Same backward walk as above, this time starting from the
                # noise sample instead of a corrupted data batch.
                x_data = np.asarray(x_sampled).astype('float32')
                for i in range(args.num_steps * args.meta_steps +
                               args.extra_steps):
                    x_data, sampled, sampled_activation, sampled_preactivation = f_sample(
                        [x_data, temperature, 0])
                    print 'On step number, using temperature', i, temperature
                    reverse_time(
                        scl, shft, x_data, model_dir + '/batch_index_' +
                        str(batch_index) + '_inference_' + 'epoch_' +
                        str(count_sample) + '_step_' + str(i))
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor

    # Drops into the interactive debugger once all epochs have finished.
    ipdb.set_trace()
Ejemplo n.º 21
0
def get_stream():
    """Return a sequential data stream over the MNIST 'train' set.

    The iteration scheme is ``SequentialScheme(1500, 500)``; in Fuel's
    signature those positional arguments are the examples and the batch
    size.
    """
    train_set = MNIST('train')
    scheme = SequentialScheme(1500, 500)
    return DataStream(train_set, iteration_scheme=scheme)
Ejemplo n.º 22
0
from blocks.graph import ComputationGraph
from blocks.main_loop import MainLoop
from blocks.bricks import MLP, Rectifier, Tanh, Logistic, Identity, BatchNormalizedMLP
from blocks.initialization import IsotropicGaussian, Constant
from blocks_extras.extensions.plot import Plot

from fuel.datasets import MNIST
from fuel.streams import DataStream
from fuel.schemes import ShuffledScheme
from fuel.transformers import Flatten, ScaleAndShift, Rename, Merge

# Seed NumPy's global random generator with a fixed value so runs are
# repeatable.
seed = 123
batch_size = 1000
np.random.seed(seed=seed)

# Both datasets are restricted with `subset`: indices 0-9999 of the 'train'
# split and indices 0-999 of the 'test' split.
mnist_train = MNIST(which_sets=('train', ), subset=range(10000))
mnist_test = MNIST(which_sets=('test', ), subset=range(1000))


def _data_stream(dataset, batch_size):
    """Build the default stream of *dataset* with shuffled batches.

    Args:
        dataset: dataset exposing ``num_examples``; every example is
            included in the iteration scheme.
        batch_size: number of examples requested per batch.

    Returns:
        The stream created by ``DataStream.default_stream``.
    """
    shuffled = ShuffledScheme(examples=dataset.num_examples,
                              batch_size=batch_size)
    return DataStream.default_stream(dataset=dataset,
                                     iteration_scheme=shuffled)


def pair_data_stream(dataset, batch_size):
    data_streams = [
        Rename(_data_stream(dataset=dataset, batch_size=batch_size),
def train(args, model_args):
    # Train the walkback model (2-D spiral/circle variant) and periodically
    # plot the corrupted points, the reconstructions and samples from noise.
    #
    # Parameters:
    #   args       -- option namespace. Only the log-directory creation and
    #                 `args.lr` read the caller's value; `args` is rebound by
    #                 `parse_args()` further down.
    #   model_args -- never read here; it is rebound by `parse_args()` too.
    #
    # Returns 1. as soon as the averaged training cost is NaN or inf.
    # Otherwise the function ends in `ipdb.set_trace()` once `max_epochs`
    # epochs have run.

    # Two log locations: a hard-coded absolute directory for parameters and
    # plots, and a relative `logs/` directory for the mimir training log.
    model_id = '/data/lisatmp4/anirudhg/spiral_walk_back/walkback_'
    model_dir = create_log_dir(args, model_id)
    model_id2 = 'logs/walkback_'
    model_dir2 = create_log_dir(args, model_id2)
    print model_dir
    print model_dir2 + '/' + 'log.jsonl.gz'
    logger = mimir.Logger(filename=model_dir2 + '/log.jsonl.gz',
                          formatter=None)

    # TODO batches_per_epoch should not be hard coded
    lrate = args.lr
    import sys
    sys.setrecursionlimit(10000000)
    # NOTE(review): this discards the `args` / `model_args` passed by the
    # caller; everything below uses the freshly parsed values, while `lrate`
    # and the log directories above came from the caller's `args`.
    args, model_args = parse_args()

    #trng = RandomStreams(1234)

    # NOTE(review): there is no `return` after `continue_training`, so the
    # rest of this function still runs once that call comes back.
    if args.resume_file is not None:
        print "Resuming training from " + args.resume_file
        from blocks.scripts import continue_training
        continue_training(args.resume_file)

    ## load the training data
    # NOTE(review): the MNIST and CIFAR10 branches store dataset objects in
    # `dataset_train`, while the CelebA, Spiral and Circle branches store
    # DataStreams. Below, `dataset_train` is used directly as the training
    # stream -- confirm the dataset branches still work.
    if args.dataset == 'MNIST':
        print 'loading MNIST'
        from fuel.datasets import MNIST
        dataset_train = MNIST(['train'], sources=('features', ))
        dataset_test = MNIST(['test'], sources=('features', ))
        n_colors = 1
        spatial_width = 28

    elif args.dataset == 'CIFAR10':
        from fuel.datasets import CIFAR10
        dataset_train = CIFAR10(['train'], sources=('features', ))
        dataset_test = CIFAR10(['test'], sources=('features', ))
        n_colors = 3
        spatial_width = 32

    elif args.dataset == "lsun" or args.dataset == "lsunsmall":

        print "loading lsun class!"

        from load_lsun import load_lsun

        print "loading lsun data!"

        if args.dataset == "lsunsmall":
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=True)
            spatial_width = 32
        else:
            dataset_train, dataset_test = load_lsun(args.batch_size,
                                                    downsample=False)
            spatial_width = 64

        n_colors = 3

    elif args.dataset == "celeba":

        print "loading celeba data"

        from fuel.datasets.celeba import CelebA

        dataset_train = CelebA(which_sets=['train'],
                               which_format="64",
                               sources=('features', ),
                               load_in_memory=False)
        dataset_test = CelebA(which_sets=['test'],
                              which_format="64",
                              sources=('features', ),
                              load_in_memory=False)

        spatial_width = 64
        n_colors = 3

        tr_scheme = SequentialScheme(examples=dataset_train.num_examples,
                                     batch_size=args.batch_size)
        ts_scheme = SequentialScheme(examples=dataset_test.num_examples,
                                     batch_size=args.batch_size)

        train_stream = DataStream.default_stream(dataset_train,
                                                 iteration_scheme=tr_scheme)
        test_stream = DataStream.default_stream(dataset_test,
                                                iteration_scheme=ts_scheme)

        # From here on the names `dataset_train` / `dataset_test` hold
        # streams rather than datasets for CelebA.
        dataset_train = train_stream
        dataset_test = test_stream

        #epoch_it = train_stream.get_epoch_iterator()

    elif args.dataset == 'Spiral':
        print 'loading SPIRAL'
        train_set = Spiral(num_examples=20000,
                           classes=1,
                           cycles=1.,
                           noise=0.01,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))
    elif args.dataset == 'Circle':
        print 'loading Circle'
        train_set = Circle(num_examples=20000,
                           classes=1,
                           cycles=1.,
                           noise=0.0,
                           sources=('features', ))
        dataset_train = DataStream.default_stream(
            train_set,
            iteration_scheme=ShuffledScheme(train_set.num_examples,
                                            args.batch_size))
        # Not read again in this function, but it is captured by the
        # `locals()` snapshot below.
        iter_per_epoch = train_set.num_examples
    else:
        raise ValueError("Unknown dataset %s." % args.dataset)

    # Snapshot of every local defined so far (args, lrate, logger, datasets,
    # ...), handed to the model-building helpers as their options.
    model_options = locals().copy()

    train_stream = dataset_train

    # `shp` is not read again in this function.
    shp = next(train_stream.get_epoch_iterator())[0].shape

    print "got epoch iterator"

    # Compute the scale/shift that would make one minibatch zero-mean with
    # unit variance. The stream transforms below are commented out and
    # `scl` / `shft` are not read again in this function.
    # TODO compute mean and variance on full dataset, not minibatch
    Xbatch = next(train_stream.get_epoch_iterator())[0]
    scl = 1. / np.sqrt(np.mean((Xbatch - np.mean(Xbatch))**2))
    shft = -np.mean(Xbatch * scl)
    # scale is applied before shift
    #train_stream = ScaleAndShift(train_stream, scl, shft)
    #test_stream = ScaleAndShift(test_stream, scl, shft)

    print 'Building model'
    params = init_params(model_options)
    # Optionally overwrite the fresh parameters with saved ones, but only if
    # the file exists; otherwise training silently starts from scratch.
    if args.reload_:
        print "Trying to reload parameters"
        if os.path.exists(args.saveto_filename):
            print 'Reloading Parameters'
            print args.saveto_filename
            params = load_params(args.saveto_filename, params)
    tparams = init_tparams(params)
    print tparams
    x, cost, start_temperature = build_model(tparams, model_options)
    inps = [x, start_temperature]

    # Symbolic inputs for the forward (corruption) step. Both `temperature`
    # and `cost` are rebound to plain numbers inside the training loop.
    x_Data = T.matrix('x_Data', dtype='float32')
    temperature = T.scalar('temperature', dtype='float32')
    forward_diffusion = one_step_diffusion(x_Data, model_options, tparams,
                                           temperature)

    #print 'Building f_cost...',
    #f_cost = theano.function(inps, cost)
    #print 'Done'
    print tparams
    grads = T.grad(cost, wrt=itemlist(tparams))

    #get_grads = theano.function(inps, grads)

    # Replace NaN gradient entries with zeros so one bad value does not
    # poison the parameter update.
    for j in range(0, len(grads)):
        grads[j] = T.switch(T.isnan(grads[j]), T.zeros_like(grads[j]),
                            grads[j])

    # compile the optimizer, the actual computational graph is compiled here
    lr = T.scalar(name='lr')
    print 'Building optimizers...',
    optimizer = args.optimizer

    # The optimizer is looked up by name in the `optimizers` module and
    # returns two callables: one taking the inputs and producing the cost,
    # one applying the update for a given learning rate.
    f_grad_shared, f_update = getattr(optimizers, optimizer)(lr, tparams,
                                                             grads, inps, cost)
    print 'Done'

    print 'Buiding Sampler....'
    f_sample = sample(tparams, model_options)
    print 'Done'
    # `estop` and `bad_counter` are assigned but never read in this
    # function; `batch_index` is initialised twice.
    uidx = 0
    estop = False
    bad_counter = 0
    max_epochs = 4000
    batch_index = 0
    print 'Number of steps....', args.num_steps
    print 'Done'
    count_sample = 1
    batch_index = 0
    for eidx in xrange(max_epochs):
        # Checkpoint the parameters every 20 epochs (including epoch 0).
        if eidx % 20 == 0:
            params = unzip(tparams)
            save_params(params,
                        model_dir + '/' + 'params_' + str(eidx) + '.npz')
            # NOTE(review): unreachable as written -- 30 is not a multiple
            # of 20, so this nested check never fires.
            if eidx == 30:
                ipdb.set_trace()
        n_samples = 0
        print 'Starting Next Epoch ', eidx

        for data in train_stream.get_epoch_iterator():
            batch_index += 1
            n_samples += len(data[0])
            uidx += 1
            # NOTE(review): `len(data[0])` above would already have raised
            # for a None batch, so this guard cannot trigger.
            if data[0] is None:
                print 'No data '
                uidx -= 1
                continue
            # One optimizer step per meta step. With more than one meta
            # step, the batch is pushed through the forward diffusion and
            # the temperature is multiplied by `temperature_factor` before
            # the next step.
            data_run = data[0]
            temperature_forward = args.temperature
            meta_cost = []
            for meta_step in range(0, args.meta_steps):
                meta_cost.append(f_grad_shared(data_run, temperature_forward))
                f_update(lrate)
                if args.meta_steps > 1:
                    data_run, sigma, _, _ = forward_diffusion(
                        data_run, temperature_forward)
                    temperature_forward *= args.temperature_factor
            # Average cost over the meta steps; rebinds `cost`, which held
            # the symbolic cost until now.
            cost = sum(meta_cost) / len(meta_cost)
            # Abort the whole training run on a non-finite cost.
            if np.isnan(cost) or np.isinf(cost):
                print 'NaN detected'
                return 1.
            logger.log({
                'epoch': eidx,
                'batch_index': batch_index,
                'uidx': uidx,
                'training_error': cost
            })
            # Per-batch accumulators for the plots below. `spiral_x` holds
            # `num_steps` references to the same `empty` list and is not
            # read again in this function.
            empty = []
            spiral_x = [empty for i in range(args.num_steps)]
            spiral_corrupted = []
            spiral_sampled = []
            grad_forward = []
            grad_back = []
            x_data_time = []
            x_tilt_time = []
            # Every 8 batches: corrupt the current batch step by step, walk
            # it back with the sampler, then sample from noise, plotting
            # each stage.
            if batch_index % 8 == 0:
                count_sample += 1
                temperature = args.temperature * (args.temperature_factor
                                                  **(args.num_steps - 1))
                temperature_forward = args.temperature
                # Forward pass. `grad_forward` collects, for every step, the
                # last two entries of `x_data_time` concatenated along
                # axis 1.
                for num_step in range(args.num_steps):
                    if num_step == 0:
                        x_data_time.append(data[0])
                        plot_images(
                            data[0], model_dir + '/' + 'orig_' + 'epoch_' +
                            str(count_sample) + '_batch_' + str(batch_index))
                        x_data, mu_data, _, _ = forward_diffusion(
                            data[0], temperature_forward)

                        plot_images(
                            x_data, model_dir + '/' + 'corrupted_' + 'epoch_' +
                            str(count_sample) + '_batch_' + str(batch_index) +
                            '_time_step_' + str(num_step))
                        x_data_time.append(x_data)
                        temp_grad = np.concatenate(
                            (x_data_time[-2], x_data_time[-1]), axis=1)
                        grad_forward.append(temp_grad)

                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        spiral_corrupted.append(x_data)
                        # `mu_data` is reshaped here but not read afterwards.
                        mu_data = np.asarray(mu_data).astype(
                            'float32').reshape(args.batch_size, INPUT_SIZE)
                        mu_data = mu_data.reshape(args.batch_size, 2)
                    else:
                        x_data_time.append(x_data)
                        x_data, mu_data, _, _ = forward_diffusion(
                            x_data, temperature_forward)
                        plot_images(
                            x_data, model_dir + '/' + 'corrupted_' + 'epoch_' +
                            str(count_sample) + '_batch_' + str(batch_index) +
                            '_time_step_' + str(num_step))
                        x_data = np.asarray(x_data).astype('float32').reshape(
                            args.batch_size, INPUT_SIZE)
                        spiral_corrupted.append(x_data)

                        mu_data = np.asarray(mu_data).astype(
                            'float32').reshape(args.batch_size, INPUT_SIZE)
                        mu_data = mu_data.reshape(args.batch_size, 2)
                        x_data_time.append(x_data)
                        temp_grad = np.concatenate(
                            (x_data_time[-2], x_data_time[-1]), axis=1)
                        grad_forward.append(temp_grad)
                    temperature_forward = temperature_forward * args.temperature_factor

                # Statistics of the fully corrupted batch, used further down
                # to draw the starting noise for inference.
                mean_sampled = x_data.mean()
                var_sampled = x_data.var()

                x_temp2 = data[0].reshape(args.batch_size, 2)
                plot_2D(
                    spiral_corrupted, args.num_steps,
                    model_dir + '/' + 'corrupted_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
                plot_2D(
                    x_temp2, 1, model_dir + '/' + 'orig_' + 'epoch_' +
                    str(count_sample) + '_batch_index_' + str(batch_index))
                plot_grad(
                    grad_forward,
                    model_dir + '/' + 'grad_forward_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
                # Backward pass from the fully corrupted batch.
                # `x_tilt_time` receives the state both before and after
                # each sampler step, so it grows by two entries per
                # iteration.
                for i in range(args.num_steps + args.extra_steps):
                    x_tilt_time.append(x_data)
                    x_data, sampled_mean = f_sample(x_data, temperature)
                    plot_images(
                        x_data, model_dir + '/' + 'sampled_' + 'epoch_' +
                        str(count_sample) + '_batch_' + str(batch_index) +
                        '_time_step_' + str(i))
                    x_tilt_time.append(x_data)
                    temp_grad = np.concatenate(
                        (x_tilt_time[-2], x_tilt_time[-1]), axis=1)
                    grad_back.append(temp_grad)

                    ###print 'Recons, On step number, using temperature', i, temperature
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    # The temperature is divided by `temperature_factor`
                    # each step until it compares equal to
                    # `args.temperature` (exact float comparison), after
                    # which it is left unchanged.
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor

                plot_grad(
                    grad_back, model_dir + '/' + 'grad_back_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
                plot_2D(
                    x_tilt_time, args.num_steps,
                    model_dir + '/' + 'sampled_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))

                # NOTE(review): the second argument of `np.random.normal` is
                # the standard deviation, but the variance is passed here --
                # confirm whether `np.sqrt(var_sampled)` was intended.
                s = np.random.normal(mean_sampled, var_sampled,
                                     [args.batch_size, 2])
                x_sampled = s

                # Restart from the highest temperature and run the sampler
                # from the noise batch instead of a corrupted data batch.
                temperature = args.temperature * (args.temperature_factor
                                                  **(args.num_steps - 1))
                x_data = np.asarray(x_sampled).astype('float32')
                for i in range(args.num_steps + args.extra_steps):
                    x_data, sampled_mean = f_sample(x_data, temperature)
                    spiral_sampled.append(x_data)
                    x_data = np.asarray(x_data).astype('float32')
                    x_data = x_data.reshape(args.batch_size, INPUT_SIZE)
                    if temperature == args.temperature:
                        temperature = temperature
                    else:
                        temperature /= args.temperature_factor
                plot_2D(
                    spiral_sampled, args.num_steps,
                    model_dir + '/' + 'inference_' + 'epoch_' +
                    str(count_sample) + '_batch_' + str(batch_index))
    # Drops into the interactive debugger once all epochs have finished.
    ipdb.set_trace()
Ejemplo n.º 24
0
def test_mnist_axes():
    """Check the axis labels MNIST reports for its 'features' source.

    ``skip_if_not_available`` is called for 'mnist.hdf5' first (presumably
    it skips the test when that file is missing -- confirm in the helper).
    """
    skip_if_not_available(datasets=['mnist.hdf5'])

    expected_axes = ('batch', 'channel', 'height', 'width')
    train_set = MNIST('train', load_in_memory=False)
    feature_axes = train_set.axis_labels['features']
    assert_equal(feature_axes, expected_axes)
Ejemplo n.º 25
0
def main(save_to,
         num_epochs,
         feature_maps=None,
         mlp_hiddens=None,
         conv_sizes=None,
         pool_sizes=None,
         batch_size=500):
    """Build a LeNet for 28x28 single-channel input and train it on MNIST.

    The network is trained with plain SGD (``Scale(learning_rate=0.1)``) on
    the MNIST "train" set while cost and error rate are monitored on the
    "test" set. The function runs the main loop and returns nothing.

    Parameters
    ----------
    save_to
        Passed unchanged to ``Checkpoint``.
    num_epochs
        Passed to ``FinishAfter(after_n_epochs=...)``.
    feature_maps : list, optional
        One entry per convolutional layer. Defaults to ``[20, 50]``.
    mlp_hiddens : list, optional
        Hidden dimensions of the top MLP; the output size (10) is appended
        to it. Defaults to ``[500]``.
    conv_sizes : list, optional
        Each entry ``s`` is turned into the filter size ``(s, s)``.
        Defaults to ``[5, 5]``.
    pool_sizes : list, optional
        Each entry ``s`` is turned into the pooling size ``(s, s)``.
        Defaults to ``[2, 2]``.
    batch_size : int, optional
        Batch size used by both the training and the test stream.
    """
    if feature_maps is None:
        feature_maps = [20, 50]
    if mlp_hiddens is None:
        mlp_hiddens = [500]
    if conv_sizes is None:
        conv_sizes = [5, 5]
    if pool_sizes is None:
        pool_sizes = [2, 2]
    image_size = (28, 28)
    output_size = 10

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    # `zip` is a one-shot iterator on Python 3; materialize the size pairs so
    # the brick receives real lists that can be traversed more than once.
    filter_sizes = list(zip(conv_sizes, conv_sizes))
    pooling_sizes = list(zip(pool_sizes, pool_sizes))
    convnet = LeNet(conv_activations,
                    1,
                    image_size,
                    filter_sizes=filter_sizes,
                    feature_maps=feature_maps,
                    pooling_sizes=pooling_sizes,
                    top_mlp_activations=mlp_activations,
                    top_mlp_dims=mlp_hiddens + [output_size],
                    border_mode='full',
                    weights_init=Uniform(width=.2),
                    biases_init=Constant(0))
    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    # NOTE(review): the indices below address the first two entries only, so
    # they match the default two-conv / one-hidden-layer configuration.
    convnet.push_initialization_config()
    convnet.layers[0].weights_init = Uniform(width=.2)
    convnet.layers[1].weights_init = Uniform(width=.09)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()
    logging.info(
        "Input dim: {} {} {}".format(*convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        logging.info("Layer {} dim: {} {} {}".format(i,
                                                     *layer.get_dim('output')))

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Apply the convnet and derive the monitored quantities
    probs = convnet.apply(x)
    cost = named_copy(CategoricalCrossEntropy().apply(y.flatten(), probs),
                      'cost')
    error_rate = named_copy(MisclassificationRate().apply(y.flatten(), probs),
                            'error_rate')

    cg = ComputationGraph([cost, error_rate])

    mnist_train = MNIST(("train", ))
    mnist_train_stream = DataStream.default_stream(
        mnist_train,
        iteration_scheme=ShuffledScheme(mnist_train.num_examples, batch_size))

    mnist_test = MNIST(("test", ))
    mnist_test_stream = DataStream.default_stream(
        mnist_test,
        iteration_scheme=ShuffledScheme(mnist_test.num_examples, batch_size))

    # Train with simple SGD
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=Scale(learning_rate=0.1))
    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs),
        DataStreamMonitoring([cost, error_rate],
                             mnist_test_stream,
                             prefix="test"),
        TrainingDataMonitoring([
            cost, error_rate,
            aggregation.mean(algorithm.total_gradient_norm)
        ],
                               prefix="train",
                               after_epoch=True),
        Checkpoint(save_to),
        ProgressBar(),
        Printing()
    ]

    model = Model(cost)

    main_loop = MainLoop(algorithm,
                         mnist_train_stream,
                         model=model,
                         extensions=extensions)

    main_loop.run()
Ejemplo n.º 26
0
    net_final = lasagne.layers.DimshuffleLayer(net['final_crop'], (0, 2, 3, 1))
    laySize = lasagne.layers.get_output(net_final).shape
    net_final = lasagne.layers.ReshapeLayer(net_final,
                                            (T.prod(laySize[0:3]),
                                             laySize[3]))
    net_final = lasagne.layers.NonlinearityLayer(net_final,
                                                 nonlinearity=None)

    '''
    return net['conv1_1']


if __name__ == '__main__':
    # Script entry point: load the MNIST 'features' source, build a flattened
    # shuffled training stream and construct the U-Net output expressions.

    from fuel.datasets import MNIST
    dataset_train = MNIST(['train'], sources=('features', ))
    dataset_test = MNIST(['test'], sources=('features', ))
    n_colors = 1
    spatial_width = 28
    # The example count handed to the scheme is rounded down to a multiple
    # of 32, which is also the batch size.
    train_stream = Flatten(
        DataStream.default_stream(dataset_train,
                                  iteration_scheme=ShuffledScheme(
                                      examples=dataset_train.num_examples -
                                      (dataset_train.num_examples % 32),
                                      batch_size=32)))
    # Shape of the first element of the first batch produced by the stream.
    shp = next(train_stream.get_epoch_iterator())[0].shape

    input_ = T.tensor4('inputs_var')
    unet = buildUnet(1, dropout=True, input_var=input_, trainable=True)
    output = unet.get_output_for(input_)
    # deterministic=True is forwarded to lasagne's get_output (presumably to
    # disable dropout for prediction -- confirm); only element [0] is kept.
    test_prediction = lasagne.layers.get_output(unet, deterministic=True)[0]
Ejemplo n.º 27
0
# Construct the model: an MLP with dims 784 -> 100 -> 10, a Tanh activation
# followed by a Softmax output, Gaussian-initialized weights and zero biases.
mlp = MLP(activations=[Tanh(), Softmax()],
          dims=[784, 100, 10],
          weights_init=IsotropicGaussian(0.01),
          biases_init=Constant(0))
mlp.initialize()

# Calculate the loss function: symbolic inputs are named after the data
# sources ('features', 'targets'); targets are flattened before being
# compared with the predictions.
x = T.matrix('features')
y = T.lmatrix('targets')
y_hat = mlp.apply(x)
cost = CategoricalCrossEntropy().apply(y.flatten(), y_hat)
error_rate = MisclassificationRate().apply(y.flatten(), y_hat)

# Load the training data using Fuel: a flattened stream that walks the
# "train" set sequentially in batches of 128.
mnist_train = MNIST("train")
train_stream = Flatten(
    DataStream.default_stream(dataset=mnist_train,
                              iteration_scheme=SequentialScheme(
                                  mnist_train.num_examples, 128)), )

# Load the testing data: same construction on the "test" set, in batches
# of 1024.
mnist_test = MNIST("test")
test_stream = Flatten(
    DataStream.default_stream(dataset=mnist_test,
                              iteration_scheme=SequentialScheme(
                                  mnist_test.num_examples, 1024)), )

# train the model
from blocks.model import Model
main_loop = MainLoop(model=Model(cost),
Ejemplo n.º 28
0
def create_main_loop(save_to,
                     num_epochs,
                     unit_order=None,
                     batch_size=500,
                     num_batches=None):
    """Assemble the MainLoop that trains LeNet-5 on MNIST and dumps pictures.

    Parameters
    ----------
    save_to
        Passed unchanged to ``Checkpoint``.
    num_epochs
        Passed to ``FinishAfter(after_n_epochs=...)``.
    unit_order : optional
        When not None, a path opened in binary mode; its unpickled content
        is handed to ``compute_unit_order`` and the result is forwarded to
        ``SaveImages``. When None, None is forwarded.
    batch_size : int, optional
        Batch size of both MNIST streams; also given to ``SynpicExtension``.
    num_batches : optional
        Passed to ``FinishAfter(after_n_batches=...)``.

    Returns
    -------
    The configured ``MainLoop``; it is not run here.
    """
    image_size = (28, 28)
    output_size = 10
    convnet = create_lenet_5()
    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Apply the convnet; keep the per-case costs because the synpic
    # extension consumes them separately from their mean.
    probs = convnet.apply(x)
    case_costs = CasewiseCrossEntropy().apply(y.flatten(), probs)
    mean_cost = case_costs.mean()
    cost = mean_cost.copy(name='cost')
    misclassification = MisclassificationRate().apply(y.flatten(), probs)
    error_rate = misclassification.copy(name='error_rate')

    cg = ComputationGraph([cost, error_rate])

    # Add an L2 penalty over every weight variable found in the graph
    select_weights = VariableFilter(roles=[WEIGHT])
    weights = select_weights(cg.variables)
    penalty = sum(0.0003 * (W**2).sum() for W in weights)
    cost = cost + penalty
    cost.name = 'cost_with_regularization'

    # Shuffled streams over the MNIST train and test sets
    mnist_train = MNIST(("train", ))
    train_scheme = ShuffledScheme(mnist_train.num_examples, batch_size)
    mnist_train_stream = DataStream.default_stream(
        mnist_train, iteration_scheme=train_scheme)

    mnist_test = MNIST(("test", ))
    test_scheme = ShuffledScheme(mnist_test.num_examples, batch_size)
    mnist_test_stream = DataStream.default_stream(
        mnist_test, iteration_scheme=test_scheme)

    # Bias parameters are the ones pictures are generated for
    select_biases = VariableFilter(roles=[BIAS])
    biases = select_biases(cg.parameters)

    # Optimize the regularized cost with AdaDelta
    algorithm = GradientDescent(cost=cost,
                                parameters=cg.parameters,
                                step_rule=AdaDelta())

    # Layer outputs to probe, keyed by brick name, in reverse of the order
    # in which the filter yields them
    select_outputs = VariableFilter(roles=[OUTPUT],
                                    bricks=[Convolutional, Linear])
    named_outputs = [(get_brick(out).name, out)
                     for out in select_outputs(cg.variables)]
    named_outputs.reverse()
    outs = OrderedDict(named_outputs)

    actpic_extension = ActpicExtension(actpic_variables=outs,
                                       case_labels=y,
                                       pics=x,
                                       label_count=output_size,
                                       rectify=-1,
                                       data_stream=mnist_test_stream,
                                       after_batch=True)

    synpic_extension = SynpicExtension(synpic_parameters=biases,
                                       case_costs=case_costs,
                                       case_labels=y,
                                       pics=x,
                                       batch_size=batch_size,
                                       pic_size=image_size,
                                       label_count=output_size,
                                       after_batch=True)

    # Impose an ordering for the SaveImages extension.
    # NOTE: pickle.load can execute arbitrary code from the file; only pass
    # paths to trusted files.
    if unit_order is not None:
        with open(unit_order, 'rb') as handle:
            histograms = pickle.load(handle)
        unit_order = compute_unit_order(histograms)

    image_title = ("LeNet-5: batch {i}, "
                   "cost {cost_with_regularization:.2f}, "
                   "trainerr {error_rate:.3f}")

    # Extensions are created in the order they are registered.
    extensions = [
        Timing(),
        FinishAfter(after_n_epochs=num_epochs, after_n_batches=num_batches),
        actpic_extension,
        synpic_extension,
    ]
    extensions.append(
        SaveImages(picsources=[synpic_extension, actpic_extension],
                   title=image_title,
                   data=[cost, error_rate],
                   graph='error_rate',
                   graph_len=500,
                   unit_order=unit_order,
                   after_batch=True))
    extensions.append(
        DataStreamMonitoring([cost, error_rate],
                             mnist_test_stream,
                             prefix="test"))
    train_monitored = [
        cost, error_rate,
        aggregation.mean(algorithm.total_gradient_norm)
    ]
    extensions.append(
        TrainingDataMonitoring(train_monitored,
                               prefix="train",
                               after_epoch=True))
    extensions.extend([Checkpoint(save_to), ProgressBar(), Printing()])

    return MainLoop(algorithm,
                    mnist_train_stream,
                    model=Model(cost),
                    extensions=extensions)
Ejemplo n.º 29
0
            prefix="test"),
        Printing(),
        ProgressBar(),
        #Checkpoint(path, after_epoch=True)
    ]

    if resume:
        print "Restoring from previous breakpoint"
        extensions.extend([
            Load(path)
        ])
    return model, algorithm, extensions


if __name__ == '__main__':
    mnist = MNIST(("train",), sources=sources)
    mnist_test = MNIST(("test",), sources=sources)
    training_stream = Flatten(
        DataStream(
            mnist,
            iteration_scheme=ShuffledScheme(mnist.num_examples, batch_size)
        ),
        which_sources=sources
    )
    # import ipdb; ipdb.set_trace()
    test_stream = Flatten(
        DataStream(
            mnist_test,
            iteration_scheme=ShuffledScheme(mnist_test.num_examples, batch_size)
        ),
        which_sources=sources
Ejemplo n.º 30
0
    def __init__(self, save_to):
        """Evaluate a trained LeNet-5 on the MNIST test set and save rankings.

        Builds the network graph, loads parameter values from ``save_to``,
        evaluates the error rate and three per-case unit counts over the
        test set, and writes one image grid per count with the test images
        ordered by ascending score ('sensitive.jpg', 'active.jpg',
        'ignored.jpg').

        Parameters
        ----------
        save_to
            Path of the file holding the trained parameters; it is opened
            in binary mode and read with ``load_parameters``.
        """
        batch_size = 500
        image_size = (28, 28)
        output_size = 10
        convnet = create_lenet_5()
        layers = convnet.layers

        x = tensor.tensor4('features')
        y = tensor.lmatrix('targets')

        # Apply the convnet
        probs = convnet.apply(x)
        cg = ComputationGraph([probs])

        def full_brick_name(brick):
            """Return the '/'-joined brick path, with a leading '/'."""
            return '/'.join([''] + [b.name for b in brick.get_unique_path()])

        # Find layer outputs to probe, keyed by the full name of their brick
        outmap = OrderedDict(
            (full_brick_name(get_brick(out)), out) for out in VariableFilter(
                roles=[OUTPUT], bricks=[Convolutional, Linear])(cg.variables))
        # Generate pics for biases
        biases = VariableFilter(roles=[BIAS])(cg.parameters)

        # Generate parallel array, in the same order, for outputs
        outs = [outmap[full_brick_name(get_brick(b))] for b in biases]

        # Monitored quantities; the three counts use a Concatenate
        # aggregation scheme instead of the default one.
        error_rate = (MisclassificationRate().apply(
            y.flatten(), probs).copy(name='error_rate'))
        sensitive_unit_count = (SensitiveUnitCount().apply(
            y.flatten(), probs, biases).copy(name='sensitive_unit_count'))
        sensitive_unit_count.tag.aggregation_scheme = (
            Concatenate(sensitive_unit_count))
        active_unit_count = (ActiveUnitCount().apply(outs).copy(
            name='active_unit_count'))
        active_unit_count.tag.aggregation_scheme = (
            Concatenate(active_unit_count))
        ignored_unit_count = (IgnoredUnitCount().apply(
            y.flatten(), probs, biases, outs).copy(name='ignored_unit_count'))
        ignored_unit_count.tag.aggregation_scheme = (
            Concatenate(ignored_unit_count))

        model = Model([
            error_rate, sensitive_unit_count, active_unit_count,
            ignored_unit_count
        ])

        # Load it with trained parameters; the context manager makes sure
        # the file handle is closed even if loading fails.
        with open(save_to, 'rb') as params_file:
            params = load_parameters(params_file)
        model.set_parameter_values(params)

        # The test set is loaded once here, right before its first use.
        mnist_test = MNIST(("test", ))
        mnist_test_stream = DataStream.default_stream(
            mnist_test,
            iteration_scheme=SequentialScheme(mnist_test.num_examples,
                                              batch_size))

        evaluator = DatasetEvaluator([
            error_rate, sensitive_unit_count, active_unit_count,
            ignored_unit_count
        ])
        results = evaluator.evaluate(mnist_test_stream)

        def save_ranked_image(scores, filename):
            """Save the test images in a 100x100 grid ordered by score."""
            sorted_instances = scores.argsort()
            filmstrip = Filmstrip(image_shape=image_size,
                                  grid_shape=(100, 100))
            for i, index in enumerate(sorted_instances):
                # Fill the grid row by row, 100 images per row
                filmstrip.set_image((i // 100, i % 100),
                                    mnist_test.get_data(request=index)[0])
            filmstrip.save(filename)

        save_ranked_image(results['sensitive_unit_count'], 'sensitive.jpg')
        save_ranked_image(results['active_unit_count'], 'active.jpg')
        save_ranked_image(results['ignored_unit_count'], 'ignored.jpg')