Example #1
    def do(self, which_callback, *args):
        model = self.main_loop.model
        # dump() writes binary data, so open the file in 'wb' mode
        path = (self.name + '_epoch_' +
                str(self.main_loop.log.status['epochs_done']) + '.pkl')
        with open(path, 'wb') as f:
            dump(model, f)
def maxout_vae_mnist_test(path_vae_mnist):

    # load vae model on mnist
    vae_mnist = load(path_vae_mnist)
    maxout = Maxout()
    x = T.matrix('features')
    y = T.imatrix('targets')
    batch_size = 128
    z, _ = vae_mnist.sampler.sample(vae_mnist.encoder_mlp.apply(x))
    predict = maxout.apply(z)

    cost = Softmax().categorical_cross_entropy(y.flatten(), predict)
    y_hat = Softmax().apply(predict)
    cost.name = 'cost'
    cg = ComputationGraph(cost)

    for i, t in enumerate(cg.parameters):
        t.name = t.name + str(i) + "maxout"

    error_brick = MisclassificationRate()
    error_rate = error_brick.apply(y.flatten(), y_hat)

    # training
    step_rule = RMSProp(0.01, 0.9)
    #step_rule = Momentum(0.2, 0.9)
    train_set = MNIST('train')
    test_set = MNIST("test")

    data_stream_train = Flatten(DataStream.default_stream(
            train_set, iteration_scheme=SequentialScheme(train_set.num_examples, batch_size)))

    data_stream_test = Flatten(DataStream.default_stream(
            test_set, iteration_scheme=SequentialScheme(test_set.num_examples, batch_size)))

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], prefix="train")
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test, prefix="test")


    extensions = [  monitor_train,
                    monitor_valid,
                    FinishAfter(after_n_epochs=50),
                    Printing(every_n_epochs=1)
                  ]

    main_loop = MainLoop(data_stream=data_stream_train,
                        algorithm=algorithm, model = Model(cost),
                        extensions=extensions)
    main_loop.run()

    # save here
    from blocks.serialization import dump
    with closing(open('../data_mnist/maxout', 'wb')) as f:
        dump(maxout, f)
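
The do() callback at the top of this example only runs if it is attached to the main loop as an extension. Below is a minimal sketch of how such a per-epoch pickling callback might be wrapped in a Blocks SimpleExtension; the class name EpochDump and the file_prefix argument are illustrative, not part of the original example.

from blocks.extensions import SimpleExtension
from blocks.serialization import dump

class EpochDump(SimpleExtension):
    """Pickle the main loop's model at the end of every epoch."""
    def __init__(self, file_prefix, **kwargs):
        kwargs.setdefault('after_epoch', True)
        super(EpochDump, self).__init__(**kwargs)
        self.file_prefix = file_prefix

    def do(self, which_callback, *args):
        epochs_done = self.main_loop.log.status['epochs_done']
        path = '{}_epoch_{}.pkl'.format(self.file_prefix, epochs_done)
        with open(path, 'wb') as f:
            dump(self.main_loop.model, f)

# Registered alongside the other extensions, for example:
# extensions = [monitor_train, monitor_valid, EpochDump('maxout'),
#               FinishAfter(after_n_epochs=50), Printing(every_n_epochs=1)]
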
Example #3
def test_protocol0_regression():
    """Check for a regression where protocol 0 dumps fail on load."""
    brick = Linear(5, 10)
    brick.allocate()
    buf = BytesIO()
    dump(brick, buf, parameters=list(brick.parameters), protocol=0)
    try:
        load(buf)
    except TypeError:
        assert False  # Regression
Example #4
    def do(self, callback_name, *args):
        """Pickle the model to the disk."""
        logger.info("ModelLogger has started")
        path = os.path.join(self.folder,
                            "{}{}.tar".format(self.file_prefix, self.counter))
        with open(path, 'wb') as f:
            dump(self.main_loop.model, f, use_cpickle=self.use_cpickle)
        logger.info("ModelLogger has finished")
        self.counter += 1
Example #5
def test_add_to_dump():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None],
              dims=[10, 10, 10],
              weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)
    mlp2 = MLP(activations=[None, None],
               dims=[10, 10, 10],
               weights_init=Constant(1.),
               use_bias=False,
               name='mlp2')
    mlp2.initialize()

    # Ensure that adding to dump is working.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb+') as ff:
        add_to_dump(mlp.children[0],
                    ff,
                    'child_0',
                    parameters=[mlp.children[0].W])
        add_to_dump(mlp.children[1], ff, 'child_1')
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(
            ['_pkl', '_parameters', 'child_0', 'child_1'])

    # Ensure that we can load any object from the tarball.
    with open(f.name, 'rb') as ff:
        saved_children_0 = load(ff, 'child_0')
        saved_children_1 = load(ff, 'child_1')
        assert_allclose(saved_children_0.W.get_value(), numpy.ones((10, 10)))
        assert_allclose(saved_children_1.W.get_value(),
                        numpy.ones((10, 10)) * 2)

    # Check the error if using a reserved name.
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp.children[0], ff, '_pkl'])

    # Check the error if saving an object with other parameters
    with open(f.name, 'rb+') as ff:
        assert_raises(
            ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
            **dict(parameters=[mlp2.children[0].W, mlp2.children[1].W]))

    # Check the warning if adding to a dump with no parameters
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    with open(f.name, 'rb+') as ff:
        assert_raises(
            ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
            **dict(parameters=[mlp2.children[0].W, mlp2.children[1].W]))
def test_add_to_dump():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)
    mlp2 = MLP(activations=[None, None], dims=[10, 10, 10],
               weights_init=Constant(1.), use_bias=False,
               name='mlp2')
    mlp2.initialize()

    # Ensure that adding to dump is working.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb+') as ff:
        add_to_dump(mlp.children[0], ff, 'child_0',
                    parameters=[mlp.children[0].W])
        add_to_dump(mlp.children[1], ff, 'child_1')
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_pkl', '_parameters',
                                               'child_0', 'child_1'])

    # Ensure that we can load any object from the tarball.
    with open(f.name, 'rb') as ff:
        saved_children_0 = load(ff, 'child_0')
        saved_children_1 = load(ff, 'child_1')
        assert_allclose(saved_children_0.W.get_value(),
                        numpy.ones((10, 10)))
        assert_allclose(saved_children_1.W.get_value(),
                        numpy.ones((10, 10)) * 2)
    
    # Check the error if using a reserved name.
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp.children[0], ff, '_pkl'])

    # Check the error if saving an object with other parameters
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
                      **dict(parameters=[mlp2.children[0].W,
                                         mlp2.children[1].W]))

    # Check the warning if adding to a dump with no parameters
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    with open(f.name, 'rb+') as ff:
        assert_raises(ValueError, add_to_dump, *[mlp2, ff, 'mlp2'],
                      **dict(parameters=[mlp2.children[0].W,
                                         mlp2.children[1].W]))
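
Outside the test harness, the same dump / add_to_dump / load round trip looks roughly like the sketch below. The file name model.tar is illustrative; the API calls mirror the tests above.

from blocks.bricks import MLP
from blocks.initialization import Constant
from blocks.serialization import dump, add_to_dump, load, load_parameters

mlp = MLP(activations=[None, None], dims=[10, 10, 10],
          weights_init=Constant(1.), use_bias=False)
mlp.initialize()

# Write the brick and its parameters into a tar archive.
with open('model.tar', 'wb') as f:
    dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])

# Append an extra object under its own name in the same archive.
with open('model.tar', 'rb+') as f:
    add_to_dump(mlp.children[0], f, 'child_0',
                parameters=[mlp.children[0].W])

# Load the main object, a named object, or just the parameter arrays.
with open('model.tar', 'rb') as f:
    mlp_loaded = load(f)
with open('model.tar', 'rb') as f:
    child = load(f, 'child_0')
with open('model.tar', 'rb') as f:
    parameter_values = load_parameters(f)  # keys like '/mlp/linear_0.W'
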
def test_serialization():
    # Create a simple brick with two parameters
    mlp = MLP(activations=[None, None],
              dims=[10, 10, 10],
              weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Check the data using numpy.load
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'])
    assert_allclose(numpy_data['mlp-linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['mlp-linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled
    mlp = load(f.name)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that only parameters are saved as NPY files
    mlp.random_data = numpy.random.rand(10)
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'])

    # Ensure that parameters can be loaded with correct names
    parameter_values = load_parameter_values(f.name)
    assert set(parameter_values.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])

    # Ensure that duplicate names are dealt with
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear.W', 'mlp-linear.W_2', 'pkl'])

    # Ensure warnings are raised when __main__ namespace objects are dumped
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)
Example #8
def test_serialization():
    # Create a simple brick with two parameters
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Check the data using numpy.load
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'])
    assert_allclose(numpy_data['mlp-linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['mlp-linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled
    mlp = load(f.name)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that only parameters are saved as NPY files
    mlp.random_data = numpy.random.rand(10)
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear_0.W', 'mlp-linear_1.W', 'pkl'])

    # Ensure that parameters can be loaded with correct names
    parameter_values = load_parameter_values(f.name)
    assert set(parameter_values.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])

    # Ensure that duplicate names are dealt with
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f)
    numpy_data = numpy.load(f.name)
    assert set(numpy_data.keys()) == \
        set(['mlp-linear.W', 'mlp-linear.W_2', 'pkl'])

    # Ensure warnings are raised when __main__ namespace objects are dumped
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)
Example #9
def test_serialization():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None],
              dims=[10, 10, 10],
              weights_init=Constant(1.),
              use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Ensure warnings are raised when __main__ namespace objects are dumped.
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp.foo, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)

    # Check the parameters.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])
    assert_allclose(numpy_data['/mlp/linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['/mlp/linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled.
    with open(f.name, 'rb') as ff:
        mlp = load(ff)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that duplicate names are dealt with.
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear.W', '/mlp/linear.W_2'])

    # Check when we don't dump the main object.
    with NamedTemporaryFile(delete=False) as f:
        dump(None, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_parameters'])
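
When only the parameters were dumped (the dump(None, f, parameters=...) case above), they can be read back with load_parameters and pushed into a freshly built network. A minimal sketch, assuming the same two-layer MLP as in the test; the file name and variable names are illustrative, and Model.set_parameter_values is the standard Blocks call for this.

import theano.tensor as tensor
from blocks.bricks import MLP
from blocks.initialization import Constant
from blocks.model import Model
from blocks.serialization import load_parameters

# Rebuild the same architecture that produced the dump.
mlp = MLP(activations=[None, None], dims=[10, 10, 10],
          weights_init=Constant(0.), use_bias=False)
mlp.initialize()
x = tensor.matrix('features')
model = Model(mlp.apply(x))

# Read the saved arrays and push them into the new bricks.
with open('parameters_only.tar', 'rb') as f:
    parameter_values = load_parameters(f)  # {'/mlp/linear_0.W': array, ...}
model.set_parameter_values(parameter_values)
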
Example #10
def test_serialization():

    # Create a simple MLP to dump.
    mlp = MLP(activations=[None, None], dims=[10, 10, 10],
              weights_init=Constant(1.), use_bias=False)
    mlp.initialize()
    W = mlp.linear_transformations[1].W
    W.set_value(W.get_value() * 2)

    # Ensure warnings are raised when __main__ namespace objects are dumped.
    foo.__module__ = '__main__'
    import __main__
    __main__.__dict__['foo'] = foo
    mlp.foo = foo
    with NamedTemporaryFile(delete=False) as f:
        with warnings.catch_warnings(record=True) as w:
            dump(mlp.foo, f)
            assert len(w) == 1
            assert '__main__' in str(w[-1].message)

    # Check the parameters.
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear_0.W', '/mlp/linear_1.W'])
    assert_allclose(numpy_data['/mlp/linear_0.W'], numpy.ones((10, 10)))
    assert numpy_data['/mlp/linear_0.W'].dtype == theano.config.floatX

    # Ensure that it can be unpickled.
    with open(f.name, 'rb') as ff:
        mlp = load(ff)
    assert_allclose(mlp.linear_transformations[1].W.get_value(),
                    numpy.ones((10, 10)) * 2)

    # Ensure that duplicate names are dealt with.
    for child in mlp.children:
        child.name = 'linear'
    with NamedTemporaryFile(delete=False) as f:
        dump(mlp, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with open(f.name, 'rb') as ff:
        numpy_data = load_parameters(ff)
    assert set(numpy_data.keys()) == \
        set(['/mlp/linear.W', '/mlp/linear.W_2'])

    # Check when we don't dump the main object.
    with NamedTemporaryFile(delete=False) as f:
        dump(None, f, parameters=[mlp.children[0].W, mlp.children[1].W])
    with tarfile.open(f.name, 'r') as tarball:
        assert set(tarball.getnames()) == set(['_parameters'])
Example #11
def test_load_log():
    log = TrainingLog()
    log[0]["channel0"] = 0

    # test simple TrainingLog pickles
    with tempfile.NamedTemporaryFile() as f:
        dump(log, f)
        f.flush()

        log2 = plot.load_log(f.name)
        assert log2[0]["channel0"] == 0

    # test MainLoop pickles
    main_loop = MainLoop(model=None, data_stream=None, algorithm=None, log=log)

    with tempfile.NamedTemporaryFile() as f:
        dump(main_loop, f)
        f.flush()

        log2 = plot.load_log(f.name)
        assert log2[0]["channel0"] == 0
Example #12
def test_load_log():
    log = TrainingLog()
    log[0]['channel0'] = 0

    # test simple TrainingLog pickles
    with tempfile.NamedTemporaryFile() as f:
        dump(log, f)
        f.flush()

        log2 = plot.load_log(f.name)
        assert log2[0]['channel0'] == 0

    # test MainLoop pickles
    main_loop = MainLoop(model=None, data_stream=None, algorithm=None, log=log)

    with tempfile.NamedTemporaryFile() as f:
        dump(main_loop, f)
        f.flush()

        log2 = plot.load_log(f.name)
        assert log2[0]['channel0'] == 0
Example #13
def test_pickle_log():
    log1 = TrainingLog()
    dump(log1, "log1.pkl")
    log2 = load("log1.pkl")
    dump(log2, "log2.pkl")
    load("log2.pkl")  # loading an unresumed log works
    log2.resume()
    dump(log2, "log3.pkl")
    load("log3.pkl")  # loading a resumed log does not work
Example #14
def test_pickle_log():
    log1 = TrainingLog()
    with open('log1.tar', 'wb') as f:
        dump(log1, f)
    with open('log1.tar', 'rb') as f:
        log2 = load(f)
    with open('log2.tar', 'wb') as f:
        dump(log2, f)
    with open('log2.tar', 'rb') as f:
        load(f)  # loading an unresumed log works
    log2.resume()
    with open('log3.tar', 'wb') as f:
        dump(log2, f)
    with open('log3.tar', 'rb') as f:
        load(f)  # loading a resumed log does not work
    os.remove('log1.tar')
    os.remove('log2.tar')
    os.remove('log3.tar')
Example #15
        DataStream(
            mnist,
            iteration_scheme=ShuffledScheme(mnist.num_examples, batch_size)
        ),
        which_sources=sources
    )
    # import ipdb; ipdb.set_trace()
    test_stream = Flatten(
        DataStream(
            mnist_test,
            iteration_scheme=ShuffledScheme(mnist_test.num_examples, batch_size)
        ),
        which_sources=sources
    )
    "Print data loaded"

    if train:
        cost = create_network()
        model, algorithm, extensions = prepare_opti(cost, test_stream)

        main_loop = MainLoop(
            algorithm=algorithm,
            data_stream=training_stream,
            model=model,
            extensions=extensions
        )
        main_loop.run()
        with open('pixelvae.pkl', 'wb') as f:
            dump(main_loop.model, f)
    else:
        with open('pixelvae.pkl', 'rb') as f:
            model = load(f)
def train(save_to, num_epochs, feature_maps=None, mlp_hiddens=None,
         conv_sizes=None, pool_sizes=None, batch_size=500):

    # Initialize the training set
    train = CIFAR10(("train",))
    train_stream = DataStream.default_stream(
        train, iteration_scheme=ShuffledScheme(
            train.num_examples, batch_size))

    test = CIFAR10(("test",))
    test_stream = DataStream.default_stream(
        test,
        iteration_scheme=ShuffledScheme(
            test.num_examples, batch_size))

    # ConvMLP Parameters
    image_size = (32, 32)
    num_channels = 3
    num_conv = 3 # Number of Convolutional Layers
    if feature_maps is None:
        feature_maps = [20, 30, 30]
    if len(feature_maps) != num_conv:
        raise ValueError('feature_maps must have one entry per conv layer')
    if conv_sizes is None:
        conv_sizes = [5] * num_conv
    if pool_sizes is None:
        pool_sizes = [2] * num_conv
    if mlp_hiddens is None:
        mlp_hiddens = [500]
    output_size = 10

    # Use ReLUs everywhere and softmax for the final prediction
    conv_activations = [Rectifier() for _ in feature_maps]
    mlp_activations = [Rectifier() for _ in mlp_hiddens] + [Softmax()]
    convnet = ConvMLP(conv_activations, num_channels, image_size,
                      filter_sizes=list(zip(conv_sizes, conv_sizes)),
                      feature_maps=feature_maps,
                      pooling_sizes=list(zip(pool_sizes, pool_sizes)),
                      top_mlp_activations=mlp_activations,
                      top_mlp_dims=mlp_hiddens + [output_size],
                      border_mode='full',
                      weights_init=Uniform(width=.2),
                      biases_init=Constant(0))

    # We push initialization config to set different initialization schemes
    # for convolutional layers.
    convnet.push_initialization_config()
    for i in range(num_conv):
        convnet.layers[i].weights_init = Uniform(width=.2)
    convnet.top_mlp.linear_transformations[0].weights_init = Uniform(width=.08)
    convnet.top_mlp.linear_transformations[1].weights_init = Uniform(width=.11)
    convnet.initialize()
    logging.info("Input dim: {} {} {}".format(
        *convnet.children[0].get_dim('input_')))
    for i, layer in enumerate(convnet.layers):
        logging.info("Layer {} dim: {} {} {}".format(
            i, *layer.get_dim('output')))

    x = tensor.tensor4('features')
    y = tensor.lmatrix('targets')

    # Normalize input and apply the convnet
    probs = convnet.apply(x)
    cost = named_copy(CategoricalCrossEntropy().apply(y.flatten(),
                      probs), 'cost')
    error_rate = named_copy(MisclassificationRate().apply(y.flatten(), probs),
                            'error_rate')

    cg = ComputationGraph([cost, error_rate])

    # Apply Dropout to outputs of rectifiers
    from blocks.roles import OUTPUT
    vs = VariableFilter(roles=[OUTPUT])(cg.variables)
    vs1 = [v for v in vs if v.name.startswith('rectifier')]
    vs1 = vs1[0: -2] # Only first two layers
    cg = apply_dropout(cg, vs1, 0.5)

    # Train with simple SGD
    algorithm = GradientDescent(
        cost=cost, parameters=cg.parameters,
        step_rule=AdaDelta())

    # `Timing` extension reports time for reading data, aggregating a batch
    # and monitoring;
    # `ProgressBar` displays a nice progress bar during training.
    extensions = [Timing(),
                  FinishAfter(after_n_epochs=num_epochs),
                  DataStreamMonitoring(
                      [cost, error_rate],
                      test_stream,
                      prefix="test"),
                  TrainingDataMonitoring(
                      [cost, error_rate,
                       aggregation.mean(algorithm.total_gradient_norm)],
                      prefix="train",
                      after_epoch=True),
                  Checkpoint(save_to),
                  ProgressBar(),
                  Printing()]

    model = Model(cost)

    main_loop = MainLoop(
        algorithm,
        train_stream,
        model=model,
        extensions=extensions)

    main_loop.run()
    classifier_fn = 'convmlp_cifar10.zip'
    with open(classifier_fn, 'wb') as f:
        dump(convnet, f)
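
After training, the dumped network can be read back with load and compiled into a prediction function. A rough sketch, assuming Theano and the convmlp_cifar10.zip archive written above; the variable names are illustrative.

import theano
from theano import tensor
from blocks.serialization import load

# Reload the trained network from the archive written at the end of training.
with open('convmlp_cifar10.zip', 'rb') as f:
    convnet = load(f)

# Compile a function mapping a batch of images to class probabilities.
x = tensor.tensor4('features')
predict = theano.function([x], convnet.apply(x))
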
    elif dataset == "cifar10":
        data = CIFAR10(("train",))
        data_test = CIFAR10(("test",))

    training_stream = DataStream(
        data,
        iteration_scheme=ShuffledScheme(data.num_examples, batch_size)
    )
    test_stream = DataStream(
        data_test,
        iteration_scheme=ShuffledScheme(data_test.num_examples, batch_size)
    )
    logger.info("Dataset: {} loaded".format(dataset))

    if train:
        cost, cost_bits_dim = create_network()
        model, algorithm, extensions = prepare_opti(cost, test_stream, cost_bits_dim)
        # import ipdb; ipdb.set_trace()
        main_loop = MainLoop(
            algorithm=algorithm,
            data_stream=training_stream,
            model=model,
            extensions=extensions
        )
        main_loop.run()
        with open(path + '/' + 'pixelcnn.pkl', 'wb') as f:
            dump(main_loop.model, f)
        model = main_loop.model
    else:
        with open('pixelcnn_cifar10_2016-07-19/'
                  'pixelcnn_cifar10_epoch_165.pkl', 'rb') as f:
            model = load(f)
    elif dataset == "cifar10":
        data = CIFAR10(("train", ))
        data_test = CIFAR10(("test", ))
    else:
        pass  # Add CIFAR 10
    training_stream = DataStream(data,
                                 iteration_scheme=ShuffledScheme(
                                     data.num_examples, batch_size))
    test_stream = DataStream(data_test,
                             iteration_scheme=ShuffledScheme(
                                 data_test.num_examples, batch_size))
    logger.info("Dataset: {} loaded".format(dataset))

    if train:
        cost, cost_bits_dim = create_network()
        model, algorithm, extensions = prepare_opti(cost, test_stream,
                                                    cost_bits_dim)

        main_loop = MainLoop(algorithm=algorithm,
                             data_stream=training_stream,
                             model=model,
                             extensions=extensions)
        main_loop.run()
        with open(path + '/' + 'pixelcnn.pkl', 'wb') as f:
            dump(main_loop.model, f)
        model = main_loop.model
    else:
        with open('pixelcnn_cifar10_2016-07-19/'
                  'pixelcnn_cifar10_epoch_165.pkl', 'rb') as f:
            model = load(f)
def test_vae():

    activation = Rectifier()
    full_weights_init = Orthogonal()
    weights_init = full_weights_init
    
    layers = [784, 400, 20]
    encoder_layers = layers[:-1]
    encoder_mlp = MLP([activation] * (len(encoder_layers)-1),
              encoder_layers,
              name="MLP_enc", biases_init=Constant(0.), weights_init=weights_init)

    enc_dim = encoder_layers[-1]
    z_dim = layers[-1]
    #sampler = Qlinear(input_dim=enc_dim, output_dim=z_dim, biases_init=Constant(0.), weights_init=full_weights_init)
    sampler = Qsampler(input_dim=enc_dim, output_dim=z_dim, biases_init=Constant(0.), weights_init=full_weights_init)
    decoder_layers = layers[:]  ## includes z_dim as first layer
    decoder_layers.reverse()
    decoder_mlp = MLP([activation] * (len(decoder_layers)-2) + [Rectifier()],
              decoder_layers,
              name="MLP_dec", biases_init=Constant(0.), weights_init=weights_init)

    
    vae = VAEModel(encoder_mlp, sampler, decoder_mlp)
    vae.initialize()

    x = T.matrix('features')
    batch_size = 124
    x_recons, kl_terms = vae.reconstruct(x)
    recons_term = BinaryCrossEntropy().apply(x, T.clip(x_recons, 1e-5, 1 - 1e-5))
    recons_term.name = "recons_term"

    cost = recons_term + kl_terms.mean()
    cost.name = "cost"
    cg = ComputationGraph(cost)
    for i, t in enumerate(cg.parameters):
        t.name = t.name + str(i) + "vae_mnist"


    step_rule = RMSProp(0.001, 0.95)

    train_set = MNIST('train')

    train_set.sources = ("features", )
    test_set = MNIST("test")
    test_set.sources = ("features", )

    data_stream = Flatten(DataStream.default_stream(
            train_set, iteration_scheme=SequentialScheme(train_set.num_examples, batch_size)))

    data_stream_monitoring = Flatten(DataStream.default_stream(
            train_set, iteration_scheme=SequentialScheme(train_set.num_examples, batch_size)))

    data_stream_test = Flatten(DataStream.default_stream(
            test_set, iteration_scheme=SequentialScheme(test_set.num_examples, batch_size)))

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = TrainingDataMonitoring(
        variables=[cost], prefix="train", every_n_batches=10)
    monitor_valid = DataStreamMonitoring(
        variables=[cost], data_stream=data_stream_test, prefix="valid", every_n_batches=10)

    # drawing_samples = ImagesSamplesSave("../data_mnist", vae, (28, 28), every_n_epochs=1)
    extensions = [  monitor_train,
                    monitor_valid,
                    FinishAfter(after_n_batches=1500),
                    Printing(every_n_batches=10)
                  ]

    main_loop = MainLoop(data_stream=data_stream,
                        algorithm=algorithm, model = Model(cost),
                        extensions=extensions)
    main_loop.run()

    from blocks.serialization import dump
    with closing(open('../data_mnist/model_0', 'wb')) as f:
        dump(vae, f)
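
A minimal sketch of reading such a dump back and reusing the trained encoder, mirroring the start of maxout_vae_mnist_test in Example #1 (the path and attribute names are the ones used above):

import theano.tensor as T
from blocks.serialization import load

with open('../data_mnist/model_0', 'rb') as f:
    vae_mnist = load(f)

# Reuse the trained encoder to map inputs to latent samples.
x = T.matrix('features')
z, _ = vae_mnist.sampler.sample(vae_mnist.encoder_mlp.apply(x))
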
Example #20
    def do(self, which_callback, *args):
        model = self.main_loop.model
        # dump() writes binary data, so open the file in 'wb' mode
        path = (self.name + '_epoch_' +
                str(self.main_loop.log.status['epochs_done']) + '.pkl')
        with open(path, 'wb') as f:
            dump(model, f)
def maxout_mnist_test():
    # if it is working
    # do a class
    x = T.tensor4('features')
    y = T.imatrix('targets')
    batch_size = 128
    # maxout convolutional layers
    # layer0
    filter_size = (8, 8)
    activation = Maxout_(num_pieces=2).apply
    pooling_size = 4
    pooling_step = 2
    pad = 0
    image_size = (28, 28)
    num_channels = 1
    num_filters = 48
    layer0 = ConvolutionalLayer(activation, filter_size, num_filters,
                                pooling_size=(pooling_size, pooling_size),
                                pooling_step=(pooling_step, pooling_step),
                                pad=pad,
                                image_size=image_size,
                                num_channels=num_channels,
                                weights_init=Uniform(width=0.01),
                                biases_init=Uniform(width=0.01),
                                name="layer_0")
    layer0.initialize()

    num_filters = 48
    filter_size = (8, 8)
    pooling_size = 4
    pooling_step = 2
    pad = 3
    image_size = (layer0.get_dim('output')[1],
                  layer0.get_dim('output')[2])
    num_channels = layer0.get_dim('output')[0]
    layer1 = ConvolutionalLayer(activation, filter_size, num_filters,
                                pooling_size=(pooling_size, pooling_size),
                                pooling_step=(pooling_step, pooling_step),
                                pad=pad,
                                image_size=image_size,
                                num_channels=num_channels,
                                weights_init=Uniform(width=0.01),
                                biases_init=Uniform(width=0.01),
                                name="layer_1")
    layer1.initialize()

    num_filters = 24
    filter_size = (5, 5)
    pooling_size = 2
    pooling_step = 2
    pad = 3
    activation = Maxout_(num_pieces=4).apply
    image_size = (layer1.get_dim('output')[1],
                  layer1.get_dim('output')[2])
    num_channels = layer1.get_dim('output')[0]
    layer2 = ConvolutionalLayer(activation, filter_size, num_filters,
                                pooling_size=(pooling_size, pooling_size),
                                pooling_step=(pooling_step, pooling_step),
                                pad = pad,
                                image_size=image_size,
                                num_channels=num_channels,
                                weights_init=Uniform(width=0.01),
                                biases_init=Uniform(width=0.01),
                                name="layer_2")
    layer2.initialize()

    conv_layers = [layer0, layer1, layer2]
    output_conv = x
    for layer in conv_layers:
        output_conv = layer.apply(output_conv)
    output_conv = Flattener().apply(output_conv)

    mlp_layer = Linear(54, 10,
                       weights_init=Uniform(width=0.01),
                       biases_init=Uniform(width=0.01), name="layer_5")
    mlp_layer.initialize()

    output_mlp = mlp_layer.apply(output_conv)

    params, names = build_params(conv_layers, [mlp_layer])

    cost = Softmax().categorical_cross_entropy(y.flatten(), output_mlp)
    cost.name = 'cost'
    cg_ = ComputationGraph(cost)
    weights = VariableFilter(roles=[WEIGHT])(cg_.variables)
    cost = cost + 0.001 * sum([(p ** 2).sum() for p in weights])
    cg = ComputationGraph(cost)
    error_rate = errors(output_mlp, y)
    error_rate.name = 'error'

    # training
    step_rule = RMSProp(0.01, 0.9)
    #step_rule = Momentum(0.2, 0.9)
    train_set = MNIST('train')
    test_set = MNIST("test")

    data_stream = DataStream.default_stream(
            train_set, iteration_scheme=SequentialScheme(train_set.num_examples, batch_size))

    data_stream_monitoring = DataStream.default_stream(
            train_set, iteration_scheme=SequentialScheme(train_set.num_examples, batch_size))

    data_stream_test = DataStream.default_stream(
            test_set, iteration_scheme=SequentialScheme(test_set.num_examples, batch_size))

    algorithm = GradientDescent(cost=cost, params=cg.parameters,
                                step_rule=step_rule)

    monitor_train = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_monitoring, prefix="train")
    monitor_valid = DataStreamMonitoring(
        variables=[cost, error_rate], data_stream=data_stream_test, prefix="test")


    extensions = [  monitor_train,
                    monitor_valid,
                    FinishAfter(after_n_epochs=50),
                    Printing(every_n_epochs=1)
                  ]

    main_loop = MainLoop(data_stream=data_stream,
                        algorithm=algorithm, model = Model(cost),
                        extensions=extensions)
    main_loop.run()

    from blocks.serialization import dump
    with closing(open('../data_mnist/maxout', 'wb')) as f:
        dump(main_loop.model, f)