Example #1
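Train an RBM on MNIST with persistent contrastive divergence (PCD-15), plotting the learned filters after every epoch and, once training is done, samples drawn from a persistent Gibbs chain seeded with test examples.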
def test_rbm(learning_rate=0.1, training_epochs=15,
             dataset='mnist.pkl.gz', batch_size=20,
             n_chains=20, n_samples=10, output_folder='rbm_plots',
             n_hidden=500):
    """
    Demonstrate how to train an RBM and afterwards sample from it using Theano.

    This is demonstrated on MNIST.

    :param learning_rate: learning rate used for training the RBM

    :param training_epochs: number of epochs used for training

    :param dataset: path to the pickled dataset

    :param batch_size: size of a batch used to train the RBM

    :param n_chains: number of parallel Gibbs chains to be used for sampling

    :param n_samples: number of samples to plot for each chain

    :param output_folder: folder in which the plots are saved

    :param n_hidden: number of hidden units of the RBM

    """
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')  # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # initialize storage for the persistent chain (state = hidden
    # layer of chain)
    persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),
                                                 dtype=theano.config.floatX),
                                     borrow=True)

    # construct the RBM class
    rbm = RBM(input=x, n_visible=28 * 28,
              n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng)

    # get the cost and the gradient corresponding to one step of PCD-15
    # (persistent CD, since a persistent chain is passed in)
    cost, updates = rbm.get_cost_updates(lr=learning_rate,
                                         persistent=persistent_chain, k=15)

    #################################
    #     Training the RBM          #
    #################################
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    # start-snippet-5
    # it is ok for a theano function to have no output
    # the purpose of train_rbm is solely to update the RBM parameters
    train_rbm = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        },
        name='train_rbm'
    )

    plotting_time = 0.
    start_time = timeit.default_timer()

    # go through training epochs
    for epoch in xrange(training_epochs):

        # go through the training set
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            mean_cost += [train_rbm(batch_index)]

        print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)

        # Plot filters after each training epoch
        plotting_start = timeit.default_timer()
        # Construct image from the weight matrix
        image = Image.fromarray(
            tile_raster_images(
                X=rbm.W.get_value(borrow=True).T,
                img_shape=(28, 28),
                tile_shape=(10, 10),
                tile_spacing=(1, 1)
            )
        )
        image.save('filters_at_epoch_%i.png' % epoch)
        plotting_stop = timeit.default_timer()
        plotting_time += (plotting_stop - plotting_start)

    end_time = timeit.default_timer()

    pretraining_time = (end_time - start_time) - plotting_time

    print ('Training took %f minutes' % (pretraining_time / 60.))
    # end-snippet-5 start-snippet-6
    #################################
    #     Sampling from the RBM     #
    #################################
    # find out the number of test samples
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]

    # pick random test examples, with which to initialize the persistent chain
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(
        numpy.asarray(
            test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
            dtype=theano.config.floatX
        )
    )
    # end-snippet-6 start-snippet-7
    plot_every = 1000
    # define one step of Gibbs sampling (mf = mean-field) define a
    # function that does `plot_every` steps before returning the
    # sample for plotting
    (
        [
            presig_hids,
            hid_mfs,
            hid_samples,
            presig_vis,
            vis_mfs,
            vis_samples
        ],
        updates
    ) = theano.scan(
        rbm.gibbs_vhv,
        outputs_info=[None, None, None, None, None, persistent_vis_chain],
        n_steps=plot_every
    )

    # add to updates the shared variable that takes care of our persistent
    # chain
    updates.update({persistent_vis_chain: vis_samples[-1]})
    # construct the function that implements our persistent chain.
    # we generate the "mean field" activations for plotting and the actual
    # samples for reinitializing the state of our persistent chain
    sample_fn = theano.function(
        [],
        [
            vis_mfs[-1],
            vis_samples[-1]
        ],
        updates=updates,
        name='sample_fn'
    )

    # create a space to store the image for plotting (we need to leave
    # room for the tile_spacing as well)
    image_data = numpy.zeros(
        (29 * n_samples + 1, 29 * n_chains - 1),
        dtype='uint8'
    )
    for idx in xrange(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_mf, vis_sample = sample_fn()
        print ' ... plotting sample ', idx
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
            X=vis_mf,
            img_shape=(28, 28),
            tile_shape=(1, n_chains),
            tile_spacing=(1, 1)
        )

    # construct image
    image = Image.fromarray(image_data)
    image.save('samples.png')
    # end-snippet-7
    os.chdir('../')
Example #2
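Train two denoising autoencoders on MNIST, one without input corruption and one with 30% corruption, and save the learned filters of each as an image.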
def main(learning_rate=0.1, training_epochs=15,
            dataset='mnist.pkl.gz',
            batch_size=20, output_folder='dA_plots'):

    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    index = T.lscalar()
    x = T.matrix(name='x')

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

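    # first denoising autoencoder: trained without input corruption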
    dae = DenoisingAutoencoder(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=28*28,
        n_hidden=500
    )

    cost, updates = dae.get_cost_updates(
        corruption_level=0.,  # no corruption, matching 'filters_corruption_0.png' below
        learning_rate=learning_rate
    )

    train_autoencoder = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
        }
    )

    start_time = timeit.default_timer()

    for epoch in xrange(training_epochs):
        costs = []
        for batch_index in xrange(n_train_batches):
            costs.append(train_autoencoder(batch_index))

        print 'Training epoch %d, mean cost ' % epoch, np.mean(costs)

    end_time = timeit.default_timer()

    training_time = end_time - start_time
    print ('The 0% corruption code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % (training_time / 60.))

    image = Image.fromarray(
        tile_raster_images(
            X=dae.W.get_value(borrow=True).T,
            img_shape=(28, 28),
            tile_shape=(10, 10),
            tile_spacing=(1, 1)
        )
    )

    image.save('filters_corruption_0.png')

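    # second denoising autoencoder: untied weights (tied=False), 30% corruption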
    dae = DenoisingAutoencoder(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=28*28,
        n_hidden=500,
        tied=False
    )


    cost, updates = dae.get_cost_updates(
        corruption_level=0.3,
        learning_rate=learning_rate
    )

    train_autoencoder = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
        }
    )

    start_time = timeit.default_timer()

    for epoch in xrange(training_epochs):
        costs = []
        for batch_index in xrange(n_train_batches):
            costs.append(train_autoencoder(batch_index))

        print 'Training epoch %d, mean cost ' % epoch, np.mean(costs)

    end_time = timeit.default_timer()

    training_time = end_time - start_time
    print ('The 30% corruption code for file ' +
           os.path.split(__file__)[1] +
           ' ran for %.2fm' % (training_time / 60.))

    image = Image.fromarray(
        tile_raster_images(
            X=dae.W.get_value(borrow=True).T,
            img_shape=(28, 28),
            tile_shape=(10, 10),
            tile_spacing=(1, 1)
        )
    )

    image.save('filters_corruption_30.png')

    os.chdir('../')
Example #3
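Build an MLP classifier with an L1/L2-regularized negative log-likelihood cost and return compiled train/validate/test functions along with the batch counts.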
def init_mlp(learning_rate, L1_reg, L2_reg, dataset, batch_size, n_hidden):

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    validate_set_x, validate_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_validate_batches = validate_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    batches = (n_train_batches, n_validate_batches, n_test_batches)

    print '... building the model'

    index = T.lscalar()
    x = T.matrix('x')
    y = T.ivector('y')

    rng = np.random.RandomState(1234)

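    # one-hidden-layer MLP over the flattened 28x28 MNIST images, 10 output classes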
    classifier = MultiLayerPerceptron(
        rng=rng,
        input=x,
        n_in=28*28,
        n_hidden=n_hidden,
        n_out=10)

    cost = (
        classifier.negative_log_likelihood(y) +
        L1_reg * classifier.L1_norm +
        L2_reg * classifier.L2_norm
    )

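    # compiled functions returning the misclassification error on one minibatch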
    test_model = theano.function(
        inputs=[index],
        outputs=[classifier.errors(y)],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    validate_model = theano.function(
        inputs=[index],
        outputs=[classifier.errors(y)],
        givens={
            x: validate_set_x[index * batch_size: (index + 1) * batch_size],
            y: validate_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

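    # plain SGD on the regularized cost: one gradient per parameter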
    gparams = [T.grad(cost, param) for param in classifier.params]

    updates = [
        (param, param - learning_rate * gparam)
        for param, gparam in zip(classifier.params, gparams)
    ]

    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
        }
    )

    return train_model, validate_model, test_model, batches
Example #4
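Pretrain a stacked denoising autoencoder layer by layer, then fine-tune the whole network with early stopping, saving each dA layer's filters after both stages.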
def main(
        finetune_learning_rate=0.1,
        pretrain_epochs=15,
        pretrain_learning_rate=0.001,
        training_epochs=1000,
        dataset='mnist.pkl.gz',
        batch_size=1,
        output_folder='SdA_plots/'
    ):

    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0]
    n_train_batches /= batch_size

    numpy_rng = np.random.RandomState(123)
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)


    print '... building the model'

    sda = StackedDenoisingAutoencoder(
        numpy_rng=numpy_rng,
        n_ins=28*28,
        hidden_layers_sizes=[529, 529],
        n_outs=10
        )

    print '... getting the pretraining functions'

    pretrain_fns = sda.create_pretraining_functions(
        train_set_x=train_set_x,
        batch_size=batch_size)

    print '... pre-training the model'

    start_time = timeit.default_timer()
    corruption_levels = [.1, .2, .3]

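    # greedy layer-wise pretraining: train each dA in turn, each with its
    # own corruption level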
    for i in xrange(sda.n_layers):
        for epoch in xrange(pretrain_epochs):
            costs = []
            for batch_index in xrange(n_train_batches):
                costs.append(pretrain_fns[i](
                    index=batch_index,
                    corruption=corruption_levels[i],
                    learning_rate=pretrain_learning_rate)
                )
            print 'Pre-training layer %i, epoch %d, cost ' % (i, epoch), np.mean(costs)
    end_time = timeit.default_timer()

    print >> sys.stderr, ('The pretraining code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))

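    # plot each dA layer's filters; the hidden layers have 529 = 23*23 units,
    # so filters of the deeper layers are tiled as 23x23 images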
    for i, layer in enumerate(sda.dA_layers):
        if i == 0:
            image = Image.fromarray(
                tile_raster_images(
                    X=layer.W.get_value(borrow=True).T,
                    img_shape=(28, 28),
                    tile_shape=(10, 10),
                    tile_spacing=(1, 1)
                )
            )
        else:
            image = Image.fromarray(
                tile_raster_images(
                    X=layer.W.get_value(borrow=True).T,
                    img_shape=(23, 23),
                    tile_shape=(10, 10),
                    tile_spacing=(1, 1)
                )
            )

        image.save('dAE_layer_pretrain_%d.png' % i)

    print '... getting the finetuning functions'

    train_fn, validate_model, test_model = sda.build_finetune_functions(
        datasets=datasets,
        batch_size=batch_size,
        learning_rate=finetune_learning_rate
        )

    print '... fine-tuning the model'

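    # early-stopping parameters: train for at least `patience` iterations,
    # extending the budget whenever validation error improves enough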
    patience = 10 * n_train_batches
    patience_increase = 2.
    improvement_threshold = 0.995
    validation_frequency = min(n_train_batches, patience / 2)

    best_validation_loss = np.inf
    best_iter = 0
    test_score = 0.
    start_time = timeit.default_timer()

    done_looping = False
    epoch = 0

    while (epoch < training_epochs) and (not done_looping):
        epoch = epoch + 1
        for minibatch_index in xrange(n_train_batches):
            minibatch_avg_cost = train_fn(minibatch_index)
            iter = (epoch - 1) * n_train_batches + minibatch_index

            if (iter + 1) % validation_frequency == 0:
                validation_losses = validate_model()
                this_validation_loss = np.mean(validation_losses)
                print ('epoch %i, minibatch %i/%i, validation error %f %%' %
                       (epoch, minibatch_index + 1, n_train_batches, this_validation_loss * 100.))

                if this_validation_loss < best_validation_loss:
                    if this_validation_loss < best_validation_loss * improvement_threshold:
                        patience = max(patience, iter * patience_increase)

                    best_validation_loss = this_validation_loss
                    best_iter = iter

                    test_losses = test_model()
                    test_score = np.mean(test_losses)

                    print (('     epoch %i, minibatch %i/%i, test error of best model %f %%') %
                            (epoch, minibatch_index + 1, n_train_batches, test_score * 100.))
            if patience <= iter:
                done_looping = True
                break

    end_time = timeit.default_timer()
    print (
        (
            'Optimization complete with best validation score of %f %%, '
            'on iteration %i, '
            'with test performance %f %%'
        ) %
        (best_validation_loss * 100., best_iter + 1, test_score * 100.)
    )
    print >> sys.stderr, ('The training code for file ' +
                          os.path.split(__file__)[1] +
                          ' ran for %.2fm' % ((end_time - start_time) / 60.))
    for i, layer in enumerate(sda.dA_layers):
        if i == 0:
            image = Image.fromarray(
                tile_raster_images(
                    X=layer.W.get_value(borrow=True).T,
                    img_shape=(28, 28),
                    tile_shape=(10, 10),
                    tile_spacing=(1, 1)
                )
            )
        else:
            image = Image.fromarray(
                tile_raster_images(
                    X=layer.W.get_value(borrow=True).T,
                    img_shape=(23, 23),
                    tile_shape=(10, 10),
                    tile_spacing=(1, 1)
                )
            )

        image.save('dAE_layer_finetune_%d.png' % i)
    os.chdir('../')
Example #5
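A variant of Example #1: train an RBM on MNIST with PCD-15, then draw samples from a persistent Gibbs chain initialized with test examples.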
def main(learning_rate=0.1,
         training_epochs=15,
         dataset='mnist.pkl.gz',
         batch_size=20,
         n_chains=20,
         n_samples=10,
         output_folder='rbm_plots',
         n_hidden=500):
    datasets = load_data(dataset)

    train_set_x, train_set_y = datasets[0]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    index = T.lscalar()
    x = T.matrix('x')

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

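    # persistent chain for PCD: one hidden-layer state per example in the minibatch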
    persistent_chain = theano.shared(np.zeros((batch_size, n_hidden),
                                               dtype=theano.config.floatX),
                                     borrow=True)

    rbm = RestrictedBolzmanMachine(
        input=x,
        n_visible=28 * 28,
        n_hidden=n_hidden,
        numpy_rng=rng,
        theano_rng=theano_rng
    )

    cost, updates = rbm.get_cost_updates(learning_rate=learning_rate,
                                         persistent=persistent_chain,
                                         k=15)

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    train_rbm = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        },
        name='train_rbm'
    )

    plotting_time = 0.
    start_time = timeit.default_timer()

    for epoch in xrange(training_epochs):
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            mean_cost += [train_rbm(batch_index)]

        print 'Training epoch %d, cost is ' % epoch, np.mean(mean_cost)

        plotting_start = timeit.default_timer()
        image = Image.fromarray(
            tile_raster_images(
                X=rbm.W.get_value(borrow=True).T,
                img_shape=(28, 28),
                tile_shape=(10, 10),
                tile_spacing=(1, 1)
            )
        )
        image.save('filters_at_epoch_%i.png' % epoch)
        plotting_stop = timeit.default_timer()
        plotting_time += (plotting_stop - plotting_start)

    end_time = timeit.default_timer()

    pretraining_time = (end_time - start_time) - plotting_time

    print ('Training took %f minutes' % (pretraining_time / 60.))

    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(
        np.asarray(
            test_set_x.get_value(borrow=True)[test_idx: test_idx + n_chains],
            dtype=theano.config.floatX
        )
    )

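    # run `plot_every` Gibbs steps between plotted samples so that
    # successive samples are not too correlated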
    plot_every = 1000

    (
        [
            presig_hids,
            hid_mfs,
            hid_samples,
            presig_vis,
            vis_mfs,
            vis_samples
        ],
        updates
    ) = theano.scan(
        fn=rbm.gibbs_vhv,
        outputs_info=[None, None, None, None, None, persistent_vis_chain],
        n_steps=plot_every
    )

    updates.update({persistent_vis_chain: vis_samples[-1]})

    sample_fn = theano.function(
        inputs=[],
        outputs=[
            vis_mfs[-1],
            vis_samples[-1]
        ],
        updates=updates,
        name='sample_fn'
    )

    image_data = np.zeros(
        (29 * n_samples + 1, 29 * n_chains - 1),
        dtype='uint8'
    )

    for idx in xrange(n_samples):
        vis_mf, vis_sample = sample_fn()
        print '... plotting sample', idx
        image_data[29 * idx: 29 * idx + 28, :] = tile_raster_images(
            X=vis_mf,
            img_shape=(28, 28),
            tile_shape=(1, n_chains),
            tile_spacing=(1, 1)
        )

    image = Image.fromarray(image_data)
    image.save('sample.png')
    os.chdir('../')
Example #6
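Train a feed-forward network on MNIST with momentum SGD, reporting the validation error once per epoch.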
def main():
    datasets = load_data('mnist.pkl.gz')
    batch_size = 20

    train_set_x, train_set_y = datasets[0]
    valid_set_x, valid_set_y = datasets[1]
    test_set_x, test_set_y = datasets[2]

    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
    n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size
    n_test_batches = test_set_x.get_value(borrow=True).shape[0] / batch_size

    print '... building model'

    index = T.lscalar()
    x = T.matrix('x')
    t = T.ivector('t')

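    # feed-forward network: three 500-unit hidden layers and a 10-way softmax output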
    layers = [
        GeneralLayer(28 * 28, 500),
        GeneralLayer(500, 500),
        GeneralLayer(500, 500),
        GeneralLayer(500, 10, T.nnet.softmax)
    ]
    params = []

    for i, layer in enumerate(layers):
        params += layer.params
        if i == 0:
            layer_out = layer.forward_prop(x)
        else:
            layer_out = layer.forward_prop(layer_out)

    y = layers[-1].h

    L1_norm = sum([layer.L1_norm for layer in layers])
    L2_norm = sum([layer.L2_norm for layer in layers])

    # negative log likelihood of the correct class under the softmax output
    # (the regularizers computed above can be added back in, e.g.
    #  + 0.000 * L1_norm + 0.0001 * L2_norm)
    cost = -T.mean(T.log(y)[T.arange(x.shape[0]), t])

    error = T.mean(T.neq(T.argmax(y, axis=1), t))

    # gparams = [T.grad(cost, param) for param in params]
    gparams = T.grad(cost, params)
    gmomentums = [
        theano.shared(np.asarray(
            np.zeros_like(param.get_value(borrow=True)),
            dtype=theano.config.floatX)
        )
        for param in params
    ]
    updates = OrderedDict()

    # learning_rate = 0.1
    learning_rate = 0.01
    momentum = 0.9

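    # classical momentum updates: the velocity accumulates a decaying
    # sum of past gradient steps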
    for param, gparam, gmomentum in zip(params, gparams, gmomentums):
        updates[gmomentum] = momentum * gmomentum - learning_rate * gparam
        updates[param] = param + updates[gmomentum]
        # updates[param] = param - learning_rate * gparam

    train = theano.function(
        inputs=[index],
        outputs=[cost],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            t: train_set_y[index * batch_size: (index + 1) * batch_size]
        }
    )

    valid = theano.function(
        inputs=[index],
        outputs=[error],
        givens={
            x: valid_set_x[index * batch_size: (index + 1) * batch_size],
            t: valid_set_y[index * batch_size: (index + 1) * batch_size],
        }
    )

    for epoch in xrange(500):
        for minibatch_index in xrange(n_train_batches):
            train(minibatch_index)
            iter = epoch * n_train_batches + minibatch_index
            if (iter + 1) % n_train_batches == 0:
                validation_losses = [valid(i) for i in xrange(n_valid_batches)]
                this_validation_loss = np.mean(validation_losses)
                print ("EPOCH:: %i, Validation error: %f" % (epoch + 1, this_validation_loss))