Beispiel #1
0
def test_rbm_mnist(learning_rate=0.01, training_epochs=10, batch_size=20,
                   n_chains=30, n_samples=5, output_folder=None, isPCD=0,
                   n_hidden=500):
    """
    Demonstrate how to train and afterwards sample from it using Theano.

    This is demonstrated on MNIST.

    :param learning_rate: learning rate used for training the RBM

    :param training_epochs: number of epochs used for training

    :param dataset: path the the pickled dataset

    :param batch_size: size of a batch used to train the RBM

    :param n_chains: number of parallel Gibbs chains to be used for sampling

    :param n_samples: number of samples to plot for each chain

    e.g.
        test_rbm_mnist(output_folder='/home/eric/Desktop/rbm_plots')

    """

    assert output_folder is not None

    from rbm_variants import RBM_Orthogonal as RBM
    # from rbm import RBM

    #################################
    #     Data Constructing         #
    #################################

    from sklearn.datasets import fetch_mldata

    mnist = fetch_mldata('MNIST original')

    from xylearn.utils.data_util import get_train_test
    from xylearn.utils.data_normalization import rescale

    data = get_train_test(rescale(mnist.data), mnist.target, useGPU=1, shuffle=True)

    train_x, train_y = data['train']
    n_vis = train_x.get_value(borrow=True).shape[1]

    print numpy.linalg.matrix_rank(train_x.get_value(borrow=True))

    n_train_batches = train_x.get_value(borrow=True).shape[0] / batch_size


    # construct the RBM class
    rbm = RBM(n_visible=n_vis, n_hidden=n_hidden, isPCD=isPCD)
    train_fn = rbm.get_train_fn(train_x, batch_size)


    #################################
    #     Training the RBM          #
    #################################
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    plotting_time = 0.
    start_time = time.clock()
    import PIL.Image
    from visualizer import tile_raster_images

    # go through training epochs
    for epoch in xrange(training_epochs):

        # go through the training set
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            # for each batch, we extract the gibbs chain
            new_cost = train_fn(index=batch_index, lr=learning_rate)
            mean_cost += [new_cost]

        print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)
        # monitor projected rank
        projection = rbm.project(train_x)
        print 'rank: ' + str(numpy.linalg.matrix_rank(projection))

        # W shape is [784 500]
        # Plot filters after each training epoch
        plotting_start = time.clock()
        # Construct image from the weight matrix
        image = PIL.Image.fromarray(tile_raster_images(
            X=rbm.W.get_value(borrow=True).T,
            img_shape=(28, 28), tile_shape=(20, 20),
            tile_spacing=(1, 1)))
        image.save('filters_at_epoch_%i.png' % epoch)
        plotting_stop = time.clock()
        plotting_time += (plotting_stop - plotting_start)

    end_time = time.clock()
    pretraining_time = (end_time - start_time) - plotting_time
    print ('Training took %f minutes' % (pretraining_time / 60.))


    #################################
    #     Sampling from the RBM     #
    #################################

    test_idx = 1

    test_x, test_y = data['test']
    sample_fn = rbm.get_sampling_fn(test_x, test_idx, n_chains)

    print '... begin sampling'
    # plot initial image first
    orig_img = test_x.get_value(borrow=True)[test_idx:test_idx + 1]
    image = PIL.Image.fromarray(tile_raster_images(
        X=orig_img,
        img_shape=(28, 28), tile_shape=(1, 1),
        tile_spacing=(1, 1)))
    image.save('orig_img.png')

    # create a space to store the image for plotting ( we need to leave
    # room for the tile_spacing as well)
    image_data = numpy.zeros((29 * n_samples + 1, 29 * n_chains - 1),
                             dtype='uint8')
    for idx in xrange(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_mf, vis_sample = sample_fn()
        print ' ... plotting sample ', idx
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
            X=vis_mf,
            img_shape=(28, 28),
            tile_shape=(1, n_chains),
            tile_spacing=(1, 1))
        # construct image

    image = PIL.Image.fromarray(image_data)
    image.save('samples.png')
    os.chdir('../')

    #################################
    #     Projecting from the RBM   #
    #################################
    projection = rbm.project(train_x)

    print numpy.linalg.matrix_rank(projection)
Beispiel #2
0
def toy_test(learning_rate=0.01, training_epochs=100, batch_size=50,
             output_folder=None, isPCD=0,
             n_hidden=3):
    assert output_folder is not None
    # toy_data, word count vector, [num_terms, num_doc].
    # each cell represents the number of times a term occurs
    #                          d1 d2 d3 d4 d5
    toy_data = numpy.asarray([[0, 2, 0, 1, 0],
                              [9, 0, 3, 1, 1],
                              [4, 1, 1, 2, 1],
                              [10, 10, 1, 1, 0],
                              [1, 0, 8, 0, 10],
                              [0, 1, 10, 1, 0],
                              [1, 0, 2, 6, 1],
                              [0, 0, 1, 0, 0],
                              [1, 0, 0, 0, 0],
                              [1, 0, 1, 0, 0],
                              [1, 1, 0, 0, 1],
                              [10, 2, 0, 1, 0],
                              [0, 0, 1, 0, 10],
                              [1, 0, 0, 3, 0],
                              [0, 0, 2, 0, 1],
                              [10, 0, 1, 0, 0],
                              [0, 1, 0, 0, 0],
                              [0, 1, 0, 1, 0],
                              [1, 0, 1, 0, 0],
                              [1, 0, 0, 0, 1],
                              [1, 0, 1, 0, 0],
                              [0, 0, 1, 0, 0]])

    # from rbm import RBM
    from rbm_variants import RBM_Orthogonal as RBM
    # from rbm_variants import PoissonRBM as RBM


    train_x = toSharedX(toy_data, name="toy_data")

    n_vis = train_x.get_value(borrow=True).shape[1]

    n_samples = train_x.get_value(borrow=True).shape[0]

    if batch_size >= n_samples:
        batch_size = n_samples

    n_train_batches = n_samples / batch_size


    # construct the RBM class
    rbm = RBM(n_visible=n_vis, n_hidden=n_hidden, isPCD=isPCD)
    train_fn = rbm.get_train_fn(train_x, batch_size)

    print "... projecting"
    print rbm.project(train_x, hidSample=1)

    #################################
    #     Training the RBM          #
    #################################
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    plotting_time = 0.
    start_time = time.clock()
    import PIL.Image
    from visualizer import tile_raster_images

    # go through training epochs
    for epoch in xrange(training_epochs):

        # go through the training set
        mean_cost = []
        for batch_index in xrange(n_train_batches):
            # for each batch, we extract the gibbs chain
            new_cost = train_fn(index=batch_index, lr=learning_rate)
            mean_cost += [new_cost]

        print 'Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost)

        if numpy.mean(mean_cost) >= 0:
            break

        # W shape is [784 500]
        # Plot filters after each training epoch
        plotting_start = time.clock()
        # Construct image from the weight matrix
        image = PIL.Image.fromarray(tile_raster_images(
            X=rbm.W.get_value(borrow=True).T,
            # weight is [n_vis, n_hidden]
            # so, among 'n_hidden' rows,
            # each row corresponds to propdown one hidden unit
            img_shape=(1, n_vis), tile_shape=(n_hidden, 1),
            tile_spacing=(1, 1)))
        image.save('filters_at_epoch_%i.png' % epoch)
        plotting_stop = time.clock()
        plotting_time += (plotting_stop - plotting_start)

    end_time = time.clock()
    pretraining_time = (end_time - start_time) - plotting_time
    print ('Training took %f minutes' % (pretraining_time / 60.))

    print "... projecting"
    print rbm.project(train_x, hidSample=1)

    print "... reconstructing"
    print rbm.reconstruct(train_x, showSample=1) * train_x.get_value(borrow=True)