Ejemplo n.º 1
0
def train_rbm():
    """Train an RBM on the 'mnist' training set and return its parameters.

    Builds a Theano training function from ``rbm.get_cost_updates``, runs it
    over every full minibatch for ``n_training_epochs`` epochs while printing
    the mean cost of each epoch, reports the total training time, and finally
    shows the learned weights as a tiled image in an OpenCV window (blocking
    until a key is pressed).

    Returns:
        The value of ``rbm.get_parameter_values()`` after training.
    """
    batch_size = 20
    learning_rate = 0.1
    n_training_epochs = 15
    n_visible = 28 * 28
    n_hidden = 500
    n_contrastive_divergence_steps = 15
    persistent_contrastive_divergence = True

    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # Only the training split is used here.
    train_set, _, _ = get_dataset('mnist')
    train_set_x, _ = load_dataset(train_set)
    # Floor division keeps the batch count an int (required by range())
    # regardless of the interpreter's division semantics; a trailing
    # partial batch is ignored.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    x = T.matrix('x')

    if persistent_contrastive_divergence:
        # Shared state carried across training calls, one row per example
        # in a minibatch, initialised to zeros.
        persistent_chain = theano.shared(
            np.zeros((batch_size, n_hidden), dtype=theano.config.floatX),
            borrow=True)
    else:
        persistent_chain = None

    rbm = RBM.create_with_random_weights(n_visible, n_hidden, rng)

    # (persistent) contrastive divergence with
    # n_contrastive_divergence_steps Gibbs steps
    cost, updates = rbm.get_cost_updates(
        x, learning_rate, number_of_gibbs_steps=n_contrastive_divergence_steps,
        theano_rng=theano_rng, persistent_state=persistent_chain)

    minibatch_index = T.iscalar('minibatch_index')

    # Named train_step (not train_rbm) so it does not shadow this function.
    train_step = theano.function(
        inputs=[minibatch_index],
        outputs=cost,
        updates=updates,
        givens={
            x: train_set_x[minibatch_index * batch_size:
                           (minibatch_index + 1) * batch_size],
        }
    )

    start_time = time.time()

    for epoch in range(n_training_epochs):
        epoch_start_time = time.time()
        costs = [train_step(batch_index)
                 for batch_index in range(n_train_batches)]
        print('Training epoch %d of %d, cost is %f, took %.1fs' %
              (epoch, n_training_epochs, np.mean(costs), time.time() - epoch_start_time))

    # Report the duration before opening the window so the time spent
    # waiting for a key press is not counted as training time.
    print ('Training took %d minutes' % ((time.time()-start_time)/60.))

    filters = tile_raster_images(X=rbm.W.get_value(borrow=True).T, img_shape=(28, 28))
    cv2.imshow('filter', filters)
    cv2.waitKey(-1)
    cv2.destroyWindow('filter')

    return rbm.get_parameter_values()
def run_3_denoising_autoencoder(corruption_level=0.3):
    """Train a denoising autoencoder on the 'mnist' training set.

    Each input minibatch is multiplied element-wise by a binomial mask whose
    entries are 1 with probability ``1 - corruption_level``; the autoencoder
    is trained by plain gradient descent to reconstruct the uncorrupted
    input under ``mean_cross_entropy``. After training, the first parameter
    returned by ``autoencoder`` is rendered as a grid of 28x28 tiles in an
    OpenCV window (blocking until a key is pressed).

    Args:
        corruption_level: Probability with which each input element is
            zeroed before being fed to the autoencoder.
    """
    batch_size = 20
    learning_rate = 0.01
    training_epochs = 250
    n_in = 28 * 28
    n_hidden = 500
    rng = np.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    train_set, _, _ = get_dataset('mnist')
    train_set_x, _ = load_dataset(train_set)

    # Floor division keeps the batch count an int (required by range())
    # regardless of the interpreter's division semantics; a trailing
    # partial batch is ignored.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    x = T.matrix('x')

    # Randomly zero input elements; the cost is still measured against
    # the clean input x.
    corruption_mask = theano_rng.binomial(
        size=x.shape, n=1, p=1-corruption_level, dtype=theano.config.floatX)
    corrupted_input = corruption_mask*x
    reconstructed, params = autoencoder(corrupted_input, n_in, n_hidden, rng)
    cost = mean_cross_entropy(reconstructed, x)

    minibatch_index = T.iscalar('minibatch_index')
    train_model = theano.function(
        inputs=[minibatch_index],
        outputs=[cost],
        # One gradient-descent step per parameter.
        updates=[(p, p - learning_rate*T.grad(cost, p))
                 for p in params],
        givens={
            x: train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size],
        },
        profile=False
    )

    start_time = time.time()

    print('Going to run the training with floatX=%s' % (theano.config.floatX))
    for epoch in range(training_epochs):
        epoch_start_time = time.time()
        # batch_index (not minibatch_index) so the symbolic variable above
        # is not rebound to a plain int.
        costs = [train_model(batch_index)
                 for batch_index in range(n_train_batches)]
        print("Mean costs at epoch %d is %f%% (ran for %.1fs)" % (epoch, np.mean(costs), time.time() - epoch_start_time))

    total_time = time.time()-start_time
    # Report the number of epochs actually run rather than the last
    # zero-based loop index, which was off by one and undefined when no
    # epoch ran.
    print('The training code run %.1fs, for %d epochs, for with %f epochs/sec' % (total_time, training_epochs, training_epochs/total_time))

    filters = tile_raster_images(X=params[0].get_value(borrow=True).T,
                                 img_shape=(28, 28), tile_shape=(23, 22),
                                 tile_spacing=(1, 1))
    filters = cv2.resize(filters, dsize=None, fx=1., fy=1.)
    cv2.imshow('filters', filters)
    cv2.waitKey(-1)
Ejemplo n.º 3
0
def sample_from_trained_rbm(w_init, b_hidden_init, b_visible_init):
    """Draw samples from an RBM and display them in an OpenCV window.

    Starts ``n_chains`` parallel Gibbs chains from a contiguous run of test
    examples, advances them ``plot_every`` steps per displayed row, and
    tiles the visible activations of ``n_samples`` successive rows into a
    single image that is shown until a key is pressed.

    Args:
        w_init: First argument forwarded to the ``RBM`` constructor
            (presumably the trained weight matrix -- confirm against RBM).
        b_hidden_init: Second argument forwarded to the ``RBM`` constructor
            (presumably the hidden bias).
        b_visible_init: Third argument forwarded to the ``RBM`` constructor
            (presumably the visible bias).
    """
    # for sampling from trained model
    n_chains = 20
    n_samples = 10

    mnist_pkl = get_dataset('mnist')
    # Pickle data is binary, so the file must be opened in 'rb' mode.
    # NOTE: pickle.load can execute arbitrary code; only use dataset files
    # from a trusted source.
    # NOTE(review): a pickle written by Python 2 may additionally need
    # encoding='latin1' when loaded under Python 3 -- confirm for this file.
    with open(mnist_pkl, 'rb') as f:
        _, _, test_set = pickle.load(f)

    test_set_x, _ = load_dataset(test_set)
    # sample from trained RBM
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]
    # pick random test examples, with which to initialize the persistent chain
    rng = np.random.RandomState(123)
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(
        np.asarray(
            test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
            dtype=theano.config.floatX
        )
    )

    theano_rng = RandomStreams(rng.randint(2 ** 30))
    plot_every = 1000

    rbm = RBM(w_init, b_hidden_init, b_visible_init)

    # Only the visible sample is fed back into the next scan step (it is the
    # single non-None entry of outputs_info); the other outputs are just
    # collected.
    (hidden_samples, hidden_activations, hidden_linear_activations,
     visible_samples, visible_activations, linear_visible_activations), sampling_updates = theano.scan(
        fn=lambda x: rbm.gibbs_update_visible_hidden_visible(x, theano_rng),
        outputs_info=[None, None, None, persistent_vis_chain, None, None],
        n_steps=plot_every
    )

    # Carry the final visible sample over to the next call of sample_fn.
    sampling_updates[persistent_vis_chain] = visible_samples[-1]
    sample_fn = theano.function(
        [],
        [
            visible_activations[-1],
            visible_samples[-1]
        ],
        updates=sampling_updates
    )

    # Each 28x28 tile is followed by a 1-pixel spacer, hence the stride of 29.
    image_data = np.zeros(
        (29 * n_samples + 1, 29 * n_chains - 1),
        dtype='uint8'
    )
    for idx in range(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_activations, _ = sample_fn()
        # Formatted so the output is identical under Python 2 and Python 3.
        print(' ... plotting sample  %s' % idx)
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
            X=vis_activations,
            img_shape=(28, 28),
            tile_shape=(1, n_chains),
            tile_spacing=(1, 1)
        )

    image_data = cv2.resize(image_data, dsize=None, fx=2., fy=2.)
    cv2.imshow('sampling', image_data)
    cv2.waitKey(-1)