def sample(self, test_data):
    number_of_test_samples = test_data.images.shape[0]
    rng = np.random.RandomState(123)
    test_idx = rng.randint(number_of_test_samples - self.n_chains)
    self.persistent_vis_chain = test_data.images[test_idx:test_idx + self.n_chains]
    # create a space to store the image for plotting (we need to leave
    # room for the tile_spacing as well: each 28-px tile plus 1 px of
    # spacing gives a 29-px pitch)
    image_data = np.zeros(
        (29 * self.n_samples + 1, 29 * self.n_chains - 1), dtype='uint8')
    for idx in range(self.n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_mf, vis_sample = self.sample_fn()
        print(' ... plotting sample %d' % idx)
        image_data[29 * idx:29 * idx + 28, :] = tile_raster_images(
            X=vis_mf.eval(),
            img_shape=(28, 28),
            tile_shape=(1, self.n_chains),
            tile_spacing=(1, 1))
    # construct image
    image = Image.fromarray(image_data)
    image.save('samples.png')
def show_image(path, n_w, img_shape, tile_shape):
    image = Image.fromarray(
        tile_raster_images(X=n_w.T,
                           img_shape=img_shape,
                           tile_shape=tile_shape,
                           tile_spacing=(1, 1)))
    image.save(path)
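# Example call with stand-in weights (hypothetical; any 784 x 500 MNIST
# weight matrix works): show_image tiles the columns of n_w, so this
# renders 500 filters as a 25x20 grid of 28x28 tiles.
example_w = np.random.randn(784, 500).astype(np.float32)  # stand-in weights
show_image("filters.png", example_w, (28, 28), (25, 20))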
def train(self):
    data_X = self.data.images
    optim = tf.train.GradientDescentOptimizer(0.1).minimize(self.loss)
    counter = 1
    start_time = time.time()
    for epoch in range(2):
        batch_idxs = len(data_X) // self.batch_size
        for idx in range(0, batch_idxs):
            batch_images = data_X[idx * self.batch_size:(idx + 1) * self.batch_size]
            #batch_labels = data_y[idx*self.batch_size:(idx+1)*self.batch_size]
            _ = self.sess.run([optim], feed_dict={self.input: batch_images})
            loss = self.loss.eval({self.input: batch_images})
            counter += 1
            print("Epoch: [%2d] [%4d/%4d] time: %4.4f, loss: %.8f"
                  % (epoch, idx, batch_idxs, time.time() - start_time, loss))
        image = Image.fromarray(
            tile_raster_images(X=tf.transpose(self.W).eval(),
                               img_shape=(28, 28),
                               tile_shape=(25, 20),
                               tile_spacing=(1, 1)))
        image.save("rbm_%d.png" % epoch)
def view_weights(params=None, size=33):
    if params is None:
        params, details = load_params(PARAM_PATH)
        W = params[0][0]
    else:
        W = params[0][0].get_value()
    img = tile_raster_images(W.T, (size, size), (25, 25))
    plt.imshow(img)
    plt.show()
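# Typical usage (assumes load_params/PARAM_PATH from the same project):
#   view_weights()                      # inspect weights saved on disk
#   view_weights(params=model.params)   # inspect live shared variables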
def test_autoencoder(data, params, details):
    details['mode'] = 'test'
    model = AutoEncoder(params=params, details=details)
    test_model = theano.function(
        [],
        [model.layers[-1].output,
         model.layers[0].output,
         model.corrupted,
         model.layers[0].params[0]],
        givens={model.x: data})
    recon, hidden, cor, w = test_model()
    data = data.get_value()
    hidden_l = details['n_h']
    widths = [10]
    for ww in widths:
        if hidden_l % ww == 0:
            hidden_shape = (ww, hidden_l / ww)
            break
    width = int(np.sqrt(len(recon[0])))
    for i in xrange(100):
        h = hidden[i]
        active_unit_indices = [j for j, v in enumerate(h >= 0.1) if v]
        active_features = w[:, active_unit_indices].T
        n_units = len(active_unit_indices)
        tile_l = int(np.sqrt(n_units)) + 1
        features = tile_raster_images(active_features, (width, width),
                                      (tile_l, tile_l))
        print width, type(cor[i]), len(cor[i])
        print cor[i].shape
        plt.figure()
        plt.subplot(311)
        plt.imshow(cor[i].reshape(width, width))
        plt.subplot(312)
        plt.imshow(hidden[i].reshape(hidden_shape))
        plt.subplot(313)
        plt.imshow(recon[i].reshape(width, width))
        plt.figure()
        plt.imshow(features)
        plt.show()
o_w = np.zeros([784, 500], np.float32)
o_vb = np.zeros([784], np.float32)
o_hb = np.zeros([500], np.float32)
print sess.run(
    err_sum, feed_dict={X: trX, rbm_w: o_w, rbm_vb: o_vb, rbm_hb: o_hb})
for start, end in zip(range(0, len(trX), batchsize),
                      range(batchsize, len(trX), batchsize)):
    batch = trX[start:end]
    n_w = sess.run(update_w, feed_dict={
        X: batch, rbm_w: o_w, rbm_vb: o_vb, rbm_hb: o_hb})
    n_vb = sess.run(update_vb, feed_dict={
        X: batch, rbm_w: o_w, rbm_vb: o_vb, rbm_hb: o_hb})
    n_hb = sess.run(update_hb, feed_dict={
        X: batch, rbm_w: o_w, rbm_vb: o_vb, rbm_hb: o_hb})
    o_w = n_w
    o_vb = n_vb
    o_hb = n_hb
    if start % 10000 == 0:
        print sess.run(
            err_sum,
            feed_dict={X: trX, rbm_w: n_w, rbm_vb: n_vb, rbm_hb: n_hb})
        image = Image.fromarray(
            tile_raster_images(X=n_w.T,
                               img_shape=(28, 28),
                               tile_shape=(25, 20),
                               tile_spacing=(1, 1)))
        image.save("rbm_%d.png" % (start / 10000))
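# The loop above feeds the parameters through placeholders, so the graph ops
# (update_w, update_vb, update_hb, err_sum) must be defined beforehand. A
# minimal CD-1 sketch of those definitions follows; the names match the
# snippet, but the exact formulas are assumptions, not the author's code.
def sample_prob(probs):
    # Bernoulli sample: 1 where uniform noise falls below the activation
    return tf.nn.relu(tf.sign(probs - tf.random_uniform(tf.shape(probs))))

X = tf.placeholder("float", [None, 784])
rbm_w = tf.placeholder("float", [784, 500])
rbm_vb = tf.placeholder("float", [784])
rbm_hb = tf.placeholder("float", [500])

h0 = sample_prob(tf.nn.sigmoid(tf.matmul(X, rbm_w) + rbm_hb))       # v0 -> h0
v1 = sample_prob(tf.nn.sigmoid(
    tf.matmul(h0, tf.transpose(rbm_w)) + rbm_vb))                   # h0 -> v1
h1 = tf.nn.sigmoid(tf.matmul(v1, rbm_w) + rbm_hb)                   # v1 -> h1

alpha = 1.0  # learning rate (assumed)
w_positive_grad = tf.matmul(tf.transpose(X), h0)
w_negative_grad = tf.matmul(tf.transpose(v1), h1)
update_w = rbm_w + alpha * \
    (w_positive_grad - w_negative_grad) / tf.to_float(tf.shape(X)[0])
update_vb = rbm_vb + alpha * tf.reduce_mean(X - v1, 0)
update_hb = rbm_hb + alpha * tf.reduce_mean(h0 - h1, 0)
err_sum = tf.reduce_sum(tf.square(X - v1))  # reconstruction error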
def train(self, X): """TODO: Docstring for train. :X: TODO :returns: TODO """ import os RES_PATH = "./rbm_test_res" if os.path.isdir(RES_PATH) == False: os.mkdir(RES_PATH) _w = tf.placeholder("float", [self._input_size, self._output_size]) _hb = tf.placeholder("float", [self._output_size]) _vb = tf.placeholder("float", [self._input_size]) _vw = tf.placeholder("float", [self._input_size, self._output_size]) _vhb = tf.placeholder("float", [self._output_size]) _vvb = tf.placeholder("float", [self._input_size]) _current_vw = np.zeros( [self._input_size, self._output_size], np.float32) _current_vhb = np.zeros([self._output_size], np.float32) _current_vvb = np.zeros([self._input_size], np.float32) v0 = tf.placeholder("float", [None, self._input_size]) h0 = self.sample_prob(self.propup(v0, _w, _hb)) v1 = self.sample_prob(self.propdown(h0, _w, _vb)) h1 = self.propup(v1, _w, _hb) positive_grad = tf.matmul(tf.transpose(v0), h0) negative_grad = tf.matmul(tf.transpose(v1), h1) update_vw = _vw * self._opts._momentum + self._opts._learning_rate *\ (positive_grad - negative_grad) / tf.to_float(tf.shape(v0)[0]) update_vvb = _vvb * self._opts._momentum + \ self._opts._learning_rate * tf.reduce_mean(v0 - v1, 0) update_vhb = _vhb * self._opts._momentum + \ self._opts._learning_rate * tf.reduce_mean(h0 - h1, 0) update_w = _w + _vw update_vb = _vb + _vvb update_hb = _hb + _vhb with tf.Session() as sess: sess.run(tf.initialize_all_variables()) old_w = self.init_w old_hb = self.init_hb old_vb = self.init_vb for i in range(self._opts._epoches): for start, end in zip(range(0, len(X), self._opts._batchsize), range(self._opts._batchsize, len(X), self._opts._batchsize)): batch = X[start:end] _current_vw = sess.run(update_vw, feed_dict={ v0: batch, _w: old_w, _hb: old_hb, _vb: old_vb, _vw: _current_vw}) _current_vhb = sess.run(update_vhb, feed_dict={ v0: batch, _w: old_w, _hb: old_hb, _vb: old_vb, _vhb: _current_vhb}) _current_vvb = sess.run(update_vvb, feed_dict={ v0: batch, _w: old_w, _hb: old_hb, _vb: old_vb, _vvb: _current_vvb}) old_w = sess.run(update_w, feed_dict={ _w: old_w, _vw: _current_vw}) old_hb = sess.run(update_hb, feed_dict={ _hb: old_hb, _vhb: _current_vhb}) old_vb = sess.run(update_vb, feed_dict={ _vb: old_vb, _vvb: _current_vvb}) image = Image.fromarray( tile_raster_images( X=old_w.T, img_shape=(int(math.sqrt(self._input_size)), int(math.sqrt(self._input_size))), tile_shape=(int(math.sqrt(self._output_size)), int(math.sqrt(self._output_size))), tile_spacing=(1, 1) ) ) image.save(RES_PATH+"/%s_%d.png" % (self._name, i)) self.w = old_w self.hb = old_hb self.vb = old_vb
train_step = tf.train.AdamOptimizer(1e-3).minimize(loss)
sess = tf.Session()
sess.run(tf.initialize_all_variables())
for i in range(1001):
    batch_v, _ = mnist.train.next_batch(batch_size)
    batch_v = np.float32(batch_v > 0)
    batch_v_sampling = gibbs_v(batch_v, sess.run(W), sess.run(b), sess.run(c), k=15)
    if i % 50 == 0:
        loss_this_batch = sess.run(loss, feed_dict={v: batch_v,
                                                    v_sampling: batch_v_sampling})
        reconstruct_err = np.mean(np.abs(batch_v - batch_v_sampling))
        print 'step {i}, loss {l:.4f}, reconstruction err {r:.6f}'.format(
            i=i, l=loss_this_batch, r=reconstruct_err)
        # tile the first 100 binarized test digits next to their
        # one-step Gibbs reconstructions
        x = np.float32(mnist.test.images[0:100, :] > 0)
        image = tile_raster_images(x, (28, 28), (10, 10))
        image = np.stack((image, image, image), axis=2)
        fig = plt.figure(0)
        ax = fig.add_subplot(121)
        ax.imshow(image)
        ax.axis('off')
        x_sampling = gibbs_v(x, sess.run(W), sess.run(b), sess.run(c), k=1)
        image_sampling = tile_raster_images(x_sampling, (28, 28), (10, 10))
        image_sampling = np.stack((image_sampling, image_sampling, image_sampling),
                                  axis=2)
        ax = fig.add_subplot(122)
        ax.imshow(image_sampling)
        ax.axis('off')
        fig.savefig("results/step{i}.png".format(i=i))
    sess.run(train_step, feed_dict={v: batch_v, v_sampling: batch_v_sampling})
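# `gibbs_v` is not defined in the snippet above. A NumPy sketch consistent
# with how it is called -- gibbs_v(v, W, b, c, k), with b taken as the
# visible bias and c as the hidden bias (an assumption) -- could look like:
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def gibbs_v(v, W, b, c, k=1):
    """Run k steps of block Gibbs sampling, starting and ending at v."""
    for _ in range(k):
        h_prob = sigmoid(v.dot(W) + c)                          # v -> h
        h = np.float32(np.random.rand(*h_prob.shape) < h_prob)
        v_prob = sigmoid(h.dot(W.T) + b)                        # h -> v
        v = np.float32(np.random.rand(*v_prob.shape) < v_prob)
    return v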
def train(self, X): """TODO: Docstring for train. :X: TODO :returns: TODO """ _w = tf.placeholder("float", [self._input_size, self._output_size]) _hb = tf.placeholder("float", [self._output_size]) _vb = tf.placeholder("float", [self._input_size]) _vw = tf.placeholder("float", [self._input_size, self._output_size]) _vhb = tf.placeholder("float", [self._output_size]) _vvb = tf.placeholder("float", [self._input_size]) _current_vw = np.zeros( [self._input_size, self._output_size], np.float32) _current_vhb = np.zeros([self._output_size], np.float32) _current_vvb = np.zeros([self._input_size], np.float32) v0 = tf.placeholder("float", [None, self._input_size]) h0 = self.sample_prob(self.propup(v0, _w, _hb)) v1 = self.sample_prob(self.propdown(h0, _w, _vb)) h1 = self.propup(v1, _w, _hb) positive_grad = tf.matmul(tf.transpose(v0), h0) negative_grad = tf.matmul(tf.transpose(v1), h1) update_vw = _vw * self._opts._momentum + self._opts._learning_rate *\ (positive_grad - negative_grad) / tf.to_float(tf.shape(v0)[0]) update_vvb = _vvb * self._opts._momentum + \ self._opts._learning_rate * tf.reduce_mean(v0 - v1, 0) update_vhb = _vhb * self._opts._momentum + \ self._opts._learning_rate * tf.reduce_mean(h0 - h1, 0) update_w = _w + _vw update_vb = _vb + _vvb update_hb = _hb + _vhb with tf.Session() as sess: sess.run(tf.initialize_all_variables()) old_w = self.init_w old_hb = self.init_hb old_vb = self.init_vb for i in range(self._opts._epoches): for start, end in zip(range(0, len(X), self._opts._batchsize), range(self._opts._batchsize, len(X), self._opts._batchsize)): batch = X[start:end] _current_vw = sess.run(update_vw, feed_dict={ v0: batch, _w: old_w, _hb: old_hb, _vb: old_vb, _vw: _current_vw}) _current_vhb = sess.run(update_vhb, feed_dict={ v0: batch, _w: old_w, _hb: old_hb, _vb: old_vb, _vhb: _current_vhb}) _current_vvb = sess.run(update_vvb, feed_dict={ v0: batch, _w: old_w, _hb: old_hb, _vb: old_vb, _vvb: _current_vvb}) old_w = sess.run(update_w, feed_dict={ _w: old_w, _vw: _current_vw}) old_hb = sess.run(update_hb, feed_dict={ _hb: old_hb, _vhb: _current_vhb}) old_vb = sess.run(update_vb, feed_dict={ _vb: old_vb, _vvb: _current_vvb}) image = Image.fromarray( tile_raster_images( X=old_w.T, img_shape=(int(math.sqrt(self._input_size)), int(math.sqrt(self._input_size))), tile_shape=(int(math.sqrt(self._output_size)), int(math.sqrt(self._output_size))), tile_spacing=(1, 1) ) ) image.save("%s_%d.png" % (self._name, i)) self.w = old_w self.hb = old_hb self.vb = old_vb
def sgd_optimize(learning_rate=0.1,
                 n_epochs=15,
                 batch_size=20,
                 output_folder="da_images",
                 corruption_level=0.):
    # Load input
    train, valid, test = util.load()
    print "loading 0 - ", train[0].shape[0], " train inputs in gpu memory"
    train_x, train_y = util.create_theano_shared(train)
    print "loading 0 - ", valid[0].shape[0], " validation inputs in gpu memory"
    valid_x, valid_y = util.create_theano_shared(valid)
    print "loading 0 - ", test[0].shape[0], " test inputs in gpu memory"
    test_x, test_y = util.create_theano_shared(test)

    # Define symbolic input matrices
    print "Building Model..."
    index = T.iscalar()
    x = T.matrix("x")

    # Define denoising autoencoder
    random_generator = numpy.random.RandomState(1)
    theano_random_generator = RandomStreams(random_generator.randint(2 ** 30))
    da = DenoisingAutoEncoder(random_generator, theano_random_generator,
                              x_dim=28 * 28, y_dim=500, input=x)

    # Define training model
    cost, updates = da.cost_updates(corruption_level=corruption_level,
                                    learning_rate=learning_rate)
    train_model = theano.function(
        inputs=[index],
        outputs=cost,
        updates=updates,
        givens={x: train_x[index * batch_size:(index + 1) * batch_size]})
    n_train_batches = train[0].shape[0] / batch_size

    # Train
    start_time = time.clock()
    for epoch in range(n_epochs):
        c = []
        for minibatch_index in range(n_train_batches):
            c.append(train_model(minibatch_index))
        print 'Training epoch %d, cost ' % epoch, numpy.mean(c)
    end_time = time.clock()

    # Save image
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)
    image = Image.fromarray(tile_raster_images(
        X=da.W.get_value(borrow=True).T,
        img_shape=(28, 28),
        tile_shape=(10, 10),
        tile_spacing=(1, 1)))
    image.save('filters_corruption_{}.png'.format(int(corruption_level * 100)))
    os.chdir('../')
n_hb = np.zeros([neuron_count], np.float32)
o_w = np.zeros([784, neuron_count], np.float32)
o_vb = np.zeros([784], np.float32)
o_hb = np.zeros([neuron_count], np.float32)
print(sess.run(err_sum, feed_dict={X: train_X, rbm_w: o_w,
                                   rbm_visible_bias: o_vb, rbm_hb: o_hb}))
for start, end in zip(range(0, len(train_X), batchsize),
                      range(batchsize, len(train_X), batchsize)):
    batch = train_X[start:end]
    n_w = sess.run(update_w, feed_dict={X: batch, rbm_w: o_w,
                                        rbm_visible_bias: o_vb, rbm_hb: o_hb})
    n_vb = sess.run(update_vb, feed_dict={X: batch, rbm_w: o_w,
                                          rbm_visible_bias: o_vb, rbm_hb: o_hb})
    n_hb = sess.run(update_hb, feed_dict={X: batch, rbm_w: o_w,
                                          rbm_visible_bias: o_vb, rbm_hb: o_hb})
    o_w = n_w
    o_vb = n_vb
    o_hb = n_hb
    if start % 10000 == 0:
        print(sess.run(err_sum, feed_dict={X: train_X, rbm_w: n_w,
                                           rbm_visible_bias: n_vb, rbm_hb: n_hb}))
        # If you provide too many tiles you get blank ones at the end
        pictures_tall = 5
        pictures_wide = 2
        image = Image.fromarray(
            tile_raster_images(X=n_w.T,
                               img_shape=(28, 28),
                               tile_shape=(pictures_tall, pictures_wide),
                               tile_spacing=(1, 1)))
        image.save("rbm_%d.png" % (start // 10000))
# will not reload if data is already in workspace
try:
    datasets
except NameError:
    print 'loading data'
    datasets = deserialize_object(os.path.join(
        data_path,
        'results/params_tracer_data_multi_full_33_17_17_nC1_11000_switch1.00_2718_noise_nh1a_300_nh1b_50_nh2_500_nout_9_preTrainFalse_LRi_0.01000_reg_10.00_dropoutFalse_2.647312.pkl'))
w0 = datasets[2].T
print w0.shape
n_cases, n_dims = w0.shape
im_w = int(np.sqrt(n_dims))  # assume square
case_w = int(np.sqrt(n_cases)) + 1
out = tile_raster_images(w0, (im_w, im_w), (case_w, case_w), tile_spacing=(3, 3))
plt.imshow(out, cmap='gray')
plt.show()
quit()

#map_w = np.sqrt(n_dims_out)  # assume square
print im_w
n_train_batches = int(np.ceil(float(n_cases) / show_batchsize))
for b in xrange(n_train_batches):
    plt.figure(b)
    plt.subplot(1, 2, 1)
    batch_start = b * show_batchsize
    batch_end = min((b + 1) * show_batchsize, n_cases)
def test_dA(learning_rate=0.1, training_epochs=15,
            dataset='mnist.pkl.gz',
            batch_size=20, output_folder='dA_plots'):
    """
    This demo is tested on MNIST

    :type learning_rate: float
    :param learning_rate: learning rate used for training the denoising
                          autoencoder

    :type training_epochs: int
    :param training_epochs: number of epochs used for training

    :type dataset: string
    :param dataset: path to the pickled dataset

    """
    datasets = load_data(dataset)
    train_set_x, train_set_y = datasets[0]

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # start-snippet-2
    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images
    # end-snippet-2

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    ####################################
    # BUILDING THE MODEL NO CORRUPTION #
    ####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=28 * 28,
        n_hidden=500
    )

    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=learning_rate
    )

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    start_time = timeit.default_timer()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in range(training_epochs):
        # go through training set
        c = []
        for batch_index in range(n_train_batches):
            c.append(train_da(batch_index))
        print('Training epoch %d, cost ' % epoch, numpy.mean(c))

    end_time = timeit.default_timer()
    training_time = (end_time - start_time)

    print(('The no corruption code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % (training_time / 60.)), file=sys.stderr)

    image = Image.fromarray(
        tile_raster_images(X=da.W.get_value(borrow=True).T,
                           img_shape=(28, 28), tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('filters_corruption_0.png')

    # start-snippet-3
    #####################################
    # BUILDING THE MODEL CORRUPTION 30% #
    #####################################

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=28 * 28,
        n_hidden=500
    )

    cost, updates = da.get_cost_updates(
        corruption_level=0.3,
        learning_rate=learning_rate
    )

    train_da = theano.function(
        [index],
        cost,
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size]
        }
    )

    start_time = timeit.default_timer()

    ############
    # TRAINING #
    ############

    # go through training epochs
    for epoch in range(training_epochs):
        # go through training set
        c = []
        for batch_index in range(n_train_batches):
            c.append(train_da(batch_index))
        print('Training epoch %d, cost ' % epoch, numpy.mean(c))

    end_time = timeit.default_timer()
    training_time = (end_time - start_time)

    print(('The 30% corruption code for file ' + os.path.split(__file__)[1] +
           ' ran for %.2fm' % (training_time / 60.)), file=sys.stderr)
    # end-snippet-3

    # start-snippet-4
    image = Image.fromarray(tile_raster_images(
        X=da.W.get_value(borrow=True).T,
        img_shape=(28, 28), tile_shape=(10, 10),
        tile_spacing=(1, 1)))
    image.save('filters_corruption_30.png')
    # end-snippet-4

    os.chdir('../')
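# Assumed entry point for the module above (the Theano deep-learning
# tutorials run the demo this way; adjust the dataset path as needed):
if __name__ == '__main__':
    test_dA()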
def test_rbm(learning_rate=0.1, training_epochs=30,
             dataset='data/mnist.pkl.gz',
             batch_size=20,
             n_chains=20,
             n_samples=10,
             output_folder='data/rbm_plots',
             n_hidden=1000):
    """
    Demonstrate how to train an RBM and afterwards sample from it using
    Theano. This is demonstrated on MNIST.

    :param learning_rate: learning rate used for training the RBM
    :param training_epochs: number of epochs used for training
    :param dataset: path to the pickled dataset
    :param batch_size: size of a batch used to train the RBM
    :param n_chains: number of parallel Gibbs chains to be used for sampling
    :param n_samples: number of samples to plot for each chain

    """
    datasets = load_data(dataset)

    # train_set_x, train_set_y = datasets[0]
    # test_set_x, test_set_y = datasets[2]
    img_size = 28
    img_size_1 = img_size + 1
    o_train_set_x = numpy.load('data/origin_target_train_28.npy')
    # o_train_set_x = numpy.load('f_total_data_original.npy')
    o_test_set_x = o_train_set_x[0:3000, :]
    train_set_x = theano.shared(numpy.asarray(o_train_set_x,
                                              dtype=theano.config.floatX),
                                borrow=True)
    test_set_x = theano.shared(numpy.asarray(o_test_set_x,
                                             dtype=theano.config.floatX),
                               borrow=True)

    # compute number of minibatches for training, validation and testing
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] // batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()    # index to a [mini]batch
    x = T.matrix('x')      # the data is presented as rasterized images

    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 30))

    # initialize storage for the persistent chain (state = hidden
    # layer of chain)
    persistent_chain = theano.shared(numpy.zeros((batch_size, n_hidden),
                                                 dtype=theano.config.floatX),
                                     borrow=True)

    # construct the RBM class
    rbm = RBM(input=x, n_visible=img_size * img_size,
              n_hidden=n_hidden, numpy_rng=rng, theano_rng=theano_rng)

    # get the cost and the gradient corresponding to one step of PCD-100
    cost, updates = rbm.get_cost_updates(lr=learning_rate,
                                         persistent=persistent_chain, k=100)

    #################################
    #     Training the RBM          #
    #################################
    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    # start-snippet-5
    # it is ok for a theano function to have no output
    # the purpose of train_rbm is solely to update the RBM parameters
    train_rbm = theano.function(
        [index],
        cost,
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]},
        name='train_rbm')

    plotting_time = 0.
    start_time = timeit.default_timer()

    # go through training epochs
    for epoch in range(training_epochs):

        # go through the training set
        mean_cost = []
        for batch_index in range(n_train_batches):
            mean_cost += [train_rbm(batch_index)]

        print('Training epoch %d, cost is ' % epoch, numpy.mean(mean_cost))

        # Plot filters after each training epoch
        plotting_start = timeit.default_timer()
        # Construct image from the weight matrix
        image = Image.fromarray(
            tile_raster_images(X=rbm.W.get_value(borrow=True).T,
                               img_shape=(img_size, img_size),
                               tile_shape=(10, 10),
                               tile_spacing=(1, 1)))
        image.save('filters_at_epoch_%i.png' % epoch)
        plotting_stop = timeit.default_timer()
        plotting_time += (plotting_stop - plotting_start)

    end_time = timeit.default_timer()
    pretraining_time = (end_time - start_time) - plotting_time

    print('Training took %f minutes' % (pretraining_time / 60.))
    # end-snippet-5 start-snippet-6

    #################################
    #     Sampling from the RBM     #
    #################################
    # find out the number of test samples
    number_of_test_samples = test_set_x.get_value(borrow=True).shape[0]

    # pick random test examples, with which to initialize the persistent chain
    test_idx = rng.randint(number_of_test_samples - n_chains)
    persistent_vis_chain = theano.shared(
        numpy.asarray(
            test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
            dtype=theano.config.floatX))
    # end-snippet-6 start-snippet-7
    plot_every = 1000
    # define one step of Gibbs sampling (mf = mean-field); define a
    # function that does `plot_every` steps before returning the
    # sample for plotting
    ([presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples],
     updates) = theano.scan(rbm.gibbs_vhv,
                            outputs_info=[None, None, None, None, None,
                                          persistent_vis_chain],
                            n_steps=plot_every,
                            name="gibbs_vhv")

    # add to updates the shared variable that takes care of our persistent
    # chain
    updates.update({persistent_vis_chain: vis_samples[-1]})
    # construct the function that implements our persistent chain.
    # we generate the "mean field" activations for plotting and the actual
    # samples for reinitializing the state of our persistent chain
    sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]],
                                updates=updates, name='sample_fn')

    # create a space to store the image for plotting (we need to leave
    # room for the tile_spacing as well): each 28x28 tile plus 1 px of
    # spacing gives a 29-px pitch per tile row
    image_data = numpy.zeros(
        (img_size_1 * (n_samples + 1) + 1, img_size_1 * n_chains - 1),
        dtype='uint8')
    # row 0 shows the test digits the chains were initialized from
    image_data[0:img_size, :] = tile_raster_images(
        X=test_set_x.get_value(borrow=True)[test_idx:test_idx + n_chains],
        img_shape=(img_size, img_size),
        tile_shape=(1, n_chains),
        tile_spacing=(1, 1))

    for idx in range(n_samples):
        # generate `plot_every` intermediate samples that we discard,
        # because successive samples in the chain are too correlated
        vis_mf, vis_sample = sample_fn()
        print(' ... plotting sample %d' % idx)
        # shift down one tile row: row 0 already holds the test digits
        idx += 1
        image_data[img_size_1 * idx:img_size_1 * idx + img_size, :] = \
            tile_raster_images(X=vis_mf,
                               img_shape=(img_size, img_size),
                               tile_shape=(1, n_chains),
                               tile_spacing=(1, 1))

    # construct image
    image = Image.fromarray(image_data)
    image.save('samples.png')
    # end-snippet-7
    os.chdir('../')
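# Sanity check on the canvas arithmetic above (worked with the defaults
# n_samples=10, n_chains=20, img_size_1=29): one header row of test digits
# plus 10 sample rows at a 29-px pitch, with one spare pixel row, gives
# 320 canvas rows; 20 tile columns at 29 px minus the trailing 1-px gap
# gives 579 columns.
assert 29 * (10 + 1) + 1 == 320
assert 29 * 20 - 1 == 579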