def test_vae():
    """Train a tied-weight variational auto-encoder on MNIST and visualize
    predictions and reconstructions before and after SGD training."""
    ds = odin.dataset.load_mnist()
    # Encoder and decoder share one weight matrix: decoder uses its transpose.
    W = T.variable(T.np_glorot_uniform(shape=(784, 512)), name='W')
    WT = T.transpose(W)
    encoder = odin.nnet.Dense((None, 28, 28), num_units=512, W=W, name='encoder')
    decoder = odin.nnet.Dense((None, 512), num_units=784, W=WT, name='decoder2')
    vae = odin.nnet.VariationalEncoderDecoder(
        encoder=encoder, decoder=decoder,
        prior_logsigma=1.7, batch_size=64)

    # ====== prediction ====== #
    x = ds['X_train'][:16]
    predict_fn = T.function(inputs=vae.input_var, outputs=vae(training=False))
    print("Predictions:", predict_fn(x)[0].shape)
    reconstruct_fn = T.function(
        inputs=vae.input_var,
        outputs=vae.set_reconstruction_mode(True)(training=False))
    y = reconstruct_fn(x)[0].reshape(-1, 28, 28)
    print("Predictions:", y.shape)
    odin.visual.plot_images(x)
    odin.visual.plot_images(y)
    odin.visual.plot_show()
    print('Params:', [p.name for p in vae.get_params(False)])
    print('Params(globals):', [p.name for p in vae.get_params(True)])

    # ====== Optimizer ====== #
    def sgd_optimizer(params, grads):
        return odin.optimizers.sgd(params, grads, learning_rate=0.01)

    cost, updates = vae.get_optimization(
        objective=odin.objectives.categorical_crossentropy,
        optimizer=sgd_optimizer,
        globals=True, training=True)
    train_fn = T.function(inputs=vae.input_var, outputs=cost, updates=updates)
    losses = []
    niter = ds['X_train'].iter_len() / 64
    for _ in xrange(2):
        for i, batch in enumerate(ds['X_train'].iter(64)):
            # VAE was built with a fixed batch_size; skip the ragged last batch.
            if batch.shape[0] != 64:
                continue
            losses.append(train_fn(batch))
            odin.logger.progress(i, niter, str(losses[-1]))
    odin.visual.print_bar(losses)

    # ====== reconstruc ====== #
    reconstruct_fn = T.function(
        inputs=vae.input_var,
        outputs=vae.set_reconstruction_mode(True)(training=False))
    X_test = ds['X_test'][:16]
    X_reco = reconstruct_fn(X_test)[0].reshape(-1, 28, 28)
    odin.visual.plot_images(X_test)
    odin.visual.plot_images(X_reco)
    odin.visual.plot_show()
def test_rbm():
    """Train an RBM on MNIST with persistent contrastive divergence, then
    draw mean-field reconstructions from random test images.

    NOTE(review): relies on module-level names `ds` (the dataset), `numpy`,
    and `plt` being defined elsewhere in this file — confirm before running
    this function standalone.
    """
    batch_size = 32
    # Persistent chain: the Gibbs state carried between PCD updates.
    persistent_chain = T.variable(numpy.zeros((batch_size, 500)))
    # Fixed: removed dead `input_ = odin.nnet.Dense((None, 28, 28), num_units=784)`
    # that was immediately overwritten by the shape tuple below.
    input_ = (None, 28, 28)
    rbm = odin.nnet.RBM(input_, 500, persistent=persistent_chain)
    print('Input variables:', rbm.input_var)
    print('Output variables:', rbm.output_var)

    def sgd(params, grads):
        return odin.optimizers.sgd(params, grads, learning_rate=0.01)

    cost, updates = rbm.get_optimization(
        optimizer=sgd, globals=True,
        objective=odin.objectives.contrastive_divergence)
    print('Building functions...')
    train_rbm = T.function(
        inputs=rbm.input_var,
        outputs=cost,
        updates=updates)

    cost = []
    niter = ds['X_train'].iter_len() / batch_size
    for i, x in enumerate(ds['X_train'].iter(batch_size, seed=13)):
        # Persistent chain has fixed batch_size; skip the ragged last batch.
        if x.shape[0] != batch_size:
            continue
        # x = x.astype(int) # this one can mess up the whole training process
        cost.append(train_rbm(x))
        odin.logger.progress(i, niter, title='%.5f' % cost[-1])
    odin.visual.print_bar(cost, bincount=20)

    # 1-step Gibbs sampling, returning the mean-field visible activations.
    vis_mfc = rbm.set_sampling_steps(1).set_reconstruction_mode(True)()
    print('Building functions...')
    # NOTE(review): reusing the training `updates` here also advances the
    # persistent chain (and optimizer state) on every sampling call — confirm
    # this side effect is intended.
    sample_rbm = T.function(
        inputs=rbm.input_var,
        outputs=vis_mfc,
        updates=updates)
    test_x = ds['X_test'].value
    for i in xrange(3):
        t = numpy.random.randint(test_x.shape[0] - 16)
        x = test_x[t:t + 16]
        x_mean = sample_rbm(x)[0]
        odin.visual.plot_images(x)
        odin.visual.plot_images(x_mean)
        plt.show(block=False)
        raw_input('<Enter>')
        plt.close('all')
def test_aED():  # AutoEncoderDecoder
    """Train a 2-layer tied-weight auto-encoder-decoder on MNIST, plotting
    learned filters per epoch, then reconstructions and hidden activations.

    Uses the module-level `ds` dataset and `np`/`plt` aliases.
    """
    # Tied weights: decoder layers reuse the transposed encoder matrices.
    Wa = T.variable(T.np_glorot_uniform(shape=(784, 256)), name='W')
    Wb = T.variable(T.np_glorot_uniform(shape=(256, 128)), name='W')
    d1a = odin.nnet.Dense((None, 28, 28), num_units=256, W=Wa,
                          name='d1a', nonlinearity=T.sigmoid)
    d1b = odin.nnet.Dense(d1a, num_units=128, W=Wb,
                          name='d1b', nonlinearity=T.sigmoid)
    # or d1b, (None, 128) as incoming
    d2a = odin.nnet.Dense((None, 128), num_units=256, W=Wb.T,
                          name='d2a', nonlinearity=T.sigmoid)
    d2b = odin.nnet.Dense(d2a, num_units=784, W=Wa.T,
                          name='d2b', nonlinearity=T.sigmoid)
    aED = odin.nnet.AutoEncoderDecoder(d1b, d2b)

    def sgd(params, grads):
        return odin.optimizers.sgd(params, grads, learning_rate=0.01)

    cost, updates = aED.get_optimization(
        objective=odin.objectives.categorical_crossentropy,
        optimizer=sgd, globals=True)
    f_train = T.function(
        inputs=aED.input_var,
        outputs=cost,
        updates=updates)
    losses = []
    niter = ds['X_train'].iter_len() / 64
    choices = None
    for _ in xrange(3):
        for i, batch in enumerate(ds['X_train'].iter(64)):
            losses.append(f_train(batch))
            odin.logger.progress(i, niter, title=str(losses[-1]))
        print()
        # `i == i` filters out NaN losses before plotting the histogram.
        odin.visual.print_bar([i for i in losses if i == i], bincount=20)
        # Visualize the same 16 randomly-chosen first-layer filters each epoch.
        W = T.get_value(aED.get_params(True)[0]).T.reshape(-1, 28, 28)
        if choices is None:
            choices = np.random.choice(
                np.arange(W.shape[0]), size=16, replace=False)
        odin.visual.plot_images(W[choices])
        plt.show(block=False)
        raw_input('<enter>')

    # ====== Output reconstruction ====== #
    f_pred = T.function(
        inputs=aED.input_var,
        outputs=aED.set_reconstruction_mode(True)())
    for _ in xrange(3):
        t = np.random.randint(ds['X_test'].shape[0] - 16)
        X = ds['X_test'][t:t + 16]
        X_pred = f_pred(X)[0].reshape(-1, 28, 28)
        odin.visual.plot_images(X)
        odin.visual.plot_images(X_pred)
        odin.visual.plot_show()

    # ====== OUtput hidden activation ====== #
    f_pred = T.function(
        inputs=aED.input_var,
        outputs=aED())
    X = ds['X_test'][t:t + 16]
    print(f_pred(X)[0].shape)
# =========================================================================== # Conclusion: # Stack parameters and precompute_inputs significantly increase speed # =========================================================================== from __future__ import print_function, division import os os.environ['ODIN'] = 'cpu,float32,theano' import odin from odin import tensor as T import numpy as np import time batch_size = 128 seq_len = 512 X = T.variable(np.random.rand(batch_size, seq_len, 20)) W1 = T.variable(np.random.rand(20, 10)) W2 = T.variable(np.random.rand(20, 10)) W3 = T.variable(np.random.rand(20, 10)) W4 = T.variable(np.random.rand(20, 10)) hidden = T.variable(np.random.rand(batch_size, 20)) # ====== First approach ====== # W = T.concatenate((W1, W2, W3, W4), axis=1) # 20x40 inputs = T.dot(X, W) #batch_sizexseq_lenx40 inputs1 = T.dot(X, W1) inputs2 = T.dot(X, W2) inputs3 = T.dot(X, W3) inputs4 = T.dot(X, W4)
from __future__ import print_function, division import numpy as np import os os.environ['ODIN'] = 'theano,float32' from odin import tensor as T import time import theano def step(s1, s2, s3, o1, o2, n1, n2): return o1, o2 seq1 = T.variable(np.arange(10)) seq2 = T.variable(np.arange(20)) seq3 = T.variable(np.arange(5)) nonseq1 = T.variable(1.) nonseq2 = T.variable(2.) ([o1, o2], updates) = theano.scan( fn=step, sequences=[seq1, seq2, seq3], outputs_info=[T.zeros((2, 2)), T.ones((2, 2))], non_sequences=[nonseq1, nonseq2], n_steps=None, truncate_gradient=-1, go_backwards=False) f1 = T.function( inputs=[], outputs=[o1, o2], updates=updates) a, b = f1()