def ConstructAE(Xtr, Denc=[500], Dz=20, Ddec=[500], f=T.tanh, s2=1.0, \
                inf=AdaGrad(0.01)):
    # Plain autoencoder trained by minimising the squared reconstruction
    # error. (A fuller version with selectable output likelihoods appears
    # further below.)
    theta = []

    # Construct encoder network and 'latent' variables.
    X, Dobs = T.matrix('X'), Xtr.get_value().shape[1]
    Wenc, benc = mlp.WeightMatrices([Dobs] + Denc), mlp.BiasVectors(Denc)
    Hz = mlp.ConstructMLP(X, Wenc, benc, f)
    Wz, bz = mlp.WeightMatrix(Denc[-1], Dz, 'Wz'), mlp.BiasVector(Dz, 'bz')
    Z = f(T.dot(Hz, Wz) + bz)
    theta += Wenc + benc + [Wz, bz]

    # Construct decoder network.
    Wdec, bdec = mlp.WeightMatrices([Dz] + Ddec), mlp.BiasVectors(Ddec)
    Hx = mlp.ConstructMLP(Z, Wdec, bdec, f)
    theta += Wdec + bdec

    # Map to output. Looking to minimise the squared error (se).
    Wout = mlp.WeightMatrix(Ddec[-1], Dobs, 'Wout')
    bout = mlp.BiasVector(Dobs, 'bout')
    Xpr = lpdf.OutToProbs(Hx, Wout, bout)
    se = T.sum(T.sqr(X - Xpr))
    theta += [Wout, bout]

    # Add weight decay (equivalent to a zero-mean normal prior).
    weightdecay = mlp.ConstructNormalPrior(theta, s2)
    logjoint = -se + weightdecay

    # Build the training procedure.
    updates = inf.construct(logjoint, theta)
    idx = T.ivector('idx')
    train = function(inputs=[idx], outputs=se / X.shape[0], name='train',
                     updates=updates, givens={X: Xtr[idx]})

    # Build a function to reconstruct inputs.
    reconstruct = function(inputs=[X], outputs=Xpr, name='reconstruct',
                           updates=[], givens={})

    # Build a function to encode data.
    encode = function(inputs=[X], outputs=Z, name='encode',
                      updates=[], givens={})

    # Build a function to decode latent representations.
    decode = function(inputs=[Z], outputs=Xpr, name='decode',
                      updates=[], givens={})

    return train, reconstruct, encode, decode, theta
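# ConstructAE above leans on a small `mlp` helper module. What follows is a
# minimal sketch of it, with names and behaviour inferred from the call
# sites; the initialisation scale in particular is a guess, so treat this as
# an illustration rather than the original module.
import numpy as np
import numpy.random as rnd
import theano
import theano.tensor as T

def WeightMatrix(Din, Dout, name):
    # Small random initialisation (scale chosen arbitrarily).
    W0 = 0.01 * rnd.randn(Din, Dout).astype(theano.config.floatX)
    return theano.shared(W0, name=name)

def BiasVector(D, name):
    return theano.shared(np.zeros(D, dtype=theano.config.floatX), name=name)

def WeightMatrices(Ds):
    # One weight matrix per consecutive pair of layer sizes.
    return [WeightMatrix(Din, Dout, 'W' + str(i))
            for i, (Din, Dout) in enumerate(zip(Ds[:-1], Ds[1:]))]

def BiasVectors(Ds):
    return [BiasVector(D, 'b' + str(i)) for i, D in enumerate(Ds)]

def ConstructMLP(X, Ws, bs, f):
    # Stack of fully connected layers, each followed by the nonlinearity f.
    H = X
    for W, b in zip(Ws, bs):
        H = f(T.dot(H, W) + b)
    return H

def ConstructNormalPrior(theta, s2):
    # Log density (up to a constant) of independent zero-mean normal priors
    # on each parameter tensor; s2 may be a scalar or a per-parameter list.
    if not isinstance(s2, (list, tuple)):
        s2 = [s2] * len(theta)
    return sum(-0.5 * T.sum(T.sqr(p)) / v for p, v in zip(theta, s2))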
def LearnMLPracticalClassification(Xtr, Ytr, Xte, Yte, bayesian=0, \
        epochs=100, Dh=[500, 500, 500], infer=AdaGrad(0.01), L=1):
    # Build classifier learning procedure.
    print('Building classifier graph.')
    Ntr, Din = Xtr.get_value().shape
    Dout = Ytr.get_value().shape[1]
    f = T.tanh
    s2 = [1.0] * (2 * (len(Dh) + 1))  # one variance per weight/bias tensor.
    if bayesian:
        buildtraining, predict, logjoint, theta = \
            ConstructBayesianMLPClassifier(Din, Dh, Dout, f, s2, infer)
    else:
        buildtraining, predict, logjoint, theta = \
            ConstructPMLPClassifier(Din, Dh, Dout, f, s2, infer)
    print('Compiling MLP.')
    train = buildtraining(Xtr, Ytr)

    # Perform inference, sweeping over the data in minibatches of size bs.
    print('Performing inference.')
    bs, logp = 100, []
    for i in range(epochs):
        lb = 0
        while lb < Ntr:
            ub = lb + bs
            if ub > Ntr:
                ub = Ntr
            logp.append(train(lb, ub))
            lb = ub
        print('Epoch ' + str(i) + '. logjoint = ' + str(logp[-1]) + '.')

    # Compute accuracies.
    print('Computing predictions under posterior.')
    def accuracy(Y, Ypr):
        correct = 0.0
        idx = np.argmax(Ypr, 1)
        for i in range(idx.shape[0]):
            if Y[i, idx[i]] == 1.0:
                correct += 1.0
        return correct / Ypr.shape[0]

    # Average L stochastic forward passes (only useful for the Bayesian
    # classifier; L=1 suffices for the point-estimate model).
    Yprtr, Yprte = 0.0, 0.0
    for t in range(L):
        Yprtr += predict(Xtr.get_value())
        Yprte += predict(Xte.get_value())
    Yprtr /= L
    Yprte /= L
    print('training accuracy = ' + str(accuracy(Ytr.get_value(), Yprtr)))
    print('testing accuracy = ' + str(accuracy(Yte.get_value(), Yprte)))

    # Plot log probability over time.
    plt.plot(logp)
    plt.savefig('logp.pdf')
    plt.close()
    return theta
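# The infer/inf arguments throughout are AdaGrad optimiser objects exposing
# a construct(objective, theta) method that returns a Theano updates list.
# A minimal sketch, assuming gradient *ascent* on the log joint; the
# interface is inferred from the call sites, not taken from the original
# source.
import numpy as np
import theano
import theano.tensor as T

class AdaGrad(object):
    def __init__(self, stepsize, eps=1e-6):
        self.stepsize, self.eps = stepsize, eps

    def construct(self, objective, theta):
        updates, grads = [], T.grad(objective, theta)
        for p, g in zip(theta, grads):
            # One running sum of squared gradients per parameter tensor.
            acc = theano.shared(np.zeros(p.get_value().shape,
                                         dtype=theano.config.floatX))
            accnew = acc + T.sqr(g)
            updates.append((acc, accnew))
            # Ascent step with per-element adaptive stepsizes.
            updates.append((p, p + self.stepsize * g
                            / (T.sqrt(accnew) + self.eps)))
        return updates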
def main():
    def TestBiasVectors():
        biases = BiasVectors([5, 4])
        for bias in biases:
            print(bias.get_value().shape)
        return biases
    #biases = TestBiasVectors()

    def TestWeightMatrices():
        weights = WeightMatrices([10, 5, 4])
        for weight in weights:
            print(weight.get_value().shape)
        return weights
    #weights = TestWeightMatrices()

    def TestMLP():
        X = T.matrix('X')
        f = T.nnet.sigmoid
        Ws, bs = WeightMatrices([10, 5, 4]), BiasVectors([5, 4])
        out = ConstructMLP(X, Ws, bs, f)
        fn = function(inputs=[X], outputs=out)
        pydotprint(out, 'mlptest.png')
    #TestMLP()

    def TestNormalPrior():
        theta = biases + weights
        s2 = [1.0] * len(theta)
        logprior = ConstructNormalPrior(theta, s2)
        pydotprint(logprior, 'priortest.png')
    #TestNormalPrior()

    rnd.seed(15485863)

    """Xtr, Ytr, Xte, Yte = data.GenerateGaussians(1000, 1000)
    Dh = [500]
    theta = LearnMLPracticalClassification(Xtr, Ytr, Xte, Yte, \
        bayesian=1, epochs=100, Dh=Dh, L=100, infer=AdaGrad(0.1))
    return"""

    # Load data.
    print('Loading data.')
    Xtr, Ytr, Xte, Yte = data.LoadMLPracticalClassification(0)
    Dh = [1000, 1000]
    theta = LearnMLPracticalClassification(Xtr, Ytr, Xte, Yte, bayesian=0, \
        epochs=250, L=100, infer=AdaGrad(0.1), Dh=Dh)

    """Xtr, Ytr, Xte, Yte = data.LoadMNIST()"""
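# The commented-out experiment in main() calls data.GenerateGaussians, which
# is not shown in this listing. A hypothetical sketch of such a generator:
# two Gaussian clusters with one-hot labels, returned as Theano shared
# variables. Every detail below (dimensions, separation, layout) is an
# assumption.
import numpy as np
import numpy.random as rnd
import theano

def GenerateGaussians(Ntr, Nte, D=2):
    def shared(A):
        return theano.shared(A.astype(theano.config.floatX))
    def sample(N):
        # Half the points from each of two well-separated unit-variance
        # clusters, with one-hot class labels.
        X = np.vstack([rnd.randn(N // 2, D) - 2.0,
                       rnd.randn(N - N // 2, D) + 2.0])
        Y = np.zeros((N, 2))
        Y[:N // 2, 0], Y[N // 2:, 1] = 1.0, 1.0
        return shared(X), shared(Y)
    Xtr, Ytr = sample(Ntr)
    Xte, Yte = sample(Nte)
    return Xtr, Ytr, Xte, Yte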
def LearnMNIST(epochs=25, Dz=20, Ntr=50000):
    # Load MNIST data with 50000 training images. Note: returns Theano
    # shared variable objects. To access the actual data use
    # Xtr.get_value() or Xte.get_value().
    Xtr, Ytr, Xte, Yte = data.LoadMNIST(Ntr)

    # Construct autoencoder.
    train, reconstruct, encode, decode, theta = ConstructAE( \
        Xtr, Denc=[500], Dz=Dz, Ddec=[500], inf=AdaGrad(0.01))

    # Train the autoencoder. Permute the order of the data after each epoch;
    # without this you get weird periodicities in the learning curve.
    print('Training the autoencoder.')
    batch_size, mse = 100, []
    for i in range(epochs):
        idx = rnd.permutation(np.arange(Ntr)).astype(np.int32)
        lb = 0
        while lb < Ntr:
            ub = lb + batch_size
            if ub > Ntr:
                ub = Ntr
            mse.append(train(idx[lb:ub]))
            lb = ub
        print('Epoch ' + str(i) + '. mse = ' + str(mse[-1]))

    # Save the learning curve.
    #plt.plot(mse)
    #plt.savefig('mnist-mse.pdf')
    #plt.close()

    # Compute reconstruction error. I'm using rmse.
    Xtrpr, Xtepr = reconstruct(Xtr.get_value()), reconstruct(Xte.get_value())
    rmsetr, rmsete = rmse(Xtr.get_value(), Xtrpr), rmse(Xte.get_value(), Xtepr)
    print('training rmse = ' + str(rmsetr))
    print('testing rmse = ' + str(rmsete))

    return reconstruct, encode, decode, Xtr, Xte
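# rmse is used above but not defined anywhere in this listing; a one-line
# version consistent with its use on (N, D) arrays of inputs and
# reconstructions:
import numpy as np

def rmse(X, Xpr):
    # Root-mean-square error over all entries.
    return np.sqrt(np.mean(np.square(X - Xpr)))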
def LearnFreyFace(epochs=100, Dz=20, Ntr=1500):
    # Load Frey Face data with 1500 of the faces as training data. nb.
    # returns Theano shared variable objects. To access the actual data use
    # Xtr.get_value() or Xte.get_value().
    Xtr, Xte = data.LoadFreyFace(Ntr)

    # Construct autoencoder with a continuous (normal) output distribution.
    train, reconstruct, encode, decode, theta = ConstructAE( \
        Xtr, Denc=[200], Dz=Dz, Ddec=[200], otype='cont', inf=AdaGrad(0.01))

    # Train the autoencoder. Permute the order of the data after each epoch;
    # without this you get weird periodicities in the learning curve.
    print('Training the autoencoder.')
    batch_size, loglik = 100, []
    for i in range(epochs):
        idx = rnd.permutation(np.arange(Ntr)).astype(np.int32)
        lb = 0
        while lb < Ntr:
            ub = lb + batch_size
            if ub > Ntr:
                ub = Ntr
            loglik.append(train(idx[lb:ub]))
            lb = ub
        print('Epoch ' + str(i) + '. mean loglik = ' + str(loglik[-1]))

    # Save the learning curve.
    plt.plot(loglik)
    plt.savefig('frey-loglik.pdf')
    plt.close()

    # Compute reconstruction error. I'm using rmse.
    Xtrpr, Xtepr = reconstruct(Xtr.get_value()), reconstruct(Xte.get_value())
    rmsetr, rmsete = rmse(Xtr.get_value(), Xtrpr), rmse(Xte.get_value(), Xtepr)
    print('training rmse = ' + str(rmsetr))
    print('testing rmse = ' + str(rmsete))

    return reconstruct, encode, decode
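# Hypothetical usage of the compiled encode/decode functions: encode two
# training faces and decode a linear interpolation between their codes.
# Assumes the usual 28x20 Frey Face flattening (560 dimensions); apart from
# LearnFreyFace and data.LoadFreyFace, nothing below comes from the original
# source.
import numpy as np
import theano
import matplotlib.pyplot as plt

reconstruct, encode, decode = LearnFreyFace(epochs=100, Dz=20)
Xtr, Xte = data.LoadFreyFace(1500)  # reload; LearnFreyFace doesn't return it.
Z = encode(Xtr.get_value()[:2])
alphas = np.linspace(0.0, 1.0, 8)
path = np.vstack([(1.0 - a) * Z[0] + a * Z[1] for a in alphas])
faces = decode(path.astype(theano.config.floatX))
fig, axes = plt.subplots(1, len(alphas))
for ax, face in zip(axes, faces):
    ax.imshow(face.reshape(28, 20), cmap='gray')
    ax.axis('off')
plt.savefig('frey-interpolation.pdf')
plt.close()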
def ConstructAE(Xtr, Denc=[500], Dz=20, Ddec=[500], f=T.tanh, s2=1.0, \
                inf=AdaGrad(0.01), otype='binary'):
    theta = []

    # Construct encoder network and 'latent' variables.
    X, Dobs = T.matrix('X'), Xtr.get_value().shape[1]
    Wenc, benc = mlp.WeightMatrices([Dobs] + Denc), mlp.BiasVectors(Denc)
    Hz = mlp.ConstructMLP(X, Wenc, benc, f)
    Wz, bz = mlp.WeightMatrix(Denc[-1], Dz, 'Wz'), mlp.BiasVector(Dz, 'bz')
    Z = T.dot(Hz, Wz) + bz
    theta += Wenc + benc + [Wz, bz]

    # Construct decoder network. Type of output depends upon otype.
    Wdec, bdec = mlp.WeightMatrices([Dz] + Ddec), mlp.BiasVectors(Ddec)
    Hx = mlp.ConstructMLP(Z, Wdec, bdec, f)
    theta += Wdec + bdec

    if otype == 'binary':
        # Bernoulli likelihood on [0, 1]-valued outputs.
        Wout = mlp.WeightMatrix(Ddec[-1], Dobs, 'Wout')
        bout = mlp.BiasVector(Dobs, 'bout')
        Xpr = lpdf.OutToProbs(Hx, Wout, bout)
        loglik = lpdf.bernoulli(X, Xpr)
        theta += [Wout, bout]
    elif otype == 'cont':
        # Independent normal likelihood with input-dependent mean and
        # variance.
        Wmu, Wlogs2, bmu, blogs2 = mlp.WeightMatrix(Ddec[-1], Dobs, 'Wmu'), \
            mlp.WeightMatrix(Ddec[-1], Dobs, 'Wlogs2'), \
            mlp.BiasVector(Dobs, 'bmu'), mlp.BiasVector(Dobs, 'blogs2')
        Xpr = lpdf.OutToProbs(Hx, Wmu, bmu)
        Xlogs2 = lpdf.OutToReal(Hx, Wlogs2, blogs2)
        loglik = lpdf.indep_normal(X, Xpr, Xlogs2)
        theta += [Wmu, Wlogs2, bmu, blogs2]
    else:
        raise ValueError("otype must be 'binary' or 'cont'.")

    # Define prior and compute log joint.
    logprior = mlp.ConstructNormalPrior(theta, s2)
    logjoint = loglik + logprior + mlp.ConstructNormalPrior([Z], 1.0)

    # Build the training procedure.
    updates = inf.construct(logjoint, theta)
    idx = T.ivector('idx')
    train = function(inputs=[idx], outputs=loglik / X.shape[0], name='train',
                     updates=updates, givens={X: Xtr[idx]})

    # Build a function to make predictions (distributions over outputs).
    reconstruct = function(inputs=[X], outputs=Xpr, name='reconstruct',
                           updates=[], givens={})

    # Build a function to encode data.
    encode = function(inputs=[X], outputs=Z, name='encode',
                      updates=[], givens={})

    # Build a function to decode latent representations.
    decode = function(inputs=[Z], outputs=Xpr, name='decode',
                      updates=[], givens={})

    return train, reconstruct, encode, decode, theta
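# ConstructAE also assumes an `lpdf` helper module. A minimal sketch with
# semantics inferred from the call sites (e.g. OutToProbs is taken to be a
# sigmoid output layer); treat it as an illustration, not the original code.
import numpy as np
import theano.tensor as T

def OutToProbs(H, W, b):
    # Squash the final linear layer into (0, 1).
    return T.nnet.sigmoid(T.dot(H, W) + b)

def OutToReal(H, W, b):
    # Unconstrained linear output (used above for per-pixel log variances).
    return T.dot(H, W) + b

def bernoulli(X, P):
    # Log-likelihood of X under independent Bernoullis with means P.
    return T.sum(X * T.log(P) + (1.0 - X) * T.log(1.0 - P))

def indep_normal(X, mu, logs2):
    # Log-likelihood of X under independent normals with means mu and
    # log variances logs2.
    return T.sum(-0.5 * np.log(2.0 * np.pi) - 0.5 * logs2
                 - 0.5 * T.sqr(X - mu) / T.exp(logs2))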