def test_write(self):
    batch_size = 100
    height, width = self.height, self.width
    N = self.N
    zaw = self.zaw

    # Create theano function
    content = T.fmatrix('content')
    center_y, center_x = T.fvectors('center_y', 'center_x')
    delta, sigma = T.fvectors('delta', 'sigma')

    images = zaw.write(content, center_y, center_x, delta, sigma)

    do_write = theano.function(
        [content, center_y, center_x, delta, sigma], images,
        name="do_write", allow_input_downcast=True)

    # Test theano function
    content = np.random.uniform(size=(batch_size, N**2))
    center_y = np.linspace(-height, 2 * height, batch_size)
    center_x = np.linspace(-width, 2 * width, batch_size)
    delta = np.linspace(0.1, height, batch_size)
    sigma = np.linspace(0.1, height, batch_size)

    images = do_write(content, center_y, center_x, delta, sigma)

    assert images.shape == (batch_size, height * width)
    assert np.isfinite(images).all()
def test_filterbank_matrices(self):
    batch_size = 100
    height, width = self.height, self.width
    N = self.N
    zaw = self.zaw

    # Create theano function
    center_y, center_x = T.fvectors('center_y', 'center_x')
    delta, sigma = T.fvectors('delta', 'sigma')

    FY, FX = zaw.filterbank_matrices(center_y, center_x, delta, sigma)

    do_filterbank = theano.function(
        [center_y, center_x, delta, sigma], [FY, FX],
        name="do_filterbank_matrices", allow_input_downcast=True)

    # Test theano function
    center_y = np.linspace(-height, 2 * height, batch_size)
    center_x = np.linspace(-width, 2 * width, batch_size)
    delta = np.linspace(0.1, height, batch_size)
    sigma = np.linspace(0.1, height, batch_size)

    FY, FX = do_filterbank(center_y, center_x, delta, sigma)

    assert FY.shape == (batch_size, N, height)
    assert FX.shape == (batch_size, N, width)
    assert np.isfinite(FY).all()
    assert np.isfinite(FX).all()
def test_read(self):
    batch_size = 100
    height, width = self.height, self.width
    N = self.N
    zaw = self.zaw

    # Create theano function
    images = T.ftensor3('images')
    center_y, center_x = T.fvectors('center_y', 'center_x')
    delta, sigma = T.fvectors('delta', 'sigma')

    readout = zaw.read(images, center_y, center_x, delta, sigma)

    do_read = theano.function(
        [images, center_y, center_x, delta, sigma], readout,
        name="do_read", allow_input_downcast=True)

    # Test theano function
    images = np.random.uniform(size=(batch_size, height, width))
    center_y = np.linspace(-height, 2 * height, batch_size)
    center_x = np.linspace(-width, 2 * width, batch_size)
    delta = np.linspace(0.1, height, batch_size)
    sigma = np.linspace(0.1, height, batch_size)

    readout = do_read(images, center_y, center_x, delta, sigma)

    assert readout.shape == (batch_size, N**2)
    assert np.isfinite(readout).all()
    assert (readout >= 0.).all()
    assert (readout <= 1.).all()
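# Hedged sketch of the fixture the three tests above assume: a unittest-style
# setUp providing self.height, self.width, self.N and self.zaw. The
# ZoomableAttentionWindow constructor signature and the numeric values here
# are assumptions (DRAW-style attention), not confirmed by the tests.
def setUp(self):
    self.height, self.width = 28, 28   # hypothetical image size
    self.N = 12                        # hypothetical N x N window size
    self.zaw = ZoomableAttentionWindow(self.height, self.width, self.N)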
def __init__(self, steps=1, num_layers=2, num_units=32, eps=1e-2):
    self.X, self.Z = T.fvectors('X', 'Z')
    self.P, self.Q, self.R = T.fmatrices('P', 'Q', 'R')
    self.dt = T.scalar('dt')

    self.matrix_inv = T.nlinalg.MatrixInverse()

    self.ar = AutoRegressiveModel(steps=steps, num_layers=num_layers,
                                  num_units=num_units, eps=eps)

    # Nonlinear motion model: the AR network predicts the next value from
    # the last `steps` values; the remaining state entries are shifted down.
    l = InputLayer(input_var=self.X, shape=(steps,))
    l = ReshapeLayer(l, shape=(1, steps))
    l = self.ar.network(l)
    l = ReshapeLayer(l, shape=(1,))
    self.l_ = l
    self.f_ = get_output(self.l_)

    self.X_ = T.concatenate([self.f_, T.dot(T.eye(steps)[:-1], self.X)], axis=0)
    self.fX_ = G.jacobian(self.X_.flatten(), self.X)

    self.P_ = T.dot(T.dot(self.fX_, self.P), T.transpose(self.fX_)) + \
        T.dot(T.dot(T.eye(steps)[:, 0:1], self.dt * self.Q), T.eye(steps)[0:1, :])

    self.h = T.dot(T.eye(steps)[0:1], self.X_)
    self.y = self.Z - self.h
    self.hX_ = G.jacobian(self.h, self.X_)

    self.S = T.dot(T.dot(self.hX_, self.P_), T.transpose(self.hX_)) + self.R
    self.K = T.dot(T.dot(self.P_, T.transpose(self.hX_)), self.matrix_inv(self.S))

    self.X__ = self.X_ + T.dot(self.K, self.y)
    self.P__ = T.dot(T.identity_like(self.P) - T.dot(self.K, self.hX_), self.P_)

    self.prediction = theano.function(
        inputs=[self.X, self.P, self.Q, self.dt],
        outputs=[self.X_, self.P_],
        allow_input_downcast=True)

    self.update = theano.function(
        inputs=[self.X, self.Z, self.P, self.Q, self.R, self.dt],
        outputs=[self.X__, self.P__],
        allow_input_downcast=True)
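# Hedged usage sketch for the filter above: one predict/update cycle. The
# class name NeuralEKF and all numeric values are assumptions for illustration.
ekf = NeuralEKF(steps=3)
X = np.zeros(3, dtype='float32')        # state: the last 3 observations
P = np.eye(3, dtype='float32')          # state covariance
Q = 1e-3 * np.eye(1, dtype='float32')   # process noise (1x1, injected at index 0)
R = 1e-2 * np.eye(1, dtype='float32')   # measurement noise (1x1)
dt = 0.1

X_pred, P_pred = ekf.prediction(X, P, Q, dt)
z = np.array([1.0], dtype='float32')    # new scalar measurement
X_new, P_new = ekf.update(X, z, P, Q, R, dt)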
def evaluate_net(*states):
    activations = T.fvectors(len(weights))
    idx = 0
    for neurons, activator, isInput, isOutput, weightFrame in weights:
        sumParts = []
        for i, info in enumerate(weightFrame):
            srcIdx, w = info
            sumParts.append(T.dot(states[srcIdx], w.transpose()))

        if len(sumParts):
            sumParts = T.stack(*sumParts)
            activity = T.sum(sumParts, axis=0)
            if activator == TIDENTITY:
                activation = activity
            elif activator == TLOGISTIC:
                activation = 1. / (1. + T.exp(-activity))
            elif activator == THYPERBOLIC:
                activation = T.tanh(activity)
            elif activator == TTHRESHOLD:
                activation = T.sgn(activity)
            elif activator == TBIAS:
                activation = T.ones_like(activity, dtype='float32')
            elif activator == TRADIAL:
                activation = T.exp(-activity * activity / 2.0)
            else:
                raise Exception(
                    "Unknown activation function for layer {0}".format(layer.id))
        else:
            activation = T.zeros_like(states[idx])

        activations[idx] = activation
        idx += 1

    checklist = [T.all(T.eq(a, s)) for a, s in zip(activations, states)]
    condition = T.all(T.as_tensor_variable(checklist))

    return activations, {}, theano.scan_module.until(condition)
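# Hedged sketch of how a step function like evaluate_net is typically driven:
# theano.scan re-applies it until the `until` condition reports a fixed point.
# initial_states (one float32 vector per layer) and MAX_ITERS are assumptions.
results, scan_updates = theano.scan(
    evaluate_net,
    outputs_info=initial_states,  # list of initial activation vectors
    n_steps=MAX_ITERS)            # upper bound; `until` can stop earlier
final_states = [r[-1] for r in results]  # converged activations per layer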
def __init__(self, steps=1, num_layers=2, num_units=32, eps=1e-2,
             alpha=1e-2, beta=2.0, kappa=0.0):
    # Scaling parameter for the sigma points
    lam = alpha * alpha * (steps + kappa) - steps + beta

    self.X, self.Z = T.fvectors('X', 'Z')
    self.P, self.Q, self.R = T.fmatrices('P', 'Q', 'R')
    self.dt = T.scalar('dt')

    sqrtm = MatrixSqrt()
    self.matrix_inv = T.nlinalg.MatrixInverse()

    self.ar = AutoRegressiveModel(steps=steps, num_layers=num_layers,
                                  num_units=num_units, eps=eps)

    def weighted_mean(A, w):
        mu = T.zeros((steps, 1))
        for i in range(2 * steps + 1):
            mu += w[i] * A[:, i:i + 1]
        return mu

    def weighted_covariance(A, B, a, b, w):
        sigma = T.zeros((steps, steps))
        for i in range(2 * steps + 1):
            sigma += w[i] * T.dot((A[:, i:i + 1] - a), (B[:, i:i + 1] - b).T)
        return sigma

    # Sigma points: the mean plus/minus scaled columns of sqrt(P)
    self.sqrtP = sqrtm(self.P)
    self.XB = T.dot(T.stack(self.X).T, T.ones((1, 2 * steps + 1))) + \
        T.concatenate([T.zeros((steps, 1)),
                       T.sqrt(steps + lam) * self.sqrtP,
                       -T.sqrt(steps + lam) * self.sqrtP], axis=1)

    # Propagate each sigma point through the AR network
    l = InputLayer(input_var=self.XB.T, shape=(2 * steps + 1, steps))
    l = self.ar.network(l)
    l = ReshapeLayer(l, shape=(1, 2 * steps + 1))
    self.l_ = l
    self.f_ = get_output(self.l_)

    self.XC = T.concatenate([self.f_, T.dot(T.eye(steps)[:-1], self.XB)], axis=0)

    W_m = T.concatenate([(lam / (steps + lam)) * T.ones(1),
                         (1.0 / (2.0 * (steps + lam))) * T.ones(2 * steps)],
                        axis=0)
    W_c = T.concatenate([(lam / (steps + lam) + (1.0 - alpha * alpha + beta)) * T.ones(1),
                         (1.0 / (2.0 * (steps + lam))) * T.ones(2 * steps)],
                        axis=0)

    self.X_ = weighted_mean(self.XC, W_m)
    self.P_ = weighted_covariance(self.XC, self.XC, self.X_, self.X_, W_c) + \
        T.dot(T.dot(T.eye(steps)[:, 0:1], self.dt * self.Q), T.eye(steps)[0:1, :])

    self.ZB = T.dot(T.eye(steps)[0:1, :], self.XC)
    self.Z_ = weighted_mean(self.ZB, W_m)

    self.S = weighted_covariance(self.ZB, self.ZB, self.Z_, self.Z_, W_c) + self.R
    self.K = T.dot(weighted_covariance(self.XC, self.ZB, self.X_, self.Z_, W_c),
                   self.matrix_inv(self.S))

    self.X__ = self.X_ + T.dot(self.K, self.Z - self.Z_)
    self.P__ = self.P_ - T.dot(T.dot(self.K, self.S), self.K.T)

    self.prediction = theano.function(
        inputs=[self.X, self.P, self.Q, self.dt],
        outputs=[self.X_, self.P_],
        allow_input_downcast=True)

    self.update = theano.function(
        inputs=[self.X, self.Z, self.P, self.Q, self.R, self.dt],
        outputs=[self.X__, self.P__],
        allow_input_downcast=True)
''' A theano implementation of the T-LSTM '''
import theano.tensor as T
from theano import function
import numpy as np
import collections
import pdb
import os

#np.seterr(under='warn')

# Symbolic inputs shared by the compiled helpers below
h, b = T.fvectors('h', 'b')
W, X = T.fmatrices('W', 'X')

# Small compiled building blocks used throughout the model
dotvec = function([h, b], T.dot(h, b))            # vector . vector
dot = function([W, h], T.dot(W, h))               # matrix . vector
dotW = function([W, X], T.dot(W, X))              # matrix . matrix
layer = function([W, h, b], T.dot(W, h) + b)      # affine layer W.h + b
sigmoid = function([h], T.nnet.ultra_fast_sigmoid(h))
tanh = function([h], T.tanh(h))
add = function([h, b], h + b)
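# Hedged usage sketch for the compiled helpers above, with small random
# inputs. Values must be float32 since the symbols are fvectors/fmatrices.
W_val = np.random.randn(4, 3).astype('float32')
h_val = np.random.randn(3).astype('float32')
b_val = np.random.randn(4).astype('float32')

pre = layer(W_val, h_val, b_val)  # W . h + b, shape (4,)
gate = sigmoid(pre)               # elementwise logistic gate
out = tanh(pre)                   # elementwise tanh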
def find_Ys(Xs_shared, Ys_shared, sigmas_shared, N, steps, output_dims,
            n_epochs, initial_lr, final_lr, lr_switch, init_stdev,
            initial_momentum, final_momentum, momentum_switch, lmbda, metric,
            verbose=0):
    """Optimize cost wrt Ys[t], simultaneously for all t"""
    # Optimization hyperparameters
    initial_lr = np.array(initial_lr, dtype=floath)
    final_lr = np.array(final_lr, dtype=floath)
    initial_momentum = np.array(initial_momentum, dtype=floath)
    final_momentum = np.array(final_momentum, dtype=floath)

    lr = T.fscalar('lr')
    lr_shared = theano.shared(initial_lr)

    momentum = T.fscalar('momentum')
    momentum_shared = theano.shared(initial_momentum)

    # Penalty hyperparameter
    lmbda_var = T.fscalar('lmbda')
    lmbda_shared = theano.shared(np.array(lmbda, dtype=floath))

    # Yv velocities
    Yvs_shared = []
    zero_velocities = np.zeros((N, output_dims), dtype=floath)
    for t in range(steps):
        Yvs_shared.append(theano.shared(np.array(zero_velocities)))

    # Cost
    Xvars = T.fmatrices(steps)
    Yvars = T.fmatrices(steps)
    Yv_vars = T.fmatrices(steps)
    sigmas_vars = T.fvectors(steps)

    c_vars = []
    for t in range(steps):
        c_vars.append(cost_var(Xvars[t], Yvars[t], sigmas_vars[t], metric))

    cost = T.sum(c_vars) + lmbda_var * movement_penalty(Yvars, N)

    # Setting update for Ys velocities
    grad_Y = T.grad(cost, Yvars)

    givens = {lr: lr_shared, momentum: momentum_shared,
              lmbda_var: lmbda_shared}
    updates = []
    for t in range(steps):
        updates.append((Yvs_shared[t], momentum * Yv_vars[t] - lr * grad_Y[t]))

        givens[Xvars[t]] = Xs_shared[t]
        givens[Yvars[t]] = Ys_shared[t]
        givens[Yv_vars[t]] = Yvs_shared[t]
        givens[sigmas_vars[t]] = sigmas_shared[t]

    update_Yvs = theano.function([], cost, givens=givens, updates=updates)

    # Setting update for Ys positions
    updates = []
    givens = dict()
    for t in range(steps):
        updates.append((Ys_shared[t], Yvars[t] + Yv_vars[t]))

        givens[Yvars[t]] = Ys_shared[t]
        givens[Yv_vars[t]] = Yvs_shared[t]

    update_Ys = theano.function([], [], givens=givens, updates=updates)

    # Momentum-based gradient descent
    for epoch in range(n_epochs):
        if epoch == lr_switch:
            lr_shared.set_value(final_lr)
        if epoch == momentum_switch:
            momentum_shared.set_value(final_momentum)

        c = update_Yvs()
        update_Ys()

        if verbose:
            print('Epoch: {0}. Cost: {1:.6f}.'.format(epoch + 1, float(c)))

    Ys = []
    for t in range(steps):
        Ys.append(np.array(Ys_shared[t].get_value(), dtype=floath))

    return Ys
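# Hedged sketch of the shared-variable setup find_Ys expects: per time step,
# one (N, D) data matrix in Xs_shared, one (N, output_dims) embedding in
# Ys_shared, and one length-N bandwidth vector in sigmas_shared. The random
# initialization scaled by init_stdev is an assumption.
Xs_shared = [theano.shared(np.asarray(X_t, dtype=floath)) for X_t in Xs]
Ys_shared = [theano.shared(np.asarray(init_stdev * np.random.randn(N, output_dims),
                                      dtype=floath)) for _ in range(steps)]
sigmas_shared = [theano.shared(np.ones(N, dtype=floath)) for _ in range(steps)]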
def output_types_preference(type_re, type_im):
    return type_re, type_im

def rec(self, re, im):
    if np_cabs(re, im) <= r0:
        return 1., 0., re + CP_re, im + CP_im
    else:
        x_re, x_im = cmul(re, im, CPR_re, CPR_im)
        d_re, d_im, ci_re, ci_im = self.rec(x_re, x_im)
        y_re, y_im = np_cexp(d_re, d_im)
        z_re, z_im = cmul(y_re, y_im, d_re, d_im)
        w_re, w_im = cmul(z_re, z_im, CPR_re, CPR_im)
        cix_re, cix_im = np_cexp(ci_re, ci_im)
        return w_re, w_im, cix_re, cix_im

def impl(self, re, im):
    return self.rec(re, im)

if __name__ == '__main__':
    x_re, x_im = T.fvectors(['x_re', 'x_im'])
    chi_re, chi_im = chi(x_re, x_im)
    f = theano.function([x_re, x_im], [chi_re, chi_im])

    x_re_val = np.asarray([1, 2, 3], dtype='float32')
    x_im_val = np.asarray([10, 20, 30], dtype='float32')
    print(f(x_re_val, x_im_val))
def test_mlp():
    # rootpath = '/root/sharedfolder/code/demo_18aug22'
    # save_path = '/root/sharedfolder/code/demo_18aug22'
    rootpath = '/root/sharedfolder/code/emt_dnn/test'
    save_path = '/root/sharedfolder/code/emt_dnn/test'
    # save_path = '/Users/bspl/Desktop/regssion'
    save_name = '%s/rst_vlnc_predcition.mat' % (save_path)  # file to save the dnnwsp result

    # DNN structure
    [n_in, n_hidden1, n_hidden2, n_hidden3, n_output] = [55417, 20, 20, 20, 1]

    val_L2 = 1e-5           # L2-norm parameter
    itlrate = 0.0005        # initial learning rate
    batch_size = 2          # batch size
    momentum = 0.01         # momentum
    n_epochs = 500          # the total number of epochs
    scal_ref = 10           # the scale for the emotion response
    dcay_rate = 0.99        # decay rate for the learning rate
    corruption_level = 0.3  # zero out a randomly selected subset of the inputs of size corruption_level

    # Parameters for the node-wise control of weight sparsity.
    # With three hidden layers, give one target Hoyer's sparseness per layer.
    hsp_level = [0.7, 0.5, 0.3]   # target sparsity
    max_beta = [0.03, 0.5, 0.5]   # maximum beta changes
    beta_lrates = 1e-2

    rng = np.random.RandomState(8000)

    ########################################## Input data #################################################
    sbjinfo = sio.loadmat('%s/emt_valence_sample.mat' % rootpath)
    ############# emt_sample_data.mat #############
    # train_x = 64 volumes x 55417 voxels
    # train_y = 64 volumes x 1 [valence, arousal or dominance scores for training]
    # test_x  = 16 volumes x 55417 voxels
    # test_y  = 16 volumes x 1 [valence, arousal or dominance scores for test]
    ###############################################

    start_time = timeit.default_timer()

    train_x = sbjinfo['train_x']
    train_y = np.asarray(sbjinfo['train_y'], 'float32').flatten() / scal_ref
    test_x = sbjinfo['test_x']
    test_y = np.asarray(sbjinfo['test_y'], 'float32').flatten() / scal_ref

    n_train_set_x = scipy.stats.zscore(train_x, axis=1, ddof=1)
    n_test_set_x = scipy.stats.zscore(test_x, axis=1, ddof=1)

    n_trvld_batches = int(train_x.shape[0] / batch_size)
    n_test_batches = int(n_test_set_x.shape[0] / batch_size)

    ########################################## Build model #################################################
    index = T.lscalar()               # index to a [mini]batch
    x = T.matrix('x')                 # the data is presented as rasterized images
    y = T.fvector('y')                # the emotion responses are presented as a 1D vector
    is_train = T.iscalar('is_train')  # pseudo boolean for switching between training and prediction

    L1p_ly1 = T.fvector()             # node-wise L1 penalty weights, layer 1
    L1p_ly2 = T.fvector()             # node-wise L1 penalty weights, layer 2
    L1p_ly3 = T.fvector()             # node-wise L1 penalty weights, layer 3
    L2p_ly = T.fscalar()
    lrate = T.fscalar()

    [op_tg_L1_ly1, op_tg_L1_ly2, op_tg_L1_ly3] = hsp_level
    [max_beta_ly1, max_beta_ly2, max_beta_ly3] = max_beta

    print('... optimal HSP!!')
    print('%1.1f-%1.1f-%1.1f' % (op_tg_L1_ly1, op_tg_L1_ly2, op_tg_L1_ly3))

    hsp_ly1 = 0; hsp_ly2 = 0; hsp_ly3 = 0
    val_L1_ly1 = 0; val_L1_ly2 = 0; val_L1_ly3 = 0

    list_hsp_ly1 = np.zeros((n_epochs, 1)); list_hsp_ly2 = np.zeros((n_epochs, 1)); list_hsp_ly3 = np.zeros((n_epochs, 1))
    list_L1_ly1 = np.zeros((n_epochs, 1)); list_L1_ly2 = np.zeros((n_epochs, 1)); list_L1_ly3 = np.zeros((n_epochs, 1))
    list_tr_err = np.zeros((n_epochs, 1)); list_ts_err = np.zeros((n_epochs, 1))
    lrate_list = np.zeros((n_epochs, 1))

    train_set_x = theano.shared(np.asarray(n_train_set_x, dtype=theano.config.floatX))
    train_set_y = T.cast(theano.shared(train_y, borrow=True), 'float32')
    test_set_x = theano.shared(np.asarray(n_test_set_x, dtype=theano.config.floatX))
    test_set_y = T.cast(theano.shared(test_y, borrow=True), 'float32')

    lrate_val = itlrate

    # construct the MLP class
    classifier = MLP(
        rng=rng,
        input=x,
        n_in=n_in,
        n_hidden1=n_hidden1,
        n_hidden2=n_hidden2,
        n_hidden3=n_hidden3,
        n_out=n_output,
        corruption_level=corruption_level,
        is_train=is_train,
    )

    # cost function: squared error plus node-wise L1 and global L2 penalties
    cost = ((classifier.linearRegressionLayer.y_pred - y) ** 2).sum()
    cost += (T.dot(abs(classifier.hiddenLayer1.W), L1p_ly1)).sum()
    cost += (T.dot(abs(classifier.hiddenLayer2.W), L1p_ly2)).sum()
    cost += (T.dot(abs(classifier.hiddenLayer3.W), L1p_ly3)).sum()
    cost += L2p_ly * classifier.L2_sqr

    gparams = [T.grad(cost, param) for param in classifier.params]
    new_gparams = [g / float(batch_size) for g in gparams]

    updates = []
    for param, gparam, oldparam in zip(classifier.params, new_gparams, classifier.oldparams):
        delta = lrate * gparam + momentum * oldparam
        updates.append((param, param - delta))
        updates.append((oldparam, delta))

    trvld_model = theano.function(
        inputs=[index, L1p_ly1, L1p_ly2, L1p_ly3, L2p_ly, lrate],
        outputs=[classifier.errors(y), classifier.linearRegressionLayer.y_pred],
        updates=updates,
        givens={
            x: train_set_x[index * batch_size: (index + 1) * batch_size],
            y: train_set_y[index * batch_size: (index + 1) * batch_size],
            is_train: np.cast['int32'](1)
        },
        allow_input_downcast=True,
        on_unused_input='ignore',
    )

    test_model = theano.function(
        inputs=[index],
        outputs=[classifier.errors(y), classifier.linearRegressionLayer.y_pred],
        givens={
            x: test_set_x[index * batch_size: (index + 1) * batch_size],
            y: test_set_y[index * batch_size: (index + 1) * batch_size],
            is_train: np.cast['int32'](0)
        },
        on_unused_input='ignore'
    )

    list_trvld_err = np.zeros((n_epochs, 1))
    tst_err = np.zeros((n_epochs, 1))
    pct_trvld = np.zeros((n_epochs, n_trvld_batches * batch_size))
    pct_tst = np.zeros((n_epochs, n_test_batches * batch_size))

    hsp_val_ly1 = np.zeros((n_epochs + 1, n_hidden1)); hsp_val_ly2 = np.zeros((n_epochs + 1, n_hidden2)); hsp_val_ly3 = np.zeros((n_epochs + 1, n_hidden3))
    L1_val_ly1 = np.zeros((n_epochs + 1, n_hidden1)); L1_val_ly2 = np.zeros((n_epochs + 1, n_hidden2)); L1_val_ly3 = np.zeros((n_epochs + 1, n_hidden3))

    ########################################## Learning model #################################################
    print('... Training & Test')

    epoch = 0
    done_looping = False
    while (epoch < n_epochs) and (not done_looping):
        epoch = epoch + 1

        trvld_score = np.zeros((n_trvld_batches, 1))
        tmp_trvld_pct = 0
        for minibatch_index in range(n_trvld_batches):
            tmp_mat_ly1 = L1_val_ly1[epoch - 1, :]
            tmp_mat_ly2 = L1_val_ly2[epoch - 1, :]
            tmp_mat_ly3 = L1_val_ly3[epoch - 1, :]

            trvld_out = trvld_model(minibatch_index, tmp_mat_ly1, tmp_mat_ly2,
                                    tmp_mat_ly3, val_L2, lrate_val)

            if minibatch_index == 0:
                tmp_trvld_pct = trvld_out[1]
            else:
                tmp_trvld_pct = np.concatenate((tmp_trvld_pct, trvld_out[1]), axis=0)

            [hsp_val_ly1[epoch, :], L1_val_ly1[epoch, :]] = hsp_fnc_inv_mat_cal(
                L1_val_ly1[epoch - 1, :], classifier.hiddenLayer1.W, max_beta_ly1, op_tg_L1_ly1, beta_lrates)
            [hsp_val_ly2[epoch, :], L1_val_ly2[epoch, :]] = hsp_fnc_inv_mat_cal(
                L1_val_ly2[epoch - 1, :], classifier.hiddenLayer2.W, max_beta_ly2, op_tg_L1_ly2, beta_lrates)
            [hsp_val_ly3[epoch, :], L1_val_ly3[epoch, :]] = hsp_fnc_inv_mat_cal(
                L1_val_ly3[epoch - 1, :], classifier.hiddenLayer3.W, max_beta_ly3, op_tg_L1_ly3, beta_lrates)

        trvld_score = np.mean(abs(tmp_trvld_pct - train_y[np.arange(0, len(tmp_trvld_pct))]))
        list_trvld_err[epoch - 1] = trvld_score * scal_ref
        pct_trvld[epoch - 1][:] = tmp_trvld_pct

        tmp_test_pct = 0
        for i in range(n_test_batches):
            test_out = test_model(i)
            if i == 0:
                tmp_test_pct = test_out[1]
            else:
                tmp_test_pct = np.concatenate((tmp_test_pct, test_out[1]), axis=0)

        test_score = np.mean(abs(tmp_test_pct - test_y[np.arange(0, len(tmp_test_pct))]))
        tst_err[epoch - 1] = test_score * scal_ref
        pct_tst[epoch - 1][:] = tmp_test_pct

        lrate_val *= dcay_rate
        lrate_list[epoch - 1] = lrate_val

        print('#######')
        print('CP %.2f inv_hsp-lrate %6f, test epoch %i/%i, minibatch %i/%i, tr_err %f, test_err %f' %
              (corruption_level, lrate_list[epoch - 1], epoch, n_epochs,
               minibatch_index + 1, n_trvld_batches, trvld_score * scal_ref, test_score * scal_ref))
        print(('hsp_ly1= %.3f/%.3f, L1p_ly1= %.3f, hsp_ly2= %.3f/%.3f, L1p_ly2= %.3f, hsp_ly3= %.3f/%.3f, L1p_ly3= %.3f') %
              (np.mean(hsp_val_ly1[epoch - 1, :]), op_tg_L1_ly1, np.mean(L1_val_ly1[epoch - 1, :]),
               np.mean(hsp_val_ly2[epoch - 1, :]), op_tg_L1_ly2, np.mean(L1_val_ly2[epoch - 1, :]),
               np.mean(hsp_val_ly3[epoch - 1, :]), op_tg_L1_ly3, np.mean(L1_val_ly3[epoch - 1, :])))

        list_ts_err[epoch - 1] = test_score * scal_ref

    ########################################## Save variables #################################################
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    end_time = timeit.default_timer()
    cst_time = (end_time - start_time) / 60.

    sio.savemat(save_name, {
        'w1': classifier.hiddenLayer1.W.get_value(borrow=True), 'b1': classifier.hiddenLayer1.b.get_value(borrow=True),
        'w2': classifier.hiddenLayer2.W.get_value(borrow=True), 'b2': classifier.hiddenLayer2.b.get_value(borrow=True),
        'w3': classifier.hiddenLayer3.W.get_value(borrow=True), 'b3': classifier.hiddenLayer3.b.get_value(borrow=True),
        'w4': classifier.linearRegressionLayer.W.get_value(borrow=True), 'b4': classifier.linearRegressionLayer.b.get_value(borrow=True),
        'pct_trvld': pct_trvld, 'pct_tst': pct_tst, 'trvld_err': list_trvld_err, 'ts_err': list_ts_err, 'L2_val': val_L2,
        'l1ly1': L1_val_ly1, 'l1ly2': L1_val_ly2, 'l1ly3': L1_val_ly3,
        'hsply1': hsp_val_ly1, 'hsply2': hsp_val_ly2, 'hsply3': hsp_val_ly3,
        'l_rate': lrate_list, 'cst_time': cst_time, 'epch': epoch, 'max_beta': max_beta, 'beta_lrates': beta_lrates,
        'test_y': test_y, 'train_y': train_y, 'mtum': momentum, 'btch_size': batch_size,
        'opt_hsp': hsp_level, 'cp_lev': corruption_level})

    print('...done!')
def find_all_step_visible_data(all_step_original_data_shared,
                               all_step_visible_data_shared, sigmas_shared, N,
                               steps, output_dims, n_epochs, initial_lr,
                               final_lr, lr_switch, initial_momentum,
                               final_momentum, momentum_switch,
                               penalty_lambda, metric, verbose=0):
    """Optimize cost wrt all_step_visible_data[t], simultaneously for all t"""
    # Optimization hyper-parameters
    initial_lr = np.array(initial_lr, dtype=floath)
    final_lr = np.array(final_lr, dtype=floath)
    initial_momentum = np.array(initial_momentum, dtype=floath)
    final_momentum = np.array(final_momentum, dtype=floath)

    lr = T.fscalar('lr')
    lr_shared = theano.shared(initial_lr)

    momentum = T.fscalar('momentum')
    momentum_shared = theano.shared(initial_momentum)

    # Penalty hyper-parameter
    penalty_lambda_var = T.fscalar('penalty_lambda')
    penalty_lambda_shared = theano.shared(
        np.array(penalty_lambda, dtype=floath))

    # Velocities
    all_step_visible_progress_shared = []
    zero_velocities = np.zeros((N, output_dims), dtype=floath)
    for t in range(steps):
        all_step_visible_progress_shared.append(
            theano.shared(np.array(zero_velocities)))

    # Cost
    all_step_original_data_vars = T.fmatrices(steps)
    all_step_visible_data_vars = T.fmatrices(steps)
    all_step_visible_progress_vars = T.fmatrices(steps)
    sigmas_vars = T.fvectors(steps)

    c_vars = []
    for t in range(steps):
        c_vars.append(
            cost_var(all_step_original_data_vars[t],
                     all_step_visible_data_vars[t], sigmas_vars[t], metric))

    cost = T.sum(c_vars) + penalty_lambda_var * movement_penalty(
        all_step_visible_data_vars, N)

    # Setting update for all_step_visible_data velocities
    grad_Y = T.grad(cost, all_step_visible_data_vars)

    givens = {
        lr: lr_shared,
        momentum: momentum_shared,
        penalty_lambda_var: penalty_lambda_shared
    }
    updates = []
    for t in range(steps):
        updates.append(
            (all_step_visible_progress_shared[t],
             momentum * all_step_visible_progress_vars[t] - lr * grad_Y[t]))

        givens[all_step_original_data_vars[t]] = all_step_original_data_shared[t]
        givens[all_step_visible_data_vars[t]] = all_step_visible_data_shared[t]
        givens[all_step_visible_progress_vars[t]] = all_step_visible_progress_shared[t]
        givens[sigmas_vars[t]] = sigmas_shared[t]

    update_Yvs = theano.function([], cost, givens=givens, updates=updates)

    # Setting update for all_step_visible_data positions
    updates = []
    givens = dict()
    for t in range(steps):
        updates.append(
            (all_step_visible_data_shared[t],
             all_step_visible_data_vars[t] + all_step_visible_progress_vars[t]))

        givens[all_step_visible_data_vars[t]] = all_step_visible_data_shared[t]
        givens[all_step_visible_progress_vars[t]] = all_step_visible_progress_shared[t]

    update_all_step_visible_data = theano.function([], [], givens=givens,
                                                   updates=updates)

    # Momentum-based gradient descent
    for epoch in range(n_epochs):
        if epoch == lr_switch:
            lr_shared.set_value(final_lr)
        if epoch == momentum_switch:
            momentum_shared.set_value(final_momentum)

        c = update_Yvs()
        update_all_step_visible_data()

        if verbose:
            print('Epoch: {0}. Cost: {1:.6f}.'.format(epoch + 1, float(c)))

    all_step_visible_data = []
    for t in range(steps):
        all_step_visible_data.append(
            np.array(all_step_visible_data_shared[t].get_value(), dtype=floath))

    return all_step_visible_data
def __init__(self, N=None, size=1, mu0=0.1, sigma_mean0=10, sigma_std0=1.0,
             sigma_min=0.1, sigma_max=10, data=None):
    self.N = N
    self.K = size

    # Parameter initialization
    # Random init
    if data is None:
        # mu = random normal with std mu0, mean 0
        self.mu = mu0 * np.random.randn(self.N, self.K).astype(DTYPE)

        # Sigma = random normal with mean sigma_mean0, std sigma_std0,
        # clipped to [sigma_min, sigma_max]
        self.Sigma = np.random.randn(self.N, 1).astype(DTYPE)
        self.Sigma *= sigma_std0
        self.Sigma += sigma_mean0
        self.Sigma = np.maximum(sigma_min, np.minimum(self.Sigma, sigma_max))

        self.Gaussian = np.concatenate((self.mu, self.Sigma), axis=1)

        # TensorVariables for mi, mj, si, sj respectively
        a, b = T.fvectors('a', 'b')
        c, d = T.fscalars('c', 'd')

        # Energy as a TensorVariable
        E = -0.5 * (self.K * d / c + T.sum((a - b) ** 2 / c)
                    - self.K - self.K * T.log(d / c))
        self.enrg = function([a, b, c, d], E)

        g1 = T.grad(E, a)  # dE/dmi
        self.f1 = function([a, b, c, d], g1)
        g2 = T.grad(E, b)  # dE/dmj
        self.f2 = function([a, b, c, d], g2)
        g3 = T.grad(E, c)  # dE/dsi
        self.f3 = function([a, b, c, d], g3)
        g4 = T.grad(E, d)  # dE/dsj
        self.f4 = function([a, b, c, d], g4)

    # Non-random init
    else:
        self.mu = []
        self.Sigma = []
        for i in range(len(data)):
            mu, std = norm.fit(data[i])
            var = np.power(std, 2)
            self.mu.append(mu)
            self.Sigma.append(var)

        self.Gaussian = np.concatenate(
            (np.asarray(self.mu), np.asarray(self.Sigma)), axis=1)
        self.Gaussian = np.reshape(self.Gaussian, (2, N)).T
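# Hedged usage sketch: evaluating the energy and its gradients for a pair of
# embedded Gaussians i and j (means mi, mj; spherical variances si, sj). The
# instance name `model` and the dimensionality K=5 are assumptions.
mi = np.random.randn(5).astype('float32')
mj = np.random.randn(5).astype('float32')
si, sj = np.float32(1.0), np.float32(2.0)

E_val = model.enrg(mi, mj, si, sj)   # scalar energy
dE_dmi = model.f1(mi, mj, si, sj)    # gradient of E wrt mi
dE_dsi = model.f3(mi, mj, si, sj)    # gradient of E wrt si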
def __init__(self, state='x', measurement='z',
             motion_transition=None, measurement_transition=None):
    self.N = len(state.split(' '))
    self.M = len(measurement.split(' '))

    self.X, self.Z = T.fvectors('X', 'Z')
    self.P, self.Q, self.R = T.fmatrices('P', 'Q', 'R')
    self.F, self.H = T.matrices('F', 'H')
    self.dt = T.scalar('dt')

    # Prediction step
    self.X_ = T.dot(self.F, self.X)
    self.fX_ = G.jacobian(T.flatten(self.X_), self.X)
    self.P_ = T.dot(T.dot(self.fX_, self.P), T.transpose(self.fX_)) + self.dt * self.Q

    # Update step
    self.h = T.dot(self.H, self.X_)
    self.y = self.Z - self.h
    self.hX_ = G.jacobian(self.h, self.X_)

    self.matrix_inv = T.nlinalg.MatrixInverse()

    self.S = T.dot(T.dot(self.hX_, self.P_), T.transpose(self.hX_)) + self.R
    self.K = T.dot(T.dot(self.P_, T.transpose(self.hX_)), self.matrix_inv(self.S))

    self.X__ = self.X_ + T.dot(self.K, self.y)
    self.P__ = T.dot(T.identity_like(self.P) - T.dot(self.K, self.hX_), self.P_)

    self.prediction = theano.function(
        inputs=[self.X, self.P, self.Q, self.F, self.dt],
        outputs=[self.X_, self.P_],
        allow_input_downcast=True)

    self.update = theano.function(
        inputs=[self.X, self.Z, self.P, self.Q, self.R, self.F, self.H, self.dt],
        outputs=[self.X__, self.P__],
        allow_input_downcast=True)

    if motion_transition is None:
        self.motion_transition = np.eye(self.N)
    else:
        self.motion_transition = np.array(motion_transition)

    if measurement_transition is None:
        self.measurement_transition = np.eye(self.M)
    else:
        self.measurement_transition = np.array(measurement_transition)
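# Hedged usage sketch: a constant-velocity filter driven by the class above.
# The class name KalmanFilter and the F, H, Q, R values are illustrative
# assumptions, not from the source.
kf = KalmanFilter(state='x v', measurement='z')
F = np.array([[1., 1.], [0., 1.]])   # motion model: x += v per unit time
H = np.array([[1., 0.]])             # observe position only
X, P = np.zeros(2), np.eye(2)
Q, R = 1e-4 * np.eye(2), 1e-2 * np.eye(1)

X, P = kf.prediction(X, P, Q, F, 1.0)            # predict with dt = 1
X, P = kf.update(X, [1.05], P, Q, R, F, H, 1.0)  # fuse measurement z = 1.05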