def test_linear_cg():
    rng = numpy.random.RandomState([1, 2, 3])
    n = 5
    M = rng.randn(2 * n, n)
    M = numpy.dot(M.T, M).astype(config.floatX)
    b = rng.randn(n).astype(config.floatX)
    c = rng.randn(1).astype(config.floatX)[0]
    x = theano.tensor.vector('x')
    f = 0.5 * tensor.dot(x, tensor.dot(M, x)) - tensor.dot(b, x) + c
    sol = linear_cg.linear_cg(f, [x])

    fn_sol = theano.function([x], sol)

    start = time.time()
    sol = fn_sol(rng.randn(n).astype(config.floatX))[0]
    my_lcg = time.time() - start

    eval_f = theano.function([x], f)
    cgf = eval_f(sol)
    print("conjugate gradient's value of f:", str(cgf), 'time (s)', my_lcg)
    skip_if_no_scipy()
    spf = eval_f(scipy.linalg.solve(M, b))
    print("scipy.linalg.solve's value of f: " + str(spf))

    abs_diff = abs(cgf - spf)
    if not (abs_diff < 1e-5):
        raise AssertionError("Expected abs_diff < 1e-5, got abs_diff of " +
                             str(abs_diff))
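# Illustrative sketch, not part of the original test: the quadratic above,
# f(x) = 0.5 * x^T M x - b^T x + c, has gradient M x - b, which vanishes at
# x* = solve(M, b).  That is why scipy.linalg.solve provides the reference
# value the conjugate gradient solution is compared against.  The helper name
# below is hypothetical; it only restates that identity in plain numpy.
def _quadratic_minimum_reference_sketch():
    import numpy
    import scipy.linalg
    rng = numpy.random.RandomState(0)
    n = 5
    A = rng.randn(2 * n, n)
    M = numpy.dot(A.T, A)                # symmetric, generically positive definite
    b = rng.randn(n)
    x_star = scipy.linalg.solve(M, b)    # closed-form minimizer of f
    gradient = numpy.dot(M, x_star) - b  # should vanish at the minimum
    assert numpy.allclose(gradient, 0.0)
    return x_star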
def infer_S_hat(self, V, H_hat, S_hat, var_s0_hat, var_s1_hat, max_iters):

    alpha = self.model.alpha

    # mean truncated KL divergence as a function of the variational parameters
    obj = self.truncated_KL(V=V, obs=locals()).mean()

    # run up to max_iters steps of linear conjugate gradient on the objective
    # with respect to S_hat
    new_S_hat = linear_cg(fn=obj, params=S_hat, max_iters=max_iters)

    return new_S_hat
def train_batch(self, dataset, batch_size):
    """
    Runs one training update on a randomly chosen contiguous batch of
    `batch_size` examples from `dataset`: infers the sparse code gamma for
    each example, then takes a few linear conjugate gradient steps on the
    dictionary W. Returns True.
    """
    # TODO-- this results in compilation happening every time learn is
    # called; should cache the compilation results, including those
    # inside cg
    X = dataset.get_design_matrix()
    m = X.shape[0]
    assert X.shape[1] == self.nvis

    gamma = N.zeros((batch_size, self.nhid))

    cur_gamma = T.vector(name='cur_gamma')
    cur_v = T.vector(name='cur_v')
    recons = T.dot(cur_gamma, self.W)
    recons.name = 'recons'

    recons_diffs = cur_v - recons
    recons_diffs.name = 'recons_diffs'

    recons_diff_sq = T.sqr(recons_diffs)
    recons_diff_sq.name = 'recons_diff'

    recons_error = T.sum(recons_diff_sq)
    recons_error.name = 'recons_error'

    dict_dists = T.sum(T.sqr(self.W - cur_v), axis=1)
    dict_dists.name = 'dict_dists'

    abs_gamma = abs(cur_gamma)
    abs_gamma.name = 'abs_gamma'

    weighted_dists = T.dot(abs_gamma, dict_dists)
    weighted_dists.name = 'weighted_dists'

    penalty = self.coeff * weighted_dists
    penalty.name = 'penalty'

    # prevent directions of absolute flatness in the hessian
    #W_sq = T.sqr(self.W)
    #W_sq.name = 'W_sq'
    #debug = T.sum(W_sq)
    debug = 1e-10 * T.sum(dict_dists)
    debug.name = 'debug'
    #J = debug

    J = recons_error + penalty + debug
    J.name = 'J'

    Jf = function([cur_v, cur_gamma], J)

    start = self.rng.randint(m - batch_size + 1)
    batch_X = X[start:start + batch_size, :]

    #TODO-- optimize gamma
    print 'optimizing gamma'
    for i in xrange(batch_size):
        #print str(i+1)+'/'+str(batch_size)
        gamma[i, :] = self.optimize_gamma(batch_X[i, :])

    print 'max min'
    print N.abs(gamma).min(axis=0).max()
    print 'min max'
    print N.abs(gamma).max(axis=0).max()

    #Optimize W
    print 'optimizing W'
    warnings.warn("not tested since switching to Razvan's all-theano "
                  "implementation of linear cg")
    cg.linear_cg(J, [self.W], max_iters=3)

    err = 0.

    for i in xrange(batch_size):
        err += Jf(batch_X[i, :], gamma[i, :])

    assert not N.isnan(err)
    assert not N.isinf(err)

    print 'err: ' + str(err)

    return True
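# Illustrative sketch, not part of the original source: the penalty term built
# in train_batch above, written in plain numpy for a single example.  The
# argument names (W, v, gamma, coeff) mirror the symbols used above, but the
# helper itself is a hypothetical stand-in, not a method of the class.
def _sparse_coding_penalty_sketch(W, v, gamma, coeff):
    import numpy as np
    # squared distance from each dictionary row W[j] to the input v
    dict_dists = np.sum(np.square(W - v), axis=1)
    # each code's magnitude weights the distance of the dictionary row it uses
    weighted_dists = np.dot(np.abs(gamma), dict_dists)
    return coeff * weighted_dists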
def find_careduce(var):
    if var.owner is None:
        return False
    op = var.owner.op
    opname = op.__class__.__name__
    if opname == 'CAReduce':
        print var.owner.inputs
        print var
        return True
    else:
        for ipt in var.owner.inputs:
            if find_careduce(ipt):
                print var
                return True

cg_update = function([V], obj,
                     updates={S: linear_cg(fn=-obj, params=S, max_iters=3)})

for i in xrange(num_batches):
    X = dataset.get_batch_design(batch_size)

    em = init(X)

    print 'batch ', i
    print em

    for j in xrange(ga_updates):
        print 'ga: ', update(X)
    print 'cg: ', cg_update(X)