def test_lincg_L_precond(): symb['M'] = T.vector('M') vals['M'] = numpy.diag(vals['L']) ### without preconditioning ### [sol, niter, rerr] = lincg.linear_cg(lambda x: [T.dot(symb['L'], x)], [symb['g']], M=None, rtol=1e-8, maxiter=10000, floatX=floatX) f = theano.function([symb['L'], symb['g']], sol + [niter, rerr]) t1 = time.time() [Linv_g, niter, rerr] = f(vals['L'], vals['g']) print 'No precond: test_lincg runtime (s):', time.time() - t1 print '\t niter = ', niter print '\t residual error = ', rerr numpy.testing.assert_almost_equal(Linv_g, vals['Linv_g'], decimal=3) ### with preconditioning ### [sol, niter, rerr] = lincg.linear_cg(lambda x: [T.dot(symb['L'], x)], [symb['g']], M=[symb['M']], rtol=1e-8, maxiter=10000, floatX=floatX) f = theano.function([symb['L'], symb['g'], symb['M']], sol + [niter, rerr]) t1 = time.time() [Linv_g, niter, rerr] = f(vals['L'], vals['g'], vals['M']) print 'With precond: test_lincg runtime (s):', time.time() - t1 print '\t niter = ', niter print '\t residual error = ', rerr numpy.testing.assert_almost_equal(Linv_g, vals['Linv_g'], decimal=3)
def test_lincg_L_diag_heavy_precond(): vals['Ldh'] = copy.copy(vals['L']) rdiag = numpy.random.rand(nparams) * 100 for i in xrange(len(vals['L'])): vals['Ldh'][i, i] += rdiag[i] vals['Ldh_inv_g'] = linalg.cho_solve(linalg.cho_factor(vals['Ldh']), vals['g']) symb['M'] = T.vector('M') vals['M'] = numpy.diag(vals['Ldh']) ### without preconditioning ### [sol, niter, rerr] = lincg.linear_cg(lambda x: [T.dot(symb['L'], x)], [symb['g']], M=None, rtol=1e-20, maxiter=10000, floatX=floatX) f = theano.function([symb['L'], symb['g']], sol + [niter, rerr]) t1 = time.time() [Linv_g, niter, rerr] = f(vals['Ldh'], vals['g']) print 'No precond: test_lincg runtime (s):', time.time() - t1 print '\t niter = ', niter print '\t residual error = ', rerr numpy.testing.assert_almost_equal(Linv_g, vals['Ldh_inv_g'], decimal=3) ### with preconditioning ### [sol, niter, rerr] = lincg.linear_cg(lambda x: [T.dot(symb['L'], x)], [symb['g']], M=[symb['M']], rtol=1e-20, maxiter=10000, floatX=floatX) f = theano.function([symb['L'], symb['g'], symb['M']], sol + [niter, rerr]) t1 = time.time() [Linv_g, niter, rerr] = f(vals['Ldh'], vals['g'], vals['M']) print 'With precond: test_lincg runtime (s):', time.time() - t1 print '\t niter = ', niter print '\t residual error = ', rerr numpy.testing.assert_almost_equal(Linv_g, vals['Ldh_inv_g'], decimal=3) ### test scipy implementation ### t1 = time.time() cg(vals['Ldh'], vals['g'], maxiter=10000, tol=1e-10) print 'scipy.sparse.linalg.cg (no preconditioning): Elapsed ', time.time( ) - t1 t1 = time.time() cg(vals['Ldh'], vals['g'], maxiter=10000, tol=1e-10, M=numpy.diag(1. / vals['M'])) print 'scipy.sparse.linalg.cg (preconditioning): Elapsed ', time.time( ) - t1
def test_lincg_L_diag_heavy_precond(): vals['Ldh'] = copy.copy(vals['L']) rdiag = numpy.random.rand(nparams) * 100 for i in xrange(len(vals['L'])): vals['Ldh'][i,i] += rdiag[i] vals['Ldh_inv_g'] = linalg.cho_solve(linalg.cho_factor(vals['Ldh']), vals['g']) symb['M'] = T.vector('M') vals['M'] = numpy.diag(vals['Ldh']) ### without preconditioning ### [sol, niter, rerr] = lincg.linear_cg( lambda x: [T.dot(symb['L'], x)], [symb['g']], M = None, rtol=1e-20, maxiter = 10000, floatX = floatX) f = theano.function([symb['L'], symb['g']], sol + [niter, rerr]) t1 = time.time() [Linv_g, niter, rerr] = f(vals['Ldh'], vals['g']) print 'No precond: test_lincg runtime (s):', time.time() - t1 print '\t niter = ', niter print '\t residual error = ', rerr numpy.testing.assert_almost_equal(Linv_g, vals['Ldh_inv_g'], decimal=3) ### with preconditioning ### [sol, niter, rerr] = lincg.linear_cg( lambda x: [T.dot(symb['L'], x)], [symb['g']], M = [symb['M']], rtol=1e-20, maxiter = 10000, floatX = floatX) f = theano.function([symb['L'], symb['g'], symb['M']], sol + [niter, rerr]) t1 = time.time() [Linv_g, niter, rerr] = f(vals['Ldh'], vals['g'], vals['M']) print 'With precond: test_lincg runtime (s):', time.time() - t1 print '\t niter = ', niter print '\t residual error = ', rerr numpy.testing.assert_almost_equal(Linv_g, vals['Ldh_inv_g'], decimal=3) ### test scipy implementation ### t1 = time.time() cg(vals['Ldh'], vals['g'], maxiter=10000, tol=1e-10) print 'scipy.sparse.linalg.cg (no preconditioning): Elapsed ', time.time() - t1 t1 = time.time() cg(vals['Ldh'], vals['g'], maxiter=10000, tol=1e-10, M=numpy.diag(1./vals['M'])) print 'scipy.sparse.linalg.cg (preconditioning): Elapsed ', time.time() - t1
def test_linearcg():
    """Compare ``lincg.linear_cg`` against the reference ``Linv_x_*`` arrays.

    The operator is ``natural.compute_Lx`` evaluated on shared copies of
    the module-level arrays ``v``, ``g`` and ``h``.  The right-hand side
    is the list ``[dw, dv, da, db, dc]`` built from those same arrays.
    The five outputs are compared to ``Linv_x_w``, ``Linv_x_v``,
    ``Linv_x_a``, ``Linv_x_b`` and ``Linv_x_c`` to 1 decimal.
    """
    vv = theano.shared(v, name='v')
    gg = theano.shared(g, name='g')
    hh = theano.shared(h, name='h')

    # Right-hand side terms; built from the raw arrays, not the shared ones.
    dw = T.dot(v.T, g) / M
    dv = T.dot(g.T, h) / M
    da = T.mean(v, axis=0)
    db = T.mean(g, axis=0)
    dc = T.mean(h, axis=0)
    rhs = [dw, dv, da, db, dc]

    def apply_L(xw, xv, xa, xb, xc):
        return natural.compute_Lx(vv, gg, hh, xw, xv, xa, xb, xc)

    # NOTE(review): other callers in this file unpack linear_cg's result as
    # [sol, niter, rerr]; here the raw result is used directly as the
    # function outputs -- confirm which solver API this test targets.
    newgrads = lincg.linear_cg(
        apply_L, rhs,
        rtol=1e-5, maxiter=30, damp=0., floatX=floatX, profile=0)

    solve = theano.function([], newgrads)
    [new_dw, new_dv, new_da, new_db, new_dc] = solve()

    numpy.testing.assert_almost_equal(Linv_x_w, new_dw, decimal=1)
    numpy.testing.assert_almost_equal(Linv_x_v, new_dv, decimal=1)
    numpy.testing.assert_almost_equal(Linv_x_a, new_da, decimal=1)
    numpy.testing.assert_almost_equal(Linv_x_b, new_db, decimal=1)
    numpy.testing.assert_almost_equal(Linv_x_c, new_dc, decimal=1)
def test_linearcg():
    """Compare ``lincg.linear_cg`` against the reference ``Linv_x_*`` arrays.

    The operator is ``natural.compute_Lx`` evaluated on shared copies of
    the module-level arrays ``v``, ``g`` and ``h``.  The right-hand side
    is the list ``[dw, dv, da, db, dc]`` built from those same arrays.
    The five outputs are compared to ``Linv_x_w``, ``Linv_x_v``,
    ``Linv_x_a``, ``Linv_x_b`` and ``Linv_x_c`` to 1 decimal.
    """
    vv = theano.shared(v, name='v')
    gg = theano.shared(g, name='g')
    hh = theano.shared(h, name='h')

    # Right-hand side terms; built from the raw arrays, not the shared ones.
    dw = T.dot(v.T, g) / M
    dv = T.dot(g.T, h) / M
    da = T.mean(v, axis=0)
    db = T.mean(g, axis=0)
    dc = T.mean(h, axis=0)
    rhs = [dw, dv, da, db, dc]

    def apply_L(xw, xv, xa, xb, xc):
        return natural.compute_Lx(vv, gg, hh, xw, xv, xa, xb, xc)

    # NOTE(review): other callers in this file unpack linear_cg's result as
    # [sol, niter, rerr]; here the raw result is used directly as the
    # function outputs -- confirm which solver API this test targets.
    newgrads = lincg.linear_cg(
        apply_L, rhs,
        rtol=1e-5, maxiter=30, damp=0., floatX=floatX, profile=0)

    solve = theano.function([], newgrads)
    [new_dw, new_dv, new_da, new_db, new_dc] = solve()

    numpy.testing.assert_almost_equal(Linv_x_w, new_dw, decimal=1)
    numpy.testing.assert_almost_equal(Linv_x_v, new_dv, decimal=1)
    numpy.testing.assert_almost_equal(Linv_x_a, new_da, decimal=1)
    numpy.testing.assert_almost_equal(Linv_x_b, new_db, decimal=1)
    numpy.testing.assert_almost_equal(Linv_x_c, new_dc, decimal=1)
def test_lincg_L_precond(): symb['M'] = T.vector('M') vals['M'] = numpy.diag(vals['L']) ### without preconditioning ### [sol, niter, rerr] = lincg.linear_cg( lambda x: [T.dot(symb['L'], x)], [symb['g']], M = None, rtol=1e-8, maxiter = 10000, floatX = floatX) f = theano.function([symb['L'], symb['g']], sol + [niter, rerr]) t1 = time.time() [Linv_g, niter, rerr] = f(vals['L'], vals['g']) print 'No precond: test_lincg runtime (s):', time.time() - t1 print '\t niter = ', niter print '\t residual error = ', rerr numpy.testing.assert_almost_equal(Linv_g, vals['Linv_g'], decimal=3) ### with preconditioning ### [sol, niter, rerr] = lincg.linear_cg( lambda x: [T.dot(symb['L'], x)], [symb['g']], M = [symb['M']], rtol=1e-8, maxiter = 10000, floatX = floatX) f = theano.function([symb['L'], symb['g'], symb['M']], sol + [niter, rerr]) t1 = time.time() [Linv_g, niter, rerr] = f(vals['L'], vals['g'], vals['M']) print 'With precond: test_lincg runtime (s):', time.time() - t1 print '\t niter = ', niter print '\t residual error = ', rerr numpy.testing.assert_almost_equal(Linv_g, vals['Linv_g'], decimal=3)
def test_lincg(): [sol, niter, rerr] = lincg.linear_cg(lambda x: [T.dot(symb['L'], x)], [symb['g']], M=None, rtol=1e-8, maxiter=10000, floatX=floatX) f = theano.function([symb['L'], symb['g']], sol + [niter, rerr]) t1 = time.time() [Linv_g, niter, rerr] = f(vals['L'], vals['g']) print 'test_lincg runtime (s):', time.time() - t1 print '\t niter = ', niter print '\t residual error = ', rerr numpy.testing.assert_almost_equal(Linv_g, vals['Linv_g'], decimal=3)
def test_lincg(): [sol, niter, rerr] = lincg.linear_cg( lambda x: [T.dot(symb['L'], x)], [symb['g']], M = None, rtol=1e-8, maxiter = 10000, floatX = floatX) f = theano.function([symb['L'], symb['g']], sol + [niter, rerr]) t1 = time.time() [Linv_g, niter, rerr] = f(vals['L'], vals['g']) print 'test_lincg runtime (s):', time.time() - t1 print '\t niter = ', niter print '\t residual error = ', rerr numpy.testing.assert_almost_equal(Linv_g, vals['Linv_g'], decimal=3)
def test_fisher_Linv_x():
    """Compare ``lincg.linear_cg`` with ``fisher.compute_Lx`` to a Cholesky solve.

    The reference ``Linv_x`` is obtained from a Cholesky solve of ``L``
    against ``vals['x']`` and split into five pieces (W, V, a, b, c) using
    the sizes ``N0``, ``N1`` and ``N2``.  The solver outputs are compared
    piece by piece to that reference to 1 decimal.
    """
    Linv_x = linalg.cho_solve(linalg.cho_factor(L), vals['x'])

    # Offsets of each piece inside the flat reference solution.
    end_w = N0*N1
    end_v = end_w + N1*N2
    end_a = end_v + N0
    end_b = end_a + N1
    Linv_x_w = Linv_x[:end_w].reshape(N0, N1)
    Linv_x_v = Linv_x[end_w:end_v].reshape(N1, N2)
    Linv_x_a = Linv_x[end_v:end_a]
    Linv_x_b = Linv_x[end_a:end_b]
    Linv_x_c = Linv_x[-N2:]

    # Symbolic energy: two pairwise terms and three bias terms, all negated.
    pair_vg = T.sum(T.dot(symb['v'], symb['W']) * symb['g'], axis=1)
    pair_gh = T.sum(T.dot(symb['g'], symb['V']) * symb['h'], axis=1)
    bias_v = T.dot(symb['v'], symb['a'])
    bias_g = T.dot(symb['g'], symb['b'])
    bias_h = T.dot(symb['h'], symb['c'])
    energies = -pair_vg - pair_gh - bias_v - bias_g - bias_h

    def Lx_func(*args):
        symb_params = [symb[p] for p in params]
        return fisher.compute_Lx(energies, symb_params, args)

    def apply_L(xw, xv, xa, xb, xc):
        return Lx_func(xw, xv, xa, xb, xc)

    x_names = ['x_W', 'x_V', 'x_a', 'x_b', 'x_c']
    rvals = lincg.linear_cg(
        apply_L,
        [symb[name] for name in x_names],
        rtol=1e-5, damp=0., maxiter=10000)

    # NOTE(review): other callers in this file unpack linear_cg's result as
    # [sol, niter, rerr]; this test treats the first two entries as
    # (niter, rerr) and the rest as the solution -- confirm the solver API.
    [niter, rerr] = rvals[:2]
    newgrads = rvals[2:]

    input_names = ['v', 'g', 'h', 'W', 'V', 'a', 'b', 'c'] + x_names
    f = theano.function([symb[name] for name in input_names], newgrads)
    [new_dw, new_dv, new_da, new_db, new_dc] = f(
        *[vals[name] for name in input_names])

    checks = [(Linv_x_w, new_dw), (Linv_x_v, new_dv), (Linv_x_a, new_da),
              (Linv_x_b, new_db), (Linv_x_c, new_dc)]
    for expected, actual in checks:
        numpy.testing.assert_almost_equal(expected, actual, decimal=1)
def get_natural_direction(self, ml_cost, nsamples, xinit=None,
                          precondition=None):
    """Replace the gradients in ``ml_cost`` by the output of a linear solve.

    The linear operator is ``fisher.compute_Lx`` evaluated on the energies
    of ``nsamples``; the right-hand side is the current list of gradients
    ``ml_cost.grads[param]``.  The solver is picked from ``self.flags``:
    ``minres`` first, then ``minresQLP``, otherwise ``lincg.linear_cg``.

    Parameters
    ----------
    ml_cost : object with a ``grads`` mapping keyed by parameter.
        Its entries are overwritten in place with the solver output.
    nsamples : samples used to build the operator; only the first
        ``self.cg_params['batch_size']`` rows are used.
    xinit : passed through to ``minres`` and ``linear_cg`` as ``xinit``
        (it is not forwarded on the ``minresQLP`` path).
    precondition : ``None`` or ``'jacobi'``.  With ``'jacobi'`` the
        diagonal from ``fisher.compute_L_diag`` plus the damping
        coefficient is handed to the solver.

    Returns
    -------
    tuple
        ``([niter, rerr, cos_dist], updates)`` where ``niter`` and
        ``rerr`` come from the solver (see its documentation),
        ``cos_dist`` is the cosine between the old and the new gradient
        (all parameters flattened together), and ``updates`` is the
        result of ``self.get_dparam_updates(*newgrads)``.

    Raises
    ------
    NotImplementedError
        If ``self.computational_bs > 0``.
    """
    assert precondition in [None, 'jacobi']
    self.cg_params.setdefault('batch_size', self.batch_size)

    nsamples = nsamples[:self.cg_params['batch_size']]
    neg_energies = self.energy(nsamples)

    if self.computational_bs > 0:
        raise NotImplementedError()

    def Lx_func(*args):
        Lneg_x = fisher.compute_Lx(neg_energies, self.params, args)
        # minresQLP expects a (result, updates) pair from the operator.
        if self.flags['minresQLP']:
            return Lneg_x, {}
        return Lneg_x

    M = None
    if precondition == 'jacobi':
        cnsamples = self.center_samples(nsamples)
        raw_M = fisher.compute_L_diag(cnsamples)
        M = [(Mi + self.cg_params['damp']) for Mi in raw_M]

    # Right-hand side of the linear system: the current gradients.
    rhs = [ml_cost.grads[param] for param in self.params]

    if self.flags['minres']:
        rvals = minres.minres(
            Lx_func, rhs,
            rtol=self.cg_params['rtol'],
            maxiter=self.cg_params['maxiter'],
            damp=self.cg_params['damp'],
            xinit=xinit,
            Ms=M)
        [newgrads, flag, niter, rerr] = rvals[:4]
    elif self.flags['minresQLP']:
        param_shapes = [p.get_value().shape for p in self.params]
        rvals = minresQLP.minresQLP(
            Lx_func, rhs, param_shapes,
            rtol=self.cg_params['rtol'],
            maxit=self.cg_params['maxiter'],
            damp=self.cg_params['damp'],
            Ms=M,
            profile=0)
        [newgrads, flag, niter, rerr] = rvals[:4]
    else:
        rvals = lincg.linear_cg(
            Lx_func, rhs,
            rtol=self.cg_params['rtol'],
            damp=self.cg_params['damp'],
            maxiter=self.cg_params['maxiter'],
            xinit=xinit,
            M=M)
        [newgrads, niter, rerr] = rvals

    # Now replace grad with natural gradient.
    cos_dist = 0.
    norm2_old = 0.
    norm2_new = 0.
    for i, param in enumerate(self.params):
        old_grad = ml_cost.grads[param]
        new_grad = newgrads[i]
        norm2_old += T.sum(old_grad**2)
        norm2_new += T.sum(new_grad**2)
        cos_dist += T.dot(old_grad.flatten(), new_grad.flatten())
        ml_cost.grads[param] = new_grad
    # norm2_* hold squared norms, so the cosine needs the square root of
    # their product: <a, b> / (||a|| * ||b||).
    cos_dist /= T.sqrt(norm2_old * norm2_new)

    return [niter, rerr, cos_dist], self.get_dparam_updates(*newgrads)
def get_natural_direction(self, ml_cost, nsamples, xinit=None,
                          precondition=None):
    """Replace the gradients in ``ml_cost`` by the output of a linear solve.

    The linear operator is ``fisher.compute_Lx`` evaluated on the energies
    of ``nsamples``; the right-hand side is the current list of gradients
    ``ml_cost.grads[param]``.  The solver is picked from ``self.flags``:
    ``minres`` first, then ``minresQLP``, otherwise ``lincg.linear_cg``.

    Parameters
    ----------
    ml_cost : object with a ``grads`` mapping keyed by parameter.
        Its entries are overwritten in place with the solver output.
    nsamples : samples used to build the operator; only the first
        ``self.cg_params['batch_size']`` rows are used.
    xinit : passed through to ``minres`` and ``linear_cg`` as ``xinit``
        (it is not forwarded on the ``minresQLP`` path).
    precondition : ``None`` or ``'jacobi'``.  With ``'jacobi'`` the
        diagonal from ``fisher.compute_L_diag`` plus the damping
        coefficient is handed to the solver.

    Returns
    -------
    tuple
        ``([niter, rerr, cos_dist], updates)`` where ``niter`` and
        ``rerr`` come from the solver (see its documentation),
        ``cos_dist`` is the cosine between the old and the new gradient
        (all parameters flattened together), and ``updates`` is the
        result of ``self.get_dparam_updates(*newgrads)``.

    Raises
    ------
    NotImplementedError
        If ``self.computational_bs > 0``.
    """
    assert precondition in [None, 'jacobi']
    self.cg_params.setdefault('batch_size', self.batch_size)

    nsamples = nsamples[:self.cg_params['batch_size']]
    neg_energies = self.energy(nsamples)

    if self.computational_bs > 0:
        raise NotImplementedError()

    def Lx_func(*args):
        Lneg_x = fisher.compute_Lx(neg_energies, self.params, args)
        # minresQLP expects a (result, updates) pair from the operator.
        if self.flags['minresQLP']:
            return Lneg_x, {}
        return Lneg_x

    M = None
    if precondition == 'jacobi':
        cnsamples = self.center_samples(nsamples)
        raw_M = fisher.compute_L_diag(cnsamples)
        M = [(Mi + self.cg_params['damp']) for Mi in raw_M]

    # Right-hand side of the linear system: the current gradients.
    rhs = [ml_cost.grads[param] for param in self.params]

    if self.flags['minres']:
        rvals = minres.minres(
            Lx_func, rhs,
            rtol=self.cg_params['rtol'],
            maxiter=self.cg_params['maxiter'],
            damp=self.cg_params['damp'],
            xinit=xinit,
            Ms=M)
        [newgrads, flag, niter, rerr] = rvals[:4]
    elif self.flags['minresQLP']:
        param_shapes = [p.get_value().shape for p in self.params]
        rvals = minresQLP.minresQLP(
            Lx_func, rhs, param_shapes,
            rtol=self.cg_params['rtol'],
            maxit=self.cg_params['maxiter'],
            damp=self.cg_params['damp'],
            Ms=M,
            profile=0)
        [newgrads, flag, niter, rerr] = rvals[:4]
    else:
        rvals = lincg.linear_cg(
            Lx_func, rhs,
            rtol=self.cg_params['rtol'],
            damp=self.cg_params['damp'],
            maxiter=self.cg_params['maxiter'],
            xinit=xinit,
            M=M)
        [newgrads, niter, rerr] = rvals

    # Now replace grad with natural gradient.
    cos_dist = 0.
    norm2_old = 0.
    norm2_new = 0.
    for i, param in enumerate(self.params):
        old_grad = ml_cost.grads[param]
        new_grad = newgrads[i]
        norm2_old += T.sum(old_grad**2)
        norm2_new += T.sum(new_grad**2)
        cos_dist += T.dot(old_grad.flatten(), new_grad.flatten())
        ml_cost.grads[param] = new_grad
    # norm2_* hold squared norms, so the cosine needs the square root of
    # their product: <a, b> / (||a|| * ||b||).
    cos_dist /= T.sqrt(norm2_old * norm2_new)

    return [niter, rerr, cos_dist], self.get_dparam_updates(*newgrads)