def get_natural_diag_direction(self, ml_cost, nsamples):
    """Precondition gradients by the damped diagonal of the Fisher matrix.

    Rescales each gradient in ``ml_cost.grads`` elementwise by
    ``1 / (diag(L)_i + damp)``, where ``diag(L)`` is estimated from the
    (centered) samples via ``fisher.compute_L_diag``. Gradients are
    modified in place.

    :param ml_cost: cost object whose ``grads`` dict (param -> gradient
        tensor) is rewritten in place with the preconditioned gradients.
    :param nsamples: negative-phase samples used to estimate diag(L).
    :rtype: tuple
    :return: ``([niter, rerr, cos_dist], updates)`` matching the return
        shape of ``get_natural_direction``; ``niter`` and ``rerr`` are
        constants (1, 0) since no iterative solver runs here, and
        ``updates`` is an empty OrderedDict.
    """
    damp = self.cg_params['damp']
    cnsamples = self.center_samples(nsamples)
    rvals = fisher.compute_L_diag(cnsamples)
    # Accumulators for the cosine similarity between the original and
    # the preconditioned gradient, taken over all parameters jointly.
    cos_dist = 0.
    norm2_old = 0.
    norm2_new = 0.
    for i, param in enumerate(self.params):
        # Jacobi-style elementwise division by the damped diagonal.
        new_gradi = ml_cost.grads[param] * 1. / (rvals[i] + damp)
        norm2_old += T.sum(ml_cost.grads[param] ** 2)
        norm2_new += T.sum(new_gradi ** 2)
        cos_dist += T.dot(ml_cost.grads[param].flatten(),
                          new_gradi.flatten())
        ml_cost.grads[param] = new_gradi
    # BUG FIX: cosine similarity is dot / (||old|| * ||new||); the
    # accumulators hold *squared* norms, so take the square root of
    # their product (previously divided by norm2_old * norm2_new).
    cos_dist /= T.sqrt(norm2_old * norm2_new)
    return [T.constant(1), T.constant(0), cos_dist], OrderedDict()
def get_natural_diag_direction(self, ml_cost, nsamples):
    """Precondition gradients by the damped diagonal of the Fisher matrix.

    Rescales each gradient in ``ml_cost.grads`` elementwise by
    ``1 / (diag(L)_i + damp)``, where ``diag(L)`` is estimated from the
    (centered) samples via ``fisher.compute_L_diag``. Gradients are
    modified in place.

    :param ml_cost: cost object whose ``grads`` dict (param -> gradient
        tensor) is rewritten in place with the preconditioned gradients.
    :param nsamples: negative-phase samples used to estimate diag(L).
    :rtype: tuple
    :return: ``([niter, rerr, cos_dist], updates)`` matching the return
        shape of ``get_natural_direction``; ``niter`` and ``rerr`` are
        constants (1, 0) since no iterative solver runs here, and
        ``updates`` is an empty OrderedDict.
    """
    damp = self.cg_params['damp']
    cnsamples = self.center_samples(nsamples)
    rvals = fisher.compute_L_diag(cnsamples)
    # Accumulators for the cosine similarity between the original and
    # the preconditioned gradient, taken over all parameters jointly.
    cos_dist = 0.
    norm2_old = 0.
    norm2_new = 0.
    for i, param in enumerate(self.params):
        # Jacobi-style elementwise division by the damped diagonal.
        new_gradi = ml_cost.grads[param] * 1. / (rvals[i] + damp)
        norm2_old += T.sum(ml_cost.grads[param] ** 2)
        norm2_new += T.sum(new_gradi ** 2)
        cos_dist += T.dot(ml_cost.grads[param].flatten(),
                          new_gradi.flatten())
        ml_cost.grads[param] = new_gradi
    # BUG FIX: cosine similarity is dot / (||old|| * ||new||); the
    # accumulators hold *squared* norms, so take the square root of
    # their product (previously divided by norm2_old * norm2_new).
    cos_dist /= T.sqrt(norm2_old * norm2_new)
    return [T.constant(1), T.constant(0), cos_dist], OrderedDict()
def test_fisher_diag_implementations():
    """Check fisher.compute_L_diag against a brute-force numpy estimate.

    Implementation 1 computes diag(L) directly in numpy from the centered
    statistics; implementation 2 compiles and runs the symbolic
    ``fisher.compute_L_diag``. The two must agree to 3 decimals.
    """
    Minv = numpy.float32(1. / M)
    ### implementation 1: brute-force numpy estimate ###
    L1 = []
    for pi in params:
        centered = (stats[pi] - grads[pi]).reshape(M, -1)
        # diag(c.T.dot(c)) is the column-wise sum of squares; computing
        # it directly avoids materializing the full d x d matrix (the
        # original built dot(lterm.T, rterm) with lterm == rterm and
        # then extracted numpy.diag of it).
        L1 += [Minv * (centered ** 2).sum(axis=0)]
    ### implementation 2: symbolic compute_L_diag ###
    samples = [symb['v'], symb['g'], symb['h']]
    symb_L2 = fisher.compute_L_diag(samples)
    f = theano.function(samples, symb_L2)
    L2 = f(vals['v'], vals['g'], vals['h'])
    ### compare both ###
    for (L1i, L2i) in zip(L1, L2):
        numpy.testing.assert_almost_equal(L1i, L2i.flatten(), decimal=3)
def get_natural_direction(self, ml_cost, nsamples, xinit=None,
                          precondition=None):
    """Replace the gradients in ``ml_cost`` with the natural gradient.

    Solves L x = g approximately with an iterative solver — minres,
    minresQLP or linear CG depending on ``self.flags`` — where L is the
    Fisher matrix estimated from ``nsamples`` (via the matrix-vector
    product ``fisher.compute_Lx``). Gradients are modified in place.

    :param ml_cost: cost object whose ``grads`` dict (param -> gradient
        tensor) is rewritten in place with the natural gradients.
    :param nsamples: negative-phase samples; truncated to
        ``cg_params['batch_size']`` before use.
    :param xinit: optional warm-start for the solver (minres / lincg).
    :param precondition: None or 'jacobi' for damped-diagonal
        preconditioning.
    :rtype: tuple
    :return: ``([niter, rerr, cos_dist], updates)`` where ``niter`` /
        ``rerr`` come from the solver (see lincg documentation),
        ``cos_dist`` is the cosine similarity between the original and
        natural gradients, and ``updates`` comes from
        ``self.get_dparam_updates``.
    """
    assert precondition in [None, 'jacobi']
    self.cg_params.setdefault('batch_size', self.batch_size)
    nsamples = nsamples[:self.cg_params['batch_size']]
    neg_energies = self.energy(nsamples)

    if self.computational_bs > 0:
        raise NotImplementedError()
    else:
        def Lx_func(*args):
            # Fisher matrix-vector product L * args.
            Lneg_x = fisher.compute_Lx(neg_energies, self.params, args)
            # minresQLP expects a (values, updates) pair; the other
            # solvers take the values alone.
            if self.flags['minresQLP']:
                return Lneg_x, {}
            else:
                return Lneg_x

    M = None
    if precondition == 'jacobi':
        cnsamples = self.center_samples(nsamples)
        raw_M = fisher.compute_L_diag(cnsamples)
        M = [(Mi + self.cg_params['damp']) for Mi in raw_M]

    if self.flags['minres']:
        rvals = minres.minres(
            Lx_func,
            [ml_cost.grads[param] for param in self.params],
            rtol=self.cg_params['rtol'],
            maxiter=self.cg_params['maxiter'],
            damp=self.cg_params['damp'],
            xinit=xinit,
            Ms=M)
        [newgrads, flag, niter, rerr] = rvals[:4]
    elif self.flags['minresQLP']:
        # minresQLP needs the concrete shape of each parameter.
        param_shapes = [p.get_value().shape for p in self.params]
        rvals = minresQLP.minresQLP(
            Lx_func,
            [ml_cost.grads[param] for param in self.params],
            param_shapes,
            rtol=self.cg_params['rtol'],
            maxit=self.cg_params['maxiter'],
            damp=self.cg_params['damp'],
            Ms=M,
            profile=0)
        [newgrads, flag, niter, rerr] = rvals[:4]
    else:
        rvals = lincg.linear_cg(
            Lx_func,
            [ml_cost.grads[param] for param in self.params],
            rtol=self.cg_params['rtol'],
            damp=self.cg_params['damp'],
            maxiter=self.cg_params['maxiter'],
            xinit=xinit,
            M=M)
        [newgrads, niter, rerr] = rvals

    # Now replace grad with natural gradient, tracking the cosine
    # similarity between the old and new directions.
    cos_dist = 0.
    norm2_old = 0.
    norm2_new = 0.
    for i, param in enumerate(self.params):
        norm2_old += T.sum(ml_cost.grads[param] ** 2)
        norm2_new += T.sum(newgrads[i] ** 2)
        cos_dist += T.dot(ml_cost.grads[param].flatten(),
                          newgrads[i].flatten())
        ml_cost.grads[param] = newgrads[i]
    # BUG FIX: cosine similarity is dot / (||old|| * ||new||); the
    # accumulators hold *squared* norms, so take the square root of
    # their product (previously divided by norm2_old * norm2_new).
    cos_dist /= T.sqrt(norm2_old * norm2_new)
    return [niter, rerr, cos_dist], self.get_dparam_updates(*newgrads)
def get_natural_direction(self, ml_cost, nsamples, xinit=None,
                          precondition=None):
    """Replace the gradients in ``ml_cost`` with the natural gradient.

    Solves L x = g approximately with an iterative solver — minres,
    minresQLP or linear CG depending on ``self.flags`` — where L is the
    Fisher matrix estimated from ``nsamples`` (via the matrix-vector
    product ``fisher.compute_Lx``). Gradients are modified in place.

    :param ml_cost: cost object whose ``grads`` dict (param -> gradient
        tensor) is rewritten in place with the natural gradients.
    :param nsamples: negative-phase samples; truncated to
        ``cg_params['batch_size']`` before use.
    :param xinit: optional warm-start for the solver (minres / lincg).
    :param precondition: None or 'jacobi' for damped-diagonal
        preconditioning.
    :rtype: tuple
    :return: ``([niter, rerr, cos_dist], updates)`` where ``niter`` /
        ``rerr`` come from the solver (see lincg documentation),
        ``cos_dist`` is the cosine similarity between the original and
        natural gradients, and ``updates`` comes from
        ``self.get_dparam_updates``.
    """
    assert precondition in [None, 'jacobi']
    self.cg_params.setdefault('batch_size', self.batch_size)
    nsamples = nsamples[:self.cg_params['batch_size']]
    neg_energies = self.energy(nsamples)

    if self.computational_bs > 0:
        raise NotImplementedError()
    else:
        def Lx_func(*args):
            # Fisher matrix-vector product L * args.
            Lneg_x = fisher.compute_Lx(neg_energies, self.params, args)
            # minresQLP expects a (values, updates) pair; the other
            # solvers take the values alone.
            if self.flags['minresQLP']:
                return Lneg_x, {}
            else:
                return Lneg_x

    M = None
    if precondition == 'jacobi':
        cnsamples = self.center_samples(nsamples)
        raw_M = fisher.compute_L_diag(cnsamples)
        M = [(Mi + self.cg_params['damp']) for Mi in raw_M]

    if self.flags['minres']:
        rvals = minres.minres(
            Lx_func,
            [ml_cost.grads[param] for param in self.params],
            rtol=self.cg_params['rtol'],
            maxiter=self.cg_params['maxiter'],
            damp=self.cg_params['damp'],
            xinit=xinit,
            Ms=M)
        [newgrads, flag, niter, rerr] = rvals[:4]
    elif self.flags['minresQLP']:
        # minresQLP needs the concrete shape of each parameter.
        param_shapes = [p.get_value().shape for p in self.params]
        rvals = minresQLP.minresQLP(
            Lx_func,
            [ml_cost.grads[param] for param in self.params],
            param_shapes,
            rtol=self.cg_params['rtol'],
            maxit=self.cg_params['maxiter'],
            damp=self.cg_params['damp'],
            Ms=M,
            profile=0)
        [newgrads, flag, niter, rerr] = rvals[:4]
    else:
        rvals = lincg.linear_cg(
            Lx_func,
            [ml_cost.grads[param] for param in self.params],
            rtol=self.cg_params['rtol'],
            damp=self.cg_params['damp'],
            maxiter=self.cg_params['maxiter'],
            xinit=xinit,
            M=M)
        [newgrads, niter, rerr] = rvals

    # Now replace grad with natural gradient, tracking the cosine
    # similarity between the old and new directions.
    cos_dist = 0.
    norm2_old = 0.
    norm2_new = 0.
    for i, param in enumerate(self.params):
        norm2_old += T.sum(ml_cost.grads[param] ** 2)
        norm2_new += T.sum(newgrads[i] ** 2)
        cos_dist += T.dot(ml_cost.grads[param].flatten(),
                          newgrads[i].flatten())
        ml_cost.grads[param] = newgrads[i]
    # BUG FIX: cosine similarity is dot / (||old|| * ||new||); the
    # accumulators hold *squared* norms, so take the square root of
    # their product (previously divided by norm2_old * norm2_new).
    cos_dist /= T.sqrt(norm2_old * norm2_new)
    return [niter, rerr, cos_dist], self.get_dparam_updates(*newgrads)