Example #1
0
def test_lincg_L_precond():
    """Check lincg.linear_cg on symb['L'] x = symb['g'], with and without M.

    The vector handed to linear_cg as ``M`` is the diagonal of vals['L']
    (see the lincg documentation for how ``M`` is applied). Each solve is
    compared against the reference solution vals['Linv_g'] to 3 decimals,
    and its runtime, iteration count and residual error are printed.

    Side effects: stores the symbolic vector in symb['M'] and its value in
    vals['M'].
    """
    symb['M'] = T.vector('M')
    vals['M'] = numpy.diag(vals['L'])

    # (label, M argument of linear_cg, extra symbolic inputs, extra values)
    cases = [
        ('No precond', None, [], []),
        ('With precond', [symb['M']], [symb['M']], [vals['M']]),
    ]
    for label, precond, extra_symb, extra_vals in cases:
        [sol, niter, rerr] = lincg.linear_cg(lambda x: [T.dot(symb['L'], x)],
                                             [symb['g']],
                                             M=precond,
                                             rtol=1e-8,
                                             maxiter=10000,
                                             floatX=floatX)

        f = theano.function([symb['L'], symb['g']] + extra_symb,
                            sol + [niter, rerr])
        t1 = time.time()
        [Linv_g, niter, rerr] = f(vals['L'], vals['g'], *extra_vals)
        elapsed = time.time() - t1
        # Single-argument print(...) is valid on both Python 2 and 3 and
        # emits the same text as the former print statements.
        print('%s: test_lincg runtime (s): %s' % (label, elapsed))
        print('\t niter =  %s' % (niter,))
        print('\t residual error =  %s' % (rerr,))
        numpy.testing.assert_almost_equal(Linv_g, vals['Linv_g'], decimal=3)
Example #2
0
def test_lincg_L_diag_heavy_precond():
    """Check lincg.linear_cg on a diagonally boosted copy of vals['L'].

    vals['Ldh'] is a copy of vals['L'] whose diagonal is increased by random
    values in [0, 100). The reference solution vals['Ldh_inv_g'] comes from a
    Cholesky solve. linear_cg is run without ``M`` and with ``M`` set to the
    diagonal of vals['Ldh']; both results must match the reference to
    3 decimals. Finally ``cg`` (labelled scipy.sparse.linalg.cg in the
    output) is timed with and without a diagonal preconditioner; its result
    is not checked.

    Side effects: writes vals['Ldh'], vals['Ldh_inv_g'], symb['M'] and
    vals['M'].
    """
    vals['Ldh'] = copy.copy(vals['L'])
    rdiag = numpy.random.rand(nparams) * 100
    n = len(vals['L'])
    # In-place add on the diagonal so vals['Ldh'] keeps the dtype of vals['L'].
    vals['Ldh'][numpy.diag_indices(n)] += rdiag[:n]
    vals['Ldh_inv_g'] = linalg.cho_solve(linalg.cho_factor(vals['Ldh']),
                                         vals['g'])
    symb['M'] = T.vector('M')
    vals['M'] = numpy.diag(vals['Ldh'])

    # (label, M argument of linear_cg, extra symbolic inputs, extra values)
    cases = [
        ('No precond', None, [], []),
        ('With precond', [symb['M']], [symb['M']], [vals['M']]),
    ]
    for label, precond, extra_symb, extra_vals in cases:
        [sol, niter, rerr] = lincg.linear_cg(lambda x: [T.dot(symb['L'], x)],
                                             [symb['g']],
                                             M=precond,
                                             rtol=1e-20,
                                             maxiter=10000,
                                             floatX=floatX)

        f = theano.function([symb['L'], symb['g']] + extra_symb,
                            sol + [niter, rerr])
        t1 = time.time()
        [Linv_g, niter, rerr] = f(vals['Ldh'], vals['g'], *extra_vals)
        elapsed = time.time() - t1
        # Single-argument print(...) is valid on both Python 2 and 3 and
        # emits the same text as the former print statements.
        print('%s: test_lincg runtime (s): %s' % (label, elapsed))
        print('\t niter =  %s' % (niter,))
        print('\t residual error =  %s' % (rerr,))
        numpy.testing.assert_almost_equal(Linv_g, vals['Ldh_inv_g'],
                                          decimal=3)

    ### test scipy implementation ###
    t1 = time.time()
    cg(vals['Ldh'], vals['g'], maxiter=10000, tol=1e-10)
    print('scipy.sparse.linalg.cg (no preconditioning): Elapsed  %s' %
          (time.time() - t1,))
    t1 = time.time()
    cg(vals['Ldh'],
       vals['g'],
       maxiter=10000,
       tol=1e-10,
       M=numpy.diag(1. / vals['M']))
    print('scipy.sparse.linalg.cg (preconditioning): Elapsed  %s' %
          (time.time() - t1,))
Example #3
0
def test_lincg_L_diag_heavy_precond():
    """Check lincg.linear_cg on a diagonally boosted copy of vals['L'].

    vals['Ldh'] is a copy of vals['L'] whose diagonal is increased by random
    values in [0, 100). The reference solution vals['Ldh_inv_g'] comes from a
    Cholesky solve. linear_cg is run without ``M`` and with ``M`` set to the
    diagonal of vals['Ldh']; both results must match the reference to
    3 decimals. Finally ``cg`` (labelled scipy.sparse.linalg.cg in the
    output) is timed with and without a diagonal preconditioner; its result
    is not checked.

    Side effects: writes vals['Ldh'], vals['Ldh_inv_g'], symb['M'] and
    vals['M'].
    """
    vals['Ldh'] = copy.copy(vals['L'])
    rdiag = numpy.random.rand(nparams) * 100
    n = len(vals['L'])
    # In-place add on the diagonal so vals['Ldh'] keeps the dtype of vals['L'].
    vals['Ldh'][numpy.diag_indices(n)] += rdiag[:n]
    vals['Ldh_inv_g'] = linalg.cho_solve(linalg.cho_factor(vals['Ldh']), vals['g'])
    symb['M'] = T.vector('M')
    vals['M'] = numpy.diag(vals['Ldh'])

    def solve_and_check(label, precond, extra_symb, extra_vals):
        # Build, compile, time and verify one linear_cg solve.
        [sol, niter, rerr] = lincg.linear_cg(
                lambda x: [T.dot(symb['L'], x)],
                [symb['g']],
                M=precond,
                rtol=1e-20,
                maxiter=10000,
                floatX=floatX)

        f = theano.function([symb['L'], symb['g']] + extra_symb, sol + [niter, rerr])
        t1 = time.time()
        [Linv_g, niter, rerr] = f(vals['Ldh'], vals['g'], *extra_vals)
        elapsed = time.time() - t1
        # Single-argument print(...) is valid on both Python 2 and 3 and
        # emits the same text as the former print statements.
        print('%s: test_lincg runtime (s): %s' % (label, elapsed))
        print('\t niter =  %s' % (niter,))
        print('\t residual error =  %s' % (rerr,))
        numpy.testing.assert_almost_equal(Linv_g, vals['Ldh_inv_g'], decimal=3)

    ### without preconditioning ###
    solve_and_check('No precond', None, [], [])

    ### with preconditioning ###
    solve_and_check('With precond', [symb['M']], [symb['M']], [vals['M']])

    ### test scipy implementation ###
    t1 = time.time()
    cg(vals['Ldh'], vals['g'], maxiter=10000, tol=1e-10)
    print('scipy.sparse.linalg.cg (no preconditioning): Elapsed  %s' % (time.time() - t1,))
    t1 = time.time()
    cg(vals['Ldh'], vals['g'], maxiter=10000, tol=1e-10, M=numpy.diag(1./vals['M']))
    print('scipy.sparse.linalg.cg (preconditioning): Elapsed  %s' % (time.time() - t1,))
Example #4
0
def test_linearcg():
    """Compare the output of lincg.linear_cg with the reference Linv_x_* values.

    The right-hand sides are built from the module-level arrays v, g, h and
    the scalar M; the matrix-vector product is natural.compute_Lx evaluated
    on shared copies of v, g and h. Each of the five outputs must agree with
    its reference to 1 decimal.
    """
    shared_v = theano.shared(v, name='v')
    shared_g = theano.shared(g, name='g')
    shared_h = theano.shared(h, name='h')

    # Right-hand sides, in the order (w, v, a, b, c).
    rhs = [
        T.dot(v.T, g) / M,
        T.dot(g.T, h) / M,
        T.mean(v, axis=0),
        T.mean(g, axis=0),
        T.mean(h, axis=0),
    ]

    def apply_L(xw, xv, xa, xb, xc):
        return natural.compute_Lx(shared_v, shared_g, shared_h,
                                  xw, xv, xa, xb, xc)

    newgrads = lincg.linear_cg(
            apply_L,
            rhs,
            rtol=1e-5,
            maxiter=30,
            damp=0.,
            floatX=floatX,
            profile=0)

    solve = theano.function([], newgrads)
    new_dw, new_dv, new_da, new_db, new_dc = solve()

    checks = (
        (Linv_x_w, new_dw),
        (Linv_x_v, new_dv),
        (Linv_x_a, new_da),
        (Linv_x_b, new_db),
        (Linv_x_c, new_dc),
    )
    for expected, actual in checks:
        numpy.testing.assert_almost_equal(expected, actual, decimal=1)
Example #5
0
def test_linearcg():
    """Compare the output of lincg.linear_cg with the reference Linv_x_* values.

    The right-hand sides are built from the module-level arrays v, g, h and
    the scalar M; the matrix-vector product is natural.compute_Lx evaluated
    on shared copies of v, g and h. Each of the five outputs must agree with
    its reference to 1 decimal.
    """
    shared_v = theano.shared(v, name='v')
    shared_g = theano.shared(g, name='g')
    shared_h = theano.shared(h, name='h')

    # Right-hand sides, in the order (w, v, a, b, c).
    rhs = [
        T.dot(v.T, g) / M,
        T.dot(g.T, h) / M,
        T.mean(v, axis=0),
        T.mean(g, axis=0),
        T.mean(h, axis=0),
    ]

    def apply_L(xw, xv, xa, xb, xc):
        return natural.compute_Lx(shared_v, shared_g, shared_h, xw, xv, xa,
                                  xb, xc)

    newgrads = lincg.linear_cg(apply_L,
                               rhs,
                               rtol=1e-5,
                               maxiter=30,
                               damp=0.,
                               floatX=floatX,
                               profile=0)

    solve = theano.function([], newgrads)
    new_dw, new_dv, new_da, new_db, new_dc = solve()

    checks = (
        (Linv_x_w, new_dw),
        (Linv_x_v, new_dv),
        (Linv_x_a, new_da),
        (Linv_x_b, new_db),
        (Linv_x_c, new_dc),
    )
    for expected, actual in checks:
        numpy.testing.assert_almost_equal(expected, actual, decimal=1)
Example #6
0
def test_lincg_L_precond():
    """Check lincg.linear_cg on symb['L'] x = symb['g'], with and without M.

    The vector handed to linear_cg as ``M`` is the diagonal of vals['L']
    (see the lincg documentation for how ``M`` is applied). Each solve is
    compared against the reference solution vals['Linv_g'] to 3 decimals,
    and its runtime, iteration count and residual error are printed.

    Side effects: stores the symbolic vector in symb['M'] and its value in
    vals['M'].
    """
    symb['M'] = T.vector('M')
    vals['M'] = numpy.diag(vals['L'])

    def solve_and_check(label, precond, extra_symb, extra_vals):
        # Build, compile, time and verify one linear_cg solve.
        [sol, niter, rerr] = lincg.linear_cg(
                lambda x: [T.dot(symb['L'], x)],
                [symb['g']],
                M=precond,
                rtol=1e-8,
                maxiter=10000,
                floatX=floatX)

        f = theano.function([symb['L'], symb['g']] + extra_symb, sol + [niter, rerr])
        t1 = time.time()
        [Linv_g, niter, rerr] = f(vals['L'], vals['g'], *extra_vals)
        elapsed = time.time() - t1
        # Single-argument print(...) is valid on both Python 2 and 3 and
        # emits the same text as the former print statements.
        print('%s: test_lincg runtime (s): %s' % (label, elapsed))
        print('\t niter =  %s' % (niter,))
        print('\t residual error =  %s' % (rerr,))
        numpy.testing.assert_almost_equal(Linv_g, vals['Linv_g'], decimal=3)

    ### without preconditioning ###
    solve_and_check('No precond', None, [], [])

    ### with preconditioning ###
    solve_and_check('With precond', [symb['M']], [symb['M']], [vals['M']])
Example #7
0
def test_lincg():
    """Check lincg.linear_cg on symb['L'] x = symb['g'] without ``M``.

    The compiled function is evaluated on vals['L'] and vals['g']; the result
    must match the reference solution vals['Linv_g'] to 3 decimals. Runtime,
    iteration count and residual error are printed.
    """
    [sol, niter, rerr] = lincg.linear_cg(lambda x: [T.dot(symb['L'], x)],
                                         [symb['g']],
                                         M=None,
                                         rtol=1e-8,
                                         maxiter=10000,
                                         floatX=floatX)

    f = theano.function([symb['L'], symb['g']], sol + [niter, rerr])
    t1 = time.time()
    [Linv_g, niter, rerr] = f(vals['L'], vals['g'])
    elapsed = time.time() - t1
    # Single-argument print(...) is valid on both Python 2 and 3 and emits
    # the same text as the former print statements.
    print('test_lincg runtime (s): %s' % (elapsed,))
    print('\t niter =  %s' % (niter,))
    print('\t residual error =  %s' % (rerr,))
    numpy.testing.assert_almost_equal(Linv_g, vals['Linv_g'], decimal=3)
Example #8
0
def test_lincg():
    """Check lincg.linear_cg on symb['L'] x = symb['g'] without ``M``.

    The compiled function is evaluated on vals['L'] and vals['g']; the result
    must match the reference solution vals['Linv_g'] to 3 decimals. Runtime,
    iteration count and residual error are printed.
    """
    [sol, niter, rerr] = lincg.linear_cg(
            lambda x: [T.dot(symb['L'], x)],
            [symb['g']],
            M=None,
            rtol=1e-8,
            maxiter=10000,
            floatX=floatX)

    f = theano.function([symb['L'], symb['g']], sol + [niter, rerr])
    t1 = time.time()
    [Linv_g, niter, rerr] = f(vals['L'], vals['g'])
    elapsed = time.time() - t1
    # Single-argument print(...) is valid on both Python 2 and 3 and emits
    # the same text as the former print statements.
    print('test_lincg runtime (s): %s' % (elapsed,))
    print('\t niter =  %s' % (niter,))
    print('\t residual error =  %s' % (rerr,))
    numpy.testing.assert_almost_equal(Linv_g, vals['Linv_g'], decimal=3)
Example #9
0
def test_fisher_Linv_x():
    """Compare lincg.linear_cg driven by fisher.compute_Lx with a direct solve.

    The reference is a Cholesky solve of L against vals['x'], split into
    blocks of sizes N0*N1, N1*N2, N0, N1 and N2 (w, v, a, b, c). The CG
    outputs from position 2 onwards must match these blocks to 1 decimal.
    """
    Linv_x = linalg.cho_solve(linalg.cho_factor(L), vals['x'])

    # End offsets of the successive blocks inside the flat solution vector.
    end_w = N0*N1
    end_v = end_w + N1*N2
    end_a = end_v + N0
    end_b = end_a + N1
    Linv_x_w = Linv_x[:end_w].reshape(N0, N1)
    Linv_x_v = Linv_x[end_w:end_v].reshape(N1, N2)
    Linv_x_a = Linv_x[end_v:end_a]
    Linv_x_b = Linv_x[end_a:end_b]
    Linv_x_c = Linv_x[-N2:]

    energies = (-T.sum(T.dot(symb['v'], symb['W']) * symb['g'], axis=1)
                - T.sum(T.dot(symb['g'], symb['V']) * symb['h'], axis=1)
                - T.dot(symb['v'], symb['a'])
                - T.dot(symb['g'], symb['b'])
                - T.dot(symb['h'], symb['c']))

    def Lx_func(xw, xv, xa, xb, xc):
        # Same call as before: the five vectors are forwarded as one tuple.
        return fisher.compute_Lx(energies,
                                 [symb[p] for p in params],
                                 (xw, xv, xa, xb, xc))

    rvals = lincg.linear_cg(
            Lx_func,
            [symb[name] for name in ('x_W', 'x_V', 'x_a', 'x_b', 'x_c')],
            rtol=1e-5,
            damp=0.,
            maxiter=10000)
    # The first two entries are unpacked but not used by this test.
    niter, rerr = rvals[:2]
    newgrads = rvals[2:]

    input_names = ['v', 'g', 'h',
                   'W', 'V', 'a', 'b', 'c',
                   'x_W', 'x_V', 'x_a', 'x_b', 'x_c']
    f = theano.function([symb[name] for name in input_names], newgrads)
    new_dw, new_dv, new_da, new_db, new_dc = f(
            *[vals[name] for name in input_names])

    checks = (
        (Linv_x_w, new_dw),
        (Linv_x_v, new_dv),
        (Linv_x_a, new_da),
        (Linv_x_b, new_db),
        (Linv_x_c, new_dc),
    )
    for expected, actual in checks:
        numpy.testing.assert_almost_equal(expected, actual, decimal=1)
Example #10
0
    def get_natural_direction(self,
                              ml_cost,
                              nsamples,
                              xinit=None,
                              precondition=None):
        """
        Replace the gradients stored in `ml_cost` by the output of a linear
        solver whose matrix-vector product is fisher.compute_Lx.

        The solver is chosen from `self.flags`: 'minres', else 'minresQLP',
        else lincg.linear_cg. See the documentation of those modules for the
        meaning of niter and rerr.

        Parameters:
            ml_cost: object whose `grads` mapping (keyed by parameter) holds
                the gradients; its entries are overwritten in place.
            nsamples: samples; only the first cg_params['batch_size'] are used
                ('batch_size' defaults to self.batch_size).
            xinit: forwarded to minres / linear_cg as `xinit`
                (not passed to minresQLP).
            precondition: None or 'jacobi'. With 'jacobi', the preconditioner
                is fisher.compute_L_diag of the centered samples plus
                cg_params['damp'].

        Returns: tuple
            ([niter, rerr, cos_dist], updates) where cos_dist is the cosine
            between the old and new gradients (over all parameters) and
            updates is self.get_dparam_updates(*newgrads).

        Raises:
            NotImplementedError: if self.computational_bs > 0.
        """
        assert precondition in [None, 'jacobi']
        self.cg_params.setdefault('batch_size', self.batch_size)

        nsamples = nsamples[:self.cg_params['batch_size']]
        neg_energies = self.energy(nsamples)

        if self.computational_bs > 0:
            raise NotImplementedError()

        def Lx_func(*args):
            Lneg_x = fisher.compute_Lx(neg_energies, self.params, args)
            # With the minresQLP flag set, the product is returned together
            # with an empty dict.
            if self.flags['minresQLP']:
                return Lneg_x, {}
            return Lneg_x

        M = None
        if precondition == 'jacobi':
            cnsamples = self.center_samples(nsamples)
            raw_M = fisher.compute_L_diag(cnsamples)
            M = [(Mi + self.cg_params['damp']) for Mi in raw_M]

        grads = [ml_cost.grads[param] for param in self.params]

        if self.flags['minres']:
            rvals = minres.minres(
                Lx_func, grads,
                rtol=self.cg_params['rtol'],
                maxiter=self.cg_params['maxiter'],
                damp=self.cg_params['damp'],
                xinit=xinit,
                Ms=M)
            [newgrads, flag, niter, rerr] = rvals[:4]
        elif self.flags['minresQLP']:
            param_shapes = [p.get_value().shape for p in self.params]
            rvals = minresQLP.minresQLP(
                Lx_func, grads,
                param_shapes,
                rtol=self.cg_params['rtol'],
                maxit=self.cg_params['maxiter'],
                damp=self.cg_params['damp'],
                Ms=M,
                profile=0)
            [newgrads, flag, niter, rerr] = rvals[:4]
        else:
            rvals = lincg.linear_cg(
                Lx_func, grads,
                rtol=self.cg_params['rtol'],
                damp=self.cg_params['damp'],
                maxiter=self.cg_params['maxiter'],
                xinit=xinit,
                M=M)
            [newgrads, niter, rerr] = rvals

        # Now replace grad with natural gradient.
        cos_dist = 0.
        norm2_old = 0.
        norm2_new = 0.
        for param, newgrad in zip(self.params, newgrads):
            norm2_old += T.sum(ml_cost.grads[param]**2)
            norm2_new += T.sum(newgrad**2)
            cos_dist += T.dot(ml_cost.grads[param].flatten(),
                              newgrad.flatten())
            ml_cost.grads[param] = newgrad
        # norm2_* are squared norms, so the cosine denominator |old| * |new|
        # is the square root of their product.
        cos_dist /= T.sqrt(norm2_old * norm2_new)

        return [niter, rerr, cos_dist], self.get_dparam_updates(*newgrads)
Example #11
0
    def get_natural_direction(self, ml_cost, nsamples, xinit=None,
                              precondition=None):
        """
        Replace the gradients stored in `ml_cost` by the output of a linear
        solver whose matrix-vector product is fisher.compute_Lx.

        The solver is chosen from `self.flags`: 'minres', else 'minresQLP',
        else lincg.linear_cg. See the documentation of those modules for the
        meaning of niter and rerr.

        Parameters:
            ml_cost: object whose `grads` mapping (keyed by parameter) holds
                the gradients; its entries are overwritten in place.
            nsamples: samples; only the first cg_params['batch_size'] are used
                ('batch_size' defaults to self.batch_size).
            xinit: forwarded to minres / linear_cg as `xinit`
                (not passed to minresQLP).
            precondition: None or 'jacobi'. With 'jacobi', the preconditioner
                is fisher.compute_L_diag of the centered samples plus
                cg_params['damp'].

        Returns: tuple
            ([niter, rerr, cos_dist], updates) where cos_dist is the cosine
            between the old and new gradients (over all parameters) and
            updates is self.get_dparam_updates(*newgrads).

        Raises:
            NotImplementedError: if self.computational_bs > 0.
        """
        assert precondition in [None, 'jacobi']
        self.cg_params.setdefault('batch_size', self.batch_size)

        nsamples = nsamples[:self.cg_params['batch_size']]
        neg_energies = self.energy(nsamples)

        if self.computational_bs > 0:
            raise NotImplementedError()

        def Lx_func(*args):
            Lneg_x = fisher.compute_Lx(
                    neg_energies,
                    self.params,
                    args)
            # With the minresQLP flag set, the product is returned together
            # with an empty dict.
            if self.flags['minresQLP']:
                return Lneg_x, {}
            return Lneg_x

        M = None
        if precondition == 'jacobi':
            cnsamples = self.center_samples(nsamples)
            raw_M = fisher.compute_L_diag(cnsamples)
            M = [(Mi + self.cg_params['damp']) for Mi in raw_M]

        grads = [ml_cost.grads[param] for param in self.params]

        if self.flags['minres']:
            rvals = minres.minres(
                    Lx_func,
                    grads,
                    rtol=self.cg_params['rtol'],
                    maxiter=self.cg_params['maxiter'],
                    damp=self.cg_params['damp'],
                    xinit=xinit,
                    Ms=M)
            [newgrads, flag, niter, rerr] = rvals[:4]
        elif self.flags['minresQLP']:
            param_shapes = [p.get_value().shape for p in self.params]
            rvals = minresQLP.minresQLP(
                    Lx_func,
                    grads,
                    param_shapes,
                    rtol=self.cg_params['rtol'],
                    maxit=self.cg_params['maxiter'],
                    damp=self.cg_params['damp'],
                    Ms=M,
                    profile=0)
            [newgrads, flag, niter, rerr] = rvals[:4]
        else:
            rvals = lincg.linear_cg(
                    Lx_func,
                    grads,
                    rtol=self.cg_params['rtol'],
                    damp=self.cg_params['damp'],
                    maxiter=self.cg_params['maxiter'],
                    xinit=xinit,
                    M=M)
            [newgrads, niter, rerr] = rvals

        # Now replace grad with natural gradient.
        cos_dist = 0.
        norm2_old = 0.
        norm2_new = 0.
        for param, newgrad in zip(self.params, newgrads):
            norm2_old += T.sum(ml_cost.grads[param]**2)
            norm2_new += T.sum(newgrad**2)
            cos_dist += T.dot(ml_cost.grads[param].flatten(),
                              newgrad.flatten())
            ml_cost.grads[param] = newgrad
        # norm2_* are squared norms, so the cosine denominator |old| * |new|
        # is the square root of their product.
        cos_dist /= T.sqrt(norm2_old * norm2_new)

        return [niter, rerr, cos_dist], self.get_dparam_updates(*newgrads)