Example #1
import time

import numpy
import scipy.linalg
import theano
from theano import config, tensor

# pylearn2 helpers (import paths assumed from the pylearn2 source tree)
from pylearn2.optimization import linear_cg
from pylearn2.testing.skip import skip_if_no_scipy


def test_linear_cg():
    rng = numpy.random.RandomState([1, 2, 3])
    n = 5
    M = rng.randn(2 * n, n)
    M = numpy.dot(M.T, M).astype(config.floatX)
    b = rng.randn(n).astype(config.floatX)
    c = rng.randn(1).astype(config.floatX)[0]
    x = theano.tensor.vector('x')
    f = 0.5 * tensor.dot(x, tensor.dot(M, x)) - tensor.dot(b, x) + c
    sol = linear_cg.linear_cg(f, [x])

    fn_sol = theano.function([x], sol)

    start = time.time()
    sol = fn_sol(rng.randn(n).astype(config.floatX))[0]
    my_lcg = time.time() - start

    eval_f = theano.function([x], f)
    cgf = eval_f(sol)
    print("conjugate gradient's value of f:", str(cgf), 'time (s)', my_lcg)
    skip_if_no_scipy()
    spf = eval_f(scipy.linalg.solve(M, b))
    print("scipy.linalg.solve's value of f: " + str(spf))

    abs_diff = abs(cgf - spf)
    if not (abs_diff < 1e-5):
        raise AssertionError("Expected abs_diff < 1e-5, got abs_diff of " +
                             str(abs_diff))
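Aside: for a quadratic f(x) = 0.5 * x^T M x - b^T x + c with M symmetric positive definite, the minimizer is exactly the solution of M x = b, which is why the test above can compare the conjugate gradient result against scipy.linalg.solve. Below is a minimal NumPy/SciPy sketch of that equivalence, independent of Theano and pylearn2; all names in it are illustrative only.

import numpy
import scipy.sparse.linalg

rng = numpy.random.RandomState([1, 2, 3])
n = 5
A = rng.randn(2 * n, n)
M = numpy.dot(A.T, A)         # symmetric positive definite, as in the test
b = rng.randn(n)

def f(x):
    # the quadratic being minimized (the constant c does not affect the argmin)
    return 0.5 * x.dot(M).dot(x) - b.dot(x)

x_direct = numpy.linalg.solve(M, b)          # direct solve of M x = b
x_cg, info = scipy.sparse.linalg.cg(M, b)    # iterative conjugate gradient solve
assert info == 0                             # info == 0 means CG converged

# both routes should reach essentially the same minimum of f
assert abs(f(x_cg) - f(x_direct)) < 1e-5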
Example #2
    def infer_S_hat(self, V, H_hat, S_hat, var_s0_hat, var_s1_hat, max_iters):

        alpha = self.model.alpha

        # obj: mean truncated KL divergence; obs=locals() forwards the local
        # variational parameters to truncated_KL.
        obj = self.truncated_KL(V=V, obs=locals()).mean()

        # Refine S_hat with at most max_iters conjugate gradient steps on obj.
        new_S_hat = linear_cg(fn=obj, params=S_hat, max_iters=max_iters)

        return new_S_hat
Example #3
    def train_batch(self, dataset, batch_size):
        """
        .. todo::

            WRITEME
        """
        # TODO: this results in compilation happening every time learn is
        # called; should cache the compilation results, including those
        # inside cg
        X = dataset.get_design_matrix()
        m = X.shape[0]
        assert X.shape[1] == self.nvis

        gamma = N.zeros((batch_size, self.nhid))
        cur_gamma = T.vector(name='cur_gamma')
        cur_v = T.vector(name='cur_v')
        recons = T.dot(cur_gamma, self.W)
        recons.name = 'recons'

        recons_diffs = cur_v - recons
        recons_diffs.name = 'recons_diffs'

        recons_diff_sq = T.sqr(recons_diffs)
        recons_diff_sq.name = 'recons_diff_sq'

        recons_error = T.sum(recons_diff_sq)
        recons_error.name = 'recons_error'

        dict_dists = T.sum(T.sqr(self.W - cur_v), axis=1)
        dict_dists.name = 'dict_dists'

        abs_gamma = abs(cur_gamma)
        abs_gamma.name = 'abs_gamma'

        weighted_dists = T.dot(abs_gamma, dict_dists)
        weighted_dists.name = 'weighted_dists'

        penalty = self.coeff * weighted_dists
        penalty.name = 'penalty'

        #prevent directions of absolute flatness in the hessian
        #W_sq = T.sqr(self.W)
        #W_sq.name = 'W_sq'
        #debug =  T.sum(W_sq)
        debug = 1e-10 * T.sum(dict_dists)
        debug.name = 'debug'

        #J = debug
        J = recons_error + penalty + debug
        J.name = 'J'

        Jf = function([cur_v, cur_gamma], J)

        start = self.rng.randint(m - batch_size + 1)
        batch_X = X[start:start + batch_size, :]

        # TODO: optimize gamma
        print('optimizing gamma')
        for i in range(batch_size):
            #print(str(i + 1) + '/' + str(batch_size))
            gamma[i, :] = self.optimize_gamma(batch_X[i, :])

        print('max min')
        print(N.abs(gamma).min(axis=0).max())
        print('min max')
        print(N.abs(gamma).max(axis=0).max())

        # Optimize W
        print('optimizing W')
        warnings.warn("not tested since switching to Razvan's all-theano "
                      "implementation of linear cg")
        cg.linear_cg(J, [self.W], max_iters=3)

        err = 0.

        for i in range(batch_size):
            err += Jf(batch_X[i, :], gamma[i, :])
        assert not N.isnan(err)
        assert not N.isinf(err)
        print('err: ' + str(err))
        return True
Example #4
def find_careduce(var):
    # Recursively search the graph above `var` for a CAReduce op, printing
    # the variables on the path to it.
    if var.owner is None:
        return False
    op = var.owner.op
    opname = op.__class__.__name__

    if opname == 'CAReduce':
        print(var.owner.inputs)
        print(var)
        return True
    else:
        for ipt in var.owner.inputs:
            if find_careduce(ipt):
                print(var)
                return True
    return False



cg_update = function([V],
                     obj,
                     updates={S: linear_cg(fn=-obj, params=S, max_iters=3)})

for i in range(num_batches):
    X = dataset.get_batch_design(batch_size)
    em = init(X)

    print('batch ', i)
    print(em)

    for j in range(ga_updates):
        print('ga: ', update(X))
        print('cg: ', cg_update(X))
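Note: the max_iters arguments in the examples above truncate the conjugate gradient solver after a fixed number of iterations rather than running it to convergence. Below is a plain-NumPy sketch of what such a truncated linear CG step computes; it is an illustrative re-implementation, not pylearn2's linear_cg, and all names in it are hypothetical.

import numpy

def cg_steps(A, g, s0, max_iters=3):
    """Take up to max_iters conjugate gradient steps toward solving A s = g,
    i.e. toward the minimum of 0.5 * s^T A s - g^T s, starting from s0."""
    s = s0.copy()
    r = g - A.dot(s)                # residual
    d = r.copy()                    # initial search direction
    for _ in range(max_iters):
        rr = r.dot(r)
        if rr < 1e-20:              # already (numerically) converged
            break
        Ad = A.dot(d)
        alpha = rr / d.dot(Ad)      # exact step length along d
        s = s + alpha * d
        r = r - alpha * Ad          # update the residual
        beta = r.dot(r) / rr
        d = r + beta * d            # next A-conjugate direction
    return s

# usage: three truncated CG steps, analogous to linear_cg(..., max_iters=3)
rng = numpy.random.RandomState(0)
B = rng.randn(8, 5)
A = B.T.dot(B)                      # symmetric positive definite
g = rng.randn(5)
s = cg_steps(A, g, numpy.zeros(5), max_iters=3)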