Example #1
File: heise.py Project: ursk/heise
	def learnpixel(self, stim, spike, fourier=False):
		from scipy.optimize import fmin_l_bfgs_b as lbfgs
		
		self.im=24 # reduce the stimulus size a little bit. 
		
				
		
		if fourier:
			fft = np.abs(np.fft.fft2(stim[:,4:28,4:28]))
			f_mean = np.fft.fftshift(fft.mean(0))
			f_std = np.fft.fftshift((fft-fft.mean(0)).std(0))
			stim = (fft-f_mean) / f_std 
			stim = stim.reshape(self.T, self.im**2)[:,0:self.im*(self.im/2+1)] # cut off redundant frequencies
		else:
			stim = stim[:,4:28,4:28].reshape(self.T, self.im**2) # subset and flatten
		
		x0 = 0.001 * np.random.randn(stim.shape[1]+1)
		args = (stim[0:self.Ttrain, :], spike[0:self.Ttrain])
		out = lbfgs(self.cost_pixel, x0, fprime=None, args=[args], iprint=-1, maxiter=self.maxiter, disp=1)

		x = out[0]
		k = x[0:-1]
		b = x[-1]

		prediction = np.exp(np.dot(stim, k) + b)
		pixel_rsq = np.corrcoef(prediction[self.Ttrain:self.T], spike[self.Ttrain:self.T])[0,1]

		return pixel_rsq
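Because fprime=None is passed (and approx_grad is left at its default), fmin_l_bfgs_b expects self.cost_pixel to return both the objective value and its gradient. The method itself is not shown in this excerpt; a hypothetical standalone sketch of a Poisson-GLM objective consistent with the exponential prediction used above is:

import numpy as np

def cost_pixel(x, args):
    # hypothetical Poisson-GLM negative log-likelihood returning (cost, gradient)
    stim, spike = args                    # design matrix (T, D) and spike counts (T,)
    k, b = x[:-1], x[-1]                  # linear filter and scalar bias
    rate = np.exp(np.dot(stim, k) + b)    # predicted firing rate
    cost = np.sum(rate - spike * np.log(rate))              # NLL up to a constant
    resid = rate - spike
    grad = np.append(np.dot(stim.T, resid), np.sum(resid))  # d/dk and d/db
    return cost, grad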
    def LBFGSDescent(self):

        n, p = self.Datapoint.shape

        if self.feat_0 is None:
            if self.feat_init == "sampled":
                Ys = np.empty((self.n_components, p))
                Ys[:] = self.Datapoint[np.random.randint(0, n, self.n_components), :]
            elif self.feat_init == "uniform":
                Ys = np.ones((self.n_components, p)) / p
            elif self.feat_init == "random":
                Ys = np.random.rand(self.n_components, p)
                Ys = (Ys.T / np.sum(Ys, axis = 1)).T
        else:
            Ys = self.feat_0
        if self.wgt_0 is None:
            if self.wgt_init == "uniform":
                w = tf.divide(tf.ones((n, self.n_components)) ,
                              tf.cast(self.n_components,dtype=tf.float32))
            elif self.wgt_init == "random":
                w = np.random.rand(n, self.n_components)
                
                w = (w.T / np.sum(w, axis=1)).T
        else:
            w = self.wgt_0
        w = tf.cast(w,tf.float64)
        
        dicw0 = tf.reshape(log10(tf.concat([tf.transpose(Ys),w],axis=0)),[-1])
        #dicw0 = tf.concat([self.Datapoint[0],self.Datapoint[1]],axis=0)
        #dicw0= tf.reshape(log10(tf.concat([tf.transpose(dicw0),w],axis =0)),[-1])
        #err ,fullgrad = self.LBFGSFunc(dicw0)
        #dic = tfp.optimizer.lbfgs_minimize(self.LBFGSFunc, initial_position = dicw0, max_iterations=15000,parallel_iterations=1)
        dic = lbfgs(self.func, dicw0.numpy(), factr=10, pgtol=1e-10, maxiter=30)

        return dic, dicw0
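The log10 call above is not a TensorFlow built-in; a common helper definition, assumed here rather than taken from the project, is:

import tensorflow as tf

def log10(x):
    # elementwise base-10 logarithm built from natural logarithms
    return tf.math.log(x) / tf.math.log(tf.constant(10.0, dtype=x.dtype))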
Example #3
def multiproc_wrapper(stuff):
    """ Trivial wrapper for python multiprocessing """
    args, kwargs = stuff
    #print str(args)  + "\n" +  str(kwargs) + "\n----\n"
    params, _, result = lbfgs(*args, **kwargs)
    if result['warnflag'] != 0:
        logging.warning('lbfgs failed')
    return kwargs['args'][0], params
def multiproc_wrapper(stuff):
  """ Trivial wrapper for python multiprocessing """
  args, kwargs = stuff
  params, _, result = lbfgs(*args, **kwargs)
  if result['warnflag'] != 0:
    logging.warning('lbfgs -- {}'.format(result['task']))
  logging.debug("Ending params:\n{}".format(params))
  return params
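A minimal, hypothetical driver for the first wrapper above, assuming the module-level lbfgs and logging imports it relies on. Each work item packs the positional arguments (objective, start point) and the keyword arguments for fmin_l_bfgs_b; the per-task data travels through kwargs['args']:

import numpy as np
from multiprocessing import Pool
from scipy.optimize import fmin_l_bfgs_b as lbfgs

def quad_cost(x, target):
    # toy objective returning (value, gradient): squared distance to a per-task target
    diff = x - target
    return np.dot(diff, diff), 2.0 * diff

if __name__ == '__main__':
    jobs = [((quad_cost, np.zeros(3)), {'args': (np.full(3, float(t)),)})
            for t in range(4)]
    pool = Pool(2)
    results = pool.map(multiproc_wrapper, jobs)  # one (task_data, params) pair per job
    pool.close()
    pool.join()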
Example #6
    def learn(self, maxiter=1000, disp=0):
        """ trains the model using batch L-BFGS """
        reg = self.theta2vec(eye(self.N), zeros((self.embedding_size, self.N)), zeros((self.character_size, self.N)), zeros((self.N)))

        
        def ff(vec):
            W, E, S, b = self.vec2theta(vec)
            return self.f(W, E, S, b) + self.C / 2.0 * np.dot(vec - reg, vec - reg)

        def gg(vec):
            W, E, S, b = self.vec2theta(vec)
            gW, gE, gS, gb = self.g(W, E, S, b)
            return self.theta2vec(gW, gE, gS, gb) + self.C * (vec - reg)
        
        v = self.theta2vec(self.W, self.E, self.S, self.b)
        v, _, _ = lbfgs(ff, v, fprime=gg, maxiter=maxiter, disp=disp)
        self.W, self.E, self.S, self.b = self.vec2theta(v)
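theta2vec and vec2theta are not shown in the excerpt; in the class they are presumably methods that read the sizes from self. A hypothetical standalone pair consistent with the shapes used above (W is N x N, E is embedding_size x N, S is character_size x N, b has length N) might look like:

import numpy as np

def theta2vec(W, E, S, b):
    # flatten all parameter blocks into one vector for L-BFGS
    return np.concatenate([W.ravel(), E.ravel(), S.ravel(), np.ravel(b)])

def vec2theta(vec, N, embedding_size, character_size):
    # split the flat vector back into (W, E, S, b) with the model's shapes
    sizes = [N * N, embedding_size * N, character_size * N, N]
    W, E, S, b = np.split(vec, np.cumsum(sizes)[:-1])
    return (W.reshape(N, N), E.reshape(embedding_size, N),
            S.reshape(character_size, N), b)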
def train_ksne_lbfgs(X,Y,P,K,mf=10):
    N = X.shape[0]   # num points
    D = X.shape[1]   # input dimension

    Z = np.random.randn(N,P)  # points in embedded space

    T = sc.make_balanced_binary_tree(N)
    root_idx = N + T.shape[0]-1

    global_count_potential = np.zeros(N+1)
    global_count_potential[K] = 1
    only_global_constraint_dict = {}
    only_global_constraint_dict[root_idx] = global_count_potential

    # optimize the embedding coordinates Z directly
    res = lbfgs(sne_obj, Z.flatten(), fprime=sne_grad, args=(X,Y,K,P,T,only_global_constraint_dict,roots), maxfun=mf, disp=1)

    return res[0].reshape(Z.shape)
Example #8
File: heise.py Project: ursk/heise
	def learn(self, stimexp, spike):
		""" do the learning """	

		from scipy.optimize import fmin_l_bfgs_b as lbfgs
		# initialize at STA
		if 1:
			u=-self.sta_u      # U is 64 FFT components -- the SVD vectors are unit L2 norm! 
			v=-self.sta_v      # V is 525 locations -- sign flip weirdness in the gradient function?
			b=1*np.ones(1) # bias
			x0=np.vstack((u,v.T,b)).flatten() / np.sqrt(self.sta_s) # package parameters
		else:
			x0 = 0.001 * np.random.randn(self.win**2+self.frame**2+1)

		#stimexp = self.expand_stim(stim) # build fourier representation, 3GB
		args = (stimexp[0:self.Ttrain,:], spike[0:self.Ttrain]) # leave a validation set
		
		# numerical sanity check
		if 1:
			epsi = 1e-6
			eps1  =np.zeros(self.win**2 +self.frame**2 +1); eps1[1]=epsi
			eps100=np.zeros(self.win**2 +self.frame**2 +1); eps100[self.win**2+1]=epsi
			eps145=np.zeros(self.win**2 +self.frame**2 +1); eps145[-1]=epsi

			cost, gradient = self.cost_store(x0, args)
			cost1, gradient = self.cost_store(x0+eps1, args)
			cost100, gradient = self.cost_store(x0+eps100, args)
			cost145, gradient = self.cost_store(x0+eps145, args)
			print "Numerical gradient checks:"
			print gradient[1], (cost1-cost)/epsi   # ok
			print gradient[self.win**2+1], (cost100-cost)/epsi # ok
			print gradient[-1], (cost145-cost)/epsi# ok
			

		out = lbfgs(self.cost_store, x0, fprime=None, args=[args], iprint=-1, maxiter=self.maxiter, disp=1)

		x = out[0]
		glm_u = x[0:self.win**2]
		glm_v = x[self.win**2:-1]
		glm_b = x[-1]

		return glm_u, glm_v, glm_b
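The hand-rolled finite-difference check above can also be phrased with scipy.optimize.check_grad. A small self-contained illustration on a toy objective (not the project's cost_store):

import numpy as np
from scipy.optimize import check_grad

def toy_cost(x):
    return float(np.sum(x ** 2))

def toy_grad(x):
    return 2.0 * x

x0 = 0.001 * np.random.randn(5)
print(check_grad(toy_cost, toy_grad, x0))  # a small value (around 1e-7 or less) means the analytic gradient matches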
Example #9
    # SUB(n,i)|LLC(e) 537
    tensor_features = []

    extracted = features.extracted
    for i in xrange(len(train_str)):
        tensor_feature = np.zeros((len(train_str[i][0])+1, len(Sigma), len(Sigma), 3, features.num_features))
        for j in xrange(len(train_str[i][0])+1):
            for x in xrange(len(Sigma)):
                for y in xrange(len(Sigma)):
                    for a in xrange(3):
                        for feat in extracted[i][j][x][y][a]:
                            tensor_feature[j, x, y, a, feat] = 1.0
        tensor_features.append(tensor_feature)  # presumably the intended collection step

    w = np.zeros(((len(string1)+1)*len(Sigma)*len(Sigma)*3))
    
    def f(w):
        W = w.reshape((len(string1)+1, len(Sigma), len(Sigma), 3))
        return t.func(string1, string2, W)

    def g(w):
        W = w.reshape((len(string1)+1, len(Sigma), len(Sigma), 3))
        W_grad = np.asarray(t.grad(string1, string2, W))
        return W_grad.reshape(((len(string1)+1)*len(Sigma)*len(Sigma)*3))
        
    
    w, _, _ = lbfgs(f, w, fprime=g, disp=2, maxiter=1000)

    W = w.reshape((len(string1)+1, len(Sigma), len(Sigma), 3))
    print int2str(string1, Sigma_inv), "->", int2str(t.decode(string1, W), Sigma_inv)
Example #10
def multiproc_wrapper(stuff):
    """ Trivial wrapper for python multiprocessing """
    args, kwargs = stuff
    #print str(args)  + "\n" +  str(kwargs) + "\n----\n"
    return lbfgs(*args, **kwargs)
    #        random.choice(200,inputs_shape[2])[newaxis,newaxis,:]
    #        ]
    #feed_dict[inputs] = xy[
    #        random.randint(200/inputs_shape[0])+int(200/inputs_shape[0])*arange(0,inputs_shape[0],dtype=int32)[:,newaxis,newaxis],
    #        random.randint(200/inputs_shape[1])+int(200/inputs_shape[1])*arange(0,inputs_shape[1],dtype=int32)[newaxis,:,newaxis],
    #        random.randint(200/inputs_shape[2])+int(200/inputs_shape[2])*arange(0,inputs_shape[2],dtype=int32)[newaxis,newaxis,:]
    #        ]
    feed_dict[sample_label] = f(feed_dict[inputs])

    from scipy.optimize import fmin_l_bfgs_b as lbfgs
    xopt, fopt, dict_opt = lbfgs(
        func=obj.f,
        x0=xopt,
        #x0=(random.rand(obj.allsize)-0.5)/10,
        fprime=obj.g,
        args=(feed_dict, ),
        m=200,
        maxiter=5,
        iprint=5,
        factr=1e1,
        pgtol=1e-16)
obj.set_xopt(xopt)
obj.update()
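The obj used here is never defined in the excerpt; a minimal, hypothetical stand-in for the interface these snippets assume (f and g callables for scipy's L-BFGS plus set_xopt/update hooks) could look like:

class ObjectiveWrapper(object):
    # hypothetical sketch, not the original class
    def __init__(self, fun, grad):
        self.fun, self.grad, self.xopt = fun, grad, None

    def f(self, x, feed_dict=None):
        return self.fun(x)     # scalar loss at parameters x

    def g(self, x, feed_dict=None):
        return self.grad(x)    # flat gradient matching x

    def set_xopt(self, xopt):
        self.xopt = xopt       # remember the optimum returned by L-BFGS

    def update(self):
        pass                   # in the original, presumably writes xopt back into the model variables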
#%%
point_flat = reshape(xy, [-1, dim])
point_flat = xy[random.randint(200 / inputs_shape[0]) +
                int(200 / inputs_shape[0]) *
                arange(0, inputs_shape[0], dtype=int32)[:, newaxis, newaxis],
                random.randint(200 / inputs_shape[1]) +
                int(200 / inputs_shape[1]) *
                arange(0, inputs_shape[1], dtype=int32)[newaxis, :, newaxis],
                random.randint(200 / inputs_shape[2]) +
                int(200 / inputs_shape[2]) *
                arange(0, inputs_shape[2], dtype=int32)[newaxis, newaxis, :]]
Example #13
        return W_grad.reshape(((len(string1)+1)*len(Sigma)*len(Sigma)*3))
    """

    #f_tropical(theta)
    #import sys; sys.exit(0)

    cProfile.runctx("f(theta)", globals(), locals(), '.prof')
    s = pstats.Stats('.prof')
    s.strip_dirs().sort_stats('time').print_stats(30)

    cProfile.runctx("f_tropical(theta)", globals(), locals(), '.prof')
    s = pstats.Stats('.prof')
    s.strip_dirs().sort_stats('time').print_stats(30)

    import sys; sys.exit(0)
    theta, _, _ = lbfgs(f, theta, fprime=g, disp=2, maxiter=100)
    #w, _, _ = lbfgs(f, w, fprime=g, disp=2, maxiter=100)
    #W = w.reshape((len(string1)+1, len(Sigma), len(Sigma), 3))

for i, (x, y) in enumerate(train):
    score, decoded = t.decode_features(x, i, theta, features, True)
    guess = int2str(decoded, Sigma_inv)
    print int2str(x, Sigma_inv), "->", guess, int2str(y, Sigma_inv), guess == int2str(y, Sigma_inv)
    """
    for iteration in xrange(30):
        print iteration
        #npr.shuffle(train)
        for i, (x, y) in enumerate(train):
            theta_g = zeros_like(theta)
Example #14
def opt(X, n, m, r, L, eta, iters=2000, fhandle=None, fudge=0.01):

    W = maximum(np.random.randn(n, r), 0)
    H = maximum(np.random.randn(r, m), 0)
    theta = maximum(np.random.randn(r), 0)

    mu_max = 1000.

    Mu_W = maximum(np.random.randn(n, r), 0) + fudge
    rho_W = ones_like(Mu_W)
    Mu_H = maximum(np.random.randn(r, m), 0) + fudge
    rho_H = ones_like(Mu_H)
    Mu_theta = maximum(np.random.randn(r), 0) + fudge
    rho_theta = ones_like(Mu_theta)

    for k in xrange(iters):

        # iterate on W
        for i in xrange(50):
            params = W.reshape(-1)

            def f(params):
                W_new = np.resize(params, W.shape)
                return func(X, W_new, theta, H, L, eta, Mu_W, Mu_theta, Mu_H,
                            rho_W, rho_theta, rho_H)

            def g(params):
                W_new = np.resize(params, W.shape)
                return np.resize(
                    grad_W(X, W_new, theta, H, L, eta, Mu_W, Mu_theta, Mu_H,
                           rho_W, rho_theta, rho_H), params.shape)

            opt, _, _ = lbfgs(f, params, fprime=g, disp=0, maxiter=10)
            W = opt.reshape(W.shape)
            Mu_W += rho_W * np.maximum(0, -W)
            V_W = np.minimum(W, Mu_W / rho_W)
            rho_W *= 1.01
            Mu_W = np.minimum(Mu_W, mu_max)

            string = "innerW {0} {1}: {2}".format(
                *(i, k, func_orig(X, W, theta, H, L, eta)))
            print string
            if fhandle is not None:
                fhandle.write(string + "\n")

        W = np.maximum(0, W)

        string = "W {0}: {1}".format(*(k, func_orig(X, W, theta, H, L, eta)))
        print string
        if fhandle is not None:
            fhandle.write(string + "\n")

        # iterate on H
        for i in xrange(50):
            params = H.reshape(-1)

            def f(params):
                H_new = np.resize(params, H.shape)
                return func(X, W, theta, H_new, L, eta, Mu_W, Mu_theta, Mu_H,
                            rho_W, rho_theta, rho_H)

            def g(params):
                H_new = np.resize(params, H.shape)
                return np.resize(
                    grad_H(X, W, theta, H_new, L, eta, Mu_W, Mu_theta, Mu_H,
                           rho_W, rho_theta, rho_H), params.shape)

            opt, _, _ = lbfgs(f, params, fprime=g, disp=0, maxiter=10)
            H = opt.reshape(H.shape)
            Mu_H += rho_H * np.maximum(0, -H)
            V_H = np.minimum(H, Mu_H / rho_H)
            rho_H *= 1.01
            Mu_H = np.minimum(Mu_H, mu_max)

            string = "innerH {0} {1}: {2}".format(
                *(i, k, func_orig(X, W, theta, H, L, eta)))
            print string
            if fhandle is not None:
                fhandle.write(string + "\n")

        H = np.maximum(0, H)

        string = "H {0}: {1}".format(*(k, func_orig(X, W, theta, H, L, eta)))
        print string
        if fhandle is not None:
            fhandle.write(string + "\n")

        # iterate on theta
        for i in xrange(50):
            params = theta.reshape(-1)

            def f(params):
                theta_new = np.resize(params, theta.shape)
                return func(X, W, theta_new, H, L, eta, Mu_W, Mu_theta, Mu_H,
                            rho_W, rho_theta, rho_H)

            def g(params):
                theta_new = np.resize(params, theta.shape)
                return np.resize(
                    grad_theta(X, W, theta_new, H, L, eta, Mu_W, Mu_theta,
                               Mu_H, rho_W, rho_theta, rho_H), params.shape)

            opt, _, _ = lbfgs(f, params, fprime=g, disp=0, maxiter=100)
            theta = opt.reshape(theta.shape)

            Mu_theta += rho_theta * np.maximum(0, -theta)
            V_theta = np.minimum(theta, Mu_theta / rho_theta)
            rho_theta *= 1.01
            Mu_theta = np.minimum(Mu_theta, mu_max)

            string = "innertheta {0} {1}: {2}".format(
                *(i, k, func_orig(X, W, theta, H, L, eta)))
            print string
            if fhandle is not None:
                fhandle.write(string + "\n")

        theta = np.maximum(0, theta)

        string = "theta {0}: {1}".format(*(k,
                                           func_orig(X, W, theta, H, L, eta)))
        print string
        if fhandle is not None:
            fhandle.write(string + "\n")
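The three inner blocks above repeat the same multiplier update for the nonnegativity constraints; a compact helper capturing that pattern (a refactoring sketch, not code from the project) would be:

import numpy as np

def update_multiplier(Mu, rho, X, mu_max=1000.0, growth=1.01):
    # penalize currently negative entries of X, cap the multiplier, and grow the penalty weight
    Mu = np.minimum(Mu + rho * np.maximum(0.0, -X), mu_max)
    rho = rho * growth
    return Mu, rho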
Example #15
        a,
    ])
sess = tf.InteractiveSession()
sess.run([a.initializer, b.initializer])
#%%
from scipy.optimize import fmin_l_bfgs_b as lbfgs
data = random.randn(100, 10)
data_updater = updater()
data_updater.add(x, y)
with sess.as_default():
    data_updater.update({
        x: data,
        y: data @ a_true + b_true + random.randn(100, 1) * 0.3
    })
    xopt, fopt, dict_opt = lbfgs(loss1wrapper.f,
                                 zeros(11),
                                 fprime=loss1wrapper.g)
    loss1wrapper.set_xopt(xopt)
    loss1wrapper.update()
    print('l1 opt: ')
    print('a: ', reshape(a.eval(), [
        10,
    ]))
    print('b: ', b.eval())
    xopt, fopt, dict_opt = lbfgs(loss2wrapper.f,
                                 zeros(11),
                                 fprime=loss2wrapper.g,
                                 args=({
                                     x:
                                     data,
                                     y:
inputs = np.array([[0.52, 1.12, 0.77], [0.88, -1.08, 0.15],
                   [0.52, 0.06, -1.30], [0.74, -2.49, 1.39]])
targets = np.array([True, True, False, True])

# Build a function that returns gradients of training loss using autograd.
cost_grad = grad(cost)

# Check the gradients numerically, just to be safe.
weights = np.array([0.0, 0.0, 0.0])
quick_grad_check(cost, weights)

# Optimize weights using gradient descent.
print "Initial loss:", cost(weights)
momentum = 0
for i in xrange(1000):
    # print cost_grad(weights)
    momentum = cost_grad(weights) + momentum * 0.8
    weights -= momentum
    # print cost(weights)

print "Trained loss:", cost(weights)
print weights

weights = np.array([0.0, 0.0, 0.0])
[x, f, d] = lbfgs(func=cost, x0=weights, fprime=cost_grad)
print x
print f
print d

print cost(np.array([4.82414793, -0.91942305, 6.91707966]))
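The cost function optimized above is not shown in the excerpt. The inputs and targets arrays match autograd's logistic-regression example, so cost very likely is that example's training loss; a matching definition would be roughly:

import autograd.numpy as np

def sigmoid(x):
    return 0.5 * (np.tanh(x) + 1)

def cost(weights):
    # negative log-likelihood of a logistic model on the global (inputs, targets)
    preds = sigmoid(np.dot(inputs, weights))
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))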
feed_dict = {}
try:
    feed_dict[gamma] = 500
except NameError:
    pass
for iii in range(50):
    feed_dict[inputs] = xy[random.choice(1000, 100)[:, newaxis],
                           random.choice(1000, 100)[newaxis, :]]
    feed_dict[sample_label] = f(feed_dict[inputs])

    from scipy.optimize import fmin_l_bfgs_b as lbfgs
    xopt, fopt, dict_opt = lbfgs(func=obj.f,
                                 x0=xopt,
                                 fprime=obj.g,
                                 args=(feed_dict, ),
                                 m=200,
                                 maxiter=10,
                                 iprint=10,
                                 factr=1e1,
                                 pgtol=1e-16)
    obj.set_xopt(xopt)
    obj.update()
#%%
feed_dict[inputs] = xy[5::10, 5::10]
feed_dict[sample_label] = f(feed_dict[inputs])
print(sqrt(loss.eval(feed_dict=feed_dict)).mean())
print(fit_err.eval(feed_dict=feed_dict).max())
infe = reshape(outputs.eval(feed_dict=feed_dict), [100, 100])
infe_true = reshape(feed_dict[sample_label], [100, 100])
import shelve
with shelve.open('results/infe') as db: