def learnpixel(self, stim, spike, fourier=False):
    from scipy.optimize import fmin_l_bfgs_b as lbfgs
    self.im = 24  # reduce the stimulus size a little bit
    if fourier:
        fft = np.abs(np.fft.fft2(stim[:, 4:28, 4:28]))
        f_mean = np.fft.fftshift(fft.mean(0))
        f_std = np.fft.fftshift((fft - fft.mean(0)).std(0))
        stim = (fft - f_mean) / f_std
        stim = stim.reshape(self.T, self.im**2)[:, 0:self.im*(self.im/2+1)]  # cut off redundant frequencies
    else:
        stim = stim[:, 4:28, 4:28].reshape(self.T, self.im**2)  # subset and flatten
    x0 = 0.001 * np.random.randn(stim.shape[1] + 1)
    args = (stim[0:self.Ttrain, :], spike[0:self.Ttrain])
    out = lbfgs(self.cost_pixel, x0, fprime=None, args=[args],
                iprint=-1, maxiter=self.maxiter, disp=1)
    x = out[0]
    k = x[0:-1]  # pixel (or Fourier) filter
    b = x[-1]    # bias
    prediction = np.exp(np.dot(stim, k) + b)
    # correlation between prediction and spikes on the held-out segment
    pixel_rsq = np.corrcoef(prediction[self.Ttrain:self.T],
                            spike[self.Ttrain:self.T])[0, 1]
    return pixel_rsq
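# A minimal, self-contained sketch (illustrative, not from the code above) of
# the fmin_l_bfgs_b call pattern used here: with fprime=None and the default
# approx_grad=0, the objective must return a (cost, gradient) pair, and `args`
# supplies its extra positional arguments.
import numpy as np
from scipy.optimize import fmin_l_bfgs_b as lbfgs

def quadratic(x, A, b):
    r = A.dot(x) - b
    return 0.5 * r.dot(r), A.T.dot(r)  # cost and its analytic gradient

A = np.random.randn(20, 5)
b = np.random.randn(20)
x_opt, f_opt, info = lbfgs(quadratic, np.zeros(5), fprime=None, args=(A, b))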
def LBFGSDescent(self):
    from scipy.optimize import fmin_l_bfgs_b as lbfgs
    n, p = self.Datapoint.shape
    # initialize the features
    if self.feat_0 is None:
        if self.feat_init == "sampled":
            Ys = np.empty((self.n_components, p))
            Ys[:] = self.Datapoint[np.random.randint(0, n, self.n_components), :]
        elif self.feat_init == "uniform":
            Ys = np.ones((self.n_components, p)) / p
        elif self.feat_init == "random":
            Ys = np.random.rand(self.n_components, p)
            Ys = (Ys.T / np.sum(Ys, axis=1)).T  # normalize rows to sum to 1
    else:
        Ys = self.feat_0
    # initialize the weights
    if self.wgt_0 is None:
        if self.wgt_init == "uniform":
            w = tf.divide(tf.ones((n, self.n_components)),
                          tf.cast(self.n_components, dtype=tf.float32))
        elif self.wgt_init == "random":
            w = np.random.rand(n, self.n_components)
            w = (w.T / np.sum(w, axis=1)).T
    else:
        w = self.wgt_0
    w = tf.cast(w, tf.float64)
    # stack features and weights into one flat log-domain vector
    # (log10 is assumed to be an elementwise helper defined elsewhere)
    dicw0 = tf.reshape(log10(tf.concat([tf.transpose(Ys), w], axis=0)), [-1])
    #dic = tfp.optimizer.lbfgs_minimize(self.LBFGSFunc, initial_position=dicw0,
    #                                   max_iterations=15000, parallel_iterations=1)
    dic = lbfgs(self.func, dicw0.numpy(), factr=10, pgtol=1e-10, maxiter=30)
    return dic, dicw0
def multiproc_wrapper(stuff):
    """ Trivial wrapper for python multiprocessing """
    args, kwargs = stuff
    params, _, result = lbfgs(*args, **kwargs)
    if result['warnflag'] != 0:
        logging.warning('lbfgs failed')
    return kwargs['args'][0], params
def multiproc_wrapper(stuff):
    """ Trivial wrapper for python multiprocessing """
    args, kwargs = stuff
    params, _, result = lbfgs(*args, **kwargs)
    if result['warnflag'] != 0:
        logging.warning('lbfgs -- {}'.format(result['task']))
        logging.debug("Ending params:\n{}".format(params))
    return params
def learn(self, maxiter=1000, disp=0):
    """ trains the model using batch L-BFGS """
    # regularize toward this point rather than toward zero
    reg = self.theta2vec(eye(self.N),
                         zeros((self.embedding_size, self.N)),
                         zeros((self.character_size, self.N)),
                         zeros((self.N)))

    def ff(vec):
        W, E, S, b = self.vec2theta(vec)
        return self.f(W, E, S, b) + self.C / 2.0 * np.dot(vec - reg, vec - reg)

    def gg(vec):
        W, E, S, b = self.vec2theta(vec)
        gW, gE, gS, gb = self.g(W, E, S, b)
        return self.theta2vec(gW, gE, gS, gb) + self.C * (vec - reg)

    v = self.theta2vec(self.W, self.E, self.S, self.b)
    v, _, _ = lbfgs(ff, v, fprime=gg, maxiter=maxiter, disp=disp)
    self.W, self.E, self.S, self.b = self.vec2theta(v)
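# A hedged sketch of the theta2vec/vec2theta packing idiom used above: L-BFGS
# optimizes a flat vector, so structured parameters are concatenated into one
# array and unpacked inside the objective. Names and shapes are illustrative.
import numpy as np

def pack(W, b):
    return np.concatenate([W.ravel(), b.ravel()])

def unpack(vec, n, m):
    return vec[:n * m].reshape(n, m), vec[n * m:]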
def train_ksne_lbfgs(X, Y, P, K, mf=10):
    N = X.shape[0]  # num points
    D = X.shape[1]  # input dimension
    Z = np.random.randn(N, P)  # points in embedded space
    T = sc.make_balanced_binary_tree(N)
    root_idx = N + T.shape[0] - 1
    global_count_potential = np.zeros(N + 1)
    global_count_potential[K] = 1
    only_global_constraint_dict = {}
    only_global_constraint_dict[root_idx] = global_count_potential
    # optimize the embedding Z (the original referenced undefined names
    # `A` and `roots`; Z and root_idx are the evident intent)
    res = lbfgs(sne_obj, Z.flatten(), fprime=sne_grad,
                args=(X, Y, K, P, T, only_global_constraint_dict, root_idx),
                maxfun=mf, disp=1)
    return res[0].reshape(Z.shape)
def learn(self, stimexp, spike):
    """ do the learning """
    from scipy.optimize import fmin_l_bfgs_b as lbfgs
    # initialize at the STA
    if 1:
        u = -self.sta_u     # U is 64 FFT components -- the SVD vectors are unit L2 norm!
        v = -self.sta_v     # V is 525 locations -- sign flip weirdness in the gradient function?
        b = 1 * np.ones(1)  # bias
        x0 = np.vstack((u, v.T, b)).flatten() / np.sqrt(self.sta_s)  # package parameters
    else:
        x0 = 0.001 * np.random.randn(self.win**2 + self.frame**2 + 1)
    #stimexp = self.expand_stim(stim)  # build fourier representation, 3GB
    args = (stimexp[0:self.Ttrain, :], spike[0:self.Ttrain])  # leave a validation set
    # numerical sanity check
    if 1:
        epsi = 1e-6
        eps1 = np.zeros(self.win**2 + self.frame**2 + 1); eps1[1] = epsi
        eps100 = np.zeros(self.win**2 + self.frame**2 + 1); eps100[self.win**2 + 1] = epsi
        eps145 = np.zeros(self.win**2 + self.frame**2 + 1); eps145[-1] = epsi
        # keep the gradient at x0; the original overwrote it with each
        # perturbed call, so it was comparing against the wrong point
        cost, gradient = self.cost_store(x0, args)
        cost1, _ = self.cost_store(x0 + eps1, args)
        cost100, _ = self.cost_store(x0 + eps100, args)
        cost145, _ = self.cost_store(x0 + eps145, args)
        print "Numerical gradient checks:"
        print gradient[1], (cost1 - cost) / epsi                  # ok
        print gradient[self.win**2 + 1], (cost100 - cost) / epsi  # ok
        print gradient[-1], (cost145 - cost) / epsi               # ok
    out = lbfgs(self.cost_store, x0, fprime=None, args=[args],
                iprint=-1, maxiter=self.maxiter, disp=1)
    x = out[0]
    glm_u = x[0:self.win**2]
    glm_v = x[self.win**2:-1]
    glm_b = x[-1]
    return glm_u, glm_v, glm_b
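# A generic form (illustrative, not from the original) of the numerical
# sanity check above: perturb one coordinate at a time and compare the
# finite-difference quotient against the analytic gradient at x0.
import numpy as np

def check_grad_coords(cost_and_grad, x0, coords, args, epsi=1e-6):
    cost, gradient = cost_and_grad(x0, args)
    for i in coords:
        e = np.zeros_like(x0)
        e[i] = epsi
        cost_i, _ = cost_and_grad(x0 + e, args)
        print gradient[i], (cost_i - cost) / epsi  # the two should roughly agree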
# SUB(n,i)|LLC(e)
tensor_features = []
extracted = features.extracted
for i in xrange(len(train_str)):
    tensor_feature = np.zeros((len(train_str[i][0]) + 1, len(Sigma), len(Sigma), 3,
                               features.num_features))
    for j in xrange(len(train_str[i][0]) + 1):
        for x in xrange(len(Sigma)):
            for y in xrange(len(Sigma)):
                for a in xrange(3):
                    for feat in extracted[i][j][x][y][a]:
                        tensor_feature[j, x, y, a, feat] = 1.0
    tensor_features.append(tensor_feature)  # the original built this tensor but never stored it

w = np.zeros(((len(string1) + 1) * len(Sigma) * len(Sigma) * 3))

def f(w):
    W = w.reshape((len(string1) + 1, len(Sigma), len(Sigma), 3))
    return t.func(string1, string2, W)

def g(w):
    W = w.reshape((len(string1) + 1, len(Sigma), len(Sigma), 3))
    W_grad = np.asarray(t.grad(string1, string2, W))
    return W_grad.reshape(((len(string1) + 1) * len(Sigma) * len(Sigma) * 3))

w, _, _ = lbfgs(f, w, fprime=g, disp=2, maxiter=1000)
W = w.reshape((len(string1) + 1, len(Sigma), len(Sigma), 3))
print int2str(string1, Sigma_inv), "->", int2str(t.decode(string1, W), Sigma_inv)
def multiproc_wrapper(stuff):
    """ Trivial wrapper for python multiprocessing """
    args, kwargs = stuff
    return lbfgs(*args, **kwargs)
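# A hedged sketch of how wrappers like the ones above are typically driven:
# each job is an (args, kwargs) pair for fmin_l_bfgs_b, farmed out through
# multiprocessing.Pool.map. The job list and function name are illustrative.
import multiprocessing

def run_lbfgs_jobs(jobs):
    pool = multiprocessing.Pool()
    try:
        return pool.map(multiproc_wrapper, jobs)
    finally:
        pool.close()
        pool.join()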
# random.choice(200, inputs_shape[2])[newaxis, newaxis, :]
# ]
#feed_dict[inputs] = xy[
#    random.randint(200/inputs_shape[0]) + int(200/inputs_shape[0])*arange(0, inputs_shape[0], dtype=int32)[:, newaxis, newaxis],
#    random.randint(200/inputs_shape[1]) + int(200/inputs_shape[1])*arange(0, inputs_shape[1], dtype=int32)[newaxis, :, newaxis],
#    random.randint(200/inputs_shape[2]) + int(200/inputs_shape[2])*arange(0, inputs_shape[2], dtype=int32)[newaxis, newaxis, :]
#]
feed_dict[sample_label] = f(feed_dict[inputs])

from scipy.optimize import fmin_l_bfgs_b as lbfgs
xopt, fopt, dict_opt = lbfgs(func=obj.f,
                             x0=xopt,
                             #x0=(random.rand(obj.allsize) - 0.5) / 10,
                             fprime=obj.g,
                             args=(feed_dict, ),
                             m=200,
                             maxiter=5,
                             iprint=5,
                             factr=1e1,
                             pgtol=1e-16)
obj.set_xopt(xopt)
obj.update()

#%%
point_flat = reshape(xy, [-1, dim])
point_flat = xy[random.randint(200 / inputs_shape[0])
                + int(200 / inputs_shape[0]) * arange(0, inputs_shape[0], dtype=int32)[:, newaxis, newaxis],
                random.randint(200 / inputs_shape[1])
                + int(200 / inputs_shape[1]) * arange(0, inputs_shape[1], dtype=int32)[newaxis, :, newaxis],
    return W_grad.reshape(((len(string1) + 1) * len(Sigma) * len(Sigma) * 3))

# profiling scratch, disabled:
#cProfile.runctx("f(theta)", globals(), locals(), '.prof')
#s = pstats.Stats('.prof')
#s.strip_dirs().sort_stats('time').print_stats(30)
#cProfile.runctx("f_tropical(theta)", globals(), locals(), '.prof')
#s = pstats.Stats('.prof')
#s.strip_dirs().sort_stats('time').print_stats(30)
#import sys; sys.exit(0)

theta, _, _ = lbfgs(f, theta, fprime=g, disp=2, maxiter=100)
#w, _, _ = lbfgs(f, w, fprime=g, disp=2, maxiter=100)
#W = w.reshape((len(string1)+1, len(Sigma), len(Sigma), 3))

# decode the training set with the learned parameters
for i, (x, y) in enumerate(train):
    score, decoded = t.decode_features(x, i, theta, features, True)
    guess = int2str(decoded, Sigma_inv)
    print int2str(x, Sigma_inv), "->", guess, int2str(y, Sigma_inv), guess == int2str(y, Sigma_inv)

# alternative: iterative training loop
for iteration in xrange(30):
    print iteration
    #npr.shuffle(train)
    for i, (x, y) in enumerate(train):
        theta_g = zeros_like(theta)
def opt(X, n, m, r, L, eta, iters=2000, fhandle=None, fudge=0.01):
    # nonnegative factors, initialized at clipped Gaussian noise
    W = maximum(np.random.randn(n, r), 0)
    H = maximum(np.random.randn(r, m), 0)
    theta = maximum(np.random.randn(r), 0)

    # augmented-Lagrangian multipliers and penalty weights for W, H, theta >= 0
    mu_max = 1000.
    Mu_W = maximum(np.random.randn(n, r), 0) + fudge
    rho_W = ones_like(Mu_W)
    Mu_H = maximum(np.random.randn(r, m), 0) + fudge
    rho_H = ones_like(Mu_H)
    Mu_theta = maximum(np.random.randn(r), 0) + fudge
    rho_theta = ones_like(Mu_theta)

    for k in xrange(iters):
        # iterate on W
        for i in xrange(50):
            params = W.reshape(-1)

            def f(params):
                W_new = np.resize(params, W.shape)
                return func(X, W_new, theta, H, L, eta,
                            Mu_W, Mu_theta, Mu_H, rho_W, rho_theta, rho_H)

            def g(params):
                W_new = np.resize(params, W.shape)
                return np.resize(
                    grad_W(X, W_new, theta, H, L, eta,
                           Mu_W, Mu_theta, Mu_H, rho_W, rho_theta, rho_H),
                    params.shape)

            opt, _, _ = lbfgs(f, params, fprime=g, disp=0, maxiter=10)
            W = opt.reshape(W.shape)
            Mu_W += rho_W * np.maximum(0, -W)
            V_W = np.minimum(W, Mu_W / rho_W)
            rho_W *= 1.01
            Mu_W = np.minimum(Mu_W, mu_max)
            string = "innerW {0} {1}: {2}".format(i, k, func_orig(X, W, theta, H, L, eta))
            print string
            if fhandle is not None:
                fhandle.write(string + "\n")
        W = np.maximum(0, W)
        string = "W {0}: {1}".format(k, func_orig(X, W, theta, H, L, eta))
        print string
        if fhandle is not None:
            fhandle.write(string + "\n")

        # iterate on H
        for i in xrange(50):
            params = H.reshape(-1)

            def f(params):
                H_new = np.resize(params, H.shape)
                return func(X, W, theta, H_new, L, eta,
                            Mu_W, Mu_theta, Mu_H, rho_W, rho_theta, rho_H)

            def g(params):
                H_new = np.resize(params, H.shape)
                return np.resize(
                    grad_H(X, W, theta, H_new, L, eta,
                           Mu_W, Mu_theta, Mu_H, rho_W, rho_theta, rho_H),
                    params.shape)

            opt, _, _ = lbfgs(f, params, fprime=g, disp=0, maxiter=10)
            H = opt.reshape(H.shape)
            Mu_H += rho_H * np.maximum(0, -H)
            V_H = np.minimum(H, Mu_H / rho_H)
            rho_H *= 1.01
            Mu_H = np.minimum(Mu_H, mu_max)
            string = "innerH {0} {1}: {2}".format(i, k, func_orig(X, W, theta, H, L, eta))
            print string
            if fhandle is not None:
                fhandle.write(string + "\n")
        H = np.maximum(0, H)
        string = "H {0}: {1}".format(k, func_orig(X, W, theta, H, L, eta))
        print string
        if fhandle is not None:
            fhandle.write(string + "\n")

        # iterate on theta
        for i in xrange(50):
            params = theta.reshape(-1)

            def f(params):
                theta_new = np.resize(params, theta.shape)
                return func(X, W, theta_new, H, L, eta,
                            Mu_W, Mu_theta, Mu_H, rho_W, rho_theta, rho_H)

            def g(params):
                theta_new = np.resize(params, theta.shape)
                return np.resize(
                    grad_theta(X, W, theta_new, H, L, eta,
                               Mu_W, Mu_theta, Mu_H, rho_W, rho_theta, rho_H),
                    params.shape)

            opt, _, _ = lbfgs(f, params, fprime=g, disp=0, maxiter=100)
            theta = opt.reshape(theta.shape)
            Mu_theta += rho_theta * np.maximum(0, -theta)
            V_theta = np.minimum(theta, Mu_theta / rho_theta)
            rho_theta *= 1.01
            Mu_theta = np.minimum(Mu_theta, mu_max)
            string = "innertheta {0} {1}: {2}".format(i, k, func_orig(X, W, theta, H, L, eta))
            print string
            if fhandle is not None:
                fhandle.write(string + "\n")
        theta = np.maximum(0, theta)
        string = "theta {0}: {1}".format(k, func_orig(X, W, theta, H, L, eta))
        print string
        if fhandle is not None:
            fhandle.write(string + "\n")
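# For reference, the multiplier updates above follow the standard augmented-
# Lagrangian recipe for a nonnegativity constraint: after each inner L-BFGS
# solve,
#     Mu <- min(Mu + rho * max(0, -W), mu_max)    and    rho <- 1.01 * rho,
# so entries violating W >= 0 are penalized increasingly hard across outer
# iterations, before the final projection W = max(0, W).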
    a,
])
sess = tf.InteractiveSession()
sess.run([a.initializer, b.initializer])

#%%
from scipy.optimize import fmin_l_bfgs_b as lbfgs

data = random.randn(100, 10)
data_updater = updater()
data_updater.add(x, y)
with sess.as_default():
    data_updater.update({
        x: data,
        y: data @ a_true + b_true + random.randn(100, 1) * 0.3
    })

xopt, fopt, dict_opt = lbfgs(loss1wrapper.f, zeros(11), fprime=loss1wrapper.g)
loss1wrapper.set_xopt(xopt)
loss1wrapper.update()
print('l1 opt: ')
print('a: ', reshape(a.eval(), [10, ]))
print('b: ', b.eval())

xopt, fopt, dict_opt = lbfgs(loss2wrapper.f, zeros(11), fprime=loss2wrapper.g,
                             args=({
                                 x: data,
                                 y:
inputs = np.array([[0.52, 1.12, 0.77],
                   [0.88, -1.08, 0.15],
                   [0.52, 0.06, -1.30],
                   [0.74, -2.49, 1.39]])
targets = np.array([True, True, False, True])

# Build a function that returns gradients of training loss using autograd.
cost_grad = grad(cost)

# Check the gradients numerically, just to be safe.
weights = np.array([0.0, 0.0, 0.0])
quick_grad_check(cost, weights)

# Optimize weights using gradient descent with momentum.
print "Initial loss:", cost(weights)
momentum = 0
for i in xrange(1000):
    momentum = cost_grad(weights) + momentum * 0.8
    weights -= momentum
print "Trained loss:", cost(weights)
print weights

# Optimize the same weights with L-BFGS instead.
weights = np.array([0.0, 0.0, 0.0])
x, f, d = lbfgs(func=cost, x0=weights, fprime=cost_grad)
print x
print f
print d
print cost(np.array([4.82414793, -0.91942305, 6.91707966]))
from scipy.optimize import fmin_l_bfgs_b as lbfgs

feed_dict = {}
try:
    feed_dict[gamma] = 500
except NameError:
    pass
for iii in range(50):
    # resample a random 100x100 grid of training points each round
    feed_dict[inputs] = xy[random.choice(1000, 100)[:, newaxis],
                           random.choice(1000, 100)[newaxis, :]]
    feed_dict[sample_label] = f(feed_dict[inputs])
    xopt, fopt, dict_opt = lbfgs(func=obj.f,
                                 x0=xopt,
                                 fprime=obj.g,
                                 args=(feed_dict, ),
                                 m=200,
                                 maxiter=10,
                                 iprint=10,
                                 factr=1e1,
                                 pgtol=1e-16)
    obj.set_xopt(xopt)
    obj.update()

#%%
# evaluate on a regular grid
feed_dict[inputs] = xy[5::10, 5::10]
feed_dict[sample_label] = f(feed_dict[inputs])
print(sqrt(loss.eval(feed_dict=feed_dict)).mean())
print(fit_err.eval(feed_dict=feed_dict).max())
infe = reshape(outputs.eval(feed_dict=feed_dict), [100, 100])
infe_true = reshape(feed_dict[sample_label], [100, 100])

import shelve
with shelve.open('results/infe') as db: