def fit(self, X, y, initParams=None):
    self.params = np.zeros([1, X.shape[1] + 1])
    self.labels = np.unique(y)
    X_nopad = X
    # prepend a column of ones for the bias term
    X = np.pad(X, ((0, 0), (1, 0)), mode='constant', constant_values=1)
    #print self.cost(self.params, X, y)
    if initParams is None:
        init = np.random.random(self.params.size)
        #init = np.zeros(self.params.size)
    else:
        init = initParams

    if DEBUG:
        _epsilon = np.sqrt(np.finfo(float).eps)
        #print approx_fprime(self.params[0], self.cost, _epsilon, X, y)
        print check_grad(self.cost, self.grad, init, X, y)

    if self.optimizeOrder == 0:
        self.params = self.optimize(self.cost, init, args=(X, y), disp=False)
    if self.optimizeOrder == 1:
        self.params = self.optimize(self.cost, init, self.grad, args=(X, y), disp=False)
    return self
def test():
    data = np.loadtxt("data.txt")
    X = data[:, 0:-1]  # everything except the last column
    y = data[:, -1]    # just the last column
    args = (X, y)

    #theta = np.array([1.7657065779589087, -1.3841332550882446, -10.162222605402242])
    #theta = np.array([1.7999382115210827, -14.001391904643032, -5.577578503745549])
    theta = np.zeros(3)
    theta[0] = np.random.normal(0, 5)
    theta[1] = np.random.normal(0, 5)
    theta[2] = np.random.normal(0, 5)

    print theta
    print np.exp(theta)
    print logPosterior(theta, args)
    print gradLogPosterior(theta, args)
    print so.check_grad(logPosterior, gradLogPosterior, theta, args)

    newTheta = so.fmin_cg(logPosterior, theta, fprime=gradLogPosterior, args=[args],
                          gtol=1e-4, maxiter=100, disp=1)
    print newTheta, logPosterior(newTheta, args)

    K = kernel2(X, X, newTheta, wantderiv=False)
    L = np.linalg.cholesky(K)
    beta = np.linalg.solve(L.transpose(), np.linalg.solve(L, y))
    test = X
    #pred = [predict(i, input, K, target, newTheta, L, beta) for i in input]
    #pred = np.squeeze([predict(i, input, K, target, newTheta, L, beta) for i in input])
    demoplot(theta, args)
    demoplot(newTheta, args)
def test_gradients():
    K = 1
    B = 3
    T = 100
    dt = 1.0
    true_model = DiscreteTimeNetworkHawkesModelGammaMixture(K=K, B=B, dt=dt)
    S, R = true_model.generate(T=T)

    # Test with a standard Hawkes model
    test_model = DiscreteTimeStandardHawkesModel(K=K, B=B, dt=dt)
    test_model.add_data(S)

    # Check gradients with the initial parameters
    def objective(x):
        test_model.weights[0, :] = np.exp(x)
        return test_model.log_likelihood()

    def gradient(x):
        test_model.weights[0, :] = np.exp(x)
        return test_model.compute_gradient(0)

    print("Checking initial gradient: ")
    print(gradient(np.log(test_model.weights[0, :])))
    check_grad(objective, gradient, np.log(test_model.weights[0, :]))

    print("Checking gradient at true model parameters: ")
    test_model.initialize_with_gibbs_model(true_model)
    print(gradient(np.log(test_model.weights[0, :])))
    check_grad(objective, gradient, np.log(test_model.weights[0, :]))
def test_back_prop_with_diff_grad_checks(self, iter=200):
    eps = math.sqrt(np.finfo(float).eps)
    init_val = self.packTheta(self.W1, self.b1, self.W2, self.b2)
    err = optimize.check_grad(self.cost, self.cost_prime, init_val, self.X)
    print("Error after 0 iterations: %f, Error per Param: %f" % (err, err / init_val.size))

    res = optimize.minimize(fun=self.cost, x0=init_val, args=(self.X,), jac=self.cost_prime,
                            method='L-BFGS-B', options={'maxiter': iter})
    self.W1, self.b1, self.W2, self.b2 = self.unpackTheta(res.x)
    err = optimize.check_grad(self.cost, self.cost_prime, init_val, self.X)
    print("Error after 200 iterations: %f, Error per Param: %f" % (err, err / init_val.size))

    init_val = res.x
    res = optimize.minimize(fun=self.cost, x0=init_val, args=(self.X,), jac=self.cost_prime,
                            method='L-BFGS-B', options={'maxiter': iter})
    self.W1, self.b1, self.W2, self.b2 = self.unpackTheta(res.x)
    err = optimize.check_grad(self.cost, self.cost_prime, init_val, self.X)
    print("Error after 400 iterations: %f, Error per Param: %f" % (err, err / init_val.size))

    init_val = res.x
    res = optimize.minimize(fun=self.cost, x0=init_val, args=(self.X,), jac=self.cost_prime,
                            method='L-BFGS-B', options={'maxiter': iter})
    self.W1, self.b1, self.W2, self.b2 = self.unpackTheta(res.x)
    err = optimize.check_grad(self.cost, self.cost_prime, init_val, self.X)
    print("Error after 600 iterations: %f, Error per Param: %f" % (err, err / init_val.size))

    init_val = res.x
    res = optimize.minimize(fun=self.cost, x0=init_val, args=(self.X,), jac=self.cost_prime,
                            method='L-BFGS-B', options={'maxiter': iter})
    self.W1, self.b1, self.W2, self.b2 = self.unpackTheta(res.x)
    err = optimize.check_grad(self.cost, self.cost_prime, init_val, self.X)
    print("Error after 800 iterations: %f, Error per Param: %f" % (err, err / init_val.size))
def test_dldtheta(self):
    self.ECG.primary = ['q']

    def f(X):
        self.ECG.array2primary(X)
        lv = self.ECG.loglik(self.data)
        slv = sum(lv)
        return slv

    def df(X):
        self.ECG.array2primary(X)
        gv = self.ECG.dldtheta(self.data)
        sgv = sum(gv, axis=1)
        return sgv

    theta0 = self.ECG.primary2array()
    theta0 = abs(randn(len(theta0))) + 1
    err = check_grad(f, df, theta0)
    print "error in gradient: ", err

    self.ECG.primary = ['W']

    def f2(X):
        self.ECG.array2primary(X)
        lv = self.ECG.loglik(self.data)
        slv = sum(lv)
        return slv

    def df2(X):
        self.ECG.array2primary(X)
        gv = self.ECG.dldtheta(self.data)
        sgv = sum(gv, axis=1)
        return sgv

    theta0 = self.ECG.primary2array()
    theta0 = abs(randn(len(theta0))) + 1
    err = check_grad(f2, df2, theta0)
    print "error in gradient: ", err
    self.assertTrue(err < 1e-02)
def check_gradient(self):
    def cost(ws):
        return self.cost_function(ws, self._training_data[0:100, :],
                                  self._training_labels[0:100])

    def gradcost(ws):
        return self._back_prop(ws, self._training_data[0:100, :],
                               self._training_labels[0:100])

    print check_grad(cost, gradcost, self._betas)
def test_logistic_loss_derivative(n_samples=4, n_features=10, decimal=5):
    rng = np.random.RandomState(42)
    X = rng.randn(n_samples, n_features)
    y = rng.randn(n_samples)
    n_features = X.shape[1]
    w = rng.randn(n_features + 1)
    np.testing.assert_almost_equal(
        check_grad(lambda w: _logistic(X, y, w),
                   lambda w: _logistic_loss_grad(X, y, w), w),
        0.0, decimal=decimal)
    np.testing.assert_almost_equal(
        check_grad(lambda w: _logistic(X, y, w),
                   lambda w: _logistic_loss_grad(X, y, w), w),
        0.0, decimal=decimal)
def fit(self, X, y, initParams=None):
    X = np.pad(X, ((0, 0), (1, 0)), mode='constant', constant_values=1)
    inDim = X.shape[1]
    # if DEBUG:
    #     self.layersSize.append(1)
    #     self.layersSize.insert(0, int(inDim))
    #     self.yindi = np.asarray(np.logical_not(y), dtype=np.int32)
    # else:
    self.layersSize.append(len(np.unique(y)))
    self.layersSize.insert(0, int(inDim))
    self.setIndi(y)
    # self.layersSize[-1] = 1
    # self.yindi = np.expand_dims(self.yindi[:, 0].T, 1)

    paramSum = 0
    for i, layer in enumerate(self.layers):
        if not (i == len(self.layers) - 1):
            layer.initParams([self.layersSize[i + 1], self.layersSize[i]])
            split = self.layersSize[i + 1] * self.layersSize[i]
            paramSum += split
            self.paramSplits.append(paramSum)
        else:
            layer.setParams(None)

    if initParams is None:
        init = self.getParams()
    else:
        init = initParams

    if DEBUG:
        _epsilon = np.sqrt(np.finfo(float).eps)
        #print approx_fprime(self.params[0], self.cost, _epsilon, X, y)
        print check_grad(self.cost, self.grad, np.zeros(init.shape), X, self.yindi)
        print check_grad(self.cost, self.grad, init, X, self.yindi)

    if self.optimizeOrder == 0:
        newParams = self.optimize(self.cost, init, args=(X, self.yindi), disp=False)
    if self.optimizeOrder == 1:
        newParams = self.optimize(self.cost, init, args=(X, self.yindi), disp=False)
        #newParams = self.optimize(self.cost, self.getParams(), args=(X, y))
    self.setParams(newParams)
def test_gradient():
    # Test gradient of Kullback-Leibler divergence.
    random_state = check_random_state(0)

    n_samples = 50
    n_features = 2
    n_components = 2
    alpha = 1.0

    distances = random_state.randn(n_samples, n_features).astype(np.float32)
    distances = np.abs(distances.dot(distances.T))
    np.fill_diagonal(distances, 0.0)
    X_embedded = random_state.randn(n_samples, n_components).astype(np.float32)

    P = _joint_probabilities(distances, desired_perplexity=25.0, verbose=0)

    def fun(params):
        return _kl_divergence(params, P, alpha, n_samples, n_components)[0]

    def grad(params):
        return _kl_divergence(params, P, alpha, n_samples, n_components)[1]

    assert_almost_equal(check_grad(fun, grad, X_embedded.ravel()), 0.0, decimal=5)
def self_test1():
    D = 100
    K = 2
    N = 10
    L = 1e-6

    # check parsing
    W01 = np.random.randn(D, nh)
    b1 = np.random.randn(1, nh)
    W12 = np.random.randn(nh, nh)
    b2 = np.random.randn(1, nh)
    W23 = np.random.randn(nh, K)
    b3 = np.random.randn(1, K)
    w = np.concatenate((W01.flatten(), b1.flatten(), W12.flatten(),
                        b2.flatten(), W23.flatten(), b3.flatten()), axis=0)
    W01_, b1_, W12_, b2_, W23_, b3_ = parseParams(w, D, K)
    print ((W01 - W01_)**2).sum() / (W01**2).sum()
    print ((b1 - b1_)**2).sum() / (b1**2).sum()
    print ((W12 - W12_)**2).sum() / (W12**2).sum()
    print ((b2 - b2_)**2).sum() / (b2**2).sum()
    print ((W23 - W23_)**2).sum() / (W23**2).sum()
    print ((b3 - b3_)**2).sum() / (b3**2).sum()

    w = init(D, K)
    w = 1e-0 * np.random.normal(size=w.size)
    X = np.random.normal(size=(N, D))
    y = np.random.randint(K, size=(N,))
    err = check_grad(loss, grad, w, X, y, L, K)
    print err
def gradient_check(theta, x, y, l2_regularization):
    print 'check_grad:', check_grad(calculate_cost, calculate_gradient, theta,
                                    x, y, l2_regularization)
    spatial_alpha_vec, spatial_mean_vec, spatial_sigma_vec, temporal_mean, temporal_sigma = \
        span_params(theta)
    cost1 = calculate_cost(theta, x, y, l2_regularization)
    num_of_params = (len(spatial_alpha_vec) + 2*len(spatial_mean_vec) +
                     len(spatial_sigma_vec) + 2)
    direction = np.random.randint(2, size=num_of_params)*2 - 1
    eps = 1e-7
    gradient = eps * direction

    total = 0
    spatial_alpha_vec2 = spatial_alpha_vec + gradient[0:len(spatial_alpha_vec)]
    total += len(spatial_alpha_vec)
    spatial_mean_vec2 = spatial_mean_vec + gradient[total:total + 2*len(spatial_mean_vec)].reshape(-1, 2)
    total += 2*len(spatial_mean_vec)
    spatial_sigma_vec2 = spatial_sigma_vec + gradient[total:total + len(spatial_sigma_vec)]
    total += len(spatial_sigma_vec)
    temporal_mean2 = np.array(temporal_mean + gradient[-2])
    temporal_sigma2 = np.array(temporal_sigma + gradient[-1])
    theta2 = compress_params(spatial_alpha_vec2, spatial_mean_vec2, spatial_sigma_vec2,
                             temporal_mean2, temporal_sigma2)
    cost2 = calculate_cost(theta2, x, y, l2_regularization)
    delta = (cost2 - cost1)

    print 'Gradient check:'
    print 'Empiric:', delta
    print 'Analytic:', gradient.dot(calculate_gradient(theta, x, y, l2_regularization))
    diff = abs(delta - gradient.dot(calculate_gradient(theta, x, y, l2_regularization)))
    print 'Difference:', diff
    if diff < 1e-3:
        print 'Gradient is O.K'
    else:
        print 'Gradient check FAILED'
def test_checkgrad():
    from scipy.optimize import check_grad
    import numpy as np
    for x in range(100):
        x = x * np.ones((1)) / 10
        print "check_grad @ %.2f: %.6f" % (x, check_grad(f, fgrad, x))
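The loop above assumes a scalar objective f and its analytic derivative fgrad are defined at module level; a minimal hypothetical pair (not part of the original source) that would make the snippet runnable might look like this:

# Hypothetical objective/gradient pair assumed by test_checkgrad above;
# the function choice is illustrative, not from the original source.
import numpy as np

def f(x):
    # smooth scalar test function f(x) = sin(x) + x**2, x is a 1-element array
    return np.sin(x[0]) + x[0] ** 2

def fgrad(x):
    # analytic derivative of f, returned as an array matching x's shape
    return np.array([np.cos(x[0]) + 2.0 * x[0]])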
def test_01_6_unitary_hadamard_grad(self):
    """
    control.pulseoptim: Hadamard gate gradient check
    assert that gradient approx and exact gradient match in tolerance
    """
    # Hadamard
    H_d = sigmaz()
    H_c = [sigmax()]
    U_0 = identity(2)
    U_targ = hadamard_transform(1)

    n_ts = 10
    evo_time = 10

    # Create the optim objects
    optim = cpo.create_pulse_optimizer(H_d, H_c, U_0, U_targ,
                                       n_ts, evo_time,
                                       fid_err_targ=1e-10,
                                       dyn_type='UNIT',
                                       init_pulse_type='LIN',
                                       gen_stats=True)
    dyn = optim.dynamics

    init_amps = optim.pulse_generator.gen_pulse().reshape([-1, 1])
    dyn.initialize_controls(init_amps)

    # Check the exact gradient
    func = optim.fid_err_func_wrapper
    grad = optim.fid_err_grad_wrapper
    x0 = dyn.ctrl_amps.flatten()
    grad_diff = check_grad(func, grad, x0)
    assert_almost_equal(grad_diff, 0.0, decimal=6,
                        err_msg="Unitary gradient outside tolerance")
def test_nonlinear_mean_return_model(self):
    model = Nonlinear(delta=0.1, lmb=1.0, hidden=7)
    for i in range(10):
        diff = check_grad(model.cost, model.grad,
                          model.weights(self.trX, i), self.trX, self.trY)
        self.assertTrue(diff < 1.0e-5, diff)
def test_pairwise_gradient():
    fcts = PairwiseFcts(PAIRWISE_DATA, 0.2)
    for sigma in np.linspace(1, 20, num=10):
        xs = sigma * RND.randn(8)
        val = approx_fprime(xs, fcts.objective, EPS)
        err = check_grad(fcts.objective, fcts.gradient, xs, epsilon=EPS)
        assert abs(err / np.linalg.norm(val)) < 1e-5
def check_gradient(self, input, expected_output):
    """
    Check whether cost properly calculates gradients. Result should be
    close to zero. Input and expected_output must be lists, even if they
    only contain a single item.
    """
    array, shapes = NeuralNet.unroll(self.weights)

    def fun(x):
        """
        Wrapper around cost which allows it to interact with
        scipy.optimize.check_grad.
        """
        return NeuralNet.cost(NeuralNet.roll(x, shapes), self.lambda_,
                              input, expected_output, self.is_analog)[0]

    def grad(x):
        """
        Wrapper around cost which serves as the derivative function for
        scipy.optimize.check_grad.
        """
        return NeuralNet.unroll(
            NeuralNet.cost(NeuralNet.roll(x, shapes), self.lambda_,
                           input, expected_output, self.is_analog)[1])[0]

    return check_grad(fun, grad, array)
def test_grad(x1, y1, x2, y2, alpha1, alpha2):
    import numpy as np

    # initial guess
    xc0 = 0.5*(x1 + x2)
    yc0 = y1 + 0.5*(y1 - y2)
    r1 = np.sqrt((x1 - xc0)**2 + (y1 - yc0)**2)
    r2 = np.sqrt((x2 - xc0)**2 + (y2 - yc0)**2)
    a0 = 0.5*(r1 + r2)
    b0 = 0.1
    theta1 = np.pi
    theta2 = 1.5*np.pi

    x0 = np.ones(4)
    x0[0] = xc0
    x0[1] = yc0
    x0[2] = a0
    x0[3] = b0
    #x0[4] = theta1
    #x0[5] = theta2

    #args = {'x1': x1, 'y1': y1, 'x2': y2, 'alpha1': alpha1, 'alpha2': alpha2}
    xargs = (x1, y1, x2, y2, alpha1, alpha2)
    err = scio.check_grad(objectf_4x4, objectfprime_4x4, x0,
                          x1, y1, x2, y2, alpha1, alpha2)
    print err
def learnGPparamsWithPrior(oldParams, infRes, experiment, tauOptimMethod,
                           regularizer_stepsize_tau):
    xdim, T = np.shape(infRes['post_mean'][0])
    binSize = experiment.binSize
    oldTau = oldParams['tau']*1000/binSize
    precomp = makePrecomp(infRes)
    tempTau = np.zeros(xdim)
    pOptimizeDetails = [[]]*xdim

    for xd in range(xdim):
        initp = np.log(1/oldTau[xd]**2)

        if False:
            # gradient check and stuff
            gradcheck = op.check_grad(
                MStepGPtimescaleCostWithPrior, MStepGPtimescaleCostWithPrior_grad,
                initp, precomp[0], 0.001, binSize, oldParams['tau'][xd],
                regularizer_stepsize_tau)
            print('tau learning grad check = ' + str(gradcheck))
            pdb.set_trace()

            apprxGrad = op.approx_fprime(
                initp, MStepGPtimescaleCostWithPrior, 1e-8,
                precomp[xd], 0.001, binSize, oldParams['tau'][xd], regularizer_stepsize_tau)
            calcdGrad = MStepGPtimescaleCostWithPrior_grad(
                initp, precomp[xd], 0.001, binSize, oldParams['tau'][xd], regularizer_stepsize_tau)
            plt.plot(apprxGrad, linewidth=10, color='k', alpha=0.4)
            plt.plot(calcdGrad, linewidth=2, color='k', alpha=0.4)
            plt.legend(['approximated', 'calculated'])
            plt.title('Approx. vs. calculated Grad of Tau learning cost')
            plt.tight_layout()
            plt.show()

            def cost(p):
                cost = MStepGPtimescaleCostWithPrior(
                    p, precomp[xd], 0.001, binSize,
                    oldParams['tau'][xd], regularizer_stepsize_tau)
                return cost

            def cost_grad(p):
                grad = MStepGPtimescaleCostWithPrior_grad(
                    p, precomp[xd], 0.001, binSize,
                    oldParams['tau'][xd], regularizer_stepsize_tau)
                return grad

            pdb.set_trace()

        if False:
            # bench for setting hessian as inverse variance
            hessTau = op.approx_fprime([initp], MStepGPtimescaleCost_grad,
                                       1e-14, precomp[xd], 0.001)
            priorVar = -1/hessTau
            regularizer_stepsize_tau = np.sqrt(np.abs(priorVar))
            # pdb.set_trace()

        res = op.minimize(
            fun=MStepGPtimescaleCostWithPrior,
            x0=initp,
            args=(precomp[xd], 0.001, binSize, oldParams['tau'][xd],
                  regularizer_stepsize_tau),
            jac=MStepGPtimescaleCostWithPrior_grad,
            options={'disp': False, 'gtol': 1e-10},
            method=tauOptimMethod)
        pOptimizeDetails[xd] = res
        tempTau[xd] = (1/np.exp(res.x))**(0.5)

    newTau = tempTau*binSize/1000
    return newTau, pOptimizeDetails
def test_ridge_grad_cov():
    """Test ovk.OVKRidgeRisk gradient with finite differences."""
    K = ovk.DecomposableKernel(A=eye(2))
    risk = ovk.OVKRidgeRisk(0.01)
    assert check_grad(lambda *args: risk.functional_grad_val(*args)[0],
                      lambda *args: risk.functional_grad_val(*args)[1],
                      randn(X.shape[0] * y.shape[1]),
                      y.ravel(), K(X, X)) < 1e-3
def test_grad(self):
    from scipy import optimize
    f = lambda z: crps_gaussian(self.obs[0, 0], z[0], z[1], grad=False)
    g = lambda z: crps_gaussian(self.obs[0, 0], z[0], z[1], grad=True)[1]
    x0 = np.array([self.mu.reshape(-1), self.sig.reshape(-1)]).T
    for x in x0:
        self.assertLessEqual(optimize.check_grad(f, g, x), 1e-6)
def callback(pv):
    if not self.monitor_gradient:
        return
    err = optimize.check_grad(
        self.model.evaluate_objective_fn,
        self.model.evaluate_jacobian_fn,
        pv, X, y, sample_weight
    )
    self.gradient_err_.append(err)
def test_rff_ridge_grad_cov():
    """Test ovk.ORFFRidgeRisk gradient with finite differences."""
    K = ovk.DecomposableKernel(A=eye(2))
    risk = ovk.ORFFRidgeRisk(0.01)
    D = 100
    assert check_grad(lambda *args: risk.functional_grad_val(*args)[0],
                      lambda *args: risk.functional_grad_val(*args)[1],
                      randn(D * y.shape[1]),
                      y.ravel(), K.get_orff_map(X, D), K) < 1e-3
def test_non_linear_sample_fidelities_gradient(self, non_linear_model, fidelity_idx,
                                               func_idx, grad_idx):
    np.random.seed(1234)
    x0 = np.random.rand(2)

    func = lambda x: np.sum(
        non_linear_model._predict_samples_with_gradients(x[None, :], fidelity_idx)[func_idx],
        axis=0)
    grad = lambda x: np.sum(
        non_linear_model._predict_samples_with_gradients(x[None, :], fidelity_idx)[grad_idx],
        axis=0)

    assert check_grad(func, grad, x0) < 1e-6
def self_test1():
    D = 100
    N = 1000
    L = 1e-6
    w = init(D)
    w = np.random.normal(size=w.size)
    X = np.random.normal(size=(N, D))
    y = 2*np.random.randint(2, size=(N,)) - 1
    err = check_grad(loss, grad, w, X, y, L)
    print err
def test_grad_logistic():
    X, y = datasets.make_classification()
    y[y == 0] = -1
    y = y.astype(np.float)
    f = lambda x: loss(x, X, y, 1.)
    f_grad = lambda x: grad_hess(x, X, y, 1.)[0]
    small = optimize.check_grad(f, f_grad, np.random.randn(X.shape[1]))
    tools.assert_less(small, 1.)
def test_acquisition_gradient_computation(acquisition, n_dims, tol):
    rng = np.random.RandomState(43)
    x_test = rng.rand(10, n_dims)

    acq = lambda x: acquisition.evaluate(np.array([x]))[0][0]
    grad = lambda x: acquisition.evaluate_with_gradients(np.array([x]))[1][0]

    for xi in x_test:
        err = check_grad(acq, grad, xi, epsilon=gradient_check_step_size)
        assert err < tol
def test_pairwise_hessian():
    fcts = PairwiseFcts(PAIRWISE_DATA, 0.2)
    for sigma in np.linspace(1, 20, num=10):
        xs = sigma * RND.randn(8)
        for i in range(8):
            obj = lambda xs: fcts.gradient(xs)[i]
            grad = lambda xs: fcts.hessian(xs)[i]
            val = approx_fprime(xs, obj, EPS)
            err = check_grad(obj, grad, xs, epsilon=EPS)
            assert abs(err / np.linalg.norm(val)) < 1e-5
def check(self, layer, input_blobs, output_blobs, check_indices=None):
    """Checks a layer with given input blobs and output blobs.
    """
    # pre-run to get the input and output shapes.
    if check_indices is None:
        checked_blobs = input_blobs
    else:
        checked_blobs = [input_blobs[i] for i in check_indices]
    layer.forward(input_blobs, output_blobs)
    input_backup = blobs_to_vec(checked_blobs)
    param_backup = blobs_to_vec(layer.param())
    num_output = blobs_to_vec(output_blobs).size
    max_err = 0

    # first, check grad w.r.t. param
    x_init = blobs_to_vec(layer.param())
    if len(x_init) > 0:
        for i in range(-1, num_output):
            # pylint: disable=E1101
            err = optimize.check_grad(
                GradChecker._func, GradChecker._grad, x_init, layer,
                input_blobs, output_blobs, False, i, checked_blobs)
            max_err = max(err, max_err)
            self.assertLessEqual(err, self._threshold)
            if err > self._threshold:
                return (False, i, err, 'param')
        # restore param
        vec_to_blobs(param_backup, layer.param())

    # second, check grad w.r.t. input
    x_init = blobs_to_vec(checked_blobs)
    if len(x_init) > 0:
        for i in range(-1, num_output):
            # pylint: disable=E1101
            err = optimize.check_grad(
                GradChecker._func, GradChecker._grad, x_init, layer,
                input_blobs, output_blobs, True, i, checked_blobs)
            max_err = max(err, max_err)
            self.assertLessEqual(err, self._threshold)
            if err > self._threshold:
                return (False, i, err, 'input')
        # restore input
        vec_to_blobs(input_backup, checked_blobs)

    return (True, max_err)
def self_test1():
    D = 100
    K = 10
    N = 1000
    L = 1e-6
    w = init(D, K)
    w = np.random.normal(size=w.size)
    X = np.random.normal(size=(N, D))
    y = np.random.randint(K, size=(N,))
    err = check_grad(loss, grad, w, X, y, L, K)
    print err
def test_grad():
    n_samples, n_features = 12, 20
    size_u, size_v = 2, 10
    np.random.seed(0)
    w = np.random.randn(size_u + size_v + 3)
    X = np.random.randn(n_samples, n_features)
    Y = np.random.randn(n_samples)
    drifts = np.random.randn(n_samples, 3)
    func = lambda x: he.f_grad(x, X, Y, drifts, size_u, size_v)[0]
    grad = lambda x: he.f_grad(x, X, Y, drifts, size_u, size_v)[1]
    assert optimize.check_grad(func, grad, w) < .1
def test_lindbladian(self):
    """
    Optimise pulse for amplitude damping channel with Lindbladian dyn
    assert that fidelity error is below threshold
    """
    Sx = sigmax()
    Sz = sigmaz()
    Si = identity(2)

    Sd = Qobj(np.array([[0, 1], [0, 0]]))
    Sm = Qobj(np.array([[0, 0], [1, 0]]))
    Sd_m = Qobj(np.array([[1, 0], [0, 0]]))

    gamma = 0.1
    L0_Ad = gamma*(2*tensor(Sm, Sd.trans()) -
                   (tensor(Sd_m, Si) + tensor(Si, Sd_m.trans())))
    LC_x = -1j*(tensor(Sx, Si) - tensor(Si, Sx))
    LC_z = -1j*(tensor(Sz, Si) - tensor(Si, Sz))

    drift = L0_Ad
    ctrls = [LC_z, LC_x]
    n_ctrls = len(ctrls)
    initial = identity(4)
    had_gate = hadamard_transform(1)
    target_DP = tensor(had_gate, had_gate)

    n_ts = 10
    evo_time = 5

    result = cpo.optimize_pulse(drift, list(ctrls), initial, target_DP,
                                n_ts, evo_time,
                                fid_err_targ=1e-3,
                                max_iter=200,
                                init_pulse_type='LIN',
                                gen_stats=True)
    assert_(result.fid_err < 0.1, msg="Fidelity higher than expected")

    # Check same result is achieved using the create objects method
    optim = cpo.create_pulse_optimizer(drift, list(ctrls), initial, target_DP,
                                       n_ts, evo_time,
                                       fid_err_targ=1e-3,
                                       init_pulse_type='LIN',
                                       gen_stats=True)
    dyn = optim.dynamics
    p_gen = optim.pulse_generator
    init_amps = np.zeros([n_ts, n_ctrls])
    for j in range(n_ctrls):
        init_amps[:, j] = p_gen.gen_pulse()
    dyn.initialize_controls(init_amps)

    # Check the exact gradient
    func = optim.fid_err_func_wrapper
    grad = optim.fid_err_grad_wrapper
    x0 = dyn.ctrl_amps.flatten()
    grad_diff = check_grad(func, grad, x0)
    assert_almost_equal(grad_diff, 0.0, decimal=7,
                        err_msg="Frechet gradient outside tolerance")

    result2 = optim.run_optimization()
    assert_almost_equal(result.fid_err, result2.fid_err, decimal=3,
                        err_msg="Direct and indirect methods produce "
                                "different results for ADC")
def test_check_grads(self):
    x_ = np.array([[np.random.rand()]])
    assert check_grad(self.lcb, lambda x: -self.lcb(x, True)[1], x_) < 1e-5
def f_grad(x):
    """
    :param x:
    :return:
    """
    arg_tup = tuple([X_tr, y_tr, k_name, rbfn_mname, centers_in])
    _, g = negative_log_likelihood(x, *arg_tup)
    return g

print "Function call: {}\n\n".format(f_val(params_i))
print "Function grad: {}\n\n".format(f_grad(params_i))
print check_grad(func=f_val, grad=f_grad, x0=params_i)
print "\n\n"

#minimize(fun=negative_log_likelihood, x0=params_i, args=arg_tup_rbfn, jac=True, method="BFGS")

fig = plt.figure()
ax1 = fig.add_subplot(111)
ax1.plot(np.squeeze(data_in_1D), np.squeeze(func_obs), 'ro', linewidth=3.0, label="obs")
ax1.set_xlabel("x")
ax1.set_ylabel("f(x)")
ax1.title.set_text("Function with observations")
plt.show()
    return -np.sum(np.sum(np.log(proba ** Y), axis=1))

grad_negloglik_auto = grad(negloglik_autograd)

from scipy.optimize import check_grad

rng = np.random.RandomState(7)
x0 = rng.randn(p + k - 1)
x0[1:k - 1] = np.abs(x0[1:k - 1])

# WARNING: check_grad is likely to return a quite high value
# due to numerical instability with exp and log with tiny
# probability values. Don't be surprised as long as your
# solvers below converge.
check_grad(negloglik, grad_negloglik, x0=x0)

# Now plug your gradient into L-BFGS and check the result:

x0 = np.zeros(p + k - 1)
x0[:k - 1] = np.arange(k - 1)  # initializing with etas all equal to zero is a bad idea!
bounds = [(None, None)] + [(0, np.inf) for j in range(k - 2)] + [(None, None)] * p

x_hat, _, _ = fmin_l_bfgs_b(negloglik, fprime=grad_negloglik, x0=x0, bounds=bounds)
def _allnoomega_update(self, maxiter=-1):
    print '[HPSeqFullSumGradConstr] _allnoomega_update'
    logallparams = np.log(
        updates.mualphaupdates_fullsum_nonapprox.encode_all_params_nogammanoomega(
            self.mu, self.alpha))
    err = check_grad(
        lambda logallparams, omega, gamma, node_vec, eventmemes,
               etimes, T, W, beta, kernel_evaluate, K_evaluate, lenmu,
               derivative_kernel_evaluate, derivative_K_evaluate:
            updates.mualphaupdates_fullsum_nonapprox.log_fullsum_func(
                logallparams, omega, gamma, node_vec, eventmemes, etimes,
                T, W, beta, kernel_evaluate, K_evaluate, lenmu),
        updates.mualphaupdates_fullsum_nonapprox.log_fullsum_grad,
        logallparams,
        self.omega, self.gamma, self.node_vec, self.eventmemes, self.etimes,
        self.T, self.W, self.beta, self.kernel_evaluate, self.K_evaluate,
        len(self.mu), self.derivative_kernel_evaluate, self.derivative_K_evaluate)
    print 'gradient error ', err

    options = self.optim_options
    if maxiter > 0:
        options['maxiter'] = maxiter

    optout = minimize(
        updates.mualphaupdates_fullsum_nonapprox.log_fullsum_funcgrad,
        logallparams,
        (self.omega, self.gamma, self.node_vec, self.eventmemes, self.etimes,
         self.T, self.W, self.beta, self.kernel_evaluate, self.K_evaluate,
         len(self.mu), self.derivative_kernel_evaluate, self.derivative_K_evaluate),
        method='L-BFGS-B',
        jac=True,
        options=options)
    new_allparams = np.exp(optout.x)
    return new_allparams
def test_gammainc_fails():
    a = 0.1
    gammainc_1 = lambda x: gammainc(a, x)
    gammainc_2 = lambda x: grad(gammainc, argnum=1)(a, x)
    assert not check_grad(gammainc_1, gammainc_2, 1e-4) < 0.0001
def solve_unit_norm_dual(lhs, rhs, lambd0, factr=1e7, debug=False,
                         lhs_is_toeplitz=False):
    if np.all(rhs == 0):
        return np.zeros(lhs.shape[0]), 0.

    n_atoms = lambd0.shape[0]
    n_times_atom = lhs.shape[0] // n_atoms

    # precompute SVD
    # U, s, V = linalg.svd(lhs)

    if lhs_is_toeplitz:
        # first column of the toeplitz matrix lhs
        lhs_c = lhs[0, :]

        # lhs will not stay toeplitz if we add different lambd on the diagonal
        assert n_atoms == 1

        def x_star(lambd):
            lambd += 1e-14  # avoid numerical issues
            # lhs_inv = np.dot(V.T / (s + np.repeat(lambd, n_times_atom)), U.T)
            # return np.dot(lhs_inv, rhs)
            lhs_c_copy = lhs_c.copy()
            lhs_c_copy[0] += lambd
            return linalg.solve_toeplitz(lhs_c_copy, rhs)

    else:
        def x_star(lambd):
            lambd += 1e-14  # avoid numerical issues
            # lhs_inv = np.dot(V.T / (s + np.repeat(lambd, n_times_atom)), U.T)
            # return np.dot(lhs_inv, rhs)
            return linalg.solve(lhs + np.diag(np.repeat(lambd, n_times_atom)), rhs)

    def dual(lambd):
        x_hats = x_star(lambd)
        norms = linalg.norm(x_hats.reshape(-1, n_times_atom), axis=1)
        return (x_hats.T.dot(lhs).dot(x_hats) - 2 * rhs.T.dot(x_hats) +
                np.dot(lambd, norms**2 - 1.))

    def grad_dual(lambd):
        x_hats = x_star(lambd).reshape(-1, n_times_atom)
        return linalg.norm(x_hats, axis=1)**2 - 1.

    def func(lambd):
        return -dual(lambd)

    def grad(lambd):
        return -grad_dual(lambd)

    bounds = [(0., None) for idx in range(0, n_atoms)]
    if debug:
        assert optimize.check_grad(func, grad, lambd0) < 1e-5
    lambd_hats, _, _ = optimize.fmin_l_bfgs_b(func, x0=lambd0, fprime=grad,
                                              bounds=bounds, factr=factr)
    x_hat = x_star(lambd_hats)
    return x_hat, lambd_hats
def test_model_hawkes_loglik_grad(self):
    """...Test that ModelHawkesFixedExpKernLeastSq gradient is consistent
    with loss
    """
    self.assertLess(
        check_grad(self.model.loss, self.model.grad, self.coeffs), 1e-5)
def test_gradient_with_l2reg(self):
    lm = testee.LinearModel(l2reg=1)
    init_beta = np.random.rand(self.n_features)
    got = check_grad(lm.loss_function, lm.gradient, init_beta,
                     self.X, self.y, lm.l2reg)
    assert_almost_equal(got, 0, places=1)
def test_symplectic(self):
    """
    Optimise pulse for coupled oscillators with Symplectic dynamics
    assert that fidelity error is below threshold
    """
    g1 = 1.0
    g2 = 0.2
    A0 = Qobj(np.array([[1, 0, g1, 0],
                        [0, 1, 0, g2],
                        [g1, 0, 1, 0],
                        [0, g2, 0, 1]]))
    A_rot = Qobj(np.array([[1, 0, 0, 0],
                           [0, 1, 0, 0],
                           [0, 0, 0, 0],
                           [0, 0, 0, 0]]))
    A_sqz = Qobj(0.4*np.array([[1, 0, 0, 0],
                               [0, -1, 0, 0],
                               [0, 0, 0, 0],
                               [0, 0, 0, 0]]))

    A_c = [A_rot, A_sqz]
    n_ctrls = len(A_c)
    initial = identity(4)
    A_targ = Qobj(np.array([[0, 0, 1, 0],
                            [0, 0, 0, 1],
                            [1, 0, 0, 0],
                            [0, 1, 0, 0]]))
    Omg = Qobj(sympl.calc_omega(2))
    S_targ = (-A_targ*Omg*np.pi/2.0).expm()

    n_ts = 20
    evo_time = 10

    result = cpo.optimize_pulse(A0, list(A_c), initial, S_targ,
                                n_ts, evo_time,
                                fid_err_targ=1e-3,
                                max_iter=200,
                                dyn_type='SYMPL',
                                init_pulse_type='ZERO',
                                gen_stats=True)
    assert_(result.goal_achieved, msg="Symplectic goal not achieved")
    assert_almost_equal(result.fid_err, 0.0, decimal=2,
                        err_msg="Symplectic infidelity too high")

    # Check same result is achieved using the create objects method
    optim = cpo.create_pulse_optimizer(A0, list(A_c), initial, S_targ,
                                       n_ts, evo_time,
                                       fid_err_targ=1e-3,
                                       dyn_type='SYMPL',
                                       init_pulse_type='ZERO',
                                       gen_stats=True)
    dyn = optim.dynamics
    p_gen = optim.pulse_generator
    init_amps = np.zeros([n_ts, n_ctrls])
    for j in range(n_ctrls):
        init_amps[:, j] = p_gen.gen_pulse()
    dyn.initialize_controls(init_amps)

    # Check the exact gradient
    func = optim.fid_err_func_wrapper
    grad = optim.fid_err_grad_wrapper
    x0 = dyn.ctrl_amps.flatten()
    grad_diff = check_grad(func, grad, x0)
    assert_almost_equal(grad_diff, 0.0, decimal=5,
                        err_msg="Frechet gradient outside tolerance "
                                "(SYMPL)")

    result2 = optim.run_optimization()
    assert_almost_equal(result.fid_err, result2.fid_err, decimal=6,
                        err_msg="Direct and indirect methods produce "
                                "different results for Symplectic")
print("Initial infidelity: {}".format(dyn.fid_computer.get_fid_err())) #print("onto_evo_target: {}".format(dyn.onto_evo_target)) # Save initial amplitudes to a text file pulsefile = "ctrl_amps_initial_" + outfile_end dyn.save_amps(pulsefile, times="exclude") if (log_level <= logging.INFO): print("Initial amplitudes output to file: " + pulsefile) if check_gradient: print("***********************************") print("Checking gradient") func = optim.fid_err_func_wrapper grad = optim.fid_err_grad_wrapper x0 = dyn.ctrl_amps.flatten() grad_diff = check_grad(func, grad, x0) print("Normalised grad diff: {}".format(grad_diff)) print("***********************************") print("Starting pulse optimisation") result = optim.run_optimization() # Save final amplitudes to a text file pulsefile = "ctrl_amps_final_" + outfile_end dyn.save_amps(pulsefile) if (log_level <= logging.INFO): print("Final amplitudes output to file: " + pulsefile) print("\n***********************************") print("Optimising complete. Stats follow:") result.stats.report()
def ge_criterion_train(data, labels, weak_signal_probabilities, num_weak_signals,
                       check_gradient=False):
    """
    Trains generalized expectation criteria

    :param data: size (n, d) ndarray containing n examples described by d features each
    :type data: ndarray
    :param labels: length n array of the integer class labels
    :type labels: array
    :param weak_signal_probabilities: size num_weak_signals x n of the weak signal probabilities
    :type weak_signal_probabilities: ndarray
    :param num_weak_signals: the number of weak signals to be used in training
    :type num_weak_signals: integer
    :return: the learned model
    :rtype: array
    """
    n, d = data.shape
    weights = np.random.rand(d)

    def compute_empirical_distribution(est_probability, weak_signal):
        """
        Computes the score value of the empirical distribution

        :param est_probability: size n estimated probabilities for the instances
        :type est_probability: array
        :param weak_signal: weak signal trained using one dimensional feature
        :type weak_signal: array
        :return: (tuple of scalar values of the empirical distribution,
                  tuple of index of instances)
        :rtype: tuple
        """
        threshold = 0.5
        positive_index = np.where(weak_signal >= threshold)
        negative_index = np.where(weak_signal < threshold)
        pos_feature_labels = est_probability[positive_index]
        neg_feature_labels = est_probability[negative_index]

        try:
            with np.errstate(all='ignore'):
                empirical_pos_probability = np.sum(pos_feature_labels) / pos_feature_labels.size
                empirical_neg_probability = np.sum(neg_feature_labels) / neg_feature_labels.size
        except:
            empirical_pos_probability = np.nan_to_num(np.sum(pos_feature_labels) / pos_feature_labels.size) + 0
            empirical_neg_probability = np.nan_to_num(np.sum(neg_feature_labels) / neg_feature_labels.size) + 0

        empirical_probability = empirical_pos_probability, empirical_neg_probability
        instances_index = positive_index, negative_index
        return empirical_probability, instances_index

    def train_ge_criteria(new_weights):
        """
        This internal function returns the objective value of ge criteria

        :param new_weights: weights to use for computing multinomial logistic regression
        :type new_weights: ndarray
        :return: tuple containing (objective, gradient)
        :rtype: (float, array)
        """
        obj = 0
        score = data.dot(new_weights)
        probs, grad = logistic(score)
        gradient = 0
        # Code to compute the objective function
        for i in range(num_weak_signals):
            weak_signal = weak_signal_probabilities[i]
            reference_probs = compute_reference_distribution(labels, weak_signal)
            empirical_probs, index = compute_empirical_distribution(probs, weak_signal)

            # empirical computations
            pos_empirical_probs, neg_empirical_probs = empirical_probs
            pos_index, neg_index = index

            # reference computations
            pos_reference_probs, neg_reference_probs = reference_probs

            try:
                with np.errstate(all='ignore'):
                    # compute objective for positive probabilities
                    obj += pos_reference_probs * np.log(pos_reference_probs / pos_empirical_probs)
                    gradient += (pos_reference_probs / pos_empirical_probs) * \
                        data[pos_index].T.dot(grad[pos_index]) / grad[pos_index].size

                    # compute objective for negative probabilities
                    obj += neg_reference_probs * np.log(neg_reference_probs / neg_empirical_probs)
                    gradient += (neg_reference_probs / neg_empirical_probs) * \
                        data[neg_index].T.dot(grad[neg_index]) / grad[neg_index].size
            except:
                # compute objective for positive probabilities
                obj += np.nan_to_num(pos_reference_probs * np.log(pos_reference_probs / pos_empirical_probs))
                gradient += np.nan_to_num((pos_reference_probs / pos_empirical_probs) *
                                          data[pos_index].T.dot(grad[pos_index]) / grad[pos_index].size)

                # compute objective for negative probabilities
                obj += np.nan_to_num(neg_reference_probs * np.log(neg_reference_probs / neg_empirical_probs))
                gradient += np.nan_to_num((neg_reference_probs / neg_empirical_probs) *
                                          data[neg_index].T.dot(grad[neg_index]) / grad[neg_index].size)

        objective = obj + (0.5 * np.sum(new_weights**2))
        gradient = new_weights - gradient
        return objective, gradient

    if check_gradient:
        grad_error = check_grad(lambda w: train_ge_criteria(w)[0],
                                lambda w: train_ge_criteria(w)[1].ravel(), weights)
        print("Provided gradient differed from numerical approximation by "
              "%e (should be below 1e-3)" % grad_error)

    # pass the internal objective function into the optimizer
    res = minimize(lambda w: train_ge_criteria(w)[0],
                   jac=lambda w: train_ge_criteria(w)[1].ravel(), x0=weights)
    weights = res.x

    return weights
def check_gradient(params, X, y):
    # Kind of like a unit test: just check the gradient of the first 10 words.
    # This takes a while, so be forewarned.
    print(check_grad(gc.log_p_y_given_x_avg, gc.gradient_avg, params, X, y, 1))
def opt_hyper(gpr, hyperparams, Ifilter=None, maxiter=1000,
              gradcheck=False, bounds=None, optimizer=OPT.fmin_tnc,
              gradient_tolerance=1E-4, *args, **kw_args):
    """
    Optimize hyperparameters of :py:class:`pygp.gp.basic_gp.GP` ``gpr``
    starting from given hyperparameters ``hyperparams``.

    **Parameters:**

    gpr : :py:class:`pygp.gp.basic_gp`
        GP regression class

    hyperparams : {'covar':logtheta, ...}
        Dictionary filled with starting hyperparameters for optimization.
        logtheta are the CF hyperparameters.

    Ifilter : [boolean]
        Index vector, indicating which hyperparameters shall be optimized.
        For instance::

            logtheta = [1,2,3]
            Ifilter = [0,1,0]

        means that only the second entry (which equals 2 in this example)
        of logtheta will be optimized and the others remain untouched.

    bounds : [[min,max]]
        Array with min and max value that can be attained for any hyperparameter

    maxiter: int
        maximum number of function evaluations

    gradcheck: boolean
        check gradients comparing the analytical gradients to their approximations

    optimizer: :py:class:`scipy.optimize`
        which scipy optimizer to use? (standard lbfgsb)

    ** argument passed onto LML **

    priors : [:py:class:`pygp.priors`]
        non-default prior, otherwise assume first index amplitude,
        last noise, rest: lengthscales
    """

    def f(x):
        x_ = X0
        x_[Ifilter_x] = x
        rv = gpr.LML(param_list_to_dict(x_, param_struct, skeys), *args, **kw_args)
        #LG.debug("L(" + str(x_) + ")==" + str(rv))
        if SP.isnan(rv):
            return 1E6
        return rv

    def df(x):
        x_ = X0
        x_[Ifilter_x] = x
        rv = gpr.LMLgrad(param_list_to_dict(x_, param_struct, skeys), *args, **kw_args)
        rv = param_dict_to_list(rv, skeys)
        #LG.debug("dL(" + str(x_) + ")==" + str(rv))
        if not SP.isfinite(rv).all():  #SP.isnan(rv).any():
            In = SP.isnan(rv)
            rv[In] = 1E6
        return rv[Ifilter_x]

    #0. store parameter structure
    skeys = SP.sort(hyperparams.keys())
    param_struct = dict([(name, hyperparams[name].shape) for name in skeys])

    #1. convert the dictionaries to parameter lists
    X0 = param_dict_to_list(hyperparams, skeys)
    if Ifilter is not None:
        Ifilter_x = SP.array(param_dict_to_list(Ifilter, skeys), dtype='bool')
    else:
        Ifilter_x = SP.ones(len(X0), dtype='bool')

    #2. bounds
    if bounds is not None:
        #go through all hyperparams and build bound array (flattened)
        _b = []
        for key in skeys:
            if key in bounds.keys():
                _b.extend(bounds[key])
            else:
                _b.extend([(-SP.inf, +SP.inf)] * hyperparams[key].size)
        bounds = SP.array(_b)
        bounds = bounds[Ifilter_x]
        pass

    #3. set starting point of optimization, truncate the non-used dimensions
    x = X0.copy()[Ifilter_x]
    LG.debug("startparameters for opt:" + str(x))

    if gradcheck:
        checkgrad(f, df, x)
        LG.info("check_grad (pre) (Enter to continue):" + str(OPT.check_grad(f, df, x)))
        raw_input()

    LG.debug("start optimization")

    #general optimizer interface
    #note: x is a subset of X, indexing the parameters that are optimized over
    # Ifilter_x picks the subset of X, yielding x
    opt_RV = optimizer(f, x, fprime=df, maxfun=int(maxiter),
                       pgtol=gradient_tolerance, messages=False, bounds=bounds)
    # optimizer = OPT.fmin_l_bfgs_b
    # opt_RV = optimizer(f, x, fprime=df, maxfun=int(maxiter), iprint=1,
    #                    bounds=bounds, factr=10.0, pgtol=1e-10)
    opt_x = opt_RV[0]

    #relate back to X
    Xopt = X0.copy()
    Xopt[Ifilter_x] = opt_x
    #convert into dictionary
    opt_hyperparams = param_list_to_dict(Xopt, param_struct, skeys)
    #get the log marginal likelihood at the optimum:
    opt_lml = gpr.LML(opt_hyperparams, **kw_args)

    if gradcheck:
        checkgrad(f, df, opt_RV[0])
        LG.info("check_grad (post) (Enter to continue):" + str(OPT.check_grad(f, df, opt_RV[0])))
        pdb.set_trace()
        # raw_input()

    LG.debug("old parameters:")
    LG.debug(str(hyperparams))
    LG.debug("optimized parameters:")
    LG.debug(str(opt_hyperparams))
    LG.debug("grad:" + str(df(opt_x)))
    return [opt_hyperparams, opt_lml]
def test_unitary(self):
    """
    Optimise pulse for Hadamard and QFT gate with linear initial pulses
    assert that goal is achieved and fidelity error is below threshold
    """
    # Hadamard
    H_d = sigmaz()
    H_c = [sigmax()]
    U_0 = identity(2)
    U_targ = hadamard_transform(1)

    n_ts = 10
    evo_time = 6

    # Run the optimisation
    result = cpo.optimize_pulse_unitary(H_d, list(H_c), U_0, U_targ,
                                        n_ts, evo_time,
                                        fid_err_targ=1e-10,
                                        init_pulse_type='LIN',
                                        gen_stats=True)
    assert_(result.goal_achieved, msg="Hadamard goal not achieved")
    assert_almost_equal(result.fid_err, 0.0, decimal=10,
                        err_msg="Hadamard infidelity too high")

    # Try without stats
    result = cpo.optimize_pulse_unitary(H_d, list(H_c), U_0, U_targ,
                                        n_ts, evo_time,
                                        fid_err_targ=1e-10,
                                        init_pulse_type='LIN',
                                        gen_stats=False)
    assert_(result.goal_achieved, msg="Hadamard goal not achieved "
                                      "(no stats)")

    # Check same result is achieved using the create objects method
    optim = cpo.create_pulse_optimizer(H_d, list(H_c), U_0, U_targ,
                                       n_ts, evo_time,
                                       fid_err_targ=1e-10,
                                       dyn_type='UNIT',
                                       init_pulse_type='LIN',
                                       gen_stats=True)
    dyn = optim.dynamics

    init_amps = optim.pulse_generator.gen_pulse().reshape([-1, 1])
    dyn.initialize_controls(init_amps)

    # Check the exact gradient
    func = optim.fid_err_func_wrapper
    grad = optim.fid_err_grad_wrapper
    x0 = dyn.ctrl_amps.flatten()
    grad_diff = check_grad(func, grad, x0)
    assert_almost_equal(grad_diff, 0.0, decimal=7,
                        err_msg="Unitary gradient outside tolerance")

    result2 = optim.run_optimization()
    assert_almost_equal(result.fid_err, result2.fid_err, decimal=10,
                        err_msg="Direct and indirect methods produce "
                                "different results for Hadamard")

    # QFT
    Sx = sigmax()
    Sy = sigmay()
    Sz = sigmaz()
    Si = 0.5*identity(2)

    H_d = 0.5*(tensor(Sx, Sx) + tensor(Sy, Sy) + tensor(Sz, Sz))
    H_c = [tensor(Sx, Si), tensor(Sy, Si), tensor(Si, Sx), tensor(Si, Sy)]
    #n_ctrls = len(H_c)
    U_0 = identity(4)
    # Target for the gate evolution - Quantum Fourier Transform gate
    U_targ = qft.qft(2)

    result = cpo.optimize_pulse_unitary(H_d, list(H_c), U_0, U_targ,
                                        n_ts, evo_time,
                                        fid_err_targ=1e-9,
                                        init_pulse_type='LIN',
                                        gen_stats=True)
    assert_(result.goal_achieved, msg="QFT goal not achieved")
    assert_almost_equal(result.fid_err, 0.0, decimal=7,
                        err_msg="QFT infidelity too high")

    # check bounds
    result2 = cpo.optimize_pulse_unitary(H_d, list(H_c), U_0, U_targ,
                                         n_ts, evo_time,
                                         fid_err_targ=1e-9,
                                         amp_lbound=-1.0, amp_ubound=1.0,
                                         init_pulse_type='LIN',
                                         gen_stats=True)
    assert_((result2.final_amps >= -1.0).all() and
            (result2.final_amps <= 1.0).all(),
            msg="Amplitude bounds exceeded for QFT")
def test_gammainccinv():
    for a in np.logspace(-1, 1, 10):
        for y in np.linspace(0.01, 0.99, 10):
            gammainccinv_1 = lambda x: gammainccinv(a, x)
            gammainccinv_2 = lambda x: grad(gammainccinv, argnum=1)(a, x)
            assert check_grad(gammainccinv_1, gammainccinv_2, y) < 0.0005, (a, y)
def _update_z_idx(X, ds, reg, z0, idxs, debug, solver='l-bfgs', b_hat_0=None,
                  solver_kwargs=dict(), sample_weights=None, timing=False):
    n_trials, n_times = X.shape
    n_atoms, n_times_atom = ds.shape
    n_times_valid = n_times - n_times_atom + 1
    bounds = [(0, None) for idx in range(n_atoms * n_times_valid)]
    zhats = []
    for i in idxs:
        if sample_weights is None:
            sample_weights_i = None
        else:
            sample_weights_i = sample_weights[i]

        def func_and_grad(zi):
            return _fprime(ds, zi, Xi=X[i], reg=reg, return_func=True,
                           sample_weights=sample_weights_i)

        def grad_noreg(zi):
            return _fprime(ds, zi, Xi=X[i], reg=None, return_func=False,
                           sample_weights=sample_weights_i)

        if z0 is None:
            f0 = np.zeros(n_atoms * n_times_valid)
        else:
            f0 = z0[:, i, :].reshape((n_atoms * n_times_valid))

        if timing:
            times = [0]
            pobj = [func_and_grad(f0)[0]]
            start = [time.time()]

        if debug:

            def pobj(zi):
                return func_and_grad(zi)[0]

            def fprime(zi):
                return func_and_grad(zi)[1]

            assert optimize.check_grad(pobj, fprime, f0) < 1e-5

        if solver == 'l-bfgs':
            if timing:
                def callback(xk):
                    times.append(time.time() - start[0])
                    pobj.append(func_and_grad(xk)[0])
                    # use a reference to have access inside this function
                    start[0] = time.time()
            else:
                callback = None
            factr = solver_kwargs.get('factr', 1e15)  # default value
            maxiter = solver_kwargs.get('maxiter', 15000)  # default value
            zhat, f, d = optimize.fmin_l_bfgs_b(func_and_grad, f0, fprime=None,
                                                args=(), approx_grad=False,
                                                bounds=bounds, factr=factr,
                                                maxiter=maxiter,
                                                callback=callback)
        elif solver == "ista":
            zhat = f0.copy()
            DTD = gram_block_circulant(ds, n_times_valid, 'custom',
                                       sample_weights=sample_weights_i)
            tol = solver_kwargs.get('power_iteration_tol', 1e-4)
            L = power_iteration(DTD, b_hat_0=b_hat_0, tol=tol)
            step_size = 0.99 / L
            max_iter = solver_kwargs.get('max_iter', 20)
            for k in range(max_iter):
                # run ISTA iterations
                zhat -= step_size * grad_noreg(zhat)
                zhat = np.maximum(zhat - reg * step_size, 0.)
                if timing:
                    times.append(time.time() - start[0])
                    pobj.append(func_and_grad(zhat)[0])
                    start[0] = time.time()

        elif solver == "fista":
            # init
            x_new = f0.copy()
            y = x_new.copy()
            t_new = 1.0
            DTD = gram_block_circulant(ds, n_times_valid, 'custom',
                                       sample_weights=sample_weights_i)
            # compute the Lipschitz constant
            tol = solver_kwargs.get('power_iteration_tol', 1e-4)
            L = power_iteration(DTD, b_hat_0=b_hat_0, tol=tol)
            step_size = 0.99 / L
            max_iter = solver_kwargs.get('max_iter', 20)
            restart = solver_kwargs.get('restart', None)
            for k in range(max_iter):
                # run FISTA iterations
                # restart every n iterations
                if k > 0 and restart is not None and (k % restart) == 0:
                    y = x_new.copy()
                    t_new = 1.0
                # update the old
                t_old = t_new
                x_old = x_new
                # update x
                y -= step_size * grad_noreg(y)
                x_new = np.maximum(y - reg * step_size, 0.)
                # update t and y
                t_new = 0.5 * (1. + np.sqrt(1. + 4. * (t_old**2)))
                y = x_new + ((t_old - 1.) / t_new) * (x_new - x_old)
                if timing:
                    times.append(time.time() - start[0])
                    pobj.append(func_and_grad(x_new)[0])
                    start[0] = time.time()
            zhat = x_new
        else:
            raise ValueError("Unrecognized solver %s. Must be 'ista', 'fista',"
                             " or 'l-bfgs'." % solver)

        zhats.append(zhat)
    if timing:
        return np.vstack(zhats), pobj, times
    return np.vstack(zhats)
def _scipy_check_grad(model, x0=None):
    if x0 is None:
        x0 = model.parameter_values()
    from scipy.optimize import check_grad
    return check_grad(model.negative_loglike, model.negative_d_loglike, x0)
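For reference, the pattern shared by all of the snippets above (pair an objective with its analytic gradient and let scipy compare it against a finite-difference estimate) can be reduced to a minimal self-contained sketch; the quadratic objective below is purely illustrative and not taken from any of the projects quoted above:

import numpy as np
from scipy.optimize import check_grad

def cost(w, A, b):
    # quadratic objective 0.5 * ||A w - b||^2
    r = A.dot(w) - b
    return 0.5 * r.dot(r)

def cost_grad(w, A, b):
    # analytic gradient A^T (A w - b)
    return A.T.dot(A.dot(w) - b)

rng = np.random.RandomState(0)
A = rng.randn(20, 5)
b = rng.randn(20)
w0 = rng.randn(5)

# check_grad returns the 2-norm of the difference between the analytic
# gradient and a finite-difference approximation at w0; extra positional
# arguments after x0 are forwarded to both callables.
err = check_grad(cost, cost_grad, w0, A, b)
print(err)  # should be close to 0 (roughly sqrt(machine-epsilon) scale)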