def hessian(calculate_cost_function, x0, epsilon=1.e-5, linear_approx=False, *args):
    """
    A numerical approximation to the Hessian matrix of a cost function at
    location x0 (hopefully, the minimum).
    """
    import numpy as np
    from scipy.optimize import approx_fprime

    # ``calculate_cost_function`` is the cost function implementation.
    # The next line calculates an approximation to the first derivative.
    f1 = approx_fprime(x0, calculate_cost_function, epsilon, *args)

    # This is a linear approximation -- obviously much more efficient
    # if the cost function is linear. (Returns a plain ndarray rather
    # than the deprecated ``np.matrix`` product used previously.)
    if linear_approx:
        return np.outer(f1, f1)

    # Allocate space for the Hessian.
    n = x0.shape[0]
    hessian = np.zeros((n, n))
    # The next loop fills in the matrix column by column.
    xx = x0  # note: xx aliases x0, so perturbing xx also perturbs x0
    for j in range(n):
        xx0 = xx[j]            # store the old value
        xx[j] = xx0 + epsilon  # perturb with a finite difference
        # Recalculate the partial derivatives at this new point.
        f2 = approx_fprime(xx, calculate_cost_function, epsilon, *args)
        hessian[:, j] = (f2 - f1) / epsilon  # scale...
        xx[j] = xx0            # restore the initial value
    return hessian
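# A minimal usage sketch (not from the original source): on the quadratic
# f(x) = x.T @ A @ x the exact Hessian is A + A.T, so the numerical
# approximation above should match it closely.
import numpy as np

A = np.array([[2.0, 0.5],
              [0.5, 1.0]])
f = lambda x: x @ A @ x
H = hessian(f, np.array([0.3, -0.7]))
print(np.allclose(H, A + A.T, atol=1e-3))  # expected: True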
def test_logistic_loss_and_grad():
    X_ref, y = make_classification(n_samples=20, random_state=0)
    n_features = X_ref.shape[1]

    X_sp = X_ref.copy()
    X_sp[X_sp < .1] = 0
    X_sp = sp.csr_matrix(X_sp)
    for X in (X_ref, X_sp):
        w = np.zeros(n_features)

        # First check that our derivation of the grad is correct
        loss, grad = _logistic_loss_and_grad(w, X, y, alpha=1.)
        approx_grad = optimize.approx_fprime(
            w, lambda w: _logistic_loss_and_grad(w, X, y, alpha=1.)[0], 1e-3
        )
        assert_array_almost_equal(grad, approx_grad, decimal=2)

        # Second check that our intercept implementation is good
        w = np.zeros(n_features + 1)
        loss_interp, grad_interp = _logistic_loss_and_grad(
            w, X, y, alpha=1.
        )
        assert_array_almost_equal(loss, loss_interp)

        approx_grad = optimize.approx_fprime(
            w, lambda w: _logistic_loss_and_grad(w, X, y, alpha=1.)[0], 1e-3
        )
        assert_array_almost_equal(grad_interp, approx_grad, decimal=2)
def learnGPparamsWithPrior(oldParams, infRes, experiment, tauOptimMethod,
                           regularizer_stepsize_tau):
    xdim, T = np.shape(infRes['post_mean'][0])
    binSize = experiment.binSize
    oldTau = oldParams['tau'] * 1000 / binSize
    precomp = makePrecomp(infRes)
    tempTau = np.zeros(xdim)
    pOptimizeDetails = [[]] * xdim
    for xd in range(xdim):
        initp = np.log(1 / oldTau[xd]**2)
        if False:  # gradient check and debug plotting
            # (uses precomp[xd]; the original inconsistently used precomp[0])
            gradcheck = op.check_grad(
                MStepGPtimescaleCostWithPrior,
                MStepGPtimescaleCostWithPrior_grad,
                initp, precomp[xd], 0.001, binSize,
                oldParams['tau'][xd], regularizer_stepsize_tau)
            print('tau learning grad check = ' + str(gradcheck))
            pdb.set_trace()
            apprxGrad = op.approx_fprime(
                initp, MStepGPtimescaleCostWithPrior, 1e-8,
                precomp[xd], 0.001, binSize,
                oldParams['tau'][xd], regularizer_stepsize_tau)
            calcdGrad = MStepGPtimescaleCostWithPrior_grad(
                initp, precomp[xd], 0.001, binSize,
                oldParams['tau'][xd], regularizer_stepsize_tau)
            plt.plot(apprxGrad, linewidth=10, color='k', alpha=0.4)
            plt.plot(calcdGrad, linewidth=2, color='k', alpha=0.4)
            plt.legend(['approximated', 'calculated'])
            plt.title('Approx. vs. calculated Grad of Tau learning cost')
            plt.tight_layout()
            plt.show()

        def cost(p):
            return MStepGPtimescaleCostWithPrior(
                p, precomp[xd], 0.001, binSize,
                oldParams['tau'][xd], regularizer_stepsize_tau)

        def cost_grad(p):
            return MStepGPtimescaleCostWithPrior_grad(
                p, precomp[xd], 0.001, binSize,
                oldParams['tau'][xd], regularizer_stepsize_tau)

        # pdb.set_trace()  # leftover debugging breakpoint, disabled
        if False:  # bench for setting hessian as inverse variance
            hessTau = op.approx_fprime([initp], MStepGPtimescaleCost_grad,
                                       1e-14, precomp[xd], 0.001)
            priorVar = -1 / hessTau
            regularizer_stepsize_tau = np.sqrt(np.abs(priorVar))
            # pdb.set_trace()

        res = op.minimize(
            fun=MStepGPtimescaleCostWithPrior,
            x0=initp,
            args=(precomp[xd], 0.001, binSize, oldParams['tau'][xd],
                  regularizer_stepsize_tau),
            jac=MStepGPtimescaleCostWithPrior_grad,
            options={'disp': False, 'gtol': 1e-10},
            method=tauOptimMethod)
        pOptimizeDetails[xd] = res
        tempTau[xd] = (1 / np.exp(res.x))**(0.5)
    newTau = tempTau * binSize / 1000
    return newTau, pOptimizeDetails
def debug_gradient(p, src, dst):
    '''Compare the gradient with a numerical approximation.'''
    r_t_x = r_t_y = 1  # unused
    g_a = jac(p, src, dst)
    g_n = approx_fprime(p, res, [1.0e-10, 1.0e-10, 1.0e-10], src, dst)
    print("g_a:", g_a)
    print("g_n:", g_n)

    H_a = hess(p, src, dst)

    # element of the gradient
    def g_p_i(p, src, dst, i):
        g = jac(p, src, dst)
        return g[i]

    # assuming the analytical gradient is correct!
    H_x_n = approx_fprime(p, g_p_i, 1.0e-10, src, dst, 0)
    H_y_n = approx_fprime(p, g_p_i, 1.0e-10, src, dst, 1)
    H_theta_n = approx_fprime(p, g_p_i, 1.0e-10, src, dst, 2)
    H_n = np.zeros([3, 3])
    H_n[0, :] = H_x_n
    H_n[1, :] = H_y_n
    H_n[2, :] = H_theta_n
    print("H_a:\n", H_a)
    print("H_n:\n", H_n)
def test_nnls_jacobian_func():
    b0 = 1000.
    bvecs, bval = read_bvec_file(get_data('55dir_grad.bvec'))
    gtab = grad.gradient_table(bval, bvecs)
    B = bval[1]

    # Scale the eigenvalues and tensor by the B value so the units match
    D = np.array([1., 1., 1., 0., 0., 1., -np.log(b0) * B]) / B

    # Design matrix
    X = dti.design_matrix(gtab)

    # Signals
    Y = np.exp(np.dot(X, D))

    # Test the Jacobian at D
    args = [X, Y]
    analytical = dti._nlls_jacobian_func(D, *args)
    for i in range(len(X)):
        args = [X[i], Y[i]]
        approx = opt.approx_fprime(D, dti._nlls_err_func, 1e-8, *args)
        assert_true(np.allclose(approx, analytical[i]))

    # Test the Jacobian at zero
    D = np.zeros_like(D)
    args = [X, Y]
    analytical = dti._nlls_jacobian_func(D, *args)
    for i in range(len(X)):
        args = [X[i], Y[i]]
        approx = opt.approx_fprime(D, dti._nlls_err_func, 1e-8, *args)
        assert_true(np.allclose(approx, analytical[i]))
def test_gauss_transform(N=10, M=50, sigma=1, eps=1e-5,
                         random=numpy.random.RandomState(0)):
    points_fixed = random.randn(N, 3)
    points_moving = random.randn(M, 3)

    # integer division so the reshape works under Python 3
    f = lambda x: _metrics_densities.gauss_transform(
        x.reshape(len(x) // 3, 3), points_fixed, sigma)[0]
    g = lambda x: _metrics_densities.gauss_transform(
        x.reshape(len(x) // 3, 3), points_fixed, sigma)[-1]

    approx_g = approx_fprime(points_fixed.ravel(), f, eps).reshape(N, 3)
    grad = g(points_fixed.ravel())
    testing.assert_array_almost_equal(
        grad, approx_g, decimal=4,
        err_msg="Gauss transform gradient failed", verbose=True
    )

    approx_g = approx_fprime(points_moving.ravel(), f, eps).reshape(M, 3)
    grad = g(points_moving.ravel())
    testing.assert_array_almost_equal(
        grad, approx_g, decimal=4,
        err_msg="Gauss transform gradient non-centered failed", verbose=True
    )
def hessian(x0, func):
    f1 = approx_fprime(x0, func, EPS)
    n = x0.shape[0]
    hessian = np.zeros((n, n))
    xx = x0  # xx aliases x0: perturbing xx below also perturbs x0
    for j in range(n):
        xx0 = xx[j]
        xx[j] = xx0 + EPS
        f2 = approx_fprime(xx, func, EPS)
        hessian[:, j] = (f2 - f1) / EPS
        xx[j] = xx0
    return hessian
def test_predict(self):
    # first check that we can even evaluate the posterior.
    _ = self.model.predict(self.X)

    # check the mu gradients
    f = lambda x: self.model.predict(x[None])[0]
    G1 = self.model.predict(self.X, grad=True)[2]
    G2 = np.array([spop.approx_fprime(x, f, 1e-8) for x in self.X])
    nt.assert_allclose(G1, G2, rtol=1e-6, atol=1e-6)

    # check the s2 gradients
    f = lambda x: self.model.predict(x[None])[1]
    G1 = self.model.predict(self.X, grad=True)[3]
    G2 = np.array([spop.approx_fprime(x, f, 1e-8) for x in self.X])
    nt.assert_allclose(G1, G2, rtol=1e-6, atol=1e-6)
def _compute_jacobianFunc(self):
    if self.funcPrime:
        if self.constants is None:
            return self.funcPrime(self.params, self.x)
        else:
            return self.funcPrime(self.params, self.x, self.constants)
    else:
        if self.epsilon:
            eps = self.epsilon
        else:
            # note: the deprecated ``np.float`` alias is replaced by ``float``
            eps = np.sqrt(np.finfo(float).eps)
        if self.constants is None:
            return np.array([
                optimize.approx_fprime(self.params, self.func, eps, xi)
                for xi in self.x
            ])
        else:
            return np.array([
                optimize.approx_fprime(self.params, self.func, eps, xi,
                                       self.constants)
                for xi in self.x
            ])
def constraint(self, in0, gradIn):
    if gradIn.size > 0:
        gradIn[:] = -optimize.approx_fprime(in0, self.boundsFunction, 1e-8)
        # print("norm of constraint grad", np.linalg.norm(gradIn))
    out = -self.boundsFunction(in0)
    # print("Is out of bounds", out > 0)
    return out
def assert_message_to_parent(self, child, parent, postprocess=lambda u: u,
                             eps=1e-6, rtol=1e-4, atol=0):
    (pack, unpack) = self._get_pack_functions(parent.plates, parent.dims)

    def cost(x):
        parent.u = pack(x)
        return child.lower_bound_contribution()

    d = postprocess(pack(unpack(parent._message_from_children())))
    d_num = postprocess(
        pack(
            approx_fprime(
                unpack(parent.u),
                cost,
                eps
            )
        )
    )

    # for (i, j) in zip(postprocess(pack(d)), postprocess(pack(d_num))):
    #     print(i)
    #     print(j)

    assert len(d_num) == len(d)
    for i in range(len(d)):
        self.assertAllClose(d[i], d_num[i], rtol=rtol, atol=atol)
def assert_moments(self, node, postprocess=lambda u: u, eps=1e-6,
                   rtol=1e-4, atol=0):
    (u, g) = node._distribution.compute_moments_and_cgf(node.phi)

    (pack, unpack) = self._get_pack_functions(node.plates, node.dims)

    def cost(x):
        (_, g) = node._distribution.compute_moments_and_cgf(pack(x))
        return -np.sum(g)

    u_num = pack(
        approx_fprime(
            unpack(node.phi),
            cost,
            eps
        )
    )

    assert len(u_num) == len(u)
    up = postprocess(u)
    up_num = postprocess(u_num)
    for i in range(len(up)):
        self.assertAllClose(up[i], up_num[i], rtol=rtol, atol=atol)
def compute_analytical_and_numerical_grad_graph(terminal, inputs,
                                                epsilon=1e-3):
    from scipy.optimize import approx_fprime

    def set_inputs(x0):
        begin = 0
        for i in inputs:
            end = begin + i.size
            i.d = x0[begin:end].reshape(i.shape)
            begin = end

    def func(x0):
        set_inputs(x0)
        terminal.forward()
        return terminal.d.copy()

    def grad(x0):
        set_inputs(x0)
        backups = [i.g.copy() for i in inputs]
        terminal.forward()
        terminal.backward()
        gx0 = []
        for i, b in zip(inputs, backups):
            gx0.append((i.g.copy() - b).flatten())
            i.g = b
        return np.concatenate(gx0)

    inputs0 = np.concatenate([i.d.flatten() for i in inputs])
    analytical_grad = grad(inputs0)
    numerical_grad = approx_fprime(inputs0, func, epsilon)
    return analytical_grad, numerical_grad
def check_gradient_correctness(X_new, model, acq_func, y_opt):
    analytic_grad = gaussian_acquisition_1D(X_new, model, y_opt, acq_func)[1]
    num_grad_func = lambda x: gaussian_acquisition_1D(
        x, model, y_opt, acq_func=acq_func)[0]
    num_grad = optimize.approx_fprime(X_new, num_grad_func, 1e-5)
    assert_array_almost_equal(analytic_grad, num_grad, 3)
def test_FANN_recurrent_gradient_multisample():
    rc = ForwardAndRecurrentConnection(4, 1)
    nn = FANN([rc])
    theta = 2 * np.ones(nn.get_param_dim())
    grad_c = nn.calculate_gradient(theta, X, T)
    grad_e = approx_fprime(theta, nn.calculate_error, 1e-8, X, T)
    assert_allclose(grad_c, grad_e, rtol=1e-3, atol=1e-5)
def test_pairwise_gradient():
    fcts = PairwiseFcts(PAIRWISE_DATA, 0.2)
    for sigma in np.linspace(1, 20, num=10):
        xs = sigma * RND.randn(8)
        val = approx_fprime(xs, fcts.objective, EPS)
        err = check_grad(fcts.objective, fcts.gradient, xs, epsilon=EPS)
        assert abs(err / np.linalg.norm(val)) < 1e-5
def test_gradients():
    """Test gradient accuracy."""
    # data
    scaler = StandardScaler()
    n_samples, n_features = 1000, 100
    X = np.random.normal(0.0, 1.0, [n_samples, n_features])
    X = scaler.fit_transform(X)

    density = 0.1
    beta_ = np.zeros(n_features + 1)
    beta_[0] = np.random.rand()
    beta_[1:] = sps.rand(n_features, 1, density=density).toarray()[:, 0]

    reg_lambda = 0.1
    distrs = ['gaussian', 'binomial', 'softplus', 'poisson', 'probit',
              'gamma']
    for distr in distrs:
        glm = GLM(distr=distr, reg_lambda=reg_lambda)
        y = simulate_glm(glm.distr, beta_[0], beta_[1:], X)

        func = partial(_L2loss, distr, glm.alpha, glm.Tau, reg_lambda,
                       X, y, glm.eta, glm.group)
        grad = partial(_grad_L2loss, distr, glm.alpha, glm.Tau, reg_lambda,
                       X, y, glm.eta)
        approx_grad = approx_fprime(beta_, func, 1.5e-8)
        analytical_grad = grad(beta_)
        assert_allclose(approx_grad, analytical_grad, rtol=1e-5, atol=1e-3)
def check_gradients(f, fprime, x, eps=1e-4):
    """Check that the numerical approximation to the gradient of the
    function matches the supplied gradient.

    f is a function; fprime is a function returning the gradient of f.
    The gradient is approximated by finite differences of f around x and
    compared with the value of fprime at x.
    """
    from scipy.optimize import approx_fprime
    from scipy.optimize.optimize import _epsilon

    calculated = fprime(x)
    approximation = approx_fprime(x, f, _epsilon)
    diff = calculated - approximation
    norm = numpy.sqrt(numpy.dot(diff, diff))
    if norm > eps:
        # note: the original message swapped the "Approximation" and
        # "Calculated" values; the argument order is fixed here.
        raise RuntimeError(
            "Gradient does not match approximation from function\n"
            "Difference (norm): %f\n"
            "x: %s\n"
            "Approximation: %s\n"
            "Calculated: %s\n"
            "Differences: %s" % (norm, x, approximation, calculated, diff)
        )
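# Hedged usage sketch (not from the original source): d/dx sum(sin(x)) is
# cos(x) elementwise, so this call passes silently; a wrong gradient would
# raise the RuntimeError above.
import numpy

check_gradients(lambda v: numpy.sin(v).sum(), numpy.cos,
                numpy.array([0.1, 0.5]))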
def test_gradfactor(self):
    for t in [0.1, 0.5, 1, 2]:
        t = np.array([t])
        x = self.transform.get_inverse(t)
        d1 = self.transform.get_gradfactor(x)
        d2 = spop.approx_fprime(t, self.transform.get_inverse, 1e-8)
        nt.assert_allclose(d1, d2, rtol=1e-6)
def check_gradient(f, x0, verbose=True):
    """
    Simple wrapper for SciPy's gradient checker.

    The given function must return a tuple: (value, gradient).

    Returns the relative error of the gradient.
    """
    df = f(x0)[1]
    df_num = optimize.approx_fprime(x0,
                                    lambda x: f(x)[0],
                                    optimize.optimize._epsilon)
    abserr = np.linalg.norm(df - df_num)
    norm_num = np.linalg.norm(df_num)
    if abserr == 0 and norm_num == 0:
        err = 0
    else:
        err = abserr / norm_num
    if verbose:
        print("Norm of numerical gradient: %g" % np.linalg.norm(df_num))
        print("Norm of function gradient: %g" % np.linalg.norm(df))
        print("Gradient relative error = %g and absolute error = %g"
              % (err, abserr))
    return err
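# Minimal sketch of the expected interface, assuming numpy and scipy are
# imported as ``np``/``optimize`` as in the snippet above: the checked
# function returns both the value and its gradient as a tuple.
f = lambda x: (np.sum(x ** 2), 2 * x)
err = check_gradient(f, np.array([1.0, -2.0, 3.0]))  # err should be tiny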
def fpapprox(fun, x0, args=(), eps=1.49e-06, fpmode=0):
    '''
    Finite-difference approximation of the gradient of a scalar function
    at a given point.

    Parameters
    ----------
    fun : callable
        The function whose gradient is to be approximated.
    x0 : 1d ndarray
        The given point.
    args : tuple, optional
        The extra arguments of the function ``fun``.
    eps : float, optional
        The step size.
    fpmode : 0 or 1, optional
        * 0 use ``(f(x+eps)-f(x))/eps`` to approximate fp;
        * 1 use ``(f(x+eps)-f(x-eps))/2/eps`` to approximate fp.

    Returns
    -------
    1d ndarray
        The approximated gradient.
    '''
    if fpmode == 0:
        return op.approx_fprime(x0, fun, eps, *args)
    else:
        result, dx = np.zeros(len(x0)), np.eye(len(x0))
        for i in range(len(x0)):
            result[i] = (fun(x0 + eps * dx[i], *args)
                         - fun(x0 - eps * dx[i], *args)) / 2 / eps
        return result
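# Usage sketch on a function with a known gradient, assuming the snippet's
# imports (``scipy.optimize as op``, ``numpy as np``): for smooth functions
# the central difference (fpmode=1) is usually noticeably closer to the
# exact value than the forward difference (fpmode=0).
f = lambda x: np.sin(x[0]) * np.cos(x[1])
x0 = np.array([0.3, 0.8])
exact = np.array([np.cos(x0[0]) * np.cos(x0[1]),
                  -np.sin(x0[0]) * np.sin(x0[1])])
print(np.abs(fpapprox(f, x0, fpmode=0) - exact).max())
print(np.abs(fpapprox(f, x0, fpmode=1) - exact).max())  # typically smaller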
def test_elbo_grad():
    for f in range(2):
        for j in range(2):
            if f == 0:
                if j == 0:
                    y, exog_fe, exog_vc, ident = gen_simple_logit(10, 10, 2)
                else:
                    y, exog_fe, exog_vc, ident = gen_crossed_logit(
                        10, 10, 1, 2)
            elif f == 1:
                if j == 0:
                    y, exog_fe, exog_vc, ident = gen_simple_poisson(
                        10, 10, 0.5)
                else:
                    y, exog_fe, exog_vc, ident = gen_crossed_poisson(
                        10, 10, 1, 0.5)

            exog_vc = sparse.csr_matrix(exog_vc)

            if f == 0:
                glmm1 = BinomialBayesMixedGLM(
                    y, exog_fe, exog_vc, ident, vcp_p=0.5)
            else:
                glmm1 = PoissonBayesMixedGLM(
                    y, exog_fe, exog_vc, ident, vcp_p=0.5)

            rslt1 = glmm1.fit_map()

            for k in range(3):
                if k == 0:
                    vb_mean = rslt1.params
                    vb_sd = np.ones_like(vb_mean)
                elif k == 1:
                    vb_mean = np.zeros(len(vb_mean))
                    vb_sd = np.ones_like(vb_mean)
                else:
                    vb_mean = np.random.normal(size=len(vb_mean))
                    vb_sd = np.random.uniform(1, 2, size=len(vb_mean))

                mean_grad, sd_grad = glmm1.vb_elbo_grad(vb_mean, vb_sd)

                def elbo(vec):
                    n = len(vec) // 2
                    return glmm1.vb_elbo(vec[:n], vec[n:])

                x = np.concatenate((vb_mean, vb_sd))
                g1 = approx_fprime(x, elbo, 1e-5)
                n = len(x) // 2
                mean_grad_n = g1[:n]
                sd_grad_n = g1[n:]

                assert_allclose(mean_grad, mean_grad_n, atol=1e-2, rtol=1e-2)
                assert_allclose(sd_grad, sd_grad_n, atol=1e-2, rtol=1e-2)
def detailed_check_grad(func, grad, x0, *args):
    """
    A variant of scipy.optimize.check_grad that returns the per-component
    difference between the computed and the approximated gradient.

    This is helpful because you can see which values are already correct
    (their difference is 0), i.e. where you are already quite close to the
    solution.
    """
    return grad(x0, *args) - approx_fprime(x0, func, _epsilon, *args)
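# Hypothetical usage: with a correct analytic gradient, every entry of the
# returned difference is close to zero.
import numpy as np

func = lambda x: np.sum(x ** 3)
grad = lambda x: 3 * x ** 2
print(detailed_check_grad(func, grad, np.array([0.5, 1.5])))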
def test_loglikelihood(self):
    x = self.gp.get_hyper()
    f = lambda x: self.gp.copy(x).loglikelihood()
    _, g1 = self.gp.loglikelihood(grad=True)
    g2 = spop.approx_fprime(x, f, 1e-8)
    # slightly looser gradient tolerance, mostly due to FITC.
    nt.assert_allclose(g1, g2, rtol=1e-5, atol=1e-5)
def test_FANN_gradient_multisample():
    fc = FullConnection(4, 1)
    sig = SigmoidLayer(1)
    nn = FANN([fc, sig])
    theta = np.random.randn(nn.get_param_dim())
    grad_c = nn.calculate_gradient(theta, X, T)
    grad_e = approx_fprime(theta, nn.calculate_error, 1e-8, X, T)
    assert_almost_equal(grad_c, grad_e)
def test_FANN_with_bias_gradient_single_sample():
    fc = FullConnectionWithBias(3, 1)
    sig = SigmoidLayer(1)
    nn = FANN([fc, sig])
    theta = np.random.randn(nn.get_param_dim())
    for x, t in zip(X_nb, T):
        grad_c = nn.calculate_gradient(theta, x, t)
        grad_e = approx_fprime(theta, nn.calculate_error, 1e-8, x, t)
        assert_almost_equal(grad_c, grad_e)
def test_FANN_recurrent_gradient_single_sample():
    rc = ForwardAndRecurrentConnection(1, 1)
    nn = FANN([rc])
    theta = 2 * np.ones(nn.get_param_dim())
    for x, t in [[0, 1], [1, 1], [0, 0]]:
        x = np.array([[x]])
        grad_c = nn.calculate_gradient(theta, x, t)
        grad_e = approx_fprime(theta, nn.calculate_error, 1e-8, x, t)
        assert_almost_equal(grad_c, grad_e)
def test_sample_fourier(self):
    # sample a function
    f = self.gp.sample_fourier(10)
    x = self.X[0]

    # get the gradient and test it
    _, g1 = f(x, True)
    g2 = spop.approx_fprime(x, f, 1e-8)
    nt.assert_allclose(g1, g2, rtol=1e-5, atol=1e-5)

    # reset the gp and sample from the prior.
    gp = self.gp.copy()
    gp.reset()
    f = gp.sample_fourier(10)

    # get the gradient and test it
    _, g1 = f(x, True)
    g2 = spop.approx_fprime(x, f, 1e-8)
    nt.assert_allclose(g1, g2, rtol=1e-5, atol=1e-5)
def test_ForwardAndRecurrentConnections_backprop_gradient_check():
    frc = ForwardAndRecurrentConnection(1, 1)
    theta = np.ones(frc.get_param_dim())
    X = [[1.], [1.]]
    Y = [[1.], [2.]]
    T = np.array([[0.], [0.]])
    out_error = [[-1.], [-2.]]
    error, grad = frc.backprop(theta, X, Y, out_error)
    f = lambda t: error_function(T - frc.forward_pass(t, X))
    assert_almost_equal(approx_fprime(theta, f, 1e-8), grad)
def test_pairwise_hessian():
    fcts = PairwiseFcts(PAIRWISE_DATA, 0.2)
    for sigma in np.linspace(1, 20, num=10):
        xs = sigma * RND.randn(8)
        for i in range(8):
            obj = lambda xs: fcts.gradient(xs)[i]
            grad = lambda xs: fcts.hessian(xs)[i]
            val = approx_fprime(xs, obj, EPS)
            err = check_grad(obj, grad, xs, epsilon=EPS)
            assert abs(err / np.linalg.norm(val)) < 1e-5
def check_grad_rel(func, grad, x0, *args):
    """
    Does a relative check of the gradient.
    Uses scipy.optimize.approx_fprime
    """
    step = 1.49e-08
    target = approx_fprime(x0, func, step, *args)
    actual = grad(x0, *args)
    delta = target - actual
    # normalize where possible, making sure the target is not 0
    # (the original only normalized entries with target > 0)
    nonzero = target != 0
    delta[nonzero] /= target[nonzero]
    return delta
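# Sketch: relative deltas keep the check meaningful when gradient
# components differ by orders of magnitude, as with exp here.
import numpy as np

func = lambda x: np.exp(x).sum()
print(check_grad_rel(func, np.exp, np.array([0.0, 5.0])))  # near zero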
def gradient_descend(f, x0, alpha):
    eps = np.sqrt(np.finfo(float).eps)
    x, y = [], []
    x.append(x0)
    y.append(f(x0))
    for i in range(50):
        gradx0 = optimize.approx_fprime(x0, f, eps)
        x0 = x0 - alpha * gradx0
        x.append(x0)
        y.append(f(x0))
    return x, y
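# Minimal usage sketch, assuming numpy and scipy.optimize are imported as
# ``np``/``optimize`` as the snippet requires: 50 steps on a convex
# quadratic drive the iterate toward the minimizer at [1, -2].
f = lambda x: (x[0] - 1) ** 2 + (x[1] + 2) ** 2
xs, ys = gradient_descend(f, np.array([5.0, 5.0]), alpha=0.1)
print(xs[-1], ys[-1])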
def test_lml_gradient(kernel):
    # Compare analytic and numeric gradient of log marginal likelihood.
    gpr = GaussianProcessRegressor(kernel=kernel).fit(X, y)

    lml, lml_gradient = gpr.log_marginal_likelihood(kernel.theta, True)
    lml_gradient_approx = approx_fprime(
        kernel.theta,
        lambda theta: gpr.log_marginal_likelihood(theta, False),
        1e-10)

    assert_almost_equal(lml_gradient, lml_gradient_approx, 3)
def test_ForwardAndRecurrentSigmoidConnections_backprop_random_example_gradient_check():
    frc = ForwardAndRecurrentSigmoidConnection(4, 3)
    theta = np.random.randn(frc.get_param_dim())
    X = np.random.randn(10, 4)
    Y = frc.forward_pass(theta, X)
    T = np.zeros((10, 3))
    out_error = (T - Y)
    error, grad_c = frc.backprop(theta, X, Y, out_error)
    f = lambda t: error_function(T - frc.forward_pass(t, X))
    grad_e = approx_fprime(theta, f, 1e-8)
    assert_allclose(grad_c, grad_e, rtol=1e-3, atol=1e-5)
def find_MEP(fun, x0, tol=1e-6, max_iter=10000, improved_tangent=True):
    force_max = 1.
    itr = 0
    k_sp = 1.
    path = x0  # note: the input path is updated in place
    (M, N) = path.shape
    alpha = .01
    eps = np.sqrt(np.finfo(float).eps)
    while (force_max > tol) and (itr < max_iter):
        temp_minus = path[:, 1:-1] - path[:, :-2]
        norm_temp_minus = np.linalg.norm(temp_minus, axis=0)
        temp_plus = path[:, 2:] - path[:, 1:-1]
        norm_temp_plus = np.linalg.norm(temp_plus, axis=0)
        if improved_tangent:
            energy = fun(path)
            V_max = np.array([max(abs(energy[i + 1] - energy[i]),
                                  abs(energy[i - 1] - energy[i]))
                              for i in range(1, N - 1)])
            V_min = np.array([min(abs(energy[i + 1] - energy[i]),
                                  abs(energy[i - 1] - energy[i]))
                              for i in range(1, N - 1)])
            i_minus = np.array([i for i in range(1, N - 1)
                                if energy[i + 1] < energy[i] < energy[i - 1]]) - 1
            i_plus = np.array([i for i in range(1, N - 1)
                               if energy[i - 1] < energy[i] < energy[i + 1]]) - 1
            i_mix = np.array([i for i in range(N - 2)
                              if i not in np.hstack((i_minus, i_plus))])
            i_mix_minus = np.array([i for i in i_mix
                                    if energy[i] > energy[i + 2]])
            i_mix_plus = np.array([i for i in i_mix
                                   if energy[i + 2] > energy[i]])
            tau = np.zeros((M, N - 2))
            if len(i_minus) > 0:
                tau[:, i_minus] = temp_minus[:, i_minus]
            if len(i_plus) > 0:
                tau[:, i_plus] = temp_plus[:, i_plus]
            if len(i_mix_minus) > 0:
                tau[:, i_mix_minus] = (
                    temp_plus[:, i_mix_minus] * V_min[i_mix_minus]
                    + temp_minus[:, i_mix_minus] * V_max[i_mix_minus])
            if len(i_mix_plus) > 0:
                tau[:, i_mix_plus] = (
                    temp_plus[:, i_mix_plus] * V_max[i_mix_plus]
                    + temp_minus[:, i_mix_plus] * V_min[i_mix_plus])
            tau /= np.linalg.norm(tau, axis=0)
        else:
            temp_minus /= norm_temp_minus
            temp_plus /= norm_temp_plus
            tau = temp_minus + temp_plus
            tau /= np.linalg.norm(tau, axis=0)
        gradient = np.array([-approx_fprime(path[:, j], fun, eps)
                             for j in range(1, N - 1)]).transpose()
        grad_trans = gradient - np.array(
            [np.dot(gradient[:, j], tau[:, j]) * tau[:, j]
             for j in range(N - 2)]).transpose()
        dist = k_sp * (norm_temp_plus - norm_temp_minus)
        grad_spring = dist * tau
        grad_opt = grad_spring + grad_trans
        force_max = max(np.linalg.norm(grad_opt, axis=0))
        path[:, 1:-1] += alpha * grad_opt
        itr += 1
    if force_max < tol:
        print("MEP was successfully found in %i iterations" % itr)
    return path
def test_water_cloud_gradient_vh():
    x = np.atleast_2d(np.array([0.5, 2.]))
    polarisation = "VH"
    sigma, dsigma = sar_observation_operator(x, polarisation)

    # forward-model value only, as required by approx_fprime
    # (renamed from the misleading ``backscatter_gradient``)
    def backscatter(t):
        return sar_observation_operator(t, polarisation)[0]

    finite_difference_gradient_approx = approx_fprime(
        x.squeeze(), backscatter, 1e-6)
    assert np.allclose(dsigma.squeeze(), finite_difference_gradient_approx,
                       atol=1e-2)
def __init__(self, nstates, ncontrols, nobstates, goal0, goal_weight,
             effort_weight, obstacle_weight, umin=None, umax=None,
             strictness=100, arb_state_cost=None, arb_effort_cost=None):
    # Dimensionality
    self.nstates = int(nstates)
    self.ncontrols = int(ncontrols)
    self.nobstates = int(nobstates)

    # Get your goals in order and your priorities straight
    self.set_goal(goal0)
    self.set_weights(goal_weight, effort_weight, obstacle_weight)
    self.reset_obstacles()

    # Initialize and then set limits
    self.umin = -np.inf * np.ones(ncontrols)
    self.umax = np.inf * np.ones(ncontrols)
    self.set_constraints(umin, umax, strictness)

    # Initialize and then store arbitrary cost functions
    self.arb_state_cost = lambda x: 0
    self.arb_effort_cost = lambda u: 0
    self.set_arb_costs(arb_state_cost, arb_effort_cost)

    # Finite difference delta size and gradient functions
    self.eps = (np.finfo(float).eps)**0.5
    self.state_cost_gradient = lambda x: approx_fprime(
        x, self.state_cost, self.eps)
    self.effort_cost_gradient = lambda u: approx_fprime(
        u, self.effort_cost, self.eps)
def test_scale_priors_grad():
    """Test the gradient of the priors."""
    np.random.seed(0)
    num_dims = 5
    num_pts = 20
    vectors = np.random.random((num_pts, num_dims))
    high_fid_values = np.random.random(num_pts)
    mid_fid_values = np.random.random(num_pts)
    low_fid_values = np.random.random(num_pts)
    sqe_kernel = hiergp.kernels.SqKernel(num_dims, (0.2, 10))

    hypers = np.array([0.03, 1, 2, 3, 4, 5, 0.1])
    values = [low_fid_values, high_fid_values]

    def log_marg_f(hypers):
        return hiergp.gpmodel.lmgrad(hypers, [sqe_kernel], vectors,
                                     values)[0]

    scipy_grad = approx_fprime(hypers, log_marg_f,
                               np.sqrt(np.finfo(float).eps))
    gpmodel_grad = hiergp.gpmodel.lmgrad(hypers, [sqe_kernel], vectors,
                                         values)[1]
    assert np.allclose(scipy_grad, gpmodel_grad, rtol=1e-1)

    # Test with two priors
    hypers = np.array([0.03, 1, 2, 3, 4, 5, 0.1, 2.1])
    values = [low_fid_values, mid_fid_values, high_fid_values]
    scipy_grad = approx_fprime(hypers, log_marg_f,
                               np.sqrt(np.finfo(float).eps))
    gpmodel_grad = hiergp.gpmodel.lmgrad(hypers, [sqe_kernel], vectors,
                                         values)[1]
    assert np.allclose(scipy_grad, gpmodel_grad, rtol=1e-1)

    # Test the ValueError assertion
    hypers = np.array([0.03, 1, 2, 3, 4, 5])
    with pytest.raises(ValueError):
        gpmodel_grad = hiergp.gpmodel.lmgrad(hypers, [sqe_kernel], vectors,
                                             values)[1]
def fit(self, X, y):
    n, d = X.shape

    # Initial guess (1-D, as required by approx_fprime)
    self.w = np.zeros(d)

    # check the gradient
    estimated_gradient = approx_fprime(
        self.w, lambda w: self.funObj(w, X, y)[0], epsilon=1e-6)
    implemented_gradient = self.funObj(self.w, X, y)[1]

    if np.max(np.abs(estimated_gradient - implemented_gradient)) > 1e-4:
        print('User and numerical derivatives differ: %s vs. %s' %
              (estimated_gradient, implemented_gradient))
    else:
        print('User and numerical derivatives agree.')
def finite_difference(self, x, u):
    """Finite-difference linearization around the given (x, u) point."""
    xu = np.concatenate((x, u))

    # Jacobian of the dynamics, one output component at a time
    F = np.zeros((x.shape[0], xu.shape[0]))
    for i in range(x.shape[0]):
        F[i, :] = approx_fprime(xu, self.simulate_next_state, self.delta, i)

    # gradient and (approximate) Hessian of the cost
    c = approx_fprime(xu, self.simulate_cost, self.delta)
    C = np.zeros((len(xu), len(xu)))
    for i in range(xu.shape[0]):
        C[i, :] = approx_fprime(xu, self.approx_fdoubleprime, self.delta, i)

    f = np.zeros(len(x))
    return C, F, c, f
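# Hedged sketch of the row-by-row Jacobian trick used above: approx_fprime
# differentiates scalar outputs only, so each component of a vector-valued
# function is selected via an extra index argument. ``f_i`` here is a
# hypothetical stand-in, not part of the original class.
import numpy as np
from scipy.optimize import approx_fprime

def f_i(x, i):
    return np.array([x[0] * x[1], np.sin(x[0])])[i]

x = np.array([1.0, 2.0])
J = np.array([approx_fprime(x, f_i, 1e-7, i) for i in range(2)])
print(J)  # rows approximate [[x1, x0], [cos(x0), 0]]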
def test_factor_jacobian():
    shape = 4, 3
    z_ = mp.Variable('z', *(mp.Plate() for _ in shape))
    likelihood = mp.NormalMessage(np.random.randn(*shape),
                                  np.random.exponential(size=shape))
    likelihood_factor = likelihood.as_factor(z_)

    values = {z_: likelihood.sample()}
    fval, jval = likelihood_factor.func_jacobian(values, axis=None)
    ngrad = approx_fprime(
        values[z_].ravel(),
        lambda x: likelihood.logpdf(x.reshape(*shape)).sum(),
        1e-8).reshape(*shape)
    assert np.allclose(ngrad, jval[z_])
def test_logistic_full_gradients(loss, loss_grad, w, dataset):
    for i in range(N):

        def f(w, x, y):
            return loss(w, x, y)[i]

        def g(w, x, y):
            return loss_grad(w, x, y)[i]

        grad = g(w, *dataset)
        approx_grad = approx_fprime(w, f, EPS, *dataset)
        print(f"{grad=}, {approx_grad=}")
        assert check_grad(f, g, w, *dataset) <= 1e-5
def test_simple_lingrad():
    """Test the gradient of the linear kernel and of a combination of
    linear and SqE kernels.
    """
    np.random.seed(0)
    num_dims = 5
    num_pts = 20
    lin_kernel = hiergp.kernels.LinKernel(num_dims, (0.2, 10))
    sqe_kernel = hiergp.kernels.SqKernel(num_dims, (0.2, 10))
    vectors = np.random.random((num_pts, num_dims))
    values = np.random.random(num_pts)

    # Test the gradient with a single linear kernel
    hypers = np.array([0.03, 1, 2, 3, 4, 5]) * 1e-3

    def log_marg_f(hypers):
        return hiergp.gpmodel.lmgrad(hypers, [lin_kernel], vectors,
                                     values)[0]

    scipy_grad = approx_fprime(hypers, log_marg_f,
                               np.sqrt(np.finfo(float).eps))
    gpmodel_grad = hiergp.gpmodel.lmgrad(hypers, [lin_kernel], vectors,
                                         values)[1]
    # The linear kernel values are even larger and have worse tolerance
    assert np.allclose(scipy_grad, gpmodel_grad, rtol=1e-1)

    # Test the gradient with an SqE kernel plus a linear kernel
    hypers = np.array(
        [2., 1, 2, 3, 4, 5, 1., 0.001, 0.002, 0.003, 0.004, 0.005])

    def log_marg_f_sqlin(hypers):
        return hiergp.gpmodel.lmgrad(hypers, [sqe_kernel, lin_kernel],
                                     vectors, values)[0]

    scipy_grad = approx_fprime(hypers, log_marg_f_sqlin,
                               np.sqrt(np.finfo(float).eps))
    gpmodel_grad = hiergp.gpmodel.lmgrad(hypers, [sqe_kernel, lin_kernel],
                                         vectors, values)[1]
    assert np.allclose(scipy_grad, gpmodel_grad, rtol=1e-1)
def calc_derivative_values(func, options):
    point = options['point']
    epsilon = options['epsilon']
    try:
        return o.approx_fprime(point, func, epsilon).tolist()
    except Exception as e:
        # string comparison must use ==, not the identity operator ``is``
        e = str(e)
        if e == 'a float is required':
            return ('the function whose derivative you want to calculate '
                    'must take an array. Even for a univariate function, '
                    'it expects an array of length 1')
        if e == "object of type 'int' has no len()":
            return ('the point at which the derivative is calculated must '
                    'be an array. Even for a univariate function, point '
                    'expects an array of length 1')
        return str(e)
def test_get_gradx(self):
    G1 = self.kernel.get_gradx(self.X1, self.X2)
    m = self.X1.shape[0]
    n = self.X2.shape[0]
    d = self.X1.shape[1]

    k = self.kernel
    G2 = np.array([
        spop.approx_fprime(x1, k, 1e-8, x2)
        for x1 in self.X1
        for x2 in self.X2
    ]).reshape(m, n, d)

    nt.assert_allclose(G1, G2, rtol=1e-6, atol=1e-6)
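# Illustrative stand-in for the kernel object above (not the original
# class): the same per-pair check applied to a squared-exponential kernel
# with a known spatial gradient.
import numpy as np
from scipy.optimize import approx_fprime

k = lambda x1, x2: np.exp(-0.5 * np.sum((x1 - x2) ** 2))
grad_x1 = lambda x1, x2: -(x1 - x2) * k(x1, x2)
x1 = np.array([0.2, 0.4])
x2 = np.array([1.0, -0.3])
print(np.allclose(grad_x1(x1, x2), approx_fprime(x1, k, 1e-8, x2),
                  atol=1e-6))  # expected: True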
def test_lml_gradient():
    """Compare analytic and numeric gradient of log marginal likelihood."""
    for kernel in kernels:
        gpc = GaussianProcessClassifier(kernel=kernel).fit(X, y)

        lml, lml_gradient = gpc.log_marginal_likelihood(kernel.theta, True)
        lml_gradient_approx = approx_fprime(
            kernel.theta,
            lambda theta: gpc.log_marginal_likelihood(theta, False),
            1e-10)

        assert_almost_equal(lml_gradient, lml_gradient_approx, 3)
def perform(self, node, inputs, outputs):
    samples, = inputs

    # calculate gradients
    if self.likelihood_grad is None:
        # define a version of the likelihood function to pass to
        # the derivative function
        def lnlike(values):
            return self.likelihood(values)

        grads = approx_fprime(samples, lnlike,
                              2 * np.sqrt(np.finfo(float).eps))
    else:
        grads = self.likelihood_grad(samples)

    outputs[0][0] = grads
def FR(x0, h, e, f):
    """Fletcher-Reeves conjugate gradient with a numerical gradient."""
    xcur = np.array(x0)
    h = np.array(h)  # currently unused
    n = len(x0)
    k = 0                                                      # step 1
    grad = optimize.approx_fprime(xcur, f, e**4)               # step 2
    prevgrad = 1
    pk = -1 * grad
    while any(abs(grad[i]) > e**2 for i in range(n)):          # step 3
        if k % n == 0:                                         # step 4
            pk = -1 * grad
        else:
            bk = (np.linalg.norm(grad)**2
                  / np.linalg.norm(prevgrad)**2)               # step 5
            prevpk = pk
            pk = -1 * grad + bk * prevpk                       # step 6
        # line search along pk (the original passed a malformed
        # ``bounds=(0,)``, which minimize_scalar cannot use; an
        # unbounded Brent search is used instead)
        a = optimize.minimize_scalar(lambda t: f(xcur + pk * t)).x
        xcur = xcur + a * pk                                   # step 8
        k = k + 1
        prevgrad = grad
        grad = optimize.approx_fprime(xcur, f, e**4)           # step 2
    return xcur                                                # step 10
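# Minimal usage sketch, assuming numpy and scipy.optimize are imported as
# ``np``/``optimize`` as the snippet requires: Fletcher-Reeves on a
# quadratic bowl (``h`` is accepted but unused by the routine).
f = lambda x: (x[0] - 3) ** 2 + 2 * (x[1] + 1) ** 2
print(FR([0.0, 0.0], h=None, e=1e-2, f=f))  # close to [3, -1]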
def test_grad(z0):
    grad_approx = approx_fprime(z0, func, 1e-8, *args)
    grad_compute = grad(z0, *args, **kwargs)
    error = np.sqrt(np.sum((grad_approx - grad_compute)**2))
    error /= np.sqrt(np.sum(grad_compute * grad_approx))
    try:
        assert error < rtol, msg.format(error)
    except AssertionError:
        if debug:
            import matplotlib.pyplot as plt
            plt.plot(grad_approx)
            plt.plot(grad_compute)
            plt.show()
        raise
def test_score():
    uniq, load, corr, par = _toy()
    fa = Factor(n_factor=2, corr=corr)

    def f(par):
        return fa.loglike(par)

    par2 = np.r_[0.1, 0.2, 0.3, 0.4, 0.3, 0.1, 0.2, -0.2, 0, 0.8, 0.5, 0]

    for pt in (par, par2):
        g1 = approx_fprime(pt, f, 1e-8)
        g2 = fa.score(pt)
        assert_allclose(g1, g2, atol=1e-3)
def C_grad_std(x, w, X, Y, threshold, epsilon):
    X_new = np.hstack((X, x))
    points = np.array((-epsilon, epsilon))
    eps = np.sqrt(np.finfo(float).eps)
    eps_array = w * eps
    for point in points:
        w_new = np.hstack((w, point))
        eps_array_new = np.hstack((eps_array, eps * point))
        grad = optimize.approx_fprime(w_new, Loss, eps_array_new,
                                      X_new, Y)[-1]
        if abs(grad) > threshold:
            # if np.sign(point) * grad + threshold < 0:
            print("grad = %10.7f\t" % grad)
            return True
    return False
def testGradientExactAndApproxAgree__objFunc_constrained(self):
    '''Verify that the computed gradient is similar for exact and approx
    methods.
    '''
    print('')
    for K in [1, 10, 107]:
        for alpha in [0.1, 0.95]:
            for gamma in [1., 3.14, 9.45]:
                for seed in [111, 222, 333]:
                    PRNG = np.random.RandomState(seed)
                    rho = PRNG.rand(K)
                    omega = 100 * PRNG.rand(K)
                    rhoomega = np.hstack([rho, omega])
                    kwargs = dict(alpha=alpha,
                                  gamma=gamma,
                                  nDoc=0,
                                  sumLogPi=np.zeros(K + 1))

                    # Exact gradient
                    _, g = OptimizerRhoOmega.objFunc_constrained(
                        rhoomega, approx_grad=0, **kwargs)

                    # Numerical gradient
                    objFunc_cons = OptimizerRhoOmega.objFunc_constrained
                    objFunc = lambda x: objFunc_cons(
                        x, approx_grad=1, **kwargs)
                    epsvec = np.hstack(
                        [1e-8 * np.ones(K), 1e-8 * np.ones(K)])
                    gapprox = approx_fprime(rhoomega, objFunc, epsvec)

                    print('   rho 1:10 ', np2flatstr(rho))
                    print('  grad 1:10 ', np2flatstr(g[:K], fmt='% .6e'))
                    print('  grad 1:10 ', np2flatstr(gapprox[:K],
                                                     fmt='% .6e'))
                    if K > 10:
                        print('    rho K-10:K ', np2flatstr(rho[-10:]))
                        print('   grad K-10:K ', np2flatstr(g[K - 10:K],
                                                            fmt='% .6e'))
                        print('gapprox K-10:K ', np2flatstr(
                            gapprox[K - 10:K], fmt='% .6e'))
                    assert np.allclose(g[:K], gapprox[:K],
                                       atol=1e-6, rtol=0.01)

                    print(np2flatstr(g[K:]))
                    print(np2flatstr(gapprox[K:]))
                    assert np.allclose(g[K:], gapprox[K:],
                                       atol=1e-4, rtol=0.05)
def _check_gradients(layer_args, input_shape):
    rand = np.random.RandomState(0)
    net = cn.SoftmaxNet(layer_args=layer_args,
                        input_shape=input_shape,
                        rand_state=rand)
    x = rand.randn(*(10, ) + net.input_shape) / 100
    y = rand.randn(10) > 0
    by = net.binarize_labels(y)
    g1 = approx_fprime(net.get_params(), net.cost_for_params, 1e-5, x, by)
    g2 = net.param_grad(x, by)
    err = np.max(np.abs(g1 - g2)) / np.abs(g1).max()
    print(err)
    assert err < 1e-3, 'incorrect gradient!'
def update_loss(self, loss: Callable[[np.ndarray], float],
                grad: Optional[Callable[[np.ndarray], np.ndarray]] = None
                ) -> None:
    """Update the objective function to minimize.

    Args:
        loss : Objective function to be minimized.
        grad : Gradient of the objective function. If None, a
            finite-difference approximation is used.
    """
    if grad is None:
        grad = lambda x: approx_fprime(x, loss, self._p['epsilon'])
    self._loss = loss
    self._grad = grad
    self._p['gfk'] = self.grad(self._p['xk'])
    self._p['gfkp1'] = self._p['gfk']
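# The same fallback pattern in isolation (a sketch using only SciPy): when
# no analytic gradient is supplied, a finite-difference gradient built from
# approx_fprime is used instead. ``make_grad`` is a hypothetical helper.
import numpy as np
from scipy.optimize import approx_fprime

def make_grad(loss, epsilon=1e-8):
    return lambda x: approx_fprime(x, loss, epsilon)

g = make_grad(lambda x: np.sum(x ** 2))
print(g(np.array([1.0, 2.0])))  # approximately [2., 4.]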
def gradientProj(objList, scale, fObj, pas, epsilon, maxIter):
    hyperBase = make_opti_base(len(objList))
    Xk = projHyperBase(
        np.array([random.randint(10, 100) for _ in range(len(objList))]),
        hyperBase, scale)
    Xn = Xk + 2 * epsilon
    i = 0
    print('starting to optimize')
    while (np.linalg.norm(Xk - Xn) > epsilon) and (i < maxIter):
        Xk = Xn
        grad = approx_fprime(Xk, fObj, 0.0001, objList)
        Xn = projHyperBase(Xk - pas * grad, hyperBase, scale)
        i += 1
    return finalDistrib(np.floor(balanceVector(Xn)), objList, scale)
def batch_cholesky_numerical_grad(x, vgrads, upper, *args, **kwargs):
    # Our implementation of the Cholesky decomposition returns a symmetric
    # grad instead of a lower-triangular grad.
    from scipy.optimize import approx_fprime

    def func(vector):
        matrix = np.reshape(vector, x.shape)
        L = batch_cholesky(matrix, upper) * vgrads
        return np.sum(L)

    x_c = np.concatenate([i.flatten() for i in x if i is not None])
    epsilon = 1e-4
    n_grad = approx_fprime(x_c, func, epsilon)
    n_grad = np.reshape(n_grad, x.shape)
    n_grad = (n_grad + np.transpose(n_grad, axes=(0, 2, 1))) * 0.5
    return n_grad.flatten()
def test_get_grad(self):
    x = self.kernel.params.get_value()
    k = lambda x, x1, x2: self.kernel.copy(x)(x1, x2)
    G = np.array(list(self.kernel.get_grad(self.X1, self.X2)))

    m = self.X1.shape[0]
    n = self.X2.shape[0]
    G_ = np.array([
        spop.approx_fprime(x, k, 1e-8, x1, x2)
        for x1 in self.X1
        for x2 in self.X2
    ]).swapaxes(0, 1).reshape(-1, m, n)

    nt.assert_allclose(G, G_, rtol=1e-6, atol=1e-6)
def __init__(self, fun, args=(), kwargs=None, jac=None, hess=None,
             hessp=None, constraints=(), eps=1e-8):
    if not SCIPY_INSTALLED:
        raise ImportError(
            'Install SciPy to use the `IpoptProblemWrapper` class.')
    self.fun_with_jac = None
    self.last_x = None
    if hess is not None or hessp is not None:
        raise NotImplementedError(
            'Using hessian matrices is not yet implemented!')
    if jac is None:
        # fun = FunctionWithApproxJacobian(fun, epsilon=eps, verbose=False)
        # note: approx_fprime accepts extra positional args only, so
        # keyword arguments are not forwarded to it
        jac = lambda x0, *args, **kwargs: approx_fprime(
            x0, fun, eps, *args)
    elif jac is True:
        self.fun_with_jac = fun
    elif not callable(jac):
        raise NotImplementedError('jac has to be bool or a function')
    self.fun = fun
    self.jac = jac
    self.args = args
    self.kwargs = kwargs or {}
    self._constraint_funs = []
    self._constraint_jacs = []
    self._constraint_args = []
    if isinstance(constraints, dict):
        constraints = (constraints, )
    for con in constraints:
        con_fun = con['fun']
        con_jac = con.get('jac', None)
        if con_jac is None:
            con_fun = FunctionWithApproxJacobian(con_fun, epsilon=eps,
                                                 verbose=False)
            con_jac = con_fun.jac
        con_args = con.get('args', [])
        self._constraint_funs.append(con_fun)
        self._constraint_jacs.append(con_jac)
        self._constraint_args.append(con_args)
    # Set up evaluation counts
    self.nfev = 0
    self.njev = 0
    self.nit = 0
def test_loglikelihood_hessian_diag_dcm_exp_zeros(self):
    # convergence relies heavily on x0
    n, s = (10, 35)
    # n, s = (5, 35)
    A = mg.random_weighted_matrix_generator_dense(
        n, sup_ext=100, sym=False, seed=s, intweights=True)
    A[0, :] = 0
    A[:, 5] = 0

    bA = np.array([[1 if aa != 0 else 0 for aa in a] for a in A])

    k_out = np.sum(bA, axis=1)
    k_in = np.sum(bA, axis=0)
    s_out = np.sum(A, axis=1)
    s_in = np.sum(A, axis=0)

    g = sample.DirectedGraph(A)

    g.initial_guess = "uniform"
    g.regularise = "identity"
    g._initialize_problem("decm", "newton")
    # theta = np.random.rand(6)
    theta = 0.5 * np.ones(n * 4)
    theta[np.concatenate((k_out, k_in, s_out, s_in)) == 0] = 1e4

    x0 = np.exp(-theta)

    f_sample = np.zeros(n * 4)
    for i in range(n * 4):
        f = lambda x: loglikelihood_prime_decm_exp(x, g.args)[i]
        f_sample[i] = approx_fprime(theta, f, epsilon=1e-6)[i]

    f_exp = loglikelihood_hessian_diag_decm_exp(theta, g.args)

    # debug
    # print(theta, x0)
    # print(g.args)
    # print('approx', f_sample)
    # print('my', f_exp)
    # print('gradient', loglikelihood_prime_decm_exp(theta, g.args))
    # print('diff', f_sample - f_exp)
    # print('max', np.max(np.abs(f_sample - f_exp)))

    # test result
    self.assertTrue(np.allclose(f_sample, f_exp))