def __init__(self, X, y, kernels, likelihood, mu = None, opt_kernel = False, tau=0.5, obs_idx=None, noise = 1e-6):
    """Set up Laplace-approximation inference state for a grid GP.

    Args:
        X (np.array): data
        y (np.array): output
        kernels: kernel function(s) to use for inference
        likelihood (likelihoods.Likelihood): likelihood object
        mu (np.array): prior mean (None -> handled by the base class)
        opt_kernel (bool): whether to optimize kernel hyperparameters
        tau (float): Newton line search hyperparam
        obs_idx (np.array): indices of observed points (partial grid)
        noise (float): observation noise jitter
    """
    super(Laplace, self).__init__(X, y, kernels, likelihood, mu, obs_idx, opt_kernel, noise=noise)
    # Newton/Laplace state vectors, one entry per grid point.
    self.alpha = np.zeros([X.shape[0]])
    self.W = np.zeros([X.shape[0]])
    self.grads = np.zeros([X.shape[0]])
    # Conjugate-gradient solver driven by cg_prod matrix-vector products.
    self.opt = CG(self.cg_prod)
    self.root_eigdecomp = None
    # Current latent-function estimate, initialized at the prior mean.
    self.f = self.mu
    self.f_pred = self.f
    self.tau = tau
    # Autograd first and second derivatives of the log-likelihood wrt f.
    self.grad_func = egrad(self.likelihood.log_like)
    self.hess_func = egrad(self.grad_func)
def gammaTA(TC, PH, totals, Ks):
    """(d[ln(CO2)]/d[TA])^-1 with constant TC, i.e. γ_Alk of ESM10."""
    # Differentiate TA and ln(fCO2) wrt pH at fixed TC; their ratio
    # cancels the pH dependence, leaving d[TA]/d[ln(CO2)].
    ta_of_ph = lambda ph: solve.get.TAfromTCpH(TC, ph, totals, Ks)
    lnfc_of_ph = lambda ph: np.log(solve.get.fCO2fromTCpH(TC, ph, totals, Ks))
    return egrad(ta_of_ph)(PH) / egrad(lnfc_of_ph)(PH)
def new_position_from_nth_solution_equation(points):
    """Residual function for the root solve that yields the next position.

    `points` packs the interior-point trial values followed by the
    q_{n+1} trial values (each of length len(self.q_list)).
    NOTE(review): relies on `self`, `t`, `time_step` and `i` from an
    enclosing scope not visible in this block — confirm against caller.
    """
    interior_point_trial_solution = points[0:len(self.q_list)]
    q_n_plus_1_trial_solution = points[len(self.q_list):2 * len(self.q_list)]
    # Action as a function of q_n only, other slots held at trial values.
    S_of_n = lambda q_n: self.action(
        t, time_step, q_n, interior_point_trial_solution,
        q_n_plus_1_trial_solution)
    # Action as a function of the interior point only.
    S_of_interior = lambda q_interior: self.action(
        t, time_step, self.q_solutions[i], q_interior,
        q_n_plus_1_trial_solution)
    partial_differential_of_action_wrt_interior_point = egrad(
        S_of_interior)
    # Interior stationarity: dS/d(q_interior) = 0 at the solution.
    interior_equation = partial_differential_of_action_wrt_interior_point(
        interior_point_trial_solution)
    partial_differential_of_action_wrt_q_n = egrad(S_of_n)
    # Momentum condition: p_n + dS/dq_n = 0 at the solution.
    conservation_equation = np.add(
        self.p_solutions[i],
        partial_differential_of_action_wrt_q_n(
            self.q_solutions[i]))
    return np.concatenate(
        (interior_equation, conservation_equation))
def omegaTA(TC, PH, CARB, Sal, TempK, Pbar, WhichKs, totals, Ks):
    """(d[ln(OmegaAr)]/d[TA] with constant TC, i.e. ω_Alk of ESM10."""
    # Chain rule through pH at fixed TC; _dlnOmega_dCARB converts the
    # carbonate-ion derivative into d[ln(Omega)].
    carb_of_ph = lambda ph: solve.get.CarbfromTCpH(TC, ph, totals, Ks)
    ta_of_ph = lambda ph: solve.get.TAfromTCpH(TC, ph, totals, Ks)
    dcarb = egrad(carb_of_ph)(PH)
    dta = egrad(ta_of_ph)(PH)
    return dta / (dcarb * _dlnOmega_dCARB(CARB, totals, Ks))
def omegaTC(TA, PH, CARB, Sal, TempK, Pbar, WhichKs, totals, Ks):
    """(d[ln(OmegaAr)]/d[TC] with constant TA, i.e. ω_DIC of ESM10."""
    # Chain rule through pH at fixed TA; _dlnOmega_dCARB converts the
    # carbonate-ion derivative into d[ln(Omega)].
    carb_of_ph = lambda ph: solve.get.CarbfromTApH(TA, ph, totals, Ks)
    tc_of_ph = lambda ph: solve.get.TCfromTApH(TA, ph, totals, Ks)
    dcarb = egrad(carb_of_ph)(PH)
    dtc = egrad(tc_of_ph)(PH)
    return dtc / (dcarb * _dlnOmega_dCARB(CARB, totals, Ks))
def __init__(self, mesh, V, X, Y, eigenpairs=None, num_features=500, nu=3/2, kappa=1.0, sigma_f=1.0, sigma_n=1e-15):
    """Matérn Gaussian process defined on a manifold mesh.

    Args:
        mesh: mesh the GP lives on
        V: function space on the mesh
        X: training inputs
        Y: training outputs
        eigenpairs: optional precomputed (eigenvalues, eigenfunctions);
            computed from the mesh via get_eigenpairs when None
        num_features (int): number of eigenpairs to compute
        nu (float): Matérn smoothness parameter
        kappa, sigma_f, sigma_n (float): initial hyperparameter values
    """
    # s --- number of samples
    # l --- number of Fourier features
    # n --- number of data
    # t --- number of test points
    super().__init__(name='manifold_matern_gp')
    self.mesh = mesh
    self.V = V
    if eigenpairs is None:
        self.eigenvalues, self.eigenfunctions = \
            get_eigenpairs(mesh, self.V, num_features)
    else:
        self.eigenvalues, self.eigenfunctions = eigenpairs
    # NOTE(review): placement of this transpose (inside vs. after the
    # else branch) is ambiguous in the source layout — confirm upstream.
    self.eigenfunctions = self.eigenfunctions.T
    self.nu = nu
    # Positive hyperparameters, constrained via log-exp transforms.
    self.kappa = paramz.Param('kappa', kappa,
                              default_constraint=paramz.transformations.Logexp())
    self.sigma_f = paramz.Param('sigma_f', sigma_f,
                                default_constraint=paramz.transformations.Logexp())
    self.sigma_n = paramz.Param('sigma_n', sigma_n,
                                default_constraint=paramz.transformations.Logexp())
    self.link_parameters(self.kappa, self.sigma_f, self.sigma_n)
    self.X = X
    self.Y = Y
    self.dim = self.mesh.topological_dimension()
    # Autograd gradients of the negative log-likelihood wrt each
    # hyperparameter (argument positions 0, 1, 2 respectively).
    self._dL_dsigma_f = egrad(self._neg_log_likelihood_alt, 0)
    self._dL_dkappa = egrad(self._neg_log_likelihood_alt, 1)
    self._dL_dsigma_n = egrad(self._neg_log_likelihood_alt, 2)
def isocap(TA, TC, PH, FC, totals, Ks):
    """d[TA]/d[TC] at constant fCO2, i.e. Q of HDW18."""
    # d[TA]/d[TC] = (dTA/dpH) / (dTC/dpH) with fCO2 held fixed.
    ta_of_ph = lambda ph: solve.get.TAfrompHfCO2(ph, FC, totals, Ks)
    tc_of_ph = lambda ph: solve.get.TCfrompHfCO2(ph, FC, totals, Ks)
    return egrad(ta_of_ph)(PH) / egrad(tc_of_ph)(PH)
def RevelleFactor(TA, TC, PH, FC, totals, Ks):
    """Revelle factor as defined by BTSP79."""
    # (d[fCO2]/d[TC]) * (TC/FC) at constant TA, using pH as the free
    # variable for both derivatives.
    fc_of_ph = lambda ph: solve.get.fCO2fromTApH(TA, ph, totals, Ks)
    tc_of_ph = lambda ph: solve.get.TCfromTApH(TA, ph, totals, Ks)
    dfc = egrad(fc_of_ph)(PH)
    dtc = egrad(tc_of_ph)(PH)
    return (dfc / dtc) * (TC / FC)
def implicit_curvature_2d(curve_fn):
    """Build a curvature function for the implicit curve curve_fn(s, t) = 0.

    The returned function takes the same (stacked) arguments as curve_fn
    and evaluates the standard implicit-curve curvature formula.
    """
    grad1 = egrad(curve_fn)
    # Applying egrad twice does not yield the full Hessian here, so each
    # second-derivative row is taken from one component of the gradient.
    grad2_s = egrad(lambda *a, **kw: grad1(*a, **kw)[..., 0])
    grad2_t = egrad(lambda *a, **kw: grad1(*a, **kw)[..., 1])

    def curvature(st, **kw):
        first = grad1(st, **kw)
        row_s = grad2_s(st, **kw)
        row_t = grad2_t(st, **kw)
        ds, dt = first[..., 0], first[..., 1]
        dss, dst = row_s[..., 0], row_s[..., 1]
        dtt = row_t[..., 1]
        numerator = -dt * dt * dss + 2 * ds * dt * dst - ds * ds * dtt
        denominator = (ds * ds + dt * dt) ** (3 / 2)
        return numerator / denominator

    return curvature
def gammaTC(TA, PH, totals, Ks):
    """(d[ln(CO2)]/d[TC])^-1 with constant TA, i.e. γ_DIC of ESM10."""
    # Differentiate TC and ln(fCO2) wrt pH at fixed TA; their ratio
    # cancels the pH dependence, leaving d[TC]/d[ln(CO2)].
    tc_of_ph = lambda ph: solve.get.TCfromTApH(TA, ph, totals, Ks)
    lnfc_of_ph = lambda ph: np.log(solve.get.fCO2fromTApH(TA, ph, totals, Ks))
    return egrad(tc_of_ph)(PH) / egrad(lnfc_of_ph)(PH)
def mu_grad(self, X, nder=1):
    """Autograd derivative of the predictive mean at X.

    nder=1 gives the first derivative; any other value gives the second.
    """
    mean_fn = lambda x: self.predict(x)
    deriv = egrad(mean_fn)
    if nder != 1:
        deriv = egrad(deriv)
    return deriv(X)
def sigma_grad(self, X, nder=1):
    """Autograd derivative of the predictive variance at X.

    nder=1 gives the first derivative; any other value gives the second.
    """
    var_fn = lambda x: self.variance(x)
    deriv = egrad(var_fn)
    if nder != 1:
        deriv = egrad(deriv)
    return deriv(X)
def test_higher_order_derivatives():
    """Nested egrad calls up to third order evaluate without error."""
    xs = cp.arange(10)
    f = lambda x: cp.sin(x)
    first = egrad(f)
    second = egrad(first)
    third = egrad(second)
    for deriv in (first, second, third):
        deriv(xs)
def kinetic_egrad(self, param, pos):
    """Apply the kinetic operator to the wavefunction at the given points.

    Args:
        param: variational parameters
        pos: positions of the electrons

    Returns:
        Column vector of K * psi values.
    """
    # Second derivative of the wavefunction values wrt positions (arg 1).
    second_deriv = egrad(egrad(self.values, 1), 1)(param, pos)
    if self.ndim_tot == 1:
        return second_deriv.reshape(-1, 1)
    # Sum second derivatives across dimensions for the full Laplacian.
    return np.sum(second_deriv, 1).reshape(-1, 1)
def weights_grad(self, X, weights, length_scale, signal_variance):
    """Gradient wrt X of the weighted prediction with fixed hyperparameters."""
    predict_fn = lambda x: self.predict_weights(x, weights, length_scale,
                                                signal_variance)
    return egrad(predict_fn)(X)
def L_d(d, f, order=1):
    """Apply the operator g -> (dg/dx)·f to `d` repeatedly, `order` times.

    Returns the result of the final application; order=0 returns `d`
    unchanged.
    """
    result = d
    for _ in range(order):
        result = np.dot(operable(egrad(result)), f)
    return result
def __init__(self, X, y, kernels, likelihood=None, mu=None, obs_idx=None, max_grad=10., noise=1e-6):
    """Base setup for GP inference on a (possibly partially observed) grid.

    Args:
        X (): data (full grid)
        y (): response
        kernels (): list of kernel objects
        likelihood (): likelihood object
        mu (): prior mean
        obs_idx (): indices of observed points on grid
        max_grad (): for gradient clipping
        noise (): observation noise jitter
    """
    self.X = X
    self.y = y
    # m = total grid points, d = input dimension.
    self.m = self.X.shape[0]
    self.d = self.X.shape[1]
    self.obs_idx = obs_idx
    # n = number of observed points (full grid when obs_idx is None).
    self.n = len(self.obs_idx) if self.obs_idx is not None else self.m
    # Unique coordinate values per dimension as column vectors —
    # the per-axis factors of the grid's Kronecker structure.
    self.X_dims = [np.expand_dims(np.unique(X[:, i]), 1)
                   for i in range(self.d)]
    self.mu = np.zeros(self.m) if mu is None else mu
    self.max_grad = max_grad
    self.init_Ks(kernels, noise)
    if likelihood is not None:
        self.likelihood = likelihood
        # Autograd derivative of the log-likelihood wrt latent values.
        self.likelihood_grad = egrad(self.likelihood.log_like)
def step(self, parameters, input, target):
    """One momentum-SGD update; returns the updated parameters."""
    # NOTE(review): the velocity is added to `input`, not to `parameters`
    # — unusual for Nesterov-style lookahead; confirm this is intended.
    grad_fn = egrad(self.optimizer, 0)
    gradient = grad_fn(parameters, input + self.v, target)
    self.v = self.momentum * self.v - self.learning_rate * gradient
    return parameters + self.v
def autograd(f, ds, points):
    """Evaluate derivatives of f on the given points.

    `ds` lists argument indices to differentiate wrt (repeats allowed for
    higher order); `points` carries coordinates along its last axis.
    """
    unpacked = lambda *coords: f(np.stack(coords, axis=-1))
    for axis in ds:
        unpacked = egrad(unpacked, axis)
    ndim = points.shape[-1]
    coords = [points[..., k] for k in range(ndim)]
    return unpacked(*coords)
def __init__(self, breakpoints, alpha=0.05, penalizer=0.0, fit_intercept=True, *args, **kwargs):
    """Piecewise-exponential regression fitter.

    Args:
        breakpoints: strictly finite, non-negative hazard breakpoints
            (an infinite sentinel is appended automatically)
        alpha (float): confidence-interval level, forwarded to the base
        penalizer (float): regularization strength
        fit_intercept (bool): include an intercept term

    Raises:
        ValueError: if a breakpoint is infinite or negative.
    """
    super(PiecewiseExponentialRegressionFitter, self).__init__(alpha=alpha)
    ordered = np.sort(breakpoints)
    if len(ordered):
        # `not (x < inf)` also rejects NaN, unlike `x >= inf`.
        if not (ordered[-1] < np.inf):
            raise ValueError("Do not add inf to the breakpoints.")
        if ordered[0] < 0:
            raise ValueError("First breakpoint must be greater than 0.")
    self.breakpoints = np.append(ordered, [np.inf])
    self.n_breakpoints = len(self.breakpoints)
    # Hazard is the derivative of the cumulative hazard wrt time (arg 1).
    self._hazard = egrad(self._cumulative_hazard, argnum=1)  # pylint: disable=unexpected-keyword-arg
    self.penalizer = penalizer
    self.fit_intercept = fit_intercept
    self._fitted_parameter_names = [
        "lambda_%d_" % i for i in range(self.n_breakpoints)
    ]
def klmin(self, target=None, save=True, use_jac=True, **kwargs):
    """Minimize the KL divergence of the model to the target distribution.

    Args:
        target: optional target distribution, passed to set_kl_target
        save (bool): when True, store the optimized params on the model
        use_jac (bool): when True, try to supply an autograd Jacobian
        **kwargs: forwarded to scipy.optimize.minimize

    Returns:
        The scipy.optimize.OptimizeResult of the minimization.
    """
    if target is not None:
        self.set_kl_target(target)
    if use_jac is True:
        try:
            jac = egrad(self.kld)
        # Bug fix: was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit; autograd failures still degrade
        # gracefully to a Jacobian-free optimization.
        except Exception:
            print('error finding autograd jacobian... continuing without')
            jac = None
    else:
        jac = None
    # Initialize xi at the mean of the target distribution.
    self.the_model.kernel.xi = self._target_weights.dot(
        self.the_model._integration_ticks)
    res = sp.optimize.minimize(self.kld, self.params0, jac=jac, **kwargs)
    self.res = res
    self.params = copy.copy(res.x)
    if save is True:
        self.the_model.params = copy.copy(res.x)
    self.fitted_q = True
    return res
def test_gradient_descent():
    """Gradient descent on a linear model recovers the true weights."""
    x = cp.random.random(size=(10, 2))
    w_truth = cp.random.random(size=(2, 1))
    y = cp.dot(x, w_truth)

    def model(w, x):
        return cp.dot(x, w)

    def loss(w, x, y):
        residuals = model(w, x).ravel() - y.ravel()
        return cp.mean(cp.power(residuals, cp.array(2)))

    w = cp.random.rand(2, 1)
    dloss = egrad(loss)
    # Smoke-evaluate the gradient once before the descent loop.
    a = dloss(w, x, y)
    for _ in range(10000):
        w = w + -dloss(w, x, y) * 0.01
    print(w)
    print(w_truth)
    assert np.allclose(cp.asnumpy(w), cp.asnumpy(w_truth), atol=0.001)
def logprob(pos, func):
    """Return (log func(pos), gradient of log func at pos)."""
    log_density = lambda x: np.log(func(x))
    return log_density(pos), egrad(log_density)(pos)
def autogradientn(func, n=1):
    '''
    Improved version of autograd.egrad: accepts an integer order and
    builds the corresponding high-order derivative.

    Parameters:
    -----------
    func: callable, function to differentiate; must be built from
        autograd primitives.
    n: int, the order of the derivative (n applications of egrad;
        note n <= 1 both yield the first derivative, as before).

    Return:
    -----------
    result: callable, the n-th order derivative; its input is promoted
        to a float array before evaluation.
    '''
    df = egrad(func)
    # Idiom fix: a plain `range` replaces np.arange — the loop needs only
    # an integer counter, not a numpy array.
    for _ in range(n - 1):
        df = egrad(df)
    # Multiplying by 1. promotes integer inputs to floats for autograd.
    return lambda x: df(np.asarray(x) * 1.)
def determine_new_momentum_from_q_n_plus_1th_solution(
        interior_point):
    """Momentum at the (i+1)-th position via the discrete Legendre transform.

    NOTE(review): relies on `self`, `t`, `time_step` and `i` from an
    enclosing scope not visible in this block — confirm against caller.
    """
    # Action as a function of q_{n+1} only, with the solved interior point.
    S = lambda q_n_plus_1: self.action(
        t, time_step, self.q_solutions[i], interior_point, q_n_plus_1)
    partial_differential_of_action_wrt_q_n_plus_1 = egrad(S)
    # p_{n+1} = dS/dq_{n+1} evaluated at the newly solved position.
    return partial_differential_of_action_wrt_q_n_plus_1(
        self.q_solutions[i + 1])
def jacobian_opt(self, param, pos):
    """Gradient of the wavefunction wrt the variational parameters,
    evaluated row-by-row at the current positions."""
    # Build the egrad wrapper once instead of per position (pure wrapper,
    # identical results).
    grad_wrt_param = egrad(self.values, 0)
    rows = [grad_wrt_param(param, p.reshape(1, -1))[0].tolist()
            for p in pos]
    jac = np.array(rows)
    if jac.ndim == 1:
        jac = jac.reshape(-1, 1)
    return jac
def test_sin():
    """egrad of sin evaluates on an integer range without error."""
    xs = cp.arange(10)
    sin_fn = lambda x: cp.sin(x)
    egrad(sin_fn)(xs)
def mu_grad(self, X, nder=1, return_std=False):
    """Derivative of the predictive mean at X, optionally paired with
    the matching-order derivative of the predictive std.

    nder=1 gives the first derivative; any other value gives the second.
    """
    mean_fn = lambda x: self.predict(x)
    if nder == 1:
        grad_fn, order = egrad(mean_fn), 1
    else:
        grad_fn, order = egrad(egrad(mean_fn)), 2
    if return_std:
        return grad_fn(X), self.sigma_grad(X, nder=order)
    return grad_fn(X)
def softmax(x, grad=False):
    """Numerically stable softmax along axis 0, or its elementwise
    gradient when grad=True."""
    def stable_softmax(z):
        # Shift by the max before exponentiating to avoid overflow.
        shifted = np_autograd.exp(z - np_autograd.max(z))
        return shifted / shifted.sum(axis=0)

    if grad:
        return egrad(stable_softmax)(x)
    return stable_softmax(x)
def new_position_from_nth_solution_equation(
        q_n_plus_1_trial_solutions):
    """Residual whose root gives the next position q_{n+1}.

    NOTE(review): relies on `self`, `t`, `time_step` and `i` from an
    enclosing scope not visible in this block — confirm against caller.
    """
    # Action as a function of q_n, with q_{n+1} held at the trial values.
    S = lambda q_n: self.action(q_n, q_n_plus_1_trial_solutions, t,
                                time_step)
    partial_differential_of_action_wrt_q_n = egrad(S)
    # Discrete momentum condition: p_n + dS/dq_n = 0 at the solution.
    equation = np.add(
        self.p_solutions[i],
        partial_differential_of_action_wrt_q_n(
            self.q_solutions[i]))
    return equation
'''
Mathematically we can only take gradients of scalar-valued functions, but
autograd's elementwise_grad function also handles numpy's familiar
vectorization of scalar functions, which is used in this example.

To be precise, elementwise_grad(fun)(x) always returns the value of a
vector-Jacobian product, where the Jacobian of fun is evaluated at x and the
vector is an all-ones vector with the same size as the output of fun. When
vectorizing a scalar-valued function over many arguments, the Jacobian of the
overall vector-to-vector mapping is diagonal, and so this vector-Jacobian
product simply returns the diagonal elements of the Jacobian, which is the
(elementwise) gradient of the function at each input value over which the
function is vectorized.
'''

def tanh(x):
    """Hyperbolic tangent, written with explicit exponentials so autograd
    can differentiate it repeatedly."""
    # Bug fix: the previous formula (1 - e^-x) / (1 + e^-x) computes
    # tanh(x / 2), not tanh(x); using e^(-2x) makes the function match
    # its name.
    return (1.0 - np.exp(-2.0 * x)) / (1.0 + np.exp(-2.0 * x))

x = np.linspace(-7, 7, 200)
plt.plot(x, tanh(x),
         x, egrad(tanh)(x),                                      # first derivative
         x, egrad(egrad(tanh))(x),                               # second derivative
         x, egrad(egrad(egrad(tanh)))(x),                        # third derivative
         x, egrad(egrad(egrad(egrad(tanh))))(x),                 # fourth derivative
         x, egrad(egrad(egrad(egrad(egrad(tanh)))))(x),          # fifth derivative
         x, egrad(egrad(egrad(egrad(egrad(egrad(tanh))))))(x))   # sixth derivative

plt.axis('off')
plt.savefig("tanh.png")
plt.show()