def test_solve_banded_grad(T=10, D=4):
    """Test solve_banded gradient"""
    J_diag, J_lower_diag, J_full = make_block_tridiag(T, D)
    L_full = np.linalg.cholesky(J_full)
    L_banded = np.vstack([
        np.concatenate((np.diag(L_full, -d), np.zeros(d)))
        for d in range(2 * D)
    ])
    b = npr.randn(T * D)

    # Check gradient against that of regular solve.
    g_true = elementwise_grad(np.linalg.solve)(L_full, b)
    g_test = elementwise_grad(solve_banded, argnum=1)((2 * D - 1, 0), L_banded, b)
    assert np.allclose(np.diag(g_true), g_test[0])
    for d in range(1, 2 * D):
        assert np.allclose(np.diag(g_true, -d), g_test[d, :-d])

    check_grads(solve_banded, argnum=1, modes=['rev'], order=1)((2 * D - 1, 0), L_banded, b)
    check_grads(solve_banded, argnum=2, modes=['rev'], order=1)((2 * D - 1, 0), L_banded, b)
def test_parameter_gradients(self):
    for _ in range(10):
        x = auto_np.random.randn(1, 2)
        output = self.nn(x)
        grads = self.nn.gradient(x)

        # Compute gradients using autograd
        auto_grads = []
        for i in range(len(self.nn.layers)):
            W_grad = elementwise_grad(self.f_np, 1 + 2 * i)(x, *self.params)
            b_grad = elementwise_grad(self.f_np, 1 + 2 * i + 1)(x, *self.params)
            auto_grads.extend(W_grad.ravel())
            auto_grads.extend(b_grad.ravel())

            # Test each individual layer
            np.testing.assert_almost_equal(W_grad, self.nn.layers[i].weights_gradient)
            np.testing.assert_almost_equal(b_grad, self.nn.layers[i].bias_gradient)

        # Test extraction of full gradient vector
        np.testing.assert_almost_equal(auto_grads, grads * output)
def get_link_h(w, q, ln_q, ln_1_q, ln_s):
    w = w.reshape(-1, 3)
    n = numpy.shape(w)[0]
    h = numpy.zeros((n * 3, n * 3))
    for i in range(0, 3):
        for j in range(0, 3):
            tmp_grad = autograd.elementwise_grad(
                autograd.elementwise_grad(e_link_log_lik, i), j)
            h[numpy.arange(0, n) * 3 + i, numpy.arange(0, n) * 3 + j] = \
                tmp_grad(w[:, 0].reshape(-1, 1), w[:, 1].reshape(-1, 1),
                         w[:, 2].reshape(-1, 1), q, ln_q, ln_1_q, ln_s).ravel()
    h_u = numpy.zeros((n * 3, n * 3))
    h_v = numpy.zeros((n * 3, n * 3))
    for i in range(0, n):
        tmp_u, tmp_s, tmp_v = scipy.linalg.svd(
            a=-h[i * 3:(i + 1) * 3, i * 3:(i + 1) * 3])
        tmp_s = tmp_s ** 0.5
        h_u[i * 3:(i + 1) * 3, i * 3:(i + 1) * 3] = numpy.matmul(tmp_u, numpy.diag(tmp_s))
        h_v[i * 3:(i + 1) * 3, i * 3:(i + 1) * 3] = numpy.matmul(numpy.diag(tmp_s), tmp_v)
    return -h, h_u, h_v
def __init__(self, hidden_dim, activation='tanh', inner_init='orthogonal',
             parameters=None, return_sequences=True):
    self.return_sequences = return_sequences
    self.hidden_dim = hidden_dim
    self.inner_init = get_initializer(inner_init)
    self.activation = get_activation(activation)
    self.activation_d = elementwise_grad(self.activation)
    self.sigmoid_d = elementwise_grad(sigmoid)
    if parameters is None:
        self._params = Parameters()
    else:
        self._params = parameters

    self.last_input = None
    self.states = None
    self.outputs = None
    self.gates = None
    self.hprev = None
    self.input_dim = None
    self.W = None
    self.U = None
def __init__(self, lnpdf, D, glnpdf=None, lnpdf_is_vectorized=False):
    """Black Box Variational Inference using stochastic gradients."""
    if lnpdf_is_vectorized:
        self.lnpdf = lnpdf
        if glnpdf is None:
            self.glnpdf = elementwise_grad(lnpdf)
    else:
        # create vectorized versions of lnpdf and its gradient
        self.glnpdf_single = grad(lnpdf)
        self.glnpdf = lambda z: np.array(
            [self.glnpdf_single(zi) for zi in np.atleast_2d(z)])
        self.lnpdf = lambda z: np.array(
            [lnpdf(zi) for zi in np.atleast_2d(z)])

    # hessian and elementwise_grad of glnpdf
    self.gglnpdf = elementwise_grad(self.glnpdf)
    self.hlnpdf = hessian(self.lnpdf)
    self.hvplnpdf = hessian_vector_product(self.lnpdf)

    # this function creates a generator of Hessian-vector product functions:
    # - make hvp = hvp_maker(lnpdf)(z)
    # - now hvp(v) = hessian(lnpdf)(z) v
    self.hvplnpdf_maker = make_hvp(self.lnpdf)
def __init__(self, hidden_dim, activation='tanh', inner_init='orthogonal',
             parameters=None, return_sequences=True):
    self.return_sequences = return_sequences
    self.hidden_dim = hidden_dim
    self.inner_init = get_initializer(inner_init)
    print(activation)
    self.activation = get_activation(activation)
    self.activation_d = elementwise_grad(self.activation)
    self.sigmoid_d = elementwise_grad(sigmoid)
    if parameters is None:
        self._params = convnet.Parameters()
    else:
        self._params = parameters

    self.last_input = None
    self.states = None
    self.outputs = None
    self.gates = None
    self.hprev = None
    self.input_dim = None
    self.W = None
    self.U = None
def get_gd_deltas(f, learning_rate, W, x, b, y_target, threshold):
    df_dW = autograd.elementwise_grad(f, 0)
    df_db = autograd.elementwise_grad(f, 2)
    delta_W = -learning_rate * df_dW(W, x, b, y_target, threshold)
    delta_b = -learning_rate * df_db(W, x, b, y_target, threshold)
    return delta_W, delta_b
def gradient(self, func, w):
    # calculate the gradient
    x0 = float(w[0])
    y0 = float(w[1])
    dz_dx = elementwise_grad(func, argnum=0)(x0, y0)
    dz_dy = elementwise_grad(func, argnum=1)(x0, y0)
    grad = np.array([dz_dx, dz_dy]).reshape((2, 1))
    return grad
def nabla_mu(self, eta):
    zeta = (eta * self.omega) + self.mu
    theta = self.inv_T(zeta)
    grad_joint = elementwise_grad(self.log_p_x_theta)(theta)
    grad_transform = elementwise_grad(self.inv_T)(zeta)
    grad_log_det = elementwise_grad(self.log_det_jac)(zeta)
    return grad_joint * grad_transform + grad_log_det
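# Note on the snippet above: the elementwise chain rule
# (grad_joint * grad_transform) is valid because inv_T is applied
# coordinate-wise, so its Jacobian is diagonal; for a general (non-elementwise)
# transform the product would have to be a full Jacobian-vector product instead.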
def composite_mse_loss(params, u_i, u_x, f_x, N_u, N_f):
    u_pred_u = feed_forward(params, u_x)
    err_u = u_i.reshape([-1, 1]) - u_pred_u

    u_pred_f = feed_forward(params, f_x)
    u_pred_f_x = elementwise_grad(feed_forward, 1)
    u_pred_f_xx = elementwise_grad(u_pred_f_x, 1)(params, f_x)
    err_f = u_pred_f_xx - u_pred_f - f_x
    # print(np.shape(err_f), np.shape(u_pred_f_x))

    return 1 / N_u * np.sum(err_u ** 2) + 1 / N_f * np.sum(err_f ** 2)
def __init__(self, n_dims, log_prob, init=None):
    self.n_dims = n_dims
    if init is None:
        self.params = np.zeros((1, self.n_dims))
    else:
        self.params = init
    # "full_*" variants take (unused) params alongside the data, matching the
    # interface expected for parameterized densities
    self.full_log_prob = lambda params, x: log_prob(x)
    self.full_grad_log_prob = lambda params, x: autograd.elementwise_grad(log_prob)(x)
    self.log_prob = log_prob
    self.grad_log_prob = autograd.elementwise_grad(self.log_prob)
def check_jacobian(self):
    try:
        import autograd.numpy as np, autograd as ag, GPy, matplotlib.pyplot as plt
        from GPy.models import GradientChecker, GPRegression
    except ImportError:
        raise self.skipTest("autograd not available to check gradients")

    def k(X, X2, alpha=1., lengthscale=None):
        if lengthscale is None:
            lengthscale = np.ones(X.shape[1])
        exp = 0.
        for q in range(X.shape[1]):
            exp += ((X[:, [q]] - X2[:, [q]].T) / lengthscale[q]) ** 2
        return alpha * np.exp(-.5 * exp)

    dk = ag.elementwise_grad(
        lambda x, x2: k(x, x2, alpha=ke.variance.values,
                        lengthscale=ke.lengthscale.values))
    dkdk = ag.elementwise_grad(dk, argnum=1)

    ke = GPy.kern.RBF(1, ARD=True)
    ke.variance = .2
    ke.lengthscale[:] = .5
    ke.randomize()
    X = np.linspace(-1, 1, 1000)[:, None]
    X2 = np.array([[0.]]).T

    np.testing.assert_allclose(ke.gradients_X([[1.]], X, X), dk(X, X))
    np.testing.assert_allclose(ke.gradients_XX([[1.]], X, X).sum(0), dkdk(X, X))
    np.testing.assert_allclose(ke.gradients_X([[1.]], X, X2), dk(X, X2))
    np.testing.assert_allclose(ke.gradients_XX([[1.]], X, X2).sum(0), dkdk(X, X2))

    m = GPRegression(self.X, self.Y)

    def f(x):
        m.X[:] = x
        return m.log_likelihood()

    def df(x):
        m.X[:] = x
        return m.kern.gradients_X(m.grad_dict['dL_dK'], X)

    def ddf(x):
        m.X[:] = x
        return m.kern.gradients_XX(m.grad_dict['dL_dK'], X).sum(0)

    gc = GradientChecker(f, df, self.X)
    gc2 = GradientChecker(df, ddf, self.X)
    assert gc.checkgrad()
    assert gc2.checkgrad()
def argmin_vjp(ans, x):
    """Return the vector-Jacobian product for an argmin.

    The VJP needs d_ans/dx: the incoming vector holds dloss/dans, and
    dloss/dans * dans/dx = dloss/dx, which is what we are actually after.
    """
    g = elementwise_grad(O2, 1)
    dg_dy = elementwise_grad(g, 1)(x, initial_y)
    dg_dx = elementwise_grad(g, 0)(x, initial_y)
    if np.ndim(dg_dy) == 0:
        # simple scalar function, so divide instead of inverting a matrix
        return lambda v: v * (1. / dg_dy) * dg_dx
    return lambda v: v * np.matmul(np.linalg.inv(dg_dy), dg_dx)
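# A minimal scalar sketch of the same implicit-differentiation idea, using a
# hypothetical quadratic objective O (not the O2 above): for O(x, y) = (y - 3x)^2
# the argmin over y is y*(x) = 3x, and the implicit function theorem gives
# dy*/dx = -(d2O/dy2)^-1 * (d2O/dydx) = 3.
import autograd.numpy as np
from autograd import elementwise_grad

def O(x, y):
    return (y - 3.0 * x) ** 2

g = elementwise_grad(O, 1)                # g = dO/dy, zero at the argmin
dg_dy = elementwise_grad(g, 1)(2.0, 6.0)  # evaluated at x = 2, y* = 6
dg_dx = elementwise_grad(g, 0)(2.0, 6.0)
print(-dg_dx / dg_dy)                     # 3.0, matching d(argmin)/dx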
def cost_function_deep(P, x):
    # Evaluate the trial function with the current parameters P
    g_t = g_trial_deep(x, P)

    # Find the second derivative w.r.t. x of the trial function
    d2_g_t = elementwise_grad(elementwise_grad(g_trial_deep, 0))(x, P)

    right_side = f(x)
    err_sqr = (-d2_g_t - right_side) ** 2
    cost_sum = np.sum(err_sqr)
    return cost_sum / np.size(err_sqr)
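# A quick standalone check of the nested elementwise_grad pattern used above:
# two applications give an elementwise second derivative, e.g. d2/dx2 sin(x) = -sin(x).
import autograd.numpy as np
from autograd import elementwise_grad

d2_sin = elementwise_grad(elementwise_grad(np.sin))
x = np.linspace(0.0, np.pi, 5)
assert np.allclose(d2_sin(x), -np.sin(x))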
def test(srbm, X, a, b, w, sigma2):
    # Check evaluation against the dedicated Symmetric RBM NumPy impl.
    a_grad = elementwise_grad(srbm_np, 1)(X.flatten(), a, b, w, sigma2)
    b_grad = elementwise_grad(srbm_np, 2)(X.flatten(), a, b, w, sigma2)
    w_grad = elementwise_grad(srbm_np, 3)(X.flatten(), a, b, w, sigma2)

    # Remember that the gradient returned is grad / eval
    np_expect = np.concatenate((a_grad, b_grad, w_grad.ravel())) / srbm_np(
        X.flatten(), a, b, w, sigma2)

    for expect, actual in zip(np_expect, srbm.gradient(X)):
        if expect == 0:
            assert expect == actual
        else:
            assert np.isclose(1, actual / expect)
def fit_autograd(self, X, y):
    # Cost function, unregularised
    def cost(theta, X, y):
        X_t = np.dot(X, theta)
        logistic = self.sigmoid(X_t)
        val = -1 * (y * np.log(logistic) + (1 - y) * np.log(1 - logistic))
        return val.mean(axis=None)

    # Cost function with L2 regularisation
    def cost_2(theta, X, y):
        X_t = np.dot(X, theta)
        logistic = self.sigmoid(X_t)
        val = -1 * (y * np.log(logistic) + (1 - y) * np.log(1 - logistic))
        g = np.sum(val)
        g += self.C * np.dot(theta.T, theta)
        return g / X.shape[0]

    # Cost function with L1 regularisation
    def cost_1(theta, X, y):
        X_t = np.dot(X, theta)
        logistic = self.sigmoid(X_t)
        val = -1 * (y * np.log(logistic) + (1 - y) * np.log(1 - logistic))
        g = np.sum(val)
        g += self.C * abs(theta)
        return g / X.shape[0]

    # define differentiation functions
    grad_cost = grad(cost)
    grad_cost_1 = elementwise_grad(cost_1)
    grad_cost_2 = elementwise_grad(cost_2)

    self.theta = np.zeros(X.shape[1] + 1)
    X_new = np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)
    n_features = X_new.shape[1]
    n_samples = X_new.shape[0]

    # Gradient step at each iteration, according to the chosen regularisation
    for i in range(self.iterations):
        if self.regularization == 'None':
            self.theta -= self.alpha * grad_cost(self.theta, X_new, y)
        elif self.regularization == "L2":
            self.theta -= self.alpha * grad_cost_2(self.theta, X_new, y)
        elif self.regularization == "L1":
            self.theta -= self.alpha * grad_cost_1(self.theta, X_new, y)
def train(self, n_iters=100, n_mc_samples=200, callback=None):
    def discriminator_loss(params, x_p, x_q):
        logit_p = sigmoid(self.discriminator.predict(params, x_p))
        logit_q = sigmoid(self.discriminator.predict(params, x_q))
        loss = agnp.mean(agnp.log(logit_q)) + agnp.mean(agnp.log(1 - logit_p))
        return loss

    grad_discriminator_loss = autograd.elementwise_grad(discriminator_loss)

    # Train the generator, fixing the discriminator
    def generator_loss(params, z):
        og = self.generator.predict(params, z)[0, :, :]
        ratio = self.discriminator.predict(self.discriminator.get_params(), og)
        ll = agnp.mean(ratio) - agnp.mean(self.model.log_prob(og))
        return ll

    grad_generator_loss = autograd.elementwise_grad(generator_loss)

    for i in range(n_iters):
        print("Iteration %d " % (i + 1))

        # Fix the generator, train the discriminator:
        # sample random generator inputs and samples from the prior
        z = agnp.random.uniform(-10, 10, size=(n_mc_samples, 20))
        prior_samples = agnp.random.uniform(-10, 10,
                                            size=(n_mc_samples, self.n_params))
        var_dist_samples = self.generator.predict(self.generator.get_params(), z)[0, :, :]

        # Requires a differentiable discriminator
        ret = adam(
            lambda x, i: -grad_discriminator_loss(x, prior_samples, var_dist_samples),
            self.discriminator.get_params())
        self.discriminator.set_params(ret)

        # Requires a differentiable generator
        ret = adam(lambda x, i: grad_generator_loss(x, z),
                   self.generator.get_params(), callback=callback)
        self.generator.set_params(ret)
def fit_autograd(self, X, y):
    self.X = np.array(X)  # convert X to an np array
    self.y = np.array(y)  # convert y to an np array
    # prepend a column of ones for the bias term
    self.X = np.append(np.ones((self.X.shape[0], 1)), self.X, axis=1)
    self.theta = np.random.rand(self.X.shape[1])

    agrad = elementwise_grad(self.costFunctionUnregularised)
    agrad1 = elementwise_grad(self.costFunctionL1Regularised)
    agrad2 = elementwise_grad(self.costFunctionL2Regularised)

    for iterationNum in range(self.maxIterations):
        if self.regularization == 'l1':
            self.theta -= (self.learningRate * agrad1(self.theta, self.X, self.y)) / self.X.shape[0]
        elif self.regularization == 'l2':
            self.theta -= (self.learningRate * agrad2(self.theta, self.X, self.y)) / self.X.shape[0]
        else:
            self.theta -= (self.learningRate * agrad(self.theta, self.X, self.y)) / self.X.shape[0]
    return self.theta
def get_activation_function(mode: str = "sigmoid", derivate: bool = False) -> object:
    '''
    returns the corresponding activation function for the given mode

    Parameters:
        - mode: mode of the activation function. Possible values are [String]
            - Sigmoid function --> "sigmoid"
            - Tangens hyperbolicus --> "tanh"
            - Rectified Linear Unit --> "relu"
            - Leaky Rectified Linear Unit --> "leaky-relu"
            - Softmax --> "softmax"
        - derivate: whether (=True) or not (=False, default) to return the
          derivative of the given function [Boolean]
    Returns:
        - y: desired activation function [object]
    '''
    if mode == "sigmoid":
        y = lambda x: 1 / (1 + np.exp(-x))
    elif mode == "tanh":
        y = lambda x: (np.exp(x) - np.exp(-x)) / (np.exp(x) + np.exp(-x))
    elif mode == "relu":
        y = lambda x: np.where(x <= 0, 0.0, 1.0) * x
    elif mode == "leaky-relu":
        y = lambda x: np.where(x <= 0, 0.1, 1.0) * x
    elif mode == "softmax":
        y = lambda x: np.exp(x - x.max()) / np.sum(np.exp(x - x.max()))
    else:
        print('Unknown activation function. linear is used')
        y = lambda x: x
    ## when the derivative of the function shall be returned
    if derivate:
        return elementwise_grad(y)
    return y
def xcfunctional(rho: np.ndarray, excfunction) -> Tuple[np.ndarray, np.ndarray]:
    """Compute the exchange-(correlation) energy density and potential."""
    exc = excfunction(rho, np)
    # pylint: disable=no-value-for-parameter
    vxc = elementwise_grad(excfunction)(rho, agnp)
    return exc, vxc
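# A hedged usage sketch: the LDA (Slater) exchange functional below is an
# illustrative assumption, not taken from the code above. excfunction receives
# the density and a numpy-compatible module, and the elementwise derivative of
# the energy density is the potential.
import numpy as np
import autograd.numpy as agnp
from autograd import elementwise_grad

def lda_exchange(rho, np):
    # Slater exchange energy density: -(3/4) * (3/pi)^(1/3) * rho^(4/3)
    return -0.75 * (3.0 / np.pi) ** (1.0 / 3.0) * rho ** (4.0 / 3.0)

rho = np.array([0.1, 0.5, 1.0])
exc = lda_exchange(rho, np)
vxc = elementwise_grad(lda_exchange)(rho, agnp)
# analytically, vxc = -(3/pi)^(1/3) * rho^(1/3)
assert np.allclose(vxc, -(3.0 / np.pi) ** (1.0 / 3.0) * rho ** (1.0 / 3.0))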
def elementwise_grad(fun, argnum=0, *nary_op_args, **nary_op_kwargs):
    import autograd

    register()
    gradfun = autograd.elementwise_grad(fun, argnum, *nary_op_args, **nary_op_kwargs)

    def broadcast(*args, **kwargs):
        nextargs = [
            ak.operations.convert.to_layout(x, allow_record=True, allow_other=True)
            for x in args
        ]

        def getfunction(inputs):
            if all(isinstance(x, ak.layout.NumpyArray)
                   or not isinstance(x, ak.layout.Content) for x in inputs):
                return lambda: (ak.layout.NumpyArray(gradfun(*inputs)),)
            else:
                return None

        behavior = ak._util.behaviorof(*args)
        out = ak._util.broadcast_and_apply(nextargs, getfunction, behavior,
                                           pass_depth=False)
        assert isinstance(out, tuple) and len(out) == 1
        return ak._util.wrap(out[0], behavior)

    return broadcast
def gan_objective(prior_params, d_params, n_data, n_samples,
                  bnn_layer_sizes, act, d_act='tanh'):
    '''estimates V(G, D) = E_p_gp[D(f)] - E_p_bnn[D(f)]'''
    x = sample_inputs('uniform', n_data, (-10, 10))
    fbnns = sample_bnn(prior_params, x, n_samples, bnn_layer_sizes, act)  # [nf, nd]
    fgps = sample_gpp(x, n_samples, 'rbf')  # sample f ~ P_gp(f)

    D_fbnns = nn_predict(d_params, fbnns, d_act)
    D_fgps = nn_predict(d_params, fgps, d_act)

    # interpolate between GP and BNN samples for the gradient penalty
    eps = np.random.uniform()
    f = eps * fgps + (1 - eps) * fbnns

    def D(function):
        return nn_predict(d_params, function, 'tanh')

    g = elementwise_grad(D)(f)
    pen = 10 * (norm(g, ord=2, axis=1) - 1) ** 2

    return np.mean(D_fgps - D_fbnns + pen)
def f(self, mode: str = "tanh", derivate: bool = False) -> object:
    '''
    returns the corresponding function for the given mode

    Parameters:
        - mode: mode of the function. Possible values are [String]
            - Tangens hyperbolicus --> "tanh"
            - x * exp(-x^2 / 2) --> "exp"
            - x^3 --> "cube"
        - derivate: whether (=True) or not (=False, default) to return the
          derivative of the given function [Boolean]
    Returns:
        - y: desired activation function [object]
    '''
    if mode == "tanh":
        y = lambda x: (npa.exp(x) - npa.exp(-x)) / (npa.exp(x) + npa.exp(-x))
    elif mode == "exp":
        y = lambda x: x * npa.exp(-(x ** 2) / 2)
    elif mode == "cube":
        y = lambda x: x ** 3
    else:
        print('Unknown activation function. tanh is used')
        y = lambda x: (npa.exp(x) - npa.exp(-x)) / (npa.exp(x) + npa.exp(-x))
    ## when the derivative of the function shall be returned
    if derivate:
        return elementwise_grad(y)
    return y
def __init__(self, X, loc, scale, name: str = '__cntk_class_mvn_pdf__'):
    super(__cntk_class_mvn_pdf__, self).__init__([X, loc, scale], name=name)
    self.mvn_pdf = multivariate_normal.pdf
    # alternatives: elementwise_grad, jacobian, grad, holomorphic_grad
    self.grad = elementwise_grad(self.mvn_pdf)
def _function_diff(self, children, idx):
    """
    Derivative with respect to child number 'idx'.
    See :meth:`pybamm.Symbol._diff()`.
    """
    # Store the differentiated function, needed in case we want to convert to CasADi
    if self.derivative == "autograd":
        return Function(
            autograd.elementwise_grad(self.function, idx),
            *children,
            differentiated_function=self.function
        )
    elif self.derivative == "derivative":
        if len(children) > 1:
            raise ValueError(
                """
                differentiation using '.derivative()' not implemented for functions
                with more than one child
                """
            )
        else:
            # keep using "derivative" as the derivative method
            return pybamm.Function(
                self.function.derivative(),
                *children,
                derivative="derivative",
                differentiated_function=self.function
            )
def grad_func(*args):
    inp = anp.array(anp.broadcast_arrays(*args))
    result = anp.atleast_2d(elementwise_grad(argwrapper)(inp))
    # Put the 'gradient' axis at the end
    axes = list(range(len(result.shape)))
    result = result.transpose(*chain(axes[1:], [axes[0]]))
    return result
def MALA(logprob, x0, num_iters=1000, num_samples=10, step_size=0.01, callback=None):
    """Metropolis-adjusted Langevin algorithm.

    logprob: autograd.numpy-valued function that accepts an array x of shape
        x0.shape and returns an array of shape (x.shape[0],) representing
        log P(x) up to a constant factor.
    x0: initial array of inputs.
    """
    assert x0.shape[0] == num_samples, 'x0.shape[0] must be num_samples.'
    x = x0
    g = elementwise_grad(logprob)

    # log q(y|x) for the Langevin proposal
    logq = lambda y, x: -0.5 * np.sum(
        ((y - x - 0.5 * step_size ** 2 * g(x)) / step_size) ** 2, axis=1)

    for i in tqdm(range(num_iters)):
        Z = np.random.randn(*x.shape)
        U = np.random.rand(num_samples)
        dx = g(x)
        x_new = x + 0.5 * (step_size ** 2) * dx + step_size * Z
        log_alpha = (logprob(x_new).ravel() + logq(x, x_new)
                     - logprob(x).ravel() - logq(x_new, x))
        idxs = np.log(U) < log_alpha
        if callback:
            callback(x, i, x_new, log_alpha)
        x[idxs] = x_new[idxs]
    return x
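# A minimal usage sketch of MALA above (the 2-D Gaussian target is a
# hypothetical example; assumes np is autograd.numpy and tqdm is available).
# The log-density is vectorized over rows, as the docstring requires.
def log_gaussian(x):
    return -0.5 * np.sum(x ** 2, axis=1)

x0 = np.random.randn(10, 2)  # ten chains in two dimensions
samples = MALA(log_gaussian, x0, num_iters=500, num_samples=10, step_size=0.1)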
def __init__(self, layers, optimizer, loss, max_epochs=10, batch_size=64,
             metric='mse', shuffle=False, verbose=True):
    self.verbose = verbose
    self.shuffle = shuffle
    self.optimizer = optimizer
    self.loss = get_loss(loss)

    # TODO: fix
    if loss == 'categorical_crossentropy':
        self.loss_grad = lambda actual, predicted: -(actual - predicted)
    else:
        self.loss_grad = elementwise_grad(self.loss, 1)

    self.metric = get_metric(metric)
    self.layers = layers
    self.batch_size = batch_size
    self.max_epochs = max_epochs
    self._n_layers = 0
    self.log_metric = True if loss != metric else False
    self.metric_name = metric
    self.bprop_entry = self._find_bprop_entry()
    self.training = False
    self._initialized = False
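# Why the categorical_crossentropy special case above plausibly exists: with
# softmax outputs and one-hot targets, the gradient of cross-entropy with
# respect to the logits collapses to (predicted - actual), so it is hard-coded
# rather than obtained via elementwise_grad. A standalone numerical check of
# that identity (hypothetical helper, not this class's API):
import autograd.numpy as anp
from autograd import grad

def ce_of_logits(logits, actual):
    predicted = anp.exp(logits) / anp.sum(anp.exp(logits))
    return -anp.sum(actual * anp.log(predicted))

logits = anp.array([0.2, -1.0, 0.5])
actual = anp.array([0.0, 1.0, 0.0])
predicted = anp.exp(logits) / anp.sum(anp.exp(logits))
assert anp.allclose(grad(ce_of_logits)(logits, actual), predicted - actual)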
def multiview_mds_projection(self, A, P1, P2, P3, steps, alpha,
                             stopping_eps, outputpath, name_data_set):
    js_file_path = name_data_set + "_coordinates_tmp.js"
    self.create_viz_file(outputpath, name_data_set, js_file_path)

    costs = [self.costfunction_projection(A, P1, P2, P3)]
    updown = 0
    for i in range(1, steps + 1):
        nabla_g = elementwise_grad(self.costfunction_projection, (0, 1, 2, 3))
        dA, dP1, dP2, dP3 = nabla_g(A, P1, P2, P3)
        A = A - alpha * dA
        P1 = P1 - (alpha / 10) * dP1
        P2 = P2 - (alpha / 10) * dP2
        P3 = P3 - (alpha / 10) * dP3
        newcost = self.costfunction_projection(A, P1, P2, P3)
        print(" step: ", i, ", cost:", newcost)
        self.temp_data_writer(A, outputpath + js_file_path, costs, P1, P2, P3, i, newcost)

        # count consecutive steps with insufficient improvement
        if costs[i - 1] - newcost < stopping_eps:
            updown = updown + 1
        else:
            updown = 0
        costs.append(newcost)
        if updown == 3:
            print("early stopping at", i)
            return A, costs, P1, P2, P3
    return A, costs, P1, P2, P3
def train(self, n_mc_samples, n_elbo_samples=20, step_size=0.01,
          num_iters=1000, verbose=False, callback=None):
    def variational_objective(params, var_it, n_mc_samples=n_mc_samples):
        samples = self.v_dist.sample(params, n_mc_samples)
        elbo = self.v_dist.entropy(params) + agnp.mean(self.model.log_prob(samples))
        return -elbo

    if verbose:
        def cb(params, i, g):
            print("Negative ELBO: %f" % variational_objective(
                params, i, n_mc_samples=n_elbo_samples))
            if callback is not None:
                callback(params, i, g)
    else:
        cb = callback

    grad_elbo = autograd.elementwise_grad(variational_objective)
    ret = adam(lambda x, i: grad_elbo(x, i), self.v_dist.get_params(),
               step_size=step_size, num_iters=num_iters, callback=cb)
    self.v_dist.set_params(ret)
    return ret
def stress(parameters, positions, numbers, cell, strain=np.zeros((3, 3))):
    """Compute the stress on an EMT system.

    Parameters
    ----------
    positions : array of floats. Shape = (natoms, 3)
    numbers : array of integers of atomic numbers (natoms,)
    cell : array of unit cell vectors. Shape = (3, 3)

    Returns
    -------
    stress : an array of stress components. Shape = (6,)
        [sxx, syy, szz, syz, sxz, sxy]
    """
    dEdst = elementwise_grad(energy, 4)
    volume = np.abs(np.linalg.det(cell))
    der = dEdst(parameters, positions, numbers, cell, strain)
    result = (der + der.T) / 2 / volume
    return np.take(result, [0, 4, 8, 5, 2, 1])
def test_elementwise_grad():
    def simple_fun(a):
        return a + np.sin(a) + np.cosh(a)

    A = npr.randn(10)
    exact = elementwise_grad(simple_fun)(A)
    numeric = np.squeeze(np.array([nd(simple_fun, A[i]) for i in range(len(A))]))
    check_equivalent(exact, numeric)
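# The nd() helper used above (and further below) is not shown; a plausible
# central-difference stand-in (an assumption, not the test suite's actual
# implementation) would be:
def nd(fun, *args, eps=1e-6):
    # numerical partial derivative of fun with respect to each scalar argument
    partials = []
    for i, arg in enumerate(args):
        up = args[:i] + (arg + eps,) + args[i + 1:]
        dn = args[:i] + (arg - eps,) + args[i + 1:]
        partials.append((fun(*up) - fun(*dn)) / (2 * eps))
    return tuple(partials)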
def get_jitter_jacobian(self, include_frozen=False):
    if not HAS_AUTOGRAD:
        raise ImportError("'autograd' must be installed to compute gradients")
    jac = elementwise_grad(self.get_jitter)
    jac = jac(self.get_parameter_vector(include_frozen=True))
    if include_frozen:
        return jac
    return jac[self.unfrozen_mask]
def test_elementwise_grad():
    def simple_fun(a):
        return a + np.sin(a) + np.cosh(a)

    A = npr.randn(10)
    wrapped = elementwise_grad(simple_fun)(A)
    explicit = np.array([grad(simple_fun)(A[i]) for i in range(len(A))])
    check_equivalent(wrapped, explicit)
def test_elementwise_grad_multiple_args():
    def simple_fun(a, b):
        return a + np.sin(a) + np.cosh(b)

    A = 0.9
    B = npr.randn(10)
    argnum = 1
    wrapped = elementwise_grad(simple_fun, argnum)(A, B)
    explicit = np.array([grad(simple_fun, argnum)(A, B[i]) for i in range(len(B))])
    check_equivalent(wrapped, explicit)
def test_elementwise_grad_multiple_args():
    def simple_fun(a, b):
        return a + np.sin(a) + np.cosh(b)

    A = 0.9
    B = npr.randn(10)
    argnum = 1
    exact = elementwise_grad(simple_fun, argnum=argnum)(A, B)
    numeric = np.squeeze(np.array(
        [nd(simple_fun, A, B[i])[argnum] for i in range(len(B))]))
    check_equivalent(exact, numeric)
def __init__(self, layers, optimizer, loss, max_epochs=10, batch_size=64,
             random_seed=33, metric="mse", shuffle=True):
    self.shuffle = shuffle
    self.optimizer = optimizer
    self.loss = get_loss(loss)
    if loss == "categorical_crossentropy":
        self.loss_grad = lambda actual, predicted: -(actual - predicted)
    else:
        self.loss_grad = elementwise_grad(self.loss, 1)
    self.metric = get_metric(metric)
    self.random_seed = random_seed
    self.layers = layers
    self.batch_size = batch_size
    self.max_epochs = max_epochs
    self._n_layers = 0
    self.log_metric = True if loss != metric else False
    self.metric_name = metric
    self.bprop_entry = self._find_bprop_entry()
    self.training = False
    self._initialized = False
def __init__(self, C=0.01):
    self.C = C
    self._grad = elementwise_grad(self._penalty)
def train(args, rep_model, rnn_optimizer, update_params_theano, delta_x, batchData):
    # this is important to clear the shared memory
    rep_model.clearDerivativeSharedMemory()

    l_trees, r_trees, Y_labels, Y_scores, Y_scores_pred = batchData
    vec_feats = np.zeros((len(l_trees), 2 * args.lstmDim))

    for i, (l_t, r_t, td) in enumerate(zip(l_trees, r_trees, Y_scores_pred)):
        first_tree_rep = rep_model.forwardProp(l_t)
        second_tree_rep = rep_model.forwardProp(r_t)
        mul_rep = mul(first_tree_rep, second_tree_rep)
        sub_rep = abs_sub(first_tree_rep, second_tree_rep)
        vec_feat = np.concatenate((mul_rep, sub_rep))
        vec_feats[i] = vec_feat

        vec_feat_2d = vec_feat.reshape((1, 2 * rep_model.wvecDim))
        td_2d = td.reshape((1, 5))
        delta = delta_x(vec_feat_2d, td_2d)
        delta_mul = delta[:rep_model.wvecDim]
        delta_sub = delta[rep_model.wvecDim:]

        mul_grad_1 = elementwise_grad(mul, argnum=0)
        mul_grad_2 = elementwise_grad(mul, argnum=1)
        first_mul_grad = mul_grad_1(first_tree_rep, second_tree_rep)
        second_mul_grad = mul_grad_2(first_tree_rep, second_tree_rep)
        delta_rep1_mul = first_mul_grad * delta_mul
        delta_rep2_mul = second_mul_grad * delta_mul

        sub_grad_1 = elementwise_grad(abs_sub, argnum=0)
        sub_grad_2 = elementwise_grad(abs_sub, argnum=1)
        first_sub_grad = sub_grad_1(first_tree_rep, second_tree_rep)
        second_sub_grad = sub_grad_2(first_tree_rep, second_tree_rep)
        delta_rep1_sub = first_sub_grad * delta_sub
        delta_rep2_sub = second_sub_grad * delta_sub

        rep_model.backProp(l_t, delta_rep1_mul)
        rep_model.backProp(l_t, delta_rep1_sub)
        rep_model.backProp(r_t, delta_rep2_mul)
        rep_model.backProp(r_t, delta_rep2_sub)

    cost = update_params_theano(vec_feats, Y_scores_pred)

    if args.optimizer == 'sgd':
        update = rep_model.dstack
        rep_model.stack[1:] = [P - args.step * dP
                               for P, dP in zip(rep_model.stack[1:], update[1:])]
        # handle dictionary update sparsely
        """
        dL = update[0]
        for j in range(rep_model.numWords):
            rep_model.L[:, j] -= learning_rate * dL[j]
        """
    elif args.optimizer == 'adagrad':
        rnn_optimizer.adagrad_rnn(rep_model.dstack)
    elif args.optimizer == 'adadelta':
        rnn_optimizer.adadelta_rnn(rep_model.dstack)
    elif args.optimizer == "adam":
        rnn_optimizer.adam_rnn(rep_model.dstack)

    for l_t, r_t in zip(l_trees, r_trees):
        l_t.resetFinished()
        r_t.resetFinished()
    return cost
def __init__(self):
    # Gradient with respect to the "predicted" variable
    self._grad = elementwise_grad(self.values, argnum=1)
def sum_grad_output(*args, **kwargs):
    return sum_latter_dims(elementwise_grad(fun)(*args, **kwargs))
def __init__(self, *args, **kwargs):
    if not HAS_AUTOGRAD:
        raise ImportError("the 'autograd' module must be installed to "
                          "use the AutoGradModel")
    self._grad_func = autograd.elementwise_grad(self.compute_value)
    super(AutoGradModel, self).__init__(*args, **kwargs)
def __init__(self, name):
    self.last_input = None
    self.activation = get_activation(name)
    self.activation_d = elementwise_grad(self.activation)
def fit(self, X, y=None):
    super(FMRegressor, self).fit(X, y)
    self.loss = mean_squared_error
    self.loss_grad = elementwise_grad(mean_squared_error)
from __future__ import absolute_import
import autograd.numpy as np
import matplotlib.pyplot as plt
from autograd import elementwise_grad

# Here we use elementwise_grad to support broadcasting, which makes evaluating
# the gradient functions faster and avoids the need for calling 'map'.

def tanh(x):
    return (1.0 - np.exp(-x)) / (1.0 + np.exp(-x))

d_fun = elementwise_grad(tanh)            # First derivative
dd_fun = elementwise_grad(d_fun)          # Second derivative
ddd_fun = elementwise_grad(dd_fun)        # Third derivative
dddd_fun = elementwise_grad(ddd_fun)      # Fourth derivative
ddddd_fun = elementwise_grad(dddd_fun)    # Fifth derivative
dddddd_fun = elementwise_grad(ddddd_fun)  # Sixth derivative

x = np.linspace(-7, 7, 200)
plt.plot(x, tanh(x),
         x, d_fun(x),
         x, dd_fun(x),
         x, ddd_fun(x),
         x, dddd_fun(x),
         x, ddddd_fun(x),
         x, dddddd_fun(x))

plt.axis('off')
plt.savefig("tanh.png")
plt.show()
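# For an elementwise function, elementwise_grad(f)(x) equals the diagonal of
# the Jacobian, i.e. the gradient of the summed output -- a quick check using
# the definitions above:
from autograd import grad
assert np.allclose(d_fun(x), grad(lambda x: np.sum(tanh(x)))(x))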
def fit(self, X, y=None):
    super(FMClassifier, self).fit(X, y)
    self.loss = binary_crossentropy
    self.loss_grad = elementwise_grad(binary_crossentropy)
def __init__(self, C):
    super(AutogradWeightsRegularization, self).__init__(C)
    self._grad = elementwise_grad(self.values)
def __init__(self):
    self._derivative = elementwise_grad(self.__call__)