def NLS2(ti, yi, t10=1., t20=100., w0=0.5, tol=1e-14):
    "nonlinear least squares to fit DOUBLE exponential distribution"
    xi = np.log(ti)
    # find: characteristic times exp(x1), exp(x2) and weights w1, w2
    def f(theta, xi, yi):
        w = theta[0]
        x1, x2 = theta[1], theta[2]
        z = w * np.exp(-np.exp(xi - x1)) + (1. - w) * np.exp(-np.exp(xi - x2))
        return np.sum((1. - z - yi)**2)
    # minimize f by solving df(x) = 0 with newton method
    df = tangent.grad(f)
    ddf = tangent.grad(df, mode="forward")
    def Jf(theta, xi, yi):
        return np.array([
            ddf(theta, xi, yi, 1., [1, 0, 0]),
            ddf(theta, xi, yi, 1., [0, 1, 0]),
            ddf(theta, xi, yi, 1., [0, 0, 1])
        ])
    theta = np.array([w0, np.log(t10), np.log(t20)])
    dftheta = df(theta, xi, yi)
    while np.linalg.norm(dftheta) > tol:
        print("|(grad f)(theta)|", np.linalg.norm(dftheta))
        theta -= np.linalg.solve(Jf(theta, xi, yi), dftheta)
        dftheta = df(theta, xi, yi)
    return theta[0], np.exp(theta[1]), np.exp(theta[2])
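# Example usage (an illustrative sketch, not part of the original code): fit a
# two-timescale exponential mixture to an empirical CDF. The sample data below
# is made up for demonstration.
# rng = np.random.RandomState(0)
# ti = np.sort(np.concatenate([rng.exponential(2., 500),
#                              rng.exponential(80., 500)]))
# yi = (np.arange(1, len(ti) + 1) - 0.5) / len(ti)   # empirical CDF at ti
# w, tau1, tau2 = NLS2(ti, yi, t10=1., t20=100., w0=0.5)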
def main():
    # By running this, I've "discovered" that the gradient is always calculated
    # with respect to the first argument of the function.
    d_easy = tangent.grad(easy_quadratic, verbose=1)
    # I'd expect, then, that the gradient is always just d/dx{x^2} = 2x,
    # regardless of y's value. Let's try a collection of random values and
    # check:
    all_good = True
    for _ in range(10):
        xi = 10 * random.random() - 5
        yi = 10 * random.random() - 5
        all_good &= (d_easy(xi, yi) == 2 * xi)
    if all_good:
        print("The 'easy' derivative function always returns 2*x")
    else:
        print("YIKES!!! Something went wrong!!")
    # There can still be a dependence on the other parameter, though:
    d_big = tangent.grad(bigger_polynomial, verbose=1)
    all_good = True
    for _ in range(10):
        xi = 10 * random.random() - 5
        yi = 10 * random.random() - 5
        all_good &= (d_big(xi, yi) == (2 * xi + yi))
    if all_good:
        print("The 'bigger' derivative function always returns 2*x + y")
    else:
        print("DAAAANG something went wrong with d_big")
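# The functions differentiated above are defined elsewhere; a minimal sketch
# consistent with the comments (d/dx of the "easy" function is 2*x regardless
# of y, d/dx of the "bigger" one is 2*x + y). These exact definitions are an
# assumption, not the original code.
def easy_quadratic(x, y):
    return x**2 + y

def bigger_polynomial(x, y):
    return x**2 + x * y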
def NLS_general(F, xi, yi, p0=1., tol=1e-12):
    "nonlinear least squares to fit arbitrary f with any number of parameters"
    # F = F(x, p), p0 MUST match len(p), F takes array of x
    # F must be compatible with tangent module
    def f(p, xi, yi):
        return np.sum((F(xi, p) - yi)**2)
    # minimize f by solving df(x) = 0 with newton method
    n = len(p0)
    df = tangent.grad(f)
    ddf = tangent.grad(df, mode="forward")
    ei = lambda i: np.eye(1, n, i)[0, :]
    def Jf(p, xi, yi):
        return np.array([ddf(p, xi, yi, 1., ei(i)) for i in range(n)])
    p = np.array(p0)
    dfp = df(p, xi, yi)
    while np.linalg.norm(dfp) > tol:
        print("|grad f|", np.linalg.norm(dfp))
        p -= np.linalg.solve(Jf(p, xi, yi), dfp)
        dfp = df(p, xi, yi)
    return tuple(p)
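# Example usage (a sketch, not from the original code): fit y = a * exp(b * x)
# to noisy samples. The model G below is made up for illustration; it only has
# to take an array of x values and a parameter array p, as documented above.
# def G(x, p):
#     return p[0] * np.exp(p[1] * x)
#
# xs = np.linspace(0., 1., 50)
# ys = G(xs, [2.0, -1.5]) + 0.01 * np.random.randn(50)
# a, b = NLS_general(G, xs, ys, p0=[1., -1.])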
def tangent_func():
    func.__globals__['np'] = np
    df = tangent.grad(func, motion='joint', optimized=optimized, verbose=True)
    ddf = tangent.grad(df, motion='joint', optimized=optimized, verbose=True)
    return ddf(*args)
def NLS_(F, xi, yi, x0=0., tol=1e-14):
    r"nonlinear least squares to find parameter x so that F(xi, x) \approx yi"
    def f(x, xi, yi):
        return np.sum((F(xi, x) - yi)**2)
    # minimize f by solving df(x) = 0 with newton method
    df = grad(f)
    ddf = grad(df)
    x = x0
    while np.abs(df(x, xi, yi)) > tol:
        x -= df(x, xi, yi) / ddf(x, xi, yi)
        print("|df(x)|", np.abs(df(x, xi, yi)))
    return x
def NLS(ti, yi, t0=0., tol=1e-14):
    "nonlinear least squares to fit exponential distribution with mean exp(x)"
    xi = np.log(ti)
    def f(x, xi, yi):
        return np.sum((1. - np.exp(-np.exp(xi - x)) - yi)**2)
    # minimize f by solving df(x) = 0 with newton method
    df = grad(f)
    ddf = grad(df)
    x = np.log(t0)
    while np.abs(df(x, xi, yi)) > tol:
        x -= df(x, xi, yi) / ddf(x, xi, yi)
        # print("|f(x)|", np.abs(df(x, xi, yi)))
    return np.exp(x)
def newton(f, x0=0., tol=1e-14):
    "solve f(x) = 0 with initial guess x = x0"
    df = grad(f)
    x = x0
    while np.abs(f(x)) > tol:
        x -= f(x) / df(x)
        print("|f(x)|", np.abs(f(x)))
    return x
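# Example usage (a sketch; assumes grad is tangent.grad as in the other
# snippets, and that the residual function is tangent-differentiable):
# def residual(x):
#     return x * x - 2.
# root = newton(residual, x0=1.)   # converges to sqrt(2) ~ 1.41421356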
def test_nested_dict(motion, optimized):
    p = dict(i=dict(j=3.0, k=4.0))
    func = nested_dict
    df = tangent.grad(func, motion=motion, optimized=optimized, verbose=True)
    dx = df(p)
    df_ag = ag_grad(func)
    dx_ag = df_ag(p)
    for k in p['i']:
        assert np.allclose(dx['i'][k], dx_ag['i'][k])
def tangent_func():
    func.__globals__['np'] = np
    df = tangent.grad(func, mode='forward', preserve_result=preserve_result,
                      wrt=wrt, optimized=True, verbose=1)
    args_ = args + (1.0, )  # seed gradient
    return df(*deepcopy(args_))
def _test_tf_hvp(func, optimized):
    a = tf.random_normal(shape=(300, ))
    v = tf.reshape(a, shape=(-1, ))
    modes = ['forward', 'reverse']
    for mode1 in modes:
        for mode2 in modes:
            if mode1 == mode2 == 'forward':
                continue
            df = tangent.grad(func, mode=mode1, motion='joint', optimized=optimized)
            ddf = tangent.grad(df, mode=mode2, motion='joint', optimized=optimized)
            dx = ddf(a, tf.constant(1.0), v)
            # We just ensure it computes something in this case.
            assert dx.shape == a.shape
def test_unpacking_args_saxpy(motion, optimized, a, b, c):
    func = unpacking_args_saxpy
    func = tangent.tangent(func)
    func.__globals__['np'] = np
    df = tangent.grad(func, motion=motion, optimized=optimized, verbose=True)
    dx = df((a, b, c))
    df_num = utils.numeric_grad(func)
    dx_num = df_num((a, b, c))
    assert np.allclose(dx, dx_num)
def test_inlining_contextmanager(motion, optimized, a):
    func = inlining_contextmanager
    func = tangent.tangent(func)
    func.__globals__['np'] = np
    df = tangent.grad(func, motion=motion, optimized=optimized, verbose=True)
    dx = df(a)
    func.__globals__['np'] = ag_np
    df_ag = ag_grad(func)
    df_ag(a)
    assert np.allclose(dx, 2.9 * a**2)
def _test_hvp(func, optimized):
    np.random.seed(0)
    a = np.random.normal(scale=1, size=(300, )).astype('float32')
    v = a.ravel()
    modes = ['forward', 'reverse']
    for mode1 in modes:
        for mode2 in modes:
            if mode1 == mode2 == 'forward':
                continue
            df = tangent.grad(func, mode=mode1, motion='joint', optimized=optimized)
            ddf = tangent.grad(df, mode=mode2, motion='joint', optimized=optimized)
            dx = ddf(a, 1, v)
            hvp_ag = hessian_vector_product(func)
            dx_ag = hvp_ag(a, v)
            assert np.allclose(dx, dx_ag)
def test_dict_saxpy(motion, optimized, a, b, c):
    func = dict_saxpy
    func = tangent.tangent(func)
    func.__globals__['np'] = np
    df = tangent.grad(func, motion=motion, optimized=optimized, verbose=True)
    dx = df(dict(a=a, b=b, c=c))
    df_num = utils.numeric_grad(func)
    dx_num = df_num(dict(a=float(a), b=float(b), c=float(c)))
    flat_dx, _ = flatten(dx)
    flat_dx_num, _ = flatten(dx_num)
    assert np.allclose(flat_dx, flat_dx_num)
def tangent_func():
    y = func(*deepcopy(args))
    if np.array(y).size > 1:
        init_grad = np.ones_like(y)
    else:
        init_grad = 1
    func.__globals__['np'] = np
    df = tangent.grad(func, motion=motion, optimized=optimized,
                      preserve_result=preserve_result, verbose=1)
    if motion == 'joint':
        return df(*deepcopy(args) + (init_grad, ))
    return df(*deepcopy(args), init_grad=init_grad)
def test_rnn(motion, optimized):
    func = rnn
    w = np.random.randn(2, 3)
    inputs = np.random.randn(3, 2)
    func.__globals__['np'] = np
    df = tangent.grad(func, wrt=(0, 1), motion=motion, optimized=optimized, verbose=True)
    dinputs, dw = df(inputs, w)
    num_dinputs = utils.numeric_grad(func)(inputs, w)
    num_dw = utils.numeric_grad(lambda w, x: func(x, w))(w, inputs)
    assert np.allclose(num_dw, dw)
    assert np.allclose(num_dinputs, dinputs)
def test_bilinear(optimized):
    func = bilinear
    D = 3
    np.random.seed(0)
    x = np.random.randn(1, D)
    h = np.random.randn(1, D)
    U = np.random.randn(D, D)
    w = np.random.randn(D, D)
    b = np.random.randn(1, D)
    func.__globals__['np'] = np
    df = tangent.grad(func, wrt=(0, ), motion='joint', optimized=optimized, verbose=True)
    dx = df(x, h, U, w, b)
    num_dx = utils.numeric_grad(func)(x, h, U, w, b)
    assert np.allclose(num_dx, dx)
def test_logistic_regression(motion, optimized):
    func = logistic_regression
    w = np.random.randn(3, 5)
    b = np.random.randn(5)
    input_ = np.random.rand(3)
    label = np.zeros(5)
    label[1] = 1
    func.__globals__['np'] = np
    df = tangent.grad(func, wrt=(2, 3), motion=motion, optimized=optimized, verbose=True)
    dw, db = df(input_, label, w, b)
    func.__globals__['np'] = ag_np
    ag_dw = ag_grad(func, argnum=2)(input_, label, w, b)
    ag_db = ag_grad(func, argnum=3)(input_, label, w, b)
    assert np.allclose(ag_dw, dw)
    assert np.allclose(ag_db, db)
def scope_tangent():
    def forward(theta, states):
        states_T = np.transpose(states)
        z0 = np.dot(theta[0], states_T)
        a0 = z0
        zero_inds = z0 < 0.0
        a0[zero_inds] *= 1e-2
        z1 = np.dot(theta[1], a0)
        # a1 = 0.5*(1.0 + np.tanh(z1))
        a1 = z1
        return a1

    def loss(theta, states, actions):
        y_pred = forward(theta, states)
        actions_T = np.transpose(actions)
        err = y_pred - actions_T
        sq_err = err**2
        return 0.5 * np.mean(np.sum(sq_err, axis=(0, )), axis=(0, ))

    import tangent
    dlossdtheta = tangent.grad(loss, preserve_result=True)
    return loss, forward, dlossdtheta
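# Example usage (a sketch with made-up shapes; theta, states, and actions are
# assumptions, not part of the original code). With preserve_result=True the
# generated gradient function also returns the original loss value.
# loss, forward, dlossdtheta = scope_tangent()
# theta = [np.random.randn(4, 3), np.random.randn(2, 4)]
# states, actions = np.random.randn(5, 3), np.random.randn(5, 2)
# dtheta, loss_val = dlossdtheta(theta, states, actions)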
import tangent
import delphi.translators.data.PETPT_lambdas as lambdas

func = lambdas.PETPT__lambda__IF_1_0
df = tangent.grad(func)
print(df)
def assert_forward_not_implemented(func, wrt):
    try:
        tangent.grad(func, mode='forward', preserve_result=False, wrt=wrt)
        assert False, 'Remove this when implementing.'
    except NotImplementedError:
        pass
def minimize(F, x0=0., tol=1e-14):
    "find local minimum of F near initial guess x = x0"
    # solve dF(x) = 0 with newton
    return newton(grad(F), x0=x0, tol=tol)
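# Example usage (illustrative sketch): minimize a simple smooth function. The
# quartic below is made up; any twice tangent-differentiable scalar function
# should work, since newton(grad(F), ...) takes a second derivative internally.
# def F(x):
#     return (x - 3.)**4 + (x - 3.)**2
# x_min = minimize(F, x0=0.)   # converges to x = 3.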
def main():
    print("Hi, everybody!!")
    df = tangent.grad(cube_x_plus_x, verbose=1)
    for i in range(1, 10):
        print(i, cube_x_plus_x(i), df(i), expected_deriv(i))
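# The functions referenced in main() are defined elsewhere; a minimal sketch
# consistent with their names (an assumption, not the original definitions):
def cube_x_plus_x(x):
    return x**3 + x

def expected_deriv(x):
    return 3 * x**2 + 1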