def test_log():
    x = ad.log(ad.Scalar('x', 10), 10)
    assert (np.isclose(x.getValue(), 1))
    assert (np.isclose(x.getDeriv()['x'], 1 / (np.log(10) * 10)))

    x = ad.log(2 * ad.Scalar('x', 10), 10)
    assert (np.isclose(x.getValue(), np.log(20) / np.log(10)))
    assert (np.isclose(x.getDeriv()['x'], 1 / (np.log(10) * 10)))

    x = ad.log(ad.Scalar('x', 10), 10)
    x2 = ad.log(ad.Scalar('x', 100), 10)
    x3 = x - x2
    x4 = ad.log(ad.Scalar('x', 10) / ad.Scalar('x', 100), 10)
    assert (np.isclose(x3.getValue(), -1))
    assert (np.isclose(x3.getDeriv()['x'],
                       1 / (np.log(10) * 10) - 1 / (np.log(10) * 100)))
    assert (np.isclose(x3.getValue(), x4.getValue()))
    assert (np.isclose(x3.getDeriv()['x'], x4.getDeriv()['x']))

    x = ad.ln(ad.Scalar('x', np.e))
    assert (np.isclose(x.getValue(), 1))
    assert (np.isclose(x.getDeriv()['x'], 1 / np.e))

    x = ad.exp(ad.ln(ad.Scalar('x', 10)))
    assert (np.isclose(x.getValue(), 10))
    assert (np.isclose(x.getDeriv()['x'], 1))

    assert (np.isclose(ad.log(100, 10), 2))
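# Plain-NumPy sanity check (independent of the ad module) of the identity the
# asserts above rely on: log_b(x) = ln(x) / ln(b), hence
# d/dx log_b(x) = 1 / (x * ln(b)). A minimal added sketch, not part of the
# original suite.
import numpy as np

def check_log_derivative(x=10.0, base=10.0, h=1e-6):
    # Central finite difference of log_base at x, accurate to O(h^2).
    fd = (np.log(x + h) - np.log(x - h)) / (np.log(base) * 2 * h)
    assert np.isclose(fd, 1 / (x * np.log(base)))

check_log_derivative()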
def sparse_jacob(x_vals):
    # Check the input size so that we generate a large sparse matrix.
    if len(x_vals) != 1000:
        raise Exception(
            'Please enter a vector of exactly 1000 values or Autodiff objects')
    f_vals = []
    for i, x_val in enumerate(x_vals):
        if i == 1:  # second element is ln(x1)
            func = ad.ln(x_val)
            f_vals.append(func)
        elif i == 100:  # x3 + 2*x100*exp(x100) + 4*x34
            func = x_vals[3] + 2 * x_val * ad.exp(x_val) + 4 * x_vals[34]
            f_vals.append(func)
        elif i == 445:  # x445**2 - x23
            func = x_val**2 - x_vals[23]
            f_vals.append(func)
        elif i == 690:  # x690 - x451 - x888
            func = x_val - x_vals[451] - x_vals[888]
            f_vals.append(func)
        elif i == 885:  # x885 - x200
            func = x_val - x_vals[200]
            f_vals.append(func)
        elif i == 998:  # x998 + x997 - x127
            func = x_val + x_vals[997] - x_vals[127]
            f_vals.append(func)
        else:
            f_vals.append(x_val)
    return np.array(f_vals)
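# Hypothetical driver for sparse_jacob, sketched under the assumption that the
# library exposes ad.create_vector / ad.get_jacobian as in the vector tests
# further down in this file; this is not the repo's actual harness.
def run_sparse_jacob():
    # x values 1..1000, all positive so ln(x1) is defined.
    x_vals = ad.create_vector('x', np.arange(1, 1001))
    f_vals = sparse_jacob(x_vals)
    jacobian = ad.get_jacobian(f_vals, ['x%d' % i for i in range(1, 1001)])
    # All but a handful of rows are identity rows, so the 1000x1000 Jacobian
    # is sparse.
    print(np.count_nonzero(jacobian))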
def test_logistic_loss():
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x)))
    L = y * ad.log(h) + (1 - y) * ad.log(1 - h)
    w_grad, = ad.gradients(L, [w])
    executor = ad.Executor([L, w_grad])

    y_val = 0
    x_val = np.array([2, 3, 4])
    w_val = np.random.random(3)
    L_val, w_grad_val = executor.run(feed_dict={x: x_val, y: y_val, w: w_val})

    logistic = 1 / (1 + np.exp(-np.sum(w_val * x_val)))
    expected_L_val = y_val * np.log(logistic) + (1 - y_val) * np.log(1 - logistic)
    expected_w_grad = (y_val - logistic) * x_val

    print(L_val)
    print(expected_L_val)
    print(expected_w_grad)
    print(w_grad_val)

    assert expected_L_val == L_val
    assert np.sum(np.abs(expected_w_grad - w_grad_val)) < 1E-9
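# NumPy-only cross-check (an added sketch, independent of the ad module) of the
# closed form used above: for L(w) = y*log(h) + (1-y)*log(1-h) with
# h = sigmoid(w.x), the gradient is dL/dw = (y - h) * x.
import numpy as np

def check_logistic_grad(h_step=1e-6):
    rng = np.random.default_rng(0)
    x, w, y = np.array([2.0, 3.0, 4.0]), rng.random(3), 0.0

    def L(w):
        h = 1 / (1 + np.exp(-np.sum(w * x)))
        return y * np.log(h) + (1 - y) * np.log(1 - h)

    analytic = (y - 1 / (1 + np.exp(-np.sum(w * x)))) * x
    # Central finite differences, one coordinate at a time.
    fd = np.array([(L(w + h_step * e) - L(w - h_step * e)) / (2 * h_step)
                   for e in np.eye(3)])
    assert np.allclose(fd, analytic, atol=1e-6)

check_logistic_grad()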
def run_problem4():
    print("-" * 18 + " Problem 4 " + "-" * 18)
    x1 = ad.Variable()
    x2 = ad.Variable()
    x3 = ad.Variable()
    y = ((ad.sin(x1 + 1) + ad.cos(2 * x2)) * ad.tan(ad.log(x3)) +
         (ad.sin(x2 + 1) + ad.cos(2 * x1)) * ad.exp(1 + ad.sin(x3)))

    x1_f = np.random.rand()
    x2_f = np.random.rand()
    x3_f = np.random.rand()
    x1_v = noise_like(x1_f)
    x2_v = noise_like(x2_f)
    x3_v = noise_like(x3_f)

    f, grad = ad.func(y, {x1: x1_f, x2: x2_f, x3: x3_f}, get_gradient=True)
    f_np = ((np.sin(x1_f + 1) + np.cos(2 * x2_f)) * np.tan(np.log(x3_f)) +
            (np.sin(x2_f + 1) + np.cos(2 * x1_f)) * np.exp(1 + np.sin(x3_f)))
    print("Function value by autodiff =", f)
    print("Function value by numpy =", f_np)

    t = 1e-6  # finite-difference step (assumed value; `t` was undefined in this scope)
    lhs = (ad.func(y, {
        x1: x1_f + t * x1_v,
        x2: x2_f + t * x2_v,
        x3: x3_f + t * x3_v
    }) - f) / t
    rhs = (np.sum(grad[x1] * x1_v) + np.sum(grad[x2] * x2_v) +
           np.sum(grad[x3] * x3_v))
    print("(V(w + tv) - V(w)) / t =", lhs)
    print("<dV(w), v> =", rhs)
    print("|lhs - rhs| / |lhs| =", np.abs(lhs - rhs) / np.abs(lhs))
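# run_problem4 validates the gradient via the directional-derivative identity
# (f(w + t*v) - f(w)) / t -> <grad f(w), v> as t -> 0. Below is a
# self-contained NumPy illustration of the same identity on a toy function
# (an added sketch, not part of the original problem set).
import numpy as np

def directional_check(t=1e-6):
    f = lambda w: np.sin(w[0]) + w[0] * w[1]
    grad = lambda w: np.array([np.cos(w[0]) + w[1], w[0]])
    rng = np.random.default_rng(1)
    w, v = rng.random(2), rng.random(2)
    lhs = (f(w + t * v) - f(w)) / t  # forward difference along v
    rhs = grad(w) @ v                # analytic directional derivative
    assert np.abs(lhs - rhs) / np.abs(lhs) < 1e-4

directional_check()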
def auto_diff_lr():
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    # Note: this formulation can suffer large numerical error in some cases,
    # so real systems usually provide fused higher-level operators to reduce
    # that error.
    h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x)))
    L = y * ad.log(h) + (1 - y) * ad.log(1 - h)
    w_grad, = ad.gradients(L, [w])
    executor = ad.Executor([L, w_grad])

    N = 100
    X_val, Y_val = gen_2d_data(N)
    w_val = np.ones(3)
    plot(N, X_val, Y_val, w_val)
    executor = ad.Executor([L, w_grad])
    test_accuracy(w_val, X_val, Y_val)

    alpha = 0.01
    max_iters = 300
    for iteration in range(max_iters):
        acc_L_val = 0
        for i in range(N):
            x_val = X_val[i]
            y_val = np.array(Y_val[i])
            L_val, w_grad_val = executor.run(
                feed_dict={w: w_val, x: x_val, y: y_val})
            w_val += alpha * w_grad_val
            acc_L_val += L_val
        print("iter = %d, likelihood = %s, w = %s" %
              (iteration, acc_L_val, w_val))
    test_accuracy(w_val, X_val, Y_val)
    plot(N, X_val, Y_val, w_val, True)
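# The comment in auto_diff_lr notes that composing exp/log this way is
# numerically fragile. A common remedy, sketched here in plain NumPy (not part
# of the ad API), is a fused log-sigmoid: since log(h) = -log(1 + exp(-s)) and
# log(1 - h) = -log(1 + exp(s)) for h = sigmoid(s), the log-likelihood becomes
# L = -y*logaddexp(0, -s) - (1-y)*logaddexp(0, s), which never overflows.
import numpy as np

def stable_log_likelihood(s, y):
    # s is the logit w.x; avoids computing h and log(h) separately.
    return -y * np.logaddexp(0.0, -s) - (1 - y) * np.logaddexp(0.0, s)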
def loss_func(self, feed_dict={}):
    batch_size = feed_dict["predicted_y"].shape[0]
    return ad.add(
        ad.negative(
            ad.__getitem__(
                ad.Placeholder(feed_dict["predicted_y"]),
                ad.Constant(
                    tuple([range(batch_size), feed_dict["true_y"].ravel()])))),
        ad.log(
            ad.sum(ad.exp(ad.Placeholder(feed_dict["predicted_y"])), axis=1)))
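# loss_func above builds per-sample softmax cross-entropy:
# -predicted_y[i, true_y[i]] + log(sum_j exp(predicted_y[i, j])).
# A plain-NumPy reference with the usual max-shift for stability (an added
# sketch, not the ad implementation):
import numpy as np

def softmax_xent_reference(predicted_y, true_y):
    # log-sum-exp computed as max + log(sum(exp(shifted))) to avoid overflow.
    row_max = predicted_y.max(axis=1, keepdims=True)
    log_z = np.log(np.exp(predicted_y - row_max).sum(axis=1)) + row_max.ravel()
    return log_z - predicted_y[np.arange(len(true_y)), true_y.ravel()]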
def test_exp_op():
    x1 = ad.Variable(name="x1")
    y = ad.exp(x1)
    grad_x1, = ad.gradients(y, [x1])
    executor = ad.Executor([y, grad_x1])
    x1_val = 2 * np.ones(3)
    y_val, grad_x1_val = executor.run(feed_dict={x1: x1_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, np.exp(x1_val))
    assert np.array_equal(grad_x1_val, np.exp(x1_val))
def test_exp():
    v1 = ad.create_vector('v', [2, 5])
    v2 = ad.exp(v1)
    assert (np.isclose(v2[0].getValue(), np.exp(2)))
    assert (np.isclose(v2[1].getValue(), np.exp(5)))
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert (np.array_equal(jacobian,
                           np.array([[np.exp(2), 0], [0, np.exp(5)]])))

    v1 = ad.create_vector('v', [2, 5])
    v2 = ad.exp(2 * v1)
    assert (np.isclose(v2[0].getValue(), np.exp(4)))
    assert (np.isclose(v2[1].getValue(), np.exp(10)))
    jacobian = ad.get_jacobian(v2, ['v1', 'v2'])
    assert (np.array_equal(jacobian,
                           2 * np.array([[np.exp(4), 0], [0, np.exp(10)]])))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    v1 = ad.exp(np.array([x + y, x * y]))
    assert (np.isclose(v1[0].getValue(), np.exp(5)))
    assert (np.isclose(v1[1].getValue(), np.exp(6)))
    jacobian = ad.get_jacobian(v1, ['x', 'y'])
    assert (np.array_equal(
        jacobian,
        np.array([[np.exp(5), np.exp(5)], [3 * np.exp(6), 2 * np.exp(6)]])))
def test_exp():
    x = ad.Scalar('x', 8)
    y = ad.exp(x)
    assert (np.isclose(y.getValue(), np.exp(8)))
    assert (np.isclose(y.getDeriv()['x'], np.exp(8)))

    x = ad.Scalar('x', 8)
    y = ad.exp(2 * x)
    assert (np.isclose(y.getValue(), np.exp(16)))
    assert (np.isclose(y.getDeriv()['x'], 2 * np.exp(16)))

    x = ad.Scalar('x', -3)
    x._deriv['x'] = -2.3
    y = ad.exp(x)
    assert (np.isclose(y.getValue(), np.exp(-3)))
    assert (np.isclose(y.getDeriv()['x'], -2.3 * np.exp(-3)))

    assert (ad.exp(0) == 1)
    assert np.isclose(ad.exp(4), np.exp(4))

    x = ad.Scalar('x', -3)
    y = ad.Scalar('y', 5)
    z = ad.exp(x) * ad.exp(y)
    assert (np.isclose(z.getValue(), np.exp(2)))
    assert (np.isclose(z.getDeriv()['x'], np.exp(2)))
    assert (np.isclose(z.getDeriv()['y'], np.exp(2)))

    x = ad.Scalar('x', -3)
    y = ad.Scalar('y', 5)
    z = ad.exp(x + y)
    assert (np.isclose(z.getValue(), np.exp(2)))
    assert (np.isclose(z.getDeriv()['x'], np.exp(2)))
    assert (np.isclose(z.getDeriv()['y'], np.exp(2)))

    x = ad.Scalar('x', -3)
    y = ad.Scalar('y', 5)
    z = ad.exp(x * y)
    assert (np.isclose(z.getValue(), np.exp(-15)))
    assert (np.isclose(z.getDeriv()['x'], 5 * np.exp(-15)))
    assert (np.isclose(z.getDeriv()['y'], -3 * np.exp(-15)))
def test_exp_mix_op():
    x1 = ad.Variable(name="x1")
    x2 = ad.Variable(name="x2")
    y = ad.exp(ad.log(x1 * x2) + 1)
    grad_x1, grad_x2 = ad.gradients(y, [x1, x2])
    executor = ad.Executor([y, grad_x1, grad_x2])
    x1_val = 2 * np.ones(3)
    x2_val = 4 * np.ones(3)
    y_val, grad_x1_val, grad_x2_val = executor.run(
        feed_dict={x1: x1_val, x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, np.exp(np.log(x1_val * x2_val) + 1))
    assert np.array_equal(grad_x1_val, y_val * x2_val / (x1_val * x2_val))
    assert np.array_equal(grad_x2_val, y_val * x1_val / (x1_val * x2_val))
def test_mix_all():
    x1 = ad.Variable(name="x1")
    y = 1 / (1 + ad.exp(-ad.reduce_sum(x1)))
    grad_x1, = ad.gradients(y, [x1])
    executor = ad.Executor([y, grad_x1])
    x1_val = 2 * np.ones(3)
    y_val, grad_x1_val = executor.run(feed_dict={x1: x1_val})
    expected_y_val = 1 / (1 + np.exp(-np.sum(x1_val)))
    expected_y_grad = expected_y_val * (1 - expected_y_val) * np.ones_like(x1_val)

    print(expected_y_grad)
    print(grad_x1_val)

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_y_val)
    assert np.sum(np.abs(grad_x1_val - expected_y_grad)) < 1E-10
def test_logistic():
    x1 = ad.Variable(name="x1")
    w = ad.Variable(name='w')
    y = 1 / (1 + ad.exp(-ad.reduce_sum(w * x1)))
    grad_w, = ad.gradients(y, [w])
    executor = ad.Executor([y, grad_w])
    x1_val = 3 * np.ones(3)
    w_val = 3 * np.zeros(3)  # all zeros, so y evaluates to 0.5
    y_val, grad_w_val = executor.run(feed_dict={x1: x1_val, w: w_val})
    expected_y_val = 1 / (1 + np.exp(-np.sum(w_val * x1_val)))
    expected_y_grad = expected_y_val * (1 - expected_y_val) * x1_val

    print(expected_y_grad)
    print(grad_w_val)

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_y_val)
    assert np.sum(np.abs(grad_w_val - expected_y_grad)) < 1E-7
def auto_diff_lr():
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    # Note: this formulation can suffer large numerical error in some cases,
    # so real systems usually provide fused higher-level operators to reduce
    # that error.
    h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x)))
    L = y * ad.log(h) + (1 - y) * ad.log(1 - h)
    w_grad, = ad.gradients(L, [w])
    w_grad.name = "w______"
    print('w_grad: ', w_grad, w_grad.op, w_grad.inputs)

    N = 100
    X_val, Y_val = gen_2d_data(N)
    w_val = np.ones(3)
    # plot(N, X_val, Y_val, w_val, True)
    executor = ad.Executor([L, w_grad])
    test_accuracy(w_val, X_val, Y_val)

    alpha = 0.01
    max_iters = 1
    for iteration in range(max_iters):
        acc_L_val = 0
        for i in range(N):
            x_val = X_val[i]
            y_val = np.array(Y_val[i])
            L_val, w_grad_val = executor.run(feed_dict={
                w: w_val,
                x: x_val,
                y: y_val
            })
            # One gradient step per sample; the run method could be adapted
            # to work on mini-batches instead.
            w_val += alpha * w_grad_val
            acc_L_val += L_val
            break
        # print("iter = %d, likelihood = %s, w = %s" % (iteration, acc_L_val, w_val))
        # if iteration % 50 == 0:
        #     plot(N, X_val, Y_val, w_val, True)
    test_accuracy(w_val, X_val, Y_val)
    plot(N, X_val, Y_val, w_val, True)
def test_reduce_sum_mix():
    x1 = ad.Variable(name="x1")
    y = ad.exp(ad.reduce_sum(x1))
    grad_x1, = ad.gradients(y, [x1])
    executor = ad.Executor([y, grad_x1])
    x1_val = 2 * np.ones(3)
    y_val, grad_x1_val = executor.run(feed_dict={x1: x1_val})
    expected_y_val = np.exp(np.sum(x1_val))
    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_y_val)
    assert np.array_equal(grad_x1_val, expected_y_val * np.ones_like(x1_val))

    y2 = ad.log(ad.reduce_sum(x1))
    grad_x2, = ad.gradients(y2, [x1])
    executor2 = ad.Executor([y2, grad_x2])
    y2_val, grad_x2_val = executor2.run(feed_dict={x1: x1_val})
    expected_y2_val = np.log(np.sum(x1_val))
    assert isinstance(y2, ad.Node)
    assert np.array_equal(y2_val, expected_y2_val)
    assert np.array_equal(grad_x2_val,
                          (1 / np.sum(x1_val)) * np.ones_like(x1_val))
def test_composite():
    # Test some more complicated functions / identities, including some
    # multivariate ones.
    x = ad.Scalar('x', 2)
    z = (5 * (x + 20) / 10)**2
    d = z.getGradient(['x'])
    assert (z.getValue() == 121)
    assert (np.array_equal(d, [11]))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    z = (x + 20) * y
    d = z.getGradient(['x', 'y'])
    assert (z.getValue() == 66)
    assert (np.array_equal(d, [3, 22]))

    x = ad.Scalar('x', 1)
    y = ad.Scalar('y', 3)
    z = (x * y + x) * y
    d = z.getGradient(['x', 'y'])
    assert (z.getValue() == 12)
    assert (np.array_equal(d, [12, 7]))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    z = (x + y) / y
    d = z.getGradient(['x', 'y'])
    assert (np.isclose(z.getValue(), 5 / 3))
    assert (np.allclose(d, [1.0 / 3, -2.0 / 9]))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    z = x + ((y**2) / y)
    d = z.getGradient(['x', 'y'])
    assert (z.getValue() == 5)
    assert (np.array_equal(d, [1, 1]))

    x = ad.Scalar('x', 2)
    y = ad.Scalar('y', 3)
    z = (x**y)**2
    d = z.getGradient(['x', 'y'])
    assert (z.getValue() == 64)
    assert (np.array_equal(d, [6 * 32, 2 * np.log(2) * 64]))

    x = ad.Scalar('x', 16)
    y = ad.sin(ad.sqrt(x))
    assert (np.isclose(y.getValue(), np.sin(4)))
    assert (np.isclose(y.getDeriv()['x'], 1 / 8 * np.cos(4)))

    # trig identity
    x = ad.Scalar('x', 5)
    y = ad.sin(x)**2 + ad.cos(x)**2
    assert (np.isclose(y.getValue(), 1))
    assert (np.isclose(y.getDeriv()['x'], 0))

    # trig identity, multivariate
    x = ad.Scalar('x', 5)
    y = ad.Scalar('y', 243423)
    z = ad.sin(x * 1231 * y)**2 + ad.cos(x * 1231 * y)**2
    assert (np.isclose(z.getValue(), 1))
    assert (np.isclose(z.getDeriv()['x'], 0))
    assert (np.isclose(z.getDeriv()['y'], 0))

    x = ad.Scalar('x', 16)
    y = ad.sqrt(ad.power(x, 2))
    assert (y.getValue() == 16)
    assert (y.getDeriv()['x'] == 1)

    x = ad.Scalar('x', 10)
    y = ad.tan(x) * ad.cos(x) / ad.sin(x)
    assert (np.isclose(y.getValue(), 1))
    assert (np.isclose(y.getDeriv()['x'], 0))

    # https://math.berkeley.edu/~kruckman/fall2010/9-29-problems.pdf
    x = ad.Scalar('x', 10)
    y = (x**2 * ad.sin(x) / (x**2 + 1))
    assert (np.isclose(y.getValue(), 100 * np.sin(10) / (101)))
    assert (np.isclose(
        y.getDeriv()['x'],
        (20 * np.sin(10) + (10**4 + 100) * np.cos(10)) / (101**2)))

    x = ad.Scalar('x', 4)
    y = (x**3 * ad.exp(x))
    assert (np.isclose(y.getValue(), 4**3 * (np.exp(4))))
    assert (np.isclose(y.getDeriv()['x'],
                       3 * 16 * np.exp(4) + 4**3 * np.exp(4)))

    x = ad.Scalar('x', 4)
    y = ad.sin(x) * ad.cos(x) * ad.tan(x)
    assert (np.isclose(y.getValue(), np.sin(4) * np.cos(4) * np.tan(4)))
    assert (np.isclose(y.getDeriv()['x'], np.sin(8)))

    x = ad.Scalar('x', 4)
    y = ad.sqrt(x) / ad.tan(x)
    assert (np.isclose(y.getValue(), 2 / np.tan(4)))
    assert (np.isclose(y.getDeriv()['x'],
                       1 / (np.tan(4) * 4) - 2 / (np.sin(4)**2)))

    x = ad.Scalar('x', 4)
    y = ad.exp(ad.sqrt(x + 1))
    assert (np.isclose(y.getValue(), np.exp(5**0.5)))
    assert (np.isclose(y.getDeriv()['x'], np.exp(5**0.5) / (2 * (5**0.5))))

    x = ad.Scalar('x', 4)
    y = ad.exp(ad.sin(ad.exp(x)))
    assert (np.isclose(y.getValue(), np.exp(np.sin(np.exp(4)))))
    assert (np.isclose(y.getDeriv()['x'],
                       np.cos(np.exp(4)) * np.exp(np.sin(np.exp(4)) + 4)))

    x = ad.Scalar('x', 4)
    y = (ad.sin(x**(1 / 3)))**(1 / 3)
    assert (np.isclose(y.getValue(), np.sin(4**(1 / 3))**(1 / 3)))
    assert (np.isclose(
        y.getDeriv()['x'],
        np.cos(4**(1 / 3)) / (9 * ((4 * np.sin(4**(1 / 3)))**(2 / 3)))))

    x = ad.Scalar('x', 16)
    y = ad.Scalar('y', 9)
    z = ad.sqrt(x * y)
    assert (z.getValue() == 12)
    assert (np.isclose(z.getDeriv()['x'], 9 / 2 * (16 * 9)**(-0.5)))
    assert (np.isclose(z.getDeriv()['y'], 16 / 2 * (16 * 9)**(-0.5)))

    x = ad.Scalar('x', 16)
    y = ad.Scalar('y', 9)
    z = ad.sqrt(x * (y**4))
    assert (z.getValue() == 4 * 81)
    assert (np.isclose(z.getDeriv()['x'], (9**4) / 2 * (16 * (9**4))**(-0.5)))
    assert (np.isclose(z.getDeriv()['y'],
                       (4 * 16 * (9**3)) / 2 * (16 * (9**4))**(-0.5)))

    x = ad.Scalar('x', 3)
    y = ad.Scalar('y', 2)
    z = ad.cos(ad.sin(x * y))
    assert (np.isclose(z.getValue(), np.cos(np.sin(6))))
    assert (np.isclose(z.getDeriv()['x'], -2 * np.cos(6) * np.sin(np.sin(6))))
    assert (np.isclose(z.getDeriv()['y'], -3 * np.cos(6) * np.sin(np.sin(6))))

    x = ad.log(ad.Scalar('x', 10), 10)
    x2 = ad.log(ad.Scalar('y', 100), 10)
    x3 = x - x2
    x4 = ad.log(ad.Scalar('x', 10) / ad.Scalar('y', 100), 10)
    assert (np.isclose(x3.getValue(), -1))
    assert (np.isclose(x3.getDeriv()['x'], 1 / (np.log(10) * 10)))
    assert (np.isclose(x3.getDeriv()['y'], -1 / (np.log(10) * 100)))
    assert (np.isclose(x3.getValue(), x4.getValue()))
    assert (np.isclose(x3.getDeriv()['x'], x4.getDeriv()['x']))
    assert (np.isclose(x3.getDeriv()['y'], x4.getDeriv()['y']))

    x = ad.Scalar('x', 3)
    y = ad.Scalar('y', 2)
    z = ad.sin(2 * x + ad.exp(y**2) + 4 * x * y)
    assert (np.isclose(z.getValue(), np.sin(6 + np.exp(4) + 24)))
    assert (np.isclose(z.getDeriv()['x'], np.cos(6 + np.exp(4) + 24) * (10)))
    assert (np.isclose(z.getDeriv()['y'],
                       np.cos(6 + np.exp(4) + 24) * (4 * np.exp(4) + 12)))

    # http://math.gmu.edu/~memelian/teaching/Fall08/partDerivExamples.pdf
    x = ad.Scalar('x', 3)
    y = ad.Scalar('y', 2)
    z = x * ad.exp(2 * x + 3 * y)
    assert (np.isclose(z.getValue(), 3 * np.exp(12)))
    assert (np.isclose(z.getDeriv()['x'], 6 * np.exp(12) + np.exp(12)))
    assert (np.isclose(z.getDeriv()['y'], 9 * np.exp(12)))

    x = ad.Scalar('x', 3)
    y = ad.Scalar('y', 2)
    z = (x - y) / (x + y)
    assert (np.isclose(z.getValue(), 1 / 5))
    assert (np.isclose(z.getDeriv()['x'], 4 / 25))
    assert (np.isclose(z.getDeriv()['y'], -6 / 25))

    x = ad.Scalar('x', 3)
    y = ad.Scalar('y', 2)
    z = ad.Scalar('z', 5)
    f = x * ad.cos(z) + ((x**2) * (y**3) * ad.exp(z))
    assert (np.isclose(f.getValue(), 3 * np.cos(5) + (9 * 8 * np.exp(5))))
    assert (np.isclose(f.getDeriv()['x'], np.cos(5) + 2 * 3 * 8 * np.exp(5)))
    assert (np.isclose(f.getDeriv()['y'], 3 * 9 * 4 * np.exp(5)))
    assert (np.isclose(f.getDeriv()['z'], -3 * np.sin(5) + 9 * 8 * np.exp(5)))

    x = ad.Scalar('x', 3)
    y = ad.Scalar('y', 2)
    f = (y / x) * ad.ln(x)
    assert (np.isclose(f.getValue(), 2 / 3 * np.log(3)))
    assert (np.isclose(f.getDeriv()['x'], 2 / 9 * (1 - np.log(3))))
    assert (np.isclose(f.getDeriv()['y'], 1 / 3 * np.log(3)))

    x = ad.Scalar('x', 3)
    y = ad.Scalar('y', 2)
    f = 1 / (x**2 + y**2)
    assert (np.isclose(f.getValue(), 1 / 13))
    assert (np.isclose(f.getDeriv()['x'], -6 / (13**2)))
    assert (np.isclose(f.getDeriv()['y'], -4 / (13**2)))
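# The hand-derived partials in test_composite can be spot-checked numerically.
# Below is a small NumPy helper for any scalar function of one variable (an
# added sketch, independent of the ad module), demonstrated on the
# product-rule case y = x**3 * exp(x) at x = 4 from above.
import numpy as np

def numeric_deriv(f, x, h=1e-6):
    # Central difference, accurate to O(h^2).
    return (f(x + h) - f(x - h)) / (2 * h)

assert np.isclose(numeric_deriv(lambda x: x**3 * np.exp(x), 4.0),
                  3 * 16 * np.exp(4) + 4**3 * np.exp(4))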
def tanh(x):
    # tanh(x) = (1 - e^(-2x)) / (1 + e^(-2x)). The original used e^(-x) in
    # both places, which actually computes tanh(x/2).
    x_node = ad.Variable(x)
    exp_neg_2x = ad.exp(ad.negative(ad.add(x_node, x_node)))
    return ad.divide(
        ad.subtract(ad.Constant(1), exp_neg_2x),
        ad.add(ad.Constant(1), exp_neg_2x))
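# NumPy cross-check of the identity used above (an added sketch, not a test
# from the repo): tanh(x) = (1 - exp(-2x)) / (1 + exp(-2x)).
import numpy as np

xs = np.linspace(-3, 3, 7)
assert np.allclose((1 - np.exp(-2 * xs)) / (1 + np.exp(-2 * xs)), np.tanh(xs))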