def test_multi_var_chain_rule():
    x1 = ad.Variable(name="x1")
    x2 = x1 + 3
    x3 = x1 + 5
    y = x2 * x3

    grad_x1, grad_x2, grad_x3 = ad.gradients(y, [x1, x2, x3])

    executor = ad.Executor([y, grad_x1, grad_x2, grad_x3])
    x1_val = 1 * np.ones(3)
    # x2 and x3 are derived from x1, so only x1 needs to be fed.
    y_val, grad_x1_val, grad_x2_val, grad_x3_val = executor.run(
        feed_dict={x1: x1_val})
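    # Expected values follow from y = (x1 + 3) * (x1 + 5):
    # dy/dx1 = 2 * x1 + 8, dy/dx2 = x3 = x1 + 5, dy/dx3 = x2 = x1 + 3.
    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, (x1_val + 3) * (x1_val + 5))
    assert np.array_equal(grad_x1_val, 2 * x1_val + 8)
    assert np.array_equal(grad_x2_val, x1_val + 5)
    assert np.array_equal(grad_x3_val, x1_val + 3)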
def test_mul_const_jacobian(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x1 = ad.Variable(name="x1", shape=[2, 2])
        jacobian_x1, = ad.jacobians(2 * x1, [x1])

        executor = ad.Executor([jacobian_x1])
        x1_val = T.tensor([[5., 6.], [7., 8.]])
        jacobian_x1_val, = executor.run(feed_dict={x1: x1_val})

        I = T.identity(2)
        expected_jacobian_x1_val = 2 * T.einsum("ai,bj->abij", I, I)
        assert T.array_equal(jacobian_x1_val, expected_jacobian_x1_val)
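
# Hedged aside: the expected Jacobian above encodes the entrywise derivative
# d(2x)_{ab} / dx_{ij} = 2 * delta_{ai} * delta_{bj}. A self-contained NumPy
# check of that identity (plain NumPy, independent of ad/T; name illustrative):
def _mul_const_jacobian_demo():
    import numpy as np
    I = np.eye(2)
    jac = 2 * np.einsum("ai,bj->abij", I, I)
    assert jac[0, 1, 0, 1] == 2.0  # d(2x)_{01} / dx_{01} = 2
    assert jac[0, 1, 1, 0] == 0.0  # cross entries vanish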
def test_identity():
    x2 = ad.Variable(name="x2")
    y = x2

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, x2_val)
    assert np.array_equal(grad_x2_val, np.ones_like(x2_val))
def test_reduce_sum():
    x1 = ad.Variable(name="x1")
    y = ad.reduce_sum(x1)

    grad_x1, = ad.gradients(y, [x1])

    executor = ad.Executor([y, grad_x1])
    x1_val = 2 * np.ones(3)
    y_val, grad_x1_val = executor.run(feed_dict={x1: x1_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, np.sum(x1_val))
    assert np.array_equal(grad_x1_val, np.ones_like(x1_val))
def test_exp():
    x2 = ad.Variable(name="x2")
    y = ad.exp_op(x2)

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, np.exp(x2_val))
    # d exp(x) / dx = exp(x)
    assert np.array_equal(grad_x2_val, np.exp(x2_val))
def test_relu():
    x2 = ad.Variable(name="x2")
    y = ad.relu_op(x2)

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    # Span negative and positive inputs so both relu branches are exercised.
    x2_val = 2 * np.linspace(-10, 10, 10)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, 1.0 * (x2_val > 0) * x2_val)
    assert np.array_equal(grad_x2_val, 1.0 * (x2_val > 0))
def test_add_by_const():
    x2 = ad.Variable(name="x2")
    y = 5 + x2

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, x2_val + 5)
    assert np.array_equal(grad_x2_val, np.ones_like(x2_val))
def main():
    # Generate dataset and initial weight.
    x_t, y_t = generate_dataset(1, 1, -5, point_num=100)
    # Add an extra dimension to build homogeneous coordinates.
    x_t = np.concatenate((x_t, np.ones((x_t.shape[0], 1))), axis=1)
    W_val = np.random.rand(3, 1)

    # Draw the initial decision hyperplane.
    draw(W_val, x_t, y_t)

    # Create the model.
    x = ad.Variable(name='x')
    W = ad.Variable(name='W')
    y = ad.sigmoid_op(ad.matmul_op(x, W))

    # Define the loss.
    y_ = ad.Variable(name='y_')
    cross_entropy = ad.reduce_mean_op(-ad.reduce_sum_op(
        y_ * ad.log_op(y) + (1 - y_) * ad.log_op(1 - y),
        reduction_indices=[1]))

    # Update rule: plain gradient descent.
    learning_rate = 0.5
    W_grad, = ad.gradients(cross_entropy, [W])
    W_train_step = W - learning_rate * W_grad

    # Training loop.
    executor = ad.Executor([cross_entropy, y, W_train_step])
    steps = 200

    plt.ion()
    for i in range(steps):
        plt.cla()

        loss_val, y_val, W_val = executor.run(feed_dict={
            x: x_t,
            y_: y_t,
            W: W_val,
        })
        print("Step {}: loss: {}".format(i + 1, loss_val))

        # Draw the trained decision hyperplane.
        draw(W_val, x_t, y_t)
        plt.pause(0.1)
    plt.ioff()
    plt.show()
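
# Hedged aside: for the sigmoid cross-entropy above, the analytic gradient is
# x^T (sigmoid(xW) - y) / N. A self-contained NumPy finite-difference check,
# independent of the ad library (all names here are illustrative):
def _check_lr_gradient():
    import numpy as np
    rng = np.random.default_rng(0)
    x = rng.normal(size=(100, 3))
    y = (rng.random((100, 1)) > 0.5).astype(float)
    W = rng.normal(size=(3, 1))

    def loss(Wv):
        s = 1.0 / (1.0 + np.exp(-x @ Wv))
        return -np.mean(np.sum(y * np.log(s) + (1 - y) * np.log(1 - s),
                               axis=1))

    s = 1.0 / (1.0 + np.exp(-x @ W))
    analytic = x.T @ (s - y) / x.shape[0]

    # Central difference on one weight entry.
    eps = 1e-6
    Wp, Wm = W.copy(), W.copy()
    Wp[0, 0] += eps
    Wm[0, 0] -= eps
    numeric = (loss(Wp) - loss(Wm)) / (2 * eps)
    assert abs(analytic[0, 0] - numeric) < 1e-6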
def auto_diff_lr():
    x = ad.Variable(name='x')
    w = ad.Variable(name='w')
    y = ad.Variable(name='y')

    # Note: the formulation below can incur large numerical error in some
    # cases; real systems usually provide fused higher-level operators to
    # reduce that error.
    h = 1 / (1 + ad.exp(-ad.reduce_sum(w * x)))
    L = y * ad.log(h) + (1 - y) * ad.log(1 - h)
    w_grad, = ad.gradients(L, [w])
    executor = ad.Executor([L, w_grad])

    N = 100
    X_val, Y_val = gen_2d_data(N)
    w_val = np.ones(3)

    plot(N, X_val, Y_val, w_val)
    test_accuracy(w_val, X_val, Y_val)
    alpha = 0.01
    max_iters = 300
    for iteration in range(max_iters):
        acc_L_val = 0
        for i in range(N):
            x_val = X_val[i]
            y_val = np.array(Y_val[i])
            L_val, w_grad_val = executor.run(feed_dict={
                w: w_val,
                x: x_val,
                y: y_val
            })
            # Gradient ascent: L is the log-likelihood, so we maximize it.
            w_val += alpha * w_grad_val
            acc_L_val += L_val
        print("iter = %d, likelihood = %s, w = %s" %
              (iteration, acc_L_val, w_val))
    test_accuracy(w_val, X_val, Y_val)
    plot(N, X_val, Y_val, w_val, True)
def cpd_als(dim, size, rank, num_iter, input_val=None):
    A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)

    full_hessian = ad.hessian(loss, A_list)
    hessians = [full_hessian[i][i] for i in range(len(full_hessian))]
    grads = ad.gradients(loss, A_list)

    # Newton step per factor: apply the inverse of the diagonal Hessian
    # block to the corresponding gradient.
    updates = [
        ad.tensordot(ad.tensorinv(hes), grad, [[2, 3], [0, 1]])
        for (hes, grad) in zip(hessians, grads)
    ]
    new_A_list = [simplify(A - update) for (A, update) in zip(A_list, updates)]

    executor = ad.Executor(new_A_list)
    executor_loss = ad.Executor([simplify(loss)])

    # Avoid a mutable default argument: generate random inputs when none
    # are supplied.
    if not input_val:
        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
    else:
        A_val_list, input_tensor_val = input_val

    for iteration in range(num_iter):  # ALS iterations
        for i in range(len(A_list)):
            feed_dict = dict(zip(A_list, A_val_list))
            feed_dict.update({input_tensor: input_tensor_val})
            A_val_list[i], = executor.run(feed_dict=feed_dict,
                                          out_nodes=[new_A_list[i]])

        feed_dict = dict(zip(A_list, A_val_list))
        feed_dict.update({input_tensor: input_tensor_val})
        loss_val, = executor_loss.run(feed_dict=feed_dict)
        print(f'At iteration {iteration} the loss is: {loss_val}')

    return A_val_list
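
# Hedged aside: assuming cpd_graph builds the usual squared-error CP loss, the
# per-factor Newton step above coincides (for dim = 3) with the classical ALS
# update A <- X_(1) (C ⊙ B) (B^T B * C^T C)^{-1}, where ⊙ is the Khatri-Rao
# product matched to the unfolding used here. A NumPy sketch of that
# single-factor update, illustrative only:
def _als_update_A(X, B, C):
    import numpy as np
    I, J, K = X.shape
    X1 = X.reshape(I, J * K)                               # mode-1 unfolding
    KR = np.einsum('jr,kr->jkr', B, C).reshape(J * K, -1)  # Khatri-Rao factor
    G = (B.T @ B) * (C.T @ C)                              # Hadamard of Grams
    return X1 @ KR @ np.linalg.inv(G)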
def test_reduce_sum_mix():
    x1 = ad.Variable(name="x1")
    y = ad.exp(ad.reduce_sum(x1))

    grad_x1, = ad.gradients(y, [x1])

    executor = ad.Executor([y, grad_x1])
    x1_val = 2 * np.ones(3)
    y_val, grad_x1_val = executor.run(feed_dict={x1: x1_val})
    expected_y_val = np.exp(np.sum(x1_val))

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_y_val)
    assert np.array_equal(grad_x1_val, expected_y_val * np.ones_like(x1_val))

    y2 = ad.log(ad.reduce_sum(x1))
    grad_x2, = ad.gradients(y2, [x1])
    executor2 = ad.Executor([y2, grad_x2])
    y2_val, grad_x2_val = executor2.run(feed_dict={x1: x1_val})
    expected_y2_val = np.log(np.sum(x1_val))

    assert isinstance(y2, ad.Node)
    assert np.array_equal(y2_val, expected_y2_val)
    assert np.array_equal(grad_x2_val,
                          (1 / np.sum(x1_val)) * np.ones_like(x1_val))
def test_cpd_hessian_optimize_diag(backendopt):
    dim = 3
    for datatype in backendopt:
        T.set_backend(datatype)
        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        # Use a distinct name for the concrete values so the symbolic
        # A_list is not shadowed.
        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_val_list

        hessian = ad.hessian(loss, [A, B, C])
        hessian_diag = [hessian[0][0], hessian[1][1], hessian[2][2]]
        for node in hessian_diag:
            node = optimize(node)
            assert isinstance(node, ad.AddNode)
            num_operations = len(
                list(
                    filter(lambda x: isinstance(x, ad.OpNode),
                           find_topo_sort([node]))))
            """
            Use this assertion to test the optimize function.
            5 operations:
            1. T.einsum('ca,cb->ab',A,A),
            2. T.einsum('ca,cb->ab',B,B),
            3. T.einsum('ab,ab->ab',T.einsum('ca,cb->ab',A,A),T.einsum('ca,cb->ab',B,B)),
            4. T.einsum('bd,ac->abcd',T.einsum('ab,ab->ab',T.einsum('ca,cb->ab',A,A),T.einsum('ca,cb->ab',B,B)),T.identity(10)),
            5. (T.einsum('bd,ac->abcd',T.einsum('ab,ab->ab',T.einsum('ca,cb->ab',A,A),T.einsum('ca,cb->ab',B,B)),T.identity(10)) +
                T.einsum('bd,ac->abcd',T.einsum('ab,ab->ab',T.einsum('ca,cb->ab',A,A),T.einsum('ca,cb->ab',B,B)),T.identity(10)))
            """
            assert num_operations == 5

        executor = ad.Executor(hessian_diag)
        hes_diag_vals = executor.run(feed_dict={
            A: A_val,
            B: B_val,
            C: C_val,
            input_tensor: input_tensor_val,
        })

        expected_hes_diag_val = [
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', B_val, B_val, C_val, C_val,
                         T.identity(size)),
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', A_val, A_val, C_val, C_val,
                         T.identity(size)),
            2 * T.einsum('eb,ed,fb,fd,ac->abcd', A_val, A_val, B_val, B_val,
                         T.identity(size))
        ]

        assert T.norm(hes_diag_vals[0] - expected_hes_diag_val[0]) < 1e-8
        assert T.norm(hes_diag_vals[1] - expected_hes_diag_val[1]) < 1e-8
        assert T.norm(hes_diag_vals[2] - expected_hes_diag_val[2]) < 1e-8
def test_tensorinv_odd_dim(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[24, 8, 3])
        inv_x = ad.tensorinv(x, ind=1)
        assert inv_x.shape == [8, 3, 24]
        assert inv_x.input_indices_length == 2

        executor = ad.Executor([inv_x])
        x_val = T.random([24, 8, 3])
        inv_x_val, = executor.run(feed_dict={x: x_val})
        assert T.array_equal(inv_x_val, T.tensorinv(x_val, ind=1))
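
# Hedged aside: ad.tensorinv appears to mirror np.linalg.tensorinv, which
# requires prod(shape[:ind]) == prod(shape[ind:]) (24 == 8 * 3 above). A
# minimal NumPy demonstration of the shape and the inverse property:
def _tensorinv_demo():
    import numpy as np
    x = np.random.rand(24, 8, 3)
    inv_x = np.linalg.tensorinv(x, ind=1)
    assert inv_x.shape == (8, 3, 24)
    # Contracting inv_x with x gives the identity on the (8, 3) index pair.
    ident = np.tensordot(inv_x, x, axes=1).reshape(24, 24)
    assert np.allclose(ident, np.eye(24))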
def test_einsum_subtree_clone(backendopt):
    r"""
    [Subtree clone]
    This case is rather subtle. We want to auto fuse

        A   B   C   D
        |    \ /    |
        |     es    |
        |    /  \   |
        |   /    \  |
        es        es
           \      /
              +

    Here es is einsum.
    """
    for datatype in backendopt:
        T.set_backend(datatype)

        a = ad.Variable(name="a", shape=[3, 3])
        b = ad.Variable(name="b", shape=[3, 2])
        c = ad.Variable(name="c", shape=[2, 3])
        d = ad.Variable(name="d", shape=[3, 3])

        BC = ad.einsum('ik, kj->ij', b, c)    # 3x3
        ABC = ad.einsum('ik, kj->ij', a, BC)  # 3x3
        BCD = ad.einsum('jk, ki->ji', BC, d)  # 3x3
        out = ABC + BCD

        input_nodes = [a, b, c, d]
        generated_feed_dict = gen_dict(input_nodes)

        executor = ad.Executor([out])
        out_val, = executor.run(feed_dict=generated_feed_dict)

        with OutputInjectedModeP(find_topo_sort_p([PseudoNode(out)])):
            trees = find_sub_einsumtree(PseudoNode(out))
            assert len(trees) == 2
            for tree in trees:
                out_node, in_nodes = tree
                new_z = fuse_einsums(out_node.node, in_nodes)
                replace_node(out_node, new_z)

        new_out_val, = executor.run(feed_dict=generated_feed_dict)
        assert float_eq(out_val, new_out_val)
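
# Hedged illustration of what fuse_einsums accomplishes: nested einsums can be
# collapsed into a single contraction. Plain NumPy, names illustrative:
def _einsum_fusion_demo():
    import numpy as np
    a = np.random.rand(3, 3)
    b = np.random.rand(3, 2)
    c = np.random.rand(2, 3)
    nested = np.einsum('ik,kj->ij', a, np.einsum('ik,kj->ij', b, c))
    fused = np.einsum('ik,kl,lj->ij', a, b, c)
    assert np.allclose(nested, fused)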
def test_add_by_const():
    x2 = ad.Variable(name="x2")  # created via the placeholder op
    y = 5 + x2  # built from a Node and an overloaded operator

    grad_x2, = ad.gradients(y, [x2])  # the returned gradients are also nodes

    # The executor's list argument specifies which nodes to evaluate, in order.
    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    # Results come back in the same order as the executor's node list.
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, x2_val + 5)
    assert np.array_equal(grad_x2_val, np.ones_like(x2_val))
    print("pass: test_add_by_const!")
def test_add_mul_mix():
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    x4 = x3 * x2
    x5 = x3 + x2
    y = x4 + x5

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([grad_x2])
    x2_val = 2 * np.ones(3)
    x3_val = 3 * np.ones(3)
    # run() returns a list; unpack it to get the gradient value itself.
    grad_x2_val, = executor.run(feed_dict={x2: x2_val, x3: x3_val})
    print(grad_x2_val)  # got dy/dx2
    # dy/dx2 = x3 + 1
    assert np.array_equal(grad_x2_val, x3_val + 1)
def test_add_two_vars():
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    y = x2 + x3

    grad_x2, grad_x3 = ad.gradients(y, [x2, x3])

    executor = ad.Executor([y, grad_x2, grad_x3])
    x2_val = 2 * np.ones(3)
    x3_val = 3 * np.ones(3)
    y_val, grad_x2_val, grad_x3_val = executor.run(
        feed_dict={x2: x2_val, x3: x3_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, x2_val + x3_val)
    assert np.array_equal(grad_x2_val, np.ones_like(x2_val))
    assert np.array_equal(grad_x3_val, np.ones_like(x3_val))
def test_div_by_const():
    x2 = ad.Variable(name="x2")
    y = 5 / x2

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, 5 / x2_val)
    # d(5 / x) / dx = -5 / x^2
    assert np.array_equal(grad_x2_val, -5 / (x2_val * x2_val))
def test_log():
    x2 = ad.Variable(name="x2")
    y = ad.log_op(x2)

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    # Compare within an epsilon tolerance rather than exactly.
    epsilon = 1e-6
    zero_arr = np.zeros(3) + epsilon

    assert isinstance(y, ad.Node)
    assert np.all(np.less_equal(np.abs(y_val - np.log(x2_val)), zero_arr))
    assert np.all(np.less_equal(np.abs(1 / x2_val - grad_x2_val), zero_arr))
def test_neg():
    x1 = ad.Variable(name='x1')
    x2 = ad.Variable(name='x2')

    y = -x2 + x1

    grad_x1, grad_x2 = ad.gradients(y, [x1, x2])
    executor = ad.Executor([y, grad_x1, grad_x2])
    x2_val = 2 * np.ones(3)
    x1_val = 3 * np.ones(3)
    y_val, grad_x1_val, grad_x2_val = executor.run(
        feed_dict={x1: x1_val, x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, -x2_val + x1_val)
    assert np.array_equal(grad_x2_val, -np.ones_like(x2_val))
    assert np.array_equal(grad_x1_val, np.ones_like(x1_val))
def test_exp_mix_op():
    x1 = ad.Variable(name="x1")
    x2 = ad.Variable(name="x2")
    y = ad.exp(ad.log(x1 * x2) + 1)

    grad_x1, grad_x2 = ad.gradients(y, [x1, x2])

    executor = ad.Executor([y, grad_x1, grad_x2])
    x1_val = 2 * np.ones(3)
    x2_val = 4 * np.ones(3)
    y_val, grad_x1_val, grad_x2_val = executor.run(
        feed_dict={x1: x1_val, x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, np.exp(np.log(x1_val * x2_val) + 1))
    assert np.array_equal(grad_x1_val, y_val * x2_val / (x1_val * x2_val))
    assert np.array_equal(grad_x2_val, y_val * x1_val / (x1_val * x2_val))
def test_negative(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x2 = ad.Variable(name="x2", shape=[3])

        y = ad.sum(-x2)

        grad_x2, = ad.gradients(y, [x2])

        executor = ad.Executor([y, grad_x2])
        x2_val = 2 * T.ones(3)
        y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.sum(-x2_val))
        assert T.array_equal(grad_x2_val, -T.ones_like(x2_val))
def test_executor_retain(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x2 = ad.Variable(name="x2", shape=[3, 3])
        y = ad.sum(x2)
        z = y * 2

        x2_val = T.identity(3)
        executor = ad.Executor([y, z])
        y_val, = executor.run(feed_dict={x2: x2_val},
                              reset_graph=False,
                              out_nodes=[y])

        # This can only be run if the y values are retained.
        z_val, = executor.run(feed_dict={}, reset_graph=False, out_nodes=[z])
def test_div_two_vars():
    x1 = ad.Variable(name='x1')
    x2 = ad.Variable(name='x2')

    y = x1 / x2

    grad_x1, grad_x2 = ad.gradients(y, [x1, x2])

    executor = ad.Executor([y, grad_x1, grad_x2])
    x1_val = 2 * np.ones(3)
    x2_val = 5 * np.ones(3)
    y_val, grad_x1_val, grad_x2_val = executor.run(
        feed_dict={x1: x1_val, x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, x1_val / x2_val)
    assert np.array_equal(grad_x1_val, np.ones_like(x1_val) / x2_val)
    assert np.array_equal(grad_x2_val, -x1_val / (x2_val * x2_val))
def test_mix_all():
    x1 = ad.Variable(name="x1")
    y = 1 / (1 + ad.exp(-ad.reduce_sum(x1)))

    grad_x1, = ad.gradients(y, [x1])

    executor = ad.Executor([y, grad_x1])
    x1_val = 2 * np.ones(3)
    y_val, grad_x1_val = executor.run(feed_dict={x1: x1_val})
    expected_y_val = 1 / (1 + np.exp(-np.sum(x1_val)))
    # sigmoid'(s) = sigmoid(s) * (1 - sigmoid(s)), broadcast over x1.
    expected_y_grad = (expected_y_val * (1 - expected_y_val) *
                       np.ones_like(x1_val))

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_y_val)
    # Allow a small numerical tolerance for the gradient.
    assert np.sum(np.abs(grad_x1_val - expected_y_grad)) < 1e-10
def test_hessian_quadratic(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[3])
        H = ad.Variable(name="H", shape=[3, 3])
        y = ad.einsum("i,ij,j->", x, H, x)

        hessian = ad.hessian(y, [x])
        executor = ad.Executor([hessian[0][0]])
        x_val = T.random([3])
        H_val = T.random((3, 3))
        hessian_val, = executor.run(feed_dict={x: x_val, H: H_val})

        assert T.array_equal(hessian_val, H_val + T.transpose(H_val))
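
# Hedged check of the identity the test relies on: for y = x^T H x, the
# Hessian is H + H^T. Self-contained NumPy finite-difference verification
# (names illustrative):
def _quadratic_hessian_demo():
    import numpy as np
    rng = np.random.default_rng(1)
    H = rng.normal(size=(3, 3))
    x = rng.normal(size=3)
    f = lambda v: v @ H @ v
    eps = 1e-5
    hess = np.zeros((3, 3))
    for i in range(3):
        for j in range(3):
            ei, ej = np.eye(3)[i], np.eye(3)[j]
            # Second-order mixed difference; exact for a quadratic up to
            # floating-point round-off.
            hess[i, j] = (f(x + eps * ei + eps * ej) - f(x + eps * ei)
                          - f(x + eps * ej) + f(x)) / eps**2
    assert np.allclose(hess, H + H.T, atol=1e-4)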
def test():
    x1 = ad.Variable(name="x1")
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    y = (ad.sin_op(x1 + 1) + ad.cos_op(2 * x2)) * ad.tan_op(ad.log_op(x3)) + (
        ad.sin_op(x2 + 1)) + ad.cos_op(2 * x1) * ad.exp_op(1 + ad.sin_op(x3))

    grad_x1, grad_x2, grad_x3 = ad.gradients(y, [x1, x2, x3])
    executor = ad.Executor([y, grad_x1, grad_x2, grad_x3])
    x1_val = 1 * np.ones(1)
    x2_val = 2 * np.ones(1)
    x3_val = 3 * np.ones(1)
    y_val, grad_x1_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
        x1: x1_val,
        x2: x2_val,
        x3: x3_val
    })

    print('x1=', x1_val[0])
    print('x2=', x2_val[0])
    print('x3=', x3_val[0])
    print('---------------------------------------------------------------')
    print('y0_val=', y_val[0])
    print('grad_x1_val= ', grad_x1_val[0])
    print('grad_x2_val= ', grad_x2_val[0])
    print('grad_x3_val= ', grad_x3_val[0])
    print('---------------------------------------------------------------')

    y_numerical, grad_numerical = numerical_diff(f, [x1_val, x2_val, x3_val],
                                                 1e-10)
    print('y0_numerical= ', y_numerical)
    grad_numerical_x1, grad_numerical_x2, grad_numerical_x3 = (
        grad_numerical[0], grad_numerical[1], grad_numerical[2])
    print('grad_numerical_x1 =', grad_numerical_x1)
    print('grad_numerical_x2 =', grad_numerical_x2)
    print('grad_numerical_x3 =', grad_numerical_x3)
    print('---------------------------------------------------------------')

    print('gradients offset:')
    print('x1:', abs(grad_x1_val - grad_numerical_x1))
    assert abs(grad_x1_val - grad_numerical_x1) < 1e-5
    print('x2:', abs(grad_x2_val - grad_numerical_x2))
    assert abs(grad_x2_val - grad_numerical_x2) < 1e-5
    print('x3:', abs(grad_x3_val - grad_numerical_x3))
    assert abs(grad_x3_val - grad_numerical_x3) < 1e-5
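
# numerical_diff and f above are helpers assumed to be defined elsewhere in
# the module. A minimal central-difference sketch of what numerical_diff might
# look like (hedged; the real signature may differ). Note that a very small
# step like 1e-10 can amplify float64 round-off:
def numerical_diff_sketch(func, inputs, h):
    import numpy as np
    y0 = func(*inputs)
    grads = []
    for k in range(len(inputs)):
        plus = [np.array(v, dtype=float) for v in inputs]
        minus = [np.array(v, dtype=float) for v in inputs]
        plus[k] = plus[k] + h
        minus[k] = minus[k] - h
        # Central difference: (f(x + h) - f(x - h)) / (2h)
        grads.append((func(*plus) - func(*minus)) / (2 * h))
    return y0, grads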
def test_jacobian_summation_einsum(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2, 2])
        x_sum = ad.einsum('ij->', x)
        grad_x, = ad.jacobians(x_sum, [x])

        executor = ad.Executor([x_sum, grad_x])
        x_val = T.tensor([[1., 2.], [3., 4.]])

        x_sum_val, grad_x_val = executor.run(feed_dict={x: x_val})

        expected_x_sum_val = T.sum(x_val)
        expected_grad_x_val = T.ones_like(x_val)

        assert T.array_equal(x_sum_val, expected_x_sum_val)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
def test_tucker(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        tg = TuckerGraph(dim, size, rank)
        executor = ad.Executor([tg.residual])

        A_val_list, core_val, X_val = init_rand_tucker(dim, size, rank)

        feed_dict = dict(zip(tg.A_list, A_val_list))
        feed_dict.update({tg.core: core_val, tg.X: X_val})

        residual_val, = executor.run(feed_dict=feed_dict)

        expect_residual_val = T.einsum('ae,bf,cg,efg->abc', *A_val_list,
                                       core_val) - X_val

        assert T.norm(residual_val - expect_residual_val) < 1e-8
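
# Hedged aside: for dim = 3 the Tucker reconstruction contracted above is
# einsum('ae,bf,cg,efg->abc', A1, A2, A3, core), i.e. the core multiplied by
# one factor matrix per mode. A plain NumPy illustration with made-up sizes
# (names illustrative):
def _tucker_residual_demo():
    import numpy as np
    A1, A2, A3 = (np.random.rand(4, 2) for _ in range(3))
    core = np.random.rand(2, 2, 2)
    X = np.random.rand(4, 4, 4)
    recon = np.einsum('ae,bf,cg,efg->abc', A1, A2, A3, core)
    assert (recon - X).shape == (4, 4, 4)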
def test_jacobian_trace_einsum(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)
        x = ad.Variable(name="x", shape=[2, 2])
        trace = ad.einsum('ii->', x)
        grad_x, = ad.jacobians(trace, [x])

        executor = ad.Executor([trace, grad_x])
        x_val = T.tensor([[1., 2.], [3., 4.]])

        trace_val, grad_x_val = executor.run(feed_dict={x: x_val})

        expected_trace_val = T.einsum('ii->', x_val)
        expected_grad_x_val = T.identity(2)

        assert T.array_equal(trace_val, expected_trace_val)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
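
# Hedged note: the expected Jacobian uses d trace(X) / dX = I. A direct NumPy
# finite-difference confirmation (trace is linear, so this is exact up to
# round-off; name illustrative):
def _trace_jacobian_demo():
    import numpy as np
    x = np.array([[1., 2.], [3., 4.]])
    eps = 1e-7
    grad = np.zeros_like(x)
    for i in range(2):
        for j in range(2):
            dx = np.zeros_like(x)
            dx[i, j] = eps
            grad[i, j] = (np.trace(x + dx) - np.trace(x - dx)) / (2 * eps)
    assert np.allclose(grad, np.eye(2))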