def test_mul_by_const():
    x2 = ad.Variable(name="x2")
    y = 5 * x2

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, x2_val * 5)
    assert np.array_equal(grad_x2_val, np.ones_like(x2_val) * 5)
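
# Illustrative sketch: the rule exercised above is d(c * x)/dx = c, i.e. reverse
# mode multiplies the incoming output gradient by the constant. The _ConstMul
# class below is hypothetical (it is not the ad library's API), written from
# scratch with numpy only to show the forward/backward pair.
import numpy as np

class _ConstMul:
    """y = c * x; backward scales the incoming gradient by c."""

    def __init__(self, c):
        self.c = c

    def forward(self, x):
        return self.c * x

    def backward(self, out_grad):
        return self.c * out_grad

def _sketch_mul_by_const_grad():
    op = _ConstMul(5.0)
    x_val = 2 * np.ones(3)
    assert np.array_equal(op.forward(x_val), 5 * x_val)
    # Seeding with ones reproduces grad_x2_val in test_mul_by_const.
    assert np.array_equal(op.backward(np.ones(3)), 5 * np.ones(3))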
def main():
    # Generate dataset and initial weight
    x_t, y_t = generate_dataset(1, 1, -5, point_num=100)
    # Add an extra dimension to build homogeneous coordinates.
    x_t = np.concatenate((x_t, np.ones((x_t.shape[0], 1))), axis=1)
    W_val = np.random.rand(3, 1)

    # Draw the initial decision hyperplane.
    draw(W_val, x_t, y_t)

    # Create the model.
    x = ad.Variable(name='x')
    W = ad.Variable(name='W')
    y = ad.sigmoid_op(ad.matmul_op(x, W))

    # Define the loss.
    y_ = ad.Variable(name='y_')
    cross_entropy = ad.reduce_mean_op(-ad.reduce_sum_op(
        y_ * ad.log_op(y) + (1 - y_) * ad.log_op(1 - y),
        reduction_indices=[1]))

    # Update rule.
    learning_rate = 0.5
    W_grad, = ad.gradients(cross_entropy, [W])
    W_train_step = W - learning_rate * W_grad

    # Training.
    executor = ad.Executor([cross_entropy, y, W_train_step])
    steps = 200

    plt.ion()
    for i in range(steps):
        plt.cla()

        loss_val, y_val, W_val = executor.run(feed_dict={
            x: x_t,
            y_: y_t,
            W: W_val,
        })

        print("Step {}: loss: {}".format(i + 1, loss_val))

        # Draw the trained decision hyperplane.
        draw(W_val, x_t, y_t)
        plt.pause(0.1)

    plt.ioff()
    plt.show()
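
# Illustrative sketch: the loop above gets its update from ad.gradients, but the
# same step can be written by hand. For the mean cross-entropy over N points,
# dL/dW = X^T (sigmoid(XW) - y) / N. A numpy-only version of one gradient step
# on made-up data (the data and labels here are assumptions, not the script's):
import numpy as np

def _sketch_manual_logistic_step(learning_rate=0.5):
    rng = np.random.default_rng(0)
    # 100 points in homogeneous coordinates, matching the shapes used above.
    X = np.concatenate((rng.normal(size=(100, 2)), np.ones((100, 1))), axis=1)
    y = (X[:, :1] > 0).astype(float)         # toy labels, shape (100, 1)
    W = rng.random((3, 1))

    y_hat = 1.0 / (1.0 + np.exp(-X @ W))     # sigmoid(XW)
    grad_W = X.T @ (y_hat - y) / X.shape[0]  # analytic cross-entropy gradient
    return W - learning_rate * grad_W        # same form as W_train_step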
def cpd_als_shared_exec(dim, size, rank, num_iter, input_val=[]):
    A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)

    full_hessian = ad.hessian(loss, A_list)
    hessians = [full_hessian[i][i] for i in range(len(full_hessian))]

    grads = ad.gradients(loss, A_list)

    updates = [
        ad.tensordot(ad.tensorinv(hes), grad, [[2, 3], [0, 1]])
        for (hes, grad) in zip(hessians, grads)
    ]

    new_A_list = [simplify(A - update) for (A, update) in zip(A_list, updates)]
    new_A_list = generate_sequential_optimal_tree(new_A_list, A_list)

    executor = ad.Executor(new_A_list)
    executor_loss = ad.Executor([simplify(loss)])

    if input_val == []:
        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
    else:
        A_val_list, input_tensor_val = input_val

    for iter in range(num_iter):
        t0 = time.time()

        # ALS iterations
        for i in range(len(A_list)):
            feed_dict = dict(zip(A_list, A_val_list))
            feed_dict.update({input_tensor: input_tensor_val})

            if i == 0:
                A_val_list[0], = executor.run(feed_dict=feed_dict,
                                              out_nodes=[new_A_list[0]])
            else:
                A_val_list[i], = executor.run(feed_dict=feed_dict,
                                              reset_graph=False,
                                              evicted_inputs=[A_list[i - 1]],
                                              out_nodes=[new_A_list[i]])

        feed_dict = dict(zip(A_list, A_val_list))
        feed_dict.update({input_tensor: input_tensor_val})
        loss_val, = executor_loss.run(feed_dict=feed_dict)

        print(f'At iteration {iter} the loss is: {loss_val}')
        t1 = time.time()
        print(f"[ {iter} ] Sweep took {t1 - t0} seconds")

    return A_val_list
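
# Illustrative sketch: the update above contracts the inverse of a 4-index
# Hessian against a 2-index gradient, i.e. it solves H · X = G with H's indices
# grouped as (i, j; k, l). A numpy-only check (using numpy's own tensorinv and
# tensordot, not the ad library's) that tensordot(tensorinv(H), G, [[2, 3],
# [0, 1]]) matches a flattened linear solve:
import numpy as np

def _sketch_tensorinv_solve(n=3, r=2):
    rng = np.random.default_rng(0)
    H = rng.normal(size=(n, r, n, r))  # nonsingular with probability 1
    G = rng.normal(size=(n, r))

    X = np.tensordot(np.linalg.tensorinv(H), G, [[2, 3], [0, 1]])
    X_flat = np.linalg.solve(H.reshape(n * r, n * r), G.reshape(n * r))
    assert np.allclose(X.reshape(-1), X_flat)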
def test_reduce_sum_mix():
    x1 = ad.Variable(name="x1")
    y = ad.exp(ad.reduce_sum(x1))

    grad_x1, = ad.gradients(y, [x1])

    executor = ad.Executor([y, grad_x1])
    x1_val = 2 * np.ones(3)
    y_val, grad_x1_val = executor.run(feed_dict={x1: x1_val})

    expected_y_val = np.exp(np.sum(x1_val))
    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_y_val)
    assert np.array_equal(grad_x1_val, expected_y_val * np.ones_like(x1_val))

    y2 = ad.log(ad.reduce_sum(x1))
    grad_x2, = ad.gradients(y2, [x1])
    executor2 = ad.Executor([y2, grad_x2])
    y2_val, grad_x2_val = executor2.run(feed_dict={x1: x1_val})

    expected_y2_val = np.log(np.sum(x1_val))
    assert isinstance(y2, ad.Node)
    assert np.array_equal(y2_val, expected_y2_val)
    assert np.array_equal(grad_x2_val,
                          (1 / np.sum(x1_val)) * np.ones_like(x1_val))
def test_identity():
    x2 = ad.Variable(name="x2")
    y = x2

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, x2_val)
    assert np.array_equal(grad_x2_val, np.ones_like(x2_val))
    print('assert pass.')
def test_div_by_const():
    x2 = ad.Variable(name="x2")
    y = 5 / x2

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, 5 / x2_val)
    print(grad_x2_val)
    print(-5 / (x2_val * x2_val))
    assert np.array_equal(grad_x2_val, -5 / (x2_val * x2_val))
def test_add_by_const():
    x2 = ad.Variable(name="x2")  # created via the placeholder op
    y = 5 + x2  # the overloaded operator builds a new Node

    grad_x2, = ad.gradients(y, [x2])  # the returned gradients are also Nodes

    # The executor computes the nodes given in the list, in that order.
    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    # Results come back in the same order as the executor's node list.
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, x2_val + 5)
    assert np.array_equal(grad_x2_val, np.ones_like(x2_val))
    print("pass: test_add_by_const!")
def test_sub():
    x1 = ad.Variable(name="x1")
    x2 = ad.Variable(name="x2")

    y_node = x1 - x2
    y_const = x1 - 3.0
    y_rev_const = 3.0 - x2

    grad_y_node_x1, grad_y_node_x2 = ad.gradients(y_node, [x1, x2])
    grad_y_const_x1, = ad.gradients(y_const, [x1])
    grad_y_rev_const_x2, = ad.gradients(y_rev_const, [x2])

    x1_val = np.ones((1, 3))
    x2_val = 2 * np.ones((1, 3))

    executor = ad.Executor([
        y_node, y_const, y_rev_const, grad_y_node_x1, grad_y_node_x2,
        grad_y_const_x1, grad_y_rev_const_x2
    ])
    (y_node_val, y_const_val, y_rev_const_val, grad_y_node_x1_val,
     grad_y_node_x2_val, grad_y_const_x1_val,
     grad_y_rev_const_x2_val) = executor.run(feed_dict={
         x1: x1_val,
         x2: x2_val
     })

    assert np.array_equal(y_node_val, -1.0 * np.ones_like(y_node_val))
    assert np.array_equal(y_const_val, -2.0 * np.ones_like(y_const_val))
    assert np.array_equal(y_rev_const_val, 2.0 * np.ones_like(y_rev_const_val))
    assert np.array_equal(grad_y_node_x1_val, np.ones_like(grad_y_node_x1_val))
    assert np.array_equal(grad_y_node_x2_val,
                          -1.0 * np.ones_like(grad_y_node_x2_val))
    assert np.array_equal(grad_y_const_x1_val,
                          np.ones_like(grad_y_const_x1_val))
    assert np.array_equal(grad_y_rev_const_x2_val,
                          -1.0 * np.ones_like(grad_y_rev_const_x2_val))
def test_add_two_vars():
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    y = x2 + x3

    grad_x2, grad_x3 = ad.gradients(y, [x2, x3])

    executor = ad.Executor([y, grad_x2, grad_x3])
    x2_val = 2 * np.ones(3)
    x3_val = 3 * np.ones(3)
    y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
        x2: x2_val,
        x3: x3_val
    })

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, x2_val + x3_val)
    assert np.array_equal(grad_x2_val, np.ones_like(x2_val))
    assert np.array_equal(grad_x3_val, np.ones_like(x3_val))
def test_msr():
    x = ad.Variable(name="x")
    y = ad.Variable(name="y")
    z = x * y
    l = ad.reduce_sum_op((x - z) * (x - z), axis=0)
    c = ad.matmul_op(x - z, x - z, True, False)

    x_val = np.ones((10, 1))
    y_val = np.ones((10, 1)) * 2

    grad_x1, grad_y1 = ad.gradients(l, [x, y])
    grad_x2, grad_y2 = ad.gradients(c, [x, y])

    executor = ad.Executor([l, c, grad_x1, grad_y1, grad_x2, grad_y2])
    loss, cost, grad_x1_val, grad_y1_val, grad_x2_val, grad_y2_val = executor.run(
        feed_dict={
            x: x_val,
            y: y_val
        })

    print(loss)
    print(cost)
    print("gx1: %s, gy1: %s" % (str(grad_x1_val), str(grad_y1_val)))
    print("gx2: %s, gy2: %s" % (str(grad_x2_val), str(grad_y2_val)))
def test_add_by_const():
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    x4 = x3 * x2
    x5 = x3 + x2
    y = x4 + x5

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([grad_x2])
    x2_val = 2 * np.ones(3)
    x3_val = 3 * np.ones(3)
    grad_x2_val, = executor.run(feed_dict={x2: x2_val, x3: x3_val})
    print(grad_x2_val)  # got dy/dx2
def test_log():
    x2 = ad.Variable(name="x2")
    y = ad.log_op(x2)

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    epsilon = 1e-6
    zero_arr = np.zeros(3) + epsilon
    assert isinstance(y, ad.Node)
    assert np.all(np.less_equal(np.abs(y_val - np.log(x2_val)), zero_arr))
    assert np.all(np.less_equal(np.abs(1 / x2_val - grad_x2_val), zero_arr))
def test_exp_mix_op():
    x1 = ad.Variable(name="x1")
    x2 = ad.Variable(name="x2")
    y = ad.exp(ad.log(x1 * x2) + 1)

    grad_x1, grad_x2 = ad.gradients(y, [x1, x2])

    executor = ad.Executor([y, grad_x1, grad_x2])
    x1_val = 2 * np.ones(3)
    x2_val = 4 * np.ones(3)
    y_val, grad_x1_val, grad_x2_val = executor.run(feed_dict={
        x1: x1_val,
        x2: x2_val
    })

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, np.exp(np.log(x1_val * x2_val) + 1))
    assert np.array_equal(grad_x1_val, y_val * x2_val / (x1_val * x2_val))
    assert np.array_equal(grad_x2_val, y_val * x1_val / (x1_val * x2_val))
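
# Illustrative sketch: the expression above simplifies analytically, since
# exp(log(x1 * x2) + 1) = e * x1 * x2, so dy/dx1 = e * x2 and dy/dx2 = e * x1.
# A numpy-only confirmation of the closed forms the assertions rely on:
import numpy as np

def _sketch_exp_mix_closed_form():
    x1, x2 = 2 * np.ones(3), 4 * np.ones(3)
    y = np.exp(np.log(x1 * x2) + 1)
    assert np.allclose(y, np.e * x1 * x2)
    assert np.allclose(y * x2 / (x1 * x2), np.e * x2)  # dy/dx1
    assert np.allclose(y * x1 / (x1 * x2), np.e * x1)  # dy/dx2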
def test_neg():
    x1 = ad.Variable(name='x1')
    x2 = ad.Variable(name='x2')

    y = -x2 + x1

    grad_x1, grad_x2 = ad.gradients(y, [x1, x2])

    executor = ad.Executor([y, grad_x1, grad_x2])
    x2_val = 2 * np.ones(3)
    x1_val = 3 * np.ones(3)
    y_val, grad_x1_val, grad_x2_val = executor.run(feed_dict={
        x1: x1_val,
        x2: x2_val
    })

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, -x2_val + x1_val)
    assert np.array_equal(grad_x2_val, -np.ones_like(x2_val))
    assert np.array_equal(grad_x1_val, np.ones_like(x1_val))
def test_mix_all():
    x1 = ad.Variable(name="x1")
    y = 1 / (1 + ad.exp(-ad.reduce_sum(x1)))

    grad_x1, = ad.gradients(y, [x1])

    executor = ad.Executor([y, grad_x1])
    x1_val = 2 * np.ones(3)
    y_val, grad_x1_val = executor.run(feed_dict={x1: x1_val})

    expected_y_val = 1 / (1 + np.exp(-np.sum(x1_val)))
    expected_y_grad = expected_y_val * (1 - expected_y_val) * np.ones_like(x1_val)

    print(expected_y_grad)
    print(grad_x1_val)
    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_y_val)
    assert np.sum(np.abs(grad_x1_val - expected_y_grad)) < 1e-10
def test_negative(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x2 = ad.Variable(name="x2", shape=[3])
        y = ad.sum(-x2)

        grad_x2, = ad.gradients(y, [x2])

        executor = ad.Executor([y, grad_x2])
        x2_val = 2 * T.ones(3)
        y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

        assert isinstance(y, ad.Node)
        assert T.array_equal(y_val, T.sum(-x2_val))
        assert T.array_equal(grad_x2_val, -T.ones_like(x2_val))
def test_div_two_vars():
    x1 = ad.Variable(name='x1')
    x2 = ad.Variable(name='x2')

    y = x1 / x2

    grad_x1, grad_x2 = ad.gradients(y, [x1, x2])

    executor = ad.Executor([y, grad_x1, grad_x2])
    x1_val = 2 * np.ones(3)
    x2_val = 5 * np.ones(3)
    y_val, grad_x1_val, grad_x2_val = executor.run(feed_dict={
        x1: x1_val,
        x2: x2_val
    })

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, x1_val / x2_val)
    assert np.array_equal(grad_x1_val, np.ones_like(x1_val) / x2_val)
    assert np.array_equal(grad_x2_val, -x1_val / (x2_val * x2_val))
def test():
    x1 = ad.Variable(name="x1")
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")

    y = (ad.sin_op(x1 + 1) + ad.cos_op(2 * x2)) * ad.tan_op(ad.log_op(x3)) + (
        ad.sin_op(x2 + 1)) + ad.cos_op(2 * x1) * ad.exp_op(1 + ad.sin_op(x3))

    grad_x1, grad_x2, grad_x3 = ad.gradients(y, [x1, x2, x3])

    executor = ad.Executor([y, grad_x1, grad_x2, grad_x3])
    x1_val = 1 * np.ones(1)
    x2_val = 2 * np.ones(1)
    x3_val = 3 * np.ones(1)
    y_val, grad_x1_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
        x1: x1_val,
        x2: x2_val,
        x3: x3_val
    })

    print('x1=', x1_val[0])
    print('x2=', x2_val[0])
    print('x3=', x3_val[0])
    print('---------------------------------------------------------------')
    print('y0_val=', y_val[0])
    print('grad_x1_val= ', grad_x1_val[0])
    print('grad_x2_val= ', grad_x2_val[0])
    print('grad_x3_val= ', grad_x3_val[0])
    print('---------------------------------------------------------------')

    y_numerical, grad_numerical = numerical_diff(f, [x1_val, x2_val, x3_val],
                                                 1e-10)
    print('y0_numerical= ', y_numerical)
    grad_numerical_x1, grad_numerical_x2, grad_numerical_x3 = (
        grad_numerical[0], grad_numerical[1], grad_numerical[2])
    print('grad_numerical_x1 =', grad_numerical_x1)
    print('grad_numerical_x2 =', grad_numerical_x2)
    print('grad_numerical_x3 =', grad_numerical_x3)
    print('---------------------------------------------------------------')

    print('gradient offsets:')
    print('x1:', abs(grad_x1_val - grad_numerical_x1))
    assert abs(grad_x1_val - grad_numerical_x1) < 1e-5
    print('x2:', abs(grad_x2_val - grad_numerical_x2))
    assert abs(grad_x2_val - grad_numerical_x2) < 1e-5
    print('x3:', abs(grad_x3_val - grad_numerical_x3))
    assert abs(grad_x3_val - grad_numerical_x3) < 1e-5
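
# Illustrative sketch: numerical_diff and f above are defined elsewhere in the
# project. The version below is only a guess at their behavior (signature and
# calling convention inferred from the call site), showing the usual
# central-difference estimate. Note that a step of 1e-10, as used above, is
# near the float64 rounding floor; a step around 1e-6 is the common choice.
import numpy as np

def _sketch_numerical_diff(f, xs, eps):
    """Return f(xs) and central-difference gradient estimates, one per input."""
    y0 = f(*xs)  # assumed calling convention
    grads = []
    for i, x in enumerate(xs):
        hi = [v.copy() for v in xs]
        lo = [v.copy() for v in xs]
        hi[i] = x + eps
        lo[i] = x - eps
        grads.append((f(*hi) - f(*lo)) / (2 * eps))
    return y0, grads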
def test_trace_einsum(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[2, 2])
        trace = ad.einsum('ii->', x)

        grad_x, = ad.gradients(trace, [x])

        executor = ad.Executor([trace, grad_x])
        x_val = T.tensor([[1., 2.], [3., 4.]])

        trace_val, grad_x_val = executor.run(feed_dict={x: x_val})

        expected_trace_val = T.einsum('ii->', x_val)
        expected_grad_x_val = T.identity(2)

        assert T.array_equal(trace_val, expected_trace_val)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
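
# Illustrative sketch: the identity behind the assertion above is
# d tr(X) / dX = I, since tr(X) = sum_i X[i, i]. A numpy finite-difference
# check (independent of the T backend used above):
import numpy as np

def _sketch_trace_grad(eps=1e-6):
    x = np.array([[1., 2.], [3., 4.]])
    grad = np.zeros_like(x)
    for i in range(2):
        for j in range(2):
            d = np.zeros_like(x)
            d[i, j] = eps
            grad[i, j] = (np.einsum('ii->', x + d) -
                          np.einsum('ii->', x - d)) / (2 * eps)
    assert np.allclose(grad, np.eye(2))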
def test_summation_einsum(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[2, 2])
        x_sum = ad.einsum('ij->', x)

        grad_x, = ad.gradients(x_sum, [x])

        executor = ad.Executor([x_sum, grad_x])
        x_val = T.tensor([[1., 2.], [3., 4.]])

        x_sum_val, grad_x_val = executor.run(feed_dict={x: x_val})

        expected_x_sum_val = T.sum(x_val)
        expected_grad_x_val = T.ones_like(x_val)

        assert T.array_equal(x_sum_val, expected_x_sum_val)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
def evaluate(expr, value_map):
    """Evaluate expr for specific values of the variables.

    Examples:
    >>> evaluate(multiply(3, 'x'), {'x': 0})
    0   # because 3*0 = 0
    >>> evaluate(add(3, multiply('x', 'x')), {'x': 2})
    7   # because 3+2^2 = 7

    Input:
        expr: An expression. The expression can be
            (1) any real number,
            (2) 'x',
            (3) operation(expr, expr), where operation is either add or multiply.
        value_map: A dictionary specifying the values of the variables.

    Output:
        expr evaluated at the given variable values; a real number.
    """
    y = expr
    input_vars = [[ad.Variable(name=name), value_map[name]]
                  for name in value_map.keys()]
    x_vars = [x_var for x_var, x_val in input_vars]
    x_grads = ad.gradients(y, x_vars)

    executor = ad.Executor([y, *x_grads])
    feed_dict = {}
    for x, x_val in input_vars:
        # The AD code supports matrix inputs, but here we only need scalars,
        # so feed each variable as a one-element array.
        feed_dict[x] = x_val * np.ones(1)
    output_nodes = executor.run(feed_dict=feed_dict)
    y_val = output_nodes[0]
    return y_val
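
# Illustrative sketch (not the project's implementation): the same evaluation
# can be done by direct recursion over the expression tree, with no AD graph at
# all. This assumes expressions are nested ('add'/'multiply', lhs, rhs) tuples,
# which is only a guess at the representation built by add/multiply above.
def _sketch_evaluate(expr, value_map):
    if isinstance(expr, (int, float)):
        return expr          # case (1): a real number
    if isinstance(expr, str):
        return value_map[expr]  # case (2): a variable such as 'x'
    op, lhs, rhs = expr      # case (3): operation(expr, expr)
    l = _sketch_evaluate(lhs, value_map)
    r = _sketch_evaluate(rhs, value_map)
    return l + r if op == 'add' else l * r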
def tucker_als_graph_shared_exec(dim, size, rank):
    """ Build the graph used for Tucker ALS with shared execution.

    Parameters
    ----------
    dim: dimensionality of the input tensor
    size: the size of each dimension of the input tensor
    rank: the rank of the decomposition

    Returns
    -------
    tg: a TuckerGraph object
    executor: a shared executor
    loss: the optimized graph for the Tucker loss
    updates: a list containing the update graph for each dimension
    intermediates: list of einsum nodes; each node is the objective that the
        corresponding Tucker ALS step optimizes

    """
    tg = TuckerGraph(dim, size, rank)

    updates = []
    for i in range(dim):
        core_A = tg.intermediates[i]

        hes = ad.hessian(tg.losses[i], [core_A])
        hes = hes[0][0]

        grad, = ad.gradients(tg.losses[i], [core_A])

        new_core_A = core_A - ad.tensordot(
            ad.tensorinv(hes), grad,
            [[i + dim for i in range(dim)], [i for i in range(dim)]])

        updates.append(simplify(new_core_A))

    loss = simplify(tg.losses[0])
    for i in range(1, len(tg.losses)):
        assert loss.name == simplify(tg.losses[i]).name

    updates = generate_sequential_optimal_tree(updates, tg.A_list)
    executor_updates = ad.Executor(updates)
    executor_loss = ad.Executor([loss])

    return tg, executor_updates, executor_loss, loss, updates, tg.intermediates
def test_cpd_grad(backendopt):
    dim = 3

    for datatype in backendopt:
        T.set_backend(datatype)

        A_list, input_tensor, loss, residual = cpd_graph(dim, size, rank)
        A, B, C = A_list
        grad_A, grad_B, grad_C = ad.gradients(loss, [A, B, C])
        executor = ad.Executor([loss, grad_A, grad_B, grad_C])

        A_val_list, input_tensor_val = init_rand_cp(dim, size, rank)
        A_val, B_val, C_val = A_val_list
        loss_val, grad_A_val, grad_B_val, grad_C_val = executor.run(
            feed_dict={
                input_tensor: input_tensor_val,
                A: A_val,
                B: B_val,
                C: C_val
            })

        expected_output_tensor = T.einsum("ia,ja,ka->ijk", A_val, B_val, C_val)
        expected_residual = expected_output_tensor - input_tensor_val
        expected_norm_error = T.norm(expected_residual)
        expected_loss = expected_norm_error * expected_norm_error

        expected_contract_residual_A = 2 * T.einsum("ijk,ia->ajk",
                                                    expected_residual, A_val)
        expected_contract_residual_B = 2 * T.einsum("ijk,ja->iak",
                                                    expected_residual, B_val)
        expected_contract_residual_C = 2 * T.einsum("ijk,ka->ija",
                                                    expected_residual, C_val)

        expected_grad_A = T.einsum("iak,ka->ia", expected_contract_residual_B,
                                   C_val)
        expected_grad_B = T.einsum("ajk,ka->ja", expected_contract_residual_A,
                                   C_val)
        expected_grad_C = T.einsum("ajk,ja->ka", expected_contract_residual_A,
                                   B_val)

        assert abs(loss_val - expected_loss) < 1e-8
        assert T.norm(grad_A_val - expected_grad_A) < 1e-8
        assert T.norm(grad_B_val - expected_grad_B) < 1e-8
        assert T.norm(grad_C_val - expected_grad_C) < 1e-8
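
# Illustrative sketch: the expected gradients above collapse to
# grad_A = 2 * einsum("ijk,ja,ka->ia", residual, B, C) (and cyclic variants for
# B and C). A numpy-only finite-difference check of that closed form on small
# random data, independent of the T backend:
import numpy as np

def _sketch_cpd_grad_check(s=3, r=2, eps=1e-6):
    rng = np.random.default_rng(0)
    A, B, C = (rng.normal(size=(s, r)) for _ in range(3))
    X = rng.normal(size=(s, s, s))

    def loss(A_):
        res = np.einsum("ia,ja,ka->ijk", A_, B, C) - X
        return np.sum(res * res)

    residual = np.einsum("ia,ja,ka->ijk", A, B, C) - X
    grad_A = 2 * np.einsum("ijk,ja,ka->ia", residual, B, C)

    numeric = np.zeros_like(A)
    for idx in np.ndindex(A.shape):
        d = np.zeros_like(A)
        d[idx] = eps
        numeric[idx] = (loss(A + d) - loss(A - d)) / (2 * eps)
    assert np.allclose(grad_A, numeric, atol=1e-4)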
def test_inner_product_einsum(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[2])
        x_inner = ad.einsum('i,i->', x, x)

        grad_x, = ad.gradients(x_inner, [x])

        executor = ad.Executor([x_inner, grad_x])
        x_val = T.tensor([3., 4.])  # vector of length 2, matching the shape

        y_val, grad_x_val = executor.run(feed_dict={x: x_val})

        expected_yval = T.norm(x_val)**2
        expected_grad_x_val = 2 * x_val

        assert isinstance(x_inner, ad.Node)
        assert T.array_equal(y_val, expected_yval)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
def test_add_mul_mix_1():
    x1 = ad.Variable(name="x1")
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    y = x1 + x2 * x3 * x1

    grad_x1, grad_x2, grad_x3 = ad.gradients(y, [x1, x2, x3])

    executor = ad.Executor([y, grad_x1, grad_x2, grad_x3])
    x1_val = 1 * np.ones(3)
    x2_val = 2 * np.ones(3)
    x3_val = 3 * np.ones(3)
    y_val, grad_x1_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
        x1: x1_val,
        x2: x2_val,
        x3: x3_val
    })

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, x1_val + x2_val * x3_val)
    assert np.array_equal(grad_x1_val, np.ones_like(x1_val) + x2_val * x3_val)
    assert np.array_equal(grad_x2_val, x3_val * x1_val)
    assert np.array_equal(grad_x3_val, x2_val * x1_val)
def test_div_1():
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    y = x2 / x3

    grad_x2, grad_x3 = ad.gradients(y, [x2, x3])

    executor = ad.Executor([y, grad_x2, grad_x3])
    x2_val = 6 * np.ones(3)
    x3_val = 3 * np.ones(3)
    y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
        x2: x2_val,
        x3: x3_val
    })

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, x2_val / x3_val)
    assert np.array_equal(grad_x2_val, 1 / x3_val)
    assert np.array_equal(grad_x3_val, -x2_val / (x3_val * x3_val))
def test_summation_einsum_2(backendopt):
    for datatype in backendopt:
        T.set_backend(datatype)

        x = ad.Variable(name="x", shape=[2, 2])
        y = ad.Variable(name="y", shape=[2, 2])
        out = ad.sum(ad.einsum('ij,ab->ab', x, y))

        grad_x, = ad.gradients(out, [x])

        executor = ad.Executor([out, grad_x])
        x_val = T.tensor([[1., 2.], [3., 4.]])
        y_val = T.tensor([[5., 6.], [7., 8.]])

        out_val, grad_x_val = executor.run(feed_dict={x: x_val, y: y_val})

        expected_out_val = T.sum(T.einsum('ij,ab->ab', x_val, y_val))
        expected_grad_x_val = T.sum(y_val) * T.ones_like(x_val)

        assert T.array_equal(out_val, expected_out_val)
        assert T.array_equal(grad_x_val, expected_grad_x_val)
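
# Illustrative sketch: einsum('ij,ab->ab', x, y) sums out both indices of x, so
# it equals sum(x) * y; therefore sum(einsum('ij,ab->ab', x, y)) =
# sum(x) * sum(y), whose derivative w.r.t. x is sum(y) * ones, matching the
# expected gradient above. A numpy-only confirmation:
import numpy as np

def _sketch_summation_einsum_identity():
    x = np.array([[1., 2.], [3., 4.]])
    y = np.array([[5., 6.], [7., 8.]])
    assert np.allclose(np.einsum('ij,ab->ab', x, y), np.sum(x) * y)
    assert np.isclose(np.sum(np.einsum('ij,ab->ab', x, y)),
                      np.sum(x) * np.sum(y))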
def test_logistic():
    x1 = ad.Variable(name="x1")
    w = ad.Variable(name='w')
    y = 1 / (1 + ad.exp(-ad.reduce_sum(w * x1)))

    grad_w, = ad.gradients(y, [w])

    executor = ad.Executor([y, grad_w])
    x1_val = 3 * np.ones(3)
    w_val = 3 * np.zeros(3)
    y_val, grad_w_val = executor.run(feed_dict={x1: x1_val, w: w_val})

    expected_y_val = 1 / (1 + np.exp(-np.sum(w_val * x1_val)))
    expected_y_grad = expected_y_val * (1 - expected_y_val) * x1_val

    print(expected_y_grad)
    print(grad_w_val)
    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_y_val)
    assert np.sum(np.abs(grad_w_val - expected_y_grad)) < 1e-7
def test_sigmoid():
    x2 = ad.Variable(name="x2")
    y = 1 / (1 + ad.exp_op(-1 * x2))

    grad_x2, = ad.gradients(y, [x2])

    executor = ad.Executor([y, grad_x2])
    x2_val = 2 * np.ones(3)
    y_val, grad_x2_val = executor.run(feed_dict={x2: x2_val})

    assert isinstance(y, ad.Node)
    assert isinstance(grad_x2, ad.Node)

    epsilon = 1e-10
    zero_arr = np.zeros(3) + epsilon
    assert np.all(
        np.less_equal(np.abs(1 / (1 + np.exp(-1 * x2_val)) - y_val), zero_arr))
    print(grad_x2_val)
    print(y_val * (1 - y_val))
    assert np.all(
        np.less_equal(np.abs(grad_x2_val - y_val * (1 - y_val)), zero_arr))
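
# Illustrative sketch: the assertion above relies on the identity
# sigmoid'(x) = sigmoid(x) * (1 - sigmoid(x)). A numpy finite-difference check:
import numpy as np

def _sketch_sigmoid_grad(eps=1e-6):
    x = 2 * np.ones(3)
    s = lambda v: 1.0 / (1.0 + np.exp(-v))
    numeric = (s(x + eps) - s(x - eps)) / (2 * eps)
    assert np.allclose(numeric, s(x) * (1 - s(x)), atol=1e-8)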
def test_matmul_two_vars():
    x2 = ad.Variable(name="x2")
    x3 = ad.Variable(name="x3")
    y = ad.matmul_op(x2, x3)

    grad_x2, grad_x3 = ad.gradients(y, [x2, x3])

    executor = ad.Executor([y, grad_x2, grad_x3])
    x2_val = np.array([[1, 2], [3, 4], [5, 6]])   # 3x2
    x3_val = np.array([[7, 8, 9], [10, 11, 12]])  # 2x3

    y_val, grad_x2_val, grad_x3_val = executor.run(feed_dict={
        x2: x2_val,
        x3: x3_val
    })

    expected_yval = np.matmul(x2_val, x3_val)
    expected_grad_x2_val = np.matmul(np.ones_like(expected_yval),
                                     np.transpose(x3_val))
    expected_grad_x3_val = np.matmul(np.transpose(x2_val),
                                     np.ones_like(expected_yval))

    assert isinstance(y, ad.Node)
    assert np.array_equal(y_val, expected_yval)
    assert np.array_equal(grad_x2_val, expected_grad_x2_val)
    assert np.array_equal(grad_x3_val, expected_grad_x3_val)
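
# Illustrative sketch: with seed gradient dY = ones_like(Y) for Y = A @ B, the
# matmul rules used above are dA = dY @ B^T and dB = A^T @ dY. A numpy
# finite-difference check of dA on the same values:
import numpy as np

def _sketch_matmul_grad(eps=1e-6):
    A = np.array([[1., 2.], [3., 4.], [5., 6.]])   # 3x2
    B = np.array([[7., 8., 9.], [10., 11., 12.]])  # 2x3

    analytic = np.ones((3, 3)) @ B.T  # dL/dA for L = sum(A @ B)
    numeric = np.zeros_like(A)
    for idx in np.ndindex(A.shape):
        d = np.zeros_like(A)
        d[idx] = eps
        numeric[idx] = (np.sum((A + d) @ B) - np.sum((A - d) @ B)) / (2 * eps)
    assert np.allclose(analytic, numeric, atol=1e-5)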