def test_many_vector_expressions(self):
    a = Var('a')
    b = Var('b')
    av = np.random.rand(3)
    bv = np.random.rand(3)
    sigm = layers.Sigmoid().forward
    dJdy = np.ones(3)
    models = [
        (a + b, av + bv, (1, 1)),
        (a * a + b, av ** 2 + bv, (2 * av, 1)),
        (a + b + a, av + bv + av, (2, 1)),
        (Sigmoid(a + b), sigm(av + bv),
         (sigm(av + bv) * (1 - sigm(av + bv)),
          sigm(av + bv) * (1 - sigm(av + bv)))),
        (Sigmoid(a + b + a), sigm(av + bv + av),
         (2 * sigm(av + bv + av) * (1 - sigm(av + bv + av)),
          sigm(av + bv + av) * (1 - sigm(av + bv + av)))),
    ]
    for model, fwd_return, (a_grad, b_grad) in models:
        y = model.forward_variables({'a': av, 'b': bv})
        assert_array_equal(y, fwd_return)
        grad = model.backward_variables(dJdy, debug=True)
        assert_array_almost_equal(grad['a'], a_grad,
                                  err_msg="wrong gradient in model: %s" % model)
        assert_array_almost_equal(grad['b'], b_grad,
                                  err_msg="wrong gradient in model: %s" % model)
def __init__(self, last_h, in_size, hidden_size, Wx, Wh, Wy, by):
    self.last_h = last_h
    # Recurrence: h_t = tanh(Wx . x_t + Wh . h_{t-1})
    self.Wx = syntax.Wx(in_size, hidden_size, initialize=Wx, input=Var('x'))
    self.Wh = syntax.Wx(hidden_size, hidden_size, initialize=Wh, input=Var('last_h'))
    self.first = Tanh(self.Wx + self.Wh)
    # Readout: y_t = Wy . h_t + by
    self.second = syntax.WxBiasLinear(hidden_size, in_size,
                                      initialize_W=Wy, initialize_b=by,
                                      input=Var('h'))
    self.Wy = self.second
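# A minimal usage sketch (an assumption, not part of the original file): one
# step of this vanilla RNN cell via the forward_variables API used by the
# tests in this file. `cell`, `x_t` and `h_prev` are hypothetical names.
#
#   h_t = cell.first.forward_variables({'x': x_t, 'last_h': h_prev})
#   y_t = cell.second.forward_variables({'h': h_t})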
def test_just_numbers(self):
    a = Var('a')
    b = Var('b')
    input_dict = {'a': [2.], 'b': [3.]}

    model = (a + b)
    y = model.forward_variables(input_dict)
    assert_array_equal(y, [5])

    model = (a * b)
    y = model.forward_variables(input_dict)
    assert_array_equal(y, [6])
def test_tanh(self):
    x = Var('x')
    b = Var('b')
    model = Tanh(x + b)
    x_val = np.random.rand(3)
    b_val = np.random.rand(3)
    y = model.forward_variables({'x': x_val, 'b': b_val})
    assert_array_equal(y, np.tanh(x_val + b_val))

    dJdy = np.ones(3)
    grad = model.backward_variables(dJdy, debug=True)
    manual_grad = (1. - np.tanh(x_val + b_val) ** 2) * dJdy
    assert_array_equal(grad['x'], manual_grad)
    assert_array_equal(grad['b'], manual_grad)
def __init__(self, Wz, Wr, W, last_h, decoderW):
    self.last_h = last_h
    h = Var('h')
    x = Var('x')
    hx = Concat(h, x)
    # Gates: z = sigmoid(Wz . [h, x]), r = sigmoid(Wr . [h, x])
    z = Sigmoid(Linear(0, 0, initialize=Wz, input=hx))
    r = Sigmoid(Linear(0, 0, initialize=Wr, input=hx))
    # Candidate state: h~ = tanh(W . [r * h, x])
    h_tilde = Tanh(Linear(0, 0, initialize=W, input=Concat(r * h, x)))
    # New state: h' = (1 - z) * h + z * h~
    self.h_model = ((Const(1) - z) * h) + (z * h_tilde)
    self.decoder = Linear(0, 0, initialize=decoderW, input=Var('h'))
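# A minimal usage sketch (an assumption, not part of the original file): one
# GRU step, feeding the new hidden state to the decoder. `gru`, `x_t` and
# `h_prev` are hypothetical names.
#
#   h_t = gru.h_model.forward_variables({'h': h_prev, 'x': x_t})
#   y_t = gru.decoder.forward_variables({'h': h_t})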
def test_concat_op(self):
    a, b, c, d = Var('a'), Var('b'), Var('c'), Var('d')
    model = Concat(a, b) * Concat(c, d)
    aval, bval, cval, dval = [1.], [2.], [3.], [4.]
    y = model.forward_variables({'a': aval, 'b': bval, 'c': cval, 'd': dval})
    assert_array_equal(y, [3, 8])

    grads = model.backward_variables(1, debug=True)
    assert_array_equal(grads['a'], cval)
    assert_array_equal(grads['b'], dval)
    assert_array_equal(grads['c'], aval)
    assert_array_equal(grads['d'], bval)
def test_update_weights_layer_vs_syntax(self):
    x = np.array([1., 2., 3.])
    optimizer = SGD(0.1)
    W = np.random.rand(3, 3 + 1)

    linear_layer = layers.Linear(3, 3, initialize=W.copy())
    linear_layer_model = Seq(linear_layer, layers.Tanh)
    y = linear_layer_model.forward(x)
    back = linear_layer_model.backward(np.ones(3))

    var_x = Var('x')
    syntax_linear = Linear(3, 3, initialize=W.copy(), input=var_x)
    syntax_model = Tanh(syntax_linear)
    syntax_y = syntax_model.forward_variables({'x': x})
    syntax_back = syntax_model.backward_variables(np.ones(3))

    assert_array_equal(linear_layer.delta_W, syntax_linear.layer.delta_W)

    # update weights in both models
    linear_layer_model.update_weights(optimizer)
    syntax_model.update_weights(optimizer)

    assert_array_equal(y, syntax_y)
    assert_array_equal(back, syntax_back['x'])
    assert_array_equal(linear_layer.W, syntax_linear.layer.W)
def test_compare_linear_syntax_and_linear_layer(self):
    x = np.random.rand(3)
    syntax_model = syntax.WxBiasLinear(3, 4, initialize_W='ones',
                                       initialize_b='ones', input=Var('x'))
    layer_model = layers.Linear(3, 4, initialize='ones')
    optimizer = SGD(0.1)
    # W = np.ones((4, 3))
    # b = np.ones(4)

    for i in range(5):
        syntax_y = syntax_model.forward_variables({'x': x})
        layer_y = layer_model.forward(x)
        assert_array_almost_equal(syntax_y, layer_y, decimal=12)

        dJdy = np.random.rand(4)
        syntax_grad = syntax_model.backward_variables(dJdy)
        layer_grad = layer_model.backward(dJdy)
        self.assertEqual(syntax_grad['x'].shape, layer_grad.shape,
                         'gradients should have the same vector shape')
        assert_array_almost_equal(syntax_grad['x'], layer_grad)

        # real_y = W.dot(x) + b
        # real_grad = W.T.dot(dJdy)
        # assert_array_equal(real_y, syntax_y)
        # assert_array_equal(syntax_grad['x'], real_grad)

        syntax_model.update_weights(optimizer)
        layer_model.update_weights(optimizer)
def test_proper_equation_sum(self):
    var_a = Var('a')
    var_b = Var('b')
    a = np.array([2.3, 3., 3])
    b = np.array([3., 5., 4])
    input_dict = {'a': a, 'b': b}
    sigm = layers.Sigmoid().forward

    model = Sigmoid(var_a + var_a)
    y = model.forward_variables(input_dict)
    assert_array_equal(y, sigm(a + a))

    grad = model.backward_variables(np.ones(3))
    assert_array_almost_equal(grad['a'], 2 * sigm(a + a) * (1 - sigm(a + a)))
def test_proper_equation(self):
    var_a = Var('a')
    var_b = Var('b')
    a = np.array([2.3, 3., 3])
    b = np.array([3., 5., 4])
    input_dict = {'a': a, 'b': b}
    sigm = layers.Sigmoid().forward

    model = Sigmoid(var_a * var_a) + var_b * var_b * var_b + var_a
    print(model)
    y = model.forward_variables(input_dict)
    assert_array_equal(y, sigm(a ** 2) + (b ** 3 + a))

    grad = model.backward_variables(np.ones(3))
    assert_array_almost_equal(grad['a'],
                              2 * a * sigm(a ** 2) * (1 - sigm(a ** 2)) + 1)
    assert_array_almost_equal(grad['b'], 3 * b * b)
def __init__(self, in_size, hidden_size, last_h_store, last_C_store,
             Wf, bf, Wi, bi, Wc, bc, Wo, bo):
    # States
    self.last_h_store = last_h_store
    self.last_C_store = last_C_store

    # Input
    x = Var('x')
    last_h = Var('last_h')
    last_C = Var('last_C')
    xh = Concat(x, last_h)

    # C network: forget gate, input gate, candidate cell state
    # f = sigmoid(Wf . [x, h] + bf), i = sigmoid(Wi . [x, h] + bi)
    # C~ = tanh(Wc . [x, h] + bc), C' = f * C + i * C~
    f = Sigmoid(WxBiasLinear(in_size, hidden_size,
                             initialize_W=Wf, initialize_b=bf, input=xh))
    i = Sigmoid(WxBiasLinear(in_size, hidden_size,
                             initialize_W=Wi, initialize_b=bi, input=xh))
    C_tilde = Tanh(WxBiasLinear(in_size, hidden_size,
                                initialize_W=Wc, initialize_b=bc, input=xh))
    self.C_model = (f * last_C) + (i * C_tilde)

    # h network: output gate applied to the new cell state
    # o = sigmoid(Wo . [x, h] + bo), h' = o * tanh(C')
    new_C = Var("new_C")
    o = Sigmoid(WxBiasLinear(in_size, hidden_size,
                             initialize_W=Wo, initialize_b=bo, input=xh))
    self.h_model = o * Tanh(new_C)
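# A minimal usage sketch (an assumption, not part of the original file): an
# LSTM step runs in two stages, since h_model consumes the freshly computed
# cell state through Var("new_C"). `lstm`, `x_t`, `h_prev` and `C_prev` are
# hypothetical names.
#
#   C_t = lstm.C_model.forward_variables({'x': x_t, 'last_h': h_prev, 'last_C': C_prev})
#   h_t = lstm.h_model.forward_variables({'x': x_t, 'last_h': h_prev, 'new_C': C_t})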
def test_basic(self):
    a_var = Var('a')
    b_var = Var('b')
    a = np.random.rand(5)
    b = np.random.rand(5)
    input_dict = {'a': a, 'b': b}

    # a+b
    model = (a_var + b_var)
    y = model.forward_variables(input_dict)
    assert_array_equal(y, a + b)
    grad = model.backward_variables(np.ones(5))
    assert_array_equal(grad['a'], [1, 1, 1, 1, 1])
    assert_array_equal(grad['b'], [1, 1, 1, 1, 1])

    # a+a+b
    model = (a_var + b_var + a_var)
    y = model.forward_variables(input_dict)
    assert_array_almost_equal(y, a + a + b)
    grad = model.backward_variables(np.ones(5), debug=True)
    assert_array_equal(grad['a'], [2, 2, 2, 2, 2])
    assert_array_equal(grad['b'], [1, 1, 1, 1, 1])

    # a-b
    model = (a_var - b_var - b_var)
    y = model.forward_variables(input_dict)
    assert_array_equal(y, a - 2 * b)
    grad = model.backward_variables(np.ones(5))
    assert_array_equal(grad['a'], [1, 1, 1, 1, 1])
    assert_array_equal(grad['b'], [-2, -2, -2, -2, -2])

    # a*a*b
    model = a_var * b_var * a_var
    y = model.forward_variables(input_dict)
    assert_array_equal(y, a * b * a)
    grad = model.backward_variables(np.ones(5))
    assert_array_almost_equal(grad['a'], 2 * a * b, decimal=12)
    assert_array_almost_equal(grad['b'], a * a, decimal=12)
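# A finite-difference cross-check (an addition, not part of the original
# suite). It relies only on numpy and the forward_variables API exercised
# above; `numeric_grad` is a hypothetical helper name. For the models in
# test_basic, numeric_grad(model, input_dict, 'a') should approximately
# match model.backward_variables(np.ones(5))['a'].
def numeric_grad(model, inputs, name, eps=1e-6):
    """Estimate dJ/d inputs[name] for J = sum(model output) via central differences."""
    x = np.asarray(inputs[name], dtype=float)
    grad = np.zeros_like(x)
    for i in range(x.size):
        # Perturb one component at a time, leaving the other inputs untouched.
        bumped = {k: np.array(v, dtype=float) for k, v in inputs.items()}
        bumped[name][i] += eps
        y_plus = np.sum(model.forward_variables(bumped))
        bumped[name][i] -= 2 * eps
        y_minus = np.sum(model.forward_variables(bumped))
        grad[i] = (y_plus - y_minus) / (2 * eps)
    return grad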