# -*- coding: utf-8 -*-
import numpy as np
import pytest

# Expression-graph primitives (Var, Val, VSF, Add, Mul, Dot, CTimes,
# Transpose, Differentiation, Word, RecursiveNN) and the test helpers
# (assert_all, NormalizedMatrix, the reset_NodeDict fixture) are provided
# by the package under test.


def test_DiffKnownFunctions(reset_NodeDict):
    x = Var('x')
    fx = VSF('sin', x)
    dfx = fx.diff(x)
    # dfx is used in dgfx again; forgetting to cache it may cause subtle errors.
    dfx.cache()
    assert dfx.expression() == 'cos(x)'

    gfx = VSF('exp', Mul(Val(3), fx))
    dgfx = gfx.diff(x)
    assert dgfx.expression() == u'exp(3*sin(x))⨯3⨯cos(x)'
    # Caching the top expressions only is enough.
    gfx.cache()
    dgfx.cache()
    for v in [1.0, 2.0, 14.2, 5.1, 5.72341]:
        x.val = v
        assert dfx.val == np.cos(v)
        assert gfx.val == np.exp(3 * np.sin(v))
        # Allow a slight difference for composite numpy expressions.
        np.testing.assert_allclose(
            dgfx.val, np.exp(3 * np.sin(v)) * 3 * np.cos(v),
            rtol=1e-10, atol=0)

    hfx = VSF('log', fx)
    dhfx = hfx.diff(x)
    assert dhfx.expression() == u'1/(sin(x))⨯cos(x)'
    hx = VSF('log', Add(fx, VSF('exp', x)))
    dhx = hx.diff(x)
    assert dhx.expression() == u'1/(sin(x)+exp(x))⨯{cos(x)+exp(x)}'
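# Every test takes a `reset_NodeDict` fixture that is defined elsewhere in
# the project (e.g. in conftest.py). From its name and usage it presumably
# clears the global node registry between tests, so that the expression
# reuse checks below start from a clean slate. A hypothetical sketch:
#     @pytest.fixture
#     def reset_NodeDict():
#         NodeDict.clear()  # assumed name of the global node registry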
def InitialRNN():
    ran = lambda x: np.random.random(x) - 0.5
    n = 200
    h0, w0, b0, u0 = Var('h0'), Var('w0'), Var('b0'), Var('u0')
    vh0, vw0, vb0, vu0 = ran((n, 1)), ran((n, 2 * n)), ran((n, 1)), ran((n, 1))
    h0.val, w0.val, b0.val, u0.val = vh0, vw0, vb0, vu0
    u0.val /= np.abs(u0.val).sum()  # L1 normalization.
    return RecursiveNN(w0, b0, u0)
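# InitialRNN is a helper, not a test: it returns a RecursiveNN with hidden
# size n = 200 and an L1-normalized scoring vector u0. Illustrative usage,
# mirroring the tests further below:
#     rnn = InitialRNN()
#     score = rnn.score(rnn.combineTwoNodes(Word('the'), Word('cat')))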
def test_ReuseExistingExpressions(reset_NodeDict):
    ran = lambda x: np.random.random(x) - 0.5
    x, y, a, b = Var('x'), Var('y'), Var('a'), Var('b')
    D, C, B, A = Var('D'), Var('C'), Var('B'), Var('A')
    x.val, y.val, a.val, b.val = ran((1, 4)), ran((4, 1)), ran((5, 1)), ran((6, 1))
    D.val, C.val, B.val, A.val = ran((4, 4)), ran((4, 6)), ran((6, 5)), ran((5, 4))

    xDy = Dot(x, Dot(D, y))
    xDy.cache()
    assert Var('x') is xDy.x
    assert Dot(D, y).y is Var('y')
    assert Dot(D, y) is xDy.y

    xDy2 = Dot(Dot(x, D), y)
    tmp = Differentiation(xDy2, x)
    tmp2 = Differentiation(xDy2, x)
    assert tmp2 is not tmp
    tmp.cache()
    tmp2 = Differentiation(xDy2, x)
    assert tmp2 is tmp
    tmp3 = Differentiation(xDy, x)
    assert tmp3 is tmp

    f = Dot(Dot(x, C), Dot(B, VSF('sin', Add(Dot(A, y), a))))
    f.cache()
    dfdy = Differentiation(f, y)
    dfdy2 = Differentiation(f, y)
    assert dfdy2 is not dfdy
    dfdy.cache()
    dfdy2 = Differentiation(f, y)
    assert dfdy2 is dfdy
    dfda = Differentiation(f, a)
    assert dfda.var is dfdy.var.x

    g = Dot(Dot(x, C),
            VSF('sin', Add(Dot(B, VSF('sin', Add(Dot(A, y), a))), b)))
    g.cache()
    assert g.y.var.x is f.y
    dgdA = Differentiation(g, A)
    dgdA.cache()
    dgdB = Differentiation(g, B)
    print unicode(dgdB)
    assert dgdB.y.var is f.y.y
    assert dgdA.x.var.x.x is dgdB.x.var
def test_GradientNumericalChecks(reset_NodeDict):
    ran = lambda x: np.random.random(x) - 0.5
    x, y, a, b = Var('x'), Var('y'), Var('a'), Var('b')
    D, C, B, A = Var('D'), Var('C'), Var('B'), Var('A')
    # g := x⋅C⋅sin(B⋅sin(A⋅y+a)+b)
    g = Dot(Dot(x, C),
            VSF('sin', Add(Dot(B, VSF('sin', Add(Dot(A, y), a))), b)))
    g.cache()
    # 0.01 may not be small enough for g = x⋅C⋅sin(B⋅sin(A⋅y+a)+b).
    scale = 0.001
    for var in [B, A, y, a]:
        p = []
        p_ran = []
        for i in range(3):
            x.val, y.val, a.val, b.val = ran((1, 4)), ran((4, 1)), ran((5, 1)), ran((6, 1))
            #D,C,B,A = Var('D', ran((4,4))), Var('C', ran((4,6))), Var('B', ran((6,5))), Var('A', ran((5,4)))
            D.val, C.val, B.val, A.val = ran((4, 4)), ran((4, 6)), ran((6, 5)), ran((5, 4))
            gradient = Differentiation(g, var)
            gradient.cache()
            var0 = var.val
            delta = NormalizedMatrix(ran(var.val.shape), scale)
            # Random direction with the same overall magnitude as the true
            # gradient, used as a baseline the analytic gradient must beat.
            rand_grad = NormalizedMatrix(ran(gradient.val.shape),
                                         gradient.val.sum())
            dg_ran = np.sum(delta * rand_grad)
            dg_grad = np.sum(delta * gradient.val)
            g0 = g.val
            var.val = var0 + delta
            g1 = g.val
            dg = g1 - g0
            p.append(dg / dg_grad)
            p_ran.append(dg / dg_ran)
        p = np.array(p)
        p_ran = np.array(p_ran)
        precision = np.abs(np.mean(p) - 1)
        precision_ran = np.abs(np.mean(p_ran) - 1)
        assert precision < 10 * scale
        assert precision < precision_ran
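# `NormalizedMatrix` is not defined in this file; judging from its call
# sites it rescales a random matrix to a given overall magnitude, roughly
# (hypothetical sketch):
#     def NormalizedMatrix(m, scale):
#         return m * scale / np.abs(m).sum()
#
# The test above relies on the first-order identity
# g(v + delta) - g(v) ≈ sum(delta * grad g(v)), which the analytic gradient
# must satisfy far better than a random direction does. A minimal
# self-contained illustration with plain numpy; the quadratic form is an
# illustrative stand-in, not part of the library under test:
def _finite_difference_sketch():
    ran = lambda shape: np.random.random(shape) - 0.5
    A = ran((4, 4))
    f = lambda v: float(v.T.dot(A).dot(v))  # scalar f(v) = vᵀ⋅A⋅v
    grad = lambda v: (A + A.T).dot(v)       # analytic gradient of f
    v0 = ran((4, 1))
    delta = 1e-4 * ran((4, 1))
    dg = f(v0 + delta) - f(v0)              # actual change
    dg_grad = float(delta.T.dot(grad(v0)))  # first-order prediction
    np.testing.assert_allclose(dg, dg_grad, rtol=1e-2, atol=0)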
def test_ExpressionMutations(reset_NodeDict):
    x = Var('x')
    fx = VSF('sin', x, np.sin)
    gx = VSF('exp', x, np.exp)
    # Mutable expressions need to be cached.
    fx.cache()
    gx.cache()
    for v in [1.0, 0.5, 0.1]:
        x.val = v
        assert fx.val == np.sin(v)
        assert gx.val == np.exp(v)
def test_CacheKnownValues(reset_NodeDict):
    x = Var('x')
    fx = VSF('cos', x, np.cos)
    gfx = VSF('exp', fx, np.exp)
    gfx.cache()
    exp_cos = lambda x: np.exp(np.cos(x))
    for v in np.random.random(10):
        x.val = v
        assert gfx.val == exp_cos(v)
    for i in range(100):
        assert gfx.val == exp_cos(v)

    y = Var('y')
    hy = VSF('tanh', y, np.tanh)
    hy.cache()
    for v in np.random.random(10):
        y.val = v
        assert hy.val == np.tanh(v)

    gfx_hy = CTimes(gfx, hy)
    gfx_hy.cache()
    exp_cos_x_times_tanh_y = lambda x, y: exp_cos(x) * np.tanh(y)
    vx = 5.7
    vy = np.array([1.1, 2.1, 0.5])
    x.val = vx
    y.val = vy
    print gfx_hy.val / exp_cos_x_times_tanh_y(vx, vy)
    assert_all(gfx_hy.val == exp_cos_x_times_tanh_y(vx, vy))

    print "Change x only:"
    #TODO: verify that hy is not re-evaluated but read from the cache instead.
    vx = 1.0
    x.val = vx
    assert_all(gfx_hy.val == exp_cos_x_times_tanh_y(vx, vy))
    print "Change y only:"
    #TODO: verify that gfx is not re-evaluated but read from the cache instead.
    vy = 1.0
    y.val = vy
    assert_all(gfx_hy.val == exp_cos_x_times_tanh_y(vx, vy))

    # An instance of Var must be single-assigned,
    # but this is not yet enforced by the code.
    a = Var('a')
    b = Var('b')
    ab = Mul(a, b)
    ab.cache()
    assert np.isnan(ab.val)
    a.val = 1.0
    assert np.isnan(ab.val)
    b.val = 2.0
    assert ab.val == 2.0
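# `assert_all` is not defined in this file; presumably it is a small helper
# along the lines of (hypothetical):
#     def assert_all(cond):
#         assert np.all(cond)
#
# The caching contract exercised above -- a cached node re-evaluates only
# when one of its inputs has changed -- can be illustrated with a tiny
# stand-alone dirty-flag sketch, independent of the library under test:
class _CachedSketch(object):
    def __init__(self, fun, arg):
        self.fun = fun
        self.arg = arg
        self._val, self._dirty = None, True

    def set_input(self, v):
        self.arg = v
        self._dirty = True  # input changed: invalidate the cache

    @property
    def val(self):
        if self._dirty:  # recompute only when stale
            self._val = self.fun(self.arg)
            self._dirty = False
        return self._val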
def test_IterativeParsing(reset_NodeDict):
    ran = lambda x: np.random.random(x) - 0.5
    n = 5
    h0, w0, b0, u0 = Var('h0'), Var('w0'), Var('b0'), Var('u0')
    vh0, vw0, vb0, vu0 = ran((n, 1)), ran((n, 2 * n)), ran((n, 1)), ran((n, 1))
    h0.val, w0.val, b0.val, u0.val = vh0, vw0, vb0, vu0
    rnn = RecursiveNN(w0, b0, u0)

    the, cat, on, hat = Word('the'), Word('cat'), Word('on'), Word('hat')
    nodes = [the, cat, on, the, hat]
    # 'the' appears twice and is the same node instance.
    assert nodes[0] is nodes[3]
    sentence, score = rnn.combineToSentence(nodes)
    print unicode(sentence), score.val
    print sentence
    print '%s' % sentence
def test_Transpose(reset_NodeDict):
    vx = np.matrix([5, 1, 2])
    vy = np.matrix([1, 3, 2]).T
    x = Var('x', vx)
    y = Var('y', vy)
    fy = VSF('f', y)
    gx = VSF('g', x)
    fygx = Mul(fy, gx)
    assert unicode(Transpose(fygx).simplify()) == u'[f(y)*g(x)]ᵀ'
    assert unicode(Transpose(Var('z', 2)).simplify()) == 'z'
    assert Transpose(Var('z', 2)).val == 2
    assert_all(Transpose(x).val == vx.T)
    assert_all(Transpose(Transpose(x)).val == x.val)
    y.val = vy.T
    xyt = Mul(x, Transpose(y))
    assert unicode(Transpose(xyt)) == u'[x*yᵀ]ᵀ'
    assert xyt.val == 12
def test_RNNScoreGradientNumericalChecks(reset_NodeDict):
    ran = lambda x: np.random.random(x) - 0.5
    n = 5
    h0, w0, b0, u0 = Var('h0'), Var('W0'), Var('b0'), Var('u0')
    vh0, vw0, vb0, vu0 = ran((n, 1)), ran((n, 2 * n)), ran((n, 1)), ran((n, 1))
    h0.val, w0.val, b0.val, u0.val = vh0, vw0, vb0, vu0
    rnn = RecursiveNN(w0, b0, u0)

    the, cat, on, a, hat = Word('the'), Word('cat'), Word('on'), Word('a'), Word('hat')
    the_cat = rnn.combineTwoNodes(the, cat)
    a_hat = rnn.combineTwoNodes(a, hat)
    the_cat_on = rnn.combineTwoNodes(the_cat, on)
    the_cat_on_a_hat = rnn.combineTwoNodes(the_cat_on, a_hat)
    assert unicode(the_cat_on_a_hat) == u'(((the,cat),on),(a,hat))'
    #assert unicode(rnn.score(the_cat))==u'u0ᵀ⋅tanh(W0⋅{the⊕cat}+b0)'
    #assert unicode(rnn.score(the_cat_on))==u'u0ᵀ⋅tanh(W0⋅{tanh(W0⋅{the⊕cat}+b0)⊕on}+b0)'

    score = rnn.score(the_cat_on_a_hat)
    score.cache()
    s0 = score.val
    gradient = Differentiation(score, w0)
    #TODO: Check parents management and make the for-loop work.
    #for i in range(10):
    diff = 0.001 * ran(w0.val.shape)
    ds_grad = np.sum(diff * gradient.val)
    # Shuffling the true gradient keeps its entry distribution but destroys
    # its direction, giving a matched-scale random baseline.
    tmp = gradient.val.copy()
    np.random.shuffle(tmp)
    ds_ran = np.sum(diff * tmp)
    w0.val += diff
    s1 = score.val
    ds = s1 - s0
    print ds_grad / ds, ds_ran / ds
    assert abs(ds - ds_grad) < abs(ds - ds_ran)
    np.testing.assert_allclose(ds, ds_grad, rtol=1e-2, atol=0)

    assert score.isContain(w0)
    assert not score.isContain(Var('xx'))
def test_Evaluation(reset_NodeDict):
    vx = np.array([1.0, 2.0, 3.0]).reshape(1, 3)
    vy = np.array([2.0, 3.0, 4.0]).reshape(3, 1)
    vz = np.array([3.0, 5.0, 7.0]).reshape(1, 3)
    x = Var('x')
    x.val = vx
    y = Var('y', vy)
    z = Var('z', vz)
    # Mismatched shapes: (1,3)⋅(1,3) is not a valid dot product.
    with pytest.raises(ValueError):
        Dot(x, Var('t', vy.T)).val

    xy = Mul(x, y)
    assert unicode(xy) == 'x*y'
    assert_all(xy.val == vx.dot(vy))
    x_plus_z = Add(x, z)
    assert unicode(x_plus_z) == 'x+z'
    assert_all(x_plus_z.val == vx + vz)
    assert_all(CTimes(xy, z).val == CTimes(z, xy).val)
    assert_all(CTimes(xy, z).val == vx.dot(vy) * vz)

    s0 = 1.57
    s = Var('s', s0)
    fs = VSF('cos', s, np.cos)
    assert unicode(fs) == 'cos(s)'
    assert fs.val == np.cos(s0)