Example #1
0
def test_recurrent(data, choice):
    """Check ``RecurrentUnit`` against a hand-rolled tanh recurrence.

    The same drawn parameters drive both the fused unit and an explicit
    step-by-step loop; after back-propagation the weight data and gradients
    from the two paths must agree to within ``atol=1E-3``.
    """
    X = data.draw(
        hnp.arrays(shape=hnp.array_shapes(max_side=3, min_dims=3, max_dims=3),
                   dtype=float,
                   elements=st.floats(-10, 10)))
    T, N, C = X.shape
    D = choice(list(range(1, 5)))

    # Zero initial hidden state, then recurrent / input / read-out weights.
    s0 = data.draw(
        hnp.arrays(shape=(N, D), dtype=float, elements=st.floats(0.0, 0.0)))
    W = data.draw(
        hnp.arrays(shape=(D, D), dtype=float, elements=st.floats(-10.0, 10.0)))
    U = data.draw(
        hnp.arrays(shape=(C, D), dtype=float, elements=st.floats(-10.0, 10.0)))
    V = data.draw(
        hnp.arrays(shape=(D, C), dtype=float, elements=st.floats(-10.0, 10.0)))

    # Wrap each parameter in a Tensor; the *2 copies feed the reference path.
    W, U, V, s0 = (Tensor(arr) for arr in (W, U, V, s0))
    W2, U2, V2, s2 = (t.__copy__() for t in (W, U, V, s0))

    rec = RecurrentUnit(U, W, V, T)

    # Loss: dense read-out summed over every time step past the initial state.
    if T > 1:
        ls = add_sequence(*(dense(state, V).sum() for state in rec(X)[1:]))
    else:
        ls = dense(rec(X)[1], V).sum()
    ls.backward()

    # Reference: explicit tanh recurrence using the copied parameters.
    state = s2
    ls2 = 0
    for x in X:
        state = tanh(dense(x, U2) + dense(state, W2))
        ls2 += dense(state, V2).sum()
    ls2.backward()

    for live, ref in ((W, W2), (U, U2), (V, V2)):
        assert np.allclose(live.data, ref.data, atol=1E-3)
        assert np.allclose(live.grad, ref.grad, atol=1E-3)
Example #2
0
def test_gru_fwd(X, D, dropout, data: st.DataObject):
    """Forward-pass check for the fused ``gru`` op.

    The fused GRU (built with ``constant=True``) is compared step-by-step
    against an explicit Python loop implementing the same gate equations,
    using independent copies of every parameter.  Because the graph is
    constant, the final loop verifies that no gradients exist after
    ``null_gradients()``.

    Parameters
    ----------
    X : array, shape-(T, N, C)
        Input sequence (time, batch, channel) — unpacked on the first line.
    D : int
        Hidden-state dimensionality.
    dropout : float
        Dropout probability; falsy (0) disables the dropout branches below.
    data : st.DataObject
        Hypothesis draw object used to generate the parameter arrays.
    """
    T, N, C = X.shape

    # Recurrent weights for the update (z), reset (r), and candidate (h) gates.
    Wz, Wr, Wh = data.draw(
        hnp.arrays(shape=(3, D, D),
                   dtype=float,
                   elements=st.floats(-10.0, 10.0)),
        label="Wz, Wr, Wh",
    )

    # Input-to-hidden weights for the three gates.
    Uz, Ur, Uh = data.draw(
        hnp.arrays(shape=(3, C, D),
                   dtype=float,
                   elements=st.floats(-10.0, 10.0)),
        label="Uz, Ur, Uh",
    )

    # Per-gate bias vectors.
    bz, br, bh = data.draw(
        hnp.arrays(shape=(3, D), dtype=float, elements=st.floats(-10.0, 10.0)),
        label="bz, br, bh",
    )

    # Read-out matrix mapping the hidden state back to C channels.
    V = data.draw(
        hnp.arrays(shape=(D, C), dtype=float, elements=st.floats(-10.0, 10.0)),
        label="V",
    )

    # Zero initial hidden state, shape-(N, D).
    s0 = np.zeros((N, D), dtype=float)

    # Wrap each array in a Tensor and keep an independent copy (the *2
    # versions) for the reference computation below.
    X = Tensor(X)
    X2 = X.__copy__()

    Wz = Tensor(Wz)
    Wz2 = Wz.__copy__()

    Uz = Tensor(Uz)
    Uz2 = Uz.__copy__()

    bz = Tensor(bz)
    bz2 = bz.__copy__()

    Wr = Tensor(Wr)
    Wr2 = Wr.__copy__()

    Ur = Tensor(Ur)
    Ur2 = Ur.__copy__()

    br = Tensor(br)
    br2 = br.__copy__()

    Wh = Tensor(Wh)
    Wh2 = Wh.__copy__()

    Uh = Tensor(Uh)
    Uh2 = Uh.__copy__()

    bh = Tensor(bh)
    bh2 = bh.__copy__()

    V = Tensor(V)
    V2 = V.__copy__()

    s0 = Tensor(s0)
    s2 = s0.__copy__()

    # Fused GRU forward pass; constant=True should render the whole graph
    # non-back-propagating.
    s = gru(X,
            Uz,
            Wz,
            bz,
            Ur,
            Wr,
            br,
            Uh,
            Wh,
            bh,
            dropout=dropout,
            constant=True)
    o = matmul(s[1:], V)
    ls = o.sum()

    assert s.constant is True

    if dropout:
        # Every dropout mask entry must be either 0 (dropped) or the
        # inverted-dropout scale 1/(1 - p) applied to survivors.
        for d in [
                s.creator._dropUr,
                s.creator._dropUz,
                s.creator._dropUh,
                s.creator._dropWr,
                s.creator._dropWz,
                s.creator._dropWh,
        ]:
            assert np.all(np.logical_or(d == 1 / (1 - dropout), d == 0))

    # Reference computation: explicit per-step GRU using the copied
    # parameters.  The recurrent (W*) masks are applied once, up front,
    # reusing the exact masks the fused op generated.
    stt = s2
    all_s = [s0.data]
    ls2 = 0
    if dropout:
        Wz2d = s.creator._dropWz * Wz2
        Wr2d = s.creator._dropWr * Wr2
        Wh2d = s.creator._dropWh * Wh2
    else:
        Wz2d = Wz2
        Wr2d = Wr2
        Wh2d = Wh2
    for n, x in enumerate(X2):
        # Standard GRU gate equations:
        #   z = sigma(x Uz + s Wz + bz)   (update gate)
        #   r = sigma(x Ur + s Wr + br)   (reset gate)
        #   h = tanh(x Uh + (r*s) Wh + bh)  (candidate state)
        if not dropout:
            z = sigmoid(matmul(x, Uz2) + matmul(stt, Wz2d) + bz2)
            r = sigmoid(matmul(x, Ur2) + matmul(stt, Wr2d) + br2)
            h = tanh(matmul(x, Uh2) + matmul((r * stt), Wh2d) + bh2)
        else:
            # Input-side masks are indexed at [0]; presumably the mask is
            # shared across time steps — TODO confirm against the gru op.
            z = sigmoid((s.creator._dropUz[0] * matmul(x, Uz2)) +
                        matmul(stt, Wz2d) + bz2)
            r = sigmoid((s.creator._dropUr[0] * matmul(x, Ur2)) +
                        matmul(stt, Wr2d) + br2)
            h = tanh((s.creator._dropUh[0] * matmul(x, Uh2)) +
                     matmul((r * stt), Wh2d) + bh2)

        # Interpolate between candidate and previous state via the update gate.
        stt = (1 - z) * h + z * stt
        all_s.append(stt)
        o = matmul(stt, V2)
        ls2 += o.sum()

    tolerances = dict(atol=1e-5, rtol=1e-5)
    rec_s_dat = np.stack([i.data for i in all_s])

    # Fused and reference losses / hidden states must agree.
    assert_allclose(ls.data, ls2.data, **tolerances)

    assert_allclose(rec_s_dat, s.data, **tolerances)

    # The forward pass must not have mutated any parameter data.
    assert_allclose(Wz.data, Wz2.data, **tolerances)
    assert_allclose(Wr.data, Wr2.data, **tolerances)
    assert_allclose(Wh.data, Wh2.data, **tolerances)

    assert_allclose(Uz.data, Uz2.data, **tolerances)
    assert_allclose(Ur.data, Ur2.data, **tolerances)
    assert_allclose(Uh.data, Uh2.data, **tolerances)

    assert_allclose(bz.data, bz2.data, **tolerances)
    assert_allclose(br.data, br2.data, **tolerances)
    assert_allclose(bh.data, bh2.data, **tolerances)

    assert_allclose(V.data, V2.data, **tolerances)

    assert_allclose(X.data, X2.data, **tolerances)

    # Constant graph + null_gradients: every tensor must carry no gradient.
    ls.null_gradients()
    for x in [s, Wz, Wr, Wh, bz, br, bh, X, Uz, Ur, Uh, V]:
        assert x.grad is None
Example #3
0
def test_gru_backward(
    data: st.DataObject,
    X: np.ndarray,
    D: int,
    bp_lim: bool,
    dropout: bool,
    U_constants: Tuple[bool, bool, bool],
    W_constants: Tuple[bool, bool, bool],
    b_constants: Tuple[bool, bool, bool],
    X_constant: bool,
    V_constant: bool,
):
    """Back-propagation check for the fused ``gru`` op.

    Gradients from the fused GRU are compared against an explicit per-step
    reference loop built from independent parameter copies.  Each tensor's
    ``constant`` flag is toggled independently; constant tensors must end
    with ``grad is None`` while non-constant ones must match the reference
    gradients.  When ``bp_lim`` is set, ``bp_lim=len(X) - 1`` is passed,
    which should behave identically to no limit.

    Parameters
    ----------
    data : st.DataObject
        Hypothesis draw object for generating parameter arrays.
    X : np.ndarray, shape-(T, N, C)
        Input sequence (time, batch, channel).
    D : int
        Hidden-state dimensionality.
    bp_lim : bool
        Whether to pass a back-prop truncation limit of ``len(X) - 1``.
    dropout : bool
        Whether dropout is active in the fused op and the reference loop.
    U_constants, W_constants, b_constants : Tuple[bool, bool, bool]
        Per-gate (z, r, h) constant flags for the U/W/b parameters.
    X_constant, V_constant : bool
        Constant flags for the input and read-out tensors.
    """
    tolerances = dict(atol=1e-5, rtol=1e-5)
    T, N, C = X.shape

    # Recurrent weights for the update (z), reset (r), and candidate (h) gates.
    Wz, Wr, Wh = data.draw(
        hnp.arrays(shape=(3, D, D),
                   dtype=float,
                   elements=st.floats(-10.0, 10.0)),
        label="Wz, Wr, Wh",
    )

    # Input-to-hidden weights for the three gates.
    Uz, Ur, Uh = data.draw(
        hnp.arrays(shape=(3, C, D),
                   dtype=float,
                   elements=st.floats(-10.0, 10.0)),
        label="Uz, Ur, Uh",
    )

    # Per-gate bias vectors.
    bz, br, bh = data.draw(
        hnp.arrays(shape=(3, D), dtype=float, elements=st.floats(-10.0, 10.0)),
        label="bz, br, bh",
    )

    # Read-out matrix mapping the hidden state back to C channels.
    V = data.draw(
        hnp.arrays(shape=(D, C), dtype=float, elements=st.floats(-10.0, 10.0)),
        label="V",
    )

    # Zero initial hidden state, shape-(N, D).
    s0 = np.zeros((N, D), dtype=float)

    # Wrap each array in a Tensor with its drawn constant flag; the *2
    # copies feed the reference computation.
    X = Tensor(X, constant=X_constant)
    X2 = X.__copy__()

    Wz = Tensor(Wz, constant=W_constants[0])
    Wz2 = Wz.__copy__()

    Uz = Tensor(Uz, constant=U_constants[0])
    Uz2 = Uz.__copy__()

    bz = Tensor(bz, constant=b_constants[0])
    bz2 = bz.__copy__()

    Wr = Tensor(Wr, constant=W_constants[1])
    Wr2 = Wr.__copy__()

    Ur = Tensor(Ur, constant=U_constants[1])
    Ur2 = Ur.__copy__()

    br = Tensor(br, constant=b_constants[1])
    br2 = br.__copy__()

    Wh = Tensor(Wh, constant=W_constants[2])
    Wh2 = Wh.__copy__()

    Uh = Tensor(Uh, constant=U_constants[2])
    Uh2 = Uh.__copy__()

    bh = Tensor(bh, constant=b_constants[2])
    bh2 = bh.__copy__()

    V = Tensor(V, constant=V_constant)
    V2 = V.__copy__()

    s0 = Tensor(s0)
    s2 = s0.__copy__()

    # bp_lim = len(X) - 1 should behave the same as no bp-lim
    s = gru(
        X,
        Uz,
        Wz,
        bz,
        Ur,
        Wr,
        br,
        Uh,
        Wh,
        bh,
        dropout=dropout,
        constant=False,
        bp_lim=len(X) - 1 if bp_lim else None,
    )
    o = matmul(s[1:], V)
    ls = o.sum()
    ls.backward()

    # Reference: explicit per-step GRU with the copied parameters, reusing
    # the exact dropout masks generated by the fused op above.
    stt = s2
    all_s = [s0.data]
    ls2 = 0
    if dropout:
        Wz2d = s.creator._dropWz * Wz2
        Wr2d = s.creator._dropWr * Wr2
        Wh2d = s.creator._dropWh * Wh2
    else:
        Wz2d = Wz2
        Wr2d = Wr2
        Wh2d = Wh2
    for n, x in enumerate(X2):
        # Standard GRU gate equations (z: update, r: reset, h: candidate).
        if not dropout:
            z = sigmoid(matmul(x, Uz2) + matmul(stt, Wz2d) + bz2)
            r = sigmoid(matmul(x, Ur2) + matmul(stt, Wr2d) + br2)
            h = tanh(matmul(x, Uh2) + matmul((r * stt), Wh2d) + bh2)
        else:
            z = sigmoid((s.creator._dropUz[0] * matmul(x, Uz2)) +
                        matmul(stt, Wz2d) + bz2)
            r = sigmoid((s.creator._dropUr[0] * matmul(x, Ur2)) +
                        matmul(stt, Wr2d) + br2)
            h = tanh((s.creator._dropUh[0] * matmul(x, Uh2)) +
                     matmul((r * stt), Wh2d) + bh2)
        stt = (1 - z) * h + z * stt
        all_s.append(stt)
        o = matmul(stt, V2)
        ls2 += o.sum()
    ls2.backward()

    # Stack the reference hidden-state grads (skipping the initial state).
    rec_s_grad = np.stack([i.grad for i in all_s[1:]])

    # For every tensor: non-constant -> gradient matches reference;
    # constant -> no gradient was propagated to it.
    if not s.constant:
        assert_allclose(rec_s_grad, s.grad, **tolerances)
    else:
        assert s.grad is None

    if not Wz.constant:
        assert_allclose(Wz.grad, Wz2.grad, **tolerances)
    else:
        assert Wz.grad is None

    if not Wr.constant:
        assert_allclose(Wr.grad, Wr2.grad, **tolerances)
    else:
        assert Wr.grad is None

    if not Wh.constant:
        assert_allclose(Wh.grad, Wh2.grad, **tolerances)
    else:
        assert Wh.grad is None

    if not Uz.constant:
        assert_allclose(Uz.grad, Uz2.grad, **tolerances)
    else:
        assert Uz.grad is None

    if not Ur.constant:
        assert_allclose(Ur.grad, Ur2.grad, **tolerances)
    else:
        assert Ur.grad is None

    if not Uh.constant:
        assert_allclose(Uh.grad, Uh2.grad, **tolerances)
    else:
        assert Uh.grad is None

    if not bz.constant:
        assert_allclose(bz.grad, bz2.grad, **tolerances)
    else:
        assert bz.grad is None

    if not br.constant:
        assert_allclose(br.grad, br2.grad, **tolerances)
    else:
        assert br.grad is None

    if not bh.constant:
        assert_allclose(bh.grad, bh2.grad, **tolerances)
    else:
        assert bh.grad is None

    if not V.constant:
        assert_allclose(V.grad, V2.grad, **tolerances)
    else:
        assert V.grad is None

    if not X.constant:
        assert_allclose(X.grad, X2.grad, **tolerances)
    else:
        assert X.grad is None

    # null_gradients must clear every gradient in both graphs.
    ls.null_gradients()
    ls2.null_gradients()

    for x in [s, Wz, Wr, Wh, bz, br, bh, X, Uz, Ur, Uh, V]:
        assert x.grad is None
Example #4
0
def test_gru_backward(data):
    """Back-propagation check for the fused ``gru`` op (all-trainable case).

    All hypothesis data is drawn inside the test body.  The fused GRU's
    gradients (for hidden states, all gate parameters, the read-out matrix,
    and the input) are compared against an explicit per-step reference loop
    built from independent parameter copies, then ``null_gradients`` is
    verified to clear every gradient.

    Parameters
    ----------
    data : hypothesis draw object
        Supplies the input sequence, dimensionality, dropout rate, and all
        parameter arrays.
    """
    tolerances = dict(atol=1e-5, rtol=1e-5)
    # Input sequence, shape-(T, N, C): (time, batch, channel).
    X = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=5, min_dims=3, max_dims=3),
            dtype=float,
            elements=st.floats(-10, 10),
        ),
        label="X",
    )
    T, N, C = X.shape
    D = data.draw(st.sampled_from(list(range(1, 5))), label="D")
    dropout = data.draw(
        st.sampled_from([0, 0.45]), label="dropout"
    )  # TODO: RESTORE DROPOUT

    # Per-gate parameters: W* are recurrent (D, D), U* map input (C, D),
    # b* are biases (D,); z/r/h are the update/reset/candidate gates.
    Wz = data.draw(
        hnp.arrays(shape=(D, D), dtype=float, elements=st.floats(-10.0, 10.0)),
        label="Wz",
    )

    Uz = data.draw(
        hnp.arrays(shape=(C, D), dtype=float, elements=st.floats(-10.0, 10.0)),
        label="Uz",
    )

    bz = data.draw(
        hnp.arrays(shape=(D,), dtype=float, elements=st.floats(-10.0, 10.0)), label="bz"
    )

    Wr = data.draw(
        hnp.arrays(shape=(D, D), dtype=float, elements=st.floats(-10.0, 10.0)),
        label="Wr",
    )

    Ur = data.draw(
        hnp.arrays(shape=(C, D), dtype=float, elements=st.floats(-10.0, 10.0)),
        label="Ur",
    )

    br = data.draw(
        hnp.arrays(shape=(D,), dtype=float, elements=st.floats(-10.0, 10.0)), label="br"
    )

    Wh = data.draw(
        hnp.arrays(shape=(D, D), dtype=float, elements=st.floats(-10.0, 10.0)),
        label="Wh",
    )

    Uh = data.draw(
        hnp.arrays(shape=(C, D), dtype=float, elements=st.floats(-10.0, 10.0)),
        label="Uh",
    )

    bh = data.draw(
        hnp.arrays(shape=(D,), dtype=float, elements=st.floats(-10.0, 10.0)), label="bh"
    )

    # Read-out matrix mapping the hidden state back to C channels.
    V = data.draw(
        hnp.arrays(shape=(D, C), dtype=float, elements=st.floats(-10.0, 10.0)),
        label="V",
    )

    # Zero initial hidden state, shape-(N, D).
    s0 = np.zeros((N, D), dtype=float)

    # Wrap each array in a Tensor and keep an independent copy (the *2
    # versions) for the reference computation.
    X = Tensor(X)
    X2 = X.__copy__()

    Wz = Tensor(Wz)
    Wz2 = Wz.__copy__()

    Uz = Tensor(Uz)
    Uz2 = Uz.__copy__()

    bz = Tensor(bz)
    bz2 = bz.__copy__()

    Wr = Tensor(Wr)
    Wr2 = Wr.__copy__()

    Ur = Tensor(Ur)
    Ur2 = Ur.__copy__()

    br = Tensor(br)
    br2 = br.__copy__()

    Wh = Tensor(Wh)
    Wh2 = Wh.__copy__()

    Uh = Tensor(Uh)
    Uh2 = Uh.__copy__()

    bh = Tensor(bh)
    bh2 = bh.__copy__()

    V = Tensor(V)
    V2 = V.__copy__()

    s0 = Tensor(s0)
    s2 = s0.__copy__()

    # Fused GRU forward + backward pass.
    s = gru(X, Uz, Wz, bz, Ur, Wr, br, Uh, Wh, bh, dropout=dropout, constant=False)
    o = dense(s[1:], V)
    ls = o.sum()
    ls.backward()

    # Reference: explicit per-step GRU with the copied parameters, reusing
    # the exact dropout masks generated by the fused op above.
    stt = s2
    all_s = [s0.data]
    ls2 = 0
    if dropout:
        Wz2d = s.creator._dropWz * Wz2
        Wr2d = s.creator._dropWr * Wr2
        Wh2d = s.creator._dropWh * Wh2
    else:
        Wz2d = Wz2
        Wr2d = Wr2
        Wh2d = Wh2
    for n, x in enumerate(X2):
        # Standard GRU gate equations (z: update, r: reset, h: candidate).
        if not dropout:
            z = sigmoid(dense(x, Uz2) + dense(stt, Wz2d) + bz2)
            r = sigmoid(dense(x, Ur2) + dense(stt, Wr2d) + br2)
            h = tanh(dense(x, Uh2) + dense((r * stt), Wh2d) + bh2)
        else:
            z = sigmoid((s.creator._dropUz[0] * dense(x, Uz2)) + dense(stt, Wz2d) + bz2)
            r = sigmoid((s.creator._dropUr[0] * dense(x, Ur2)) + dense(stt, Wr2d) + br2)
            h = tanh(
                (s.creator._dropUh[0] * dense(x, Uh2)) + dense((r * stt), Wh2d) + bh2
            )
        stt = (1 - z) * h + z * stt
        all_s.append(stt)
        o = dense(stt, V2)
        ls2 += o.sum()
    ls2.backward()

    # Stack the reference hidden-state grads (skipping the initial state).
    rec_s_grad = np.stack([i.grad for i in all_s[1:]])

    assert_allclose(rec_s_grad, s.grad, **tolerances)

    assert_allclose(Wz.grad, Wz2.grad, **tolerances)
    assert_allclose(Wr.grad, Wr2.grad, **tolerances)
    assert_allclose(Wh.grad, Wh2.grad, **tolerances)

    assert_allclose(Uz.grad, Uz2.grad, **tolerances)
    assert_allclose(Ur.grad, Ur2.grad, **tolerances)
    assert_allclose(Uh.grad, Uh2.grad, **tolerances)

    assert_allclose(bz.grad, bz2.grad, **tolerances)
    assert_allclose(br.grad, br2.grad, **tolerances)
    assert_allclose(bh.grad, bh2.grad, **tolerances)

    assert_allclose(V.grad, V2.grad, **tolerances)

    assert_allclose(X.grad, X2.grad, **tolerances)

    # null_gradients must clear every gradient in both graphs.
    ls.null_gradients()
    ls2.null_gradients()
    for x in [s, Wz, Wr, Wh, bz, br, bh, X, Uz, Ur, Uh, V]:
        assert x.grad is None
Example #5
0
def test_recurrent(data, choice):
    """Validate ``simple_RNN`` (``backprop_s=True``) against an explicit
    tanh recurrence: hidden states, loss, and every parameter/input
    gradient must match the step-by-step reference."""
    X = data.draw(
        hnp.arrays(shape=hnp.array_shapes(max_side=5, min_dims=3, max_dims=3),
                   dtype=float,
                   elements=st.floats(-10, 10)))
    T, N, C = X.shape
    D = choice(list(range(1, 5)))

    # Zero initial hidden state, then recurrent / input / read-out weights.
    s0 = data.draw(
        hnp.arrays(shape=(N, D), dtype=float, elements=st.floats(0.0, 0.0)))
    W = data.draw(
        hnp.arrays(shape=(D, D), dtype=float, elements=st.floats(-10.0, 10.0)))
    U = data.draw(
        hnp.arrays(shape=(C, D), dtype=float, elements=st.floats(-10.0, 10.0)))
    V = data.draw(
        hnp.arrays(shape=(D, C), dtype=float, elements=st.floats(-10.0, 10.0)))

    # Tensor-wrap everything; the *2 copies drive the reference computation.
    X, W, U, V, s0 = (Tensor(arr) for arr in (X, W, U, V, s0))
    X2, W2, U2, V2, s2 = (t.__copy__() for t in (X, W, U, V, s0))

    s = simple_RNN(X, U, W, backprop_s=True)
    ls = dense(s[1:], V).sum()
    ls.backward()

    # Reference recurrence, recording every hidden state along the way.
    stt = s2
    all_s = [s0.data]
    ls2 = 0
    for x in X2:
        stt = tanh(dense(x, U2) + dense(stt, W2))
        all_s.append(stt)
        ls2 += dense(stt, V2).sum()
    ls2.backward()

    rec_s_dat = np.stack([i.data for i in all_s])
    rec_s_grad = np.stack([i.grad for i in all_s[1:]])
    assert np.allclose(rec_s_dat, s.data)
    assert np.allclose(rec_s_grad, s.grad[1:])
    assert np.allclose(ls.data, ls2.data)
    for live, ref in ((W, W2), (U, U2), (V, V2), (X, X2)):
        assert np.allclose(live.data, ref.data)
        assert np.allclose(live.grad, ref.grad)
Example #6
0
def test_recurrent(data):
    """Validate ``simple_RNN`` against an explicit tanh recurrence built
    from ``matmul``; also verify that ``null_gradients`` clears every
    gradient afterwards."""
    X = data.draw(
        hnp.arrays(
            shape=hnp.array_shapes(max_side=5, min_dims=3, max_dims=3),
            dtype=float,
            elements=st.floats(-10, 10),
        ))
    T, N, C = X.shape
    D = data.draw(st.sampled_from(list(range(1, 5))))

    # Zero initial hidden state, then recurrent / input / read-out weights.
    s0 = data.draw(
        hnp.arrays(shape=(N, D), dtype=float, elements=st.floats(0.0, 0.0)))
    W = data.draw(
        hnp.arrays(shape=(D, D), dtype=float, elements=st.floats(-10.0, 10.0)))
    U = data.draw(
        hnp.arrays(shape=(C, D), dtype=float, elements=st.floats(-10.0, 10.0)))
    V = data.draw(
        hnp.arrays(shape=(D, C), dtype=float, elements=st.floats(-10.0, 10.0)))

    # Tensor-wrap everything; the *2 copies drive the reference computation.
    X, W, U, V, s0 = (Tensor(arr) for arr in (X, W, U, V, s0))
    X2, W2, U2, V2, s2 = (t.__copy__() for t in (X, W, U, V, s0))

    s = simple_RNN(X, U, W)
    ls = matmul(s[1:], V).sum()
    ls.backward()

    # Reference recurrence, recording every hidden state along the way.
    stt = s2
    all_s = [s0.data]
    ls2 = 0
    for x in X2:
        stt = tanh(matmul(x, U2) + matmul(stt, W2))
        all_s.append(stt)
        ls2 += matmul(stt, V2).sum()
    ls2.backward()

    tolerances = dict(atol=1e-5, rtol=1e-5)
    rec_s_dat = np.stack([i.data for i in all_s])
    rec_s_grad = np.stack([i.grad for i in all_s[1:]])
    assert_allclose(rec_s_dat, s.data, **tolerances)
    assert_allclose(rec_s_grad, s.grad[1:], **tolerances)
    assert_allclose(ls.data, ls2.data, **tolerances)
    for live, ref in ((W, W2), (U, U2), (V, V2), (X, X2)):
        assert_allclose(live.data, ref.data, **tolerances)
        assert_allclose(live.grad, ref.grad, **tolerances)

    # All gradients must be cleared once the loss nulls them.
    ls.null_gradients()
    for t in [s, W, X, U, V]:
        assert t.grad is None