Example 1
def test_biRNN_bprop(backend_default, fargs, deltas_buffer):

    # basic sanity check: random inputs, tied forward/backward weights
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # set up the bi-directional rnn
    init_glorot = GlorotUniform()
    birnn = BiRNN(hidden_size,
                  activation=Rectlinclip(slope=0),
                  init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True
    birnn.allocate()

    birnn.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    birnn.set_deltas(deltas_buffer)

    # tie the backward-direction weights to the forward direction
    birnn.W_input_b[:] = birnn.W_input_f
    birnn.W_recur_b[:] = birnn.W_recur_f
    birnn.b_b[:] = birnn.b_f
    birnn.dW[:] = 0

    # set up a unidirectional rnn with the same initialization
    init_glorot = GlorotUniform()
    rnn = Recurrent(hidden_size,
                    activation=Rectlinclip(slope=0),
                    init=init_glorot)
    rnn.configure(in_shape)
    rnn.prev_layer = True
    rnn.allocate()

    rnn.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    rnn.set_deltas(deltas_buffer)

    # inputs and views
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))
    rl = np.concatenate(lr_rev, axis=1)

    # allocate gpu buffers
    inp_lr = birnn.be.array(lr)
    inp_rl = birnn.be.array(rl)

    # outputs
    out_lr_g = birnn.fprop(inp_lr)
    del_lr = birnn.bprop(out_lr_g).get().copy()
    birnn.h_buffer[:] = 0
    out_rl_g = birnn.fprop(inp_rl)
    del_rl = birnn.bprop(out_rl_g).get().copy()

    del_lr_s = get_steps(del_lr, in_shape)
    del_rl_s = get_steps(del_rl, in_shape)
    for (x, y) in zip(del_lr_s, reversed(del_rl_s)):
        assert np.allclose(x, y, rtol=0.0, atol=1.0e-5)
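These bi-directional tests lean on a get_steps helper to split the flattened (features, seq_len * batch) activation buffer into per-step views, which is what makes the time-reversal comparison possible. A minimal NumPy sketch of what such a helper is assumed to do (the actual neon utility may differ in detail):

import numpy as np

def get_steps_sketch(x, shape):
    # split a (features, seq_len * batch) array into seq_len
    # per-step arrays of shape (features, batch)
    seq_len = shape[1]
    return [s.copy() for s in np.split(x, seq_len, axis=1)]

# reversing the step list flips the sequence in time while keeping
# each step's batch layout intact, which is how rl is built above
x = np.arange(12.0).reshape(2, 6)  # features=2, seq_len=3, batch=2
steps = get_steps_sketch(x, (2, 3))
rl = np.concatenate(list(reversed(steps)), axis=1)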
Example 2
def test_glorot(backend, args):
    be = NervanaObject.be
    shape_1 = (1, 2)
    shape_2 = (1000, 10000)
    Wdev_1 = be.empty(shape_1)
    Wdev_2 = be.empty(shape_2)
    glorot_init = GlorotUniform()
    glorot_init.fill(Wdev_1)
    glorot_init.fill(Wdev_2)
    Whost_1 = Wdev_1.get()
    Whost_2 = Wdev_2.get()
    mean_1 = np.mean(Whost_1)
    mean_2 = np.mean(Whost_2)
    # Glorot-uniform samples are zero-mean, so the mean over 10**7 values
    # should sit closer to zero than the mean over just two
    assert np.abs(mean_1) > np.abs(mean_2)

    return
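GlorotUniform draws weights uniformly from [-k, k] with k = sqrt(6 / (fan_in + fan_out)) (Glorot & Bengio, 2010), so the distribution is zero-mean and the empirical mean shrinks as the tensor grows. A hedged NumPy re-implementation of the fill, assuming a (fan_out, fan_in) shape layout (neon's internals may differ):

import numpy as np

def glorot_uniform_fill(shape, rng=np.random):
    # k = sqrt(6 / (fan_in + fan_out)); layout assumed (fan_out, fan_in)
    fan_out, fan_in = shape
    k = np.sqrt(6.0 / (fan_in + fan_out))
    return rng.uniform(-k, k, size=shape)

# the mean over 10**7 samples is (almost surely) nearer zero than
# the mean over 2 samples, mirroring the assertion in the test above
w_small = glorot_uniform_fill((1, 2))
w_large = glorot_uniform_fill((1000, 10000))
assert abs(w_small.mean()) > abs(w_large.mean())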
Example 3
def test_lookuptable_ones_error(backend_default, basic_linargs):
    nin, nout, batch_size, vocab_size = basic_linargs
    NervanaObject.be.bsz = batch_size

    dtypeu = np.float32

    init_glorot = GlorotUniform()
    layer = LookupTable(
        vocab_size=vocab_size, embedding_dim=nout, init=init_glorot)

    inp = np.random.randint(0, vocab_size, size=nin * batch_size)  # word ids in [0, vocab_size)
    layer.configure(nin)
    layer.allocate()
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.set_deltas([layer.be.iobuf(nin)])

    inputs = layer.be.array(inp.reshape((nin, batch_size)))
    out = layer.fprop(inputs).get()
    W = layer.W.get()
    for i in range(nin * batch_size):
        assert np.all(W[inp[i]].T == out[:, i])

    err = dtypeu(np.ones((nout, nin * batch_size)))
    layer.bprop(layer.be.array(err)).get()

    dw = layer.dW.get()
    unqidx, count = np.unique(inp, return_counts=True)
    # with an all-ones error, each word's gradient row equals its occurrence count
    for wrd_id, cnt in zip(unqidx, count):
        dw_exp = err[:, 0] * cnt
        assert np.all(dw_exp == dw[wrd_id, :])

    return
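The gradient check above assumes LookupTable.bprop scatter-adds each error column into the dW row indexed by the corresponding input word id. A small NumPy sketch of that accumulation (a reference model, not neon's actual kernel):

import numpy as np

def lookup_table_dW(inp, err, vocab_size, nout):
    # row inp[i] of dW accumulates error column i (scatter-add)
    dW = np.zeros((vocab_size, nout))
    for i, word_id in enumerate(inp):
        dW[word_id] += err[:, i]
    return dW

# with an all-ones error, every occurrence adds a row of ones, so
# dW[word_id] equals that word's occurrence count
inp = np.array([3, 1, 3])
err = np.ones((4, 3))  # (nout, nin * batch_size)
dW = lookup_table_dW(inp, err, vocab_size=5, nout=4)
assert np.all(dW[3] == 2) and np.all(dW[1] == 1)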
Example 4
def test_lookuptable_zeros_error(backend_default, basic_linargs):
    # basic sanity check: random inputs, all-zero error signal
    nin, nout, batch_size, vocab_size = basic_linargs
    NervanaObject.be.bsz = batch_size

    dtypeu = np.float32

    init_glorot = GlorotUniform()
    layer = LookupTable(
        vocab_size=vocab_size, embedding_dim=nout, init=init_glorot)

    inp = np.random.randint(0, vocab_size, size=nin * batch_size)  # word ids in [0, vocab_size)
    layer.configure(nin)
    layer.allocate()
    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.set_deltas([layer.be.iobuf(nin)])

    inputs = layer.be.array(inp.reshape((nin, batch_size)))
    out = layer.fprop(inputs).get()
    W = layer.W.get()
    for i in range(nin * batch_size):
        assert np.all(W[inp[i]].T == out[:, i])

    err = dtypeu(np.zeros((nout, nin * batch_size)))
    layer.bprop(layer.be.array(err)).get()

    dw = layer.dW.get()
    assert np.min(dw) == 0.0 and np.max(dw) == 0.0

    return
Example 5
def test_biLSTM_bprop(backend_default, fargs):

    # basic sanity check: random inputs, tied forward/backward weights
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # set up the bi-directional LSTM
    init_glorot = GlorotUniform()
    bilstm = BiLSTM(hidden_size, gate_activation=Logistic(),
                    activation=Tanh(), init=init_glorot, reset_cells=True)
    bilstm.configure(in_shape)
    bilstm.prev_layer = True
    bilstm.allocate()
    bilstm.set_deltas([bilstm.be.iobuf(bilstm.in_shape)])

    # tie the backward-direction weights to the forward direction
    nout = hidden_size
    bilstm.W_input_b[:] = bilstm.W_input_f
    bilstm.W_recur_b[:] = bilstm.W_recur_f
    bilstm.b_b[:] = bilstm.b_f
    bilstm.dW[:] = 0

    # inputs and views
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))
    rl = np.concatenate(lr_rev, axis=1)

    # allocate gpu buffers
    inp_lr = bilstm.be.array(lr)
    inp_rl = bilstm.be.array(rl)

    # outputs
    out_lr_g = bilstm.fprop(inp_lr)
    out_lr = out_lr_g.get().copy()
    del_lr = bilstm.bprop(out_lr_g).get().copy()
    bilstm.h_buffer[:] = 0
    out_rl_g = bilstm.fprop(inp_rl)
    out_rl = out_rl_g.get().copy()
    del_rl = bilstm.bprop(out_rl_g).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s,
                                  reversed(out_rl_f_s), reversed(out_rl_b_s)):
        assert np.allclose(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert np.allclose(x_b, y_f, rtol=0.0, atol=1.0e-5)

    del_lr_s = get_steps(del_lr, in_shape)
    del_rl_s = get_steps(del_rl, in_shape)

    for (x, y) in zip(del_lr_s, reversed(del_rl_s)):
        assert np.allclose(x, y, rtol=0.0, atol=1.0e-5)
Example 6
def test_biLSTM_fprop(backend_default, fargs):

    # basic sanity check: random inputs, tied forward/backward weights
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # set up the bi-directional LSTM
    init_glorot = GlorotUniform()
    bilstm = BiLSTM(hidden_size,
                    gate_activation=Logistic(),
                    init=init_glorot,
                    activation=Tanh(),
                    reset_cells=True)
    bilstm.configure(in_shape)
    bilstm.prev_layer = True
    bilstm.allocate()

    # tie the backward-direction weights to the forward direction
    nout = hidden_size
    bilstm.W_input_b[:] = bilstm.W_input_f
    bilstm.W_recur_b[:] = bilstm.W_recur_f
    bilstm.b_b[:] = bilstm.b_f
    bilstm.dW[:] = 0

    # inputs - random and flipped left-to-right inputs
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))

    rl = np.concatenate(lr_rev, axis=1)
    inp_lr = bilstm.be.array(lr)
    inp_rl = bilstm.be.array(rl)

    # outputs
    out_lr = bilstm.fprop(inp_lr).get().copy()

    bilstm.h_buffer[:] = 0
    out_rl = bilstm.fprop(inp_rl).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s, reversed(out_rl_f_s),
                                  reversed(out_rl_b_s)):
        assert allclose_with_out(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert allclose_with_out(x_b, y_f, rtol=0.0, atol=1.0e-5)
Example 7
def test_biRNN_fprop(backend_default, fargs):

    # basic sanity check: random inputs, tied forward/backward weights
    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    out_shape = (hidden_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # set up the bi-directional rnn
    init_glorot = GlorotUniform()
    birnn = BiRNN(hidden_size,
                  activation=Rectlinclip(slope=0),
                  init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True
    birnn.allocate()
    birnn.set_deltas([birnn.be.iobuf(birnn.in_shape)])

    # tie the backward-direction weights to the forward direction
    nout = hidden_size
    birnn.W_input_b[:] = birnn.W_input_f
    birnn.W_recur_b[:] = birnn.W_recur_f
    birnn.b_b[:] = birnn.b_f
    birnn.dW[:] = 0

    # inputs - random and flipped left-to-right inputs
    lr = np.random.random((input_size, seq_len * batch_size))
    lr_rev = list(reversed(get_steps(lr.copy(), in_shape)))

    rl = np.concatenate(lr_rev, axis=1)
    inp_lr = birnn.be.array(lr)
    inp_rl = birnn.be.array(rl)

    # outputs
    out_lr = birnn.fprop(inp_lr).get().copy()

    birnn.h_buffer[:] = 0
    out_rl = birnn.fprop(inp_rl).get().copy()

    # views
    out_lr_f_s = get_steps(out_lr[:nout], out_shape)
    out_lr_b_s = get_steps(out_lr[nout:], out_shape)
    out_rl_f_s = get_steps(out_rl[:nout], out_shape)
    out_rl_b_s = get_steps(out_rl[nout:], out_shape)

    # asserts
    for x_f, x_b, y_f, y_b in zip(out_lr_f_s, out_lr_b_s, reversed(out_rl_f_s),
                                  reversed(out_rl_b_s)):
        assert np.allclose(x_f, y_b, rtol=0.0, atol=1.0e-5)
        assert np.allclose(x_b, y_f, rtol=0.0, atol=1.0e-5)
Example 8
def test_lookuptable_rand_error(backend_default, basic_linargs, deltas_buffer):
    nin, nout, batch_size, vocab_size = basic_linargs
    NervanaObject.be.bsz = batch_size

    dtypeu = np.float32

    init_glorot = GlorotUniform()
    layer = LookupTable(vocab_size=vocab_size,
                        embedding_dim=nout,
                        init=init_glorot)

    inp = np.random.randint(0, vocab_size, size=nin * batch_size)  # word ids in [0, vocab_size)
    layer.configure(nin)
    layer.allocate()

    layer.prev_layer = True  # Hack to force delta buffer allocation
    layer.allocate_deltas(deltas_buffer)
    deltas_buffer.allocate_buffers()
    layer.set_deltas(deltas_buffer)

    inputs = layer.be.array(inp.reshape((nin, batch_size)))
    out = layer.fprop(inputs).get()
    W = layer.W.get()
    for i in range(nin * batch_size):
        assert np.all(W[inp[i]].T == out[:, i])

    err = dtypeu(np.random.random((nout, nin * batch_size)))
    layer.bprop(layer.be.array(err)).get()

    dw = layer.dW.get()
    unqidx, count = np.unique(inp, return_counts=True)
    dw_exp = np.zeros((1, nout))
    for wrd_id, cnt in zip(unqidx, count):
        dw_exp[:] = 0
        cnt_exp = 0
        for i, w_id in enumerate(inp):
            if w_id == wrd_id:
                dw_exp[:] = dw_exp[:] + err[:, i]
                cnt_exp += 1
        # rtol scales with the second argument, so check both orderings
        assert np.allclose(dw[wrd_id, :], dw_exp, atol=0, rtol=1e-4)
        assert np.allclose(dw_exp, dw[wrd_id, :], atol=0, rtol=1e-4)
        assert cnt == cnt_exp

    return
Example 9
def test_bibn(backend_default, fargs):

    seq_len, input_size, hidden_size, batch_size = fargs
    in_shape = (input_size, seq_len)
    NervanaObject.be.bsz = batch_size

    # set up the bi-directional rnn with batch normalization (BiBNRNN)
    init_glorot = GlorotUniform()
    birnn = BiBNRNN(hidden_size, activation=Logistic(), init=init_glorot)
    birnn.configure(in_shape)
    birnn.prev_layer = True
    birnn.allocate()
    birnn.set_deltas([birnn.be.iobuf(birnn.in_shape)])

    # test fprop

    # set the ff buffer
    inp_np = np.random.random(birnn.h_ff_buffer.shape)
    inp_be = birnn.be.array(inp_np)
    birnn.h_ff_buffer[:] = inp_np

    # compare the bn output with calling the backend bn
    xsum = birnn.be.zeros_like(birnn.xmean)
    xvar = birnn.be.zeros_like(birnn.xvar)
    gmean = birnn.be.zeros_like(birnn.gmean)
    gvar = birnn.be.zeros_like(birnn.gvar)
    gamma = birnn.be.ones(birnn.gamma.shape)
    beta = birnn.be.zeros_like(birnn.beta)
    grad_gamma = birnn.be.zeros_like(gamma)
    grad_beta = birnn.be.zeros_like(beta)
    out_ref = birnn.be.zeros_like(birnn.h_ff_buffer)

    xsum[:] = birnn.be.sum(birnn.h_ff_buffer, axis=1)
    birnn.be.compound_fprop_bn(birnn.h_ff_buffer,
                               xsum,
                               xvar,
                               gmean,
                               gvar,
                               gamma,
                               beta,
                               out_ref,
                               birnn.eps,
                               birnn.rho,
                               accumbeta=0,
                               relu=False)

    # call the bibnrnn layer fprop_bn
    out_bn = birnn._fprop_bn(birnn.h_ff_buffer, inference=False)

    assert allclose_with_out(out_bn.get(),
                             out_ref.get(),
                             rtol=0.0,
                             atol=1.0e-5)

    # test bprop
    err_np = np.random.random(birnn.h_ff_buffer.shape)
    err_be = birnn.be.array(err_np)

    err_out_ref = birnn.be.empty_like(err_be)
    birnn.be.compound_bprop_bn(err_out_ref, grad_gamma, grad_beta, err_be,
                               inp_be, xsum, xvar, gamma, birnn.eps)

    err_out_bn = birnn._bprop_bn(err_be, out_bn)

    assert allclose_with_out(err_out_bn.get(),
                             err_out_ref.get(),
                             rtol=0.0,
                             atol=1.0e-5)
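For reference, compound_fprop_bn appears to take the per-feature row sums and derive the batch statistics internally (an inference from the call above); the underlying math is ordinary batch normalization over axis 1. A plain NumPy sketch of that computation (eps handling and running-statistics updates in neon's fused kernel may differ):

import numpy as np

def batchnorm_fprop_ref(x, gamma, beta, eps=1e-3):
    # normalize each feature row over axis 1, then scale and shift
    xmean = x.mean(axis=1, keepdims=True)
    xvar = x.var(axis=1, keepdims=True)
    xhat = (x - xmean) / np.sqrt(xvar + eps)
    return gamma * xhat + beta

x = np.random.random((6, 20))  # (features, seq_len * batch)
out = batchnorm_fprop_ref(x, np.ones((6, 1)), np.zeros((6, 1)))
# with gamma=1, beta=0 each output row is zero-mean (and near unit-variance)
assert np.allclose(out.mean(axis=1), 0.0, atol=1e-6)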