Code example #1
File: test_recurrent.py Project: JediKoder/neon
def check_rnn(seq_len, input_size, hidden_size,
              batch_size, init_func, inp_moms=[0.0, 1.0]):
    # init_func is the initializer for the model params
    # inp_moms is the [mean, std dev] of the random input
    input_shape = (input_size, seq_len * batch_size)
    output_shape = (hidden_size, seq_len * batch_size)
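    # make the backend's batch size match this test's batch size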
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    # ======== create models ========
    # neon RNN
    rnn = Recurrent(hidden_size, init_func, activation=Tanh())

    # reference numpy RNN
    rnn_ref = RefRecurrent(input_size, hidden_size)
    Wxh = rnn_ref.Wxh
    Whh = rnn_ref.Whh
    bh = rnn_ref.bh

    # ========= generate data =================
    # generate random input tensor
    inp = np.random.rand(*input_shape) * inp_moms[1] + inp_moms[0]
    inpa = rnn.be.array(inp)
    # generate a random deltas tensor (the error signal fed to bprop)
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    inp_ref = inp.copy().T.reshape(
        seq_len, batch_size, input_size).swapaxes(1, 2)
    deltas_ref = deltas.copy().T.reshape(
        seq_len, batch_size, hidden_size).swapaxes(1, 2)

    # ========= running models ==========
    # run neon fprop
    rnn.configure((input_size, seq_len))
    rnn.prev_layer = True
    rnn.allocate()
    rnn.set_deltas([rnn.be.iobuf(rnn.in_shape)])
    rnn.fprop(inpa)

    # weights are only initialized after fprop runs, so copy the neon
    # model's weights and biases into the reference model now
    Wxh[:] = rnn.W_input.get()
    Whh[:] = rnn.W_recur.get()
    bh[:] = rnn.b.get()

    (dWxh_ref, dWhh_ref, db_ref, h_ref_list,
     dh_ref_list, d_out_ref) = rnn_ref.lossFun(inp_ref, deltas_ref)

    # now test the bprop
    rnn.bprop(rnn.be.array(deltas))
    # grab the delta W from gradient buffer
    dWxh_neon = rnn.dW_input.get()
    dWhh_neon = rnn.dW_recur.get()
    db_neon = rnn.db.get()

    # comparing outputs (assert so a mismatch actually fails the test,
    # rather than just printing the boolean result)
    neon_logger.display('====Verifying hidden states====')
    assert allclose_with_out(rnn.outputs.get(),
                             h_ref_list,
                             rtol=0.0,
                             atol=1.0e-5)
    neon_logger.display('fprop is verified')

    neon_logger.display('====Verifying update on W and b ====')
    neon_logger.display('dWxh')
    assert allclose_with_out(dWxh_neon,
                             dWxh_ref,
                             rtol=0.0,
                             atol=1.0e-5)
    neon_logger.display('dWhh')
    assert allclose_with_out(dWhh_neon,
                             dWhh_ref,
                             rtol=0.0,
                             atol=1.0e-5)

    neon_logger.display('====Verifying update on bias====')
    neon_logger.display('db')
    assert allclose_with_out(db_neon,
                             db_ref,
                             rtol=0.0,
                             atol=1.0e-5)

    neon_logger.display('bprop is verified')

    return
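
The layout conversion in the middle of this test, from neon's flattened
(feature, time x batch) tensor to the reference implementation's
(time, feature, batch) tensor, can be sanity-checked in isolation with
plain NumPy. A minimal sketch, with arbitrary assumed sizes:

import numpy as np

# standalone check of the reshape/swapaxes trick used in check_rnn above
seq_len, input_size, batch_size = 3, 2, 4
flat = np.arange(input_size * seq_len * batch_size).reshape(
    input_size, seq_len * batch_size)  # neon layout: (features, T * N)
ref = flat.T.reshape(seq_len, batch_size, input_size).swapaxes(1, 2)
assert ref.shape == (seq_len, input_size, batch_size)
# column t * batch_size + b of `flat` holds time step t of batch item b
assert np.array_equal(ref[1, :, 2], flat[:, 1 * batch_size + 2])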
Code example #2
def check_rnn(seq_len,
              input_size,
              hidden_size,
              batch_size,
              init_func,
              inp_moms=[0.0, 1.0]):
    # init_func is the initializer for the model params
    # inp_moms is the [mean, std dev] of the random input
    input_shape = (input_size, seq_len * batch_size)
    output_shape = (hidden_size, seq_len * batch_size)
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    # ======== create models ========
    # neon RNN
    rnn = Recurrent(hidden_size, init_func, activation=Tanh())

    # reference numpy RNN
    rnn_ref = RefRecurrent(input_size, hidden_size)
    Wxh = rnn_ref.Wxh
    Whh = rnn_ref.Whh
    bh = rnn_ref.bh

    # ========= generate data =================
    # generate random input tensor
    inp = np.random.rand(*input_shape) * inp_moms[1] + inp_moms[0]
    inpa = rnn.be.array(inp)
    # generate a random deltas tensor (the error signal fed to bprop)
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    inp_ref = inp.copy().T.reshape(seq_len, batch_size,
                                   input_size).swapaxes(1, 2)
    deltas_ref = deltas.copy().T.reshape(seq_len, batch_size,
                                         hidden_size).swapaxes(1, 2)

    # ========= running models ==========
    # run neon fprop
    rnn.configure((input_size, seq_len))
    rnn.prev_layer = True
    rnn.allocate()

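    # newer neon versions allocate the bprop delta buffers through a
    # DeltasTree (older code passed a list of iobufs to set_deltas,
    # as in example #1 above)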
    dtree = DeltasTree()
    rnn.allocate_deltas(dtree)
    dtree.allocate_buffers()
    rnn.set_deltas(dtree)

    rnn.fprop(inpa)

    # weights are only initialized after fprop runs, so copy the neon
    # model's weights and biases into the reference model now
    Wxh[:] = rnn.W_input.get()
    Whh[:] = rnn.W_recur.get()
    bh[:] = rnn.b.get()

    (dWxh_ref, dWhh_ref, db_ref, h_ref_list, dh_ref_list,
     d_out_ref) = rnn_ref.lossFun(inp_ref, deltas_ref)

    # now test the bprop
    rnn.bprop(rnn.be.array(deltas))
    # grab the delta W from gradient buffer
    dWxh_neon = rnn.dW_input.get()
    dWhh_neon = rnn.dW_recur.get()
    db_neon = rnn.db.get()

    # comparing outputs
    neon_logger.display('====Verifying hidden states====')
    assert allclose_with_out(rnn.outputs.get(),
                             h_ref_list,
                             rtol=0.0,
                             atol=1.0e-5)

    neon_logger.display('fprop is verified')

    neon_logger.display('====Verifying update on W and b ====')
    neon_logger.display('dWxh')
    assert allclose_with_out(dWxh_neon, dWxh_ref, rtol=0.0, atol=1.0e-5)
    neon_logger.display('dWhh')
    assert allclose_with_out(dWhh_neon, dWhh_ref, rtol=0.0, atol=1.0e-5)

    neon_logger.display('====Verifying update on bias====')
    neon_logger.display('db')
    assert allclose_with_out(db_neon, db_ref, rtol=0.0, atol=1.0e-5)

    neon_logger.display('bprop is verified')

    return
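
For reference, here is a minimal sketch of how this variant might be
driven. gen_backend and GlorotUniform are real neon APIs, but the sizes
and seed below are illustrative assumptions, not values taken from the
project:

from neon.backends import gen_backend
from neon.initializers import GlorotUniform

# set up a backend so NervanaObject.be exists (sizes are assumptions)
gen_backend(backend='cpu', batch_size=4, rng_seed=0)
check_rnn(seq_len=10, input_size=8, hidden_size=16,
          batch_size=4, init_func=GlorotUniform())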
Code example #3
def check_rnn(seq_len,
              input_size,
              hidden_size,
              batch_size,
              init_func,
              return_seq=True):
    # init_func is the initializer for the model params
    assert batch_size == 1, "the recurrent reference implementation only supports batch size 1"

    # ========== neon model ==========
    Cin = ng.make_axis(input_size)
    REC = ng.make_axis(seq_len, recurrent=True)
    N = ng.make_axis(batch_size, batch=True)
    H = ng.make_axis(hidden_size)
    ax_s = ng.make_axes([H, N])

    ex = ExecutorFactory()
    np.random.seed(0)
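
    # NOTE: `rng`, `delta`, `rtol` and `atol` are module-level names in
    # the source test file; plausible definitions (an assumption) are:
    #   rng = RandomTensorGenerator(0, np.float32)
    #   delta = 1e-3
    #   rtol = atol = 1e-2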

    rnn_ng = Recurrent(hidden_size,
                       init_func,
                       activation=Tanh(),
                       reset_cells=True,
                       return_sequence=return_seq)

    inp_ng = ng.placeholder([Cin, REC, N])
    init_state_ng = ng.placeholder(ax_s)

    # fprop graph
    out_ng = rnn_ng.train_outputs(inp_ng, init_state=init_state_ng)
    out_ng.input = True

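    # mark the weights and bias as graph inputs so the executor can
    # compute derivatives with respect to them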
    rnn_W_input = rnn_ng.W_input
    rnn_W_input.input = True
    rnn_W_recur = rnn_ng.W_recur
    rnn_W_recur.input = True
    rnn_b = rnn_ng.b
    rnn_b.input = True

    fprop_neon_fun = ex.executor(out_ng, inp_ng, init_state_ng)

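    # paired derivative functions for each parameter: ex.derivative uses
    # autodiff, ex.numeric_derivative uses finite differences of step `delta`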
    dWrecur_s_fun = ex.derivative(out_ng, rnn_W_recur, inp_ng, rnn_W_input,
                                  rnn_b)
    dWrecur_n_fun = ex.numeric_derivative(out_ng, rnn_W_recur, delta, inp_ng,
                                          rnn_W_input, rnn_b)
    dWinput_s_fun = ex.derivative(out_ng, rnn_W_input, inp_ng, rnn_W_recur,
                                  rnn_b)
    dWinput_n_fun = ex.numeric_derivative(out_ng, rnn_W_input, delta, inp_ng,
                                          rnn_W_recur, rnn_b)
    dWb_s_fun = ex.derivative(out_ng, rnn_b, inp_ng, rnn_W_input, rnn_W_recur)
    dWb_n_fun = ex.numeric_derivative(out_ng, rnn_b, delta, inp_ng,
                                      rnn_W_input, rnn_W_recur)

    # fprop on random inputs
    input_value = rng.uniform(-1, 1, inp_ng.axes)
    init_state_value = rng.uniform(-1, 1, init_state_ng.axes)
    fprop_neon = fprop_neon_fun(input_value, init_state_value).copy()

    # after the rnn graph has been executed, the W values can be read; take
    # copies so that shared buffers don't confuse the derivative checks
    Wxh_neon = rnn_ng.W_input.value.get(None).copy()
    Whh_neon = rnn_ng.W_recur.value.get(None).copy()
    bh_neon = rnn_ng.b.value.get(None).copy()

    # bprop derivs
    dWrecur_s = dWrecur_s_fun(Whh_neon, input_value, Wxh_neon, bh_neon)
    dWrecur_n = dWrecur_n_fun(Whh_neon, input_value, Wxh_neon, bh_neon)
    np.testing.assert_allclose(dWrecur_s, dWrecur_n, rtol=rtol, atol=atol)

    dWb_s = dWb_s_fun(bh_neon, input_value, Wxh_neon, Whh_neon)
    dWb_n = dWb_n_fun(bh_neon, input_value, Wxh_neon, Whh_neon)
    np.testing.assert_allclose(dWb_s, dWb_n, rtol=rtol, atol=atol)

    dWinput_s = dWinput_s_fun(Wxh_neon, input_value, Whh_neon, bh_neon)
    dWinput_n = dWinput_n_fun(Wxh_neon, input_value, Whh_neon, bh_neon)
    np.testing.assert_allclose(dWinput_s, dWinput_n, rtol=rtol, atol=atol)

    # ========= reference model ==========
    output_shape = (hidden_size, seq_len * batch_size)

    # generate random deltas tensor
    deltas = np.random.randn(*output_shape)

    # the reference code expects these shapes:
    # input_shape: (seq_len, input_size, batch_size)
    # output_shape: (seq_len, hidden_size, batch_size)
    deltas_ref = deltas.copy().T.reshape(seq_len, batch_size,
                                         hidden_size).swapaxes(1, 2)

    inp_ref = input_value.transpose([1, 0, 2])

    # reference numpy RNN
    rnn_ref = RefRecurrent(input_size, hidden_size)
    rnn_ref.Wxh[:] = Wxh_neon
    rnn_ref.Whh[:] = Whh_neon
    rnn_ref.bh[:] = bh_neon.reshape(rnn_ref.bh.shape)

    (dWxh_ref, dWhh_ref, db_ref, h_ref_list, dh_ref_list,
     d_out_ref) = rnn_ref.lossFun(inp_ref,
                                  deltas_ref,
                                  init_states=init_state_value)

    # comparing outputs
    if return_seq is False:
        h_ref_list = h_ref_list[:, -1].reshape(-1, 1)
    else:
        fprop_neon = fprop_neon[:, :, 0]
    np.testing.assert_allclose(fprop_neon, h_ref_list, rtol=0.0, atol=1.0e-5)

    return
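
The symbolic-versus-numeric comparisons above are a standard
finite-difference gradient check. Stripped of the ngraph machinery, the
idea reduces to a central-difference approximation; the following
self-contained sketch (the tanh example and tolerances are illustrative
assumptions, not part of the original test) shows the technique:

import numpy as np

def numeric_grad(f, x, delta=1e-3):
    # central-difference approximation of the gradient of a scalar f at x
    g = np.zeros_like(x)
    for idx in np.ndindex(x.shape):
        orig = x[idx]
        x[idx] = orig + delta
        fp = f(x)
        x[idx] = orig - delta
        fm = f(x)
        x[idx] = orig  # restore the perturbed element
        g[idx] = (fp - fm) / (2 * delta)
    return g

# for f(x) = sum(tanh(x)) the exact gradient is 1 - tanh(x)**2
x = np.random.randn(4, 3)
approx = numeric_grad(lambda v: np.tanh(v).sum(), x)
np.testing.assert_allclose(approx, 1.0 - np.tanh(x) ** 2,
                           rtol=1e-2, atol=1e-2)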