Python LSTM.allocate_deltas Examples

Programming Language: Python

Namespace/Package Name: neon.layers.recurrent

Class/Type: LSTM

Method/Function: allocate_deltas

Examples at hotexamples.com: 2

Python LSTM.allocate_deltas - 2 examples found. These are the top-rated real-world Python examples of neon.layers.recurrent.LSTM.allocate_deltas, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

LSTM(4)

fprop(4)

allocate(3)

configure(3)

prev_layer(3)

set_deltas(3)

allocate_deltas(2)

bprop(2)

Example #1

Show file

def gradient_calc(seq_len, input_size, hidden_size, batch_size,
                  epsilon=None, rand_scale=None, inp_bl=None):
    """Compare LSTM bprop input deltas with a finite-difference estimate.

    A fake scalar loss ``np.sum(rand_scale * output)`` is used. The layer's
    ``bprop`` is run once with ``rand_scale`` as the incoming errors, then
    every input element is perturbed by ``+epsilon`` and ``-epsilon`` and the
    loss difference gives a central-difference gradient estimate.

    Args:
        seq_len: sequence length passed to ``lstm.configure``.
        input_size: number of input features.
        hidden_size: number of hidden units of the LSTM.
        batch_size: batch size set on the backend.
        epsilon: finite-difference step. Required in practice; the ``None``
            default is kept only for signature compatibility.
        rand_scale: weights of the fake loss, same shape as the fprop output.
            When ``None``, drawn uniformly from [-1, 1).
        inp_bl: baseline input of shape
            ``(input_size, seq_len * batch_size)``. When ``None``, drawn from
            a standard normal distribution.

    Returns:
        Tuple ``(grads_est, deltas_neon)``: the numerical gradient estimate
        and the deltas returned by ``lstm.bprop``.

    Raises:
        TypeError: if ``epsilon`` is ``None``.
        ZeroDivisionError: if ``epsilon`` is zero.
    """
    # Fail before any backend work: the perturbation loop needs a numeric
    # step, and the None default would otherwise only surface as an opaque
    # TypeError after the full fprop/bprop has already run.
    if epsilon is None:
        raise TypeError("gradient_calc() requires a numeric epsilon "
                        "(finite-difference step); got None")
    # loop-invariant factor of the central difference; a zero step also
    # fails here instead of at the end of the first loop iteration
    grad_scale = 0.5 / float(epsilon)

    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    input_shape = (input_size, seq_len * batch_size)

    # generate input if one is not given
    if inp_bl is None:
        inp_bl = np.random.randn(*input_shape)

    # neon lstm instance
    lstm = LSTM(hidden_size, Gaussian(), activation=Tanh(), gate_activation=Logistic())
    inpa = lstm.be.array(np.copy(inp_bl))

    # run fprop on the baseline input
    lstm.configure((input_size, seq_len))
    lstm.prev_layer = True  # Hack to force allocating a delta buffer

    lstm.allocate()

    dtree = DeltasTree()
    lstm.allocate_deltas(dtree)
    dtree.allocate_buffers()
    lstm.set_deltas(dtree)

    out_bl = lstm.fprop(inpa).get()

    # random scaling/hash to generate fake loss
    if rand_scale is None:
        rand_scale = np.random.random(out_bl.shape) * 2.0 - 1.0
    # loss function would be:
    # loss_bl = np.sum(rand_scale * out_bl)

    # run back prop with rand_scale as the errors
    # use copy to avoid any interactions
    deltas_neon = lstm.bprop(lstm.be.array(np.copy(rand_scale))).get()

    # add a perturbation to each input element
    grads_est = np.zeros(inpa.shape)
    inp_pert = inp_bl.copy()
    for pert_ind in range(inpa.size):
        save_val = inp_pert.flat[pert_ind]

        # the layer is reset and re-allocated before each perturbed fprop
        inp_pert.flat[pert_ind] = save_val + epsilon
        reset_lstm(lstm)
        lstm.allocate()
        out_pos = lstm.fprop(lstm.be.array(inp_pert)).get()

        inp_pert.flat[pert_ind] = save_val - epsilon
        reset_lstm(lstm)
        lstm.allocate()
        out_neg = lstm.fprop(lstm.be.array(inp_pert)).get()

        # calculate the loss with perturbations
        loss_pos = np.sum(rand_scale * out_pos)
        loss_neg = np.sum(rand_scale * out_neg)
        # compute the gradient estimate (central difference)
        grads_est.flat[pert_ind] = grad_scale * (loss_pos - loss_neg)

        # reset the perturbed input element
        inp_pert.flat[pert_ind] = save_val

    del lstm
    return (grads_est, deltas_neon)

Example #2

Show file

def check_lstm(seq_len, input_size, hidden_size,
               batch_size, init_func, inp_moms=(0.0, 1.0)):
    """Check neon LSTM fprop and bprop against the reference numpy LSTM.

    A neon ``LSTM`` is built and run forward on random input. Its bias and
    weights are then copied into a ``RefLSTM`` weight matrix so both models
    share parameters. The gate buffer, cell states and hidden states are
    compared, followed by the weight, bias and input gradients after a
    backward pass on random deltas. Every comparison uses an absolute
    tolerance of 1.5e-5.

    Args:
        seq_len: sequence length passed to ``lstm.configure``.
        input_size: number of input features.
        hidden_size: number of hidden units of the LSTM.
        batch_size: batch size set on the backend.
        init_func: the initializer for the model params.
        inp_moms: two-element sequence ``(offset, scale)`` for the random
            input, which is built as
            ``np.random.rand(...) * inp_moms[1] + inp_moms[0]``.
            NOTE(review): the original comment calls these
            "[mean, std dev]", but the samples are uniform on [0, 1), not
            normal. Confirm which was intended.

    Raises:
        AssertionError: if any neon result differs from the reference.
    """
    input_shape = (input_size, seq_len * batch_size)
    hidden_shape = (hidden_size, seq_len * batch_size)
    NervanaObject.be.bsz = NervanaObject.be.batch_size = batch_size

    # neon LSTM
    lstm = LSTM(hidden_size,
                init_func,
                activation=Tanh(),
                gate_activation=Logistic())

    inp = np.random.rand(*input_shape) * inp_moms[1] + inp_moms[0]
    inpa = lstm.be.array(inp)
    # run neon fprop
    lstm.configure((input_size, seq_len))
    lstm.prev_layer = True  # Hack to force allocating a delta buffer
    lstm.allocate()

    dtree = DeltasTree()
    lstm.allocate_deltas(dtree)
    dtree.allocate_buffers()
    lstm.set_deltas(dtree)

    lstm.fprop(inpa)

    # reference numpy LSTM
    lstm_ref = RefLSTM()
    WLSTM = lstm_ref.init(input_size, hidden_size)

    # make ref weights and biases with neon model
    # row 0 holds the bias, the next input_size rows the input weights,
    # and the remaining rows the recurrent weights
    WLSTM[0, :] = lstm.b.get().T
    WLSTM[1:input_size + 1, :] = lstm.W_input.get().T
    WLSTM[input_size + 1:] = lstm.W_recur.get().T

    # transpose input X and do fprop
    inp_ref = inp.copy().T.reshape(seq_len, batch_size, input_size)
    # the final cell/hidden states returned by the reference are not compared
    (Hout_ref, _cprev, _hprev, batch_cache) = lstm_ref.forward(inp_ref,
                                                               WLSTM)

    # the output needs transpose as well
    Hout_ref = Hout_ref.reshape(seq_len * batch_size, hidden_size).T
    IFOGf_ref = batch_cache['IFOGf'].reshape(seq_len * batch_size, hidden_size * 4).T
    Ct_ref = batch_cache['Ct'].reshape(seq_len * batch_size, hidden_size).T

    # compare results
    neon_logger.display('====Verifying IFOG====')
    assert allclose_with_out(lstm.ifog_buffer.get(),
                             IFOGf_ref,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('====Verifying cell states====')
    assert allclose_with_out(lstm.c_act_buffer.get(),
                             Ct_ref,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('====Verifying hidden states====')
    assert allclose_with_out(lstm.outputs.get(),
                             Hout_ref,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('fprop is verified')

    # now test the bprop
    # generate random deltas tensor
    deltas = np.random.randn(*hidden_shape)

    lstm.bprop(lstm.be.array(deltas))
    # grab the delta W from gradient buffer
    dWinput_neon = lstm.dW_input.get()
    dWrecur_neon = lstm.dW_recur.get()
    db_neon = lstm.db.get()

    deltas_ref = deltas.copy().T.reshape(seq_len, batch_size, hidden_size)
    # gradients w.r.t. the initial cell/hidden state are not compared
    (dX_ref, dWLSTM_ref, _dc0_ref, _dh0_ref) = lstm_ref.backward(deltas_ref,
                                                                 batch_cache)
    # split the reference gradient using the same row layout as WLSTM
    dWrecur_ref = dWLSTM_ref[-hidden_size:, :]
    dWinput_ref = dWLSTM_ref[1:input_size + 1, :]
    db_ref = dWLSTM_ref[0, :]
    dX_ref = dX_ref.reshape(seq_len * batch_size, input_size).T

    # compare results
    neon_logger.display('Making sure neon LSTM match numpy LSTM in bprop')
    neon_logger.display('====Verifying update on W_recur====')

    assert allclose_with_out(dWrecur_neon,
                             dWrecur_ref.T,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('====Verifying update on W_input====')
    assert allclose_with_out(dWinput_neon,
                             dWinput_ref.T,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('====Verifying update on bias====')
    assert allclose_with_out(db_neon.flatten(),
                             db_ref,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('====Verifying output delta====')
    assert allclose_with_out(lstm.out_deltas_buffer.get(),
                             dX_ref,
                             rtol=0.0,
                             atol=1.5e-5)

    neon_logger.display('bprop is verified')