def RNN(x, weights, biases):
    with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
        cell1 = LSTMCell(n_hidden, debug=True)
        cell2 = LSTMCell(n_hidden, debug=True)
        cell = MultiRNNCell([cell1, cell2])
        result, state = dynamic_rnn(cell, symbols_in_keys)
    # Dense should be created outside the WeightsInitializer scope because we
    # are passing constant initializers for its kernel and bias.
    out_l = Dense(10,
                  kernel_initializer=init_ops.Constant(out_weights),
                  bias_initializer=init_ops.Constant(out_biases))
    return out_l(state[-1].h)
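The comment above matters because of how the layer resolves its kernel initializer (the constructor shown in Example #8 checks the class-level WeightsInitializer.initializer before falling back to the kernel_initializer argument). A minimal sketch of the difference, reusing names from this example and assuming that constructor is the one behind Dense:

# Inside the scope, the scope's Constant(0.1) wins, so the constants passed here would be ignored.
with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
    dense_inside = Dense(10, kernel_initializer=init_ops.Constant(out_weights))
# Outside the scope, the passed constant kernel initializer is actually used.
dense_outside = Dense(10, kernel_initializer=init_ops.Constant(out_weights))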
Example #2
def RNN(x, weights, biases):
    with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
        bw_cell = LSTMCell(n_hidden)
        fw_cell = LSTMCell(n_hidden)
        result, state = bidirectional_dynamic_rnn(fw_cell, bw_cell,
                                                  symbols_in_keys)
    "Dense in this case should be out of WeightsInitializer scope because we are passing constants"
    out_l = Dense(10,
                  kernel_initializer=init_ops.Constant(out_weights),
                  bias_initializer=init_ops.Constant(out_biases))
    fw_result, bw_result = result
    h = np.concatenate((fw_result, bw_result), -1)
    pred = out_l(h[0][-1].reshape(1, vocab_size))
    return pred
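For reference, the forward/backward join above is a plain NumPy concatenation along the last axis. A tiny standalone sketch with made-up shapes (illustrative only, not the shapes this library actually returns):

import numpy as np

fw_result = np.ones((1, 3, 2))   # illustrative (batch, time, units) forward outputs
bw_result = np.ones((1, 3, 2))   # illustrative backward outputs
h = np.concatenate((fw_result, bw_result), -1)
print(h.shape)                   # (1, 3, 4): the last axis doubles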
Example #3
def get_rnn_cell(rnn_cell_size, dropout_prob, n_layers, debug):
    rnn_cell = None
    print("n_layers:", n_layers)
    if n_layers == 1:
        with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
            rnn_cell = LSTMCell(rnn_cell_size, debug=debug)
    else:
        cell_list = []
        for i in range(n_layers):
            with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
                cell_list.append(LSTMCell(rnn_cell_size, debug=debug))
        rnn_cell = MultiRNNCell(cell_list)
    return rnn_cell
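A hypothetical call, only to show the two return types (the argument values are invented, and note that dropout_prob is accepted but not used inside the function):

single_cell = get_rnn_cell(rnn_cell_size=2, dropout_prob=0.0, n_layers=1, debug=False)   # plain LSTMCell
stacked_cell = get_rnn_cell(rnn_cell_size=2, dropout_prob=0.0, n_layers=3, debug=False)  # MultiRNNCell of 3 cells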
Example #4
def create_attention(decoding_cell, encoding_op, encoding_st, fr_len):

    # Compare by value ("=="), not identity ("is"), when checking the option string.
    if args.attention_option == "Luong":
        with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
            attention_mechanism = LuongAttention(hidden_size, encoding_op, fr_len)
            decoding_cell = AttentionWrapper(decoding_cell, attention_mechanism, hidden_size)
        attention_zero_state = decoding_cell.zero_state(batch_size)
        attention_zero_state = attention_zero_state.clone(cell_state=encoding_st)
        print("attentionstate0:", attention_zero_state)
        return decoding_cell, attention_zero_state
Example #5
def RNN(x, weights, biases):
    fw_cell_list = []
    bw_cell_list = []
    for i in range(n_layers):
        with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
            fw_cell_list.append(LSTMCell(n_hidden, debug=True))
            bw_cell_list.append(LSTMCell(n_hidden, debug=True))
    fw_cell = MultiRNNCell(fw_cell_list)
    bw_cell = MultiRNNCell(bw_cell_list)
    result, state = bidirectional_dynamic_rnn(fw_cell, bw_cell,
                                              symbols_in_keys)
    "Dense in this case should be out of WeightsInitializer scope because we are passing constants"
    out_l = Dense(10,
                  kernel_initializer=init_ops.Constant(out_weights),
                  bias_initializer=init_ops.Constant(out_biases))
    fw_result, bw_result = result
    h = np.concatenate((fw_result, bw_result), -1)
    pred = out_l(h[0][-1].reshape(1, vocab_size))
    print("pred:", pred)
    return pred
Example #6
def decoding_layer(decoding_embed_inp, embeddings, encoding_op, encoding_st, v_size, fr_len,
                   en_len, max_en_len, rnn_cell_size, word2int, dropout_prob, batch_size, n_layers):

    out_l = Dense(len(en_word2int) + 1,
                  kernel_initializer=init_ops.Constant(init))
    logits_tr = training_decoding_layer(decoding_embed_inp,
                                        en_len,
                                        get_rnn_cell(rnn_cell_size, dr_prob, n_layers, debug),
                                        encoding_op,
                                        encoding_st,
                                        out_l,
                                        v_size,
                                        fr_len,
                                        max_en_len)

    return logits_tr
Example #7
def create_attention(decoding_cell, encoding_op, encoding_st, fr_len):

    if args.attention_option == "Luong":
        print("Attention is all I need. fr_len:", fr_len, decoding_cell)
        #print("encoding_op[0].shape:",encoding_op[0].shape,type(encoding_op),len(encoding_op))
        #if(encoder_type=="bi"):

        #encoding_op=np.concatenate((encoding_op[0],encoding_op[1]),axis=-1)
        #encoding_op=encoding_op[0]+encoding_op[1]
        with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
            attention_mechanism = LuongAttention(hidden_size, encoding_op, fr_len)
            decoding_cell = AttentionWrapper(decoding_cell, attention_mechanism, hidden_size)
        attention_zero_state = decoding_cell.zero_state(batch_size)
        attention_zero_state = attention_zero_state.clone(cell_state=encoding_st)
        print("attentionstate0:", attention_zero_state)
        return decoding_cell, attention_zero_state
Example #8
    def __init__(self,units,
               activation=None,
               use_bias=True,
               kernel_initializer=None,
               bias_initializer=init_ops.Constant(0),
               kernel_regularizer=None,
               bias_regularizer=None,
               activity_regularizer=None,
               kernel_constraint=None,
               bias_constraint=None,
               trainable=True,
               name=None,
               debug=False,
               backpassdebug=False):

        self.debug = debug
        self.backpassdebug = backpassdebug
        self.use_bias = use_bias
        self.units = units
        # First preference goes to the static initializer set through
        # "WeightsInitializer", but don't use both to avoid confusion.
        if WeightsInitializer.initializer is not None:
            self.init_function = WeightsInitializer.initializer
        else:
            # If that is not set, fall back to the usual "kernel_initializer".
            if kernel_initializer is None:
                self.init_function = init_ops.RandomUniform()
            else:
                self.init_function = kernel_initializer

        if self.use_bias:
            if bias_initializer is not None:
                self.bias_initializer = bias_initializer
        self.kernelname = None
        self.biasname = None

        self.use_act = False
        self.activation = None
        if activation is not None:
            self.use_act = True
            self.activation = activation
        self.trainable = trainable
        if name is None:
            self.name = "FeedForward"
        else:
            self.name = name
        self.ffl = FFLayer(name=self.name, layer=self)
Example #9
step = 0
#offset = rnd.randint(0, n_input + 1)
offset = 2
end_offset = n_input + 1
acc_total = 0
loss_total = 0
print("offset:", offset)
# only for testing
weights = np.ones([4 * n_hidden, vocab_size + n_hidden + 1]) * .1

c = np.ones((n_hidden, 1))
h = np.ones((n_hidden, 1))
#initstate=(c,h)
initstate = LSTMStateTuple(c, h)

with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
    cell = LSTMCell(n_hidden, debug=True)
gdo = BatchGradientDescent(learning_rate)
out_l = Dense(10,
              kernel_initializer=init_ops.Constant(out_weights),
              bias_initializer=init_ops.Constant(out_biases))

while step < training_iters:
    if offset > (len(train_data) - end_offset):
        offset = rnd.randint(0, n_input + 1)
    print("offset:", offset)
    symbols_in_keys = [
        input_one_hot(dictionary[str(train_data[i])], vocab_size)
        for i in range(offset, offset + n_input)
    ]
    symbols_in_keys = np.reshape(np.array(symbols_in_keys),
Example #10
    lr = args.learning_rate
    debug = args.debug
    per_epoch = args.per_epoch
    logs_path = args.out_dir
    display_steps = args.display_steps

fr_embeddings_matrix, en_embeddings_matrix, fr_word2int, en_word2int, fr_filtered, en_filtered, args = get_nmt_data()
set_modelparams(args)
make_model()

en_train = en_filtered[0:30000]
fr_train = fr_filtered[0:30000]
update_check = (len(fr_train) // batch_size // per_epoch) - 1


out_l = Dense(len(en_word2int) + 1, kernel_initializer=init_ops.Constant(init))
for epoch_i in range(1, epochs + 1):
    update_loss = 0
    batch_loss = 0
    for batch_i, (en_batch, fr_batch, en_text_len, fr_text_len) in enumerate(
            get_batches(en_train, fr_train, batch_size)):
        before = time.time()
        encoding_optf, encoding_sttf, logits_tr = seq2seq_model(
            fr_batch[:, ::-1], en_batch, dr_prob, fr_text_len, en_text_len,
            np.amax(en_text_len), len(en_word2int) + 1, hidden_size, n_layers,
            en_word2int, batch_size)

        #print("batch:", batch_i, "decoding:logits:", logits_tr)
        yhat, loss = sequence_loss(logits_tr.rnn_output, en_batch, make_mask(en_batch))
        print("loss:", loss)
        gradients = gdo.compute_gradients(yhat, en_batch)
Example #11
#!/usr/bin/env python3
from org.mk.training.dl.rnn_cell import LSTMCell
from org.mk.training.dl.rnn import LSTMStateTuple

from org.mk.training.dl.common import WeightsInitializer
from org.mk.training.dl import init_ops
import numpy as np

n_hidden = 2

with WeightsInitializer(initializer=init_ops.Constant(0.5)) as vs:
    cell = LSTMCell(n_hidden, debug=True)

# Initial cell and hidden state, each of size n_hidden, as column vectors.
c = 0.1 * np.asarray([[0], [1]])
h = 0.1 * np.asarray([[2], [3]])

# A single input step with three features.
x = np.array([[1], [1], [1]])

print(cell(x, (c, h)))

# Values the constant-initialized cell is expected to produce for this input and state.
expected_h = np.array([[0.64121795, 0.68166804]])
expected_c = np.array([[0.88477188, 0.98103917]])
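The expected values can be reproduced by hand with plain NumPy. The sketch below is not the library's code; it assumes the standard LSTM step with input/candidate/forget/output gate ordering, a forget-gate bias of 1.0, and the bias column folded into the constant-initialized weight matrix (the same [4 * n_hidden, inputs + n_hidden + 1] layout used in Example #9). Under those assumptions it matches expected_h and expected_c:

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

# All weights are 0.5 (the Constant(0.5) initializer above); the trailing
# column of W multiplies a constant 1 and therefore acts as the bias.
W = np.full((4 * n_hidden, x.size + n_hidden + 1), 0.5)
stacked = np.vstack((x, h, [[1.0]]))            # [x; h_prev; 1], shape (6, 1)
i_g, j_g, f_g, o_g = np.split(W @ stacked, 4)   # each gate pre-activation is 2.25

c_new = sigmoid(f_g + 1.0) * c + sigmoid(i_g) * np.tanh(j_g)
h_new = sigmoid(o_g) * np.tanh(c_new)

print(np.allclose(h_new.T, expected_h))         # True
print(np.allclose(c_new.T, expected_c))         # True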