Example #1
    def __init__(self,
                 num_units,
                 memory,
                 memory_sequence_length=None,
                 scale=False,
                 probability_fn=None,
                 score_mask_value=None,
                 name="LuongAttention"):
        # Default to plain softmax when no probability function is supplied.
        if probability_fn is None:
            probability_fn = softmax
        wrapped_probability_fn = lambda score: probability_fn(score)
        super(LuongAttention,
              self).__init__(query_layer=None,
                             memory_layer=Dense(num_units,
                                                name="memory_layer",
                                                use_bias=False,
                                                trainable=False),
                             memory=memory,
                             probability_fn=wrapped_probability_fn,
                             memory_sequence_length=memory_sequence_length,
                             score_mask_value=score_mask_value,
                             name=name)

        if self.debug:
            print(self)
        self._num_units = num_units
        self._scale = scale
        self._name = name
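A minimal construction sketch for this attention mechanism, assuming encoder_outputs is a [batch, time, depth] memory tensor and source_lengths holds the true source lengths (both names are illustrative, not from the original example):

attention = LuongAttention(num_units=n_hidden,
                           memory=encoder_outputs,
                           memory_sequence_length=source_lengths)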
Example #2
def RNN(x, weights, biases):
    with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
        cell1 = LSTMCell(n_hidden, debug=True)
        cell2 = LSTMCell(n_hidden, debug=True)
        cell = MultiRNNCell([cell1, cell2])
        result, state = dynamic_rnn(cell, symbols_in_keys)
    # Dense must stay outside the WeightsInitializer scope because we pass
    # explicit constant initializers for its kernel and bias.
    out_l = Dense(10,
                  kernel_initializer=init_ops.Constant(out_weights),
                  bias_initializer=init_ops.Constant(out_biases))
    return out_l(state[-1].h)
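The comment above marks the key design choice: if the Dense layer were built inside the with WeightsInitializer(...) block, the scope's Constant(0.1) initializer would presumably override the explicit Constant(out_weights) and Constant(out_biases) arguments, so the layer is created only after the scope exits.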
Example #3
def RNN(x, weights, biases):
    with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
        bw_cell = LSTMCell(n_hidden)
        fw_cell = LSTMCell(n_hidden)
        result, state = bidirectional_dynamic_rnn(fw_cell, bw_cell,
                                                  symbols_in_keys)
    "Dense in this case should be out of WeightsInitializer scope because we are passing constants"
    out_l = Dense(10,
                  kernel_initializer=init_ops.Constant(out_weights),
                  bias_initializer=init_ops.Constant(out_biases))
    fw_result, bw_result = result
    h = np.concatenate((fw_result, bw_result), -1)
    pred = out_l(h[0][-1].reshape(1, vocab_size))
    return pred
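bidirectional_dynamic_rnn returns separate forward and backward results; np.concatenate along the last axis stacks them feature-wise, so each timestep carries 2 * n_hidden features. The subsequent reshape to (1, vocab_size) therefore appears to assume that vocab_size equals 2 * n_hidden in this toy setup.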
Example #4
def decoding_layer(decoding_embed_inp, embeddings, encoding_op, encoding_st, v_size, fr_len,
                   en_len, max_en_len, rnn_cell_size, word2int, dropout_prob, batch_size, n_layers):

    out_l = Dense(len(word2int) + 1,
                  kernel_initializer=init_ops.Constant(init))
    logits_tr = training_decoding_layer(decoding_embed_inp,
                                        en_len,
                                        get_rnn_cell(rnn_cell_size, dropout_prob, n_layers, debug),
                                        encoding_op,
                                        encoding_st,
                                        out_l,
                                        v_size,
                                        fr_len,
                                        max_en_len)

    return logits_tr
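A hedged usage sketch, assuming the decoder embeddings, encoder outputs/state, and length tensors come from the surrounding seq2seq pipeline as in Example #8 (all variable names are illustrative):

logits_tr = decoding_layer(dec_embed_input, en_embeddings_matrix,
                           encoder_outputs, encoder_state,
                           len(en_word2int) + 1, fr_text_len,
                           en_text_len, np.amax(en_text_len),
                           hidden_size, en_word2int, dr_prob,
                           batch_size, n_layers)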
Example #5
    def __init__(self,
                 cell,
                 attention_mechanism,
                 attention_layer_size=None,
                 alignment_history=False,
                 cell_input_fn=None,
                 output_attention=True,
                 initial_cell_state=None,
                 name=None,
                 attention_layer=None,
                 debug=False):
        self.seqsize = 0
        self.ec = ExecutionContext.getInstance()
        self.debug = debug
        if not isinstance(cell, MultiRNNCell):
            # Wrap a single cell so downstream code can assume a MultiRNNCell.
            cell = MultiRNNCell([cell])

        al = AttentionLayer(name="AttentionLayer",
                            bi=False,
                            fw_cell=self,
                            bw_cell=None,
                            prev=None)
        self.ec.current_layer(al)
        self.ec.register(self.ec.get_current_layer())

        self._cell = cell
        self._attention_mechanism = attention_mechanism
        self._output_attention = output_attention

        if attention_layer_size is not None and attention_layer is not None:
            raise ValueError(
                "Only one of attention_layer_size and attention_layer "
                "should be set")

        if attention_layer_size is not None:
            self._attention_layer = Dense(attention_layer_size,
                                          name="attention_layer",
                                          use_bias=False,
                                          trainable=False)
            self._attention_layer_size = attention_layer_size

        # State kept for Ds
        self.aht, self.attenzt, self.attentiont, self.alignmentst = {}, {}, {}, {}
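A sketch of how this wrapper might be combined with the LuongAttention mechanism from Example #1 (the class name AttentionWrapper is inferred from the constructor shown above, and encoder_outputs is assumed as before):

attention = LuongAttention(num_units=n_hidden, memory=encoder_outputs)
cell = AttentionWrapper(LSTMCell(n_hidden),
                        attention_mechanism=attention,
                        attention_layer_size=n_hidden)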
Example #6
def RNN(x, weights, biases):
    fw_cell_list = []
    bw_cell_list = []
    for i in range(n_layers):
        with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
            fw_cell_list.append(LSTMCell(n_hidden, debug=True))
            bw_cell_list.append(LSTMCell(n_hidden, debug=True))
    fw_cell = MultiRNNCell(fw_cell_list)
    bw_cell = MultiRNNCell(bw_cell_list)
    result, state = bidirectional_dynamic_rnn(fw_cell, bw_cell,
                                              symbols_in_keys)
    "Dense in this case should be out of WeightsInitializer scope because we are passing constants"
    out_l = Dense(10,
                  kernel_initializer=init_ops.Constant(out_weights),
                  bias_initializer=init_ops.Constant(out_biases))
    fw_result, bw_result = result
    h = np.concatenate((fw_result, bw_result), -1)
    pred = out_l(h[0][-1].reshape(1, vocab_size))
    print("pred:", pred)
    return pred
acc_total = 0
loss_total = 0
print("offset:", offset)
# only for testing
weights = np.ones([4 * n_hidden, vocab_size + n_hidden + 1]) * .1

c = np.ones((n_hidden, 1))
h = np.ones((n_hidden, 1))
#initstate=(c,h)
initstate = LSTMStateTuple(c, h)

with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
    cell = LSTMCell(n_hidden, debug=True)
gdo = BatchGradientDescent(learning_rate)
out_l = Dense(10,
              kernel_initializer=init_ops.Constant(out_weights),
              bias_initializer=init_ops.Constant(out_biases))

while step < training_iters:
    if offset > (len(train_data) - end_offset):
        offset = rnd.randint(0, n_input + 1)
    print("offset:", offset)
    symbols_in_keys = [
        input_one_hot(dictionary[str(train_data[i])], vocab_size)
        for i in range(offset, offset + n_input)
    ]
    symbols_in_keys = np.reshape(np.array(symbols_in_keys),
                                 [-1, n_input, vocab_size])
    target = dictionary[str(train_data[offset + n_input])]

    result, state = dynamic_rnn(cell, symbols_in_keys, initstate)
Example #8
    lr = args.learning_rate
    debug = args.debug
    per_epoch = args.per_epoch
    logs_path = args.out_dir
    display_steps = args.display_steps

(fr_embeddings_matrix, en_embeddings_matrix, fr_word2int, en_word2int,
 fr_filtered, en_filtered, args) = get_nmt_data()
set_modelparams(args)
make_model()

en_train = en_filtered[0:30000]
fr_train = fr_filtered[0:30000]
update_check = (len(fr_train) // batch_size // per_epoch) - 1


out_l = Dense(len(en_word2int) + 1,
              kernel_initializer=init_ops.Constant(init))
for epoch_i in range(1, epochs + 1):
    update_loss = 0
    batch_loss = 0
    for batch_i, (en_batch, fr_batch, en_text_len, fr_text_len) in enumerate(
            get_batches(en_train, fr_train, batch_size)):
        before = time.time()
        encoding_optf, encoding_sttf, logits_tr = seq2seq_model(
            fr_batch[:, ::-1], en_batch, dr_prob, fr_text_len, en_text_len,
            np.amax(en_text_len), len(en_word2int) + 1,
            hidden_size, n_layers, en_word2int, batch_size)

        #print("batch:", batch_i, "decoding:logits:", logits_tr)
        yhat, loss = sequence_loss(logits_tr.rnn_output, en_batch, make_mask(en_batch))
        print("loss:", loss)
        gradients = gdo.compute_gradients(yhat, en_batch)