def RNN(x, weights, biases):
    with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
        cell1 = LSTMCell(n_hidden, debug=True)
        cell2 = LSTMCell(n_hidden, debug=True)
        cell = MultiRNNCell([cell1, cell2])
    result, state = dynamic_rnn(cell, symbols_in_keys)
    # Dense stays outside the WeightsInitializer scope because we are
    # passing constants for its kernel and bias.
    out_l = Dense(10, kernel_initializer=init_ops.Constant(out_weights),
                  bias_initializer=init_ops.Constant(out_biases))
    return out_l(state[-1].h)
def RNN(x, weights, biases):
    with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
        bw_cell = LSTMCell(n_hidden)
        fw_cell = LSTMCell(n_hidden)
    result, state = bidirectional_dynamic_rnn(fw_cell, bw_cell, symbols_in_keys)
    # Dense stays outside the WeightsInitializer scope because we are
    # passing constants for its kernel and bias.
    out_l = Dense(10, kernel_initializer=init_ops.Constant(out_weights),
                  bias_initializer=init_ops.Constant(out_biases))
    fw_result, bw_result = result
    h = np.concatenate((fw_result, bw_result), -1)
    pred = out_l(h[0][-1].reshape(1, vocab_size))
    return pred
def get_rnn_cell(rnn_cell_size, dropout_prob, n_layers, debug):
    rnn_cell = None
    print("n_layers:", n_layers)
    if n_layers == 1:
        with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
            rnn_cell = LSTMCell(rnn_cell_size, debug=debug)
    else:
        cell_list = []
        for i in range(n_layers):
            with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
                cell_list.append(LSTMCell(rnn_cell_size, debug=debug))
        rnn_cell = MultiRNNCell(cell_list)
    return rnn_cell
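A quick usage sketch (the argument values are illustrative, not from the original code): with n_layers=1 the function returns a single LSTMCell, otherwise a MultiRNNCell wrapping one LSTMCell per layer, each built under the same constant WeightsInitializer.

single_cell = get_rnn_cell(rnn_cell_size=4, dropout_prob=1.0, n_layers=1, debug=False)   # plain LSTMCell
stacked_cell = get_rnn_cell(rnn_cell_size=4, dropout_prob=1.0, n_layers=3, debug=False)  # MultiRNNCell of three LSTMCells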
def create_attention(decoding_cell, encoding_op, encoding_st, fr_len):
    if args.attention_option == "Luong":
        with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
            attention_mechanism = LuongAttention(hidden_size, encoding_op, fr_len)
        decoding_cell = AttentionWrapper(decoding_cell, attention_mechanism, hidden_size)
        attention_zero_state = decoding_cell.zero_state(batch_size)
        attention_zero_state = attention_zero_state.clone(cell_state=encoding_st)
        print("attentionstate0:", attention_zero_state)
        return decoding_cell, attention_zero_state
def RNN(x, weights, biases):
    fw_cell_list = []
    bw_cell_list = []
    for i in range(n_layers):
        with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
            fw_cell_list.append(LSTMCell(n_hidden, debug=True))
            bw_cell_list.append(LSTMCell(n_hidden, debug=True))
    fw_cell = MultiRNNCell(fw_cell_list)
    bw_cell = MultiRNNCell(bw_cell_list)
    result, state = bidirectional_dynamic_rnn(fw_cell, bw_cell, symbols_in_keys)
    # Dense stays outside the WeightsInitializer scope because we are
    # passing constants for its kernel and bias.
    out_l = Dense(10, kernel_initializer=init_ops.Constant(out_weights),
                  bias_initializer=init_ops.Constant(out_biases))
    fw_result, bw_result = result
    h = np.concatenate((fw_result, bw_result), -1)
    pred = out_l(h[0][-1].reshape(1, vocab_size))
    print("pred:", pred)
    return pred
def decoding_layer(decoding_embed_inp, embeddings, encoding_op, encoding_st, v_size,
                   fr_len, en_len, max_en_len, rnn_cell_size, word2int,
                   dropout_prob, batch_size, n_layers):
    out_l = Dense(len(en_word2int) + 1, kernel_initializer=init_ops.Constant(init))
    logits_tr = training_decoding_layer(decoding_embed_inp, en_len,
                                        get_rnn_cell(rnn_cell_size, dr_prob, n_layers, debug),
                                        encoding_op, encoding_st, out_l,
                                        v_size, fr_len, max_en_len)
    return logits_tr
def create_attention(decoding_cell, encoding_op, encoding_st, fr_len):
    if args.attention_option == "Luong":
        print("Attention is all I need.fr_len:", fr_len, decoding_cell)
        #print("encoding_op[0].shape:", encoding_op[0].shape, type(encoding_op), len(encoding_op))
        #if encoder_type == "bi":
        #    encoding_op = np.concatenate((encoding_op[0], encoding_op[1]), axis=-1)
        #    encoding_op = encoding_op[0] + encoding_op[1]
        with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
            attention_mechanism = LuongAttention(hidden_size, encoding_op, fr_len)
        decoding_cell = AttentionWrapper(decoding_cell, attention_mechanism, hidden_size)
        attention_zero_state = decoding_cell.zero_state(batch_size)
        attention_zero_state = attention_zero_state.clone(cell_state=encoding_st)
        print("attentionstate0:", attention_zero_state)
        return decoding_cell, attention_zero_state
def __init__(self, units, activation=None, use_bias=True, kernel_initializer=None,
             bias_initializer=init_ops.Constant(0), kernel_regularizer=None,
             bias_regularizer=None, activity_regularizer=None, kernel_constraint=None,
             bias_constraint=None, trainable=True, name=None, debug=False,
             backpassdebug=False):
    self.debug = debug
    self.backpassdebug = backpassdebug
    self.use_bias = use_bias
    self.units = units
    # First preference goes to the static initializer set through "WeightsInitializer",
    # but don't use both at once to avoid confusion.
    if WeightsInitializer.initializer is not None:
        self.init_function = WeightsInitializer.initializer
    else:
        # Otherwise fall back to the usual "kernel_initializer".
        if kernel_initializer is None:
            self.init_function = init_ops.RandomUniform()
        else:
            self.init_function = kernel_initializer
    if self.use_bias:
        if bias_initializer is not None:
            self.bias_initializer = bias_initializer
    self.kernelname = None
    self.biasname = None
    self.use_act = False
    self.activation = None
    if activation is not None:
        self.use_act = True
        self.activation = activation
    self.trainable = trainable
    if name is None:
        self.name = "FeedForward"
    else:
        self.name = name
    self.ffl = FFLayer(name=self.name, layer=self)
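A short usage sketch of that preference order (the layer sizes are illustrative): inside a WeightsInitializer block the static initializer wins and any kernel_initializer argument is ignored; outside the block, kernel_initializer (or the RandomUniform default) is used.

with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
    d1 = Dense(10)                                               # kernel initialized to 0.1 everywhere
    d2 = Dense(10, kernel_initializer=init_ops.RandomUniform())  # still 0.1: the static initializer takes precedence
d3 = Dense(10, kernel_initializer=init_ops.RandomUniform())      # outside the block: RandomUniform is used
d4 = Dense(10)                                                   # no initializer given: falls back to RandomUniform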
step = 0
#offset = rnd.randint(0, n_input + 1)
offset = 2
end_offset = n_input + 1
acc_total = 0
loss_total = 0
print("offset:", offset)

# Constant weights and initial state, only for testing.
weights = np.ones([4 * n_hidden, vocab_size + n_hidden + 1]) * .1
c = np.ones((n_hidden, 1))
h = np.ones((n_hidden, 1))
#initstate = (c, h)
initstate = LSTMStateTuple(c, h)

with WeightsInitializer(initializer=init_ops.Constant(0.1)) as vs:
    cell = LSTMCell(n_hidden, debug=True)
gdo = BatchGradientDescent(learning_rate)
out_l = Dense(10, kernel_initializer=init_ops.Constant(out_weights),
              bias_initializer=init_ops.Constant(out_biases))

while step < training_iters:
    if offset > (len(train_data) - end_offset):
        offset = rnd.randint(0, n_input + 1)
        print("offset:", offset)
    symbols_in_keys = [input_one_hot(dictionary[str(train_data[i])], vocab_size)
                       for i in range(offset, offset + n_input)]
    symbols_in_keys = np.reshape(np.array(symbols_in_keys),
lr = args.learning_rate
debug = args.debug
per_epoch = args.per_epoch
logs_path = args.out_dir
display_steps = args.display_steps

fr_embeddings_matrix, en_embeddings_matrix, fr_word2int, en_word2int, fr_filtered, en_filtered, args = get_nmt_data()
set_modelparams(args)
make_model()

en_train = en_filtered[0:30000]
fr_train = fr_filtered[0:30000]
update_check = (len(fr_train) // batch_size // per_epoch) - 1
out_l = Dense(len(en_word2int) + 1, kernel_initializer=init_ops.Constant(init))

for epoch_i in range(1, epochs + 1):
    update_loss = 0
    batch_loss = 0
    for batch_i, (en_batch, fr_batch, en_text_len, fr_text_len) in enumerate(
            get_batches(en_train, fr_train, batch_size)):
        before = time.time()
        encoding_optf, encoding_sttf, logits_tr = seq2seq_model(
            fr_batch[:, ::-1], en_batch, dr_prob, fr_text_len, en_text_len,
            np.amax(en_text_len), len(en_word2int) + 1, hidden_size,
            n_layers, en_word2int, batch_size)
        #print("batch:", batch_i, "decoding:logits:", logits_tr)
        yhat, loss = sequence_loss(logits_tr.rnn_output, en_batch, make_mask(en_batch))
        print("loss:", loss)
        gradients = gdo.compute_gradients(yhat, en_batch)
#!/usr/bin/env python3
from org.mk.training.dl.rnn_cell import LSTMCell
from org.mk.training.dl.rnn import LSTMStateTuple
from org.mk.training.dl.common import WeightsInitializer
from org.mk.training.dl import init_ops
import numpy as np

n_hidden = 2

# A 2-unit LSTM cell whose weights are all initialized to the constant 0.5.
with WeightsInitializer(initializer=init_ops.Constant(0.5)) as vs:
    cell = LSTMCell(n_hidden, debug=True)

# Previous cell state, previous hidden state, and one input step.
c = 0.1 * np.asarray([[0], [1]])
h = 0.1 * np.asarray([[2], [3]])
x = np.array([[1], [1], [1]])

print(cell(x, (c, h)))

# Values the forward pass should reproduce.
expected_h = np.array([[0.64121795, 0.68166804]])
expected_c = np.array([[0.88477188, 0.98103917]])
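As a sanity check on those expected values, the sketch below reproduces them by hand with plain NumPy. It assumes the cell follows the standard LSTM equations, that the constant 0.5 applies to the bias column as well as the input and recurrent weights, and that the forget gate carries an extra bias of 1.0 (as in TensorFlow's LSTMCell); these assumptions are mine, not stated in the snippet above.

import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

x_in = np.array([1.0, 1.0, 1.0])    # the three input features
h_prev = np.array([0.2, 0.3])       # previous hidden state
c_prev = np.array([0.0, 0.1])       # previous cell state

# With every weight equal to 0.5, each gate of each unit sees the same
# pre-activation: 0.5 * (sum(x) + sum(h_prev) + 1 for the bias column) = 2.25.
z = 0.5 * (x_in.sum() + h_prev.sum() + 1.0)

i = sigmoid(z)          # input gate
f = sigmoid(z + 1.0)    # forget gate, assuming forget_bias = 1.0
g = np.tanh(z)          # candidate cell state
o = sigmoid(z)          # output gate

c_new = f * c_prev + i * g
h_new = o * np.tanh(c_new)

print(c_new)  # ~ [0.88477188, 0.98103917]
print(h_new)  # ~ [0.64121795, 0.68166804]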