def test_gradients(self):
  inputs = tf.random_normal(
      [self.batch_size, self.sequence_length, self.input_depth])
  seq_length = tf.ones(self.batch_size, dtype=tf.int32) * self.sequence_length
  labels = np.random.randint(0, self.vocab_size,
                             [self.batch_size, self.sequence_length])

  helper = decode_helper.TrainingHelper(
      inputs=inputs, sequence_length=seq_length)
  decoder_fn = self.create_decoder(
      helper=helper, mode=tf.contrib.learn.ModeKeys.TRAIN)
  initial_state = decoder_fn.cell.zero_state(self.batch_size, dtype=tf.float32)
  decoder_output, _ = decoder_fn(initial_state, helper)

  # Logits are time-major [time, batch, vocab] (see test_with_fixed_inputs),
  # so transpose them to batch-major to match the [batch, time] labels.
  losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=tf.transpose(decoder_output.logits, [1, 0, 2]), labels=labels)
  optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
  grads_and_vars = optimizer.compute_gradients(tf.reduce_mean(losses))

  #pylint: disable=E1101
  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    grads_and_vars_ = sess.run(grads_and_vars)

  for grad, _ in grads_and_vars_:
    self.assertFalse(np.isnan(grad).any())

  return grads_and_vars_
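# Note (not from the original test): compute_gradients() returns (None, var)
# pairs for variables disconnected from the loss, and sess.run cannot fetch
# None, so the test above implicitly assumes every trainable variable receives
# a gradient. A minimal standalone sketch of the filtering idiom, with
# illustrative variable names:
import numpy as np
import tensorflow as tf

x = tf.get_variable("x", shape=[2], initializer=tf.ones_initializer())
unused = tf.get_variable("unused", shape=[2])  # disconnected from the loss
loss = tf.reduce_sum(tf.square(x))

optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
# Drop (None, var) pairs before fetching; sess.run rejects None fetches.
grads_and_vars = [(g, v) for g, v in optimizer.compute_gradients(loss)
                  if g is not None]

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  for grad, _ in sess.run(grads_and_vars):
    assert not np.isnan(grad).any()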
def test_with_fixed_inputs(self):
  inputs = tf.random_normal(
      [self.batch_size, self.sequence_length, self.input_depth])
  seq_length = tf.ones(self.batch_size, dtype=tf.int32) * self.sequence_length
  helper = decode_helper.TrainingHelper(
      inputs=inputs, sequence_length=seq_length)
  decoder_fn = self.create_decoder(
      helper=helper, mode=tf.contrib.learn.ModeKeys.TRAIN)
  initial_state = decoder_fn.cell.zero_state(self.batch_size, dtype=tf.float32)
  decoder_output, _ = decoder_fn(initial_state, helper)

  #pylint: disable=E1101
  with self.test_session() as sess:
    sess.run(tf.global_variables_initializer())
    decoder_output_ = sess.run(decoder_output)

  # Decoder outputs are time-major: [time, batch, ...].
  np.testing.assert_array_equal(
      decoder_output_.logits.shape,
      [self.sequence_length, self.batch_size, self.vocab_size])
  np.testing.assert_array_equal(
      decoder_output_.predicted_ids.shape,
      [self.sequence_length, self.batch_size])

  return decoder_output_
def _decode_train(self, decoder, bridge, _encoder_output, _features, labels):
  """Runs decoding in training mode."""
  target_embedded = tf.nn.embedding_lookup(self.target_embedding,
                                           labels["target_ids"])
  # Teacher forcing: feed the embedded targets without the final token, so
  # the decoder predicts each next token from the ground-truth prefix.
  helper_train = tf_decode_helper.TrainingHelper(
      inputs=target_embedded[:, :-1],
      sequence_length=labels["target_len"] - 1)
  decoder_initial_state = bridge()
  return decoder(decoder_initial_state, helper_train)
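# For contrast, a sketch of the inference-mode counterpart (assumed, not the
# original code): greedy decoding from a start token until an end token, via
# the same bridge/decoder interfaces. The attributes target_start_id and
# target_end_id and the features key "source_ids" are hypothetical
# placeholders.
def _decode_infer(self, decoder, bridge, _encoder_output, features, _labels):
  """Runs decoding in inference mode (illustrative sketch)."""
  batch_size = tf.shape(features["source_ids"])[0]
  helper_infer = tf_decode_helper.GreedyEmbeddingHelper(
      embedding=self.target_embedding,
      start_tokens=tf.fill([batch_size], self.target_start_id),
      end_token=self.target_end_id)
  decoder_initial_state = bridge()
  return decoder(decoder_initial_state, helper_infer)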
    params={},
    mode=pmode,
    vocab_size=output_vocab_size,
    attention_values=summedAttention,
    attention_values_length=summedLengths,
    attention_keys=summedOutputs,
    attention_fn=attention.AttentionLayerBahdanau(params={}, mode=pmode))

batch_size = 2
target_start_id = 1

# helper_infer = tf_decode_helper.GreedyEmbeddingHelper(
#     embedding=output_embeddings,
#     start_tokens=tf.fill([batch_size], target_start_id),
#     end_token=5)
helper_train = tf_decode_helper.TrainingHelper(
    inputs=decoder_targets_embedded[:, :-1],
    sequence_length=decoder_targets_length - 1)

# Combine the two encoder final states into one initial decoder state by
# averaging their c and h components with equal (0.5) weights.
dstate = eout.final_state
summed_encoder_final_state_c = tf.add(
    tf.multiply(sumUp(dstate[0].c), .5), tf.multiply(sumUp(dstate[1].c), .5))
summed_encoder_final_state_h = tf.add(
    tf.multiply(sumUp(dstate[0].h), .5), tf.multiply(sumUp(dstate[1].h), .5))
summed_encoder_final_state = LSTMStateTuple(
    c=summed_encoder_final_state_c, h=summed_encoder_final_state_h)

decoder_output, _ = decoder(summed_encoder_final_state, helper_train)
# predictions = predict(decoder_output)['predicted_ids']
losses, loss = compute_loss(decoder_output=decoder_output,
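# The compute_loss call above is truncated in this snippet; it returns a
# (per-step losses, scalar loss) pair. A plausible masked-cross-entropy
# sketch of such a function, assuming time-major logits [time, batch, vocab]
# and time-major integer targets [time, batch] (the body below is an
# assumption, not the original implementation):
def compute_loss(decoder_output, targets, target_lengths):
  """Masked sequence loss returning (per-step losses, scalar mean loss)."""
  losses = tf.nn.sparse_softmax_cross_entropy_with_logits(
      logits=decoder_output.logits, labels=targets)
  # Mask out positions beyond each example's true length.
  mask = tf.sequence_mask(target_lengths, maxlen=tf.shape(targets)[0])
  losses *= tf.transpose(tf.to_float(mask), [1, 0])  # back to time-major
  # Normalize by the number of real (unmasked) target tokens.
  loss = tf.reduce_sum(losses) / tf.to_float(tf.reduce_sum(target_lengths))
  return losses, loss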