def loss_decoder(self, logits):
    # logits is a max_len sized list of 2-D tensors of dimension batch_size * decoder_words
    # self.decoder_text_outputs is a max_len sized list of 1-D tensors of dimension batch_size
    # self.text_weights is a max_len sized list of 1-D tensors of dimension batch_size
    losses = seq2seq.sequence_loss_by_example(logits, self.decoder_text_outputs, self.text_weights)
    # losses is a 1-D tensor of dimension batch_size
    return losses
def loss(self, logits):
    '''Calculates the sequence loss between logits (the decoder's predicted output) and self.targets (the true output).

    Args:
        logits: output of the decoder, a list of length self.max_len whose elements are tensors of size batch_size * self.vocab_size.

    Returns:
        losses: the loss calculated between the predicted output and the true output.
    '''
    # self.targets: list of 1-D batch-sized int32 tensors of the same length as logits.
    # self.weights: list of 1-D batch-sized float tensors of the same length as logits.
    losses = seq2seq.sequence_loss_by_example(logits, self.targets, self.weights)
    return losses
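A minimal, self-contained sketch of what sequence_loss_by_example expects and returns, assuming the old-style import from tensorflow.models.rnn that these snippets appear to rely on; the toy shapes and target values are illustrative only.

import tensorflow as tf
from tensorflow.models.rnn import seq2seq  # assumed old TF layout; later tf.contrib.legacy_seq2seq

batch_size, vocab_size, max_len = 2, 5, 3

# One logits, one target and one weight tensor per time step.
logits = [tf.random_normal([batch_size, vocab_size]) for _ in range(max_len)]
targets = [tf.constant([1, 3], dtype=tf.int32) for _ in range(max_len)]
weights = [tf.ones([batch_size]) for _ in range(max_len)]

# losses is a 1-D tensor of shape [batch_size]: the cross-entropy for each
# example, summed over time steps and (by default) divided by its total weight.
losses = seq2seq.sequence_loss_by_example(logits, targets, weights)

with tf.Session() as sess:
    print(sess.run(losses))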
def loss_task_text(self, logits):
    # logits is a max_len sized list of 2-D tensors of dimension batch_size * decoder_words
    # self.target_text is a max_len sized list of 1-D tensors of dimension batch_size
    # self.text_weights is a max_len sized list of 1-D tensors of dimension batch_size
    losses = seq2seq.sequence_loss_by_example(logits, self.target_text, self.text_weights)
    # losses is a 1-D tensor of dimension batch_size
    return losses
def __init__(self, is_training, config):
    self.batch_size = batch_size = config.batch_size
    self.num_steps = num_steps = config.num_steps
    size = config.hidden_size
    vocab_size = config.vocab_size

    self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
    self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

    # Slightly better results can be obtained with forget gate biases
    # initialized to 1 but the hyperparameters of the model would need to be
    # different than reported in the paper.
    lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
    if is_training and config.keep_prob < 1:
        lstm_cell = rnn_cell.DropoutWrapper(
            lstm_cell, output_keep_prob=config.keep_prob)
    cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

    self._initial_state = cell.zero_state(batch_size, tf.float32)

    with tf.device("/cpu:0"):
        embedding = tf.get_variable("embedding", [vocab_size, size])
        inputs = tf.nn.embedding_lookup(embedding, self._input_data)

    if is_training and config.keep_prob < 1:
        inputs = tf.nn.dropout(inputs, config.keep_prob)

    # Simplified version of tensorflow.models.rnn.rnn.py's rnn().
    # This builds an unrolled LSTM for tutorial purposes only.
    # In general, use the rnn() or state_saving_rnn() from rnn.py.
    #
    # The alternative version of the code below is:
    #
    # from tensorflow.models.rnn import rnn
    # inputs = [tf.squeeze(input_, [1])
    #           for input_ in tf.split(1, num_steps, inputs)]
    # outputs, state = rnn.rnn(cell, inputs, initial_state=self._initial_state)
    outputs = []
    state = self._initial_state
    with tf.variable_scope("RNN"):
        for time_step in range(num_steps):
            if time_step > 0:
                tf.get_variable_scope().reuse_variables()
            (cell_output, state) = cell(inputs[:, time_step, :], state)
            outputs.append(cell_output)

    output = tf.reshape(tf.concat(1, outputs), [-1, size])
    softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
    softmax_b = tf.get_variable("softmax_b", [vocab_size])
    logits = tf.matmul(output, softmax_w) + softmax_b
    loss = seq2seq.sequence_loss_by_example(
        [logits],
        [tf.reshape(self._targets, [-1])],
        [tf.ones([batch_size * num_steps])])
    self._cost = cost = tf.reduce_sum(loss) / batch_size
    self._final_state = state

    if not is_training:
        return

    self._lr = tf.Variable(0.0, trainable=False)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                      config.max_grad_norm)
    optimizer = tf.train.GradientDescentOptimizer(self.lr)
    self._train_op = optimizer.apply_gradients(zip(grads, tvars))
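A minimal sketch of driving one training step of the model above, assuming the class exposes the usual read-only properties (input_data, targets, initial_state, cost, final_state, train_op) over the private attributes it creates, as in the TensorFlow PTB tutorial this snippet follows; the function name run_step is hypothetical.

def run_step(session, model, x_batch, y_batch, state):
    # x_batch, y_batch: int32 arrays of shape [batch_size, num_steps].
    # Feeds one batch, runs the clipped-gradient update, and returns the
    # per-batch cost plus the final LSTM state to carry into the next step.
    cost, state, _ = session.run(
        [model.cost, model.final_state, model.train_op],
        {model.input_data: x_batch,
         model.targets: y_batch,
         model.initial_state: state})
    return cost, state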
def loss(self, logits):
    # self.decoder_label is a tensor of dimension (self.num_steps_label, batch_size)
    losses = seq2seq.sequence_loss_by_example(logits, self.decoder_label, self.decoder_sequence_weight)
    return losses
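sequence_loss_by_example expects Python lists with one tensor per time step rather than a single stacked tensor, so if decoder_label really is one 2-D (num_steps_label, batch_size) tensor it would need to be split first. A hedged sketch of that conversion, assuming an older TensorFlow where tf.unpack splits along the first dimension; the list-valued names are hypothetical.

decoder_label_list = tf.unpack(self.decoder_label)              # num_steps_label tensors of shape [batch_size]
decoder_weight_list = tf.unpack(self.decoder_sequence_weight)   # matching per-step weight tensors
losses = seq2seq.sequence_loss_by_example(logits, decoder_label_list, decoder_weight_list)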