Example #1
    def loss_decoder(self, logits):
        # logits is a max_len sized list of 2-D tensors of dimension batch_size * decoder_words
        # self.decoder_text_outputs is a max_len sized list of 1-D tensors of dimension batch_size
        # self.text_weights is a max_len sized list of 1-D tensors of dimension batch_size
        losses = seq2seq.sequence_loss_by_example(logits, self.decoder_text_outputs, self.text_weights)
        # losses is a 1-D tensor of dimension batch_size
        return losses
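For reference, here is a minimal NumPy sketch (not the TensorFlow implementation) of what sequence_loss_by_example computes with its default average_across_timesteps=True: for each batch element, the weighted softmax cross-entropy summed over time steps and divided by that sequence's total weight. The function name sequence_loss_by_example_np and the epsilon guard are illustrative only.

import numpy as np

def sequence_loss_by_example_np(logits, targets, weights):
    # logits:  list (length max_len) of arrays [batch_size, num_symbols]
    # targets: list (length max_len) of int arrays [batch_size]
    # weights: list (length max_len) of float arrays [batch_size]
    batch_size = logits[0].shape[0]
    log_perps = np.zeros(batch_size)
    total_weight = np.zeros(batch_size)
    for step_logits, step_targets, step_weights in zip(logits, targets, weights):
        # softmax cross-entropy for this time step, computed in a numerically stable way
        shifted = step_logits - step_logits.max(axis=1, keepdims=True)
        log_probs = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
        crossent = -log_probs[np.arange(batch_size), step_targets]
        log_perps += step_weights * crossent
        total_weight += step_weights
    # per-example loss, averaged over time by the total weight of each sequence
    return log_perps / (total_weight + 1e-12)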
    def loss(self, logits):
        '''Calculates the sequence loss between logits (the decoder's predicted output) and self.targets (the true output).
        Args:
            logits: a list of length self.max_len (the decoder output), each element a tensor of size batch_size * self.vocab_size.
        Returns:
            losses: the loss computed between the predicted output and the true output.'''

        # self.targets: list of 1-D batch-sized int32 tensors of the same length as logits.
        # self.weights: list of 1-D batch-sized float tensors of the same length as logits.
        losses = seq2seq.sequence_loss_by_example(logits, self.targets,
                                                  self.weights)
        return losses
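This method returns a 1-D, batch-sized tensor of per-example losses and leaves the batch reduction to the caller. As a hedged aside, the same seq2seq module also provides sequence_loss, which additionally averages across the batch and returns a scalar; a sketch, assuming the same logits/targets/weights lists as above:

# Sketch only: sequence_loss averages the per-example losses across the batch,
# so the caller gets a scalar objective directly.
scalar_loss = seq2seq.sequence_loss(logits, self.targets, self.weights)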
    def loss_task_text(self, logits):
        # logits is a max_len sized list of 2-D tensors of dimension batch_size * decoder_words
        # self.target_text is a max_len sized list of 1-D tensors of dimension batch_size
        # self.text_weights is a max_len sized list of 1-D tensors of dimension batch_size
        losses = seq2seq.sequence_loss_by_example(logits, self.target_text, self.text_weights)
        # losses is a 1-D tensor of dimension batch_size
        return losses
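A hedged sketch of how the inputs these methods expect might be wired up. The sizes max_len, batch_size and decoder_words are placeholders for the model's own hyperparameters, and the import path matches the TF 0.x-era snippets on this page (later versions moved the module to tf.contrib.legacy_seq2seq).

import tensorflow as tf
from tensorflow.models.rnn import seq2seq

max_len, batch_size, decoder_words = 20, 32, 10000

# max_len-sized lists of per-time-step tensors, as documented in the comments above
logits = [tf.placeholder(tf.float32, [batch_size, decoder_words]) for _ in range(max_len)]
target_text = [tf.placeholder(tf.int32, [batch_size]) for _ in range(max_len)]
text_weights = [tf.placeholder(tf.float32, [batch_size]) for _ in range(max_len)]

losses = seq2seq.sequence_loss_by_example(logits, target_text, text_weights)

# The 1-D, batch-sized losses are usually reduced to a scalar training cost,
# exactly as Example #4 does below.
cost = tf.reduce_sum(losses) / batch_size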
Example #4
    def __init__(self, is_training, config):
        self.batch_size = batch_size = config.batch_size
        self.num_steps = num_steps = config.num_steps
        size = config.hidden_size
        vocab_size = config.vocab_size

        self._input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
        self._targets = tf.placeholder(tf.int32, [batch_size, num_steps])

        # Slightly better results can be obtained with forget gate biases
        # initialized to 1 but the hyperparameters of the model would need to be
        # different than reported in the paper.
        lstm_cell = rnn_cell.BasicLSTMCell(size, forget_bias=0.0)
        if is_training and config.keep_prob < 1:
            lstm_cell = rnn_cell.DropoutWrapper(
                lstm_cell, output_keep_prob=config.keep_prob)
        cell = rnn_cell.MultiRNNCell([lstm_cell] * config.num_layers)

        self._initial_state = cell.zero_state(batch_size, tf.float32)

        with tf.device("/cpu:0"):
            embedding = tf.get_variable("embedding", [vocab_size, size])
            inputs = tf.nn.embedding_lookup(embedding, self._input_data)

        if is_training and config.keep_prob < 1:
            inputs = tf.nn.dropout(inputs, config.keep_prob)

        # Simplified version of tensorflow.models.rnn.rnn.py's rnn().
        # This builds an unrolled LSTM for tutorial purposes only.
        # In general, use the rnn() or state_saving_rnn() from rnn.py.
        #
        # The alternative version of the code below is:
        #
        # from tensorflow.models.rnn import rnn
        # inputs = [tf.squeeze(input_, [1])
        #           for input_ in tf.split(1, num_steps, inputs)]
        # outputs, state = rnn.rnn(cell, inputs, initial_state=self._initial_state)
        outputs = []
        state = self._initial_state
        with tf.variable_scope("RNN"):
            for time_step in range(num_steps):
                if time_step > 0: tf.get_variable_scope().reuse_variables()
                (cell_output, state) = cell(inputs[:, time_step, :], state)
                outputs.append(cell_output)

        output = tf.reshape(tf.concat(1, outputs), [-1, size])
        softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
        softmax_b = tf.get_variable("softmax_b", [vocab_size])
        logits = tf.matmul(output, softmax_w) + softmax_b
        loss = seq2seq.sequence_loss_by_example(
            [logits], [tf.reshape(self._targets, [-1])],
            [tf.ones([batch_size * num_steps])])
        self._cost = cost = tf.reduce_sum(loss) / batch_size
        self._final_state = state

        if not is_training:
            return

        self._lr = tf.Variable(0.0, trainable=False)
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(cost, tvars),
                                          config.max_grad_norm)
        optimizer = tf.train.GradientDescentOptimizer(self.lr)
        self._train_op = optimizer.apply_gradients(zip(grads, tvars))
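Since this constructor only builds the graph, a minimal driver sketch may help; it assumes (this is not part of the snippet) that m is an instance of this class, config carries a learning_rate field, and data_iterator() yields (input, target) batches of shape [batch_size, num_steps]. Note that the learning-rate variable above is created at 0.0 and is not trainable, so it must be assigned before the gradient-descent step has any effect.

# Hedged driver sketch; m, config and data_iterator() are assumptions, not
# defined in the snippet above.
new_lr = tf.placeholder(tf.float32, shape=[])
lr_update = tf.assign(m._lr, new_lr)              # set the non-trainable learning rate

with tf.Session() as session:
    session.run(tf.initialize_all_variables())             # TF 0.x-era initializer
    session.run(lr_update, feed_dict={new_lr: config.learning_rate})

    state = session.run(m._initial_state)                  # zero state of the RNN
    for x, y in data_iterator():                           # hypothetical batch source
        cost, state, _ = session.run(
            [m._cost, m._final_state, m._train_op],
            feed_dict={m._input_data: x,
                       m._targets: y,
                       m._initial_state: state})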
    def loss(self, logits):
        # self.decoder_label is a tensor of dimension (self.num_steps_label, batch_size)
        losses = seq2seq.sequence_loss_by_example(logits, self.decoder_label,
                                                  self.decoder_sequence_weight)
        return losses
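If decoder_label really is a single 2-D tensor of shape (num_steps_label, batch_size), it would typically be unpacked along the time axis into the list of per-step tensors that sequence_loss_by_example expects; a sketch, assuming the TF 0.x naming (tf.unpack, later renamed tf.unstack):

# Hedged sketch: converting (num_steps_label, batch_size) tensors into the
# lists of 1-D, batch-sized tensors that sequence_loss_by_example expects.
decoder_label_list = tf.unpack(self.decoder_label)             # axis 0 = time steps
decoder_weight_list = tf.unpack(self.decoder_sequence_weight)
losses = seq2seq.sequence_loss_by_example(logits, decoder_label_list,
                                          decoder_weight_list)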