Example no. 1
    def add_loss_op(self, output):
        """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
        ### YOUR CODE HERE
        loss_init = [tf.ones([self.config.batch_size * self.config.num_steps])]
        reshaped_labels = tf.reshape(
            self.labels_placeholder,
            [self.config.batch_size * self.config.num_steps, -1])
        cross_entropy = sequence_loss([output], [reshaped_labels], loss_init,
                                      len(self.vocab))

        #add cross_entropy (loss between pred and labels)
        tf.add_to_collection("total_loss", cross_entropy)
        #tf.get_collection(name, scope=None) : Returns a list of values in the collection with the given name
        loss = tf.add_n(tf.get_collection("total_loss"))

        ### END YOUR CODE
        return loss
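
For orientation, here is a minimal, self-contained sketch (not taken from any of the repositories above) of the list-based sequence_loss contract these snippets share: logits, targets, and weights are each Python lists, and a single flattened "timestep" can hold the whole batch, mirroring the [output] / [reshaped_labels] pattern in Example no. 1. The batch_size, num_steps, and vocab_size values are illustrative, and the import path depends on the TensorFlow version.

import tensorflow as tf
# TF <= 0.11: from tensorflow.python.ops.seq2seq import sequence_loss
# TF 0.12:    sequence_loss = tf.nn.seq2seq.sequence_loss
# TF 1.x:     sequence_loss = tf.contrib.legacy_seq2seq.sequence_loss
sequence_loss = tf.contrib.legacy_seq2seq.sequence_loss

batch_size, num_steps, vocab_size = 32, 10, 1000  # illustrative values

# logits: list of 2-D float tensors [batch, vocab]; here one flattened entry
logits = tf.placeholder(tf.float32, [batch_size * num_steps, vocab_size])
# targets: list of 1-D int32 tensors holding the gold token ids
targets = tf.placeholder(tf.int32, [batch_size * num_steps])
# weights: list of 1-D float tensors; all ones means every position counts equally
weights = tf.ones([batch_size * num_steps])

loss = sequence_loss([logits], [targets], [weights])  # 0-d average cross-entropy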
Example no. 2
  def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    #"""
    all_ones_weights = [tf.ones([self.config.batch_size * self.config.num_steps])]
    # output is logits
    loss = sequence_loss([output], \
        [tf.reshape(self.labels_placeholder, [-1])],\
        all_ones_weights) # , len(self.vocab)
    """
    all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
    cross_entropy = sequence_loss([output], [tf.reshape(self.labels_placeholder, [-1])], all_ones, len(self.vocab))
    tf.add_to_collection('total_loss', cross_entropy)
    loss = tf.add_n(tf.get_collection('total_loss'))
    """
    ### END YOUR CODE
    return loss
Example no. 3
    def add_loss_op(self, output):
        """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 
          Check https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py

    Args:
      output: A tensor of shape (None, self.vocab)  (LIBIN : not used)
    Returns:
      loss: A 0-d tensor (scalar)
    """
        ### YOUR CODE HERE
        # output shape  : [num_steps * (batch_size, len(self.vocab))]
        # targets shape : [num_steps * (batch_size, )]
        # weights shape : [num_steps * (batch_size, )]
        targets = [
            tf.squeeze(ts, [1]) for ts in tf.split(1, self.config.num_steps,
                                                   self.labels_placeholder)
        ]
        weights = [
            tf.ones((self.config.batch_size, ))
            for step in xrange(self.config.num_steps)
        ]
        loss = sequence_loss(output, targets, weights)
        ### END YOUR CODE
        return loss
Example no. 4
    def build_loss(self, out, out_tensor):
        """Build a loss function and accuracy for the model."""
        print('  Building loss and accuracy')

        with tf.variable_scope('accuracy'):
            argmax = tf.to_int32(tf.argmax(out_tensor, 2))
            correct = tf.to_float(tf.equal(argmax, self.ts)) * self.t_mask
            accuracy = tf.reduce_sum(correct) / tf.reduce_sum(self.t_mask)

        with tf.variable_scope('loss'):
            with tf.variable_scope('split_t_and_mask'):
                split_kwargs = {
                    'split_dim': 1,
                    'num_split': self.max_t_seq_len
                }
                ts = tf.split(value=self.ts, **split_kwargs)
                t_mask = tf.split(value=self.t_mask, **split_kwargs)
                t_mask = [tf.squeeze(weight) for weight in t_mask]

            loss = seq2seq.sequence_loss(out, ts, t_mask, self.max_t_seq_len)

            with tf.variable_scope('regularization'):
                regularize = tf.contrib.layers.l2_regularizer(self.reg_scale)
                params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                reg_term = sum([regularize(param) for param in params])

            loss += reg_term

        return loss, accuracy
Example no. 5
    def build_loss(self, out, out_tensor):
        """Build a loss function and accuracy for the model."""
        print('  Building loss and accuracy')

        with tf.variable_scope('accuracy'):
            argmax = tf.to_int32(tf.argmax(out_tensor, 2))
            correct = tf.to_float(tf.equal(argmax, self.ts)) * self.t_mask
            accuracy = tf.reduce_sum(correct) / tf.reduce_sum(self.t_mask)

        with tf.variable_scope('loss'):
            with tf.variable_scope('split_t_and_mask'):
                split_kwargs = { 'split_dim': 1,
                                 'num_split': self.max_t_seq_len }
                ts     = tf.split(value=self.ts,     **split_kwargs)
                t_mask = tf.split(value=self.t_mask, **split_kwargs)
                t_mask = [tf.squeeze(weight) for weight in t_mask]

            loss = seq2seq.sequence_loss(out, ts, t_mask,
                                         self.max_t_seq_len)

            with tf.variable_scope('regularization'):
                regularize = tf.contrib.layers.l2_regularizer(self.reg_scale)
                params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
                reg_term = sum([regularize(param) for param in params])

            loss += reg_term

        return loss, accuracy
Example no. 6
    def add_loss_op(self, output):
        """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
        ### YOUR CODE HERE
        #logits = output
        #print(logits)
        #targets = self.labels_placeholder
        #print(targets)
        #weights = tf.ones((self.config.batch_size * self.config.num_steps))
        #loss = sequence_loss(logits, tf.reshape(targets, [-1]), weights)
        all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
        cross_entropy = sequence_loss(
            [output], [tf.reshape(self.labels_placeholder, [-1])], all_ones,
            len(self.vocab))
        tf.add_to_collection('total_loss', cross_entropy)
        loss = tf.add_n(tf.get_collection('total_loss'))
        ### END YOUR CODE
        return loss
Example no. 7
    def add_loss_op(self, outputs):

        all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
        cross_entropy = sequence_loss([outputs],\
                                      [tf.reshape(self.label_placeholder, [-1])],\
                                      all_ones, len(self.vocab))
        tf.add_to_collection('total_loss', cross_entropy)
        loss = tf.add_n(tf.get_collection('total_loss'))

        return loss
Example no. 8
    def build_xent_loss(self, scores):
        batch_size = self.opts.batch_size
        sequence_length = self.opts.sequence_length
        vocab_dim = self.dataset.vocab_dim
        scores = tf.reshape(scores, (batch_size * sequence_length, vocab_dim))

        logits = [scores]
        targets = [tf.reshape(self.xent_targets_placeholder, [-1])]
        weights = [tf.ones((batch_size * sequence_length,))]
        loss = sequence_loss(logits, targets, weights)
        return loss
Example no. 9
 def add_loss_op(self, output):
     # (batch_size, num_steps)
     all_ones = [tf.ones([self.config.num_steps * self.config.batch_size])]
     # cross-entropy loss over the sequence, i.e. the loss for the whole length-num_steps sequence
     # see the sequence_loss source for what each argument means
     # sequence_loss returns the average log-perplexity, i.e. the average cross-entropy
     cross_entropy = sequence_loss(
         [output], [tf.reshape(self.labels_placeholder, [-1])], all_ones)
     # collect all the losses; to apply L2 regularization, consider adding RNN_I, RNN_H and U to total_loss
     tf.add_to_collection('total_loss', cross_entropy)
     loss = tf.add_n(tf.get_collection('total_loss'))
     return loss
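
Because the comments above note that sequence_loss returns the average log-perplexity (i.e. the average cross-entropy), the corresponding perplexity is simply its exponential. A one-line illustrative addition, assuming cross_entropy is the scalar computed above:

perplexity = tf.exp(cross_entropy)  # perplexity = exp(average cross-entropy)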
Example no. 10
  def create_loss(self):
    start_time = time.time()

    self.losses = []

    logits = self.decoder_states
    targets = self.tokens[1:]
    weights = self.tokens_weights[1:]

    log_perps = seq2seq.sequence_loss(logits, targets, weights, self.vocab_size)
    self.losses.append(log_perps)

    print('create_loss graph time %f' % (time.time() - start_time))
Example no. 11
 def add_loss_op(self, output):
   """Adds loss ops to the computational graph.
   Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.
   Args:
     output: A tensor of shape (None, self.vocab)
   Returns:
     loss: A 0-d tensor (scalar)
   """
   all_ones_weights = [tf.ones([self.config.batch_size * self.config.num_steps])]
   # output is logits
   loss = sequence_loss([output], \
       [tf.reshape(self.labels_placeholder, [-1])],\
       all_ones_weights) # , len(self.vocab)
   return loss
Example no. 12
    def add_loss_op(self, output):
        """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
        return sequence_loss([output], [
            tf.reshape(self.labels_placeholder,
                       [self.config.batch_size * self.config.num_steps, -1])
        ], [tf.constant(1.0)])
Example no. 13
File: ntm.py Project: sch0414/myNTM
    def get_loss(self, seq_length):
        if not self.outputs.has_key(seq_length):
            self.get_outputs(seq_length)

        if not self.losses.has_key(seq_length):
            loss = sequence_loss(logits=self.outputs[seq_length],
                targets = self.true_outputs[0:seq_length],
                weights = [1]*seq_length,
                average_across_timesteps = False,
                average_across_batch = False,
                softmax_loss_function = binary_cross_entropy_with_logits)

            self.losses[seq_length] = loss
        return self.losses[seq_length]
Example no. 14
  def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    weights = tf.ones([self.config.batch_size * self.config.num_steps])
    loss = sequence_loss([output], [tf.reshape(self.labels_placeholder, [-1])], [weights])
    ### END YOUR CODE
    return loss
Example no. 15
  def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    flattened_labels = tf.reshape(self.labels_placeholder, [-1])
    loss = sequence_loss([output], [flattened_labels], [tf.ones_like(flattened_labels, dtype=tf.float32)])
    ### END YOUR CODE
    return loss
Example no. 16
 def sequence_loss(self, y_pred, y_true):
     '''
     Loss function for the seq2seq RNN.  Reshape predicted and true (label) tensors, generate dummy weights,
     then use seq2seq.sequence_loss to actually compute the loss function.
     '''
     #print ("my_sequence_loss y_pred=%s, y_true=%s" % (y_pred, y_true))
     logits = tf.unpack(y_pred, axis=1)  # list of [-1, num_decoder_symbols] elements
     targets = tf.unpack(y_true, axis=1)		# y_true has shape [-1, self.out_seq_len]; unpack to list of self.out_seq_len [-1] elements
     #print ("my_sequence_loss logits=%s" % (logits,))
     #print ("my_sequence_loss targets=%s" % (targets,))
     weights = [tf.ones_like(yp, dtype=tf.float32) for yp in targets]
     #print ("my_sequence_loss weights=%s" % (weights,))
     sl = seq2seq.sequence_loss(logits, targets, weights)
     #print ("my_sequence_loss return = %s" % sl)
     return sl
Example no. 17
    def get_loss(self, seq_length):
        if not self.outputs.has_key(seq_length):
            self.get_outputs(seq_length)

        if not self.losses.has_key(seq_length):
            loss = sequence_loss(logits=self.outputs[seq_length],
                                targets=self.true_outputs[0:seq_length],
                                weights=[1] * seq_length,
                                average_across_timesteps=False,
                                average_across_batch=False,
                                softmax_loss_function=\
                                    binary_cross_entropy_with_logits)

            self.losses[seq_length] = loss 
        return self.losses[seq_length]
Example no. 18
 def sequence_loss(self, y_pred, y_true):
     '''
     Loss function for the seq2seq RNN.  Reshape predicted and true (label) tensors, generate dummy weights,
     then use seq2seq.sequence_loss to actually compute the loss function.
     '''
     if self.verbose > 2: print ("my_sequence_loss y_pred=%s, y_true=%s" % (y_pred, y_true))
     logits = tf.unstack(y_pred, axis=1)  # list of [-1, num_decoder_symbols] elements
     targets = tf.unstack(y_true, axis=1)		# y_true has shape [-1, self.out_seq_len]; unpack to list of self.out_seq_len [-1] elements
     if self.verbose > 2:
         print ("my_sequence_loss logits=%s" % (logits,))
         print ("my_sequence_loss targets=%s" % (targets,))
     weights = [tf.ones_like(yp, dtype=tf.float32) for yp in targets]
     if self.verbose > 4: print ("my_sequence_loss weights=%s" % (weights,))
     sl = seq2seq.sequence_loss(logits, targets, weights)
     if self.verbose > 2: print ("my_sequence_loss return = %s" % sl)
     return sl
Example no. 19
  def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    cross_entropy = sequence_loss([output], [tf.reshape(self.labels_placeholder, [-1])], [tf.constant(1.0, shape=[self.config.batch_size * self.config.num_steps])])
    tf.add_to_collection("total_loss", cross_entropy)
    loss = tf.add_n(tf.get_collection("total_loss"))
    ### END YOUR CODE
    return loss
Example no. 20
 def add_loss_op(self, output):
     """Adds loss ops to the computational graph.
     Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss.
     Args:
         output: A tensor of shape (None, self.vocab)
     Returns:
         loss: A 0-d tensor (scalar)
     """
     all_ones_weights = [
         tf.ones([self.config.batch_size * self.config.num_steps])
     ]
     # output is logits
     loss = sequence_loss([output], \
             [tf.reshape(self.labels_placeholder, [-1])],\
             all_ones_weights) # , len(self.vocab)
     return loss
Example no. 21
    def add_loss_op(self, output):
        """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
        ### YOUR CODE HERE
        flattened_labels = tf.reshape(self.labels_placeholder, [-1])
        weights = [tf.ones_like(flattened_labels, dtype=tf.float32)]
        cross_entropy = sequence_loss([output], [flattened_labels], weights)
        tf.add_to_collection('total_loss', cross_entropy)
        loss = tf.add_n(tf.get_collection('total_loss'))
        ### END YOUR CODE
        return loss
Example no. 22
  def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
    
    logits = [output]
    targets = [tf.reshape(self.labels_placeholder, [-1])]
    weights = [tf.ones(tf.shape(targets[0]))]
    
    loss = sequence_loss(logits, targets, weights, len(self.vocab))

    return loss
Example no. 23
    def add_loss_op(self, output):
        """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
        ### YOUR CODE HERE
        labels = tf.reshape(self.labels_placeholder, [-1])
        #onehot = tf.one_hot(intlabels, len(self.vocab), 1, 0)
        onehot = tf.to_int64(labels)
        weightCount = self.config.batch_size * self.config.num_steps
        weights = tf.ones([weightCount])
        loss = sequence_loss([output], [onehot], [weights])
        return loss
Example no. 24
  def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 
          tensorflow.python.ops.seq2seq.sequence_loss: search for the source code
    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
    #seq_loss = sequence_loss([output], [tf.reshape(self.labels_placeholder, [-1])], all_ones, len(self.vocab))
    seq_loss = sequence_loss([output], [tf.reshape(self.labels_placeholder, [-1])], all_ones)
    tf.add_to_collection("total_loss", seq_loss)
    loss = tf.add_n(tf.get_collection("total_loss"))
    ### END YOUR CODE
    return loss
Example no. 25
  def add_loss_op(self, output):
    """Adds loss ops to the computational graph.
    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 
    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    weights = tf.ones([self.config.batch_size, self.config.num_steps])
    output = tf.reshape(output, [self.config.batch_size, self.config.num_steps, -1])
    loss = sequence_loss(output, self.labels_placeholder, weights)
##    tf.add_to_collection('total_loss', cross_entropy)
##    loss = tf.add_n(tf.get_collection('total_loss'))
    tf.scalar_summary('loss', loss)
    
    ### END YOUR CODE
    return loss
Example no. 26
    def add_loss_op(self, output):
        """Adds loss ops to the computational graph.

    comment: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
        ##Code Begin
        all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
        cross_entropy = sequence_loss(
            [output], [tf.reshape(self.labels_placeholder, [-1])], all_ones,
            len(self.vocab))
        tf.add_to_collection('total_loss', cross_entropy)
        loss = tf.add_n(tf.get_collection('total_loss'))
        ##Code Ends
        return loss
Example no. 27
  def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
    # Return the average log-perplexity per symbol (weighted).
    cross_entropy = sequence_loss(
        [output], [tf.reshape(self.labels_placeholder, [-1])], all_ones, len(self.vocab))
    tf.add_to_collection('total_loss', cross_entropy)
    loss = tf.add_n(tf.get_collection('total_loss'))
    ### END YOUR CODE
    return loss
Example no. 28
  def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 
          Check https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/ops/seq2seq.py

    Args:
      output: A tensor of shape (None, self.vocab)  (LIBIN : not used)
    Returns:
      loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    # output shape  : [num_steps * (batch_size, len(self.vocab))]
    # targets shape : [num_steps * (batch_size, )]
    # weights shape : [num_steps * (batch_size, )]
    targets = [tf.squeeze(ts,[1]) for ts in tf.split(1, self.config.num_steps, self.labels_placeholder)]
    weights = [tf.ones((self.config.batch_size, )) for step in xrange(self.config.num_steps)]
    loss = sequence_loss(output, targets, weights)
    ### END YOUR CODE
    return loss
Example no. 29
def local_model_with_buckets(encoder_inputs,
                             decoder_inputs,
                             targets,
                             weights,
                             buckets,
                             seq2seq_f,
                             softmax_loss_function=None,
                             name=None):
    if len(encoder_inputs) < buckets[-1][0]:
        raise ValueError(
            "Length of encoder_inputs (%d) must be at least that of la"
            "st bucket (%d)." % (len(encoder_inputs), buckets[-1][0]))
    if len(targets) < buckets[-1][1]:
        raise ValueError("Length of targets (%d) must be at least that of last"
                         "bucket (%d)." % (len(targets), buckets[-1][1]))
    if len(weights) < buckets[-1][1]:
        raise ValueError("Length of weights (%d) must be at least that of last"
                         "bucket (%d)." % (len(weights), buckets[-1][1]))

    all_inputs = encoder_inputs + decoder_inputs + targets + weights
    losses = []
    outputs = []
    with ops.name_scope(name, "model_with_buckets", all_inputs):
        embeddings = embedding_utils.load_vocab()
        for j, bucket in enumerate(buckets):
            print("Preparing bucket", str(j), "...")
            with variable_scope.variable_scope(
                    variable_scope.get_variable_scope(),
                    reuse=True if j > 0 else None):
                bucket_outputs, _ = seq2seq_f(encoder_inputs[:bucket[0]],
                                              decoder_inputs[:bucket[1]],
                                              embeddings)
                outputs.append(bucket_outputs)
                losses.append(
                    seq2seq.sequence_loss(
                        outputs[-1],
                        targets[:bucket[1]],
                        weights[:bucket[1]],
                        softmax_loss_function=softmax_loss_function))

    return outputs, losses
Example no. 30
    def add_loss_op(self, output):
        """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
        ### YOUR CODE HERE
        num_steps = self.config.num_steps
        batch_size = self.config.batch_size
        targets = tf.reshape(self.labels_placeholder, [-1])
        weights = tf.ones([batch_size * num_steps], dtype=tf.float32)
        cross_entropy = sequence_loss([output], [targets], [weights],
                                      len(self.vocab))
        tf.add_to_collection('total_loss', cross_entropy)
        loss = tf.add_n(tf.get_collection('total_loss'))
        ### END YOUR CODE
        return loss
Example no. 31
def model(encoder_inputs,
          decoder_inputs,
          targets,
          weights,
          encoder_input_length,
          list_of_mask,
          encoder_cell,
          decoder_cell,
          num_encoder_symbols,
          num_decoder_symbols,
          embedding_size,
          beam_size=1,
          output_projection=None,
          softmax_loss_function=None,
          dtype=None,
          name=None):
    all_inputs = encoder_inputs + decoder_inputs + targets + weights
    with ops.name_scope(name, "seq2seq_model", all_inputs):
        with variable_scope.variable_scope("model_seq2seq"):
            outputs, _, beams = embedding_attention_bidirectional_seq2seq(
                encoder_inputs,
                decoder_inputs,
                encoder_input_length,
                list_of_mask,
                encoder_cell,
                decoder_cell,
                num_encoder_symbols,
                num_decoder_symbols,
                embedding_size,
                beam_size=beam_size,
                output_projection=output_projection,
                dtype=dtype)
            loss = None
            if beam_size == 1:
                loss = seq2seq.sequence_loss(
                    outputs,
                    targets,
                    weights,
                    softmax_loss_function=softmax_loss_function)
    return outputs, loss, beams
Example no. 32
  def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
    ### YOUR CODE HERE
    # https://github.com/tensorflow/tensorflow/blob/13ea3ca91ba5aecab6f21acc14b9cb6a9afa8630/tensorflow/python/ops/seq2seq.py#L814
    all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
    cross_entropy = sequence_loss(
        [output], [tf.reshape(self.labels_placeholder, [-1])], all_ones, len(self.vocab))
    tf.add_to_collection('total_loss', cross_entropy)
    loss = tf.add_n(tf.get_collection('total_loss'))
    
    #loss = sequence_loss(output, self.labels_placeholder, tf.ones(self.labels_placeholder.get_shape(), dtype=tf.float32))
    #loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(output, self.labels_placeholder))
    # raise NotImplementedError
    ### END YOUR CODE
    return loss
Example no. 33
    def add_loss_op(self, output):
        """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement sequence loss. 

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar)
    """
        ### YOUR CODE HERE
        weights = tf.ones(self.config.batch_size * self.config.num_steps)
        # might have to reshape outputs according to this op's prototype
        # loss = tf.contrib.seq2seq.sequence_loss(output, self.labels_placeholder, weights)
        print "In add_loss_op:"
        print output.get_shape()
        print self.labels_placeholder.get_shape()
        print weights.get_shape()
        print "---------------"
        lbls = tf.reshape(self.labels_placeholder, [-1])
        loss = sequence_loss([output], [lbls], [weights])
        ### END YOUR CODE
        return loss
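
The commented-out call above points at the newer batched API, tf.contrib.seq2seq.sequence_loss, which takes a single 3-D logits tensor instead of Python lists. A sketch of that variant, under the same assumptions about self.config, self.vocab, and self.labels_placeholder as Example no. 33 (the shapes in the comments are the API's documented ones, not taken from the snippet):

# tf.contrib.seq2seq.sequence_loss (TF 1.x) expects:
#   logits:  [batch_size, num_steps, vocab_size] floats
#   targets: [batch_size, num_steps] int token ids
#   weights: [batch_size, num_steps] float mask (all ones = unweighted)
logits_3d = tf.reshape(output, [self.config.batch_size, self.config.num_steps, len(self.vocab)])
weights_2d = tf.ones([self.config.batch_size, self.config.num_steps])
loss = tf.contrib.seq2seq.sequence_loss(logits_3d, self.labels_placeholder, weights_2d)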
Example no. 34
    def loss(self, predictions, rnn_outputs, labels):
        """Calculates the loss from the predictions (logits?) and the labels.
        """
        all_ones = [tf.ones([self.config.batch_size * self.config.num_steps])]
        cross_entropy = sequence_loss([predictions],
                                      [tf.reshape(labels, [-1])], all_ones, self.config.data_sets._len_vocab)
        tf.add_to_collection('total_loss', cross_entropy)

        with tf.variable_scope('Embedding_similarity'):
            num_steps = 7
            if self.config.num_steps != 1:
                h1 = [o1 * o2 for o1, o2 in zip(rnn_outputs[:-2], rnn_outputs[1:-1])]
                h2 = [o1 * o2 for o1, o2 in zip(rnn_outputs[1:-1], rnn_outputs[2:])]
                emb_sim = tf.reduce_mean(tf.exp(tf.square(tf.sub(h1, h2))))
            else:
                h1 = [rnn_outputs[0] * rnn_outputs[0] for i in range(num_steps - 2)]
                h2 = [rnn_outputs[0] * rnn_outputs[0] for i in range(num_steps - 2)]
                emb_sim = tf.reduce_mean(tf.exp(tf.square(tf.sub(h1, h2))))

            tf.add_to_collection('total_loss', emb_sim)


        loss = tf.add_n(tf.get_collection('total_loss'))
        return loss, cross_entropy, emb_sim
Example no. 35
 def add_loss_op(self, output):
     logits = [output]
     targets = [tf.reshape(self.labels_placeholder, [-1])]
     weights = [tf.ones((self.config.batch_size * self.config.num_steps, ))]
     loss = sequence_loss(logits, targets, weights)
     return loss
Example no. 36
    def build_model(self, forward_only, is_copy=True):
        print(" [*] Building a NTM model")

        with tf.variable_scope(self.scope):
            # present start symbol
            if is_copy:
                _, prev_state = self.cell(self.start_symbol, state=None)
                self.save_state(prev_state, 0, self.max_length)

            zeros = np.zeros(self.cell.input_dim, dtype=np.float32)

            tf.get_variable_scope().reuse_variables()
            for seq_length in xrange(1, self.max_length + 1):
                progress(seq_length / float(self.max_length))

                input_ = tf.placeholder(tf.float32, [self.cell.input_dim],
                                        name='input_%s' % seq_length)
                true_output = tf.placeholder(
                    tf.float32, [self.cell.output_dim],
                    name='true_output_%s' % seq_length)

                self.inputs.append(input_)
                self.true_outputs.append(true_output)

                # present inputs
                _, prev_state = self.cell(input_, prev_state)
                self.save_state(prev_state, seq_length, self.max_length)

                # present end symbol
                if is_copy:
                    _, state = self.cell(self.end_symbol, prev_state)
                    self.save_state(state, seq_length)

                self.prev_states[seq_length] = state

                if not forward_only:
                    # present targets
                    outputs = []
                    for _ in xrange(seq_length):
                        output, state = self.cell(zeros, state)
                        self.save_state(state, seq_length, is_output=True)
                        outputs.append(output)

                    self.outputs[seq_length] = outputs

            if not forward_only:
                for seq_length in xrange(self.min_length, self.max_length + 1):
                    print(" [*] Building a loss model for seq_length %s" %
                          seq_length)

                    loss = sequence_loss(logits=self.outputs[seq_length],
                                        targets=self.true_outputs[0:seq_length],
                                        weights=[1] * seq_length,
                                        num_decoder_symbols=-1, # trash
                                        average_across_timesteps=False,
                                        average_across_batch=False,
                                        softmax_loss_function=\
                                            binary_cross_entropy_with_logits)

                    self.losses[seq_length] = loss

                    if not self.params:
                        self.params = tf.trainable_variables()

                    #grads, norm = tf.clip_by_global_norm(
                    #                  tf.gradients(loss, self.params), 5)

                    grads = []
                    for grad in tf.gradients(loss, self.params):
                        if grad is not None:
                            grads.append(
                                tf.clip_by_value(grad, self.min_grad,
                                                 self.max_grad))
                        else:
                            grads.append(grad)

                    self.grads[seq_length] = grads
                    self.optims[seq_length] = self.opt.apply_gradients(
                        zip(grads, self.params), global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a NTM model finished")
Example no. 37
    def __init__(self, vocab_size, batch_size, topology, cell_sizes,
                 learning_rate, lr_decay_rate, max_gradient_norm,
                 cell_type=BasicLSTMCell, embed=False, forward_only=False):
        self.emb_size = vocab_size
        self.batch_size = batch_size
        self.seq_sizes = topology
        self.n_layers = len(topology)
        self.cell_sizes = cell_sizes

        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * lr_decay_rate)
        self.global_step = tf.Variable(0, trainable=False)
        self.seq_len = 1
        for seq_size in self.seq_sizes:
            self.seq_len *= seq_size
        self.enc_inputs = [tf.placeholder(tf.float32, [batch_size, self.emb_size],
                                          name='Encoder_Input_{}'.format(q)) for q in range(self.seq_len)]
        self.dec_inputs = []
        self.enc_cells = []
        self.dec_cells = []
        self.enc_scopes = []
        self.dec_scopes = []
        self.dec_data = []

        self.cell_type = cell_type

        # topology = [..., (layer_size, state_dim), ...]

        def build_layer(layer_size, input_size):
            enc_cell = self.cell_type(input_size)
            if layer_size > 1:
                enc_cell = [enc_cell]
                for _ in range(1, layer_size):
                    enc_cell.append(self.cell_type(input_size, enc_cell[-1].output_size))
                enc_cell = MultiRNNCell(enc_cell)
            return enc_cell

        def build_inputs(seq_len, input_size):
            return [tf.placeholder(tf.float32, [self.batch_size, input_size]) for _ in range(seq_len)]

        for i in range(0, self.n_layers):
            size = self.enc_cells[i - 1].state_size if i > 0 else self.emb_size
            cell = build_layer(self.cell_sizes[i], size)
            self.enc_cells.append(cell)
            self.enc_scopes.append('encoder_{}'.format(i))
            dec_input = build_inputs(self.seq_sizes[i], size)
            self.dec_cells.append(cell)
            self.dec_inputs.append(dec_input)
            self.dec_data.append([np.zeros((batch_size, self.dec_cells[i].input_size))
                                  for _ in range(self.seq_sizes[i])])
            self.dec_scopes.append('decoder_{}'.format(i))

        self.dec_inputs = self.dec_inputs[::-1]
        self.dec_data = self.dec_data[::-1]
        self.dec_cells = self.dec_cells[::-1]

        if embed:
            self.enc_cells[0] = EmbeddingWrapper(self.enc_cells[0], self.emb_size, self.emb_size)
            self.enc_inputs = [tf.placeholder(tf.int32, [None],
                                              name='Encoder_Input_{}'.format(q)) for q in range(self.seq_len)]
            self.targets = [tf.placeholder(tf.int32, [None],
                                           name='Target_{}'.format(q)) for q in range(self.seq_len)]
            self.weights = [tf.placeholder(tf.float32, [None],
                                           name='Weights_{}'.format(q)) for q in range(self.seq_len)]

        self.encoder = self.hierarchical_encoder()
        self.logits = self.hierarchical_decoder(self.encoder)
        self.seq2seq = [tf.arg_max(x, 1) for x in self.logits]
        self.losses = seq2seq.sequence_loss(self.logits, self.targets, self.weights)

        params = tf.trainable_variables()
        if not forward_only:
            opt = tf.train.AdadeltaOptimizer(self.learning_rate)
            gradients = tf.gradients(self.losses, params)
            clipped_gradients, norm = tf.clip_by_global_norm(gradients, max_gradient_norm)
            self.gradient_norm = norm
            self.updates = opt.apply_gradients(
                zip(clipped_gradients, params), global_step=self.global_step)

        self.saver = tf.train.Saver(tf.all_variables())
Example no. 38
    def build_model(self, forward_only, is_copy=True):
        print(" [*] Building a NTM model")

        with tf.variable_scope(self.scope):
            # present start symbol
            if is_copy:
                _, prev_state = self.cell(self.start_symbol, state=None)
                self.save_state(prev_state, 0, self.max_length)

            zeros = np.zeros(self.cell.input_dim, dtype=np.float32)

            tf.get_variable_scope().reuse_variables()
            for seq_length in xrange(1, self.max_length + 1):
                progress(seq_length/float(self.max_length))

                input_ = tf.placeholder(tf.float32, [self.cell.input_dim],
                                        name='input_%s' % seq_length)
                true_output = tf.placeholder(tf.float32, [self.cell.output_dim],
                                             name='true_output_%s' % seq_length)

                self.inputs.append(input_)
                self.true_outputs.append(true_output)

                # present inputs
                _, prev_state = self.cell(input_, prev_state)
                self.save_state(prev_state, seq_length, self.max_length)

                # present end symbol
                if is_copy:
                    _, state = self.cell(self.end_symbol, prev_state)
                    self.save_state(state, seq_length)

                self.prev_states[seq_length] = state

                if not forward_only:
                    # present targets
                    outputs = []
                    for _ in xrange(seq_length):
                        output, state = self.cell(zeros, state)
                        self.save_state(state, seq_length, is_output=True)
                        outputs.append(output)

                    self.outputs[seq_length] = outputs

            if not forward_only:
                for seq_length in xrange(self.min_length, self.max_length + 1):
                    print(" [*] Building a loss model for seq_length %s" % seq_length)

                    loss = sequence_loss(logits=self.outputs[seq_length],
                                        targets=self.true_outputs[0:seq_length],
                                        weights=[1] * seq_length,
                                        average_across_timesteps=False,
                                        average_across_batch=False,
                                        softmax_loss_function=\
                                            binary_cross_entropy_with_logits)

                    self.losses[seq_length] = loss 

                    if not self.params:
                        self.params = tf.trainable_variables()

                    #grads, norm = tf.clip_by_global_norm(
                    #                  tf.gradients(loss, self.params), 5)

                    grads = []
                    for grad in tf.gradients(loss, self.params):
                        if grad is not None:
                            grads.append(tf.clip_by_value(grad,
                                                          self.min_grad,
                                                          self.max_grad))
                        else:
                            grads.append(grad)

                    self.grads[seq_length] = grads
                    self.optims[seq_length] = self.opt.apply_gradients(
                                                  zip(grads, self.params),
                                                  global_step=self.global_step)

        self.saver = tf.train.Saver()
        print(" [*] Build a NTM model finished")
Example no. 39
    def __init__(self, vocab_size, size,
                 num_layers, max_gradient_norm, batch_size, learning_rate, learning_rate_decay_factor,
                 num_samples=512, forward_only=False, max_dialog_length = 10, max_answer_length = 20):

        self.vocab_size = vocab_size
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * learning_rate_decay_factor)
        self.global_step = tf.Variable(0, trainable=False)

        self.max_dialog_length = max_dialog_length
        self.max_answer_length = max_answer_length

        # If we use sampled softmax, we need an output projection.
        output_projection = None
        softmax_loss_function = None

        # Sampled softmax only makes sense if we sample less than vocabulary size.
        if num_samples > 0 and num_samples < self.vocab_size:
            with tf.device("/cpu:0"):
                w = tf.get_variable("proj_w", [size, self.vocab_size])
                w_t = tf.transpose(w)
                b = tf.get_variable("proj_b", [self.vocab_size])
            output_projection = (w, b)

            def sampled_loss(inputs, labels):
                with tf.device("/cpu:0"):
                    labels = tf.reshape(labels, [-1, 1])
                    return tf.nn.sampled_softmax_loss(w_t, b, inputs, labels, num_samples,
                                                      self.vocab_size)

            softmax_loss_function = sampled_loss

        # Create the internal multi-layer cell for our RNN.
        single_cell = tf.nn.rnn_cell.BasicLSTMCell(size)
        cell = single_cell
        if num_layers > 1:
            cell = tf.nn.rnn_cell.MultiRNNCell([single_cell] * num_layers)

        # The seq2seq function: we use embedding for the input and attention.
        def seq2seq_f(encoder_inputs, decoder_inputs, do_decode):
            return dialog_attention_seq2seq(
                encoder_inputs, decoder_inputs, cell, vocab_size, output_projection=output_projection,
                feed_previous=do_decode)

        # Feeds for inputs.
        self.encoder_inputs = []
        self.decoder_inputs = []
        self.target_weights = []

        for i in range(0, max_dialog_length):
            one_turn_encoder_inputs = []
            one_turn_decoder_inputs = []
            one_turn_target_weights = []
            for j in range(0, max_answer_length):
                one_turn_encoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
                                                      name="encoder{0}_{1}".format(i, j)))

            for j in range(0, max_answer_length + 1):
                one_turn_decoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
                                                      name="decoder{0}_{1}".format(i, j)))
                one_turn_target_weights.append(tf.placeholder(tf.float32, shape=[None],
                                                      name="weight{0}_{1}".format(i, j)))

            self.encoder_inputs.append(list(one_turn_encoder_inputs))
            self.decoder_inputs.append(list(one_turn_decoder_inputs))
            self.target_weights.append(list(one_turn_target_weights))

        # Our targets are decoder inputs shifted by one.
        targets = []
        for i in range(0, max_dialog_length):
            targets.append([self.decoder_inputs[i][j + 1] for j in xrange(len(self.decoder_inputs[i]) - 1)])

        # Training outputs and losses.
        if forward_only:
            self.outputs, _ = seq2seq_f(self.encoder_inputs, self.decoder_inputs, True)

            self.loss = 0
            for i in range(0, max_dialog_length):
                self.loss += sequence_loss(self.outputs[i][:-1], targets[i], self.target_weights[i][:-1],
                                        softmax_loss_function=softmax_loss_function)

            # If we use output projection, we need to project outputs for decoding.
            if output_projection is not None:
                self.outputs = tf.matmul(self.outputs, output_projection[0]) + output_projection[1]
        else:
            self.outputs, _ = seq2seq_f(self.encoder_inputs, self.decoder_inputs, False)

            self.loss = 0
            for i in range(0, max_dialog_length):
                self.loss += sequence_loss(self.outputs[i][:-1], targets[i], self.target_weights[i][:-1],
                                        softmax_loss_function=softmax_loss_function)

        # Gradients and SGD update operation for training the model.
        params = tf.trainable_variables()
        if not forward_only:
            opt = tf.train.GradientDescentOptimizer(self.learning_rate)

            gradients = tf.gradients(self.loss, params)
            clipped_gradients, self.gradient_norm = tf.clip_by_global_norm(gradients,
                                                                           max_gradient_norm)
            self.update = opt.apply_gradients(
                zip(clipped_gradients, params), global_step=self.global_step)

        self.saver = tf.train.Saver(tf.all_variables())
Example no. 40
    scope.reuse_variables()

    decode_outputs_test, decode_state_test = seq2seq.embedding_attention_seq2seq(
        encode_input,
        decode_input,
        stacked_lstm,
        vocab_size,
        vocab_size,
        num_hidden,
        feed_previous=True)

# In[6]:

with tf.name_scope('loss'):
    loss_weights = [tf.ones_like(l, dtype=tf.float32) for l in labels]
    loss = seq2seq.sequence_loss(decode_outputs, labels, loss_weights,
                                 vocab_size)

tf.scalar_summary('loss', loss)

# In[7]:

optimizer = tf.train.AdamOptimizer(learning_rate)
train = optimizer.minimize(loss)

# In[8]:

init = tf.initialize_all_variables()
saver = tf.train.Saver()

sess = tf.InteractiveSession()
merged = tf.merge_all_summaries()
Example no. 41
def sequence_loss(y_pred, y_true):
    logits = tf.unpack(y_pred, axis=1)
    targets = tf.unpack(y_true, axis=1)
    weights = [tf.ones_like(yp, dtype=tf.float32) for yp in targets]
    return seq2seq.sequence_loss(logits, targets, weights)
Example no. 42
label_rnn_initial_state = label_lstm_cell.zero_state(label_batch_size, tf.float32)
label_rnn_outputs, label_rnn_states = rnn.rnn(label_lstm_cell, label_rnn_inputs, initial_state=label_rnn_initial_state, scope="RNN2")

label_rnn_outputs = [tf.matmul(lro, w_label_out) + b_label_out for lro in label_rnn_outputs] # n_label_rnn_steps * (n_batch_size,n_classes)

label_rnn_predicted_index_labels = tf.pack(label_rnn_outputs) # (n_label_rnn_steps,n_batch_size,n_classes)
label_rnn_predicted_index_labels = tf.transpose(label_rnn_predicted_index_labels,[1,0,2]) # (n_batch_size,n_label_rnn_steps,n_classes)
#label_rnn_predicted_index_labels = tf.concat(0,label_rnn_outputs) # (n_label_rnn_steps*n_batch_size,n_classes)
# label_rnn_predicted_index_labels = tf.reshape(label_rnn_predicted_index_labels,[-1,n_label_rnn_steps,n_classes]) # (n_batch_size,n_label_rnn_steps,n_classes)
label_rnn_predicted_index_labels = tf.argmax(label_rnn_predicted_index_labels,2) # (n_batch_size, n_label_rnn_steps)

# Optimization

#cost = tf.nn.sparse_softmax_cross_entropy_with_logits(label_rnn_predicted_data,label_rnn_target_data)
sequence_loss_weights = [tf.ones(tf.shape(label_rnn_target_outputs[0]))]*n_label_rnn_steps
cost = sequence_loss(label_rnn_outputs,label_rnn_target_outputs,sequence_loss_weights)

# cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_pred, y)) # Softmax loss
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) # Adam Optimizer
#
# correct_pred = tf.equal(tf.argmax(y_pred,1), tf.argmax(y,1))
# accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Initializing the variables
init = tf.initialize_all_variables()

# EXECUTION

# Launch the graph
with tf.Session() as sess:
    sess.run(init)
Example no. 43
 def add_loss_op(self, output):
   logits = [output]
   targets = [tf.reshape(self.labels_placeholder, [-1])]
   weights = [tf.ones((self.config.batch_size * self.config.num_steps,))]
   loss = sequence_loss(logits, targets, weights)
   return loss
Example no. 44
    def __init__(self, args, infer=False):
        self.args = args
        if infer:
            self.batch_size = 1
            self.seq_length = 1
        else:
            self.batch_size = args.batch_size
            self.seq_length = args.seq_length

        if args.model == 'rnn':
            cell_fn = rnn_cell.BasicRNNCell
        elif args.model == 'gru':
            cell_fn = rnn_cell.GRUCell
        elif args.model == 'lstm':
            cell_fn = rnn_cell.BasicLSTMCell
        elif args.model == 'dropgru' or args.model == 'droprnn':
            pass
        else:
            raise Exception("model type not supported: {}".format(args.model))

        if args.model.startswith('drop'):
            cells = []
            
            dt1 = DropoutBasicRNNCell
            dt2 = DropoutGRUCell
            if args.model != 'dropgru':
                print("additional layers will be basic RNN")
                dt2 = DropoutBasicRNNCell
            
            for ii in range(args.num_layers):
                if False and args.learn_input_embedding:
                    # context-dependent embedding learned as a small RNN before the large GRUs
                    args.learn_input_embedding = False
                    if ii == 0:
                        nc = dt1(args.vocab_size, input_size=args.vocab_size, probofdrop_st=args.dropout, probofdrop_in=0.0)
                    elif ii == 1:
                        nc = dt2(args.rnn_size,   input_size=args.vocab_size, probofdrop_st=args.dropout, probofdrop_in=args.dropout)
                    else:
                        nc = dt2(args.rnn_size,   input_size=args.rnn_size,   probofdrop_st=args.dropout, probofdrop_in=args.dropout)
                else:
                    # embedding is fixed, context-independent; like word vectors
                    firstdroprate = 0.0
                    if args.learn_input_embedding:
                        firstdroprate = args.dropout
                    if ii == 0:
                        nc = dt2(args.rnn_size, input_size=args.vocab_size, probofdrop_st=args.dropout, probofdrop_in=firstdroprate)
                    else:
                        nc = dt2(args.rnn_size, input_size=args.rnn_size,   probofdrop_st=args.dropout, probofdrop_in=args.dropout)
                cells.append(nc)
            
            self.cell = rnn_cell.MultiRNNCell(cells)
            self.cellusesdropout = True
        else:
            print("building basic non-dropout model")
            c1 = cell_fn(args.rnn_size)
            self.cell = rnn_cell.MultiRNNCell([c1] * args.num_layers)
            self.cellusesdropout = False

        self.input_data = tf.placeholder(tf.int32, [self.batch_size, self.seq_length], name="x_input_data")
        self.targets = tf.placeholder(tf.int32, [self.batch_size, self.seq_length], name="y_targets")
        self.initial_state = self.cell.zero_state(self.batch_size, tf.float32)

        if args.learn_input_embedding:
            self.embedding = tf.get_variable("embedding", [args.vocab_size, args.vocab_size])
        else:
            self.embedding = tf.placeholder(tf.float32, [args.vocab_size, args.vocab_size], name="embedding")

        if self.cellusesdropout:
            self._dropMaskOutput = tf.placeholder(dtype=tf.float32, shape=[self.batch_size*self.seq_length, args.rnn_size], name="dropout_output_mask")
            self._latest_mask_output = None

        with tf.variable_scope('rnnlm'):
            softmax_w = tf.get_variable("top_softmax_w", [args.rnn_size, args.vocab_size])
            softmax_b = tf.get_variable("top_softmax_b", [args.vocab_size])
            inputs = tf.split(1, self.seq_length, tf.nn.embedding_lookup(self.embedding, self.input_data))
            inputs = [tf.squeeze(input_, [1]) for input_ in inputs]

        def loop(prev, _):
            if self.cellusesdropout:
                assert(prev.get_shape() == self._dropMaskOutput.get_shape())
                prev = tf.matmul(tf.mul(prev, self._dropMaskOutput), softmax_w) + softmax_b
            else:
                prev = tf.matmul(prev, softmax_w) + softmax_b
            prev_symbol = tf.stop_gradient(tf.argmax(prev, 1))
            return tf.nn.embedding_lookup(self.embedding, prev_symbol)

        self.temperature = tf.placeholder(tf.float32, 1, name="temperature")

        # if loop_function is not None, it is used to generate the next input
        # otherwise, if it is None, the next input will be from the "inputs" sequence
        outputs, last_state = seq2seq.rnn_decoder(inputs, self.initial_state, self.cell, loop_function=loop if infer else None, scope='rnnlm')
        output = tf.reshape(tf.concat(1, outputs), [self.batch_size*self.seq_length, args.rnn_size])

        if self.cellusesdropout:
            assert(output.get_shape() == self._dropMaskOutput.get_shape())
            self.logits = tf.matmul(tf.mul(output, self._dropMaskOutput), softmax_w) + softmax_b
        else:
            self.logits = tf.matmul(output, softmax_w) + softmax_b
        self.probs = tf.nn.softmax(self.logits)
        self.probswithtemp = tf.nn.softmax(self.logits / self.temperature)

        # 1.44... term converts cost from units of "nats" to units of "bits"
        self.cost = seq2seq.sequence_loss([self.logits],
                [tf.reshape(self.targets, [-1])],
                [tf.ones([self.batch_size * self.seq_length])]) * 1.44269504088896340736
        self.pred_entropy = tf.reduce_sum(tf.mul(self.probs, tf.log(self.probs + 1e-12)), 1) * (-1.44269504088896340736)

        self.final_state = last_state
        self.lr = tf.Variable(0.0, trainable=False, name="learningrate")
        tvars = tf.trainable_variables()
        grads, _ = tf.clip_by_global_norm(tf.gradients(self.cost, tvars),
                args.grad_clip)
        optimizer = tf.train.AdamOptimizer(self.lr)
        zipgradvars = zip(grads, tvars)
        self.train_op = optimizer.apply_gradients(zipgradvars)

        # for tensorboard
        tb_cost = tf.scalar_summary('cost_train', self.cost)
        tb_predent = tf.scalar_summary('prediction_entropy_train', tf.reduce_mean(self.pred_entropy))
        mergethese = [tb_cost, tb_predent]
        for grad,var in zipgradvars:
            mergethese.append(tf.histogram_summary(var.name+'_value', var))
            mergethese.append(tf.histogram_summary(var.name+'_grad', grad))
        self.tbsummary = tf.merge_summary(mergethese)
Example no. 45
def sequence_loss(y_pred, y_true):
    logits = tf.unpack(y_pred, axis=1)
    targets = tf.unpack(y_true, axis=1)
    weights = [tf.ones_like(yp, dtype=tf.float32) for yp in targets]
    return seq2seq.sequence_loss(logits, targets, weights)
Example no. 46
File: RNN.py Project: zbxzc35/RNN-2
  def add_loss_op(self, output):  # compute the loss function
    
    loss = sequence_loss([output], [tf.reshape(self.labels_placeholder, [-1])], [tf.ones([self.config.batch_size * self.config.num_steps])])

    return loss