Beispiel #1
0
  def testSequenceLoss(self):
    """Checks sequence_loss for three combinations of the averaging flags.

    Every logit row is a constant vector over 5 classes, and the expected
    values are ln(5), 3 * ln(5) and 6 * ln(5) for 3 timesteps and a batch of 2.
    """
    with self.test_session() as sess:
      num_steps = 3
      logits = [
          constant_op.constant(step + 0.5, shape=[2, 5])
          for step in range(num_steps)
      ]
      targets = [
          constant_op.constant(step, dtypes.int32, shape=[2])
          for step in range(num_steps)
      ]
      weights = [
          constant_op.constant(1.0, shape=[2]) for _ in range(num_steps)
      ]

      # (average_across_timesteps, average_across_batch, expected loss)
      cases = (
          (True, True, 1.60944),      # averaged per example
          (False, True, 4.828314),    # averaged per sequence
          (False, False, 9.656628),   # total loss
      )
      for across_timesteps, across_batch, expected in cases:
        loss_op = seq2seq_lib.sequence_loss(
            logits,
            targets,
            weights,
            average_across_timesteps=across_timesteps,
            average_across_batch=across_batch)
        self.assertAllClose(expected, sess.run(loss_op))
Beispiel #2
0
  def testSequenceLoss(self):
    """Verifies sequence_loss with per-example, per-sequence and total reduction."""
    with self.test_session() as sess:
      logits = [constant_op.constant(t + 0.5, shape=[2, 5]) for t in range(3)]
      targets = [
          constant_op.constant(t, dtypes.int32, shape=[2]) for t in range(3)
      ]
      weights = [constant_op.constant(1.0, shape=[2]) for _ in range(3)]

      def evaluate(across_timesteps, across_batch):
        # Build the loss op for the requested reduction and run it right away.
        loss_op = seq2seq_lib.sequence_loss(
            logits,
            targets,
            weights,
            average_across_timesteps=across_timesteps,
            average_across_batch=across_batch)
        return sess.run(loss_op)

      # Averaged over both timesteps and batch.
      self.assertAllClose(1.60944, evaluate(True, True))
      # Summed over timesteps, averaged over the batch.
      self.assertAllClose(4.828314, evaluate(False, True))
      # Summed over both timesteps and batch.
      self.assertAllClose(9.656628, evaluate(False, False))
Beispiel #3
0
def get_model(feed_previous=False):
    """Build the attention seq2seq graph together with its loss and SGD update.

    Args:
        feed_previous: Forwarded unchanged to ``embedding_attention_seq2seq``.

    Returns:
        The tuple ``(encoder_inputs, decoder_inputs, target_weights, outputs,
        loss, update, saver, learning_rate_decay_op, learning_rate)``.
    """
    # Non-trainable learning rate plus an op that scales it by 0.9 when run.
    learning_rate = tf.Variable(
        float(init_learning_rate), trainable=False, dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(step))
        for step in range(input_seq_len)
    ]
    # One placeholder more than output_seq_len so the targets can be the
    # decoder inputs shifted by one step.
    decoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(step))
        for step in range(output_seq_len + 1)
    ]
    target_weights = [
        tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(step))
        for step in range(output_seq_len)
    ]
    targets = decoder_inputs[1:]

    lstm = tf.contrib.rnn.BasicLSTMCell(size)

    # The returned decoder state is not needed here.
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        lstm,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    loss = seq2seq.sequence_loss(outputs, targets, target_weights)

    sgd = tf.train.GradientDescentOptimizer(learning_rate)
    grads_and_vars = sgd.compute_gradients(loss)
    update = sgd.apply_gradients(grads_and_vars)

    saver = tf.train.Saver(tf.global_variables())

    return (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
            update, saver, learning_rate_decay_op, learning_rate)
Beispiel #4
0
def get_model(feed_previous=False):
    """Build a 3-layer LSTM attention seq2seq graph trained with Adam.

    Args:
        feed_previous: Forwarded unchanged to ``embedding_attention_seq2seq``.

    Returns:
        The tuple ``(encoder_inputs, decoder_inputs, target_weights, outputs,
        loss, train_op, saver, learning_rate_decay_op, learning_rate)``.
        The sixth element is the op returned by ``AdamOptimizer.minimize``.
    """
    # Non-trainable learning rate plus an op that scales it by 0.9 when run.
    learning_rate = tf.Variable(float(init_learning_rate),
                                trainable=False,
                                dtype=tf.float32)
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(i))
        for i in range(input_seq_len)
    ]
    decoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(i))
        for i in range(output_seq_len + 1)
    ]
    target_weights = [
        tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(i))
        for i in range(output_seq_len)
    ]

    # Targets are the decoder inputs shifted left by one timestep.
    targets = [decoder_inputs[i + 1] for i in range(output_seq_len)]

    # An output_keep_prob of 1 keeps every output, so dropout is a no-op here.
    dropout = 1
    num_layers = 3

    def _make_layer():
        # Each layer needs its own cell object: stacking one instance
        # several times ([cell] * n) is rejected by newer TF 1.x releases.
        layer = tf.contrib.rnn.BasicLSTMCell(size)
        return tf.contrib.rnn.DropoutWrapper(layer, output_keep_prob=dropout)

    cell = tf.contrib.rnn.MultiRNNCell(
        [_make_layer() for _ in range(num_layers)])

    # The returned decoder state is not needed here.
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss over the decoder outputs.
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)
    # Adam training op that minimises the loss.
    opt = tf.train.AdamOptimizer(learning_rate).minimize(loss)
    # Saver over all global variables, for checkpointing.
    saver = tf.train.Saver(tf.global_variables())

    return encoder_inputs, decoder_inputs, target_weights, outputs, loss, opt, saver, learning_rate_decay_op, learning_rate
Beispiel #5
0
def get_model(feed_previous=False):
    """Construct the seq2seq model graph.

    Args:
        feed_previous: Forwarded to ``embedding_attention_seq2seq``. Per the
            original author's note, it selects whether the decoder inputs
            are the supplied training data (False) or are derived from the
            previous RNN cell's output after a Wx+b linear map (True).

    Returns:
        The tuple ``(encoder_inputs, decoder_inputs, target_weights, outputs,
        loss, update, saver, learning_rate_decay_op, learning_rate)``.
    """
    learning_rate = tf.Variable(
        float(init_learning_rate), trainable=False, dtype=tf.float32)
    # Running this op multiplies the stored learning rate by 0.9.
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs, decoder_inputs, target_weights = [], [], []
    for idx in range(input_seq_len):
        enc_ph = tf.placeholder(
            tf.int32, shape=[None], name="encoder{0}".format(idx))
        encoder_inputs.append(enc_ph)
    for idx in range(output_seq_len + 1):
        dec_ph = tf.placeholder(
            tf.int32, shape=[None], name="decoder{0}".format(idx))
        decoder_inputs.append(dec_ph)
    for idx in range(output_seq_len):
        weight_ph = tf.placeholder(
            tf.float32, shape=[None], name="weight{0}".format(idx))
        target_weights.append(weight_ph)

    # The targets are decoder_inputs shifted left by one timestep.
    targets = decoder_inputs[1:output_seq_len + 1]

    rnn_cell = tf.contrib.rnn.BasicLSTMCell(size)

    # Only the per-step outputs are used; the final state is discarded.
    outputs, _unused_state = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        rnn_cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss.
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)

    # Plain gradient descent: compute gradients, then apply them.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    gradients = optimizer.compute_gradients(loss)
    update = optimizer.apply_gradients(gradients)

    # Saver covering all global variables.
    saver = tf.train.Saver(tf.global_variables())

    return (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
            update, saver, learning_rate_decay_op, learning_rate)
Beispiel #6
0
 def ForwardBackward(enc_inp, dec_inp, feed_previous):
   """Builds the model in its own scope and returns outputs, train op and variables.

   The variable scope is named after ``feed_previous`` ("fp_True" or
   "fp_False" for boolean values), and only the global variables collected
   under that scope are passed to the optimizer.
   """
   scope = "fp_{}".format(feed_previous)
   with variable_scope.variable_scope(scope):
     decoder_outputs, _ = seq2seq(
         enc_inp, dec_inp, feed_previous=feed_previous)
     scoped_vars = ops.get_collection(
         ops.GraphKeys.GLOBAL_VARIABLES, scope)
   adam_opt = adam.AdamOptimizer(0.03, epsilon=1e-5)
   loss = seq2seq_lib.sequence_loss(decoder_outputs, targets, weights)
   train_step = adam_opt.minimize(loss, var_list=scoped_vars)
   return decoder_outputs, train_step, scoped_vars
Beispiel #7
0
 def ForwardBackward(enc_inp, dec_inp, feed_previous):
   """Creates forward and training ops inside a scope keyed by ``feed_previous``.

   Returns:
     ``(dec_op, update_op, net_variables)``: the decoder outputs, the Adam
     minimize op, and the global variables found under the scope.
   """
   name = "fp_{}".format(feed_previous)
   with variable_scope.variable_scope(name):
     result = seq2seq(enc_inp, dec_inp, feed_previous=feed_previous)
     dec_op = result[0]
     collection_key = ops.GraphKeys.GLOBAL_VARIABLES
     net_variables = ops.get_collection(collection_key, name)
   # Adam with learning rate 0.03, restricted to this scope's variables.
   optimizer = adam.AdamOptimizer(0.03, epsilon=1e-5)
   sequence_loss_op = seq2seq_lib.sequence_loss(dec_op, targets, weights)
   update_op = optimizer.minimize(sequence_loss_op, var_list=net_variables)
   return dec_op, update_op, net_variables
Beispiel #8
0
def get_model(feed_previous=False):
    """Build the attention seq2seq graph using ``tf.compat.v1`` entry points.

    Args:
        feed_previous: Forwarded unchanged to ``embedding_attention_seq2seq``.

    Returns:
        The tuple ``(encoder_inputs, decoder_inputs, target_weights, outputs,
        loss, update, saver, learning_rate_decay_op, learning_rate)``.
    """
    v1 = tf.compat.v1

    learning_rate = tf.Variable(
        float(init_learning_rate), trainable=False, dtype=tf.float32)
    # Running this op multiplies the stored learning rate by 0.9.
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs = [
        v1.placeholder(tf.int32, shape=[None], name="encoder{0}".format(t))
        for t in range(input_seq_len)
    ]
    decoder_inputs = [
        v1.placeholder(tf.int32, shape=[None], name="decoder{0}".format(t))
        for t in range(output_seq_len + 1)
    ]
    target_weights = [
        v1.placeholder(tf.float32, shape=[None], name="weight{0}".format(t))
        for t in range(output_seq_len)
    ]

    # Targets are decoder_inputs shifted left by one timestep.
    targets = decoder_inputs[1:]

    lstm_cell = tf.contrib.rnn.BasicLSTMCell(size)

    # Author's note on output_projection: it is a (W, B) tuple where W is a
    # weight matrix of shape [output_size x num_decoder_symbols] and B is a
    # bias vector of shape [num_decoder_symbols]. None is passed here.
    # The decoder state returned alongside the outputs is not needed.
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        lstm_cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Cross-entropy loss weighted by target_weights.
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)

    # Gradient descent: compute gradients, then apply them.
    sgd = v1.train.GradientDescentOptimizer(learning_rate)
    grads_and_vars = sgd.compute_gradients(loss)
    update = sgd.apply_gradients(grads_and_vars)

    # Saver over every global variable.
    saver = v1.train.Saver(v1.global_variables())

    return (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
            update, saver, learning_rate_decay_op, learning_rate)
Beispiel #9
0
def get_model():
    """Build the attention seq2seq training graph (teacher forcing only).

    The graph is built with ``embedding_attention_seq2seq``, whose signature
    the original author recorded as::

        embedding_attention_seq2seq(
            encoder_inputs, decoder_inputs, cell,
            num_encoder_symbols, num_decoder_symbols, embedding_size,
            num_heads=1, output_projection=None, feed_previous=False,
            dtype=None, scope=None, initial_state_attention=False)

    Returns:
        The tuple ``(encoder_inputs, decoder_inputs, targets_weigh, outputs,
        loss, update, saver)``.
    """
    encoder_inputs = [
        tf.placeholder(shape=[None], dtype=tf.int32, name="encoder{0}".format(i))
        for i in range(input_seq_len)
    ]
    decoder_inputs = [
        tf.placeholder(shape=[None], dtype=tf.int32, name="decode{0}".format(i))
        for i in range(output_seq_len)
    ]
    targets_weigh = [
        tf.placeholder(shape=[None], dtype=tf.float32, name="weight{0}".format(i))
        for i in range(output_seq_len)
    ]

    # Targets are the decoder inputs shifted left by one step. The last step
    # has no successor, so it is padded with zeros shaped like a decoder
    # input; this follows the fed batch size instead of hard-coding 2.
    targets = [decoder_inputs[i] for i in range(1, output_seq_len)]
    targets.append(tf.zeros_like(decoder_inputs[0]))

    cell = tf.nn.rnn_cell.BasicLSTMCell(size)
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs,
        cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=False,
        dtype=tf.float32
    )
    loss = seq2seq.sequence_loss(outputs, targets, targets_weigh)

    # `learning_rate` is not defined in this function; it must come from the
    # enclosing module.
    opt = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    update = opt.apply_gradients(opt.compute_gradients(loss))
    saver = tf.train.Saver(tf.global_variables())
    return encoder_inputs, decoder_inputs, targets_weigh, outputs, loss, update, saver
Beispiel #10
0
 def sequence_loss(self, y_pred, y_true):
     """Compute the seq2seq sequence loss with every position weighted equally.

     Both tensors are unstacked along axis 1 into per-step lists, an
     all-ones float32 weight is built for each target step, and the three
     lists are handed to ``seq2seq.sequence_loss``.

     Author's shape notes: each logits element is [-1, num_decoder_symbols]
     and ``y_true`` is [-1, self.out_seq_len].
     """
     step_logits = tf.unstack(y_pred, axis=1)
     step_targets = tf.unstack(y_true, axis=1)
     unit_weights = [
         tf.ones_like(step, dtype=tf.float32) for step in step_targets
     ]
     return seq2seq.sequence_loss(step_logits, step_targets, unit_weights)
Beispiel #11
0
def get_model(feed_previous=False):
    """Construct the attention seq2seq graph with an LSTMCell and Adam.

    Args:
        feed_previous: Forwarded unchanged to ``embedding_attention_seq2seq``.

    Returns:
        The tuple ``(encoder_inputs, decoder_inputs, target_weights, outputs,
        loss, update, saver, learning_rate_decay_op, learning_rate)``.
    """
    learning_rate = tf.Variable(
        float(init_learning_rate), trainable=False, dtype=tf.float32)
    # Running this op multiplies the stored learning rate by 0.99.
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.99)

    encoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(pos))
        for pos in range(input_seq_len)
    ]
    decoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(pos))
        for pos in range(output_seq_len + 1)
    ]
    target_weights = [
        tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(pos))
        for pos in range(output_seq_len)
    ]

    # Shift decoder_inputs left by one timestep to obtain the targets.
    targets = decoder_inputs[1:]

    recurrent_cell = tf.contrib.rnn.LSTMCell(size)

    # The final decoder state is not used.
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        recurrent_cell,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss.
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)

    # Adam optimizer; `update` is the op returned by minimize().
    adam_optimizer = tf.train.AdamOptimizer(learning_rate)
    update = adam_optimizer.minimize(loss)

    # Saver over all global variables.
    saver = tf.train.Saver(tf.global_variables())

    return (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
            update, saver, learning_rate_decay_op, learning_rate)
Beispiel #12
0
 def add_loss_op(self, output):
     """Add the sequence loss to the 'total_loss' collection and sum it.

     Hint from the original: use tensorflow.python.ops.seq2seq.sequence_loss
     to implement the sequence loss.

     Args:
         output: A tensor; the original docstring gives its shape as
             (None, self.vocab).

     Returns:
         The ``tf.add_n`` sum of everything in the 'total_loss' collection;
         the original docstring describes it as a 0-d tensor.
     """
     num_positions = self.config.batch_size * self.config.num_steps
     unit_weights = tf.ones([num_positions])
     flat_labels = tf.reshape(self.labels_placeholder, [-1])
     cross_entropy = sequence_loss(
         [output], [flat_labels], [unit_weights], len(self.vocab))
     tf.add_to_collection('total_loss', cross_entropy)
     collected = tf.get_collection('total_loss')
     return tf.add_n(collected)
Beispiel #13
0
    def add_loss_op(self, output):
        """Adds the sequence-loss op to the computational graph.

        Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement
        sequence loss.

        Args:
          output: A tensor of shape (None, self.vocab)
        Returns:
          loss: A 0-d tensor (scalar)
        """
        # One weight of 1.0 for each of the batch_size * num_steps positions.
        position_count = self.config.batch_size * self.config.num_steps
        uniform_weights = tf.ones([position_count])
        flat_labels = tf.reshape(self.labels_placeholder, [-1])
        return sequence_loss([output], [flat_labels], [uniform_weights])
Beispiel #14
0
def get_model(feed_previous=False):
    """Construct the attention seq2seq graph with a gradient-descent update.

    Args:
        feed_previous: Forwarded unchanged to ``embedding_attention_seq2seq``.

    Returns:
        The tuple ``(encoder_inputs, decoder_inputs, target_weights, outputs,
        loss, update, saver)``.
    """
    encoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(k))
        for k in xrange(input_seq_len)
    ]
    decoder_inputs = [
        tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(k))
        for k in xrange(output_seq_len + 1)
    ]
    target_weights = [
        tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(k))
        for k in xrange(output_seq_len)
    ]

    # Targets are decoder_inputs shifted left by one timestep.
    targets = decoder_inputs[1:]

    lstm = tf.contrib.rnn.BasicLSTMCell(size)

    # The decoder state returned here is not needed.
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_inputs[:output_seq_len],
        lstm,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss.
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)

    # `learning_rate` is not defined in this function; it comes from the
    # enclosing module.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    grads_and_vars = optimizer.compute_gradients(loss)
    update = optimizer.apply_gradients(grads_and_vars)

    # Saver over all global variables.
    saver = tf.train.Saver(tf.global_variables())
    return (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
            update, saver)
Beispiel #15
0
  def add_loss_op(self, output):
    """Adds loss ops to the computational graph.

    Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement
    sequence loss.

    Args:
      output: A tensor of shape (None, self.vocab)
    Returns:
      loss: A 0-d tensor (scalar); the sum of the 'total_loss' collection.
    """
    flat_size = self.config.batch_size * self.config.num_steps
    # Author's size notes: 640 positions with a 10000-wide output
    # (assumption carried over from the original comments).
    unit_weights = tf.ones([flat_size])
    flat_labels = tf.reshape(self.labels_placeholder, [-1])
    cross_entropy = sequence_loss(
        logits=[output], targets=[flat_labels], weights=[unit_weights])
    tf.add_to_collection('total_loss', cross_entropy)
    return tf.add_n(tf.get_collection('total_loss'))
Beispiel #16
0
def get_model(feed_previous=False):
    """Construct the attention seq2seq model graph.

    Args:
        feed_previous: Forwarded unchanged to ``embedding_attention_seq2seq``.

    Returns:
        The tuple ``(encoder_inputs, decoder_inputs, target_weights, outputs,
        loss, update, saver, learning_rate_decay_op, learning_rate)``.
    """
    learning_rate = tf.Variable(
        float(init_learning_rate), trainable=False, dtype=tf.float32)
    # Running this op multiplies the stored learning rate by 0.9.
    learning_rate_decay_op = learning_rate.assign(learning_rate * 0.9)

    encoder_inputs = []
    for n in xrange(input_seq_len):
        encoder_inputs += [
            tf.placeholder(tf.int32, shape=[None], name="encoder{0}".format(n))
        ]
    decoder_inputs = []
    for n in xrange(output_seq_len + 1):
        decoder_inputs += [
            tf.placeholder(tf.int32, shape=[None], name="decoder{0}".format(n))
        ]
    target_weights = []
    for n in xrange(output_seq_len):
        target_weights += [
            tf.placeholder(tf.float32, shape=[None], name="weight{0}".format(n))
        ]

    # Targets are decoder_inputs shifted left by one timestep.
    targets = decoder_inputs[1:output_seq_len + 1]
    decoder_feed = decoder_inputs[:output_seq_len]

    basic_lstm = tf.contrib.rnn.BasicLSTMCell(size)

    # The returned state is not needed.
    outputs, _ = seq2seq.embedding_attention_seq2seq(
        encoder_inputs,
        decoder_feed,
        basic_lstm,
        num_encoder_symbols=num_encoder_symbols,
        num_decoder_symbols=num_decoder_symbols,
        embedding_size=size,
        output_projection=None,
        feed_previous=feed_previous,
        dtype=tf.float32)

    # Weighted cross-entropy loss.
    loss = seq2seq.sequence_loss(outputs, targets, target_weights)

    # Gradient descent: compute gradients, then apply them.
    gd_optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    gradient_pairs = gd_optimizer.compute_gradients(loss)
    update = gd_optimizer.apply_gradients(gradient_pairs)

    # Saver over all global variables.
    saver = tf.train.Saver(tf.global_variables())

    return (encoder_inputs, decoder_inputs, target_weights, outputs, loss,
            update, saver, learning_rate_decay_op, learning_rate)
Beispiel #17
0
    def add_loss_op(self, output):
        """Adds loss ops to the computational graph.

        Hint: Use tensorflow.python.ops.seq2seq.sequence_loss to implement
        sequence loss.

        Args:
          output: A tensor of shape (None, self.vocab)
        Returns:
          loss: A 0-d tensor (scalar); the sum of the 'total_loss' collection.
        """
        weight_len = self.config.batch_size * self.config.num_steps
        ones_list = [tf.ones([weight_len])]
        label_list = [tf.reshape(self.labels_placeholder, [-1])]
        vocab_len = len(self.vocab)
        step_loss = sequence_loss([output], label_list, ones_list, vocab_len)
        # Register this loss, then sum whatever the collection holds.
        tf.add_to_collection('total_loss', step_loss)
        return tf.add_n(tf.get_collection('total_loss'))
Beispiel #18
0
	def sequence_loss(self, y_pred, y_true):
		"""Compute the seq2seq sequence loss with all-ones weights.

		Unstacks ``y_pred`` and ``y_true`` along axis 1, builds a float32
		weight of ones for every target step, and calls
		``seq2seq.sequence_loss``. Debug output is printed when
		``self.verbose`` exceeds 2 (weights only when it exceeds 4).

		Author's shape notes: each logits element is
		[-1, num_decoder_symbols]; ``y_true`` is [-1, self.out_seq_len].
		"""
		chatty = self.verbose > 2
		if chatty:
			print("my_sequence_loss y_pred=%s, y_true=%s" % (y_pred, y_true))
		logits = tf.unstack(y_pred, axis=1)
		targets = tf.unstack(y_true, axis=1)
		if chatty:
			print("my_sequence_loss logits=%s" % (logits,))
			print("my_sequence_loss targets=%s" % (targets,))
		weights = [tf.ones_like(step, dtype=tf.float32) for step in targets]
		if self.verbose > 4:
			print("my_sequence_loss weights=%s" % (weights,))
		result = seq2seq.sequence_loss(logits, targets, weights)
		if chatty:
			print("my_sequence_loss return = %s" % result)
		return result
 def sequence_loss(self, y_pred, y_true):
     """Loss function for the seq2seq RNN.

     Splits the predicted and label tensors into per-step lists along
     axis 1, creates dummy all-ones weights, and delegates to
     ``seq2seq.sequence_loss``. Prints diagnostics depending on
     ``self.verbose`` (above 2 for tensors and result, above 4 for weights).
     """
     verbosity = self.verbose
     if verbosity > 2:
         print("my_sequence_loss y_pred=%s, y_true=%s" % (y_pred, y_true))

     # Author's note: list of [-1, num_decoder_symbols] elements.
     per_step_logits = tf.unstack(y_pred, axis=1)
     # Author's note: y_true is [-1, self.out_seq_len]; unstacked into
     # self.out_seq_len elements of shape [-1].
     per_step_targets = tf.unstack(y_true, axis=1)
     if verbosity > 2:
         print("my_sequence_loss logits=%s" % (per_step_logits, ))
         print("my_sequence_loss targets=%s" % (per_step_targets, ))

     ones = [tf.ones_like(t, dtype=tf.float32) for t in per_step_targets]
     if verbosity > 4:
         print("my_sequence_loss weights=%s" % (ones, ))

     loss_value = seq2seq.sequence_loss(
         per_step_logits, per_step_targets, ones)
     if verbosity > 2:
         print("my_sequence_loss return = %s" % loss_value)
     return loss_value
Beispiel #20
0
                                            feed_previous = False,
                                            output_projection = output_projection,
                                            dtype = tf.float32)

# define our loss function

def sampled_loss(labels, logits):
    """Sampled-softmax loss used as the per-step loss function.

    ``labels`` is reshaped to a single column before the call; ``w_t``, ``b``
    and ``en_vocab_size`` are read from the enclosing module, and 512
    classes are sampled.
    """
    column_labels = tf.reshape(labels, [-1, 1])
    return tf.nn.sampled_softmax_loss(
        weights=w_t,
        biases=b,
        labels=column_labels,
        inputs=logits,
        num_sampled=512,
        num_classes=en_vocab_size,
    )

# Sequence loss over the decoder outputs, with sampled_loss plugged in as the
# per-step softmax loss function.
loss = seq2seq_lib.sequence_loss(outputs, targets, target_weights, softmax_loss_function = sampled_loss)


def softmax(x):
    """Return the softmax of ``x`` taken over all of its elements.

    The global maximum is subtracted before exponentiating, and the result is
    normalised by the sum over every element (no axis is used).
    """
    peak = np.max(x)
    exponentials = np.exp(x - peak)
    total = exponentials.sum()
    return exponentials / total

# feed data into placeholders
def feed_dict(x, y, batch_size = 64):
    """Start building a placeholder feed for a random batch drawn from ``x``.

    NOTE(review): the visible code never uses ``y`` and never returns
    ``feed``; this snippet looks truncated, so confirm against the full
    source before relying on it.
    """
    feed = {}
    
    # Draw batch_size distinct example indices (sampling without replacement).
    idxes = np.random.choice(len(x), size = batch_size, replace = False)
    
    # For each encoder step i, feed the i-th item of every sampled example,
    # keyed by the placeholder's name.
    for i in range(input_seq_len):
        feed[encoder_inputs[i].name] = np.array([x[j][i] for j in idxes])
Beispiel #21
0
def rnn_model(model, input_data, output_data, labels, vocab_size, batch_size=64, rnn_size=128):
    """
    Construct an embedding RNN seq2seq graph with loss, summaries and train op.

    Everything is created as a side effect; nothing is returned.

    :param model: cell type selector, one of 'rnn', 'gru' or 'lstm'
    :param input_data: encoder inputs; ``len(input_data)`` is also passed as
        the last positional argument of ``embedding_rnn_seq2seq``
    :param output_data: decoder inputs
    :param labels: iterable of per-step label tensors, used as loss targets
    :param vocab_size: passed as both symbol counts of the seq2seq call and
        as the fourth positional argument of ``sequence_loss``
    :param batch_size: unused by the active code
    :param rnn_size: number of units of the recurrent cell
    :raises ValueError: if ``model`` is not 'rnn', 'gru' or 'lstm'
    :return: None
    """
    # Validate the selector first so a bad value fails with a clear message
    # rather than an unbound-variable error further down.
    if model == 'rnn':
        cell_fun = tf.contrib.rnn.BasicRNNCell
    elif model == 'gru':
        cell_fun = tf.contrib.rnn.GRUCell
    elif model == 'lstm':
        cell_fun = tf.contrib.rnn.BasicLSTMCell
    else:
        raise ValueError(
            "model must be one of 'rnn', 'gru' or 'lstm', got %r" % (model,))

    cell = cell_fun(rnn_size)
    # cell = tf.contrib.rnn.MultiRNNCell([cell] * num_layers, state_is_tuple=True)

    # Every label position gets a weight of 1.0.
    weights = [tf.ones_like(labels_t, dtype=tf.float32) for labels_t in labels]

    outputs, last_state = seq2seq.embedding_rnn_seq2seq(
        input_data, output_data, cell, vocab_size, vocab_size, len(input_data))
    # The original passed the undefined name `ou` here; the decoder outputs
    # are the logits for the loss.
    loss = seq2seq.sequence_loss(outputs, labels, weights, vocab_size)

    tf.scalar_summary("loss", loss)
    # L2 norm of element 1 of the final state.
    magnitude = tf.sqrt(tf.reduce_sum(tf.square(last_state[1])))
    tf.scalar_summary("magnitude at t=1", magnitude)
    summary_op = tf.merge_all_summaries()

    learning_rate = 0.05
    momentum = 0.9
    optimizer = tf.train.MomentumOptimizer(learning_rate, momentum)
    train_op = optimizer.minimize(loss)
    logdir = tempfile.mkdtemp()
    print(logdir)
    # NOTE(review): `sess` is not defined in this function, so it must be a
    # module-level session - confirm against the full source.
    summary_writer = tf.train.SummaryWriter(logdir, sess.graph_def)
    # if output_data is not None:
    #     initial_state = cell.zero_state(batch_size, tf.float32)
    # else:
    #     initial_state = cell.zero_state(1, tf.float32)

    # with tf.device("/cpu:0"):
    #     embedding = tf.get_variable('embedding', initializer=tf.random_uniform(
    #         [vocab_size + 1, rnn_size], -1.0, 1.0))
    #     inputs = tf.nn.embedding_lookup(embedding, input_data)
    #     decoder_inputs = tf.nn.embedding_lookup(embedding, output_data)

    # # [batch_size, ?, rnn_size] = [64, ?, 128]
    # # outputs, last_state = tf.nn.dynamic_rnn(cell, inputs, initial_state=initial_state)
    # outputs,last_state = basic_rnn_seq2seq(inputs,decoder_inputs,cell)
    # output = tf.reshape(outputs, [-1, rnn_size])

    # weights = tf.Variable(tf.truncated_normal([rnn_size, vocab_size + 1]))
    # bias = tf.Variable(tf.zeros(shape=[vocab_size + 1]))
    # logits = tf.nn.bias_add(tf.matmul(output, weights), bias=bias)
    # # [?, vocab_size+1]

    # if output_data is not None:
    #     # output_data must be one-hot encode
    #     labels = tf.one_hot(tf.reshape(output_data, [-1]), depth=vocab_size + 1)
    #     # should be [?, vocab_size+1]

    #     loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)
    #     # loss shape should be [?, vocab_size+1]
    #     total_loss = tf.reduce_mean(loss)
    #     train_op = tf.train.AdamOptimizer(learning_rate).minimize(total_loss)

    #     end_points['initial_state'] = initial_state
    #     end_points['output'] = output
    #     end_points['train_op'] = train_op
    #     end_points['total_loss'] = total_loss
    #     end_points['loss'] = loss
    #     end_points['last_state'] = last_state
    # else:
    #     prediction = tf.nn.softmax(logits)

    #     end_points['initial_state'] = initial_state
    #     end_points['last_state'] = last_state
    #     end_points['prediction'] = prediction

    # return end_points
Beispiel #22
0
    def create_model(self,
                     model_input,
                     vocab_size,
                     num_frames,
                     is_training=True,
                     sparse_labels=None,
                     label_weights=None,
                     **unused_params):
        """Build an attention decoder over the sampled inputs and its sequence loss.

        Args:
          model_input: input tensor handed to utils.SampleRandomSequence.
          vocab_size: base vocabulary size; 3 is added for the decoder's
            symbol and output sizes.
          num_frames: frame-count tensor; expanded at axis 1 and cast to float32.
          is_training: stored on self.phase_train.
          sparse_labels: tensor unstacked along axis 1 into decoder inputs.
          label_weights: tensor cast to float32 and unstacked along axis 1
            into per-step loss weights.
          **unused_params: ignored.

        Returns:
          A dict with "predictions" (the list of decoder outputs) and "loss".
        """

        self.phase_train = is_training
        num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
        model_inputs = utils.SampleRandomSequence(model_input, num_frames,
                                                  self.max_steps)

        # presumably the 3 extra ids are special tokens - TODO confirm
        total_vocab_size = vocab_size + 3
        enc_cell = self.get_enc_cell(self.cell_size, total_vocab_size)
        dec_cell = self.get_dec_cell(self.cell_size)
        runtime_batch_size = tf.shape(model_inputs)[0]

        # TODO
        # The encoder branch is disabled by the constant condition, so the
        # else branch always runs: the sampled inputs act as attention states
        # and the decoder starts from its own zero state.
        if False:
            with tf.variable_scope("Enc"):
                enc_init_state = enc_cell.zero_state(runtime_batch_size,
                                                     dtype=tf.float32)
                enc_outputs, enc_state = tf.nn.dynamic_rnn(
                    enc_cell,
                    model_inputs,
                    initial_state=enc_init_state,
                    scope="enc")
        else:
            enc_outputs = model_inputs
            enc_state = dec_cell.zero_state(runtime_batch_size,
                                            dtype=tf.float32)

        label_weights = tf.cast(label_weights, tf.float32)
        dec_weights = tf.unstack(label_weights, axis=1)
        dec_input_lists = tf.unstack(sparse_labels, axis=1)

        # Targets are the decoder inputs shifted left by one step, with a
        # zeros tensor appended for the last step.
        dec_targets = [
            dec_input_lists[i + 1] for i in xrange(len(dec_input_lists) - 1)
        ]
        dec_targets += [tf.zeros_like(dec_input_lists[0])]
        # enc_outputs_lists = tf.split(enc_outputs, num_or_size_splits=self.max_steps, axis=1)
        dec_outputs, _ = attn.embedding_attention_decoder(
            dec_input_lists,
            initial_state=enc_state,
            attention_states=enc_outputs,
            cell=dec_cell,
            num_symbols=total_vocab_size,
            embedding_size=1024,
            output_size=total_vocab_size,
            output_projection=None,
            feed_previous=False,
            dtype=tf.float32,
            scope="LSTMEncDec")
        loss = seq2seq_lib.sequence_loss(dec_outputs,
                                         dec_targets,
                                         dec_weights,
                                         softmax_loss_function=None)
        # logits = tf.reduce_mean(dec_outputs, axis=0)
        # NOTE(review): `logits` below is computed but is not part of the
        # returned dict; "predictions" holds dec_outputs instead.
        label_num = tf.reduce_sum(label_weights, axis=1, keep_dims=True)
        logits = tf.add_n(dec_outputs) / label_num
        # logits = tf.Print(logits.get_shape(), [logits])
        logits = logits[:, :vocab_size]
        # logits = tf.nn.sigmoid(enc_outputs[:, -1, :])
        return {
            "predictions": dec_outputs,
            # "predictions": logits,
            "loss": loss,
        }
Beispiel #23
0
    def _seq2seq(self):
        """Build the attention seq2seq graph under the "SumModel" scope.

        A bidirectional LSTM encodes the embedded articles, an attention
        decoder (initialised from the forward encoder state only) consumes
        the embedded abstracts, and the decoder outputs are projected onto
        the vocabulary and scored with ``sequence_loss``.

        Sets ``self._enc_outputs``, ``self._dec_in_state``,
        ``self._dec_out_state`` and ``self._loss``. When ``hps.mode`` is
        ``"test"`` a loop function built by ``_extract_argmax_and_embed`` is
        handed to the decoder and ``self._outputs`` is set to the per-step
        argmax token ids.
        """
        hps = self._hps
        vocab_size = self._vocab.count
        with tf.variable_scope("SumModel"):
            article_lens = self._article_lens
            # sequence_loss expects seq_len * [batch_size], hence the
            # transpose + unstack (assumes the inputs are [batch, seq_len]
            # -- TODO confirm against the placeholders).
            targets = tf.unstack(tf.transpose(self._targets))
            loss_weights = tf.unstack(tf.transpose(self._loss_weights))
            with tf.variable_scope('embedding'), tf.device('/cpu:0'):
                # A single embedding matrix is shared by encoder and decoder.
                embedding = tf.get_variable("embedding",
                                            [vocab_size, hps.emb_dim],
                                            dtype=tf.float32)
                # [batch, seq_len, emb_dim]
                emb_encoder_inputs = tf.nn.embedding_lookup(
                    embedding, self._articles)
                emb_decoder_inputs = tf.nn.embedding_lookup(
                    embedding, self._abstracts)

            with tf.variable_scope("encoder"):
                cell_fw = LSTMCell(hps.num_hidden,
                                   initializer=tf.random_uniform_initializer(
                                       -0.1, 0.1, seed=123),
                                   state_is_tuple=False)
                cell_bw = LSTMCell(hps.num_hidden,
                                   initializer=tf.random_uniform_initializer(
                                       -0.1, 0.1, seed=113),
                                   state_is_tuple=False)
                # outputs: (output_fw, output_bw) =>
                #   output_fw: [batch_size, max_time, cell_fw.output_size]
                # output_states: a tuple (output_state_fw, output_state_bw)
                encoder_outputs, encoder_output_states = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw,
                    cell_bw,
                    inputs=emb_encoder_inputs,
                    dtype=tf.float32,
                    sequence_length=article_lens)
            # Concatenated fw/bw outputs:
            # [batch_size, max_time, 2 * output_size]
            self._enc_outputs = tf.concat(encoder_outputs, axis=2)
            # Only the forward final state is kept; the backward state is
            # discarded. [batch_size, 2 * output_size]
            encoder_state_fw, _ = encoder_output_states

            with tf.variable_scope("output_projection"):
                w = tf.get_variable(
                    "w", [hps.num_hidden, vocab_size],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))
                # Projection bias; the graph variable is named "b".
                v = tf.get_variable(
                    "b", [vocab_size],
                    dtype=tf.float32,
                    initializer=tf.truncated_normal_initializer(stddev=1e-4))

            with tf.variable_scope("decoder"):
                # A loop function is only supplied in test mode; otherwise
                # the decoder reads the provided decoder inputs.
                loop_function = None
                if hps.mode == "test":
                    loop_function = _extract_argmax_and_embed(
                        embedding, (w, v), update_embedding=False)
                decoder_cell = LSTMCell(
                    hps.num_hidden,
                    initializer=tf.random_uniform_initializer(-0.1,
                                                              0.1,
                                                              seed=113),
                    state_is_tuple=False)
                # Convert the embedded inputs into the list form the decoder
                # requires:
                # [seq_len, batch, emb_dim] => seq_len * [batch, emb_dim]
                emb_decoder_inputs = tf.unstack(
                    tf.transpose(emb_decoder_inputs, perm=[1, 0, 2]))
                # [batch, cell_size]
                self._dec_in_state = encoder_state_fw
                initial_state_attention = (hps.mode == 'test')
                # decoder_outputs: seq_len * [batch, hidden_size]
                # self._dec_out_state: [batch, state_size]=[batch, 2*cell_size]
                decoder_outputs, self._dec_out_state = attention_decoder(
                    decoder_inputs=emb_decoder_inputs,
                    initial_state=self._dec_in_state,
                    attention_states=self._enc_outputs,
                    cell=decoder_cell,
                    num_heads=1,
                    loop_function=loop_function,
                    initial_state_attention=initial_state_attention)

            with tf.variable_scope("output"):
                # Project every time step with one matmul:
                # output: [batch * seq_len, num_hidden]
                # logits: [batch * seq_len, vsize]
                # (Equivalent to applying tf.nn.xw_plus_b(step, w, v) to each
                # decoder output with the shared projection.)
                output = tf.reshape(tf.stack(values=decoder_outputs, axis=1),
                                    [-1, hps.num_hidden])
                logits = tf.matmul(output, w) + v
                # seq_len * [batch, vsize]
                model_outputs = tf.unstack(tf.reshape(
                    logits, [-1, hps.dec_timesteps, vocab_size]),
                                           axis=1)

            with tf.variable_scope("loss"):
                # logits:  seq_len * [batch_size, vsize]
                # targets: seq_len * [batch_size]
                # weights: seq_len * [batch_size]; the weights act as a mask.
                # 1. sequence_loss first calls sequence_loss_by_example to
                #    get a [batch_size] loss, then divides by batch_size.
                # 2. sequence_loss_by_example uses the weights as a mask to
                #    get the average loss over the real time steps.
                # Sentence lengths differ within a batch, so the weights
                # should be initialised to zeros and filled with ones for
                # the real tokens.
                self._loss = sequence_loss(logits=model_outputs,
                                           targets=targets,
                                           weights=loss_weights)
            if hps.mode == "test":
                with tf.variable_scope("decode_output"):
                    # seq_len * [batch, vsize] => seq_len * [batch]
                    # tf.argmax replaces the deprecated alias tf.arg_max.
                    best_outputs = [
                        tf.argmax(x, axis=1) for x in model_outputs
                    ]
                    # seq_len * [batch, 1] => [batch, seq_len]
                    self._outputs = tf.concat(
                        axis=1,
                        values=[tf.reshape(x, [-1, 1]) for x in best_outputs])
Beispiel #24
0
    def create_model(self,
                     model_input,
                     vocab_size,
                     num_frames,
                     is_training=True,
                     dense_labels=None,
                     feature_sizes=None,
                     input_weights=None,
                     **unused_params):
        """Encode sampled frames with an RNN and classify the pooled state.

        The encoder outputs are detached with ``tf.stop_gradient``, weighted
        by ``input_weights``, summed over time and divided by the frame
        count; the result is fed to ``self.moe`` and scored with a manual
        binary cross-entropy. A second path (sequence reconstruction with an
        attention decoder) is present but switched off by a constant flag.

        Args:
            model_input: frame features handed to
                ``utils.SampleRandomSequence``.
            vocab_size: number of classes; stored on ``self.vocab_size``.
            num_frames: per-example frame counts; expanded on axis 1 and cast
                to float32 before use.
            is_training: stored on ``self.is_training``.
            dense_labels: labels, cast to float32 for the loss.
            feature_sizes: iterable of feature widths. It is summed
                unconditionally, so it must be provided even though the sum
                is only consumed by the disabled reconstruction path.
            input_weights: per-step weights multiplied into the encoder
                outputs (assumes shape (batch, max_steps) -- TODO confirm).
            **unused_params: ignored.

        Returns:
            A dict with the keys ``"loss"`` and ``"predictions"``.
        """
        self.is_training = is_training
        total_feature_dim = sum(feature_sizes)
        frame_counts = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
        # TODO
        self.max_steps = 300  # 30
        sampled_frames = utils.SampleRandomSequence(model_input, frame_counts,
                                                    self.max_steps)

        encoder_cell = self.get_enc_cell(self.cell_size, self.cell_size)
        decoder_cell = self.get_dec_cell(self.cell_size)
        batch_size = tf.shape(sampled_frames)[0]

        zero_state = encoder_cell.zero_state(batch_size, dtype=tf.float32)
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            encoder_cell,
            sampled_frames,
            initial_state=zero_state,
            scope="enc")

        # Constant switch: the classifier path is always taken; the
        # reconstruction path below is kept for experimentation.
        use_classifier_path = True
        if use_classifier_path:
            # The classifier does not back-propagate into the encoder.
            detached = tf.stop_gradient(encoder_outputs)
            step_weights = tf.tile(tf.expand_dims(input_weights, 2),
                                   [1, 1, self.cell_size])
            detached = detached * step_weights
            # Weighted sum over time divided by the frame count.
            pooled = tf.reduce_sum(detached, axis=1) / frame_counts
            # pooled = tf.reduce_sum(detached, axis=1) / self.max_steps

            # Set before calling the classifier (presumably read by
            # self.moe -- verify).
            self.vocab_size = vocab_size
            classifier = self.moe
            logits = classifier(pooled)

            if classifier == self.moe:
                # The classifier output is used as probabilities here: log
                # is applied to it directly, with epsilon guarding log(0).
                epsilon = 1e-12
                labels = tf.cast(dense_labels, tf.float32)
                positive_term = labels * tf.log(logits + epsilon)
                negative_term = (1 - labels) * tf.log(1 - logits + epsilon)
                per_class_loss = tf.negative(positive_term + negative_term)
                loss = tf.reduce_mean(tf.reduce_sum(per_class_loss, 1))

                predictions = logits
            else:
                per_class_loss = tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=tf.cast(dense_labels, tf.float32), logits=logits)
                loss = tf.reduce_mean(tf.reduce_sum(per_class_loss, 1))
                predictions = tf.nn.sigmoid(logits)
        else:
            # Reconstruct the input sequence in reverse order; the decoder
            # input is the target sequence shifted right by one zero frame.
            dec_targets = tf.unstack(sampled_frames, axis=1)[::-1]
            go_frame = tf.zeros_like(dec_targets[0])
            dec_inputs = [go_frame] + dec_targets[:-1]

            dec_outputs, _ = attn.attention_decoder(
                decoder_inputs=dec_inputs,
                initial_state=encoder_state,
                attention_states=encoder_outputs,
                cell=decoder_cell,
                output_size=total_feature_dim,
                dtype=tf.float32)
            # Every time step gets weight one.
            dec_weights = [
                tf.ones([
                    batch_size,
                ], dtype=tf.float32) for _ in xrange(self.max_steps)
            ]
            loss = seq2seq_lib.sequence_loss(
                dec_outputs,
                dec_targets,
                dec_weights,
                softmax_loss_function=self.reconstruct_loss)
            predictions = tf.no_op()

        result = {
            "loss": loss,
            "predictions": predictions,
        }
        return result
    def _build_model(self):
        """
        Build the seq2seq graph together with its loss, training op and
        prediction ops.

        Depending on ``self.use_attn`` the graph is built with
        ``embedding_attention_seq2seq`` (optionally with the copy mechanism
        selected by ``self.copy``) or with ``embedding_rnn_seq2seq``.

        Attributes set here: ``outputs``, ``states``, ``attn_outputs``,
        ``total_loss``, ``training_op``, ``dec_prediction``, ``predictions``,
        ``saver`` and ``increment_global_step``.
        """
        cell = self._set_cell_type()
        self._build_inputs()
        # No output projection is used; None is passed to both builders.
        output_projection = None
        print("Embedding size: ", self.embedding_size)

        if self.use_attn:
            if self.copy:
                print("Using attention of form ", self.attn_type,
                      " with copy mechanism...")
            else:
                print("Using attention of form ", self.attn_type)
            # Encoder and decoder use the same vocabulary size.
            self.outputs, self.states, self.attn_outputs = embedding_attention_seq2seq(
                self.encoder_inputs,
                self.decoder_inputs,
                cell,
                num_encoder_symbols=self.vocab_size,
                num_decoder_symbols=self.vocab_size,
                embedding_size=self.embedding_size,
                output_projection=output_projection,
                feed_previous=self.do_decode,
                dtype=tf.float32,
                copy=self.copy,
                attn_type=self.attn_type)
        else:
            print("Using vanilla seq2seq...")

            self.outputs, self.states = embedding_rnn_seq2seq(
                self.encoder_inputs,
                self.decoder_inputs,
                cell,
                num_encoder_symbols=self.vocab_size,
                num_decoder_symbols=self.vocab_size,
                embedding_size=self.embedding_size,
                output_projection=output_projection,
                feed_previous=self.do_decode,
                dtype=tf.float32)
            # The vanilla model has no attention outputs.
            self.attn_outputs = None

        # L2 regularisation term, summed over the selected trainable
        # variables and scaled by self.l2_reg.
        trainable_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        # Only get non-bias terms.
        # NOTE(review): this is a case-sensitive substring match on "Bias";
        # confirm the bias variables in this graph are actually named so.
        non_bias_vars = [v for v in trainable_vars if "Bias" not in v.name]
        l2_loss = tf.add_n(
            [self.l2_reg * tf.nn.l2_loss(nb) for nb in non_bias_vars])

        # Sequence loss plus the L2 term.
        # NOTE(review): self.decoder_inputs is passed in the targets
        # position -- confirm no one-step shift is intended.
        self.total_loss = sequence_loss(self.outputs, self.decoder_inputs,
                                        self.target_weights) + l2_loss

        # Adam with a fixed learning rate of 1e-4.
        self.training_op = tf.train.AdamOptimizer(
            learning_rate=0.0001).minimize(self.total_loss)

        # NOTE(review): argmax is taken over axis=1 here, while
        # self.predictions below uses axis=-1 on the stacked outputs --
        # confirm which axis is intended.
        self.dec_prediction = tf.transpose(tf.argmax(self.outputs, axis=1),
                                           [1, 0])

        # Stack the outputs, take the argmax over the last axis, then swap
        # the two remaining axes.
        self.predictions = tf.transpose(
            tf.argmax(tf.stack(self.outputs), axis=-1), [1, 0])
        # Keep at most the 10 most recent checkpoints.
        self.saver = tf.train.Saver(max_to_keep=10)
        # Op that adds 1 to self.global_step when run.
        self.increment_global_step = tf.assign_add(
            self.global_step, 1, name='increment_global_step')