Example #1
  def __init__(self, 
               source_vocab_size, 
               tag_vocab_size, 
               label_vocab_size, 
               buckets, 
               word_embedding_size, 
               size, 
               num_layers, 
               max_gradient_norm, 
               batch_size, 
               dropout_keep_prob=1.0, 
               use_lstm=False, 
               bidirectional_rnn=True,
               num_samples=1024, 
               use_attention=False, 
               task=None, 
               forward_only=False,
               pred_only=False):
    self.source_vocab_size = source_vocab_size
    self.tag_vocab_size = tag_vocab_size
    self.label_vocab_size = label_vocab_size
    self.word_embedding_size = word_embedding_size
    self.cell_size = size
    self.num_layers = num_layers
    self.buckets = buckets
    self.batch_size = batch_size
    self.bidirectional_rnn = bidirectional_rnn
    self.global_step = tf.Variable(0, trainable=False)
    self.pred_only = pred_only
    
    # If we use sampled softmax, we need an output projection.
    softmax_loss_function = None

    # Create the internal multi-layer cell for our RNN.
    def create_cell():
      if not forward_only and dropout_keep_prob < 1.0:
        single_cell = lambda: BasicLSTMCell(self.cell_size)
        cell = MultiRNNCell([single_cell() for _ in range(self.num_layers)])
        cell = DropoutWrapper(cell,
                                input_keep_prob=dropout_keep_prob,
                                output_keep_prob=dropout_keep_prob)         
      else:
        single_cell = lambda: BasicLSTMCell(self.cell_size)
        cell = MultiRNNCell([single_cell() for _ in range(self.num_layers)])
      return cell
  
    self.cell_fw = create_cell()
    self.cell_bw = create_cell()

    # Feeds for inputs.
    self.encoder_inputs = []
    self.tags = []    
    self.tag_weights = []    
    self.labels = []    
    self.sequence_length = tf.placeholder(tf.int32, [None], 
                                          name="sequence_length")
    
    for i in xrange(buckets[-1][0]):
      self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
                                                name="encoder{0}".format(i)))
    
    if not self.pred_only:
      for i in xrange(buckets[-1][1]):
        self.tags.append(tf.placeholder(tf.float32, shape=[None], 
                                        name="tag{0}".format(i)))
        self.tag_weights.append(tf.placeholder(tf.float32, shape=[None],
                                                  name="weight{0}".format(i)))
      self.labels.append(tf.placeholder(tf.float32, shape=[None], name="label"))

    base_rnn_output = self.generate_rnn_output()
    encoder_outputs, encoder_state, attention_states = base_rnn_output
    
    if task['tagging'] == 1:
       seq_labeling_outputs = seq_labeling.generate_sequence_output(
                                   self.source_vocab_size,
                                   encoder_outputs, 
                                   encoder_state,                                    
                                   self.sequence_length,                                    
                                   self.tag_vocab_size, 
                                   self.tags,
                                   self.tag_weights,
                                   buckets, 
                                   softmax_loss_function=softmax_loss_function, 
                                   use_attention=use_attention)
       self.tagging_output, self.tagging_loss = seq_labeling_outputs
    if task['intent'] == 1:
      seq_intent_outputs = seq_classification.generate_single_output(
                                    encoder_state, 
                                    attention_states, 
                                    self.sequence_length, 
                                    self.labels, 
                                    self.label_vocab_size,
                                    buckets, 
                                    softmax_loss_function=softmax_loss_function, 
                                    use_attention=use_attention)
      self.classification_output, self.classification_loss = seq_intent_outputs
    
    if task['tagging'] == 1:
      self.loss = self.tagging_loss
    elif task['intent'] == 1:
      self.loss = self.classification_loss

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    if not forward_only:
      opt = tf.train.AdamOptimizer()
      if task['joint'] == 1:
        # Backpropagate the intent and tagging losses; one may further adjust
        # the weights for the two costs.
        gradients = tf.gradients([self.tagging_loss, self.classification_loss], 
                                 params)
      elif task['tagging'] == 1:
        gradients = tf.gradients(self.tagging_loss, params)
      elif task['intent'] == 1:
        gradients = tf.gradients(self.classification_loss, params)
        
      clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                       max_gradient_norm)
      self.gradient_norm = norm
      self.update = opt.apply_gradients(
          zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables())
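
For orientation, here is a minimal construction sketch. The class name MultiTaskModel and every hyperparameter value are assumptions chosen only for illustration; they are not taken from the original project.

# Hypothetical instantiation of the class that owns the __init__ above
# (class name and all values are illustrative).
buckets = [(10, 10), (25, 25)]            # (source length, tag length) per bucket
model = MultiTaskModel(source_vocab_size=10000,
                       tag_vocab_size=120,
                       label_vocab_size=20,
                       buckets=buckets,
                       word_embedding_size=128,
                       size=128,          # hidden units per LSTM layer
                       num_layers=1,
                       max_gradient_norm=5.0,
                       batch_size=16,
                       dropout_keep_prob=0.5,
                       use_attention=True,
                       task={'tagging': 1, 'intent': 1, 'joint': 1},
                       forward_only=False)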
Example #2
  def __init__(self, source_vocab_size, tag_vocab_size, label_vocab_size, buckets,
               word_embedding_size, size, num_layers, max_gradient_norm, batch_size,
               dropout_keep_prob=1.0, use_lstm=False, bidirectional_rnn=True,
               num_samples=1024, use_attention=False,
               task=None, forward_only=False):
    self.source_vocab_size = source_vocab_size
    self.tag_vocab_size = tag_vocab_size
    self.label_vocab_size = label_vocab_size
    self.buckets = buckets
    self.batch_size = batch_size
    self.global_step = tf.Variable(0, trainable=False)

    # If we use sampled softmax, we need an output projection.
    softmax_loss_function = None

    # Create the internal multi-layer cell for our RNN.
    single_cell = lambda: tf.contrib.rnn.GRUCell(size)
    if use_lstm:
      single_cell = lambda: tf.contrib.rnn.BasicLSTMCell(size)
    cell = single_cell()
    if num_layers > 1:
      # Build a fresh cell per layer; reusing one cell object across layers
      # breaks variable scoping in TF >= 1.1.
      cell = tf.contrib.rnn.MultiRNNCell([single_cell() for _ in range(num_layers)])

    if not forward_only and dropout_keep_prob < 1.0:
      cell = tf.contrib.rnn.DropoutWrapper(cell,
                                           input_keep_prob=dropout_keep_prob,
                                           output_keep_prob=dropout_keep_prob)


    # Feeds for inputs.
    self.encoder_inputs = []
    self.tags = []
    self.tag_weights = []
    self.labels = []
    self.sequence_length = tf.placeholder(tf.int32, [None], name="sequence_length")

    for i in xrange(buckets[-1][0]):
      self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
                                                name="encoder{0}".format(i)))
    for i in xrange(buckets[-1][1]):
      self.tags.append(tf.placeholder(tf.float32, shape=[None], name="tag{0}".format(i)))
      self.tag_weights.append(tf.placeholder(tf.float32, shape=[None],
                                                name="weight{0}".format(i)))
    self.labels.append(tf.placeholder(tf.float32, shape=[None], name="label"))

    base_rnn_output = generate_encoder_output.generate_embedding_RNN_output(self.encoder_inputs,
                                                                            cell,
                                                                            self.source_vocab_size,
                                                                            word_embedding_size,
                                                                            dtype=dtypes.float32,
                                                                            scope=None,
                                                                            sequence_length=self.sequence_length,
                                                                            bidirectional_rnn=bidirectional_rnn)
    encoder_outputs, encoder_state, attention_states = base_rnn_output

    if task['tagging'] == 1:
      self.tagging_output, self.tagging_loss = seq_labeling.generate_sequence_output(
          self.source_vocab_size,
          encoder_outputs, encoder_state, self.tags, self.sequence_length, self.tag_vocab_size, self.tag_weights,
          buckets, softmax_loss_function=softmax_loss_function, use_attention=use_attention)
    if task['intent'] == 1:
      self.classification_output, self.classification_loss = seq_classification.generate_single_output(
          encoder_state, attention_states, self.sequence_length, self.labels, self.label_vocab_size,
          buckets, softmax_loss_function=softmax_loss_function, use_attention=use_attention)

    if task['tagging'] == 1:
      self.loss = self.tagging_loss
    elif task['intent'] == 1:
      self.loss = self.classification_loss

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    if not forward_only:
      opt = tf.train.AdamOptimizer()
      if task['joint'] == 1:
        # Backpropagate the intent and tagging losses; one may further adjust
        # the weights for the two costs.
        gradients = tf.gradients([self.tagging_loss, self.classification_loss], params)
      elif task['tagging'] == 1:
        gradients = tf.gradients(self.tagging_loss, params)
      elif task['intent'] == 1:
        gradients = tf.gradients(self.classification_loss, params)

      clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                       max_gradient_norm)
      self.gradient_norm = norm
      self.update = opt.apply_gradients(
          zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables())
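
The bucketed placeholders above are fed one position at a time. Below is a hedged sketch of building a feed_dict for a single-example batch in a hypothetical (10, 10) bucket; model is assumed to be an instance of the class above, and the padding id, token/tag ids, and intent id are made up for illustration.

# Hypothetical feed_dict for one example padded to bucket (encoder_size, tag_size) = (10, 10).
encoder_size, tag_size = 10, 10
PAD_ID = 0                                           # assumed padding id
token_ids = [4, 17, 8, 3]                            # assumed word ids
tag_ids = [1, 1, 1, 5]                               # assumed slot-tag ids
token_ids += [PAD_ID] * (encoder_size - len(token_ids))
tag_ids += [PAD_ID] * (tag_size - len(tag_ids))

feed_dict = {model.sequence_length: [4]}             # true (unpadded) length
for i in range(encoder_size):
  feed_dict[model.encoder_inputs[i]] = [token_ids[i]]
for i in range(tag_size):
  feed_dict[model.tags[i]] = [tag_ids[i]]
  # Zero weight on padded positions so they do not contribute to the tagging loss.
  feed_dict[model.tag_weights[i]] = [1.0 if i < 4 else 0.0]
feed_dict[model.labels[0]] = [3]                     # assumed intent id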
Example #3
    def __init__(self,
                 source_vocab_size,
                 tag_vocab_size,
                 label_vocab_size,
                 buckets,
                 word_embedding_size,
                 size,
                 num_layers,
                 max_gradient_norm,
                 batch_size,
                 dropout_keep_prob=1.0,
                 use_lstm=False,
                 bidirectional_rnn=True,
                 num_samples=1,
                 use_attention=False,
                 task=None,
                 forward_only=False):
        self.source_vocab_size = source_vocab_size
        self.tag_vocab_size = tag_vocab_size
        self.label_vocab_size = label_vocab_size
        self.word_embedding_size = word_embedding_size
        self.cell_size = size
        self.num_layers = num_layers
        self.buckets = buckets
        self.batch_size = batch_size
        self.bidirectional_rnn = bidirectional_rnn
        self.global_step = tf.Variable(0, trainable=False)

        # If we use sampled softmax, we need an output projection.
        softmax_loss_function = None

        # 2-1. Make multi-layer cells
        def create_cell():

            # Add Dropout
            if not forward_only and dropout_keep_prob < 1.0:
                single_cell = lambda: BasicLSTMCell(self.cell_size)  # cell_size = size (hidden units per layer)
                cell = MultiRNNCell(
                    [single_cell() for _ in range(self.num_layers)])
                cell = DropoutWrapper(cell,
                                      input_keep_prob=dropout_keep_prob,
                                      output_keep_prob=dropout_keep_prob)
            # No dropout
            else:
                single_cell = lambda: BasicLSTMCell(self.cell_size)
                cell = MultiRNNCell(
                    [single_cell() for _ in range(self.num_layers)])
            return cell

        # 2-1-1. Create forward/backward cells of the encoder.
        self.cell_fw = create_cell()
        self.cell_bw = create_cell()

        # 2-2. Define placeholders (model inputs)
        self.encoder_inputs = []
        self.tags = []
        self.tag_weights = []
        self.labels = []
        self.sequence_length = tf.placeholder(tf.int32, [None],
                                              name="sequence_length")

        # 2-2-1. Define sentence placeholders (encoder_inputs)
        for i in range(buckets[-1][0]):  # buckets[-1][0] = encoder length; xrange changed to range
            self.encoder_inputs.append(
                tf.placeholder(tf.int32,
                               shape=[None],
                               name="encoder{0}".format(i)))

        # 2-2-2. Define tags and tag weights
        for i in range(buckets[-1][1]):  # xrange changed to range
            self.tags.append(
                tf.placeholder(tf.float32,
                               shape=[None],
                               name="tag{0}".format(i)))
            self.tag_weights.append(
                tf.placeholder(tf.float32,
                               shape=[None],
                               name="weight{0}".format(i)))
        self.labels.append(
            tf.placeholder(tf.float32, shape=[None],
                           name="label"))  # self.labels holds a single intent placeholder

        # 2-3-5. Get the bi-directional outputs
        base_rnn_output = self.generate_rnn_output()
        encoder_outputs, encoder_state, attention_states = base_rnn_output

        # 2-4. Sequence labeling or sequence classification.

        # 2-4-1. Get the tagging (decoder) output
        if task['tagging'] == 1:  # task is a dict of flags, e.g. {'tagging': 1, 'intent': 1, 'joint': 1}
            seq_labeling_outputs = seq_labeling.generate_sequence_output(
                self.source_vocab_size,
                encoder_outputs,
                encoder_state,
                self.tags,
                self.sequence_length,
                self.tag_vocab_size,  # num_decoder_symbols
                self.tag_weights,
                buckets,
                softmax_loss_function=softmax_loss_function,
                use_attention=use_attention)
            self.tagging_output, self.tagging_loss = seq_labeling_outputs

        # 2-4-2. Sequence classification.
        if task['intent'] == 1:
            seq_intent_outputs = seq_classification.generate_single_output(
                encoder_state,
                attention_states,
                self.sequence_length,
                self.labels,
                self.label_vocab_size,
                buckets,
                softmax_loss_function=softmax_loss_function,
                use_attention=use_attention)
            self.classification_output, self.classification_loss = seq_intent_outputs

        # 2-4-3. Define Loss.
        if task['tagging'] == 1:
            self.loss = self.tagging_loss
        elif task['intent'] == 1:
            self.loss = self.classification_loss

        # 2-5. Define gradients and the SGD update operation to train the model.
        params = tf.trainable_variables()
        if not forward_only:

            # 2-5-1. Define optimizer
            opt = tf.train.AdamOptimizer()

            # 2-5-2. Define gradients
            if task['joint'] == 1:
                gradients = tf.gradients(
                    [self.tagging_loss, self.classification_loss], params)
            elif task['tagging'] == 1:
                gradients = tf.gradients(self.tagging_loss, params)
            elif task['intent'] == 1:
                gradients = tf.gradients(self.classification_loss, params)

            # Clip gradients by global norm
            clipped_gradients, norm = tf.clip_by_global_norm(
                gradients, max_gradient_norm)
            self.gradient_norm = norm
            # 2-5-3. Train
            self.update = opt.apply_gradients(zip(clipped_gradients, params),
                                              global_step=self.global_step)

        self.saver = tf.train.Saver(tf.global_variables())
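
Examples #1, #3, and #5 call self.generate_rnn_output(), which is not shown on this page. The sketch below is only a guess at its shape, assuming TensorFlow 1.x, an embedding lookup over the stacked encoder inputs, and tf.nn.bidirectional_dynamic_rnn over self.cell_fw/self.cell_bw; the actual implementation in the source project may differ.

import tensorflow as tf  # TF 1.x assumed

def generate_rnn_output(self):
    # Hypothetical sketch, not the original implementation.
    with tf.variable_scope("generate_seq_output"):
        embedding = tf.get_variable(
            "embedding", [self.source_vocab_size, self.word_embedding_size])
        # self.encoder_inputs is a list of [batch] int32 tensors; stack to [batch, time].
        inputs = tf.stack(self.encoder_inputs, axis=1)
        embedded = tf.nn.embedding_lookup(embedding, inputs)   # [batch, time, emb]
        outputs, states = tf.nn.bidirectional_dynamic_rnn(
            self.cell_fw, self.cell_bw, embedded,
            sequence_length=self.sequence_length, dtype=tf.float32)
        encoder_outputs = tf.concat(outputs, 2)                # [batch, time, 2*size]
        encoder_state = states                                 # (fw_state, bw_state)
        attention_states = encoder_outputs
    return encoder_outputs, encoder_state, attention_states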
Example #4
  def __init__(self, source_vocab_size, tag_vocab_size, label_vocab_size, buckets,
               word_embedding_size, size, num_layers, max_gradient_norm, batch_size,
               dropout_keep_prob=1.0, use_lstm=False, bidirectional_rnn=True,
               num_samples=1024, use_attention=False,
               task=None, forward_only=False):
    self.source_vocab_size = source_vocab_size
    self.tag_vocab_size = tag_vocab_size
    self.label_vocab_size = label_vocab_size
    self.buckets = buckets
    self.batch_size = batch_size
    self.global_step = tf.Variable(0, trainable=False)

    # If we use sampled softmax, we need an output projection.
    softmax_loss_function = None

    # Create the internal multi-layer cell for our RNN.
    single_cell = lambda: tf.contrib.rnn.GRUCell(size)
    if use_lstm:
      single_cell = lambda: tf.contrib.rnn.BasicLSTMCell(size)
    cell = single_cell()
    if num_layers > 1:
      # Build a fresh cell per layer; repeating one cell object across layers
      # breaks variable scoping in TF >= 1.1.
      cell = tf.contrib.rnn.MultiRNNCell([single_cell() for _ in range(num_layers)])

    if not forward_only and dropout_keep_prob < 1.0:
      cell = tf.contrib.rnn.DropoutWrapper(cell,
                                           input_keep_prob=dropout_keep_prob,
                                           output_keep_prob=dropout_keep_prob)


    # Feeds for inputs.
    self.encoder_inputs = []
    self.tags = []
    self.tag_weights = []
    self.labels = []
    self.sequence_length = tf.placeholder(tf.int32, [None], name="sequence_length")

    for i in xrange(buckets[-1][0]):
      self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
                                                name="encoder{0}".format(i)))
    for i in xrange(buckets[-1][1]):
      self.tags.append(tf.placeholder(tf.float32, shape=[None], name="tag{0}".format(i)))
      self.tag_weights.append(tf.placeholder(tf.float32, shape=[None],
                                                name="weight{0}".format(i)))
    self.labels.append(tf.placeholder(tf.float32, shape=[None], name="label"))

    # Initiate embedding
    self.embedding = variable_scope.get_variable("embedding", [self.source_vocab_size, word_embedding_size])
    #self.embedding = tf.Variable(tf.constant(0.0, shape= [self.source_vocab_size, word_embedding_size]), name="embedding")

    base_rnn_output = generate_encoder_output.generate_embedding_RNN_output(self.encoder_inputs,
                                                                            cell,
                                                                            self.source_vocab_size,
                                                                            word_embedding_size,
                                                                            embedding=self.embedding,
                                                                            dtype=dtypes.float32,
                                                                            scope=None,
                                                                            sequence_length=self.sequence_length,
                                                                            bidirectional_rnn=bidirectional_rnn)
    encoder_outputs, encoder_state, attention_states = base_rnn_output

    if task['tagging'] == 1:
      self.tagging_output, self.tagging_loss = seq_labeling.generate_sequence_output(
          self.source_vocab_size,
          encoder_outputs, encoder_state, self.tags, self.sequence_length, self.tag_vocab_size, self.tag_weights,
          buckets, softmax_loss_function=softmax_loss_function, use_attention=use_attention)
    if task['intent'] == 1:
      self.classification_output, self.classification_loss = seq_classification.generate_single_output(
          encoder_state, attention_states, self.sequence_length, self.labels, self.label_vocab_size,
          buckets, softmax_loss_function=softmax_loss_function, use_attention=use_attention)

    if task['tagging'] == 1:
      self.loss = self.tagging_loss
    elif task['intent'] == 1:
      self.loss = self.classification_loss

    # Gradients and SGD update operation for training the model.
    params = tf.trainable_variables()
    if not forward_only:
      opt = tf.train.AdamOptimizer()
      if task['joint'] == 1:
        # Backpropagate the intent and tagging losses; one may further adjust
        # the weights for the two costs.
        gradients = tf.gradients([self.tagging_loss, self.classification_loss], params)
      elif task['tagging'] == 1:
        gradients = tf.gradients(self.tagging_loss, params)
      elif task['intent'] == 1:
        gradients = tf.gradients(self.classification_loss, params)

      clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                       max_gradient_norm)
      self.gradient_norm = norm
      self.update = opt.apply_gradients(
          zip(clipped_gradients, params), global_step=self.global_step)

    self.saver = tf.train.Saver(tf.global_variables())  # tf.all_variables() is deprecated in TF 1.x
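
The commented-out zero-initialized embedding in Example #4 hints at the common pattern of loading pre-trained word vectors through a placeholder and an assign op. A hedged sketch of that pattern follows; the sizes and the pretrained array are assumptions for illustration.

import tensorflow as tf  # TF 1.x assumed

source_vocab_size, word_embedding_size = 10000, 128   # illustrative sizes
# Non-trainable embedding variable, filled from outside the graph.
embedding = tf.Variable(
    tf.constant(0.0, shape=[source_vocab_size, word_embedding_size]),
    trainable=False, name="embedding")
embedding_placeholder = tf.placeholder(
    tf.float32, [source_vocab_size, word_embedding_size])
embedding_init = embedding.assign(embedding_placeholder)

# Later, inside a session, with `pretrained` an assumed numpy array of the same shape:
#   sess.run(embedding_init, feed_dict={embedding_placeholder: pretrained})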
Example #5
    def __init__(self,
                 source_vocab_size,
                 tag_vocab_size,
                 label_vocab_size,
                 buckets,
                 word_embedding_size,
                 size,
                 num_layers,
                 max_gradient_norm,
                 batch_size,
                 dropout_keep_prob=1.0,
                 use_lstm=False,
                 bidirectional_rnn=True,
                 num_samples=1024,
                 use_attention=False,
                 task=None,
                 forward_only=False):
        self.source_vocab_size = source_vocab_size
        self.tag_vocab_size = tag_vocab_size
        self.label_vocab_size = label_vocab_size
        self.word_embedding_size = word_embedding_size
        self.cell_size = size
        self.num_layers = num_layers
        self.buckets = buckets
        self.batch_size = batch_size
        self.bidirectional_rnn = bidirectional_rnn
        self.global_step = tf.Variable(0, trainable=False)

        # If we use sampled softmax, we need an output projection.
        softmax_loss_function = None

        # Create the internal multi-layer cell for our RNN.
        def create_cell():
            if not forward_only and dropout_keep_prob < 1.0:
                single_cell = lambda: BasicLSTMCell(self.cell_size)
                cell = MultiRNNCell([single_cell() for _ in range(self.num_layers)])
                cell = DropoutWrapper(cell,
                                      input_keep_prob=dropout_keep_prob,
                                      output_keep_prob=dropout_keep_prob)
            else:
                single_cell = lambda: BasicLSTMCell(self.cell_size)
                cell = MultiRNNCell([single_cell() for _ in range(self.num_layers)])
            return cell

        self.cell_fw = create_cell()
        self.cell_bw = create_cell()

        # Feeds for inputs.
        self.encoder_inputs = []
        self.tags = []
        self.tag_weights = []
        self.labels = []
        self.sequence_length = tf.placeholder(tf.int32, [None],
                                              name="sequence_length")

        for i in xrange(buckets[-1][0]):
            self.encoder_inputs.append(tf.placeholder(tf.int32, shape=[None],
                                                      name="encoder{0}".format(i)))
        for i in xrange(buckets[-1][1]):
            self.tags.append(tf.placeholder(tf.float32, shape=[None],
                                            name="tag{0}".format(i)))
            self.tag_weights.append(tf.placeholder(tf.float32, shape=[None],
                                                   name="weight{0}".format(i)))
        self.labels.append(tf.placeholder(tf.float32, shape=[None], name="label"))

        base_rnn_output = self.generate_rnn_output()
        encoder_outputs, encoder_state, attention_states = base_rnn_output

        if task['tagging'] == 1:
            seq_labeling_outputs = seq_labeling.generate_sequence_output(
                self.source_vocab_size,
                encoder_outputs,
                encoder_state,
                self.tags,
                self.sequence_length,
                self.tag_vocab_size,
                self.tag_weights,
                buckets,
                softmax_loss_function=softmax_loss_function,
                use_attention=use_attention)
            self.tagging_output, self.tagging_loss = seq_labeling_outputs
        if task['intent'] == 1:
            seq_intent_outputs = seq_classification.generate_single_output(
                encoder_state,
                attention_states,
                self.sequence_length,
                self.labels,
                self.label_vocab_size,
                buckets,
                softmax_loss_function=softmax_loss_function,
                use_attention=use_attention)
            self.classification_output, self.classification_loss = seq_intent_outputs

        if task['tagging'] == 1:
            self.loss = self.tagging_loss
        elif task['intent'] == 1:
            self.loss = self.classification_loss

        # Gradients and SGD update operation for training the model.
        params = tf.trainable_variables()
        if not forward_only:
            opt = tf.train.AdamOptimizer()
            gradients = None
            if task['joint'] == 1:
                # Backpropagate the intent and tagging losses; one may further
                # adjust the weights for the two costs.
                gradients = tf.gradients([self.tagging_loss, self.classification_loss],
                                         params)
            elif task['tagging'] == 1:
                gradients = tf.gradients(self.tagging_loss, params)
            elif task['intent'] == 1:
                gradients = tf.gradients(self.classification_loss, params)

            clipped_gradients, norm = tf.clip_by_global_norm(gradients,
                                                             max_gradient_norm)
            self.gradient_norm = norm
            self.update = opt.apply_gradients(
                zip(clipped_gradients, params), global_step=self.global_step)

        self.saver = tf.train.Saver(tf.global_variables())
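
The joint-task branch notes that the weights of the two costs can be adjusted. One way to do that, sketched under the assumption that the relative weights are simple scalars (the values below are illustrative only), is to differentiate a weighted sum instead of the list of losses:

# Hypothetical weighted joint objective inside the training branch.
tagging_weight, intent_weight = 1.0, 0.5
joint_loss = (tagging_weight * self.tagging_loss +
              intent_weight * self.classification_loss)
gradients = tf.gradients(joint_loss, params)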