Example #1
  def join(self, is_training, hparams, inputs, out_w, out_c, tags):
    """Meta model joins word and char model."""
    with tf.variable_scope('meta_char_word'):
      out_1 = layers.dropout(is_training, hparams.keep_prob, out_w)
      out_2 = layers.dropout(is_training, hparams.keep_prob, out_c)

      # Concatenate word-model and char-model features along the last axis.
      outputs = tf.concat([out_1, out_2], axis=2)
      out_fw, out_bw, _ = layers.lstm_layers(is_training, outputs,
                                             hparams.num_layers_meta,
                                             hparams.hidden_meta_size,
                                             hparams.recur_keep_j_prob)
      outputs = tf.concat([out_fw, out_bw], axis=2)
      outputs = layers.mlp(
          is_training,
          outputs,
          output_size=tags,
          keep_prob=hparams.keep_prob)
      preds_w = tf.to_int32(tf.argmax(outputs, axis=-1))
      targets_w = inputs[:, :, 2]
      # Mask padding positions so they contribute nothing to the loss.
      tokens_to_keep = tf.to_float(tf.greater(inputs[:, :, 0], PAD))
      loss = tf.losses.sparse_softmax_cross_entropy(targets_w, outputs,
                                                    tokens_to_keep)
    if is_training:
      return loss
    else:
      return preds_w
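
A note on the loss above: tokens_to_keep feeds the weights argument of
tf.losses.sparse_softmax_cross_entropy, so padded positions drop out of the
objective. Below is a minimal NumPy sketch of that masking idea; the PAD
value, shapes, and ids are assumptions for illustration, not taken from the
original source.

import numpy as np

PAD = 0  # assumed padding id

def masked_xent(targets, logits, weights):
  # Log-softmax over the tag dimension.
  logits = logits - logits.max(axis=-1, keepdims=True)
  log_probs = logits - np.log(np.exp(logits).sum(axis=-1, keepdims=True))
  # Negative log-likelihood of the target tag at each position.
  nll = -np.take_along_axis(log_probs, targets[..., None], axis=-1)[..., 0]
  # Zero-weighted (padding) positions drop out of the average.
  return (nll * weights).sum() / np.maximum(weights.sum(), 1.0)

word_ids = np.array([[3, 5, PAD]])             # one sentence, last slot padded
targets = np.array([[1, 0, 0]])
logits = np.random.randn(1, 3, 2).astype(np.float32)
weights = (word_ids > PAD).astype(np.float32)  # mirrors tokens_to_keep
print(masked_xent(targets, logits, weights))

With 0/1 weights this matches the default behavior of the TF1 loss, which
averages over the nonzero-weight positions only.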
Example #2
  def char_model(self, is_training, hparams, chars, embedding_char_size, tags,
                 inputs_char, indexs_start, indexs_end, targets_w):
    """Character model."""
    with tf.variable_scope('chars'):
      if is_training:
        embed_dims = [chars, embedding_char_size]
        np.random.seed(seed=1)
        embeddings_char = np.random.randn(*embed_dims).astype(np.float32)
        cembed = tf.get_variable(
            'char_embeddings', dtype=tf.float32, initializer=embeddings_char)
      else:
        cembed = tf.get_variable('char_embeddings')

      # Shared by both the training and inference branches above.
      embed_nd = tf.nn.embedding_lookup(cembed, inputs_char)
      embed = layers.dropout(is_training, hparams.embed_keep_prob_ch, embed_nd)

      output_fw, output_bw, _ = layers.lstm_layers(
          is_training, embed, hparams.num_layers_chars,
          hparams.hidden_char_size, hparams.recur_keep_prob)

      # Gather the forward char-LSTM outputs at word start and end positions.
      output_fw_fst = tf.gather_nd(output_fw, indexs_start)
      output_fw_lst = tf.gather_nd(output_fw, indexs_end)

      # Gather the backward char-LSTM outputs at word start and end positions.
      output_bw_fst = tf.gather_nd(output_bw, indexs_start)
      output_bw_lst = tf.gather_nd(output_bw, indexs_end)

      # Concatenate the gathered boundary vectors along the feature axis.
      outputs = tf.concat(
          [output_fw_fst, output_fw_lst, output_bw_fst, output_bw_lst], axis=2)

      outputs = layers.mlp(
          is_training,
          outputs,
          output_size=hparams.mlp_size,
          keep_prob=hparams.keep_prob)

      targets = targets_w
      tok_keep = tf.to_float(tf.greater(targets, PAD))

      linear = layers.linear_with_dropout(
          is_training, outputs, tags, keep_prob=hparams.keep_prob)
      preds = tf.to_int32(tf.argmax(linear, axis=-1))

      if is_training:
        int_tok_keep = tf.to_int32(tok_keep)
        t_correct = tf.to_int32(tf.equal(preds, targets)) * int_tok_keep
        accuracy = tf.reduce_sum(t_correct) / tf.reduce_sum(int_tok_keep)

        loss = tf.losses.sparse_softmax_cross_entropy(targets, linear, tok_keep)
        return loss, accuracy
      else:
        return preds, outputs
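
The tf.gather_nd calls above pick char-LSTM states at word boundaries:
indexs_start and indexs_end hold [batch, char_position] pairs, one per word.
Here is a small NumPy sketch of the same indexing with made-up shapes; the
real index tensors come from the data pipeline, which is not shown here.

import numpy as np

batch, n_chars, hidden = 2, 7, 4  # assumed toy shapes
output_fw = np.random.randn(batch, n_chars, hidden).astype(np.float32)

# indexs_start[b, w] = [b, c]: char position c where word w of sentence b
# starts. Sentence 0 has words at chars 0 and 3; sentence 1 at chars 0 and 4.
indexs_start = np.array([[[0, 0], [0, 3]],
                         [[1, 0], [1, 4]]])

# NumPy equivalent of tf.gather_nd(output_fw, indexs_start):
gathered = output_fw[indexs_start[..., 0], indexs_start[..., 1]]
print(gathered.shape)  # (2, 2, 4): one hidden vector per word

Concatenating the start and end vectors from both LSTM directions, as the
model does, gives each word a 4 * hidden-dimensional summary of its
characters.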
Example #3
  def word_model(self, is_training, hparams, words, embedding_word_size,
                 tags, pretrained_embed, inputs):
    """Word model."""
    with tf.variable_scope('words'):
      embedding = tf.get_variable(
          'word_embedding', [words, embedding_word_size],
          dtype=tf.float32,
          initializer=tf.zeros_initializer())
      word_inputs = tf.nn.embedding_lookup(embedding, inputs[:, :, 0])
      word_inputs = layers.dropout(is_training, hparams.embed_keep_prob,
                                   word_inputs)
      # Add fixed pretrained embeddings to the trainable, zero-initialized
      # table: training learns per-word deltas on top of pretrained vectors.
      pret_inputs = tf.nn.embedding_lookup(pretrained_embed, inputs[:, :, 1])
      pret_inputs = layers.dropout(is_training, hparams.embed_keep_prob,
                                   pret_inputs)
      word_inputs += pret_inputs

      targets_w = inputs[:, :, 2]
      outputs = word_inputs

      output_fw, output_bw, _ = layers.lstm_layers(
          is_training, outputs, hparams.num_layers_words,
          hparams.hidden_word_size, hparams.recur_keep_w_prob)
      outputs = tf.concat([output_fw, output_bw], axis=2)

      outputs = layers.mlp(
          is_training,
          outputs,
          output_size=hparams.mlp_size,
          keep_prob=hparams.keep_prob)

      logits = layers.linear_with_dropout(
          is_training,
          outputs,
          tags,
          keep_prob=hparams.keep_prob)
      preds_w = tf.to_int32(tf.argmax(logits, axis=-1))
      tag_correct_w = tf.to_int32(tf.equal(preds_w, targets_w))
      correct = tf.reduce_sum(tag_correct_w) / tf.size(tag_correct_w)
      tokens_to_keep = tf.to_float(tf.greater(inputs[:, :, 0], PAD))
      loss_w = tf.losses.sparse_softmax_cross_entropy(targets_w, logits,
                                                      tokens_to_keep)

      if is_training:
        return loss_w, correct
      else:
        return preds_w, outputs
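
The word model's zero-initialized trainable table plus fixed pretrained
lookups is a common trick: the sum starts out equal to the pretrained vector
alone, and training learns a per-word correction on top of it. Below is a
hedged NumPy sketch of that combination; vocabulary sizes and ids are
invented for illustration.

import numpy as np

vocab, dim = 6, 3  # assumed toy sizes
# Trainable table starts at zero (cf. tf.zeros_initializer above).
trainable = np.zeros((vocab, dim), dtype=np.float32)
pretrained = np.random.randn(vocab, dim).astype(np.float32)

word_ids = np.array([[1, 4, 2]])  # analogue of inputs[:, :, 0]
pret_ids = np.array([[1, 4, 2]])  # analogue of inputs[:, :, 1]
word_inputs = trainable[word_ids] + pretrained[pret_ids]
print(word_inputs.shape)  # (1, 3, 3)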