예제 #1
0
def embed_text(tensors, embeddings):
    """Embed text as concatenated word-level and character-level vectors.

    Consumes the word/char ids produced by `build_tensorize_text_fn`.

    Args:
        tensors: Mapping that provides the word ids under "wids" and the
            character ids under "cids".
        embeddings: Object whose `get_initialized_params` supplies the word
            embedding weights; they are requested with `trainable=False`.

    Returns:
        The word-embedding lookup and the character-CNN output joined along
        the last axis.
    """
    # Read both id tensors first so a missing key fails before any
    # embedding parameters are requested.
    word_ids, char_ids = tensors["wids"], tensors["cids"]

    frozen_weights = embeddings.get_initialized_params(trainable=False)
    pieces = [
        tf.nn.embedding_lookup(frozen_weights, word_ids),
        common_layers.character_cnn(char_ids),
    ]
    return tf.concat(pieces, -1)
예제 #2
0
 def test_character_cnn(self):
     """Check the output shape of `common_layers.character_cnn`.

     A 3x2 batch of words (plain ASCII, symbol-heavy and CJK strings) is
     converted with `char_utils.batch_word_to_char_ids` and passed through
     `character_cnn` with `num_filters=5`; the evaluated result must have
     shape [3, 2, 5].
     """
     with tf.Graph().as_default():
         input_words = [["google", "lumiere"], [u"¯\\_(ツ)_/¯", u"(ᵔᴥᵔ)"],
                        [u"谷", u"歌"]]
         # NOTE(review): the second argument (10) is presumably the
         # per-word character limit -- confirm against `char_utils`.
         char_ids = char_utils.batch_word_to_char_ids(
             tf.constant(input_words), 10)
         output_emb = common_layers.character_cnn(char_ids, num_filters=5)
         # Open the session through `tf.compat.v1`, matching the compat
         # initializer below: bare `tf.Session` is absent from the TF2
         # top-level namespace.
         with tf.compat.v1.Session() as sess:
             sess.run(tf.compat.v1.global_variables_initializer())
             actual_output_emb = sess.run(output_emb)
         self.assertAllEqual(actual_output_emb.shape, [3, 2, 5])
예제 #3
0
  def _embed(prefix):
    """Embed the input text based on its word and character IDs.

    Args:
      prefix: Feature-name prefix; the word ids are read from
        `features[prefix + "_wid"]` and the character ids from
        `features[prefix + "_cid"]`.

    Returns:
      The word embeddings and character-CNN embeddings concatenated along
      the last axis, passed through dropout when `mode` is TRAIN.
    """
    word_ids = features[prefix + "_wid"]
    word_vectors = tf.nn.embedding_lookup(embedding_weights, word_ids)

    char_ids = features[prefix + "_cid"]
    char_vectors = common_layers.character_cnn(
        char_ids=char_ids,
        emb_size=FLAGS.char_emb_size,
        kernel_width=FLAGS.char_kernel_width,
        num_filters=FLAGS.num_char_filters)

    combined = tf.concat([word_vectors, char_vectors], -1)

    # Dropout is a training-only regularizer; every other mode returns the
    # raw concatenation.
    if mode != tf.estimator.ModeKeys.TRAIN:
      return combined

    # NOTE(review): the second positional argument is `keep_prob` in the
    # TF1 signature of `tf.nn.dropout` -- confirm which API `tf` is bound to.
    return tf.nn.dropout(combined, 1.0 - FLAGS.dropout_ratio)