def join(self, is_training, hparams, inputs, out_w, out_c, tags):
  """Meta model joins word and char model."""
  with tf.variable_scope('meta_char_word'):
    out_1 = layers.dropout(is_training, hparams.keep_prob, out_w)
    out_2 = layers.dropout(is_training, hparams.keep_prob, out_c)
    outputs = tf.concat([out_1, out_2], axis=2)
    out_fw, out_bw, _ = layers.lstm_layers(
        is_training, outputs, hparams.num_layers_meta,
        hparams.hidden_meta_size, hparams.recur_keep_j_prob)
    outputs = tf.concat([out_fw, out_bw], axis=2)
    outputs = layers.mlp(
        is_training, outputs, output_size=tags, keep_prob=hparams.keep_prob)
    preds_w = tf.to_int32(tf.argmax(outputs, axis=-1))
    targets_w = inputs[:, :, 2]
    tokens_to_keep = tf.to_float(tf.greater(inputs[:, :, 0], PAD))
    loss = tf.losses.sparse_softmax_cross_entropy(targets_w, outputs,
                                                  tokens_to_keep)
    if is_training:
      return loss
    else:
      return preds_w
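# Layout of the packed `inputs` tensor, as consumed here and in
# `word_model` below (recovered from how the indices are used; the
# descriptions are editorial, not from the original):
#
#   inputs[:, :, 0]  # word ids; also drive the padding mask via
#                    # tf.greater(inputs[:, :, 0], PAD)
#   inputs[:, :, 1]  # ids into the pretrained embedding table
#   inputs[:, :, 2]  # gold tag ids (the targets)
#
# `out_w` and `out_c` are the per-token representations that `word_model`
# and `char_model` return in inference mode.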
def char_model(self, is_training, hparams, chars, embedding_char_size, tags,
               inputs_char, indexs_start, indexs_end, targets_w):
  """Character model."""
  with tf.variable_scope('chars'):
    if is_training:
      # Initialize the character embeddings with a fixed random seed so
      # training runs are reproducible.
      embed_dims = [chars, embedding_char_size]
      np.random.seed(seed=1)
      embeddings_char = np.random.randn(*embed_dims).astype(np.float32)
      cembed = tf.get_variable(
          'char_embeddings', dtype=tf.float32, initializer=embeddings_char)
    else:
      cembed = tf.get_variable('char_embeddings')

    # Shared by both the training and inference paths.
    embed_nd = tf.nn.embedding_lookup(cembed, inputs_char)
    embed = layers.dropout(is_training, hparams.embed_keep_prob_ch, embed_nd)
    output_fw, output_bw, _ = layers.lstm_layers(
        is_training, embed, hparams.num_layers_chars,
        hparams.hidden_char_size, hparams.recur_keep_prob)

    # Gather the forward LSTM outputs at the first and last character of
    # each word.
    output_fw_fst = tf.gather_nd(output_fw, indexs_start)
    output_fw_lst = tf.gather_nd(output_fw, indexs_end)

    # Gather the backward LSTM outputs at the first and last character of
    # each word.
    output_bw_fst = tf.gather_nd(output_bw, indexs_start)
    output_bw_lst = tf.gather_nd(output_bw, indexs_end)

    # Concatenate the gathered LSTM outputs into one representation per word.
    outputs = tf.concat(
        [output_fw_fst, output_fw_lst, output_bw_fst, output_bw_lst], axis=2)
    outputs = layers.mlp(
        is_training, outputs, output_size=hparams.mlp_size,
        keep_prob=hparams.keep_prob)

    targets = targets_w
    tok_keep = tf.to_float(tf.greater(targets, PAD))
    linear = layers.linear_with_dropout(
        is_training, outputs, tags, keep_prob=hparams.keep_prob)
    preds = tf.to_int32(tf.argmax(linear, axis=-1))

    if is_training:
      int_tok_keep = tf.to_int32(tok_keep)
      t_correct = tf.to_int32(tf.equal(preds, targets)) * int_tok_keep
      accuracy = tf.reduce_sum(t_correct) / tf.reduce_sum(int_tok_keep)
      loss = tf.losses.sparse_softmax_cross_entropy(targets, linear,
                                                    tok_keep)
      return loss, accuracy
    else:
      return preds, outputs
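# Shape sketch for the gathers above (an illustration; the concrete index
# layout is an assumption, not taken from this module): with `output_fw` of
# shape [batch, max_chars, hidden_char_size], `indexs_start` and
# `indexs_end` would be [batch, max_words, 2] tensors of
# (sentence_index, char_position) pairs addressing the first and last
# character of every word. For one sentence "to be" with characters
# ['t', 'o', ' ', 'b', 'e'] at positions 0..4:
#
#   indexs_start = [[[0, 0], [0, 3]]]  # 't' of "to", 'b' of "be"
#   indexs_end   = [[[0, 1], [0, 4]]]  # 'o' of "to", 'e' of "be"
#
# so tf.gather_nd(output_fw, indexs_start) has shape
# [batch, max_words, hidden_char_size], and the four gathered tensors
# concatenate along axis 2 to [batch, max_words, 4 * hidden_char_size].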
def word_model(self, is_training, hparams, words, embedding_word_size, tags,
               pretrained_embed, inputs):
  """Word model."""
  with tf.variable_scope('words'):
    # Trained word embeddings start at zero, so initially only the
    # pretrained embeddings added below contribute to the input.
    embedding = tf.get_variable(
        'word_embedding', [words, embedding_word_size],
        dtype=tf.float32,
        initializer=tf.zeros_initializer())
    word_inputs = tf.nn.embedding_lookup(embedding, inputs[:, :, 0])
    word_inputs = layers.dropout(is_training, hparams.embed_keep_prob,
                                 word_inputs)

    # Add lookups from the pretrained embedding table to the trained ones.
    pret_inputs = tf.nn.embedding_lookup(pretrained_embed, inputs[:, :, 1])
    pret_inputs = layers.dropout(is_training, hparams.embed_keep_prob,
                                 pret_inputs)
    word_inputs += pret_inputs

    targets_w = inputs[:, :, 2]
    outputs = word_inputs
    output_fw, output_bw, _ = layers.lstm_layers(
        is_training, outputs, hparams.num_layers_words,
        hparams.hidden_word_size, hparams.recur_keep_w_prob)
    outputs = tf.concat([output_fw, output_bw], axis=2)
    outputs = layers.mlp(
        is_training, outputs, output_size=hparams.mlp_size,
        keep_prob=hparams.keep_prob)
    logits = layers.linear_with_dropout(
        is_training, outputs, tags, keep_prob=hparams.keep_prob)
    preds_w = tf.to_int32(tf.argmax(logits, axis=-1))

    # Note: this accuracy is averaged over all positions, padding included;
    # the loss below masks padding via `tokens_to_keep`.
    tag_correct_w = tf.to_int32(tf.equal(preds_w, targets_w))
    correct = tf.reduce_sum(tag_correct_w) / tf.size(tag_correct_w)

    tokens_to_keep = tf.to_float(tf.greater(inputs[:, :, 0], PAD))
    loss_w = tf.losses.sparse_softmax_cross_entropy(targets_w, logits,
                                                    tokens_to_keep)
    if is_training:
      return loss_w, correct
    else:
      return preds_w, outputs
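# End-to-end sketch of how the three methods compose (hypothetical wiring
# under the assumption that the meta model is trained on the inference-mode
# outputs of the two base models; the feed tensors are placeholders for the
# caller's batch):
#
#   _, out_c = self.char_model(False, hparams, chars, embedding_char_size,
#                              tags, inputs_char, indexs_start, indexs_end,
#                              targets_w)
#   _, out_w = self.word_model(False, hparams, words, embedding_word_size,
#                              tags, pretrained_embed, inputs)
#   meta_loss = self.join(True, hparams, inputs, out_w, out_c, tags)
#
# At inference time the same wiring with is_training=False yields the final
# tag predictions:
#
#   preds_meta = self.join(False, hparams, inputs, out_w, out_c, tags)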