def conv_char_embedding_alt(char_vocab, size, unique_word_chars, unique_word_lengths, word_to_uniqs,
                            conv_width=5, emb_initializer=tf.random_normal_initializer(0.0, 0.1), scope=None):
    # fixed padding on character level (conv_width // 2 zeros on each side)
    pad = tf.zeros(tf.stack([tf.shape(unique_word_lengths)[0], math.floor(conv_width / 2)]), tf.int32)
    unique_word_chars = tf.concat([pad, unique_word_chars, pad], 1)

    if not isinstance(word_to_uniqs, list):
        word_to_uniqs = [word_to_uniqs]

    with tf.variable_scope(scope or "char_embeddings") as vs:
        char_embedding_matrix = \
            tf.get_variable("char_embedding_matrix", shape=(len(char_vocab), size),
                            initializer=emb_initializer, trainable=True)

        max_word_length = tf.reduce_max(unique_word_lengths)
        embedded_chars = tf.nn.embedding_lookup(char_embedding_matrix, tf.cast(unique_word_chars, tf.int32))

        with tf.variable_scope("conv"):
            # create filter like this to get fan-in and fan-out right for initializers depending on those
            filter = tf.get_variable("filter", [conv_width * size, size])
            filter_reshaped = tf.reshape(filter, [conv_width, size, size])
            # [B, T, S + pad_right]
            conv_out = tf.nn.conv1d(embedded_chars, filter_reshaped, 1, "VALID")
            conv_mask = tf.expand_dims(
                tfutil.mask_for_lengths(unique_word_lengths, max_length=max_word_length), 2)
            conv_out = conv_out + conv_mask

        unique_embedded_words = tf.reduce_max(conv_out, [1])

        all_embedded = []
        for word_idx in word_to_uniqs:
            flat_word_idx = tf.reshape(word_idx, [-1])
            embedded_words = tf.gather(unique_embedded_words, flat_word_idx)
            embedded_words = tf.reshape(embedded_words,
                                        tf.stack([-1, tf.unstack(tf.shape(word_idx))[1], size]))
            all_embedded.append(embedded_words)

    return all_embedded
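
# Usage sketch (illustrative only; the tensor names and the embedding size 64 are hypothetical,
# the real inputs come from the preprocessing that builds the unique-word tensors):
#
#   unique_word_chars:    [U, max_word_len] int  - character ids per unique word
#   unique_word_lengths:  [U]               int  - character length per unique word
#   question_words2unique / support_words2unique: [B, L] indices into the unique-word rows
#
#   char_emb_q, char_emb_s = conv_char_embedding_alt(
#       char_vocab, 64, unique_word_chars, unique_word_lengths,
#       [question_words2unique, support_words2unique])
#
# Each returned tensor has shape [B, L, size]; per word, the character convolution is
# max-pooled over the character dimension.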

def fastqa_answer_layer(size, encoded_question, question_length, encoded_support, support_length,
                        correct_start, answer2question, is_eval, embedded_slot_ids, beam_size=1):
    beam_size = tf.cond(is_eval, lambda: tf.constant(beam_size, tf.int32), lambda: tf.constant(1, tf.int32))
    batch_size = tf.shape(question_length)[0]
    answer2question = tf.cond(is_eval, lambda: tf.range(0, batch_size, dtype=tf.int32), lambda: answer2question)
    input_size = encoded_support.get_shape()[-1].value
    support_states_flat = tf.reshape(encoded_support, [-1, input_size])

    # computing single time attention over question
    attention_scores = tf.contrib.layers.fully_connected(encoded_question, 1,
                                                         activation_fn=None,
                                                         weights_initializer=None,
                                                         biases_initializer=None,
                                                         scope="question_attention")
    q_mask = tfutil.mask_for_lengths(question_length, batch_size)
    attention_scores = attention_scores + tf.expand_dims(q_mask, 2)
    question_attention_weights = tf.nn.softmax(attention_scores, 1, name="question_attention_weights")
    question_state = tf.reduce_sum(question_attention_weights * encoded_question, [1])

    slot_q_state = tf.concat([tf.expand_dims(question_state, dim=1),
                              tf.expand_dims(embedded_slot_ids, dim=1)], 1)
    attention_scores2 = tf.contrib.layers.fully_connected(slot_q_state, 1,
                                                          activation_fn=None,
                                                          weights_initializer=None,
                                                          biases_initializer=None,
                                                          scope="slot_question_attention")
    slot_question_attention_weights = tf.nn.softmax(attention_scores2, 1,
                                                    name="slot_question_attention_weights")
    slot_question_state = tf.reduce_sum(slot_question_attention_weights * slot_q_state, [1])

    # Prediction
    # start
    with tf.variable_scope('coattention'):
        s_encoding = tf.contrib.layers.fully_connected(encoded_support, size,
                                                       activation_fn=None,
                                                       weights_initializer=None,
                                                       scope="q_start_inter")
        q_encoding = tf.tanh(encoded_question)
        c_encoding = tf.tanh(s_encoding)
        q_variation = tf.transpose(q_encoding, perm=[0, 2, 1])
        # compute affinity matrix, (batch_size, context+1, question+1)
        L = tf.matmul(c_encoding, q_variation)
        # shape = (batch_size, question+1, context+1)
        L_t = tf.transpose(L, perm=[0, 2, 1])
        # normalize with respect to question
        a_q = tf.map_fn(lambda x: tf.nn.softmax(x), L_t, dtype=tf.float32)
        # normalize with respect to context
        a_c = tf.map_fn(lambda x: tf.nn.softmax(x), L, dtype=tf.float32)
        # summaries with respect to question, (batch_size, question+1, hidden_size)
        c_q = tf.matmul(a_q, c_encoding)
        c_q_emb = tf.concat([q_variation, tf.transpose(c_q, perm=[0, 2, 1])], 1)
        # summaries of previous attention with respect to context
        c_d = tf.matmul(c_q_emb, a_c, adjoint_b=True)
        # final coattention context, (batch_size, context+1, 3*hidden_size)
        co_att = tf.concat([c_encoding, tf.transpose(c_d, perm=[0, 2, 1])], 2)

    with tf.variable_scope('encoder'):
        # LSTM for coattention encoding
        cell_fw = tf.nn.rnn_cell.LSTMCell(size)
        cell_bw = tf.nn.rnn_cell.LSTMCell(size)
        # compute coattention encoding
        u, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, co_att,
                                               sequence_length=support_length,
                                               dtype=tf.float32)
        output_match = tf.concat(u, 2)

    '''
    max_support_length = tf.reduce_max(support_length)
    max_question_length = tf.reduce_max(question_length)
    question_mask = mask(question_length, maxlen=max_question_length)
    support_mask = mask(support_length, maxlen=max_support_length)
    question_mask = tf.cast(question_mask, tf.float32)
    support_mask = tf.cast(support_mask, tf.float32)
    output_match = match_utils.bilateral_match_func2(
        encoded_question, encoded_support, question_length, support_length,
        question_mask, support_mask, MP_dim=30, input_dim=size,
        with_filter_layer=True, context_layer_num=1, context_lstm_dim=size / 2,
        is_training=True, dropout_rate=0.5, with_match_highway=True,
        aggregation_layer_num=1, aggregation_lstm_dim=size / 2, highway_layer_num=1,
        with_aggregation_highway=False, with_lex_decomposition=False, lex_decompsition_dim=300,
        with_full_match=True, with_maxpool_match=True, with_attentive_match=True,
        with_max_attentive_match=True, with_left_match=True, with_right_match=True,
        with_mean_aggregation=True)
    output_match = run_match_lstm([encoded_question, encoded_support],
                                  [question_length, support_length], size)
    '''

    start_input = tf.concat([tf.expand_dims(slot_question_state, 1) * encoded_support,
                             encoded_support, output_match], 2)

    q_start_inter = tf.contrib.layers.fully_connected(slot_question_state, size,
                                                      activation_fn=None,
                                                      weights_initializer=None,
                                                      scope="q_start_inter")

    q_start_state = tf.contrib.layers.fully_connected(start_input, size,
                                                      activation_fn=None,
                                                      weights_initializer=None,
                                                      biases_initializer=None,
                                                      scope="q_start") + tf.expand_dims(q_start_inter, 1)

    start_scores = tf.contrib.layers.fully_connected(tf.nn.relu(q_start_state), 1,
                                                     activation_fn=None,
                                                     weights_initializer=None,
                                                     biases_initializer=None,
                                                     scope="start_scores")
    start_scores = tf.squeeze(start_scores, [2])

    support_mask = tfutil.mask_for_lengths(support_length, batch_size)
    start_scores = start_scores + support_mask

    # probs are needed during beam search
    start_probs = tf.nn.softmax(start_scores)
    predicted_start_probs, predicted_start_pointer = tf.nn.top_k(start_probs, beam_size)

    # use correct start during training, because p(end|start) should be optimized
    predicted_start_pointer = tf.gather(predicted_start_pointer, answer2question)
    predicted_start_probs = tf.gather(predicted_start_probs, answer2question)
    start_pointer = tf.cond(is_eval, lambda: predicted_start_pointer, lambda: tf.expand_dims(correct_start, 1))

    # flatten again
    start_pointer = tf.reshape(start_pointer, [-1])
    answer2questionwithbeam = tf.reshape(
        tf.tile(tf.expand_dims(answer2question, 1), tf.stack([1, beam_size])), [-1])

    offsets = tf.cast(tf.range(0, batch_size) * tf.reduce_max(support_length), dtype=tf.int32)
    offsets = tf.gather(offsets, answer2questionwithbeam)
    u_s = tf.gather(support_states_flat, start_pointer + offsets)

    start_scores = tf.gather(start_scores, answer2questionwithbeam)
    start_input = tf.gather(start_input, answer2questionwithbeam)
    encoded_support = tf.gather(encoded_support, answer2questionwithbeam)
    slot_question_state = tf.gather(slot_question_state, answer2questionwithbeam)
    support_mask = tf.gather(support_mask, answer2questionwithbeam)

    # end
    end_input = tf.concat([tf.expand_dims(u_s, 1) * encoded_support, start_input], 2)

    q_end_inter = tf.contrib.layers.fully_connected(tf.concat([slot_question_state, u_s], 1), size,
                                                    activation_fn=None,
                                                    weights_initializer=None,
                                                    scope="q_end_inter")

    q_end_state = tf.contrib.layers.fully_connected(end_input, size,
                                                    activation_fn=None,
                                                    weights_initializer=None,
                                                    biases_initializer=None,
                                                    scope="q_end") + tf.expand_dims(q_end_inter, 1)

    end_scores = tf.contrib.layers.fully_connected(tf.nn.relu(q_end_state), 1,
                                                   activation_fn=None,
                                                   weights_initializer=None,
                                                   biases_initializer=None,
                                                   scope="end_scores")
    end_scores = tf.squeeze(end_scores, [2])
    end_scores = end_scores + support_mask

    def mask_with_start(scores):
        return scores + tfutil.mask_for_lengths(tf.cast(start_pointer, tf.int32),
                                                batch_size * beam_size,
                                                tf.reduce_max(support_length),
                                                mask_right=False)

    end_scores = tf.cond(is_eval, lambda: mask_with_start(end_scores), lambda: end_scores)

    # probs are needed during beam search
    end_probs = tf.nn.softmax(end_scores)
    predicted_end_probs, predicted_end_pointer = tf.nn.top_k(end_probs, 1)
    predicted_end_probs = tf.reshape(predicted_end_probs, tf.stack([-1, beam_size]))
    predicted_end_pointer = tf.reshape(predicted_end_pointer, tf.stack([-1, beam_size]))

    predicted_idx = tf.cast(tf.argmax(predicted_start_probs * predicted_end_probs, 1), tf.int32)
    predicted_idx = tf.stack([tf.range(0, tf.shape(answer2question)[0], dtype=tf.int32), predicted_idx], 1)

    predicted_start_pointer = tf.gather_nd(predicted_start_pointer, predicted_idx)
    predicted_end_pointer = tf.gather_nd(predicted_end_pointer, predicted_idx)

    return start_scores, end_scores, predicted_start_pointer, predicted_end_pointer
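
# Bookkeeping note (illustrative numbers, not taken from the code): support_states_flat has
# shape [B * L_s, input_size] and offsets = [0, L_s, 2*L_s, ...] gathered per (answer, beam)
# entry. With batch_size = 2 and a maximum support length of 100, offsets is [0, 100], so
# start_pointer + offsets selects the row of the predicted (or correct) start token for every
# (example, beam) pair; that row becomes u_s in the end-scoring step.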

def fastqa_model(shared_vocab_config, emb_question, question_length,
                 emb_support, support_length,
                 unique_word_chars, unique_word_char_length,
                 question_words2unique, support_words2unique,
                 word_in_question, slot_list,
                 correct_start, answer2question, keep_prob, is_eval):
    """
    fast_qa model

    Args:
        shared_vocab_config: has at least a field config (dict) with keys "repr_dim", "repr_dim_input"
        emb_question: [Q, L_q, N]
        question_length: [Q]
        emb_support: [Q, L_s, N]
        support_length: [Q]
        unique_word_chars
        unique_word_char_length
        question_words2unique
        support_words2unique
        word_in_question: [Q, L_s]
        correct_start: [A], only during training, i.e., is_eval=False
        answer2question: [A], only during training, i.e., is_eval=False
        keep_prob: []
        is_eval: []

    Returns:
        start_scores [B, L_s], end_scores [B, L_s], span_prediction [B, 2]
    """
    with tf.variable_scope("fast_qa", initializer=tf.contrib.layers.xavier_initializer()):
        # Some helpers
        batch_size = tf.shape(question_length)[0]
        max_question_length = tf.reduce_max(question_length)
        support_mask = tfutil.mask_for_lengths(support_length, batch_size)
        question_binary_mask = tfutil.mask_for_lengths(question_length, batch_size, mask_right=False, value=1.0)

        input_size = shared_vocab_config.config["repr_dim_input"]
        size = shared_vocab_config.config["repr_dim"]
        with_char_embeddings = shared_vocab_config.config.get("with_char_embeddings", False)

        # set shapes for inputs
        emb_question.set_shape([None, None, input_size])
        emb_support.set_shape([None, None, input_size])

        # slot embedding part
        slot_embeddings = tf.get_variable("slot_embeddings", [64, size])
        embedded_slot_ids = tf.gather(slot_embeddings, slot_list)

        if with_char_embeddings:
            # compute combined embeddings
            [char_emb_question, char_emb_support] = conv_char_embedding_alt(
                shared_vocab_config.config["char_vocab"], size,
                unique_word_chars, unique_word_char_length,
                [question_words2unique, support_words2unique])

            emb_question = tf.concat([emb_question, char_emb_question], 2)
            emb_support = tf.concat([emb_support, char_emb_support], 2)
            input_size += size

            # set shapes for inputs
            emb_question.set_shape([None, None, input_size])
            emb_support.set_shape([None, None, input_size])

        # compute encoder features
        question_features = tf.ones(tf.stack([batch_size, max_question_length, 2]))

        v_wiqw = tf.get_variable("v_wiq_w", [1, 1, input_size],
                                 initializer=tf.constant_initializer(1.0))

        wiq_w = tf.matmul(emb_question * v_wiqw, emb_support, adjoint_b=True)
        wiq_w = wiq_w + tf.expand_dims(support_mask, 1)
        wiq_w = tf.reduce_sum(tf.nn.softmax(wiq_w) * tf.expand_dims(question_binary_mask, 2), [1])

        # [B, L_s, 2]
        support_features = tf.concat([tf.expand_dims(word_in_question, 2), tf.expand_dims(wiq_w, 2)], 2)

        # highway layer to allow for interaction between concatenated embeddings
        if with_char_embeddings:
            all_embedded = tf.concat([emb_question, emb_support], 1)
            all_embedded = tf.contrib.layers.fully_connected(all_embedded, size,
                                                             activation_fn=None,
                                                             weights_initializer=None,
                                                             biases_initializer=None,
                                                             scope="embeddings_projection")

            all_embedded_hw = highway_network(all_embedded, 1)

            emb_question = tf.slice(all_embedded_hw, [0, 0, 0], tf.stack([-1, max_question_length, -1]))
            emb_support = tf.slice(all_embedded_hw, tf.stack([0, max_question_length, 0]), [-1, -1, -1])

            emb_question.set_shape([None, None, size])
            emb_support.set_shape([None, None, size])

        # variational dropout
        dropout_shape = tf.unstack(tf.shape(emb_question))
        dropout_shape[1] = 1

        [emb_question, emb_support] = tf.cond(is_eval,
                                              lambda: [emb_question, emb_support],
                                              lambda: fixed_dropout([emb_question, emb_support],
                                                                    keep_prob, dropout_shape))

        # extend embeddings with features
        emb_question_ext = tf.concat([emb_question, question_features], 2)
        emb_support_ext = tf.concat([emb_support, support_features], 2)

        # encode question and support
        rnn = tf.contrib.rnn.LSTMBlockFusedCell
        encoded_question = birnn_with_projection(size, rnn, emb_question_ext, question_length,
                                                 projection_scope="question_proj")

        encoded_support = birnn_with_projection(size, rnn, emb_support_ext, support_length,
                                                share_rnn=True, projection_scope="support_proj")

        start_scores, end_scores, predicted_start_pointer, predicted_end_pointer = \
            fastqa_answer_layer(size, encoded_question, question_length, encoded_support, support_length,
                                correct_start, answer2question, is_eval, embedded_slot_ids,
                                beam_size=shared_vocab_config.config.get("beam_size", 1))

        span = tf.concat([tf.expand_dims(predicted_start_pointer, 1),
                          tf.expand_dims(predicted_end_pointer, 1)], 1)

        return start_scores, end_scores, span
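
# Configuration sketch: the keys below are exactly the ones read from shared_vocab_config.config
# in fastqa_model / fastqa_answer_layer; the values shown here are hypothetical.
#
#   shared_vocab_config.config = {
#       "repr_dim_input": 300,          # N, dimensionality of the input word embeddings
#       "repr_dim": 150,                # hidden size used throughout the model
#       "with_char_embeddings": True,   # enables conv_char_embedding_alt + the highway layer
#       "char_vocab": char_vocab,       # only required when with_char_embeddings is True
#       "beam_size": 5,                 # beam width used by the answer layer at eval time
#   }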

def cbow_xqa_model(shared_vocab_config, emb_question, question_length,
                   emb_support, support_length,
                   unique_word_chars, unique_word_char_length,
                   question_words2unique, support_words2unique,
                   word_in_question,
                   correct_start, answer2question, keep_prob, is_eval,
                   answer_type_span):
    """
    cbow baseline model

    Args:
        shared_vocab_config: has at least a field config (dict) with keys "repr_dim", "repr_dim_input"
        emb_question: [Q, L_q, N]
        question_length: [Q]
        emb_support: [Q, L_s, N]
        support_length: [Q]
        unique_word_chars
        unique_word_char_length
        question_words2unique
        support_words2unique
        word_in_question: [Q, L_s]
        correct_start: [A], only during training, i.e., is_eval=False
        answer2question: [A], only during training, i.e., is_eval=False
        keep_prob: []
        is_eval: []
        answer_type_span: [Q, 2], span within question marking the expected answer type

    Returns:
        span_scores [B, S], candidate spans [B, S, 2], span_prediction [B, 2]
    """
    with tf.variable_scope("cbow_xqa", initializer=tf.contrib.layers.xavier_initializer()):
        # Some helpers
        batch_size = tf.shape(question_length)[0]
        max_support_length = tf.reduce_max(support_length)
        max_question_length = tf.reduce_max(question_length)

        input_size = shared_vocab_config.config["repr_dim_input"]
        size = shared_vocab_config.config["repr_dim"]
        with_char_embeddings = shared_vocab_config.config.get("with_char_embeddings", False)

        # set shapes for inputs
        emb_question.set_shape([None, None, input_size])
        emb_support.set_shape([None, None, input_size])

        if with_char_embeddings:
            # compute combined embeddings
            [char_emb_question, char_emb_support] = conv_char_embedding_alt(
                shared_vocab_config.config["char_vocab"], size,
                unique_word_chars, unique_word_char_length,
                [question_words2unique, support_words2unique])

            emb_question = tf.concat([emb_question, char_emb_question], 2)
            emb_support = tf.concat([emb_support, char_emb_support], 2)
            input_size += size

            # set shapes for inputs
            emb_question.set_shape([None, None, input_size])
            emb_support.set_shape([None, None, input_size])

        # variational dropout
        dropout_shape = tf.unstack(tf.shape(emb_question))
        dropout_shape[1] = 1

        [emb_question, emb_support] = tf.cond(is_eval,
                                              lambda: [emb_question, emb_support],
                                              lambda: fixed_dropout([emb_question, emb_support],
                                                                    keep_prob, dropout_shape))

        # question encoding
        answer_type_start = tf.squeeze(tf.slice(answer_type_span, [0, 0], [-1, 1]))
        answer_type_end = tf.squeeze(tf.slice(answer_type_span, [0, 1], [-1, -1]))
        answer_type_mask = tfutil.mask_for_lengths(answer_type_start, batch_size,
                                                   max_question_length, value=1.0) * \
                           tfutil.mask_for_lengths(answer_type_end + 1, batch_size,
                                                   max_question_length, mask_right=False, value=1.0)
        answer_type = tf.reduce_sum(emb_question * tf.expand_dims(answer_type_mask, 2), 1) / \
                      tf.maximum(1.0, tf.reduce_sum(answer_type_mask, 1, keep_dims=True))

        batch_size_range = tf.range(0, batch_size)
        answer_type_start_state = tf.gather_nd(emb_question, tf.stack([batch_size_range, answer_type_start], 1))
        answer_type_end_state = tf.gather_nd(emb_question, tf.stack([batch_size_range, answer_type_end], 1))

        question_rep = tf.concat([answer_type, answer_type_start_state, answer_type_end_state], 1)
        question_rep.set_shape([None, input_size * 3])

        # wiq features
        support_mask = tfutil.mask_for_lengths(support_length, batch_size)
        question_binary_mask = tfutil.mask_for_lengths(question_length, batch_size, mask_right=False, value=1.0)

        v_wiqw = tf.get_variable("v_wiq_w", [1, 1, input_size],
                                 initializer=tf.constant_initializer(1.0))

        wiq_w = tf.matmul(emb_question * v_wiqw, emb_support, adjoint_b=True)
        wiq_w = wiq_w + tf.expand_dims(support_mask, 1)
        wiq_w = tf.reduce_sum(tf.nn.softmax(wiq_w) * tf.expand_dims(question_binary_mask, 2), [1])

        wiq_exp = tf.stack([word_in_question, wiq_w], 2)

        # support span encoding
        spans = [tf.stack([tf.range(0, max_support_length), tf.range(0, max_support_length)], 1)]

        wiq_exp = tf.pad(wiq_exp, [[0, 0], [20, 20], [0, 0]])
        wiq_pooled5 = tf.layers.average_pooling1d(
            tf.slice(wiq_exp, [0, 15, 0], tf.stack([-1, max_support_length + 10, -1])), 5, [1], 'valid')
        wiq_pooled10 = tf.layers.average_pooling1d(
            tf.slice(wiq_exp, [0, 10, 0], tf.stack([-1, max_support_length + 20, -1])), 10, [1], 'valid')
        wiq_pooled20 = tf.layers.average_pooling1d(wiq_exp, 20, [1], 'valid')

        wiqs_left5 = [tf.slice(wiq_pooled5, [0, 0, 0], tf.stack([-1, max_support_length, -1]))]
        wiqs_right5 = [tf.slice(wiq_pooled5, [0, 6, 0], [-1, -1, -1])]
        wiqs_left10 = [tf.slice(wiq_pooled10, [0, 0, 0], tf.stack([-1, max_support_length, -1]))]
        wiqs_right10 = [tf.slice(wiq_pooled10, [0, 11, 0], [-1, -1, -1])]
        wiqs_left20 = [tf.slice(wiq_pooled20, [0, 0, 0], tf.stack([-1, max_support_length, -1]))]
        wiqs_right20 = [tf.slice(wiq_pooled20, [0, 21, 0], [-1, -1, -1])]

        context_window = 5
        padded_support = tf.pad(emb_support, [[0, 0], [context_window, context_window], [0, 0]], "CONSTANT")
        # [B, L + 10 - 4, S]
        emb_support_windows = tf.layers.average_pooling1d(padded_support, 5, [1], "VALID", "channels_last")

        left_context_windows = tf.slice(emb_support_windows, [0, 0, 0],
                                        tf.stack([-1, max_support_length, -1]))
        right_context_windows = tf.slice(emb_support_windows, [0, context_window + 1, 0],
                                         [-1, -1, -1])
        span_rep = [tf.concat([emb_support, emb_support, emb_support,
                               left_context_windows, right_context_windows], 2)]

        for window_size in range(2, _max_span_size + 1):
            start = tf.slice(emb_support, [0, 0, 0],
                             tf.stack([-1, max_support_length - (window_size - 1), -1]))
            end = tf.slice(emb_support, [0, window_size - 1, 0], [-1, -1, -1])
            averagespan = tf.layers.average_pooling1d(emb_support, window_size, [1], "VALID", "channels_last")
            left_context_windows = tf.slice(emb_support_windows, [0, 0, 0],
                                            tf.stack([-1, max_support_length - (window_size - 1), -1]))
            right_context_windows = tf.slice(emb_support_windows,
                                             [0, window_size - 1 + context_window + 1, 0],
                                             [-1, -1, -1])

            span_rep.append(tf.concat([averagespan, start, end,
                                       left_context_windows, right_context_windows], 2))

            wiqs_left5.append(tf.slice(wiq_pooled5, [0, 0, 0],
                                       tf.stack([-1, max_support_length - (window_size - 1), -1])))
            wiqs_left10.append(tf.slice(wiq_pooled10, [0, 0, 0],
                                        tf.stack([-1, max_support_length - (window_size - 1), -1])))
            wiqs_left20.append(tf.slice(wiq_pooled20, [0, 0, 0],
                                        tf.stack([-1, max_support_length - (window_size - 1), -1])))

            wiqs_right5.append(tf.slice(wiq_pooled5, [0, window_size + 5, 0], [-1, -1, -1]))
            wiqs_right10.append(tf.slice(wiq_pooled10, [0, window_size + 10, 0], [-1, -1, -1]))
            wiqs_right20.append(tf.slice(wiq_pooled20, [0, window_size + 20, 0], [-1, -1, -1]))

            spans.append(tf.stack([tf.range(0, max_support_length - (window_size - 1)),
                                   tf.range(window_size - 1, max_support_length)], 1))

        span_rep = tf.concat(span_rep, 1)
        span_rep.set_shape([None, None, input_size * 5])
        wiqs_left5 = tf.concat(wiqs_left5, 1)
        wiqs_left10 = tf.concat(wiqs_left10, 1)
        wiqs_left20 = tf.concat(wiqs_left20, 1)
        wiqs_right5 = tf.concat(wiqs_right5, 1)
        wiqs_right10 = tf.concat(wiqs_right10, 1)
        wiqs_right20 = tf.concat(wiqs_right20, 1)

        spans = tf.concat(spans, 0)

        # scoring
        with tf.variable_scope("question_rep"):
            question_rep = tf.layers.dense(question_rep, size, activation=tf.tanh)
        with tf.variable_scope("question_inter"):
            question_inter = tf.layers.dense(question_rep, size, activation=None)
        with tf.variable_scope("span_rep"):
            span_rep = tf.layers.dense(span_rep, size, activation=tf.tanh)

        span_question_rep = tf.concat([span_rep,
                                       tf.expand_dims(question_rep, 1) * span_rep,
                                       wiqs_left5, wiqs_left10, wiqs_left20,
                                       wiqs_right5, wiqs_right10, wiqs_right20], 2)
        span_question_rep.set_shape([None, None, 2 * size + 6 * 2])

        with tf.variable_scope("hidden"):
            h = tf.tanh(tf.layers.dense(span_question_rep, size, activation=None) +
                        tf.expand_dims(question_inter, 1))

        with tf.variable_scope("scoring"):
            span_scores = tf.squeeze(tf.layers.dense(h, 1, activation=None), 2)

        best_span = tf.argmax(span_scores, 1)
        predicted_span = tf.gather(spans, best_span)

        return span_scores, tf.tile(tf.expand_dims(spans, 0), tf.stack([batch_size, 1, 1])), predicted_span
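
# Span enumeration sketch (illustrative; assumes _max_span_size = 3 and max_support_length = 4):
# `spans` concatenates, per window size, the (start, end) token indices of every candidate span:
#   width 1: (0,0) (1,1) (2,2) (3,3)
#   width 2: (0,1) (1,2) (2,3)
#   width 3: (0,2) (1,3)
# `span_scores` follows the same concatenation order, so tf.gather(spans, best_span) maps the
# arg-max score back to its (start, end) token span.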

def conv_char_embeddings(vocab, size, word_ids, conv_width=5,
                         emb_initializer=tf.random_normal_initializer(0.0, 0.1), scope=None):
    """
    :param vocab: filled Vocab instance
    :param size: size of embeddings
    :param word_ids: tf.Tensor[None, None] or list of tensors
    :param conv_width: int
    :return: char embedded word ids
    """
    if not isinstance(word_ids, list):
        word_ids = [word_ids]

    # create character vocab + word lengths + char ids per word
    pad_right = math.ceil(conv_width / 2)  # fixed padding on the right side
    vocab_size = max(vocab.sym2id.values()) + 1
    max_l = max(len(w) for w in vocab.sym2id) + pad_right

    char_vocab = defaultdict(lambda: len(char_vocab))
    char_vocab["PAD"] = 0
    word_to_chars_arr = np.zeros((vocab_size, max_l), np.int16)
    word_lengths_arr = np.zeros([vocab_size], np.int8)
    for w, i in vocab.sym2id.items():
        for k, c in enumerate(w):
            j = char_vocab[c]
            word_to_chars_arr[i, k] = j
        word_lengths_arr[i] = len(w) + conv_width - 1

    with tf.variable_scope(scope or "char_embeddings") as vs:
        word_to_chars = tf.constant(word_to_chars_arr, name="word_to_chars")
        word_lengths = tf.constant(word_lengths_arr, name="word_lengths")

        char_embedding_matrix = \
            tf.get_variable("char_embedding_matrix", shape=(len(char_vocab), size),
                            initializer=emb_initializer, trainable=True)

        all_embedded = []
        for i, ids in enumerate(word_ids):
            if i > 0:
                vs.reuse_variables()
            unique_words, word_idx = tf.unique(tf.reshape(ids, [-1]))
            chars = tf.nn.embedding_lookup(word_to_chars, unique_words)
            wl = tf.nn.embedding_lookup(word_lengths, unique_words)
            wl = tf.cast(wl, tf.int32)
            max_word_length = tf.reduce_max(wl)
            chars = tf.slice(chars, [0, 0], tf.stack([-1, max_word_length]))

            embedded_chars = tf.nn.embedding_lookup(char_embedding_matrix, tf.cast(chars, tf.int32))

            with tf.variable_scope("conv"):
                # create filter like this to get fan-in and fan-out right for initializers depending on those
                filter = tf.get_variable("filter", [conv_width * size, size])
                filter_reshaped = tf.reshape(filter, [conv_width, size, size])
                conv_out = tf.nn.conv1d(embedded_chars, filter_reshaped, 1, "SAME")
                conv_mask = tf.expand_dims(
                    tfutil.mask_for_lengths(wl - pad_right, max_length=max_word_length), 2)
                conv_out = conv_out + conv_mask

            unique_embedded_words = tf.reduce_max(conv_out, [1])

            embedded_words = tf.gather(unique_embedded_words, word_idx)
            embedded_words = tf.reshape(embedded_words,
                                        tf.stack([-1, tf.unstack(tf.shape(ids))[1], size]))
            all_embedded.append(embedded_words)

    return all_embedded
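
# Usage sketch (hypothetical tensors; `vocab` must already be filled so that sym2id is final):
#
#   embedded_q, embedded_s = conv_char_embeddings(vocab, 64, [question_word_ids, support_word_ids])
#
# Both outputs have shape [B, L, 64]. In contrast to conv_char_embedding_alt, the
# word-to-character lookup tables are baked into the graph as constants, so no per-batch
# unique-word tensors have to be fed.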