def conv_char_embedding_alt(char_vocab,
                            size,
                            unique_word_chars,
                            unique_word_lengths,
                            word_to_uniqs,
                            conv_width=5,
                            emb_initializer=tf.random_normal_initializer(
                                0.0, 0.1),
                            scope=None):
    # "fixed PADDING on character level"
    pad = tf.zeros(
        tf.stack(
            [tf.shape(unique_word_lengths)[0],
             math.floor(conv_width / 2)]), tf.int32)
    unique_word_chars = tf.concat([pad, unique_word_chars, pad], 1)

    if not isinstance(word_to_uniqs, list):
        word_to_uniqs = [word_to_uniqs]

    with tf.variable_scope(scope or "char_embeddings") as vs:
        char_embedding_matrix = \
            tf.get_variable("char_embedding_matrix", shape=(len(char_vocab), size),
                            initializer=emb_initializer, trainable=True)

        max_word_length = tf.reduce_max(unique_word_lengths)
        embedded_chars = tf.nn.embedding_lookup(
            char_embedding_matrix, tf.cast(unique_word_chars, tf.int32))

        with tf.variable_scope("conv"):
            # create the filter as a 2-D variable so that initializers which
            # depend on fan-in/fan-out infer them correctly, then reshape it
            # for conv1d
            filter = tf.get_variable("filter", [conv_width * size, size])
            filter_reshaped = tf.reshape(filter, [conv_width, size, size])
            # [num_unique_words, max_word_length, size]
            conv_out = tf.nn.conv1d(embedded_chars, filter_reshaped, 1,
                                    "VALID")
            conv_mask = tf.expand_dims(
                tfutil.mask_for_lengths(unique_word_lengths,
                                        max_length=max_word_length), 2)
            conv_out = conv_out + conv_mask

        unique_embedded_words = tf.reduce_max(conv_out, [1])

        all_embedded = []
        for word_idx in word_to_uniqs:
            flat_word_idx = tf.reshape(word_idx, [-1])
            embedded_words = tf.gather(unique_embedded_words, flat_word_idx)
            embedded_words = tf.reshape(
                embedded_words,
                tf.stack([-1, tf.unstack(tf.shape(word_idx))[1], size]))
            all_embedded.append(embedded_words)

    return all_embedded
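
A minimal usage sketch (not from the original repository) that feeds toy tensors with the shapes the function expects; it assumes TF 1.x graph mode and that the project's tfutil.mask_for_lengths helper is importable.

import tensorflow as tf

# Toy inputs: 2 unique words of up to 3 characters, referenced by a [B=2, L=2]
# index tensor. Character id 0 is reserved for padding.
toy_char_vocab = {"PAD": 0, "a": 1, "b": 2, "c": 3}
toy_unique_word_chars = tf.constant([[1, 2, 0], [3, 3, 1]], tf.int32)   # [U=2, 3]
toy_unique_word_lengths = tf.constant([2, 3], tf.int32)                 # [U=2]
toy_words2unique = tf.constant([[0, 1], [1, 1]], tf.int32)              # [B=2, L=2]

[toy_embedded] = conv_char_embedding_alt(toy_char_vocab, 8,
                                         toy_unique_word_chars,
                                         toy_unique_word_lengths,
                                         [toy_words2unique])
# toy_embedded has shape [2, 2, 8]: one char-CNN embedding per word position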
Example 2
def mask_with_start(scores):
    return scores + tfutil.mask_for_lengths(tf.cast(start_pointer, tf.int32),
                                            batch_size * beam_size,
                                            tf.reduce_max(support_length),
                                            mask_right=False)
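
All of these snippets lean on a project helper, tfutil.mask_for_lengths, which is not shown here. Below is a minimal sketch of the semantics implied by its call sites (an assumption, not the original implementation): positions outside the given lengths receive `value` (a large negative number by default, so the mask can be added to scores before a softmax), or positions inside the lengths when mask_right=False.

import tensorflow as tf

def mask_for_lengths_sketch(lengths, batch_size=None, max_length=None,
                            mask_right=True, value=-1000.0):
    # batch_size is kept only for signature compatibility with the call sites
    if max_length is None:
        max_length = tf.reduce_max(lengths)
    # [batch, max_length], 1.0 where position < length, 0.0 elsewhere
    in_range = tf.sequence_mask(lengths, max_length, dtype=tf.float32)
    # mask_right=True: fill positions >= length with `value`;
    # mask_right=False: fill positions < length instead
    mask = (1.0 - in_range) if mask_right else in_range
    return mask * value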
Example 3
def fastqa_answer_layer(size,
                        encoded_question,
                        question_length,
                        encoded_support,
                        support_length,
                        correct_start,
                        answer2question,
                        is_eval,
                        embedded_slot_ids,
                        beam_size=1):
    beam_size = tf.cond(is_eval, lambda: tf.constant(beam_size, tf.int32),
                        lambda: tf.constant(1, tf.int32))
    batch_size = tf.shape(question_length)[0]
    answer2question = tf.cond(is_eval,
                              lambda: tf.range(0, batch_size, dtype=tf.int32),
                              lambda: answer2question)
    input_size = encoded_support.get_shape()[-1].value
    support_states_flat = tf.reshape(encoded_support, [-1, input_size])

    # compute a single attention step over the question to obtain a
    # fixed-size question state
    attention_scores = tf.contrib.layers.fully_connected(
        encoded_question,
        1,
        activation_fn=None,
        weights_initializer=None,
        biases_initializer=None,
        scope="question_attention")
    q_mask = tfutil.mask_for_lengths(question_length, batch_size)
    attention_scores = attention_scores + tf.expand_dims(q_mask, 2)
    question_attention_weights = tf.nn.softmax(
        attention_scores, 1, name="question_attention_weights")
    question_state = tf.reduce_sum(
        question_attention_weights * encoded_question, [1])

    slot_q_state = tf.concat([
        tf.expand_dims(question_state, axis=1),
        tf.expand_dims(embedded_slot_ids, axis=1)
    ], 1)
    attention_scores2 = tf.contrib.layers.fully_connected(
        slot_q_state,
        1,
        activation_fn=None,
        weights_initializer=None,
        biases_initializer=None,
        scope="slot_question_attention")
    slot_question_attention_weights = tf.nn.softmax(
        attention_scores2, 1, name="slot_question_attention_weights")
    slot_question_state = tf.reduce_sum(
        slot_question_attention_weights * slot_q_state, [1])
    # Prediction
    # start

    with tf.variable_scope('coattention'):
        s_encoding = tf.contrib.layers.fully_connected(
            encoded_support,
            size,
            activation_fn=None,
            weights_initializer=None,
            scope="q_start_inter")
        q_encoding = tf.tanh(encoded_question)
        c_encoding = tf.tanh(s_encoding)
        q_variation = tf.transpose(q_encoding, perm=[0, 2, 1])
        # compute affinity matrix, (batch_size, context+1, question+1)
        L = tf.matmul(c_encoding, q_variation)
        # shape = (batch_size, question+1, context+1)
        L_t = tf.transpose(L, perm=[0, 2, 1])
        # normalize with respect to question
        a_q = tf.map_fn(lambda x: tf.nn.softmax(x), L_t, dtype=tf.float32)
        # normalize with respect to context
        a_c = tf.map_fn(lambda x: tf.nn.softmax(x), L, dtype=tf.float32)
        # summaries with respect to question, (batch_size, question+1, hidden_size)
        c_q = tf.matmul(a_q, c_encoding)
        c_q_emb = tf.concat(
            [q_variation, tf.transpose(c_q, perm=[0, 2, 1])], 1)
        # summaries of previous attention with respect to context
        c_d = tf.matmul(c_q_emb, a_c, adjoint_b=True)
        # final coattention context, (batch_size, context+1, 3*hidden_size)
        co_att = tf.concat([c_encoding, tf.transpose(c_d, perm=[0, 2, 1])], 2)

    with tf.variable_scope('encoder'):
        # LSTM for coattention encoding
        cell_fw = tf.nn.rnn_cell.LSTMCell(size)
        cell_bw = tf.nn.rnn_cell.LSTMCell(size)
        # compute coattention encoding
        u, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw,
                                               cell_bw,
                                               co_att,
                                               sequence_length=support_length,
                                               dtype=tf.float32)
        output_match = tf.concat(u, 2)
    '''
    max_support_length= tf.reduce_max(support_length)
    max_question_length= tf.reduce_max(question_length)
    question_mask=mask(question_length,maxlen=max_question_length)
    support_mask=mask(support_length,maxlen=max_support_length)
    question_mask=tf.cast(question_mask,tf.float32)
    support_mask=tf.cast(support_mask,tf.float32)
    output_match=match_utils.bilateral_match_func2(encoded_question, encoded_support,
                    question_length, support_length, question_mask, support_mask, MP_dim=30, input_dim=size, 
                    with_filter_layer=True, context_layer_num=1, context_lstm_dim=size/2,is_training=True,dropout_rate=0.5,
                    with_match_highway=True,aggregation_layer_num=1, aggregation_lstm_dim=size/2,highway_layer_num=1,
                    with_aggregation_highway=False,with_lex_decomposition=False,lex_decompsition_dim=300,
                    with_full_match=True, with_maxpool_match=True, with_attentive_match=True, with_max_attentive_match=True,
                    with_left_match=True, with_right_match=True, with_mean_aggregation=True)
    
    output_match=run_match_lstm([encoded_question,encoded_support],[question_length,support_length],size)
    '''
    start_input = tf.concat([
        tf.expand_dims(slot_question_state, 1) * encoded_support,
        encoded_support, output_match
    ], 2)

    q_start_inter = tf.contrib.layers.fully_connected(slot_question_state,
                                                      size,
                                                      activation_fn=None,
                                                      weights_initializer=None,
                                                      scope="q_start_inter")

    q_start_state = tf.contrib.layers.fully_connected(
        start_input,
        size,
        activation_fn=None,
        weights_initializer=None,
        biases_initializer=None,
        scope="q_start") + tf.expand_dims(q_start_inter, 1)

    start_scores = tf.contrib.layers.fully_connected(tf.nn.relu(q_start_state),
                                                     1,
                                                     activation_fn=None,
                                                     weights_initializer=None,
                                                     biases_initializer=None,
                                                     scope="start_scores")
    start_scores = tf.squeeze(start_scores, [2])

    support_mask = tfutil.mask_for_lengths(support_length, batch_size)
    start_scores = start_scores + support_mask

    # probs are needed during beam search
    start_probs = tf.nn.softmax(start_scores)

    predicted_start_probs, predicted_start_pointer = tf.nn.top_k(
        start_probs, beam_size)

    # use correct start during training, because p(end|start) should be optimized
    predicted_start_pointer = tf.gather(predicted_start_pointer,
                                        answer2question)
    predicted_start_probs = tf.gather(predicted_start_probs, answer2question)

    start_pointer = tf.cond(is_eval, lambda: predicted_start_pointer,
                            lambda: tf.expand_dims(correct_start, 1))

    # flatten again
    start_pointer = tf.reshape(start_pointer, [-1])
    answer2questionwithbeam = tf.reshape(
        tf.tile(tf.expand_dims(answer2question, 1), tf.stack([1, beam_size])),
        [-1])

    offsets = tf.cast(tf.range(0, batch_size) * tf.reduce_max(support_length),
                      dtype=tf.int32)
    offsets = tf.gather(offsets, answer2questionwithbeam)
    u_s = tf.gather(support_states_flat, start_pointer + offsets)

    start_scores = tf.gather(start_scores, answer2questionwithbeam)
    start_input = tf.gather(start_input, answer2questionwithbeam)
    encoded_support = tf.gather(encoded_support, answer2questionwithbeam)
    slot_question_state = tf.gather(slot_question_state,
                                    answer2questionwithbeam)
    support_mask = tf.gather(support_mask, answer2questionwithbeam)

    # end
    end_input = tf.concat(
        [tf.expand_dims(u_s, 1) * encoded_support, start_input], 2)

    q_end_inter = tf.contrib.layers.fully_connected(tf.concat(
        [slot_question_state, u_s], 1),
                                                    size,
                                                    activation_fn=None,
                                                    weights_initializer=None,
                                                    scope="q_end_inter")

    q_end_state = tf.contrib.layers.fully_connected(
        end_input,
        size,
        activation_fn=None,
        weights_initializer=None,
        biases_initializer=None,
        scope="q_end") + tf.expand_dims(q_end_inter, 1)

    end_scores = tf.contrib.layers.fully_connected(tf.nn.relu(q_end_state),
                                                   1,
                                                   activation_fn=None,
                                                   weights_initializer=None,
                                                   biases_initializer=None,
                                                   scope="end_scores")
    end_scores = tf.squeeze(end_scores, [2])
    end_scores = end_scores + support_mask

    def mask_with_start(scores):
        return scores + tfutil.mask_for_lengths(tf.cast(
            start_pointer, tf.int32),
                                                batch_size * beam_size,
                                                tf.reduce_max(support_length),
                                                mask_right=False)

    end_scores = tf.cond(is_eval, lambda: mask_with_start(end_scores),
                         lambda: end_scores)

    # probs are needed during beam search
    end_probs = tf.nn.softmax(end_scores)
    predicted_end_probs, predicted_end_pointer = tf.nn.top_k(end_probs, 1)
    predicted_end_probs = tf.reshape(predicted_end_probs,
                                     tf.stack([-1, beam_size]))
    predicted_end_pointer = tf.reshape(predicted_end_pointer,
                                       tf.stack([-1, beam_size]))

    predicted_idx = tf.cast(
        tf.argmax(predicted_start_probs * predicted_end_probs, 1), tf.int32)
    predicted_idx = tf.stack([
        tf.range(0, tf.shape(answer2question)[0], dtype=tf.int32),
        predicted_idx
    ], 1)

    predicted_start_pointer = tf.gather_nd(predicted_start_pointer,
                                           predicted_idx)
    predicted_end_pointer = tf.gather_nd(predicted_end_pointer, predicted_idx)

    return start_scores, end_scores, predicted_start_pointer, predicted_end_pointer
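
The final block of fastqa_answer_layer re-ranks the beam by the joint start and end probability. The same arithmetic in plain NumPy, with toy numbers (an illustration, not part of the model):

import numpy as np

predicted_start_probs = np.array([[0.6, 0.3, 0.1]])   # [num_questions=1, beam_size=3]
predicted_end_probs = np.array([[0.2, 0.9, 0.5]])
predicted_start_pointer = np.array([[4, 7, 2]])
predicted_end_pointer = np.array([[5, 8, 3]])

# pick, per question, the beam entry with the highest p(start) * p(end)
best_beam = np.argmax(predicted_start_probs * predicted_end_probs, axis=1)   # -> [1]
rows = np.arange(predicted_start_probs.shape[0])
best_start = predicted_start_pointer[rows, best_beam]                        # -> [7]
best_end = predicted_end_pointer[rows, best_beam]                            # -> [8]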
Example 4
def fastqa_model(shared_vocab_config, emb_question, question_length,
                 emb_support, support_length, unique_word_chars,
                 unique_word_char_length, question_words2unique,
                 support_words2unique, word_in_question, slot_list,
                 correct_start, answer2question, keep_prob, is_eval):
    """
    fast_qa model
    Args:
        shared_vocab_config: has at least a field config (dict) with keys "repr_dim", "repr_dim_input"
        emb_question: [Q, L_q, N]
        question_length: [Q]
        emb_support: [Q, L_s, N]
        support_length: [Q]
        unique_word_chars
        unique_word_char_length
        question_words2unique
        support_words2unique
        word_in_question: [Q, L_s]
        correct_start: [A], only during training, i.e., is_eval=False
        answer2question: [A], only during training, i.e., is_eval=False
        keep_prob: []
        is_eval: []

    Returns:
        start_scores [B, L_s], end_scores [B, L_s], span_prediction [B, 2]
    """
    with tf.variable_scope("fast_qa",
                           initializer=tf.contrib.layers.xavier_initializer()):
        # Some helpers
        batch_size = tf.shape(question_length)[0]
        max_question_length = tf.reduce_max(question_length)
        support_mask = tfutil.mask_for_lengths(support_length, batch_size)
        question_binary_mask = tfutil.mask_for_lengths(question_length,
                                                       batch_size,
                                                       mask_right=False,
                                                       value=1.0)

        input_size = shared_vocab_config.config["repr_dim_input"]
        size = shared_vocab_config.config["repr_dim"]
        with_char_embeddings = shared_vocab_config.config.get(
            "with_char_embeddings", False)

        # set shapes for inputs
        emb_question.set_shape([None, None, input_size])
        emb_support.set_shape([None, None, input_size])
        #slot embedding part
        slot_embeddings = tf.get_variable("slot_embeddings", [64, size])
        embedded_slot_ids = tf.gather(slot_embeddings, slot_list)

        if with_char_embeddings:
            # compute combined embeddings
            [char_emb_question, char_emb_support] = conv_char_embedding_alt(
                shared_vocab_config.config["char_vocab"], size,
                unique_word_chars, unique_word_char_length,
                [question_words2unique, support_words2unique])

            emb_question = tf.concat([emb_question, char_emb_question], 2)
            emb_support = tf.concat([emb_support, char_emb_support], 2)
            input_size += size

            # set shapes for inputs
            emb_question.set_shape([None, None, input_size])
            emb_support.set_shape([None, None, input_size])

        # compute encoder features
        question_features = tf.ones(
            tf.stack([batch_size, max_question_length, 2]))

        v_wiqw = tf.get_variable("v_wiq_w", [1, 1, input_size],
                                 initializer=tf.constant_initializer(1.0))

        wiq_w = tf.matmul(emb_question * v_wiqw, emb_support, adjoint_b=True)
        wiq_w = wiq_w + tf.expand_dims(support_mask, 1)

        wiq_w = tf.reduce_sum(
            tf.nn.softmax(wiq_w) * tf.expand_dims(question_binary_mask, 2),
            [1])

        # [B, L , 2]
        support_features = tf.concat(
            [tf.expand_dims(word_in_question, 2),
             tf.expand_dims(wiq_w, 2)], 2)

        # highway layer to allow for interaction between concatenated embeddings
        if with_char_embeddings:
            all_embedded = tf.concat([emb_question, emb_support], 1)
            all_embedded = tf.contrib.layers.fully_connected(
                all_embedded,
                size,
                activation_fn=None,
                weights_initializer=None,
                biases_initializer=None,
                scope="embeddings_projection")

            all_embedded_hw = highway_network(all_embedded, 1)

            emb_question = tf.slice(all_embedded_hw, [0, 0, 0],
                                    tf.stack([-1, max_question_length, -1]))
            emb_support = tf.slice(all_embedded_hw,
                                   tf.stack([0, max_question_length, 0]),
                                   [-1, -1, -1])

            emb_question.set_shape([None, None, size])
            emb_support.set_shape([None, None, size])

        # variational dropout
        dropout_shape = tf.unstack(tf.shape(emb_question))
        dropout_shape[1] = 1

        [emb_question, emb_support] = tf.cond(
            is_eval, lambda: [emb_question, emb_support],
            lambda: fixed_dropout([emb_question, emb_support], keep_prob,
                                  dropout_shape))

        # extend embeddings with features
        emb_question_ext = tf.concat([emb_question, question_features], 2)
        emb_support_ext = tf.concat([emb_support, support_features], 2)

        # encode question and support
        rnn = tf.contrib.rnn.LSTMBlockFusedCell
        encoded_question = birnn_with_projection(
            size,
            rnn,
            emb_question_ext,
            question_length,
            projection_scope="question_proj")

        encoded_support = birnn_with_projection(
            size,
            rnn,
            emb_support_ext,
            support_length,
            share_rnn=True,
            projection_scope="support_proj")


        start_scores, end_scores, predicted_start_pointer, predicted_end_pointer = \
            fastqa_answer_layer(size, encoded_question, question_length, encoded_support, support_length,
                                correct_start, answer2question, is_eval,embedded_slot_ids,
                                beam_size=shared_vocab_config.config.get("beam_size", 1))

        span = tf.concat([
            tf.expand_dims(predicted_start_pointer, 1),
            tf.expand_dims(predicted_end_pointer, 1)
        ], 1)

        return start_scores, end_scores, span
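
The soft word-in-question feature wiq_w computed above weighs, for every support token, how strongly the (masked) question attends to it. A stand-alone sketch of just that computation with toy shapes and precomputed masks (the names mirror the code above; only TensorFlow is required):

import tensorflow as tf

B, L_q, L_s, N = 2, 3, 4, 5
emb_question = tf.random_normal([B, L_q, N])
emb_support = tf.random_normal([B, L_s, N])
support_mask = tf.zeros([B, L_s])          # 0 inside the support, -1000 beyond
question_binary_mask = tf.ones([B, L_q])   # 1 inside the question, 0 beyond

v_wiqw = tf.get_variable("v_wiq_w_demo", [1, 1, N],
                         initializer=tf.constant_initializer(1.0))
# similarity of every question token with every support token: [B, L_q, L_s]
wiq_w = tf.matmul(emb_question * v_wiqw, emb_support, adjoint_b=True)
wiq_w = wiq_w + tf.expand_dims(support_mask, 1)
# softmax over support positions, then sum over the question axis -> [B, L_s]
wiq_w = tf.reduce_sum(
    tf.nn.softmax(wiq_w) * tf.expand_dims(question_binary_mask, 2), [1])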
Example 5
def cbow_xqa_model(shared_vocab_config, emb_question, question_length,
                   emb_support, support_length,
                   unique_word_chars, unique_word_char_length,
                   question_words2unique, support_words2unique,
                   word_in_question,
                   correct_start, answer2question, keep_prob, is_eval,
                   answer_type_span):
    """
    cbow baseline model
    Args:
        shared_vocab_config: has at least a field config (dict) with keys "repr_dim", "repr_dim_input"
        emb_question: [Q, L_q, N]
        question_length: [Q]
        emb_support: [Q, L_s, N]
        support_length: [Q]
        unique_word_chars
        unique_word_char_length
        question_words2unique
        support_words2unique
        word_in_question: [Q, L_s]
        correct_start: [A], only during training, i.e., is_eval=False
        answer2question: [A], only during training, i.e., is_eval=False
        keep_prob: []
        is_eval: []
        answer_type_span: [Q, 2], span within question marking the expected answer type

    Returns:
        span_scores [B, num_spans], spans [B, num_spans, 2], predicted_span [B, 2]
    """
    with tf.variable_scope("cbow_xqa", initializer=tf.contrib.layers.xavier_initializer()):
        # Some helpers
        batch_size = tf.shape(question_length)[0]
        max_support_length = tf.reduce_max(support_length)
        max_question_length = tf.reduce_max(question_length)

        input_size = shared_vocab_config.config["repr_dim_input"]
        size = shared_vocab_config.config["repr_dim"]
        with_char_embeddings = shared_vocab_config.config.get("with_char_embeddings", False)

        # set shapes for inputs
        emb_question.set_shape([None, None, input_size])
        emb_support.set_shape([None, None, input_size])

        if with_char_embeddings:
            # compute combined embeddings
            [char_emb_question, char_emb_support] = conv_char_embedding_alt(shared_vocab_config.config["char_vocab"],
                                                                            size,
                                                                            unique_word_chars, unique_word_char_length,
                                                                            [question_words2unique,
                                                                             support_words2unique])

            emb_question = tf.concat([emb_question, char_emb_question], 2)
            emb_support = tf.concat([emb_support, char_emb_support], 2)
            input_size += size

            # set shapes for inputs
            emb_question.set_shape([None, None, input_size])
            emb_support.set_shape([None, None, input_size])

        # variational dropout
        dropout_shape = tf.unstack(tf.shape(emb_question))
        dropout_shape[1] = 1

        [emb_question, emb_support] = tf.cond(is_eval,
                                              lambda: [emb_question, emb_support],
                                              lambda: fixed_dropout([emb_question, emb_support],
                                                                    keep_prob, dropout_shape))

        # question encoding
        answer_type_start = tf.squeeze(tf.slice(answer_type_span, [0, 0], [-1, 1]))
        answer_type_end = tf.squeeze(tf.slice(answer_type_span, [0, 1], [-1, -1]))
        answer_type_mask = tfutil.mask_for_lengths(answer_type_start, batch_size, max_question_length, value=1.0) * \
                           tfutil.mask_for_lengths(answer_type_end + 1, batch_size, max_question_length,
                                                   mask_right=False, value=1.0)
        answer_type = tf.reduce_sum(emb_question * tf.expand_dims(answer_type_mask, 2), 1) / \
                      tf.maximum(1.0, tf.reduce_sum(answer_type_mask, 1, keep_dims=True))

        batch_size_range = tf.range(0, batch_size)
        answer_type_start_state = tf.gather_nd(emb_question, tf.stack([batch_size_range, answer_type_start], 1))
        answer_type_end_state = tf.gather_nd(emb_question, tf.stack([batch_size_range, answer_type_end], 1))

        question_rep = tf.concat([answer_type, answer_type_start_state, answer_type_end_state], 1)
        question_rep.set_shape([None, input_size * 3])

        # wiq features
        support_mask = tfutil.mask_for_lengths(support_length, batch_size)
        question_binary_mask = tfutil.mask_for_lengths(question_length, batch_size, mask_right=False, value=1.0)

        v_wiqw = tf.get_variable("v_wiq_w", [1, 1, input_size],
                                 initializer=tf.constant_initializer(1.0))

        wiq_w = tf.matmul(emb_question * v_wiqw, emb_support, adjoint_b=True)
        wiq_w = wiq_w + tf.expand_dims(support_mask, 1)

        wiq_w = tf.reduce_sum(tf.nn.softmax(wiq_w) * tf.expand_dims(question_binary_mask, 2), [1])

        wiq_exp = tf.stack([word_in_question, wiq_w], 2)

        # support span encoding
        spans = [tf.stack([tf.range(0, max_support_length), tf.range(0, max_support_length)], 1)]

        wiq_exp = tf.pad(wiq_exp, [[0, 0], [20, 20], [0, 0]])
        wiq_pooled5 = tf.layers.average_pooling1d(
            tf.slice(wiq_exp, [0, 15, 0], tf.stack([-1, max_support_length + 10, -1])), 5, [1], 'valid')
        wiq_pooled10 = tf.layers.average_pooling1d(
            tf.slice(wiq_exp, [0, 10, 0], tf.stack([-1, max_support_length + 20, -1])), 10, [1], 'valid')
        wiq_pooled20 = tf.layers.average_pooling1d(wiq_exp, 20, [1], 'valid')

        wiqs_left5 = [tf.slice(wiq_pooled5, [0, 0, 0], tf.stack([-1, max_support_length, -1]))]
        wiqs_right5 = [tf.slice(wiq_pooled5, [0, 6, 0], [-1, -1, -1])]
        wiqs_left10 = [tf.slice(wiq_pooled10, [0, 0, 0], tf.stack([-1, max_support_length, -1]))]
        wiqs_right10 = [tf.slice(wiq_pooled10, [0, 11, 0], [-1, -1, -1])]
        wiqs_left20 = [tf.slice(wiq_pooled20, [0, 0, 0], tf.stack([-1, max_support_length, -1]))]
        wiqs_right20 = [tf.slice(wiq_pooled20, [0, 21, 0], [-1, -1, -1])]

        context_window = 5
        padded_support = tf.pad(emb_support, [[0, 0], [context_window, context_window], [0, 0]], "CONSTANT")
        # [B, L + 10 - 4, S]
        emb_support_windows = tf.layers.average_pooling1d(padded_support, 5, [1], "VALID", "channels_last")

        left_context_windows = tf.slice(emb_support_windows, [0, 0, 0],
                                        tf.stack([-1, max_support_length, -1]))
        right_context_windows = tf.slice(emb_support_windows, [0, context_window + 1, 0],
                                         [-1, -1, -1])
        span_rep = [tf.concat([emb_support, emb_support, emb_support, left_context_windows, right_context_windows], 2)]

        for window_size in range(2, _max_span_size + 1):
            start = tf.slice(emb_support, [0, 0, 0], tf.stack([-1, max_support_length - (window_size - 1), -1]))
            end = tf.slice(emb_support, [0, window_size - 1, 0], [-1, -1, -1])
            averagespan = tf.layers.average_pooling1d(emb_support, window_size, [1], "VALID", "channels_last")

            left_context_windows = tf.slice(emb_support_windows, [0, 0, 0],
                                            tf.stack([-1, max_support_length - (window_size - 1), -1]))
            right_context_windows = tf.slice(emb_support_windows, [0, window_size - 1 + context_window + 1, 0],
                                             [-1, -1, -1])

            span_rep.append(tf.concat([averagespan, start, end, left_context_windows, right_context_windows], 2))

            wiqs_left5.append(
                tf.slice(wiq_pooled5, [0, 0, 0], tf.stack([-1, max_support_length - (window_size - 1), -1])))
            wiqs_left10.append(
                tf.slice(wiq_pooled10, [0, 0, 0], tf.stack([-1, max_support_length - (window_size - 1), -1])))
            wiqs_left20.append(
                tf.slice(wiq_pooled20, [0, 0, 0], tf.stack([-1, max_support_length - (window_size - 1), -1])))

            wiqs_right5.append(tf.slice(wiq_pooled5, [0, window_size + 5, 0], [-1, -1, -1]))
            wiqs_right10.append(tf.slice(wiq_pooled10, [0, window_size + 10, 0], [-1, -1, -1]))
            wiqs_right20.append(tf.slice(wiq_pooled20, [0, window_size + 20, 0], [-1, -1, -1]))

            spans.append(tf.stack([tf.range(0, max_support_length - (window_size - 1)),
                                   tf.range(window_size - 1, max_support_length)], 1))

        span_rep = tf.concat(span_rep, 1)
        span_rep.set_shape([None, None, input_size * 5])
        wiqs_left5 = tf.concat(wiqs_left5, 1)
        wiqs_left10 = tf.concat(wiqs_left10, 1)
        wiqs_left20 = tf.concat(wiqs_left20, 1)

        wiqs_right5 = tf.concat(wiqs_right5, 1)
        wiqs_right10 = tf.concat(wiqs_right10, 1)
        wiqs_right20 = tf.concat(wiqs_right20, 1)

        spans = tf.concat(spans, 0)

        # scoring
        with tf.variable_scope("question_rep"):
            question_rep = tf.layers.dense(question_rep, size, activation=tf.tanh)
        with tf.variable_scope("question_inter"):
            question_inter = tf.layers.dense(question_rep, size, activation=None)

        with tf.variable_scope("span_rep"):
            span_rep = tf.layers.dense(span_rep, size, activation=tf.tanh)

        span_question_rep = tf.concat([span_rep, tf.expand_dims(question_rep, 1) * span_rep,
                                       wiqs_left5, wiqs_left10, wiqs_left20,
                                       wiqs_right5, wiqs_right10, wiqs_right20], 2)
        span_question_rep.set_shape([None, None, 2 * size + 6 * 2])

        with tf.variable_scope("hidden"):
            h = tf.tanh(tf.layers.dense(span_question_rep, size, activation=None) + tf.expand_dims(question_inter, 1))

        with tf.variable_scope("scoring"):
            span_scores = tf.squeeze(tf.layers.dense(h, 1, activation=None), 2)

        best_span = tf.argmax(span_scores, 1)
        predicted_span = tf.gather(spans, best_span)

        return span_scores, tf.tile(tf.expand_dims(spans, 0), tf.stack([batch_size, 1, 1])), predicted_span
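
cbow_xqa_model scores every candidate span of width 1 through _max_span_size; the `spans` tensor records, width by width, which (start, end) pair each score belongs to. The same enumeration in plain NumPy, assuming a support of length 4 and _max_span_size = 3:

import numpy as np

max_support_length, max_span_size = 4, 3
spans = []
for window_size in range(1, max_span_size + 1):
    starts = np.arange(0, max_support_length - (window_size - 1))
    spans.append(np.stack([starts, starts + window_size - 1], 1))
spans = np.concatenate(spans, 0)
# row k gives the (start, end) token indices of candidate k in span_scores:
# [[0 0] [1 1] [2 2] [3 3] [0 1] [1 2] [2 3] [0 2] [1 3]]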
def conv_char_embeddings(vocab,
                         size,
                         word_ids,
                         conv_width=5,
                         emb_initializer=tf.random_normal_initializer(
                             0.0, 0.1),
                         scope=None):
    """
    :param vocab: filled Vocab instance
    :param size: size of embeddings
    :param word_ids: tf.Tensor[None, None] or list of tensors
    :param conv_width: int
    :return: char embedded word ids
    """
    if not isinstance(word_ids, list):
        word_ids = [word_ids]

    # create character vocab + word lengths + char ids per word
    pad_right = math.ceil(conv_width / 2)  # fixed padding on the right side
    vocab_size = max(vocab.sym2id.values()) + 1
    max_l = max(len(w) for w in vocab.sym2id) + pad_right
    char_vocab = defaultdict(lambda: len(char_vocab))
    char_vocab["PAD"] = 0
    word_to_chars_arr = np.zeros((vocab_size, max_l), np.int16)
    word_lengths_arr = np.zeros([vocab_size], np.int8)
    for w, i in vocab.sym2id.items():
        for k, c in enumerate(w):
            j = char_vocab[c]
            word_to_chars_arr[i, k] = j
        word_lengths_arr[i] = len(w) + conv_width - 1

    with tf.variable_scope(scope or "char_embeddings") as vs:
        word_to_chars = tf.constant(word_to_chars_arr, name="word_to_chars")
        word_lengths = tf.constant(word_lengths_arr, name="word_lengths")

        char_embedding_matrix = \
            tf.get_variable("char_embedding_matrix", shape=(len(char_vocab), size),
                            initializer=emb_initializer, trainable=True)

        all_embedded = []
        for i, ids in enumerate(word_ids):
            if i > 0:
                vs.reuse_variables()

            unique_words, word_idx = tf.unique(tf.reshape(ids, [-1]))
            chars = tf.nn.embedding_lookup(word_to_chars, unique_words)
            wl = tf.nn.embedding_lookup(word_lengths, unique_words)
            wl = tf.cast(wl, tf.int32)
            max_word_length = tf.reduce_max(wl)
            chars = tf.slice(chars, [0, 0], tf.stack([-1, max_word_length]))

            embedded_chars = tf.nn.embedding_lookup(char_embedding_matrix,
                                                    tf.cast(chars, tf.int32))

            with tf.variable_scope("conv"):
                # create the filter as a 2-D variable so that initializers
                # which depend on fan-in/fan-out infer them correctly, then
                # reshape it for conv1d
                filter = tf.get_variable("filter", [conv_width * size, size])
                filter_reshaped = tf.reshape(filter, [conv_width, size, size])
                conv_out = tf.nn.conv1d(embedded_chars, filter_reshaped, 1,
                                        "SAME")
                conv_mask = tf.expand_dims(
                    tfutil.mask_for_lengths(wl - pad_right,
                                            max_length=max_word_length), 2)
                conv_out = conv_out + conv_mask

            unique_embedded_words = tf.reduce_max(conv_out, [1])

            embedded_words = tf.gather(unique_embedded_words, word_idx)
            embedded_words = tf.reshape(
                embedded_words,
                tf.stack([-1, tf.unstack(tf.shape(ids))[1], size]))
            all_embedded.append(embedded_words)

    return all_embedded
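
The lookup arrays built at the top of conv_char_embeddings can be inspected without TensorFlow. A toy illustration with a hypothetical three-word vocabulary (the ToyVocab class below only stands in for the project's Vocab):

import math
from collections import defaultdict
import numpy as np

class ToyVocab:                       # hypothetical stand-in for the Vocab class
    sym2id = {"cat": 0, "dog": 1, "a": 2}

vocab, conv_width = ToyVocab(), 5
pad_right = math.ceil(conv_width / 2)
vocab_size = max(vocab.sym2id.values()) + 1
max_l = max(len(w) for w in vocab.sym2id) + pad_right
char_vocab = defaultdict(lambda: len(char_vocab))
char_vocab["PAD"] = 0
word_to_chars_arr = np.zeros((vocab_size, max_l), np.int16)
word_lengths_arr = np.zeros([vocab_size], np.int8)
for w, i in vocab.sym2id.items():
    for k, c in enumerate(w):
        word_to_chars_arr[i, k] = char_vocab[c]
    word_lengths_arr[i] = len(w) + conv_width - 1
# word_to_chars_arr[0] -> [1 2 3 0 0 0]   (char ids of "cat", zero-padded)
# word_lengths_arr     -> [7 7 5]         (len(word) + conv_width - 1)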