Esempio n. 1
0
    def build_encoder(self, input_lengths, input_mask, *args, **kargs):
        """Encode the embedded sentence and combine it with ``self.memory``.

        The embedding returned by ``build_emebdding`` is passed through a
        highway stack and ``layer_utils.my_lstm_layer``. The combined LSTM
        output is then handed, together with ``self.memory`` and its
        transpose, to ``leam_utils.att_emb_ngram_encoder_maxout``.

        Args:
            input_lengths: sequence lengths forwarded to the LSTM layer.
            input_mask: token mask; cast to ``tf.float32`` before use.
            *args: forwarded unchanged to ``build_emebdding``.
            **kargs: must contain ``"reuse"``; also forwarded to
                ``build_emebdding``.

        Returns:
            The tensor produced by ``att_emb_ngram_encoder_maxout``.
        """
        reuse = kargs["reuse"]
        # Only the first element of the returned pair is consumed here.
        embedded, _ = self.build_emebdding(*args, **kargs)
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate,
                               lambda: 0.0)

        encoder_scope = self.config.scope + "_encoder"
        with tf.variable_scope(encoder_scope, reuse=reuse):
            highway_out = match_utils.multi_highway_layer(
                embedded,
                embedded.get_shape()[-1],
                self.config.highway_layer_num)

            # Forward/backward outputs are not needed, only the combined one.
            _, _, sent_repres = layer_utils.my_lstm_layer(
                highway_out,
                self.config.context_lstm_dim,
                input_lengths=input_lengths,
                scope_name=self.config.scope,
                reuse=reuse,
                is_training=self.is_training,
                dropout_rate=dropout_rate,
                use_cudnn=self.config.use_cudnn)

            memory_tran = tf.transpose(self.memory, [1, 0])  # e * c
            expanded_repres = tf.expand_dims(sent_repres, 3)
            float_mask = tf.cast(input_mask, tf.float32)
            print(expanded_repres.get_shape(), "======emb shape======")

            H_enc = leam_utils.att_emb_ngram_encoder_maxout(
                expanded_repres,
                float_mask,
                self.memory,
                memory_tran,
                self.config)
            print("===H_enc shape===", H_enc.get_shape())

        return H_enc
Esempio n. 2
0
    def build_emebdding(self, *args, **kargs):
        """Build token embeddings augmented with a pooled entity vector.

        Word embeddings for ``self.sent_token`` are looked up in
        ``self.emb_mat`` (optionally passed through word dropout). The entity
        tokens are embedded with the same matrix, run through
        ``layer_utils.my_lstm_layer``, max-pooled over axis 1 after
        ``qanet_layers.mask_logits``, and the resulting vector is repeated at
        every sentence position and concatenated to the word embeddings on
        the last axis. The result is multiplied by the sentence mask, and
        character embeddings are appended when ``config.with_char`` is set.

        Args:
            *args: unused.
            **kargs: must contain ``"reuse"``, which is forwarded to the LSTM
                and character-embedding builders.

        Returns:
            A ``(word_emb, word_drop_mask)`` pair. ``word_drop_mask`` is the
            mask returned by ``common_utils.word_dropout`` when word dropout
            is enabled, otherwise ``self.sent_token_mask``.
        """
        reuse = kargs["reuse"]
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate, lambda: 0.0)
        word_emb = tf.nn.embedding_lookup(self.emb_mat, self.sent_token)

        if self.config.with_word_drop:
            word_drop_rate = tf.cond(self.is_training,
                                     lambda: self.config.word_drop_rate,
                                     lambda: 0.0)
            word_emb, word_drop_mask = common_utils.word_dropout(
                word_emb, word_drop_rate)
        else:
            word_drop_mask = self.sent_token_mask

        entity_emb = tf.nn.embedding_lookup(self.emb_mat, self.entity_token)

        # Only the combined LSTM output is used for the entity encoding.
        _, _, entity_emb = layer_utils.my_lstm_layer(
            entity_emb,
            self.config.context_lstm_dim,
            input_lengths=self.entity_token_len,
            scope_name=self.config.scope,
            reuse=reuse,
            is_training=self.is_training,
            dropout_rate=dropout_rate,
            use_cudnn=self.config.use_cudnn)

        entity_mask = tf.expand_dims(self.entity_token_mask,
                                     axis=-1)  # batch x len x 1
        entity_emb = tf.reduce_max(
            qanet_layers.mask_logits(entity_emb, entity_mask), axis=1)

        entity_emb = tf.expand_dims(entity_emb, axis=1)
        # Tile to the actual time dimension of word_emb rather than to
        # max(self.sent_token_len): the concat below requires both tensors to
        # agree on axis 1, which the length-based value only guarantees when
        # the batch is padded exactly to its longest sentence.
        seq_len = tf.shape(word_emb)[1]
        entity_emb = tf.tile(entity_emb, [1, seq_len, 1])

        mask = tf.expand_dims(self.sent_token_mask, -1)
        word_emb = tf.concat([word_emb, entity_emb], axis=-1)
        # Zero out the padded positions (including the tiled entity part).
        word_emb *= tf.cast(mask, tf.float32)

        print(word_emb.get_shape(), "=====word with entity========")
        if self.config.with_char:
            char_emb = self.build_char_embedding(self.sent_char,
                                                 self.sent_char_len,
                                                 self.char_mat,
                                                 is_training=self.is_training,
                                                 reuse=reuse)
            word_emb = tf.concat([word_emb, char_emb], axis=-1)

        return word_emb, word_drop_mask
Esempio n. 3
0
def lstm_char_embedding(char_token,
                        char_lengths,
                        char_embedding,
                        config,
                        is_training=True,
                        reuse=None):
    """Compose a per-word vector from its characters with a BiLSTM.

    Character ids are embedded, flattened so that every word becomes one
    sequence, masked by their lengths and fed to
    ``layer_utils.my_lstm_layer``. The forward output collected at index
    ``length - 1`` and the backward output at index 0 are concatenated and
    reshaped back to one vector per word.

    Args:
        char_token: character-id tensor; its first three dynamic dimensions
            are read as (batch, words, chars).
        char_lengths: per-word character counts; flattened to 1-D.
        char_embedding: character embedding matrix.
        config: object providing ``scope``, ``dropout_rate``,
            ``char_lstm_dim`` and ``use_cudnn``.
        is_training: boolean tensor used as the predicate of ``tf.cond`` to
            select the dropout rate.
        reuse: variable-scope reuse flag.

    Returns:
        Tensor reshaped to ``[batch, words, 2 * config.char_lstm_dim]``.
    """
    dropout_rate = tf.cond(is_training, lambda: config.dropout_rate,
                           lambda: 0.0)
    layer_scope = config.scope + "_lstm_char_embedding_layer"

    with tf.variable_scope(layer_scope, reuse=reuse):
        char_dim = char_embedding.get_shape()[-1]
        token_shape = tf.shape(char_token)
        batch_size = token_shape[0]
        num_words = token_shape[1]
        max_chars = token_shape[2]

        # Flatten (batch, words) into one axis: each word is one sequence.
        char_repres = tf.nn.embedding_lookup(char_embedding, char_token)
        char_repres = tf.reshape(char_repres,
                                 shape=[-1, max_chars, char_dim])
        flat_lengths = tf.reshape(char_lengths, [-1])

        # [batch_size*question_len, q_char_len]
        char_mask = tf.sequence_mask(flat_lengths, max_chars,
                                     dtype=tf.float32)
        expanded_mask = tf.expand_dims(char_mask, axis=-1)
        char_repres = tf.multiply(char_repres, expanded_mask)

        outputs_fw, outputs_bw, _ = layer_utils.my_lstm_layer(
            char_repres,
            config.char_lstm_dim,
            input_lengths=flat_lengths,
            scope_name="char_lstm",
            reuse=reuse,
            is_training=is_training,
            dropout_rate=dropout_rate,
            use_cudnn=config.use_cudnn)

        # Last valid step of the forward pass, first step of the backward.
        final_fw = layer_utils.collect_final_step_of_lstm(
            outputs_fw, flat_lengths - 1)
        final_bw = outputs_bw[:, 0, :]

        word_vectors = tf.concat([final_fw, final_bw], axis=1)
        word_vectors = tf.reshape(
            word_vectors,
            [batch_size, num_words, 2 * config.char_lstm_dim])

        return word_vectors
Esempio n. 4
0
    def build_encoder(self, index, input_lengths, input_mask, 
                    *args, **kargs):
        """Encode a sentence with highway layers, an LSTM and a residual dense layer.

        Args:
            index: forwarded to ``self.build_emebdding`` as its first argument.
            input_lengths: sequence lengths forwarded to the LSTM layer.
            input_mask: token mask; ``1 - input_mask`` is turned into an
                attention bias by ``attention_bias_ignore_padding``.
            *args: forwarded to ``self.build_emebdding``.
            **kargs: must contain ``"reuse"``; also forwarded to
                ``self.build_emebdding``.

        Returns:
            ``sent_repres``: the LSTM output plus its ReLU-dense projection.

        NOTE(review): the self-attention / dense / layer-norm result
        (``output``) is built in the graph but never returned or otherwise
        used in this method. Confirm whether ``sent_repres`` is the intended
        return value.
        """

        reuse = kargs["reuse"]
        word_emb = self.build_emebdding(index, *args, **kargs)
        # Dropout rate is config.dropout_rate when self.is_training, else 0.0.
        dropout_rate = tf.cond(self.is_training, 
                            lambda:self.config.dropout_rate,
                            lambda:0.0)

        # Second argument is 1 - dropout_rate (keep probability in the
        # TF 1.x tf.nn.dropout API).
        word_emb = tf.nn.dropout(word_emb, 1-dropout_rate)
        with tf.variable_scope(self.config.scope+"_input_highway", reuse=reuse):
            input_dim = word_emb.get_shape()[-1]
            sent_repres = match_utils.multi_highway_layer(word_emb, input_dim, self.config.highway_layer_num)
            
            # Only the third (combined) LSTM output is kept.
            [_, _, sent_repres] = layer_utils.my_lstm_layer(sent_repres, 
                            self.config.context_lstm_dim, 
                            input_lengths=input_lengths, 
                            scope_name=self.config.scope, 
                            reuse=reuse, 
                            is_training=self.is_training,
                            dropout_rate=dropout_rate, 
                            use_cudnn=self.config.use_cudnn)

            # Residual connection around a ReLU dense layer.
            sent_repres = tf.layers.dense(sent_repres, 
                            self.config.context_lstm_dim*2, 
                            activation=tf.nn.relu) + sent_repres

            # Attention bias built from the inverted mask.
            ignore_padding = (1 - input_mask)
            ignore_padding = decathlon_utils.attention_bias_ignore_padding(ignore_padding)
            encoder_self_attention_bias = ignore_padding

            # memory=None: presumably self-attention over sent_repres -- TODO confirm
            # against decathlon_utils.multihead_attention_texar.
            output = decathlon_utils.multihead_attention_texar(sent_repres, 
                            memory=None, 
                            memory_attention_bias=encoder_self_attention_bias,
                            num_heads=self.config.num_heads, 
                            num_units=None, 
                            dropout_rate=dropout_rate, 
                            scope="multihead_attention")

            output = tf.layers.dense(output, 
                            self.config.context_lstm_dim*2, 
                            activation=tf.nn.relu) + output

            output = qanet_layers.layer_norm(output, 
                                scope = "layer_norm", 
                                reuse = reuse)

        # NOTE(review): returns sent_repres, not the `output` computed above.
        return sent_repres
Esempio n. 5
0
    def build_encoder(self, index, input_lengths, *args, **kargs):
        """Encode a sentence with highway layers followed by an LSTM or S-LSTM.

        Args:
            index: forwarded to ``self.build_emebdding`` as its first argument.
            input_lengths: sequence lengths for the recurrent encoder.
            *args: forwarded to ``self.build_emebdding``.
            **kargs: must contain ``"reuse"``; also forwarded to
                ``self.build_emebdding``.

        Returns:
            The combined LSTM output when ``config.rnn == "lstm"``, the new
            S-LSTM hidden states when ``config.rnn == "slstm"``, and the
            highway output unchanged for any other value.
        """
        reuse = kargs["reuse"]
        embedded = self.build_emebdding(index, *args, **kargs)
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate,
                               lambda: 0.0)
        embedded = tf.nn.dropout(embedded, 1 - dropout_rate)

        scope_name = self.config.scope + "_input_highway"
        with tf.variable_scope(scope_name, reuse=reuse):
            sent_repres = match_utils.multi_highway_layer(
                embedded,
                embedded.get_shape()[-1],
                self.config.highway_layer_num)

            rnn_type = self.config.rnn
            if rnn_type == "lstm":
                # Keep only the combined (third) output of the LSTM layer.
                _, _, sent_repres = layer_utils.my_lstm_layer(
                    sent_repres,
                    self.config.context_lstm_dim,
                    input_lengths=input_lengths,
                    scope_name=self.config.scope,
                    reuse=reuse,
                    is_training=self.is_training,
                    dropout_rate=dropout_rate,
                    use_cudnn=self.config.use_cudnn)

            elif rnn_type == "slstm":
                # The S-LSTM starts from a projection of the embeddings
                # (taken before the highway layers), used for both the
                # initial hidden and the initial cell states.
                hidden_init = tf.layers.dense(embedded,
                                              self.config.slstm_hidden_size)
                cell_init = tf.identity(hidden_init)

                hidden_states, _, _ = slstm_utils.slstm_cell(
                    self.config,
                    self.config.scope,
                    self.config.slstm_hidden_size,
                    input_lengths,
                    hidden_init,
                    cell_init,
                    self.config.slstm_layer_num,
                    dropout_rate,
                    reuse=reuse)

                sent_repres = hidden_states

        return sent_repres
Esempio n. 6
0
File: drcn.py Progetto: yyht/simnet
    def build_encoder(self, sent_repres, input_lengths, *args, **kargs):
        """Run the configured recurrent encoder over ``sent_repres``.

        Args:
            sent_repres: input sequence representation to encode.
            input_lengths: sequence lengths for the recurrent encoder.
            *args: unused.
            **kargs: must contain ``"reuse"`` (variable-scope reuse flag).

        Returns:
            A ``(sent_repres, match_dim)`` pair: the encoder output and
            ``2 * context_lstm_dim`` for ``"lstm"`` or
            ``2 * slstm_hidden_size`` for ``"slstm"``.

        Raises:
            KeyError: if ``"reuse"`` is missing from ``kargs``.
            ValueError: if ``config.rnn`` is neither ``"lstm"`` nor
                ``"slstm"`` (previously this surfaced as an unbound
                ``match_dim`` at the return statement).
        """
        reuse = kargs["reuse"]
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate, lambda: 0.0)

        with tf.variable_scope(self.config.scope + "_input_highway",
                               reuse=reuse):

            if self.config.rnn == "lstm":
                [sent_repres_fw, sent_repres_bw, sent_repres
                 ] = layer_utils.my_lstm_layer(sent_repres,
                                               self.config.context_lstm_dim,
                                               input_lengths=input_lengths,
                                               scope_name=self.config.scope,
                                               reuse=reuse,
                                               is_training=self.is_training,
                                               dropout_rate=dropout_rate,
                                               use_cudnn=self.config.use_cudnn,
                                               lstm_type=self.config.lstm_type)
                match_dim = self.config.context_lstm_dim * 2

            elif self.config.rnn == "slstm":
                # Project the incoming representation to the S-LSTM width.
                # The original referenced an undefined name `word_emb` here;
                # `sent_repres` is the only input this method receives.
                word_emb_proj = tf.layers.dense(sent_repres,
                                                self.config.slstm_hidden_size)

                initial_hidden_states = word_emb_proj
                initial_cell_states = tf.identity(initial_hidden_states)

                [new_hidden_states, new_cell_states, dummynode_hidden_states
                 ] = slstm_utils.slstm_cell(self.config,
                                            self.config.scope,
                                            self.config.slstm_hidden_size,
                                            input_lengths,
                                            initial_hidden_states,
                                            initial_cell_states,
                                            self.config.slstm_layer_num,
                                            dropout_rate,
                                            reuse=reuse)
                match_dim = self.config.slstm_hidden_size * 2
                sent_repres = new_hidden_states

            else:
                raise ValueError(
                    "unsupported config.rnn: {!r} (expected 'lstm' or "
                    "'slstm')".format(self.config.rnn))

        return sent_repres, match_dim
Esempio n. 7
0
    def build_compression(self, context_fusion, context_mask, context_len,
                        scope_name, 
                        *args, **kargs):
        """Attend over the fused context, normalise it and re-encode it with an LSTM.

        Args:
            context_fusion: fused context representation passed to
                ``multihead_attention_texar``.
            context_mask: context token mask; ``1 - context_mask`` is turned
                into an attention bias.
            context_len: context sequence lengths for the LSTM layer.
            scope_name: suffix appended to the variable-scope name.
            *args: unused.
            **kargs: must contain ``"reuse"`` (variable-scope reuse flag).

        Returns:
            The combined (third) output of ``layer_utils.my_lstm_layer``.
        """
        reuse = kargs["reuse"]
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate,
                               lambda: 0.0)

        with tf.variable_scope(self.config.scope + "_compression_" + scope_name,
                               reuse=reuse):

            # Attention bias built from the inverted mask.
            ignore_padding = (1 - context_mask)
            ignore_padding = decathlon_utils.attention_bias_ignore_padding(
                ignore_padding)
            encoder_self_attention_bias = ignore_padding

            context_repres = decathlon_utils.multihead_attention_texar(
                context_fusion,
                memory=None,
                memory_attention_bias=encoder_self_attention_bias,
                num_heads=self.config.num_heads,
                num_units=None,
                dropout_rate=dropout_rate,
                scope="context")

            # Residual connection around a ReLU dense layer.
            context_repres = tf.layers.dense(
                context_repres,
                self.config.context_lstm_dim * 2,
                activation=tf.nn.relu) + context_repres

            context_repres = qanet_layers.layer_norm(context_repres,
                                                     scope="layer_norm",
                                                     reuse=reuse)

            # Fix: the original passed an undefined name `input_lengths`;
            # the sequence lengths for this tensor are `context_len`.
            [_, _, context_repres] = layer_utils.my_lstm_layer(
                context_repres,
                self.config.context_lstm_dim,
                input_lengths=context_len,
                scope_name=self.config.scope,
                reuse=reuse,
                is_training=self.is_training,
                dropout_rate=dropout_rate,
                use_cudnn=self.config.use_cudnn)

            return context_repres
Esempio n. 8
0
    def build_attention_aggregation(self, coattention, context, context_len, context_mask, 
                        sent1_repres, sent2_repres, 
                        sent1_len, sent2_len,
                        sent1_mask, sent2_mask, 
                        *args, **kargs):
        """Fuse the context with each co-attention output and combine the results.

        For every entry of ``config.attn_lst`` the context is concatenated
        with the matching ``coattention[i]`` on the last axis and encoded by
        its own LSTM. The encodings are stacked on axis 1 and merged by
        ``decathlon_utils.attention_fusion``.

        Args:
            coattention: indexable collection of co-attention tensors, one
                per entry of ``config.attn_lst``.
            context: context representation.
            context_len: context sequence lengths for the LSTM layers.
            context_mask: context mask passed to ``attention_fusion``.
            sent1_repres, sent2_repres, sent1_len, sent2_len, sent1_mask,
                sent2_mask: accepted but not used in this method.
            *args: unused.
            **kargs: must contain ``"reuse"`` (variable-scope reuse flag).

        Returns:
            The tensor returned by ``decathlon_utils.attention_fusion``.
        """
        reuse = kargs["reuse"]
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate,
                               lambda: 0.0)

        context_fusion = []
        with tf.variable_scope(self.config.scope+"_aggerate_attention", reuse=reuse):

            for i in range(len(self.config.attn_lst)):
                context_f = tf.concat([context, coattention[i]], axis=-1)

                # Each attention type gets its own sub-scope for the LSTM.
                with tf.variable_scope(self.config.scope+"_attn_fusion_{}".format(i),
                        reuse=reuse):

                    [_, _, context_f] = layer_utils.my_lstm_layer(
                        context_f,
                        self.config.context_lstm_dim,
                        input_lengths=context_len,
                        scope_name=self.config.scope+"_context",
                        reuse=None,
                        is_training=self.is_training,
                        dropout_rate=dropout_rate,
                        use_cudnn=self.config.use_cudnn)

                    context_fusion.append(context_f)

            # batch x 4 x len x dim
            context_fusion = tf.stack(context_fusion, axis=1)

            with tf.variable_scope(self.config.scope+"_attention_fusion", reuse=reuse):

                # Fix: the original passed the misspelled, undefined name
                # `context_maks` instead of the `context_mask` parameter.
                context_fusion = decathlon_utils.attention_fusion(
                                    context_fusion,
                                    context_mask,
                                    self.config.scope+"_context_fusion",
                                    reuse=reuse)

            return context_fusion
Esempio n. 9
0
    def build_interactor(self, sent1_repres, sent2_repres, sent1_len,
                         sent2_len, sent1_mask, sent2_mask, *args, **kargs):
        """Cross-attend two sentences, re-encode them and pool to fixed vectors.

        Steps, all inside the ``<scope>_interaction_module`` variable scope:
        optional self-attention concatenated to each input, query/context
        attention between the two sentences, a recurrent encoder (LSTM or
        S-LSTM, selected by ``config.rnn``), then average and masked max
        pooling over axis 1.

        Args:
            sent1_repres, sent2_repres: sequence representations of the two
                sentences.
            sent1_len, sent2_len: sequence lengths; cast to float32 for the
                average.
            sent1_mask, sent2_mask: token masks used for the max pooling.
            *args: unused.
            **kargs: must contain ``"reuse"`` (variable-scope reuse flag).

        Returns:
            ``(out1, out2, out, match_dim)`` where ``out1``/``out2`` are the
            [average, max] concatenations for each sentence, ``out``
            concatenates all four pooled vectors on axis 1, and ``match_dim``
            is ``8 * context_lstm_dim`` (lstm) or ``4 * slstm_hidden_size``
            (slstm).

        NOTE(review): ``match_dim`` is only assigned in the "lstm" and
        "slstm" branches; any other ``config.rnn`` value makes the final
        ``return`` fail because ``match_dim`` is unbound.
        """
        reuse = kargs["reuse"]
        # NOTE(review): input_dim is computed but not used below.
        input_dim = sent1_repres.get_shape()[-1]
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate, lambda: 0.0)

        with tf.variable_scope(self.config.scope + "_interaction_module",
                               reuse=reuse):

            if self.config.with_self_attention:
                # Both calls use the same scope name; the first is called
                # with reuse=None and the second with reuse=True.
                # NOTE(review): is_training is hard-coded to True here;
                # dropout_rate is already 0.0 when self.is_training is false,
                # which is presumably why -- confirm.
                v_1_attn = esim_utils.multihead_attention(
                    sent1_repres,
                    sent1_repres,
                    num_units=None,
                    num_heads=self.config.num_heads,
                    dropout_rate=dropout_rate,
                    is_training=True,
                    causality=False,
                    scope="multihead_attention",
                    reuse=None)

                v_2_attn = esim_utils.multihead_attention(
                    sent2_repres,
                    sent2_repres,
                    num_units=None,
                    num_heads=self.config.num_heads,
                    dropout_rate=dropout_rate,
                    is_training=True,
                    causality=False,
                    scope="multihead_attention",
                    reuse=True)

                # Append the self-attention result to each input on the last axis.
                sent1_repres = tf.concat([sent1_repres, v_1_attn], axis=-1)
                sent2_repres = tf.concat([sent2_repres, v_2_attn], axis=-1)

            [query_attention_outputs, context_attention_outputs
             ] = esim_utils.query_context_attention(sent1_repres,
                                                    sent2_repres,
                                                    sent1_len,
                                                    sent2_len,
                                                    sent1_mask,
                                                    sent2_mask,
                                                    dropout_rate,
                                                    self.config.scope,
                                                    reuse=reuse)

            if self.config.rnn == "lstm":
                # Same scope_name for both sentences: first call with
                # reuse=None, second with reuse=True.
                [sent1_repres_fw, sent1_repres_bw, sent1_repres
                 ] = layer_utils.my_lstm_layer(query_attention_outputs,
                                               self.config.context_lstm_dim,
                                               input_lengths=sent1_len,
                                               scope_name=self.config.scope,
                                               reuse=None,
                                               is_training=self.is_training,
                                               dropout_rate=dropout_rate,
                                               use_cudnn=self.config.use_cudnn)

                [sent2_repres_fw, sent2_repres_bw, sent2_repres
                 ] = layer_utils.my_lstm_layer(context_attention_outputs,
                                               self.config.context_lstm_dim,
                                               input_lengths=sent2_len,
                                               scope_name=self.config.scope,
                                               reuse=True,
                                               is_training=self.is_training,
                                               dropout_rate=dropout_rate,
                                               use_cudnn=self.config.use_cudnn)
                # presumably 4 pooled vectors of width 2 * context_lstm_dim
                # each -- TODO confirm against my_lstm_layer's output width.
                match_dim = self.config.context_lstm_dim * 8

            elif self.config.rnn == "slstm":

                # Initial hidden and cell states both come from one dense
                # projection of the attention output.
                sent1_initial_hidden_states = tf.layers.dense(
                    query_attention_outputs, self.config.slstm_hidden_size)
                sent1_initial_cell_states = tf.identity(
                    sent1_initial_hidden_states)

                [
                    new_sent1_hidden_states, new_sent1_cell_states,
                    dummynode_sent1_hidden_states
                ] = slstm_utils.slstm_cell(self.config,
                                           self.config.scope,
                                           self.config.slstm_hidden_size,
                                           sent1_len,
                                           sent1_initial_hidden_states,
                                           sent1_initial_cell_states,
                                           self.config.slstm_layer_num,
                                           dropout_rate,
                                           reuse=None)

                sent1_repres = new_sent1_hidden_states

                sent2_initial_hidden_states = tf.layers.dense(
                    context_attention_outputs, self.config.slstm_hidden_size)
                sent2_initial_cell_states = tf.identity(
                    sent2_initial_hidden_states)

                # Second S-LSTM call passes reuse=True.
                [
                    new_sent2_hidden_states, new_sent2_cell_states,
                    dummynode_sent2_hidden_states
                ] = slstm_utils.slstm_cell(self.config,
                                           self.config.scope,
                                           self.config.slstm_hidden_size,
                                           sent2_len,
                                           sent2_initial_hidden_states,
                                           sent2_initial_cell_states,
                                           self.config.slstm_layer_num,
                                           dropout_rate,
                                           reuse=True)

                sent2_repres = new_sent2_hidden_states
                match_dim = self.config.slstm_hidden_size * 4

            # Average pooling: sum over axis 1 divided by the sequence length;
            # EPSILON (defined outside this method) is added to the divisor.
            v_1_sum = tf.reduce_sum(sent1_repres, 1)
            v_1_ave = tf.div(
                v_1_sum,
                tf.expand_dims(tf.cast(sent1_len, tf.float32) + EPSILON, -1))

            v_2_sum = tf.reduce_sum(sent2_repres, 1)
            v_2_ave = tf.div(
                v_2_sum,
                tf.expand_dims(tf.cast(sent2_len, tf.float32) + EPSILON, -1))

            # v_1_max = tf.reduce_max(sent1_repres, 1)
            # v_2_max = tf.reduce_max(sent2_repres, 1)

            mask_q = tf.expand_dims(sent1_mask, -1)
            mask_c = tf.expand_dims(sent2_mask, -1)

            # Max pooling over axis 1, applied to the output of
            # qanet_layers.mask_logits rather than the raw representation.
            v_1_max = tf.reduce_max(qanet_layers.mask_logits(
                sent1_repres, mask_q),
                                    axis=1)
            v_2_max = tf.reduce_max(qanet_layers.mask_logits(
                sent2_repres, mask_c),
                                    axis=1)

            out1 = tf.concat([v_1_ave, v_1_max], axis=-1)
            out2 = tf.concat([v_2_ave, v_2_max], axis=-1)

            out = tf.concat([v_1_ave, v_2_ave, v_1_max, v_2_max], 1)

            return out1, out2, out, match_dim
Esempio n. 10
0
def bilateral_match_func(in_question_repres,
                         in_passage_repres,
                         question_lengths,
                         passage_lengths,
                         question_mask,
                         passage_mask,
                         input_dim,
                         is_training,
                         options=None):

    question_aware_representatins = []
    question_aware_dim = 0
    passage_aware_representatins = []
    passage_aware_dim = 0

    # ====word level matching======
    (match_reps, match_dim) = match_passage_with_question(
        in_passage_repres,
        in_question_repres,
        passage_mask,
        question_mask,
        passage_lengths,
        question_lengths,
        input_dim,
        scope="word_match_forward",
        with_full_match=False,
        with_maxpool_match=options.with_maxpool_match,
        with_attentive_match=options.with_attentive_match,
        with_max_attentive_match=options.with_max_attentive_match,
        is_training=is_training,
        options=options,
        dropout_rate=options.dropout_rate,
        forward=True)
    question_aware_representatins.append(match_reps)
    question_aware_dim += match_dim

    (match_reps, match_dim) = match_passage_with_question(
        in_question_repres,
        in_passage_repres,
        question_mask,
        passage_mask,
        question_lengths,
        passage_lengths,
        input_dim,
        scope="word_match_backward",
        with_full_match=False,
        with_maxpool_match=options.with_maxpool_match,
        with_attentive_match=options.with_attentive_match,
        with_max_attentive_match=options.with_max_attentive_match,
        is_training=is_training,
        options=options,
        dropout_rate=options.dropout_rate,
        forward=False)
    passage_aware_representatins.append(match_reps)
    passage_aware_dim += match_dim

    with tf.variable_scope('context_MP_matching'):
        for i in range(
                options.context_layer_num):  # support multiple context layer
            with tf.variable_scope('layer-{}'.format(i)):
                # contextual lstm for both passage and question
                in_question_repres = tf.multiply(
                    in_question_repres, tf.expand_dims(question_mask, axis=-1))
                in_passage_repres = tf.multiply(
                    in_passage_repres, tf.expand_dims(passage_mask, axis=-1))
                (question_context_representation_fw,
                 question_context_representation_bw,
                 in_question_repres) = layer_utils.my_lstm_layer(
                     in_question_repres,
                     options.context_lstm_dim,
                     input_lengths=question_lengths,
                     scope_name="context_represent",
                     reuse=False,
                     is_training=is_training,
                     dropout_rate=options.dropout_rate,
                     use_cudnn=options.use_cudnn)
                (passage_context_representation_fw,
                 passage_context_representation_bw,
                 in_passage_repres) = layer_utils.my_lstm_layer(
                     in_passage_repres,
                     options.context_lstm_dim,
                     input_lengths=passage_lengths,
                     scope_name="context_represent",
                     reuse=True,
                     is_training=is_training,
                     dropout_rate=options.dropout_rate,
                     use_cudnn=options.use_cudnn)

                # Multi-perspective matching
                with tf.variable_scope('left_MP_matching'):
                    (match_reps, match_dim) = match_passage_with_question(
                        passage_context_representation_fw,
                        question_context_representation_fw,
                        passage_mask,
                        question_mask,
                        passage_lengths,
                        question_lengths,
                        options.context_lstm_dim,
                        scope="forward_match",
                        with_full_match=options.with_full_match,
                        with_maxpool_match=options.with_maxpool_match,
                        with_attentive_match=options.with_attentive_match,
                        with_max_attentive_match=options.
                        with_max_attentive_match,
                        is_training=is_training,
                        options=options,
                        dropout_rate=options.dropout_rate,
                        forward=True)
                    question_aware_representatins.append(match_reps)
                    question_aware_dim += match_dim
                    (match_reps, match_dim) = match_passage_with_question(
                        passage_context_representation_bw,
                        question_context_representation_bw,
                        passage_mask,
                        question_mask,
                        passage_lengths,
                        question_lengths,
                        options.context_lstm_dim,
                        scope="backward_match",
                        with_full_match=options.with_full_match,
                        with_maxpool_match=options.with_maxpool_match,
                        with_attentive_match=options.with_attentive_match,
                        with_max_attentive_match=options.
                        with_max_attentive_match,
                        is_training=is_training,
                        options=options,
                        dropout_rate=options.dropout_rate,
                        forward=False)
                    question_aware_representatins.append(match_reps)
                    question_aware_dim += match_dim

                with tf.variable_scope('right_MP_matching'):
                    (match_reps, match_dim) = match_passage_with_question(
                        question_context_representation_fw,
                        passage_context_representation_fw,
                        question_mask,
                        passage_mask,
                        question_lengths,
                        passage_lengths,
                        options.context_lstm_dim,
                        scope="forward_match",
                        with_full_match=options.with_full_match,
                        with_maxpool_match=options.with_maxpool_match,
                        with_attentive_match=options.with_attentive_match,
                        with_max_attentive_match=options.
                        with_max_attentive_match,
                        is_training=is_training,
                        options=options,
                        dropout_rate=options.dropout_rate,
                        forward=True)
                    passage_aware_representatins.append(match_reps)
                    passage_aware_dim += match_dim
                    (match_reps, match_dim) = match_passage_with_question(
                        question_context_representation_bw,
                        passage_context_representation_bw,
                        question_mask,
                        passage_mask,
                        question_lengths,
                        passage_lengths,
                        options.context_lstm_dim,
                        scope="backward_match",
                        with_full_match=options.with_full_match,
                        with_maxpool_match=options.with_maxpool_match,
                        with_attentive_match=options.with_attentive_match,
                        with_max_attentive_match=options.
                        with_max_attentive_match,
                        is_training=is_training,
                        options=options,
                        dropout_rate=options.dropout_rate,
                        forward=False)
                    passage_aware_representatins.append(match_reps)
                    passage_aware_dim += match_dim

    question_aware_representatins = tf.concat(
        axis=2, values=question_aware_representatins
    )  # [batch_size, passage_len, question_aware_dim]
    passage_aware_representatins = tf.concat(
        axis=2, values=passage_aware_representatins
    )  # [batch_size, question_len, question_aware_dim]

    dropout_rate = tf.cond(is_training, lambda: options.dropout_rate,
                           lambda: 0.0)

    question_aware_representatins = tf.nn.dropout(
        question_aware_representatins, (1 - dropout_rate))
    passage_aware_representatins = tf.nn.dropout(passage_aware_representatins,
                                                 (1 - dropout_rate))

    # ======Highway layer======
    if options.with_match_highway:
        with tf.variable_scope("left_matching_highway"):
            question_aware_representatins = multi_highway_layer(
                question_aware_representatins, question_aware_dim,
                options.highway_layer_num)
        with tf.variable_scope("right_matching_highway"):
            passage_aware_representatins = multi_highway_layer(
                passage_aware_representatins, passage_aware_dim,
                options.highway_layer_num)

    #========Aggregation Layer======
    aggregation_representation = []
    aggregation_dim = 0

    qa_aggregation_input = question_aware_representatins
    pa_aggregation_input = passage_aware_representatins
    with tf.variable_scope('aggregation_layer'):
        for i in range(options.aggregation_layer_num
                       ):  # support multiple aggregation layer
            qa_aggregation_input = tf.multiply(
                qa_aggregation_input, tf.expand_dims(passage_mask, axis=-1))
            (fw_rep, bw_rep,
             cur_aggregation_representation) = layer_utils.my_lstm_layer(
                 qa_aggregation_input,
                 options.aggregation_lstm_dim,
                 input_lengths=passage_lengths,
                 scope_name='left_layer-{}'.format(i),
                 reuse=False,
                 is_training=is_training,
                 dropout_rate=options.dropout_rate,
                 use_cudnn=options.use_cudnn)
            fw_rep = layer_utils.collect_final_step_of_lstm(
                fw_rep, passage_lengths - 1)
            bw_rep = bw_rep[:, 0, :]
            aggregation_representation.append(fw_rep)
            aggregation_representation.append(bw_rep)
            aggregation_dim += 2 * options.aggregation_lstm_dim
            qa_aggregation_input = cur_aggregation_representation  # [batch_size, passage_len, 2*aggregation_lstm_dim]

            pa_aggregation_input = tf.multiply(
                pa_aggregation_input, tf.expand_dims(question_mask, axis=-1))
            (fw_rep, bw_rep,
             cur_aggregation_representation) = layer_utils.my_lstm_layer(
                 pa_aggregation_input,
                 options.aggregation_lstm_dim,
                 input_lengths=question_lengths,
                 scope_name='right_layer-{}'.format(i),
                 reuse=False,
                 is_training=is_training,
                 dropout_rate=options.dropout_rate,
                 use_cudnn=options.use_cudnn)
            fw_rep = layer_utils.collect_final_step_of_lstm(
                fw_rep, question_lengths - 1)
            bw_rep = bw_rep[:, 0, :]
            aggregation_representation.append(fw_rep)
            aggregation_representation.append(bw_rep)
            aggregation_dim += 2 * options.aggregation_lstm_dim
            pa_aggregation_input = cur_aggregation_representation  # [batch_size, passage_len, 2*aggregation_lstm_dim]

    aggregation_representation = tf.concat(
        axis=1,
        values=aggregation_representation)  # [batch_size, aggregation_dim]

    # ======Highway layer======
    if options.with_aggregation_highway:
        with tf.variable_scope("aggregation_highway"):
            agg_shape = tf.shape(aggregation_representation)
            batch_size = agg_shape[0]
            aggregation_representation = tf.reshape(
                aggregation_representation, [1, batch_size, aggregation_dim])
            aggregation_representation = multi_highway_layer(
                aggregation_representation, aggregation_dim,
                options.highway_layer_num)
            aggregation_representation = tf.reshape(
                aggregation_representation, [batch_size, aggregation_dim])

    return (aggregation_representation, aggregation_dim)
Esempio n. 11
0
    def build_encoder(self, input_lengths, input_mask, *args, **kargs):
        """Encode one sentence into a single pooled vector.

        Pipeline: embedding lookup -> dropout -> highway layers -> recurrent
        encoder selected by ``self.config.rnn`` -> optional multi-head
        self-attention -> concatenation of four poolings (length-normalised
        masked sum, max over ``qanet_layers.mask_logits`` output, last
        relevant step, multi-dimensional attention).

        Args:
            input_lengths: per-example sequence lengths (presumably an
                integer tensor of shape [batch] -- TODO confirm).
            input_mask: per-token mask (presumably [batch, seq_len] with
                non-zero entries on real tokens -- TODO confirm).
            *args: forwarded to ``self.build_emebdding``.
            **kargs: forwarded to ``self.build_emebdding``; must contain the
                key ``"reuse"``, which is used for the variable scope.

        Returns:
            Tuple ``(out, match_dim)``: the concatenated pooled tensor and an
            integer derived from the config for the selected encoder.

        Raises:
            KeyError: if ``"reuse"`` is missing from ``kargs``.
            ValueError: if ``self.config.rnn`` is neither ``"lstm"`` nor
                ``"slstm"``.
        """
        reuse = kargs["reuse"]

        # Fail fast: ``match_dim`` is only defined for these two encoders, so
        # any other value used to build the whole graph and then crash with
        # an UnboundLocalError at the return statement.
        rnn_type = self.config.rnn
        if rnn_type not in ("lstm", "slstm"):
            raise ValueError(
                "Unsupported rnn type {!r}; expected 'lstm' or 'slstm'".format(
                    rnn_type))

        word_emb = self.build_emebdding(*args, **kargs)
        # Dropout is only active while training.
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate, lambda: 0.0)

        # tf.nn.dropout takes a keep probability here, hence ``1 - rate``.
        word_emb = tf.nn.dropout(word_emb, 1 - dropout_rate)
        with tf.variable_scope(self.config.scope + "_input_highway",
                               reuse=reuse):
            input_dim = word_emb.get_shape()[-1]
            sent_repres = match_utils.multi_highway_layer(
                word_emb, input_dim, self.config.highway_layer_num)

            if rnn_type == "lstm":
                [sent_repres_fw, sent_repres_bw, sent_repres
                 ] = layer_utils.my_lstm_layer(sent_repres,
                                               self.config.context_lstm_dim,
                                               input_lengths=input_lengths,
                                               scope_name=self.config.scope,
                                               reuse=reuse,
                                               is_training=self.is_training,
                                               dropout_rate=dropout_rate,
                                               use_cudnn=self.config.use_cudnn)
                # NOTE(review): ``out`` below concatenates four pooled
                # vectors, yet this uses a factor of 6 -- confirm against the
                # consumer of ``match_dim``.
                match_dim = self.config.context_lstm_dim * 6

            else:  # rnn_type == "slstm"
                # NOTE(review): this branch projects the raw ``word_emb``, so
                # the highway output computed above is not used here --
                # presumably intentional, TODO confirm.
                word_emb_proj = tf.layers.dense(word_emb,
                                                self.config.slstm_hidden_size)

                initial_hidden_states = word_emb_proj
                initial_cell_states = tf.identity(initial_hidden_states)

                [new_hidden_states, new_cell_states, dummynode_hidden_states
                 ] = slstm_utils.slstm_cell(self.config,
                                            self.config.scope,
                                            self.config.slstm_hidden_size,
                                            input_lengths,
                                            initial_hidden_states,
                                            initial_cell_states,
                                            self.config.slstm_layer_num,
                                            dropout_rate,
                                            reuse=reuse)

                sent_repres = new_hidden_states
                # NOTE(review): factor of 3 vs. four concatenated poolings --
                # confirm, as for the lstm branch.
                match_dim = self.config.slstm_hidden_size * 3

            if self.config.multi_head:
                # Turn the mask into an attention bias for padded positions.
                mask = tf.cast(input_mask, tf.float32)
                ignore_padding = (1 - mask)
                ignore_padding = label_network_utils.attention_bias_ignore_padding(
                    ignore_padding)
                encoder_self_attention_bias = ignore_padding

                sent_repres = label_network_utils.multihead_attention_texar(
                    sent_repres,
                    memory=None,
                    memory_attention_bias=encoder_self_attention_bias,
                    num_heads=8,
                    num_units=128,
                    dropout_rate=dropout_rate,
                    scope="multihead_attention")

            v_attn = self_attn.multi_dimensional_attention(
                sent_repres, input_mask,
                'multi_dim_attn_for_%s' % self.config.scope, 1 - dropout_rate,
                self.is_training, self.config.weight_decay, "relu")

            # Masked sum over axis 1 divided by the sequence length; EPSILON
            # keeps the division finite for zero-length inputs.
            mask = tf.expand_dims(input_mask, -1)
            v_sum = tf.reduce_sum(sent_repres * tf.cast(mask, tf.float32), 1)
            v_ave = tf.div(
                v_sum,
                tf.expand_dims(
                    tf.cast(input_lengths, tf.float32) + EPSILON, -1))

            v_max = tf.reduce_max(qanet_layers.mask_logits(sent_repres, mask),
                                  axis=1)

            v_last = esim_utils.last_relevant_output(sent_repres,
                                                     input_lengths)

            out = tf.concat([v_ave, v_max, v_last, v_attn], axis=-1)

        return out, match_dim
Esempio n. 12
0
    def build_interactor(self, sent1_repres, sent2_repres, sent1_len,
                         sent2_len, sent1_mask, sent2_mask, *args, **kargs):
        """Build the interaction module between two encoded sentences.

        Four attention variants from ``man_utils`` (concat, bilinear, dot,
        minus) are applied to the pair; each returns one tensor per side.
        Every side's original representation is concatenated with its four
        attention outputs, passed through a shared one-layer highway and a
        shared LSTM ("inner_aggeration"), and finally through a shared
        self-attention that yields one vector per sentence.

        Args:
            sent1_repres, sent2_repres: encoded sentences (presumably
                [batch, len, 2 * context_lstm_dim] -- TODO confirm).
            sent1_len, sent2_len: sequence lengths of each sentence.
            sent1_mask, sent2_mask: token masks of each sentence.
            *args: unused.
            **kargs: must contain the key ``"reuse"``.

        Returns:
            Tuple ``(context_attn, query_attn, aggre_output, match_dim)``
            where ``aggre_output`` concatenates both attention vectors with
            their absolute difference and element-wise product, and
            ``match_dim`` is ``context_lstm_dim * 2 * 4``.
        """
        reuse = kargs["reuse"]
        # Not used below; the static-shape lookup is kept as in the original.
        input_dim = sent1_repres.get_shape()[-1]
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate, lambda: 0.0)
        scope_name = self.config.scope

        with tf.variable_scope(scope_name + "_interaction_module",
                               reuse=reuse):
            # Positional arguments shared by all four attention variants.
            pair_args = (sent1_repres, sent2_repres, sent1_len, sent2_len,
                         sent1_mask, sent2_mask, dropout_rate, scope_name)

            # Order matters for the final concat: concat, bilinear, dot, minus.
            attention_fns = (man_utils.concat_attention,
                             man_utils.bilinear_attention,
                             man_utils.dot_attention,
                             man_utils.minus_attention)
            c2q_parts = [sent1_repres]
            q2c_parts = [sent2_repres]
            for attend in attention_fns:
                c2q, q2c = attend(*pair_args, reuse=reuse)
                c2q_parts.append(c2q)
                q2c_parts.append(q2c)

            sent1_agg = tf.concat(c2q_parts, axis=-1)
            agg_dim = self.config.context_lstm_dim * 10
            sent2_agg = tf.concat(q2c_parts, axis=-1)

            # One highway layer whose weights are shared between both sides.
            with tf.variable_scope(scope_name + "_inner_highway", reuse=None):
                sent1_agg = match_utils.multi_highway_layer(
                    sent1_agg, agg_dim, 1, scope="sent_attention_highway")
                tf.get_variable_scope().reuse_variables()
                sent2_agg = match_utils.multi_highway_layer(
                    sent2_agg, agg_dim, 1, scope="sent_attention_highway")

            # Same LSTM for both sides: the second call passes reuse=True.
            lstm_kwargs = dict(scope_name="inner_aggeration",
                               is_training=self.is_training,
                               dropout_rate=dropout_rate,
                               use_cudnn=self.config.use_cudnn)
            _, _, sent1_agg = layer_utils.my_lstm_layer(
                sent1_agg,
                self.config.context_lstm_dim,
                input_lengths=sent1_len,
                reuse=False,
                **lstm_kwargs)
            _, _, sent2_agg = layer_utils.my_lstm_layer(
                sent2_agg,
                self.config.context_lstm_dim,
                input_lengths=sent2_len,
                reuse=True,
                **lstm_kwargs)

            with tf.variable_scope(scope_name + "_predictor_self_attention",
                                   reuse=None):
                # Each sentence's original encoding is paired with the
                # aggregated representation of the *other* sentence.
                context_attn = man_utils.self_attention(
                    sent1_repres, sent2_agg, sent1_len, sent2_len,
                    sent1_mask, sent2_mask, dropout_rate, scope_name,
                    reuse=None)

                tf.get_variable_scope().reuse_variables()
                query_attn = man_utils.self_attention(
                    sent2_repres, sent1_agg, sent2_len, sent1_len,
                    sent2_mask, sent1_mask, dropout_rate, scope_name,
                    reuse=None)

            abs_diff = tf.abs(context_attn - query_attn)
            product = context_attn * query_attn
            aggre_output = tf.concat(
                [context_attn, query_attn, abs_diff, product], axis=-1)
            match_dim = self.config.context_lstm_dim * 2 * 4
            return context_attn, query_attn, aggre_output, match_dim
Esempio n. 13
0
    def build_encoder(self, index, input_lengths, input_mask, *args, **kargs):
        """Encode one sentence into a single pooled vector.

        The embedding for ``index`` is dropped out, passed through highway
        layers and then through the encoder named by ``self.config.rnn``
        ("lstm", "slstm", "base_transformer" or "universal_transformer"; any
        other value, including "highway", keeps the highway output as is).
        The token states are then pooled over axis 1 using the mask.

        Args:
            index: forwarded as the first argument to ``build_emebdding``.
            input_lengths: per-example sequence lengths.
            input_mask: per-token mask (presumably [batch, seq_len] --
                TODO confirm).
            *args: forwarded to ``self.build_emebdding``.
            **kargs: forwarded to ``self.build_emebdding``; must contain the
                key ``"reuse"``.

        Returns:
            The pooled representation: the masked sum clipped to norm
            ``1.0 - EPSILON`` when ``self.config.metric == "Hyperbolic"``,
            otherwise the masked sum divided by the sequence length. For
            "universal_transformer" a tuple ``(pooled, act_loss)`` is
            returned instead.
        """
        reuse = kargs["reuse"]
        word_emb = self.build_emebdding(index, *args, **kargs)
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate, lambda: 0.0)
        word_emb = tf.nn.dropout(word_emb, 1 - dropout_rate)

        with tf.variable_scope(self.config.scope + "_input_highway",
                               reuse=reuse):
            sent_repres = match_utils.multi_highway_layer(
                word_emb, word_emb.get_shape()[-1],
                self.config.highway_layer_num)

            rnn_type = self.config.rnn
            if rnn_type == "lstm":
                _, _, sent_repres = layer_utils.my_lstm_layer(
                    sent_repres,
                    self.config.context_lstm_dim,
                    input_lengths=input_lengths,
                    scope_name=self.config.scope,
                    reuse=reuse,
                    is_training=self.is_training,
                    dropout_rate=dropout_rate,
                    use_cudnn=self.config.use_cudnn)
            elif rnn_type == "slstm":
                # The projected embedding seeds both hidden and cell states.
                hidden_init = tf.layers.dense(word_emb,
                                              self.config.slstm_hidden_size)
                cell_init = tf.identity(hidden_init)
                sent_repres, _, _ = slstm_utils.slstm_cell(
                    self.config,
                    self.config.scope,
                    self.config.slstm_hidden_size,
                    input_lengths,
                    hidden_init,
                    cell_init,
                    self.config.slstm_layer_num,
                    dropout_rate,
                    reuse=reuse)
            elif rnn_type == "base_transformer":
                sent_repres = base_transformer_utils.transformer_encoder(
                    sent_repres,
                    target_space=None,
                    hparams=self.config,
                    features=None,
                    make_image_summary=False)
            elif rnn_type == "universal_transformer":
                sent_repres, act_loss = universal_transformer_utils.universal_transformer_encoder(
                    sent_repres,
                    target_space=None,
                    hparams=self.config,
                    features=None,
                    make_image_summary=False)
            # "highway" (and any other value): the highway output is used
            # unchanged.

            # Masked sum over axis 1, then a length-normalised version;
            # EPSILON guards the division.
            float_mask = tf.expand_dims(tf.cast(input_mask, tf.float32),
                                        axis=-1)
            masked_sum = tf.reduce_sum(sent_repres * float_mask, axis=1)
            safe_lengths = tf.expand_dims(
                tf.cast(input_lengths, tf.float32) + EPSILON, -1)
            masked_mean = tf.div(masked_sum, safe_lengths)

            if self.config.metric == "Hyperbolic":
                pooled = tf.clip_by_norm(masked_sum, 1.0 - EPSILON, axes=1)
            else:
                pooled = masked_mean

        if rnn_type == "universal_transformer":
            return pooled, act_loss
        return pooled
Esempio n. 14
0
    def build_encoder(self, input_lengths, input_mask, *args, **kargs):
        """Encode a sentence and attend from it to the label memory.

        Word embeddings are dropped out, passed through highway layers,
        masked, and encoded by ``layer_utils.my_lstm_layer``. ``self.memory``
        is then tiled along a new leading axis to the batch size and combined
        with the token states by
        ``label_network_utils.memory_attention_v1`` (multi-head output, hard
        attention with threshold ``1 / num_classes``).

        Earlier experiments with a dense projection, self-attention, a text
        CNN encoder and entity-augmented memory were left commented out in
        the original and are not part of this graph.

        Args:
            input_lengths: per-example sequence lengths.
            input_mask: per-token mask (presumably [batch, seq_len] --
                TODO confirm).
            *args: forwarded to ``self.build_emebdding``.
            **kargs: forwarded to ``self.build_emebdding``; must contain the
                key ``"reuse"``.

        Returns:
            Tuple ``(sent_repres, entity_emb, output)``: the LSTM token
            states, the entity embedding returned by ``build_emebdding``,
            and the memory-attention output.
        """
        reuse = kargs["reuse"]
        word_emb, entity_emb = self.build_emebdding(*args, **kargs)
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate, lambda: 0.0)
        word_emb = tf.nn.dropout(word_emb, 1 - dropout_rate)

        with tf.variable_scope(self.config.scope + "_input_highway",
                               reuse=reuse):
            highway_out = match_utils.multi_highway_layer(
                word_emb, word_emb.get_shape()[-1],
                self.config.highway_layer_num)

            # Multiply by the mask before feeding the LSTM.
            token_mask = tf.expand_dims(input_mask, -1)
            highway_out = highway_out * tf.cast(token_mask, tf.float32)

            _, _, sent_repres = layer_utils.my_lstm_layer(
                highway_out,
                self.config.context_lstm_dim,
                input_lengths=input_lengths,
                scope_name=self.config.scope,
                reuse=reuse,
                is_training=self.is_training,
                dropout_rate=dropout_rate,
                use_cudnn=self.config.use_cudnn)
            # Computed but not returned or used below.
            match_dim = self.config.context_lstm_dim * 8

        with tf.variable_scope(self.config.scope + "sent_label_attention",
                               reuse=reuse):
            # Repeat the memory once per example in the batch.
            memory = tf.tile(tf.expand_dims(self.memory, axis=0),
                             [tf.shape(sent_repres)[0], 1, 1])
            print("==memory shape==", memory.get_shape())
            print(sent_repres.get_shape(), memory.get_shape())

            output = label_network_utils.memory_attention_v1(
                sent_repres,
                memory,
                input_mask,
                "memory_attention",
                memory_mask=None,
                reuse=None,
                attention_output="multi_head",
                num_heads=4,
                dropout_rate=dropout_rate,
                threshold=1 / float(self.num_classes),
                apply_hard_attn=True)
            print("==output shape==", output.get_shape())

        return sent_repres, entity_emb, output