Example #1
    def build_encoder(self, index, input_lengths, *args, **kargs):

        reuse = kargs["reuse"]
        word_emb = self.build_emebdding(index, *args, **kargs)
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate, lambda: 0.0)

        word_emb = tf.nn.dropout(word_emb, 1 - dropout_rate)
        with tf.variable_scope(self.config.scope + "_input_highway",
                               reuse=reuse):
            input_dim = word_emb.get_shape()[-1]
            sent_repres = match_utils.multi_highway_layer(
                word_emb, input_dim, self.config.highway_layer_num)

            if self.config.rnn == "lstm":
                [sent_repres_fw, sent_repres_bw, sent_repres
                 ] = layer_utils.my_lstm_layer(sent_repres,
                                               self.config.context_lstm_dim,
                                               input_lengths=input_lengths,
                                               scope_name=self.config.scope,
                                               reuse=reuse,
                                               is_training=self.is_training,
                                               dropout_rate=dropout_rate,
                                               use_cudnn=self.config.use_cudnn)

            elif self.config.rnn == "slstm":

                word_emb_proj = tf.layers.dense(word_emb,
                                                self.config.slstm_hidden_size)

                initial_hidden_states = word_emb_proj
                initial_cell_states = tf.identity(initial_hidden_states)

                [new_hidden_states, new_cell_states, dummynode_hidden_states
                 ] = slstm_utils.slstm_cell(self.config,
                                            self.config.scope,
                                            self.config.slstm_hidden_size,
                                            input_lengths,
                                            initial_hidden_states,
                                            initial_cell_states,
                                            self.config.slstm_layer_num,
                                            dropout_rate,
                                            reuse=reuse)

                sent_repres = new_hidden_states

        return sent_repres
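
The tf.cond(self.is_training, ...) pattern above picks the dropout rate inside the graph, and tf.nn.dropout(word_emb, 1 - dropout_rate) takes a keep probability (the TF1 convention). A minimal NumPy sketch of the same inverted-dropout logic, with illustrative names that are not from the project:

import numpy as np

def dropout(x, dropout_rate, is_training, rng=np.random.default_rng(0)):
    """Inverted dropout: zero units with probability dropout_rate and
    scale the survivors by 1/keep_prob, mirroring tf.nn.dropout."""
    if not is_training or dropout_rate == 0.0:
        return x
    keep_prob = 1.0 - dropout_rate
    mask = rng.random(x.shape) < keep_prob
    return x * mask / keep_prob

emb = np.ones((2, 4))
print(dropout(emb, 0.5, is_training=True))   # roughly half zeroed, rest scaled by 2
print(dropout(emb, 0.5, is_training=False))  # unchanged at inference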
Example #2
File: drcn.py Project: yyht/simnet
    def build_encoder(self, sent_repres, input_lengths, *args, **kargs):

        reuse = kargs["reuse"]
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate, lambda: 0.0)

        with tf.variable_scope(self.config.scope + "_input_highway",
                               reuse=reuse):

            if self.config.rnn == "lstm":
                [sent_repres_fw, sent_repres_bw, sent_repres
                 ] = layer_utils.my_lstm_layer(sent_repres,
                                               self.config.context_lstm_dim,
                                               input_lengths=input_lengths,
                                               scope_name=self.config.scope,
                                               reuse=reuse,
                                               is_training=self.is_training,
                                               dropout_rate=dropout_rate,
                                               use_cudnn=self.config.use_cudnn,
                                               lstm_type=self.config.lstm_type)
                match_dim = self.config.context_lstm_dim * 2

            elif self.config.rnn == "slstm":

                # project this encoder's input, sent_repres, down to the
                # S-LSTM hidden size (there is no word_emb in this method)
                sent_repres_proj = tf.layers.dense(sent_repres,
                                                   self.config.slstm_hidden_size)

                initial_hidden_states = sent_repres_proj
                initial_cell_states = tf.identity(initial_hidden_states)

                [new_hidden_states, new_cell_states, dummynode_hidden_states
                 ] = slstm_utils.slstm_cell(self.config,
                                            self.config.scope,
                                            self.config.slstm_hidden_size,
                                            input_lengths,
                                            initial_hidden_states,
                                            initial_cell_states,
                                            self.config.slstm_layer_num,
                                            dropout_rate,
                                            reuse=reuse)
                match_dim = self.config.slstm_hidden_size * 2
                sent_repres = new_hidden_states

        return sent_repres, match_dim
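
In the "lstm" branch, my_lstm_layer returns forward, backward, and concatenated outputs, which is why match_dim is context_lstm_dim * 2. A shape-only NumPy sketch of that bidirectional concatenation (array names are illustrative):

import numpy as np

batch, seq_len, lstm_dim = 2, 5, 3
fw = np.zeros((batch, seq_len, lstm_dim))  # forward-direction LSTM outputs
bw = np.zeros((batch, seq_len, lstm_dim))  # backward-direction LSTM outputs
sent_repres = np.concatenate([fw, bw], axis=-1)
assert sent_repres.shape[-1] == lstm_dim * 2  # the match_dim used above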
Example #3
    def build_interactor(self, sent1_repres, sent2_repres, sent1_len,
                         sent2_len, sent1_mask, sent2_mask, *args, **kargs):
        reuse = kargs["reuse"]
        input_dim = sent1_repres.get_shape()[-1]
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate, lambda: 0.0)

        with tf.variable_scope(self.config.scope + "_interaction_module",
                               reuse=reuse):

            if self.config.with_self_attention:
                v_1_attn = esim_utils.multihead_attention(
                    sent1_repres,
                    sent1_repres,
                    num_units=None,
                    num_heads=self.config.num_heads,
                    dropout_rate=dropout_rate,
                    is_training=True,  # note: hard-coded, not self.is_training
                    causality=False,
                    scope="multihead_attention",
                    reuse=None)

                v_2_attn = esim_utils.multihead_attention(
                    sent2_repres,
                    sent2_repres,
                    num_units=None,
                    num_heads=self.config.num_heads,
                    dropout_rate=dropout_rate,
                    is_training=True,
                    causality=False,
                    scope="multihead_attention",
                    reuse=True)

                sent1_repres = tf.concat([sent1_repres, v_1_attn], axis=-1)
                sent2_repres = tf.concat([sent2_repres, v_2_attn], axis=-1)

            [query_attention_outputs, context_attention_outputs
             ] = esim_utils.query_context_attention(sent1_repres,
                                                    sent2_repres,
                                                    sent1_len,
                                                    sent2_len,
                                                    sent1_mask,
                                                    sent2_mask,
                                                    dropout_rate,
                                                    self.config.scope,
                                                    reuse=reuse)

            if self.config.rnn == "lstm":
                [sent1_repres_fw, sent1_repres_bw, sent1_repres
                 ] = layer_utils.my_lstm_layer(query_attention_outputs,
                                               self.config.context_lstm_dim,
                                               input_lengths=sent1_len,
                                               scope_name=self.config.scope,
                                               reuse=None,
                                               is_training=self.is_training,
                                               dropout_rate=dropout_rate,
                                               use_cudnn=self.config.use_cudnn)

                [sent2_repres_fw, sent2_repres_bw, sent2_repres
                 ] = layer_utils.my_lstm_layer(context_attention_outputs,
                                               self.config.context_lstm_dim,
                                               input_lengths=sent2_len,
                                               scope_name=self.config.scope,
                                               reuse=True,
                                               is_training=self.is_training,
                                               dropout_rate=dropout_rate,
                                               use_cudnn=self.config.use_cudnn)
                match_dim = self.config.context_lstm_dim * 8

            elif self.config.rnn == "slstm":

                sent1_initial_hidden_states = tf.layers.dense(
                    query_attention_outputs, self.config.slstm_hidden_size)
                sent1_initial_cell_states = tf.identity(
                    sent1_initial_hidden_states)

                [
                    new_sent1_hidden_states, new_sent1_cell_states,
                    dummynode_sent1_hidden_states
                ] = slstm_utils.slstm_cell(self.config,
                                           self.config.scope,
                                           self.config.slstm_hidden_size,
                                           sent1_len,
                                           sent1_initial_hidden_states,
                                           sent1_initial_cell_states,
                                           self.config.slstm_layer_num,
                                           dropout_rate,
                                           reuse=None)

                sent1_repres = new_sent1_hidden_states

                sent2_initial_hidden_states = tf.layers.dense(
                    context_attention_outputs, self.config.slstm_hidden_size)
                sent2_initial_cell_states = tf.identity(
                    sent2_initial_hidden_states)

                [
                    new_sent2_hidden_states, new_sent2_cell_states,
                    dummynode_sent2_hidden_states
                ] = slstm_utils.slstm_cell(self.config,
                                           self.config.scope,
                                           self.config.slstm_hidden_size,
                                           sent2_len,
                                           sent2_initial_hidden_states,
                                           sent2_initial_cell_states,
                                           self.config.slstm_layer_num,
                                           dropout_rate,
                                           reuse=True)

                sent2_repres = new_sent2_hidden_states
                match_dim = self.config.slstm_hidden_size * 4

            v_1_sum = tf.reduce_sum(sent1_repres, 1)
            v_1_ave = tf.div(
                v_1_sum,
                tf.expand_dims(tf.cast(sent1_len, tf.float32) + EPSILON, -1))

            v_2_sum = tf.reduce_sum(sent2_repres, 1)
            v_2_ave = tf.div(
                v_2_sum,
                tf.expand_dims(tf.cast(sent2_len, tf.float32) + EPSILON, -1))

            # v_1_max = tf.reduce_max(sent1_repres, 1)
            # v_2_max = tf.reduce_max(sent2_repres, 1)

            mask_q = tf.expand_dims(sent1_mask, -1)
            mask_c = tf.expand_dims(sent2_mask, -1)

            v_1_max = tf.reduce_max(qanet_layers.mask_logits(
                sent1_repres, mask_q),
                                    axis=1)
            v_2_max = tf.reduce_max(qanet_layers.mask_logits(
                sent2_repres, mask_c),
                                    axis=1)

            out1 = tf.concat([v_1_ave, v_1_max], axis=-1)
            out2 = tf.concat([v_2_ave, v_2_max], axis=-1)

            out = tf.concat([v_1_ave, v_2_ave, v_1_max, v_2_max], 1)

            return out1, out2, out, match_dim
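
The pooling block divides a sum by the true sentence length for the average and applies qanet_layers.mask_logits before the max; note that v_1_sum and v_2_sum above sum over all positions, padding included, before dividing by the length, while Example #4 masks the sum first. A NumPy sketch of fully masked mean/max pooling, assuming mask_logits follows the common x + (1 - mask) * -1e30 form:

import numpy as np

def masked_mean_max(x, mask, lengths, eps=1e-8):
    """x: [batch, seq, dim]; mask: [batch, seq], 1.0 for real tokens."""
    m = mask[..., None].astype(x.dtype)
    v_ave = (x * m).sum(axis=1) / (lengths.astype(x.dtype)[:, None] + eps)
    # push padded positions far negative so they never win the max
    v_max = (x + (1.0 - m) * -1e30).max(axis=1)
    return v_ave, v_max

x = np.arange(12, dtype=np.float64).reshape(1, 4, 3)
mask = np.array([[1.0, 1.0, 0.0, 0.0]])
lengths = np.array([2])
print(masked_mean_max(x, mask, lengths))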
Example #4
    def build_encoder(self, input_lengths, input_mask, *args, **kargs):

        reuse = kargs["reuse"]
        word_emb = self.build_emebdding(*args, **kargs)
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate, lambda: 0.0)

        word_emb = tf.nn.dropout(word_emb, 1 - dropout_rate)
        with tf.variable_scope(self.config.scope + "_input_highway",
                               reuse=reuse):
            input_dim = word_emb.get_shape()[-1]
            sent_repres = match_utils.multi_highway_layer(
                word_emb, input_dim, self.config.highway_layer_num)

            if self.config.rnn == "lstm":
                [sent_repres_fw, sent_repres_bw, sent_repres
                 ] = layer_utils.my_lstm_layer(sent_repres,
                                               self.config.context_lstm_dim,
                                               input_lengths=input_lengths,
                                               scope_name=self.config.scope,
                                               reuse=reuse,
                                               is_training=self.is_training,
                                               dropout_rate=dropout_rate,
                                               use_cudnn=self.config.use_cudnn)
                match_dim = self.config.context_lstm_dim * 6

            elif self.config.rnn == "slstm":

                word_emb_proj = tf.layers.dense(word_emb,
                                                self.config.slstm_hidden_size)

                initial_hidden_states = word_emb_proj
                initial_cell_states = tf.identity(initial_hidden_states)

                [new_hidden_states, new_cell_states, dummynode_hidden_states
                 ] = slstm_utils.slstm_cell(self.config,
                                            self.config.scope,
                                            self.config.slstm_hidden_size,
                                            input_lengths,
                                            initial_hidden_states,
                                            initial_cell_states,
                                            self.config.slstm_layer_num,
                                            dropout_rate,
                                            reuse=reuse)

                sent_repres = new_hidden_states
                match_dim = self.config.slstm_hidden_size * 3

            if self.config.multi_head:
                mask = tf.cast(input_mask, tf.float32)
                ignore_padding = (1 - mask)
                ignore_padding = label_network_utils.attention_bias_ignore_padding(
                    ignore_padding)
                encoder_self_attention_bias = ignore_padding

                sent_repres = label_network_utils.multihead_attention_texar(
                    sent_repres,
                    memory=None,
                    memory_attention_bias=encoder_self_attention_bias,
                    num_heads=8,
                    num_units=128,
                    dropout_rate=dropout_rate,
                    scope="multihead_attention")

            v_attn = self_attn.multi_dimensional_attention(
                sent_repres, input_mask,
                'multi_dim_attn_for_%s' % self.config.scope, 1 - dropout_rate,
                self.is_training, self.config.weight_decay, "relu")

            mask = tf.expand_dims(input_mask, -1)
            v_sum = tf.reduce_sum(sent_repres * tf.cast(mask, tf.float32), 1)
            v_ave = tf.div(
                v_sum,
                tf.expand_dims(
                    tf.cast(input_lengths, tf.float32) + EPSILON, -1))

            v_max = tf.reduce_max(qanet_layers.mask_logits(sent_repres, mask),
                                  axis=1)

            v_last = esim_utils.last_relevant_output(sent_repres,
                                                     input_lengths)

            out = tf.concat([v_ave, v_max, v_last, v_attn], axis=-1)

        return out, match_dim
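
The multi_head branch converts the padding mask into an additive attention bias so softmax assigns padded positions near-zero weight. A minimal sketch of that tensor2tensor-style bias, assuming the common -1e9 convention (the actual label_network_utils implementation may differ):

import numpy as np

def attention_bias_ignore_padding(memory_padding):
    """memory_padding: [batch, seq], 1.0 where the position is padding.
    Returns a [batch, 1, 1, seq] bias broadcastable over heads/queries."""
    return (memory_padding * -1e9)[:, None, None, :]

def softmax(z):
    z = z - z.max(axis=-1, keepdims=True)
    e = np.exp(z)
    return e / e.sum(axis=-1, keepdims=True)

mask = np.array([[1.0, 1.0, 0.0]])      # last position is padding
scores = np.zeros((1, 1, 1, 3))         # uniform raw attention scores
bias = attention_bias_ignore_padding(1.0 - mask)
print(softmax(scores + bias))           # padding gets ~0 attention weight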
Example #5
    def build_encoder(self, index, input_lengths, input_mask, *args, **kargs):

        reuse = kargs["reuse"]
        word_emb = self.build_emebdding(index, *args, **kargs)
        dropout_rate = tf.cond(self.is_training,
                               lambda: self.config.dropout_rate, lambda: 0.0)

        word_emb = tf.nn.dropout(word_emb, 1 - dropout_rate)
        with tf.variable_scope(self.config.scope + "_input_highway",
                               reuse=reuse):
            input_dim = word_emb.get_shape()[-1]
            sent_repres = match_utils.multi_highway_layer(
                word_emb, input_dim, self.config.highway_layer_num)

            if self.config.rnn == "lstm":
                [sent_repres_fw, sent_repres_bw, sent_repres
                 ] = layer_utils.my_lstm_layer(sent_repres,
                                               self.config.context_lstm_dim,
                                               input_lengths=input_lengths,
                                               scope_name=self.config.scope,
                                               reuse=reuse,
                                               is_training=self.is_training,
                                               dropout_rate=dropout_rate,
                                               use_cudnn=self.config.use_cudnn)

            elif self.config.rnn == "slstm":
                word_emb_proj = tf.layers.dense(word_emb,
                                                self.config.slstm_hidden_size)
                initial_hidden_states = word_emb_proj
                initial_cell_states = tf.identity(initial_hidden_states)
                [new_hidden_states, new_cell_states, dummynode_hidden_states
                 ] = slstm_utils.slstm_cell(self.config,
                                            self.config.scope,
                                            self.config.slstm_hidden_size,
                                            input_lengths,
                                            initial_hidden_states,
                                            initial_cell_states,
                                            self.config.slstm_layer_num,
                                            dropout_rate,
                                            reuse=reuse)
                sent_repres = new_hidden_states
            elif self.config.rnn == "base_transformer":
                sent_repres = base_transformer_utils.transformer_encoder(
                    sent_repres,
                    target_space=None,
                    hparams=self.config,
                    features=None,
                    make_image_summary=False)
            elif self.config.rnn == "universal_transformer":
                sent_repres, act_loss = universal_transformer_utils.universal_transformer_encoder(
                    sent_repres,
                    target_space=None,
                    hparams=self.config,
                    features=None,
                    make_image_summary=False)
            elif self.config.rnn == "highway":
                # "highway" keeps the multi_highway_layer output unchanged
                pass

            input_mask = tf.expand_dims(tf.cast(input_mask, tf.float32),
                                        axis=-1)
            sent_repres_sum = tf.reduce_sum(sent_repres * input_mask, axis=1)
            sent_repres_avr = tf.div(
                sent_repres_sum,
                tf.expand_dims(
                    tf.cast(input_lengths, tf.float32) + EPSILON, -1))

            if self.config.metric == "Hyperbolic":
                sent_repres = tf.clip_by_norm(sent_repres_sum,
                                              1.0 - EPSILON,
                                              axes=1)
            else:
                sent_repres = sent_repres_avr

        if self.config.rnn == "universal_transformer":
            return sent_repres, act_loss
        else:
            return sent_repres
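
When self.config.metric == "Hyperbolic", the summed representation is clipped to norm 1 - EPSILON so it stays strictly inside the unit (Poincaré) ball, where hyperbolic distances are defined. A NumPy sketch of that projection, assuming EPSILON is a small constant such as 1e-8 (its real value is defined elsewhere in the project):

import numpy as np

EPSILON = 1e-8

def clip_to_unit_ball(x, max_norm=1.0 - EPSILON):
    """Rescale rows whose L2 norm exceeds max_norm, like
    tf.clip_by_norm(x, max_norm, axes=1)."""
    norms = np.linalg.norm(x, axis=1, keepdims=True)
    scale = np.minimum(1.0, max_norm / np.maximum(norms, EPSILON))
    return x * scale

v = np.array([[3.0, 4.0], [0.1, 0.2]])
print(clip_to_unit_ball(v))  # first row rescaled to norm < 1, second unchanged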