Example 1
    def context_encoder(self, context_embedded, context_len):

        with tf.variable_scope("context-encoder"):
            context_outputs = stack_bidirectional_rnn(
                cell="CUDNNGRU",
                num_layers=self.hparams.rnn_depth,
                num_units=self.hparams.rnn_hidden_dim * 2,
                inputs=context_embedded,
                sequence_length=context_len,
                state_merge="concat",
                output_dropout_keep_prob=self.dropout_keep_prob,
                residual=self.hparams.rnn_depth > 1)

        return context_outputs
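All of the snippets on this page call a stack_bidirectional_rnn helper that is not shown here. Below is a minimal sketch of what such a helper could look like, assuming num_units is the total output width split evenly across the two directions; it substitutes plain tf.nn.rnn_cell.GRUCell for the cuDNN kernel implied by cell="CUDNNGRU", so treat it as an illustration of the call signature, not the original implementation.

    import tensorflow as tf

    def stack_bidirectional_rnn(cell, num_layers, num_units, inputs,
                                sequence_length, state_merge="concat",
                                output_dropout_keep_prob=1.0, residual=False):
        # Hypothetical stand-in: `cell` and `state_merge` are accepted only
        # for signature parity; this sketch covers the "CUDNNGRU"/"concat"
        # case with standard GRU cells.
        outputs = inputs
        for layer in range(num_layers):
            with tf.variable_scope("bi_rnn_layer_%d" % layer):
                cell_fw = tf.nn.rnn_cell.DropoutWrapper(
                    tf.nn.rnn_cell.GRUCell(num_units // 2),
                    output_keep_prob=output_dropout_keep_prob)
                cell_bw = tf.nn.rnn_cell.DropoutWrapper(
                    tf.nn.rnn_cell.GRUCell(num_units // 2),
                    output_keep_prob=output_dropout_keep_prob)
                (fw_out, bw_out), _ = tf.nn.bidirectional_dynamic_rnn(
                    cell_fw, cell_bw, outputs,
                    sequence_length=sequence_length, dtype=tf.float32)
                layer_outputs = tf.concat([fw_out, bw_out], axis=-1)
                # Residual connections need matching widths, which only holds
                # from the second layer on; the callers pass
                # residual=rnn_depth > 1 accordingly.
                if residual and layer > 0:
                    layer_outputs += outputs
                outputs = layer_outputs
        return outputs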
Example 2
    def lstm_encoder(self, embedded, embedded_len, name, rnn_hidden_dim=None):
        if rnn_hidden_dim is None:
            rnn_hidden_dim = self.hparams.rnn_hidden_dim

        with tf.variable_scope("lstm-encoder-%s" % name, reuse=tf.AUTO_REUSE):
            lstm_outputs = stack_bidirectional_rnn(
                cell="CUDNNGRU",
                num_layers=self.hparams.rnn_depth,
                num_units=rnn_hidden_dim * 2,
                inputs=embedded,
                sequence_length=embedded_len,
                state_merge="concat",
                output_dropout_keep_prob=self.dropout_keep_prob,
                residual=self.hparams.rnn_depth > 1
            )

        return lstm_outputs
Example 3
    def dialog_representation(self, context_sentence_representation,
                              tot_context_len):
        """context sentence outputs"""
        with tf.variable_scope("tot-context-sentence-layer"):
            tot_context_sentence_outputs = stack_bidirectional_rnn(
                cell="CUDNNGRU",
                num_layers=self.hparams.rnn_depth,
                num_units=self.hparams.sentence_rnn_hidden_dim * 2,
                inputs=context_sentence_representation,
                sequence_length=tot_context_len,
                state_merge="concat",
                output_dropout_keep_prob=self.dropout_keep_prob,
                residual=self.hparams.rnn_depth > 1)

            tot_context_fw_last_state, tot_context_bw_first_state = \
                sequence_feature(tot_context_sentence_outputs, tot_context_len)

            tot_context_hidden = tf.concat(
                axis=-1,
                values=[tot_context_fw_last_state, tot_context_bw_first_state])

        return tot_context_sentence_outputs, tot_context_hidden
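The sequence_feature helper is likewise external to these snippets. Judging from the inline version of the same pattern in Example 6 below, it plausibly splits the concatenated bidirectional outputs and gathers the forward state at each sequence's last valid timestep plus the backward state at timestep 0. A hedged reconstruction:

    def sequence_feature(outputs, sequence_length):
        # Hypothetical reconstruction, mirroring the explicit gather_nd
        # bookkeeping in response_sentence_representation (Example 6).
        fw_outputs, bw_outputs = tf.split(outputs, 2, axis=-1)
        value_index = tf.range(tf.shape(outputs)[0])
        # Forward direction: state at the last valid timestep.
        last_index = tf.stack((value_index, sequence_length - 1), axis=1)
        # Backward direction: its final state sits at timestep 0.
        first_index = tf.stack(
            (value_index, tf.zeros(tf.shape(value_index), tf.int32)), axis=1)
        fw_last_state = tf.gather_nd(fw_outputs, last_index)
        bw_first_state = tf.gather_nd(bw_outputs, first_index)
        return fw_last_state, bw_first_state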
Example 4
    def utterance_encoder(self, utterances_embedded, utterances_len):

        # utterances_embedded: [batch_size, num_candidates, max_utterance_len, embedding_dim], e.g. [10, 10, 281, 300]
        with tf.variable_scope("utterances-encoder"):

            utterances_len = tf.reshape(utterances_len, [-1])
            utterances_outputs = stack_bidirectional_rnn(
                cell="CUDNNGRU",
                num_layers=self.hparams.rnn_depth,
                num_units=self.hparams.rnn_hidden_dim * 2,
                inputs=utterances_embedded,
                sequence_length=utterances_len,
                state_merge="concat",
                output_dropout_keep_prob=self.dropout_keep_prob,
                residual=self.hparams.rnn_depth > 1)

            # [batch_size, num_candidates, max_utter_len, rnn_hidden_dim * 2]
            utterances_outputs = \
                tf.reshape(utterances_outputs,
                           [self.batch_size, -1, self.max_utterances_len, self.hparams.rnn_hidden_dim * 2])

        return utterances_outputs
Example 5
    def context_sentence_representation(self, context_sentence_embedded,
                                        context_sentence_len, tot_context_len,
                                        speaker):
        """context-sentence GLOVE(Avg) representation"""
        context_mask = tf.sequence_mask(context_sentence_len,
                                        maxlen=self.max_c_sentence_len)
        context_mask = tf.expand_dims(tf.cast(context_mask, tf.float32),
                                      axis=-1)

        masked_context_sentence = tf.multiply(context_sentence_embedded,
                                              context_mask)

        context_sentence_sum = tf.reduce_sum(masked_context_sentence, axis=2)

        context_mask = tf.squeeze(context_mask, [-1])

        tot_context_mask = tf.cast(
            tf.sequence_mask(tot_context_len, maxlen=self.max_dialog_len),
            tf.float32)

        tot_context_len_tile = tf.tile(tf.expand_dims(tot_context_len, -1),
                                       [1, tf.shape(tot_context_mask)[-1]])
        # Each sentence's word-sum is masked to valid sentences and scaled by
        # 1 / tot_context_len (the number of sentences, not the word count).
        context_sentence_mean = \
            tf.multiply(context_sentence_sum,
                        tf.expand_dims(tf.divide(tot_context_mask, tf.cast(tot_context_len_tile, tf.float32)), -1))
        """context sentence LSTM representation"""
        context_sentence_embedded = tf.reshape(
            context_sentence_embedded,
            [-1, self.max_c_sentence_len, self.hparams.embedding_dim])
        context_sentence_len = tf.reshape(context_sentence_len, [-1])

        with tf.variable_scope("context-sentence-encoder"):
            c_sentence_outputs = stack_bidirectional_rnn(
                cell="CUDNNGRU",
                num_layers=self.hparams.rnn_depth,
                num_units=self.hparams.sentence_rnn_hidden_dim * 2,
                inputs=context_sentence_embedded,
                sequence_length=context_sentence_len,
                state_merge="concat",
                output_dropout_keep_prob=self.dropout_keep_prob,
                residual=self.hparams.rnn_depth > 1)

            c_sentence_fw_last_state, c_sentence_bw_first_state = \
                sequence_feature(c_sentence_outputs, context_sentence_len)

            c_sentence_hidden = tf.concat(
                axis=-1,
                values=[c_sentence_fw_last_state, c_sentence_bw_first_state])
            c_sentence_hidden = tf.reshape(c_sentence_hidden, [
                self.batch_size, self.max_dialog_len,
                self.hparams.sentence_rnn_hidden_dim * 2
            ])

        context_sentence_representation = tf.concat(
            axis=-1, values=[context_sentence_mean, c_sentence_hidden])

        if self.pos_emb_bool or self.user_emb_bool:
            print("with sentence_features")
            context_w_sentence_feature = self._sentence_order_speaker_feature(
                context_sentence_representation, speaker)
        else:
            print("without sentence_features")
            context_w_sentence_feature = context_sentence_representation

        return context_w_sentence_feature
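The masked-sum step at the top of this function is easy to verify in isolation. A toy check with made-up shapes (batch=1, two sentences, three word slots, embedding_dim=2) showing that padded word positions are zeroed before the per-sentence sum:

    import tensorflow as tf

    emb = tf.constant([[[[1., 1.], [2., 2.], [9., 9.]],
                        [[3., 3.], [5., 5.], [9., 9.]]]])
    sent_len = tf.constant([[2, 2]])  # each sentence has 2 real words
    mask = tf.expand_dims(
        tf.cast(tf.sequence_mask(sent_len, maxlen=3), tf.float32), axis=-1)
    sent_sum = tf.reduce_sum(tf.multiply(emb, mask), axis=2)

    with tf.Session() as sess:
        print(sess.run(sent_sum))  # [[[3. 3.] [8. 8.]]]; the padded 9s drop out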
Example 6
    def response_sentence_representation(self, utterances_embedded,
                                         utterances_len):
        """Response sentence-level encoder."""
        utterances_len = tf.reshape(utterances_len, [self.batch_size, -1])
        response_mask = tf.expand_dims(
            tf.sequence_mask(utterances_len, maxlen=self.max_response_len),
            axis=-1)
        response_mask = tf.cast(response_mask, tf.float32)

        # [batch_size, num_candidates, sentence_len, embedding_dim]
        masked_response = tf.multiply(utterances_embedded, response_mask)
        response_sentence_sum = tf.reduce_sum(masked_response, axis=2)
        response_mask = tf.squeeze(response_mask, [-1])
        # [batch_size, num_candidates, 300]
        response_sentence_mean = tf.divide(
            response_sentence_sum,
            tf.expand_dims(tf.reduce_sum(response_mask, axis=-1), axis=-1))

        with tf.variable_scope("response-sentence-encoder"):
            utterances_embedded = tf.reshape(
                utterances_embedded,
                [-1, self.max_response_len, self.hparams.embedding_dim])
            utterances_len = tf.reshape(utterances_len, [-1])
            response_outputs = stack_bidirectional_rnn(
                cell="CUDNNGRU",
                num_layers=self.hparams.rnn_depth,
                num_units=self.hparams.sentence_rnn_hidden_dim * 2,
                inputs=utterances_embedded,
                sequence_length=utterances_len,
                state_merge="concat",
                output_dropout_keep_prob=self.dropout_keep_prob,
                residual=self.hparams.rnn_depth > 1)

            # Split the concatenated outputs into forward / backward halves.
            response_fw_bw_outputs = tf.split(response_outputs, 2, axis=-1)
            value_index = tf.range(self.batch_size * self.num_candidates)
            first_index = tf.stack(
                (value_index, tf.zeros(tf.shape(value_index), tf.int32)),
                axis=1)
            last_index = tf.stack((value_index, utterances_len - 1), axis=1)

            # Forward state at the last valid timestep; the backward
            # direction's final state sits at timestep 0.
            r_sentence_fw_last_state = tf.gather_nd(response_fw_bw_outputs[0],
                                                    last_index)
            r_sentence_bw_last_state = tf.gather_nd(response_fw_bw_outputs[1],
                                                    first_index)

            r_sentence_hidden = tf.concat(
                axis=-1,
                values=[r_sentence_fw_last_state, r_sentence_bw_last_state])
            r_sentence_hidden = \
                tf.reshape(r_sentence_hidden,
                           [self.batch_size, self.num_candidates, self.hparams.sentence_rnn_hidden_dim * 2])

        response_sentence_representation = tf.concat(
            axis=-1, values=[response_sentence_mean, r_sentence_hidden])

        response_sentence_representation = tf.layers.dense(
            inputs=response_sentence_representation,
            units=self.hparams.sentence_rnn_hidden_dim * 2,
            activation=None,
            kernel_initializer=tf.initializers.variance_scaling(
                scale=2.0, mode="fan_in", distribution="normal"),
            name="response_projection")

        return response_sentence_representation
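The first_index/last_index bookkeeping above is the general recipe for per-row timestep selection: tf.gather_nd with a [batch, 2] index matrix picks one timestep from each sequence. A toy check with made-up values (batch=2, time=3, dim=1):

    import tensorflow as tf

    outputs = tf.constant([[[1.], [2.], [3.]],
                           [[4.], [5.], [6.]]])
    seq_len = tf.constant([3, 2])
    value_index = tf.range(tf.shape(outputs)[0])
    last_index = tf.stack((value_index, seq_len - 1), axis=1)  # [[0, 2], [1, 1]]
    last_state = tf.gather_nd(outputs, last_index)

    with tf.Session() as sess:
        print(sess.run(last_state))  # [[3.] [5.]]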
Example 7
    def _matching_aggregation_layer(self, m_context, context_len, m_utterances,
                                    utterances_len):
        batch_size = tf.shape(m_context)[0]
        num_candidates = tf.shape(m_context)[1]
        max_context_len = tf.shape(m_context)[2]
        max_utterances_len = tf.shape(m_utterances)[2]

        # Matching Aggregation Layer
        """m_context"""

        with tf.variable_scope("matching-context"):
            m_context = tf.reshape(
                m_context,
                [-1, max_context_len, self.hparams.rnn_hidden_dim * 8])
            m_context_len = tf.reshape(
                tf.tile(tf.expand_dims(context_len, -1), [1, num_candidates]),
                [-1])

            m_context_outputs = stack_bidirectional_rnn(
                cell="CUDNNGRU",
                num_layers=self.hparams.rnn_depth,
                num_units=self.hparams.rnn_hidden_dim * 2,
                inputs=m_context,
                sequence_length=m_context_len,
                state_merge="concat",
                output_dropout_keep_prob=self.dropout_keep_prob,
                residual=self.hparams.rnn_depth > 1)
            m_context_fw_bw = tf.split(m_context_outputs, 2, axis=-1)
            value_index = tf.range(batch_size * num_candidates)
            last_index = tf.stack((value_index, m_context_len - 1), axis=1)

            m_context_fw_last_state = tf.gather_nd(m_context_fw_bw[0],
                                                   last_index)
            m_context_fw_last_state = \
                tf.reshape(m_context_fw_last_state, [batch_size, num_candidates, self.hparams.rnn_hidden_dim])

            m_context_outputs = \
                tf.reshape(m_context_outputs,
                           [batch_size, num_candidates, max_context_len, self.hparams.rnn_hidden_dim * 2])

            m_context_max = tf.reduce_max(m_context_outputs, axis=2)
        """m_utterances"""
        with tf.variable_scope("matching-utterances"):
            m_utterances = tf.reshape(
                m_utterances,
                [-1, max_utterances_len, self.hparams.rnn_hidden_dim * 8])
            m_utterances_len = tf.reshape(utterances_len, [-1])

            m_utterances_outputs = stack_bidirectional_rnn(
                cell="CUDNNGRU",
                num_layers=self.hparams.rnn_depth,
                num_units=self.hparams.rnn_hidden_dim * 2,
                inputs=m_utterances,
                sequence_length=m_utterances_len,
                state_merge="concat",
                output_dropout_keep_prob=self.dropout_keep_prob,
                residual=self.hparams.rnn_depth > 1)

            m_utterance_fw_bw = tf.split(m_utterances_outputs, 2, axis=-1)
            value_index = tf.range(batch_size * num_candidates)
            last_index = tf.stack((value_index, m_utterances_len - 1), axis=1)

            m_utterances_fw_last_state = tf.gather_nd(m_utterance_fw_bw[0],
                                                      last_index)
            m_utterances_fw_last_state = \
                tf.reshape(m_utterances_fw_last_state, [batch_size, num_candidates, self.hparams.rnn_hidden_dim])

            m_utterances_outputs = \
                tf.reshape(m_utterances_outputs,
                           [batch_size, num_candidates, max_utterances_len, self.hparams.rnn_hidden_dim * 2])

            m_utterances_max = tf.reduce_max(m_utterances_outputs, axis=2)

        return m_context_max, m_utterances_max, m_context_fw_last_state, m_utterances_fw_last_state
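What consumes the four returned tensors is not shown on this page. Purely as an illustration (none of the names or sizes below come from the original model), a scoring head over such pooled matching features is often a concat followed by a small dense stack; hidden_dim=256 is an arbitrary assumption.

    def score_candidates(m_context_max, m_utterances_max,
                         m_context_fw_last_state, m_utterances_fw_last_state,
                         hidden_dim=256):
        # Hypothetical scoring head, not part of the original snippets.
        features = tf.concat(
            axis=-1,
            values=[m_context_max, m_utterances_max,
                    m_context_fw_last_state, m_utterances_fw_last_state])
        hidden = tf.layers.dense(features, hidden_dim, activation=tf.nn.relu,
                                 name="aggregation_hidden")
        # One logit per candidate: [batch_size, num_candidates].
        return tf.squeeze(tf.layers.dense(hidden, 1, name="candidate_score"),
                          axis=-1)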