def context_encoder(self, context_embedded, context_len):
    with tf.variable_scope("context-encoder"):
        context_outputs = stack_bidirectional_rnn(
            cell="CUDNNGRU",
            num_layers=self.hparams.rnn_depth,
            num_units=self.hparams.rnn_hidden_dim * 2,
            inputs=context_embedded,
            sequence_length=context_len,
            state_merge="concat",
            output_dropout_keep_prob=self.dropout_keep_prob,
            residual=self.hparams.rnn_depth > 1)

    return context_outputs
def lstm_encoder(self, embedded, embedded_len, name, rnn_hidden_dim=None):
    if rnn_hidden_dim is None:
        rnn_hidden_dim = self.hparams.rnn_hidden_dim

    with tf.variable_scope("lstm-encoder-%s" % name, reuse=tf.AUTO_REUSE):
        lstm_outputs = stack_bidirectional_rnn(
            cell="CUDNNGRU",
            num_layers=self.hparams.rnn_depth,
            num_units=rnn_hidden_dim * 2,
            inputs=embedded,
            sequence_length=embedded_len,
            state_merge="concat",
            output_dropout_keep_prob=self.dropout_keep_prob,
            residual=self.hparams.rnn_depth > 1
        )

    return lstm_outputs
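
# NOTE: `stack_bidirectional_rnn` is a project-local helper that is not defined in
# this section. The function below is only a minimal, hypothetical sketch of such a
# helper, inferred from how it is called here (a stack of bidirectional GRUs,
# `num_units` covering both directions, "concat" merging, output dropout, optional
# residual connections). The real helper in this repository may differ; `tf` is the
# module-level `import tensorflow as tf` (TF 1.x) used by the rest of this file.
def _stack_bidirectional_rnn_sketch(cell, num_layers, num_units, inputs,
                                    sequence_length, state_merge,
                                    output_dropout_keep_prob, residual):
    """Hypothetical stacked bidirectional GRU encoder (illustration only)."""
    assert cell == "CUDNNGRU" and state_merge == "concat"
    outputs = inputs
    for layer in range(num_layers):
        with tf.variable_scope("bi_rnn_layer_%d" % layer):
            # A standard GRUCell stands in for the CuDNN kernel in this sketch.
            fw_cell = tf.nn.rnn_cell.GRUCell(num_units // 2)
            bw_cell = tf.nn.rnn_cell.GRUCell(num_units // 2)
            layer_inputs = outputs
            (fw_out, bw_out), _ = tf.nn.bidirectional_dynamic_rnn(
                fw_cell, bw_cell, layer_inputs,
                sequence_length=sequence_length, dtype=tf.float32)
            merged = tf.concat([fw_out, bw_out], axis=-1)
            merged = tf.nn.dropout(merged, keep_prob=output_dropout_keep_prob)
            if residual and layer > 0:
                # Residual connection between stacked layers (shapes match because
                # every layer above the first outputs `num_units` dimensions).
                merged += layer_inputs
            outputs = merged
    return outputs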
def dialog_representation(self, context_sentence_representation, tot_context_len):
    """Dialog-level encoding over the sequence of context sentence representations."""
    with tf.variable_scope("tot-context-sentence-layer"):
        tot_context_sentence_outputs = stack_bidirectional_rnn(
            cell="CUDNNGRU",
            num_layers=self.hparams.rnn_depth,
            num_units=self.hparams.sentence_rnn_hidden_dim * 2,
            inputs=context_sentence_representation,
            sequence_length=tot_context_len,
            state_merge="concat",
            output_dropout_keep_prob=self.dropout_keep_prob,
            residual=self.hparams.rnn_depth > 1)

        # Forward last state and backward first state summarize the whole dialog.
        tot_context_fw_last_state, tot_context_bw_first_state = \
            sequence_feature(tot_context_sentence_outputs, tot_context_len)
        tot_context_hidden = tf.concat(
            axis=-1,
            values=[tot_context_fw_last_state, tot_context_bw_first_state])

    return tot_context_sentence_outputs, tot_context_hidden
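
# NOTE: `sequence_feature` is another helper assumed to be defined elsewhere in the
# repository. Judging from the equivalent inline code in
# `response_sentence_representation` below (split the concatenated bidirectional
# outputs, gather the forward output at the last valid timestep and the backward
# output at timestep 0), a minimal sketch under that assumption could look like:
def _sequence_feature_sketch(bi_outputs, sequence_length):
    """Hypothetical (forward last state, backward first state) extraction."""
    fw_outputs, bw_outputs = tf.split(bi_outputs, 2, axis=-1)
    batch_index = tf.range(tf.shape(bi_outputs)[0])
    sequence_length = tf.cast(sequence_length, tf.int32)
    last_index = tf.stack((batch_index, sequence_length - 1), axis=1)
    first_index = tf.stack((batch_index, tf.zeros_like(batch_index)), axis=1)
    fw_last_state = tf.gather_nd(fw_outputs, last_index)
    bw_first_state = tf.gather_nd(bw_outputs, first_index)
    return fw_last_state, bw_first_state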
def utterance_encoder(self, utterances_embedded, utterances_len):
    # utterances_embedded: [batch_size, num_candidates, max_utterances_len, embedding_dim]
    # (e.g. [10, 10, 281, 300])
    with tf.variable_scope("utterances-encoder"):
        utterances_len = tf.reshape(utterances_len, [-1])
        utterances_outputs = stack_bidirectional_rnn(
            cell="CUDNNGRU",
            num_layers=self.hparams.rnn_depth,
            num_units=self.hparams.rnn_hidden_dim * 2,
            inputs=utterances_embedded,
            sequence_length=utterances_len,
            state_merge="concat",
            output_dropout_keep_prob=self.dropout_keep_prob,
            residual=self.hparams.rnn_depth > 1)

        # [batch_size, num_candidates, max_utterances_len, rnn_hidden_dim * 2]
        utterances_outputs = \
            tf.reshape(utterances_outputs,
                       [self.batch_size, -1, self.max_utterances_len,
                        self.hparams.rnn_hidden_dim * 2])

    return utterances_outputs
def context_sentence_representation(self, context_sentence_embedded,
                                    context_sentence_len, tot_context_len, speaker):
    """Per-sentence context representation: GloVe (average) features concatenated
    with a sentence-level bidirectional GRU encoding."""
    context_mask = tf.sequence_mask(context_sentence_len,
                                    maxlen=self.max_c_sentence_len)
    context_mask = tf.expand_dims(tf.cast(context_mask, tf.float32), axis=-1)
    masked_context_sentence = tf.multiply(context_sentence_embedded, context_mask)
    context_sentence_sum = tf.reduce_sum(masked_context_sentence, axis=2)
    context_mask = tf.squeeze(context_mask, [-1])

    tot_context_mask = tf.cast(
        tf.sequence_mask(tot_context_len, maxlen=self.max_dialog_len), tf.float32)
    tot_context_len_tile = tf.tile(tf.expand_dims(tot_context_len, -1),
                                   [1, tf.shape(tot_context_mask)[-1]])
    context_sentence_mean = \
        tf.multiply(context_sentence_sum,
                    tf.expand_dims(
                        tf.divide(tot_context_mask,
                                  tf.cast(tot_context_len_tile, tf.float32)), -1))

    # Context-sentence RNN (bidirectional GRU) representation.
    context_sentence_embedded = tf.reshape(
        context_sentence_embedded,
        [-1, self.max_c_sentence_len, self.hparams.embedding_dim])
    context_sentence_len = tf.reshape(context_sentence_len, [-1])

    with tf.variable_scope("context-sentence-encoder"):
        c_sentence_outputs = stack_bidirectional_rnn(
            cell="CUDNNGRU",
            num_layers=self.hparams.rnn_depth,
            num_units=self.hparams.sentence_rnn_hidden_dim * 2,
            inputs=context_sentence_embedded,
            sequence_length=context_sentence_len,
            state_merge="concat",
            output_dropout_keep_prob=self.dropout_keep_prob,
            residual=self.hparams.rnn_depth > 1)

        c_sentence_fw_last_state, c_sentence_bw_first_state = \
            sequence_feature(c_sentence_outputs, context_sentence_len)
        c_sentence_hidden = tf.concat(
            axis=-1, values=[c_sentence_fw_last_state, c_sentence_bw_first_state])
        c_sentence_hidden = tf.reshape(c_sentence_hidden, [
            self.batch_size, self.max_dialog_len,
            self.hparams.sentence_rnn_hidden_dim * 2
        ])

    context_sentence_representation = tf.concat(
        axis=-1, values=[context_sentence_mean, c_sentence_hidden])

    if self.pos_emb_bool or self.user_emb_bool:
        print("with sentence_features")
        context_w_sentence_feature = self._sentence_order_speaker_feature(
            context_sentence_representation, speaker)
    else:
        print("without sentence_features")
        context_w_sentence_feature = context_sentence_representation

    return context_w_sentence_feature
def response_sentence_representation(self, utterances_embedded, utterances_len):
    """Sentence-level representation of each response candidate."""
    utterances_len = tf.reshape(utterances_len, [self.batch_size, -1])
    response_mask = tf.expand_dims(
        tf.sequence_mask(utterances_len, maxlen=self.max_response_len), axis=-1)
    response_mask = tf.cast(response_mask, tf.float32)

    # [batch_size, num_candidates, sentence_len, embedding_dim]
    masked_response = tf.multiply(utterances_embedded, response_mask)
    response_sentence_sum = tf.reduce_sum(masked_response, axis=2)
    response_mask = tf.squeeze(response_mask, [-1])

    # [batch_size, num_candidates, embedding_dim]
    response_sentence_mean = tf.divide(
        response_sentence_sum,
        tf.expand_dims(tf.reduce_sum(response_mask, axis=-1), axis=-1))

    with tf.variable_scope("response-sentence-encoder"):
        utterances_embedded = tf.reshape(
            utterances_embedded,
            [-1, self.max_response_len, self.hparams.embedding_dim])
        utterances_len = tf.reshape(utterances_len, [-1])

        response_outputs = stack_bidirectional_rnn(
            cell="CUDNNGRU",
            num_layers=self.hparams.rnn_depth,
            num_units=self.hparams.sentence_rnn_hidden_dim * 2,
            inputs=utterances_embedded,
            sequence_length=utterances_len,
            state_merge="concat",
            output_dropout_keep_prob=self.dropout_keep_prob,
            residual=self.hparams.rnn_depth > 1)

        # Gather the forward output at the last valid timestep and the backward
        # output at timestep 0 for every (batch, candidate) pair.
        response_fw_bw_outputs = tf.split(response_outputs, 2, axis=-1)
        value_index = tf.range(self.batch_size * self.num_candidates)
        first_index = tf.stack(
            (value_index, tf.zeros(tf.shape(value_index), tf.int32)), axis=1)
        last_index = tf.stack((value_index, utterances_len - 1), axis=1)
        r_sentence_fw_last_state = tf.gather_nd(response_fw_bw_outputs[0], last_index)
        r_sentence_bw_first_state = tf.gather_nd(response_fw_bw_outputs[1], first_index)
        r_sentence_hidden = tf.concat(
            axis=-1, values=[r_sentence_fw_last_state, r_sentence_bw_first_state])
        r_sentence_hidden = \
            tf.reshape(r_sentence_hidden,
                       [self.batch_size, self.num_candidates,
                        self.hparams.sentence_rnn_hidden_dim * 2])

    response_sentence_representation = tf.concat(
        axis=-1, values=[response_sentence_mean, r_sentence_hidden])
    response_sentence_representation = tf.layers.dense(
        inputs=response_sentence_representation,
        units=self.hparams.sentence_rnn_hidden_dim * 2,
        activation=None,
        kernel_initializer=tf.initializers.variance_scaling(
            scale=2.0, mode="fan_in", distribution="normal"),
        name="response_projection")

    return response_sentence_representation
def _matching_aggregation_layer(self, m_context, context_len,
                                m_utterances, utterances_len):
    """Aggregates the context- and utterance-side matching features with GRUs,
    returning max-pooled outputs and forward last states for both sides."""
    batch_size = tf.shape(m_context)[0]
    num_candidates = tf.shape(m_context)[1]
    max_context_len = tf.shape(m_context)[2]
    max_utterances_len = tf.shape(m_utterances)[2]

    # Matching aggregation layer: m_context
    with tf.variable_scope("matching-context"):
        m_context = tf.reshape(
            m_context, [-1, max_context_len, self.hparams.rnn_hidden_dim * 8])
        m_context_len = tf.reshape(
            tf.tile(tf.expand_dims(context_len, -1), [1, num_candidates]), [-1])

        m_context_outputs = stack_bidirectional_rnn(
            cell="CUDNNGRU",
            num_layers=self.hparams.rnn_depth,
            num_units=self.hparams.rnn_hidden_dim * 2,
            inputs=m_context,
            sequence_length=m_context_len,
            state_merge="concat",
            output_dropout_keep_prob=self.dropout_keep_prob,
            residual=self.hparams.rnn_depth > 1)

        m_context_fw_bw = tf.split(m_context_outputs, 2, axis=-1)
        value_index = tf.range(batch_size * num_candidates)
        last_index = tf.stack((value_index, m_context_len - 1), axis=1)
        m_context_fw_last_state = tf.gather_nd(m_context_fw_bw[0], last_index)
        m_context_fw_last_state = \
            tf.reshape(m_context_fw_last_state,
                       [batch_size, num_candidates, self.hparams.rnn_hidden_dim])

        m_context_outputs = \
            tf.reshape(m_context_outputs,
                       [batch_size, num_candidates, max_context_len,
                        self.hparams.rnn_hidden_dim * 2])
        # Max-pool over context timesteps.
        m_context_max = tf.reduce_max(m_context_outputs, axis=2)

    # Matching aggregation layer: m_utterances
    with tf.variable_scope("matching-utterances"):
        m_utterances = tf.reshape(
            m_utterances, [-1, max_utterances_len, self.hparams.rnn_hidden_dim * 8])
        m_utterances_len = tf.reshape(utterances_len, [-1])

        m_utterances_outputs = stack_bidirectional_rnn(
            cell="CUDNNGRU",
            num_layers=self.hparams.rnn_depth,
            num_units=self.hparams.rnn_hidden_dim * 2,
            inputs=m_utterances,
            sequence_length=m_utterances_len,
            state_merge="concat",
            output_dropout_keep_prob=self.dropout_keep_prob,
            residual=self.hparams.rnn_depth > 1)

        m_utterance_fw_bw = tf.split(m_utterances_outputs, 2, axis=-1)
        value_index = tf.range(batch_size * num_candidates)
        last_index = tf.stack((value_index, m_utterances_len - 1), axis=1)
        m_utterances_fw_last_state = tf.gather_nd(m_utterance_fw_bw[0], last_index)
        m_utterances_fw_last_state = \
            tf.reshape(m_utterances_fw_last_state,
                       [batch_size, num_candidates, self.hparams.rnn_hidden_dim])

        m_utterances_outputs = \
            tf.reshape(m_utterances_outputs,
                       [batch_size, num_candidates, max_utterances_len,
                        self.hparams.rnn_hidden_dim * 2])
        # Max-pool over utterance timesteps.
        m_utterances_max = tf.reduce_max(m_utterances_outputs, axis=2)

    return (m_context_max, m_utterances_max,
            m_context_fw_last_state, m_utterances_fw_last_state)
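
# NOTE: The `m_context` / `m_utterances` tensors (last dim rnn_hidden_dim * 8) fed
# into `_matching_aggregation_layer` are constructed outside this section. That
# width is consistent with an ESIM-style enriched matching vector
# [a; a~; a - a~; a * a~], where `a` is a bidirectional encoding of width
# rnn_hidden_dim * 2 and `a~` is its cross-attended counterpart. The sketch below
# only illustrates that common construction; it is an assumption, not necessarily
# what this model actually does.
def _cross_attention_matching_sketch(context_outputs, utterance_outputs,
                                     utterance_len):
    """Hypothetical ESIM-style matching between a context and one utterance.

    context_outputs:   [batch, context_len, rnn_hidden_dim * 2]
    utterance_outputs: [batch, utterance_len, rnn_hidden_dim * 2]
    """
    # Dot-product alignment scores between every context/utterance position pair.
    scores = tf.matmul(context_outputs, utterance_outputs, transpose_b=True)
    utter_mask = tf.sequence_mask(
        utterance_len, maxlen=tf.shape(utterance_outputs)[1], dtype=tf.float32)
    scores += (1.0 - tf.expand_dims(utter_mask, 1)) * -1e9  # mask padded positions
    attention = tf.nn.softmax(scores, axis=-1)
    context_attended = tf.matmul(attention, utterance_outputs)
    # Enriched matching vector; last dim = rnn_hidden_dim * 8.
    m_context = tf.concat(
        [context_outputs, context_attended,
         context_outputs - context_attended,
         context_outputs * context_attended], axis=-1)
    return m_context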