def _preprocessing_RNN(self):
    """Encode the question and every sentence with a single shared GRU.

    Produces:
      self.encoded_q      -- [batch, 1, DIM_SENTENCE_EMBEDDING] question state
      self.encoded_s      -- [batch, MAX_SENTENCES, DIM_SENTENCE_EMBEDDING]
      self.embed_sent_q   -- alias of encoded_q
      self.embed_sent_q_s -- [q; sentences] concatenated on the sentence axis
    """
    print('[launch] Preprocessing-RNN')

    p = self.params
    with tf.name_scope('sentence_embedding_layer-RNN'):
        # Question pass creates the 'encoding_RNN' variables (reuse=False).
        self.outputs_q, self.states_q = add_GRU(
            inputs=self.embed_q,
            inputs_len=self.batch_len_q,
            hidden_dim=p.DIM_SENTENCE_EMBEDDING,
            layers=1,
            scope='encoding_RNN',
            reuse=False,
            dr_input_keep_prob=self.dr_rnn_prob,
            dr_output_keep_prob=1.0,
            is_bidir=False,
            is_bw_reversed=True,
            is_residual=False)

        # shape [batch_size, 1 (question only), hidden_dim]
        self.encoded_q = tf.reshape(
            self.states_q, [p.batch_size, 1, p.DIM_SENTENCE_EMBEDDING])

        # Fold the sentence axis into the batch axis so each sentence is
        # encoded independently by the same GRU.
        flat_rows = p.batch_size * p.MAX_SENTENCES
        self.embed_s_flat = tf.reshape(
            self.embed_s, [flat_rows, -1, self.embed_dim])
        self.batch_len_s_flat = tf.reshape(self.batch_len_s, [flat_rows])

        # Sentence pass shares weights with the question pass (reuse=True).
        self.outputs_s, self.states_s = add_GRU(
            inputs=self.embed_s_flat,
            inputs_len=self.batch_len_s_flat,
            hidden_dim=p.DIM_SENTENCE_EMBEDDING,
            layers=1,
            scope='encoding_RNN',
            reuse=True,
            dr_input_keep_prob=self.dr_rnn_prob,
            dr_output_keep_prob=1.0,
            is_bidir=False,
            is_bw_reversed=True,
            is_residual=False)

        # shape [batch_size, MAX_SENTENCES, hidden_dim]
        self.encoded_s = tf.reshape(
            self.states_s,
            [p.batch_size, p.MAX_SENTENCES, p.DIM_SENTENCE_EMBEDDING])

        self.embed_sent_q = self.encoded_q
        # concat [q; list_s]
        self.embed_sent_q_s = tf.concat(
            [self.encoded_q, self.encoded_s], axis=1)
def _create_gru_model(self):
    """Build the audio GRU encoder over MFCC frames.

    When IS_AUDIO_RESIDUAL is set, the MFCC features are first projected
    from N_AUDIO_MFCC to hidden_dim (learned matrix + bias) so the GRU's
    residual connection has matching widths.
    """
    print('[launch-audio] create gru cell - bidirectional:', self.bi)

    with tf.name_scope('audio_RNN') as scope:
        with tf.compat.v1.variable_scope(
                "audio_GRU",
                reuse=False,
                initializer=tf.orthogonal_initializer()):
            print("[INFO] IS_AUDIO_RESIDUAL: ", IS_AUDIO_RESIDUAL)

            if IS_AUDIO_RESIDUAL:
                # Learned projection: match embedding_dim to rnn_dim for the
                # residual add inside add_GRU.
                self.audio_residual_matrix = tf.Variable(
                    tf.random.normal([N_AUDIO_MFCC, self.hidden_dim],
                                     mean=0.0,
                                     stddev=0.01,
                                     dtype=tf.float32,
                                     seed=None),
                    trainable=True,
                    name='audio_residual_projection')
                self.audio_residual_bias = tf.Variable(
                    tf.zeros([self.hidden_dim], dtype=tf.float32),
                    name="audio_res_bias")

                flat_frames = tf.reshape(self.encoder_inputs,
                                         [-1, N_AUDIO_MFCC])
                projected = tf.matmul(
                    flat_frames,
                    self.audio_residual_matrix) + self.audio_residual_bias
                self.encoder_inputs_match_dim = tf.reshape(
                    projected,
                    [self.batch_size, self.encoder_size, self.hidden_dim])
            else:
                # Widths already match; feed the raw features through.
                self.encoder_inputs_match_dim = self.encoder_inputs

            self.outputs, self.last_states_en = add_GRU(
                inputs=self.encoder_inputs_match_dim,
                inputs_len=self.encoder_seq,
                hidden_dim=self.hidden_dim,
                layers=self.num_layers,
                scope='audio_encoding_RNN',
                reuse=False,
                dr_input_keep_prob=self.dr_audio_in_ph,
                dr_output_keep_prob=self.dr_audio_out_ph,
                is_bidir=self.bi,
                is_bw_reversed=True,
                is_residual=IS_AUDIO_RESIDUAL)

            self.state_concat = self.outputs
            self.final_encoder = self.last_states_en[-1]

            # Bidirectional encoding concatenates fw/bw states -> double width.
            self.final_encoder_dimension = (
                self.hidden_dim * 2 if self.bi else self.hidden_dim)
def _create_gru_hrde_model(self):
    """Build the hierarchical GRU (HRDE): a text-level encoder whose per-chunk
    final states feed a chunk-level encoder, for both the context and the
    response.

    Fixes applied:
      * The Python 2 `print '...'` statement is replaced with the `print()`
        call used everywhere else in this file (the old form is a syntax
        error under Python 3).
      * `tf.random_normal` (removed TF1 alias) is replaced with
        `tf.random.normal`, matching the audio model in this file.
      * `np.ones(..., dtype=np.int)` uses the builtin `int`; `np.int` was
        removed in NumPy 1.24.
    """
    print('[launch] create encoding layer textBi/chunkBi, textResi/chunkResi',
          Params.is_text_encoding_bidir, Params.is_chunk_encoding_bidir,
          Params.is_text_residual, Params.is_chunk_residual)

    with tf.name_scope('text_encoding_RNN') as scope:
        # match embedding_dim - rnn_dim to use residual connection
        if Params.is_text_residual:
            self.text_residual_matrix = tf.Variable(
                tf.random.normal([self.embed_size, self.hidden_dim],
                                 mean=0.0,
                                 stddev=0.01,
                                 dtype=tf.float32,
                                 seed=None),
                trainable=True,
                name='text_residual_projection')

            h = tf.matmul(tf.reshape(self.embed_en, [-1, self.embed_size]),
                          self.text_residual_matrix)
            self.embed_en = tf.reshape(h, [
                self.batch_size * self.context_size, self.encoder_size,
                self.hidden_dim
            ])

            h_R = tf.matmul(
                tf.reshape(self.embed_enR, [-1, self.embed_size]),
                self.text_residual_matrix)
            self.embed_enR = tf.reshape(
                h_R, [self.batch_size, self.encoderR_size, self.hidden_dim])

        # encoder RNN (context text); creates the shared variables
        self.outputs_en, self.states_en = add_GRU(
            inputs=self.embed_en,
            inputs_len=self.encoder_seq_length,
            hidden_dim=self.hidden_dim,
            layers=self.num_layers,
            scope='text_encoding_RNN',
            reuse=False,
            dr_input_keep_prob=self.dr_text_in_ph,
            dr_output_keep_prob=self.dr_text_out_ph,
            is_bidir=Params.is_text_encoding_bidir,
            is_residual=Params.is_text_residual)

        # response RNN shares weights with the context encoder (reuse=True)
        self.outputs_enR, self.states_enR = add_GRU(
            inputs=self.embed_enR,
            inputs_len=self.encoderR_seq_length,
            hidden_dim=self.hidden_dim,
            layers=self.num_layers,
            scope='text_encoding_RNN',
            reuse=True,
            dr_input_keep_prob=self.dr_text_in_ph,
            dr_output_keep_prob=self.dr_text_out_ph,
            is_bidir=Params.is_text_encoding_bidir,
            is_residual=Params.is_text_residual)

        self.final_encoder = self.states_en[-1]
        self.final_encoderR = self.states_enR[-1]

        # NOTE(review): when is_text_encoding_bidir is True the state width
        # would normally be hidden_dim * 2; the original code keeps hidden_dim
        # here and is left unchanged to preserve behavior — worth confirming
        # against add_GRU's bidirectional state layout.
        self.final_encoder_dimension = self.hidden_dim
        self.final_encoderR_dimension = self.hidden_dim

    with tf.name_scope('chunk_encoding_RNN') as scope:
        # context input: one text-level final state per chunk
        self.con_input = tf.reshape(self.final_encoder, [
            self.batch_size, self.context_size, self.final_encoder_dimension
        ])
        # response input: a single "chunk" of length 1
        self.con_inputR = tf.reshape(
            self.final_encoderR,
            [self.batch_size, 1, self.final_encoderR_dimension])

        # match rnn_dim - context_rnn_dim to use residual connection
        if Params.is_chunk_residual:
            self.chunk_residual_matrix = tf.Variable(
                tf.random.normal(
                    [self.final_encoder_dimension, self.hidden_dim_con],
                    mean=0.0,
                    stddev=0.01,
                    dtype=tf.float32,
                    seed=None),
                trainable=True,
                name='chunk_residual_projection')

            h = tf.matmul(
                tf.reshape(self.con_input,
                           [-1, self.final_encoder_dimension]),
                self.chunk_residual_matrix)
            self.con_input = tf.reshape(
                h, [self.batch_size, self.context_size, self.hidden_dim_con])

            h_R = tf.matmul(
                tf.reshape(self.con_inputR,
                           [-1, self.final_encoderR_dimension]),
                self.chunk_residual_matrix)
            self.con_inputR = tf.reshape(
                h_R, [self.batch_size, 1, self.hidden_dim_con])

        self.outputs_con, self.last_states_con = add_GRU(
            inputs=self.con_input,
            inputs_len=self.context_seq_length,
            hidden_dim=self.hidden_dim_con,
            layers=self.num_layers_con,
            scope='chunk_encoding_RNN',
            reuse=False,
            dr_input_keep_prob=self.dr_con_in_ph,
            dr_output_keep_prob=self.dr_con_out_ph,
            is_bidir=Params.is_chunk_encoding_bidir,
            is_residual=Params.is_chunk_residual)

        # Response has exactly one chunk, so every sequence length is 1.
        self.outputs_conR, self.last_states_conR = add_GRU(
            inputs=self.con_inputR,
            inputs_len=np.ones(self.batch_size, dtype=int).tolist(),
            hidden_dim=self.hidden_dim_con,
            layers=self.num_layers_con,
            scope='chunk_encoding_RNN',
            reuse=True,
            dr_input_keep_prob=self.dr_con_in_ph,
            dr_output_keep_prob=self.dr_con_out_ph,
            is_bidir=Params.is_chunk_encoding_bidir,
            is_residual=Params.is_chunk_residual)

        self.final_encoder = self.last_states_con[-1]
        self.final_encoderR = self.last_states_conR[-1]

        if Params.is_chunk_encoding_bidir:
            self.final_encoder_dimension = self.hidden_dim_con * 2
            self.final_encoderR_dimension = self.hidden_dim_con * 2
        else:
            self.final_encoder_dimension = self.hidden_dim_con
            self.final_encoderR_dimension = self.hidden_dim_con