def build(self): print('Building model') self.x_embeddings = tf.Variable( tf.random_normal([self.alphabet_src_size, self.embedd_dims], stddev=0.1), name='x_embeddings') self.t_embeddings = tf.Variable( tf.random_normal([self.alphabet_tar_size, self.embedd_dims], stddev=0.1), name='t_embeddings') X_embedded = tf.gather(self.x_embeddings, self.Xs, name='embed_X') t_embedded = tf.gather(self.t_embeddings, self.ts_go, name='embed_t') with tf.variable_scope('dense_out'): W_out = tf.get_variable('W_out', [self.word_encoder_units*2, self.alphabet_tar_size]) b_out = tf.get_variable('b_out', [self.alphabet_tar_size]) # forward encoding char_enc_state, char_enc_out = encoder(X_embedded, self.X_len, 'char_encoder', self.char_encoder_units) char2word = _grid_gather(char_enc_out, self.X_spaces) char2word.set_shape([None, None, self.char_encoder_units]) word_enc_state, word_enc_out = encoder(char2word, self.X_spaces_len, 'word_encoder', self.word_encoder_units) # backward encoding words char2word = tf.reverse_sequence(char2word, tf.to_int64(self.X_spaces_len), 1) char2word.set_shape([None, None, self.char_encoder_units]) word_enc_state_bck, word_enc_out_bck = encoder(char2word, self.X_spaces_len, 'word_encoder_backwards', self.word_encoder_units) word_enc_out_bck = tf.reverse_sequence(word_enc_out_bck, tf.to_int64(self.X_spaces_len), 1) word_enc_state = tf.concat(1, [word_enc_state, word_enc_state_bck]) word_enc_out = tf.concat(2, [word_enc_out, word_enc_out_bck]) # decoding dec_state, dec_out, valid_dec_out, valid_attention_tracker = ( attention_decoder(word_enc_out, self.X_spaces_len, word_enc_state, t_embedded, self.t_len, self.attn_units, self.t_embeddings, W_out, b_out)) out_tensor = tf.reshape(dec_out, [-1, self.word_encoder_units*2]) out_tensor = tf.matmul(out_tensor, W_out) + b_out out_shape = tf.concat(0, [tf.expand_dims(tf.shape(self.X_len)[0], 0), tf.expand_dims(tf.shape(t_embedded)[1], 0), tf.expand_dims(tf.constant(self.alphabet_tar_size), 0)]) self.valid_attention_tracker = valid_attention_tracker.pack() self.out_tensor = tf.reshape(out_tensor, out_shape) self.out_tensor.set_shape([None, None, self.alphabet_tar_size]) valid_out_tensor = tf.reshape(valid_dec_out, [-1, self.word_encoder_units*2]) valid_out_tensor = tf.matmul(valid_out_tensor, W_out) + b_out self.valid_out_tensor = tf.reshape(valid_out_tensor, out_shape) self.out = None # add TensorBoard summaries for all variables tf.contrib.layers.summarize_variables()
def build(self): print('Building model') self.x_embeddings = tf.Variable( tf.random_normal([self.alphabet_src_size, self.embedd_dims], stddev=0.1), name='x_embeddings') self.t_embeddings = tf.Variable( tf.random_normal([self.alphabet_tar_size, self.embedd_dims], stddev=0.1), name='t_embeddings') X_embedded = tf.gather(self.x_embeddings, self.Xs, name='embed_X') t_embedded = tf.gather(self.t_embeddings, self.ts_go, name='embed_t') with tf.variable_scope('dense_out'): W_out = tf.get_variable('W_out', [self.word_encoder_units*2, self.alphabet_tar_size]) b_out = tf.get_variable('b_out', [self.alphabet_tar_size]) # forward encoding char_enc_state, char_enc_out = encoder(X_embedded, self.X_len, 'char_encoder', self.char_encoder_units) char2word = _grid_gather(char_enc_out, self.X_spaces) char2word.set_shape([None, None, self.char_encoder_units]) word_enc_state, word_enc_out = encoder(char2word, self.X_spaces_len, 'word_encoder', self.word_encoder_units) # backward encoding words char2word = tf.reverse_sequence(char2word, tf.to_int64(self.X_spaces_len), 1) char2word.set_shape([None, None, self.char_encoder_units]) word_enc_state_bck, word_enc_out_bck = encoder(char2word, self.X_spaces_len, 'word_encoder_backwards', self.word_encoder_units) word_enc_out_bck = tf.reverse_sequence(word_enc_out_bck, tf.to_int64(self.X_spaces_len), 1) word_enc_state = tf.concat(1, [word_enc_state, word_enc_state_bck]) word_enc_out = tf.concat(2, [word_enc_out, word_enc_out_bck]) # decoding dec_state, dec_out, valid_dec_out = ( attention_decoder(word_enc_out, self.X_spaces_len, word_enc_state, t_embedded, self.t_len, self.attn_units, self.t_embeddings, W_out, b_out)) out_tensor = tf.reshape(dec_out, [-1, self.word_encoder_units*2]) out_tensor = tf.matmul(out_tensor, W_out) + b_out out_shape = tf.concat(0, [tf.expand_dims(tf.shape(self.X_len)[0], 0), tf.expand_dims(tf.shape(t_embedded)[1], 0), tf.expand_dims(tf.constant(self.alphabet_tar_size), 0)]) self.out_tensor = tf.reshape(out_tensor, out_shape) self.out_tensor.set_shape([None, None, self.alphabet_tar_size]) valid_out_tensor = tf.reshape(valid_dec_out, [-1, self.word_encoder_units*2]) valid_out_tensor = tf.matmul(valid_out_tensor, W_out) + b_out self.valid_out_tensor = tf.reshape(valid_out_tensor, out_shape) self.out = None # add TensorBoard summaries for all variables tf.contrib.layers.summarize_variables()
def build(self): print('Building model') self.embeddings = tf.Variable( tf.random_uniform([self.alphabet_size, self.embedd_dims]), name='embeddings') X_embedded = tf.gather(self.embeddings, self.Xs, name='embed_X') t_embedded = tf.gather(self.embeddings, self.ts_go, name='embed_t') with tf.variable_scope('split_X_inputs'): X_list = tf.split( split_dim=1, num_split=self.max_x_seq_len, value=X_embedded) X_list = [tf.squeeze(X) for X in X_list] [X.set_shape([None, self.embedd_dims]) for X in X_list] with tf.variable_scope('split_t_inputs'): t_list = tf.split( split_dim=1, num_split=self.max_t_seq_len, value=t_embedded) t_list = [tf.squeeze(t) for t in t_list] [t.set_shape([None, self.embedd_dims]) for t in t_list] with tf.variable_scope('dense_out'): W_out = tf.get_variable('W_out', [self.dec_units, self.alphabet_size]) b_out = tf.get_variable('b_out', [self.alphabet_size]) # char encoder char_cell = rnn_cell.GRUCell(self.char_enc_units) char_enc_outputs, char_enc_state = rnn.rnn( cell=char_cell, inputs=X_list, dtype=tf.float32, sequence_length=self.X_len, scope='rnn_char_encoder') # char2word char2word = tf.transpose(tf.pack(char_enc_outputs), perm=[1, 0, 2]) char2word = _grid_gather(char2word, self.X_spaces) char2word = tf.unpack(tf.transpose(char2word, perm=[1, 0, 2])) [t.set_shape([None, self.char_enc_units]) for t in char2word] # word encoder word_cell = rnn_cell.GRUCell(self.word_enc_units) word_enc_outputs, word_enc_state = rnn.rnn( cell=word_cell, inputs=char2word, dtype=tf.float32, sequence_length=self.X_spaces_len, scope='rnn_word_encoder' ) # The loop function provides inputs to the decoder: def decoder_loop_function(prev, i): def feedback_on(): prev_1 = tf.matmul(prev, W_out) + b_out # feedback is on, so feed the decoder with the previous output return tf.gather(self.embeddings, tf.argmax(prev_1, 1)) def feedback_off(): # feedback is off, so just feed the decoder with t's return t_list[i] return tf.cond(self.feedback, feedback_on, feedback_off) # decoder att_states = tf.transpose(tf.pack(word_enc_outputs), perm=[1, 0, 2]) dec_cell = rnn_cell.GRUCell(self.dec_units) dec_out, dec_state = seq2seq.attention_decoder( decoder_inputs=t_list, initial_state=word_enc_state, attention_states=att_states, cell=dec_cell, loop_function=decoder_loop_function, scope='attention_decoder' ) self.out = [] for d in dec_out: self.out.append(tf.matmul(d, W_out) + b_out) # for debugging network (should write this outside of build) out_packed = tf.pack(self.out) out_packed = tf.transpose(out_packed, perm=[1, 0, 2]) self.out_tensor = out_packed # add TensorBoard summaries for all variables tf.contrib.layers.summarize_variables()
def build(self): print('Building model') self.embeddings = tf.Variable(tf.random_uniform( [self.alphabet_size, self.embedd_dims]), name='embeddings') X_embedded = tf.gather(self.embeddings, self.Xs, name='embed_X') t_embedded = tf.gather(self.embeddings, self.ts_go, name='embed_t') with tf.variable_scope('split_X_inputs'): X_list = tf.split(split_dim=1, num_split=self.max_x_seq_len, value=X_embedded) X_list = [tf.squeeze(X) for X in X_list] [X.set_shape([None, self.embedd_dims]) for X in X_list] with tf.variable_scope('split_t_inputs'): t_list = tf.split(split_dim=1, num_split=self.max_t_seq_len, value=t_embedded) t_list = [tf.squeeze(t) for t in t_list] [t.set_shape([None, self.embedd_dims]) for t in t_list] with tf.variable_scope('dense_out'): W_out = tf.get_variable('W_out', [self.dec_units, self.alphabet_size]) b_out = tf.get_variable('b_out', [self.alphabet_size]) # char encoder char_cell = rnn_cell.GRUCell(self.char_enc_units) char_enc_outputs, char_enc_state = rnn.rnn(cell=char_cell, inputs=X_list, dtype=tf.float32, sequence_length=self.X_len, scope='rnn_char_encoder') # char2word char2word = tf.transpose(tf.pack(char_enc_outputs), perm=[1, 0, 2]) char2word = _grid_gather(char2word, self.X_spaces) char2word = tf.unpack(tf.transpose(char2word, perm=[1, 0, 2])) [t.set_shape([None, self.char_enc_units]) for t in char2word] # word encoder word_cell = rnn_cell.GRUCell(self.word_enc_units) word_enc_outputs, word_enc_state = rnn.rnn( cell=word_cell, inputs=char2word, dtype=tf.float32, sequence_length=self.X_spaces_len, scope='rnn_word_encoder') # The loop function provides inputs to the decoder: def decoder_loop_function(prev, i): def feedback_on(): prev_1 = tf.matmul(prev, W_out) + b_out # feedback is on, so feed the decoder with the previous output return tf.gather(self.embeddings, tf.argmax(prev_1, 1)) def feedback_off(): # feedback is off, so just feed the decoder with t's return t_list[i] return tf.cond(self.feedback, feedback_on, feedback_off) # decoder att_states = tf.transpose(tf.pack(word_enc_outputs), perm=[1, 0, 2]) dec_cell = rnn_cell.GRUCell(self.dec_units) dec_out, dec_state = seq2seq.attention_decoder( decoder_inputs=t_list, initial_state=word_enc_state, attention_states=att_states, cell=dec_cell, loop_function=decoder_loop_function, scope='attention_decoder') self.out = [] for d in dec_out: self.out.append(tf.matmul(d, W_out) + b_out) # for debugging network (should write this outside of build) out_packed = tf.pack(self.out) out_packed = tf.transpose(out_packed, perm=[1, 0, 2]) self.out_tensor = out_packed # add TensorBoard summaries for all variables tf.contrib.layers.summarize_variables()