def multilayer_perceptron(input_, layer_sizes, activation=tf.tanh,
                          dropout_plc=None, scope="mlp"):
    """Build a stack of fully-connected layers on top of ``input_``.

    Each layer is a ``nonlinear`` projection to the corresponding entry of
    ``layer_sizes``, optionally followed by dropout.

    Arguments:
        input_: Input tensor fed to the first layer.
        layer_sizes: List of output sizes, one per layer.
        activation: Activation function applied in every layer.
        dropout_plc: Optional keep-probability tensor (e.g. a placeholder).
            When ``None``, no dropout is applied.
        scope: Name of the variable scope wrapping all layers.

    Returns:
        The output tensor of the last layer.
    """
    mlp_input = input_
    with tf.variable_scope(scope):
        for i, size in enumerate(layer_sizes):
            mlp_input = nonlinear(mlp_input, size, activation=activation,
                                  scope="mlp_layer_{}".format(i))
            # BUGFIX: the original used `if dropout_plc:`, which raises
            # TypeError in TF graph mode when dropout_plc is a placeholder
            # tensor (a tf.Tensor cannot be used as a Python bool).
            # Explicitly compare against None instead.
            if dropout_plc is not None:
                # NOTE: dropout is applied after every layer, including
                # the last one (preserved from the original behavior).
                mlp_input = tf.nn.dropout(mlp_input, dropout_plc)
    return mlp_input
def _projection(prev_state, prev_output, ctx_tensors, train_mode):
    """Project the previous state, previous output and all attention
    contexts through the enclosing scope's ``nonlinear`` layer.

    ``output_size`` and ``activation_fn`` are taken from the enclosing
    scope; ``train_mode`` is accepted but unused.
    """
    projection_inputs = [prev_state, prev_output]
    projection_inputs.extend(ctx_tensors)
    return nonlinear(
        projection_inputs,
        output_size,
        activation=activation_fn)
def _projection(prev_state, prev_output, ctx_tensors, train_mode):
    """ReLU-project the previous state together with the attention
    contexts inside the "AttnOutputProjection" scope.

    ``output_size`` comes from the enclosing scope; ``prev_output`` and
    ``train_mode`` are accepted but unused.
    """
    projection_input = [prev_state] + ctx_tensors
    return nonlinear(
        projection_input,
        output_size,
        activation=tf.nn.relu,
        scope="AttnOutputProjection")
def body(*args) -> LoopState:
    """Single step of the decoding loop.

    Unpacks the flat ``*args`` back into a ``LoopState``, runs one RNN
    step (with attention), selects the next input symbols, and returns
    the updated ``LoopState`` for the next iteration.

    Closes over (defined in the enclosing scope, not visible here):
    ``self``, ``att_objects``, ``sample`` and ``train_mode`` — TODO
    confirm their exact types against the enclosing function.
    """
    loop_state = LoopState(*args)
    step = loop_state.step

    with tf.variable_scope(self.step_scope):
        # Compute the input to the RNN for this step.
        rnn_input = self.input_projection(*loop_state)

        # Run the RNN cell; GRU and LSTM differ in how the previous
        # state is packed and which part is carried as `next_state`.
        cell = self._get_rnn_cell()
        if self._rnn_cell_str == 'GRU':
            cell_output, state = cell(rnn_input, loop_state.prev_rnn_output)
            next_state = state
            # One attention result per attention object, each threaded
            # with its own loop state.
            attns = [
                a.attention(cell_output, loop_state.prev_rnn_output,
                            rnn_input, att_loop_state, loop_state.step)
                for a, att_loop_state in zip(
                    att_objects, loop_state.attention_loop_states)]
            if att_objects:
                contexts, att_loop_states = zip(*attns)
            else:
                contexts, att_loop_states = [], []

            if self._conditional_gru:
                # Conditional GRU: a second GRU step conditioned on the
                # concatenated attention contexts.
                cell_cond = self._get_conditional_gru_cell()
                cond_input = tf.concat(contexts, -1)
                cell_output, state = cell_cond(
                    cond_input, state, scope="cond_gru_2_cell")
        elif self._rnn_cell_str == 'LSTM':
            prev_state = tf.contrib.rnn.LSTMStateTuple(
                loop_state.prev_rnn_state, loop_state.prev_rnn_output)
            cell_output, state = cell(rnn_input, prev_state)
            # For LSTM only the cell state (c) is carried forward as
            # `next_state`; the hidden state travels as prev_rnn_output.
            next_state = state.c
            attns = [
                a.attention(cell_output, loop_state.prev_rnn_output,
                            rnn_input, att_loop_state, loop_state.step)
                for a, att_loop_state in zip(
                    att_objects, loop_state.attention_loop_states)]
            if att_objects:
                contexts, att_loop_states = zip(*attns)
            else:
                contexts, att_loop_states = [], []
        else:
            raise ValueError("Unknown RNN cell.")

        with tf.name_scope("rnn_output_projection"):
            if attns:
                # Mix the cell output with the attention contexts
                # through a ReLU projection back to the cell size.
                output = nonlinear(
                    [cell_output] + list(contexts),
                    cell.output_size,
                    activation=tf.nn.relu,
                    scope="AttnOutputProjection")
            else:
                output = cell_output
                att_loop_states = []

        logits = self._logit_function(output)

    # Make subsequent loop iterations reuse the variables created above.
    self.step_scope.reuse_variables()

    # Choose the symbol fed to the next step: sampled, gold (teacher
    # forcing), or greedy argmax.
    if sample:
        # NOTE(review): tf.multinomial returns int64 of shape
        # [batch, 1], while the other branches produce int32 of shape
        # [batch] — confirm downstream ops tolerate this.
        next_symbols = tf.multinomial(logits, num_samples=1)
    elif train_mode:
        next_symbols = loop_state.train_inputs[step]
    else:
        next_symbols = tf.to_int32(tf.argmax(logits, axis=1))
        int_unfinished_mask = tf.to_int32(
            tf.logical_not(loop_state.finished))
        # Zero out symbols of already-finished sequences. This works
        # only because PAD_TOKEN_INDEX is 0; otherwise this would have
        # to be rewritten.
        assert PAD_TOKEN_INDEX == 0
        next_symbols = next_symbols * int_unfinished_mask

    has_just_finished = tf.equal(next_symbols, END_TOKEN_INDEX)
    has_finished = tf.logical_or(loop_state.finished, has_just_finished)

    # Assemble the state for the next iteration. Note the offsets:
    # rnn_outputs is written at step + 1, logits and mask at step.
    new_loop_state = LoopState(
        step=step + 1,
        input_symbol=next_symbols,
        train_inputs=loop_state.train_inputs,
        prev_rnn_state=next_state,
        prev_rnn_output=cell_output,
        rnn_outputs=loop_state.rnn_outputs.write(
            step + 1, cell_output),
        prev_contexts=list(contexts),
        prev_logits=logits,
        logits=loop_state.logits.write(step, logits),
        finished=has_finished,
        mask=loop_state.mask.write(step, tf.logical_not(has_finished)),
        attention_loop_states=list(att_loop_states))
    return new_loop_state