Example #1
def multilayer_perceptron(input_,
                          layer_sizes,
                          activation=tf.tanh,
                          dropout_plc=None,
                          scope="mlp"):
    mlp_input = input_

    with tf.variable_scope(scope):
        for i, size in enumerate(layer_sizes):
            mlp_input = nonlinear(mlp_input,
                                  size,
                                  activation=activation,
                                  scope="mlp_layer_{}".format(i))
            if dropout_plc is not None:
                # dropout_plc holds the keep probability (TF 1.x dropout).
                mlp_input = tf.nn.dropout(mlp_input, dropout_plc)

    return mlp_input
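Every example on this page calls a project-level nonlinear helper that is not shown here. The sketch below is only a hedged assumption about what it does (a fully connected layer followed by the given activation), together with an illustrative TF 1.x wiring of multilayer_perceptron; the tensor names and sizes are made up.

import tensorflow as tf

def nonlinear(inputs, size, activation=tf.tanh, scope="nonlinear"):
    # Hypothetical stand-in for the project's helper: concatenate list
    # inputs, apply a dense layer, then the activation.
    if isinstance(inputs, (list, tuple)):
        inputs = tf.concat(inputs, axis=-1)
    with tf.variable_scope(scope):
        return activation(tf.layers.dense(inputs, size))

# Illustrative usage (TF 1.x graph mode); dropout_plc is the keep probability.
features = tf.placeholder(tf.float32, [None, 128], name="features")
keep_prob = tf.placeholder_with_default(1.0, [], name="keep_prob")
hidden = multilayer_perceptron(features, [256, 64],
                               activation=tf.nn.relu,
                               dropout_plc=keep_prob)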
Example #2
def _projection(prev_state, prev_output, ctx_tensors, train_mode):
    # output_size and activation_fn come from the enclosing decoder scope.
    return nonlinear([prev_state, prev_output] + ctx_tensors,
                     output_size,
                     activation=activation_fn)
Example #3
def _projection(prev_state, prev_output, ctx_tensors, train_mode):
    # Variant that ignores prev_output; output_size again comes from the
    # enclosing scope.
    return nonlinear([prev_state] + ctx_tensors,
                     output_size,
                     activation=tf.nn.relu,
                     scope="AttnOutputProjection")
Example #4
        def body(*args) -> LoopState:
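            """Perform one decoding step and return the updated LoopState."""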
            loop_state = LoopState(*args)
            step = loop_state.step

            with tf.variable_scope(self.step_scope):
                # Compute the input to the RNN
                rnn_input = self.input_projection(*loop_state)

                # Run the RNN.
                cell = self._get_rnn_cell()
                if self._rnn_cell_str == 'GRU':
                    cell_output, state = cell(rnn_input,
                                              loop_state.prev_rnn_output)
                    next_state = state
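                    # Query every attention object with the new cell output;
                    # each returns a context vector and an updated loop state.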
                    attns = [
                        a.attention(cell_output, loop_state.prev_rnn_output,
                                    rnn_input, att_loop_state, loop_state.step)
                        for a, att_loop_state in zip(
                            att_objects, loop_state.attention_loop_states)
                    ]
                    if att_objects:
                        contexts, att_loop_states = zip(*attns)
                    else:
                        contexts, att_loop_states = [], []

                    if self._conditional_gru:
                        cell_cond = self._get_conditional_gru_cell()
                        cond_input = tf.concat(contexts, -1)
                        cell_output, state = cell_cond(cond_input,
                                                       state,
                                                       scope="cond_gru_2_cell")
                elif self._rnn_cell_str == 'LSTM':
                    prev_state = tf.contrib.rnn.LSTMStateTuple(
                        loop_state.prev_rnn_state, loop_state.prev_rnn_output)
                    cell_output, state = cell(rnn_input, prev_state)
                    next_state = state.c
                    attns = [
                        a.attention(cell_output, loop_state.prev_rnn_output,
                                    rnn_input, att_loop_state, loop_state.step)
                        for a, att_loop_state in zip(
                            att_objects, loop_state.attention_loop_states)
                    ]
                    if att_objects:
                        contexts, att_loop_states = zip(*attns)
                    else:
                        contexts, att_loop_states = [], []
                else:
                    raise ValueError(
                        "Unknown RNN cell: {}".format(self._rnn_cell_str))

                with tf.name_scope("rnn_output_projection"):
                    if attns:
                        output = nonlinear([cell_output] + list(contexts),
                                           cell.output_size,
                                           activation=tf.nn.relu,
                                           scope="AttnOutputProjection")
                    else:
                        output = cell_output
                        att_loop_states = []

                logits = self._logit_function(output)

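            # Reuse the step variables on subsequent calls within this scope
            # instead of creating new ones.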
            self.step_scope.reuse_variables()

            if sample:
                # Squeeze and cast so the sampled ids match the [batch]
                # int32 shape of the other branches.
                next_symbols = tf.squeeze(
                    tf.to_int32(tf.multinomial(logits, num_samples=1)), axis=1)
            elif train_mode:
                next_symbols = loop_state.train_inputs[step]
            else:
                next_symbols = tf.to_int32(tf.argmax(logits, axis=1))
                int_unfinished_mask = tf.to_int32(
                    tf.logical_not(loop_state.finished))

                # Note: this works only because PAD_TOKEN_INDEX is 0;
                # otherwise the masking below would have to be rewritten.
                assert PAD_TOKEN_INDEX == 0
                next_symbols = next_symbols * int_unfinished_mask

            has_just_finished = tf.equal(next_symbols, END_TOKEN_INDEX)
            has_finished = tf.logical_or(loop_state.finished,
                                         has_just_finished)

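            # Assemble the loop state handed to the next decoding iteration.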
            new_loop_state = LoopState(
                step=step + 1,
                input_symbol=next_symbols,
                train_inputs=loop_state.train_inputs,
                prev_rnn_state=next_state,
                prev_rnn_output=cell_output,
                rnn_outputs=loop_state.rnn_outputs.write(
                    step + 1, cell_output),
                prev_contexts=list(contexts),
                prev_logits=logits,
                logits=loop_state.logits.write(step, logits),
                finished=has_finished,
                mask=loop_state.mask.write(step, tf.logical_not(has_finished)),
                attention_loop_states=list(att_loop_states))
            return new_loop_state
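The body closure is meant to be driven by tf.while_loop. A hedged sketch of that wiring, as it might sit right after body inside the same method, follows; max_steps and initial_loop_state are assumed names, and LoopState is taken to be a namedtuple so that its fields become the individual loop variables.

        def cond(*args) -> tf.Tensor:
            # Continue while some sequence is unfinished and the assumed
            # step limit has not been reached.
            loop_state = LoopState(*args)
            return tf.logical_and(
                tf.less(loop_state.step, max_steps),
                tf.logical_not(tf.reduce_all(loop_state.finished)))

        final_loop_state = LoopState(
            *tf.while_loop(cond, body, initial_loop_state))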