Example #1
    def _context_attention(self):
        with tf.name_scope('context_attention'):
            context_attention = BahdanauAttention(
                self._context, memory_len=self._history_size, mask_value=1e-18)

            self._encoder_state_with_context = context_attention(
                self._encoder_state)
Example #2
    def __init__(self,
                 tar_vocab_size,
                 batch_sz,
                 name="gru",
                 embedding_dim=300,
                 learn_embedding=True,
                 embedding_matrix=None,
                 dec_units=128,
                 max_length=None):
        """Initialize attention-based decoder architecture.

        Arguments:
            tar_vocab_size {int} -- Size of the target vocabulary.
            batch_sz {int} -- Batch size.

        Keyword Arguments:
            name {str} -- Name of the recurrent layer. Choices: ['lstm', 'gru'] (default: {"gru"})
            embedding_dim {int} -- Size of the token embedding. (default: {300})
            learn_embedding {bool} -- Whether to learn the embedding as part of the network or use a pre-trained one. (default: {True})
            embedding_matrix {numpy.ndarray} -- Pre-trained embedding matrix, used when learn_embedding is False. (default: {None})
            dec_units {int} -- Number of decoder units. (default: {128})
            max_length {int} -- Maximum target sequence length, passed to the embedding layer as input_length. (default: {None})

        Raises:
            ValueError: Raised when a wrong recurrent layer name is passed.
        """
        super().__init__()
        self.batch_sz = batch_sz
        self.dec_units = dec_units
        if learn_embedding:
            # Learn the embedding as part of the network
            self.embedding = keras.layers.Embedding(input_dim=tar_vocab_size,
                                                    output_dim=embedding_dim,
                                                    input_length=max_length,
                                                    mask_zero=True)
        else:
            # Use pre-trained embeddings such as GloVe
            self.embedding = keras.layers.Embedding(input_dim=tar_vocab_size,
                                                    output_dim=embedding_dim,
                                                    weights=[embedding_matrix],
                                                    trainable=False,
                                                    input_length=max_length)
        # Attention layer
        self.attention = BahdanauAttention(self.dec_units)
        # Decoder
        if name == "lstm":
            self.decoder_layer = keras.layers.LSTM(
                self.dec_units,
                return_sequences=True,
                return_state=False,
                recurrent_initializer="glorot_uniform")
        elif name == "gru":
            self.decoder_layer = keras.layers.GRU(
                self.dec_units,
                return_sequences=True,
                return_state=False,
                recurrent_initializer="glorot_uniform")
        else:
            raise ValueError("Wrong recurrent layer name passed! {}".format(name))
        # Dense projection onto the target vocabulary
        self.fc = keras.layers.Dense(tar_vocab_size)
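A hedged usage sketch for the decoder above (the enclosing class name is not shown in the example; AttentionDecoder and every hyperparameter value below are illustrative assumptions only):

# Hypothetical instantiation of the keras.Model subclass whose __init__ is shown above.
decoder = AttentionDecoder(tar_vocab_size=8000,
                           batch_sz=64,
                           name="gru",
                           embedding_dim=300,
                           learn_embedding=True,
                           dec_units=128)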
Example #3
    def __init__(self, n_layers, n_word_src, n_word_dst, n_units):
        super(NStepEncDec, self).__init__()
        with self.init_scope():
            self.embed_src = L.EmbedID(n_word_src, n_units)
            self.embed_dst = L.EmbedID(n_word_dst, n_units)
            self.encoder = L.NStepBiLSTM(n_layers, n_units, n_units, 0.1)
            self.decoder = L.NStepLSTM(n_layers, n_units, n_units, 0.1)
            self.attention_mechanism = BahdanauAttention(n_units)
            self.decoder_with_attn = AttentionWrapper(self.decoder, self.attention_mechanism)
            self.fc = L.Linear(n_units, n_word_dst)

        self.n_layers = n_layers
        self.n_units = n_units
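A hedged instantiation of the Chainer model above (all sizes are illustrative; BahdanauAttention and AttentionWrapper here are the project's own classes, not chainer.links built-ins):

# Illustrative hyperparameters only: 2 layers, 40k-word vocabularies, 512 hidden units.
model = NStepEncDec(n_layers=2, n_word_src=40000, n_word_dst=40000, n_units=512)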
Example #4
    def __init__(self, embedding_dim, units, vocab_size):
        super(RNN_Decoder, self).__init__()
        self.units = units

        self.embedding = tf.keras.layers.Embedding(vocab_size, embedding_dim)
        self.gru = tf.keras.layers.GRU(self.units,  # dimensionality of the output space
                                       return_sequences=True,  # return the full output sequence, not just the last output
                                       return_state=True,  # also return the last state in addition to the output
                                       recurrent_initializer='glorot_uniform')
        self.fc1 = tf.keras.layers.Dense(self.units)
        self.fc2 = tf.keras.layers.Dense(vocab_size)

        self.attention = BahdanauAttention(self.units)
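The RNN_Decoder above assumes a BahdanauAttention layer defined elsewhere. A minimal self-contained sketch of such a layer (additive attention in the style of the TensorFlow tutorials; this is an assumption about its implementation, not the example's own code) could look like:

import tensorflow as tf

class BahdanauAttention(tf.keras.layers.Layer):
    # Additive (Bahdanau-style) attention: score = V(tanh(W1(values) + W2(query)))
    def __init__(self, units):
        super().__init__()
        self.W1 = tf.keras.layers.Dense(units)
        self.W2 = tf.keras.layers.Dense(units)
        self.V = tf.keras.layers.Dense(1)

    def call(self, query, values):
        # query: decoder hidden state, shape (batch, hidden)
        # values: encoder outputs, shape (batch, seq_len, hidden)
        query_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(self.W1(values) + self.W2(query_with_time_axis)))
        attention_weights = tf.nn.softmax(score, axis=1)  # (batch, seq_len, 1)
        context_vector = tf.reduce_sum(attention_weights * values, axis=1)  # (batch, hidden)
        return context_vector, attention_weights

A decoder built this way would typically call self.attention(hidden, enc_output) once per step and concatenate the returned context vector with the embedded input token before the GRU.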
Example #5
    def _decoder(self):
        with tf.variable_scope('decoder'):
            self._initialize_decoder_params()

            attention_cell = MultiAttentionWrapper(
                tf.contrib.rnn.GRUCell(self._state_size),
                BahdanauAttention(self._encoder_outputs,
                                  memory_len=self._input_seq_len),
                BahdanauAttention(self._context,
                                  memory_len=self._history_size,
                                  mask_value=1e-18))

            cell = tf.contrib.rnn.MultiRNNCell(
                [
                    # Only first cell has attention
                    attention_cell,
                    # Other cells
                    tf.contrib.rnn.GRUCell(self._state_size),
                    tf.contrib.rnn.GRUCell(self._state_size)
                ],
                state_is_tuple=True)

            decoder_outputs_ta, _, _ = tf.nn.raw_rnn(cell,
                                                     self._decoder_loop_fn)
            decoder_outputs = decoder_outputs_ta.stack()

            tf.summary.histogram('decoder_outputs', decoder_outputs)

            num_steps, batch_size, decoder_output_size = tf.unstack(
                tf.shape(decoder_outputs))
            self._decoder_logits = tf.reshape(
                self._output_projection_layer(
                    tf.reshape(decoder_outputs, [-1, decoder_output_size])),
                [num_steps, batch_size, self._embeddings_shape[0]])

            self.decoder_embedding_ids = tf.cast(
                tf.argmax(self._decoder_logits, 2), tf.int32)
Example #6
    def __init__(
        self,
        rnn_type: str = "gru",
        emb_size: int = 0,
        hidden_size: int = 0,
        encoder: Encoder = None,
        attention: str = "bahdanau",
        num_layers: int = 1,
        vocab_size: int = 0,
        dropout: float = 0.0,
        emb_dropout: float = 0.0,
        hidden_dropout: float = 0.0,
        init_hidden: str = "bridge",
        input_feeding: bool = True,
        freeze: bool = False,
        **kwargs
    ) -> None:
        """
        Create a recurrent decoder with attention.

        :param rnn_type: rnn type, valid options: "lstm", "gru"
        :param emb_size: target embedding size
        :param hidden_size: size of the RNN
        :param encoder: encoder connected to this decoder
        :param attention: type of attention, valid options: "bahdanau", "luong"
        :param num_layers: number of recurrent layers
        :param vocab_size: target vocabulary size
        :param dropout: Is applied between RNN layers.
        :param emb_dropout: Is applied to the RNN input (word embeddings).
        :param hidden_dropout: Is applied to the input to the attentional layer.
        :param init_hidden: If "bridge" (default), the decoder hidden states are
            initialized from a projection of the last encoder state,
            if "zeros" they are initialized with zeros,
            if "last" they are identical to the last encoder state
            (only if they have the same size)
        :param input_feeding: Use Luong's input feeding.
        :param freeze: Freeze the parameters of the decoder during training.
        :param kwargs:
        """

        super(RecurrentDecoder, self).__init__()

        self.emb_dropout = torch.nn.Dropout(p=emb_dropout, inplace=False)
        self.type = rnn_type
        self.hidden_dropout = torch.nn.Dropout(p=hidden_dropout, inplace=False)
        self.hidden_size = hidden_size
        self.emb_size = emb_size

        rnn = nn.GRU if rnn_type == "gru" else nn.LSTM

        self.input_feeding = input_feeding
        if self.input_feeding:  # Luong-style
            # combine embedded prev word + attention vector before feeding to the RNN
            self.rnn_input_size = emb_size + hidden_size
        else:
            # just feed prev word embedding
            self.rnn_input_size = emb_size

        # the decoder RNN
        self.rnn = rnn(
            self.rnn_input_size,
            hidden_size,
            num_layers,
            batch_first=True,
            dropout=dropout if num_layers > 1 else 0.0,
        )

        # combine output with context vector before output layer (Luong-style)
        self.att_vector_layer = nn.Linear(
            hidden_size + encoder.output_size, hidden_size, bias=True
        )

        self.output_layer = nn.Linear(hidden_size, vocab_size, bias=False)
        self._output_size = vocab_size

        if attention == "bahdanau":
            self.attention = BahdanauAttention(
                hidden_size=hidden_size,
                key_size=encoder.output_size,
                query_size=hidden_size,
            )
        elif attention == "luong":
            self.attention = LuongAttention(
                hidden_size=hidden_size, key_size=encoder.output_size
            )
        else:
            raise ValueError(
                "Unknown attention mechanism: %s. "
                "Valid options: 'bahdanau', 'luong'." % attention
            )

        self.num_layers = num_layers
        self.hidden_size = hidden_size

        # to initialize from the final encoder state of last layer
        self.init_hidden_option = init_hidden
        if self.init_hidden_option == "bridge":
            self.bridge_layer = nn.Linear(encoder.output_size, hidden_size, bias=True)
        elif self.init_hidden_option == "last":
            if encoder.output_size != self.hidden_size:
                if encoder.output_size != 2 * self.hidden_size:  # bidirectional
                    raise ValueError(
                        "For initializing the decoder state with the "
                        "last encoder state, their sizes have to match "
                        "(encoder: {} vs. decoder:  {})".format(
                            encoder.output_size, self.hidden_size
                        )
                    )
        if freeze:
            freeze_params(self)
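A hedged construction sketch for the RecurrentDecoder above. The __init__ shown only reads encoder.output_size, so a stand-in object exposing that attribute is enough to build the module in isolation (the stub class and all sizes below are illustrative assumptions, not part of the original code):

class _StubEncoder:
    # Minimal stand-in: RecurrentDecoder.__init__ above only accesses encoder.output_size.
    output_size = 512

decoder = RecurrentDecoder(rnn_type="gru",
                           emb_size=256,
                           hidden_size=512,
                           encoder=_StubEncoder(),
                           attention="bahdanau",
                           num_layers=2,
                           vocab_size=10000,
                           dropout=0.2,
                           emb_dropout=0.1,
                           hidden_dropout=0.1,
                           init_hidden="bridge",
                           input_feeding=True)

With init_hidden="bridge", the decoder adds a bridge_layer projecting the 512-dimensional encoder state to its own hidden size; with input_feeding=True, the RNN input size becomes emb_size + hidden_size = 768.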