Example #1
    def decode(self, targets, encoder_outputs, attention_bias, training):
        with tf.name_scope("decode"):
            # Prepare inputs to decoder layers by shifting targets, adding positional
            # encoding and applying dropout.
            decoder_inputs = self.embedding_softmax_layer(targets)
            decoder_inputs = tf.cast(decoder_inputs, tf.float32)
            attention_bias = tf.cast(attention_bias, tf.float32)
            with tf.name_scope("shift_targets"):
                # Shift targets to the right, and remove the last element
                decoder_inputs = tf.pad(decoder_inputs,
                                        [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
            with tf.name_scope("add_pos_encoding"):
                length = tf.shape(decoder_inputs)[1]
                pos_encoding = transformer_utils.get_position_encoding(
                    length, self.args.hidden_size)
                pos_encoding = tf.cast(pos_encoding, tf.float32)
                decoder_inputs += pos_encoding
            if training:
                decoder_inputs = tf.nn.dropout(
                    decoder_inputs, rate=self.args.dropout)

            # Build the causal self-attention bias and run the decoder stack.
            decoder_self_attention_bias = transformer_utils.get_decoder_self_attention_bias(
                length, dtype=tf.float32)
            outputs = self.decoder_stack(
                decoder_inputs,
                encoder_outputs,
                decoder_self_attention_bias,
                attention_bias,
                training=training)
            logits = self.embedding_softmax_layer(outputs, mode="linear")
            logits = tf.cast(logits, tf.float32)

            return logits
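The "shift_targets" step above is the usual right-shift for teacher forcing: one zero vector is padded onto the front of the time axis and the last step is dropped, so the embedding fed at position t is the embedding of target t-1. A minimal standalone sketch on a toy tensor (not the model's real inputs):

import tensorflow as tf

# Toy batch of embedded targets: [batch=1, length=4, hidden=2].
decoder_inputs = tf.reshape(tf.range(8, dtype=tf.float32), [1, 4, 2])

# Pad one step of zeros at the front of the time axis and drop the last step,
# so position t only ever sees embeddings of targets before t.
shifted = tf.pad(decoder_inputs, [[0, 0], [1, 0], [0, 0]])[:, :-1, :]

print(shifted.numpy())
# [[[0. 0.]   <- zero "start" vector
#   [0. 1.]
#   [2. 3.]
#   [4. 5.]]]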
    def __call__(self, adj, nodes, roles, targ, mask):
        """
        Puts the tensors through encoders and decoders
        :param adj: Adjacency matrices of input example
        :type adj: tf.tensor
        :param nodes: node features
        :type nodes: tf.tensor
        :param targ: target sequences
        :type targ: tf.tensor
        :return: output probability distribution
        :rtype: tf.tensor
        """
        node_tensor = self.emb_node_layer(nodes)
        role_tensor = self.emb_role_layer(roles)
        if targ is not None:
            decoder_inputs = self.emb_tgt_layer(targ)
            decoder_inputs = tf.cast(decoder_inputs, tf.float32)

        node_tensor = tf.cast(node_tensor, tf.float32)
        role_tensor = tf.cast(role_tensor, tf.float32)

        enc_output = self.encoder(node_tensor, adj, role_tensor,
                                  self.num_heads, self.encoder.trainable)
        attention_bias = transformer_utils.get_padding_bias(nodes)
        attention_bias = tf.cast(attention_bias, tf.float32)

        if targ is None:
            predictions = self.predict(enc_output, attention_bias, False)
            return predictions

        with tf.name_scope("shift_targets"):
            # Shift targets to the right, and remove the last element
            decoder_inputs = tf.pad(decoder_inputs,
                                    [[0, 0], [1, 0], [0, 0]])[:, :-1, :]
        with tf.name_scope("add_pos_encoding"):
            length = tf.shape(decoder_inputs)[1]
            pos_encoding = transformer_utils.get_position_encoding(
                length, self.args.hidden_size)
            pos_encoding = tf.cast(pos_encoding, tf.float32)
            decoder_inputs += pos_encoding
        if self.trainable:
            decoder_inputs = tf.nn.dropout(decoder_inputs,
                                           rate=self.args.dropout)

        # Build the causal self-attention bias and run the decoder stack.
        decoder_self_attention_bias = transformer_utils.get_decoder_self_attention_bias(
            length, dtype=tf.float32)
        outputs = self.decoder_stack(decoder_inputs,
                                     enc_output,
                                     decoder_self_attention_bias,
                                     attention_bias,
                                     training=self.trainable)
        predictions = self.final_layer(outputs)

        return predictions
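transformer_utils.get_padding_bias above turns the raw node ids into an additive attention bias; a rough standalone equivalent (assuming id 0 marks padding, which is only a guess at the project's convention) is sketched below. The bias is added to the attention logits so padded key positions contribute essentially nothing after the softmax.

import tensorflow as tf

_NEG_INF = -1e9

def padding_attention_bias(token_ids, pad_id=0):
    # 1.0 where the input equals the (assumed) padding id, 0.0 elsewhere.
    padding = tf.cast(tf.equal(token_ids, pad_id), tf.float32)
    # Large negative bias at padded positions, broadcastable to
    # [batch, num_heads, query_length, key_length].
    return padding[:, tf.newaxis, tf.newaxis, :] * _NEG_INF

nodes = tf.constant([[7, 3, 0, 0]])  # one sequence with two padded slots
print(padding_attention_bias(nodes)[0, 0, 0].numpy())
# roughly [0, 0, -1e9, -1e9]: padded keys are suppressed in attention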
    def _get_symbols_to_logits_fn(self, max_decode_length, training):
        """Returns a decoding function that calculates logits of the next tokens."""

        timing_signal = transformer_utils.get_position_encoding(
            max_decode_length + 1, self.args.hidden_size)
        decoder_self_attention_bias = transformer_utils.get_decoder_self_attention_bias(
            max_decode_length)

        def symbols_to_logits_fn(ids, i, cache):
            """Generate logits for next potential IDs.
            Args:
              ids: Current decoded sequences. int tensor with shape [batch_size *
                beam_size, i + 1]
              i: Loop index
              cache: dictionary of values storing the encoder output, encoder-decoder
                attention bias, and previous decoder attention values.
            Returns:
              Tuple of
                (logits with shape [batch_size * beam_size, vocab_size],
                 updated cache values)
            """
            # Set decoder input to the last generated IDs
            decoder_input = ids[:, -1:]

            # Preprocess decoder input by getting embeddings and adding timing signal.
            decoder_input = self.emb_tgt_layer(decoder_input)
            decoder_input += timing_signal[i:i + 1]

            self_attention_bias = decoder_self_attention_bias[:, :,
                                                              i:i + 1, :i + 1]
            decoder_outputs = self.decoder_stack(
                decoder_input,
                cache.get("encoder_outputs"),
                self_attention_bias,
                cache.get("encoder_decoder_attention_bias"),
                training=training,
                cache=cache)
            logits = self.final_layer(decoder_outputs)
            logits = tf.squeeze(logits, axis=[1])
            return logits, cache

        return symbols_to_logits_fn
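The returned closure is meant to be driven one step at a time by a search routine (the model's predict path presumably runs beam search). A hypothetical greedy loop with a stand-in symbols_to_logits_fn shows the calling convention; the dummy function, its vocabulary size, and bos_id are invented for illustration only:

import tensorflow as tf

def dummy_symbols_to_logits_fn(ids, i, cache):
    # Stand-in for the real closure: every sequence prefers token (i + 1) mod 5.
    vocab_size = 5
    next_tokens = tf.fill(tf.shape(ids)[:1], (i + 1) % vocab_size)
    return tf.one_hot(next_tokens, vocab_size), cache

def greedy_decode(symbols_to_logits_fn, batch_size=2, max_decode_length=4, bos_id=0):
    # Start every sequence from a BOS id and append the argmax token each step;
    # the real decoder would also thread per-layer attention caches through `cache`.
    ids = tf.fill([batch_size, 1], bos_id)
    cache = {}
    for i in range(max_decode_length):
        logits, cache = symbols_to_logits_fn(ids, i, cache)
        next_id = tf.argmax(logits, axis=-1, output_type=tf.int32)[:, tf.newaxis]
        ids = tf.concat([ids, next_id], axis=1)
    return ids

print(greedy_decode(dummy_symbols_to_logits_fn).numpy())
# [[0 1 2 3 4]
#  [0 1 2 3 4]]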
Example #4
    def encode(self, inputs, attention_bias, training):
        with tf.name_scope("encode"):
            # Prepare inputs to the layer stack by adding positional encodings and
            # applying dropout.
            embedded_inputs = self.embedding_softmax_layer(inputs)
            embedded_inputs = tf.cast(embedded_inputs, tf.float32)
            inputs_padding = transformer_utils.get_padding(inputs)
            attention_bias = tf.cast(attention_bias, tf.float32)

            with tf.name_scope("add_positional_encoding"):
                length = tf.shape(embedded_inputs)[1]
                pos_encoding = transformer_utils.get_position_encoding(
                    length, self.args.hidden_size)
                pos_encoding = tf.cast(pos_encoding, tf.float32)
                encoder_inputs = embedded_inputs + pos_encoding

            if training:
                encoder_inputs = tf.nn.dropout(
                    encoder_inputs, rate=self.args.dropout)

            return self.encoder_stack(
                encoder_inputs, attention_bias, inputs_padding, training=training)
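get_position_encoding, used in both encode and decode, is the standard sinusoidal scheme from "Attention Is All You Need"; a self-contained approximation (a sketch, not the project's exact utility) for reference:

import numpy as np
import tensorflow as tf

def sinusoidal_position_encoding(length, hidden_size,
                                 min_timescale=1.0, max_timescale=1.0e4):
    # Half of the channels carry sines, the other half cosines, with
    # geometrically spaced timescales between min_timescale and max_timescale.
    position = np.arange(length, dtype=np.float32)
    num_timescales = hidden_size // 2
    log_timescale_increment = (
        np.log(max_timescale / min_timescale) / max(num_timescales - 1, 1))
    inv_timescales = min_timescale * np.exp(
        np.arange(num_timescales, dtype=np.float32) * -log_timescale_increment)
    scaled_time = position[:, np.newaxis] * inv_timescales[np.newaxis, :]
    signal = np.concatenate([np.sin(scaled_time), np.cos(scaled_time)], axis=1)
    return tf.constant(signal)  # shape [length, hidden_size], float32

# Broadcasts over the batch dimension when added to [batch, length, hidden] inputs.
print(sinusoidal_position_encoding(4, 8).shape)  # (4, 8)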