Example no. 1
    def _compute_log_probs(self, logits):
        """ Computes log probabilities.

        Here, `num_samples` == `beam_size` * `batch_size`.

        Args:
            logits: The logits Tensor with shape [num_samples, vocab_size],
              or a list of logits Tensors.

        Returns: The log probability Tensor with shape [num_samples, vocab_size].

        """
        logits = nest.flatten(logits)
        if len(logits) == 1:
            probs = advanced_log_softmax(logits[0])  # negative
        else:
            assert len(logits) == len(self._ensemble_weights), (
                "ensemble weights must have the same length as logits")
            dim_vocab = logits[0].get_shape().as_list()[-1]
            # [1, batch_size * beam_size * vocab_target]
            probs = nest.map_structure(
                lambda x: tf.expand_dims(
                    tf.reshape(advanced_softmax(x), shape=[-1]), axis=0),
                logits)
            # [num_models, batch_size * beam_size * vocab_target]
            probs = tf.concat(probs, axis=0)
            # [1, num_models]
            weights = tf.expand_dims(
                tf.convert_to_tensor(self._ensemble_weights, dtype=tf.float32),
                axis=0)
            probs = tf.matmul(weights, probs)
            probs = tf.log(tf.reshape(probs, [-1, dim_vocab]))
        return probs
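The ensemble branch above takes the softmax of each model's logits, averages the resulting distributions with `self._ensemble_weights`, and then takes the log. Below is a minimal NumPy sketch of that computation (not part of the original code; the model count, weights, and shapes are illustrative assumptions):

import numpy as np

def softmax(x, axis=-1):
    # subtract the max for numerical stability before exponentiating
    x = x - x.max(axis=axis, keepdims=True)
    e = np.exp(x)
    return e / e.sum(axis=axis, keepdims=True)

num_models, num_samples, vocab_size = 2, 3, 5
rng = np.random.default_rng(0)
logits_list = [rng.normal(size=(num_samples, vocab_size)) for _ in range(num_models)]
weights = np.array([0.6, 0.4])  # hypothetical ensemble weights, summing to 1

# Weighted average of per-model probabilities, then log -- mirrors the
# reshape/matmul/log steps in the ensemble branch above.
probs = np.stack([softmax(l) for l in logits_list])  # [num_models, num_samples, vocab_size]
log_probs = np.log(np.einsum("m,msv->sv", weights, probs))
assert log_probs.shape == (num_samples, vocab_size)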
Example no. 3
    def att_fn(self, query, keys, bias=None):
        """ Computes attention scores.

        Args:
            query: Attention query tensor with shape
              [batch_size, channels_query]
            keys: Attention keys tensor with shape
              [batch_size, num_of_keys, channels_key]
            bias: The bias Tensor added to the attention logits,
              broadcastable to [batch_size, num_of_keys], or None

        Returns: A Tensor, [batch_size, num_of_keys]
        """
        v_att = tf.get_variable("v_att",
                                shape=[self.params["num_units"]],
                                dtype=tf.float32)
        logits = tf.reduce_sum(
            v_att * tf.tanh(keys + tf.expand_dims(query, 1)), [2])
        if bias is not None:
            logits += bias
        attention_scores = advanced_softmax(logits)
        attention_scores = dropout_wrapper(
            attention_scores, self.params["dropout_attention_keep_prob"])
        return attention_scores
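`att_fn` above implements additive (Bahdanau-style) scoring: combine the query with every key through a tanh, reduce with the learned vector `v_att`, and normalize with a softmax over the keys. Here is a minimal NumPy sketch of the same arithmetic (not part of the original code; the batch size, key count, and `num_units` are illustrative assumptions):

import numpy as np

batch_size, num_of_keys, num_units = 2, 4, 8
rng = np.random.default_rng(0)
query = rng.normal(size=(batch_size, num_units))
keys = rng.normal(size=(batch_size, num_of_keys, num_units))
v_att = rng.normal(size=(num_units,))  # stands in for the learned "v_att" variable

# score = sum over channels of v_att * tanh(keys + query), as in att_fn
logits = (v_att * np.tanh(keys + query[:, None, :])).sum(axis=2)  # [batch_size, num_of_keys]
# softmax over the key axis
scores = np.exp(logits - logits.max(axis=1, keepdims=True))
scores /= scores.sum(axis=1, keepdims=True)
assert scores.shape == (batch_size, num_of_keys)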
def dot_product_attention(q, k, bias=None, dropout_keep_prob=1.0):
    """ Computes attention weight according to query and key.

    Args:
        q: A query Tensor with shape [..., length_q, depth].
        k: A keys Tensor with shape [..., length_k, depth].
        bias: A bias Tensor broadcastable to the logits shape
          [..., length_q, length_k], e.g. [..., 1, length_k].
        dropout_keep_prob: A float scalar.

    Returns: The attention scores Tensor with shape
      [..., length_q, length_k].
    """
    with tf.variable_scope("dot_product_attention", values=[q, k]):
        logits = tf.matmul(q, k, transpose_b=True)
        if bias is not None:
            logits += bias
        weights = advanced_softmax(logits)
        # dropout the attention links for each of the heads
        weights = dropout_wrapper(weights, keep_prob=dropout_keep_prob)
        return weights
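`dot_product_attention` computes softmax(q @ k^T + bias); the bias is typically used to drive padded key positions toward zero weight by adding a large negative value. A minimal NumPy sketch of that shape flow (not part of the original code; the lengths, depth, and padding mask are illustrative assumptions):

import numpy as np

length_q, length_k, depth = 3, 4, 8
rng = np.random.default_rng(0)
q = rng.normal(size=(length_q, depth))
k = rng.normal(size=(length_k, depth))
pad_mask = np.array([0, 0, 0, 1])  # hypothetical mask: 1 marks a padded key position
bias = pad_mask * -1e9             # broadcasts over the query axis

logits = q @ k.T + bias            # [length_q, length_k]
# softmax over the key axis
weights = np.exp(logits - logits.max(axis=-1, keepdims=True))
weights /= weights.sum(axis=-1, keepdims=True)
assert np.allclose(weights[:, -1], 0.0)  # the padded key receives ~zero weight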
Example no. 5
    def _dot_product_attention(self, q, k, bias):
        """ Computes attention weight according to query and key.

        Args:
            q: A query Tensor with shape [batch_size, num_heads, length_q, depth / num_heads].
            k: A keys Tensor with shape [batch_size, num_heads, length_k, depth / num_heads].
            bias: A bias Tensor broadcastable to the logits shape
              [batch_size, num_heads, length_q, length_k],
              e.g. [batch_size, 1, 1, length_k].

        Returns: The attention scores Tensor with shape
          [batch_size, num_heads, length_q, length_k].
        """
        with tf.variable_scope("dot_product_attention", values=[q, k]):
            logits = tf.matmul(q, k, transpose_b=True)
            if bias is not None:
                logits += bias
            weights = algebra_ops.advanced_softmax(logits)
            # dropout the attention links for each of the heads
            weights = dropout_wrapper(
                weights, keep_prob=self._dropout_attention_keep_prob)
            return weights
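The multi-head variant above operates on rank-4 tensors, producing one [length_q, length_k] score matrix per head and per batch element. A minimal NumPy sketch of that shape flow (not part of the original code; all sizes are illustrative assumptions):

import numpy as np

batch_size, num_heads, length_q, length_k, depth_per_head = 2, 4, 3, 5, 16
rng = np.random.default_rng(0)
q = rng.normal(size=(batch_size, num_heads, length_q, depth_per_head))
k = rng.normal(size=(batch_size, num_heads, length_k, depth_per_head))

# batched matmul over the two leading axes, transposing k on its last two axes
logits = q @ np.swapaxes(k, -1, -2)  # [batch_size, num_heads, length_q, length_k]
# softmax over the key axis
weights = np.exp(logits - logits.max(axis=-1, keepdims=True))
weights /= weights.sum(axis=-1, keepdims=True)
assert weights.shape == (batch_size, num_heads, length_q, length_k)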