Ejemplo n.º 1
0
    class Net(nn.Cell):
        """Cell that stores a constant 2x3 float32 tensor and transposes it."""

        def __init__(self):
            super().__init__()
            rows = [[1, 2, 3], [4, 5, 6]]
            self.value = Tensor(rows, dtype=mstype.float32)

        def construct(self):
            """Return ``self.value.transpose(1.0, 0)``."""
            # NOTE(review): the first axis is the float literal 1.0, not the
            # int 1. This looks like a deliberate invalid-argument case, so it
            # is kept as-is -- confirm against the surrounding test code.
            stored = self.value
            return stored.transpose(1.0, 0)
Ejemplo n.º 2
0
    def construct(
        self,
        query: ms.Tensor,
        key: ms.Tensor,
        value: ms.Tensor,
        attn_mask: Optional[ms.Tensor] = None,
    ) -> Tuple[ms.Tensor, ms.Tensor]:
        r"""Compute attention weights from query/key and apply them to value.

        Args:
            query: [batch, num_attention_heads, len_query, dim_query]
            key: [batch, num_attention_heads, len_key, dim_key]
            value: [batch, num_attention_heads, len_value, dim_value]
            attn_mask: [batch, num_attention_heads, len_query, len_key];
                added to the scaled scores before the softmax, skipped
                when ``None``.

        Returns:
            A ``(context, attention)`` tuple: ``attention`` is the softmaxed
            scores after ``self.dropout``; ``context`` is the matrix product
            of ``attention`` and ``value``.
        """
        # Swap the last two axes of key so the product pairs query with key.
        raw_scores = ops.matmul(query, key.transpose(0, 1, 3, 2))

        # The divisor is sqrt of whatever generate_factor returns for the last
        # query dimension (generate_factor is defined outside this block).
        scale = ops.sqrt(generate_factor(query.shape[-1]))
        scores = raw_scores / scale

        if attn_mask is not None:
            scores = scores + attn_mask

        # Normalise along the last axis, then apply dropout.
        softmax = ops.Softmax(axis=-1)
        weights = self.dropout(softmax(scores))

        return ops.matmul(weights, value), weights