class Net(nn.Cell):
    """Cell that stores a constant 2x3 float32 tensor and transposes it."""

    def __init__(self):
        super().__init__()
        # Fixed operand for the transpose call in ``construct``.
        self.value = Tensor([[1, 2, 3], [4, 5, 6]], dtype=mstype.float32)

    def construct(self):
        """Return the result of ``self.value.transpose(1.0, 0)``."""
        # NOTE(review): the first axis is the float ``1.0`` rather than the
        # int ``1``. This looks like an intentionally invalid argument (e.g.
        # for a negative test of axis type checking) -- confirm with the
        # caller before changing it.
        return self.value.transpose(1.0, 0)
def construct(
    self,
    query: ms.Tensor,
    key: ms.Tensor,
    value: ms.Tensor,
    attn_mask: Optional[ms.Tensor] = None,
) -> Tuple[ms.Tensor, ms.Tensor]:
    r"""Compute scaled dot-product attention.

    Args:
        query: [batch, num_attention_heads, len_query, dim_query]
        key: [batch, num_attention_heads, len_key, dim_key]
        value: [batch, num_attention_heads, len_value, dim_value]
        attn_mask: [batch, num_attention_heads, len_query, len_key].
            When given, it is added to the scaled scores before the
            softmax; when ``None``, no masking is applied.

    Returns:
        A ``(context, attention)`` tuple. ``attention`` is the softmax
        (over the last axis) of the scaled scores after ``self.dropout``
        has been applied; ``context`` is ``attention`` multiplied with
        ``value``.
    """
    # Swap the last two axes of the key so that query @ key^T yields one
    # score per (query position, key position) pair.
    key_transposed = key.transpose(0, 1, 3, 2)
    scores = ops.matmul(query, key_transposed)

    # Scale by sqrt(generate_factor(dim_query)); ``generate_factor`` is
    # defined elsewhere -- presumably it wraps the dimension as a tensor.
    scale = ops.sqrt(generate_factor(query.shape[-1]))
    scores = scores / scale

    if attn_mask is not None:
        # Additive mask: applied to the raw scores, before normalization.
        scores = scores + attn_mask

    softmax = ops.Softmax(axis=-1)
    weights = self.dropout(softmax(scores))

    # The returned weights are the post-dropout ones, as used for the context.
    return ops.matmul(weights, value), weights