Example #1
    def forward(self, q, k, v, mask=None):
        # (B, Nq, E) x (B, E, Nk) -> (B, Nq, Nk)
        attn = flow.bmm(q, k.transpose(1, 2))
        attn = attn / self.temperature

        if mask is not None:
            # Masked positions go to -inf so softmax assigns them zero weight.
            attn = attn.masked_fill(mask, -np.inf)

        attn = self.softmax(attn)
        attn = self.dropout(attn)
        # (B, Nq, Nk) x (B, Nk, Ev) -> (B, Nq, Ev)
        output = flow.bmm(attn, v)

        return output, attn
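For context, this forward method relies on self.temperature, self.softmax, and self.dropout, which the enclosing attention module must set up. A minimal sketch of what that module presumably looks like (the class name and constructor arguments here are assumptions, not part of the original example):

import numpy as np
import oneflow as flow

class ScaledDotProductAttention(flow.nn.Module):
    # Hypothetical enclosing module supplying the attributes used above.
    def __init__(self, temperature, attn_dropout=0.1):
        super().__init__()
        self.temperature = temperature          # typically sqrt(d_k)
        self.softmax = flow.nn.Softmax(dim=2)   # normalize over key positions
        self.dropout = flow.nn.Dropout(attn_dropout)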
Example #2
import numpy as np
import oneflow as flow


def _test_bmm_backward(test_case, device):
    input1 = flow.tensor(
        [
            [
                [
                    -0.0036776792258024216, 1.9946473836898804,
                    -0.423959881067276
                ],
                [
                    1.0892143249511719, 0.04005361348390579,
                    -0.27883127331733704
                ],
            ],
            [
                [
                    -0.970306396484375, 0.017771577462553978,
                    0.019596196711063385
                ],
                [
                    0.27402883768081665, -0.8192587494850159,
                    -0.3135920464992523
                ],
            ],
        ],
        dtype=flow.float32,
        device=flow.device(device),
        requires_grad=True,
    )
    input2 = flow.tensor(
        [
            [
                [1.118346929550171, -0.930071234703064],
                [1.1238232851028442, 1.373764157295227],
                [0.17178462445735931, -1.1010534763336182],
            ],
            [
                [0.6694859862327576, 0.9250285029411316],
                [-1.0835869312286377, 0.4192655086517334],
                [1.2616937160491943, 0.33809131383895874],
            ],
        ],
        dtype=flow.float32,
        device=flow.device(device),
        requires_grad=True,
    )
    of_out = flow.bmm(input1, input2)
    of_out = of_out.sum()
    of_out.backward()
    # For of_out = (A @ B).sum(), dA = 1 @ B^T: every row of the expected
    # grad repeats, and entry j is the sum of row j of input2.
    np_grad = [
        [
            [0.18827569484710693, 2.4975874423980713, -0.9292688369750977],
            [0.18827569484710693, 2.4975874423980713, -0.9292688369750977],
        ],
        [
            [1.5945144891738892, -0.6643214225769043, 1.5997850894927979],
            [1.5945144891738892, -0.6643214225769043, 1.5997850894927979],
        ],
    ]
    test_case.assertTrue(
        np.allclose(input1.grad.numpy(), np_grad, atol=1e-05, rtol=1e-05))
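Where those hard-coded gradients come from: for of_out = (A @ B).sum(), the gradient with respect to A is ones_like(A @ B) @ B^T. A standalone numpy sketch of that identity on random data (not the test's values):

import numpy as np

A = np.random.randn(2, 2, 3)
B = np.random.randn(2, 3, 2)
# Gradient of (A @ B).sum() w.r.t. A, computed analytically.
grad_A = np.ones((2, 2, 2)) @ B.transpose(0, 2, 1)
# Every row of grad_A[b] equals the row-sums of B[b].
assert np.allclose(grad_A[0, 0], B[0].sum(axis=1))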
Example #3
import oneflow as flow


def test_bmm_exception_dim_not_right(test_case):
    # flow.tensor((2, 2)) builds a 1-D tensor holding [2, 2], not a 2x2 matrix,
    # so bmm (which requires 3-D inputs) must raise.
    x = flow.tensor((2, 2))
    with test_case.assertRaises(RuntimeError) as ctx:
        y = flow.bmm(x, x)
    test_case.assertTrue(
        "Expected 3-dimensional tensor, but got 1-dimensional tensor for argument #1"
        in str(ctx.exception))
Example #4
import math
from typing import Optional, Tuple

import oneflow as flow
from oneflow import Tensor


def _scaled_dot_product_attention(
    q: Tensor,
    k: Tensor,
    v: Tensor,
    attn_mask: Optional[Tensor] = None,
    dropout_p: float = 0.0,
) -> Tuple[Tensor, Tensor]:
    B, Nt, E = q.shape
    q = q / math.sqrt(E)
    # (B, Nt, E) x (B, E, Ns) -> (B, Nt, Ns)
    attn = flow.bmm(q, k.transpose(-2, -1))
    if attn_mask is not None:
        attn += attn_mask
    attn = flow.softmax(attn, dim=-1)
    if dropout_p > 0.0:
        attn = flow.nn.functional.dropout(attn, p=dropout_p)
    # (B, Nt, Ns) x (B, Ns, E) -> (B, Nt, E)
    output = flow.bmm(attn, v)
    return output, attn
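A quick sketch of how this helper might be called (the shapes are arbitrary assumptions: B=2 batches, Nt=5 target and Ns=7 source positions, E=8 dims):

import numpy as np

q = flow.tensor(np.random.randn(2, 5, 8), dtype=flow.float32)
k = flow.tensor(np.random.randn(2, 7, 8), dtype=flow.float32)
v = flow.tensor(np.random.randn(2, 7, 8), dtype=flow.float32)
output, attn = _scaled_dot_product_attention(q, k, v)
# output: (2, 5, 8); each attention row is a distribution over the 7 keys.
assert np.allclose(attn.sum(dim=-1).numpy(), 1.0, atol=1e-5)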
Example #5
import numpy as np
import oneflow as flow


def _test_bmm(test_case, device):
    # Compare flow.bmm against numpy's batched matmul on random 3-D inputs.
    input1 = flow.tensor(np.random.randn(10, 3, 4),
                         dtype=flow.float32,
                         device=flow.device(device))
    input2 = flow.tensor(np.random.randn(10, 4, 5),
                         dtype=flow.float32,
                         device=flow.device(device))
    of_out = flow.bmm(input1, input2)
    np_out = np.matmul(input1.numpy(), input2.numpy())
    test_case.assertTrue(np.allclose(of_out.numpy(), np_out, 1e-05, 1e-05))
Example #6
def _bmm(self, other):
    # Method-style wrapper that forwards to the functional flow.bmm.
    return flow.bmm(self, other)
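A wrapper with this signature is presumably registered as a method on flow.Tensor elsewhere in the codebase, which would make the method form below equivalent to the functional form (a sketch; the shapes are arbitrary):

import numpy as np
import oneflow as flow

x = flow.tensor(np.random.randn(4, 2, 3), dtype=flow.float32)
y = flow.tensor(np.random.randn(4, 3, 5), dtype=flow.float32)
out_fn = flow.bmm(x, y)   # functional form
out_method = x.bmm(y)     # method form, backed by a wrapper like _bmm
assert np.allclose(out_fn.numpy(), out_method.numpy())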