Example #1
import torch

def causal_linear_attention(q, k, v, amp_enabled=False):
    from fast_transformers.causal_product import CausalDotProduct
    # Cast half-precision / autocast inputs up to fp32 before the kernel call,
    # and cast the result back to half at the end.
    is_half = isinstance(q, torch.cuda.HalfTensor) or amp_enabled

    if is_half:
        q, k, v = map(lambda t: t.float(), (q, k, v))

    # Normalizer: inverse of the running (causal) sum of q·k along the sequence.
    D_inv = 1. / torch.einsum('...nd,...nd->...n', q, k.cumsum(dim=-2))
    # Unnormalized causal attention via the fused CUDA kernel.
    out = CausalDotProduct.apply(q, k, v)
    out = torch.einsum('...nd,...n->...nd', out, D_inv)

    if is_half:
        out = out.half()

    return out
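A minimal usage sketch (not part of the original snippet): the shapes, the CUDA placement, and the choice of positive q/k tensors are assumptions. The kernel requires a CUDA build of pytorch-fast-transformers, and q/k are kept positive here (as Performer-style feature maps produce) so the cumulative-sum normalizer never divides by zero.

import torch

# Hypothetical shapes: (batch, heads, seq_len, feature_dim)
q = torch.rand(2, 8, 1024, 64, device='cuda')   # positive features
k = torch.rand(2, 8, 1024, 64, device='cuda')   # positive features
v = torch.randn(2, 8, 1024, 64, device='cuda')

out = causal_linear_attention(q, k, v)
print(out.shape)  # torch.Size([2, 8, 1024, 64])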
Example #2
from fast_transformers.causal_product import CausalDotProduct

def causal_linear_attention(q, k, v):
    # Unnormalized causal linear attention via the fused CUDA kernel.
    return CausalDotProduct.apply(q, k, v)

def cuda_causal_numerator(queries, keys_t, values):
    # Keys arrive transposed as (..., dim, seq); flip them back before the kernel call.
    return CausalDotProduct.apply(queries,
                                  keys_t.transpose(-2, -1),
                                  values)
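For completeness, a hedged sketch of how the transposed-key wrapper above might be called; the shapes and the separate normalization step are assumptions, not part of the original snippet, and mirror the normalizer from Example #1.

import torch

q = torch.rand(2, 8, 512, 64, device='cuda')
k_t = torch.rand(2, 8, 64, 512, device='cuda')   # keys stored as (..., dim, seq)
v = torch.randn(2, 8, 512, 64, device='cuda')

num = cuda_causal_numerator(q, k_t, v)           # unnormalized causal output
# Normalization is applied separately, as in Example #1:
d_inv = 1. / torch.einsum('...nd,...nd->...n', q, k_t.transpose(-2, -1).cumsum(dim=-2))
out = torch.einsum('...nd,...n->...nd', num, d_inv)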