Example #1
import math

from mxnet import nd
from mxnet.ndarray.linalg import gemm2


def attention(query, key, value, mask=None, dropout=None):
    """
    Compute scaled dot-product attention.
    """
    d_k = query.shape[-1]
    # Score each query against every key and rescale by sqrt(d_k).
    scores = gemm2(query, key, transpose_b=True) / math.sqrt(d_k)
    # Note: `mask` is accepted but not applied in this snippet.
    p_attn = nd.softmax(scores, axis=-1)
    if dropout is not None:
        p_attn = dropout(p_attn)
    return gemm2(p_attn, value), p_attn
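
A minimal usage sketch, assuming the imports above; the shapes and values are illustrative only:

# Hypothetical usage of the attention function above.
q = nd.random.normal(shape=(2, 4, 8))   # (batch, length, d_k)
k = nd.random.normal(shape=(2, 4, 8))
v = nd.random.normal(shape=(2, 4, 8))
out, weights = attention(q, k, v)
print(out.shape, weights.shape)          # (2, 4, 8) (2, 4, 4)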
Example #2
from typing import List, Tuple

import mxnet as mx

# Tensor, SNDense, get_activation_deriv and _broadcast_param are assumed to be
# defined in the surrounding GluonTS module.


def jacobian_sn_mlp_block_bf(
        layers: List[Tuple[mx.gluon.HybridBlock, Tensor]]) -> Tensor:
    """
    Brute-force computation of the Jacobian of an SNMLPBlock. The resulting
    Jacobian has shape (Batch dim1, ..., Output dim, Input dim).

    Parameters
    ----------
    layers
        A list of tuples where each tuple (layer, input) is associated to a
        composing layer of the SNMLPBlock, where layer corresponds to the
        associated object layer, along with its input tensor.

    Returns
    -------
    Tensor
        Jacobian of the SNMLPBlock computed at a given input
    """
    # Walk the layers from last to first, composing their Jacobians by the
    # chain rule. The block's last layer must be an SNDense so that `jac`
    # is initialized on the first iteration.
    for i, (layer, input) in enumerate(layers[::-1]):
        if isinstance(layer, SNDense):
            # broadcast weight of size (Output dim, Input dim)
            # to (Batch dim1, ..., Output dim, Input dim)
            jac_t = _broadcast_param(
                layer.weight,
                axes=range(len(input.shape[:-1])),
                sizes=input.shape[:-1],
            )
            if i == 0:
                jac = jac_t
            else:
                jac = gemm2(jac, jac_t)
        else:
            # act_deriv is of shape (Batch dim1, ..., Input dim)
            act_deriv = get_activation_deriv(layer)(mx.ndarray, input)
            # broadcast it to (Batch dim1, ..., Output dim, Input dim) so it
            # lines up with the accumulated jacobian
            jac_t = act_deriv.expand_dims(len(jac.shape[:-2])).broadcast_axes(
                axis=len(jac.shape[:-2]), size=jac.shape[-2])
            # element-wise chain-rule contribution of the activation layer
            jac = jac * jac_t
    return jac
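
To make the reverse-order loop concrete, here is a small NumPy sketch of the same chain-rule composition, independent of the GluonTS classes: for dense -> activation -> dense, the Jacobian is the product of the per-layer Jacobians, with the activation contributing a diagonal factor. The softplus activation is only an example choice.

# Illustrative NumPy sketch of the chain-rule composition performed above.
import numpy as np

x = np.random.randn(3)                  # input
W1, W2 = np.random.randn(4, 3), np.random.randn(2, 4)

h = W1 @ x                              # first dense layer
act_deriv = 1.0 / (1.0 + np.exp(-h))    # derivative of softplus is the sigmoid
y = W2 @ np.log1p(np.exp(h))            # softplus, then second dense layer

# Compose Jacobians from the last layer backwards, as in the loop above:
# J = J_dense2 @ diag(act'(h)) @ J_dense1
jac = W2
jac = jac * act_deriv[None, :]          # multiply by the activation derivative
jac = jac @ W1                          # then by the first dense Jacobian
print(jac.shape)                        # (2, 3), i.e. (Output dim, Input dim)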
Example #3
import numpy as np

from mxnet import nd
from mxnet.ndarray.linalg import gemm2


def ISSM(z, b, F, a, g, sigma, m_prior, S_prior):
    '''
    Kalman filter recursions for an innovation state space model (ISSM).
    The documentation for this code can be found at:
    https://gluon.mxnet.io/chapter12_time-series/issm-scratch.html
    '''
    H = F.shape[0]  # dimension of the latent state
    T = z.shape[0]  # number of observations

    eye_h = nd.array(np.eye(H))

    mu_seq = []
    S_seq = []
    log_p_seq = []

    for t in range(T):

        if t == 0:
            # At the first time step, use the prior
            mu_h = m_prior
            S_hh = S_prior
        else:
            # Otherwise predict from the previous filtered estimates
            # (mu_t, S_t) using the state transition equations.
            F_t = F[:, :, t]
            g_t = g[:, t].reshape((H, 1))

            mu_h = gemm2(F_t, mu_t)
            S_hh = gemm2(F_t, gemm2(S_t, F_t, transpose_b=1)) + \
                   gemm2(g_t, g_t, transpose_b=1)

        a_t = a[:, t].reshape((H, 1))
        # Predicted mean of the observation z_t
        mu_v = gemm2(mu_h, a_t, transpose_a=1)

        # Compute the Kalman gain (vector)
        S_hh_x_a_t = gemm2(S_hh, a_t)

        sigma_t = sigma[t]
        S_vv = gemm2(a_t, S_hh_x_a_t, transpose_a=1) + nd.square(sigma_t)
        kalman_gain = nd.broadcast_div(S_hh_x_a_t, S_vv)

        # Compute the error (delta)
        delta = z[t] - b[t] - mu_v

        # Filtered estimates
        mu_t = mu_h + gemm2(kalman_gain, delta)

        # Joseph's symmetrized update for covariance:
        ImKa = nd.broadcast_sub(eye_h, gemm2(kalman_gain, a_t, transpose_b=1))
        S_t = gemm2(gemm2(ImKa, S_hh), ImKa, transpose_b=1) + \
                nd.broadcast_mul(gemm2(kalman_gain, kalman_gain, transpose_b=1), nd.square(sigma_t))

        # Gaussian log-likelihood of the observation z_t
        log_p = (-0.5 * (delta * delta / S_vv
                         + np.log(2.0 * np.pi)
                         + nd.log(S_vv))
                 )

        mu_seq.append(mu_t)
        S_seq.append(S_t)
        log_p_seq.append(log_p)

    return log_p_seq
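
As a sanity check of the recursion above, here is a minimal NumPy sketch for the simplest case H = 1 (a local-level model with F = 1, a = 1, b = 0); all names and values are illustrative, not part of the original code.

# NumPy sketch of the same filtering recursion for H = 1 (local-level model).
import numpy as np

T = 20
z = np.cumsum(np.random.randn(T))       # toy observations
g, sigma = 0.3, 0.5                     # innovation strength, observation noise std
mu, S = 0.0, 1.0                        # prior mean and variance
log_p = 0.0
for t in range(T):
    if t > 0:
        S = S + g ** 2                  # predict: F = 1 leaves the mean unchanged
    S_vv = S + sigma ** 2               # predicted observation variance (a = 1)
    k = S / S_vv                        # Kalman gain
    delta = z[t] - mu                   # innovation (b = 0)
    mu = mu + k * delta                 # filtered mean
    S = (1 - k) ** 2 * S + k ** 2 * sigma ** 2   # Joseph-form covariance update
    log_p += -0.5 * (delta ** 2 / S_vv + np.log(2 * np.pi) + np.log(S_vv))
print(log_p)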