import numpy as np
from scipy.linalg import block_diag

# `einsum2`, `solve_triangular` (a broadcasting variant), `component_matrix`,
# `hs` (horizontal stack), `sym` (symmetrization), `dot3`, `T_` (transpose of
# the last two axes), and `_ensure_ndim` are assumed to be provided elsewhere
# in this package.


def kalman_filter(Y, A, C, Q, R, mu0, Q0):
    N = Y.shape[0]
    T, D, Dnlags = A.shape
    nlags = Dnlags // D
    AA = np.stack([component_matrix(At, nlags) for At in A], axis=0)

    p = C.shape[0]
    CC = hs([C, np.zeros((p, D * (nlags - 1)))])

    QQ = np.zeros((T, Dnlags, Dnlags))
    QQ[:, :D, :D] = Q

    QQ0 = block_diag(*[Q0 for _ in range(nlags)])

    mu_predict = np.stack([np.tile(mu0, nlags) for _ in range(N)], axis=0)
    sigma_predict = np.stack([QQ0 for _ in range(N)], axis=0)

    mus_filt = np.zeros((N, T, Dnlags))
    sigmas_filt = np.zeros((N, T, Dnlags, Dnlags))

    ll = 0.

    for t in range(T):
        # condition
        # dot3(CC, sigma_predict, CC.T) + R
        tmp1 = einsum2('ik,nkj->nij', CC, sigma_predict)
        sigma_pred = np.dot(tmp1, CC.T) + R
        sigma_pred = sym(sigma_pred)

        res = Y[..., t, :] - np.dot(mu_predict, CC.T)

        L = np.linalg.cholesky(sigma_pred)
        v = solve_triangular(L, res, lower=True)

        # log-likelihood over all trials
        ll += (-0.5 * np.sum(v * v)
               - np.sum(np.log(np.diagonal(L, axis1=1, axis2=2)))
               - N * p / 2. * np.log(2. * np.pi))

        mus_filt[..., t, :] = mu_predict + einsum2(
            'nki,nk->ni', tmp1, solve_triangular(L, v, trans='T', lower=True))

        tmp2 = solve_triangular(L, tmp1, lower=True)
        sigmas_filt[..., t, :, :] = sym(
            sigma_predict - einsum2('nki,nkj->nij', tmp2, tmp2))

        # prediction
        mu_predict = einsum2('ik,nk->ni', AA[t], mus_filt[..., t, :])

        sigma_predict = einsum2('ik,nkl->nil', AA[t], sigmas_filt[..., t, :, :])
        sigma_predict = sym(
            einsum2('nil,jl->nij', sigma_predict, AA[t]) + QQ[t])

    return ll, mus_filt, sigmas_filt
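
# Minimal usage sketch (illustrative, not part of the original source). All
# dimensions and parameter values are arbitrary assumptions, and we assume
# component_matrix(At, 1) returns At unchanged for a single-lag model.
def _demo_kalman_filter():
    rng = np.random.RandomState(0)
    N, T, D, p = 5, 50, 3, 4                  # trials, steps, latent, obs dims
    A = np.tile(0.9 * np.eye(D), (T, 1, 1))   # stable dynamics, nlags = 1
    C = rng.randn(p, D)
    Q = np.tile(0.1 * np.eye(D), (T, 1, 1))
    R = 0.1 * np.eye(p)
    mu0, Q0 = np.zeros(D), np.eye(D)
    Y = rng.randn(N, T, p)                    # stand-in observations
    ll, mus_filt, sigmas_filt = kalman_filter(Y, A, C, Q, R, mu0, Q0)
    assert mus_filt.shape == (N, T, D)
    return ll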

def rts_smooth_loop(Y, A, C, Q, R, mu0, Q0):
    N = Y.shape[0]
    T, D, _ = A.shape
    p = C.shape[0]

    mu_predict = np.zeros((T + 1, D))
    sigma_predict = np.zeros((T + 1, D, D))

    mus_smooth = np.empty((N, T, D))
    sigmas_smooth = np.empty((N, T, D, D))
    sigmas_smooth_tnt = np.empty((N, T - 1, D, D))

    ll = 0.

    for n in range(N):
        mu_predict[0] = mu0
        sigma_predict[0] = Q0

        for t in range(T):
            # condition
            tmp1 = np.dot(C, sigma_predict[t, :, :])
            sigma_pred = np.dot(tmp1, C.T) + R

            L = np.linalg.cholesky(sigma_pred)
            v = solve_triangular(L, Y[n, t, :] - np.dot(C, mu_predict[t, :]),
                                 lower=True)

            # log-likelihood contribution of this trial
            ll += (-0.5 * np.dot(v, v)
                   - np.sum(np.log(np.diag(L)))
                   - p / 2. * np.log(2. * np.pi))

            mus_smooth[n, t, :] = mu_predict[t] + np.dot(
                tmp1.T, solve_triangular(L, v, trans='T', lower=True))

            tmp2 = solve_triangular(L, tmp1, lower=True)
            sigmas_smooth[n, t, :, :] = sigma_predict[t] - np.dot(tmp2.T, tmp2)

            # prediction
            mu_predict[t + 1] = np.dot(A[t], mus_smooth[n, t, :])
            sigma_predict[t + 1] = dot3(A[t], sigmas_smooth[n, t, :, :],
                                        A[t].T) + Q[t]

        for t in range(T - 2, -1, -1):
            # these names are stolen from mattjj and scott
            temp_nn = np.dot(A[t], sigmas_smooth[n, t, :, :])

            L = np.linalg.cholesky(sigma_predict[t + 1, :, :])
            v = solve_triangular(L, temp_nn, lower=True)
            Gt_T = solve_triangular(L, v, trans='T', lower=True)

            # {mus,sigmas}_smooth[n,t] contain the filtered estimates so we're
            # overwriting them on purpose
            mus_smooth[n, t, :] = mus_smooth[n, t, :] + np.dot(
                T_(Gt_T), mus_smooth[n, t + 1, :] - mu_predict[t + 1, :])
            sigmas_smooth[n, t, :, :] = sigmas_smooth[n, t, :, :] + dot3(
                T_(Gt_T),
                sigmas_smooth[n, t + 1, :, :] - sigma_predict[t + 1, :, :],
                Gt_T)

            sigmas_smooth_tnt[n, t, :, :] = np.dot(
                sigmas_smooth[n, t + 1, :, :], Gt_T)

    return ll, mus_smooth, sigmas_smooth, sigmas_smooth_tnt
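
# Sketch of sampling from the generative model (illustrative assumptions:
# time-varying A and Q with the same indexing convention as the filters,
# i.e. x[t+1] = A[t] x[t] + noise with covariance Q[t]). Useful for
# exercising the filters and smoothers in this file on data with known
# ground truth.
def _sample_lds(rng, A, C, Q, R, mu0, Q0, N):
    T, D, _ = A.shape
    p = C.shape[0]
    x = np.empty((N, T, D))
    Y = np.empty((N, T, p))
    for n in range(N):
        x[n, 0] = rng.multivariate_normal(mu0, Q0)
        for t in range(T):
            if t > 0:
                x[n, t] = rng.multivariate_normal(np.dot(A[t-1], x[n, t-1]),
                                                  Q[t-1])
            Y[n, t] = rng.multivariate_normal(np.dot(C, x[n, t]), R)
    return x, Y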

def L2_obj(At, L_Q):
    # closure: B1, B2, B3, lam0, lam1 come from the enclosing scope
    AtB2T = einsum2('tik,tjk->tij', At, B2)
    B2AtT = einsum2('tik,tjk->tij', B2, At)

    # einsum2 is faster
    #AtB3AtT = np.einsum('tik,tkl,tjl->tij', At, B3, At)
    tmp = einsum2('tik,tkl->til', At, B3)
    AtB3AtT = einsum2('til,tjl->tij', tmp, At)

    elbo_2 = np.sum(B1 - AtB2T - B2AtT + AtB3AtT, axis=0)

    L_Q_inv_elbo_2 = solve_triangular(L_Q, elbo_2, lower=True)
    obj = np.trace(solve_triangular(L_Q, L_Q_inv_elbo_2, lower=True,
                                    trans='T'))

    obj += lam0 * np.sum(At**2)

    AtmAtm1_2 = (At[1:] - At[:-1])**2
    obj += lam1 * np.sum(AtmAtm1_2)

    return obj
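
# Usage sketch (assumptions: B1, B2, B3, lam0 and lam1 are bound in the
# enclosing scope, and einsum2/solve_triangular are autograd-differentiable):
#
#   from autograd import grad
#   g = grad(L2_obj)                  # gradient w.r.t. the first arg, At
#   At = At - step_size * g(At, L_Q)  # one plain gradient-descent step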

def lds_logZ(Y, A, C, Q, R, mu0, Q0):
    """
    Log-partition function computed via Kalman filter that broadcasts over
    the first dimension.

    Note: This function doesn't handle control inputs (yet).

    Y : ndarray, shape=(N, T, p)
        Observations

    A : ndarray, shape=(T, D, D)
        Time-varying dynamics matrices

    C : ndarray, shape=(p, D)
        Observation matrix

    mu0 : ndarray, shape=(D,)
        Mean of initial state variable

    Q0 : ndarray, shape=(D, D)
        Covariance of initial state variable

    Q : ndarray, shape=(T, D, D)
        Covariance of latent states

    R : ndarray, shape=(p, p)
        Covariance of observations
    """
    N = Y.shape[0]
    T, D, _ = A.shape
    p = C.shape[0]

    mu_predict = np.stack([mu0 for _ in range(N)], axis=0)
    sigma_predict = np.stack([Q0 for _ in range(N)], axis=0)

    mus_filt = np.zeros((N, D))
    sigmas_filt = np.zeros((N, D, D))

    ll = 0.

    for t in range(T):
        # condition
        #sigma_pred = dot3(C, sigma_predict, C.T) + R
        tmp1 = einsum2('ik,nkj->nij', C, sigma_predict)
        sigma_y = einsum2('nik,jk->nij', tmp1, C) + R
        sigma_y = sym(sigma_y)

        L = np.linalg.cholesky(sigma_y)
        # res[n] = Y[n,t,:] - np.dot(C, mu_predict[n])
        res = Y[..., t, :] - einsum2('ik,nk->ni', C, mu_predict)
        v = solve_triangular(L, res, lower=True)

        # log-likelihood over all trials
        ll += (-0.5 * np.sum(v * v)
               - np.sum(np.log(np.diagonal(L, axis1=-1, axis2=-2)))
               - N * p / 2. * np.log(2. * np.pi))

        #mus_filt = mu_predict + np.dot(tmp1, solve_triangular(L, v, 'T'))
        mus_filt = mu_predict + einsum2(
            'nki,nk->ni', tmp1, solve_triangular(L, v, trans='T', lower=True))

        tmp2 = solve_triangular(L, tmp1, lower=True)
        #sigmas_filt = sigma_predict - np.dot(tmp2, tmp2.T)
        sigmas_filt = sigma_predict - einsum2('nki,nkj->nij', tmp2, tmp2)
        sigmas_filt = sym(sigmas_filt)

        # prediction
        #mu_predict = np.dot(A[t], mus_filt[t])
        mu_predict = einsum2('ik,nk->ni', A[t], mus_filt)

        #sigma_predict = dot3(A[t], sigmas_filt[t], A[t].T) + Q[t]
        sigma_predict = einsum2('ik,nkl->nil', A[t], sigmas_filt)
        sigma_predict = einsum2('nil,jl->nij', sigma_predict, A[t]) + Q[t]
        sigma_predict = sym(sigma_predict)

    return ll
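
# Sanity-check sketch (illustrative): for a single-lag model, the
# log-partition function should match the log-likelihood returned by
# kalman_filter above, since both integrate out the same latent trajectory.
def _check_logZ(Y, A, C, Q, R, mu0, Q0):
    ll_filt, _, _ = kalman_filter(Y, A, C, Q, R, mu0, Q0)
    assert np.isclose(lds_logZ(Y, A, C, Q, R, mu0, Q0), ll_filt)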

def em_objective(Y, params, fixedparams, ldsregparams, mus_smooth,
                 sigmas_smooth, sigmas_tnt_smooth):
    At, L_Q, L_Q0 = params
    _, D, Dnlags = At.shape
    nlags = Dnlags // D

    ntrials, T, p = Y.shape

    C, L_R = fixedparams

    w_s = 1.

    x_smooth_0_outer = einsum2('ri,rj->rij', mus_smooth[:, 0, :D],
                               mus_smooth[:, 0, :D])
    B0 = w_s * np.sum(sigmas_smooth[:, 0, :D, :D] + x_smooth_0_outer, axis=0)

    x_smooth_outer = einsum2('rti,rtj->rtij', mus_smooth[:, 1:, :D],
                             mus_smooth[:, 1:, :D])
    B1 = w_s * np.sum(sigmas_smooth[:, 1:, :D, :D] + x_smooth_outer, axis=0)

    z_smooth_outer = einsum2('rti,rtj->rtij', mus_smooth[:, :-1, :],
                             mus_smooth[:, :-1, :])
    B3 = w_s * np.sum(sigmas_smooth[:, :-1, :, :] + z_smooth_outer, axis=0)

    # this was the original
    #B1_B3 = w_s*np.sum(sigmas_smooth + mus_smooth_outer, axis=0)
    #B1, B3 = B1_B3[1:], B1_B3[:-1]

    mus_smooth_outer_l1 = einsum2('rti,rtj->rtij', mus_smooth[:, 1:, :D],
                                  mus_smooth[:, :-1, :])
    B2 = w_s * np.sum(sigmas_tnt_smooth[:, :, :D, :] + mus_smooth_outer_l1,
                      axis=0)

    L_Q0_inv_B0 = solve_triangular(L_Q0, B0, lower=True)
    L1 = (ntrials * 2. * np.sum(np.log(np.diag(L_Q0)))
          + np.trace(solve_triangular(L_Q0, L_Q0_inv_B0, lower=True,
                                      trans='T')))

    AtB2T = einsum2('tik,tjk->tij', At, B2)
    B2AtT = einsum2('tik,tjk->tij', B2, At)
    tmp = einsum2('tik,tkl->til', At, B3)
    AtB3AtT = einsum2('tik,tjk->tij', tmp, At)
    tmp = np.sum(B1 - AtB2T - B2AtT + AtB3AtT, axis=0)

    L_Q_inv_tmp = solve_triangular(L_Q, tmp, lower=True)
    L2 = (ntrials * (T - 1) * 2. * np.sum(np.log(np.diag(L_Q)))
          + np.trace(solve_triangular(L_Q, L_Q_inv_tmp, lower=True,
                                      trans='T')))

    lam0, lam1 = ldsregparams
    penalty = 0.
    if lam0 > 0.:
        penalty += lam0 * np.sum(At**2)
    if lam1 > 0.:
        AtmAtm1_2 = (At[1:] - At[:-1])**2
        penalty += lam1 * np.sum(AtmAtm1_2)

    res = Y - einsum2('ik,ntk->nti', C, mus_smooth[:, :, :D])
    CP_smooth = einsum2('ik,ntkj->ntij', C, sigmas_smooth[:, :, :D, :D])
    B4 = w_s * (np.sum(einsum2('nti,ntj->ntij', res, res), axis=(0, 1))
                + np.sum(einsum2('ntik,jk->ntij', CP_smooth, C), axis=(0, 1)))

    L_R_inv_B4 = solve_triangular(L_R, B4, lower=True)
    L3 = (ntrials * T * 2. * np.sum(np.log(np.diag(L_R)))
          + np.trace(solve_triangular(L_R, L_R_inv_B4, lower=True,
                                      trans='T')))

    return L1 + L2 + L3 + penalty, L1, L2, L3, penalty
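
# Sketch of where em_objective sits in an EM-style loop (illustrative
# skeleton; the actual parameter updates live elsewhere in the package):
#
#   for it in range(n_iter):
#       Q, R = np.dot(L_Q, L_Q.T), np.dot(L_R, L_R.T)
#       Q0 = np.dot(L_Q0, L_Q0.T)
#       _, mus_s, sigmas_s, s_tnt = rts_smooth_fast(Y, At, C, Q, R, mu0, Q0,
#                                                   compute_lag1_cov=True)
#       obj, L1, L2, L3, pen = em_objective(Y, (At, L_Q, L_Q0), (C, L_R),
#                                           (lam0, lam1), mus_s, sigmas_s,
#                                           s_tnt)
#       # ...update At, L_Q, L_Q0 to decrease obj...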

def rts_smooth_fast(Y, A, C, Q, R, mu0, Q0, compute_lag1_cov=False):
    """
    RTS smoother that broadcasts over the first dimension.
    Handles multiple lag dependence using component form.

    Note: This function doesn't handle control inputs (yet).

    Y : ndarray, shape=(N, T, p)
        Observations

    A : ndarray, shape=(T, D, D*nlags)
        Time-varying dynamics matrices (stacked lag blocks)

    C : ndarray, shape=(p, D)
        Observation matrix

    mu0 : ndarray, shape=(D,)
        Mean of initial state variable

    Q0 : ndarray, shape=(D, D)
        Covariance of initial state variable

    Q : ndarray, shape=(D, D)
        Covariance of latent states

    R : ndarray, shape=(p, p)
        Covariance of observations
    """
    N, T, _ = Y.shape
    _, D, Dnlags = A.shape
    nlags = Dnlags // D
    AA = np.stack([component_matrix(At, nlags) for At in A], axis=0)

    L_R = np.linalg.cholesky(R)

    p = C.shape[0]
    CC = hs([C, np.zeros((p, D * (nlags - 1)))])
    tmp = solve_triangular(L_R, CC, lower=True)
    Rinv_CC = solve_triangular(L_R, tmp, trans='T', lower=True)
    CCT_Rinv_CC = einsum2('ki,kj->ij', CC, Rinv_CC)

    # tile L_R across number of trials so solve_triangular
    # can broadcast over trials properly
    L_R = np.tile(L_R, (N, 1, 1))

    QQ = np.zeros((T, Dnlags, Dnlags))
    QQ[:, :D, :D] = Q

    QQ0 = block_diag(*[Q0 for _ in range(nlags)])

    mu_predict = np.empty((N, T + 1, Dnlags))
    sigma_predict = np.empty((N, T + 1, Dnlags, Dnlags))

    mus_smooth = np.empty((N, T, Dnlags))
    sigmas_smooth = np.empty((N, T, Dnlags, Dnlags))

    if compute_lag1_cov:
        sigmas_smooth_tnt = np.empty((N, T - 1, Dnlags, Dnlags))
    else:
        sigmas_smooth_tnt = None

    ll = 0.

    mu_predict[:, 0, :] = np.tile(mu0, nlags)
    sigma_predict[:, 0, :, :] = QQ0.copy()

    I_tiled = np.tile(np.eye(Dnlags), (N, 1, 1))

    for t in range(T):
        # condition
        # sigma_x = dot3(C, sigma_predict, C.T) + R
        tmp1 = einsum2('ik,nkj->nij', CC, sigma_predict[:, t, :, :])

        res = Y[..., t, :] - einsum2('ik,nk->ni', CC, mu_predict[..., t, :])

        # Rinv * res
        tmp2 = solve_triangular(L_R, res, lower=True)
        tmp2 = solve_triangular(L_R, tmp2, trans='T', lower=True)

        # C^T Rinv * res
        tmp3 = einsum2('ki,nk->ni', Rinv_CC, res)

        # (Pinv + C^T Rinv C)^{-1} * tmp3
        # Pinv = np.linalg.inv(sigma_predict[:,t,:,:])
        L_P = np.linalg.cholesky(sigma_predict[:, t, :, :])
        tmp = solve_triangular(L_P, I_tiled, lower=True)
        Pinv = solve_triangular(L_P, tmp, trans='T', lower=True)
        tmp4 = sym(Pinv + CCT_Rinv_CC)
        L_tmp4 = np.linalg.cholesky(tmp4)
        tmp3 = solve_triangular(L_tmp4, tmp3, lower=True)
        tmp3 = solve_triangular(L_tmp4, tmp3, trans='T', lower=True)

        # Rinv C * tmp3
        tmp3 = einsum2('ik,nk->ni', Rinv_CC, tmp3)

        # add the two Woodbury * res terms together
        tmp = tmp2 - tmp3

        # # log-likelihood over all trials
        # # TODO: recompute with new tmp variables
        # ll += (-0.5*np.sum(v*v)
        #        - np.sum(np.log(np.diagonal(L, axis1=1, axis2=2)))
        #        - N*p/2.*np.log(2.*np.pi))

        mus_smooth[:, t, :] = mu_predict[:, t, :] + einsum2('nki,nk->ni',
                                                            tmp1, tmp)

        # Rinv * tmp1
        tmp2 = solve_triangular(L_R, tmp1, lower=True)
        tmp2 = solve_triangular(L_R, tmp2, trans='T', lower=True)

        # C^T Rinv * tmp1
        tmp3 = einsum2('ki,nkj->nij', Rinv_CC, tmp1)

        # (Pinv + C^T Rinv C)^{-1} * tmp3
        tmp3 = solve_triangular(L_tmp4, tmp3, lower=True)
        tmp3 = solve_triangular(L_tmp4, tmp3, trans='T', lower=True)

        # Rinv C * tmp3
        tmp3 = einsum2('ik,nkj->nij', Rinv_CC, tmp3)

        # add the two Woodbury * tmp1 terms together, left-multiply by tmp1
        tmp = einsum2('nki,nkj->nij', tmp1, tmp2 - tmp3)

        sigmas_smooth[:, t, :, :] = sym(sigma_predict[:, t, :, :] - tmp)

        # prediction
        #mu_predict = np.dot(A[t], mus_smooth[t])
        mu_predict[:, t + 1, :] = einsum2('ik,nk->ni', AA[t],
                                          mus_smooth[:, t, :])

        #sigma_predict = dot3(A[t], sigmas_smooth[t], A[t].T) + Q[t]
        tmp = einsum2('ik,nkl->nil', AA[t], sigmas_smooth[:, t, :, :])
        sigma_predict[:, t + 1, :, :] = sym(
            einsum2('nil,jl->nij', tmp, AA[t]) + QQ[t])

    for t in range(T - 2, -1, -1):
        # these names are stolen from mattjj and slinderman
        #temp_nn = np.dot(A[t], sigmas_smooth[n,t,:,:])
        temp_nn = einsum2('ik,nkj->nij', AA[t], sigmas_smooth[:, t, :, :])

        L = np.linalg.cholesky(sigma_predict[:, t + 1, :, :])
        v = solve_triangular(L, temp_nn, lower=True)
        # See Särkkä for the definition of Gt_T
        Gt_T = solve_triangular(L, v, trans='T', lower=True)

        # {mus,sigmas}_smooth[n,t] contain the filtered estimates so we're
        # overwriting them on purpose
        mus_smooth[:, t, :] = mus_smooth[:, t, :] + einsum2(
            'nki,nk->ni', Gt_T,
            mus_smooth[:, t + 1, :] - mu_predict[:, t + 1, :])

        tmp = einsum2('nki,nkj->nij', Gt_T,
                      sigmas_smooth[:, t + 1, :, :]
                      - sigma_predict[:, t + 1, :, :])
        tmp = einsum2('nik,nkj->nij', tmp, Gt_T)
        sigmas_smooth[:, t, :, :] = sym(sigmas_smooth[:, t, :, :] + tmp)

        if compute_lag1_cov:
            # This matrix is NOT symmetric, so don't symmetrize!
            #sigmas_smooth_tnt[n,t,:,:] = np.dot(sigmas_smooth[n,t+1,:,:], Gt_T)
            sigmas_smooth_tnt[:, t, :, :] = einsum2(
                'nik,nkj->nij', sigmas_smooth[:, t + 1, :, :], Gt_T)

    # NOTE: ll is left at 0 until the TODO above is resolved
    return ll, mus_smooth, sigmas_smooth, sigmas_smooth_tnt
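
# Note on the algebra above: the "fast" smoother conditions on Y[t] via the
# Woodbury identity,
#   (C P C^T + R)^{-1}
#       = R^{-1} - R^{-1} C (P^{-1} + C^T R^{-1} C)^{-1} C^T R^{-1},
# so each step factors Dnlags x Dnlags matrices instead of the p x p
# innovation covariance, which pays off when p >> D*nlags (many sensors,
# few latent states).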

def rts_smooth(Y, A, C, Q, R, mu0, Q0):
    N = Y.shape[0]
    T, D, _ = A.shape
    p = C.shape[0]

    mu_predict = np.zeros((N, T + 1, D))
    sigma_predict = np.zeros((N, T + 1, D, D))
    mu_predict[:, 0, :] = mu0
    sigma_predict[:, 0, :, :] = Q0

    # allow Q to be a single (D, D) matrix or a (T, D, D) sequence
    Qt = _ensure_ndim(Q, T, 3)

    mus_smooth = np.empty((N, T, D))
    sigmas_smooth = np.empty((N, T, D, D))
    sigmas_smooth_tnt = np.empty((N, T - 1, D, D))

    ll = 0.

    for t in range(T):
        # condition
        # dot3(C, sigma_predict, C.T) + R
        tmp1 = einsum2('ik,nkj->nij', C, sigma_predict[:, t, :, :])
        sigma_pred = np.dot(tmp1, C.T) + R
        sigma_pred = sym(sigma_pred)

        L = np.linalg.cholesky(sigma_pred)
        # res[n] = Y[n,t,:] - np.dot(C, mu_predict[n])
        # the transpose works b/c of how dot broadcasts
        res = Y[..., t, :] - np.dot(mu_predict[:, t, :], C.T)
        v = solve_triangular(L, res, lower=True)

        # log-likelihood over all trials
        ll += (-0.5 * np.sum(v * v)
               - np.sum(np.log(np.diagonal(L, axis1=1, axis2=2)))
               - N * p / 2. * np.log(2. * np.pi))

        mus_smooth[..., t, :] = mu_predict[:, t, :] + einsum2(
            'nki,nk->ni', tmp1, solve_triangular(L, v, trans='T', lower=True))

        tmp2 = solve_triangular(L, tmp1, lower=True)
        sigmas_smooth[..., t, :, :] = sym(
            sigma_predict[:, t, :, :] - einsum2('nki,nkj->nij', tmp2, tmp2))

        # prediction
        #mu_predict = np.dot(A[t], mus_smooth[t])
        mu_predict[:, t + 1, :] = einsum2('ik,nk->ni', A[t],
                                          mus_smooth[..., t, :])

        #sigma_predict = dot3(A[t], sigmas_smooth[t], A[t].T) + Q[t]
        tmp = einsum2('ik,nkl->nil', A[t], sigmas_smooth[:, t, :, :])
        sigma_predict[:, t + 1, :, :] = sym(
            einsum2('nil,jl->nij', tmp, A[t]) + Qt[t])

    for t in range(T - 2, -1, -1):
        # these names are stolen from mattjj and scott
        temp_nn = einsum2('ik,nkj->nij', A[t], sigmas_smooth[:, t, :, :])

        L = np.linalg.cholesky(sigma_predict[:, t + 1, :, :])
        v = solve_triangular(L, temp_nn, lower=True)
        # See Särkkä for the definition of Gt_T
        Gt_T = solve_triangular(L, v, trans='T', lower=True)

        # {mus,sigmas}_smooth[n,t] contain the filtered estimates so we're
        # overwriting them on purpose
        mus_smooth[:, t, :] = mus_smooth[:, t, :] + einsum2(
            'nki,nk->ni', Gt_T,
            mus_smooth[:, t + 1, :] - mu_predict[:, t + 1, :])

        tmp = einsum2('nki,nkj->nij', Gt_T,
                      sigmas_smooth[:, t + 1, :, :]
                      - sigma_predict[:, t + 1, :, :])
        tmp = einsum2('nik,nkj->nij', tmp, Gt_T)
        sigmas_smooth[:, t, :, :] = sym(sigmas_smooth[:, t, :, :] + tmp)

        # don't symmetrize this one
        sigmas_smooth_tnt[:, t, :, :] = einsum2(
            'nik,nkj->nij', sigmas_smooth[:, t + 1, :, :], Gt_T)

    return ll, mus_smooth, sigmas_smooth, sigmas_smooth_tnt

def kalman_filter_loop(Y, A, C, Q, R, mu0, Q0):
    """
    Kalman filter that loops over trials (per-trial reference
    implementation of kalman_filter above).

    Note: This function doesn't handle control inputs (yet).

    Y : ndarray, shape=(N, T, p)
        Observations

    A : ndarray, shape=(T, D, D)
        Time-varying dynamics matrices

    C : ndarray, shape=(p, D)
        Observation matrix

    Q : ndarray, shape=(T, D, D)
        Covariance of latent states

    R : ndarray, shape=(p, p)
        Covariance of observations

    mu0 : ndarray, shape=(D,)
        Mean of initial state variable

    Q0 : ndarray, shape=(D, D)
        Covariance of initial state variable
    """
    N = Y.shape[0]
    T, D, _ = A.shape
    p = C.shape[0]

    mu_predict = np.stack([mu0 for _ in range(N)], axis=0)
    sigma_predict = np.stack([Q0 for _ in range(N)], axis=0)

    mus_filt = np.zeros((N, T, D))
    sigmas_filt = np.zeros((N, T, D, D))

    ll = 0.

    for n in range(N):
        for t in range(T):
            # condition
            tmp1 = np.dot(C, sigma_predict[n])
            sigma_pred = np.dot(tmp1, C.T) + R

            L = np.linalg.cholesky(sigma_pred)
            v = solve_triangular(L, Y[n, t, :] - np.dot(C, mu_predict[n]),
                                 lower=True)

            # log-likelihood contribution of this trial
            ll += (-0.5 * np.dot(v, v)
                   - np.sum(np.log(np.diag(L)))
                   - p / 2. * np.log(2. * np.pi))

            mus_filt[n, t, :] = mu_predict[n] + np.dot(
                tmp1.T, solve_triangular(L, v, trans='T', lower=True))

            tmp2 = solve_triangular(L, tmp1, lower=True)
            sigmas_filt[n, t, :, :] = sigma_predict[n] - np.dot(tmp2.T, tmp2)

            # prediction
            mu_predict[n] = np.dot(A[t], mus_filt[n, t, :])
            sigma_predict[n] = dot3(A[t], sigmas_filt[n, t, :, :],
                                    A[t].T) + Q[t]

    return ll, mus_filt, sigmas_filt
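
# Consistency sketch (illustrative): the broadcasting and per-trial filters
# should agree on the same single-lag inputs.
def _check_filters(Y, A, C, Q, R, mu0, Q0):
    ll_a, mus_a, _ = kalman_filter(Y, A, C, Q, R, mu0, Q0)
    ll_b, mus_b, _ = kalman_filter_loop(Y, A, C, Q, R, mu0, Q0)
    assert np.isclose(ll_a, ll_b) and np.allclose(mus_a, mus_b)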

def rts_smooth(Y, A, C, Q, R, mu0, Q0, compute_lag1_cov=False):
    N, T, _ = Y.shape
    _, D, Dnlags = A.shape
    nlags = Dnlags // D
    AA = np.stack([component_matrix(At, nlags) for At in A], axis=0)

    p = C.shape[0]
    CC = hs([C, np.zeros((p, D * (nlags - 1)))])

    QQ = np.zeros((T, Dnlags, Dnlags))
    QQ[:, :D, :D] = Q

    QQ0 = block_diag(*[Q0 for _ in range(nlags)])

    mu_predict = np.empty((N, T + 1, Dnlags))
    sigma_predict = np.empty((N, T + 1, Dnlags, Dnlags))

    mus_smooth = np.empty((N, T, Dnlags))
    sigmas_smooth = np.empty((N, T, Dnlags, Dnlags))

    if compute_lag1_cov:
        sigmas_smooth_tnt = np.empty((N, T - 1, Dnlags, Dnlags))
    else:
        sigmas_smooth_tnt = None

    ll = 0.

    mu_predict[:, 0, :] = np.tile(mu0, nlags)
    sigma_predict[:, 0, :, :] = QQ0.copy()

    for t in range(T):
        # condition
        # sigma_x = dot3(C, sigma_predict, C.T) + R
        tmp1 = einsum2('ik,nkj->nij', CC, sigma_predict[:, t, :, :])
        sigma_x = einsum2('nik,jk->nij', tmp1, CC) + R
        sigma_x = sym(sigma_x)

        L = np.linalg.cholesky(sigma_x)
        # res[n] = Y[n,t,:] - np.dot(C, mu_predict[n,t,:])
        res = Y[..., t, :] - einsum2('ik,nk->ni', CC, mu_predict[..., t, :])
        v = solve_triangular(L, res, lower=True)

        # log-likelihood over all trials
        ll += (-0.5 * np.sum(v * v)
               - np.sum(np.log(np.diagonal(L, axis1=1, axis2=2)))
               - N * p / 2. * np.log(2. * np.pi))

        mus_smooth[:, t, :] = mu_predict[:, t, :] + einsum2(
            'nki,nk->ni', tmp1, solve_triangular(L, v, trans='T', lower=True))

        # tmp2 = L^{-1}*C*sigma_predict
        tmp2 = solve_triangular(L, tmp1, lower=True)
        sigmas_smooth[:, t, :, :] = sym(
            sigma_predict[:, t, :, :] - einsum2('nki,nkj->nij', tmp2, tmp2))

        # prediction
        #mu_predict = np.dot(A[t], mus_smooth[t])
        mu_predict[:, t + 1, :] = einsum2('ik,nk->ni', AA[t],
                                          mus_smooth[:, t, :])

        #sigma_predict = dot3(A[t], sigmas_smooth[t], A[t].T) + Q[t]
        tmp = einsum2('ik,nkl->nil', AA[t], sigmas_smooth[:, t, :, :])
        sigma_predict[:, t + 1, :, :] = sym(
            einsum2('nil,jl->nij', tmp, AA[t]) + QQ[t])

    for t in range(T - 2, -1, -1):
        # these names are stolen from mattjj and slinderman
        #temp_nn = np.dot(A[t], sigmas_smooth[n,t,:,:])
        temp_nn = einsum2('ik,nkj->nij', AA[t], sigmas_smooth[:, t, :, :])

        L = np.linalg.cholesky(sigma_predict[:, t + 1, :, :])
        v = solve_triangular(L, temp_nn, lower=True)
        # See Särkkä for the definition of Gt_T
        Gt_T = solve_triangular(L, v, trans='T', lower=True)

        # {mus,sigmas}_smooth[n,t] contain the filtered estimates so we're
        # overwriting them on purpose
        mus_smooth[:, t, :] = mus_smooth[:, t, :] + einsum2(
            'nki,nk->ni', Gt_T,
            mus_smooth[:, t + 1, :] - mu_predict[:, t + 1, :])

        # the smoother correction uses the predicted covariance, not temp_nn
        tmp = einsum2('nki,nkj->nij', Gt_T,
                      sigmas_smooth[:, t + 1, :, :]
                      - sigma_predict[:, t + 1, :, :])
        tmp = einsum2('nik,nkj->nij', tmp, Gt_T)
        sigmas_smooth[:, t, :, :] = sym(sigmas_smooth[:, t, :, :] + tmp)

        if compute_lag1_cov:
            # This matrix is NOT symmetric, so don't symmetrize!
            #sigmas_smooth_tnt[n,t,:,:] = np.dot(sigmas_smooth[n,t+1,:,:], Gt_T)
            sigmas_smooth_tnt[:, t, :, :] = einsum2(
                'nik,nkj->nij', sigmas_smooth[:, t + 1, :, :], Gt_T)

    return ll, mus_smooth, sigmas_smooth, sigmas_smooth_tnt
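
# Usage sketch for the multi-lag (VAR-style) parameterization (illustrative;
# dimensions are assumptions). A stacks the lag blocks [A_1 ... A_nlags]
# horizontally; component_matrix builds the companion form internally.
#
#   T, D, nlags, p = 50, 3, 2, 4
#   A1 = np.hstack([0.6 * np.eye(D), 0.2 * np.eye(D)])   # (D, D*nlags)
#   A = np.tile(A1, (T, 1, 1))                           # (T, D, D*nlags)
#   ll, mus_s, sigmas_s, s_tnt = rts_smooth(Y, A, C, Q, R, mu0, Q0,
#                                           compute_lag1_cov=True)
#   x_hat = mus_s[..., :D]    # first block of the component state is x[t]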