def Cs(self):
    if self.single_subspace:
        return np.array(
            [block_diag(*[em.Cs[0] for em in self.emissions_models])])
    else:
        return np.array([
            block_diag(*[em.Cs[k] for em in self.emissions_models])
            for k in range(self.K)
        ])
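# A minimal sketch of the block-diagonal structure that Cs assembles, using
# two hypothetical emissions models (C1 and C2 below are illustrative, not
# part of the class above). block_diag places each model's observation matrix
# on the diagonal, so each observation block reads only its own slice of the
# latent state.
import numpy as np
from scipy.linalg import block_diag

C1 = np.ones((2, 1))      # first subpopulation: 2 observations, 1 latent dim
C2 = 2 * np.ones((3, 2))  # second subpopulation: 3 observations, 2 latent dims
C = block_diag(C1, C2)    # shape (5, 3)
assert C.shape == (5, 3)
assert np.all(C[:2, 1:] == 0) and np.all(C[2:, :1] == 0)  # off-diagonal zeros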
def _initialize_with_pca(self, datas, inputs=None, masks=None, tags=None,
                         num_iters=20):
    for data in datas:
        assert data.shape[1] == self.N

    # Default to all-observed masks if none are given
    if masks is None:
        masks = [np.ones_like(data, dtype=bool) for data in datas]

    N_offsets = np.cumsum(self.N_vec)[:-1]
    pcas = []

    split_datas = list(
        zip(*[np.split(data, N_offsets, axis=1) for data in datas]))
    split_masks = list(
        zip(*[np.split(mask, N_offsets, axis=1) for mask in masks]))
    assert len(split_masks) == len(split_datas) == self.P

    for em, dps, mps in zip(self.emissions_models, split_datas, split_masks):
        pcas.append(em._initialize_with_pca(dps, inputs, mps, tags))

    # Combine the PCA objects
    from sklearn.decomposition import PCA
    pca = PCA(self.D)
    pca.components_ = block_diag(*[p.components_ for p in pcas])
    pca.mean_ = np.concatenate([p.mean_ for p in pcas])

    # Not super pleased with this, but it should work...
    pca.noise_variance_ = np.concatenate(
        [p.noise_variance_ * np.ones(n) for p, n in zip(pcas, self.N_vec)])
    return pca
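# A minimal sketch of the block-diagonal PCA combination above, on synthetic
# data (in the method above each block would be one emissions model's
# features). Projecting with the combined loadings is equivalent to
# projecting each feature block with its own PCA.
import numpy as np
from scipy.linalg import block_diag
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X1, X2 = rng.randn(100, 4), rng.randn(100, 6)
p1, p2 = PCA(2).fit(X1), PCA(3).fit(X2)

components = block_diag(p1.components_, p2.components_)  # shape (5, 10)
mean = np.concatenate([p1.mean_, p2.mean_])              # shape (10,)

# Equivalent to .transform() for a non-whitened PCA:
X = np.column_stack([X1, X2])
Z = (X - mean) @ components.T
assert np.allclose(Z[:, :2], p1.transform(X1))
assert np.allclose(Z[:, 2:], p2.transform(X2))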
def linear_regression(Xs, ys, weights=None,
                      mu0=0, sigma0=1.e8, nu0=1, psi0=1,
                      fit_intercept=True):
    Xs = Xs if isinstance(Xs, (list, tuple)) else [Xs]
    ys = ys if isinstance(ys, (list, tuple)) else [ys]
    assert len(Xs) == len(ys)

    D = Xs[0].shape[1]
    P = ys[0].shape[1]
    assert all([X.shape[1] == D for X in Xs])
    assert all([y.shape[1] == P for y in ys])
    assert all([X.shape[0] == y.shape[0] for X, y in zip(Xs, ys)])

    mu0 = mu0 * np.ones((P, D))
    sigma0 = sigma0 * np.eye(D)

    # Default to uniform weights if none are given
    if weights is not None:
        weights = weights if isinstance(weights, (list, tuple)) else [weights]
    else:
        weights = [np.ones(X.shape[0]) for X in Xs]

    # Add weak prior on intercept
    if fit_intercept:
        mu0 = np.column_stack((mu0, np.zeros(P)))
        sigma0 = block_diag(sigma0, np.eye(1))

    # Compute the posterior
    J = np.linalg.inv(sigma0)
    h = np.dot(J, mu0.T)
    for X, y, weight in zip(Xs, ys, weights):
        X = np.column_stack((X, np.ones(X.shape[0]))) if fit_intercept else X
        J += np.dot(X.T * weight, X)
        h += np.dot(X.T * weight, y)

    # Solve for the MAP estimate
    W = np.linalg.solve(J, h).T
    if fit_intercept:
        W, b = W[:, :-1], W[:, -1]
    else:
        b = 0

    # Compute the residual and the posterior variance
    nu = nu0
    Psi = psi0 * np.eye(P)
    for X, y, weight in zip(Xs, ys, weights):
        yhat = np.dot(X, W.T) + b
        resid = y - yhat
        nu += np.sum(weight)
        tmp1 = np.einsum('t,ti,tj->ij', weight, resid, resid)
        # Sanity check: the einsum matches the explicit broadcast sum
        tmp2 = np.sum(weight[:, None, None] * resid[:, :, None]
                      * resid[:, None, :], axis=0)
        assert np.allclose(tmp1, tmp2)
        Psi += tmp1

    # MAP estimate (mode) of the inverse Wishart posterior over the covariance
    Sigma = Psi / (nu + P + 1)
    if fit_intercept:
        return W, b, Sigma
    else:
        return W, Sigma
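# A minimal usage sketch for linear_regression on synthetic data (the true
# W, b, and noise scale below are made up for illustration).
import numpy as np

rng = np.random.RandomState(0)
W_true, b_true = np.array([[2.0, -1.0]]), np.array([0.5])
X = rng.randn(500, 2)
y = X @ W_true.T + b_true + 0.1 * rng.randn(500, 1)

W_hat, b_hat, Sigma_hat = linear_regression(X, y)
assert np.allclose(W_hat, W_true, atol=0.05)
assert np.allclose(b_hat, b_true, atol=0.05)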
def fit_linear_regression(Xs, ys, weights=None,
                          mu0=0, sigmasq0=1, alpha0=1, beta0=1,
                          fit_intercept=True):
    """
    Fit a linear regression y_i ~ N(Wx_i + b, diag(S)) for W, b, S.

    :param Xs: array or list of arrays
    :param ys: array or list of arrays
    :param fit_intercept: if False drop b
    """
    Xs = Xs if isinstance(Xs, (list, tuple)) else [Xs]
    ys = ys if isinstance(ys, (list, tuple)) else [ys]
    assert len(Xs) == len(ys)

    D = Xs[0].shape[1]
    P = ys[0].shape[1]
    assert all([X.shape[1] == D for X in Xs])
    assert all([y.shape[1] == P for y in ys])
    assert all([X.shape[0] == y.shape[0] for X, y in zip(Xs, ys)])

    # np.ones (not np.zeros) so that a scalar mu0 broadcasts correctly
    mu0 = mu0 * np.ones((P, D))
    sigmasq0 = sigmasq0 * np.eye(D)

    # Default to uniform weights if none are given
    if weights is not None:
        weights = weights if isinstance(weights, (list, tuple)) else [weights]
    else:
        weights = [np.ones(X.shape[0]) for X in Xs]

    # Add weak prior on intercept
    if fit_intercept:
        mu0 = np.column_stack((mu0, np.zeros(P)))
        sigmasq0 = block_diag(sigmasq0, np.eye(1))

    # Compute the posterior
    J = np.linalg.inv(sigmasq0)
    h = np.dot(J, mu0.T)
    for X, y, weight in zip(Xs, ys, weights):
        X = np.column_stack((X, np.ones(X.shape[0]))) if fit_intercept else X
        J += np.dot(X.T * weight, X)
        h += np.dot(X.T * weight, y)

    # Solve for the MAP estimate
    W = np.linalg.solve(J, h).T
    if fit_intercept:
        W, b = W[:, :-1], W[:, -1]
    else:
        b = 0

    # Compute the residual and the posterior variance
    alpha = alpha0
    beta = beta0 * np.ones(P)
    for X, y, weight in zip(Xs, ys, weights):
        yhat = np.dot(X, W.T) + b
        resid = y - yhat
        alpha += 0.5 * np.sum(weight)
        beta += 0.5 * np.sum(weight[:, None] * resid**2, axis=0)

    # Invert the posterior mean of the precision to get the noise variance
    sigmasq = beta / (alpha + 1e-16)
    if fit_intercept:
        return W, b, sigmasq
    else:
        return W, sigmasq
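# A minimal usage sketch for this diagonal-noise variant (true parameters
# below are illustrative). Unlike linear_regression above, it returns one
# noise variance per output dimension rather than a full covariance.
import numpy as np

rng = np.random.RandomState(1)
W_true, b_true = np.array([[1.0, 0.0], [0.0, -2.0]]), np.array([0.3, -0.3])
X = rng.randn(1000, 2)
noise_std = np.array([0.1, 0.5])  # per-output noise scales
y = X @ W_true.T + b_true + noise_std * rng.randn(1000, 2)

W_hat, b_hat, sigmasq_hat = fit_linear_regression(X, y)
assert W_hat.shape == (2, 2) and sigmasq_hat.shape == (2,)
# sigmasq_hat should roughly track noise_std**2 for each output dimension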
def fit_linear_regression(Xs, ys, weights=None, fit_intercept=True,
                          prior_mean=0, prior_variance=1, nu0=1, Psi0=1):
    """
    Fit a linear regression y_i ~ N(Wx_i + b, diag(S)) for W, b, S.

    :param Xs: array or list of arrays
    :param ys: array or list of arrays
    :param fit_intercept: if False drop b
    """
    Xs = Xs if isinstance(Xs, (list, tuple)) else [Xs]
    ys = ys if isinstance(ys, (list, tuple)) else [ys]
    assert len(Xs) == len(ys)

    p, d = Xs[0].shape[1], ys[0].shape[1]
    assert all([X.shape[1] == p for X in Xs])
    assert all([y.shape[1] == d for y in ys])
    assert all([X.shape[0] == y.shape[0] for X, y in zip(Xs, ys)])

    # np.ones (not np.zeros) so that a scalar prior_mean broadcasts correctly
    prior_mean = prior_mean * np.ones((d, p))
    prior_variance = prior_variance * np.eye(p)

    # Check the weights. Default to all ones.
    if weights is not None:
        weights = weights if isinstance(weights, (list, tuple)) else [weights]
    else:
        weights = [np.ones(X.shape[0]) for X in Xs]

    # Add weak prior on intercept
    if fit_intercept:
        prior_mean = np.column_stack((prior_mean, np.zeros(d)))
        prior_variance = block_diag(prior_variance, np.eye(1))

    # Compute the posterior
    J = np.linalg.inv(prior_variance)
    h = np.dot(J, prior_mean.T)
    for X, y, weight in zip(Xs, ys, weights):
        X = np.column_stack((X, np.ones(X.shape[0]))) if fit_intercept else X
        J += np.dot(X.T * weight, X)
        h += np.dot(X.T * weight, y)

    # Solve for the MAP estimate
    W = np.linalg.solve(J, h).T
    if fit_intercept:
        W, b = W[:, :-1], W[:, -1]
    else:
        b = 0

    # Compute the residual and the posterior variance
    nu = nu0
    Psi = Psi0 * np.eye(d)
    for X, y, weight in zip(Xs, ys, weights):
        yhat = np.dot(X, W.T) + b
        resid = y - yhat
        nu += np.sum(weight)
        tmp1 = np.einsum('t,ti,tj->ij', weight, resid, resid)
        # Sanity check: the einsum matches the explicit broadcast sum
        tmp2 = np.sum(weight[:, None, None] * resid[:, :, None]
                      * resid[:, None, :], axis=0)
        assert np.allclose(tmp1, tmp2)
        Psi += tmp1

    # MAP estimate (mode) of the inverse Wishart posterior over the covariance
    Sigma = Psi / (nu + d + 1)
    if fit_intercept:
        return W, b, Sigma
    else:
        return W, Sigma
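# A minimal sketch of the prior arguments in this variant (values are
# illustrative): with few data points, a tight prior_variance shrinks W
# toward prior_mean, while a loose prior recovers the least-squares fit.
import numpy as np

rng = np.random.RandomState(2)
X = rng.randn(5, 3)  # deliberately few data points
y = X @ np.array([[1.0, 1.0, 1.0]]).T + 0.1 * rng.randn(5, 1)

W_tight, _, _ = fit_linear_regression(X, y, prior_mean=0, prior_variance=1e-4)
W_loose, _, _ = fit_linear_regression(X, y, prior_mean=0, prior_variance=1e4)
assert np.linalg.norm(W_tight) < np.linalg.norm(W_loose)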
def rts_smooth_fast(Y, A, C, Q, R, mu0, Q0, compute_lag1_cov=False):
    """
    RTS smoother that broadcasts over the first dimension.
    Handles multiple lag dependence using component form.

    Note: This function doesn't handle control inputs (yet).

    Y : ndarray, shape=(N, T, p)
        Observations
    A : ndarray, shape=(T, D, D*nlags)
        Time-varying dynamics matrices (top block row of the companion form)
    C : ndarray, shape=(p, D)
        Observation matrix
    Q : ndarray, shape=(D, D)
        Covariance of latent states
    R : ndarray, shape=(p, p)
        Covariance of observations
    mu0 : ndarray, shape=(D,)
        Mean of initial state variable
    Q0 : ndarray, shape=(D, D)
        Covariance of initial state variable
    """
    N, T, _ = Y.shape
    _, D, Dnlags = A.shape
    nlags = Dnlags // D
    AA = np.stack([component_matrix(At, nlags) for At in A], axis=0)

    L_R = np.linalg.cholesky(R)

    p = C.shape[0]
    CC = hs([C, np.zeros((p, D*(nlags-1)))])
    tmp = solve_triangular(L_R, CC, lower=True)
    Rinv_CC = solve_triangular(L_R, tmp, trans='T', lower=True)
    CCT_Rinv_CC = einsum2('ki,kj->ij', CC, Rinv_CC)

    # tile L_R across number of trials so solve_triangular
    # can broadcast over trials properly
    L_R = np.tile(L_R, (N, 1, 1))

    QQ = np.zeros((T, Dnlags, Dnlags))
    QQ[:, :D, :D] = Q

    QQ0 = block_diag(*[Q0 for _ in range(nlags)])

    mu_predict = np.empty((N, T+1, Dnlags))
    sigma_predict = np.empty((N, T+1, Dnlags, Dnlags))

    mus_smooth = np.empty((N, T, Dnlags))
    sigmas_smooth = np.empty((N, T, Dnlags, Dnlags))

    if compute_lag1_cov:
        sigmas_smooth_tnt = np.empty((N, T-1, Dnlags, Dnlags))
    else:
        sigmas_smooth_tnt = None

    ll = 0.
    mu_predict[:, 0, :] = np.tile(mu0, nlags)
    sigma_predict[:, 0, :, :] = QQ0.copy()

    # the identity must match the companion-form dimension Dnlags
    # (np.eye(D) would only be correct in the single-lag case)
    I_tiled = np.tile(np.eye(Dnlags), (N, 1, 1))

    for t in range(T):
        # condition
        # sigma_x = dot3(C, sigma_predict, C.T) + R
        tmp1 = einsum2('ik,nkj->nij', CC, sigma_predict[:, t, :, :])

        res = Y[..., t, :] - einsum2('ik,nk->ni', CC, mu_predict[..., t, :])

        # Rinv * res
        tmp2 = solve_triangular(L_R, res, lower=True)
        tmp2 = solve_triangular(L_R, tmp2, trans='T', lower=True)

        # C^T Rinv * res
        tmp3 = einsum2('ki,nk->ni', Rinv_CC, res)

        # (Pinv + C^T Rinv C)^{-1} * tmp3
        # Pinv = np.linalg.inv(sigma_predict[:,t,:,:])
        L_P = np.linalg.cholesky(sigma_predict[:, t, :, :])
        tmp = solve_triangular(L_P, I_tiled, lower=True)
        Pinv = solve_triangular(L_P, tmp, trans='T', lower=True)
        tmp4 = sym(Pinv + CCT_Rinv_CC)
        L_tmp4 = np.linalg.cholesky(tmp4)
        tmp3 = solve_triangular(L_tmp4, tmp3, lower=True)
        tmp3 = solve_triangular(L_tmp4, tmp3, trans='T', lower=True)

        # Rinv C * tmp3
        tmp3 = einsum2('ik,nk->ni', Rinv_CC, tmp3)

        # add the two Woodbury * res terms together
        tmp = tmp2 - tmp3

        # # log-likelihood over all trials
        # # TODO: recompute with new tmp variables
        # ll += (-0.5*np.sum(v*v)
        #        - 2.*np.sum(np.log(np.diagonal(L, axis1=1, axis2=2)))
        #        - p/2.*np.log(2.*np.pi))

        mus_smooth[:, t, :] = mu_predict[:, t, :] + einsum2('nki,nk->ni', tmp1, tmp)

        # tmp2 = L^{-1}*C*sigma_predict
        # tmp2 = solve_triangular(L, tmp1, lower=True)

        # Rinv * tmp1
        tmp2 = solve_triangular(L_R, tmp1, lower=True)
        tmp2 = solve_triangular(L_R, tmp2, trans='T', lower=True)

        # C^T Rinv * tmp1
        tmp3 = einsum2('ki,nkj->nij', Rinv_CC, tmp1)

        # (Pinv + C^T Rinv C)^{-1} * tmp3
        tmp3 = solve_triangular(L_tmp4, tmp3, lower=True)
        tmp3 = solve_triangular(L_tmp4, tmp3, trans='T', lower=True)

        # Rinv C * tmp3
        tmp3 = einsum2('ik,nkj->nij', Rinv_CC, tmp3)

        # add the two Woodbury * tmp1 terms together, left-multiply by tmp1
        tmp = einsum2('nki,nkj->nij', tmp1, tmp2 - tmp3)

        sigmas_smooth[:, t, :, :] = sym(sigma_predict[:, t, :, :] - tmp)

        # prediction
        # mu_predict = np.dot(A[t], mus_smooth[t])
        mu_predict[:, t+1, :] = einsum2('ik,nk->ni', AA[t], mus_smooth[:, t, :])
        # sigma_predict = dot3(A[t], sigmas_smooth[t], A[t].T) + Q[t]
        tmp = einsum2('ik,nkl->nil', AA[t], sigmas_smooth[:, t, :, :])
        sigma_predict[:, t+1, :, :] = sym(einsum2('nil,jl->nij', tmp, AA[t]) + QQ[t])

    # backward (smoothing) pass
    for t in range(T-2, -1, -1):
        # these names are stolen from mattjj and slinderman
        # temp_nn = np.dot(A[t], sigmas_smooth[n,t,:,:])
        temp_nn = einsum2('ik,nkj->nij', AA[t], sigmas_smooth[:, t, :, :])

        L = np.linalg.cholesky(sigma_predict[:, t+1, :, :])
        v = solve_triangular(L, temp_nn, lower=True)
        # see Särkkä, Bayesian Filtering and Smoothing, for the definition of Gt_T
        Gt_T = solve_triangular(L, v, trans='T', lower=True)

        # {mus,sigmas}_smooth[n,t] contain the filtered estimates, so we're
        # overwriting them on purpose
        # mus_smooth[n,t,:] = mus_smooth[n,t,:] + np.dot(T_(Gt_T), mus_smooth[n,t+1,:] - mu_predict[t+1,:])
        mus_smooth[:, t, :] = mus_smooth[:, t, :] + einsum2(
            'nki,nk->ni', Gt_T, mus_smooth[:, t+1, :] - mu_predict[:, t+1, :])

        # sigmas_smooth[n,t,:,:] = sigmas_smooth[n,t,:,:] + dot3(T_(Gt_T), sigmas_smooth[n,t+1,:,:] - sigma_predict[n,t+1,:,:], Gt_T)
        tmp = einsum2('nki,nkj->nij', Gt_T,
                      sigmas_smooth[:, t+1, :, :] - sigma_predict[:, t+1, :, :])
        tmp = einsum2('nik,nkj->nij', tmp, Gt_T)
        sigmas_smooth[:, t, :, :] = sym(sigmas_smooth[:, t, :, :] + tmp)

        if compute_lag1_cov:
            # This matrix is NOT symmetric, so don't symmetrize!
            # sigmas_smooth_tnt[n,t,:,:] = np.dot(sigmas_smooth[n,t+1,:,:], Gt_T)
            sigmas_smooth_tnt[:, t, :, :] = einsum2(
                'nik,nkj->nij', sigmas_smooth[:, t+1, :, :], Gt_T)

    return ll, mus_smooth, sigmas_smooth, sigmas_smooth_tnt
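# rts_smooth_fast relies on a component_matrix helper that embeds the top
# block row A_t (shape D x D*nlags) into a full companion matrix. A minimal
# sketch of one such helper, as an assumption about what that function does
# (the actual implementation in this codebase may differ):
import numpy as np

def component_matrix(At, nlags):
    """Companion-form embedding: top block row is At, sub-diagonal is identity."""
    D, Dnlags = At.shape
    AA = np.zeros((Dnlags, Dnlags))
    AA[:D, :] = At
    # shift x_t, ..., x_{t-nlags+2} down into the lag slots
    AA[D:, :Dnlags - D] = np.eye(Dnlags - D)
    return AA

# e.g. for D=2, nlags=2, a 2x4 At becomes the 4x4 matrix [[A1, A2], [I, 0]],
# so the stacked state [x_t; x_{t-1}] evolves with the usual AR(2) recursion.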