def _baumwelch(self, obs: np.ndarray, alpha: np.ndarray, beta: np.ndarray,
               remap: bool = False) -> Tuple[np.ndarray, np.ndarray]:
    """Baum-Welch algorithm (forward-backward algorithm)

    :Parameters:
        - obs: observation sequence O={o1, o2, ... oT}, values drawn from the observation set
        - alpha: forward factors (TxN)
        - beta: backward factors (TxN)
        - remap: whether to recompute the emission probability map

    :Returns:
        the xi and gamma parameters
    """
    if remap:
        self._map_b(obs)

    T = len(obs)
    # E-Step: xi(TxNxN), gamma(TxN)
    # xi[t][i][j]; only filled for t < T-1, the last slice stays at log(0)
    xi = np.full((T, self.n, self.n), -np.inf, dtype=self.precision)
    for t in np.arange(T - 1):
        numer = alpha[t, :].reshape((self.n, 1)) \
                + self.A \
                + self.b[:, t + 1].reshape((1, self.n)) \
                + beta[t + 1, :].reshape((1, self.n))
        denom = lse(numer)
        xi[t] = numer - denom
    # gamma[t][i]: sum xi over j
    gamma = lse(xi, axis=2)
    return xi, gamma
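# Every routine in this section leans on a log-sum-exp helper `lse` that is
# defined elsewhere in the project. A minimal sketch of its assumed behaviour,
# wrapping scipy.special.logsumexp (the real helper may differ in details):
import numpy as np
from scipy.special import logsumexp

def lse(a, axis=None):
    # Numerically stable log(sum(exp(a))) along `axis`; -inf entries act as
    # log(0), which the code above relies on (e.g. the -inf-filled xi).
    return logsumexp(a, axis=axis)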
def train(self, obs, iters=1, eps=0.0001, verbose: bool = True):
    """Train the HMM model parameters

    :Parameters:
        - obs: observation sequence O={o1, o2, ... oT}, values drawn from the observation set
        - iters: number of iterations
        - eps: training tolerance
        - verbose: print progress for each iteration
    """
    for k in range(iters):
        # Training
        self._map_b(obs)
        alpha = self._forward(obs)
        beta = self._backward(obs)
        xi, gamma = self._baumwelch(obs, alpha, beta)
        model = self._estimate(obs, alpha, beta, xi, gamma)
        # Update parameters
        prob_old = lse(alpha[-1])
        self._updatemodel(model)
        prob_new = self.calc_prob(obs)
        # Check convergence
        prob_d = abs(prob_old - prob_new)
        if verbose:
            print("Iter: {:3},"
                  " L(lambda|O) = {:.6e},"
                  " L(lambda_new|O) = {:.6e},"
                  " eps = {:.6f}"
                  .format(k, prob_old, prob_new, prob_d))
        if prob_d < eps:
            break
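# A hypothetical usage sketch for train(); the class name GMMHMM and its
# constructor arguments are assumptions for illustration, not names taken
# from this section:
import numpy as np

np.random.seed(0)
obs = np.random.randn(200, 2)          # T=200 observations, D=2 features
hmm = GMMHMM(n=3, m=2, d=2)            # assumed: 3 states, 2 mixtures
hmm.train(obs, iters=50, eps=1e-4)     # EM until |logL change| < eps
print("logL:", hmm.calc_prob(obs))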
def calc_prob(self, obs: np.ndarray) -> float:
    """Compute the log-likelihood of an observation sequence

    :Parameters:
        - obs: observation sequence O={o1, o2, ... oT}, values drawn from the observation set
    """
    return lse(self._forward(obs, True)[-1])
def _backward(self, obs: np.ndarray, remap: bool = False) -> np.ndarray:
    """Backward algorithm (mirror of forward)

    ::

        Beta lattice computation at (t, i):

             q1   q2  ...  qN
        o0
        ...
        ot        t,i
        ...  #    #    #        -> beta[t+1], A[i, :], B[j, o(t+1)]
        oT

    :Parameters:
        - obs: observation sequence O={o1, o2, ... oT}, values drawn from the observation set
        - remap: whether to recompute the emission probability map

    :Returns:
        backward factors beta(TxN), also called the backward lattice or backward probabilities
    """
    if remap:
        self._map_b(obs)

    T = len(obs)
    beta = np.empty((T, self.n), dtype=self.precision)
    # Initial state
    beta[-1] = np.log(np.ones(self.n, dtype=self.precision))
    # Recursion
    for t in np.arange(T - 2, -1, -1):
        for i in np.arange(self.n):
            beta[t, i] = lse(beta[t + 1] + self.A[i, :] + self.b[:, t + 1])
    return beta
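# Sanity-check sketch: forward and backward must agree on the sequence
# likelihood. In log space, lse(alpha[-1]) equals lse(pi + b[:, 0] + beta[0])
# up to floating-point error. Assuming the `hmm` and `obs` from the usage
# sketch above:
hmm._map_b(obs)
alpha = hmm._forward(obs)
beta = hmm._backward(obs)
assert np.isclose(lse(alpha[-1]), lse(hmm.pi + hmm.b[:, 0] + beta[0]))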
def _estimate(self, obs: np.ndarray, alpha, beta, xi, gamma) -> dict:
    """Compute the new model parameters for the Maximization step

    :Parameters:
        - obs: observation sequence O={o1, o2, ... oT}, values drawn from the observation set
        - alpha: forward factors (TxN)
        - beta: backward factors (TxN)
        - xi: Baum-Welch parameter (TxNxN)
        - gamma: Baum-Welch parameter (TxN)

    :Returns:
        HMM model parameters (A, B, pi)
    """
    # M-Step: new A, B, pi
    denom = lse(gamma, axis=0)
    A = lse(xi, axis=0) - denom.reshape((self.n, 1))
    B = np.empty((self.n, self.m), dtype=self.precision)
    for k in np.arange(self.m):
        # Sum gamma over the time steps where symbol k was observed;
        # denom already has shape (N,), matching B[:, k]
        B[:, k] = lse(gamma[k == obs, :], axis=0) - denom
    pi = gamma[0]
    # New model
    model = {}
    model['A'] = A
    model['B'] = B
    model['pi'] = pi
    return model
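# A toy illustration of the masked B update above (illustrative numbers only):
# B[j, k] = log( sum_{t: o_t == k} gamma_t(j) / sum_t gamma_t(j) ).
gamma_toy = np.log(np.array([[0.7, 0.3],
                             [0.2, 0.8],
                             [0.6, 0.4]]))   # T=3, N=2, log domain
obs_toy = np.array([0, 1, 0])
numer = lse(gamma_toy[obs_toy == 0, :], axis=0)  # only the t where o_t == 0
denom = lse(gamma_toy, axis=0)                   # all t
print(np.exp(numer - denom))                     # approx [0.8667 0.4667]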
def _forward(self, obs: np.ndarray, remap: bool = False) -> np.ndarray:
    """Forward algorithm

    ::

        Alpha lattice computation at (t, j):

             q1   q2  ...  qN
        o0
        ...  #    #    #        -> alpha[t-1], A[:, j]
        ot        t,j           -> B[j, ot]
        ...
        oT

    :Parameters:
        - obs: observation sequence O={o1, o2, ... oT}, values drawn from the observation set
        - remap: whether to recompute the emission probability map

    :Returns:
        forward factors alpha(TxN), also called the forward lattice or forward probabilities
    """
    if remap:
        self._map_b(obs)

    # Low-memory version (keeps only the last row of alpha)
    # alpha = self.pi + self.b[:, 0]
    # p = np.copy(alpha)
    # for t in range(1, len(obs)):
    #     for j in range(self.n):
    #         alpha[j] = lse(p + self.A[:, j]) + self.b[j, t]
    #     p = np.copy(alpha)
    # return alpha

    T = len(obs)
    alpha = np.empty((T, self.n), dtype=self.precision)
    # Initial state
    alpha[0] = self.pi + self.b[:, 0]
    # Recursion
    for t in np.arange(1, T):
        for j in np.arange(self.n):
            alpha[t, j] = lse(alpha[t - 1] + self.A[:, j]) + self.b[j, t]
    return alpha
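# The inner j-loop of the recursion can also be vectorized; a sketch of the
# same update done one time step at a time (identical results, one Python
# loop fewer):
# for t in np.arange(1, T):
#     alpha[t] = lse(alpha[t - 1].reshape(self.n, 1) + self.A, axis=0) \
#                + self.b[:, t]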
def _map_b(self, obs: np.ndarray):
    """Compute emission probabilities with the GMM

    Mapping between b and the GMM computation:
        - b: (NxT), b[j, t] is the probability of state j generating ot
        - bm: (NxMxT), bm[j, m, t] is the m-th mixture component of that probability

    :Parameters:
        - obs: observation sequence
    """
    T = obs.shape[0]
    self.b = np.empty((self.n, T), dtype=self.precision)
    self.bm = np.empty((self.n, self.m, T), dtype=self.precision)
    # gaussian_mixture_distribution_log could compute b directly,
    # but it would not expose the per-component values
    # for j in np.arange(self.n):
    #     self.b[j] = gaussian_mixture_distribution_log(
    #         self.w[j], obs, self.mu[j], self.si[j])
    for j in np.arange(self.n):
        for m in np.arange(self.m):
            self.bm[j, m] = gaussian_multi_distribution_log(
                obs, self.mu[j, m], self.si[j, m])
        self.b[j] = lse(self.w[j].reshape(self.m, 1) + self.bm[j], axis=0)
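# gaussian_multi_distribution_log is defined elsewhere in the project; a
# minimal sketch of its assumed behaviour (row-wise multivariate normal
# log-density), with scipy standing in for the project's own implementation:
from scipy.stats import multivariate_normal

def gaussian_multi_distribution_log(obs, mu, si):
    # obs: TxD samples, mu: D-dim mean, si: DxD covariance -> T log-densities
    return multivariate_normal.logpdf(obs, mean=mu, cov=si)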
def _estimate(self, obs: np.ndarray, alpha, beta, xi, gamma):
    """Compute the new model parameters for the Maximization step

    :Parameters:
        - obs: observation sequence O[TxD]={o1, o2, ... oT}, values drawn from the observation set
        - alpha: forward factors (TxN)
        - beta: backward factors (TxN)
        - xi: Baum-Welch parameter (TxNxN)
        - gamma: Baum-Welch parameter (TxN)

    :Returns:
        HMM model parameters (A, pi, mu, si, w)
    """
    T = obs.shape[0]
    # M-Step: new A, pi, mu, si, w
    # A[NxN]
    A = lse(xi, axis=0) - lse(gamma, axis=0).reshape((self.n, 1))
    # pi[N]
    pi = gamma[0]
    # xi_mix[TxNxM]
    xi_mix = np.empty((T, self.n, self.m), dtype=self.precision)
    for t in np.arange(T):
        """
        # Two nested for-loops, kept to show what the array arithmetic replaces
        for j in np.arange(self.n):
            xi_mix[t, j, :] = (alpha[t, j] + beta[t, j]
                               + self.w[j, :] + self.bm[j, :, t])
            xi_mix[t, j, :] -= lse(alpha[t] + beta[t])
            xi_mix[t, j, :] -= lse(self.w[j] + self.bm[j, :, t])
        """
        xi_mix[t] = (alpha[t] + beta[t]).reshape(
            self.n, 1) + self.w + self.bm[:, :, t]
        xi_mix[t] -= lse(alpha[t] + beta[t])
        xi_mix[t] -= lse(self.w + self.bm[:, :, t],
                         axis=1).reshape(self.n, 1)
    # w[NxM]
    w = lse(xi_mix, axis=0) - lse(xi_mix, axis=(0, 2)).reshape(self.n, 1)
    # Exponentiate xi_mix to compute the non-log means and covariances
    xi_mix = np.exp(xi_mix)
    # mu[NxMxD]
    """
    # Two nested for-loops
    mu = np.zeros((self.n, self.m, self.d), dtype=self.precision)
    for i in np.arange(self.n):
        for m in np.arange(self.m):
            mu[i, m] = np.dot(xi_mix[:, i, m].reshape(1, T), obs)
            mu[i, m] /= np.sum(xi_mix[:, i, m])
    """
    mu = np.dot(  # dot(TxNxM -> NxMxT, TxD) -> NxMxD
        np.swapaxes(np.swapaxes(xi_mix, 0, 1), 1, 2),
        obs) / np.sum(xi_mix, axis=0).reshape(self.n, self.m, 1)
    # Sigma[NxMxDxD]
    """
    # Two nested for-loops
    si = np.zeros((self.n, self.m, self.d, self.d), dtype=self.precision)
    for i in np.arange(self.n):
        for m in np.arange(self.m):
            dt = obs - self.mu[i, m]  # TxD
            si[i, m] = np.sum(
                xi_mix[:, i, m].reshape(T, 1, 1)
                * np.matmul(  # matmul(TxDx1, Tx1xD) -> TxDxD
                    dt.reshape(T, self.d, 1),
                    dt.reshape(T, 1, self.d)),
                axis=0)
            si[i, m] /= np.sum(xi_mix[:, i, m])
    """
    # dt = 1x1xTxD - NxMx1xD = NxMxTxD
    dt = obs.reshape(1, 1, T, self.d) - self.mu.reshape(
        self.n, self.m, 1, self.d)
    si = np.sum(
        # TxNxM -> NxMxT -> NxMxTx1x1
        np.swapaxes(np.swapaxes(xi_mix, 0, 1),
                    1, 2).reshape(self.n, self.m, T, 1, 1)
        * np.matmul(  # matmul(NxMxTxDx1, NxMxTx1xD) -> NxMxTxDxD
            dt.reshape(self.n, self.m, T, self.d, 1),
            dt.reshape(self.n, self.m, T, 1, self.d)),
        axis=2) / np.sum(xi_mix, axis=0).reshape(self.n, self.m, 1, 1)
    # Regularize: add a small diagonal term so covariances stay non-singular
    si += self.min_std * np.eye(self.d, dtype=self.precision)
    # New model
    model = {}
    model['A'] = A
    model['pi'] = pi
    model['w'] = w
    model['mu'] = mu
    model['si'] = si
    return model
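# The swapaxes/dot gymnastics in the mu and si updates can also be written
# with np.einsum; a sketch of equivalent expressions (same shapes, same
# results, arguably easier to read):
# mu = np.einsum('tnm,td->nmd', xi_mix, obs) \
#      / np.sum(xi_mix, axis=0)[..., None]
# si = np.einsum('tnm,nmtd,nmte->nmde', xi_mix, dt, dt) \
#      / np.sum(xi_mix, axis=0)[..., None, None]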