def log_prob(self, x, marginal=None, reg=None):
    """
    Log-probability of x under the model's Gaussian parameters.

    :param x:           data to evaluate
    :param marginal:    dimensions on which to marginalize before evaluating
    :type marginal:     slice
    :param reg:         [float] optional diagonal regularization added to the
        marginal covariance before evaluation
    :return:            log-probabilities as returned by multi_variate_normal
    """
    if marginal is not None:
        _mu = self.mu[marginal]
        _sigma = self.sigma[marginal, marginal]

        if reg is not None:
            # Out-of-place addition: slicing self.sigma with basic (slice)
            # indexing returns a *view*, so "+=" would silently corrupt
            # the model's stored covariance on every call.
            _sigma = _sigma + np.eye(marginal.stop - marginal.start) * reg

        return multi_variate_normal(x, _mu, _sigma)

    return multi_variate_normal(x, self.mu, self.sigma)
def compute_resp(self, demo=None, dep=None, table=None, marginal=None, norm=True):
    """
    Compute per-state responsibilities of the demonstration samples.

    :param demo:        [np.array([nb_samples, nb_dim])] data to evaluate
    :param dep:         optional list of dimension-index groups; when given, each
        state's likelihood is computed block-diagonally over those groups
    :param table:       unused here; kept for interface compatibility
    :param marginal:    slice of dimensions to marginalize on; [] disables the
        likelihood computation entirely, None uses all dimensions
    :param norm:        [bool] if True, normalize responsibilities over states
    :return:            [np.array([nb_states, nb_samples])]
    """
    sample_size = demo.shape[0]

    B = np.ones((self.nb_states, sample_size))

    if marginal != []:
        # The marginal parameters do not depend on the state index, so
        # compute them once instead of once per state inside the loop.
        if marginal is not None:
            mu, sigma = self.get_marginal(marginal)
        else:
            mu, sigma = self.mu, self.sigma

        for i in range(self.nb_states):
            if dep is None:
                B[i, :] = multi_variate_normal(demo, mu[i], sigma[i], log=False)
            else:  # block-diagonal computation over the dependency groups
                B[i, :] = 1.0
                for d in dep:
                    dGrid = np.ix_([i], d, d)
                    # NOTE(review): sigma[dGrid][:, :, 0] has shape (1, len(d)),
                    # not (len(d), len(d)); verify against multi_variate_normal —
                    # sigma[dGrid][0] (the full covariance block) may be intended.
                    B[[i], :] *= multi_variate_normal(demo, mu[i, d],
                                                      sigma[dGrid][:, :, 0], log=False)

    B *= self.priors[:, None]

    if norm:
        return B / np.sum(B, axis=0)
    else:
        return B
def em(self, data, reg=1e-8, maxiter=100, minstepsize=1e-5, diag=False, reg_finish=False,
       kmeans_init=False, random_init=True, dep_mask=None, verbose=False, only_scikit=False,
       no_init=False):
    """
    Fit the model parameters to data with expectation-maximization.

    :param data:        [np.array([nb_timesteps, nb_dim])]
    :param reg:         [list([nb_dim]) or float] Regularization for EM
    :param maxiter:     [int] Maximum number of EM iterations
    :param minstepsize: [float] Convergence threshold on the log-likelihood increase
    :param diag:        [bool] Use diagonal covariance matrices
    :param reg_finish:  [np.array([nb_dim]) or float] Regularization for finish step
    :param kmeans_init: [bool] Init components with k-means.
    :param random_init: [bool] Init components randomly.
    :param dep_mask:    [np.array([nb_dim, nb_dim])] Composed of 0 and 1. Mask given
        the dependencies in the covariance matrices
    :param verbose:     [bool] Print convergence information
    :param only_scikit: [bool] Only run the scikit-learn initialization, skip EM
    :param no_init:     [bool] Skip initialization and reuse current parameters
    :return:            GAMMA [np.array([nb_states, nb_samples])] responsibilities,
        or None when only_scikit is True
    """
    self.reg = reg

    nb_min_steps = 5  # min num iterations
    nb_max_steps = maxiter  # max iterations
    max_diff_ll = minstepsize  # max log-likelihood increase

    nb_samples = data.shape[0]

    if not no_init:
        if random_init and not only_scikit:
            self.init_params_random(data)
        elif kmeans_init and not only_scikit:
            self.init_params_kmeans(data)
        else:
            if diag:
                self.init_params_scikit(data, 'diag')
            else:
                self.init_params_scikit(data, 'full')

    if only_scikit:
        return

    # Work in column-major layout: data is (nb_dim, nb_samples) from here on.
    data = data.T

    LL = np.zeros(nb_max_steps)
    for it in range(nb_max_steps):
        # E-step: per-state log-likelihoods weighted by the priors.
        L_log = np.zeros((self.nb_states, nb_samples))

        for i in range(self.nb_states):
            L_log[i, :] = np.log(self.priors[i]) + multi_variate_normal(
                data.T, self.mu[i], self.sigma[i], log=True)

        L = np.exp(L_log)
        GAMMA = L / np.sum(L, axis=0)  # responsibilities, normalized over states
        GAMMA2 = GAMMA / np.sum(GAMMA, axis=1)[:, np.newaxis]  # normalized over samples

        # M-step
        self.mu = np.einsum('ac,ic->ai', GAMMA2, data)  # a states, c sample, i dim

        # dx: (nb_states, nb_dim, nb_samples) centered data
        dx = data[None, :] - self.mu[:, :, None]

        self.sigma = np.einsum('acj,aic->aij',
                               np.einsum('aic,ac->aci', dx, GAMMA2),
                               dx)  # a states, c sample, i-j dim

        self.sigma += self.reg

        if diag:
            self.sigma *= np.eye(self.nb_dim)

        if dep_mask is not None:
            self.sigma *= dep_mask

        # Update initial state probability vector
        self.priors = np.mean(GAMMA, axis=1)

        LL[it] = np.mean(np.log(np.sum(L, axis=0)))

        # Check for convergence
        if it > nb_min_steps:
            if LL[it] - LL[it - 1] < max_diff_ll:
                if reg_finish is not False:
                    # Recompute covariances with the finishing regularization.
                    self.sigma = np.einsum('acj,aic->aij',
                                           np.einsum('aic,ac->aci', dx, GAMMA2),
                                           dx) + reg_finish

                if verbose:
                    # print() call syntax so this also runs on Python 3;
                    # the original used a Python-2 print statement here.
                    print(colored('Converged after %d iterations: %.3e' % (it, LL[it]),
                                  'red', 'on_white'))
                return GAMMA

    if verbose:
        print(
            "GMM did not converge before reaching max iteration. Consider augmenting the number of max iterations."
        )
    return GAMMA