def forward(log_emlik, log_startprob, log_transmat):
    """Forward probabilities in log domain.

    Args:
        log_emlik: NxM array of emission log likelihoods, N frames, M states
        log_startprob: log probability to start in state i
        log_transmat: log transition probability from state i to j
    Output:
        forward_prob: NxM array of forward log probabilities for each of the M states in the model
        sequence_loglik: log likelihood of the observation sequence (logsumexp over the last frame)
    """
    log_alpha = np.zeros_like(log_emlik)

    # Initialization: first forward log probability
    log_alpha[0] = log_startprob + log_emlik[0]

    # Recursion: marginalize over predecessor states in the log domain
    for n in range(1, log_alpha.shape[0]):
        for j in range(log_alpha.shape[1]):
            log_alpha[n, j] = tools.logsumexp(log_alpha[n - 1] + log_transmat[:, j]) + log_emlik[n, j]

    # Termination: marginalize over states in the last frame
    sequence_loglik = tools.logsumexp(log_alpha[-1])

    return log_alpha, sequence_loglik
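# Hedged usage sketch for forward() above: a toy 2-state, 3-frame HMM. It assumes
# numpy is imported as np and that tools.logsumexp computes a numerically stable
# log-sum-exp over a 1D array (like scipy.special.logsumexp); all numbers are
# invented purely for illustration.
def _example_forward():
    log_startprob = np.log(np.array([0.6, 0.4]))
    log_transmat = np.log(np.array([[0.7, 0.3],
                                    [0.2, 0.8]]))
    log_emlik = np.log(np.array([[0.5, 0.1],    # 3 frames x 2 states of
                                 [0.3, 0.4],    # hypothetical emission likelihoods
                                 [0.2, 0.6]]))
    log_alpha, seq_loglik = forward(log_emlik, log_startprob, log_transmat)
    # log_alpha[n, j] = log P(x_0..x_n, state_n = j); seq_loglik sums over all state sequences
    return log_alpha, seq_loglik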
def posterior(self, data):
    """
    Calculate posterior over mixture components for each given data point.

    @type data: array_like
    @param data: data points

    @rtype: ndarray
    @return: posterior over mixture components
    """

    # make sure data has right shape
    data = asarray(data).reshape(1, -1)

    # reshape parameters
    priors = self.priors.reshape(-1, 1)
    means = self.means.reshape(-1, 1)
    scales = self.scales.reshape(-1, 1)

    data_centered = data - means

    # calculate posterior; normalize in the log domain for numerical stability
    post = log(priors) - 0.5 * square(data_centered) / square(scales) - log(scales)
    post = exp(post - logsumexp(post, 0))

    return post
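# Hedged sketch of the same responsibility computation as posterior() above, written
# with plain numpy arrays instead of the mixture object. The parameter values are
# invented; the point is that subtracting the log normalizer over components (axis 0)
# before exponentiating keeps the posterior numerically stable.
def _example_component_posterior():
    import numpy
    priors = numpy.array([[0.5], [0.5]])        # K x 1 component priors
    means = numpy.array([[-1.0], [2.0]])        # K x 1 component means
    scales = numpy.array([[0.5], [1.5]])        # K x 1 component standard deviations
    data = numpy.array([[0.3, -0.8, 2.5]])      # 1 x N data points

    logpost = numpy.log(priors) \
        - 0.5 * numpy.square(data - means) / numpy.square(scales) \
        - numpy.log(scales)

    # inline, numerically stable logsumexp over the component axis
    m = logpost.max(axis=0, keepdims=True)
    logZ = m + numpy.log(numpy.exp(logpost - m).sum(axis=0, keepdims=True))

    # columns sum to one: posterior over components per data point
    return numpy.exp(logpost - logZ)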
def train(self, data, max_iter=10, tol=1e-5):
    """
    Fits the parameters to the given data.

    @type data: array_like
    @param data: data stored in columns

    @type max_iter: integer
    @param max_iter: the maximum number of EM iterations

    @type tol: float
    @param tol: stop if performance improves less than this threshold
    """

    value = -mean(self.loglikelihood(data)) \
        + self.gamma * (self.alpha + 1) * sum(log(self.scales)) \
        + self.gamma / 2. * sum(self.beta / square(self.scales))

    if Distribution.VERBOSITY > 2:
        print 0, value

    # compute squared norms of data points
    sqnorms = sum(square(data), 0).reshape(1, -1)

    for i in range(max_iter):
        scales = self.scales.reshape(-1, 1)

        # calculate posterior over scales (E)
        post = -0.5 * sqnorms / square(scales) - self.dim * log(scales)
        post = exp(post - logsumexp(post, 0))

        try:
            # adjust parameters (M)
            self.scales = sqrt((mean(post * sqnorms, 1) + self.gamma * self.beta) / \
                (self.dim * mean(post, 1) + self.gamma * (self.alpha + 1)))

        except FloatingPointError:
            indices, = where(sum(post, 1) == 0.)

            if Distribution.VERBOSITY > 0:
                print 'Degenerated scales {0}.'.format(self.scales[indices])

            # reset problematic scales
            self.scales[indices] = 0.75 + rand(len(indices)) / 2.

            value = self.evaluate(data)

        # check for convergence
        value_ = -mean(self.loglikelihood(data)) \
            + self.gamma * (self.alpha + 1.) * sum(log(self.scales)) \
            + self.gamma / 2. * sum(self.beta / square(self.scales))

        if value - value_ < tol:
            break

        value = value_

        if Distribution.VERBOSITY > 2:
            print i + 1, value
def loglikelihood(self, data):
    # allocate memory
    logjoint = shmarray.zeros([len(self), data.shape[1]])

    # compute joint density over components and data points
    def loglikelihood_(i):
        logjoint[i, :] = self[i].loglikelihood(data) + log(self.priors[i])
    mapp(loglikelihood_, range(len(self)))

    # marginalize
    return asarray(logsumexp(logjoint, 0)).flatten()
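# Hedged serial sketch of the marginalization performed by loglikelihood() above,
# without the shared-memory array (shmarray) and the parallel map (mapp). The
# component log-likelihoods and priors are invented; only the log-sum-exp over the
# component axis is the point being illustrated.
def _example_marginal_loglik():
    import numpy
    # K x N log-likelihoods of N data points under K hypothetical components
    component_loglik = numpy.array([[-1.2, -3.0, -0.7],
                                    [-2.5, -0.9, -1.8]])
    log_priors = numpy.log(numpy.array([0.3, 0.7])).reshape(-1, 1)

    # joint over components and data, then marginalize:
    # log p(x) = logsumexp_k [log p(x|k) + log p(k)]
    logjoint = component_loglik + log_priors
    m = logjoint.max(axis=0, keepdims=True)
    return (m + numpy.log(numpy.exp(logjoint - m).sum(axis=0, keepdims=True))).flatten()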
def energy_gradient(self, data):
    scales = self.scales.reshape(self.num_scales, 1)

    # compute posterior over scales
    sqnorms = sum(square(data), 0).reshape(1, -1)

    # slow, but stable
    post = -0.5 * sqnorms / square(scales) - self.dim * log(scales)
    post = exp(post - logsumexp(post, 0))

    # compute energy gradient
    return multiply(dot(1. / square(scales).T, post), data)
def logdrcdf(norm):
    """
    Logarithm of the derivative of the radial CDF.
    """

    # allocate memory
    result = zeros([self.gsm.num_scales, len(norm)])

    for j in range(self.gsm.num_scales):
        result[j, :] = logdgrcdf(norm / self.gsm.scales[j], self.gsm.dim) - log(self.gsm.scales[j])
    result -= log(self.gsm.num_scales)

    return logsumexp(result, 0)
def viterbi(log_emlik, log_startprob, log_transmat):
    """Viterbi path.

    Args:
        log_emlik: NxM array of emission log likelihoods, N frames, M states
        log_startprob: log probability to start in state i
        log_transmat: transition log probability from state i to j
    Output:
        viterbi_loglik: log likelihood of the best path
        viterbi_path: best path, array of N state indices
    """
    log_viterbi = np.zeros_like(log_emlik)
    backpointers = np.zeros(log_emlik.shape, dtype=int)

    # Initialization
    log_viterbi[0] = log_startprob + log_emlik[0]

    # Recursion: keep only the best predecessor for each state (Viterbi approximation)
    for n in range(1, log_viterbi.shape[0]):
        for j in range(log_viterbi.shape[1]):
            s = log_viterbi[n - 1] + log_transmat[:, j]
            log_viterbi[n, j] = np.max(s) + log_emlik[n, j]
            backpointers[n, j] = np.argmax(s)

    # Termination: the best path ends in the highest-scoring state of the last frame
    viterbi_loglik = np.max(log_viterbi[-1])

    # Backtracking: follow the backpointers from the best final state
    viterbi_path = np.zeros(log_viterbi.shape[0], dtype=int)
    viterbi_path[-1] = np.argmax(log_viterbi[-1])
    for n in range(log_viterbi.shape[0] - 2, -1, -1):
        viterbi_path[n] = backpointers[n + 1, viterbi_path[n + 1]]

    return viterbi_loglik, viterbi_path
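# Hedged usage sketch for viterbi() above, reusing the toy 2-state HMM from the
# forward() example. It assumes numpy is imported as np; the returned path is an
# array of state indices, one per frame, and viterbi_loglik is the score of that
# single best path (a max, not a sum, over state sequences).
def _example_viterbi():
    log_startprob = np.log(np.array([0.6, 0.4]))
    log_transmat = np.log(np.array([[0.7, 0.3],
                                    [0.2, 0.8]]))
    log_emlik = np.log(np.array([[0.5, 0.1],
                                 [0.3, 0.4],
                                 [0.2, 0.6]]))
    viterbi_loglik, viterbi_path = viterbi(log_emlik, log_startprob, log_transmat)
    # viterbi_loglik is never larger than the forward() sequence log likelihood
    return viterbi_loglik, viterbi_path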
def energy(self, data):
    # make sure data has right shape
    data = asarray(data).reshape(1, -1)

    # reshape parameters
    priors = self.priors.reshape(-1, 1)
    means = self.means.reshape(-1, 1)
    scales = self.scales.reshape(-1, 1)

    # joint density of indices and data
    joint = log(priors) - 0.5 * square(data - means) / square(scales) \
        - log(scales)

    # marginalize over components and negate to obtain the energy
    return -logsumexp(joint, 0).reshape(1, -1)
def energy_gradient(self, data):
    # make sure data has right shape
    data = asarray(data).reshape(1, -1)

    # reshape parameters
    priors = self.priors.reshape(-1, 1)
    means = self.means.reshape(-1, 1)
    scales = self.scales.reshape(-1, 1)

    data_centered = data - means

    # calculate posterior
    post = log(priors) - 0.5 * square(data_centered) / square(scales) - log(scales)
    post = exp(post - logsumexp(post, 0))

    return sum(multiply(data_centered / square(scales), post), 0).reshape(1, -1)
def gmmloglik(log_emlik, weights):
    """Log Likelihood for a GMM model based on Multivariate Normal Distribution.

    Args:
        log_emlik: array like, shape (N, K). contains the log likelihoods
            for each of N observations and each of K distributions
        weights: weight vector for the K components in the mixture
    Output:
        gmmloglik: scalar, log likelihood of data given the GMM model.
    """
    log_weights = np.log(weights)

    # sum over observations of the per-frame marginal log likelihoods,
    # marginalizing over components in the log domain
    result = 0
    for i in range(log_emlik.shape[0]):
        result += tools.logsumexp(log_weights + log_emlik[i])

    return result
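# Hedged usage sketch for gmmloglik() above. It assumes numpy is imported as np and
# that tools.logsumexp is available; log_emlik holds invented per-frame, per-component
# log likelihoods for a 3-component GMM evaluated on 4 observations.
def _example_gmmloglik():
    weights = np.array([0.2, 0.5, 0.3])
    log_emlik = np.log(np.array([[0.10, 0.30, 0.60],
                                 [0.25, 0.25, 0.50],
                                 [0.70, 0.20, 0.10],
                                 [0.40, 0.40, 0.20]]))
    # total log likelihood: sum over frames of logsumexp(log weights + per-frame log likelihoods)
    return gmmloglik(log_emlik, weights)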
def logposterior(self, data):
    """
    Computes the log-posterior distribution over components.

    @type data: array_like
    @param data: data points stored in columns
    """

    # allocate memory
    logpost = shmarray.zeros([len(self), data.shape[1]])

    # compute log-joint
    def logposterior_(i):
        logpost[i, :] = self[i].loglikelihood(data) + log(self.priors[i])
    mapp(logposterior_, range(len(self)))

    # normalize to get log-posterior
    logpost -= logsumexp(logpost, 0)

    return asarray(logpost)
def train(self, data, max_iter=10, tol=1e-5):
    """
    Fits the parameters to the given data.

    @type data: array_like
    @param data: data stored in columns

    @type max_iter: integer
    @param max_iter: the maximum number of EM iterations

    @type tol: float
    @param tol: stop if performance improves less than this threshold
    """

    value = self.evaluate(data)

    if Distribution.VERBOSITY > 2:
        print 0, value

    # make sure data has the right shape
    data = asarray(data).reshape(1, -1)

    for i in range(max_iter):
        # reshape parameters
        priors = self.priors.reshape(-1, 1)
        means = self.means.reshape(-1, 1)
        scales = self.scales.reshape(-1, 1)

        # calculate posterior (E)
        post = log(priors) - 0.5 * square(data - means) / square(scales) - log(scales)
        post = exp(post - logsumexp(post, 0))

        try:
            weights = post / sum(post, 1).reshape(-1, 1)

        except FloatingPointError:
            if Distribution.VERBOSITY > 0:
                print 'Mixture with zero posterior probability detected.'

            indices, = where(sum(post, 1) == 0.)

            # reset problematic components
            self.means[indices] = mean(data) + randn(len(indices)) / 100.
            self.scales[indices] = std(data) * (0.75 + rand(len(indices)) / 2.)

            value = self.evaluate(data)
            continue

        # update parameters (M)
        self.priors = sum(post, 1) / sum(post)
        self.means = sum(multiply(data, weights), 1)
        self.scales = sqrt(sum(multiply(square(data - self.means.reshape(-1, 1)), weights), 1))

        # check for convergence
        value_ = self.evaluate(data)

        if value - value_ < tol:
            break

        value = value_

        if Distribution.VERBOSITY > 2:
            print i + 1, value