def gibbs_sample_inside_loop_i_embed(self, i_embed, j_prev_assignment=None,
        anneal_temp=1, i_utt=None):
    """
    Perform the inside loop of Gibbs sampling for data vector `i_embed`.
    """

    # Compute log probability of `X[i]` belonging to each component; this
    # is the bigram version of (24.26) in Murphy, p. 843
    if j_prev_assignment is not None:
        log_prob_z = np.log(self.lm.prob_vec_given_j(j_prev_assignment))
    else:
        log_prob_z = self.lm.log_prob_vec_i()

    # Scale with language model scaling factor
    log_prob_z *= self.lms

    if i_utt is not None and i_utt == i_debug_monitor:
        logger.debug("lms * log(P(z=i|z_prev=j)): " + str(log_prob_z))
        logger.debug(
            "log(p(x|z=i)): "
            + str(self.acoustic_model.components.log_post_pred(i_embed)))

    # Bigram version of (24.23) in Murphy, p. 842
    log_prob_z[:self.acoustic_model.components.K] += (
        self.acoustic_model.components.log_post_pred(i_embed))
    # Empty (inactive) components
    log_prob_z[self.acoustic_model.components.K:] += (
        self.acoustic_model.components.log_prior(i_embed))

    if anneal_temp != 1:
        log_prob_z = log_prob_z - _cython_utils.logsumexp(log_prob_z)
        log_prob_z_anneal = (
            1./anneal_temp*log_prob_z
            - _cython_utils.logsumexp(1./anneal_temp*log_prob_z))
        prob_z = np.exp(log_prob_z_anneal)
    else:
        prob_z = np.exp(log_prob_z - _cython_utils.logsumexp(log_prob_z))
    assert not np.isnan(np.sum(prob_z))

    if i_utt is not None and i_utt == i_debug_monitor:
        logger.debug("P(z=i|x): " + str(prob_z))

    # Sample the new component assignment for `X[i]`
    k = utils.draw(prob_z)

    # There could be several empty (inactive) components at the end
    if k > self.acoustic_model.components.K:
        k = self.acoustic_model.components.K
    if i_utt is not None and i_utt == i_debug_monitor:
        logger.debug(
            "Adding item " + str(i_embed) + " to acoustic model component "
            + str(k))
    self.acoustic_model.components.add_item(i_embed, k)

    return k
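# A minimal standalone sketch of the annealing step above, assuming only
# NumPy; the names `logsumexp_np` and `sample_annealed` are illustrative and
# not part of this module (`_cython_utils.logsumexp` and `utils.draw` play
# these roles in the real code).

import numpy as np

def logsumexp_np(a):
    a_max = np.max(a)
    return a_max + np.log(np.sum(np.exp(a - a_max)))

def sample_annealed(log_prob_z, anneal_temp=1., rng=np.random):
    """Draw a component index from p(z) proportional to exp(log_prob_z/anneal_temp)."""
    log_prob_z = log_prob_z - logsumexp_np(log_prob_z)  # normalize first
    log_prob_z = 1./anneal_temp*log_prob_z              # temperature scaling
    prob_z = np.exp(log_prob_z - logsumexp_np(log_prob_z))
    return rng.choice(len(prob_z), p=prob_z)

# An anneal_temp > 1 flattens prob_z (more exploration early in simulated
# annealing); anneal_temp < 1 sharpens it towards the argmax.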
def log_marg_i(self, i):
    """
    Return the log marginal of the i'th data vector: p(x_i)

    Here it is assumed that x_i is not currently in the acoustic model, so
    the -1 term used in the denominator in (24.26) in Murphy, p. 843, is
    dropped (since x_i is already not included in the counts).
    """
    assert i != -1

    # Compute log probability of `X[i]` belonging to each component;
    # (24.26) in Murphy, p. 843
    log_prob_z = self.lms * (
        np.log(float(self.alpha)/self.components.K_max + self.components.counts)
        - np.log(_cython_utils.sum_ints(self.components.counts) + self.alpha))

    # (24.23) in Murphy, p. 842
    log_prob_z[:self.components.K] += self.components.log_post_pred(i)
    # Empty (inactive) components
    log_prob_z[self.components.K:] += self.components.log_prior(i)

    return _cython_utils.logsumexp(log_prob_z)
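# A toy illustration (made-up counts) of the prior term in `log_marg_i`
# above. Since sum(alpha/K_max + counts) = sum(counts) + alpha, the term
# already exponentiates to a proper distribution over the K_max components.

import numpy as np

alpha, K_max = 1.0, 4
counts = np.array([5, 3, 0, 0])  # two active and two empty components
log_prob_z = np.log(alpha/K_max + counts) - np.log(counts.sum() + alpha)
# log_prob_z is approximately [-0.539, -1.019, -3.584, -3.584], and
# np.exp(log_prob_z).sum() == 1.0: occupied components get more prior mass.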
def log_marg_i(self, i, log_prob_z=[], log_prob_z_given_y=[], scale=False):
    """
    Return the log marginal of the i'th data vector: p(x_i)

    Here it is assumed that x_i is not currently in the acoustic model, so
    the -1 term used in the denominator in (24.26) in Murphy, p. 843, is
    dropped (since x_i is already not included in the counts).

    If `log_prob_z` is given, it is used as the prior over components
    instead of being recomputed; `log_prob_z_given_y` is an optional
    additional log prior term conditioned on `y`. If `scale` is True, the
    predictive likelihoods are normalized over components before being
    combined with the prior.
    """
    assert i != -1

    if not len(log_prob_z):
        # Compute log probability of `X[i]` belonging to each component;
        # (24.26) in Murphy, p. 843
        log_prob_z = self.lms * (
            np.log(float(self.alpha)/self.components.K_max + self.components.counts)
            - np.log(_cython_utils.sum_ints(self.components.counts) + self.alpha))
    if len(log_prob_z_given_y):
        log_prob_z += log_prob_z_given_y
        log_prob_z -= _cython_utils.logsumexp(log_prob_z)

    if scale:
        # Normalize the predictive likelihoods over the components before
        # adding them to the prior
        log_likelihood_z = np.nan*np.ones(log_prob_z.shape)
        log_likelihood_z[:self.components.K] = self.components.log_post_pred(i)
        log_likelihood_z[self.components.K:] = self.components.log_prior(i)
        log_prob_z += log_likelihood_z - _cython_utils.logsumexp(log_likelihood_z)
    else:
        # (24.23) in Murphy, p. 842
        log_prob_z[:self.components.K] += self.components.log_post_pred(i)
        # Empty (inactive) components
        log_prob_z[self.components.K:] += self.components.log_prior(i)

    return _cython_utils.logsumexp(log_prob_z)
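# A sketch (made-up values) of what the `scale=True` branch above changes:
# normalizing the likelihoods first shifts the result by the constant
# logsumexp(log_like), turning the marginal into a relative score.

import numpy as np
from scipy.special import logsumexp

log_prior = np.log(np.array([0.6, 0.3, 0.1]))  # normalized prior over z
log_like = np.array([-10., -12., -15.])        # log p(x|z) per component

log_marg = logsumexp(log_prior + log_like)                          # scale=False
log_scaled = logsumexp(log_prior + log_like - logsumexp(log_like))  # scale=True
assert np.isclose(log_scaled, log_marg - logsumexp(log_like))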
def log_marg_i(self, i, log_prob_z=[]):
    """
    Return the log marginal of the i'th data vector: p(x_i)

    Here it is assumed that x_i is not currently in the acoustic model, so
    the -1 term used in the denominator in (24.26) in Murphy, p. 843, is
    dropped (since x_i is already not included in the counts).
    """
    assert i != -1

    L = len(self.hierarchy[i])
    if not len(log_prob_z):
        # Compute log probability of `X[i]` belonging to each component;
        # (24.26) in Murphy, p. 843
        log_prob_z = self.lms * (
            np.log(float(self.alpha)/self.components.K_max + self.components.counts)
            - np.log(_cython_utils.sum_ints(self.components.counts) + self.alpha))
    log_prior_z = self.log_prob_z_given_l(log_prob_z, L)

    # Predictive likelihoods for the active components, with the mass of all
    # the empty (inactive) components collapsed into the final entry
    log_post_pred = self.components.log_post_pred(i)
    log_post_pred_active = self.components.log_post_pred_active(i, log_post_pred)
    log_likelihood_z = np.nan*np.ones(log_prior_z.shape)
    log_likelihood_z[:-1] = log_post_pred_active
    log_likelihood_z[-1] = self.components.log_post_pred_inactive(
        log_post_pred, log_post_pred_active)

    return _cython_utils.logsumexp(
        log_prior_z + log_likelihood_z - _cython_utils.logsumexp(log_likelihood_z))
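# A sketch (illustrative values, not the module's `log_prob_z_given_l`) of
# the collapsed layout used above: the prior mass of all K_max - K empty
# components is summed into a single final "inactive" slot, matching
# `log_post_pred_active` plus one inactive entry.

import numpy as np
from scipy.special import logsumexp

K, K_max = 2, 5
log_prob_z = np.log(np.array([0.5, 0.3] + [0.2/3]*3))  # full prior over K_max
log_prior_z = np.empty(K + 1)
log_prior_z[:K] = log_prob_z[:K]            # active components kept as is
log_prior_z[K] = logsumexp(log_prob_z[K:])  # all empty components in one slot
assert np.isclose(logsumexp(log_prior_z), 0.)  # still a normalized distribution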
def log_marg_i_embed_unigram(self, i_embed):
    """Return the unigram log marginal of the i'th data vector: p(x_i)"""
    assert i_embed != -1

    # Compute log probability of `X[i]` belonging to each component;
    # (24.26) in Murphy, p. 843
    log_prob_z = self.lms * self.lm.log_prob_vec_i()

    # (24.23) in Murphy, p. 842
    log_prob_z[:self.acoustic_model.components.K] += (
        self.acoustic_model.components.log_post_pred(i_embed))
    # Empty (inactive) components
    log_prob_z[self.acoustic_model.components.K:] += (
        self.acoustic_model.components.log_prior(i_embed))

    return _cython_utils.logsumexp(log_prob_z)
def forward_backward_viterbi(vec_embed_log_probs, log_p_continue, N,
        n_slices_min=0, n_slices_max=0, i_utt=None, anneal_temp=None):
    """
    Viterbi segment an utterance of length `N` based on its
    `vec_embed_log_probs` vector and return a bool vector of boundaries.

    Parameters
    ----------
    vec_embed_log_probs : N(N + 1)/2 length vector
        For t = 1, 2, ..., N the entries `vec_embed_log_probs[i:i + t]`
        contain the log probabilities of sequence[0:t] up to
        sequence[t - 1:t], with i = t(t - 1)/2. If you have an NxN matrix
        where the upper triangular (i, j)'th entry is the log probability of
        sequence[i:j + 1], then by stacking the upper triangular terms
        column-wise, you get `vec_embed_log_probs`. Written out:
        `vec_embed_log_probs` = [log_prob(seq[0:1]), log_prob(seq[0:2]),
        log_prob(seq[1:2]), log_prob(seq[0:3]), ..., log_prob(seq[N-1:N])].
    n_slices_max : int
        See `UnigramAcousticWordseg`. If 0, then the full length is
        considered. This won't necessarily lead to problems, since unassigned
        embeddings would still be ignored: their assignments are -1 and they
        would therefore have a log probability of -inf.
    i_utt : int
        If provided, the index of the utterance for which to print a debug
        trace; this happens if it matches the global `i_debug_monitor`.
    anneal_temp : None
        This parameter is ignored in this function (duck typing).

    Return
    ------
    (log_prob, boundaries) : (float, vector of bool)
        The `log_prob` is the sum of the log probabilities in
        `vec_embed_log_probs` for the embeddings of the final segmentation.
    """
    boundaries = np.zeros(N, dtype=bool)
    boundaries[-1] = True
    log_alphas = np.ones(N)
    log_alphas[0] = 0.0
    n_slices_min_cut = -(n_slices_min - 1) if n_slices_min > 1 else None

    # Forward filtering
    i = 0
    for t in xrange(1, N):
        if np.all(vec_embed_log_probs[i:i + t][-n_slices_max:] +
                log_alphas[:t][-n_slices_max:] == -np.inf):
            log_alphas[t] = -np.inf
        else:
            log_alphas[t] = np.max(
                vec_embed_log_probs[i:i + t][-n_slices_max:n_slices_min_cut] +
                log_alphas[:t][-n_slices_max:n_slices_min_cut])
        i += t

    if i_utt == i_debug_monitor:
        logger.debug("log_alphas: " + str(log_alphas))

    # Backward decoding (argmax rather than sampling)
    t = N
    log_prob = 0.
    while True:
        i = int(0.5*(t - 1)*t)
        log_p_k = (
            vec_embed_log_probs[i:i + t][-n_slices_max:n_slices_min_cut] +
            log_alphas[:t][-n_slices_max:n_slices_min_cut])
        if np.all(log_p_k == -np.inf):
            logger.debug(
                "Only impossible solutions for initial back-sampling for "
                "utterance " + str(i_utt))
            # Look for the first point where we can actually sample and
            # insert a boundary at this point
            while np.all(log_p_k == -np.inf):
                t = t - 1
                if t == 0:
                    break  # this is a very crappy utterance
                i = int(0.5*(t - 1)*t)
                log_p_k = (
                    vec_embed_log_probs[i:i + t][-n_slices_max:] +
                    log_alphas[:t][-n_slices_max:])
            logger.debug("Backtracked to cut " + str(t))
            boundaries[t - 1] = True  # insert the boundary

        p_k = np.exp(log_p_k[::-1] - _cython_utils.logsumexp(log_p_k))
        k = np.argmax(p_k) + 1
        if n_slices_min_cut is not None:
            k += n_slices_min - 1
        if i_utt == i_debug_monitor:
            logger.debug("log P(k): " + str(log_p_k))
            logger.debug("P(k): " + str(p_k))
            logger.debug("argmax P(k) for k: " + str(k))
            logger.debug(
                "Embedding log prob: " + str(vec_embed_log_probs[i + t - k]))
        log_prob += vec_embed_log_probs[i + t - k]
        if t - k - 1 < 0:
            break
        boundaries[t - k - 1] = True
        t = t - k

    return log_prob, boundaries
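# A hypothetical helper (not part of this module) making the column-wise
# `vec_embed_log_probs` layout in the docstring above concrete: stack the
# upper-triangular entries of an N x N matrix of segment log probabilities.

import numpy as np

def matrix_to_vec_embed_log_probs(mat):
    """mat[i, j] is the log probability of sequence[i:j + 1]."""
    N = mat.shape[0]
    vec = []
    for j in range(N):           # column j: segments ending at frame j
        for i in range(j + 1):   # rows 0..j of the upper triangle
            vec.append(mat[i, j])
    return np.array(vec)

# For N = 3 the order is seq[0:1], seq[0:2], seq[1:2], seq[0:3], seq[1:3],
# seq[2:3], i.e. N(N + 1)/2 = 6 entries, with column t - 1 starting at
# index t(t - 1)/2.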
def forward_backward(vec_embed_log_probs, log_p_continue, N, n_slices_min=0,
        n_slices_max=0, i_utt=None, anneal_temp=1):
    """
    Segment an utterance of length `N` based on its `vec_embed_log_probs`
    vector and return a bool vector of boundaries.

    Parameters
    ----------
    vec_embed_log_probs : N(N + 1)/2 length vector
        For t = 1, 2, ..., N the entries `vec_embed_log_probs[i:i + t]`
        contain the log probabilities of sequence[0:t] up to
        sequence[t - 1:t], with i = t(t - 1)/2. If you have an NxN matrix
        where the upper triangular (i, j)'th entry is the log probability of
        sequence[i:j + 1], then by stacking the upper triangular terms
        column-wise, you get `vec_embed_log_probs`. Written out:
        `vec_embed_log_probs` = [log_prob(seq[0:1]), log_prob(seq[0:2]),
        log_prob(seq[1:2]), log_prob(seq[0:3]), ..., log_prob(seq[N-1:N])].
    n_slices_max : int
        See `UnigramAcousticWordseg`. If 0, then the full length is
        considered. This won't necessarily lead to problems, since unassigned
        embeddings would still be ignored: their assignments are -1 and they
        would therefore have a log probability of -inf.

    Return
    ------
    (log_prob, boundaries) : (float, vector of bool)
        The `log_prob` is the sum of the log probabilities in
        `vec_embed_log_probs` for the embeddings of the final segmentation.
    """
    n_slices_min_cut = -(n_slices_min - 1) if n_slices_min > 1 else None

    boundaries = np.zeros(N, dtype=bool)
    boundaries[-1] = True
    log_alphas = np.ones(N)
    log_alphas[0] = 0.0

    # Forward filtering
    i = 0
    for t in xrange(1, N):
        if np.all(vec_embed_log_probs[i:i + t][-n_slices_max:] +
                log_alphas[:t][-n_slices_max:] == -np.inf):
            log_alphas[t] = -np.inf
        else:
            log_alphas[t] = (
                _cython_utils.logsumexp(
                    vec_embed_log_probs[i:i + t][-n_slices_max:n_slices_min_cut] +
                    log_alphas[:t][-n_slices_max:n_slices_min_cut])
                + log_p_continue)
        i += t

    if i_utt == i_debug_monitor:
        logger.debug("log_alphas: " + str(log_alphas))

    # Backward sampling
    t = N
    log_prob = np.float64(0.)
    while True:
        i = int(0.5*(t - 1)*t)
        log_p_k = (
            vec_embed_log_probs[i:i + t][-n_slices_max:n_slices_min_cut] +
            log_alphas[:t][-n_slices_max:n_slices_min_cut])
        assert not np.isnan(np.sum(log_p_k))
        if np.all(log_p_k == -np.inf):
            logger.debug(
                "Only impossible solutions for initial back-sampling for "
                "utterance " + str(i_utt))
            # Look for the first point where we can actually sample and
            # insert a boundary at this point
            while np.all(log_p_k == -np.inf):
                t = t - 1
                if t == 0:
                    break  # this is a very crappy utterance
                i = int(0.5*(t - 1)*t)
                log_p_k = (
                    vec_embed_log_probs[i:i + t][-n_slices_max:] +
                    log_alphas[:t][-n_slices_max:])
            logger.debug("Backtracked to cut " + str(t))
            boundaries[t - 1] = True  # insert the boundary

        if anneal_temp != 1:
            log_p_k = log_p_k[::-1] - _cython_utils.logsumexp(log_p_k)
            log_p_k_anneal = (
                1./anneal_temp*log_p_k -
                _cython_utils.logsumexp(1./anneal_temp*log_p_k))
            p_k = np.exp(log_p_k_anneal)
        else:
            p_k = np.exp(log_p_k[::-1] - _cython_utils.logsumexp(log_p_k))
        k = _cython_utils.draw(p_k) + 1
        if n_slices_min_cut is not None:
            k += n_slices_min - 1
        if i_utt == i_debug_monitor:
            logger.debug("log P(k): " + str(log_p_k))
            logger.debug("P(k): " + str(p_k))
            logger.debug("k sampled from P(k): " + str(k))
            logger.debug(
                "Embedding log prob: " + str(vec_embed_log_probs[i + t - k]))
        log_prob += vec_embed_log_probs[i + t - k]
        if t - k - 1 < 0:
            break
        boundaries[t - k - 1] = True
        t = t - k

    assert log_prob != -np.inf  # an impossible segmentation was sampled
    return log_prob, boundaries
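# A toy usage sketch for `forward_backward` (values made up; assumes this
# module's context, i.e. numpy as np and the function above, is available).
# With N = 3 there are N(N + 1)/2 = 6 candidate embeddings, and -np.inf
# marks segments that may not be used; the defaults n_slices_min=0 and
# n_slices_max=0 consider all durations.

N = 3
vec_embed_log_probs = np.array([
    -1.0,                   # log p(seq[0:1])
    -3.0, -1.2,             # log p(seq[0:2]), log p(seq[1:2])
    -np.inf, -4.0, -1.1])   # log p(seq[0:3]), log p(seq[1:3]), log p(seq[2:3])
log_prob, boundaries = forward_backward(
    vec_embed_log_probs, log_p_continue=np.log(0.95), N=N)
# `boundaries[t - 1]` is True if a word boundary is placed after frame t;
# boundaries[N - 1] is always True.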