def m_step(self, all_word_paths):
    """Turn scored alignment paths into per-element pseudo-probabilities.

    Every path score is first normalized by the combined (``logSum``) score
    of its word's paths, then accumulated per aligned ``(a, b)`` element
    pair.  The accumulated table is appended to ``self.pseudocounts`` and a
    row-normalized copy of it is returned.

    NOTE(review): scores look like log-domain values (they are combined
    with ``logSum``/``logAdd`` and normalized by subtraction) -- confirm
    against those helpers.

    Args:
        all_word_paths: iterable of words; each word is a sized collection
            of ``(path_elements, path_score)`` pairs, where
            ``path_elements`` yields ``(a_element, b_element)`` pairs.

    Returns:
        dict mapping each a-element to ``{b_element: value}``, where the
        value is the accumulated count minus the row total, or ``log(0)``
        when the row total is infinite.
    """
    counts = {}
    for paths in all_word_paths:
        # words without any candidate path contribute nothing
        if len(paths) == 0:
            continue

        # normalizer for this word: combine only the finite path scores
        finite_scores = []
        for candidate in paths:
            if not isinf(candidate[1]):
                finite_scores.append(candidate[1])
        word_norm = logSum(finite_scores)

        for pairs, raw_score in paths:
            scaled = raw_score - word_norm
            for left, right in pairs:
                # a-elements missing from the latest score table are
                # pooled under the None key
                if left not in self.alignment_scores[-1]:
                    left = None
                row = counts.setdefault(left, {})
                if right in row and not isinf(row[right]):
                    row[right] = logAdd(scaled, row[right])
                else:
                    # first sighting, or the stored value is infinite:
                    # overwrite rather than combine
                    row[right] = scaled

    self.pseudocounts.append(counts)

    # normalize each a-element's row by its combined finite total
    probs = {}
    for left, row in counts.items():
        normalized = {}
        probs[left] = normalized
        row_total = logSum([value for value in row.values()
                            if not isinf(value)])
        for right, count in row.items():
            if isinf(row_total):
                normalized[right] = log(0)
            else:
                normalized[right] = count - row_total
    return probs
def convert_allowables(allowables, delete_prob, insert_prob):
    """Build initial alignment scores from a table of allowed alignments.

    Args:
        allowables: mapping of letter -> collection of phones allowed for
            that letter; the collection is indexed by position.
        delete_prob: probability whose log is added to the score of a
            ``None`` phone (a deletion).
        insert_prob: probability whose log is added to every score under a
            ``None`` letter (an insertion).

    Returns:
        dict of ``{letter: {phone: score}}`` where each score has the
        ``logSum`` of that letter's scores subtracted from it.
    """
    # NOTE(review): this function is defined a second time later in this
    # file; the later definition is the one in effect after import.
    table = {}
    for letter in allowables:
        row = {}
        table[letter] = row
        phones = allowables[letter]

        # unnormalized score per phone: 0 unless a penalty applies
        raw_scores = []
        for phone in phones:
            deletion = log(delete_prob) if phone is None else 0
            insertion = log(insert_prob) if letter is None else 0
            raw_scores.append(deletion + insertion)

        # normalize by the combined score of this letter's phones
        normalizer = logSum(raw_scores)
        for position, raw in enumerate(raw_scores):
            row[phones[position]] = raw - normalizer
    return table
def convert_allowables(allowables, delete_prob, insert_prob):
    """Convert allowed letter/phone alignments into normalized scores.

    Args:
        allowables: mapping of letter -> collection of phones allowed for
            that letter; the collection is indexed by position.
        delete_prob: probability whose log penalizes aligning a letter to
            a ``None`` phone (a deletion).
        insert_prob: probability whose log penalizes aligning a ``None``
            letter to a phone (an insertion).

    Returns:
        dict of ``{letter: {phone: score}}``; within each letter the
        scores are reduced by the ``logSum`` of that letter's scores.
    """
    # NOTE(review): an earlier definition of this same function appears
    # above in this file; this later one replaces it at import time.
    scores_by_letter = {}
    for letter in allowables:
        scores_by_letter[letter] = {}
        candidates = allowables[letter]

        penalties = []
        for phone in candidates:
            penalty = 0
            if phone is None:
                # deletion
                penalty = penalty + log(delete_prob)
            if letter is None:
                # insertion
                penalty = penalty + log(insert_prob)
            penalties.append(penalty)

        # subtract the combined score so each letter's row is normalized
        combined = logSum(penalties)
        for index, penalty in enumerate(penalties):
            scores_by_letter[letter][candidates[index]] = penalty - combined
    return scores_by_letter