def cycle(self, max_iters, min_change):
    # Run EM (Baum-Welch) iterations until the total probability of the data
    # improves by less than min_change, or max_iters is reached.
    self.trans_list = [(self.A[0][1], self.A[1][0])]
    for i in range(max_iters):
        # E-step, part 1: total forward probability of all words under the current model.
        sum_of_probs = 0.0
        for word in self.words:
            alpha, unused_beta = self.forward_backward(word)
            sum_of_probs += sum(alpha[len(alpha) - 1])
        if self.VERBOSE_FLAG:
            output.sum_of_probs(sum_of_probs, i, self.out)
        # Convergence test: stop when the improvement falls below min_change.
        diff = sum_of_probs - self.sum_of_probs
        if diff < min_change:
            break
        self.sum_of_probs = sum_of_probs
        self.sum_of_probs_list.append(self.sum_of_probs)
        # E-step, part 2: collect soft counts for every word.
        for word in self.words:
            self.counts[word] = self.soft_count(word)
        # M-step: re-estimate emission, transition, and initial-state probabilities.
        new_B_values, normalizers = self.maximize_emission()
        new_A_values = self.maximize_transition(normalizers)
        new_Pi_values = self.maximize_Pi()
        self.B = new_B_values
        self.A = new_A_values
        self.Pi = new_Pi_values
        self.trans_list.append((self.A[0][1], self.A[1][0]))
        if self.VERBOSE_FLAG:
            output.letter_prob(self.B, self.states, self.out)
            output.log_letter_prob(self.B, self.states, self.out)
            output.A_output(self.A, self.states, self.out)
            output.show_Pi(self.Pi, self.out)
    output.sum_of_probs(sum_of_probs, i, self.out)
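# Illustrative sketch (an assumption, not the module's own forward_backward):
# the per-word probability accumulated in cycle() is sum(alpha[-1]), i.e. the
# forward probability of the whole word summed over final states. A minimal
# standalone forward pass, with hypothetical Pi (list), A (list of lists), and
# B (dict of letter probabilities per state) mirroring the structures used above:
def _forward_sketch(word, states, Pi, A, B):
    # alpha[t][s] = P(word[:t+1], state at time t = s)
    alpha = [[Pi[s] * B[s].get(word[0], 0.0) for s in states]]
    for t in range(1, len(word)):
        alpha.append([
            sum(alpha[t - 1][r] * A[r][s] for r in states) * B[s].get(word[t], 0.0)
            for s in states
        ])
    return alpha
# Example: sum(_forward_sketch("ab", [0, 1], Pi, A, B)[-1]) gives P("ab") under
# the current parameters, which is the quantity summed over words in cycle().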
def maximize_emission(self):
    # new_B[state][letter] gives TOTAL soft count for given letter FROM given state.
    new_B = [{} for _ in self.states]
    for word in self.words:
        count = self.counts[word]
        for t in range(len(word)):
            for from_state in self.states:
                # Summing the soft counts over to_state gives the expected number
                # of times word[t] was emitted from from_state at position t.
                to_sum = [count[t][from_state][to_state] for to_state in self.states]
                if word[t] in new_B[from_state]:
                    new_B[from_state][word[t]] += sum(to_sum)
                else:
                    new_B[from_state][word[t]] = sum(to_sum)
    if self.VERBOSE_FLAG:
        output.init_emissions(self.out)
    # Normalize each state's letter counts into a probability distribution;
    # the per-state totals are returned for use in maximize_transition.
    normalizers = []
    for from_state in self.states:
        normalizer = sum(new_B[from_state].values())
        normalizers.append(normalizer)
        for letter in new_B[from_state].keys():
            new_B[from_state][letter] = new_B[from_state][letter] / normalizer
    if self.VERBOSE_FLAG:
        output.letter_prob(new_B, self.states, self.out)
    return new_B, normalizers
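# Illustrative sketch (an assumption, not part of the original module): the
# normalization step in maximize_emission() divides each state's letter counts
# by their total, turning raw soft counts into a per-state emission
# distribution; the totals are the normalizers handed on to maximize_transition.
# The toy numbers below are hypothetical and only show the arithmetic.
def _normalize_sketch(raw_counts):
    # Each element of raw_counts is one state's {letter: soft count} dict.
    normalized = []
    totals = []
    for state_counts in raw_counts:
        total = sum(state_counts.values())
        totals.append(total)
        normalized.append({letter: c / total for letter, c in state_counts.items()})
    return normalized, totals

# Example: _normalize_sketch([{'a': 3.0, 'b': 1.0}]) returns
# ([{'a': 0.75, 'b': 0.25}], [4.0]).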