Example #1
0
    def cycle(self, max_iters, min_change):
        """Iteratively re-estimate ``self.A``, ``self.B`` and ``self.Pi``.

        Each iteration:

        1. Calls ``self.forward_backward`` on every word in ``self.words``
           and adds up the sum of the last ``alpha`` row of each word.
        2. Stops as soon as that total improved on ``self.sum_of_probs`` by
           less than ``min_change`` (this includes a decrease).
        3. Otherwise stores the total, recomputes ``self.counts`` with
           ``self.soft_count`` and replaces ``self.B``, ``self.A`` and
           ``self.Pi`` with the results of the ``maximize_*`` methods.

        ``self.trans_list`` records ``(A[0][1], A[1][0])`` before the first
        iteration and after every completed update. Once the loop ends the
        final parameters are written through the ``output`` module.

        Args:
            max_iters: Upper bound on the number of iterations. If it is not
                positive, no iteration runs and the final report shows the
                already stored ``self.sum_of_probs`` with iteration index 0.
            min_change: Minimum increase of the summed probability required
                to keep iterating.
        """
        self.trans_list = [(self.A[0][1], self.A[1][0])]

        # The closing report reads both of these names. Bind them up front so
        # that a loop which never executes (max_iters <= 0) cannot leave them
        # undefined.
        i = 0
        sum_of_probs = None

        for i in range(max_iters):
            sum_of_probs = 0.0
            for word in self.words:
                alpha, unused_beta = self.forward_backward(word)
                sum_of_probs += sum(alpha[len(alpha) - 1])
            if self.VERBOSE_FLAG:
                output.sum_of_probs(sum_of_probs, i, self.out)

            # Convergence test: stop before touching the model when the gain
            # over the previously accepted total is too small.
            diff = sum_of_probs - self.sum_of_probs
            if diff < min_change:
                break
            self.sum_of_probs = sum_of_probs
            self.sum_of_probs_list.append(self.sum_of_probs)

            for word in self.words:
                self.counts[word] = self.soft_count(word)

            # Compute every new parameter set from the current model before
            # assigning any of them.
            new_B_values, normalizers = self.maximize_emission()
            new_A_values = self.maximize_transition(normalizers)
            new_Pi_values = self.maximize_Pi()
            self.B = new_B_values
            self.A = new_A_values
            self.Pi = new_Pi_values
            self.trans_list.append((self.A[0][1], self.A[1][0]))

        if sum_of_probs is None:
            # No iteration ran; report the value the model already holds.
            sum_of_probs = self.sum_of_probs

        if self.VERBOSE_FLAG:
            output.letter_prob(self.B, self.states, self.out)
        output.log_letter_prob(self.B, self.states, self.out)
        output.A_output(self.A, self.states, self.out)
        output.show_Pi(self.Pi, self.out)
        output.sum_of_probs(sum_of_probs, i, self.out)
Example #2
0
 def maximize_emission(self):
     """Build normalized per-state letter weights from ``self.counts``.

     For every word in ``self.words`` and every position ``t`` in it, the
     values ``self.counts[word][t][from_state][to_state]`` are summed over
     all ``to_state`` and accumulated under the letter at position ``t``
     for ``from_state``. Each state's accumulated totals are then divided
     by their sum.

     Returns:
         A pair ``(new_B, normalizers)``: ``new_B[state][letter]`` is the
         normalized weight, and ``normalizers[k]`` is the pre-normalization
         total for the k-th state in ``self.states``.
     """
     # totals[state][letter] holds the accumulated soft count of `letter`
     # leaving `state`, summed over every destination state.
     totals = [{} for _ in self.states]

     for word in self.words:
         per_position = self.counts[word]
         for pos, letter in enumerate(word):
             for src in self.states:
                 bucket = totals[src]
                 outgoing = sum(
                     [per_position[pos][src][dst] for dst in self.states]
                 )
                 if letter in bucket:
                     bucket[letter] += outgoing
                 else:
                     bucket[letter] = outgoing

     if self.VERBOSE_FLAG:
         output.init_emissions(self.out)

     normalizers = []
     for src in self.states:
         bucket = totals[src]
         total = sum(bucket.values())
         normalizers.append(total)
         # Snapshot the items so the dict can be rewritten while looping.
         for letter, raw in list(bucket.items()):
             bucket[letter] = raw / total

     if self.VERBOSE_FLAG:
         output.letter_prob(totals, self.states, self.out)
     return totals, normalizers