def train(self, parallel_corpus):
    counts = Counts()
    for aligned_sentence in parallel_corpus:
        trg_sentence = aligned_sentence.words
        src_sentence = [None] + aligned_sentence.mots  # prepend the null word

        # E step (a): Compute normalization factors to weigh counts
        total_count = self.prob_all_alignments(src_sentence, trg_sentence)

        # E step (b): Collect counts
        for t in trg_sentence:
            for s in src_sentence:
                count = self.prob_alignment_point(s, t)
                normalized_count = count / total_count[t]
                counts.t_given_s[t][s] += normalized_count
                counts.any_t_given_s[s] += normalized_count

    # M step: Update probabilities with maximum likelihood estimate
    self.maximize_lexical_translation_probabilities(counts)
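The helpers referenced here (Counts, prob_all_alignments, prob_alignment_point, maximize_lexical_translation_probabilities) belong to the surrounding model class, which is not shown. As a minimal usage sketch, assuming this train method has the same interface as nltk.translate.ibm1.IBMModel1, whose constructor runs one call to train per EM iteration over AlignedSent pairs:

    from nltk.translate import AlignedSent, IBMModel1

    # Each AlignedSent pairs a target-language sentence (.words) with its
    # source-language translation (.mots); train() prepends None to .mots
    # as the null word.
    bitext = [
        AlignedSent(["das", "haus"], ["the", "house"]),
        AlignedSent(["das", "buch"], ["the", "book"]),
        AlignedSent(["ein", "buch"], ["a", "book"]),
    ]

    # Run 5 EM iterations; each iteration is one pass of train() above.
    model = IBMModel1(bitext, 5)

    # After training, translation_table[t][s] estimates P(t | s).
    print(model.translation_table["das"]["the"])

The exact probabilities depend on the number of iterations; with enough passes, mass concentrates on co-occurring word pairs such as ("das", "the").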