def pdf_to_cdf(categorial):
    """Turn a log-space categorical pdf into a cumulative distribution.

    `categorial` is a sequence of (log_prob, value) pairs.  Returns a list of
    (running_log_sum, value) pairs where the running sum is accumulated with
    `logadd` (log-space addition).
    """
    cdf = []
    total = None
    for logprob, value in categorial:
        # First entry seeds the accumulator; later entries are log-added.
        total = logprob if total is None else logadd(total, logprob)
        cdf.append((total, value))
    return cdf
# NOTE(review): this is an exact duplicate of the pdf_to_cdf defined directly
# above (only whitespace differs); this later definition shadows the earlier
# one at import time — one of the two copies should be deleted.
def pdf_to_cdf(categorial):
    """Turn a log-space categorical pdf [(log_prob, value), ...] into a
    cumulative distribution [(running_log_sum, value), ...]."""
    cdf = []
    total = None
    for (p, x) in categorial:
        if total is None:
            # First entry seeds the accumulator.
            total = p
        else:
            # Subsequent entries are accumulated in log space.
            total = logadd(total, p)
        cdf.append((total, x))
    return cdf
def expected_rule_counts(self, inside_probs, outside_probs):
    """Collect the expected (log-space) count of every rule used in this chart.

    For each child item of each split, the contribution is
    outside(item) + inside(child) + rule_weight - inside(START),
    i.e. the posterior log-probability mass of derivations using that child's
    rule, normalized by the total sentence score (E-step of inside-outside,
    cf. Graehl & Knight 2004).

    :param inside_probs: dict mapping items to log inside scores; must
        contain "START" (the goal item) — its score is the normalizer.
    :param outside_probs: dict mapping items to log outside scores.
    :returns: defaultdict(float) mapping rule_id to its expected log count.
    """
    counts = defaultdict(float)
    beta_sentence = inside_probs["START"]  # log score of the whole sentence
    for item in self:
        for split in self[item]:
            _, children = zip(*split.items())
            for child in children:
                childgamma = (outside_probs[item] + inside_probs[child]
                              + child.rule.weight)
                contribution = childgamma - beta_sentence
                rule_id = child.rule.rule_id
                # BUG FIX: the old code always did
                # logadd(counts[rule_id], contribution); on a rule's first
                # encounter the defaultdict supplied 0.0, but in log space
                # 0.0 means log(1), not "no mass yet", so every count was
                # inflated by an extra unit of probability.  Assign directly
                # the first time, log-add thereafter.
                if rule_id in counts:
                    counts[rule_id] = logadd(counts[rule_id], contribution)
                else:
                    counts[rule_id] = contribution
    return counts
def normalize_by_groups(self, groups):
    """Normalize rule weights (in log space) within equivalence classes.

    `groups` maps each rule to the id of its equivalence class.  Every rule's
    log weight is reduced by the log-sum of all weights in its class, so that
    each class's weights sum to probability one.  Mutates the grammar in place.
    """
    # Pass 1: accumulate each class's total log weight.
    totals = {}
    for rule in self:
        key = groups[rule]
        weight = self[rule].weight
        totals[key] = logadd(totals[key], weight) if key in totals else weight
    # Pass 2: divide (subtract in log space) every rule by its class total.
    for rule in self:
        self[rule].weight -= totals[groups[rule]]
def compute_scores(chart, item):
    """
    Here we compute the outside scores for each rule and split, i.e. the sum
    of all possible trees that contain this item but do not decompose it.
    This is the outside computation of the inside-outside algorithm for wRTGs
    described in Graehl&Knight 2004 "Training tree transducers".

    NOTE(review): reads and mutates the module-level dicts `inside_probs` and
    `outside_probs` (neither is a parameter), and `outside_probs[item]` must
    already hold this item's outside score on entry — confirm callers seed the
    root before the first call.
    """
    if item in chart:
        for split in chart[item]:
            nts, children = zip(*split.items())
            # An item may be part of multiple splits, so we need to add the
            # outside score when we encounter it a second time.
            for child in children:
                # A child's outside mass is the parent's outside score times
                # the inside scores of its siblings times the rule weight
                # (all sums here, since everything is in log space).
                inside_for_siblings = [inside_probs[c] for c in children if c != child]
                alpha_for_child = outside_probs[item] + sum(inside_for_siblings) + child.rule.weight
                if child in outside_probs:
                    outside_probs[child] = logadd(outside_probs[child], alpha_for_child)
                else:
                    outside_probs[child] = alpha_for_child
                # NOTE(review): recursing immediately means the child's
                # descendants are visited before the child has received
                # outside mass from any later split/parent; verify the chart
                # structure guarantees this order is safe (or that repeated
                # visits via the logadd branch make it converge correctly).
                compute_scores(chart, child)