Example 1
def pdf_to_cdf(categorial):
    # Convert a categorical distribution given as (log-probability, outcome)
    # pairs into a cumulative distribution, accumulating in log space with
    # logadd so that small probabilities are not lost to underflow.
    cdf = []
    total = None
    for (p, x) in categorial:
        if total is None:
            total = p
        else:
            total = logadd(total, p)
        cdf.append((total, x))
    return cdf
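
None of the examples define logadd itself; throughout, it stands for a numerically stable log(exp(a) + exp(b)), i.e. addition of probabilities carried in log space. Below is a minimal sketch of such a helper (an assumption, not the original implementation) together with a quick use of pdf_to_cdf on a toy categorical, showing that the last CDF entry is the log of the total mass:

import math

def logadd(a, b):
    # Sketch of a stable log(exp(a) + exp(b)): factor out the larger term so
    # the exponential never overflows. Assumption, not the original helper.
    if a == float("-inf"):
        return b
    if b == float("-inf"):
        return a
    hi, lo = (a, b) if a >= b else (b, a)
    return hi + math.log1p(math.exp(lo - hi))

# Toy categorical with probabilities 0.5, 0.3, 0.2 given as
# (log-probability, outcome) pairs, as pdf_to_cdf above expects.
categorical = [(math.log(0.5), "a"), (math.log(0.3), "b"), (math.log(0.2), "c")]
cdf = pdf_to_cdf(categorical)
print([(round(math.exp(t), 3), x) for (t, x) in cdf])
# [(0.5, 'a'), (0.8, 'b'), (1.0, 'c')]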
Example 2
    def expected_rule_counts(self, inside_probs, outside_probs):
        # Requires: from collections import defaultdict
        # Expected counts are accumulated in log space, so the identity
        # element is log(0) = -inf rather than defaultdict(float)'s 0.0
        # (which would be log 1).
        counts = defaultdict(lambda: float("-inf"))

        beta_sentence = inside_probs["START"]

        for item in self:
            for split in self[item]:
                nts, children = zip(*split.items())
                for child in children:
                    # Log weight of using this child's rule at this position,
                    # normalised by the inside score of the whole sentence.
                    childgamma = outside_probs[item] + inside_probs[child] + child.rule.weight
                    counts[child.rule.rule_id] = logadd(counts[child.rule.rule_id], childgamma - beta_sentence)
        return counts
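
In probability space, each term exp(childgamma - beta_sentence) is the posterior probability that the rule is used at that position, and the expected count is simply the sum of these posteriors over all positions. The log-space accumulation with logadd (starting from log 0 = -inf) computes exactly that sum; a small self-contained check with made-up posteriors and the same logadd sketch as above:

import math

def logadd(a, b):
    # Stable log(exp(a) + exp(b)); sketch, not the original helper.
    if a == float("-inf"):
        return b
    if b == float("-inf"):
        return a
    hi, lo = max(a, b), min(a, b)
    return hi + math.log1p(math.exp(lo - hi))

# Made-up log posteriors (childgamma - beta_sentence) for three occurrences
# of the same rule in a chart.
log_posteriors = [math.log(0.25), math.log(0.5), math.log(0.1)]

log_count = float("-inf")          # log(0), the identity for logadd
for lp in log_posteriors:
    log_count = logadd(log_count, lp)

print(math.exp(log_count))                           # ~0.85, the expected count
print(sum(math.exp(lp) for lp in log_posteriors))    # same value, summed directly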
Example 3
    def normalize_by_groups(self, groups):
        """
        Normalize the grammar given a dictionary mapping rules to equivalence class ids.
        """
        # First pass: compute the log normalizer (logadd of all weights) for
        # each equivalence class.
        norms = {}
        for r in self:
            group = groups[r]
            if group in norms:
                norms[group] = logadd(norms[group], self[r].weight)
            else:
                norms[group] = self[r].weight
        # Second pass: subtracting the log normalizer makes each class sum to
        # 1 in probability space.
        for r in self:
            self[r].weight = self[r].weight - norms[groups[r]]
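
A usage sketch for normalize_by_groups. The Rule and TinyGrammar classes below are hypothetical stand-ins (the real container only needs iteration over rules, lookup of a .weight log score, and the groups mapping), and logadd is again the log(exp(a) + exp(b)) sketch; after normalization, the weights in each equivalence class exponentiate and sum to 1:

import math

def logadd(a, b):
    # Stable log(exp(a) + exp(b)); sketch, not the original helper.
    hi, lo = max(a, b), min(a, b)
    return hi + math.log1p(math.exp(lo - hi))

class Rule:
    # Hypothetical rule object carrying only a log weight.
    def __init__(self, weight):
        self.weight = weight

class TinyGrammar(dict):
    # Hypothetical grammar container: maps rule id -> Rule, with the
    # normalize_by_groups method copied from above.
    def normalize_by_groups(self, groups):
        norms = {}
        for r in self:
            group = groups[r]
            if group in norms:
                norms[group] = logadd(norms[group], self[r].weight)
            else:
                norms[group] = self[r].weight
        for r in self:
            self[r].weight = self[r].weight - norms[groups[r]]

g = TinyGrammar({
    "S -> NP VP": Rule(math.log(2.0)),
    "S -> VP":    Rule(math.log(6.0)),
    "NP -> D N":  Rule(math.log(3.0)),
})
groups = {"S -> NP VP": "S", "S -> VP": "S", "NP -> D N": "NP"}
g.normalize_by_groups(groups)
print({r: round(math.exp(g[r].weight), 3) for r in g})
# {'S -> NP VP': 0.25, 'S -> VP': 0.75, 'NP -> D N': 1.0}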
Example 4
def compute_scores(chart, item):
    """
    Here we compute the outside scores for each rule and split, i.e. the
    sum over all possible trees that contain this item but do not decompose it.
    This is the outside computation of the inside-outside algorithm
    for wRTGs described in Graehl & Knight 2004, "Training Tree Transducers".
    """
    # inside_probs and outside_probs are assumed to be dictionaries of log
    # scores defined in the enclosing scope, with outside_probs seeded at the
    # root item before the first call.
    if item in chart:
        for split in chart[item]:
            nts, children = zip(*split.items())

            # An item may be part of multiple splits, so we add (logadd) to its
            # outside score when we encounter it a second time.
            for child in children:
                inside_for_siblings = [inside_probs[c] for c in children if c != child]
                alpha_for_child = outside_probs[item] + sum(inside_for_siblings) + child.rule.weight
                if child in outside_probs:
                    outside_probs[child] = logadd(outside_probs[child], alpha_for_child)
                else:
                    outside_probs[child] = alpha_for_child
                compute_scores(chart, child)
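
The recursion needs a base case: the outside score of the root item is log 1 = 0.0 and has to be seeded before the first call. The driver below is a sketch only; the Item and Rule namedtuples and the one-split chart are guesses at the structures compute_scores expects (splits as dicts mapping nonterminals to child items, each child carrying a .rule.weight log score), and inside_probs/outside_probs are the module-level dicts the function reads and writes:

import math
from collections import namedtuple

# Hypothetical stand-ins for the chart structures traversed above.
Rule = namedtuple("Rule", "rule_id weight")
Item = namedtuple("Item", "label rule")

a = Item("A", Rule("r_a", math.log(0.5)))
b = Item("B", Rule("r_b", math.log(0.5)))
root = Item("START", Rule("r_root", 0.0))

# One split: the root decomposes into children A and B, which are leaves.
chart = {root: [{"A": a, "B": b}]}

inside_probs = {a: math.log(0.5), b: math.log(0.5), root: math.log(0.25)}
outside_probs = {root: 0.0}   # base case: outside(root) = log 1

compute_scores(chart, root)
print(round(math.exp(outside_probs[a]), 3))   # 0.25 under this toy weighting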