def addTerminalRule(self, rule):
    """Register a unary rule at its hierarchy level.

    A level-0 rule is handed to DBGrammar.addTerminalRule and nothing
    else happens.  Any other rule is attached as a child of the rule
    that self.matchTermRule finds one level up, then indexed in
    self.intermedTerminalRules under its level and rule.rhs[0].

    When no parent is found, a warning goes to stderr and a HierRule
    with the same lhs, rhs and prob is created at level - 1, added
    through this same method, and used as the parent.
    """
    assert(rule.unary())

    if rule.level == 0:
        DBGrammar.addTerminalRule(self, rule)
        return

    # Level-1 rules look for their parent in self.terminalRules, deeper
    # rules in the intermediate table of the level above.
    if rule.level != 1:
        candidates = self.intermedTerminalRules[rule.level - 1]
    else:
        candidates = self.terminalRules

    parent = self.matchTermRule(rule, candidates)
    if not parent:
        print >>sys.stderr, "WARNING: Can't find matching rule for", rule
        # Build the missing parent and register it before linking.
        parent = HierRule(rule.level - 1)
        parent.setup(rule.lhs, rule.rhs, rule.prob)
        self.addTerminalRule(parent)

    parent.children.append(rule)
    token = rule.rhs[0]
    self.intermedTerminalRules[rule.level][token].append(rule)
def __init__(self, dirname, mode="r"):
    """Set up the hierarchical grammar kept under dirname.

    dirname and mode are forwarded unchanged to DBGrammar.__init__.
    The two intermediate rule tables start out empty.  In mode "w" the
    hierarchy table starts empty as well; in any other mode it is
    unpickled from the "hierarchy" file under self.dirname, which must
    already exist (checked with an assert).
    """
    DBGrammar.__init__(self, dirname, mode)

    self.intermedRules = DefaultDict(DefaultDict([]))
    self.intermedTerminalRules = DefaultDict(DefaultDict([]))

    if mode == "w":
        self.hierarchy = DefaultDict({}) #level -> sym -> parentSym
    else:
        hierPath = self.dirname/"hierarchy"
        assert(hierPath.exists())
        # NOTE(review): unpickling runs whatever the file encodes, so
        # only grammar directories from a trusted source should be opened.
        hierIn = open(hierPath, 'rb')
        try:
            self.hierarchy = pickle.load(hierIn)
        finally:
            # Release the handle even if the pickle is corrupt.
            hierIn.close()
def writeback(self, target):
    """Write one part of the grammar to disk.

    For target "hierarchy", self.hierarchy is pickled with protocol 2
    into the "hierarchy" file under self.dirname and the method
    returns.  Every other target is forwarded to DBGrammar.writeback;
    afterwards the intermediate table belonging to "grammar" or
    "terminals" is replaced by None.
    """
    if target == "hierarchy":
        hierOut = open(self.dirname/"hierarchy", 'wb')
        try:
            pickle.Pickler(hierOut, protocol=2).dump(self.hierarchy)
        finally:
            # Close explicitly so the data is flushed to disk here
            # rather than whenever the file object is collected.
            hierOut.close()
        return

    DBGrammar.writeback(self, target)
    if target == "grammar":
        self.intermedRules = None
    elif target == "terminals":
        self.intermedTerminalRules = None
def addRule(self, rule):
    """Register a rule at its hierarchy level.

    A level-0 rule is handed to DBGrammar.addRule.  Any other rule is
    looked up with self.matchRule against the table one level above
    (self.rules for level 1, self.intermedRules[level - 1] otherwise).
    If a parent is found the rule is appended to its children; if not,
    a message is written to stderr and the rule is left without a
    parent.  In both cases the rule is indexed in self.intermedRules
    under its level and lhs.
    """
    if rule.level == 0:
        DBGrammar.addRule(self, rule)
    else:
        #find the parent rule and add as child
        if rule.level == 1:
            parRuleTable = self.rules
        else:
            parRuleTable = self.intermedRules[rule.level - 1]
        matching = self.matchRule(rule, parRuleTable)
        if not matching:
            # Same text the print statement produced; written directly
            # so the line is valid on both Python 2 and Python 3.
            sys.stderr.write("Can't find matching rule for %s\n" % (rule,))
        else:
            # Only link when a parent exists; a falsy result has no
            # children list to append to.
            matching.children.append(rule)
        self.intermedRules[rule.level][rule.lhs].append(rule)
def addTerminalRule(self, rule):
    """Register a terminal rule at its hierarchy level.

    A level-0 rule is handed to DBGrammar.addTerminalRule.  Any other
    rule is looked up with self.matchTermRule against the table one
    level above (self.terminalRules for level 1,
    self.intermedTerminalRules[level - 1] otherwise).  If a parent is
    found the rule is appended to its children; if not, a message is
    written to stderr and the rule is left without a parent.  In both
    cases the rule is indexed in self.intermedTerminalRules under its
    level and rule.rhs[0].
    """
    if rule.level == 0:
        DBGrammar.addTerminalRule(self, rule)
    else:
        #find the parent rule and add as child
        if rule.level == 1:
            parRuleTable = self.terminalRules
        else:
            parRuleTable = self.intermedTerminalRules[rule.level - 1]
        matching = self.matchTermRule(rule, parRuleTable)
        if not matching:
            # Same text the print statement produced; written directly
            # so the line is valid on both Python 2 and Python 3.
            sys.stderr.write("Can't find matching rule for %s\n" % (rule,))
        else:
            # Only link when a parent exists; a falsy result has no
            # children list to append to.
            matching.children.append(rule)
        word = rule.rhs[0]
        self.intermedTerminalRules[rule.level][word].append(rule)
def addEpsilonRule(self, rule):
    """Register an epsilon rule at its hierarchy level.

    A level-0 rule is handed to DBGrammar.addRule.  For level 1 the
    parent is self.epsilonRules[parentSym], where parentSym comes from
    self.hierarchy[level][lhs]; when such a parent exists the rule must
    be its descendant (asserted).  For deeper levels the parent comes
    from self.matchRule on the intermediate table one level up.

    A found parent gets the rule appended to its children; otherwise a
    warning is printed to stderr.  Either way the rule is indexed in
    self.intermedRules under its level and lhs.
    """
    assert(rule.epsilon())

    if rule.level == 0:
        # NOTE(review): this delegates to addRule, not to an
        # epsilon-specific base method -- confirm that is intended.
        DBGrammar.addRule(self, rule)
        return

    if rule.level != 1:
        upperTable = self.intermedRules[rule.level - 1]
        parent = self.matchRule(rule, upperTable)
    else:
        parentSym = self.hierarchy[rule.level][rule.lhs]
        parent = self.epsilonRules[parentSym]
        if parent:
            assert(rule.descendant(parent, self.hierarchy))

    if parent:
        parent.children.append(rule)
    else:
        print >>sys.stderr, "WARNING: Can't find matching rule for", rule

    self.intermedRules[rule.level][rule.lhs].append(rule)
def addRule(self, rule):
    """Register a non-epsilon rule at its hierarchy level.

    A level-0 rule is handed to DBGrammar.addRule.  Any other rule is
    matched with self.matchRule against the table one level above
    (self.rules for level 1, self.intermedRules[level - 1] otherwise).
    A found parent gets the rule appended to its children; otherwise a
    warning is printed to stderr.  Either way the rule is indexed in
    self.intermedRules under its level and lhs.
    """
    assert(not rule.epsilon())

    if rule.level == 0:
        DBGrammar.addRule(self, rule)
        return

    if rule.level != 1:
        candidates = self.intermedRules[rule.level - 1]
    else:
        candidates = self.rules

    parent = self.matchRule(rule, candidates)
    if parent:
        parent.children.append(rule)
    else:
        print >>sys.stderr, "WARNING: Can't find matching rule for", rule

    self.intermedRules[rule.level][rule.lhs].append(rule)
# NOTE(review): this physical line appears to have lost its newlines. It holds
# three pieces: the tail of a function whose start is not visible here, the
# complete helper listEval, and the start of the __main__ script, which appears
# to be cut off after the five-field case.
# listEval(lst) removes every leading "[" and every trailing "]" from the
# string lst, splits what is left on "," and returns the pieces as strings.
# Nothing is evaluated and whitespace is kept, so "[a, b]" gives ["a", " b"]
# and "[]" gives [""].
ntToWord[nt][word] = float(prob) return ntToWord def listEval(lst): undelimited = lst.lstrip("[").rstrip("]") items = undelimited.split(",") return items if __name__ == "__main__": (grammarFile, lexicon, lookahead, out) = sys.argv[1:] print "Grammar:", grammarFile, "Lexicon:", lexicon, "Lookahead:", lookahead grammar = DBGrammar(out, mode="w") print >> sys.stderr, "Nonterms" ct = 0 for line in file(grammarFile): if ct % 1000 == 0: print >> sys.stderr, ct, "..." ct += 1 fields = line.strip().split() (lhs, arrow, rhs1) = fields[0:3] assert arrow == "->" if len(fields) == 5: rhs = [rhs1, fields[3]] prob = fields[4]
def addWordLookahead(self, nt, word, prob, level):
    """Forward a word-lookahead entry to DBGrammar, keyed by level.

    The base-class method receives the tuple (level, nt) in place of
    the bare nt; word and prob are passed through unchanged.
    """
    levelKey = (level, nt)
    DBGrammar.addWordLookahead(self, levelKey, word, prob)