def preload(self, sent):
    """Build the per-sentence lookup tables, then run Grammar.preload.

    Three tables are rebuilt from scratch on every call, each holding
    entries only for the words that occur in ``sent``:

    * ``self.terminalRules[lhs][word]`` -> the rule taken from
      ``self.terminalDB[word]`` whose ``lhs`` is that symbol.
    * ``self.ntToWord[nt][word]``       -> value from ``self.ntToWordDB[word]``.
    * ``self.posToWord[pos][word]``     -> value from ``self.posToWordDB[word]``.

    A KeyError while filling ``ntToWord`` for a word is reported on stderr
    and that word is skipped.  Lookups in ``terminalDB`` and ``posToWordDB``
    are not guarded here.

    NOTE(review): ``DefaultDict({})`` is assumed to hand out a fresh dict
    for each missing key -- confirm against its definition.
    """
    # Pass 1: terminal rules, indexed by left-hand side and then by word.
    self.terminalRules = DefaultDict({})
    for token in sent:
        for termRule in self.terminalDB[token]:
            self.terminalRules[termRule.lhs][token] = termRule

    # Pass 2: nonterminal -> word lookahead values.  The whole fill loop
    # sits inside the ``try`` so any KeyError raised for this word lands
    # in the warning branch.
    self.ntToWord = DefaultDict({})
    for token in sent:
        try:
            ntEntries = self.ntToWordDB[token]
            for symbol, value in ntEntries.items():
                self.ntToWord[symbol][token] = value
        except KeyError:
            print >>sys.stderr, "WARNING: no word lookaheads for", token

    # Pass 3: part-of-speech -> word lookahead values (no KeyError guard).
    self.posToWord = DefaultDict({})
    for token in sent:
        posEntries = self.posToWordDB[token]
        for tag, value in posEntries.items():
            self.posToWord[tag][token] = value

    # Let the base class do its own per-sentence preparation.
    Grammar.preload(self, sent)
def lookaheadProbFull(self, nt, word):
    """Delegate to Grammar.lookaheadProbFull with a per-symbol ``lamb``.

    The text of ``nt`` before its first underscore is used as the key into
    ``self.lambdas``.  If that key is absent (KeyError), ``lamb`` falls
    back to 0.5.

    Returns whatever ``Grammar.lookaheadProbFull(self, nt, word, lamb=...)``
    returns.
    """
    # Everything before the first "_" identifies the unsplit symbol.
    baseSymbol = nt.split("_", 1)[0]
    try:
        weight = self.lambdas[baseSymbol]
    except KeyError:
        # No entry stored for this symbol: use the fixed fallback.
        weight = 0.5
    return Grammar.lookaheadProbFull(self, nt, word, lamb=weight)
# Script fragment (Python 2 syntax: `print >>` statement, `file()` builtin).
# `line`, `rules` and `lookahead` are bound outside this view; presumably the
# first part runs once per input line of a lexicon file -- TODO confirm.
#
# Per-line part:
#   * The line is split on whitespace; the first two fields are (pos, word).
#   * The remaining fields are re-joined with single spaces and passed to
#     eval(); the result is iterated as a sequence of probabilities.
#     NOTE(review): eval() executes arbitrary expressions from the file, so
#     this is only safe on trusted input -- consider ast.literal_eval.
#   * For each (index, prob) a preterminal named "<pos>_<index>" is formed and
#     a Rule is set up with it, [word] and float(prob).
#   * A rule with [lhs] == rhs and prob == 1.0 is reported on stderr as
#     "Warning: X->X" and not stored; any other rule is appended to
#     rules[rule.lhs].
#
# After the rules are collected:
#   * A Grammar is built from `rules`.
#   * The lookahead file is opened with GzipFile when its name ends in ".gz",
#     otherwise with file().  NOTE(review): `look` is not closed anywhere in
#     the visible code.
#   * The lambdas and three production tables (ntToPos, ntToWord, posToWord)
#     are read from that handle, in that order, and given to setLookahead().
#   * "dumping" is printed to stderr as a progress message.
fields = line.strip().split() (pos, word) = fields[0:2] lst = eval(" ".join(fields[2:])) for num, prob in enumerate(lst): preterm = "%s_%d" % (pos, num) rule = Rule() rule.setup(preterm, [word], float(prob)) if [rule.lhs] == rule.rhs and rule.prob == 1.0: print >> sys.stderr, "Warning: X->X", rule.lhs, rule.rhs else: rules[rule.lhs].append(rule) grammar = Grammar(rules) if lookahead.endswith(".gz"): look = GzipFile(lookahead) else: look = file(lookahead) lambdas = readLambdas(look) ntToPos = readProductionTable(look) ntToWord = readProductionTable(look) posToWord = readProductionTable(look) grammar.setLookahead(lambdas, ntToPos, ntToWord, posToWord) print >> sys.stderr, "dumping"