예제 #1
0
    def preload(self, sent):
        """Build the per-sentence lookup tables, then run Grammar.preload.

        Three tables are rebuilt from scratch for the words in ``sent``:

        * ``self.terminalRules[lhs][word]`` -- the rule from
          ``self.terminalDB[word]`` whose left-hand side is ``lhs``.
        * ``self.ntToWord[nt][word]`` -- the value stored for ``nt`` in
          ``self.ntToWordDB[word]``.
        * ``self.posToWord[pos][word]`` -- the value stored for ``pos`` in
          ``self.posToWordDB[word]``.

        A KeyError raised while filling ``ntToWord`` for a word is reported
        on stderr and that word is skipped; lookups in ``terminalDB`` and
        ``posToWordDB`` are not guarded and propagate any error.

        NOTE(review): DefaultDict({}) presumably supplies a fresh dict for
        each missing key -- confirm against its definition.
        """
        # Pass 1: terminal rules, indexed by left-hand side and then word.
        self.terminalRules = DefaultDict({})
        for token in sent:
            for lexRule in self.terminalDB[token]:
                self.terminalRules[lexRule.lhs][token] = lexRule

        # Pass 2: nonterminal-to-word lookaheads.  The whole per-word update
        # stays inside the try so the warning covers the same failures as
        # before.
        self.ntToWord = DefaultDict({})
        for token in sent:
            try:
                for symbol, value in self.ntToWordDB[token].items():
                    self.ntToWord[symbol][token] = value
            except KeyError:
                print >>sys.stderr, "WARNING: no word lookaheads for", token

        # Pass 3: POS-to-word lookaheads (no fallback for unknown words).
        self.posToWord = DefaultDict({})
        for token in sent:
            for tag, value in self.posToWordDB[token].items():
                self.posToWord[tag][token] = value

        # Let the base class do its own per-sentence preparation.
        Grammar.preload(self, sent)
예제 #2
0
 def lookaheadProbFull(self, nt, word):
     """Delegate to Grammar.lookaheadProbFull with a per-symbol ``lamb``.

     The text of ``nt`` before its first underscore is used as the key into
     ``self.lambdas``; when that key is missing, 0.5 is used instead.
     Returns whatever Grammar.lookaheadProbFull returns.
     """
     # Only the prefix before the first "_" identifies the lambda entry.
     baseSymbol, _, _ = nt.partition("_")
     try:
         weight = self.lambdas[baseSymbol]
     except KeyError:
         # No entry recorded for this symbol: fall back to the fixed value.
         weight = 0.5
     return Grammar.lookaheadProbFull(self, nt, word, lamb=weight)
예제 #3
0
        fields = line.strip().split()
        (pos, word) = fields[0:2]
        lst = eval(" ".join(fields[2:]))

        for num, prob in enumerate(lst):
            preterm = "%s_%d" % (pos, num)
            rule = Rule()
            rule.setup(preterm, [word], float(prob))

            if [rule.lhs] == rule.rhs and rule.prob == 1.0:
                print >> sys.stderr, "Warning: X->X", rule.lhs, rule.rhs
            else:
                rules[rule.lhs].append(rule)

    grammar = Grammar(rules)

    if lookahead.endswith(".gz"):
        look = GzipFile(lookahead)
    else:
        look = file(lookahead)

    lambdas = readLambdas(look)
    ntToPos = readProductionTable(look)
    ntToWord = readProductionTable(look)
    posToWord = readProductionTable(look)

    grammar.setLookahead(lambdas, ntToPos, ntToWord, posToWord)

    print >> sys.stderr, "dumping"