コード例 #1
0
ファイル: ctfScheme.py プロジェクト: melsner/ctf-roark
    def addToGrammar(self, grammar, level):
        """Push every table held by this scheme into ``grammar`` at ``level``.

        In order, this registers:
          * an ancestry entry mapping each key of ``self.termCounts`` to
            itself,
          * one ``HierRule`` per entry of ``self.ruleCounts`` (added as an
            epsilon rule when the rule reports itself as epsilon),
          * one terminal ``HierRule`` per entry of ``self.termCounts``,
          * one word-lookahead entry per entry of ``self.ntToWord``,
          * the ``self.ntToPos`` table and ``self.lambdas``.

        Side effect: each value stored in ``self.ntToPos`` is replaced by a
        plain ``dict`` built from it before the table is handed over.

        Nothing is returned.
        """
        # POS tags are not merged by this ctf, so each tag is registered as
        # its own ancestor.
        for tag in self.termCounts:
            grammar.addAncestry(level, tag, tag)

        for parent, expansions in self.ruleCounts.items():
            for children, weight in expansions.items():
                production = HierRule(level)
                # A right-hand side led by "EPSILON" is stored as an empty
                # list (assumes the key is a non-empty sequence).
                body = [] if children[0] == "EPSILON" else children
                production.setup(parent, body, weight)

                register = (grammar.addEpsilonRule if production.epsilon()
                            else grammar.addRule)
                register(production)

        for tag, emissions in self.termCounts.items():
            for token, weight in emissions.items():
                lexical = HierRule(level)
                lexical.setup(tag, [token], weight)
                grammar.addTerminalRule(lexical)

        for parent, lookahead in self.ntToWord.items():
            for token, weight in lookahead.items():
                grammar.addWordLookahead(parent, token, weight, level)

        # Swap each sub-table for a plain dict before passing the table on.
        for parent, entries in list(self.ntToPos.items()):
            self.ntToPos[parent] = dict(entries)

        grammar.addNTToPos(self.ntToPos, level)
        grammar.addLambdas(self.lambdas, level)
コード例 #2
0
            elif len(fields) == 4:
                rhs = [rhs1,]
                prob = fields[3]

            prob = float(prob)

            rule = HierRule(level)

            if lhs.startswith("EPSILON"):
                assert(len(rhs) == 1)
                assert(rhs[0].startswith("EPSILON"))
                rhs = []

            rule.setup(lhs, rhs, prob)

            if rule.epsilon() or rule.unary():
#                print >>sys.stderr, "Skipping bogus unary", rule
                pass
            else:
                grammar.addRule(rule)

        unaryFile = workDir/("%s-txt-lvl%d.unaries.gz" % (basename, level))

        print >>sys.stderr, "Unaries from", unaryFile

        ct = 0
        for line in GzipFile(unaryFile):
            if ct % 1000 == 0:
                print >>sys.stderr, ct, "..."
            ct += 1