コード例 #1
0
ファイル: roarkGrammar.py プロジェクト: melsner/ctf-roark
if __name__ == "__main__":
    (out, trees, validation) = sys.argv[1:]

    print "Output:", out, "Training:", trees, "Validation:", validation

    grammar = HierGrammar(out, mode='w')
    binarizeTo = 0

    gstats = GrammarStats()
    vstats = ValidationEvents()

    for ct,line in enumerate(file(trees)):
        if ct % 100 == 0:
            print "read trees", ct
        tree = markovBinarizeTree(treeToTuple(line.strip()), to=0)

#        print treeToStr(tree)
        gstats.record(tree)

    for ct,line in enumerate(file(validation)):
        if ct % 100 == 0:
            print "read validation trees", ct
        tree = markovBinarizeTree(treeToTuple(line.strip()), to=0)
        vstats.record(tree)

    gstats.normalize()
    gstats.learnLambdas(vstats)
    gstats.addToGrammar(grammar, 0)

    grammar.writeback("hierarchy")
コード例 #2
0
ファイル: ctfScheme.py プロジェクト: melsner/ctf-roark
#    print grammar.hierarchy

    grammar.makeMapping(topLevel)

#    print grammar.pennToLevel

    for level in range(len(ec05)):
        gstats = GrammarStats()
        vstats = ValidationEvents()

        for ct,line in enumerate(file(trees)):
            if ct % 100 == 0:
                print "read trees", ct
            tree = grammar.transform(level + 1, zeroSplit(
                binarizeTree(
                treeToTuple(line.strip()))))

#            print treeToStr(tree)
            gstats.record(tree)

        for ct,line in enumerate(file(validation)):
            if ct % 1000 == 0:
                print "read validation trees", ct
            tree = grammar.transform(level + 1, zeroSplit(
                binarizeTree(
                treeToTuple(line.strip()))))
            vstats.record(tree)

        gstats.normalize()
        gstats.learnLambdas(vstats)
コード例 #3
0
ファイル: testTreeToDeriv.py プロジェクト: melsner/ctf-roark
from treeUtils import treeToDeriv, treeToTuple, untransform

# tt = "(ROOT (S (NP (DT The) (@NP (ADJP (RBS most) (JJ troublesome)) (NN report))) (@S (VP (MD may) (VP (VB be) (NP (NP (DT the) (@NP (NNP August) (@NP (NN merchandise) (@NP (NN trade) (NN deficit))))) (ADJP (JJ due) (@ADJP (ADVP (IN out)) (NP (NN tomorrow))))))) (. .))))"
# print treeToTuple(tt)
# print treeToDeriv(treeToTuple(tt))

tt = """(ROOT (RS (DT The) (ROOTlcDT (NN government) (POS 's) (ROOTlcNP (NN plan) (ROOTlcNP (VP (VBZ is) (VPlcVBZ (ADJP (JJ stupid) (ADJPlcJJ )) (VPlcVP ))) (. .) (ROOTlcS ))))))"""
print treeToTuple(tt)
print untransform(treeToTuple(tt))