Beispiel #1
0
def _recordTrees(path, stats, label, binarizeTo=0):
    """Feed every tree stored in *path* to ``stats.record``.

    Each line of the file is stripped, parsed with ``treeToTuple`` and
    binarized with ``markovBinarizeTree(..., to=binarizeTo)`` before being
    recorded.  A progress line ``"<label> <count>"`` is printed every 100
    lines.

    path       -- file to read; each line is handed to ``treeToTuple``
    stats      -- any object exposing ``record(tree)``
    label      -- prefix of the progress message
    binarizeTo -- forwarded as the ``to`` argument of markovBinarizeTree
    """
    # ``with`` closes the handle deterministically; the previous inline
    # ``file(path)`` left that to the garbage collector.
    with open(path) as treeFile:
        for ct, line in enumerate(treeFile):
            if ct % 100 == 0:
                # Single-argument print: same output under the Python 2
                # print statement and the Python 3 print function.
                print("%s %d" % (label, ct))
            tree = markovBinarizeTree(treeToTuple(line.strip()),
                                      to=binarizeTo)
            stats.record(tree)


if __name__ == "__main__":
    # Expect exactly three arguments: output grammar, training trees,
    # validation trees.  Fail with a usage line instead of an unpacking
    # ValueError when the count is wrong.
    if len(sys.argv) != 4:
        sys.exit("usage: %s OUTPUT TRAINING_TREES VALIDATION_TREES"
                 % sys.argv[0])
    (out, trees, validation) = sys.argv[1:]

    print("Output: %s Training: %s Validation: %s"
          % (out, trees, validation))

    grammar = HierGrammar(out, mode='w')
    binarizeTo = 0

    gstats = GrammarStats()
    vstats = ValidationEvents()

    # Collect statistics from the training trees, then from the held-out
    # validation trees, using the same binarization setting for both.
    _recordTrees(trees, gstats, "read trees", binarizeTo)
    _recordTrees(validation, vstats, "read validation trees", binarizeTo)

    # Order matters: normalize the training statistics first, then learn
    # the lambdas against the validation events, then add to the grammar.
    gstats.normalize()
    gstats.learnLambdas(vstats)
    gstats.addToGrammar(grammar, 0)

    grammar.writeback("hierarchy")
Beispiel #2
0
#    print grammar.hierarchy

    # Build the grammar's mapping from `topLevel`.  `grammar` and `topLevel`
    # are both defined before the start of this fragment and are not
    # visible here.
    grammar.makeMapping(topLevel)

#    print grammar.pennToLevel

    # One pass per entry of `ec05` (defined outside this fragment).  Every
    # pass starts from fresh statistics objects and transforms the trees at
    # `level + 1`.
    for level in range(len(ec05)):
        gstats = GrammarStats()
        vstats = ValidationEvents()

        # Training trees: each line is stripped, parsed with treeToTuple,
        # passed through binarizeTree and zeroSplit, then transformed by the
        # grammar before being recorded.  Progress is printed every 100 lines.
        # NOTE(review): file(...) is Python 2 only, and the handle is never
        # closed explicitly.
        for ct,line in enumerate(file(trees)):
            if ct % 100 == 0:
                print "read trees", ct
            tree = grammar.transform(level + 1, zeroSplit(
                binarizeTree(
                treeToTuple(line.strip()))))

#            print treeToStr(tree)
            gstats.record(tree)

        # Validation trees go through the same pipeline but are recorded in
        # vstats.  Progress is printed every 1000 lines here (not every 100
        # as above).
        for ct,line in enumerate(file(validation)):
            if ct % 1000 == 0:
                print "read validation trees", ct
            tree = grammar.transform(level + 1, zeroSplit(
                binarizeTree(
                treeToTuple(line.strip()))))
            vstats.record(tree)

        # Normalize the training statistics, then learn lambdas against the
        # validation events.  The visible fragment ends here; any further
        # per-level steps are outside this view.
        gstats.normalize()
        gstats.learnLambdas(vstats)
Beispiel #3
0
from treeUtils import treeToDeriv, treeToTuple, untransform

# tt = "(ROOT (S (NP (DT The) (@NP (ADJP (RBS most) (JJ troublesome)) (NN report))) (@S (VP (MD may) (VP (VB be) (NP (NP (DT the) (@NP (NNP August) (@NP (NN merchandise) (@NP (NN trade) (NN deficit))))) (ADJP (JJ due) (@ADJP (ADVP (IN out)) (NP (NN tomorrow))))))) (. .))))"
# print treeToTuple(tt)
# print treeToDeriv(treeToTuple(tt))

# Bracketed sample tree used to exercise treeToTuple and untransform.
# NOTE(review): labels such as ROOTlcDT look like a left-corner style
# transform -- confirm against treeUtils.
tt = """(ROOT (RS (DT The) (ROOTlcDT (NN government) (POS 's) (ROOTlcNP (NN plan) (ROOTlcNP (VP (VBZ is) (VPlcVBZ (ADJP (JJ stupid) (ADJPlcJJ )) (VPlcVP ))) (. .) (ROOTlcS ))))))"""

# Show the parsed tuple first, then the result of undoing the transform on
# a freshly parsed copy (the string is parsed twice, as before).
parsedSample = treeToTuple(tt)
print(parsedSample)
restoredSample = untransform(treeToTuple(tt))
print(restoredSample)