# print "WARNING, zero-prob event", lhs, word, pos, ct continue wordRight[lhs] += (pGivenWord / total) * ct norm[lhs] += total * ct ll += log(total) * ct return ll, wordRight, norm if __name__ == "__main__": (grammarStem, out, trees, validation) = sys.argv[1:] print "Grammar stem:", grammarStem, "Output:", out, "Training:", trees,\ "Validation:", validation grammar = HierGrammar(out, mode='w') topLevel = len(ec05) - 1 for level,mapping in reversed(list(enumerate(ec05))): # print level, mapping for anc,children in mapping.items(): for child in children: grammar.addAncestry(level + 1, anc, child) grammar.addAncestry(level + 1, "@%s" % anc, "@%s" % child) # print grammar.hierarchy grammar.makeMapping(topLevel) # print grammar.pennToLevel
# NOTE(review): the original line began with the tail of a definition whose
# header is not visible in this view. It is kept verbatim, unmodified:
#   (prob, nt, arrow, word) = fields assert(arrow == "->") ntToWord[nt][word] = float(prob) return ntToWord


def listEval(lst):
    """Split a bracketed, comma-separated string into a list of item strings.

    All leading "[" and all trailing "]" characters are removed, then the
    remainder is split on ",". Items are returned exactly as they appear
    between the commas: no whitespace trimming and no type conversion.

    Args:
        lst: text such as "[a,b,c]".

    Returns:
        A list of the item strings. An empty list literal ("[]" or "")
        yields [] rather than [''].
    """
    undelimited = lst.lstrip("[").rstrip("]")
    # "".split(",") returns [''], which would report one empty-named item
    # for an empty list; return a genuinely empty list instead.
    if not undelimited:
        return []
    return undelimited.split(",")


# NOTE(review): the original line continued with a script entry point that
# appears to be cut off mid-loop. It is kept verbatim, unmodified:
#   if __name__ == "__main__": (grammarStem, out) = sys.argv[1:] print "Grammar stem:", grammarStem, "Output:", out grammar = HierGrammar(out, mode='w') grammarStem = path(grammarStem).abspath() workDir = grammarStem.dirname() basename = grammarStem.basename() fileLst = workDir.files(basename+"-txt-lvl*") fileNums = [re.search("-txt-lvl(\d+)", fileName) for fileName in fileLst] fileNums = [int(match.group(1)) for match in fileNums if match] maxLevel = max(fileNums) print "Max grammar level:", maxLevel hierFile = workDir/("%s-txt.hier" % (basename,)) for line in file(hierFile): fields = line.strip().split()
from AIMA import DefaultDict from HierGrammar import HierGrammar, HierRule from convertLCGrammarToHier import readLambdas, readProductionTable, listEval from path import path from treeUtils import markovBinarizeTree, treeToTuple, treeToStr, zeroSplit from ctfScheme import leftCorner, GrammarStats, ValidationEvents if __name__ == "__main__": (out, trees, validation) = sys.argv[1:] print "Output:", out, "Training:", trees, "Validation:", validation grammar = HierGrammar(out, mode='w') binarizeTo = 0 gstats = GrammarStats() vstats = ValidationEvents() for ct,line in enumerate(file(trees)): if ct % 100 == 0: print "read trees", ct tree = markovBinarizeTree(treeToTuple(line.strip()), to=0) # print treeToStr(tree) gstats.record(tree) for ct,line in enumerate(file(validation)): if ct % 100 == 0: