def run(self, hogwash_job): print >>sys.stderr, "Loading grammar", self.grammar grammar = HierGrammar(self.grammar) print >>sys.stderr, "Done" print >>sys.stderr, "Parse options:" print >>sys.stderr, self.parseOpts self.parseOpts["grammar"] = grammar if self.parserType == "standard": parser = Parser(**self.parseOpts) elif self.parserType == "ctf": parser = CTFParser(**self.parseOpts) else: raise TypeError("Don't know parser type %s" % self.parserType) print >>sys.stderr, "Parsing:", self.sentence try: final = parser.parse(self.sentence) except (ParseError, TypeError): #if psyco is active, throwing a parse error will fail #because psyco doesn't realize that exceptions can be #newstyle classes, because it's *old* #so we get a type error final = parser.parseFail(self.sentence) print treeToStr(normalizeTree(final.tree())) return final.tree()
def run(self, hogwash_job): print >>sys.stderr, "Loading grammar", self.grammar grammar = HierGrammar(self.grammar) print >>sys.stderr, "Done" print >>sys.stderr, "Parse options:" print >>sys.stderr, self.parseOpts self.parseOpts["grammar"] = grammar parser = Parser(**self.parseOpts) print >>sys.stderr, "Parsing:", self.sentence try: final = parser.parse(self.sentence) res = treeToStr(normalizeTree(final.tree())) except (ParseError, TypeError): #if psyco is active, throwing a parse error will fail #because psyco doesn't realize that exceptions can be #newstyle classes, because it's *old* #so we get a type error final = parser.parseFail(self.sentence) res = treeToStr(normalizeTree(final.tree())) print res return res
def __init__(self, grammar, top="ROOT", queueLimit=10000,
             beamF=identityBeamF, mode=None, verbose=[],
             makeAnalysis=HierAnalysis, gammas=[1e-4,],
             deltas=[1e-3,], stepExpansionLimit=500,
             beamDivergenceFactor=10):
    """Initialise the base Parser and store the multi-level settings.

    grammar, top, queueLimit, beamF, mode, verbose and makeAnalysis are
    forwarded unchanged to Parser.__init__; the first entry of gammas is
    forwarded as its gamma argument.

    gammas -- sequence of gamma values; must be non-empty.
    deltas -- sequence of delta values; must be one shorter than gammas.
    stepExpansionLimit, beamDivergenceFactor -- stored on the instance.

    Raises AssertionError if len(deltas) != len(gammas) - 1.
    """
    # Copy before writing into it: the original assigned to gammas[0]
    # directly, which modified the caller's list (or the shared default
    # list object) in place.
    gammas = list(gammas)

    Parser.__init__(self, grammar, top=top, queueLimit=queueLimit,
                    beamF=beamF, gamma=gammas[0], mode=mode,
                    verbose=verbose, makeAnalysis=makeAnalysis)

    #just for consistency... might have been set with a 'mode' flag
    #shouldn't matter though
    gammas[0] = self.gamma

    self.gammas = gammas
    self.deltas = deltas
    # NOTE(review): the signature defaults supply one gamma and one delta,
    # which does not satisfy this check -- confirm which side is intended.
    assert(len(self.deltas) == len(self.gammas) - 1)

    self.stepExpansionLimit = stepExpansionLimit
    self.beamDivergenceFactor = beamDivergenceFactor
from topdownParser import Grammar, Rule, Parser, normalizeTree, treeToStr from DBGrammar import DBGrammar if __name__ == "__main__": inf = sys.argv[1] print >>sys.stderr, "loading grammar", inf grammar = DBGrammar(inf) print >>sys.stderr, "done" debug = ["index", "pop", "push", "threshold"] parser = Parser(grammar, top="ROOT_0", mode="lex", queueLimit=5e5, verbose=["index"]) sent = "The stocks fell ." # sent = "Members of the House Ways and Means Committee introduced legislation that would restrict how the new savings-and-loan bailout agency can raise capital , creating another potential obstacle to the government 's sale of sick thrifts ." # sent = "The government 's plan" # sent = "John Smith and Mary Roe are friends ." #import cProfile #final = cProfile.run('parser.parse(sent.split())', 'profile-out4') final = parser.parse(sent.split()) print final print list(final.derivation()) print treeToStr(final.tree()) print treeToStr(normalizeTree(final.tree()))
from Hogwash import Session #main hogwash class from Hogwash.Results import ResultsFile #type for file created by hw job from Hogwash.Action import Action #supertype for runnable objects from Hogwash.Errors import BadExitCode #error if the program crashed from waterworks.Processes import bettersystem #run an external command import sys from path import path import os from shutil import copy from iterextras import batch from StringIO import StringIO #store output of process from distributedParser import Parse from topdownParser import Parser, Grammar, treeToStr, normalizeTree if __name__ == "__main__": session = Session(sys.argv[1], read_only=True, verbose=0) p = Parser(Grammar({})) for job in session[:100]: if job.status != "finished": sent = job.args[0].sentence fail = p.parseFail(sent) print treeToStr(normalizeTree(fail.tree(), unbinarize=True)) else: print treeToStr(normalizeTree(job.results, unbinarize=False), epsilonSym="EPSILON")