help="first NUM sentences only", metavar="NUM", default=None) optparser.add_option("-v", "--verbose", dest="verbose", action="store_true", \ help="trace for each sentence", default=False) optparser.add_option("-d", "--debug", dest="debug", action="store_true", \ help="debug info: matched brackets", default=False) optparser.add_option("-E", "--EVALB", dest="evalb", action="store_true", \ help="exact EVALB mode: neglecting error sentences due to puncs", default=False) optparser.add_option("-p", "--punc", dest="punc", action="store_false", \ help="keep punctuations (default is to delete them first)", default=True) optparser.add_option("-b", "--nobackoff", dest="backoff", action="store_false", \ help="no backing off to vanilla evalb", default=True) (opts, args) = optparser.parse_args() from readkbest import readonebest testtrees = readonebest(args[0]) goldtrees = readonebest(args[1]) parseval = Parseval() for i, (ta, tb) in enumerate(xzip(testtrees, goldtrees)): if opts.first is not None and i >= opts.first: break par = Parseval(ta, tb, del_puncs=opts.punc) if opts.verbose: print "%d\t%s" % (i+1, par) parseval += par print parseval
if __name__ == "__main__":
    # Command-line driver: load forests (from "-", presumably standard input
    # given the usage string), attach the next gold tree to each forest,
    # optionally call remove() on it, and dump the result either to a
    # per-sentence file or via forest.dump() with no argument.
    import optparse

    optparser = optparse.OptionParser(
        usage="usage: cat <forests> | %prog -g <GOLDFILE> [-s <suffix>]")
    optparser.add_option("-g", "--gold", dest="goldfile",
                         help="gold file", metavar="FILE", default=None)
    optparser.add_option("-q", "--quiet", dest="quiet", action="store_true",
                         help="no dumping", default=False)
    optparser.add_option("-r", "--remove", dest="remove_sp", action="store_true",
                         help="remove spurious", default=False)
    optparser.add_option("-s", "--suffix", dest="suffix",
                         help="dump suffix (1.suffix)", metavar="SUF")

    (opts, args) = optparser.parse_args()

    if opts.goldfile is None:
        # error() is a method of the parser, not of the parsed-options object;
        # it prints the usage message and exits, so nothing below runs.
        optparser.error("must specify gold file")

    goldtrees = readonebest(opts.goldfile)

    for i, forest in enumerate(Forest.load("-")):
        # Gold trees are consumed in lockstep with the forests; this raises
        # StopIteration if there are fewer gold trees than forests.
        forest.goldtree = goldtrees.next()

        if opts.remove_sp:
            remove(forest)

        if opts.suffix is not None:
            # One output file per forest, named by its 1-based index.
            # try/finally (rather than "with") keeps this usable on old
            # Python 2 interpreters while still closing the handle.
            dumpfile = open("%d.%s" % (i + 1, opts.suffix), "wt")
            try:
                forest.dump(dumpfile)
            finally:
                dumpfile.close()
        elif not opts.quiet:
            forest.dump()
debug = True from tree import Tree from readkbest import readkbest, readonebest from utility import getfile from parseval import Parseval if __name__ == "__main__": kbestfilename = sys.argv[1] goldfilename = sys.argv[2] kbesttrees = readkbest(kbestfilename) goldtrees = readonebest(goldfilename) # assert len(kbesttrees) == len(goldtrees), "unmatched number of sentences: %d test vs. %d gold" \ # % (len(kbesttrees), len(goldtrees)) onebest = Parseval() oracle = Parseval() for (i, goldtree) in enumerate(goldtrees): ## print >> logs, i, ## print goldtree _, klist = kbesttrees.next() # generator for (k, (logprob, testtree)) in enumerate(klist): ## generator again