コード例 #1
0
ファイル: parseval.py プロジェクト: rupenp/transforest
						 help="first NUM sentences only", metavar="NUM", default=None)
	optparser.add_option("-v", "--verbose", dest="verbose", action="store_true", \
						 help="trace for each sentence", default=False)
	optparser.add_option("-d", "--debug", dest="debug", action="store_true", \
						 help="debug info: matched brackets", default=False)
	optparser.add_option("-E", "--EVALB", dest="evalb", action="store_true", \
						 help="exact EVALB mode: neglecting error sentences due to puncs", default=False)
	optparser.add_option("-p", "--punc", dest="punc", action="store_false", \
						 help="keep punctuations (default is to delete them first)", default=True)
	optparser.add_option("-b", "--nobackoff", dest="backoff", action="store_false", \
						 help="no backing off to vanilla evalb", default=True)

	(opts, args) = optparser.parse_args()

	from readkbest import readonebest
	testtrees = readonebest(args[0])
	goldtrees = readonebest(args[1])

	parseval = Parseval()
	for i, (ta, tb) in enumerate(xzip(testtrees, goldtrees)): 

		if opts.first is not None and i >= opts.first:
			break

		par = Parseval(ta, tb, del_puncs=opts.punc)
		if opts.verbose:
			print "%d\t%s" % (i+1, par)
		parseval += par

	print parseval
								 
コード例 #2
0
ファイル: addgold.py プロジェクト: rupenp/transforest
if __name__ == "__main__":
	# Script entry point: load forests (from "-", presumably stdin, as the
	# usage string suggests), attach the next gold tree from the -g file to
	# each forest in order, optionally run remove() on it, then dump it.

	import optparse
	optparser = optparse.OptionParser(usage="usage: cat <forests> | %prog -g <GOLDFILE> [-s <suffix>]")
	optparser.add_option("-g", "--gold", dest="goldfile",
						 help="gold file", metavar="FILE", default=None)
	optparser.add_option("-q", "--quiet", dest="quiet", action="store_true", help="no dumping", default=False)
	optparser.add_option("-r", "--remove", dest="remove_sp", action="store_true",
						 help="remove spurious", default=False)
	optparser.add_option("-s", "--suffix", dest="suffix", help="dump suffix (1.suffix)", metavar="SUF")

	(opts, args) = optparser.parse_args()

	if opts.goldfile is None:
		# error() is a method of the parser, not of the parsed-values object
		# returned by parse_args(); it prints the usage message and exits.
		optparser.error("must specify gold file")

	goldtrees = readonebest(opts.goldfile)

	for i, forest in enumerate(Forest.load("-")):
		# Gold trees are consumed in lockstep with the forests; .next() raises
		# StopIteration if the gold file has fewer entries than the input.
		forest.goldtree = goldtrees.next()
		if opts.remove_sp:
			remove(forest)
		if opts.suffix is not None:
			# One output file per forest, named "<1-based index>.<suffix>".
			# A suffix takes precedence over --quiet.  Close the file
			# explicitly rather than relying on garbage collection.
			out = open("%d.%s" % (i+1, opts.suffix), "wt")
			try:
				forest.dump(out)
			finally:
				out.close()
		elif not opts.quiet:
			# No file target given: dump() picks its own default destination.
			forest.dump()

		
		
コード例 #3
0
ファイル: nbest_oracle.py プロジェクト: rupenp/transforest
# Module-level debug switch, set to True here; nothing in the visible part
# of this file reads it, so its effect has to be confirmed elsewhere.
debug = True

from tree import Tree

from readkbest import readkbest, readonebest
from utility import getfile
from parseval import Parseval
	
if __name__ == "__main__":

	kbestfilename = sys.argv[1]
	goldfilename = sys.argv[2]
	
	kbesttrees = readkbest(kbestfilename)

	goldtrees = readonebest(goldfilename)

# 	assert len(kbesttrees) == len(goldtrees), "unmatched number of sentences: %d test vs. %d gold" \
# 		   % (len(kbesttrees), len(goldtrees))


	onebest = Parseval()
	oracle = Parseval()
	
	for (i,  goldtree) in enumerate(goldtrees):

##		print >> logs, i,
##		print goldtree
		_, klist = kbesttrees.next()  # generator

		for (k, (logprob, testtree)) in enumerate(klist):   ## generator again