def evaluate(documents, sortMethod, verbose, cutoffs=[], task="GE.2"): workdir = tempfile.gettempdir() outdir = os.path.join(workdir, "events") cutoffs.sort() eventList = sortByScore(documents, sortMethod) results = {} startTime = time.time() for cutoff in cutoffs: print "Cutoff", cutoff, str( datetime.timedelta(seconds=time.time() - startTime)) markForRemoval(eventList, cutoff) STTools.writeSet( documents, outdir, validate=True) # validation will remove events with 0 arguments #results[cutoff] = getResults(BioNLP11GeniaTools.evaluateGE(outdir, task=2, evaluations=["approximate"], verbose=False, silent=not verbose)) if "REL" not in task: results[cutoff] = getResults( BioNLP11GeniaTools.evaluate(outdir, task=task)[1]) else: results[cutoff] = {} print results #print results[cutoff]["approximate"]["ALL-TOTAL"] #shutil.rmtree(workdir) #maxEvents = results[0.0]["approximate"]["ALL-TOTAL"]["answer"] maxEvents = results[0.0]["answer"] print "Max events", maxEvents return results, maxEvents
def evaluate(documents, sortMethod, verbose, cutoffs=[], task="GE.2"): workdir = tempfile.gettempdir() outdir = os.path.join(workdir, "events") cutoffs.sort() eventList = sortByScore(documents, sortMethod) results = {} startTime = time.time() for cutoff in cutoffs: print "Cutoff", cutoff, str(datetime.timedelta(seconds=time.time()-startTime)) markForRemoval(eventList, cutoff) STTools.writeSet(documents, outdir, validate=True) # validation will remove events with 0 arguments #results[cutoff] = getResults(BioNLP11GeniaTools.evaluateGE(outdir, task=2, evaluations=["approximate"], verbose=False, silent=not verbose)) if "REL" not in task: results[cutoff] = getResults(BioNLP11GeniaTools.evaluate(outdir, task=task)[1]) else: results[cutoff] = {} print results #print results[cutoff]["approximate"]["ALL-TOTAL"] #shutil.rmtree(workdir) #maxEvents = results[0.0]["approximate"]["ALL-TOTAL"]["answer"] maxEvents = results[0.0]["answer"] print "Max events", maxEvents return results, maxEvents
metavar="FILE") optparser.add_option("--binSize", default=1, type="int", dest="binSize", help="", metavar="FILE") (options, args) = optparser.parse_args() if options.manual != None and options.input == None: precisions = getManualEvaluationPrecisions(options.manual) plotManualEvaluationPrecisions(precisions, options.binSize) else: cutoffs = [float(x) / options.steps for x in range(options.steps)] print "Loading documents" documents = STTools.loadSet(options.input, readScores=True) # print "Testing evaluator" # tempdir = tempfile.mkdtemp() # print tempdir # STTools.writeSet(documents, tempdir, debug=True, validate=False) # validation will remove events with 0 arguments # BioNLP11GeniaTools.evaluate(tempdir, task=options.task) #shutil.rmtree(tempdir) print "Processing scores" print processScores(documents, normalize="normalize" in options.sortmethod) print "Evaluating" results, maxEvents = evaluate(documents, options.sortmethod, verbose=options.verbose, cutoffs=cutoffs, task=options.task)
if trigger.type == "Entity": counts["t2-entities-removed"] += 1 else: triggersToKeep.append(trigger) document.triggers = triggersToKeep if __name__=="__main__": import sys import STTools from optparse import OptionParser # Import Psyco if available try: import psyco psyco.full() print >> sys.stderr, "Found Psyco, using" except ImportError: print >> sys.stderr, "Psyco not installed" optparser = OptionParser(usage="%prog [options]\n") optparser.add_option("-i", "--input", default=None, dest="input", help="", metavar="FILE") optparser.add_option("-o", "--output", default=None, dest="output", help="") optparser.add_option("--debug", default=False, action="store_true", dest="debug", help="") optparser.add_option("--noScores", default=False, action="store_true", dest="noScores", help="") (options, args) = optparser.parse_args() if options.output == None: options.output = options.input + "-validated.tar.gz" print >> sys.stderr, "Reading documents" documents = STTools.loadSet(options.input, readScores=(not options.noScores)) print >> sys.stderr, "Writing documents" STTools.writeSet(documents, options.output, validate=True, writeScores=(not options.noScores), task=2, debug=options.debug)
optparser.add_option("-o", "--output", default=None, dest="output", help="", metavar="FILE") optparser.add_option("-m", "--manual", default=None, dest="manual", help="", metavar="FILE") optparser.add_option("-g", "--graphs", default="prf", dest="graphs", help="", metavar="FILE") optparser.add_option("-s", "--sortmethod", default="unmerging", dest="sortmethod", help="") optparser.add_option("-v", "--verbose", default=False, action="store_true", dest="verbose", help="") optparser.add_option("--steps", default=10, type="int", dest="steps", help="", metavar="FILE") optparser.add_option("--binSize", default=1, type="int", dest="binSize", help="", metavar="FILE") (options, args) = optparser.parse_args() if options.manual != None and options.input == None: precisions = getManualEvaluationPrecisions(options.manual) plotManualEvaluationPrecisions(precisions, options.binSize) else: cutoffs = [float(x)/options.steps for x in range(options.steps)] print "Loading documents" documents = STTools.loadSet(options.input, readScores=True) # print "Testing evaluator" # tempdir = tempfile.mkdtemp() # print tempdir # STTools.writeSet(documents, tempdir, debug=True, validate=False) # validation will remove events with 0 arguments # BioNLP11GeniaTools.evaluate(tempdir, task=options.task) #shutil.rmtree(tempdir) print "Processing scores" print processScores(documents, normalize="normalize" in options.sortmethod) print "Evaluating" results, maxEvents = evaluate(documents, options.sortmethod, verbose=options.verbose, cutoffs=cutoffs, task=options.task) if options.output == None: output = "scorefig-" + options.sortmethod + ".pdf" else: output = options.output resultsToGraph(results, options.output, maxEvents, manualEvaluationFile=options.manual)
optparser.add_option("-r", "--origIds", default=False, action="store_true", dest="origIds",
                     help="Use stored original ids (can cause problems with duplicates).")
optparser.add_option("--stSitesAreArguments", default=False, action="store_true",
                     dest="stSitesAreArguments", help="")
optparser.add_option("-n", "--xmlCorpusName", default="CORPUS", dest="xmlCorpusName", help="")
optparser.add_option("-a", "--task", default=2, type="int", dest="task", help="1 or 2")
optparser.add_option("-d", "--debug", default=False, action="store_true", dest="debug",
                     help="Verbose output.")
optparser.add_option("-x", "--extra", default=False, action="store_true", dest="extra",
                     help="Verbose output.")
(options, args) = optparser.parse_args()

conversion = options.conversion
if conversion in ("TO-ST", "TO-ST-RELATIONS"):
    # Interaction XML -> ST format
    print >> sys.stderr, "Loading XML"
    xml = ETUtils.ETFromObj(options.input)
    print >> sys.stderr, "Converting to ST Format"
    asRelations = conversion == "TO-ST-RELATIONS"
    toSTFormat(xml, options.output, options.outputTag, options.origIds,
               debug=options.debug, allAsRelations=asRelations, writeExtra=options.extra)
elif conversion in ("TO-XML", "ROUNDTRIP"):
    # Both modes start by loading the ST-format documents the same way.
    import STTools
    print >> sys.stderr, "Loading ST format"
    documents = STTools.loadSet(options.input, "GE", level="a2",
                                sitesAreArguments=options.stSitesAreArguments,
                                a2Tag="a2", readScores=False, debug=options.debug)
    print >> sys.stderr, "Converting to XML"
    if conversion == "TO-XML":
        # ST format -> interaction XML written to the output
        toInteractionXML(documents, options.xmlCorpusName, options.output)
    else:
        # ROUNDTRIP: ST format -> XML -> ST format
        xml = toInteractionXML(documents)
        print >> sys.stderr, "Converting to ST Format"
        toSTFormat(xml, options.output, options.outputTag, options.origIds,
                   debug=options.debug, writeExtra=options.extra)
else:
    print >> sys.stderr, "Unknown conversion option", options.conversion
xml = ETUtils.ETFromObj(options.input) print >> sys.stderr, "Converting to ST Format" toSTFormat(xml, options.output, options.outputTag, options.origIds, debug=options.debug, allAsRelations=options.conversion == "TO-ST-RELATIONS", writeExtra=options.extra) elif options.conversion == "TO-XML": import STTools print >> sys.stderr, "Loading ST format" documents = STTools.loadSet( options.input, "GE", level="a2", sitesAreArguments=options.stSitesAreArguments, a2Tags=options.inputTags, readScores=False, debug=options.debug) print >> sys.stderr, "Converting to XML" toInteractionXML(documents, options.xmlCorpusName, options.output) elif options.conversion == "ROUNDTRIP": import STTools print >> sys.stderr, "Loading ST format" documents = STTools.loadSet( options.input, "GE", level="a2", sitesAreArguments=options.stSitesAreArguments, a2Tags=options.inputTags, readScores=False,
help="", metavar="FILE") optparser.add_option("-o", "--output", default=None, dest="output", help="") optparser.add_option("--debug", default=False, action="store_true", dest="debug", help="") optparser.add_option("--noScores", default=False, action="store_true", dest="noScores", help="") (options, args) = optparser.parse_args() if options.output == None: options.output = options.input + "-validated.tar.gz" print >> sys.stderr, "Reading documents" documents = STTools.loadSet(options.input, readScores=(not options.noScores)) print >> sys.stderr, "Writing documents" STTools.writeSet(documents, options.output, validate=True, writeScores=(not options.noScores), task=2, debug=options.debug)