def evaluate(golds, systems, stream, format="json", trace=False):
    """Score system graphs against gold graphs and print the totals.

    Gold and system graphs are paired positionally with ``zip``.  For every
    pair, ``tuples()`` is expected to return three sets (names, arguments,
    properties); their sizes and the sizes of the gold/system intersections
    are summed over all pairs.  ``fscore()`` then turns each triple of totals
    into the three values stored under ``"p"``, ``"r"`` and ``"f"``.

    Args:
        golds: Iterable of gold graphs.
        systems: Iterable of system graphs, aligned with ``golds``.
        stream: Writable text stream the result dictionary is printed to.
        format: Accepted for interface compatibility; not used here.
        trace: Accepted for interface compatibility; not used here.

    Returns:
        None.  The result dictionary (``"n"`` plus one entry per category
        with keys ``g``, ``s``, ``m``, ``p``, ``r``, ``f``) is printed to
        ``stream``.
    """
    # Categories in the order in which tuples() returns its three sets.
    categories = ("names", "arguments", "properties")
    # One running [gold, system, matched] triple per category.
    totals = [[0, 0, 0] for _ in categories]
    result = {"n": 0}

    for gold, system in zip(golds, systems):
        gnames, garguments, gproperties = tuples(gold)
        snames, sarguments, sproperties = tuples(system)
        pairs = (
            (gnames, snames),
            (garguments, sarguments),
            (gproperties, sproperties),
        )
        for total, (gset, sset) in zip(totals, pairs):
            total[0] += len(gset)
            total[1] += len(sset)
            total[2] += len(gset & sset)
        result["n"] += 1

    # Scores are computed once, from the totals over all graph pairs.
    for category, (g, s, m) in zip(categories, totals):
        p, r, f = fscore(g, s, m)
        result[category] = {"g": g, "s": s, "m": m, "p": p, "r": r, "f": f}
    print(result, file=stream)
def evaluate(golds, systems, format="json", limit=5, trace=0, useanchor=False):
    """Accumulate smatch-style match counts over paired graphs.

    Graph pairs come from ``intersect(golds, systems)``.  Each graph is
    converted by ``tuples()`` into instances, attributes, relations and a
    fourth numeric value; the first three are handed to ``get_amr_match()``,
    whose result is unpacked as (correct, gold, system) counts.

    Args:
        golds: Gold graphs.
        systems: System graphs.
        format: Accepted for interface compatibility; not used here.
        limit: Passed to ``get_amr_match()`` as ``limit``; any falsy value
            is replaced by 5.
        trace: Truthy values collect per-graph counts under ``"scores"``;
            values above 1 additionally write diagnostics to ``sys.stderr``.
        useanchor: Forwarded to ``tuples()`` as its third argument.

    Returns:
        dict with ``n`` (pairs scored), the totals ``g``, ``s``, ``c``, the
        three ``fscore()`` values ``p``, ``r``, ``f`` and, when tracing,
        ``scores`` keyed by gold graph id.
    """
    if not limit:
        limit = 5
    gprefix = "g"
    sprefix = "s"
    tg = ts = tc = n = 0
    scores = dict() if trace else None

    for gold_graph, system_graph in intersect(golds, systems):
        graph_id = gold_graph.id
        ginstances, gattributes, grelations, gn = tuples(
            gold_graph, gprefix, useanchor)
        sinstances, sattributes, srelations, sn = tuples(
            system_graph, sprefix, useanchor)
        if trace > 1:
            # Tuple dumps go to stderr, like every other diagnostic below.
            print(
                "gold instances: {}\ngold attributes: {}\ngold relations: {}"
                .format(ginstances, gattributes, grelations),
                file=sys.stderr)
            print(
                "system instances: {}\nsystem attributes: {}\n"
                "system relations: {}"
                .format(sinstances, sattributes, srelations),
                file=sys.stderr)
        correct, gold, system = get_amr_match(
            None, None, graph_id, limit=limit,
            instance1=ginstances, attributes1=gattributes,
            relation1=grelations, prefix1=gprefix,
            instance2=sinstances, attributes2=sattributes,
            relation2=srelations, prefix2=sprefix)
        # The fourth value from tuples() is taken off the reported totals
        # (presumably tuples that must not count; confirm against tuples()).
        gold -= gn
        system -= sn
        tg += gold
        ts += system
        tc += correct
        n += 1
        if trace:
            if graph_id in scores:
                print("smatch.evaluate(): duplicate graph identifier: {}"
                      .format(graph_id), file=sys.stderr)
            # A duplicate identifier overwrites the earlier entry.
            scores[graph_id] = {"g": gold, "s": system, "c": correct}
            if trace > 1:
                p, r, f = fscore(gold, system, correct)
                print("G: {}; S: {}; C: {}; P: {}; R: {}; F: {}"
                      .format(gold, system, correct, p, r, f),
                      file=sys.stderr)

    p, r, f = fscore(tg, ts, tc)
    result = {"n": n, "g": tg, "s": ts, "c": tc, "p": p, "r": r, "f": f}
    if trace:
        result["scores"] = scores
    return result
def evaluate(golds, systems, format="json", trace=0):
    """Score paired graphs on labeled/unlabeled, primary/remote tuple sets.

    Graph pairs come from ``intersect(golds, systems)``.  ``tuples()`` is
    expected to return four sets per graph, in the order labeled primary,
    labeled remote, unlabeled primary, unlabeled remote.  For each of the
    four categories the gold size, system size and intersection size are
    summed over all pairs and passed to ``fscore()``.

    Args:
        golds: Gold graphs.
        systems: System graphs.
        format: Accepted for interface compatibility; not used here.
        trace: Truthy values collect per-graph counts under ``"scores"``;
            values above 1 also write each per-graph entry to ``sys.stderr``.

    Returns:
        dict with ``n`` (pairs scored) and ``labeled`` / ``unlabeled``
        sub-dicts, each holding ``primary`` and ``remote`` entries with keys
        ``g``, ``s``, ``c``, ``p``, ``r``, ``f``; plus ``scores`` keyed by
        gold graph id when tracing.
    """
    # Categories in the order in which tuples() returns its four sets.
    layout = (
        ("labeled", "primary"),
        ("labeled", "remote"),
        ("unlabeled", "primary"),
        ("unlabeled", "remote"),
    )
    # One running [gold, system, correct] triple per category.
    totals = [[0, 0, 0] for _ in layout]
    scores = dict() if trace else None
    result = {"n": 0, "labeled": dict(), "unlabeled": dict()}

    for gold, system in intersect(golds, systems):
        glprimary, glremote, guprimary, guremote = tuples(gold)
        slprimary, slremote, suprimary, suremote = tuples(system)
        gold_sets = (glprimary, glremote, guprimary, guremote)
        system_sets = (slprimary, slremote, suprimary, suremote)
        counts = [
            (len(gset), len(sset), len(gset & sset))
            for gset, sset in zip(gold_sets, system_sets)
        ]
        for total, (g, s, c) in zip(totals, counts):
            total[0] += g
            total[1] += s
            total[2] += c
        result["n"] += 1

        if trace:
            if gold.id in scores:
                print("ucca.evaluate(): duplicate graph identifier: {}"
                      .format(gold.id), file=sys.stderr)
            score = {"labeled": dict(), "unlabeled": dict()}
            for (labeling, kind), (g, s, c) in zip(layout, counts):
                score[labeling][kind] = {"g": g, "s": s, "c": c}
            # A duplicate identifier overwrites the earlier entry.
            scores[gold.id] = score
            if trace > 1:
                # Same stream as the duplicate-identifier warning above.
                print("{}: {}".format(gold.id, score), file=sys.stderr)

    for (labeling, kind), (g, s, c) in zip(layout, totals):
        p, r, f = fscore(g, s, c)
        result[labeling][kind] = {
            "g": g, "s": s, "c": c, "p": p, "r": r, "f": f}
    if trace:
        result["scores"] = scores
    return result
def finalize(counts):
    """Extend *counts* in place with the three values from ``fscore()``.

    Reads ``counts["g"]``, ``counts["s"]`` and ``counts["c"]``, passes them
    to ``fscore()`` and stores the returned triple under the keys ``"p"``,
    ``"r"`` and ``"f"`` (presumably precision, recall and F-score; confirm
    against ``fscore()``).  Returns None.
    """
    gold_total = counts["g"]
    system_total = counts["s"]
    correct_total = counts["c"]
    first, second, third = fscore(gold_total, system_total, correct_total)
    counts.update(p=first, r=second, f=third)
def evaluate(golds, systems, format="json", limit=5, trace=0):
    """Accumulate smatch-style match counts over paired graphs.

    Graph pairs come from ``intersect(golds, systems)``.  ``tuples()``
    converts each graph into instances, attributes and relations, which are
    passed to ``get_amr_match()``; its result is unpacked as
    (correct, gold, system) counts and summed over all pairs.

    Args:
        golds: Gold graphs.
        systems: System graphs.
        format: Accepted for interface compatibility; not used here.
        limit: Passed to ``get_amr_match()`` as ``limit``; any falsy value
            is replaced by 5.
        trace: Truthy values collect per-graph counts under ``"scores"``;
            values above 1 additionally write diagnostics to ``sys.stderr``.

    Returns:
        dict with ``n`` (pairs scored), the totals ``g``, ``s``, ``c``, the
        three ``fscore()`` values ``p``, ``r``, ``f`` and, when tracing,
        ``scores`` keyed by gold graph id.
    """
    limit = limit or 5
    gold_prefix, system_prefix = "g", "s"
    total_gold = total_system = total_correct = pairs = 0
    per_graph = {} if trace else None

    for gold_graph, system_graph in intersect(golds, systems):
        graph_id = gold_graph.id
        ginstances, gattributes, grelations = tuples(gold_graph, gold_prefix)
        sinstances, sattributes, srelations = tuples(
            system_graph, system_prefix)
        correct, gold_count, system_count = get_amr_match(
            None, None, graph_id, limit=limit,
            instance1=ginstances, attributes1=gattributes,
            relation1=grelations, prefix1=gold_prefix,
            instance2=sinstances, attributes2=sattributes,
            relation2=srelations, prefix2=system_prefix)
        total_gold += gold_count
        total_system += system_count
        total_correct += correct
        pairs += 1

        if not trace:
            continue

        if graph_id in per_graph:
            print("smatch.evaluate(): duplicate graph identifier: {}"
                  .format(graph_id), file=sys.stderr)
        # A duplicate identifier overwrites the earlier entry.
        per_graph[graph_id] = {
            "g": gold_count, "s": system_count, "c": correct}

        if trace > 1:
            p, r, f = fscore(gold_count, system_count, correct)
            print("G: {}; S: {}; C: {}; P: {}; R: {}; F: {}"
                  .format(gold_count, system_count, correct, p, r, f),
                  file=sys.stderr)
            # Dump both tuple sets only for pairs that did not match fully.
            if f != 1.0:
                print(
                    "gold instances: {}\ngold attributes {}\n"
                    "gold relations: {}"
                    .format(ginstances, gattributes, grelations),
                    file=sys.stderr)
                print(
                    "system instances: {}\nsystem attributes {}\n"
                    "system relations: {}"
                    .format(sinstances, sattributes, srelations),
                    file=sys.stderr)

    p, r, f = fscore(total_gold, total_system, total_correct)
    result = {
        "n": pairs,
        "g": total_gold,
        "s": total_system,
        "c": total_correct,
        "p": p,
        "r": r,
        "f": f,
    }
    if trace:
        result["scores"] = per_graph
    return result
def evaluate(golds, systems, format="json", trace=0):
    """Score paired graphs on labeled/unlabeled, primary/remote tuple sets.

    Graph pairs come from ``intersect(golds, systems)``.  ``tuples()`` is
    expected to return four sets per graph, in the order labeled primary,
    labeled remote, unlabeled primary, unlabeled remote.  Gold size, system
    size and intersection size are summed per category over all pairs and
    then passed to ``fscore()``.

    Args:
        golds: Gold graphs.
        systems: System graphs.
        format: Accepted for interface compatibility; not used here.
        trace: Truthy values collect per-graph counts under ``"scores"``.

    Returns:
        dict with ``n`` (pairs scored) and ``labeled`` / ``unlabeled``
        sub-dicts, each holding ``primary`` and ``remote`` entries with keys
        ``g``, ``s``, ``c``, ``p``, ``r``, ``f``; plus ``scores`` keyed by
        gold graph id when tracing.
    """
    # (labeling, kind) slots in the order tuples() returns its four sets.
    slots = (
        ("labeled", "primary"),
        ("labeled", "remote"),
        ("unlabeled", "primary"),
        ("unlabeled", "remote"),
    )
    running = [{"g": 0, "s": 0, "c": 0} for _ in slots]
    scores = dict() if trace else None
    result = {"n": 0, "labeled": dict(), "unlabeled": dict()}

    for gold, system in intersect(golds, systems):
        glprimary, glremote, guprimary, guremote = tuples(gold)
        slprimary, slremote, suprimary, suremote = tuples(system)
        matched = zip(
            (glprimary, glremote, guprimary, guremote),
            (slprimary, slremote, suprimary, suremote),
        )
        # Fresh per-graph count dicts, one per slot.
        graph_counts = [
            {"g": len(gset), "s": len(sset), "c": len(gset & sset)}
            for gset, sset in matched
        ]
        for total, counts in zip(running, graph_counts):
            for key in ("g", "s", "c"):
                total[key] += counts[key]
        result["n"] += 1

        if trace:
            if gold.id in scores:
                print("ucca.evaluate(): duplicate graph identifier: {}"
                      .format(gold.id), file=sys.stderr)
            score = {"labeled": dict(), "unlabeled": dict()}
            for (labeling, kind), counts in zip(slots, graph_counts):
                score[labeling][kind] = counts
            # A duplicate identifier overwrites the earlier entry.
            scores[gold.id] = score

    for (labeling, kind), total in zip(slots, running):
        p, r, f = fscore(total["g"], total["s"], total["c"])
        result[labeling][kind] = {
            "g": total["g"], "s": total["s"], "c": total["c"],
            "p": p, "r": r, "f": f}
    if trace:
        result["scores"] = scores
    return result