def evaluate(self, gold, system):
    """Score all gold/system graph pairs and accumulate the counts on ``self``.

    Every pair yielded by ``score_core.intersect(gold, system)`` is passed to
    ``schedule`` together with the RRHC limit, the MCES limit and the trace
    level -- sequentially, or through a ``multiprocessing`` pool when
    ``self.cores > 1``.  Results without an error are added to the running
    ``self.total_*`` counters; results carrying an error are reported on
    standard error and recorded in ``self.scores`` under the graph identifier.

    Side effect: a ``None`` value of ``self.limits`` is replaced by the
    default limits dictionary.
    """
    if self.limits is None:
        self.limits = {"rrhc": 20, "mces": 500000}

    rrhc_limit = mces_limit = None
    if isinstance(self.limits, dict):
        rrhc_limit = self.limits.get("rrhc")
        mces_limit = self.limits.get("mces")
    # Missing or negative limits fall back to the defaults.
    if rrhc_limit is None or rrhc_limit < 0:
        rrhc_limit = 20
    if mces_limit is None or mces_limit < 0:
        mces_limit = 500000
    if self.trace > 1:
        print("RRHC limit: {}; MCES limit: {}".format(rrhc_limit, mces_limit),
              file=sys.stderr)

    if self.cores > 1:
        if self.trace > 1:
            print("mces.evaluate(): using {} cores".format(self.cores),
                  file=sys.stderr)
        with mp.Pool(self.cores) as pool:
            results = pool.starmap(
                schedule,
                ((g, s, rrhc_limit, mces_limit, self.trace)
                 for g, s in score_core.intersect(gold, system)))
    else:
        results = (schedule(g, s, rrhc_limit, mces_limit, self.trace)
                   for g, s in score_core.intersect(gold, system))

    for (graph_id, tops, labels, properties, anchors,
         edges, attributes, matches, steps, error) in results:
        if error is None:
            self.total_matches += matches
            self.total_steps += steps
            self.update(self.total_tops, tops)
            self.update(self.total_labels, labels)
            self.update(self.total_properties, properties)
            self.update(self.total_anchors, anchors)
            self.update(self.total_edges, edges)
            self.update(self.total_attributes, attributes)
            self.total_pairs += 1
            # A pair counts as inexact when the search was disabled or ran
            # past the step limit.
            if mces_limit == 0 or steps > mces_limit:
                self.total_inexact += 1
            if self.trace:
                if graph_id in self.scores:
                    print("mces.evaluate(): duplicate graph identifier: {}"
                          "".format(graph_id), file=sys.stderr)
                self.scores[graph_id] = {
                    "tops": tops, "labels": labels,
                    "properties": properties, "anchors": anchors,
                    "edges": edges, "attributes": attributes,
                }
        else:
            # Diagnostics go to standard error, like every other message
            # emitted by this method.
            print("mces.evaluate(): exception in graph #{}:\n{}"
                  "".format(graph_id, error), file=sys.stderr)
            # NOTE(review): this write is not guarded by self.trace, so it
            # assumes self.scores is always a mapping -- confirm against the
            # class initialiser.
            self.scores[graph_id] = {"error": repr(error)}
def evaluate(golds, systems, format="json", limit=5, trace=0, useanchor=False):
    """Score paired gold/system graphs via ``get_amr_match``.

    For every pair yielded by ``intersect(golds, systems)`` both graphs are
    converted with ``tuples(graph, prefix, useanchor)`` into instances,
    attributes, relations and a fourth count; that count is subtracted from
    the gold and system totals returned by ``get_amr_match``.

    Parameters:
        golds, systems: graph collections handed to ``intersect``.
        format: accepted for interface compatibility; not used here.
        limit: forwarded to ``get_amr_match``; a falsy value means 5.
        trace: 0 is silent, a truthy value records per-graph counts, and
            values above 1 additionally print diagnostics to standard error.
        useanchor: forwarded to ``tuples``.

    Returns:
        A dict with keys ``n`` (pairs scored), ``g``, ``s``, ``c`` (summed
        counts), ``p``, ``r``, ``f`` (the triple from ``fscore``) and, when
        ``trace`` is truthy, ``scores`` mapping graph id to its counts.
    """
    if not limit:
        limit = 5
    gold_prefix = "g"
    system_prefix = "s"
    total_gold = total_system = total_correct = pair_count = 0
    scores = dict() if trace else None

    for gold, system in intersect(golds, systems):
        graph_id = gold.id
        ginstances, gattributes, grelations, gn = tuples(
            gold, gold_prefix, useanchor)
        sinstances, sattributes, srelations, sn = tuples(
            system, system_prefix, useanchor)
        if trace > 1:
            # Trace output belongs on standard error with the rest of the
            # diagnostics, keeping standard output free for results.
            print("gold instances: {}\ngold attributes: {}\ngold relations: {}"
                  "".format(ginstances, gattributes, grelations),
                  file=sys.stderr)
            print("system instances: {}\nsystem attributes: {}"
                  "\nsystem relations: {}"
                  "".format(sinstances, sattributes, srelations),
                  file=sys.stderr)
        correct, gold_count, system_count = get_amr_match(
            None, None, gold.id, limit=limit,
            instance1=ginstances, attributes1=gattributes,
            relation1=grelations, prefix1=gold_prefix,
            instance2=sinstances, attributes2=sattributes,
            relation2=srelations, prefix2=system_prefix)
        # Discount the fourth value reported by tuples() on each side.
        gold_count -= gn
        system_count -= sn
        total_gold += gold_count
        total_system += system_count
        total_correct += correct
        pair_count += 1
        if trace:
            if graph_id in scores:
                print("smatch.evaluate(): duplicate graph identifier: {}"
                      "".format(graph_id), file=sys.stderr)
            scores[graph_id] = {
                "g": gold_count, "s": system_count, "c": correct}
            if trace > 1:
                p, r, f = fscore(gold_count, system_count, correct)
                print("G: {}; S: {}; C: {}; P: {}; R: {}; F: {}"
                      "".format(gold_count, system_count, correct, p, r, f),
                      file=sys.stderr)

    p, r, f = fscore(total_gold, total_system, total_correct)
    result = {"n": pair_count, "g": total_gold, "s": total_system,
              "c": total_correct, "p": p, "r": r, "f": f}
    if trace:
        result["scores"] = scores
    return result
def evaluate(gold, system, format="json", trace=0):
    """Feed every paired graph to a ``Scorer`` and return its report.

    A ``Scorer(include_virtual=True)`` is updated once per pair yielded by
    ``intersect(gold, system)``.  When ``trace`` is truthy the value returned
    by each update is kept under the gold graph's ``id``; otherwise ``None``
    is passed to ``Scorer.report`` in place of the per-graph mapping.
    ``format`` is accepted but not used here.
    """
    scorer = Scorer(include_virtual=True)
    per_graph = dict() if trace else None
    pair_count = 0
    for gold_graph, system_graph in intersect(gold, system):
        outcome = scorer.update(gold_graph, system_graph, trace)
        pair_count += 1
        if trace:
            per_graph[gold_graph.id] = outcome
    return scorer.report(pair_count, per_graph)
def evaluate(golds, systems, format="json", trace=0):
    """Count labeled/unlabeled, primary/remote overlaps between graph pairs.

    For each pair yielded by ``intersect(golds, systems)``, ``tuples`` is
    called on both graphs and returns four collections (labeled primary,
    labeled remote, unlabeled primary, unlabeled remote).  For each of the
    four the gold size, system size and intersection size are accumulated.

    Parameters:
        golds, systems: graph collections handed to ``intersect``.
        format: accepted for interface compatibility; not used here.
        trace: a truthy value records per-graph counts; values above 1 also
            print each graph's counts to standard error.

    Returns:
        A dict with ``n`` (pairs scored) and, under ``labeled`` and
        ``unlabeled``, a ``primary`` and a ``remote`` entry holding ``g``,
        ``s``, ``c`` plus the ``p``, ``r``, ``f`` triple from ``fscore``.
        When ``trace`` is truthy, ``scores`` maps graph id to its counts.
    """
    def tally(gold_items, system_items):
        # Sizes of the gold side, the system side and their intersection.
        return {"g": len(gold_items), "s": len(system_items),
                "c": len(gold_items & system_items)}

    # Fixed reporting order; it also determines dictionary insertion order.
    slots = (("labeled", "primary"), ("labeled", "remote"),
             ("unlabeled", "primary"), ("unlabeled", "remote"))
    totals = {slot: {"g": 0, "s": 0, "c": 0} for slot in slots}
    scores = dict() if trace else None
    result = {"n": 0, "labeled": dict(), "unlabeled": dict()}

    for gold, system in intersect(golds, systems):
        glprimary, glremote, guprimary, guremote = tuples(gold)
        slprimary, slremote, suprimary, suremote = tuples(system)
        counts = {
            ("labeled", "primary"): tally(glprimary, slprimary),
            ("unlabeled", "primary"): tally(guprimary, suprimary),
            ("labeled", "remote"): tally(glremote, slremote),
            ("unlabeled", "remote"): tally(guremote, suremote),
        }
        for slot in slots:
            for key in ("g", "s", "c"):
                totals[slot][key] += counts[slot][key]
        result["n"] += 1
        if trace:
            if gold.id in scores:
                print("ucca.evaluate(): duplicate graph identifier: {}"
                      "".format(gold.id), file=sys.stderr)
            score = {"labeled": dict(), "unlabeled": dict()}
            for labeling, kind in slots:
                score[labeling][kind] = counts[(labeling, kind)]
            scores[gold.id] = score
            if trace > 1:
                # Diagnostics go to standard error, like the warning above.
                print("{}: {}".format(gold.id, score), file=sys.stderr)

    for labeling, kind in slots:
        total = totals[(labeling, kind)]
        p, r, f = fscore(total["g"], total["s"], total["c"])
        result[labeling][kind] = {
            "g": total["g"], "s": total["s"], "c": total["c"],
            "p": p, "r": r, "f": f}
    if trace:
        result["scores"] = scores
    return result
def evaluate(golds, systems, format="json", limit=5, trace=0):
    """Score paired gold/system graphs via ``get_amr_match``.

    Each pair yielded by ``intersect(golds, systems)`` is converted with
    ``tuples(graph, prefix)`` into instances, attributes and relations, which
    are passed to ``get_amr_match``; its three return values (correct, gold,
    system) are summed over all pairs.

    A falsy ``limit`` is replaced by 5.  A truthy ``trace`` records per-graph
    counts under ``scores``; values above 1 additionally print the counts to
    standard error and, when the per-graph ``f`` is not 1.0, the tuples of
    both graphs.  ``format`` is accepted but not used here.

    Returns a dict with ``n``, ``g``, ``s``, ``c``, ``p``, ``r``, ``f`` and,
    when tracing, ``scores``.
    """
    limit = limit or 5
    gold_prefix, system_prefix = "g", "s"
    total_gold = total_system = total_correct = pair_count = 0
    per_graph = dict() if trace else None

    for gold_graph, system_graph in intersect(golds, systems):
        graph_id = gold_graph.id
        ginstances, gattributes, grelations = tuples(gold_graph, gold_prefix)
        sinstances, sattributes, srelations = tuples(
            system_graph, system_prefix)
        correct, gold_count, system_count = get_amr_match(
            None, None, gold_graph.id, limit=limit,
            instance1=ginstances, attributes1=gattributes,
            relation1=grelations, prefix1=gold_prefix,
            instance2=sinstances, attributes2=sattributes,
            relation2=srelations, prefix2=system_prefix)
        total_gold += gold_count
        total_system += system_count
        total_correct += correct
        pair_count += 1

        if not trace:
            continue
        if graph_id in per_graph:
            print("smatch.evaluate(): duplicate graph identifier: {}"
                  "".format(graph_id), file=sys.stderr)
        per_graph[graph_id] = {
            "g": gold_count, "s": system_count, "c": correct}
        if trace > 1:
            p, r, f = fscore(gold_count, system_count, correct)
            print("G: {}; S: {}; C: {}; P: {}; R: {}; F: {}"
                  "".format(gold_count, system_count, correct, p, r, f),
                  file=sys.stderr)
            if f != 1.0:
                # Dump both tuple sets for graphs that did not match fully.
                print("gold instances: {}\ngold attributes {}"
                      "\ngold relations: {}"
                      "".format(ginstances, gattributes, grelations),
                      file=sys.stderr)
                print("system instances: {}\nsystem attributes {}"
                      "\nsystem relations: {}"
                      "".format(sinstances, sattributes, srelations),
                      file=sys.stderr)

    p, r, f = fscore(total_gold, total_system, total_correct)
    result = {"n": pair_count, "g": total_gold, "s": total_system,
              "c": total_correct, "p": p, "r": r, "f": f}
    if trace:
        result["scores"] = per_graph
    return result
def evaluate(gold, system, format="json", limits={ "rrhc": 20, "mces": 500000 }, trace=0):
    """Score paired gold/system graphs and aggregate per-category counts.

    For every pair yielded by ``intersect(gold, system)`` an initial node
    correspondence is computed, optionally replaced by a mapping obtained
    from ``smatch`` (EDS and AMR graphs only, when the RRHC limit is
    positive), and then refined by iterating over ``correspondences``.  The
    best mapping found is scored with ``g.score`` and the resulting counts
    for tops, labels, properties, anchors, edges and attributes are summed.

    Parameters:
        gold, system: graph collections handed to ``intersect``.
        format: accepted but not used in this function.
        limits: dict that may carry ``"rrhc"`` and ``"mces"``; a missing,
            ``None`` or negative value falls back to 20 and 500000.  A
            non-dict value selects both defaults.  The default dict is
            never mutated here.
        trace: 0 is silent; truthy values record per-graph scores; larger
            values print progressively more diagnostics to standard error.

    Returns:
        A dict with ``n`` (pairs scored), ``exact`` (pairs not counted as
        inexact), one entry per category plus ``all``, each holding ``g``,
        ``s``, ``c`` and the ``p``, ``r``, ``f`` triple from ``fscore``, and
        ``scores`` when ``trace`` is truthy.

    Raises:
        ValueError: when scoring a graph raises ``KeyError`` or
            ``AttributeError``; the original exception is chained.
    """
    # Module-level step counter: reset per graph pair below and read after
    # the search.  Presumably incremented inside correspondences() -- confirm.
    global counter

    def update(total, counts):
        # Add the g/s/c counts of one category into a running total.
        for key in ("g", "s", "c"):
            total[key] += counts[key]

    def finalize(counts):
        # Attach the p/r/f triple computed by fscore() to a counts dict.
        p, r, f = fscore(counts["g"], counts["s"], counts["c"])
        counts.update({ "p": p, "r": r, "f": f })

    rrhc_limit = mces_limit = None
    if isinstance(limits, dict):
        if "rrhc" in limits:
            rrhc_limit = limits["rrhc"]
        if "mces" in limits:
            mces_limit = limits["mces"]
    # Missing or negative limits fall back to the defaults; zero is kept and
    # disables the corresponding stage below.
    if rrhc_limit is None or rrhc_limit < 0:
        rrhc_limit = 20
    if mces_limit is None or mces_limit < 0:
        mces_limit = 500000
    if trace > 1:
        print("RRHC limit: {}; MCES limit: {}".format(rrhc_limit, mces_limit),
              file=sys.stderr)
    total_matches = total_steps = 0
    total_pairs = 0
    total_inexact = 0
    total_tops = {"g": 0, "s": 0, "c": 0}
    total_labels = {"g": 0, "s": 0, "c": 0}
    total_properties = {"g": 0, "s": 0, "c": 0}
    total_anchors = {"g": 0, "s": 0, "c": 0}
    total_edges = {"g": 0, "s": 0, "c": 0}
    total_attributes = {"g": 0, "s": 0, "c": 0}
    # Per-graph scores are only collected when tracing.
    scores = dict() if trace else None
    for g, s in intersect(gold, system):
        try:
            counter = 0
            g_identities, s_identities, g_dominated, s_dominated = \
                identities(g, s)
            pairs, rewards = initial_node_correspondences(
                g, s, identities1=g_identities, identities2=s_identities)
            if trace > 1:
                print("\n\ngraph #{}".format(g.id), file=sys.stderr)
                print("number of gold nodes: {}".format(len(g.nodes)),
                      file=sys.stderr)
                print("number of system nodes: {}".format(len(s.nodes)),
                      file=sys.stderr)
                print("number of edges: {}".format(len(g.edges)),
                      file=sys.stderr)
                if trace > 2:
                    print("rewards and pairs:\n{}\n{}\n"
                          "".format(rewards, sorted(pairs)),
                          file=sys.stderr)
            #
            # experimental: see whether random-restart hill-climbing (from
            # SMATCH) yields a better start into the search ...
            #
            n_smatched = 0
            if g.framework in {"eds", "amr"} and rrhc_limit > 0:
                n_smatched, _, _, mapping \
                    = smatch(g, s, rrhc_limit,
                             {"tops", "labels", "properties", "anchors",
                              "edges", "attributes"},
                             0, False)
                # Negative targets in the smatch mapping become None.
                mapping = [(i, j if j >= 0 else None)
                           for i, j in enumerate(mapping)]
                tops, labels, properties, anchors, edges, attributes \
                    = g.score(s, mapping)
                # Total of the correct counts across all six categories.
                all = tops["c"] + labels["c"] + properties["c"] \
                    + anchors["c"] + edges["c"] + attributes["c"]
                status = "{}".format(n_smatched)
                # Cap the smatch count at what g.score() confirms.
                if n_smatched > all:
                    status = "{} vs. {}".format(n_smatched, all)
                    n_smatched = all
                if trace > 1:
                    print("pairs {} smatch [{}]: {}"
                          "".format(
                              "from" if set(pairs) != set(mapping) else "by",
                              status, sorted(mapping)),
                          file=sys.stderr)
                # Seed the search with the smatch mapping when it differs.
                if set(pairs) != set(mapping):
                    pairs = mapping
            n_matched = 0
            best_cv, best_ce = {}, {}
            # The search is skipped for graphs without nodes or a zero limit.
            if g.nodes and mces_limit > 0:
                for i, (cv, ce) in enumerate(
                        correspondences(g, s, pairs, rewards,
                                        mces_limit, trace,
                                        dominated1=g_dominated,
                                        dominated2=s_dominated)):
                    # Disabled sanity checks:
                    # assert is_valid(ce)
                    # assert is_injective(ce)
                    n = sum(map(len, ce.values()))
                    # Keep the candidate with the most matched entries.
                    if n > n_matched:
                        if trace > 1:
                            print("\n[{}] solution #{}; matches: {}"
                                  "".format(counter, i, n), file=sys.stderr)
                        n_matched = n
                        best_cv, best_ce = cv, ce
            total_matches += n_matched
            total_steps += counter
            # Fall back to the seed pairs when the search found nothing.
            tops, labels, properties, anchors, edges, attributes \
                = g.score(s, best_cv or pairs)
            # Disabled sanity check:
            # assert n_matched >= n_smatched;
            if trace:
                if n_smatched and n_matched != n_smatched:
                    print("delta to smatch: {}"
                          "".format(n_matched - n_smatched), file=sys.stderr)
                if g.id in scores:
                    print("mces.evaluate(): duplicate graph identifier: {}"
                          "".format(g.id), file=sys.stderr)
                scores[g.id] = { "tops": tops, "labels": labels,
                                 "properties": properties, "anchors": anchors,
                                 "edges": edges, "attributes": attributes }
            update(total_tops, tops)
            update(total_labels, labels)
            update(total_properties, properties)
            update(total_anchors, anchors)
            update(total_edges, edges)
            update(total_attributes, attributes)
            total_pairs += 1
            # A pair counts as inexact when the search was disabled or the
            # step counter ran past the limit.
            if mces_limit == 0 or counter > mces_limit:
                total_inexact += 1
            if trace > 1:
                print("[{}] Number of edges in correspondence: {}"
                      "".format(counter, n_matched), file=sys.stderr)
                print("[{}] Total matches: {}".format(total_steps,
                                                      total_matches),
                      file=sys.stderr)
                print("tops: {}\nlabels: {}\nproperties: {}\nanchors: {}"
                      "\nedges: {}\nattributes: {}"
                      "".format(tops, labels, properties, anchors, edges,
                                attributes),
                      file=sys.stderr)
                if trace > 2:
                    print(best_cv, file=sys.stderr)
                    print(best_ce, file=sys.stderr)
        except (KeyError, AttributeError) as e:
            # Re-raise with the graph identifier, keeping the cause chained.
            raise ValueError(
                "mces.evaluate(): failed evaluating graph: {}".format(
                    g.id)) from e
    total_all = { "g": 0, "s": 0, "c": 0 }
    # Sum the raw g/s/c counts into the grand total before finalize() adds
    # the p/r/f keys to each category.
    for counts in [ total_tops, total_labels, total_properties,
                    total_anchors, total_edges, total_attributes ]:
        update(total_all, counts)
        finalize(counts)
    finalize(total_all)
    result = { "n": total_pairs, "exact": total_pairs - total_inexact,
               "tops": total_tops, "labels": total_labels,
               "properties": total_properties, "anchors": total_anchors,
               "edges": total_edges, "attributes": total_attributes,
               "all": total_all }
    if trace:
        result["scores"] = scores
    return result
def evaluate(gold, system, format="json", limit=500000, trace=0):
    """Score paired gold/system graphs and aggregate per-category counts.

    For every pair yielded by ``intersect(gold, system)`` the candidates
    produced by ``correspondences`` are scanned, the one with the most
    matched entries is kept, and ``g.score`` is called with it.  The counts
    for tops, labels, properties, anchors, edges and attributes are summed.

    Parameters:
        gold, system: graph collections handed to ``intersect``.
        format: accepted but not used in this function.
        limit: forwarded to ``correspondences``; a falsy value means 500000.
        trace: 0 is silent; truthy values record per-graph scores; larger
            values print progressively more diagnostics to standard error.

    Returns:
        A dict with ``n`` (pairs scored), ``exact`` (pairs whose step
        counter stayed within ``limit``), one entry per category plus
        ``all``, each holding ``g``, ``s``, ``c`` and the ``p``, ``r``,
        ``f`` triple from ``fscore``, and ``scores`` when tracing.
    """
    # Module-level step counter: reset per graph pair below and read after
    # the search.  Presumably incremented inside correspondences() -- confirm.
    global counter

    def update(total, counts):
        # Add the g/s/c counts of one category into a running total.
        for key in ("g", "s", "c"):
            total[key] += counts[key]

    def finalize(counts):
        # Attach the p/r/f triple computed by fscore() to a counts dict.
        p, r, f = fscore(counts["g"], counts["s"], counts["c"])
        counts.update({"p": p, "r": r, "f": f})

    if not limit:
        limit = 500000
    total_matches = total_steps = 0
    total_pairs = 0
    total_inexact = 0
    total_tops = {"g": 0, "s": 0, "c": 0}
    total_labels = {"g": 0, "s": 0, "c": 0}
    total_properties = {"g": 0, "s": 0, "c": 0}
    total_anchors = {"g": 0, "s": 0, "c": 0}
    total_edges = {"g": 0, "s": 0, "c": 0}
    total_attributes = {"g": 0, "s": 0, "c": 0}
    # Per-graph scores are only collected when tracing.
    scores = dict() if trace else None

    for g, s in intersect(gold, system):
        counter = 0
        g_identities, s_identities, g_dominated, s_dominated = \
            identities(g, s)
        pairs, rewards = initial_node_correspondences(
            g, s, identities1=g_identities, identities2=s_identities)
        # All trace output goes to standard error, matching the
        # duplicate-identifier warning below.
        if trace > 1:
            print("\n\ngraph #{}".format(g.id), file=sys.stderr)
            print("Number of gold nodes: {}".format(len(g.nodes)),
                  file=sys.stderr)
            print("Number of system nodes: {}".format(len(s.nodes)),
                  file=sys.stderr)
            print("Number of edges: {}".format(len(g.edges)),
                  file=sys.stderr)
            if trace > 2:
                print("Rewards and Pairs:\n{}\n{}\n".format(rewards, pairs),
                      file=sys.stderr)

        n_matched = 0
        best_cv, best_ce = None, None
        for i, (cv, ce) in enumerate(
                correspondences(g, s, pairs, rewards, limit, trace,
                                dominated1=g_dominated,
                                dominated2=s_dominated)):
            assert is_valid(ce)
            assert is_injective(ce)
            n = sum(map(len, ce.values()))
            # Keep the candidate with the most matched entries.
            if n > n_matched:
                if trace > 1:
                    print("\n[{}] solution #{}; matches: {}"
                          "".format(counter, i, n), file=sys.stderr)
                n_matched = n
                best_cv, best_ce = cv, ce
        total_matches += n_matched
        total_steps += counter
        tops, labels, properties, anchors, edges, attributes \
            = g.score(s, best_cv)
        if trace:
            if g.id in scores:
                print("mces.evaluate(): duplicate graph identifier: {}"
                      "".format(g.id), file=sys.stderr)
            scores[g.id] = {"tops": tops, "labels": labels,
                            "properties": properties, "anchors": anchors,
                            "edges": edges, "attributes": attributes}
        update(total_tops, tops)
        update(total_labels, labels)
        update(total_properties, properties)
        update(total_anchors, anchors)
        update(total_edges, edges)
        update(total_attributes, attributes)
        total_pairs += 1
        # A pair counts as inexact when the step counter ran past the limit.
        if counter > limit:
            total_inexact += 1
        if trace > 1:
            print("[{}] Number of edges in correspondence: {}"
                  "".format(counter, n_matched), file=sys.stderr)
            print("[{}] Total matches: {}".format(total_steps, total_matches),
                  file=sys.stderr)
            print("tops: {}\nlabels: {}\nproperties: {}\nanchors: {}"
                  "\nedges: {}\nattributes:{}"
                  "".format(tops, labels, properties, anchors, edges,
                            attributes),
                  file=sys.stderr)
            if trace > 2:
                print(best_cv, file=sys.stderr)
                print(best_ce, file=sys.stderr)

    total_all = {"g": 0, "s": 0, "c": 0}
    # Sum the raw g/s/c counts into the grand total before finalize() adds
    # the p/r/f keys to each category.
    for counts in [total_tops, total_labels, total_properties,
                   total_anchors, total_edges, total_attributes]:
        update(total_all, counts)
        finalize(counts)
    finalize(total_all)
    result = {"n": total_pairs, "exact": total_pairs - total_inexact,
              "tops": total_tops, "labels": total_labels,
              "properties": total_properties, "anchors": total_anchors,
              "edges": total_edges, "attributes": total_attributes,
              "all": total_all}
    if trace:
        result["scores"] = scores
    return result
def evaluate(golds, systems, format="json", trace=0):
    """Count labeled/unlabeled, primary/remote overlaps between graph pairs.

    ``tuples`` is called on both graphs of every pair yielded by
    ``intersect(golds, systems)`` and returns four collections (labeled
    primary, labeled remote, unlabeled primary, unlabeled remote).  For each
    of the four, the gold size, system size and intersection size are summed
    over all pairs and combined with the triple returned by ``fscore``.

    The returned dict holds ``n`` (pairs scored), the ``labeled`` and
    ``unlabeled`` sub-dicts with ``primary`` and ``remote`` entries, and --
    when ``trace`` is truthy -- ``scores`` with the per-graph counts.
    ``format`` is accepted but not used here.
    """
    def tally(gold_items, system_items):
        # Sizes of the gold side, the system side and their intersection.
        return {"g": len(gold_items), "s": len(system_items),
                "c": len(gold_items & system_items)}

    # Fixed reporting order; it also determines dictionary insertion order.
    slots = (("labeled", "primary"), ("labeled", "remote"),
             ("unlabeled", "primary"), ("unlabeled", "remote"))
    totals = {slot: {"g": 0, "s": 0, "c": 0} for slot in slots}
    scores = dict() if trace else None
    result = {"n": 0, "labeled": dict(), "unlabeled": dict()}

    for gold, system in intersect(golds, systems):
        glprimary, glremote, guprimary, guremote = tuples(gold)
        slprimary, slremote, suprimary, suremote = tuples(system)
        counts = {
            ("labeled", "primary"): tally(glprimary, slprimary),
            ("unlabeled", "primary"): tally(guprimary, suprimary),
            ("labeled", "remote"): tally(glremote, slremote),
            ("unlabeled", "remote"): tally(guremote, suremote),
        }
        for slot in slots:
            for key in ("g", "s", "c"):
                totals[slot][key] += counts[slot][key]
        result["n"] += 1
        if trace:
            if gold.id in scores:
                print("ucca.evaluate(): duplicate graph identifier: {}"
                      "".format(gold.id), file=sys.stderr)
            score = {"labeled": dict(), "unlabeled": dict()}
            for labeling, kind in slots:
                score[labeling][kind] = counts[(labeling, kind)]
            scores[gold.id] = score

    for labeling, kind in slots:
        total = totals[(labeling, kind)]
        p, r, f = fscore(total["g"], total["s"], total["c"])
        result[labeling][kind] = {
            "g": total["g"], "s": total["s"], "c": total["c"],
            "p": p, "r": r, "f": f}
    if trace:
        result["scores"] = scores
    return result
def evaluate(gold, system, format="json", limit=500000, trace=0):
    """Score paired gold/system graphs and aggregate per-category counts.

    For every pair yielded by ``intersect(gold, system)`` the candidates
    produced by ``correspondences`` are scanned, the one with the most
    matched entries is kept, and ``g.score`` is called with it.  The counts
    for tops, labels, properties, anchors, edges and attributes are summed.
    An experimental seeding step via ``smatch`` is present but switched off.

    Parameters:
        gold, system: graph collections handed to ``intersect``.
        format: accepted but not used in this function.
        limit: forwarded to ``correspondences``; a falsy value means 500000.
        trace: 0 is silent; truthy values record per-graph scores; larger
            values print progressively more diagnostics to standard error.

    Returns:
        A dict with ``n`` (pairs scored), ``exact`` (pairs whose step
        counter stayed within ``limit``), one entry per category plus
        ``all``, each holding ``g``, ``s``, ``c`` and the ``p``, ``r``,
        ``f`` triple from ``fscore``, and ``scores`` when tracing.
    """
    # Module-level step counter: reset per graph pair below and read after
    # the search.  Presumably incremented inside correspondences() -- confirm.
    global counter

    def update(total, counts):
        # Add the g/s/c counts of one category into a running total.
        for key in ("g", "s", "c"):
            total[key] += counts[key]

    def finalize(counts):
        # Attach the p/r/f triple computed by fscore() to a counts dict.
        p, r, f = fscore(counts["g"], counts["s"], counts["c"])
        counts.update({"p": p, "r": r, "f": f})

    if not limit:
        limit = 500000
    total_matches = total_steps = 0
    total_pairs = 0
    total_inexact = 0
    total_tops = {"g": 0, "s": 0, "c": 0}
    total_labels = {"g": 0, "s": 0, "c": 0}
    total_properties = {"g": 0, "s": 0, "c": 0}
    total_anchors = {"g": 0, "s": 0, "c": 0}
    total_edges = {"g": 0, "s": 0, "c": 0}
    total_attributes = {"g": 0, "s": 0, "c": 0}
    # Per-graph scores are only collected when tracing.
    scores = dict() if trace else None

    for g, s in intersect(gold, system):
        counter = 0
        g_identities, s_identities, g_dominated, s_dominated = \
            identities(g, s)
        pairs, rewards = initial_node_correspondences(
            g, s, identities1=g_identities, identities2=s_identities)
        if trace > 1:
            print("\n\ngraph #{}".format(g.id), file=sys.stderr)
            print("number of gold nodes: {}".format(len(g.nodes)),
                  file=sys.stderr)
            print("number of system nodes: {}".format(len(s.nodes)),
                  file=sys.stderr)
            print("number of edges: {}".format(len(g.edges)),
                  file=sys.stderr)
            if trace > 2:
                print("rewards and pairs:\n{}\n{}\n"
                      "".format(rewards, sorted(pairs)),
                      file=sys.stderr)
        #
        # experimental: see whether random-restart hill-climbing (from
        # SMATCH) yields a better start into the search ...
        # The leading `False` keeps this branch switched off.
        #
        n_smatched = 0
        if False and g.framework in {"eds", "ucca", "amr"}:
            n_smatched, _, _, mapping \
                = smatch(g, s, 50,
                         {"tops", "labels", "properties", "anchors",
                          "edges", "attributes"},
                         0, False)
            # Negative targets in the smatch mapping become None.
            mapping = [(i, j if j >= 0 else None)
                       for i, j in enumerate(mapping)]
            if set(pairs) != set(mapping):
                print("pairs from smatch: {}".format(sorted(mapping)),
                      file=sys.stderr)
                pairs = mapping

        n_matched = 0
        best_cv, best_ce = None, None
        for i, (cv, ce) in enumerate(
                correspondences(g, s, pairs, rewards, limit, trace,
                                dominated1=g_dominated,
                                dominated2=s_dominated)):
            assert is_valid(ce)
            assert is_injective(ce)
            n = sum(map(len, ce.values()))
            # Keep the candidate with the most matched entries.
            if n > n_matched:
                if trace > 1:
                    print("\n[{}] solution #{}; matches: {}"
                          "".format(counter, i, n), file=sys.stderr)
                n_matched = n
                best_cv, best_ce = cv, ce
        total_matches += n_matched
        total_steps += counter
        tops, labels, properties, anchors, edges, attributes \
            = g.score(s, best_cv)
        assert n_matched >= n_smatched
        if trace:
            if n_smatched and n_matched > n_smatched:
                print("improvement over smatch: {}"
                      "".format(n_matched - n_smatched), file=sys.stderr)
            if g.id in scores:
                print("mces.evaluate(): duplicate graph identifier: {}"
                      "".format(g.id), file=sys.stderr)
            scores[g.id] = {"tops": tops, "labels": labels,
                            "properties": properties, "anchors": anchors,
                            "edges": edges, "attributes": attributes}
        update(total_tops, tops)
        update(total_labels, labels)
        update(total_properties, properties)
        update(total_anchors, anchors)
        update(total_edges, edges)
        update(total_attributes, attributes)
        total_pairs += 1
        # A pair counts as inexact when the step counter ran past the limit.
        if counter > limit:
            total_inexact += 1
        if trace > 1:
            print("[{}] Number of edges in correspondence: {}"
                  "".format(counter, n_matched), file=sys.stderr)
            # Sent to standard error like the neighbouring trace lines.
            print("[{}] Total matches: {}".format(total_steps, total_matches),
                  file=sys.stderr)
            print("tops: {}\nlabels: {}\nproperties: {}\nanchors: {}"
                  "\nedges: {}\nattributes: {}"
                  "".format(tops, labels, properties, anchors, edges,
                            attributes),
                  file=sys.stderr)
            if trace > 2:
                print(best_cv, file=sys.stderr)
                print(best_ce, file=sys.stderr)

    total_all = {"g": 0, "s": 0, "c": 0}
    # Sum the raw g/s/c counts into the grand total before finalize() adds
    # the p/r/f keys to each category.
    for counts in [total_tops, total_labels, total_properties,
                   total_anchors, total_edges, total_attributes]:
        update(total_all, counts)
        finalize(counts)
    finalize(total_all)
    result = {"n": total_pairs, "exact": total_pairs - total_inexact,
              "tops": total_tops, "labels": total_labels,
              "properties": total_properties, "anchors": total_anchors,
              "edges": total_edges, "attributes": total_attributes,
              "all": total_all}
    if trace:
        result["scores"] = scores
    return result