def compute_smatch_batch(gold_filename, test_filename, starts, method,
                         restart_threshold, concept_edges, precise, missing,
                         detailed):
    """
    Compute SMATCH on two files with pairwise AMRs, one-AMR-per-line.

    The two files are read in lockstep: line i of the gold file is scored
    against line i of the test file. Per-pair scores are collected as
    (score, number_of_gold_triples) tuples and passed to `mean`
    (presumably a weighted mean -- confirm against its definition).
    A summary is printed to stdout; nothing is returned.

    Parameters:
        gold_filename: path of the gold AMR file.
        test_filename: path of the test AMR file.
        starts, method, restart_threshold: forwarded unchanged to
            compute_smatch_hill_climbing (used when `precise` is false).
        concept_edges: if true, every parsed graph is rebuilt with
            Hgraph.from_concept_edge_labels before scoring.
        precise: if true, score with compute_smatch_precise instead of
            hill climbing.
        missing: if true, test entries that are empty or start with "#"
            (e.g. "# Tiburon failed.", "# Decoding failed.") are left out
            of the averages; otherwise they are scored as 0.0.
        detailed: if true, print P/R/F for every pair; otherwise print one
            progress dot per scored pair.

    Exits the process with status 1 if either file cannot be opened.
    """
    try:
        gold_file = open(gold_filename)
    except IOError:
        sys.stderr.write("ERROR: Could not open gold AMR file %s.\n" % gold_filename)
        sys.exit(1)
    try:
        test_file = open(test_filename)
    except IOError:
        # Do not leak the gold handle that was already opened.
        gold_file.close()
        sys.stderr.write("ERROR: Could not open test AMR file %s.\n" % test_filename)
        sys.exit(1)

    ps, rs, fs = [], [], []
    parsefailct = 0
    totalct = 0
    emptylinect = 0

    try:
        for gold in gold_file:
            # Always consume the matching test line so both files stay
            # aligned, even when the gold entry is skipped below.
            test = test_file.readline().strip()
            gold = gold.strip()
            if not gold:
                sys.stderr.write("WARNING: Empty line in gold AMR file. Skipping entry.\n")
                continue
            totalct += 1

            try:
                amr_gold = Hgraph.from_string(gold)
                if concept_edges:
                    # rebuild normal AMR with concepts attached to nodes.
                    amr_gold = Hgraph.from_concept_edge_labels(amr_gold)
                gold_size = len(amr_gold.triples())
            except Exception as e:
                # Best effort: report the problem and move on to the next pair.
                sys.stderr.write("%s\n" % (e,))
                sys.stderr.write("WARNING: Could not parse gold AMR. Skipping entry.\n")
                continue

            if not test or test.startswith("#"):
                # Missing test entry or a failure marker line. Count it so
                # the "Fail(empty line)" figure in the summary is meaningful.
                emptylinect += 1
                if not missing:
                    rs.append((0.0, gold_size))
                    ps.append((0.0, gold_size))
                    fs.append((0.0, gold_size))
                continue

            try:
                # Parse the test AMR exactly once.
                amr_test = Hgraph.from_string(test)
                if concept_edges:
                    # rebuild normal AMR with concepts attached to nodes.
                    amr_test = Hgraph.from_concept_edge_labels(amr_test)

                if precise:
                    p, r, f = compute_smatch_precise(amr_gold, amr_test)
                else:
                    p, r, f = compute_smatch_hill_climbing(
                        amr_gold, amr_test, starts=starts, method=method,
                        restart_threshold=restart_threshold)

                if detailed:
                    sys.stdout.write("P:%f R:%f F:%f \n" % (p, r, f))
                else:
                    sys.stdout.write(".")
                    sys.stdout.flush()

                ps.append((p, gold_size))
                rs.append((r, gold_size))
                fs.append((f, gold_size))
            except pyparsing.ParseException:
                # Unparseable test AMRs are counted but not scored.
                parsefailct += 1
    finally:
        gold_file.close()
        test_file.close()

    sys.stdout.write("\n")
    avgp = mean(ps)
    avgr = mean(rs)
    avgf = mean(fs)
    sys.stdout.write("Total: %i\tFail(empty line): %i\tFail(invalid AMR): %i\n" % (
        totalct, emptylinect, parsefailct))
    sys.stdout.write("MEAN SMATCH: P:%f R:%f F:%f \n" % (avgp, avgr, avgf))
def compute_smatch_batch(gold_filename, test_filename, starts, method,
                         restart_threshold, concept_edges, precise, missing,
                         detailed):
    """
    Compute SMATCH on two files with pairwise AMRs, one-AMR-per-line.

    The two files are read in lockstep: line i of the gold file is scored
    against line i of the test file. Per-pair scores are collected as
    (score, number_of_gold_triples) tuples and passed to `mean`
    (presumably a weighted mean -- confirm against its definition).
    A summary is printed to stdout; nothing is returned.

    Parameters:
        gold_filename: path of the gold AMR file.
        test_filename: path of the test AMR file.
        starts, method, restart_threshold: forwarded unchanged to
            compute_smatch_hill_climbing (used when `precise` is false).
        concept_edges: if true, every parsed graph is rebuilt with
            Hgraph.from_concept_edge_labels before scoring.
        precise: if true, score with compute_smatch_precise instead of
            hill climbing.
        missing: if true, test entries that are empty or start with "#"
            (e.g. "# Tiburon failed.", "# Decoding failed.") are left out
            of the averages; otherwise they are scored as 0.0.
        detailed: if true, print P/R/F for every pair; otherwise print one
            progress dot per scored pair.

    Exits the process with status 1 if either file cannot be opened.
    """
    try:
        gold_file = open(gold_filename)
    except IOError:
        sys.stderr.write("ERROR: Could not open gold AMR file %s.\n" % gold_filename)
        sys.exit(1)
    try:
        test_file = open(test_filename)
    except IOError:
        # Do not leak the gold handle that was already opened.
        gold_file.close()
        sys.stderr.write("ERROR: Could not open test AMR file %s.\n" % test_filename)
        sys.exit(1)

    ps, rs, fs = [], [], []
    parsefailct = 0
    totalct = 0
    emptylinect = 0

    try:
        for gold in gold_file:
            # Always consume the matching test line so both files stay
            # aligned, even when the gold entry is skipped below.
            test = test_file.readline().strip()
            gold = gold.strip()
            if not gold:
                sys.stderr.write("WARNING: Empty line in gold AMR file. Skipping entry.\n")
                continue
            totalct += 1

            try:
                amr_gold = Hgraph.from_string(gold)
                if concept_edges:
                    # rebuild normal AMR with concepts attached to nodes.
                    amr_gold = Hgraph.from_concept_edge_labels(amr_gold)
                gold_size = len(amr_gold.triples())
            except Exception as e:
                # Best effort: report the problem and move on to the next pair.
                sys.stderr.write("%s\n" % (e,))
                sys.stderr.write("WARNING: Could not parse gold AMR. Skipping entry.\n")
                continue

            if not test or test.startswith("#"):
                # Missing test entry or a failure marker line. Count it so
                # the "Fail(empty line)" figure in the summary is meaningful.
                emptylinect += 1
                if not missing:
                    rs.append((0.0, gold_size))
                    ps.append((0.0, gold_size))
                    fs.append((0.0, gold_size))
                continue

            try:
                # Parse the test AMR exactly once.
                amr_test = Hgraph.from_string(test)
                if concept_edges:
                    # rebuild normal AMR with concepts attached to nodes.
                    amr_test = Hgraph.from_concept_edge_labels(amr_test)

                if precise:
                    p, r, f = compute_smatch_precise(amr_gold, amr_test)
                else:
                    p, r, f = compute_smatch_hill_climbing(
                        amr_gold, amr_test, starts=starts, method=method,
                        restart_threshold=restart_threshold)

                if detailed:
                    sys.stdout.write("P:%f R:%f F:%f \n" % (p, r, f))
                else:
                    sys.stdout.write(".")
                    sys.stdout.flush()

                ps.append((p, gold_size))
                rs.append((r, gold_size))
                fs.append((f, gold_size))
            except pyparsing.ParseException:
                # Unparseable test AMRs are counted but not scored.
                parsefailct += 1
    finally:
        gold_file.close()
        test_file.close()

    sys.stdout.write("\n")
    avgp = mean(ps)
    avgr = mean(rs)
    avgf = mean(fs)
    sys.stdout.write("Total: %i\tFail(empty line): %i\tFail(invalid AMR): %i\n" % (
        totalct, emptylinect, parsefailct))
    sys.stdout.write("MEAN SMATCH: P:%f R:%f F:%f \n" % (avgp, avgr, avgf))