def format_amr(l): amr_s = ' '.join(l) amr_g = Hgraph.from_string(amr_s) return amr_g
def parse_string(self, s, concepts=True):
    """
    Parse the string s and return a new hypergraph.

    The tokens produced by self.lexer.lex(s) drive a finite state machine
    that keeps parent nodes, edge labels and child nodes on a stack and
    turns every closed parenthesis group into hyperedges of the result.

    Side effects on self: id_count, nt_id_count and ext_id_count are reset
    to 0, seen_nodes to an empty set, explicit_ext_ids to False, and
    reentrance_indexes is updated with the "_<digits>" indexes found in s.

    @param s: textual graph description to parse.
    @param concepts: accepted for interface compatibility; this
        implementation does not read it.
    @return: the L{Hgraph} built from s, with external node ids renumbered
        consecutively from 0 in the order of their original ids.
    @raise ParserError: on a token that is not allowed in the current
        state (including any token after the graph is complete) or when
        one node is given two different external ids.
    """
    # Constants to identify items on the stack
    PNODE = 1  # Parent node
    CNODE = 2  # Child node
    EDGE = 3  # Hyperedge

    hgraph = Hgraph()
    stack = []
    state = 0

    self.id_count = 0
    self.nt_id_count = 0
    self.ext_id_count = 0
    self.seen_nodes = set()
    self.explicit_ext_ids = False

    # States of the finite state parser
    # 0, top level
    # 1, expecting head nodename
    # 2, expecting edge label or node
    # 3, expecting further child nodes or right paren
    # 4, saw edge label, expecting child node, edge label, right paren
    # 5, graph complete, every further token is an error

    def unexpected(token, pos):
        # Build (not raise) the error so call sites read "raise unexpected(...)".
        return ParserError(
            "Unexpected token %s at position %i." % (token, pos))

    def insert_node(node, root=False):
        # Insert a node into the graph
        ident, label, ext_id = node
        # Lookup kept only for its effect; per the original comment it
        # initializes the dictionary for this node.
        hgraph[ident]
        hgraph.node_to_concepts[ident] = label
        if ext_id is not None:
            if (ident in hgraph.external_nodes
                    and hgraph.external_nodes[ident] != ext_id):
                raise ParserError(
                    "Incompatible external node IDs for node %s." % ident)
            hgraph.external_nodes[ident] = ext_id
            hgraph.rev_external_nodes[ext_id] = ident
        if root:
            hgraph.roots.append(ident)

    def pop_and_transition():
        # Create all edges in a group from the stack, attach them to the
        # graph and return the state the FSA has to continue in.  The state
        # is returned because assigning to the enclosing function's local
        # from here would only create a new local in this helper.
        edges = []
        while stack[-1][0] != PNODE:  # Pop all edges
            children = []
            while stack[-1][0] == CNODE:  # Pop all nodes in hyperedge
                _, node = stack.pop()
                insert_node(node)
                children.append(node)
            assert stack[-1][0] == EDGE
            _, edgelabel = stack.pop()
            edges.append((edgelabel, children))

        # Construct the hyperedges
        _, parentnode = stack.pop()
        parent_ident = parentnode[0]
        for edgelabel, children in edges:
            # Children were popped last-to-first; restore source order.
            hyperchild = tuple(child[0] for child in reversed(children))

            if "$" in edgelabel:  # this is a nonterminal edge
                new_edge = NonterminalLabel.from_string(edgelabel)
                if not new_edge.index:
                    new_edge.index = "_%i" % self.nt_id_count
                    self.nt_id_count = self.nt_id_count + 1
            else:
                new_edge = edgelabel
            hgraph._add_triple(parent_ident, new_edge, hyperchild)

        if stack:
            # Nested group: the parent becomes a child of the enclosing edge.
            insert_node(parentnode)
            stack.append((CNODE, parentnode))
            return 4
        insert_node(parentnode, root=True)
        return 5

    # Collect reentrance indexes: "_<digits>" followed by "." and not
    # directly preceded by ":".  Assumes self.reentrance_indexes was
    # created elsewhere and supports update() -- TODO confirm.
    self.reentrance_indexes.update(re.findall(r'[^:](_[0-9]+)\.', s))

    # Parser transitions start here
    for typ, token, pos in self.lexer.lex(s):

        if state == 0:
            if typ == LexTypes.LPAR:
                state = 1
            elif typ == LexTypes.NODE:
                insert_node(self.parse_node(token), root=True)
                state = 5
            else:
                raise unexpected(token, pos)

        elif state == 1:
            if typ == LexTypes.NODE:
                stack.append((PNODE, self.parse_node(token)))  # Push head node
                state = 2
            else:
                raise unexpected(token, pos)

        elif state == 2:
            if typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 4
            elif typ == LexTypes.NODE:
                stack.append((EDGE, ""))  # No edge specified, assume empty label
                stack.append((CNODE, self.parse_node(token)))
                state = 3
            elif typ == LexTypes.LPAR:
                stack.append((EDGE, ""))  # No edge specified, assume empty label
                state = 1
            elif typ == LexTypes.RPAR:
                itemtype, node = stack.pop()
                assert itemtype == PNODE
                if stack:
                    insert_node(node)
                    stack.append((CNODE, node))
                    state = 3
                else:
                    insert_node(node, root=True)
                    state = 5
            else:
                raise unexpected(token, pos)

        elif state == 3:
            if typ == LexTypes.RPAR:  # Pop from stack and add edges
                state = pop_and_transition()
            elif typ == LexTypes.NODE:
                stack.append((CNODE, self.parse_node(token)))
                state = 3
            elif typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 4
            elif typ == LexTypes.LPAR:
                state = 1
            else:
                raise unexpected(token, pos)

        elif state == 4:
            if typ == LexTypes.LPAR:
                state = 1
            elif typ == LexTypes.NODE:
                stack.append((CNODE, self.parse_node(token)))
                state = 3
            elif typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
            elif typ == LexTypes.RPAR:  # Pop from stack and add edges
                state = pop_and_transition()
            else:
                raise unexpected(token, pos)

        elif state == 5:
            raise unexpected(token, pos)

    # Normalize external nodes: renumber 0..n-1 in order of their old ids
    new_ext_nodes = {}
    new_rev_ext_nodes = {}
    by_old_id = sorted(hgraph.external_nodes.items(),
                       key=lambda item: item[1])
    for new_id, (node, _) in enumerate(by_old_id):
        new_ext_nodes[node] = new_id
        new_rev_ext_nodes[new_id] = node

    hgraph.external_nodes = new_ext_nodes
    hgraph.rev_external_nodes = new_rev_ext_nodes
    return hgraph
def parse_string(self, s, concepts=True):
    """
    Parse the string s and return a new hypergraph.

    The tokens produced by self.lexer.lex(s) drive a finite state machine
    that keeps parent nodes, edge labels and child nodes on a stack and
    turns every closed parenthesis group into hyperedges of the result.

    Side effects on self: id_count, nt_id_count and ext_id_count are reset
    to 0, seen_nodes to an empty set and explicit_ext_ids to False.

    @param s: textual graph description to parse.
    @param concepts: accepted for interface compatibility; this
        implementation does not read it.
    @return: the L{Hgraph} built from s, with external node ids renumbered
        consecutively from 0 in the order of their original ids.
    @raise ParserError: on a token that is not allowed in the current
        state (including any token after the graph is complete) or when
        one node is given two different external ids.
    """
    # Constants to identify items on the stack
    PNODE = 1  # Parent node
    CNODE = 2  # Child node
    EDGE = 3  # Hyperedge

    hgraph = Hgraph()
    stack = []
    state = 0

    self.id_count = 0
    self.nt_id_count = 0
    self.ext_id_count = 0
    self.seen_nodes = set()
    self.explicit_ext_ids = False

    # States of the finite state parser
    # 0, top level
    # 1, expecting head nodename
    # 2, expecting edge label or node
    # 3, expecting further child nodes or right paren
    # 4, saw edge label, expecting child node, edge label, right paren
    # 5, graph complete, every further token is an error

    def unexpected(token, pos):
        # Build (not raise) the error so call sites read "raise unexpected(...)".
        return ParserError(
            "Unexpected token %s at position %i." % (token, pos))

    def insert_node(node, root=False):
        # Insert a node into the graph
        ident, label, ext_id = node
        # Lookup kept only for its effect; per the original comment it
        # initializes the dictionary for this node.
        hgraph[ident]
        hgraph.node_to_concepts[ident] = label
        if ext_id is not None:
            if (ident in hgraph.external_nodes
                    and hgraph.external_nodes[ident] != ext_id):
                raise ParserError(
                    "Incompatible external node IDs for node %s." % ident)
            hgraph.external_nodes[ident] = ext_id
            hgraph.rev_external_nodes[ext_id] = ident
        if root:
            hgraph.roots.append(ident)

    def pop_and_transition():
        # Create all edges in a group from the stack, attach them to the
        # graph and return the state the FSA has to continue in.  The state
        # is returned because assigning to the enclosing function's local
        # from here would only create a new local in this helper.
        edges = []
        while stack[-1][0] != PNODE:  # Pop all edges
            children = []
            while stack[-1][0] == CNODE:  # Pop all nodes in hyperedge
                _, node = stack.pop()
                insert_node(node)
                children.append(node)
            assert stack[-1][0] == EDGE
            _, edgelabel = stack.pop()
            edges.append((edgelabel, children))

        # Construct the hyperedges
        _, parentnode = stack.pop()
        parent_ident = parentnode[0]
        for edgelabel, children in edges:
            # Children were popped last-to-first; restore source order.
            hyperchild = tuple(child[0] for child in reversed(children))

            if "$" in edgelabel:  # this is a nonterminal edge
                new_edge = NonterminalLabel.from_string(edgelabel)
                if not new_edge.index:
                    new_edge.index = "_%i" % self.nt_id_count
                    self.nt_id_count = self.nt_id_count + 1
            else:
                new_edge = edgelabel
            hgraph._add_triple(parent_ident, new_edge, hyperchild)

        if stack:
            # Nested group: the parent becomes a child of the enclosing edge.
            insert_node(parentnode)
            stack.append((CNODE, parentnode))
            return 4
        insert_node(parentnode, root=True)
        return 5

    # Parser transitions start here
    for typ, token, pos in self.lexer.lex(s):

        if state == 0:
            if typ == LexTypes.LPAR:
                state = 1
            elif typ == LexTypes.NODE:
                insert_node(self.parse_node(token), root=True)
                state = 5
            else:
                raise unexpected(token, pos)

        elif state == 1:
            if typ == LexTypes.NODE:
                stack.append((PNODE, self.parse_node(token)))  # Push head node
                state = 2
            else:
                raise unexpected(token, pos)

        elif state == 2:
            if typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 4
            elif typ == LexTypes.NODE:
                stack.append((EDGE, ""))  # No edge specified, assume empty label
                stack.append((CNODE, self.parse_node(token)))
                state = 3
            elif typ == LexTypes.LPAR:
                stack.append((EDGE, ""))  # No edge specified, assume empty label
                state = 1
            elif typ == LexTypes.RPAR:
                itemtype, node = stack.pop()
                assert itemtype == PNODE
                if stack:
                    insert_node(node)
                    stack.append((CNODE, node))
                    state = 3
                else:
                    insert_node(node, root=True)
                    state = 5
            else:
                raise unexpected(token, pos)

        elif state == 3:
            if typ == LexTypes.RPAR:  # Pop from stack and add edges
                state = pop_and_transition()
            elif typ == LexTypes.NODE:
                stack.append((CNODE, self.parse_node(token)))
                state = 3
            elif typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 4
            elif typ == LexTypes.LPAR:
                state = 1
            else:
                raise unexpected(token, pos)

        elif state == 4:
            if typ == LexTypes.LPAR:
                state = 1
            elif typ == LexTypes.NODE:
                stack.append((CNODE, self.parse_node(token)))
                state = 3
            elif typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
            elif typ == LexTypes.RPAR:  # Pop from stack and add edges
                state = pop_and_transition()
            else:
                raise unexpected(token, pos)

        elif state == 5:
            raise unexpected(token, pos)

    # Normalize external nodes: renumber 0..n-1 in order of their old ids
    new_ext_nodes = {}
    new_rev_ext_nodes = {}
    by_old_id = sorted(hgraph.external_nodes.items(),
                       key=lambda item: item[1])
    for new_id, (node, _) in enumerate(by_old_id):
        new_ext_nodes[node] = new_id
        new_rev_ext_nodes[new_id] = node

    hgraph.external_nodes = new_ext_nodes
    hgraph.rev_external_nodes = new_rev_ext_nodes
    return hgraph
def parse_string(self, s, concepts=True):
    """
    Parse the string s and return a new abstract meaning representation.

    The tokens produced by self.lexer.lex(s) drive a finite state machine
    that keeps parent nodes, edge labels and child nodes on a stack; each
    closing parenthesis turns the topmost group into hyperedges.  A node
    label starting with '@' marks an external node: the marker is removed
    and the bare label is appended to the result's external_nodes.

    @param s: textual AMR description to parse.
    @param concepts: if True, method returns an L{Hgraph} object containing
        concept labels (node_to_concepts is filled in).
    @return: the L{Hgraph} built from s.
    @raise ParserError: on a token that is not allowed in the current state.
    """
    # Constants to identify items on the stack
    PNODE = 1  # Parent node
    CNODE = 2  # Child node
    EDGE = 3  # Hyperedge

    amr = Hgraph()
    stack = []
    state = 0

    # States of the finite state parser
    # 0, top level
    # 1, expecting source nodename
    # 2, expecting concept name or edge label
    # 3, expecting concept name
    # 4, expecting edge label
    # 5, expecting expression, node name or literal string, quantity or
    #    special symbol
    # 6, expecting right paren or more target nodes
    # 7, saw comma, expecting another target node or nested expression

    def unexpected(token, pos):
        # Build (not raise) the error so call sites read "raise unexpected(...)".
        return ParserError(
            "Unexpected token %s at position %i." % (token, pos))

    def strip_external(label):
        # Remove a leading '@' and register the node as external.  Slicing
        # instead of label[0] keeps an empty label from raising IndexError.
        if label is not None and label[:1] == '@':
            label = label[1:]
            amr.external_nodes.append(label)
        return label

    def record_concept(label, concept):
        # Never overwrite a known concept with None.
        if concepts and (label not in amr.node_to_concepts
                         or concept is not None):
            amr.node_to_concepts[label] = concept

    def attach_or_root(label, concept):
        # A finished node is a child of the enclosing expression if there
        # is one, otherwise a root.  Returns the next state.
        if stack:
            stack.append((CNODE, label, concept))
            return 6
        amr.roots.append(label)
        return 0

    def close_leaf(with_concept):
        # Right paren directly after "(name" or "(name / concept".
        itemtype, label, concept = stack.pop()
        assert itemtype == PNODE
        label = strip_external(label)
        amr[label]  # add only the node
        if with_concept:
            record_concept(label, concept)
        return attach_or_root(label, concept)

    def close_expression():
        # Right paren after at least one edge: pop all edges of the group,
        # add them to the graph and return the next state.
        edges = []
        while stack[-1][0] != PNODE:  # Pop all edges
            children = []
            while stack[-1][0] == CNODE:  # Pop all nodes for hyperedge
                _, child_label, child_concept = stack.pop()
                children.append((strip_external(child_label), child_concept))
            assert stack[-1][0] == EDGE
            _, edgelabel = stack.pop()
            edges.append((edgelabel, children))

        _, parent_label, parent_concept = stack.pop()
        # Strip the '@' marker first so the concept is stored under the same
        # label that is used for the node in the graph.
        parent_label = strip_external(parent_label)
        record_concept(parent_label, parent_concept)

        for edgelabel, children in edges:
            hypertarget = []  # build hyperedge destination
            # NOTE(review): children are in pop order, i.e. reversed with
            # respect to the input; kept as in the original -- confirm.
            for node, concept in children:
                if node is not None:  # None marks "edge without target"
                    record_concept(node, concept)
                    hypertarget.append(node)
            hyperchild = tuple(hypertarget)

            if edgelabel[:1] == '#':  # this is a nonterminal edge
                edgelabel = NonterminalLabel(edgelabel[1:])
            amr._add_triple(parent_label, edgelabel, hyperchild)

        return attach_or_root(parent_label, parent_concept)

    for typ, token, pos in self.lexer.lex(s):

        if state == 0:
            if typ == LexTypes.LPAR:
                state = 1
            else:
                raise unexpected(token, pos)

        elif state == 1:
            if typ == LexTypes.IDENTIFIER:
                stack.append((PNODE, token, None))  # Push source node
                state = 2
            else:
                raise unexpected(token, pos)

        elif state == 2:
            if typ == LexTypes.SLASH:
                state = 3
            elif typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 5
            elif typ == LexTypes.RPAR:
                state = close_leaf(with_concept=False)
            else:
                raise unexpected(token, pos)

        elif state == 3:
            if typ == LexTypes.IDENTIFIER:
                assert stack[-1][0] == PNODE
                nodelabel = stack.pop()[1]
                # Push the source node again, now with its concept label
                stack.append((PNODE, nodelabel, token))
                state = 4
            else:
                raise unexpected(token, pos)

        elif state == 4:
            if typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 5
            elif typ == LexTypes.RPAR:
                state = close_leaf(with_concept=True)
            else:
                raise unexpected(token, pos)

        elif state == 5:
            if typ == LexTypes.LPAR:
                state = 1
            elif typ == LexTypes.QUANTITY:
                stack.append((CNODE, Quantity(token), None))
                state = 6
            elif typ == LexTypes.STRLITERAL:
                stack.append((CNODE, StrLiteral(token[1:-1]), None))
                state = 6
            elif typ == LexTypes.LITERAL:
                stack.append((CNODE, Literal(token[1:]), None))
                state = 6
            elif typ == LexTypes.IDENTIFIER:
                stack.append((CNODE, token, None))  # Push target node
                state = 6
            elif typ == LexTypes.EDGELABEL:  # Unary edge
                stack.append((CNODE, None, None))
                stack.append((EDGE, token[1:]))
                state = 5
            elif typ == LexTypes.RPAR:  # Unary edge
                stack.append((CNODE, None, None))
                state = close_expression()
            else:
                raise unexpected(token, pos)

        elif state == 6:
            if typ == LexTypes.RPAR:  # Pop from stack and add edges
                state = close_expression()
            elif typ == LexTypes.COMMA:
                state = 7
            elif typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 5
            else:
                raise unexpected(token, pos)

        elif state == 7:
            if typ == LexTypes.IDENTIFIER:
                stack.append((CNODE, token, None))  # Push further target node
                state = 6
            elif typ == LexTypes.LPAR:
                state = 1
            else:
                raise unexpected(token, pos)

    return amr
def parse_string(self, s, concepts=True):
    """
    Parse the string s and return a new abstract meaning representation.

    The tokens produced by self.lexer.lex(s) drive a finite state machine
    that keeps parent nodes, edge labels and child nodes on a stack; each
    closing parenthesis turns the topmost group into hyperedges.  A node
    label starting with '@' marks an external node: the marker is removed
    and the bare label is appended to the result's external_nodes.

    @param s: textual AMR description to parse.
    @param concepts: if True, method returns an L{Hgraph} object containing
        concept labels (node_to_concepts is filled in).
    @return: the L{Hgraph} built from s.
    @raise ParserError: on a token that is not allowed in the current state.
    """
    # Constants to identify items on the stack
    PNODE = 1  # Parent node
    CNODE = 2  # Child node
    EDGE = 3  # Hyperedge

    amr = Hgraph()
    stack = []
    state = 0

    # States of the finite state parser
    # 0, top level
    # 1, expecting source nodename
    # 2, expecting concept name or edge label
    # 3, expecting concept name
    # 4, expecting edge label
    # 5, expecting expression, node name or literal string, quantity or
    #    special symbol
    # 6, expecting right paren or more target nodes
    # 7, saw comma, expecting another target node or nested expression

    def unexpected(token, pos):
        # Build (not raise) the error so call sites read "raise unexpected(...)".
        return ParserError(
            "Unexpected token %s at position %i." % (token, pos))

    def strip_external(label):
        # Remove a leading '@' and register the node as external.  Slicing
        # instead of label[0] keeps an empty label from raising IndexError.
        if label is not None and label[:1] == '@':
            label = label[1:]
            amr.external_nodes.append(label)
        return label

    def record_concept(label, concept):
        # Never overwrite a known concept with None.
        if concepts and (label not in amr.node_to_concepts
                         or concept is not None):
            amr.node_to_concepts[label] = concept

    def attach_or_root(label, concept):
        # A finished node is a child of the enclosing expression if there
        # is one, otherwise a root.  Returns the next state.
        if stack:
            stack.append((CNODE, label, concept))
            return 6
        amr.roots.append(label)
        return 0

    def close_leaf(with_concept):
        # Right paren directly after "(name" or "(name / concept".
        itemtype, label, concept = stack.pop()
        assert itemtype == PNODE
        label = strip_external(label)
        amr[label]  # add only the node
        if with_concept:
            record_concept(label, concept)
        return attach_or_root(label, concept)

    def close_expression():
        # Right paren after at least one edge: pop all edges of the group,
        # add them to the graph and return the next state.
        edges = []
        while stack[-1][0] != PNODE:  # Pop all edges
            children = []
            while stack[-1][0] == CNODE:  # Pop all nodes for hyperedge
                _, child_label, child_concept = stack.pop()
                children.append((strip_external(child_label), child_concept))
            assert stack[-1][0] == EDGE
            _, edgelabel = stack.pop()
            edges.append((edgelabel, children))

        _, parent_label, parent_concept = stack.pop()
        # Strip the '@' marker first so the concept is stored under the same
        # label that is used for the node in the graph.
        parent_label = strip_external(parent_label)
        record_concept(parent_label, parent_concept)

        for edgelabel, children in edges:
            hypertarget = []  # build hyperedge destination
            # NOTE(review): children are in pop order, i.e. reversed with
            # respect to the input; kept as in the original -- confirm.
            for node, concept in children:
                if node is not None:  # None marks "edge without target"
                    record_concept(node, concept)
                    hypertarget.append(node)
            hyperchild = tuple(hypertarget)

            if edgelabel[:1] == '#':  # this is a nonterminal edge
                edgelabel = NonterminalLabel(edgelabel[1:])
            amr._add_triple(parent_label, edgelabel, hyperchild)

        return attach_or_root(parent_label, parent_concept)

    for typ, token, pos in self.lexer.lex(s):

        if state == 0:
            if typ == LexTypes.LPAR:
                state = 1
            else:
                raise unexpected(token, pos)

        elif state == 1:
            if typ == LexTypes.IDENTIFIER:
                stack.append((PNODE, token, None))  # Push source node
                state = 2
            else:
                raise unexpected(token, pos)

        elif state == 2:
            if typ == LexTypes.SLASH:
                state = 3
            elif typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 5
            elif typ == LexTypes.RPAR:
                state = close_leaf(with_concept=False)
            else:
                raise unexpected(token, pos)

        elif state == 3:
            if typ == LexTypes.IDENTIFIER:
                assert stack[-1][0] == PNODE
                nodelabel = stack.pop()[1]
                # Push the source node again, now with its concept label
                stack.append((PNODE, nodelabel, token))
                state = 4
            else:
                raise unexpected(token, pos)

        elif state == 4:
            if typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 5
            elif typ == LexTypes.RPAR:
                state = close_leaf(with_concept=True)
            else:
                raise unexpected(token, pos)

        elif state == 5:
            if typ == LexTypes.LPAR:
                state = 1
            elif typ == LexTypes.QUANTITY:
                stack.append((CNODE, Quantity(token), None))
                state = 6
            elif typ == LexTypes.STRLITERAL:
                stack.append((CNODE, StrLiteral(token[1:-1]), None))
                state = 6
            elif typ == LexTypes.LITERAL:
                stack.append((CNODE, Literal(token[1:]), None))
                state = 6
            elif typ == LexTypes.IDENTIFIER:
                stack.append((CNODE, token, None))  # Push target node
                state = 6
            elif typ == LexTypes.EDGELABEL:  # Unary edge
                stack.append((CNODE, None, None))
                stack.append((EDGE, token[1:]))
                state = 5
            elif typ == LexTypes.RPAR:  # Unary edge
                stack.append((CNODE, None, None))
                state = close_expression()
            else:
                raise unexpected(token, pos)

        elif state == 6:
            if typ == LexTypes.RPAR:  # Pop from stack and add edges
                state = close_expression()
            elif typ == LexTypes.COMMA:
                state = 7
            elif typ == LexTypes.EDGELABEL:
                stack.append((EDGE, token[1:]))
                state = 5
            else:
                raise unexpected(token, pos)

        elif state == 7:
            if typ == LexTypes.IDENTIFIER:
                stack.append((CNODE, token, None))  # Push further target node
                state = 6
            elif typ == LexTypes.LPAR:
                state = 1
            else:
                raise unexpected(token, pos)

    return amr
def compute_smatch_batch(
    gold_filename,
    test_filename,
    starts,
    method,
    restart_threshold,
    concept_edges,
    precise,
    missing,
    detailed,
):
    """
    Compute SMATCH on two files with pairwise AMRs, one-AMR-per-line.

    Line i of the gold file is compared with line i of the test file.  Empty
    or unparsable gold lines are skipped with a warning on stderr.  Per-pair
    scores (or a progress dot) and a final summary are written to stdout.

    @param gold_filename: path of the gold AMR file.
    @param test_filename: path of the test AMR file.
    @param starts: passed through to compute_smatch_hill_climbing.
    @param method: passed through to compute_smatch_hill_climbing.
    @param restart_threshold: passed through to compute_smatch_hill_climbing.
    @param concept_edges: if true, both graphs are rebuilt with
        Hgraph.from_concept_edge_labels after parsing.
    @param precise: if true use compute_smatch_precise, otherwise
        compute_smatch_hill_climbing.
    @param missing: if true, test entries that are empty, start with '#' or
        cannot be parsed are left out of the averages; if false they are
        scored 0.0.
    @param detailed: if true print P/R/F for every pair instead of a dot.

    Exits the process with status 1 if either file cannot be opened.

    NOTE(review): in the flattened original the zero-score block followed
    the except clause as an else; it is placed in the parse-failure handler
    here so successfully scored pairs are never followed by an extra
    0.0 entry.  Confirm this matches the intended scoring.
    """
    def open_or_exit(filename, description):
        try:
            return open(filename)
        except IOError:
            sys.stderr.write("ERROR: Could not open %s AMR file %s.\n"
                             % (description, filename))
            sys.exit(1)

    # Each list holds (score, number of gold triples) pairs.
    ps, rs, fs = [], [], []

    def record(p, r, f, gold_size):
        ps.append((p, gold_size))
        rs.append((r, gold_size))
        fs.append((f, gold_size))

    parsefailct = 0
    totalct = 0
    emptylinect = 0

    gold_file = open_or_exit(gold_filename, "gold")
    with gold_file:
        # Opened inside the with block so the gold file is closed even when
        # opening the test file fails.
        test_file = open_or_exit(test_filename, "test")
        with test_file:
            for gold in gold_file:
                # Always consume the test line to keep both files aligned.
                test = test_file.readline().strip()
                gold = gold.strip()
                if not gold:
                    sys.stderr.write(
                        "WARNING: Empty line in gold AMR file. "
                        "Skipping entry.\n")
                    continue
                totalct += 1

                try:
                    amr_gold = Hgraph.from_string(gold)
                    if concept_edges:
                        # rebuild normal AMR with concepts attached to nodes.
                        amr_gold = Hgraph.from_concept_edge_labels(amr_gold)
                    gold_size = len(amr_gold.triples())
                except Exception as e:
                    # Deliberately broad: any failure on a gold entry only
                    # skips that entry.
                    sys.stderr.write("%s\n" % (e,))
                    sys.stderr.write(
                        "WARNING: Could not parse gold AMR. "
                        "Skipping entry.\n")
                    continue

                if not test or test.startswith("#"):
                    # Missing output; lines starting with '#' are failure
                    # markers such as "# Tiburon failed.".
                    emptylinect += 1
                    if not missing:
                        record(0.0, 0.0, 0.0, gold_size)
                    continue

                try:
                    amr_test = Hgraph.from_string(test)
                    if concept_edges:
                        # rebuild normal AMR with concepts attached to nodes.
                        amr_test = Hgraph.from_concept_edge_labels(amr_test)
                    if precise:
                        p, r, f = compute_smatch_precise(amr_gold, amr_test)
                    else:
                        p, r, f = compute_smatch_hill_climbing(
                            amr_gold,
                            amr_test,
                            starts=starts,
                            method=method,
                            restart_threshold=restart_threshold,
                        )
                except pyparsing.ParseException:
                    parsefailct += 1
                    if not missing:
                        record(0.0, 0.0, 0.0, gold_size)
                else:
                    if detailed:
                        sys.stdout.write("P:%f R:%f F:%f \n" % (p, r, f))
                    else:
                        sys.stdout.write(".")
                        sys.stdout.flush()
                    record(p, r, f, gold_size)

    sys.stdout.write("\n")
    # mean() receives (score, gold triple count) pairs -- presumably a
    # weighted mean; verify against its definition.
    avgp = mean(ps)
    avgr = mean(rs)
    avgf = mean(fs)
    sys.stdout.write(
        "Total: %i\tFail(empty line): %i\tFail(invalid AMR): %i\n"
        % (totalct, emptylinect, parsefailct))
    sys.stdout.write("MEAN SMATCH: P:%f R:%f F:%f \n" % (avgp, avgr, avgf))
def compute_smatch_batch(gold_filename, test_filename, starts, method,
                         restart_threshold, concept_edges, precise, missing,
                         detailed):
    """
    Compute SMATCH on two files with pairwise AMRs, one-AMR-per-line.

    Line i of the gold file is compared with line i of the test file.  Empty
    or unparsable gold lines are skipped with a warning on stderr.  Per-pair
    scores (or a progress dot) and a final summary are written to stdout.

    @param gold_filename: path of the gold AMR file.
    @param test_filename: path of the test AMR file.
    @param starts: passed through to compute_smatch_hill_climbing.
    @param method: passed through to compute_smatch_hill_climbing.
    @param restart_threshold: passed through to compute_smatch_hill_climbing.
    @param concept_edges: if true, both graphs are rebuilt with
        Hgraph.from_concept_edge_labels after parsing.
    @param precise: if true use compute_smatch_precise, otherwise
        compute_smatch_hill_climbing.
    @param missing: if true, test entries that are empty, start with '#' or
        cannot be parsed are left out of the averages; if false they are
        scored 0.0.
    @param detailed: if true print P/R/F for every pair instead of a dot.

    Exits the process with status 1 if either file cannot be opened.

    NOTE(review): in the flattened original the zero-score block followed
    the except clause as an else; it is placed in the parse-failure handler
    here so successfully scored pairs are never followed by an extra
    0.0 entry.  Confirm this matches the intended scoring.
    """
    def open_or_exit(filename, description):
        try:
            return open(filename)
        except IOError:
            sys.stderr.write("ERROR: Could not open %s AMR file %s.\n"
                             % (description, filename))
            sys.exit(1)

    # Each list holds (score, number of gold triples) pairs.
    ps, rs, fs = [], [], []

    def record(p, r, f, gold_size):
        ps.append((p, gold_size))
        rs.append((r, gold_size))
        fs.append((f, gold_size))

    parsefailct = 0
    totalct = 0
    emptylinect = 0

    gold_file = open_or_exit(gold_filename, "gold")
    with gold_file:
        # Opened inside the with block so the gold file is closed even when
        # opening the test file fails.
        test_file = open_or_exit(test_filename, "test")
        with test_file:
            for gold in gold_file:
                # Always consume the test line to keep both files aligned.
                test = test_file.readline().strip()
                gold = gold.strip()
                if not gold:
                    sys.stderr.write(
                        "WARNING: Empty line in gold AMR file. "
                        "Skipping entry.\n")
                    continue
                totalct += 1

                try:
                    amr_gold = Hgraph.from_string(gold)
                    if concept_edges:
                        # rebuild normal AMR with concepts attached to nodes.
                        amr_gold = Hgraph.from_concept_edge_labels(amr_gold)
                    gold_size = len(amr_gold.triples())
                except Exception as e:
                    # Deliberately broad: any failure on a gold entry only
                    # skips that entry.
                    sys.stderr.write("%s\n" % (e,))
                    sys.stderr.write(
                        "WARNING: Could not parse gold AMR. "
                        "Skipping entry.\n")
                    continue

                if not test or test.startswith("#"):
                    # Missing output; lines starting with '#' are failure
                    # markers such as "# Tiburon failed.".
                    emptylinect += 1
                    if not missing:
                        record(0.0, 0.0, 0.0, gold_size)
                    continue

                try:
                    amr_test = Hgraph.from_string(test)
                    if concept_edges:
                        # rebuild normal AMR with concepts attached to nodes.
                        amr_test = Hgraph.from_concept_edge_labels(amr_test)
                    if precise:
                        p, r, f = compute_smatch_precise(amr_gold, amr_test)
                    else:
                        p, r, f = compute_smatch_hill_climbing(
                            amr_gold,
                            amr_test,
                            starts=starts,
                            method=method,
                            restart_threshold=restart_threshold)
                except pyparsing.ParseException:
                    parsefailct += 1
                    if not missing:
                        record(0.0, 0.0, 0.0, gold_size)
                else:
                    if detailed:
                        sys.stdout.write("P:%f R:%f F:%f \n" % (p, r, f))
                    else:
                        sys.stdout.write(".")
                        sys.stdout.flush()
                    record(p, r, f, gold_size)

    sys.stdout.write("\n")
    # mean() receives (score, gold triple count) pairs -- presumably a
    # weighted mean; verify against its definition.
    avgp = mean(ps)
    avgr = mean(rs)
    avgf = mean(fs)
    sys.stdout.write(
        "Total: %i\tFail(empty line): %i\tFail(invalid AMR): %i\n"
        % (totalct, emptylinect, parsefailct))
    sys.stdout.write("MEAN SMATCH: P:%f R:%f F:%f \n" % (avgp, avgr, avgf))