def test_dumps():
    """Check ``penman.dumps`` on zero, one, and two single-node graphs."""
    # An empty list of graphs serializes to the empty string.
    assert penman.dumps([]) == ''

    single = [penman.Graph([('a', 'instance', None)])]
    assert penman.dumps(single) == '(a)'

    # Multiple graphs are expected to be separated by a blank line.
    pair = [
        penman.Graph([('a', 'instance', None)]),
        penman.Graph([('b', 'instance', None)]),
    ]
    assert penman.dumps(pair) == '(a)\n\n(b)'
def test_top(self, x1, x2):
    """Check ``Graph.top`` for an empty graph, literal triples, and fixtures."""
    assert penman.Graph().top is None

    # (triples, expected top) pairs, checked in order.
    cases = [
        ([('a', 'instance', None)], 'a'),
        ([('b', 'instance', None), ('a', 'ARG', 'b')], 'b'),
        (x1[1], 'e2'),
        (x2[1], 10000),
    ]
    for triples, expected_top in cases:
        assert penman.Graph(triples).top == expected_top
def test_attributes(self, x1, x2):
    """Exercise ``Graph.attributes`` unfiltered and with each filter keyword."""
    assert penman.Graph().attributes() == []

    # Expected attribute triples for the x1 fixture, named for reuse below.
    try_inst = ('e2', 'instance', '_try_v_1')
    named_inst = ('x1', 'instance', 'named')
    proper_inst = ('_1', 'instance', 'proper_q')
    sleep_inst = ('e3', 'instance', '_sleep_v_1')
    carg = ('x1', 'CARG', '"Abrams"')

    graph = penman.Graph(x1[1])
    assert graph.attributes() == [
        try_inst, named_inst, proper_inst, sleep_inst, carg,
    ]
    assert graph.attributes(source='x1') == [named_inst, carg]
    assert graph.attributes(target='named') == [named_inst]
    assert graph.attributes(relation='instance') == [
        try_inst, named_inst, proper_inst, sleep_inst,
    ]

    graph = penman.Graph(x2[1])
    expected_x2 = [
        (10000, 'instance', '_bark_v_1'),
        (10001, 'instance', '_dog_n_1'),
        (10002, 'instance', 'udef_q'),
    ]
    assert graph.attributes() == expected_x2
def test_reentrancies(self, x1, x2):
    """Check the reentrancy counts reported for fixture and decoded graphs."""
    from_x1 = penman.Graph(x1[1])
    assert from_x1.reentrancies() == {'x1': 2}

    from_x2 = penman.Graph(x2[1])
    assert from_x2.reentrancies() == {10001: 1}

    # top has an implicit entrancy
    decoded = penman.decode('(b / bark :ARG1 (d / dog) :ARG1-of (w / wild))')
    assert decoded.reentrancies() == {'b': 1}
def test_edges(self, x1):
    """Exercise ``Graph.edges`` unfiltered and with each filter keyword."""
    assert penman.Graph().edges() == []

    # Expected edge triples for the x1 fixture, named for reuse below.
    try_arg1 = ('e2', 'ARG1', 'x1')
    rstr = ('_1', 'RSTR', 'x1')
    try_arg2 = ('e2', 'ARG2', 'e3')
    sleep_arg1 = ('e3', 'ARG1', 'x1')

    graph = penman.Graph(x1[1])
    assert graph.edges() == [try_arg1, rstr, try_arg2, sleep_arg1]
    assert graph.edges(source='e2') == [try_arg1, try_arg2]
    assert graph.edges(source='e3') == [sleep_arg1]
    assert graph.edges(target='e3') == [try_arg2]
    assert graph.edges(relation='RSTR') == [rstr]
def test_encode(self, x1):
    """Encode a series of tiny graphs and compare with the expected strings."""
    cases = [
        # empty graph
        (penman.Graph([]), '()'),
        # unlabeled single node
        (penman.Graph([], top='a'), '(a)'),
        # labeled node
        (penman.Graph([('a', ':instance', 'alpha')]), '(a / alpha)'),
        # labeled node (without ':')
        (penman.Graph([('a', 'instance', 'alpha')]), '(a / alpha)'),
        # unlabeled edge to unlabeled node
        (penman.Graph([('a', '', 'b')]), '(a : b)'),
        (
            penman.Graph(
                [('a', ':', 'b')],
                epidata={('a', ':', 'b'): [layout.Push('b')]},
            ),
            '(a : (b))',
        ),
        # inverted unlabeled edge
        (penman.Graph([('a', '', 'b')], top='b'), '(b :-of a)'),
        # labeled edge to unlabeled node
        (penman.Graph([('a', 'ARG', 'b')]), '(a :ARG b)'),
        # inverted edge
        (penman.Graph([('a', 'ARG', 'b')], top='b'), '(b :ARG-of a)'),
    ]
    for graph, expected in cases:
        assert encode(graph) == expected
def test_encode_issue_67(self):
    """Encode the same triples from two different tops."""
    # https://github.com/goodmami/penman/issues/61
    # NOTE(review): the URL above says issue 61 while the test name says 67;
    # confirm which issue this test is meant to cover.
    triples = [
        ('h', ':instance', 'have-org-role-91'),
        ('a', ':instance', 'activist'),
        ('h', ':ARG0', 'a'),
        ('h', ':ARG2', 'a'),
    ]

    rooted_at_a = penman.Graph(triples, top='a')
    expected_a = (
        '(a / activist\n'
        ' :ARG0-of (h / have-org-role-91)\n'
        ' :ARG2-of h)'
    )
    assert encode(rooted_at_a) == expected_a

    rooted_at_h = penman.Graph(triples, top='h')
    expected_h = (
        '(h / have-org-role-91\n'
        ' :ARG0 (a / activist)\n'
        ' :ARG2 a)'
    )
    assert encode(rooted_at_h) == expected_h
def test_AMRCodec():
    """Exercise ``penman.AMRCodec``: relation inversion, encoding, decoding."""
    codec = penman.AMRCodec()

    # (relation, expected inversion) pairs, checked in order.
    inversions = [
        ('ARG0', 'ARG0-of'),
        ('ARG0-of', 'ARG0'),
        ('domain', 'mod'),
        ('mod', 'domain'),
        ('consist-of', 'consist-of-of'),
        ('consist-of-of', 'consist-of'),
    ]
    for relation, inverse in inversions:
        assert codec.invert_relation(relation) == inverse
    with pytest.raises(penman.PenmanError):
        codec.invert_relation('instance')

    want_graph = penman.Graph([
        ('w', 'instance', 'want-01'),
        ('w', 'ARG0', 'b'),
        ('w', 'ARG1', 'g'),
        ('b', 'instance', 'boy'),
        ('g', 'instance', 'go'),
        ('g', 'ARG0', 'b'),
    ])
    assert codec.encode(want_graph) == (
        '(w / want-01\n'
        ' :ARG0 (b / boy)\n'
        ' :ARG1 (g / go\n'
        ' :ARG0 b))'
    )

    gold_graph = penman.Graph([
        ('g', 'instance', 'gold'),
        ('g', 'consist-of-of', 'r'),
        ('r', 'instance', 'ring'),
    ])
    assert codec.encode(gold_graph) == (
        '(g / gold\n'
        ' :consist-of-of (r / ring))'
    )
    assert codec.encode(gold_graph, top='r') == (
        '(r / ring\n'
        ' :consist-of (g / gold))'
    )

    white_graph = penman.Graph([
        ('w', 'instance', 'white'),
        ('w', 'domain', 'c'),
        ('c', 'instance', 'cat'),
    ])
    assert codec.encode(white_graph) == (
        '(w / white\n'
        ' :domain (c / cat))'
    )
    assert codec.encode(white_graph, top='c') == (
        '(c / cat\n'
        ' :mod (w / white))'
    )

    assert codec.decode('(g / go)').triples() == [('g', 'instance', 'go')]

    # example adapted from https://github.com/goodmami/penman/issues/17
    decoded = codec.decode('(g / go :null_edge (x20 / 876-9))')
    assert decoded.triples() == [
        ('g', 'instance', 'go'),
        ('x20', 'instance', '876-9'),
        ('g', 'null_edge', 'x20'),
    ]

    # Inputs that the codec is expected to reject when decoding.
    rejected = [
        '(g)',                       # no concept or relations
        '(g :ARG0 b)',               # no concept
        '(g :ARG0 (b / boy) / go)',  # concept after relations
        '(1 / one)',                 # bad variable form
        '(g / go : (b / boy))',      # anonymous relation
    ]
    for text in rejected:
        with pytest.raises(penman.DecodeError):
            codec.decode(text)
def connect_graph_if_not_connected(graph):
    """Return an encodable graph, joining components under a new node if needed.

    If ``pm_encode(graph)`` succeeds, the graph is returned untouched with
    ``ParsedStatus.OK``.  Otherwise a new ``and`` node is created and linked
    with ``:op1``, ``:op2``, ... to one variable from each connected
    component, and the rebuilt graph is returned with ``ParsedStatus.FIXED``.

    Args:
        graph: a penman graph exposing ``triples``, ``variables()`` and
            ``metadata``.

    Returns:
        A ``(graph, status)`` tuple.

    Raises:
        Whatever ``pm_encode`` raises if the rebuilt graph still cannot be
        encoded.
    """
    try:
        pm_encode(graph)
    except Exception:
        # Any encoding failure means the graph needs repairing.  Catching
        # ``Exception`` (not a bare ``except``) lets KeyboardInterrupt and
        # SystemExit propagate.
        pass
    else:
        return graph, ParsedStatus.OK

    # Build an undirected view of the variable-to-variable structure.
    nxgraph = nx.MultiGraph()
    variables = graph.variables()
    for v1, _, v2 in graph.triples:
        if v1 in variables and v2 in variables:
            nxgraph.add_edge(v1, v2)
        elif v1 in variables:
            # Self-loop so that a variable with only attributes still
            # appears as a node (and hence as its own component).
            nxgraph.add_edge(v1, v1)

    triples = graph.triples.copy()
    new_triples = []
    addition = f'a{len(variables) + 1}'
    triples.append(penman.Triple(addition, ':instance', 'and'))
    for i, conn_set in enumerate(nx.connected_components(nxgraph), start=1):
        edge = f':op{i}'
        # Assumes variables look like one character followed by an integer
        # (e.g. ``z3``); the lowest-numbered variable represents the component.
        conn_set = sorted(conn_set, key=lambda x: int(x[1:]))
        conn_set = [c for c in conn_set if c in variables]
        node = conn_set[0]
        new_triples.append(penman.Triple(addition, edge, node))

    # The connecting edges go first, ahead of the original triples.
    triples = new_triples + triples
    metadata = graph.metadata
    graph = penman.Graph(triples)
    graph.metadata.update(metadata)
    # Sanity check: raises if the repaired graph still cannot be encoded.
    pm_encode(graph)
    return graph, ParsedStatus.FIXED
def to_format(self, passage, metadata=True, wikification=True, verbose=False,
              use_original=True, default_label=None, **kwargs):
    """Render a passage as a list of PENMAN text lines.

    Args:
        passage: the passage to convert.
        metadata: if true, prepend the lines from ``self.header``.
        wikification: stored on ``self.wikification``; if true, the passage
            is wikified before conversion.
        verbose: if true, print progress messages.
        use_original: if true and ``passage.extra`` holds a truthy
            ``"original"`` entry, return that entry directly.
        default_label: forwarded to ``self._to_triples``.
        **kwargs: forwarded to ``self.header``.

    Returns:
        The stored original if used, otherwise a list of lines.
    """
    self.wikification = wikification

    if use_original:
        stored = passage.extra.get("original")
        if stored:
            return stored

    textutil.annotate(passage, as_array=True)

    if self.wikification:
        if verbose:
            print("Wikifying passage...")
        WIKIFIER.wikify_passage(passage)

    if verbose:
        print("Expanding names...")
    self._expand_names(passage.layer(layer1.LAYER_ID))

    triples = list(self._to_triples(passage, default_label=default_label))
    if not triples:
        # Fall back to a single placeholder node when nothing was produced.
        triples = [("y", INSTANCE, "yes")]

    header_lines = self.header(passage, **kwargs) if metadata else []
    graph_lines = penman.encode(penman.Graph(triples)).split("\n")
    return header_lines + graph_lines
def _collapse_name_ops(self, amr):
    """Merge the ``:opN`` values of each ``name`` node into one ``:op1``.

    The literals are stripped of double quotes, ordered by their op number,
    joined with ``_`` and re-quoted.  The merged triple takes the position
    of the earliest op triple; the other op triples are dropped.

    Args:
        amr: a graph exposing ``triples`` (a list) and ``metadata``.

    Returns:
        A new ``penman.Graph`` that shares the metadata of ``amr``.
    """
    # Variables whose instance concept is ``name``.
    name_vars = set()
    for source, role, target in amr.triples:
        if role == ':instance' and target == 'name':
            name_vars.add(source)

    # For each such variable: (triple index, role, unquoted literal).
    ops_by_var = defaultdict(list)
    for position, (source, role, target) in enumerate(amr.triples):
        if source in name_vars and role.startswith(':op'):
            ops_by_var[source].append((position, role, target.strip('"')))

    triples = amr.triples.copy()
    for var, ops in ops_by_var.items():
        # Order by the integer following ``:op``.
        ordered = sorted(ops, key=lambda op: int(op[1][3:]))
        positions, _, literals = zip(*ordered)
        for position in positions:
            triples[position] = None
        merged = '"' + '_'.join(literals) + '"'
        triples[min(positions)] = penman.Triple(var, ':op1', merged)

    remaining = [triple for triple in triples if triple is not None]
    collapsed = penman.Graph(remaining)
    collapsed.metadata = amr.metadata
    return collapsed
def test_encode_issue_61(self):
    """Encode a graph in which a variable and a concept are both ``i``."""
    # https://github.com/goodmami/penman/issues/61
    triples = [
        ('i2', ':instance', 'i'),
        ('i', ':instance', 'i'),
        ('i2', ':ARG0', 'i'),
    ]
    graph = penman.Graph(triples, top='i2')
    expected = '(i2 / i :ARG0 (i / i))'
    assert encode(graph, indent=None) == expected
def __init__(self, *args, **kwargs) -> None:
    """Initialise the mapping, then convert raw ``con`` / ``amr`` values.

    Values under ``'con'`` are converted with ``list_to_tree`` and values
    under ``'amr'`` are wrapped as ``AMRGraph(penman.Graph(...))``, unless
    they (or their first element) already have the target type.  Falsy
    values and all other keys are left untouched.

    Args:
        *args: forwarded to the parent initialiser.
        **kwargs: forwarded to the parent initialiser.
    """
    super().__init__(*args, **kwargs)
    # Iterate over a snapshot because entries are reassigned in the loop.
    for key, value in list(self.items()):
        if not value:
            continue

        if key == 'con':
            if isinstance(value, Tree) or isinstance(value[0], Tree):
                continue
            # A leading string means a single tree rather than a list of them.
            single = isinstance(value[0], str)
            items = [value] if single else value
            trees = [
                list_to_tree(item)
                for item in items
                if not isinstance(item, Tree)
            ]
            self[key] = trees[0] if single else trees

        elif key == 'amr':
            if isinstance(value, AMRGraph) or isinstance(value[0], AMRGraph):
                continue
            # A string inside the first element means a single triple list.
            single = isinstance(value[0][0], str)
            items = [value] if single else value
            wrapped = [AMRGraph(penman.Graph(triples)) for triples in items]
            self[key] = wrapped[0] if single else wrapped
def _split_name_ops(graph):
    """Split ``_``-joined ``:opN`` literals of ``name`` nodes into separate ops.

    For every variable whose instance is ``name``, the op literals are
    stripped of double quotes, ordered by op number, split on ``_`` and
    re-emitted as ``:op1``, ``:op2``, ... triples at the position of the
    earliest original op triple.

    Args:
        graph: a graph exposing ``triples`` (a list) and ``metadata``.

    Returns:
        A new ``penman.Graph`` that shares the metadata of ``graph``.
    """
    # Variables whose instance concept is ``name``.
    name_vars = set()
    for source, role, target in graph.triples:
        if role == ':instance' and target == 'name':
            name_vars.add(source)

    # For each such variable: (triple index, role, unquoted literal).
    ops_by_var = defaultdict(list)
    for position, (source, role, target) in enumerate(graph.triples):
        if source in name_vars and role.startswith(':op'):
            ops_by_var[source].append((position, role, target.strip('"')))

    triples = graph.triples.copy()
    for var, ops in ops_by_var.items():
        # Order by the integer following ``:op``.
        ordered = sorted(ops, key=lambda op: int(op[1][3:]))
        positions, _, literals = zip(*ordered)
        for position in positions:
            triples[position] = None

        pieces = [
            '"' + piece + '"'
            for literal in literals
            for piece in literal.split('_')
        ]
        expanded = [
            penman.Triple(var, ':op' + str(number), piece)
            for number, piece in enumerate(pieces, start=1)
        ]
        # Stored as a list for now; flattened into the result below.
        triples[min(positions)] = expanded

    flattened = []
    for entry in triples:
        if entry is None:
            continue
        if isinstance(entry, list):
            flattened.extend(entry)
        else:
            flattened.append(entry)

    result = penman.Graph(flattened)
    result.metadata = graph.metadata
    return result
def test_dumps_triples():
    """Check ``penman.dumps(..., triples=True)``, including a custom codec."""
    untyped = [penman.Graph([('a', 'instance', None)])]
    assert penman.dumps(untyped, triples=True) == 'instance(a, None)'

    typed = [penman.Graph([('a', 'instance', 'aaa')])]
    assert penman.dumps(typed, triples=True) == 'instance(a, aaa)'

    with_edge = [penman.Graph([('a', 'instance', None), ('a', 'ARG', 'b')])]
    expected = 'instance(a, None) ^\nARG(a, b)'
    assert penman.dumps(with_edge, triples=True) == expected

    class TestCodec(penman.PENMANCodec):
        # Codec subclass overriding the type relation and top settings.
        TYPE_REL = 'test'
        TOP_VAR = 'TOP'
        TOP_REL = 'top'

    edge_only = [penman.Graph([('a', 'ARG', 'b')])]
    expected = 'top(TOP, a) ^\nARG(a, b)'
    assert penman.dumps(edge_only, triples=True, cls=TestCodec) == expected
def to_format(self, passage, metadata=True, wikification=True):
    """Render a passage as optional header lines plus one PENMAN string.

    Args:
        passage: the passage to convert.
        metadata: if true, prepend ``# ::id`` and ``# ::tok`` lines.
        wikification: if true, wikify the passage before conversion.

    Returns:
        A list of header lines followed by a single encoded-graph string, or
        ``"(y / yes)"`` in its place when the encoder returns a falsy value.
    """
    textutil.annotate(passage)

    if metadata:
        tokens = " ".join(t.text for t in passage.layer(layer0.LAYER_ID).all)
        lines = ["# ::id " + passage.ID, "# ::tok " + tokens]
    else:
        lines = []

    if wikification:
        WIKIFIER.wikify_passage(passage)
    self._expand_names(passage.layer(layer1.LAYER_ID))

    triples = list(self._to_triples(passage))
    encoded = penman.encode(penman.Graph(triples))
    if not encoded:
        encoded = "(y / yes)"
    return lines + [encoded]
def _remove_wiki(graph):
    """Return a copy of ``graph`` without its ``:wiki`` triples.

    Args:
        graph: a graph exposing ``triples`` and ``metadata``.

    Returns:
        A new ``penman.Graph`` that shares the metadata of ``graph``.
    """
    metadata = graph.metadata
    kept = []
    for triple in graph.triples:
        _, role, _ = triple
        if role != ':wiki':
            kept.append(triple)
    stripped = penman.Graph(kept)
    stripped.metadata = metadata
    return stripped
def _replace_wiki(graph):
    """Return a copy of ``graph`` with every ``:wiki`` target set to ``'+'``.

    Args:
        graph: a graph exposing ``triples`` and ``metadata``.

    Returns:
        A new ``penman.Graph`` that shares the metadata of ``graph``.
    """
    metadata = graph.metadata

    def _masked(triple):
        # Only ``:wiki`` triples are rebuilt; all others pass through as-is.
        source, role, _ = triple
        if role == ':wiki':
            return penman.Triple(source, role, '+')
        return triple

    replaced = penman.Graph([_masked(triple) for triple in graph.triples])
    replaced.metadata = metadata
    return replaced
def test_init(self):
    """Check the triples and top of graphs built from small triple lists."""
    # empty graph
    empty = penman.Graph()
    assert empty.triples() == []
    assert empty.top is None

    # (triples, expected top) pairs; triples() should echo the input.
    cases = [
        # single node
        ([('a', 'instance', None)], 'a'),
        # single node one edge (default nodetype)
        ([('a', 'ARG1', 'b')], 'a'),
        # first triple determines top
        ([('b', 'instance', None), ('a', 'ARG1', 'b')], 'b'),
    ]
    for triples, expected_top in cases:
        graph = penman.Graph(triples)
        assert graph.triples() == triples
        assert graph.top == expected_top
def test_triples(self, x1):
    """Exercise ``Graph.triples`` unfiltered and with each filter keyword."""
    assert penman.Graph().triples() == []

    # Expected triples for the x1 fixture, named for reuse below.
    try_inst = ('e2', 'instance', '_try_v_1')
    named_inst = ('x1', 'instance', 'named')
    proper_inst = ('_1', 'instance', 'proper_q')
    sleep_inst = ('e3', 'instance', '_sleep_v_1')
    try_arg1 = ('e2', 'ARG1', 'x1')
    carg = ('x1', 'CARG', '"Abrams"')
    rstr = ('_1', 'RSTR', 'x1')
    try_arg2 = ('e2', 'ARG2', 'e3')
    sleep_arg1 = ('e3', 'ARG1', 'x1')

    graph = penman.Graph(x1[1])
    assert graph.triples() == [
        try_inst, named_inst, proper_inst, sleep_inst,
        try_arg1, carg, rstr, try_arg2, sleep_arg1,
    ]
    assert graph.triples(source='e2') == [try_inst, try_arg1, try_arg2]
    assert graph.triples(target='x1') == [try_arg1, rstr, sleep_arg1]
    assert graph.triples(relation='instance') == [
        try_inst, named_inst, proper_inst, sleep_inst,
    ]
def to_format(self, passage, metadata=True, wikification=True, verbose=False,
              use_original=True):
    """Render a passage as a list of PENMAN text lines.

    Args:
        passage: the passage to convert.
        metadata: if true, prepend the lines from ``self.header``.
        wikification: if true, wikify the passage before conversion.
        verbose: if true, print progress messages.
        use_original: if true and ``passage.extra`` holds a truthy
            ``"original"`` entry, return that entry directly.

    Returns:
        The stored original if used, otherwise the header lines (if any)
        followed by the encoded graph split into lines, or ``"(y / yes)"``
        when the encoder produces an empty string.
    """
    if use_original:
        original = passage.extra.get("original")
        if original:
            return original

    textutil.annotate(passage, as_array=True)
    lines = self.header(passage) if metadata else []

    if wikification:
        if verbose:
            print("Wikifying passage...")
        WIKIFIER.wikify_passage(passage)

    if verbose:
        print("Expanding names...")
    self._expand_names(passage.layer(layer1.LAYER_ID))

    encoded = penman.encode(penman.Graph(list(self._to_triples(passage))))
    # ``str.split`` never returns an empty list (``"".split("\n") == [""]``),
    # so the fallback has to test the string itself, not the split result.
    return lines + (encoded.split("\n") if encoded else ["(y / yes)"])
def create_amr_graph_from_prediction(prediction):
    """Build a ``penman.Graph`` from a prediction dictionary.

    Node ``i`` (0-based) becomes variable ``vv{i+1}``, with the part of the
    node string before the first ``_`` as its concept.  ``heads[i]`` is the
    1-based index of the head of node ``i``; ``0`` means no incoming edge.

    Args:
        prediction: mapping with keys ``'nodes'``, ``'heads'``,
            ``'head_labels'``, ``'sentence_id'`` and ``'root'``.

    Returns:
        A ``penman.Graph`` whose ``_top`` and ``_triples`` are set directly
        and which also carries ``heads``, ``nodes``, ``head_labels`` and
        ``id`` attributes.
    """
    nodes = prediction['nodes']
    heads = prediction['heads']
    head_labels = prediction['head_labels']
    sentence_id = prediction['sentence_id']
    root = prediction['root']

    variable_map = {
        'vv' + str(position + 1): node
        for position, node in enumerate(nodes)
    }

    # find top: the last variable whose node equals the root, else ``vv1``
    matching = [var for var, node in variable_map.items() if node == root]
    top = matching[-1] if matching else 'vv1'

    # rename nodes: keep only the part before the first underscore
    variable_map = {
        var: node.split('_')[0] for var, node in variable_map.items()
    }

    # create instances
    triples = [
        (var, 'instance', concept) for var, concept in variable_map.items()
    ]

    # create connections
    for position, head in enumerate(heads):
        if head == 0:
            continue
        head_var = 'vv{}'.format(head)
        dep_var = 'vv{}'.format(position + 1)
        triples.append((head_var, head_labels[position], dep_var))

    graph = penman.Graph()
    graph.heads = heads
    graph.nodes = nodes
    graph.head_labels = head_labels
    graph._top = top
    graph._triples = [penman.Triple(*triple) for triple in triples]
    graph.id = sentence_id
    return graph
def collapse(g, co_map):
    """Replace the nodes selected by ``_dereification_agenda`` in ``g``.

    For each source listed in the agenda, only its recorded incoming triple
    is replaced (by the agenda's replacement triples); every other triple
    with that source is dropped.

    Args:
        g: the graph to collapse.
        co_map: passed through to ``_dereification_agenda``.

    Returns:
        A new ``penman.Graph`` with the same top, carrying a
        ``collapsed_counts`` mapping from collapsed node type to count.
    """
    agenda = _dereification_agenda(g, co_map)
    counts = defaultdict(int)
    types = {}
    for t in g.triples():
        if t.relation == 'instance':
            types[t.source] = t.target

    triples = []
    for triple in g.triples():
        if triple.source not in agenda:
            triples.append(triple)
            continue
        incoming_triple, agendum = agenda[triple.source]
        # only replace on the relation going into the reified node
        # so the collapsed relation goes in the right spot
        if triple == incoming_triple:
            triples.extend(agendum)
            counts[types.get(triple.source, '?')] += 1

    collapsed = penman.Graph(triples, top=g.top)
    collapsed.collapsed_counts = counts
    return collapsed
def reify_attributes(g):
    """Turn every constant-valued relation in ``g`` into an edge to a new node.

    Instance triples with a falsy target are dropped.  For each remaining
    non-instance triple whose target is not a variable, a fresh variable is
    created with the constant as its concept, and the relation is pointed
    at that variable.

    Args:
        g: the graph to transform.

    Returns:
        A new ``penman.Graph`` with the same top as ``g``.
    """
    variables = g.variables()

    # filter out triples with empty instances
    triples = [
        t for t in g.triples()
        if not (t.relation == 'instance' and not t.target)
    ]

    # ensure every node has a type
    # types = {t.source: t for t in triples if t.relation == 'instance'}
    # for src in variables.difference(types):
    #     triples.append(penman.Triple(src, 'instance', 'amr-missing'))

    # ensure constants are nodes
    new_triples = []
    for triple in triples:
        is_constant = (
            triple.relation != 'instance'
            and triple.target not in variables
        )
        if not is_constant:
            new_triples.append(triple)
            continue
        var = _unique_var('', variables, '_')
        new_triples.append(penman.Triple(var, 'instance', triple.target))
        new_triples.append(penman.Triple(triple.source, triple.relation, var))
        variables.add(var)

    return penman.Graph(new_triples, g.top)
def encode(d, properties=True, lnk=True, indent=False):
    """
    Serialize a DMRS object to a PENMAN string.

    Args:
        d: a DMRS object
        properties (bool): if `False`, suppress variable properties
        lnk: if `False`, suppress surface alignments and strings
        indent (bool, int): if `True` or an integer value, add
            newlines and indentation
    Returns:
        a PENMAN-serialization of the DMRS object
    Raises:
        PyDelphinException: if Penman fails to encode the graph
    """
    # Translate the boolean forms into the values passed to penman.encode;
    # any other value (e.g. an integer) is passed through unchanged.
    if indent is True:
        indent = -1
    elif indent is False:
        indent = None
    triples = to_triples(d, properties=properties, lnk=lnk)
    g = penman.Graph(triples)
    try:
        return penman.encode(g, indent=indent)
    except penman.PenmanError as exc:
        # This is the encoding path, so the message says "encode".
        raise PyDelphinException('could not encode with Penman') from exc
def __init__(self, *args, **kwargs) -> None:
    """
    A dict structure holding parsed annotations.

    After the parent initialiser runs, raw values under ``'con'`` are
    converted with ``list_to_tree`` and raw values under ``'amr'`` are
    wrapped as ``AMRGraph(penman.Graph(...))``, unless they (or their first
    element) already have the target type.  Falsy values and all other keys
    are left untouched.

    Args:
        *args: An iterator of key-value pairs.
        **kwargs: Arguments from ``**`` operator.
    """
    super().__init__(*args, **kwargs)
    # Iterate over a snapshot because entries are reassigned in the loop.
    for key, value in list(self.items()):
        if not value:
            continue

        if key == 'con':
            if isinstance(value, Tree) or isinstance(value[0], Tree):
                continue
            # A leading string means a single tree rather than a list of them.
            single = isinstance(value[0], str)
            items = [value] if single else value
            trees = [
                list_to_tree(item)
                for item in items
                if not isinstance(item, Tree)
            ]
            self[key] = trees[0] if single else trees

        elif key == 'amr':
            # Imported here, only when an ``amr`` entry is actually present.
            from hanlp_common.amr import AMRGraph
            import penman
            if isinstance(value, AMRGraph) or isinstance(value[0], AMRGraph):
                continue
            # A string inside the first element means a single triple list.
            single = isinstance(value[0][0], str)
            items = [value] if single else value
            wrapped = [AMRGraph(penman.Graph(triples)) for triples in items]
            self[key] = wrapped[0] if single else wrapped
def to_amr(self) -> penman.Graph:
    """
    Convert PEG to AMR as `penman.Graph` object.

    Returns:
        A graph made of the instance triples of every node followed by the
        triples of every edge, with the AMR root's variable as top.
    """
    # reverse the graph edges, as the root is defined as the last executed
    # node of the original PEG. This means the arrows will be backwards if
    # you visualize the AMR graph!
    amr_G = self.G.reverse(copy=True)

    # set arbitrary amr variable ids
    for index, (_, attrs) in enumerate(sorted(amr_G.nodes(data=True))):
        attrs["amr_ids"] = {"name": f"n{index}", "var": f"a{index}"}

    # get root
    root = self.get_amr_root()
    root_amr_id = amr_G.nodes()[root]["amr_ids"]["var"]

    # get all instance triplets
    instance_trips = []
    for node_id in sorted(amr_G.nodes()):
        attrs = amr_G.nodes()[node_id]
        node = attrs["data"]
        ids = attrs["amr_ids"]
        instance_trips.extend(to_amr_triplet(node, ids["var"], ids["name"]))

    # get all edge trips
    edge_trips = []
    for source, target, data in sorted(amr_G.edges(data=True)):
        source_pegn = self.node_data_by_id(source)
        target_pegn = self.node_data_by_id(target)
        edge_trips.extend(edge_to_amr(source_pegn, target_pegn, data, amr_G))

    return penman.Graph(triples=instance_trips + edge_trips, top=root_amr_id)
def reify(g, re_map, prefix=None):
    """Replace each relation listed in ``re_map`` with a node and two edges.

    Args:
        g: the graph to transform.
        re_map: mapping from relation to a
            ``(concept, source role, target role)`` tuple.
        prefix: passed through to ``_unique_var`` when naming new variables.

    Returns:
        A new ``penman.Graph`` with the same top, carrying a
        ``reified_counts`` mapping from reified relation to count.
    """
    variables = g.variables()
    counts = defaultdict(int)
    triples = []

    for triple in g.triples():
        if triple.relation not in re_map:
            triples.append(triple)
            continue

        concept, srcrole, tgtrole = re_map[triple.relation]
        var = _unique_var(concept, variables, prefix)
        variables.add(var)

        instance = penman.Triple(var, 'instance', concept)
        # source triple is inverse direction of original relation
        to_source = penman.Triple(
            var, srcrole, triple.source, inverted=not triple.inverted)
        # target triple is same direction as original relation
        to_target = penman.Triple(
            var, tgtrole, triple.target, inverted=triple.inverted)

        triples += [instance, to_source, to_target]
        counts[triple.relation] += 1

    reified = penman.Graph(triples, top=g.top)
    reified.reified_counts = counts
    return reified
def test_encode_with_parameters():
    """Check ``penman.encode`` output for several ``indent`` and ``top`` values."""
    encode = penman.encode
    graph = penman.Graph([
        ('a', 'instance', 'aaa'),
        ('b', 'instance', 'bbb'),
        ('c', 'instance', 'ccc'),
        ('a', 'ARG1', 'b'),
        ('b', 'ARG1', 'c'),
    ])
    one_line = '(a / aaa :ARG1 (b / bbb :ARG1 (c / ccc)))'

    assert encode(graph, indent=True) == (
        '(a / aaa\n'
        ' :ARG1 (b / bbb\n'
        ' :ARG1 (c / ccc)))'
    )
    # Both False and None are expected to give single-line output.
    assert encode(graph, indent=False) == one_line
    assert encode(graph, indent=None) == one_line
    assert encode(graph, indent=0) == (
        '(a / aaa\n'
        ':ARG1 (b / bbb\n'
        ':ARG1 (c / ccc)))'
    )
    assert encode(graph, indent=2) == (
        '(a / aaa\n'
        ' :ARG1 (b / bbb\n'
        ' :ARG1 (c / ccc)))'
    )
    assert encode(graph, top='b') == (
        '(b / bbb\n'
        ' :ARG1 (c / ccc)\n'
        ' :ARG1-of (a / aaa))'
    )
    assert encode(graph, top='c') == (
        '(c / ccc\n'
        ' :ARG1-of (b / bbb\n'
        ' :ARG1-of (a / aaa)))'
    )
def test_encode_atoms(self):
    """Encode single-triple graphs whose targets are different atom types."""
    cases = [
        # string value
        (('a', 'ARG', '"string"'), '(a :ARG "string")'),
        # symbol value
        (('a', 'ARG', 'symbol'), '(a :ARG symbol)'),
        # float value
        (('a', 'ARG', -0.01), '(a :ARG -0.01)'),
        # int value
        (('a', 'ARG', 15), '(a :ARG 15)'),
        # numeric concept
        (('one', 'instance', 1), '(one / 1)'),
        # string concept
        (('one', 'instance', '"a string"'), '(one / "a string")'),
    ]
    for triple, expected in cases:
        graph = penman.Graph([triple])
        assert encode(graph) == expected