def _make_labels(nodes, links, vgen):
    """
    Map every node id to a label variable drawn from *vgen*.

    Links whose post is EQ_POST are collected as edges, and every
    component yielded by `_connected_components` receives one fresh
    HANDLESORT variable that all of its members share. When a link
    starts at LTOP_NODEID, that id is placed at the front of the id
    list and the generator's `vid` counter is reset to 0.

    Args:
        nodes: iterable of objects with a `nodeid` attribute
        links: iterable of objects with `start`, `end`, and `post`
            attributes
        vgen: variable generator exposing `vid` and `new()`
    Returns:
        dict: mapping of node id to its label
    """
    node_ids = [n.nodeid for n in nodes]
    eq_edges = []
    for link in links:
        source = link.start
        if safe_int(source) == LTOP_NODEID:
            # the top pseudo-node goes first and numbering restarts,
            # so that TOP starts at h0
            node_ids.insert(0, source)
            vgen.vid = 0
        if link.post == EQ_POST:
            eq_edges.append((source, link.end))

    # components come back in the order of node_ids
    label_of = {}
    for group in _connected_components(node_ids, eq_edges):
        handle = vgen.new(HANDLESORT)[0]
        for member in group:
            label_of[member] = handle
    return label_of
def to_triples(self, short_pred=True, properties=True):
    """
    Encode the Dmrs as triples suitable for PENMAN serialization.

    Args:
        short_pred (bool): use the predicate's `short_form()` when
            true, otherwise its `string` attribute
        properties (bool): when true, also emit one triple per
            `sortinfo` entry of every non-quantifier node
    Returns:
        list: `(source, relation, target)` tuples; all node triples
        come first, followed by the link triples
    """
    quantifier_ids = set(self.nodeids(quantifier=True))
    triples = []

    # node-level triples: predicate, then optional lnk/carg/properties
    for node in nodes(self):
        nid = node.nodeid
        if short_pred:
            pred = node.pred.short_form()
        else:
            pred = node.pred.string
        triples.append((nid, 'predicate', pred))
        if node.lnk is not None:
            triples.append((nid, 'lnk', '"{}"'.format(str(node.lnk))))
        if node.carg is not None:
            triples.append((nid, 'carg', '"{}"'.format(node.carg)))
        if properties and nid not in quantifier_ids:
            triples.extend(
                (nid, key.lower(), value)
                for key, value in node.sortinfo.items()
            )

    # link-level triples: the top link gets a fixed relation name
    for link in links(self):
        if safe_int(link.start) == LTOP_NODEID:
            relation = 'top'
        else:
            relation = '{}-{}'.format(link.rargname.upper(), link.post)
        triples.append((link.start, relation, link.end))

    return triples
def __init__(self, nodes=None, links=None,
             top=None, index=None, xarg=None,
             lnk=None, surface=None, identifier=None):
    """
    Build the Dmrs from nodes and links.

    Labels and intrinsic variables are generated for the nodes, the
    links are turned into per-node argument dictionaries and qeq
    handle constraints, and the result is handed to the parent
    initializer as elementary-predication tuples.

    Args:
        nodes: iterable of nodes (defaults to an empty list)
        links: iterable of links (defaults to an empty list)
        top: node id of the top; when given, it is rewritten as a
            link from LTOP_NODEID with H_POST placed before *links*
        index: node id whose intrinsic variable becomes the index
        xarg: node id whose intrinsic variable becomes the xarg
        lnk: passed through to the parent initializer
        surface: passed through to the parent initializer
        identifier: passed through to the parent initializer
    """
    nodes = [] if nodes is None else nodes
    links = [] if links is None else links
    make_qeq = HandleConstraint.qeq
    vgen = _VarGenerator()

    # an explicit top is normalized to a top link up front so that
    # only links need to be inspected below
    if top is not None:
        links = [Link(LTOP_NODEID, top, None, H_POST)] + list(links)
        top = None

    labels = _make_labels(nodes, links, vgen)
    quantifier_ids = {
        link.start for link in links
        if (link.rargname or '').upper() == RSTR_ROLE
    }
    ivs = _make_ivs(nodes, vgen, quantifier_ids)

    # every node with an intrinsic variable starts out with it as ARG0
    args = {}
    for nid, iv in ivs.items():
        args[nid] = {IVARG_ROLE: iv}

    hcons = []
    for link in links:
        start = link.start
        end = link.end
        args.setdefault(start, {})

        if safe_int(start) == LTOP_NODEID:
            # only the first top link counts; later ones are ignored.
            # H_POST is the most explicit post for a TOP that is qeq
            # to a label, but NIL_POST is accepted equally for backward
            # compatibility. HEQ_POST denotes a TOP that selects a
            # label directly (that label equality would have been
            # captured earlier).
            if top is None:
                top = labels[start]
                if link.post == H_POST or link.post == NIL_POST:
                    hcons.append(make_qeq(top, labels[end]))
            continue

        role = link.rargname
        if not role or role.upper() == BARE_EQ_ROLE:
            # bare EQ links do not become arguments
            continue

        node_args = args[start]
        if link.post == H_POST:
            hole = vgen.new(HANDLESORT)[0]
            hcons.append(make_qeq(hole, labels[end]))
            node_args[role] = hole
            # an RSTR argument marks a quantifier, so its intrinsic
            # variable can be resolved now from the bound node
            if role.upper() == RSTR_ROLE:
                ivs[start] = ivs[end]
                node_args[IVARG_ROLE] = ivs[start]
        elif link.post == HEQ_POST:
            node_args[role] = labels[end]
        else:
            # NEQ_POST or EQ_POST
            node_args[role] = ivs[end]

    eps = []
    for node in nodes:
        nid = node.nodeid
        node_args = args[nid]
        if node.carg is not None:
            node_args[CONSTARG_ROLE] = node.carg
        eps.append(
            (nid, node.pred, labels[nid], node_args,
             node.lnk, node.surface, node.base)
        )

    icons = None  # future feature
    super(Dmrs, self).__init__(
        top=top,
        index=ivs.get(index),
        xarg=ivs.get(xarg),
        eps=eps,
        hcons=hcons,
        icons=icons,
        vars=vgen.store,
        lnk=lnk,
        surface=surface,
        identifier=identifier,
    )
def test_safe_int():
    """Check that safe_int converts integer strings and returns others as-is."""
    expectations = [
        ('1', 1),
        ('1.0', '1.0'),
        ('-12345', -12345),
        ('1a', '1a'),
    ]
    for text, expected in expectations:
        assert safe_int(text) == expected
def convert(path, source_fmt, target_fmt, select='result:mrs',
            properties=True, show_status=False, predicate_modifiers=False,
            color=False, pretty_print=False, indent=None):
    """
    Convert between various DELPH-IN Semantics representations.

    Args:
        path (str, file): filename, testsuite directory, open file, or
            stream of input representations; if `None`, the input is
            read from standard input
        source_fmt (str): convert from this format
        target_fmt (str): convert to this format
        select (str): TSQL query for selecting data (ignored if *path*
            is not a testsuite directory; default: `"result:mrs"`)
        properties (bool): include morphosemantic properties if `True`
            (default: `True`)
        show_status (bool): show disconnected EDS nodes (ignored if
            *target_fmt* is not `"eds"`; default: `False`)
        predicate_modifiers (bool): apply EDS predicate modification
            for certain kinds of patterns (ignored if *target_fmt* is
            not an EDS format; default: `False`)
        color (bool): apply syntax highlighting if `True` and
            *target_fmt* is `"simplemrs"` (default: `False`)
        pretty_print (bool): if `True`, format the output with
            newlines and default indentation (default: `False`)
        indent (int, optional): specifies an explicit number of spaces
            for indentation (implies *pretty_print*)
    Returns:
        str: the converted representation
    Raises:
        ValueError: if converting from an EDS format to a non-EDS
            format, or if *select* does not project exactly one column
    """
    if source_fmt.startswith('eds') and not target_fmt.startswith('eds'):
        raise ValueError(
            'Conversion from EDS to non-EDS currently not supported.')

    if indent:
        pretty_print = True
        indent = 4 if indent is True else safe_int(indent)

    if len(tsql.inspect_query('select ' + select)['projection']) != 1:
        raise ValueError('Exactly 1 column must be given in selection query: '
                         '(e.g., result:mrs)')

    # read
    loads = _get_codec(source_fmt)
    if path is None:
        xs = loads(sys.stdin.read())
    elif hasattr(path, 'read'):
        xs = loads(path.read())
    elif os.path.isdir(path):
        ts = itsdb.TestSuite(path)
        xs = [
            next(iter(loads(r[0])), None)
            for r in tsql.select(select, ts)
        ]
    else:
        # use a context manager so the file handle is closed promptly
        # instead of being left for the garbage collector
        with open(path, 'r') as f:
            xs = loads(f.read())

    # write
    dumps = _get_codec(target_fmt, load=False)
    kwargs = {}
    if color:
        kwargs['color'] = color
    if pretty_print:
        kwargs['pretty_print'] = pretty_print
    if indent:
        kwargs['indent'] = indent
    if target_fmt == 'eds':
        kwargs['pretty_print'] = pretty_print
        kwargs['show_status'] = show_status
    if target_fmt.startswith('eds'):
        kwargs['predicate_modifiers'] = predicate_modifiers
    kwargs['properties'] = properties

    # this is not a great way to improve robustness when converting
    # many representations, but it'll do until v1.0.0. Also, it only
    # improves robustness on the output, not the input.
    # Note that all the code below is to replace the following:
    #     return dumps(xs, **kwargs)
    head, joiner, tail = _get_output_details(target_fmt)
    parts = []
    if pretty_print:
        joiner = joiner.strip() + '\n'

    def _trim(s):
        # strip the per-item header/footer so items can be re-joined
        # under a single header/footer below
        if head and s.startswith(head):
            s = s[len(head):].lstrip('\n')
        if tail and s.endswith(tail):
            s = s[:-len(tail)].rstrip('\n')
        return s

    for x in xs:
        try:
            s = dumps([x], **kwargs)
        except (PyDelphinException, KeyError, IndexError):
            # best effort: log the failure and keep converting the rest
            logging.exception('could not convert representation')
        else:
            s = _trim(s)
            parts.append(s)

    # set these after so head and tail are used correctly in _trim
    if pretty_print:
        if head:
            head += '\n'
        if tail:
            tail = '\n' + tail
    return head + joiner.join(parts) + tail