Ejemplo n.º 1
0
def _make_labels(nodes, links, vgen):
    """
    Map every node id to a handle label.

    Node ids joined by links whose post is EQ_POST are grouped into
    one connected component and share a single label. If a link starts
    at the top node id, that id is placed in front of the node ids and
    the variable generator's counter is reset to 0.

    Args:
        nodes: iterable of objects with a `nodeid` attribute
        links: iterable of objects with `start`, `end`, and `post`
            attributes
        vgen: variable generator; its `vid` attribute may be reset and
            its `new()` method is called once per component
    Returns:
        dict: mapping of node id to label
    """
    node_ids = [n.nodeid for n in nodes]
    eq_edges = []
    for link in links:
        source = link.start
        if safe_int(source) == LTOP_NODEID:
            node_ids.insert(0, source)
            vgen.vid = 0  # start at h0 for TOP
        if link.post == EQ_POST:
            eq_edges.append((source, link.end))

    # components return in order of node_ids
    labels = {}
    for group in _connected_components(node_ids, eq_edges):
        handle = vgen.new(HANDLESORT)[0]
        labels.update(dict.fromkeys(group, handle))
    return labels
Ejemplo n.º 2
0
    def to_triples(self, short_pred=True, properties=True):
        """
        Encode the Dmrs as triples suitable for PENMAN serialization.

        Args:
            short_pred (bool): use the short form of each predicate if
                `True`, otherwise the predicate's full string
            properties (bool): if `True`, emit one triple per sortinfo
                item of every non-quantifier node
        Returns:
            list: `(source, relation, target)` tuples; node triples
                come first, followed by link triples
        """
        quantifier_ids = set(self.nodeids(quantifier=True))
        triples = []

        for node in nodes(self):
            nid = node.nodeid
            if short_pred:
                pred = node.pred.short_form()
            else:
                pred = node.pred.string
            triples.append((nid, 'predicate', pred))
            if node.lnk is not None:
                triples.append((nid, 'lnk', '"{}"'.format(str(node.lnk))))
            if node.carg is not None:
                triples.append((nid, 'carg', '"{}"'.format(node.carg)))
            if properties and nid not in quantifier_ids:
                triples.extend(
                    (nid, key.lower(), value)
                    for key, value in node.sortinfo.items()
                )

        for link in links(self):
            # links from the top node id get the fixed 'top' relation
            if safe_int(link.start) == LTOP_NODEID:
                relation = 'top'
            else:
                relation = '{}-{}'.format(link.rargname.upper(), link.post)
            triples.append((link.start, relation, link.end))
        return triples
Ejemplo n.º 3
0
    def __init__(self,
                 nodes=None,
                 links=None,
                 top=None,
                 index=None,
                 xarg=None,
                 lnk=None,
                 surface=None,
                 identifier=None):
        """
        Build the structure from DMRS nodes and links.

        Labels, intrinsic variables, and holes are all drawn from a
        single variable generator, and the nodes and links are turned
        into elementary predications (`eps`) and handle constraints
        (`hcons`) that are passed to the superclass initializer.

        Args:
            nodes: iterable of node objects (default: empty list); the
                attributes `nodeid`, `pred`, `carg`, `lnk`, `surface`,
                and `base` are read from each
            links: iterable of link objects (default: empty list); the
                attributes `start`, `end`, `rargname`, and `post` are
                read from each
            top: node id of the top; if given, it is converted to a
                link from LTOP_NODEID with H_POST and placed before
                the other links
            index: node id whose intrinsic variable becomes the index
            xarg: node id whose intrinsic variable becomes the xarg
            lnk: passed unchanged to the superclass
            surface: passed unchanged to the superclass
            identifier: passed unchanged to the superclass
        """
        if nodes is None: nodes = []
        if links is None: links = []
        qeq = HandleConstraint.qeq
        vgen = _VarGenerator()

        # check this here to streamline things later
        if top is not None:
            links = [Link(LTOP_NODEID, top, None, H_POST)] + list(links)
            # reset so the top link created above is handled by the
            # `elif top is None` branch in the loop below
            top = None

        labels = _make_labels(nodes, links, vgen)
        # start ids of links whose role is RSTR_ROLE (i.e., quantifiers)
        qs = set(l.start for l in links
                 if (l.rargname or '').upper() == RSTR_ROLE)
        ivs = _make_ivs(nodes, vgen, qs)

        # initialize args with ARG0 for intrinsic variables
        args = {nid: {IVARG_ROLE: iv} for nid, iv in ivs.items()}
        hcons = []
        for l in links:
            # make sure the link's start has an argument dict, even if
            # it has no intrinsic variable (this includes the top id)
            if l.start not in args:
                args[l.start] = {}
            if safe_int(l.start) != LTOP_NODEID:
                if not l.rargname or l.rargname.upper() == BARE_EQ_ROLE:
                    continue  # don't make an argument for bare EQ links
                if l.post == H_POST:
                    # the argument is a new hole that is qeq to the
                    # label of the link's end
                    hole = vgen.new(HANDLESORT)[0]
                    hcons += [qeq(hole, labels[l.end])]
                    args[l.start][l.rargname] = hole
                    # if the arg is RSTR, it's a quantifier, so we can
                    # find its intrinsic variable now
                    if l.rargname.upper() == RSTR_ROLE:
                        ivs[l.start] = ivs[l.end]
                        args[l.start][IVARG_ROLE] = ivs[l.start]
                elif l.post == HEQ_POST:
                    # the argument is the label of the link's end
                    args[l.start][l.rargname] = labels[l.end]
                else:  # NEQ_POST or EQ_POST
                    # the argument is the intrinsic variable of the
                    # link's end
                    args[l.start][l.rargname] = ivs[l.end]
            # ignore top link if top is already set
            elif top is None:
                # The most explicit value of post for a link that denotes a
                # TOP that is qeq to a label is H_POST, but I equally accept
                # NIL_POST for backward compatibility. HEQ_POST denotes a TOP
                # that selects a label directly (and this label equality would
                # have been captured earlier)
                top = labels[l.start]
                if l.post == H_POST or l.post == NIL_POST:
                    hcons += [qeq(top, labels[l.end])]
        # build one elementary predication tuple per node
        eps = []
        for node in nodes:
            nid = node.nodeid
            if node.carg is not None:
                args[nid][CONSTARG_ROLE] = node.carg
            ep = (nid, node.pred, labels[nid], args[nid], node.lnk,
                  node.surface, node.base)
            eps.append(ep)

        icons = None  # future feature

        # index and xarg are given as node ids; they are looked up in
        # ivs with .get(), so an id without an entry yields None
        super(Dmrs, self).__init__(top=top,
                                   index=ivs.get(index),
                                   xarg=ivs.get(xarg),
                                   eps=eps,
                                   hcons=hcons,
                                   icons=icons,
                                   vars=vgen.store,
                                   lnk=lnk,
                                   surface=surface,
                                   identifier=identifier)
Ejemplo n.º 4
0
def test_safe_int():
    """Check safe_int on integer-like and non-integer strings."""
    cases = [
        ('1', 1),
        ('1.0', '1.0'),
        ('-12345', -12345),
        ('1a', '1a'),
    ]
    for text, expected in cases:
        assert safe_int(text) == expected
Ejemplo n.º 5
0
def test_safe_int():
    """Integer strings become ints; other strings compare equal to the input."""
    converted = {'1': 1, '-12345': -12345}
    unchanged = ('1.0', '1a')

    assert safe_int('1') == converted['1']
    for raw in unchanged[:1]:
        assert safe_int(raw) == raw
    assert safe_int('-12345') == converted['-12345']
    for raw in unchanged[1:]:
        assert safe_int(raw) == raw
Ejemplo n.º 6
0
def convert(path,
            source_fmt,
            target_fmt,
            select='result:mrs',
            properties=True,
            show_status=False,
            predicate_modifiers=False,
            color=False,
            pretty_print=False,
            indent=None):
    """
    Convert between various DELPH-IN Semantics representations.

    Args:
        path (str, file): filename, testsuite directory, open file, or
            stream of input representations; if `None`, input is read
            from standard input
        source_fmt (str): convert from this format
        target_fmt (str): convert to this format
        select (str): TSQL query for selecting data (ignored if *path*
            is not a testsuite directory; default: `"result:mrs"`)
        properties (bool): include morphosemantic properties if `True`
            (default: `True`)
        show_status (bool): show disconnected EDS nodes (ignored if
            *target_fmt* is not `"eds"`; default: `False`)
        predicate_modifiers (bool): apply EDS predicate modification
            for certain kinds of patterns (ignored if *target_fmt* is
            not an EDS format; default: `False`)
        color (bool): apply syntax highlighting if `True` and
            *target_fmt* is `"simplemrs"` (default: `False`)
        pretty_print (bool): if `True`, format the output with
            newlines and default indentation (default: `False`)
        indent (int, optional): specifies an explicit number of spaces
            for indentation (implies *pretty_print*)
    Returns:
        str: the converted representation
    Raises:
        ValueError: if *source_fmt* is an EDS format and *target_fmt*
            is not, or if *select* does not project exactly one column
    """
    if source_fmt.startswith('eds') and not target_fmt.startswith('eds'):
        raise ValueError(
            'Conversion from EDS to non-EDS currently not supported.')

    if indent:
        pretty_print = True
        indent = 4 if indent is True else safe_int(indent)

    if len(tsql.inspect_query('select ' + select)['projection']) != 1:
        raise ValueError('Exactly 1 column must be given in selection query: '
                         '(e.g., result:mrs)')

    # read
    loads = _get_codec(source_fmt)
    if path is None:
        xs = loads(sys.stdin.read())
    elif hasattr(path, 'read'):
        xs = loads(path.read())
    elif os.path.isdir(path):
        ts = itsdb.TestSuite(path)
        xs = [next(iter(loads(r[0])), None) for r in tsql.select(select, ts)]
    else:
        # read the whole file inside a context manager so the handle is
        # closed promptly instead of being left to garbage collection
        with open(path, 'r') as f:
            xs = loads(f.read())

    # write
    dumps = _get_codec(target_fmt, load=False)
    kwargs = {}
    if color:
        kwargs['color'] = color
    if pretty_print:
        kwargs['pretty_print'] = pretty_print
    if indent:
        kwargs['indent'] = indent
    if target_fmt == 'eds':
        # for plain "eds", pretty_print is always passed, even if False
        kwargs['pretty_print'] = pretty_print
        kwargs['show_status'] = show_status
    if target_fmt.startswith('eds'):
        kwargs['predicate_modifiers'] = predicate_modifiers
    kwargs['properties'] = properties

    # this is not a great way to improve robustness when converting
    # many representations, but it'll do until v1.0.0. Also, it only
    # improves robustness on the output, not the input.
    # Note that all the code below is to replace the following:
    #     return dumps(xs, **kwargs)
    head, joiner, tail = _get_output_details(target_fmt)
    parts = []
    if pretty_print:
        joiner = joiner.strip() + '\n'

    def _trim(s):
        # strip the per-document head and tail so the items can be
        # joined and wrapped in a single head/tail pair below
        if head and s.startswith(head):
            s = s[len(head):].lstrip('\n')
        if tail and s.endswith(tail):
            s = s[:-len(tail)].rstrip('\n')
        return s

    # serialize one item at a time so that a failure is logged and
    # skipped rather than aborting the whole conversion
    for x in xs:
        try:
            s = dumps([x], **kwargs)
        except (PyDelphinException, KeyError, IndexError):
            logging.exception('could not convert representation')
        else:
            s = _trim(s)
            parts.append(s)
    # set these after so head and tail are used correctly in _trim
    if pretty_print:
        if head:
            head += '\n'
        if tail:
            tail = '\n' + tail
    return head + joiner.join(parts) + tail