Beispiel #1
0
def _interpret_selection(select, source):
    """
    Resolve a single-column selection into its parts.

    Args:
        select (str): TSQL selection without the leading ``select``
            keyword (e.g., ``item.i-input`` or ``i-input from item``),
            optionally followed by a ``where`` clause
        source: object passed to :func:`tsdb.read_schema` to obtain
            the schema the selection is validated against
    Returns:
        tuple: ``(column, relation, condition)`` where *condition* is
        the text following the first ``' where '`` in *select*, or
        ``''`` if there is none
    Raises:
        CommandError: if the query does not project exactly one
            column, names more than one relation, or names a relation
            or column that is not in the schema
    """
    schema = tsdb.read_schema(source)
    query = tsql.inspect_query('select ' + select)
    columns = query['projection']
    if columns == '*' or len(columns) != 1:
        raise CommandError("select query must return a single column")

    relation, _, column = columns[0].rpartition('.')

    def has_column(table):
        # true if any field of *table* carries the selected column name
        return any(field.name == column for field in schema[table])

    if not relation:
        # the relation may be given as 'i-input from item' rather than
        # as the qualified form 'item.i-input'
        relations = query['relations']
        if len(relations) > 1:
            raise CommandError(
                "select query may specify no more than 1 relation")
        if relations:
            relation = relations[0]
        else:
            # no relation given anywhere: take the first table in the
            # schema that defines the column
            relation = next(filter(has_column, schema), None)

    if relation not in schema:
        raise CommandError('invalid or missing relation in query')
    if not has_column(relation):
        raise CommandError(f'invalid column in query: {column}')

    # everything after the first ' where ' (empty when absent)
    _, _, condition = select.partition(' where ')
    return column, relation, condition
def test_parse_select_where():
    """Check the condition structures parsed from ``where`` clauses."""
    # the complete inspection result for a simple equality condition
    assert tsql.inspect_query('select i-input where i-wf = 2') == {
        'type': 'select',
        'projection': ['i-input'],
        'relations': [],
        'condition': ('==', ('i-wf', 2))
    }

    # (query, expected value of the 'condition' entry)
    cases = [
        # date literals
        ('select i-input where i-date < 2018-01-15',
         ('<', ('i-date', datetime(2018, 1, 15)))),
        ('select i-input where i-date > 15-jan-2018(15:00:00)',
         ('>', ('i-date', datetime(2018, 1, 15, 15, 0, 0)))),
        # string operands with either quote style
        ('select i-input where i-input ~ "Abrams"',
         ('~', ('i-input', 'Abrams'))),
        ("select i-input where i-input !~ 'Browne'",
         ('!~', ('i-input', 'Browne'))),
        # conjunction
        ("select i-input where i-wf = 2 & i-input ~ '[Dd]og'",
         ('and', [('==', ('i-wf', 2)),
                  ('~', ('i-input', '[Dd]og'))])),
        # without parentheses, '&' groups inside '|'
        ('select i-input where i-id = 10 | i-id = 20 & i-wf = 2',
         ('or', [('==', ('i-id', 10)),
                 ('and', [('==', ('i-id', 20)),
                          ('==', ('i-wf', 2))])])),
        # parenthesized grouping and negation
        ('select i-input where (i-id = 10 | i-id = 20) & !i-wf = 2',
         ('and', [('or', [('==', ('i-id', 10)),
                          ('==', ('i-id', 20))]),
                  ('not', ('==', ('i-wf', 2)))])),
    ]
    for query, expected in cases:
        assert tsql.inspect_query(query)['condition'] == expected
Beispiel #3
0
def compare(testsuite, gold, select='i-id i-input mrs'):
    """
    Compare the MRSs of two [incr tsdb()] profiles item by item.

    Rows selected from both profiles are matched on their first
    column; for each match, the third column of every row is parsed
    as SimpleMRS and the two resulting bags are compared.

    Args:
        testsuite (str, TestSuite): path to the test [incr tsdb()]
            testsuite or a :class:`TestSuite` object
        gold (str, TestSuite): path to the gold [incr tsdb()]
            testsuite or a :class:`TestSuite` object
        select: TSQL query to select (id, input, mrs) triples
            (default: `i-id i-input mrs`)
    Yields:
        dict: Comparison results as::

            {"id": "item identifier",
             "input": "input sentence",
             "test": number_of_unique_results_in_test,
             "shared": number_of_shared_results,
             "gold": number_of_unique_results_in_gold}

    Raises:
        ValueError: if *select* does not project exactly 3 fields
            (raised when iteration starts, as this is a generator)
    """
    from delphin.mrs import simplemrs, compare as mrs_compare

    def as_testsuite(obj):
        # accept a TestSuite as-is; otherwise build one from a path or
        # from the root of an ItsdbProfile
        if isinstance(obj, itsdb.TestSuite):
            return obj
        if isinstance(obj, itsdb.ItsdbProfile):
            obj = obj.root
        return itsdb.TestSuite(obj)

    testsuite = as_testsuite(testsuite)
    gold = as_testsuite(gold)

    fields = tsql.inspect_query('select ' + select)['projection']
    if len(fields) != 3:
        raise ValueError('select does not return 3 fields: ' + select)

    # map from the first selected field to the second, taken from the
    # test profile
    inputs_by_id = dict(
        tsql.select('{} {}'.format(*fields[:2]), testsuite))

    test_selection = tsql.select(select, testsuite)
    gold_selection = tsql.select(select, gold)
    matches = itsdb.match_rows(test_selection, gold_selection, 0)

    for key, testrows, goldrows in matches:
        test_bag = [simplemrs.loads_one(row[2]) for row in testrows]
        gold_bag = [simplemrs.loads_one(row[2]) for row in goldrows]
        test_unique, shared, gold_unique = mrs_compare.compare_bags(
            test_bag, gold_bag)
        yield {
            'id': key,
            'input': inputs_by_id[key],
            'test': test_unique,
            'shared': shared,
            'gold': gold_unique
        }
def test_parse_select_complex_identifiers():
    """Check that relation-qualified column names stay in the projection."""
    # (query, expected projection); qualified and bare names may be mixed
    cases = [
        ('select item.i-input',
         ['item.i-input']),
        ('select item.i-id result.mrs',
         ['item.i-id', 'result.mrs']),
        ('select item.i-id i-input mrs',
         ['item.i-id', 'i-input', 'mrs']),
    ]
    for query, projection in cases:
        assert tsql.inspect_query(query) == {
            'type': 'select',
            'projection': projection,
            'relations': [],
            'condition': None
        }
Beispiel #5
0
def _interpret_selection(select, source):
    """
    Resolve a single-column selection into its parts.

    Args:
        select (str): TSQL selection without the leading ``select``
            keyword (e.g., ``item:i-input`` or ``i-input from item``),
            optionally followed by a ``where`` clause
        source: object whose ``relations.find(column)`` is consulted
            when the query itself does not identify a single table
    Returns:
        tuple: ``(column, tablename, condition)``; *condition* is the
        tail of *select* starting at the ``where`` keyword itself
        (the keyword is kept), or ``''`` if there is no
        ``' where '`` in *select*
    Raises:
        ValueError: if the query does not project exactly one column
    """
    query = tsql.inspect_query('select ' + select)
    columns = query['projection']
    if columns == '*' or len(columns) != 1:
        raise ValueError("'select' must return a single column")

    tablename, _, column = columns[0].rpartition(':')
    if not tablename:
        # the table may be given as 'i-input from item' rather than as
        # the qualified form 'item:i-input'; if that does not yield
        # exactly one table, take the first one found for the column
        tables = query['tables']
        if len(tables) == 1:
            tablename = tables[0]
        else:
            tablename = source.relations.find(column)[0]

    # skip only the space before 'where' so the keyword stays in the
    # returned condition
    where_at = select.find(' where ')
    condition = '' if where_at < 0 else select[where_at + 1:]
    return column, tablename, condition
Beispiel #6
0
def convert(path, source_fmt, target_fmt, select='result.mrs',
            properties=True, lnk=True, color=False, indent=None,
            show_status=False, predicate_modifiers=False,
            semi=None):
    """
    Convert between various DELPH-IN Semantics representations.

    If *source_fmt* ends with ``"-lines"``, then *path* must be an
    input file containing one representation per line to be read with
    the :func:`decode` function of the source codec. If *target_fmt*
    ends with ``"-lines"``, then any :attr:`HEADER`, :attr:`JOINER`,
    or :attr:`FOOTER` defined by the target codec are ignored. The
    *source_fmt* and *target_fmt* arguments are then downcased and
    hyphens are removed to normalize the codec name.

    Note:

        For syntax highlighting, `delphin.highlight`_ must be
        installed, and it is only available for select target formats.

        .. _delphin.highlight: https://github.com/delph-in/delphin.highlight

    Args:
        path (str, file): filename, testsuite directory, open file, or
            stream of input representations; if `None`, standard input
            is used
        source_fmt (str): convert from this format
        target_fmt (str): convert to this format
        select (str): TSQL query for selecting data (ignored if *path*
            is not a testsuite directory; default: `"result.mrs"`)
        properties (bool): include morphosemantic properties if `True`
            (default: `True`)
        lnk (bool): include lnk surface alignments and surface strings
            if `True` (default: `True`)
        color (bool): apply syntax highlighting if `True` and
            *target_fmt* is `"simplemrs"` (default: `False`)
        indent (int, optional): specifies an explicit number of spaces
            for indentation
        show_status (bool): show disconnected EDS nodes (ignored if
            *target_fmt* is not `"eds"`; default: `False`)
        predicate_modifiers (bool): apply EDS predicate modification
            for certain kinds of patterns (ignored if *target_fmt* is
            not an EDS format; default: `False`)
        semi: a :class:`delphin.semi.SemI` object or path to a SEM-I
            (ignored if *target_fmt* is not `indexedmrs`)
    Returns:
        str: the converted representation
    Raises:
        CommandError: if *select* does not project exactly 1 column
    """
    if path is None:
        path = sys.stdin

    # normalize codec names
    source_fmt, source_lines = _parse_format_name(source_fmt)
    target_fmt, target_lines = _parse_format_name(target_fmt)
    # process other arguments
    highlight = _get_highlighter(color, target_fmt)
    source_codec = _get_codec(source_fmt)
    target_codec = _get_codec(target_fmt)
    converter = _get_converter(source_codec, target_codec, predicate_modifiers)

    # the selection is validated even when *path* turns out not to be
    # a testsuite directory
    if len(tsql.inspect_query('select ' + select)['projection']) != 1:
        raise CommandError(
            'Exactly 1 column must be given in selection query: '
            '(e.g., result.mrs)')

    # anything that is not already a SemI instance is handed to
    # load_semi()
    if semi is not None and not isinstance(semi, SemI):
        # lets ignore the SEM-I warnings until questions regarding
        # valid SEM-Is are resolved
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            semi = load_semi(semi)

    # read
    kwargs = {}
    if source_fmt == 'indexedmrs' and semi is not None:
        kwargs['semi'] = semi
    if source_lines:
        xs = _read_lines(path, source_codec, kwargs)
    else:
        xs = _read(path, source_codec, select, kwargs)

    # convert if source representation != target representation
    xs = _iter_convert(converter, xs)

    # write
    kwargs = {}
    # NOTE(review): a falsy indent (e.g., 0) is not forwarded to the
    # codec here, although the `indent is not None` check further down
    # still switches to newline-separated output -- confirm intended
    if indent:
        kwargs['indent'] = indent
    if target_fmt == 'eds':
        kwargs['show_status'] = show_status
    if target_fmt == 'indexedmrs' and semi is not None:
        kwargs['semi'] = semi
    kwargs['properties'] = properties
    kwargs['lnk'] = lnk
    # Manually dealing with headers, joiners, and footers is to
    # accommodate streaming output. Otherwise it is the same as
    # calling the following:
    #     target_codec.dumps(xs, **kwargs)
    if target_lines:
        # "-lines" targets: one representation per line and no
        # codec-defined header or footer
        header = footer = ''
        joiner = '\n'
    else:
        header = getattr(target_codec, 'HEADER', '')
        joiner = getattr(target_codec, 'JOINER', ' ')
        footer = getattr(target_codec, 'FOOTER', '')
        if indent is not None:
            # put the header, each representation, and the footer on
            # separate lines
            if header:
                header += '\n'
            joiner = joiner.strip() + '\n'
            if footer:
                footer = '\n' + footer

    parts = []
    for x in xs:
        try:
            s = target_codec.encode(x, **kwargs)
        except (PyDelphinException, KeyError, IndexError):
            # log the failure and leave this representation out of the
            # output instead of aborting the whole conversion
            logger.exception('could not convert representation')
        else:
            parts.append(s)

    output = highlight(header + joiner.join(parts) + footer)

    return output
Beispiel #7
0
def convert(path,
            source_fmt,
            target_fmt,
            select='result:mrs',
            properties=True,
            show_status=False,
            predicate_modifiers=False,
            color=False,
            pretty_print=False,
            indent=None):
    """
    Convert between various DELPH-IN Semantics representations.

    Args:
        path (str, file): filename, testsuite directory, open file, or
            stream of input representations; if `None`, standard input
            is read
        source_fmt (str): convert from this format
        target_fmt (str): convert to this format
        select (str): TSQL query for selecting data (ignored if *path*
            is not a testsuite directory; default: `"result:mrs"`)
        properties (bool): include morphosemantic properties if `True`
            (default: `True`)
        show_status (bool): show disconnected EDS nodes (ignored if
            *target_fmt* is not `"eds"`; default: `False`)
        predicate_modifiers (bool): apply EDS predicate modification
            for certain kinds of patterns (ignored if *target_fmt* is
            not an EDS format; default: `False`)
        color (bool): apply syntax highlighting if `True` and
            *target_fmt* is `"simplemrs"` (default: `False`)
        pretty_print (bool): if `True`, format the output with
            newlines and default indentation (default: `False`)
        indent (int, optional): specifies an explicit number of spaces
            for indentation (implies *pretty_print*)
    Returns:
        str: the converted representation
    Raises:
        ValueError: if *source_fmt* is an EDS format and *target_fmt*
            is not, or if *select* does not project exactly 1 column
    """
    if source_fmt.startswith('eds') and not target_fmt.startswith('eds'):
        raise ValueError(
            'Conversion from EDS to non-EDS currently not supported.')

    if indent:
        pretty_print = True
        # `True` means "use the default width"; anything else is
        # coerced with safe_int()
        indent = 4 if indent is True else safe_int(indent)

    if len(tsql.inspect_query('select ' + select)['projection']) != 1:
        raise ValueError('Exactly 1 column must be given in selection query: '
                         '(e.g., result:mrs)')

    # read
    loads = _get_codec(source_fmt)
    if path is None:
        xs = loads(sys.stdin.read())
    elif hasattr(path, 'read'):
        xs = loads(path.read())
    elif os.path.isdir(path):
        ts = itsdb.TestSuite(path)
        # one representation per selected row (the first one decoded
        # from the row's single column, or None if there is none)
        xs = [next(iter(loads(r[0])), None) for r in tsql.select(select, ts)]
    else:
        # close the file as soon as it has been read rather than
        # leaving the handle for the garbage collector
        with open(path, 'r') as f:
            xs = loads(f.read())

    # write
    dumps = _get_codec(target_fmt, load=False)
    kwargs = {}
    if color:
        kwargs['color'] = color
    if pretty_print:
        kwargs['pretty_print'] = pretty_print
    if indent:
        kwargs['indent'] = indent
    if target_fmt == 'eds':
        # for plain 'eds' the pretty_print flag is always passed, even
        # when it is False
        kwargs['pretty_print'] = pretty_print
        kwargs['show_status'] = show_status
    if target_fmt.startswith('eds'):
        kwargs['predicate_modifiers'] = predicate_modifiers
    kwargs['properties'] = properties

    # this is not a great way to improve robustness when converting
    # many representations, but it'll do until v1.0.0. Also, it only
    # improves robustness on the output, not the input.
    # Note that all the code below is to replace the following:
    #     return dumps(xs, **kwargs)
    head, joiner, tail = _get_output_details(target_fmt)
    parts = []
    if pretty_print:
        joiner = joiner.strip() + '\n'

    def _trim(s):
        # strip the per-call head and tail so they can be added once
        # around the joined output
        if head and s.startswith(head):
            s = s[len(head):].lstrip('\n')
        if tail and s.endswith(tail):
            s = s[:-len(tail)].rstrip('\n')
        return s

    for x in xs:
        try:
            s = dumps([x], **kwargs)
        except (PyDelphinException, KeyError, IndexError):
            # log and skip the representation that failed to serialize
            logging.exception('could not convert representation')
        else:
            s = _trim(s)
            parts.append(s)
    # set these after so head and tail are used correctly in _trim
    if pretty_print:
        if head:
            head += '\n'
        if tail:
            tail = '\n' + tail
    return head + joiner.join(parts) + tail
def test_inspect_query():
    """Check which queries can be inspected and the basic result shape."""
    # queries expected to be rejected with a syntax error
    rejected = (
        'info relations',
        'set max-results 5',
        'insert into item i-id values 10',
        'select *',
    )
    for query in rejected:
        with pytest.raises(tsql.TSQLSyntaxError):
            tsql.inspect_query(query)

    # (query, expected projection, expected relations)
    accepted = [
        ('select i-input',
         ['i-input'], []),
        ('select i-input i-wf',
         ['i-input', 'i-wf'], []),
        ('select i-input i-wf from item',
         ['i-input', 'i-wf'], ['item']),
        ('select i-input mrs from item result',
         ['i-input', 'mrs'], ['item', 'result']),
    ]
    for query, projection, relations in accepted:
        assert tsql.inspect_query(query) == {
            'type': 'select',
            'projection': projection,
            'relations': relations,
            'condition': None
        }
def test_parse_select_where_types_issue_261():
    """Check that `~` with an integer and `<` with a string are rejected."""
    # https://github.com/delph-in/pydelphin/issues/261
    mistyped = (
        'select i-id where i-wf ~ 1',
        'select i-id where i-input < "string"',
    )
    for query in mistyped:
        with pytest.raises(tsql.TSQLSyntaxError):
            tsql.inspect_query(query)