Exemple #1
0
 def test_write(self, single_item_profile, tmpdir):
     """Exercise TestSuite.write() with each argument form used below."""
     suite = itsdb.TestSuite(single_item_profile)

     def current_input():
         # re-fetch the row on every call so reloads are observed
         return suite['item'][0]['i-input']

     # an in-memory edit is visible at once and survives write + reload
     assert current_input() == 'The dog barks.'
     suite['item'][0]['i-input'] = 'The dog sleeps.'
     assert current_input() == 'The dog sleeps.'
     suite.write()
     suite.reload()
     assert current_input() == 'The dog sleeps.'

     # writing only 'parse' is expected to drop the pending 'item' edit
     suite['item'][0]['i-input'] = 'The cat sleeps.'
     suite.write('parse')
     suite.reload()
     assert current_input() == 'The dog sleeps.'

     # writing a list of table names that includes 'item'
     suite['item'][0]['i-input'] = 'The cat sleeps.'
     suite.write(['item', 'parse'])
     assert current_input() == 'The cat sleeps.'

     # writing a mapping from table name to replacement records
     replacement = itsdb.Record.from_dict(
         suite.relations['item'],
         {'i-id': 0, 'i-input': 'The cat meows.'})
     suite.write({'item': [replacement]})
     suite.reload()
     assert current_input() == 'The cat meows.'

     # writing to another directory with an alternative schema
     alt_dir = tmpdir.mkdir('alt')
     alt_relations = itsdb.Relations.from_string(_alt_relations)
     suite.write(path=str(alt_dir), relations=alt_relations)
     assert alt_dir.join('relations').read_text('utf-8') == _alt_relations
     written = sorted(entry.basename for entry in alt_dir.listdir())
     assert written == ['item', 'parse', 'relations', 'result']
     alt_suite = itsdb.TestSuite(str(alt_dir))
     assert 'i-date' in alt_suite['item'].fields
Exemple #2
0
    def test_init(self, single_item_profile):
        """Check table sizes for a relations-only suite and a profile."""
        tables = ('item', 'parse', 'result')

        # built from relations alone: every table is expected to be empty
        schema = itsdb.Relations.from_string(_simple_relations)
        blank = itsdb.TestSuite(relations=schema)
        for table in tables:
            assert len(blank[table]) == 0

        # built from the fixture profile: one row per table
        loaded = itsdb.TestSuite(single_item_profile)
        for table in tables:
            assert len(loaded[table]) == 1
Exemple #3
0
def compare(testsuite, gold, select='i-id i-input mrs'):
    """
    Compare two [incr tsdb()] profiles.

    Args:
        testsuite (str, TestSuite): path to the test [incr tsdb()]
            testsuite or a :class:`TestSuite` object
        gold (str, TestSuite): path to the gold [incr tsdb()]
            testsuite or a :class:`TestSuite` object
        select: TSQL query to select (id, input, mrs) triples
            (default: `i-id i-input mrs`)
    Yields:
        dict: Comparison results as::

            {"id": "item identifier",
             "input": "input sentence",
             "test": number_of_unique_results_in_test,
             "shared": number_of_shared_results,
             "gold": number_of_unique_results_in_gold}

    Raises:
        ValueError: if *select* does not project exactly three fields
            (raised when the generator is first advanced)
    """
    from delphin.mrs import simplemrs, compare as mrs_compare

    if not isinstance(testsuite, itsdb.TestSuite):
        if isinstance(testsuite, itsdb.ItsdbProfile):
            testsuite = testsuite.root
        testsuite = itsdb.TestSuite(testsuite)
    if not isinstance(gold, itsdb.TestSuite):
        if isinstance(gold, itsdb.ItsdbProfile):
            gold = gold.root
        gold = itsdb.TestSuite(gold)

    queryobj = tsql.inspect_query('select ' + select)
    if len(queryobj['projection']) != 3:
        raise ValueError('select does not return 3 fields: ' + select)

    input_select = '{} {}'.format(queryobj['projection'][0],
                                  queryobj['projection'][1])
    # The matched rows below may carry identifiers that occur in only
    # one of the two profiles, so gather the inputs from both. The test
    # profile is applied last so its input wins when both have the item.
    i_inputs = dict(tsql.select(input_select, gold))
    i_inputs.update(tsql.select(input_select, testsuite))

    matched_rows = itsdb.match_rows(tsql.select(select, testsuite),
                                    tsql.select(select, gold), 0)

    for (key, testrows, goldrows) in matched_rows:
        (test_unique, shared, gold_unique) = mrs_compare.compare_bags(
            [simplemrs.loads_one(row[2]) for row in testrows],
            [simplemrs.loads_one(row[2]) for row in goldrows])
        yield {
            'id': key,
            'input': i_inputs[key],
            'test': test_unique,
            'shared': shared,
            'gold': gold_unique
        }
Exemple #4
0
    def test_init(self, single_item_profile):
        """Check construction with no arguments, a schema, and a profile."""
        tables = ('item', 'parse', 'result')

        # constructing with no arguments is expected to fail
        with pytest.raises(itsdb.ITSDBError):
            itsdb.TestSuite()

        # schema only: every table is expected to be empty
        schema_path = pathlib.Path(single_item_profile, 'relations')
        schema_only = itsdb.TestSuite(schema=schema_path)
        assert [len(schema_only[name]) for name in tables] == [0, 0, 0]

        # the fixture profile has one row per table
        populated = itsdb.TestSuite(single_item_profile)
        assert [len(populated[name]) for name in tables] == [1, 1, 1]
def test_select(mini_testsuite):
    """Check projections and 'from' clauses of tsql.select()."""
    ts = itsdb.TestSuite(mini_testsuite)

    def query(text, **kwargs):
        # run a selection against the fixture and materialize its rows
        return list(tsql.select(text, ts, **kwargs))

    every_input = [('It rained.', ), ('Rained.', ), ('It snowed.', )]
    inputs_via_result = [('It rained.', ), ('It snowed.', )]

    assert query('i-input') == every_input
    assert query('i-input from item') == every_input
    assert query('i-input from item item') == every_input
    assert query('i-input from result') == inputs_via_result
    assert query('i-input from item result') == inputs_via_result

    # by default the values compare equal to strings...
    assert query('i-id i-input') == [
        ('10', 'It rained.'),
        ('20', 'Rained.'),
        ('30', 'It snowed.'),
    ]
    # ...while itsdb.Row records compare equal to integer identifiers
    assert query('i-id i-input', record_class=itsdb.Row) == [
        (10, 'It rained.'),
        (20, 'Rained.'),
        (30, 'It snowed.'),
    ]

    results = ts['result']
    assert query('i-id mrs') == [
        ('10', results[0]['mrs']),
        ('30', results[1]['mrs']),
    ]

    with pytest.raises(tsql.TSQLSyntaxError):
        tsql.select('*', ts)
def __cli_parse__(args):
    """
    Convert the MRSs of a profile to DMRS and serialize them as RDF.

    Reads `args.profile`, `args.prefix`, `args.verbosity`,
    `args.output`, and `args.format`.
    """
    # Outstanding work noted by the original author:
    #   - validate IRI prefix
    #   - handle exceptions
    #   - handle invalid profile
    #   - handle output exceptions

    profile = itsdb.TestSuite(args.profile)
    iri_prefix = args.prefix.strip("/")
    rdf_graph = Graph()

    for item_id, sentence, mrs_string in tsql.select('i-id i-input mrs',
                                                     profile):
        if args.verbosity > 0:
            print("Parsing sentence {}".format(item_id))
        # decode the MRS stored in the profile, then convert it to DMRS
        dmrs_obj = dmrs.from_mrs(simplemrs.decode(mrs_string))
        # the converter returns the graph to keep using
        rdf_graph = p.dmrs_to_rdf(d=dmrs_obj,
                                  prefix=iri_prefix,
                                  identifier=item_id,
                                  graph=rdf_graph,
                                  text=sentence)

    # write the accumulated graph in the requested format
    rdf_graph.serialize(destination=args.output, format=args.format)
Exemple #7
0
def _iter_representations(path: Path, fmt: str, p: int):
    """
    Yield semantic representations read from *path*.

    If *path* is a TSDB database, the *p*-th result of each processed
    item is converted with `from_mrs()`; `None` is yielded for an item
    that has no such result. If *path* is a file, it is loaded with
    the codec named by *fmt*: MRSs are converted with `from_mrs()`
    while DMRS and EDS objects are yielded as loaded.

    Raises:
        ValueError: if the codec's representation is unsupported or
            *path* is neither a file nor a TSDB database
    """
    if tsdb.is_database_directory(path):
        for response in itsdb.TestSuite(path).processed_items():
            try:
                result = response.result(p)
            except IndexError:
                # keep the output aligned with the items
                yield None
                continue
            yield from_mrs(result.mrs(), predicate_modifiers=True)
        return

    if not path.is_file():
        raise ValueError(f'not a file or TSDB database: {path}')

    codec = util.import_codec(fmt)
    rep = codec.CODEC_INFO.get('representation', '').lower()
    if rep == 'mrs':
        for mrs in codec.load(path):
            yield from_mrs(mrs, predicate_modifiers=True)
    elif rep in ('dmrs', 'eds'):
        yield from codec.load(path)
    else:
        raise ValueError(f'unsupported representation: {rep}')
Exemple #8
0
 def test_reload(self, single_item_profile):
     """An unsaved row replacement is discarded by reload()."""
     suite = itsdb.TestSuite(single_item_profile)
     stored, edited = 'The dog barks.', 'The dog sleeps.'
     assert suite['item'][0]['i-input'] == stored
     suite['item'][0] = (0, edited)
     assert suite['item'][0]['i-input'] == edited
     suite.reload()
     assert suite['item'][0]['i-input'] == stored
Exemple #9
0
def test_Row(empty_alt_testsuite):
    """Check access, formatting, and comparison of itsdb.Row."""
    ts = itsdb.TestSuite(str(empty_alt_testsuite))
    item = ts['item']
    date = datetime(2009, 9, 7)

    def make_row(*values):
        # build a row over the item table's fields
        return itsdb.Row(item.fields, list(values))

    row = make_row(0, 'sentence', date)

    # structure
    assert row.fields == item.fields
    assert row.keys() == ['i-id', 'i-input', 'i-date']
    assert len(row) == 3

    # access by field name and by position agree
    assert row['i-id'] == row[0] == 0
    assert row['i-input'] == row[1] == 'sentence'
    assert row['i-date'] == row[2] == date

    # string and raw forms
    assert str(row) == '0@sentence@7-sep-2009'
    assert row == (0, 'sentence', date)
    assert row.data == ('0', 'sentence', '7-sep-2009')

    # equality is sensitive to each column
    assert row == make_row(0, 'sentence', date)
    assert row != make_row(1, 'sentence', date)
    assert row != make_row(0, 'string', date)
    assert row != make_row(0, 'sentence', datetime(2009, 7, 9))

    # incorrect number of fields
    with pytest.raises(itsdb.ITSDBError):
        make_row(0)

    # None values get set to default, and
    # non-string values are left as-is
    row = make_row(0, None, None)
    assert row['i-id'] == 0
    assert row['i-input'] is None
    assert row['i-date'] is None
Exemple #10
0
def main(args):
    """
    Count variable-property values in the MRSs of each profile.

    For every profile in `args.PROFILE`, tally how often each
    (variable sort, property, value) combination occurs in the
    `result` table, report the tallies for that profile, and finally
    report the totals over all profiles.
    """
    grand_totals = _make_counters()
    grand_count = 0
    header = '{} ({} MRSs):'

    for profile in args.PROFILE:
        suite = itsdb.TestSuite(profile)
        profile_sums = _make_counters()
        profile_count = 0

        for record in suite['result']:
            profile_count += 1
            mrs = simplemrs.loads_one(record['mrs'])
            for var in mrs.variables():
                sort = var_sort(var)
                properties = mrs.properties(var)
                for name, value in properties.items():
                    # property names are upper-cased and values lower-cased
                    profile_sums[sort][name.upper()][value.lower()] += 1
        grand_count += profile_count

        print(header.format(profile, profile_count))
        report(profile_sums)

        # fold this profile's tallies into the overall totals
        for sort, by_name in profile_sums.items():
            for name, by_value in by_name.items():
                for value, count in by_value.items():
                    grand_totals[sort][name][value] += count

    print(header.format('TOTAL', grand_count))
    report(grand_totals)
def test_select_where_types_issue_261(mini_testsuite):
    """Conditions rejected with TSQLError (regression test for #261)."""
    # https://github.com/delph-in/pydelphin/issues/261
    ts = itsdb.TestSuite(mini_testsuite)
    rejected_queries = (
        'i-id where i-id ~ "regex"',
        'i-id where i-input < 1',
        'i-id where i-input = 1',
    )
    for query in rejected_queries:
        with pytest.raises(tsql.TSQLError):
            tsql.select(query, ts)
Exemple #12
0
 def test_processed_items(self, mini_testsuite):
     """Check the responses produced by TestSuite.processed_items()."""
     suite = itsdb.TestSuite(mini_testsuite)
     responses = list(suite.processed_items())
     assert len(responses) == 3
     first, second, third = responses

     expected_mrs = (
         '[ TOP: h0 INDEX: e2 [ e TENSE: past ]'
         '  RELS: < [ _rain_v_1<3:9> LBL: h1 ARG0: e2 ] >'
         '  HCONS: < h0 qeq h1 > ]')
     assert first['i-input'] == 'It rained.'
     assert len(first.results()) == 1
     assert first.result(0)['mrs'] == expected_mrs

     # the second item has no results in the fixture; the third has one
     assert len(second.results()) == 0
     assert len(third.results()) == 1
Exemple #13
0
def test_select_where(ts0):
    """Check 'where' conditions of tsql.select() (rows as lists)."""
    ts = itsdb.TestSuite(str(ts0))

    def inputs_where(condition):
        # select i-input for the rows satisfying *condition*
        return list(tsql.select('i-input where ' + condition, ts))

    rained, fragment, snowed = ['It rained.'], ['Rained.'], ['It snowed.']

    assert inputs_where('i-input ~ "It"') == [rained, snowed]
    assert inputs_where('i-input ~ "It" or i-id = 20') == [
        rained, fragment, snowed]
    assert inputs_where('i-date >= 2018-02-01') == [
        rained, fragment, snowed]
    assert inputs_where('readings > 0') == [rained, snowed]
Exemple #14
0
def select(dataspec, testsuite, mode='list', cast=True):
    """
    Select data from [incr tsdb()] profiles.

    Args:
        dataspec (str): TSQL select query (e.g., `'i-id i-input mrs'`
            or `'* from item where readings > 0'`)
        testsuite (str, TestSuite): testsuite or path to testsuite
            containing data to select
        mode (str): see :func:`delphin.itsdb.select_rows` for a
            description of the *mode* parameter (default: `list`)
        cast (bool): if `True`, cast column values to their datatype
            according to the relations file (default: `True`)
    Returns:
        a generator that yields selected data
    """
    is_profile = isinstance(testsuite, itsdb.ItsdbProfile)
    if is_profile or not isinstance(testsuite, itsdb.TestSuite):
        # an ItsdbProfile is reopened from its root directory; anything
        # else that is not a TestSuite is passed to TestSuite as given
        location = testsuite.root if is_profile else testsuite
        testsuite = itsdb.TestSuite(location)
    return tsql.select(dataspec, testsuite, mode=mode, cast=cast)
def test_select_where(mini_testsuite):
    """Check 'where' conditions of tsql.select() (rows as tuples)."""
    ts = itsdb.TestSuite(mini_testsuite)
    rained, fragment, snowed = ('It rained.', ), ('Rained.', ), ('It snowed.', )

    cases = [
        ('i-input where i-input ~ "It"', [rained, snowed]),
        ('i-input where i-input ~ "It" or i-id = 20',
         [rained, fragment, snowed]),
        ('i-input where i-date >= 2018-02-01', [rained, fragment, snowed]),
        ('i-input where readings > 0', [rained, snowed]),
    ]
    for query, expected in cases:
        assert list(tsql.select(query, ts)) == expected
Exemple #16
0
 def on_get_name(self, req, resp, name):
     """
     Respond with a mapping from table names to table URIs.

     The test suite is looked up by *name* in `self.index`; an
     unknown name results in `falcon.HTTPNotFound`. Each URI is the
     request URI followed by `/` and the URL-quoted table name.
     """
     try:
         entry = self.index[name]
     except KeyError:
         raise falcon.HTTPNotFound()
     suite = itsdb.TestSuite(entry['path'])
     base = req.uri
     links = {}
     for tablename in suite.schema:
         links[tablename] = base + '/' + urllib.parse.quote(tablename)
     resp.media = links
     resp.status = falcon.HTTP_OK
Exemple #17
0
def test_bad_date_issue_279b(tmp_path, empty_alt_testsuite):
    """A badly formatted date is readable uncast and warns when cast."""
    bad_date = 'September 8, 1999'

    # build a database with the fixture's schema and one item row
    db_dir = tmp_path.joinpath('test_bad_date_issue_279b')
    db_dir.mkdir()
    schema = tsdb.read_schema(empty_alt_testsuite)
    item_fields = schema['item']
    tsdb.write_schema(db_dir, schema)
    tsdb.write(db_dir, 'item', [(0, 'The cat meows.', bad_date)],
               item_fields)

    ts = itsdb.TestSuite(db_dir)
    uncast = list(ts['item'].select('i-date', cast=False))
    assert uncast == [(bad_date, )]
    # accessing the value through the row is expected to warn
    with pytest.warns(tsdb.TSDBWarning):
        ts['item'][0]['i-date']
Exemple #18
0
 def test_process(self, parser_cpu, single_item_skeleton):
     """Processing a skeleton fills the parse and result tables."""
     suite = itsdb.TestSuite(single_item_skeleton)
     assert len(suite['parse']) == 0
     assert len(suite['result']) == 0

     suite.process(parser_cpu)

     assert len(suite['parse']) == 1
     assert len(suite['result']) == 2
     assert suite['parse'][0]['parse-id'] == 0
     assert suite['parse'][0]['run-id'] == 0
     # both results belong to parse 0 and are numbered from 0
     for index in (0, 1):
         assert suite['result'][index]['parse-id'] == 0
         assert suite['result'][index]['result-id'] == index
Exemple #19
0
 def test_commit(self, single_item_profile, empty_alt_testsuite):
     """Edits survive reload() only once they have been committed."""
     suite = itsdb.TestSuite(single_item_profile)
     items = suite['item']
     before, after = 'The dog barks.', 'The dog sleeps.'

     # uncommitted changes do not persist
     assert items[0]['i-input'] == before
     items[0] = (0, after)
     assert items[0]['i-input'] == after
     assert suite.in_transaction
     suite.reload()
     assert items[0]['i-input'] == before
     assert not suite.in_transaction

     # committing them makes them persist
     items[0] = (0, after)
     assert suite.in_transaction
     suite.commit()
     assert not suite.in_transaction
     suite.reload()
     assert items[0]['i-input'] == after
Exemple #20
0
 def test_in_transaction(self, empty_testsuite):
     """Each kind of table mutation puts the suite in a transaction."""
     suite = itsdb.TestSuite(empty_testsuite)
     items = suite['item']
     dogs = (10, 'Dogs bark.')
     assert not suite.in_transaction

     # appending a row
     items.append(dogs)
     assert suite.in_transaction
     suite.commit()
     assert not suite.in_transaction

     # updating a row
     items.update(-1, {'i-input': 'Cats meow.'})
     assert suite.in_transaction
     suite.commit()
     assert not suite.in_transaction

     # deleting rows by slice assignment
     items[-1:] = []
     assert suite.in_transaction

     # clearing the table after a committed append
     items.append(dogs)
     suite.commit()
     items.clear()
     assert suite.in_transaction
def __cli_parse__(args):
    """
    Add the MRSs of a profile to an RDF graph and serialize it.

    Reads `args.profile`, `args.prefix`, `args.output`, and
    `args.format`.
    """
    # Outstanding work noted by the original author:
    #   - validate IRI prefix
    #   - handle exceptions
    #   - handle invalid profile
    #   - handle output exceptions

    profile = itsdb.TestSuite(args.profile)
    iri_prefix = args.prefix.strip("/")
    rdf_graph = Graph()

    for item_id, sentence, mrs_string in tsql.select('i-id i-input mrs',
                                                     profile):
        # decode the MRS stored in the profile and add it to the graph
        mrs_obj = simplemrs.decode(mrs_string)
        p.mrs_to_rdf(mrs_obj, iri_prefix, item_id, rdf_graph, text=sentence)

    # write the accumulated graph in the requested format
    rdf_graph.serialize(destination=args.output, format=args.format)
Exemple #22
0
    def tsdb_min(path_to_profile):
        """
        Collect per-item data from a profile into a dictionary.

        The argument path_to_profile should be, for
        example, '/delphin/erg2018/tsdb/mrs'. It is joined onto ROOT
        to locate the profile directory.

        Both skeletons and filled/parsed profiles can be inspected.
        This is why tsql.select is done in multiple queries.
        All profiles always have, minimally, the 'item' file.
        Anything else should be checked.

        This function returns a dictionary keyed by the i-ids of that
        profile:
        data[1]['i-wf'] = 1
        data[1]['i-input'] = "This is an example sentence."
        data[1]['i-comment'] = "The comment left inside the item file."
        data[1]['i-length'], data[1]['i-origin'] and
        data[1]['i-translation'] are filled in the same way.

        When the profile has a 'parse' file it also includes:
        data[1]['readings'] = 23 # value of the 'readings' column
        """
        profile_dir = path.join(ROOT, path_to_profile)
        ts = itsdb.TestSuite(profile_dir)

        data = dd(lambda: dd())

        for row in tsql.select(
                'i-id i-wf i-input  i-comment i-length i-origin i-translation',
                ts):
            i_id = row[0]
            data[i_id]['i-wf'] = row[1]
            data[i_id]['i-input'] = row[2]
            data[i_id]['i-comment'] = row[3]
            data[i_id]['i-length'] = row[4]
            data[i_id]['i-origin'] = row[5]
            data[i_id]['i-translation'] = row[6]

        #######################################################################
        # If we don't check if the file 'parse' exists, then pydelphin creates
        # an empty 'parse' file. This is undesirable, especially for skeletons
        #######################################################################
        # Join 'parse' as its own path component: appending it to the
        # profile path by string concatenation only works when the path
        # happens to end with a separator.
        if path.isfile(path.join(profile_dir, 'parse')):
            for row in tsql.select('i-id readings', ts):
                data[row[0]]['readings'] = row[1]

        return data
Exemple #23
0
def test_select(ts0):
    """Check projections and 'from' clauses of tsql.select() (list rows)."""
    ts = itsdb.TestSuite(str(ts0))
    every_input = [['It rained.'], ['Rained.'], ['It snowed.']]
    inputs_via_result = [['It rained.'], ['It snowed.']]

    cases = [
        ('i-input', every_input),
        ('i-input from item', every_input),
        ('i-input from item item', every_input),
        ('i-input from result', inputs_via_result),
        ('i-input from item result', inputs_via_result),
        ('i-id i-input',
         [[10, 'It rained.'], [20, 'Rained.'], [30, 'It snowed.']]),
    ]
    for query, expected in cases:
        assert list(tsql.select(query, ts)) == expected

    results = ts['result']
    selected_mrs = list(tsql.select('i-id mrs', ts))
    assert selected_mrs == [[10, results[0]['mrs']],
                            [30, results[1]['mrs']]]

    # a bare '*' without a 'from' clause is a syntax error
    with pytest.raises(tsql.TSQLSyntaxError):
        tsql.select('*', ts)

    everything = list(tsql.select('* from item', ts, cast=True))
    assert everything == list(ts['item'])
Exemple #24
0
def test_join(single_item_profile):
    """Check names, sizes, and field access of joined tables."""
    suite = itsdb.TestSuite(single_item_profile)

    # parse + result
    parse_result = itsdb.join(suite['parse'], suite['result'])
    assert parse_result.name == 'parse+result'
    assert len(parse_result) == 1
    # the joined table has one field fewer than its two inputs combined
    combined = len(suite['parse'].fields) + len(suite['result'].fields)
    assert len(parse_result.fields) == combined - 1
    row = parse_result[0]
    # fields can be addressed with or without a table qualifier
    assert row['parse:run-id'] == row['run-id']
    assert row['result:mrs'] == row['mrs']
    assert row['parse:parse-id'] == row['result:parse-id'] == row['parse-id']

    # item + (parse + result)
    item_first = itsdb.join(suite['item'], parse_result)
    assert item_first.name == 'item+parse+result'
    assert len(item_first) == 1
    combined = len(parse_result.fields) + len(suite['item'].fields)
    assert len(item_first.fields) == combined - 1
    row = item_first[0]
    assert row['item:i-input'] == row['i-input']
    assert row['item:i-id'] == row['parse:i-id']

    # (parse + result) + item: the name follows the argument order
    item_last = itsdb.join(parse_result, suite['item'])
    assert item_last.name == 'parse+result+item'
Exemple #25
0
    def on_get_table(self, req, resp, name, table):
        """
        Respond with one page of rows from *table* of a test suite.

        The test suite is looked up by *name* in `self.index`; an
        unknown name results in `falcon.HTTPNotFound`. The `limit`
        query parameter (default: the table length) is the page size
        and `page` (default: 1) is the 1-based page number. Column
        transforms registered for *table* in `self.transforms` are
        applied to each returned row.
        """
        try:
            entry = self.index[name]
        except KeyError:
            raise falcon.HTTPNotFound()
        table_ = itsdb.TestSuite(entry['path'])[table]

        limit = req.get_param_as_int('limit', default=len(table_))
        page = req.get_param_as_int('page', default=1)
        start = (page - 1) * limit
        stop = page * limit

        # resolve column names to indices once, before visiting rows
        transforms = [(table_.column_index(colname), transform)
                      for colname, transform in self.transforms.get(table, [])]

        def transformed(row):
            # copy the row to a list so cells can be replaced in place
            cells = list(row)
            for colidx, transform in transforms:
                cells[colidx] = transform(cells[colidx])
            return cells

        resp.media = [transformed(row) for row in table_[start:stop]]
        resp.status = falcon.HTTP_OK
def test_mkprof_issue_273(mini_testsuite, tmp_path):
    """mkprof with a condition on 'mrs' writes the item once (#273)."""
    # https://github.com/delph-in/pydelphin/issues/273
    from delphin import itsdb
    condition = 'mrs ~ "_snow_v_1"'
    expected_item = '30@It snowed.@1@2018-2-1 (15:00:00)\n'

    target_dir = tmp_path.joinpath('ts1')
    target_dir.mkdir()
    target = str(target_dir)
    source = mini_testsuite

    # this is when the condition occurs on a single row
    mkprof(target, source=source, full=True, where=condition)
    assert pathlib.Path(target, 'item').read_text() == expected_item

    # this is when the condition occurs on multiple rows
    source_ts = itsdb.TestSuite(source)
    source_ts['parse'].update(2, {'readings': 2})
    second_result = (
        30, 1, '[ TOP: h0 INDEX e2 [ e TENSE: past ]'
        '  RELS: < [ pronoun_q<0:2> LBL h3 ARG0: x4 RSTR: h5 BODY: h6 ]'
        '          [ pron<0:2> LBL: h7 ARG0: x4 ]'
        '          [ _snow_v_1<3:9> LBL: h1 ARG0: e2 ARG1: x4 ] >'
        '  HCONS: < h0 qeq h1 h5 qeq h7 > ]')
    source_ts['result'].append(second_result)
    source_ts.commit()
    mkprof(target, source=source, full=True, where=condition)
    assert pathlib.Path(target, 'item').read_text() == expected_item
Exemple #27
0
from delphin import ace
from delphin import itsdb
from delphin import tsql
from delphin import dmrs, eds
from delphin.codecs import eds as edsnative
from delphin.codecs import simplemrs
from delphin.codecs import dmrx

# import parser as p
from delphin.rdf import parser as p
from rdflib import Graph
import argparse

# Command line: a single positional argument naming the profile.
parser = argparse.ArgumentParser()
parser.add_argument("profile", help="profile path")
cli_args = parser.parse_args()

# Add the MRS of every selected row to one RDF graph.
ts = itsdb.TestSuite(cli_args.profile)
graph = Graph()
for item_id, mrs_string in tsql.select('i-id mrs', ts):
    decoded = simplemrs.decode(mrs_string)
    p.mrs_to_rdf(decoded, "http://example.com/example", item_id, graph)

# Serialize the graph as Turtle into the working directory.
graph.serialize(destination="test.ttl", format="turtle")
Exemple #28
0
from delphin import ace
from delphin import tsdb
from delphin import itsdb

# Process the 'sample-200-py' test suite with an ACE parser that is
# started with generalization disabled and full-forest mode enabled.
ts = itsdb.TestSuite('sample-200-py')
ace_options = ['--disable-generalization']
with ace.ACEParser('terg-mac.dat', cmdargs=ace_options,
                   full_forest=True) as cpu:
    ts.process(cpu)
Exemple #29
0
def process(grammar, testsuite, source=None, select=None,
            generate=False, transfer=False, full_forest=False,
            options=None, all_items=False, result_id=None, gzip=False,
            stderr=None):
    """
    Process (e.g., parse) a [incr tsdb()] profile.

    Results are written directly to *testsuite*.

    If *select* is `None`, the defaults depend on the task:

        ==========  =========================
        Task        Default value of *select*
        ==========  =========================
        Parsing     `item.i-input`
        Transfer    `result.mrs`
        Generation  `result.mrs`
        ==========  =========================

    Args:
        grammar (str): path to a compiled grammar image
        testsuite (str): path to a [incr tsdb()] testsuite where data
            will be read from (see *source*) and written to
        source (str): path to a [incr tsdb()] testsuite; if `None`,
            *testsuite* is used as the source of data
        select (str): TSQL query for selecting processor inputs
            (default depends on the processor type)
        generate (bool): if `True`, generate instead of parse
            (default: `False`)
        transfer (bool): if `True`, transfer instead of parse
            (default: `False`)
        full_forest (bool): if `True`, pass `full_forest=True` to the
            ACE parser; mutually exclusive with *generate* and
            *transfer* (default: `False`)
        options (list): list of ACE command-line options to use when
            invoking the ACE subprocess; unsupported options will
            give an error message
        all_items (bool): if `True`, don't exclude ignored items
            (those with `i-wf==2`) when parsing
        result_id (int): if given, only keep items with the specified
            `result-id`
        gzip (bool): if `True`, non-empty tables will be compressed
            with gzip
        stderr (file): stream for ACE's stderr
    Raises:
        CommandError: if *grammar* is not a file, if more than one of
            *generate*, *transfer*, and *full_forest* is set, or if
            *testsuite* exists but is not a TSDB database when a
            separate *source* is given
    """
    from delphin import ace

    grammar = Path(grammar).expanduser()
    testsuite = Path(testsuite).expanduser()

    if not grammar.is_file():
        raise CommandError(f'{grammar} is not a file')

    kwargs = {}
    kwargs['stderr'] = stderr
    if sum(1 if mode else 0 for mode in (generate, transfer, full_forest)) > 1:
        raise CommandError("'generate', 'transfer', and 'full-forest' "
                           "are mutually exclusive")

    if source is None:
        source = _validate_tsdb(testsuite)
    else:
        source = _validate_tsdb(source)
        if not tsdb.is_database_directory(testsuite):
            if testsuite.exists():
                raise CommandError(
                    f'{testsuite} exists and is not a TSDB database; '
                    'remove it or select a different destination path')
            mkprof(testsuite, source=source, full=False, quiet=True)
        else:
            pass  # both source and testsuite are valid TSDB databases

    if select is None:
        select = 'result.mrs' if (generate or transfer) else 'item.i-input'

    # Gather the filters first and emit a single `where` clause; adding
    # one `where` per filter produced an invalid query whenever both
    # the i-wf filter and the result-id filter applied.
    conditions = []
    if generate:
        processor = ace.ACEGenerator
    elif transfer:
        processor = ace.ACETransferer
    else:
        if full_forest:
            kwargs['full_forest'] = True
        if not all_items:
            conditions.append('i-wf != 2')
        processor = ace.ACEParser
    if result_id is not None:
        conditions.append(f'result-id == {result_id}')
    if conditions:
        select += ' where ' + ' and '.join(conditions)

    target = itsdb.TestSuite(testsuite)
    column, relation, condition = _interpret_selection(select, source)

    with tempfile.TemporaryDirectory() as tmpdir:
        # use a temporary test suite directory for filtered inputs
        mkprof(tmpdir, source=source, where=condition,
               full=True, gzip=True, quiet=True)
        tmp = itsdb.TestSuite(tmpdir)

        with processor(grammar, cmdargs=options, **kwargs) as cpu:
            target.process(cpu,
                           selector=(relation, column),
                           source=tmp,
                           gzip=gzip)
Exemple #30
0
from delphin import itsdb
from delphin import ace
from delphin import commands

# Source profile to read from and target profile to create and fill.
src_path, tgt_path = 'golden', 'p'

# Create the target profile from the source, then open both.
commands.mkprof(tgt_path, source=src_path)
src_ts = itsdb.TestSuite(src_path)
tgt_ts = itsdb.TestSuite(tgt_path)

# Process the target with an ACE generator, reading from the source.
with ace.ACEGenerator('erg.dat') as cpu:
    tgt_ts.process(cpu, source=src_ts)