Code Example #1
def test_select(mini_testsuite):
    ts = itsdb.TestSuite(mini_testsuite)
    assert list(tsql.select('i-input',
                            ts)) == [('It rained.', ), ('Rained.', ),
                                     ('It snowed.', )]
    assert list(tsql.select('i-input from item', ts)) == [('It rained.', ),
                                                          ('Rained.', ),
                                                          ('It snowed.', )]
    assert list(tsql.select('i-input from item item',
                            ts)) == [('It rained.', ), ('Rained.', ),
                                     ('It snowed.', )]
    assert list(tsql.select('i-input from result', ts)) == [('It rained.', ),
                                                            ('It snowed.', )]
    assert list(tsql.select('i-input from item result',
                            ts)) == [('It rained.', ), ('It snowed.', )]
    assert list(tsql.select('i-id i-input', ts)) == [('10', 'It rained.'),
                                                     ('20', 'Rained.'),
                                                     ('30', 'It snowed.')]
    assert list(tsql.select('i-id i-input', ts,
                            record_class=itsdb.Row)) == [(10, 'It rained.'),
                                                         (20, 'Rained.'),
                                                         (30, 'It snowed.')]
    res = ts['result']
    assert list(tsql.select('i-id mrs', ts)) == [('10', res[0]['mrs']),
                                                 ('30', res[1]['mrs'])]
    with pytest.raises(tsql.TSQLSyntaxError):
        tsql.select('*', ts)
Code Example #2
def compare(testsuite, gold, select='i-id i-input mrs'):
    """
    Compare two [incr tsdb()] profiles.

    Args:
        testsuite (str, TestSuite): path to the test [incr tsdb()]
            testsuite or a :class:`TestSuite` object
        gold (str, TestSuite): path to the gold [incr tsdb()]
            testsuite or a :class:`TestSuite` object
        select: TSQL query to select (id, input, mrs) triples
            (default: `i-id i-input mrs`)
    Yields:
        dict: Comparison results as::

            {"id": "item identifier",
             "input": "input sentence",
             "test": number_of_unique_results_in_test,
             "shared": number_of_shared_results,
             "gold": number_of_unique_results_in_gold}

    """
    from delphin.mrs import simplemrs, compare as mrs_compare

    if not isinstance(testsuite, itsdb.TestSuite):
        if isinstance(testsuite, itsdb.ItsdbProfile):
            testsuite = testsuite.root
        testsuite = itsdb.TestSuite(testsuite)
    if not isinstance(gold, itsdb.TestSuite):
        if isinstance(gold, itsdb.ItsdbProfile):
            gold = gold.root
        gold = itsdb.TestSuite(gold)

    queryobj = tsql.inspect_query('select ' + select)
    if len(queryobj['projection']) != 3:
        raise ValueError('select does not return 3 fields: ' + select)

    input_select = '{} {}'.format(queryobj['projection'][0],
                                  queryobj['projection'][1])
    i_inputs = dict(tsql.select(input_select, testsuite))

    matched_rows = itsdb.match_rows(tsql.select(select, testsuite),
                                    tsql.select(select, gold), 0)

    for (key, testrows, goldrows) in matched_rows:
        (test_unique, shared, gold_unique) = mrs_compare.compare_bags(
            [simplemrs.loads_one(row[2]) for row in testrows],
            [simplemrs.loads_one(row[2]) for row in goldrows])
        yield {
            'id': key,
            'input': i_inputs[key],
            'test': test_unique,
            'shared': shared,
            'gold': gold_unique
        }
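A minimal usage sketch of the compare() generator defined above; the two profile paths are hypothetical:

for result in compare('tsdb/current/mrs', 'tsdb/gold/mrs'):
    print('{id}\t<{test},{shared},{gold}>'.format(**result))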
Code Example #3
def test_select_where(ts0):
    ts = itsdb.TestSuite(str(ts0))
    assert list(tsql.select('i-input where i-input ~ "It"',
                            ts)) == [['It rained.'], ['It snowed.']]
    assert list(tsql.select('i-input where i-input ~ "It" or i-id = 20',
                            ts)) == [['It rained.'], ['Rained.'],
                                     ['It snowed.']]
    assert list(tsql.select('i-input where i-date >= 2018-02-01',
                            ts)) == [['It rained.'], ['Rained.'],
                                     ['It snowed.']]
    assert list(tsql.select('i-input where readings > 0',
                            ts)) == [['It rained.'], ['It snowed.']]
Code Example #4
def test_select_where(mini_testsuite):
    ts = itsdb.TestSuite(mini_testsuite)
    assert list(tsql.select('i-input where i-input ~ "It"',
                            ts)) == [('It rained.', ), ('It snowed.', )]
    assert list(tsql.select('i-input where i-input ~ "It" or i-id = 20',
                            ts)) == [('It rained.', ), ('Rained.', ),
                                     ('It snowed.', )]
    assert list(tsql.select('i-input where i-date >= 2018-02-01',
                            ts)) == [('It rained.', ), ('Rained.', ),
                                     ('It snowed.', )]
    assert list(tsql.select('i-input where readings > 0',
                            ts)) == [('It rained.', ), ('It snowed.', )]
Code Example #5
def __cli_parse__(args):
    """"""
    # validate IRI prefix
    # handle exceptions
    # handle invalid profile
    # handle output exceptions

    ts = itsdb.TestSuite(args.profile)
    prefix = args.prefix.strip("/")
    graph = Graph()

    for row in tsql.select('i-id i-input mrs', ts):
        id = row[0]
        text = row[1]
        if args.verbosity > 0:
            print("Parsing sentence {}".format(id))
        # parse mrs from profile
        m = simplemrs.decode(row[2])
        # transform MRS to DMRS
        d = dmrs.from_mrs(m)
        graph = p.dmrs_to_rdf(d=d,
                              prefix=prefix,
                              identifier=id,
                              graph=graph,
                              text=text)
    # serializes output
    graph.serialize(destination=args.output, format=args.format)
Code Example #6
def _mkprof_from_database(destination, db, schema, where, full, gzip):
    if schema is None:
        schema = db.schema

    destination.mkdir(exist_ok=True)
    tsdb.write_schema(destination, schema)

    to_copy = set(schema if full else tsdb.TSDB_CORE_FILES)
    where = '' if where is None else 'where ' + where

    for table in schema:
        if table not in to_copy or _no_such_relation(db, table):
            records = []
        elif where:
            # filter the data, but use all if the query fails
            # (e.g., if the filter and table cannot be joined)
            try:
                records = _tsql_distinct(
                    tsql.select(f'* from {table} {where}', db))
            except tsql.TSQLError:
                records = list(db[table])
        else:
            records = list(db[table])
        tsdb.write(destination,
                   table,
                   records,
                   schema[table],
                   gzip=gzip)
Code Example #7
File: delphin_call.py  Project: lmorgadodacosta/iTELL
    def tsdb_min(path_to_profile):
        """
        The argument path_to_profile should be, for
        example, '/delphin/erg2018/tsdb/mrs'.

        Both skeletons and filled/parsed profiles can be inspected.
        This is why tsql.select is done in multiple queries.
        All profiles always have, minimally, the 'items' file.
        Anything else should be checked.

        This function returns a dictionary based on i-ids of that profile:
        data[1]['i-wf'] = 1
        data[1]['i-input'] = "This is an example sentence."
        data[1]['i-comment'] = "The comment left inside the items-file."

        Optionally it can include:
        data[1]['readings'] = 23  # number of derivation trees
        """
        ts = itsdb.TestSuite(path.join(ROOT, path_to_profile))

        data = dd(lambda: dd())

        for row in tsql.select(
                'i-id i-wf i-input  i-comment i-length i-origin i-translation',
                ts):
            i_id = row[0]
            data[i_id]['i-wf'] = row[1]
            data[i_id]['i-input'] = row[2]
            data[i_id]['i-comment'] = row[3]
            data[i_id]['i-length'] = row[4]
            data[i_id]['i-origin'] = row[5]
            data[i_id]['i-translation'] = row[6]

        #######################################################################
        # If we don't check if the file 'parse' exists, then pydelphin creates
        # an empty 'parse' file. This is undesirable, especially for skeletons
        #######################################################################
        if path.isfile(path.join(ROOT, path_to_profile, 'parse')):
            for row in tsql.select('i-id readings', ts):
                data[row[0]]['readings'] = row[1]

        return data
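A hypothetical call to the tsdb_min() helper above; the profile path is illustrative and ROOT is assumed to be defined elsewhere in the module:

data = tsdb_min('delphin/erg2018/tsdb/mrs')
for i_id, fields in data.items():
    print(i_id, fields['i-wf'], fields['i-input'])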
Code Example #8
def test_select(ts0):
    ts = itsdb.TestSuite(str(ts0))
    assert list(tsql.select('i-input', ts)) == [['It rained.'], ['Rained.'],
                                                ['It snowed.']]
    assert list(tsql.select('i-input from item', ts)) == [['It rained.'],
                                                          ['Rained.'],
                                                          ['It snowed.']]
    assert list(tsql.select('i-input from item item', ts)) == [['It rained.'],
                                                               ['Rained.'],
                                                               ['It snowed.']]
    assert list(tsql.select('i-input from result', ts)) == [['It rained.'],
                                                            ['It snowed.']]
    assert list(tsql.select('i-input from item result',
                            ts)) == [['It rained.'], ['It snowed.']]
    assert list(tsql.select('i-id i-input', ts)) == [[10, 'It rained.'],
                                                     [20, 'Rained.'],
                                                     [30, 'It snowed.']]
    res = ts['result']
    assert list(tsql.select('i-id mrs', ts)) == [[10, res[0]['mrs']],
                                                 [30, res[1]['mrs']]]
    with pytest.raises(tsql.TSQLSyntaxError):
        tsql.select('*', ts)
    assert list(tsql.select('* from item', ts, cast=True)) == list(ts['item'])
Code Example #9
def select(query: str, path: util.PathLike, record_class=None):
    """
    Select data from [incr tsdb()] test suites.

    Args:
        query (str): TSQL select query (e.g., `'i-id i-input mrs'` or
            `'* from item where readings > 0'`)
        path: path to a TSDB test suite
        record_class: alternative class for records in the selection
    Yields:
        selected data from the test suite
    """
    db = tsdb.Database(path, autocast=True)
    return tsql.select(query, db, record_class=record_class)
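A minimal sketch of calling the select() wrapper above; the test suite path is hypothetical:

for row in select('i-id i-input where readings > 0', 'tsdb/mrs'):
    print(*row)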
Code Example #10
def test_select_where_types_issue_261(mini_testsuite):
    # https://github.com/delph-in/pydelphin/issues/261
    ts = itsdb.TestSuite(mini_testsuite)
    with pytest.raises(tsql.TSQLError):
        tsql.select('i-id where i-id ~ "regex"', ts)
    with pytest.raises(tsql.TSQLError):
        tsql.select('i-id where i-input < 1', ts)
    with pytest.raises(tsql.TSQLError):
        tsql.select('i-id where i-input = 1', ts)
Code Example #11
def _read(path, source_codec, select, kwargs):
    if hasattr(path, 'read'):
        xs = list(source_codec.load(path, **kwargs))
    else:
        path = Path(path).expanduser()
        if path.is_dir():
            db = tsdb.Database(path)
            # ts = itsdb.TestSuite(path)
            xs = [
                next(iter(source_codec.loads(r[0], **kwargs)), None)
                for r in tsql.select(select, db)
            ]
        else:
            xs = list(source_codec.load(path, **kwargs))
    yield from xs
Code Example #12
def __cli_parse__(args):
    """"""
    # validate IRI prefix
    # handle exceptions
    # handle invalid profile
    # handle output exceptions

    ts = itsdb.TestSuite(args.profile)
    prefix = args.prefix.strip("/")
    graph = Graph()

    for row in tsql.select('i-id i-input mrs', ts):
        id = row[0]
        text = row[1]
        # parse MRS from profile
        m = simplemrs.decode(row[2])
        p.mrs_to_rdf(m, prefix, id, graph, text=text)
    # serializes output
    graph.serialize(destination=args.output, format=args.format)
Code Example #13
def select(dataspec, testsuite, mode='list', cast=True):
    """
    Select data from [incr tsdb()] profiles.

    Args:
        dataspec (str): TSQL select query (e.g., `'i-id i-input mrs'` or
            `'* from item where readings > 0'`)
        testsuite (str, TestSuite): testsuite or path to testsuite
            containing data to select
        mode (str): see :func:`delphin.itsdb.select_rows` for a
            description of the *mode* parameter (default: `list`)
        cast (bool): if `True`, cast column values to their datatype
            according to the relations file (default: `True`)
    Returns:
        a generator that yields selected data
    """
    if isinstance(testsuite, itsdb.ItsdbProfile):
        testsuite = itsdb.TestSuite(testsuite.root)
    elif not isinstance(testsuite, itsdb.TestSuite):
        testsuite = itsdb.TestSuite(testsuite)
    return tsql.select(dataspec, testsuite, mode=mode, cast=cast)
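The older select() API above also accepts an open TestSuite in place of a path; a hedged sketch with an illustrative path:

ts = itsdb.TestSuite('tsdb/mrs')
for i_id, i_input in select('i-id i-input', ts):
    print(i_id, i_input)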
Code Example #14
def read_profile(input_dir, output_dir, profile_name, mrp_eds, lexicon, args):
    ts = d_itsdb.TestSuite(input_dir)
 
    derivation_strs = []
    supertag_strs = []
    dmrs_json_strs = []

    for iid, sentence, parse_tokens, result_derivation, result_mrs in d_tsql.select('i-id i-input p-tokens derivation mrs', ts):
        tokens_rep = d_tokens.YYTokenLattice.from_string(parse_tokens)
        token_dict = {tok.id : tok for tok in tokens_rep.tokens}
        derivation_rep = d_derivation.from_string(result_derivation)
        assert len(derivation_rep.daughters) == 1 
        derivation_rep = derivation_rep.daughters[0]

        if mrp_eds:
            if iid in mrp_eds:
                try:
                    eds_rep = dc_eds.decode(mrp_eds[iid])
                    dmrs_rep = eds_to_dmrs(eds_rep)
                except d_eds._exceptions.EDSSyntaxError:
                    #print("Skipping: EDS syntax error", mrp_eds[iid])
                    continue
            else:
                #print("Unmatched:", iid)
                continue
        else:
            try:
                mrs_rep = dc_simplemrs.decode(result_mrs)
            except d_mrs._exceptions.MRSSyntaxError:
                #print("Skipping: MRS syntax error", result_mrs)
                continue

            dmrs_rep = d_dmrs.from_mrs(mrs_rep)

        mr = semantics.SemanticRepresentation(profile_name + ":" + iid, sentence, token_dict, derivation_rep, lexicon) # read derivation tree

        if args.convert_semantics:
            mr.map_dmrs(dmrs_rep)
            mr.process_semantic_tree(mr.root_node_id, dmrs_rep)

        mr.print_mrs()

        if args.extract_syntax:
            derivation_strs.append(mr.derivation_tree_str(mr.root_node_id, newline=False).lstrip())
            supertag_strs.append(mr.supertag_str(mr.root_node_id).strip())

        if args.extract_semantics:
            dmrs_json_strs.append(mr.dmrs_json_str(dmrs_rep))

    if args.extract_syntax:
        with open(output_dir + ".tree", 'w') as dt_out:
            for s in derivation_strs:
                dt_out.write(s + "\n")
        with open(output_dir + ".tags", 'w') as st_out:
            for s in supertag_strs:
                st_out.write(s + "\n")

    if args.extract_semantics:
        with open(output_dir + ".dmrs", 'w') as d_out:
            for s in dmrs_json_strs:
                if s != "":
                    d_out.write(s + "\n")
Code Example #15
File: process.py  Project: yfaria/delph-in-rdf
from delphin import ace
from delphin import itsdb
from delphin import tsql
from delphin import dmrs, eds
from delphin.codecs import eds as edsnative
from delphin.codecs import simplemrs
from delphin.codecs import dmrx

# import parser as p
from delphin.rdf import parser as p
from rdflib import Graph
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("profile", help="profile path")
ts = itsdb.TestSuite(parser.parse_args().profile)
graph = Graph()
for row in tsql.select('i-id mrs', ts):
    m = simplemrs.decode(row[1])
    p.mrs_to_rdf(m, "http://example.com/example", row[0], graph)

graph.serialize(destination="test.ttl", format="turtle")
Code Example #16
def __cli_parse__(args):
    # remove the not well formed sentences? add option?
    # print MRS or parse to DMRS format?

    path = args.profile
    prefix = args.prefix.strip("/")
    semrep = args.semrep.lower()
    parser = None
    # Setting verbosity; need to figure a better solution.
    if args.verbosity == 1:
        logger.setLevel(20)
    elif args.verbosity >= 2:
        logger.setLevel(10)

    try:
        # validates path
        if not isdir(path):
            raise NotADirectoryError(f"Path is not a directory: {path}")
        # validates profile
        if not is_database_directory(path):
            raise TSDBError(f'Invalid test suite directory: {path}')
        # validates URI prefix
        if not _is_valid_uri(prefix):
            raise Exception(f'Invalid URI: {prefix}')
        # validate format and get converter
        to_rdf, from_mrs = _get_converters(semrep)

        # open Test Suite and start conversion
        ts = itsdb.TestSuite(path)
        # logger.info(f"Converting {len(ts['result'])} analysis of {len(ts['item'])} sentences from {args.profile}")
        logger.info(
            f"Converting {len(ts['result'])} analysis of {len(ts['item'])} sentences from {args.profile}"
        )

        # Creating the store and the default graph
        store = plugin.get("IOMemory", Store)()
        defaultGraph = Graph(store, identifier=BNode())
        PROFILE = URIRef(f"{prefix}")  # review later
        defaultGraph.add((PROFILE, RDF.type, DELPH.Profile))
        semrepURI, prof_semrep_relation = _get_RDF_semrep(semrep, store)
        store.bind("erg", ERG)
        store.bind("delph", DELPH)
        store.bind("pos", POS)
        # store.bind("upref", prefix) # may be useful

        # The tsql takes some time to be processed:
        # logger.info(f"Loading the profile")
        logger.info(f"Loading the profile")
        profile_data = tsql.select('parse-id result-id i-input mrs', ts)
        logger.info(f"Converting the profile")
        # Iterating over the results:
        for (parse_id, result_id, text, mrs_string) in profile_data:
            logger.debug(
                f"Converting the result {result_id} of sentence {parse_id}")
            m = simplemrs.decode(mrs_string)

            # making sure of the well formedness of "m"
            if not is_well_formed(m):
                logger.warning(
                    f"Result {result_id} of sentence {parse_id} is not well formed"
                )
                # continue

            # converting the MRS object to the representation intended to be converted
            obj = from_mrs(m)
            # logger.debug(f"Result {result_id} of item {parse_id}: \n\t{text}\n\t{obj}\n\t{mrs_string}")

            # Creating URIs for relevant resources.
            ITEM = URIRef(
                f"{prefix}/{parse_id}"
            )  # The item part may be redundant, maybe iterate before the itens
            RESULT = URIRef(f"{prefix}/{parse_id}/{result_id}")
            SEMREPI = URIRef(f"{prefix}/{parse_id}/{result_id}/{semrep}")

            # adding types:
            defaultGraph.add((ITEM, RDF.type, DELPH.Item))
            defaultGraph.add((RESULT, RDF.type, DELPH.Result))
            defaultGraph.add((SEMREPI, RDF.type, semrepURI))

            # Associating text to item:
            defaultGraph.add((ITEM, DELPH.hasText, Literal(text)))

            # Linking those nodes:
            defaultGraph.add((PROFILE, DELPH.hasItem, ITEM))
            defaultGraph.add((ITEM, DELPH.hasResult, RESULT))
            defaultGraph.add((RESULT, prof_semrep_relation, SEMREPI))

            to_rdf(obj, SEMREPI, store, defaultGraph)

        # serializes results
        logger.info(f"Serializing results to {args.output}")
        ConjunctiveGraph(store).serialize(destination=args.output,
                                          format=args.format)
        logger.info(f"DONE")

    # except PyDelphinSyntaxError as e:
    #     logger.exception(e)
    # except ImportError as e:
    #     logger.exception(e)
    # except TSDBError as e:
    #     logger.exception(e)
    except Exception as e:
        logger.error(e)
Code Example #17
def process(grammar,
            testsuite,
            source=None,
            select=None,
            generate=False,
            transfer=False,
            options=None,
            all_items=False,
            result_id=None,
            gzip=False):
    """
    Process (e.g., parse) a [incr tsdb()] profile.

    Results are written directly to *testsuite*.

    If *select* is `None`, the defaults depend on the task:

        ==========  =========================
        Task        Default value of *select*
        ==========  =========================
        Parsing     `item:i-input`
        Transfer    `result:mrs`
        Generation  `result:mrs`
        ==========  =========================

    Args:
        grammar (str): path to a compiled grammar image
        testsuite (str): path to a [incr tsdb()] testsuite where data
            will be read from (see *source*) and written to
        source (str): path to a [incr tsdb()] testsuite; if `None`,
            *testsuite* is used as the source of data
        select (str): TSQL query for selecting processor inputs
            (default depends on the processor type)
        generate (bool): if `True`, generate instead of parse
            (default: `False`)
        transfer (bool): if `True`, transfer instead of parse
            (default: `False`)
        options (list): list of ACE command-line options to use when
            invoking the ACE subprocess; unsupported options will
            give an error message
        all_items (bool): if `True`, don't exclude ignored items
            (those with `i-wf==2`) when parsing
        result_id (int): if given, only keep items with the specified
            `result-id`
        gzip (bool): if `True`, non-empty tables will be compressed
            with gzip
    """
    from delphin.interfaces import ace

    if generate and transfer:
        raise ValueError("'generate' is incompatible with 'transfer'")
    if source is None:
        source = testsuite
    if select is None:
        select = 'result:mrs' if (generate or transfer) else 'item:i-input'
    if generate:
        processor = ace.AceGenerator
    elif transfer:
        processor = ace.AceTransferer
    else:
        if not all_items:
            select += ' where i-wf != 2'
        processor = ace.AceParser
    if result_id is not None:
        select += ' where result-id == {}'.format(result_id)

    source = itsdb.TestSuite(source)
    target = itsdb.TestSuite(testsuite)
    column, tablename, condition = _interpret_selection(select, source)
    table = itsdb.Table(
        source[tablename].fields,
        tsql.select('* from {} {}'.format(tablename, condition),
                    source,
                    cast=False))

    with processor(grammar, cmdargs=options) as cpu:
        target.process(cpu, ':' + column, source=table, gzip=gzip)
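Hypothetical calls to process() above; the grammar images and profile paths are made up for illustration:

# parse the items of a profile in place
process('erg.dat', 'tsdb/current/mrs')
# generate from the MRS results of an already-parsed source profile
process('erg-gen.dat', 'tsdb/gen/mrs', source='tsdb/current/mrs', generate=True)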
Code Example #18
def mkprof(destination,
           source=None,
           relations=None,
           where=None,
           in_place=False,
           skeleton=False,
           full=False,
           gzip=False):
    """
    Create [incr tsdb()] profiles or skeletons.

    Data for the testsuite may come from an existing testsuite or from
    a list of sentences. There are four main usage patterns:

        - `source="testsuite/"` -- read data from `testsuite/`
        - `source=None, in_place=True` -- read data from *destination*
        - `source=None, in_place=False` -- read sentences from stdin
        - `source="sents.txt"` -- read sentences from `sents.txt`

    For the latter two, the *relations* parameter must be specified.

    Args:
        destination (str): path of the new testsuite
        source (str): path to a source testsuite or a file containing
            sentences; if not given and *in_place* is `False`,
            sentences are read from stdin
        relations (str): path to a relations file to use for the
            created testsuite; if `None` and *source* is given, the
            relations file of the source testsuite is used
        where (str): TSQL condition to filter records by; ignored if
            *source* is not a testsuite
        in_place (bool): if `True` and *source* is not given, use
            *destination* as the source for data (default: `False`)
        skeleton (bool): if `True`, only write tsdb-core files
            (default: `False`)
        full (bool): if `True`, copy all data from the source
            testsuite (requires *source* to be a testsuite path;
            default: `False`)
        gzip (bool): if `True`, non-empty tables will be compressed
            with gzip
    """
    # basic validation
    if skeleton and full:
        raise ValueError("'skeleton' is incompatible with 'full'")
    elif skeleton and in_place:
        raise ValueError("'skeleton' is incompatible with 'in_place'")
    elif in_place and source is not None:
        raise ValueError("'in_place' is incompatible with 'source'")
    if in_place:
        source = destination
    if full and (source is None or not os.path.isdir(source)):
        raise ValueError("'full' must be used with a source testsuite")
    if relations is None and source is not None and os.path.isdir(source):
        relations = os.path.join(source, 'relations')
    elif relations is None or not os.path.isfile(relations):
        raise ValueError(
            'invalid or missing relations file: {}'.format(relations))
    # setup destination testsuite
    _prepare_output_directory(destination)
    dts = itsdb.TestSuite(path=destination, relations=relations)
    # input is sentences on stdin
    if source is None:
        dts.write({'item': _lines_to_rows(sys.stdin, dts.relations)},
                  gzip=gzip)
    # input is sentence file
    elif os.path.isfile(source):
        with open(source) as fh:
            dts.write({'item': _lines_to_rows(fh, dts.relations)}, gzip=gzip)
    # input is source testsuite
    elif os.path.isdir(source):
        sts = itsdb.TestSuite(source)
        tables = dts.relations.tables if full else itsdb.tsdb_core_files
        where = '' if where is None else 'where ' + where
        for table in tables:
            if sts.size(table) > 0:
                # filter the data, but use all if the query fails
                # (e.g., if the filter and table cannot be joined)
                try:
                    rows = tsql.select('* from {} {}'.format(table, where),
                                       sts,
                                       cast=False)
                except itsdb.ItsdbError:
                    rows = sts[table]
                dts.write({table: rows}, gzip=gzip)
    dts.reload()
    # unless a skeleton was requested, make empty files for other tables
    if not skeleton:
        for table in dts.relations:
            if len(dts[table]) == 0:
                dts.write({table: []})

    # summarize what was done
    if sys.stdout.isatty():
        _red = lambda s: '\x1b[1;31m{}\x1b[0m'.format(s)
    else:
        _red = lambda s: s
    fmt = '{:>8} bytes\t{}'
    for filename in ['relations'] + list(dts.relations.tables):
        path = os.path.join(destination, filename)
        if os.path.isfile(path):
            stat = os.stat(path)
            print(fmt.format(stat.st_size, filename))
        elif os.path.isfile(path + '.gz'):
            stat = os.stat(path + '.gz')
            print(fmt.format(stat.st_size, _red(filename + '.gz')))
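Hypothetical invocations of mkprof() above, matching the usage patterns listed in the docstring (all paths are illustrative):

# copy an existing test suite, keeping only grammatical items
mkprof('new-profile', source='old-profile', where='i-wf = 1')
# build a skeleton from a sentence file and an explicit relations schema
mkprof('skeleton', source='sents.txt', relations='etc/relations', skeleton=True)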
Code Example #19
def convert(path,
            source_fmt,
            target_fmt,
            select='result:mrs',
            properties=True,
            show_status=False,
            predicate_modifiers=False,
            color=False,
            pretty_print=False,
            indent=None):
    """
    Convert between various DELPH-IN Semantics representations.

    Args:
        path (str, file): filename, testsuite directory, open file, or
            stream of input representations
        source_fmt (str): convert from this format
        target_fmt (str): convert to this format
        select (str): TSQL query for selecting data (ignored if *path*
            is not a testsuite directory; default: `"result:mrs"`)
        properties (bool): include morphosemantic properties if `True`
            (default: `True`)
        show_status (bool): show disconnected EDS nodes (ignored if
            *target_fmt* is not `"eds"`; default: `False`)
        predicate_modifiers (bool): apply EDS predicate modification
            for certain kinds of patterns (ignored if *target_fmt* is
            not an EDS format; default: `False`)
        color (bool): apply syntax highlighting if `True` and
            *target_fmt* is `"simplemrs"` (default: `False`)
        pretty_print (bool): if `True`, format the output with
            newlines and default indentation (default: `False`)
        indent (int, optional): specifies an explicit number of spaces
            for indentation (implies *pretty_print*)
    Returns:
        str: the converted representation
    """
    if source_fmt.startswith('eds') and not target_fmt.startswith('eds'):
        raise ValueError(
            'Conversion from EDS to non-EDS currently not supported.')

    if indent:
        pretty_print = True
        indent = 4 if indent is True else safe_int(indent)

    if len(tsql.inspect_query('select ' + select)['projection']) != 1:
        raise ValueError('Exactly 1 column must be given in selection query: '
                         '(e.g., result:mrs)')

    # read
    loads = _get_codec(source_fmt)
    if path is None:
        xs = loads(sys.stdin.read())
    elif hasattr(path, 'read'):
        xs = loads(path.read())
    elif os.path.isdir(path):
        ts = itsdb.TestSuite(path)
        xs = [next(iter(loads(r[0])), None) for r in tsql.select(select, ts)]
    else:
        xs = loads(open(path, 'r').read())

    # write
    dumps = _get_codec(target_fmt, load=False)
    kwargs = {}
    if color: kwargs['color'] = color
    if pretty_print: kwargs['pretty_print'] = pretty_print
    if indent: kwargs['indent'] = indent
    if target_fmt == 'eds':
        kwargs['pretty_print'] = pretty_print
        kwargs['show_status'] = show_status
    if target_fmt.startswith('eds'):
        kwargs['predicate_modifiers'] = predicate_modifiers
    kwargs['properties'] = properties

    # this is not a great way to improve robustness when converting
    # many representations, but it'll do until v1.0.0. Also, it only
    # improves robustness on the output, not the input.
    # Note that all the code below is to replace the following:
    #     return dumps(xs, **kwargs)
    head, joiner, tail = _get_output_details(target_fmt)
    parts = []
    if pretty_print:
        joiner = joiner.strip() + '\n'

    def _trim(s):
        if head and s.startswith(head):
            s = s[len(head):].lstrip('\n')
        if tail and s.endswith(tail):
            s = s[:-len(tail)].rstrip('\n')
        return s

    for x in xs:
        try:
            s = dumps([x], **kwargs)
        except (PyDelphinException, KeyError, IndexError):
            logging.exception('could not convert representation')
        else:
            s = _trim(s)
            parts.append(s)
    # set these after so head and tail are used correctly in _trim
    if pretty_print:
        if head:
            head += '\n'
        if tail:
            tail = '\n' + tail
    return head + joiner.join(parts) + tail
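A hedged sketch of calling convert() above on a test suite directory; the path is illustrative:

print(convert('tsdb/mrs', 'simplemrs', 'eds', select='result:mrs', indent=2))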