Exemple #1
0
def test_write_database(tmp_path, mini_testsuite, empty_alt_testsuite):
    """Exercise write_database() for full, partial, and re-schema'd writes."""
    out_dir = tmp_path / 'test_write_database'
    db = tsdb.Database(mini_testsuite)

    # a plain write is expected to produce the directory and every file
    tsdb.write_database(db, str(out_dir))
    assert out_dir.is_dir()
    for filename in ('relations', 'item', 'parse', 'result'):
        assert (out_dir / filename).is_file()
    expected_parse = '10@10@1\n20@20@0\n30@30@1\n'
    assert (out_dir / 'parse').read_text() == expected_parse

    # restricting *names* to 'item' should leave no parse/result files
    tsdb.write_database(db, str(out_dir), names=['item'])
    assert (out_dir / 'item').is_file()
    for filename in ('parse', 'result'):
        assert not (out_dir / filename).is_file()

    # alt_schema drops i-wf field from mini_testsuite's schema
    alt_schema = tsdb.read_schema(empty_alt_testsuite)
    tsdb.write_database(db, str(out_dir), names=['item'], schema=alt_schema)
    rewritten = tsdb.Database(str(out_dir))
    assert len(db.schema['item']) == 4
    assert len(rewritten.schema['item']) == 3
    expected_item = (
        '10@It rained.@1-feb-2018 15:00\n'
        '20@Rained.@01-02-18 15:00:00\n'
        '30@It snowed.@2018-2-1 (15:00:00)\n'
    )
    assert (out_dir / 'item').read_text() == expected_item
Exemple #2
0
 def test_init(self, tmp_path, mini_testsuite):
     """Check which arguments tsdb.Database() rejects and accepts."""
     # calling the constructor with no arguments is expected to fail
     with pytest.raises(TypeError):
         tsdb.Database()
     # Prepare the empty directory *outside* the raises-block so that only
     # the constructor call can satisfy the TSDBError expectation.
     not_a_testsuite = tmp_path.joinpath('not_a_testsuite')
     not_a_testsuite.mkdir()
     with pytest.raises(tsdb.TSDBError):
         tsdb.Database(str(not_a_testsuite))
     # the mini_testsuite fixture is expected to open without raising
     tsdb.Database(mini_testsuite)
Exemple #3
0
 def test__getitem__(self, mini_testsuite, empty_testsuite):
     """Check relation lookup by name, with and without autocasting."""
     db = tsdb.Database(mini_testsuite)
     raw_rows = [
         ('10', 'It rained.', '1', '1-feb-2018 15:00'),
         ('20', 'Rained.', '0', '01-02-18 15:00:00'),
         ('30', 'It snowed.', '1', '2018-2-1 (15:00:00)'),
     ]
     assert list(db['item']) == raw_rows

     # with autocast, all three date spellings compare equal to one datetime
     db.autocast = True
     when = datetime(2018, 2, 1, 15, 0)
     cast_rows = [
         (10, 'It rained.', 1, when),
         (20, 'Rained.', 0, when),
         (30, 'It snowed.', 1, when),
     ]
     assert list(db['item']) == cast_rows

     # relation undefined
     with pytest.raises(tsdb.TSDBError):
         db['not_a_relation']

     # relation defined but file missing
     empty_db = tsdb.Database(empty_testsuite)
     with pytest.raises(tsdb.TSDBError):
         empty_db['item']
Exemple #4
0
def test_bad_date_issue_279(tmp_path, empty_alt_testsuite):
    """Check how datetime and free-form date values round-trip via write()."""
    ts_dir = tmp_path / 'test_bad_date_issue_279'
    ts_dir.mkdir()
    schema = tsdb.read_schema(empty_alt_testsuite)
    item_fields = schema['item']
    tsdb.write_schema(ts_dir, schema)

    sentence = 'The cat meows.'

    # a datetime value is expected to be read back as '8-sep-1999'
    dated_row = (0, sentence, datetime(1999, 9, 8))
    tsdb.write(ts_dir, 'item', [dated_row], item_fields)
    db = tsdb.Database(ts_dir)
    assert list(db['item']) == [('0', sentence, '8-sep-1999')]

    # a free-form date string is expected to be read back unchanged
    text_row = (0, sentence, 'September 8, 1999')
    tsdb.write(ts_dir, 'item', [text_row], item_fields)
    assert list(db['item']) == [('0', sentence, 'September 8, 1999')]
Exemple #5
0
def select(query: str, path: util.PathLike, record_class=None):
    """
    Run a TSQL select query against an [incr tsdb()] test suite.

    The test suite at *path* is opened as a database with autocasting
    enabled, and the query is delegated to `tsql.select()`.

    Args:
        query (str): TSQL select query (e.g., `'i-id i-input mrs'` or
            `'* from item where readings > 0'`)
        path: path to a TSDB test suite
        record_class: alternative class for records in the selection
    Returns:
        the result of `tsql.select()`, i.e., the data selected from
        the test suite
    """
    testsuite = tsdb.Database(path, autocast=True)
    selection = tsql.select(query, testsuite, record_class=record_class)
    return selection
Exemple #6
0
def update_test(args):
    """
    Write the current profile of a single test over its gold profile.

    Raises:
        RegressionTestError: if *args* does not resolve to exactly one
            test, or if reading or writing the profile raises a
            `tsdb.TSDBError`.
    """
    discovered = list(_discover(args))
    if len(discovered) != 1:
        raise RegressionTestError('only 1 test may be updated at a time')

    # only the last two fields of the test record are needed here
    _name, _idx, _chc, _txt, _skel, profile_path, gold_path = discovered[0]

    try:
        current = tsdb.Database(profile_path)
        tsdb.write_database(current, gold_path)
    except tsdb.TSDBError as exc:
        raise RegressionTestError('Failed to update gold.') from exc
Exemple #7
0
def _read(path, source_codec, select, kwargs):
    """
    Yield the items decoded by *source_codec* from *path*.

    Args:
        path: an object with a `read` attribute (passed to the codec
            as-is), or a filesystem path to a file or a directory
        source_codec: object providing `load()` and `loads()`
        select: query passed to `tsql.select()` when *path* is a
            directory
        kwargs: mapping of keyword arguments forwarded to the codec
    Yields:
        the decoded items; for a directory, the first item decoded
        from column 0 of each selected row, or `None` if that column
        decodes to nothing
    """
    is_stream = hasattr(path, 'read')
    if not is_stream:
        path = Path(path).expanduser()

    if not is_stream and path.is_dir():
        # a directory is opened as a database and queried with *select*
        profile = tsdb.Database(path)
        items = []
        for row in tsql.select(select, profile):
            decoded = source_codec.loads(row[0], **kwargs)
            items.append(next(iter(decoded), None))
    else:
        # open streams and regular files go straight to the codec
        items = list(source_codec.load(path, **kwargs))

    # everything is collected before the first item is yielded
    yield from items
Exemple #8
0
 def test_select_from(self, mini_testsuite):
     """Check select_from() under each combination of cast and autocast."""
     db = tsdb.Database(mini_testsuite)
     fields = ('i-id', 'i-date')
     raw_rows = [
         ('10', '1-feb-2018 15:00'),
         ('20', '01-02-18 15:00:00'),
         ('30', '2018-2-1 (15:00:00)'),
     ]
     when = datetime(2018, 2, 1, 15, 0)
     cast_rows = [(10, when), (20, when), (30, when)]

     # autocast off: strings by default, typed values only on request
     assert list(db.select_from('item', fields)) == raw_rows
     assert list(db.select_from('item', fields, cast=True)) == cast_rows

     # autocast on: typed values with or without an explicit cast=True
     db.autocast = True
     assert list(db.select_from('item', fields)) == cast_rows
     assert list(db.select_from('item', fields, cast=True)) == cast_rows
Exemple #9
0
 def test_path(self, mini_testsuite):
     """The database's path should equal the Path of its source."""
     expected = pathlib.Path(mini_testsuite)
     opened = tsdb.Database(mini_testsuite)
     assert opened.path == expected
Exemple #10
0
def mkprof(destination, source=None, schema=None, where=None, delimiter=None,
           refresh=False, skeleton=False, full=False, gzip=False, quiet=False):
    """
    Build an [incr tsdb()] profile or skeleton at *destination*.

    The data may be taken from an existing testsuite or from a list
    of sentences. The combination of *source* and *refresh* selects
    one of four modes:

        - `source="testsuite/"` -- read data from `testsuite/`
        - `source=None, refresh=True` -- read data from *destination*
        - `source=None, refresh=False` -- read sentences from stdin
        - `source="sents.txt"` -- read sentences from `sents.txt`

    The two sentence-based modes require the *schema* parameter.

    Args:
        destination (str): path of the new testsuite
        source (str): path to a source testsuite or a file containing
            sentences; if not given and *refresh* is `False`, sentences
            are read from stdin
        schema (str): path to a relations file to use for the created
            testsuite; if `None` and *source* is a test suite, the
            schema of *source* is used
        where (str): TSQL condition to filter records by; ignored if
            *source* is not a testsuite
        delimiter (str): if given, split lines from *source* or stdin
            on the character *delimiter*; if *delimiter* is `"@"`,
            split using :func:`delphin.tsdb.split`; a header line
            with field names is required; ignored when the data source
            is not text lines
        refresh (bool): if `True`, rewrite the data at *destination*;
            implies *full* is `True`; ignored if *source* is not
            `None`, best combined with *schema* or *gzip* (default:
            `False`)
        skeleton (bool): if `True`, only write tsdb-core files
            (default: `False`)
        full (bool): if `True`, copy all data from the source
            testsuite; ignored if the data source is not a testsuite
            or if *skeleton* is `True` (default: `False`)
        gzip (bool): if `True`, non-empty tables will be compressed
            with gzip
        quiet (bool): if `True`, don't print summary information
    Raises:
        CommandError: if *source* is given but is neither an existing
            file nor an existing directory
    """
    destination = Path(destination).expanduser()
    if source is not None:
        source = Path(source).expanduser()
    if schema is not None:
        schema = tsdb.read_schema(schema)

    # entries of the input database's schema, handed to the cleanup
    # step; stays empty when the input is plain sentences
    prior_relations = []

    if source is None:
        if refresh:
            # work in-place on destination test suite
            existing = tsdb.Database(destination)
            prior_relations = list(existing.schema)
            tsdb.write_database(
                existing, existing.path, schema=schema, gzip=gzip)
        else:
            # input is sentences on stdin
            _mkprof_from_lines(
                destination, sys.stdin, schema, delimiter, gzip)
    elif source.is_file():
        # input is a file of sentences
        with source.open() as lines:
            _mkprof_from_lines(
                destination, lines, schema, delimiter, gzip)
    elif source.is_dir():
        # input is source testsuite
        origin = tsdb.Database(source)
        prior_relations = list(origin.schema)
        _mkprof_from_database(
            destination, origin, schema, where, full, gzip)
    else:
        raise CommandError(f'invalid source for mkprof: {source!s}')

    _mkprof_cleanup(destination, skeleton, prior_relations)

    if quiet:
        return
    _mkprof_summarize(destination, tsdb.read_schema(destination))