Beispiel #1
0
def test_make_record(empty_testsuite):
    """
    Check tsdb.make_record() against the 'item' relation of the fixture.

    The schema is read from the 'relations' file inside the
    *empty_testsuite* directory. The assertions expect the record to be
    a tuple laid out as (i-id, i-input), with None standing in for a
    column absent from the mapping, and with a mapping key ('mrs') that
    is not reflected in the expected tuple.
    """
    schema = tsdb.read_schema(pathlib.Path(empty_testsuite) / 'relations')
    item_fields = schema['item']

    # all columns supplied, in a different order than the record layout
    complete = tsdb.make_record({'i-input': 'one', 'i-id': 100}, item_fields)
    assert complete == (100, 'one')

    # a missing column is expected to come back as None
    partial = tsdb.make_record({'i-id': 100}, item_fields)
    assert partial == (100, None)

    # an extra key is expected to leave the record unchanged
    extra = tsdb.make_record(
        {'i-id': 100, 'mrs': '[RELS: < > HCONS: < >]'},
        item_fields,
    )
    assert extra == (100, None)
Beispiel #2
0
def customize_itsdb(grammar_path):
    """
    Write the 'sentence' choices as item records of the matrix skeleton.

    Does nothing when ``ch`` (a name defined outside this function) has
    no 'sentence' entry. Otherwise the schema is read from
    ``<grammar_path>/tsdb/skeletons/Relations``, the database at
    ``<grammar_path>/tsdb/skeletons/matrix`` is initialized with it, and
    one 'item' record per sentence is written there. Item ids are
    assigned sequentially starting from 1.

    Args:
        grammar_path: directory that contains the ``tsdb/skeletons``
            subdirectory
    """
    if 'sentence' not in ch:
        return

    # shared by every generated item
    creation_date = datetime.datetime.today()
    creator = 'Grammar Matrix Customization System'

    skeleton_dir = os.path.join(grammar_path, 'tsdb', 'skeletons')
    target = os.path.join(skeleton_dir, 'matrix')
    relations = tsdb.read_schema(os.path.join(skeleton_dir, 'Relations'))
    tsdb.initialize_database(target, schema=relations)

    def get_item(s, i):
        # column-name -> value mapping for sentence *s* with item id *i*
        starred = s.get('star', False)
        text = s['orth']
        item = {}
        item['i-id'] = str(i)
        item['i-origin'] = 'unknown'
        item['i-register'] = 'unknown'
        item['i-format'] = 'none'
        item['i-difficulty'] = '1'
        # starred sentences get an empty category and i-wf of '0'
        item['i-category'] = '' if starred else 'S'
        item['i-input'] = text
        item['i-wf'] = '0' if starred else '1'
        item['i-length'] = str(len(text.split()))
        item['i-author'] = creator
        item['i-date'] = creation_date
        return item

    item_fields = relations['item']
    rows = []
    for number, sentence in enumerate(ch['sentence'], 1):
        rows.append(tsdb.make_record(get_item(sentence, number), item_fields))

    tsdb.write(target, 'item', rows, relations['item'])
Beispiel #3
0
def _lines_to_records(lineiter, colnames, split, fields):
    """
    Yield one record per input line, built with tsdb.make_record().

    Each line has its trailing newlines stripped and is broken into
    values by *split*; the values are paired with *colnames* in order.
    When *fields* has an 'i-id' field, a line without an 'i-id' column
    gets its 1-based line number as id, and ids must be unique. When
    *fields* has an 'i-length' field and the line provides 'i-input'
    but no 'i-length', the length is the number of whitespace-separated
    tokens of the input.

    Args:
        lineiter: iterable of text lines
        colnames: column names matching the values of each line
        split: callable turning one line into a list of values
        fields: field objects (each with a ``name``) of the relation
    Raises:
        CommandError: if a line does not have one value per column
            name, or if an 'i-id' value occurs more than once
    """
    names = {field.name for field in fields}
    wants_id = 'i-id' in names
    wants_length = 'i-length' in names

    seen_ids = set()
    for lineno, raw in enumerate(lineiter, 1):
        values = split(raw.rstrip('\n'))
        if len(values) != len(colnames):
            raise CommandError(
                'line values do not match expected fields:\n'
                f'  fields: {", ".join(colnames)}\n'
                f'  values: {", ".join(values)}')
        row = dict(zip(colnames, values))

        if wants_id:
            # fall back to the line number when no id column was given
            item_id = row.setdefault('i-id', lineno)
            if item_id in seen_ids:
                raise CommandError(f'duplicate i-id: {item_id}')
            seen_ids.add(item_id)

        if wants_length and 'i-input' in row and 'i-length' not in row:
            row['i-length'] = len(row['i-input'].split())

        yield tsdb.make_record(row, fields)
Beispiel #4
0
def _add_row(ts: TestSuite, name: str, data: Dict, buffer_size: int) -> None:
    """
    Build a record from *data* and append it to table *name* of *ts*.

    Keys of *data* that are not field names of the relation are deleted
    from *data* in place before the record is made. Afterwards, the
    rows not yet persisted are counted across all tables of *ts*, and
    the test suite is committed when that count exceeds *buffer_size*.

    Args:
        ts: the test suite receiving the row
        name: name of the relation/table to append to
        data: mapping of column names to values; pruned in place
        buffer_size: number of pending rows tolerated before a commit
    """
    fields = ts.schema[name]
    known = {field.name for field in fields}

    # drop the keys that do not belong to the relation (mutates *data*)
    unknown_keys = [key for key in data if key not in known]
    for key in unknown_keys:
        del data[key]

    record = tsdb.make_record(data, fields)
    ts[name].append(record)

    # rows appended to any table since the last write to disk
    tables = (ts[table_name] for table_name in ts)
    pending = sum(len(tbl) - tbl._persistent_count for tbl in tables)
    if pending > buffer_size:
        ts.commit()