Beispiel #1
0
def test_db(tmpdir, dataset, mocker, capsys):
    """Walk a Database through create, load, query and reload steps.

    The second half edits the FormTable column list in the dataset's CLDF
    metadata and checks how ``load`` reacts to each variant.
    """
    database = Database(str(tmpdir.join('lexibank.sqlite')))
    database.load(dataset)
    database.create(exists_ok=True)
    # A bare create() at this point is expected to fail with ValueError.
    with pytest.raises(ValueError):
        database.create()
    database.create(force=True)
    database.load(dataset)
    database.load_glottolog_data(dataset.glottolog)
    concepticon_stub = mocker.Mock(conceptsets={})
    database.load_concepticon_data(concepticon_stub)
    # Everything that iterating over ``database.sql`` yields must be
    # accepted by fetchall.
    for query in database.sql:
        database.fetchall(query)
    with database.connection() as connection:
        database.fetchall(
            'select * from dataset', conn=connection, verbose=True)
    # With verbose=True the statement is expected to show up on stdout.
    stdout = capsys.readouterr()[0]
    assert 'select' in stdout

    database.create(force=True)
    database.load(dataset)
    reader = dataset.cldf_reader()
    form_columns = reader['FormTable'].tableSchema.columns

    # Variant 1: an extra column without explicit datatype -> load succeeds.
    form_columns.append(Column(name='custom'))
    reader.write_metadata()
    database.load(dataset)

    # Variant 2: same column name, now typed as integer -> load must raise.
    form_columns.pop()
    form_columns.append(Column(name='custom', datatype='integer'))
    reader.write_metadata()
    with pytest.raises(ValueError):
        database.load(dataset)

    # Variant 3: extra column removed again -> load succeeds.
    form_columns.pop()
    reader.write_metadata()
    database.load(dataset)
Beispiel #2
0
def requirements(args):
    """Print editable (``-e git+...``) requirement lines to stdout.

    Lines are emitted, in this order, for:

    - glottolog and concepticon-data, pinned to the ``git_hash`` of the
      repositories of the first configured dataset (only if any datasets
      are configured),
    - pylexibank itself, if its version string ends in ``dev0``,
    - each ``(id, version)`` row of the ``dataset`` table in the database
      at ``args.db``.

    :param args: object providing ``cfg.datasets`` and ``db``.
    """
    datasets = args.cfg.datasets
    if datasets:
        first = datasets[0]
        glottolog_line = \
            '-e git+https://github.com/clld/glottolog.git@{0}#egg=pyglottolog'
        print(glottolog_line.format(git_hash(first.glottolog.repos)))
        concepticon_line = \
            '-e git+https://github.com/clld/concepticon-data.git@{0}#egg=pyconcepticon'
        print(concepticon_line.format(git_hash(first.concepticon.repos)))

    if pylexibank.__version__.endswith('dev0'):
        # Three levels above the package's __file__ is taken as the
        # checkout whose hash is reported.
        checkout = Path(pylexibank.__file__).parent.parent.parent
        pylexibank_line = \
            '-e git+https://github.com/lexibank/pylexibank.git@{0}#egg=pylexibank'
        print(pylexibank_line.format(git_hash(checkout)))

    database = Database(args.db)
    database.create(exists_ok=True)
    dataset_line = \
        '-e git+https://github.com/lexibank/{0}.git@{1}#egg=lexibank_{0}'
    for record in database.fetchall('select id, version from dataset'):
        print(dataset_line.format(*record))
Beispiel #3
0
def _unload(ds, **kw):
    """Unload dataset ``ds`` from the database located at ``kw['db']``.

    ``create(exists_ok=True)`` is called on the database before unloading.
    """
    database = Database(kw['db'])
    database.create(exists_ok=True)
    database.unload(ds)
Beispiel #4
0
def _load(ds, **kw):
    """Load dataset ``ds`` into the database located at ``kw['db']``.

    After ``create(exists_ok=True)``, the dataset itself is loaded first,
    followed by its Concepticon data and then its Glottolog data.
    """
    database = Database(kw['db'])
    database.create(exists_ok=True)
    database.load(ds)
    database.load_concepticon_data(ds.concepticon)
    database.load_glottolog_data(ds.glottolog)
Beispiel #5
0
def ls(args):
    """
    lexibank ls [COLS]+

    Print a table with one row per configured dataset (ID, title and the
    requested columns), followed by a totals row.

    column specification:
    - version
    - location
    - changes
    - license
    - all_lexemes
    - lexemes
    - concepts
    - languages
    - families
    - varieties
    - macroareas
    """
    # Column names accepted from the command line; anything else in
    # args.args is silently ignored.
    recognized = (
        'version',
        'location',
        'changes',
        'license',
        'all_lexemes',
        'lexemes',
        'concepts',
        'languages',
        'families',
        'varieties',
        'macroareas',
    )
    # (column, argument handed to db.fetchall) pairs; the order here is the
    # order in which the lookups are executed.
    stat_queries = (
        ('languages', 'glottocodes_by_dataset'),
        ('concepts', 'conceptsets_by_dataset'),
        ('lexemes', 'mapped_lexemes_by_dataset'),
        ('all_lexemes', 'lexemes_by_dataset'),
        ('macroareas', 'macroareas_by_dataset'),
        ('families', 'families_by_dataset'),
    )
    # Columns rendered as a float count per dataset.
    numeric = ('languages', 'concepts', 'lexemes', 'all_lexemes', 'families')
    # Columns whose total is a distinct count over the whole database
    # rather than the sum of the per-dataset cells.
    distinct_totals = {
        'languages': "SELECT count(distinct glottocode) FROM languagetable",
        'concepts':
            "SELECT count(distinct concepticon_id) FROM parametertable",
        'families': "SELECT count(distinct family) FROM languagetable",
    }

    database = Database(args.db)
    database.create(exists_ok=True)
    in_db = dict(
        (rec[0], rec[1])
        for rec in database.fetchall('select id, version from dataset'))

    # FIXME: how to smartly choose columns?
    table = Table('ID', 'Title')
    cols = OrderedDict()
    for requested in args.args:
        if requested in recognized:
            cols[requested] = {}

    # Titles get less room once extra columns are shown.
    title_width = 25 if cols else 40
    if cols:
        table.columns.extend(name.capitalize() for name in cols)

    for name, query in stat_queries:
        if name in cols:
            cols[name] = dict(
                (rec[0], rec[1]) for rec in database.fetchall(query))

    def cell(ds, col):
        """Return the table cell for dataset ``ds`` in column ``col``."""
        if col == 'version':
            return git_hash(ds.dir)
        if col == 'location':
            return colored(str(ds.dir), 'green')
        if col == 'changes':
            return ds.git_repo.is_dirty()
        if col == 'license':
            lic = licenses.find(ds.metadata.license or '')
            return lic.id if lic else ds.metadata.license
        if col in numeric:
            return float(cols[col].get(ds.id, 0))
        if col == 'macroareas':
            areas = (cols[col].get(ds.id) or '').split(',')
            return ', '.join(sorted(areas))
        # Accepted column without a dedicated rendering (e.g. 'varieties').
        return ''

    for ds in args.cfg.datasets:
        # Green ID: dataset has a row in the database; red: it has not.
        id_color = 'green' if ds.id in in_db else 'red'
        row = [
            colored(ds.id, id_color),
            truncate_with_ellipsis(ds.metadata.title or '', width=title_width),
        ]
        row.extend(cell(ds, col) for col in cols)
        table.append(row)

    totals = ['zztotal', len(args.cfg.datasets)]
    # Requested columns start at index 2, after ID and Title.
    for position, col in enumerate(cols, start=2):
        if col in ('lexemes', 'all_lexemes'):
            totals.append(sum([row[position] for row in table]))
        elif col in distinct_totals:
            totals.append(float(database.fetchone(distinct_totals[col])[0]))
        else:
            totals.append('')
    table.append(totals)

    rendered = table.render(
        tablefmt='simple',
        sortkey=lambda r: r[0],
        condensed=False,
        floatfmt=',.0f')
    print(rendered)
Beispiel #6
0
def get_db(args):
    """Return a ``Database`` for ``args.db``.

    ``create(exists_ok=True)`` is called on it before it is returned.
    """
    database = Database(args.db)
    database.create(exists_ok=True)
    return database