def test_db(tmpdir, dataset, mocker, capsys):
    """Walk a Database through create/load/query cycles and a schema change.

    Uses a throw-away SQLite file below ``tmpdir`` and the ``dataset``
    fixture as the thing being loaded.
    """
    sqlite_file = tmpdir.join('lexibank.sqlite')
    database = Database(str(sqlite_file))
    database.load(dataset)

    # create(): accepted with exists_ok, ValueError without it, accepted
    # again when forced.
    database.create(exists_ok=True)
    with pytest.raises(ValueError):
        database.create()
    database.create(force=True)

    # Reload the dataset plus the reference catalog data into the fresh db.
    database.load(dataset)
    database.load_glottolog_data(dataset.glottolog)
    database.load_concepticon_data(mocker.Mock(conceptsets={}))

    # Every query exposed via ``Database.sql`` must at least be executable.
    for query in database.sql:
        database.fetchall(query)

    # With verbose=True the statement is expected to show up on stdout.
    with database.connection() as connection:
        database.fetchall(
            'select * from dataset', conn=connection, verbose=True)
    stdout = capsys.readouterr()[0]
    assert 'select' in stdout

    # Start over and play with an extra column in the CLDF FormTable.
    database.create(force=True)
    database.load(dataset)
    cldf = dataset.cldf_reader()
    form_columns = cldf['FormTable'].tableSchema.columns

    # An additional column without explicit datatype loads fine ...
    form_columns.append(Column(name='custom'))
    cldf.write_metadata()
    database.load(dataset)

    # ... but the same column declared as integer makes load() raise
    # (presumably because it conflicts with the column already in the db).
    form_columns.pop()
    form_columns.append(Column(name='custom', datatype='integer'))
    cldf.write_metadata()
    with pytest.raises(ValueError):
        database.load(dataset)

    # Restore the original metadata; loading must work again.
    form_columns.pop()
    cldf.write_metadata()
    database.load(dataset)
def requirements(args):
    """Print pip-style editable requirement lines pinned to git hashes.

    Lines are written to stdout for, in this order:

    - the glottolog and concepticon repositories of the first configured
      dataset (only if any dataset is configured),
    - pylexibank itself (only if its version string ends with ``dev0``),
    - every dataset recorded in the database at ``args.db``, using the
      ``id`` and ``version`` stored in the ``dataset`` table.
    """
    datasets = args.cfg.datasets
    if datasets:
        # The catalogs are taken from the first dataset only.
        reference = datasets[0]
        glottolog_line = \
            '-e git+https://github.com/clld/glottolog.git@{0}#egg=pyglottolog'
        print(glottolog_line.format(git_hash(reference.glottolog.repos)))
        concepticon_line = \
            '-e git+https://github.com/clld/concepticon-data.git@{0}#egg=pyconcepticon'
        print(concepticon_line.format(git_hash(reference.concepticon.repos)))

    if pylexibank.__version__.endswith('dev0'):
        # The hash is taken from the directory three levels above the
        # package's __init__ module.
        checkout = Path(pylexibank.__file__).parent.parent.parent
        pylexibank_line = \
            '-e git+https://github.com/lexibank/pylexibank.git@{0}#egg=pylexibank'
        print(pylexibank_line.format(git_hash(checkout)))

    database = Database(args.db)
    database.create(exists_ok=True)
    dataset_line = \
        '-e git+https://github.com/lexibank/{0}.git@{1}#egg=lexibank_{0}'
    for row in database.fetchall('select id, version from dataset'):
        print(dataset_line.format(*row))
def _unload(ds, **kw):
    """Remove dataset ``ds`` from the database located at ``kw['db']``.

    The database is created first if it does not exist yet.
    """
    database = Database(kw['db'])
    database.create(exists_ok=True)
    database.unload(ds)
def _load(ds, **kw):
    """Load dataset ``ds`` into the database located at ``kw['db']``.

    The database is created first if it does not exist yet. After the
    dataset itself, its concepticon and then its glottolog data are loaded.
    """
    database = Database(kw['db'])
    database.create(exists_ok=True)
    database.load(ds)
    database.load_concepticon_data(ds.concepticon)
    database.load_glottolog_data(ds.glottolog)
def ls(args):
    """
    lexibank ls [COLS]+

    column specification:
    - version
    - location
    - changes
    - license
    - all_lexemes
    - lexemes
    - concepts
    - languages
    - families
    - varieties
    - macroareas
    """
    # Columns recognised on the command line; anything else in args.args is
    # silently ignored. 'varieties' is accepted but no data is computed for
    # it, so its cells stay empty.
    known_cols = (
        'version',
        'location',
        'changes',
        'license',
        'all_lexemes',
        'lexemes',
        'concepts',
        'languages',
        'families',
        'varieties',
        'macroareas',
    )
    # Columns whose per-dataset values come from a query of the db,
    # as (column, query name) pairs.
    aggregate_queries = (
        ('languages', 'glottocodes_by_dataset'),
        ('concepts', 'conceptsets_by_dataset'),
        ('lexemes', 'mapped_lexemes_by_dataset'),
        ('all_lexemes', 'lexemes_by_dataset'),
        ('macroareas', 'macroareas_by_dataset'),
        ('families', 'families_by_dataset'),
    )
    count_cols = ('languages', 'concepts', 'lexemes', 'all_lexemes', 'families')

    db = Database(args.db)
    db.create(exists_ok=True)
    # Only membership is checked below: datasets present in the db are
    # coloured green, all others red.
    in_db = {
        r[0]: r[1] for r in db.fetchall('select id, version from dataset')
    }

    # FIXME: how to smartly choose columns?
    table = Table('ID', 'Title')
    # Maps each requested column to a {dataset id: value} lookup; the lookup
    # stays empty for columns that are computed directly from the dataset.
    cols = OrderedDict((col, {}) for col in args.args if col in known_cols)

    # Titles are truncated harder when additional columns are shown.
    tl = 40
    if cols:
        tl = 25
        table.columns.extend(col.capitalize() for col in cols)

    for col, sql in aggregate_queries:
        if col in cols:
            cols[col] = {r[0]: r[1] for r in db.fetchall(sql)}

    for ds in args.cfg.datasets:
        row = [
            colored(ds.id, 'green' if ds.id in in_db else 'red'),
            truncate_with_ellipsis(ds.metadata.title or '', width=tl),
        ]
        for col in cols:
            if col == 'version':
                row.append(git_hash(ds.dir))
            elif col == 'location':
                row.append(colored(str(ds.dir), 'green'))
            elif col == 'changes':
                row.append(ds.git_repo.is_dirty())
            elif col == 'license':
                # Fall back to the raw metadata value for unknown licenses.
                lic = licenses.find(ds.metadata.license or '')
                row.append(lic.id if lic else ds.metadata.license)
            elif col in count_cols:
                # Counts are stored as floats; the table is rendered with
                # floatfmt=',.0f' below.
                row.append(float(cols[col].get(ds.id, 0)))
            elif col == 'macroareas':
                row.append(', '.join(
                    sorted((cols[col].get(ds.id) or '').split(','))))
            else:
                row.append('')
        table.append(row)

    # Summary row. NOTE(review): the 'zztotal' label presumably exists to
    # make this row sort last under the r[0] sort key - confirm.
    totals = ['zztotal', len(args.cfg.datasets)]
    for i, col in enumerate(cols):
        if col in ('lexemes', 'all_lexemes'):
            # Requested columns start at index 2, after ID and Title.
            totals.append(sum(r[i + 2] for r in table))
        elif col == 'languages':
            totals.append(float(db.fetchone(
                "SELECT count(distinct glottocode) FROM languagetable")[0]))
        elif col == 'concepts':
            totals.append(float(db.fetchone(
                "SELECT count(distinct concepticon_id) FROM parametertable")[0]))
        elif col == 'families':
            totals.append(float(db.fetchone(
                "SELECT count(distinct family) FROM languagetable")[0]))
        else:
            totals.append('')
    table.append(totals)

    print(table.render(
        tablefmt='simple',
        sortkey=lambda r: r[0],
        condensed=False,
        floatfmt=',.0f'))
def get_db(args):
    """Return a Database for ``args.db``, creating it if it does not exist."""
    database = Database(args.db)
    database.create(exists_ok=True)
    return database