def test_dump(capsys, mocker, tmpdir):
    """Check that ``dump`` writes the grapheme table and ``stats`` reports on it.

    ``dump`` is run in test mode against a repository rooted at ``tmpdir``;
    afterwards ``data/graphemes.tsv`` must exist and ``stats`` must print a
    line mentioning ``Unique graphemes``.
    """
    def fresh_args():
        # Each command gets its own mock namespace wrapping a new CLTS object.
        return mocker.Mock(repos=CLTS(str(tmpdir)))

    data_dir = tmpdir.join('data')
    data_dir.mkdir()

    dump(fresh_args(), test=True)
    # Drain whatever ``dump`` printed so only ``stats`` output is inspected below.
    capsys.readouterr()
    graphemes_file = Path(str(tmpdir)) / 'data' / 'graphemes.tsv'
    assert graphemes_file.exists()

    stats(fresh_args())
    stdout, _stderr = capsys.readouterr()
    assert 'Unique graphemes' in stdout
def main(args=None):  # pragma: no cover
    """Entry point of the ``pyclts`` command line interface.

    Builds the argument parser, registers the global options and hands
    control to the parser's ``main``.

    :param args: Argument list forwarded to the parser; when ``None`` the \
    process exits with the parser's result instead of returning.
    """
    cli = ArgumentParserWithLogging('pyclts')

    # (flags, keyword arguments) pairs, registered in exactly this order.
    option_specs = (
        (("--repos",), dict(
            default=CLTS(repos=Path(__file__).parent.parent.parent),
            type=CLTS,
            help="Path to clts repos.")),
        (("--format",), dict(
            default="pipe",
            help="Format of tabular output.")),
        (('--nonames',), dict(
            help="do not report the sound names in the output",
            action='store_true')),
        (('--filter',), dict(
            help="only list generated sounds",
            default='')),
        (('--data',), dict(
            help="specify the transcription data you want to load",
            default="phoible")),
        (('--system',), dict(
            help="specify the transcription system you want to load",
            default="bipa")),
    )
    for flags, options in option_specs:
        cli.add_argument(*flags, **options)

    exit_code = cli.main(args=args)
    if args is None:  # pragma: no cover
        sys.exit(exit_code)
def main(args):  # pragma: no cover
    """Populate the web app database from the CLDF dataset in ``args.cldf``.

    Objects are created in this order: bibliography sources, the dataset
    record with its editors, features, sound segments (plus an ``NA``
    placeholder segment), sound/feature associations, the transcription
    contributions with their references, and finally the graphemes together
    with the value sets grouping them.

    :param args: Namespace whose ``cldf`` attribute provides ``bibpath``, \
    row iteration via ``cldf[<table path>]`` and column lookup via \
    ``cldf[<table path>, <column>]`` (used for the ``valueUrl`` templates).
    :raises AssertionError: if a sound resolves to a color missing from ``LEGEND``.
    """
    data = Data()
    clts_repos = Path(__file__).parent.parent.parent.parent.resolve() / 'clts-data'
    clts_repos = CLTS(clts_repos)
    print(clts_repos.repos)
    # Hard-coded release tag.
    # NOTE(review): a disabled ``assert_release(clts_repos.repos)`` call used to
    # sit here; presumably the version should be derived from the repos - confirm.
    version = 'v2.1.0'

    # --- Bibliography -----------------------------------------------------
    for rec in Database.from_file(args.cldf.bibpath, lowercase=True):
        data.add(common.Source, rec.id, _obj=bibtex2source(rec))

    # --- Dataset metadata and editors --------------------------------------
    dataset = common.Dataset(
        id='clts',
        name="CLTS {0}".format(version),
        publisher_name="Max Planck Institute for Evolutionary Anthropology",
        publisher_place="Leipzig",
        publisher_url="http://www.eva.mpg.de",
        license="http://creativecommons.org/licenses/by/4.0/",
        contact='*****@*****.**',
        domain='clts.clld.org',
        jsondata={
            'license_icon': 'cc-by.png',
            'license_name': 'Creative Commons Attribution 4.0 International License'})
    DBSession.add(dataset)

    for i, name in enumerate([
        'Johann-Mattis List',
        'Cormac Anderson',
        'Tiago Tresoldi',
        'Robert Forkel',
    ]):
        c = common.Contributor(id=slug(name), name=name)
        dataset.editors.append(common.Editor(contributor=c, ord=i))

    # --- Features -----------------------------------------------------------
    for line in args.cldf['data/features.tsv']:
        data.add(
            models.Feature,
            line['ID'],
            id=line['ID'],
            name='{} {}: {}'.format(line['TYPE'], line['FEATURE'], line['VALUE']),
            sound_type=line['TYPE'],
            feature=line['FEATURE'],
            value=line['VALUE'],
        )

    # --- Sound segments -----------------------------------------------------
    # Placeholder segment with id 'NA'; it is not registered in ``data``.
    DBSession.add(models.SoundSegment(
        id='NA',
        name='<NA>',
        description='<NA>',
        type='marker',
        generated=True,
        unicode='',
        color='#bbbbbb',
    ))

    # The color sound class lookup does not depend on the row, so fetch it
    # once instead of once per sound.
    color_classes = clts_repos.soundclass('color')
    for line in args.cldf['data/sounds.tsv']:
        s = data.add(
            models.SoundSegment,
            line['ID'],
            id=line['ID'],
            name=line['GRAPHEME'],
            description=line['NAME'],
            type=line['TYPE'],
            generated=line['GENERATED'],
            unicode=' / '.join(line['UNICODE']),
            color=color_classes.resolve_sound(line['GRAPHEME']),
        )
        # '0' is mapped to the same grey used for the 'NA' placeholder.
        if s.color == '0':
            s.color = '#bbbbbb'
        assert s.color in LEGEND
    # Flush so that the ``pk`` attributes read below are populated.
    DBSession.flush()

    # --- Sound/feature associations (each pair added at most once) ----------
    seen = set()
    for line in args.cldf['data/sounds.tsv']:
        for fid in line['FEATURES']:
            spk, fpk = data['SoundSegment'][line['ID']].pk, data['Feature'][fid].pk
            if (spk, fpk) not in seen:
                DBSession.add(models.SoundSegmentFeature(soundsegment_pk=spk, feature_pk=fpk))
                seen.add((spk, fpk))

    # --- Transcription contributions ----------------------------------------
    english = data.add(
        common.Language, 'eng',
        id='eng',
        name='English')

    for line in args.cldf['sources/index.tsv']:
        c = data.add(
            models.Transcription,
            line['NAME'],
            id=line['NAME'],
            name=line['NAME'],
            description=line['DESCRIPTION'].replace(':bib:', '/sources/'),
            datatype=getattr(models.Datatype, line['TYPE'])
        )
        for ref in line.get('REFS', []):
            common.ContributionReference(source=data['Source'][ref], contribution=c)

    # --- Graphemes and their value sets -------------------------------------
    sound_url_template = args.cldf['data/graphemes.tsv', 'SOUND'].valueUrl
    image_url_template = args.cldf['data/graphemes.tsv', 'IMAGE'].valueUrl

    for line in args.cldf['data/graphemes.tsv']:
        key = line['DATASET'] + ':' + line['NAME'] + ':' + line['GRAPHEME']
        if key in data['Grapheme']:
            # Duplicate (dataset, name, grapheme) rows are loaded only once.
            continue

        sound_id = line['NAME'].replace(' ', '_')
        vs = data['ValueSet'].get((line['DATASET'], line['NAME']))
        if not vs:
            try:
                vs = data.add(
                    common.ValueSet,
                    (line['DATASET'], line['NAME']),
                    id=key,
                    description=line['NAME'],
                    language=english,
                    contribution=data['Transcription'][line['DATASET']],
                    parameter=data['SoundSegment'][sound_id]
                )
            except Exception:
                # Show the offending row before propagating the error.
                print(line)
                raise
        data.add(
            models.Grapheme,
            key,
            id=key,
            name=line['GRAPHEME'],
            description=line['NAME'],
            url=line['URL'].unsplit() if line['URL'] else None,
            audio=sound_url_template.expand(line) if line['SOUND'] else None,
            image=image_url_template.expand(line) if line['IMAGE'] else None,
            valueset=vs
        )
def test_make_app_data(capsys, mocker, tmpdir):
    """Check that ``_make_app_data`` in test mode writes ``app/data.js``."""
    app_dir = tmpdir.join('app')
    app_dir.mkdir()

    cli_args = mocker.Mock(repos=CLTS(str(tmpdir)))
    _make_app_data(cli_args, test=True)

    data_js = Path(str(tmpdir)) / 'app' / 'data.js'
    assert data_js.exists()
def test_stats(capsys, mocker, tmpdir):
    """Exercise ``dstats`` and ``stats`` for the ``bipa`` system.

    ``dstats`` is run against a repository rooted at ``tmpdir`` and must print
    output containing ``id``; ``stats`` is then called against a repository
    rooted at ``'.'`` and only needs to complete without raising.
    """
    tmp_backed_args = mocker.Mock(system='bipa', repos=CLTS(str(tmpdir)))
    dstats(tmp_backed_args)
    stdout, _stderr = capsys.readouterr()
    assert 'id' in stdout

    cwd_backed_args = mocker.Mock(system='bipa', repos=CLTS('.'))
    stats(cwd_backed_args)
def test_iter_sources(sources, tmpdir):
    """Check the first item yielded by ``CLTS.iter_sources(type='td')``.

    Its second element must be empty and its first element must carry the
    ``NAME`` value ``'test'``.
    """
    clts = CLTS(repos=str(tmpdir))
    found = list(clts.iter_sources(type='td'))

    first = found[0]
    assert len(first[1]) == 0
    assert first[0]['NAME'] == 'test'