def test_data_uri(self):
    from clld.web.util.helpers import data_uri

    # Smoke test: just check that encoding a file does not raise.
    data_uri(__file__, 'text/plain')
def main(args):
    data = Data()
    dataset = common.Dataset(
        id=cobl2.__name__,
        name="IE-CoR",
        publisher_name="Max Planck Institute for the Science of Human History",
        publisher_place="Jena",
        publisher_url="https://www.eva.mpg.de",
        license="https://creativecommons.org/licenses/by/4.0/",
        domain='iecor.clld.org',
        contact='*****@*****.**',
        jsondata={
            'license_icon': 'cc-by.png',
            'license_name': 'Creative Commons Attribution 4.0 International License'})
    DBSession.add(dataset)

    # Authors, sorted by name; the three dataset editors are picked out by last name.
    editors = OrderedDict([('Heggarty', None), ('Anderson', None), ('Scarborough', None)])
    for row in sorted(
            ds['authors.csv'],
            key=lambda x: [x['Last_Name'].lower(), x['First_Name'].lower()]):
        if row['Last_Name'] in editors:
            editors[row['Last_Name']] = row['ID']
        data.add(
            models.Author,
            row['ID'],
            id=row['ID'],
            name='{0} {1}'.format(row['First_Name'], row['Last_Name']),
            url=row['URL'],
            photo=data_uri(photos[row['Last_Name']], 'image/jpg')
            if row['Last_Name'] in photos else None)

    for i, cid in enumerate(editors.values()):
        common.Editor(dataset=dataset, contributor=data['Author'][cid], ord=i + 1)

    # Sources: drop BibTeX fields the target schema does not accept.
    for src in ds.sources.items():
        for invalid in ['isbn', 'part', 'institution']:
            if invalid in src:
                del src[invalid]
        data.add(
            common.Source,
            src.id,
            id=src.id,
            name=src.get('author', src.get('editor')),
            description=src.get('title', src.get('booktitle')),
            bibtex_type=getattr(EntryType, src.genre, EntryType.misc),
            **src)

    # Markdown-style cross-references like "[label](cog-123)" in comments are
    # rewritten to HTML links pointing at the corresponding app routes.
    re_links = re.compile(r'\[(?P<label>[^\]]+?)\]\((?P<type>.+?)-(?P<id>\d+)\)')
    link_map = {
        'cog': '/cognatesets/',
        'lex': '/values/',
        'src': '/sources/',
    }

    def parse_links(m):
        try:
            return '<a href="{}{}">{}</a>'.format(
                link_map[m.group('type')], m.group('id'), m.group('label'))
        except KeyError:
            print("parse_links: type error in '{}'".format(":".join(m.groups())))
            return '[{}]({}-{})'.format(m.group('label'), m.group('type'), m.group('id'))

    for param in ds['ParameterTable']:
        data.add(
            models.Meaning,
            param['ID'],
            id=slug(param['Name']),
            name=param['Name'],
            description_md=param['Description_md'],
            concepticon_id=int(param['Concepticon_ID'])
            if param['Concepticon_ID'] != '0' else None,
        )

    for row in ds['clades.csv']:
        data.add(
            models.Clade,
            row['ID'],
            id=row['ID'],
            level0_name=row['level0_name'],
            level1_name=row['level1_name'],
            level2_name=row['level2_name'],
            level3_name=row['level3_name'],
            clade_level0=row['clade_level0'],
            clade_level1=row['clade_level1'],
            clade_level2=row['clade_level2'],
            clade_level3=row['clade_level3'],
            clade_name=row['clade_name'],
            short_name=row['short_name'],
            color=row['color'],
        )

    # One Contribution plus one Variety per language.
    for row in ds['LanguageTable']:
        c = data.add(
            common.Contribution,
            row['ID'],
            id=row['ID'],
            name=row['Name'],
        )
        for i, cid in enumerate(row['Author_ID']):
            DBSession.add(common.ContributionContributor(
                contribution=c, contributor=data['Author'][cid], ord=i + 1))
        data.add(
            models.Variety,
            row['ID'],
            id=slug(row['Name']),
            name=row['Name'],
            latitude=float(row['Latitude']) if row['Latitude'] is not None else None,
            longitude=float(row['Longitude']) if row['Longitude'] is not None else None,
            contribution=c,
            color=rgb_as_hex(row['Color']),
            clade=', '.join(filter(None, row['Clade'])),
            clade_name=row['clade_name'],
            glottocode=row['Glottocode'],
            historical=row['historical'],
            distribution=row['distribution'],
            logNormalMean=row['logNormalMean'],
            logNormalOffset=row['logNormalOffset'],
            logNormalStDev=row['logNormalStDev'],
            normalMean=row['normalMean'],
            normalStDev=row['normalStDev'],
            ascii_name=row['ascii_name'],
            iso=row['ISO639P3code'],
            lang_description=row['Description'],
            variety=row['Variety'],
            loc_justification=row['loc_justification'] or None,
            sort_order=row['sort_order'],
        )

    # Forms: one ValueSet per (language, meaning) pair, one Lexeme per form,
    # de-duplicating source references per valueset.
    vsrs = set()
    for row in ds['FormTable']:
        vs = data['ValueSet'].get((row['Language_ID'], row['Parameter_ID']))
        if not vs:
            vs = data.add(
                common.ValueSet,
                (row['Language_ID'], row['Parameter_ID']),
                id='{0}-{1}'.format(row['Language_ID'], row['Parameter_ID']),
                language=data['Variety'][row['Language_ID']],
                parameter=data['Meaning'][row['Parameter_ID']],
                contribution=data['Contribution'][row['Language_ID']],
            )
        data.add(
            models.Lexeme,
            row['ID'],
            id=row['ID'],
            name=row['Form'],
            native_script=row['native_script'],
            phonetic=row['phon_form'],
            phonemic=row['Phonemic'],
            comment=re_links.sub(parse_links, row['Comment'] or ''),
            url=row['url'],
            gloss=row['Gloss'],
            valueset=vs,
        )
        for src in row['Source']:
            sid, pages = ds.sources.parse(src)
            key = (vs.id, sid, pages)
            if pages:
                pages = pages.replace('|', ';')
            if key not in vsrs:
                DBSession.add(common.ValueSetReference(
                    valueset=vs, source=data['Source'][sid], description=pages))
                vsrs.add(key)

    # Cognate sets: prefer the calculated root form/language when present.
    for row in ds['CognatesetTable']:
        cc = data.add(
            models.CognateClass,
            row['ID'],
            id=row['ID'],
            name=row['ID'],
            root_form=row['Root_Form_calc']
            if row['Root_Form_calc'] is not None and len(row['Root_Form_calc'])
            else row['Root_Form'],
            root_form_calc=row['Root_Form_calc'] or None,
            root_gloss=row['Root_Gloss'] or None,
            root_language=row['Root_Language_calc']
            if row['Root_Language_calc'] is not None and len(row['Root_Language_calc'])
            else row['Root_Language'],
            root_language_calc=row['Root_Language_calc'] or None,
            comment=re_links.sub(parse_links, row['Comment'] or ''),
            justification=re_links.sub(parse_links, row['Justification'] or ''),
            ideophonic=row['Ideophonic'] or None,
            parallel_derivation=row['parallelDerivation'] or None,
            revised_by=','.join(row['revised_by']) or None,
            superset_id=int(row['supersetid']) if row['supersetid'] else None,
        )
        for src in row['Source']:
            sid, pages = ds.sources.parse(src)
            if pages:
                pages = pages.replace('|', ';')
            DBSession.add(clld_cognacy_plugin.models.CognatesetReference(
                cognateset=cc, source=data['Source'][sid], description=pages))

    DBSession.flush()

    # Proposed cognacy links between cognate sets need the primary keys
    # assigned by the flush above.
    cc_id_pk_map = {str(ccid): cc.pk for ccid, cc in data['CognateClass'].items()}
    for row in ds['CognatesetTable']:
        if row['proposedAsCognateTo_pk']:
            DBSession.add(models.ProposedCognates(
                cc1_pk=data['CognateClass'][row['ID']].pk,
                cc2_pk=cc_id_pk_map[str(row['proposedAsCognateTo_pk'])],
                scale=row['proposedAsCognateToScale'],
            ))
    DBSession.flush()

    # Loan events attach to their cognate sets.
    loans = {ln['Cognateset_ID']: ln for ln in ds['loans.csv']}
    for ccid, cc in data['CognateClass'].items():
        if ccid in loans:
            le = loans[ccid]
            if le['SourceCognateset_ID']:
                cc.loan_source_pk = data['CognateClass'][le['SourceCognateset_ID']].pk
            else:
                cc.loan_source_pk = None
            cc.loan_notes = le['Comment']
            cc.loan_source_languoid = le['Source_languoid']
            cc.loan_source_form = le['Source_form']
            cc.parallel_loan_event = le['Parallel_loan_event']
            cc.is_loan = True

    # Cognacy judgements; each cognate set must map to a single meaning.
    for row in ds['CognateTable']:
        cc = data['CognateClass'][row['Cognateset_ID']]
        if cc.meaning_pk is None:
            cc.meaning_pk = data['Lexeme'][row['Form_ID']].valueset.parameter_pk
        else:
            assert data['Lexeme'][row['Form_ID']].valueset.parameter_pk == cc.meaning_pk
        data.add(
            clld_cognacy_plugin.models.Cognate,
            row['ID'],
            cognateset=data['CognateClass'][row['Cognateset_ID']],
            counterpart=data['Lexeme'][row['Form_ID']],
            doubt=row['Doubt'],
        )

    # Phylogeny 1: Bouckaert et al. 2012, taxa matched to varieties by glottocode.
    l_by_gc = {}
    for s in DBSession.query(models.Variety):
        l_by_gc[s.glottocode] = s.pk
    tree = Phylogeny(
        id='1',
        name='Bouckaert et al.',
        description='',
        newick=Path.read_text(data_file_path / 'raw' / 'bouckaert_et_al2012' / 'newick.txt'),
    )
    for k, taxon in enumerate(reader(
            data_file_path / 'raw' / 'bouckaert_et_al2012' / 'taxa.csv', namedtuples=True)):
        label = TreeLabel(
            id='{0}-{1}-{2}'.format(tree.id, slug(taxon.taxon), k + 1),
            name=taxon.taxon,
            phylogeny=tree,
            description=taxon.glottocode)
        if taxon.glottocode in l_by_gc:
            LanguageTreeLabel(language_pk=l_by_gc[taxon.glottocode], treelabel=label)
    DBSession.add(tree)

    # Phylogeny 2: CoBL consensus tree, taxa matched to varieties by ASCII name.
    l_by_ascii = {}
    for s in DBSession.query(models.Variety):
        l_by_ascii[s.ascii_name] = s.pk
    tree = Phylogeny(
        id='2',
        name='CoBL consensus',
        description='',
        newick=Path.read_text(data_file_path / 'raw' / 'ie122' / 'newick.txt'),
    )
    for k, taxon in enumerate(reader(
            data_file_path / 'raw' / 'ie122' / 'taxa.csv', namedtuples=True)):
        label = TreeLabel(
            id='{0}-{1}-{2}'.format(tree.id, slug(taxon.taxon), k + 1),
            name=taxon.taxon,
            phylogeny=tree)
        if taxon.taxon in l_by_ascii:
            LanguageTreeLabel(language_pk=l_by_ascii[taxon.taxon], treelabel=label)
    DBSession.add(tree)
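# A minimal, self-contained sketch of the cross-reference rewriting used in
# main() above: comments in the raw data may contain markdown-style links such
# as '[horse](lex-123)', which re_links.sub(parse_links, ...) turns into HTML
# anchors. The regex, link_map and fallback behaviour mirror the code above;
# the sample comment string below is made up for illustration.
import re

re_links = re.compile(r'\[(?P<label>[^\]]+?)\]\((?P<type>.+?)-(?P<id>\d+)\)')
link_map = {'cog': '/cognatesets/', 'lex': '/values/', 'src': '/sources/'}

def parse_links(m):
    try:
        # Known type prefix: rewrite to an anchor pointing at the app route.
        return '<a href="{}{}">{}</a>'.format(
            link_map[m.group('type')], m.group('id'), m.group('label'))
    except KeyError:
        # Unknown type prefix: report it and leave the original text in place.
        print("parse_links: type error in '{}'".format(":".join(m.groups())))
        return '[{}]({}-{})'.format(m.group('label'), m.group('type'), m.group('id'))

comment = 'cf. [horse](lex-123) and cognate set [EKWOS](cog-45)'
print(re_links.sub(parse_links, comment))
# -> cf. <a href="/values/123">horse</a> and cognate set <a href="/cognatesets/45">EKWOS</a>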
def test_data_uri(env):
    from clld.web.util.helpers import data_uri

    res = data_uri(__file__, 'text/plain')
    assert not res.split(',')[1].startswith("b'")
def test_data_uri(self):
    from clld.web.util.helpers import data_uri

    res = data_uri(__file__, 'text/plain')
    self.assertFalse(res.split(',')[1].startswith("b'"))
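# Why the tests assert on "b'": base64.b64encode() returns bytes in Python 3,
# and naively formatting that with str() embeds a literal "b'...'" in the URI.
# res.split(',')[1] is the base64 payload of a 'data:<mime>;base64,<data>'
# URI, so it must not start with "b'". The following is only a sketch of a
# correct builder under that assumption; clld's actual implementation of
# data_uri() may differ.
import base64

def make_data_uri(path, mimetype):
    with open(path, 'rb') as fp:
        payload = base64.b64encode(fp.read()).decode('ascii')  # decode, don't str()
    return 'data:{};base64,{}'.format(mimetype, payload)

uri = make_data_uri(__file__, 'text/plain')
assert not uri.split(',')[1].startswith("b'")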