def cmd_makecldf(self, args):
    """Convert the raw Verkerk DravLex CLDF dump into this dataset's CLDF.

    Reads concepts, sources, languages, forms and cognate judgements from
    the checkout under raw/ and writes them via ``args.writer``.
    """
    dsdir = self.dir / 'raw' / 'Verkerk-DravLex-622ac6e'

    # Load concepts; concept IDs look like "<prefix>-<number>", keep only
    # the number and append a slug of the English gloss.
    concepts = args.writer.add_concepts(
        id_factory=lambda c: c.id.split('-')[-1] + '_' + slug(c.english),
        lookup_factory="Name")

    # Load sources from the original CLDF, then the default (fieldwork) ones.
    args.writer.add_sources(*self.raw_dir.read_bib(dsdir / 'sources.bib'))
    args.writer.add_sources()

    # Load languages.
    args.writer.add_languages()

    # Index cognate judgements by the form they belong to.
    cogs = {
        r['Form_ID']: r
        for r in self.raw_dir.read_csv(dsdir / 'cognates.csv', dicts=True)}

    # Load forms and attach the matching cognate judgement to each lexeme.
    for row in self.raw_dir.read_csv(dsdir / 'forms.csv', dicts=True):
        # Forms without an explicit source come from the Kolipakam fieldwork.
        src = row['Source'].split(";") if row['Source'] else ['KolipakamFW']
        cog = cogs.get(row['ID'])
        for lex in args.writer.add_forms_from_value(
                Local_ID=row['ID'],
                Language_ID=row['Language_ID'],
                Parameter_ID=concepts[row['Parameter_ID']],
                Value=row['Form'],
                Source=src,
                Comment=row['status'],
                # A non-empty status field marks the form as a loan.
                Loan=bool(row['status'])):
            # FIX: guard against forms without a cognate judgement instead
            # of crashing with a TypeError on cog['ID'] when cog is None.
            if cog is None:
                continue
            args.writer.add_cognate(
                lexeme=lex,
                ID=cog['ID'],
                Source=cog['Source'],
                Cognateset_ID=cog['Cognateset_ID'],
                # FIX: drop empty parts so the comment has no dangling ", ".
                Comment=", ".join(
                    c for c in (cog['Comment'], cog['source_comment']) if c))
from xml.etree.ElementTree import fromstring
from xmljson import badgerfish as bf
import sys
import os
import csv
from cariban import util
from pycldf import Wordlist
import re
import pyperclip

# lexicon: form ID -> {"forms", "meanings", "language"}, built from the
# cariban CLDF wordlist one directory up from this script.
lexicon = {}
cariban_data = Wordlist.from_metadata("../cariban_data.json")
for row in cariban_data["FormTable"]:
    # "Glossing" may hold alternative glosses separated by "; ".
    alt_glossings = row["Glossing"].split("; ")
    # NOTE(review): str.split never returns an empty list, so the
    # len() == 0 test is unreachable; an empty first element is the
    # signal that no glossing was given.
    if len(alt_glossings) == 0 or alt_glossings[0] == "":
        # NOTE(review): this branch assigns a plain string while the other
        # assigns a list — presumably downstream code handles both; verify.
        meanings = row["Parameter_ID"]
    else:
        meanings = alt_glossings
    lexicon[row["ID"]] = {
        "forms": row["Form"],
        "meanings": meanings,
        "language": row["Language_ID"],
    }
# print(lexicon)


def search_lexicon(form, meaning, language):
    # Look up a form/meaning/language in the module-level lexicon;
    # an empty lexicon yields the placeholder "X".
    if len(lexicon) == 0:
        return ("X")
    # Lower-case meanings use "." as a word separator; normalize to spaces.
    if not meaning.isupper():
        new_meaning = meaning.replace(".", " ")
    else:
from clldutils.path import Path
from clldutils.misc import slug
from pycldf import Wordlist
from clld_phylogeny_plugin.models import Phylogeny, TreeLabel, LanguageTreeLabel
from clld_cognacy_plugin.models import Cognate, Cognateset
from csvw.dsv import reader
import cobl2
from cobl2 import models
import clld_cognacy_plugin.models

# Repository checkout holding the IE-CoR CLDF data, located relative to
# the installed cobl2 package.
data_file_path = Path(cobl2.__file__).parent / '../..' / 'iecor'
ds = Wordlist.from_metadata(data_file_path / 'cldf' / 'cldf-metadata.json')

# Contributor photos: file stem -> POSIX path for every *.jpg in the
# CoBL-public static contributors directory.
photos = {
    p.stem: p.as_posix()
    for p in (Path(cobl2.__file__).parent / '../..' / 'CoBL-public'
              / 'cobl' / 'static' / 'contributors').iterdir()
    if p.suffix == '.jpg'}
# Alias contributor names whose photo files use ASCII-only or shortened
# stems, so lookups by display name succeed.
for k, v in {
    'Kümmel': 'Kuemmel',
    'de Vaan': 'deVaan',
    'Dewey-Findell': 'Dewey',
}.items():
    photos[k] = photos[v]


def main(args):
    # NOTE(review): function body continues beyond this chunk.
    data = Data()
def original_cldf(self):
    """Locate and load the original CLDF wordlist shipped in raw/.

    Returns the ``Wordlist`` for the first metadata file found (by name
    suffix), or ``None`` when the raw directory holds no such file.
    """
    metadata_files = (
        entry for entry in self.raw_dir.iterdir()
        if entry.name.endswith(MD_SUFFIX))
    for metadata_path in metadata_files:
        return Wordlist.from_metadata(metadata_path)
def main(args):  # pragma: no cover
    """Populate the parabank clld database from the CLDF wordlist."""
    wl = Wordlist.from_metadata(args.data_file('cldf', 'cldf-metadata.json'))
    data = Data()

    data.add(
        common.Contributor, 'barthwolfgang',
        id='barthwolfgang',
        name="Wolfgang Barth",
        url="http://www.dynamicsoflanguage.edu.au/")
    #
    # FIXME: get dataset attributes from CLDF metadata!
    #
    dataset = common.Dataset(
        id='parabank',
        name='Parabank Pronouns',
        description='Database of pronouns',
        domain='parabank.clld.org',
        publisher_name="CoEDL Centre of Excellence for the Dynamics of Language",
        publisher_place="Canberra, Australia",
        publisher_url="http://www.dynamicsoflanguage.edu.au/",
        license='http://creativecommons.org/licenses/by/4.0/',
        contact='*****@*****.**',
        jsondata={
            'license_icon': 'cc-by.png',
            'license_name': 'Creative Commons Attribution 4.0'})
    DBSession.add(dataset)

    # Register every listed contributor as an ordered editor of the dataset.
    for i, editor in enumerate(['barthwolfgang']):
        common.Editor(
            dataset=dataset,
            contributor=data['Contributor'][editor],
            ord=i + 1)

    contrib = common.Contribution(id='contrib', name='the contribution')

    # Languages from the CLDF LanguageTable.
    for l in wl['LanguageTable']:
        lang = data.add(
            models.ParabankLanguage, l['ID'],
            id=l['ID'],
            name=l['Name'],
            description=l['Notes'],
            source=l['Source_Citation'],
            classification=l['Classification'],
        )
        add_language_codes(data, lang, None, glottocode=l['Glottocode'])

    # Parameters (concepts) from the CLDF ParameterTable.
    for p in wl['ParameterTable']:
        data.add(
            common.Parameter, p['ID'],
            id=p['ID'],
            name='{0} ({1})'.format(p['Name'], p['ID']),
            #description=p['Description'],
        )

    # Forms: one ValueSet per (parameter, language) pair, one Word per form.
    for f in wl['FormTable']:
        vsid = '{0}-{1}'.format(f['Parameter_ID'], f['Language_ID'])
        vs = data['ValueSet'].get(vsid)
        if not vs:
            vs = data.add(
                common.ValueSet, vsid,
                id=vsid,
                language=data['ParabankLanguage'][f['Language_ID']],
                parameter=data['Parameter'][f['Parameter_ID']],
                contribution=contrib)
        DBSession.add(models.Word(
            id=f['ID'],
            name=f['Form'],
            comment=f.get('Comment'),
            original=f['Original_parameter'],
            valueset=vs))

    # Attach Glottolog family information; isolates get a transparent icon.
    load_families(
        data,
        [(l.glottocode, l) for l in data['ParabankLanguage'].values()],
        glottolog_repos=args.data_file('glottolog'),
        isolates_icon='tcccccc')