def test_Record(self):
    """Exercise ``Record`` construction, field access, serialization and rendering.

    Covers multi-valued fields (``author``/``editor``), the BibTeX string
    representations, the linearized ``text()`` output for several entry types,
    every supported ``format()`` flavour and the fallback genre for an unknown
    entry type.
    """
    from clld.lib.bibtex import Record, EntryType

    # Multi-valued fields: a list and an ' and '-joined string are both accepted.
    rec = Record('article', '1', author=['a', 'b'], editor='a and b')
    self.assertEqual(rec['author'], 'a and b')
    self.assertEqual(rec.get('author'), rec.getall('author'))
    self.assertEqual(rec['editor'], rec.get('editor'))
    self.assertEqual(rec.getall('editor'), ['a', 'b'])

    rec = Record(
        'book',
        '1',
        title='The Title',
        author='author',
        editor='ed',
        booktitle='bt',
        school='s',
        issue='i',
        pages='1-4',
        publisher='M',
        note="Revised edition")
    self.assertIn('@book', rec.__unicode__())
    self.assertIn('@book', rec.__str__())
    self.assertIn('bt', rec.text())

    # Smoke test: each output format must render without raising.
    for fmt in ['txt', 'en', 'ris', 'mods']:
        rec.format(fmt)

    # Smoke test the alternate constructors; their results are not inspected.
    Record.from_string(rec.__unicode__(), lowercase=True)
    Record.from_object(Mock())

    rec = Record(
        'incollection',
        '1',
        title='The Title',
        editor='ed',
        booktitle='bt',
        school='s',
        issue='i',
        pages='1-4',
        publisher='M',
        note="Revised edition")
    self.assertIn('In ', rec.text())

    rec = Record(
        'article',
        '1',
        title='The Title',
        journal='The Journal',
        volume="The volume",
        issue='issue')
    # assertIn (rather than assertTrue(x in y)) reports both operands on failure
    # and matches the other membership checks in this test.
    self.assertIn('The Journal', rec.text())

    # An unrecognized entry type is expected to map to the generic misc genre.
    rec = Record('xmisc', '1', note='Something')
    self.assertEqual(rec.genre, EntryType.misc)
    self.assertIn('Something', rec.text())
def add_sources(args, data):
    """Add bibliography records to ``data`` as ``Source`` objects.

    Records are taken first from the ``BIB`` string (split on ``@``) and then
    from the ``phoible-references.bib`` file resolved through
    ``args.data_file``; a record is only added when its id is not yet present
    in ``data['Source']``.  Afterwards, keys of the form ``<int>_<rest>`` get an
    additional alias ``<rest>`` unless that key already exists.
    """
    bibfile_records = Database.from_file(
        args.data_file('phoible-references.bib'), lowercase=True)
    extra_records = [
        Record.from_string('@' + chunk, lowercase=True)
        for chunk in nfilter(BIB.split('@'))
    ]

    for record in chain(extra_records, bibfile_records):
        if record.id not in data['Source']:
            data.add(Source, record.id, _obj=bibtex2source(record))

    # Make records whose BibTeX key carries a numeric prefix reachable by the
    # un-prefixed key as well.  Iterate over a snapshot of the keys because the
    # mapping is extended inside the loop.
    for key in list(data['Source'].keys()):
        if '_' not in key:
            continue
        prefix, remainder = key.split('_', 1)
        try:
            int(prefix)  # only numeric prefixes qualify for an alias
            if remainder not in data['Source']:
                data['Source'][remainder] = data['Source'][key]
        except (ValueError, TypeError):
            pass
def test_linearization(self):
    """Check that ``Record.text()`` linearizes BibTeX fixtures as expected.

    Each fixture pairs a BibTeX source string with the citation text that
    ``Record.from_string(...).text()`` must produce for it.
    """
    from clld.lib.bibtex import Record

    # NOTE(review): the BibTeX fixtures reproduce the literals exactly as they
    # appear in this file (fields separated by single spaces) -- confirm that
    # this is the layout Record.from_string is meant to be fed here.
    dayley_bib = (
        "@book{Dayley-1985, "
        "address = {Berkeley}, "
        "author = {Dayley, Jon P.}, "
        "iso_code = {tzt; tzj}, "
        "olac_field = {general_linguistics; semantics; morphology; typology; syntax}, "
        "publisher = {University of California Press}, "
        "series = {University of California Publications in Linguistics}, "
        "title = {Tzutujil Grammar}, "
        "volume = {107}, "
        "wals_code = {tzu}, "
        "year = {1985} "
        "} "
    )
    dayley_txt = (
        "Dayley, Jon P. 1985. Tzutujil Grammar. (University of California "
        "Publications in Linguistics, 107.) Berkeley: University of California "
        "Press."
    )

    cook_bib = (
        "@book{318762, "
        "address = {Vancouver}, "
        "author = {Cook, Eung-Do}, "
        "pages = {670}, "
        "publisher = {UBC Press}, "
        "series = {First Nations Languages Series}, "
        "title = {A Tsilhqút'ín Grammar}, "
        "year = {2013} "
        "} "
    )
    cook_txt = (
        "Cook, Eung-Do. 2013. A Tsilhqút'ín Grammar. (First Nations Languages "
        "Series.) Vancouver: UBC Press. 670pp."
    )

    healey_bib = (
        "@inbook{316361, "
        "author = {Healey, Alan}, "
        "booktitle = {New Guinea area languages and language study}, "
        "pages = {223-232}, "
        "title = {History of research in Austronesian languages: Admiralty Islands area}, "
        "volume = {2} "
        "} "
    )
    healey_txt = (
        "Healey, Alan. n.d. History of research in Austronesian languages: "
        "Admiralty Islands area. 2. 223-232."
    )

    moisik_bib = (
        "@inproceedings{moisikesling2011, "
        "author = {Moisik, Scott R. and Esling, John H.}, "
        "booktitle = {Proceedings of the Congress of Phonetic Sciences (ICPhS XVII)}, "
        "pages = {1406-1409}, "
        "title = {The 'whole larynx' approach to laryngeal features}, "
        "year = {2011} "
        "}"
    )
    moisik_txt = (
        "Moisik, Scott R. and Esling, John H. 2011. The 'whole larynx' approach "
        "to laryngeal features. In Proceedings of the Congress of "
        "Phonetic Sciences (ICPhS XVII), 1406-1409."
    )

    fixtures = [
        (dayley_bib, dayley_txt),
        (cook_bib, cook_txt),
        (healey_bib, healey_txt),
        (moisik_bib, moisik_txt),
    ]
    for source, citation in fixtures:
        record = Record.from_string(source)
        self.assertEqual(record.text(), citation)
def test_Record(self):
    """Exercise ``Record`` construction, field access, serialization and rendering.

    Covers multi-valued fields (``author``/``editor``), the BibTeX string
    representations, the linearized ``text()`` output for several entry types,
    every supported ``format()`` flavour and the fallback genre for an unknown
    entry type.
    """
    from clld.lib.bibtex import Record, EntryType

    # Multi-valued fields: a list and an ' and '-joined string are both accepted.
    rec = Record('article', '1', author=['a', 'b'], editor='a and b')
    self.assertEqual(rec['author'], 'a and b')
    self.assertEqual(rec.get('author'), rec.getall('author'))
    self.assertEqual(rec['editor'], rec.get('editor'))
    self.assertEqual(rec.getall('editor'), ['a', 'b'])

    rec = Record(
        'book',
        '1',
        title='The Title',
        author='author',
        editor='ed',
        booktitle='bt',
        school='s',
        issue='i',
        pages='1-4',
        publisher='M',
        note="Revised edition")
    self.assertIn('@book', rec.__unicode__())
    self.assertIn('@book', rec.__str__())
    self.assertIn('bt', rec.text())

    # Smoke test: each output format must render without raising.
    for fmt in ['txt', 'en', 'ris', 'mods']:
        rec.format(fmt)

    # Smoke test the alternate constructors; their results are not inspected.
    Record.from_string(rec.__unicode__(), lowercase=True)
    Record.from_object(Mock())

    rec = Record(
        'incollection',
        '1',
        title='The Title',
        editor='ed',
        booktitle='bt',
        school='s',
        issue='i',
        pages='1-4',
        publisher='M',
        note="Revised edition")
    self.assertIn('In ', rec.text())

    rec = Record(
        'article',
        '1',
        title='The Title',
        journal='The Journal',
        volume="The volume",
        issue='issue')
    # assertIn (rather than assertTrue(x in y)) reports both operands on failure
    # and matches the other membership checks in this test.
    self.assertIn('The Journal', rec.text())

    # An unrecognized entry type is expected to map to the generic misc genre.
    rec = Record('xmisc', '1', note='Something')
    self.assertEqual(rec.genre, EntryType.misc)
    self.assertIn('Something', rec.text())
def test_linearization(self):
    """Check that ``Record.text()`` linearizes BibTeX fixtures as expected.

    Each fixture pairs a BibTeX source string with the citation text that
    ``Record.from_string(...).text()`` must produce for it.
    """
    from clld.lib.bibtex import Record

    # NOTE(review): the BibTeX fixtures reproduce the literals exactly as they
    # appear in this file (fields separated by single spaces) -- confirm that
    # this is the layout Record.from_string is meant to be fed here.
    dayley_bib = (
        "@book{Dayley-1985, "
        "address = {Berkeley}, "
        "author = {Dayley, Jon P.}, "
        "iso_code = {tzt; tzj}, "
        "olac_field = {general_linguistics; semantics; morphology; typology; syntax}, "
        "publisher = {University of California Press}, "
        "series = {University of California Publications in Linguistics}, "
        "title = {Tzutujil Grammar}, "
        "volume = {107}, "
        "wals_code = {tzu}, "
        "year = {1985} "
        "} "
    )
    dayley_txt = (
        "Dayley, Jon P. 1985. Tzutujil Grammar. (University of California "
        "Publications in Linguistics, 107.) Berkeley: University of California "
        "Press."
    )

    cook_bib = (
        "@book{318762, "
        "address = {Vancouver}, "
        "author = {Cook, Eung-Do}, "
        "pages = {670}, "
        "publisher = {UBC Press}, "
        "series = {First Nations Languages Series}, "
        "title = {A Tsilhqút'ín Grammar}, "
        "year = {2013} "
        "} "
    )
    cook_txt = (
        "Cook, Eung-Do. 2013. A Tsilhqút'ín Grammar. (First Nations Languages "
        "Series.) Vancouver: UBC Press. 670pp."
    )

    healey_bib = (
        "@inbook{316361, "
        "author = {Healey, Alan}, "
        "booktitle = {New Guinea area languages and language study}, "
        "pages = {223-232}, "
        "title = {History of research in Austronesian languages: Admiralty Islands area}, "
        "volume = {2} "
        "} "
    )
    healey_txt = (
        "Healey, Alan. n.d. History of research in Austronesian languages: "
        "Admiralty Islands area. 2. 223-232."
    )

    moisik_bib = (
        "@inproceedings{moisikesling2011, "
        "author = {Moisik, Scott R. and Esling, John H.}, "
        "booktitle = {Proceedings of the Congress of Phonetic Sciences (ICPhS XVII)}, "
        "pages = {1406-1409}, "
        "title = {The 'whole larynx' approach to laryngeal features}, "
        "year = {2011} "
        "}"
    )
    moisik_txt = (
        "Moisik, Scott R. and Esling, John H. 2011. The 'whole larynx' approach "
        "to laryngeal features. In Proceedings of the Congress of "
        "Phonetic Sciences (ICPhS XVII), 1406-1409."
    )

    fixtures = [
        (dayley_bib, dayley_txt),
        (cook_bib, cook_txt),
        (healey_bib, healey_txt),
        (moisik_bib, moisik_txt),
    ]
    for source, citation in fixtures:
        record = Record.from_string(source)
        self.assertEqual(record.text(), citation)
def test_Record(self):
    """Smoke-test ``Record``: string representations, ``text()`` and all formats.

    Builds a book record, checks that its BibTeX representations mention the
    entry type and that the linearized text contains the title, then makes sure
    every output format and both alternate constructors run without raising.
    """
    from clld.lib.bibtex import Record

    rec = Record(
        'book',
        '1',
        title='The Title',
        editor='ed',
        booktitle='bt',
        school='s',
        issue='i',
        pages='1-4',
        publisher='M')

    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn('@book', rec.__unicode__())
    self.assertIn('@book', rec.__str__())
    self.assertIn('The Title', rec.text())

    # Each output format must render without raising.
    for fmt in ['txt', 'en', 'ris', 'mods']:
        rec.format(fmt)

    # The alternate constructors are only exercised; their results are unused,
    # so they are not bound to a name.
    Record.from_string(rec.__unicode__())
    Record.from_object(Mock())
def test_Record(self):
    """Smoke-test ``Record``: string representations, ``text()`` and all formats.

    Builds a book record, checks that its BibTeX representations mention the
    entry type and that the linearized text contains the title, then makes sure
    every output format and both alternate constructors run without raising.
    """
    from clld.lib.bibtex import Record

    rec = Record(
        'book',
        '1',
        title='The Title',
        editor='ed',
        booktitle='bt',
        school='s',
        issue='i',
        pages='1-4',
        publisher='M')

    # assertIn reports both operands on failure, unlike assertTrue(x in y).
    self.assertIn('@book', rec.__unicode__())
    self.assertIn('@book', rec.__str__())
    self.assertIn('The Title', rec.text())

    # Each output format must render without raising.
    for fmt in ['txt', 'en', 'ris', 'mods']:
        rec.format(fmt)

    # The alternate constructors are only exercised; their results are unused,
    # so they are not bound to a name.
    Record.from_string(rec.__unicode__())
    Record.from_object(Mock())
def add_sources(args, data):
    """Add bibliography records to ``data`` as ``Source`` objects.

    Records are taken first from the ``BIB`` string (split on ``@``) and then
    from the ``phoible-references.bib`` file resolved through
    ``args.data_file``; a record is only added when its id is not yet present
    in ``data['Source']``.  Afterwards, keys of the form ``<int>_<rest>`` get an
    additional alias ``<rest>`` unless that key already exists.
    """
    bibfile_records = Database.from_file(
        args.data_file('phoible-references.bib'), lowercase=True)
    extra_records = [
        Record.from_string('@' + chunk, lowercase=True)
        for chunk in nfilter(BIB.split('@'))
    ]

    for record in chain(extra_records, bibfile_records):
        if record.id not in data['Source']:
            data.add(Source, record.id, _obj=bibtex2source(record))

    # Make records whose BibTeX key carries a numeric prefix reachable by the
    # un-prefixed key as well.  Iterate over a snapshot of the keys because the
    # mapping is extended inside the loop.
    for key in list(data['Source'].keys()):
        if '_' not in key:
            continue
        prefix, remainder = key.split('_', 1)
        try:
            int(prefix)  # only numeric prefixes qualify for an alias
            if remainder not in data['Source']:
                data['Source'][remainder] = data['Source'][key]
        except (ValueError, TypeError):
            pass
def main(args):  # pragma: no cover
    """Load the PHOIBLE dataset described by ``DS`` into the database.

    Reads the structure dataset and creates, in this order: sources (from the
    dataset and from the ``BIB`` string), contributors and their references,
    the dataset record with its editors, varieties (with family information
    and a color per family), segments, id replacements read from
    ``replacements.json``, per-segment feature values, inventories and finally
    the phonemes with their value sets and references.

    :param args: command line arguments; not used inside this function.
    """
    ds = StructureDataset.from_metadata(DS)
    data = Data()

    # --- sources -----------------------------------------------------------
    for source in ds.sources:
        data.add(common.Source, source.id, _obj=bibtex2source(source))

    ext = [
        Record.from_string('@' + s, lowercase=True) for s in nfilter(BIB.split('@'))
    ]
    for rec in ext:
        # Records shipped with the dataset take precedence over the extras.
        if rec.id not in data['Source']:
            data.add(common.Source, rec.id, _obj=bibtex2source(rec))

    # --- contributors ------------------------------------------------------
    for contrib in ds['contributors.csv']:
        o = data.add(
            common.Contributor,
            contrib['ID'],
            id=contrib['ID'].upper(),
            name=contrib['Name'],
            description=contrib['Description'],
            url=contrib['URL'],
            jsondata={
                'readme': contrib['Readme'],
                'contents': contrib['Contents']
            },
        )
        for src in contrib['Source']:
            DBSession.add(
                models.ContributorReference(source=data['Source'][src], contributor=o))

    # --- dataset and editors -----------------------------------------------
    dataset = data.add(
        common.Dataset,
        'phoible',
        id='phoible',
        name='PHOIBLE 2.0',
        description='PHOIBLE 2.0',
        publisher_name="Max Planck Institute for the Science of Human History",
        publisher_place="Jena",
        publisher_url="https://www.shh.mpg.de",
        domain='phoible.org',
        license='https://creativecommons.org/licenses/by-sa/3.0/',
        contact='*****@*****.**',
        jsondata={
            'license_icon': 'https://i.creativecommons.org/l/by-sa/3.0/88x31.png',
            'license_name':
                'Creative Commons Attribution-ShareAlike 3.0 Unported License'
        })

    for i, (cid, name) in enumerate([
        ('UZ', "Steven Moran"),
        ('mccloy', "Daniel McCloy"),
    ], start=1):
        # Reuse an existing contributor when one was loaded above.
        contrib = data['Contributor'].get(cid)
        if not contrib:
            contrib = common.Contributor(id=cid, name=name)
        DBSession.add(
            common.Editor(dataset=dataset, ord=i, contributor=contrib))

    # --- varieties ---------------------------------------------------------
    glottolog = Glottolog(
        Path(phoible.__file__).parent.parent.parent.parent.joinpath(
            'glottolog', 'glottolog'))

    for lang in ds['LanguageTable']:
        data.add(
            models.Variety,
            lang['ID'],
            id=lang['ID'],
            name=lang['Name'],
        )

    # Only varieties whose id has exactly 8 characters are passed on for
    # family assignment.
    load_families(
        data,
        [(v.id, v) for v in data['Variety'].values() if len(v.id) == 8],
        glottolog.repos)
    DBSession.flush()

    # assign color codes:
    families = defaultdict(list)
    for variety in data['Variety'].values():
        families[variety.family_pk].append(variety)

    colors = color.qualitative_colors(len(families))
    # Families are processed from largest to smallest.
    for i, langs in enumerate(sorted(families.values(), key=lambda v: -len(v))):
        for variety in langs:
            variety.jsondata = {'color': colors[i]}

    # --- segments ----------------------------------------------------------
    for segment in ds['ParameterTable']:
        # Drop every character whose Unicode name starts with COMBINING or
        # MODIFIER.  The result must be a plain string: the previous code had a
        # stray trailing comma here which turned it into a 1-tuple.
        equivalence_class = ''.join(
            c for c in segment['Name']
            if unicodedata.name(c).split()[0] not in ['COMBINING', 'MODIFIER'])
        data.add(
            models.Segment,
            segment['ID'],
            id=segment['ID'],
            name=segment['Name'],
            description=segment['Description'],
            segment_class=segment['SegmentClass'],
            equivalence_class=equivalence_class)
    DBSession.flush()

    # --- redirects ---------------------------------------------------------
    # Add redirects for old language pages! get relevant ISO codes and map to
    # Glottocode!
    for model, repls in load(
            Path(phoible.__file__).parent.parent / 'replacements.json').items():
        if model == 'Language':
            languoids = {lg.id: lg for lg in glottolog.languoids()}
            iso_languoids = {lg.iso: lg for lg in languoids.values() if lg.iso}
            gl_in_phoible = set(data['Variety'].keys())
            for oid, nid in repls.items():
                gls = descendants_from_nodemap(
                    iso_languoids.get(oid), languoids).intersection(gl_in_phoible)
                if gls:
                    nid = gls.pop()
                    # Report when several candidates remain after picking one.
                    if len(gls) > 1:
                        print('+++', oid, gls)
                else:
                    # No matching variety: keep the replacement id from the file.
                    print('---', oid)
                common.Config.add_replacement(oid, nid, common.Language)
        elif model == 'Parameter':
            segments_in_phoible = set(data['Segment'].keys())
            for oid, nid in repls.items():
                # Replacement targets that are not loaded segments become None.
                id_ = nid if nid in segments_in_phoible else None
                common.Config.add_replacement(oid, id_, common.Parameter)

    # --- segment features --------------------------------------------------
    for segment in ds['ParameterTable']:
        for i, (k, v) in enumerate(sorted(segment.items())):
            # Every column besides the core ones is stored as key/value data.
            if k not in ['ID', 'Name', 'Description', 'SegmentClass']:
                DBSession.add(
                    common.Parameter_data(
                        key=feature_name(k),
                        value=v,
                        ord=i,
                        object_pk=data['Segment'][segment['ID']].pk))

    # --- inventories -------------------------------------------------------
    for inventory in ds['contributions.csv']:
        inv = data.add(
            models.Inventory,
            inventory['ID'],
            id=inventory['ID'],
            name='{0} ({1} {2})'.format(
                inventory['Name'],
                inventory['Contributor_ID'].upper(),
                inventory['ID'],
            ),
            source_url=inventory['URL'],
            count_tone=inventory['count_tones'],
            count_vowel=inventory['count_vowels'],
            count_consonant=inventory['count_consonants'],
        )
        DBSession.add(
            common.ContributionContributor(
                contribution=inv,
                contributor=data['Contributor'][
                    inventory['Contributor_ID'].upper()]))
        for src in inventory['Source']:
            DBSession.add(
                common.ContributionReference(contribution=inv, source=data['Source'][src]))

    # --- phonemes ----------------------------------------------------------
    for phoneme in ds['ValueTable']:
        lang = data['Variety'][phoneme['Language_ID']]
        inv = data['Inventory'][phoneme['Contribution_ID']]
        # The first phoneme seen for an inventory determines its language.
        if not inv.language:
            inv.language = lang
        vs = common.ValueSet(
            id=phoneme['ID'],
            contribution=inv,
            language=lang,
            parameter=data['Segment'][phoneme['Parameter_ID']])

        for ref in phoneme['Source']:
            DBSession.add(
                common.ValueSetReference(source=data['Source'][ref], valueset=vs))

        DBSession.add(
            models.Phoneme(
                id=phoneme['ID'],
                name='%s %s' % (phoneme['Value'], inv.name),
                allophones=' '.join(phoneme['Allophones']),
                marginal=phoneme['Marginal'],
                valueset=vs))