def cldf(dataset, concepticon, **kw):
    """Convert every raw ``*.xml`` wordlist of *dataset* to CLDF.

    The function first builds a concept lookup, then parses all XML files in
    ``dataset.raw`` into ``abvd.Wordlist`` objects, and finally calls
    ``to_cldf`` on each of them.  Concepts and languages that could not be
    mapped are collected in an ``Unmapped`` instance, which is pretty-printed
    at the end.

    :param dataset: Object providing ``conceptlist``, ``concepts``, \
    ``glottocode_by_iso``, ``raw`` and ``log``.
    :param concepticon: Not used by this function; kept for the caller's \
    signature.
    :param kw: Additional keyword arguments; ignored.
    :return: ``None``.
    """
    # Key: the part of the concept URL following ``v=``; value: Concepticon ID.
    concept_map = {
        concept.attributes['url'].split('v=')[1]: concept.concepticon_id
        for concept in dataset.conceptlist.concepts.values()
    }
    # Dataset-level concepts are written last, so they override entries taken
    # from the concept list; an empty CONCEPTICON_ID is normalised to None.
    for concept in dataset.concepts:
        concept_map[concept['ID']] = concept['CONCEPTICON_ID'] or None

    gl_map = dataset.glottocode_by_iso
    wordlists = []
    for xml in dataset.raw.glob('*.xml'):
        wl = abvd.Wordlist(dataset, xml, SECTION)
        iso = wl.language.iso
        # Only overwrite the glottocode when the ISO code is known to the map.
        if iso and iso in gl_map:
            wl.language.glottocode = gl_map[iso]
        if not wl.language.glottocode:
            # ``Logger.warn`` is a deprecated alias; ``warning`` is the
            # supported spelling.
            # NOTE(review): assumes dataset.log is a logging.Logger - confirm.
            dataset.log.warning(
                'no glottocode for language %s, iso-code %s' % (wl.language.name, iso))
        wordlists.append(wl)

    # ``sources`` is shared across all get_reference calls of this run.
    sources = {}
    unmapped = Unmapped(lambda r: int(r[0]))
    for wl in wordlists:
        # The language notes are passed as the only reference information.
        ref = get_reference(None, None, wl.language.notes, None, sources)
        args = [ref.source.id, ref.source] if ref else [None, None]
        wl.to_cldf(concept_map, unmapped, *args)
    unmapped.pprint()
def itersources(item, lang, sources):
    """Yield a ``get_reference`` result for each citation found for *item*.

    The citation text is read from ``item['Source']`` (empty string when the
    key is missing).  The placeholder ``'See Language page'`` redirects to
    ``lang['Data Sources']``.  The text is then split on blank lines if it
    contains any, otherwise on ``;`` if it contains any, otherwise it is
    treated as a single citation.

    :param item: Mapping with an optional ``'Source'`` entry.
    :param lang: Mapping; ``'Data Sources'`` is read only for the placeholder.
    :param sources: Passed through unchanged to ``get_reference``.
    """
    text = item.get('Source', '')
    if text == 'See Language page':
        text = lang['Data Sources']
    # Rewrite this author pair so its ';' is not taken as a citation
    # separator by the ';' branch below.
    text = text.strip().replace('Huber, R.; Reed, R.', 'Huber, R. and Reed, R.')

    if '\n\n' in text:
        # Blank-line separated citations carry no page information.
        for chunk in (part.strip() for part in text.split('\n\n')):
            if not chunk:
                continue
            authors, year, rem = get_author_and_year(chunk)
            yield get_reference(authors, year, rem, None, sources)
        return

    if ';' in text:
        # Semicolon separated citations are parsed including pages.
        for chunk in (part.strip() for part in text.split(';')):
            if not chunk:
                continue
            authors, year, rem, pages = get_source_and_pages(chunk)
            yield get_reference(authors, year, rem, pages, sources)
        return

    # Neither separator present: the whole text is one citation.
    authors, year, rem = get_author_and_year(text)
    yield get_reference(authors, year, rem, None, sources)
def get_ref(lang, sources):
    """Build a reference from the ``'source'`` entry of *lang*.

    A leading ``'Collectors:'`` label is dropped, a trailing page
    specification matched by ``PAGES_PATTERN`` is cut off and kept as
    ``pages``, and the remaining text is split by ``split_by_year`` into
    author, year and rest.

    :param lang: Mapping with a ``'source'`` string.
    :param sources: Passed through unchanged to ``get_reference``.
    :return: Whatever ``get_reference`` returns for the parsed parts.
    """
    label = 'Collectors:'
    text = lang['source'].strip()
    if text.startswith(label):
        # Keep only the segment directly after the label.
        text = lang['source'].split(label)[1].strip()

    pages = None
    found = PAGES_PATTERN.search(text)
    if found:
        # Both right-hand values are computed from the untruncated text.
        pages, text = found.group('pages'), text[:found.start()].strip()

    author, year, text = split_by_year(text)
    return get_reference(author, year, text, pages, sources)
def test_get_reference():
    """Check ``util.get_reference`` for full, empty and title-only input."""
    get_reference = util.get_reference

    # Author, year and title given: a truthy reference is returned.
    complete = get_reference('John Doe', '1998', 'The Title', None, {})
    assert complete

    # Nothing given: no reference at all.
    empty = get_reference(None, None, None, None, {})
    assert empty is None

    # Title only: the source id is derived from the title.
    title_only = get_reference(None, None, 'The Title', None, {})
    assert title_only.source.id == 'thetitle'