コード例 #1
0
def cldf(dataset, concepticon, **kw):
    """Build wordlists from the dataset's raw XML files and export them.

    Steps, in order:

    1. Build a concept map from ``dataset.conceptlist`` (keyed by the text
       following ``v=`` in each concept's ``url`` attribute), then apply
       the entries of ``dataset.concepts`` on top (keyed by ``ID``).
    2. Create one ``abvd.Wordlist`` per ``*.xml`` file in ``dataset.raw``,
       filling in the language's glottocode from its ISO code where
       ``dataset.glottocode_by_iso`` has a match, and logging a warning
       for languages that end up without a glottocode.
    3. Call ``to_cldf`` on every wordlist, passing the reference derived
       from the language notes (if any), and finally print the report of
       unmapped items.

    :param dataset: Dataset object providing ``conceptlist``, ``concepts``, \
    ``glottocode_by_iso``, ``raw`` and ``log``.
    :param concepticon: Unused by this function; kept for the caller's interface.
    :param kw: Unused extra keyword arguments; kept for the caller's interface.
    """
    concept_map = {
        c.attributes['url'].split('v=')[1]: c.concepticon_id
        for c in dataset.conceptlist.concepts.values()
    }
    # Applied second, so these entries win on key clashes; an empty
    # CONCEPTICON_ID is normalised to None.
    for c in dataset.concepts:
        concept_map[c['ID']] = c['CONCEPTICON_ID'] or None

    gl_map = dataset.glottocode_by_iso
    wordlists = []
    for xml in dataset.raw.glob('*.xml'):
        wl = abvd.Wordlist(dataset, xml, SECTION)
        iso = wl.language.iso
        if iso and iso in gl_map:
            wl.language.glottocode = gl_map[iso]
        if not wl.language.glottocode:
            # ``Logger.warn`` is a deprecated alias that no longer exists on
            # Python 3.13+; ``warning`` is the supported spelling.
            # NOTE(review): assumes dataset.log is a standard logging.Logger.
            dataset.log.warning(
                'no glottocode for language %s, iso-code %s',
                wl.language.name, wl.language.iso)
        wordlists.append(wl)

    # One registry shared by all get_reference calls in this run.
    sources = {}
    unmapped = Unmapped(lambda r: int(r[0]))
    for wl in wordlists:
        ref = get_reference(None, None, wl.language.notes, None, sources)
        if ref:
            wl.to_cldf(concept_map, unmapped, ref.source.id, ref.source)
        else:
            wl.to_cldf(concept_map, unmapped, None, None)
    unmapped.pprint()
コード例 #2
0
def itersources(item, lang, sources):
    """Yield one reference per citation found in the item's source text.

    The text comes from ``item['Source']`` (empty string when the key is
    absent); the placeholder ``'See Language page'`` redirects to
    ``lang['Data Sources']``. The stripped text is then handled in one of
    three ways:

    * contains a blank line: split on blank lines, each non-empty piece is
      parsed with ``get_author_and_year``;
    * otherwise contains ``;``: split on semicolons, each non-empty piece
      is parsed with ``get_source_and_pages``;
    * otherwise: the whole text is parsed with ``get_author_and_year``.

    Every parsed piece is passed to ``get_reference`` together with
    ``sources`` and the result is yielded.
    """
    text = item.get('Source', '')
    if text == 'See Language page':
        text = lang['Data Sources']
    # Presumably rewritten so the ';' inside this author list is not taken
    # for a citation separator by the semicolon branch below.
    text = text.strip().replace(
        'Huber, R.; Reed, R.', 'Huber, R. and Reed, R.')

    if '\n\n' in text:
        for chunk in text.split('\n\n'):
            chunk = chunk.strip()
            if not chunk:
                continue
            authors, year, rem = get_author_and_year(chunk)
            yield get_reference(authors, year, rem, None, sources)
        return

    if ';' in text:
        for chunk in text.split(';'):
            chunk = chunk.strip()
            if not chunk:
                continue
            authors, year, rem, pages = get_source_and_pages(chunk)
            yield get_reference(authors, year, rem, pages, sources)
        return

    authors, year, rem = get_author_and_year(text)
    yield get_reference(authors, year, rem, None, sources)
コード例 #3
0
def get_ref(lang, sources):
    """Build a reference from the ``'source'`` entry of ``lang``.

    A leading ``'Collectors:'`` label is dropped, a page specification
    matched by ``PAGES_PATTERN`` is cut off the end of the text and kept
    as ``pages``, and the remainder is split by ``split_by_year`` before
    everything is handed to ``get_reference`` together with ``sources``.
    """
    text = lang['source'].strip()
    if text.startswith('Collectors:'):
        # Keep only what follows the label (up to a repeated label, if any).
        text = text.split('Collectors:')[1].strip()

    found = PAGES_PATTERN.search(text)
    if found:
        pages = found.group('pages')
        # Everything from the start of the match onwards is discarded.
        text = text[:found.start()].strip()
    else:
        pages = None

    author, year, title = split_by_year(text)
    return get_reference(author, year, title, pages, sources)
コード例 #4
0
ファイル: test_util.py プロジェクト: lexibank/pylexibank
def test_get_reference():
    """Check ``util.get_reference`` for full, empty and title-only input."""
    # Author, year and title given: a truthy reference is returned.
    complete = util.get_reference('John Doe', '1998', 'The Title', None, {})
    assert complete

    # Nothing to build a reference from: None is returned.
    nothing = util.get_reference(None, None, None, None, {})
    assert nothing is None

    # Title only: the source id is derived from the title.
    title_only = util.get_reference(None, None, 'The Title', None, {})
    assert title_only.source.id == 'thetitle'