def test_Reference(self):
    """Exercise formatting and list-parsing of ``Reference`` objects.

    A reference built from a key, a page range and a trigger must format to
    the expected markup, that markup must be accepted by
    ``Reference.from_list``, and a string without the markup must be
    rejected with ``ValueError``.
    """
    from pyglottolog.objects import Reference

    reference = Reference('bib:key', '12-34', 'German')
    expected_markup = '**bib:key**:12-34<trigger "German">'
    self.assertEqual('{0}'.format(reference), expected_markup)

    # The formatted form must be parseable again (only checked for not raising).
    serialized = ['{0}'.format(reference)]
    Reference.from_list(serialized)

    # Input lacking the reference markup is expected to be refused.
    malformed = ['abc']
    with self.assertRaises(ValueError):
        Reference.from_list(malformed)
def show(args):
    """Display details of a Glottolog object.

    glottolog show <GLOTTOCODE>|<ISO-CODE>|<BIBTEXKEY>
    """
    # NOTE(review): the docstring above is kept verbatim because it reads like
    # CLI usage text that may be shown to users -- confirm before rewording.

    def headline(text):
        # Section headings are rendered bold and underlined.
        sprint(text, attrs=['bold', 'underline'])

    # An argument containing a colon is treated as a bibliographical
    # reference rather than as a languoid identifier.
    if args.args and ':' in args.args[0]:
        spec = args.args[0]
        # A leading '**' marks the full reference markup; anything else is
        # taken to be a bare key.
        ref = Reference.from_string(spec) if spec.startswith('**') \
            else Reference(key=spec)
        headline('Glottolog reference {0}'.format(ref))
        print()
        source = ref.get_source(args.repos)
        sprint(source.text())
        print()
        sprint(source)
        return

    languoid = existing_lang(args)
    print()
    headline('Glottolog languoid {0}'.format(languoid.id))
    print()
    headline('Classification:')
    args.repos.ascii_tree(languoid, maxlevel=1)
    print()
    headline('Info:')
    sprint('Path: {0}'.format(languoid.fname), 'green', attrs=['bold'])

    # Grab the references first, then drop them from the in-memory config so
    # the config dump below does not repeat what the 'Sources:' section shows.
    references = languoid.sources
    if references:
        del languoid.cfg['sources']['glottolog']
        del languoid.cfg['sources']

    for line in languoid.cfg.write_string().split('\n'):
        if line.startswith('#'):
            # Comment lines of the config dump are not displayed.
            continue
        # Section headers ('[...]') are emphasised.
        sprint(line, None, attrs=['bold'] if line.startswith('[') else [])

    headline('Sources:')
    for reference in references:
        source = reference.get_source(args.repos)
        sprint(source.id, color='green')
        sprint(source.text())
        print()
def sources(self):
    """Return the references listed under ``[sources] glottolog`` in ``self.cfg``.

    The list stored in that option is converted with ``Reference.from_list``.
    An empty list is returned when the option is not present.
    """
    if not self.cfg.has_option('sources', 'glottolog'):
        return []
    raw_refs = self.cfg.getlist('sources', 'glottolog')
    return Reference.from_list(raw_refs)
def check(args): """Check the glottolog data for consistency. glottolog check [tree|refs] """ def error(obj, msg): args.log.error(message(obj, msg)) def warn(obj, msg): args.log.warn(message(obj, msg)) def info(obj, msg): args.log.info(message(obj, msg)) what = args.args[0] if args.args else 'all' if what in ['all', 'refs']: for bibfile in args.repos.bibfiles: bibfile.check(args.log) if what not in ['all', 'tree']: return hhkeys = args.repos.bibfiles['hh.bib'].keys() iso = args.repos.iso args.log.info('checking ISO codes against %s' % iso) args.log.info('checking tree at %s' % args.repos) by_level = Counter() by_category = Counter() iso_in_gl, languoids, iso_splits = {}, {}, [] names = defaultdict(set) for lang in args.repos.languoids(): # duplicate glottocodes: if lang.id in languoids: error( lang.id, 'duplicate glottocode\n{0}\n{1}'.format( languoids[lang.id].dir, lang.dir)) languoids[lang.id] = lang for lang in languoids.values(): ancestors = lang.ancestors_from_nodemap(languoids) children = lang.children_from_nodemap(languoids) if lang.latitude and not (-90 <= lang.latitude <= 90): error(lang, 'invalid latitude: {0}'.format(lang.latitude)) if lang.longitude and not (-180 <= lang.longitude <= 180): error(lang, 'invalid longitude: {0}'.format(lang.longitude)) assert isinstance(lang.countries, list) assert isinstance(lang.macroareas, list) if 'sources' in lang.cfg: for ref in Reference.from_list( lang.cfg.getlist('sources', 'glottolog')): if ref.provider == 'hh' and ref.key not in hhkeys: error(lang, 'missing source: {0}'.format(ref)) for attr in ['classification_comment', 'ethnologue_comment']: obj = getattr(lang, attr) if obj: obj.check(lang, hhkeys, args.log) names[lang.name].add(lang) by_level.update([lang.level.name]) if lang.level == Level.language: by_category.update([lang.category]) if iso and lang.iso: if lang.iso not in iso: warn(lang, 'invalid ISO-639-3 code [%s]' % lang.iso) else: isocode = iso[lang.iso] if lang.iso in iso_in_gl: error( isocode, 
'duplicate: {0}, {1}'.format(iso_in_gl[lang.iso].id, lang.id)) iso_in_gl[lang.iso] = lang if isocode.is_retired and lang.category != 'Bookkeeping': if isocode.type == 'Retirement/split': iso_splits.append(lang) else: msg = repr(isocode) level = info if len(isocode.change_to) == 1: level = warn msg += ' changed to [%s]' % isocode.change_to[ 0].code level(lang, msg) if not lang.id.startswith( 'unun9') and lang.id not in args.repos.glottocodes: error(lang, 'unregistered glottocode') for attr in ['level', 'name']: if not getattr(lang, attr): error(lang, 'missing %s' % attr) if lang.level == Level.language: parent = ancestors[-1] if ancestors else None if parent and parent.level != Level.family: error( lang, 'invalid nesting of language under {0}'.format( parent.level)) for child in children: if child.level != Level.dialect: error( child, 'invalid nesting of {0} under language'.format( child.level)) elif lang.level == Level.family: for d in lang.dir.iterdir(): if d.is_dir(): break else: error(lang, 'family without children') if iso: changed_to = set(chain(*[code.change_to for code in iso.retirements])) for code in sorted(iso.languages): if code.type == 'Individual/Living': if code not in changed_to: if code.code not in iso_in_gl: info(repr(code), 'missing') for lang in iso_splits: isocode = iso[lang.iso] missing = [ s.code for s in isocode.change_to if s.code not in iso_in_gl ] if missing: warn( lang, '{0} missing new codes: {1}'.format( repr(isocode), ', '.join(missing))) for name, gcs in sorted(names.items()): if len(gcs) > 1: # duplicate names: method = error if len([1 for n in gcs if n.level != Level.dialect]) <= 1: # at most one of the languoids is not a dialect, just warn method = warn if len([ 1 for n in gcs if (not n.lineage) or (n.lineage[0][1] != 'book1242') ]) <= 1: # at most one of the languoids is not in bookkeping, just warn method = warn method( name, 'duplicate name: {0}'.format(', '.join( sorted([ '{0} <{1}>'.format(n.id, n.level.name[0]) for n in gcs 
])))) def log_counter(counter, name): msg = [name + ':'] maxl = max([len(k) for k in counter.keys()]) + 1 for k, l in counter.most_common(): msg.append(('{0:<%s} {1:>8,}' % maxl).format(k + ':', l)) msg.append( ('{0:<%s} {1:>8,}' % maxl).format('', sum(list(counter.values())))) print('\n'.join(msg)) log_counter(by_level, 'Languoids by level') log_counter(by_category, 'Languages by category') return by_level