コード例 #1
0
ファイル: iso.py プロジェクト: clld/glottolog
def bibtex():
    """Regenerate ``iso6393.bib`` from the ISO 639-3 change requests.

    If the file already exists, every ``@misc`` record in it is parsed and
    any ``glottolog_ref_id`` it carries is remembered under the record's id.
    That mapping is handed to ``change_request_as_source`` (presumably so
    the id can be carried over into the regenerated entry).

    All entries are rendered *before* the file is reopened for writing.
    Opening with ``"w"`` truncates the file immediately, so an exception
    raised while iterating or converting change requests would otherwise
    wipe out the existing file together with the ids stored in it.
    """
    bib = references_path("bibtex", "iso6393.bib")

    glottolog_ref_ids = {}
    if bib.exists():
        with bib.open(encoding="utf8") as fp:
            # split() swallows the "@misc" marker, so it is put back in
            # front of every non-empty chunk before parsing.
            for chunk in fp.read().split("@misc"):
                if not chunk.strip():
                    continue
                rec = Source.from_bibtex("@misc" + chunk)
                if "glottolog_ref_id" in rec:
                    glottolog_ref_ids[rec.id] = rec["glottolog_ref_id"]

    # NOTE(review): groupby() only merges *adjacent* rows, so this relies on
    # iter_change_requests() yielding rows already grouped by "CR Number".
    entries = [
        change_request_as_source(id_, list(rows), glottolog_ref_ids).bibtex()
        for id_, rows in groupby(iter_change_requests(), lambda c: c["CR Number"])
    ]

    # Only now is it safe to truncate the old file.
    with bib.open("w", encoding="utf8") as fp:
        for entry in entries:
            fp.write(entry)
            fp.write("\n\n")
コード例 #2
0
ファイル: iso.py プロジェクト: billbrasky1/glottolog
def bibtex():
    """Rewrite ``iso6393.bib`` with one entry per ISO 639-3 change request.

    ``glottolog_ref_id`` values found in the current file (if it exists) are
    collected per record id and passed on to ``change_request_as_source``.

    The complete output is built in memory first and the file is opened for
    writing only afterwards: mode ``'w'`` truncates on open, so building the
    output while the file is already open would lose the old content (and
    the ids just read from it) whenever generating an entry fails.
    """
    bib = references_path('bibtex', 'iso6393.bib')

    glottolog_ref_ids = {}
    if bib.exists():
        with bib.open(encoding='utf8') as fp:
            content = fp.read()
        # Splitting drops the '@misc' marker; it is re-added for parsing.
        for raw in content.split('@misc'):
            if raw.strip():
                rec = Source.from_bibtex('@misc' + raw)
                if 'glottolog_ref_id' in rec:
                    glottolog_ref_ids[rec.id] = rec['glottolog_ref_id']

    # NOTE(review): groupby() groups consecutive rows only; this assumes
    # iter_change_requests() yields rows ordered by 'CR Number'.
    rendered = []
    for id_, rows in groupby(iter_change_requests(),
                             lambda c: c['CR Number']):
        source = change_request_as_source(id_, list(rows), glottolog_ref_ids)
        rendered.append(source.bibtex())

    # Truncate and rewrite the file only after everything rendered fine.
    with bib.open('w', encoding='utf8') as fp:
        for entry in rendered:
            fp.write(entry)
            fp.write('\n\n')
コード例 #3
0
ファイル: _bibfiles.py プロジェクト: clld/glottolog
# _bibfiles.py - ordered collection of bibfiles with load/save api

import datetime

from six import string_types

from pyglottolog.util import references_path, read_ini
from pyglottolog.monsterlib import _bibtex
from pyglottolog.monsterlib._bibfiles_db import Database

# Names exported by ``from <this module> import *``.
__all__ = ['Collection', 'BibFile', 'Database']

# Result of references_path('bibtex'), evaluated once at import time.
# Presumably the directory holding the BibTeX files - TODO confirm.
DIR = references_path('bibtex')


class Collection(list):
    """Directory with an INI-file with settings for BibTeX files inside."""

    _encoding = 'utf-8-sig'

    @classmethod
    def _bibfiles(cls, directory):
        """Read the INI-file, yield bibfile instances for sections."""
        cfg = read_ini(directory.parent.joinpath('BIBFILES.ini'))
        for s in cfg.sections():
            if not s.endswith('.bib'):
                continue
            filepath = directory.joinpath(s)
            assert filepath.exists()
            sortkey = cfg.get(s, 'sortkey')
            if sortkey.lower() == 'none':
コード例 #4
0
ファイル: monster.py プロジェクト: billbrasky1/glottolog
def main(repos=DATA_DIR, rebuild=False):
    """Run the whole monster-bibliography pipeline for one repository.

    In order: open (or rebuild) the sqlite db of the bibfiles, merge its
    entries, annotate them with macro_area, hhtype, lgcode and inlg, print
    some statistics, write the csv and json side files, trickle back into
    the bibfiles (``db.trickle``) and save the result as
    ``monster-utf8.bib``.

    :param repos: passed as ``repos`` to every path helper and to HHTypes.
    :param rebuild: passed as ``rebuild`` to ``bibfiles.to_sqlite``.
    """
    def log(step):
        # Timestamped progress line: "<ctime> <step>".
        print('%s %s' % (time.ctime(), step))

    bibfiles = _bibfiles.Collection(references_path('bibtex', repos=repos))
    previous = references_path('monster.csv', repos=repos)
    replacements = build_path('monster-replacements.json', repos=repos)
    monster = _bibfiles.BibFile(
        build_path('monster-utf8.bib', repos=repos), encoding='utf-8', sortkey='bibkey')
    tree = languoids_path('tree', repos=repos)
    hht = HHTypes(repos=repos)

    log('open/rebuild bibfiles db')
    db = bibfiles.to_sqlite(
        build_path('_bibfiles.sqlite3', repos=repos).as_posix(),
        rebuild=rebuild)

    log('compile_monster')
    entries = dict(db.merged())

    log('load hh.bib')
    hhbib = bibfiles['hh.bib'].load()

    # macro_area is derived from lgcode where lgcode was assigned manually.
    log('macro_area_from_lgcode')
    entries = macro_area_from_lgcode(entries, tree)

    # hhtype annotation; hhtypes are ranked by looking them up in hht.
    log('annotate hhtype')
    entries = markconservative(
        entries,
        hht.triggers,
        hhbib,
        hht,
        build_path('monstermark-hht.txt', repos=repos),
        rank=lambda hhtype: hht[hhtype])

    ltriggers = languoids.load_triggers(tree=tree)

    # lgcode annotation.
    log('annotate lgcode')
    entries = markconservative(
        entries,
        ltriggers['lgcode'],
        hhbib,
        hht,
        build_path('monstermark-lgc.txt', repos=repos))

    # inlg annotation.
    log('add_inlg_e')
    entries = add_inlg_e(entries, ltriggers['inlg'])

    # Statistics: number of entries, and how many carry each tracked field.
    tracked = ['lgcode', 'hhtype', 'macro_area']
    stats = Counter()
    print(time.ctime())
    for _, fields in entries.values():
        stats['entry'] += 1
        stats.update(name for name in tracked if name in fields)
    print("# entries", stats['entry'])
    for name in tracked:
        print("with " + name, stats[name])

    # Keep the CSV with the previous mappings up to date for later reference.
    log('update_previous')
    db.to_csvfile(previous)

    log('save_replacements')
    db.to_replacements(replacements)

    # Trickling back.
    log('trickle')
    db.trickle(bibfiles)

    # Save the merged bibliography.
    log('save as utf8')
    monster.save(entries, verbose=False)

    log('done.')
コード例 #5
0
ファイル: _libmonster.py プロジェクト: afehn/glottolog
from pyglottolog import languoids
from pyglottolog.util import references_path, parse_conjunctions, read_ini
from pyglottolog._bibtex_undiacritic import undiacritic

# Names exported by ``from <this module> import *``.
__all__ = [
    'add_inlg_e',
    'keyid',
    'wrds', 'setd', 'setd3', 'indextrigs',
    'lstat', 'lstat_witness', 
    'hhtype_to_n', 'expl_to_hhtype', 'lgcode',
    'read_csv_dict', 'write_csv_rows', 'load_triggers',
    'pitems',
]

# Result of references_path('alt4hhtype.ini'), evaluated once at import time.
# Presumably the location of the hhtype INI file - TODO confirm.
HHTYPE = references_path('alt4hhtype.ini')


def read_csv_dict(filename):
    """Return the rows of a CSV file as a dict keyed by their first column.

    Rows come from ``csv_iterrows(filename)``; when several rows share the
    same first value, the last one wins.
    """
    rows_by_key = {}
    for record in csv_iterrows(filename):
        rows_by_key[record[0]] = record
    return rows_by_key


def csv_iterrows(filename, fieldnames=None, dialect='excel'):
    """Yield the records of a CSV file as ``Row`` namedtuples.

    :param filename: path of the CSV file to read.
    :param fieldnames: field names for the ``Row`` type; when ``None`` the
        first line of the file is consumed and used as the field names.
    :param dialect: csv dialect passed to ``csv.reader``.
    """
    with open(filename) as stream:
        records = csv.reader(stream, dialect=dialect)
        # Without explicit names the header line is taken out of the data.
        header = next(records) if fieldnames is None else fieldnames
        row_type = namedtuple('Row', header)
        for values in records:
            yield row_type._make(values)
コード例 #6
0
ファイル: references.py プロジェクト: billbrasky1/glottolog
 def __init__(self, repos=None):
     """Load the hhtype definitions from ``hhtype.ini``.

     One ``HHType`` is created per section of the INI file. They are kept
     in descending sort order in ``_types`` and indexed by their ``id`` in
     ``_type_by_id``.

     :param repos: passed as ``repos`` to ``references_path``.
     """
     ini = read_ini(references_path('hhtype.ini', repos=repos))
     hhtypes = [HHType(section, ini) for section in ini.sections()]
     hhtypes.sort(reverse=True)
     self._types = hhtypes
     self._type_by_id = dict((hhtype.id, hhtype) for hhtype in hhtypes)
コード例 #7
0
ファイル: references.py プロジェクト: clld/glottolog
 def __init__(self, repos=None):
     """Read ``hhtype.ini`` and build the collection of ``HHType`` objects.

     ``_types`` holds one ``HHType`` per INI section, sorted in reverse
     order; ``_type_by_id`` maps each type's ``id`` to the type.

     :param repos: passed as ``repos`` to ``references_path``.
     """
     path = references_path("hhtype.ini", repos=repos)
     ini = read_ini(path)
     self._types = sorted(
         (HHType(name, ini) for name in ini.sections()), reverse=True)
     by_id = {}
     for hhtype in self._types:
         by_id[hhtype.id] = hhtype
     self._type_by_id = by_id
コード例 #8
0
ファイル: _bibfiles.py プロジェクト: afehn/glottolog
# _bibfiles.py - ordered collection of bibfiles with load/save api

import os
import io
import datetime

from clldutils.inifile import INI

from pyglottolog.util import references_path
import _bibtex
from _bibfiles_db import Database

# Names exported by ``from <this module> import *``.
__all__ = ['Collection', 'BibFile', 'Database']

# references_path('bibtex') converted to a POSIX-style string at import time.
# Presumably the directory holding the BibTeX files - TODO confirm.
DIR = references_path('bibtex').as_posix()
# Presumably the name of the INI file with the bibfile settings that
# Collection reads - TODO confirm where it is passed in.
CONFIG = 'BIBFILES.ini'


class Collection(list):
    """Directory with an INI-file with settings for BibTeX files inside."""

    _encoding = 'utf-8-sig'

    @classmethod
    def _bibfiles(cls, directory, config, endwith):
        """Read the INI-file, yield bibfile instances for sections."""
        cfg = INI(interpolation=None)
        cfg.read(os.path.join(directory, '..', config))
        for s in cfg.sections():
            if not s.endswith(endwith):
                continue
コード例 #9
0
ファイル: monster.py プロジェクト: clld/glottolog
def main(repos=DATA_DIR, rebuild=False):
    """Build the merged "monster" bibliography and its side files.

    The steps run strictly in sequence, each announced by a timestamped
    line on stdout: open/rebuild the bibfiles sqlite db, merge the entries,
    annotate them (macro_area, hhtype, lgcode, inlg), print statistics,
    update ``monster.csv``, write the replacements json, trickle back into
    the bibfiles (``db.trickle``) and save ``monster-utf8.bib``.

    :param repos: passed as ``repos`` to every path helper and to HHTypes.
    :param rebuild: passed as ``rebuild`` to ``bibfiles.to_sqlite``.
    """
    def announce(message):
        # Prints "<ctime> <message>".
        print('%s %s' % (time.ctime(), message))

    bibfiles = _bibfiles.Collection(references_path('bibtex', repos=repos))
    previous = references_path('monster.csv', repos=repos)
    replacements = build_path('monster-replacements.json', repos=repos)
    monster = _bibfiles.BibFile(
        build_path('monster-utf8.bib', repos=repos), encoding='utf-8', sortkey='bibkey')
    tree = languoids_path('tree', repos=repos)
    hht = HHTypes(repos=repos)

    announce('open/rebuild bibfiles db')
    db = bibfiles.to_sqlite(
        build_path('_bibfiles.sqlite3', repos=repos).as_posix(),
        rebuild=rebuild)

    announce('compile_monster')
    merged = dict(db.merged())

    announce('load hh.bib')
    hhbib = bibfiles['hh.bib'].load()

    # Annotate with macro_area from lgcode when lgcode is assigned manually
    announce('macro_area_from_lgcode')
    merged = macro_area_from_lgcode(merged, tree)

    # Annotate with hhtype; the rank of an hhtype is its lookup in hht
    announce('annotate hhtype')
    merged = markconservative(
        merged,
        hht.triggers,
        hhbib,
        hht,
        build_path('monstermark-hht.txt', repos=repos),
        rank=lambda key: hht[key])

    ltriggers = languoids.load_triggers(tree=tree)

    # Annotate with lgcode
    announce('annotate lgcode')
    merged = markconservative(
        merged,
        ltriggers['lgcode'],
        hhbib,
        hht,
        build_path('monstermark-lgc.txt', repos=repos))

    # Annotate with inlg
    announce('add_inlg_e')
    merged = add_inlg_e(merged, ltriggers['inlg'])

    # Count the entries and, per tracked field, the entries that have it
    counted_fields = ('lgcode', 'hhtype', 'macro_area')
    stats = Counter()
    print(time.ctime())
    for _type, fields in merged.values():
        stats['entry'] += 1
        for key in counted_fields:
            if key in fields:
                stats[key] += 1
    print("# entries", stats['entry'])
    for key in counted_fields:
        print("with " + key, stats[key])

    # Update the CSV with the previous mappings for later reference
    announce('update_previous')
    db.to_csvfile(previous)

    announce('save_replacements')
    db.to_replacements(replacements)

    # Trickling back
    announce('trickle')
    db.trickle(bibfiles)

    # Save
    announce('save as utf8')
    monster.save(merged, verbose=False)

    announce('done.')
コード例 #10
0
ファイル: _bibfiles_db.py プロジェクト: afehn/glottolog
import json
import sqlite3
import difflib
import operator
import itertools
import contextlib
import collections

from pyglottolog.util import references_path, build_path
import _bibtex

# Names exported by ``from <this module> import *``.
__all__ = ['Database']

# File locations, resolved once at import time and stored as POSIX-style
# path strings (``.as_posix()``).
DBFILE = build_path('_bibfiles.sqlite3').as_posix()
BIBFILE = build_path('monster-utf8.bib').as_posix()
CSVFILE = references_path('monster.csv').as_posix()
REPLACEMENTSFILE = build_path('monster-replacements.json').as_posix()

# Set of field names; presumably the fields whose values are combined
# (unioned) when entries are merged - TODO confirm against the merge code.
UNION_FIELDS = {'fn', 'asjp_name', 'isbn'}

# Set of field names; presumably the fields left out when entries are
# processed - TODO confirm against the code that uses it.
IGNORE_FIELDS = {'crossref', 'numnote', 'glotto_id'}


class Database(object):
    """Bibfile collection parsed into an sqlite3 file."""

    @staticmethod
    def _get_bibfiles(bibfiles):
        if bibfiles is None:
            from _bibfiles import Collection
            return Collection()
コード例 #11
0
# _bibfiles.py - ordered collection of bibfiles with load/save api

import datetime

from six import string_types

from pyglottolog.util import references_path, read_ini
from pyglottolog.monsterlib import _bibtex
from pyglottolog.monsterlib._bibfiles_db import Database

# Names exported by ``from <this module> import *``.
__all__ = ['Collection', 'BibFile', 'Database']

# Result of references_path('bibtex'), evaluated once at import time.
# Presumably the directory holding the BibTeX files - TODO confirm.
DIR = references_path('bibtex')


class Collection(list):
    """Directory with an INI-file with settings for BibTeX files inside."""

    _encoding = 'utf-8-sig'

    @classmethod
    def _bibfiles(cls, directory):
        """Read the INI-file, yield bibfile instances for sections."""
        cfg = read_ini(directory.parent.joinpath('BIBFILES.ini'))
        for s in cfg.sections():
            if not s.endswith('.bib'):
                continue
            filepath = directory.joinpath(s)
            assert filepath.exists()
            sortkey = cfg.get(s, 'sortkey')
            if sortkey.lower() == 'none':