def main(args):
    """Drop obsolete refs of provider ``glottolog20121``, then the provider.

    The ids of obsolete refs are read from the JSON file located via
    ``args.data_file('2.3', 'obsolete_refs.json')``. Every ref of the provider
    whose id is listed there gets a replacement entry (with ``None`` as the
    replacement) registered through ``Config.add_replacement`` and is deleted.
    Every other ref is expected to have at least one more provider.

    :param args: object providing a ``data_file(*comps)`` method.
    """
    obsolete_path = args.data_file('2.3', 'obsolete_refs.json')
    with open(obsolete_path) as handle:
        obsolete_ids = json.load(handle)

    with transaction.manager:
        old_provider = Provider.get('glottolog20121')
        for source in old_provider.refs:
            if source.id not in obsolete_ids:
                # A ref that is kept must not depend on the provider that is
                # about to be removed.
                assert len(source.providers) > 1
                continue
            Config.add_replacement(
                source, None, session=DBSession, model=Source)
            DBSession.delete(source)

        # Flush the ref deletions before removing the provider itself.
        DBSession.flush()
        DBSession.delete(old_provider)
Beispiel #2
0
    def __init__(self, req, *args, **kw):
        """Set up ``complexquery``, ``provider`` and ``language_sources``.

        For both ``cq`` and ``provider`` a keyword argument takes precedence
        over a request parameter of the same name; when neither is present the
        corresponding attribute is set to ``None``. ``language_sources`` is
        only set when ``self.language`` is truthy after the parent initialiser
        has run.

        :param req: request object exposing a ``params`` mapping.
        """
        # Choose the mapping carrying the complex query, keyword args first.
        if 'cq' in kw:
            complexquery = get_params(kw)
        elif 'cq' in req.params:
            complexquery = get_params(req.params)
        else:
            complexquery = None
        self.complexquery = complexquery

        if 'provider' in kw:
            # Passed through unchanged from the caller.
            provider = kw['provider']
        elif 'provider' in req.params:
            # Request parameters only carry an id, so look the provider up.
            provider = Provider.get(req.params['provider'], default=None)
        else:
            provider = None
        self.provider = provider

        super(Refs, self).__init__(req, *args, **kw)

        if self.language:
            # Primary keys of all sources attached to the language.
            self.language_sources = [
                source.pk for source in self.language.sources]
Beispiel #3
0
def update_providers(args, verbose=False):
    """Synchronise ``Provider`` records with the sections of ``BIBFILES.ini``.

    Each section of ``references/bibtex/BIBFILES.ini`` (below
    ``args.data_dir``) describes one provider through its ``title``,
    ``description`` and ``abbr`` options. The provider id is the slug of the
    section name with a trailing ``.bib`` removed. If that id is a key of
    ``get_map(Provider)``, the changed attributes of the existing provider are
    updated; otherwise a new ``Provider`` is added to ``DBSession``.

    :param args: object providing ``data_dir`` (with a ``joinpath`` method) \
    and ``log`` (with an ``info`` method).
    :param verbose: if true, additionally log old and new value of every \
    updated attribute whose whitespace-normalised content differs.
    """
    filepath = args.data_dir.joinpath('references', 'bibtex', 'BIBFILES.ini')
    p = RawConfigParser()
    with io.open(filepath, encoding='utf-8-sig') as fp:
        # ``readfp`` was removed in Python 3.12; use ``read_file`` where the
        # parser offers it and only fall back to ``readfp`` on older parsers.
        if hasattr(p, 'read_file'):
            p.read_file(fp)
        else:
            p.readfp(fp)

    def normalise(value):
        # Collapse runs of whitespace; an unset (None) value is left as is so
        # that verbose logging does not crash on it.
        return value if value is None else ' '.join(value.split())

    provider_map = get_map(Provider)
    for section in p.sections():
        sectname = section[:-4] if section.endswith('.bib') else section
        id_ = slug(sectname)
        attrs = {
            'name': p.get(section, 'title'),
            'description': p.get(section, 'description'),
            'abbr': p.get(section, 'abbr'),
        }
        if id_ in provider_map:
            provider = provider_map[id_]
            # Reduce attrs to the changed attributes, mapping each name to its
            # (before, after) pair. Iterate over a copy of the keys because
            # entries are deleted inside the loop.
            for a in list(attrs):
                before, after = getattr(provider, a), attrs[a]
                if before == after:
                    del attrs[a]
                else:
                    setattr(provider, a, after)
                    attrs[a] = (before, after)
            if attrs:
                args.log.info('updating provider %s %s' %
                              (slug(id_), sorted(attrs)))
            if verbose:
                for a, (before, after) in attrs.items():
                    before, after = normalise(before), normalise(after)
                    # Skip changes that only affect whitespace.
                    if before != after:
                        args.log.info('%s\n%r\n%r' % (a, before, after))
        else:
            args.log.info('adding provider %s' % slug(id_))
            DBSession.add(Provider(id=id_, **attrs))
Beispiel #4
0
def update(args):
    """Import ISO 639-3 change requests as refs and assign macrolanguage codes.

    The work is done in two steps, separated by a transaction commit:

    1. For every entry of ``args.json['changerequests']`` a ``Ref`` titled
       ``'Change Request Number <crno>'`` is looked up and created if missing;
       every affected language that can be found by its ``hid`` is linked to
       the ref.
    2. For every entry of ``args.json['macrolanguages']`` the active families
       are searched for one whose set of three-letter ``hid`` values (taken
       from established, language-level languoids related to it through
       ``TreeClosureTable``) equals the macrolanguage's member set. Such a
       family gets the macrolanguage code as ISO identifier unless it already
       has it. Families which merely contain all members are reported as
       near matches.

    Progress is reported on standard output.

    :param args: object whose ``json`` attribute holds the \
    ``changerequests`` and ``macrolanguages`` mappings.
    """
    author = 'ISO 639-3 Registration Authority'
    pid = 'iso6393'
    dtid = 'overview'
    dt = Doctype.get(dtid)
    provider = Provider.get(pid, default=None)
    if provider is None:
        provider = Provider(
            id=pid,
            abbr=pid,
            name=author,
            description="Change requests submitted to the ISO 639-3 registration authority.")

    # ``max()`` yields NULL for an empty table; treat that as 0 instead of
    # failing in ``int(None)``.
    max_id = DBSession.execute(
        "select max(cast(id as integer)) from source").fetchone()[0]
    # New ids are allocated above 500000 at least.
    iid = max(int(max_id or 0), 500000)
    max_pk = DBSession.execute("select max(pk) from source").fetchone()[0]
    pk = int(max_pk or 0)

    for crno, affected in args.json['changerequests'].items():
        # The number must consist of exactly two '-'-separated parts; only the
        # first one (the year) is used.
        year, _serial = crno.split('-')
        title = 'Change Request Number %s' % crno
        ref = Ref.get(title, key='title', default=None)

        if not ref:
            iid += 1
            pk += 1
            ref = Ref(
                pk=pk,
                id=str(iid),
                name='%s %s' % (author, year),
                bibtex_type=EntryType.misc,
                number=crno,
                description=title,
                year=year,
                year_int=int(year),
                title=title,
                author=author,
                address='Dallas',
                publisher='SIL International',
                url='http://www.sil.org/iso639-3/cr_files/%s.pdf' % crno,
                doctypes_str=dtid,
                providers_str=pid,
                language_note=', '.join(
                    '%(Language Name)s [%(Affected Identifier)s]' % spec
                    for spec in affected),
                jsondata=dict(hhtype=dtid, src=pid))
            ref.doctypes.append(dt)
            ref.providers.append(provider)

        for spec in affected:
            lang = Languoid.get(spec['Affected Identifier'], key='hid', default=None)
            if lang and lang not in ref.languages:
                ref.languages.append(lang)
        DBSession.add(ref)

    transaction.commit()
    transaction.begin()

    matched = 0
    near = 0
    # ``first()`` returns None when there is no identifier at all; start
    # counting from 0 in that case instead of failing on ``None[0]``.
    newest_identifier = DBSession.query(
        Identifier.pk).order_by(desc(Identifier.pk)).first()
    max_identifier_pk = newest_identifier[0] if newest_identifier else 0

    # Collect (family, set of three-letter hids) pairs for all active families.
    families = []
    for family in DBSession.query(Languoid)\
            .filter(Languoid.level == LanguoidLevel.family)\
            .filter(Language.active == True)\
            .all():
        isoleafs = set()
        for row in DBSession.query(TreeClosureTable.child_pk, Languoid.hid)\
            .filter(family.pk == TreeClosureTable.parent_pk)\
            .filter(Languoid.pk == TreeClosureTable.child_pk)\
            .filter(Languoid.hid != None)\
            .filter(Languoid.level == LanguoidLevel.language)\
            .filter(Languoid.status == LanguoidStatus.established)\
            .all():
            # Only three-letter hids are taken into account.
            if len(row[1]) == 3:
                isoleafs.add(row[1])
        families.append((family, isoleafs))

    # Smallest families first, so that the first superset encountered below
    # is a smallest one.
    families = sorted(families, key=lambda p: len(p[1]))

    for mid, leafs in args.json['macrolanguages'].items():
        leafs = set(leafs)
        found = False
        for family, isoleafs in families:
            if leafs == isoleafs:
                # NOTE(review): membership is checked on ``identifiers`` while
                # the new identifier is appended to ``codes`` - confirm that
                # both refer to the same association.
                if mid not in [c.name for c in family.identifiers
                               if c.type == IdentifierType.iso.value]:
                    family.codes.append(Identifier(
                        id=str(max_identifier_pk + 1),
                        name=mid,
                        type=IdentifierType.iso.value))
                    max_identifier_pk += 1
                matched += 1
                found = True
                break
            elif leafs.issubset(isoleafs):
                # Single-argument print calls produce the same output under
                # Python 2 and Python 3.
                print('~~~ %s --> %s distance: %s %s' % (
                    family.name, mid, len(leafs), len(isoleafs)))
                near += 1
                found = True
                break
        if not found:
            print('--- %s %s' % (mid, leafs))

    print('matched %s of %s macrolangs' % (
        matched, len(args.json['macrolanguages'])))
    print(near)
Beispiel #5
0
def update(args):
    """Import ISO 639-3 change requests as refs and assign macrolanguage codes.

    The work is done in two steps, separated by a transaction commit:

    1. For every entry of ``args.json['changerequests']`` a ``Ref`` titled
       ``'Change Request Number <crno>'`` is looked up and created if missing;
       every affected language that can be found by its ``hid`` is linked to
       the ref.
    2. For every entry of ``args.json['macrolanguages']`` the active families
       are searched for one whose set of three-letter ``hid`` values (taken
       from established, language-level languoids related to it through
       ``TreeClosureTable``) equals the macrolanguage's member set. Such a
       family gets the macrolanguage code as ISO identifier unless it already
       has it. Families which merely contain all members are reported as
       near matches.

    Progress is reported on standard output.

    :param args: object whose ``json`` attribute holds the \
    ``changerequests`` and ``macrolanguages`` mappings.
    """
    author = 'ISO 639-3 Registration Authority'
    pid = 'iso6393'
    dtid = 'overview'
    dt = Doctype.get(dtid)
    provider = Provider.get(pid, default=None)
    if provider is None:
        provider = Provider(
            id=pid,
            abbr=pid,
            name=author,
            description=
            "Change requests submitted to the ISO 639-3 registration authority."
        )

    # ``max()`` yields NULL for an empty table; treat that as 0 instead of
    # failing in ``int(None)``.
    max_id = DBSession.execute(
        "select max(cast(id as integer)) from source").fetchone()[0]
    # New ids are allocated above 500000 at least.
    iid = max(int(max_id or 0), 500000)
    max_pk = DBSession.execute("select max(pk) from source").fetchone()[0]
    pk = int(max_pk or 0)

    for crno, affected in args.json['changerequests'].items():
        # The number must consist of exactly two '-'-separated parts; only the
        # first one (the year) is used.
        year, _serial = crno.split('-')
        title = 'Change Request Number %s' % crno
        ref = Ref.get(title, key='title', default=None)

        if not ref:
            iid += 1
            pk += 1
            ref = Ref(pk=pk,
                      id=str(iid),
                      name='%s %s' % (author, year),
                      bibtex_type=EntryType.misc,
                      number=crno,
                      description=title,
                      year=year,
                      year_int=int(year),
                      title=title,
                      author=author,
                      address='Dallas',
                      publisher='SIL International',
                      url='http://www.sil.org/iso639-3/cr_files/%s.pdf' % crno,
                      language_note=', '.join(
                          '%(Language Name)s [%(Affected Identifier)s]' % spec
                          for spec in affected),
                      jsondata=dict(hhtype=dtid, src=pid))
            ref.doctypes.append(dt)
            ref.providers.append(provider)

        for spec in affected:
            lang = Languoid.get(spec['Affected Identifier'],
                                key='hid',
                                default=None)
            if lang and lang not in ref.languages:
                ref.languages.append(lang)
        DBSession.add(ref)

    transaction.commit()
    transaction.begin()

    matched = 0
    near = 0
    # ``first()`` returns None when there is no identifier at all; start
    # counting from 0 in that case instead of failing on ``None[0]``.
    newest_identifier = DBSession.query(Identifier.pk).order_by(
        desc(Identifier.pk)).first()
    max_identifier_pk = newest_identifier[0] if newest_identifier else 0

    # Collect (family, set of three-letter hids) pairs for all active families.
    families = []
    for family in DBSession.query(Languoid)\
            .filter(Languoid.level == LanguoidLevel.family)\
            .filter(Language.active == True)\
            .all():
        isoleafs = set()
        for row in DBSession.query(TreeClosureTable.child_pk, Languoid.hid)\
            .filter(family.pk == TreeClosureTable.parent_pk)\
            .filter(Languoid.pk == TreeClosureTable.child_pk)\
            .filter(Languoid.hid != None)\
            .filter(Languoid.level == LanguoidLevel.language)\
            .filter(Languoid.status == LanguoidStatus.established)\
            .all():
            # Only three-letter hids are taken into account.
            if len(row[1]) == 3:
                isoleafs.add(row[1])
        families.append((family, isoleafs))

    # Smallest families first, so that the first superset encountered below
    # is a smallest one.
    families = sorted(families, key=lambda p: len(p[1]))

    for mid, leafs in args.json['macrolanguages'].items():
        leafs = set(leafs)
        found = False
        for family, isoleafs in families:
            if leafs == isoleafs:
                # NOTE(review): membership is checked on ``identifiers`` while
                # the new identifier is appended to ``codes`` - confirm that
                # both refer to the same association.
                if mid not in [
                        c.name for c in family.identifiers
                        if c.type == IdentifierType.iso.value
                ]:
                    family.codes.append(
                        Identifier(id=str(max_identifier_pk + 1),
                                   name=mid,
                                   type=IdentifierType.iso.value))
                    max_identifier_pk += 1
                matched += 1
                found = True
                break
            elif leafs.issubset(isoleafs):
                # Single-argument print calls produce the same output under
                # Python 2 and Python 3.
                print('~~~ %s --> %s distance: %s %s' % (
                    family.name, mid, len(leafs), len(isoleafs)))
                near += 1
                found = True
                break
        if not found:
            print('--- %s %s' % (mid, leafs))

    print('matched %s of %s macrolangs' % (
        matched, len(args.json['macrolanguages'])))
    print(near)