def main(args):
    """Delete the ``glottolog20121`` provider and its obsolete refs.

    The collection of obsolete ref ids is loaded from the JSON file returned
    by ``args.data_file('2.3', 'obsolete_refs.json')``. Within a single
    ``transaction.manager`` block every ref of the provider is inspected:

    - a ref whose id is listed as obsolete is handed to
      ``Config.add_replacement(ref, None, ...)`` and then deleted;
    - any other ref is asserted to be linked to more than one provider.

    Afterwards the session is flushed and the provider itself is deleted.

    :param args: script arguments object; only ``data_file`` is used here.
    :raises AssertionError: if a non-obsolete ref has at most one provider.
    """
    obsolete_path = args.data_file('2.3', 'obsolete_refs.json')
    with open(obsolete_path) as handle:
        obsolete_ids = json.load(handle)

    with transaction.manager:
        legacy_provider = Provider.get('glottolog20121')
        for reference in legacy_provider.refs:
            if reference.id not in obsolete_ids:
                # Refs that are kept must still be attached to another
                # provider once this one is gone.
                assert len(reference.providers) > 1
                continue
            Config.add_replacement(
                reference, None, session=DBSession, model=Source)
            DBSession.delete(reference)

        # Flush the ref deletions before removing the provider itself.
        DBSession.flush()
        DBSession.delete(legacy_provider)
def __init__(self, req, *args, **kw):
    """Record complex-query and provider restrictions, then initialise the base.

    ``self.complexquery`` is the result of ``get_params`` applied to the
    keyword arguments if they contain ``'cq'``, otherwise to ``req.params``
    if those contain ``'cq'``, otherwise ``None``.

    ``self.provider`` is the ``provider`` keyword argument if given,
    otherwise the result of ``Provider.get(..., default=None)`` for the
    ``provider`` request parameter if present, otherwise ``None``.

    After the superclass initialiser has run, and only if ``self.language``
    is truthy, ``self.language_sources`` is set to the list of primary keys
    of ``self.language.sources``.
    """
    # Keyword arguments take precedence over request parameters.
    if 'cq' in kw:
        cq_source = kw
    elif 'cq' in req.params:
        cq_source = req.params
    else:
        cq_source = None
    self.complexquery = None if cq_source is None else get_params(cq_source)

    if 'provider' in kw:
        chosen_provider = kw['provider']
    elif 'provider' in req.params:
        chosen_provider = Provider.get(req.params['provider'], default=None)
    else:
        chosen_provider = None
    self.provider = chosen_provider

    super(Refs, self).__init__(req, *args, **kw)

    if self.language:
        self.language_sources = [src.pk for src in self.language.sources]
def update_providers(args, verbose=False):
    """Synchronise ``Provider`` objects with the sections of ``BIBFILES.ini``.

    The ini file is read from ``references/bibtex/BIBFILES.ini`` below
    ``args.data_dir``. Each section describes one provider; its id is the
    slug of the section name with a trailing ``.bib`` removed, and its
    ``name``, ``description`` and ``abbr`` come from the section's ``title``,
    ``description`` and ``abbr`` options.

    Providers already present in ``get_map(Provider)`` have differing
    attributes overwritten and the changed attribute names logged; unknown
    providers are added to ``DBSession``.

    :param args: script arguments object providing ``data_dir`` and ``log``.
    :param verbose: if true, additionally log the old and new value of every
        changed attribute whose whitespace-normalised text differs.
    """
    filepath = args.data_dir.joinpath('references', 'bibtex', 'BIBFILES.ini')
    parser = RawConfigParser()
    # ``readfp`` no longer exists on Python 3.12+, while ``read_file`` is
    # missing on Python 2; use whichever the parser offers.
    read_config = getattr(parser, 'read_file', None) or parser.readfp
    with io.open(filepath, encoding='utf-8-sig') as fp:
        read_config(fp)

    provider_map = get_map(Provider)
    for section in parser.sections():
        sectname = section[:-4] if section.endswith('.bib') else section
        id_ = slug(sectname)
        attrs = {
            'name': parser.get(section, 'title'),
            'description': parser.get(section, 'description'),
            'abbr': parser.get(section, 'abbr'),
        }

        if id_ not in provider_map:
            args.log.info('adding provider %s' % slug(id_))
            DBSession.add(Provider(id=id_, **attrs))
            continue

        provider = provider_map[id_]
        # Maps attribute name -> (old value, new value) for changed attributes.
        changes = {}
        for attr_name, after in attrs.items():
            before = getattr(provider, attr_name)
            if before == after:
                continue
            setattr(provider, attr_name, after)
            changes[attr_name] = (before, after)

        if not changes:
            continue

        args.log.info(
            'updating provider %s %s' % (slug(id_), sorted(changes)))
        if verbose:
            for attr_name, (before, after) in changes.items():
                # The stored value may be None (attribute never set), which
                # has no ``split``; treat it as empty text so that verbose
                # logging cannot abort the update.
                before, after = (
                    ' '.join((value or '').split())
                    for value in (before, after))
                # Changes that only affect whitespace are not reported.
                if before != after:
                    args.log.info('%s\n%r\n%r' % (attr_name, before, after))
def update(args):
    """Import ISO 639-3 change requests and attach macrolanguage codes.

    Part 1 - change requests: for every entry of
    ``args.json['changerequests']`` (change request number -> list of specs
    with the keys ``'Language Name'`` and ``'Affected Identifier'``) the
    ``Ref`` titled ``'Change Request Number <number>'`` is looked up and
    created if missing, using running ``pk``/``id`` counters that start at
    the current maxima of the ``source`` table (ids start above 500000).
    Languoids found by ``hid`` for the affected identifiers are linked to
    the ref. The transaction is then committed and a new one begun.

    Part 2 - macrolanguages: for every active family the 3-character
    ``hid`` values of the established language-level languoids listed as
    its children in ``TreeClosureTable`` are collected. Each entry of
    ``args.json['macrolanguages']`` (code -> member codes) is compared with
    the families in order of ascending leaf-set size. An exact match gets
    the code appended as an ISO identifier unless the family already has
    it; a proper-superset match is only reported. Results are printed.

    :param args: script arguments object; only ``args.json`` is used.
    :raises ValueError: if a change request number does not consist of
        exactly two ``-`` separated parts.
    """
    author = 'ISO 639-3 Registration Authority'
    pid = 'iso6393'
    dtid = 'overview'
    dt = Doctype.get(dtid)
    provider = Provider.get(pid, default=None)
    if provider is None:
        provider = Provider(
            id=pid,
            abbr=pid,
            name=author,
            description="Change requests submitted to the ISO 639-3 registration authority.")

    # SQL max() over an empty table yields NULL, i.e. None; count from 0 then
    # instead of failing in int().
    max_id = DBSession.execute(
        "select max(cast(id as integer)) from source").fetchone()[0]
    iid = max(int(max_id or 0), 500000)
    max_pk = DBSession.execute("select max(pk) from source").fetchone()[0]
    pk = int(max_pk or 0)

    for crno, affected in args.json['changerequests'].items():
        # Only the first part (the year) is used; the two-element unpacking
        # still rejects numbers that are not of the form '<year>-<serial>'.
        year, _ = crno.split('-')
        title = 'Change Request Number %s' % crno
        ref = Ref.get(title, key='title', default=None)
        if not ref:
            iid += 1
            pk += 1
            ref = Ref(
                pk=pk,
                id=str(iid),
                name='%s %s' % (author, year),
                bibtex_type=EntryType.misc,
                number=crno,
                description=title,
                year=year,
                year_int=int(year),
                title=title,
                author=author,
                address='Dallas',
                publisher='SIL International',
                url='http://www.sil.org/iso639-3/cr_files/%s.pdf' % crno,
                doctypes_str=dtid,
                providers_str=pid,
                language_note=', '.join(
                    '%(Language Name)s [%(Affected Identifier)s]' % spec
                    for spec in affected),
                jsondata=dict(hhtype=dtid, src=pid))
            ref.doctypes.append(dt)
            ref.providers.append(provider)

        # Also done for already existing refs, hence the membership check.
        for spec in affected:
            lang = Languoid.get(
                spec['Affected Identifier'], key='hid', default=None)
            if lang and lang not in ref.languages:
                ref.languages.append(lang)
        DBSession.add(ref)

    transaction.commit()
    transaction.begin()

    matched = 0
    near = 0
    # first() returns None when the identifier table is empty.
    newest_identifier = DBSession.query(Identifier.pk).order_by(
        desc(Identifier.pk)).first()
    max_identifier_pk = 0 if newest_identifier is None else newest_identifier[0]

    families = []
    family_query = DBSession.query(Languoid)\
        .filter(Languoid.level == LanguoidLevel.family)\
        .filter(Language.active == True)  # noqa: E712 - SQL expression
    for family in family_query.all():
        leaf_query = DBSession.query(TreeClosureTable.child_pk, Languoid.hid)\
            .filter(family.pk == TreeClosureTable.parent_pk)\
            .filter(Languoid.pk == TreeClosureTable.child_pk)\
            .filter(Languoid.hid != None)\
            .filter(Languoid.level == LanguoidLevel.language)\
            .filter(Languoid.status == LanguoidStatus.established)
        # Only 3-character hids are kept as leaf codes.
        isoleafs = set(hid for _, hid in leaf_query.all() if len(hid) == 3)
        families.append((family, isoleafs))

    # Smallest leaf sets first, so the tightest enclosing family is hit first.
    families.sort(key=lambda pair: len(pair[1]))

    # NOTE: print() with one pre-formatted string emits the same text under
    # the Python 2 print statement and the Python 3 print function.
    for mid, leafs in args.json['macrolanguages'].items():
        leafs = set(leafs)
        for family, isoleafs in families:
            if leafs == isoleafs:
                already_assigned = any(
                    c.name == mid for c in family.identifiers
                    if c.type == IdentifierType.iso.value)
                if not already_assigned:
                    family.codes.append(Identifier(
                        id=str(max_identifier_pk + 1),
                        name=mid,
                        type=IdentifierType.iso.value))
                    max_identifier_pk += 1
                matched += 1
                break
            if leafs.issubset(isoleafs):
                print('~~~ %s --> %s distance: %s %s' % (
                    family.name, mid, len(leafs), len(isoleafs)))
                near += 1
                break
        else:
            # No family equals or contains the macrolanguage's members.
            print('--- %s %s' % (mid, leafs))

    print('matched %s of %s macrolangs' % (
        matched, len(args.json['macrolanguages'])))
    print(near)
def update(args):
    """Import ISO 639-3 change requests and attach macrolanguage codes.

    Part 1 - change requests: for every entry of
    ``args.json['changerequests']`` (change request number -> list of specs
    with the keys ``'Language Name'`` and ``'Affected Identifier'``) the
    ``Ref`` titled ``'Change Request Number <number>'`` is looked up and
    created if missing, using running ``pk``/``id`` counters that start at
    the current maxima of the ``source`` table (ids start above 500000).
    Languoids found by ``hid`` for the affected identifiers are linked to
    the ref. The transaction is then committed and a new one begun.

    Part 2 - macrolanguages: for every active family the 3-character
    ``hid`` values of the established language-level languoids listed as
    its children in ``TreeClosureTable`` are collected. Each entry of
    ``args.json['macrolanguages']`` (code -> member codes) is compared with
    the families in order of ascending leaf-set size. An exact match gets
    the code appended as an ISO identifier unless the family already has
    it; a proper-superset match is only reported. Results are printed.

    :param args: script arguments object; only ``args.json`` is used.
    :raises ValueError: if a change request number does not consist of
        exactly two ``-`` separated parts.
    """
    author = 'ISO 639-3 Registration Authority'
    pid = 'iso6393'
    dtid = 'overview'
    dt = Doctype.get(dtid)
    provider = Provider.get(pid, default=None)
    if provider is None:
        provider = Provider(
            id=pid,
            abbr=pid,
            name=author,
            description="Change requests submitted to the ISO 639-3 registration authority.")

    # SQL max() over an empty table yields NULL, i.e. None; count from 0 then
    # instead of failing in int().
    max_id = DBSession.execute(
        "select max(cast(id as integer)) from source").fetchone()[0]
    iid = max(int(max_id or 0), 500000)
    max_pk = DBSession.execute("select max(pk) from source").fetchone()[0]
    pk = int(max_pk or 0)

    for crno, affected in args.json['changerequests'].items():
        # Only the first part (the year) is used; the two-element unpacking
        # still rejects numbers that are not of the form '<year>-<serial>'.
        year, _ = crno.split('-')
        title = 'Change Request Number %s' % crno
        ref = Ref.get(title, key='title', default=None)
        if not ref:
            iid += 1
            pk += 1
            ref = Ref(
                pk=pk,
                id=str(iid),
                name='%s %s' % (author, year),
                bibtex_type=EntryType.misc,
                number=crno,
                description=title,
                year=year,
                year_int=int(year),
                title=title,
                author=author,
                address='Dallas',
                publisher='SIL International',
                url='http://www.sil.org/iso639-3/cr_files/%s.pdf' % crno,
                language_note=', '.join(
                    '%(Language Name)s [%(Affected Identifier)s]' % spec
                    for spec in affected),
                jsondata=dict(hhtype=dtid, src=pid))
            ref.doctypes.append(dt)
            ref.providers.append(provider)

        # Also done for already existing refs, hence the membership check.
        for spec in affected:
            lang = Languoid.get(
                spec['Affected Identifier'], key='hid', default=None)
            if lang and lang not in ref.languages:
                ref.languages.append(lang)
        DBSession.add(ref)

    transaction.commit()
    transaction.begin()

    matched = 0
    near = 0
    # first() returns None when the identifier table is empty.
    newest_identifier = DBSession.query(Identifier.pk).order_by(
        desc(Identifier.pk)).first()
    max_identifier_pk = 0 if newest_identifier is None else newest_identifier[0]

    families = []
    family_query = DBSession.query(Languoid)\
        .filter(Languoid.level == LanguoidLevel.family)\
        .filter(Language.active == True)  # noqa: E712 - SQL expression
    for family in family_query.all():
        leaf_query = DBSession.query(TreeClosureTable.child_pk, Languoid.hid)\
            .filter(family.pk == TreeClosureTable.parent_pk)\
            .filter(Languoid.pk == TreeClosureTable.child_pk)\
            .filter(Languoid.hid != None)\
            .filter(Languoid.level == LanguoidLevel.language)\
            .filter(Languoid.status == LanguoidStatus.established)
        # Only 3-character hids are kept as leaf codes.
        isoleafs = set(hid for _, hid in leaf_query.all() if len(hid) == 3)
        families.append((family, isoleafs))

    # Smallest leaf sets first, so the tightest enclosing family is hit first.
    families.sort(key=lambda pair: len(pair[1]))

    # NOTE: print() with one pre-formatted string emits the same text under
    # the Python 2 print statement and the Python 3 print function.
    for mid, leafs in args.json['macrolanguages'].items():
        leafs = set(leafs)
        for family, isoleafs in families:
            if leafs == isoleafs:
                already_assigned = any(
                    c.name == mid for c in family.identifiers
                    if c.type == IdentifierType.iso.value)
                if not already_assigned:
                    family.codes.append(Identifier(
                        id=str(max_identifier_pk + 1),
                        name=mid,
                        type=IdentifierType.iso.value))
                    max_identifier_pk += 1
                matched += 1
                break
            if leafs.issubset(isoleafs):
                print('~~~ %s --> %s distance: %s %s' % (
                    family.name, mid, len(leafs), len(isoleafs)))
                near += 1
                break
        else:
            # No family equals or contains the macrolanguage's members.
            print('--- %s %s' % (mid, leafs))

    print('matched %s of %s macrolangs' % (
        matched, len(args.json['macrolanguages'])))
    print(near)