def refined_query(self, query, model, req):
    """Derived classes may override this method to add model-specific query
    refinements of their own.
    """
    if model == Contribution:
        return query.options(
            joinedload_all(Contribution.references, ContributionReference.source))
    if model == Parameter:
        if req.matchdict['id'][-1] not in ascii_uppercase:
            # route match for 2008-style URL: redirect!
            raise HTTPMovedPermanently(
                req.route_url('contribution', id=req.matchdict['id']))
    if model == Source:
        try:
            # redirect legacy refdb URLs formed with numeric id:
            rec = Source.get(int(req.matchdict['id']), default=None)
            if rec:
                raise HTTPMovedPermanently(
                    req.route_url('source', id=rec.id))
            else:
                raise HTTPNotFound()
        except ValueError:
            pass
    return query

def dataset_detail_html(context=None, request=None, **kw):
    res = dict(
        (row[0], row[1]) for row in
        DBSession.execute("select source, count(pk) from inventory group by source"))
    res["inventory_count"] = DBSession.query(Inventory).count()
    res["segment_count"] = DBSession.query(Parameter).count()
    res["language_count"] = DBSession.query(Language).count()
    res["contributors"] = (
        DBSession.query(Contributor)
        .order_by(Contributor.name)
        .options(
            joinedload(Contributor.contribution_assocs),
            joinedload(Contributor.references))
        .all()
    )
    res["sources"] = {
        k: Source.get(k) for k in [
            "moisikesling2011",
            "ipa2005",
            "hayes2009",
            "moran2012a",
            "moranetal2012",
            "cysouwetal2012",
            "mccloyetal2013",
        ]
    }
    res["descriptions"] = {
        c.id: desc(request, c.description, res["sources"])
        for c in res["contributors"]}
    return res

def test_Source(self):
    from clld.db.models.common import Source

    d = Source(id='abc')
    self.assertIsNone(d.gbs_identifier)
    d = Source(id='abc', jsondata={'gbs': {'volumeInfo': {}}})
    self.assertIsNone(d.gbs_identifier)
    d = Source(
        id='abc',
        jsondata={
            'gbs': {
                'volumeInfo': {
                    'industryIdentifiers': [{'type': 'x', 'identifier': 'y'}]}}})
    self.assertEqual(d.gbs_identifier, 'y')
    d = Source(
        id='abc',
        jsondata={
            'gbs': {
                'volumeInfo': {
                    'industryIdentifiers': [{'type': 'ISBN_10', 'identifier': ''}]}}})
    self.assertEqual(d.gbs_identifier, 'ISBN:')
    d = Source(
        id='abc',
        jsondata={
            'gbs': {
                'volumeInfo': {
                    'industryIdentifiers': [{'type': 'ISBN_13', 'identifier': ''}]}}})
    self.assertEqual(d.gbs_identifier, 'ISBN:')
    d.bibtex()

def test_Source():
    from clld.db.models.common import Source

    d = Source(id='abc')
    assert d.gbs_identifier is None
    d = Source(id='abc', jsondata={'gbs': {'volumeInfo': {}}})
    assert d.gbs_identifier is None
    d = Source(
        id='abc',
        jsondata={
            'gbs': {
                'volumeInfo': {
                    'industryIdentifiers': [{'type': 'x', 'identifier': 'y'}]}}})
    assert d.gbs_identifier == 'y'
    d = Source(
        id='abc',
        jsondata={
            'gbs': {
                'volumeInfo': {
                    'industryIdentifiers': [{'type': 'ISBN_10', 'identifier': ''}]}}})
    assert d.gbs_identifier == 'ISBN:'
    d = Source(
        id='abc',
        jsondata={
            'gbs': {
                'volumeInfo': {
                    'industryIdentifiers': [{'type': 'ISBN_13', 'identifier': ''}]}}})
    assert d.gbs_identifier == 'ISBN:'
    d.bibtex()

def desc(req, d, sources=None):
    if sources is None:
        sources = {
            k: Source.get(k) for k in
            "moisikesling2011 hayes2009 moran2012a moranetal2012".split()}
    if not d:
        return d
    for k, v in sources.items():
        a = link(req, v)
        d = re.sub(r"\*\*(?P<id>%s)\*\*" % k, text_type(a), d)
    return d

def desc(req, d, sources=None):
    if sources is None:
        sources = {
            k: Source.get(k) for k in
            'MoisikEsling2011 Hayes2009 Moran2012a Moran_etal2012'.split()}
    if not d:
        return d
    for k, v in sources.items():
        a = link(req, v)
        d = re.sub(r'\*\*(?P<id>%s)\*\*' % k, str(a), d)
    return d

def desc(req, d, sources=None):
    if sources is None:
        sources = {
            k: Source.get(k) for k in
            'moisikesling2011 hayes2009 moran2012a moranetal2012'.split()}
    if not d:
        return d
    for k, v in sources.items():
        a = link(req, v)
        d = re.sub(r'\*\*(?P<id>%s)\*\*' % k, text_type(a), d)
    return d

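# A minimal, self-contained sketch of the substitution technique shared by the
# `desc` variants above: occurrences of **sourcekey** in a description are
# replaced by a rendered link. `substitute_source_refs` and `render_link` are
# hypothetical stand-ins for the pattern built around clld's `link(req, source)`
# helper; the sample data is made up.
import re

def substitute_source_refs(d, source_keys, render_link):
    for k in source_keys:
        d = re.sub(r'\*\*(?P<id>%s)\*\*' % re.escape(k), render_link(k), d)
    return d

# Usage: each key is rewritten to whatever markup the link helper produces.
print(substitute_source_refs(
    'See **hayes2009** for details.',
    ['hayes2009'],
    lambda k: '<a href="/sources/%s">%s</a>' % (k, k)))
# -> See <a href="/sources/hayes2009">hayes2009</a> for details.
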
def test_compute_language_sources(self):
    from clld.db.models.common import Source, Sentence, Language, SentenceReference
    from clld.db.meta import DBSession
    from clld.db.util import compute_language_sources

    s = Sentence(id='sentenced', language=Language(id='newlang'))
    sr = SentenceReference(sentence=s, source=Source.first())
    DBSession.add(sr)
    DBSession.flush()
    compute_language_sources()

def bangime(req):
    docs = {
        'memorial': 'eldersmemorialcall07',
        'bangerimevocabulaire': 'bangerimevocabulaire',
        'bangerimephrases': 'bangerimephrases',
        'bangerimepres': 'elders2006',
        'blacksmith': 'blacksmithvocabulary',
    }
    return {
        'docs': {k: Source.get(sid) for k, sid in docs.items()}
    }

def other(req):
    jenaama = 'Heath2016-Jenaama-lexicon Heath2016-JenaamaBozo'.split()
    rows = [
        ["Tieyaxo", "Tigemaxo", "boz", "tiey1235"],
        ["Tiema Cewe", "Tiema Ce", "boo", "tiem1235"],
        ["Kelenga", "Hainyaxo", "bsx", "hain1253"],
        ["Jenaama", "Sorogaana", "bze", "jena1242"],
    ]
    return {
        'rows': rows,
        'jenaama': [Source.get(slug(sid)) for sid in jenaama]
    }

def florafauna(req):
    note_ids = [
        'fish_notes_Mali_JH',
        'flora_notes_Mali_JH',
        'insect_arthropod_mollusc_notes_Mali_JH',
        'mammal_notes_Mali_JH',
        'reptile_notes_Mali_JH',
        'bird_notes_Mali_JH',
    ]
    return {
        'notes': [Source.get(slug(sid)) for sid in note_ids]
    }

def main(args):
    repls = set(
        (i['id'], i['replacement']) for i in
        jsonload(args.data_dir.joinpath('scripts', 'monster-replacements.json')))

    with transaction.manager:
        for ref_id, repl_id in repls:
            ref = Source.get('%s' % ref_id, default=None)
            if ref:
                Config.add_replacement(
                    ref, '%s' % repl_id, session=DBSession, model=Source)
                # FIXME: "redirect" relations, e.g. from valuesetreference as well!
                DBSession.delete(ref)
    args.log.info('%s replacements' % len(repls))

def markup_feature_desc(req, desc):
    for pattern, repl in [
            (r'WALS feature number:\s*(?P<id>[0-9]+)\s*\[http://wals\.info\]',
             lambda match: external_link(
                 'http://wals.info/feature/%sA' % match.group('id'),
                 label='WALS feature number %sA' % match.group('id'))),
            (r'Constenla feature number:\s*(?P<id>[a-z0-9]+)\s*\[[^\]]+\]',
             lambda match: link(
                 req,
                 Source.get('hvtypconstenlaintermedia'),
                 label='Constenla feature number: ' + match.group('id')))]:
        desc = re.sub(pattern, repl, desc)
    return desc

def markup_feature_desc(req, desc):
    for pattern, repl in [
        (r'WALS feature number:\s*(?P<id>[0-9]+)\s*\[http://wals\.info\]',
         lambda match: external_link(
             'http://wals.info/feature/%sA' % match.group('id'),
             label='WALS feature number %sA' % match.group('id'))),
        (r'Constenla feature number:\s*(?P<id>[a-z0-9]+)\s*\[[^\]]+\]',
         lambda match: link(req, Source.get('hvtypconstenlaintermedia'),
                            label='Constenla feature number: ' + match.group('id')))
    ]:
        desc = re.sub(pattern, repl, desc)
    return desc

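# A runnable sketch of the pattern-rewriting approach in `markup_feature_desc`:
# plain-text references like "WALS feature number: 81 [http://wals.info]" are
# turned into markup via a regex with a callable replacement. `mark_up_wals`
# and `make_link` are hypothetical stand-ins for the wrapper around clld's
# `external_link` helper.
import re

def mark_up_wals(desc, make_link):
    return re.sub(
        r'WALS feature number:\s*(?P<id>[0-9]+)\s*\[http://wals\.info\]',
        lambda m: make_link(
            'http://wals.info/feature/%sA' % m.group('id'),
            'WALS feature number %sA' % m.group('id')),
        desc)

print(mark_up_wals(
    'Cf. WALS feature number: 81 [http://wals.info].',
    lambda url, label: '<a href="%s">%s</a>' % (url, label)))
# -> Cf. <a href="http://wals.info/feature/81A">WALS feature number 81A</a>.
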
def dataset_detail_html(context=None, request=None, **kw):
    res = {}
    # dict(
    #     (row[0], row[1]) for row in
    #     DBSession.execute("select source, count(pk) from inventory group by source"))
    res['inventory_count'] = DBSession.query(Inventory).count()
    res['segment_count'] = DBSession.query(Parameter).count()
    res['language_count'] = DBSession.query(Language).count()
    res['contributors'] = DBSession.query(Contributor).order_by(Contributor.name).options(
        joinedload(Contributor.contribution_assocs),
        joinedload(Contributor.references)).all()
    res['sources'] = {
        k: Source.get(k) for k in [
            'MoisikEsling2011', 'IPA2005', 'Hayes2009', 'Moran2012a',
            'Moran_etal2012', 'Cysouw_etal2012', 'mccloy_etal2013']}
    res['descriptions'] = {
        c.id: desc(request, c.description, res['sources']) for c in res['contributors']}
    return res

def format_classificationcomment(req, comment):
    parts = []
    pos = 0
    for match in REF_PATTERN.finditer(comment):
        preceding = comment[pos:match.start()]
        parts.append(preceding)
        preceding_words = preceding.strip().split()
        if preceding_words and preceding_words[-1] not in ['in', 'of', 'per', 'by']:
            parts.append('(')
        parts.append(link(req, Source.get(match.group('id'))))
        if preceding_words and preceding_words[-1] not in ['in', 'of', 'per', 'by']:
            parts.append(')')
        pos = match.end()
    parts.append(comment[pos:])
    return HTML.p(*parts)

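# The parenthesization rule in `format_classificationcomment`, isolated as a
# pure function: a reference link is wrapped in parentheses unless the word
# immediately before it is a preposition such as "in" or "by". REF_PATTERN is
# not shown in these snippets; the `**<numeric id>**` pattern below is an
# assumption based on the other snippets here, and `format_refs` is a
# hypothetical name.
import re

REF_PATTERN = re.compile(r'\*\*(?P<id>[0-9]+)\*\*')

def format_refs(comment, render_link):
    parts, pos = [], 0
    for match in REF_PATTERN.finditer(comment):
        preceding = comment[pos:match.start()]
        parts.append(preceding)
        words = preceding.strip().split()
        bare = words and words[-1] in ('in', 'of', 'per', 'by')
        link_text = render_link(match.group('id'))
        parts.append(link_text if bare else '(%s)' % link_text)
        pos = match.end()
    parts.append(comment[pos:])
    return ''.join(parts)

print(format_refs('First noted by **123**.', lambda i: '[ref %s]' % i))
# -> First noted by [ref 123].
print(format_refs('A subgroup **123**.', lambda i: '[ref %s]' % i))
# -> A subgroup ([ref 123]).
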
def refined_query(self, query, model, req):
    if model == Contribution:
        return query.options(joinedload_all(
            Contribution.references, ContributionReference.source))
    if model == Parameter:
        if req.matchdict['id'][-1] not in ascii_uppercase:
            # route match for 2008-style URL: redirect!
            raise HTTPMovedPermanently(
                req.route_url('contribution', id=req.matchdict['id']))
    if model == Source:
        try:
            # redirect legacy refdb URLs formed with numeric id:
            rec = Source.get(int(req.matchdict['id']), default=None)
            if rec:
                raise HTTPMovedPermanently(
                    req.route_url('source', id=rec.id))
            else:
                raise HTTPNotFound()
        except ValueError:
            pass
    return query

def refined_query(self, query, model, req):
    if model == Contribution:
        return query.options(
            joinedload(Contribution.references).joinedload(
                ContributionReference.source))
    if model == Parameter:
        if req.matchdict['id'][-1] not in string.ascii_uppercase:
            # route match for 2008-style URL: redirect!
            raise HTTPMovedPermanently(
                req.route_url('contribution', id=req.matchdict['id']))
    if model == Source:
        try:
            # redirect legacy refdb URLs formed with numeric id:
            rec = Source.get(int(req.matchdict['id']), default=None)
            if rec:
                raise HTTPMovedPermanently(
                    req.route_url('source', id=rec.id))
            else:
                raise HTTPNotFound()
        except ValueError:
            pass
    return query

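# Note on the `refined_query` variants above: `joinedload_all(A.b, B.c)` is the
# legacy SQLAlchemy spelling for eager-loading across two relationships; the
# chained `joinedload(A.b).joinedload(B.c)` in the last variant is the modern
# equivalent, and the only one that still works after the `*_all` helpers were
# removed in SQLAlchemy 1.4.
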
def get_record(self, req, identifier):
    rec = Source.get(self.parse_identifier(req, identifier), default=None)
    assert rec
    return rec

def test_Source():
    from clld.db.models.common import Source

    d = Source(id='abc')
    assert d.gbs_identifier is None
    d = Source(id='abc', jsondata={'gbs': {'volumeInfo': {}}})
    assert d.gbs_identifier is None
    d = Source(id='abc',
               jsondata={
                   'gbs': {
                       'volumeInfo': {
                           'industryIdentifiers': [{
                               'type': 'x',
                               'identifier': 'y'
                           }]
                       }
                   }
               })
    assert d.gbs_identifier == 'y'
    d = Source(id='abc',
               jsondata={
                   'gbs': {
                       'volumeInfo': {
                           'industryIdentifiers': [{
                               'type': 'ISBN_10',
                               'identifier': ''
                           }]
                       }
                   }
               })
    assert d.gbs_identifier == 'ISBN:'
    d = Source(id='abc',
               jsondata={
                   'gbs': {
                       'volumeInfo': {
                           'industryIdentifiers': [{
                               'type': 'ISBN_13',
                               'identifier': ''
                           }]
                       }
                   }
               })
    assert d.gbs_identifier == 'ISBN:'
    d.bibtex()

def dataset_detail_html(context=None, request=None, **kw):
    return {
        'buck1949': Source.get('buck1949'),
        'entries': DBSession.query(Parameter).count(),
        'chapters': DBSession.query(Chapter).count(),
    }

def dataset_detail_html(request=None, context=None, **kw):
    return {
        'Kraft1981': Source.get('kraft1981'),
    }

def value_index_html(context=None, request=None, **kw):
    ids = 'heathetal2015 floradogonunicode faunadogonunicode'.split()
    return {
        'spreadsheets': [Source.get(sid) for sid in ids],
        'heathmcpherson2009actionverbs': Source.get('heathmcpherson2009actionverbs')
    }

def test_Source(self):
    from clld.db.models.common import Source

    d = Source(id='abc')
    self.assertIsNone(d.gbs_identifier)
    d = Source(id='abc', jsondata={'gbs': {'volumeInfo': {}}})
    self.assertIsNone(d.gbs_identifier)
    d = Source(id='abc',
               jsondata={
                   'gbs': {
                       'volumeInfo': {
                           'industryIdentifiers': [{
                               'type': 'x',
                               'identifier': 'y'
                           }]
                       }
                   }
               })
    self.assertEqual(d.gbs_identifier, 'y')
    d = Source(id='abc',
               jsondata={
                   'gbs': {
                       'volumeInfo': {
                           'industryIdentifiers': [{
                               'type': 'ISBN_10',
                               'identifier': ''
                           }]
                       }
                   }
               })
    self.assertEqual(d.gbs_identifier, 'ISBN:')
    d = Source(id='abc',
               jsondata={
                   'gbs': {
                       'volumeInfo': {
                           'industryIdentifiers': [{
                               'type': 'ISBN_13',
                               'identifier': ''
                           }]
                       }
                   }
               })
    self.assertEqual(d.gbs_identifier, 'ISBN:')
    d.bibtex()

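# The tests above pin down the observable behaviour of `Source.gbs_identifier`
# without showing its implementation. A minimal sketch that satisfies them (an
# inference from the tests, not clld's actual code) could look like:
def gbs_identifier(jsondata):
    identifiers = jsondata.get('gbs', {}).get('volumeInfo', {}).get(
        'industryIdentifiers', [])
    if not identifiers:
        return None  # no usable Google Books data -> None
    id_ = identifiers[0]
    if id_['type'].startswith('ISBN'):
        # ISBN_10/ISBN_13 types are normalized to an "ISBN:" prefix.
        return 'ISBN:' + id_['identifier']
    return id_['identifier']  # other types pass through unchanged

assert gbs_identifier({}) is None
assert gbs_identifier(
    {'gbs': {'volumeInfo': {'industryIdentifiers': [
        {'type': 'ISBN_13', 'identifier': ''}]}}}) == 'ISBN:'
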
def justifications(args, languages, stats):
    """
    - text goes into ValueSet.description
    - refs go into ValueSetReference objects
    """
    hh_bibkey_to_glottolog_id = {}
    for rec in get_bib(args):
        for provider, bibkeys in get_bibkeys(rec).items():
            if provider == 'hh':
                for bibkey in bibkeys:
                    hh_bibkey_to_glottolog_id[bibkey] = rec['glottolog_ref_id']
                break

    def substitute_hh_bibkeys(m):
        return '**%s**' % hh_bibkey_to_glottolog_id[m.group('bibkey')]

    #
    # create mappings to look up glottolog languoids matching names in justification files
    #
    langs_by_hid = languages
    langs_by_hname = {}
    langs_by_name = {}

    # order by active, so that active languoids overwrite the data of obsolete ones.
    for l in DBSession.query(Languoid).order_by(Languoid.active):
        langs_by_hname[l.jsondata.get('hname')] = l
        langs_by_hid[l.hid] = l
        langs_by_name[l.name] = l

    def normalize_pages(s):
        return (s or '').strip().rstrip(',') or None

    for id_, type_ in [('fc', 'family'), ('sc', 'subclassification')]:
        for i, row in enumerate(dsv.reader(
                args.data_dir.joinpath(
                    'languoids', 'forkel_%s_justifications-utf8.tab' % type_))):
            name = row[0]
            name = name.replace('_', ' ') if not name.startswith('NOCODE') else name
            l = langs_by_hname.get(name, langs_by_hid.get(name, langs_by_name.get(name)))
            if not l:
                args.log.warn('ignoring %s' % name)
                continue

            _r = 3 if type_ == 'family' else 2
            comment = (row[_r].strip() or None) if len(row) > _r else None
            if comment and not WORD_PATTERN.search(comment):
                comment = None
            if comment:
                comment = re.sub(
                    r'\*\*(?P<bibkey>[^\*]+)\*\*', substitute_hh_bibkeys, comment)

            #
            # TODO: look for [NOCODE_ppp] patterns as well!?
            #
            refs = [
                (int(m.group('id')), normalize_pages(m.group('pages')))
                for m in REF_PATTERN.finditer(
                    re.sub(r'\*\*(?P<bibkey>[^\*]+)\*\*', substitute_hh_bibkeys, row[2]))]

            vs = None
            for _vs in l.valuesets:
                if _vs.parameter.id == id_:
                    vs = _vs
                    break

            if not vs:
                args.log.info('%s %s ++' % (l.id, type_))
                vs = ValueSet(
                    id='%s%s' % (id_, l.pk),
                    description=comment,
                    language=l,
                    parameter=Parameter.get(id_),
                    contribution=Contribution.first())
                DBSession.add(Value(
                    id='%s%s' % (id_, l.pk),
                    name='%s - %s' % (l.level, l.status),
                    valueset=vs))
                DBSession.flush()
            else:
                if vs.description != comment:
                    args.log.info('%s %s ~~ description: %s ---> %s' % (
                        l.id, type_, vs.description, comment))
                    vs.description = comment

            stats.update(['justifications-%s' % type_])

            for r in vs.references:
                DBSession.delete(r)
            for r, pages in refs:
                # FIXME: we must make sure not to link sources which will
                # subsequently be replaced!
                vs.references.append(ValueSetReference(
                    source=Source.get(str(r)), description=pages))
        args.log.info('%s %s' % (i, type_))

def justifications(args, languages):
    """
    - text goes into ValueSet.description
    - refs go into ValueSetReference objects
    """
    def normalized_pages(s):
        # returns None (implicitly) for page specs that don't match PAGES_PATTERN
        if PAGES_PATTERN.match(s or ""):
            return s or ""

    #
    # create mappings to look up glottolog languoids matching names in justification files
    #
    langs_by_hid = languages
    langs_by_hname = {}
    langs_by_name = {}

    # two passes: inactive languoids first, then active ones, so that active
    # languoids overwrite any obsolete entry under the same key.
    for l in DBSession.query(Languoid).filter(Languoid.active == False):
        langs_by_hname[l.jsondatadict.get("hname")] = l
        langs_by_hid[l.hid] = l
        langs_by_name[l.name] = l
    for l in DBSession.query(Languoid).filter(Languoid.active == True):
        langs_by_hname[l.jsondatadict.get("hname")] = l
        langs_by_hid[l.hid] = l
        langs_by_name[l.name] = l

    for id_, type_ in [("fc", "family"), ("sc", "subclassification")]:
        for i, row in enumerate(dsv.reader(args.data_file("%s_justifications.tab" % type_))):
            name = row[0]
            name = name.replace("_", " ") if not name.startswith("NOCODE") else name
            l = langs_by_hname.get(name, langs_by_hid.get(name, langs_by_name.get(name)))
            if not l:
                args.log.warn("ignoring %s" % name)
                continue

            _r = 3 if type_ == "family" else 2
            comment = (row[_r].strip() or None) if len(row) > _r else None
            if comment and not WORD_PATTERN.search(comment):
                comment = None

            #
            # TODO: look for [NOCODE_ppp] patterns as well!?
            #
            refs = [
                (int(m.group("id")), normalized_pages(m.group("comment")))
                for m in REF_PATTERN.finditer(row[2])]

            vs = None
            for _vs in l.valuesets:
                if _vs.parameter.id == id_:
                    vs = _vs
                    break

            if not vs:
                args.log.info("%s %s ++" % (l.id, type_))
                vs = ValueSet(
                    id="%s%s" % (type_, l.id),
                    description=comment,
                    language=l,
                    parameter=Parameter.get(id_),
                    contribution=Contribution.first(),
                )
                DBSession.add(Value(
                    id="%s%s" % (type_, l.id),
                    name="%s - %s" % (l.level, l.status),
                    valueset=vs))
                DBSession.flush()
            else:
                if vs.description != comment:
                    args.log.info("%s %s ~~ description" % (l.id, type_))
                    vs.description = comment

            for r in vs.references:
                DBSession.delete(r)
            for r, pages in refs:
                vs.references.append(
                    ValueSetReference(source=Source.get(str(r)), description=pages))
        args.log.info("%s %s" % (i, type_))

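# The `substitute_hh_bibkeys` step in the first `justifications` variant,
# demonstrated standalone: `**bibkey**` markers are rewritten to
# `**<glottolog_ref_id>**` so that the later REF_PATTERN pass can resolve them
# as numeric Source ids. The mapping below is made-up example data.
import re

hh_bibkey_to_glottolog_id = {'hh:s:Heath:Bangime': '318814'}

def substitute_hh_bibkeys(m):
    return '**%s**' % hh_bibkey_to_glottolog_id[m.group('bibkey')]

print(re.sub(r'\*\*(?P<bibkey>[^*]+)\*\*', substitute_hh_bibkeys,
             'See **hh:s:Heath:Bangime** for the classification.'))
# -> See **318814** for the classification.
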
def language_snippet_html(request=None, context=None, **kw):
    return dict(
        source=Source.get(request.params['source'])
        if request.params.get('source') else None)

def language_index_html(context=None, request=None, **kw):
    return {
        'refs': {
            k: Source.get(misc.slug(k)) for k in
            'Hochstetler_etal2004 Blench2007 Blench2005 Blench2005b Blench2007b'.split()}}