Example #1
0
 def refined_query(self, query, model, req):
     """Apply model-specific refinements to ``query``.

     Contributions get their references and the referenced sources loaded
     eagerly; for parameters and sources, legacy URLs are answered with a
     redirect (or a 404). Derived classes may override this method to add
     refinements of their own.
     """
     if model == Contribution:
         eager_references = joinedload_all(
             Contribution.references, ContributionReference.source)
         return query.options(eager_references)

     if model == Parameter:
         requested_id = req.matchdict['id']
         if requested_id[-1] not in ascii_uppercase:
             # An id not ending in an uppercase letter is a route match for
             # a 2008-style URL: redirect to the contribution.
             raise HTTPMovedPermanently(
                 req.route_url('contribution', id=requested_id))

     if model == Source:
         try:
             # Legacy refdb URLs were formed with a numeric id. A
             # non-numeric id makes int() raise ValueError, in which case
             # the query is returned untouched.
             legacy = Source.get(int(req.matchdict['id']), default=None)
             if not legacy:
                 raise HTTPNotFound()
             raise HTTPMovedPermanently(
                 req.route_url('source', id=legacy.id))
         except ValueError:
             pass

     return query
Example #2
0
def dataset_detail_html(context=None, request=None, **kw):
    """Assemble the template variables for the dataset detail page.

    The result maps each inventory source to its inventory count and adds
    overall counts, the contributors (ordered by name), a fixed set of
    sources and the marked-up contributor descriptions.
    """
    per_source = DBSession.execute(
        "select source, count(pk) from inventory group by source")
    res = {row[0]: row[1] for row in per_source}

    res["inventory_count"] = DBSession.query(Inventory).count()
    res["segment_count"] = DBSession.query(Parameter).count()
    res["language_count"] = DBSession.query(Language).count()

    # Load the associations needed by the template together with the
    # contributors.
    eager = (
        joinedload(Contributor.contribution_assocs),
        joinedload(Contributor.references),
    )
    contributors = DBSession.query(Contributor).order_by(Contributor.name).options(*eager).all()
    res["contributors"] = contributors

    sources = {}
    for source_id in (
        "moisikesling2011",
        "ipa2005",
        "hayes2009",
        "moran2012a",
        "moranetal2012",
        "cysouwetal2012",
        "mccloyetal2013",
    ):
        sources[source_id] = Source.get(source_id)
    res["sources"] = sources

    descriptions = {}
    for contributor in contributors:
        descriptions[contributor.id] = desc(request, contributor.description, sources)
    res["descriptions"] = descriptions
    return res
Example #3
0
    def test_Source(self):
        """Check ``Source.gbs_identifier`` for several ``jsondata`` shapes.

        Uses ``assertEqual`` rather than the deprecated ``assertEquals``
        alias, which is no longer available in Python 3.12 and later.
        Finally ``bibtex()`` is called on the last source as a smoke test.
        """
        from clld.db.models.common import Source

        # No Google Books data at all.
        d = Source(id='abc')
        self.assertIsNone(d.gbs_identifier)
        # Google Books data without industry identifiers.
        d = Source(id='abc', jsondata={'gbs': {'volumeInfo': {}}})
        self.assertIsNone(d.gbs_identifier)
        d = Source(
            id='abc',
            jsondata={
                'gbs': {
                    'volumeInfo': {
                        'industryIdentifiers': [{'type': 'x', 'identifier': 'y'}]}}})
        self.assertEqual(d.gbs_identifier, 'y')
        d = Source(
            id='abc',
            jsondata={
                'gbs': {
                    'volumeInfo': {
                        'industryIdentifiers': [{'type': 'ISBN_10', 'identifier': ''}]}}})
        self.assertEqual(d.gbs_identifier, 'ISBN:')
        d = Source(
            id='abc',
            jsondata={
                'gbs': {
                    'volumeInfo': {
                        'industryIdentifiers': [{'type': 'ISBN_13', 'identifier': ''}]}}})
        self.assertEqual(d.gbs_identifier, 'ISBN:')
        d.bibtex()
Example #4
0
def test_Source():
    """Check ``Source.gbs_identifier`` for several ``jsondata`` shapes and
    call ``bibtex()`` on the last source.
    """
    from clld.db.models.common import Source

    def with_identifiers(*identifiers):
        # Source whose Google Books data lists the given industry identifiers.
        volume_info = {'industryIdentifiers': list(identifiers)}
        return Source(id='abc', jsondata={'gbs': {'volumeInfo': volume_info}})

    assert Source(id='abc').gbs_identifier is None
    assert Source(id='abc', jsondata={'gbs': {'volumeInfo': {}}}).gbs_identifier is None

    assert with_identifiers({'type': 'x', 'identifier': 'y'}).gbs_identifier == 'y'

    src = None
    for isbn_type in ('ISBN_10', 'ISBN_13'):
        src = with_identifiers({'type': isbn_type, 'identifier': ''})
        assert src.gbs_identifier == 'ISBN:'
    src.bibtex()
Example #5
0
def desc(req, d, sources=None):
    """Replace ``**<key>**`` markers in a description with source links.

    :param req: The current request; passed through to ``link``.
    :param d: The description text; falsy values are returned unchanged.
    :param sources: Optional mapping of marker key to source object. When \
    omitted, a fixed set of sources is looked up by id.
    :return: The description with every ``**<key>**`` marker replaced by the \
    rendered link of the corresponding source.
    """
    if not d:
        # Nothing to mark up, so skip the source lookups altogether.
        return d
    if sources is None:
        sources = {k: Source.get(k) for k in "moisikesling2011 hayes2009 moran2012a moranetal2012".split()}
    for k, v in sources.items():
        # Plain string replacement: neither the key nor the rendered link may
        # be interpreted as regex pattern or replacement template (a backslash
        # in the link would otherwise be treated as an escape).
        d = d.replace("**%s**" % k, text_type(link(req, v)))
    return d
Example #6
0
def desc(req, d, sources=None):
    """Replace ``**<key>**`` markers in a description with source links.

    :param req: The current request; passed through to ``link``.
    :param d: The description text; falsy values are returned unchanged.
    :param sources: Optional mapping of marker key to source object. When \
    omitted, a fixed set of sources is looked up by id.
    :return: The description with every ``**<key>**`` marker replaced by the \
    rendered link of the corresponding source.
    """
    if not d:
        # Nothing to mark up, so skip the source lookups altogether.
        return d
    if sources is None:
        sources = {k: Source.get(k) for k in
                   'MoisikEsling2011 Hayes2009 Moran2012a Moran_etal2012'.split()}
    for k, v in sources.items():
        # Plain string replacement: neither the key nor the rendered link may
        # be interpreted as regex pattern or replacement template (a backslash
        # in the link would otherwise be treated as an escape).
        d = d.replace('**%s**' % k, str(link(req, v)))
    return d
Example #7
0
def desc(req, d, sources=None):
    """Replace ``**<key>**`` markers in a description with source links.

    :param req: The current request; passed through to ``link``.
    :param d: The description text; falsy values are returned unchanged.
    :param sources: Optional mapping of marker key to source object. When \
    omitted, a fixed set of sources is looked up by id.
    :return: The description with every ``**<key>**`` marker replaced by the \
    rendered link of the corresponding source.
    """
    if not d:
        # Nothing to mark up, so skip the source lookups altogether.
        return d
    if sources is None:
        sources = {k: Source.get(k) for k in
                   'moisikesling2011 hayes2009 moran2012a moranetal2012'.split()}
    for k, v in sources.items():
        # Plain string replacement: neither the key nor the rendered link may
        # be interpreted as regex pattern or replacement template (a backslash
        # in the link would otherwise be treated as an escape).
        d = d.replace('**%s**' % k, text_type(link(req, v)))
    return d
Example #8
0
    def test_compute_language_sources(self):
        """Run ``compute_language_sources`` after flushing a sentence
        reference that links a new language to an existing source.
        """
        from clld.db.models.common import Source, Sentence, Language, SentenceReference
        from clld.db.meta import DBSession
        from clld.db.util import compute_language_sources

        new_language = Language(id='newlang')
        sentence = Sentence(id='sentenced', language=new_language)
        reference = SentenceReference(sentence=sentence, source=Source.first())
        DBSession.add(reference)
        DBSession.flush()

        compute_language_sources()
Example #9
0
def bangime(req):
    """Look up a fixed set of documents.

    :return: ``dict`` with the single key ``'docs'``, mapping a short name \
    to the source object with the associated id.
    """
    source_ids = (
        ('memorial', 'eldersmemorialcall07'),
        ('bangerimevocabulaire', 'bangerimevocabulaire'),
        ('bangerimephrases', 'bangerimephrases'),
        ('bangerimepres', 'elders2006'),
        ('blacksmith', 'blacksmithvocabulary'),
    )
    docs = {}
    for name, sid in source_ids:
        docs[name] = Source.get(sid)
    return {'docs': docs}
Example #10
0
def other(req):
    """Return a fixed table of rows plus the two Jenaama sources.

    :return: ``dict`` with the keys ``'rows'`` (list of four-item lists) and \
    ``'jenaama'`` (source objects, looked up by slugified id).
    """
    rows = [
        ["Tieyaxo", "Tigemaxo", "boz", "tiey1235"],
        ["Tiema Cewe", "Tiema Ce", "boo", "tiem1235"],
        ["Kelenga", "Hainyaxo", "bsx", "hain1253"],
        ["Jenaama", "Sorogaana", "bze", "jena1242"],
    ]
    jenaama_sources = []
    for sid in ('Heath2016-Jenaama-lexicon', 'Heath2016-JenaamaBozo'):
        jenaama_sources.append(Source.get(slug(sid)))
    return {'rows': rows, 'jenaama': jenaama_sources}
Example #11
0
def florafauna(req):
    """Look up the sources holding the flora and fauna notes.

    :return: ``dict`` with the single key ``'notes'``: the source objects for \
    a fixed list of ids (slugified before the lookup), in list order.
    """
    notes = []
    for sid in (
        'fish_notes_Mali_JH',
        'flora_notes_Mali_JH',
        'insect_arthropod_mollusc_notes_Mali_JH',
        'mammal_notes_Mali_JH',
        'reptile_notes_Mali_JH',
        'bird_notes_Mali_JH',
    ):
        notes.append(Source.get(slug(sid)))
    return {'notes': notes}
def main(args):
    """Apply the source replacements listed in ``monster-replacements.json``.

    Every listed source that still exists is registered as replaced via
    ``Config.add_replacement`` and then deleted. The number of distinct
    (id, replacement) pairs read from the file is logged at the end.
    """
    replacements_file = args.data_dir.joinpath('scripts', 'monster-replacements.json')
    repls = {(item['id'], item['replacement']) for item in jsonload(replacements_file)}

    with transaction.manager:
        for ref_id, repl_id in repls:
            ref = Source.get('%s' % ref_id, default=None)
            if not ref:
                # Unknown (or already removed) source: nothing to replace.
                continue
            Config.add_replacement(
                ref, '%s' % repl_id, session=DBSession, model=Source)
            # FIXME: "redirect" relations, e.g. from valuesetreference as well!
            DBSession.delete(ref)

    args.log.info('%s replacements' % len(repls))
Example #13
0
def markup_feature_desc(req, desc):
    """Turn feature number references in a description into links.

    Two kinds of references are recognized:

    - ``WALS feature number: <digits> [http://wals.info]`` becomes an external
      link to the WALS feature page.
    - ``Constenla feature number: <id> [<anything>]`` becomes a link to the
      source ``hvtypconstenlaintermedia``.

    :param req: The current request; passed through to ``link``.
    :param desc: The description text.
    :return: The description with all recognized references replaced.
    """
    # The patterns are raw strings: "\s", "\[", "\." and "\]" are invalid
    # escape sequences in regular string literals.
    for pattern, repl in [
        (r'WALS feature number:\s*(?P<id>[0-9]+)\s*\[http://wals\.info\]',
         lambda match: external_link(
            'http://wals.info/feature/%sA' % match.group('id'),
            label='WALS feature number %sA' % match.group('id'))),
        (r'Constenla feature number:\s*(?P<id>[a-z0-9]+)\s*\[[^\]]+\]',
         lambda match: link(
            req,
            Source.get('hvtypconstenlaintermedia'),
            label='Constenla feature number: ' + match.group('id')))]:
        desc = re.sub(pattern, repl, desc)

    return desc
Example #14
0
def markup_feature_desc(req, desc):
    """Turn feature number references in a description into links.

    Two kinds of references are recognized:

    - ``WALS feature number: <digits> [http://wals.info]`` becomes an external
      link to the WALS feature page.
    - ``Constenla feature number: <id> [<anything>]`` becomes a link to the
      source ``hvtypconstenlaintermedia``.

    :param req: The current request; passed through to ``link``.
    :param desc: The description text.
    :return: The description with all recognized references replaced.
    """
    # The patterns are raw strings: "\s", "\[", "\." and "\]" are invalid
    # escape sequences in regular string literals.
    for pattern, repl in [
        (r'WALS feature number:\s*(?P<id>[0-9]+)\s*\[http://wals\.info\]',
         lambda match: external_link(
             'http://wals.info/feature/%sA' % match.group('id'),
             label='WALS feature number %sA' % match.group('id'))),
        (r'Constenla feature number:\s*(?P<id>[a-z0-9]+)\s*\[[^\]]+\]',
         lambda match: link(
             req,
             Source.get('hvtypconstenlaintermedia'),
             label='Constenla feature number: ' + match.group('id'))),
    ]:
        desc = re.sub(pattern, repl, desc)

    return desc
Example #15
0
def dataset_detail_html(context=None, request=None, **kw):
    """Assemble the template variables for the dataset detail page.

    The result holds overall counts, the contributors (ordered by name), a
    fixed set of sources and the marked-up contributor descriptions.
    """
    # NOTE: the per-source inventory counts ("select source, count(pk) from
    # inventory group by source") which used to seed this dict are disabled.
    res = {}
    res['inventory_count'] = DBSession.query(Inventory).count()
    res['segment_count'] = DBSession.query(Parameter).count()
    res['language_count'] = DBSession.query(Language).count()

    contributor_query = DBSession.query(Contributor).order_by(Contributor.name)
    res['contributors'] = contributor_query.options(
        joinedload(Contributor.contribution_assocs),
        joinedload(Contributor.references),
    ).all()

    sources = {}
    for source_id in (
        'MoisikEsling2011',
        'IPA2005',
        'Hayes2009',
        'Moran2012a',
        'Moran_etal2012',
        'Cysouw_etal2012',
        'mccloy_etal2013',
    ):
        sources[source_id] = Source.get(source_id)
    res['sources'] = sources

    descriptions = {}
    for contributor in res['contributors']:
        descriptions[contributor.id] = desc(request, contributor.description, sources)
    res['descriptions'] = descriptions
    return res
Example #16
0
def format_classificationcomment(req, comment):
    """Render a classification comment as HTML paragraph with linked refs.

    Each match of ``REF_PATTERN`` is replaced by a link to the referenced
    source. The link is wrapped in parentheses, unless there is no text
    before it or the preceding word is one of "in", "of", "per" or "by".
    """
    no_parens_after = ('in', 'of', 'per', 'by')
    parts = []
    pos = 0
    for match in REF_PATTERN.finditer(comment):
        preceding = comment[pos:match.start()]
        words = preceding.strip().split()
        parenthesize = bool(words) and words[-1] not in no_parens_after

        parts.append(preceding)
        if parenthesize:
            parts.append('(')
        parts.append(link(req, Source.get(match.group('id'))))
        if parenthesize:
            parts.append(')')
        pos = match.end()

    # Whatever follows the last reference.
    parts.append(comment[pos:])
    return HTML.p(*parts)
Example #17
0
 def refined_query(self, query, model, req):
     """Apply model-specific refinements to ``query``.

     Contributions get their references and the referenced sources loaded
     eagerly; for parameters and sources, legacy URLs are answered with a
     redirect (or a 404).
     """
     if model == Contribution:
         eager_references = joinedload_all(
             Contribution.references, ContributionReference.source)
         return query.options(eager_references)

     if model == Parameter:
         requested_id = req.matchdict['id']
         if requested_id[-1] not in ascii_uppercase:
             # An id not ending in an uppercase letter is a route match for
             # a 2008-style URL: redirect to the contribution.
             raise HTTPMovedPermanently(
                 req.route_url('contribution', id=requested_id))

     if model == Source:
         try:
             # Legacy refdb URLs were formed with a numeric id. A
             # non-numeric id makes int() raise ValueError, in which case
             # the query is returned untouched.
             legacy = Source.get(int(req.matchdict['id']), default=None)
             if not legacy:
                 raise HTTPNotFound()
             raise HTTPMovedPermanently(
                 req.route_url('source', id=legacy.id))
         except ValueError:
             pass

     return query
Example #18
0
 def refined_query(self, query, model, req):
     """Apply model-specific refinements to ``query``.

     Contributions get their references and the referenced sources loaded
     eagerly; for parameters and sources, legacy URLs are answered with a
     redirect (or a 404).
     """
     if model == Contribution:
         eager_references = joinedload(Contribution.references)
         eager_sources = eager_references.joinedload(ContributionReference.source)
         return query.options(eager_sources)

     if model == Parameter:
         requested_id = req.matchdict['id']
         if requested_id[-1] not in string.ascii_uppercase:
             # An id not ending in an uppercase letter is a route match for
             # a 2008-style URL: redirect to the contribution.
             raise HTTPMovedPermanently(
                 req.route_url('contribution', id=requested_id))

     if model == Source:
         try:
             # Legacy refdb URLs were formed with a numeric id. A
             # non-numeric id makes int() raise ValueError, in which case
             # the query is returned untouched.
             legacy = Source.get(int(req.matchdict['id']), default=None)
             if not legacy:
                 raise HTTPNotFound()
             raise HTTPMovedPermanently(
                 req.route_url('source', id=legacy.id))
         except ValueError:
             pass

     return query
Example #19
0
 def get_record(self, req, identifier):
     """Return the source addressed by ``identifier``.

     The identifier is first passed through ``self.parse_identifier``. An
     ``AssertionError`` is raised when no matching source exists.
     """
     source_id = self.parse_identifier(req, identifier)
     record = Source.get(source_id, default=None)
     assert record
     return record
Example #20
0
def test_Source():
    """Check ``Source.gbs_identifier`` for several ``jsondata`` shapes and
    call ``bibtex()`` on the last source.
    """
    from clld.db.models.common import Source

    def make(identifier_type, identifier):
        # Source whose Google Books data lists exactly one industry identifier.
        entry = {'type': identifier_type, 'identifier': identifier}
        volume_info = {'industryIdentifiers': [entry]}
        return Source(id='abc', jsondata={'gbs': {'volumeInfo': volume_info}})

    d = Source(id='abc')
    assert d.gbs_identifier is None
    d = Source(id='abc', jsondata={'gbs': {'volumeInfo': {}}})
    assert d.gbs_identifier is None

    cases = (
        ('x', 'y', 'y'),
        ('ISBN_10', '', 'ISBN:'),
        ('ISBN_13', '', 'ISBN:'),
    )
    for identifier_type, identifier, expected in cases:
        d = make(identifier_type, identifier)
        assert d.gbs_identifier == expected

    d.bibtex()
Example #21
0
File: util.py Project: clld/ids
def dataset_detail_html(context=None, request=None, **kw):
    """Return the template variables for the dataset detail page: the source
    ``buck1949`` plus the number of parameters and of chapters.
    """
    res = {'buck1949': Source.get('buck1949')}
    res['entries'] = DBSession.query(Parameter).count()
    res['chapters'] = DBSession.query(Chapter).count()
    return res
Example #22
0
File: views.py Project: clld/wals3
 def get_record(self, req, identifier):
     """Return the source addressed by ``identifier``.

     The identifier is first passed through ``self.parse_identifier``. An
     ``AssertionError`` is raised when no matching source exists.
     """
     source_id = self.parse_identifier(req, identifier)
     record = Source.get(source_id, default=None)
     assert record
     return record
Example #23
0
def dataset_detail_html(request=None, context=None, **kw):
    """Return the template variables for the dataset detail page: the source
    with id ``kraft1981``, under the key ``'Kraft1981'``.
    """
    kraft = Source.get('kraft1981')
    return {'Kraft1981': kraft}
Example #24
0
def value_index_html(context=None, request=None, **kw):
    """Return the sources referenced on the value index page.

    :return: ``dict`` with the keys ``'spreadsheets'`` (list of three source \
    objects) and ``'heathmcpherson2009actionverbs'`` (a single source object).
    """
    spreadsheets = []
    for sid in ('heathetal2015', 'floradogonunicode', 'faunadogonunicode'):
        spreadsheets.append(Source.get(sid))
    action_verbs = Source.get('heathmcpherson2009actionverbs')
    return {
        'spreadsheets': spreadsheets,
        'heathmcpherson2009actionverbs': action_verbs,
    }
Example #25
0
    def test_Source(self):
        """Check ``Source.gbs_identifier`` for several ``jsondata`` shapes.

        Uses ``assertEqual`` rather than the deprecated ``assertEquals``
        alias, which is no longer available in Python 3.12 and later.
        Finally ``bibtex()`` is called on the last source as a smoke test.
        """
        from clld.db.models.common import Source

        # No Google Books data at all.
        d = Source(id='abc')
        self.assertIsNone(d.gbs_identifier)
        # Google Books data without industry identifiers.
        d = Source(id='abc', jsondata={'gbs': {'volumeInfo': {}}})
        self.assertIsNone(d.gbs_identifier)
        d = Source(id='abc',
                   jsondata={
                       'gbs': {
                           'volumeInfo': {
                               'industryIdentifiers': [{
                                   'type': 'x',
                                   'identifier': 'y'
                               }]
                           }
                       }
                   })
        self.assertEqual(d.gbs_identifier, 'y')
        d = Source(id='abc',
                   jsondata={
                       'gbs': {
                           'volumeInfo': {
                               'industryIdentifiers': [{
                                   'type': 'ISBN_10',
                                   'identifier': ''
                               }]
                           }
                       }
                   })
        self.assertEqual(d.gbs_identifier, 'ISBN:')
        d = Source(id='abc',
                   jsondata={
                       'gbs': {
                           'volumeInfo': {
                               'industryIdentifiers': [{
                                   'type': 'ISBN_13',
                                   'identifier': ''
                               }]
                           }
                       }
                   })
        self.assertEqual(d.gbs_identifier, 'ISBN:')
        d.bibtex()
Example #26
0
def justifications(args, languages, stats):
    """Import classification justifications from the justification files.

    - text goes into ValueSet.description
    - refs go into ValueSetReference objects

    :param args: Script arguments; ``data_dir`` and ``log`` are read from it \
    and it is passed on to ``get_bib``.
    :param languages: Mapping of hid to languoid. NOTE: this mapping is updated \
    in place with the hid of every languoid found in the database.
    :param stats: Object whose ``update`` method is called with a list holding \
    the key ``'justifications-<type>'`` whenever an existing description \
    changes (presumably a ``collections.Counter`` - confirm with the caller).
    """
    # Raw string, since "\*" is an invalid escape sequence in a regular string
    # literal; compiled once because the pattern is applied to every row.
    bibkey_pattern = re.compile(r'\*\*(?P<bibkey>[^\*]+)\*\*')

    hh_bibkey_to_glottolog_id = {}
    for rec in get_bib(args):
        for provider, bibkeys in get_bibkeys(rec).items():
            if provider == 'hh':
                for bibkey in bibkeys:
                    hh_bibkey_to_glottolog_id[bibkey] = rec['glottolog_ref_id']
                break

    def substitute_hh_bibkeys(m):
        # NOTE: raises KeyError for a bibkey missing from the mapping above.
        return '**%s**' % hh_bibkey_to_glottolog_id[m.group('bibkey')]

    #
    # create mappings to look up glottolog languoids matching names in justification files
    #
    langs_by_hid = languages
    langs_by_hname = {}
    langs_by_name = {}

    # order by active to make sure active languoids overwrite the data of obsolete ones.
    for l in DBSession.query(Languoid).order_by(Languoid.active):
        langs_by_hname[l.jsondata.get('hname')] = l
        langs_by_hid[l.hid] = l
        langs_by_name[l.name] = l

    def normalize_pages(s):
        return (s or '').strip().rstrip(',') or None

    for id_, type_ in [('fc', 'family'), ('sc', 'subclassification')]:
        # Index of the last row read; initialised per file, so that an empty
        # file neither raises NameError nor reports the index of the
        # previous file.
        i = -1
        for i, row in enumerate(dsv.reader(
                args.data_dir.joinpath('languoids', 'forkel_%s_justifications-utf8.tab' % type_))):
            name = row[0]
            name = name.replace('_', ' ') if not name.startswith('NOCODE') else name
            # Lookup precedence: hname, then hid, then name.
            l = langs_by_hname.get(name, langs_by_hid.get(name, langs_by_name.get(name)))
            if not l:
                args.log.warn('ignoring %s' % name)
                continue

            # The comment column differs between the two file types.
            _r = 3 if type_ == 'family' else 2
            comment = (row[_r].strip() or None) if len(row) > _r else None
            if comment and not WORD_PATTERN.search(comment):
                comment = None
            if comment:
                comment = bibkey_pattern.sub(substitute_hh_bibkeys, comment)

            #
            # TODO: look for [NOCODE_ppp] patterns as well!?
            #

            refs = [(int(m.group('id')), normalize_pages(m.group('pages')))
                    for m in REF_PATTERN.finditer(
                    bibkey_pattern.sub(substitute_hh_bibkeys, row[2]))]

            vs = None
            for _vs in l.valuesets:
                if _vs.parameter.id == id_:
                    vs = _vs
                    break

            if not vs:
                args.log.info('%s %s ++' % (l.id, type_))
                vs = ValueSet(
                    id='%s%s' % (id_, l.pk),
                    description=comment,
                    language=l,
                    parameter=Parameter.get(id_),
                    contribution=Contribution.first())
                DBSession.add(Value(
                    id='%s%s' % (id_, l.pk),
                    name='%s - %s' % (l.level, l.status),
                    valueset=vs))
                DBSession.flush()
            else:
                if vs.description != comment:
                    args.log.info('%s %s ~~ description: %s ---> %s' % (l.id, type_, vs.description, comment))
                    vs.description = comment
                    stats.update(['justifications-%s' % type_])

            # Existing references are dropped and recreated from the file.
            for r in vs.references:
                DBSession.delete(r)

            for r, pages in refs:
                # FIXME: we must make sure not to link sources which will subsequently be
                # replaced!
                vs.references.append(ValueSetReference(
                    source=Source.get(str(r)),
                    description=pages))

        args.log.info('%s %s' % (i, type_))
Example #27
0
def dataset_detail_html(request=None, context=None, **kw):
    """Return the template variables for the dataset detail page: the source
    with id ``kraft1981``, under the key ``'Kraft1981'``.
    """
    kraft = Source.get('kraft1981')
    return {'Kraft1981': kraft}
Example #28
0
def justifications(args, languages):
    """Import classification justifications from the justification files.

    - text goes into ValueSet.description
    - refs go into ValueSetReference objects

    :param args: Script arguments; ``data_file`` and ``log`` are used.
    :param languages: Mapping of hid to languoid. NOTE: this mapping is updated \
    in place with the hid of every languoid with a set ``active`` flag.
    """

    def normalized_pages(s):
        # Only strings matching PAGES_PATTERN are kept as pages.
        if PAGES_PATTERN.match(s or ""):
            return s or ""
        return None

    #
    # create mappings to look up glottolog languoids matching names in justification files
    #
    langs_by_hid = languages
    langs_by_hname = {}
    langs_by_name = {}

    # Obsolete languoids are registered first, so that active ones overwrite
    # them wherever the lookup keys coincide.
    for active in (False, True):
        for l in DBSession.query(Languoid).filter(Languoid.active == active):
            langs_by_hname[l.jsondatadict.get("hname")] = l
            langs_by_hid[l.hid] = l
            langs_by_name[l.name] = l

    for id_, type_ in [("fc", "family"), ("sc", "subclassification")]:
        # Index of the last row read; initialised per file, so that an empty
        # file neither raises NameError nor reports the index of the
        # previous file.
        i = -1
        for i, row in enumerate(dsv.reader(args.data_file("%s_justifications.tab" % type_))):
            name = row[0]
            name = name.replace("_", " ") if not name.startswith("NOCODE") else name
            # Lookup precedence: hname, then hid, then name.
            l = langs_by_hname.get(name, langs_by_hid.get(name, langs_by_name.get(name)))
            if not l:
                args.log.warn("ignoring %s" % name)
                continue

            # The comment column differs between the two file types.
            _r = 3 if type_ == "family" else 2
            comment = (row[_r].strip() or None) if len(row) > _r else None
            if comment and not WORD_PATTERN.search(comment):
                comment = None

            #
            # TODO: look for [NOCODE_ppp] patterns as well!?
            #

            refs = [(int(m.group("id")), normalized_pages(m.group("comment"))) for m in REF_PATTERN.finditer(row[2])]

            vs = None
            for _vs in l.valuesets:
                if _vs.parameter.id == id_:
                    vs = _vs
                    break

            if not vs:
                args.log.info("%s %s ++" % (l.id, type_))
                vs = ValueSet(
                    id="%s%s" % (type_, l.id),
                    description=comment,
                    language=l,
                    parameter=Parameter.get(id_),
                    contribution=Contribution.first(),
                )
                DBSession.add(Value(id="%s%s" % (type_, l.id), name="%s - %s" % (l.level, l.status), valueset=vs))
                DBSession.flush()
            else:
                if vs.description != comment:
                    args.log.info("%s %s ~~ description" % (l.id, type_))
                    vs.description = comment

            # Existing references are dropped and recreated from the file.
            for r in vs.references:
                DBSession.delete(r)

            for r, pages in refs:
                vs.references.append(ValueSetReference(source=Source.get(str(r)), description=pages))

        args.log.info("%s %s" % (i, type_))
Example #29
0
def language_snippet_html(request=None, context=None, **kw):
    """Return the source named by the ``source`` request parameter.

    :return: ``dict`` with the single key ``'source'``; its value is ``None`` \
    when the parameter is missing or empty.
    """
    params = request.params
    if not params.get('source'):
        return dict(source=None)
    return dict(source=Source.get(params['source']))
Example #30
0
def language_index_html(context=None, request=None, **kw):
    """Return the references shown on the language index page.

    :return: ``dict`` with the single key ``'refs'``, mapping each of a fixed \
    set of keys to the source object whose id is the slugified key.
    """
    keys = (
        'Hochstetler_etal2004',
        'Blench2007',
        'Blench2005',
        'Blench2005b',
        'Blench2007b',
    )
    refs = {}
    for key in keys:
        refs[key] = Source.get(misc.slug(key))
    return {'refs': refs}