def extract_data(endangerment):  # pragma: no cover
    """Precompute per-language MED (Most Extensive Description) information.

    For every active language with geo-coordinates (at Languoid level
    "language"), compute the overall MED, the list of "potential MEDs"
    (the best source available at each publication-year cut-off), and the
    endangerment source, keyed by language id.

    :param endangerment: dict mapping language id -> dict; an optional
        'source' entry per language is carried through to the result.
    :return: dict mapping language id ->
        [med_json_or_None, [potential_med_json, ...], endangerment_source]
    """
    status = {}
    # Active, established languages with geo-coords.
    # .is_()/.isnot() are the idiomatic SQLAlchemy spellings of IS NULL /
    # IS NOT NULL / boolean comparisons (avoids E711/E712 lint noise).
    lpks = DBSession.query(common.Language.pk) \
        .filter(common.Language.active.is_(True)) \
        .filter(common.Language.latitude.isnot(None)) \
        .filter(Languoid.level == LanguoidLevel.language) \
        .order_by(common.Language.pk).all()
    print(len(lpks))

    # Per-source language counts, restricted to refs without computerized
    # (trigger-based) assignments.
    sql = """\
select ls.source_pk, count(ls.language_pk) from languagesource as ls, ref as r 
where ls.source_pk = r.pk and r.ca_doctype_trigger is null and r.ca_language_trigger is null 
group by source_pk 
    """
    lcounts = {r[0]: r[1] for r in DBSession.execute(sql)}

    # loop over active, established languages with geo-coords
    for i, lpk in enumerate(lpks):
        # `lang` instead of `l`: E741 discourages ambiguous single-letter names.
        lang = DBSession.query(common.Language).filter(common.Language.pk == lpk).one()
        # let's collect the relevant sources in a way that allows computation of med.
        # Note: we limit refs to the ones without computerized assignments.
        # Backslash continuations are unnecessary inside the call parentheses.
        sources = list(DBSession.query(Ref).join(common.LanguageSource)
                       .filter(common.LanguageSource.language_pk == lpk)
                       .filter(Ref.ca_doctype_trigger.is_(None))
                       .filter(Ref.ca_language_trigger.is_(None))
                       .options(joinedload(Ref.doctypes)))
        sources = sorted(Source(s, lcounts.get(s.pk, 0)) for s in sources)

        # keep the overall med
        # note: this source may not be included in the potential meds computed
        # below, e.g. because it may not have a year.
        med = sources[0].__json__() if sources else None

        # now we have to compute meds respecting a cut-off year.
        # to do so, we collect eligible sources per year and then
        # take the med of this collection.
        potential_meds = []

        # we only have to loop over publication years within all sources, because
        # only in these years something better might have come along.
        for year in set(s.year for s in sources if s.year):
            # let's see if something better was published!
            eligible = [s for s in sources if s.year and s.year <= year]
            if eligible:
                # min() instead of sorted(...)[0]: O(n) rather than O(n log n)
                # and no throwaway sorted copy.
                potential_meds.append(min(eligible))

        # we store the precomputed sources information as jsondata:
        status[lang.id] = [
            med,
            [s.__json__() for s in
             sorted(set(potential_meds), key=lambda s: -s.year)],
            endangerment.get(lang.id, {}).get('source')
        ]
        if i and i % 1000 == 0:
            print(i)
            # Periodically release the session to keep memory bounded.
            DBSession.close()

    return status
# Example #2
def extract_data(endangerment):  # pragma: no cover
    """Precompute, per language, the MED plus year-cut-off MEDs and the
    endangerment source, keyed by language id."""
    status = {}
    # Active, established languages carrying geo-coordinates.
    language_pks = (
        DBSession.query(common.Language.pk)
        .filter(common.Language.active == True)
        .filter(common.Language.latitude != None)
        .filter(Languoid.level == LanguoidLevel.language)
        .order_by(common.Language.pk)
        .all())
    print(len(language_pks))

    # How many languages each source is linked to, ignoring refs that got
    # their assignments from computerized triggers.
    sql = """\
select ls.source_pk, count(ls.language_pk) from languagesource as ls, ref as r 
where ls.source_pk = r.pk and r.ca_doctype_trigger is null and r.ca_language_trigger is null 
group by source_pk 
    """
    lang_counts = dict(
        (row[0], row[1]) for row in DBSession.execute(sql))

    for idx, pk in enumerate(language_pks):
        language = (
            DBSession.query(common.Language)
            .filter(common.Language.pk == pk)
            .one())
        # Gather the relevant refs — only those without computerized
        # assignments — so that the med can be computed.
        query = (
            DBSession.query(Ref)
            .join(common.LanguageSource)
            .filter(common.LanguageSource.language_pk == pk)
            .filter(Ref.ca_doctype_trigger == None)
            .filter(Ref.ca_language_trigger == None)
            .options(joinedload(Ref.doctypes)))
        ranked = sorted(
            Source(ref, lang_counts.get(ref.pk, 0)) for ref in query)

        # The overall med; it may be absent from the cut-off meds below,
        # e.g. when it lacks a publication year.
        med = ranked[0].__json__() if ranked else None

        # For every publication year occurring among the sources, determine
        # the med one would have gotten when restricted to sources published
        # up to (and including) that year.
        cutoff_meds = []
        for year in {src.year for src in ranked if src.year}:
            candidates = [
                src for src in ranked if src.year and src.year <= year]
            if candidates:
                cutoff_meds.append(sorted(candidates)[0])

        # Persist the precomputed source info as jsondata.
        status[language.id] = [
            med,
            [src.__json__()
             for src in sorted(set(cutoff_meds), key=lambda src: -src.year)],
            endangerment.get(language.id, {}).get('source')
        ]
        if idx and idx % 1000 == 0:
            print(idx)
            DBSession.close()

    return status
# Example #3
def data(settings):
    """Yield an engine with clld's schema created and DBSession bound to it.

    Generator fixture: configures an engine from *settings*, creates all
    tables of the clld ``Base`` metadata, binds ``DBSession``, yields the
    engine, and closes the session on teardown.

    :param settings: configuration mapping passed to ``sa.engine_from_config``.
    """
    from clld.db.meta import Base, DBSession

    engine = sa.engine_from_config(settings)
    Base.metadata.create_all(bind=engine)
    DBSession.configure(bind=engine)

    try:
        yield engine
    finally:
        # try/finally guarantees the session is closed even when the
        # consumer raises (or the generator is closed early).
        DBSession.close()
# Example #4
def db(url='sqlite://'):
    """Yield an engine for *url* with clld's schema created and DBSession bound.

    Generator fixture: creates an engine (in-memory SQLite by default),
    creates all tables of the clld ``Base`` metadata, binds ``DBSession``,
    yields the engine, and closes the session on teardown.

    :param url: SQLAlchemy database URL; defaults to in-memory SQLite.
    """
    from clld.db.meta import Base, DBSession

    engine = sa.create_engine(url)
    Base.metadata.create_all(bind=engine)
    DBSession.configure(bind=engine)

    try:
        yield engine
    finally:
        # try/finally guarantees the session is closed even when the
        # consumer raises (or the generator is closed early).
        DBSession.close()
# Example #5
def data(settings):
    """Yield an engine with clld's schema created; bind both session registries.

    Generator fixture: configures an engine from *settings*, creates all
    tables of the clld ``Base`` metadata, binds ``DBSession`` and
    ``VersionedDBSession``, yields the engine, and closes both sessions
    on teardown.

    :param settings: configuration mapping passed to ``sa.engine_from_config``.
    """
    from clld.db.meta import Base, DBSession, VersionedDBSession

    engine = sa.engine_from_config(settings)
    Base.metadata.create_all(bind=engine)
    DBSession.configure(bind=engine)
    VersionedDBSession.configure(bind=engine)

    try:
        yield engine
    finally:
        # Nested finally: VersionedDBSession is closed even if closing
        # DBSession raises, and both run even when the consumer raises.
        try:
            DBSession.close()
        finally:
            VersionedDBSession.close()