Example #1
            'Creative Commons Attribution 4.0 International License',
        },
        domain='crubadan.org',
    )

    DBSession.add(dataset)
    DBSession.flush()

    editor = data.add(common.Contributor,
                      "Kevin Scannell",
                      id="Kevin Scannell",
                      name="Kevin Scannell",
                      email="*****@*****.**")
    common.Editor(dataset=dataset, contributor=editor, ord=0)
    DBSession.flush()

    fillTable(DBSession)


def prime_cache(args):
    """If data needs to be denormalized for lookup, do that here.
    This procedure should be separate from the db initialization, because
    it will have to be run periodically whenever data has been updated.
    """


if __name__ == '__main__':
    prepSysDirs()
    initializedb(create=main, prime_cache=prime_cache)
    sys.exit(0)
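
The excerpt above opens mid-call; for orientation, here is a minimal sketch of the common.Dataset construction such a create function typically starts with in clld apps. The id, name, and jsondata keys are assumptions for illustration, not the project's actual values:

from clld.db.models import common

dataset = common.Dataset(
    id='crubadan',         # hypothetical id
    name='An Crubadan',    # hypothetical name
    license='https://creativecommons.org/licenses/by/4.0/',
    jsondata={
        'license_icon': 'cc-by.png',  # assumed jsondata keys
        'license_name':
            'Creative Commons Attribution 4.0 International License',
    },
    domain='crubadan.org',
)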
Example #2
    # we compute the ancestry for each single languoid
    for lid, fid in DBSession.execute(
            'select pk, father_pk from languoid').fetchall():
        depth = 0
        DBSession.execute(SQL, dict(child_pk=lid, parent_pk=lid, depth=depth))

        # now follow up the line of ancestors
        while fid:
            depth += 1
            DBSession.execute(SQL,
                              dict(child_pk=lid, parent_pk=fid, depth=depth))
            fid = DBSession.execute(
                sql.select([ltable.c.father_pk]).where(ltable.c.pk == fid)
            ).fetchone()[0]

    # we also pre-compute counts of descendants for each languoid:
    for level in ['language', 'dialect', 'family']:
        DBSession.execute("""\
UPDATE languoid SET child_%(level)s_count = (
    SELECT count(*)
    FROM treeclosuretable as t, languoid as l
    WHERE languoid.pk = t.parent_pk
    AND languoid.pk != t.child_pk AND t.child_pk = l.pk AND l.level = '%(level)s'
)""" % locals())

    DBSession.execute('COMMIT')


if __name__ == '__main__':
    initializedb(create=create, prime_cache=prime_cache)
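
The statement bound to the name SQL is defined outside this excerpt; given the parameters it receives, it is presumably an insert into the closure table, along these lines (a sketch, not the project's verbatim text):

# Presumed shape of the SQL statement used above: one row per
# (descendant, ancestor) pair, with the distance between them.
SQL = """INSERT INTO treeclosuretable (child_pk, parent_pk, depth)
VALUES (:child_pk, :parent_pk, :depth)"""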
Example #3
    for parameter, valuesets in groupby(
        DBSession.query(common.ValueSet).order_by(common.ValueSet.parameter_pk),
        lambda vs: vs.parameter,
    ):
        parameter.representation = str(len(set(v.language_pk for v in valuesets)))
    print("recomputation of representation done")
    transaction.commit()
    transaction.begin()

    # cache iso codes for languages:
    for language in DBSession.query(common.Language).options(
        joinedload_all(common.Language.languageidentifier, common.LanguageIdentifier.identifier)
    ):
        iso_codes = []
        for identifier in language.identifiers:
            if identifier.type == common.IdentifierType.iso.value:
                iso_codes.append(identifier.name)
        language.iso_codes = ", ".join(sorted(set(iso_codes)))
    print("ecomputation of iso codes done")
    transaction.commit()
    transaction.begin()

    compute_language_sources()
    transaction.commit()
    transaction.begin()

    gbs_func("update", args)


if __name__ == "__main__":  # pragma: no cover
    initializedb(create=main, prime_cache=prime_cache)
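
Note that itertools.groupby only merges consecutive items, which is why the ValueSet query above orders by parameter_pk before grouping. A minimal illustration of the pitfall:

from itertools import groupby

pairs = [('a', 1), ('b', 2), ('a', 3)]
# Unsorted input yields 'a' twice; sorting first gives one group per key.
print([k for k, _ in groupby(pairs, key=lambda p: p[0])])          # ['a', 'b', 'a']
print([k for k, _ in groupby(sorted(pairs), key=lambda p: p[0])])  # ['a', 'b']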
Example #4
    it will have to be run periodically whenever data has been updated.
    """
    for vs in DBSession.query(common.ValueSet).options(
            joinedload(common.ValueSet.values)):
        d = []
        # sort by description first (groupby needs consecutive keys); guard
        # against None descriptions, which sorted() cannot compare in Python 3
        for generic_term, words in groupby(
            sorted(vs.values, key=lambda v: v.description or ''),
            key=lambda v: v.description or '',
        ):
            if generic_term:
                generic_term += ': '
            else:
                generic_term = ''
            d.append(generic_term + ', '.join(nfilter([w.name for w in words])))

        vs.description = '; '.join(d)

    for model in [models.Country, models.Ecoregion]:
        for instance in DBSession.query(model).options(
                joinedload(getattr(model, 'taxa'))
        ):
            if not instance.taxa:
                instance.active = False

    # TODO: assign ThePlantList ids!


if __name__ == '__main__':
    initializedb(
        (('data_repos',), dict(action=ExistingDir)), create=main, prime_cache=prime_cache)
    sys.exit(0)
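
nfilter comes from clldutils.misc and simply drops falsy items, so empty word names never leave dangling separators in the joined description:

from clldutils.misc import nfilter

# nfilter keeps only truthy items:
assert nfilter(['oak', '', None, 'pine']) == ['oak', 'pine']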
Example #5
        choices = {}
        for col in d.jsondata.get('custom_fields', []):
            values = [
                r[0] for r in DBSession.query(common.Unit_data.value)
                .filter(common.Unit_data.object_pk.in_(word_pks))
                .filter(common.Unit_data.key == col)
                .distinct()]
            if len(values) < 40:
                choices[col] = sorted(values)
        d.update_jsondata(choices=choices)

    DBSession.execute("""
    UPDATE word
      SET example_count = s.c 
      FROM (
        SELECT m.word_pk AS wpk, count(ms.sentence_pk) AS c
        FROM meaning AS m, meaningsentence AS ms
        WHERE m.pk = ms.meaning_pk
        GROUP BY m.word_pk
      ) AS s
      WHERE word.pk = s.wpk
    """)


if __name__ == '__main__':
    initializedb(
        (("--internal",), dict(action='store_true')),
        (("--no-concepts",), dict(action='store_true')),
        (("--dict",), dict()),
        create=main, prime_cache=prime_cache)
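
The UPDATE ... FROM form above is PostgreSQL syntax that applies the aggregated subquery in a single statement rather than one UPDATE per word. A quick way to inspect the counts it writes back (a sketch using the same tables):

# Sketch: the per-word counts that the UPDATE above assigns to example_count.
rows = DBSession.execute("""
    SELECT m.word_pk, count(ms.sentence_pk) AS c
    FROM meaning AS m JOIN meaningsentence AS ms ON m.pk = ms.meaning_pk
    GROUP BY m.word_pk
""").fetchall()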
Example #6
        choices = {}
        for col in d.jsondata.get('custom_fields', []):
            values = [
                r[0] for r in DBSession.query(common.Unit_data.value)
                .filter(common.Unit_data.object_pk.in_(word_pks))
                .filter(common.Unit_data.key == col)
                .distinct()]
            if len(values) < 40:
                choices[col] = sorted(values)
        d.update_jsondata(choices=choices)

    DBSession.execute("""
    UPDATE word
      SET example_count = s.c 
      FROM (
        SELECT m.word_pk AS wpk, count(ms.sentence_pk) AS c
        FROM meaning AS m, meaningsentence AS ms
        WHERE m.pk = ms.meaning_pk
        GROUP BY m.word_pk
      ) AS s
      WHERE word.pk = s.wpk
    """)


if __name__ == '__main__':
    initializedb(
        (("--internal",), dict(action='store_true')),
        (("--no-concepts",), dict(action='store_true')),
        (("--dict",), dict()),
        create=main, prime_cache=prime_cache, bootstrap=True)
Example #7
    languages = import_languages()
    import_cldf("datasets", features, languages, trust=trust)
    if languages_path not in trust:
        languages.to_csv(languages_path, sep='\t', encoding='utf-8')
    if features_path not in trust:
        features.to_csv(features_path, sep='\t', encoding='utf-8')


# Emulate a command-line invocation so initializedb picks up the config file.
import sys
sys.argv = ["i", "../grambank/sqlite.ini"]

if model_is_available:
    from clld.scripts.util import initializedb
    from clld.db.util import compute_language_sources
    try:
        initializedb(create=main, prime_cache=lambda x: None)
    except SystemExit:
        print("done")
else:
    parser = argparse.ArgumentParser(
        description="Process GramRumah data with consistency in mind")
    parser.add_argument("--sqlite",
                        default=None,
                        const="gramrumah.sqlite",
                        nargs="?",
                        help="Generate an sqlite database from the data")
    parser.add_argument("--trust",
                        "-t",
                        nargs="*",
                        type=argparse.FileType("r"),
                        default=[],
Example #8
initializedb's instance in the undocumented and weird way that seems to have
been provided for such cases. The latter option is implemented.
"""
if __name__ == '__main__':

    if os.path.exists('db.sqlite'):
        os.remove('db.sqlite')

    main_data_arg = [('main_data', ), {
        'help':
        'path to the tsv file that contains the TuLeD data'
    }]
    lang_data_arg = [('lang_data', ), {
        'help':
        'path to the tsv file that contains the language data'
    }]
    concept_data_arg = [('concept_data', ), {
        'help':
        'path to the tsv file that contains the concept data'
    }]
    sources_data_arg = [('sources_data', ), {
        'help':
        'path to the bibtex file that contains the references'
    }]

    initializedb(main_data_arg,
                 lang_data_arg,
                 concept_data_arg,
                 sources_data_arg,
                 create=main)
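
Each (names, kwargs) pair passed positionally to initializedb is forwarded to its argument parser, so the effect is roughly equivalent to the following (a sketch of the mechanism, not clld's internal code):

import argparse

parser = argparse.ArgumentParser()
for names, kw in [main_data_arg, lang_data_arg,
                  concept_data_arg, sources_data_arg]:
    parser.add_argument(*names, **kw)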
Example #9
    ltable = models2.Languoid.__table__

    # we compute the ancestry for each single languoid
    for lid, fid in DBSession.execute('select pk, father_pk from languoid').fetchall():
        depth = 0
        DBSession.execute(SQL, dict(child_pk=lid, parent_pk=lid, depth=depth))

        # now follow up the line of ancestors
        while fid:
            depth += 1
            DBSession.execute(SQL, dict(child_pk=lid, parent_pk=fid, depth=depth))
            fid = DBSession.execute(
                sql.select([ltable.c.father_pk]).where(ltable.c.pk == fid)
            ).fetchone()[0]

    # we also pre-compute counts of descendants for each languoid:
    for level in ['language', 'dialect', 'family']:
        DBSession.execute("""\
UPDATE languoid SET child_%(level)s_count = (
    SELECT count(*)
    FROM treeclosuretable as t, languoid as l
    WHERE languoid.pk = t.parent_pk
    AND languoid.pk != t.child_pk AND t.child_pk = l.pk AND l.level = '%(level)s'
)""" % locals())

    DBSession.execute('COMMIT')


if __name__ == '__main__':
    initializedb(create=create, prime_cache=prime_cache)
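
Once populated, the closure table answers ancestry queries with a single indexed lookup instead of a recursive walk up the tree. A hedged sketch of a typical use (query text assumed, languoid_pk is a placeholder):

# Sketch: all ancestors of one languoid, nearest first.
ancestors = DBSession.execute(
    """SELECT parent_pk, depth FROM treeclosuretable
       WHERE child_pk = :pk AND depth > 0 ORDER BY depth""",
    dict(pk=languoid_pk),  # placeholder pk
).fetchall()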
Example #10
File: initializedb.py Project: clld/csd
    lemma_pattern = re.compile(r"(?P<cf>Cf\.\s*)?‘(?P<lemma>[^’]+)’", re.MULTILINE)

    def language_repl(m):
        return '**%s**' % m.group('id')

    language_pattern = re.compile('(?P<id>%s)' % '|'.join(k.upper() for k in LANGUAGES.keys()))

    for entry in entries.values():
        if entry.description:
            #print ('\\lx %s' % entry.name).encode('utf8')
            entry.description = lemma_pattern.sub(lemma_repl, entry.description)
            entry.description = language_pattern.sub(language_repl, entry.description)
    print('hits:', len(hit))
    print('miss:', len(miss))

    def level(l):
        _level = 0
        while l.parent:
            _level += 1
            l = l.parent
        return _level

    for lang in DBSession.query(models.Languoid):
        lang.level = level(lang)


if __name__ == '__main__':
    initializedb(create=main, prime_cache=prime_cache, bootstrap=True)
    sys.exit(0)
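
lemma_repl is referenced above but not included in the excerpt; judging from lemma_pattern's named groups, it is presumably a substitution callback along these lines (a hypothetical reconstruction, not the project's code):

# Hypothetical sketch of the missing lemma_repl callback: it receives a match
# with an optional 'cf' group and a 'lemma' group and returns markup for it.
def lemma_repl(m):
    return '%s*%s*' % (m.group('cf') or '', m.group('lemma'))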