Code example #1
            'Creative Commons Attribution 4.0 International License',
        },
        domain='crubadan.org',
    )

    DBSession.add(dataset)
    DBSession.flush()

    editor = data.add(common.Contributor,
                      "Kevin Scannell",
                      id="Kevin Scannell",
                      name="Kevin Scannell",
                      email="*****@*****.**")
    common.Editor(dataset=dataset, contributor=editor, ord=0)
    DBSession.flush()

    fillTable(DBSession)


def prime_cache(args):
    """If data needs to be denormalized for lookup, do that here.
    This procedure should be separate from the db initialization, because
    it will have to be run periodically whenever data has been updated.
    """


if __name__ == '__main__':
    prepSysDirs()
    initializedb(create=main, prime_cache=prime_cache)
    sys.exit(0)
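Code example #1 begins mid-way through a `common.Dataset(...)` constructor call. For orientation, here is a hypothetical reconstruction of the truncated opening; only `domain='crubadan.org'` and the CC-BY 4.0 license name come from the fragment itself, while the ids, names, license URL, and the `Data` helper are assumptions based on the usual clld initializedb pattern.

# Hypothetical reconstruction of the truncated opening of main(args);
# everything marked "assumed" is not taken from the fragment above.
from clld.db.meta import DBSession
from clld.db.models import common
from clld.scripts.util import Data

def main(args):
    data = Data()  # caches created objects for later lookup by id
    dataset = common.Dataset(
        id='crubadan',    # assumed id
        name='Crubadan',  # assumed name
        license='https://creativecommons.org/licenses/by/4.0/',  # assumed URL
        jsondata={
            'license_name':
                'Creative Commons Attribution 4.0 International License',
        },
        domain='crubadan.org',
    )
    # ... continues as in the fragment above (DBSession.add(dataset), etc.)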
Code example #2
    # we compute the ancestry for each single languoid
    for lid, fid in DBSession.execute(
            'select pk, father_pk from languoid').fetchall():
        depth = 0
        DBSession.execute(SQL, dict(child_pk=lid, parent_pk=lid, depth=depth))

        # now follow up the line of ancestors
        while fid:
            depth += 1
            DBSession.execute(SQL,
                              dict(child_pk=lid, parent_pk=fid, depth=depth))
            fid = DBSession.execute(
                sql.select([ltable.c.father_pk
                            ]).where(ltable.c.pk == fid)).fetchone()[0]

    # we also pre-compute counts of descendants for each languoid:
    for level in ['language', 'dialect', 'family']:
        DBSession.execute("""\
UPDATE languoid SET child_%(level)s_count = (
    SELECT count(*)
    FROM treeclosuretable as t, languoid as l
    WHERE languoid.pk = t.parent_pk
    AND languoid.pk != t.child_pk AND t.child_pk = l.pk AND l.level = '%(level)s'
)""" % locals())

    DBSession.execute('COMMIT')


if __name__ == '__main__':
    initializedb(create=create, prime_cache=prime_cache)
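The `SQL` statement executed in the ancestry loop is defined outside the snippet. Judging by the `treeclosuretable` columns referenced in the UPDATE above it, it is presumably an insert along these lines (a sketch, not the project's verbatim statement):

# Assumed shape of the SQL statement bound in the loop above: each
# (child, ancestor, depth) triple extends the transitive closure of
# the languoid tree.
SQL = """\
INSERT INTO treeclosuretable (child_pk, parent_pk, depth)
VALUES (:child_pk, :parent_pk, :depth)"""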
Code example #3
File: initializedb.py Project: Castroyesid/wals3
    for parameter, valuesets in groupby(
        DBSession.query(common.ValueSet).order_by(common.ValueSet.parameter_pk), lambda vs: vs.parameter
    ):
        parameter.representation = str(len(set(v.language_pk for v in valuesets)))
    print("recomputation of representation done")
    transaction.commit()
    transaction.begin()

    # cache iso codes for languages:
    for language in DBSession.query(common.Language).options(
        joinedload_all(common.Language.languageidentifier, common.LanguageIdentifier.identifier)
    ):
        iso_codes = []
        for identifier in language.identifiers:
            if identifier.type == common.IdentifierType.iso.value:
                iso_codes.append(identifier.name)
        language.iso_codes = ", ".join(sorted(set(iso_codes)))
    print("ecomputation of iso codes done")
    transaction.commit()
    transaction.begin()

    compute_language_sources()
    transaction.commit()
    transaction.begin()

    gbs_func("update", args)


if __name__ == "__main__":  # pragma: no cover
    initializedb(create=main, prime_cache=prime_cache)
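Note that `itertools.groupby` only groups *consecutive* items, which is why the query in example #3 orders by `ValueSet.parameter_pk` before grouping by parameter. A self-contained illustration of the pitfall:

from itertools import groupby

rows = [('p1', 'a'), ('p2', 'b'), ('p1', 'c')]

# unsorted input: 'p1' surfaces as two separate groups
print([k for k, _ in groupby(rows, key=lambda r: r[0])])          # ['p1', 'p2', 'p1']

# sorted input (mirroring the order_by above): one group per key
print([k for k, _ in groupby(sorted(rows), key=lambda r: r[0])])  # ['p1', 'p2']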
Code example #4
File: initializedb.py Project: clld/tsammalex
    it will have to be run periodically whenever data has been updated.
    """
    for vs in DBSession.query(common.ValueSet).options(
            joinedload(common.ValueSet.values)):
        d = []
        for generic_term, words in groupby(
            sorted(vs.values, key=lambda v: v.description), key=lambda v: v.description
        ):
            if generic_term:
                generic_term += ': '
            else:
                generic_term = ''
            d.append(generic_term + ', '.join(nfilter([w.name for w in words])))

        vs.description = '; '.join(d)

    for model in [models.Country, models.Ecoregion]:
        for instance in DBSession.query(model).options(
                joinedload(getattr(model, 'taxa'))
        ):
            if not instance.taxa:
                instance.active = False

    # TODO: assign ThePlantList ids!


if __name__ == '__main__':
    initializedb(
        (('data_repos',), dict(action=ExistingDir)), create=main, prime_cache=prime_cache)
    sys.exit(0)
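The description-building loop in example #4 is easier to follow on toy data. Below is a minimal re-run of the same logic, with a hypothetical `Val` stand-in for the ORM value objects and `nfilter` (from clldutils.misc) inlined so the sketch is self-contained:

from collections import namedtuple
from itertools import groupby

def nfilter(seq):
    # drop falsy items, as clldutils.misc.nfilter does
    return [e for e in seq if e]

Val = namedtuple('Val', ['name', 'description'])  # hypothetical stand-in
values = [Val('kudu', 'antelope'), Val('impala', 'antelope'), Val('mopane', '')]

d = []
for generic_term, words in groupby(
        sorted(values, key=lambda v: v.description), key=lambda v: v.description):
    generic_term = generic_term + ': ' if generic_term else ''
    d.append(generic_term + ', '.join(nfilter([w.name for w in words])))
print('; '.join(d))  # mopane; antelope: kudu, impala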
Code example #5
File: initializedb.py Project: marchdown/dictionaria
        choices = {}
        for col in d.jsondata.get('custom_fields', []):
            values = [
                r[0] for r in DBSession.query(common.Unit_data.value)
                .filter(common.Unit_data.object_pk.in_(word_pks))
                .filter(common.Unit_data.key == col)
                .distinct()]
            if len(values) < 40:
                choices[col] = sorted(values)
        d.update_jsondata(choices=choices)

    DBSession.execute("""
    UPDATE word
      SET example_count = s.c 
      FROM (
        SELECT m.word_pk AS wpk, count(ms.sentence_pk) AS c
        FROM meaning AS m, meaningsentence AS ms
        WHERE m.pk = ms.meaning_pk
        GROUP BY m.word_pk
      ) AS s
      WHERE word.pk = s.wpk
    """)


if __name__ == '__main__':
    initializedb(
        (("--internal",), dict(action='store_true')),
        (("--no-concepts",), dict(action='store_true')),
        (("--dict",), dict()),
        create=main, prime_cache=prime_cache)
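The `UPDATE ... FROM` statement above is the PostgreSQL idiom for updating one table from a joined subquery. One subtlety: words whose meanings have no linked example sentences produce no row in the subquery, so their `example_count` is left untouched rather than set to 0. A hedged variant (same assumed schema) that resets the counts first:

from clld.db.meta import DBSession

# reset first, so words with no linked sentences end up at 0 rather than
# keeping a stale value from a previous run
DBSession.execute("UPDATE word SET example_count = 0")
DBSession.execute("""
UPDATE word
  SET example_count = s.c
  FROM (
    SELECT m.word_pk AS wpk, count(ms.sentence_pk) AS c
    FROM meaning AS m
    JOIN meaningsentence AS ms ON m.pk = ms.meaning_pk
    GROUP BY m.word_pk
  ) AS s
  WHERE word.pk = s.wpk
""")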
Code example #6
File: initializedb.py Project: clld/dictionaria
        choices = {}
        for col in d.jsondata.get('custom_fields', []):
            values = [
                r[0] for r in DBSession.query(common.Unit_data.value)
                .filter(common.Unit_data.object_pk.in_(word_pks))
                .filter(common.Unit_data.key == col)
                .distinct()]
            if len(values) < 40:
                choices[col] = sorted(values)
        d.update_jsondata(choices=choices)

    DBSession.execute("""
    UPDATE word
      SET example_count = s.c 
      FROM (
        SELECT m.word_pk AS wpk, count(ms.sentence_pk) AS c
        FROM meaning AS m, meaningsentence AS ms
        WHERE m.pk = ms.meaning_pk
        GROUP BY m.word_pk
      ) AS s
      WHERE word.pk = s.wpk
    """)


if __name__ == '__main__':
    initializedb(
        (("--internal",), dict(action='store_true')),
        (("--no-concepts",), dict(action='store_true')),
        (("--dict",), dict()),
        create=main, prime_cache=prime_cache, bootstrap=True)
Code example #7
    languages = import_languages()
    import_cldf("datasets", features, languages, trust=trust)
    if languages_path not in trust:
        languages.to_csv(languages_path, sep='\t', encoding='utf-8')
    if features_path not in trust:
        features.to_csv(features_path, sep='\t', encoding='utf-8')


import sys

# fake the command line so the initializedb() call below picks up the
# sqlite.ini config as its positional argument
sys.argv = ["i", "../grambank/sqlite.ini"]

if model_is_available:
    from clld.scripts.util import initializedb
    from clld.db.util import compute_language_sources
    try:
        initializedb(create=main, prime_cache=lambda x: None)
    except SystemExit:
        print("done")
else:
    parser = argparse.ArgumentParser(
        description="Process GramRumah data with consistency in mind")
    parser.add_argument("--sqlite",
                        default=None,
                        const="gramrumah.sqlite",
                        nargs="?",
                        help="Generate an sqlite database from the data")
    parser.add_argument("--trust",
                        "-t",
                        nargs="*",
                        type=argparse.FileType("r"),
                        default=[],
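The `sys.argv` override near the top of example #7 is a generic trick: an entry point that reads `sys.argv` directly can be driven programmatically by replacing the list before the call. A self-contained illustration, with `cli_entry_point` as a hypothetical stand-in for clld's `initializedb`:

import sys

def cli_entry_point():  # stands in for an argv-reading entry point
    print('using config:', sys.argv[1])

sys.argv = ["i", "../grambank/sqlite.ini"]
cli_entry_point()  # prints: using config: ../grambank/sqlite.ini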
Code example #8
initializedb's instance in the undocumented and weird way that seems to have
been provided for such cases. The latter option is implemented.
"""
if __name__ == '__main__':

    if os.path.exists('db.sqlite'):
        os.remove('db.sqlite')

    main_data_arg = [('main_data', ), {
        'help':
        'path to the tsv file that contains the TuLeD data'
    }]
    lang_data_arg = [('lang_data', ), {
        'help':
        'path to the tsv file that contains the language data'
    }]
    concept_data_arg = [('concept_data', ), {
        'help':
        'path to the tsv file that contains the concept data'
    }]
    sources_data_arg = [('sources_data', ), {
        'help':
        'path to the bibtex file that contains the references'
    }]

    initializedb(main_data_arg,
                 lang_data_arg,
                 concept_data_arg,
                 sources_data_arg,
                 create=main)
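The `(args_tuple, kwargs_dict)` pairs passed to `initializedb` above are presumably unpacked into `argparse.add_argument()` calls. A minimal sketch of that mapping (hypothetical; not clld's actual implementation):

import argparse

# One spec in the same shape as main_data_arg above.
arg_specs = [
    [('main_data', ), {'help': 'path to the tsv file that contains the TuLeD data'}],
]
parser = argparse.ArgumentParser()
for spec_args, spec_kwargs in arg_specs:
    parser.add_argument(*spec_args, **spec_kwargs)
print(parser.parse_args(['tuled.tsv']).main_data)  # tuled.tsv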
Code example #9
File: initializedb.py Project: kublaj/glottolog3
    ltable = models2.Languoid.__table__

    # we compute the ancestry for each single languoid
    for lid, fid in DBSession.execute('select pk, father_pk from languoid').fetchall():
        depth = 0
        DBSession.execute(SQL, dict(child_pk=lid, parent_pk=lid, depth=depth))

        # now follow up the line of ancestors
        while fid:
            depth += 1
            DBSession.execute(SQL, dict(child_pk=lid, parent_pk=fid, depth=depth))
            fid = DBSession.execute(
                sql.select([ltable.c.father_pk]).where(ltable.c.pk == fid)
            ).fetchone()[0]

    # we also pre-compute counts of descendants for each languoid:
    for level in ['language', 'dialect', 'family']:
        DBSession.execute("""\
UPDATE languoid SET child_%(level)s_count = (
    SELECT count(*)
    FROM treeclosuretable as t, languoid as l
    WHERE languoid.pk = t.parent_pk
    AND languoid.pk != t.child_pk AND t.child_pk = l.pk AND l.level = '%(level)s'
)""" % locals())

    DBSession.execute('COMMIT')


if __name__ == '__main__':
    initializedb(create=create, prime_cache=prime_cache)
Code example #10
File: initializedb.py Project: clld/csd
    lemma_pattern = re.compile(r"(?P<cf>Cf\.\s*)?‘(?P<lemma>[^’]+)’", re.MULTILINE)

    def language_repl(m):
        return '**%s**' % m.group('id')

    language_pattern = re.compile('(?P<id>%s)' % '|'.join(k.upper() for k in LANGUAGES.keys()))

    for entry in entries.values():
        if entry.description:
            #print ('\\lx %s' % entry.name).encode('utf8')
            entry.description = lemma_pattern.sub(lemma_repl, entry.description)
            entry.description = language_pattern.sub(language_repl, entry.description)
    print('hits:', len(hit))
    print('miss:', len(miss))

    def level(l):
        _level = 0
        while l.parent:
            _level += 1
            l = l.parent
        return _level

    for lang in DBSession.query(models.Languoid):
        lang.level = level(lang)


if __name__ == '__main__':
    initializedb(create=main, prime_cache=prime_cache, bootstrap=True)
    sys.exit(0)
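`lemma_repl`, `hit`, and `miss` in example #10 are defined outside the snippet. Given how they are used, `lemma_repl` is presumably a `re.sub` callback along these lines (a hypothetical sketch, not the project's code; `known_lemmas` and the markup are assumptions):

hit, miss = [], []
known_lemmas = {'water', 'fire'}  # stand-in for the real lookup table

def lemma_repl(m):
    # m carries the 'cf' and 'lemma' groups from lemma_pattern above
    lemma = m.group('lemma')
    if lemma in known_lemmas:
        hit.append(lemma)
        return (m.group('cf') or '') + '*%s*' % lemma  # assumed markup
    miss.append(lemma)
    return m.group(0)  # leave unresolved lemmas untouched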