Code example #1
            'Creative Commons Attribution 4.0 International License',
        },
        domain='crubadan.org',
    )

    DBSession.add(dataset)
    DBSession.flush()

    editor = data.add(common.Contributor,
                      "Kevin Scannell",
                      id="Kevin Scannell",
                      name="Kevin Scannell",
                      email="*****@*****.**")
    common.Editor(dataset=dataset, contributor=editor, ord=0)
    DBSession.flush()

    fillTable(DBSession)


def prime_cache(args):
    """If data needs to be denormalized for lookup, do that here.
    This procedure should be separate from the db initialization, because
    it will have to be run periodically whenever data has been updated.
    """


if __name__ == '__main__':
    prepSysDirs()
    initializedb(create=main, prime_cache=prime_cache)
    sys.exit(0)
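Code example #1 begins mid-way through a `common.Dataset(...)` constructor call. For orientation, here is a hypothetical reconstruction of the truncated opening; only `domain='crubadan.org'` and the CC-BY 4.0 license name come from the fragment itself, while the ids, names, license URL, and the `Data` helper are assumptions based on the usual clld initializedb pattern.

# Hypothetical reconstruction of the truncated opening of main(args);
# everything marked "assumed" is not taken from the fragment above.
from clld.db.meta import DBSession
from clld.db.models import common
from clld.scripts.util import Data

def main(args):
    data = Data()  # caches created objects for later lookup by id
    dataset = common.Dataset(
        id='crubadan',    # assumed id
        name='Crubadan',  # assumed name
        license='https://creativecommons.org/licenses/by/4.0/',  # assumed URL
        jsondata={
            'license_name':
                'Creative Commons Attribution 4.0 International License',
        },
        domain='crubadan.org',
    )
    # ... continues as in the fragment above (DBSession.add(dataset), etc.)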
Code example #2
    # we compute the ancestry for each single languoid
    for lid, fid in DBSession.execute(
            'select pk, father_pk from languoid').fetchall():
        depth = 0
        DBSession.execute(SQL, dict(child_pk=lid, parent_pk=lid, depth=depth))

        # now follow up the line of ancestors
        while fid:
            depth += 1
            DBSession.execute(SQL,
                              dict(child_pk=lid, parent_pk=fid, depth=depth))
            fid = DBSession.execute(
                sql.select([ltable.c.father_pk
                            ]).where(ltable.c.pk == fid)).fetchone()[0]

    # we also pre-compute counts of descendants for each languoid:
    for level in ['language', 'dialect', 'family']:
        DBSession.execute("""\
UPDATE languoid SET child_%(level)s_count = (
    SELECT count(*)
    FROM treeclosuretable as t, languoid as l
    WHERE languoid.pk = t.parent_pk
    AND languoid.pk != t.child_pk AND t.child_pk = l.pk AND l.level = '%(level)s'
)""" % locals())

    DBSession.execute('COMMIT')


if __name__ == '__main__':
    initializedb(create=create, prime_cache=prime_cache)
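The `SQL` statement executed in the ancestry loop is defined outside the snippet. Judging by the `treeclosuretable` columns referenced in the UPDATE above it, it is presumably an insert along these lines (a sketch, not the project's verbatim statement):

# Assumed shape of the SQL statement bound in the loop above: each
# (child, ancestor, depth) triple extends the transitive closure of
# the languoid tree.
SQL = """\
INSERT INTO treeclosuretable (child_pk, parent_pk, depth)
VALUES (:child_pk, :parent_pk, :depth)"""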
Code example #3
File: initializedb.py Project: Castroyesid/wals3
    for parameter, valuesets in groupby(
        DBSession.query(common.ValueSet).order_by(common.ValueSet.parameter_pk), lambda vs: vs.parameter
    ):
        parameter.representation = str(len(set(v.language_pk for v in valuesets)))
    print("recomputation of representation done")
    transaction.commit()
    transaction.begin()

    # cache iso codes for languages:
    for language in DBSession.query(common.Language).options(
        joinedload_all(common.Language.languageidentifier, common.LanguageIdentifier.identifier)
    ):
        iso_codes = []
        for identifier in language.identifiers:
            if identifier.type == common.IdentifierType.iso.value:
                iso_codes.append(identifier.name)
        language.iso_codes = ", ".join(sorted(set(iso_codes)))
    print("ecomputation of iso codes done")
    transaction.commit()
    transaction.begin()

    compute_language_sources()
    transaction.commit()
    transaction.begin()

    gbs_func("update", args)


if __name__ == "__main__":  # pragma: no cover
    initializedb(create=main, prime_cache=prime_cache)
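Note that `itertools.groupby` only groups *consecutive* items, which is why the query in example #3 orders by `ValueSet.parameter_pk` before grouping by parameter. A self-contained illustration of the pitfall:

from itertools import groupby

rows = [('p1', 'a'), ('p2', 'b'), ('p1', 'c')]

# unsorted input: 'p1' surfaces as two separate groups
print([k for k, _ in groupby(rows, key=lambda r: r[0])])          # ['p1', 'p2', 'p1']

# sorted input (mirroring the order_by above): one group per key
print([k for k, _ in groupby(sorted(rows), key=lambda r: r[0])])  # ['p1', 'p2']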
Code example #4
File: initializedb.py Project: clld/tsammalex
    it will have to be run periodically whenever data has been updated.
    """
    for vs in DBSession.query(common.ValueSet).options(
            joinedload(common.ValueSet.values)):
        d = []
        for generic_term, words in groupby(
            sorted(vs.values, key=lambda v: v.description), key=lambda v: v.description
        ):
            if generic_term:
                generic_term += ': '
            else:
                generic_term = ''
            d.append(generic_term + ', '.join(nfilter([w.name for w in words])))

        vs.description = '; '.join(d)

    for model in [models.Country, models.Ecoregion]:
        for instance in DBSession.query(model).options(
                joinedload(getattr(model, 'taxa'))
        ):
            if not instance.taxa:
                instance.active = False

    # TODO: assign ThePlantList ids!


if __name__ == '__main__':
    initializedb(
        (('data_repos',), dict(action=ExistingDir)), create=main, prime_cache=prime_cache)
    sys.exit(0)
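The description-building loop in example #4 is easier to follow on toy data. Below is a minimal re-run of the same logic, with a hypothetical `Val` stand-in for the ORM value objects and `nfilter` (from clldutils.misc) inlined so the sketch is self-contained:

from collections import namedtuple
from itertools import groupby

def nfilter(seq):
    # drop falsy items, as clldutils.misc.nfilter does
    return [e for e in seq if e]

Val = namedtuple('Val', ['name', 'description'])  # hypothetical stand-in
values = [Val('kudu', 'antelope'), Val('impala', 'antelope'), Val('mopane', '')]

d = []
for generic_term, words in groupby(
        sorted(values, key=lambda v: v.description), key=lambda v: v.description):
    generic_term = generic_term + ': ' if generic_term else ''
    d.append(generic_term + ', '.join(nfilter([w.name for w in words])))
print('; '.join(d))  # mopane; antelope: kudu, impala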
Code example #5
File: initializedb.py Project: marchdown/dictionaria
        choices = {}
        for col in d.jsondata.get('custom_fields', []):
            values = [
                r[0] for r in DBSession.query(common.Unit_data.value)
                .filter(common.Unit_data.object_pk.in_(word_pks))
                .filter(common.Unit_data.key == col)
                .distinct()]
            if len(values) < 40:
                choices[col] = sorted(values)
        d.update_jsondata(choices=choices)

    DBSession.execute("""
    UPDATE word
      SET example_count = s.c 
      FROM (
        SELECT m.word_pk AS wpk, count(ms.sentence_pk) AS c
        FROM meaning AS m, meaningsentence AS ms
        WHERE m.pk = ms.meaning_pk
        GROUP BY m.word_pk
      ) AS s
      WHERE word.pk = s.wpk
    """)


if __name__ == '__main__':
    initializedb(
        (("--internal",), dict(action='store_true')),
        (("--no-concepts",), dict(action='store_true')),
        (("--dict",), dict()),
        create=main, prime_cache=prime_cache)
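The `UPDATE ... FROM` statement above is the PostgreSQL idiom for updating one table from a joined subquery. One subtlety: words whose meanings have no linked example sentences produce no row in the subquery, so their `example_count` is left untouched rather than set to 0. A hedged variant (same assumed schema) that resets the counts first:

from clld.db.meta import DBSession

# reset first, so words with no linked sentences end up at 0 rather than
# keeping a stale value from a previous run
DBSession.execute("UPDATE word SET example_count = 0")
DBSession.execute("""
UPDATE word
  SET example_count = s.c
  FROM (
    SELECT m.word_pk AS wpk, count(ms.sentence_pk) AS c
    FROM meaning AS m
    JOIN meaningsentence AS ms ON m.pk = ms.meaning_pk
    GROUP BY m.word_pk
  ) AS s
  WHERE word.pk = s.wpk
""")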
Code example #6
File: initializedb.py Project: clld/dictionaria
        choices = {}
        for col in d.jsondata.get('custom_fields', []):
            values = [
                r[0] for r in DBSession.query(common.Unit_data.value)
                .filter(common.Unit_data.object_pk.in_(word_pks))
                .filter(common.Unit_data.key == col)
                .distinct()]
            if len(values) < 40:
                choices[col] = sorted(values)
        d.update_jsondata(choices=choices)

    DBSession.execute("""
    UPDATE word
      SET example_count = s.c 
      FROM (
        SELECT m.word_pk AS wpk, count(ms.sentence_pk) AS c
        FROM meaning AS m, meaningsentence AS ms
        WHERE m.pk = ms.meaning_pk
        GROUP BY m.word_pk
      ) AS s
      WHERE word.pk = s.wpk
    """)


if __name__ == '__main__':
    initializedb(
        (("--internal",), dict(action='store_true')),
        (("--no-concepts",), dict(action='store_true')),
        (("--dict",), dict()),
        create=main, prime_cache=prime_cache, bootstrap=True)
Code example #7
    languages = import_languages()
    import_cldf("datasets", features, languages, trust=trust)
    if languages_path not in trust:
        languages.to_csv(languages_path, sep='\t', encoding='utf-8')
    if features_path not in trust:
        features.to_csv(features_path, sep='\t', encoding='utf-8')


import sys

# fake the command line so the initializedb() call below picks up the
# sqlite.ini config as its positional argument
sys.argv = ["i", "../grambank/sqlite.ini"]

if model_is_available:
    from clld.scripts.util import initializedb
    from clld.db.util import compute_language_sources
    try:
        initializedb(create=main, prime_cache=lambda x: None)
    except SystemExit:
        print("done")
else:
    parser = argparse.ArgumentParser(
        description="Process GramRumah data with consistency in mind")
    parser.add_argument("--sqlite",
                        default=None,
                        const="gramrumah.sqlite",
                        nargs="?",
                        help="Generate an sqlite database from the data")
    parser.add_argument("--trust",
                        "-t",
                        nargs="*",
                        type=argparse.FileType("r"),
                        default=[],
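The `sys.argv` override near the top of example #7 is a generic trick: an entry point that reads `sys.argv` directly can be driven programmatically by replacing the list before the call. A self-contained illustration, with `cli_entry_point` as a hypothetical stand-in for clld's `initializedb`:

import sys

def cli_entry_point():  # stands in for an argv-reading entry point
    print('using config:', sys.argv[1])

sys.argv = ["i", "../grambank/sqlite.ini"]
cli_entry_point()  # prints: using config: ../grambank/sqlite.ini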
Code example #8
initializedb's instance in the undocumented and weird way that seems to have
been provided for such cases. The latter option is implemented.
"""
if __name__ == '__main__':

    if os.path.exists('db.sqlite'):
        os.remove('db.sqlite')

    main_data_arg = [('main_data', ), {
        'help':
        'path to the tsv file that contains the TuLeD data'
    }]
    lang_data_arg = [('lang_data', ), {
        'help':
        'path to the tsv file that contains the language data'
    }]
    concept_data_arg = [('concept_data', ), {
        'help':
        'path to the tsv file that contains the concept data'
    }]
    sources_data_arg = [('sources_data', ), {
        'help':
        'path to the bibtex file that contains the references'
    }]

    initializedb(main_data_arg,
                 lang_data_arg,
                 concept_data_arg,
                 sources_data_arg,
                 create=main)
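The `(args_tuple, kwargs_dict)` pairs passed to `initializedb` above are presumably unpacked into `argparse.add_argument()` calls. A minimal sketch of that mapping (hypothetical; not clld's actual implementation):

import argparse

# One spec in the same shape as main_data_arg above.
arg_specs = [
    [('main_data', ), {'help': 'path to the tsv file that contains the TuLeD data'}],
]
parser = argparse.ArgumentParser()
for spec_args, spec_kwargs in arg_specs:
    parser.add_argument(*spec_args, **spec_kwargs)
print(parser.parse_args(['tuled.tsv']).main_data)  # tuled.tsv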
Code example #9
File: initializedb.py Project: kublaj/glottolog3
    ltable = models2.Languoid.__table__

    # we compute the ancestry for each single languoid
    for lid, fid in DBSession.execute('select pk, father_pk from languoid').fetchall():
        depth = 0
        DBSession.execute(SQL, dict(child_pk=lid, parent_pk=lid, depth=depth))

        # now follow up the line of ancestors
        while fid:
            depth += 1
            DBSession.execute(SQL, dict(child_pk=lid, parent_pk=fid, depth=depth))
            fid = DBSession.execute(
                sql.select([ltable.c.father_pk]).where(ltable.c.pk == fid)
            ).fetchone()[0]

    # we also pre-compute counts of descendants for each languoid:
    for level in ['language', 'dialect', 'family']:
        DBSession.execute("""\
UPDATE languoid SET child_%(level)s_count = (
    SELECT count(*)
    FROM treeclosuretable as t, languoid as l
    WHERE languoid.pk = t.parent_pk
    AND languoid.pk != t.child_pk AND t.child_pk = l.pk AND l.level = '%(level)s'
)""" % locals())

    DBSession.execute('COMMIT')


if __name__ == '__main__':
    initializedb(create=create, prime_cache=prime_cache)
Code example #10
File: initializedb.py Project: clld/csd
    lemma_pattern = re.compile(r"(?P<cf>Cf\.\s*)?‘(?P<lemma>[^’]+)’", re.MULTILINE)

    def language_repl(m):
        return '**%s**' % m.group('id')

    language_pattern = re.compile('(?P<id>%s)' % '|'.join(k.upper() for k in LANGUAGES.keys()))

    for entry in entries.values():
        if entry.description:
            #print ('\\lx %s' % entry.name).encode('utf8')
            entry.description = lemma_pattern.sub(lemma_repl, entry.description)
            entry.description = language_pattern.sub(language_repl, entry.description)
    print('hits:', len(hit))
    print('miss:', len(miss))

    def level(l):
        _level = 0
        while l.parent:
            _level += 1
            l = l.parent
        return _level

    for lang in DBSession.query(models.Languoid):
        lang.level = level(lang)


if __name__ == '__main__':
    initializedb(create=main, prime_cache=prime_cache, bootstrap=True)
    sys.exit(0)
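`lemma_repl`, `hit`, and `miss` in example #10 are defined outside the snippet. Given how they are used, `lemma_repl` is presumably a `re.sub` callback along these lines (a hypothetical sketch, not the project's code; `known_lemmas` and the markup are assumptions):

hit, miss = [], []
known_lemmas = {'water', 'fire'}  # stand-in for the real lookup table

def lemma_repl(m):
    # m carries the 'cf' and 'lemma' groups from lemma_pattern above
    lemma = m.group('lemma')
    if lemma in known_lemmas:
        hit.append(lemma)
        return (m.group('cf') or '') + '*%s*' % lemma  # assumed markup
    miss.append(lemma)
    return m.group(0)  # leave unresolved lemmas untouched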