Beispiel #1
0
def testapp():
    """Yield a ``webtest.TestApp`` wrapping a freshly configured WSGI app.

    The app is built with the ``'sqlite://'`` database URL and includes
    ``clld.web.app`` and ``clld_cognacy_plugin``. Before the app is yielded,
    the schema is created and a dataset plus one cognate (with its
    cognateset, language, parameter, value set and value) are added to the
    session.
    """
    from webtest import TestApp
    from clld.db.meta import DBSession, VersionedDBSession, Base
    from clld.db.models import common
    from clld_cognacy_plugin.models import Cognateset, Cognate

    # Discard any sessions left over from earlier use before configuring.
    DBSession.remove()
    VersionedDBSession.remove()

    settings = {
        'sqlalchemy.url': 'sqlite://',
        'mako.directories': [
            'clld:web/templates',
            'clld_cognacy_plugin:templates'
        ],
    }
    configurator = config.Configurator(settings=settings)
    for package in ('clld.web.app', 'clld_cognacy_plugin'):
        configurator.include(package)
    application = configurator.make_wsgi_app()

    # Create the schema on the engine the session is bound to.
    Base.metadata.bind = DBSession.bind
    Base.metadata.create_all()

    dataset = common.Dataset(id='1', name='test app', domain='example.org')
    DBSession.add(dataset)

    cognateset = Cognateset(id='1', name='cs: test')
    language = common.Language(id='l', latitude=2, longitude=2)
    parameter = common.Parameter(id='l')
    valueset = common.ValueSet(id='vs', language=language, parameter=parameter)
    value = common.Value(id='v', name='abc', valueset=valueset)
    DBSession.add(Cognate(cognateset=cognateset, counterpart=value))

    yield TestApp(application)
Beispiel #2
0
    def setUp(self):
        """Fill the test database with a small, interlinked set of fixtures.

        After delegating to ``TestWithDb.setUp``, this creates a dataset, a
        contribution with four contributors, 101 languages with identifiers,
        parameters with and without a domain, value sets and values, a unit
        with unit values, a sentence and a config entry, and finally flushes
        the session.

        Objects that are never passed to ``DBSession.add`` directly are only
        linked to other objects through their constructor arguments;
        presumably they reach the session via relationship cascades (the
        model definitions are not visible here).
        """
        TestWithDb.setUp(self)

        DBSession.add(
            common.Dataset(id='dataset',
                           name='dataset',
                           description='desc',
                           domain='clld'))

        source = common.Source(id='source')
        # Build the id -> Contributor mapping in one pass instead of
        # overwriting the values of the dict that is being iterated.
        contributors = {
            id_: common.Contributor(id=id_, name=name)
            for id_, name in (
                ('contributor', 'A Name'),
                ('b', 'b Name'),
                ('c', 'c Name'),
                ('d', 'd Name'),
            )
        }

        contribution = common.Contribution(id='contribution',
                                           name='Contribution')
        common.ContributionReference(contribution=contribution, source=source)
        # These association objects are created as plain statements, not
        # inside ``assert``: assert statements are removed when Python runs
        # with -O, which would silently drop the fixtures.
        for contributor_id, primary in (
                ('contributor', True),
                ('b', False),
                ('c', True),
                ('d', False)):
            common.ContributionContributor(
                contribution=contribution,
                primary=primary,
                contributor=contributors[contributor_id])

        DBSession.add(contribution)

        language = common.Language(id='language',
                                   name='Language 1',
                                   latitude=10.5,
                                   longitude=0.3)
        language.sources.append(source)
        identifier = common.Identifier(type='iso639-3', id='iso')
        common.LanguageIdentifier(language=language, identifier=identifier)

        # 100 additional languages, each with a zero-padded numeric identifier.
        for i in range(2, 102):
            _l = common.Language(id='l%s' % i, name='Language %s' % i)
            _i = common.Identifier(type='iso639-3',
                                   id='%.3i' % i,
                                   name='%.3i' % i)
            common.LanguageIdentifier(language=_l, identifier=_i)
            DBSession.add(_l)

        # A parameter with two domain elements and one value using the first.
        param = common.Parameter(id='parameter', name='Parameter')
        de = common.DomainElement(id='de',
                                  name='DomainElement',
                                  parameter=param)
        common.DomainElement(id='de2',
                             name='DomainElement2',
                             parameter=param)
        valueset = common.ValueSet(id='valueset',
                                   language=language,
                                   parameter=param,
                                   contribution=contribution)
        DBSession.add(
            common.Value(id='value',
                         domainelement=de,
                         valueset=valueset,
                         frequency=50,
                         confidence='high'))

        # A parameter without domain elements, with a referenced value set.
        paramnd = common.Parameter(id='no-domain',
                                   name='Parameter without domain')
        valueset_nd = common.ValueSet(id='vs2',
                                      language=language,
                                      parameter=paramnd,
                                      contribution=contribution)
        common.ValueSetReference(valueset=valueset_nd, source=source)
        DBSession.add(
            common.Value(id='v2',
                         valueset=valueset_nd,
                         frequency=50,
                         confidence='high'))

        unit = common.Unit(id='unit', name='Unit', language=language)
        up = common.UnitParameter(id='unitparameter', name='UnitParameter')
        DBSession.add(unit)
        DBSession.add(
            common.UnitValue(id='unitvalue',
                             name='UnitValue',
                             unit=unit,
                             unitparameter=up))

        up2 = common.UnitParameter(id='up2', name='UnitParameter with domain')
        unit_de = common.UnitDomainElement(id='de', name='de', parameter=up2)
        DBSession.add(
            common.UnitValue(id='uv2',
                             name='UnitValue2',
                             unit=unit,
                             unitparameter=up2,
                             unitdomainelement=unit_de))

        DBSession.add(common.Source(id='s'))

        sentence = common.Sentence(id='sentence',
                                   name='sentence name',
                                   description='sentence description',
                                   analyzed='a\tmorpheme\tdoes\tdo',
                                   gloss='a\tmorpheme\t1SG\tdo.SG2',
                                   source='own',
                                   comment='comment',
                                   original_script='a morpheme',
                                   language=language)
        common.SentenceReference(sentence=sentence, source=source)
        DBSession.add(common.Config(key='key', value='value'))
        DBSession.flush()
Beispiel #3
0
def add_cultural_data(questionnaire_file_name, parameters, language):
    """Parse a culture questionnaire and store its answers for one language.

    Args:
        questionnaire_file_name: File name of the questionnaire, resolved
            relative to ``DBPATH``.
        parameters: Mapping from parameter id (the questionnaire key with
            dots replaced by dashes) to a ``(Parameter, domain)`` pair, where
            ``domain`` maps slugified answers to ``DomainElement`` objects.
            Missing parameters and domain elements are created and the
            mapping is updated in place.
        language: The language object the answers belong to.
    """
    contribution_text, parameter_descriptions, answers = parse_culture_questionnaire(
        os.path.join(DBPATH, questionnaire_file_name))

    # All ValueSets must be related to a contribution, so generate one from
    # the metadata.
    contrib = common.Contribution(id='contrib' + newid(),
                                  name=contribution_text + newid())

    for p, parameter in parameter_descriptions.iterrows():
        # First, make sure that this parameter exists - either look it up or
        # create it.
        pid = p.replace(".", "-")
        try:
            param, domain = parameters[pid]
        except KeyError:
            param = common.Parameter(
                id='culture' + pid,
                name=p,
                description=parameter['Question_text_English'],
                markup_description=parameter['Question_text_English'])
            domain = {}
            parameters[pid] = (param, domain)

        # Secondly, check whether we are aware that this answer is valid
        # already - otherwise we add its value to the domain, and use that.
        # Note: Once we have a database, we can do better filtering and
        # constraining, and don't need to rely on reasonable data.
        answer = str(answers["Answer"][p])
        answer_key = slug(answer)
        try:
            domain_element = domain[answer_key]
        except KeyError:
            # Answers are stored numerically where possible; yes/no style
            # answers map to 1/0 and anything else has no number.
            try:
                numerical_value = int(answer)
            except ValueError:
                if answer in ("Y", "True"):
                    numerical_value = 1
                elif answer in ("N", "False"):
                    numerical_value = 0
                else:
                    numerical_value = None
            domain_element = common.DomainElement(
                id=param.id + answer_key,
                description=answer,
                number=numerical_value,
                name=answer,
                parameter=param,
                abbr=answer,
                jsondata={'color': color(numerical_value)})
            DBSession.add(domain_element)
            try:
                DBSession.flush()
            except Exception:
                # Catch ``Exception`` rather than everything, so that
                # KeyboardInterrupt/SystemExit still stop the import.
                # NOTE(review): processing continues after a failed flush,
                # as before; the session may need a rollback at this point -
                # confirm whether re-raising would be preferable.
                print(domain, domain_element, language.name, pid, param.name)
            domain[answer_key] = domain_element

        # Now create the ValueSet, representing all values the language has
        # for this parameter
        vs = common.ValueSet(id='vs' + newid(),
                             language=language,
                             parameter=param,
                             jsondata=domain_element.jsondata,
                             contribution=contrib)

        # and fill in the actual values, which in this case is only one. This
        # object, and all objects it depends on, are then scheduled for
        # writing into the database.
        DBSession.add(
            common.Value(id='v' + newid(),
                         valueset=vs,
                         frequency=float(100),
                         jsondata=domain_element.jsondata,
                         domainelement=domain_element))
        # Execute all scheduled database updates.
        DBSession.flush()
Beispiel #4
0
def update(args):
    """Attach UNESCO endangerment data to languoids.

    Reads the ``RECORD`` elements of the XML file ``args.data_file(DATA_FILE)``
    and, for every ISO 639-3 code of a record that resolves to a languoid
    (looked up by ``hid``), stores the record under the ``unesco`` key of the
    languoid's jsondata and creates or updates its value for the
    ``vitality`` parameter. The ``unesco`` contribution, the ``vitality``
    parameter and its domain elements are created when missing.

    Args:
        args: Object providing ``data_file(name)`` to locate the input file.

    Prints the number of assignments and the ISO codes that matched no
    languoid.
    """
    pid, cid = 'vitality', 'unesco'
    count = 0
    notfound = {}
    contrib = common.Contribution.get(cid, default=None)
    if not contrib:
        contrib = common.Contribution(
            id=cid,
            name='Atlas of the World’s Languages in Danger',
            description=
            'Atlas of the World’s Languages in Danger, © UNESCO, http://www.unesco.org/culture/languages-atlas'
        )
    param = common.Parameter.get(pid, default=None)
    if param is None:
        param = common.Parameter(id=pid, name='Degree of endangerment')

    # Existing domain elements by name; missing ones are numbered by their
    # 1-based position in VITALITY_VALUES.
    domain = {de.name: de for de in param.domain}
    for number, (name, desc) in enumerate(VITALITY_VALUES, 1):
        if name not in domain:
            domain[name] = common.DomainElement(id='%s-%s' % (pid, number),
                                                name=name,
                                                description=desc,
                                                number=number,
                                                parameter=param)
    valuesets = {vs.id: vs for vs in param.valuesets}

    attrs = [
        'ID', 'Name in English', 'Name in French', 'Name in Spanish',
        'Countries', 'Country codes alpha 3', 'ISO639-3 codes',
        'Degree of endangerment'
    ]
    for record in et.parse(args.data_file(DATA_FILE)).findall('.//RECORD'):
        # Element names use underscores where the attribute labels have spaces.
        item = {}
        for attr in attrs:
            item[attr] = record.find(attr.replace(' ', '_')).text
        if not item['ISO639-3 codes']:
            continue

        for code in item['ISO639-3 codes'].split(','):
            code = code.strip()
            lang = Languoid.get(code, key='hid', default=None)
            if not lang:
                notfound[code] = 1
                continue

            count += 1
            # Give every languoid its own copy of the record: a record can
            # list several ISO codes, and mutating one shared dict would let
            # a later code overwrite the 'url' handed to an earlier languoid.
            unesco = dict(
                item,
                url='http://www.unesco.org/culture/languages-atlas/en/atlasmap/language-iso-%s.html' % code)
            lang.update_jsondata(unesco=unesco)

            de = domain[item['Degree of endangerment']]
            vsid = '%s-%s' % (pid, lang.id)
            vs = valuesets.get(vsid)
            if not vs:
                vs = common.ValueSet(id=vsid,
                                     parameter=param,
                                     contribution=contrib,
                                     language=lang)
                DBSession.add(
                    common.Value(valueset=vs,
                                 name=de.name,
                                 domainelement=de))
                valuesets[vsid] = vs
            else:
                vs.values[0].domainelement = de

    # Single-argument print calls produce the same output whether ``print``
    # is a statement (Python 2) or a function (Python 3).
    print('assigned %s unesco urls' % count)
    print('missing iso codes: %s' % (notfound,))
Beispiel #5
0
def load():
    """Import the TDIR glosses, languages, values, references and examples.

    Rows are obtained from ``read(<table name>)`` and turned into model
    objects that are added to ``DBSession``. Language coordinates are looked
    up in a WALS database reachable under a hard-coded PostgreSQL URL, first
    by ISO 639-3 code and then by language name. Languages without
    coordinates, examples for unknown languages, and examples whose source
    matches no reference (or differs from the matched reference) are
    reported with ``print``.
    """
    # Function-scope import, so the block does not rely on ``text`` being
    # part of the file-level imports.
    from sqlalchemy import text

    wals = create_engine('postgresql://robert@/wals3')

    contributor = common.Contributor(id='gastvolker', name='Volker Gast')
    contribution = common.Contribution(
        id='tdir', name='Typological Database of Intensifiers and Reflexives')
    DBSession.add(common.ContributionContributor(
        contribution=contribution, contributor=contributor))

    for row in read('glosses'):
        DBSession.add(common.GlossAbbreviation(id=row['gloss'], name=row['explanation']))

    params = {}
    for id_, name in PARAMS.items():
        params[id_] = common.Parameter(id='tdir-' + id_, name=name)
        DBSession.add(params[id_])
        #
        # TODO: domain for sortal restrictions!
        #

    # Replacements applied to the lower-cased 'sil' code of each language row.
    sil_fixes = {
        'arm': 'hye',
        'vmn': 'mig',
        'gli': 'gle',
        'grk': 'ell',
        'hbr': 'heb',
        'ltn': 'lat',
        'chn': 'cmn',
        'ota': 'ote',
        'pnj': 'pan',
        'pba': 'rap',
        'esg': 'kal',
        'vla': 'zea',
        'lat': 'lav',
    }

    # Bound parameters instead of string interpolation: language names and
    # codes come from the data files and may contain quote characters.
    coords_by_iso = text(
        "select l.latitude, l.longitude "
        "from language as l, languageidentifier as li, identifier as i "
        "where l.pk = li.language_pk and li.identifier_pk = i.pk "
        "and i.id = :code and i.type = 'iso639-3';")
    coords_by_name = text(
        "select latitude, longitude from language where name = :name;")

    values = {}
    languages = {}
    for row in read('languages'):
        if row['adn'] and '<br>' in row['adn']:
            # Only the first '<br>'-separated part stays in 'adn'; the rest
            # is appended to 'otherint', one non-empty part per line.
            row['adn'], other = row['adn'].split('<br>', 1)
            if not row['otherint']:
                row['otherint'] = ''
            parts = row['otherint'].split('<br>') + other.split('<br>')
            row['otherint'] = '\n'.join(part for part in parts if part)

        sil = row['sil'].lower()
        row['sil'] = sil_fixes.get(sil, sil)

        lang = common.Language(id=row['sil'], name=row['language'])
        languages[row['language']] = lang
        res = wals.execute(coords_by_iso, {'code': row['sil']}).fetchone()
        if not res:
            res = wals.execute(coords_by_name, {'name': row['language']}).fetchone()

        if res:
            lang.latitude, lang.longitude = res
        else:
            print(row['language'], row['sil'])
            # (u'Classical Nahuatl', u'nci')   ???
            # (u'Ancient Greek', u'gko')

        for pid in params:
            value = row[pid]
            if value:
                value = common.Value(
                    id='tdir-%s-%s' % (pid, lang.id),
                    name=unicode(bs(value)),
                    contribution=contribution,
                    parameter=params[pid],
                    language=lang)
                values['%s-%s' % (pid, row['language'])] = value
                DBSession.add(value)

    def normalize_ref(ref):
        """Collapse whitespace in *ref* and replace <i>/</i> tags by quotes."""
        ref = re.sub(r'\s+', ' ', ref).strip()
        return unicode(bs(ref)).replace('<i>', '"').replace('</i>', '"')

    # Notes on the reference data:
    #
    # Ogawa, A. (1998)
    # Wali, K. et al. (2000)
    #
    # Lyutikova. -> Lyutikova,
    # se-Bertit -> se-Berit
    #
    # missing refs:
    # Sengupta, G. (2000). Lexical anaphors and pronouns in Bangla. In Lust et al. (eds.), <i>Lexical Anaphors and Pronouns in Selected South Asian Languages</i>. Berlin: Mouton de Gruyter.
    # Davison, A. Mistry (2000). Lexical anaphors and pronouns in Hindi/Urdu. In Lust et al. (eds.), <i>Lexical Anaphors and Pronouns in Selected South Asian Languages</i>. Berlin: Mouton de Gruyter.

    refs = {}
    for row in read('references'):
        # The reference name is the entry text up to the first ').', with
        # whitespace collapsed and the closing parenthesis restored.
        name = re.sub(r'\s+', ' ', row['entry'].split(').')[0].strip()) + ')'
        src = common.Source(
            id=row['ref'].strip(), name=name, description=normalize_ref(row['entry']))
        refs[name] = src
        DBSession.add(src)

    for row in read('examples'):
        if row['language'] not in languages:
            print('example for unknown language "%s"' % row['language'])
            continue

        s = common.Sentence(
            id=row['Nr'].strip(),
            name=fix_example(row['original'], repl=' '),
            language=languages[row['language']],
            analyzed=fix_example(row['original']),
            gloss=fix_example(row['gloss']),
            description=row['translation'],
            source=row['source'],
            comment=row['comments'])

        # Link the example to every reference whose name occurs in its source.
        has_refs = False
        for ref_name, src in refs.items():
            if ref_name in row['source']:
                if normalize_ref(row['source']) != src.description:
                    print('-->')
                    print(row['source'])
                has_refs = True
                common.SentenceReference(sentence=s, source=src)

        if not has_refs:
            print('+++++')
            print(row['source'])

        pid = EXAMPLE_MAP[row['pov']]
        if pid:
            # associate with value!
            common.ValueSentence(value=values['%s-%s' % (pid, row['language'])], sentence=s)

        DBSession.add(s)
Beispiel #6
0
    def setUp(self):
        """Fill the test database with a small, interlinked set of fixtures.

        After delegating to ``TestWithDb.setUp``, this creates a dataset, an
        inactive source carrying a replacement id, a contribution with four
        contributors, a language with one identifier per
        ``common.IdentifierType`` member, 100 further languages, parameters
        with and without a domain, value sets and values, a unit with unit
        values, a sentence, a config entry and two replacement entries, and
        finally flushes the session.

        Objects that are never passed to ``DBSession.add`` directly are only
        linked to other objects through their constructor arguments;
        presumably they reach the session via relationship cascades (the
        model definitions are not visible here).
        """
        TestWithDb.setUp(self)

        DBSession.add(
            common.Dataset(id='dataset',
                           name='dataset',
                           description='desc',
                           domain='clld',
                           jsondata={'license_icon': 'cc-by'}))

        DBSession.add(
            common.Source(id='replaced',
                          active=False,
                          jsondata={'__replacement_id__': 'source'}))
        source = common.Source(id='source')
        # Build the id -> Contributor mapping in one pass instead of
        # overwriting the values of the dict that is being iterated.
        contributors = {
            id_: common.Contributor(id=id_,
                                    name=name,
                                    url='http://example.org')
            for id_, name in (
                ('contributor', 'A Name'),
                ('b', 'b Name'),
                ('c', 'c Name'),
                ('d', 'd Name'),
            )
        }

        contribution = common.Contribution(id='contribution',
                                           name='Contribution')
        common.ContributionReference(contribution=contribution, source=source)
        # These association objects are created as plain statements, not
        # inside ``assert``: assert statements are removed when Python runs
        # with -O, which would silently drop the fixtures.
        for contributor_id, primary in (
                ('contributor', True),
                ('b', False),
                ('c', True),
                ('d', False)):
            common.ContributionContributor(
                contribution=contribution,
                primary=primary,
                contributor=contributors[contributor_id])

        DBSession.add(contribution)

        language = common.Language(id='language',
                                   name='Language 1',
                                   latitude=10.5,
                                   longitude=0.3)
        language.sources.append(source)
        # One identifier per identifier type, all attached to the language.
        for i, type_ in enumerate(common.IdentifierType):
            id_ = common.Identifier(type=type_.value,
                                    id=type_.value + str(i),
                                    name='abc')
            common.LanguageIdentifier(language=language, identifier=id_)

        # 100 additional languages, each with a zero-padded numeric identifier.
        for i in range(2, 102):
            _l = common.Language(id='l%s' % i, name='Language %s' % i)
            _i = common.Identifier(type='iso639-3',
                                   id='%.3i' % i,
                                   name='%.3i' % i)
            common.LanguageIdentifier(language=_l, identifier=_i)
            DBSession.add(_l)

        # A parameter with two domain elements and one value for each.
        param = common.Parameter(id='parameter', name='Parameter')
        de = common.DomainElement(id='de',
                                  name='DomainElement',
                                  parameter=param)
        de2 = common.DomainElement(id='de2',
                                   name='DomainElement2',
                                   parameter=param)
        valueset = common.ValueSet(id='valueset',
                                   language=language,
                                   parameter=param,
                                   contribution=contribution)
        DBSession.add(
            common.Value(id='value',
                         domainelement=de,
                         valueset=valueset,
                         frequency=50,
                         confidence='high'))
        DBSession.add(
            common.Value(id='value2',
                         domainelement=de2,
                         valueset=valueset,
                         frequency=50,
                         confidence='high'))

        # A parameter without domain elements, with a referenced value set.
        paramnd = common.Parameter(id='no-domain',
                                   name='Parameter without domain')
        valueset_nd = common.ValueSet(id='vs2',
                                      language=language,
                                      parameter=paramnd,
                                      contribution=contribution)
        common.ValueSetReference(valueset=valueset_nd, source=source)
        DBSession.add(
            common.Value(id='v2',
                         valueset=valueset_nd,
                         frequency=50,
                         confidence='high'))

        unit = common.Unit(id='unit', name='Unit', language=language)
        up = common.UnitParameter(id='unitparameter', name='UnitParameter')
        DBSession.add(unit)
        DBSession.add(
            common.UnitValue(id='unitvalue',
                             name='UnitValue',
                             unit=unit,
                             unitparameter=up))

        up2 = common.UnitParameter(id='up2', name='UnitParameter with domain')
        unit_de = common.UnitDomainElement(id='de', name='de', parameter=up2)
        DBSession.add(
            common.UnitValue(id='uv2',
                             name='UnitValue2',
                             unit=unit,
                             unitparameter=up2,
                             unitdomainelement=unit_de))

        DBSession.add(common.Source(id='s'))

        sentence = common.Sentence(
            id='sentence',
            name='sentence name',
            description='sentence description',
            analyzed='a\tmorpheme\tdoes\tdo',
            gloss='a\tmorpheme\t1SG\tdo.SG2',
            source='own',
            comment='comment',
            original_script='a morpheme',
            language=language,
            jsondata={'alt_translation': 'Spanish: ...'})
        common.SentenceReference(sentence=sentence, source=source)
        DBSession.add(common.Config(key='key', value='value'))

        common.Config.add_replacement('replaced',
                                      'language',
                                      model=common.Language)
        common.Config.add_replacement('gone', None, model=common.Language)
        DBSession.flush()