Example #1
0
def _addSource(lp):
    """For a lighter 'main' function."""

    DBSession.add(
        common.Source(id=lp[0],
                      name=lp[0],
                      author=lp[2],
                      year=lp[3],
                      title=lp[4],
                      url=lp[5],
                      note=lp[6]))
    DBSession.flush()
Example #2
0
def bibtex2source(rec):  # pragma: no cover
    year = rec.get('year', 'nd')
    fields = {}
    jsondata = {}
    for field in FIELDS:
        if field in rec:
            value = unescape(rec[field])
            container = fields if hasattr(common.Source, field) else jsondata
            container[field] = value
            # remove \\ from url fields!
            if field == 'url':
                container[field] = container[field].replace('\\', '')

    etal = ''
    eds = ''
    authors = rec.get('author')
    if not authors:
        authors = rec.get('editor', '')
        if authors:
            eds = ' (eds.)'
    if authors:
        authors = unescape(authors).split(' and ')
        if len(authors) > 2:
            authors = authors[:1]
            etal = ' et al.'

        authors = [HumanName(a) for a in authors]
        authors = [n.last or n.first for n in authors]
        authors = '%s%s%s' % (' and '.join(authors), etal, eds)

    if rec.genre == 'thesis':
        if rec['type'] == 'phdthesis':
            rec.genre = 'phdthesis'
        else:
            rec.genre = 'mastersthesis'

    try:
        bibtex_type = EntryType.from_string(rec.genre)
    except:
        bibtex_type = EntryType.from_string('misc')

    return common.Source(id=rec.id,
                         name=('%s %s' % (authors, year)).strip(),
                         description=unescape(
                             rec.get('title', rec.get('booktitle', ''))),
                         jsondata=jsondata,
                         bibtex_type=bibtex_type,
                         **fields)
Example #3
0
def get_source(source, id_):
    author, year, description = source

    url = None
    match = re.search('(?P<url>http(s)?://[^\s]+)(\s+|$)', description)
    if match:
        url = match.group('url')

    res = common.Source(id='%s' % id_,
                        name='%s %s' % (author or 'n.a.', year or 'n.d.'),
                        description=description,
                        author=author,
                        year=year,
                        title=description,
                        url=url,
                        bibtex_type=EntryType.misc)
    DBSession.add(res)
    return res
Example #4
0
def bibtex2source(rec):
    year = bibtex.unescape(rec.get('year', 'nd'))
    fields = {}
    jsondata = {}
    for field in bibtex.FIELDS:
        if field in rec:
            value = bibtex.unescape(rec[field])
            container = fields if hasattr(common.Source, field) else jsondata
            container[field] = value

    return common.Source(
        id=slug(rec.id),
        name=('%s %s' % (bibtex.unescape(
            rec.get('author', rec.get('editor', ''))), year)).strip(),
        description=bibtex.unescape(rec.get('title', rec.get('booktitle',
                                                             ''))),
        jsondata=jsondata,
        bibtex_type=rec.genre,
        **fields)
Example #5
0
    def setUp(self):
        TestWithDb.setUp(self)

        DBSession.add(
            common.Dataset(id='dataset',
                           name='dataset',
                           description='desc',
                           domain='clld'))

        source = common.Source(id='source')
        contributors = {
            'contributor': 'A Name',
            'b': 'b Name',
            'c': 'c Name',
            'd': 'd Name'
        }
        for id_, name in contributors.items():
            contributors[id_] = common.Contributor(id=id_, name=name)

        contribution = common.Contribution(id='contribution',
                                           name='Contribution')
        cr = common.ContributionReference(contribution=contribution,
                                          source=source)
        assert common.ContributionContributor(
            contribution=contribution,
            primary=True,
            contributor=contributors['contributor'])
        assert common.ContributionContributor(contribution=contribution,
                                              primary=False,
                                              contributor=contributors['b'])
        assert common.ContributionContributor(contribution=contribution,
                                              primary=True,
                                              contributor=contributors['c'])
        assert common.ContributionContributor(contribution=contribution,
                                              primary=False,
                                              contributor=contributors['d'])

        DBSession.add(contribution)

        language = common.Language(id='language',
                                   name='Language 1',
                                   latitude=10.5,
                                   longitude=0.3)
        language.sources.append(source)
        identifier = common.Identifier(type='iso639-3', id='iso')
        li = common.LanguageIdentifier(language=language,
                                       identifier=identifier)

        for i in range(2, 102):
            _l = common.Language(id='l%s' % i, name='Language %s' % i)
            _i = common.Identifier(type='iso639-3',
                                   id='%.3i' % i,
                                   name='%.3i' % i)
            _li = common.LanguageIdentifier(language=_l, identifier=_i)
            DBSession.add(_l)

        param = common.Parameter(id='parameter', name='Parameter')
        de = common.DomainElement(id='de',
                                  name='DomainElement',
                                  parameter=param)
        de2 = common.DomainElement(id='de2',
                                   name='DomainElement2',
                                   parameter=param)
        valueset = common.ValueSet(id='valueset',
                                   language=language,
                                   parameter=param,
                                   contribution=contribution)
        value = common.Value(id='value',
                             domainelement=de,
                             valueset=valueset,
                             frequency=50,
                             confidence='high')
        DBSession.add(value)
        paramnd = common.Parameter(id='no-domain',
                                   name='Parameter without domain')
        valueset = common.ValueSet(id='vs2',
                                   language=language,
                                   parameter=paramnd,
                                   contribution=contribution)
        vr = common.ValueSetReference(valueset=valueset, source=source)
        value = common.Value(id='v2',
                             valueset=valueset,
                             frequency=50,
                             confidence='high')
        DBSession.add(value)

        unit = common.Unit(id='unit', name='Unit', language=language)
        up = common.UnitParameter(id='unitparameter', name='UnitParameter')
        DBSession.add(unit)
        DBSession.add(
            common.UnitValue(id='unitvalue',
                             name='UnitValue',
                             unit=unit,
                             unitparameter=up))

        up2 = common.UnitParameter(id='up2', name='UnitParameter with domain')
        de = common.UnitDomainElement(id='de', name='de', parameter=up2)
        DBSession.add(
            common.UnitValue(id='uv2',
                             name='UnitValue2',
                             unit=unit,
                             unitparameter=up2,
                             unitdomainelement=de))

        DBSession.add(common.Source(id='s'))

        sentence = common.Sentence(id='sentence',
                                   name='sentence name',
                                   description='sentence description',
                                   analyzed='a\tmorpheme\tdoes\tdo',
                                   gloss='a\tmorpheme\t1SG\tdo.SG2',
                                   source='own',
                                   comment='comment',
                                   original_script='a morpheme',
                                   language=language)
        sr = common.SentenceReference(sentence=sentence, source=source)
        DBSession.add(common.Config(key='key', value='value'))
        DBSession.flush()
Example #6
0
def load():
    wals = create_engine('postgresql://robert@/wals3')

    contributor = common.Contributor(id='gastvolker', name='Volker Gast')
    contribution = common.Contribution(
        id='tdir', name='Typological Database of Intensifiers and Reflexives')
    cc = common.ContributionContributor(
        contribution=contribution, contributor=contributor)
    DBSession.add(cc)

    for row in read('glosses'):
        DBSession.add(common.GlossAbbreviation(id=row['gloss'], name=row['explanation']))

    params = {}
    for id_, name in PARAMS.items():
        params[id_] = common.Parameter(id='tdir-' + id_, name=name)
        DBSession.add(params[id_])
        #
        # TODO: domain for sortal restrictions!
        #

    values = {}
    languages = {}
    for row in read('languages'):
        if row['adn'] and '<br>' in row['adn']:
            row['adn'], other = row['adn'].split('<br>', 1)
            if not row['otherint']:
                row['otherint'] = ''
            row['otherint'] = '\n'.join(filter(None, row['otherint'].split('<br>') + other.split('<br>')))

        row['sil'] = row['sil'].lower()
        row['sil'] = {
            'arm': 'hye',
            'vmn': 'mig',
            'gli': 'gle',
            'grk': 'ell',
            'hbr': 'heb',
            'ltn': 'lat',
            'chn': 'cmn',
            'ota': 'ote',
            'pnj': 'pan',
            'pba': 'rap',
            'esg': 'kal',
            'vla': 'zea',
            'lat': 'lav',
        }.get(row['sil'], row['sil'])

        l = common.Language(id=row['sil'].lower(), name=row['language'])
        languages[row['language']] = l
        res = wals.execute("select l.latitude, l.longitude from language as l, languageidentifier as li, identifier as i where l.pk = li.language_pk and li.identifier_pk = i.pk and i.id = '%s' and i.type = 'iso639-3';" \
                           % row['sil']).fetchone()
        if not res:
            res = wals.execute("select latitude, longitude from language where name = '%s';" % row['language']).fetchone()

        if res:
            l.latitude, l.longitude = res
        else:
            print(row['language'], row['sil'])
#(u'Classical Nahuatl', u'nci')   ???
#(u'Ancient Greek', u'gko')

        for pid in params.keys():
            value = row[pid]
            if value:
                value = common.Value(
                    id='tdir-%s-%s' % (pid, l.id),
                    name=unicode(bs(value)),
                    contribution=contribution,
                    parameter=params[pid],
                    language=l)
                values['%s-%s' % (pid, row['language'])] = value
                DBSession.add(value)

    def normalize_ref(ref):
        ref = re.sub('\s+', ' ', ref).strip()
        return unicode(bs(ref)).replace('<i>', '"').replace('</i>', '"')

    """
Ogawa, A. (1998)
Wali, K. et al. (2000)

Lyutikova. -> Lyutikova,
se-Bertit -> se-Berit

missing refs:
Sengupta, G. (2000). Lexical anaphors and pronouns in Bangla. In Lust et al. (eds.), <i>Lexical Anaphors and Pronouns in Selected South Asian Languages</i>. Berlin: Mouton de Gruyter.
Davison, A. Mistry (2000). Lexical anaphors and pronouns in Hindi/Urdu. In Lust et al. (eds.), <i>Lexical Anaphors and Pronouns in Selected South Asian Languages</i>. Berlin: Mouton de Gruyter.

"""

    refs = {}
    for row in read('references'):
        name = re.sub('\s+', ' ', row['entry'].split(').')[0].strip()) + ')'
        src = common.Source(
            id=row['ref'].strip(), name=name, description=normalize_ref(row['entry']))
        refs[name] = src
        DBSession.add(src)

    for row in read('examples'):
        if row['language'] not in languages:
            print('example for unknown language "%s"' % row['language'])
            continue

        s = common.Sentence(
            id=row['Nr'].strip(),
            name=fix_example(row['original'], repl=' '),
            language=languages[row['language']],
            analyzed=fix_example(row['original']),
            gloss=fix_example(row['gloss']),
            description=row['translation'],
            source=row['source'],
            comment=row['comments'])

        has_refs = False
        for ref in refs:
            if ref in row['source']:
                if normalize_ref(row['source']) != refs[ref].description:
                    print('-->')
                    print(row['source'])
                has_refs = True
                common.SentenceReference(sentence=s, source=refs[ref])

        if not has_refs:
            print('+++++')
            print(row['source'])

        pid = EXAMPLE_MAP[row['pov']]
        if pid:
            # associate with value!
            o = common.ValueSentence(value=values['%s-%s' % (pid, row['language'])], sentence=s)

        DBSession.add(s)
Example #7
0
def populate_test_db(engine):
    set_alembic_version(engine, '58559d4eea0d')

    data = TestData()
    data.add_default(common.Dataset,
                     domain='clld',
                     jsondata={
                         'license_icon': 'cc-by',
                         'license_url': 'http://example.org'
                     })

    data.add_default(common.Contributor, name='A Name', email='*****@*****.**')
    for id_, name in {
            'b': 'b Name',
            'c': 'c Name',
            'd': 'd Name',
    }.items():
        data.add(common.Contributor,
                 id_,
                 id=id_,
                 name=name,
                 url='http://example.org')

    DBSession.add(
        common.Editor(dataset=data[common.Dataset],
                      contributor=data[common.Contributor]))

    data.add_default(common.Source)
    data.add(common.Source,
             'replaced',
             id='replaced',
             active=False,
             jsondata={'__replacement_id__': 'source'})

    data.add_default(common.Contribution)
    common.ContributionReference(contribution=data[common.Contribution],
                                 source=data[common.Source])

    for primary, c in [(True, 'contributor'), (False, 'b'), (True, 'c'),
                       (False, 'd')]:
        common.ContributionContributor(contribution=data[common.Contribution],
                                       primary=primary,
                                       contributor=data['Contributor'][c])

    data.add_default(common.Language, latitude=10.5, longitude=0.3)
    data[common.Language].sources.append(data[common.Source])

    for i, type_ in enumerate(common.IdentifierType):
        common.LanguageIdentifier(
            language=data[common.Language],
            identifier=common.Identifier(
                type=type_.value,
                id=type_.value + str(i),
                name='abc' if type_.name == 'iso' else 'glot1234'))

    common.LanguageIdentifier(language=data[common.Language],
                              identifier=common.Identifier(type='name',
                                                           id='name',
                                                           name='a'))

    for i in range(2, 102):
        _l = common.Language(id='l%s' % i, name='Language %s' % i)
        _i = common.Identifier(type='iso639-3', id='%.3i' % i, name='abc')
        common.LanguageIdentifier(language=_l, identifier=_i)
        DBSession.add(_l)

    param = data.add_default(common.Parameter)
    de = common.DomainElement(id='de', name='DomainElement', parameter=param)
    de2 = common.DomainElement(id='de2',
                               name='DomainElement2',
                               parameter=param)

    valueset = data.add_default(common.ValueSet,
                                language=data[common.Language],
                                parameter=param,
                                contribution=data[common.Contribution])
    common.ValueSetReference(valueset=valueset,
                             source=data[common.Source],
                             description='10-20')

    data.add_default(common.Value,
                     domainelement=de,
                     valueset=valueset,
                     frequency=50,
                     confidence='high')
    data.add(common.Value,
             'value2',
             id='value2',
             domainelement=de2,
             valueset=valueset,
             frequency=50,
             confidence='high')

    paramnd = data.add(common.Parameter,
                       'no-domain',
                       id='no-domain',
                       name='Parameter without domain')
    valueset = common.ValueSet(id='vs2',
                               language=data[common.Language],
                               parameter=paramnd,
                               contribution=data[common.Contribution])

    common.ValueSetReference(valueset=valueset,
                             source=data[common.Source],
                             description='10-20')
    common.Value(id='v2', valueset=valueset, frequency=50, confidence='high')

    unit = data.add_default(common.Unit, language=data[common.Language])
    up = data.add_default(common.UnitParameter)
    common.UnitValue(id='unitvalue',
                     name='UnitValue',
                     unit=unit,
                     unitparameter=up)

    up2 = common.UnitParameter(id='up2', name='UnitParameter with domain')
    de = common.UnitDomainElement(id='de', name='de', parameter=up2)
    DBSession.add(
        common.UnitValue(id='uv2',
                         name='UnitValue2',
                         unit=unit,
                         unitparameter=up2,
                         unitdomainelement=de))

    DBSession.add(common.Source(id='s'))

    sentence = data.add_default(common.Sentence,
                                description='sentence description',
                                analyzed='a\tmorpheme\tdoes\tdo',
                                gloss='a\tmorpheme\t1SG\tdo.SG2',
                                source='own',
                                comment='comment',
                                original_script='a morpheme',
                                language=data[common.Language],
                                jsondata={'alt_translation': 'Spanish: ...'})
    common.SentenceReference(sentence=sentence, source=data[common.Source])
    DBSession.add(common.Config(key='key', value='value'))

    common.Config.add_replacement('replaced',
                                  'language',
                                  model=common.Language)
    common.Config.add_replacement('gone', None, model=common.Language)
    DBSession.flush()
Example #8
0
    def setUp(self):
        TestWithDb.setUp(self)

        DBSession.add(
            common.Dataset(id='dataset',
                           name='dataset',
                           description='desc',
                           domain='clld',
                           jsondata={'license_icon': 'cc-by'}))

        DBSession.add(
            common.Source(id='replaced',
                          active=False,
                          jsondata={'__replacement_id__': 'source'}))
        source = common.Source(id='source')
        contributors = {
            'contributor': 'A Name',
            'b': 'b Name',
            'c': 'c Name',
            'd': 'd Name'
        }
        for id_, name in contributors.items():
            contributors[id_] = common.Contributor(id=id_,
                                                   name=name,
                                                   url='http://example.org')

        contribution = common.Contribution(id='contribution',
                                           name='Contribution')
        common.ContributionReference(contribution=contribution, source=source)
        assert common.ContributionContributor(
            contribution=contribution,
            primary=True,
            contributor=contributors['contributor'])
        assert common.ContributionContributor(contribution=contribution,
                                              primary=False,
                                              contributor=contributors['b'])
        assert common.ContributionContributor(contribution=contribution,
                                              primary=True,
                                              contributor=contributors['c'])
        assert common.ContributionContributor(contribution=contribution,
                                              primary=False,
                                              contributor=contributors['d'])

        DBSession.add(contribution)

        language = common.Language(id='language',
                                   name='Language 1',
                                   latitude=10.5,
                                   longitude=0.3)
        language.sources.append(source)
        for i, type_ in enumerate(common.IdentifierType):
            id_ = common.Identifier(type=type_.value,
                                    id=type_.value + str(i),
                                    name='abc')
            common.LanguageIdentifier(language=language, identifier=id_)

        for i in range(2, 102):
            _l = common.Language(id='l%s' % i, name='Language %s' % i)
            _i = common.Identifier(type='iso639-3',
                                   id='%.3i' % i,
                                   name='%.3i' % i)
            common.LanguageIdentifier(language=_l, identifier=_i)
            DBSession.add(_l)

        param = common.Parameter(id='parameter', name='Parameter')
        de = common.DomainElement(id='de',
                                  name='DomainElement',
                                  parameter=param)
        de2 = common.DomainElement(id='de2',
                                   name='DomainElement2',
                                   parameter=param)
        valueset = common.ValueSet(id='valueset',
                                   language=language,
                                   parameter=param,
                                   contribution=contribution)
        value = common.Value(id='value',
                             domainelement=de,
                             valueset=valueset,
                             frequency=50,
                             confidence='high')
        DBSession.add(value)
        value2 = common.Value(id='value2',
                              domainelement=de2,
                              valueset=valueset,
                              frequency=50,
                              confidence='high')
        DBSession.add(value2)
        paramnd = common.Parameter(id='no-domain',
                                   name='Parameter without domain')
        valueset = common.ValueSet(id='vs2',
                                   language=language,
                                   parameter=paramnd,
                                   contribution=contribution)
        common.ValueSetReference(valueset=valueset, source=source)
        value = common.Value(id='v2',
                             valueset=valueset,
                             frequency=50,
                             confidence='high')
        DBSession.add(value)

        unit = common.Unit(id='unit', name='Unit', language=language)
        up = common.UnitParameter(id='unitparameter', name='UnitParameter')
        DBSession.add(unit)
        DBSession.add(
            common.UnitValue(id='unitvalue',
                             name='UnitValue',
                             unit=unit,
                             unitparameter=up))

        up2 = common.UnitParameter(id='up2', name='UnitParameter with domain')
        de = common.UnitDomainElement(id='de', name='de', parameter=up2)
        DBSession.add(
            common.UnitValue(id='uv2',
                             name='UnitValue2',
                             unit=unit,
                             unitparameter=up2,
                             unitdomainelement=de))

        DBSession.add(common.Source(id='s'))

        sentence = common.Sentence(
            id='sentence',
            name='sentence name',
            description='sentence description',
            analyzed='a\tmorpheme\tdoes\tdo',
            gloss='a\tmorpheme\t1SG\tdo.SG2',
            source='own',
            comment='comment',
            original_script='a morpheme',
            language=language,
            jsondata={'alt_translation': 'Spanish: ...'})
        common.SentenceReference(sentence=sentence, source=source)
        DBSession.add(common.Config(key='key', value='value'))

        common.Config.add_replacement('replaced',
                                      'language',
                                      model=common.Language)
        common.Config.add_replacement('gone', None, model=common.Language)
        DBSession.flush()