def _addSource(lp): """For a lighter 'main' function.""" DBSession.add( common.Source(id=lp[0], name=lp[0], author=lp[2], year=lp[3], title=lp[4], url=lp[5], note=lp[6])) DBSession.flush()
def bibtex2source(rec):  # pragma: no cover
    year = rec.get('year', 'nd')
    fields = {}
    jsondata = {}
    for field in FIELDS:
        if field in rec:
            value = unescape(rec[field])
            container = fields if hasattr(common.Source, field) else jsondata
            container[field] = value
            # remove \\ from url fields!
            if field == 'url':
                container[field] = container[field].replace('\\', '')

    etal = ''
    eds = ''
    authors = rec.get('author')
    if not authors:
        authors = rec.get('editor', '')
        if authors:
            eds = ' (eds.)'
    if authors:
        authors = unescape(authors).split(' and ')
        if len(authors) > 2:
            authors = authors[:1]
            etal = ' et al.'
        authors = [HumanName(a) for a in authors]
        authors = [n.last or n.first for n in authors]
        authors = '%s%s%s' % (' and '.join(authors), etal, eds)

    if rec.genre == 'thesis':
        if rec['type'] == 'phdthesis':
            rec.genre = 'phdthesis'
        else:
            rec.genre = 'mastersthesis'

    try:
        bibtex_type = EntryType.from_string(rec.genre)
    except Exception:  # fall back to 'misc' for unrecognized genres
        bibtex_type = EntryType.from_string('misc')

    return common.Source(
        id=rec.id,
        name=('%s %s' % (authors, year)).strip(),
        description=unescape(rec.get('title', rec.get('booktitle', ''))),
        jsondata=jsondata,
        bibtex_type=bibtex_type,
        **fields)
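
A sketch of how this converter might be exercised. `FakeRecord` is a hypothetical stand-in for a parsed BibTeX record and provides only the attribute and mapping access the function relies on; `FIELDS`, `unescape`, `HumanName` and `EntryType` are assumed to come from the surrounding module (not shown here), and the example assumes 'url' is among `FIELDS`.

class FakeRecord(dict):
    """Hypothetical stand-in for a parsed BibTeX record."""
    def __init__(self, genre, id_, **fields):
        super().__init__(**fields)
        self.genre = genre
        self.id = id_

rec = FakeRecord(
    'article',
    'meier2005',
    author='Meier, Hans and Muster, Anna and Beispiel, Eva',
    year='2005',
    title='A made-up title',
    url='http://example.org/\\paper.pdf')
# Three authors collapse to 'Meier et al. 2005'; the backslash is stripped from the url.
source = bibtex2source(rec)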
def get_source(source, id_):
    author, year, description = source
    url = None
    match = re.search(r'(?P<url>http(s)?://[^\s]+)(\s+|$)', description)
    if match:
        url = match.group('url')
    res = common.Source(
        id='%s' % id_,
        name='%s %s' % (author or 'n.a.', year or 'n.d.'),
        description=description,
        author=author,
        year=year,
        title=description,
        url=url,
        bibtex_type=EntryType.misc)
    DBSession.add(res)
    return res
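
For example, an (author, year, description) tuple whose description carries a URL would be handled like this (hypothetical values):

src = get_source(
    ('Meier, Hans', '2005', 'A made-up title. http://example.org/meier2005.pdf'), 1)
# src.name == 'Meier, Hans 2005'
# src.url == 'http://example.org/meier2005.pdf'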
def bibtex2source(rec):
    year = bibtex.unescape(rec.get('year', 'nd'))
    fields = {}
    jsondata = {}
    for field in bibtex.FIELDS:
        if field in rec:
            value = bibtex.unescape(rec[field])
            container = fields if hasattr(common.Source, field) else jsondata
            container[field] = value
    return common.Source(
        id=slug(rec.id),
        name=('%s %s' % (
            bibtex.unescape(rec.get('author', rec.get('editor', ''))), year)).strip(),
        description=bibtex.unescape(rec.get('title', rec.get('booktitle', ''))),
        jsondata=jsondata,
        bibtex_type=rec.genre,
        **fields)
def setUp(self):
    TestWithDb.setUp(self)

    DBSession.add(
        common.Dataset(id='dataset', name='dataset', description='desc', domain='clld'))

    source = common.Source(id='source')
    contributors = {
        'contributor': 'A Name',
        'b': 'b Name',
        'c': 'c Name',
        'd': 'd Name',
    }
    for id_, name in contributors.items():
        contributors[id_] = common.Contributor(id=id_, name=name)

    contribution = common.Contribution(id='contribution', name='Contribution')
    cr = common.ContributionReference(contribution=contribution, source=source)

    assert common.ContributionContributor(
        contribution=contribution, primary=True, contributor=contributors['contributor'])
    assert common.ContributionContributor(
        contribution=contribution, primary=False, contributor=contributors['b'])
    assert common.ContributionContributor(
        contribution=contribution, primary=True, contributor=contributors['c'])
    assert common.ContributionContributor(
        contribution=contribution, primary=False, contributor=contributors['d'])

    DBSession.add(contribution)

    language = common.Language(id='language', name='Language 1', latitude=10.5, longitude=0.3)
    language.sources.append(source)

    identifier = common.Identifier(type='iso639-3', id='iso')
    li = common.LanguageIdentifier(language=language, identifier=identifier)

    for i in range(2, 102):
        _l = common.Language(id='l%s' % i, name='Language %s' % i)
        _i = common.Identifier(type='iso639-3', id='%.3i' % i, name='%.3i' % i)
        _li = common.LanguageIdentifier(language=_l, identifier=_i)
        DBSession.add(_l)

    param = common.Parameter(id='parameter', name='Parameter')
    de = common.DomainElement(id='de', name='DomainElement', parameter=param)
    de2 = common.DomainElement(id='de2', name='DomainElement2', parameter=param)

    valueset = common.ValueSet(
        id='valueset', language=language, parameter=param, contribution=contribution)
    value = common.Value(
        id='value', domainelement=de, valueset=valueset, frequency=50, confidence='high')
    DBSession.add(value)

    paramnd = common.Parameter(id='no-domain', name='Parameter without domain')
    valueset = common.ValueSet(
        id='vs2', language=language, parameter=paramnd, contribution=contribution)
    vr = common.ValueSetReference(valueset=valueset, source=source)
    value = common.Value(id='v2', valueset=valueset, frequency=50, confidence='high')
    DBSession.add(value)

    unit = common.Unit(id='unit', name='Unit', language=language)
    up = common.UnitParameter(id='unitparameter', name='UnitParameter')
    DBSession.add(unit)
    DBSession.add(
        common.UnitValue(id='unitvalue', name='UnitValue', unit=unit, unitparameter=up))

    up2 = common.UnitParameter(id='up2', name='UnitParameter with domain')
    de = common.UnitDomainElement(id='de', name='de', parameter=up2)
    DBSession.add(
        common.UnitValue(
            id='uv2', name='UnitValue2', unit=unit, unitparameter=up2, unitdomainelement=de))

    DBSession.add(common.Source(id='s'))

    sentence = common.Sentence(
        id='sentence',
        name='sentence name',
        description='sentence description',
        analyzed='a\tmorpheme\tdoes\tdo',
        gloss='a\tmorpheme\t1SG\tdo.SG2',
        source='own',
        comment='comment',
        original_script='a morpheme',
        language=language)
    sr = common.SentenceReference(sentence=sentence, source=source)
    DBSession.add(common.Config(key='key', value='value'))
    DBSession.flush()
def load():
    wals = create_engine('postgresql://robert@/wals3')

    contributor = common.Contributor(id='gastvolker', name='Volker Gast')
    contribution = common.Contribution(
        id='tdir', name='Typological Database of Intensifiers and Reflexives')
    cc = common.ContributionContributor(
        contribution=contribution, contributor=contributor)
    DBSession.add(cc)

    for row in read('glosses'):
        DBSession.add(common.GlossAbbreviation(id=row['gloss'], name=row['explanation']))

    params = {}
    for id_, name in PARAMS.items():
        params[id_] = common.Parameter(id='tdir-' + id_, name=name)
        DBSession.add(params[id_])

    #
    # TODO: domain for sortal restrictions!
    #

    values = {}
    languages = {}
    for row in read('languages'):
        if row['adn'] and '<br>' in row['adn']:
            row['adn'], other = row['adn'].split('<br>', 1)
            if not row['otherint']:
                row['otherint'] = ''
            row['otherint'] = '\n'.join(
                filter(None, row['otherint'].split('<br>') + other.split('<br>')))

        row['sil'] = row['sil'].lower()
        row['sil'] = {
            'arm': 'hye',
            'vmn': 'mig',
            'gli': 'gle',
            'grk': 'ell',
            'hbr': 'heb',
            'ltn': 'lat',
            'chn': 'cmn',
            'ota': 'ote',
            'pnj': 'pan',
            'pba': 'rap',
            'esg': 'kal',
            'vla': 'zea',
            'lat': 'lav',
        }.get(row['sil'], row['sil'])

        l = common.Language(id=row['sil'].lower(), name=row['language'])
        languages[row['language']] = l

        res = wals.execute(
            "select l.latitude, l.longitude from language as l, languageidentifier as li, "
            "identifier as i where l.pk = li.language_pk and li.identifier_pk = i.pk "
            "and i.id = '%s' and i.type = 'iso639-3';" % row['sil']).fetchone()
        if not res:
            res = wals.execute(
                "select latitude, longitude from language where name = '%s';"
                % row['language']).fetchone()
        if res:
            l.latitude, l.longitude = res
        else:
            print(row['language'], row['sil'])
            #(u'Classical Nahuatl', u'nci') ???
            #(u'Ancient Greek', u'gko')

        for pid in params.keys():
            value = row[pid]
            if value:
                value = common.Value(
                    id='tdir-%s-%s' % (pid, l.id),
                    name=unicode(bs(value)),
                    contribution=contribution,
                    parameter=params[pid],
                    language=l)
                values['%s-%s' % (pid, row['language'])] = value
                DBSession.add(value)

    def normalize_ref(ref):
        ref = re.sub(r'\s+', ' ', ref).strip()
        return unicode(bs(ref)).replace('<i>', '"').replace('</i>', '"')

    """
    Ogawa, A. (1998)
    Wali, K. et al. (2000)
    Lyutikova. -> Lyutikova,
    se-Bertit -> se-Berit

    missing refs:
    Sengupta, G. (2000). Lexical anaphors and pronouns in Bangla. In Lust et al. (eds.),
    <i>Lexical Anaphors and Pronouns in Selected South Asian Languages</i>.
    Berlin: Mouton de Gruyter.
    Davison, A. Mistry (2000). Lexical anaphors and pronouns in Hindi/Urdu. In Lust et al. (eds.),
    <i>Lexical Anaphors and Pronouns in Selected South Asian Languages</i>.
    Berlin: Mouton de Gruyter.
    """
    refs = {}
    for row in read('references'):
        name = re.sub(r'\s+', ' ', row['entry'].split(').')[0].strip()) + ')'
        src = common.Source(
            id=row['ref'].strip(), name=name, description=normalize_ref(row['entry']))
        refs[name] = src
        DBSession.add(src)

    for row in read('examples'):
        if row['language'] not in languages:
            print('example for unknown language "%s"' % row['language'])
            continue

        s = common.Sentence(
            id=row['Nr'].strip(),
            name=fix_example(row['original'], repl=' '),
            language=languages[row['language']],
            analyzed=fix_example(row['original']),
            gloss=fix_example(row['gloss']),
            description=row['translation'],
            source=row['source'],
            comment=row['comments'])

        has_refs = False
        for ref in refs:
            if ref in row['source']:
                if normalize_ref(row['source']) != refs[ref].description:
                    print('-->')
                    print(row['source'])
                has_refs = True
                common.SentenceReference(sentence=s, source=refs[ref])
        if not has_refs:
            print('+++++')
            print(row['source'])

        pid = EXAMPLE_MAP[row['pov']]
        if pid:
            # associate with value!
            o = common.ValueSentence(
                value=values['%s-%s' % (pid, row['language'])], sentence=s)

        DBSession.add(s)
def populate_test_db(engine):
    set_alembic_version(engine, '58559d4eea0d')

    data = TestData()
    data.add_default(
        common.Dataset,
        domain='clld',
        jsondata={'license_icon': 'cc-by', 'license_url': 'http://example.org'})

    data.add_default(common.Contributor, name='A Name', email='*****@*****.**')
    for id_, name in {
        'b': 'b Name',
        'c': 'c Name',
        'd': 'd Name',
    }.items():
        data.add(common.Contributor, id_, id=id_, name=name, url='http://example.org')

    DBSession.add(
        common.Editor(dataset=data[common.Dataset], contributor=data[common.Contributor]))

    data.add_default(common.Source)
    data.add(
        common.Source,
        'replaced',
        id='replaced',
        active=False,
        jsondata={'__replacement_id__': 'source'})

    data.add_default(common.Contribution)
    common.ContributionReference(
        contribution=data[common.Contribution], source=data[common.Source])

    for primary, c in [(True, 'contributor'), (False, 'b'), (True, 'c'), (False, 'd')]:
        common.ContributionContributor(
            contribution=data[common.Contribution],
            primary=primary,
            contributor=data['Contributor'][c])

    data.add_default(common.Language, latitude=10.5, longitude=0.3)
    data[common.Language].sources.append(data[common.Source])

    for i, type_ in enumerate(common.IdentifierType):
        common.LanguageIdentifier(
            language=data[common.Language],
            identifier=common.Identifier(
                type=type_.value,
                id=type_.value + str(i),
                name='abc' if type_.name == 'iso' else 'glot1234'))

    common.LanguageIdentifier(
        language=data[common.Language],
        identifier=common.Identifier(type='name', id='name', name='a'))

    for i in range(2, 102):
        _l = common.Language(id='l%s' % i, name='Language %s' % i)
        _i = common.Identifier(type='iso639-3', id='%.3i' % i, name='abc')
        common.LanguageIdentifier(language=_l, identifier=_i)
        DBSession.add(_l)

    param = data.add_default(common.Parameter)
    de = common.DomainElement(id='de', name='DomainElement', parameter=param)
    de2 = common.DomainElement(id='de2', name='DomainElement2', parameter=param)

    valueset = data.add_default(
        common.ValueSet,
        language=data[common.Language],
        parameter=param,
        contribution=data[common.Contribution])
    common.ValueSetReference(
        valueset=valueset, source=data[common.Source], description='10-20')

    data.add_default(
        common.Value, domainelement=de, valueset=valueset, frequency=50, confidence='high')
    data.add(
        common.Value,
        'value2',
        id='value2',
        domainelement=de2,
        valueset=valueset,
        frequency=50,
        confidence='high')

    paramnd = data.add(
        common.Parameter, 'no-domain', id='no-domain', name='Parameter without domain')
    valueset = common.ValueSet(
        id='vs2',
        language=data[common.Language],
        parameter=paramnd,
        contribution=data[common.Contribution])
    common.ValueSetReference(
        valueset=valueset, source=data[common.Source], description='10-20')
    common.Value(id='v2', valueset=valueset, frequency=50, confidence='high')

    unit = data.add_default(common.Unit, language=data[common.Language])
    up = data.add_default(common.UnitParameter)
    common.UnitValue(id='unitvalue', name='UnitValue', unit=unit, unitparameter=up)

    up2 = common.UnitParameter(id='up2', name='UnitParameter with domain')
    de = common.UnitDomainElement(id='de', name='de', parameter=up2)
    DBSession.add(
        common.UnitValue(
            id='uv2', name='UnitValue2', unit=unit, unitparameter=up2, unitdomainelement=de))

    DBSession.add(common.Source(id='s'))

    sentence = data.add_default(
        common.Sentence,
        description='sentence description',
        analyzed='a\tmorpheme\tdoes\tdo',
        gloss='a\tmorpheme\t1SG\tdo.SG2',
        source='own',
        comment='comment',
        original_script='a morpheme',
        language=data[common.Language],
        jsondata={'alt_translation': 'Spanish: ...'})
    common.SentenceReference(sentence=sentence, source=data[common.Source])
    DBSession.add(common.Config(key='key', value='value'))

    common.Config.add_replacement('replaced', 'language', model=common.Language)
    common.Config.add_replacement('gone', None, model=common.Language)

    DBSession.flush()
def setUp(self):
    TestWithDb.setUp(self)

    DBSession.add(
        common.Dataset(
            id='dataset',
            name='dataset',
            description='desc',
            domain='clld',
            jsondata={'license_icon': 'cc-by'}))

    DBSession.add(
        common.Source(id='replaced', active=False, jsondata={'__replacement_id__': 'source'}))
    source = common.Source(id='source')
    contributors = {
        'contributor': 'A Name',
        'b': 'b Name',
        'c': 'c Name',
        'd': 'd Name',
    }
    for id_, name in contributors.items():
        contributors[id_] = common.Contributor(id=id_, name=name, url='http://example.org')

    contribution = common.Contribution(id='contribution', name='Contribution')
    common.ContributionReference(contribution=contribution, source=source)

    assert common.ContributionContributor(
        contribution=contribution, primary=True, contributor=contributors['contributor'])
    assert common.ContributionContributor(
        contribution=contribution, primary=False, contributor=contributors['b'])
    assert common.ContributionContributor(
        contribution=contribution, primary=True, contributor=contributors['c'])
    assert common.ContributionContributor(
        contribution=contribution, primary=False, contributor=contributors['d'])

    DBSession.add(contribution)

    language = common.Language(id='language', name='Language 1', latitude=10.5, longitude=0.3)
    language.sources.append(source)

    for i, type_ in enumerate(common.IdentifierType):
        id_ = common.Identifier(type=type_.value, id=type_.value + str(i), name='abc')
        common.LanguageIdentifier(language=language, identifier=id_)

    for i in range(2, 102):
        _l = common.Language(id='l%s' % i, name='Language %s' % i)
        _i = common.Identifier(type='iso639-3', id='%.3i' % i, name='%.3i' % i)
        common.LanguageIdentifier(language=_l, identifier=_i)
        DBSession.add(_l)

    param = common.Parameter(id='parameter', name='Parameter')
    de = common.DomainElement(id='de', name='DomainElement', parameter=param)
    de2 = common.DomainElement(id='de2', name='DomainElement2', parameter=param)

    valueset = common.ValueSet(
        id='valueset', language=language, parameter=param, contribution=contribution)
    value = common.Value(
        id='value', domainelement=de, valueset=valueset, frequency=50, confidence='high')
    DBSession.add(value)
    value2 = common.Value(
        id='value2', domainelement=de2, valueset=valueset, frequency=50, confidence='high')
    DBSession.add(value2)

    paramnd = common.Parameter(id='no-domain', name='Parameter without domain')
    valueset = common.ValueSet(
        id='vs2', language=language, parameter=paramnd, contribution=contribution)
    common.ValueSetReference(valueset=valueset, source=source)
    value = common.Value(id='v2', valueset=valueset, frequency=50, confidence='high')
    DBSession.add(value)

    unit = common.Unit(id='unit', name='Unit', language=language)
    up = common.UnitParameter(id='unitparameter', name='UnitParameter')
    DBSession.add(unit)
    DBSession.add(
        common.UnitValue(id='unitvalue', name='UnitValue', unit=unit, unitparameter=up))

    up2 = common.UnitParameter(id='up2', name='UnitParameter with domain')
    de = common.UnitDomainElement(id='de', name='de', parameter=up2)
    DBSession.add(
        common.UnitValue(
            id='uv2', name='UnitValue2', unit=unit, unitparameter=up2, unitdomainelement=de))

    DBSession.add(common.Source(id='s'))

    sentence = common.Sentence(
        id='sentence',
        name='sentence name',
        description='sentence description',
        analyzed='a\tmorpheme\tdoes\tdo',
        gloss='a\tmorpheme\t1SG\tdo.SG2',
        source='own',
        comment='comment',
        original_script='a morpheme',
        language=language,
        jsondata={'alt_translation': 'Spanish: ...'})
    common.SentenceReference(sentence=sentence, source=source)
    DBSession.add(common.Config(key='key', value='value'))
    common.Config.add_replacement('replaced', 'language', model=common.Language)
    common.Config.add_replacement('gone', None, model=common.Language)

    DBSession.flush()