def test_foreign_keys(tg, translate):
    tg.tables[0].tableSchema.columns.append(Column.fromvalue({'name': 'v'}))
    tg.tables[0].tableSchema.primaryKey = ['v']
    tg.tables.append(
        Table.fromvalue({
            'url': 'ref',
            'tableSchema': {
                'columns': [{'name': 'ref'}],
                'foreignKeys': [{
                    'columnReference': 'ref',
                    'reference': {
                        'resource': 'data',
                        'columnReference': 'v',
                    }
                }]
            }
        }))
    db = Database(tg, translate=translate)
    with pytest.raises(sqlite3.IntegrityError):
        db.write(ref=[{'ref': 'y'}], data=[{'v': 'x'}])
    db.write(ref=[{'ref': 'x'}], data=[{'v': 'x'}])
    # 'vv' is used as column name as specified by the translate fixture:
    assert 'vv' in db.read()['data'][0]
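# Hedged sketch (not from the original source): a pytest fixture like the one the
# test above assumes for ``translate``. csvw.db.Database accepts a callable mapping
# (table name, optional column name) to the name used in the SQLite schema; here
# the column 'v' of table 'data' is renamed to 'vv', matching the assertion above.
@pytest.fixture
def translate():
    def _translate(table, column=None):
        if table == 'data' and column == 'v':
            return 'vv'
        return column or table
    return _translate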
def write_languoids_table(self, outdir, version=None):
    version = version or self.describe()
    if outdir is not None and not outdir.exists():
        raise IOError(
            "Specified output directory %s does not exist. Please create it." % outdir)
    out = outdir / 'glottolog-languoids-{0}.csv'.format(version)
    md = outdir / (out.name + '-metadata.json')
    tg = TableGroup.fromvalue({
        "@context": "http://www.w3.org/ns/csvw",
        "dc:version": version,
        "dc:bibliographicCitation":
            "Harald Hammarström, Robert Forkel & Martin Haspelmath. "
            "clld/glottolog: Glottolog database (Version {0}) [Data set]. "
            "Zenodo. http://doi.org/10.5281/zenodo.596479".format(version),
        "tables": [load(pycldf.util.pkg_path('components', 'LanguageTable-metadata.json'))],
    })
    tg.tables[0].url = out.name
    for col in [
        dict(name='LL_Code'),
        dict(name='Classification', separator='/'),
        dict(name='Family_Glottocode'),
        dict(name='Family_Name'),
        dict(name='Language_Glottocode'),
        dict(name='Language_Name'),
        dict(name='Level', datatype=dict(base='string', format='family|language|dialect')),
        dict(name='Status'),
    ]:
        tg.tables[0].tableSchema.columns.append(Column.fromvalue(col))
    langs = []
    for lang in self.languoids():
        lid, lname = None, None
        if lang.level == self.languoid_levels.language:
            lid, lname = lang.id, lang.name
        elif lang.level == self.languoid_levels.dialect:
            for lname, lid, level in reversed(lang.lineage):
                if level == self.languoid_levels.language:
                    break
            else:  # pragma: no cover
                raise ValueError
        langs.append(dict(
            ID=lang.id,
            Name=lang.name,
            Macroarea=lang.macroareas[0].name if lang.macroareas else None,
            Latitude=lang.latitude,
            Longitude=lang.longitude,
            Glottocode=lang.id,
            ISO639P3code=lang.iso,
            LL_Code=lang.identifier.get('multitree'),
            Classification=[c[1] for c in lang.lineage],
            Language_Glottocode=lid,
            Language_Name=lname,
            Family_Name=lang.lineage[0][0] if lang.lineage else None,
            Family_Glottocode=lang.lineage[0][1] if lang.lineage else None,
            Level=lang.level.name,
            Status=lang.endangerment.status.name if lang.endangerment else None,
        ))
    tg.to_file(md)
    tg.tables[0].write(langs, fname=out)
    return md, out
def test_datatypes(tg, datatype):
    tg.tables[0].tableSchema.columns.extend([
        Column.fromvalue({'datatype': datatype.name, 'name': 'v1'}),
        Column.fromvalue({'datatype': datatype.name, 'name': 'v2'}),
    ])
    db = Database(tg)
    v = datatype.to_python(datatype.example)
    db.write(data=[{'v1': v, 'v2': None}])
    data = db.read()['data']
    assert data[0]['v1'] == v
    assert data[0]['v2'] is None
def test_file(tmpdir, tg):
    fname = tmpdir.join('test.sqlite')
    tg.tables[0].tableSchema.columns.append(Column.fromvalue({'name': 'v'}))
    db = Database(tg, fname=str(fname))
    db.write()
    assert fname.check()
    with pytest.raises(ValueError):
        db.write()
def test_required(tg):
    tg.tables[0].tableSchema.columns.append(
        Column.fromvalue({'required': 'True', 'name': 'v'}))
    db = Database(tg)
    with pytest.raises(sqlite3.IntegrityError):
        db.write(data=[{'v': None}])
def test_many_to_many_self_referential(tg):
    tg.tables[0].tableSchema.columns.append(Column.fromvalue({'name': 'v'}))
    tg.tables[0].tableSchema.columns.append(
        Column.fromvalue({'name': 'ref', 'separator': ';'}))
    tg.tables[0].tableSchema.primaryKey = ['v']
    tg.tables[0].tableSchema.foreignKeys.append(
        ForeignKey.fromdict({
            'columnReference': 'ref',
            'reference': {
                'resource': 'data',
                'columnReference': 'v',
            }
        }))
    db = Database(tg)
    with pytest.raises(sqlite3.IntegrityError):
        db.write(data=[{'v': 'x', 'ref': ['y']}])
    db.write(data=[{'v': 'x', 'ref': []}, {'v': 'y', 'ref': ['x', 'y']}])
    assert db.read()['data'][1]['ref'] == ['x', 'y']
def test_constraints(tg, datatype, value, error):
    tg.tables[0].tableSchema.columns.append(
        Column.fromvalue({'datatype': datatype, 'name': 'v'}))
    db = Database(tg)
    if error:
        with pytest.raises(sqlite3.IntegrityError):
            db.write(data=[{'v': value}])
    else:
        db.write(data=[{'v': value}])
def test_list_valued(tg):
    tg.tables[0].tableSchema.columns.append(
        Column.fromvalue({'separator': '#', 'name': 'v'}))
    db = Database(tg)
    with pytest.raises(TypeError):
        db.write(data=[{'v': [1, 2, 3]}])
    db.write(data=[{'v': ['a', 'b', ' c']}, {'v': []}])
    data = db.read()['data']
    assert data[0]['v'] == ['a', 'b', ' c']
    assert data[1]['v'] == []
def __str__(self):
    """
    A Profile is represented as tab-separated lines of grapheme specifications.
    """
    tg = TableGroup.fromvalue(self.MD)
    for col in self.column_labels:
        if col != self.GRAPHEME_COL:
            tg.tables[0].tableSchema.columns.append(
                Column.fromvalue({"name": col, "null": self.NULL}))
    return tg.tables[0].write(self.iteritems(), fname=None).decode('utf8').strip()
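# Hedged usage sketch (not part of the original source), assuming this ``__str__``
# belongs to segments.Profile: the serialization round-trips a profile to its
# tab-separated text form, e.g. for writing an orthography profile back to disk.
# The file name 'profile.tsv' is a placeholder.
import pathlib
from segments import Profile

prf = Profile({'Grapheme': 'aa', 'mapping': 'x'}, {'Grapheme': 'b', 'mapping': 'y'})
# The first line of str(prf) is the column header, e.g. "Grapheme\tmapping":
pathlib.Path('profile.tsv').write_text(str(prf), encoding='utf8')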
def run(args):
    for cl in args.repos.conceptlists.values():
        mdpath = cl.path.parent.joinpath(cl.path.name + '-metadata.json')
        if not mdpath.exists():
            cols_in_md = []
            for col in cl.metadata.tableSchema.columns:
                cnames = []  # all names or aliases csvw will recognize for this column
                cnames.append(col.name)
                if col.titles:
                    c = col.titles.getfirst()
                    cnames.append(c)
                cols_in_md.extend(cnames)

            for col in cl.cols_in_list:
                if col not in cols_in_md:
                    cl.metadata.tableSchema.columns.append(
                        Column.fromvalue(dict(name=col, datatype='string')))

            cl.tg.to_file(mdpath)
def tg_with_foreign_keys(tg):
    tg.tables[0].tableSchema.columns.append(Column.fromvalue({'name': 'v'}))
    tg.tables[0].tableSchema.primaryKey = ['v']
    tg.tables.append(
        Table.fromvalue({
            'url': 'ref',
            'tableSchema': {
                # Define two list-valued foreign keys:
                'columns': [
                    {'name': 'pk'},
                    {'name': 'ref1', 'separator': ';'},
                    {'name': 'ref2', 'separator': ';'},
                ],
                'foreignKeys': [
                    {
                        'columnReference': 'ref1',
                        'reference': {
                            'resource': 'data',
                            'columnReference': 'v',
                        }
                    },
                    {
                        'columnReference': 'ref2',
                        'reference': {
                            'resource': 'data',
                            'columnReference': 'v',
                        }
                    },
                ],
                'primaryKey': ['pk']
            }
        }))
    return tg
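# Hedged sketch (not in the original source) of a test exercising the fixture above,
# assuming csvw.db.Database stores list-valued foreign keys in association tables and
# reassembles them as lists on read:
def test_many_to_many_sketch(tg_with_foreign_keys):
    db = Database(tg_with_foreign_keys)
    db.write(
        data=[{'v': 'a'}, {'v': 'b'}],
        ref=[{'pk': '1', 'ref1': ['a', 'b'], 'ref2': ['b']}])
    row = db.read()['ref'][0]
    assert row['ref1'] == ['a', 'b']
    assert row['ref2'] == ['b']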
def write_languoids_table(self, outdir, version=None):
    version = version or self.describe()
    out = outdir / 'glottolog-languoids-{0}.csv'.format(version)
    md = outdir / (out.name + '-metadata.json')
    tg = TableGroup.fromvalue({
        "@context": "http://www.w3.org/ns/csvw",
        "dc:version": version,
        "dc:bibliographicCitation":
            "{0}. "
            "{1} [Data set]. "
            "Zenodo. https://doi.org/{2}".format(
                ' & '.join([e.name for e in self.current_editors]),
                self.publication.zenodo.title_format.format('(Version {0})'.format(version)),
                self.publication.zenodo.doi,
            ),
        "tables": [
            load(pycldf.util.pkg_path('components', 'LanguageTable-metadata.json'))
        ],
    })
    tg.tables[0].url = out.name
    for col in [
        dict(name='LL_Code'),
        dict(name='Classification', separator='/'),
        dict(name='Family_Glottocode'),
        dict(name='Family_Name'),
        dict(name='Language_Glottocode'),
        dict(name='Language_Name'),
        dict(name='Level', datatype=dict(base='string', format='family|language|dialect')),
        dict(name='Status'),
    ]:
        tg.tables[0].tableSchema.columns.append(Column.fromvalue(col))
    langs = []
    for lang in self.languoids():
        lid, lname = None, None
        if lang.level == self.languoid_levels.language:
            lid, lname = lang.id, lang.name
        elif lang.level == self.languoid_levels.dialect:
            for lname, lid, level in reversed(lang.lineage):
                if level == self.languoid_levels.language:
                    break
            else:  # pragma: no cover
                raise ValueError
        langs.append(
            dict(
                ID=lang.id,
                Name=lang.name,
                Macroarea=lang.macroareas[0].name if lang.macroareas else None,
                Latitude=lang.latitude,
                Longitude=lang.longitude,
                Glottocode=lang.id,
                ISO639P3code=lang.iso,
                LL_Code=lang.identifier.get('multitree'),
                Classification=[c[1] for c in lang.lineage],
                Language_Glottocode=lid,
                Language_Name=lname,
                Family_Name=lang.lineage[0][0] if lang.lineage else None,
                Family_Glottocode=lang.lineage[0][1] if lang.lineage else None,
                Level=lang.level.name,
                Status=lang.endangerment.status.name if lang.endangerment else None,
            ))
    tg.to_file(md)
    tg.tables[0].write(langs, fname=out)
    return md, out
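# Hedged usage sketch (not part of the original source), assuming the method above is
# defined on the pyglottolog.Glottolog API object, as the calls to self.describe() and
# self.languoids() suggest; 'path/to/glottolog', 'output' and the version string are
# placeholders:
import pathlib
from pyglottolog import Glottolog

glottolog = Glottolog('path/to/glottolog')
outdir = pathlib.Path('output')
outdir.mkdir(exist_ok=True)
md_path, csv_path = glottolog.write_languoids_table(outdir, version='x.y')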