def test_Dataset(self):
    from clld import RESOURCES
    from clld.db.models.common import Dataset, Source

    # Persist a minimal dataset, then exercise the statistics computation.
    dataset = Dataset(id='abc', domain='test')
    DBSession.add(dataset)
    DBSession.flush()
    # NOTE: `Source.id == None` is deliberate SQLAlchemy syntax for IS NULL,
    # not a Python identity check — do not "fix" it to `is None`.
    dataset.get_stats(RESOURCES, source=Source.id == None)
def test_TxtCitation(self):
    from clld.web.adapters import TxtCitation

    request = self.env['request']
    adapter = TxtCitation(None)
    # A rendered contribution citation should contain at least one period.
    self.assertTrue('.' in adapter.render(Contribution.first(), request))
    # Rendering the dataset-level citation must not raise.
    adapter.render(Dataset.first(), request)
def llod_func(args):  # pragma: no cover
    """Create an RDF dump and compute some statistics about it.

    Serializes every resource of every type in RESOURCES to a single N3
    file, records triple/resource counts plus link statistics as JSON,
    then installs a gzipped copy of the dump under the app's static
    download directory.
    """
    tmp = Path(mkdtemp())
    count_rsc = 0
    count_triples = 0

    tmp_dump = tmp.joinpath('rdf.n3')
    with open(as_posix(tmp_dump), 'w') as fp:
        for rsc in RESOURCES:
            args.log.info('Resource type %s ...' % rsc.name)
            try:
                q = DBSession.query(rsc.model)
            except InvalidRequestError:
                # Resource type without a mapped model (e.g. virtual
                # resources) — nothing to dump.
                args.log.info('... skipping')
                continue
            # Page through the table to keep memory bounded on big datasets.
            for obj in page_query(q.order_by(rsc.model.pk), n=10000, verbose=True):
                graph = get_graph(obj, args.env['request'], rsc.name)
                count_triples += len(graph)
                count_rsc += 1
                # Only the very first graph is written with the prefix header.
                fp.write(n3(graph, with_head=count_rsc == 1))
            args.log.info('... finished')

    # put in args.data_file('..', 'static', 'download')?
    md = {'path': as_posix(tmp), 'resources': count_rsc, 'triples': count_triples}
    md.update(count_links(as_posix(tmp_dump)))
    jsonlib.dump(md, args.data_file('rdf-metadata.json'))
    print(md)

    dataset = Dataset.first()
    rdf_dump = args.module_dir.joinpath(
        'static', 'download', '%s-dataset.n3' % dataset.id)
    # NOTE(review): Path.copy is not stdlib pathlib — presumably the
    # clldutils path wrapper; confirm against the file's imports.
    tmp_dump.copy(rdf_dump)
    # gzip -f overwrites any previous dump at the target location.
    check_call('gzip -f %s' % rdf_dump, shell=True)
    print(str(rdf_dump))
def register(args):  # pragma: no cover
    """Register a dataset with datahub.io.

    Creates (or updates) the CKAN package for the current dataset,
    attaching license info, RDF statistics computed by llod_func, and
    resource entries for the VoID description and the RDF dump.
    """
    dataset = Dataset.first()
    name = 'clld-' + dataset.id.lower()
    # Look the package up first; create it only if it does not exist yet.
    package = datahub('package_show', id=name)
    if not package:
        package = datahub(
            'package_create',
            **{'name': name, 'title': 'CLLD-' + dataset.id.upper(),
               'owner_org': 'clld'})
    md = {
        'url': 'http://%s' % dataset.domain,
        'notes': '%s published by the CLLD project' % dataset.name,
        'maintainer': 'CLLD Project',
        'tags': [
            {'name': 'linguistics'},
            {'name': 'lod'},
            {'name': 'llod'},
        ]}
    if dataset.contact:
        md['maintainer_email'] = dataset.contact
    # Map the dataset's license URL onto datahub's license vocabulary.
    if dataset.license:
        if 'creativecommons.org/licenses/by/' in dataset.license:
            md['license_id'] = 'cc-by-sa'
            md['license_title'] = "Creative Commons Attribution Share-Alike"
        elif 'creativecommons.org/' in dataset.license and '-nc' in dataset.license:
            md['license_id'] = 'cc-nc'
            md['license_title'] = "Creative Commons Non-Commercial (Any)"

    # Attach RDF statistics (written by llod_func) as CKAN "extras", if present.
    rdf_md = args.data_file('rdf-metadata.json')
    if rdf_md.exists():
        rdf_md = jsonlib.load(rdf_md)
        md['extras'] = [
            {'key': k, 'value': str(rdf_md[k])} for k in rdf_md.keys()
            if k.split(':')[0] in ['triples', 'resources', 'links']]

    package = datahub('package_update', id=name, **md)
    resources = [rsc['name'] for rsc in package['resources']]
    if 'VoID description' not in resources:
        rsc = datahub(
            'resource_create',
            package_id=package['id'],
            name='VoID description',
            url='http://%s/void.ttl' % dataset.domain,
            format='meta/void',
            mimetype='text/turtle')
        assert rsc

    # Only advertise the RDF dump if the gzipped file actually exists locally.
    rdf_dump = '%s-dataset.n3.gz' % dataset.id
    if ('RDF dump' not in resources) \
            and args.module_dir.joinpath('static', 'download', rdf_dump).exists():
        rsc = datahub(
            'resource_create',
            package_id=package['id'],
            name='RDF dump',
            url='http://%s/static/download/%s' % (dataset.domain, rdf_dump),
            format='text/n3',
            mimetype='text/n3')
        assert rsc
    print('>>> Make sure to upload the RDF dump to the production site.')
def test_freeze(self):
    """Round-trip test: freeze the DB to a zip, unfreeze into fresh SQLite.

    Fix: replace the long-deprecated ``self.assert_`` alias (removed in
    Python 3.12's unittest) with ``self.assertTrue``.
    """
    from clld.scripts.freeze import freeze_func, unfreeze_func

    tmp = Path(mkdtemp())
    tmp.joinpath('data').mkdir()
    tmp.joinpath('appname').mkdir()

    # Minimal stand-in for the argparse namespace the scripts expect.
    class Args(object):
        env = self.env
        module_dir = tmp.joinpath('appname').resolve()
        module = Mock(__name__='appname')

        def data_file(self, *comps):
            return tmp.resolve().joinpath('data', *comps)

    DBSession.flush()
    args = Args()
    freeze_func(args, dataset=Dataset.first(), with_history=False)
    self.assertTrue(tmp.joinpath('data.zip').exists())

    # Unfreeze into a brand-new in-memory SQLite DB, which must start empty.
    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    self.assertEqual(
        engine.execute('select count(*) from language').fetchone()[0], 0)
    unfreeze_func(args, engine=engine)

    s1 = DBSession
    s2 = sessionmaker(bind=engine)()
    self.assertEqual(s1.query(Language).count(), s2.query(Language).count())

    # Spot-check that column values survive the round trip.
    l1 = s1.query(Language).filter(Language.latitude != null()).first()
    l2 = s2.query(Language).filter(Language.pk == l1.pk).first()
    self.assertEqual(l1.created, l2.created)
    self.assertEqual(l1.latitude, l2.latitude)
    self.assertEqual(l1.description, l2.description)

    contrib = s2.query(Contribution).filter(
        Contribution.id == 'contribution').one()
    self.assertTrue(contrib.primary_contributors)
    self.assertTrue(contrib.secondary_contributors)

    rmtree(tmp, ignore_errors=True)
def test_freeze(self):
    """Freeze the current DB to a zip archive and unfreeze it into SQLite."""
    from clld.scripts.freeze import freeze_func, unfreeze_func

    workdir = Path(mkdtemp())
    workdir.joinpath('data').mkdir()
    workdir.joinpath('appname').mkdir()

    # Minimal argparse-namespace stand-in required by the freeze scripts.
    class ScriptArgs(object):
        env = self.env
        module_dir = workdir.joinpath('appname').resolve()
        module = Mock(__name__='appname')

        def data_file(self, *comps):
            return workdir.resolve().joinpath('data', *comps)

    DBSession.flush()
    script_args = ScriptArgs()
    freeze_func(script_args, dataset=Dataset.first(), with_history=False)
    self.assert_(workdir.joinpath('data.zip').exists())

    # The target SQLite DB must start out empty.
    sqlite_engine = create_engine('sqlite://')
    Base.metadata.create_all(sqlite_engine)
    row = sqlite_engine.execute('select count(*) from language').fetchone()
    self.assertEqual(row[0], 0)
    unfreeze_func(script_args, engine=sqlite_engine)

    src_session = DBSession
    dst_session = sessionmaker(bind=sqlite_engine)()
    self.assertEqual(
        src_session.query(Language).count(),
        dst_session.query(Language).count())

    # Compare a language with a latitude across both sessions.
    lang_src = src_session.query(Language).filter(
        Language.latitude != null()).first()
    lang_dst = dst_session.query(Language).filter(
        Language.pk == lang_src.pk).first()
    self.assertEqual(lang_src.created, lang_dst.created)
    self.assertEqual(lang_src.latitude, lang_dst.latitude)
    self.assertEqual(lang_src.description, lang_dst.description)

    contribution = dst_session.query(Contribution).filter(
        Contribution.id == 'contribution').one()
    self.assert_(contribution.primary_contributors)
    self.assert_(contribution.secondary_contributors)

    rmtree(workdir, ignore_errors=True)
def llod_func(args):  # pragma: no cover
    """Create an RDF dump and compute some statistics about it."""
    workdir = Path(mkdtemp())
    n_resources = 0
    n_triples = 0

    dump_path = workdir.joinpath('rdf.n3')
    with open(as_posix(dump_path), 'w') as outfile:
        for rsc in RESOURCES:
            args.log.info('Resource type %s ...' % rsc.name)
            try:
                query = DBSession.query(rsc.model)
            except InvalidRequestError:
                # No mapped model for this resource type; nothing to dump.
                args.log.info('... skipping')
                continue
            # Page through the table so memory use stays bounded.
            paged = page_query(query.order_by(rsc.model.pk), n=10000, verbose=True)
            for obj in paged:
                graph = get_graph(obj, args.env['request'], rsc.name)
                n_triples += len(graph)
                n_resources += 1
                # Only the first serialized graph carries the prefix header.
                outfile.write(n3(graph, with_head=n_resources == 1))
            args.log.info('... finished')

    # put in args.data_file('..', 'static', 'download')?
    metadata = {
        'path': as_posix(workdir),
        'resources': n_resources,
        'triples': n_triples,
    }
    metadata.update(count_links(as_posix(dump_path)))
    jsonlib.dump(metadata, args.data_file('rdf-metadata.json'))
    print(metadata)

    dataset = Dataset.first()
    target = args.module_dir.joinpath(
        'static', 'download', '%s-dataset.n3' % dataset.id)
    dump_path.copy(target)
    check_call('gzip -f %s' % target, shell=True)
    print(str(target))
def main(config=None, trust=None):
    """Load dataset metadata and import features, languages and CLDF data.

    :param config: unused here; kept for interface compatibility.
    :param trust: paths whose on-disk files are authoritative; trusted
        files are not rewritten after import. Defaults to
        ``[languages_path, features_path]``.

    Fix: the original used a mutable default argument
    (``trust=[languages_path, features_path]``), which is shared across
    calls; replaced with the None-sentinel idiom.
    """
    if trust is None:
        trust = [languages_path, features_path]

    with open("metadata.json") as md:
        dataset_metadata = json.load(md)

    DBSession.add(
        Dataset(
            id=dataset_metadata["id"],
            name=dataset_metadata["name"],
            publisher_name=dataset_metadata["publisher_name"],
            publisher_place=dataset_metadata["publisher_place"],
            publisher_url=dataset_metadata["publisher_url"],
            license=dataset_metadata["license"],
            domain=dataset_metadata["domain"],
            contact=dataset_metadata["contact"],
            jsondata={
                'license_icon': dataset_metadata["license_icon"],
                'license_name': dataset_metadata["license_name"]}))

    features = import_features()
    languages = import_languages()
    import_cldf("datasets", features, languages, trust=trust)

    # Write back any tables we were not told to trust as-is.
    if languages_path not in trust:
        languages.to_csv(languages_path, sep='\t', encoding='utf-8')
    if features_path not in trust:
        features.to_csv(features_path, sep='\t', encoding='utf-8')
def register(args):  # pragma: no cover
    """Register a dataset with datahub.io."""
    ds = Dataset.first()
    pkg_name = 'clld-' + ds.id.lower()

    # Fetch the existing CKAN package, creating it on first registration.
    pkg = datahub('package_show', id=pkg_name)
    if not pkg:
        pkg = datahub(
            'package_create',
            name=pkg_name,
            title='CLLD-' + ds.id.upper(),
            owner_org='clld')

    metadata = {
        'url': 'http://%s' % ds.domain,
        'notes': '%s published by the CLLD project' % ds.name,
        'maintainer': 'CLLD Project',
        'tags': [{'name': 'linguistics'}, {'name': 'lod'}, {'name': 'llod'}],
    }
    if ds.contact:
        metadata['maintainer_email'] = ds.contact

    # Translate the dataset's license URL into datahub's license vocabulary.
    if ds.license:
        if 'creativecommons.org/licenses/by/' in ds.license:
            metadata['license_id'] = 'cc-by-sa'
            metadata['license_title'] = "Creative Commons Attribution Share-Alike"
        elif 'creativecommons.org/' in ds.license and '-nc' in ds.license:
            metadata['license_id'] = 'cc-nc'
            metadata['license_title'] = "Creative Commons Non-Commercial (Any)"

    # Attach RDF statistics as CKAN extras when llod_func has produced them.
    stats_path = args.data_file('rdf-metadata.json')
    if stats_path.exists():
        stats = jsonlib.load(stats_path)
        metadata['extras'] = [
            {'key': key, 'value': str(stats[key])}
            for key in stats.keys()
            if key.split(':')[0] in ['triples', 'resources', 'links']]

    pkg = datahub('package_update', id=pkg_name, **metadata)
    existing = [res['name'] for res in pkg['resources']]

    if 'VoID description' not in existing:
        res = datahub(
            'resource_create',
            package_id=pkg['id'],
            name='VoID description',
            url='http://%s/void.ttl' % ds.domain,
            format='meta/void',
            mimetype='text/turtle')
        assert res

    dump_name = '%s-dataset.n3.gz' % ds.id
    dump_exists = args.module_dir.joinpath(
        'static', 'download', dump_name).exists()
    if 'RDF dump' not in existing and dump_exists:
        res = datahub(
            'resource_create',
            package_id=pkg['id'],
            name='RDF dump',
            url='http://%s/static/download/%s' % (ds.domain, dump_name),
            format='text/n3',
            mimetype='text/n3')
        assert res
    print('>>> Make sure to upload the RDF dump to the production site.')
def test_TxtCitation(env):
    from clld.web.adapters import TxtCitation

    req = env['request']
    adapter = TxtCitation(None)
    # A contribution citation should contain at least one period.
    rendered = adapter.render(Contribution.first(), req)
    assert '.' in rendered
    # The dataset-level citation just has to render without raising.
    adapter.render(Dataset.first(), req)