class Cache(object):

    def __init__(self, dir_=None):
        self._dir = Path(dir_ or CACHE_DIR)
        if not self._dir.exists():
            self._dir.mkdir(parents=True)  # pragma: no cover

    def _path(self, key):
        return self._dir.joinpath(path_component(key))

    def __len__(self):
        return len(list(self.keys()))

    def __getitem__(self, item):
        with self._path(item).open('rb') as fp:
            return pickle.load(fp)

    def __setitem__(self, key, value):
        with self._path(key).open('wb') as fp:
            pickle.dump(value, fp)

    def __delitem__(self, key):
        remove(self._path(key))

    def __contains__(self, item):
        return self._path(item).exists()

    def keys(self):
        for p in self._dir.iterdir():
            yield as_unicode(p.name)

    def clear(self):
        for key in self.keys():
            remove(self._path(key))
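# Usage sketch (not part of the original snippet): the Cache above behaves like
# a persistent dict of pickled values, one file per key. CACHE_DIR,
# path_component, remove and as_unicode are assumed to live in the surrounding
# module; the stubs below only make the sketch self-contained.
import pickle
import tempfile
from pathlib import Path

CACHE_DIR = tempfile.mkdtemp()   # stand-in for the package-level default

def path_component(key):
    return str(key)              # assumes keys are already filesystem-safe

def as_unicode(s):
    return str(s)

def remove(p):
    p.unlink()                   # thin wrapper, as assumed by __delitem__

cache = Cache()
cache['langs'] = {'deu': 'German'}   # pickled to <CACHE_DIR>/langs
assert 'langs' in cache and cache['langs']['deu'] == 'German'
assert list(cache.keys()) == ['langs'] and len(cache) == 1
cache.clear()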
def llod_func(args):  # pragma: no cover
    """Create an RDF dump and compute some statistics about it."""
    tmp = Path(mkdtemp())
    count_rsc = 0
    count_triples = 0

    tmp_dump = tmp.joinpath('rdf.n3')
    with open(as_posix(tmp_dump), 'w') as fp:
        for rsc in RESOURCES:
            args.log.info('Resource type %s ...' % rsc.name)
            try:
                q = DBSession.query(rsc.model)
            except InvalidRequestError:
                args.log.info('... skipping')
                continue
            for obj in page_query(q.order_by(rsc.model.pk), n=10000, verbose=True):
                graph = get_graph(obj, args.env['request'], rsc.name)
                count_triples += len(graph)
                count_rsc += 1
                fp.write(n3(graph, with_head=count_rsc == 1))
            args.log.info('... finished')

    # put in args.data_file('..', 'static', 'download')?
    md = {'path': as_posix(tmp), 'resources': count_rsc, 'triples': count_triples}
    md.update(count_links(as_posix(tmp_dump)))
    jsonlib.dump(md, args.data_file('rdf-metadata.json'))
    print(md)

    dataset = Dataset.first()
    rdf_dump = args.module_dir.joinpath(
        'static', 'download', '%s-dataset.n3' % dataset.id)
    tmp_dump.copy(rdf_dump)
    check_call('gzip -f %s' % rdf_dump, shell=True)
    print(str(rdf_dump))
def write_info(self, outdir=None):
    outdir = outdir or self.dir
    if not isinstance(outdir, Path):
        outdir = Path(outdir)
    if outdir.name != self.id:
        outdir = outdir.joinpath(self.id)
    if not outdir.exists():
        outdir.mkdir()
    fname = outdir.joinpath(INFO_FILENAME)
    self.cfg.write(fname)
    if os.linesep == '\n':
        with fname.open(encoding='utf8') as fp:
            text = fp.read()
        with fname.open('w', encoding='utf8') as fp:
            fp.write(text.replace('\n', '\r\n'))
    return fname
def __init__(self, name, dir_=None, default=None, **kw):
    """Initialization.

    :param name: Basename for the config file (suffix .ini will be appended).
    :param default: Default content of the config file.
    """
    INI.__init__(self, kw, allow_no_value=True)
    self.name = name
    config_dir = Path(dir_ or CONFIG_DIR)
    if default:
        if isinstance(default, text_type):
            self.read_string(default)
        #elif isinstance(default, (dict, OrderedDict)):
        #    self.read_dict(default)
    cfg_path = config_dir.joinpath(name + '.ini')
    if cfg_path.exists():
        assert cfg_path.is_file()
        self.read(cfg_path.as_posix())
    else:
        if not config_dir.exists():
            try:
                config_dir.mkdir()
            except OSError:  # pragma: no cover
                # this happens when run on travis-ci, by a system user.
                pass
        if config_dir.exists():
            self.write(cfg_path.as_posix())
    self.path = cfg_path
def write(self, outdir='.', suffix='.csv', cited_sources_only=False, archive=False):
    outdir = Path(outdir)
    if not outdir.exists():
        raise ValueError(outdir.as_posix())

    close = False
    if archive:
        if isinstance(archive, Archive):
            container = archive
        else:
            container = Archive(outdir.joinpath(self.name + '.zip'), mode='w')
            close = True
    else:
        container = outdir

    fname = Path(outdir).joinpath(self.name + suffix)
    if fname.suffix in TAB_SUFFIXES:
        self.table.dialect.delimiter = '\t'

    with UnicodeWriter(
            None if isinstance(container, Archive) else fname,
            delimiter=self.table.dialect.delimiter) as writer:
        writer.writerow(self.fields)
        for row in self.rows:
            writer.writerow(row.to_list())

    if isinstance(container, Archive):
        container.write_text(writer.read(), fname.name)
    self.table.url = fname.name

    self.metadata.write(Dataset.filename(fname, 'metadata'), container)
    ids = self._cited_sources if cited_sources_only else None
    self.sources.write(Dataset.filename(fname, 'sources'), container, ids=ids)
    if close:
        container.close()
def __init__(self, name, default=None, **kw):
    """Initialization.

    :param name: Basename for the config file (suffix .ini will be appended).
    :param default: Default content of the config file.
    """
    self.name = name
    self.default = default
    config_dir = Path(kw.pop('config_dir', None) or DIR)
    RawConfigParser.__init__(self, kw, allow_no_value=True)
    if self.default:
        if PY3:
            fp = io.StringIO(self.default)
        else:
            fp = io.BytesIO(self.default.encode('utf8'))
        self.readfp(fp)

    cfg_path = config_dir.joinpath(name + '.ini')
    if cfg_path.exists():
        assert cfg_path.is_file()
        self.read(cfg_path.as_posix())
    else:
        if not config_dir.exists():
            try:
                config_dir.mkdir()
            except OSError:  # pragma: no cover
                # this happens when run on travis-ci, by a system user.
                pass
        if config_dir.exists():
            with open(cfg_path.as_posix(), 'w') as fp:
                self.write(fp)
    self.path = cfg_path
def test_freeze(self):
    from clld.scripts.freeze import freeze_func, unfreeze_func

    tmp = Path(mkdtemp())
    tmp.joinpath('data').mkdir()
    tmp.joinpath('appname').mkdir()

    class Args(object):
        env = self.env
        module_dir = tmp.joinpath('appname').resolve()
        module = Mock(__name__='appname')

        def data_file(self, *comps):
            return tmp.resolve().joinpath('data', *comps)

    DBSession.flush()
    args = Args()
    freeze_func(args, dataset=Dataset.first(), with_history=False)
    self.assert_(tmp.joinpath('data.zip').exists())

    engine = create_engine('sqlite://')
    Base.metadata.create_all(engine)
    self.assertEqual(
        engine.execute('select count(*) from language').fetchone()[0], 0)
    unfreeze_func(args, engine=engine)

    s1 = DBSession
    s2 = sessionmaker(bind=engine)()
    self.assertEqual(s1.query(Language).count(), s2.query(Language).count())

    l1 = s1.query(Language).filter(Language.latitude != null()).first()
    l2 = s2.query(Language).filter(Language.pk == l1.pk).first()
    self.assertEqual(l1.created, l2.created)
    self.assertEqual(l1.latitude, l2.latitude)
    self.assertEqual(l1.description, l2.description)

    contrib = s2.query(Contribution).filter(Contribution.id == 'contribution').one()
    self.assert_(contrib.primary_contributors)
    self.assert_(contrib.secondary_contributors)

    rmtree(tmp, ignore_errors=True)
def create(self, dir_, content):
    """Write ``content`` to a file using ``dir_`` as file-system directory.

    :return: File-system path of the file that was created.
    """
    if not isinstance(dir_, Path):
        dir_ = Path(dir_)
    p = dir_.joinpath(self.relpath)
    if not p.parent.exists():
        p.parent.mkdir(parents=True)
    with open(p.as_posix(), 'wb') as fp:
        fp.write(content)
    return p.as_posix()
def main(args):
    Index('ducet', collkey(common.Value.name)).create(DBSession.bind)
    repos = Path(os.path.expanduser('~')).joinpath('venvs/lexirumah/lexirumah-data')

    with transaction.manager:
        dataset = common.Dataset(
            id=lexirumah.__name__,
            name="lexirumah",
            publisher_name="Max Planck Institute for the Science of Human History",
            publisher_place="Jena",
            publisher_url="http://shh.mpg.de",
            license="http://creativecommons.org/licenses/by/4.0/",
            domain='lexirumah.model-ling.eu',
            contact='*****@*****.**',
            jsondata={
                'license_icon': 'cc-by.png',
                'license_name': 'Creative Commons Attribution 4.0 International License'})
        DBSession.add(dataset)

        glottolog_repos = Path(
            lexirumah.__file__).parent.parent.parent.parent.joinpath(
                'glottolog3', 'glottolog')
        languoids = {l.id: l for l in Glottolog(glottolog_repos).languoids()}
        concepticon = Concepticon(
            Path(lexirumah.__file__).parent.parent.parent.parent.joinpath(
                'concepticon', 'concepticon-data'))
        conceptsets = {c.id: c for c in concepticon.conceptsets.values()}

        skip = True
        for dname in sorted(repos.joinpath('datasets').iterdir(), key=lambda p: p.name):
            #if dname.name == 'benuecongo':
            #    skip = False
            #if skip:
            #    continue
            if dname.is_dir() and dname.name != '_template':
                mdpath = dname.joinpath('cldf', 'metadata.json')
                if mdpath.exists():
                    print(dname.name)
                    import_cldf(dname, load(mdpath), languoids, conceptsets)

    with transaction.manager:
        load_families(
            Data(),
            DBSession.query(LexiRumahLanguage),
            glottolog_repos=glottolog_repos,
            isolates_icon='tcccccc')
def write_info(self, outdir=None):
    outdir = outdir or self.id
    if not isinstance(outdir, Path):
        outdir = Path(outdir)
    if not outdir.exists():
        outdir.mkdir()
    fname = outdir.joinpath(self.fname('.ini'))
    self.cfg.write(fname)
    if os.linesep == '\n':
        with fname.open(encoding='utf8') as fp:
            text = fp.read()
        with fname.open('w', encoding='utf8') as fp:
            fp.write(text.replace('\n', '\r\n'))
    return fname
def languoids(self, ids=None, maxlevel=models.Level.dialect):
    nodes = {}
    for dirpath, dirnames, filenames in os.walk(as_posix(self.tree)):
        dp = Path(dirpath)
        if dp.name in nodes and nodes[dp.name][2] > maxlevel:
            del dirnames[:]
        for dirname in dirnames:
            if ids is None or dirname in ids:
                lang = languoids.Languoid.from_dir(dp.joinpath(dirname), nodes=nodes)
                if lang.level <= maxlevel:
                    yield lang
class WithTempDirMixin(object):
    """Composable test fixture providing access to a temporary directory.

    http://nedbatchelder.com/blog/201210/multiple_inheritance_is_hard.html
    """

    def setUp(self):
        super(WithTempDirMixin, self).setUp()
        self.tmp = Path(tempfile.mkdtemp())

    def tearDown(self):
        rmtree(self.tmp, ignore_errors=True)
        super(WithTempDirMixin, self).tearDown()

    def tmp_path(self, *comps):
        return self.tmp.joinpath(*comps)
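# Usage sketch (assumption: the mixin is combined with unittest.TestCase, as
# the multiple-inheritance link in the docstring suggests, and Path is
# pathlib-compatible). Each test gets a fresh temporary directory via
# tmp_path(); tearDown removes it again.
import unittest

class FileWritingTests(WithTempDirMixin, unittest.TestCase):
    def test_write(self):
        p = self.tmp_path('out.txt')           # lives inside self.tmp
        p.write_text('hello', encoding='utf8')
        self.assertTrue(p.exists())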
def from_metadata(cls, fname):
    fname = Path(fname)
    if fname.is_dir():
        name = '{0}{1}'.format(cls.__name__, MD_SUFFIX)
        tablegroup = TableGroup.from_file(pkg_path('modules', name))
        # adapt the path of the metadata file such that paths to tables are resolved
        # correctly:
        tablegroup._fname = fname.joinpath(name)
    else:
        tablegroup = TableGroup.from_file(fname)
    for mod in get_modules():
        if mod.match(tablegroup):
            return mod.cls(tablegroup)
    return cls(tablegroup)
def main(args):
    Index('ducet', collkey(common.Value.name)).create(DBSession.bind)
    repos = Path(os.path.expanduser('~')).joinpath('venvs/lexibank/lexibank-data')

    with transaction.manager:
        dataset = common.Dataset(
            id=lexibank.__name__,
            name="lexibank",
            publisher_name="Max Planck Institute for the Science of Human History",
            publisher_place="Jena",
            publisher_url="http://shh.mpg.de",
            license="http://creativecommons.org/licenses/by/4.0/",
            domain='lexibank.clld.org',
            contact='*****@*****.**',
            jsondata={
                'license_icon': 'cc-by.png',
                'license_name': 'Creative Commons Attribution 4.0 International License'})
        DBSession.add(dataset)

        glottolog = Glottolog(
            Path(lexibank.__file__).parent.parent.parent.parent.joinpath(
                'glottolog3', 'glottolog'))
        languoids = {l.id: l for l in glottolog.languoids()}
        concepticon = Concepticon(
            Path(lexibank.__file__).parent.parent.parent.parent.joinpath(
                'concepticon', 'concepticon-data'))
        conceptsets = {c['ID']: c for c in concepticon.conceptsets()}

        for dname in repos.joinpath('datasets').iterdir():
            #if dname.name not in ['acbd']:
            #    continue
            if dname.is_dir() and dname.name != '_template':
                #if dname.name != 'zenodo34092':
                #    continue
                mdpath = dname.joinpath('metadata.json')
                if mdpath.exists():
                    print(dname.name)
                    import_cldf(dname, load(mdpath), languoids, conceptsets)

    with transaction.manager:
        load_families(
            Data(),
            DBSession.query(LexibankLanguage),
            glottolog=languoids,
            isolates_icon='tcccccc')
class API(UnicodeMixin):
    """An API base class to provide programmatic access to data in a git repository."""

    # A light-weight way to specify a default repository location (without having to
    # overwrite __init__)
    __repos_path__ = None

    def __init__(self, repos=None):
        self.repos = Path(repos or self.__repos_path__)

    def __unicode__(self):
        name = self.repos.resolve().name if self.repos.exists() else self.repos.name
        return '<{0} repository {1} at {2}>'.format(
            name, git_describe(self.repos), self.repos)

    def path(self, *comps):
        return self.repos.joinpath(*comps)

    @property
    def appdir(self):
        return self.path('app')

    @property
    def appdatadir(self):
        return self.appdir.joinpath('data')

    @classmethod
    def app_wrapper(cls, func):
        @wraps(func)
        def wrapper(args):
            api = cls(args.repos)
            if not api.appdatadir.exists() or '--recreate' in args.args:
                api.appdatadir.mkdir(exist_ok=True)
                args.api = api
                func(args)
            index = api.appdir / 'index.html'
            if index.exists():
                webbrowser.open(index.resolve().as_uri())
        return wrapper
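# Sketch of deriving a concrete API (class name and repository path are
# invented for illustration; the pattern follows the __repos_path__ hook and
# the path() helper defined above).
class MyDataAPI(API):
    __repos_path__ = '/path/to/my-data'   # hypothetical default checkout

api = MyDataAPI()                         # or MyDataAPI('/some/other/clone')
readme = api.path('README.md')            # -> Path('/path/to/my-data/README.md')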
def unfreeze_func(args, engine=None):
    try:
        importlib.import_module(args.module.__name__)
    except ImportError:
        pass  # pragma: no cover
    engine = engine or DBSession.get_bind()
    data_dir = Path(mkdtemp())

    with ZipFile(as_posix(args.module_dir.joinpath('..', 'data.zip'))) as fp:
        fp.extractall(as_posix(data_dir))

    db_version = None
    for table in Base.metadata.sorted_tables:
        csv = data_dir.joinpath('%s.csv' % table.name)
        if csv.exists():
            db_version = load(table, csv, engine)

    if db_version:
        set_alembic_version(engine, db_version)  # pragma: no cover

    rmtree(data_dir)
def from_metadata(cls, fname):
    fname = Path(fname)
    if fname.is_dir():
        name = '{0}{1}'.format(cls.__name__, MD_SUFFIX)
        tablegroup = TableGroup.from_file(pkg_path('modules', name))
        # adapt the path of the metadata file such that paths to tables are resolved
        # correctly:
        tablegroup._fname = fname.joinpath(name)
    else:
        tablegroup = TableGroup.from_file(fname)

    comps = Counter()
    for table in tablegroup.tables:
        try:
            comps.update([Dataset.get_tabletype(table)])
        except ValueError:
            pass
    if comps and comps.most_common(1)[0][1] > 1:
        raise ValueError('{0}: duplicate components!'.format(fname))

    for mod in get_modules():
        if mod.match(tablegroup):
            return mod.cls(tablegroup)
    return cls(tablegroup)
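# Usage sketch for from_metadata (assumed to be a classmethod on a pycldf-style
# Dataset class; the paths are hypothetical). Passing a directory falls back to
# the packaged default metadata for the class, as implemented above:
#
#     ds = Dataset.from_metadata('/path/to/cldf/Wordlist-metadata.json')
#     ds = Dataset.from_metadata('/path/to/cldf')   # uses packaged defaults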
class Repos(UnicodeMixin):

    def __init__(self, dir_):
        self.dir = Path(dir_)
        self.datasets = [
            Dataset(base_dir=self.dir.joinpath('datasets'), **r)
            for r in reader(self.dir.joinpath('datasets', 'index.csv'), dicts=True)]
        self.phylogenies = [
            Phylogeny(base_dir=self.dir.joinpath('phylogenies'), **r)
            for r in reader(self.dir.joinpath('phylogenies', 'index.csv'), dicts=True)]
        self.societies = {
            s.id: s for s in chain.from_iterable(d.societies for d in self.datasets)}
        self.variables = {
            v.id: v for v in chain.from_iterable(d.variables for d in self.datasets)}
        self.sources = BibFile(self.dir.joinpath('datasets', 'sources.bib'))

    def __unicode__(self):
        return '<D-PLACE data repos {0} at {1}>'.format(git_describe(self.dir), self.dir)

    def path(self, *comps):
        return self.dir.joinpath(*comps)

    def read_csv(self, *comps, **kw):
        return list(reader(self.path(*comps), **kw))

    def read_json(self, *comps):
        return jsonlib.load(self.path(*comps))

    def iter_data(self, datasets=None, variables=None, societies=None):
        for ds in self.datasets:
            if datasets and ds.id in datasets:
                for record in ds.data:
                    if variables and record.var_id not in variables:
                        continue
                    if societies and record.soc_id not in societies:
                        continue
                    yield record
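# Usage sketch for Repos.iter_data (the checkout location is hypothetical, and
# this only runs against an actual data checkout with the layout __init__
# reads: datasets/index.csv, phylogenies/index.csv, datasets/sources.bib).
# The attribute names follow the filters used in iter_data above.
repos = Repos('/path/to/dplace-data')
for record in repos.iter_data(datasets={'EA'}, variables={'EA042'}):
    print(record.soc_id, record.var_id)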
def includeme(config):
    """Upgrading:

    - register utilities "by hand", after config.include('clld.web.app')
    - add routes by hand (and remove these from the **kw passed to Configurator)

    :param config:
    :return:
    """
    #
    # now we exploit the default package layout as created via the CLLD scaffold:
    #
    # note: the following exploits the import time side effect of modifying the webassets
    # environment!
    root_package = config.root_package.__name__
    pkg_dir = Path(config.root_package.__file__).parent.resolve()
    maybe_import('%s.assets' % root_package, pkg_dir=pkg_dir)

    json_renderer = JSON()
    json_renderer.add_adapter(datetime.datetime, lambda obj, req: obj.isoformat())
    json_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('json', json_renderer)

    jsonp_renderer = JSONP(param_name='callback')
    jsonp_renderer.add_adapter(datetime.datetime, lambda obj, req: obj.isoformat())
    jsonp_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('jsonp', jsonp_renderer)

    config.set_request_factory(ClldRequest)
    config.registry.registerUtility(CtxFactoryQuery(), interfaces.ICtxFactoryQuery)
    config.registry.registerUtility(OlacConfig(), interfaces.IOlacConfig)
    config.registry.registerUtility(CldfConfig(), interfaces.ICldfConfig)

    # initialize the db connection
    engine = engine_from_config(config.registry.settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    Base.metadata.bind = engine

    try:
        git_tag = git_describe(Path(pkg_dir).parent)
    except ValueError:  # pragma: no cover
        git_tag = None

    config.add_settings({
        'pyramid.default_locale_name': 'en',
        'clld.pkg': root_package,
        'clld.git_tag': git_tag,
        'clld.parameters': {}})

    if 'clld.files' in config.registry.settings:
        # deployment-specific location of static data files
        abspath = Path(config.registry.settings['clld.files']).resolve()
        config.add_settings({'clld.files': abspath})
        config.add_static_view('files', str(abspath))

    # event subscribers:
    config.add_subscriber(add_localizer, events.NewRequest)
    config.add_subscriber(init_map, events.ContextFound)
    config.add_subscriber(
        partial(
            add_renderer_globals,
            maybe_import('%s.util' % root_package, pkg_dir=pkg_dir)),
        events.BeforeRender)

    #
    # make it easy to register custom functionality
    #
    for name, func in {
        'register_utility': register_utility,
        'register_datatable': partial(register_cls, interfaces.IDataTable),
        'register_map': partial(register_cls, interfaces.IMap),
        'register_menu': register_menu,
        'register_resource': register_resource,
        'register_adapter': register_adapter,
        'register_adapters': register_adapters,
        'register_download': register_download,
        'register_staticresource': register_staticresource,
        'add_route_and_view': add_route_and_view,
        'add_settings_from_file': add_settings_from_file,
        'add_301': add_301,
        'add_410': add_410,
        'add_page': add_page,
        'register_resource_routes_and_views': register_resource_routes_and_views,
    }.items():
        config.add_directive(name, func)

    #
    # routes and views
    #
    config.add_static_view('clld-static', 'clld:web/static')
    config.add_static_view('static', '%s:static' % root_package)

    config.add_route_and_view('_js', '/_js', js, http_cache=3600)

    # add some maintenance hatches
    config.add_route_and_view('_raise', '/_raise', _raise)
    config.add_route_and_view('_ping', '/_ping', _ping, renderer='json')

    # sitemap support:
    config.add_route_and_view('robots', '/robots.txt', robots)
    config.add_route_and_view('sitemapindex', '/sitemap.xml', sitemapindex)
    config.add_route_and_view('sitemap', '/sitemap.{rsc}.{n}.xml', sitemap)
    config.add_route('resourcemap', '/resourcemap.json')
    config.add_view(resourcemap, route_name='resourcemap', renderer='jsonp')
    config.add_route_and_view(
        'select_combination', '/_select_combination', select_combination)

    config.add_route_and_view('unapi', '/unapi', unapi)
    config.add_route_and_view('olac', '/olac', olac)

    config.add_settings_from_file(pkg_dir.joinpath('appconf.ini'))
    if not config.registry.settings.get('mako.directories'):
        config.add_settings({'mako.directories': ['clld:web/templates']})

    for rsc in RESOURCES:
        config.register_resource_routes_and_views(rsc)
        config.register_datatable(
            rsc.plural, getattr(datatables, rsc.plural.capitalize(), DataTable))
        register_resource_adapters(config, rsc)

    # maps
    config.register_map('languages', Map)
    config.register_map('language', LanguageMap)
    config.register_map('parameter', ParameterMap)
    config.register_map('combination', CombinationMap)

    config.include('clld.web.adapters')

    for icon in ICONS:
        config.registry.registerUtility(icon, interfaces.IIcon, name=icon.name)
    config.registry.registerUtility(ORDERED_ICONS, interfaces.IIconList)
    config.registry.registerUtility(MapMarker(), interfaces.IMapMarker)

    #
    # inspect default locations for views and templates:
    #
    home_comp = OrderedDict()
    for name, template in [
        ('introduction', False),
        ('about', False),
        ('terms', False),
        ('glossary', False),
        ('history', False),
        ('changes', False),
        ('credits', False),
        ('legal', True),
        ('download', True),
        ('contact', True),
        ('help', False),
    ]:
        home_comp[name] = template

    if pkg_dir.joinpath('templates').exists():
        for p in pkg_dir.joinpath('templates').iterdir():
            if p.stem in home_comp and p.suffix == '.mako':
                home_comp[p.stem] = True

    for name, template in home_comp.items():
        if template:
            config.add_page(name)

    config.add_settings({'home_comp': [k for k in home_comp.keys() if home_comp[k]]})

    if 'clld.favicon' not in config.registry.settings:
        favicon = {'clld.favicon': 'clld:web/static/images/favicon.ico'}
        # hard to test (in particular on travis) and without too much consequence
        # (and the consequences faced are easy to spot).
        if pkg_dir.joinpath('static', 'favicon.ico').exists():  # pragma: no cover
            favicon['clld.favicon'] = root_package + ':static/favicon.ico'
        config.add_settings(favicon)

    config.add_settings({
        'clld.favicon_hash': md5(abspath_from_asset_spec(
            config.registry.settings['clld.favicon']))})

    translation_dirs = ['clld:locale']
    if pkg_dir.joinpath('locale').exists():
        translation_dirs.append('%s:locale' % root_package)  # pragma: no cover
    config.add_translation_dirs(*translation_dirs)

    if pkg_dir.joinpath('static/publisher_logo.png').exists():  # pragma: no cover
        config.add_settings({
            'clld.publisher_logo': '%s:static/publisher_logo.png' % root_package})

    if asbool(config.registry.settings.get('clld.pacific_centered_maps')):
        geojson.pacific_centered()

    v = maybe_import('%s.views' % root_package, pkg_dir=pkg_dir)
    if v:
        config.scan(v)  # pragma: no cover

    menuitems = config.registry.settings.get(
        'clld.menuitems_list',
        ['contributions', 'parameters', 'languages', 'contributors'])
    config.register_menu(('dataset', dict(label='Home')), *menuitems)

    config.include('pyramid_mako')

    for name in ['adapters', 'datatables', 'maps']:
        mod = maybe_import('%s.%s' % (root_package, name), pkg_dir=pkg_dir)
        if mod and hasattr(mod, 'includeme'):
            config.include(mod)

    config.register_download(CldfDownload(common.Dataset, root_package))
def includeme(config):
    """Upgrading:

    - register utilities "by hand", after config.include('clld.web.app')
    - add routes by hand (and remove these from the **kw passed to Configurator)

    :param config:
    :return:
    """
    #
    # now we exploit the default package layout as created via the CLLD scaffold:
    #
    # note: the following exploits the import time side effect of modifying the webassets
    # environment!
    root_package = config.root_package.__name__
    maybe_import('%s.assets' % root_package)
    pkg_dir = Path(config.root_package.__file__).parent.resolve()

    json_renderer = JSON()
    json_renderer.add_adapter(datetime.datetime, lambda obj, req: obj.isoformat())
    json_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('json', json_renderer)

    jsonp_renderer = JSONP(param_name='callback')
    jsonp_renderer.add_adapter(datetime.datetime, lambda obj, req: obj.isoformat())
    jsonp_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('jsonp', jsonp_renderer)

    config.set_request_factory(ClldRequest)
    config.registry.registerUtility(CtxFactoryQuery(), interfaces.ICtxFactoryQuery)
    config.registry.registerUtility(OlacConfig(), interfaces.IOlacConfig)

    # initialize the db connection
    engine = engine_from_config(config.registry.settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    Base.metadata.bind = engine

    config.add_settings({
        'pyramid.default_locale_name': 'en',
        'clld.pkg': root_package,
        'clld.parameters': {}})

    if 'clld.files' in config.registry.settings:
        # deployment-specific location of static data files
        abspath = Path(config.registry.settings['clld.files']).resolve()
        config.add_settings({'clld.files': abspath})
        config.add_static_view('files', abspath.as_posix())

    # event subscribers:
    config.add_subscriber(add_localizer, events.NewRequest)
    config.add_subscriber(init_map, events.ContextFound)
    config.add_subscriber(
        partial(add_renderer_globals, maybe_import('%s.util' % root_package)),
        events.BeforeRender)

    #
    # make it easy to register custom functionality
    #
    for name, func in {
        'register_datatable': partial(register_cls, interfaces.IDataTable),
        'register_map': partial(register_cls, interfaces.IMap),
        'register_menu': register_menu,
        'register_resource': register_resource,
        'register_adapter': register_adapter,
        'register_adapters': register_adapters,
        'register_download': register_download,
        'register_staticresource': register_staticresource,
        'add_route_and_view': add_route_and_view,
        'add_settings_from_file': add_settings_from_file,
        'add_301': add_301,
        'add_410': add_410,
        'add_page': add_page,
        'register_resource_routes_and_views': register_resource_routes_and_views,
    }.items():
        config.add_directive(name, func)

    #
    # routes and views
    #
    config.add_static_view('clld-static', 'clld:web/static')
    config.add_static_view('static', '%s:static' % root_package)

    config.add_route_and_view('_js', '/_js', js, http_cache=3600)

    # add some maintenance hatches
    config.add_route_and_view('_raise', '/_raise', _raise)
    config.add_route_and_view('_ping', '/_ping', _ping, renderer='json')

    # sitemap support:
    config.add_route_and_view('robots', '/robots.txt', robots)
    config.add_route_and_view('sitemapindex', '/sitemap.xml', sitemapindex)
    config.add_route_and_view('sitemap', '/sitemap.{rsc}.{n}.xml', sitemap)
    config.add_route('resourcemap', '/resourcemap.json')
    config.add_view(resourcemap, route_name='resourcemap', renderer='jsonp')
    config.add_route_and_view(
        'select_combination', '/_select_combination', select_combination)

    config.add_route_and_view('unapi', '/unapi', unapi)
    config.add_route_and_view('olac', '/olac', olac)

    config.add_settings_from_file(pkg_dir.joinpath('appconf.ini'))
    if not config.registry.settings.get('mako.directories'):
        config.add_settings({'mako.directories': ['clld:web/templates']})

    for rsc in RESOURCES:
        config.register_resource_routes_and_views(rsc)
        config.register_datatable(
            rsc.plural, getattr(datatables, rsc.plural.capitalize(), DataTable))
        register_resource_adapters(config, rsc)

    # maps
    config.register_map('languages', Map)
    config.register_map('language', LanguageMap)
    config.register_map('parameter', ParameterMap)
    config.register_map('combination', CombinationMap)

    config.include('clld.web.adapters')

    for icon in ICONS:
        config.registry.registerUtility(icon, interfaces.IIcon, name=icon.name)
    config.registry.registerUtility(ORDERED_ICONS, interfaces.IIconList)
    config.registry.registerUtility(MapMarker(), interfaces.IMapMarker)

    #
    # inspect default locations for views and templates:
    #
    home_comp = OrderedDict()
    for name, template in [
        ('introduction', False),
        ('about', False),
        ('terms', False),
        ('glossary', False),
        ('history', False),
        ('changes', False),
        ('credits', False),
        ('legal', True),
        ('download', True),
        ('contact', True),
        ('help', False),
    ]:
        home_comp[name] = template

    if pkg_dir.joinpath('templates').exists():
        for p in pkg_dir.joinpath('templates').iterdir():
            if p.stem in home_comp and p.suffix == '.mako':
                home_comp[p.stem] = True

    for name, template in home_comp.items():
        if template:
            config.add_page(name)

    config.add_settings({'home_comp': [k for k in home_comp.keys() if home_comp[k]]})

    if 'clld.favicon' not in config.registry.settings:
        favicon = {'clld.favicon': 'clld:web/static/images/favicon.ico'}
        # hard to test (in particular on travis) and without too much consequence
        # (and the consequences faced are easy to spot).
        if pkg_dir.joinpath('static', 'favicon.ico').exists():  # pragma: no cover
            favicon['clld.favicon'] = root_package + ':static/favicon.ico'
        config.add_settings(favicon)

    with open(abspath_from_asset_spec(
            config.registry.settings['clld.favicon']), mode='rb') as fp:
        fh = md5()
        fh.update(fp.read())
        config.add_settings({'clld.favicon_hash': fh.hexdigest()})

    translation_dirs = ['clld:locale']
    if pkg_dir.joinpath('locale').exists():
        translation_dirs.append('%s:locale' % root_package)  # pragma: no cover
    config.add_translation_dirs(*translation_dirs)

    if pkg_dir.joinpath('static/publisher_logo.png').exists():  # pragma: no cover
        config.add_settings(
            {'clld.publisher_logo': '%s:static/publisher_logo.png' % root_package})

    if asbool(config.registry.settings.get('clld.pacific_centered_maps')):
        geojson.pacific_centered()

    v = maybe_import('%s.views' % root_package)
    if v:
        config.scan(v)  # pragma: no cover

    menuitems = config.registry.settings.get(
        'clld.menuitems_list',
        ['contributions', 'parameters', 'languages', 'contributors'])
    config.register_menu(('dataset', dict(label='Home')), *menuitems)

    config.include('pyramid_mako')

    for name in ['adapters', 'datatables', 'maps']:
        mod = maybe_import('%s.%s' % (root_package, name))
        if mod and hasattr(mod, 'includeme'):
            config.include(mod)

    config.register_download(CldfDownload(common.Dataset, root_package))
def reflexes(write_stats=True, path='concepticondata'):
    """
    Returns a dictionary with concept set label as key and tuples of concept
    list identifier and concept label as values.
    """
    D, G = {}, {}
    cpl = 0
    cln = 0
    clb = set([])
    dpath = Path(path) if path else PKG_PATH

    for i, cl in enumerate(dpath.joinpath('conceptlists').glob('*.tsv')):
        concepts = list(reader(cl, namedtuples=True, delimiter="\t"))
        for j, concept in enumerate([c for c in concepts if c.CONCEPTICON_ID]):
            label = concept.GLOSS if hasattr(concept, 'GLOSS') else concept.ENGLISH
            name = cl.name
            try:
                D[concept.CONCEPTICON_GLOSS] += [(name, label)]
            except KeyError:
                D[concept.CONCEPTICON_GLOSS] = [(name, label)]
            try:
                G[label] += [(concept.CONCEPTICON_ID, concept.CONCEPTICON_GLOSS, name)]
            except KeyError:
                G[label] = [(concept.CONCEPTICON_ID, concept.CONCEPTICON_GLOSS, name)]
            clb.add(label)
            cpl += 1
        cln += 1

    # write basic statistics and most frequent glosses
    if write_stats:
        txt = """# Concepticon Statistics
* concept sets (used): {0}
* concept lists: {1}
* concept labels: {2}
* concept labels (unique): {3}
* Ø concepts per list: {4:.2f}
* Ø concepts per concept set: {5:.2f}
* Ø unique concept labels per concept set: {6:.2f}
"""
        txt = txt.format(
            len(D),
            cln,
            cpl,
            len(clb),
            cpl / cln,
            sum([len(v) for k, v in D.items()]) / len(D),
            sum([len(set([label for _, label in v])) for k, v in D.items()]) / len(D))

        txt += '# Twenty Most Diverse Concept Sets\n\n'
        txt += '| No. | concept set | distinct labels | concept lists | examples |\n'
        txt += '| --- | --- | --- | --- | --- |\n'
        for i, (k, v) in enumerate(sorted(
                D.items(),
                key=lambda x: len(set([label for _, label in x[1]])),
                reverse=True)[:20]):
            txt += '| {0} | {1} | {2} | {3} | {4} |\n'.format(
                i + 1,
                k,
                len(set([label for _, label in v])),
                len(set([clist for clist, _ in v])),
                ', '.join(sorted(set(
                    ['«{0}»'.format(label.replace('*', '`*`')) for _, label in v]))))

        txt += '# Twenty Most Frequent Concept Sets\n\n'
        txt += '| No. | concept set | distinct labels | concept lists | examples |\n'
        txt += '| --- | --- | --- | --- | --- |\n'
        for i, (k, v) in enumerate(sorted(
                D.items(),
                key=lambda x: len(set([clist for clist, _ in x[1]])),
                reverse=True)[:20]):
            txt += '| {0} | {1} | {2} | {3} | {4} |\n'.format(
                i + 1,
                k,
                len(set([label for _, label in v])),
                len(set([clist for clist, _ in v])),
                ', '.join(sorted(set(
                    ['«{0}»'.format(label.replace('*', '`*`')) for _, label in v]))))

        with dpath.joinpath('README.md').open('w', encoding='utf8') as fp:
            fp.write(txt)

    return D, G
                        log.update(['subsubunit'])
                        res[fid]['subgroups'][sfid]['subgroups'][ssfid] = ddd
    else:
        # isolates:
        for l in langs:
            if l.med:
                log.update(['isolate'])
                res[l.id] = {
                    'name': l.name,
                    'doctype': l.med,
                    'macroareas': l.macroareas,
                    'extension': [l.id],
                }

    outdir = Path(grambank.__file__).parent.joinpath('static')
    dump(res, outdir.joinpath('stats_by_classification.json'))

    stats = defaultdict(lambda: defaultdict(list))
    for fid, f in res.items():
        for maname, maid in f['macroareas']:
            stats[maid][f['doctype']].append(fid)
    dump(stats, outdir.joinpath('stats_by_macroarea.json'))

    macroareas = {ma.name: ma.value for ma in Macroarea}
    dump(macroareas, outdir.joinpath('stats_macroareas.json'))

    print(log)
    sys.exit(0)
def write_info(self, outdir):
    if not isinstance(outdir, Path):
        outdir = Path(outdir)
    self.cfg.write(outdir.joinpath(self.fname('.ini')))
class Dictionary(object):

    def __init__(self, filename, **kw):
        kw.setdefault('entry_impl', Entry)
        kw['marker_map'] = kw.get('marker_map') or {}
        lexeme_marker = 'lx'
        reverse_marker_map = {v: k for k, v in kw['marker_map'].items()}
        if lexeme_marker in reverse_marker_map:
            lexeme_marker = reverse_marker_map[lexeme_marker]
        kw.setdefault('entry_prefix', '\\lx ')
        kw.setdefault('entry_sep', '\\%s ' % lexeme_marker)
        self.sfm = sfm.SFM.from_file(filename, **kw)
        self.dir = Path(filename).parent

    #def validated(self, entry):
    #    entry = sfm.Dictionary.validated(self, entry)
    #    return entry.preprocessed()

    def stats(self):
        stats = Stats()
        self.sfm.visit(stats)
        print(stats.count)
        print(stats._mult_markers)
        print(stats._implicit_mult_markers)

    def process(self, outfile):
        """extract examples, etc."""
        assert self.dir.name != 'processed'
        self.sfm.visit(Rearrange())
        with self.dir.joinpath('examples.log').open('w', encoding='utf8') as log:
            extractor = ExampleExtractor(Corpus(self.dir), log)
            self.sfm.visit(extractor)
        self.sfm.write(outfile)
        extractor.write_examples(outfile.parent.joinpath('examples.sfm'))

    def load(self, submission, did, lid, comparison_meanings,
             comparison_meanings_alt_labels, marker_map):
        data = Data()
        rel = []

        vocab = models.Dictionary.get(did)
        lang = models.Variety.get(lid)

        for ex in Examples.from_file(self.dir.joinpath('examples.sfm')):
            data.add(
                common.Sentence,
                ex.id,
                id=ex.id,
                name=ex.text,
                language=lang,
                analyzed=ex.morphemes,
                gloss=ex.gloss,
                description=ex.translation)

        for i, entry in enumerate(self.sfm):
            words = list(entry.get_words())
            headword = None

            for j, word in enumerate(words):
                if not word.meanings:
                    print('no meanings for word %s' % word.form)
                    continue

                if not headword:
                    headword = word.id
                else:
                    rel.append((word.id, 'sub', headword))

                for tw in word.rel:
                    rel.append((word.id, tw[0], tw[1]))

                w = data.add(
                    models.Word,
                    word.id,
                    id='%s-%s-%s' % (submission.id, i + 1, j + 1),
                    name=word.form,
                    number=int(word.hm) if word.hm else 0,
                    phonetic=word.ph,
                    pos=word.ps,
                    dictionary=vocab,
                    language=lang)
                DBSession.flush()

                concepts = []

                for k, meaning in enumerate(word.meanings):
                    if not (meaning.ge or meaning.de):
                        print('meaning without description for word %s' % w.name)
                        continue

                    if meaning.ge:
                        meaning.ge = meaning.ge.replace('.', ' ')

                    m = models.Meaning(
                        id='%s-%s' % (w.id, k + 1),
                        name=meaning.de or meaning.ge,
                        description=meaning.de,
                        gloss=meaning.ge,
                        word=w,
                        semantic_domain=', '.join(meaning.sd))

                    assert not meaning.x
                    for xref in meaning.xref:
                        s = data['Sentence'].get(xref)
                        assert s
                        models.MeaningSentence(meaning=m, sentence=s)

                    key = (meaning.ge or meaning.de).replace('.', ' ').lower()
                    concept = None
                    if key in comparison_meanings:
                        concept = comparison_meanings[key]
                    elif key in comparison_meanings_alt_labels:
                        concept = comparison_meanings_alt_labels[key]

                    if concept and concept not in concepts:
                        concepts.append(concept)
                        vsid = '%s-%s' % (key, submission.id),
                        if vsid in data['ValueSet']:
                            vs = data['ValueSet'][vsid]
                        else:
                            vs = data.add(
                                common.ValueSet,
                                vsid,
                                id='%s-%s' % (submission.id, m.id),
                                language=lang,
                                contribution=vocab,
                                parameter_pk=concept)

                        DBSession.add(models.Counterpart(
                            id='%s-%s' % (w.id, k + 1),
                            name=w.name,
                            valueset=vs,
                            word=w))

                for _lang, meanings in word.non_english_meanings.items():
                    assert _lang in submission.md['metalanguages']
                    for meaning in meanings:
                        k += 1
                        models.Meaning(
                            id='%s-%s' % (w.id, k + 1),
                            name=meaning,
                            gloss=meaning,
                            language=submission.md['metalanguages'][_lang],
                            word=w)

                for index, (key, values) in enumerate(word.data.items()):
                    if key in marker_map:
                        label = marker_map[key]
                        converter = default_value_converter
                        if isinstance(label, (list, tuple)):
                            label, converter = label
                        for value in values:
                            DBSession.add(common.Unit_data(
                                object_pk=w.pk,
                                key=label,
                                value=converter(value, word.data),
                                ord=index))

        # FIXME: group words by description and add synonym relationships!
        for s, d, t in rel:
            if s in data['Word'] and t in data['Word']:
                DBSession.add(models.SeeAlso(
                    source_pk=data['Word'][s].pk,
                    target_pk=data['Word'][t].pk,
                    description=d))
            else:
                print('---m---', s if s not in data['Word'] else t)
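# Driver sketch for the Dictionary above (the SFM file names are invented; the
# input is assumed to use \lx as its lexeme marker, per the defaults set in
# __init__, and outfile must be a Path since process() uses outfile.parent).
d = Dictionary('/path/to/lexicon.sfm')
d.stats()                                      # marker statistics on stdout
d.process(Path('/path/to/out/lexicon.sfm'))    # rearranges entries, extracts examples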
def htmlmap(args, min_langs_for_legend_item=10):
    """
    glottolog --repos=. htmlmap [OUTDIR] [GLOTTOCODES]
    """
    nodes = {n.id: n for n in args.repos.languoids()}
    legend = Counter()

    glottocodes = None
    if len(args.args) > 1:
        glottocodes = read_text(args.args[1]).split()

    langs = []
    for n in nodes.values():
        if ((glottocodes is None and n.level == args.repos.languoid_levels.language)
                or (glottocodes and n.id in glottocodes)) and n.latitude is not None:
            fid = n.lineage[0][1] if n.lineage else n.id
            if (not nodes[fid].category.startswith('Pseudo')) or fid == n.id:
                langs.append((n, fid))
                legend.update([fid])

    color_map = [fid for fid, _ in legend.most_common()]
    color_map = dict(zip(color_map, qualitative_colors(len(color_map))))
    print(color_map)

    def l2f(t):
        n, fid = t
        lon, lat = n.longitude, n.latitude
        if lon <= -26:
            lon += 360  # make the map pacific-centered.

        return {
            "geometry": {"coordinates": [lon, lat], "type": "Point"},
            "id": n.id,
            "properties": {
                "name": n.name,
                "color": color_map[fid],
                "family": nodes[fid].name,
                "family_id": fid,
            },
            "type": "Feature"
        }

    def legend_item(fid, c):
        return \
            '<span style="background-color: {0}; border: 1px solid black;">'\
            '   </span> '\
            '<a href="https://glottolog.org/resource/languoid/id/{1}">{2}</a> ({3})'.format(
                color_map[fid], fid, nodes[fid].name, c)

    geojson = {
        "features": list(map(l2f, langs)),
        "properties": {
            "legend": {
                fid: legend_item(fid, c)
                for fid, c in legend.most_common() if c >= min_langs_for_legend_item},
        },
        "type": "FeatureCollection"
    }

    def rendered_template(name, **kw):
        return Template(read_text(
            Path(pyglottolog.__file__).parent.joinpath('templates', 'htmlmap', name))
        ).substitute(**kw)

    jsname = 'glottolog_map.json'
    outdir = Path('.') if not args.args else Path(args.args[0])
    write_text(
        outdir.joinpath(jsname),
        rendered_template('htmlmap.js', geojson=dumps(geojson, indent=4)))
    html = outdir.joinpath('glottolog_map.html')
    write_text(
        html,
        rendered_template(
            'htmlmap.html',
            version=git_describe(args.repos.repos),
            jsname=jsname,
            nlangs=len(langs)))
    print(html.resolve().as_uri())
def htmlmap(args):
    """
    glottolog htmlmap [OUTDIR]
    """
    nodes = {n.id: n for n in args.repos.languoids()}
    legend = Counter()

    langs = []
    for n in nodes.values():
        if n.level == Level.language and n.latitude is not None:
            fid = n.lineage[0][1] if n.lineage else n.id
            if not nodes[fid].category.startswith('Pseudo'):
                langs.append((n, fid))
                legend.update([fid])

    color_map = {
        fid: "{0:0{1}X}".format((i + 1) * 10, 3)
        for i, fid in enumerate(sorted(legend.keys()))}

    def l2f(t):
        n, fid = t
        lon, lat = n.longitude, n.latitude
        if lon <= -26:
            lon += 360

        return {
            "geometry": {"coordinates": [lon, lat], "type": "Point"},
            "id": n.id,
            "properties": {
                "name": n.name,
                "color": color_map[fid],
                "family": nodes[fid].name,
                "family_id": fid,
            },
            "type": "Feature"
        }

    def legend_item(fid, c):
        return \
            '<span style="background-color: #{0}; border: 1px solid black;">'\
            '   </span> '\
            '<a href="http://glottolog.org/resource/languoid/id/{1}">{2}</a> ({3})'.format(
                color_map[fid], fid, nodes[fid].name, c)

    geojson = {
        "features": list(map(l2f, langs)),
        "properties": {
            "legend": {
                fid: legend_item(fid, c) for fid, c in legend.most_common() if c > 10},
        },
        "type": "FeatureCollection"
    }

    def rendered_template(name, **kw):
        return Template(read_text(
            Path(pyglottolog.__file__).parent.joinpath('templates', 'htmlmap', name))
        ).substitute(**kw)

    jsname = 'glottolog_map.json'
    outdir = Path('.') if not args.args else Path(args.args[0])
    write_text(
        outdir.joinpath(jsname),
        rendered_template('htmlmap.js', geojson=dumps(geojson, indent=4)))
    html = outdir.joinpath('glottolog_map.html')
    write_text(
        html,
        rendered_template(
            'htmlmap.html',
            version=git_describe(args.repos.repos),
            jsname=jsname,
            nlangs=len(langs)))
    print(html.resolve().as_uri())