def test_git_describe(tmppath, capsys):
    """``git_describe``: missing paths raise, plain directories yield their name."""
    from clldutils.path import git_describe

    target = tmppath / 'testdir'
    # A path that does not exist yet must be rejected.
    with pytest.raises(ValueError):
        git_describe(target)
    # An existing directory that is not a git repo is described by its name.
    target.mkdir()
    assert git_describe(target) == 'testdir'
def cldf(self, **kw):
    """Create the CLDF export for this dataset.

    Records the Glottolog and Concepticon versions used, validates the raw
    data (a BagIt bag) and then delegates to the dataset's ``cldf`` command.

    :param kw: keyword arguments; must contain 'glottolog_repos' and \
    'concepticon_repos' (paths to the respective reference catalog clones).
    """
    # Pin the reference-catalog versions this export was built against.
    self.glottolog_version = git_describe(kw['glottolog_repos'])
    self.concepticon_version = git_describe(kw['concepticon_repos'])
    try:
        bag = bagit.Bag(self.raw.parent.as_posix())
        if not bag.is_valid():
            # Raw data changed since checksums were computed; ask the user
            # interactively whether to refresh the manifests.
            if confirm('The downloaded data has changed. Update checksums?'):
                bag.save(manifests=True)
                assert bag.is_valid()
            else:
                raise bagit.BagError('invalid raw data')
        concepticon = Concepticon(kw['concepticon_repos'])
        if self.conceptlist:
            # Resolve the conceptlist identifier to the Concepticon object.
            self.conceptlist = concepticon.conceptlists[self.conceptlist]
        self._run_command('cldf', concepticon, **kw)
    except bagit.BagError:
        # Invalid raw data is reported but does not abort the caller.
        self.log.error('invalid raw data for dataset %s' % self.id)
def json_ld(self):
    """Return an ordered JSON-LD style description of this entity.

    Keys appear in a fixed order: rdf:about, rdf:type, optionally
    dc:created, then the Dublin Core terms sorted by key.
    """
    desc = collections.OrderedDict()
    desc['rdf:about'] = self.url
    desc['rdf:type'] = 'prov:Entity'
    # Prefer an explicit version; otherwise derive one from the git clone.
    if self.version:
        desc['dc:created'] = self.version
    elif self.clone:
        desc['dc:created'] = git_describe(self.clone)
    # Append Dublin Core metadata in deterministic (sorted) key order.
    for key in sorted(self.dc):
        desc['dc:{0}'.format(key)] = self.dc[key]
    return desc
def run(args):
    """Print a citation line for the dataset, assembled from repos metadata.

    Editors are included only if they were active in ``args.year``.
    Mutates ``args.version``: falls back to git-describe and strips a
    leading 'v' from tags like 'v4.2'.
    """
    if not args.version:  # pragma: no cover
        args.version = git_describe(args.repos.repos)
    if args.version.startswith('v'):
        args.version = args.version[1:]

    names = []
    for editor in args.repos.editors:
        # An editor counts for the year if their tenure covers it.
        active = ((not editor.end) or (int(editor.end) >= args.year)) \
            and int(editor.start) <= args.year
        if active:
            parsed = HumanName(editor.name)
            names.append('{0.last}, {0.first} {0.middle}'.format(parsed).strip())
    joined = ' & '.join(names)

    print(
        "{0} (eds.) {1.year}. {2.title} {1.version}. {2.description}. "
        "{2.publisher.place}: {2.publisher.name}. Available online at {2.url}".format(
            joined, args, args.repos.dataset_metadata,
        ))
def run(args):
    """Print the dataset citation and rewrite its ``.zenodo.json`` metadata.

    Mutates ``args.version`` (git-describe fallback, leading 'v' stripped),
    prints the citation line, and dumps Zenodo deposit metadata derived from
    it into the repository.
    """
    if not args.version:  # pragma: no cover
        args.version = git_describe(args.repos.repos)
    if args.version.startswith('v'):
        args.version = args.version[1:]

    # Editors whose tenure covers the requested year.
    current_editors = []
    for e in args.repos.editors:
        if ((not e.end) or (int(e.end) >= args.year)) and int(e.start) <= args.year:
            current_editors.append(e)

    parts = []
    for e in current_editors:
        parsed = HumanName(e.name)
        parts.append('{0.last}, {0.first} {0.middle}'.format(parsed).strip())
    editor_names = ' & '.join(parts)

    res = "{0} (eds.) {1.year}. {2.title} {1.version}. {2.description}. "\
        "{2.publisher.place}: {2.publisher.name}. Available online at {2.url}".format(
            editor_names, args, args.repos.dataset_metadata,
        )
    print(res)
    # Keep the Zenodo deposit metadata in sync with the citation.
    dump(
        zenodo_json(res, args.version, current_editors),
        args.repos.repos / '.zenodo.json',
        indent=4)
def describe(self):
    """Return a git-describe style version string for the repository clone."""
    repos_dir = self.repos
    return git_describe(repos_dir)
def __unicode__(self):
    """Human-readable label including repository version and location."""
    version = git_describe(self.repos)
    return '<Glottolog repos {0} at {1}>'.format(version, self.repos)
def htmlmap(args, min_langs_for_legend_item=10):
    """
    glottolog --repos=. htmlmap [OUTDIR] [GLOTTOCODES]

    Render an HTML world map of languoids, color-coded by top-level family.
    Writes glottolog_map.json and glottolog_map.html to OUTDIR (default: cwd).
    """
    nodes = {n.id: n for n in args.repos.languoids()}
    legend = Counter()

    # Optional second positional argument: a file listing glottocodes to plot.
    glottocodes = None
    if len(args.args) > 1:
        glottocodes = read_text(args.args[1]).split()

    langs = []
    for n in nodes.values():
        # Without a glottocode list, plot all language-level languoids that
        # have coordinates; with one, plot exactly the listed languoids.
        if ((glottocodes is None and n.level == args.repos.languoid_levels.language)
                or (glottocodes and n.id in glottocodes)) and n.latitude != None:
            # The top-level family is the first element of the lineage.
            fid = n.lineage[0][1] if n.lineage else n.id
            # Skip bookkeeping families, unless the languoid IS that family.
            if (not nodes[fid].category.startswith('Pseudo')) or fid == n.id:
                langs.append((n, fid))
                legend.update([fid])

    # Assign one qualitative color per family, biggest families first.
    color_map = [fid for fid, _ in legend.most_common()]
    color_map = dict(zip(color_map, qualitative_colors(len(color_map))))
    # NOTE(review): looks like leftover debug output — consider removing.
    print(color_map)

    def l2f(t):
        # Convert a (languoid, family id) pair into a GeoJSON feature.
        n, fid = t
        lon, lat = n.longitude, n.latitude
        if lon <= -26:
            lon += 360  # make the map pacific-centered.
        return {
            "geometry": {"coordinates": [lon, lat], "type": "Point"},
            "id": n.id,
            "properties": {
                "name": n.name,
                "color": color_map[fid],
                "family": nodes[fid].name,
                "family_id": fid,
            },
            "type": "Feature"
        }

    def legend_item(fid, c):
        # One legend entry: a color swatch linking to the family at Glottolog.
        return \
            '<span style="background-color: {0}; border: 1px solid black;">'\
            ' </span> '\
            '<a href="https://glottolog.org/resource/languoid/id/{1}">{2}</a> ({3})'.format(
                color_map[fid], fid, nodes[fid].name, c)

    geojson = {
        "features": list(map(l2f, langs)),
        "properties": {
            # Only families with enough languages get a legend entry.
            "legend": {
                fid: legend_item(fid, c) for fid, c in legend.most_common()
                if c >= min_langs_for_legend_item
            },
        },
        "type": "FeatureCollection"
    }

    def rendered_template(name, **kw):
        # Load a template shipped with pyglottolog and substitute keywords.
        return Template(
            read_text(
                Path(pyglottolog.__file__).parent.joinpath(
                    'templates', 'htmlmap', name))).substitute(**kw)

    jsname = 'glottolog_map.json'
    outdir = Path('.') if not args.args else Path(args.args[0])
    write_text(
        outdir.joinpath(jsname),
        rendered_template('htmlmap.js', geojson=dumps(geojson, indent=4)))
    html = outdir.joinpath('glottolog_map.html')
    write_text(
        html,
        rendered_template(
            'htmlmap.html',
            version=git_describe(args.repos.repos),
            jsname=jsname,
            nlangs=len(langs)))
    print(html.resolve().as_uri())
def includeme(config):
    """Set up the clld web application in the given Pyramid ``config``.

    Upgrading:

    - register utilities "by hand", after config.include('clld.web.app')
    - add routes by hand (and remove these from the **kw passed to Configurator)

    :param config: pyramid Configurator for the app being set up.
    :return: None; all effects are registrations on ``config``.
    """
    # We exploit the default package layout as created via the CLLD scaffold.
    # Note: the following exploits the import time side effect of modifying
    # the webassets environment!
    root_package = config.root_package.__name__
    pkg_dir = Path(config.root_package.__file__).parent.resolve()
    maybe_import('%s.assets' % root_package, pkg_dir=pkg_dir)

    # JSON/JSONP renderers with ISO-formatted datetimes.
    json_renderer = JSON()
    json_renderer.add_adapter(datetime.datetime, lambda obj, req: obj.isoformat())
    json_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('json', json_renderer)

    jsonp_renderer = JSONP(param_name='callback')
    jsonp_renderer.add_adapter(datetime.datetime, lambda obj, req: obj.isoformat())
    jsonp_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('jsonp', jsonp_renderer)

    config.set_request_factory(ClldRequest)
    config.registry.registerUtility(CtxFactoryQuery(), interfaces.ICtxFactoryQuery)
    config.registry.registerUtility(OlacConfig(), interfaces.IOlacConfig)
    config.registry.registerUtility(CldfConfig(), interfaces.ICldfConfig)

    # Initialize the db connection.
    engine = engine_from_config(config.registry.settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    Base.metadata.bind = engine

    # The git tag is informational only; a non-repo checkout is fine.
    try:
        git_tag = git_describe(Path(pkg_dir).parent)
    except ValueError:  # pragma: no cover
        git_tag = None

    config.add_settings({
        'pyramid.default_locale_name': 'en',
        'clld.pkg': root_package,
        'clld.git_tag': git_tag,
        'clld.parameters': {}})

    if 'clld.files' in config.registry.settings:
        # deployment-specific location of static data files
        abspath = Path(config.registry.settings['clld.files']).resolve()
        config.add_settings({'clld.files': abspath})
        config.add_static_view('files', str(abspath))

    # Event subscribers:
    config.add_subscriber(add_localizer, events.NewRequest)
    config.add_subscriber(init_map, events.ContextFound)
    config.add_subscriber(
        partial(
            add_renderer_globals,
            maybe_import('%s.util' % root_package, pkg_dir=pkg_dir)),
        events.BeforeRender)

    # Make it easy to register custom functionality.
    for name, func in {
        'register_utility': register_utility,
        'register_datatable': partial(register_cls, interfaces.IDataTable),
        'register_map': partial(register_cls, interfaces.IMap),
        'register_menu': register_menu,
        'register_resource': register_resource,
        'register_adapter': register_adapter,
        'register_adapters': register_adapters,
        'register_download': register_download,
        'register_staticresource': register_staticresource,
        'add_route_and_view': add_route_and_view,
        'add_settings_from_file': add_settings_from_file,
        'add_301': add_301,
        'add_410': add_410,
        'add_page': add_page,
        'register_resource_routes_and_views': register_resource_routes_and_views,
    }.items():
        config.add_directive(name, func)

    # Routes and views.
    config.add_static_view('clld-static', 'clld:web/static')
    config.add_static_view('static', '%s:static' % root_package)

    config.add_route_and_view('_js', '/_js', js, http_cache=3600)

    # Add some maintenance hatches.
    config.add_route_and_view('_raise', '/_raise', _raise)
    config.add_route_and_view('_ping', '/_ping', _ping, renderer='json')

    # Sitemap support:
    config.add_route_and_view('robots', '/robots.txt', robots)
    config.add_route_and_view('sitemapindex', '/sitemap.xml', sitemapindex)
    config.add_route_and_view('sitemap', '/sitemap.{rsc}.{n}.xml', sitemap)
    config.add_route('resourcemap', '/resourcemap.json')
    config.add_view(resourcemap, route_name='resourcemap', renderer='jsonp')
    config.add_route_and_view(
        'select_combination', '/_select_combination', select_combination)

    config.add_route_and_view('unapi', '/unapi', unapi)
    config.add_route_and_view('olac', '/olac', olac)

    config.add_settings_from_file(pkg_dir.joinpath('appconf.ini'))
    if not config.registry.settings.get('mako.directories'):
        config.add_settings({'mako.directories': ['clld:web/templates']})

    for rsc in RESOURCES:
        config.register_resource_routes_and_views(rsc)
        config.register_datatable(
            rsc.plural, getattr(datatables, rsc.plural.capitalize(), DataTable))
        register_resource_adapters(config, rsc)

    # Maps.
    config.register_map('languages', Map)
    config.register_map('language', LanguageMap)
    config.register_map('parameter', ParameterMap)
    config.register_map('combination', CombinationMap)

    config.include('clld.web.adapters')

    for icon in ICONS:
        config.registry.registerUtility(icon, interfaces.IIcon, name=icon.name)
    config.registry.registerUtility(ORDERED_ICONS, interfaces.IIconList)
    config.registry.registerUtility(MapMarker(), interfaces.IMapMarker)

    # Inspect default locations for views and templates; True marks pages
    # that are rendered from a template.
    home_comp = OrderedDict()
    for name, template in [
        ('introduction', False),
        ('about', False),
        ('terms', False),
        ('glossary', False),
        ('history', False),
        ('changes', False),
        ('credits', False),
        ('legal', True),
        ('download', True),
        ('contact', True),
        ('help', False),
    ]:
        home_comp[name] = template

    # A matching .mako template in the app package activates the page.
    if pkg_dir.joinpath('templates').exists():
        for p in pkg_dir.joinpath('templates').iterdir():
            if p.stem in home_comp and p.suffix == '.mako':
                home_comp[p.stem] = True

    for name, template in home_comp.items():
        if template:
            config.add_page(name)

    config.add_settings(
        {'home_comp': [k for k in home_comp.keys() if home_comp[k]]})

    if 'clld.favicon' not in config.registry.settings:
        favicon = {'clld.favicon': 'clld:web/static/images/favicon.ico'}
        # hard to test (in particular on travis) and without too much consequence
        # (and the consequences faced are easy to spot).
        if pkg_dir.joinpath('static', 'favicon.ico').exists():  # pragma: no cover
            favicon['clld.favicon'] = root_package + ':static/favicon.ico'
        config.add_settings(favicon)

    # Hash is used for cache busting of the favicon URL.
    config.add_settings({
        'clld.favicon_hash': md5(abspath_from_asset_spec(
            config.registry.settings['clld.favicon']))})

    translation_dirs = ['clld:locale']
    if pkg_dir.joinpath('locale').exists():
        translation_dirs.append('%s:locale' % root_package)  # pragma: no cover
    config.add_translation_dirs(*translation_dirs)

    if pkg_dir.joinpath(
            'static/publisher_logo.png').exists():  # pragma: no cover
        config.add_settings({
            'clld.publisher_logo': '%s:static/publisher_logo.png' % root_package})

    if asbool(config.registry.settings.get('clld.pacific_centered_maps')):
        geojson.pacific_centered()

    v = maybe_import('%s.views' % root_package, pkg_dir=pkg_dir)
    if v:
        config.scan(v)  # pragma: no cover

    menuitems = config.registry.settings.get(
        'clld.menuitems_list',
        ['contributions', 'parameters', 'languages', 'contributors'])
    config.register_menu(('dataset', dict(label='Home')), *menuitems)

    config.include('pyramid_mako')

    # App-level adapters/datatables/maps modules may hook in via includeme.
    for name in ['adapters', 'datatables', 'maps']:
        mod = maybe_import('%s.%s' % (root_package, name), pkg_dir=pkg_dir)
        if mod and hasattr(mod, 'includeme'):
            config.include(mod)

    config.register_download(CldfDownload(common.Dataset, root_package))
def __unicode__(self):
    """Readable label: repo directory name, git-describe version, and path."""
    # Prefer the resolved directory name when the path exists (follows symlinks).
    if self.repos.exists():
        name = self.repos.resolve().name
    else:
        name = self.repos.name
    return '<{0} repository {1} at {2}>'.format(
        name, git_describe(self.repos), self.repos)
def git_hash(d):
    """Return the bare commit hash part of ``git describe`` output for *d*."""
    described = git_describe(d)
    # 'v1.2-3-gabcdef' -> 'abcdef'; output without '-g' passes through unchanged.
    return described.split('-g')[-1]
def version(self):
    """Version of the data, as reported by git-describe on the repository."""
    repos_dir = self.repos
    return git_describe(repos_dir)
def main(scripts, dev, glr):
    """Convert the source CSV data under ``scripts`` into a CLDF StructureDataset.

    Reads contributors/parameters/contributions/values CSV files, builds the
    corresponding CLDF tables in the local ``cldf`` directory, and validates
    the result.

    :param scripts: base path containing 'to_cldf/cldf' with the input CSVs.
    :param dev: passed through to ``desc`` when building contributor readmes.
    :param glr: path to a Glottolog repository clone.
    """
    cldf_dir = Path('cldf')
    # Round-trip sources.bib through the parser to normalize case/formatting.
    bib = parse_string(read_text(cldf_dir / 'sources.bib'), bib_format='bibtex')
    write_text(cldf_dir / 'sources.bib', bib.lower().to_string('bibtex'))
    glottolog = Glottolog(glr)
    ds = StructureDataset.in_dir(cldf_dir)
    # Record which Glottolog version the conversion used.
    ds.tablegroup.notes.append(OrderedDict([
        ('dc:title', 'environment'),
        ('properties', OrderedDict([
            ('glottolog_version', git_describe(glottolog.repos)),
        ]))]))
    ds.add_columns(
        'ValueTable',
        {'name': 'Marginal', 'datatype': 'boolean'},
        {'name': 'Allophones', 'separator': ' '},
        'Contribution_ID')
    # Phonological feature columns added to the ParameterTable.
    features = [
        "tone", "stress", "syllabic", "short", "long", "consonantal",
        "sonorant", "continuant", "delayedRelease", "approximant", "tap",
        "trill", "nasal", "lateral", "labial", "round", "labiodental",
        "coronal", "anterior", "distributed", "strident", "dorsal", "high",
        "low", "front", "back", "tense", "retractedTongueRoot",
        "advancedTongueRoot", "periodicGlottalSource", "epilaryngealSource",
        "spreadGlottis", "constrictedGlottis", "fortis",
        "raisedLarynxEjective", "loweredLarynxImplosive", "click"]
    ds.add_component('ParameterTable', 'SegmentClass', *features)
    ds.add_component('LanguageTable')
    ds.add_table(
        'contributions.csv', 'ID', 'Name', 'Contributor_ID',
        {
            'name': 'Source',
            'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#source',
            'separator': ';'},
        'URL')
    ds.add_table(
        'contributors.csv', 'ID', 'Name', 'Description', 'Readme', 'Contents',
        {
            'name': 'Source',
            'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#source',
            'separator': ';'},
        'URL',
    )

    def read(what):
        # Read one input CSV as namedtuple rows.
        return reader(scripts / 'to_cldf' / 'cldf' / what, namedtuples=True)

    languoids = {l.id: l for l in glottolog.languoids()}

    values, segments, languages, inventories, sources = [], [], {}, {}, []
    for contrib in read('contributors.csv'):
        sources.append(dict(
            ID=contrib.Name,
            Name=contrib.Contributor,
            Description=contrib.Description,
            Readme=desc(dev, contrib.Name),
            Contents=contrib.Contents,
            Source=[c.strip().lower() for c in contrib.Citation.split(';')],
            URL=contrib.SourceURL if contrib.SourceURL != 'NA' else '',
        ))

    # Parameters are re-keyed by an MD5 of their description, so identical
    # segments collapse onto one ID; pid_map translates old -> new IDs.
    pid_map = {}
    for row in read('parameters.csv'):
        pid = md5(row.Description.encode('utf8')).hexdigest().upper()
        pid_map[row.ID] = pid
        segments.append(dict(
            ID=pid,
            Name=row.Name,
            Description=row.Description,
            SegmentClass=row.SegmentClass,
            **{f: getattr(row, f) for f in features}))

    src = {}
    for row in read('contributions.csv'):
        src[row.ID] = row.References.split(
            ';') if row.References != 'no source given' else []
        src[row.ID] = [sid.lower() for sid in src[row.ID]]
        inventories[row.ID] = dict(
            ID=row.ID,
            Name=row.Name,
            Contributor_ID=row.Contributor_ID,
            URL=row.URI if row.URI != 'NA' else '',
            Source=src[row.ID])

    # Deduplicate values on (language, parameter, contribution).
    uniq = set()
    for row in read('values.csv'):
        pk = (row.Language_ID, row.Parameter_ID, row.Contribution_ID)
        if pk in uniq:
            print('skipping duplicate phoneme {0}'.format(pk))
            continue
        uniq.add(pk)
        # Unknown language IDs fall back to a slug of the inventory name.
        lid = row.Language_ID if row.Language_ID in languoids else slug(
            inventories[row.Contribution_ID]['Name'])
        if lid not in languages:
            #
            # FIXME: Language_ID == 'NA' for three inventories! This must be mapped!
            #
            lang = languoids.get(lid)
            languages[lid] = dict(
                ID=lid,
                Name=lang.name if lang else None,
                Glottocode=lang.id if lang else None,
                ISO639P3code=row.ISO639P3code if row.ISO639P3code != 'NA' else None,
            )
        values.append(dict(
            ID=row.ID,
            Language_ID=lid,
            Parameter_ID=pid_map[row.Parameter_ID],
            Contribution_ID=row.Contribution_ID,
            Value=row.Name,
            # NOTE(review): eval on file content — inputs are expected to be
            # FALSE|TRUE|NA only; a literal mapping would be safer.
            Marginal=None if row.Marginal == 'NA' else eval(
                row.Marginal.lower().capitalize()),  # FALSE|TRUE|NA
            Allophones=row.Allophones.split() if row.Allophones != 'NA' else [],
            Source=src[row.Contribution_ID],
        ))

    ds.write(**{
        'ValueTable': values,
        'LanguageTable': languages.values(),
        'ParameterTable': segments,
        'contributions.csv': inventories.values(),
        'contributors.csv': sources})
    ds.validate(logging.getLogger(__name__))
def describe(self) -> str:
    """Return the git-describe output for the repository directory."""
    repos_path = self.repos
    return git_describe(repos_path)
def includeme(config):
    """Set up the clld web application in the given Pyramid ``config``.

    Upgrading:

    - register utilities "by hand", after config.include('clld.web.app')
    - add routes by hand (and remove these from the **kw passed to Configurator)

    :param config: pyramid Configurator for the app being set up.
    :return: None; all effects are registrations on ``config``.
    """
    # We exploit the default package layout as created via the CLLD scaffold.
    # Note: the following exploits the import time side effect of modifying
    # the webassets environment!
    root_package = config.root_package.__name__
    pkg_dir = Path(config.root_package.__file__).parent.resolve()
    maybe_import('%s.assets' % root_package, pkg_dir=pkg_dir)

    # JSON/JSONP renderers with ISO-formatted datetimes.
    json_renderer = JSON()
    json_renderer.add_adapter(datetime.datetime, lambda obj, req: obj.isoformat())
    json_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('json', json_renderer)

    jsonp_renderer = JSONP(param_name='callback')
    jsonp_renderer.add_adapter(datetime.datetime, lambda obj, req: obj.isoformat())
    jsonp_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('jsonp', jsonp_renderer)

    config.set_request_factory(ClldRequest)
    config.registry.registerUtility(CtxFactoryQuery(), interfaces.ICtxFactoryQuery)
    config.registry.registerUtility(OlacConfig(), interfaces.IOlacConfig)
    config.registry.registerUtility(CldfConfig(), interfaces.ICldfConfig)

    # Initialize the db connection.
    engine = engine_from_config(config.registry.settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    Base.metadata.bind = engine

    # The git tag is informational only; a non-repo checkout is fine.
    try:
        git_tag = git_describe(Path(pkg_dir).parent)
    except ValueError:  # pragma: no cover
        git_tag = None

    config.add_settings({
        'pyramid.default_locale_name': 'en',
        'clld.pkg': root_package,
        'clld.git_tag': git_tag,
        'clld.parameters': {}})

    if 'clld.files' in config.registry.settings:
        # deployment-specific location of static data files
        abspath = Path(config.registry.settings['clld.files']).resolve()
        config.add_settings({'clld.files': abspath})
        config.add_static_view('files', str(abspath))

    # Event subscribers:
    config.add_subscriber(add_localizer, events.NewRequest)
    config.add_subscriber(init_map, events.ContextFound)
    config.add_subscriber(
        partial(
            add_renderer_globals,
            maybe_import('%s.util' % root_package, pkg_dir=pkg_dir)),
        events.BeforeRender)

    # Make it easy to register custom functionality.
    for name, func in {
        'register_utility': register_utility,
        'register_datatable': partial(register_cls, interfaces.IDataTable),
        'register_map': partial(register_cls, interfaces.IMap),
        'register_menu': register_menu,
        'register_resource': register_resource,
        'register_adapter': register_adapter,
        'register_adapters': register_adapters,
        'register_download': register_download,
        'register_staticresource': register_staticresource,
        'add_route_and_view': add_route_and_view,
        'add_settings_from_file': add_settings_from_file,
        'add_301': add_301,
        'add_410': add_410,
        'add_page': add_page,
        'register_resource_routes_and_views': register_resource_routes_and_views,
    }.items():
        config.add_directive(name, func)

    # Routes and views.
    config.add_static_view('clld-static', 'clld:web/static')
    config.add_static_view('static', '%s:static' % root_package)

    config.add_route_and_view('_js', '/_js', js, http_cache=3600)

    # Add some maintenance hatches.
    config.add_route_and_view('_raise', '/_raise', _raise)
    config.add_route_and_view('_ping', '/_ping', _ping, renderer='json')

    # Sitemap support:
    config.add_route_and_view('robots', '/robots.txt', robots)
    config.add_route_and_view('sitemapindex', '/sitemap.xml', sitemapindex)
    config.add_route_and_view('sitemap', '/sitemap.{rsc}.{n}.xml', sitemap)
    config.add_route('resourcemap', '/resourcemap.json')
    config.add_view(resourcemap, route_name='resourcemap', renderer='jsonp')
    config.add_route_and_view(
        'select_combination', '/_select_combination', select_combination)

    config.add_route_and_view('unapi', '/unapi', unapi)
    config.add_route_and_view('olac', '/olac', olac)

    config.add_settings_from_file(pkg_dir.joinpath('appconf.ini'))
    if not config.registry.settings.get('mako.directories'):
        config.add_settings({'mako.directories': ['clld:web/templates']})

    for rsc in RESOURCES:
        config.register_resource_routes_and_views(rsc)
        config.register_datatable(
            rsc.plural, getattr(datatables, rsc.plural.capitalize(), DataTable))
        register_resource_adapters(config, rsc)

    # Maps.
    config.register_map('languages', Map)
    config.register_map('language', LanguageMap)
    config.register_map('parameter', ParameterMap)
    config.register_map('combination', CombinationMap)

    config.include('clld.web.adapters')

    for icon in ICONS:
        config.registry.registerUtility(icon, interfaces.IIcon, name=icon.name)
    config.registry.registerUtility(ORDERED_ICONS, interfaces.IIconList)
    config.registry.registerUtility(MapMarker(), interfaces.IMapMarker)

    # Inspect default locations for views and templates; True marks pages
    # that are rendered from a template.
    home_comp = OrderedDict()
    for name, template in [
        ('introduction', False),
        ('about', False),
        ('terms', False),
        ('glossary', False),
        ('history', False),
        ('changes', False),
        ('credits', False),
        ('legal', True),
        ('download', True),
        ('contact', True),
        ('help', False),
    ]:
        home_comp[name] = template

    # A matching .mako template in the app package activates the page.
    if pkg_dir.joinpath('templates').exists():
        for p in pkg_dir.joinpath('templates').iterdir():
            if p.stem in home_comp and p.suffix == '.mako':
                home_comp[p.stem] = True

    for name, template in home_comp.items():
        if template:
            config.add_page(name)

    config.add_settings(
        {'home_comp': [k for k in home_comp.keys() if home_comp[k]]})

    if 'clld.favicon' not in config.registry.settings:
        favicon = {'clld.favicon': 'clld:web/static/images/favicon.ico'}
        # hard to test (in particular on travis) and without too much consequence
        # (and the consequences faced are easy to spot).
        if pkg_dir.joinpath('static', 'favicon.ico').exists():  # pragma: no cover
            favicon['clld.favicon'] = root_package + ':static/favicon.ico'
        config.add_settings(favicon)

    # Hash is used for cache busting of the favicon URL.
    config.add_settings({
        'clld.favicon_hash': md5(abspath_from_asset_spec(
            config.registry.settings['clld.favicon']))})

    translation_dirs = ['clld:locale']
    if pkg_dir.joinpath('locale').exists():
        translation_dirs.append('%s:locale' % root_package)  # pragma: no cover
    config.add_translation_dirs(*translation_dirs)

    if pkg_dir.joinpath('static/publisher_logo.png').exists():  # pragma: no cover
        config.add_settings(
            {'clld.publisher_logo': '%s:static/publisher_logo.png' % root_package})

    if asbool(config.registry.settings.get('clld.pacific_centered_maps')):
        geojson.pacific_centered()

    v = maybe_import('%s.views' % root_package, pkg_dir=pkg_dir)
    if v:
        config.scan(v)  # pragma: no cover

    menuitems = config.registry.settings.get(
        'clld.menuitems_list',
        ['contributions', 'parameters', 'languages', 'contributors'])
    config.register_menu(('dataset', dict(label='Home')), *menuitems)

    config.include('pyramid_mako')

    # App-level adapters/datatables/maps modules may hook in via includeme.
    for name in ['adapters', 'datatables', 'maps']:
        mod = maybe_import('%s.%s' % (root_package, name), pkg_dir=pkg_dir)
        if mod and hasattr(mod, 'includeme'):
            config.include(mod)

    config.register_download(CldfDownload(common.Dataset, root_package))
def assert_release(repos):
    """Return the release number the repository *repos* is checked out at.

    Raises AssertionError when git-describe output does not match the
    expected version-tag pattern.
    """
    described = git_describe(repos)
    match = VERSION_NUMBER_PATTERN.match(described)
    assert match, 'Repository is not checked out to a valid release tag'
    return match.group('number')  # pragma: no cover
def run(args):
    """Download, list, package or publish the media files of a CLDF dataset.

    Modes (mutually constrained via the guard clauses below):

    - default: download all matching media files into ``args.out``.
    - ``--list``: only print per-mimetype counts and sizes.
    - ``--create-release``: zip the downloaded media and write Zenodo
      metadata plus a README into a release directory.
    - ``--update-zenodo``: push previously created metadata to a Zenodo
      deposit.

    :raises ParserError: on invalid or inconsistent CLI options.
    """
    ds = get_dataset(args)
    ds_cldf = ds.cldf_reader()
    release_dir = args.out / '{0}_{1}'.format(ds.id, MEDIA)

    if ds_cldf.get('media.csv', None) is None:  # pragma: no cover
        args.log.error('Dataset has no media.csv')
        raise ParserError
    if args.parent_doi and not Zenodo.DOI_PATTERN.match(args.parent_doi):
        args.log.error('Invalid passed DOI')
        raise ParserError
    if args.update_zenodo:
        if not release_dir.exists():
            args.log.error(
                '"{0}" not found -- run --create-release first?'.format(
                    release_dir))
            raise ParserError
        if not (release_dir / ZENODO_FILE_NAME).exists():
            args.log.error(
                '"{0}" not found -- run --create-release first?'.format(
                    release_dir / ZENODO_FILE_NAME))
            raise ParserError
        if args.create_release:
            args.log.error(
                'You cannot create the release and update zenodo at the same time.')
            raise ParserError
    if args.create_release:
        if not args.parent_doi:
            args.log.error(
                'The corresponding DOI is required (via --parent-doi).')
            raise ParserError

    # Optional comma-separated filter on file extension / mimetype prefix.
    mime_types = None
    if args.mimetype:
        mime_types = [m.strip() for m in nfilter(args.mimetype.split(','))]

    if args.list:
        size = collections.Counter()
        number = collections.Counter()
    else:
        media_dir = args.out / MEDIA
        media_dir.mkdir(exist_ok=True)
        media = []

    if not args.update_zenodo:
        used_file_extensions = set()
        with UnicodeWriter(media_dir / INDEX_CSV if not args.list else None) as w:
            for i, row in enumerate(tqdm.tqdm(
                    [r for r in ds_cldf['media.csv']],
                    desc='Getting {0} items'.format(MEDIA))):
                url = ds_cldf.get_row_url('media.csv', row)
                if isinstance(url, rfc3986.URIReference):
                    url = url.normalize().unsplit()
                    row['URL'] = url
                f_ext = url.split('.')[-1].lower()
                # Debug mode caps the number of processed rows.
                if args.debug and i > 500:
                    break
                if (mime_types is None) or f_ext in mime_types\
                        or any(row['mimetype'].startswith(x) for x in mime_types):
                    if args.list:
                        m = '{0} ({1})'.format(row['mimetype'], f_ext)
                        size[m] += int(row['size'])
                        number.update([m])
                    else:
                        used_file_extensions.add(f_ext.lower())
                        # Files are sharded by the first two ID characters.
                        d = media_dir / row['ID'][:2]
                        d.mkdir(exist_ok=True)
                        fn = '.'.join([row['ID'], f_ext])
                        target = d / fn
                        row['local_path'] = pathlib.Path(row['ID'][:2]) / fn
                        if i == 0:
                            w.writerow(row)
                        w.writerow(row.values())
                        media.append(target)
                        # The media ID doubles as the expected MD5 checksum.
                        if (not target.exists()) or md5(target) != row['ID']:
                            _create_download_thread(url, target)

    if args.list:
        for k, v in size.most_common():
            print('\t'.join([k.ljust(20), str(number[k]), format_size(v)]))
        return

    # Waiting for the download threads to finish
    if 'download_threads' in globals():
        for t in download_threads:
            t.join()

    if args.create_release:
        assert media_dir.exists(), 'No folder "{0}" found in {1}'.format(
            MEDIA, media_dir.resolve())

        release_dir.mkdir(exist_ok=True)

        media.append(media_dir / INDEX_CSV)

        try:
            zipf = zipfile.ZipFile(
                str(release_dir / '{0}.zip'.format(MEDIA)), 'w', zipfile.ZIP_DEFLATED)
            fp = args.out
            for f in tqdm.tqdm(media, desc='Creating {0}.zip'.format(MEDIA)):
                zipf.write(str(f), str(os.path.relpath(str(f), str(fp))))
            zipf.close()
        except Exception as e:
            args.log.error(e)
            raise

        def _contrib(d):
            # Keep only the Zenodo-relevant keys of a contributor record.
            return {k: v for k, v in d.items()
                    if k in {'name', 'affiliation', 'orcid', 'type'}}

        version_v = git_describe('.').split('-')[0]
        version = version_v.replace('v', '')
        git_url = [r for r in ds.repo.repo.remotes
                   if r.name == 'origin'][0].url.replace('.git', '')
        with jsonlib.update(
                release_dir / ZENODO_FILE_NAME, indent=4,
                default=collections.OrderedDict()) as md:
            contribs = ds.dir / 'CONTRIBUTORS.md'
            creators, contributors = get_creators_and_contributors(
                contribs.read_text(encoding='utf8') if contribs.exists() else '',
                strict=False)
            if creators:
                md['creators'] = [_contrib(p) for p in creators]
            if contributors:
                md['contributors'] = [_contrib(p) for p in contributors]
            # Merge pre-existing, CLI-passed and default communities.
            communities = [r["identifier"] for r in md.get("communities", [])] + \
                [c.strip() for c in nfilter(args.communities.split(','))] + \
                COMMUNITIES
            if communities and not args.debug:
                md['communities'] = [
                    {"identifier": community_id}
                    for community_id in sorted(set(communities))]
            md.update({
                'title': '{0} {1} Files'.format(ds.metadata.title, MEDIA.title()),
                'access_right': 'open',
                'keywords': sorted(set(md.get('keywords', []) + ['linguistics'])),
                'upload_type': 'dataset',
                'publication_date': datetime.today().strftime('%Y-%m-%d'),
                'version': version,
                'related_identifiers': [
                    {
                        'scheme': 'url',
                        'identifier': '{0}/tree/{1}'.format(git_url, version_v),
                        'relation': 'isSupplementTo'
                    },
                ],
            })
            if args.parent_doi:
                md['related_identifiers'].append({
                    'scheme': 'doi',
                    'identifier': args.parent_doi,
                    'relation': 'isPartOf'})
                supplement_to = " - Supplement to dataset " \
                    "<a href='https://doi.org/{0}'>{1}</a> ".format(
                        args.parent_doi, ds.metadata.title)  # noqa: E122
            if ds.metadata.url:
                md['related_identifiers'].append({
                    'scheme': 'url',
                    'identifier': ds.metadata.url,
                    'relation': 'isAlternateIdentifier'})
            formats = ', '.join(sorted(used_file_extensions))
            descr = '<br /><br />' + ds.metadata.description if ds.metadata.description else ''
            online_url, online = '', ''
            if ds.metadata.url:
                online_url = ds.metadata.url
                online = "<br /><br />Available online at: <a href='{0}'>{0}</a>".format(
                    online_url)
            md['description'] = html.escape(DESCRIPTION.format(
                url=online_url,
                formats=' ({0})'.format(formats) if formats else '',
                title=md['title'],
                supplement_to=supplement_to,
                descr=descr,
                online=online))

            license_md = ''
            if ds.metadata.zenodo_license:
                md['license'] = {'id': ds.metadata.zenodo_license}
                license_md = LICENCE.format(ds.metadata.zenodo_license)

            DataDir(release_dir).write('README.md', README.format(
                title=md['title'],
                doi='https://doi.org/{0}'.format(args.parent_doi),
                ds_title=ds.metadata.title,
                license=license_md,
                formats=' ({0})'.format(formats) if formats else '',
                media=MEDIA,
                index=INDEX_CSV))

    if args.update_zenodo:
        md = {}
        md.update(jsonlib.load(release_dir / ZENODO_FILE_NAME))

        if args.debug:
            # Sandbox credentials are used in debug mode.
            api_url = API_URL_SANDBOX
            access_token = os.environ.get('ZENODO_SANDBOX_ACCESS_TOKEN')
        else:
            api_url = API_URL
            access_token = ACCESS_TOKEN
        zenodo_url = api_url.replace('api/', '')

        args.log.info('Updating Deposit ID {0} on {1} with:'.format(
            args.update_zenodo, zenodo_url))
        api = Zenodo(api_url=api_url, access_token=access_token)
        try:
            rec = api.record_from_id('{0}record/{1}'.format(
                zenodo_url, args.update_zenodo))
        except Exception as e:
            args.log.error(
                'Check connection and credentials for accessing Zenodo.\n{0}'.format(e))
            return
        latest_version = rec.links['latest'].split('/')[-1]
        if latest_version != args.update_zenodo:
            args.log.warn(
                'Passed deposit ID does not refer to latest version {0}!'.format(
                    latest_version))
        args.log.info(' DOI: ' + rec.metadata.doi)
        args.log.info(' Title: ' + rec.metadata.title)
        args.log.info(' Version: ' + rec.metadata.version)
        args.log.info(' Date: ' + rec.metadata.publication_date)
        args.log.info(' Files: ' + ', '.join([f.key for f in rec.files]))
        # Interactive confirmation before touching the deposit.
        p = input("Proceed? [y/N]: ")
        if p.lower() == 'y':
            dep = api.update_deposit(args.update_zenodo, **md)
            if dep.state != PUBLISHED:
                api.publish_deposit(dep)
            args.log.info('Updated successfully')
def describe_repos(r, org, name=None):
    """Return Dublin Core title/description metadata for a repository clone.

    :param r: repository path-like object (must have a ``name`` attribute).
    :param org: organization name used as the title prefix.
    :param name: optional repository name overriding ``r.name``.
    """
    title = '{0}/{1}'.format(org, name or r.name)
    res = OrderedDict()
    res['dc:title'] = title
    res['dc:description'] = git_describe(r)
    return res
def run(args):
    """Write glottolog_map.json/.html into ``args.output`` and print the URI.

    Renders an HTML world map of languoids color-coded by top-level family;
    optionally restricted to glottocodes (with coordinate overrides) supplied
    via ``get_glottocodes``.
    """
    legend = collections.Counter()
    glottocodes = get_glottocodes(args)
    nodes = {n.id: n for n in args.repos.languoids()}
    langs = []
    for n in nodes.values():
        # Plot all language-level languoids when no glottocode list is given,
        # otherwise exactly the listed ones; coordinates may come from the
        # languoid itself or from the glottocodes mapping.
        if ((not glottocodes and n.level == args.repos.languoid_levels.language)
                or (glottocodes and n.id in glottocodes)) \
                and (n.latitude != None or glottocodes.get(n.id, (None,))[0] != None):  # noqa: W503
            # The top-level family is the first element of the lineage.
            fid = n.lineage[0][1] if n.lineage else n.id
            # Skip bookkeeping families, unless the languoid IS that family.
            if (not nodes[fid].category.startswith('Pseudo')) or fid == n.id:
                langs.append((n, fid))
                legend.update([fid])

    # One qualitative color per family, biggest families first.
    color_map = [fid for fid, _ in legend.most_common()]
    color_map = dict(zip(color_map, qualitative_colors(len(color_map))))

    def l2f(t):
        # Convert a (languoid, family id) pair into a GeoJSON feature.
        n, fid = t
        # Coordinates supplied via the glottocodes mapping take precedence.
        lat, lon = glottocodes.get(n.id, (None, None))
        if lat is None:
            lat = n.latitude
        if lon is None:
            lon = n.longitude
        if lon <= -26:
            lon += 360  # make the map pacific-centered.

        return {
            "geometry": {"coordinates": [lon, lat], "type": "Point"},
            "id": n.id,
            "properties": {
                "name": n.name,
                "color": color_map[fid],
                "family": nodes[fid].name,
                "family_id": fid,
            },
            "type": "Feature"
        }

    def legend_item(fid, c):
        # One legend entry: a color swatch linking to the family at Glottolog.
        return \
            '<span style="background-color: {0}; border: 1px solid black;">'\
            ' </span> '\
            '<a href="https://glottolog.org/resource/languoid/id/{1}">{2}</a> ({3})'.format(
                color_map[fid], fid, nodes[fid].name, c)

    geojson = {
        "features": list(map(l2f, langs)),
        "properties": {
            # Only families with enough languages get a legend entry.
            "legend": {
                fid: legend_item(fid, c) for fid, c in legend.most_common()
                if c >= args.min_langs_for_legend
            },
        },
        "type": "FeatureCollection"
    }

    def rendered_template(name, **kw):
        # Load a packaged template and substitute the keyword values.
        return string.Template(
            args.pkg_dir.joinpath(
                'templates', 'htmlmap', name).read_text(encoding='utf8')).substitute(**kw)

    jsname = 'glottolog_map.json'
    args.output.joinpath(jsname).write_text(
        rendered_template('htmlmap.js', geojson=json.dumps(geojson, indent=4)),
        encoding='utf8')
    html = args.output.joinpath('glottolog_map.html')
    html.write_text(
        rendered_template(
            'htmlmap.html',
            version=git_describe(args.repos.repos),
            jsname=jsname,
            nlangs=len(langs)),
        encoding='utf8')
    print(html.resolve().as_uri())
    if args.open:
        webbrowser.open(str(html))  # pragma: no cover
def htmlmap(args):
    """
    glottolog htmlmap [OUTDIR]

    Render an HTML world map of all language-level languoids, color-coded by
    top-level family. Writes glottolog_map.json and glottolog_map.html.
    """
    nodes = {n.id: n for n in args.repos.languoids()}
    legend = Counter()

    langs = []
    for n in nodes.values():
        if n.level == Level.language and n.latitude != None:
            # The top-level family is the first element of the lineage.
            fid = n.lineage[0][1] if n.lineage else n.id
            if not nodes[fid].category.startswith('Pseudo'):
                langs.append((n, fid))
                legend.update([fid])

    # Derive a distinct 3-digit hex color per family from its sort position.
    color_map = {
        fid: "{0:0{1}X}".format((i + 1) * 10, 3)
        for i, fid in enumerate(sorted(legend.keys()))}

    def l2f(t):
        # Convert a (languoid, family id) pair into a GeoJSON feature.
        n, fid = t
        lon, lat = n.longitude, n.latitude
        if lon <= -26:
            lon += 360  # make the map pacific-centered.

        return {
            "geometry": {"coordinates": [lon, lat], "type": "Point"},
            "id": n.id,
            "properties": {
                "name": n.name,
                "color": color_map[fid],
                "family": nodes[fid].name,
                "family_id": fid,
            },
            "type": "Feature"
        }

    def legend_item(fid, c):
        # One legend entry: a color swatch linking to the family at Glottolog.
        return \
            '<span style="background-color: #{0}; border: 1px solid black;">'\
            ' </span> '\
            '<a href="http://glottolog.org/resource/languoid/id/{1}">{2}</a> ({3})'.format(
                color_map[fid], fid, nodes[fid].name, c)

    geojson = {
        # NOTE(review): under Python 3 this is a map object, which json.dumps
        # cannot serialize — presumably Python-2-era code; verify/wrap in list().
        "features": map(l2f, langs),
        "properties": {
            "legend": {
                fid: legend_item(fid, c) for fid, c in legend.most_common() if
                c > 10
            },
        },
        "type": "FeatureCollection"
    }

    def rendered_template(name, **kw):
        # Load a template shipped with pyglottolog and substitute keywords.
        return Template(
            read_text(
                Path(pyglottolog.__file__).parent.joinpath(
                    'templates', 'htmlmap', name))).substitute(**kw)

    jsname = 'glottolog_map.json'
    outdir = Path('.') if not args.args else Path(args.args[0])
    write_text(
        outdir.joinpath(jsname),
        rendered_template('htmlmap.js', geojson=dumps(geojson, indent=4)))
    html = outdir.joinpath('glottolog_map.html')
    write_text(
        html,
        rendered_template(
            'htmlmap.html',
            version=git_describe(args.repos.repos),
            jsname=jsname,
            nlangs=len(langs)))
    print(html.resolve().as_uri())
def __unicode__(self):
    """Readable label for the D-PLACE data repository (version and path)."""
    version = git_describe(self.dir)
    return '<D-PLACE data repos {0} at {1}>'.format(version, self.dir)