Example #1
0
def test_git_describe(tmppath, capsys):
    """git_describe raises for missing paths and falls back to the dir name."""
    from clldutils.path import git_describe

    target = tmppath / 'testdir'
    # A non-existing path must be rejected.
    with pytest.raises(ValueError):
        git_describe(target)
    # An existing directory that is not a git repo yields its own name.
    target.mkdir()
    assert git_describe(target) == 'testdir'
Example #2
0
 def cldf(self, **kw):
     """Create the CLDF export for this dataset after validating the raw data.

     :param kw: keyword arguments; must contain 'glottolog_repos' and \
     'concepticon_repos' (paths to the respective repository clones).
     """
     # Record the exact reference-catalog versions for provenance.
     self.glottolog_version = git_describe(kw['glottolog_repos'])
     self.concepticon_version = git_describe(kw['concepticon_repos'])
     try:
         # Verify the BagIt checksums of the raw data directory.
         bag = bagit.Bag(self.raw.parent.as_posix())
         if not bag.is_valid():
             # Checksums are stale: let the user decide whether to accept the
             # changed download and regenerate the manifests, or abort.
             if confirm(
                     'The downloaded data has changed. Update checksums?'):
                 bag.save(manifests=True)
                 assert bag.is_valid()
             else:
                 raise bagit.BagError('invalid raw data')
         concepticon = Concepticon(kw['concepticon_repos'])
         if self.conceptlist:
             # Resolve the conceptlist identifier to the full object.
             self.conceptlist = concepticon.conceptlists[self.conceptlist]
         self._run_command('cldf', concepticon, **kw)
     except bagit.BagError:
         self.log.error('invalid raw data for dataset %s' % self.id)
Example #3
0
 def json_ld(self):
     """Return an ordered JSON-LD dict describing this provenance entity.

     'dc:created' is taken from an explicit version if given, otherwise
     derived from the git clone; Dublin Core metadata follows, sorted by key.
     """
     doc = collections.OrderedDict()
     doc['rdf:about'] = self.url
     doc['rdf:type'] = 'prov:Entity'
     if self.version:
         doc['dc:created'] = self.version
     elif self.clone:
         doc['dc:created'] = git_describe(self.clone)
     for key in sorted(self.dc):
         doc['dc:{0}'.format(key)] = self.dc[key]
     return doc
Example #4
0
def run(args):
    """Print a bibliographic citation for the dataset.

    Editors active in ``args.year`` are listed; the version is taken from
    ``args.version`` or derived from the git checkout.
    """
    if not args.version:  # pragma: no cover
        args.version = git_describe(args.repos.repos)
        if args.version.startswith('v'):
            args.version = args.version[1:]
    # Editors whose tenure covers the requested year.
    active = [
        e for e in args.repos.editors
        if (not e.end or int(e.end) >= args.year) and int(e.start) <= args.year]
    names = []
    for ed in active:
        parsed = HumanName(ed.name)
        names.append('{0.last}, {0.first} {0.middle}'.format(parsed).strip())
    editors = ' & '.join(names)
    print(
        "{0} (eds.) {1.year}. {2.title} {1.version}. {2.description}. "
        "{2.publisher.place}: {2.publisher.name}. Available online at {2.url}".format(
            editors, args, args.repos.dataset_metadata,
        ))
Example #5
0
def run(args):
    """Print the dataset citation and write Zenodo deposit metadata.

    Side effect: writes ``.zenodo.json`` into the repository directory.
    """
    if not args.version:  # pragma: no cover
        args.version = git_describe(args.repos.repos)
        if args.version.startswith('v'):
            args.version = args.version[1:]
    # Editors whose tenure covers the requested year.
    current_editors = [
        e for e in args.repos.editors
        if (not e.end or int(e.end) >= args.year) and int(e.start) <= args.year]
    editor_names = ' & '.join(
        '{0.last}, {0.first} {0.middle}'.format(HumanName(e.name)).strip()
        for e in current_editors)
    res = "{0} (eds.) {1.year}. {2.title} {1.version}. {2.description}. "\
        "{2.publisher.place}: {2.publisher.name}. Available online at {2.url}".format(
            editor_names, args, args.repos.dataset_metadata,
        )
    print(res)
    dump(zenodo_json(res, args.version, current_editors),
         args.repos.repos / '.zenodo.json',
         indent=4)
Example #6
0
 def describe(self):
     """Return the git version description of this repository clone."""
     return git_describe(self.repos)
Example #7
0
 def __unicode__(self):
     """Return a human-readable description of the repository clone."""
     version = git_describe(self.repos)
     return '<Glottolog repos {0} at {1}>'.format(version, self.repos)
Example #8
0
def htmlmap(args, min_langs_for_legend_item=10):
    """
    glottolog --repos=. htmlmap [OUTDIR] [GLOTTOCODES]

    Write an interactive HTML map (and the GeoJSON it displays) for the
    languoids of the repository. If GLOTTOCODES (a file of whitespace
    separated glottocodes) is given, only those languoids are plotted.
    """
    nodes = {n.id: n for n in args.repos.languoids()}
    legend = Counter()

    # Optional second positional argument: a file listing glottocodes to plot.
    glottocodes = None
    if len(args.args) > 1:
        glottocodes = read_text(args.args[1]).split()

    langs = []
    for n in nodes.values():
        # Either plot all languages (no explicit selection) or exactly the
        # selected glottocodes; in both cases coordinates are required.
        # Fixed: compare to None with `is not None`, not `!=` (PEP 8 / E711).
        if ((glottocodes is None
             and n.level == args.repos.languoid_levels.language) or
            (glottocodes and n.id in glottocodes)) and n.latitude is not None:
            fid = n.lineage[0][1] if n.lineage else n.id
            # Skip members of bookkeeping/pseudo families, unless the node is
            # the family itself.
            if (not nodes[fid].category.startswith('Pseudo')) or fid == n.id:
                langs.append((n, fid))
                legend.update([fid])

    # Assign colors to families, biggest families first.
    color_map = [fid for fid, _ in legend.most_common()]
    color_map = dict(zip(color_map, qualitative_colors(len(color_map))))
    print(color_map)

    def l2f(t):
        """Convert a (languoid, family-id) pair into a GeoJSON Feature."""
        n, fid = t
        lon, lat = n.longitude, n.latitude
        if lon <= -26:
            lon += 360  # make the map pacific-centered.

        return {
            "geometry": {
                "coordinates": [lon, lat],
                "type": "Point"
            },
            "id": n.id,
            "properties": {
                "name": n.name,
                "color": color_map[fid],
                "family": nodes[fid].name,
                "family_id": fid,
            },
            "type": "Feature"
        }

    def legend_item(fid, c):
        """Return the HTML snippet for one family in the map legend."""
        return \
            '<span style="background-color: {0}; border: 1px solid black;">'\
            '&nbsp;&nbsp;&nbsp;</span> '\
            '<a href="https://glottolog.org/resource/languoid/id/{1}">{2}</a> ({3})'.format(
                color_map[fid], fid, nodes[fid].name, c)

    geojson = {
        "features": list(map(l2f, langs)),
        "properties": {
            "legend": {
                fid: legend_item(fid, c)
                for fid, c in legend.most_common()
                if c >= min_langs_for_legend_item
            },
        },
        "type": "FeatureCollection"
    }

    def rendered_template(name, **kw):
        """Render a template from the package's htmlmap template directory."""
        return Template(
            read_text(
                Path(pyglottolog.__file__).parent.joinpath(
                    'templates', 'htmlmap', name))).substitute(**kw)

    jsname = 'glottolog_map.json'
    outdir = Path('.') if not args.args else Path(args.args[0])
    write_text(
        outdir.joinpath(jsname),
        rendered_template('htmlmap.js', geojson=dumps(geojson, indent=4)))
    html = outdir.joinpath('glottolog_map.html')
    write_text(
        html,
        rendered_template('htmlmap.html',
                          version=git_describe(args.repos.repos),
                          jsname=jsname,
                          nlangs=len(langs)))
    print(html.resolve().as_uri())
Example #9
0
File: app.py Project: esbesb/clld
def includeme(config):
    """Upgrading:

    - register utilities "by hand", after config.include('clld.web.app')
    - add routes by hand (and remove these from the **kw passed to Configurator)

    :param config:
    :return:
    """
    #
    # now we exploit the default package layout as created via the CLLD scaffold:
    #
    # note: the following exploits the import time side effect of modifying the webassets
    # environment!
    root_package = config.root_package.__name__
    pkg_dir = Path(config.root_package.__file__).parent.resolve()
    maybe_import('%s.assets' % root_package, pkg_dir=pkg_dir)

    # JSON/JSONP renderers serializing date(time)s as ISO-8601 strings.
    json_renderer = JSON()
    json_renderer.add_adapter(datetime.datetime,
                              lambda obj, req: obj.isoformat())
    json_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('json', json_renderer)

    jsonp_renderer = JSONP(param_name='callback')
    jsonp_renderer.add_adapter(datetime.datetime,
                               lambda obj, req: obj.isoformat())
    jsonp_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('jsonp', jsonp_renderer)

    config.set_request_factory(ClldRequest)
    config.registry.registerUtility(CtxFactoryQuery(),
                                    interfaces.ICtxFactoryQuery)
    config.registry.registerUtility(OlacConfig(), interfaces.IOlacConfig)
    config.registry.registerUtility(CldfConfig(), interfaces.ICldfConfig)

    # initialize the db connection
    engine = engine_from_config(config.registry.settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    Base.metadata.bind = engine

    # Derive the app version from the git checkout; git_describe raises
    # ValueError for non-git deployments, in which case no tag is recorded.
    try:
        git_tag = git_describe(Path(pkg_dir).parent)
    except ValueError:  # pragma: no cover
        git_tag = None

    config.add_settings({
        'pyramid.default_locale_name': 'en',
        'clld.pkg': root_package,
        'clld.git_tag': git_tag,
        'clld.parameters': {}
    })
    if 'clld.files' in config.registry.settings:
        # deployment-specific location of static data files
        abspath = Path(config.registry.settings['clld.files']).resolve()
        config.add_settings({'clld.files': abspath})
        config.add_static_view('files', str(abspath))

    # event subscribers:
    config.add_subscriber(add_localizer, events.NewRequest)
    config.add_subscriber(init_map, events.ContextFound)
    config.add_subscriber(
        partial(add_renderer_globals,
                maybe_import('%s.util' % root_package, pkg_dir=pkg_dir)),
        events.BeforeRender)

    #
    # make it easy to register custom functionality
    #
    for name, func in {
            'register_utility': register_utility,
            'register_datatable': partial(register_cls, interfaces.IDataTable),
            'register_map': partial(register_cls, interfaces.IMap),
            'register_menu': register_menu,
            'register_resource': register_resource,
            'register_adapter': register_adapter,
            'register_adapters': register_adapters,
            'register_download': register_download,
            'register_staticresource': register_staticresource,
            'add_route_and_view': add_route_and_view,
            'add_settings_from_file': add_settings_from_file,
            'add_301': add_301,
            'add_410': add_410,
            'add_page': add_page,
            'register_resource_routes_and_views':
            register_resource_routes_and_views,
    }.items():
        config.add_directive(name, func)

    #
    # routes and views
    #
    config.add_static_view('clld-static', 'clld:web/static')
    config.add_static_view('static', '%s:static' % root_package)

    config.add_route_and_view('_js', '/_js', js, http_cache=3600)

    # add some maintenance hatches
    config.add_route_and_view('_raise', '/_raise', _raise)
    config.add_route_and_view('_ping', '/_ping', _ping, renderer='json')

    # sitemap support:
    config.add_route_and_view('robots', '/robots.txt', robots)
    config.add_route_and_view('sitemapindex', '/sitemap.xml', sitemapindex)
    config.add_route_and_view('sitemap', '/sitemap.{rsc}.{n}.xml', sitemap)
    config.add_route('resourcemap', '/resourcemap.json')
    config.add_view(resourcemap, route_name='resourcemap', renderer='jsonp')
    config.add_route_and_view('select_combination', '/_select_combination',
                              select_combination)

    config.add_route_and_view('unapi', '/unapi', unapi)
    config.add_route_and_view('olac', '/olac', olac)

    config.add_settings_from_file(pkg_dir.joinpath('appconf.ini'))
    if not config.registry.settings.get('mako.directories'):
        config.add_settings({'mako.directories': ['clld:web/templates']})

    # Register routes, views, datatables and adapters for each core resource.
    for rsc in RESOURCES:
        config.register_resource_routes_and_views(rsc)
        config.register_datatable(
            rsc.plural, getattr(datatables, rsc.plural.capitalize(),
                                DataTable))
        register_resource_adapters(config, rsc)

    # maps
    config.register_map('languages', Map)
    config.register_map('language', LanguageMap)
    config.register_map('parameter', ParameterMap)
    config.register_map('combination', CombinationMap)

    config.include('clld.web.adapters')

    for icon in ICONS:
        config.registry.registerUtility(icon, interfaces.IIcon, name=icon.name)
    config.registry.registerUtility(ORDERED_ICONS, interfaces.IIconList)
    config.registry.registerUtility(MapMarker(), interfaces.IMapMarker)

    #
    # inspect default locations for views and templates:
    #
    # The boolean marks whether a page is enabled by default; pages with a
    # matching .mako template in the app package are enabled below.
    home_comp = OrderedDict()
    for name, template in [
        ('introduction', False),
        ('about', False),
        ('terms', False),
        ('glossary', False),
        ('history', False),
        ('changes', False),
        ('credits', False),
        ('legal', True),
        ('download', True),
        ('contact', True),
        ('help', False),
    ]:
        home_comp[name] = template

    if pkg_dir.joinpath('templates').exists():
        for p in pkg_dir.joinpath('templates').iterdir():
            if p.stem in home_comp and p.suffix == '.mako':
                home_comp[p.stem] = True

    for name, template in home_comp.items():
        if template:
            config.add_page(name)

    config.add_settings(
        {'home_comp': [k for k in home_comp.keys() if home_comp[k]]})

    if 'clld.favicon' not in config.registry.settings:
        favicon = {'clld.favicon': 'clld:web/static/images/favicon.ico'}
        # hard to test (in particular on travis) and without too much consequence
        # (and the consequences faced are easy to spot).
        if pkg_dir.joinpath('static',
                            'favicon.ico').exists():  # pragma: no cover
            favicon['clld.favicon'] = root_package + ':static/favicon.ico'
        config.add_settings(favicon)

    # Content hash of the favicon, used for cache busting.
    config.add_settings({
        'clld.favicon_hash':
        md5(abspath_from_asset_spec(config.registry.settings['clld.favicon']))
    })

    translation_dirs = ['clld:locale']
    if pkg_dir.joinpath('locale').exists():
        translation_dirs.append('%s:locale' % root_package)  # pragma: no cover
    config.add_translation_dirs(*translation_dirs)

    if pkg_dir.joinpath(
            'static/publisher_logo.png').exists():  # pragma: no cover
        config.add_settings({
            'clld.publisher_logo':
            '%s:static/publisher_logo.png' % root_package
        })

    if asbool(config.registry.settings.get('clld.pacific_centered_maps')):
        geojson.pacific_centered()

    v = maybe_import('%s.views' % root_package, pkg_dir=pkg_dir)
    if v:
        config.scan(v)  # pragma: no cover

    menuitems = config.registry.settings.get(
        'clld.menuitems_list',
        ['contributions', 'parameters', 'languages', 'contributors'])
    config.register_menu(('dataset', dict(label='Home')), *menuitems)

    config.include('pyramid_mako')

    # Pull in the app's own adapters/datatables/maps modules, if present.
    for name in ['adapters', 'datatables', 'maps']:
        mod = maybe_import('%s.%s' % (root_package, name), pkg_dir=pkg_dir)
        if mod and hasattr(mod, 'includeme'):
            config.include(mod)

    config.register_download(CldfDownload(common.Dataset, root_package))
Example #10
0
 def __unicode__(self):
     """Return a human-readable description of the repository clone."""
     # Prefer the resolved (symlink-free) directory name when the path exists.
     if self.repos.exists():
         name = self.repos.resolve().name
     else:
         name = self.repos.name
     return '<{0} repository {1} at {2}>'.format(
         name, git_describe(self.repos), self.repos)
Example #11
0
def git_hash(d):
    """Return the commit hash embedded in the ``git describe`` output for ``d``."""
    described = git_describe(d)
    # `git describe` output ends in "-g<hash>" when past a tag.
    return described.split('-g')[-1]
Example #12
0
 def version(self):
     """Return the git version description of the repository."""
     return git_describe(self.repos)
Example #13
0
def main(scripts, dev, glr):
    """Convert the raw export in ``scripts`` into a CLDF StructureDataset.

    :param scripts: path to the directory containing the `to_cldf` export.
    :param dev: passed through to ``desc`` when resolving contributor READMEs.
    :param glr: path to a Glottolog repository clone.

    Writes the dataset into the local ``cldf`` directory and validates it.
    """
    cldf_dir = Path('cldf')
    # Normalize the BibTeX file (lowercased keys/field names) in place.
    bib = parse_string(read_text(cldf_dir / 'sources.bib'),
                       bib_format='bibtex')
    write_text(cldf_dir / 'sources.bib', bib.lower().to_string('bibtex'))

    glottolog = Glottolog(glr)
    ds = StructureDataset.in_dir(cldf_dir)
    # Record the Glottolog version used, for provenance.
    ds.tablegroup.notes.append(
        OrderedDict([('dc:title', 'environment'),
                     ('properties',
                      OrderedDict([
                          ('glottolog_version', git_describe(glottolog.repos)),
                      ]))]))
    ds.add_columns('ValueTable', {
        'name': 'Marginal',
        'datatype': 'boolean'
    }, {
        'name': 'Allophones',
        'separator': ' '
    }, 'Contribution_ID')
    # Distinctive phonological features recorded per segment.
    features = [
        "tone", "stress", "syllabic", "short", "long", "consonantal",
        "sonorant", "continuant", "delayedRelease", "approximant", "tap",
        "trill", "nasal", "lateral", "labial", "round", "labiodental",
        "coronal", "anterior", "distributed", "strident", "dorsal", "high",
        "low", "front", "back", "tense", "retractedTongueRoot",
        "advancedTongueRoot", "periodicGlottalSource", "epilaryngealSource",
        "spreadGlottis", "constrictedGlottis", "fortis",
        "raisedLarynxEjective", "loweredLarynxImplosive", "click"
    ]
    ds.add_component('ParameterTable', 'SegmentClass', *features)
    ds.add_component('LanguageTable')
    ds.add_table(
        'contributions.csv', 'ID', 'Name', 'Contributor_ID', {
            'name': 'Source',
            'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#source',
            'separator': ';'
        }, 'URL')
    ds.add_table(
        'contributors.csv',
        'ID',
        'Name',
        'Description',
        'Readme',
        'Contents',
        {
            'name': 'Source',
            'propertyUrl': 'http://cldf.clld.org/v1.0/terms.rdf#source',
            'separator': ';'
        },
        'URL',
    )

    def read(what):
        """Read one CSV file of the raw export as namedtuple rows."""
        return reader(scripts / 'to_cldf' / 'cldf' / what, namedtuples=True)

    languoids = {lg.id: lg for lg in glottolog.languoids()}

    values, segments, languages, inventories, sources = [], [], {}, {}, []
    for contrib in read('contributors.csv'):
        sources.append(
            dict(
                ID=contrib.Name,
                Name=contrib.Contributor,
                Description=contrib.Description,
                Readme=desc(dev, contrib.Name),
                Contents=contrib.Contents,
                Source=[
                    c.strip().lower() for c in contrib.Citation.split(';')
                ],
                URL=contrib.SourceURL if contrib.SourceURL != 'NA' else '',
            ))

    # Parameters (segments) get stable IDs derived from their description.
    pid_map = {}
    for row in read('parameters.csv'):
        pid = md5(row.Description.encode('utf8')).hexdigest().upper()
        pid_map[row.ID] = pid
        segments.append(
            dict(ID=pid,
                 Name=row.Name,
                 Description=row.Description,
                 SegmentClass=row.SegmentClass,
                 **{f: getattr(row, f)
                    for f in features}))

    src = {}
    for row in read('contributions.csv'):
        src[row.ID] = row.References.split(
            ';') if row.References != 'no source given' else []
        src[row.ID] = [sid.lower() for sid in src[row.ID]]
        inventories[row.ID] = dict(ID=row.ID,
                                   Name=row.Name,
                                   Contributor_ID=row.Contributor_ID,
                                   URL=row.URI if row.URI != 'NA' else '',
                                   Source=src[row.ID])

    uniq = set()
    for row in read('values.csv'):
        # De-duplicate on (language, parameter, contribution).
        pk = (row.Language_ID, row.Parameter_ID, row.Contribution_ID)
        if pk in uniq:
            print('skipping duplicate phoneme {0}'.format(pk))
            continue
        uniq.add(pk)
        lid = row.Language_ID if row.Language_ID in languoids else slug(
            inventories[row.Contribution_ID]['Name'])
        if lid not in languages:
            #
            # FIXME: Language_ID == 'NA' for three inventories! This must be mapped!
            #
            lang = languoids.get(lid)
            languages[lid] = dict(
                ID=lid,
                Name=lang.name if lang else None,
                Glottocode=lang.id if lang else None,
                ISO639P3code=row.ISO639P3code
                if row.ISO639P3code != 'NA' else None,
            )
        values.append(
            dict(
                ID=row.ID,
                Language_ID=lid,
                Parameter_ID=pid_map[row.Parameter_ID],
                Contribution_ID=row.Contribution_ID,
                Value=row.Name,
                # FALSE|TRUE|NA -- parsed explicitly instead of eval()ing
                # (potentially untrusted) cell content.
                Marginal=None if row.Marginal == 'NA'
                else row.Marginal.lower() == 'true',
                Allophones=row.Allophones.split()
                if row.Allophones != 'NA' else [],
                Source=src[row.Contribution_ID],
            ))

    ds.write(
        **{
            'ValueTable': values,
            'LanguageTable': languages.values(),
            'ParameterTable': segments,
            'contributions.csv': inventories.values(),
            'contributors.csv': sources
        })
    ds.validate(logging.getLogger(__name__))
Example #14
0
 def describe(self) -> str:
     """Return the ``git describe`` string for this repository clone."""
     return git_describe(self.repos)
Example #15
0
File: app.py Project: clld/clld
def includeme(config):
    """Upgrading:

    - register utilities "by hand", after config.include('clld.web.app')
    - add routes by hand (and remove these from the **kw passed to Configurator)

    :param config:
    :return:
    """
    #
    # now we exploit the default package layout as created via the CLLD scaffold:
    #
    # note: the following exploits the import time side effect of modifying the webassets
    # environment!
    root_package = config.root_package.__name__
    pkg_dir = Path(config.root_package.__file__).parent.resolve()
    maybe_import('%s.assets' % root_package, pkg_dir=pkg_dir)

    # JSON/JSONP renderers serializing date(time)s as ISO-8601 strings.
    json_renderer = JSON()
    json_renderer.add_adapter(datetime.datetime, lambda obj, req: obj.isoformat())
    json_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('json', json_renderer)

    jsonp_renderer = JSONP(param_name='callback')
    jsonp_renderer.add_adapter(datetime.datetime, lambda obj, req: obj.isoformat())
    jsonp_renderer.add_adapter(datetime.date, lambda obj, req: obj.isoformat())
    config.add_renderer('jsonp', jsonp_renderer)

    config.set_request_factory(ClldRequest)
    config.registry.registerUtility(CtxFactoryQuery(), interfaces.ICtxFactoryQuery)
    config.registry.registerUtility(OlacConfig(), interfaces.IOlacConfig)
    config.registry.registerUtility(CldfConfig(), interfaces.ICldfConfig)

    # initialize the db connection
    engine = engine_from_config(config.registry.settings, 'sqlalchemy.')
    DBSession.configure(bind=engine)
    Base.metadata.bind = engine

    # Derive the app version from the git checkout; git_describe raises
    # ValueError for non-git deployments, in which case no tag is recorded.
    try:
        git_tag = git_describe(Path(pkg_dir).parent)
    except ValueError:  # pragma: no cover
        git_tag = None

    config.add_settings({
        'pyramid.default_locale_name': 'en',
        'clld.pkg': root_package,
        'clld.git_tag': git_tag,
        'clld.parameters': {}})
    if 'clld.files' in config.registry.settings:
        # deployment-specific location of static data files
        abspath = Path(config.registry.settings['clld.files']).resolve()
        config.add_settings({'clld.files': abspath})
        config.add_static_view('files', str(abspath))

    # event subscribers:
    config.add_subscriber(add_localizer, events.NewRequest)
    config.add_subscriber(init_map, events.ContextFound)
    config.add_subscriber(
        partial(
            add_renderer_globals,
            maybe_import('%s.util' % root_package, pkg_dir=pkg_dir)),
        events.BeforeRender)

    #
    # make it easy to register custom functionality
    #
    for name, func in {
        'register_utility': register_utility,
        'register_datatable': partial(register_cls, interfaces.IDataTable),
        'register_map': partial(register_cls, interfaces.IMap),
        'register_menu': register_menu,
        'register_resource': register_resource,
        'register_adapter': register_adapter,
        'register_adapters': register_adapters,
        'register_download': register_download,
        'register_staticresource': register_staticresource,
        'add_route_and_view': add_route_and_view,
        'add_settings_from_file': add_settings_from_file,
        'add_301': add_301,
        'add_410': add_410,
        'add_page': add_page,
        'register_resource_routes_and_views': register_resource_routes_and_views,
    }.items():
        config.add_directive(name, func)

    #
    # routes and views
    #
    config.add_static_view('clld-static', 'clld:web/static')
    config.add_static_view('static', '%s:static' % root_package)

    config.add_route_and_view('_js', '/_js', js, http_cache=3600)

    # add some maintenance hatches
    config.add_route_and_view('_raise', '/_raise', _raise)
    config.add_route_and_view('_ping', '/_ping', _ping, renderer='json')

    # sitemap support:
    config.add_route_and_view('robots', '/robots.txt', robots)
    config.add_route_and_view('sitemapindex', '/sitemap.xml', sitemapindex)
    config.add_route_and_view('sitemap', '/sitemap.{rsc}.{n}.xml', sitemap)
    config.add_route('resourcemap', '/resourcemap.json')
    config.add_view(resourcemap, route_name='resourcemap', renderer='jsonp')
    config.add_route_and_view(
        'select_combination', '/_select_combination', select_combination)

    config.add_route_and_view('unapi', '/unapi', unapi)
    config.add_route_and_view('olac', '/olac', olac)

    config.add_settings_from_file(pkg_dir.joinpath('appconf.ini'))
    if not config.registry.settings.get('mako.directories'):
        config.add_settings({'mako.directories': ['clld:web/templates']})

    # Register routes, views, datatables and adapters for each core resource.
    for rsc in RESOURCES:
        config.register_resource_routes_and_views(rsc)
        config.register_datatable(
            rsc.plural, getattr(datatables, rsc.plural.capitalize(), DataTable))
        register_resource_adapters(config, rsc)

    # maps
    config.register_map('languages', Map)
    config.register_map('language', LanguageMap)
    config.register_map('parameter', ParameterMap)
    config.register_map('combination', CombinationMap)

    config.include('clld.web.adapters')

    for icon in ICONS:
        config.registry.registerUtility(icon, interfaces.IIcon, name=icon.name)
    config.registry.registerUtility(ORDERED_ICONS, interfaces.IIconList)
    config.registry.registerUtility(MapMarker(), interfaces.IMapMarker)

    #
    # inspect default locations for views and templates:
    #
    # The boolean marks whether a page is enabled by default; pages with a
    # matching .mako template in the app package are enabled below.
    home_comp = OrderedDict()
    for name, template in [
        ('introduction', False),
        ('about', False),
        ('terms', False),
        ('glossary', False),
        ('history', False),
        ('changes', False),
        ('credits', False),
        ('legal', True),
        ('download', True),
        ('contact', True),
        ('help', False),
    ]:
        home_comp[name] = template

    if pkg_dir.joinpath('templates').exists():
        for p in pkg_dir.joinpath('templates').iterdir():
            if p.stem in home_comp and p.suffix == '.mako':
                home_comp[p.stem] = True

    for name, template in home_comp.items():
        if template:
            config.add_page(name)

    config.add_settings({'home_comp': [k for k in home_comp.keys() if home_comp[k]]})

    if 'clld.favicon' not in config.registry.settings:
        favicon = {'clld.favicon': 'clld:web/static/images/favicon.ico'}
        # hard to test (in particular on travis) and without too much consequence
        # (and the consequences faced are easy to spot).
        if pkg_dir.joinpath('static', 'favicon.ico').exists():  # pragma: no cover
            favicon['clld.favicon'] = root_package + ':static/favicon.ico'
        config.add_settings(favicon)

    # Content hash of the favicon, used for cache busting.
    config.add_settings({
        'clld.favicon_hash': md5(abspath_from_asset_spec(
            config.registry.settings['clld.favicon']))})

    translation_dirs = ['clld:locale']
    if pkg_dir.joinpath('locale').exists():
        translation_dirs.append('%s:locale' % root_package)  # pragma: no cover
    config.add_translation_dirs(*translation_dirs)

    if pkg_dir.joinpath('static/publisher_logo.png').exists():  # pragma: no cover
        config.add_settings(
            {'clld.publisher_logo': '%s:static/publisher_logo.png' % root_package})

    if asbool(config.registry.settings.get('clld.pacific_centered_maps')):
        geojson.pacific_centered()

    v = maybe_import('%s.views' % root_package, pkg_dir=pkg_dir)
    if v:
        config.scan(v)  # pragma: no cover

    menuitems = config.registry.settings.get(
        'clld.menuitems_list',
        ['contributions', 'parameters', 'languages', 'contributors'])
    config.register_menu(('dataset', dict(label='Home')), *menuitems)

    config.include('pyramid_mako')

    # Pull in the app's own adapters/datatables/maps modules, if present.
    for name in ['adapters', 'datatables', 'maps']:
        mod = maybe_import('%s.%s' % (root_package, name), pkg_dir=pkg_dir)
        if mod and hasattr(mod, 'includeme'):
            config.include(mod)

    config.register_download(CldfDownload(common.Dataset, root_package))
Example #16
0
def assert_release(repos):
    """Return the version number of the release the repository is checked out at.

    Raises AssertionError when the checkout is not at a release tag.
    """
    m = VERSION_NUMBER_PATTERN.match(git_describe(repos))
    assert m, 'Repository is not checked out to a valid release tag'
    return m.group('number')  # pragma: no cover
Example #17
0
def run(args):
    """Download, list, package or publish the media files of a CLDF dataset.

    Depending on the passed arguments, this command either
    - lists media items aggregated by mimetype (``--list``),
    - downloads the media files into ``<out>/media`` (the default),
    - bundles downloaded files into a release zip plus Zenodo metadata
      (``--create-release``), or
    - pushes the stored Zenodo metadata to an existing deposit
      (``--update-zenodo``).

    Raises ParserError on invalid or inconsistent argument combinations.
    """

    ds = get_dataset(args)
    ds_cldf = ds.cldf_reader()
    release_dir = args.out / '{0}_{1}'.format(ds.id, MEDIA)

    # Validate the argument combination before doing any real work.
    if ds_cldf.get('media.csv', None) is None:  # pragma: no cover
        args.log.error('Dataset has no media.csv')
        raise ParserError
    if args.parent_doi and not Zenodo.DOI_PATTERN.match(args.parent_doi):
        args.log.error('Invalid passed DOI')
        raise ParserError
    if args.update_zenodo:
        # Updating Zenodo requires a previously created release directory
        # containing the metadata file ...
        if not release_dir.exists():
            args.log.error(
                '"{0}" not found -- run --create-release first?'.format(
                    release_dir))
            raise ParserError
        if not (release_dir / ZENODO_FILE_NAME).exists():
            args.log.error(
                '"{0}" not found -- run --create-release first?'.format(
                    release_dir / ZENODO_FILE_NAME))
            raise ParserError
        # ... and is mutually exclusive with creating a release.
        if args.create_release:
            args.log.error(
                'You cannot create the release and update zenodo at the same time.'
            )
            raise ParserError
    if args.create_release:
        if not args.parent_doi:
            args.log.error(
                'The corresponding DOI is required (via --parent-doi).')
            raise ParserError

    # Optional filter: comma-separated list of mimetypes and/or file extensions.
    mime_types = None
    if args.mimetype:
        mime_types = [m.strip() for m in nfilter(args.mimetype.split(','))]

    if args.list:
        # --list only aggregates statistics; nothing is downloaded.
        size = collections.Counter()
        number = collections.Counter()
    else:
        media_dir = args.out / MEDIA
        media_dir.mkdir(exist_ok=True)
        media = []

    if not args.update_zenodo:
        used_file_extensions = set()
        with UnicodeWriter(media_dir /
                           INDEX_CSV if not args.list else None) as w:
            for i, row in enumerate(
                    tqdm.tqdm([r for r in ds_cldf['media.csv']],
                              desc='Getting {0} items'.format(MEDIA))):
                url = ds_cldf.get_row_url('media.csv', row)
                if isinstance(url, rfc3986.URIReference):
                    url = url.normalize().unsplit()
                    row['URL'] = url
                f_ext = url.split('.')[-1].lower()
                # In debug mode only the first ~500 items are processed.
                if args.debug and i > 500:
                    break
                if (mime_types is None) or f_ext in mime_types\
                        or any(row['mimetype'].startswith(x) for x in mime_types):
                    if args.list:
                        m = '{0} ({1})'.format(row['mimetype'], f_ext)
                        size[m] += int(row['size'])
                        number.update([m])
                    else:
                        used_file_extensions.add(f_ext.lower())
                        # Files are sharded into subdirectories named after the
                        # first two characters of their ID.
                        d = media_dir / row['ID'][:2]
                        d.mkdir(exist_ok=True)
                        fn = '.'.join([row['ID'], f_ext])
                        target = d / fn
                        row['local_path'] = pathlib.Path(row['ID'][:2]) / fn
                        if i == 0:
                            # Header row: writing the dict itself emits its keys
                            # (presumably how UnicodeWriter renders a mapping --
                            # NOTE(review): confirm).
                            w.writerow(row)
                        w.writerow(row.values())
                        media.append(target)
                        # (Re-)download when the file is missing or its checksum
                        # differs; media IDs appear to be md5 digests --
                        # NOTE(review): confirm against the dataset spec.
                        if (not target.exists()) or md5(target) != row['ID']:
                            _create_download_thread(url, target)

    if args.list:
        # Print the aggregated per-mimetype statistics and exit.
        for k, v in size.most_common():
            print('\t'.join([k.ljust(20), str(number[k]), format_size(v)]))
        return

    # Waiting for the download threads to finish
    if 'download_threads' in globals():
        for t in download_threads:
            t.join()

    if args.create_release:
        assert media_dir.exists(), 'No folder "{0}" found in {1}'.format(
            MEDIA, media_dir.resolve())

        release_dir.mkdir(exist_ok=True)

        media.append(media_dir / INDEX_CSV)

        # Bundle all media files plus the index into a single zip archive.
        try:
            zipf = zipfile.ZipFile(str(release_dir / '{0}.zip'.format(MEDIA)),
                                   'w', zipfile.ZIP_DEFLATED)
            fp = args.out
            for f in tqdm.tqdm(media, desc='Creating {0}.zip'.format(MEDIA)):
                zipf.write(str(f), str(os.path.relpath(str(f), str(fp))))
            zipf.close()
        except Exception as e:
            args.log.error(e)
            raise

        def _contrib(d):
            # Keep only the contributor keys relevant for the Zenodo metadata.
            return {
                k: v
                for k, v in d.items()
                if k in {'name', 'affiliation', 'orcid', 'type'}
            }

        # Derive the version from the latest git tag, e.g. 'v1.2-3-gabc' -> '1.2'.
        version_v = git_describe('.').split('-')[0]
        version = version_v.replace('v', '')
        git_url = [r for r in ds.repo.repo.remotes
                   if r.name == 'origin'][0].url.replace('.git', '')
        # Assemble the Zenodo deposit metadata, merging into any existing file.
        with jsonlib.update(release_dir / ZENODO_FILE_NAME,
                            indent=4,
                            default=collections.OrderedDict()) as md:
            contribs = ds.dir / 'CONTRIBUTORS.md'
            creators, contributors = get_creators_and_contributors(
                contribs.read_text(
                    encoding='utf8') if contribs.exists() else '',
                strict=False)
            if creators:
                md['creators'] = [_contrib(p) for p in creators]
            if contributors:
                md['contributors'] = [_contrib(p) for p in contributors]
            # Merge pre-existing, passed and default communities (skipped in debug mode).
            communities = [r["identifier"] for r in md.get("communities", [])] + \
                [c.strip() for c in nfilter(args.communities.split(','))] + \
                COMMUNITIES
            if communities and not args.debug:
                md['communities'] = [{
                    "identifier": community_id
                } for community_id in sorted(set(communities))]
            md.update({
                'title':
                '{0} {1} Files'.format(ds.metadata.title, MEDIA.title()),
                'access_right':
                'open',
                'keywords':
                sorted(set(md.get('keywords', []) + ['linguistics'])),
                'upload_type':
                'dataset',
                'publication_date':
                datetime.today().strftime('%Y-%m-%d'),
                'version':
                version,
                'related_identifiers': [
                    {
                        'scheme': 'url',
                        'identifier':
                        '{0}/tree/{1}'.format(git_url, version_v),
                        'relation': 'isSupplementTo'
                    },
                ],
            })
            if args.parent_doi:
                md['related_identifiers'].append({
                    'scheme': 'doi',
                    'identifier': args.parent_doi,
                    'relation': 'isPartOf'
                })
                supplement_to = " - Supplement to dataset " \
                                "<a href='https://doi.org/{0}'>{1}</a> ".format(
                    args.parent_doi, ds.metadata.title)  # noqa: E122
            if ds.metadata.url:
                md['related_identifiers'].append({
                    'scheme':
                    'url',
                    'identifier':
                    ds.metadata.url,
                    'relation':
                    'isAlternateIdentifier'
                })

            formats = ', '.join(sorted(used_file_extensions))
            descr = '<br /><br />' + ds.metadata.description if ds.metadata.description else ''
            online_url, online = '', ''
            if ds.metadata.url:
                online_url = ds.metadata.url
                online = "<br /><br />Available online at: <a href='{0}'>{0}</a>".format(
                    online_url)
            md['description'] = html.escape(
                DESCRIPTION.format(
                    url=online_url,
                    formats=' ({0})'.format(formats) if formats else '',
                    title=md['title'],
                    supplement_to=supplement_to,
                    descr=descr,
                    online=online))

            license_md = ''
            if ds.metadata.zenodo_license:
                md['license'] = {'id': ds.metadata.zenodo_license}
                license_md = LICENCE.format(ds.metadata.zenodo_license)

            # Write a README next to the zip archive and the metadata file.
            DataDir(release_dir).write(
                'README.md',
                README.format(
                    title=md['title'],
                    doi='https://doi.org/{0}'.format(args.parent_doi),
                    ds_title=ds.metadata.title,
                    license=license_md,
                    formats=' ({0})'.format(formats) if formats else '',
                    media=MEDIA,
                    index=INDEX_CSV))

    if args.update_zenodo:

        # Load the metadata written by --create-release ...
        md = {}
        md.update(jsonlib.load(release_dir / ZENODO_FILE_NAME))

        # ... and push it via the Zenodo API (sandbox when debugging).
        if args.debug:
            api_url = API_URL_SANDBOX
            access_token = os.environ.get('ZENODO_SANDBOX_ACCESS_TOKEN')
        else:
            api_url = API_URL
            access_token = ACCESS_TOKEN
        zenodo_url = api_url.replace('api/', '')

        args.log.info('Updating Deposit ID {0} on {1} with:'.format(
            args.update_zenodo, zenodo_url))
        api = Zenodo(api_url=api_url, access_token=access_token)
        try:
            rec = api.record_from_id('{0}record/{1}'.format(
                zenodo_url, args.update_zenodo))
        except Exception as e:
            args.log.error(
                'Check connection and credentials for accessing Zenodo.\n{0}'.
                format(e))
            return
        latest_version = rec.links['latest'].split('/')[-1]
        if latest_version != args.update_zenodo:
            args.log.warn(
                'Passed deposit ID does not refer to latest version {0}!'.
                format(latest_version))
        # Show what is about to be updated and ask for confirmation.
        args.log.info('  DOI:     ' + rec.metadata.doi)
        args.log.info('  Title:   ' + rec.metadata.title)
        args.log.info('  Version: ' + rec.metadata.version)
        args.log.info('  Date:    ' + rec.metadata.publication_date)
        args.log.info('  Files:   ' + ', '.join([f.key for f in rec.files]))
        p = input("Proceed? [y/N]: ")
        if p.lower() == 'y':
            dep = api.update_deposit(args.update_zenodo, **md)
            if dep.state != PUBLISHED:
                api.publish_deposit(dep)
            args.log.info('Updated successfully')
Example #18
0
 def describe_repos(r, org, name=None):
     """Return an OrderedDict with Dublin Core title and description for repository *r*."""
     title = '{0}/{1}'.format(org, name or r.name)
     description = git_describe(r)
     return OrderedDict([('dc:title', title), ('dc:description', description)])
Example #19
0
def run(args):
    """Write an interactive HTML map (plus GeoJSON data) of Glottolog languoids.

    Creates ``glottolog_map.json`` and ``glottolog_map.html`` in ``args.output``,
    prints the HTML file's URI, and - if ``args.open`` is set - opens it in the
    default web browser.
    """
    legend = collections.Counter()
    glottocodes = get_glottocodes(args)

    nodes = {n.id: n for n in args.repos.languoids()}
    langs = []
    for n in nodes.values():
        # Map either all languages (no explicit glottocodes passed) or exactly
        # the passed glottocodes - provided coordinates are available either
        # from the languoid or from the passed glottocode data.
        # Fixed: compare to None with `is not` (PEP 8 E711), not `!=`.
        if ((not glottocodes and n.level == args.repos.languoid_levels.language)
                or (glottocodes and n.id in glottocodes)) \
                and (n.latitude is not None
                     or glottocodes.get(n.id, (None,))[0] is not None):  # noqa: W503
            fid = n.lineage[0][1] if n.lineage else n.id
            # Ignore bookkeeping pseudo-families, unless the languoid is its own family.
            if (not nodes[fid].category.startswith('Pseudo')) or fid == n.id:
                langs.append((n, fid))
                legend.update([fid])

    # Assign qualitative colors to families, most frequent family first.
    color_map = [fid for fid, _ in legend.most_common()]
    color_map = dict(zip(color_map, qualitative_colors(len(color_map))))

    def l2f(t):
        """Convert a (languoid, family-id) pair into a GeoJSON Feature dict."""
        n, fid = t
        # Coordinates passed via glottocodes take precedence over the languoid's own.
        lat, lon = glottocodes.get(n.id, (None, None))
        if lat is None:
            lat = n.latitude
        if lon is None:
            lon = n.longitude
        if lon <= -26:
            lon += 360  # make the map pacific-centered.

        return {
            "geometry": {
                "coordinates": [lon, lat],
                "type": "Point"
            },
            "id": n.id,
            "properties": {
                "name": n.name,
                "color": color_map[fid],
                "family": nodes[fid].name,
                "family_id": fid,
            },
            "type": "Feature"
        }

    def legend_item(fid, c):
        # Colored swatch plus a link to the family at glottolog.org.
        return \
            '<span style="background-color: {0}; border: 1px solid black;">'\
            '&nbsp;&nbsp;&nbsp;</span> '\
            '<a href="https://glottolog.org/resource/languoid/id/{1}">{2}</a> ({3})'.format(
                color_map[fid], fid, nodes[fid].name, c)

    geojson = {
        "features": list(map(l2f, langs)),
        "properties": {
            "legend": {
                fid: legend_item(fid, c)
                for fid, c in legend.most_common()
                if c >= args.min_langs_for_legend
            },
        },
        "type": "FeatureCollection"
    }

    def rendered_template(name, **kw):
        # Templates live in <pkg_dir>/templates/htmlmap/.
        return string.Template(
            args.pkg_dir.joinpath(
                'templates', 'htmlmap',
                name).read_text(encoding='utf8')).substitute(**kw)

    jsname = 'glottolog_map.json'
    args.output.joinpath(jsname).write_text(
        rendered_template('htmlmap.js', geojson=json.dumps(geojson, indent=4)),
        encoding='utf8')
    html = args.output.joinpath('glottolog_map.html')
    html.write_text(
        rendered_template('htmlmap.html',
                          version=git_describe(args.repos.repos),
                          jsname=jsname,
                          nlangs=len(langs)),
        encoding='utf8')
    print(html.resolve().as_uri())
    if args.open:
        webbrowser.open(str(html))  # pragma: no cover
Example #20
0
def htmlmap(args):
    """
    glottolog htmlmap [OUTDIR]

    Write an interactive HTML map (``glottolog_map.html`` plus
    ``glottolog_map.json``) of all Glottolog languages to OUTDIR
    (defaults to the current directory) and print the HTML file's URI.
    """
    nodes = {n.id: n for n in args.repos.languoids()}
    legend = Counter()

    langs = []
    for n in nodes.values():
        # Only languages with known coordinates can be mapped.
        # Fixed: compare to None with `is not` (PEP 8 E711), not `!=`.
        if n.level == Level.language and n.latitude is not None:
            fid = n.lineage[0][1] if n.lineage else n.id
            # Ignore bookkeeping pseudo-families.
            if not nodes[fid].category.startswith('Pseudo'):
                langs.append((n, fid))
                legend.update([fid])

    # Assign each family a stable 3-digit hex color derived from its sort position.
    color_map = {
        fid: "{0:0{1}X}".format((i + 1) * 10, 3)
        for i, fid in enumerate(sorted(legend.keys()))
    }

    def l2f(t):
        """Convert a (languoid, family-id) pair into a GeoJSON Feature dict."""
        n, fid = t
        lon, lat = n.longitude, n.latitude
        if lon <= -26:
            lon += 360  # make the map pacific-centered.

        return {
            "geometry": {
                "coordinates": [lon, lat],
                "type": "Point"
            },
            "id": n.id,
            "properties": {
                "name": n.name,
                "color": color_map[fid],
                "family": nodes[fid].name,
                "family_id": fid,
            },
            "type": "Feature"
        }

    def legend_item(fid, c):
        # Colored swatch plus a link to the family at glottolog.org.
        return \
            '<span style="background-color: #{0}; border: 1px solid black;">'\
            '&nbsp;&nbsp;&nbsp;</span> '\
            '<a href="http://glottolog.org/resource/languoid/id/{1}">{2}</a> ({3})'.format(
                color_map[fid], fid, nodes[fid].name, c)

    geojson = {
        # Fixed: materialize the map object; on Python 3 json serialization
        # cannot handle a lazy map iterator.
        "features": list(map(l2f, langs)),
        "properties": {
            "legend": {
                fid: legend_item(fid, c)
                for fid, c in legend.most_common() if c > 10
            },
        },
        "type": "FeatureCollection"
    }

    def rendered_template(name, **kw):
        # Templates are shipped with the pyglottolog package.
        return Template(
            read_text(
                Path(pyglottolog.__file__).parent.joinpath(
                    'templates', 'htmlmap', name))).substitute(**kw)

    jsname = 'glottolog_map.json'
    outdir = Path('.') if not args.args else Path(args.args[0])
    write_text(
        outdir.joinpath(jsname),
        rendered_template('htmlmap.js', geojson=dumps(geojson, indent=4)))
    html = outdir.joinpath('glottolog_map.html')
    write_text(
        html,
        rendered_template('htmlmap.html',
                          version=git_describe(args.repos.repos),
                          jsname=jsname,
                          nlangs=len(langs)))
    print(html.resolve().as_uri())
Example #21
0
 def __unicode__(self):
     """Return a human-readable description of the D-PLACE data repository clone."""
     described = git_describe(self.dir)
     return '<D-PLACE data repos {0} at {1}>'.format(described, self.dir)