Example #1
def update(repos, verbose=True):
    ecoregions = [(er['properties']['eco_code'], shape(er['geometry']))
                  for er in jsonlib.load(
                      data_file('ecoregions.json', repos=repos))['features']
                  if er['geometry']
                  and er['properties']['eco_code'] not in INVALID_ECO_CODES]

    with CsvData('distribution', repos=repos) as data:
        res = {i.id: i for i in data.items}

        occurrence_data = list(
            data_file('external', 'gbif', repos=repos).glob('*.json'))
        if verbose:  # pragma: no cover
            occurrence_data = tqdm(occurrence_data)
        for fname in occurrence_data:
            sid = fname.stem
            d = res.get(sid, Distribution(sid, '', ''))
            if not d.countries__ids or not d.ecoregions__ids:
                occurrences = jsonlib.load(fname).get('results', [])
                if not d.ecoregions__ids:
                    d.ecoregions__ids = list(match(occurrences, ecoregions))
                if not d.countries__ids:
                    d.countries__ids = list(
                        r.get('countryCode') for r in occurrences)
            res[sid] = d
            data.items = [res[key] for key in sorted(res.keys())]
Example #2
def get_bib(args):
    uploaded = load(args.data_file('repos', 'cdstar.json'))
    fname_to_cdstar = {}
    for type_ in ['texts', 'docs', 'data']:
        for hash_, paths in load(args.data_file('repos', type_ + '.json')).items():
            if hash_ in uploaded:
                for path in paths:
                    fname_to_cdstar[path.split('/')[-1]] = uploaded[hash_]
    for hash_, paths in load(args.data_file('repos', 'edmond.json')).items():
        if hash_ in uploaded:
            for path in paths:
                fname_to_cdstar[path.split('/')[-1]] = uploaded[hash_]
    db = Database.from_file(args.data_file('repos', 'Dogon.bib'), lowercase=True)
    for rec in db:
        doc = Document(rec)
        newurls = []
        for url in rec.get('url', '').split(';'):
            if not url.strip():
                continue
            if url.endswith('sequence=1'):
                newurls.append(url)
                continue
            url = URL(url.strip())
            if url.host() in ['dogonlanguages.org', 'github.com', '']:
                fname = url.path().split('/')[-1]
                doc.files.append((fname, fname_to_cdstar[fname]))
            else:
                newurls.append(url.as_string())
        doc.rec['url'] = '; '.join(newurls)
        yield doc
Example #3
 def issues(self):
     issues = jsonlib.load(self.issues_path)
     comments = jsonlib.load(self.comments_path)
     return [
         Issue(issue, comments.get(str(issue['number']), []))
         for issue in issues
     ]
Example #4
def village_images(args):
    uploaded = load(args.data_file('repos', 'cdstar.json'))
    files = load(
        args.data_file('repos', 'Mali_villages_with_coordinates_for_website.json'))
    for hash_, paths in files.items():
        if hash_ in uploaded:
            fname = Path(paths[0])
            name, coords, desc, date_, creators = image_md(fname.stem)
            yield VillageImage(
                hash_, fname.name.decode('utf8'), VFN.get(fname.name),
                desc, date_, creators, coords, uploaded[hash_])
Example #5
def ff_images(args):
    tsammalex = {
        i.id: i.taxa__id for i in
        reader(args.data_file('repos', 'tsammalex_images.csv'), namedtuples=True)}

    ref_pattern = re.compile('(?P<ref>[0-9]{5})')
    uploaded = load(args.data_file('repos', 'cdstar.json'))
    files = load(args.data_file('repos', 'Heath_flora_fauna_images.json'))
    files.update(load(args.data_file('repos', 'ffmissing.json')))
    path_to_md5 = {}
    for md5, paths in files.items():
        for path in paths:
            path_to_md5[Path(path.encode('utf8')).stem] = md5
    missed, found, uploaded_ = 0, 0, 0
    for i, img in enumerate(reader(
            args.data_file('repos', 'dogon_flora-fauna.csv'),
            delimiter=',', namedtuples=True)):
        stem = Path(img.filenames.encode('utf8')).stem
        assert stem in path_to_md5
        found += 1
        if path_to_md5[stem] in uploaded:
            m = ref_pattern.search(stem)
            uploaded_ += 1
            yield FFImage(
                path_to_md5[stem],
                Path(files[path_to_md5[stem]][0].encode('utf8')).name,
                None,
                m.group('ref') if m else None,
                None,
                [],
                uploaded[path_to_md5[stem]],
                tsammalex.get(path_to_md5[stem]))

    videos = load(args.data_file('repos', 'videos_from_website.json'))
    videos.update(load(args.data_file('repos', 'videos.json')))

    for md5, paths in videos.items():
        if md5 in uploaded:
            path = Path(paths[0].encode('utf8'))
            m = ref_pattern.search(path.stem)
            uploaded_ += 1
            yield FFImage(
                md5,
                path.name,
                None,
                m.group('ref') if m else None,
                None,
                [],
                uploaded[md5],
                tsammalex.get(md5))
        else:
            missed += 1

    print('ff_images', missed, uploaded_)
Example #6
def chapter(request):
    _html = get_html(ppath("Atlas", "%s.html" % request.matchdict["id"]))
    return {
        "md": jsonlib.load(ppath("Atlas", "%s.json" % request.matchdict["id"])),
        "html": lambda vt: _html.replace("<p>value-table</p>", vt),
        "ctx": Feature.get(request.matchdict["id"]),
    }
Example #7
 def __init__(self, path, repos=REPOS, container_cls=dict, json_opts=None):
     DataManager.__init__(self, path, repos)
     if self.path.exists():
         self.items = jsonlib.load(self.path, object_pairs_hook=OrderedDict)
     else:
         self.items = container_cls()
     self._json_opts = json_opts or {}
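
The `object_pairs_hook=OrderedDict` argument is presumably forwarded to the underlying standard-library JSON parser; the hook receives the key/value pairs in file order, so the loaded mapping preserves that order. A minimal sketch with plain `json` (not the clldutils helper) showing the effect:

import json
from collections import OrderedDict

text = '{"b": 1, "a": 2}'
ordered = json.loads(text, object_pairs_hook=OrderedDict)
print(list(ordered))  # ['b', 'a'] -- keys in the order they appear in the file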
Example #8
 def __init__(self,
              path,
              cdstar_url=None,
              cdstar_user=None,
              cdstar_pwd=None):
     self.path = pathlib.Path(path)
     self.objects = {}
     if self.path.exists():
         if self.path.suffix.lower() == '.zip':
             with zipfile.ZipFile(str(self.path), 'r') as z:
                 for filename in z.namelist():
                     with z.open(filename) as f:
                         self.objects = {
                             i: Object.fromdict(i, d)
                             for i, d in json.loads(f.read().decode(
                                 'utf-8')).items()
                         }
                     break
         else:
             self.objects = {
                 i: Object.fromdict(i, d)
                 for i, d in load(self.path).items()
             }
     self.api = Cdstar(service_url=cdstar_url,
                       user=cdstar_user,
                       password=cdstar_pwd)
Example #9
    def test_dataset_from_file(self):
        from pycldf.dataset import Dataset

        ds = Dataset.from_file(FIXTURES.joinpath('ds1.csv'))
        self.assertIn('ds1', repr(ds))
        self.assertEqual(len(ds), 2)
        self.assertEqual(ds.table.url, 'ds1.csv')
        self.assertEqual(ds.metadata['dc:creator'], 'The Author')

        row = ['3', 'abcd1234', 'fid2', 'maybe', '', 'new[4]']
        with self.assertRaises(ValueError):
            ds.add_row(row)

        ds.sources.add('@book{new,\nauthor={new author}}')
        res = ds.add_row(row)
        self.assertEqual(res.url, 'http://example.org/valuesets/3')
        self.assertEqual(len(res.refs), 1)
        self.assertEqual(
            res.valueUrl('Language_ID'),
            'http://glottolog.org/resource/languoid/id/abcd1234')
        res = ds.add_row(['4', None, None, None, None, None])
        self.assertEqual(res.valueUrl('Language_ID'), None)
        out = self.tmp_path()
        ds.write(out, '.tsv')
        self.assertTrue(out.joinpath('ds1.bib').exists())
        md = load(out.joinpath('ds1.tsv-metadata.json'))
        self.assertEqual('ds1.tsv', md['tables'][0]['url'])
        Dataset.from_file(out.joinpath('ds1.tsv'))
Example #10
 def test_get_subset(self):
     self.lex.get_subset([])
     self.assertEqual([v for v in self.lex.subsets.values() if v], [])
     pairs = jsonlib.load(test_data('KSL.pairs.json'))
     self.assertEqual(
         sorted('---'.join(k) for k in self.lex.subsets.keys()),
         sorted(pairs.keys()))
Example #11
def load(table, csv, engine):
    schema = jsonlib.load(
        csv.parent.joinpath(csv.stem + '.' + CsvmJsonAdapter.extension))
    converter = get_converter(schema['tableSchema'], table)
    engine.execute(table.insert(),
                   [converted(d, converter) for d in reader(csv, dicts=True)])
    return schema.get("dc:identifier")
Example #12
def contribute(req):
    return {
        'missing':
        load(
            Path(asjp.__file__).parent.joinpath(
                'static', 'ethnologue17_diff.json'))['missing']
    }
Example #13
    def __init__(self, path):
        self.dir = path
        self.id = path.name

        self.cdstar = load(REPOS.joinpath('cdstar.json'))
        print(self.dir)
        assert self.dir.exists()
        desc = self.dir.joinpath('md.html')
        if desc.exists():
            with desc.open(encoding='utf8') as fp:
                self.description = fp.read()
        else:
            self.description = None
        md = self.dir.joinpath('md.json')
        self.md = load(md) if md.exists() else None
        self.props = self.md.get('properties', {}) if self.md else {}
Example #14
def iter_languages():
    ldstatus = load(
        GLOTTOLOG_VENV.joinpath('glottolog3/glottolog3/static/ldstatus.json'))
    for l in Glottolog(GLOTTOLOG_VENV.joinpath('glottolog')).languoids():
        if l.level == Level.language and not l.category.startswith('Pseudo'):
            yield Language(l, ((ldstatus.get(l.id) or [[0, None]])[0]
                               or [0, None])[1])
Example #15
    def add_component(self, component, *cols, **kw):
        if isinstance(component, str):
            component = jsonlib.load(
                pkg_path('components', '{0}{1}'.format(component, MD_SUFFIX)))
        if isinstance(component, dict):
            component = Table.fromvalue(component)
        assert isinstance(component, Table)

        if kw.get('url'):
            component.url = Link(kw['url'])

        for other_table in self.tables:
            if other_table.url == component.url:
                raise ValueError('tables must have distinct url properties')

        self.add_columns(component, *cols)
        try:
            table_type = self.get_tabletype(component)
        except ValueError:
            table_type = None
        if table_type:
            for other_table in self.tables:
                try:
                    other_table_type = self.get_tabletype(other_table)
                except ValueError:  # pragma: no cover
                    continue
                if other_table_type == table_type:
                    raise ValueError('components must not be added twice')

        self.tables.append(component)
        component._parent = self.tablegroup
        self.auto_constraints(component)
        return component
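
A hypothetical usage sketch of `add_component` as defined above: a string argument is resolved against a packaged `<name>-metadata.json` component description, extra positional arguments are forwarded to `add_columns`, and adding a component whose table type is already present raises `ValueError`. The dataset instance `ds` is an assumption for illustration:

# `ds` is assumed to be an existing dataset object exposing add_component().
table = ds.add_component('LanguageTable')   # resolved from the packaged metadata
# ds.add_component('LanguageTable')         # would raise ValueError: added twice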
Example #16
    def load(cls, path, contrib_md):
        # zenodo download dumps all files into a subfolder
        if not (path / 'cldf').exists():
            for subpath in path.glob('*'):
                if (subpath / 'cldf').exists():
                    path = subpath
                    break
        assert path.exists(), str(path)

        try:
            cldf_dataset = next(iter_datasets(path / 'cldf'))
        except StopIteration:
            raise ValueError('No cldf metadata file found in {}'.format(path))

        bib_path = path / 'cldf' / 'sources.bib'
        sources = bibtex.Database.from_file(
            bib_path) if bib_path.exists() else None

        md_path = path / 'metadata.json'
        md = jsonlib.load(md_path) if md_path.exists() else {}

        # XXX maybe also allow README.txt?
        readme_path = path / 'README.md'
        try:
            with readme_path.open(encoding='utf-8') as f:
                readme = f.read().strip()
        except IOError:
            readme = None

        authors = contrib_md.get('authors') or ()

        return cls(cldf_dataset, sources, authors, md.get('title'), readme)
Example #17
 def __init__(self, path, repos=REPOS, container_cls=dict, json_opts=None):
     DataManager.__init__(self, path, repos)
     if self.path.exists():
         self.items = jsonlib.load(self.path, object_pairs_hook=OrderedDict)
     else:
         self.items = container_cls()
     self._json_opts = json_opts or {}
Example #18
def wals_detail_html(context=None, request=None, **kw):
    wals_data = Path(apics.__file__).parent.joinpath(
        'static', 'wals', '%sA.json' % context.parameter.wals_id)
    if not wals_data.exists():
        raise HTTPNotFound()

    wals_data = jsonlib.load(wals_data)
    value_map = {}

    for layer in wals_data['layers']:
        for feature in layer['features']:
            feature['properties']['icon'] = request.registry.getUtility(
                IIcon, name=feature['properties']['icon']).url(request)
            feature['properties']['popup'] = external_link(
                'http://wals.info/languoid/lect/wals_code_'
                + feature['properties']['language']['id'],
                label=feature['properties']['language']['name'])
        value_map[layer['properties']['number']] = {
            'icon': layer['features'][0]['properties']['icon'],
            'name': layer['properties']['name'],
            'number': layer['properties']['number'],
        }

    return {
        'wals_data': wals_data,
        'wals_map': WalsMap(
            context.parameter, request, data=wals_data, value_map=value_map),
        'apics_map': ApicsWalsMap(
            context.parameter, request, data=wals_data, value_map=value_map)}
Example #19
def register(args):  # pragma: no cover
    """Register a dataset with datahub.io."""
    dataset = Dataset.first()
    name = 'clld-' + dataset.id.lower()
    package = datahub('package_show', id=name)
    if not package:
        package = datahub(
            'package_create',
            **{'name': name, 'title': 'CLLD-' + dataset.id.upper(), 'owner_org': 'clld'})
    md = {
        'url': 'http://%s' % dataset.domain,
        'notes': '%s published by the CLLD project' % dataset.name,
        'maintainer': 'CLLD Project',
        'tags': [
            {'name': 'linguistics'},
            {'name': 'lod'},
            {'name': 'llod'},
        ]}
    if dataset.contact:
        md['maintainer_email'] = dataset.contact
    if dataset.license:
        if 'creativecommons.org/licenses/by/' in dataset.license:
            md['license_id'] = 'cc-by-sa'
            md['license_title'] = "Creative Commons Attribution Share-Alike"
        elif 'creativecommons.org/' in dataset.license and '-nc' in dataset.license:
            md['license_id'] = 'cc-nc'
            md['license_title'] = "Creative Commons Non-Commercial (Any)"
    rdf_md = args.data_file('rdf-metadata.json')
    if rdf_md.exists():
        rdf_md = jsonlib.load(rdf_md)
        md['extras'] = [
            {'key': k, 'value': str(rdf_md[k])} for k in rdf_md.keys()
            if k.split(':')[0] in ['triples', 'resources', 'links']]

    package = datahub('package_update', id=name, **md)
    resources = [rsc['name'] for rsc in package['resources']]
    if 'VoID description' not in resources:
        rsc = datahub(
            'resource_create',
            package_id=package['id'],
            name='VoID description',
            url='http://%s/void.ttl' % dataset.domain,
            format='meta/void',
            mimetype='text/turtle')
        assert rsc

    rdf_dump = '%s-dataset.n3.gz' % dataset.id
    if ('RDF dump' not in resources) \
            and args.module_dir.joinpath('static', 'download', rdf_dump).exists():
        rsc = datahub(
            'resource_create',
            package_id=package['id'],
            name='RDF dump',
            url='http://%s/static/download/%s' % (dataset.domain, rdf_dump),
            format='text/n3',
            mimetype='text/n3')
        assert rsc

    print('>>> Make sure to upload the RDF dump to the production site.')
Example #20
    def write_languoids_table(self, outdir, version=None):
        version = version or self.describe()
        if outdir is not None and not outdir.exists():
            raise IOError("Specified output directory %s does not exist. Please create it." % outdir)
        out = outdir / 'glottolog-languoids-{0}.csv'.format(version)
        md = outdir / (out.name + '-metadata.json')
        tg = TableGroup.fromvalue({
            "@context": "http://www.w3.org/ns/csvw",
            "dc:version": version,
            "dc:": "Harald Hammarström, Robert Forkel & Martin Haspelmath. "
                   "clld/glottolog: Glottolog database (Version {0}) [Data set]. "
                   "Zenodo. http://doi.org/10.5281/zenodo.596479".format(version),
            "tables": [load(pycldf.util.pkg_path('components', 'LanguageTable-metadata.json'))],
        })
        tg.tables[0].url = out.name
        for col in [
            dict(name='LL_Code'),
            dict(name='Classification', separator='/'),
            dict(name='Family_Glottocode'),
            dict(name='Family_Name'),
            dict(name='Language_Glottocode'),
            dict(name='Language_Name'),
            dict(name='Level', datatype=dict(base='string', format='family|language|dialect')),
            dict(name='Status'),
        ]:
            tg.tables[0].tableSchema.columns.append(Column.fromvalue(col))

        langs = []
        for lang in self.languoids():
            lid, lname = None, None
            if lang.level == self.languoid_levels.language:
                lid, lname = lang.id, lang.name
            elif lang.level == self.languoid_levels.dialect:
                for lname, lid, level in reversed(lang.lineage):
                    if level == self.languoid_levels.language:
                        break
                else:  # pragma: no cover
                    raise ValueError
            langs.append(dict(
                ID=lang.id,
                Name=lang.name,
                Macroarea=lang.macroareas[0].name if lang.macroareas else None,
                Latitude=lang.latitude,
                Longitude=lang.longitude,
                Glottocode=lang.id,
                ISO639P3code=lang.iso,
                LL_Code=lang.identifier.get('multitree'),
                Classification=[c[1] for c in lang.lineage],
                Language_Glottocode=lid,
                Language_Name=lname,
                Family_Name=lang.lineage[0][0] if lang.lineage else None,
                Family_Glottocode=lang.lineage[0][1] if lang.lineage else None,
                Level=lang.level.name,
                Status=lang.endangerment.status.name if lang.endangerment else None,
            ))

        tg.to_file(md)
        tg.tables[0].write(langs, fname=out)
        return md, out
Example #21
    def test_json(self):
        from clldutils.jsonlib import dump, load

        d = {'a': 234, 'ä': 'öäüß'}
        p = self.tmp_path('test')
        dump(d, p)
        for k, v in load(p).items():
            assert d[k] == v
Example #22
def jsondump(obj, fname, log=None):
    fname = Path(fname)
    if fname.exists():
        d = jsonlib.load(fname)
        d.update(obj)
        obj = d
    jsonlib.dump(sorted_obj(obj), fname, indent=4)
    log_dump(fname, log=log)
    return obj
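
A hypothetical usage note for `jsondump` above: because any existing file is loaded and updated first, repeated calls merge into the same JSON file rather than overwriting it (this sketch relies on the helper's own module imports; the path is only for illustration):

stats = jsondump({'a': 1}, '/tmp/stats.json')   # creates the file
stats = jsondump({'b': 2}, '/tmp/stats.json')   # merges with the existing content
assert stats == {'a': 1, 'b': 2}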
Example #23
def get_concept(s):
    global _concepticon
    if _concepticon is None:
        _concepticon = load(Path(dictionaria.__file__).parent.joinpath(
            'static', 'concepticon-1.0-labels.json'))
    s = s.lower()
    if s in _concepticon['conceptset_labels']:
        return _concepticon['conceptset_labels'][s]
    return _concepticon['alternative_labels'].get(s)
Example #24
def test_read_editors(api_copy):
    prepare_release(api_copy, '3.3')
    zenodo = load(api_copy.path('.zenodo.json'))
    assert zenodo['creators'][1]['affiliation'] == 'University Uppsala'
    assert zenodo['description'] == '<p>, C &amp; Hammarstr&ouml;m, Harald &amp; Forkel, Robert. '\
                                    '1999. Glottolog 3.3. ' \
                                    'Jena: Max Planck Institute for the Science of Human History. '\
                                    '(Available online at ' \
                                    '<a href="https://glottolog.org">https://glottolog.org</a>)</p>'
Example #25
    def __init__(self, path):
        self.dir = path
        self.id = path.name

        self.cdstar = load(REPOS.joinpath('cdstar.json'))
        print(self.dir)
        assert self.dir.exists()
        desc = self.dir.joinpath('intro.md')
        if desc.exists():
            with desc.open(encoding='utf8') as fp:
                self.description = fp.read()
        else:
            self.description = None
        md = self.dir.joinpath('md.json')
        self.md = load(md) if md.exists() else None
        self.props = self.md.get('properties', {}) if self.md else {}
        bib = self.dir.joinpath('sources.bib')
        self.bib = bibtex.Database.from_file(bib) if bib.exists() else None
Example #26
    def cmd_makecldf(self, args):
        concepts = args.writer.add_concepts(
            id_factory=lambda x: x.id.split("-")[-1] + "_" + slug(x.english),
            lookup_factory="Database_ID",
        )

        language_map = {
            lang["ID"]: lang["Glottocode"] or None
            for lang in self.languages
        }

        sources = {}
        for path in sorted(self.raw_dir.glob("*.json"),
                           key=lambda _p: int(_p.stem)):
            data = jsonlib.load(path)
            iso = data.get("ISO 639-3")
            if iso:
                iso = iso.strip()
            args.writer.add_language(
                ID=data["id"],
                Name=data["name"],
                ISO639P3code=iso if iso not in {"no", "XXX"} else None,
                Glottocode=language_map[data["id"]],
            )

            for table in ["basic", "flora", "cult"]:
                if table not in data["tables"]:
                    continue
                for item in data["tables"][table]["rows"]:
                    item = dict(zip(data["tables"][table]["header"], item))
                    form = item["Orthographic Form"].strip()
                    if form:
                        refs = [
                            ref for ref in itersources(item, data, sources)
                            if ref
                        ]
                        args.writer.add_sources(*[ref.source for ref in refs])
                        href, _ = item["English"]

                        concept_database_id = href.split("/")[-1]

                        if not concepts.get(concept_database_id):
                            # https://huntergatherer.la.utexas.edu/lexical/feature/729
                            # is missing from the concept list(s)
                            continue

                        args.writer.add_lexemes(
                            Language_ID=data["id"],
                            Parameter_ID=concepts[concept_database_id],
                            Value=form,
                            Loan=bool(item["Loan Source"]
                                      or item["Wanderwort Status"]),
                            Phonemic=item["Phonemicized Form"] or None,
                            Source=["%s" % ref for ref in refs],
                            Creator=item.get("Created By"),
                            Comment=item.get("General Notes"),
                        )
Example #27
def test_SourcesCatalog(tmp_path):
    cat_path = tmp_path / 'test.json'
    with SourcesCatalog(cat_path) as cat:
        cat.add(
            'key', Object('id', [Bitstream('bsid', 5, 'text/plain', '', '', '')], {}))
        assert 'key' in cat
        assert 'url' in cat.get('key')

    assert 'key' in load(str(cat_path))
Example #28
def rename(args):  # pragma: no cover
    api = Concepticon(args.repos)

    from_, to_ = args.args
    assert CONCEPTLIST_ID_PATTERN.match(to_)
    cl = api.conceptlists[from_]

    # write the adapted concept list to the new path:
    with UnicodeWriter(cl.path.parent / cl.path.name.replace(from_, to_),
                       delimiter='\t') as writer:
        header = []
        for i, row in enumerate(reader(cl.path, delimiter='\t')):
            if i == 0:
                header = row
                writer.writerow(row)
                header = {v: k
                          for k, v in enumerate(header)
                          }  # Map col name to row index
            else:
                oid = row[header['ID']]
                assert oid.startswith(from_)
                nid = oid.replace(from_, to_)
                api.add_retirement(
                    'Concept', dict(id=oid,
                                    comment='renaming',
                                    replacement=nid))
                row[header['ID']] = nid
                writer.writerow(row)

    # write adapted metadata to the new path:
    fname = cl.path.name.replace(from_, to_) + MD_SUFFIX
    md = jsonlib.load(cl.path.parent / (cl.path.name + MD_SUFFIX),
                      object_pairs_hook=OrderedDict)
    md['tables'][0]['url'] = fname
    jsonlib.dump(md, cl.path.parent / fname, indent=4)

    # remove obsolete concept list and metadata:
    cl.path.unlink()
    cl.path.parent.joinpath(cl.path.name + MD_SUFFIX).unlink()

    # adapt conceptlists.tsv
    rows = []
    for row in reader(api.data_path('conceptlists.tsv'), delimiter='\t'):
        rows.append([col.replace(from_, to_) if col else col for col in row])

    with UnicodeWriter(api.data_path('conceptlists.tsv'),
                       delimiter='\t') as writer:
        writer.writerows(rows)

    api.add_retirement('Conceptlist',
                       dict(id=from_, comment='renaming', replacement=to_))

    print("""Please run
grep -r "{0}" concepticondata/ | grep -v retired.json

to confirm the renaming was complete!""".format(from_))
Example #29
def test():
    if not REPOS.exists():
        return
    data = {
        n: OrderedDict([(item.id, item)
                        for item in models.CsvData(n, on_error=error)])
        for n in CSV
    }
    data['ecoregions'] = {}
    for ecoregion in jsonlib.load(data_file('ecoregions.json'))['features']:
        data['ecoregions'][ecoregion['properties']['eco_code']] = ecoregion

    data['refs'] = {}
    with data_file('sources.bib').open(encoding='utf8') as fp:
        for line in fp:
            match = BIB_ID_PATTERN.match(line.strip())
            if match:
                data['refs'][match.group('id')] = 1

    data['countries'] = {country.alpha2: country for country in countries}

    for name in ['names', 'taxa']:
        for line, item in enumerate(data[name].values()):
            for ref in item.refs__ids:
                if '[' in ref:
                    source_id, pages = ref.split('[', 1)
                    if not pages.endswith(']'):  # pragma: no cover
                        error('invalid reference %s' % (ref, ), name, line + 2)
                else:
                    source_id = ref
                if source_id not in data['refs']:  # pragma: no cover
                    error('invalid id referenced: %s' % (source_id, ), name,
                          line + 2)

    for name, model in [(n, getattr(models, n.capitalize())) for n in CSV]:
        for line, item in enumerate(data[name].values()):
            for col in [f.name for f in attr.fields(model)]:
                if '__' in col:
                    ref, cardinality = col.split('__', 1)
                    #if ref not in data:
                    #    continue
                    ids = getattr(item, col)
                    if cardinality == 'id':
                        assert not isinstance(ids, list)
                        ids = [ids]
                    for v in ids:
                        if ref not in data:
                            raise ValueError(ref)  # pragma: no cover
                        if ref == 'refs' and '[' in v:
                            v = v.split('[')[0]
                        if v not in data[ref]:  # pragma: no cover
                            error('invalid %s id referenced: %s' % (ref, v),
                                  name, line + 2)

    if not SUCCESS:  # pragma: no cover
        raise ValueError('integrity checks failed!')
Example #30
 def vocabularies(self):
     """
     Provide access to a `dict` of controlled vocabularies.
     """
     res = jsonlib.load(self.data_path('concepticon.json'))
     for k in res['COLUMN_TYPES']:
         v = res['COLUMN_TYPES'][k]
         if isinstance(v, list) and v and v[0] == 'languoid':
             res['COLUMN_TYPES'][k] = Languoid(v[1])
     return res
Example #31
 def _metadata(self, id_):
     values_path = self.data_path('concept_set_meta', id_ + '.tsv')
     md_path = self.data_path('concept_set_meta', id_ + '.tsv' + MD_SUFFIX)
     assert values_path.exists() and md_path.exists()
     md = jsonlib.load(md_path)
     return Metadata(id=id_,
                     meta=md,
                     values=to_dict(
                         read_dicts(values_path, schema=md['tableSchema']),
                         key=operator.itemgetter('CONCEPTICON_ID')))
Example #32
def get_concept(s):
    global _concepticon
    if _concepticon is None:
        _concepticon = load(
            Path(dictionaria.__file__).parent.joinpath(
                'static', 'concepticon-1.0-labels.json'))
    s = s.lower()
    if s in _concepticon['conceptset_labels']:
        return _concepticon['conceptset_labels'][s]
    return _concepticon['alternative_labels'].get(s)
Example #33
 def __init__(self, dataset, fname):
     self.dataset = dataset
     self.fname = fname
     if fname.exists():
         try:
             self.report = jsonlib.load(fname)
         except ValueError:
             self.report = {}
     else:
         self.report = {}
Example #34
 def experiments(self):
     gbif = load(self.path('gbif.json'))
     res = [
         Experiment.from_dict(d, self.sources) for d in list(
             dsv.reader(self.path('data.Sheet1.csv'), dicts=True))[1:]
     ]
     for ex in res:
         key, md = gbif.get(ex.species_latin, (None, None))
         if key:
             ex.gbif = GBIF(key=key, metadata=md)
     return res
Example #35
 def cmd_readme(self, args):
     res = self.metadata.markdown()
     tr = self.cldf_dir / '.transcription-report.json'
     tr = jsonlib.load(tr) if tr.exists() else None
     res += report.report(self, tr, getattr(args, 'glottolog', None),
                          args.log)
     if self.contributors_path.exists():
         res += '\n\n{0}\n\n'.format(
             self.contributors_path.read_text(encoding='utf8'))
     self.dir.write('FORMS.md', self.form_spec.as_markdown(self))
     return res
Example #36
def test_makecldf(repos, dataset, dataset_cldf, dataset_no_cognates, sndcmp,
                  capsys, tmp_path):
    _main('lexibank.makecldf {0} --glottolog {1} --concepticon {1} --clts {1}'.
          format(
              str(dataset.dir / 'td.py'),
              str(repos),
          ))
    assert 'Papunesia' in dataset.cldf_dir.joinpath('languages.csv').read_text(
        encoding='utf8')
    # Metadata for Zenodo is merged if this makes sense:
    assert len(jsonlib.load(dataset.dir / '.zenodo.json')['communities']) == 3

    _main(
        'lexibank.makecldf {0} --dev --glottolog {1} --concepticon {1} --clts {1}'
        .format(
            str(dataset.dir / 'td.py'),
            str(repos),
        ))
    assert 'Papunesia' not in dataset.cldf_dir.joinpath(
        'languages.csv').read_text(encoding='utf8')
    assert '### Replacement' in dataset.dir.joinpath('FORMS.md').read_text(
        encoding='utf8')

    _main('lexibank.makecldf {0} --glottolog {1} --concepticon {1} --clts {1}'.
          format(
              str(sndcmp.dir / 'ts.py'),
              str(repos),
          ))
    assert 'Bislama_Gloss' in sndcmp.cldf_dir.joinpath(
        'parameters.csv').read_text(encoding='utf8')
    assert 'e56a5fc78ae5a66e783c17bc30019568' in sndcmp.cldf_dir.joinpath(
        'media.csv').read_text(encoding='utf8')

    _main('lexibank.makecldf {0} --glottolog {1} --concepticon {1} --clts {1}'.
          format(
              str(dataset_cldf.dir / 'tdc.py'),
              str(repos),
          ))
    capout = capsys.readouterr().out
    assert 'The dataset has no sources' not in capout

    _main('lexibank.makecldf {0} --glottolog {1} --concepticon {1} --clts {1}'.
          format(
              str(dataset_no_cognates.dir / 'tdn.py'),
              str(repos),
          ))
    assert not dataset_no_cognates.cldf_dir.joinpath('cognates.csv').exists()
    _main(
        'lexibank.load --db {3} {0} --glottolog {1} --concepticon {2}'.format(
            str(dataset_no_cognates.dir / 'tdn.py'),
            str(repos),
            str(repos),
            str(tmp_path / 'db'),
        ))
Example #37
 def content_callback(request, context):
     if 'access' in request.url:  # file download
         with zipfile.ZipFile(tmp_path / 'ds.zip', 'w') as zip:
             zip.write(
                 pathlib.Path(__file__).parent / 'repos' / 'csv' /
                 '2022-06-1KRR1P_ZIMBABWE_CRATON_ARCHEAN.csv',
                 '2022-06-1KRR1P_ZIMBABWE_CRATON_ARCHEAN.csv')
         return tmp_path.joinpath('ds.zip').read_bytes()
     # Dataset metadata:
     return json.dumps(dict(data=load(repos /
                                      'datasets.json')[0])).encode('utf8')
Example #38
def test():
    if not REPOS.exists():
        return
    data = {
        n: OrderedDict([(item.id, item) for item in models.CsvData(n, on_error=error)])
        for n in CSV}
    data['ecoregions'] = {}
    for ecoregion in jsonlib.load(data_file('ecoregions.json'))['features']:
        data['ecoregions'][ecoregion['properties']['eco_code']] = ecoregion

    data['refs'] = {}
    with data_file('sources.bib').open(encoding='utf8') as fp:
        for line in fp:
            match = BIB_ID_PATTERN.match(line.strip())
            if match:
                data['refs'][match.group('id')] = 1

    data['countries'] = {country.alpha2: country for country in countries}

    for name in ['names', 'taxa']:
        for line, item in enumerate(data[name].values()):
            for ref in item.refs__ids:
                if '[' in ref:
                    source_id, pages = ref.split('[', 1)
                    if not pages.endswith(']'):  # pragma: no cover
                        error('invalid reference %s' % (ref,), name, line + 2)
                else:
                    source_id = ref
                if source_id not in data['refs']:  # pragma: no cover
                    error('invalid id referenced: %s' % (source_id,), name, line + 2)

    for name, model in [(n, getattr(models, n.capitalize())) for n in CSV]:
        for line, item in enumerate(data[name].values()):
            for col in [f.name for f in attr.fields(model)]:
                if '__' in col:
                    ref, cardinality = col.split('__', 1)
                    #if ref not in data:
                    #    continue
                    ids = getattr(item, col)
                    if cardinality == 'id':
                        assert not isinstance(ids, list)
                        ids = [ids]
                    for v in ids:
                        if ref not in data:
                            raise ValueError(ref)  # pragma: no cover
                        if ref == 'refs' and '[' in v:
                            v = v.split('[')[0]
                        if v not in data[ref]:  # pragma: no cover
                            error(
                                'invalid %s id referenced: %s' % (ref, v), name, line + 2)

    if not SUCCESS:  # pragma: no cover
        raise ValueError('integrity checks failed!')
Example #39
def update(repos, log):
    ecoregions = [
        (er['properties']['eco_code'], shape(er['geometry']))
        for er in jsonlib.load(data_file('ecoregions.json', repos=repos))['features']
        if er['geometry'] and er['properties']['eco_code'] not in INVALID_ECO_CODES]

    with CsvData('distribution', repos=repos) as data:
        res = {i.id: i for i in data.items}

        occurrence_data = list(data_file('external', 'gbif', repos=repos).glob('*.json'))
        for fname in tqdm(occurrence_data):
            sid = fname.stem
            d = res.get(sid, Distribution(sid, '', ''))
            if not d.countries__ids or not d.ecoregions__ids:
                occurrences = jsonlib.load(fname).get('results', [])
                if not d.ecoregions__ids:
                    d.ecoregions__ids = list(match(occurrences, ecoregions, log))
                if not d.countries__ids:
                    d.countries__ids = list(r.get('countryCode') for r in occurrences)
            res[sid] = d
            data.items = [res[key] for key in sorted(res.keys())]
Example #40
def load_whitelist():
    """
    Basic function to load the CLPA whitelist.
    """
    _clpadata = jsonlib.load(local_path('clpa.main.json'))
    whitelist = {}
    for group in ['consonants', 'vowels', 'markers', 'tones', 'diphtongs']:
        for val in _clpadata[group]:
            whitelist[_clpadata[val]['glyph']] = _clpadata[val]
            whitelist[_clpadata[val]['glyph']]["ID"] = val

    return whitelist
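
A hypothetical lookup against the whitelist built above: keys are the glyphs from clpa.main.json, and each value is the corresponding record with an added "ID" field (the glyph used here is only an illustrative guess):

wl = load_whitelist()
glyph = 'a'                      # any glyph expected to occur in the CLPA data
if glyph in wl:
    print(wl[glyph]['ID'], wl[glyph]['glyph'])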
Example #41
def load_whitelist():
    """
    Basic function to load the CLPA whitelist.
    """
    _clpadata = jsonlib.load(local_path('clpa.main.json'))
    whitelist = {}
    for group in ['consonants', 'vowels', 'markers', 'tones', 'diphtongs']:
        for val in _clpadata[group]:
            whitelist[_clpadata[val]['glyph']] = _clpadata[val]
            whitelist[_clpadata[val]['glyph']]["ID"] = val

    return whitelist
Example #42
def iter_files(args):
    files = defaultdict(list)
    for n in """
        Burkina_flora_for_website.json
        data.json
        docs.json
        edmond.json
        ffmissing.json
        Heath_flora_fauna_images.json
        Mali_villages_with_coordinates_for_website.json
        texts.json
        videos_from_website.json
        videos.json
    """.split():
        files.update(load(args.data_file('repos', n)))
    missing, matched = 0, 0
    for md5, cdstar in load(args.data_file('repos', 'cdstar.json')).items():
        if md5 in files:
            fnames = [Path(p.encode('utf8')).name.decode('utf8') for p in files[md5]]
            fname = sorted(fnames, key=lambda n: len(n))[-1]
            fname = fname.replace(' ', '_')
            if fname == 'Thumbs.db':
                continue
            m = date_p.search(fname)
            if m:
                d = date(int(m.group('y')), int(m.group('m')), int(m.group('d') or 1))
            else:
                d = None
            yield File(
                md5,
                fname,
                guess_type(fname)[0].decode('utf8'),
                d,
                cdstar['size'],
                cdstar.get('duration'),
                cdstar)
            matched += 1
        else:
            missing += 1
    print('iter_files', missing, matched)
Example #43
def get_text(what, id_, fmt):
    p = text_path(what, '{0}.{1}'.format(id_, fmt))
    if not p.exists():
        raise ValueError(p)
    if fmt == 'json':
        return jsonlib.load(p)
    text = read_text(p)
    if fmt == 'css':
        return text
    body = bs(text).find('body')
    body.name = 'div'
    body.attrs.clear()
    return '{0}'.format(body).replace('.popover(', '.clickover(')
Example #44
def old_downloads():
    from clldmpg import cdstar

    def bitstream_link(oid, spec):
        url = cdstar.SERVICE_URL.path(
            '/bitstreams/{0}/{1}'.format(oid, spec['bitstreamid'])).as_string()
        return HTML.a(
            '{0} [{1}]'.format(spec['bitstreamid'], format_size(spec['filesize'])),
            href=url)

    for number, spec in sorted(
            load(Path(__file__).parent.joinpath('static', 'downloads.json')).items()):
        yield number, [bitstream_link(spec['oid'], bs) for bs in spec['bitstreams']]
Example #45
def old_downloads():
    from clldmpg import cdstar

    def bitstream_link(oid, spec):
        url = cdstar.SERVICE_URL.path(
            '/bitstreams/{0}/{1}'.format(oid, spec['bitstreamid'])).as_string()
        return HTML.a(
            '{0} [{1}]'.format(spec['bitstreamid'], format_size(spec['filesize'])),
            href=url)

    for number, spec in sorted(
            load(Path(__file__).parent.joinpath('static', 'downloads.json')).items()):
        yield number, [bitstream_link(spec['oid'], bs) for bs in spec['bitstreams']]
Example #46
def get(dataset, resource, offset=0, limit=LIMIT, download_=False):
    fname = dataset.raw.joinpath("%(resource)s-%(limit)s-%(offset)s.json" %
                                 locals())
    if fname.exists() and not download_:
        return jsonlib.load(fname)
    if not download_:
        raise ValueError
    res = requests.get("{0}/api/v1/{1}/".format(BASE_URL, resource),
                       params=dict(format='json',
                                   limit='{0}'.format(limit),
                                   offset='{0}'.format(offset))).json()
    jsonlib.dump(res, fname)
    return res
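
The function above caches each API response in a JSON file named after the resource, limit, and offset, and only hits the network when the file is missing and downloading is allowed. A generic standard-library sketch of the same cache-or-fetch idea (the `fetch` callable stands in for the HTTP request):

import json
from pathlib import Path

def cached_json(path, fetch, refresh=False):
    """Return JSON data from `path`, calling `fetch()` and caching the result if needed."""
    path = Path(path)
    if path.exists() and not refresh:
        return json.loads(path.read_text(encoding='utf8'))
    data = fetch()
    path.write_text(json.dumps(data), encoding='utf8')
    return data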
Example #47
def get_text(what, id_, fmt):
    p = text_path(what, '{0}.{1}'.format(id_, fmt))
    if not p.exists():
        raise ValueError(p)
    if fmt == 'json':
        return jsonlib.load(p)
    text = read_text(p)
    if fmt == 'css':
        return text
    body = bs(text).find('body')
    body.name = 'div'
    body.attrs.clear()
    return '{0}'.format(body).replace('.popover(', '.clickover(')
Example #48
def run():
    terms = []
    for e in read_terms().iter():
        if ns('rdf:about') in e.attrib:
            terms.append(e.attrib[ns('rdf:about')])

    for d in ['components', 'modules']:
        for f in walk(REPO_DIR.joinpath(d)):
            if f.suffix == '.json':
                md = load(f)
                for k, v in iterproperties(md):
                    if k in ['propertyUrl', 'dc:conformsTo'] and v not in terms:
                        print(f)
                        print(v)
Example #49
def downloads(req):
    mod = importlib.import_module(req.registry.settings['clld.pkg'])
    dls = Path(mod.__file__).parent.joinpath('static', 'downloads.json')
    print(dls)

    def bitstream_link(oid, spec):
        url = SERVICE_URL.path(
            '{0}/{1}'.format(oid, spec['bitstreamid'])).as_string()
        return HTML.a(
            '{0} [{1}]'.format(spec['bitstreamid'], format_size(spec['filesize'])),
            href=url)

    dls = load(dls) if dls.exists() else {}
    for rel, spec in sorted(dls.items()):
        yield rel, [bitstream_link(spec['oid'], bs) for bs in spec['bitstreams']]
Example #50
def fixtures(type_, name):
    res = {}
    for fname in fixture_path(type_).iterdir():
        name_, key = fname.stem.split('_')
        if name_ == name:
            value = fname
            if fname.suffix == '.json':
                value = jsonlib.load(fname)
            elif fname.suffix == '.html':
                with fname.open(encoding='utf8') as fp:
                    value = fp.read()
            elif fname.suffix == '.xml':
                with open(fname.as_posix(), 'rb') as fp:
                    value = fp.read()
            res[key] = value
    return res
Example #51
def prime_cache(args):
    """If data needs to be denormalized for lookup, do that here.
    This procedure should be separate from the db initialization, because
    it will have to be run periodically whenever data has been updated.
    """
    concepticon = {
        c.GLOSS: c.CONCEPTICON_ID for c in
        reader(args.data_file('repos', 'conceptlist.tsv'), delimiter='\t', namedtuples=True)
        if c.CONCEPTICON_ID}
    sdata = jsonlib.load(args.data_file('repos', 'classification.json'))
    for concept in DBSession.query(models.Concept).options(joinedload(common.Parameter._files)):
        for t_ in ['image', 'video']:
            setattr(concept, 'count_{0}s'.format(t_), len(getattr(concept, t_ + 's')))
        if concept.jsondata['ref'] in sdata:
            util.update_species_data(concept, sdata[concept.jsondata['ref']])
        if concept.name in concepticon:
            concept.concepticon_id = int(concepticon[concept.name])
Example #52
    def __init__(self, path_or_id):
        if isinstance(path_or_id, Path):
            self.dir = path_or_id
            self.id = path_or_id.name
        else:
            self.id = path_or_id
            self.dir = REPOS.joinpath('submissions', path_or_id)

        assert self.dir.exists()
        md = self.dir.joinpath('md.json')
        self.md = load(md) if md.exists() else None
        self.db_name = None
        self.type = None
        if self.dir.joinpath('db.sfm').exists():
            self.db_name = 'db.sfm'
            self.type = 'sfm'
        else:
            raise ValueError('no valid db file in %s' % self.dir)
Example #53
def x(args):
    try:
        from cdstarcat.catalog import Catalog
    except ImportError:
        args.log.error('pip install cdstarcat')
        return
    fname = args.pkg_dir.joinpath('static', 'downloads.json')
    downloads = load(fname)
    release = args.args[0]
    with Catalog(
            Path(os.environ['CDSTAR_CATALOG']),
            cdstar_url=os.environ['CDSTAR_URL'],
            cdstar_user=os.environ['CDSTAR_USER'],
            cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
        obj = cat.api.get_object(uid=downloads[release]['oid'])
        bitstreams = obj.bitstreams[:]
        for bs in bitstreams:
            print(bs.id, bs._properties)
Example #54
def main(args):
    Index('ducet', collkey(common.Value.name)).create(DBSession.bind)
    repos = Path(os.path.expanduser('~')).joinpath('venvs/lexirumah/lexirumah-data')

    with transaction.manager:
        dataset = common.Dataset(
            id=lexirumah.__name__,
            name="lexirumah",
            publisher_name="Max Planck Institute for the Science of Human History",
            publisher_place="Jena",
            publisher_url="http://shh.mpg.de",
            license="http://creativecommons.org/licenses/by/4.0/",
            domain='lexirumah.model-ling.eu',
            contact='*****@*****.**',
            jsondata={
                'license_icon': 'cc-by.png',
                'license_name': 'Creative Commons Attribution 4.0 International License'})
        DBSession.add(dataset)

    glottolog_repos = Path(
        lexirumah.__file__).parent.parent.parent.parent.joinpath('glottolog3', 'glottolog')
    languoids = {l.id: l for l in Glottolog(glottolog_repos).languoids()}
    concepticon = Concepticon(
        Path(lexirumah.__file__).parent.parent.parent.parent.joinpath('concepticon', 'concepticon-data'))
    conceptsets = {c.id: c for c in concepticon.conceptsets.values()}

    skip = True
    for dname in sorted(repos.joinpath('datasets').iterdir(), key=lambda p: p.name):
        #if dname.name == 'benuecongo':
        #    skip = False
        #if skip:
        #    continue
        if dname.is_dir() and dname.name != '_template':
            mdpath = dname.joinpath('cldf', 'metadata.json')
            if mdpath.exists():
                print(dname.name)
                import_cldf(dname, load(mdpath), languoids, conceptsets)

    with transaction.manager:
        load_families(
            Data(),
            DBSession.query(LexiRumahLanguage),
            glottolog_repos=glottolog_repos,
            isolates_icon='tcccccc')
Example #55
def main(args):
    Index('ducet', collkey(common.Value.name)).create(DBSession.bind)
    repos = Path(os.path.expanduser('~')).joinpath('venvs/lexibank/lexibank-data')

    with transaction.manager:
        dataset = common.Dataset(
            id=lexibank.__name__,
            name="lexibank",
            publisher_name="Max Planck Institute for the Science of Human History",
            publisher_place="Jena",
            publisher_url="http://shh.mpg.de",
            license="http://creativecommons.org/licenses/by/4.0/",
            domain='lexibank.clld.org',
            contact='*****@*****.**',
            jsondata={
                'license_icon': 'cc-by.png',
                'license_name': 'Creative Commons Attribution 4.0 International License'})
        DBSession.add(dataset)

    glottolog = Glottolog(
        Path(lexibank.__file__).parent.parent.parent.parent.joinpath('glottolog3', 'glottolog'))
    languoids = {l.id: l for l in glottolog.languoids()}
    concepticon = Concepticon(
        Path(lexibank.__file__).parent.parent.parent.parent.joinpath('concepticon', 'concepticon-data'))
    conceptsets = {c['ID']: c for c in concepticon.conceptsets()}

    for dname in repos.joinpath('datasets').iterdir():
        #if dname.name not in ['acbd']:
        #    continue
        if dname.is_dir() and dname.name != '_template':
            #if dname.name != 'zenodo34092':
            #    continue
            mdpath = dname.joinpath('metadata.json')
            if mdpath.exists():
                print(dname.name)
                import_cldf(dname, load(mdpath), languoids, conceptsets)

    with transaction.manager:
        load_families(
            Data(),
            DBSession.query(LexibankLanguage),
            glottolog=languoids,
            isolates_icon='tcccccc')
Example #56
def dl2cdstar(args):
    app = app_name(args.project)
    if not app:
        args.log.error('cannot parse package name')
        return

    try:
        from cdstarcat.catalog import Catalog
    except ImportError:
        args.log.error('pip install cdstarcat')
        return

    title_pattern = re.compile('%s (?P<version>[0-9.]+) - downloads' % re.escape(app))
    title = '{0} {1} - downloads'.format(app, args.version)
    pkg_dir = args.project.joinpath(app)
    with Catalog(
            Path(os.environ['CDSTAR_CATALOG']),
            cdstar_url=os.environ['CDSTAR_URL'],
            cdstar_user=os.environ['CDSTAR_USER'],
            cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
        obj = cat.api.get_object()
        obj.metadata = {"creator": "pycdstar", "title": title}
        if args.args:
            obj.metadata["description"] = args.args[0]
        for fname in pkg_dir.joinpath('static', 'download').iterdir():
            if fname.is_file() and not fname.name.startswith('.'):
                print(fname.name)
                obj.add_bitstream(
                    fname=fname.as_posix(), name=fname.name.replace('-', '_'))
        cat.add(obj)

    fname = pkg_dir.joinpath('static', 'downloads.json')
    with update(fname, default={}, indent=4) as downloads:
        for oid, spec in load(Path(os.environ['CDSTAR_CATALOG'])).items():
            if 'metadata' in spec and 'title' in spec['metadata']:
                match = title_pattern.match(spec['metadata']['title'])
                if match:
                    if match.group('version') not in downloads:
                        spec['oid'] = oid
                        downloads[match.group('version')] = spec
    args.log.info('{0} written'.format(fname))
    args.log.info('{0}'.format(os.environ['CDSTAR_CATALOG']))
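
`update(fname, default={}, indent=4)` is used above as a context manager that yields the current content of a JSON file (or `default` if the file does not exist) and writes the possibly modified object back on exit. A minimal standard-library sketch of that pattern, not necessarily the clldutils implementation:

import json
from contextlib import contextmanager
from pathlib import Path

@contextmanager
def json_update(path, default=None, **dump_kw):
    path = Path(path)
    if path.exists():
        obj = json.loads(path.read_text(encoding='utf8'))
    else:
        obj = default if default is not None else {}
    yield obj                      # the caller mutates obj in place
    path.write_text(json.dumps(obj, **dump_kw), encoding='utf8')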
Example #57
    def cached_metadata(self, sid, id=None, name=None, refresh=False):
        if data_file('external', self.name, repos=self.repos).is_dir():
            fname = data_file('external', self.name, sid + '.json', repos=self.repos)
            if not fname.exists() or refresh:
                try:
                    data = self.metadata(id or self.identify(name))
                except:  # pragma: no cover
                    data = None
                if not data:
                    return  # pragma: no cover
                jsonlib.dump(data, fname)
                return data
            return jsonlib.load(fname)

        if sid not in self.items or refresh:
            try:
                self.items[sid] = self.metadata(id or self.identify(name))
            except:
                return
        return self.items[sid]
Example #58
def survey(request):
    id_ = request.matchdict["id"]
    md = jsonlib.load(ppath("Surveys", "%s.json" % id_))
    html = get_html(ppath("Surveys", "%s.html" % id_))
    maps = []
    for fname in sorted(
            ppath("Surveys", processed="maps").glob(
                "%s*.png" % id_.split(".")[1].replace("-", "_")),
            key=lambda fn: fn.stem):
        img = b64encode(open(fname.as_posix(), "rb").read())
        if "figure" in fname.stem:
            html = html.replace("{%s}" % fname.stem, "data:image/png;base64,%s" % img)
        else:
            maps.append(img)

    return {
        "maps": maps,
        "md": md,
        "authors": [Contributor.get(a["id"]) for a in md["authors"]],
        "html": html,
        "ctx": ApicsContribution.get(id_.split(".")[0]),
    }
Example #59
    def test_read_write(self):
        from pycldf.csv import Reader, Writer

        table = load(FIXTURES.joinpath('ds1.csv-metadata.json'))['tables'][0]
        table['tableSchema']['columns'][0]['datatype'] = 'integer'
        table['url'] = 'test.tsv'

        row = '1,abcd1234,fid1,yes,,80086;meier2015[2-5]'.split(',')

        with Archive(self.tmp_path('test.zip'), 'w') as archive:
            with Writer(table, container=archive) as writer:
                writer.writerow(row)

        with Archive(self.tmp_path('test.zip')) as archive:
            with Reader(table, container=archive) as reader:
                rows = list(reader)
                self.assertEqual(rows[0]['ID'], 1)
                self.assertEqual(
                    rows[0].valueUrl('Language_ID'),
                    'http://glottolog.org/resource/languoid/id/abcd1234')
                self.assertEqual(rows[0].to_list(), row)

        table = Table(table)
        del table.dialect['header']
        self.assertTrue(table.dialect.header)
        del table.dialect['delimiter']
        self.assertEqual(table.dialect.delimiter, ',')
        table.dialect.header = False

        with Writer(table, container=self.tmp_path()) as writer:
            writer.writerow(row)
            writer.writerows(rows)

        with Reader(table, container=self.tmp_path()) as reader:
            rows = list(reader)
            self.assertEqual(rows[0]['ID'], 1)
            self.assertEqual(
                rows[0].valueUrl('Language_ID'),
                'http://glottolog.org/resource/languoid/id/abcd1234')
            self.assertEqual(rows[0].to_list(), row)
Example #60
def datasets(args):
    """
    cldf datasets <DIR> [ATTRS]

    List all CLDF datasets in directory <DIR>
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    d = Path(args.args[0])
    if not d.exists() or not d.is_dir():
        raise ParserError('%s is not an existing directory' % d)
    for fname in sorted(d.glob('*' + MD_SUFFIX), key=lambda p: p.name):
        md = Metadata(load(fname))
        data = fname.parent.joinpath(
            md.get_table().url or fname.name[:-len(MD_SUFFIX)])
        if data.exists():
            print(data)
            if len(args.args) > 1:
                maxlen = max(len(a) for a in args.args[1:])
                for attr in args.args[1:]:
                    if md.get(attr):
                        print('    %s %s' % ((attr + ':').ljust(maxlen + 1), md[attr]))