def update(repos, verbose=True):
    ecoregions = [
        (er['properties']['eco_code'], shape(er['geometry']))
        for er in jsonlib.load(data_file('ecoregions.json', repos=repos))['features']
        if er['geometry'] and er['properties']['eco_code'] not in INVALID_ECO_CODES]
    with CsvData('distribution', repos=repos) as data:
        res = {i.id: i for i in data.items}
        occurrence_data = list(data_file('external', 'gbif', repos=repos).glob('*.json'))
        if verbose:  # pragma: no cover
            occurrence_data = tqdm(occurrence_data)
        for fname in occurrence_data:
            sid = fname.stem
            d = res.get(sid, Distribution(sid, '', ''))
            if not d.countries__ids or not d.ecoregions__ids:
                occurrences = jsonlib.load(fname).get('results', [])
                if not d.ecoregions__ids:
                    d.ecoregions__ids = list(match(occurrences, ecoregions))
                if not d.countries__ids:
                    d.countries__ids = list(r.get('countryCode') for r in occurrences)
                res[sid] = d
        data.items = [res[key] for key in sorted(res.keys())]
def get_bib(args):
    uploaded = load(args.data_file('repos', 'cdstar.json'))
    fname_to_cdstar = {}
    for type_ in ['texts', 'docs', 'data']:
        for hash_, paths in load(args.data_file('repos', type_ + '.json')).items():
            if hash_ in uploaded:
                for path in paths:
                    fname_to_cdstar[path.split('/')[-1]] = uploaded[hash_]
    for hash_, paths in load(args.data_file('repos', 'edmond.json')).items():
        if hash_ in uploaded:
            for path in paths:
                fname_to_cdstar[path.split('/')[-1]] = uploaded[hash_]
    db = Database.from_file(args.data_file('repos', 'Dogon.bib'), lowercase=True)
    for rec in db:
        doc = Document(rec)
        newurls = []
        for url in rec.get('url', '').split(';'):
            if not url.strip():
                continue
            if url.endswith('sequence=1'):
                newurls.append(url)
                continue
            url = URL(url.strip())
            if url.host() in ['dogonlanguages.org', 'github.com', '']:
                fname = url.path().split('/')[-1]
                doc.files.append((fname, fname_to_cdstar[fname]))
            else:
                newurls.append(url.as_string())
        doc.rec['url'] = '; '.join(newurls)
        yield doc
def issues(self):
    issues = jsonlib.load(self.issues_path)
    comments = jsonlib.load(self.comments_path)
    return [
        Issue(issue, comments.get(str(issue['number']), []))
        for issue in issues]
def village_images(args):
    uploaded = load(args.data_file('repos', 'cdstar.json'))
    files = load(
        args.data_file('repos', 'Mali_villages_with_coordinates_for_website.json'))
    for hash_, paths in files.items():
        if hash_ in uploaded:
            fname = Path(paths[0])
            name, coords, desc, date_, creators = image_md(fname.stem)
            yield VillageImage(
                hash_,
                fname.name.decode('utf8'),
                VFN.get(fname.name),
                desc,
                date_,
                creators,
                coords,
                uploaded[hash_])
def ff_images(args):
    tsammalex = {
        i.id: i.taxa__id
        for i in reader(args.data_file('repos', 'tsammalex_images.csv'), namedtuples=True)}
    ref_pattern = re.compile('(?P<ref>[0-9]{5})')
    uploaded = load(args.data_file('repos', 'cdstar.json'))
    files = load(args.data_file('repos', 'Heath_flora_fauna_images.json'))
    files.update(load(args.data_file('repos', 'ffmissing.json')))
    path_to_md5 = {}
    for md5, paths in files.items():
        for path in paths:
            path_to_md5[Path(path.encode('utf8')).stem] = md5
    missed, found, uploaded_ = 0, 0, 0
    for i, img in enumerate(reader(
            args.data_file('repos', 'dogon_flora-fauna.csv'),
            delimiter=',',
            namedtuples=True)):
        stem = Path(img.filenames.encode('utf8')).stem
        assert stem in path_to_md5
        found += 1
        if path_to_md5[stem] in uploaded:
            m = ref_pattern.search(stem)
            uploaded_ += 1
            yield FFImage(
                path_to_md5[stem],
                Path(files[path_to_md5[stem]][0].encode('utf8')).name,
                None,
                m.group('ref') if m else None,
                None,
                [],
                uploaded[path_to_md5[stem]],
                tsammalex.get(path_to_md5[stem]))
    videos = load(args.data_file('repos', 'videos_from_website.json'))
    videos.update(load(args.data_file('repos', 'videos.json')))
    for md5, paths in videos.items():
        if md5 in uploaded:
            path = Path(paths[0].encode('utf8'))
            m = ref_pattern.search(path.stem)
            uploaded_ += 1
            yield FFImage(
                md5,
                path.name,
                None,
                m.group('ref') if m else None,
                None,
                [],
                uploaded[md5],
                tsammalex.get(md5))
        else:
            missed += 1
    print('ff_images', missed, uploaded_)
def chapter(request):
    _html = get_html(ppath("Atlas", "%s.html" % request.matchdict["id"]))
    return {
        "md": jsonlib.load(ppath("Atlas", "%s.json" % request.matchdict["id"])),
        "html": lambda vt: _html.replace("<p>value-table</p>", vt),
        "ctx": Feature.get(request.matchdict["id"]),
    }
def __init__(self, path, repos=REPOS, container_cls=dict, json_opts=None):
    DataManager.__init__(self, path, repos)
    if self.path.exists():
        self.items = jsonlib.load(self.path, object_pairs_hook=OrderedDict)
    else:
        self.items = container_cls()
    self._json_opts = json_opts or {}
def __init__(self, path, cdstar_url=None, cdstar_user=None, cdstar_pwd=None):
    self.path = pathlib.Path(path)
    self.objects = {}
    if self.path.exists():
        if self.path.suffix.lower() == '.zip':
            with zipfile.ZipFile(str(self.path), 'r') as z:
                for filename in z.namelist():
                    with z.open(filename) as f:
                        self.objects = {
                            i: Object.fromdict(i, d)
                            for i, d in json.loads(f.read().decode('utf-8')).items()}
                    break
        else:
            self.objects = {
                i: Object.fromdict(i, d) for i, d in load(self.path).items()}
    self.api = Cdstar(service_url=cdstar_url, user=cdstar_user, password=cdstar_pwd)
def test_dataset_from_file(self):
    from pycldf.dataset import Dataset

    ds = Dataset.from_file(FIXTURES.joinpath('ds1.csv'))
    self.assertIn('ds1', repr(ds))
    self.assertEqual(len(ds), 2)
    self.assertEqual(ds.table.url, 'ds1.csv')
    self.assertEqual(ds.metadata['dc:creator'], 'The Author')
    row = ['3', 'abcd1234', 'fid2', 'maybe', '', 'new[4]']
    with self.assertRaises(ValueError):
        ds.add_row(row)
    ds.sources.add('@book{new,\nauthor={new author}}')
    res = ds.add_row(row)
    self.assertEqual(res.url, 'http://example.org/valuesets/3')
    self.assertEqual(len(res.refs), 1)
    self.assertEqual(
        res.valueUrl('Language_ID'),
        'http://glottolog.org/resource/languoid/id/abcd1234')
    res = ds.add_row(['4', None, None, None, None, None])
    self.assertEqual(res.valueUrl('Language_ID'), None)
    out = self.tmp_path()
    ds.write(out, '.tsv')
    self.assertTrue(out.joinpath('ds1.bib').exists())
    md = load(out.joinpath('ds1.tsv-metadata.json'))
    self.assertEqual('ds1.tsv', md['tables'][0]['url'])
    Dataset.from_file(out.joinpath('ds1.tsv'))
def test_get_subset(self):
    self.lex.get_subset([])
    self.assertEquals([v for v in self.lex.subsets.values() if v], [])
    pairs = jsonlib.load(test_data('KSL.pairs.json'))
    self.assertEquals(
        sorted('---'.join(k) for k in self.lex.subsets.keys()),
        sorted(pairs.keys()))
def load(table, csv, engine):
    schema = jsonlib.load(
        csv.parent.joinpath(csv.stem + '.' + CsvmJsonAdapter.extension))
    converter = get_converter(schema['tableSchema'], table)
    engine.execute(
        table.insert(), [converted(d, converter) for d in reader(csv, dicts=True)])
    return schema.get("dc:identifier")
def contribute(req):
    return {
        'missing': load(
            Path(asjp.__file__).parent.joinpath(
                'static', 'ethnologue17_diff.json'))['missing']
    }
def __init__(self, path):
    self.dir = path
    self.id = path.name
    self.cdstar = load(REPOS.joinpath('cdstar.json'))
    print(self.dir)
    assert self.dir.exists()
    desc = self.dir.joinpath('md.html')
    if desc.exists():
        with desc.open(encoding='utf8') as fp:
            self.description = fp.read()
    else:
        self.description = None
    md = self.dir.joinpath('md.json')
    self.md = load(md) if md.exists() else None
    self.props = self.md.get('properties', {}) if self.md else {}
def iter_languages():
    ldstatus = load(
        GLOTTOLOG_VENV.joinpath('glottolog3/glottolog3/static/ldstatus.json'))
    for l in Glottolog(GLOTTOLOG_VENV.joinpath('glottolog')).languoids():
        if l.level == Level.language and not l.category.startswith('Pseudo'):
            yield Language(l, ((ldstatus.get(l.id) or [[0, None]])[0] or [0, None])[1])
def add_component(self, component, *cols, **kw):
    if isinstance(component, str):
        component = jsonlib.load(
            pkg_path('components', '{0}{1}'.format(component, MD_SUFFIX)))
    if isinstance(component, dict):
        component = Table.fromvalue(component)
    assert isinstance(component, Table)
    if kw.get('url'):
        component.url = Link(kw['url'])
    for other_table in self.tables:
        if other_table.url == component.url:
            raise ValueError('tables must have distinct url properties')
    self.add_columns(component, *cols)
    try:
        table_type = self.get_tabletype(component)
    except ValueError:
        table_type = None
    if table_type:
        for other_table in self.tables:
            try:
                other_table_type = self.get_tabletype(other_table)
            except ValueError:  # pragma: no cover
                continue
            if other_table_type == table_type:
                raise ValueError('components must not be added twice')
    self.tables.append(component)
    component._parent = self.tablegroup
    self.auto_constraints(component)
    return component
def load(cls, path, contrib_md):
    # zenodo download dumps all files into a subfolder
    if not (path / 'cldf').exists():
        for subpath in path.glob('*'):
            if (subpath / 'cldf').exists():
                path = subpath
                break
    assert path.exists(), str(path)
    try:
        cldf_dataset = next(iter_datasets(path / 'cldf'))
    except StopIteration:
        raise ValueError('No cldf metadata file found in {}'.format(path))
    bib_path = path / 'cldf' / 'sources.bib'
    sources = bibtex.Database.from_file(bib_path) if bib_path.exists() else None
    md_path = path / 'metadata.json'
    md = jsonlib.load(md_path) if md_path.exists() else {}
    # XXX maybe also allow README.txt?
    readme_path = path / 'README.md'
    try:
        with readme_path.open(encoding='utf-8') as f:
            readme = f.read().strip()
    except IOError:
        readme = None
    authors = contrib_md.get('authors') or ()
    return cls(cldf_dataset, sources, authors, md.get('title'), readme)
def wals_detail_html(context=None, request=None, **kw):
    wals_data = Path(apics.__file__).parent.joinpath(
        'static', 'wals', '%sA.json' % context.parameter.wals_id)
    if not wals_data.exists():
        raise HTTPNotFound()
    wals_data = jsonlib.load(wals_data)
    value_map = {}
    for layer in wals_data['layers']:
        for feature in layer['features']:
            feature['properties']['icon'] = request.registry.getUtility(
                IIcon, name=feature['properties']['icon']).url(request)
            feature['properties']['popup'] = external_link(
                'http://wals.info/languoid/lect/wals_code_'
                + feature['properties']['language']['id'],
                label=feature['properties']['language']['name'])
        value_map[layer['properties']['number']] = {
            'icon': layer['features'][0]['properties']['icon'],
            'name': layer['properties']['name'],
            'number': layer['properties']['number'],
        }
    return {
        'wals_data': wals_data,
        'wals_map': WalsMap(
            context.parameter, request, data=wals_data, value_map=value_map),
        'apics_map': ApicsWalsMap(
            context.parameter, request, data=wals_data, value_map=value_map)}
def register(args):  # pragma: no cover
    """Register a dataset with datahub.io."""
    dataset = Dataset.first()
    name = 'clld-' + dataset.id.lower()
    package = datahub('package_show', id=name)
    if not package:
        package = datahub(
            'package_create',
            **{'name': name, 'title': 'CLLD-' + dataset.id.upper(), 'owner_org': 'clld'})
    md = {
        'url': 'http://%s' % dataset.domain,
        'notes': '%s published by the CLLD project' % dataset.name,
        'maintainer': 'CLLD Project',
        'tags': [
            {'name': 'linguistics'},
            {'name': 'lod'},
            {'name': 'llod'},
        ]}
    if dataset.contact:
        md['maintainer_email'] = dataset.contact
    if dataset.license:
        if 'creativecommons.org/licenses/by/' in dataset.license:
            md['license_id'] = 'cc-by-sa'
            md['license_title'] = "Creative Commons Attribution Share-Alike"
        elif 'creativecommons.org/' in dataset.license and '-nc' in dataset.license:
            md['license_id'] = 'cc-nc'
            md['license_title'] = "Creative Commons Non-Commercial (Any)"
    rdf_md = args.data_file('rdf-metadata.json')
    if rdf_md.exists():
        rdf_md = jsonlib.load(rdf_md)
        md['extras'] = [
            {'key': k, 'value': str(rdf_md[k])} for k in rdf_md.keys()
            if k.split(':')[0] in ['triples', 'resources', 'links']]
    package = datahub('package_update', id=name, **md)
    resources = [rsc['name'] for rsc in package['resources']]
    if 'VoID description' not in resources:
        rsc = datahub(
            'resource_create',
            package_id=package['id'],
            name='VoID description',
            url='http://%s/void.ttl' % dataset.domain,
            format='meta/void',
            mimetype='text/turtle')
        assert rsc
    rdf_dump = '%s-dataset.n3.gz' % dataset.id
    if ('RDF dump' not in resources) \
            and args.module_dir.joinpath('static', 'download', rdf_dump).exists():
        rsc = datahub(
            'resource_create',
            package_id=package['id'],
            name='RDF dump',
            url='http://%s/static/download/%s' % (dataset.domain, rdf_dump),
            format='text/n3',
            mimetype='text/n3')
        assert rsc
    print('>>> Make sure to upload the RDF dump to the production site.')
def write_languoids_table(self, outdir, version=None):
    version = version or self.describe()
    if outdir is not None and not outdir.exists():
        raise IOError(
            "Specified output directory %s does not exist. Please create it." % outdir)
    out = outdir / 'glottolog-languoids-{0}.csv'.format(version)
    md = outdir / (out.name + '-metadata.json')
    tg = TableGroup.fromvalue({
        "@context": "http://www.w3.org/ns/csvw",
        "dc:version": version,
        "dc:": "Harald Hammarström, Robert Forkel & Martin Haspelmath. "
               "clld/glottolog: Glottolog database (Version {0}) [Data set]. "
               "Zenodo. http://doi.org/10.5281/zenodo.596479".format(version),
        "tables": [
            load(pycldf.util.pkg_path('components', 'LanguageTable-metadata.json'))],
    })
    tg.tables[0].url = out.name
    for col in [
        dict(name='LL_Code'),
        dict(name='Classification', separator='/'),
        dict(name='Family_Glottocode'),
        dict(name='Family_Name'),
        dict(name='Language_Glottocode'),
        dict(name='Language_Name'),
        dict(name='Level', datatype=dict(base='string', format='family|language|dialect')),
        dict(name='Status'),
    ]:
        tg.tables[0].tableSchema.columns.append(Column.fromvalue(col))
    langs = []
    for lang in self.languoids():
        lid, lname = None, None
        if lang.level == self.languoid_levels.language:
            lid, lname = lang.id, lang.name
        elif lang.level == self.languoid_levels.dialect:
            for lname, lid, level in reversed(lang.lineage):
                if level == self.languoid_levels.language:
                    break
            else:  # pragma: no cover
                raise ValueError
        langs.append(dict(
            ID=lang.id,
            Name=lang.name,
            Macroarea=lang.macroareas[0].name if lang.macroareas else None,
            Latitude=lang.latitude,
            Longitude=lang.longitude,
            Glottocode=lang.id,
            ISO639P3code=lang.iso,
            LL_Code=lang.identifier.get('multitree'),
            Classification=[c[1] for c in lang.lineage],
            Language_Glottocode=lid,
            Language_Name=lname,
            Family_Name=lang.lineage[0][0] if lang.lineage else None,
            Family_Glottocode=lang.lineage[0][1] if lang.lineage else None,
            Level=lang.level.name,
            Status=lang.endangerment.status.name if lang.endangerment else None,
        ))
    tg.to_file(md)
    tg.tables[0].write(langs, fname=out)
    return md, out
def test_json(self):
    from clldutils.jsonlib import dump, load

    d = {'a': 234, 'ä': 'öäüß'}
    p = self.tmp_path('test')
    dump(d, p)
    for k, v in load(p).items():
        assert d[k] == v
def jsondump(obj, fname, log=None):
    fname = Path(fname)
    if fname.exists():
        d = jsonlib.load(fname)
        d.update(obj)
        obj = d
    jsonlib.dump(sorted_obj(obj), fname, indent=4)
    log_dump(fname, log=log)
    return obj
def get_concept(s):
    global _concepticon
    if _concepticon is None:
        _concepticon = load(Path(dictionaria.__file__).parent.joinpath(
            'static', 'concepticon-1.0-labels.json'))
    s = s.lower()
    if s in _concepticon['conceptset_labels']:
        return _concepticon['conceptset_labels'][s]
    return _concepticon['alternative_labels'].get(s)
def test_read_editors(api_copy):
    prepare_release(api_copy, '3.3')
    zenodo = load(api_copy.path('.zenodo.json'))
    assert zenodo['creators'][1]['affiliation'] == 'University Uppsala'
    assert zenodo['description'] == \
        '<p>, C & Hammarström, Harald & Forkel, Robert. ' \
        '1999. Glottolog 3.3. ' \
        'Jena: Max Planck Institute for the Science of Human History. ' \
        '(Available online at ' \
        '<a href="https://glottolog.org">https://glottolog.org</a>)</p>'
def __init__(self, path):
    self.dir = path
    self.id = path.name
    self.cdstar = load(REPOS.joinpath('cdstar.json'))
    print(self.dir)
    assert self.dir.exists()
    desc = self.dir.joinpath('intro.md')
    if desc.exists():
        with desc.open(encoding='utf8') as fp:
            self.description = fp.read()
    else:
        self.description = None
    md = self.dir.joinpath('md.json')
    self.md = load(md) if md.exists() else None
    self.props = self.md.get('properties', {}) if self.md else {}
    bib = self.dir.joinpath('sources.bib')
    self.bib = bibtex.Database.from_file(bib) if bib.exists() else None
def cmd_makecldf(self, args):
    concepts = args.writer.add_concepts(
        id_factory=lambda x: x.id.split("-")[-1] + "_" + slug(x.english),
        lookup_factory="Database_ID",
    )
    language_map = {
        lang["ID"]: lang["Glottocode"] or None for lang in self.languages}
    sources = {}
    for path in sorted(self.raw_dir.glob("*.json"), key=lambda _p: int(_p.stem)):
        data = jsonlib.load(path)
        iso = data.get("ISO 639-3")
        if iso:
            iso = iso.strip()
        args.writer.add_language(
            ID=data["id"],
            Name=data["name"],
            ISO639P3code=iso if iso not in {"no", "XXX"} else None,
            Glottocode=language_map[data["id"]],
        )
        for table in ["basic", "flora", "cult"]:
            if table not in data["tables"]:
                continue
            for item in data["tables"][table]["rows"]:
                item = dict(zip(data["tables"][table]["header"], item))
                form = item["Orthographic Form"].strip()
                if form:
                    refs = [ref for ref in itersources(item, data, sources) if ref]
                    args.writer.add_sources(*[ref.source for ref in refs])
                    href, _ = item["English"]
                    concept_database_id = href.split("/")[-1]
                    if not concepts.get(concept_database_id):
                        # https://huntergatherer.la.utexas.edu/lexical/feature/729
                        # is missing from the concept list(s)
                        continue
                    args.writer.add_lexemes(
                        Language_ID=data["id"],
                        Parameter_ID=concepts[concept_database_id],
                        Value=form,
                        Loan=bool(item["Loan Source"] or item["Wanderwort Status"]),
                        Phonemic=item["Phonemicized Form"] or None,
                        Source=["%s" % ref for ref in refs],
                        Creator=item.get("Created By"),
                        Comment=item.get("General Notes"),
                    )
def test_SourcesCatalog(tmp_path):
    cat_path = tmp_path / 'test.json'
    with SourcesCatalog(cat_path) as cat:
        cat.add(
            'key',
            Object('id', [Bitstream('bsid', 5, 'text/plain', '', '', '')], {}))
        assert 'key' in cat
        assert 'url' in cat.get('key')
    assert 'key' in load(str(cat_path))
def rename(args):  # pragma: no cover
    api = Concepticon(args.repos)
    from_, to_ = args.args
    assert CONCEPTLIST_ID_PATTERN.match(to_)
    cl = api.conceptlists[from_]

    # write the adapted concept list to the new path:
    with UnicodeWriter(
            cl.path.parent / cl.path.name.replace(from_, to_), delimiter='\t') as writer:
        header = []
        for i, row in enumerate(reader(cl.path, delimiter='\t')):
            if i == 0:
                header = row
                writer.writerow(row)
                header = {v: k for k, v in enumerate(header)}  # Map col name to row index
            else:
                oid = row[header['ID']]
                assert oid.startswith(from_)
                nid = oid.replace(from_, to_)
                api.add_retirement(
                    'Concept', dict(id=oid, comment='renaming', replacement=nid))
                row[header['ID']] = nid
                writer.writerow(row)

    # write adapted metadata to the new path:
    fname = cl.path.name.replace(from_, to_) + MD_SUFFIX
    md = jsonlib.load(
        cl.path.parent / (cl.path.name + MD_SUFFIX), object_pairs_hook=OrderedDict)
    md['tables'][0]['url'] = fname
    jsonlib.dump(md, cl.path.parent / fname, indent=4)

    # remove obsolete concept list and metadata:
    cl.path.unlink()
    cl.path.parent.joinpath(cl.path.name + MD_SUFFIX).unlink()

    # adapt conceptlists.tsv
    rows = []
    for row in reader(api.data_path('conceptlists.tsv'), delimiter='\t'):
        rows.append([col.replace(from_, to_) if col else col for col in row])
    with UnicodeWriter(api.data_path('conceptlists.tsv'), delimiter='\t') as writer:
        writer.writerows(rows)

    api.add_retirement(
        'Conceptlist', dict(id=from_, comment='renaming', replacement=to_))

    print("""Please run
grep -r "{0}" concepticondata/ | grep -v retired.json
to confirm the renaming was complete!""".format(from_))
def test():
    if not REPOS.exists():
        return
    data = {
        n: OrderedDict([(item.id, item) for item in models.CsvData(n, on_error=error)])
        for n in CSV}
    data['ecoregions'] = {}
    for ecoregion in jsonlib.load(data_file('ecoregions.json'))['features']:
        data['ecoregions'][ecoregion['properties']['eco_code']] = ecoregion
    data['refs'] = {}
    with data_file('sources.bib').open(encoding='utf8') as fp:
        for line in fp:
            match = BIB_ID_PATTERN.match(line.strip())
            if match:
                data['refs'][match.group('id')] = 1
    data['countries'] = {country.alpha2: country for country in countries}
    for name in ['names', 'taxa']:
        for line, item in enumerate(data[name].values()):
            for ref in item.refs__ids:
                if '[' in ref:
                    source_id, pages = ref.split('[', 1)
                    if not pages.endswith(']'):  # pragma: no cover
                        error('invalid reference %s' % (ref,), name, line + 2)
                else:
                    source_id = ref
                if source_id not in data['refs']:  # pragma: no cover
                    error('invalid id referenced: %s' % (source_id,), name, line + 2)
    for name, model in [(n, getattr(models, n.capitalize())) for n in CSV]:
        for line, item in enumerate(data[name].values()):
            for col in [f.name for f in attr.fields(model)]:
                if '__' in col:
                    ref, cardinality = col.split('__', 1)
                    #if ref not in data:
                    #    continue
                    ids = getattr(item, col)
                    if cardinality == 'id':
                        assert not isinstance(ids, list)
                        ids = [ids]
                    for v in ids:
                        if ref not in data:
                            raise ValueError(ref)  # pragma: no cover
                        if ref == 'refs' and '[' in v:
                            v = v.split('[')[0]
                        if v not in data[ref]:  # pragma: no cover
                            error('invalid %s id referenced: %s' % (ref, v), name, line + 2)
    if not SUCCESS:  # pragma: no cover
        raise ValueError('integrity checks failed!')
def vocabularies(self):
    """
    Provide access to a `dict` of controlled vocabularies.
    """
    res = jsonlib.load(self.data_path('concepticon.json'))
    for k in res['COLUMN_TYPES']:
        v = res['COLUMN_TYPES'][k]
        if isinstance(v, list) and v and v[0] == 'languoid':
            res['COLUMN_TYPES'][k] = Languoid(v[1])
    return res
def _metadata(self, id_):
    values_path = self.data_path('concept_set_meta', id_ + '.tsv')
    md_path = self.data_path('concept_set_meta', id_ + '.tsv' + MD_SUFFIX)
    assert values_path.exists() and md_path.exists()
    md = jsonlib.load(md_path)
    return Metadata(
        id=id_,
        meta=md,
        values=to_dict(
            read_dicts(values_path, schema=md['tableSchema']),
            key=operator.itemgetter('CONCEPTICON_ID')))
def __init__(self, dataset, fname):
    self.dataset = dataset
    self.fname = fname
    if fname.exists():
        try:
            self.report = jsonlib.load(fname)
        except ValueError:
            self.report = {}
    else:
        self.report = {}
def experiments(self):
    gbif = load(self.path('gbif.json'))
    res = [
        Experiment.from_dict(d, self.sources)
        for d in list(dsv.reader(self.path('data.Sheet1.csv'), dicts=True))[1:]
    ]
    for ex in res:
        key, md = gbif.get(ex.species_latin, (None, None))
        if key:
            ex.gbif = GBIF(key=key, metadata=md)
    return res
def cmd_readme(self, args):
    res = self.metadata.markdown()
    tr = self.cldf_dir / '.transcription-report.json'
    tr = jsonlib.load(tr) if tr.exists() else None
    res += report.report(self, tr, getattr(args, 'glottolog', None), args.log)
    if self.contributors_path.exists():
        res += '\n\n{0}\n\n'.format(self.contributors_path.read_text(encoding='utf8'))
    self.dir.write('FORMS.md', self.form_spec.as_markdown(self))
    return res
def test_makecldf(repos, dataset, dataset_cldf, dataset_no_cognates, sndcmp, capsys, tmp_path):
    _main('lexibank.makecldf {0} --glottolog {1} --concepticon {1} --clts {1}'.format(
        str(dataset.dir / 'td.py'),
        str(repos),
    ))
    assert 'Papunesia' in dataset.cldf_dir.joinpath('languages.csv').read_text(encoding='utf8')
    # Metadata for Zenodo is merged if this makes sense:
    assert len(jsonlib.load(dataset.dir / '.zenodo.json')['communities']) == 3

    _main('lexibank.makecldf {0} --dev --glottolog {1} --concepticon {1} --clts {1}'.format(
        str(dataset.dir / 'td.py'),
        str(repos),
    ))
    assert 'Papunesia' not in dataset.cldf_dir.joinpath('languages.csv').read_text(
        encoding='utf8')
    assert '### Replacement' in dataset.dir.joinpath('FORMS.md').read_text(encoding='utf8')

    _main('lexibank.makecldf {0} --glottolog {1} --concepticon {1} --clts {1}'.format(
        str(sndcmp.dir / 'ts.py'),
        str(repos),
    ))
    assert 'Bislama_Gloss' in sndcmp.cldf_dir.joinpath('parameters.csv').read_text(
        encoding='utf8')
    assert 'e56a5fc78ae5a66e783c17bc30019568' in sndcmp.cldf_dir.joinpath(
        'media.csv').read_text(encoding='utf8')

    _main('lexibank.makecldf {0} --glottolog {1} --concepticon {1} --clts {1}'.format(
        str(dataset_cldf.dir / 'tdc.py'),
        str(repos),
    ))
    capout = capsys.readouterr().out
    assert 'The dataset has no sources' not in capout

    _main('lexibank.makecldf {0} --glottolog {1} --concepticon {1} --clts {1}'.format(
        str(dataset_no_cognates.dir / 'tdn.py'),
        str(repos),
    ))
    assert not dataset_no_cognates.cldf_dir.joinpath('cognates.csv').exists()

    _main('lexibank.load --db {3} {0} --glottolog {1} --concepticon {2}'.format(
        str(dataset_no_cognates.dir / 'tdn.py'),
        str(repos),
        str(repos),
        str(tmp_path / 'db'),
    ))
def content_callback(request, context):
    if 'access' in request.url:  # file download
        with zipfile.ZipFile(tmp_path / 'ds.zip', 'w') as zip:
            zip.write(
                pathlib.Path(__file__).parent / 'repos' / 'csv' /
                '2022-06-1KRR1P_ZIMBABWE_CRATON_ARCHEAN.csv',
                '2022-06-1KRR1P_ZIMBABWE_CRATON_ARCHEAN.csv')
        return tmp_path.joinpath('ds.zip').read_bytes()
    # Dataset metadata:
    return json.dumps(dict(data=load(repos / 'datasets.json')[0])).encode('utf8')
def update(repos, log):
    ecoregions = [
        (er['properties']['eco_code'], shape(er['geometry']))
        for er in jsonlib.load(data_file('ecoregions.json', repos=repos))['features']
        if er['geometry'] and er['properties']['eco_code'] not in INVALID_ECO_CODES]
    with CsvData('distribution', repos=repos) as data:
        res = {i.id: i for i in data.items}
        occurrence_data = list(data_file('external', 'gbif', repos=repos).glob('*.json'))
        for fname in tqdm(occurrence_data):
            sid = fname.stem
            d = res.get(sid, Distribution(sid, '', ''))
            if not d.countries__ids or not d.ecoregions__ids:
                occurrences = jsonlib.load(fname).get('results', [])
                if not d.ecoregions__ids:
                    d.ecoregions__ids = list(match(occurrences, ecoregions, log))
                if not d.countries__ids:
                    d.countries__ids = list(r.get('countryCode') for r in occurrences)
                res[sid] = d
        data.items = [res[key] for key in sorted(res.keys())]
def load_whitelist():
    """
    Basic function to load the CLPA whitelist.
    """
    _clpadata = jsonlib.load(local_path('clpa.main.json'))
    whitelist = {}
    for group in ['consonants', 'vowels', 'markers', 'tones', 'diphtongs']:
        for val in _clpadata[group]:
            whitelist[_clpadata[val]['glyph']] = _clpadata[val]
            whitelist[_clpadata[val]['glyph']]["ID"] = val
    return whitelist
def iter_files(args):
    files = defaultdict(list)
    for n in """
            Burkina_flora_for_website.json
            data.json
            docs.json
            edmond.json
            ffmissing.json
            Heath_flora_fauna_images.json
            Mali_villages_with_coordinates_for_website.json
            texts.json
            videos_from_website.json
            videos.json
            """.split():
        files.update(load(args.data_file('repos', n)))
    missing, matched = 0, 0
    for md5, cdstar in load(args.data_file('repos', 'cdstar.json')).items():
        if md5 in files:
            fnames = [Path(p.encode('utf8')).name.decode('utf8') for p in files[md5]]
            fname = sorted(fnames, key=lambda n: len(n))[-1]
            fname = fname.replace(' ', '_')
            if fname == 'Thumbs.db':
                continue
            m = date_p.search(fname)
            if m:
                d = date(int(m.group('y')), int(m.group('m')), int(m.group('d') or 1))
            else:
                d = None
            yield File(
                md5,
                fname,
                guess_type(fname)[0].decode('utf8'),
                d,
                cdstar['size'],
                cdstar.get('duration'),
                cdstar)
            matched += 1
        else:
            missing += 1
    print('iter_files', missing, matched)
def get_text(what, id_, fmt):
    p = text_path(what, '{0}.{1}'.format(id_, fmt))
    if not p.exists():
        raise ValueError(p)
    if fmt == 'json':
        return jsonlib.load(p)
    text = read_text(p)
    if fmt == 'css':
        return text
    body = bs(text).find('body')
    body.name = 'div'
    body.attrs.clear()
    return '{0}'.format(body).replace('.popover(', '.clickover(')
def old_downloads():
    from clldmpg import cdstar

    def bitstream_link(oid, spec):
        url = cdstar.SERVICE_URL.path(
            '/bitstreams/{0}/{1}'.format(oid, spec['bitstreamid'])).as_string()
        return HTML.a(
            '{0} [{1}]'.format(spec['bitstreamid'], format_size(spec['filesize'])),
            href=url)

    for number, spec in sorted(
            load(Path(__file__).parent.joinpath('static', 'downloads.json')).items()):
        yield number, [bitstream_link(spec['oid'], bs) for bs in spec['bitstreams']]
def get(dataset, resource, offset=0, limit=LIMIT, download_=False):
    fname = dataset.raw.joinpath("%(resource)s-%(limit)s-%(offset)s.json" % locals())
    if fname.exists() and not download_:
        return jsonlib.load(fname)
    if not download_:
        raise ValueError
    res = requests.get(
        "{0}/api/v1/{1}/".format(BASE_URL, resource),
        params=dict(
            format='json',
            limit='{0}'.format(limit),
            offset='{0}'.format(offset))).json()
    jsonlib.dump(res, fname)
    return res
def run():
    terms = []
    for e in read_terms().iter():
        if ns('rdf:about') in e.attrib:
            terms.append(e.attrib[ns('rdf:about')])
    for d in ['components', 'modules']:
        for f in walk(REPO_DIR.joinpath(d)):
            if f.suffix == '.json':
                md = load(f)
                for k, v in iterproperties(md):
                    if k in ['propertyUrl', 'dc:conformsTo'] and v not in terms:
                        print(f)
                        print(v)
def downloads(req):
    mod = importlib.import_module(req.registry.settings['clld.pkg'])
    dls = Path(mod.__file__).parent.joinpath('static', 'downloads.json')
    print(dls)

    def bitstream_link(oid, spec):
        url = SERVICE_URL.path(
            '{0}/{1}'.format(oid, spec['bitstreamid'])).as_string()
        return HTML.a(
            '{0} [{1}]'.format(spec['bitstreamid'], format_size(spec['filesize'])),
            href=url)

    dls = load(dls) if dls.exists() else {}
    for rel, spec in sorted(dls.items()):
        yield rel, [bitstream_link(spec['oid'], bs) for bs in spec['bitstreams']]
def fixtures(type_, name):
    res = {}
    for fname in fixture_path(type_).iterdir():
        name_, key = fname.stem.split('_')
        if name_ == name:
            value = fname
            if fname.suffix == '.json':
                value = jsonlib.load(fname)
            elif fname.suffix == '.html':
                with fname.open(encoding='utf8') as fp:
                    value = fp.read()
            elif fname.suffix == '.xml':
                with open(fname.as_posix(), 'rb') as fp:
                    value = fp.read()
            res[key] = value
    return res
def prime_cache(args):
    """If data needs to be denormalized for lookup, do that here.

    This procedure should be separate from the db initialization, because it
    will have to be run periodically whenever data has been updated.
    """
    concepticon = {
        c.GLOSS: c.CONCEPTICON_ID
        for c in reader(
            args.data_file('repos', 'conceptlist.tsv'), delimiter='\t', namedtuples=True)
        if c.CONCEPTICON_ID}
    sdata = jsonlib.load(args.data_file('repos', 'classification.json'))
    for concept in DBSession.query(models.Concept).options(joinedload(common.Parameter._files)):
        for t_ in ['image', 'video']:
            setattr(concept, 'count_{0}s'.format(t_), len(getattr(concept, t_ + 's')))
        if concept.jsondata['ref'] in sdata:
            util.update_species_data(concept, sdata[concept.jsondata['ref']])
        if concept.name in concepticon:
            concept.concepticon_id = int(concepticon[concept.name])
def __init__(self, path_or_id):
    if isinstance(path_or_id, Path):
        self.dir = path_or_id
        self.id = path_or_id.name
    else:
        self.id = path_or_id
        self.dir = REPOS.joinpath('submissions', path_or_id)
    assert self.dir.exists()
    md = self.dir.joinpath('md.json')
    self.md = load(md) if md.exists() else None
    self.db_name = None
    self.type = None
    if self.dir.joinpath('db.sfm').exists():
        self.db_name = 'db.sfm'
        self.type = 'sfm'
    else:
        raise ValueError('no valid db file in %s' % self.dir)
def x(args):
    try:
        from cdstarcat.catalog import Catalog
    except ImportError:
        args.log.error('pip install cdstarcat')
        return
    fname = args.pkg_dir.joinpath('static', 'downloads.json')
    downloads = load(fname)
    release = args.args[0]
    with Catalog(
            Path(os.environ['CDSTAR_CATALOG']),
            cdstar_url=os.environ['CDSTAR_URL'],
            cdstar_user=os.environ['CDSTAR_USER'],
            cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
        obj = cat.api.get_object(uid=downloads[release]['oid'])
        bitstreams = obj.bitstreams[:]
        for bs in bitstreams:
            print(bs.id, bs._properties)
def main(args):
    Index('ducet', collkey(common.Value.name)).create(DBSession.bind)
    repos = Path(os.path.expanduser('~')).joinpath('venvs/lexirumah/lexirumah-data')
    with transaction.manager:
        dataset = common.Dataset(
            id=lexirumah.__name__,
            name="lexirumah",
            publisher_name="Max Planck Institute for the Science of Human History",
            publisher_place="Jena",
            publisher_url="http://shh.mpg.de",
            license="http://creativecommons.org/licenses/by/4.0/",
            domain='lexirumah.model-ling.eu',
            contact='*****@*****.**',
            jsondata={
                'license_icon': 'cc-by.png',
                'license_name': 'Creative Commons Attribution 4.0 International License'})
        DBSession.add(dataset)
        glottolog_repos = Path(
            lexirumah.__file__).parent.parent.parent.parent.joinpath(
                'glottolog3', 'glottolog')
        languoids = {l.id: l for l in Glottolog(glottolog_repos).languoids()}
        concepticon = Concepticon(
            Path(lexirumah.__file__).parent.parent.parent.parent.joinpath(
                'concepticon', 'concepticon-data'))
        conceptsets = {c.id: c for c in concepticon.conceptsets.values()}
        skip = True
        for dname in sorted(repos.joinpath('datasets').iterdir(), key=lambda p: p.name):
            #if dname.name == 'benuecongo':
            #    skip = False
            #if skip:
            #    continue
            if dname.is_dir() and dname.name != '_template':
                mdpath = dname.joinpath('cldf', 'metadata.json')
                if mdpath.exists():
                    print(dname.name)
                    import_cldf(dname, load(mdpath), languoids, conceptsets)
    with transaction.manager:
        load_families(
            Data(),
            DBSession.query(LexiRumahLanguage),
            glottolog_repos=glottolog_repos,
            isolates_icon='tcccccc')
def main(args):
    Index('ducet', collkey(common.Value.name)).create(DBSession.bind)
    repos = Path(os.path.expanduser('~')).joinpath('venvs/lexibank/lexibank-data')
    with transaction.manager:
        dataset = common.Dataset(
            id=lexibank.__name__,
            name="lexibank",
            publisher_name="Max Planck Institute for the Science of Human History",
            publisher_place="Jena",
            publisher_url="http://shh.mpg.de",
            license="http://creativecommons.org/licenses/by/4.0/",
            domain='lexibank.clld.org',
            contact='*****@*****.**',
            jsondata={
                'license_icon': 'cc-by.png',
                'license_name': 'Creative Commons Attribution 4.0 International License'})
        DBSession.add(dataset)
        glottolog = Glottolog(
            Path(lexibank.__file__).parent.parent.parent.parent.joinpath(
                'glottolog3', 'glottolog'))
        languoids = {l.id: l for l in glottolog.languoids()}
        concepticon = Concepticon(
            Path(lexibank.__file__).parent.parent.parent.parent.joinpath(
                'concepticon', 'concepticon-data'))
        conceptsets = {c['ID']: c for c in concepticon.conceptsets()}
        for dname in repos.joinpath('datasets').iterdir():
            #if dname.name not in ['acbd']:
            #    continue
            if dname.is_dir() and dname.name != '_template':
                #if dname.name != 'zenodo34092':
                #    continue
                mdpath = dname.joinpath('metadata.json')
                if mdpath.exists():
                    print(dname.name)
                    import_cldf(dname, load(mdpath), languoids, conceptsets)
    with transaction.manager:
        load_families(
            Data(),
            DBSession.query(LexibankLanguage),
            glottolog=languoids,
            isolates_icon='tcccccc')
def dl2cdstar(args):
    app = app_name(args.project)
    if not app:
        args.log.error('cannot parse package name')
        return
    try:
        from cdstarcat.catalog import Catalog
    except ImportError:
        args.log.error('pip install cdstarcat')
        return
    title_pattern = re.compile('%s (?P<version>[0-9.]+) - downloads' % re.escape(app))
    title = '{0} {1} - downloads'.format(app, args.version)
    pkg_dir = args.project.joinpath(app)
    with Catalog(
            Path(os.environ['CDSTAR_CATALOG']),
            cdstar_url=os.environ['CDSTAR_URL'],
            cdstar_user=os.environ['CDSTAR_USER'],
            cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
        obj = cat.api.get_object()
        obj.metadata = {"creator": "pycdstar", "title": title}
        if args.args:
            obj.metadata["description"] = args.args[0]
        for fname in pkg_dir.joinpath('static', 'download').iterdir():
            if fname.is_file() and not fname.name.startswith('.'):
                print(fname.name)
                obj.add_bitstream(
                    fname=fname.as_posix(), name=fname.name.replace('-', '_'))
        cat.add(obj)
    fname = pkg_dir.joinpath('static', 'downloads.json')
    with update(fname, default={}, indent=4) as downloads:
        for oid, spec in load(Path(os.environ['CDSTAR_CATALOG'])).items():
            if 'metadata' in spec and 'title' in spec['metadata']:
                match = title_pattern.match(spec['metadata']['title'])
                if match:
                    if match.group('version') not in downloads:
                        spec['oid'] = oid
                        downloads[match.group('version')] = spec
    args.log.info('{0} written'.format(fname))
    args.log.info('{0}'.format(os.environ['CDSTAR_CATALOG']))
def cached_metadata(self, sid, id=None, name=None, refresh=False):
    if data_file('external', self.name, repos=self.repos).is_dir():
        fname = data_file('external', self.name, sid + '.json', repos=self.repos)
        if not fname.exists() or refresh:
            try:
                data = self.metadata(id or self.identify(name))
            except:  # pragma: no cover
                data = None
            if not data:
                return  # pragma: no cover
            jsonlib.dump(data, fname)
            return data
        return jsonlib.load(fname)
    if sid not in self.items or refresh:
        try:
            self.items[sid] = self.metadata(id or self.identify(name))
        except:
            return
    return self.items[sid]
def survey(request):
    id_ = request.matchdict["id"]
    md = jsonlib.load(ppath("Surveys", "%s.json" % id_))
    html = get_html(ppath("Surveys", "%s.html" % id_))
    maps = []
    for fname in sorted(
            ppath("Surveys", processed="maps").glob(
                "%s*.png" % id_.split(".")[1].replace("-", "_")),
            key=lambda fn: fn.stem):
        img = b64encode(open(fname.as_posix(), "rb").read())
        if "figure" in fname.stem:
            html = html.replace("{%s}" % fname.stem, "data:image/png;base64,%s" % img)
        else:
            maps.append(img)
    return {
        "maps": maps,
        "md": md,
        "authors": [Contributor.get(a["id"]) for a in md["authors"]],
        "html": html,
        "ctx": ApicsContribution.get(id_.split(".")[0]),
    }
def test_read_write(self):
    from pycldf.csv import Reader, Writer

    table = load(FIXTURES.joinpath('ds1.csv-metadata.json'))['tables'][0]
    table['tableSchema']['columns'][0]['datatype'] = 'integer'
    table['url'] = 'test.tsv'
    row = '1,abcd1234,fid1,yes,,80086;meier2015[2-5]'.split(',')
    with Archive(self.tmp_path('test.zip'), 'w') as archive:
        with Writer(table, container=archive) as writer:
            writer.writerow(row)
    with Archive(self.tmp_path('test.zip')) as archive:
        with Reader(table, container=archive) as reader:
            rows = list(reader)
    self.assertEqual(rows[0]['ID'], 1)
    self.assertEqual(
        rows[0].valueUrl('Language_ID'),
        'http://glottolog.org/resource/languoid/id/abcd1234')
    self.assertEqual(rows[0].to_list(), row)

    table = Table(table)
    del table.dialect['header']
    self.assertTrue(table.dialect.header)
    del table.dialect['delimiter']
    self.assertEqual(table.dialect.delimiter, ',')
    table.dialect.header = False
    with Writer(table, container=self.tmp_path()) as writer:
        writer.writerow(row)
        writer.writerows(rows)
    with Reader(table, container=self.tmp_path()) as reader:
        rows = list(reader)
    self.assertEqual(rows[0]['ID'], 1)
    self.assertEqual(
        rows[0].valueUrl('Language_ID'),
        'http://glottolog.org/resource/languoid/id/abcd1234')
    self.assertEqual(rows[0].to_list(), row)
def datasets(args):
    """
    cldf datasets <DIR> [ATTRS]

    List all CLDF datasets in directory <DIR>
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    d = Path(args.args[0])
    if not d.exists() or not d.is_dir():
        raise ParserError('%s is not an existing directory' % d)
    for fname in sorted(d.glob('*' + MD_SUFFIX), key=lambda p: p.name):
        md = Metadata(load(fname))
        data = fname.parent.joinpath(md.get_table().url or fname.name[:-len(MD_SUFFIX)])
        if data.exists():
            print(data)
            if len(args.args) > 1:
                maxlen = max(len(a) for a in args.args[1:])
                for attr in args.args[1:]:
                    if md.get(attr):
                        print(' %s %s' % ((attr + ':').ljust(maxlen + 1), md[attr]))