Example #1
def get_config(p):
    """Read a config file.

    :return: dict of ('section.option', value) pairs.
    """
    if not isinstance(p, Path):
        p = Path(p)
    cfg = {}

    parser = ConfigParser()
    with p.open(encoding='utf8') as fp:
        parser.read_file(fp)

    for section in parser.sections():
        getters = {
            'int': partial(parser.getint, section),
            'boolean': partial(parser.getboolean, section),
            'float': partial(parser.getfloat, section),
            'list': lambda option: parser.get(section, option).split(),
        }
        default = partial(parser.get, section)
        for option in parser.options(section):
            type_ = option.rpartition('_')[2] if '_' in option else None
            value = getters.get(type_, default)(option)
            cfg['{0}.{1}'.format(section, option)] = value

    return cfg
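
A minimal usage sketch of the convention above (the file name and option names are illustrative; get_config and its imports are assumed to be in scope): option names ending in _int, _boolean, _float, or _list select the matching typed getter, everything else comes back as a string.

from pathlib import Path

Path('app.ini').write_text(
    '[server]\n'
    'workers_int = 4\n'
    'debug_boolean = true\n'
    'hosts_list = a.example.org b.example.org\n'
    'name = demo\n',
    encoding='utf8')

cfg = get_config('app.ini')
assert cfg['server.workers_int'] == 4          # via parser.getint
assert cfg['server.debug_boolean'] is True     # via parser.getboolean
assert cfg['server.hosts_list'] == ['a.example.org', 'b.example.org']
assert cfg['server.name'] == 'demo'            # no typed suffix: plain string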
Example #2
 def __init__(self, repos=None):
     self.repos = (Path(repos)
                   if repos else Path(__file__).parent.parent).resolve()
     self.tree = self.repos / 'languoids' / 'tree'
     if not self.tree.exists():
         raise ValueError('repos dir %s missing tree dir: %s' %
                          (self.repos, self.tree))
Example #3
File: download.py Project: gopyruby/clld
 def create(self, req, filename=None, verbose=True, outfile=None):
     with safe_overwrite(outfile or self.abspath(req)) as tmp:
         if self.rdf:
             # we do not create archives with a readme for rdf downloads, because each
             # RDF entity points to the dataset and the void description of the dataset
             # covers all relevant metadata.
             #
             # TODO: write test for the file name things!?
             #
             with closing(
                     GzipFile(filename=Path(tmp.stem).stem,
                              fileobj=tmp.open('wb'))) as fp:
                 self.before(req, fp)
                 for i, item in enumerate(
                         page_query(self.query(req), verbose=verbose)):
                     self.dump(req, fp, item, i)
                 self.after(req, fp)
         else:
             with ZipFile(tmp.as_posix(), 'w', ZIP_DEFLATED) as zipfile:
                 if not filename:
                     fp = self.get_stream()
                     self.before(req, fp)
                     for i, item in enumerate(
                             page_query(self.query(req), verbose=verbose)):
                         self.dump(req, fp, item, i)
                     self.after(req, fp)
                     zipfile.writestr(self.name, self.read_stream(fp))
                 else:  # pragma: no cover
                     zipfile.write(Path(filename).as_posix(), self.name)
                 zipfile.writestr(
                     'README.txt',
                     format_readme(
                         req,
                         req.db.query(Dataset).first()).encode('utf8'))
Example #4
def create_repos(dir_):
    tsammalexdata = dir_.join('tsammalexdata')
    tsammalexdata.mkdir()
    data = tsammalexdata.join('data')
    data.mkdir()

    with data.join('test.csv').open('w', encoding='utf8') as fp:
        fp.write("""\
a,b,c
1,2,3
4,5,6""")

    with data.join('distribution.csv').open('w', encoding='utf8') as fp:
        fp.write("id,coregions__ids,countries_ids")

    test_eco_path = fixture_path('test_ecoregions.json')
    eco_path = data.join('ecoregions.json')

    copy(Path(test_eco_path), Path(eco_path))

    external = data.join('external')
    external.mkdir()
    with external.join('test.csv').open('w', encoding='utf8') as fp:
        fp.write("""\
a,b,c
1,2,3
4,5,6""")
    external.join('gbif').mkdir()
    occurrences = fixture_path('abelmoschusesculentus.json')

    copy(Path(occurrences), Path(external.join('gbif', occurrences.name)))

    return dir_
Example #5
def test_catalogue_of_life(tmpdir):
    data = fixtures('data_providers', 'catalogueoflife')
    id_ = '9249d9473aac5c8e99fb9d758ced91ec'
    repos = create_repos(tmpdir)

    with patch('pytsammalex.util.requests',
               MockRequests(content=data['identify'])):
        prov = CatalogueOfLife(Path(repos))
        assert (prov.identify('x') == id_)

    with patch('pytsammalex.util.requests',
               MockRequests(content=data['metadata'])):
        prov = CatalogueOfLife(Path(repos))
        md = prov.cached_metadata('test', id_)
        taxon = {}
        prov.update(taxon, md)
        assert taxon == {
            'catalogueoflife_url': 'http://www.catalogueoflife.org/col/'
                                   'browse/tree/id/9249d9473aac5c8e99fb9d758ced91ec',
            'class': 'Mammalia',
            'family': 'Felidae',
            'kingdom': 'Animalia',
            'order': 'Carnivora',
            'phylum': 'Chordata',
        }
Example #6
def llod_func(args):  # pragma: no cover
    """Create an RDF dump and compute some statistics about it."""
    tmp = Path(mkdtemp())
    count_rsc = 0
    count_triples = 0

    tmp_dump = tmp.joinpath('rdf.n3')
    with open(as_posix(tmp_dump), 'w') as fp:
        for rsc in RESOURCES:
            args.log.info('Resource type %s ...' % rsc.name)
            try:
                q = DBSession.query(rsc.model)
            except InvalidRequestError:
                args.log.info('... skipping')
                continue
            for obj in page_query(q.order_by(rsc.model.pk), n=10000, verbose=True):
                graph = get_graph(obj, args.env['request'], rsc.name)
                count_triples += len(graph)
                count_rsc += 1
                fp.write(n3(graph, with_head=count_rsc == 1))
            args.log.info('... finished')

    # put in args.data_file('..', 'static', 'download')?
    md = {'path': as_posix(tmp), 'resources': count_rsc, 'triples': count_triples}
    md.update(count_links(as_posix(tmp_dump)))
    jsonlib.dump(md, args.data_file('rdf-metadata.json'))
    print(md)

    dataset = Dataset.first()
    rdf_dump = args.module_dir.joinpath(
        'static', 'download', '%s-dataset.n3' % dataset.id)
    tmp_dump.copy(rdf_dump)
    check_call('gzip -f %s' % rdf_dump, shell=True)
    print(str(rdf_dump))
Example #7
File: db.py Project: glottobank/pycldf
    def to_cldf(self, dest, mdname='cldf-metadata.json'):
        """
        Write the data from the db to a CLDF dataset according to the metadata in `self.dataset`.

        :param dest:
        :param mdname:
        :return: path of the metadata file
        """
        dest = Path(dest)
        if not dest.exists():
            dest.mkdir()

        data = self.read()

        if data[self.source_table_name]:
            sources = Sources()
            for src in data[self.source_table_name]:
                sources.add(Source(
                    src['genre'],
                    src['id'],
                    **{k: v for k, v in src.items() if k not in ['id', 'genre']}))
            sources.write(dest / self.dataset.properties.get('dc:source', 'sources.bib'))

        for table_type, items in data.items():
            try:
                table = self.dataset[table_type]
                table.common_props['dc:extent'] = table.write(
                    [self.retranslate(table, item) for item in items],
                    base=dest)
            except KeyError:
                assert table_type == self.source_table_name, table_type
        return self.dataset.write_metadata(dest / mdname)
Example #8
File: util.py Project: cevmartinez/clld
 def __call__(self, parser, namespace, values, option_string=None):
     path_ = Path(values)
     if not path_.exists():
         raise argparse.ArgumentError(self, 'path does not exist')
     if not path_.is_dir():
         raise argparse.ArgumentError(self, 'path is not a directory')
     setattr(namespace, self.dest, path_)
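
For context, a hedged sketch of how such a validating action is typically wired into argparse (the class name ExistingDir is invented here; the method body mirrors the example above):

import argparse
from pathlib import Path

class ExistingDir(argparse.Action):
    def __call__(self, parser, namespace, values, option_string=None):
        path_ = Path(values)
        if not path_.exists():
            raise argparse.ArgumentError(self, 'path does not exist')
        if not path_.is_dir():
            raise argparse.ArgumentError(self, 'path is not a directory')
        setattr(namespace, self.dest, path_)

parser = argparse.ArgumentParser()
parser.add_argument('--repos', action=ExistingDir)
args = parser.parse_args(['--repos', '.'])  # args.repos is now Path('.')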
Example #9
File: util.py Project: clld/apics
def wals_detail_html(context=None, request=None, **kw):
    wals_data = Path(apics.__file__).parent.joinpath(
        'static', 'wals', '%sA.json' % context.parameter.wals_id)
    if not wals_data.exists():
        raise HTTPNotFound()

    wals_data = jsonlib.load(wals_data)
    value_map = {}

    for layer in wals_data['layers']:
        for feature in layer['features']:
            feature['properties']['icon'] = request.registry.getUtility(
                IIcon, name=feature['properties']['icon']).url(request)
            feature['properties']['popup'] = external_link(
                'http://wals.info/languoid/lect/wals_code_'
                + feature['properties']['language']['id'],
                label=feature['properties']['language']['name'])
        value_map[layer['properties']['number']] = {
            'icon': layer['features'][0]['properties']['icon'],
            'name': layer['properties']['name'],
            'number': layer['properties']['number'],
        }

    return {
        'wals_data': wals_data,
        'wals_map': WalsMap(
            context.parameter, request, data=wals_data, value_map=value_map),
        'apics_map': ApicsWalsMap(
            context.parameter, request, data=wals_data, value_map=value_map)}
Example #10
    def create(self, path, metadata, filter_=filter_hidden, object_class=None):
        """
        Create objects in CDSTAR and register them in the catalog.

        Note that we guess the mimetype based on the filename extension, using
        `mimetypes.guess_type`. Thus, it is the caller's responsibility to add custom or
        otherwise uncommon types to the list of known types using `mimetypes.add_type`.

        :param path:
        :param metadata:
        :param filter_:
        :return:
        """
        path = Path(path)
        if path.is_file():
            fnames = [path]
        elif path.is_dir():
            fnames = list(walk(path, mode='files'))
        else:
            raise ValueError(
                'path must be a file or directory')  # pragma: no cover
        for fname in fnames:
            if not filter_ or filter_(fname):
                created, obj = self._create(fname,
                                            metadata,
                                            object_class=object_class)
                yield fname, created, obj
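
Since create() is a generator, nothing is uploaded until the caller iterates; a usage sketch (the catalog object and the metadata dict are illustrative):

for fname, created, obj in catalog.create('images/', {'title': 'Figures'}):
    print(fname, 'created' if created else 'already present', obj)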
Example #11
def langsearch(args):
    """Search Glottolog languoids

    glottolog --repos=. langsearch "QUERY"
    """
    def highlight(text):
        res, i = '', 0
        for m in re.finditer(r'\[\[(?P<m>[^\]]+)\]\]', text):
            res += text[i:m.start()]
            res += colored(m.group('m'), 'red', attrs=['bold'])
            i = m.end()
        res += text[i:]
        return res + '\n'

    count, results = fts.search_langs(args.repos, args.args[0])
    cwd = os.getcwd()
    print('{} matches'.format(count))
    for res in results:
        try:
            p = Path(res.fname).relative_to(Path(cwd))
        except ValueError:
            p = res.fname
        sprint('{0.name} [{0.id}] {0.level}'.format(res),
               color=None,
               attrs=['bold'])
        sprint(p, color='green')
        sprint(highlight(res.highlights) if res.highlights else '')
    print('{} matches'.format(count))
Example #12
def iso2codes(args):
    """
    Map each ISO code to the list of all Glottolog languages and dialects subsumed under it.
    """
    nodes = list(args.repos.languoids())

    res = {}
    for node in nodes:
        if node.iso:
            res[node.id] = (node.iso, set())

    for node in nodes:
        if node.level == args.repos.languoid_levels.family or node.id in res:
            continue
        for nid in res:
            matched = False
            for l in node.lineage:
                if l[1] == nid:
                    res[nid][1].add(node.id)
                    matched = True
                    break
            if matched:
                break

    outdir = Path('.') if not args.args else Path(args.args[0])
    with UnicodeWriter(outdir / 'iso2glottocodes.csv') as writer:
        writer.writerow(['iso', 'glottocodes'])
        for gc, (iso, gcs) in res.items():
            writer.writerow([iso, ';'.join([gc] + list(gcs))])
Example #13
def download_tables(outdir=None):
    match = ZIP_NAME_PATTERN.search(urlopen(BASE_URL + 'download.asp').read())
    if not match:
        raise ValueError('no matching zip file name found')  # pragma: no cover
    target = Path(outdir or '.').joinpath(match.group('name'))
    urlretrieve(BASE_URL + match.group('name'), target.as_posix())
    return target
Example #14
def get_dataset(fname=None):
    """Load a CLDF dataset.

    Load the file either as a JSON CLDF metadata description file or as a
    metadata-free dataset contained in a single CSV file.

    The distinction is made by file extension: `.json` files are loaded as
    metadata descriptions, all other files are matched against the CLDF
    module specifications. Directories are checked for the presence of any
    CLDF dataset, trying the dataset types in unspecified order.

    Parameters
    ----------
    fname : str or Path
        Path to a CLDF dataset

    Returns
    -------
    pycldf.Dataset
    """
    if fname is None:
        fname = repository
    else:
        fname = Path(fname)
    if not fname.exists():
        raise FileNotFoundError('{:} does not exist'.format(fname))
    if fname.suffix == '.json':
        return Dataset.from_metadata(fname)
    return Dataset.from_data(fname)
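
A hedged usage sketch (the file names are illustrative): a .json argument goes through Dataset.from_metadata, anything else through Dataset.from_data.

ds = get_dataset('cldf/cldf-metadata.json')  # metadata description file
ds = get_dataset('forms.csv')                # metadata-free single-table dataset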
Example #15
File: db.py Project: afcarl/pycldf
    def __init__(self, fname):
        """
        A `Database` instance is initialized with a file path.

        :param fname: Path to a file in the file system where the db is to be stored.
        """
        self.fname = Path(fname)
Example #16
def test_data_url_from_string():
    from clldutils.path import Path

    assert data_url('ü') == 'data:application/octet-stream;base64,w7w='
    assert data_url(Path(__file__)).startswith('data:')
    assert data_url(Path(__file__),
                    mimetype='text/plain').startswith('data:text/plain')
Example #17
File: cli.py Project: LinguList/pycldf
def stats(args):
    """
    cldf stats <DATASET>

    Print basic stats for CLDF dataset <DATASET>, where <DATASET> may be the path to
    - a CLDF metadata file
    - a CLDF core data file
    - a CLDF zip archive
    """
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        raise ParserError('%s is not an existing file' % fname)
    if fname.suffix == '.zip':
        ds = Dataset.from_zip(fname)
    elif fname.name.endswith(MD_SUFFIX):
        ds = Dataset.from_metadata(fname)
    else:
        ds = Dataset.from_file(fname)
    print(fname)
    stats_ = ds.stats
    print("""
Name: %s
Different languages: %s
Different parameters: %s
Rows: %s
""" % (
        ds.name,
        len(stats_['languages']),
        len(stats_['parameters']),
        stats_['rowcount']
    ))
Example #18
File: dataset.py Project: LinguList/pycldf
    def write(self, outdir='.', suffix='.csv', cited_sources_only=False, archive=False):
        outdir = Path(outdir)
        if not outdir.exists():
            raise ValueError(outdir.as_posix())

        close = False
        if archive:
            if isinstance(archive, Archive):
                container = archive
            else:
                container = Archive(outdir.joinpath(self.name + '.zip'), mode='w')
                close = True
        else:
            container = outdir

        fname = Path(outdir).joinpath(self.name + suffix)
        if fname.suffix in TAB_SUFFIXES:
            self.table.dialect.delimiter = '\t'

        with UnicodeWriter(
                None if isinstance(container, Archive) else fname,
                delimiter=self.table.dialect.delimiter) as writer:
            writer.writerow(self.fields)
            for row in self.rows:
                writer.writerow(row.to_list())

        if isinstance(container, Archive):
            container.write_text(writer.read(), fname.name)
        self.table.url = fname.name

        self.metadata.write(Dataset.filename(fname, 'metadata'), container)
        ids = self._cited_sources if cited_sources_only else None
        self.sources.write(Dataset.filename(fname, 'sources'), container, ids=ids)
        if close:
            container.close()
Example #19
File: cache.py Project: xrotwang/lingpy3
class Cache(object):
    def __init__(self, dir_=None):
        self._dir = Path(dir_ or CACHE_DIR)
        if not self._dir.exists():
            self._dir.mkdir(parents=True)  # pragma: no cover

    def _path(self, key):
        return self._dir.joinpath(path_component(key))

    def __len__(self):
        return len(list(self.keys()))

    def __getitem__(self, item):
        with self._path(item).open('rb') as fp:
            return pickle.load(fp)

    def __setitem__(self, key, value):
        with self._path(key).open('wb') as fp:
            pickle.dump(value, fp)

    def __delitem__(self, key):
        remove(self._path(key))

    def __contains__(self, item):
        return self._path(item).exists()

    def keys(self):
        for p in self._dir.iterdir():
            yield as_unicode(p.name)

    def clear(self):
        for key in self.keys():
            remove(self._path(key))
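
A usage sketch for the dict-like interface above (the directory is illustrative; keys become file names via path_component, and values are pickled):

cache = Cache('/tmp/lingpy3-cache')
cache['wordlist'] = {'a': 1}           # pickled under the cache directory
assert 'wordlist' in cache
assert cache['wordlist'] == {'a': 1}
assert len(cache) == 1
del cache['wordlist']                  # removes the backing file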
Example #20
def cldf(args):
    """
    Create CLDF datasets from the raw data for a dataset.

    lexibank --glottolog-repos PATH --concepticon-repos PATH cldf [DATASET_ID]
    """
    if not args.glottolog_repos or not Path(args.glottolog_repos).exists():
        raise ParserError('Invalid glottolog repository path given')

    if not args.concepticon_repos or not Path(args.concepticon_repos).exists():
        raise ParserError('Invalid concepticon repository path given')

    # FIXME: get dict of all glottolog langs right here, and attach to datasets!
    try:
        languoids = load('glottolog')
    except ValueError:
        languoids = {
            l.id: l
            for l in Glottolog(args.glottolog_repos).languoids()
        }
        dump(languoids, 'glottolog')

    def _cldf(ds, **kw):
        ds.glottolog_languoids = languoids
        ds.cldf(**kw)
        ds.write_cognates()

    with_dataset(args, _cldf)
Example #21
def download_tables(outdir=None):
    match = ZIP_NAME_PATTERN.search(_open('code_tables/download_tables').read().decode('utf-8-sig'))
    if not match:
        raise ValueError('no matching zip file name found')  # pragma: no cover
    target = Path(outdir or '.').joinpath(match.group('name').split('/')[-1])
    with target.open('wb') as fp:
        fp.write(_open(match.group('name')).read())
    return target
Example #22
 def in_dir(cls, d, empty_tables=False):
     fname = Path(d)
     if not fname.exists():
         fname.mkdir()
     assert fname.is_dir()
     res = cls.from_metadata(fname)
     if empty_tables:
         del res.tables[:]
     return res
Example #23
 def test_init3(self):  # with kw check=True
     bad_file = Path(test_data('bad_file.tsv'))
     assert_raises(ValueError, LexStat, bad_file.as_posix())
     ls = self._make_one(bad_file.as_posix(), check=True, apply_checks=True)
     assert hasattr(ls, 'errors')
     cleaned = bad_file.parent.joinpath(bad_file.name + '_cleaned.tsv')
     self.assertTrue(cleaned.exists())
     os.remove(cleaned.as_posix())
     assert_raises(ValueError, LexStat, {0: ['concept', 'language', 'ipa']})
Example #24
def jsondump(obj, fname, log=None):
    fname = Path(fname)
    if fname.exists():
        d = jsonlib.load(fname)
        d.update(obj)
        obj = d
    jsonlib.dump(sorted_obj(obj), fname, indent=4)
    log_dump(fname, log=log)
    return obj
Example #25
def _get_dataset(args):
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        raise ParserError('%s is not an existing file' % fname)
    if fname.suffix == '.json':
        return Dataset.from_metadata(fname)
    return Dataset.from_data(fname)
Example #26
File: __main__.py Project: Anaphory/pycldf
def _get_dataset(args):
    if len(args.args) < 1:
        raise ParserError('not enough arguments')
    fname = Path(args.args[0])
    if not fname.exists() or not fname.is_file():
        raise ParserError('%s is not an existing file' % fname)
    if fname.suffix == '.json':
        return Dataset.from_metadata(fname)
    return Dataset.from_data(fname)
Example #27
File: util.py Project: clld/clldfabric
def copy_downloads(app, pattern='*'):
    dl_dir = app.src.joinpath(app.name, 'static', 'download')
    require.files.directory(dl_dir, use_sudo=True, mode="777")
    local_dl_dir = Path(import_module(app.name).__file__).parent.joinpath('static', 'download')
    for f in local_dl_dir.glob(pattern):
        target = dl_dir.joinpath(f.name)
        create_file_as_root(target, open(f.as_posix()).read())
        sudo('chown %s:%s %s' % (app.name, app.name, target))
    require.files.directory(dl_dir, use_sudo=True, mode="755")
Example #28
def update(path, default=None, load_kw=None, **kw):
    path = Path(path)
    if not path.exists():
        if default is None:
            raise ValueError('path does not exist')
        res = default
    else:
        res = load(path, **(load_kw or {}))
    yield res
    dump(res, path, **kw)
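
update() is a generator, so at its definition site it is presumably decorated with @contextlib.contextmanager: load() provides the object, the caller mutates it, and dump() persists it when the block exits. A hedged usage sketch (the file name is illustrative):

with update('db.json', default={}) as obj:
    obj['counter'] = obj.get('counter', 0) + 1
# on exit, the updated object has been dumped back to db.json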
Example #29
def write_data_file(comment_text, overwrite):
    lines = comment_text.split("\n")
    filename = Path(lines[0].split(":", 1)[1].strip())
    if filename.exists() and not overwrite:
        return "Embedded data file %s already exists!  Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        filename.parent.mkdir()
    with filename.open("w", encoding='utf8') as fp:
        fp.write("\n".join(lines[1:]))
    return "Wrote embedded data file %s.\n" % filename
Example #30
def create_archive(args):
    rels = get_release_config()
    for section in rels.sections():
        _load_sql_dump(rels[section], args.log)
    out = Path('archive')
    if args.args:
        out = Path(args.args[0])
    static_archive.create(
        [rels.get(sec, 'version') for sec in rels.sections()], out)
    args.log.info('static archive created in {0}'.format(out))
Example #31
 def format_data_file(self, filename):
     """
     Return an ElementTree node corresponding to a comment containing
     the text of the specified data file.
     """
     header = "BEASTling embedded data file: %s" % filename
     with Path(filename).open("r") as fp:
         data_block = "\n".join([header, fp.read()])
     return ET.Comment(data_block)
Example #32
 def from_file(cls, path, **keywords):
     """
     Function loads a concept list outside the Concepticon collection.
     """
     path = Path(path)
     assert path.exists()
     attrs = {f: keywords.get(f, '') for f in Conceptlist.public_fields()}
     attrs.update(id=path.stem,
                  items=keywords.get('items', len(read_dicts(path))),
                  year=keywords.get('year', 0))
     return cls(api=path, **attrs)
Example #33
def test_json_data(tmpdir):
    tmp_ = create_repos(tmpdir)

    with JsonData('test.json', repos=Path(tmp_)) as jdat:
        jdat['a'] = 1

    assert (data_file('test.json', repos=Path(tmp_)).exists() is True)

    with JsonData('test.json', repos=Path(tmp_)) as jdat:
        assert (len(jdat) == 1)
        assert (jdat['a'] == 1)
Example #34
File: util.py Project: LinguList/clpa
def load_normalized(_path):
    """Normalization for quasi-identical strings which are often confused."""
    path = Path(_path)
    if not path.is_file():
        path = local_path(_path)
    norms = {}
    with path.open(encoding='utf-8') as handle:
        for line in handle:
            if not line.startswith('#') and line.strip():
                source, target = line.strip().split('\t')
                norms[eval('"' + source + '"')] = eval('r"' + target + '"')
    return norms
Example #35
File: util.py Project: tjade273/lingpy
class TemporaryPath(object):
    def __init__(self, suffix=''):
        fp = NamedTemporaryFile(suffix=suffix)
        self.name = Path(fp.name)
        fp.close()

    def __enter__(self):
        return self.name.as_posix()

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.name.exists():
            remove(self.name)
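
A usage sketch; note the design trade-off: NamedTemporaryFile is closed immediately, so only a free name is obtained, and there is a small window in which another process could claim it before the caller creates the file.

with TemporaryPath(suffix='.tsv') as fname:   # fname is a plain string
    with open(fname, 'w', encoding='utf8') as fp:
        fp.write('concept\tlanguage\tipa\n')
    # hand fname to APIs that insist on a real file-system path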
Example #36
File: util.py Project: gopyruby/clld
def safe_overwrite(fname):
    fname = Path(fname)
    if not fname.parent.exists():
        fname.parent.mkdir()
    assert fname.parent.exists()
    tmp = fname.parent
    while tmp.exists():
        tmp = fname.parent.joinpath('%s.%s' % (fname.name, random_string(6)))
    yield tmp
    if fname.exists():
        remove(fname)
    move(tmp, fname)
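
safe_overwrite is likewise a generator, presumably decorated with @contextlib.contextmanager where it is defined: callers write to the unique temporary path it yields, and the target is swapped in only after the block completes (Example #3 above shows an in-project caller). A hedged sketch:

with safe_overwrite('static/download/dataset.zip') as tmp:
    with tmp.open('wb') as fp:
        fp.write(b'payload')
# only now does dataset.zip point at the new content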
Example #37
def curate(args):  # pragma: no cover
    datasets = {ds.id: ds for ds in args.cfg.datasets}

    class TheCompleter(Completer):
        def get_completions(self, document, complete_event):
            word_before_cursor = document.get_word_before_cursor(WORD=True)
            words = document.text_before_cursor.split()
            if words and words[0] in commands:
                for ds in fuzzyfinder(word_before_cursor, datasets):
                    yield Completion(ds,
                                     start_position=-len(word_before_cursor))
            else:  # elif word_before_cursor:
                for c in fuzzyfinder(word_before_cursor, commands):
                    yield Completion(c,
                                     start_position=-len(word_before_cursor))

    user_input = []
    appdir = Path(user_data_dir('lexibank'))
    if not appdir.exists():
        appdir.mkdir(parents=True)

    while not user_input or user_input[0] != 'quit':
        try:
            user_input = prompt(
                u'lexibank-curator> ',
                history=FileHistory(str(appdir / 'history.txt')),
                auto_suggest=AutoSuggestFromHistory(),
                completer=TheCompleter(),
            ).split()
        except EOFError:
            break
        except KeyboardInterrupt:
            break

        if len(user_input) == 0:
            continue  # ignore empty commands
        if user_input[0] not in commands:
            print(colored('Invalid command!', 'red'))
            continue
        if len(user_input) > 1 and user_input[1] not in datasets:
            print(colored('Invalid dataset!', 'red'))
            continue

        args.args = user_input[1:]
        try:
            s = time()
            commands[user_input[0]](args)
            print('[{0:.3f}]'.format(time() - s))
        except Exception as e:
            traceback.print_exc()
            print(colored('{0}: {1}'.format(e.__class__.__name__, e), 'red'))

    print('see ya!')
Example #38
File: util.py Project: clld/clld
def safe_overwrite(fname):
    fname = Path(fname)
    if not fname.parent.exists():
        fname.parent.mkdir()
    assert fname.parent.exists()
    tmp = fname.parent
    while tmp.exists():
        tmp = fname.parent.joinpath('%s.%s' % (fname.name, random_string(6)))
    yield tmp
    if fname.exists():
        remove(fname)
    move(tmp, fname)
Example #39
def main():  # pragma: no cover
    pkg_dir = Path(glottolog3.__file__).parent
    parser = ArgumentParserWithLogging('glottolog3')
    parser.add_argument(
        '--repos',
        help="path to glottolog data repository",
        type=Glottolog,
        default=Glottolog(
            Path(glottolog3.__file__).parent.parent.parent.joinpath(
                'glottolog')))
    parser.add_argument('--pkg-dir', help=argparse.SUPPRESS, default=pkg_dir)
    sys.exit(parser.main())
Example #40
File: util.py Project: LinguList/lingpy
class TemporaryPath(object):
    def __init__(self, suffix=''):
        fp = NamedTemporaryFile(suffix=suffix)
        self.name = Path(fp.name)
        fp.close()

    def __enter__(self):
        return self.name.as_posix()

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.name.exists():
            remove(self.name)
Example #41
def upgrade():
    mappings = Path(
        __file__).parent.joinpath('..', '..', 'data', 'apics_phoible.json').as_posix()
    with open(mappings) as fp:
        mappings = json.load(fp)

    conn = op.get_bind()
    for k, v in mappings.items():
        d = conn.execute("select jsondata from parameter where id = %s", (k,)).fetchone()
        d = json.loads(d[0])
        d.update(phoible=v)
        conn.execute(
            "update parameter set jsondata = %s where id = %s", (json.dumps(d), k))
Example #42
File: _mixins.py Project: clld/clld
    def create(self, dir_, content):
        """Write ``content`` to a file using ``dir_`` as file-system directory.

        :return: File-system path of the file that was created.
        """
        p = Path(dir_).joinpath(self.relpath)
        if not p.parent.exists():
            p.parent.mkdir(parents=True)
        with open(p.as_posix(), 'wb') as fp:
            if isinstance(content, text_type):
                content = content.encode('utf8')
            fp.write(content)
        return p.as_posix()
Example #43
File: _mixins.py Project: cevmartinez/clld
    def create(self, dir_, content):
        """Write ``content`` to a file using ``dir_`` as file-system directory.

        :return: File-system path of the file that was created.
        """
        if not isinstance(dir_, Path):
            dir_ = Path(dir_)
        p = dir_.joinpath(self.relpath)
        if not p.parent.exists():
            p.parent.mkdir(parents=True)
        with open(p.as_posix(), 'wb') as fp:
            fp.write(content)
        return p.as_posix()
Example #44
 def test_generate_extract(self):
     xml = self.tmp_path('test.xml')
     self._run_main('-v -o {0} {1}'.format(xml.as_posix(), config_path('basic')))
     self.assertTrue(xml.exists())
     # Overwriting existing files must be specified explicitly:
     self._run_main('-o {0} {1}'.format(
         xml.as_posix(), config_path('basic')), status=4)
     self._run_main('--overwrite -o {0} {1}'.format(
         xml.as_posix(), config_path('basic')), status=0)
     tcfg = Path('beastling_test.conf')
     self._run_main('--extract {0}'.format(xml.as_posix()))
     self.assertTrue(tcfg.exists())
     remove(tcfg)
Example #45
File: stats_util.py Project: clld/grambank
def dependencies_graph(imps):
    deps = dict([((f1, f2), v) for (v, f1, f2) in imps if v > 0.0])
    V = set([f for fs in deps for f in fs])
    G = dict([(k, v) for (k, v) in deps.items() if v > 0.0])
    MSTs = [mst(G, x) for x in V]
    (mv, H) = max([(sum(H.values()), H) for H in MSTs])
    #W = dict([(y, 1.0-v) for ((x, y), v) in H.iteritems()])
    #sav(dot(H, V), 'grambank_mst.gv')
    path = Path(grambank.__file__).parent.joinpath('static', 'dependencies.gv')
    with open(path.as_posix(), 'w') as fp:
        fp.write(dot(H, V))

    return (H, V) #dot(H, V)
Example #46
File: languoids.py Project: clld/glottolog
 def write_info(self, outdir=None):
     outdir = outdir or self.id
     if not isinstance(outdir, Path):
         outdir = Path(outdir)
     if not outdir.exists():
         outdir.mkdir()
     fname = outdir.joinpath(self.fname('.ini'))
     self.cfg.write(fname)
     if os.linesep == '\n':
         with fname.open(encoding='utf8') as fp:
             text = fp.read()
         with fname.open('w', encoding='utf8') as fp:
             fp.write(text.replace('\n', '\r\n'))
     return fname
Example #47
def link(args):
    """\
Complete linking of concepts to concept sets. If either CONCEPTICON_GLOSS or
CONCEPTICON_ID is given, the other is added.

concepticon link <concept-list>
"""
    conceptlist = Path(args.args[0])
    if not conceptlist.exists() or not conceptlist.is_file():
        conceptlist = data_path('conceptlists', args.args[0])
        if not conceptlist.exists() or not conceptlist.is_file():
            raise ParserError('no file %s found' % args.args[0])

    rewrite(conceptlist, Linker(conceptlist.stem))
Example #48
File: cdstar.py Project: Anaphory/clldlucl
def downloads(req):
    mod = importlib.import_module(req.registry.settings['clld.pkg'])
    dls = Path(mod.__file__).parent.joinpath('static', 'downloads.json')
    print(dls)

    def bitstream_link(oid, spec):
        url = SERVICE_URL.path(
            '{0}/{1}'.format(oid, spec['bitstreamid'])).as_string()
        return HTML.a(
            '{0} [{1}]'.format(spec['bitstreamid'], format_size(spec['filesize'])),
            href=url)

    dls = load(dls) if dls.exists() else {}
    for rel, spec in sorted(dls.items()):
        yield rel, [bitstream_link(spec['oid'], bs) for bs in spec['bitstreams']]
Example #49
File: bibtex.py Project: cevmartinez/clld
    def from_file(cls, bibFile, encoding='utf8', lowercase=False):
        """Create bibtex database from a bib-file.

        @param bibFile: path of the bibtex-database-file to be read.
        """
        if not isinstance(bibFile, Path):
            bibFile = Path(bibFile)
        if bibFile.exists():
            with bibFile.open(encoding=encoding) as fp:
                content = fp.read()
        else:
            content = ''

        return cls((Record.from_string('@' + m, lowercase=lowercase)
                    for m in re.split(r'^\s*@', content, flags=re.MULTILINE)))
Example #50
File: util.py Project: LinguList/clpa
def load_alias(_path):
    """
    Aliases are one-character sequences which we can convert on a step-by-step
    basis by applying them successively to all subsegments of a segment.
    """
    path = Path(_path)
    if not path.is_file():
        path = local_path(_path)

    alias = {}
    with path.open(encoding='utf-8') as handle:
        for line in handle:
            if not line.startswith('#') and line.strip():
                source, target = line.strip().split('\t')
                alias[eval('"' + source + '"')] = eval('r"' + target + '"')
    return alias
Example #51
    def test_Matrix(self):
        from wals3.adapters import Matrix

        p = Path(mktemp())
        assert not p.exists()

        class TestMatrix(Matrix):
            def abspath(self, req):
                return p

            def query(self, req):
                return Matrix.query(self, req).filter(Language.pk < 100)

        m = TestMatrix(Language, "wals3", description="Feature values CSV")
        m.create(self.env["request"], verbose=False)
        assert p.exists()
        remove(p)
Example #52
def write_config(comment_text, overwrite):
    lines = comment_text.split("\n")
    assert lines[1] in (_config_file_str, _proggen_str)
    if lines[1] == _proggen_str:
        return "Original configuration was generated programmatically, no configuration to extract."
    config_text = "\n".join(lines[2:])
    p = INI()
    p.read_string(config_text)
    filename = p.get("admin", "basename") \
        if p.has_option("admin", "basename") else 'beastling'
    filename = Path(filename + '.conf')
    if filename.exists() and not overwrite:
        return "BEASTling configuration file %s already exists!  Run beastling with the --overwrite option if you wish to overwrite it.\n" % filename
    if not filename.parent.exists():
        filename.parent.mkdir()

    p.write(filename)
    return "Wrote BEASTling configuration file %s.\n" % filename
Example #53
def main(args):
    Index('ducet', collkey(common.Value.name)).create(DBSession.bind)
    repos = Path(os.path.expanduser('~')).joinpath('venvs/lexirumah/lexirumah-data')

    with transaction.manager:
        dataset = common.Dataset(
            id=lexirumah.__name__,
            name="lexirumah",
            publisher_name="Max Planck Institute for the Science of Human History",
            publisher_place="Jena",
            publisher_url="http://shh.mpg.de",
            license="http://creativecommons.org/licenses/by/4.0/",
            domain='lexirumah.model-ling.eu',
            contact='*****@*****.**',
            jsondata={
                'license_icon': 'cc-by.png',
                'license_name': 'Creative Commons Attribution 4.0 International License'})
        DBSession.add(dataset)

    glottolog_repos = Path(
        lexirumah.__file__).parent.parent.parent.parent.joinpath('glottolog3', 'glottolog')
    languoids = {l.id: l for l in Glottolog(glottolog_repos).languoids()}
    concepticon = Concepticon(
        Path(lexirumah.__file__).parent.parent.parent.parent.joinpath('concepticon', 'concepticon-data'))
    conceptsets = {c.id: c for c in concepticon.conceptsets.values()}

    skip = True
    for dname in sorted(repos.joinpath('datasets').iterdir(), key=lambda p: p.name):
        #if dname.name == 'benuecongo':
        #    skip = False
        #if skip:
        #    continue
        if dname.is_dir() and dname.name != '_template':
            mdpath = dname.joinpath('cldf', 'metadata.json')
            if mdpath.exists():
                print(dname.name)
                import_cldf(dname, load(mdpath), languoids, conceptsets)

    with transaction.manager:
        load_families(
            Data(),
            DBSession.query(LexiRumahLanguage),
            glottolog_repos=glottolog_repos,
            isolates_icon='tcccccc')
Example #54
    def create(self, req, filename=None, verbose=True):
        p = self.abspath(req)
        if not p.parent.exists():  # pragma: no cover
            p.parent.mkdir()
        tmp = Path('%s.tmp' % p.as_posix())

        if self.rdf:
            # we do not create archives with a readme for rdf downloads, because each
            # RDF entity points to the dataset and the void description of the dataset
            # covers all relevant metadata.
            #
            # TODO: write test for the file name things!?
            #
            with closing(GzipFile(
                    filename=Path(tmp.stem).stem, fileobj=tmp.open('wb')
            )) as fp:
                self.before(req, fp)
                for i, item in enumerate(page_query(self.query(req), verbose=verbose)):
                    self.dump(req, fp, item, i)
                self.after(req, fp)
        else:
            with ZipFile(tmp.as_posix(), 'w', ZIP_DEFLATED) as zipfile:
                if not filename:
                    fp = self.get_stream()
                    self.before(req, fp)
                    for i, item in enumerate(
                            page_query(self.query(req), verbose=verbose)):
                        self.dump(req, fp, item, i)
                    self.after(req, fp)
                    zipfile.writestr(self.name, self.read_stream(fp))
                else:  # pragma: no cover
                    zipfile.write(filename, self.name)
                zipfile.writestr(
                    'README.txt',
                    README.format(
                        req.dataset.name,
                        '=' * (
                            len(req.dataset.name)
                            + len(' data download')),
                        req.dataset.license,
                        TxtCitation(None).render(req.dataset, req)).encode('utf8'))
        if p.exists():  # pragma: no cover
            remove(p)
        move(tmp, p)
Example #55
    def test_extractor(self):
        config = self.make_cfg(
            [config_path(f).as_posix() for f in ("admin", "mk", "embed_data")])
        xml = beastling.beastxml.BeastXml(config)
        xmlfile = self.tmp.joinpath("beastling.xml")
        xml.write_file(xmlfile.as_posix())
        self.assertTrue(bool(self._extract(xmlfile)))

        config = self.make_cfg({
            'admin': {'basename': 'abcdefg'},
            'model': {
                'model': 'mk',
                'data': data_path('basic.csv').as_posix()}})
        xml = beastling.beastxml.BeastXml(config)
        xmlfile = self.tmp.joinpath("beastling.xml")
        xml.write_file(xmlfile.as_posix())
        beastling.extractor.extract(xmlfile)
        p = Path('abcdefg.conf')
        self.assertTrue(p.exists())
        cfg = INI(interpolation=None)
        cfg.read(p.as_posix())
        remove(p)
        self.assertEqual(cfg['admin']['basename'], 'abcdefg')
        self.assertEqual(cfg['model']['model'], 'mk')

        fname = self.tmp.joinpath('test.xml')
        datafile = self.tmp.joinpath('test.csv')
        self.assertFalse(datafile.exists())
        with fname.open('w', encoding='utf8') as fp:
            fp.write("""<?xml version="1.0" encoding="UTF-8"?>
<r>
  <!--%s
%s
[admin]
[model]
-->
  <!--%s:%s-->
</r>
""" % (beastling.extractor._generated_str,
       beastling.extractor._config_file_str,
       beastling.extractor._data_file_str,
       datafile.as_posix()))
        res = self._extract(fname)
        self.assertIn(datafile.name, ''.join(res))
Example #56
File: util.py Project: LinguList/lingpy
def _str_path(path, mkdir=False):
    """Get a file-system path as text_type, suitable for passing into io.open.

    Parameters
    ----------
    path : {text_type, Path}
        A fs path either as Path instance or as text_type.
    mkdir : bool (default=False)
        If True, create the directories within the path.

    Returns
    -------
    path : text_type
        The path as text_type.
    """
    res = Path(path_component(path))
    if mkdir and res.parent and not res.parent.exists():
        res.parent.mkdir(parents=True)
    return res.as_posix()
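
A usage sketch: the helper accepts text or a Path and hands back a posix-style text path, creating missing parent directories when mkdir=True.

out = _str_path('results/wordlist.tsv', mkdir=True)  # creates results/ if needed
with open(out, 'w', encoding='utf8') as fp:
    fp.write('ID\tFORM\n')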