def test_non_ascii():
    from clldutils.path import Path, path_component, as_unicode
    from six import text_type  # assumed module-level import; text_type is str on Python 3

    assert path_component(b'abc') == 'abc'

    p = Path(path_component('äöü')).joinpath(path_component('äöü'))
    assert isinstance(as_unicode(p), text_type)
    assert isinstance(as_unicode(p.name), text_type)
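# Minimal usage sketch (illustrative, not one of the collected snippets):
# path_component encodes a name for filesystem use where necessary, and
# as_unicode converts a Path (or path component) back to text.
from clldutils.path import Path, path_component, as_unicode

p = Path(path_component('äöü'))
assert as_unicode(p) == 'äöü'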
@classmethod
def from_path(cls, path):
    assert path.is_dir()
    cache = Cache()

    def cache_key(suffix):
        return '.'.join(['SoundClassModel', as_unicode(path.name), suffix])

    if cache_key('converter') not in cache:
        cache[cache_key('converter')] = _read_converter(path.joinpath('converter'))
    converter = cache[cache_key('converter')]

    if cache_key('scorer') not in cache:
        cache[cache_key('scorer')] = _read_scorer(path)
    scorer = cache[cache_key('scorer')]

    # Read metadata from the INFO file.
    info = {k: '' for k in [
        'description', 'compiler', 'source', 'date', 'vowels', 'tones']}
    meta_pattern = re.compile(r'@(?P<key>[^:]+):\s*(?P<value>.*)')
    for line in read_lines(path.joinpath('INFO')):
        match = meta_pattern.match(line)
        if match:
            info[match.group('key')] = match.group('value')

    return cls(
        as_unicode(path.name), converter, scorer, info['vowels'], info['tones'], info)
def upload_sources(args):
    """
    concepticon upload_sources path/to/cdstar/catalog
    """
    toc = ['# Sources\n']
    api = Concepticon(args.data)
    with SourcesCatalog(api.data_path('sources', 'cdstar.json')) as lcat:
        with Catalog(
                args.args[0],
                cdstar_url=os.environ['CDSTAR_URL'],
                cdstar_user=os.environ['CDSTAR_USER'],
                cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
            for fname in sorted(
                    api.data_path('sources').glob('*.pdf'), key=lambda f: f.stem):
                clid = as_unicode(fname.stem)
                spec = lcat.get(clid)
                if not spec:
                    _, _, obj = list(
                        cat.create(fname, {'collection': 'concepticon'}))[0]
                    spec = lcat.add(clid, obj)

        for key in sorted(lcat.items):
            spec = lcat.get(key)
            toc.append('- [{0} [PDF {1}]]({2})'.format(
                key, format_size(spec['size']), spec['url']))

    readme(api.data_path('sources'), toc)
def run_and_dump(name, obj, __checksum__=None, **kw):
    adapter = get_adapter(obj, interfaces.IOperation, name=name)
    if __checksum__:
        cached = path_from_checksum(__checksum__, outdir=CACHE_DIR)
        if cached.exists():
            return load(cached, cls=adapter.returns), __checksum__
    res = adapter(**kw)
    out = dump(res, outdir=CACHE_DIR)
    return res, as_unicode(out.stem)
def _create(self, path, metadata, object_class=None):
    mimetype = (
        mimetypes.guess_type(path.as_posix(), strict=False)[0]
        or 'application/octet-stream')
    maintype, subtype = mimetype.split('/')
    cls = object_class or getattr(media, maintype.capitalize(), media.File)
    file_ = cls(as_unicode(path.as_posix()))
    if file_.md5 not in self.md5_to_object:
        obj, md, bitstreams = file_.create_object(self.api, metadata)
        return True, self.add(obj, metadata=md)
    return False, self.md5_to_object[file_.md5][0]
def upload_mediafiles(args):
    """
    Uploads media files from the passed directory to the CDSTAR server; if an
    object identified by the metadata's 'name' already exists, it is deleted first.
    """
    supported_types = {
        'imagefile': ['png', 'gif', 'jpg', 'jpeg', 'tif', 'tiff'],
        'pdffile': ['pdf'],
        'moviefile': ['mp4'],
    }

    if not args.args or not Path(args.args[0]).exists():
        print('Error: Upload path does not exist')
        exit(1)

    with get_catalog(args) as cat:
        name_map = {obj.metadata['name']: obj for obj in cat}
        for ifn in sorted(Path(args.args[0]).iterdir()):
            print(ifn.name)
            fmt = ifn.suffix[1:].lower()
            meta_type = None
            for t, suffixes in supported_types.items():
                if fmt in suffixes:
                    meta_type = t
                    break
            if meta_type is None:
                print('No supported media format - skipping {0}'.format(fmt))
                continue
            md = {
                'collection': 'amsd',
                'name': as_unicode(ifn.stem),
                'type': meta_type,
                'path': as_unicode(ifn.name),
            }
            # Create the new object.
            for fname, created, obj in cat.create(str(ifn), md):
                args.log.info('{0} -> {1} object {2.id}'.format(
                    fname, 'new' if created else 'existing', obj))
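# Design note (standalone sketch): the suffix scan in upload_mediafiles could
# equivalently use a precomputed reverse map, avoiding the inner loop.
supported_types = {
    'imagefile': ['png', 'gif', 'jpg', 'jpeg', 'tif', 'tiff'],
    'pdffile': ['pdf'],
    'moviefile': ['mp4'],
}
suffix_to_type = {s: t for t, suffixes in supported_types.items() for s in suffixes}
assert suffix_to_type.get('jpg') == 'imagefile'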
def _load_sql_dump(rel, log):
    dump = Path('glottolog-{0}.sql'.format(rel['version']))
    dbname = as_unicode(dump.stem)
    dbs = [
        line.split(b'|')[0].decode('utf8')
        for line in subprocess.check_output(['psql', '-l', '-t', '-A']).splitlines()
    ]
    if dbname in dbs:
        log.warning('db {0} exists! Drop first to recreate.'.format(dump.name))
    else:
        if not dump.exists():
            _download_sql_dump(rel, log)
        subprocess.check_call(['createdb', dbname])
        subprocess.check_call(['psql', '-d', dbname, '-f', str(dump)])
        log.info('db {0} created'.format(dbname))
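# Hypothetical invocation sketch (requires a local PostgreSQL with psql on
# PATH): `rel` is assumed to be a release record carrying a 'version' key and
# `log` a stdlib logger; both values below are illustrative.
import logging

_load_sql_dump({'version': '4.1'}, logging.getLogger('glottolog'))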
def build_langs_index(api, log):
    writer = get_langs_index(api, recreate=True).writer()
    for lang in api.languoids():
        writer.add_document(
            id=lang.id,
            name=lang.name,
            fname=as_unicode(lang.fname),
            iso=lang.iso,
            level=lang.level.name.decode() if PY2 else lang.level.name,
            macroarea=' '.join('{0}'.format(ma) for ma in lang.macroareas),
            country=' '.join('{0}'.format(c) for c in lang.countries),
            latitude=lang.latitude,
            longitude=lang.longitude,
            ini=lang.cfg.write_string(),
        )
    writer.commit()
def __call__(self, args):
    opargs, opkw = _args_kw(args.name)
    readargs, readkw = _args_kw(args.object)
    oname, if_, input_ = readargs
    input_ = text_type(input_)
    if Path(path_component(input_)).exists():
        # We heuristically interpret the input as a filename if a file with
        # that name exists.
        input_ = Path(path_component(input_))
    res = run(
        opargs[0],
        read(oname, getattr(interfaces, if_), input_, **readkw),
        **opkw)
    p = jsonlib.dump(res, outdir=Path(args.output))
    print('Result written to <%s>' % as_unicode(p))
    return p
def upload_sources(args):
    """
    Compile sources and upload the result to the GWDG CDSTAR instance.

    Notes
    -----
    CDSTAR authorisation information should be supplied in the form of
    environment variables:
    - CDSTAR_URL
    - CDSTAR_USER
    - CDSTAR_PWD

    Examples
    --------
    $ concepticon upload_sources path/to/cdstar/catalog
    """
    catalog_path = args.args[0] if args.args else os.environ["CDSTAR_CATALOG"]
    toc = ["# Sources\n"]
    api = Concepticon(args.repos)
    with SourcesCatalog(api.data_path("sources", "cdstar.json")) as lcat:
        with Catalog(
                catalog_path,
                cdstar_url=os.environ["CDSTAR_URL"],
                cdstar_user=os.environ["CDSTAR_USER"],
                cdstar_pwd=os.environ["CDSTAR_PWD"],
        ) as cat:
            for fname in sorted(
                    api.data_path("sources").glob("*.pdf"), key=lambda f: f.stem):
                clid = as_unicode(fname.stem)
                spec = lcat.get(clid)
                if not spec:
                    _, _, obj = list(
                        cat.create(fname, {"collection": "concepticon"}))[0]
                    lcat.add(clid, obj)

        for key in sorted(lcat.items):
            spec = lcat.get(key)
            toc.append("- [{0} [PDF {1}]]({2})".format(
                key, format_size(spec["size"]), spec["url"]))

    readme(api.data_path("sources"), toc)
    print(catalog_path)
@classmethod
def from_path(cls, path):
    cache = Cache()
    cache_key = 'DiacriticsVowelsTones.{0}'.format(as_unicode(path.name))
    if cache_key not in cache:
        cache[cache_key] = read_dvt(path)
    return cls(as_unicode(path.name), *cache[cache_key])
def cache_key(suffix):
    return '.'.join(['SoundClassModel', as_unicode(path.name), suffix])
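# Illustration (standalone): cache_key namespaces cache entries by model
# directory name. The directory name 'sca' below is an example, not taken
# from the snippets above.
from pathlib import Path

path = Path('models/sca')
assert '.'.join(['SoundClassModel', path.name, 'scorer']) == 'SoundClassModel.sca.scorer'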
def test_non_ascii(self):
    from clldutils.path import Path, path_component, as_unicode
    from six import text_type  # assumed module-level import; text_type is str on Python 3

    p = Path(path_component('äöü')).joinpath(path_component('äöü'))
    self.assertIsInstance(as_unicode(p), text_type)
    self.assertIsInstance(as_unicode(p.name), text_type)
def file_written(fname, logger=None):
    logger = logger or get_logger()
    logger.info("File created at <{0}>.".format(as_unicode(fname)))
def filter_hidden(fname):
    return not as_unicode(fname.stem).startswith('.')
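# Usage sketch (standalone): dropping dotfiles from a directory scan, the
# pattern filter_hidden implements. With pathlib on Python 3, as_unicode is
# effectively a no-op, so the stem test below is the equivalent check.
from pathlib import Path

visible = [p for p in Path('.').iterdir() if not p.stem.startswith('.')]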
def keys(self):
    for p in self._dir.iterdir():
        yield as_unicode(p.name)
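# Standalone sketch of the pattern keys() implements: a directory-backed
# mapping whose keys are the file names in the directory. DirStore is a
# hypothetical name, not taken from the snippets above.
from pathlib import Path

class DirStore:
    def __init__(self, d):
        self._dir = Path(d)

    def keys(self):
        for p in self._dir.iterdir():
            yield p.name  # as_unicode is only needed for Python 2 bytes paths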