def _patch_api(tmpdir, mocker, cdstar_object, obj=None, side_effect=None):
    """Patch ``cdstarcat.catalog.Cdstar`` with a stub and return a new catalog.

    :param tmpdir: directory object providing ``join``; the catalog file \
``new.json`` is located inside it.
    :param mocker: object providing ``patch`` and ``Mock``.
    :param cdstar_object: factory called without arguments to build the \
resource of the single fake search hit.
    :param obj: value returned by the stub's ``get_object`` when truthy.
    :param side_effect: callable whose result is raised by ``get_object`` \
when ``obj`` is falsy.
    :return: ``Catalog`` instance created for ``new.json`` in ``tmpdir``.
    """

    class MockApi(object):
        """Stub standing in for both the API class and its instances."""

        def __init__(self, obj=None, side_effect=None):
            self.obj = obj
            self.side_effect = side_effect
            self.search_called = 0

        def __call__(self, *args, **kw):
            # "Instantiating" the patched class hands back this very stub.
            return self

        def get_object(self, *args):
            if self.obj:
                return self.obj
            if self.side_effect:
                raise self.side_effect()
            return None

        def search(self, *args, **kw):
            # Only the first call yields a hit; later calls return no results.
            self.search_called += 1
            first_call = self.search_called < 2
            return [mocker.Mock(resource=cdstar_object())] if first_call else []

    stub = MockApi(obj=obj, side_effect=side_effect)
    mocker.patch('cdstarcat.catalog.Cdstar', stub)
    catalog_file = tmpdir.join('new.json')
    return Catalog(str(catalog_file))
def upload_images(args):
    """
    tsammalex upload_images path/to/cdstar/catalog

    For each row of the ``staged_images`` CSV data, the first provider that
    contains the row retrieves the image. When an image is returned, its CSV
    row is appended to ``images.csv`` and the row is filtered out of
    ``staged_images.csv``.

    The catalog path is read from ``args.args[0]``; the CDSTAR credentials
    come from the ``CDSTAR_URL``, ``CDSTAR_USER`` and ``CDSTAR_PWD``
    environment variables.
    """
    repos = args.tsammalex_data
    images_csv = data_file('images.csv', repos=repos)
    staged_csv = data_file('staged_images.csv', repos=repos)
    known_ids = {row.id for row in models.CsvData('images', repos=repos)}
    providers = [make_provider(repos) for make_provider in PROVIDERS]

    with MediaCatalog('cdstar.json', repos=repos, json_opts=dict(indent=4)) as mcat:
        with Catalog(
                args.args[0],
                cdstar_url=os.environ['CDSTAR_URL'],
                cdstar_user=os.environ['CDSTAR_USER'],
                cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
            for item in models.CsvData('staged_images', repos=repos):
                # Only the first provider that contains the item is consulted.
                responsible = next((p for p in providers if item in p), None)
                if responsible is None:
                    continue

                img = responsible.retrieve(item, cat, known_ids, mcat)
                if not img:
                    continue

                try:
                    add_rows(images_csv, img.csv_row())
                except BaseException:
                    # Show the offending image, then let the error propagate.
                    print(img)
                    raise

                staged_id = item.id
                filter_rows(staged_csv, lambda row: row['id'] != staged_id)
def upload_sources(args):
    """
    concepticon upload_sources path/to/cdstar/catalog

    Every ``*.pdf`` in the ``sources`` data directory that has no entry in the
    local ``cdstar.json`` sources catalog is created in the CDSTAR catalog
    (collection ``concepticon``) and registered locally. Afterwards a table of
    contents listing all registered sources is passed to ``readme``.

    The catalog path is ``args.args[0]``; credentials are taken from the
    ``CDSTAR_URL``, ``CDSTAR_USER`` and ``CDSTAR_PWD`` environment variables.
    """
    toc = ['# Sources\n']
    api = Concepticon(args.data)

    with SourcesCatalog(api.data_path('sources', 'cdstar.json')) as lcat:
        with Catalog(
                args.args[0],
                cdstar_url=os.environ['CDSTAR_URL'],
                cdstar_user=os.environ['CDSTAR_USER'],
                cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
            pdfs = sorted(
                api.data_path('sources').glob('*.pdf'),
                key=lambda pdf: pdf.stem)
            for pdf in pdfs:
                clid = as_unicode(pdf.stem)
                if lcat.get(clid):
                    # Already registered in the local catalog.
                    continue
                created = list(cat.create(pdf, {'collection': 'concepticon'}))
                _, _, obj = created[0]
                lcat.add(clid, obj)

        for key in sorted(lcat.items):
            entry = lcat.get(key)
            line = '- [{0} [PDF {1}]]({2})'.format(
                key, format_size(entry['size']), entry['url'])
            toc.append(line)

    readme(api.data_path('sources'), toc)
def test_checks(tmp_catalog_path):
    """Assigning under the two malformed keys below must raise ``ValueError``."""
    catalog = Catalog(str(tmp_catalog_path))
    for bad_key in ('objid', '12345-1234-1234-1234-1'):
        with pytest.raises(ValueError):
            catalog[bad_key] = 1
def test_attrs(catalog_path):
    """Check membership, human-readable size and bitstream attributes."""
    catalog = Catalog(catalog_path)

    # The catalog supports lookup by object ID as well as by bitstream md5.
    assert OBJID in catalog
    entry = catalog[OBJID]
    first_bitstream = entry.bitstreams[0]
    assert first_bitstream.md5 in catalog

    assert catalog.size_h == '109.8KB'

    # Both timestamps of the stored bitstream must lie in the past.
    assert first_bitstream.modified_datetime < datetime.datetime.now()
    assert first_bitstream.created_datetime < datetime.datetime.now()

    assert not entry.is_special
def test_idempotency(catalog_path, tmp_catalog_path):
    """Re-adding an object with reordered metadata must not change the file.

    The comparison ignores whitespace differences by comparing the
    whitespace-split token lists of both files.
    """
    with catalog_path.open(encoding='utf8') as stream:
        before = stream.read()

    with Catalog(str(tmp_catalog_path)) as catalog:
        data = catalog[OBJID].asdict()
        # Reverse the metadata key order before writing the object back.
        reversed_items = sorted(data['metadata'].items(), reverse=True)
        data['metadata'] = OrderedDict(reversed_items)
        catalog[OBJID] = Object.fromdict(OBJID, data)

    after = tmp_catalog_path.read_text('utf8')
    assert before.split() == after.split()
def x(args):
    """Print id and properties of every bitstream of a release's CDSTAR object.

    The release is ``args.args[0]``; its object ID is looked up under the
    ``oid`` key of the release's entry in ``static/downloads.json`` below
    ``args.pkg_dir``. Catalog location and credentials are read from the
    ``CDSTAR_CATALOG``, ``CDSTAR_URL``, ``CDSTAR_USER`` and ``CDSTAR_PWD``
    environment variables.

    If ``cdstarcat`` cannot be imported, an error is logged via ``args.log``
    and the function returns without doing anything else.
    """
    try:
        from cdstarcat.catalog import Catalog
    except ImportError:
        args.log.error('pip install cdstarcat')
        return

    downloads_json = args.pkg_dir.joinpath('static', 'downloads.json')
    downloads = load(downloads_json)
    release = args.args[0]

    with Catalog(
            Path(os.environ['CDSTAR_CATALOG']),
            cdstar_url=os.environ['CDSTAR_URL'],
            cdstar_user=os.environ['CDSTAR_USER'],
            cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
        obj = cat.api.get_object(uid=downloads[release]['oid'])
        # Iterate over a copy of the bitstream list, as the original did.
        for bitstream in obj.bitstreams[:]:
            print(bitstream.id, bitstream._properties)
def upload_sources(args):
    """
    Compile sources and upload the result to GWDG CDSTAR instance.

    Notes
    -----
    CDSTAR authorisation information should be supplied in the form of
    environment variables:
        - CDSTAR_URL
        - CDSTAR_USER
        - CDSTAR_PWD

    The catalog path is the first positional argument if one is given,
    otherwise the value of the CDSTAR_CATALOG environment variable. It is
    printed once the sources README has been written.

    Examples
    --------
    $ concepticon upload_sources path/to/cdstar/catalog
    """
    if args.args:
        catalog_path = args.args[0]
    else:
        catalog_path = os.environ["CDSTAR_CATALOG"]

    toc = ["# Sources\n"]
    api = Concepticon(args.repos)

    with SourcesCatalog(api.data_path("sources", "cdstar.json")) as lcat:
        with Catalog(
            catalog_path,
            cdstar_url=os.environ["CDSTAR_URL"],
            cdstar_user=os.environ["CDSTAR_USER"],
            cdstar_pwd=os.environ["CDSTAR_PWD"],
        ) as cat:
            pdf_paths = api.data_path("sources").glob("*.pdf")
            for pdf in sorted(pdf_paths, key=lambda p: p.stem):
                clid = as_unicode(pdf.stem)
                if not lcat.get(clid):
                    # Not yet in the local catalog: create it and register it.
                    results = list(cat.create(pdf, {"collection": "concepticon"}))
                    _, _, obj = results[0]
                    lcat.add(clid, obj)

        for key in sorted(lcat.items):
            spec = lcat.get(key)
            size, url = format_size(spec["size"]), spec["url"]
            toc.append("- [{0} [PDF {1}]]({2})".format(key, size, url))

    readme(api.data_path("sources"), toc)
    print(catalog_path)
def dl2cdstar(args):
    """Upload an app's download files to CDSTAR and update ``downloads.json``.

    The app name is derived from ``args.project`` via ``app_name``; the
    release is ``args.version``. An optional description is taken from
    ``args.args[0]``. Catalog location and credentials are read from the
    ``CDSTAR_CATALOG``, ``CDSTAR_URL``, ``CDSTAR_USER`` and ``CDSTAR_PWD``
    environment variables.

    Returns early, after logging an error via ``args.log``, if the app name
    cannot be determined or ``cdstarcat`` cannot be imported.
    """
    app = app_name(args.project)
    if not app:
        args.log.error('cannot parse package name')
        return

    try:
        from cdstarcat.catalog import Catalog
    except ImportError:
        args.log.error('pip install cdstarcat')
        return

    title_pattern = re.compile('%s (?P<version>[0-9.]+) - downloads' % re.escape(app))
    title = '{0} {1} - downloads'.format(app, args.version)
    pkg_dir = args.project.joinpath(app)

    with Catalog(
            Path(os.environ['CDSTAR_CATALOG']),
            cdstar_url=os.environ['CDSTAR_URL'],
            cdstar_user=os.environ['CDSTAR_USER'],
            cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
        obj = cat.api.get_object()
        # Keep the original sequence: assign the metadata first, then add the
        # optional description through the object's ``metadata`` attribute.
        obj.metadata = {"creator": "pycdstar", "title": title}
        if args.args:
            obj.metadata["description"] = args.args[0]

        for path in pkg_dir.joinpath('static', 'download').iterdir():
            # Only regular, non-hidden files are uploaded.
            if not path.is_file() or path.name.startswith('.'):
                continue
            print(path.name)
            obj.add_bitstream(
                fname=path.as_posix(), name=path.name.replace('-', '_'))
        cat.add(obj)

    fname = pkg_dir.joinpath('static', 'downloads.json')
    with update(fname, default={}, indent=4) as downloads:
        for oid, spec in load(Path(os.environ['CDSTAR_CATALOG'])).items():
            if 'metadata' not in spec or 'title' not in spec['metadata']:
                continue
            match = title_pattern.match(spec['metadata']['title'])
            if not match:
                continue
            version = match.group('version')
            # Versions already listed in downloads.json are left untouched.
            if version not in downloads:
                spec['oid'] = oid
                downloads[version] = spec

    args.log.info('{0} written'.format(fname))
    args.log.info('{0}'.format(os.environ['CDSTAR_CATALOG']))
def cdstar(args):
    """Upload the Glottolog download files for a release to CDSTAR.

    The release is ``args.args[0]``. Regular, non-hidden files in
    ``static/download`` below ``args.pkg_dir`` are added as bitstreams to an
    object obtained from the catalog's API, and ``static/downloads.json`` is
    extended with every catalog entry whose title matches the download title
    pattern and whose version is not listed yet.

    Catalog location and credentials are read from the ``CDSTAR_CATALOG``,
    ``CDSTAR_URL``, ``CDSTAR_USER`` and ``CDSTAR_PWD`` environment variables.
    If ``cdstarcat`` cannot be imported, an error is logged and the function
    returns.
    """
    try:
        from cdstarcat.catalog import Catalog
    except ImportError:
        args.log.error('pip install cdstarcat')
        return

    title_pattern = re.compile('glottolog (?P<version>[0-9.]+) - downloads')

    with Catalog(
            Path(os.environ['CDSTAR_CATALOG']),
            cdstar_url=os.environ['CDSTAR_URL'],
            cdstar_user=os.environ['CDSTAR_USER'],
            cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
        obj = cat.api.get_object()
        # The release argument is deliberately read only after the object has
        # been obtained, matching the original evaluation order.
        release = args.args[0]
        obj.metadata = {
            "creator": "pycdstar",
            "title": "glottolog %s - downloads" % release,
            "description": "Custom downloads for release %s of "
                           "[Glottolog](http://glottolog.org)" % release,
        }

        for path in args.pkg_dir.joinpath('static', 'download').iterdir():
            if not path.is_file() or path.name.startswith('.'):
                continue
            print(path.name)
            obj.add_bitstream(
                fname=path.as_posix(), name=path.name.replace('-', '_'))
        cat.add(obj)

    fname = args.pkg_dir.joinpath('static', 'downloads.json')
    with update(fname, default={}, indent=4) as downloads:
        for oid, spec in load(Path(os.environ['CDSTAR_CATALOG'])).items():
            if 'metadata' not in spec or 'title' not in spec['metadata']:
                continue
            match = title_pattern.match(spec['metadata']['title'])
            if not match:
                continue
            version = match.group('version')
            if version not in downloads:
                spec['oid'] = oid
                downloads[version] = spec

    args.log.info('{0} written'.format(fname))
    args.log.info('{0}'.format(os.environ['CDSTAR_CATALOG']))
def upload_sources(args):
    """
    Compile sources and upload the result to GWDG CDSTAR instance.

    Notes
    -----
    CDSTAR authorisation information should be supplied in the form of
    environment variables:
        - CDSTAR_URL
        - CDSTAR_USER
        - CDSTAR_PWD

    When no positional argument is passed, the catalog path falls back to
    the CDSTAR_CATALOG environment variable. The path is printed at the end.

    Examples
    --------
    $ concepticon upload_sources path/to/cdstar/catalog
    """
    catalog_path = args.args[0] if args.args else os.environ['CDSTAR_CATALOG']
    toc = ['# Sources\n']
    api = Concepticon(args.repos)

    def by_stem(path):
        return path.stem

    with SourcesCatalog(api.data_path('sources', 'cdstar.json')) as lcat:
        with Catalog(
                catalog_path,
                cdstar_url=os.environ['CDSTAR_URL'],
                cdstar_user=os.environ['CDSTAR_USER'],
                cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
            for source in sorted(api.data_path('sources').glob('*.pdf'), key=by_stem):
                clid = as_unicode(source.stem)
                known = lcat.get(clid)
                if known:
                    continue
                # Upload the PDF and record the created object locally.
                uploaded = list(cat.create(source, {'collection': 'concepticon'}))
                obj = uploaded[0][2]
                lcat.add(clid, obj)

        # One markdown bullet per catalogued source, ordered by key.
        for key in sorted(lcat.items):
            info = lcat.get(key)
            toc.append('- [{0} [PDF {1}]]({2})'.format(
                key, format_size(info['size']), info['url']))

    readme(api.data_path('sources'), toc)
    print(catalog_path)
def test_empty(tmpdir, tmp_catalog_path, catalog_path):
    """A catalog for a fresh ``new.json`` starts empty and accepts an object."""
    fresh_file = str(tmpdir.join('new.json'))
    with Catalog(fresh_file) as fresh:
        assert len(fresh) == 0
        source = Catalog(catalog_path)
        fresh[OBJID] = source[OBJID]

    reloaded = Catalog(str(tmp_catalog_path))
    assert len(reloaded) == 1
def test_context_manager(tmp_catalog_path):
    """Leaving the ``Catalog`` context must bump the file's modification time."""
    target = str(tmp_catalog_path)
    jsonlib.dump({}, target)
    mtime_before = tmp_catalog_path.mtime()

    with Catalog(target):
        # Wait a little so the later mtime is strictly greater.
        time.sleep(0.1)

    mtime_after = tmp_catalog_path.mtime()
    assert mtime_after > mtime_before
def new_catalog(tmpdir):
    """Return a ``Catalog`` for the file ``new.json`` inside ``tmpdir``."""
    catalog_file = tmpdir.join('new.json')
    return Catalog(str(catalog_file))
def test_update_metadata(mocker, catalog_path, cdstar_object, tmpdir):
    """Replacing the metadata with an empty dict drops the ``collection`` key."""
    api_object = cdstar_object(OBJID)
    _patch_api(tmpdir, mocker, cdstar_object, obj=api_object)

    catalog = Catalog(catalog_path)
    assert 'collection' in catalog[OBJID].metadata

    catalog.update_metadata(OBJID, {}, mode='replace')
    assert 'collection' not in catalog[OBJID].metadata
def cdstar(args):
    """Upload or refresh the Glottolog download files for a release on CDSTAR.

    The release is ``args.args[0]``. If it is already listed in
    ``static/downloads.json`` below ``args.pkg_dir``, the object recorded
    under its ``oid`` is fetched; otherwise an object is requested without an
    ID and given fresh metadata. Files whose checksum matches an existing
    bitstream of the same name are skipped; differing bitstreams are deleted
    and uploaded again.

    Catalog location and credentials are read from the ``CDSTAR_CATALOG``,
    ``CDSTAR_URL``, ``CDSTAR_USER`` and ``CDSTAR_PWD`` environment variables.
    If ``cdstarcat`` cannot be imported, an error is logged and the function
    returns.
    """
    try:
        from cdstarcat.catalog import Catalog
    except ImportError:
        args.log.error('pip install cdstarcat')
        return

    #
    # FIXME: look up oid for release in downloads.json! if it exists, replace the bitstreams
    # rather than creating a new object!
    #
    dlfname = args.pkg_dir.joinpath('static', 'downloads.json')
    downloads = load(dlfname)
    release = args.args[0]
    title_pattern = re.compile('glottolog (?P<version>[0-9.]+) - downloads')

    with Catalog(
            Path(os.environ['CDSTAR_CATALOG']),
            cdstar_url=os.environ['CDSTAR_URL'],
            cdstar_user=os.environ['CDSTAR_USER'],
            cdstar_pwd=os.environ['CDSTAR_PWD']) as cat:
        #
        # FIXME: there must be a way to overwrite old releases in case of bugfixes!
        #
        if release in downloads:
            # This is a bugfix release, we don't have to create a new object on CDSTAR!
            obj = cat.api.get_object(uid=downloads[release]['oid'])
        else:
            obj = cat.api.get_object()
            obj.metadata = {
                "creator": "pycdstar",
                "title": "glottolog %s - downloads" % release,
                "description": "Custom downloads for release %s of "
                               "[Glottolog](http://glottolog.org)" % release,
            }

        # Snapshot of the bitstreams present before anything is added.
        existing = obj.bitstreams[:]
        for path in args.pkg_dir.joinpath('static', 'download').iterdir():
            if not path.is_file() or path.name.startswith('.'):
                continue

            bsname = path.name.replace('-', '_')
            current = next((bs for bs in existing if bs.id == bsname), None)
            if current:
                if current._properties['checksum'] != md5(path):
                    # Content changed: drop the stale bitstream, upload below.
                    current.delete()
                else:
                    # Identical content is already stored; nothing to upload.
                    continue

            print(path.name)
            obj.add_bitstream(fname=path.as_posix(), name=bsname)
        cat.add(obj, update=True)

    with update(
            dlfname,
            default=collections.OrderedDict(),
            indent=4,
            sort_keys=True) as downloads:
        for oid, spec in load(Path(os.environ['CDSTAR_CATALOG'])).items():
            if 'metadata' not in spec or 'title' not in spec['metadata']:
                continue
            match = title_pattern.match(spec['metadata']['title'])
            if not match:
                continue
            version = match.group('version')
            # New versions are added; the current release is always refreshed.
            if (version not in downloads) or version == release:
                args.log.info('update info for release {0}'.format(version))
                spec['oid'] = oid
                downloads[version] = spec

    args.log.info('{0} written'.format(dlfname))
    args.log.info('{0}'.format(os.environ['CDSTAR_CATALOG']))