def detail(setid, path):
    try:
        setid = str(SetID(setid))
    except TypeError:
        flask.abort(404)

    setdir = os.path.join(current_app.site.config.marv.storedir, setid)

    # dataset file download
    try:
        idx = int(path)
    except (TypeError, ValueError):
        pass
    else:
        path = db.session.query(File.path)\
                         .join(Dataset)\
                         .filter(Dataset.setid == setid)\
                         .filter(File.idx == idx)\
                         .scalar()
        try:
            return flask.send_file(path, as_attachment=True)
        except ValueError:
            flask.abort(404)

    try:
        if path:
            return flask.send_from_directory(setdir, path, conditional=True)
        with open(os.path.join(setdir, 'detail.json')) as f:
            detail = json.load(f)
    except IOError:
        return flask.abort(404)

    # TODO: investigate merge into one
    dataset_id, collection = db.session.query(Dataset.id, Dataset.collection)\
                                       .filter(Dataset.setid == setid)\
                                       .one()
    comments = db.session.query(Comment.author, Comment.time_added, Comment.text)\
                         .filter(Comment.dataset_id == dataset_id)\
                         .order_by(Comment.time_added)
    detail['comments'] = [{'author': x[0], 'timeAdded': x[1], 'text': x[2]}
                          for x in comments]

    collection = current_app.site.collections[collection]
    alltags = db.session.query(Tag.value)\
                        .filter(Tag.collection == collection.name)\
                        .order_by(Tag.value)
    detail['all_known_tags'] = [x[0] for x in alltags]

    tags = db.session.query(Tag.value)\
                     .join(dataset_tag)\
                     .filter(dataset_tag.c.dataset_id == dataset_id)\
                     .order_by(Tag.value)
    detail['tags'] = [x[0] for x in tags]

    detail['collection'] = collection.name
    detail['id'] = dataset_id
    detail['setid'] = setid
    return flask.jsonify(detail)
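
# A minimal sketch of the JSON shape the detail view above assembles; the
# concrete values are invented for illustration, but the keys mirror the
# code: detail.json loaded from disk is merged with comments, tags, and
# identifiers queried from the database before flask.jsonify returns it.
import json

example_detail = {
    'comments': [{'author': 'alice', 'timeAdded': 1514764800,
                  'text': 'looks good'}],
    'all_known_tags': ['indoor', 'outdoor'],
    'tags': ['outdoor'],
    'collection': 'bags',
    'id': 1,
    'setid': 'h66lbi4b6pcsq7hsbnhzoxsgzq',  # made-up example set id
    # ... plus whatever keys detail.json itself contained
}
print(json.dumps(example_detail, indent=2))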
def load_dataset(setdir, dataset):
    setid = SetID(dataset.setid)
    files = [{'path': x.path,
              'missing': bool(x.missing),
              'mtime': x.mtime * 10**6,
              'size': x.size}
             for x in sorted(dataset.files, key=lambda x: x.idx)]
    dct = {'id': setid,
           'name': dataset.name,
           'files': files,
           'time_added': dataset.time_added * 10**6,
           'timestamp': dataset.timestamp * 10**6}
    try:
        wrapper = Wrapper.from_dict(Dataset, dct)
    except KjException:
        from pprint import pformat
        print('Schema violation for %s with data:\n%s\nschema: %s'
              % (Dataset.schema.node.displayName, pformat(dct),
                 Dataset.schema.node.displayName),
              file=sys.stderr)
        raise  # bare raise preserves the original traceback
    wrapper._setdir = setdir  # needed by dataset.load(node)
    return [wrapper]
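
# A self-contained sketch of the conversion performed above, using a
# hypothetical FileInfo stand-in for the database rows: file records are
# ordered by their index and second-resolution times are scaled to
# microseconds (* 10**6) before being handed to the capnp wrapper.
from collections import namedtuple

FileInfo = namedtuple('FileInfo', 'idx path missing mtime size')  # hypothetical
records = [FileInfo(1, '/scan/b.bag', 0, 1514764801, 2048),
           FileInfo(0, '/scan/a.bag', 0, 1514764800, 1024)]
files = [{'path': x.path, 'missing': bool(x.missing),
          'mtime': x.mtime * 10**6, 'size': x.size}
         for x in sorted(records, key=lambda x: x.idx)]
assert files[0]['path'] == '/scan/a.bag'
assert files[0]['mtime'] == 1514764800 * 10**6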
def __getattr__(self, name):
    # TODO: parse schema and do this properly
    if name == 'id' and hasattr(self._reader, 'id0'):
        return SetID(self._reader.id0, self._reader.id1)
    value = getattr(self._reader, name)
    if isinstance(value, StructReader):
        return Wrapper(value, self._streamdir, self._setdir)
    elif isinstance(value, ListReader):
        return ListWrapper(value, self._streamdir, self._setdir)
    else:
        return value
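
# A toy illustration (not the real SetID class) of the special case above:
# the 128-bit set id is stored as two unsigned 64-bit struct fields, id0 and
# id1, which are recombined into one integer on attribute access. Assuming
# here, for illustration only, that id0 holds the low 64 bits.
def combine_u64_pair(lo, hi):  # hypothetical helper mirroring SetID(id0, id1)
    return lo + (hi << 64)

assert combine_u64_pair(42, 0) == 42
assert combine_u64_pair(0, 1) == 2**64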
def detail(setid, path):
    try:
        setid = str(SetID(setid))
    except TypeError:
        flask.abort(404)

    setdir = os.path.join(current_app.site.config.marv.storedir, setid)

    if path == 'detail.json':
        return _send_detail_json(setid, setdir)

    if path.isdigit():
        path = db.session.query(File.path)\
                         .join(Dataset)\
                         .filter(Dataset.setid == setid)\
                         .filter(File.idx == int(path))\
                         .scalar()
    else:
        path = flask.safe_join(setdir, path)

    # Make sure path exists and is safe
    if not os.path.isabs(path) \
            or path != os.path.normpath(path) \
            or not os.path.isfile(path):
        return flask.abort(404)

    if current_app.site.config.marv.reverse_proxy == 'nginx':
        resp = flask.make_response()
        mime = mimetypes.guess_type(path)
        resp.headers['content-type'] = \
            mime[0] if mime[0] else 'application/octet-stream'
        resp.headers['cache-control'] = 'no-cache'
        resp.headers['x-accel-buffering'] = 'no'
        resp.headers['x-accel-redirect'] = \
            (current_app.config['APPLICATION_ROOT'] or '') + path
        resp.headers.add('Content-Disposition', 'attachment',
                         filename=os.path.basename(path))
        return resp

    try:
        resp = flask.send_file(path, as_attachment=True, conditional=True)
        resp.headers['Cache-Control'] = 'no-cache'
        return resp
    except ValueError:
        flask.abort(404)
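
# A self-contained sketch of the safety check above: a candidate path is only
# served if it is absolute, already normalized (nothing left for normpath to
# collapse, such as '..' segments), and refers to an existing regular file.
# The example paths are made up.
import os.path

def is_safe_file(path):  # hypothetical mirror of the inline check
    return (os.path.isabs(path)
            and path == os.path.normpath(path)
            and os.path.isfile(path))

assert not is_safe_file('/srv/store/abc/../../etc/passwd')  # not normalized
assert not is_safe_file('relative/file')                    # not absolute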
def __getattr__(self, name):
    parts = name.split('_')
    field_name = parts[0] + ''.join(((x[0].upper() + x[1:]) if x else '_')
                                    for x in parts[1:])
    if field_name in self._reader.schema.fieldnames \
            and self._reader._has(field_name):
        field = self._reader.schema.fields[field_name]
        return _wrap(getattr(self._reader, name), self._streamdir,
                     self._setdir, field=field)
    elif name == 'id' and self._reader._has('id0') \
            and self._reader._has('id1'):
        return SetID(self._reader.id0, self._reader.id1)
    return getattr(self._reader, name)  # Not a field, but e.g. a method
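
# A standalone demonstration of the snake_case-to-camelCase mapping used
# above to translate pythonic attribute names into capnp field names;
# consecutive underscores survive as literal underscores.
def to_field_name(name):  # same expression as in __getattr__
    parts = name.split('_')
    return parts[0] + ''.join(((x[0].upper() + x[1:]) if x else '_')
                              for x in parts[1:])

assert to_field_name('time_added') == 'timeAdded'
assert to_field_name('name') == 'name'
assert to_field_name('foo__bar') == 'foo_Bar'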
def setid(self):
    return SetID(self._setid)
def marvcli_run(ctx, datasets, deps, excluded_nodes, force, force_dependent,
                force_deps, keep, keep_going, list_nodes, list_dependent,
                selected_nodes, update_detail, update_listing, cachesize,
                collections):
    """Run nodes for selected datasets.

    Datasets are specified by a list of set ids, or --collection <name>,
    use --collection=* to run for all collections. --node in conjunction
    with --collection=* will pick those collections for which the selected
    nodes are configured.

    Set ids may be abbreviated to any uniquely identifying prefix. Suffix
    a prefix with '+' to match multiple.
    """
    if collections and datasets:
        ctx.fail('--collection and DATASETS are mutually exclusive')
    if list_dependent and not selected_nodes:
        ctx.fail('--list-dependent needs at least one selected --node')
    if force_dependent and not selected_nodes:
        ctx.fail('--force-dependent needs at least one selected --node')
    if not any([datasets, collections, list_nodes]):
        click.echo(ctx.get_help())
        ctx.exit(1)

    deps = 'force' if force_deps else deps
    force = force_deps or force

    site = create_app().site

    if '*' in collections:
        if selected_nodes:
            collections = [k for k, v in site.collections.items()
                           if set(v.nodes).issuperset(selected_nodes)]
            if not collections:
                ctx.fail('No collections have all selected nodes')
        else:
            collections = None
    else:
        for col in collections:
            if col not in site.collections:
                ctx.fail('Unknown collection: {}'.format(col))

    if list_nodes:
        for col in (collections or sorted(site.collections.keys())):
            click.echo('{}:'.format(col))
            for name in sorted(site.collections[col].nodes):
                if name == 'dataset':
                    continue
                click.echo('    {}'.format(name))
        return

    if list_dependent:
        for col in (collections or sorted(site.collections.keys())):
            click.echo('{}:'.format(col))
            dependent = {x for name in selected_nodes
                         for x in site.collections[col].nodes[name].dependent}
            for name in sorted(x.name for x in dependent):
                click.echo('    {}'.format(name))
        return

    errors = []
    setids = [SetID(x) for x in parse_setids(datasets)]
    if not setids:
        query = db.session.query(Dataset.setid)\
                          .filter(Dataset.discarded.isnot(True))\
                          .filter(Dataset.status.op('&')(STATUS_MISSING) == 0)
        if collections is not None:
            query = query.filter(Dataset.collection.in_(collections))
        setids = (SetID(x[0]) for x in query)

    for setid in setids:
        if IPDB:
            site.run(setid, selected_nodes, deps, force, keep,
                     force_dependent, update_detail, update_listing,
                     excluded_nodes, cachesize=cachesize)
        else:
            try:
                site.run(setid, selected_nodes, deps, force, keep,
                         force_dependent, update_detail, update_listing,
                         excluded_nodes, cachesize=cachesize)
            except UnknownNode as e:
                ctx.fail('Collection {} has no node {}'.format(*e.args))
            except NoResultFound:
                click.echo('ERROR: unknown {!r}'.format(setid), err=True)
                if not keep_going:
                    raise
            except RequestedMessageTooOld as e:
                ctx.fail(
                    ('{} pulled {} message {} not being in memory anymore.'
                     '\nSee https://ternaris.com/marv-robotics/docs/patterns.html#reduce-separately')
                    .format(e.args[0]._requestor.node.name,
                            e.args[0].handle.node.name, e.args[1]))
            except BaseException as e:
                errors.append(setid)
                if isinstance(e, KeyboardInterrupt):
                    log.warning('KeyboardInterrupt: aborting')
                    raise
                elif isinstance(e, DirectoryAlreadyExists):
                    click.echo("""
ERROR: Directory for node run already exists: {!r}
In case no other node run is in progress, this is a bug which you are
kindly asked to report, providing information regarding any previous,
failed node runs.
""".format(e.args[0]), err=True)
                    if not keep_going:
                        ctx.abort()
                else:
                    log.error('Exception occurred for dataset %s:',
                              setid, exc_info=True)
                    log.error('Error occurred for dataset %s: %s', setid, e)
                    if not keep_going:
                        ctx.exit(1)

    if errors:
        log.error('There were errors for %r', errors)
# -*- coding: utf-8 -*-
#
# Copyright 2016 - 2018 Ternaris.
# SPDX-License-Identifier: AGPL-3.0-only

from __future__ import absolute_import, division, print_function

import os
import unittest

from . import Store
from marv_node.testing import temporary_directory
from marv_node.setid import SetID
from marv_nodes import dataset

SETID = SetID(42)


@unittest.skip
class TestCase(unittest.TestCase):
    def test(self):
        with temporary_directory() as tmpdir:
            scanroot = os.path.join(tmpdir, 'scanroot')
            os.mkdir(scanroot)
            file1 = os.path.join(scanroot, 'file1')
            file2 = os.path.join(scanroot, 'file2')
            with open(file1, 'w') as f:
                f.write('1')
            with open(file2, 'w') as f:
                f.write('10')