Example 1
    def make_dataset(self, files, name, time_added=None):
        setid = SetID.random()
        # Stat each path once; mtimes are stored in milliseconds.
        files = [File(idx=i,
                      mtime=int(stat.st_mtime * 1000),
                      path=path,
                      size=stat.st_size)
                 for i, (path, stat)
                 in enumerate((path, os.stat(path)) for path in files)]
        time_added = int(time.time() * 1000) if time_added is None else time_added
        dataset = Dataset(
            collection=self.name,
            files=files,
            name=name,
            #status=8,  # pending state see marv/model
            time_added=time_added,
            timestamp=max(x.mtime for x in files),
            setid=setid)

        storedir = self.config.marv.storedir
        store = Store(storedir, self.nodes)
        store.add_dataset(dataset)
        self.render_detail(dataset)
        return dataset
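A minimal usage sketch, under the assumption that `collection` is an instance of the class above and the paths exist on disk (all names here are hypothetical):

paths = ['/scanroot/a.bag', '/scanroot/b.bag']
dataset = collection.make_dataset(paths, name='a')
print(dataset.setid, dataset.timestamp)  # timestamp: newest mtime, in ms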
Example 2
def load_dataset(setdir, dataset):
    setid = SetID(dataset.setid)
    files = [{
        'path': x.path,
        'missing': bool(x.missing),
        'mtime': x.mtime * 10**6,  # stored milliseconds -> nanoseconds
        'size': x.size
    } for x in sorted(dataset.files, key=lambda x: x.idx)]
    dct = {
        'id': setid,
        'name': dataset.name,
        'files': files,
        'time_added': dataset.time_added * 10**6,  # milliseconds -> nanoseconds
        'timestamp': dataset.timestamp * 10**6  # milliseconds -> nanoseconds
    }
    try:
        wrapper = Wrapper.from_dict(Dataset, dct)
    except KjException:
        from pprint import pformat
        print('Schema violation for %s with data:\n%s\nschema: %s' %
              (Dataset.schema.node.displayName, pformat(dct), Dataset.schema),
              file=sys.stderr)
        raise
    wrapper._setdir = setdir  # needed by dataset.load(node)
    return [wrapper]
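A sketch of how the result might be consumed, assuming `setdir` and a `dataset` row queried from the database (hypothetical setup):

wrapper, = load_dataset(setdir, dataset)  # always a one-element list
print(wrapper.name, wrapper.time_added)   # attribute access goes through Wrapper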
Example 3
def detail(setid, path):
    try:
        setid = str(SetID(setid))
    except TypeError:
        flask.abort(404)

    setdir = os.path.join(current_app.site.config.marv.storedir, setid)

    # dataset file download
    try:
        idx = int(path)
    except (TypeError, ValueError):
        pass
    else:
        path = db.session.query(File.path)\
                         .join(Dataset)\
                         .filter(Dataset.setid == setid)\
                         .filter(File.idx == idx)\
                         .scalar()
        try:
            return flask.send_file(path, as_attachment=True)
        except ValueError:
            flask.abort(404)

    try:
        if path:
            return flask.send_from_directory(setdir, path, conditional=True)
        with open(os.path.join(setdir, 'detail.json')) as f:
            detail = json.load(f)
    except IOError:
        return flask.abort(404)

    # TODO: investigate merge into one
    dataset_id, collection = db.session.query(Dataset.id, Dataset.collection)\
                                       .filter(Dataset.setid == setid)\
                                       .one()
    comments = db.session.query(Comment.author, Comment.time_added, Comment.text)\
                         .filter(Comment.dataset_id == dataset_id)\
                         .order_by(Comment.time_added)
    detail['comments'] = [{
        'author': x[0],
        'timeAdded': x[1],
        'text': x[2]
    } for x in comments]

    collection = current_app.site.collections[collection]
    alltags = db.session.query(Tag.value)\
                        .filter(Tag.collection == collection.name)\
                        .order_by(Tag.value)
    detail['all_known_tags'] = [x[0] for x in alltags]

    tags = db.session.query(Tag.value)\
                     .join(dataset_tag)\
                     .filter(dataset_tag.c.dataset_id == dataset_id)\
                     .order_by(Tag.value)
    detail['tags'] = [x[0] for x in tags]
    detail['collection'] = collection.name
    detail['id'] = dataset_id
    detail['setid'] = setid
    return flask.jsonify(detail)
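In short, the handler dispatches on path: a purely numeric path downloads the file with that idx from the database, any other non-empty path is served from the set directory, and an empty path returns detail.json enriched with comments, tags, and collection metadata.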
Example 4
    def __getattr__(self, name):
        # TODO: parse schema and do this properly
        if name == 'id' and hasattr(self._reader, 'id0'):
            return SetID(self._reader.id0, self._reader.id1)

        value = getattr(self._reader, name)

        if isinstance(value, StructReader):
            return Wrapper(value, self._streamdir, self._setdir)
        elif isinstance(value, ListReader):
            return ListWrapper(value, self._streamdir, self._setdir)
        else:
            return value
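The wrapper relies on Python's attribute-lookup fallback: __getattr__ is invoked only for names not found through normal lookup. A self-contained sketch of the same delegation pattern (not the marv Wrapper itself):

class Proxy(object):
    def __init__(self, target):
        self._target = target

    def __getattr__(self, name):
        # Called only when `name` is not found on Proxy itself.
        return getattr(self._target, name)

p = Proxy(complex(1, 2))
print(p.real, p.imag)  # 1.0 2.0, both resolved on the wrapped object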
Example 5
def detail(setid, path):
    try:
        setid = str(SetID(setid))
    except TypeError:
        flask.abort(404)

    setdir = os.path.join(current_app.site.config.marv.storedir, setid)

    if path == 'detail.json':
        return _send_detail_json(setid, setdir)

    if path.isdigit():
        path = db.session.query(File.path)\
                         .join(Dataset)\
                         .filter(Dataset.setid == setid)\
                         .filter(File.idx == int(path))\
                         .scalar()
    else:
        path = flask.safe_join(setdir, path)

    # Make sure a path was found (the query returns None for an unknown
    # idx) and that it is absolute, normalized, and an existing file.
    if not path \
       or not os.path.isabs(path) \
       or path != os.path.normpath(path) \
       or not os.path.isfile(path):
        return flask.abort(404)

    if current_app.site.config.marv.reverse_proxy == 'nginx':
        # Hand the actual file transfer off to nginx (X-Accel-Redirect).
        resp = flask.make_response()
        mime = mimetypes.guess_type(path)
        resp.headers['content-type'] = \
            mime[0] if mime[0] else 'application/octet-stream'
        resp.headers['cache-control'] = 'no-cache'
        resp.headers['x-accel-buffering'] = 'no'
        resp.headers['x-accel-redirect'] = \
            (current_app.config['APPLICATION_ROOT'] or '') + path
        resp.headers.add('Content-Disposition',
                         'attachment',
                         filename=os.path.basename(path))
        return resp

    try:
        resp = flask.send_file(path, as_attachment=True, conditional=True)
        resp.headers['Cache-Control'] = 'no-cache'
        return resp
    except ValueError:
        flask.abort(404)
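In the nginx branch, Flask returns an empty response whose x-accel-redirect header tells nginx to stream the file itself; the prefix taken from APPLICATION_ROOT is assumed to match an internal location in the nginx configuration that maps onto the storedir. x-accel-buffering: no disables proxy buffering so large files start streaming immediately.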
Example 6
    def __getattr__(self, name):
        # Translate snake_case attribute access to the camelCase field name.
        parts = name.split('_')
        field_name = parts[0] + ''.join(
            ((x[0].upper() + x[1:]) if x else '_') for x in parts[1:])

        if field_name in self._reader.schema.fieldnames \
           and self._reader._has(field_name):
            field = self._reader.schema.fields[field_name]
            return _wrap(getattr(self._reader, name), self._streamdir,
                         self._setdir, field=field)

        if name == 'id' and self._reader._has('id0') and self._reader._has('id1'):
            return SetID(self._reader.id0, self._reader.id1)

        return getattr(self._reader, name)  # Not a field, but e.g. a method
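The field-name computation translates Python snake_case attribute names into the camelCase names used by the underlying schema. The same conversion, runnable in isolation:

def to_camel(name):
    parts = name.split('_')
    return parts[0] + ''.join(
        ((x[0].upper() + x[1:]) if x else '_') for x in parts[1:])

assert to_camel('time_added') == 'timeAdded'
assert to_camel('all_known_tags') == 'allKnownTags'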
Example 7
    def make_dataset(self, files, name, time_added=None, discarded=None, setid=None, status=None,
                     timestamp=None, _restore=None):
        setid = setid or SetID.random()
        if _restore:
            # Restore File entries from previously serialized dictionaries.
            files = [File(idx=i, **x) for i, x in enumerate(files)]
        else:
            # Stat each path once; mtimes are stored in milliseconds.
            files = [File(idx=i, mtime=int(utils.mtime(path) * 1000), path=path, size=stat.st_size)
                     for i, (path, stat)
                     in enumerate((path, os.stat(path)) for path in files)]
        time_added = int(utils.now() * 1000) if time_added is None else time_added
        dataset = Dataset(collection=self.name,
                          files=files,
                          name=name,
                          discarded=discarded,
                          status=status,
                          time_added=time_added,
                          timestamp=timestamp or max(x.mtime for x in files),
                          setid=setid)

        storedir = self.config.marv.storedir
        store = Store(storedir, self.nodes)
        store.add_dataset(dataset, exists_okay=_restore)
        self.render_detail(dataset)
        return dataset
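Compared with Example 1, this variant also supports restoring previously serialized datasets. A hypothetical restore call; `collection`, `old_setid`, and `old_time_added` are assumptions, and the file dicts must carry exactly the fields File expects:

dataset = collection.make_dataset(
    files=[{'path': '/data/a.bag', 'mtime': 1528000000000, 'size': 4096}],
    name='a', setid=old_setid, time_added=old_time_added, _restore=True)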
Example 8
    def setid(self):
        return SetID(self._setid)
Example 9
def marvcli_run(ctx, datasets, deps, excluded_nodes, force, force_dependent,
                force_deps, keep, keep_going, list_nodes, list_dependent,
                selected_nodes, update_detail, update_listing, cachesize,
                collections):
    """Run nodes for selected datasets.

    Datasets are specified by a list of set ids, or --collection
    <name>, use --collection=* to run for all collections. --node in
    conjunction with --collection=* will pick those collections for
    which the selected nodes are configured.

    Set ids may be abbreviated to any uniquely identifying
    prefix. Suffix a prefix by '+' to match multiple.

    """
    if collections and datasets:
        ctx.fail('--collection and DATASETS are mutually exclusive')

    if list_dependent and not selected_nodes:
        ctx.fail('--list-dependent needs at least one selected --node')

    if force_dependent and not selected_nodes:
        ctx.fail('--force-dependent needs at least one selected --node')

    if not any([datasets, collections, list_nodes]):
        click.echo(ctx.get_help())
        ctx.exit(1)

    deps = 'force' if force_deps else deps
    force = force_deps or force

    site = create_app().site

    if '*' in collections:
        if selected_nodes:
            collections = [
                k for k, v in site.collections.items()
                if set(v.nodes).issuperset(selected_nodes)
            ]
            if not collections:
                ctx.fail('No collections have all selected nodes')
        else:
            collections = None
    else:
        for col in collections:
            if col not in site.collections:
                ctx.fail('Unknown collection: {}'.format(col))

    if list_nodes:
        for col in (collections or sorted(site.collections.keys())):
            click.echo('{}:'.format(col))
            for name in sorted(site.collections[col].nodes):
                if name == 'dataset':
                    continue
                click.echo('    {}'.format(name))
        return

    if list_dependent:
        for col in (collections or sorted(site.collections.keys())):
            click.echo('{}:'.format(col))
            dependent = {
                x
                for name in selected_nodes
                for x in site.collections[col].nodes[name].dependent
            }
            for name in sorted(x.name for x in dependent):
                click.echo('    {}'.format(name))
        return

    errors = []

    setids = [SetID(x) for x in parse_setids(datasets)]
    if not setids:
        query = db.session.query(Dataset.setid)\
                          .filter(Dataset.discarded.isnot(True))\
                          .filter(Dataset.status.op('&')(STATUS_MISSING) == 0)
        if collections is not None:
            query = query.filter(Dataset.collection.in_(collections))
        setids = (SetID(x[0]) for x in query)

    for setid in setids:
        if IPDB:
            site.run(setid,
                     selected_nodes,
                     deps,
                     force,
                     keep,
                     force_dependent,
                     update_detail,
                     update_listing,
                     excluded_nodes,
                     cachesize=cachesize)
        else:
            try:
                site.run(setid,
                         selected_nodes,
                         deps,
                         force,
                         keep,
                         force_dependent,
                         update_detail,
                         update_listing,
                         excluded_nodes,
                         cachesize=cachesize)
            except UnknownNode as e:
                ctx.fail('Collection {} has no node {}'.format(*e.args))
            except NoResultFound:
                click.echo('ERROR: unknown {!r}'.format(setid), err=True)
                if not keep_going:
                    raise
            except RequestedMessageTooOld as e:
                ctx.fail((
                    "{} pulled a {} message ({}) that is no longer in memory."
                    "\nSee https://ternaris.com/marv-robotics/docs/patterns.html#reduce-separately"
                ).format(e.args[0]._requestor.node.name,
                         e.args[0].handle.node.name, e.args[1]))
            except BaseException as e:
                errors.append(setid)
                if isinstance(e, KeyboardInterrupt):
                    log.warning('KeyboardInterrupt: aborting')
                    raise
                elif isinstance(e, DirectoryAlreadyExists):
                    click.echo("""
ERROR: Directory for node run already exists:
{!r}
In case no other node run is in progress, this is a bug which you are kindly
asked to report, providing information regarding any previous, failed node runs.
""".format(e.args[0]),
                               err=True)
                    if not keep_going:
                        ctx.abort()
                else:
                    log.error('Exception occurred for dataset %s:',
                              setid,
                              exc_info=True)
                    log.error('Error occurred for dataset %s: %s', setid, e)
                    if not keep_going:
                        ctx.exit(1)
    if errors:
        log.error('There were errors for %r', errors)
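Hypothetical invocations derived from the docstring above (the `marv run` entry point is an assumption):

marv run --collection=*                   # run all nodes for all collections
marv run --collection=bags --node=images  # one node for one collection
marv run abcdef+                          # all set ids starting with 'abcdef'
marv run --list-nodes --collection=*      # list configured nodes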
Example 10
# -*- coding: utf-8 -*-
#
# Copyright 2016 - 2018  Ternaris.
# SPDX-License-Identifier: AGPL-3.0-only

from __future__ import absolute_import, division, print_function

import os
import unittest

from marv_node.setid import SetID
from marv_node.testing import temporary_directory
from marv_nodes import dataset

from . import Store

SETID = SetID(42)


@unittest.skip
class TestCase(unittest.TestCase):
    def test(self):
        with temporary_directory() as tmpdir:
            scanroot = os.path.join(tmpdir, 'scanroot')
            os.mkdir(scanroot)
            file1 = os.path.join(scanroot, 'file1')
            file2 = os.path.join(scanroot, 'file2')
            with open(file1, 'w') as f:
                f.write('1')
            with open(file2, 'w') as f:
                f.write('10')