async def test_node(self):
     """Run `node` with persistence enabled and verify its output stream."""
     with temporary_directory() as storedir:
         store = Store(storedir, PERSIST)
         testset = make_dataset(self.BAGS)
         store.add_dataset(testset)
         result = await run_nodes(testset, [node], store, PERSIST)
         self.assertNodeOutput(result[0], node)
Beispiel #2
0
    def make_dataset(self, files, name, time_added=None):
        """Create a Dataset from *files*, add it to the store, and render its detail view.

        *files* is an iterable of filesystem paths; mtimes and sizes are taken
        from ``os.stat``.  *time_added* defaults to the current time in
        milliseconds.  Returns the new Dataset.
        """
        setid = SetID.random()
        file_models = []
        for idx, path in enumerate(files):
            stat = os.stat(path)
            file_models.append(File(idx=idx,
                                    mtime=int(stat.st_mtime * 1000),
                                    path=path,
                                    size=stat.st_size))
        if time_added is None:
            time_added = int(time.time() * 1000)
        dataset = Dataset(
            collection=self.name,
            files=file_models,
            name=name,
            #status=8,  # pending state see marv/model
            time_added=time_added,
            timestamp=max(f.mtime for f in file_models),
            setid=setid)

        store = Store(self.config.marv.storedir, self.nodes)
        store.add_dataset(dataset)
        self.render_detail(dataset)
        return dataset
 async def test_node(self):
     """Without a persistence config, running `node` yields no stream output."""
     with temporary_directory() as storedir:
         store = Store(storedir, {})
         testset = make_dataset(self.BAGS)
         store.add_dataset(testset)
         result = await run_nodes(testset, [node], store)
         assert result == [None]
Beispiel #4
0
 async def test_node(self):
     """Both fulltext and collect observe the 'hello' token in their streams."""
     with temporary_directory() as storedir:
         store = Store(storedir, {})
         testset = make_dataset(self.BAGS)
         store.add_dataset(testset)
         streams = await run_nodes(testset, [fulltext, collect], store)
         fulltext_stream, collect_stream = streams[0], streams[1]
         assert 'hello' in fulltext_stream[0].words
         assert any('hello' in item for item in collect_stream)
Beispiel #5
0
 def test_node(self):
     """Run `node` through a sink with persistence and check the sink's stream."""
     with temporary_directory() as storedir:
         store = Store(storedir, PERSIST)
         testset = make_dataset(self.BAGS)
         store.add_dataset(testset)
         sink = make_sink(node)
         run_nodes(testset, [sink], store, PERSIST)
         self.assertNodeOutput(sink.stream, node)
 def test_node(self):
     """Run `collect` through a sink and verify it emits a single 'Success'."""
     with temporary_directory() as storedir:
         store = Store(storedir, {})
         dataset = make_dataset(self.BAGS)
         store.add_dataset(dataset)
         sink = make_sink(collect)
         run_nodes(dataset, [sink], store)
         # assertEquals is a deprecated alias (removed in Python 3.12);
         # use the canonical assertEqual instead.
         self.assertEqual(sink.stream, ['Success'])
Beispiel #7
0
 def test_node(self):
     """Without persistence, the sink attached to `node` receives nothing."""
     with temporary_directory() as storedir:
         store = Store(storedir, {})
         testset = make_dataset(self.BAGS)
         store.add_dataset(testset)
         sink = make_sink(node)
         run_nodes(testset, [sink], store)
         self.assertEqual(len(sink.stream), 0)
Beispiel #8
0
    def run(self,
            setid,
            selected_nodes=None,
            deps=None,
            force=None,
            keep=None,
            update_detail=None,
            update_listing=None,
            excluded_nodes=None,
            cachesize=None):
        """Run nodes for dataset *setid* and refresh its detail/listing views.

        Node names containing ':' are resolved as dotted object paths via
        ``find_obj``; all others must be persistent nodes of the collection.
        Raises UnknownNode for names the collection does not know.  Unless
        *keep* is set, pending temporary directories are always cleaned up.
        """
        excluded_nodes = excluded_nodes or []
        dataset = Dataset.query.filter(Dataset.setid == str(setid))\
                               .options(db.joinedload(Dataset.files))\
                               .one()
        collection = self.collections[dataset.collection]
        selected_nodes = set(selected_nodes or [])
        # No explicit selection and no pure re-render request: run every node
        # the listing and detail views depend on.
        if not (selected_nodes or update_listing or update_detail):
            selected_nodes.update(collection.listing_deps)
            selected_nodes.update(collection.detail_deps)
        persistent = collection.nodes
        try:
            nodes = [
                persistent[name] if ':' not in name else find_obj(name)
                for name in selected_nodes if name not in excluded_nodes
                if name != 'dataset'
            ]
        except KeyError as e:
            # Chain the KeyError so the original lookup failure is preserved.
            raise UnknownNode(dataset.collection, e.args[0]) from e
        nodes.sort()

        storedir = app.site.config.marv.storedir
        store = Store(storedir, persistent)

        changed = False
        try:
            if nodes:
                changed = run_nodes(dataset,
                                    nodes,
                                    store,
                                    force=force,
                                    persistent=persistent,
                                    deps=deps,
                                    cachesize=cachesize)
            # Re-render only after a successful run (an exception above skips
            # this and propagates); the finally block still cleans up.
            if changed or update_detail:
                collection.render_detail(dataset)
                log.verbose('%s detail rendered', setid)
            if changed or update_listing:
                collection.update_listings([dataset])
                log.verbose('%s listing rendered', setid)
        finally:
            if not keep:
                for tmpdir in store.pending.values():
                    store.logdebug('Cleaning up %r', tmpdir)
                    shutil.rmtree(tmpdir)
                store.pending.clear()
Beispiel #9
0
 def test_node(self):
     """Two sinks run together each capture their own node's output."""
     with temporary_directory() as storedir:
         store = Store(storedir, {})
         testset = make_dataset(self.BAGS)
         store.add_dataset(testset)
         fulltext_sink = make_sink(fulltext)
         collect_sink = make_sink(collect)
         run_nodes(testset, [fulltext_sink, collect_sink], store)
         assert 'hello' in fulltext_sink.stream[0].words
         assert any('hello' in item for item in collect_sink.stream)
Beispiel #10
0
    async def make_dataset(self,
                           connection,
                           files,
                           name,
                           time_added=None,
                           discarded=False,
                           setid=None,
                           status=0,
                           timestamp=None,
                           _restore=None):
        """Create a Dataset with its File rows and render its detail view.

        *files* is either an iterable of filesystem paths or, when *_restore*
        is truthy, an iterable of keyword dicts previously dumped for File
        rows.  *time_added* defaults to now in milliseconds; *timestamp*
        defaults to the newest file mtime.  All DB work runs on *connection*.
        Returns the created Dataset with its 'files' relation fetched.
        """
        # pylint: disable=too-many-arguments
        time_added = int(utils.now() *
                         1000) if time_added is None else time_added

        collection = await CollectionModel.filter(
            name=self.name).using_db(connection).first()
        # timestamp is set to 0 here and corrected below, once the file
        # mtimes are known.
        dataset = await Dataset.create(collection=collection,
                                       name=name,
                                       discarded=discarded,
                                       status=status,
                                       time_added=time_added,
                                       timestamp=0,
                                       setid=setid or SetID.random(),
                                       acn_id=collection.acn_id,
                                       using_db=connection)

        if _restore:
            # Restore path: *files* already holds File field dicts.
            files = [
                File(dataset=dataset, idx=i, **x) for i, x in enumerate(files)
            ]
        else:
            # Fresh scan: stat each path for mtime/size.
            files = [
                File(dataset=dataset,
                     idx=i,
                     mtime=int(utils.mtime(path) * 1000),
                     path=path,
                     size=stat.st_size) for i, (path, stat) in enumerate(
                         (path, utils.stat(path)) for path in files)
            ]

        dataset.timestamp = timestamp or max(x.mtime for x in files)
        await dataset.save(using_db=connection)
        await File.bulk_create(files, using_db=connection)

        await dataset.fetch_related('files', using_db=connection)

        storedir = self.config.marv.storedir
        store = Store(storedir, self.nodes)
        # exists_okay lets a restore tolerate an already-existing store entry.
        store.add_dataset(dataset, exists_okay=_restore)
        self.render_detail(dataset)
        return dataset
Beispiel #11
0
    def render_listing(self, dataset):
        """Compute the listing row and filter field values for *dataset*.

        Returns a ``(row, fields, relfields)`` tuple: the listing row dict,
        plain filter fields, and relation-backed filter fields.
        """
        storedir = self.config.marv.storedir
        setdir = os.path.join(storedir, str(dataset.setid))
        funcs = make_funcs(dataset, setdir, Store(storedir, self.nodes))

        values = []
        for col, functree in self.listing_functions:
            suffix = '[]' if col.islist else ''
            formatter = FORMATTER_MAP[col.formatter + suffix]
            values.append(formatter(calltree(functree, funcs)))

        row = {
            'id': dataset.id,
            'setid': str(dataset.setid),
            'tags': ['#TAGS#'],
            'values': values,
        }

        fields = {}
        relfields = {}
        relations = self.model.relations
        for filter_spec, functree in self.filter_functions:
            raw = calltree(functree, funcs)
            converted = FILTER_MAP[filter_spec.value_type](raw)
            # Relation-backed filters are collected separately.
            if filter_spec.name in relations:
                relfields[filter_spec.name] = converted
            else:
                fields[filter_spec.name] = converted

        return row, fields, relfields
Beispiel #12
0
    def render_detail(self, dataset):
        """Render the dataset's detail view to ``detail.json`` in its set dir.

        Loads the configured summary widgets and sections from the store,
        builds the Detail message, and writes it atomically: first to a
        hidden ``.detail.json`` (O_EXCL guards against concurrent renders),
        then renamed into place.
        """
        storedir = self.config.marv.storedir
        setdir = os.path.join(storedir, str(dataset.setid))
        try:
            os.mkdir(setdir)
        except OSError:
            pass  # directory may already exist; verified by the assert below
        assert os.path.isdir(setdir), setdir
        store = Store(storedir, self.nodes)
        funcs = make_funcs(dataset, setdir, store)

        summary_widgets = [
            x[0]._reader for x in  # pylint: disable=protected-access
            [
                store.load(setdir, node, default=None)
                for node in self.detail_summary_widgets
            ] if x
        ]

        sections = [
            x[0]._reader for x in  # pylint: disable=protected-access
            [
                store.load(setdir, node, default=None)
                for node in self.detail_sections
            ] if x
        ]

        dct = {
            'title': calltree(self.detail_title, funcs),
            'sections': sections,
            'summary': {
                'widgets': summary_widgets
            }
        }
        detail = Detail.new_message(**dct).as_reader()
        dct = detail_to_dict(detail)
        fd = os.open(os.path.join(setdir, '.detail.json'),
                     os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o666)
        # Context manager ensures the descriptor is closed even if
        # json.dump raises (the original leaked fd on error).
        with os.fdopen(fd, 'w') as jsonfile:
            json.dump(dct, jsonfile, sort_keys=True)
        os.rename(os.path.join(setdir, '.detail.json'),
                  os.path.join(setdir, 'detail.json'))
        self._check_outdated(dataset)
Beispiel #13
0
    def make_dataset(self, files, name, time_added=None, discarded=None, setid=None, status=None,
                     timestamp=None, _restore=None):
        """Build a Dataset from *files*, register it with the store, render its detail.

        When *_restore* is truthy, *files* holds keyword dicts for File rows;
        otherwise it is an iterable of paths which are stat'ed for size
        (mtime comes from ``utils.mtime``).  Returns the Dataset.
        """
        setid = setid or SetID.random()
        if _restore:
            files = [File(idx=i, **x) for i, x in enumerate(files)]
        else:
            records = []
            for i, path in enumerate(files):
                stat = os.stat(path)
                records.append(File(idx=i, mtime=int(utils.mtime(path) * 1000),
                                    path=path, size=stat.st_size))
            files = records
        if time_added is None:
            time_added = int(utils.now() * 1000)
        dataset = Dataset(collection=self.name,
                          files=files,
                          name=name,
                          discarded=discarded,
                          status=status,
                          time_added=time_added,
                          timestamp=timestamp or max(x.mtime for x in files),
                          setid=setid)

        store = Store(self.config.marv.storedir, self.nodes)
        store.add_dataset(dataset, exists_okay=_restore)
        self.render_detail(dataset)
        return dataset
Beispiel #14
0
 def load(self, node):
     """Return the persisted output of *node* for this set, or None if absent."""
     from marv_store import Store  # pylint: disable=import-outside-toplevel
     nodestore = Store(str(self._storedir), {node.name: node})
     return nodestore.load(str(self._setdir), node, default=None)
Beispiel #15
0
    async def run(self,
                  setid,
                  selected_nodes=None,
                  deps=None,
                  force=None,
                  keep=None,
                  force_dependent=None,
                  update_detail=None,
                  update_listing=None,
                  excluded_nodes=None,
                  cachesize=None):
        """Run nodes for dataset *setid*, then refresh its detail/listing views.

        Node names containing ':' are resolved via ``find_obj`` and wrapped
        as DAG nodes; others must be persistent nodes of the collection
        (unknown names raise ConfigError).  With *force_dependent*, nodes
        depending on the selected ones are run as well.  Unless *keep* is
        set, pending stream files and temp directories are always cleaned
        up.  Returns whether any node output changed.
        """
        # pylint: disable=too-many-arguments,too-many-locals,too-many-branches

        assert not force_dependent or selected_nodes

        excluded_nodes = set(excluded_nodes or [])
        async with scoped_session(self.db) as txn:
            dataset = await Dataset.get(setid=setid)\
                                   .prefetch_related('collection', 'files')\
                                   .using_db(txn)
        collection = self.collections[dataset.collection.name]
        selected_nodes = set(selected_nodes or [])
        # No explicit selection and no pure re-render request: run every node
        # the listing and detail views depend on.
        if not (selected_nodes or update_listing or update_detail):
            selected_nodes.update(collection.listing_deps)
            selected_nodes.update(collection.detail_deps)
        persistent = collection.nodes
        try:
            nodes = {
                persistent[name] if ':' not in name else Node.from_dag_node(
                    find_obj(name))
                for name in selected_nodes if name not in excluded_nodes
                if name != 'dataset'
            }
        except KeyError as exc:
            # Chain the KeyError so the failing lookup is preserved (B904).
            raise ConfigError(
                f'Collection {collection.name!r} has no node {exc}') from exc

        if force_dependent:
            nodes.update(x for name in selected_nodes
                         for x in persistent[name].dependent)
        nodes = sorted(nodes)

        storedir = self.config.marv.storedir
        store = Store(storedir, persistent)

        changed = False
        try:
            if nodes:
                changed = await run_nodes(dataset,
                                          nodes,
                                          store,
                                          force=force,
                                          persistent=persistent,
                                          deps=deps,
                                          cachesize=cachesize,
                                          site=self)
        finally:
            if not keep:
                # Close any open stream files before removing their dirs.
                for stream in store.pending:
                    if stream.streamfile:
                        stream.streamfile.close()
                for stream in store.readstreams:
                    if stream.streamfile:
                        stream.streamfile.close()
                for tmpdir, tmpdir_fd in store.pending.values():
                    store.logdebug('Cleaning up %r', tmpdir)
                    shutil.rmtree(tmpdir)
                    # Release the advisory lock only after removal.
                    fcntl.flock(tmpdir_fd, fcntl.LOCK_UN)
                    os.close(tmpdir_fd)
                store.pending.clear()

        if changed or update_detail:
            collection.render_detail(dataset)
            log.verbose('%s detail rendered', setid)
        if changed or update_listing:
            await collection.update_listings([dataset])
            log.verbose('%s listing rendered', setid)

        return changed
Beispiel #16
0
 def load(self, node):
     """Return *node*'s persisted output for this set, or None if absent."""
     from marv_store import Store
     nodestore = Store(current_app.site.config.marv.storedir,
                       {node.name: node})
     return nodestore.load(self._setdir, node, default=None)