async def test_node(self):
    """Verify that a persisted *node* produces the expected output stream."""
    with temporary_directory() as tmpdir:
        # Persisted nodes are registered with the store up front.
        store = Store(tmpdir, PERSIST)
        dataset = make_dataset(self.BAGS)
        store.add_dataset(dataset)
        streams = await run_nodes(dataset, [node], store, PERSIST)
        self.assertNodeOutput(streams[0], node)
def make_dataset(self, files, name, time_added=None):
    """Create a Dataset from *files*, add it to a store and render its detail.

    *files* is an iterable of paths; each is stat'ed to fill in mtime and
    size.  *time_added* defaults to the current time in milliseconds.
    Returns the new Dataset.
    """
    records = []
    for idx, path in enumerate(files):
        info = os.stat(path)
        records.append(File(idx=idx, mtime=int(info.st_mtime * 1000),
                            path=path, size=info.st_size))
    if time_added is None:
        time_added = int(time.time() * 1000)
    dataset = Dataset(collection=self.name,
                      files=records,
                      name=name,
                      # status=8,  # pending state, see marv/model
                      time_added=time_added,
                      timestamp=max(rec.mtime for rec in records),
                      setid=SetID.random())
    store = Store(self.config.marv.storedir, self.nodes)
    store.add_dataset(dataset)
    self.render_detail(dataset)
    return dataset
async def test_node(self):
    """Without persistence configured, running *node* yields no stream."""
    with temporary_directory() as tmpdir:
        # Empty persistence mapping: nothing is stored for this node.
        store = Store(tmpdir, {})
        dataset = make_dataset(self.BAGS)
        store.add_dataset(dataset)
        streams = await run_nodes(dataset, [node], store)
        assert streams == [None]
async def test_node(self):
    """Both fulltext and collect should pick up the word 'hello'."""
    with temporary_directory() as tmpdir:
        store = Store(tmpdir, {})
        dataset = make_dataset(self.BAGS)
        store.add_dataset(dataset)
        streams = await run_nodes(dataset, [fulltext, collect], store)
        fulltext_stream, collect_stream = streams
        assert 'hello' in fulltext_stream[0].words
        assert any('hello' in msg for msg in collect_stream)
def test_node(self):
    """A sink wrapped around a persisted *node* receives its full output."""
    with temporary_directory() as tmpdir:
        store = Store(tmpdir, PERSIST)
        dataset = make_dataset(self.BAGS)
        store.add_dataset(dataset)
        # The sink captures whatever node emits during the run.
        sink = make_sink(node)
        run_nodes(dataset, [sink], store, PERSIST)
        self.assertNodeOutput(sink.stream, node)
def test_node(self):
    """The collect node's sink should receive exactly ['Success']."""
    with temporary_directory() as storedir:
        store = Store(storedir, {})
        dataset = make_dataset(self.BAGS)
        store.add_dataset(dataset)
        sink = make_sink(collect)
        run_nodes(dataset, [sink], store)
        # assertEquals is a deprecated alias (removed in Python 3.12);
        # use the canonical assertEqual.
        self.assertEqual(sink.stream, ['Success'])
def test_node(self):
    """Without persistence the sink observes an empty stream."""
    with temporary_directory() as tmpdir:
        store = Store(tmpdir, {})
        dataset = make_dataset(self.BAGS)
        store.add_dataset(dataset)
        sink = make_sink(node)
        run_nodes(dataset, [sink], store)
        # Exact length check: the node must not have emitted anything.
        self.assertEqual(len(sink.stream), 0)
def test_node(self):
    """Both the fulltext and collect sinks should see the word 'hello'."""
    with temporary_directory() as tmpdir:
        store = Store(tmpdir, {})
        dataset = make_dataset(self.BAGS)
        store.add_dataset(dataset)
        fulltext_sink = make_sink(fulltext)
        collect_sink = make_sink(collect)
        run_nodes(dataset, [fulltext_sink, collect_sink], store)
        assert 'hello' in fulltext_sink.stream[0].words
        assert any('hello' in msg for msg in collect_sink.stream)
async def make_dataset(self, connection, files, name, time_added=None, discarded=False,
                       setid=None, status=0, timestamp=None, _restore=None):
    """Create a Dataset row plus its File rows and register it with a store.

    Runs all queries on *connection*.  *files* is either an iterable of
    paths (stat'ed on the fly) or, when ``_restore`` is truthy, an
    iterable of mappings with pre-recorded File fields.  *time_added*
    defaults to the current time in milliseconds; *timestamp* defaults to
    the newest file mtime.  Returns the created dataset with its ``files``
    relation fetched.
    """
    # pylint: disable=too-many-arguments
    time_added = int(utils.now() * 1000) if time_added is None else time_added
    # Look up the collection row this test-collection instance maps to.
    collection = await CollectionModel.filter(name=self.name).using_db(connection).first()
    # timestamp is set to 0 here and fixed up below, once the file mtimes
    # are known.
    dataset = await Dataset.create(collection=collection, name=name, discarded=discarded,
                                   status=status, time_added=time_added, timestamp=0,
                                   setid=setid or SetID.random(), acn_id=collection.acn_id,
                                   using_db=connection)
    if _restore:
        # Restoring: each entry already carries the recorded File fields.
        files = [File(dataset=dataset, idx=i, **x) for i, x in enumerate(files)]
    else:
        # Fresh files: stat each path once to fill in mtime and size.
        files = [File(dataset=dataset, idx=i, mtime=int(utils.mtime(path) * 1000),
                      path=path, size=stat.st_size)
                 for i, (path, stat) in enumerate((path, utils.stat(path)) for path in files)]
    dataset.timestamp = timestamp or max(x.mtime for x in files)
    await dataset.save(using_db=connection)
    await File.bulk_create(files, using_db=connection)
    # Populate dataset.files so callers can use the relation immediately.
    await dataset.fetch_related('files', using_db=connection)
    storedir = self.config.marv.storedir
    store = Store(storedir, self.nodes)
    # exists_okay mirrors _restore: a restored dataset may already exist
    # on disk.
    store.add_dataset(dataset, exists_okay=_restore)
    self.render_detail(dataset)
    return dataset
def make_dataset(self, files, name, time_added=None, discarded=None, setid=None,
                 status=None, timestamp=None, _restore=None):
    """Create a Dataset from *files*, add it to the store and render it.

    When ``_restore`` is truthy each entry of *files* is a mapping of
    pre-recorded File fields; otherwise entries are paths that are
    stat'ed on the fly.  *time_added* defaults to the current time in
    milliseconds and *timestamp* to the newest file mtime.  Returns the
    new Dataset.
    """
    if _restore:
        records = [File(idx=i, **fields) for i, fields in enumerate(files)]
    else:
        records = []
        for i, path in enumerate(files):
            info = os.stat(path)
            records.append(File(idx=i, mtime=int(utils.mtime(path) * 1000),
                                path=path, size=info.st_size))
    if time_added is None:
        time_added = int(utils.now() * 1000)
    dataset = Dataset(collection=self.name,
                      files=records,
                      name=name,
                      discarded=discarded,
                      status=status,
                      time_added=time_added,
                      timestamp=timestamp or max(rec.mtime for rec in records),
                      setid=setid or SetID.random())
    store = Store(self.config.marv.storedir, self.nodes)
    # exists_okay mirrors _restore: a restored dataset may already exist.
    store.add_dataset(dataset, exists_okay=_restore)
    self.render_detail(dataset)
    return dataset