예제 #1
0
 async def test_node(self):
     """Run *node* with persistence enabled and verify its stored output."""
     with temporary_directory() as storedir:
         store = Store(storedir, PERSIST)
         testset = make_dataset(self.BAGS)
         store.add_dataset(testset)
         outputs = await run_nodes(testset, [node], store, PERSIST)
         self.assertNodeOutput(outputs[0], node)
예제 #2
0
    def make_dataset(self, files, name, time_added=None):
        """Build a Dataset from *files*, register it with the store, and render its detail.

        Args:
            files: Iterable of file paths on disk.
            name: Name for the new dataset.
            time_added: Milliseconds since epoch; defaults to now.

        Returns:
            The created Dataset instance.
        """
        setid = SetID.random()
        stats = [(path, os.stat(path)) for path in files]
        files = [
            File(idx=idx, mtime=int(st.st_mtime * 1000), path=path, size=st.st_size)
            for idx, (path, st) in enumerate(stats)
        ]
        if time_added is None:
            time_added = int(time.time() * 1000)
        dataset = Dataset(
            collection=self.name,
            files=files,
            name=name,
            # status=8,  # pending state see marv/model
            time_added=time_added,
            timestamp=max(x.mtime for x in files),
            setid=setid)

        store = Store(self.config.marv.storedir, self.nodes)
        store.add_dataset(dataset)
        self.render_detail(dataset)
        return dataset
예제 #3
0
 async def test_node(self):
     """Running *node* with an empty persist map yields a single None stream."""
     with temporary_directory() as storedir:
         store = Store(storedir, {})
         testset = make_dataset(self.BAGS)
         store.add_dataset(testset)
         outputs = await run_nodes(testset, [node], store)
         assert outputs == [None]
예제 #4
0
 async def test_node(self):
     """Both fulltext and collect should pick up 'hello' from the test bags."""
     with temporary_directory() as storedir:
         store = Store(storedir, {})
         testset = make_dataset(self.BAGS)
         store.add_dataset(testset)
         outputs = await run_nodes(testset, [fulltext, collect], store)
         assert 'hello' in outputs[0][0].words
         assert any('hello' in item for item in outputs[1])
예제 #5
0
 def test_node(self):
     """Run *node* through a sink with persistence and verify the sink's stream."""
     with temporary_directory() as storedir:
         store = Store(storedir, PERSIST)
         testset = make_dataset(self.BAGS)
         store.add_dataset(testset)
         sink = make_sink(node)
         run_nodes(testset, [sink], store, PERSIST)
         self.assertNodeOutput(sink.stream, node)
예제 #6
0
 def test_node(self):
     """Run *collect* through a sink and verify it reports success.

     Fix: use ``assertEqual`` — ``assertEquals`` is a deprecated alias
     that was removed in Python 3.12.
     """
     with temporary_directory() as storedir:
         store = Store(storedir, {})
         dataset = make_dataset(self.BAGS)
         store.add_dataset(dataset)
         sink = make_sink(collect)
         run_nodes(dataset, [sink], store)
         self.assertEqual(sink.stream, ['Success'])
예제 #7
0
 def test_node(self):
     """Without persistence config, *node* delivers nothing into its sink."""
     with temporary_directory() as storedir:
         store = Store(storedir, {})
         testset = make_dataset(self.BAGS)
         store.add_dataset(testset)
         sink = make_sink(node)
         run_nodes(testset, [sink], store)
         self.assertEqual(len(sink.stream), 0)
예제 #8
0
 def test_node(self):
     """fulltext and collect sinks both receive 'hello' from the test bags."""
     with temporary_directory() as storedir:
         store = Store(storedir, {})
         testset = make_dataset(self.BAGS)
         store.add_dataset(testset)
         fulltext_sink = make_sink(fulltext)
         collect_sink = make_sink(collect)
         run_nodes(testset, [fulltext_sink, collect_sink], store)
         assert 'hello' in fulltext_sink.stream[0].words
         assert any('hello' in item for item in collect_sink.stream)
예제 #9
0
    async def make_dataset(self,
                           connection,
                           files,
                           name,
                           time_added=None,
                           discarded=False,
                           setid=None,
                           status=0,
                           timestamp=None,
                           _restore=None):
        """Create and persist a dataset together with its File rows.

        Args:
            connection: Database connection all queries run against.
            files: Iterable of file paths, or — when *_restore* is truthy —
                dicts of File field values to recreate verbatim.
            name: Name of the new dataset.
            time_added: Creation time; defaults to ``int(utils.now() * 1000)``
                (presumably milliseconds since epoch — verify against utils.now).
            discarded: Initial discarded flag for the dataset.
            setid: SetID to use; a random one is generated when None.
            status: Initial status value.
            timestamp: Dataset timestamp; defaults to the newest file mtime.
            _restore: Restore mode; switches file construction to dict
                expansion and passes ``exists_okay`` to the store.

        Returns:
            The created Dataset with its ``files`` relation fetched.
        """
        # pylint: disable=too-many-arguments
        time_added = int(utils.now() *
                         1000) if time_added is None else time_added

        # Look up the collection row by this collection's name.
        collection = await CollectionModel.filter(
            name=self.name).using_db(connection).first()
        # timestamp=0 is a placeholder; the real value is set below once
        # the File objects (and their mtimes) exist.
        dataset = await Dataset.create(collection=collection,
                                       name=name,
                                       discarded=discarded,
                                       status=status,
                                       time_added=time_added,
                                       timestamp=0,
                                       setid=setid or SetID.random(),
                                       acn_id=collection.acn_id,
                                       using_db=connection)

        if _restore:
            # Restore mode: each entry is a dict of File field values.
            files = [
                File(dataset=dataset, idx=i, **x) for i, x in enumerate(files)
            ]
        else:
            # Fresh mode: stat each path once and build File rows from it.
            files = [
                File(dataset=dataset,
                     idx=i,
                     mtime=int(utils.mtime(path) * 1000),
                     path=path,
                     size=stat.st_size) for i, (path, stat) in enumerate(
                         (path, utils.stat(path)) for path in files)
            ]

        # Replace the placeholder timestamp with the newest file mtime
        # (or the caller-supplied value), then persist files in bulk.
        dataset.timestamp = timestamp or max(x.mtime for x in files)
        await dataset.save(using_db=connection)
        await File.bulk_create(files, using_db=connection)

        await dataset.fetch_related('files', using_db=connection)

        storedir = self.config.marv.storedir
        store = Store(storedir, self.nodes)
        # exists_okay mirrors _restore: restoring may hit an existing storedir.
        store.add_dataset(dataset, exists_okay=_restore)
        self.render_detail(dataset)
        return dataset
예제 #10
0
    def make_dataset(self, files, name, time_added=None, discarded=None, setid=None, status=None,
                     timestamp=None, _restore=None):
        """Build a Dataset from *files*, add it to the store, and render its detail.

        Args:
            files: Iterable of file paths, or — when *_restore* is truthy —
                dicts of File field values to recreate verbatim.
            name: Name for the new dataset.
            time_added: Creation time; defaults to ``int(utils.now() * 1000)``.
            discarded: Initial discarded flag.
            setid: SetID to use; a random one is generated when None.
            status: Initial status value.
            timestamp: Dataset timestamp; defaults to the newest file mtime.
            _restore: Restore mode; also passes ``exists_okay`` to the store.

        Returns:
            The created Dataset instance.
        """
        setid = setid or SetID.random()
        if _restore:
            files = [File(idx=i, **fields) for i, fields in enumerate(files)]
        else:
            stats = [(path, os.stat(path)) for path in files]
            files = [
                File(idx=i, mtime=int(utils.mtime(path) * 1000), path=path, size=st.st_size)
                for i, (path, st) in enumerate(stats)
            ]
        if time_added is None:
            time_added = int(utils.now() * 1000)
        dataset = Dataset(collection=self.name,
                          files=files,
                          name=name,
                          discarded=discarded,
                          status=status,
                          time_added=time_added,
                          timestamp=timestamp or max(f.mtime for f in files),
                          setid=setid)

        store = Store(self.config.marv.storedir, self.nodes)
        store.add_dataset(dataset, exists_okay=_restore)
        self.render_detail(dataset)
        return dataset