コード例 #1
0
    async def test_subscribes_to_data(self):
        """Subscribe to a stream over HTTP and check both intervals arrive.

        A local pipe is preloaded with two blocks separated by an interval
        break, closed, and assigned to the supervisor's ``subscription_pipe``.
        The response body is then read back through an ``InputPipe``.
        """
        session: Session = self.app["db"]
        stream = session.query(DataStream).filter_by(name="stream1").one()
        first_block = helpers.create_data(stream.layout)
        second_block = helpers.create_data(stream.layout, length=50)

        # preload the pipe: block, interval break, block, close
        source = pipes.LocalPipe(stream.layout)
        source.write_nowait(first_block)
        source.close_interval_nowait()
        source.write_nowait(second_block)
        source.close_nowait()
        self.supervisor.subscription_pipe = source

        query = {"id": stream.id, "subscribe": '1'}
        async with self.client.get("/data", params=query) as resp:
            rx_pipe = pipes.InputPipe(stream=stream, reader=resp.content)

            # first interval
            rx_first = await rx_pipe.read()
            rx_pipe.consume(len(rx_first))
            np.testing.assert_array_equal(first_block, rx_first)
            self.assertTrue(rx_pipe.end_of_interval)

            # second interval, after which the pipe must be exhausted
            rx_second = await rx_pipe.read()
            rx_pipe.consume(len(rx_second))
            np.testing.assert_array_equal(second_block, rx_second)
            with self.assertRaises(pipes.EmptyPipe):
                await rx_pipe.read()
        self.assertEqual(self.supervisor.unsubscribe_calls, 1)
コード例 #2
0
ファイル: ingest.py プロジェクト: wattsworth/joule
async def copy_interval(start: int, end: int, bar, src_stream: 'DataStream',
                        dest_stream: 'DataStream', src_datastore: 'DataStore',
                        dest_datastore: 'DataStore'):
    """Copy the data between ``start`` and ``end`` into another datastore.

    Blocks extracted from ``src_stream`` are pushed through a local pipe into
    an inserter spawned on ``dest_datastore``. ``bar`` is advanced by the
    timestamp distance covered after every non-empty block, and topped up to
    ``end`` once the inserter has finished. The module-level ``num_rows``
    counter is incremented by the length of every block seen.
    """
    from joule.models import pipes, DataStream

    transfer_pipe = pipes.LocalPipe(src_stream.layout, write_limit=4,
                                    debug=False)
    # do not delete any data
    dest_stream.keep_us = DataStream.KEEP_ALL
    inserter = await dest_datastore.spawn_inserter(
        dest_stream, transfer_pipe, asyncio.get_event_loop())

    progress_ts = start

    async def writer(data, layout, decimated):
        # extraction callback: forward one block and advance the progress bar
        nonlocal progress_ts
        global num_rows

        num_rows += len(data)
        if len(data) == 0:
            return
        newest_ts = data['timestamp'][-1]
        await transfer_pipe.write(data)
        bar.update(newest_ts - progress_ts)
        progress_ts = newest_ts

    await src_datastore.extract(src_stream, start, end, writer)
    await transfer_pipe.close()
    await inserter

    # account for any span after the last timestamp that was written
    bar.update(end - progress_ts)
コード例 #3
0
    async def _test_row_count(self):
        """Check ``psql_helpers.get_row_count`` over several time bounds.

        Inserts 10000 rows into a fresh stream, then compares the reported
        row count against the expected count with a tolerance of 10% of the
        total for open, half-open and closed ranges. Ranges without data and
        a stream that never had data inserted must report exactly zero.
        """
        test_data = helpers.create_data(layout=self.test_stream.layout,
                                        length=10000)
        test_stream = DataStream(
            id=95,
            name="stream1",
            datatype=DataStream.DATATYPE.FLOAT32,
            keep_us=DataStream.KEEP_ALL,
            decimate=True,
            elements=[Element(name="e%d" % x) for x in range(3)])
        pipe = pipes.LocalPipe(test_stream.layout)
        task = await self.store.spawn_inserter(test_stream, pipe)
        await pipe.write(test_data)
        await pipe.close()
        await task
        conn: asyncpg.Connection = await asyncpg.connect(self.db_url)
        try:
            # test to make sure nrows is within 10% of actual value
            # Test: [start, end]
            nrows = await psql_helpers.get_row_count(conn, test_stream, None,
                                                     None)
            self.assertGreater(nrows, len(test_data) * 0.9)
            # Test: [ts,end]
            nrows = await psql_helpers.get_row_count(
                conn, test_stream, test_data[len(test_data) // 2][0], None)
            self.assertLess(abs(nrows - len(test_data) // 2),
                            0.1 * len(test_data))
            # Test: [start, ts]
            nrows = await psql_helpers.get_row_count(
                conn, test_stream, None, test_data[len(test_data) // 3][0])
            self.assertLess(abs(nrows - len(test_data) // 3),
                            0.1 * len(test_data))

            # Test: [ts, ts]
            nrows = await psql_helpers.get_row_count(
                conn, test_stream, test_data[2 * len(test_data) // 6][0],
                test_data[3 * len(test_data) // 6][0])
            self.assertLess(abs(nrows - len(test_data) // 6),
                            0.1 * len(test_data))

            # Test: [ts, ts] (no data)
            nrows = await psql_helpers.get_row_count(
                conn, test_stream, test_data['timestamp'][0] - 100,
                test_data['timestamp'][0] - 50)
            self.assertEqual(0, nrows)

            # Test row count for stream with no data tables
            empty_stream = DataStream(
                id=96,
                name="empty",
                datatype=DataStream.DATATYPE.FLOAT64,
                keep_us=100,
                decimate=True,
                elements=[Element(name="e%d" % x) for x in range(8)])
            nrows = await psql_helpers.get_row_count(conn, empty_stream, None,
                                                     None)
            self.assertEqual(0, nrows)
            nrows = await psql_helpers.get_row_count(
                conn, empty_stream, test_data[len(test_data) // 2][0], None)
            self.assertEqual(0, nrows)
            # NOTE(review): this repeats the earlier no-data query against
            # test_stream; it may have been meant to target empty_stream
            # -- confirm before changing.
            nrows = await psql_helpers.get_row_count(
                conn, test_stream, test_data['timestamp'][0] - 100,
                test_data['timestamp'][0] - 50)
            self.assertEqual(0, nrows)
        finally:
            # release the connection even when an assertion above fails
            await conn.close()
コード例 #4
0
    async def setup(self, parsed_args, inputs, outputs):
        """Wire a SimpleReader to a SimpleFilter through a local pipe.

        Returns a two-element list: the result of the reader's ``run`` call
        followed by the result of the filter's ``run`` call.
        """
        reader = SimpleReader()
        filt = SimpleFilter()

        # handed to the reader and used as the filter's 'to_filter' input
        link = pipes.LocalPipe("float32_3")
        return [
            reader.run(parsed_args, link),
            filt.run(parsed_args,
                     {'to_filter': link},
                     {'from_filter': outputs['output']}),
        ]
コード例 #5
0
ファイル: test_supervisor.py プロジェクト: wattsworth/joule
    def test_subscribes_to_remote_inputs(self):
        """Two subscriptions to one remote stream share a single request.

        The supervisor's ``get_node`` is replaced by a factory returning a
        mock node that counts ``data_subscribe`` calls. After the supervisor
        is stopped, both subscriber pipes are expected to be closed.
        """
        remote_stream = helpers.create_stream('remote', 'float64_2')
        remote_stream.set_remote('http://remote:3000', '/path/to/stream')
        first_pipe = pipes.LocalPipe(layout='float64_2', name='p1')
        second_pipe = pipes.LocalPipe(layout='float64_2', name='p2')
        subscription_requests = 0

        class MockNode:
            """Stand-in node that counts ``data_subscribe`` calls."""

            async def data_subscribe(self, stream):
                nonlocal subscription_requests
                remote_pipe = pipes.LocalPipe(layout='float64_2')
                subscription_requests += 1
                return remote_pipe

            async def close(self):
                pass

        def get_mock_node(name: str):
            # TODO: name should just be the CN of the node
            self.assertEqual(name, "http://remote:3000")
            return MockNode()

        async def setup():
            self.supervisor.get_node = get_mock_node
            for subscriber in (first_pipe, second_pipe):
                self.supervisor.subscribe(remote_stream, subscriber)

            self.supervisor.task = asyncio.sleep(0)

        self.loop.run_until_complete(setup())
        # stop the supervisor; the pipes should be closed afterwards
        self.loop.run_until_complete(self.supervisor.stop())

        # make sure there is only one connection to the remote
        self.assertEqual(subscription_requests, 1)
        # p2 should be subscribed to p1
        self.assertIs(first_pipe.subscribers[0], second_pipe)

        for subscriber in (first_pipe, second_pipe):
            self.assertTrue(subscriber.closed)
コード例 #6
0
    async def _test_consolidate(self):
        """Check that ``consolidate`` merges intervals closer than ``max_gap``.

        Five chunks are inserted as separate intervals. After consolidating
        with ``max_gap=6e3`` the extracted data is expected to form exactly
        two intervals: the first three chunks and the last two chunks.
        """
        # intervals less than max_gap us apart are consolidated
        test_stream = DataStream(
            id=1,
            name="stream1",
            datatype=DataStream.DATATYPE.FLOAT32,
            keep_us=DataStream.KEEP_ALL,
            decimate=True,
            elements=[Element(name="e%d" % x) for x in range(3)])
        pipe = pipes.LocalPipe(test_stream.layout)
        nrows = 955
        orig_data = helpers.create_data(layout=test_stream.layout,
                                        length=nrows)
        # The gap sizes in the diagram assume create_data spaces samples
        # 1000us apart -- TODO confirm against helpers.create_data.
        chunks = [
            orig_data[:300], orig_data[305:400], orig_data[402:700],
            orig_data[800:850], orig_data[852:]
        ]
        # data: |++++++|  |+++++++++|    |++++++|    |++++|  |++++|
        #               ^--5000 us    ^--2000 us   |        ^--- 2000 us
        #                                          `---0.1 sec (retained)
        task = await self.store.spawn_inserter(test_stream, pipe)
        for chunk in chunks:
            await pipe.write(chunk)
            await pipe.close_interval()
        await pipe.close()
        await task

        # extract data, keeping only blocks that do not start with an
        # interval token
        rx_chunks = []

        async def callback(rx_data, layout, factor):
            if rx_data[0] != pipes.interval_token(layout):
                rx_chunks.append(rx_data)

        await self.store.consolidate(test_stream,
                                     start=None,
                                     end=None,
                                     max_gap=6e3)
        await self.store.extract(test_stream,
                                 start=None,
                                 end=None,
                                 callback=callback)

        # should only be two intervals left: the first three chunks are
        # consolidated into one and the last two into another. Check the
        # count first so a wrong number of intervals fails as an assertion
        # instead of an IndexError.
        self.assertEqual(len(rx_chunks), 2)
        np.testing.assert_array_equal(rx_chunks[0], np.hstack(chunks[:3]))
        np.testing.assert_array_equal(rx_chunks[1], np.hstack(chunks[3:]))
コード例 #7
0
ファイル: data_controller.py プロジェクト: wattsworth/joule
async def _subscribe(request: web.Request, json: bool):
    """Stream live data for one stream to the client as a chunked response.

    The stream is selected by the ``path`` or ``id`` query parameter. A local
    pipe is subscribed to the stream through the supervisor and every block
    read from it is written to the response as raw bytes, followed by an
    interval token whenever the pipe reports the end of an interval.

    Args:
        request: the incoming request; ``request.app`` must provide the
            ``"db"`` session and the ``"supervisor"``.
        json: when true the request is rejected, JSON subscriptions are not
            implemented.

    Returns:
        A 400 response for JSON requests, a missing ``id``/``path`` or a
        stream that is not being produced; a 404 response when the stream
        does not exist; otherwise the streaming response, returned once the
        pipe is empty or the client connection is reset.

    Raises:
        asyncio.CancelledError: propagated after unsubscribing when the
            handler is cancelled.
    """
    db: Session = request.app["db"]
    supervisor: Supervisor = request.app['supervisor']
    if json:
        return web.Response(text="JSON subscription not implemented",
                            status=400)

    # find the requested stream
    if 'path' in request.query:
        stream = folder.find_stream_by_path(request.query['path'],
                                            db,
                                            stream_type=DataStream)
    elif 'id' in request.query:
        stream = db.query(DataStream).get(request.query["id"])
    else:
        return web.Response(text="specify an id or a path", status=400)
    if stream is None:
        return web.Response(text="stream does not exist", status=404)
    pipe = pipes.LocalPipe(stream.layout)
    try:
        unsubscribe = supervisor.subscribe(stream, pipe)
    except SubscriptionError:
        return web.Response(text="stream is not being produced", status=400)
    resp = web.StreamResponse(status=200,
                              headers={
                                  'joule-layout': stream.layout,
                                  'joule-decimation': '1'
                              })
    resp.enable_chunked_encoding()

    try:
        await resp.prepare(request)
        while True:
            try:
                data = await pipe.read()
            except pipes.EmptyPipe:
                break
            pipe.consume(len(data))
            if len(data) > 0:
                await resp.write(data.tobytes())
            if pipe.end_of_interval:
                await resp.write(pipes.interval_token(stream.layout).tobytes())
    except ConnectionResetError:
        # the client went away; nothing more can be sent
        pass
    finally:
        # runs exactly once on every exit path, including cancellation
        # (which keeps propagating) and unexpected errors
        unsubscribe()
    # always hand a response object back: a handler must not return None
    return resp
コード例 #8
0
 async def test_unsubscribes_terminated_connections(self):
     """Open a subscription and leave the request without reading any data.

     The mock supervisor's ``hang_pipe`` flag is set and an already closed
     pipe is installed as its ``subscription_pipe``.
     """
     session: Session = self.app["db"]
     mock_supervisor: MockSupervisor = self.app["supervisor"]
     mock_supervisor.hang_pipe = True
     stream = session.query(DataStream).filter_by(name="stream1").one()
     closed_pipe = pipes.LocalPipe(stream.layout)
     closed_pipe.close_nowait()
     self.supervisor.subscription_pipe = closed_pipe
     request_params = {"id": stream.id, "subscribe": '1'}
     async with self.client.get("/data", params=request_params):
         # ignore the data
         pass
コード例 #9
0
    async def _test_info(self):
        """Check ``store.info`` for two populated streams and an empty one.

        The fixture stream and a second stream with 800 rows must report
        matching start/end timestamps, a row count within 10% of the real
        count and a non-zero size; the empty stream must report no bounds
        and zero time, rows and bytes.
        """

        def make_int32_stream(stream_id, name):
            # the extra streams differ only in id and name
            return DataStream(
                id=stream_id,
                name=name,
                datatype=DataStream.DATATYPE.INT32,
                keep_us=DataStream.KEEP_ALL,
                decimate=True,
                elements=[Element(name="e%d" % x) for x in range(8)])

        def check_populated(info, data):
            # bounds must match the inserted data exactly
            self.assertEqual(info.start, data['timestamp'][0])
            self.assertEqual(info.end, data['timestamp'][-1])
            self.assertEqual(info.total_time, info.end - info.start)
            # rows are approximate
            self.assertLess(abs(len(data) - info.rows), len(data) * 0.1)
            self.assertGreater(info.bytes, 0)

        # create another stream
        empty_stream = make_int32_stream(103, "empty stream")
        stream2 = make_int32_stream(104, "stream2")
        pipe = pipes.LocalPipe(stream2.layout)
        test_data = helpers.create_data(layout=stream2.layout, length=800)
        inserter = await self.store.spawn_inserter(stream2, pipe)
        await pipe.write(test_data)
        await pipe.close()
        await inserter
        records = await self.store.info(
            [self.test_stream, stream2, empty_stream])

        # check stream1
        check_populated(records[self.test_stream.id], self.test_data)

        # check stream2
        check_populated(records[stream2.id], test_data)

        # check the empty stream
        empty_info = records[empty_stream.id]
        self.assertEqual(empty_info.start, None)
        self.assertEqual(empty_info.end, None)
        self.assertEqual(empty_info.total_time, 0)
        self.assertEqual(empty_info.rows, 0)
        self.assertEqual(empty_info.bytes, 0)
コード例 #10
0
ファイル: supervisor.py プロジェクト: wattsworth/joule
 async def _connect_remote_outputs(self, worker: Worker):
     """Attach a local pipe and a forwarding task to each remote output.

     For every stream in ``worker.subscribers`` flagged ``is_remote``, a
     local pipe is subscribed to the worker and a task running
     ``_handle_remote_output`` for that pipe is recorded in
     ``self.remote_tasks``. That handler is presumably the part that keeps
     retrying when the remote connection is unavailable -- verify there.
     """
     # snapshot the remote streams before subscribing anything
     remote_streams = tuple(
         candidate for candidate in worker.subscribers
         if candidate.is_remote)
     for stream in remote_streams:
         outbound = pipes.LocalPipe(stream.layout, stream=stream)
         # ignore unsubscribe cb, not used
         worker.subscribe(stream, outbound)
         forwarder = asyncio.create_task(
             self._handle_remote_output(outbound, stream))
         self.remote_tasks.append(forwarder)
コード例 #11
0
    def test_restarts_child(self):
        """Run the worker with ``restart=True`` alongside ``_stop_worker``.

        The run must log at WARNING level or above on the 'joule' logger,
        pass the ``check_fd_leakage`` context, end with a zero return code
        and leave the subscribed output pipe empty with its interval closed.
        """
        loop = asyncio.get_event_loop()
        self.worker.RESTART_INTERVAL = 0.2

        # subscribe to the module outputs
        output1 = pipes.LocalPipe(layout=self.streams[2].layout)
        self.worker.subscribe(self.streams[2], output1)

        with self.check_fd_leakage(), \
                self.assertLogs(logging.getLogger('joule'), logging.WARNING):
            stopper = loop.create_task(self._stop_worker(loop))
            worker_run = loop.create_task(
                self.worker.run(self.supervisor.subscribe, restart=True))
            loop.run_until_complete(asyncio.gather(stopper, worker_run))

        self.assertEqual(self.worker.process.returncode, 0)
        self.assertEqual(len(output1.read_nowait()), 0)
        self.assertTrue(output1.end_of_interval)
コード例 #12
0
    async def test_runner(self):
        """Run every ``_test_*`` coroutine against a freshly reset database.

        Before each case the ``data`` schema is dropped and recreated, a new
        ``TimescaleStore`` is initialized and a 1005-row sample stream is
        inserted as ``self.test_stream`` / ``self.test_data``.
        """
        cases = (
            self._test_basic_insert_extract,
            self._test_extract_data_with_intervals,
            self._test_extract_decimated_data,
            self._test_db_info,
            self._test_info,
            self._test_intervals,
            self._test_remove,
            self._test_destroy,
            self._test_row_count,
            self._test_actions_on_empty_streams,
            self._test_consolidate,
            self._test_consolidate_with_time_bounds,
        )
        reset_statements = (
            "DROP SCHEMA IF EXISTS data CASCADE",
            "CREATE SCHEMA data",
            "GRANT ALL ON SCHEMA data TO public",
        )
        for case in cases:
            conn: asyncpg.Connection = await asyncpg.connect(self.db_url)
            for statement in reset_statements:
                await conn.execute(statement)

            self.store = TimescaleStore(self.db_url, 0, 60)
            await self.store.initialize([])
            # make a sample stream with data
            self.test_stream = DataStream(
                id=100,
                name="stream1",
                datatype=DataStream.DATATYPE.FLOAT32,
                keep_us=DataStream.KEEP_ALL,
                decimate=True,
                elements=[Element(name="e%d" % x) for x in range(3)])
            pipe = pipes.LocalPipe(self.test_stream.layout)
            self.test_data = helpers.create_data(
                layout=self.test_stream.layout, length=1005)
            # the spawn call is only awaited after the data has been written
            # and the pipe closed
            spawn = self.store.spawn_inserter(self.test_stream, pipe)
            await pipe.write(self.test_data)
            await pipe.close()
            inserter = await spawn
            await inserter
            await conn.close()
            await case()
            # simulate the nose2 test output
            sys.stdout.write('o')
            await self.store.close()
            sys.stdout.flush()
コード例 #13
0
ファイル: daemon.py プロジェクト: wattsworth/joule
 async def _spawn_inserter(self, stream: DataStream):
     """Keep an inserter running for ``stream`` until it ends or is cancelled.

     Each pass subscribes a new local pipe to the stream and awaits an
     inserter spawned on the data store. A ``DataError`` asks the supervisor
     to restart the producer, unsubscribes the pipe and starts another pass.
     Normal completion or cancellation ends the method; on cancellation the
     inserter task, if it was created, is cancelled first.
     """
     while True:
         inserter_pipe = pipes.LocalPipe(layout=stream.layout,
                                         name='inserter:%s' % stream.name)
         unsubscribe = self.supervisor.subscribe(stream, inserter_pipe)
         inserter = None
         try:
             inserter = await self.data_store.spawn_inserter(stream,
                                                             inserter_pipe)
             await inserter
             return  # inserter terminated, program is closing
         except DataError as err:
             reason = "stream [%s]: %s" % (stream.name, err)
             await self.supervisor.restart_producer(stream, msg=reason)
         except asyncio.CancelledError:
             if inserter is not None:
                 inserter.cancel()
             return
         # only reached after a DataError: drop the old pipe before retrying
         unsubscribe()
コード例 #14
0
    async def _test_extract_data_with_intervals(self):
        """Check that extracted data carries an interval token per interval.

        955 rows are inserted in chunks of 300 with the interval closed
        after every chunk; the concatenated extraction must hold an interval
        token at rows 300, 601 and 902.
        """
        test_stream = DataStream(
            id=1,
            name="stream1",
            datatype=DataStream.DATATYPE.FLOAT32,
            keep_us=DataStream.KEEP_ALL,
            decimate=True,
            elements=[Element(name="e%d" % x) for x in range(3)])
        pipe = pipes.LocalPipe(test_stream.layout)
        nrows = 955
        data = helpers.create_data(layout=test_stream.layout, length=nrows)
        inserter = await self.store.spawn_inserter(test_stream, pipe)
        for chunk in helpers.to_chunks(data, 300):
            await pipe.write(chunk)
            await pipe.close_interval()
        await pipe.close()
        await inserter

        # extract data
        received_blocks = []

        async def callback(rx_data, layout, factor):
            self.assertEqual(layout, test_stream.layout)
            self.assertEqual(factor, 1)
            received_blocks.append(rx_data)

        await self.store.extract(test_stream,
                                 start=None,
                                 end=None,
                                 callback=callback)
        flattened = np.hstack(received_blocks)
        # check for interval boundaries
        for boundary in (300, 601, 902):
            np.testing.assert_array_equal(
                flattened[boundary],
                pipes.interval_token(test_stream.layout))
コード例 #15
0
    def test_passes_data_across_pipes(self):
        """Run the worker with the simple filter module and check its outputs.

        A stub HTTP server on a unix socket answers the ``/stream.json``
        requests made while the module starts. Two intervals of mock data are
        written into the first subscriber pipe of each of the two producers.
        Besides the two checked output pipes, a subscriber whose writes sleep
        and one whose writes raise ``BrokenPipeError`` are attached; the logs
        must mention both failures. The outputs are expected to be the
        inputs scaled by 2.0 and 3.0 respectively, one interval at a time.
        """

        loop = asyncio.get_event_loop()
        # create worker connections
        # child runs until stopped
        self.module.exec_cmd = "/usr/bin/env python3 " + MODULE_SIMPLE_FILTER

        interval1_data = helpers.create_data('float32_3', start=1000, step=100, length=100)
        interval2_data = helpers.create_data('float32_3', start=1001 + 100 * 100, step=100, length=100)

        # create a stub server to respond to API calls as the module starts up
        app = web.Application()

        node_stream_info_api_call_count = 0

        async def stub_stream_info(request):
            # count the request and answer with the JSON of the stream whose
            # index in self.streams matches the 'id' query parameter
            nonlocal node_stream_info_api_call_count
            node_stream_info_api_call_count += 1
            stream_id = int(request.query['id'])
            return web.json_response(self.streams[stream_id].to_json())

        app.add_routes([web.get('/stream.json', stub_stream_info)])
        runner = web.AppRunner(app)
        loop.run_until_complete(runner.setup())
        # serve the stub API on a unix socket inside a temporary directory
        tmp_dir = tempfile.TemporaryDirectory()
        sock_file = os.path.join(tmp_dir.name, 'testing')
        sock_site = web.UnixSite(runner, sock_file)
        loop.run_until_complete(sock_site.start())
        self.worker.API_SOCKET = sock_file

        async def mock_producers():
            # await asyncio.sleep(0.5)
            # poll until the first producer has at least one subscriber
            subscribers = self.producers[0].subscribers[self.streams[0]]
            while len(subscribers) == 0:
                await asyncio.sleep(0.01)
            # add two intervals of mock data to the producer queues
            input1 = subscribers[0]  # self.producers[0].subscribers[self.streams[0]][0]
            await input1.write(interval1_data)
            await input1.close_interval()
            await input1.write(interval2_data)
            await input1.close_interval()
            await input1.close()

            input2 = self.producers[1].subscribers[self.streams[1]][0]
            await input2.write(interval1_data)
            await input2.close_interval()
            await input2.write(interval2_data)
            await input2.close_interval()
            await input2.close()

            await asyncio.sleep(2)

        # subscribe to the module outputs
        output1 = pipes.LocalPipe(layout=self.streams[2].layout, name="output1", debug=False)
        output2 = pipes.LocalPipe(layout=self.streams[3].layout, name="output2", debug=False)

        # create a slow subscriber that times out
        class SlowPipe(pipes.Pipe):
            async def write(self, data):
                await asyncio.sleep(10)

            async def close_interval(self):
                pass

        slow_pipe = SlowPipe(stream=helpers.create_stream('slow stream', self.streams[2].layout), name='slow pipe')
        self.worker.subscribe(self.streams[2], slow_pipe)
        # far shorter than the 10 second sleep in SlowPipe.write
        self.worker.SUBSCRIBER_TIMEOUT = 0.1

        # create a subscriber that errors out
        class ErrorPipe(pipes.Pipe):
            async def write(self, data):
                raise BrokenPipeError()

        error_pipe = ErrorPipe(stream=helpers.create_stream('error stream', self.streams[2].layout), name='error pipe')
        self.worker.subscribe(self.streams[3], error_pipe)
        self.worker.subscribe(self.streams[3], output2)

        self.worker.subscribe(self.streams[2], output1)
        # run the worker and the mock producers together, capturing the logs
        with self.assertLogs() as log:
            loop.run_until_complete(asyncio.gather(
                self.worker.run(self.supervisor.subscribe, restart=False),
                mock_producers()))
        # take down the stub API server
        loop.run_until_complete(asyncio.gather(
            runner.shutdown(),
            runner.cleanup()
        ))
        # make sure the module queried the API endpoint for each stream
        self.assertEqual(node_stream_info_api_call_count, 4)
        tmp_dir.cleanup()  # remove socket file and directory
        # both failing subscribers must have been reported in the logs
        log_dump = '\n'.join(log.output)
        self.assertIn("subscriber write error", log_dump)
        self.assertIn("timed out", log_dump)
        # check stream2, should be stream0*2.0 [] stream0*2.0
        output_data = output1.read_nowait()
        output1.consume(len(output_data))
        np.testing.assert_array_almost_equal(interval1_data['data'] * 2.0,
                                             output_data['data'])
        self.assertTrue(output1.end_of_interval)
        output_data = output1.read_nowait()
        output1.consume(len(output_data))
        np.testing.assert_array_almost_equal(interval2_data['data'] * 2.0,
                                             output_data['data'])
        self.assertTrue(output1.end_of_interval)

        # check stream3, should be stream1*3.0 [] stream1*3.0
        output_data = output2.read_nowait()
        output2.consume(len(output_data))
        np.testing.assert_array_almost_equal(interval1_data['data'] * 3.0,
                                             output_data['data'])
        self.assertTrue(output2.end_of_interval)
        output_data = output2.read_nowait()
        output2.consume(len(output_data))
        np.testing.assert_array_almost_equal(interval2_data['data'] * 3.0,
                                             output_data['data'])
        self.assertTrue(output2.end_of_interval)
コード例 #16
0
ファイル: test_supervisor.py プロジェクト: wattsworth/joule
 async def data_subscribe(self, stream):
     nonlocal subscription_requests
     pipe = pipes.LocalPipe(layout='float64_2')
     subscription_requests += 1
     return pipe
コード例 #17
0
ファイル: test_supervisor.py プロジェクト: wattsworth/joule
 async def data_write(self, stream):
     nonlocal output_requested
     output_requested = True
     pipe = pipes.LocalPipe(layout='float64_2')
     return pipe