async def test_subscribes_to_data(self):
    """Check that a subscribing GET on /data streams two intervals, then ends.

    A local pipe is pre-loaded with two blocks separated by an interval
    boundary and handed to the mock supervisor as the subscription source.
    The HTTP response is read back through an InputPipe and compared to the
    blocks that were written.  One unsubscribe call is expected afterwards.
    """
    db: Session = self.app["db"]
    stream = db.query(DataStream).filter_by(name="stream1").one()
    first_block = helpers.create_data(stream.layout)
    second_block = helpers.create_data(stream.layout, length=50)

    # pre-load the pipe the supervisor hands out: block, boundary, block, EOF
    source = pipes.LocalPipe(stream.layout)
    source.write_nowait(first_block)
    source.close_interval_nowait()
    source.write_nowait(second_block)
    source.close_nowait()
    self.supervisor.subscription_pipe = source

    query = {"id": stream.id, "subscribe": '1'}
    async with self.client.get("/data", params=query) as resp:
        reader = pipes.InputPipe(stream=stream, reader=resp.content)

        # first interval
        received = await reader.read()
        reader.consume(len(received))
        np.testing.assert_array_equal(first_block, received)
        self.assertTrue(reader.end_of_interval)

        # second interval
        received = await reader.read()
        reader.consume(len(received))
        np.testing.assert_array_equal(second_block, received)

        # nothing is left once both blocks have been consumed
        with self.assertRaises(pipes.EmptyPipe):
            await reader.read()
    self.assertEqual(self.supervisor.unsubscribe_calls, 1)
async def copy_interval(start: int, end: int, bar,
                        src_stream: 'DataStream', dest_stream: 'DataStream',
                        src_datastore: 'DataStore', dest_datastore: 'DataStore'):
    """Copy the data in [start, end] from one stream/datastore to another.

    Data extracted from ``src_datastore`` is pushed through a local pipe
    into an inserter spawned on ``dest_datastore``.  ``bar`` is advanced by
    the timestamp distance covered by each block, then by whatever remains
    up to ``end`` once the copy is finished.  The module-level ``num_rows``
    counter is incremented by the number of rows seen.
    """
    from joule.models import pipes, DataStream

    pipe = pipes.LocalPipe(src_stream.layout, write_limit=4, debug=False)
    # do not delete any data
    dest_stream.keep_us = DataStream.KEEP_ALL
    insert_task = await dest_datastore.spawn_inserter(
        dest_stream, pipe, asyncio.get_event_loop())

    last_ts = start

    async def writer(data, layout, decimated):
        nonlocal last_ts
        global num_rows
        row_count = len(data)
        num_rows += row_count
        if row_count == 0:
            # nothing to forward, progress is unchanged
            return
        newest_ts = data['timestamp'][-1]
        await pipe.write(data)
        bar.update(newest_ts - last_ts)
        last_ts = newest_ts

    await src_datastore.extract(src_stream, start, end, writer)
    await pipe.close()
    await insert_task
    # account for the span between the last sample and the interval end
    bar.update(end - last_ts)
async def _test_row_count(self):
    """Check psql_helpers.get_row_count against known data.

    Inserts 10000 rows into a fresh stream and verifies that the row-count
    estimate is within 10% of the expected value for open, half-open and
    closed time ranges, and is zero for ranges or streams without data.
    The database connection is closed even when an assertion fails.
    """
    test_data = helpers.create_data(layout=self.test_stream.layout,
                                    length=10000)
    test_stream = DataStream(
        id=95, name="stream1", datatype=DataStream.DATATYPE.FLOAT32,
        keep_us=DataStream.KEEP_ALL, decimate=True,
        elements=[Element(name="e%d" % x) for x in range(3)])
    pipe = pipes.LocalPipe(test_stream.layout)
    task = await self.store.spawn_inserter(test_stream, pipe)
    await pipe.write(test_data)
    await pipe.close()
    await task

    total = len(test_data)
    tolerance = 0.1 * total

    conn: asyncpg.Connection = await asyncpg.connect(self.db_url)
    try:
        # test to make sure nrows is within 10% of actual value

        # Test: [start, end]
        nrows = await psql_helpers.get_row_count(conn, test_stream, None, None)
        self.assertGreater(nrows, total * 0.9)

        # Test: [ts, end]
        nrows = await psql_helpers.get_row_count(
            conn, test_stream, test_data[total // 2][0], None)
        self.assertLess(abs(nrows - total // 2), tolerance)

        # Test: [start, ts]
        nrows = await psql_helpers.get_row_count(
            conn, test_stream, None, test_data[total // 3][0])
        self.assertLess(abs(nrows - total // 3), tolerance)

        # Test: [ts, ts]
        nrows = await psql_helpers.get_row_count(
            conn, test_stream,
            test_data[2 * total // 6][0],
            test_data[3 * total // 6][0])
        self.assertLess(abs(nrows - total // 6), tolerance)

        # Test: [ts, ts] (no data)
        nrows = await psql_helpers.get_row_count(
            conn, test_stream,
            test_data['timestamp'][0] - 100,
            test_data['timestamp'][0] - 50)
        self.assertEqual(0, nrows)

        # Test row count for stream with no data tables
        empty_stream = DataStream(
            id=96, name="empty", datatype=DataStream.DATATYPE.FLOAT64,
            keep_us=100, decimate=True,
            elements=[Element(name="e%d" % x) for x in range(8)])
        nrows = await psql_helpers.get_row_count(conn, empty_stream, None, None)
        self.assertEqual(0, nrows)
        nrows = await psql_helpers.get_row_count(
            conn, empty_stream, test_data[total // 2][0], None)
        self.assertEqual(0, nrows)
        # NOTE(review): this query targets test_stream, repeating the
        # "[ts, ts] (no data)" case above; it may have been meant for
        # empty_stream -- confirm before changing.
        nrows = await psql_helpers.get_row_count(
            conn, test_stream,
            test_data['timestamp'][0] - 100,
            test_data['timestamp'][0] - 50)
        self.assertEqual(0, nrows)
    finally:
        # release the connection even if an assertion above failed
        await conn.close()
async def setup(self, parsed_args, inputs, outputs):
    """Chain a SimpleReader into a SimpleFilter through an internal pipe.

    The reader writes into a local ``float32_3`` pipe, which the filter
    consumes as ``to_filter``; the filter's ``from_filter`` output is the
    module's ``output`` pipe.  ``inputs`` is not used.

    Returns a list with the results of both ``run`` calls, reader first.
    """
    source = SimpleReader()
    stage = SimpleFilter()
    # internal link between the two modules
    link = pipes.LocalPipe("float32_3")
    filter_inputs = {'to_filter': link}
    filter_outputs = {'from_filter': outputs['output']}
    return [
        source.run(parsed_args, link),
        stage.run(parsed_args, filter_inputs, filter_outputs),
    ]
def test_subscribes_to_remote_inputs(self):
    """Two subscriptions to one remote stream share a single connection.

    A mock node counts ``data_subscribe`` calls.  After subscribing two
    pipes to the same remote stream and stopping the supervisor, the test
    expects one remote request, the second pipe chained as a subscriber of
    the first, and both pipes closed.
    """
    remote_stream = helpers.create_stream('remote', 'float64_2')
    remote_stream.set_remote('http://remote:3000', '/path/to/stream')
    first_pipe = pipes.LocalPipe(layout='float64_2', name='p1')
    second_pipe = pipes.LocalPipe(layout='float64_2', name='p2')
    subscription_requests = 0

    class MockNode:
        async def data_subscribe(self, stream):
            nonlocal subscription_requests
            remote_pipe = pipes.LocalPipe(layout='float64_2')
            subscription_requests += 1
            return remote_pipe

        async def close(self):
            pass

    def get_mock_node(name: str):
        # TODO: name should just be the CN of the node
        self.assertEqual(name, "http://remote:3000")
        return MockNode()

    async def setup():
        self.supervisor.get_node = get_mock_node
        for local_pipe in (first_pipe, second_pipe):
            self.supervisor.subscribe(remote_stream, local_pipe)
        self.supervisor.task = asyncio.sleep(0)

    self.loop.run_until_complete(setup())
    self.loop.run_until_complete(self.supervisor.stop())

    # make sure there is only one connection to the remote
    self.assertEqual(subscription_requests, 1)
    # the second pipe should be subscribed to the first
    self.assertIs(first_pipe.subscribers[0], second_pipe)
    # the supervisor has been stopped, so both pipes should be closed
    self.assertTrue(first_pipe.closed)
    self.assertTrue(second_pipe.closed)
async def _test_consolidate(self):
    """Check that intervals separated by less than max_gap are merged.

    Five chunks are inserted as separate intervals, then the stream is
    consolidated with ``max_gap=6e3`` and extracted again.  Two intervals
    are expected back: the first three chunks merged, and the last two
    chunks merged.
    """
    # intervals less than max_gap us apart are consolidated
    # data: 100 samples spaced at 1000us
    # (spacing is set by helpers.create_data -- not visible here)
    test_stream = DataStream(
        id=1, name="stream1", datatype=DataStream.DATATYPE.FLOAT32,
        keep_us=DataStream.KEEP_ALL, decimate=True,
        elements=[Element(name="e%d" % x) for x in range(3)])
    pipe = pipes.LocalPipe(test_stream.layout)
    nrows = 955
    orig_data = helpers.create_data(layout=test_stream.layout, length=nrows)
    chunks = [
        orig_data[:300],
        orig_data[305:400],
        orig_data[402:700],
        orig_data[800:850],
        orig_data[852:],
    ]
    # data: |++++++|  |+++++++++|    |++++++|    |++++|  |++++|
    #               ^--5000 us     ^--2000 us   |       ^--- 2000 us
    #                                           `---0.1 sec (retained)
    task = await self.store.spawn_inserter(test_stream, pipe)
    for chunk in chunks:
        await pipe.write(chunk)
        await pipe.close_interval()
    await pipe.close()
    await task

    # extract data, dropping the interval tokens
    rx_chunks = []

    async def callback(rx_data, layout, factor):
        if rx_data[0] != pipes.interval_token(layout):
            rx_chunks.append(rx_data)

    await self.store.consolidate(test_stream, start=None, end=None,
                                 max_gap=6e3)
    await self.store.extract(test_stream, start=None, end=None,
                             callback=callback)

    # Only two intervals should be left.  Check the count first so a wrong
    # number of intervals fails as an assertion, not as an IndexError.
    self.assertEqual(len(rx_chunks), 2)
    np.testing.assert_array_equal(rx_chunks[0], np.hstack(chunks[:3]))
    np.testing.assert_array_equal(rx_chunks[1], np.hstack(chunks[3:]))
async def _subscribe(request: web.Request, json: bool):
    """Stream live data for one stream to the client as a chunked response.

    The stream is selected by the ``path`` or ``id`` query parameter.  A
    local pipe is subscribed to it through the supervisor, and every block
    read from that pipe is written to the response as raw bytes, followed
    by an interval token whenever the pipe reports an interval boundary.

    Args:
        request: the incoming aiohttp request.
        json: when true the request is rejected; JSON is not implemented.

    Returns:
        A 400/404 ``web.Response`` for bad requests, the ``StreamResponse``
        once the pipe is exhausted, or ``None`` if the client reset the
        connection.

    Raises:
        asyncio.CancelledError: propagated unchanged when the handler is
            cancelled.

    The subscription is released exactly once on every exit path,
    including unexpected exceptions.
    """
    db: Session = request.app["db"]
    supervisor: Supervisor = request.app['supervisor']
    if json:
        return web.Response(text="JSON subscription not implemented",
                            status=400)

    # find the requested stream
    if 'path' in request.query:
        stream = folder.find_stream_by_path(request.query['path'], db,
                                            stream_type=DataStream)
    elif 'id' in request.query:
        stream = db.query(DataStream).get(request.query["id"])
    else:
        return web.Response(text="specify an id or a path", status=400)
    if stream is None:
        return web.Response(text="stream does not exist", status=404)

    pipe = pipes.LocalPipe(stream.layout)
    try:
        unsubscribe = supervisor.subscribe(stream, pipe)
    except SubscriptionError:
        return web.Response(text="stream is not being produced", status=400)

    resp = web.StreamResponse(status=200,
                              headers={'joule-layout': stream.layout,
                                       'joule-decimation': '1'})
    resp.enable_chunked_encoding()

    try:
        await resp.prepare(request)
        while True:
            try:
                data = await pipe.read()
            except pipes.EmptyPipe:
                # producer closed the pipe: normal end of the subscription
                return resp
            pipe.consume(len(data))
            if len(data) > 0:
                await resp.write(data.tobytes())
            if pipe.end_of_interval:
                await resp.write(
                    pipes.interval_token(stream.layout).tobytes())
    except ConnectionResetError:
        # the client went away; there is nobody left to respond to
        return None
    finally:
        # runs for normal completion, cancellation and any error alike
        unsubscribe()
async def test_unsubscribes_terminated_connections(self):
    """Open a subscription and drop the connection without reading it.

    The mock supervisor is put in ``hang_pipe`` mode and given an already
    closed pipe.  The body makes no explicit assertion; it only requires
    that the request can be opened and abandoned without raising.
    """
    db: Session = self.app["db"]
    supervisor: MockSupervisor = self.app["supervisor"]
    supervisor.hang_pipe = True

    stream = db.query(DataStream).filter_by(name="stream1").one()
    closed_pipe = pipes.LocalPipe(stream.layout)
    closed_pipe.close_nowait()
    self.supervisor.subscription_pipe = closed_pipe

    query = {"id": stream.id, "subscribe": '1'}
    async with self.client.get("/data", params=query):
        # ignore the data
        pass
async def _test_info(self):
    """Check store.info() for two populated streams and one empty stream.

    ``self.test_stream`` already holds ``self.test_data``; a second stream
    is filled with 800 rows here and a third is left empty.  For populated
    streams the start/end must match the data, total_time must equal
    end - start, the row count must be within 10%, and bytes must be
    positive.  The empty stream must report None bounds and zero counts.
    """

    def make_int32_stream(stream_id, name):
        # both extra streams differ only in id and name
        return DataStream(
            id=stream_id, name=name, datatype=DataStream.DATATYPE.INT32,
            keep_us=DataStream.KEEP_ALL, decimate=True,
            elements=[Element(name="e%d" % x) for x in range(8)])

    def check_populated(info, data):
        self.assertEqual(info.start, data['timestamp'][0])
        self.assertEqual(info.end, data['timestamp'][-1])
        self.assertEqual(info.total_time, info.end - info.start)
        # rows are approximate
        self.assertLess(abs(len(data) - info.rows), len(data) * 0.1)
        self.assertGreater(info.bytes, 0)

    # create another stream
    empty_stream = make_int32_stream(103, "empty stream")
    stream2 = make_int32_stream(104, "stream2")

    pipe = pipes.LocalPipe(stream2.layout)
    test_data = helpers.create_data(layout=stream2.layout, length=800)
    task = await self.store.spawn_inserter(stream2, pipe)
    await pipe.write(test_data)
    await pipe.close()
    await task

    records = await self.store.info(
        [self.test_stream, stream2, empty_stream])

    # check stream1
    check_populated(records[self.test_stream.id], self.test_data)
    # check stream2
    check_populated(records[stream2.id], test_data)

    # check the empty stream
    empty_info = records[empty_stream.id]
    self.assertEqual(empty_info.start, None)
    self.assertEqual(empty_info.end, None)
    self.assertEqual(empty_info.total_time, 0)
    self.assertEqual(empty_info.rows, 0)
    self.assertEqual(empty_info.bytes, 0)
async def _connect_remote_outputs(self, worker: Worker):
    """Attach a forwarding task to every remote stream the worker produces.

    For each remote stream in ``worker.subscribers`` a local pipe is
    subscribed to the worker, and a task running
    ``_handle_remote_output`` on that pipe is appended to
    ``self.remote_tasks``.  Restoring a failed remote connection is
    presumably done by ``_handle_remote_output`` (not visible here).
    """
    # snapshot the remote streams before subscribing to any of them
    remote_streams = list(filter(lambda s: s.is_remote, worker.subscribers))
    for remote in remote_streams:
        outbound = pipes.LocalPipe(remote.layout, stream=remote)
        # the unsubscribe callback returned here is not needed
        worker.subscribe(remote, outbound)
        forwarder = self._handle_remote_output(outbound, remote)
        self.remote_tasks.append(asyncio.create_task(forwarder))
def test_restarts_child(self):
    """Run the worker with restart enabled while it is being stopped.

    The run must log at WARNING or above on the 'joule' logger and pass
    the file-descriptor leak check.  Afterwards the process must have
    exited with code 0, and the subscribed output pipe must hold no data
    but report an interval boundary.
    """
    loop = asyncio.get_event_loop()
    self.worker.RESTART_INTERVAL = 0.2

    # subscribe to the module outputs
    out_stream = self.streams[2]
    output1 = pipes.LocalPipe(layout=out_stream.layout)
    self.worker.subscribe(out_stream, output1)

    joule_log = logging.getLogger('joule')
    with self.check_fd_leakage(), self.assertLogs(joule_log, logging.WARNING):
        stopper = loop.create_task(self._stop_worker(loop))
        running = loop.create_task(
            self.worker.run(self.supervisor.subscribe, restart=True))
        loop.run_until_complete(asyncio.gather(stopper, running))

    self.assertEqual(self.worker.process.returncode, 0)
    self.assertEqual(len(output1.read_nowait()), 0)
    self.assertTrue(output1.end_of_interval)
async def test_runner(self):
    """Run every TimescaleStore sub-test against a freshly reset database.

    Before each sub-test the ``data`` schema is dropped and recreated, a
    new TimescaleStore is initialized, and ``self.test_stream`` is filled
    with 1005 rows of ``self.test_data``.  The setup connection and the
    store are closed even when setup or the sub-test raises, so a failure
    does not leak database connections.
    """
    tests = [
        self._test_basic_insert_extract,
        self._test_extract_data_with_intervals,
        self._test_extract_decimated_data,
        self._test_db_info,
        self._test_info,
        self._test_intervals,
        self._test_remove,
        self._test_destroy,
        self._test_row_count,
        self._test_actions_on_empty_streams,
        self._test_consolidate,
        self._test_consolidate_with_time_bounds,
    ]
    for test in tests:
        # start from an empty schema; this connection is used for nothing else
        conn: asyncpg.Connection = await asyncpg.connect(self.db_url)
        try:
            await conn.execute("DROP SCHEMA IF EXISTS data CASCADE")
            await conn.execute("CREATE SCHEMA data")
            await conn.execute("GRANT ALL ON SCHEMA data TO public")
        finally:
            await conn.close()

        self.store = TimescaleStore(self.db_url, 0, 60)
        await self.store.initialize([])
        try:
            # make a sample stream with data
            self.test_stream = DataStream(
                id=100, name="stream1",
                datatype=DataStream.DATATYPE.FLOAT32,
                keep_us=DataStream.KEEP_ALL, decimate=True,
                elements=[Element(name="e%d" % x) for x in range(3)])
            pipe = pipes.LocalPipe(self.test_stream.layout)
            self.test_data = helpers.create_data(
                layout=self.test_stream.layout, length=1005)
            # spawn the inserter first, as the individual sub-tests do
            task = await self.store.spawn_inserter(self.test_stream, pipe)
            await pipe.write(self.test_data)
            await pipe.close()
            await task

            await test()
            # simulate the nose2 test output
            sys.stdout.write('o')
        finally:
            # close the store even when the sub-test failed
            await self.store.close()
        sys.stdout.flush()
async def _spawn_inserter(self, stream: DataStream):
    """Keep an inserter running for ``stream`` until it exits or is cancelled.

    Each attempt subscribes a fresh local pipe to the stream and awaits an
    inserter task from the data store.  On DataError the producer is
    restarted through the supervisor, the old pipe is unsubscribed, and a
    new attempt is made.  A normal inserter exit or a cancellation ends
    the loop; on cancellation a running inserter task is cancelled too.
    """
    while True:
        pipe = pipes.LocalPipe(layout=stream.layout,
                               name=f"inserter:{stream.name}")
        unsubscribe = self.supervisor.subscribe(stream, pipe)
        inserter = None
        try:
            inserter = await self.data_store.spawn_inserter(stream, pipe)
            await inserter
        except DataError as err:
            reason = f"stream [{stream.name}]: {err}"
            await self.supervisor.restart_producer(stream, msg=reason)
        except asyncio.CancelledError:
            if inserter is not None:
                inserter.cancel()
            return
        else:
            # inserter terminated, program is closing
            return
        # only reached after a DataError: drop the old pipe and retry
        unsubscribe()
async def _test_extract_data_with_intervals(self):
    """Check that extracted data carries a token at each interval boundary.

    955 rows are inserted in chunks of 300 with an interval closed after
    every chunk.  After extraction, interval tokens are expected at rows
    300, 601 and 902 of the concatenated result.
    """
    test_stream = DataStream(
        id=1, name="stream1", datatype=DataStream.DATATYPE.FLOAT32,
        keep_us=DataStream.KEEP_ALL, decimate=True,
        elements=[Element(name="e%d" % x) for x in range(3)])
    pipe = pipes.LocalPipe(test_stream.layout)
    nrows = 955
    data = helpers.create_data(layout=test_stream.layout, length=nrows)

    task = await self.store.spawn_inserter(test_stream, pipe)
    for chunk in helpers.to_chunks(data, 300):
        await pipe.write(chunk)
        await pipe.close_interval()
    await pipe.close()
    await task

    # extract data
    received_blocks = []

    async def callback(rx_data, layout, factor):
        self.assertEqual(layout, test_stream.layout)
        self.assertEqual(factor, 1)
        received_blocks.append(rx_data)

    await self.store.extract(test_stream, start=None, end=None,
                             callback=callback)
    combined = np.hstack(received_blocks)

    # check for interval boundaries
    for boundary in (300, 601, 902):
        np.testing.assert_array_equal(
            combined[boundary], pipes.interval_token(test_stream.layout))
def test_passes_data_across_pipes(self):
    """Run the simple-filter module and check data flows through its pipes.

    A stub HTTP server on a unix socket answers the module's
    ``/stream.json`` requests.  Two intervals of mock data are written to
    the pipes subscribed to producers 0 and 1.  Four subscribers are
    attached to the worker's outputs: two normal local pipes, one whose
    ``write`` sleeps longer than ``SUBSCRIBER_TIMEOUT`` and one whose
    ``write`` raises ``BrokenPipeError``.

    The test expects four stream-info API calls, log output mentioning
    "subscriber write error" and "timed out", and the two normal outputs
    to contain the input scaled by 2.0 and 3.0 with interval boundaries
    preserved.
    """
    loop = asyncio.get_event_loop()
    # create worker connections
    # child runs until stopped
    self.module.exec_cmd = "/usr/bin/env python3 " + MODULE_SIMPLE_FILTER
    interval1_data = helpers.create_data('float32_3', start=1000, step=100, length=100)
    interval2_data = helpers.create_data('float32_3', start=1001 + 100 * 100, step=100, length=100)
    # create a stub server to respond to API calls as the module starts up
    app = web.Application()
    node_stream_info_api_call_count = 0

    async def stub_stream_info(request):
        # count the request and answer with the stream indexed by the id
        nonlocal node_stream_info_api_call_count
        node_stream_info_api_call_count += 1
        stream_id = int(request.query['id'])
        return web.json_response(self.streams[stream_id].to_json())

    app.add_routes([web.get('/stream.json', stub_stream_info)])
    runner = web.AppRunner(app)
    loop.run_until_complete(runner.setup())
    # serve the stub API on a unix socket inside a temporary directory
    tmp_dir = tempfile.TemporaryDirectory()
    sock_file = os.path.join(tmp_dir.name, 'testing')
    sock_site = web.UnixSite(runner, sock_file)
    loop.run_until_complete(sock_site.start())
    self.worker.API_SOCKET = sock_file

    async def mock_producers():
        # await asyncio.sleep(0.5)
        # wait until something has subscribed to producer 0
        subscribers = self.producers[0].subscribers[self.streams[0]]
        while len(subscribers) == 0:
            await asyncio.sleep(0.01)
        # add two intervals of mock data to the producer queues
        input1 = subscribers[0]  # self.producers[0].subscribers[self.streams[0]][0]
        await input1.write(interval1_data)
        await input1.close_interval()
        await input1.write(interval2_data)
        await input1.close_interval()
        await input1.close()
        input2 = self.producers[1].subscribers[self.streams[1]][0]
        await input2.write(interval1_data)
        await input2.close_interval()
        await input2.write(interval2_data)
        await input2.close_interval()
        await input2.close()
        await asyncio.sleep(2)

    # subscribe to the module outputs
    output1 = pipes.LocalPipe(layout=self.streams[2].layout, name="output1", debug=False)
    output2 = pipes.LocalPipe(layout=self.streams[3].layout, name="output2", debug=False)

    # create a slow subscriber that times out
    class SlowPipe(pipes.Pipe):
        async def write(self, data):
            # 10s sleep, far longer than the 0.1s SUBSCRIBER_TIMEOUT set below
            await asyncio.sleep(10)

        async def close_interval(self):
            pass

    slow_pipe = SlowPipe(stream=helpers.create_stream('slow stream', self.streams[2].layout), name='slow pipe')
    self.worker.subscribe(self.streams[2], slow_pipe)
    self.worker.SUBSCRIBER_TIMEOUT = 0.1

    # create a subscriber that errors out
    class ErrorPipe(pipes.Pipe):
        async def write(self, data):
            raise BrokenPipeError()

    error_pipe = ErrorPipe(stream=helpers.create_stream('error stream', self.streams[2].layout), name='error pipe')
    self.worker.subscribe(self.streams[3], error_pipe)
    self.worker.subscribe(self.streams[3], output2)
    self.worker.subscribe(self.streams[2], output1)
    # run the worker and the mock producers together, capturing log output
    with self.assertLogs() as log:
        loop.run_until_complete(asyncio.gather(
            self.worker.run(self.supervisor.subscribe, restart=False),
            mock_producers()))
    loop.run_until_complete(asyncio.gather(
        runner.shutdown(),
        runner.cleanup()
    ))
    # make sure the module queried the API endpoint for each stream
    self.assertEqual(node_stream_info_api_call_count, 4)
    tmp_dir.cleanup()  # remove socket file and directory
    # both misbehaving subscribers should have been reported in the log
    log_dump = '\n'.join(log.output)
    self.assertIn("subscriber write error", log_dump)
    self.assertIn("timed out", log_dump)
    # check stream2, should be stream0*2.0 [] stream0*2.0
    output_data = output1.read_nowait()
    output1.consume(len(output_data))
    np.testing.assert_array_almost_equal(interval1_data['data'] * 2.0, output_data['data'])
    self.assertTrue(output1.end_of_interval)
    output_data = output1.read_nowait()
    output1.consume(len(output_data))
    np.testing.assert_array_almost_equal(interval2_data['data'] * 2.0, output_data['data'])
    self.assertTrue(output1.end_of_interval)
    # check stream3, should be stream1*3.0 [] stream1*3.0
    output_data = output2.read_nowait()
    output2.consume(len(output_data))
    np.testing.assert_array_almost_equal(interval1_data['data'] * 3.0, output_data['data'])
    self.assertTrue(output2.end_of_interval)
    output_data = output2.read_nowait()
    output2.consume(len(output_data))
    np.testing.assert_array_almost_equal(interval2_data['data'] * 3.0, output_data['data'])
    self.assertTrue(output2.end_of_interval)
async def data_subscribe(self, stream): nonlocal subscription_requests pipe = pipes.LocalPipe(layout='float64_2') subscription_requests += 1 return pipe
async def data_write(self, stream): nonlocal output_requested output_requested = True pipe = pipes.LocalPipe(layout='float64_2') return pipe