async def data_write(self, request: web.Request):
    if self.stub_data_write:
        return web.Response(text=self.response, status=self.http_code)
    if 'id' in request.query:
        stream_id = int(request.query['id'])
        mock_entry = [x for x in self.streams.values()
                      if x.stream.id == stream_id][0]
    else:
        mock_entry = self.streams[request.query['path']]
    pipe = pipes.InputPipe(name="inbound", stream=mock_entry.stream,
                           reader=request.content)
    istart = None
    iend = None
    while True:
        try:
            chunk = await pipe.read()
            if len(chunk) > 0:
                if istart is None:
                    istart = chunk['timestamp'][0]
                iend = chunk['timestamp'][-1]
                pipe.consume(len(chunk))
            if pipe.end_of_interval and istart is not None and iend is not None:
                mock_entry.intervals.append([istart, iend])
                istart = None
                iend = None
            mock_entry.add_data(chunk)
        except pipes.EmptyPipe:
            break
    self.msgs.put(mock_entry)
    return web.Response(text="ok")

async def test_inserter_clean(self):
    self.stream1.datatype = DataStream.DATATYPE.UINT16
    self.stream1.keep_us = 24 * 60 * 60 * 1e6  # 1 day
    self.stream1.decimate = True

    source = QueueReader(delay=0.1)
    await source.put(helpers.create_data(layout="uint16_3").tobytes())
    pipe = pipes.InputPipe(stream=self.stream1, reader=source)
    self.store.cleanup_period = 0
    task = await self.store.spawn_inserter(self.stream1, pipe, insert_period=0)
    await task
    self.assertTrue(len(self.fake_nilmdb.remove_calls) > 0)
    # make sure decimations have been removed too
    removed_paths = [x['path'] for x in self.fake_nilmdb.remove_calls]
    self.assertTrue('/joule/1' in removed_paths)
    self.assertTrue('/joule/1~decim-4' in removed_paths)
    self.assertTrue('/joule/1~decim-16' in removed_paths)
    # make sure nilmdb cleanup executed with correct parameters
    params = self.fake_nilmdb.remove_calls[-1]
    self.assertEqual(int(params['start']), 0)
    expected = int(time.time() * 1e6) - self.stream1.keep_us
    actual = int(params['end'])
    # should be within 0.1 second
    np.testing.assert_almost_equal(expected / 1e6, actual / 1e6, decimal=1)

async def write(request: web.Request):
    db: Session = request.app["db"]
    data_store: DataStore = request.app["data-store"]
    # find the requested stream
    if 'path' in request.query:
        stream = folder.find_stream_by_path(request.query['path'], db,
                                            stream_type=DataStream)
    elif 'id' in request.query:
        stream = db.query(DataStream).get(request.query["id"])
    else:
        return web.Response(text="specify an id or a path", status=400)
    if stream is None:
        return web.Response(text="stream does not exist", status=404)
    # spawn the inserter task
    stream.is_destination = True
    db.commit()
    pipe = pipes.InputPipe(name="inbound", stream=stream, reader=request.content)
    try:
        task = await data_store.spawn_inserter(stream, pipe, insert_period=0)
        await task
    except DataError as e:
        return web.Response(text=str(e), status=400)
    except asyncio.CancelledError as e:
        raise e
    finally:
        stream.is_destination = False
        db.commit()
    return web.Response(text="ok")

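# A minimal sketch of how a client might exercise this write endpoint. The URL,
# stream id, and API key below are placeholders; the parameter and header names
# follow the copy routine shown later in this section (_copy_interval).

import aiohttp

async def post_data(raw_bytes: bytes):
    # illustrative only: host, stream id, and key are hypothetical values
    async with aiohttp.ClientSession() as session:
        async with session.post("http://localhost:8080/data",
                                params={"id": 1},
                                data=raw_bytes,
                                headers={"X-API-KEY": "<key>"}) as resp:
            assert resp.status == 200, await resp.text()
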
async def test_propogates_intervals_to_decimations(self):
    self.stream1.decimate = True
    source = QueueReader()
    pipe = pipes.InputPipe(stream=self.stream1, reader=source)
    # insert the following broken chunks of data
    # |----15*8----|-15-|-15-|-15-|-15-| ==> raw (180 samples)
    # |-----30-----|-3--|--3-|--3-|--3-| ==> x4  (42 samples)
    # |-----7------|                     ==> x16 (7 samples)
    # |-----1------|                     ==> x64 (1 sample)
    n_chunks = 12
    for i in range(n_chunks):
        data = helpers.create_data(layout="int8_3", length=15,
                                   start=i * 1e6, step=1)
        await source.put(data.tobytes())
        if i > 6:  # breaks in the 2nd half
            await source.put(pipes.interval_token("int8_3").tobytes())
    task = await self.store.spawn_inserter(self.stream1, pipe, insert_period=0)
    await task
    # should have raw, x4, x16, x64, x256
    self.assertEqual(len(self.fake_nilmdb.streams), 5)
    self.assertEqual(self.fake_nilmdb.streams["/joule/1"].rows, n_chunks * 15)
    # x4 level should be missing data due to interval breaks
    self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-4"].rows, 42)
    # x16 level should have 7 samples (only from the first interval)
    self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-16"].rows, 7)
    # x64 level should have 1 sample
    self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-64"].rows, 1)
    # x256 level should be empty
    self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-256"].rows, 0)

async def test_subscribes_to_data(self):
    db: Session = self.app["db"]
    my_stream = db.query(DataStream).filter_by(name="stream1").one()
    blk1 = helpers.create_data(my_stream.layout)
    blk2 = helpers.create_data(my_stream.layout, length=50)
    my_pipe = pipes.LocalPipe(my_stream.layout)
    my_pipe.write_nowait(blk1)
    my_pipe.close_interval_nowait()
    my_pipe.write_nowait(blk2)
    my_pipe.close_nowait()
    self.supervisor.subscription_pipe = my_pipe
    async with self.client.get("/data", params={"id": my_stream.id,
                                                "subscribe": '1'}) as resp:
        pipe = pipes.InputPipe(stream=my_stream, reader=resp.content)
        rx_blk1 = await pipe.read()
        pipe.consume(len(rx_blk1))
        np.testing.assert_array_equal(blk1, rx_blk1)
        self.assertTrue(pipe.end_of_interval)
        rx_blk2 = await pipe.read()
        pipe.consume(len(rx_blk2))
        np.testing.assert_array_equal(blk2, rx_blk2)
        with self.assertRaises(pipes.EmptyPipe):
            await pipe.read()
    self.assertEqual(self.supervisor.unsubscribe_calls, 1)

async def build_fd_pipes(pipe_args: str, node: BaseNode) -> Tuple[Pipes, Pipes]:
    try:
        pipe_json = json.loads(json.loads(pipe_args))
        # if debugging, pycharm escapes the outer JSON
        # pipe_json = json.loads(pipe_args.encode('utf-8').decode('unicode_escape'))
        dest_args = pipe_json['outputs']
        src_args = pipe_json['inputs']
    except (KeyError, json.JSONDecodeError):
        raise errors.ConfigurationError("invalid pipes argument: [%s]" % pipe_args)
    pipes_out = {}
    pipes_in = {}
    for name, arg in dest_args.items():
        wf = pipes.writer_factory(arg['fd'])
        dest_stream = None
        if arg['id'] is not None:  # used in testing when no API is available
            dest_stream = await node.data_stream_get(arg['id'])
        pipes_out[name] = pipes.OutputPipe(stream=dest_stream,
                                           layout=arg['layout'],
                                           writer_factory=wf)
    for name, arg in src_args.items():
        rf = pipes.reader_factory(arg['fd'])
        src_stream = None
        if arg['id'] is not None:  # used in testing when no API is available
            src_stream = await node.data_stream_get(arg['id'])
        pipes_in[name] = pipes.InputPipe(stream=src_stream,
                                         layout=arg['layout'],
                                         reader_factory=rf)
    return pipes_in, pipes_out

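# For reference, a minimal sketch of constructing the double-JSON-encoded pipe_args
# string that build_fd_pipes() expects, mirroring how the tests later in this section
# build it. The file descriptors and layout here are illustrative placeholders.

import json
import os

r_fd, w_fd = os.pipe()
pipe_args = json.dumps(json.dumps({
    "outputs": {"output": {"fd": w_fd, "id": None, "layout": "float32_3"}},
    "inputs": {}
}))
# with id=None no API lookup is made, so build_fd_pipes(pipe_args, node)
# would return an empty input dict and one OutputPipe named "output"
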
async def test_inserter_data_error(self):
    # when stream has invalid data (e.g. bad timestamps)
    self.stream1.datatype = DataStream.DATATYPE.UINT16
    source = QueueReader()
    pipe = pipes.InputPipe(stream=self.stream1, reader=source)
    task = await self.store.spawn_inserter(self.stream1, pipe, insert_period=0)
    await source.put(helpers.create_data(layout="uint16_3").tobytes())
    # when nilmdb server generates an error
    self.fake_nilmdb.generate_error_on_path("/joule/1", 400, "bad data")
    with self.assertRaises(DataError):
        await task

async def test_retries_when_nilmdb_is_not_available(self):
    # when nilmdb server is not available the inserter should retry
    self.stream1.datatype = DataStream.DATATYPE.UINT16
    source = QueueReader()
    await self.fake_nilmdb.stop()
    await source.put(helpers.create_data(layout="uint16_3").tobytes())
    pipe = pipes.InputPipe(stream=self.stream1, reader=source)
    with self.assertLogs(level="WARNING") as logs:
        task = await self.store.spawn_inserter(self.stream1, pipe,
                                               retry_interval=0.05)
        await asyncio.sleep(0.1)
        task.cancel()
        await task
    self.assertTrue("retrying request" in ''.join(logs.output))

async def test_nondecimating_inserter(self):
    self.stream1.decimate = False
    self.stream1.datatype = DataStream.DATATYPE.UINT16
    source = QueueReader()
    pipe = pipes.InputPipe(stream=self.stream1, reader=source)
    nrows = 896
    data = helpers.create_data(layout="uint16_3", length=nrows)
    task = await self.store.spawn_inserter(self.stream1, pipe, insert_period=0)
    for chunk in helpers.to_chunks(data, 300):
        await source.put(chunk.tobytes())
    await task
    self.assertEqual(self.fake_nilmdb.streams["/joule/1"].rows, nrows)
    self.assertEqual(len(self.fake_nilmdb.streams), 1)

async def _spawn_outputs(self) -> asyncio.Task:
    tasks: List[asyncio.Task] = []
    # configure output pipes [module]==>[worker]
    for (name, stream) in self.module.outputs.items():
        (r, w) = os.pipe()
        rf = pipes.reader_factory(r)
        os.set_inheritable(w, True)
        pipe = pipes.InputPipe(name=name, stream=stream, reader_factory=rf)
        self.output_connections.append(DataConnection(name, w, stream, pipe))
        t = asyncio.create_task(
            self._output_handler(pipe, self.subscribers[stream]))
        t.set_name("worker [%s]: output [%s]" % (self.module.name, stream.name))
        tasks.append(t)
    return asyncio.gather(*tasks)

async def _build_pipes_new(self, interval, input_streams, output_streams,
                           pipe_args) -> Tuple[Pipes, Pipes]:
    input_pipes = {}
    output_pipes = {}
    # use network sockets for connection to inputs and outputs
    if pipe_args == 'unset':
        for (name, stream) in input_streams.items():
            if interval is None:
                # subscribe to live data
                input_pipes[name] = await self.node.data_subscribe(stream)
            else:
                input_pipes[name] = await self.node.data_read(stream,
                                                              interval[0],
                                                              interval[1])
        for (name, stream) in output_streams.items():
            if interval is None:
                output_pipes[name] = await self.node.data_write(stream)
            else:
                output_pipes[name] = await self.node.data_write(stream,
                                                                interval[0],
                                                                interval[1])
    # use file descriptors provided by joule for connection to inputs and outputs
    else:
        try:
            pipe_json = json.loads(json.loads(pipe_args))
            # if debugging, pycharm escapes the outer JSON
            # pipe_json = json.loads(pipe_args.encode('utf-8').decode('unicode_escape'))
            output_args = pipe_json['outputs']
            input_args = pipe_json['inputs']
        except (KeyError, json.JSONDecodeError):
            raise ConfigurationError(f"invalid pipes argument: {pipe_args}")
        for name, arg in output_args.items():
            wf = pipes.writer_factory(arg['fd'])
            output_pipes[name] = pipes.OutputPipe(stream=output_streams[name],
                                                  layout=arg['layout'],
                                                  writer_factory=wf)
        for name, arg in input_args.items():
            rf = pipes.reader_factory(arg['fd'])
            input_pipes[name] = pipes.InputPipe(stream=input_streams[name],
                                                layout=arg['layout'],
                                                reader_factory=rf)
    # keep track of the pipes so they can be closed
    self.pipes = list(input_pipes.values()) + list(output_pipes.values())
    return input_pipes, output_pipes

async def test_decimating_inserter(self):
    self.stream1.decimate = True
    source = QueueReader()
    pipe = pipes.InputPipe(stream=self.stream1, reader=source)
    nrows = 955
    data = helpers.create_data(layout="int8_3", length=nrows)
    task = await self.store.spawn_inserter(self.stream1, pipe)
    for chunk in helpers.to_chunks(data, 300):
        await source.put(chunk.tobytes())
    await task
    self.assertEqual(len(self.fake_nilmdb.streams), 6)
    self.assertEqual(self.fake_nilmdb.streams["/joule/1"].rows, nrows)
    self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-4"].rows,
                     np.floor(nrows / 4))
    self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-16"].rows,
                     np.floor(nrows / 16))
    self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-64"].rows,
                     np.floor(nrows / 64))
    self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-256"].rows,
                     np.floor(nrows / 256))

def test_writes_to_pipes(self):
    module = SimpleReader()
    (r, w) = os.pipe()
    loop = asyncio.get_event_loop()
    rf = pipes.reader_factory(r)
    pipe = pipes.InputPipe(name="output", stream=self.stream, reader_factory=rf)
    pipe_arg = json.dumps(
        json.dumps({
            "outputs": {
                'output': {'fd': w, 'id': None, 'layout': self.stream.layout}
            },
            "inputs": {}
        }))
    data = helpers.create_data(self.stream.layout)
    args = argparse.Namespace(pipes=pipe_arg, socket="unset",
                              url='http://localhost:8080',
                              node="", api_socket="", mock_data=data)
    # run the reader module
    loop = asyncio.new_event_loop()
    loop.set_debug(True)
    asyncio.set_event_loop(loop)
    module.start(args)
    asyncio.set_event_loop(self.loop)
    # check the output
    received_data = self.loop.run_until_complete(pipe.read())
    np.testing.assert_array_equal(data, received_data)
    self.loop.run_until_complete(pipe.close())
    if not loop.is_closed():
        loop.close()

async def _extract_by_path(self, path: str, start: Optional[int],
                           end: Optional[int], layout: str, callback):
    url = "{server}/stream/extract".format(server=self.server)
    params = {"path": path, "binary": 1}
    decimation_factor = 1
    r = re.search(r'~decim-(\d+)$', path)
    if r is not None:
        decimation_factor = int(r[1])
    async with self._get_client() as session:
        # first determine the intervals, use the base path for this
        if path.find("~decim") == -1:
            base_path = path
        else:
            base_path = path[:path.find("~decim")]
        intervals = await self._intervals_by_path(base_path, start, end)
        i = 0
        num_intervals = len(intervals)
        # now extract each interval
        for interval in intervals:
            params["start"] = interval[0]
            params["end"] = interval[1]
            async with session.get(url, params=params) as resp:
                await check_for_error(resp)
                # put data into the queue as it arrives
                reader = pipes.InputPipe(name="outbound", layout=layout,
                                         reader=resp.content)
                while True:
                    try:
                        data = await reader.read()
                        await callback(data, layout, decimation_factor)
                        reader.consume(len(data))
                    except pipes.EmptyPipe:
                        break
            # insert the interval token to indicate a break
            i += 1
            if i < num_intervals:
                await callback(pipes.interval_token(layout), layout,
                               decimation_factor)

def test_runs_composited_modules(self):
    module = SimpleComposite()
    (r, w) = os.pipe()
    module_loop = asyncio.new_event_loop()
    rf = pipes.reader_factory(r)
    pipe = pipes.InputPipe(name="output", stream=self.stream, reader_factory=rf)
    pipe_arg = json.dumps(
        json.dumps({
            "outputs": {
                'output': {'fd': w, 'id': None, 'layout': self.stream.layout}
            },
            "inputs": {}
        }))
    data = helpers.create_data(self.stream.layout)
    args = argparse.Namespace(pipes=pipe_arg, socket="unset", node="",
                              api_socket="", url='http://localhost:8080',
                              mock_data=data)
    # run the composite module
    asyncio.set_event_loop(module_loop)
    module.start(args)
    asyncio.set_event_loop(self.loop)
    # check the output
    received_data = self.loop.run_until_complete(pipe.read())
    np.testing.assert_array_equal(data['timestamp'], received_data['timestamp'])
    np.testing.assert_array_almost_equal(data['data'] * 2, received_data['data'])
    self.loop.run_until_complete(pipe.close())
    self.loop.close()

def test_writes_to_pipes(self):
    module = SimpleFilter()
    (r, w_module) = os.pipe()
    loop = asyncio.get_event_loop()
    rf = pipes.reader_factory(r)
    from_filter = pipes.InputPipe(name="from_filter", stream=self.output,
                                  reader_factory=rf)
    (r_module, w) = os.pipe()
    loop = asyncio.get_event_loop()
    wf = pipes.writer_factory(w)
    to_filter = pipes.OutputPipe(name="to_filter", stream=self.input,
                                 writer_factory=wf)
    pipe_arg = json.dumps(
        json.dumps({
            "outputs": {
                'from_filter': {'fd': w_module, 'id': 2, 'layout': self.output.layout}
            },
            "inputs": {
                'to_filter': {'fd': r_module, 'id': 3, 'layout': self.input.layout}
            }
        }))
    data = helpers.create_data(self.input.layout)
    self.loop.run_until_complete(to_filter.write(data))
    self.loop.run_until_complete(to_filter.close())
    args = argparse.Namespace(pipes=pipe_arg, socket="unset", node="",
                              api_socket="", live=False,
                              url='http://localhost:8080')
    # run the filter module
    loop = asyncio.new_event_loop()
    loop.set_debug(True)
    asyncio.set_event_loop(loop)

    class MockNode(BaseNode):
        def __init__(self):
            self.session = mock.Mock()
            self.session.close = asynctest.CoroutineMock()

        @property
        def loop(self):
            return asyncio.get_event_loop()

    with mock.patch('joule.client.base_module.node') as mock_node_pkg:
        node = MockNode()
        node.data_stream_get = asynctest.CoroutineMock(return_value=self.output)
        mock_node_pkg.UnixNode = mock.Mock(return_value=node)
        module.start(args)
        # make sure the API was used to retrieve stream objects
        self.assertEqual(node.data_stream_get.await_count, 2)
    asyncio.set_event_loop(self.loop)
    # check the output
    received_data = self.loop.run_until_complete(from_filter.read())
    np.testing.assert_array_equal(data['timestamp'], received_data['timestamp'])
    np.testing.assert_array_almost_equal(data['data'] * 2, received_data['data'])
    self.loop.run_until_complete(from_filter.close())
    if not loop.is_closed():
        loop.close()

async def _copy_interval(istart, iend, bar):
    # print("[%s] -> [%s]" % (timestamp_to_human(istart), timestamp_to_human(iend)))
    if nilmdb_source:
        src_params = {'path': source, 'binary': 1,
                      'start': istart, 'end': iend}
        src_url = "{server}/stream/extract".format(server=source_node)
        src_headers = {}
        src_ssl = None
    else:
        src_params = {'id': src_stream.id, 'start': istart, 'end': iend}
        src_url = "{server}/data".format(server=source_node.session.url)
        src_headers = {"X-API-KEY": source_node.session.key}
        src_ssl = source_node.session.ssl_context
    async with aiohttp.ClientSession(
            timeout=aiohttp.ClientTimeout(total=None)) as session:
        async with session.get(src_url, params=src_params,
                               headers=src_headers, ssl=src_ssl) as src_response:
            if src_response.status != 200:
                msg = await src_response.text()
                if msg == 'this stream has no data':
                    # This is not an error because a previous copy may have been interrupted.
                    # This will cause the destination to have an interval gap where the
                    # source has no data.
                    # Example: source: |**    *******|
                    #          dest:   |**|  |*******|
                    #                      ^--- looks like missing data but there's
                    #                           nothing in the source
                    return  # ignore empty intervals
                raise click.ClickException("Error reading from source: %s" % msg)
            pipe = pipes.InputPipe(stream=dest_stream, reader=src_response.content)

            async def _data_sender():
                last_ts = istart
                try:
                    while True:
                        data = await pipe.read()
                        pipe.consume(len(data))
                        if len(data) > 0:
                            cur_ts = data[-1]['timestamp']
                            yield data.tobytes()
                            # total time extents of this chunk
                            bar.update(cur_ts - last_ts)
                            last_ts = cur_ts
                        # if pipe.end_of_interval:
                        #     yield pipes.interval_token(dest_stream.layout). \
                        #         tostring()
                except pipes.EmptyPipe:
                    pass
                bar.update(iend - last_ts)

            if nilmdb_dest:
                dst_params = {"start": istart, "end": iend,
                              "path": destination, "binary": 1}
                dst_url = "{server}/stream/insert".format(server=dest_node)
                await _send_nilmdb_data(dst_url, dst_params, _data_sender(),
                                        pipes.compute_dtype(dest_stream.layout),
                                        session)
            else:
                dst_url = "{server}/data".format(server=dest_node.session.url)
                dst_params = {"id": dest_stream.id}
                dst_headers = {"X-API-KEY": dest_node.session.key}
                dst_ssl = dest_node.session.ssl_context
                async with session.post(dst_url, params=dst_params,
                                        data=_data_sender(),
                                        headers=dst_headers, ssl=dst_ssl,
                                        chunked=True) as dest_response:
                    if dest_response.status != 200:
                        msg = await dest_response.text()
                        raise errors.ApiError("Error writing to destination: %s" % msg)

async def _test_basic_insert_extract(self):
    stream_id = 990
    self.store.extract_block_size = 500
    psql_types = ['double precision', 'real', 'bigint', 'integer', 'smallint']
    datatypes = [DataStream.DATATYPE.FLOAT64,
                 DataStream.DATATYPE.FLOAT32,
                 DataStream.DATATYPE.INT64,
                 DataStream.DATATYPE.INT32,
                 DataStream.DATATYPE.INT16]
    conn: asyncpg.Connection = await asyncpg.connect(self.db_url)
    for i in range(len(datatypes)):
        datatype = datatypes[i]
        psql_type = psql_types[i]
        for n_elements in range(1, 5):
            test_stream = DataStream(id=stream_id, name="stream1",
                                     datatype=datatype,
                                     keep_us=DataStream.KEEP_ALL,
                                     elements=[Element(name="e%d" % x)
                                               for x in range(n_elements)])
            test_stream.decimate = True
            source = QueueReader()
            pipe = pipes.InputPipe(stream=test_stream, reader=source)
            nrows = 955
            data = helpers.create_data(layout=test_stream.layout, length=nrows)
            task = await self.store.spawn_inserter(test_stream, pipe)
            for chunk in helpers.to_chunks(data, 300):
                await source.put(chunk.tobytes())
            await task
            # make sure the correct tables have been created
            records = await conn.fetch(
                '''SELECT table_name FROM information_schema.tables
                   WHERE table_schema='data';''')
            tables = list(itertools.chain(*records))
            for table in ['stream%d' % stream_id,
                          'stream%d_intervals' % stream_id]:
                self.assertIn(table, tables)
            # check the column data types
            records = await conn.fetch(
                '''SELECT column_name, data_type FROM information_schema.columns
                   WHERE table_name='stream%d' AND table_schema='data';''' % stream_id)
            (names, types) = zip(*records)
            expected_elements = ['time'] + ['elem%d' % x for x in range(n_elements)]
            self.assertCountEqual(names, expected_elements)
            expected_psql_types = tuple(['timestamp without time zone'] +
                                        [psql_type for x in range(n_elements)])
            self.assertEqual(types, expected_psql_types)
            self.assertEqual(len(records), n_elements + 1)

            # extract raw data
            extracted_data = []

            async def callback(rx_data, layout, factor):
                self.assertEqual(layout, test_stream.layout)
                self.assertEqual(factor, 1)
                extracted_data.append(rx_data)

            await self.store.extract(test_stream, start=None, end=None,
                                     callback=callback)
            extracted_data = np.hstack(extracted_data)
            np.testing.assert_array_equal(extracted_data, data)

            level = 64
            data_mean = np.mean(extracted_data['data'][:level], axis=0)
            data_max = np.max(extracted_data['data'][:level], axis=0)
            data_min = np.min(extracted_data['data'][:level], axis=0)

            # extract decimated data
            async def d_callback(rx_data, layout, factor):
                self.assertEqual(layout, test_stream.decimated_layout)
                self.assertEqual(factor, level)
                extracted_data.append(rx_data)

            extracted_data = []
            await self.store.extract(test_stream, decimation_level=level,
                                     start=None, end=None, callback=d_callback)
            extracted_data = np.hstack(extracted_data)
            expected = np.hstack((data_mean, data_min, data_max))
            np.testing.assert_array_almost_equal(expected, extracted_data['data'][0])
            stream_id += 1
    await conn.close()