def create_source_data(server):
    # create the source stream
    src = DataStream(id=0, name="source", keep_us=100,
                     datatype=DataStream.DATATYPE.FLOAT32)
    src.elements = [Element(name="e%d" % x, index=x,
                            display_type=Element.DISPLAYTYPE.CONTINUOUS)
                    for x in range(3)]

    # source has 100 rows of data in four intervals between [0, 100]
    src_data = helpers.create_data(src.layout, length=100, start=0, step=1)
    # insert the intervals
    pipe_data = np.hstack((src_data[:25],
                           pipes.interval_token(src.layout),
                           src_data[25:50],
                           pipes.interval_token(src.layout),
                           src_data[50:75],
                           pipes.interval_token(src.layout),
                           src_data[75:]))
    ts = src_data['timestamp']
    intervals = [[ts[0], ts[24]],
                 [ts[25], ts[49]],
                 [ts[50], ts[74]],
                 [ts[75], ts[99]]]
    src_info = StreamInfo(int(src_data['timestamp'][0]),
                          int(src_data['timestamp'][-1]),
                          len(src_data))
    server.add_stream('/test/source', src, src_info, pipe_data, intervals)
    return src_data
async def _data_sender():
    nonlocal output_complete
    try:
        while True:
            data = await pipe.read()
            if len(data) > 0:
                yield data.tobytes()
            if pipe.end_of_interval:
                yield interval_token(stream.layout).tobytes()
            pipe.consume(len(data))
    except EmptyPipe:
        yield interval_token(stream.layout).tobytes()
    output_complete = True
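# --- Illustration (not part of the original source) ---
# _data_sender interleaves serialized data chunks with serialized interval
# tokens. This self-contained sketch shows the receiving side of that
# contract; the sentinel encoding (max int64 timestamp) and the example
# layout are assumptions for illustration and may differ from joule's
# actual interval_token() format.
import numpy as np

DTYPE = np.dtype([('timestamp', 'i8'), ('data', 'f4', 3)])  # example layout
SENTINEL_TS = np.iinfo(np.int64).max

def interval_sentinel() -> np.ndarray:
    # a single row with the maximum timestamp stands in for interval_token()
    token = np.zeros(1, dtype=DTYPE)
    token['timestamp'] = SENTINEL_TS
    return token

def split_intervals(raw: bytes) -> list:
    # parse a serialized stream back into one array per interval
    rows = np.frombuffer(raw, dtype=DTYPE)
    breaks = np.where(rows['timestamp'] == SENTINEL_TS)[0]
    segments, start = [], 0
    for b in breaks:
        segments.append(rows[start:b])
        start = b + 1  # skip the sentinel row itself
    segments.append(rows[start:])
    return [s for s in segments if len(s) > 0]

# round trip: two intervals (3 rows and 2 rows) separated by one sentinel
data = np.zeros(5, dtype=DTYPE)
data['timestamp'] = np.arange(5)
wire = data[:3].tobytes() + interval_sentinel().tobytes() + data[3:].tobytes()
print([len(s) for s in split_intervals(wire)])  # [3, 2]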
async def extract(self, stream: DataStream, start: Optional[int], end: Optional[int],
                  callback: Callable[[np.ndarray, str, bool], Coroutine],
                  max_rows: Optional[int] = None, decimation_level=1):
    if self.no_data:
        return  # do not call the callback func
    if self.raise_data_error:
        raise DataError("nilmdb error")
    if self.raise_decimation_error:
        raise InsufficientDecimationError("insufficient decimation")
    if decimation_level is not None and decimation_level > 1:
        layout = stream.decimated_layout
    else:
        decimation_level = 1
        layout = stream.layout
    for i in range(self.nintervals):
        for x in range(self.nchunks):
            await callback(helpers.create_data(layout, length=25),
                           layout, decimation_level)
        if i < (self.nintervals - 1):
            await callback(pipes.interval_token(layout), layout, decimation_level)
async def test_propogates_intervals_to_decimations(self):
    self.stream1.decimate = True
    source = QueueReader()
    pipe = pipes.InputPipe(stream=self.stream1, reader=source)
    # insert the following broken chunks of data
    # |----15*8----|-15-|-15-|-15-|-15-| ==> raw (180 samples)
    # |-----30-----|-3--|--3-|--3-|--3-| ==> x4 (42 samples)
    # |-----7------|                      ==> x16 (7 samples)
    # |-----1------|                      ==> x64 (1 sample)
    n_chunks = 12
    for i in range(n_chunks):
        data = helpers.create_data(layout="int8_3", length=15,
                                   start=i * 1e6, step=1)
        await source.put(data.tobytes())
        if i > 6:  # breaks in the 2nd half
            await source.put(pipes.interval_token("int8_3").tobytes())
    task = await self.store.spawn_inserter(self.stream1, pipe, insert_period=0)
    await task
    # should have raw, x4, x16, x64, x256
    self.assertEqual(len(self.fake_nilmdb.streams), 5)
    self.assertEqual(self.fake_nilmdb.streams["/joule/1"].rows, n_chunks * 15)
    # x4 level should be missing data due to interval breaks
    self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-4"].rows, 42)
    # x16 level should have 7 samples (only from the first interval)
    self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-16"].rows, 7)
    # x64 level should have 1 sample
    self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-64"].rows, 1)
    # x256 level should be empty
    self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-256"].rows, 0)
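# --- Illustration (not part of the original source) ---
# A standalone sanity check of the row counts asserted above, assuming each
# contiguous interval decimates independently with floor division at every
# level. `decimated_rows` is a hypothetical helper, not a joule API.
def decimated_rows(interval_lengths, factor):
    # rows contributed by each interval at the given decimation level
    return sum(n // factor for n in interval_lengths)

intervals = [120, 15, 15, 15, 15]  # one 8-chunk run, then four broken chunks
assert decimated_rows(intervals, 4) == 42
assert decimated_rows(intervals, 16) == 7
assert decimated_rows(intervals, 64) == 1
assert decimated_rows(intervals, 256) == 0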
async def _extract_data(conn: asyncpg.Connection, stream: DataStream, callback,
                        decimation_level: int = 1, start: Optional[int] = None,
                        end: Optional[int] = None, block_size=50000):
    if decimation_level > 1:
        layout = stream.decimated_layout
    else:
        layout = stream.layout
    table_name = "data.stream%d" % stream.id
    if decimation_level > 1:
        table_name += "_%d" % decimation_level
    # extract by interval
    query = "SELECT time FROM data.stream%d_intervals " % stream.id
    query += psql_helpers.query_time_bounds(start, end)
    try:
        boundary_records = await conn.fetch(query)
    except asyncpg.UndefinedTableError:
        # no data tables
        data = np.array([], dtype=pipes.compute_dtype(layout))
        await callback(data, layout, decimation_level)
        return

    boundary_records += [{'time': end}]
    for i in range(len(boundary_records)):
        record = boundary_records[i]
        end = record['time']
        # extract the interval data
        while True:
            query = "SELECT * FROM %s " % table_name
            query += psql_helpers.query_time_bounds(start, end)
            query += " ORDER BY time ASC LIMIT %d" % block_size
            psql_bytes = BytesIO()
            try:
                await conn.copy_from_query(query, format='binary',
                                           output=psql_bytes)
            except asyncpg.UndefinedTableError:
                # interval table exists but not the data table
                data = np.array([], dtype=pipes.compute_dtype(layout))
                await callback(data, layout, decimation_level)
                return
            psql_bytes.seek(0)
            dtype = pipes.compute_dtype(layout)
            np_data = psql_helpers.bytes_to_data(psql_bytes, dtype)
            await callback(np_data, layout, decimation_level)

            if len(np_data) < block_size:
                break
            start = np_data['timestamp'][-1] + 1
        # do not put an interval token at the end of the data
        if i < len(boundary_records) - 1:
            await callback(pipes.interval_token(layout), layout, decimation_level)
        start = end
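# --- Illustration (not part of the original source) ---
# The inner loop above pages through one interval at a time: it asks for at
# most block_size rows ordered by time and resumes from the last timestamp
# + 1 until a short block signals the interval is exhausted. The same
# invariant in miniature, with a plain list standing in for the table:
def paginate(times, start, end, block_size):
    # yield blocks of at most block_size values with start <= t < end
    while True:
        block = [t for t in times if start <= t < end][:block_size]
        yield block
        if len(block) < block_size:
            return  # a short block means the interval is exhausted
        start = block[-1] + 1  # resume just past the last row delivered

timestamps = list(range(100))
print([len(b) for b in paginate(timestamps, start=0, end=100, block_size=30)])
# [30, 30, 30, 10]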
async def close_interval(self):
    if self.closed:
        raise PipeError("Pipe is closed")
    if self.writer is None:
        return  # nothing has been written yet so nothing to close
    if self._caching:
        await self.flush_cache()
    self.writer.write(interval_token(self.layout).tobytes())
    await self.writer.drain()
async def callback(data, layout, decimation_factor):
    self.assertEqual(decimation_factor, 1)
    self.assertEqual(self.stream1.layout, layout)
    self.assertEqual(pipes.compute_dtype(self.stream1.layout), data.dtype)
    if len(data) == 0:
        self.assertEqual(0, len(data['timestamp']))
    else:
        for row in data:
            self.assertEqual(row, pipes.interval_token(self.stream1.layout))
def close_interval_nowait(self):
    if self.closed:
        raise PipeError("Pipe is closed")
    if self.writer is None:
        return  # nothing has been written yet so nothing to close
    if self._cache_index > 0:
        log.warning("dumping %d rows of cached data on %s" %
                    (self._cache_index, self.name))
        self._cache = np.empty(len(self._cache), self.dtype)
        self._cache_index = 0
    self.writer.write(interval_token(self.layout).tobytes())
async def _subscribe(request: web.Request, json: bool):
    db: Session = request.app["db"]
    supervisor: Supervisor = request.app['supervisor']
    if json:
        return web.Response(text="JSON subscription not implemented", status=400)
    # find the requested stream
    if 'path' in request.query:
        stream = folder.find_stream_by_path(request.query['path'], db,
                                            stream_type=DataStream)
    elif 'id' in request.query:
        stream = db.query(DataStream).get(request.query["id"])
    else:
        return web.Response(text="specify an id or a path", status=400)
    if stream is None:
        return web.Response(text="stream does not exist", status=404)

    pipe = pipes.LocalPipe(stream.layout)
    try:
        unsubscribe = supervisor.subscribe(stream, pipe)
    except SubscriptionError:
        return web.Response(text="stream is not being produced", status=400)
    resp = web.StreamResponse(status=200,
                              headers={'joule-layout': stream.layout,
                                       'joule-decimation': '1'})
    resp.enable_chunked_encoding()
    try:
        await resp.prepare(request)
    except ConnectionResetError:
        unsubscribe()
        return
    try:
        while True:
            try:
                data = await pipe.read()
            except pipes.EmptyPipe:
                unsubscribe()
                return resp
            pipe.consume(len(data))
            if len(data) > 0:
                await resp.write(data.tobytes())
            if pipe.end_of_interval:
                await resp.write(pipes.interval_token(stream.layout).tobytes())
    except asyncio.CancelledError as e:
        unsubscribe()
        # propagate the CancelledError up
        raise e
    except ConnectionResetError:
        unsubscribe()
async def _test_remove(self):
    # XXXXXXX------XXXXXXX
    #        ^|   ^
    # remove a chunk of data from the middle
    await self.store.remove(self.test_stream,
                            self.test_data['timestamp'][300],
                            self.test_data['timestamp'][400])
    # XXXXXXX------===XXXX
    #        ^|      ^
    # remove another chunk
    await self.store.remove(self.test_stream,
                            self.test_data['timestamp'][350],
                            self.test_data['timestamp'][500])
    # XXX___----===XXX
    #    ^|  ^
    await self.store.remove(self.test_stream,
                            self.test_data['timestamp'][250],
                            self.test_data['timestamp'][300])
    # extract the data, should have an interval gap between 249 and 500
    # and a *single* interval boundary at 250
    extracted_data = []

    async def callback(rx_data, layout, decimated):
        extracted_data.append(rx_data)

    await self.store.extract(self.test_stream, start=None, end=None,
                             callback=callback)
    extracted_data = np.hstack(extracted_data)
    # beginning is unchanged
    np.testing.assert_array_equal(extracted_data[:249], self.test_data[:249])
    # interval boundary marking missing data
    np.testing.assert_array_equal(extracted_data[250],
                                  pipes.interval_token(self.test_stream.layout))
    # end is unchanged (closing interval boundary ignored)
    np.testing.assert_array_equal(extracted_data[251:], self.test_data[500:])
    # two intervals of data
    intervals = await self.store.intervals(self.test_stream, start=None, end=None)
    ts = self.test_data['timestamp']
    expected = [[ts[0], ts[249] + 1],
                [ts[500], ts[-1] + 1]]
    self.assertEqual(intervals, expected)
async def _test_extract_data_with_intervals(self):
    test_stream = DataStream(id=1, name="stream1",
                             datatype=DataStream.DATATYPE.FLOAT32,
                             keep_us=DataStream.KEEP_ALL, decimate=True,
                             elements=[Element(name="e%d" % x) for x in range(3)])
    pipe = pipes.LocalPipe(test_stream.layout)
    nrows = 955
    data = helpers.create_data(layout=test_stream.layout, length=nrows)
    task = await self.store.spawn_inserter(test_stream, pipe)
    for chunk in helpers.to_chunks(data, 300):
        await pipe.write(chunk)
        await pipe.close_interval()
    await pipe.close()
    await task

    # extract data
    extracted_data = []

    async def callback(rx_data, layout, factor):
        self.assertEqual(layout, test_stream.layout)
        self.assertEqual(factor, 1)
        extracted_data.append(rx_data)

    await self.store.extract(test_stream, start=None, end=None, callback=callback)
    extracted_data = np.hstack(extracted_data)
    # check for interval boundaries
    np.testing.assert_array_equal(extracted_data[300],
                                  pipes.interval_token(test_stream.layout))
    np.testing.assert_array_equal(extracted_data[601],
                                  pipes.interval_token(test_stream.layout))
    np.testing.assert_array_equal(extracted_data[902],
                                  pipes.interval_token(test_stream.layout))
async def retrieve_data(data: np.ndarray, layout, factor):
    nonlocal data_blocks, data_segment, decimation_factor
    decimation_factor = factor
    if np.array_equal(data, pipes.interval_token(layout)):
        if data_segment is not None:
            data_blocks.append(data_segment.tolist())
            data_segment = None
    else:
        data = np.c_[data['timestamp'][:, None], data['data']]
        if data_segment is None:
            data_segment = data
        else:
            data_segment = np.vstack((data_segment, data))
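# --- Illustration (not part of the original source) ---
# The np.c_ expression in retrieve_data flattens a structured row array into
# a plain 2-D array with the timestamp as column 0. A minimal example with
# an assumed two-element layout:
import numpy as np

dtype = np.dtype([('timestamp', 'i8'), ('data', 'f4', 2)])
rows = np.zeros(3, dtype=dtype)
rows['timestamp'] = [10, 20, 30]
rows['data'] = [[1, 2], [3, 4], [5, 6]]

flat = np.c_[rows['timestamp'][:, None], rows['data']]
print(flat)
# [[10.  1.  2.]
#  [20.  3.  4.]
#  [30.  5.  6.]]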
async def runner():
    pipes_in, pipes_out = await build_network_pipes(
        {'input': '/test/source:uint8[x,y,z]'},
        {}, {}, my_node, None, None, True)
    blk1 = await pipes_in['input'].read()
    self.assertTrue(pipes_in['input'].end_of_interval)
    pipes_in['input'].consume(len(blk1))
    blk2 = await pipes_in['input'].read()
    pipes_in['input'].consume(len(blk2))
    rx_data = np.hstack((blk1,
                         interval_token(pipes_in['input'].layout),
                         blk2))
    np.testing.assert_array_equal(rx_data, src_data)
    with self.assertRaises(EmptyPipe):
        await pipes_in['input'].read()
    await my_node.close()
def create_source_data(server, is_destination=False):
    # create the source stream
    src = DataStream(id=0, name="source", keep_us=100,
                     datatype=DataStream.DATATYPE.UINT8,
                     is_destination=is_destination)
    src.elements = [Element(name="e%d" % x, index=x,
                            display_type=Element.DISPLAYTYPE.CONTINUOUS)
                    for x in range(3)]

    # source has 100 rows of data
    src_data = np.hstack((helpers.create_data(src.layout),
                          interval_token(src.layout),
                          helpers.create_data(src.layout)))
    src_info = StreamInfo(int(src_data['timestamp'][0]),
                          int(src_data['timestamp'][-1]),
                          len(src_data))
    server.add_stream('/test/source', src, src_info, src_data,
                      [src_info.start, src_info.end])
    return src_data
async def _extract_by_path(self, path: str, start: Optional[int], end: Optional[int],
                           layout: str, callback):
    url = "{server}/stream/extract".format(server=self.server)
    params = {"path": path, "binary": 1}
    decimation_factor = 1
    r = re.search(r'~decim-(\d+)$', path)
    if r is not None:
        decimation_factor = int(r[1])
    async with self._get_client() as session:
        # first determine the intervals, use the base path for this
        if path.find("~decim") == -1:
            base_path = path
        else:
            base_path = path[:path.find("~decim")]
        intervals = await self._intervals_by_path(base_path, start, end)
        i = 0
        num_intervals = len(intervals)
        # now extract each interval
        for interval in intervals:
            params["start"] = interval[0]
            params["end"] = interval[1]
            async with session.get(url, params=params) as resp:
                await check_for_error(resp)
                # put data into the queue as it arrives
                reader = pipes.InputPipe(name="outbound", layout=layout,
                                         reader=resp.content)
                while True:
                    try:
                        data = await reader.read()
                        await callback(data, layout, decimation_factor)
                        reader.consume(len(data))
                    except pipes.EmptyPipe:
                        break
            # insert the interval token to indicate a break
            i += 1
            if i < num_intervals:
                await callback(pipes.interval_token(layout), layout,
                               decimation_factor)
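# --- Illustration (not part of the original source) ---
# The "~decim-N" path suffix convention used by _extract_by_path can be
# exercised in isolation; this sketch mirrors the regex above
# (`parse_decimation` is a hypothetical helper name):
import re

def parse_decimation(path: str) -> int:
    # return the decimation factor encoded in the path suffix, default 1
    m = re.search(r'~decim-(\d+)$', path)
    return int(m[1]) if m else 1

assert parse_decimation('/joule/1~decim-16') == 16
assert parse_decimation('/joule/1') == 1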
def test_when_server_returns_error_code(self):
    server = FakeJoule()
    # create the source stream
    src = DataStream(id=0, name="source", keep_us=100,
                     datatype=DataStream.DATATYPE.FLOAT32)
    src.elements = [Element(name="e%d" % x, index=x,
                            display_type=Element.DISPLAYTYPE.CONTINUOUS)
                    for x in range(3)]

    # source has 200 rows of data between [0, 200] in two intervals
    src_data = np.hstack((helpers.create_data(src.decimated_layout, start=0,
                                              length=100, step=1),
                          pipes.interval_token(src.decimated_layout),
                          helpers.create_data(src.decimated_layout, start=100,
                                              length=100, step=1)))
    src_info = StreamInfo(int(src_data['timestamp'][0]),
                          int(src_data['timestamp'][-1]),
                          len(src_data))
    server.add_stream('/test/source', src, src_info, src_data)
    server.response = "test error"
    server.http_code = 500
    server.stub_data_read = True
    self.start_server(server)
    runner = CliRunner()
    with self.assertLogs(level=logging.ERROR):
        runner.invoke(main, ['data', 'read', '/test/source', '--start', 'now'])
    self.stop_server()
def test_reads_decimated_data(self):
    server = FakeJoule()
    # create the source stream
    src = DataStream(id=0, name="source", keep_us=100,
                     datatype=DataStream.DATATYPE.FLOAT32)
    src.elements = [Element(name="e%d" % x, index=x,
                            display_type=Element.DISPLAYTYPE.CONTINUOUS)
                    for x in range(3)]

    # source has 200 rows of data between [0, 200] in two intervals
    src_data = np.hstack((helpers.create_data(src.decimated_layout, start=0,
                                              length=100, step=1),
                          pipes.interval_token(src.decimated_layout),
                          helpers.create_data(src.decimated_layout, start=100,
                                              length=100, step=1)))
    src_info = StreamInfo(int(src_data['timestamp'][0]),
                          int(src_data['timestamp'][-1]),
                          len(src_data))
    server.add_stream('/test/source', src, src_info, src_data)
    self.start_server(server)

    # mark the intervals and show the bounds
    runner = CliRunner()
    result = runner.invoke(main, ['data', 'read', '/test/source',
                                  '--start', '0', '--end', '1 hour ago',
                                  '--max-rows', '28',
                                  '--mark-intervals', '--show-bounds'])
    _print_result_on_error(result)
    self.assertEqual(result.exit_code, 0)
    output = result.output.split('\n')
    for x in range(len(src_data)):
        row = src_data[x]
        if row == pipes.interval_token(src.decimated_layout):
            expected = '# interval break'
        else:
            expected = "%d %s" % (row['timestamp'],
                                  ' '.join('%f' % x for x in row['data']))
        self.assertTrue(expected in output[x + 1])

    # create a new event loop for the next run
    loop = asyncio.new_event_loop()
    loop.set_debug(True)
    asyncio.set_event_loop(loop)

    # do not mark the intervals and hide the bounds
    runner = CliRunner()
    result = runner.invoke(main, ['data', 'read', '/test/source',
                                  '--start', '0', '--end', '1 hour ago',
                                  '--max-rows', '28'])
    self.assertEqual(result.exit_code, 0)
    output = result.output.split('\n')
    offset = 0
    for x in range(len(src_data)):
        row = src_data[x]
        if row == pipes.interval_token(src.decimated_layout):
            offset = 1
            continue
        else:
            expected = "%d %s" % (row['timestamp'],
                                  ' '.join('%f' % x for x in row['data'][:3]))
        self.assertTrue(expected in output[x - offset + 1])
    self.stop_server()
async def callback(rx_data, layout, factor):
    if rx_data[0] != pipes.interval_token(layout):
        rx_chunks.append(rx_data)