async def _handle_remote_output(self, src_pipe: pipes.Pipe, dest_stream: DataStream):
    """
    Continuously tries to make a connection to dest_stream
    and write src_pipe's data to it
    """
    dest_pipe = None
    node = self.get_node(dest_stream.remote_node)
    if node is None:
        log.error("output requested from [%s] but this node is not a follower"
                  % dest_stream.remote_node)
        return
    try:
        while True:
            try:
                dest_pipe = await node.data_write(dest_stream.remote_path)
                while True:
                    data = await src_pipe.read()
                    await dest_pipe.write(data)
                    src_pipe.consume(len(data))
            except ConfigurationError as e:
                log.error("Subscriber::_handle_remote_output: %s" % str(e))
                await asyncio.sleep(self.REMOTE_HANDLER_RESTART_INTERVAL)
    except asyncio.CancelledError:
        pass
    finally:
        await src_pipe.close()
        if dest_pipe is not None:
            await dest_pipe.close()
        await node.close()
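# A minimal, self-contained sketch of the reconnect-and-pump pattern used above,
# with stand-in stubs (FakePipe, connect) in place of joule's node/pipe API --
# hypothetical names, not part of the joule codebase.
import asyncio

RESTART_INTERVAL = 1.0  # seconds between reconnect attempts (assumed value)

class FakePipe:
    """Stand-in for pipes.Pipe: read() returns a chunk, consume() acknowledges it."""
    def __init__(self, chunks):
        self._chunks = list(chunks)

    async def read(self):
        if not self._chunks:
            raise asyncio.CancelledError  # simulate shutdown when data runs out
        return self._chunks[0]

    def consume(self, n):
        self._chunks.pop(0)

async def pump(src: FakePipe, connect):
    """Forever: (re)connect to the destination, then relay src -> dest."""
    try:
        while True:
            try:
                dest = await connect()
                while True:
                    data = await src.read()
                    await dest.put(data)      # stand-in for dest_pipe.write()
                    src.consume(len(data))
            except ConnectionError:
                await asyncio.sleep(RESTART_INTERVAL)  # back off, then reconnect
    except asyncio.CancelledError:
        pass  # normal shutdown path

async def main():
    dest = asyncio.Queue()
    async def connect():
        return dest
    await pump(FakePipe([b"abc", b"de"]), connect)
    print(dest.qsize(), "chunks relayed")  # -> 2 chunks relayed

asyncio.run(main())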
async def _send_data(session: BaseSession,
                     stream: DataStream,
                     pipe: Pipe):
    # TODO is this necessary?
    output_complete = False

    async def _data_sender():
        nonlocal output_complete
        try:
            while True:
                data = await pipe.read()
                if len(data) > 0:
                    yield data.tobytes()
                if pipe.end_of_interval:
                    yield interval_token(stream.layout).tobytes()
                pipe.consume(len(data))
        except EmptyPipe:
            yield interval_token(stream.layout).tobytes()
        output_complete = True

    try:
        result = await session.post("/data",
                                    params={"id": stream.id},
                                    data=_data_sender(),
                                    chunked=True)
    except Exception as e:
        pipe.fail()
        raise e
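# A minimal, self-contained sketch of the framing assumed by _data_sender above:
# the chunked body interleaves raw binary rows with a sentinel "interval token"
# row after each interval break. The token format here (timestamp == -1) is an
# assumed stand-in for joule's interval_token(), which may encode it differently.
import numpy as np

dtype = np.dtype([('timestamp', '<i8'), ('data', '<f4', 2)])  # e.g. a float32_2 layout
TOKEN_TS = -1                       # assumed sentinel timestamp
TOKEN = np.zeros(1, dtype=dtype)
TOKEN['timestamp'] = TOKEN_TS

def frame(intervals):
    """Serialize row-arrays, appending a token after each interval."""
    return b''.join(rows.tobytes() + TOKEN.tobytes() for rows in intervals)

def unframe(buf):
    """Split the byte stream back into intervals at each sentinel row."""
    rows = np.frombuffer(buf, dtype=dtype)
    breaks = np.where(rows['timestamp'] == TOKEN_TS)[0]
    intervals, start = [], 0
    for b in breaks:
        intervals.append(rows[start:b])
        start = b + 1
    return intervals

first = np.array([(1, (0.5, 1.5)), (2, (0.25, 0.75))], dtype=dtype)
second = np.array([(10, (9.0, 9.5))], dtype=dtype)
parts = unframe(frame([first, second]))
assert len(parts) == 2 and len(parts[0]) == 2 and len(parts[1]) == 1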
async def _live_reader(session: BaseSession, my_stream: DataStream,
                       pipe_out: Pipe):
    log.info("requesting live connection to [%s]" % my_stream.name)
    params = {'id': my_stream.id, 'subscribe': '1'}
    try:
        raw_session = await session.get_session()
        async with raw_session.get(session.url + "/data",
                                   params=params,
                                   ssl=session.ssl_context) as response:
            if response.status != 200:  # pragma: no cover
                msg = await response.text()
                log.error("Error reading input [%s]: %s" % (my_stream.name, msg))
                await pipe_out.close()
                return
            pipe_out.change_layout(response.headers['joule-layout'])
            pipe_out.decimation_level = int(response.headers['joule-decimation'])
            pipe_in = InputPipe(layout=pipe_out.layout,
                                stream=my_stream,
                                reader=response.content)
            while True:
                data = await pipe_in.read()
                pipe_in.consume(len(data))
                await pipe_out.write(data)
                if pipe_in.end_of_interval:
                    await pipe_out.close_interval()
    except (asyncio.CancelledError, EmptyPipe, aiohttp.ClientError):
        pass
    except Exception as e:
        print("unexpected exception: ", e)
        raise e
    await pipe_out.close()
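# Minimal model of the relay loop above: copy rows from an input pipe to an
# output pipe, propagating interval breaks. SimplePipe is a hypothetical stand-in
# for joule's pipe API (read/consume/end_of_interval/close_interval), and
# EOFError stands in for joule's EmptyPipe.
import asyncio

class SimplePipe:
    def __init__(self, chunks=()):
        # each chunk is (rows, end_of_interval_flag)
        self._chunks = list(chunks)
        self.end_of_interval = False
        self.intervals = [[]]  # written rows, grouped by interval

    async def read(self):
        if not self._chunks:
            raise EOFError  # stand-in for EmptyPipe
        rows, self.end_of_interval = self._chunks[0]
        return rows

    def consume(self, n):
        self._chunks.pop(0)

    async def write(self, rows):
        self.intervals[-1].extend(rows)

    async def close_interval(self):
        self.intervals.append([])

async def relay(pipe_in: SimplePipe, pipe_out: SimplePipe):
    try:
        while True:
            data = await pipe_in.read()
            pipe_in.consume(len(data))
            await pipe_out.write(data)
            if pipe_in.end_of_interval:
                await pipe_out.close_interval()
    except EOFError:
        pass

src = SimplePipe([([1, 2], False), ([3], True), ([4], False)])
dst = SimplePipe()
asyncio.run(relay(src, dst))
assert dst.intervals == [[1, 2, 3], [4]]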
def test_raises_dtype_errors(self):
    pipe = Pipe(layout="uint8_4")
    # data for a different stream type
    data1 = helpers.create_data("float32_3")
    # invalid array structure (must be 2D)
    data2 = np.ones((3, 3, 3))
    # invalid structured array
    data3 = np.array([('timestamp', 1, 2, 3),
                      ('bad', 1, 2, 3)])
    for data in [data1, data2, data3]:
        with self.assertRaises(PipeError):
            _ = pipe._apply_dtype(data)
async def _output_handler(self, child_output: pipes.Pipe,
                          subscribers: List[pipes.Pipe]):
    """given a numpy pipe, read data and write it to each pipe in [subscribers]"""
    last_ts = None
    try:
        while True:
            data = await child_output.read()
            if len(data) > 0:
                if not self._verify_monotonic_timestamps(data, last_ts,
                                                         child_output.name):
                    for pipe in subscribers:
                        await pipe.close_interval()
                    await self.restart()
                    break
                last_ts = data['timestamp'][-1]
                child_output.consume(len(data))
                for pipe in subscribers[:]:
                    try:
                        await asyncio.wait_for(pipe.write(data),
                                               self.SUBSCRIBER_TIMEOUT)
                    except (ConnectionResetError, BrokenPipeError):
                        log.warning("subscriber write error [%s]" % pipe.stream)
                        subscribers.remove(pipe)
                    except asyncio.TimeoutError:
                        log.warning("subscriber [%s] timed out" % pipe.stream)
                        pipe.close_interval_nowait()
            if child_output.end_of_interval:
                for pipe in subscribers:
                    pipe.close_interval_nowait()
    except (EmptyPipe, asyncio.CancelledError):
        pass
    except PipeError as e:
        if 'closed pipe' in str(e):
            # during shutdown the pipe may be closed but
            # another read might be attempted by the output_handler
            pass
        else:
            log.warning("Worker %s, pipe %s: %s" %
                        (self.name, child_output.name, str(e)))
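# Simplified model of the fan-out loop above, using asyncio.Queues as stand-in
# subscribers. Unlike the real handler (which only evicts on connection errors
# and marks an interval break on timeout), this sketch evicts on timeout too,
# just to show the iterate-over-a-copy-then-remove pattern.
import asyncio

async def fan_out(chunks, subscribers, timeout):
    for data in chunks:
        # iterate over a copy so removal doesn't skip the next subscriber
        for sub in subscribers[:]:
            try:
                await asyncio.wait_for(sub.put(data), timeout)
            except (asyncio.TimeoutError, ConnectionResetError, BrokenPipeError):
                subscribers.remove(sub)

async def main():
    fast = asyncio.Queue()
    slow = asyncio.Queue(maxsize=1)
    slow.put_nowait("stale")        # full queue: put() blocks until timeout
    subs = [fast, slow]
    await fan_out(["a", "b"], subs, timeout=0.01)
    assert subs == [fast] and fast.qsize() == 2

asyncio.run(main())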
def test_raises_cache_errors(self):
    loop = asyncio.get_event_loop()
    # input pipes cannot cache
    pipe = Pipe(direction=Pipe.DIRECTION.INPUT)
    with self.assertRaises(PipeError):
        pipe.enable_cache(100)
    with self.assertRaises(PipeError):
        loop.run_until_complete(pipe.flush_cache())
    # output pipes must implement caching
    pipe = Pipe(direction=Pipe.DIRECTION.OUTPUT)
    with self.assertRaises(PipeError) as e:
        pipe.enable_cache(100)
    self.assertTrue("abstract" in "%r" % e.exception)
    with self.assertRaises(PipeError) as e:
        loop.run_until_complete(pipe.flush_cache())
    self.assertTrue("abstract" in "%r" % e.exception)
async def _historic_reader(session: BaseSession,
                           my_stream: DataStream,
                           pipe_out: Pipe,
                           start_time: int,
                           end_time: int,
                           max_rows: Optional[int]):
    log.info("requesting historic connection to [%s]" % my_stream.name)
    params = {'id': my_stream.id}
    if max_rows is not None:
        params['max-rows'] = max_rows
    if start_time is not None:
        params['start'] = int(start_time)
    if end_time is not None:
        params['end'] = int(end_time)
    try:
        my_session = await session.get_session()
        async with my_session.get(session.url + "/data",
                                  params=params,
                                  ssl=session.ssl_context) as response:
            if response.status != 200:  # pragma: no cover
                msg = await response.text()
                log.error("Error reading input [%s]: %s" % (my_stream.name, msg))
                await pipe_out.close()
                return
            pipe_out.change_layout(response.headers['joule-layout'])
            pipe_out.decimation_level = int(response.headers['joule-decimation'])
            pipe_in = InputPipe(layout=pipe_out.layout,
                                stream=my_stream,
                                reader=response.content)
            while True:
                data = await pipe_in.read()
                pipe_in.consume(len(data))
                await pipe_out.write(data)
                if pipe_in.end_of_interval:
                    await pipe_out.close_interval()
    except (asyncio.CancelledError, EmptyPipe):
        pass
    except Exception as e:
        print("unexpected exception: ", e)
        raise e
    finally:
        await pipe_out.close()
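# The historic reader differs from the live one mainly in its query parameters:
# no 'subscribe' flag, plus optional row and time bounds. A tiny helper like this
# (hypothetical, not part of joule) makes that contract explicit; start/end are
# integer timestamps, and the explicit None checks keep a bound of 0 valid.
from typing import Optional

def historic_params(stream_id: int,
                    start_time: Optional[int],
                    end_time: Optional[int],
                    max_rows: Optional[int]) -> dict:
    params = {'id': stream_id}
    if max_rows is not None:
        params['max-rows'] = max_rows
    if start_time is not None:
        params['start'] = int(start_time)
    if end_time is not None:
        params['end'] = int(end_time)
    return params

assert historic_params(3, 0, 100, None) == {'id': 3, 'start': 0, 'end': 100}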
def test_subscribe(self):
    LAYOUT = "uint8_4"
    # cannot subscribe to input pipes
    input_pipe = Pipe(layout=LAYOUT, direction=Pipe.DIRECTION.INPUT)
    output_pipe = Pipe(layout=LAYOUT, direction=Pipe.DIRECTION.OUTPUT)
    subscriber = Pipe(layout=LAYOUT, direction=Pipe.DIRECTION.OUTPUT)
    with self.assertRaises(PipeError):
        input_pipe.subscribe(subscriber)
    # can subscribe to output pipes
    unsubscribe = output_pipe.subscribe(subscriber)
    self.assertIn(subscriber, output_pipe.subscribers)
    # can unsubscribe
    unsubscribe()
    self.assertNotIn(subscriber, output_pipe.subscribers)
def test_raises_read_errors(self):
    loop = asyncio.get_event_loop()
    # input pipes must implement read
    pipe = Pipe(direction=Pipe.DIRECTION.INPUT)
    with self.assertRaises(PipeError) as e:
        loop.run_until_complete(pipe.read())
    self.assertTrue("abstract" in "%r" % e.exception)
    # output pipes cannot be read
    pipe = Pipe(direction=Pipe.DIRECTION.OUTPUT)
    with self.assertRaises(PipeError):
        loop.run_until_complete(pipe.read())
def test_raises_write_errors(self):
    loop = asyncio.get_event_loop()
    # input pipes cannot write
    pipe = Pipe(direction=Pipe.DIRECTION.INPUT)
    with self.assertRaises(PipeError):
        loop.run_until_complete(pipe.write(np.array([1, 2, 3, 4])))
    # output pipes must implement write
    pipe = Pipe(direction=Pipe.DIRECTION.OUTPUT)
    with self.assertRaises(PipeError) as e:
        loop.run_until_complete(pipe.write(np.array([1, 2, 3, 4])))
    self.assertTrue("abstract" in "%r" % e.exception)
async def run(self, pipe: pipes.Pipe) -> None:
    """insert stream data from the queue until the queue is empty"""
    # create the database path (lazy stream creation)
    try:
        await self._create_path()
    except asyncio.CancelledError:
        return
    cleaner_task: Optional[asyncio.Task] = None
    if self.stream.keep_us != DataStream.KEEP_ALL:
        cleaner_task = asyncio.create_task(self._clean())
        cleaner_task.set_name("NilmDB Clean Task for [%s]" % self.path)
    while True:
        try:
            async with self._get_client() as session:
                last_ts = None
                while True:
                    await asyncio.sleep(self.insert_period)
                    data = await pipe.read()
                    # there might be an interval break and no new data
                    if len(data) > 0:
                        if last_ts is not None:
                            start = last_ts
                        else:
                            start = data['timestamp'][0]
                        end = data['timestamp'][-1] + 1
                        last_ts = end
                        # lazy initialization of decimator
                        if self.stream.decimate and self.decimator is None:
                            self.decimator = NilmdbDecimator(self.server, self.stream,
                                                             1, 4, self._get_client)
                        # send the data
                        params = {"start": "%d" % start,
                                  "end": "%d" % end,
                                  "path": self.path,
                                  "binary": '1'}
                        async with session.put(self.insert_url, params=params,
                                               data=data.tobytes()) as resp:
                            if resp.status != 200:
                                error = await resp.text()
                                if cleaner_task is not None:
                                    cleaner_task.cancel()
                                    await cleaner_task
                                raise errors.DataError("NilmDB error: %s" % error)
                        # this was successful so consume the data
                        pipe.consume(len(data))
                        # decimate the data
                        if self.decimator is not None:
                            await self.decimator.process(data)
                    # check for interval breaks
                    if pipe.end_of_interval:
                        last_ts = None
                        if self.decimator is not None:
                            self.decimator.close_interval()
        except aiohttp.ClientError as e:  # pragma: no cover
            log.warning("NilmDB raw inserter error: %r, retrying request" % e)
            await asyncio.sleep(self.retry_interval)  # retry the request
        except (pipes.EmptyPipe, asyncio.CancelledError):
            break  # terminate the inserter
    if cleaner_task is not None:
        cleaner_task.cancel()
        await cleaner_task
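# Worked example of the timestamp bookkeeping above: each NilmDB insert covers
# [start, end) with end = last timestamp + 1, and the next chunk reuses that end
# as its start so consecutive inserts tile the interval without gaps. (On an
# interval break the real code resets last_ts to None, starting a fresh range.)
def insert_ranges(chunks):
    ranges, last_ts = [], None
    for timestamps in chunks:           # each chunk: timestamps of one pipe read
        start = last_ts if last_ts is not None else timestamps[0]
        end = timestamps[-1] + 1
        ranges.append((start, end))
        last_ts = end
    return ranges

assert insert_ranges([[100, 110, 120], [130, 140]]) == [(100, 121), (121, 141)]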
async def run(self, pipe: pipes.Pipe) -> None:
    """insert stream data from the queue until the queue is empty"""
    # lazy stream creation
    try:
        async with self.pool.acquire() as conn:
            await psql_helpers.create_stream_table(conn, self.stream)
    except asyncio.CancelledError:
        return
    # close the beginning of the data insert
    first_insert = True
    # track the last timestamp inserted
    last_ts = None
    ticks = 0
    try:
        while True:
            await asyncio.sleep(self.insert_period)
            data = await pipe.read()
            # there might be an interval break and no new data
            try:
                async with self.pool.acquire() as conn:
                    if len(data) > 0:
                        if first_insert:
                            first_insert = False
                            await psql_helpers.close_interval(
                                conn, self.stream, data['timestamp'][0] - 1)
                        last_ts = data['timestamp'][-1]
                        # lazy initialization of decimator
                        if self.stream.decimate and self.decimator is None:
                            self.decimator = Decimator(self.stream, 1, 4)
                        psql_bytes = psql_helpers.data_to_bytes(data)
                        await conn.copy_to_table("stream%d" % self.stream.id,
                                                 schema_name='data',
                                                 format='binary',
                                                 source=psql_bytes)
                        # this was successful so consume the data
                        pipe.consume(len(data))
                        # decimate the data
                        if self.decimator is not None:
                            await self.decimator.process(conn, data)
                    # check for interval breaks
                    if pipe.end_of_interval and last_ts is not None:
                        await psql_helpers.close_interval(
                            conn, self.stream, last_ts)
                        if self.decimator is not None:
                            self.decimator.close_interval()
                    ticks += 1
                    if ticks % self.cleanup_interval == 0:
                        await self.cleanup(conn)
            except (asyncpg.exceptions.PostgresConnectionError, socket.error) as e:
                log.error(
                    f"Timescale inserter: [{str(e)}], trying again in 2 seconds"
                )
                await asyncio.sleep(2)
    except (pipes.EmptyPipe, asyncio.CancelledError):
        pass
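# Pure-Python model of the interval-boundary logic above: the first insert seals
# any prior data just before its first timestamp, and each pipe interval break
# seals at the last inserted timestamp. boundary_events is a hypothetical helper,
# not joule code; psql_helpers.close_interval is replaced by collecting marks.
def boundary_events(chunks):
    """chunks: list of (timestamps, end_of_interval). Returns close-interval marks."""
    marks, first_insert, last_ts = [], True, None
    for timestamps, end_of_interval in chunks:
        if timestamps:
            if first_insert:
                first_insert = False
                marks.append(timestamps[0] - 1)   # seal anything before this data
            last_ts = timestamps[-1]
        if end_of_interval and last_ts is not None:
            marks.append(last_ts)                 # seal the interval that just ended
    return marks

assert boundary_events([([10, 20], False), ([30], True), ([50], False)]) == [9, 30]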
def test_end_of_interval_default(self):
    # end of interval flag is false by default
    pipe = Pipe(direction=Pipe.DIRECTION.INPUT)
    self.assertFalse(pipe.end_of_interval)
def test_checks_dtype(self):
    for layout in ['invalid', 'bad_3', 'float_abc']:
        pipe = Pipe(layout=layout)
        with self.assertRaises(ValueError) as e:
            _ = pipe.dtype
        self.assertTrue("layout" in "%r" % e.exception)