async def info(self, streams: List[DataStream]) -> Dict[int, StreamInfo]:
    """
    Compute an effective StreamInfo for each requested stream by merging
    the raw path's info with every non-empty decimation level's info.

    Returns a dict keyed by stream id.
    """
    info_dict = await self._path_info(streams)
    result: Dict[int, StreamInfo] = {}
    for stream in streams:
        base_path = compute_path(stream)
        matches = [info for (path, info) in info_dict.items()
                   if path == base_path]
        if not matches:
            # no data records for this stream: synthesize an empty result
            result[stream.id] = StreamInfo(None, None, 0, 0, 0)
            continue
        merged: StreamInfo = matches[0]
        if merged.rows != 0:
            # stream has data: fold each decimation level into the base info
            decim_regex = re.compile(r"%s~decim-(\d)+$" % base_path)
            for path, decim_info in info_dict.items():
                if decim_regex.match(path) is None:
                    continue
                if decim_info.rows == 0:
                    # empty level: it has no start/end values to merge
                    continue
                merged.start = min((merged.start, decim_info.start))
                merged.end = max((merged.end, decim_info.end))
                merged.bytes += decim_info.bytes
                merged.total_time = max((merged.total_time,
                                         decim_info.total_time))
        result[stream.id] = merged
    return result
async def extract(self, stream: DataStream, start: Optional[int],
                  end: Optional[int],
                  callback: Callable[[np.ndarray, str, int], Coroutine],
                  max_rows: Optional[int] = None,
                  decimation_level: Optional[int] = None):
    """
    Extract data from ``stream`` between ``start`` and ``end`` (microsecond
    timestamps), delivering chunks to ``callback(data, layout, decimation_level)``.

    If ``decimation_level`` is not given, one is chosen so the result fits in
    ``max_rows`` (or 1, the raw path, when ``max_rows`` is also None).

    Raises:
        errors.InsufficientDecimationError: if no stored decimation level can
            satisfy the ``max_rows`` constraint
        errors.DataError: on NilmDB/transport failures
    """
    # figure out appropriate decimation level
    if decimation_level is None:
        if max_rows is None:
            # no row limit: serve the raw (undecimated) path
            decimation_level = 1
        else:
            # find out how much data this represents
            count = await self._count_by_path(compute_path(stream),
                                              start, end)
            if count > 0:
                # round the needed decimation up to a power of the
                # decimation factor (levels are stored at 4, 16, 64, ...)
                desired_decimation = np.ceil(count / max_rows)
                decimation_level = 4**np.ceil(
                    np.log(desired_decimation) /
                    np.log(self.decimation_factor))
            else:
                # create an empty array with the right data type
                data = np.array([], dtype=pipes.compute_dtype(stream.layout))
                await callback(data, stream.layout, 1)
                return
            # make sure the target decimation level exists and has data
            try:
                path = compute_path(stream, decimation_level)
                if (await self._count_by_path(path, start, end)) == 0:
                    # no data in the decimated path
                    raise errors.InsufficientDecimationError(
                        "required level is empty")
            except errors.DataError as e:
                if ERRORS.NO_SUCH_STREAM.value in str(e):
                    # no decimated data or required level does not exist
                    raise errors.InsufficientDecimationError(
                        "required level %d does not exist" % decimation_level)
                # some other error, propagate it up
                raise e  # pragma: no cover
    elif max_rows is not None:
        # two constraints, make sure we aren't going to return too much data
        count = await self._count_by_path(
            compute_path(stream, decimation_level), start, end)
        if count > max_rows:
            raise errors.InsufficientDecimationError(
                "actual_rows(%d) > max_rows(%d)" % (count, max_rows))
    # retrieve data from stream
    path = compute_path(stream, decimation_level)
    if decimation_level > 1:
        # decimated levels carry min/mean/max columns, so the layout differs
        layout = stream.decimated_layout
    else:
        layout = stream.layout
    try:
        await self._extract_by_path(path, start, end, layout, callback)
    except aiohttp.ClientError as e:
        # surface transport failures as data errors
        raise errors.DataError(str(e))
async def consolidate(self, stream: 'DataStream', start: int, end: int,
                      max_gap: int) -> int:
    """
    Remove interval gaps less than or equal to ``max_gap`` duration (in us)
    by writing empty fill records over each small gap.

    Returns the number of gaps closed (0 when there is no data or no gaps).

    Raises:
        errors.DataError: if NilmDB rejects a fill insert
    """
    intervals = await self.intervals(stream, start, end)
    if len(intervals) == 0:
        return 0  # no data, nothing to do
    duration = [intervals[0][0], intervals[-1][1]]
    gaps = interval_tools.interval_difference([duration], intervals)
    if len(gaps) == 0:
        return 0  # no interval breaks, nothing to do
    small_gaps = [gap for gap in gaps if (gap[1] - gap[0]) <= max_gap]
    # NOTE: removed unused _path_info() lookup (dead copy-paste from
    # remove/destroy that cost an extra server round-trip)
    path = compute_path(stream)
    insert_url = "{server}/stream/insert".format(server=self.server)
    # close each gap with an empty binary insert
    for gap in small_gaps:
        async with self._get_client() as session:
            params = {
                "start": "%d" % gap[0],
                "end": "%d" % gap[1],
                "path": path,
                "binary": '1'
            }
            async with session.put(insert_url, params=params,
                                   data=None) as resp:
                if resp.status != 200:  # pragma: no cover
                    error = await resp.text()
                    raise errors.DataError("NilmDB(d) error: %s" % error)
    return len(small_gaps)
async def intervals(self, stream: DataStream, start: Optional[int],
                    end: Optional[int]):
    """
    Return the data intervals for ``stream`` between ``start`` and ``end``.
    A stream that has never been written does not exist in NilmDB, so it
    yields an empty list rather than an error.
    """
    path = compute_path(stream)
    try:
        return await self._intervals_by_path(path, start, end)
    except errors.DataError as e:
        if ERRORS.NO_SUCH_STREAM.value not in str(e):
            raise e
        # stream has no records yet: treat as empty
        return []
async def _create_path(self):
    """
    Create this inserter's stream path on the NilmDB server, retrying
    forever on connection errors ("already exists" is not an error).
    """
    payload = {"path": compute_path(self.stream),
               "layout": self.stream.layout}
    while True:
        try:
            async with self._get_client() as session:
                async with session.post(self.create_url,
                                        data=payload) as resp:
                    await check_for_error(
                        resp, ignore=[ERRORS.STREAM_ALREADY_EXISTS])
            return  # path created (or already present)
        except aiohttp.ClientError as e:  # pragma: no cover
            log.warning("NilmDB inserter create_path error: %r, retrying request" % e)
            # back off before retrying the request
            await asyncio.sleep(self.retry_interval)
async def remove(self, stream, start: Optional[int] = None,
                 end: Optional[int] = None):
    """Remove [start, end] from the stream's path and every decimation level."""
    path_info = await self._path_info([stream])
    base_path = compute_path(stream)
    pattern = re.compile(r"%s~decim-(\d)+$" % base_path)
    # the base path plus any stored decimation paths
    targets = [base_path] + [p for p in path_info.keys()
                             if pattern.match(p)]
    for target in targets:
        await self._remove_by_path(target, start, end)
async def destroy(self, stream: DataStream):
    """
    Permanently delete ``stream`` from NilmDB: remove all data, then
    destroy the base path and every decimation path on the server
    (missing paths are ignored).
    """
    # clear the data first so the destroy calls succeed
    await self.remove(stream)
    url = "{server}/stream/destroy".format(server=self.server)
    info = await self._path_info([stream])
    all_paths = info.keys()
    base_path = compute_path(stream)
    # match stored decimation paths like <base>~decim-4, <base>~decim-16, ...
    regex = re.compile(r'%s~decim-(\d)+$' % base_path)
    decim_paths = list(filter(regex.match, all_paths))
    async with self._get_client() as session:
        for path in [base_path, *decim_paths]:
            async with session.post(url, data={"path": path}) as resp:
                # a path that was never created is fine to skip
                await check_for_error(resp,
                                      ignore=[ERRORS.NO_STREAM_AT_PATH])
    # NOTE(review): returning an aiohttp web.Response from a data-store
    # method looks like copy-paste from an HTTP handler — confirm callers
    # actually expect this value
    return web.Response(text="ok")
def __init__(self, server: str, stream: DataStream, insert_period: float,
             cleanup_period: float,
             session_factory: Callable[[], aiohttp.ClientSession],
             retry_interval=0.5):
    """
    Inserter for a single NilmDB stream.

    ``insert_period`` and ``cleanup_period`` are jittered per-instance so
    multiple inserters do not hit the server in lockstep.
    """
    self.server = server  # save for initializing decimators
    # NilmDB endpoints used by this inserter
    self.insert_url = "{server}/stream/insert".format(server=server)
    self.remove_url = "{server}/stream/remove".format(server=server)
    self.create_url = "{server}/stream/create".format(server=server)
    self.stream = stream
    self.path = compute_path(stream)
    # add random offsets to the periods to distribute traffic
    insert_jitter = random.random()
    cleanup_jitter = random.random()
    self.insert_period = insert_period + insert_period * insert_jitter * 0.5
    self.cleanup_period = cleanup_period + cleanup_period * cleanup_jitter * 0.25
    self._get_client = session_factory
    self.decimator = None
    self.retry_interval = retry_interval
async def initialize(self, streams: List[DataStream]) -> None:
    """
    Register every stream's path with the NilmDB server; paths that
    already exist are accepted silently.

    Raises:
        errors.DataError: if the server cannot be reached
    """
    # NOTE(review): this connector is stored but sessions come from the
    # factory — confirm it is consumed elsewhere
    self.connector = aiohttp.TCPConnector()
    url = "{server}/stream/create".format(server=self.server)
    try:
        async with self._get_client() as session:
            for stream in streams:
                payload = {"path": compute_path(stream),
                           "layout": stream.layout}
                async with session.post(url, data=payload) as resp:
                    await check_for_error(
                        resp, ignore=[ERRORS.STREAM_ALREADY_EXISTS])
    except aiohttp.ClientError:
        raise errors.DataError("cannot contact NilmDB at [%s]" % self.server)
async def insert(self, stream: DataStream, start: int, end: int,
                 data: np.array) -> None:
    """Write binary stream data covering [start, end] to NilmDB."""
    url = "{server}/stream/insert".format(server=self.server)
    params = {
        "start": "%d" % start,
        "end": "%d" % end,
        "path": compute_path(stream),
        "binary": '1'
    }
    async with self._get_client() as session:
        async with session.put(url, params=params,
                               data=data.tobytes()) as resp:
            if resp.status == 200:
                return
            if resp.status == 400:
                # nilmdb rejected the data
                error = await resp.json()
                raise errors.DataError(error["message"])
            raise errors.DataError(await resp.text())  # pragma: no cover
def __init__(self, server: str, stream: DataStream, from_level: int,
             factor: int,
             session_factory: Callable[[], aiohttp.ClientSession],
             retry_interval=0.5):
    """
    Decimator producing level ``from_level * factor`` for ``stream``.
    Chains recursively: ``again`` marks levels built from already
    decimated data.
    """
    self.server = server
    self.stream = stream
    self.level = from_level * factor
    self.factor = factor
    self.path = compute_path(stream, self.level)
    # NilmDB endpoints used by this decimator
    self.insert_url = "{server}/stream/insert".format(server=server)
    self.create_url = "{server}/stream/create".format(server=server)
    # input is itself decimated data when from_level > 1
    self.again = from_level > 1
    self.layout = stream.decimated_layout
    self.buffer = []
    self.last_ts = None
    self.path_created = False
    self.child: NilmdbDecimator = None
    self.retry_interval = retry_interval
    self._get_client = session_factory
    # hold off to rate limit NilmDB traffic (currently disabled)
    self.holdoff = 0  # random.random()
def get_paths(self) -> List[str]:
    """Return this decimator's path followed by all deeper levels' paths."""
    own = [compute_path(self.stream, self.level)]
    if self.child is None:
        return own
    return own + self.child.get_paths()