예제 #1
0
def create_source_data(server):
    """Register a FLOAT32 test stream on *server* and return its rows.

    The stream has three continuous elements and is added at
    ``/test/source``. Its 100 rows are split into four 25-row intervals;
    the data handed to the server has an interval token between each pair
    of consecutive intervals.

    Returns the generated rows without any interval tokens.
    """
    src = DataStream(id=0,
                     name="source",
                     keep_us=100,
                     datatype=DataStream.DATATYPE.FLOAT32)
    elements = []
    for idx in range(3):
        elements.append(
            Element(name="e%d" % idx,
                    index=idx,
                    display_type=Element.DISPLAYTYPE.CONTINUOUS))
    src.elements = elements

    # 100 rows of data with timestamps starting at 0 and a step of 1
    src_data = helpers.create_data(src.layout, length=100, start=0, step=1)
    timestamps = src_data['timestamp']

    # stitch the four segments together with an interval token between them
    segments = [src_data[:25], src_data[25:50], src_data[50:75], src_data[75:]]
    parts = [segments[0]]
    for segment in segments[1:]:
        parts.append(pipes.interval_token(src.layout))
        parts.append(segment)
    pipe_data = np.hstack(parts)

    # [first, last] timestamp of each segment
    intervals = [[timestamps[first], timestamps[last]]
                 for first, last in ((0, 24), (25, 49), (50, 74), (75, 99))]

    src_info = StreamInfo(int(timestamps[0]), int(timestamps[-1]),
                          len(src_data))
    server.add_stream('/test/source', src, src_info, pipe_data, intervals)
    return src_data
예제 #2
0
파일: data.py 프로젝트: wattsworth/joule
 async def _data_sender():
     """Async generator yielding the contents of ``pipe`` as raw bytes.

     Each block read from the pipe is yielded via ``tobytes()`` (empty
     blocks are skipped). An interval token for ``stream.layout`` is
     yielded whenever the pipe reports ``end_of_interval``, and once more
     when the pipe raises ``EmptyPipe``. The enclosing scope's
     ``output_complete`` flag is set to True after the pipe is exhausted.
     """
     nonlocal output_complete
     try:
         while True:
             data = await pipe.read()
             if len(data) > 0:
                 yield data.tobytes()
             if pipe.end_of_interval:
                 # mark the interval boundary in the outgoing byte stream
                 yield interval_token(stream.layout).tobytes()
             # the block is only consumed after it has been yielded
             pipe.consume(len(data))
     except EmptyPipe:
         # pipe is exhausted: terminate the stream with an interval token
         yield interval_token(stream.layout).tobytes()
     output_complete = True
예제 #3
0
 async def extract(self,
                   stream: DataStream,
                   start: Optional[int],
                   end: Optional[int],
                   callback: Callable[[np.ndarray, str, bool], Coroutine],
                   max_rows: int = None,
                   decimation_level=1):
     """Feed generated data to *callback* according to the configured flags.

     Nothing happens when ``self.no_data`` is set. ``self.raise_data_error``
     and ``self.raise_decimation_error`` raise ``DataError`` and
     ``InsufficientDecimationError`` respectively. Otherwise
     ``self.nintervals`` intervals of ``self.nchunks`` 25-row chunks are
     passed to ``callback(data, layout, decimation_level)``, with an
     interval token between intervals (none after the last one).

     ``start``, ``end`` and ``max_rows`` are accepted but not used here.
     """
     if self.no_data:
         return  # the callback is never invoked
     if self.raise_data_error:
         raise DataError("nilmdb error")
     if self.raise_decimation_error:
         raise InsufficientDecimationError("insufficient decimation")

     # anything other than a level above 1 is treated as undecimated data
     use_decimated = decimation_level is not None and decimation_level > 1
     if not use_decimated:
         decimation_level = 1
     layout = stream.decimated_layout if use_decimated else stream.layout

     for interval_idx in range(self.nintervals):
         for _ in range(self.nchunks):
             chunk = helpers.create_data(layout, length=25)
             await callback(chunk, layout, decimation_level)
         # separate intervals with a token, but do not end on one
         if interval_idx < (self.nintervals - 1):
             boundary = pipes.interval_token(layout)
             await callback(boundary, layout, decimation_level)
예제 #4
0
 async def test_propogates_intervals_to_decimations(self):
     """Interval breaks in the raw data must carry over to every decimation level.

     Twelve 15-row chunks are queued; an interval token follows each of the
     chunks with index 7..11, so the first eight chunks form one contiguous
     run and the last four chunks are each isolated.
     """
     self.stream1.decimate = True
     source = QueueReader()
     pipe = pipes.InputPipe(stream=self.stream1, reader=source)
     # insert the following broken chunks of data
     # |----15*8----|-15-|-15-|-15-|-15-|  ==> raw (180 samples)
     # |-----30-----|-3--|--3-|--3-|--3-|  ==> x4  (42 samples)
     # |-----7------|                      ==> x16 (7 samples)
     # |-----1------|                      ==> x64 (1 sample)
     n_chunks = 12
     for i in range(n_chunks):
         data = helpers.create_data(layout="int8_3", length=15, start=i * 1e6, step=1)
         await source.put(data.tobytes())
         if i > 6:  # breaks in the 2nd half
             await source.put(pipes.interval_token("int8_3").tobytes())
     task = await  self.store.spawn_inserter(self.stream1, pipe, insert_period=0)
     await task
     # should have raw, x4, x16, x64, x256
     self.assertEqual(len(self.fake_nilmdb.streams), 5)
     # raw level keeps every row (12 chunks * 15 rows)
     self.assertEqual(self.fake_nilmdb.streams["/joule/1"].rows, n_chunks * 15)
     # x4 level should be missing data due to interval breaks (30 + 4 * 3)
     self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-4"].rows, 42)
     # x16 level should have 7 samples (only from first part)
     self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-16"].rows, 7)
     # x64 level should have 1 sample
     self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-64"].rows, 1)
     # x256 level should be empty
     self.assertEqual(self.fake_nilmdb.streams["/joule/1~decim-256"].rows, 0)
예제 #5
0
async def _extract_data(conn: asyncpg.Connection,
                        stream: DataStream,
                        callback,
                        decimation_level: int = 1,
                        start: int = None,
                        end: int = None,
                        block_size=50000):
    """Stream rows of *stream* to *callback*, one interval at a time.

    Interval boundaries are read from ``data.stream<id>_intervals`` within
    the requested time bounds. The data table (``data.stream<id>``, with a
    ``_<level>`` suffix when ``decimation_level`` is above 1) is then read
    in blocks of at most ``block_size`` rows per boundary.

    ``callback(data, layout, decimation_level)`` is awaited for every block
    and for an interval token between consecutive intervals (none after the
    last). If either table does not exist the callback receives one empty
    array and the function returns.
    """
    decimated = decimation_level > 1
    layout = stream.decimated_layout if decimated else stream.layout

    table_name = "data.stream%d" % stream.id
    if decimated:
        table_name += "_%d" % decimation_level

    # look up the interval boundaries inside the requested range
    boundary_query = ("SELECT time FROM data.stream%d_intervals " % stream.id
                      + psql_helpers.query_time_bounds(start, end))
    try:
        boundary_records = await conn.fetch(boundary_query)
    except asyncpg.UndefinedTableError:
        # no data tables
        empty = np.array([], dtype=pipes.compute_dtype(layout))
        await callback(empty, layout, decimation_level)
        return

    # the requested end acts as the closing boundary
    boundary_records += [{'time': end}]
    last_idx = len(boundary_records) - 1
    for idx, record in enumerate(boundary_records):
        end = record['time']
        # page through the rows of this interval
        while True:
            data_query = ("SELECT * FROM %s " % table_name
                          + psql_helpers.query_time_bounds(start, end)
                          + " ORDER BY time ASC LIMIT %d" % block_size)
            raw = BytesIO()
            try:
                await conn.copy_from_query(data_query,
                                           format='binary',
                                           output=raw)
            except asyncpg.UndefinedTableError:
                # interval table exists but not the data table
                empty = np.array([], dtype=pipes.compute_dtype(layout))
                await callback(empty, layout, decimation_level)
                return
            raw.seek(0)
            rows = psql_helpers.bytes_to_data(raw,
                                              pipes.compute_dtype(layout))
            await callback(rows, layout, decimation_level)

            # a short block means this interval is exhausted
            if len(rows) < block_size:
                break
            # resume just after the last row received
            start = rows['timestamp'][-1] + 1
        # do not put an interval token at the end of the data
        if idx < last_idx:
            await callback(pipes.interval_token(layout), layout,
                           decimation_level)
        # the next interval begins where this one ended
        start = end
예제 #6
0
 async def close_interval(self):
     """Write an interval token to the pipe's writer and drain it.

     Cached rows are flushed first when caching is enabled. Does nothing
     when no writer exists yet.

     Raises:
         PipeError: if the pipe is closed.
     """
     if self.closed:
         raise PipeError("Pipe is closed")
     if self.writer is not None:
         if self._caching:
             # cached rows belong to the interval being closed
             await self.flush_cache()
         token_bytes = interval_token(self.layout).tobytes()
         self.writer.write(token_bytes)
         await self.writer.drain()
     # otherwise nothing has been written yet so there is nothing to close
예제 #7
0
 async def callback(data, layout, decimation_factor):
     """Check that only undecimated interval tokens (or nothing) arrive."""
     expected_layout = self.stream1.layout
     self.assertEqual(decimation_factor, 1)
     self.assertEqual(expected_layout, layout)
     self.assertEqual(pipes.compute_dtype(expected_layout), data.dtype)
     if len(data) == 0:
         self.assertEqual(0, len(data['timestamp']))
         return
     # every row that does arrive must be an interval token
     token = pipes.interval_token(expected_layout)
     for row in data:
         self.assertEqual(row, token)
예제 #8
0
 def close_interval_nowait(self):
     """Write an interval token to the writer without awaiting anything.

     Cached rows cannot be flushed here, so they are discarded (with a
     warning) before the token is written. Does nothing when no writer
     exists yet.

     Raises:
         PipeError: if the pipe is closed.
     """
     if self.closed:
         raise PipeError("Pipe is closed")
     if self.writer is None:
         return  # nothing has been written yet so nothing to close
     dropped = self._cache_index
     if dropped > 0:
         log.warning("dumping %d rows of cached data due on %s" %
                     (dropped, self.name))
         # start over with an empty cache of the same size
         self._cache = np.empty(len(self._cache), self.dtype)
         self._cache_index = 0
     token_bytes = interval_token(self.layout).tobytes()
     self.writer.write(token_bytes)
예제 #9
0
async def _subscribe(request: web.Request, json: bool):
    """Stream live data from a produced stream to the client.

    The stream is selected with the ``path`` or ``id`` query parameter.
    Data is sent as a chunked binary response; an interval token is written
    whenever the pipe reports the end of an interval.

    Args:
        request: incoming request; ``request.app`` supplies ``db`` and
            ``supervisor``.
        json: JSON output is not implemented; a true value yields a 400.

    Returns:
        A 400/404 ``web.Response`` when the request cannot be served,
        otherwise the ``web.StreamResponse`` used for the data.

    Raises:
        asyncio.CancelledError: propagated after the subscription has been
            released.
    """
    db: Session = request.app["db"]
    supervisor: Supervisor = request.app['supervisor']
    if json:
        return web.Response(text="JSON subscription not implemented",
                            status=400)

    # find the requested stream
    if 'path' in request.query:
        stream = folder.find_stream_by_path(request.query['path'],
                                            db,
                                            stream_type=DataStream)
    elif 'id' in request.query:
        stream = db.query(DataStream).get(request.query["id"])
    else:
        return web.Response(text="specify an id or a path", status=400)
    if stream is None:
        return web.Response(text="stream does not exist", status=404)
    pipe = pipes.LocalPipe(stream.layout)
    try:
        unsubscribe = supervisor.subscribe(stream, pipe)
    except SubscriptionError:
        return web.Response(text="stream is not being produced", status=400)
    resp = web.StreamResponse(status=200,
                              headers={
                                  'joule-layout': stream.layout,
                                  'joule-decimation': '1'
                              })
    resp.enable_chunked_encoding()

    # From here on the subscription is live. The finally clause releases it
    # exactly once on every exit path: normal completion, client disconnect,
    # cancellation, or any unexpected error while preparing or writing.
    try:
        await resp.prepare(request)
        while True:
            try:
                data = await pipe.read()
            except pipes.EmptyPipe:
                break  # producer is finished
            pipe.consume(len(data))
            if len(data) > 0:
                await resp.write(data.tobytes())
            if pipe.end_of_interval:
                await resp.write(pipes.interval_token(stream.layout).tobytes())
    except ConnectionResetError:
        # the client went away; there is nothing more to send
        pass
    finally:
        unsubscribe()
    # always hand a response object back to aiohttp (never None)
    return resp
예제 #10
0
    async def _test_remove(self):
        """Remove three overlapping ranges and check the data and intervals.

        The removals merge into a single gap: rows 0..249 and rows from 500
        onward must remain, separated by exactly one interval token.
        """
        ts = self.test_data['timestamp']
        stream = self.test_stream

        # Removals, applied in this order (row indices into the test data):
        #   300 -> 400  a chunk of data from the middle
        #   350 -> 500  another chunk, overlapping the end of the first gap
        #   250 -> 300  a chunk ending where the first gap begins
        for first, last in ((300, 400), (350, 500), (250, 300)):
            await self.store.remove(stream, ts[first], ts[last])

        # extract the data, should have an interval gap between 249 and 500
        # and a *single* interval boundary at 250
        chunks = []

        async def callback(rx_data, layout, decimated):
            chunks.append(rx_data)

        await self.store.extract(stream,
                                 start=None,
                                 end=None,
                                 callback=callback)
        extracted_data = np.hstack(chunks)

        # beginning is unchanged
        np.testing.assert_array_equal(extracted_data[:249],
                                      self.test_data[:249])
        # interval boundary marking missing data
        boundary = pipes.interval_token(stream.layout)
        np.testing.assert_array_equal(extracted_data[250], boundary)
        # end is unchanged (closing interval boundary ignored)
        np.testing.assert_array_equal(extracted_data[251:],
                                      self.test_data[500:])

        # two intervals of data
        intervals = await self.store.intervals(stream, start=None, end=None)
        expected = [[ts[0], ts[249] + 1], [ts[500], ts[-1] + 1]]
        self.assertEqual(intervals, expected)
예제 #11
0
    async def _test_extract_data_with_intervals(self):
        """Insert 955 rows in 300-row intervals and check the extracted tokens."""
        test_stream = DataStream(
            id=1,
            name="stream1",
            datatype=DataStream.DATATYPE.FLOAT32,
            keep_us=DataStream.KEEP_ALL,
            decimate=True,
            elements=[Element(name="e%d" % x) for x in range(3)])
        pipe = pipes.LocalPipe(test_stream.layout)
        data = helpers.create_data(layout=test_stream.layout, length=955)
        task = await self.store.spawn_inserter(test_stream, pipe)
        # close the interval after every chunk so each chunk stands alone
        for chunk in helpers.to_chunks(data, 300):
            await pipe.write(chunk)
            await pipe.close_interval()
        await pipe.close()
        await task

        # extract data
        received = []

        async def callback(rx_data, layout, factor):
            self.assertEqual(layout, test_stream.layout)
            self.assertEqual(factor, 1)
            received.append(rx_data)

        await self.store.extract(test_stream,
                                 start=None,
                                 end=None,
                                 callback=callback)
        extracted_data = np.hstack(received)
        # check for interval boundaries: one token after each 300-row chunk,
        # each shifted by the tokens that precede it
        for position in (300, 601, 902):
            np.testing.assert_array_equal(
                extracted_data[position],
                pipes.interval_token(test_stream.layout))
예제 #12
0
    async def retrieve_data(data: np.ndarray, layout, factor):
        nonlocal data_blocks, data_segment, decimation_factor
        decimation_factor = factor
        if np.array_equal(data, pipes.interval_token(layout)):
            if data_segment is not None:
                data_blocks.append(data_segment.tolist())
                data_segment = None
        else:
            data = np.c_[data['timestamp'][:, None], data['data']]

            if data_segment is None:
                data_segment = data
            else:
                data_segment = np.vstack((data_segment, data))
예제 #13
0
 async def runner():
     """Read both intervals from the input pipe and compare with the source."""
     pipes_in, _pipes_out = await build_network_pipes(
         {'input': '/test/source:uint8[x,y,z]'}, {}, {}, my_node, None,
         None, True)
     inlet = pipes_in['input']

     # the first block ends at the interval boundary
     first = await inlet.read()
     self.assertTrue(inlet.end_of_interval)
     inlet.consume(len(first))

     second = await inlet.read()
     inlet.consume(len(second))

     # re-insert the token so the result lines up with the source data
     boundary = interval_token(inlet.layout)
     received = np.hstack((first, boundary, second))
     np.testing.assert_array_equal(received, src_data)

     # nothing is left in the pipe
     with self.assertRaises(EmptyPipe):
         await inlet.read()
     await my_node.close()
예제 #14
0
def create_source_data(server, is_destination=False):
    """Register a UINT8 test stream on *server* and return its data.

    The stream has three continuous elements and is added at
    ``/test/source``. Its data is two generated blocks of rows separated by
    a single interval token.

    Args:
        server: object providing ``add_stream``.
        is_destination: forwarded to the ``DataStream`` constructor.

    Returns:
        The stacked data, including the interval token.
    """
    src = DataStream(id=0,
                     name="source",
                     keep_us=100,
                     datatype=DataStream.DATATYPE.UINT8,
                     is_destination=is_destination)
    elements = []
    for idx in range(3):
        elements.append(
            Element(name="e%d" % idx,
                    index=idx,
                    display_type=Element.DISPLAYTYPE.CONTINUOUS))
    src.elements = elements

    # two blocks of generated rows with an interval break in between
    leading = helpers.create_data(src.layout)
    boundary = interval_token(src.layout)
    trailing = helpers.create_data(src.layout)
    src_data = np.hstack((leading, boundary, trailing))

    timestamps = src_data['timestamp']
    src_info = StreamInfo(int(timestamps[0]), int(timestamps[-1]),
                          len(src_data))
    server.add_stream('/test/source', src, src_info, src_data,
                      [src_info.start, src_info.end])
    return src_data
예제 #15
0
 async def _extract_by_path(self, path: str, start: Optional[int],
                            end: Optional[int], layout: str, callback):
     """Download the data stored at *path* one interval at a time.

     The intervals are looked up on the base path (everything before
     ``~decim``). Each interval is requested separately from
     ``<server>/stream/extract`` and every block read from the response is
     passed to ``callback(data, layout, decimation_factor)``. An interval
     token is passed between consecutive intervals (none after the last).

     The decimation factor is taken from a trailing ``~decim-<n>`` in the
     path and defaults to 1.
     """
     url = "{server}/stream/extract".format(server=self.server)
     params = {"path": path, "binary": 1}

     match = re.search(r'~decim-(\d+)$', path)
     decimation_factor = 1 if match is None else int(match[1])

     # first determine the intervals, use the base path for this
     base_path = path.partition("~decim")[0]

     async with self._get_client() as session:
         intervals = await self._intervals_by_path(base_path, start, end)
         num_intervals = len(intervals)
         # now extract each interval
         for count, interval in enumerate(intervals, start=1):
             params["start"] = interval[0]
             params["end"] = interval[1]
             async with session.get(url, params=params) as resp:
                 await check_for_error(resp)
                 # hand the data to the callback as it arrives
                 reader = pipes.InputPipe(name="outbound",
                                          layout=layout,
                                          reader=resp.content)
                 while True:
                     try:
                         data = await reader.read()
                         await callback(data, layout, decimation_factor)
                         reader.consume(len(data))
                     except pipes.EmptyPipe:
                         break
             # insert the interval token to indicate a break
             if count < num_intervals:
                 await callback(pipes.interval_token(layout), layout,
                                decimation_factor)
예제 #16
0
    def test_when_server_returns_error_code(self):
        """An HTTP 500 from the server must be logged as an error by the CLI."""
        server = FakeJoule()
        # create the source stream
        src = DataStream(id=0,
                         name="source",
                         keep_us=100,
                         datatype=DataStream.DATATYPE.FLOAT32)
        elements = []
        for idx in range(3):
            elements.append(
                Element(name="e%d" % idx,
                        index=idx,
                        display_type=Element.DISPLAYTYPE.CONTINUOUS))
        src.elements = elements

        # source has 200 rows of data between [0, 200] in two intervals
        layout = src.decimated_layout
        first_interval = helpers.create_data(layout,
                                             start=0,
                                             length=100,
                                             step=1)
        boundary = pipes.interval_token(layout)
        second_interval = helpers.create_data(layout,
                                              start=100,
                                              length=100,
                                              step=1)
        src_data = np.hstack((first_interval, boundary, second_interval))

        timestamps = src_data['timestamp']
        src_info = StreamInfo(int(timestamps[0]), int(timestamps[-1]),
                              len(src_data))
        server.add_stream('/test/source', src, src_info, src_data)

        # make the fake server answer with an error
        server.response = "test error"
        server.http_code = 500
        server.stub_data_read = True
        self.start_server(server)
        runner = CliRunner()

        cli_args = ['data', 'read', '/test/source', '--start', 'now']
        with self.assertLogs(level=logging.ERROR):
            runner.invoke(main, cli_args)

        self.stop_server()
예제 #17
0
    def test_reads_decimated_data(self):
        """Read decimated data through the CLI, with and without interval marks.

        The first run passes ``--mark-intervals --show-bounds`` and expects
        every element of each row plus a ``# interval break`` line. The
        second run omits both flags and expects only the first three
        elements of each row and no line for the interval break.
        """
        server = FakeJoule()
        # create the source stream
        src = DataStream(id=0,
                         name="source",
                         keep_us=100,
                         datatype=DataStream.DATATYPE.FLOAT32)
        elements = []
        for idx in range(3):
            elements.append(
                Element(name="e%d" % idx,
                        index=idx,
                        display_type=Element.DISPLAYTYPE.CONTINUOUS))
        src.elements = elements

        # source has 200 rows of data between [0, 200] in two intervals
        layout = src.decimated_layout
        token = pipes.interval_token(layout)
        first_interval = helpers.create_data(layout,
                                             start=0,
                                             length=100,
                                             step=1)
        second_interval = helpers.create_data(layout,
                                              start=100,
                                              length=100,
                                              step=1)
        src_data = np.hstack((first_interval, token, second_interval))

        timestamps = src_data['timestamp']
        src_info = StreamInfo(int(timestamps[0]), int(timestamps[-1]),
                              len(src_data))
        server.add_stream('/test/source', src, src_info, src_data)
        self.start_server(server)

        base_args = [
            'data', 'read', '/test/source', '--start', '0', '--end',
            '1 hour ago', '--max-rows', '28'
        ]

        # mark the intervals and show the bounds
        runner = CliRunner()
        result = runner.invoke(
            main, base_args + ['--mark-intervals', '--show-bounds'])
        _print_result_on_error(result)
        self.assertEqual(result.exit_code, 0)
        output = result.output.split('\n')
        for line_no, row in enumerate(src_data):
            if row == token:
                expected = '# interval break'
            else:
                values = ' '.join('%f' % val for val in row['data'])
                expected = "%d %s" % (row['timestamp'], values)
            # the first output line is skipped
            self.assertTrue(expected in output[line_no + 1])

        # create a new event loop for the next run
        loop = asyncio.new_event_loop()
        loop.set_debug(True)
        asyncio.set_event_loop(loop)

        # do not mark the intervals and hide the bounds
        runner = CliRunner()
        result = runner.invoke(main, list(base_args))
        self.assertEqual(result.exit_code, 0)
        output = result.output.split('\n')
        offset = 0
        for line_no, row in enumerate(src_data):
            if row == token:
                # the break produces no output line, so later rows shift up
                offset = 1
                continue
            values = ' '.join('%f' % val for val in row['data'][:3])
            expected = "%d %s" % (row['timestamp'], values)
            self.assertTrue(expected in output[line_no - offset + 1])

        self.stop_server()
예제 #18
0
 async def callback(rx_data, layout, factor):
     """Collect received chunks, skipping those that start with an interval token."""
     first_row = rx_data[0]
     boundary = pipes.interval_token(layout)
     if first_row != boundary:
         rx_chunks.append(rx_data)