Example #1
async def _extract_data(conn: asyncpg.Connection,
                        stream: DataStream,
                        callback,
                        decimation_level: int = 1,
                        start: Optional[int] = None,
                        end: Optional[int] = None,
                        block_size=50000):
    if decimation_level > 1:
        layout = stream.decimated_layout
    else:
        layout = stream.layout

    table_name = "data.stream%d" % stream.id
    if decimation_level > 1:
        table_name += "_%d" % decimation_level
    # extract by interval
    query = "SELECT time FROM data.stream%d_intervals " % stream.id
    query += psql_helpers.query_time_bounds(start, end)
    try:
        boundary_records = await conn.fetch(query)
    except asyncpg.UndefinedTableError:
        # no data tables
        data = np.array([], dtype=pipes.compute_dtype(layout))
        await callback(data, layout, decimation_level)
        return

    # append the requested end time as a final boundary so data after the
    # last interval break is extracted as well
    boundary_records += [{'time': end}]
    for i in range(len(boundary_records)):
        record = boundary_records[i]
        end = record['time']
        # extract the interval data
        done = False
        while not done:
            query = "SELECT * FROM %s " % table_name
            query += psql_helpers.query_time_bounds(start, end)
            query += " ORDER BY time ASC LIMIT %d" % block_size
            psql_bytes = BytesIO()
            try:
                await conn.copy_from_query(query,
                                           format='binary',
                                           output=psql_bytes)
            except asyncpg.UndefinedTableError:
                # interval table exists but not the data table
                data = np.array([], dtype=pipes.compute_dtype(layout))
                await callback(data, layout, decimation_level)
                return
            psql_bytes.seek(0)
            dtype = pipes.compute_dtype(layout)
            np_data = psql_helpers.bytes_to_data(psql_bytes, dtype)
            await callback(np_data, layout, decimation_level)

            if len(np_data) < block_size:
                break
            # advance past the last returned row so the next block query
            # does not repeat it
            start = np_data['timestamp'][-1] + 1
        # do not put an interval token at the end of the data
        if i < len(boundary_records) - 1:
            await callback(pipes.interval_token(layout), layout,
                           decimation_level)
        start = end
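
Every example on this page builds its time filter with psql_helpers.query_time_bounds. Its implementation is not part of this listing; the sketch below is only an assumption reconstructed from how the callers treat its return value: an empty string when both bounds are None, otherwise a WHERE clause on the time column (the real helper presumably also converts the integer timestamps into PostgreSQL timestamp literals, which is omitted here).

from typing import Optional

def query_time_bounds(start: Optional[int], end: Optional[int]) -> str:
    # hypothetical reconstruction; timestamp-to-SQL conversion omitted
    clauses = []
    if start is not None:
        clauses.append("time >= %d" % start)
    if end is not None:
        clauses.append("time < %d" % end)
    if not clauses:
        return ""  # callers check for an empty string and build their own WHERE
    return "WHERE " + " AND ".join(clauses)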
Example #2
 async def extract(self,
                   stream: 'EventStream',
                   start: Optional[int] = None,
                   end: Optional[int] = None,
                   json_filter=None,
                   limit=None) -> List[Dict]:
     if end is not None and start is not None and end <= start:
         raise ValueError(
             "Invalid time bounds start [%d] must be < end [%d]" %
             (start, end))
     query = "SELECT id, time, end_time, content FROM data.events "
     where_clause = psql_helpers.query_time_bounds(start, end)
     if len(where_clause) == 0:
         where_clause = "WHERE "
     else:
         where_clause += " AND "
     where_clause += "event_stream_id=%d" % stream.id
     if json_filter is not None and len(json_filter) > 0:
         where_clause += " AND " + psql_helpers.query_event_json(
             json_filter)
     query += where_clause
     if limit is not None:
         assert limit > 0, "limit must be > 0"
         if start is None and end is not None:
             query += " ORDER BY time DESC"
         else:
             query += " ORDER BY time ASC"
         query += f" LIMIT {limit}"
     else:
         query += " ORDER BY time ASC"
     async with self.pool.acquire() as conn:
         records = await conn.fetch(query)
         events = list(map(record_to_event, records))
         events.sort(key=lambda e: e["start_time"])
         return events
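
record_to_event is not shown in this listing. Judging from the SELECT column list and the sort key used above, it presumably renames the time column to start_time and decodes the JSON content column; the version below is a hypothetical stand-in, not the project's actual implementation.

import json

def record_to_event(record) -> dict:
    # hypothetical mapping inferred from the columns selected above
    return {
        "id": record["id"],
        "start_time": record["time"],
        "end_time": record["end_time"],
        "content": json.loads(record["content"]),
    }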
Example #3
 async def remove(self,
                  stream: 'DataStream',
                  start: Optional[int],
                  end: Optional[int],
                  exact: bool = True):
     where_clause = psql_helpers.query_time_bounds(start, end)
     async with self.pool.acquire() as conn:
         tables = await psql_helpers.get_table_names(conn, stream)
         for table in tables:
             # TODO: use drop chunks with newer and older clauses when timescale is updated
             # ******DROP CHUNKS IS *VERY* APPROXIMATE*********
             if start is None and "intervals" not in table and not exact:
                 # use the much faster drop chunks utility and accept the approximate result
                 bounds = await psql_helpers.convert_time_bounds(
                     conn, stream, start, end)
                 if bounds is None:
                     return  # no data to remove
                 query = "SELECT drop_chunks('%s', older_than=>'%s'::timestamp)" % (
                     table, bounds[1])
             else:
                 query = 'DELETE FROM %s ' % table + where_clause
             try:
                 await conn.execute(query)
             except asyncpg.UndefinedTableError:
                 return  # no data to remove
             except asyncpg.exceptions.RaiseError as err:
                 print("psql: ", err)
                 return
         # create an interval boundary to mark the missing data
         if start is not None:
             await psql_helpers.close_interval(conn, stream, start)
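
A short usage sketch for the method above (the data_store, stream, and timestamp names are assumptions): with start=None and exact=False the non-interval tables are cleared through TimescaleDB's drop_chunks, which removes whole chunks and is therefore only approximate, while exact=True always issues a precise but slower DELETE.

# drop everything before cutoff_us, accepting chunk-granularity precision
await data_store.remove(stream, start=None, end=cutoff_us, exact=False)

# remove an exact time range
await data_store.remove(stream, start=t0, end=t1, exact=True)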
Example #4
 async def remove(self,
                  stream: 'EventStream',
                  start: Optional[int] = None,
                  end: Optional[int] = None,
                  json_filter=None):
     query = "DELETE FROM data.events "
     where_clause = psql_helpers.query_time_bounds(start, end)
     if len(where_clause) == 0:
         where_clause = "WHERE "
     else:
         where_clause += " AND "
     where_clause += "event_stream_id=%d" % stream.id
     if json_filter is not None and len(json_filter) > 0:
         where_clause += " AND " + psql_helpers.query_event_json(
             json_filter)
     query += where_clause
     async with self.pool.acquire() as conn:
         return await conn.execute(query)
Example #5
 async def count(self,
                 stream: 'EventStream',
                 start: Optional[int] = None,
                 end: Optional[int] = None,
                 json_filter=None) -> int:
     if end is not None and start is not None and end <= start:
         raise ValueError(
             "Invalid time bounds start [%d] must be < end [%d]" %
             (start, end))
     query = "SELECT count(*) FROM data.events "
     where_clause = psql_helpers.query_time_bounds(start, end)
     if len(where_clause) == 0:
         where_clause = "WHERE "
     else:
         where_clause += " AND "
     where_clause += "event_stream_id=%d" % stream.id
     if json_filter is not None and len(json_filter) > 0:
         where_clause += " AND " + psql_helpers.query_event_json(
             json_filter)
     query += where_clause
     async with self.pool.acquire() as conn:
         records = await conn.fetch(query)
         return records[0]['count']
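
A hypothetical caller of the two event methods above, counting before extracting so an oversized result set can be caught early; the event_store, stream, and bound variables are assumptions, only the method signatures come from the examples.

n = await event_store.count(stream, start=t0, end=t1)
if n > 10000:
    # pull only the newest events instead of the full range
    events = await event_store.extract(stream, start=None, end=t1, limit=10000)
else:
    events = await event_store.extract(stream, start=t0, end=t1)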