def test_float_raise_valueerror(self):
    """
    Assert to_nanoseconds raises on a float with a fractional part
    """
    with pytest.raises(ValueError) as exc:
        to_nanoseconds(42.5)
    # str(exc) stringifies the ExceptionInfo wrapper (deprecated in pytest);
    # inspect the raised exception itself via exc.value.
    assert "can only convert whole numbers" in str(exc.value)
def windows(self, start, end, width, depth=0, version=0):
    """
    Read arbitrary-width statistical windows of data from BTrDB.

    Parameters
    ----------
    start : int or datetime like object
        Start of the queried range in nanoseconds, inclusive (see
        :func:`btrdb.utils.timez.to_nanoseconds` for valid input types).
    end : int or datetime like object
        End of the queried range in nanoseconds, exclusive (see
        :func:`btrdb.utils.timez.to_nanoseconds` for valid input types).
    width : int
        Duration of each window in nanoseconds, subject to `depth`.
    depth : int
        Precision of the window duration as a power of 2 in nanoseconds;
        e.g. 30 makes each window accurate to roughly one second, while 0
        is accurate to the nanosecond.
    version : int
        The version of the stream to query.

    Returns
    -------
    tuple
        A flat tuple of (StatPoint, version) data tuples, one per window
        (tuple(tuple(StatPoint, int), ...)).

    Notes
    -----
    This is slower than aligned_windows but still much faster than raw
    values. `start` is inclusive and `end` is exclusive, so if
    end < start + width no results are returned; any partial window at
    the tail is dropped (effectively end is lowered to
    start + width * floordiv(end - start, width)). A nonzero `depth`
    trades boundary precision (2**depth ns) for faster execution on
    dense streams — e.g. depth=30 (about one second) is often fine for
    day-long windows.
    """
    start_ns = to_nanoseconds(start)
    end_ns = to_nanoseconds(end)
    raw_windows = self._btrdb.ep.windows(
        self._uuid, start_ns, end_ns, width, depth, version
    )
    results = []
    for stat_points, ver in raw_windows:
        results.extend((StatPoint.from_proto(sp), ver) for sp in stat_points)
    return tuple(results)
def test_str_raise_valueerror(self):
    """
    Assert to_nanoseconds raises on invalid str
    """
    dt_str = "01 Jan 2018 12:00:00 -0000"
    # `match` already asserts on the message, so the exception does not
    # need to be captured (the original bound `as exc` and never used it).
    with pytest.raises(ValueError, match="RFC3339"):
        to_nanoseconds(dt_str)
def delete(self, start, end):
    """
    "Delete" all points between `start` (inclusive) and `end` (exclusive),
    both in nanoseconds. Because BTrDB has persistent multiversioning, the
    deleted points still exist as part of an older version of the stream.

    Parameters
    ----------
    start : int or datetime like object
        The start time in nanoseconds for the range to be deleted. (see
        :func:`btrdb.utils.timez.to_nanoseconds` for valid input types)
    end : int or datetime like object
        The end time in nanoseconds for the range to be deleted. (see
        :func:`btrdb.utils.timez.to_nanoseconds` for valid input types)

    Returns
    -------
    int
        The version of the new stream created
    """
    start_ns = to_nanoseconds(start)
    end_ns = to_nanoseconds(end)
    return self._btrdb.ep.deleteRange(self._uuid, start_ns, end_ns)
def __init__(self, start=None, end=None):
    """
    Store a validated [start, end) time range in nanoseconds.

    Parameters
    ----------
    start : int or datetime like object, optional
        Inclusive start of the range; converted via to_nanoseconds.
    end : int or datetime like object, optional
        Exclusive end of the range; converted via to_nanoseconds.

    Raises
    ------
    ValueError
        If neither `start` nor `end` is supplied, or if both are supplied
        and `start` is not strictly less than `end`.
    """
    # Compare against None explicitly: the original truthiness test
    # (`if start`) silently discarded a valid timestamp of 0 (the epoch).
    self.start = None if start is None else to_nanoseconds(start)
    self.end = None if end is None else to_nanoseconds(end)
    if self.start is None and self.end is None:
        raise ValueError("A valid `start` or `end` must be supplied")
    if self.start is not None and self.end is not None and self.start >= self.end:
        raise ValueError(
            "`start` must be strictly less than `end` argument")
def aligned_windows(self, start, end, pointwidth, version=0):
    """
    Read statistical aggregates of power-of-two-aligned windows from BTrDB.

    Queries stream `version` for aggregates of the data between `start`
    (inclusive) and `end` (exclusive), both in nanoseconds. Each returned
    point summarizes a window 2**`pointwidth` nanoseconds wide and carries
    the mean, minimum, maximum, and count of the raw points it covers.

    Note that results cover all windows starting in [start, end); if
    end < start + 2**pointwidth nothing is returned. When start/end are
    not aligned, their bottom `pointwidth` bits are cleared. Windows whose
    count is 0 are omitted.

    Parameters
    ----------
    start : int or datetime like object
        The start time in nanoseconds for the range to be queried. (see
        :func:`btrdb.utils.timez.to_nanoseconds` for valid input types)
    end : int or datetime like object
        The end time in nanoseconds for the range to be queried. (see
        :func:`btrdb.utils.timez.to_nanoseconds` for valid input types)
    pointwidth : int
        Specify the number of ns between data points (2**pointwidth)
    version : int
        Version of the stream to query

    Returns
    -------
    tuple
        A flat tuple of (StatPoint, version) data tuples, one per window.

    Notes
    -----
    Because the window width is a power of two, it aligns with BTrDB's
    internal tree structure and executes faster than `windows()`.
    """
    start_ns = to_nanoseconds(start)
    end_ns = to_nanoseconds(end)
    raw_windows = self._btrdb.ep.alignedWindows(
        self._uuid, start_ns, end_ns, pointwidth, version
    )
    results = []
    for stat_points, ver in raw_windows:
        results.extend((StatPoint.from_proto(sp), ver) for sp in stat_points)
    return tuple(results)
def windows(self, start, end, width, depth=0, version=0):
    """
    Read arbitrary-width statistical windows of data from BTrDB.

    Parameters
    ----------
    start : int or datetime like object
        Start of the queried range in nanoseconds, inclusive (see
        :func:`btrdb.utils.timez.to_nanoseconds` for valid input types).
    end : int or datetime like object
        End of the queried range in nanoseconds, exclusive (see
        :func:`btrdb.utils.timez.to_nanoseconds` for valid input types).
    width : int
        Duration of each window in nanoseconds.
    version : int
        The version of the stream to query.

    Returns
    -------
    tuple
        A flat tuple of (StatPoint, version) data tuples, one per window
        (tuple(tuple(StatPoint, int), ...)).

    Notes
    -----
    This is slower than aligned_windows but still much faster than raw
    values. `start` is inclusive and `end` is exclusive, so if
    end < start + width no results are returned; any partial window at
    the tail is dropped (effectively end is lowered to
    start + width * floordiv(end - start, width)).

    The `depth` parameter is deprecated; the only valid value is 0.
    """
    start_ns = to_nanoseconds(start)
    end_ns = to_nanoseconds(end)
    raw_windows = self._btrdb.ep.windows(
        self._uuid, start_ns, end_ns, width, depth, version
    )
    results = []
    for stat_points, ver in raw_windows:
        results.extend((StatPoint.from_proto(sp), ver) for sp in stat_points)
    return tuple(results)
def test_str_midnight(self):
    """
    Assert a date-only string parses as midnight UTC
    """
    midnight = datetime.datetime(2019, 4, 7, tzinfo=pytz.utc)
    expected_ns = int(midnight.timestamp() * 1e9)
    assert to_nanoseconds("2019-04-07") == expected_ns
def values(self, start, end, version=0):
    """
    Read raw values from BTrDB between time [a, b) in nanoseconds.

    Queries BTrDB for the raw time series points between `start` and
    `end`, both in nanoseconds since the Epoch, from the given stream
    `version`.

    Parameters
    ----------
    start : int or datetime like object
        The start time in nanoseconds for the range to be queried. (see
        :func:`btrdb.utils.timez.to_nanoseconds` for valid input types)
    end : int or datetime like object
        The end time in nanoseconds for the range to be queried. (see
        :func:`btrdb.utils.timez.to_nanoseconds` for valid input types)
    version: int
        The version of the stream to be queried

    Returns
    ------
    list
        A list of (RawPoint, version) tuples (list(tuple(RawPoint,int))).

    Notes
    -----
    Raw points are the original values at the sensor's native sampling
    rate — the lowest level of data with the finest time granularity,
    stored in the vector nodes of BTrDB's tree structure.
    """
    start_ns = to_nanoseconds(start)
    end_ns = to_nanoseconds(end)
    point_windows = self._btrdb.ep.rawValues(
        self._uuid, start_ns, end_ns, version
    )
    results = []
    for point_list, ver in point_windows:
        results.extend((RawPoint.from_proto(p), ver) for p in point_list)
    return results
def test_float(self):
    """
    Assert to_nanoseconds handles numpy datetime64 input.

    NOTE(review): the name says "float" but the body exercises
    np.datetime64 (and duplicates another test_float elsewhere in this
    file) — consider renaming to test_datetime64.
    """
    dt = datetime.datetime(2018, 1, 1, 12, tzinfo=pytz.utc)
    expected = int(dt.timestamp() * 1e9)
    dt64 = np.datetime64('2018-01-01T12:00')
    assert expected == to_nanoseconds(dt64)
def test_datetime_to_ns_naive(self):
    """
    Assert to_nanoseconds handles naive datetime
    """
    naive = datetime.datetime(2018, 1, 1, 12)
    assert naive.tzinfo is None
    # A naive datetime should be interpreted as UTC.
    as_utc = pytz.utc.localize(naive)
    assert to_nanoseconds(naive) == int(as_utc.timestamp() * 1e9)
def test_datetime_to_ns_aware(self):
    """
    Assert to_nanoseconds handles tz aware datetime
    """
    eastern = pytz.timezone("US/Eastern")
    # pytz zones must be attached with localize(); passing tzinfo= to the
    # datetime constructor yields the zone's historical LMT offset
    # (-04:56) rather than EST/EDT, so the original did not test a real
    # US/Eastern time.
    dt = eastern.localize(datetime.datetime(2018, 1, 1, 17))
    expected = int(dt.astimezone(pytz.utc).timestamp() * 1e9)
    assert dt.tzinfo is not None
    assert to_nanoseconds(dt) == expected
def test_str(self):
    """
    Assert to_nanoseconds handles RFC3339 format
    """
    utc_noon = datetime.datetime(2018, 1, 1, 12, tzinfo=pytz.utc)
    expected = int(utc_noon.timestamp() * 1e9)

    # Explicit UTC offset.
    assert utc_noon.tzinfo is not None
    assert to_nanoseconds("2018-1-1 12:00:00.0-0000") == expected

    # Same instant expressed with a -0500 offset.
    offset_dt = datetime.datetime(2018, 1, 1, 12, tzinfo=pytz.timezone("US/Eastern"))
    assert offset_dt.tzinfo is not None
    assert to_nanoseconds("2018-1-1 7:00:00.0-0500") == expected

    # No offset in the string: interpreted as UTC.
    bare = datetime.datetime(2018, 1, 15, 7, 32, 49, tzinfo=pytz.utc)
    assert to_nanoseconds("2018-01-15 07:32:49") == int(bare.timestamp() * 1e9)
def nearest(self, time, version, backward=False):
    """
    Find the point in the stream closest to a specified time.

    Returns the point nearest to `time` (nanoseconds since Epoch) in the
    stream at `version`. With `backward` False the returned point is
    >= `time`; with `backward` True it is < `time`. The stream version
    used to satisfy the query is returned alongside the point.

    Parameters
    ----------
    time : int or datetime like object
        The time (in nanoseconds since Epoch) to search near (see
        :func:`btrdb.utils.timez.to_nanoseconds` for valid input types)
    version : int
        Version of the stream to use in search
    backward : boolean
        True to search backwards from time, else false for forward

    Returns
    -------
    tuple
        The closest data point and the version of the stream it was
        retrieved at (tuple(RawPoint, int)), or None if no point exists.
    """
    try:
        point, ver = self._btrdb.ep.nearest(
            self._uuid, to_nanoseconds(time), version, backward
        )
    except BTrDBError as exc:
        # 401 indicates no point in the requested direction; anything
        # else is a genuine error.
        if exc.code == 401:
            return None
        raise
    return RawPoint.from_proto(point), ver
def test_float(self):
    """
    Assert to_nanoseconds handles a whole-number float
    """
    result = to_nanoseconds(42.0)
    assert result == 42
def test_int(self):
    """
    Assert to_nanoseconds handles int
    """
    result = to_nanoseconds(42)
    assert result == 42
def count(self, start=MINIMUM_TIME, end=MAXIMUM_TIME, pointwidth=62, precise=False, version=0):
    """
    Compute the total number of points in the stream.

    Counts the points in the specified window and version; by default the
    latest total count for the entire stream. The non-precise path sums
    the counts of the StatPoints returned by ``aligned_windows``, so the
    start/end timestamps may be adjusted when they are not powers of 2,
    and for small windows the pointwidth may need tuning to capture count
    granularity. Setting `precise` to True gives an exact count to the
    nanosecond at the cost of a slower query.

    Parameters
    ----------
    start : int or datetime like object, default: MINIMUM_TIME
        The start time in nanoseconds for the range to be queried. (see
        :func:`btrdb.utils.timez.to_nanoseconds` for valid input types)
    end : int or datetime like object, default: MAXIMUM_TIME
        The end time in nanoseconds for the range to be queried. (see
        :func:`btrdb.utils.timez.to_nanoseconds` for valid input types)
    pointwidth : int, default: 62
        Specify the number of ns between data points (2**pointwidth).
        If the value is too large for the time window, the next smallest
        appropriate pointwidth will be used.
    precise : bool, default: False
        Force a windows query instead of aligned_windows for an exact
        count down to the nanosecond (somewhat slower).
    version : int, default: 0
        Version of the stream to query

    Returns
    -------
    int
        The total number of points in the stream for the specified window.
    """
    span = to_nanoseconds(end) - to_nanoseconds(start)
    if precise:
        # One window spanning the whole range yields an exact count.
        stat_points = self.windows(start, end, span, 0, version)
    else:
        # Clamp pointwidth so at least one aligned window fits the range.
        effective = min(pointwidth, pw.from_nanoseconds(span) - 1)
        stat_points = self.aligned_windows(start, end, effective, version)
    return sum(sp.count for sp, _ in stat_points)