Ejemplo n.º 1
0
def period_percentiles_tsv(
        ts: TimeSeries,
        period: UtcPeriod,
        average_dt: int,
        percentile_period: UtcPeriod,
        percentiles: Sequence[int],
        client: DtsClient, calendar: Calendar
) -> TsVector:
    """Compute percentiles from a part of a time-series and generate a TsVector of
    percentile time-series spanning a possibly different time period.

    Args:
        ts: TimeSeries to compute percentile time-series for.
        period: Time period the output time-series should span.
        average_dt: Period to average values by when partitioning the input time-series.
        percentile_period: Period from ts to compute percentiles for.
        percentiles: Percentiles to compute. Values should be in the range ``0..100``.
        client: DtsClient to use for performing the partitioning.
        calendar: Calendar to to for interpreting time and partitioning the input
            time-series.

    Returns:
        A TsVector with one TimeSeries for each value in percentiles, all spanning period.
    """
    periods = int((percentile_period.end - percentile_period.start)//average_dt)

    n_avg = calendar.diff_units(period.start, period.end, average_dt)
    if n_avg*average_dt < period.end - period.start:
        n_avg += 1

    tsv = client.evaluate(
        ts.average(TimeAxis(period.start, average_dt, n_avg))
          .partition_by(calendar, period.start, average_dt, periods, period.start),
        period
    )
    tsv_p = tsv.percentiles(TimeAxis(period.start, average_dt, 1), percentiles)

    tsv = TsVector()
    ta = TimeAxis(period.start, period.end - period.start, 1)
    for ts_p in tsv_p:
        tsv.append(TimeSeries(ta, [ts_p.values[0]], POINT_AVERAGE_VALUE))

    return tsv
Ejemplo n.º 2
0
def windowed_percentiles_tsv(
        ts: TimeSeries,
        period: UtcPeriod,
        average_dt: int,
        percentile_dt: int,
        percentiles: Sequence[int],
        client: DtsClient, calendar: Calendar
) -> TsVector:
    """Compute percentiles for a time-series in a gliding window fashion.

     The input time-series is partitioned and averaged using ``ts.partition_by`` such
     that each value is included in percentile computations spanning percentile_dt time
     from the value occurrence.

    Args:
        ts: TimeSeries to compute percentile time-series for.
        period: Period to generate percentile time-series for.
        average_dt: Period to average values by when partitioning the input time-series.
            This is the time-step of the time-series in the output TsVector.
        percentile_dt: Time span for a value to contribute to percentile computations.
        percentiles: Percentiles to compute. Values should be in the range ``0..100``.
        client: DtsClient to use for performing the partitioning.
        calendar: Calendar to to for interpreting time and partitioning the input
            time-series.

    Returns:
        A TsVector with one TimeSeries for each value in percentiles, all spanning period.
    """
    periods = int(percentile_dt//average_dt)

    n_avg = calendar.diff_units(period.start, period.end, average_dt)
    if n_avg*average_dt < period.end - period.start:
        n_avg += 1

    tsv = client.evaluate(
        ts.average(TimeAxis(period.start, average_dt, n_avg))
          .partition_by(calendar, period.start, average_dt, periods, period.start)
          .time_shift(periods*average_dt),
        period
    )

    return tsv.percentiles(TimeAxis(period.start, average_dt, n_avg), percentiles)
Ejemplo n.º 3
0
    def test_merge_store_ts_points(self):
        """
        This test verifies the shyft internal time-series store,
        that the merge_store_points function do the required
        semantics.
        """
        with tempfile.TemporaryDirectory() as c_dir:
            # setup data to be calculated
            utc = Calendar()
            d = deltahours(1)
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(
                UtcTimeVector.from_numpy(
                    np.array([t, t + d, t + 3 * d], dtype=np.int64)),
                t + 4 * d)

            n_ts = 10
            store_tsv = TsVector()  # something we store at server side

            for i in range(n_ts):
                ts_id = shyft_store_url("{0}".format(i))
                store_tsv.append(
                    TimeSeries(
                        ts_id,
                        TimeSeries(ta,
                                   fill_value=float(i),
                                   point_fx=point_fx.POINT_AVERAGE_VALUE)))
            # then start the server
            dtss = DtsServer()
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_auto_cache(True)
            dtss.set_listening_port(port_no)
            dtss.set_container(
                "test", c_dir
            )  # notice we set container 'test' to point to c_dir directory
            dtss.start_async(
            )  # the internal shyft time-series will be stored to that container
            dts = DtsClient(host_port)

            dts.store_ts(
                store_tsv
            )  # 1. store the initial time-series, they are required for the merge_store_points function

            tb = TimeAxis(
                UtcTimeVector.from_numpy(
                    np.array([t - d, t + 3 * d, t + 4 * d],
                             dtype=np.int64)), t + 5 *
                d)  # make some points, one before, one in the middle and after
            mpv = TsVector()  # merge point vector
            for i in range(n_ts):
                ts_id = shyft_store_url("{0}".format(i))
                mpv.append(
                    TimeSeries(
                        ts_id,
                        TimeSeries(tb,
                                   fill_value=-1 - float(i),
                                   point_fx=point_fx.POINT_AVERAGE_VALUE)))

            dts.merge_store_ts_points(mpv)

            rts = TsVector()
            rts[:] = [TimeSeries(shyft_store_url(f"{i}")) for i in range(n_ts)]

            r = dts.evaluate(rts, tb.total_period())
            dts.close()  # close connection (will use context manager later)
            dtss.clear()  # close server

            for i in range(len(r)):
                self.assertEqual(r[i].time_axis.size(), 5)
                assert_array_almost_equal(
                    r[i].values.to_numpy(),
                    np.array([-i - 1, i, i, -i - 1, -i - 1], dtype=np.float64))
Ejemplo n.º 4
0
    def test_ts_cache(self):
        """ Verify dtss ts-cache functions exposed to python """
        with tempfile.TemporaryDirectory() as c_dir:
            # setup data to be calculated
            utc = Calendar()
            d = deltahours(1)
            n = 100
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)
            n_ts = 10
            store_tsv = TsVector()  # something we store at server side
            tsv = TsVector(
            )  # something we put an expression into, refering to stored ts-symbols

            for i in range(n_ts):
                pts = TimeSeries(
                    ta,
                    np.sin(np.linspace(start=0, stop=1.0 * i, num=ta.size())),
                    point_fx.POINT_AVERAGE_VALUE)
                ts_id = shyft_store_url("{0}".format(i))
                tsv.append(float(1.0) * TimeSeries(ts_id)
                           )  # make an expression that returns what we store
                store_tsv.append(TimeSeries(
                    ts_id, pts))  # generate a bound pts to store

            # add one external ts
            tsv.append(TimeSeries(fake_store_url("_any_ts_id_will_do")))
            # then start the server
            dtss = DtsServer()

            dtss.cb = self.dtss_read_callback  # rig external callbacks as well.
            self.callback_count = 0
            self.rd_throws = False
            cache_on_write = True
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_auto_cache(True)
            dtss.set_listening_port(port_no)
            dtss.set_container(
                "test", c_dir
            )  # notice we set container 'test' to point to c_dir directory
            dtss.start_async(
            )  # the internal shyft time-series will be stored to that container

            dts = DtsClient(
                host_port,
                auto_connect=False)  # demonstrate object life-time connection
            cs0 = dtss.cache_stats
            dts.store_ts(store_tsv,
                         overwrite_on_write=True,
                         cache_on_write=cache_on_write)
            r1 = dts.evaluate(tsv,
                              ta.total_period(),
                              use_ts_cached_read=True,
                              update_ts_cache=True)
            cs1 = dtss.cache_stats
            ccs1 = dts.cache_stats  # client can also provide cahce-stats

            dtss.flush_cache_all()  # force the cache empty
            dtss.clear_cache_stats()
            cs2 = dtss.cache_stats  # just to ensure clear did work
            r1 = dts.evaluate(
                tsv,
                ta.total_period(),
                use_ts_cached_read=True,
                update_ts_cache=True
            )  # second evaluation, cache is empty, will force read(misses)
            cs3 = dtss.cache_stats
            r1 = dts.evaluate(
                tsv,
                ta.total_period(),
                use_ts_cached_read=True,
                update_ts_cache=True
            )  # third evaluation, cache is now filled, all hits
            cs4 = dtss.cache_stats
            # now verify explicit caching performed by the python callback
            self.cache_dtss = dtss
            self.cache_reads = True
            dts.cache_flush()  # is the equivalent of
            # dtss.flush_cache_all()
            # dtss.clear_cache_stats()
            # use explicit cache-control instead of global
            dtss.set_auto_cache(
                False
            )  # turn off auto caching, we want to test the explicit caching
            r1 = dts.evaluate(
                tsv,
                ta.total_period(),
                use_ts_cached_read=True,
                update_ts_cache=False
            )  # evaluation, just misses, but we cache explict the external
            cs5 = dtss.cache_stats  # ok base line a lots of misses
            r1 = dts.evaluate(tsv,
                              ta.total_period(),
                              use_ts_cached_read=True,
                              update_ts_cache=False)
            cs6 = dtss.cache_stats  # should be one hit here

            dts.close()  # close connection (will use context manager later)
            dtss.clear()  # close server

            # now the moment of truth:
            self.assertEqual(len(r1), len(tsv))
            for i in range(n_ts - 1):
                self.assertEqual(r1[i].time_axis, store_tsv[i].time_axis)
                assert_array_almost_equal(r1[i].values.to_numpy(),
                                          store_tsv[i].values.to_numpy(),
                                          decimal=4)

            self.assertEqual(cs0.hits, 0)
            self.assertEqual(cs0.misses, 0)
            self.assertEqual(cs0.coverage_misses, 0)
            self.assertEqual(cs0.id_count, 0)
            self.assertEqual(cs0.point_count, 0)
            self.assertEqual(cs0.fragment_count, 0)

            self.assertEqual(cs1.hits, n_ts)
            self.assertEqual(
                cs1.misses, 1
            )  # because we cache on store, so 10 cached, 1 external with miss
            self.assertEqual(cs1.coverage_misses, 0)
            self.assertEqual(cs1.id_count, n_ts + 1)
            self.assertEqual(cs1.point_count, (n_ts + 1) * n)
            self.assertEqual(cs1.fragment_count, n_ts + 1)
            # verify client side cache_stats
            self.assertEqual(ccs1.hits, n_ts)
            self.assertEqual(
                ccs1.misses, 1
            )  # because we cache on store, so 10 cached, 1 external with miss
            self.assertEqual(ccs1.coverage_misses, 0)
            self.assertEqual(ccs1.id_count, n_ts + 1)
            self.assertEqual(ccs1.point_count, (n_ts + 1) * n)
            self.assertEqual(ccs1.fragment_count, n_ts + 1)

            self.assertEqual(cs2.hits, 0)
            self.assertEqual(cs2.misses, 0)
            self.assertEqual(cs2.coverage_misses, 0)
            self.assertEqual(cs2.id_count, 0)
            self.assertEqual(cs2.point_count, 0)
            self.assertEqual(cs2.fragment_count, 0)

            self.assertEqual(cs3.hits, 0)
            self.assertEqual(
                cs3.misses, n_ts +
                1)  # because we cache on store, we don't even miss one time
            self.assertEqual(cs3.coverage_misses, 0)
            self.assertEqual(cs3.id_count, n_ts + 1)
            self.assertEqual(cs3.point_count, (n_ts + 1) * n)
            self.assertEqual(cs3.fragment_count, n_ts + 1)

            self.assertEqual(cs4.hits,
                             n_ts + 1)  # because previous read filled cache
            self.assertEqual(cs4.misses,
                             n_ts + 1)  # remembers previous misses.
            self.assertEqual(cs4.coverage_misses, 0)
            self.assertEqual(cs4.id_count, n_ts + 1)
            self.assertEqual(cs4.point_count, (n_ts + 1) * n)
            self.assertEqual(cs4.fragment_count, n_ts + 1)

            self.assertEqual(cs6.hits, 1)  # because previous read filled cache
            self.assertEqual(cs6.misses,
                             n_ts * 2 + 1)  # remembers previous misses.
            self.assertEqual(cs6.coverage_misses, 0)
            self.assertEqual(cs6.id_count, 1)
            self.assertEqual(cs6.point_count, 1 * n)
            self.assertEqual(cs6.fragment_count, 1)
Ejemplo n.º 5
0
    def test_ts_store(self):
        """
        This test verifies the shyft internal time-series store,
        that allow identified time-series to be stored
        in the backend using a directory container specified for the
        location.

        All time-series of the form shyft://<container>/<ts-name>
        is mapped to the configured <container> (aka a directory on the server)

        This applies to expressions, as well as the new
        .store_ts(ts_vector) function that allows the user to
        stash away time-series into the configured back-end container.

        All find-operations of the form shyft://<container>/<regular-expression>
        is mapped to a search in the corresponding directory for the <container>

        :return:
        """
        with tempfile.TemporaryDirectory() as c_dir:
            # setup data to be calculated
            utc = Calendar()
            d = deltahours(1)
            n = 365 * 24 // 3
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)
            n_ts = 10
            store_tsv = TsVector()  # something we store at server side
            tsv = TsVector(
            )  # something we put an expression into, refering to stored ts-symbols

            for i in range(n_ts):
                pts = TimeSeries(
                    ta,
                    np.sin(np.linspace(start=0, stop=1.0 * i, num=ta.size())),
                    point_fx.POINT_AVERAGE_VALUE)
                ts_id = shyft_store_url("{0}".format(i))
                tsv.append(float(1.0) * TimeSeries(ts_id)
                           )  # make an expression that returns what we store
                store_tsv.append(TimeSeries(
                    ts_id, pts))  # generate a bound pts to store
            # krls with some extra challenges related to serialization
            tsv_krls = TsVector()
            krls_ts = TimeSeries(shyft_store_url("9")).krls_interpolation(
                dt=d, gamma=1e-3, tolerance=0.001, size=ta.size())
            tsv_krls.append(krls_ts)
            # min_max_check_ts_fill also needs a serial check
            # create a  trivial-case
            ts9 = TimeSeries(shyft_store_url("9"))
            ts_qac = ts9.min_max_check_linear_fill(v_min=-10.0 * n_ts,
                                                   v_max=10.0 * n_ts)
            tsv_krls.append(ts_qac)
            tsv_krls.append(ts9)
            tsv_krls.append(ts9.inside(min_v=-0.5, max_v=0.5))

            # then start the server
            dtss = DtsServer()
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_auto_cache(True)
            std_max_items = dtss.cache_max_items
            dtss.cache_max_items = 3000
            tst_max_items = dtss.cache_max_items
            dtss.set_listening_port(port_no)
            dtss.set_container(
                "test", c_dir
            )  # notice we set container 'test' to point to c_dir directory
            dtss.start_async(
            )  # the internal shyft time-series will be stored to that container
            # also notice that we dont have to setup callbacks in this case (but we could, and they would work)
            #
            # finally start the action
            dts = DtsClient(host_port)
            # then try something that should work
            dts.store_ts(store_tsv)
            r1 = dts.evaluate(tsv, ta.total_period())
            f1 = dts.find(
                r"shyft://test/\d")  # find all ts with one digit, 0..9
            r2 = dts.evaluate(tsv_krls, ta.total_period())
            url_x = shyft_store_url(r'does not exists')
            tsvx = TsVector()
            tsvx.append(TimeSeries(url_x))
            try:
                rx = dts.evaluate(tsvx, ta.total_period())
                self.assertFalse(True, 'This did not work out')
            except RuntimeError as rex:
                self.assertIsNotNone(rex)

            dts.close()  # close connection (will use context manager later)
            dtss.clear()  # close server

            # now the moment of truth:
            self.assertEqual(len(r1), len(tsv))
            for i in range(n_ts - 1):
                self.assertEqual(r1[i].time_axis, store_tsv[i].time_axis)
                assert_array_almost_equal(r1[i].values.to_numpy(),
                                          store_tsv[i].values.to_numpy(),
                                          decimal=4)

            self.assertEqual(len(f1), 10)
            self.assertEqual(len(r2), len(tsv_krls))
            assert_array_almost_equal(r2[1].values.to_numpy(),
                                      r2[2].values.to_numpy(),
                                      decimal=4)
            self.assertEqual(1000000, std_max_items)
            self.assertEqual(3000, tst_max_items)
Ejemplo n.º 6
0
    def test_functionality_hosting_localhost(self):

        # setup data to be calculated
        utc = Calendar()
        d = deltahours(1)
        d24 = deltahours(24)
        n = 240
        n24 = 10
        t = utc.time(2016, 1, 1)
        ta = TimeAxis(t, d, n)
        ta24 = TimeAxis(t, d24, n24)
        n_ts = 100
        percentile_list = IntVector([0, 35, 50, 65, 100])
        tsv = TsVector()
        store_tsv = TsVector()  # something we store at server side
        for i in range(n_ts):
            pts = TimeSeries(ta, np.linspace(start=0, stop=1.0, num=ta.size()),
                             point_fx.POINT_AVERAGE_VALUE)
            tsv.append(float(1 + i / 10) * pts)
            store_tsv.append(TimeSeries("cache://test/{0}".format(i),
                                        pts))  # generate a bound pts to store

        dummy_ts = TimeSeries('dummy://a')
        tsv.append(dummy_ts.integral(ta))
        self.assertGreater(len(ts_stringify(tsv[0])),
                           10)  # just ensure ts_stringify work on expr.
        # then start the server
        dtss = DtsServer()
        port_no = find_free_port()
        host_port = 'localhost:{0}'.format(port_no)
        dtss.set_listening_port(port_no)
        dtss.cb = self.dtss_read_callback
        dtss.find_cb = self.dtss_find_callback
        dtss.store_ts_cb = self.dtss_store_callback

        dtss.start_async()

        dts = DtsClient(StringVector([host_port]), True,
                        1000)  # as number of hosts
        # then try something that should work
        dts.store_ts(store_tsv)
        r1 = dts.evaluate(tsv, ta.total_period())
        tsv1x = tsv.inside(-0.5, 0.5)
        tsv1x.append(tsv1x[-1].decode(
            start_bit=1, n_bits=1))  # just to verify serialization/bind
        tsv1x.append(store_tsv[1].derivative())
        tsv1x.append(store_tsv[1].pow(
            2.0))  # just for verify pow serialization(well, it's a bin-op..)
        r1x = dts.evaluate(tsv1x, ta.total_period())
        r2 = dts.percentiles(tsv, ta.total_period(), ta24, percentile_list)
        r3 = dts.find('netcdf://dummy\.nc/ts\d')
        self.rd_throws = True
        ex_count = 0
        try:
            rx = dts.evaluate(tsv, ta.total_period())
        except RuntimeError as e:
            ex_count = 1
            pass
        self.rd_throws = True
        try:
            fx = dts.find('should throw')
        except RuntimeError as e:
            ex_count += 1
            pass

        dts.close()  # close connection (will use context manager later)
        dtss.clear()  # close server
        self.assertEqual(ex_count, 2)
        self.assertEqual(len(r1), len(tsv))
        self.assertEqual(self.callback_count, 4)
        for i in range(n_ts - 1):
            self.assertEqual(r1[i].time_axis, tsv[i].time_axis)
            assert_array_almost_equal(r1[i].values.to_numpy(),
                                      tsv[i].values.to_numpy(),
                                      decimal=4)

        self.assertEqual(len(r2), len(percentile_list))
        dummy_ts.bind(
            TimeSeries(ta,
                       fill_value=1.0,
                       point_fx=point_fx.POINT_AVERAGE_VALUE))
        p2 = tsv.percentiles(ta24, percentile_list)
        # r2 = tsv.percentiles(ta24,percentile_list)

        for i in range(len(p2)):
            self.assertEqual(r2[i].time_axis, p2[i].time_axis)
            assert_array_almost_equal(r2[i].values.to_numpy(),
                                      p2[i].values.to_numpy(),
                                      decimal=1)

        self.assertEqual(self.find_count, 2)
        self.assertEqual(len(r3), 10)  # 0..9
        for i in range(len(r3)):
            self.assertEqual(r3[i], self.ts_infos[i])
        self.assertIsNotNone(r1x)
        self.assertEqual(1, len(self.stored_tsv))
        self.assertEqual(len(store_tsv), len(self.stored_tsv[0]))
        for i in range(len(store_tsv)):
            self.assertEqual(self.stored_tsv[0][i].ts_id(),
                             store_tsv[i].ts_id())
Ejemplo n.º 7
0
def selector_ts(
        ts: TimeSeries,
        period: UtcPeriod,
        average_dt: int,
        threshold_tss: Sequence[TimeSeries],
        tss: Sequence[TimeSeries],
        mask_point_fx: point_interpretation_policy,
        client: DtsClient, calendar: Calendar
) -> TimeSeries:
    """Select values from different time-series based on values from a single
    time-series when compared to a set of threshold time-series.

    Args:
        ts: TimeSeries to base selections on.
        period: Period to select values in.
        average_dt: Time step to use for the internal masking series. An average of
            the input ts together with the threshold time-series in threshold_tss is
            used to generate the masks.
        threshold_tss: Threshold time-series. Should be one less than the number of
            time-series to choose from.
        tss: TimeSeries to choose values from.
        mask_point_fx: Point interpretation to use for the mask time-series. If equal
            to POINT_INSTANT_VALUE the boundaries between different selection
            regions is smoothed. If equal to POINT_AVERAGE_VALUE the boundaries are
            sharp.
        client: DtsClient use for computations and data retrieval.
        calendar: Calendar used to interpret time.

    Returns:
        A TimeSeries that is the selection of values from tss.
    """
    assert len(threshold_tss) == len(tss) - 1, ('the number of thresholds should be one less '
                                                'than the number of time-series')

    # setup averaging for mask
    tsv = TsVector()
    # ----------
    n = calendar.diff_units(period.start, period.end, average_dt)
    if n * average_dt < period.end - period.start:
        n += 1
    ta_avg = TimeAxis(period.start, average_dt, n)
    tsv.append(ts.average(ta_avg))
    # ----------
    tsv: TsVector = client.evaluate(tsv, period)
    # ----------
    avg_ts = tsv[0]
    del tsv

    # compute mask
    masks: List[DoubleVector] = []
    for avg_p, avg_v in zip(avg_ts.get_time_axis(), avg_ts.values):
        added_mask = False
        for i in range(len(tss)):
            if i == len(masks):
                masks.append(DoubleVector())
            if not added_mask:
                # determine period threshold
                if i == 0:
                    min_threshold = -1_000_000_000
                    max_threshold = threshold_tss[0](avg_p.start)
                elif i == len(tss) - 1:
                    min_threshold = threshold_tss[-1](avg_p.start)
                    max_threshold = 1_000_000_000
                else:
                    min_threshold = threshold_tss[i - 1](avg_p.start)
                    max_threshold = threshold_tss[i](avg_p.start)
                # set mask value
                if min_threshold <= avg_v < max_threshold:
                    added_mask = True
                    masks[i].append(1.0)
                else:
                    masks[i].append(0.0)
            else:
                masks[i].append(0.0)

    # construct final ts
    computed_ts = None
    for i, ts_expr in enumerate(tss):
        if computed_ts is not None:
            computed_ts += ts_expr * TimeSeries(ta_avg, masks[i], mask_point_fx)
        else:
            computed_ts = ts_expr * TimeSeries(ta_avg, masks[i], mask_point_fx)

    return computed_ts