Example No. 1
    def test_dtss_partition_by_average(self):
        """
        This test illustrates the use of partition_by, client- and server-side.
        The main point here is to ensure that the evaluate period covers
        both the historical and the evaluation periods.
        """
        with tempfile.TemporaryDirectory() as c_dir:
            # setup data to be calculated
            utc = Calendar()
            d = deltahours(1)
            t = utc.time(2000, 1, 1)
            n = utc.diff_units(t, utc.add(t, Calendar.YEAR, 10), d)
            ta = TimeAxis(t, d, n)
            td = TimeAxis(t, d * 24, n // 24)
            n_ts = 1
            store_tsv = TsVector()  # something we store at server side
            for i in range(n_ts):
                pts = TimeSeries(
                    ta,
                    np.sin(
                        np.linspace(start=0, stop=1.0 * (i + 1),
                                    num=ta.size())),
                    point_fx.POINT_AVERAGE_VALUE)
                ts_id = shyft_store_url(f"{i}")
                store_tsv.append(TimeSeries(
                    ts_id, pts))  # generate a bound pts to store

            # start dtss server
            dtss = DtsServer()
            cache_on_write = True
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_auto_cache(True)
            dtss.set_listening_port(port_no)
            # notice we set container 'test' to point to the c_dir directory
            dtss.set_container("test", c_dir)
            # the internal shyft time-series will be stored to that container
            dtss.start_async()

            # create dts client
            # auto_connect=False demonstrates an object life-time connection
            c = DtsClient(host_port, auto_connect=False)
            c.store_ts(store_tsv,
                       overwrite_on_write=True,
                       cache_on_write=cache_on_write)

            t_0 = utc.time(2018, 1, 1)
            tax = TimeAxis(t_0, Calendar.DAY, 365)
            ts_h1 = TimeSeries(shyft_store_url(f'{0}'))
            ts_h2 = store_tsv[0]
            ts_p1 = ts_h1.partition_by(utc, t, Calendar.YEAR, 10,
                                       t_0).average(tax)
            ts_p2 = ts_h2.partition_by(utc, t, Calendar.YEAR, 10,
                                       t_0).average(tax)
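
The snippet above ends right after building the partitions. Below is a small, self-contained sketch of the client-side partition_by semantics it relies on (no server involved; all names are local to the sketch, and the shyft.time_series import path is an assumption): a bound 10-year hourly series is split into 10 one-year slices that are all time-shifted to start at t_0, and each slice is averaged over a 365-day axis.

import numpy as np
from shyft.time_series import (Calendar, TimeAxis, TimeSeries, deltahours,
                               point_fx)

utc = Calendar()
d = deltahours(1)
t = utc.time(2000, 1, 1)
n = utc.diff_units(t, utc.add(t, Calendar.YEAR, 10), d)
ta = TimeAxis(t, d, n)
# a bound, in-memory series covering the full 10-year historical period
ts = TimeSeries(ta,
                np.sin(np.linspace(start=0, stop=1.0, num=ta.size())),
                point_fx.POINT_AVERAGE_VALUE)

t_0 = utc.time(2018, 1, 1)
tax = TimeAxis(t_0, Calendar.DAY, 365)
# 10 one-year partitions, each time-shifted so that it starts at t_0
partitions = ts.partition_by(utc, t, Calendar.YEAR, 10, t_0)
daily = partitions.average(tax)  # daily averages, one series per partition
assert len(daily) == 10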
Example No. 2
    def test_ts_cache(self):
        """ Verify dtss ts-cache functions exposed to python """
        with tempfile.TemporaryDirectory() as c_dir:
            # setup data to be calculated
            utc = Calendar()
            d = deltahours(1)
            n = 100
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)
            n_ts = 10
            store_tsv = TsVector()  # something we store at server side
            # something we put an expression into, referring to stored ts-symbols
            tsv = TsVector()

            for i in range(n_ts):
                pts = TimeSeries(
                    ta,
                    np.sin(np.linspace(start=0, stop=1.0 * i, num=ta.size())),
                    point_fx.POINT_AVERAGE_VALUE)
                ts_id = shyft_store_url("{0}".format(i))
                # make an expression that returns what we store
                tsv.append(1.0 * TimeSeries(ts_id))
                store_tsv.append(TimeSeries(
                    ts_id, pts))  # generate a bound pts to store

            # add one external ts
            tsv.append(TimeSeries(fake_store_url("_any_ts_id_will_do")))
            # then start the server
            dtss = DtsServer()

            dtss.cb = self.dtss_read_callback  # rig external callbacks as well.
            self.callback_count = 0
            self.rd_throws = False
            cache_on_write = True
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_auto_cache(True)
            dtss.set_listening_port(port_no)
            # notice we set container 'test' to point to the c_dir directory
            dtss.set_container("test", c_dir)
            # the internal shyft time-series will be stored to that container
            dtss.start_async()

            # auto_connect=False demonstrates an object life-time connection
            dts = DtsClient(host_port, auto_connect=False)
            cs0 = dtss.cache_stats
            dts.store_ts(store_tsv,
                         overwrite_on_write=True,
                         cache_on_write=cache_on_write)
            r1 = dts.evaluate(tsv,
                              ta.total_period(),
                              use_ts_cached_read=True,
                              update_ts_cache=True)
            cs1 = dtss.cache_stats
            ccs1 = dts.cache_stats  # client can also provide cache-stats

            dtss.flush_cache_all()  # force the cache empty
            dtss.clear_cache_stats()
            cs2 = dtss.cache_stats  # just to ensure clear did work
            r1 = dts.evaluate(
                tsv,
                ta.total_period(),
                use_ts_cached_read=True,
                update_ts_cache=True
            )  # second evaluation: cache is empty, will force reads (misses)
            cs3 = dtss.cache_stats
            r1 = dts.evaluate(
                tsv,
                ta.total_period(),
                use_ts_cached_read=True,
                update_ts_cache=True
            )  # third evaluation, cache is now filled, all hits
            cs4 = dtss.cache_stats
            # now verify explicit caching performed by the python callback
            self.cache_dtss = dtss
            self.cache_reads = True
            dts.cache_flush()  # is the equivalent of
            # dtss.flush_cache_all()
            # dtss.clear_cache_stats()
            # use explicit cache-control instead of global
            # turn off auto caching; we want to test the explicit caching
            dtss.set_auto_cache(False)
            r1 = dts.evaluate(
                tsv,
                ta.total_period(),
                use_ts_cached_read=True,
                update_ts_cache=False
            )  # evaluation: just misses, but the callback explicitly caches the external ts
            cs5 = dtss.cache_stats  # baseline: lots of misses
            r1 = dts.evaluate(tsv,
                              ta.total_period(),
                              use_ts_cached_read=True,
                              update_ts_cache=False)
            cs6 = dtss.cache_stats  # should be one hit here

            dts.close()  # close connection (will use context manager later)
            dtss.clear()  # close server

            # now the moment of truth:
            self.assertEqual(len(r1), len(tsv))
            for i in range(n_ts - 1):
                self.assertEqual(r1[i].time_axis, store_tsv[i].time_axis)
                assert_array_almost_equal(r1[i].values.to_numpy(),
                                          store_tsv[i].values.to_numpy(),
                                          decimal=4)

            self.assertEqual(cs0.hits, 0)
            self.assertEqual(cs0.misses, 0)
            self.assertEqual(cs0.coverage_misses, 0)
            self.assertEqual(cs0.id_count, 0)
            self.assertEqual(cs0.point_count, 0)
            self.assertEqual(cs0.fragment_count, 0)

            self.assertEqual(cs1.hits, n_ts)
            self.assertEqual(
                cs1.misses, 1
            )  # we cache on store, so the 10 stored ts are cached; the 1 external ts misses
            self.assertEqual(cs1.coverage_misses, 0)
            self.assertEqual(cs1.id_count, n_ts + 1)
            self.assertEqual(cs1.point_count, (n_ts + 1) * n)
            self.assertEqual(cs1.fragment_count, n_ts + 1)
            # verify client side cache_stats
            self.assertEqual(ccs1.hits, n_ts)
            self.assertEqual(
                ccs1.misses, 1
            )  # we cache on store, so the 10 stored ts are cached; the 1 external ts misses
            self.assertEqual(ccs1.coverage_misses, 0)
            self.assertEqual(ccs1.id_count, n_ts + 1)
            self.assertEqual(ccs1.point_count, (n_ts + 1) * n)
            self.assertEqual(ccs1.fragment_count, n_ts + 1)

            self.assertEqual(cs2.hits, 0)
            self.assertEqual(cs2.misses, 0)
            self.assertEqual(cs2.coverage_misses, 0)
            self.assertEqual(cs2.id_count, 0)
            self.assertEqual(cs2.point_count, 0)
            self.assertEqual(cs2.fragment_count, 0)

            self.assertEqual(cs3.hits, 0)
            # the cache was flushed, so all n_ts + 1 reads are misses
            self.assertEqual(cs3.misses, n_ts + 1)
            self.assertEqual(cs3.coverage_misses, 0)
            self.assertEqual(cs3.id_count, n_ts + 1)
            self.assertEqual(cs3.point_count, (n_ts + 1) * n)
            self.assertEqual(cs3.fragment_count, n_ts + 1)

            self.assertEqual(cs4.hits,
                             n_ts + 1)  # because previous read filled cache
            self.assertEqual(cs4.misses,
                             n_ts + 1)  # remembers previous misses.
            self.assertEqual(cs4.coverage_misses, 0)
            self.assertEqual(cs4.id_count, n_ts + 1)
            self.assertEqual(cs4.point_count, (n_ts + 1) * n)
            self.assertEqual(cs4.fragment_count, n_ts + 1)

            self.assertEqual(cs6.hits, 1)  # the external ts explicitly cached by the callback gives one hit
            self.assertEqual(cs6.misses,
                             n_ts * 2 + 1)  # remembers previous misses.
            self.assertEqual(cs6.coverage_misses, 0)
            self.assertEqual(cs6.id_count, 1)
            self.assertEqual(cs6.point_count, 1 * n)
            self.assertEqual(cs6.fragment_count, 1)
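
The example rigs dtss.cb = self.dtss_read_callback but never shows the callback itself. Below is a hedged sketch of what such a read callback could look like; only the contract implied by the code above (a list of ts-ids plus a read period in, a TsVector out) is taken from the example, while the test-attribute names and the DtsServer.cache(...) call for the "explicit caching performed by the python callback" step are assumptions.

from shyft.time_series import (Calendar, TimeAxis, TimeSeries, TsVector,
                               deltahours, point_fx)

def dtss_read_callback(self, ts_ids, read_period):
    """Sketch of a test-class method serving reads for non-shyft:// ts-ids."""
    self.callback_count += 1
    if self.rd_throws:
        raise RuntimeError("read-error")  # simulate a failing external backend
    utc = Calendar()
    dt = deltahours(1)
    n = utc.diff_units(read_period.start, read_period.end, dt)
    ta = TimeAxis(read_period.start, dt, n)
    r = TsVector()
    for _ in ts_ids:  # one constant series per requested id
        r.append(TimeSeries(ta, fill_value=1.0,
                            point_fx=point_fx.POINT_AVERAGE_VALUE))
    if self.cache_reads and self.cache_dtss:
        # assumed explicit server-side caching of what was just read
        self.cache_dtss.cache(ts_ids, r)
    return r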
Example No. 3
    def test_merge_store_ts_points(self):
        """
        This test verifies that the shyft internal time-series store's
        merge_store_ts_points function provides the required
        merge semantics.
        """
        with tempfile.TemporaryDirectory() as c_dir:
            # setup data to be calculated
            utc = Calendar()
            d = deltahours(1)
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(
                UtcTimeVector.from_numpy(
                    np.array([t, t + d, t + 3 * d], dtype=np.int64)),
                t + 4 * d)

            n_ts = 10
            store_tsv = TsVector()  # something we store at server side

            for i in range(n_ts):
                ts_id = shyft_store_url("{0}".format(i))
                store_tsv.append(
                    TimeSeries(
                        ts_id,
                        TimeSeries(ta,
                                   fill_value=float(i),
                                   point_fx=point_fx.POINT_AVERAGE_VALUE)))
            # then start the server
            dtss = DtsServer()
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_auto_cache(True)
            dtss.set_listening_port(port_no)
            # notice we set container 'test' to point to the c_dir directory
            dtss.set_container("test", c_dir)
            # the internal shyft time-series will be stored to that container
            dtss.start_async()
            dts = DtsClient(host_port)

            # 1. store the initial time-series; they are required for merge_store_ts_points
            dts.store_ts(store_tsv)

            # make some points: one before, one in the middle and one after the stored series
            tb = TimeAxis(
                UtcTimeVector.from_numpy(
                    np.array([t - d, t + 3 * d, t + 4 * d], dtype=np.int64)),
                t + 5 * d)
            mpv = TsVector()  # merge point vector
            for i in range(n_ts):
                ts_id = shyft_store_url("{0}".format(i))
                mpv.append(
                    TimeSeries(
                        ts_id,
                        TimeSeries(tb,
                                   fill_value=-1 - float(i),
                                   point_fx=point_fx.POINT_AVERAGE_VALUE)))

            dts.merge_store_ts_points(mpv)

            rts = TsVector()
            rts[:] = [TimeSeries(shyft_store_url(f"{i}")) for i in range(n_ts)]

            r = dts.evaluate(rts, tb.total_period())
            dts.close()  # close connection (will use context manager later)
            dtss.clear()  # close server

            for i in range(len(r)):
                self.assertEqual(r[i].time_axis.size(), 5)
                assert_array_almost_equal(
                    r[i].values.to_numpy(),
                    np.array([-i - 1, i, i, -i - 1, -i - 1], dtype=np.float64))
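
The assertions above encode the merge semantics: points from the merge vector replace existing points with equal time-stamps, new time-stamps are inserted, and untouched existing points are kept. A tiny, self-contained sketch of the same rule (plain Python, no server; the helper name is made up for illustration):

def merge_points(existing, incoming):
    """existing/incoming: dict of time -> value; incoming wins on equal times."""
    merged = dict(existing)
    merged.update(incoming)
    return [v for _, v in sorted(merged.items())]

i = 0                                          # series number, as in the loop above
existing = {0: i, 1: i, 3: i}                  # hours: t, t + d, t + 3*d
incoming = {-1: -1 - i, 3: -1 - i, 4: -1 - i}  # hours: t - d, t + 3*d, t + 4*d
assert merge_points(existing, incoming) == [-1 - i, i, i, -1 - i, -1 - i]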
Example No. 4
    def test_ts_store(self):
        """
        This test verifies the shyft internal time-series store,
        which allows identified time-series to be stored
        in the backend using a directory container specified for the
        location.

        All time-series of the form shyft://<container>/<ts-name>
        are mapped to the configured <container> (aka a directory on the server).

        This applies to expressions, as well as the new
        .store_ts(ts_vector) function that allows the user to
        stash away time-series into the configured back-end container.

        All find-operations of the form shyft://<container>/<regular-expression>
        are mapped to a search in the corresponding directory for the <container>.

        :return:
        """
        with tempfile.TemporaryDirectory() as c_dir:
            # setup data to be calculated
            utc = Calendar()
            d = deltahours(1)
            n = 365 * 24 // 3
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)
            n_ts = 10
            store_tsv = TsVector()  # something we store at server side
            # something we put an expression into, referring to stored ts-symbols
            tsv = TsVector()

            for i in range(n_ts):
                pts = TimeSeries(
                    ta,
                    np.sin(np.linspace(start=0, stop=1.0 * i, num=ta.size())),
                    point_fx.POINT_AVERAGE_VALUE)
                ts_id = shyft_store_url("{0}".format(i))
                # make an expression that returns what we store
                tsv.append(1.0 * TimeSeries(ts_id))
                store_tsv.append(TimeSeries(
                    ts_id, pts))  # generate a bound pts to store
            # krls with some extra challenges related to serialization
            tsv_krls = TsVector()
            krls_ts = TimeSeries(shyft_store_url("9")).krls_interpolation(
                dt=d, gamma=1e-3, tolerance=0.001, size=ta.size())
            tsv_krls.append(krls_ts)
            # min_max_check_linear_fill also needs a serialization check
            # create a trivial case
            ts9 = TimeSeries(shyft_store_url("9"))
            ts_qac = ts9.min_max_check_linear_fill(v_min=-10.0 * n_ts,
                                                   v_max=10.0 * n_ts)
            tsv_krls.append(ts_qac)
            tsv_krls.append(ts9)
            tsv_krls.append(ts9.inside(min_v=-0.5, max_v=0.5))

            # then start the server
            dtss = DtsServer()
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_auto_cache(True)
            std_max_items = dtss.cache_max_items
            dtss.cache_max_items = 3000
            tst_max_items = dtss.cache_max_items
            dtss.set_listening_port(port_no)
            # notice we set container 'test' to point to the c_dir directory
            dtss.set_container("test", c_dir)
            # the internal shyft time-series will be stored to that container
            dtss.start_async()
            # also notice that we don't have to set up callbacks in this case (but we could, and they would work)
            #
            # finally start the action
            dts = DtsClient(host_port)
            # then try something that should work
            dts.store_ts(store_tsv)
            r1 = dts.evaluate(tsv, ta.total_period())
            f1 = dts.find(
                r"shyft://test/\d")  # find all ts with one digit, 0..9
            r2 = dts.evaluate(tsv_krls, ta.total_period())
            url_x = shyft_store_url(r'does not exist')
            tsvx = TsVector()
            tsvx.append(TimeSeries(url_x))
            try:
                rx = dts.evaluate(tsvx, ta.total_period())
                self.fail('evaluate() should raise for a non-existing time-series')
            except RuntimeError as rex:
                self.assertIsNotNone(rex)

            dts.close()  # close connection (will use context manager later)
            dtss.clear()  # close server

            # now the moment of truth:
            self.assertEqual(len(r1), len(tsv))
            for i in range(n_ts - 1):
                self.assertEqual(r1[i].time_axis, store_tsv[i].time_axis)
                assert_array_almost_equal(r1[i].values.to_numpy(),
                                          store_tsv[i].values.to_numpy(),
                                          decimal=4)

            self.assertEqual(len(f1), 10)
            self.assertEqual(len(r2), len(tsv_krls))
            assert_array_almost_equal(r2[1].values.to_numpy(),
                                      r2[2].values.to_numpy(),
                                      decimal=4)
            self.assertEqual(1000000, std_max_items)
            self.assertEqual(3000, tst_max_items)
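
All four examples rely on shyft_store_url and fake_store_url helpers that are not shown. Plausible sketches follow; only the shyft://<container>/<ts-name> form is documented in the docstring above, so the exact helper bodies and the scheme used for the external ids are assumptions for illustration.

def shyft_store_url(name: str) -> str:
    """Ts-id that the DtsServer maps to the 'test' container configured above."""
    return f"shyft://test/{name}"

def fake_store_url(name: str) -> str:
    """Ts-id with a scheme the server does not own, so it is routed to the read callback."""
    return f"fake://external/{name}"

# usage, matching the examples above:
assert shyft_store_url("0") == "shyft://test/0"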