def test_dtss_partition_by_average(self):
    """
    Illustrate use of partition_by client- and server-side.

    A 10-year hourly series is stored on the server; it is then
    partitioned into yearly slices both through a symbolic
    (server-side evaluated) reference and through the locally bound
    copy, and the two results must agree.  The main point is that the
    evaluate period must cover both the historical and the evaluation
    periods.
    """
    with tempfile.TemporaryDirectory() as c_dir:
        # setup data to be calculated
        utc = Calendar()
        d = deltahours(1)
        t = utc.time(2000, 1, 1)
        n = utc.diff_units(t, utc.add(t, Calendar.YEAR, 10), d)
        ta = TimeAxis(t, d, n)
        n_ts = 1
        store_tsv = TsVector()  # something we store at server side
        for i in range(n_ts):
            pts = TimeSeries(
                ta,
                np.sin(np.linspace(start=0, stop=1.0 * (i + 1), num=ta.size())),
                point_fx.POINT_AVERAGE_VALUE)
            ts_id = shyft_store_url(f"{i}")
            store_tsv.append(TimeSeries(ts_id, pts))  # generate a bound pts to store

        # start dtss server
        dtss = DtsServer()
        cache_on_write = True
        port_no = find_free_port()
        host_port = 'localhost:{0}'.format(port_no)
        dtss.set_auto_cache(True)
        dtss.set_listening_port(port_no)
        # notice we set container 'test' to point to c_dir directory
        dtss.set_container("test", c_dir)
        # the internal shyft time-series will be stored to that container
        dtss.start_async()

        # create dts client
        c = DtsClient(host_port, auto_connect=False)  # demonstrate object life-time connection
        c.store_ts(store_tsv, overwrite_on_write=True, cache_on_write=cache_on_write)
        t_0 = utc.time(2018, 1, 1)
        tax = TimeAxis(t_0, Calendar.DAY, 365)
        ts_h1 = TimeSeries(shyft_store_url(f'{0}'))  # symbolic, resolved server-side
        ts_h2 = store_tsv[0]  # already bound client-side
        ts_p1 = ts_h1.partition_by(utc, t, Calendar.YEAR, 10, t_0).average(tax)
        ts_p2 = ts_h2.partition_by(utc, t, Calendar.YEAR, 10, t_0).average(tax)
        # fix: the original test computed the partitions but never
        # asserted anything and never shut down the client/server.
        # Evaluate the symbolic partition server-side and verify it
        # matches the locally computed partition.
        r1 = c.evaluate(ts_p1, tax.total_period())
        c.close()  # close connection (will use context manager later)
        dtss.clear()  # close server
        self.assertEqual(len(r1), len(ts_p2))
        for p1, p2 in zip(r1, ts_p2):
            assert_array_almost_equal(p1.values.to_numpy(),
                                      p2.values.to_numpy(),
                                      decimal=4)
def test_ts_cache(self):
    """ Verify dtss ts-cache functions exposed to python """

    def expect_stats(stats, hits, misses, coverage_misses,
                     id_count, point_count, fragment_count):
        # Compare one cache-stats snapshot against the expected counters.
        self.assertEqual(stats.hits, hits)
        self.assertEqual(stats.misses, misses)
        self.assertEqual(stats.coverage_misses, coverage_misses)
        self.assertEqual(stats.id_count, id_count)
        self.assertEqual(stats.point_count, point_count)
        self.assertEqual(stats.fragment_count, fragment_count)

    with tempfile.TemporaryDirectory() as c_dir:
        # setup data to be calculated
        utc = Calendar()
        d = deltahours(1)
        n = 100
        t = utc.time(2016, 1, 1)
        ta = TimeAxis(t, d, n)
        n_ts = 10
        stored = TsVector()  # something we store at server side
        exprs = TsVector()   # expressions referring to the stored ts-symbols
        for i in range(n_ts):
            pts = TimeSeries(
                ta,
                np.sin(np.linspace(start=0, stop=1.0 * i, num=ta.size())),
                point_fx.POINT_AVERAGE_VALUE)
            ts_id = shyft_store_url(f"{i}")
            exprs.append(float(1.0) * TimeSeries(ts_id))  # expression returning what we store
            stored.append(TimeSeries(ts_id, pts))         # a bound pts to store
        # add one external ts, resolved through the read-callback
        exprs.append(TimeSeries(fake_store_url("_any_ts_id_will_do")))

        # then start the server
        server = DtsServer()
        server.cb = self.dtss_read_callback  # rig external callbacks as well
        self.callback_count = 0
        self.rd_throws = False
        cache_on_write = True
        port_no = find_free_port()
        host_port = f'localhost:{port_no}'
        server.set_auto_cache(True)
        server.set_listening_port(port_no)
        # notice we set container 'test' to point to c_dir directory
        server.set_container("test", c_dir)
        # the internal shyft time-series will be stored to that container
        server.start_async()

        client = DtsClient(host_port, auto_connect=False)  # demonstrate object life-time connection
        cs0 = client_stats_baseline = server.cache_stats
        client.store_ts(stored, overwrite_on_write=True, cache_on_write=cache_on_write)
        res = client.evaluate(exprs, ta.total_period(),
                              use_ts_cached_read=True, update_ts_cache=True)
        cs1 = server.cache_stats
        ccs1 = client.cache_stats  # the client can also provide cache-stats
        server.flush_cache_all()   # force the cache empty
        server.clear_cache_stats()
        cs2 = server.cache_stats   # just to ensure clear did work
        # second evaluation, cache is empty, will force read (misses)
        res = client.evaluate(exprs, ta.total_period(),
                              use_ts_cached_read=True, update_ts_cache=True)
        cs3 = server.cache_stats
        # third evaluation, cache is now filled, all hits
        res = client.evaluate(exprs, ta.total_period(),
                              use_ts_cached_read=True, update_ts_cache=True)
        cs4 = server.cache_stats

        # now verify explicit caching performed by the python callback
        self.cache_dtss = server
        self.cache_reads = True
        client.cache_flush()  # is the equivalent of
        # server.flush_cache_all()
        # server.clear_cache_stats()
        # use explicit cache-control instead of global auto-caching
        server.set_auto_cache(False)
        # evaluation, just misses, but we explicitly cache the external ts
        res = client.evaluate(exprs, ta.total_period(),
                              use_ts_cached_read=True, update_ts_cache=False)
        cs5 = server.cache_stats  # ok baseline: a lot of misses
        res = client.evaluate(exprs, ta.total_period(),
                              use_ts_cached_read=True, update_ts_cache=False)
        cs6 = server.cache_stats  # should be one hit here
        client.close()  # close connection (will use context manager later)
        server.clear()  # close server

        # now the moment of truth:
        self.assertEqual(len(res), len(exprs))
        for i in range(n_ts - 1):
            self.assertEqual(res[i].time_axis, stored[i].time_axis)
            assert_array_almost_equal(res[i].values.to_numpy(),
                                      stored[i].values.to_numpy(),
                                      decimal=4)
        expect_stats(cs0, 0, 0, 0, 0, 0, 0)
        # because we cache on store: 10 cached, 1 external with a miss
        expect_stats(cs1, n_ts, 1, 0, n_ts + 1, (n_ts + 1) * n, n_ts + 1)
        # verify client-side cache-stats agree with the server-side view
        expect_stats(ccs1, n_ts, 1, 0, n_ts + 1, (n_ts + 1) * n, n_ts + 1)
        expect_stats(cs2, 0, 0, 0, 0, 0, 0)
        expect_stats(cs3, 0, n_ts + 1, 0, n_ts + 1, (n_ts + 1) * n, n_ts + 1)
        # hits because the previous read filled the cache; misses remembered
        expect_stats(cs4, n_ts + 1, n_ts + 1, 0, n_ts + 1, (n_ts + 1) * n, n_ts + 1)
        # only the explicitly cached external ts is in the cache
        expect_stats(cs6, 1, n_ts * 2 + 1, 0, 1, 1 * n, 1)
def test_merge_store_ts_points(self):
    """
    Verify the shyft internal time-series store: the
    merge_store_points function must provide the required
    merge semantics.
    """
    with tempfile.TemporaryDirectory() as c_dir:
        # setup data to be calculated
        utc = Calendar()
        d = deltahours(1)
        t = utc.time(2016, 1, 1)
        # initial points at t, t+1h and t+3h; total period ends at t+4h
        ta = TimeAxis(
            UtcTimeVector.from_numpy(
                np.array([t, t + d, t + 3 * d], dtype=np.int64)),
            t + 4 * d)
        n_ts = 10
        initial = TsVector()  # the series we store at server side
        for i in range(n_ts):
            initial.append(
                TimeSeries(shyft_store_url(f"{i}"),
                           TimeSeries(ta,
                                      fill_value=float(i),
                                      point_fx=point_fx.POINT_AVERAGE_VALUE)))

        # then start the server
        server = DtsServer()
        port_no = find_free_port()
        server.set_auto_cache(True)
        server.set_listening_port(port_no)
        # notice we set container 'test' to point to c_dir directory
        server.set_container("test", c_dir)
        # the internal shyft time-series will be stored to that container
        server.start_async()

        client = DtsClient(f'localhost:{port_no}')
        # 1. store the initial time-series; they are required before
        #    the merge_store_points function can be used
        client.store_ts(initial)

        # make some points: one before, one in the middle and one after
        tb = TimeAxis(
            UtcTimeVector.from_numpy(
                np.array([t - d, t + 3 * d, t + 4 * d], dtype=np.int64)),
            t + 5 * d)
        merge_pts = TsVector()  # merge point vector
        for i in range(n_ts):
            merge_pts.append(
                TimeSeries(shyft_store_url(f"{i}"),
                           TimeSeries(tb,
                                      fill_value=-1 - float(i),
                                      point_fx=point_fx.POINT_AVERAGE_VALUE)))
        client.merge_store_ts_points(merge_pts)

        # read the merged result back over the full merge period
        rts = TsVector()
        rts[:] = [TimeSeries(shyft_store_url(f"{i}")) for i in range(n_ts)]
        res = client.evaluate(rts, tb.total_period())
        client.close()  # close connection (will use context manager later)
        server.clear()  # close server

        for i, ts in enumerate(res):
            self.assertEqual(ts.time_axis.size(), 5)
            assert_array_almost_equal(
                ts.values.to_numpy(),
                np.array([-i - 1, i, i, -i - 1, -i - 1], dtype=np.float64))
def test_ts_store(self):
    """
    This test verifies the shyft internal time-series store, that
    allow identified time-series to be stored in the backend using a
    directory container specified for the location.

    All time-series of the form shyft://<container>/<ts-name> is
    mapped to the configured <container> (aka a directory on the
    server).

    This applies to expressions, as well as the new
    .store_ts(ts_vector) function that allows the user to stash away
    time-series into the configured back-end container.

    All find-operations of the form
    shyft://<container>/<regular-expression> is mapped to a search in
    the corresponding directory for the <container>

    :return:
    """
    with tempfile.TemporaryDirectory() as c_dir:
        # setup data to be calculated
        utc = Calendar()
        d = deltahours(1)
        n = 365 * 24 // 3
        t = utc.time(2016, 1, 1)
        ta = TimeAxis(t, d, n)
        n_ts = 10
        store_tsv = TsVector()  # something we store at server side
        tsv = TsVector()  # something we put an expression into, referring to stored ts-symbols
        for i in range(n_ts):
            pts = TimeSeries(
                ta,
                np.sin(np.linspace(start=0, stop=1.0 * i, num=ta.size())),
                point_fx.POINT_AVERAGE_VALUE)
            ts_id = shyft_store_url("{0}".format(i))
            # make an expression that returns what we store
            tsv.append(float(1.0) * TimeSeries(ts_id))
            store_tsv.append(TimeSeries(ts_id, pts))  # generate a bound pts to store

        # krls with some extra challenges related to serialization
        tsv_krls = TsVector()
        krls_ts = TimeSeries(shyft_store_url("9")).krls_interpolation(
            dt=d, gamma=1e-3, tolerance=0.001, size=ta.size())
        tsv_krls.append(krls_ts)
        # min_max_check_ts_fill also needs a serial check; create a trivial case
        ts9 = TimeSeries(shyft_store_url("9"))
        ts_qac = ts9.min_max_check_linear_fill(v_min=-10.0 * n_ts, v_max=10.0 * n_ts)
        tsv_krls.append(ts_qac)
        tsv_krls.append(ts9)
        tsv_krls.append(ts9.inside(min_v=-0.5, max_v=0.5))

        # then start the server
        dtss = DtsServer()
        port_no = find_free_port()
        host_port = 'localhost:{0}'.format(port_no)
        dtss.set_auto_cache(True)
        std_max_items = dtss.cache_max_items  # remember default to verify below
        dtss.cache_max_items = 3000
        tst_max_items = dtss.cache_max_items  # read back to verify the setter took
        dtss.set_listening_port(port_no)
        # notice we set container 'test' to point to c_dir directory
        dtss.set_container("test", c_dir)
        # the internal shyft time-series will be stored to that container
        dtss.start_async()
        # also notice that we dont have to setup callbacks in this case
        # (but we could, and they would work)
        #
        # finally start the action
        dts = DtsClient(host_port)
        # then try something that should work
        dts.store_ts(store_tsv)
        r1 = dts.evaluate(tsv, ta.total_period())
        f1 = dts.find(r"shyft://test/\d")  # find all ts with one digit, 0..9
        r2 = dts.evaluate(tsv_krls, ta.total_period())
        url_x = shyft_store_url(r'does not exists')
        tsvx = TsVector()
        tsvx.append(TimeSeries(url_x))
        # fix: use the idiomatic assertRaises context manager instead of
        # try/except with self.assertFalse(True, ...) and an unused result
        with self.assertRaises(RuntimeError):
            dts.evaluate(tsvx, ta.total_period())
        dts.close()  # close connection (will use context manager later)
        dtss.clear()  # close server

        # now the moment of truth:
        self.assertEqual(len(r1), len(tsv))
        for i in range(n_ts - 1):
            self.assertEqual(r1[i].time_axis, store_tsv[i].time_axis)
            assert_array_almost_equal(r1[i].values.to_numpy(),
                                      store_tsv[i].values.to_numpy(),
                                      decimal=4)
        self.assertEqual(len(f1), 10)
        self.assertEqual(len(r2), len(tsv_krls))
        assert_array_almost_equal(r2[1].values.to_numpy(),
                                  r2[2].values.to_numpy(), decimal=4)
        self.assertEqual(1000000, std_max_items)
        self.assertEqual(3000, tst_max_items)