def test_get_ts_info(self):
    """
    Verify that we can get specific TsInfo objects for time-series from the server backend.
    """
    with tempfile.TemporaryDirectory() as c_dir:
        # start the server
        dtss = DtsServer()
        port_no = find_free_port()
        host_adr = 'localhost:{0}'.format(port_no)
        dtss.set_listening_port(port_no)
        dtss.set_container("testing", c_dir)  # notice we set container 'testing' to point to the c_dir directory
        dtss.start_async()  # the internal shyft time-series will be stored to that container
        # get a client
        client = DtsClient(host_adr)
        try:
            client.get_ts_info(r'shyft://testing/data')
        except Exception:
            pass
        else:  # only end up here if no exception was raised
            self.fail('Could fetch info for a non-existing ts')
        # setup some data
        utc = Calendar()
        d = deltahours(1)
        n = 365 * 24 // 3
        t = utc.time(2016, 1, 1)
        ta = TimeAxis(t, d, n)
        tsv = TsVector()
        pts = TimeSeries(ta, np.linspace(start=0, stop=1.0, num=ta.size()), point_fx.POINT_AVERAGE_VALUE)
        tsv.append(TimeSeries(r'shyft://testing/data', pts))
        client.store_ts(tsv)
        info: TsInfo = client.get_ts_info(r'shyft://testing/data')
        self.assertEqual(info.name, r'data')
        self.assertEqual(info.point_fx, point_fx.POINT_AVERAGE_VALUE)
        self.assertEqual(info.data_period, ta.total_period())

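# The tests in this class rely on a small find_free_port helper defined elsewhere in the test
# module. A minimal sketch of such a helper, assuming the usual standard-library trick of
# binding to port 0 so the OS assigns an available port (an illustration only, not necessarily
# the module's actual implementation):
def find_free_port() -> int:
    """Return a port number that is currently free on localhost."""
    import socket
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.bind(('', 0))  # port 0 asks the OS to pick an available ephemeral port
        return s.getsockname()[1]
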
def test_merge_store_ts_points(self):
    """
    Verify the shyft internal time-series store, ensuring that the merge_store_ts_points
    function provides the required merge semantics.
    """
    with tempfile.TemporaryDirectory() as c_dir:
        # setup data to be calculated
        utc = Calendar()
        d = deltahours(1)
        t = utc.time(2016, 1, 1)
        ta = TimeAxis(UtcTimeVector.from_numpy(np.array([t, t + d, t + 3 * d], dtype=np.int64)), t + 4 * d)
        n_ts = 10
        store_tsv = TsVector()  # something we store at server side
        for i in range(n_ts):
            ts_id = shyft_store_url("{0}".format(i))
            store_tsv.append(TimeSeries(ts_id, TimeSeries(ta, fill_value=float(i),
                                                          point_fx=point_fx.POINT_AVERAGE_VALUE)))
        # then start the server
        dtss = DtsServer()
        port_no = find_free_port()
        host_port = 'localhost:{0}'.format(port_no)
        dtss.set_auto_cache(True)
        dtss.set_listening_port(port_no)
        dtss.set_container("test", c_dir)  # notice we set container 'test' to point to the c_dir directory
        dtss.start_async()  # the internal shyft time-series will be stored to that container

        dts = DtsClient(host_port)
        dts.store_ts(store_tsv)  # 1. store the initial time-series, they are required for merge_store_ts_points
        # 2. make some merge points: one before, one in the middle, and one after the stored period
        tb = TimeAxis(UtcTimeVector.from_numpy(np.array([t - d, t + 3 * d, t + 4 * d], dtype=np.int64)), t + 5 * d)
        mpv = TsVector()  # merge point vector
        for i in range(n_ts):
            ts_id = shyft_store_url("{0}".format(i))
            mpv.append(TimeSeries(ts_id, TimeSeries(tb, fill_value=-1 - float(i),
                                                    point_fx=point_fx.POINT_AVERAGE_VALUE)))
        dts.merge_store_ts_points(mpv)
        rts = TsVector()
        rts[:] = [TimeSeries(shyft_store_url(f"{i}")) for i in range(n_ts)]
        r = dts.evaluate(rts, tb.total_period())
        dts.close()  # close connection (will use context manager later)
        dtss.clear()  # close server
        for i in range(len(r)):
            self.assertEqual(r[i].time_axis.size(), 5)
            assert_array_almost_equal(r[i].values.to_numpy(),
                                      np.array([-i - 1, i, i, -i - 1, -i - 1], dtype=np.float64))

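# These tests refer to stored time-series through the shyft_store_url helper (and test_ts_cache
# below also uses a fake_store_url for an external, non-shyft ts-id); both helpers are defined
# elsewhere in the test module. Minimal sketches, assuming the 'test' container configured above
# and an arbitrary non-shyft url scheme for the external series (names and scheme here are
# assumptions, not the module's actual definitions):
def shyft_store_url(name: str) -> str:
    """Map a ts-name into the dtss-internal 'test' container: shyft://test/<ts-name>."""
    return 'shyft://test/{}'.format(name)


def fake_store_url(name: str) -> str:
    """A ts-url with a non-shyft scheme, so the dtss routes reads to the external read-callback."""
    return 'fake://external/{}'.format(name)
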
def test_ts_store(self):
    """
    This test verifies the shyft internal time-series store, which allows identified
    time-series to be stored in the backend using a directory container specified for
    the location.

    All time-series of the form shyft://<container>/<ts-name> are mapped to the
    configured <container> (aka a directory on the server).

    This applies to expressions, as well as the .store_ts(ts_vector) function that
    allows the user to stash away time-series into the configured back-end container.

    All find-operations of the form shyft://<container>/<regular-expression> are
    mapped to a search in the corresponding directory for the <container>.
    """
    with tempfile.TemporaryDirectory() as c_dir:
        # setup data to be calculated
        utc = Calendar()
        d = deltahours(1)
        n = 365 * 24 // 3
        t = utc.time(2016, 1, 1)
        ta = TimeAxis(t, d, n)
        n_ts = 10
        store_tsv = TsVector()  # something we store at server side
        tsv = TsVector()  # something we put an expression into, referring to stored ts-symbols
        for i in range(n_ts):
            pts = TimeSeries(ta, np.sin(np.linspace(start=0, stop=1.0 * i, num=ta.size())),
                             point_fx.POINT_AVERAGE_VALUE)
            ts_id = shyft_store_url("{0}".format(i))
            tsv.append(1.0 * TimeSeries(ts_id))  # make an expression that returns what we store
            store_tsv.append(TimeSeries(ts_id, pts))  # generate a bound pts to store
        # krls with some extra challenges related to serialization
        tsv_krls = TsVector()
        krls_ts = TimeSeries(shyft_store_url("9")).krls_interpolation(dt=d, gamma=1e-3,
                                                                      tolerance=0.001, size=ta.size())
        tsv_krls.append(krls_ts)
        # min_max_check_linear_fill also needs a serialization check, so create a trivial case
        ts9 = TimeSeries(shyft_store_url("9"))
        ts_qac = ts9.min_max_check_linear_fill(v_min=-10.0 * n_ts, v_max=10.0 * n_ts)
        tsv_krls.append(ts_qac)
        tsv_krls.append(ts9)
        tsv_krls.append(ts9.inside(min_v=-0.5, max_v=0.5))

        # then start the server
        dtss = DtsServer()
        port_no = find_free_port()
        host_port = 'localhost:{0}'.format(port_no)
        dtss.set_auto_cache(True)
        std_max_items = dtss.cache_max_items
        dtss.cache_max_items = 3000
        tst_max_items = dtss.cache_max_items
        dtss.set_listening_port(port_no)
        dtss.set_container("test", c_dir)  # notice we set container 'test' to point to the c_dir directory
        dtss.start_async()  # the internal shyft time-series will be stored to that container
        # also notice that we don't have to set up callbacks in this case (but we could, and they would work)
        #
        # finally start the action
        dts = DtsClient(host_port)
        # then try something that should work
        dts.store_ts(store_tsv)
        r1 = dts.evaluate(tsv, ta.total_period())
        f1 = dts.find(r"shyft://test/\d")  # find all ts with one digit, 0..9
        r2 = dts.evaluate(tsv_krls, ta.total_period())
        url_x = shyft_store_url(r'does not exists')
        tsvx = TsVector()
        tsvx.append(TimeSeries(url_x))
        try:
            rx = dts.evaluate(tsvx, ta.total_period())
            self.fail('evaluating a non-existing ts should raise a RuntimeError')
        except RuntimeError as rex:
            self.assertIsNotNone(rex)

        dts.close()  # close connection (will use context manager later)
        dtss.clear()  # close server
        # now the moment of truth:
        self.assertEqual(len(r1), len(tsv))
        for i in range(n_ts - 1):
            self.assertEqual(r1[i].time_axis, store_tsv[i].time_axis)
            assert_array_almost_equal(r1[i].values.to_numpy(), store_tsv[i].values.to_numpy(), decimal=4)
        self.assertEqual(len(f1), 10)
        self.assertEqual(len(r2), len(tsv_krls))
        assert_array_almost_equal(r2[1].values.to_numpy(), r2[2].values.to_numpy(), decimal=4)
        self.assertEqual(1000000, std_max_items)
        self.assertEqual(3000, tst_max_items)

def test_ts_cache(self):
    """
    Verify the dtss ts-cache functions exposed to python.
    """
    with tempfile.TemporaryDirectory() as c_dir:
        # setup data to be calculated
        utc = Calendar()
        d = deltahours(1)
        n = 100
        t = utc.time(2016, 1, 1)
        ta = TimeAxis(t, d, n)
        n_ts = 10
        store_tsv = TsVector()  # something we store at server side
        tsv = TsVector()  # something we put an expression into, referring to stored ts-symbols
        for i in range(n_ts):
            pts = TimeSeries(ta, np.sin(np.linspace(start=0, stop=1.0 * i, num=ta.size())),
                             point_fx.POINT_AVERAGE_VALUE)
            ts_id = shyft_store_url("{0}".format(i))
            tsv.append(1.0 * TimeSeries(ts_id))  # make an expression that returns what we store
            store_tsv.append(TimeSeries(ts_id, pts))  # generate a bound pts to store
        # add one external ts
        tsv.append(TimeSeries(fake_store_url("_any_ts_id_will_do")))

        # then start the server
        dtss = DtsServer()
        dtss.cb = self.dtss_read_callback  # rig the external read-callback as well
        self.callback_count = 0
        self.rd_throws = False
        cache_on_write = True
        port_no = find_free_port()
        host_port = 'localhost:{0}'.format(port_no)
        dtss.set_auto_cache(True)
        dtss.set_listening_port(port_no)
        dtss.set_container("test", c_dir)  # notice we set container 'test' to point to the c_dir directory
        dtss.start_async()  # the internal shyft time-series will be stored to that container

        dts = DtsClient(host_port, auto_connect=False)  # demonstrate object life-time connection
        cs0 = dtss.cache_stats
        dts.store_ts(store_tsv, overwrite_on_write=True, cache_on_write=cache_on_write)
        r1 = dts.evaluate(tsv, ta.total_period(), use_ts_cached_read=True, update_ts_cache=True)
        cs1 = dtss.cache_stats
        ccs1 = dts.cache_stats  # the client can also provide cache-stats
        dtss.flush_cache_all()  # force the cache empty
        dtss.clear_cache_stats()
        cs2 = dtss.cache_stats  # just to ensure the clear did work
        r1 = dts.evaluate(tsv, ta.total_period(), use_ts_cached_read=True,
                          update_ts_cache=True)  # second evaluation, cache is empty, will force reads (misses)
        cs3 = dtss.cache_stats
        r1 = dts.evaluate(tsv, ta.total_period(), use_ts_cached_read=True,
                          update_ts_cache=True)  # third evaluation, cache is now filled, all hits
        cs4 = dtss.cache_stats
        # now verify explicit caching performed by the python callback
        self.cache_dtss = dtss
        self.cache_reads = True
        dts.cache_flush()  # is the equivalent of
        # dtss.flush_cache_all()
        # dtss.clear_cache_stats()
        # use explicit cache-control instead of global
        dtss.set_auto_cache(False)  # turn off auto caching, we want to test the explicit caching
        r1 = dts.evaluate(tsv, ta.total_period(), use_ts_cached_read=True,
                          update_ts_cache=False)  # evaluation, just misses, but the callback explicitly caches the external ts
        cs5 = dtss.cache_stats  # ok, baseline: lots of misses
        r1 = dts.evaluate(tsv, ta.total_period(), use_ts_cached_read=True, update_ts_cache=False)
        cs6 = dtss.cache_stats  # should be one hit here

        dts.close()  # close connection (will use context manager later)
        dtss.clear()  # close server
        # now the moment of truth:
        self.assertEqual(len(r1), len(tsv))
        for i in range(n_ts - 1):
            self.assertEqual(r1[i].time_axis, store_tsv[i].time_axis)
            assert_array_almost_equal(r1[i].values.to_numpy(), store_tsv[i].values.to_numpy(), decimal=4)

        self.assertEqual(cs0.hits, 0)
        self.assertEqual(cs0.misses, 0)
        self.assertEqual(cs0.coverage_misses, 0)
        self.assertEqual(cs0.id_count, 0)
        self.assertEqual(cs0.point_count, 0)
        self.assertEqual(cs0.fragment_count, 0)

        self.assertEqual(cs1.hits, n_ts)
        self.assertEqual(cs1.misses, 1)  # because we cache on store, so 10 cached, 1 external with a miss
        self.assertEqual(cs1.coverage_misses, 0)
        self.assertEqual(cs1.id_count, n_ts + 1)
        self.assertEqual(cs1.point_count, (n_ts + 1) * n)
        self.assertEqual(cs1.fragment_count, n_ts + 1)
        # verify client side cache_stats
        self.assertEqual(ccs1.hits, n_ts)
        self.assertEqual(ccs1.misses, 1)  # because we cache on store, so 10 cached, 1 external with a miss
        self.assertEqual(ccs1.coverage_misses, 0)
        self.assertEqual(ccs1.id_count, n_ts + 1)
        self.assertEqual(ccs1.point_count, (n_ts + 1) * n)
        self.assertEqual(ccs1.fragment_count, n_ts + 1)

        self.assertEqual(cs2.hits, 0)
        self.assertEqual(cs2.misses, 0)
        self.assertEqual(cs2.coverage_misses, 0)
        self.assertEqual(cs2.id_count, 0)
        self.assertEqual(cs2.point_count, 0)
        self.assertEqual(cs2.fragment_count, 0)

        self.assertEqual(cs3.hits, 0)
        self.assertEqual(cs3.misses, n_ts + 1)  # the cache was flushed, so every one of the n_ts + 1 reads is a miss
        self.assertEqual(cs3.coverage_misses, 0)
        self.assertEqual(cs3.id_count, n_ts + 1)
        self.assertEqual(cs3.point_count, (n_ts + 1) * n)
        self.assertEqual(cs3.fragment_count, n_ts + 1)

        self.assertEqual(cs4.hits, n_ts + 1)  # because the previous read filled the cache
        self.assertEqual(cs4.misses, n_ts + 1)  # remembers the previous misses
        self.assertEqual(cs4.coverage_misses, 0)
        self.assertEqual(cs4.id_count, n_ts + 1)
        self.assertEqual(cs4.point_count, (n_ts + 1) * n)
        self.assertEqual(cs4.fragment_count, n_ts + 1)

        self.assertEqual(cs6.hits, 1)  # because the previous read explicitly cached the external ts
        self.assertEqual(cs6.misses, n_ts * 2 + 1)  # remembers the previous misses
        self.assertEqual(cs6.coverage_misses, 0)
        self.assertEqual(cs6.id_count, 1)
        self.assertEqual(cs6.point_count, 1 * n)
        self.assertEqual(cs6.fragment_count, 1)

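# test_ts_cache above rigs dtss.cb to self.dtss_read_callback, a method defined elsewhere in this
# test class. A rough sketch of what such a read-callback could look like, assuming the dtss calls
# it with the requested ts-ids and the read period and expects one TimeSeries per id; the use of
# rd_throws/callback_count/cache_reads/cache_dtss mirrors the attributes referenced by the tests,
# and the UtcPeriod/StringVector annotations assume the usual shyft api imports. This is an
# illustration, not the module's actual implementation:
def dtss_read_callback(self, ts_ids: StringVector, read_period: UtcPeriod) -> TsVector:
    self.callback_count += 1
    if self.rd_throws:
        self.rd_throws = False  # throw only once, then behave on the next call
        raise RuntimeError('rd_throws is set, so the read-callback raises')
    rr = TsVector()
    ta = TimeAxis(read_period.start, deltahours(1), read_period.timespan() // deltahours(1))
    for _ in ts_ids:
        rr.append(TimeSeries(ta, fill_value=1.0, point_fx=point_fx.POINT_AVERAGE_VALUE))
    if getattr(self, 'cache_reads', False) and getattr(self, 'cache_dtss', None):
        self.cache_dtss.cache(ts_ids, rr)  # explicitly push what we just read into the dtss cache
    return rr
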
def test_functionality_hosting_localhost(self):
    # setup data to be calculated
    utc = Calendar()
    d = deltahours(1)
    d24 = deltahours(24)
    n = 240
    n24 = 10
    t = utc.time(2016, 1, 1)
    ta = TimeAxis(t, d, n)
    ta24 = TimeAxis(t, d24, n24)
    n_ts = 100
    percentile_list = IntVector([0, 35, 50, 65, 100])
    tsv = TsVector()
    store_tsv = TsVector()  # something we store at server side
    for i in range(n_ts):
        pts = TimeSeries(ta, np.linspace(start=0, stop=1.0, num=ta.size()), point_fx.POINT_AVERAGE_VALUE)
        tsv.append(float(1 + i / 10) * pts)
        store_tsv.append(TimeSeries("cache://test/{0}".format(i), pts))  # generate a bound pts to store
    dummy_ts = TimeSeries('dummy://a')
    tsv.append(dummy_ts.integral(ta))
    self.assertGreater(len(ts_stringify(tsv[0])), 10)  # just ensure ts_stringify works on expressions

    # then start the server
    dtss = DtsServer()
    port_no = find_free_port()
    host_port = 'localhost:{0}'.format(port_no)
    dtss.set_listening_port(port_no)
    dtss.cb = self.dtss_read_callback
    dtss.find_cb = self.dtss_find_callback
    dtss.store_ts_cb = self.dtss_store_callback
    dtss.start_async()

    dts = DtsClient(StringVector([host_port]), True, 1000)  # the client accepts a vector of hosts (here just one)
    # then try something that should work
    dts.store_ts(store_tsv)
    r1 = dts.evaluate(tsv, ta.total_period())
    tsv1x = tsv.inside(-0.5, 0.5)
    tsv1x.append(tsv1x[-1].decode(start_bit=1, n_bits=1))  # just to verify serialization/bind
    tsv1x.append(store_tsv[1].derivative())
    tsv1x.append(store_tsv[1].pow(2.0))  # just to verify pow serialization (well, it's a bin-op..)
    r1x = dts.evaluate(tsv1x, ta.total_period())
    r2 = dts.percentiles(tsv, ta.total_period(), ta24, percentile_list)
    r3 = dts.find(r'netcdf://dummy\.nc/ts\d')
    self.rd_throws = True
    ex_count = 0
    try:
        rx = dts.evaluate(tsv, ta.total_period())
    except RuntimeError:
        ex_count = 1
    self.rd_throws = True
    try:
        fx = dts.find('should throw')
    except RuntimeError:
        ex_count += 1
    dts.close()  # close connection (will use context manager later)
    dtss.clear()  # close server

    self.assertEqual(ex_count, 2)
    self.assertEqual(len(r1), len(tsv))
    self.assertEqual(self.callback_count, 4)
    for i in range(n_ts - 1):
        self.assertEqual(r1[i].time_axis, tsv[i].time_axis)
        assert_array_almost_equal(r1[i].values.to_numpy(), tsv[i].values.to_numpy(), decimal=4)
    self.assertEqual(len(r2), len(percentile_list))
    dummy_ts.bind(TimeSeries(ta, fill_value=1.0, point_fx=point_fx.POINT_AVERAGE_VALUE))
    p2 = tsv.percentiles(ta24, percentile_list)
    for i in range(len(p2)):
        self.assertEqual(r2[i].time_axis, p2[i].time_axis)
        assert_array_almost_equal(r2[i].values.to_numpy(), p2[i].values.to_numpy(), decimal=1)
    self.assertEqual(self.find_count, 2)
    self.assertEqual(len(r3), 10)  # 0..9
    for i in range(len(r3)):
        self.assertEqual(r3[i], self.ts_infos[i])
    self.assertIsNotNone(r1x)
    self.assertEqual(1, len(self.stored_tsv))
    self.assertEqual(len(store_tsv), len(self.stored_tsv[0]))
    for i in range(len(store_tsv)):
        self.assertEqual(self.stored_tsv[0][i].ts_id(), store_tsv[i].ts_id())

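# test_functionality_hosting_localhost wires up all three dtss callbacks; the find- and
# store-callbacks are defined elsewhere in this test class. Rough sketches of what they could
# look like, assuming the dtss hands the find-callback the search expression and the
# store-callback the TsVector to store, that the fixture pre-builds self.ts_infos (a TsInfoVector
# with ten entries matching the search used above) and an empty self.stored_tsv list, and that
# TsInfoVector is imported from the shyft api. Names and signatures here are assumptions based on
# how the test uses them, not the module's actual implementation:
def dtss_find_callback(self, search_expression: str) -> TsInfoVector:
    self.find_count += 1
    if self.rd_throws:
        self.rd_throws = False  # make the 'should throw' find raise exactly once
        raise RuntimeError('find-callback was asked to throw')
    return self.ts_infos  # pre-built infos matching the search expression used in the test


def dtss_store_callback(self, tsv: TsVector) -> None:
    self.stored_tsv.append(tsv)  # remember what the server asked us to store, for the asserts above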