    def test_dtss_partition_by_average(self):
        """
        This test illustrates the use of partition_by, client- and server-side.
        The main point here is to ensure that the evaluate period covers both
        the historical and the evaluation periods.
        """
        with tempfile.TemporaryDirectory() as c_dir:
            # setup data to be calculated
            utc = Calendar()
            d = deltahours(1)
            t = utc.time(2000, 1, 1)
            n = utc.diff_units(t, utc.add(t, Calendar.YEAR, 10), d)
            ta = TimeAxis(t, d, n)
            td = TimeAxis(t, d * 24, n // 24)
            n_ts = 1
            store_tsv = TsVector()  # something we store at server side
            for i in range(n_ts):
                pts = TimeSeries(ta,
                                 np.sin(np.linspace(start=0, stop=1.0 * (i + 1), num=ta.size())),
                                 point_fx.POINT_AVERAGE_VALUE)
                ts_id = shyft_store_url(f"{i}")
                store_tsv.append(TimeSeries(ts_id, pts))  # generate a bound pts to store

            # start the dtss server
            dtss = DtsServer()
            cache_on_write = True
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_auto_cache(True)
            dtss.set_listening_port(port_no)
            dtss.set_container("test", c_dir)  # notice we set container 'test' to point to the c_dir directory
            dtss.start_async()  # the internal shyft time-series will be stored to that container

            # create the dts client
            c = DtsClient(host_port, auto_connect=False)  # demonstrate object life-time connection
            c.store_ts(store_tsv, overwrite_on_write=True, cache_on_write=cache_on_write)
            t_0 = utc.time(2018, 1, 1)
            tax = TimeAxis(t_0, Calendar.DAY, 365)
            ts_h1 = TimeSeries(shyft_store_url(f'{0}'))
            ts_h2 = store_tsv[0]
            ts_p1 = ts_h1.partition_by(utc, t, Calendar.YEAR, 10, t_0).average(tax)
            ts_p2 = ts_h2.partition_by(utc, t, Calendar.YEAR, 10, t_0).average(tax)
            # minimal check: per the docstring, the evaluate period must cover both the
            # historical period (from t) and the evaluation period (tax); the server-side
            # expression should then give the same partitions as the client-side (bound) one
            eval_period = UtcPeriod(t, tax.total_period().end)
            r1 = c.evaluate(ts_p1, eval_period)
            for a, b in zip(r1, ts_p2):
                assert_array_almost_equal(a.values.to_numpy(), b.values.to_numpy(), decimal=4)
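    # Note on the url helpers used throughout these tests: shyft_store_url and
    # fake_store_url are defined elsewhere in this test module. A minimal sketch
    # of what they are assumed to look like (hypothetical, for illustration only;
    # in particular the 'fake' protocol prefix is an assumption):
    #
    #   def shyft_store_url(name: str) -> str:
    #       return "shyft://test/{0}".format(name)  # maps into the 'test' container on the server
    #
    #   def fake_store_url(name: str) -> str:
    #       return "fake://test/{0}".format(name)   # non-shyft protocol, resolved via the read-callback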
    def test_dtss_remove_series(self):
        with tempfile.TemporaryDirectory() as c_dir:
            # start the server
            dtss = DtsServer()
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_listening_port(port_no)
            dtss.set_container("test", c_dir)  # notice we set container 'test' to point to the c_dir directory
            dtss.start_async()  # the internal shyft time-series will be stored to that container

            # setup some data
            utc = Calendar()
            d = deltahours(1)
            n = 365 * 24 // 3
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)
            tsv = TsVector()
            pts = TimeSeries(ta, np.linspace(start=0, stop=1.0, num=ta.size()),
                             point_fx.POINT_AVERAGE_VALUE)
            tsv.append(TimeSeries("cache://test/foo", pts))

            # get a client
            client = DtsClient(host_port)
            client.store_ts(tsv)

            # start with removing disallowed
            dtss.set_can_remove(False)
            # we should be disallowed to remove now
            try:
                client.remove("shyft://test/foo")
            except Exception as err:
                self.assertEqual(str(err), "dtss::server: server does not support removing")
            else:
                self.fail('remove should be disallowed while set_can_remove(False)')

            # then try with removing allowed
            dtss.set_can_remove(True)
            # we only support removing shyft-url style data
            try:
                client.remove("protocol://test/foo")
            except Exception as err:
                self.assertEqual(str(err),
                                 "dtss::server: server does not allow removing for non shyft-url type data")
            else:
                self.fail('removing non shyft-url data should be disallowed')

            # now it should work
            client.remove("shyft://test/foo")
    def test_failures(self):
        """
        Verify that dtss client-server connections are auto-magically restored and fixed
        """
        with tempfile.TemporaryDirectory() as c_dir:
            # start the server
            dtss = DtsServer()
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_listening_port(port_no)
            dtss.set_container("test", c_dir)  # notice we set container 'test' to point to the c_dir directory
            dtss.start_async()  # the internal shyft time-series will be stored to that container

            # setup some data
            utc = Calendar()
            d = deltahours(1)
            n = 365 * 24 // 3
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)
            tsv = TsVector()
            pts = TimeSeries(ta, np.linspace(start=0, stop=1.0, num=ta.size()),
                             point_fx.POINT_AVERAGE_VALUE)
            tsv.append(TimeSeries("cache://test/foo", pts))

            # get a client
            client = DtsClient(host_port, auto_connect=False)
            client.store_ts(tsv)
            client.close()
            client.store_ts(tsv)  # should just work, it re-opens automagically
            dtss.clear()  # the server is out and away, no chance this would work
            try:
                client.store_ts(tsv)
                self.assertTrue(False, 'This should throw, because there is no dtss server to help you')
            except Exception:
                pass  # expected: no server around to answer the request
            dtss.set_listening_port(port_no)
            dtss.start_async()
            client.store_ts(tsv)  # this should just work, automagically reconnecting
    def test_get_ts_info(self):
        """
        Verify that we can get specific TsInfo objects for time-series from the server backend.
        """
        with tempfile.TemporaryDirectory() as c_dir:
            # start the server
            dtss = DtsServer()
            port_no = find_free_port()
            host_adr = 'localhost:{0}'.format(port_no)
            dtss.set_listening_port(port_no)
            dtss.set_container("testing", c_dir)  # notice we set container 'testing' to point to the c_dir directory
            dtss.start_async()  # the internal shyft time-series will be stored to that container

            # get a client
            client = DtsClient(host_adr)
            try:
                client.get_ts_info(r'shyft://testing/data')
            except Exception:
                pass  # expected: the series does not exist yet
            else:  # only end up here if no exception was raised
                self.fail('Could fetch info for a non-existing time-series')

            # setup some data
            utc = Calendar()
            d = deltahours(1)
            n = 365 * 24 // 3
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)
            tsv = TsVector()
            pts = TimeSeries(ta, np.linspace(start=0, stop=1.0, num=ta.size()),
                             point_fx.POINT_AVERAGE_VALUE)
            tsv.append(TimeSeries(r'shyft://testing/data', pts))
            client.store_ts(tsv)

            info: TsInfo = client.get_ts_info(r'shyft://testing/data')
            self.assertEqual(info.name, r'data')
            self.assertEqual(info.point_fx, point_fx.POINT_AVERAGE_VALUE)
            self.assertEqual(info.data_period, ta.total_period())
    def test_can_create_cf_compliant_file(self):
        # create the test file
        test_file = path.join(path.abspath(os.curdir), 'shyft_test.nc')
        if path.exists(test_file):
            os.remove(test_file)
        # create meta info
        epsg_id = 32633
        x0 = 100000
        x1 = 200000
        y0 = 100000
        y1 = 200000
        x = 101000
        y = 101000
        z = 1200
        temperature = TimeSeriesMetaInfo('temperature',
                                         '/observed/at_stn_abc/temperature',
                                         'observed air temperature',
                                         x, y, z, epsg_id)

        # create time axis
        utc = Calendar()
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        data = np.arange(0, ta.size(), dtype=np.float64)
        ts = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE)

        # save the first batch
        t_ds = TimeSeriesStore(test_file, temperature)
        t_ds.create_new_file()
        t_ds.append_ts_data(ts)

        # expected result
        ts_exp = ts

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # Append data at the end
        print("\n\n append at the end data")
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 2), deltahours(1), 48)
        ts = TimeSeries(ta, dv.from_numpy(np.arange(0, ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 72)
        data = np.empty(72)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:72] = np.arange(0, 48, dtype=np.float64)  # <-- new data
        ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # Append with overlap
        print("\n\n append with overlap")
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 3), deltahours(1), 48)
        ts = TimeSeries(ta, dv.from_numpy(np.arange(0, ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 96)
        data = np.empty(96)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:48] = np.arange(0, 24, dtype=np.float64)  # <-- new data
        data[48:96] = np.arange(0, 48, dtype=np.float64)  # <-- new data
        ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # Append with gap in time axis
        print("\n\n Append with gap in time axis")
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 6), deltahours(1), 24)
        ts = TimeSeries(ta, dv.from_numpy(np.arange(0, ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        time_vals = np.append(TimeAxis(utc.time(2016, 1, 1), deltahours(1), 96).time_points[:-1],
                              ta.time_points)
        # print(time_vals)
        ta = TimeAxis(UtcTimeVector.from_numpy(time_vals.astype(np.int64)))
        data = np.empty(120)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:48] = np.arange(0, 24, dtype=np.float64)
        data[48:96] = np.arange(0, 48, dtype=np.float64)
        data[96:120] = np.arange(0, 24, dtype=np.float64)  # <-- new data
        ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        # print(ts_exp.total_period())
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        # print(geo_temperature[0].ts.time_axis.time_points - ts_exp.time_axis.time_points)
        # print(geo_temperature[0].ts.time_axis.time_points - time_vals)
        # print(ts_exp.time_axis.time_points - time_vals)
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # Add new data in the middle where nothing was defined (no moving)
        print("\n\n Add new data in the middle where nothing was defined (no moving)")
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 2), deltahours(1), 24)
        ts = TimeSeries(ta, dv.from_numpy(np.arange(100, 100 + ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        time_vals = np.append(TimeAxis(utc.time(2016, 1, 1), deltahours(1), 96).time_points[:-1],
                              TimeAxis(utc.time(2016, 1, 6), deltahours(1), 24).time_points)
        ta = TimeAxis(UtcTimeVector.from_numpy(time_vals.astype(np.int64)))
        data = np.empty(120)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:48] = np.arange(100, 124, dtype=np.float64)  # <-- new data
        data[48:96] = np.arange(0, 48, dtype=np.float64)
        data[96:120] = np.arange(0, 24, dtype=np.float64)
        ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # print(ts_exp.total_period())
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # Insert new data in the middle and move the rest
        print("\n\n insert new data and move rest")
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 5), deltahours(1), 36)
        ts = TimeSeries(ta, dv.from_numpy(np.arange(200, 200 + ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 144)
        data = np.empty(144)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:48] = np.arange(100, 124, dtype=np.float64)
        data[48:96] = np.arange(0, 48, dtype=np.float64)
        data[96:132] = np.arange(200, 236, dtype=np.float64)  # <-- new data
        data[132:144] = np.arange(12, 24, dtype=np.float64)
        ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # Add new data before existing data without overlap
        print("\n\n add new data before existing data without overlap")
        # create time axis
        ta = TimeAxis(utc.time(2015, 12, 31), deltahours(1), 24)
        ts = TimeSeries(ta, dv.from_numpy(np.arange(300, 300 + ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the batch
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2015, 12, 31), deltahours(1), 168)
        data = np.empty(168)
        data[:24] = np.arange(300, 324, dtype=np.float64)  # <-- new data
        data[24:48] = np.arange(0, 24, dtype=np.float64)
        data[48:72] = np.arange(100, 124, dtype=np.float64)
        data[72:120] = np.arange(0, 48, dtype=np.float64)
        data[120:156] = np.arange(200, 236, dtype=np.float64)
        data[156:168] = np.arange(12, 24, dtype=np.float64)
        ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # Add new data before existing data with overlap
        print("\n\n add new data before existing data with overlap")
        # create time axis
        ta = TimeAxis(utc.time(2015, 12, 30), deltahours(1), 36)
        ts = TimeSeries(ta, dv.from_numpy(np.arange(400, 400 + ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the batch
        # t_ds = TimeSeriesStore(test_file, temperature)
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2015, 12, 30), deltahours(1), 192)
        data = np.empty(192)
        data[:36] = np.arange(400, 436, dtype=np.float64)  # <-- new data
        data[36:48] = np.arange(312, 324, dtype=np.float64)
        data[48:72] = np.arange(0, 24, dtype=np.float64)
        data[72:96] = np.arange(100, 124, dtype=np.float64)
        data[96:144] = np.arange(0, 48, dtype=np.float64)
        data[144:180] = np.arange(200, 236, dtype=np.float64)
        data[180:192] = np.arange(12, 24, dtype=np.float64)
        ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # Overwrite everything with fewer data points
        print('\n\n Overwrite everything with less data points')
        # create time axis
        ta = TimeAxis(utc.time(2015, 12, 30), deltahours(24), 9)
        ts = TimeSeries(ta, dv.from_numpy(np.arange(1000, 1000 + ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # write the time series
        t_ds.append_ts_data(ts)

        # expected result
        ts_exp = ts

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # Insert data with a different dt
        print('\n\n Insert data with different dt')
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # write the time series
        t_ds.append_ts_data(ts)

        # expected result
        time_points = np.empty(33, dtype=np.int64)  # np.int is deprecated/removed; use np.int64
        time_points[0:2] = TimeAxis(utc.time(2015, 12, 30), deltahours(24), 1).time_points
        time_points[2:26] = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 23).time_points
        time_points[26:] = TimeAxis(utc.time(2016, 1, 2), deltahours(24), 6).time_points
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        data = np.empty(32)
        data[0:2] = np.array([1000, 1001])
        data[2:26] = np.arange(0, 24)  # <-- new data
        data[26:] = np.arange(1003, 1009)
        ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # delete data with range UtcPeriod in the middle
        print('\n\n delete data with range UtcPeriod')
        tp = UtcPeriod(utc.time(2015, 12, 31), utc.time(2016, 1, 1, 12))
        # ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        # ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE)
        # remove the period from the stored time series
        t_ds.remove_tp_data(tp)

        # expected result
        time_points = np.array([1451433600, 1451653200, 1451656800, 1451660400, 1451664000,
                                1451667600, 1451671200, 1451674800, 1451678400, 1451682000,
                                1451685600, 1451689200, 1451692800, 1451779200, 1451865600,
                                1451952000, 1452038400, 1452124800, 1452211200])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        data = np.array([1000, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                         1003, 1004, 1005, 1006, 1007, 1008])
        ts_exp = TimeSeries(ta, dv.from_numpy(data),
                            point_fx.POINT_INSTANT_VALUE)  # TODO: is this the correct policy to use

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # delete data with range UtcPeriod at the start
        print('\n\n delete data with range UtcPeriod at the start')
        tp = UtcPeriod(1451433600, 1451667600)
        # ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        # ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE)
        # remove the period from the stored time series
        t_ds.remove_tp_data(tp)

        # expected result
        time_points = np.array([1451671200, 1451674800, 1451678400, 1451682000, 1451685600,
                                1451689200, 1451692800, 1451779200, 1451865600, 1451952000,
                                1452038400, 1452124800, 1452211200])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        data = np.array([18, 19, 20, 21, 22, 23, 1003, 1004, 1005, 1006, 1007, 1008])
        ts_exp = TimeSeries(ta, dv.from_numpy(data),
                            point_fx.POINT_INSTANT_VALUE)  # TODO: is this the correct policy to use for this test

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # delete data with range UtcPeriod at the end
        print('\n\n delete data with range UtcPeriod at the end')
        tp = UtcPeriod(1451952000, utc.time(2016, 1, 10))
        # ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        # ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE)
        # remove the period from the stored time series
        t_ds.remove_tp_data(tp)

        # expected result
        time_points = np.array([1451671200, 1451674800, 1451678400, 1451682000, 1451685600,
                                1451689200, 1451692800, 1451779200, 1451865600, 1451952000])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        data = np.array([18, 19, 20, 21, 22, 23, 1003, 1004, 1005])
        ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx.POINT_INSTANT_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        try:
            rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        except CFDataRepositoryError:
            pass
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # delete data with range UtcPeriod covering everything
        print('\n\n delete data with range UtcPeriod everything')
        tp = UtcPeriod(utc.time(2016, 1, 1), utc.time(2016, 1, 10))
        # remove the period from the stored time series
        t_ds.remove_tp_data(tp)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # reading back 'temperature' should now fail, since everything was removed
        self.assertRaises(CFDataRepositoryError, ts_dr.get_timeseries, ['temperature'], tp)

        # --------------------------------------
        # insert data in between previously saved data points
        print('\n\n insert data in between time saved data points')
        # first insert the batch into which we want to insert the second batch
        utc = Calendar()
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(24), 2)
        data = np.arange(0, ta.size(), dtype=np.float64)
        ts = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the first batch
        t_ds.append_ts_data(ts)

        # then insert data for every hour in between
        utc = Calendar()
        ta = TimeAxis(utc.time(2016, 1, 1) + deltahours(1), deltahours(1), 23)
        data = np.arange(10, 10 + ta.size(), dtype=np.float64)
        ts = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the second batch
        t_ds.append_ts_data(ts)

        # expected result
        time_points = np.array([1451606400, 1451610000, 1451613600, 1451617200, 1451620800,
                                1451624400, 1451628000, 1451631600, 1451635200, 1451638800,
                                1451642400, 1451646000, 1451649600, 1451653200, 1451656800,
                                1451660400, 1451664000, 1451667600, 1451671200, 1451674800,
                                1451678400, 1451682000, 1451685600, 1451689200, 1451692800,
                                0])
        time_points[-1] = 2 * time_points[-2] - time_points[-3]  # calculate the last time point
        data = np.array([0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23,
                         24, 25, 26, 27, 28, 29, 30, 31, 32, 1])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        ts_exp = TimeSeries(ta, dv.from_numpy(data),
                            point_fx.POINT_INSTANT_VALUE)  # TODO: is this the correct policy value for this case

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy()))

        # --------------------------------------
        # insert data including nan
        print('\n\n insert data including nan')
        utc = Calendar()
        ta = TimeAxis(utc.time(2016, 1, 1) + deltahours(1), deltahours(1), 23)
        data = np.arange(10, 10 + ta.size(), dtype=np.float64)
        data[4] = np.nan
        data[6] = np.nan  # np.inf, but trouble getting inf through all versions of numpy/netcdf
        data[8] = np.nan  # -np.inf, --"--
        ts = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the batch
        t_ds.append_ts_data(ts)

        # expected result
        time_points = np.array([1451606400, 1451610000, 1451613600, 1451617200, 1451620800,
                                1451624400, 1451628000, 1451631600, 1451635200, 1451638800,
                                1451642400, 1451646000, 1451649600, 1451653200, 1451656800,
                                1451660400, 1451664000, 1451667600, 1451671200, 1451674800,
                                1451678400, 1451682000, 1451685600, 1451689200, 1451692800,
                                0])
        time_points[-1] = 2 * time_points[-2] - time_points[-3]  # calculate the last time point
        data = np.array([0, 10, 11, 12, 13,
                         np.nan,
                         15,
                         # np.inf,
                         np.nan,  # TODO: figure out how to unmask restoring 'used' mask-values
                         17,
                         # -np.inf,
                         np.nan,
                         19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        ts_exp = TimeSeries(ta, dv.from_numpy(data),
                            point_fx.POINT_INSTANT_VALUE)  # TODO: policy right?

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # and verify that we get exactly back what we wanted
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(np.allclose(geo_temperature[0].ts.time_axis.time_points,
                                    ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(np.allclose(geo_temperature[0].ts.values.to_numpy(),
                                    ts_exp.values.to_numpy(), equal_nan=True))

        if path.exists(test_file):
            os.remove(test_file)
    def test_ts_cache(self):
        """
        Verify the dtss ts-cache functions exposed to python
        """
        with tempfile.TemporaryDirectory() as c_dir:
            # setup data to be calculated
            utc = Calendar()
            d = deltahours(1)
            n = 100
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)

            n_ts = 10
            store_tsv = TsVector()  # something we store at server side
            tsv = TsVector()  # something we put an expression into, referring to stored ts-symbols
            for i in range(n_ts):
                pts = TimeSeries(ta,
                                 np.sin(np.linspace(start=0, stop=1.0 * i, num=ta.size())),
                                 point_fx.POINT_AVERAGE_VALUE)
                ts_id = shyft_store_url("{0}".format(i))
                tsv.append(float(1.0) * TimeSeries(ts_id))  # make an expression that returns what we store
                store_tsv.append(TimeSeries(ts_id, pts))  # generate a bound pts to store
            # add one external ts
            tsv.append(TimeSeries(fake_store_url("_any_ts_id_will_do")))

            # then start the server
            dtss = DtsServer()
            dtss.cb = self.dtss_read_callback  # rig external callbacks as well
            self.callback_count = 0
            self.rd_throws = False
            cache_on_write = True
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_auto_cache(True)
            dtss.set_listening_port(port_no)
            dtss.set_container("test", c_dir)  # notice we set container 'test' to point to the c_dir directory
            dtss.start_async()  # the internal shyft time-series will be stored to that container

            dts = DtsClient(host_port, auto_connect=False)  # demonstrate object life-time connection
            cs0 = dtss.cache_stats
            dts.store_ts(store_tsv, overwrite_on_write=True, cache_on_write=cache_on_write)
            r1 = dts.evaluate(tsv, ta.total_period(), use_ts_cached_read=True, update_ts_cache=True)
            cs1 = dtss.cache_stats
            ccs1 = dts.cache_stats  # the client can also provide cache-stats

            dtss.flush_cache_all()  # force the cache empty
            dtss.clear_cache_stats()
            cs2 = dtss.cache_stats  # just to ensure clear did work
            r1 = dts.evaluate(tsv, ta.total_period(), use_ts_cached_read=True,
                              update_ts_cache=True)  # second evaluation, cache is empty, will force reads (misses)
            cs3 = dtss.cache_stats
            r1 = dts.evaluate(tsv, ta.total_period(), use_ts_cached_read=True,
                              update_ts_cache=True)  # third evaluation, cache is now filled, all hits
            cs4 = dtss.cache_stats

            # now verify explicit caching performed by the python callback
            self.cache_dtss = dtss
            self.cache_reads = True
            dts.cache_flush()  # is the equivalent of
            # dtss.flush_cache_all()
            # dtss.clear_cache_stats()
            # use explicit cache-control instead of global
            dtss.set_auto_cache(False)  # turn off auto caching, we want to test the explicit caching
            r1 = dts.evaluate(tsv, ta.total_period(), use_ts_cached_read=True,
                              update_ts_cache=False)  # evaluation: just misses, but we explicitly cache the external series
            cs5 = dtss.cache_stats  # ok, baseline: lots of misses
            r1 = dts.evaluate(tsv, ta.total_period(), use_ts_cached_read=True,
                              update_ts_cache=False)
            cs6 = dtss.cache_stats  # should be one hit here

            dts.close()  # close connection (will use context manager later)
            dtss.clear()  # close server

            # now the moment of truth:
            self.assertEqual(len(r1), len(tsv))
            for i in range(n_ts - 1):
                self.assertEqual(r1[i].time_axis, store_tsv[i].time_axis)
                assert_array_almost_equal(r1[i].values.to_numpy(),
                                          store_tsv[i].values.to_numpy(), decimal=4)

            self.assertEqual(cs0.hits, 0)
            self.assertEqual(cs0.misses, 0)
            self.assertEqual(cs0.coverage_misses, 0)
            self.assertEqual(cs0.id_count, 0)
            self.assertEqual(cs0.point_count, 0)
            self.assertEqual(cs0.fragment_count, 0)

            self.assertEqual(cs1.hits, n_ts)
            self.assertEqual(cs1.misses, 1)  # because we cache on store: 10 cached, 1 external with a miss
            self.assertEqual(cs1.coverage_misses, 0)
            self.assertEqual(cs1.id_count, n_ts + 1)
            self.assertEqual(cs1.point_count, (n_ts + 1) * n)
            self.assertEqual(cs1.fragment_count, n_ts + 1)

            # verify the client-side cache_stats
            self.assertEqual(ccs1.hits, n_ts)
            self.assertEqual(ccs1.misses, 1)  # because we cache on store: 10 cached, 1 external with a miss
            self.assertEqual(ccs1.coverage_misses, 0)
            self.assertEqual(ccs1.id_count, n_ts + 1)
            self.assertEqual(ccs1.point_count, (n_ts + 1) * n)
            self.assertEqual(ccs1.fragment_count, n_ts + 1)

            self.assertEqual(cs2.hits, 0)
            self.assertEqual(cs2.misses, 0)
            self.assertEqual(cs2.coverage_misses, 0)
            self.assertEqual(cs2.id_count, 0)
            self.assertEqual(cs2.point_count, 0)
            self.assertEqual(cs2.fragment_count, 0)

            self.assertEqual(cs3.hits, 0)
            self.assertEqual(cs3.misses, n_ts + 1)  # the cache was flushed, so all n_ts + 1 reads miss
            self.assertEqual(cs3.coverage_misses, 0)
            self.assertEqual(cs3.id_count, n_ts + 1)
            self.assertEqual(cs3.point_count, (n_ts + 1) * n)
            self.assertEqual(cs3.fragment_count, n_ts + 1)

            self.assertEqual(cs4.hits, n_ts + 1)  # because the previous read filled the cache
            self.assertEqual(cs4.misses, n_ts + 1)  # remembers previous misses
            self.assertEqual(cs4.coverage_misses, 0)
            self.assertEqual(cs4.id_count, n_ts + 1)
            self.assertEqual(cs4.point_count, (n_ts + 1) * n)
            self.assertEqual(cs4.fragment_count, n_ts + 1)

            self.assertEqual(cs6.hits, 1)  # the previous read explicitly cached the external series
            self.assertEqual(cs6.misses, n_ts * 2 + 1)  # remembers previous misses
            self.assertEqual(cs6.coverage_misses, 0)
            self.assertEqual(cs6.id_count, 1)
            self.assertEqual(cs6.point_count, 1 * n)
            self.assertEqual(cs6.fragment_count, 1)
    def test_ts_store(self):
        """
        This test verifies the shyft internal time-series store, which allows
        identified time-series to be stored in the backend using a directory
        container specified for the location.

        All time-series of the form shyft://<container>/<ts-name> are mapped
        to the configured <container> (aka a directory on the server).

        This applies to expressions, as well as the new .store_ts(ts_vector)
        function that allows the user to stash away time-series into the
        configured back-end container.

        All find-operations of the form shyft://<container>/<regular-expression>
        are mapped to a search in the corresponding directory for the <container>.
        :return:
        """
        with tempfile.TemporaryDirectory() as c_dir:
            # setup data to be calculated
            utc = Calendar()
            d = deltahours(1)
            n = 365 * 24 // 3
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)
            n_ts = 10
            store_tsv = TsVector()  # something we store at server side
            tsv = TsVector()  # something we put an expression into, referring to stored ts-symbols

            for i in range(n_ts):
                pts = TimeSeries(ta,
                                 np.sin(np.linspace(start=0, stop=1.0 * i, num=ta.size())),
                                 point_fx.POINT_AVERAGE_VALUE)
                ts_id = shyft_store_url("{0}".format(i))
                tsv.append(float(1.0) * TimeSeries(ts_id))  # make an expression that returns what we store
                store_tsv.append(TimeSeries(ts_id, pts))  # generate a bound pts to store

            # krls with some extra challenges related to serialization
            tsv_krls = TsVector()
            krls_ts = TimeSeries(shyft_store_url("9")).krls_interpolation(dt=d, gamma=1e-3,
                                                                          tolerance=0.001, size=ta.size())
            tsv_krls.append(krls_ts)
            # min_max_check_ts_fill also needs a serialization check; create a trivial case
            ts9 = TimeSeries(shyft_store_url("9"))
            ts_qac = ts9.min_max_check_linear_fill(v_min=-10.0 * n_ts, v_max=10.0 * n_ts)
            tsv_krls.append(ts_qac)
            tsv_krls.append(ts9)
            tsv_krls.append(ts9.inside(min_v=-0.5, max_v=0.5))

            # then start the server
            dtss = DtsServer()
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_auto_cache(True)
            std_max_items = dtss.cache_max_items
            dtss.cache_max_items = 3000
            tst_max_items = dtss.cache_max_items
            dtss.set_listening_port(port_no)
            dtss.set_container("test", c_dir)  # notice we set container 'test' to point to the c_dir directory
            dtss.start_async()  # the internal shyft time-series will be stored to that container
            # also notice that we don't have to set up callbacks in this case (but we could, and they would work)
            #
            # finally, start the action
            dts = DtsClient(host_port)
            # then try something that should work
            dts.store_ts(store_tsv)
            r1 = dts.evaluate(tsv, ta.total_period())
            f1 = dts.find(r"shyft://test/\d")  # find all ts with one digit, 0..9
            r2 = dts.evaluate(tsv_krls, ta.total_period())
            url_x = shyft_store_url(r'does not exists')
            tsvx = TsVector()
            tsvx.append(TimeSeries(url_x))
            try:
                rx = dts.evaluate(tsvx, ta.total_period())
                self.assertFalse(True, 'Evaluating a non-existing time-series should throw')
            except RuntimeError as rex:
                self.assertIsNotNone(rex)

            dts.close()  # close connection (will use context manager later)
            dtss.clear()  # close server

            # now the moment of truth:
            self.assertEqual(len(r1), len(tsv))
            for i in range(n_ts - 1):
                self.assertEqual(r1[i].time_axis, store_tsv[i].time_axis)
                assert_array_almost_equal(r1[i].values.to_numpy(),
                                          store_tsv[i].values.to_numpy(), decimal=4)

            self.assertEqual(len(f1), 10)
            self.assertEqual(len(r2), len(tsv_krls))
            assert_array_almost_equal(r2[1].values.to_numpy(), r2[2].values.to_numpy(), decimal=4)
            self.assertEqual(1000000, std_max_items)
            self.assertEqual(3000, tst_max_items)
    def test_functionality_hosting_localhost(self):
        # setup data to be calculated
        utc = Calendar()
        d = deltahours(1)
        d24 = deltahours(24)
        n = 240
        n24 = 10
        t = utc.time(2016, 1, 1)
        ta = TimeAxis(t, d, n)
        ta24 = TimeAxis(t, d24, n24)
        n_ts = 100
        percentile_list = IntVector([0, 35, 50, 65, 100])
        tsv = TsVector()
        store_tsv = TsVector()  # something we store at server side
        for i in range(n_ts):
            pts = TimeSeries(ta, np.linspace(start=0, stop=1.0, num=ta.size()),
                             point_fx.POINT_AVERAGE_VALUE)
            tsv.append(float(1 + i / 10) * pts)
            store_tsv.append(TimeSeries("cache://test/{0}".format(i), pts))  # generate a bound pts to store

        dummy_ts = TimeSeries('dummy://a')
        tsv.append(dummy_ts.integral(ta))
        self.assertGreater(len(ts_stringify(tsv[0])), 10)  # just ensure ts_stringify works on expressions

        # then start the server
        dtss = DtsServer()
        port_no = find_free_port()
        host_port = 'localhost:{0}'.format(port_no)
        dtss.set_listening_port(port_no)
        dtss.cb = self.dtss_read_callback
        dtss.find_cb = self.dtss_find_callback
        dtss.store_ts_cb = self.dtss_store_callback
        dtss.start_async()

        dts = DtsClient(StringVector([host_port]), True, 1000)  # as number of hosts
        # then try something that should work
        dts.store_ts(store_tsv)
        r1 = dts.evaluate(tsv, ta.total_period())
        tsv1x = tsv.inside(-0.5, 0.5)
        tsv1x.append(tsv1x[-1].decode(start_bit=1, n_bits=1))  # just to verify serialization/bind
        tsv1x.append(store_tsv[1].derivative())
        tsv1x.append(store_tsv[1].pow(2.0))  # just to verify pow serialization (well, it's a bin-op..)
        r1x = dts.evaluate(tsv1x, ta.total_period())
        r2 = dts.percentiles(tsv, ta.total_period(), ta24, percentile_list)
        r3 = dts.find(r'netcdf://dummy\.nc/ts\d')
        self.rd_throws = True
        ex_count = 0
        try:
            rx = dts.evaluate(tsv, ta.total_period())
        except RuntimeError:
            ex_count = 1
        self.rd_throws = True
        try:
            fx = dts.find('should throw')
        except RuntimeError:
            ex_count += 1

        dts.close()  # close connection (will use context manager later)
        dtss.clear()  # close server

        self.assertEqual(ex_count, 2)
        self.assertEqual(len(r1), len(tsv))
        self.assertEqual(self.callback_count, 4)
        for i in range(n_ts - 1):
            self.assertEqual(r1[i].time_axis, tsv[i].time_axis)
            assert_array_almost_equal(r1[i].values.to_numpy(),
                                      tsv[i].values.to_numpy(), decimal=4)

        self.assertEqual(len(r2), len(percentile_list))
        dummy_ts.bind(TimeSeries(ta, fill_value=1.0, point_fx=point_fx.POINT_AVERAGE_VALUE))
        p2 = tsv.percentiles(ta24, percentile_list)
        # r2 = tsv.percentiles(ta24, percentile_list)
        for i in range(len(p2)):
            self.assertEqual(r2[i].time_axis, p2[i].time_axis)
            assert_array_almost_equal(r2[i].values.to_numpy(),
                                      p2[i].values.to_numpy(), decimal=1)

        self.assertEqual(self.find_count, 2)
        self.assertEqual(len(r3), 10)  # 0..9
        for i in range(len(r3)):
            self.assertEqual(r3[i], self.ts_infos[i])

        self.assertIsNotNone(r1x)
        self.assertEqual(1, len(self.stored_tsv))
        self.assertEqual(len(store_tsv), len(self.stored_tsv[0]))
        for i in range(len(store_tsv)):
            self.assertEqual(self.stored_tsv[0][i].ts_id(), store_tsv[i].ts_id())
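    # The callbacks rigged in the test above (dtss_read_callback, dtss_find_callback,
    # dtss_store_callback) are defined elsewhere in this test class. A minimal sketch
    # of the shapes this test assumes, inferred from the usage above (hypothetical,
    # not the authoritative definitions):
    #
    #   def dtss_read_callback(self, ts_ids: StringVector, read_period: UtcPeriod) -> TsVector:
    #       self.callback_count += 1
    #       if self.rd_throws:
    #           raise ...  # forces the RuntimeError the test counts in ex_count
    #       return ...     # one bound TimeSeries per requested ts_id
    #
    #   def dtss_find_callback(self, search_expression: str) -> ...:
    #       self.find_count += 1
    #       return ...     # TsInfo entries matching the expression, e.g. self.ts_infos
    #
    #   def dtss_store_callback(self, tsv: TsVector) -> None:
    #       self.stored_tsv.append(tsv)  # stash the received vector for later assertions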
    def test_forecast(self):
        fx_avg = ts_point_fx.POINT_AVERAGE_VALUE
        utc = Calendar()
        ta = TimeAxis(utc.time(2017, 1, 1, 0, 0, 0), deltahours(24), 4)
        historical_data = TsVector()
        forecast_sets = TsVectorSet()
        weight_sets = dv()
        num_historical_data = 56

        # Let's make three sets: one of two elements, one of three, and one of four.
        forecasts_1 = TsVector()
        forecasts_2 = TsVector()
        forecasts_3 = TsVector()

        forecasts_1.append(TimeSeries(ta, dv([13.4, 15.6, 17.1, 19.1]), fx_avg))
        forecasts_1.append(TimeSeries(ta, dv([34.1, 2.40, 43.9, 10.2]), fx_avg))
        forecast_sets.append(forecasts_1)
        weight_sets.append(5.0)

        forecasts_2.append(TimeSeries(ta, dv([83.1, -42.2, 0.4, 23.4]), fx_avg))
        forecasts_2.append(TimeSeries(ta, dv([15.1, 6.500, 4.2, 2.9]), fx_avg))
        forecasts_2.append(TimeSeries(ta, dv([53.1, 87.90, 23.8, 5.6]), fx_avg))
        forecast_sets.append(forecasts_2)
        weight_sets.append(9.0)

        forecasts_3.append(TimeSeries(ta, dv([1.5, -1.9, -17.2, -10.0]), fx_avg))
        forecasts_3.append(TimeSeries(ta, dv([4.7, 18.2, 15.3000, 8.9]), fx_avg))
        forecasts_3.append(TimeSeries(ta, dv([-45.2, -2.3, 80.2, 71.0]), fx_avg))
        forecasts_3.append(TimeSeries(ta, dv([45.1, -92.0, 34.4, 65.8]), fx_avg))
        forecast_sets.append(forecasts_3)
        weight_sets.append(3.0)

        for i in range(num_historical_data):
            historical_data.append(
                TimeSeries(ta, dv.from_numpy(np.random.random(ta.size()) * 50.0), fx_avg))

        # need one more exposed from core here:
        # auto historical_order = qm::quantile_index<tsa_t>(historical_data, ta);
        interpolation_start = ta.time(2)
        interpolation_end = ta.time(3)

        # Act
        result = quantile_map_forecast(forecast_sets, weight_sets, historical_data, ta,
                                       interpolation_start, interpolation_end, False)
        self.assertIsNotNone(result)
        self.assertEqual(len(result), num_historical_data)
        for ts in result:
            self.assertEqual(ts.size(), ta.size())
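    # Conceptual sketch of the quantile-mapping step behind quantile_map_forecast,
    # for a single time step. Illustration only, not the shyft core implementation
    # (the core additionally applies the per-set weights and the interpolation
    # start/end handling seen in test_forecast above); not used by the tests.
    @staticmethod
    def _quantile_map_step_sketch(historical, forecasts):
        """
        Replace each historical member by the forecast value found at the same
        quantile in the pooled forecast ensemble for one time step.
        """
        ranks = np.argsort(np.argsort(historical))  # quantile rank of each historical member
        q = ranks / max(historical.size - 1, 1)     # map ranks onto [0, 1]
        pool = np.sort(forecasts)                   # pooled forecast ensemble, sorted ascending
        idx = np.round(q * (pool.size - 1)).astype(int)
        return pool[idx]                            # forecast value at each member's quantile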