def test_pow(self):
    """Check every supported power form: ts.pow(num), pow(ts, num), pow(num, ts), pow(ts, ts) and ts.pow(ts)."""

    def three_hour_series(values):
        # Each series gets its own 3-step hourly axis starting 2018-01-01 UTC.
        axis = TimeAxis(time('2018-01-01T00:00:00Z'), time(3600), 3)
        return TimeSeries(axis, DoubleVector(values), stair_case)

    ramp = three_hour_series([1.0, 2.0, 3.0])
    twos = three_hour_series([2.0, 2.0, 2.0])

    # series raised to a scalar exponent, method and builtin form
    squared_by_method = ramp.pow(2.0)
    assert_array_almost_equal([1, 4, 9], squared_by_method.values.to_numpy())
    squared_by_builtin = pow(ramp, 2.0)
    assert_array_almost_equal([1, 4, 9], squared_by_builtin.values.to_numpy())

    # scalar base raised to a series exponent
    scalar_base = pow(2.0, ramp)
    assert_array_almost_equal([2, 4, 8], scalar_base.values.to_numpy())

    # series base raised to a series exponent, builtin and method form
    series_base_builtin = pow(twos, ramp)
    assert_array_almost_equal([2, 4, 8], series_base_builtin.values.to_numpy())
    series_base_method = twos.pow(ramp)
    assert_array_almost_equal([2, 4, 8], series_base_method.values.to_numpy())
def test_error_handling(self):
    """Verify that ``decode`` raises ``RuntimeError`` for invalid bit ranges.

    Three invalid requests are checked:

    * ``n_bits`` equal to zero,
    * ``start_bit + n_bits`` above 52,
    * a negative ``start_bit``.
    """
    utc = Calendar()
    t0 = utc.time(2018, 1, 1)
    dt = deltahours(1)
    dv = DoubleVector()
    dv[:] = [1.0, 2.0, 2.5, 1.9, 3.0, 3.1, float('nan')]  # also verify nan-handling
    ts = TimeSeries(TimeAxis(t0, dt, len(dv)), dv, POINT_AVERAGE_VALUE)

    # assertRaises fails the test (with msg) when no RuntimeError is raised,
    # and lets any other exception type propagate as an error.
    with self.assertRaises(RuntimeError, msg='This should throw, n_bits >0'):
        ts.decode(start_bit=0, n_bits=0)

    with self.assertRaises(RuntimeError, msg='This should throw, start_bit + n_bits >52'):
        ts.decode(start_bit=41, n_bits=12)

    with self.assertRaises(RuntimeError, msg='This should throw, start_bit >=0'):
        ts.decode(start_bit=-1, n_bits=12)
def test_linear_vector(self):
    """Verify ``TsVector.derivative()`` for one point-instant time series.

    The source values [1, 1, 2, 3, -1, 5] are spaced 10 time units apart, so
    the expected derivative per interval is 0.0, 0.1, 0.1, -0.4 and 0.6.
    The derivative at the last point is expected to be non-finite, both
    through ``value(i)``, through call-by-time and through ``values``.
    """
    ta = TimeAxis(0, 10, 6)
    tsv = DoubleVector([1, 1, 2, 3, -1.0, 5.0])
    fv = TsVector([TimeSeries(ta, tsv, POINT_INSTANT_VALUE)])
    d_fv = fv.derivative()
    f = fv[0]
    d_f = d_fv[0]
    self.assertEqual(len(f), len(d_f))

    # access by index, and by time within an interval
    self.assertAlmostEqual(d_f.value(0), 0.0)
    self.assertAlmostEqual(d_f.value(1), 0.1)
    self.assertAlmostEqual(d_f.value(2), 0.1)
    self.assertAlmostEqual(d_f.value(3), -0.4)
    self.assertAlmostEqual(d_f(f.time(3) + 5), -0.4)
    self.assertAlmostEqual(d_f.value(4), 0.6)
    self.assertFalse(math.isfinite(d_f.value(5)))
    self.assertFalse(math.isfinite(d_f(f.time(5))))
    # one time unit before the last point is still inside the last interval
    self.assertTrue(math.isfinite(d_f(f.time(5) - 1)))

    # access through the materialized value vector
    v = d_f.values
    # lengths are integers: compare them exactly, not approximately
    self.assertEqual(len(v), len(f))
    self.assertAlmostEqual(v[0], 0.0)
    self.assertAlmostEqual(v[1], 0.1)
    self.assertAlmostEqual(v[2], 0.1)
    self.assertAlmostEqual(v[3], -0.4)
    self.assertAlmostEqual(v[4], 0.6)
    self.assertFalse(math.isfinite(v[5]))
def test_glacier_melt_ts_m3s(self):
    """Glacier melt series follows a linearly shrinking snow covered area.

    Temperature is constant at 10.0 while the snow covered area falls
    linearly from the full glacier area to zero, so the melt is expected to
    rise linearly from 0.0 to ``glacier_melt_step(dtf, 10.0, 0.0, gf)``.
    """
    cal = Calendar()
    start = cal.time(2016, 10, 1)
    step = deltahours(1)
    n = 240
    ta = TimeAxis(start, step, n)
    area_m2 = 487 * 1000 * 1000  # Jostedalsbreen, largest in Europe

    temperature = TimeSeries(
        ta=ta, fill_value=10.0, point_fx=fx_policy.POINT_AVERAGE_VALUE)
    shrinking_area = np.linspace(area_m2 * 1.0, 0.0, num=n)
    sca = TimeSeries(
        ta=ta,
        values=dv.from_numpy(shrinking_area),
        point_fx=fx_policy.POINT_AVERAGE_VALUE)

    gf = 1.0 * area_m2
    dtf = 6.0
    # The result is a regular time series that supports further ts-expressions.
    melt_m3s = create_glacier_melt_ts_m3s(temperature, sca, gf, dtf)
    self.assertIsNotNone(melt_m3s)

    full_melt_m3s = glacier_melt_step(dtf, 10.0, 0.0, gf)
    expected_melt_m3s = np.linspace(0.0, full_melt_m3s, num=n)
    assert_array_almost_equal(
        expected_melt_m3s, melt_m3s.values.to_numpy(), 4)

    # Confirm the result takes part in ordinary time series arithmetic.
    doubled_melt = melt_m3s * 2.0
    doubled_expected = expected_melt_m3s * 2.0
    assert_array_almost_equal(
        doubled_expected, doubled_melt.values.to_numpy(), 4)
def remove_tp_data(self, period: UtcPeriod):
    """
    Delete the stored data points that fall within the time period.

    The time variable and the value variable are read from the netCDF file
    at ``self.file_path``. Points whose time ``t`` satisfies
    ``period.start <= t <= period.end`` are dropped, the file is re-created
    with ``create_new_file()`` and the remaining points, if any, are written
    back with ``append_ts_data()``. If the file holds no points, or the
    period covers every stored point, the file is left re-created and empty.

    :param period: the time period to remove, both ends inclusive
    :return: None
    :raises TimeSeriesStoreError: if the 'time' variable or the variable
        named by ``self.ts_meta_info.variable_name`` is missing in the file
    """
    time_series_cropped = None
    with Dataset(self.file_path, 'a') as ds:
        # 1. load the data
        time_variable = 'time'
        time_var = ds.variables.get(time_variable, None)
        if time_var is None:
            raise TimeSeriesStoreError(
                'Something is wrong with the dataset. time not found.')
        var = ds.variables.get(self.ts_meta_info.variable_name, None)
        if var is None:
            raise TimeSeriesStoreError(
                'Something is wrong with the dataset. variable {0} not found.'
                .format(self.ts_meta_info.variable_name))

        if len(time_var):
            # 2. get indices of the data to delete
            time_utc = convert_netcdf_time(time_var.units, time_var)
            # 'left' on start and 'right' on end make both ends inclusive
            idx_min = np.searchsorted(time_utc, period.start, side='left')
            idx_max = np.searchsorted(time_utc, period.end, side='right')

            # check if there is data outside the range
            if idx_max - idx_min != len(time_var):
                # 3. crop the data array
                if idx_max < len(time_var):
                    time_cropped = np.append(time_var[0:idx_min],
                                             time_var[idx_max:])
                    var_cropped = np.append(var[0:idx_min], var[idx_max:])
                else:
                    time_cropped = np.append(time_var[0:idx_min], [])
                    var_cropped = np.append(var[0:idx_min], [])

                # The end of the last interval is extrapolated from the last
                # two remaining points.
                # NOTE(review): this needs at least two remaining points; a
                # single remaining point raises IndexError here - confirm
                # whether that case must be supported.
                # NOTE(review): the axis is built from the raw stored time
                # values while the indices come from the converted ones;
                # presumably the file stores seconds since epoch - confirm.
                last_time_point = 2 * time_cropped[-1] - time_cropped[-2]
                ta = TimeAxis(
                    UtcTimeVector.from_numpy(time_cropped.astype(np.int64)),
                    int(last_time_point))
                time_series_cropped = TimeSeries(
                    ta, dv.from_numpy(var_cropped),
                    point_fx.POINT_INSTANT_VALUE
                )  # TODO: is this correct point policy?

    # 4. save the cropped data
    self.create_new_file()
    # explicit None check: the sentinel, not the truthiness of the series,
    # decides whether there is anything to write back
    if time_series_cropped is not None:
        self.append_ts_data(time_series_cropped)
def test_simple_case(self):
    """``inside(2.0, 3.0)`` gives 1.0 where the value is in range and 0.0 elsewhere.

    With the samples used here, 2.0 counts as inside and 3.0 as outside.
    """
    samples = DoubleVector()
    samples[:] = [1.0, 2.0, 2.5, 1.9, 3.0, 3.1, -1.0]
    expected_flags = [0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0]

    cal = Calendar()
    start = cal.time(2018, 1, 1)
    step = deltahours(1)
    axis = TimeAxis(start, step, len(samples))
    series = TimeSeries(axis, samples, POINT_AVERAGE_VALUE)

    flagged = series.inside(2.0, 3.0)
    assert_array_almost_equal(flagged.values.to_numpy(),
                              np.array(expected_flags))
def test_inverted_values(self):
    """``inside`` with swapped marker values: 0.0 inside, 1.0 outside, 1.0 for nan."""
    samples = DoubleVector()
    # the trailing nan exercises the nan_v handling
    samples[:] = [1.0, 2.0, 2.5, 1.9, 3.0, 3.1, float('nan')]
    expected_flags = [1.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0]

    cal = Calendar()
    start = cal.time(2018, 1, 1)
    step = deltahours(1)
    axis = TimeAxis(start, step, len(samples))
    series = TimeSeries(axis, samples, POINT_AVERAGE_VALUE)

    inverted = series.inside(
        min_v=2.0, max_v=3.0, nan_v=1.0, inside_v=0.0, outside_v=1.0)
    assert_array_almost_equal(inverted.values.to_numpy(),
                              np.array(expected_flags))
def _create_forecasts(self, t0: int, dt: int, n: int, fc_dt: int,
                      fc_n: int) -> TsVector:
    """Build ``fc_n`` synthetic forecast series, each with ``n`` steps of ``dt``.

    Forecast number ``i`` (zero based) starts at ``t0 + i * fc_dt``. Its
    values are ``n`` evenly spaced samples from ``1 + mark`` up to, but not
    including, ``1 + n + mark``, where ``mark = (i + 1) / 100.0`` makes each
    forecast recognisable by its fractional part.

    :return: a TsVector holding the forecasts in creation order
    """
    forecasts = TsVector()
    point_policy = ts_point_fx.POINT_AVERAGE_VALUE
    for fc_index in range(fc_n):
        axis = TimeAxis(t0 + fc_index * fc_dt, dt, n)
        mark = (fc_index + 1) / 100.0
        samples = np.linspace(1 + mark, 1 + n + mark, n, endpoint=False)
        forecast = TimeSeries(axis, dv.from_numpy(samples), point_policy)
        forecasts.append(forecast)
    return forecasts
def test_basic_case(self):
    """Verify ``use_time_axis_from`` on a single TimeSeries and on a TsVector.

    Series ``a`` holds 0, 10, ..., 50 at times 0, 10, ..., 50. It is
    re-sampled onto the time axis of ``o`` (start 10, step 20, 3 points),
    where the expected values are 10, 30 and 50. The result must carry the
    time axis of ``o`` and keep the point interpretation of ``a``.
    """
    ta = TimeAxis(0, 10, 6)
    to = TimeAxis(10, 20, 3)
    va = DoubleVector([0, 10, 20, 30, 40.0, 50.0])
    a = TimeSeries(ta, va, POINT_INSTANT_VALUE)
    o = TimeSeries(to, fill_value=0.0,
                   point_fx=POINT_INSTANT_VALUE)  # point-ip should not matter

    # single time series
    r = a.use_time_axis_from(o)
    self.assertIsNotNone(r)
    # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12
    self.assertEqual(r.time_axis, o.time_axis)
    ev = DoubleVector([10.0, 30.0, 50.0]).to_numpy()
    self.assertTrue(np.allclose(r.values.to_numpy(), ev))
    self.assertEqual(r.point_interpretation(), a.point_interpretation())

    # vector of time series: every member gets the new time axis
    tsv = TsVector([a, 2.0 * a])
    rv = tsv.use_time_axis_from(o)
    for x in rv:
        self.assertEqual(x.time_axis, o.time_axis)
    self.assertTrue(np.allclose(rv[0].values.to_numpy(), ev))
    self.assertTrue(np.allclose(rv[1].values.to_numpy(), 2.0 * ev))
def test_convolve_policy(self):
    """Verify ``convolve_w`` with ``USE_FIRST`` keeps length and total sum.

    A constant series (24 hourly steps of 10.0) is convolved with five
    weights. The result must have the same number of points as the source
    and the same sum of values (mass balance).
    """
    utc = Calendar()
    ts = TimeSeries(ta=TimeAxisFixedDeltaT(utc.time(2001, 1, 1),
                                           deltahours(1), 24),
                    fill_value=10.0,
                    point_fx=point_fx.POINT_AVERAGE_VALUE)
    w = DoubleVector.from_numpy([0.05, 0.15, 0.6, 0.15, 0.05])
    cts = ts.convolve_w(w, convolve_policy.USE_FIRST)
    self.assertIsNotNone(cts)
    # assertEqual: the assertEquals alias is deprecated and removed in Python 3.12
    self.assertEqual(len(cts), len(ts))
    # ensure mass-balance between source and cts; the sums are floating point
    # results, so compare with a tolerance instead of exact equality
    self.assertAlmostEqual(cts.values.to_numpy().sum(),
                           ts.values.to_numpy().sum())
def test_simple_case(self):
    """``decode`` extracts bit fields from bit-encoded values.

    Two extractions are checked: the single bit at position 0, and the
    3-bit field starting at bit 1. nan and negative inputs are expected to
    decode to nan.
    """
    encoded = DoubleVector()
    # these are bit-encoded values, note 1.2 -> 1.0
    encoded[:] = [1.2, 0.0, 2.0, 5.0, 15.0, float('nan'), -1.0]
    nan = float('nan')
    # expected values for start bit 0, 1 bit
    expected_bit0 = [1.0, 0.0, 0.0, 1.0, 1.0, nan, nan]
    # expected values for start bit 1, 3 bits
    expected_bits1_3 = [0.0, 0.0, 1.0, 2.0, 7.0, nan, nan]

    cal = Calendar()
    start = cal.time(2018, 1, 1)
    step = deltahours(1)
    axis = TimeAxis(start, step, len(encoded))
    series = TimeSeries(axis, encoded, POINT_AVERAGE_VALUE)

    decoded_bit0 = series.decode(start_bit=0, n_bits=1)
    decoded_bits1_3 = series.decode(start_bit=1, n_bits=3)
    assert_array_almost_equal(decoded_bit0.values.to_numpy(),
                              np.array(expected_bit0))
    assert_array_almost_equal(decoded_bits1_3.values.to_numpy(),
                              np.array(expected_bits1_3))
def test_glacier_melt_ts_m3s(self):
    """Glacier melt series follows a linearly shrinking snow covered area.

    Temperature is constant at 10.0 while the snow covered area falls
    linearly from the full glacier area to zero, so the melt is expected to
    rise linearly from 0.0 to ``glacier_melt_step(dtf, 10.0, 0.0, gf)``.
    """
    cal = Calendar()
    start = cal.time(2016, 10, 1)
    step = deltahours(1)
    n = 240
    ta = Timeaxis(start, step, n)
    area_m2 = 487 * 1000 * 1000  # Jostedalsbreen, largest in Europe

    temperature = Timeseries(
        ta=ta, fill_value=10.0, point_fx=fx_policy.POINT_AVERAGE_VALUE)
    shrinking_area = np.linspace(area_m2 * 1.0, 0.0, num=n)
    sca = Timeseries(
        ta=ta,
        values=dv.from_numpy(shrinking_area),
        point_fx=fx_policy.POINT_AVERAGE_VALUE)

    gf = 1.0 * area_m2
    dtf = 6.0
    # The result is a regular time series that supports further ts-expressions.
    melt_m3s = create_glacier_melt_ts_m3s(temperature, sca, gf, dtf)
    self.assertIsNotNone(melt_m3s)

    full_melt_m3s = glacier_melt_step(dtf, 10.0, 0.0, gf)
    expected_melt_m3s = np.linspace(0.0, full_melt_m3s, num=n)
    assert_array_almost_equal(
        expected_melt_m3s, melt_m3s.values.to_numpy(), 4)

    # Confirm the result takes part in ordinary time series arithmetic.
    doubled_melt = melt_m3s * 2.0
    doubled_expected = expected_melt_m3s * 2.0
    assert_array_almost_equal(
        doubled_expected, doubled_melt.values.to_numpy(), 4)
def test_can_create_cf_compliant_file(self): # create files test_file = path.join(path.abspath(os.curdir), 'shyft_test.nc') if path.exists(test_file): os.remove(test_file) # create meta info epsg_id = 32633 x0 = 100000 x1 = 200000 y0 = 100000 y1 = 200000 x = 101000 y = 101000 z = 1200 temperature = TimeSeriesMetaInfo('temperature', '/observed/at_stn_abc/temperature', 'observed air temperature', x, y, z, epsg_id) # create time axis utc = Calendar() ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24) data = np.arange(0, ta.size(), dtype=np.float64) ts = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE) # save the first batch t_ds = TimeSeriesStore(test_file, temperature) t_ds.create_new_file() t_ds.append_ts_data(ts) # expected result ts_exp = ts # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # and verify that we get exactly back what we wanted. 
self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # Append data print("\n\n append at the end data") # create time axis ta = TimeAxis(utc.time(2016, 1, 2), deltahours(1), 48) ts = TimeSeries(ta, dv.from_numpy(np.arange(0, ta.size(), dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE) # save the data t_ds.append_ts_data(ts) # expected result ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 72) data = np.empty(72) data[:24] = np.arange(0, 24, dtype=np.float64) data[24:72] = np.arange(0, 48, dtype=np.float64) # <-- new data ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE) # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # and verify that we get exactly back what we wanted. 
self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # Append with overlap print("\n\n append with overlap") # create time axis ta = TimeAxis(utc.time(2016, 1, 3), deltahours(1), 48) ts = TimeSeries(ta, dv.from_numpy(np.arange(0, ta.size(), dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE) # save the data t_ds.append_ts_data(ts) # expected result ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 96) data = np.empty(96) data[:24] = np.arange(0, 24, dtype=np.float64) data[24:48] = np.arange(0, 24, dtype=np.float64) # <-- new data data[48:96] = np.arange(0, 48, dtype=np.float64) # <-- new data ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE) # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # and verify that we get exactly back what we wanted. 
self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # Append with gap in time axis print("\n\n Append with gap in time axis") # create time axis ta = TimeAxis(utc.time(2016, 1, 6), deltahours(1), 24) ts = TimeSeries(ta, dv.from_numpy(np.arange(0, ta.size(), dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE) # save the data t_ds.append_ts_data(ts) # expected result time_vals = np.append( TimeAxis(utc.time(2016, 1, 1), deltahours(1), 96).time_points[:-1], ta.time_points) # print(time_vals) ta = TimeAxis(UtcTimeVector.from_numpy(time_vals.astype(np.int64))) data = np.empty(120) data[:24] = np.arange(0, 24, dtype=np.float64) data[24:48] = np.arange(0, 24, dtype=np.float64) data[48:96] = np.arange(0, 48, dtype=np.float64) data[96:120] = np.arange(0, 24, dtype=np.float64) # <-- new data ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE) # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there # print(ts_exp.total_period()) rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # and verify that we get exactly back what we wanted. 
self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected # print(geo_temperature[0].ts.time_axis.time_points - ts_exp.time_axis.time_points) # print(geo_temperature[0].ts.time_axis.time_points - time_vals) # print(ts_exp.time_axis.time_points - time_vals) self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # Add new data in the middle where nothing was defined (no moving) print( "\n\n Add new data in the middle where nothing was defined (no moving)" ) # create time axis ta = TimeAxis(utc.time(2016, 1, 2), deltahours(1), 24) ts = TimeSeries(ta, dv.from_numpy( np.arange(100, 100 + ta.size(), dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE) # save the data t_ds.append_ts_data(ts) # expected result time_vals = np.append( TimeAxis(utc.time(2016, 1, 1), deltahours(1), 96).time_points[:-1], TimeAxis(utc.time(2016, 1, 6), deltahours(1), 24).time_points) ta = TimeAxis(UtcTimeVector.from_numpy(time_vals.astype(np.int64))) data = np.empty(120) data[:24] = np.arange(0, 24, dtype=np.float64) data[24:48] = np.arange(100, 124, dtype=np.float64) # <-- new data data[48:96] = np.arange(0, 48, dtype=np.float64) data[96:120] = np.arange(0, 24, dtype=np.float64) ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE) # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], 
[y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # print(ts_exp.total_period()) # and verify that we get exactly back what we wanted. self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # Insert new data in the middle and move rest print("\n\n insert new data and move rest") # create time axis ta = TimeAxis(utc.time(2016, 1, 5), deltahours(1), 36) ts = TimeSeries(ta, dv.from_numpy( np.arange(200, 200 + ta.size(), dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE) # save the data t_ds.append_ts_data(ts) # expected result ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 144) data = np.empty(144) data[:24] = np.arange(0, 24, dtype=np.float64) data[24:48] = np.arange(100, 124, dtype=np.float64) data[48:96] = np.arange(0, 48, dtype=np.float64) data[96:132] = np.arange(200, 236, dtype=np.float64) # <-- new data data[132:144] = np.arange(12, 24, dtype=np.float64) ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE) # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, 
selection_criteria) # now read back 'temperature' that we know should be there rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # and verify that we get exactly back what we wanted. self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # Add new data before existing data without overlap print("\n\n add new data before existing data without overlap") # create time axis ta = TimeAxis(utc.time(2015, 12, 31), deltahours(1), 24) ts = TimeSeries(ta, dv.from_numpy( np.arange(300, 300 + ta.size(), dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE) # save the first batch t_ds.append_ts_data(ts) # expected result ta = TimeAxis(utc.time(2015, 12, 31), deltahours(1), 168) data = np.empty(168) data[:24] = np.arange(300, 324, dtype=np.float64) # <-- new data data[24:48] = np.arange(0, 24, dtype=np.float64) data[48:72] = np.arange(100, 124, dtype=np.float64) data[72:120] = np.arange(0, 48, dtype=np.float64) data[120:156] = np.arange(200, 236, dtype=np.float64) data[156:168] = np.arange(12, 24, dtype=np.float64) ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE) # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, 
selection_criteria) # now read back 'temperature' that we know should be there rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # and verify that we get exactly back what we wanted. self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # add new data before existing data with overlap print("\n\n add new data before existing data with overlap") # create time axis ta = TimeAxis(utc.time(2015, 12, 30), deltahours(1), 36) ts = TimeSeries(ta, dv.from_numpy( np.arange(400, 400 + ta.size(), dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE) # save the first batch # t_ds = TimeSeriesStore(test_file, temperature) t_ds.append_ts_data(ts) # expected result ta = TimeAxis(utc.time(2015, 12, 30), deltahours(1), 192) data = np.empty(192) data[:36] = np.arange(400, 436, dtype=np.float64) # <-- new data data[36:48] = np.arange(312, 324, dtype=np.float64) data[48:72] = np.arange(0, 24, dtype=np.float64) data[72:96] = np.arange(100, 124, dtype=np.float64) data[96:144] = np.arange(0, 48, dtype=np.float64) data[144:180] = np.arange(200, 236, dtype=np.float64) data[180:192] = np.arange(12, 24, dtype=np.float64) ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE) # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': 
[[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # and verify that we get exactly back what we wanted. self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # Overwrite everything with less data points # create time axis print('\n\n Overwrite everything with less data points') ta = TimeAxis(utc.time(2015, 12, 30), deltahours(24), 9) ts = TimeSeries(ta, dv.from_numpy( np.arange(1000, 1000 + ta.size(), dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE) # write the time series t_ds.append_ts_data(ts) # expected result ts_exp = ts # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # and verify that we get exactly back what we wanted. 
self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # Insert data with different dt # create time axis print('\n\n Insert data with different dt') ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24) ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE) # write the time series t_ds.append_ts_data(ts) # expected result time_points = np.empty(33, dtype=np.int) time_points[0:2] = TimeAxis(utc.time(2015, 12, 30), deltahours(24), 1).time_points time_points[2:26] = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 23).time_points time_points[26:] = TimeAxis(utc.time(2016, 1, 2), deltahours(24), 6).time_points ta = TimeAxis(UtcTimeVector.from_numpy(time_points)) data = np.empty(32) data[0:2] = np.array([1000, 1001]) data[2:26] = np.arange(0, 24) # <-- new data data[26:] = np.arange(1003, 1009) ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE) # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # and verify that we get exactly back what we 
wanted. self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # delete data with range UtcPeriod in the middle print('\n\n delete data with range UtcPeriod') tp = UtcPeriod(utc.time(2015, 12, 31), utc.time(2016, 1, 1, 12)) # ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24) # ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE) # write the time series t_ds.remove_tp_data(tp) # expected result time_points = np.array([ 1451433600, 1451653200, 1451656800, 1451660400, 1451664000, 1451667600, 1451671200, 1451674800, 1451678400, 1451682000, 1451685600, 1451689200, 1451692800, 1451779200, 1451865600, 1451952000, 1452038400, 1452124800, 1452211200 ]) ta = TimeAxis(UtcTimeVector.from_numpy(time_points)) data = np.array([ 1000, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1003, 1004, 1005, 1006, 1007, 1008 ]) ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx.POINT_INSTANT_VALUE ) # TODO: is this correct policy to use # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # 
and verify that we get exactly back what we wanted. self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # delete data with range UtcPeriod at the start print('\n\n delete data with range UtcPeriod at the start') tp = UtcPeriod(1451433600, 1451667600) # ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24) # ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE) # write the time series t_ds.remove_tp_data(tp) # expected result time_points = np.array([ 1451671200, 1451674800, 1451678400, 1451682000, 1451685600, 1451689200, 1451692800, 1451779200, 1451865600, 1451952000, 1452038400, 1452124800, 1452211200 ]) ta = TimeAxis(UtcTimeVector.from_numpy(time_points)) data = np.array( [18, 19, 20, 21, 22, 23, 1003, 1004, 1005, 1006, 1007, 1008]) ts_exp = TimeSeries( ta, dv.from_numpy(data), point_fx.POINT_INSTANT_VALUE ) # TODO: is this correct policy to use for this test # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # and verify that we get exactly back what we wanted. 
self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # delete data with range UtcPeriod at the end print('\n\n delete data with range UtcPeriod at the end') tp = UtcPeriod(1451952000, utc.time(2016, 1, 10)) # ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24) # ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE) # write the time series t_ds.remove_tp_data(tp) # expected result time_points = np.array([ 1451671200, 1451674800, 1451678400, 1451682000, 1451685600, 1451689200, 1451692800, 1451779200, 1451865600, 1451952000 ]) ta = TimeAxis(UtcTimeVector.from_numpy(time_points)) data = np.array([18, 19, 20, 21, 22, 23, 1003, 1004, 1005]) ts_exp = TimeSeries(ta, dv.from_numpy(data), point_fx.POINT_INSTANT_VALUE) # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there try: rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) except CFDataRepositoryError: pass # and verify that we get exactly back what we wanted. 
self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # delete data with range UtcPeriod everything print('\n\n delete data with range UtcPeriod everything') tp = UtcPeriod(utc.time(2016, 1, 1), utc.time(2016, 1, 10)) # write the time series t_ds.remove_tp_data(tp) # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there self.assertRaises(CFDataRepositoryError, ts_dr.get_timeseries, ['temperature'], tp) # -------------------------------------- # insert data in between time saved data points print('\n\n insert data in between time saved data points') # insert first data in which we want to insert the second batch utc = Calendar() ta = TimeAxis(utc.time(2016, 1, 1), deltahours(24), 2) data = np.arange(0, ta.size(), dtype=np.float64) ts = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE) # save the first batch t_ds.append_ts_data(ts) # insert first data for every hour in between utc = Calendar() ta = TimeAxis(utc.time(2016, 1, 1) + deltahours(1), deltahours(1), 23) data = np.arange(10, 10 + ta.size(), dtype=np.float64) ts = TimeSeries(ta, dv.from_numpy(data), 
point_fx=point_fx.POINT_AVERAGE_VALUE) # save the first batch t_ds.append_ts_data(ts) # expected result time_points = np.array([ 1451606400, 1451610000, 1451613600, 1451617200, 1451620800, 1451624400, 1451628000, 1451631600, 1451635200, 1451638800, 1451642400, 1451646000, 1451649600, 1451653200, 1451656800, 1451660400, 1451664000, 1451667600, 1451671200, 1451674800, 1451678400, 1451682000, 1451685600, 1451689200, 1451692800, 0 ]) time_points[-1] = 2 * time_points[-2] - time_points[ -3] # last time point calc data = np.array([ 0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1 ]) ta = TimeAxis(UtcTimeVector.from_numpy(time_points)) ts_exp = TimeSeries( ta, dv.from_numpy(data), point_fx.POINT_INSTANT_VALUE ) # TODO: is this correct policy value for this case # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # and verify that we get exactly back what we wanted. 
self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy())) # -------------------------------------- # insert data including nan print('\n\n insert data including nan') utc = Calendar() ta = TimeAxis(utc.time(2016, 1, 1) + deltahours(1), deltahours(1), 23) data = np.arange(10, 10 + ta.size(), dtype=np.float64) data[4] = np.nan data[ 6] = np.nan # np.inf, but trouble getting inf trough all version of numpy/netcdf data[8] = np.nan # -np.inf, --"-- ts = TimeSeries(ta, dv.from_numpy(data), point_fx=point_fx.POINT_AVERAGE_VALUE) # save the first batch t_ds.append_ts_data(ts) # expected result time_points = np.array([ 1451606400, 1451610000, 1451613600, 1451617200, 1451620800, 1451624400, 1451628000, 1451631600, 1451635200, 1451638800, 1451642400, 1451646000, 1451649600, 1451653200, 1451656800, 1451660400, 1451664000, 1451667600, 1451671200, 1451674800, 1451678400, 1451682000, 1451685600, 1451689200, 1451692800, 0 ]) time_points[-1] = 2 * time_points[-2] - time_points[ -3] # last time point calc data = np.array([ 0, 10, 11, 12, 13, np.nan, 15, # np.inf, np. 
nan, # TODO: figure out how to unmask restoring 'used' mask-values 17, #-np.inf, np.nan, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1 ]) ta = TimeAxis(UtcTimeVector.from_numpy(time_points)) ts_exp = TimeSeries( ta, dv.from_numpy(data), point_fx.POINT_INSTANT_VALUE) # TODO: policy right ? # now read back the result using a *standard* shyft cf geo repository selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]} ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria) # now read back 'temperature' that we know should be there rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period()) # and verify that we get exactly back what we wanted. self.assertIsNotNone(rts_map) self.assertTrue('temperature' in rts_map) geo_temperature = rts_map['temperature'] self.assertEqual(len(geo_temperature), 1) self.assertLessEqual( GeoPoint.distance2(geo_temperature[0].mid_point(), GeoPoint(x, y, z)), 1.0) # check if time axis is as expected self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis) self.assertTrue( np.allclose(geo_temperature[0].ts.time_axis.time_points, ts_exp.time_axis.time_points)) self.assertEqual(geo_temperature[0].ts.point_interpretation(), point_fx.POINT_AVERAGE_VALUE) # check if variable data is as expected self.assertTrue( np.allclose(geo_temperature[0].ts.values.to_numpy(), ts_exp.values.to_numpy(), equal_nan=True)) if path.exists(test_file): os.remove(test_file)
def test_forecast(self):
    """Smoke-test quantile_map_forecast with three weighted forecast sets.

    Only the shape of the result is verified: one series per historical
    series, each with as many points as the target time-axis.
    """
    fx_avg = ts_point_fx.POINT_AVERAGE_VALUE
    utc = Calendar()
    ta = TimeAxis(utc.time(2017, 1, 1, 0, 0, 0), deltahours(24), 4)
    num_historical_data = 56

    # Arrange: three forecast sets with two, three and four members,
    # each paired with the weight of the set.
    weighted_members = (
        (5.0, ([13.4, 15.6, 17.1, 19.1],
               [34.1, 2.40, 43.9, 10.2])),
        (9.0, ([83.1, -42.2, 0.4, 23.4],
               [15.1, 6.500, 4.2, 2.9],
               [53.1, 87.90, 23.8, 5.6])),
        (3.0, ([1.5, -1.9, -17.2, -10.0],
               [4.7, 18.2, 15.3000, 8.9],
               [-45.2, -2.3, 80.2, 71.0],
               [45.1, -92.0, 34.4, 65.8])),
    )
    forecast_sets = TsVectorSet()
    weight_sets = dv()
    for weight, members in weighted_members:
        forecasts = TsVector()
        for member_values in members:
            forecasts.append(TimeSeries(ta, dv(member_values), fx_avg))
        forecast_sets.append(forecasts)
        weight_sets.append(weight)

    # Historical data: random values scaled to [0, 50).
    historical_data = TsVector()
    for _ in range(num_historical_data):
        random_values = np.random.random(ta.size()) * 50.0
        historical_data.append(
            TimeSeries(ta, dv.from_numpy(random_values), fx_avg))

    # need one more exposed from core here:
    #   auto historical_order = qm::quantile_index<tsa_t>(historical_data, ta);
    interpolation_start = ta.time(2)
    interpolation_end = ta.time(3)

    # Act
    result = quantile_map_forecast(
        forecast_sets, weight_sets, historical_data, ta,
        interpolation_start, interpolation_end, False)

    # Assert
    self.assertIsNotNone(result)
    self.assertEqual(len(result), num_historical_data)
    for mapped_ts in result:
        self.assertEqual(mapped_ts.size(), ta.size())
def append_ts_data(self, time_series: TimeSeries) -> None:
    """
    Write ``time_series`` into the data file at ``self.file_path`` so that,
    for ``time_series.total_period()``, the stored time points and values
    are those of ``time_series``.

    Stored points that fall inside the period are replaced; stored points
    after the period are shifted so they follow directly after the new data.
    If fewer values are written than were stored inside the period, the
    surplus entries left at the end of the file are removed by calling
    ``self.create_new_file()`` and writing the cropped content again through
    a recursive call.

    :param time_series: the time series to store; all of its time points
        except the last one are written together with its values
    :return: None
    :raises TimeSeriesStoreError: if the dataset has no ``'time'`` variable
        or no variable named ``self.ts_meta_info.variable_name``
    """
    period = time_series.total_period()
    n_new_val = time_series.size()
    crop_data = False
    time_series_cropped = None

    with Dataset(self.file_path, 'a') as ds:
        # Strategy: locate the index range of the stored time points covered by
        # the new period, then delete / replace / insert so that this range
        # holds exactly the new data.
        time_variable = 'time'
        time = ds.variables.get(time_variable, None)

        if time is None:
            raise TimeSeriesStoreError('Something is wrong with the dataset. time not found.')
        var = ds.variables.get(self.ts_meta_info.variable_name, None)

        if var is None:
            raise TimeSeriesStoreError('Something is wrong with the dataset. variable {0} not found.'
                                       .format(self.ts_meta_info.variable_name))

        if len(time):
            # The file already holds data: merge the new values into it.
            time_utc = convert_netcdf_time(time.units, time)

            # [idx_min, idx_max) is the range of stored points inside the new period.
            idx_min = np.searchsorted(time_utc, period.start, side='left')
            idx_max = np.searchsorted(time_utc, period.end, side='left')  # use 'left' since period.end = time_point(last_value)+dt
            # Index just past the new data once it has been written at idx_min.
            idx_data_end = idx_min + n_new_val
            # Move the stored tail if the new data overlaps, or lies before, stored data
            # and the number of replaced points differs from the number of new values.
            if idx_min < len(time_utc) and idx_max < len(time_utc) and idx_max - idx_min != n_new_val:
                idx_last = len(time_utc)
                # The tail [idx_max, idx_last) must start right after the new data.
                time[idx_data_end:] = time[idx_max:idx_last]
                var[idx_data_end:, 0] = var[idx_max:idx_last, 0]
            # Insert the new data. The last entry of time_points is dropped;
            # presumably it is the end of the last interval -- TODO confirm.
            time[idx_min: idx_data_end] = time_series.time_axis.time_points[:-1]
            var[idx_min:idx_data_end, 0] = time_series.values.to_numpy()
            # Crop all data which should not be there: more points were replaced
            # than written, so surplus entries remain at the end of the file.
            if idx_max - idx_min - n_new_val > 0:
                # Number of leading entries that are still valid.
                idx_del_start = len(time) - idx_max + idx_min + n_new_val
                crop_data = True
                time_cropped = time[0:idx_del_start]
                var_cropped = var[0:idx_del_start, 0]
                # End of the cropped time axis, extrapolated with the last time step.
                last_time_point = 2 * time_cropped[-1] - time_cropped[-2]
                ta = TimeAxis(UtcTimeVector.from_numpy(time_cropped.astype(np.int64)), int(last_time_point))
                time_series_cropped = TimeSeries(ta, dv.from_numpy(var_cropped), point_fx.POINT_INSTANT_VALUE)  # TODO: is this right policy?
        else:
            # No time points stored yet: write the complete series.
            time[:] = time_series.time_axis.time_points[:-1]
            var[:, 0] = time_series.values.to_numpy()
        ds.sync()

    # Done after the dataset is closed: rebuild the file with only the valid
    # leading part. NOTE(review): create_new_file() is not visible here;
    # presumably it re-creates an empty file -- confirm.
    if crop_data and time_series_cropped:
        self.create_new_file()
        self.append_ts_data(time_series_cropped)
def selector_ts(
    ts: TimeSeries,
    period: UtcPeriod,
    average_dt: int,
    threshold_tss: Sequence[TimeSeries],
    tss: Sequence[TimeSeries],
    mask_point_fx: point_interpretation_policy,
    client: DtsClient,
    calendar: Calendar
) -> TimeSeries:
    """Select values from different time-series based on values from a single
    time-series when compared to a set of threshold time-series.

    For every interval of the averaging time-axis the average of ``ts`` is
    compared with the thresholds evaluated at the start of the interval.
    Series ``i`` is selected when
    ``threshold_tss[i - 1] <= average < threshold_tss[i]``; the first series
    has no lower bound and the last series has no upper bound. At most one
    series is selected per interval (the first one that matches). The result
    is the sum of each series multiplied by its 0/1 mask series.

    Args:
        ts: TimeSeries to base selections on.
        period: Period to select values in.
        average_dt: Time step to use for the internal masking series. An average
            of the input ts together with the threshold time-series in
            threshold_tss is used to generate the masks.
        threshold_tss: Threshold time-series. Should be one less than the number
            of time-series to choose from (so it is empty when there is only
            one series to choose from).
        tss: TimeSeries to choose values from.
        mask_point_fx: Point interpretation to use for the mask time-series. If
            equal to POINT_INSTANT_VALUE the boundaries between different
            selection regions is smoothed. If equal to POINT_AVERAGE_VALUE the
            boundaries are sharp.
        client: DtsClient use for computations and data retrieval.
        calendar: Calendar used to interpret time.

    Returns:
        A TimeSeries that is the selection of values from tss.

    Raises:
        AssertionError: If the number of thresholds is not one less than the
            number of time-series.
    """
    assert len(threshold_tss) == len(tss) - 1, ('the number of thresholds should be one less '
                                                'than the number of time-series')

    # Averaging time-axis for the mask: round the number of steps up so the
    # axis covers the whole period.
    n = calendar.diff_units(period.start, period.end, average_dt)
    if n * average_dt < period.end - period.start:
        n += 1
    ta_avg = TimeAxis(period.start, average_dt, n)

    # Let the client evaluate the averaged expression.
    request = TsVector()
    request.append(ts.average(ta_avg))
    evaluated: TsVector = client.evaluate(request, period)
    avg_ts = evaluated[0]
    del request, evaluated

    # Compute the masks. One mask per series is created up front so that
    # masks[i] exists even when the averaged series has no points.
    n_series = len(tss)
    last = n_series - 1
    # Open-ended bounds: any finite average falls in exactly one region, and
    # a single series without thresholds is always selected.
    no_lower, no_upper = float('-inf'), float('inf')
    masks: List[DoubleVector] = [DoubleVector() for _ in range(n_series)]
    for avg_p, avg_v in zip(avg_ts.get_time_axis(), avg_ts.values):
        selected = False
        for i in range(n_series):
            if selected:
                # An earlier series already owns this interval.
                masks[i].append(0.0)
                continue
            lower = no_lower if i == 0 else threshold_tss[i - 1](avg_p.start)
            upper = no_upper if i == last else threshold_tss[i](avg_p.start)
            selected = lower <= avg_v < upper
            masks[i].append(1.0 if selected else 0.0)

    # Construct the final ts: sum of every series times its mask.
    computed_ts = None
    for ts_expr, mask in zip(tss, masks):
        term = ts_expr * TimeSeries(ta_avg, mask, mask_point_fx)
        if computed_ts is None:
            computed_ts = term
        else:
            computed_ts += term

    return computed_ts