Beispiel #1
0
    def remove_tp_data(self, period: UtcPeriod):
        """
        Delete stored data falling within the given time period.

        The netCDF file is opened, time points inside ``period`` (inclusive
        of both endpoints, see the searchsorted sides below) are cropped out,
        the file is re-created from scratch, and any surviving data is
        appended back.

        :param period: UtcPeriod whose [start, end] range is removed.
        :return: None
        """
        # Holds the surviving (cropped) series, if any, to write back after
        # the file is re-created below.
        time_series_cropped = None

        with Dataset(self.file_path, 'a') as ds:
            # 1. load the data
            time_variable = 'time'
            time = ds.variables.get(time_variable, None)

            if time is None:
                raise TimeSeriesStoreError(
                    'Something is wrong with the dataset. time not found.')
            var = ds.variables.get(self.ts_meta_info.variable_name, None)

            if var is None:
                raise TimeSeriesStoreError(
                    'Something is wrong with the dataset. variable {0} not found.'
                    .format(self.ts_meta_info.variable_name))

            if len(time):
                # 2. get indices of the data to delete
                time_utc = convert_netcdf_time(time.units, time)

                # 'left'/'right' sides make points equal to period.start and
                # period.end fall inside the removed range.
                idx_min = np.searchsorted(time_utc, period.start, side='left')
                idx_max = np.searchsorted(time_utc, period.end, side='right')

                # check if there is data outside the range
                if idx_max - idx_min != len(time):
                    # print('indices ', idx_min, idx_max, len(time))
                    # 3. crop the data array
                    if idx_max < len(time):
                        time_cropped = np.append(time[0:idx_min],
                                                 time[idx_max:])
                        var_cropped = np.append(var[0:idx_min], var[idx_max:])
                    else:
                        # removed range extends to the end; keep only the head
                        time_cropped = np.append(time[0:idx_min], [])
                        var_cropped = np.append(var[0:idx_min], [])
                    # Extrapolate one step past the last point (assuming the
                    # last interval's width) to close the time axis.
                    # NOTE(review): assumes at least 2 points survive the
                    # crop; time_cropped[-2] raises IndexError otherwise —
                    # TODO confirm callers guarantee this.
                    last_time_point = 2 * time_cropped[-1] - time_cropped[-2]
                    # print(type(time_cropped[0]))
                    # print(UtcTimeVector.from_numpy(time_cropped.astype(np.int64)).to_numpy())
                    ta = TimeAxis(
                        UtcTimeVector.from_numpy(time_cropped.astype(
                            np.int64)), int(last_time_point))
                    # print(var_cropped)
                    # print(type(var_cropped))
                    time_series_cropped = TimeSeries(
                        ta, dv.from_numpy(var_cropped),
                        point_fx.POINT_INSTANT_VALUE
                    )  # TODO: is this correct point policy?

        # 4. save the cropped data
        self.create_new_file()
        if time_series_cropped:
            self.append_ts_data(time_series_cropped)
Beispiel #2
0
def fixed_tsv(
        period: UtcPeriod,
        fixed_values: Sequence[float]
) -> TsVector:
    """Build a TsVector of constant TimeSeries covering *period*.

    Args:
        period: Time period each generated TimeSeries spans.
        fixed_values: Numbers; one constant TimeSeries is created per value.

    Returns:
        A TsVector holding one fixed-value TimeSeries per entry in
        fixed_values, each spanning period.
    """
    result = TsVector()
    for value in fixed_values:
        # a two-point axis: the series is constant over [start, end]
        axis = TimeAxis(UtcTimeVector([period.start, period.end]))
        series = TimeSeries(axis, [value], POINT_AVERAGE_VALUE)
        result.append(series)
    return result
Beispiel #3
0
    def test_can_create_cf_compliant_file(self):
        # create files
        test_file = path.join(path.abspath(os.curdir), 'shyft_test.nc')
        if path.exists(test_file):
            os.remove(test_file)
        # create meta info
        epsg_id = 32633
        x0 = 100000
        x1 = 200000
        y0 = 100000
        y1 = 200000
        x = 101000
        y = 101000
        z = 1200
        temperature = TimeSeriesMetaInfo('temperature',
                                         '/observed/at_stn_abc/temperature',
                                         'observed air temperature', x, y, z,
                                         epsg_id)

        # create time axis
        utc = Calendar()
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        data = np.arange(0, ta.size(), dtype=np.float64)
        ts = TimeSeries(ta,
                        dv.from_numpy(data),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)

        # save the first batch
        t_ds = TimeSeriesStore(test_file, temperature)
        t_ds.create_new_file()
        t_ds.append_ts_data(ts)

        # expected result
        ts_exp = ts

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Append data
        print("\n\n append at the end data")
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 2), deltahours(1), 48)
        ts = TimeSeries(ta,
                        dv.from_numpy(np.arange(0, ta.size(),
                                                dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 72)
        data = np.empty(72)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:72] = np.arange(0, 48, dtype=np.float64)  # <-- new data
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Append with overlap
        print("\n\n append with overlap")
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 3), deltahours(1), 48)
        ts = TimeSeries(ta,
                        dv.from_numpy(np.arange(0, ta.size(),
                                                dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 96)
        data = np.empty(96)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:48] = np.arange(0, 24, dtype=np.float64)  # <-- new data
        data[48:96] = np.arange(0, 48, dtype=np.float64)  # <-- new data
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Append with gap in time axis
        print("\n\n Append with gap in time axis")
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 6), deltahours(1), 24)
        ts = TimeSeries(ta,
                        dv.from_numpy(np.arange(0, ta.size(),
                                                dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        time_vals = np.append(
            TimeAxis(utc.time(2016, 1, 1), deltahours(1), 96).time_points[:-1],
            ta.time_points)
        # print(time_vals)
        ta = TimeAxis(UtcTimeVector.from_numpy(time_vals.astype(np.int64)))
        data = np.empty(120)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:48] = np.arange(0, 24, dtype=np.float64)
        data[48:96] = np.arange(0, 48, dtype=np.float64)
        data[96:120] = np.arange(0, 24, dtype=np.float64)  # <-- new data
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        # print(ts_exp.total_period())
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        # print(geo_temperature[0].ts.time_axis.time_points - ts_exp.time_axis.time_points)
        # print(geo_temperature[0].ts.time_axis.time_points - time_vals)
        # print(ts_exp.time_axis.time_points - time_vals)
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Add new data in the middle where nothing was defined (no moving)
        print(
            "\n\n Add new data in the middle where nothing was defined (no moving)"
        )
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 2), deltahours(1), 24)
        ts = TimeSeries(ta,
                        dv.from_numpy(
                            np.arange(100, 100 + ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        time_vals = np.append(
            TimeAxis(utc.time(2016, 1, 1), deltahours(1), 96).time_points[:-1],
            TimeAxis(utc.time(2016, 1, 6), deltahours(1), 24).time_points)
        ta = TimeAxis(UtcTimeVector.from_numpy(time_vals.astype(np.int64)))
        data = np.empty(120)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:48] = np.arange(100, 124, dtype=np.float64)  # <-- new data
        data[48:96] = np.arange(0, 48, dtype=np.float64)
        data[96:120] = np.arange(0, 24, dtype=np.float64)
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # print(ts_exp.total_period())
        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Insert new data in the middle and move rest
        print("\n\n insert new data and move rest")
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 5), deltahours(1), 36)
        ts = TimeSeries(ta,
                        dv.from_numpy(
                            np.arange(200, 200 + ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 144)
        data = np.empty(144)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:48] = np.arange(100, 124, dtype=np.float64)
        data[48:96] = np.arange(0, 48, dtype=np.float64)
        data[96:132] = np.arange(200, 236, dtype=np.float64)  # <-- new data
        data[132:144] = np.arange(12, 24, dtype=np.float64)
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Add new data before existing data without overlap
        print("\n\n add new data before existing data without overlap")
        # create time axis
        ta = TimeAxis(utc.time(2015, 12, 31), deltahours(1), 24)
        ts = TimeSeries(ta,
                        dv.from_numpy(
                            np.arange(300, 300 + ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the first batch
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2015, 12, 31), deltahours(1), 168)
        data = np.empty(168)
        data[:24] = np.arange(300, 324, dtype=np.float64)  # <-- new data
        data[24:48] = np.arange(0, 24, dtype=np.float64)
        data[48:72] = np.arange(100, 124, dtype=np.float64)
        data[72:120] = np.arange(0, 48, dtype=np.float64)
        data[120:156] = np.arange(200, 236, dtype=np.float64)
        data[156:168] = np.arange(12, 24, dtype=np.float64)
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # add new data before existing data with overlap
        print("\n\n add new data before existing data with overlap")
        # create time axis
        ta = TimeAxis(utc.time(2015, 12, 30), deltahours(1), 36)
        ts = TimeSeries(ta,
                        dv.from_numpy(
                            np.arange(400, 400 + ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the first batch
        # t_ds = TimeSeriesStore(test_file, temperature)
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2015, 12, 30), deltahours(1), 192)
        data = np.empty(192)
        data[:36] = np.arange(400, 436, dtype=np.float64)  # <-- new data
        data[36:48] = np.arange(312, 324, dtype=np.float64)
        data[48:72] = np.arange(0, 24, dtype=np.float64)
        data[72:96] = np.arange(100, 124, dtype=np.float64)
        data[96:144] = np.arange(0, 48, dtype=np.float64)
        data[144:180] = np.arange(200, 236, dtype=np.float64)
        data[180:192] = np.arange(12, 24, dtype=np.float64)
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Overwrite everything with less data points
        # create time axis
        print('\n\n Overwrite everything with less data points')
        ta = TimeAxis(utc.time(2015, 12, 30), deltahours(24), 9)
        ts = TimeSeries(ta,
                        dv.from_numpy(
                            np.arange(1000, 1000 + ta.size(),
                                      dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # write the time series
        t_ds.append_ts_data(ts)

        # expected result
        ts_exp = ts

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Insert data with different dt
        # create time axis
        print('\n\n Insert data with different dt')
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        ts = TimeSeries(ta,
                        dv.from_numpy(np.arange(0, 24, dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # write the time series
        t_ds.append_ts_data(ts)

        # expected result
        time_points = np.empty(33, dtype=np.int)
        time_points[0:2] = TimeAxis(utc.time(2015, 12, 30), deltahours(24),
                                    1).time_points
        time_points[2:26] = TimeAxis(utc.time(2016, 1, 1), deltahours(1),
                                     23).time_points
        time_points[26:] = TimeAxis(utc.time(2016, 1, 2), deltahours(24),
                                    6).time_points
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        data = np.empty(32)
        data[0:2] = np.array([1000, 1001])
        data[2:26] = np.arange(0, 24)  # <-- new data
        data[26:] = np.arange(1003, 1009)
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # delete data with range UtcPeriod in the middle
        print('\n\n delete data with range UtcPeriod')
        tp = UtcPeriod(utc.time(2015, 12, 31), utc.time(2016, 1, 1, 12))
        # ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        # ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE)
        # write the time series
        t_ds.remove_tp_data(tp)

        # expected result
        time_points = np.array([
            1451433600, 1451653200, 1451656800, 1451660400, 1451664000,
            1451667600, 1451671200, 1451674800, 1451678400, 1451682000,
            1451685600, 1451689200, 1451692800, 1451779200, 1451865600,
            1451952000, 1452038400, 1452124800, 1452211200
        ])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        data = np.array([
            1000, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1003, 1004, 1005,
            1006, 1007, 1008
        ])
        ts_exp = TimeSeries(ta, dv.from_numpy(data),
                            point_fx.POINT_INSTANT_VALUE
                            )  # TODO: is this correct policy to use

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # delete data with range UtcPeriod at the start
        print('\n\n delete data with range UtcPeriod at the start')
        tp = UtcPeriod(1451433600, 1451667600)
        # ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        # ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE)
        # write the time series
        t_ds.remove_tp_data(tp)

        # expected result
        time_points = np.array([
            1451671200, 1451674800, 1451678400, 1451682000, 1451685600,
            1451689200, 1451692800, 1451779200, 1451865600, 1451952000,
            1452038400, 1452124800, 1452211200
        ])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        data = np.array(
            [18, 19, 20, 21, 22, 23, 1003, 1004, 1005, 1006, 1007, 1008])
        ts_exp = TimeSeries(
            ta, dv.from_numpy(data), point_fx.POINT_INSTANT_VALUE
        )  # TODO: is this correct policy to use for this test

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # delete data with range UtcPeriod at the end
        print('\n\n delete data with range UtcPeriod at the end')
        tp = UtcPeriod(1451952000, utc.time(2016, 1, 10))
        # ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        # ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE)
        # write the time series
        t_ds.remove_tp_data(tp)

        # expected result
        time_points = np.array([
            1451671200, 1451674800, 1451678400, 1451682000, 1451685600,
            1451689200, 1451692800, 1451779200, 1451865600, 1451952000
        ])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        data = np.array([18, 19, 20, 21, 22, 23, 1003, 1004, 1005])
        ts_exp = TimeSeries(ta, dv.from_numpy(data),
                            point_fx.POINT_INSTANT_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        try:
            rts_map = ts_dr.get_timeseries(['temperature'],
                                           ts_exp.total_period())
        except CFDataRepositoryError:
            pass

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # delete data with range UtcPeriod everything
        print('\n\n delete data with range UtcPeriod everything')
        tp = UtcPeriod(utc.time(2016, 1, 1), utc.time(2016, 1, 10))
        # write the time series
        t_ds.remove_tp_data(tp)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        self.assertRaises(CFDataRepositoryError, ts_dr.get_timeseries,
                          ['temperature'], tp)

        # --------------------------------------
        # insert data in between time saved data points
        print('\n\n insert data in between time saved data points')
        # insert first data in which we want to insert the second batch
        utc = Calendar()
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(24), 2)
        data = np.arange(0, ta.size(), dtype=np.float64)
        ts = TimeSeries(ta,
                        dv.from_numpy(data),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the first batch
        t_ds.append_ts_data(ts)

        # insert first data for every hour in between
        utc = Calendar()
        ta = TimeAxis(utc.time(2016, 1, 1) + deltahours(1), deltahours(1), 23)
        data = np.arange(10, 10 + ta.size(), dtype=np.float64)
        ts = TimeSeries(ta,
                        dv.from_numpy(data),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the first batch
        t_ds.append_ts_data(ts)

        # expected result
        time_points = np.array([
            1451606400, 1451610000, 1451613600, 1451617200, 1451620800,
            1451624400, 1451628000, 1451631600, 1451635200, 1451638800,
            1451642400, 1451646000, 1451649600, 1451653200, 1451656800,
            1451660400, 1451664000, 1451667600, 1451671200, 1451674800,
            1451678400, 1451682000, 1451685600, 1451689200, 1451692800, 0
        ])
        time_points[-1] = 2 * time_points[-2] - time_points[
            -3]  # last time point calc
        data = np.array([
            0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
            26, 27, 28, 29, 30, 31, 32, 1
        ])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        ts_exp = TimeSeries(
            ta, dv.from_numpy(data), point_fx.POINT_INSTANT_VALUE
        )  # TODO: is this correct policy value for this case

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # insert data including nan
        print('\n\n insert data including nan')
        utc = Calendar()
        ta = TimeAxis(utc.time(2016, 1, 1) + deltahours(1), deltahours(1), 23)
        data = np.arange(10, 10 + ta.size(), dtype=np.float64)
        data[4] = np.nan
        data[
            6] = np.nan  # np.inf, but trouble getting inf trough all version of numpy/netcdf
        data[8] = np.nan  # -np.inf, --"--
        ts = TimeSeries(ta,
                        dv.from_numpy(data),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the first batch
        t_ds.append_ts_data(ts)

        # expected result
        time_points = np.array([
            1451606400, 1451610000, 1451613600, 1451617200, 1451620800,
            1451624400, 1451628000, 1451631600, 1451635200, 1451638800,
            1451642400, 1451646000, 1451649600, 1451653200, 1451656800,
            1451660400, 1451664000, 1451667600, 1451671200, 1451674800,
            1451678400, 1451682000, 1451685600, 1451689200, 1451692800, 0
        ])
        time_points[-1] = 2 * time_points[-2] - time_points[
            -3]  # last time point calc

        data = np.array([
            0,
            10,
            11,
            12,
            13,
            np.nan,
            15,
            # np.inf,
            np.
            nan,  # TODO: figure out how to unmask restoring 'used' mask-values
            17,
            #-np.inf,
            np.nan,
            19,
            20,
            21,
            22,
            23,
            24,
            25,
            26,
            27,
            28,
            29,
            30,
            31,
            32,
            1
        ])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        ts_exp = TimeSeries(
            ta, dv.from_numpy(data),
            point_fx.POINT_INSTANT_VALUE)  # TODO: policy right ?

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy(),
                        equal_nan=True))
        if path.exists(test_file):
            os.remove(test_file)
# Beispiel #4
    def test_merge_store_ts_points(self):
        """
        Verify the shyft internal time-series store:
        merge_store_points must merge new points into already stored
        series, preserving stored points outside the merged range.
        """
        with tempfile.TemporaryDirectory() as c_dir:
            # build the initial time-axis: points at t, t+1h, t+3h, ending t+4h
            cal = Calendar()
            dt = deltahours(1)
            t0 = cal.time(2016, 1, 1)
            ta = TimeAxis(
                UtcTimeVector.from_numpy(
                    np.array([t0, t0 + dt, t0 + 3 * dt], dtype=np.int64)),
                t0 + 4 * dt)

            n_ts = 10
            store_tsv = TsVector()  # the series kept at the server side
            for i in range(n_ts):
                bound = TimeSeries(ta,
                                   fill_value=float(i),
                                   point_fx=point_fx.POINT_AVERAGE_VALUE)
                store_tsv.append(
                    TimeSeries(shyft_store_url("{0}".format(i)), bound))

            # spin up the server with a 'test' container in the tmp dir
            srv = DtsServer()
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            srv.set_auto_cache(True)
            srv.set_listening_port(port_no)
            srv.set_container("test", c_dir)  # 'test' container -> c_dir
            srv.start_async()  # series are stored into that container
            client = DtsClient(host_port)

            # 1. store the initial series; merge_store_points requires
            #    the target series to exist
            client.store_ts(store_tsv)

            # merge points: one before, one inside, one after the range
            tb = TimeAxis(
                UtcTimeVector.from_numpy(
                    np.array([t0 - dt, t0 + 3 * dt, t0 + 4 * dt],
                             dtype=np.int64)), t0 + 5 * dt)
            merge_tsv = TsVector()  # the merge-point vector
            for i in range(n_ts):
                patch = TimeSeries(tb,
                                   fill_value=-1.0 - i,
                                   point_fx=point_fx.POINT_AVERAGE_VALUE)
                merge_tsv.append(
                    TimeSeries(shyft_store_url("{0}".format(i)), patch))

            client.merge_store_ts_points(merge_tsv)

            # read everything back and verify the merge result
            rts = TsVector()
            rts[:] = [TimeSeries(shyft_store_url(f"{i}")) for i in range(n_ts)]

            r = client.evaluate(rts, tb.total_period())
            client.close()  # close connection (will use context manager later)
            srv.clear()  # close server

            for i, merged in enumerate(r):
                self.assertEqual(merged.time_axis.size(), 5)
                assert_array_almost_equal(
                    merged.values.to_numpy(),
                    np.array([-i - 1, i, i, -i - 1, -i - 1],
                             dtype=np.float64))
# Beispiel #5
    def append_ts_data(self, time_series: TimeSeries):
        """
        Ensure that the data-file content equals ``time_series`` over
        ``time_series.time_axis.total_period()``.

        Points inside the period are overwritten in place; stored points
        after the period are shifted to follow the new data.  If the write
        leaves stale trailing points in the file, the file is re-created and
        the surviving (cropped) data is written back via a recursive call.

        :param time_series: the time-series whose points should be stored
        :return: None
        :raises TimeSeriesStoreError: if the dataset lacks the 'time'
            variable or the configured data variable
        """
        period = time_series.total_period()
        n_new_val = time_series.size()
        crop_data = False  # becomes True when trailing points must be dropped
        time_series_cropped = None  # surviving data used for the file rewrite

        with Dataset(self.file_path, 'a') as ds:
            # read time, from ts.time_axis.start()
            #  or last value of time
            # then consider if we should fill in complete time-axis ?
            #
            # figure out the start-index,
            # then
            # ds.time[startindex:] = ts.time_axis.numpy values
            # ds.temperature[startindex:] = ts.values.to_numpy()
            #
            # or if more advanced algorithm,
            #  first read
            #  diff
            #   result -> delete range, replace range, insert range..
            time_variable = 'time'
            time = ds.variables.get(time_variable, None)

            if time is None:
                raise TimeSeriesStoreError(
                    'Something is wrong with the dataset. time not found.')
            var = ds.variables.get(self.ts_meta_info.variable_name, None)

            if var is None:
                raise TimeSeriesStoreError(
                    'Something is wrong with the dataset. variable {0} not found.'
                    .format(self.ts_meta_info.variable_name))

            if len(time):
                # existing data: splice the new period into the stored axis
                time_utc = convert_netcdf_time(time.units, time)

                # half-open index range [idx_min, idx_max) of stored points
                # that fall inside the new period
                idx_min = np.searchsorted(time_utc, period.start, side='left')
                idx_max = np.searchsorted(
                    time_utc, period.end, side='left'
                )  # use 'left' since period.end = time_point(last_value)+dt
                idx_data_end = idx_min + n_new_val  # end index of new data
                # print('indices ', idx_min, idx_max, idx_data_end, len(time))
                # move data if we are overlap or new data`s time before saved time:
                if idx_min < len(time_utc) and idx_max < len(
                        time_utc) and idx_max - idx_min != n_new_val:
                    # print('In moving condition ', idx_max - idx_min, n_new_val)
                    # shift the tail so it follows directly after the new data
                    idx_last = len(time_utc)
                    time[idx_data_end:] = time[idx_max:idx_last]
                    var[idx_data_end:, 0] = var[idx_max:idx_last, 0]
                # insert new data
                # NOTE: time_points has one extra point (period end) that is
                # not stored, hence the [:-1]
                time[idx_min:
                     idx_data_end] = time_series.time_axis.time_points[:-1]
                var[idx_min:idx_data_end, 0] = time_series.values.to_numpy()
                # crop all data which should not be there
                # (replaced range was longer than the new data, so the file
                # now holds stale points past idx_del_start)
                if idx_max - idx_min - n_new_val > 0:
                    idx_del_start = len(time) - idx_max + idx_min + n_new_val
                    # print("we need to delete something at the end ", idx_max - idx_min - n_new_val, idx_del_start)
                    crop_data = True
                    time_cropped = time[0:idx_del_start]
                    var_cropped = var[0:idx_del_start, 0]
                    # extrapolate one step past the last point to close the
                    # time-axis; assumes at least two surviving points
                    last_time_point = 2 * time_cropped[-1] - time_cropped[-2]
                    # print(type(time_cropped[0]))
                    # print(UtcTimeVector.from_numpy(time_cropped.astype(np.int64)).to_numpy())
                    ta = TimeAxis(
                        UtcTimeVector.from_numpy(time_cropped.astype(
                            np.int64)), int(last_time_point))
                    # print(var_cropped)
                    # print(type(var_cropped))
                    time_series_cropped = TimeSeries(
                        ta, dv.from_numpy(var_cropped), point_fx.
                        POINT_INSTANT_VALUE)  # TODO: is this right policy?

            else:
                # empty file: write the whole series directly
                time[:] = time_series.time_axis.time_points[:-1]
                var[:, 0] = time_series.values.to_numpy()

            # for i, (t, val) in enumerate(zip(time[:], var[:])):
            #     print('{:<4} : {} - {} - {}'.format(i, datetime.fromtimestamp(t), val[0], type(val[0])))
            ds.sync()

        # netCDF variables cannot shrink in place: rebuild the file from the
        # cropped series (recursion hits the len(time)==0 branch above)
        if crop_data and time_series_cropped:
            self.create_new_file()
            self.append_ts_data(time_series_cropped)