Example #1
    def test_dtss_partition_by_average(self):
        """
        This test illustrates the use of partition_by on both the client and
        the server side. The main point here is to ensure that the evaluated
        period covers both the historical and the evaluation periods.
        """
        with tempfile.TemporaryDirectory() as c_dir:
            # setup data to be calculated
            utc = Calendar()
            d = deltahours(1)
            t = utc.time(2000, 1, 1)
            n = utc.diff_units(t, utc.add(t, Calendar.YEAR, 10), d)
            ta = TimeAxis(t, d, n)
            td = TimeAxis(t, d * 24, n // 24)
            n_ts = 1
            store_tsv = TsVector()  # something we store at server side
            for i in range(n_ts):
                pts = TimeSeries(
                    ta,
                    np.sin(
                        np.linspace(start=0, stop=1.0 * (i + 1),
                                    num=ta.size())),
                    point_fx.POINT_AVERAGE_VALUE)
                ts_id = shyft_store_url(f"{i}")
                store_tsv.append(TimeSeries(
                    ts_id, pts))  # generate a bound pts to store

            # start dtss server
            dtss = DtsServer()
            cache_on_write = True
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_auto_cache(True)
            dtss.set_listening_port(port_no)
            dtss.set_container(
                "test", c_dir
            )  # notice we set container 'test' to point to c_dir directory
            dtss.start_async(
            )  # the internal shyft time-series will be stored to that container

            # create dts client
            c = DtsClient(
                host_port,
                auto_connect=False)  # demonstrate object life-time connection
            c.store_ts(store_tsv,
                       overwrite_on_write=True,
                       cache_on_write=cache_on_write)

            t_0 = utc.time(2018, 1, 1)
            tax = TimeAxis(t_0, Calendar.DAY, 365)
            ts_h1 = TimeSeries(shyft_store_url(f'{0}'))
            ts_h2 = store_tsv[0]
            ts_p1 = ts_h1.partition_by(utc, t, Calendar.YEAR, 10,
                                       t_0).average(tax)
            ts_p2 = ts_h2.partition_by(utc, t, Calendar.YEAR, 10,
                                       t_0).average(tax)
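            # Hedged continuation (the original snippet stops here): evaluate the
            # unbound partition expression server-side and compare it with the
            # locally bound one, assuming the DtsClient.evaluate API used in the
            # other examples.
            r1 = c.evaluate(ts_p1, tax.total_period())
            for a, b in zip(r1, ts_p2):
                assert np.allclose(a.values.to_numpy(), b.values.to_numpy())
            c.close()
            dtss.clear()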
Example #2
    def test_dtss_remove_series(self):
        with tempfile.TemporaryDirectory() as c_dir:

            # start the server
            dtss = DtsServer()
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_listening_port(port_no)
            dtss.set_container(
                "test", c_dir
            )  # notice we set container 'test' to point to c_dir directory
            dtss.start_async(
            )  # the internal shyft time-series will be stored to that container

            # setup some data
            utc = Calendar()
            d = deltahours(1)
            n = 365 * 24 // 3
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)
            tsv = TsVector()
            pts = TimeSeries(ta, np.linspace(start=0, stop=1.0, num=ta.size()),
                             point_fx.POINT_AVERAGE_VALUE)
            tsv.append(TimeSeries("cache://test/foo", pts))

            # get a client
            client = DtsClient(host_port)
            client.store_ts(tsv)

            # start with no removing
            dtss.set_can_remove(False)

            # we should be disallowed to remove now
            try:
                client.remove("shyft://test/foo")
            except Exception as err:
                self.assertEqual(
                    str(err), "dtss::server: server does not support removing")

            # then try with allowing remove
            dtss.set_can_remove(True)

            # we only support removing shyft-url style data
            try:
                client.remove("protocol://test/foo")
            except Exception as err:
                self.assertEqual(
                    str(err),
                    "dtss::server: server does not allow removing for non shyft-url type data"
                )

            # now it should work
            client.remove("shyft://test/foo")
Example #3
    def test_failures(self):
        """
        Verify that dtss client-server connections are auto-magically
        restored and re-established after failures
        """
        with tempfile.TemporaryDirectory() as c_dir:

            # start the server
            dtss = DtsServer()
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_listening_port(port_no)
            dtss.set_container(
                "test", c_dir
            )  # notice we set container 'test' to point to c_dir directory
            dtss.start_async(
            )  # the internal shyft time-series will be stored to that container

            # setup some data
            utc = Calendar()
            d = deltahours(1)
            n = 365 * 24 // 3
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)
            tsv = TsVector()
            pts = TimeSeries(ta, np.linspace(start=0, stop=1.0, num=ta.size()),
                             point_fx.POINT_AVERAGE_VALUE)
            tsv.append(TimeSeries("cache://test/foo", pts))

            # get a client
            client = DtsClient(host_port, auto_connect=False)
            client.store_ts(tsv)
            client.close()
            client.store_ts(tsv)  # should just work, it re-opens automagically
            dtss.clear(
            )  # the server is out and away, no chance this would work
            try:
                client.store_ts(tsv)
            except Exception:
                pass  # expected: the server is gone, so the store must fail
            else:
                self.fail(
                    'This should throw, because there is no dtss server to help you'
                )

            dtss.set_listening_port(port_no)
            dtss.start_async()
            client.store_ts(
                tsv)  # this should just work, automagically reconnect
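            # Hedged teardown sketch, mirroring the other examples:
            client.close()
            dtss.clear()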
Example #4
    def test_get_ts_info(self):
        """
        Verify we can get specific TsInfo objects for time-series from the server backend.
        """
        with tempfile.TemporaryDirectory() as c_dir:

            # start the server
            dtss = DtsServer()
            port_no = find_free_port()
            host_adr = 'localhost:{0}'.format(port_no)
            dtss.set_listening_port(port_no)
            dtss.set_container(
                "testing", c_dir
            )  # notice we set container 'testing' to point to c_dir directory
            dtss.start_async(
            )  # the internal shyft time-series will be stored to that container

            # get a client
            client = DtsClient(host_adr)

            try:
                client.get_ts_info(r'shyft://testing/data')
            except Exception:
                pass  # expected: the series does not exist yet
            else:
                # only end up here if no exception was raised
                self.fail('Could fetch info for a non-existing time-series')

            # setup some data
            utc = Calendar()
            d = deltahours(1)
            n = 365 * 24 // 3
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)
            tsv = TsVector()
            pts = TimeSeries(ta, np.linspace(start=0, stop=1.0, num=ta.size()),
                             point_fx.POINT_AVERAGE_VALUE)
            tsv.append(TimeSeries(r'shyft://testing/data', pts))
            client.store_ts(tsv)

            info: TsInfo = client.get_ts_info(r'shyft://testing/data')

            self.assertEqual(info.name, r'data')
            self.assertEqual(info.point_fx, point_fx.POINT_AVERAGE_VALUE)
            self.assertEqual(info.data_period, ta.total_period())
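            # Hedged follow-up: TsInfo entries can also be enumerated with the
            # find API used in the ts_store example, e.g.:
            self.assertEqual(len(client.find(r'shyft://testing/.*')), 1)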
Example #5
    def test_can_create_cf_compliant_file(self):
        # create files
        test_file = path.join(path.abspath(os.curdir), 'shyft_test.nc')
        if path.exists(test_file):
            os.remove(test_file)
        # create meta info
        epsg_id = 32633
        x0 = 100000
        x1 = 200000
        y0 = 100000
        y1 = 200000
        x = 101000
        y = 101000
        z = 1200
        temperature = TimeSeriesMetaInfo('temperature',
                                         '/observed/at_stn_abc/temperature',
                                         'observed air temperature', x, y, z,
                                         epsg_id)

        # create time axis
        utc = Calendar()
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        data = np.arange(0, ta.size(), dtype=np.float64)
        ts = TimeSeries(ta,
                        dv.from_numpy(data),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)

        # save the first batch
        t_ds = TimeSeriesStore(test_file, temperature)
        t_ds.create_new_file()
        t_ds.append_ts_data(ts)

        # expected result
        ts_exp = ts

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Append data
        print("\n\n append at the end data")
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 2), deltahours(1), 48)
        ts = TimeSeries(ta,
                        dv.from_numpy(np.arange(0, ta.size(),
                                                dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 72)
        data = np.empty(72)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:72] = np.arange(0, 48, dtype=np.float64)  # <-- new data
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Append with overlap
        print("\n\n append with overlap")
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 3), deltahours(1), 48)
        ts = TimeSeries(ta,
                        dv.from_numpy(np.arange(0, ta.size(),
                                                dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 96)
        data = np.empty(96)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:48] = np.arange(0, 24, dtype=np.float64)  # <-- surviving head of the 2016-01-02 batch
        data[48:96] = np.arange(0, 48, dtype=np.float64)  # <-- new data
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Append with gap in time axis
        print("\n\n Append with gap in time axis")
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 6), deltahours(1), 24)
        ts = TimeSeries(ta,
                        dv.from_numpy(np.arange(0, ta.size(),
                                                dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        time_vals = np.append(
            TimeAxis(utc.time(2016, 1, 1), deltahours(1), 96).time_points[:-1],
            ta.time_points)
        # print(time_vals)
        ta = TimeAxis(UtcTimeVector.from_numpy(time_vals.astype(np.int64)))
        data = np.empty(120)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:48] = np.arange(0, 24, dtype=np.float64)
        data[48:96] = np.arange(0, 48, dtype=np.float64)
        data[96:120] = np.arange(0, 24, dtype=np.float64)  # <-- new data
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        # print(ts_exp.total_period())
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        # print(geo_temperature[0].ts.time_axis.time_points - ts_exp.time_axis.time_points)
        # print(geo_temperature[0].ts.time_axis.time_points - time_vals)
        # print(ts_exp.time_axis.time_points - time_vals)
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Add new data in the middle where nothing was defined (no moving)
        print(
            "\n\n Add new data in the middle where nothing was defined (no moving)"
        )
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 2), deltahours(1), 24)
        ts = TimeSeries(ta,
                        dv.from_numpy(
                            np.arange(100, 100 + ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        time_vals = np.append(
            TimeAxis(utc.time(2016, 1, 1), deltahours(1), 96).time_points[:-1],
            TimeAxis(utc.time(2016, 1, 6), deltahours(1), 24).time_points)
        ta = TimeAxis(UtcTimeVector.from_numpy(time_vals.astype(np.int64)))
        data = np.empty(120)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:48] = np.arange(100, 124, dtype=np.float64)  # <-- new data
        data[48:96] = np.arange(0, 48, dtype=np.float64)
        data[96:120] = np.arange(0, 24, dtype=np.float64)
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())
        # print(ts_exp.total_period())
        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Insert new data in the middle and move rest
        print("\n\n insert new data and move rest")
        # create time axis
        ta = TimeAxis(utc.time(2016, 1, 5), deltahours(1), 36)
        ts = TimeSeries(ta,
                        dv.from_numpy(
                            np.arange(200, 200 + ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the data
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 144)
        data = np.empty(144)
        data[:24] = np.arange(0, 24, dtype=np.float64)
        data[24:48] = np.arange(100, 124, dtype=np.float64)
        data[48:96] = np.arange(0, 48, dtype=np.float64)
        data[96:132] = np.arange(200, 236, dtype=np.float64)  # <-- new data
        data[132:144] = np.arange(12, 24, dtype=np.float64)
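        # values 12..23 above are the surviving tail of the 2016-01-06 batch,
        # whose first 12 hours were overwritten by the inserted 36 h series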
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Add new data before existing data without overlap
        print("\n\n add new data before existing data without overlap")
        # create time axis
        ta = TimeAxis(utc.time(2015, 12, 31), deltahours(1), 24)
        ts = TimeSeries(ta,
                        dv.from_numpy(
                            np.arange(300, 300 + ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the first batch
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2015, 12, 31), deltahours(1), 168)
        data = np.empty(168)
        data[:24] = np.arange(300, 324, dtype=np.float64)  # <-- new data
        data[24:48] = np.arange(0, 24, dtype=np.float64)
        data[48:72] = np.arange(100, 124, dtype=np.float64)
        data[72:120] = np.arange(0, 48, dtype=np.float64)
        data[120:156] = np.arange(200, 236, dtype=np.float64)
        data[156:168] = np.arange(12, 24, dtype=np.float64)
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # add new data before existing data with overlap
        print("\n\n add new data before existing data with overlap")
        # create time axis
        ta = TimeAxis(utc.time(2015, 12, 30), deltahours(1), 36)
        ts = TimeSeries(ta,
                        dv.from_numpy(
                            np.arange(400, 400 + ta.size(), dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the first batch
        # t_ds = TimeSeriesStore(test_file, temperature)
        t_ds.append_ts_data(ts)

        # expected result
        ta = TimeAxis(utc.time(2015, 12, 30), deltahours(1), 192)
        data = np.empty(192)
        data[:36] = np.arange(400, 436, dtype=np.float64)  # <-- new data
        data[36:48] = np.arange(312, 324, dtype=np.float64)
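        # values 312..323 are the surviving tail of the 2015-12-31 batch,
        # whose first 12 hours were overwritten by the new 36 h series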
        data[48:72] = np.arange(0, 24, dtype=np.float64)
        data[72:96] = np.arange(100, 124, dtype=np.float64)
        data[96:144] = np.arange(0, 48, dtype=np.float64)
        data[144:180] = np.arange(200, 236, dtype=np.float64)
        data[180:192] = np.arange(12, 24, dtype=np.float64)
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Overwrite everything with less data points
        # create time axis
        print('\n\n Overwrite everything with less data points')
        ta = TimeAxis(utc.time(2015, 12, 30), deltahours(24), 9)
        ts = TimeSeries(ta,
                        dv.from_numpy(
                            np.arange(1000, 1000 + ta.size(),
                                      dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # write the time series
        t_ds.append_ts_data(ts)

        # expected result
        ts_exp = ts

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # Insert data with different dt
        # create time axis
        print('\n\n Insert data with different dt')
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        ts = TimeSeries(ta,
                        dv.from_numpy(np.arange(0, 24, dtype=np.float64)),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # write the time series
        t_ds.append_ts_data(ts)

        # expected result
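        # mixing a 1 h batch into the existing 24 h series splits the daily axis:
        # two daily points stay in front, the hourly points go in between, and
        # six daily values (1003..1008) remain after; the daily value 1002 for
        # 2016-01-01 is replaced by the hourly data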
        time_points = np.empty(33, dtype=np.int64)
        time_points[0:2] = TimeAxis(utc.time(2015, 12, 30), deltahours(24),
                                    1).time_points
        time_points[2:26] = TimeAxis(utc.time(2016, 1, 1), deltahours(1),
                                     23).time_points
        time_points[26:] = TimeAxis(utc.time(2016, 1, 2), deltahours(24),
                                    6).time_points
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        data = np.empty(32)
        data[0:2] = np.array([1000, 1001])
        data[2:26] = np.arange(0, 24)  # <-- new data
        data[26:] = np.arange(1003, 1009)
        ts_exp = TimeSeries(ta,
                            dv.from_numpy(data),
                            point_fx=point_fx.POINT_AVERAGE_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # delete data with range UtcPeriod in the middle
        print('\n\n delete data with range UtcPeriod')
        tp = UtcPeriod(utc.time(2015, 12, 31), utc.time(2016, 1, 1, 12))
        # ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        # ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE)
        # write the time series
        t_ds.remove_tp_data(tp)

        # expected result
        time_points = np.array([
            1451433600, 1451653200, 1451656800, 1451660400, 1451664000,
            1451667600, 1451671200, 1451674800, 1451678400, 1451682000,
            1451685600, 1451689200, 1451692800, 1451779200, 1451865600,
            1451952000, 1452038400, 1452124800, 1452211200
        ])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        data = np.array([
            1000, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 1003, 1004, 1005,
            1006, 1007, 1008
        ])
        ts_exp = TimeSeries(ta, dv.from_numpy(data),
                            point_fx.POINT_INSTANT_VALUE
                            )  # TODO: is this correct policy to use

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # delete data with range UtcPeriod at the start
        print('\n\n delete data with range UtcPeriod at the start')
        tp = UtcPeriod(1451433600, 1451667600)
        # ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        # ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE)
        # write the time series
        t_ds.remove_tp_data(tp)

        # expected result
        time_points = np.array([
            1451671200, 1451674800, 1451678400, 1451682000, 1451685600,
            1451689200, 1451692800, 1451779200, 1451865600, 1451952000,
            1452038400, 1452124800, 1452211200
        ])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        data = np.array(
            [18, 19, 20, 21, 22, 23, 1003, 1004, 1005, 1006, 1007, 1008])
        ts_exp = TimeSeries(
            ta, dv.from_numpy(data), point_fx.POINT_INSTANT_VALUE
        )  # TODO: is this correct policy to use for this test

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # delete data with range UtcPeriod at the end
        print('\n\n delete data with range UtcPeriod at the end')
        tp = UtcPeriod(1451952000, utc.time(2016, 1, 10))
        # ta = TimeAxis(utc.time(2016, 1, 1), deltahours(1), 24)
        # ts = TimeSeries(ta, dv.from_numpy(np.arange(0, 24, dtype=np.float64)), point_fx=point_fx.POINT_AVERAGE_VALUE)
        # write the time series
        t_ds.remove_tp_data(tp)

        # expected result
        time_points = np.array([
            1451671200, 1451674800, 1451678400, 1451682000, 1451685600,
            1451689200, 1451692800, 1451779200, 1451865600, 1451952000
        ])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        data = np.array([18, 19, 20, 21, 22, 23, 1003, 1004, 1005])
        ts_exp = TimeSeries(ta, dv.from_numpy(data),
                            point_fx.POINT_INSTANT_VALUE)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        try:
            rts_map = ts_dr.get_timeseries(['temperature'],
                                           ts_exp.total_period())
        except CFDataRepositoryError:
            pass

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # delete data with range UtcPeriod everything
        print('\n\n delete data with range UtcPeriod everything')
        tp = UtcPeriod(utc.time(2016, 1, 1), utc.time(2016, 1, 10))
        # write the time series
        t_ds.remove_tp_data(tp)

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        self.assertRaises(CFDataRepositoryError, ts_dr.get_timeseries,
                          ['temperature'], tp)

        # --------------------------------------
        # insert data in between time saved data points
        print('\n\n insert data in between time saved data points')
        # insert first data in which we want to insert the second batch
        utc = Calendar()
        ta = TimeAxis(utc.time(2016, 1, 1), deltahours(24), 2)
        data = np.arange(0, ta.size(), dtype=np.float64)
        ts = TimeSeries(ta,
                        dv.from_numpy(data),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the first batch
        t_ds.append_ts_data(ts)

        # insert first data for every hour in between
        utc = Calendar()
        ta = TimeAxis(utc.time(2016, 1, 1) + deltahours(1), deltahours(1), 23)
        data = np.arange(10, 10 + ta.size(), dtype=np.float64)
        ts = TimeSeries(ta,
                        dv.from_numpy(data),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the first batch
        t_ds.append_ts_data(ts)

        # expected result
        time_points = np.array([
            1451606400, 1451610000, 1451613600, 1451617200, 1451620800,
            1451624400, 1451628000, 1451631600, 1451635200, 1451638800,
            1451642400, 1451646000, 1451649600, 1451653200, 1451656800,
            1451660400, 1451664000, 1451667600, 1451671200, 1451674800,
            1451678400, 1451682000, 1451685600, 1451689200, 1451692800, 0
        ])
        time_points[-1] = 2 * time_points[-2] - time_points[-3]  # extrapolate the last time point
        data = np.array([
            0, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25,
            26, 27, 28, 29, 30, 31, 32, 1
        ])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        ts_exp = TimeSeries(
            ta, dv.from_numpy(data), point_fx.POINT_INSTANT_VALUE
        )  # TODO: is this correct policy value for this case

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy()))

        # --------------------------------------
        # insert data including nan
        print('\n\n insert data including nan')
        utc = Calendar()
        ta = TimeAxis(utc.time(2016, 1, 1) + deltahours(1), deltahours(1), 23)
        data = np.arange(10, 10 + ta.size(), dtype=np.float64)
        data[4] = np.nan
        data[6] = np.nan  # np.inf, but trouble getting inf through all versions of numpy/netcdf
        data[8] = np.nan  # -np.inf, --"--
        ts = TimeSeries(ta,
                        dv.from_numpy(data),
                        point_fx=point_fx.POINT_AVERAGE_VALUE)
        # save the first batch
        t_ds.append_ts_data(ts)

        # expected result
        time_points = np.array([
            1451606400, 1451610000, 1451613600, 1451617200, 1451620800,
            1451624400, 1451628000, 1451631600, 1451635200, 1451638800,
            1451642400, 1451646000, 1451649600, 1451653200, 1451656800,
            1451660400, 1451664000, 1451667600, 1451671200, 1451674800,
            1451678400, 1451682000, 1451685600, 1451689200, 1451692800, 0
        ])
        time_points[-1] = 2 * time_points[-2] - time_points[-3]  # extrapolate the last time point

        data = np.array([
            0, 10, 11, 12, 13, np.nan, 15,
            # np.inf:
            np.nan,  # TODO: figure out how to unmask restoring 'used' mask-values
            17,
            # -np.inf:
            np.nan,
            19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1
        ])
        ta = TimeAxis(UtcTimeVector.from_numpy(time_points))
        ts_exp = TimeSeries(
            ta, dv.from_numpy(data),
            point_fx.POINT_INSTANT_VALUE)  # TODO: policy right ?

        # now read back the result using a *standard* shyft cf geo repository
        selection_criteria = {'bbox': [[x0, x1, x1, x0], [y0, y0, y1, y1]]}
        ts_dr = CFDataRepository(epsg_id, test_file, selection_criteria)
        # now read back 'temperature' that we know should be there
        rts_map = ts_dr.get_timeseries(['temperature'], ts_exp.total_period())

        # and verify that we get exactly back what we wanted.
        self.assertIsNotNone(rts_map)
        self.assertTrue('temperature' in rts_map)
        geo_temperature = rts_map['temperature']
        self.assertEqual(len(geo_temperature), 1)
        self.assertLessEqual(
            GeoPoint.distance2(geo_temperature[0].mid_point(),
                               GeoPoint(x, y, z)), 1.0)
        # check if time axis is as expected
        self.assertEqual(geo_temperature[0].ts.time_axis, ts_exp.time_axis)
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.time_axis.time_points,
                        ts_exp.time_axis.time_points))
        self.assertEqual(geo_temperature[0].ts.point_interpretation(),
                         point_fx.POINT_AVERAGE_VALUE)
        # check if variable data is as expected
        self.assertTrue(
            np.allclose(geo_temperature[0].ts.values.to_numpy(),
                        ts_exp.values.to_numpy(),
                        equal_nan=True))
        if path.exists(test_file):
            os.remove(test_file)
Example #6
    def test_ts_cache(self):
        """ Verify dtss ts-cache functions exposed to python """
        with tempfile.TemporaryDirectory() as c_dir:
            # setup data to be calculated
            utc = Calendar()
            d = deltahours(1)
            n = 100
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)
            n_ts = 10
            store_tsv = TsVector()  # something we store at server side
            tsv = TsVector(
            )  # something we put an expression into, referring to stored ts-symbols

            for i in range(n_ts):
                pts = TimeSeries(
                    ta,
                    np.sin(np.linspace(start=0, stop=1.0 * i, num=ta.size())),
                    point_fx.POINT_AVERAGE_VALUE)
                ts_id = shyft_store_url("{0}".format(i))
                tsv.append(float(1.0) * TimeSeries(ts_id)
                           )  # make an expression that returns what we store
                store_tsv.append(TimeSeries(
                    ts_id, pts))  # generate a bound pts to store

            # add one external ts
            tsv.append(TimeSeries(fake_store_url("_any_ts_id_will_do")))
            # then start the server
            dtss = DtsServer()

            dtss.cb = self.dtss_read_callback  # rig external callbacks as well.
            self.callback_count = 0
            self.rd_throws = False
            cache_on_write = True
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_auto_cache(True)
            dtss.set_listening_port(port_no)
            dtss.set_container(
                "test", c_dir
            )  # notice we set container 'test' to point to c_dir directory
            dtss.start_async(
            )  # the internal shyft time-series will be stored to that container

            dts = DtsClient(
                host_port,
                auto_connect=False)  # demonstrate object life-time connection
            cs0 = dtss.cache_stats
            dts.store_ts(store_tsv,
                         overwrite_on_write=True,
                         cache_on_write=cache_on_write)
            r1 = dts.evaluate(tsv,
                              ta.total_period(),
                              use_ts_cached_read=True,
                              update_ts_cache=True)
            cs1 = dtss.cache_stats
            ccs1 = dts.cache_stats  # client can also provide cache-stats

            dtss.flush_cache_all()  # force the cache empty
            dtss.clear_cache_stats()
            cs2 = dtss.cache_stats  # just to ensure clear did work
            r1 = dts.evaluate(
                tsv,
                ta.total_period(),
                use_ts_cached_read=True,
                update_ts_cache=True
            )  # second evaluation, cache is empty, will force reads (misses)
            cs3 = dtss.cache_stats
            r1 = dts.evaluate(
                tsv,
                ta.total_period(),
                use_ts_cached_read=True,
                update_ts_cache=True
            )  # third evaluation, cache is now filled, all hits
            cs4 = dtss.cache_stats
            # now verify explicit caching performed by the python callback
            self.cache_dtss = dtss
            self.cache_reads = True
            dts.cache_flush()  # is the equivalent of
            # dtss.flush_cache_all()
            # dtss.clear_cache_stats()
            # use explicit cache-control instead of global
            dtss.set_auto_cache(
                False
            )  # turn off auto caching, we want to test the explicit caching
            r1 = dts.evaluate(
                tsv,
                ta.total_period(),
                use_ts_cached_read=True,
                update_ts_cache=False
            )  # evaluation, all misses, but the callback explicitly caches the external ts
            cs5 = dtss.cache_stats  # baseline: lots of misses
            r1 = dts.evaluate(tsv,
                              ta.total_period(),
                              use_ts_cached_read=True,
                              update_ts_cache=False)
            cs6 = dtss.cache_stats  # should be one hit here

            dts.close()  # close connection (will use context manager later)
            dtss.clear()  # close server

            # now the moment of truth:
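            # expected cache-stats timeline (matching the assertions below):
            #   cs0: fresh server             -> all zeros
            #   cs1: store cached n_ts series -> n_ts hits, 1 external miss
            #   cs2: flush + clear            -> all zeros again
            #   cs3: empty cache              -> n_ts + 1 misses (reads refill it)
            #   cs4: refilled cache           -> n_ts + 1 hits, misses accumulate
            #   cs5/cs6: auto-cache off       -> only the callback-cached external ts hits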
            self.assertEqual(len(r1), len(tsv))
            for i in range(n_ts - 1):
                self.assertEqual(r1[i].time_axis, store_tsv[i].time_axis)
                assert_array_almost_equal(r1[i].values.to_numpy(),
                                          store_tsv[i].values.to_numpy(),
                                          decimal=4)

            self.assertEqual(cs0.hits, 0)
            self.assertEqual(cs0.misses, 0)
            self.assertEqual(cs0.coverage_misses, 0)
            self.assertEqual(cs0.id_count, 0)
            self.assertEqual(cs0.point_count, 0)
            self.assertEqual(cs0.fragment_count, 0)

            self.assertEqual(cs1.hits, n_ts)
            self.assertEqual(
                cs1.misses, 1
            )  # because we cache on store, so 10 cached, 1 external with miss
            self.assertEqual(cs1.coverage_misses, 0)
            self.assertEqual(cs1.id_count, n_ts + 1)
            self.assertEqual(cs1.point_count, (n_ts + 1) * n)
            self.assertEqual(cs1.fragment_count, n_ts + 1)
            # verify client side cache_stats
            self.assertEqual(ccs1.hits, n_ts)
            self.assertEqual(
                ccs1.misses, 1
            )  # because we cache on store, so 10 cached, 1 external with miss
            self.assertEqual(ccs1.coverage_misses, 0)
            self.assertEqual(ccs1.id_count, n_ts + 1)
            self.assertEqual(ccs1.point_count, (n_ts + 1) * n)
            self.assertEqual(ccs1.fragment_count, n_ts + 1)

            self.assertEqual(cs2.hits, 0)
            self.assertEqual(cs2.misses, 0)
            self.assertEqual(cs2.coverage_misses, 0)
            self.assertEqual(cs2.id_count, 0)
            self.assertEqual(cs2.point_count, 0)
            self.assertEqual(cs2.fragment_count, 0)

            self.assertEqual(cs3.hits, 0)
            self.assertEqual(
                cs3.misses,
                n_ts + 1)  # the cache was flushed, so every read misses
            self.assertEqual(cs3.coverage_misses, 0)
            self.assertEqual(cs3.id_count, n_ts + 1)
            self.assertEqual(cs3.point_count, (n_ts + 1) * n)
            self.assertEqual(cs3.fragment_count, n_ts + 1)

            self.assertEqual(cs4.hits,
                             n_ts + 1)  # because previous read filled cache
            self.assertEqual(cs4.misses,
                             n_ts + 1)  # remembers previous misses.
            self.assertEqual(cs4.coverage_misses, 0)
            self.assertEqual(cs4.id_count, n_ts + 1)
            self.assertEqual(cs4.point_count, (n_ts + 1) * n)
            self.assertEqual(cs4.fragment_count, n_ts + 1)

            self.assertEqual(cs6.hits, 1)  # only the callback-cached external ts hits
            self.assertEqual(cs6.misses,
                             n_ts * 2 + 1)  # remembers previous misses.
            self.assertEqual(cs6.coverage_misses, 0)
            self.assertEqual(cs6.id_count, 1)
            self.assertEqual(cs6.point_count, 1 * n)
            self.assertEqual(cs6.fragment_count, 1)
Example #7
    def test_ts_store(self):
        """
        This test verifies the shyft internal time-series store,
        that allow identified time-series to be stored
        in the backend using a directory container specified for the
        location.

        All time-series of the form shyft://<container>/<ts-name>
        is mapped to the configured <container> (aka a directory on the server)

        This applies to expressions, as well as the new
        .store_ts(ts_vector) function that allows the user to
        stash away time-series into the configured back-end container.

        All find-operations of the form shyft://<container>/<regular-expression>
        is mapped to a search in the corresponding directory for the <container>

        :return:
        """
        with tempfile.TemporaryDirectory() as c_dir:
            # setup data to be calculated
            utc = Calendar()
            d = deltahours(1)
            n = 365 * 24 // 3
            t = utc.time(2016, 1, 1)
            ta = TimeAxis(t, d, n)
            n_ts = 10
            store_tsv = TsVector()  # something we store at server side
            tsv = TsVector(
            )  # something we put an expression into, referring to stored ts-symbols

            for i in range(n_ts):
                pts = TimeSeries(
                    ta,
                    np.sin(np.linspace(start=0, stop=1.0 * i, num=ta.size())),
                    point_fx.POINT_AVERAGE_VALUE)
                ts_id = shyft_store_url("{0}".format(i))
                tsv.append(float(1.0) * TimeSeries(ts_id)
                           )  # make an expression that returns what we store
                store_tsv.append(TimeSeries(
                    ts_id, pts))  # generate a bound pts to store
            # krls with some extra challenges related to serialization
            tsv_krls = TsVector()
            krls_ts = TimeSeries(shyft_store_url("9")).krls_interpolation(
                dt=d, gamma=1e-3, tolerance=0.001, size=ta.size())
            tsv_krls.append(krls_ts)
            # min_max_check_linear_fill also needs a serialization check,
            # so create a trivial case
            ts9 = TimeSeries(shyft_store_url("9"))
            ts_qac = ts9.min_max_check_linear_fill(v_min=-10.0 * n_ts,
                                                   v_max=10.0 * n_ts)
            tsv_krls.append(ts_qac)
            tsv_krls.append(ts9)
            tsv_krls.append(ts9.inside(min_v=-0.5, max_v=0.5))
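            # hedged note: min_max_check_linear_fill is expected to leave values
            # already inside [v_min, v_max] untouched (which is why r2[1] and r2[2]
            # compare equal below), and inside() yields a 0/1 indicator series --
            # a reading inferred from this test's assertions, not from verified docs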

            # then start the server
            dtss = DtsServer()
            port_no = find_free_port()
            host_port = 'localhost:{0}'.format(port_no)
            dtss.set_auto_cache(True)
            std_max_items = dtss.cache_max_items
            dtss.cache_max_items = 3000
            tst_max_items = dtss.cache_max_items
            dtss.set_listening_port(port_no)
            dtss.set_container(
                "test", c_dir
            )  # notice we set container 'test' to point to c_dir directory
            dtss.start_async(
            )  # the internal shyft time-series will be stored to that container
            # also notice that we don't have to set up callbacks in this case (but we could, and they would work)
            #
            # finally start the action
            dts = DtsClient(host_port)
            # then try something that should work
            dts.store_ts(store_tsv)
            r1 = dts.evaluate(tsv, ta.total_period())
            f1 = dts.find(
                r"shyft://test/\d")  # find all ts with one digit, 0..9
            r2 = dts.evaluate(tsv_krls, ta.total_period())
            url_x = shyft_store_url(r'does not exists')
            tsvx = TsVector()
            tsvx.append(TimeSeries(url_x))
            try:
                dts.evaluate(tsvx, ta.total_period())
                self.fail('evaluate() should raise for a non-existing time-series')
            except RuntimeError as rex:
                self.assertIsNotNone(rex)

            dts.close()  # close connection (will use context manager later)
            dtss.clear()  # close server

            # now the moment of truth:
            self.assertEqual(len(r1), len(tsv))
            for i in range(n_ts - 1):
                self.assertEqual(r1[i].time_axis, store_tsv[i].time_axis)
                assert_array_almost_equal(r1[i].values.to_numpy(),
                                          store_tsv[i].values.to_numpy(),
                                          decimal=4)

            self.assertEqual(len(f1), 10)
            self.assertEqual(len(r2), len(tsv_krls))
            assert_array_almost_equal(r2[1].values.to_numpy(),
                                      r2[2].values.to_numpy(),
                                      decimal=4)
            self.assertEqual(1000000, std_max_items)
            self.assertEqual(3000, tst_max_items)
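
            # Hedged sketch: shyft_store_url() is a test helper defined outside this
            # excerpt; given that find(r"shyft://test/\d") matched the ten stored
            # series, a plausible minimal implementation is (hypothetical name to
            # avoid clashing with the real helper):
            def shyft_store_url_sketch(name):
                return "shyft://test/{0}".format(name)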
Example #8
    def test_functionality_hosting_localhost(self):

        # setup data to be calculated
        utc = Calendar()
        d = deltahours(1)
        d24 = deltahours(24)
        n = 240
        n24 = 10
        t = utc.time(2016, 1, 1)
        ta = TimeAxis(t, d, n)
        ta24 = TimeAxis(t, d24, n24)
        n_ts = 100
        percentile_list = IntVector([0, 35, 50, 65, 100])
        tsv = TsVector()
        store_tsv = TsVector()  # something we store at server side
        for i in range(n_ts):
            pts = TimeSeries(ta, np.linspace(start=0, stop=1.0, num=ta.size()),
                             point_fx.POINT_AVERAGE_VALUE)
            tsv.append(float(1 + i / 10) * pts)
            store_tsv.append(TimeSeries("cache://test/{0}".format(i),
                                        pts))  # generate a bound pts to store

        dummy_ts = TimeSeries('dummy://a')
        tsv.append(dummy_ts.integral(ta))
        self.assertGreater(len(ts_stringify(tsv[0])),
                           10)  # just ensure ts_stringify works on expressions
        # then start the server
        dtss = DtsServer()
        port_no = find_free_port()
        host_port = 'localhost:{0}'.format(port_no)
        dtss.set_listening_port(port_no)
        dtss.cb = self.dtss_read_callback
        dtss.find_cb = self.dtss_find_callback
        dtss.store_ts_cb = self.dtss_store_callback

        dtss.start_async()

        dts = DtsClient(StringVector([host_port]), True,
                        1000)  # constructor variant taking a list of host:port entries
        # then try something that should work
        dts.store_ts(store_tsv)
        r1 = dts.evaluate(tsv, ta.total_period())
        tsv1x = tsv.inside(-0.5, 0.5)
        tsv1x.append(tsv1x[-1].decode(
            start_bit=1, n_bits=1))  # just to verify serialization/bind
        tsv1x.append(store_tsv[1].derivative())
        tsv1x.append(store_tsv[1].pow(
            2.0))  # just to verify pow serialization (well, it's a bin-op..)
        r1x = dts.evaluate(tsv1x, ta.total_period())
        r2 = dts.percentiles(tsv, ta.total_period(), ta24, percentile_list)
        r3 = dts.find(r'netcdf://dummy\.nc/ts\d')  # raw string, so \. and \d reach the regex
        self.rd_throws = True
        ex_count = 0
        try:
            dts.evaluate(tsv, ta.total_period())
        except RuntimeError:
            ex_count = 1
        self.rd_throws = True  # arm the throwing path again for find()
        try:
            dts.find('should throw')
        except RuntimeError:
            ex_count += 1

        dts.close()  # close connection (will use context manager later)
        dtss.clear()  # close server
        self.assertEqual(ex_count, 2)
        self.assertEqual(len(r1), len(tsv))
        self.assertEqual(self.callback_count, 4)
        for i in range(n_ts - 1):
            self.assertEqual(r1[i].time_axis, tsv[i].time_axis)
            assert_array_almost_equal(r1[i].values.to_numpy(),
                                      tsv[i].values.to_numpy(),
                                      decimal=4)

        self.assertEqual(len(r2), len(percentile_list))
        dummy_ts.bind(
            TimeSeries(ta,
                       fill_value=1.0,
                       point_fx=point_fx.POINT_AVERAGE_VALUE))
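        # dummy://a was unbound; binding it locally makes tsv fully evaluable
        # client-side, so p2 below can be compared with the server-side r2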
        p2 = tsv.percentiles(ta24, percentile_list)

        for i in range(len(p2)):
            self.assertEqual(r2[i].time_axis, p2[i].time_axis)
            assert_array_almost_equal(r2[i].values.to_numpy(),
                                      p2[i].values.to_numpy(),
                                      decimal=1)

        self.assertEqual(self.find_count, 2)
        self.assertEqual(len(r3), 10)  # 0..9
        for i in range(len(r3)):
            self.assertEqual(r3[i], self.ts_infos[i])
        self.assertIsNotNone(r1x)
        self.assertEqual(1, len(self.stored_tsv))
        self.assertEqual(len(store_tsv), len(self.stored_tsv[0]))
        for i in range(len(store_tsv)):
            self.assertEqual(self.stored_tsv[0][i].ts_id(),
                             store_tsv[i].ts_id())
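
        # Hedged sketch of the read callback wired via dtss.cb above; the
        # signature (ts_ids, read_period) -> TsVector is inferred from this
        # test's bookkeeping (callback_count, rd_throws), not a verified API:
        def sketch_read_callback(ts_ids, read_period):
            tsv_cb = TsVector()
            ta_cb = TimeAxis(read_period.start, deltahours(1), 10)
            for _ in ts_ids:  # one flat series per requested ts-id
                tsv_cb.append(TimeSeries(ta_cb, fill_value=1.0,
                                         point_fx=point_fx.POINT_AVERAGE_VALUE))
            return tsv_cb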
Example #9
    def test_forecast(self):
        fx_avg = ts_point_fx.POINT_AVERAGE_VALUE
        utc = Calendar()
        ta = TimeAxis(utc.time(2017, 1, 1, 0, 0, 0), deltahours(24), 4)
        historical_data = TsVector()

        forecast_sets = TsVectorSet()
        weight_sets = dv()
        num_historical_data = 56

        # Let's make three sets, one of two elements, one of three, and one of
        # four.

        forecasts_1 = TsVector()
        forecasts_2 = TsVector()
        forecasts_3 = TsVector()

        forecasts_1.append(TimeSeries(ta, dv([13.4, 15.6, 17.1, 19.1]),
                                      fx_avg))
        forecasts_1.append(TimeSeries(ta, dv([34.1, 2.40, 43.9, 10.2]),
                                      fx_avg))
        forecast_sets.append(forecasts_1)
        weight_sets.append(5.0)

        forecasts_2.append(TimeSeries(ta, dv([83.1, -42.2, 0.4, 23.4]),
                                      fx_avg))
        forecasts_2.append(TimeSeries(ta, dv([15.1, 6.500, 4.2, 2.9]), fx_avg))
        forecasts_2.append(TimeSeries(ta, dv([53.1, 87.90, 23.8, 5.6]),
                                      fx_avg))
        forecast_sets.append(forecasts_2)
        weight_sets.append(9.0)

        forecasts_3.append(
            TimeSeries(ta, dv([1.5, -1.9, -17.2, -10.0]), fx_avg))
        forecasts_3.append(
            TimeSeries(ta, dv([4.7, 18.2, 15.3000, 8.9]), fx_avg))
        forecasts_3.append(
            TimeSeries(ta, dv([-45.2, -2.3, 80.2, 71.0]), fx_avg))
        forecasts_3.append(
            TimeSeries(ta, dv([45.1, -92.0, 34.4, 65.8]), fx_avg))
        forecast_sets.append(forecasts_3)
        weight_sets.append(3.0)
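        # hedged reading of the weight semantics: when the forecast members are
        # pooled into the quantile mapping, set 2 (weight 9.0) contributes most
        # and set 3 (weight 3.0) least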

        for i in range(num_historical_data):
            historical_data.append(
                TimeSeries(ta,
                           dv.from_numpy(np.random.random(ta.size()) * 50.0),
                           fx_avg))

        # need one more exposed from core here: auto historical_order = qm::quantile_index<tsa_t>(historical_data, ta);

        interpolation_start = ta.time(2)
        interpolation_end = ta.time(3)
        # Act
        result = quantile_map_forecast(forecast_sets, weight_sets,
                                       historical_data, ta,
                                       interpolation_start, interpolation_end,
                                       False)

        self.assertIsNotNone(result)
        self.assertEqual(len(result), num_historical_data)
        for ts in result:
            self.assertEqual(ts.size(), ta.size())
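
        # Hedged usage sketch (not part of the original assertions): inspect a
        # few of the mapped forecasts; each spans the full time-axis
        for i in range(3):
            print(result[i].values.to_numpy())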