def test_percentiles(self):
    """Verify api.percentiles over 10 constant series (values 0..9) on a daily axis,
    including the MIN_EXTREME / AVERAGE / MAX_EXTREME statistics-property markers,
    and that the TsVector.percentiles alternative syntax agrees in length."""
    cal = api.Calendar()
    start = cal.time(2016, 1, 1)
    hour = api.deltahours(1)
    n_hours = 240
    hourly_axis = api.TimeAxisFixedDeltaT(start, hour, n_hours)
    ts_vec = api.TsVector()
    point_avg = api.point_interpretation_policy.POINT_AVERAGE_VALUE
    # ten flat series, ts[k] == k everywhere, so percentile results are known exactly
    for fill in range(10):
        ts_vec.append(api.TimeSeries(ta=hourly_axis, fill_value=fill, point_fx=point_avg))
    wanted = api.IntVector([api.statistics_property.MIN_EXTREME,
                            0, 10, 50,
                            api.statistics_property.AVERAGE,
                            70, 100,
                            api.statistics_property.MAX_EXTREME])
    daily_fixed = api.TimeAxisFixedDeltaT(start, hour*24, n_hours//24)
    daily_generic = api.TimeAxis(start, hour*24, n_hours//24)
    pct = api.percentiles(ts_vec, daily_fixed, wanted)
    pct_alt = ts_vec.percentiles(daily_generic, wanted)  # just to verify it works with alt. syntax
    self.assertEqual(len(pct_alt), len(pct))
    # (expected value, failure message) per requested statistic, in order
    expected = [(0.0, "min-extreme "),
                (0.0, " 0-percentile"),
                (0.9, " 10-percentile"),
                (4.5, " 50-percentile"),
                (4.5, " -average"),
                (6.3, " 70-percentile"),
                (9.0, "100-percentile"),
                (9.0, "max-extreme")]
    for step in range(len(daily_fixed)):
        for k, (value, label) in enumerate(expected):
            self.assertAlmostEqual(value, pct[k].value(step), 3, label)
def test_percentiles(self):
    """Verify api.percentiles over 10 constant series (values 0..9) on a daily axis,
    using the legacy Timeaxis/Timeseries API and the -1 marker for the average,
    and that the TsVector.percentiles alternative syntax agrees in length."""
    cal = api.Calendar()
    start = cal.time(2016, 1, 1)
    hour = api.deltahours(1)
    n_hours = 240
    hourly_axis = api.Timeaxis(start, hour, n_hours)
    ts_vec = api.TsVector()
    point_avg = api.point_interpretation_policy.POINT_AVERAGE_VALUE
    # ten flat series, ts[k] == k everywhere, so percentile results are known exactly
    for fill in range(10):
        ts_vec.append(api.Timeseries(ta=hourly_axis, fill_value=fill, point_fx=point_avg))
    wanted = api.IntVector([0, 10, 50, -1, 70, 100])  # -1 selects the average
    daily_fixed = api.Timeaxis(start, hour * 24, n_hours // 24)
    daily_generic = api.Timeaxis2(start, hour * 24, n_hours // 24)
    pct = api.percentiles(ts_vec, daily_fixed, wanted)
    pct_alt = ts_vec.percentiles(daily_generic, wanted)  # just to verify it works with alt. syntax
    self.assertEqual(len(pct_alt), len(pct))
    # (expected value, failure message) per requested statistic, in order
    expected = [(0.0, " 0-percentile"),
                (0.9, " 10-percentile"),
                (4.5, " 50-percentile"),
                (4.5, " -average"),
                (6.3, " 70-percentile"),
                (9.0, "100-percentile")]
    for step in range(len(daily_fixed)):
        for k, (value, label) in enumerate(expected):
            self.assertAlmostEqual(value, pct[k].value(step), 3, label)
def test_percentiles_with_min_max_extremes(self):
    """ the percentiles function now also supports picking out the min-max peak value
        within each interval.
        Setup test-data so that we have a well known percentile result,
        but also have peak-values within the interval that we can verify.
        We let hour ts 0..9 have values 0..9 constant 24*10 days
           then modify ts[1], every day
                    first  value to a peak min value equal to - day_no*1
                    every day second  value to a peak max value equal to + day_no*1
                    every day 3rd    value to a nan value
        ts[1] should then have same average value for each day (so same percentile)
              but min-max extreme should be equal to +- day_no*1
    """
    c = api.Calendar()
    t0 = c.time(2016, 1, 1)
    dt = api.deltahours(1)
    n = 240
    ta = api.TimeAxis(t0, dt, n)
    timeseries = api.TsVector()
    p_fx = api.point_interpretation_policy.POINT_AVERAGE_VALUE
    # ten flat hourly series, ts[k] == k, 10 days long
    for i in range(10):
        timeseries.append(api.TimeSeries(ta=ta, fill_value=i, point_fx=p_fx))
    ts = timeseries[1]  # pick this one to insert min/max extremes
    for i in range(0, 240, 24):
        ts.set(i + 0, 1.0 - 100*i/24.0)
        ts.set(i + 1, 1.0 + 100*i/24.0)  # notice that when i==0, this gives 1.0
        ts.set(i + 2, float('nan'))  # also put in a nan, just to verify it is ignored during average processing
    wanted_percentiles = api.IntVector([api.statistics_property.MIN_EXTREME,
                                       0, 10, 50,
                                       api.statistics_property.AVERAGE,
                                       70, 100,
                                       api.statistics_property.MAX_EXTREME])
    ta_day = api.TimeAxis(t0, dt*24, n//24)
    percentiles = api.percentiles(timeseries, ta_day, wanted_percentiles)
    for i in range(len(ta_day)):
        if i == 0:  # first timestep, the min/max extremes are picked from 0'th and 9'th ts.
            self.assertAlmostEqual(0.0, percentiles[0].value(i), 3, "min-extreme ")
            # fixed msg: this asserts the MAX extreme (was copy-pasted "min-extreme ")
            self.assertAlmostEqual(9.0, percentiles[7].value(i), 3, "max-extreme")
        else:
            # for day i>0 the inserted peaks in ts[1] dominate: 1.0 -/+ 100*i
            self.assertAlmostEqual(1.0 - 100.0*i*24.0/24.0, percentiles[0].value(i), 3, "min-extreme ")
            self.assertAlmostEqual(1.0 + 100.0*i*24.0/24.0, percentiles[7].value(i), 3, "max-extreme")
        # the plain percentiles/average are unaffected by the symmetric peaks + nan
        self.assertAlmostEqual(0.0, percentiles[1].value(i), 3, " 0-percentile")
        self.assertAlmostEqual(0.9, percentiles[2].value(i), 3, " 10-percentile")
        self.assertAlmostEqual(4.5, percentiles[3].value(i), 3, " 50-percentile")
        self.assertAlmostEqual(4.5, percentiles[4].value(i), 3, " -average")
        self.assertAlmostEqual(6.3, percentiles[5].value(i), 3, " 70-percentile")
        self.assertAlmostEqual(9.0, percentiles[6].value(i), 3, "100-percentile")
def test_partition_by(self):
    """ verify/demo exposure of the .partition_by function that can
        be used to produce yearly percentiles statistics for long historical
        time-series
    """
    cal = api.Calendar()
    t_start = cal.time(1930, 9, 1)
    dt = api.deltahours(1)
    n_steps = cal.diff_units(t_start, cal.time(2016, 9, 1), dt)
    axis = api.TimeAxis(t_start, dt, n_steps)
    pattern = api.DoubleVector.from_numpy(np.arange(len(axis)))  # increasing values
    source = api.TimeSeries(ta=axis, values=pattern,
                            point_fx=api.point_interpretation_policy.POINT_AVERAGE_VALUE)
    t_pivot = cal.time(2016, 9, 1)
    n_parts = 80
    interval = api.Calendar.YEAR
    # get back TsVector,
    # where all TsVector[i].index_of(partition_t0)
    # is equal to the index ix for which the TsVector[i].value(ix) correspond to start value of
    # that particular partition.
    parts = source.partition_by(cal, t_start, interval, n_parts, t_pivot)
    self.assertEqual(len(parts), n_parts)
    year_start = t_start
    for part in parts:
        ix = part.index_of(t_pivot)
        # start value of each partition equals its offset (in dt units) from t_start
        self.assertEqual(part.value(ix), cal.diff_units(t_start, year_start, dt))
        year_start = cal.add(year_start, interval, 1)
    # Now finally, try percentiles on the partitions
    pct_spec = [0, 10, 25, -1, 50, 75, 90, 100]
    pct_axis = api.TimeAxis(t_pivot, api.deltahours(24), 365)
    pct = api.percentiles(parts, pct_axis, pct_spec)
    self.assertEqual(len(pct), len(pct_spec))
def test_partition_by(self):
    """ verify/demo exposure of the .partition_by function that can
        be used to produce yearly percentiles statistics for long historical
        time-series (legacy Timeaxis2/Timeseries API)
    """
    cal = api.Calendar()
    t_start = cal.time(1930, 9, 1)
    dt = api.deltahours(1)
    n_steps = cal.diff_units(t_start, cal.time(2016, 9, 1), dt)
    axis = api.Timeaxis2(t_start, dt, n_steps)
    pattern = api.DoubleVector.from_numpy(np.arange(len(axis)))  # increasing values
    source = api.Timeseries(ta=axis, values=pattern,
                            point_fx=api.point_interpretation_policy.POINT_AVERAGE_VALUE)
    t_pivot = cal.time(2016, 9, 1)
    n_parts = 80
    interval = api.Calendar.YEAR
    # get back TsVector,
    # where all TsVector[i].index_of(partition_t0)
    # is equal to the index ix for which the TsVector[i].value(ix) correspond to start value of
    # that particular partition.
    parts = source.partition_by(cal, t_start, interval, n_parts, t_pivot)
    self.assertEqual(len(parts), n_parts)
    year_start = t_start
    for part in parts:
        ix = part.index_of(t_pivot)
        # start value of each partition equals its offset (in dt units) from t_start
        self.assertEqual(part.value(ix), cal.diff_units(t_start, year_start, dt))
        year_start = cal.add(year_start, interval, 1)
    # Now finally, try percentiles on the partitions
    pct_spec = [0, 10, 25, -1, 50, 75, 90, 100]
    pct_axis = api.Timeaxis2(t_pivot, api.deltahours(24), 365)
    pct = api.percentiles(parts, pct_axis, pct_spec)
    self.assertEqual(len(pct), len(pct_spec))