def test_Simple_Aggregation(self):
    '''Aggregate a constant one-point-per-minute series into 15-minute slots.

    The series runs from 09:58 (included) to 10:32 (excluded) in the sensor
    timezone and the aggregation is started with rounded=True, so exactly two
    slots are expected: 10:00-10:15 and 10:15-10:30, both with full coverage.
    '''
    sensor = SimpleSensor('084EB18E44FFA/7-MB-1')
    from_dt = dt(2016,3,25,9,58,0, tzinfo=sensor.timezone) # last was at 2016-03-25T14:09:45+00:00
    to_dt = dt(2016,3,25,10,32,0, tzinfo=sensor.timezone)

    # Build the input series: one constant temperature point per minute
    dataTimeSeries = DataTimeSeries()
    slider_dt = from_dt
    while slider_dt < to_dt:
        data = PhysicalData(labels = ['temp_C'], values = [25.5])
        physicalDataTimePoint = PhysicalDataTimePoint(dt = slider_dt,
                                                      data = data,
                                                      validity_region_span = sensor.Points_validity_region_span)
        dataTimeSeries.append(physicalDataTimePoint)
        slider_dt = slider_dt + TimeSlotSpan('1m')

    dataTimeSeriesAggregatorProcess = DataTimeSeriesAggregatorProcess(timeSlotSpan = TimeSlotSpan('15m'),
                                                                      Sensor = sensor,
                                                                      data_to_aggregate = PhysicalDataTimePoint)

    # Aggregate
    dataTimeSeriesAggregatorProcess.start(dataTimeSeries = dataTimeSeries,
                                          start_dt = from_dt,
                                          end_dt = to_dt,
                                          rounded = True,
                                          threaded = False)

    # Get results
    aggregated_dataTimeSeries = dataTimeSeriesAggregatorProcess.get_results(until=None)

    # Quick check results using string representations
    expected_content = {'temp_C_MAX': 25.5, 'temp_C_AVG': 25.5, 'temp_C_MIN': 25.5}
    expected_slots = [
        ('''PhysicalDataTimeSlot: from 2016-03-25 10:00:00+01:00 to 2016-03-25 10:15:00+01:00 with span of 15m and coverage of 1.0''', expected_content),
        ('''PhysicalDataTimeSlot: from 2016-03-25 10:15:00+01:00 to 2016-03-25 10:30:00+01:00 with span of 15m and coverage of 1.0''', expected_content),
    ]

    # Plain iteration (no len()/list() on the result object) to collect the slots
    slots = [slot for slot in aggregated_dataTimeSeries]

    # A wrong number of slots is reported as a proper test failure with both
    # counts in the message, instead of a bare Exception('Test failed').
    self.assertEqual(len(slots), len(expected_slots))

    for slot, (expected_str, expected_data) in zip(slots, expected_slots):
        self.assertEqual(str(slot), expected_str)
        self.assertEqual(slot.data.content, expected_data)
def start(self, dataTimeSeries, start_dt, end_dt, rounded=False, threaded=False, callback=None, callback_trigger=None):
    '''Start the aggregator process.

    The time series is consumed in a streaming fashion: points are collected
    slot by slot, and a slot is aggregated (and its result appended to
    self.results_dataTimeSeries) as soon as a point beyond the slot end is met.
    As a consequence the last slot is aggregated only if the series contains
    at least one point after it.

    Arguments:
        dataTimeSeries: iterable of data time points, each exposing a "dt" attribute.
        start_dt: start of the aggregation. Leaving it unset (first datapoint
            used as start) is planned but not yet supported.
        end_dt: end of the aggregation. Leaving it unset (results provided
            until the last datapoint, useful for online processing) is planned
            but not yet supported.
        rounded: if True, start_dt and end_dt are rounded using
            self.timeSlotSpan.round_dt; if False they must already be equal
            to their rounded value.
        threaded: not used by this method body.
        callback: optional callable, invoked as callback(self, triggerer=self).
        callback_trigger: the callback is invoked each time the internal
            counter (starting at 1, incremented once per aggregated slot)
            exceeds this value; the counter is then reset to 1.

    Raises:
        NotImplementedError: if start_dt or end_dt is not set.
        InputException: if, after rounding, start_dt and end_dt are the same
            point, or (rounded=False) if start_dt or end_dt is not consistent
            with the timeSlotSpan.
    '''

    # For now start/end not set is not supported:
    if not start_dt or not end_dt:
        raise NotImplementedError('Empty start/end not yet implemented')

    # Handle the rounded case
    if rounded:
        start_dt = self.timeSlotSpan.round_dt(start_dt) if start_dt else None
        end_dt = self.timeSlotSpan.round_dt(end_dt) if end_dt else None
        if start_dt == end_dt:
            raise InputException('Sorry, after rounding start_dt and end_dt they are the same point! ({})'.format(start_dt))
    else:
        # Check for consistency
        if start_dt is not None:
            if start_dt != self.timeSlotSpan.round_dt(start_dt):
                raise InputException('Sorry, provided start_dt is not consistent with the timeSlotSpan ({})'.format(start_dt))
        if end_dt is not None:
            if end_dt != self.timeSlotSpan.round_dt(end_dt):
                raise InputException('Sorry, provided end_dt is not consistent with the timeSlotSpan ({})'.format(end_dt))

    # Set some support variables
    slot_start_dt = None
    slot_end_dt = None
    prev_dataTimePoint = None
    filtered_dataTimeSeries = DataTimeSeries()
    process_ended = False

    # Ok, start running the aggregators in a streaming-fashion way,
    # so going through all the data in the time series
    logger.info('Aggregation process started from {} to {} with a sensor of class {} on {}'.format(start_dt, end_dt, self.Sensor.__class__.__name__, dataTimeSeries))

    callback_counter = 1

    for dataTimePoint in dataTimeSeries:

        # Set start_dt if not already done.
        # NOTE: currently unreachable (an unset start_dt is rejected above). It uses the
        # same self.timeSlotSpan.round_dt call as the rest of this method.
        if not start_dt:
            start_dt = self.timeSlotSpan.round_dt(dataTimePoint.dt) if rounded else dataTimePoint.dt

        # The first (pre-first) slot "ends" at start_dt, so that the first point
        # after start_dt triggers the spinning of the first real slot
        if not slot_end_dt:
            slot_end_dt = start_dt

        # First, check if we have some points to discard at the beginning
        if dataTimePoint.dt < start_dt:
            # If we are here it means we are going through data belonging to a previous slot
            # (probably just spare data loaded to have access to the prev_datapoint)
            prev_dataTimePoint = dataTimePoint
            # Debug-level log instead of a Python 2 print statement on stdout
            logger.debug('dataTimePoint.dt (disc): {}'.format(dataTimePoint.dt))
            continue

        # Similar concept for the end: once the process is marked as ended,
        # every further point at or after end_dt is skipped
        if dataTimePoint.dt >= end_dt:
            if process_ended:
                continue

        # Here we manage all the cases according to start/end, missing slots etc.
        # We have also to create empty slots at the beginning, at the end and in the middle.
        # An empty slot will have every required value (according to DataSlots_labels) set to None.
        # Even if the dataTimeSeries is completely empty, we have the DataSlots_labels thanks to
        # the Sensor object which is mandatory. And in future maybe even encapsulated in the time series.

        #----------------------------
        # Slots handling
        #----------------------------

        # The following procedure works in general for slots at the beginning and in the middle.
        # The approach is to detect if the current slot is "outdated" and spin a new one if so.

        if dataTimePoint.dt > slot_end_dt:
            # If the current slot is outdated:

            # 1) Add this last point to the dataTimeSeries:
            filtered_dataTimeSeries.append(dataTimePoint)

            # 2) keep spinning new slots until the current data point falls in one of them.
            # NOTE: Read the following "while" more as an "if" which can also lead to spin multiple
            # slot if there are empty slots between the one being closed and the dataTimePoint.dt.
            # TODO: leave or remove the above if for code readability?

            while slot_end_dt < dataTimePoint.dt:

                # If we are in the pre-first slot, just silently spin a new slot:
                if slot_start_dt is not None:

                    logger.info('SlotStream: this slot (start={}, end={}) is closed, now aggregating it..'.format(slot_start_dt, slot_end_dt))

                    # Aggregate
                    aggregator_results = self.aggregator.aggregate(dataTimeSeries = filtered_dataTimeSeries,
                                                                   start_dt = slot_start_dt,
                                                                   end_dt = slot_end_dt,
                                                                   timeSlotSpan = self.timeSlotSpan,
                                                                   allow_None_data = self.allow_None_data)
                    # .. and append results
                    self.results_dataTimeSeries.append(aggregator_results)

                    # Also, handle the callback
                    callback_counter +=1
                    if callback_trigger and callback_counter > callback_trigger:
                        if callback:
                            callback(self, triggerer=self)
                        callback_counter = 1

                # Create a new slot
                slot_start_dt = slot_end_dt
                slot_end_dt = slot_start_dt + self.timeSlotSpan

                # Create a new filtered_dataTimeSeries as part of the 'create a new slot' procedure
                filtered_dataTimeSeries = DataTimeSeries()

                # Append the previous point (prev_dataTimePoint) to the new DataTimeSeries
                # NOTE(review): prev_dataTimePoint is still None here if no point has been
                # saved as previous yet - confirm that DataTimeSeries.append handles None.
                filtered_dataTimeSeries.append(prev_dataTimePoint)

                logger.info('SlotStream: Spinned a new slot (start={}, end={})'.format(slot_start_dt, slot_end_dt))

            # If last slot mark process as ended:
            if dataTimePoint.dt >= end_dt:
                process_ended = True

        #----------------------------
        # Time series filtering
        #----------------------------

        # Append this point
        filtered_dataTimeSeries.append(dataTimePoint)

        # ..and save as previous point
        prev_dataTimePoint = dataTimePoint

    #----------------------------
    # Last slots
    #----------------------------

    # Hard-coded switch: the block below is currently never executed
    force_close_last=False
    if force_close_last:

        # 1) Close the last slot and aggregate it. You should never do it unless you know what you are doing
        if filtered_dataTimeSeries:

            logger.info('SlotStream: this slot (start={}, end={}) is closed, now aggregating it..'.format(slot_start_dt, slot_end_dt))

            # Aggregate
            # NOTE(review): unlike the in-loop call, allow_None_data is not passed here - confirm it is intended.
            aggregator_results = self.aggregator.aggregate(dataTimeSeries = filtered_dataTimeSeries,
                                                           start_dt = slot_start_dt,
                                                           end_dt = slot_end_dt,
                                                           timeSlotSpan = self.timeSlotSpan)
            # .. and append results
            self.results_dataTimeSeries.append(aggregator_results)

            # Also, handle the callback
            callback_counter +=1
            if callback_trigger and callback_counter > callback_trigger:
                if callback:
                    callback(self, triggerer=self)
                callback_counter = 1
class test_compute_1D_coverage(unittest.TestCase):
    '''Tests for compute_1D_coverage on one-point-per-minute time series.

    Every fixture point carries a validity region span of one minute and is
    placed around t=1436022000, i.e. 2015-07-04 17:00:00+02:00 (Europe/Rome).
    '''

    def _build_series(self, start_t, points, skip=()):
        '''Build a DataTimeSeries of one-per-minute "power_W" points.

        Point number i (0-based) is placed at start_t + i*60 seconds and has
        value 154 + i. Indexes listed in "skip" are left out, creating a hole.
        '''
        series = DataTimeSeries()
        validity_region_span = TimeSlotSpan("1m")
        for i in range(points):
            if i in skip:
                continue
            data = PhysicalData(labels=["power_W"], values=[154 + i])
            physicalDataTimePoint = PhysicalDataTimePoint(
                t=start_t + (i * 60), tz="Europe/Rome", data=data, validity_region_span=validity_region_span
            )
            series.append(physicalDataTimePoint)
        return series

    def _reference_slot(self):
        '''Return fresh (start, end) TimePoints for the 17:00-17:30 slot.'''
        start_Point = TimePoint(t=1436022000, tz="Europe/Rome")  # 2015-07-04 17:00:00+02:00
        end_Point = TimePoint(t=1436022000 + 1800, tz="Europe/Rome")  # 2015-07-04 17:30:00+02:00
        return start_Point, end_Point

    def setUp(self):
        # TimeSeries from 16:58:00 to 17:32:00 (Europe/Rome)
        self.dataTimeSeries1 = self._build_series(1436022000 - 120, 35)

        # TimeSeries from 17:00:00 to 17:33:00 (Europe/Rome): nothing before
        # the slot start, three points after the slot end
        self.dataTimeSeries2 = self._build_series(1436022000, 34)

        # TimeSeries from 16:58:00 to 17:20:00 (Europe/Rome)
        self.dataTimeSeries3 = self._build_series(1436022000 - 120, 23)

        # TimeSeries from 17:10:00 to 17:30:00 (Europe/Rome)
        self.dataTimeSeries4 = self._build_series(1436022000 + 600, 21)

        # TimeSeries from 16:58:00 to 17:32:00 (Europe/Rome), without the ten
        # points from 17:09:00 to 17:18:00 (indexes 11 to 20)
        self.dataTimeSeries5 = self._build_series(1436022000 - 120, 35, skip=range(11, 21))

    def test_compute_1D_coverage_basic(self):
        # ----------------------------
        # Test wrong init parameters
        # ----------------------------
        with self.assertRaises(InputException):
            compute_1D_coverage(dataSeries=None, start_Point=None, end_Point=None)

        with self.assertRaises(NotImplementedError):
            compute_1D_coverage(dataSeries=self.dataTimeSeries1, start_Point=None, end_Point=None)

        with self.assertRaises(InputException):
            compute_1D_coverage(dataSeries=self.dataTimeSeries1, start_Point=5, end_Point=TimePoint(t=3))

        with self.assertRaises(InputException):
            compute_1D_coverage(dataSeries=self.dataTimeSeries1, start_Point=TimePoint(t=3), end_Point=5)

        # ----------------------------
        # Test logic
        # ----------------------------

        # Full coverage (coverage=1.0)
        start_Point, end_Point = self._reference_slot()
        Slot_coverage = compute_1D_coverage(
            dataSeries=self.dataTimeSeries1, start_Point=start_Point, end_Point=end_Point
        )
        self.assertEqual(Slot_coverage, 1.0)

        # A) Full coverage (coverage=1.0) again, to test repeatability
        start_Point, end_Point = self._reference_slot()
        Slot_coverage = compute_1D_coverage(
            dataSeries=self.dataTimeSeries1, start_Point=start_Point, end_Point=end_Point
        )
        self.assertEqual(Slot_coverage, 1.0)

        # B) Full coverage (coverage=1.0) without a previous point in the
        #    timeSeries (the series starts exactly at the slot start)
        start_Point, end_Point = self._reference_slot()
        Slot_coverage = compute_1D_coverage(
            dataSeries=self.dataTimeSeries2, start_Point=start_Point, end_Point=end_Point
        )
        self.assertEqual(Slot_coverage, 1.0)

        # C) Missing ten minutes over 30 at the end (coverage=0.683)
        start_Point, end_Point = self._reference_slot()
        Slot_coverage = compute_1D_coverage(
            dataSeries=self.dataTimeSeries3, start_Point=start_Point, end_Point=end_Point
        )
        # 20 minutes plus other 30 secs validity for the 20th point over 30 minutes
        # (float result: compared with assertAlmostEqual, as in case E)
        self.assertAlmostEqual(Slot_coverage, (((20 * 60.0) + 30.0) / (30 * 60.0)))

        # D) Missing ten minutes over 30 at the beginning (coverage=0.683)
        start_Point, end_Point = self._reference_slot()
        Slot_coverage = compute_1D_coverage(
            dataSeries=self.dataTimeSeries4, start_Point=start_Point, end_Point=end_Point
        )
        # 20 minutes plus other 30 secs (previous) validity for the 10th point over 30 minutes
        self.assertAlmostEqual(Slot_coverage, (((20 * 60.0) + 30.0) / (30 * 60.0)))

        # E) Missing ten minutes over 30 in the middle (coverage=0.66)
        start_Point, end_Point = self._reference_slot()
        Slot_coverage = compute_1D_coverage(
            dataSeries=self.dataTimeSeries5, start_Point=start_Point, end_Point=end_Point
        )
        # 20 minutes covered over 30 minutes
        self.assertAlmostEqual(Slot_coverage, (2.0 / 3.0))

    def tearDown(self):
        pass
def test_PutGet_DataTimePoints(self):
    '''Put and get point-based time series on an in-memory SQLite storage.'''

    storage = sqlite.DataTimeSeriesSQLiteStorage(in_memory=True)

    # Ten flowrate points, one per minute
    flowrate_series = DataTimeSeries()
    for minute in range(10):
        flowrate_data = PhysicalData(labels=['flowrate_m3s'], values=[20.6+minute])
        flowrate_series.append(PhysicalDataTimePoint(t=1436022000 + (minute*60), tz="Europe/Rome", data=flowrate_data))

    # Ten light points, one per minute
    light_series = DataTimeSeries()
    for minute in range(10):
        light_data = PhysicalData(labels=['light_pct'], values=[60.6+minute])
        light_series.append(PhysicalDataTimePoint(t=1436022000 + (minute*60), tz="Europe/Rome", data=light_data))

    # Putting data without a sensor is not implemented for now
    with self.assertRaises(NotImplementedError):
        storage.put(flowrate_series)

    # Two volumetric sensors: the first one gets the data, the second one stays empty
    sensor_with_data = VolumetricSensorV1('lu65na')
    sensor_without_data = VolumetricSensorV1('lu34na')

    # Labels not matching the sensor are rejected
    with self.assertRaises(InputException):
        storage.put(light_series, sensor=sensor_with_data)

    # Put with a sensor but without the right to create the structure
    with self.assertRaises(StorageException):
        storage.put(flowrate_series, sensor=sensor_with_data)

    # Get with a sensor when no structure is in the storage yet
    with self.assertRaises(StorageException):
        storage.get(sensor=sensor_with_data, cached=True)

    # Put with a sensor and the right to create the structure, then get it back
    # without from_dt/to_dt.
    # TODO: this is not correct unit test of the put and get. It is testing them at the same time!
    storage.put(flowrate_series, sensor=sensor_with_data, can_initialize=True)
    fetched_series = storage.get(sensor=sensor_with_data, cached=True)
    self.assertEqual(fetched_series, flowrate_series)

    # Get for a sensor with no data: it must equal a plain, empty DataTimeSeries
    fetched_series = storage.get(sensor=sensor_without_data, cached=True)
    self.assertEqual(fetched_series, DataTimeSeries())

    # Just a confirmation of the above steps. Should not be here in a proper unittesting approach.
    self.assertNotEqual(fetched_series, storage.get(sensor=sensor_with_data, cached=True))

    # Finally, the get restricted by from_dt and to_dt
    from_dt = dt(2015,7,4,17,3,0, tzinfo='Europe/Rome')
    to_dt = dt(2015,7,4,17,6,0, tzinfo='Europe/Rome')
    fetched_series = storage.get(sensor=sensor_with_data,
                                 from_dt=from_dt,
                                 to_dt=to_dt,
                                 cached=True)
    expected_series = flowrate_series.filter(from_dt=from_dt, to_dt=to_dt)
    self.assertEqual(fetched_series, expected_series)
def test_PutGet_DataTimeSlots(self):
    '''Put and get slot-based time series on an in-memory SQLite storage.'''

    storage = sqlite.DataTimeSeriesSQLiteStorage(in_memory=True)

    # Ten 60-second slots of aggregated flowrate data
    flowrate_series = DataTimeSeries()
    for minute in range(10):
        flowrate_data = PhysicalData(labels=['flowrate_m3s_AVG', 'flowrate_m3s_MIN', 'flowrate_m3s_MAX', 'volume_m3_TOT'],
                                     values=[20.6+minute,20.6+minute,20.6+minute,20.6+minute])
        flowrate_series.append(PhysicalDataTimeSlot(start=TimePoint(t=1436022000 + (minute*60),tz="Europe/Rome"),
                                                    end=TimePoint(t=1436022000 + ((minute+1)*60), tz="Europe/Rome"),
                                                    data=flowrate_data,
                                                    span=TimeSlotSpan('60s')))

    # Ten 60-second slots of aggregated light data
    light_series = DataTimeSeries()
    for minute in range(10):
        light_data = PhysicalData(labels=['light_pct_AVG'], values=[20.6+minute])
        light_series.append(PhysicalDataTimeSlot(start=TimePoint(t=1436022000 + (minute*60),tz="Europe/Rome"),
                                                 end=TimePoint(t=1436022000 + ((minute+1)*60), tz="Europe/Rome"),
                                                 data=light_data,
                                                 span=TimeSlotSpan('60s')))

    # Putting data without a sensor is not implemented for now
    with self.assertRaises(NotImplementedError):
        storage.put(flowrate_series)

    # Two volumetric sensors: the first one gets the data, the second one stays empty
    sensor_with_data = VolumetricSensorV1('lu65na')
    sensor_without_data = VolumetricSensorV1('lu34na')

    # Labels not matching the sensor are rejected
    with self.assertRaises(InputException):
        storage.put(light_series, sensor=sensor_with_data)

    # Put with a sensor but without the right to create the structure
    with self.assertRaises(StorageException):
        storage.put(flowrate_series, sensor=sensor_with_data)

    # Get with a sensor when no structure is in the storage yet
    with self.assertRaises(StorageException):
        storage.get(sensor=sensor_with_data, timeSlotSpan=TimeSlotSpan('60s'), cached=True)

    # Put with a sensor and the right to create the structure, then get it back
    # without from_dt/to_dt.
    # TODO: this is not correct unit test of the put and get. It is testing them at the same time!
    storage.put(flowrate_series, sensor=sensor_with_data, can_initialize=True)
    fetched_series = storage.get(sensor=sensor_with_data, timeSlotSpan=TimeSlotSpan('60s'), cached=True)
    self.assertEqual(fetched_series, flowrate_series)

    # Get for a sensor with no data: it must equal a plain, empty DataTimeSeries
    fetched_series = storage.get(sensor=sensor_without_data, timeSlotSpan=TimeSlotSpan('60s'), cached=True)
    self.assertEqual(fetched_series, DataTimeSeries())

    # Just a confirmation of the above steps. Should not be here in a proper unittesting approach.
    self.assertNotEqual(fetched_series,
                        storage.get(sensor=sensor_with_data, timeSlotSpan=TimeSlotSpan('60s'), cached=True))

    # The get restricted by from_dt and to_dt, cached
    from_dt = dt(2015,7,4,17,3,0, tzinfo='Europe/Rome')
    to_dt = dt(2015,7,4,17,6,0, tzinfo='Europe/Rome')
    fetched_series = storage.get(sensor=sensor_with_data,
                                 from_dt=from_dt,
                                 to_dt=to_dt,
                                 timeSlotSpan=TimeSlotSpan('60s'),
                                 cached=True)
    expected_series = flowrate_series.filter(from_dt=from_dt, to_dt=to_dt)
    self.assertEqual(fetched_series, expected_series)

    # Going through the cached streaming time series a second time must give the same result
    self.assertEqual(fetched_series, expected_series)

    # Same request, this time without caching
    fetched_series = storage.get(sensor=sensor_with_data,
                                 from_dt=from_dt,
                                 to_dt=to_dt,
                                 timeSlotSpan=TimeSlotSpan('60s'))

    # It can be compared as is even if it is not cached..
    self.assertEqual(fetched_series, expected_series)

    # ..and compared again
    self.assertEqual(fetched_series, expected_series)

    # Get the time series without caching once more
    fetched_series = storage.get(sensor=sensor_with_data,
                                 from_dt=from_dt,
                                 to_dt=to_dt,
                                 timeSlotSpan=TimeSlotSpan('60s'))

    # This time no comparison is done first (that triggers the caching of the TimeSeries):
    # going through it twice must give the same items (under the hood we go twice in the DB)
    first_pass = [item for item in fetched_series]
    second_pass = [item for item in fetched_series]
    self.assertEqual(first_pass, second_pass)

    # WARNING: This is specific to SQLite and its dataTimeStream
    self.assertEqual(fetched_series.dataTimeStream.get_statistics()['source_acceses'], 2)

    # Force-load the time series: one more source access is performed
    fetched_series.force_load()
    self.assertEqual(fetched_series.dataTimeStream.get_statistics()['source_acceses'], 3)

    # Iterating after a force_load must not generate a new source access
    third_pass = [item for item in fetched_series]
    self.assertEqual(first_pass, third_pass)
    self.assertEqual(fetched_series.dataTimeStream.get_statistics()['source_acceses'], 3)

    # Repeat the double iteration check..
    first_pass = [item for item in fetched_series]
    second_pass = [item for item in fetched_series]
    self.assertEqual(first_pass, second_pass)

    # ..and ensure that the source accesses count is still three
    self.assertEqual(fetched_series.dataTimeStream.get_statistics()['source_acceses'], 3)