def test_pipeline(self):
    """Run the GoodSections pipeline end-to-end over two meters and a
    range of chunk sizes, then verify cached results can be written,
    re-read and cleared."""
    meter1 = ElecMeter(store=self.datastore, metadata=self.meter_meta,
                       meter_id=METER_ID)

    # Second meter is mains() from the bundled co_test.h5 dataset.
    dataset = DataSet(join(data_dir(), 'co_test.h5'))
    meter2 = dataset.buildings[1].elec.mains()

    chunk_sizes = (None, 2 ** 10, 2 ** 29)
    for elec_meter in (meter1, meter2):
        for chunk in chunk_sizes:
            kwargs = {} if chunk is None else {'chunksize': chunk}
            src_node = elec_meter.get_source_node(**kwargs)
            section_finder = GoodSections(src_node)
            section_finder.run()
            # NOTE(review): .simple() call left disabled in the original;
            # confirm which results API this test should exercise.
            combined = section_finder.results  # .simple()
            elec_meter.clear_cache()
            # First call computes and caches; second should hit the cache.
            elec_meter.good_sections(**kwargs)
            elec_meter.good_sections(**kwargs)
            elec_meter.clear_cache()
    dataset.store.close()
def test_process_chunk(self):
    """Run GoodSections._process_chunk over a synthetic power series,
    first as a single chunk and then split at several points, checking
    that the same four good sections (30s, 10s, 50s, 20s) are found
    each time.
    """
    MAX_SAMPLE_PERIOD = 10
    metadata = {'device': {'max_sample_period': MAX_SAMPLE_PERIOD}}

    # Sample times in seconds.  Any gap larger than MAX_SAMPLE_PERIOD
    # ends a good section, giving sections of 30, 10, 50 and 20 seconds.
    #         0   1   2   3   4   5    6    7
    secs = [  0, 10, 20, 30, 50, 60, 100, 200,
    #         8    9   10   11   12   13   14   15   16
            250, 260, 270, 280, 290, 300, 350, 360, 370]
    index = pd.DatetimeIndex([pd.Timestamp('2011-01-01 00:00:00') +
                              timedelta(seconds=sec) for sec in secs])
    df = pd.DataFrame(data=np.random.randn(len(index), 3),
                      index=index, columns=['a', 'b', 'c'])
    df.timeframe = TimeFrame(index[0], index[-1])
    df.look_ahead = pd.DataFrame()

    # --- Whole series processed as a single chunk -----------------------
    locate = GoodSections()
    locate.results = GoodSectionsResults(MAX_SAMPLE_PERIOD)
    locate._process_chunk(df, metadata)
    results = locate.results.combined()
    self.assertEqual(len(results), 4)
    self.assertEqual(results[0].timedelta.total_seconds(), 30)
    self.assertEqual(results[1].timedelta.total_seconds(), 10)
    self.assertEqual(results[2].timedelta.total_seconds(), 50)
    self.assertEqual(results[3].timedelta.total_seconds(), 20)

    # --- Same series split into multiple chunks -------------------------
    # Chunk j covers rows [prev_i, i) and timeframe
    # [timestamps[j], timestamps[j + 1]).
    timestamps = [
        pd.Timestamp("2011-01-01 00:00:00"),
        pd.Timestamp("2011-01-01 00:00:40"),
        pd.Timestamp("2011-01-01 00:01:20"),
        pd.Timestamp("2011-01-01 00:04:20"),
        pd.Timestamp("2011-01-01 00:06:20")
    ]
    for split_point in [[4, 6, 9, 17], [4, 10, 12, 17]]:
        locate = GoodSections()
        locate.results = GoodSectionsResults(MAX_SAMPLE_PERIOD)
        # NOTE(review): this sets an ad-hoc 'results' attribute on the
        # DataFrame itself; presumably a leftover — confirm it is needed.
        df.results = {}
        prev_i = 0
        for j, i in enumerate(split_point):
            cropped_df = df.iloc[prev_i:i]
            cropped_df.timeframe = TimeFrame(timestamps[j], timestamps[j + 1])
            # BUGFIX: the original wrapped this in try/except IndexError,
            # but .iloc slicing never raises IndexError — an out-of-range
            # slice just yields an empty frame — so the handler was dead
            # code and has been removed.
            cropped_df.look_ahead = df.iloc[i:]
            prev_i = i
            locate._process_chunk(cropped_df, metadata)

        results = locate.results.combined()
        self.assertEqual(len(results), 4)
        self.assertEqual(results[0].timedelta.total_seconds(), 30)
        self.assertEqual(results[1].timedelta.total_seconds(), 10)
        self.assertEqual(results[2].timedelta.total_seconds(), 50)
        self.assertEqual(results[3].timedelta.total_seconds(), 20)
def test_process_chunk(self):
    """Run GoodSections._process_chunk over a synthetic power series,
    first as a single chunk and then split at several points, checking
    that the same four good sections (30s, 10s, 50s, 20s) are found
    each time.
    """
    MAX_SAMPLE_PERIOD = 10
    metadata = {'device': {'max_sample_period': MAX_SAMPLE_PERIOD}}

    # Sample times in seconds.  Any gap larger than MAX_SAMPLE_PERIOD
    # ends a good section, giving sections of 30, 10, 50 and 20 seconds.
    #       0   1   2   3   4   5    6    7
    secs = [0, 10, 20, 30, 50, 60, 100, 200,
    #       8    9   10   11   12   13   14   15   16
            250, 260, 270, 280, 290, 300, 350, 360, 370]
    index = pd.DatetimeIndex([
        pd.Timestamp('2011-01-01 00:00:00') + timedelta(seconds=sec)
        for sec in secs
    ])
    df = pd.DataFrame(data=np.random.randn(len(index), 3),
                      index=index, columns=['a', 'b', 'c'])
    df.timeframe = TimeFrame(index[0], index[-1])
    df.look_ahead = pd.DataFrame()

    # --- Whole series processed as a single chunk -----------------------
    locate = GoodSections()
    locate.results = GoodSectionsResults(MAX_SAMPLE_PERIOD)
    locate._process_chunk(df, metadata)
    # NOTE(review): .combined() call left disabled in the original;
    # confirm whether the raw results object supports len()/indexing here.
    results = locate.results  # .combined()
    self.assertEqual(len(results), 4)
    # CONSISTENCY FIX: was assertAlmostEqual for results[0] only; the
    # expected durations are exact integral seconds, so assertEqual is
    # used throughout, matching the sibling assertions.
    self.assertEqual(results[0].timedelta.total_seconds(), 30)
    self.assertEqual(results[1].timedelta.total_seconds(), 10)
    self.assertEqual(results[2].timedelta.total_seconds(), 50)
    self.assertEqual(results[3].timedelta.total_seconds(), 20)

    # --- Same series split into multiple chunks -------------------------
    # Chunk j covers rows [prev_i, i) and timeframe
    # [timestamps[j], timestamps[j + 1]).
    timestamps = [
        pd.Timestamp("2011-01-01 00:00:00"),
        pd.Timestamp("2011-01-01 00:00:40"),
        pd.Timestamp("2011-01-01 00:01:20"),
        pd.Timestamp("2011-01-01 00:04:20"),
        pd.Timestamp("2011-01-01 00:06:20")
    ]
    for split_point in [[4, 6, 9, 17], [4, 10, 12, 17]]:
        locate = GoodSections()
        locate.results = GoodSectionsResults(MAX_SAMPLE_PERIOD)
        # NOTE(review): this sets an ad-hoc 'results' attribute on the
        # DataFrame itself; presumably a leftover — confirm it is needed.
        df.results = {}
        prev_i = 0
        for j, i in enumerate(split_point):
            cropped_df = df.iloc[prev_i:i]
            cropped_df.timeframe = TimeFrame(timestamps[j], timestamps[j + 1])
            # BUGFIX: the original wrapped this in try/except IndexError,
            # but .iloc slicing never raises IndexError — an out-of-range
            # slice just yields an empty frame — so the handler was dead
            # code and has been removed.
            cropped_df.look_ahead = df.iloc[i:]
            prev_i = i
            locate._process_chunk(cropped_df, metadata)

        # NOTE(review): .combined() disabled here too — see note above.
        results = locate.results  # .combined()
        self.assertEqual(len(results), 4)
        self.assertEqual(results[0].timedelta.total_seconds(), 30)
        self.assertEqual(results[1].timedelta.total_seconds(), 10)
        self.assertEqual(results[2].timedelta.total_seconds(), 50)
        self.assertEqual(results[3].timedelta.total_seconds(), 20)
def test_pipeline(self):
    """Exercise the GoodSections pipeline against two meters, with and
    without explicit chunk sizes, and verify cached good-sections results
    round-trip through the store."""
    meter1 = ElecMeter(store=self.datastore, metadata=self.meter_meta,
                       meter_id=METER_ID)
    # Second meter comes from the bundled co_test.h5 dataset.
    dataset = DataSet(join(data_dir(), 'co_test.h5'))
    meter2 = dataset.buildings[1].elec.mains()

    for current_meter in (meter1, meter2):
        for size in (None, 2 ** 10, 2 ** 29):
            load_args = {'chunksize': size} if size is not None else {}
            node = current_meter.get_source_node(**load_args)
            section_finder = GoodSections(node)
            section_finder.run()
            combined = section_finder.results.simple()
            current_meter.clear_cache()
            # First call computes and caches; second should be served
            # from the cache just written.
            current_meter.good_sections(**load_args)
            current_meter.good_sections(**load_args)
            current_meter.clear_cache()
    dataset.store.close()