def test_init(self):
    """Check if correct folder is used"""
    cwd = os.getcwd()

    # An explicit folder argument must be taken over verbatim.
    cache = caching.Cache('standby', folder=cwd)
    self.assertEqual(cache.folder, cwd)

    # Without a folder argument the cache defaults to <cwd>/data/cache_day.
    cache = caching.Cache('water_standby')
    self.assertEqual(cache.folder, os.path.join(cwd, 'data', 'cache_day'))
def test_write(self):
    """Write dataframe with multiple columns.

    `_write` must create one csv file per column, named
    <variable>_<columnname>.csv.
    """
    ch = caching.Cache('elec_temp')
    # pd.date_range replaces the DatetimeIndex(start=...) constructor,
    # which was removed in pandas 0.24.
    index = pd.date_range(start='20160101', freq='D', periods=3, tz='UTC')
    df = pd.DataFrame(index=index,
                      data=np.random.randn(3, 2),
                      columns=['testsensor1', 'testsensor2'])
    expected_path1 = os.path.join(test_dir, cfg.get('data', 'folder'),
                                  'cache_day', 'elec_temp_testsensor1.csv')
    expected_path2 = os.path.join(test_dir, cfg.get('data', 'folder'),
                                  'cache_day', 'elec_temp_testsensor2.csv')
    self.assertFalse(os.path.exists(expected_path1))
    self.assertFalse(os.path.exists(expected_path2))
    try:
        ch._write(df)
        self.assertTrue(os.path.exists(expected_path1))
        self.assertTrue(os.path.exists(expected_path2))
    finally:
        # Guard the cleanup: if _write failed before creating a file,
        # an unconditional os.remove would raise FileNotFoundError and
        # mask the original failure.  (The former `except: raise` was a
        # no-op and has been dropped.)
        for path in (expected_path1, expected_path2):
            if os.path.exists(path):
                os.remove(path)
def test_write_single2(self):
    """Write a timeseries (pd.Series).

    A named Series is cached as <variable>_<name>.csv; a nameless
    Series must raise ValueError.
    """
    ch = caching.Cache('elec_temp')
    # pd.date_range replaces the DatetimeIndex(start=...) constructor,
    # which was removed in pandas 0.24.
    index = pd.date_range(start='20160101', freq='D', periods=3, tz='UTC')
    ts = pd.Series(index=index, data=np.random.randn(3),
                   name='testsensor_series')
    expected_path = os.path.join(test_dir, cfg.get('data', 'folder'),
                                 'cache_day',
                                 'elec_temp_testsensor_series.csv')
    self.assertFalse(os.path.exists(expected_path))
    try:
        ch._write_single(ts)
        self.assertTrue(os.path.exists(expected_path))
    finally:
        # Guarded cleanup so a failing _write_single does not get its
        # error masked by a FileNotFoundError from os.remove.
        if os.path.exists(expected_path):
            os.remove(expected_path)

    # raise ValueError on series without name
    nameless = pd.Series(index=index, data=np.random.randn(3))
    self.assertRaises(ValueError, ch._write_single, nameless)
def test_check_df(self):
    """check if dataframe is not empty and has daily frequency"""
    ch = caching.Cache('elec_standby')

    # empty frame -> rejected
    df = pd.DataFrame()
    self.assertFalse(ch.check_df(df))

    # pd.date_range replaces the DatetimeIndex(start=...) constructor,
    # which was removed in pandas 0.24.
    index = pd.date_range(start='20160101', freq='D', periods=3, tz='UTC')
    df = pd.DataFrame(index=index, data=np.random.randn(3, 2),
                      columns=['A', 'B'])
    self.assertTrue(ch.check_df(df))

    # an explicit list of consecutive days also counts as daily frequency
    index = pd.DatetimeIndex(['20160201', '20160202', '20160203'], tz='UTC')
    df = pd.DataFrame(index=index, data=np.random.randn(3), columns=['A'])
    self.assertTrue(ch.check_df(df))

    # a gap in the index (Feb 3 missing) -> rejected
    index = pd.DatetimeIndex(['20160201', '20160202', '20160204'], tz='UTC')
    df = pd.DataFrame(index=index, data=np.random.randn(3), columns=['A'])
    self.assertFalse(ch.check_df(df))
def test_get_multiple(self):
    """Obtain cached results and return a correct dataframe.

    The returned frame spans the union of the sensors' date ranges;
    days missing for one sensor are filled with NaN.
    """
    ch = caching.Cache('elec_standby')
    mysensor = Sensor(key='mysensor', device=None, site='None', type=None,
                      description=None, system=None, quantity=None,
                      unit=None, direction=None, tariff=None,
                      cumulative=None)
    mysensor2 = Sensor(key='mysensor2', device=None, site='None', type=None,
                       description=None, system=None, quantity=None,
                       unit=None, direction=None, tariff=None,
                       cumulative=None)
    df = ch.get([mysensor, mysensor2], end='20160104')
    # pd.date_range replaces the DatetimeIndex(start=...) constructor,
    # which was removed in pandas 0.24.
    expected_index = pd.date_range(start='20160101', freq='D', periods=4,
                                   tz='UTC')
    self.assertTrue((df.index == expected_index).all())
    self.assertListEqual(df.columns.tolist(), ['mysensor', 'mysensor2'])
    # .ix was removed in pandas 1.0; use positional access per column.
    self.assertEqual(df['mysensor2'].iloc[1], 5)
    self.assertTrue(np.isnan(df['mysensor2'].iloc[3]))
def test_write_single1(self):
    """Write dataframe with a single column only.

    A single-column frame is cached to <variable>_<column>.pkl; a
    multi-column frame must raise ValueError.
    """
    ch = caching.Cache('elec_temp')
    # pd.date_range replaces the DatetimeIndex(start=...) constructor,
    # which was removed in pandas 0.24.
    index = pd.date_range(start='20160101', freq='D', periods=3,
                          tz='Europe/Brussels')
    df = pd.DataFrame(index=index, data=np.random.randn(3),
                      columns=['testsensor'])
    expected_path = os.path.join(test_dir, cfg.get('data', 'folder'),
                                 'cache_day', 'elec_temp_testsensor.pkl')
    self.assertFalse(os.path.exists(expected_path))
    try:
        ch._write_single(df)
        self.assertTrue(os.path.exists(expected_path))
    finally:
        # Guarded cleanup so a failing _write_single does not get its
        # error masked by a FileNotFoundError from os.remove.
        if os.path.exists(expected_path):
            os.remove(expected_path)

    # raise ValueError on dataframe with multiple columns
    df = pd.DataFrame(index=index, data=np.random.randn(3, 2),
                      columns=['testsensor1', 'testsensor2'])
    self.assertRaises(ValueError, ch._write_single, df)
def test_load(self):
    """Load and parse a cached object correctly."""
    ch = caching.Cache('elec_standby')
    df = ch._load('mysensor')
    # pd.date_range replaces the DatetimeIndex(start=...) constructor,
    # which was removed in pandas 0.24.
    expected_index = pd.date_range(start='20160101', freq='D', periods=365,
                                   tz='UTC')
    self.assertTrue((df.index == expected_index).all())
    # Compare as plain lists: assertEqual on an Index vs. a list is
    # ambiguous (elementwise comparison) for indexes longer than one.
    self.assertListEqual(df.columns.tolist(), ['mysensor'])
def test_get_single(self):
    """Obtain cached results and return a correct dataframe.

    Checks full range, `end`-only truncation, a `start`/`end` window,
    and that the returned index stays timezone-aware.
    """
    ch = caching.Cache('elec_standby')
    mysensor = Sensor(key='mysensor', device=None, site='None', type=None,
                      description=None, system=None, quantity=None,
                      unit=None, direction=None, tariff=None,
                      cumulative=None)

    # full cached range
    df = ch.get([mysensor])
    # pd.date_range replaces the DatetimeIndex(start=...) constructor,
    # which was removed in pandas 0.24.
    expected = pd.date_range(start='20160101', freq='D', periods=365,
                             tz='UTC')
    self.assertTrue((df.index == expected).all())
    # Compare as plain lists: assertEqual on an Index vs. a list is
    # ambiguous (elementwise comparison) for indexes longer than one.
    self.assertListEqual(df.columns.tolist(), ['mysensor'])

    # truncate on the right
    df = ch.get([mysensor], end='20160115')
    expected = pd.date_range(start='20160101', freq='D', periods=15,
                             tz='UTC')
    self.assertTrue((df.index == expected).all())

    # explicit window
    df = ch.get([mysensor], start='20160707', end='20160708')
    expected = pd.date_range(start='20160707', freq='D', periods=2,
                             tz='UTC')
    self.assertTrue((df.index == expected).all())
    self.assertFalse(df.index.tz is None, "Returned dataframe is tz-naive")
def test_update_multiple(self):
    """Update an existing cached sensor with new information.

    Overlapping days are overwritten by the update, non-overlapping
    old days are kept, and new days are appended.
    """
    ch = caching.Cache('elec_temp')
    testsensor2 = Sensor(key='testsensor2', device=None, site='None',
                         type=None, description=None, system=None,
                         quantity=None, unit=None, direction=None,
                         tariff=None, cumulative=None)
    # write a dataframe with two columns
    # pd.date_range replaces the DatetimeIndex(start=...) constructor,
    # which was removed in pandas 0.24.
    index = pd.date_range(start='20160101', freq='D', periods=3, tz='UTC')
    df = pd.DataFrame(index=index,
                      data=dict(testsensor1=[0, 1, 2],
                                testsensor2=[0, 1, 2]))
    expected_path1 = os.path.join(test_dir, cfg.get('data', 'folder'),
                                  'cache_day', 'elec_temp_testsensor1.csv')
    expected_path2 = os.path.join(test_dir, cfg.get('data', 'folder'),
                                  'cache_day', 'elec_temp_testsensor2.csv')
    self.assertFalse(os.path.exists(expected_path1))
    self.assertFalse(os.path.exists(expected_path2))
    try:
        ch.update(df)
        self.assertTrue(os.path.exists(expected_path1))
        self.assertTrue(os.path.exists(expected_path2))

        # update with days 3-5: day 3 overlaps, days 4-5 are new
        index = pd.date_range(start='20160103', freq='D', periods=3,
                              tz='UTC')
        df_new = pd.DataFrame(index=index,
                              data=dict(testsensor1=[100, 200, 300],
                                        testsensor2=[100, 200, 300]))
        ch.update(df_new)
        df_res = ch.get([testsensor2])
        self.assertEqual(df_res.iloc[1, 0], 1)    # untouched old value
        self.assertEqual(df_res.iloc[2, 0], 100)  # overwritten by update
        self.assertEqual(df_res.iloc[4, 0], 300)  # appended by update
    finally:
        # Guarded cleanup: if update failed before creating a file, an
        # unconditional os.remove would mask the original failure.
        # (The former `except: raise` was a no-op and has been dropped.)
        for path in (expected_path1, expected_path2):
            if os.path.exists(path):
                os.remove(path)
def test_check_df_series(self):
    """check if series is not empty and has daily frequency"""
    ch = caching.Cache('elec_standby')
    # dtype=float: constructing an empty Series without a dtype emits a
    # FutureWarning (object-dtype default) in recent pandas.
    empty = pd.Series(dtype=float)
    self.assertFalse(ch.check_df(empty))
    # pd.date_range replaces the DatetimeIndex(start=...) constructor,
    # which was removed in pandas 0.24.
    index = pd.date_range(start='20160101', freq='D', periods=3, tz='UTC')
    ts = pd.Series(index=index, data=np.random.randn(3), name='A')
    self.assertTrue(ch.check_df(ts))
def test_update_single(self):
    """Update an existing cached sensor with new information.

    Same scenario as test_update_multiple but for a single-column
    (pickled) cache file.
    """
    ch = caching.Cache('elec_temp')
    testsensor = Sensor(key='testsensor', device=None, site='None',
                        type=None, description=None, system=None,
                        quantity=None, unit=None, direction=None,
                        tariff=None, cumulative=None)
    # Compute the path once, before the try, so the finally block can
    # reuse it instead of rebuilding it.
    expected_path = os.path.join(test_dir, cfg.get('data', 'folder'),
                                 'cache_day', 'elec_temp_testsensor.pkl')
    try:
        # write a dataframe with single column
        # pd.date_range replaces the DatetimeIndex(start=...)
        # constructor, which was removed in pandas 0.24.
        index = pd.date_range(start='20160101', freq='D', periods=3,
                              tz='Europe/Brussels')
        df = pd.DataFrame(index=index, data=[0, 1, 2],
                          columns=['testsensor'])
        self.assertFalse(os.path.exists(expected_path))
        ch._write_single(df)

        # update with days 3-5: day 3 overlaps, days 4-5 are new
        index = pd.date_range(start='20160103', freq='D', periods=3,
                              tz='Europe/Brussels')
        df_new = pd.DataFrame(index=index, data=[100, 200, 300],
                              columns=['testsensor'])
        ch.update(df_new)
        df_res = ch.get([testsensor])
        self.assertEqual(df_res.iloc[1, 0], 1)    # untouched old value
        self.assertEqual(df_res.iloc[2, 0], 100)  # overwritten by update
        self.assertEqual(df_res.iloc[4, 0], 300)  # appended by update
    finally:
        # Guarded cleanup: if the write failed, an unconditional
        # os.remove would mask the original failure.  (The former
        # `except: raise` was a no-op and has been dropped.)
        if os.path.exists(expected_path):
            os.remove(expected_path)
def test_get_raises(self):
    """Raise TypeError when argument sensors is not a list"""
    cache = caching.Cache('elec_standby')
    single_sensor = Sensor(key='mysensor', device=None, site='None',
                           type=None, description=None, system=None,
                           quantity=None, unit=None, direction=None,
                           tariff=None, cumulative=None)
    # Passing a bare Sensor instead of a list must be rejected.
    with self.assertRaises(TypeError):
        cache.get(single_sensor)
def compute(sensorid, start_model, end_model):
    """Build monthly and weekly multivariable regression models for one
    sensor and save the resulting figures to the configured folder.

    Parameters
    ----------
    sensorid : str
        Key of the sensor to model.
    start_model, end_model
        Timestamps delimiting the training period; data after
        end_model is only used for the weekly prediction plot.

    Exits the process (sys.exit(1)) when there is not enough data to
    build a monthly (>=2 months) or weekly (>=4 weeks) model.
    """
    end = pd.Timestamp('now', tz='Europe/Brussels')
    # Create houseprint from saved file, if not available, parse the
    # google spreadsheet
    try:
        hp_filename = os.path.join(c.get('data', 'folder'),
                                   'hp_anonymous.pkl')
        hp = houseprint.load_houseprint_from_file(hp_filename)
        print("Houseprint loaded from {}".format(hp_filename))
    except Exception as e:
        print(e)
        print(
            "Because of this error we try to build the houseprint from source")
        hp = houseprint.Houseprint()
        hp.init_tmpo()

    # Load the cached daily data
    sensor = hp.find_sensor(sensorid)
    cache = caching.Cache(variable='{}_daily_total'.format(sensor.type))
    df_day = cache.get(sensors=[sensor])
    df_day.rename(columns={sensorid: sensor.type}, inplace=True)

    # Load the cached weather data, clean up and compose a combined dataframe
    weather = forecastwrapper.Weather(location=(50.8024, 4.3407),
                                      start=start_model, end=end)
    irradiances = [
        (0, 90),    # north vertical
        (90, 90),   # east vertical
        (180, 90),  # south vertical
        (270, 90),  # west vertical
    ]
    orientations = [0, 90, 180, 270]
    weather_data = weather.days(
        irradiances=irradiances,
        wind_orients=orientations,
        heating_base_temperatures=[0, 6, 8, 10, 12, 14, 16,
                                   18]).dropna(axis=1)
    # Drop non-numeric / bookkeeping columns that cannot enter a
    # regression model.
    weather_data.drop([
        'icon', 'summary', 'moonPhase', 'windBearing', 'temperatureMaxTime',
        'temperatureMinTime', 'apparentTemperatureMaxTime',
        'apparentTemperatureMinTime', 'uvIndexTime', 'sunsetTime',
        'sunriseTime'
    ], axis=1, inplace=True)

    # Add columns for the day-of-week (one-hot encoded)
    for i, d in zip(range(7), [
            'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
            'Saturday', 'Sunday'
    ]):
        weather_data[d] = 0
        weather_data.loc[weather_data.index.weekday == i, d] = 1
    weather_data = weather_data.applymap(float)

    data = pd.concat([df_day, weather_data], axis=1).dropna()
    data = data.tz_convert('Europe/Brussels')

    # monthly model, statistical validation
    df = data.resample(rule='MS').sum()
    if len(df) < 2:
        print("Not enough data for building a monthly reference model")
        sys.exit(1)

    # .loc replaces the .ix indexer, which was removed in pandas 1.0;
    # the slice is label-based (up to and including end_model).
    mv = regression.MVLinReg(df.loc[:end_model], sensor.type, p_max=0.03)
    figures = mv.plot(df=df)
    figures[0].savefig(os.path.join(c.get('data', 'folder'), 'figures',
                                    'multivar_model_' + sensorid + '.png'),
                       dpi=100)
    figures[1].savefig(os.path.join(c.get('data', 'folder'), 'figures',
                                    'multivar_results_' + sensorid + '.png'),
                       dpi=100)

    # weekly model, statistical validation
    df = data.resample(rule='W').sum()
    if len(df.loc[:end_model]) < 4:
        print("Not enough data for building a weekly reference model")
        sys.exit(1)
    mv = regression.MVLinReg(df.loc[:end_model], sensor.type, p_max=0.02)
    if len(df.loc[end_model:]) > 0:
        figures = mv.plot(model=False, bar_chart=True,
                          df=df.loc[end_model:])
        figures[0].savefig(os.path.join(
            c.get('data', 'folder'), 'figures',
            'multivar_prediction_weekly_' + sensorid + '.png'), dpi=100)
# The first time, this will take a very looong time to get all the detailed data for building the cache # Afterwards, this is quick starttime = dt.time(0, tzinfo=BXL) endtime = dt.time(5, tzinfo=BXL) caching.cache_results(hp=hp, sensors=sensors, resultname='elec_min_night_0-5', AnalysisClass=DailyAgg, agg='min', chunk=False, starttime=starttime, endtime=endtime) caching.cache_results(hp=hp, sensors=sensors, resultname='elec_max_night_0-5', AnalysisClass=DailyAgg, agg='max', chunk=False, starttime=starttime, endtime=endtime) # In[ ]: cache_min = caching.Cache(variable='elec_min_night_0-5') cache_max = caching.Cache(variable='elec_max_night_0-5') dfdaymin = cache_min.get(sensors=sensors) dfdaymax = cache_max.get(sensors=sensors) dfdaymin.info() # The next plot shows that some periods are missing. Due to the cumulative nature of the electricity counter, we still have the total consumption. However, it is spread out of the entire period. So we don't know the standby power during these days, and we have to remove those days. # In[ ]: if DEV: sensor = hp.search_sensors(key='3aa4')[0] df = sensor.get_data(head=pd.Timestamp('20151117'), tail=pd.Timestamp('20160104')) charts.plot(df, stock=True, show='inline')
# Load houseprint from cache if possible, otherwise build it from source
try:
    hp_filename = os.path.join(c.get('data', 'folder'), 'hp_anonymous.pkl')
    hp = houseprint.load_houseprint_from_file(hp_filename)
    print("Houseprint loaded from {}".format(hp_filename))
except Exception as e:
    print(e)
    print("Because of this error we try to build the houseprint from source")
    hp = houseprint.Houseprint()
    hp.init_tmpo()

# Get the cache objects for gas, elec and water, and update them,
# sensor by sensor
for sensortype in ['gas', 'electricity', 'water']:
    cache = caching.Cache(variable=sensortype + '_daily_total')
    sensors = hp.get_sensors(sensortype=sensortype)
    df_cached = cache.get(sensors=sensors)
    # for each sensor:
    # 1. get the last timestamp of the cached daily total
    # 2. get the daily data since then
    # 3. fill up the cache with the new data
    print('Caching daily totals for {}'.format(sensortype))
    for sensor in tqdm(sensors):
        try:
            last_ts = df_cached[sensor.key].dropna().index[-1]
            last_ts = last_ts.tz_convert('Europe/Brussels')
        except (KeyError, IndexError):
            # No cached data for this sensor yet (column missing or all
            # NaN): fall back to the epoch so everything is fetched.
            # The former bare `except:` was narrowed to the two errors
            # a missing/empty column can actually raise, so real bugs
            # are no longer swallowed.
            last_ts = pd.Timestamp('1970-01-01', tz='Europe/Brussels')
# In[ ]: # The first time, this will take a very looong time to get all the detailed data for building the cache # Afterwards, this is quick caching.cache_results(hp=hp, sensors=sensors, function='daily_min', resultname='elec_daily_min') caching.cache_results(hp=hp, sensors=sensors, function='daily_max', resultname='elec_daily_max') # In[ ]: cache_min = caching.Cache(variable='elec_daily_min') cache_max = caching.Cache(variable='elec_daily_max') dfdaymin = cache_min.get(sensors=sensors) dfdaymax = cache_max.get(sensors=sensors) # The next plot shows that some periods are missing. Due to the cumulative nature of the electricity counter, we still have the total consumption. However, it is spread out of the entire period. So we don't know the standby power during these days, and we have to remove those days. # In[ ]: if DEV: sensor = hp.search_sensors(key='3aa4')[0] df = sensor.get_data(head=pd.Timestamp('20151117'), tail=pd.Timestamp('20160104')) charts.plot(df, stock=True, show='inline') # In[ ]:
sensors = hp.get_sensors(sensortype='electricity')  # sensor objects

# Remove some sensors
exclude = [
    '565de0a7dc64d8370aa321491217b85f'  # 3E
]
solar = [x.key for x in hp.search_sensors(type='electricity',
                                          system='solar')]
exclude += solar

# BUG FIX: the original called sensors.remove(s) while iterating over
# `sensors`, which skips the element right after each removal and can
# leave excluded sensors in the list.  Build a filtered list instead.
sensors = [s for s in sensors if s.key not in exclude]

hp.init_tmpo()
hp.sync_tmpos()

cache = caching.Cache(variable='elec_standby')
for s in sensors[:1]:
    # get cached data
    df_cached = cache.get(s.key)
    try:
        last_day = df_cached.index[-1]
    except IndexError:
        # empty cache: fetch from the beginning (head=0)
        last_day = 0

    # get new data, full resolution
    df_new = hp.get_data(sensors=[s], head=last_day)
    print("Now make a dataframe with daily index, and standby power")