Example #1
    def test_init(self):
        """Check if correct folder is used"""

        ch = caching.Cache('standby', folder=os.getcwd())
        self.assertEqual(ch.folder, os.getcwd())

        ch = caching.Cache('water_standby')
        self.assertEqual(ch.folder, os.path.join(os.getcwd(), 'data', 'cache_day'))
Example #2
    def test_write(self):
        """Write dataframe with multiple columns"""
        ch = caching.Cache('elec_temp')

        # write a dataframe with two columns
        index = pd.DatetimeIndex(start='20160101',
                                 freq='D',
                                 periods=3,
                                 tz='UTC')
        df = pd.DataFrame(index=index,
                          data=np.random.randn(3, 2),
                          columns=['testsensor1', 'testsensor2'])
        expected_path1 = os.path.join(test_dir, cfg.get('data', 'folder'),
                                      'cache_day', 'elec_temp_testsensor1.csv')
        expected_path2 = os.path.join(test_dir, cfg.get('data', 'folder'),
                                      'cache_day', 'elec_temp_testsensor2.csv')
        self.assertFalse(os.path.exists(expected_path1))
        self.assertFalse(os.path.exists(expected_path2))

        try:
            ch._write(df)
            self.assertTrue(os.path.exists(expected_path1))
            self.assertTrue(os.path.exists(expected_path2))
        except:
            raise
        finally:
            os.remove(expected_path1)
            os.remove(expected_path2)
Example #3
    def test_write_single2(self):
        """Write timeseries """
        ch = caching.Cache('elec_temp')

        # write a series with a name
        index = pd.DatetimeIndex(start='20160101',
                                 freq='D',
                                 periods=3,
                                 tz='UTC')
        df = pd.Series(index=index,
                       data=np.random.randn(3),
                       name='testsensor_series')
        expected_path = os.path.join(test_dir, cfg.get('data',
                                                       'folder'), 'cache_day',
                                     'elec_temp_testsensor_series.csv')
        self.assertFalse(os.path.exists(expected_path))
        try:
            ch._write_single(df)
            self.assertTrue(os.path.exists(expected_path))
        except:
            raise
        finally:
            os.remove(expected_path)

        # raise ValueError on series without name
        df = pd.Series(index=index, data=np.random.randn(3))
        self.assertRaises(ValueError, ch._write_single, df)
Example #4
    def test_check_df(self):
        """check if dataframe is not empty and has daily frequency"""
        ch = caching.Cache('elec_standby')

        df = pd.DataFrame()
        self.assertFalse(ch.check_df(df))

        index = pd.DatetimeIndex(start='20160101',
                                 freq='D',
                                 periods=3,
                                 tz='UTC')
        df = pd.DataFrame(index=index,
                          data=np.random.randn(3, 2),
                          columns=['A', 'B'])
        self.assertTrue(ch.check_df(df))

        index = pd.DatetimeIndex(['20160201', '20160202', '20160203'],
                                 tz='UTC')
        df = pd.DataFrame(index=index, data=np.random.randn(3), columns=['A'])
        self.assertTrue(ch.check_df(df))

        index = pd.DatetimeIndex(['20160201', '20160202', '20160204'],
                                 tz='UTC')
        df = pd.DataFrame(index=index, data=np.random.randn(3), columns=['A'])
        self.assertFalse(ch.check_df(df))
Example #5
    def test_get_multiple(self):
        """Obtain cached results and return a correct dataframe"""
        ch = caching.Cache('elec_standby')
        mysensor = Sensor(key='mysensor',
                          device=None,
                          site='None',
                          type=None,
                          description=None,
                          system=None,
                          quantity=None,
                          unit=None,
                          direction=None,
                          tariff=None,
                          cumulative=None)
        mysensor2 = Sensor(key='mysensor2',
                           device=None,
                           site='None',
                           type=None,
                           description=None,
                           system=None,
                           quantity=None,
                           unit=None,
                           direction=None,
                           tariff=None,
                           cumulative=None)
        df = ch.get([mysensor, mysensor2], end='20160104')

        self.assertTrue((df.index == pd.DatetimeIndex(start='20160101',
                                                      freq='D',
                                                      periods=4,
                                                      tz='UTC')).all())
        self.assertListEqual(df.columns.tolist(), ['mysensor', 'mysensor2'])
        self.assertEqual(df.ix[1, 'mysensor2'], 5)
        self.assertTrue(np.isnan(df.ix[3, 'mysensor2']))
Example #6
    def test_write_single1(self):
        """Write dataframe with single columns only"""
        ch = caching.Cache('elec_temp')

        # write a dataframe with single column
        index = pd.DatetimeIndex(start='20160101',
                                 freq='D',
                                 periods=3,
                                 tz='Europe/Brussels')
        df = pd.DataFrame(index=index,
                          data=np.random.randn(3),
                          columns=['testsensor'])
        expected_path = os.path.join(test_dir, cfg.get('data', 'folder'),
                                     'cache_day', 'elec_temp_testsensor.pkl')
        self.assertFalse(os.path.exists(expected_path))
        try:
            ch._write_single(df)
            self.assertTrue(os.path.exists(expected_path))
        except:
            raise
        finally:
            os.remove(expected_path)

        # raise ValueError on dataframe with multiple columns
        df = pd.DataFrame(index=index,
                          data=np.random.randn(3, 2),
                          columns=['testsensor1', 'testsensor2'])
        self.assertRaises(ValueError, ch._write_single, df)
Example #7
    def test_load(self):
        """Load and parse a cached object correctly"""
        ch = caching.Cache('elec_standby')
        df = ch._load('mysensor')

        self.assertTrue((df.index == pd.DatetimeIndex(start='20160101',
                                                      freq='D',
                                                      periods=365,
                                                      tz='UTC')).all())
        self.assertEqual(df.columns, ['mysensor'])
Example #8
    def test_get_single(self):
        """Obtain cached results and return a correct dataframe"""
        ch = caching.Cache('elec_standby')
        mysensor = Sensor(key='mysensor',
                          device=None,
                          site='None',
                          type=None,
                          description=None,
                          system=None,
                          quantity=None,
                          unit=None,
                          direction=None,
                          tariff=None,
                          cumulative=None)
        df = ch.get([mysensor])

        self.assertTrue((df.index == pd.DatetimeIndex(start='20160101',
                                                      freq='D',
                                                      periods=365,
                                                      tz='UTC')).all())
        self.assertEqual(df.columns, ['mysensor'])

        df = ch.get([mysensor], end='20160115')
        self.assertTrue((df.index == pd.DatetimeIndex(start='20160101',
                                                      freq='D',
                                                      periods=15,
                                                      tz='UTC')).all())

        df = ch.get([mysensor], start='20160707', end='20160708')
        self.assertTrue((df.index == pd.DatetimeIndex(start='20160707',
                                                      freq='D',
                                                      periods=2,
                                                      tz='UTC')).all())
        self.assertFalse(df.index.tz is None, "Returned dataframe is tz-naive")
Example #9
    def test_update_multiple(self):
        """Update an existing cached sensor with new information"""

        ch = caching.Cache('elec_temp')
        testsensor2 = Sensor(key='testsensor2', device=None, site='None',
                             type=None, description=None, system=None,
                             quantity=None, unit=None, direction=None,
                             tariff=None, cumulative=None)

        # write a dataframe with two columns
        index = pd.DatetimeIndex(start='20160101', freq='D', periods=3, tz='UTC')
        df = pd.DataFrame(index=index,
                          data=dict(testsensor1=[0, 1, 2], testsensor2=[0, 1, 2]))
        expected_path1 = os.path.join(test_dir, cfg.get('data', 'folder'), 'cache_day', 'elec_temp_testsensor1.csv')
        expected_path2 = os.path.join(test_dir, cfg.get('data', 'folder'), 'cache_day', 'elec_temp_testsensor2.csv')
        self.assertFalse(os.path.exists(expected_path1))
        self.assertFalse(os.path.exists(expected_path2))
        try:
            ch.update(df)
            self.assertTrue(os.path.exists(expected_path1))
            self.assertTrue(os.path.exists(expected_path2))

            index = pd.DatetimeIndex(start='20160103', freq='D', periods=3, tz='UTC')
            df_new = pd.DataFrame(index=index, data=dict(testsensor1=[100,200,300], testsensor2=[100,200,300]))
            ch.update(df_new)
            df_res = ch.get([testsensor2])

            self.assertEqual(df_res.iloc[1,0], 1)
            self.assertEqual(df_res.iloc[2,0], 100)
            self.assertEqual(df_res.iloc[4,0], 300)
        except:
            raise
        finally:
            os.remove(expected_path1)
            os.remove(expected_path2)
Example #10
    def test_check_df_series(self):
        """check if series is not empty and has daily frequency"""
        ch = caching.Cache('elec_standby')

        df = pd.Series()
        self.assertFalse(ch.check_df(df))

        index = pd.DatetimeIndex(start='20160101', freq='D', periods=3, tz='UTC')
        ts = pd.Series(index=index, data=np.random.randn(3), name='A')
        self.assertTrue(ch.check_df(ts))
Example #11
    def test_update_single(self):
        """Update an existing cached sensor with new information"""

        ch = caching.Cache('elec_temp')
        testsensor = Sensor(key='testsensor',
                            device=None,
                            site='None',
                            type=None,
                            description=None,
                            system=None,
                            quantity=None,
                            unit=None,
                            direction=None,
                            tariff=None,
                            cumulative=None)

        try:
            # write a dataframe with single column
            index = pd.DatetimeIndex(start='20160101',
                                     freq='D',
                                     periods=3,
                                     tz='Europe/Brussels')
            df = pd.DataFrame(index=index,
                              data=[0, 1, 2],
                              columns=['testsensor'])
            expected_path = os.path.join(test_dir, cfg.get('data', 'folder'),
                                         'cache_day',
                                         'elec_temp_testsensor.pkl')
            self.assertFalse(os.path.exists(expected_path))
            ch._write_single(df)

            index = pd.DatetimeIndex(start='20160103',
                                     freq='D',
                                     periods=3,
                                     tz='Europe/Brussels')
            df_new = pd.DataFrame(index=index,
                                  data=[100, 200, 300],
                                  columns=['testsensor'])
            ch.update(df_new)
            df_res = ch.get([testsensor])

            self.assertEqual(df_res.iloc[1, 0], 1)
            self.assertEqual(df_res.iloc[2, 0], 100)
            self.assertEqual(df_res.iloc[4, 0], 300)
        except:
            raise
        finally:
            os.remove(
                os.path.join(test_dir, cfg.get('data', 'folder'), 'cache_day',
                             'elec_temp_testsensor.pkl'))
Example #12
    def test_get_raises(self):
        """Raise TypeError when argument sensors is not a list"""
        ch = caching.Cache('elec_standby')
        mysensor = Sensor(key='mysensor',
                          device=None,
                          site='None',
                          type=None,
                          description=None,
                          system=None,
                          quantity=None,
                          unit=None,
                          direction=None,
                          tariff=None,
                          cumulative=None)
        self.assertRaises(TypeError, ch.get, mysensor)
Example #13
def compute(sensorid, start_model, end_model):
    end = pd.Timestamp('now', tz='Europe/Brussels')
    # Create the houseprint from the saved file; if not available, parse the Google spreadsheet
    try:
        hp_filename = os.path.join(c.get('data', 'folder'), 'hp_anonymous.pkl')
        hp = houseprint.load_houseprint_from_file(hp_filename)
        print("Houseprint loaded from {}".format(hp_filename))
    except Exception as e:
        print(e)
        print(
            "Because of this error we try to build the houseprint from source")
        hp = houseprint.Houseprint()
    hp.init_tmpo()

    # Load the cached daily data
    sensor = hp.find_sensor(sensorid)
    cache = caching.Cache(variable='{}_daily_total'.format(sensor.type))
    df_day = cache.get(sensors=[sensor])
    df_day.rename(columns={sensorid: sensor.type}, inplace=True)

    # Load the cached weather data, clean up and compose a combined dataframe
    weather = forecastwrapper.Weather(location=(50.8024, 4.3407),
                                      start=start_model,
                                      end=end)
    irradiances = [
        (0, 90),  # north vertical
        (90, 90),  # east vertical
        (180, 90),  # south vertical
        (270, 90),  # west vertical
    ]
    orientations = [0, 90, 180, 270]
    weather_data = weather.days(
        irradiances=irradiances,
        wind_orients=orientations,
        heating_base_temperatures=[0, 6, 8, 10, 12, 14, 16, 18]).dropna(axis=1)
    weather_data.drop([
        'icon', 'summary', 'moonPhase', 'windBearing', 'temperatureMaxTime',
        'temperatureMinTime', 'apparentTemperatureMaxTime',
        'apparentTemperatureMinTime', 'uvIndexTime', 'sunsetTime',
        'sunriseTime'
    ],
                      axis=1,
                      inplace=True)
    # Add columns for the day-of-week
    for i, d in zip(range(7), [
            'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
            'Sunday'
    ]):
        weather_data[d] = 0
        weather_data.loc[weather_data.index.weekday == i, d] = 1
    weather_data = weather_data.applymap(float)

    data = pd.concat([df_day, weather_data], axis=1).dropna()
    data = data.tz_convert('Europe/Brussels')

    df = data.resample(rule='MS').sum()
    if len(df) < 2:
        print("Not enough data for building a monthly reference model")
        sys.exit(1)

    # monthly model, statistical validation
    mv = regression.MVLinReg(df.ix[:end_model], sensor.type, p_max=0.03)
    figures = mv.plot(df=df)

    figures[0].savefig(os.path.join(c.get('data', 'folder'), 'figures',
                                    'multivar_model_' + sensorid + '.png'),
                       dpi=100)
    figures[1].savefig(os.path.join(c.get('data', 'folder'), 'figures',
                                    'multivar_results_' + sensorid + '.png'),
                       dpi=100)

    # weekly model, statistical validation
    df = data.resample(rule='W').sum()
    if len(df.ix[:end_model]) < 4:
        print("Not enough data for building a weekly reference model")
        sys.exit(1)
    mv = regression.MVLinReg(df.ix[:end_model], sensor.type, p_max=0.02)
    if len(df.ix[end_model:]) > 0:
        figures = mv.plot(model=False, bar_chart=True, df=df.ix[end_model:])
        figures[0].savefig(os.path.join(
            c.get('data', 'folder'), 'figures',
            'multivar_prediction_weekly_' + sensorid + '.png'),
                           dpi=100)
Example #14

# The first time, this will take a very looong time to get all the detailed data for building the cache
# Afterwards, this is quick
starttime = dt.time(0, tzinfo=BXL)
endtime = dt.time(5, tzinfo=BXL)
caching.cache_results(hp=hp, sensors=sensors, resultname='elec_min_night_0-5', AnalysisClass=DailyAgg,  
                      agg='min', chunk=False, starttime=starttime, endtime=endtime)

caching.cache_results(hp=hp, sensors=sensors, resultname='elec_max_night_0-5', AnalysisClass=DailyAgg, 
                      agg='max', chunk=False, starttime=starttime, endtime=endtime)


# In[ ]:

cache_min = caching.Cache(variable='elec_min_night_0-5')
cache_max = caching.Cache(variable='elec_max_night_0-5')
dfdaymin = cache_min.get(sensors=sensors)
dfdaymax = cache_max.get(sensors=sensors)
dfdaymin.info()


# The next plot shows that some periods are missing.  Due to the cumulative nature of the electricity counter, we still have the total consumption.  However, it is spread out over the entire period.  So we don't know the standby power during these days, and we have to remove those days (one way to do this is sketched in the cell after the plot below).

# In[ ]:

if DEV:
    sensor = hp.search_sensors(key='3aa4')[0]
    df = sensor.get_data(head=pd.Timestamp('20151117'), tail=pd.Timestamp('20160104'))
    charts.plot(df, stock=True, show='inline')
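
# One way to remove those days is sketched below.  This cell is not part of the
# original notebook: it assumes that days without valid measurements show up as
# NaN in the cached daily minima, which may not hold for every sensor.

# In[ ]:

# drop days for which no sensor has a valid daily minimum, and keep min and max aligned
dfdaymin_clean = dfdaymin.dropna(how='all')
dfdaymax_clean = dfdaymax.reindex(dfdaymin_clean.index)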
Example #15
# Load houseprint from cache if possible, otherwise build it from source
try:
    hp_filename = os.path.join(c.get('data', 'folder'), 'hp_anonymous.pkl')
    hp = houseprint.load_houseprint_from_file(hp_filename)
    print("Houseprint loaded from {}".format(hp_filename))
except Exception as e:
    print(e)
    print("Because of this error we try to build the houseprint from source")
    hp = houseprint.Houseprint()

hp.init_tmpo()

# Get the cache objects for gas, elec and water, and update them, sensor by sensor
for sensortype in ['gas', 'electricity', 'water']:
    cache = caching.Cache(variable=sensortype + '_daily_total')
    sensors = hp.get_sensors(sensortype=sensortype)
    df_cached = cache.get(sensors=sensors)

    # for each sensor:
    # 1. get the last timestamp of the cached daily total
    # 2. get the daily data since then
    # 3. fill up the cache with the new data
    print('Caching daily totals for {}'.format(sensortype))
    for sensor in tqdm(sensors):
        try:
            last_ts = df_cached[sensor.key].dropna().index[-1]
            last_ts = last_ts.tz_convert('Europe/Brussels')
        except:
            last_ts = pd.Timestamp('1970-01-01', tz='Europe/Brussels')
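
        # The example is truncated here.  A hedged sketch of steps 2 and 3 follows,
        # using only calls that appear elsewhere in these examples (sensor.get_data
        # and cache.update); the daily resampling to a total is an assumption.
        df_new = sensor.get_data(head=last_ts)       # step 2: data since last_ts
        if df_new is not None and not df_new.empty:
            df_day = df_new.resample('D').sum()      # assumed daily totals
            if isinstance(df_day, pd.Series):
                df_day = df_day.rename(sensor.key).to_frame()
            cache.update(df_day)                     # step 3: fill up the cache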
Example #16

# In[ ]:

# The first time, this will take a very looong time to get all the detailed data for building the cache
# Afterwards, this is quick
caching.cache_results(hp=hp,
                      sensors=sensors,
                      function='daily_min',
                      resultname='elec_daily_min')
caching.cache_results(hp=hp,
                      sensors=sensors,
                      function='daily_max',
                      resultname='elec_daily_max')

# In[ ]:

cache_min = caching.Cache(variable='elec_daily_min')
cache_max = caching.Cache(variable='elec_daily_max')
dfdaymin = cache_min.get(sensors=sensors)
dfdaymax = cache_max.get(sensors=sensors)

# The next plot shows that some periods are missing.  Due to the cumulative nature of the electricity counter, we still have the total consumption.  However, it is spread out over the entire period.  So we don't know the standby power during these days, and we have to remove those days (the cell after the plot below counts the affected days per sensor).

# In[ ]:

if DEV:
    sensor = hp.search_sensors(key='3aa4')[0]
    df = sensor.get_data(head=pd.Timestamp('20151117'),
                         tail=pd.Timestamp('20160104'))
    charts.plot(df, stock=True, show='inline')
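
# In[ ]:

# Not in the original notebook: a minimal sketch that quantifies the missing
# periods per sensor instead of eyeballing the plot.  It assumes the columns of
# dfdaymin are the sensor keys and that missing days are stored as NaN.
missing_days = dfdaymin.isnull().sum().sort_values(ascending=False)
print(missing_days.head())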

Example #17
sensors = hp.get_sensors(sensortype='electricity')  # sensor objects

# Remove some sensors
exclude = [
    '565de0a7dc64d8370aa321491217b85f'  # 3E
]
solar = [x.key for x in hp.search_sensors(type='electricity', system='solar')]
exclude += solar

# don't remove items from a list while iterating over it; build a filtered list instead
sensors = [s for s in sensors if s.key not in exclude]

hp.init_tmpo()
hp.sync_tmpos()

cache = caching.Cache(variable='elec_standby')

for s in sensors[:1]:
    # get cached data
    df_cached = cache.get(sensors=[s])  # get() expects a list of sensors
    try:
        last_day = df_cached.index[-1]
    except IndexError:
        last_day = 0

    # get new data, full resolution
    df_new = hp.get_data(sensors=[s], head=last_day)

    print("Now make a dataframe with daily index, and standby power")