Code Example #1
    def test_init(self):
        """Check if correct folder is used"""

        ch = caching.Cache('standby', folder=os.getcwd())
        self.assertEqual(ch.folder, os.getcwd())

        ch = caching.Cache('water_standby')
        self.assertEqual(ch.folder, os.path.join(os.getcwd(), 'data', 'cache_day'))
Code Example #2
File: test_caching.py Project: toonhooy/opengrid
    def test_write(self):
        """Write dataframe with multiple columns"""
        ch = caching.Cache('elec_temp')

        # write a dataframe with two columns
        index = pd.DatetimeIndex(start='20160101',
                                 freq='D',
                                 periods=3,
                                 tz='UTC')
        df = pd.DataFrame(index=index,
                          data=np.random.randn(3, 2),
                          columns=['testsensor1', 'testsensor2'])
        expected_path1 = os.path.join(test_dir, cfg.get('data', 'folder'),
                                      'cache_day', 'elec_temp_testsensor1.csv')
        expected_path2 = os.path.join(test_dir, cfg.get('data', 'folder'),
                                      'cache_day', 'elec_temp_testsensor2.csv')
        self.assertFalse(os.path.exists(expected_path1))
        self.assertFalse(os.path.exists(expected_path2))

        try:
            ch._write(df)
            self.assertTrue(os.path.exists(expected_path1))
            self.assertTrue(os.path.exists(expected_path2))
        except:
            raise
        finally:
            os.remove(expected_path1)
            os.remove(expected_path2)
Code Example #3
File: test_caching.py Project: toonhooy/opengrid
    def test_write_single2(self):
        """Write timeseries """
        ch = caching.Cache('elec_temp')

        # write a series
        index = pd.DatetimeIndex(start='20160101',
                                 freq='D',
                                 periods=3,
                                 tz='UTC')
        df = pd.Series(index=index,
                       data=np.random.randn(3),
                       name='testsensor_series')
        expected_path = os.path.join(test_dir, cfg.get('data',
                                                       'folder'), 'cache_day',
                                     'elec_temp_testsensor_series.csv')
        self.assertFalse(os.path.exists(expected_path))
        try:
            ch._write_single(df)
            self.assertTrue(os.path.exists(expected_path))
        except:
            raise
        finally:
            os.remove(expected_path)

        # raise ValueError on series without name
        df = pd.Series(index=index, data=np.random.randn(3))
        self.assertRaises(ValueError, ch._write_single, df)
Code Example #4
File: test_caching.py Project: toonhooy/opengrid
    def test_check_df(self):
        """check if dataframe is not empty and has daily frequency"""
        ch = caching.Cache('elec_standby')

        df = pd.DataFrame()
        self.assertFalse(ch.check_df(df))

        index = pd.DatetimeIndex(start='20160101',
                                 freq='D',
                                 periods=3,
                                 tz='UTC')
        df = pd.DataFrame(index=index,
                          data=np.random.randn(3, 2),
                          columns=['A', 'B'])
        self.assertTrue(ch.check_df(df))

        index = pd.DatetimeIndex(['20160201', '20160202', '20160203'],
                                 tz='UTC')
        df = pd.DataFrame(index=index, data=np.random.randn(3), columns=['A'])
        self.assertTrue(ch.check_df(df))

        index = pd.DatetimeIndex(['20160201', '20160202', '20160204'],
                                 tz='UTC')
        df = pd.DataFrame(index=index, data=np.random.randn(3), columns=['A'])
        self.assertFalse(ch.check_df(df))
Code Example #5
File: test_caching.py Project: toonhooy/opengrid
    def test_get_multiple(self):
        """Obtain cached results and return a correct dataframe"""
        ch = caching.Cache('elec_standby')
        mysensor = Sensor(key='mysensor',
                          device=None,
                          site='None',
                          type=None,
                          description=None,
                          system=None,
                          quantity=None,
                          unit=None,
                          direction=None,
                          tariff=None,
                          cumulative=None)
        mysensor2 = Sensor(key='mysensor2',
                           device=None,
                           site='None',
                           type=None,
                           description=None,
                           system=None,
                           quantity=None,
                           unit=None,
                           direction=None,
                           tariff=None,
                           cumulative=None)
        df = ch.get([mysensor, mysensor2], end='20160104')

        self.assertTrue((df.index == pd.DatetimeIndex(start='20160101',
                                                      freq='D',
                                                      periods=4,
                                                      tz='UTC')).all())
        self.assertListEqual(df.columns.tolist(), ['mysensor', 'mysensor2'])
        self.assertEqual(df.ix[1, 'mysensor2'], 5)
        self.assertTrue(np.isnan(df.ix[3, 'mysensor2']))
Code Example #6
    def test_write_single1(self):
        """Write dataframe with single columns only"""
        ch = caching.Cache('elec_temp')

        # write a dataframe with single column
        index = pd.DatetimeIndex(start='20160101',
                                 freq='D',
                                 periods=3,
                                 tz='Europe/Brussels')
        df = pd.DataFrame(index=index,
                          data=np.random.randn(3),
                          columns=['testsensor'])
        expected_path = os.path.join(test_dir, cfg.get('data', 'folder'),
                                     'cache_day', 'elec_temp_testsensor.pkl')
        self.assertFalse(os.path.exists(expected_path))
        try:
            ch._write_single(df)
            self.assertTrue(os.path.exists(expected_path))
        except:
            raise
        finally:
            os.remove(expected_path)

        # raise ValueError on dataframe with multiple columns
        df = pd.DataFrame(index=index,
                          data=np.random.randn(3, 2),
                          columns=['testsensor1', 'testsensor2'])
        self.assertRaises(ValueError, ch._write_single, df)
Code Example #7
    def test_load(self):
        """Load and parse a cached object correctly"""
        ch = caching.Cache('elec_standby')
        df = ch._load('mysensor')

        self.assertTrue((df.index == pd.DatetimeIndex(start='20160101', freq='D', periods=365, tz='UTC')).all())
        self.assertEqual(df.columns, ['mysensor'])
Code Example #8
File: test_caching.py Project: toonhooy/opengrid
    def test_get_single(self):
        """Obtain cached results and return a correct dataframe"""
        ch = caching.Cache('elec_standby')
        mysensor = Sensor(key='mysensor',
                          device=None,
                          site='None',
                          type=None,
                          description=None,
                          system=None,
                          quantity=None,
                          unit=None,
                          direction=None,
                          tariff=None,
                          cumulative=None)
        df = ch.get([mysensor])

        self.assertTrue((df.index == pd.DatetimeIndex(start='20160101',
                                                      freq='D',
                                                      periods=365,
                                                      tz='UTC')).all())
        self.assertEqual(df.columns, ['mysensor'])

        df = ch.get([mysensor], end='20160115')
        self.assertTrue((df.index == pd.DatetimeIndex(start='20160101',
                                                      freq='D',
                                                      periods=15,
                                                      tz='UTC')).all())

        df = ch.get([mysensor], start='20160707', end='20160708')
        self.assertTrue((df.index == pd.DatetimeIndex(start='20160707',
                                                      freq='D',
                                                      periods=2,
                                                      tz='UTC')).all())
        self.assertFalse(df.index.tz is None, "Returned dataframe is tz-naive")
Code Example #9
    def test_update_multiple(self):
        """Update an existing cached sensor with new information"""

        ch = caching.Cache('elec_temp')
        testsensor2 = Sensor(key='testsensor2', device=None, site='None', type=None, description=None,
                             system=None, quantity=None, unit=None, direction=None, tariff=None, cumulative=None)

        # write a dataframe with two columns
        index = pd.DatetimeIndex(start='20160101', freq='D', periods=3, tz='UTC')
        df = pd.DataFrame(index=index, data=dict(testsensor1=[0, 1, 2], testsensor2=[0, 1, 2]))
        expected_path1 = os.path.join(test_dir, cfg.get('data', 'folder'), 'cache_day', 'elec_temp_testsensor1.csv')
        expected_path2 = os.path.join(test_dir, cfg.get('data', 'folder'), 'cache_day', 'elec_temp_testsensor2.csv')
        self.assertFalse(os.path.exists(expected_path1))
        self.assertFalse(os.path.exists(expected_path2))
        try:
            ch.update(df)
            self.assertTrue(os.path.exists(expected_path1))
            self.assertTrue(os.path.exists(expected_path2))

            index = pd.DatetimeIndex(start='20160103', freq='D', periods=3, tz='UTC')
            df_new = pd.DataFrame(index=index, data=dict(testsensor1=[100, 200, 300], testsensor2=[100, 200, 300]))
            ch.update(df_new)
            df_res = ch.get([testsensor2])

            self.assertEqual(df_res.iloc[1,0], 1)
            self.assertEqual(df_res.iloc[2,0], 100)
            self.assertEqual(df_res.iloc[4,0], 300)
        except:
            raise
        finally:
            os.remove(expected_path1)
            os.remove(expected_path2)
Code Example #10
    def test_check_df_series(self):
        """check if series is not empty and has daily frequency"""
        ch = caching.Cache('elec_standby')

        df = pd.Series()
        self.assertFalse(ch.check_df(df))

        index = pd.DatetimeIndex(start='20160101', freq='D', periods=3, tz='UTC')
        ts = pd.Series(index=index, data=np.random.randn(3), name='A')
        self.assertTrue(ch.check_df(ts))
Code Example #11
    def test_update_single(self):
        """Update an existing cached sensor with new information"""

        ch = caching.Cache('elec_temp')
        testsensor = Sensor(key='testsensor',
                            device=None,
                            site='None',
                            type=None,
                            description=None,
                            system=None,
                            quantity=None,
                            unit=None,
                            direction=None,
                            tariff=None,
                            cumulative=None)

        try:
            # write a dataframe with single column
            index = pd.DatetimeIndex(start='20160101',
                                     freq='D',
                                     periods=3,
                                     tz='Europe/Brussels')
            df = pd.DataFrame(index=index,
                              data=[0, 1, 2],
                              columns=['testsensor'])
            expected_path = os.path.join(test_dir, cfg.get('data', 'folder'),
                                         'cache_day',
                                         'elec_temp_testsensor.pkl')
            self.assertFalse(os.path.exists(expected_path))
            ch._write_single(df)

            index = pd.DatetimeIndex(start='20160103',
                                     freq='D',
                                     periods=3,
                                     tz='Europe/Brussels')
            df_new = pd.DataFrame(index=index,
                                  data=[100, 200, 300],
                                  columns=['testsensor'])
            ch.update(df_new)
            df_res = ch.get([testsensor])

            self.assertEqual(df_res.iloc[1, 0], 1)
            self.assertEqual(df_res.iloc[2, 0], 100)
            self.assertEqual(df_res.iloc[4, 0], 300)
        except:
            raise
        finally:
            os.remove(
                os.path.join(test_dir, cfg.get('data', 'folder'), 'cache_day',
                             'elec_temp_testsensor.pkl'))
Code Example #12
File: test_caching.py Project: toonhooy/opengrid
    def test_get_raises(self):
        """Raise TypeError when argument sensors is not a list"""
        ch = caching.Cache('elec_standby')
        mysensor = Sensor(key='mysensor',
                          device=None,
                          site='None',
                          type=None,
                          description=None,
                          system=None,
                          quantity=None,
                          unit=None,
                          direction=None,
                          tariff=None,
                          cumulative=None)
        self.assertRaises(TypeError, ch.get, mysensor)
Code Example #13
def compute(sensorid, start_model, end_model):
    end = pd.Timestamp('now', tz='Europe/Brussels')
    # Create houseprint from saved file; if not available, parse the Google spreadsheet
    try:
        hp_filename = os.path.join(c.get('data', 'folder'), 'hp_anonymous.pkl')
        hp = houseprint.load_houseprint_from_file(hp_filename)
        print("Houseprint loaded from {}".format(hp_filename))
    except Exception as e:
        print(e)
        print(
            "Because of this error we try to build the houseprint from source")
        hp = houseprint.Houseprint()
    hp.init_tmpo()

    # Load the cached daily data
    sensor = hp.find_sensor(sensorid)
    cache = caching.Cache(variable='{}_daily_total'.format(sensor.type))
    df_day = cache.get(sensors=[sensor])
    df_day.rename(columns={sensorid: sensor.type}, inplace=True)

    # Load the cached weather data, clean up and compose a combined dataframe
    weather = forecastwrapper.Weather(location=(50.8024, 4.3407),
                                      start=start_model,
                                      end=end)
    irradiances = [
        (0, 90),  # north vertical
        (90, 90),  # east vertical
        (180, 90),  # south vertical
        (270, 90),  # west vertical
    ]
    orientations = [0, 90, 180, 270]
    weather_data = weather.days(
        irradiances=irradiances,
        wind_orients=orientations,
        heating_base_temperatures=[0, 6, 8, 10, 12, 14, 16, 18]).dropna(axis=1)
    weather_data.drop([
        'icon', 'summary', 'moonPhase', 'windBearing', 'temperatureMaxTime',
        'temperatureMinTime', 'apparentTemperatureMaxTime',
        'apparentTemperatureMinTime', 'uvIndexTime', 'sunsetTime',
        'sunriseTime'
    ],
                      axis=1,
                      inplace=True)
    # Add columns for the day-of-week
    for i, d in zip(range(7), [
            'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday',
            'Sunday'
    ]):
        weather_data[d] = 0
        weather_data.loc[weather_data.index.weekday == i, d] = 1
    weather_data = weather_data.applymap(float)

    data = pd.concat([df_day, weather_data], axis=1).dropna()
    data = data.tz_convert('Europe/Brussels')

    df = data.resample(rule='MS').sum()
    if len(df) < 2:
        print("Not enough data for building a monthly reference model")
        sys.exit(1)

    # monthly model, statistical validation
    mv = regression.MVLinReg(df.ix[:end_model], sensor.type, p_max=0.03)
    figures = mv.plot(df=df)

    figures[0].savefig(os.path.join(c.get('data', 'folder'), 'figures',
                                    'multivar_model_' + sensorid + '.png'),
                       dpi=100)
    figures[1].savefig(os.path.join(c.get('data', 'folder'), 'figures',
                                    'multivar_results_' + sensorid + '.png'),
                       dpi=100)

    # weekly model, statistical validation
    df = data.resample(rule='W').sum()
    if len(df.ix[:end_model]) < 4:
        print("Not enough data for building a weekly reference model")
        sys.exit(1)
    mv = regression.MVLinReg(df.ix[:end_model], sensor.type, p_max=0.02)
    if len(df.ix[end_model:]) > 0:
        figures = mv.plot(model=False, bar_chart=True, df=df.ix[end_model:])
        figures[0].savefig(os.path.join(
            c.get('data', 'folder'), 'figures',
            'multivar_prediction_weekly_' + sensorid + '.png'),
                           dpi=100)
Code Example #14

# The first time, this will take a very looong time to get all the detailed data for building the cache
# Afterwards, this is quick
starttime = dt.time(0, tzinfo=BXL)
endtime = dt.time(5, tzinfo=BXL)
caching.cache_results(hp=hp, sensors=sensors, resultname='elec_min_night_0-5', AnalysisClass=DailyAgg,  
                      agg='min', chunk=False, starttime=starttime, endtime=endtime)

caching.cache_results(hp=hp, sensors=sensors, resultname='elec_max_night_0-5', AnalysisClass=DailyAgg, 
                      agg='max', chunk=False, starttime=starttime, endtime=endtime)


# In[ ]:

cache_min = caching.Cache(variable='elec_min_night_0-5')
cache_max = caching.Cache(variable='elec_max_night_0-5')
dfdaymin = cache_min.get(sensors=sensors)
dfdaymax = cache_max.get(sensors=sensors)
dfdaymin.info()


# The next plot shows that some periods are missing.  Due to the cumulative nature of the electricity counter, we still have the total consumption.  However, it is spread out over the entire period.  So we don't know the standby power during these days, and we have to remove those days.

# In[ ]:

if DEV:
    sensor = hp.search_sensors(key='3aa4')[0]
    df = sensor.get_data(head=pd.Timestamp('20151117'), tail=pd.Timestamp('20160104'))
    charts.plot(df, stock=True, show='inline')
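
The removal step itself is not shown in this excerpt. A minimal sketch of one possible clean-up, assuming the affected days come back as all-NaN rows in dfdaymin and dfdaymax (an assumption, not taken from the notebook):

# Hypothetical clean-up (assumption): drop days for which no sensor reported a
# daily minimum or maximum before using the data as standby power
dfdaymin = dfdaymin.dropna(how='all')
dfdaymax = dfdaymax.dropna(how='all')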
Code Example #15
# Load houseprint from cache if possible, otherwise build it from source
try:
    hp_filename = os.path.join(c.get('data', 'folder'), 'hp_anonymous.pkl')
    hp = houseprint.load_houseprint_from_file(hp_filename)
    print("Houseprint loaded from {}".format(hp_filename))
except Exception as e:
    print(e)
    print("Because of this error we try to build the houseprint from source")
    hp = houseprint.Houseprint()

hp.init_tmpo()

# Get the cache objects for gas, elec and water, and update them, sensor by sensor
for sensortype in ['gas', 'electricity', 'water']:
    cache = caching.Cache(variable=sensortype + '_daily_total')
    sensors = hp.get_sensors(sensortype=sensortype)
    df_cached = cache.get(sensors=sensors)

    # for each sensor:
    # 1. get the last timestamp of the cached daily total
    # 2. get the daily data since then
    # 3. fill up the cache with the new data
    print('Caching daily totals for {}'.format(sensortype))
    for sensor in tqdm(sensors):
        try:
            last_ts = df_cached[sensor.key].dropna().index[-1]
            last_ts = last_ts.tz_convert('Europe/Brussels')
        except:
            last_ts = pd.Timestamp('1970-01-01', tz='Europe/Brussels')
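
The excerpt stops after step 1. A minimal sketch of steps 2 and 3, reusing hp.get_data and cache.update from the other examples; the resample('D').sum() aggregation and the empty-frame check are assumptions, not taken from the project:

        # Hypothetical continuation (assumption): fetch data recorded since the
        # last cached timestamp, aggregate to daily totals and update the cache
        df_new = hp.get_data(sensors=[sensor], head=last_ts)
        if df_new.empty:
            continue
        cache.update(df_new.resample('D').sum())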
Code Example #16
# In[ ]:

# The first time, this will take a very looong time to get all the detailed data for building the cache
# Afterwards, this is quick
caching.cache_results(hp=hp,
                      sensors=sensors,
                      function='daily_min',
                      resultname='elec_daily_min')
caching.cache_results(hp=hp,
                      sensors=sensors,
                      function='daily_max',
                      resultname='elec_daily_max')

# In[ ]:

cache_min = caching.Cache(variable='elec_daily_min')
cache_max = caching.Cache(variable='elec_daily_max')
dfdaymin = cache_min.get(sensors=sensors)
dfdaymax = cache_max.get(sensors=sensors)

# The next plot shows that some periods are missing.  Due to the cumulative nature of the electricity counter, we still have the total consumption.  However, it is spread out over the entire period.  So we don't know the standby power during these days, and we have to remove those days.

# In[ ]:

if DEV:
    sensor = hp.search_sensors(key='3aa4')[0]
    df = sensor.get_data(head=pd.Timestamp('20151117'),
                         tail=pd.Timestamp('20160104'))
    charts.plot(df, stock=True, show='inline')

# In[ ]:
Code Example #17
sensors = hp.get_sensors(sensortype='electricity')  # sensor objects

# Remove some sensors
exclude = [
    '565de0a7dc64d8370aa321491217b85f'  # 3E
]
solar = [x.key for x in hp.search_sensors(type='electricity', system='solar')]
exclude += solar

# Removing entries from a list while iterating over it skips elements,
# so build a filtered list instead
sensors = [s for s in sensors if s.key not in exclude]

hp.init_tmpo()
hp.sync_tmpos()

cache = caching.Cache(variable='elec_standby')

for s in sensors[:1]:
    # get cached data
    df_cached = cache.get([s])  # Cache.get expects a list of sensors (see test_get_raises)
    try:
        last_day = df_cached.index[-1]
    except IndexError:
        last_day = 0

    # get new data, full resolution
    df_new = hp.get_data(sensors=[s], head=last_day)

    print("Now make a dataframe with daily index, and standby power")