def download_day_offers(year, month):
    expected_fname = ("PUBLIC_DVD_BIDDAYOFFER_D_" + str(year) +
                      str(month).zfill(2) + "010000.CSV")
    # If the file isn't in the cache folder, use NEMOSIS to download it.
    if not check_file_exists(os.path.join(raw_data_cache, 'BIDDAYOFFER_D'),
                             expected_fname):
        print("File not found - fetching via NEMOSIS", expected_fname)
        start_time = pendulum.datetime(year, month, 2, 0, 0).format(
            'YYYY/MM/DD HH:mm:ss')
        end_time = pendulum.datetime(year, month, 2, 0, 5).format(
            'YYYY/MM/DD HH:mm:ss')
        print(start_time, end_time)
        table = 'BIDDAYOFFER_D'
        data_fetch_methods.dynamic_data_compiler(
            start_time, end_time, table,
            os.path.join(raw_data_cache, 'BIDDAYOFFER_D'))
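# Usage sketch (illustrative only): backfill the day-offer files for each
# month of 2018. Assumes the module-level raw_data_cache path and the
# check_file_exists helper defined elsewhere in this module are in scope.
for month in range(1, 13):
    download_day_offers(2018, month)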
def test_dispatch_tables_start_of_month_parquet_format(self):
    start_time = '2018/02/01 00:00:00'
    end_time = '2018/02/01 05:15:00'
    for table in self.table_names:
        print('Testing {} returning values at start of month.'.format(table))
        dat_col = defaults.primary_date_columns[table]
        table_type = self.table_types[table]
        cols = [dat_col, self.table_types[table]]
        filter_cols = (self.table_types[table],)
        expected_length = 63
        expected_number_of_columns = 2
        expected_first_time = pd.to_datetime(
            start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5)
        expected_last_time = pd.to_datetime(end_time,
                                            format='%Y/%m/%d %H:%M:%S')
        if table in ['TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM',
                     'TRADINGINTERCONNECT']:
            expected_length = 10
            expected_first_time = pd.to_datetime('2018/02/01 00:30:00',
                                                 format='%Y/%m/%d %H:%M:%S')
            expected_last_time = pd.to_datetime('2018/02/01 05:00:00',
                                                format='%Y/%m/%d %H:%M:%S')
        if table == 'BIDPEROFFER_D':
            cols = [dat_col, 'DUID', 'BIDTYPE']
            filter_cols = ('DUID', 'BIDTYPE')
            expected_number_of_columns = 3
        if table == 'BIDDAYOFFER_D':
            cols = [dat_col, 'DUID', 'BIDTYPE']
            filter_cols = ('DUID', 'BIDTYPE')
            expected_number_of_columns = 3
            expected_length = 2
            expected_last_time = pd.to_datetime('2018/02/01 00:00:00',
                                                format='%Y/%m/%d %H:%M:%S')
            expected_first_time = pd.to_datetime('2018/01/31 00:00:00',
                                                 format='%Y/%m/%d %H:%M:%S')
        data = data_fetch_methods.dynamic_data_compiler(
            start_time, end_time, table, defaults.raw_data_cache,
            select_columns=cols, filter_cols=filter_cols,
            filter_values=self.filter_values[table_type],
            fformat='parquet', parse_data_types=True)
        data = data.reset_index(drop=True)
        print(table)
        self.assertEqual(expected_length, data.shape[0])
        self.assertEqual(expected_number_of_columns, data.shape[1])
        self.assertEqual(expected_first_time, data[dat_col][0])
        self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
        self.assertFalse((data.dtypes == object).all())
        print('Passed')
def get_regional_demand(start_time, end_time, raw_data_cache):
    dispatch_data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DISPATCHREGIONSUM', raw_data_cache,
        select_columns=['SETTLEMENTDATE', 'INTERVENTION', 'REGIONID',
                        'TOTALDEMAND'])
    dispatch_data = dispatch_data[dispatch_data['INTERVENTION'] == 0]
    dispatch_data['TOTALDEMAND'] = pd.to_numeric(dispatch_data['TOTALDEMAND'])
    dispatch_data = dispatch_data.pivot_table(values='TOTALDEMAND',
                                              index='SETTLEMENTDATE',
                                              columns='REGIONID')
    # Fill missing intervals with a numeric zero so the columns stay numeric.
    dispatch_data = dispatch_data.reset_index().fillna(0.0)
    dispatch_data = dispatch_data.rename(columns={'QLD1': 'qld', 'NSW1': 'nsw',
                                                  'VIC1': 'vic', 'SA1': 'sa',
                                                  'TAS1': 'tas'})
    dispatch_data.columns = [col + '-demand' if col != 'SETTLEMENTDATE'
                             else col for col in dispatch_data.columns]
    return dispatch_data
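# Example call (illustrative dates and cache path; requires the NEMOSIS raw
# data cache folder to exist):
regional_demand = get_regional_demand('2018/01/01 00:00:00',
                                      '2018/01/02 00:00:00',
                                      'data/aemo_raw_cache/')
# One row per dispatch interval: SETTLEMENTDATE, nsw-demand, qld-demand, ...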
def get_tech_operating_capacities(start_time, end_time, raw_data_cache):
    # get_duid_techs is not defined in this excerpt; a sketch follows this
    # function.
    tech_data = get_duid_techs(raw_data_cache)
    dispatch_data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DISPATCHLOAD', raw_data_cache,
        select_columns=['DUID', 'SETTLEMENTDATE', 'INTERVENTION',
                        'AVAILABILITY'])
    dispatch_data = dispatch_data[dispatch_data['INTERVENTION'] == 0]
    dispatch_data = pd.merge(dispatch_data, tech_data, on='DUID')
    dispatch_data['AVAILABILITY'] = pd.to_numeric(
        dispatch_data['AVAILABILITY'])
    dispatch_data = dispatch_data.groupby(['TECH', 'SETTLEMENTDATE'],
                                          as_index=False).aggregate(
                                              {'AVAILABILITY': 'sum'})
    dispatch_data['tech_region'] = dispatch_data['TECH'] + '-capacity'
    dispatch_data = dispatch_data.pivot_table(values='AVAILABILITY',
                                              index='SETTLEMENTDATE',
                                              columns='tech_region')
    # Fill missing intervals with a numeric zero so the columns stay numeric.
    dispatch_data = dispatch_data.reset_index().fillna(0.0)
    return dispatch_data
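# get_duid_techs is referenced above but not shown in this excerpt. A plausible
# sketch, inferred from the static-table pattern used in get_residual_demand
# below; the workbook column names are assumptions, not confirmed by the
# original source:
def get_duid_techs(raw_data_cache):
    cols = ['DUID', 'Technology Type - Descriptor']
    tech_data = data_fetch_methods.static_table_xl(
        'Generators and Scheduled Loads', raw_data_cache, select_columns=cols)
    # Use the technology descriptor directly as the TECH label.
    tech_data['TECH'] = tech_data['Technology Type - Descriptor']
    return tech_data.loc[:, ['DUID', 'TECH']]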
def test_fcas_tables_end_of_year(self):
    table = 'FCAS_4_SECOND'
    minute_offset = 5
    start_time = self.start_year - timedelta(minutes=minute_offset)
    end_time = start_time + timedelta(minutes=minute_offset * 2)
    start_str = start_time.strftime("%Y/%m/%d %H:%M:%S")
    end_str = end_time.strftime("%Y/%m/%d %H:%M:%S")
    print('Testing {} returning values at end of year.'.format(table))
    dat_col = defaults.primary_date_columns[table]
    cols = [dat_col, 'ELEMENTNUMBER', 'VARIABLENUMBER']
    # 4 second data: 15 records per minute.
    expected_length = 15 * (minute_offset * 2)
    length_check = False
    expected_number_of_columns = 3
    data = data_fetch_methods.dynamic_data_compiler(
        start_str, end_str, table, defaults.raw_data_cache,
        select_columns=cols, fformat="feather", keep_csv=False)
    length_array = data[dat_col].drop_duplicates()
    if length_array.shape[0] == expected_length:
        length_check = True
    # Case where the first data point lands at 00:00:01/02/03.
    elif length_array.shape[0] == expected_length - 1:
        length_check = True
    self.assertTrue(length_check)
    self.assertEqual(expected_number_of_columns, data.shape[1])
    print('Passed')
def test_caching_and_typing_works_parquet(self):
    start_time = '2018/02/20 23:00:00'
    end_time = '2018/02/20 23:30:00'
    for table in self.table_names:
        dat_col = defaults.primary_date_columns[table]
        id_col = self.id_cols[table]
        print('Testing {} returning values for 1 interval.'.format(table))
        data_fetch_methods.cache_compiler(start_time, end_time, table,
                                          defaults.raw_data_cache,
                                          fformat="parquet")
        data = data_fetch_methods.dynamic_data_compiler(
            start_time, end_time, table, defaults.raw_data_cache,
            fformat="parquet")
        dat_col_type = data[dat_col].dtype
        id_col_type = data[id_col].dtype
        not_empty = data.shape[0] > 0
        not_typed = all(data.dtypes == "object")
        self.assertTrue(not_empty)
        self.assertFalse(not_typed)
        self.assertEqual(dat_col_type, "<M8[ns]")
        self.assertEqual(id_col_type, "object")
        print('Passed')
def update_nemosis_cache():
    start_time = "2012/01/01 00:00:00"
    end_time = "{}/01/01 00:00:00".format(datetime.datetime.now().year + 1)
    table = "TRADINGPRICE"
    raw_data_cache = 'data/aemo_raw_cache/'
    price_data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table, raw_data_cache)
def test_dispatch_tables_start_of_month_just_csv_dont_merge(self):
    # Empty the cache.
    for f in os.listdir(defaults.raw_data_cache):
        os.remove(os.path.join(defaults.raw_data_cache, f))
    start_time = '2018/02/01 00:00:00'
    end_time = '2018/02/01 05:15:00'
    for table in self.table_names:
        print('Testing {} returning values at start of month.'.format(table))
        dat_col = defaults.primary_date_columns[table]
        table_type = self.table_types[table]
        cols = [dat_col, self.table_types[table]]
        filter_cols = (self.table_types[table],)
        expected_length = 63
        expected_number_of_columns = 2
        expected_first_time = pd.to_datetime(
            start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5)
        expected_last_time = pd.to_datetime(end_time,
                                            format='%Y/%m/%d %H:%M:%S')
        if table in ['TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM',
                     'TRADINGINTERCONNECT']:
            expected_length = 10
            expected_first_time = pd.to_datetime('2018/02/01 00:30:00',
                                                 format='%Y/%m/%d %H:%M:%S')
            expected_last_time = pd.to_datetime('2018/02/01 05:00:00',
                                                format='%Y/%m/%d %H:%M:%S')
        if table == 'BIDPEROFFER_D':
            cols = [dat_col, 'DUID', 'BIDTYPE']
            filter_cols = ('DUID', 'BIDTYPE')
            expected_number_of_columns = 3
        if table == 'BIDDAYOFFER_D':
            cols = [dat_col, 'DUID', 'BIDTYPE']
            filter_cols = ('DUID', 'BIDTYPE')
            expected_number_of_columns = 3
            expected_length = 2
            expected_last_time = pd.to_datetime('2018/02/01 00:00:00',
                                                format='%Y/%m/%d %H:%M:%S')
            expected_first_time = pd.to_datetime('2018/01/31 00:00:00',
                                                 format='%Y/%m/%d %H:%M:%S')
        data = data_fetch_methods.dynamic_data_compiler(
            start_time, end_time, table, defaults.raw_data_cache,
            select_columns=cols, filter_cols=filter_cols,
            filter_values=self.filter_values[table_type],
            fformat='csv', data_merge=False)
        self.assertIsNone(data)
        self.assertEqual(len(os.listdir(defaults.raw_data_cache)), 2)
def test_dispatch_tables_start_of_month(self):
    start_time = '2018/02/01 00:00:00'
    end_time = '2018/02/01 05:15:00'
    for table in self.table_names:
        print(f'Testing {table} returning values at start of month.')
        dat_col = defaults.primary_date_columns[table]
        table_type = self.table_types[table]
        filter_cols = self.table_filters[table]
        cols = [dat_col, *filter_cols]
        expected_length = 63
        expected_number_of_columns = 2
        expected_first_time = pd.to_datetime(
            start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5)
        expected_last_time = pd.to_datetime(end_time,
                                            format='%Y/%m/%d %H:%M:%S')
        if table in ['TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM',
                     'TRADINGINTERCONNECT']:
            expected_length = 10
            expected_first_time = pd.to_datetime('2018/02/01 00:30:00',
                                                 format='%Y/%m/%d %H:%M:%S')
            expected_last_time = pd.to_datetime('2018/02/01 05:00:00',
                                                format='%Y/%m/%d %H:%M:%S')
        if "ONLY" not in table_type:
            expected_number_of_columns = 3
        if table == 'BIDDAYOFFER_D':
            expected_length = 2
            expected_last_time = pd.to_datetime('2018/02/01 00:00:00',
                                                format='%Y/%m/%d %H:%M:%S')
            expected_first_time = pd.to_datetime('2018/01/31 00:00:00',
                                                 format='%Y/%m/%d %H:%M:%S')
        data = data_fetch_methods.dynamic_data_compiler(
            start_time, end_time, table, defaults.raw_data_cache,
            select_columns=cols, fformat="feather", keep_csv=False,
            filter_cols=filter_cols,
            filter_values=self.filter_values[table_type])
        data = data.reset_index(drop=True)
        self.assertEqual(expected_length, data.shape[0])
        self.assertEqual(expected_number_of_columns, data.shape[1])
        self.assertEqual(expected_first_time, data[dat_col][0])
        self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
        print('Passed')
def get_residual_demand(start_time, end_time, raw_data_cache):
    cols = ['DUID', 'Region', 'Fuel Source - Descriptor']
    tech_data = data_fetch_methods.static_table_xl(
        'Generators and Scheduled Loads', raw_data_cache, select_columns=cols)
    # 'Solar ' (with a trailing space) covers a variant spelling that appears
    # in the source workbook.
    zero_srmc_techs = ['Wind', 'Solar', 'Solar ']
    tech_data = tech_data[
        tech_data['Fuel Source - Descriptor'].isin(zero_srmc_techs)]
    scada_data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DISPATCH_UNIT_SCADA', raw_data_cache)
    scada_data = pd.merge(scada_data, tech_data, on='DUID')
    scada_data['SCADAVALUE'] = pd.to_numeric(scada_data['SCADAVALUE'])
    scada_data = scada_data.groupby(['SETTLEMENTDATE', 'Region'],
                                    as_index=False).agg({'SCADAVALUE': 'sum'})
    regional_demand = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DISPATCHREGIONSUM', raw_data_cache)
    regional_demand = regional_demand[regional_demand['INTERVENTION'] == 0]
    regional_demand = pd.merge(regional_demand, scada_data,
                               left_on=['SETTLEMENTDATE', 'REGIONID'],
                               right_on=['SETTLEMENTDATE', 'Region'])
    regional_demand['TOTALDEMAND'] = pd.to_numeric(
        regional_demand['TOTALDEMAND'])
    # Residual demand: total demand less zero-SRMC (wind and solar) output.
    regional_demand['RESIDUALDEMAND'] = (regional_demand['TOTALDEMAND'] -
                                         regional_demand['SCADAVALUE'])
    regional_demand = regional_demand.pivot_table(values='RESIDUALDEMAND',
                                                  index='SETTLEMENTDATE',
                                                  columns='REGIONID')
    # Fill missing intervals with a numeric zero so the columns stay numeric.
    regional_demand = regional_demand.reset_index().fillna(0.0)
    regional_demand = regional_demand.rename(columns={'QLD1': 'qld',
                                                      'NSW1': 'nsw',
                                                      'VIC1': 'vic',
                                                      'SA1': 'sa',
                                                      'TAS1': 'tas'})
    regional_demand.columns = [
        col + '-demand' if col != 'SETTLEMENTDATE' else col
        for col in regional_demand.columns]
    return regional_demand
def get_unit_dispatch(start_time, end_time, unit, raw_data_cache):
    dispatch_data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DISPATCHLOAD', raw_data_cache,
        select_columns=['DUID', 'SETTLEMENTDATE', 'INTERVENTION',
                        'INITIALMW'])
    dispatch_data = dispatch_data[dispatch_data['INTERVENTION'] == 0]
    dispatch_data = dispatch_data[dispatch_data['DUID'] == unit]
    # Return the unit's INITIALMW for the first interval in the window.
    initial_mw = dispatch_data['INITIALMW'].iloc[0]
    return float(initial_mw)
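# Example call (the DUID and cache path are purely illustrative; substitute
# any registered unit):
initial_output = get_unit_dispatch('2018/01/01 00:00:00',
                                   '2018/01/01 00:05:00',
                                   'AGLHAL', 'data/aemo_raw_cache/')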
def get_wholesale_prices(year, region):
    start_time = "{}/01/01 00:00:00".format(year)
    end_time = "{}/01/01 00:00:00".format(int(year) + 1)
    table = "TRADINGPRICE"
    raw_data_cache = 'data/aemo_raw_cache/'
    price_data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table, raw_data_cache)
    # AEMO region IDs carry a trailing '1', e.g. 'NSW' -> 'NSW1'.
    price_data = price_data[price_data['REGIONID'] == (region + '1')]
    price_data = price_data.loc[:, ['SETTLEMENTDATE', 'RRP']]
    price_data['RRP'] = pd.to_numeric(price_data['RRP'])
    price_data['SETTLEMENTDATE'] = pd.to_datetime(price_data['SETTLEMENTDATE'])
    return price_data
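# Example: a year of NSW trading prices (assumes the cache folder above
# exists and NEMOSIS can reach the AEMO data archive).
nsw_prices_2018 = get_wholesale_prices(2018, 'NSW')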
def test_filtering_for_one_interval_returns(self):
    start_time = '2017/05/20 23:00:00'
    end_time = '2017/05/20 23:05:00'
    for table in self.table_names:
        print('Testing {} returning values for 1 interval.'.format(table))
        data = data_fetch_methods.dynamic_data_compiler(
            start_time, end_time, table, defaults.raw_data_cache,
            select_columns=defaults.table_primary_keys[table] + ['END_DATE'])
        group_cols = [col for col in defaults.table_primary_keys[table]
                      if col != 'START_DATE']
        contains_duplicates = data.duplicated(group_cols).any()
        self.assertFalse(contains_duplicates)
        not_empty = data.shape[0] > 0
        self.assertTrue(not_empty)
        print('Passed')
def get_fleet_dispatch(start_time, end_time, fleet_units, region,
                       raw_data_cache):
    dispatch_data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DISPATCHLOAD', raw_data_cache,
        select_columns=['DUID', 'SETTLEMENTDATE', 'TOTALCLEARED',
                        'INTERVENTION', 'RAISEREG', 'RAISE6SEC',
                        'RAISE60SEC', 'RAISE5MIN'])
    dispatch_data = dispatch_data[dispatch_data['INTERVENTION'] == 0]
    dispatch_data = dispatch_data[dispatch_data['DUID'].isin(fleet_units)]
    dispatch_cols = ['TOTALCLEARED', 'RAISEREG', 'RAISE6SEC', 'RAISE60SEC',
                     'RAISE5MIN']
    for col in dispatch_cols:
        dispatch_data[col] = pd.to_numeric(dispatch_data[col])
    # Aggregate the fleet to one value per dispatch interval and service.
    dispatch_data = dispatch_data.groupby(
        'SETTLEMENTDATE', as_index=False).aggregate(
        {col: 'sum' for col in dispatch_cols})
    aemo_dispatch_names = {
        'TOTALCLEARED': region + '-energy-fleet-dispatch',
        'RAISEREG': region + '-raise_regulation-fleet-dispatch',
        'RAISE6SEC': region + '-raise_6_second-fleet-dispatch',
        'RAISE60SEC': region + '-raise_60_second-fleet-dispatch',
        'RAISE5MIN': region + '-raise_5_minute-fleet-dispatch'
    }
    dispatch_data = dispatch_data.rename(columns=aemo_dispatch_names)
    return dispatch_data
def test_dispatch_tables_end_of_month(self):
    start_time = '2013/07/31 21:00:00'
    end_time = '2013/08/01 00:00:00'
    for table in self.table_names:
        print('Testing {} returning values at end of month.'.format(table))
        dat_col = defaults.primary_date_columns[table]
        table_type = self.table_types[table]
        cols = [dat_col, self.table_types[table]]
        filter_cols = (self.table_types[table],)
        expected_length = 36
        expected_number_of_columns = 2
        expected_first_time = pd.to_datetime(
            start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(minutes=5)
        expected_last_time = pd.to_datetime(end_time,
                                            format='%Y/%m/%d %H:%M:%S')
        if table in ['TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM',
                     'TRADINGINTERCONNECT']:
            expected_length = 6
            expected_first_time = pd.to_datetime('2013/07/31 21:30:00',
                                                 format='%Y/%m/%d %H:%M:%S')
        if table == 'BIDPEROFFER_D':
            cols = [dat_col, 'DUID', 'BIDTYPE']
            filter_cols = ('DUID', 'BIDTYPE')
            expected_number_of_columns = 3
        if table == 'BIDDAYOFFER_D':
            cols = [dat_col, 'DUID', 'BIDTYPE']
            filter_cols = ('DUID', 'BIDTYPE')
            expected_number_of_columns = 3
            expected_length = 1
            expected_last_time = expected_first_time.replace(hour=0, minute=0)
            expected_first_time = expected_first_time.replace(hour=0, minute=0)
        data = data_fetch_methods.dynamic_data_compiler(
            start_time, end_time, table, defaults.raw_data_cache,
            select_columns=cols, filter_cols=filter_cols,
            filter_values=self.filter_values[table_type])
        data = data.sort_values(dat_col)
        data = data.reset_index(drop=True)
        self.assertEqual(expected_length, data.shape[0])
        self.assertEqual(expected_number_of_columns, data.shape[1])
        self.assertEqual(expected_first_time, data[dat_col][0])
        self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
        print('Passed')
def get_regional_prices(start_time, end_time, raw_data_cache):
    dispatch_data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DISPATCHPRICE', raw_data_cache,
        select_columns=['SETTLEMENTDATE', 'INTERVENTION', 'REGIONID', 'RRP',
                        'RAISEREGRRP', 'RAISE6SECRRP', 'RAISE60SECRRP',
                        'RAISE5MINRRP'])
    dispatch_data = dispatch_data[dispatch_data['INTERVENTION'] == 0]
    data = pd.DataFrame()
    # aemo_price_names maps friendly service names to AEMO price columns;
    # see the sketch of the assumed mapping after this function.
    for name, aemo_name in aemo_price_names.items():
        dispatch_data[aemo_name] = pd.to_numeric(dispatch_data[aemo_name])
        data_temp = dispatch_data.pivot_table(values=aemo_name,
                                              index='SETTLEMENTDATE',
                                              columns='REGIONID')
        # Fill missing intervals with a numeric zero so columns stay numeric.
        data_temp = data_temp.reset_index().fillna(0.0)
        data_temp = data_temp.rename(columns={'QLD1': 'qld', 'NSW1': 'nsw',
                                              'VIC1': 'vic', 'SA1': 'sa',
                                              'TAS1': 'tas'})
        data_temp.columns = [col + '-' + name if col != 'SETTLEMENTDATE'
                             else col for col in data_temp.columns]
        if data.empty:
            data = data_temp
        else:
            data = pd.merge(data, data_temp, on=['SETTLEMENTDATE'])
    return data
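# The module-level aemo_price_names mapping is referenced above but not
# defined in this excerpt. A plausible definition, inferred from the columns
# selected in get_regional_prices and the service names used in
# get_fleet_dispatch (the exact keys are assumptions):
aemo_price_names = {
    'energy': 'RRP',
    'raise_regulation': 'RAISEREGRRP',
    'raise_6_second': 'RAISE6SECRRP',
    'raise_60_second': 'RAISE60SECRRP',
    'raise_5_minute': 'RAISE5MINRRP'
}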
def test_dispatch_tables_straddle_years(self):
    table = 'FCAS_4_SECOND'
    start_time = '2011/12/31 23:55:04'
    end_time = '2012/01/01 00:05:00'
    print('Testing {} returning values from adjacent years.'.format(table))
    dat_col = defaults.primary_date_columns[table]
    cols = [dat_col, 'ELEMENTNUMBER', 'VARIABLENUMBER']
    filter_cols = ('ELEMENTNUMBER', 'VARIABLENUMBER')
    expected_length = 149
    expected_number_of_columns = 3
    expected_first_time = pd.to_datetime(
        start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(seconds=4)
    expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S')
    data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table, defaults.raw_data_cache,
        select_columns=cols, filter_cols=filter_cols,
        filter_values=(['1'], ['3']))
    data = data.sort_values(dat_col)
    data = data.reset_index(drop=True)
    self.assertEqual(expected_length, data.shape[0])
    self.assertEqual(expected_number_of_columns, data.shape[1])
    self.assertEqual(expected_first_time, data[dat_col][0])
    self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
    print('Passed')
def test_fcas_tables_end_of_month(self):
    table = 'FCAS_4_SECOND'
    start_time = '2013/07/31 23:55:03'
    end_time = '2013/08/01 00:00:04'
    print('Testing {} returning values at end of month.'.format(table))
    dat_col = defaults.primary_date_columns[table]
    cols = [dat_col, 'ELEMENTNUMBER', 'VARIABLENUMBER']
    filter_cols = ('ELEMENTNUMBER', 'VARIABLENUMBER')
    expected_length = 75
    expected_number_of_columns = 3
    expected_first_time = pd.to_datetime(
        start_time, format='%Y/%m/%d %H:%M:%S') + timedelta(seconds=4)
    expected_last_time = pd.to_datetime(end_time, format='%Y/%m/%d %H:%M:%S')
    data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table, defaults.raw_data_cache,
        select_columns=cols, filter_cols=filter_cols,
        filter_values=(['1'], ['3']))
    data = data.sort_values(dat_col)
    data = data.reset_index(drop=True)
    self.assertEqual(expected_length, data.shape[0])
    self.assertEqual(expected_number_of_columns, data.shape[1])
    self.assertEqual(expected_first_time, data[dat_col][0])
    self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
    print('Passed')
def trading_price(self):
    table = 'TRADINGPRICE'
    raw_data_cache = './cache'
    trading_price = data_fetch_methods.dynamic_data_compiler(
        self._start_date.strftime("%Y/%m/%d 00:00:00"),
        self._end_date.strftime("%Y/%m/%d 00:00:00"),
        table, raw_data_cache)
    trading_price = trading_price.loc[trading_price['REGIONID'] ==
                                      self._state]
    trading_price['SETTLEMENTDATE'] = pd.to_datetime(
        trading_price['SETTLEMENTDATE'], format='%d/%m/%Y %H:%M')
    trading_price = trading_price.sort_values(by=['SETTLEMENTDATE'])
    trading_price = trading_price.reset_index(drop=True)
    # Trading intervals are 30 minutes long.
    trading_price['tstep_len'] = 30
    columns = ['RRP', 'RAISE6SECRRP', 'RAISE60SECRRP', 'RAISE5MINRRP',
               'RAISEREGRRP', 'LOWER6SECRRP', 'LOWER60SECRRP', 'LOWERREGRRP']
    trading_price[columns] = trading_price[columns].astype(float)
    return trading_price
def plant_stats(start_time, end_time, table_name, raw_data_location,
                select_columns=None, filter_cols=None, filter_values=None):
    # Build a complete 5 minute index covering the study window.
    ix = pd.date_range(
        start=datetime.strptime(start_time, '%Y/%m/%d %H:%M:%S'),
        end=datetime.strptime(end_time, '%Y/%m/%d %H:%M:%S') -
        timedelta(minutes=5),
        freq='5T')
    timeseries_df = pd.DataFrame(index=ix)
    timeseries_df.reset_index(inplace=True)
    timeseries_df.columns = ['SETTLEMENTDATE']
    gen_max_cap = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DUDETAIL', raw_data_location,
        select_columns=['EFFECTIVEDATE', 'DUID', 'VERSIONNO', 'MAXCAPACITY'],
        filter_cols=filter_cols, filter_values=filter_values)
    gen_max_cap = select_highest_version_number(
        gen_max_cap, defaults.table_primary_keys['DUDETAIL'])
    gen_region = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DUDETAILSUMMARY', raw_data_location,
        select_columns=['START_DATE', 'END_DATE', 'DUID', 'REGIONID'],
        filter_cols=filter_cols, filter_values=filter_values)
    scada = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DISPATCH_UNIT_SCADA', raw_data_location,
        select_columns=['SETTLEMENTDATE', 'DUID', 'SCADAVALUE'])
    dispatch_price = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DISPATCHPRICE', raw_data_location,
        select_columns=['SETTLEMENTDATE', 'REGIONID', 'RRP', 'INTERVENTION'])
    dispatch_price = select_intervention_if_present(
        dispatch_price, defaults.table_primary_keys['DISPATCHPRICE'])
    trading_price = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'TRADINGPRICE', raw_data_location,
        select_columns=['SETTLEMENTDATE', 'REGIONID', 'RRP'])
    dispatch_price['RRP'] = pd.to_numeric(dispatch_price['RRP'])
    trading_price['RRP'] = pd.to_numeric(trading_price['RRP'])
    region_summary = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, 'DISPATCHREGIONSUM', raw_data_location,
        select_columns=['SETTLEMENTDATE', 'REGIONID', 'TOTALDEMAND',
                        'INTERVENTION', 'DISPATCHINTERVAL'])
    region_summary = select_intervention_if_present(
        region_summary, defaults.table_primary_keys['DISPATCHREGIONSUM'])
    # Expand each unit's SCADA series onto the full 5 minute index, filling
    # missing intervals with zero output.
    scada_list = []
    for duid, group in scada.groupby('DUID'):
        temp = pd.merge(timeseries_df, group, 'left', on='SETTLEMENTDATE')
        temp['SCADAVALUE'] = np.where(temp['SCADAVALUE'].isnull(), 0.0,
                                      temp['SCADAVALUE'])
        temp['DUID'] = np.where(temp['DUID'].isnull(), duid, temp['DUID'])
        scada_list.append(temp)
    scada = pd.concat(scada_list)
    trading_load = calc_trading_load(scada)
    combined_data = merge_tables_for_plant_stats(
        timeseries_df, gen_max_cap, gen_region, scada, trading_load,
        dispatch_price, trading_price, region_summary)
    combined_data['SCADAVALUE'] = pd.to_numeric(combined_data['SCADAVALUE'])
    combined_data['MAXCAPACITY'] = pd.to_numeric(combined_data['MAXCAPACITY'])
    combined_data['TRADING_TOTALCLEARED'] = pd.to_numeric(
        combined_data['TRADING_TOTALCLEARED'])
    combined_data['TOTALDEMAND'] = pd.to_numeric(combined_data['TOTALDEMAND'])
    stats = stats_by_month_and_plant(combined_data)
    return stats
def trading_and_dispatch_cost():
    gen_region = data_fetch_methods.dynamic_data_compiler(
        '2017/01/01 00:05:00', '2018/01/01 00:05:00', 'DUDETAILSUMMARY',
        'E:/raw_aemo_data',
        select_columns=['START_DATE', 'END_DATE', 'DUID', 'REGIONID'])
    scada = data_fetch_methods.dynamic_data_compiler(
        '2017/01/01 00:05:00', '2018/01/01 00:05:00', 'DISPATCH_UNIT_SCADA',
        'E:/raw_aemo_data')
    # Build a complete 5 minute index for the year.
    ix = pd.date_range(
        start=datetime.strptime('2017/01/01 00:00:00', '%Y/%m/%d %H:%M:%S'),
        end=datetime.strptime('2018/01/01 00:00:00', '%Y/%m/%d %H:%M:%S'),
        freq='5T')
    timeseries_df = pd.DataFrame(index=ix)
    timeseries_df.reset_index(inplace=True)
    timeseries_df.columns = ['SETTLEMENTDATE']
    # Expand each unit's SCADA series onto the full index, filling gaps with
    # zero output.
    scada_list = []
    for duid, group in scada.groupby('DUID'):
        temp = pd.merge(timeseries_df, group, 'left', on='SETTLEMENTDATE')
        temp['SCADAVALUE'] = np.where(temp['SCADAVALUE'].isnull(), 0.0,
                                      temp['SCADAVALUE'])
        temp['DUID'] = np.where(temp['DUID'].isnull(), duid, temp['DUID'])
        scada_list.append(temp)
    scada = pd.concat(scada_list)
    dispatch_price = data_fetch_methods.dynamic_data_compiler(
        '2017/01/01 00:00:00', '2018/01/01 00:05:00', 'DISPATCHPRICE',
        'E:/raw_aemo_data')
    dispatch_price = select_intervention_if_present(
        dispatch_price, defaults.table_primary_keys['DISPATCHPRICE'])
    # Attach each unit's region using the DUDETAILSUMMARY record in effect at
    # each settlement date.
    gen_region = gen_region.sort_values('START_DATE')
    scada = scada.sort_values('SETTLEMENTDATE')
    scada_and_regions = pd.merge_asof(scada, gen_region,
                                      left_on='SETTLEMENTDATE',
                                      right_on='START_DATE', by='DUID')
    scada_and_regions = scada_and_regions[
        ~scada_and_regions['REGIONID'].isnull()]
    scada_and_regions = pd.merge(scada_and_regions, dispatch_price, 'inner',
                                 on=['REGIONID', 'SETTLEMENTDATE'])
    scada_and_regions['SCADAVALUE'] = pd.to_numeric(
        scada_and_regions['SCADAVALUE'])
    scada_and_regions['RRP'] = pd.to_numeric(scada_and_regions['RRP'])
    # Energy in a 5 minute interval is MW / 12.
    scada_and_regions['DISPATCHCOST'] = (scada_and_regions['SCADAVALUE'] *
                                         scada_and_regions['RRP']) / 12
    scada_and_regions = scada_and_regions.set_index('SETTLEMENTDATE')
    scada_and_regions = scada_and_regions.groupby('DUID').resample(
        '30T', label='right', closed='right').aggregate({
            'SCADAVALUE': 'mean', 'RRP': 'mean', 'DISPATCHCOST': 'sum'})
    scada_and_regions.reset_index(inplace=True)
    # Energy in a 30 minute interval is MW / 2.
    scada_and_regions['TRADINGCOST'] = (scada_and_regions['SCADAVALUE'] *
                                        scada_and_regions['RRP']) / 2
    scada_and_regions['SCADAVALUE'] = scada_and_regions['SCADAVALUE'] / 2
    scada_and_regions = scada_and_regions.groupby('DUID').sum()
    scada_and_regions.to_csv(
        'C:/Users/user/Documents/dispatch_trading_cost.csv')
start_time_array = timeData[0:-1]
end_time_array = timeData[1:]

# For every day this creates a clean csv file of the Gen and Load of HPR.
for x in range(0, len(start_time_array)):
    start_time = start_time_array[x].strftime("%Y/%m/%d %H:%M:%S")
    print(start_time)
    end_time = end_time_array[x].strftime("%Y/%m/%d %H:%M:%S")
    print(end_time)
    table = 'FCAS_4_SECOND'
    raw_data_cache = 'C:/...'  # The folder where the raw data is stored.
    # The folders where the cleaned data will go.
    gen_filename = 'C:/.../HRP_GEN_TEST##.csv'
    load_filename = 'C:/.../HRP_LOAD_TEST##.csv'
    gen_filename = gen_filename.replace(
        '##', start_time_array[x].strftime("%Y%m%d"))
    load_filename = load_filename.replace(
        '##', start_time_array[x].strftime("%Y%m%d"))
    fcas_data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table, raw_data_cache)
    fcas_data_HPRGen = fcas_data[fcas_data.ELEMENTNUMBER.isin(['330']) &
                                 fcas_data.VARIABLENUMBER.isin(['2'])]
    fcas_data_HPRLoad = fcas_data[fcas_data.ELEMENTNUMBER.isin(['331']) &
                                  fcas_data.VARIABLENUMBER.isin(['2'])]
    fcas_data_HPRGen.sort_values(by=['TIMESTAMP']).to_csv(gen_filename)
    fcas_data_HPRLoad.sort_values(by=['TIMESTAMP']).to_csv(load_filename)
def fcas4s_scada_match(start_time, end_time, table_name, raw_data_location,
                       select_columns=None, filter_cols=None,
                       filter_values=None):
    # Pull in the 4 second fcas data.
    table_name_fcas4s = 'FCAS_4_SECOND'
    fcas4s = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table_name_fcas4s, raw_data_location)
    # Pull in the 4 second fcas variable types.
    table_name_variable_types = 'VARIABLES_FCAS_4_SECOND'
    fcas4s_variable_types = data_fetch_methods.static_table(
        start_time, end_time, table_name_variable_types, raw_data_location)

    # Select the variable types that measure MW on an interconnector and
    # Gen_MW from a dispatch unit.
    fcas4s_variable_types = fcas4s_variable_types[
        fcas4s_variable_types['VARIABLETYPE'].isin(['MW', 'Gen_MW'])]
    fcas4s = fcas4s[fcas4s['VARIABLENUMBER'].isin(
        fcas4s_variable_types['VARIABLENUMBER'])]

    # Select just the fcas 4 second data columns that we need.
    fcas4s = fcas4s.loc[:, ('TIMESTAMP', 'ELEMENTNUMBER', 'VALUE')]

    # Convert the fcas MW measured values to numeric type.
    fcas4s['VALUE'] = pd.to_numeric(fcas4s['VALUE'])

    # Re-stamp the 4 second measurements with the timestamp of the start of
    # the 5 min interval, i.e. round down to the nearest 5 min interval.
    fcas4s = fcas4s[
        (fcas4s['TIMESTAMP'].dt.minute.isin(list(range(0, 60, 5)))) &
        (fcas4s['TIMESTAMP'].dt.second < 20)]
    fcas4s['TIMESTAMP'] = fcas4s['TIMESTAMP'].apply(
        lambda dt: datetime(dt.year, dt.month, dt.day, dt.hour, dt.minute))

    # Pull in the dispatch unit scada data.
    table_name_scada = 'DISPATCH_UNIT_SCADA'
    scada = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table_name_scada, raw_data_location)
    scada['SETTLEMENTDATE'] = scada['SETTLEMENTDATE'] - timedelta(minutes=5)
    scada = scada.loc[:, ('SETTLEMENTDATE', 'DUID', 'SCADAVALUE')]
    scada.columns = ['SETTLEMENTDATE', 'MARKETNAME', 'SCADAVALUE']
    scada['SCADAVALUE'] = pd.to_numeric(scada['SCADAVALUE'])

    # Pull in the interconnector scada data and use the intervention records
    # where they exist.
    table_name_inter_flow = 'DISPATCHINTERCONNECTORRES'
    inter_flows = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table_name_inter_flow, raw_data_location)
    inter_flows['METEREDMWFLOW'] = pd.to_numeric(inter_flows['METEREDMWFLOW'])
    inter_flows = inter_flows.sort_values('INTERVENTION')
    inter_flows = inter_flows.groupby(['SETTLEMENTDATE', 'INTERCONNECTORID'],
                                      as_index=False).last()
    inter_flows = inter_flows.loc[:, ('SETTLEMENTDATE', 'INTERCONNECTORID',
                                      'METEREDMWFLOW')]
    inter_flows['SETTLEMENTDATE'] = (inter_flows['SETTLEMENTDATE'] -
                                     timedelta(minutes=5))
    inter_flows.columns = ['SETTLEMENTDATE', 'MARKETNAME', 'SCADAVALUE']

    # Combine scada data from interconnectors and dispatch units.
    scada_elements = pd.concat([scada, inter_flows], sort=False)

    # Merge the fcas and scada data based on timestamp. This joins every scada
    # element to every fcas element so that they can be compared.
    profile_comp = pd.merge(fcas4s, scada_elements, 'inner',
                            left_on='TIMESTAMP', right_on='SETTLEMENTDATE')

    # Calculate the absolute error between each pair of measurements.
    profile_comp['ERROR'] = profile_comp['VALUE'] - profile_comp['SCADAVALUE']
    profile_comp['ERROR'] = profile_comp['ERROR'].abs()

    # Choose the fcas value that best matches the scada value during each
    # 5 min interval.
    profile_comp = profile_comp.sort_values('ERROR')
    error_comp = profile_comp.groupby(
        ['MARKETNAME', 'ELEMENTNUMBER', 'TIMESTAMP'], as_index=False).first()

    # Aggregate the error to compare each potential scada and fcas element
    # match.
    error_comp = error_comp.groupby(['MARKETNAME', 'ELEMENTNUMBER'],
                                    as_index=False).sum()

    # Sort the comparisons based on aggregate error.
    error_comp = error_comp.sort_values('ERROR')

    # Drop duplicates of element numbers and scada element names, keeping the
    # record for each with the least error.
    best_matches_scada = error_comp[
        error_comp['SCADAVALUE'].abs() > 0]  # Exclude all-zero scada units.
    best_matches_scada = best_matches_scada.drop_duplicates('ELEMENTNUMBER',
                                                            keep='first')
    best_matches_scada = best_matches_scada.drop_duplicates('MARKETNAME',
                                                            keep='first')

    # Remove fcas elements where a match only occurred because both fcas and
    # scada showed no dispatch.
    best_matches_scada['ELEMENTNUMBER'] = pd.to_numeric(
        best_matches_scada['ELEMENTNUMBER'])
    best_matches_scada = best_matches_scada.sort_values('ELEMENTNUMBER')
    best_matches_scada['ELEMENTNUMBER'] = best_matches_scada[
        'ELEMENTNUMBER'].astype(str)

    # Express the error as a fraction of the aggregate scada value.
    best_matches_scada['ERROR'] = (best_matches_scada['ERROR'] /
                                   best_matches_scada['SCADAVALUE'])

    # Drop matches with an error greater than 100 %.
    best_matches_scada = best_matches_scada[
        (best_matches_scada['ERROR'] < 1) &
        (best_matches_scada['ERROR'] > -1)]

    best_matches_scada = best_matches_scada.loc[:, ('ELEMENTNUMBER',
                                                    'MARKETNAME', 'ERROR')]

    if select_columns is not None:
        best_matches_scada = best_matches_scada.loc[:, select_columns]
    if filter_cols is not None:
        best_matches_scada = filters.filter_on_column_value(
            best_matches_scada, filter_cols, filter_values)

    return best_matches_scada