def test_dispatch_tables_stradle_years(self):
    """Check FCAS_4_SECOND data compiles correctly across a year boundary.

    Requests a window spanning 2011-12-31 -> 2012-01-01 and verifies the
    row count, column count and the first/last timestamps returned.
    """
    table = 'FCAS_4_SECOND'
    start_time = '2011/12/31 23:55:04'
    end_time = '2012/01/01 00:05:00'
    print('Testing {} returning values from adjacent years.'.format(table))
    dat_col = defaults.primary_date_columns[table]
    cols = [dat_col, 'ELEMENTNUMBER', 'VARIABLENUMBER']
    filter_cols = ('ELEMENTNUMBER', 'VARIABLENUMBER')
    expected_length = 149
    expected_number_of_columns = 3
    # pd.Timestamp.strptime is not implemented in pandas (it raises
    # NotImplementedError); parse with datetime.strptime instead. The
    # resulting datetime compares equal to pandas Timestamps.
    expected_first_time = datetime.strptime(start_time, '%Y/%m/%d %H:%M:%S')
    # 4 second resolution: the last record sits 4 s before the window end.
    expected_last_time = datetime.strptime(
        end_time, '%Y/%m/%d %H:%M:%S') - timedelta(seconds=4)
    data = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table, defaults.raw_data_cache,
        select_columns=cols, filter_cols=filter_cols,
        filter_values=(['1'], ['3']))
    data = data.sort_values(dat_col)
    data = data.reset_index(drop=True)
    self.assertEqual(expected_length, data.shape[0])
    self.assertEqual(expected_number_of_columns, data.shape[1])
    self.assertEqual(expected_first_time, data[dat_col][0])
    self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
    print('Passed')
def test_dispatch_tables_stradle_years(self):
    """Check each dynamic table compiles correctly across a year boundary.

    Requests 2017-12-31 23:00 -> 2018-01-01 01:00 for every table in
    self.table_names and verifies row count, column count and first/last
    timestamps, with adjusted expectations for the 30 minute trading
    tables and the daily/offer bid tables.
    """
    start_time = '2017/12/31 23:00:00'
    end_time = '2018/01/01 01:00:00'
    for table in self.table_names:
        print('Testing {} returning values from adjacent years.'.format(
            table))
        dat_col = defaults.primary_date_columns[table]
        table_type = self.table_types[table]
        cols = [dat_col, self.table_types[table]]
        filter_cols = (self.table_types[table], )
        expected_length = 24
        expected_number_of_columns = 2
        # pd.Timestamp.strptime is not implemented in pandas (it raises
        # NotImplementedError); parse with datetime.strptime instead.
        expected_first_time = datetime.strptime(start_time,
                                                '%Y/%m/%d %H:%M:%S')
        # 5 min resolution: last record sits 5 min before the window end.
        expected_last_time = datetime.strptime(
            end_time, '%Y/%m/%d %H:%M:%S') - timedelta(minutes=5)
        if table in [
                'TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM',
                'TRADINGINTERCONNECT'
        ]:
            # Trading tables are at 30 min resolution: 4 records in the
            # 2 hour window, the last starting 30 min before the end.
            expected_length = 4
            expected_last_time = datetime.strptime('2018/01/01 00:30:00',
                                                   '%Y/%m/%d %H:%M:%S')
        if table == 'BIDPEROFFER_D':
            cols = [dat_col, 'DUID', 'BIDTYPE']
            filter_cols = ('DUID', 'BIDTYPE')
            expected_number_of_columns = 3
        if table == 'BIDDAYOFFER_D':
            cols = [dat_col, 'DUID', 'BIDTYPE']
            filter_cols = ('DUID', 'BIDTYPE')
            expected_number_of_columns = 3
            # Daily table: one record per day, stamped at midnight.
            expected_length = 2
            expected_last_time = expected_last_time.replace(hour=0,
                                                            minute=0)
            expected_first_time = expected_first_time.replace(hour=0,
                                                              minute=0)
        data = data_fetch_methods.dynamic_data_compiler(
            start_time, end_time, table, defaults.raw_data_cache,
            select_columns=cols, filter_cols=filter_cols,
            filter_values=self.filter_values[table_type])
        data = data.sort_values(dat_col)
        data = data.reset_index(drop=True)
        self.assertEqual(expected_length, data.shape[0])
        self.assertEqual(expected_number_of_columns, data.shape[1])
        self.assertEqual(expected_first_time, data[dat_col][0])
        self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
        print('Passed')
def test_filtering_for_one_interval_returns(self):
    """Compiling a single 5 min interval yields non-empty, duplicate-free data."""
    start_time = '2017/05/20 23:00:00'
    end_time = '2017/05/20 23:05:00'
    for table in self.table_names:
        print('Testing {} returing values for 1 interval.'.format(table))
        primary_keys = defaults.table_primary_keys[table]
        data = data_fetch_methods.dynamic_data_compiler(
            start_time, end_time, table, defaults.raw_data_cache,
            select_columns=primary_keys)
        # Exclude EFFECTIVEDATE so revisions of the same record are
        # treated as duplicates of one another.
        group_cols = [key for key in primary_keys if key != 'EFFECTIVEDATE']
        self.assertFalse(data.duplicated(group_cols).any())
        self.assertTrue(data.shape[0] > 0)
        print('Passed')
def fcas4s_scada_match(start_time,
                       end_time,
                       table_name,
                       raw_data_location,
                       select_columns=None,
                       filter_cols=None,
                       filter_values=None):
    """Match FCAS 4 second element numbers to SCADA market element names.

    Compares the 4 second FCAS MW measurements against the 5 minute SCADA
    values of dispatch units and interconnectors over the given window, and
    pairs each FCAS ELEMENTNUMBER with the MARKETNAME (DUID or
    INTERCONNECTORID) whose dispatch profile it matches with least error.

    Arguments:
        start_time: str in '%Y/%m/%d %H:%M:%S' format, start of window.
        end_time: str in '%Y/%m/%d %H:%M:%S' format, end of window.
        table_name: not referenced in the body — presumably kept for
            interface consistency with other fetch functions; TODO confirm.
        raw_data_location: path to the raw data cache used by the
            data_fetch_methods compilers.
        select_columns: optional subset of ('ELEMENTNUMBER', 'MARKETNAME',
            'ERROR') to return.
        filter_cols: optional columns to filter the result on.
        filter_values: values paired with filter_cols, passed to
            filters.filter_on_column_value.

    Returns:
        pandas DataFrame with columns ELEMENTNUMBER, MARKETNAME and ERROR
        (relative error of the best match), optionally column-selected and
        filtered.
    """
    # Pull in the 4 second fcas data.
    table_name_fcas4s = 'FCAS_4_SECOND'
    fcas4s = data_fetch_methods.dynamic_data_compiler(start_time, end_time,
                                                      table_name_fcas4s,
                                                      raw_data_location)
    # Pull in the 4 second fcas variable types.
    table_name_variable_types = 'VARIABLES_FCAS_4_SECOND'
    fcas4s_variable_types = data_fetch_methods.static_table(
        start_time, end_time, table_name_variable_types, raw_data_location)

    # Select the variable types that measure MW on an interconnector and
    # Gen_MW from a dispatch unit.
    fcas4s_variable_types = fcas4s_variable_types[
        fcas4s_variable_types['VARIABLETYPE'].isin(['MW', 'Gen_MW'])]
    fcas4s = fcas4s[fcas4s['VARIABLENUMBER'].isin(
        fcas4s_variable_types['VARIABLENUMBER'])]

    # Select just the fcas 4 second data variable columns that we need.
    fcas4s = fcas4s.loc[:, ('TIMESTAMP', 'ELEMENTNUMBER', 'VALUE')]

    # Convert the fcas MW measured values to numeric type.
    fcas4s['VALUE'] = pd.to_numeric(fcas4s['VALUE'])

    # Keep only the 4 second measurements taken at the start of each 5 min
    # interval (second < 20 keeps the first few samples of the interval),
    # then rename them to the timestamp of the start of that interval,
    # i.e. round down to the nearest 5 min boundary.
    fcas4s = fcas4s[(fcas4s['TIMESTAMP'].dt.minute.isin(list(range(0, 60,
                                                                   5)))) &
                    (fcas4s['TIMESTAMP'].dt.second < 20)]
    fcas4s['TIMESTAMP'] = fcas4s['TIMESTAMP'].apply(
        lambda dt: datetime(dt.year, dt.month, dt.day, dt.hour, dt.minute))

    # Pull in the dispatch unit scada data.
    table_name_scada = 'DISPATCH_UNIT_SCADA'
    scada = data_fetch_methods.dynamic_data_compiler(start_time, end_time,
                                                     table_name_scada,
                                                     raw_data_location)
    # Shift interval-ending settlement dates back 5 min so they line up
    # with the interval-starting FCAS timestamps produced above.
    scada['SETTLEMENTDATE'] = scada['SETTLEMENTDATE'] - timedelta(minutes=5)
    scada = scada.loc[:, ('SETTLEMENTDATE', 'DUID', 'SCADAVALUE')]
    scada.columns = ['SETTLEMENTDATE', 'MARKETNAME', 'SCADAVALUE']
    scada['SCADAVALUE'] = pd.to_numeric(scada['SCADAVALUE'])

    # Pull in the interconnector scada data and use the intervention
    # records where they exist.
    table_name_inter_flow = 'DISPATCHINTERCONNECTORRES'
    inter_flows = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table_name_inter_flow, raw_data_location)
    inter_flows['METEREDMWFLOW'] = pd.to_numeric(inter_flows['METEREDMWFLOW'])
    # Sorting by INTERVENTION then taking .last() per interval keeps the
    # intervention record when both it and a non-intervention record exist.
    inter_flows = inter_flows.sort_values('INTERVENTION')
    inter_flows = inter_flows.groupby(['SETTLEMENTDATE', 'INTERCONNECTORID'],
                                      as_index=False).last()
    inter_flows = inter_flows.loc[:, ('SETTLEMENTDATE', 'INTERCONNECTORID',
                                      'METEREDMWFLOW')]
    inter_flows['SETTLEMENTDATE'] = inter_flows['SETTLEMENTDATE'] - timedelta(
        minutes=5)
    inter_flows.columns = ['SETTLEMENTDATE', 'MARKETNAME', 'SCADAVALUE']

    # Combine scada data from interconnectors and dispatch units.
    scada_elements = pd.concat([scada, inter_flows], sort=False)

    # Merge the fcas and scada data based on time stamp; this leads every
    # scada element to be joined to every fcas element, which then allows
    # them to be compared.
    profile_comp = pd.merge(fcas4s, scada_elements, 'inner',
                            left_on='TIMESTAMP', right_on='SETTLEMENTDATE')

    # Calculate the error between each measurement.
    profile_comp['ERROR'] = profile_comp['VALUE'] - profile_comp['SCADAVALUE']
    profile_comp['ERROR'] = profile_comp['ERROR'].abs()

    # Choose the fcas value that best matches the scada value during each
    # 5 min interval (sort by error, keep the first per group).
    profile_comp = profile_comp.sort_values('ERROR')
    error_comp = profile_comp.groupby(
        ['MARKETNAME', 'ELEMENTNUMBER', 'TIMESTAMP'], as_index=False).first()

    # Aggregate the error to compare each scada and fcas element potential
    # match over the whole window.
    error_comp = error_comp.groupby(['MARKETNAME', 'ELEMENTNUMBER'],
                                    as_index=False).sum()

    # Sort the comparisons based on aggregate error.
    error_comp = error_comp.sort_values('ERROR')

    # Drop duplicates of element numbers and scada element names, keeping
    # the record for each with the least error.
    best_matches_scada = error_comp[
        error_comp['SCADAVALUE'].abs() > 0]  # Don't include units 0 values for scada
    best_matches_scada = best_matches_scada.drop_duplicates('ELEMENTNUMBER',
                                                            keep='first')
    best_matches_scada = best_matches_scada.drop_duplicates('MARKETNAME',
                                                            keep='first')

    # Sort the results by element number (numerically, then cast back to
    # str to preserve the original column dtype).
    best_matches_scada['ELEMENTNUMBER'] = pd.to_numeric(
        best_matches_scada['ELEMENTNUMBER'])
    best_matches_scada = best_matches_scada.sort_values('ELEMENTNUMBER')
    best_matches_scada['ELEMENTNUMBER'] = best_matches_scada[
        'ELEMENTNUMBER'].astype(str)

    # Give error as a percentage.
    best_matches_scada['ERROR'] = best_matches_scada[
        'ERROR'] / best_matches_scada['SCADAVALUE']

    # drop matches with error greater than 100 %
    best_matches_scada = best_matches_scada[(best_matches_scada['ERROR'] < 1)
                                            &
                                            (best_matches_scada['ERROR'] > -1)]

    best_matches_scada = best_matches_scada.loc[:, ('ELEMENTNUMBER',
                                                    'MARKETNAME', 'ERROR')]

    if select_columns is not None:
        best_matches_scada = best_matches_scada.loc[:, select_columns]
    if filter_cols is not None:
        best_matches_scada = filters.filter_on_column_value(
            best_matches_scada, filter_cols, filter_values)

    return best_matches_scada