Example #1
 def test_dispatch_tables_straddle_years(self):
     table = 'FCAS_4_SECOND'
     start_time = '2011/12/31 23:55:04'
     end_time = '2012/01/01 00:05:00'
     print('Testing {} returning values from adjacent years.'.format(table))
     dat_col = defaults.primary_date_columns[table]
     cols = [dat_col, 'ELEMENTNUMBER', 'VARIABLENUMBER']
     filter_cols = ('ELEMENTNUMBER', 'VARIABLENUMBER')
     expected_length = 149
     expected_number_of_columns = 3
     expected_first_time = datetime.strptime(start_time,
                                             '%Y/%m/%d %H:%M:%S')
     expected_last_time = datetime.strptime(
         end_time, '%Y/%m/%d %H:%M:%S') - timedelta(seconds=4)
     data = data_fetch_methods.dynamic_data_compiler(
         start_time,
         end_time,
         table,
         defaults.raw_data_cache,
         select_columns=cols,
         filter_cols=filter_cols,
         filter_values=(['1'], ['3']))
     data = data.sort_values(dat_col)
     data = data.reset_index(drop=True)
     self.assertEqual(expected_length, data.shape[0])
     self.assertEqual(expected_number_of_columns, data.shape[1])
     self.assertEqual(expected_first_time, data[dat_col][0])
     self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
     print('Passed')
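
The excerpt above assumes the test module's imports: pandas as pd, plus
datetime and timedelta from the datetime module, and the NEMOSIS modules
data_fetch_methods and defaults. As a minimal standalone sketch of the same
call, assuming the package exposes these modules via "from nemosis import ..."
(the cache path is a placeholder):

    from nemosis import data_fetch_methods, defaults

    raw_data_cache = 'C:/nemosis_cache'  # placeholder; any writable folder

    # 4 second FCAS values for element 1, variable 3, across the year boundary.
    data = data_fetch_methods.dynamic_data_compiler(
        '2011/12/31 23:55:04', '2012/01/01 00:05:00', 'FCAS_4_SECOND',
        raw_data_cache,
        select_columns=['TIMESTAMP', 'ELEMENTNUMBER', 'VARIABLENUMBER'],
        filter_cols=('ELEMENTNUMBER', 'VARIABLENUMBER'),
        filter_values=(['1'], ['3']))
    print(data.head())
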
Example #2
 def test_dispatch_tables_straddle_years(self):
     start_time = '2017/12/31 23:00:00'
     end_time = '2018/01/01 01:00:00'
     for table in self.table_names:
         print('Testing {} returning values from adjacent years.'.format(
             table))
         dat_col = defaults.primary_date_columns[table]
         table_type = self.table_types[table]
         cols = [dat_col, table_type]
         filter_cols = (table_type, )
         expected_length = 24
         expected_number_of_columns = 2
         expected_first_time = datetime.strptime(start_time,
                                                 '%Y/%m/%d %H:%M:%S')
         expected_last_time = datetime.strptime(
             end_time, '%Y/%m/%d %H:%M:%S') - timedelta(minutes=5)
         if table in [
                 'TRADINGLOAD', 'TRADINGPRICE', 'TRADINGREGIONSUM',
                 'TRADINGINTERCONNECT'
         ]:
             expected_length = 4
             expected_last_time = datetime.strptime('2018/01/01 00:30:00',
                                                    '%Y/%m/%d %H:%M:%S')
         if table == 'BIDPEROFFER_D':
             cols = [dat_col, 'DUID', 'BIDTYPE']
             filter_cols = ('DUID', 'BIDTYPE')
             expected_number_of_columns = 3
         if table == 'BIDDAYOFFER_D':
             cols = [dat_col, 'DUID', 'BIDTYPE']
             filter_cols = ('DUID', 'BIDTYPE')
             expected_number_of_columns = 3
             expected_length = 2
             expected_last_time = expected_last_time.replace(hour=0,
                                                             minute=0)
             expected_first_time = expected_first_time.replace(hour=0,
                                                               minute=0)
         data = data_fetch_methods.dynamic_data_compiler(
             start_time,
             end_time,
             table,
             defaults.raw_data_cache,
             select_columns=cols,
             filter_cols=filter_cols,
             filter_values=self.filter_values[table_type])
         data = data.sort_values(dat_col)
         data = data.reset_index(drop=True)
         self.assertEqual(expected_length, data.shape[0])
         self.assertEqual(expected_number_of_columns, data.shape[1])
         self.assertEqual(expected_first_time, data[dat_col][0])
         self.assertEqual(expected_last_time, data[dat_col].iloc[-1])
         print('Passed')
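
The expected row counts follow from the table resolutions: the dispatch tables
are published at 5 minute resolution and the trading tables at 30 minutes,
with the window start inclusive and the end exclusive (hence an expected last
time of end_time minus one interval). A quick sketch of that arithmetic,
independent of any NEMOSIS call:

    from datetime import datetime, timedelta

    start = datetime(2017, 12, 31, 23, 0)
    end = datetime(2018, 1, 1, 1, 0)

    # Start-inclusive, end-exclusive interval counts over the two hour window.
    print(int((end - start) / timedelta(minutes=5)))   # 24 dispatch rows
    print(int((end - start) / timedelta(minutes=30)))  # 4 trading rows
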
Example #3
 def test_filtering_for_one_interval_returns(self):
     start_time = '2017/05/20 23:00:00'
     end_time = '2017/05/20 23:05:00'
     for table in self.table_names:
         print('Testing {} returning values for 1 interval.'.format(table))
         data = data_fetch_methods.dynamic_data_compiler(
             start_time,
             end_time,
             table,
             defaults.raw_data_cache,
             select_columns=defaults.table_primary_keys[table])
         group_cols = [
             col for col in defaults.table_primary_keys[table]
             if col != 'EFFECTIVEDATE'
         ]
         contains_duplicates = data.duplicated(group_cols).any()
         self.assertFalse(contains_duplicates)
         not_empty = data.shape[0] > 0
         self.assertTrue(not_empty)
         print('Passed')
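
The duplicate check relies on pandas' DataFrame.duplicated, which flags
repeated combinations of the listed columns. A toy illustration of the same
pattern on made-up data:

    import pandas as pd

    df = pd.DataFrame({'DUID': ['A', 'A', 'B'],
                       'BIDTYPE': ['ENERGY', 'ENERGY', 'ENERGY']})
    # True because the ('A', 'ENERGY') combination appears twice.
    print(df.duplicated(['DUID', 'BIDTYPE']).any())
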
Example #4
def fcas4s_scada_match(start_time,
                       end_time,
                       table_name,
                       raw_data_location,
                       select_columns=None,
                       filter_cols=None,
                       filter_values=None):

    # Pull in the 4 second fcas data.
    table_name_fcas4s = 'FCAS_4_SECOND'
    fcas4s = data_fetch_methods.dynamic_data_compiler(start_time, end_time,
                                                      table_name_fcas4s,
                                                      raw_data_location)
    # Pull in the 4 second fcas variable types.
    table_name_variable_types = 'VARIABLES_FCAS_4_SECOND'
    fcas4s_variable_types = data_fetch_methods.static_table(
        start_time, end_time, table_name_variable_types, raw_data_location)

    # Select the variable types that measure MW on an interconnector and Gen_MW from a dispatch unit.
    fcas4s_variable_types = fcas4s_variable_types[
        fcas4s_variable_types['VARIABLETYPE'].isin(['MW', 'Gen_MW'])]
    fcas4s = fcas4s[fcas4s['VARIABLENUMBER'].isin(
        fcas4s_variable_types['VARIABLENUMBER'])]

    # Select just the fcas 4 second data variable columns that we need.
    fcas4s = fcas4s.loc[:, ('TIMESTAMP', 'ELEMENTNUMBER', 'VALUE')]

    # Convert the fcas MW measured values to numeric type.
    fcas4s['VALUE'] = pd.to_numeric(fcas4s['VALUE'])

    # Keep only the readings from the first 20 seconds of each 5 minute
    # interval, then relabel each one with the start of its interval, i.e.
    # round the timestamp down to the nearest 5 minute boundary.
    fcas4s = fcas4s[(fcas4s['TIMESTAMP'].dt.minute.isin(list(range(0, 60, 5))))
                    & (fcas4s['TIMESTAMP'].dt.second < 20)]
    fcas4s['TIMESTAMP'] = fcas4s['TIMESTAMP'].apply(
        lambda dt: datetime(dt.year, dt.month, dt.day, dt.hour, dt.minute))
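    # For example, a reading at 23:55:04 (minute 55, second 4) is kept and
    # relabelled 23:55:00, while a reading at 23:55:24 is discarded.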

    # Pull in the dispatch unit scada data.
    table_name_scada = 'DISPATCH_UNIT_SCADA'
    scada = data_fetch_methods.dynamic_data_compiler(start_time, end_time,
                                                     table_name_scada,
                                                     raw_data_location)
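    # SETTLEMENTDATE marks the end of each 5 minute dispatch interval (AEMO
    # convention), so shifting it back 5 minutes aligns it with the
    # interval-start timestamps used for the fcas data above.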
    scada['SETTLEMENTDATE'] = scada['SETTLEMENTDATE'] - timedelta(minutes=5)
    scada = scada.loc[:, ('SETTLEMENTDATE', 'DUID', 'SCADAVALUE')]
    scada.columns = ['SETTLEMENTDATE', 'MARKETNAME', 'SCADAVALUE']
    scada['SCADAVALUE'] = pd.to_numeric(scada['SCADAVALUE'])

    # Pull in the interconnector scada data, using the intervention records where they exist.
    table_name_inter_flow = 'DISPATCHINTERCONNECTORRES'
    inter_flows = data_fetch_methods.dynamic_data_compiler(
        start_time, end_time, table_name_inter_flow, raw_data_location)
    inter_flows['METEREDMWFLOW'] = pd.to_numeric(inter_flows['METEREDMWFLOW'])
    inter_flows = inter_flows.sort_values('INTERVENTION')
    inter_flows = inter_flows.groupby(['SETTLEMENTDATE', 'INTERCONNECTORID'],
                                      as_index=False).last()
    inter_flows = inter_flows.loc[:, ('SETTLEMENTDATE', 'INTERCONNECTORID',
                                      'METEREDMWFLOW')]
    inter_flows['SETTLEMENTDATE'] = inter_flows['SETTLEMENTDATE'] - timedelta(
        minutes=5)
    inter_flows.columns = ['SETTLEMENTDATE', 'MARKETNAME', 'SCADAVALUE']

    # Combine scada data from interconnectors and dispatch units.
    scada_elements = pd.concat([scada, inter_flows], sort=False)

    # Merge the fcas and scada data on time stamp; this joins every scada
    # element to every fcas element in the same interval so they can be
    # compared.
    profile_comp = pd.merge(fcas4s,
                            scada_elements,
                            'inner',
                            left_on='TIMESTAMP',
                            right_on='SETTLEMENTDATE')

    # Calculate the error between each measurement.
    profile_comp['ERROR'] = profile_comp['VALUE'] - profile_comp['SCADAVALUE']
    profile_comp['ERROR'] = profile_comp['ERROR'].abs()

    # Choose the fcas value that best matches the scada value during the 5 min interval.
    profile_comp = profile_comp.sort_values('ERROR')
    error_comp = profile_comp.groupby(
        ['MARKETNAME', 'ELEMENTNUMBER', 'TIMESTAMP'], as_index=False).first()

    # Aggregate the error to compare each potential scada and fcas element match.
    error_comp = error_comp.groupby(['MARKETNAME', 'ELEMENTNUMBER'],
                                    as_index=False).sum()

    # Sort the comparisons based on aggregate error.
    error_comp = error_comp.sort_values('ERROR')

    # Drop duplicates of element numbers and scada element names, keeping the
    # record for each with the least error. Matches with a zero scada total
    # are excluded first, since those only occur because both fcas and scada
    # showed no dispatch.
    best_matches_scada = error_comp[error_comp['SCADAVALUE'].abs() > 0]
    best_matches_scada = best_matches_scada.drop_duplicates('ELEMENTNUMBER',
                                                            keep='first')
    best_matches_scada = best_matches_scada.drop_duplicates('MARKETNAME',
                                                            keep='first')
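    # Together with the earlier sort by ERROR, this is a greedy one-to-one
    # assignment: the lowest aggregate error match claims each ELEMENTNUMBER
    # and each MARKETNAME.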

    # Sort the matches by element number, converting to numeric so the
    # ordering is numerical rather than lexical, then back to string.
    best_matches_scada['ELEMENTNUMBER'] = pd.to_numeric(
        best_matches_scada['ELEMENTNUMBER'])
    best_matches_scada = best_matches_scada.sort_values('ELEMENTNUMBER')
    best_matches_scada['ELEMENTNUMBER'] = best_matches_scada[
        'ELEMENTNUMBER'].astype(str)

    # Express the error as a fraction of the scada value.
    best_matches_scada['ERROR'] = best_matches_scada[
        'ERROR'] / best_matches_scada['SCADAVALUE']

    # Drop matches with an absolute error greater than 100%.
    best_matches_scada = best_matches_scada[(best_matches_scada['ERROR'] < 1) &
                                            (best_matches_scada['ERROR'] > -1)]

    best_matches_scada = best_matches_scada.loc[:, ('ELEMENTNUMBER',
                                                    'MARKETNAME', 'ERROR')]

    if select_columns is not None:
        best_matches_scada = best_matches_scada.loc[:, select_columns]

    if filter_cols is not None:
        best_matches_scada = filters.filter_on_column_value(
            best_matches_scada, filter_cols, filter_values)

    return best_matches_scada
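
The function body above needs module-level imports: pandas as pd, datetime and
timedelta from the datetime module, and the NEMOSIS data_fetch_methods and
filters modules. A rough usage sketch (the import path and cache folder are
assumptions):

    import pandas as pd
    from datetime import datetime, timedelta
    from nemosis import data_fetch_methods, filters

    # table_name is accepted for interface consistency but never referenced in
    # the body above, so any label works; the cache path is a placeholder.
    matches = fcas4s_scada_match('2017/05/20 23:00:00', '2017/05/20 23:30:00',
                                 'FCAS4S_SCADA_MATCH', 'C:/nemosis_cache')
    print(matches)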