def filter_turbine_data(self):
    """
    Flag turbine records that do not look like normal turbine operation.

    For every turbine in ``self._turbs`` the matching frame in
    ``self._scada_dict`` is modified in place: the columns 'flag_range',
    'flag_frozen', 'flag_window' and 'flag_bin' are filled from the
    corresponding ``filters`` routines, and 'flag_final' is set to the
    element-wise OR of those four columns.

    Args:
        (None)

    Returns:
        (None)
    """
    scada_by_turbine = self._scada_dict

    for turbine_id in self._turbs:
        scada = scada_by_turbine[turbine_id]

        # Upper power limit for the bin filter: the largest observed power
        # scaled by self._max_power_filter
        bin_power_limit = self._max_power_filter * scada['power_kw'].max()

        # Both filter inputs are read once and shared by all filters below
        wind_speed = scada.loc[:, 'windspeed_ms']
        power = scada.loc[:, 'power_kw']

        # Wind speeds outside the 0-40 range
        scada.loc[:, 'flag_range'] = filters.range_flag(
            wind_speed, below=0, above=40)

        # Frozen/unresponsive wind speed sensor
        scada.loc[:, 'flag_frozen'] = filters.unresponsive_flag(
            wind_speed, threshold=3)

        # Power outside 20-2000 while wind speed is inside the 5-40 window
        scada.loc[:, 'flag_window'] = filters.window_range_flag(
            window_col=wind_speed,
            window_start=5.,
            window_end=40,
            value_col=power,
            value_min=20.,
            value_max=2000.)

        # Wind speed outliers within power bins (median-centred, scalar threshold)
        scada.loc[:, 'flag_bin'] = filters.bin_filter(
            bin_col=power,
            value_col=wind_speed,
            bin_width=100,
            threshold=2.,
            center_type='median',
            bin_min=20.,
            bin_max=bin_power_limit,
            threshold_type='scalar',
            direction='all')

        # 'flag_final' is true wherever any individual flag is true
        any_flag = scada.loc[:, 'flag_range']
        for flag_name in ('flag_window', 'flag_bin', 'flag_frozen'):
            any_flag = any_flag | scada.loc[:, flag_name]
        scada.loc[:, 'flag_final'] = any_flag
def filter_turbine_data(self):
    """
    Flag turbine records that do not look like normal turbine operation.

    For every turbine in ``self._turbs`` the matching frame in
    ``self._scada_dict`` is modified in place:

    - rows with NaN in 'wmet_wdspd_avg' or 'energy_kwh' are dropped;
    - 'flag_neg', 'flag_range', 'flag_frozen', 'flag_window' and 'flag_bin'
      are filled from the corresponding ``filters`` routines, with the power
      thresholds expressed relative to the turbine's largest observed
      'wtur_W_avg' value;
    - 'flag_final' is the element-wise OR of the range, window, bin and
      frozen flags ('flag_neg' is not part of it);
    - 'wtur_W_avg' is set to zero on the rows marked by 'flag_neg'.

    Args:
        (None)

    Returns:
        (None)
    """
    scada_by_turbine = self._scada_dict

    for turbine_id in self._turbs:
        scada = scada_by_turbine[turbine_id]

        # Largest observed power stands in for the turbine capacity; it is
        # taken before the NaN rows are dropped
        capacity = scada['wtur_W_avg'].max()

        # Upper power limit for the bin filter
        bin_power_limit = self._run.max_power_filter * capacity

        # Discard records lacking SCADA wind speed or energy
        scada.dropna(subset=['wmet_wdspd_avg', 'energy_kwh'], inplace=True)

        # Both filter inputs are read once (after the drop) and shared below
        wind_speed = scada.loc[:, 'wmet_wdspd_avg']
        power = scada.loc[:, 'wtur_W_avg']

        # Power outside 0..capacity; presumably this only catches negative
        # readings, since capacity is the observed maximum
        scada.loc[:, 'flag_neg'] = filters.range_flag(
            power, below=0, above=capacity)

        # Wind speeds outside the 0-40 range
        scada.loc[:, 'flag_range'] = filters.range_flag(
            wind_speed, below=0, above=40)

        # Frozen/unresponsive wind speed sensor
        scada.loc[:, 'flag_frozen'] = filters.unresponsive_flag(
            wind_speed, threshold=3)

        # Power outside 2%-120% of capacity while wind speed is inside the
        # 5-40 window
        scada.loc[:, 'flag_window'] = filters.window_range_flag(
            window_col=wind_speed,
            window_start=5.,
            window_end=40,
            value_col=power,
            value_min=0.02 * capacity,
            value_max=1.2 * capacity)

        # Wind speed outliers within power bins; the threshold comes from the
        # current run settings
        scada.loc[:, 'flag_bin'] = filters.bin_filter(
            bin_col=power,
            value_col=wind_speed,
            bin_width=0.06 * capacity,
            threshold=self._run.wind_bin_thresh,
            center_type='median',
            bin_min=0.01 * capacity,
            bin_max=bin_power_limit,
            threshold_type='scalar',
            direction='all')

        # 'flag_final' is true wherever any of these four flags is true
        any_flag = scada.loc[:, 'flag_range']
        for flag_name in ('flag_window', 'flag_bin', 'flag_frozen'):
            any_flag = any_flag | scada.loc[:, flag_name]
        scada.loc[:, 'flag_final'] = any_flag

        # Zero out the power readings marked by 'flag_neg'
        scada.loc[scada['flag_neg'], 'wtur_W_avg'] = 0
def test_range_flag(self):
    """Check that range_flag marks the values lying outside -0.5..0.5."""
    values = pd.Series(np.array([-1, 0, 1]))
    expected = pd.Series([True, False, True])

    flagged = filters.range_flag(values, -0.5, 0.5)

    self.assertTrue(flagged.equals(expected))
def filter_outliers(self, n):
    """
    Return the aggregate data that passes outlier filtering for the current run.

    Rows are kept when the combined availability and curtailment loss is
    below the run's loss threshold and the NaN flag is off, and when none of
    the range, unresponsive-sensor and window filters flags them.

    The result is memoized in ``self.outlier_filtering`` under the key
    (reanalysis product, loss threshold), so a combination that has already
    been seen during the Monte Carlo simulation is returned without being
    recomputed.

    Args:
        n(:obj:`float`): Monte Carlo iteration (not used in the body)

    Returns:
        :obj:`pandas.DataFrame`: Filtered monthly/daily data ready for linear regression
    """
    product = self._run.reanalysis_product
    cache_key = (product, self._run.loss_threshold)

    # Reuse the stored result when this combination was already processed
    if cache_key in self.outlier_filtering:
        return self.outlier_filtering[cache_key]

    aggregate = self._aggregate.df

    # Pre-filter on combined losses and on the NaN flag. The flag is compared
    # with `== False` (not negated with `~`) exactly as before.
    combined_loss = aggregate['availability_pct'] + aggregate['curtailment_pct']
    usable = (combined_loss < self._run.loss_threshold) & (aggregate['nan_flag'] == False)
    candidates = aggregate.loc[usable, :]

    # Plant capacity scaled to energy per time step (per the original note:
    # MW -> GWh); used for the window filter bounds
    energy_capacity = self._plant._plant_capacity / 1000. * self._hours_in_res

    # Range filter on wind speed; `assign` yields a new frame, which the
    # remaining flag columns are then written into
    candidates = candidates.assign(
        flag_range=filters.range_flag(candidates[product], below=0, above=40))

    # Frozen/unresponsive sensor filter
    candidates.loc[:, 'flag_frozen'] = filters.unresponsive_flag(
        candidates[product], threshold=3)

    # Energy outside 2%-120% of capacity while wind speed is inside the 5-40 window
    candidates.loc[:, 'flag_window'] = filters.window_range_flag(
        window_col=candidates[product],
        window_start=5.,
        window_end=40,
        value_col=candidates['energy_gwh'],
        value_min=0.02 * energy_capacity,
        value_max=1.2 * energy_capacity)

    # 'flag_final' is true wherever any individual flag is true
    candidates.loc[:, 'flag_final'] = (
        candidates.loc[:, 'flag_range']
        | candidates.loc[:, 'flag_frozen']
        | candidates.loc[:, 'flag_window'])

    unflagged = candidates.loc[:, 'flag_final'] == False

    # Base columns first, then the optional groups selected by the settings
    column_groups = [[product, 'energy_gwh', 'availability_gwh', 'curtailment_gwh']]
    if self.reg_winddirection:
        column_groups.append([product + '_wd', product + '_u_ms', product + '_v_ms'])
    if self.reg_temperature:
        column_groups.append([product + '_temperature_K'])
    if self.time_resolution == 'M':
        column_groups.append(['num_days_expected'])

    valid_data = candidates.loc[unflagged, column_groups[0]]
    for extra_columns in column_groups[1:]:
        valid_data = pd.concat(
            [valid_data, candidates.loc[unflagged, extra_columns]], axis=1)

    # Store the result for later iterations
    self.outlier_filtering[cache_key] = valid_data

    return valid_data