예제 #1
0
    def filter_turbine_data(self):
        """
        Flag SCADA records that are not representative of normal turbine operation.

        For every turbine listed in ``self._turbs``, the matching frame in
        ``self._scada_dict`` receives four flag columns produced by the ``filters``
        module (``flag_range``, ``flag_frozen``, ``flag_window``, ``flag_bin``) and a
        ``flag_final`` column that is the element-wise OR of those four. The frames
        are modified in place.

        Args:
            (None)

        Returns:
            (None)
        """
        scada_by_turbine = self._scada_dict

        for turbine_id in self._turbs:
            scada = scada_by_turbine[turbine_id]

            # Upper limit of the bin filter: configured fraction of the largest observed power
            bin_upper_limit = self._max_power_filter * scada.power_kw.max()

            # The two input series every filter below works on
            wind_speed = scada.loc[:, 'windspeed_ms']
            power = scada.loc[:, 'power_kw']

            # Range filter on wind speed
            scada.loc[:, 'flag_range'] = filters.range_flag(
                wind_speed, below=0, above=40)

            # Frozen/unresponsive sensor filter on wind speed
            scada.loc[:, 'flag_frozen'] = filters.unresponsive_flag(
                wind_speed, threshold=3)

            # Window range filter: power checked within a wind speed window
            scada.loc[:, 'flag_window'] = filters.window_range_flag(
                window_col=wind_speed,
                window_start=5.,
                window_end=40,
                value_col=power,
                value_min=20.,
                value_max=2000.)

            # Bin-based filter: wind speed checked within power bins
            scada.loc[:, 'flag_bin'] = filters.bin_filter(
                bin_col=power,
                value_col=wind_speed,
                bin_width=100,
                threshold=2.,
                center_type='median',
                bin_min=20.,
                bin_max=bin_upper_limit,
                threshold_type='scalar',
                direction='all')

            # 'Final' flag is true wherever any of the individual flags is true
            any_flag = scada.loc[:, 'flag_range']
            for flag_name in ('flag_window', 'flag_bin', 'flag_frozen'):
                any_flag = any_flag | scada.loc[:, flag_name]
            scada.loc[:, 'flag_final'] = any_flag
    def filter_turbine_data(self):
        """
        Flag SCADA records that are not representative of normal turbine operation.

        For every turbine listed in ``self._turbs``, the matching frame in
        ``self._scada_dict`` is modified in place:

        * rows with NaN in ``wmet_wdspd_avg`` or ``energy_kwh`` are dropped;
        * ``flag_neg``, ``flag_range``, ``flag_frozen``, ``flag_window`` and ``flag_bin``
          are computed with the ``filters`` module, with power thresholds scaled by the
          largest ``wtur_W_avg`` value seen before the NaN rows are dropped;
        * ``flag_final`` is the element-wise OR of the range, window, bin and frozen
          flags (``flag_neg`` is not part of it);
        * ``wtur_W_avg`` is set to 0 on every row where ``flag_neg`` is true.

        Args:
            (None)

        Returns:
            (None)
        """
        scada_by_turbine = self._scada_dict

        for turbine_id in self._turbs:
            scada = scada_by_turbine[turbine_id]

            # Largest observed power stands in for the turbine capacity
            capacity = scada.wtur_W_avg.max()

            # Upper limit of the bin filter, as a configured fraction of capacity
            bin_upper_limit = self._run.max_power_filter * capacity

            # Drop any data where scada wind speed or energy is NaN
            scada.dropna(subset=['wmet_wdspd_avg', 'energy_kwh'], inplace=True)

            # Input series, taken after the NaN rows have been removed
            wind_speed = scada.loc[:, 'wmet_wdspd_avg']
            power = scada.loc[:, 'wtur_W_avg']

            # Range flag on power with bounds 0 and capacity; used below to zero out data
            scada.loc[:, 'flag_neg'] = filters.range_flag(
                power, below=0, above=capacity)

            # Range filter on wind speed
            scada.loc[:, 'flag_range'] = filters.range_flag(
                wind_speed, below=0, above=40)

            # Frozen/unresponsive sensor filter on wind speed
            scada.loc[:, 'flag_frozen'] = filters.unresponsive_flag(
                wind_speed, threshold=3)

            # Window range filter: power checked within a wind speed window
            scada.loc[:, 'flag_window'] = filters.window_range_flag(
                window_col=wind_speed,
                window_start=5.,
                window_end=40,
                value_col=power,
                value_min=0.02 * capacity,
                value_max=1.2 * capacity)

            # Bin-based filter: wind speed checked within power bins
            wind_bin_threshold = self._run.wind_bin_thresh
            scada.loc[:, 'flag_bin'] = filters.bin_filter(
                bin_col=power,
                value_col=wind_speed,
                bin_width=0.06 * capacity,
                threshold=wind_bin_threshold,
                center_type='median',
                bin_min=0.01 * capacity,
                bin_max=bin_upper_limit,
                threshold_type='scalar',
                direction='all')

            # 'Final' flag is true wherever any of these four flags is true
            any_flag = scada.loc[:, 'flag_range']
            for flag_name in ('flag_window', 'flag_bin', 'flag_frozen'):
                any_flag = any_flag | scada.loc[:, flag_name]
            scada.loc[:, 'flag_final'] = any_flag

            # Set power to zero on the rows picked out by flag_neg
            scada.loc[scada['flag_neg'], 'wtur_W_avg'] = 0
예제 #3
0
 def test_range_flag(self):
     """range_flag with bounds (-0.5, 0.5) on [-1, 0, 1] must give [True, False, True]."""
     samples = pd.Series(np.array([-1, 0, 1]))
     expected = pd.Series([True, False, True])

     flagged = filters.range_flag(samples, -0.5, 0.5)

     self.assertTrue(flagged.equals(expected))
예제 #4
0
    def filter_outliers(self, n):
        """
        Return the aggregate data that survives the outlier filters.

        Rows are first restricted to those whose combined availability and curtailment
        loss is below ``self._run.loss_threshold`` and whose ``nan_flag`` is False. A
        range filter, an unresponsive sensor filter and a window filter are then applied,
        and only rows flagged by none of them are kept.

        Results are memoized in ``self.outlier_filtering`` under the key
        ``(reanalysis product, loss threshold)``, so a combination that comes up again
        in the Monte Carlo simulation is returned straight from the dictionary instead
        of being recomputed.

        Args:
            n: Monte Carlo iteration (not referenced in the body)

        Returns:
            :obj:`pandas.DataFrame`: Filtered monthly/daily data ready for linear regression
        """
        run = self._run
        reanal = run.reanalysis_product
        loss_threshold = run.loss_threshold
        cache_key = (reanal, loss_threshold)

        # Already computed for this combination: hand back the stored result
        if cache_key in self.outlier_filtering:
            return self.outlier_filtering[cache_key]

        df = self._aggregate.df

        # First cut: combined losses below the threshold and NaN flag off.
        # `== False` is kept (rather than `~`) to preserve the exact comparison semantics.
        combined_loss = df['availability_pct'] + df['curtailment_pct']
        passes_first_cut = (combined_loss < loss_threshold) & (df['nan_flag'] == False)
        df_sub = df.loc[passes_first_cut, :]

        # Plant capacity scaled to one time step (original note: convert from MW to GWh)
        plant_capac = self._plant._plant_capacity / 1000. * self._hours_in_res

        wind_speed = df_sub[reanal]

        # Range filter on wind speed; assign() yields a new frame to attach the flags to
        df_sub = df_sub.assign(
            flag_range=filters.range_flag(wind_speed, below=0, above=40))

        # Frozen/unresponsive sensor filter
        df_sub.loc[:, 'flag_frozen'] = filters.unresponsive_flag(
            wind_speed, threshold=3)

        # Window range filter: energy checked within a wind speed window
        df_sub.loc[:, 'flag_window'] = filters.window_range_flag(
            window_col=wind_speed,
            window_start=5.,
            window_end=40,
            value_col=df_sub['energy_gwh'],
            value_min=0.02 * plant_capac,
            value_max=1.2 * plant_capac)

        # 'Final' flag is true wherever any of the individual flags is true
        any_flag = df_sub.loc[:, 'flag_range'] | df_sub.loc[:, 'flag_frozen']
        df_sub.loc[:, 'flag_final'] = any_flag | df_sub.loc[:, 'flag_window']

        # Rows that no filter flagged
        unflagged = df_sub.loc[:, 'flag_final'] == False

        # Optional column groups, appended in this order after the base columns
        extra_column_groups = []
        if self.reg_winddirection:
            extra_column_groups.append(
                [reanal + '_wd', reanal + '_u_ms', reanal + '_v_ms'])
        if self.reg_temperature:
            extra_column_groups.append([reanal + '_temperature_K'])
        if self.time_resolution == 'M':
            extra_column_groups.append(['num_days_expected'])

        valid_data = df_sub.loc[
            unflagged,
            [reanal, 'energy_gwh', 'availability_gwh', 'curtailment_gwh']]
        for columns in extra_column_groups:
            valid_data = pd.concat(
                [valid_data, df_sub.loc[unflagged, columns]], axis=1)

        # Remember the result for later iterations with the same combination
        self.outlier_filtering[cache_key] = valid_data

        return valid_data