def test_lin_cdf_match_stored_params():
    """
    Check scaling that is driven by explicitly supplied percentiles.

    The reference percentiles are exactly ten times the source
    percentiles, so every scaled value is expected to be ten times its
    input.
    """
    source_percentiles = [10, 15, 22]
    reference_percentiles = [100, 150, 220]

    # 0..24 reaches below 10 and above 22, so values outside of the
    # percentile range are covered by the same assertion.
    values = np.arange(25)
    expected = values * 10

    scaled = scaling.lin_cdf_match_stored_params(
        values, source_percentiles, reference_percentiles)

    nptest.assert_almost_equal(scaled, expected)
def scale(self, data, reference_index, gpi_info):
    """
    Scale every non-reference column of ``data`` to the reference column.

    Parameters
    ----------
    data: pandas.DataFrame
        temporally matched dataset
    reference_index: int
        Position of the column that holds the scaling reference.
    gpi_info: tuple
        tuple of at least, (gpi, lon, lat)
        Where gpi has to be the grid point indices
        of the grid of this scaler. Only the first element is used here.

    Returns
    -------
    data: pandas.DataFrame
        Frame with the scaled columns and the untouched reference column
        re-inserted at ``reference_index``.

    Raises
    ------
    ValueError
        Documented as raised if scaling is not successful; this method
        does not raise it itself, so it presumably comes from the
        helpers it calls.
    """
    grid_point = gpi_info[0]
    stored = self.get_parameters(data, grid_point)

    ref_column = data.columns.values[reference_index]
    ref_series = data[ref_column]

    # ``drop`` returns a new frame, the reference is added back below.
    data = data.drop([ref_column], axis=1)

    for column in data.columns:
        source_perc = stored[column]
        target_perc = stored[ref_column]
        matched = lin_cdf_match_stored_params(
            data[column].values, source_perc, target_perc)
        data[column] = pd.Series(matched, index=data.index)

    data.insert(reference_index, ref_series.name, ref_series)
    return data
def scale(self, data, reference_index, gpi_info):
    """
    Match all columns of ``data`` to the column found at
    ``reference_index`` using the percentiles from ``get_parameters``.

    Parameters
    ----------
    data: pandas.DataFrame
        temporally matched dataset
    reference_index: int
        Which column of the data contains the scaling reference.
    gpi_info: tuple
        tuple of at least, (gpi, lon, lat)
        Where gpi has to be the grid point indices
        of the grid of this scaler.

    Returns
    -------
    data: pandas.DataFrame
        The scaled columns plus the unchanged reference column, which is
        put back at its original position.

    Raises
    ------
    ValueError
        Stated by the original documentation for unsuccessful scaling;
        not raised directly in this method (presumably by a callee).
    """
    params = self.get_parameters(data, gpi_info[0])

    name_of_reference = data.columns.values[reference_index]
    reference_column = data[name_of_reference]

    # Work on a frame without the reference so that only the other
    # columns are rescaled.
    data = data.drop([name_of_reference], axis=1)

    for name in data:
        percentiles_of_source = params[name]
        percentiles_of_reference = params[name_of_reference]
        rescaled_values = lin_cdf_match_stored_params(
            data[name].values,
            percentiles_of_source,
            percentiles_of_reference,
        )
        data[name] = pd.Series(rescaled_values, index=data.index)

    data.insert(reference_index, reference_column.name, reference_column)
    return data
def test_lin_cdf_match_stored_params_min_max():
    """
    Check percentile-based scaling together with minimum/maximum capping.

    The expected result is ten times the input, except where that would
    leave the interval [85, 230] given as ``min_val``/``max_val``.
    """
    source_percentiles = [10, 15, 22]
    reference_percentiles = [100, 150, 220]

    # 0..24 also contains values outside of the percentile range
    values = np.arange(25)

    scaled = scaling.lin_cdf_match_stored_params(
        values,
        source_percentiles,
        reference_percentiles,
        max_val=230,
        min_val=85,
    )

    # inputs 0..8 -> 85, input 9 -> 90, inputs 10..22 -> 100..220,
    # inputs 23 and 24 -> 230
    expected = np.array(
        [85] * 9 + [90] + list(range(100, 230, 10)) + [230] * 2)

    nptest.assert_almost_equal(scaled, expected)
def test_lin_cdf_match_stored_params_min_max():
    """
    Scale with given percentiles and verify that the output is capped.

    ``min_val=85`` and ``max_val=230`` are passed to the scaling call;
    the expected array holds exactly these values at its lower and upper
    end.
    """
    src_perc, ref_perc = [10, 15, 22], [100, 150, 220]
    limits = dict(max_val=230, min_val=85)

    # the input range 0..24 exceeds the source percentiles on both sides
    data = np.arange(25)
    result = scaling.lin_cdf_match_stored_params(
        data, src_perc, ref_perc, **limits)

    should_be = np.array([
        85, 85, 85, 85, 85,
        85, 85, 85, 85, 90,
        100, 110, 120, 130, 140,
        150, 160, 170, 180, 190,
        200, 210, 220, 230, 230,
    ])
    nptest.assert_almost_equal(result, should_be)
def _reference_bias_correction(self, frame, method='linreg', group=None):
    """
    Scales the 'reference' column to the 'candidate' column via fitting
    of correction parameters.

    Parameters
    ----------
    frame : DataFrame
        The DataFrame with the candidate and reference data
    method : str, optional (default: 'linreg')
        Method for bias correction; one of 'linreg', 'cdf_match',
        'mean_std' or 'min_max'.
    group : int or None, optional (default: None)
        0 or 1, if a group is selected, bias is calculated only for values
        of the group and applied to the whole frame, if None is selected,
        bias is calculated from and applied to the full frame.

    Returns
    -------
    df_reference : pd.DataFrame
        The bias corrected input data frame reference column. If the
        frame holds at most one row, no correction is fitted and the
        reference column is returned unchanged.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported methods (only checked
        when the frame holds more than one row).
    """
    can_col = self.candidate_col_name
    ref_col = self.reference_col_name

    dframe = self.get_group_data(None, frame, columns=[can_col, ref_col])
    df = dframe.copy(True)

    # Too little data to fit anything: hand back the uncorrected reference
    # instead of failing on an unbound local variable.
    if dframe.index.size <= 1:
        return df[[ref_col]]

    # The reference data is changed to fit the candidate.
    # ``is not None`` (instead of truthiness) so that group 0 is honoured.
    if group is not None:
        src = self.get_group_data(group, df.dropna(), columns=[ref_col])
        src = src[ref_col].values
        can = self.get_group_data(group, df.dropna(), columns=[can_col])
        can = can[can_col].values
    else:
        complete = df.dropna()
        src = complete[ref_col].values
        can = complete[can_col].values

    if method == 'linreg':
        slope, inter = linreg_params(src, can)
        df[ref_col] = linreg_stored_params(df[ref_col], slope, inter)
    elif method == 'cdf_match':
        percentiles = [0, 5, 10, 30, 50, 70, 90, 95, 100]
        # Without any complete observations the reference is left as is.
        if can.size != 0 and src.size != 0:
            perc_can = np.array(np.percentile(can, percentiles))
            perc_src = np.array(np.percentile(src, percentiles))
            df[ref_col] = lin_cdf_match_stored_params(
                df[ref_col].values, perc_src, perc_can,
                min_val=0, max_val=None)
    elif method == 'mean_std':
        # NOTE(review): src only holds the NaN-free (and optionally
        # group-restricted) values; if the helper returns an array of that
        # length, this assignment presumably fails when rows were dropped.
        # TODO confirm.
        df[ref_col] = mean_std(src, can)
    elif method == 'min_max':
        # NOTE(review): same length caveat as for 'mean_std' - TODO confirm.
        df[ref_col] = min_max(src, can)
    else:
        raise ValueError(
            method, 'Method for bias correction is not supported')

    return df[[ref_col]]