def test_min_max_scaling():
    """Min-max scaling must give the result the extremes of the reference."""
    sample_size = 1000
    # reference: random sample drawn from a normal distribution
    reference = np.random.normal(0, 0.5, sample_size)
    source = np.arange(sample_size)

    scaled = scaling.min_max(source, reference)

    # lower bound first, then upper bound, must agree with the reference
    for extreme in (np.min, np.max):
        nptest.assert_almost_equal(extreme(reference), extreme(scaled))
def _reference_bias_correction(self, frame, method='linreg', group=None):
    """
    Scale the 'reference' column to the 'candidate' column.

    The scaling parameters are derived from the rows in which both columns
    hold a value (optionally restricted to a single group) and are then
    used to replace the reference column of a copy of the input data.

    Parameters
    ----------
    frame : pd.DataFrame
        The DataFrame with the candidate and reference data
    method : str, optional (default: 'linreg')
        Method for bias correction, one of 'linreg', 'cdf_match',
        'mean_std' or 'min_max'.
    group : int or None, optional (default: None)
        0 or 1. If a group is selected, the bias is calculated only from
        the values of that group and applied to the whole frame. If None,
        the bias is calculated from and applied to the full frame.

    Returns
    -------
    df_reference : pd.DataFrame or None
        The bias corrected reference column of the input data, or None
        if the selected data holds fewer than two rows.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported methods (only checked
        when there is enough data to attempt a correction).
    """
    dframe = self.get_group_data(
        None, frame,
        columns=[self.candidate_col_name, self.reference_col_name])

    # NOTE(review): with fewer than two rows no correction is attempted and
    # None is returned -- confirm that callers expect this.
    if dframe.index.size <= 1:
        return None

    df = dframe.copy(True)
    # Only rows where candidate AND reference are valid are used for fitting.
    valid = df.dropna()

    # The reference data is changed to fit the candidate.
    # `is not None` (instead of truthiness) so that group 0 is honoured.
    if group is not None:
        src = self.get_group_data(group, valid,
                                  columns=[self.reference_col_name])
        src = src[self.reference_col_name].values
        can = self.get_group_data(group, valid,
                                  columns=[self.candidate_col_name])
        can = can[self.candidate_col_name].values
    else:
        src = valid[self.reference_col_name].values
        can = valid[self.candidate_col_name].values

    if method == 'linreg':
        # Fit on the selected values, apply to the complete column.
        slope, inter = linreg_params(src, can)
        df[self.reference_col_name] = \
            linreg_stored_params(df[self.reference_col_name], slope, inter)
    elif method == 'cdf_match':
        percentiles = [0, 5, 10, 30, 50, 70, 90, 95, 100]
        # Without values on either side the column is left unchanged.
        if can.size != 0 and src.size != 0:
            perc_can = np.array(np.percentile(can, percentiles))
            perc_src = np.array(np.percentile(src, percentiles))
            df[self.reference_col_name] = \
                lin_cdf_match_stored_params(
                    df[self.reference_col_name].values,
                    perc_src, perc_can,
                    min_val=0, max_val=None)
    elif method == 'mean_std':
        # NOTE(review): the result presumably has the length of `src`; if
        # rows were dropped or a group was selected this assignment likely
        # fails on a length mismatch -- confirm and fix with the maintainers.
        df[self.reference_col_name] = mean_std(src, can)
    elif method == 'min_max':
        # NOTE(review): same length caveat as for 'mean_std' -- confirm.
        df[self.reference_col_name] = min_max(src, can)
    else:
        raise ValueError(
            method, 'Method for bias correction is not supported')

    return df[[self.reference_col_name]]