Ejemplo n.º 1
0
def test_min_max_scaling():
    """Check that ``scaling.min_max(y, x)`` yields output spanning the range of ``x``.

    ``x`` is a random normal sample and ``y`` is an integer ramp of the same
    length; the minimum and maximum of the scaled output are compared with
    those of ``x``.
    """
    sample_size = 1000
    # Random draws from a normal distribution (mean 0, standard deviation 0.5).
    target = np.random.normal(0, 0.5, sample_size)
    # Monotonic ramp 0 .. sample_size - 1 that is passed as first argument.
    ramp = np.arange(sample_size)

    scaled = scaling.min_max(ramp, target)

    # Both extremes of the output have to coincide with those of the sample.
    for extreme in (np.min, np.max):
        nptest.assert_almost_equal(extreme(target), extreme(scaled))
Ejemplo n.º 2
0
def test_min_max_scaling():
    """Verify the extremes of ``scaling.min_max(y, x)`` against those of ``x``.

    A random normal sample ``x`` and an integer ramp ``y`` of equal length are
    passed to ``scaling.min_max``; the result must share minimum and maximum
    with ``x``.
    """
    count = 1000
    # Normal-distributed sample: mean 0, standard deviation 0.5.
    sample = np.random.normal(0, 0.5, count)
    # Integer ramp 0 .. count - 1, given as the first argument.
    ramp = np.arange(count)

    result = scaling.min_max(ramp, sample)

    expected_low, actual_low = np.min(sample), np.min(result)
    nptest.assert_almost_equal(expected_low, actual_low)

    expected_high, actual_high = np.max(sample), np.max(result)
    nptest.assert_almost_equal(expected_high, actual_high)
Ejemplo n.º 3
0
    def _reference_bias_correction(self, frame, method='linreg', group=None):
        """
        Scales the 'reference' column to the 'candidate' column via fitting
        of regression parameters.

        Parameters
        ----------
        frame : DataFrame
            The DataFrame with the candidate and reference data
        method : str, optional (default: 'linreg')
            Method for bias correction as described in pytesmo. One of
            'linreg', 'cdf_match', 'mean_std' or 'min_max'.
        group : int or None, optional (default: None)
            0 or 1, if a group is selected, bias is calculated only for values
            of the group and applied to the whole frame, if None is selected,
            bias is calculated from and applied to the full frame.

        Returns
        -------
        df_reference : pd.DataFrame or None
            The bias corrected input data frame reference column, or None if
            the selected data holds one row or less.

        Raises
        ------
        ValueError
            If ``method`` is not one of the supported methods (only checked
            when the selected data holds more than one row).
        """
        ref_col = self.reference_col_name
        can_col = self.candidate_col_name

        dframe = self.get_group_data(None, frame, columns=[can_col, ref_col])
        if dframe.index.size <= 1:
            # Too few rows to work with; callers receive None.
            return None

        df = dframe.copy(True)
        # Parameters are fitted on rows where both columns are present.
        valid = df.dropna()

        # Compare against None explicitly: group 0 is a valid selection and
        # must not be mistaken for "no group selected".
        if group is not None:
            # reference data is changed...to fit the candidate!!
            src = self.get_group_data(group, valid, columns=[ref_col])
            src = src[ref_col].values

            can = self.get_group_data(group, valid, columns=[can_col])
            can = can[can_col].values
        else:
            src = valid[ref_col].values  # reference data is changed
            can = valid[can_col].values  # ...to fit the candidate

        if method == 'linreg':
            slope, inter = linreg_params(src, can)
            df[ref_col] = linreg_stored_params(df[ref_col], slope, inter)
        elif method == 'cdf_match':
            percentiles = [0, 5, 10, 30, 50, 70, 90, 95, 100]
            # Without valid values on both sides the column is left as is.
            if can.size != 0 and src.size != 0:
                perc_can = np.array(np.percentile(can, percentiles))
                perc_src = np.array(np.percentile(src, percentiles))

                df[ref_col] = lin_cdf_match_stored_params(
                    df[ref_col].values,
                    perc_src,
                    perc_can,
                    min_val=0, max_val=None)
        elif method == 'mean_std':
            # NOTE(review): src/can only hold the NaN-free (and, if a group is
            # selected, group-restricted) values, so the result may not match
            # the length of the frame column -- confirm this is intended.
            df[ref_col] = mean_std(src, can)
        elif method == 'min_max':
            # NOTE(review): same length caveat as for 'mean_std' -- confirm.
            df[ref_col] = min_max(src, can)
        else:
            raise ValueError(
                method, 'Method for bias correction is not supported')

        return df[[ref_col]]