Ejemplo n.º 1
0
def test_lin_cdf_match_stored_params():
    """
    Check percentile-based scaling against a known linear mapping.

    The reference percentiles are exactly ten times the source
    percentiles, and the test expects every scaled value to be ten
    times its input.
    """
    source_percentiles = [10, 15, 22]
    reference_percentiles = [100, 150, 220]

    # 0..24 reaches below 10 and above 22, so values outside the
    # percentile range are exercised as well
    samples = np.arange(25)

    scaled = scaling.lin_cdf_match_stored_params(
        samples, source_percentiles, reference_percentiles)

    nptest.assert_almost_equal(scaled, samples * 10)
Ejemplo n.º 2
0
def test_lin_cdf_match_stored_params():
    """
    Scaling with stored percentiles should reproduce a factor-of-ten map.

    Source percentiles (10, 15, 22) are paired with reference
    percentiles (100, 150, 220); the input is expected to come back
    multiplied by ten.
    """
    percentile_pairs = {
        'src': [10, 15, 22],
        'ref': [100, 150, 220],
    }

    # the input range 0..24 is wider than the source percentiles, which
    # also covers scaling of data outside of the original range
    values = np.arange(25)

    result = scaling.lin_cdf_match_stored_params(
        values, percentile_pairs['src'], percentile_pairs['ref'])

    nptest.assert_almost_equal(result, values * 10)
Ejemplo n.º 3
0
    def scale(self, data, reference_index, gpi_info):
        """
        Scale every non-reference column of ``data`` to the reference
        column using the percentiles returned by ``get_parameters``.

        Parameters
        ----------
        data: pandas.DataFrame
            temporally matched dataset
        reference_index: int
            Position of the column in ``data`` that holds the
            scaling reference.
        gpi_info: tuple
            tuple of at least, (gpi, lon, lat)
            Where gpi has to be the grid point indices
            of the grid of this scaler.

        Returns
        -------
        data: pandas.DataFrame
            New frame in which all non-reference columns are replaced by
            their scaled values; the reference column is put back
            unchanged at ``reference_index``.

        Raises
        ------
        ValueError
            if scaling is not successful
        """
        percentiles = self.get_parameters(data, gpi_info[0])

        reference_name = data.columns.values[reference_index]
        reference = data[reference_name]

        # work on the frame without the reference so that the loop below
        # only touches the columns that need scaling
        to_scale = data.drop([reference_name], axis=1)
        for column in to_scale:
            column_percentiles = percentiles[column]
            target_percentiles = percentiles[reference_name]
            matched = lin_cdf_match_stored_params(
                to_scale[column].values,
                column_percentiles,
                target_percentiles)
            to_scale[column] = pd.Series(matched, index=to_scale.index)

        # restore the original column order
        to_scale.insert(reference_index, reference.name, reference)
        return to_scale
Ejemplo n.º 4
0
    def scale(self, data, reference_index, gpi_info):
        """
        Match all columns of ``data`` to the column found at
        ``reference_index``.

        Parameters
        ----------
        data: pandas.DataFrame
            temporally matched dataset
        reference_index: int
            Which column of the data contains the
            scaling reference.
        gpi_info: tuple
            tuple of at least, (gpi, lon, lat)
            Where gpi has to be the grid point indices
            of the grid of this scaler.

        Returns
        -------
        data: pandas.DataFrame
            Copy of the input with every non-reference column scaled and
            the reference column re-inserted at its original position.

        Raises
        ------
        ValueError
            if scaling is not successful
        """
        gpi = gpi_info[0]
        stored = self.get_parameters(data, gpi)

        reference_name = data.columns.values[reference_index]
        reference = data[reference_name]

        # the reference itself is not scaled: take it out, scale the
        # remaining columns, then put it back
        scaled = data.drop([reference_name], axis=1)
        for name in scaled:
            src_percentiles = stored[name]
            ref_percentiles = stored[reference_name]
            raw_values = scaled[name].values
            scaled[name] = pd.Series(
                lin_cdf_match_stored_params(
                    raw_values, src_percentiles, ref_percentiles),
                index=scaled.index,
            )

        scaled.insert(reference_index, reference.name, reference)
        return scaled
Ejemplo n.º 5
0
def test_lin_cdf_match_stored_params_min_max():
    """
    Check percentile-based scaling with minimum/maximum capping.

    The call passes ``min_val=85`` and ``max_val=230``; the expected
    output is flat at those values at both ends of the input range.
    """
    source_percentiles = [10, 15, 22]
    reference_percentiles = [100, 150, 220]

    # 0..24 reaches beyond the source percentiles on both sides, so the
    # capping is exercised at the lower and at the upper end
    samples = np.arange(25)

    scaled = scaling.lin_cdf_match_stored_params(
        samples,
        source_percentiles,
        reference_percentiles,
        max_val=230,
        min_val=85)

    # inputs 0..8 -> 85, input 9 -> 90, inputs 10..22 -> 100..220 in
    # steps of ten, inputs 23 and 24 -> 230
    expected = np.array(
        [85] * 9 + [90] + list(range(100, 221, 10)) + [230, 230])
    nptest.assert_almost_equal(scaled, expected)
Ejemplo n.º 6
0
def test_lin_cdf_match_stored_params_min_max():
    """
    Scaling with stored percentiles, limited by ``min_val`` and
    ``max_val``.

    Expected values are 85 for the nine smallest inputs and 230 for the
    two largest, with steps of ten in between (see ``expected`` below).
    """
    limits = {'min_val': 85, 'max_val': 230}
    perc_source = [10, 15, 22]
    perc_reference = [100, 150, 220]

    # this also tests scaling of data outside of the original range
    values = np.arange(25)

    result = scaling.lin_cdf_match_stored_params(
        values, perc_source, perc_reference, **limits)

    expected = np.array([
        85, 85, 85, 85, 85, 85, 85, 85, 85,  # inputs 0..8
        90,                                  # input 9
        100, 110, 120, 130, 140, 150, 160,   # inputs 10..16
        170, 180, 190, 200, 210, 220,        # inputs 17..22
        230, 230,                            # inputs 23..24
    ])
    nptest.assert_almost_equal(result, expected)
Ejemplo n.º 7
0
    def _reference_bias_correction(self, frame, method='linreg', group=None):
        """
        Scales the 'reference' column to the 'candidate' column via fitting
        of regression parameters.

        Parameters
        ----------
        frame : DataFrame
            The DataFrame with the candidate and reference data
        method : str, optional (default: 'linreg')
            Method for bias correction as described in pytesmo.
            One of 'linreg', 'cdf_match', 'mean_std', 'min_max'.
        group : int or None, optional (default: None)
            0 or 1, if a group is selected, bias is calculated only for values
            of the group and applied to the whole frame, if None is selected,
            bias is calculated from and applied to the full frame.

        Returns
        -------
        df_reference : pd.DataFrame or None
            The bias corrected input data frame reference column.
            None if the selected frame has one row or fewer.

        Raises
        ------
        ValueError
            If ``method`` is not one of the supported methods.
        """
        dframe = self.get_group_data(
            None,
            frame,
            columns=[self.candidate_col_name, self.reference_col_name])
        if dframe.index.size <= 1:
            # nothing to fit on; callers receive None in this case
            return None

        df = dframe.copy(True)
        # parameters are fitted on rows where both columns are present
        complete = df.dropna()

        # Compare against None explicitly: group 0 is a valid selection and
        # must not be treated like "no group".
        if group is not None:
            # reference data is changed...to fit the candidate!!
            src = self.get_group_data(group,
                                      complete,
                                      columns=[self.reference_col_name])
            src = src[self.reference_col_name].values

            can = self.get_group_data(group,
                                      complete,
                                      columns=[self.candidate_col_name])
            can = can[self.candidate_col_name].values
        else:
            # reference data is changed to fit the candidate
            src = complete[self.reference_col_name].values
            can = complete[self.candidate_col_name].values

        if method == 'linreg':
            slope, inter = linreg_params(src, can)
            df[self.reference_col_name] = \
                linreg_stored_params(df[self.reference_col_name], slope, inter)
        elif method == 'cdf_match':
            percentiles = [0, 5, 10, 30, 50, 70, 90, 95, 100]
            # with an empty source or candidate the column is left as is
            if can.size != 0 and src.size != 0:
                perc_can = np.array(np.percentile(can, percentiles))
                perc_src = np.array(np.percentile(src, percentiles))

                df[self.reference_col_name] = \
                    lin_cdf_match_stored_params(df[self.reference_col_name].values,
                                                perc_src,
                                                perc_can,
                                                min_val=0, max_val=None)
        elif method == 'mean_std':
            # NOTE(review): src/can come from the NaN-free (and possibly
            # group-restricted) rows while the result is assigned to the
            # full column - presumably this needs matching lengths; verify.
            df[self.reference_col_name] = mean_std(src, can)
        elif method == 'min_max':
            # NOTE(review): same length caveat as for 'mean_std' - verify.
            df[self.reference_col_name] = min_max(src, can)
        else:
            raise ValueError(
                method, 'Method for bias correction is not supported')

        return df[[self.reference_col_name]]