Example #1
0
    def create_model(realdata=True, force_implementation=None, filter_p=None):
        """
        Build a LinearRegression model from real or artificial test data.

        Parameters
        ----------
        realdata : bool, optional (default: True)
            If True, test time series 325278 is read, cut to the period
            1998-01-01 to 2007-01-01 and the candidate column is adjusted
            with linear regression parameters fitted against the reference.
            If False, the artificial 'asc2' data set is used as returned.
        force_implementation : optional (default: None)
            Passed on to LinearRegression unchanged.
        filter_p : optional (default: None)
            Passed on to LinearRegression unchanged.

        Returns
        -------
        regress : LinearRegression
            Model built from the candidate and reference series between
            start and end, always with fit_intercept=True.
        """
        if not realdata:
            ts, breaktime, [start, end] = create_artificial_test_data('asc2')
        else:
            ts = read_test_data(325278)
            start = datetime(1998, 1, 1)
            end = datetime(2007, 1, 1)
            breaktime = datetime(2002, 6, 19)
            ts = ts[start:end]
            column_names = {
                'CCI_41_COMBINED': 'candidate',
                'merra2': 'reference',
            }
            ts.rename(columns=column_names, inplace=True)

            # Bias correction: fit the parameters on rows without NaN only,
            # then apply them to the whole candidate column (scale).
            complete = ts.dropna()
            slope, inter = linreg_params(complete.candidate,
                                         complete.reference)
            ts.candidate = linreg_stored_params(ts.candidate, slope, inter)

        candidate = ts['candidate'].loc[start:end]
        reference = ts['reference'].loc[start:end]
        return LinearRegression(candidate,
                                reference,
                                filter_p=filter_p,
                                fit_intercept=True,
                                force_implementation=force_implementation)
Example #2
0
def test_linreg_with_nan():
    """
    Check linear-regression scaling when the source series contains NaNs.

    The regression parameters are fitted on the NaN-free rows only and then
    applied to the full 'x' column. From the first valid row onwards the
    scaled 'x' must match 'y', and the frame must keep all n rows.
    """
    n = 1000
    first_valid = 10  # rows before this position are NaN in 'x'

    x = np.arange(float(n))
    x[:first_valid] = np.nan
    y = 0.5 * np.arange(float(n))
    df = pd.DataFrame(data={'x': x, 'y': y})

    valid = df.dropna()
    slope, inter = scaling.linreg_params(valid['x'].values,
                                         valid['y'].values)
    df['x'] = scaling.linreg_stored_params(df['x'].values, slope, inter)

    scaled = df.loc[first_valid:, 'x'].values
    expected = df.loc[first_valid:, 'y'].values
    nptest.assert_almost_equal(scaled, expected)

    assert df.index.size == n
Example #3
0
def test_linreg_with_nan():
    """
    Linear-regression scaling must cope with NaNs in the source column.

    Parameters are derived from the rows without NaN and applied to the
    complete 'x' column; the non-NaN part of the result is compared to 'y'
    and the number of rows is verified to be unchanged.
    """
    n = 1000
    n_missing = 10  # leading NaN values in 'x'

    values = np.arange(float(n))
    x = values.copy()
    x[0:n_missing] = np.nan
    df = pd.DataFrame(data={'x': x, 'y': values * 0.5})

    # fit on complete rows only
    complete_rows = df.dropna()
    src = complete_rows['x'].values
    ref = complete_rows['y'].values
    slope, inter = scaling.linreg_params(src, ref)

    # apply to all rows, NaNs included
    df['x'] = scaling.linreg_stored_params(df['x'].values, slope, inter)

    tail = df.loc[n_missing:]
    nptest.assert_almost_equal(tail['x'].values, tail['y'].values)

    assert df.index.size == n
Example #4
0
    def create_model(realdata=True, poly_order=2, filter_p=None):
        """
        Build a HigherOrderRegression model from real or artificial data.

        Parameters
        ----------
        realdata : bool, optional (default: True)
            If True, test time series 325278 is read, its columns are
            renamed to 'can' and 'ref', and 'can' is adjusted with linear
            regression parameters fitted against 'ref'. If False, the
            artificial 'asc2' data set is used as returned.
        poly_order : int, optional (default: 2)
            Passed on to HigherOrderRegression unchanged.
        filter_p : optional (default: None)
            Passed on to HigherOrderRegression unchanged.

        Returns
        -------
        regress : HigherOrderRegression
            Model built from the 'can' and 'ref' series between start and
            end.
        """
        if not realdata:
            # NOTE(review): the columns 'can' and 'ref' are read below without
            # renaming; presumably the artificial data provides them - confirm.
            ts, breaktime, [start, end] = create_artificial_test_data('asc2')
        else:
            ts = read_test_data(325278)
            start = datetime(1998, 1, 1)
            end = datetime(2007, 1, 1)
            column_names = {'CCI_41_COMBINED': 'can', 'merra2': 'ref'}
            ts.rename(columns=column_names, inplace=True)

            # Fit on rows without NaN, apply to the whole column (scale).
            complete = ts.dropna()
            slope, inter = linreg_params(complete['can'], complete['ref'])
            ts['can'] = linreg_stored_params(ts['can'], slope, inter)

        candidate = ts['can'].loc[start:end]
        reference = ts['ref'].loc[start:end]
        return HigherOrderRegression(candidate,
                                     reference,
                                     poly_order=poly_order,
                                     filter_p=filter_p)
Example #5
0
    def _reference_bias_correction(self, frame, method='linreg', group=None):
        """
        Scales the 'reference' column to the 'candidate' column via fitting
        of regression parameters.

        Parameters
        -------
        frame : DataFrame
            The DataFrame with the candidate and reference data
        method : str, optional (default: 'linreg')
            Method for bias correction as described in pytesmo
        group : int or None, optional (default: None)
            0 or 1, if a group is selected, bias is calculated only for values
            of the group and applied to the whole frame, if None is selected,
            bias is calculated from and applied to the full frame.

        Returns
        -------
        df_reference : pd.DataFrame
            The bias corrected input data frame reference column
        """

        dframe = self.get_group_data(
            None,
            frame,
            columns=[self.candidate_col_name, self.reference_col_name])
        if dframe.index.size > 1:
            df = dframe.copy(True)
            if group:
                # reference data is changed...to fit the candidate!!
                src = self.get_group_data(group,
                                          df.dropna(),
                                          columns=[self.reference_col_name])
                src = src[self.reference_col_name].values

                can = self.get_group_data(group,
                                          df.dropna(),
                                          columns=[self.candidate_col_name])
                can = can[self.candidate_col_name].values
            else:
                src = df.dropna(
                )[self.reference_col_name].values  # reference data is changed
                can = df.dropna()[
                    self.candidate_col_name].values  # ...to fit the candidate

            if method == 'linreg':
                slope, inter = linreg_params(src, can)
                df[self.reference_col_name] = \
                    linreg_stored_params(df[self.reference_col_name], slope, inter)
            elif method == 'cdf_match':
                percentiles = [0, 5, 10, 30, 50, 70, 90, 95, 100]
                if can.size != 0 and src.size != 0:
                    perc_can = np.array(np.percentile(can, percentiles))
                    perc_src = np.array(np.percentile(src, percentiles))

                    df[self.reference_col_name] = \
                        lin_cdf_match_stored_params(df[self.reference_col_name].values,
                                                    perc_src,
                                                    perc_can,
                                                    min_val=0, max_val=None)
            elif method == 'mean_std':
                df[self.reference_col_name] = mean_std(src, can)
            elif method == 'min_max':
                df[self.reference_col_name] = min_max(src, can)
            else:
                raise ValueError(
                    method, 'Method for bias correction is not supported')

            return df[[self.reference_col_name]]