예제 #1
0
    def test_cusum_ols(self):
        """Check the OLS-CUSUM break test against reference values from R.

        The expected numbers were produced with R's ``strucchange``::

            library(strucchange)
            sc = sctest(ginv ~ ggdp + lint, type="OLS-CUSUM")
            mkhtest(sc, 'cusum_ols', 'BB')
        """
        # 'BB' marks the reference distribution as a Brownian Bridge.
        expected = {
            'statistic': 1.055750610401214,
            'pvalue': 0.2149567397376543,
            'parameters': (),
            'distr': 'BB',
        }

        # Passed as the degrees-of-freedom correction of the test.
        n_params = 3
        actual = smsdia.breaks_cusumolsresid(self.res.resid, ddof=n_params)
        compare_t_est(actual, expected, decimal=(12, 12))
예제 #2
0
    def test_cusum_ols(self):
        """Check the OLS-CUSUM break test against reference values from R.

        The expected numbers were produced with R's ``strucchange``::

            library(strucchange)
            sc = sctest(ginv ~ ggdp + lint, type="OLS-CUSUM")
            mkhtest(sc, 'cusum_ols', 'BB')
        """
        # 'BB' marks the reference distribution as a Brownian Bridge.
        expected = {
            'statistic': 1.055750610401214,
            'pvalue': 0.2149567397376543,
            'parameters': (),
            'distr': 'BB',
        }

        # Passed as the degrees-of-freedom correction of the test.
        n_params = 3
        actual = smsdia.breaks_cusumolsresid(self.res.resid, ddof=n_params)
        compare_t_est(actual, expected, decimal=(12, 12))
예제 #3
0
def breaks_cumsum(resid: pd.Series, ddof=0):
    """
    Run the CUSUM test for parameter stability on OLS residuals.

    This is a thin wrapper that forwards to ``breaks_cusumolsresid`` and
    repackages the first two elements of its result as a named tuple.

    documentation:
    https://www.statsmodels.org/devel/generated/statsmodels.stats.diagnostic.breaks_cusumolsresid.html#statsmodels.stats.diagnostic.breaks_cusumolsresid

    see:
    * https://en.wikipedia.org/wiki/Structural_break
    * https://www.stata.com/features/overview/cumulative-sum-test/

    Null Hypothesis: there is no structural change.

    The test looks for 'breaks', i.e. large changes of the parameter of
    interest over time, which would indicate structural instability in the
    series.

    Parameters
    ----------
    resid : pd.Series
        Residuals of the OLS fit.
    ddof : int
        Number of parameters in the OLS estimation; used as the degrees of
        freedom correction for the error variance.

    Returns
    -------
    BreaksCumSumResult
        Named tuple with two fields:

        statistic : float
            The test statistic, i.e. the maximum absolute value of the
            scaled cumulative OLS residuals.
        pvalue : float
            Probability of observing the data under the null hypothesis of
            no structural change, based on the asymptotic distribution,
            which is a Brownian Bridge.
    """
    outcome = diagnostic.breaks_cusumolsresid(resid, ddof=ddof)
    # The result type is built on every call, exactly as before.
    result_type = namedtuple('BreaksCumSumResult', 'statistic pvalue')
    return result_type(statistic=outcome[0], pvalue=outcome[1])
예제 #4
0
def omission_test(model, crit=0.05, behavior='ANY', indices=None):
    """ Flag segment records whose residuals suggest an omitted break point

    For every fitted record in `model.record`, the observations whose dates
    fall inside the record's start/end range are selected, an OLS regression
    of each tested row of `model.Y` on `model.X` is fit, and the OLS
    residuals are passed to
    `statsmodels.stats.diagnostic.breaks_cusumolsresid` (a CUSUM test for a
    "structural change", e.g., land cover change). Nothing is written back
    to the records; only a boolean mask is returned.

    Args:
        model: Model object providing `test_indices`, `ran`, `record`
            (indexable by 'start' and 'end'), `dates`, `X` (indexed as
            `[observation, regressor]`) and `Y` (indexed as
            `[series, observation]`).
        crit (float, optional): Critical p-value for rejection of null
            hypothesis that data contain no structural change
        behavior (str, optional): Method for dealing with multiple
            `test_indices`. `ANY` will return True if any one test index
            rejects the null hypothesis. `ALL` will only return True if ALL
            test indices reject the null hypothesis. Case-insensitive.
        indices (np.ndarray, optional): Array indices to test. User provided
            indices must be a subset of `model.test_indices`. When omitted
            (or empty), `model.test_indices` is used.

    Returns:
        np.ndarray: Array of True or False for each record where
            True indicates omitted break point. An empty boolean array is
            returned when `model.ran` is false.

    Raises:
        ValueError: If `behavior` is not "any"/"all" or `indices` is not a
            subset of `model.test_indices`.

    """
    mode = behavior.lower()
    if mode not in ('any', 'all'):
        raise ValueError('`behavior` must be "any" or "all"')

    # Explicit None/empty check: `not indices` raises for arrays holding
    # several elements and misreads a lone index 0 as "nothing supplied".
    if indices is None or np.size(indices) == 0:
        indices = model.test_indices
    indices = np.atleast_1d(indices)

    if not np.all(np.isin(indices, model.test_indices)):
        raise ValueError('`indices` must be a subset of '
                         '`model.test_indices`')

    if not model.ran:
        return np.empty(0, dtype=bool)

    # Imported only once a model actually has to be fit, so argument
    # validation and the "not ran" shortcut do not require statsmodels.
    from statsmodels.regression import linear_model
    from statsmodels.stats import diagnostic

    omission = np.zeros((model.record.size, len(indices)), dtype=bool)

    for i, r in enumerate(model.record):
        # Skip if no model fit
        if r['start'] == 0 or r['end'] == 0:
            continue
        # The bounds are ordered here, so 'start' and 'end' may be stored
        # in either order.
        lower = min(r['start'], r['end'])
        upper = max(r['end'], r['start'])
        # Find matching X and Y in data
        index = np.where((model.dates >= lower) & (model.dates <= upper))[0]
        _X = model.X[index, :]
        _Y = model.Y[:, index]

        for i_b, b in enumerate(indices):
            # Create OLS regression
            ols = linear_model.OLS(_Y[b, :], _X).fit()
            # CUSUM test on the OLS residuals; the number of regressors is
            # passed as the second (degrees-of-freedom) argument.
            test = diagnostic.breaks_cusumolsresid(ols.resid, _X.shape[1])
            # test[1] is the p-value; rejecting the null marks an omission.
            omission[i, i_b] = test[1] < crit

    # Collapse band answers according to `behavior`
    if mode == 'any':
        return np.any(omission, 1)
    return np.all(omission, 1)
예제 #5
0
def omission_test(model, crit=0.05, behavior='ANY', indices=None):
    """ Flag segment records whose residuals suggest an omitted break point

    For every fitted record in `model.record`, the observations whose dates
    fall inside the record's start/end range are selected, an OLS regression
    of each tested row of `model.Y` on `model.X` is fit, and the OLS
    residuals are passed to
    `statsmodels.stats.diagnostic.breaks_cusumolsresid` (a CUSUM test for a
    "structural change", e.g., land cover change). Nothing is written back
    to the records; only a boolean mask is returned.

    Args:
        model: Model object providing `test_indices`, `ran`, `record`
            (indexable by 'start' and 'end'), `dates`, `X` (indexed as
            `[observation, regressor]`) and `Y` (indexed as
            `[series, observation]`).
        crit (float, optional): Critical p-value for rejection of null
            hypothesis that data contain no structural change
        behavior (str, optional): Method for dealing with multiple
            `test_indices`. `ANY` will return True if any one test index
            rejects the null hypothesis. `ALL` will only return True if ALL
            test indices reject the null hypothesis. Case-insensitive.
        indices (np.ndarray, optional): Array indices to test. User provided
            indices must be a subset of `model.test_indices`. When omitted
            (or empty), `model.test_indices` is used.

    Returns:
        np.ndarray: Array of True or False for each record where
            True indicates omitted break point. An empty boolean array is
            returned when `model.ran` is false.

    Raises:
        ValueError: If `behavior` is not "any"/"all" or `indices` is not a
            subset of `model.test_indices`.

    """
    mode = behavior.lower()
    if mode not in ('any', 'all'):
        raise ValueError('`behavior` must be "any" or "all"')

    # Explicit None/empty check: `not indices` raises for arrays holding
    # several elements and misreads a lone index 0 as "nothing supplied".
    if indices is None or np.size(indices) == 0:
        indices = model.test_indices
    indices = np.atleast_1d(indices)

    if not np.all(np.isin(indices, model.test_indices)):
        raise ValueError('`indices` must be a subset of '
                         '`model.test_indices`')

    if not model.ran:
        return np.empty(0, dtype=bool)

    # Imported only once a model actually has to be fit, so argument
    # validation and the "not ran" shortcut do not require statsmodels.
    from statsmodels.regression import linear_model
    from statsmodels.stats import diagnostic

    omission = np.zeros((model.record.size, len(indices)), dtype=bool)

    for i, r in enumerate(model.record):
        # Skip if no model fit
        if r['start'] == 0 or r['end'] == 0:
            continue
        # The bounds are ordered here, so 'start' and 'end' may be stored
        # in either order.
        lower = min(r['start'], r['end'])
        upper = max(r['end'], r['start'])
        # Find matching X and Y in data
        index = np.where((model.dates >= lower) & (model.dates <= upper))[0]
        _X = model.X[index, :]
        _Y = model.Y[:, index]

        for i_b, b in enumerate(indices):
            # Create OLS regression
            ols = linear_model.OLS(_Y[b, :], _X).fit()
            # CUSUM test on the OLS residuals; the number of regressors is
            # passed as the second (degrees-of-freedom) argument.
            test = diagnostic.breaks_cusumolsresid(ols.resid, _X.shape[1])
            # test[1] is the p-value; rejecting the null marks an omission.
            omission[i, i_b] = test[1] < crit

    # Collapse band answers according to `behavior`
    if mode == 'any':
        return np.any(omission, 1)
    return np.all(omission, 1)
예제 #6
0
 def breaks_cumsumolsresid(self, timeseries):
     """Run the CUSUM break test for a model generated from `timeseries`.

     Calls `self.generate_model(timeseries)`, which must return a pair, and
     hands the second element to `diagnostic.breaks_cusumolsresid`.

     Returns a `BreaksCumSumResult` named tuple whose `statistic` and
     `pvalue` fields are the first two elements of that test's result.
     """
     _, fitted = self.generate_model(timeseries)
     # NOTE(review): the second element is passed on as-is. If it is a
     # fitted results object rather than a residual array, the test likely
     # needs `fitted.resid` -- confirm against `generate_model`.
     outcome = diagnostic.breaks_cusumolsresid(fitted)
     result_type = namedtuple('BreaksCumSumResult', 'statistic pvalue')
     return result_type(statistic=outcome[0], pvalue=outcome[1])
예제 #7
0
def _is_finite_and_varying(smoothed):
    """Return True when `smoothed` has only finite values and is not constant."""
    # `.any()` rather than `.all()`: the first element always equals itself,
    # so requiring *every* element to differ from it could never succeed.
    return bool(np.isfinite(smoothed).all()
                and (smoothed.iloc[0] != smoothed).any())


def _compare_after_smoothing(series, compare, zero_percent_threshold):
    """Smooth `series` and hand the smoothed result to `compare`.

    If smoothing (or the comparison) raises ValueError, zeros in `series`
    are forward-filled and smoothing is retried, but only when the fraction
    of zeros is below `zero_percent_threshold`.
    """
    try:
        smoothed = smooth_feature(series)
        if _is_finite_and_varying(smoothed):
            compare(smoothed)
    except ValueError:
        zero_fraction = (series == 0).mean()
        if zero_fraction < zero_percent_threshold:
            # NOTE(review): the `method` keyword of `replace` is deprecated
            # in recent pandas releases; kept to preserve the fill semantics.
            repaired = series.replace(to_replace=0, method='ffill')
            smoothed = smooth_feature(repaired)
            if check_smoothed_feature(smoothed):
                compare(smoothed)


def analyze_cross_correlation_timeseries(df,
                                         col_one,
                                         col_two,
                                         time_column,
                                         significance_threshold=0.05,
                                         zero_percent_threshold=0.05):
    """Compare two columns of `df` as time series, choosing the method by
    the outcome of `adfuller` on each series.

    Both columns are copied, indexed by `df[time_column]` and stripped of
    missing values (each series independently). Depending on whether the
    second element of `adfuller(series)` (presumably the ADF p-value --
    confirm against the import) is below or above
    `significance_threshold`:

    * both below: the series are passed to `compare_timeseries` directly;
    * exactly one above: that series is smoothed with `smooth_feature`
      before the comparison;
    * both above: `coint` is run and a message describing the result is
      printed.

    If a p-value equals the threshold exactly, no branch is taken.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data.
    col_one, col_two : column labels
        The two columns to compare.
    time_column : column label
        Column used as the index of both series.
    significance_threshold : float, optional
        Threshold applied to every p-value used here.
    zero_percent_threshold : float, optional
        Maximum fraction of zeros for which zeros are forward-filled and
        smoothing is retried after a ValueError.

    Returns
    -------
    None
        Results are reported via `compare_timeseries` and `print`.
    """
    series_one = df[col_one].copy()
    series_two = df[col_two].copy()
    series_one.index = df[time_column].copy()
    series_two.index = df[time_column].copy()
    series_one = series_one.dropna()
    series_two = series_two.dropna()

    # NOTE(review): a p-value above the threshold usually means the
    # "no structural break" null is NOT rejected; confirm that "failed" is
    # the intended wording/direction for these messages.
    if breaks_cusumolsresid(series_one)[1] > significance_threshold:
        print("cumulative sum test failed for feature")
    if breaks_cusumolsresid(series_two)[1] > significance_threshold:
        print("cumulative sum test failed for display")

    # Run the test once per series instead of once per condition.
    pvalue_one = adfuller(series_one)[1]
    pvalue_two = adfuller(series_two)[1]
    one_below = pvalue_one < significance_threshold
    one_above = pvalue_one > significance_threshold
    two_below = pvalue_two < significance_threshold
    two_above = pvalue_two > significance_threshold

    if one_below and two_below:
        # Neither series needs smoothing.
        compare_timeseries(series_one, series_two)
    elif one_above and two_below:
        # Only series_one needs smoothing.
        _compare_after_smoothing(
            series_one,
            lambda smoothed: compare_timeseries(smoothed, series_two),
            zero_percent_threshold)
    elif one_below and two_above:
        # Only series_two needs smoothing.
        _compare_after_smoothing(
            series_two,
            lambda smoothed: compare_timeseries(series_one, smoothed),
            zero_percent_threshold)
    elif one_above and two_above:
        # Both series are above the threshold, therefore use cointegration.
        # Keep the full result: element 0 is reported as the t-statistic,
        # element 1 as the p-value.
        cointegration_results = coint(series_one, series_two)
        if cointegration_results[1] < significance_threshold:
            print(f"""
            The t-statistic of the unit-root test {cointegration_results[0]},
            The pvalue {cointegration_results[1]} is less than signifiance threshold of {significance_threshold},
            So we reject the null hypothesis.  And therefore, we believe there is cointegration (a relationship)
            between the two series.
            """)
        else:
            print(f"""
            The t-statistic of the unit-root test {cointegration_results[0]},
            The pvalue {cointegration_results[1]} is greater than signifiance threshold of {significance_threshold},
            So we fail to reject the null hypothesis.  And therefore, we believe there is no relation between the series.
            """)
예제 #8
0
# Script fragment: every name used here (rcusum, rcusumci, rresid, rparams,
# res1, skip, ...) is defined earlier in the file, outside this excerpt.

# First figure: the CUSUM series together with the two entries of rcusumci
# (used below as lower bound [0] and upper bound [1]).
plt.plot(rcusum)
plt.plot(rcusumci[0])
plt.plot(rcusumci[1])
# Second figure: the residual series and its absolute values.
plt.figure()
plt.plot(rresid)
plt.plot(np.abs(rresid))

# Prints True when the CUSUM series (first element dropped) lies above
# rcusumci[1] or below rcusumci[0] at any point.
print('cusum test reject:')
print(((rcusum[1:] > rcusumci[1]) | (rcusum[1:] < rcusumci[0])).any())

# Recompute the seven result arrays with recursive_olsresiduals and check
# that the parameter estimates from index `skip` onwards agree with the
# earlier `rparams` to 13 decimals.
rresid2, rparams2, rypred2, rresid_standardized2, rresid_scaled2, rcusum2, rcusumci2 = \
            recursive_olsresiduals(res1, skip)
#assert_almost_equal(rparams[skip+1:], rparams2[skip:-1],13)
assert_almost_equal(rparams[skip:], rparams2[skip:], 13)
#np.c_[rparams[skip+1:], rparams2[skip:-1]]
#plt.show()

####################  Example break test
# breaks_hansen yields the statistic H and a value unpacked as crit95.
H, crit95 = breaks_hansen(res1)
print(H)
print(crit95)

# CUSUM test on the OLS residuals of res1: unpacked as statistic, p-value
# and critical values.
supb, pval, crit = breaks_cusumolsresid(res1.resid)
print(supb, pval, crit)

##check whether this works directly: Ploberger/Kramer framing of standard cusum
##no, it's different, there is another denominator
#print breaks_cusumolsresid(rresid[skip:])
#this function is still completely wrong, cut and paste does not apply
#print breaks_cusum(rresid[skip:])