예제 #1
0
def stl_decompose(df, column, freq=52):
    """Split one column of ``df`` into STL components using R's ``stl``.

    The column values are handed to R as a ``ts`` object with the given
    frequency and decomposed with a ``'periodic'`` seasonal window and
    ``robust=True``.

    Args:
        df: DataFrame holding the series; its index is reused for the result.
        column: Label of the column in ``df`` to decompose.
        freq: Value passed as ``frequency`` to R's ``ts`` (default 52).

    Returns:
        A DataFrame indexed like ``df`` with the columns ``trend``,
        ``seasonal`` and ``residuals``.
    """
    values = list(df[column].values)
    n_obs = len(values)

    fit = r.stl(r.ts(values, frequency=freq), 'periodic', robust=True)

    # The 'time.series' component is consumed as one flat sequence made of
    # three consecutive runs of n_obs values each: seasonal first, then
    # trend, then the remainder.
    flat = list(fit.rx2('time.series'))
    seasonal_part = flat[:n_obs]
    trend_part = flat[n_obs:2 * n_obs]
    remainder_part = flat[2 * n_obs:3 * n_obs]

    result = pd.DataFrame(index=df.index)
    result['trend'] = trend_part
    result['seasonal'] = seasonal_part
    result['residuals'] = remainder_part
    return result
예제 #2
0
def decompose(series, frequency, s_window, **kwargs):
    """Decompose a series into observed/trend/seasonal/residual via R's ``stl``.

    Missing values are filled with ``series.interpolate()`` before the data
    is passed to R. The interpolation is done on a copy, so the caller's
    ``series`` object is left unmodified.

    Args:
        series: The time series to decompose; its ``index``, ``values`` and
            ``interpolate`` members are used (presumably a pandas Series).
        frequency: Value passed as ``frequency`` to R's ``ts``.
        s_window: Seasonal window argument passed positionally to R's ``stl``.
        **kwargs: Extra keyword arguments forwarded unchanged to R's ``stl``.

    Returns:
        A DataFrame indexed like ``series`` with the columns ``observed``
        (the interpolated values), ``trend``, ``seasonal`` and ``residual``.
    """
    # Interpolate into a new object instead of using inplace=True, which
    # silently rewrote the caller's data as a side effect.
    filled = series.interpolate()
    values = list(filled.values)
    length = len(values)

    ts_obj = r.ts(values, frequency=frequency)
    # The 'time.series' component is consumed as one flat sequence made of
    # three consecutive runs of `length` values: seasonal, trend, remainder.
    decomposed = list(r.stl(ts_obj, s_window, **kwargs).rx2('time.series'))

    df = pd.DataFrame(index=series.index)
    df['observed'] = filled.values
    df['trend'] = decomposed[length:2 * length]
    df['seasonal'] = decomposed[0:length]
    df['residual'] = decomposed[2 * length:3 * length]
    return df
예제 #3
0
def decompose(series,
              frequency,
              s_window='periodic',
              log=False,
              theme=False,
              **kwargs):
    """Decompose a time series into seasonal, trend and irregular components.

    The decomposition is done with R's STL (seasonal-trend decomposition
    using loess):
    https://www.rdocumentation.org/packages/stats/versions/3.4.3/topics/stl

    Args:
        series: A time series; its ``index``, ``values`` and ``pipe`` members
            are used (presumably a pandas Series).
        frequency: The number of observations per "cycle" (normally a year,
            but sometimes a week, a day or an hour). See
            https://robjhyndman.com/hyndsight/seasonal-periods/
        s_window: Either the character string ``"periodic"`` or the span (in
            lags) of the loess window for seasonal extraction, which should
            be odd and at least 7, according to Cleveland et al.
        log: Boolean. If true, the natural log of the series is decomposed
            instead of the raw values.
        theme: A bokeh theme. Accepted for interface compatibility; it is
            not used anywhere in this function.
        **kwargs: Additional parameters forwarded to R's ``stl``; see
            https://www.rdocumentation.org/packages/stats/versions/3.4.3/topics/stl

    Returns:
        A DataFrame with the columns ``date`` (the index of ``series``),
        ``observed`` (log-transformed when ``log`` is true), ``trend``,
        ``seasonal`` and ``residuals``.
    """
    df = pd.DataFrame()
    df['date'] = series.index

    if log:
        series = series.pipe(np.log)

    values = list(series.values)
    length = len(values)
    ts_obj = r.ts(values, frequency=frequency)

    # Forward **kwargs to stl: they were previously accepted and documented
    # but silently dropped, so options such as robust=True had no effect.
    fit = r.stl(ts_obj, s_window, **kwargs)

    # The 'time.series' component is consumed as one flat sequence made of
    # three consecutive runs of `length` values: seasonal, trend, remainder.
    decomposed = list(fit.rx2('time.series'))

    df['observed'] = series.values
    df['trend'] = decomposed[length:2 * length]
    df['seasonal'] = decomposed[0:length]
    df['residuals'] = decomposed[2 * length:3 * length]
    return df
def auto_arima(endog, exog=None, freq=None):
    """Select (seasonal) ARIMA orders with R's ``forecast`` auto-ARIMA search.

    Args:
        endog: Endogenous series; converted with ``pandas2ri.py2rpy`` and
            wrapped in an R ``ts`` object.
        exog: Optional exogenous regressors. ``values``, ``shape`` (two
            dimensions) and ``columns`` are read, so presumably a pandas
            DataFrame. A small amount of Gaussian noise is added before the
            data is passed to R as ``xreg``.
        freq: Value passed as ``freq`` to R's ``ts``; ``None`` means 1.

    Returns:
        A pair ``((p, d, q), (P, D, Q, s))`` unpacked from the ``arma``
        element of the fitted model.
    """
    period = 1 if freq is None else freq

    # NOTE: older rpy2 releases called this converter ``py2ri``; newer ones
    # renamed it to ``py2rpy``. Reference:
    # https://stackoverflow.com/questions/55990529/module-rpy2-robjects-pandas2ri-has-no-attribute-ri2py
    endog_r = r.ts(pandas2ri.py2rpy(endog), freq=period)

    search_options = {
        "seasonal": True,
        "stationary": False,
        "trace": True,
        "max.order": 20,
        "max.p": 20,
        "max.q": 20,
        "max.P": 20,
        "max.Q": 20,
        "max.D": 20,
        "max.d": 20,
        "start.p": 1,
        "start.q": 1,
        "start.P": 1,
        "start.Q": 1
    }

    call_kwargs = {"y": endog_r}
    if exog is not None:
        # Jitter the regressors slightly (1e-4 of their overall standard
        # deviation) to avoid a rank-deficient error for exog.
        spread = np.std(exog.values)
        jitter = spread * 1e-4 * np.random.randn(*exog.shape)
        n_rows, n_cols = exog.shape[0], exog.shape[1]
        call_kwargs["xreg"] = r.matrix(exog.values + jitter,
                                       nrow=n_rows,
                                       ncol=n_cols,
                                       dimnames=[[], exog.columns.tolist()])
    call_kwargs.update(search_options)

    fitted = forecast.auto_arima(**call_kwargs)
    components = dict(fitted.items())

    # For the ordering of the "arma" vector see the last comment at:
    # https://stats.stackexchange.com/questions/178577/how-to-read-p-d-and-q-of-auto-arima
    p, q, P, Q, s, d, D = list(components["arma"])
    return (p, d, q), (P, D, Q, s)
예제 #5
0
def mtm_from_R(x, K=3, NW=6, nFFT='default', plot=0, deltat=0.001,
               jkCIProb=0.95, maxf=10, plotftest=True):
    """Estimate a multitaper spectrum with R's ``multitaper`` package.

    ``x`` is converted to an R ``ts`` object and passed to ``spec.mtm`` with
    the harmonic F-test and jackknife confidence intervals enabled.

    Args:
        x: Sequence of samples, converted with ``robjects.FloatVector``.
        K: Passed to ``spec.mtm`` as ``k``.
        NW: Passed to ``spec.mtm`` as ``nw``.
        nFFT: Passed to ``spec.mtm`` as ``nFFT``.
        plot: When equal to 1, draw the spectrum and its confidence bounds
            with matplotlib. R's own plotting is always disabled.
        deltat: Passed to R's ``ts`` as ``deltat``.
        jkCIProb: Passed to ``spec.mtm`` as ``jkCIProb``.
        maxf: Upper limit of the plotted frequency axis.
        plotftest: When truthy, overlay the F-test on a secondary y-axis.

    Returns:
        Tuple ``(freqs, spec, upperCI, lowerCI, Ftest)`` of NumPy arrays.
    """
    from rpy2 import robjects
    from rpy2.robjects import r
    from rpy2.robjects.packages import importr

    multitaper = importr('multitaper')

    # Build the R time-series object from the Python samples.
    serie = r.ts(robjects.FloatVector(x), deltat=deltat)

    # plot=0 here suppresses R's plot; plotting is handled below instead.
    results = multitaper.spec_mtm(serie, k=K, nw=NW, nFFT=nFFT, plot=0,
                                  Ftest=1, jackknife=1, jkCIProb=jkCIProb)

    # Pull the pieces of interest out of the R result list.
    mtm = results.rx2('mtm')
    jackknife = mtm.rx2('jk')
    freqs = np.array(results.rx2('freq'))
    spec = np.array(results.rx2('spec'))
    upperCI = np.array(jackknife.rx2('upperCI'))
    lowerCI = np.array(jackknife.rx2('lowerCI'))
    Ftest = np.array(mtm.rx2('Ftest'))

    if plot == 1:
        # plt.hold() was removed in Matplotlib 3.0; successive plot calls
        # draw on the same axes without it, so no replacement is needed.
        fig = plt.figure()
        ax1 = fig.add_subplot(111)
        ax1.plot(freqs, spec, 'k-', linewidth=2)
        ax1.plot(freqs, upperCI, 'r:')
        ax1.plot(freqs, lowerCI, 'g:')
        ax1.set_ylabel('Power Spectral Density')
        ax1.set_xlabel('Frequency [cycles/m]')

        if plotftest:
            # Only create the secondary axis when something is drawn on it,
            # so the figure does not get an empty right-hand scale.
            ax2 = ax1.twinx()
            ax2.plot(freqs, Ftest, 'y')
            ax2.set_ylabel('Ftest')
            ax2.set_xlim(0, maxf)
        else:
            ax1.set_xlim(0, maxf)
        plt.show()

    return freqs, spec, upperCI, lowerCI, Ftest