def stl_decompose(df, column, freq=52):
    """Split one column of ``df`` into STL trend, seasonal and residual parts.

    The column values are wrapped in an R ``ts`` object and passed to R's
    ``stl`` with a ``'periodic'`` seasonal window and ``robust=True``.

    params:
        df: DataFrame containing the series; its index is reused for the
            result
        column: label of the column in ``df`` to decompose
        freq: value passed to R's ``ts`` as ``frequency`` (default 52)

    returns:
        DataFrame indexed like ``df`` with the columns ``trend``,
        ``seasonal`` and ``residuals`` (in that order).
    """
    observations = list(df[column].values)
    n_obs = len(observations)

    fit = r.stl(r.ts(observations, frequency=freq), 'periodic', robust=True)
    flat = list(fit.rx2('time.series'))

    # The flattened 'time.series' component is read as three consecutive
    # runs of n_obs values: seasonal first, then trend, then residuals.
    seasonal, trend, residuals = (
        flat[k * n_obs:(k + 1) * n_obs] for k in range(3)
    )

    return pd.DataFrame(
        {'trend': trend, 'seasonal': seasonal, 'residuals': residuals},
        index=df.index,
    )
def decompose(series, frequency, s_window, **kwargs):
    """Decompose a time series with R's ``stl`` after filling gaps.

    Missing values are filled with ``Series.interpolate`` before the data
    is handed to R. The interpolation is done on a copy, so the ``series``
    object passed in by the caller is not modified.

    NOTE(review): another ``decompose`` is defined later in this file; if
    both live in the same module, the later definition shadows this one.

    params:
        series: pandas Series to decompose; its index is reused for the
            result
        frequency: value passed to R's ``ts`` as ``frequency``
        s_window: second positional argument passed to R's ``stl``
        **kwargs: forwarded unchanged to R's ``stl``

    returns:
        DataFrame indexed like ``series`` with the columns ``observed``
        (the interpolated values), ``trend``, ``seasonal`` and ``residual``.
    """
    # Interpolate into a new object instead of using inplace=True, so the
    # caller's data (possibly a column of a larger DataFrame) is untouched.
    filled = series.interpolate()
    length = len(filled)

    ts = r.ts(list(filled.values), frequency=frequency)
    decomposed = list(r.stl(ts, s_window, **kwargs).rx2('time.series'))

    df = pd.DataFrame(index=filled.index)
    df['observed'] = filled.values
    # The flattened 'time.series' component is read as three consecutive
    # runs of `length` values: seasonal, trend, residual.
    df['trend'] = decomposed[length:2 * length]
    df['seasonal'] = decomposed[0:length]
    df['residual'] = decomposed[2 * length:3 * length]
    return df
def decompose(series, frequency, s_window='periodic', log=False, theme=False,
              **kwargs):
    """Decompose a time series into seasonal, trend and irregular components.

    Uses loess-based decomposition (STL) through R's ``stl``:
    https://www.rdocumentation.org/packages/stats/versions/3.4.3/topics/stl

    params:
        series: a time series (pandas Series); its index becomes the
            ``date`` column of the result
        frequency: the number of observations per "cycle" (normally a year,
            but sometimes a week, a day or an hour)
            https://robjhyndman.com/hyndsight/seasonal-periods/
        s_window: either the character string "periodic" or the span (in
            lags) of the loess window for seasonal extraction, which should
            be odd and at least 7, according to Cleveland et al.
        log: boolean. When true, the natural log of the series is
            decomposed instead of the raw values
        theme: not used by this function; kept in the signature so
            existing callers keep working
        **kwargs: forwarded to R's ``stl``. See other params for stl at
            https://www.rdocumentation.org/packages/stats/versions/3.4.3/topics/stl

    returns:
        DataFrame with the columns ``date``, ``observed`` (log-transformed
        when ``log`` is true), ``trend``, ``seasonal`` and ``residuals``.
    """
    df = pd.DataFrame()
    df['date'] = series.index

    if log:
        series = series.pipe(np.log)

    length = len(series)
    ts = r.ts(list(series.values), frequency=frequency)

    # Forward **kwargs to stl; previously they were accepted but dropped,
    # so options such as robust=True had no effect.
    decomposed = list(r.stl(ts, s_window, **kwargs).rx2('time.series'))

    df['observed'] = series.values
    # The flattened 'time.series' component is read as three consecutive
    # runs of `length` values: seasonal, trend, residuals.
    df['trend'] = decomposed[length:2 * length]
    df['seasonal'] = decomposed[0:length]
    df['residuals'] = decomposed[2 * length:3 * length]
    return df
def auto_arima(endog, exog=None, freq=None):
    """Select ARIMA orders for ``endog`` via ``forecast.auto_arima``.

    params:
        endog: pandas object holding the target series; converted with
            ``pandas2ri.py2rpy`` and wrapped in an R ``ts``
        exog: optional 2-D pandas object of regressors. A small amount of
            Gaussian noise (1e-4 times the overall standard deviation) is
            added before it is passed as ``xreg``
        freq: value passed to R's ``ts`` as ``freq``; ``None`` means 1

    returns:
        Two tuples, ``(p, d, q)`` and ``(P, D, Q, s)``, taken from the
        ``arma`` entry of the fitted model.
    """
    frequency = 1 if freq is None else freq

    # Older rpy2 releases called this py2ri; it was renamed to py2rpy. See
    # https://stackoverflow.com/questions/55990529/module-rpy2-robjects-pandas2ri-has-no-attribute-ri2py
    endog_r = r.ts(pandas2ri.py2rpy(endog), freq=frequency)

    call_args = {"y": endog_r}

    if exog is not None:
        # Perturb the regressors slightly to avoid a rank-deficient error.
        spread = np.std(exog.values)
        jitter = spread * 1e-4 * np.random.randn(*exog.shape)
        call_args["xreg"] = r.matrix(
            exog.values + jitter,
            nrow=exog.shape[0],
            ncol=exog.shape[1],
            dimnames=[[], exog.columns.tolist()],
        )

    call_args.update({
        "seasonal": True,
        "stationary": False,
        "trace": True,
        "max.order": 20,
        "max.p": 20,
        "max.q": 20,
        "max.P": 20,
        "max.Q": 20,
        "max.D": 20,
        "max.d": 20,
        "start.p": 1,
        "start.q": 1,
        "start.P": 1,
        "start.Q": 1,
    })

    fitted = dict(forecast.auto_arima(**call_args).items())

    # Element order of "arma" is explained in the last comment of
    # https://stats.stackexchange.com/questions/178577/how-to-read-p-d-and-q-of-auto-arima
    ar, ma, seas_ar, seas_ma, period, diff, seas_diff = fitted["arma"]
    return (ar, diff, ma), (seas_ar, seas_diff, seas_ma, period)
def mtm_from_R(x, K=3, NW=6, nFFT='default', plot=0, deltat=0.001,
               jkCIProb=0.95, maxf=10, plotftest=True):
    """Estimate a spectrum with the R ``multitaper`` package.

    ``x`` is converted to an R ``ts`` and passed to ``spec_mtm`` with the
    F-test and jackknife options switched on. R-side plotting is always
    disabled; the optional plot is drawn here with matplotlib.

    params:
        x: sequence of numeric samples
        K: passed to ``spec_mtm`` as ``k``
        NW: passed to ``spec_mtm`` as ``nw``
        nFFT: passed to ``spec_mtm`` as ``nFFT``
        plot: when equal to 1, draw the spectrum and its jackknife
            confidence bounds with matplotlib and call ``plt.show()``
        deltat: passed to R's ``ts`` as ``deltat``
        jkCIProb: passed to ``spec_mtm`` as ``jkCIProb``
        maxf: upper x-axis limit of the plot
        plotftest: when truthy, also draw the F-test values on a secondary
            y-axis

    returns:
        Tuple ``(freqs, spec, upperCI, lowerCI, Ftest)`` of numpy arrays.
    """
    from rpy2 import robjects
    from rpy2.robjects.packages import importr
    from rpy2.robjects import r

    MTM = importr('multitaper')

    # Build the R time series.
    serie = robjects.FloatVector(x)
    serie = r.ts(serie, deltat=deltat)

    # Run the multitaper estimate; plot=0 keeps R from drawing anything.
    results = MTM.spec_mtm(serie, k=K, nw=NW, nFFT=nFFT, plot=0, Ftest=1,
                           jackknife=1, jkCIProb=jkCIProb)

    # Pull the pieces of interest out of the R result.
    mtm = results.rx2('mtm')
    jackknife = mtm.rx2('jk')
    freqs = np.array(results.rx2('freq'))
    spec = np.array(results.rx2('spec'))
    upperCI = np.array(jackknife.rx2('upperCI'))
    lowerCI = np.array(jackknife.rx2('lowerCI'))
    Ftest = np.array(mtm.rx2('Ftest'))

    if plot == 1:
        # No plt.hold() call: it was removed from matplotlib, and
        # successive plot() calls already draw onto the same axes.
        fig = plt.figure()
        ax1 = fig.add_subplot(111)
        ax1.plot(freqs, spec, 'k-', linewidth=2)
        ax1.plot(freqs, upperCI, 'r:')
        ax1.plot(freqs, lowerCI, 'g:')
        ax1.set_ylabel('Power Spectral Density')
        ax1.set_xlabel('Frequency [cycles/m]')
        ax2 = ax1.twinx()
        if plotftest:
            ax2.plot(freqs, Ftest, 'y')
            ax2.set_ylabel('Ftest')
            ax2.set_xlim(0, maxf)
        else:
            ax1.set_xlim(0, maxf)
        plt.show()

    return freqs, spec, upperCI, lowerCI, Ftest