def plot5(self):
    """Draw lag plots and autocorrelation plots for the two sample datasets.

    Reads ``data/airline_passengers.csv`` (indexed by ``Month``, frequency
    set to ``'MS'``) and ``data/DailyTotalFemaleBirths.csv`` (indexed by
    ``Date``, frequency set to ``'D'``), evaluates a few autocovariance /
    autocorrelation helpers on a five-point toy series, then produces one
    lag plot and one 40-lag ACF plot per dataset before switching off
    interactive mode and calling ``plt.show()``.
    """
    from statsmodels.graphics.tsaplots import plot_acf

    df1 = pd.read_csv('data/airline_passengers.csv', index_col='Month', parse_dates=True)
    df1.index.freq = 'MS'
    df2 = pd.read_csv('data/DailyTotalFemaleBirths.csv', index_col='Date', parse_dates=True)
    df2.index.freq = 'D'

    # Toy series used only to exercise the estimators; the results are not
    # consumed anywhere below.
    df = pd.DataFrame({'a': [13, 5, 11, 12, 9]})
    arr = acovf(df['a'])
    # NOTE(review): newer statsmodels releases appear to have renamed
    # `unbiased` to `adjusted` -- confirm against the installed version.
    arr2 = acovf(df['a'], unbiased=True)
    arr3 = acf(df['a'])
    arr4 = pacf_yw(df['a'], nlags=4, method='mle')

    # lag_plot draws on the current axes when no `ax` is given, so without a
    # fresh figure per call the two scatters would be overlaid on one plot.
    plt.figure()
    lag_plot(df1['Thousands of Passengers'])
    plt.figure()
    lag_plot(df2['Births'])

    lags = 40
    plot_acf(df2, title='Autocorrelation: Daily Female Births', lags=lags)
    plot_acf(df1, title='Autocorrelation: Airline Passengers', lags=lags)

    plt.interactive(False)
    plt.show()
def test_ld(self):
    """Check that the ``ld*`` PACF methods agree with the Yule-Walker ones.

    Two pairings are compared on ``self.x`` over 40 lags, each to
    ``DECIMAL_8`` places: ``pacf_yw(method="mle")`` against
    ``pacf(method="ldb")``, and ``pacf(method="yw")`` against
    ``pacf(method="ldu")``.
    """
    n_lags = 40

    # First pairing: pacf_yw "mle" vs pacf "ldb".
    yw_values = pacf_yw(self.x, nlags=n_lags, method="mle")
    ld_values = pacf(self.x, nlags=n_lags, method="ldb")
    assert_almost_equal(yw_values, ld_values, DECIMAL_8)

    # Second pairing: pacf "yw" vs pacf "ldu".
    yw_values = pacf(self.x, nlags=n_lags, method="yw")
    ld_values = pacf(self.x, nlags=n_lags, method="ldu")
    assert_almost_equal(yw_values, ld_values, DECIMAL_8)
def test_pacf_burg():
    """Compare ``pacf_burg`` with ``pacf_yw`` and check its ``sigma2`` output.

    The input is ``e[t] + 0.5 * e[t-1]`` built from 10001 normal draws of a
    ``RandomState`` seeded with 12345. The Burg PACF must match the
    Yule-Walker PACF (10 lags) within ``5e-4``, and the returned ``sigma2``
    must match the values rebuilt from the PACF within ``1e-3``.
    """
    rng = np.random.RandomState(12345)
    noise = rng.randn(10001)
    y = noise[1:] + 0.5 * noise[:-1]

    burg_pacf, sigma2 = pacf_burg(y, 10)
    reference_pacf = pacf_yw(y, 10)
    assert_allclose(burg_pacf, reference_pacf, atol=5e-4)

    # Internal consistency check between pacf and sigma2: rebuild sigma2
    # from the sample variance and the cumulative product of (1 - pacf^2).
    centered = y - y.mean()
    sample_var = np.dot(centered, centered) / 10000
    # Zero the first entry so it contributes a factor of exactly 1.
    burg_pacf[0] = 0
    rebuilt_sigma2 = sample_var * np.cumprod(1 - burg_pacf ** 2)
    assert_allclose(sigma2, rebuilt_sigma2, atol=1e-3)
def test_yw(self):
    """Check ``pacf_yw`` (40 lags, ``method="mle"``) against ``self.pacfyw``.

    The first element of the computed array is dropped before comparing to
    ``DECIMAL_8`` places.
    """
    computed = pacf_yw(self.x, nlags=40, method="mle")
    without_first = computed[1:]
    assert_almost_equal(without_first, self.pacfyw, DECIMAL_8)
def test_pacf_yw():
    """Compare ``tsa.pacf_yw`` (20 lags, ``method='mle'``) with ``mlpacf`` references.

    ``x100`` is checked against ``mlpacf.pacf100`` to 1 decimal place and
    ``x1000`` against ``mlpacf.pacf1000`` to 2 decimal places.
    """
    # Shorter series: compared to 1 decimal.
    estimate_100 = tsa.pacf_yw(x100, 20, method='mle')
    reference_100 = mlpacf.pacf100.ravel()
    assert_array_almost_equal(reference_100, estimate_100, 1)

    # Longer series: compared to 2 decimals.
    estimate_1000 = tsa.pacf_yw(x1000, 20, method='mle')
    reference_1000 = mlpacf.pacf1000.ravel()
    assert_array_almost_equal(reference_1000, estimate_1000, 2)
#break
##
# Across all tracks
# Still not worrying about proper circular stats or differing time steps.
from statsmodels.tsa.stattools import pacf_yw

# Build one record per (tag, lag) holding the partial autocorrelation of the
# heading and speed columns, then summarise them as per-lag box plots.
recs = []
for tag in df.index.values:
    track = df.loc[tag, 'track']
    # The final row of each track is excluded from the analysis.
    seg = track[:-1]
    # At most 20 lags, and fewer when the segment is short.
    nlag = min(len(seg) // 2 - 1, 20)
    pa_hdg = pacf_yw(seg['swim_hdg_rel_uw'], nlags=nlag, method='mle')
    pa_spd = pacf_yw(seg['swim_speed'], nlags=nlag, method='mle')
    # Both arrays come from the same `nlags`, so they are walked in lockstep.
    for lag, (hdg_value, spd_value) in enumerate(zip(pa_hdg, pa_spd)):
        recs.append(
            dict(lag=lag, tag=tag, pacf_hdg=hdg_value, pacf_spd=spd_value))

pacf_df = pd.DataFrame(recs)

# Reuse figure 70: clear it, then lay out heading (top) and speed (bottom).
plt.figure(70).clf()
fig, axs = plt.subplots(2, 1, num=70)
sns.boxplot(x='lag', y='pacf_hdg', data=pacf_df, ax=axs[0])
sns.boxplot(x='lag', y='pacf_spd', data=pacf_df, ax=axs[1])
fig.savefig(os.path.join(fig_dir, 'pacf_boxplot-hdg-speed.png'))
parse_dates=True) df2.index.freq = 'D' df1.head() df2.head() import warnings warnings.filterwarnings('ignore') #to ignore the warnings which is unnecessary df = pd.DataFrame({'a': [12, 5, 11, 12, 9]}) acf(df['a']) pacf_yw(df['a'], nlags=4, method='mle') pacf_yw(df['a'], nlags=4, method='unbiased') pacf_ols(df['a'], nlags=4) ''' Method in the above use different types of calculation corralation itself ''' from pandas.plotting import lag_plot lag_plot(df1['Thousands of Passengers']) lag_plot(df2['Births']) from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from pandas.plotting import lag_plot
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# --- Load the two example series -------------------------------------------
# Airline passengers: first column is the index; the value column is renamed
# to the shorter 'Pass_K' and the index frequency is set to 'MS'.
file1 = r'C:\Damon\Udemy\Python for Time Series Data Analysis\TSA_COURSE_NOTEBOOKS\Data\airline_passengers.csv'
df1 = pd.read_csv(file1, index_col=0, parse_dates=True).rename(
    columns={'Thousands of Passengers': 'Pass_K'})
df1.index.freq = 'MS'

# Daily female births: indexed by the 'Date' column, frequency set to 'D'.
file2 = r'C:\Damon\Udemy\Python for Time Series Data Analysis\TSA_COURSE_NOTEBOOKS\Data\DailyTotalFemaleBirths.csv'
df2 = pd.read_csv(file2, index_col='Date', parse_dates=True)
df2.index.freq = 'D'

df1.head()
df2.head()

# --- Estimators on a five-point toy series ---------------------------------
df = pd.DataFrame({'a': [13, 5, 11, 12, 9]})
acf(df['a'])
pacf_yw(df['a'], nlags=4, method='mle')
pacf_ols(df['a'], nlags=4)

# --- Lag plots --------------------------------------------------------------
lag_plot(df1['Pass_K'])  # STRONG CORRELATION
lag_plot(df2['Births'])  # WEAK/NO CORRELATION

# --- ACF, then PACF, over 40 lags -------------------------------------------
for frame in (df1, df2):
    plot_acf(frame, lags=40)
plot_pacf(df2, lags=40, title='PACF BIRTHS')
plot_pacf(df1, lags=40, title='PACF Passengers')
def test_pacf_yw():
    """Check ``stattools.pacf_yw`` against the stored ``mlpacf`` reference values.

    Each case runs 20 lags with ``method="mle"``: ``x100`` must match
    ``mlpacf.pacf100`` to 1 decimal place, ``x1000`` must match
    ``mlpacf.pacf1000`` to 2 decimal places.
    """

    def check(series, reference, decimal):
        # Flatten the reference so it lines up with the 1-D estimate.
        estimate = stattools.pacf_yw(series, 20, method="mle")
        assert_array_almost_equal(reference.ravel(), estimate, decimal)

    check(x100, mlpacf.pacf100, 1)
    check(x1000, mlpacf.pacf1000, 2)