def test_dates_from_range():
    """Check dates_from_range for quarterly and monthly period strings.

    Three cases are covered:

    * a quarterly range given by explicit start and end,
    * a quarterly range given by a start and a ``length``,
    * a monthly range given by a start and a ``length``.

    The expected values are the period-end dates as ``datetime`` objects.
    """
    results = [
        datetime(1959, 3, 31, 0, 0), datetime(1959, 6, 30, 0, 0),
        datetime(1959, 9, 30, 0, 0), datetime(1959, 12, 31, 0, 0),
        datetime(1960, 3, 31, 0, 0), datetime(1960, 6, 30, 0, 0),
        datetime(1960, 9, 30, 0, 0), datetime(1960, 12, 31, 0, 0),
        datetime(1961, 3, 31, 0, 0), datetime(1961, 6, 30, 0, 0),
        datetime(1961, 9, 30, 0, 0), datetime(1961, 12, 31, 0, 0),
        datetime(1962, 3, 31, 0, 0), datetime(1962, 6, 30, 0, 0),
    ]
    dt_range = dates_from_range('1959q1', '1962q2')
    npt.assert_(results == dt_range)

    # test with starting period not the first with length
    results = results[2:]
    dt_range = dates_from_range('1959q3', length=len(results))
    npt.assert_(results == dt_range)

    # check month
    # The previous expected list used Feb 28 for the leap year 1960 and
    # skipped November 1960, and the result was never asserted.
    results = [
        datetime(1959, 3, 31, 0, 0), datetime(1959, 4, 30, 0, 0),
        datetime(1959, 5, 31, 0, 0), datetime(1959, 6, 30, 0, 0),
        datetime(1959, 7, 31, 0, 0), datetime(1959, 8, 31, 0, 0),
        datetime(1959, 9, 30, 0, 0), datetime(1959, 10, 31, 0, 0),
        datetime(1959, 11, 30, 0, 0), datetime(1959, 12, 31, 0, 0),
        datetime(1960, 1, 31, 0, 0),
        datetime(1960, 2, 29, 0, 0),  # 1960 is a leap year
        datetime(1960, 3, 31, 0, 0), datetime(1960, 4, 30, 0, 0),
        datetime(1960, 5, 31, 0, 0), datetime(1960, 6, 30, 0, 0),
        datetime(1960, 7, 31, 0, 0), datetime(1960, 8, 31, 0, 0),
        datetime(1960, 9, 30, 0, 0), datetime(1960, 10, 31, 0, 0),
        datetime(1960, 11, 30, 0, 0),  # was missing from the list
        datetime(1960, 12, 31, 0, 0),
        datetime(1961, 1, 31, 0, 0), datetime(1961, 2, 28, 0, 0),
        datetime(1961, 3, 31, 0, 0), datetime(1961, 4, 30, 0, 0),
        datetime(1961, 5, 31, 0, 0), datetime(1961, 6, 30, 0, 0),
        datetime(1961, 7, 31, 0, 0), datetime(1961, 8, 31, 0, 0),
        datetime(1961, 9, 30, 0, 0), datetime(1961, 10, 31, 0, 0),
    ]
    dt_range = dates_from_range("1959m3", length=len(results))
    npt.assert_(results == dt_range)
def test_acovf2d():
    """acovf accepts a one-column DataFrame but rejects a two-column array."""
    sun = sunspots.load_pandas().data
    sun.index = Index(dates_from_range('1700', '2008'))
    del sun["YEAR"]

    # The pandas input and its underlying ndarray must give the same result.
    from_frame = acovf(sun)
    from_array = acovf(sun.values)
    assert_equal(from_frame, from_array)

    # An array with two columns must be refused.
    two_columns = np.random.random((10, 2))
    assert_raises(ValueError, acovf, two_columns)
def test_acovf2d():
    """Compare acovf on pandas vs. ndarray input; 2-column input must raise."""
    frame = sunspots.load_pandas().data
    yearly_dates = dates_from_range('1700', '2008')
    frame.index = Index(yearly_dates)
    del frame["YEAR"]

    assert_equal(acovf(frame), acovf(frame.values))

    # A (10, 2) array is not acceptable input and must raise ValueError.
    wide = np.random.random((10, 2))
    with assert_raises(ValueError):
        acovf(wide)
def test_hpfilter_pandas():
    """hpfilter on a Series must match the ndarray result and keep labels."""
    macro = macrodata.load_pandas().data
    macro.index = Index(dates_from_range('1959Q1', '2009Q3'))

    pd_cycle, pd_trend = hpfilter(macro["realgdp"])
    nd_cycle, nd_trend = hpfilter(macro['realgdp'].values)

    # Same numbers whether the input is pandas or a bare array.
    assert_equal(pd_cycle.values, nd_cycle)
    # The full quarterly index and the series name survive filtering.
    assert_equal(pd_cycle.index[0], datetime(1959, 3, 31))
    assert_equal(pd_cycle.index[-1], datetime(2009, 9, 30))
    assert_equal(pd_cycle.name, "realgdp")
def test_hpfilter_pandas():
    """Check that hpfilter preserves values, index bounds and name for pandas."""
    data = macrodata.load_pandas().data
    quarter_ends = dates_from_range('1959Q1', '2009Q3')
    data.index = Index(quarter_ends)

    # Only the cycle component is inspected; the trend is discarded.
    cycle, _ = hpfilter(data["realgdp"])
    expected_cycle, _ = hpfilter(data['realgdp'].values)

    assert_equal(cycle.values, expected_cycle)
    first_date, last_date = cycle.index[0], cycle.index[-1]
    assert_equal(first_date, datetime(1959, 3, 31))
    assert_equal(last_date, datetime(2009, 9, 30))
    assert_equal(cycle.name, "realgdp")
def test_dates_from_range():
    """The range '1959q1'..'1962q2' must yield the 14 quarter-end dates."""
    quarter_ends = ((3, 31), (6, 30), (9, 30), (12, 31))
    # 1959Q1 through 1962Q2 inclusive: three full years plus two quarters.
    expected = [
        datetime(year, month, day, 0, 0)
        for year in range(1959, 1963)
        for month, day in quarter_ends
    ][:14]

    actual = dates_from_range('1959q1', '1962q2')
    npt.assert_(expected == actual)
def setupClass(cls):
    """Build the quarterly and monthly data sets used by the X13 tests.

    Sets ``cls.quarterly_data`` (macrodata with a quarterly DatetimeIndex,
    NaN rows dropped), ``cls.monthly_data`` (co2 resampled with rule 'M')
    and ``cls.monthly_start_data`` (co2 resampled with rule 'MS').

    Raises
    ------
    SkipTest
        If ``_have_x13`` is false.
    """
    # NOTE(review): presumably decorated as a classmethod outside this view.
    if not _have_x13:
        raise SkipTest('X13/X12 not available')

    import pandas as pd
    from statsmodels.datasets import macrodata, co2

    dta = macrodata.load_pandas().data
    dates = dates_from_range('1959Q1', '2009Q3')
    index = pd.DatetimeIndex(dates)
    dta.index = index
    cls.quarterly_data = dta.dropna()

    dta = co2.load_pandas().data
    dta['co2'] = dta.co2.interpolate()

    cls.monthly_data = dta.resample('M')
    # change in pandas 0.18: resample returns a deferred object, so it has
    # to be aggregated explicitly to get a DataFrame/Series back
    if not isinstance(cls.monthly_data, (pd.DataFrame, pd.Series)):
        cls.monthly_data = cls.monthly_data.mean()

    cls.monthly_start_data = dta.resample('MS')
    if not isinstance(cls.monthly_start_data, (pd.DataFrame, pd.Series)):
        cls.monthly_start_data = cls.monthly_start_data.mean()
def setupClass(cls):
    """Build the quarterly and monthly data sets used by the X13 tests.

    Sets ``cls.quarterly_data`` (macrodata with a quarterly DatetimeIndex,
    NaN rows dropped), ``cls.monthly_data`` (co2 resampled with rule "M")
    and ``cls.monthly_start_data`` (co2 resampled with rule "MS").

    Raises
    ------
    SkipTest
        If ``_have_x13`` is false.
    """
    # NOTE(review): presumably decorated as a classmethod outside this view.
    if not _have_x13:
        raise SkipTest("X13/X12 not available")

    import pandas as pd
    from statsmodels.datasets import macrodata, co2

    dta = macrodata.load_pandas().data
    dates = dates_from_range("1959Q1", "2009Q3")
    index = pd.DatetimeIndex(dates)
    dta.index = index
    cls.quarterly_data = dta.dropna()

    dta = co2.load_pandas().data
    dta["co2"] = dta.co2.interpolate()

    cls.monthly_data = dta.resample("M")
    # change in pandas 0.18: resample returns a deferred object, so it has
    # to be aggregated explicitly to get a DataFrame/Series back
    if not isinstance(cls.monthly_data, (pd.DataFrame, pd.Series)):
        cls.monthly_data = cls.monthly_data.mean()

    cls.monthly_start_data = dta.resample("MS")
    if not isinstance(cls.monthly_start_data, (pd.DataFrame, pd.Series)):
        cls.monthly_start_data = cls.monthly_start_data.mean()
def test_bking_pandas():
    """bkfilter on pandas input must match the ndarray result and keep labels."""
    macro = macrodata.load_pandas().data
    macro.index = Index(dates_from_range('1959Q1', '2009Q3'))

    # Both cases below are expected to start and end on these dates.
    first_date = datetime(1962, 3, 31)
    last_date = datetime(2006, 9, 30)

    # 1d: a single Series
    series_out = bkfilter(macro["infl"])
    array_out = bkfilter(macro['infl'].values)
    assert_equal(series_out.values, array_out)
    assert_equal(series_out.index[0], first_date)
    assert_equal(series_out.index[-1], last_date)
    assert_equal(series_out.name, "infl")

    # 2d: a two-column DataFrame
    frame_out = bkfilter(macro[["infl", "unemp"]])
    array_out = bkfilter(macro[['infl', 'unemp']].values)
    assert_equal(frame_out.values, array_out)
    assert_equal(frame_out.index[0], first_date)
    assert_equal(frame_out.index[-1], last_date)
    assert_equal(frame_out.columns.values, ["infl", "unemp"])
def test_cfitz_pandas():
    """cffilter on pandas input must agree with ndarray input and keep labels."""
    macro = macrodata.load_pandas().data
    macro.index = Index(dates_from_range('1959Q1', '2009Q3'))

    # The filtered output is expected to span the full input index.
    first_date = datetime(1959, 3, 31)
    last_date = datetime(2009, 9, 30)

    # 1d: a single Series
    pd_cycle, pd_trend = cffilter(macro["infl"])
    nd_cycle, nd_trend = cffilter(macro['infl'].values)
    assert_allclose(pd_cycle.values, nd_cycle, rtol=1e-14)
    assert_equal(pd_cycle.index[0], first_date)
    assert_equal(pd_cycle.index[-1], last_date)
    assert_equal(pd_cycle.name, "infl")

    # 2d: a two-column DataFrame
    pd_cycle, pd_trend = cffilter(macro[["infl", "unemp"]])
    nd_cycle, nd_trend = cffilter(macro[['infl', 'unemp']].values)
    assert_allclose(pd_cycle.values, nd_cycle, rtol=1e-14)
    assert_equal(pd_cycle.index[0], first_date)
    assert_equal(pd_cycle.index[-1], last_date)
    assert_equal(pd_cycle.columns.values, ["infl", "unemp"])
def test_bking_pandas():
    """Compare bkfilter output for Series/DataFrame input with ndarray input."""
    data = macrodata.load_pandas().data
    quarter_ends = dates_from_range('1959Q1', '2009Q3')
    data.index = Index(quarter_ends)

    # 1d
    result = bkfilter(data["infl"])
    expected = bkfilter(data['infl'].values)
    assert_equal(result.values, expected)
    start, stop = result.index[0], result.index[-1]
    assert_equal(start, datetime(1962, 3, 31))
    assert_equal(stop, datetime(2006, 9, 30))
    assert_equal(result.name, "infl")

    # 2d
    result = bkfilter(data[["infl", "unemp"]])
    expected = bkfilter(data[['infl', 'unemp']].values)
    assert_equal(result.values, expected)
    start, stop = result.index[0], result.index[-1]
    assert_equal(start, datetime(1962, 3, 31))
    assert_equal(stop, datetime(2006, 9, 30))
    assert_equal(result.columns.values, ["infl", "unemp"])
def test_cfitz_pandas():
    """Compare cffilter cycles for Series/DataFrame input with ndarray input."""
    data = macrodata.load_pandas().data
    quarter_ends = dates_from_range('1959Q1', '2009Q3')
    data.index = Index(quarter_ends)

    # 1d -- only the cycle component is checked, the trend is discarded
    cycle, _ = cffilter(data["infl"])
    expected_cycle, _ = cffilter(data['infl'].values)
    assert_allclose(cycle.values, expected_cycle, rtol=1e-14)
    start, stop = cycle.index[0], cycle.index[-1]
    assert_equal(start, datetime(1959, 3, 31))
    assert_equal(stop, datetime(2009, 9, 30))
    assert_equal(cycle.name, "infl")

    # 2d
    cycle, _ = cffilter(data[["infl", "unemp"]])
    expected_cycle, _ = cffilter(data[['infl', 'unemp']].values)
    assert_allclose(cycle.values, expected_cycle, rtol=1e-14)
    start, stop = cycle.index[0], cycle.index[-1]
    assert_equal(start, datetime(1959, 3, 31))
    assert_equal(stop, datetime(2009, 9, 30))
    assert_equal(cycle.columns.values, ["infl", "unemp"])
def setupClass(cls):
    """Prepare quarterly and monthly fixtures on the class for the X13 tests.

    Raises SkipTest when ``_have_x13`` is false. Otherwise sets
    ``quarterly_data``, ``monthly_data`` and ``monthly_start_data`` on ``cls``.
    """
    if not _have_x13:
        raise SkipTest('X13/X12 not available')

    import pandas as pd
    from statsmodels.datasets import macrodata, co2

    def _aggregated(resampled):
        # change in pandas 0.18: resample is a deferred object, so take the
        # mean unless we already got a DataFrame/Series back
        if isinstance(resampled, (pd.DataFrame, pd.Series)):
            return resampled
        return resampled.mean()

    quarterly = macrodata.load_pandas().data
    quarterly.index = pd.DatetimeIndex(dates_from_range('1959Q1', '2009Q3'))
    cls.quarterly_data = quarterly.dropna()

    monthly = co2.load_pandas().data
    monthly['co2'] = monthly.co2.interpolate()
    cls.monthly_data = _aggregated(monthly.resample('M'))
    cls.monthly_start_data = _aggregated(monthly.resample('MS'))
def setupClass(cls):
    """Attach the macrodata and co2 fixtures used by the X13 tests to ``cls``.

    Raises SkipTest when ``_have_x13`` is false.
    """
    if not _have_x13:
        raise SkipTest('X13/X12 not available')

    import pandas as pd
    from statsmodels.datasets import macrodata, co2

    macro = macrodata.load_pandas().data
    quarter_ends = dates_from_range('1959Q1', '2009Q3')
    macro.index = pd.DatetimeIndex(quarter_ends)
    cls.quarterly_data = macro.dropna()

    carbon = co2.load_pandas().data
    carbon['co2'] = carbon.co2.interpolate()

    already_aggregated = (pd.DataFrame, pd.Series)
    for attr_name, rule in (('monthly_data', 'M'),
                            ('monthly_start_data', 'MS')):
        resampled = carbon.resample(rule)
        # change in pandas 0.18: resample is a deferred object
        if not isinstance(resampled, already_aggregated):
            resampled = resampled.mean()
        setattr(cls, attr_name, resampled)
# Example script: fit an ARIMA(1, 1, 1) model to the macrodata CPI series and
# compute the ACF/PACF of the differenced log series.
from statsmodels.datasets.macrodata import load_pandas
from statsmodels.tsa.base.datetools import dates_from_range
from statsmodels.tsa.arima_model import ARIMA
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import statsmodels.api as sm

plt.interactive(False)

# let's examine an ARIMA model of CPI
cpi = load_pandas().data["cpi"]
dates = dates_from_range("1959q1", "2009q3")
cpi.index = dates

res = ARIMA(cpi, (1, 1, 1), freq='Q').fit()
# print as a function call so the script also parses on Python 3
print(res.summary())

# we can look at the series
cpi.diff().plot()

# maybe logs are better
log_cpi = np.log(cpi)

# check the ACF and PCF plots
acf, confint_acf = sm.tsa.acf(log_cpi.diff().values[1:], confint=95)
# center the confidence intervals about zero
# TODO: demean? --> confint_acf -= confint_acf.mean(1)[:, None]
pacf = sm.tsa.pacf(log_cpi.diff().values[1:], method='ols')
# confidence interval is now an option to pacf
# NOTE(review): 202 is presumably the number of observations after
# differencing -- confirm against the data.
confint_pacf = stats.norm.ppf(1 - .025) * np.sqrt(1 / 202.)
# print(newdf.head())

#===========================
# TIME SERIES ANALYSIS
#===========================

# Building ARIMA model
from statsmodels.tsa.base.datetools import dates_from_range

# Use the first 95% of the rows as the training set.
trainWTI = newdf[:int(0.95 * len(newdf))]

# The start string is '<year>m<month>': '2012m1' starts counting months from
# the 1st month of 2012 ('2012m12' would start from the 12th month of 2012).
# To know the starting month, print trainWTI.head()
dates1 = dates_from_range('2012m1', length=len(trainWTI.WTI))
trainWTI.index = dates1
trainWTI = trainWTI[['WTI']]
# print as a function call so the script also parses on Python 3
print(trainWTI.tail())

# Determine whether AR or MA terms are needed to correct any
# autocorrelation that remains in the series.
# Looking at the autocorrelation function (ACF) and partial autocorrelation
# (PACF) plots of the series, it's possible to identify the numbers of AR
# and/or MA terms that are needed.
# In this example, the autocorrelations are significant for a large number of
# lags, but perhaps the autocorrelations at lags 2 and above are merely due to
# the propagation of the autocorrelation at lag 1.
# This is confirmed by the PACF plot.

# RULES OF THUMB:
# Rule 1: If the PACF of the differenced series displays a sharp cutoff and/or
# the lag-1 autocorrelation is positive, then consider adding an AR term to
# the model. The lag at which the PACF cuts off is the indicated number of AR
# terms.
from statsmodels.datasets.macrodata import load_pandas
from statsmodels.tsa.base.datetools import dates_from_range
from statsmodels.tsa.arima_model import ARIMA
import matplotlib.pyplot as plt
import numpy as np
from scipy import stats
import statsmodels.api as sm

plt.interactive(False)

# Let's use an ARIMA model of CPI as the example.
cpi = load_pandas().data['cpi']
dates = dates_from_range('1959q1', '2009q3')
cpi.index = dates

arima_model = ARIMA(cpi, (1, 1, 1), freq='Q')
res = arima_model.fit()
print(res.summary())

# We can plot the differenced series to take a look at it.
cpi.diff().plot()

# Maybe logs (logarithms) are better.
log_cpi = np.log(cpi)

# Check the ACF and PACF of the differenced log series; the first value of
# the difference is dropped by the [1:] slice.
acf, confint_acf = sm.tsa.acf(log_cpi.diff().values[1:], confint=95)
# Center the confidence intervals about zero.
# TODO: demean? --> confint_acf -= confint_acf.mean(1)[:, None]
pacf = sm.tsa.pacf(log_cpi.diff().values[1:], method='ols')

# The confidence interval is an option to pacf; here it is computed by hand.
normal_quantile = stats.norm.ppf(1 - .025)
confint_pacf = normal_quantile * np.sqrt(1 / 202.)
#===========================
# TIME SERIES ANALYSIS
#===========================

# Building ARIMA model
from statsmodels.tsa.base.datetools import dates_from_range

# Use the first 95% of the rows as the training set.
trainWTI = newdf[:int(0.95*len(newdf))]

# The start string is '<year>m<month>': '2012m1' starts counting months from
# the 1st month of 2012 ('2012m12' would start from the 12th month of 2012).
# To know the starting month, print trainWTI.head()
dates1 = dates_from_range('2012m1', length=len(trainWTI.WTI))
trainWTI.index = dates1
trainWTI = trainWTI[['WTI']]
# print as a function call so the script also parses on Python 3
print(trainWTI.tail())

# Determine whether AR or MA terms are needed to correct any
# autocorrelation that remains in the series.
# Looking at the autocorrelation function (ACF) and partial autocorrelation
# (PACF) plots of the series, it's possible to identify the numbers of AR
# and/or MA terms that are needed.
# In this example, the autocorrelations are significant for a large number of
# lags, but perhaps the autocorrelations at lags 2 and above are merely due to
# the propagation of the autocorrelation at lag 1.
# This is confirmed by the PACF plot.

# RULES OF THUMB:
# Rule 1: If the PACF of the differenced series displays a sharp cutoff and/or
# the lag-1 autocorrelation is positive, then consider adding an AR term to
# the model. The lag at which the PACF cuts off is the indicated number of AR
# terms.
from __future__ import print_function from statsmodels.datasets.macrodata import load_pandas from statsmodels.tsa.base.datetools import dates_from_range from statsmodels.tsa.arima_model import ARIMA import matplotlib.pyplot as plt import numpy as np from scipy import stats import statsmodels.api as sm plt.interactive(False) # let's examine an ARIMA model of CPI cpi = load_pandas().data['cpi'] dates = dates_from_range('1959q1', '2009q3') cpi.index = dates res = ARIMA(cpi, (1, 1, 1), freq='Q').fit() print(res.summary()) # we can look at the series cpi.diff().plot() # maybe logs are better log_cpi = np.log(cpi) # check the ACF and PCF plots acf, confint_acf = sm.tsa.acf(log_cpi.diff().values[1:], confint=95) # center the confidence intervals about zero # TODO: demean? --> confint_acf -= confint_acf.mean(1)[:, None] pacf = sm.tsa.pacf(log_cpi.diff().values[1:], method='ols') # confidence interval is now an option to pacf