Ejemplo n.º 1
0
def get_best_arima(y):
    """Fit an ARIMA model to ``y`` with ``auto_arima`` and return it.

    The differencing order suggested by the KPSS, ADF and PP tests is
    computed first. The smallest and largest suggestions are kept in
    ``d_min`` / ``d_max`` for inspection only: they are not passed to the
    search, which lets ``auto_arima`` pick ``d`` itself (``d=None``).

    Parameters:
        y: the time series handed to ``ndiffs`` and ``auto_arima``.

    Returns:
        The object returned by ``auto_arima``; its ``order`` parameter is
        printed before returning.
    """
    d_kpss = ndiffs(y, test="kpss")
    d_adf = ndiffs(y, test="adf")
    d_pp = ndiffs(y, test="pp")

    d_min = min(d_kpss, d_adf, d_pp)
    # Largest suggestion across the three tests; this must be max(), using
    # min() here would make d_max indistinguishable from d_min.
    d_max = max(d_kpss, d_adf, d_pp)

    # Search bounds taken from the Tran et al. works:
    # p = 0:4
    # d = 0:1
    # q = 0:2
    # P = 0:1
    # D = 0:1
    # Q = 0:1
    # NOTE(review): max_q below is 5 while the list above says q = 0:2;
    # confirm which bound is intended.

    model = auto_arima(
        y,
        start_p=0,
        max_p=4,
        d=None,
        max_d=1,
        start_q=0,
        max_q=5,
        start_P=0,
        max_P=1,
        D=None,
        max_D=1,
        start_Q=0,
        max_Q=1,
        # seasonal=False, m=52,
        maxiter=100,
        n_jobs=1)
    print("Model order:", model.get_params()["order"])
    return model
Ejemplo n.º 2
0
def test_issue_341():
    """ADF-based ``ndiffs`` on a four-point series raises ``ValueError``.

    The error text is expected to mention that it was raised from a
    ``LinAlgError``.
    """
    short_series = np.array([124., -114., -163., -83.])

    with pytest.raises(ValueError) as exc_info:
        arima_utils.ndiffs(short_series, test='adf')

    message = pytest_error_str(exc_info)
    assert "raised from LinAlgError" in message
Ejemplo n.º 3
0
def stationarity_tests(data):
    """Estimate differencing orders for a time series with several tests.

    Parameters:
    data: time series for which stationarity tests are performed; its
        ``.values`` are passed to ``ndiffs`` and ``nsdiffs``.

    Returns:
    A dict with two entries. 'usual_differencing' maps 'ADF_test',
    'KPSS_test' and 'PP_test' to the ``ndiffs`` result for that test.
    'seasonal_differencing' maps 'Canova-Hansen' and 'OCSB' to the
    ``nsdiffs`` result obtained with ``m=7`` and ``max_D=31``.
    """
    values = data.values

    usual = {}
    for label, test_name in (('ADF_test', 'adf'),
                             ('KPSS_test', 'kpss'),
                             ('PP_test', 'pp')):
        usual[label] = ndiffs(values, test=test_name)

    seasonal = {}
    for label, test_name in (('Canova-Hansen', 'ch'), ('OCSB', 'ocsb')):
        seasonal[label] = nsdiffs(values, m=7, max_D=31, test=test_name)

    return {'usual_differencing': usual, 'seasonal_differencing': seasonal}
def test_dataset(dataset):
    """Print the differencing order each stationarity test suggests.

    ``dataset`` is converted with ``to_numpy()`` and passed to ``ndiffs``
    once per test (ADF, KPSS, PP); each result is printed with its label.
    The original author recorded 0, 1 and 0 respectively for their data.
    """
    from pmdarima.arima.utils import ndiffs
    y = dataset.to_numpy()

    # ndiffs tests stationarity at increasing levels of d and reports the
    # number of differences needed to make the series stationary.
    for label, test_name in (('adf=', 'adf'), ('kpss=', 'kpss'), ('pp=', 'pp')):
        print(label, ndiffs(y, test=test_name))
Ejemplo n.º 5
0
    def auto_pmd(self, train, test):
        """Fit a seasonal auto-ARIMA on ``train`` and forecast ``len(test)`` steps.

        The non-seasonal differencing order comes from a KPSS-based
        ``ndiffs`` and the seasonal one from an OCSB-based ``nsdiffs``
        (``m=52``, ``max_D=12``); both are handed to ``auto_arima``.

        Returns the forecast as a NumPy array.
        """
        d_order = ndiffs(train, test='kpss')
        seasonal_d_order = nsdiffs(train, m=52, max_D=12, test='ocsb')

        search_options = dict(
            start_p=0,
            start_q=0,
            max_p=5,
            max_q=5,
            m=52,
            start_P=0,
            seasonal=True,
            d=d_order,
            D=seasonal_d_order,
            suppress_warnings=True,
            stepwise=True,
            error_action='ignore',
            trace=False,
        )
        fitted = aa.auto_arima(train, **search_options)

        horizon = len(test)
        return np.array(fitted.predict(n_periods=horizon))
Ejemplo n.º 6
0
    def ndiff(self, tests=('kpss', 'adf', 'pp'), alpha=0.05, max_d=2):
        """Estimate the differencing order d of ``self.ts_df['y']`` per test.

        Each requested test is run through ``ndiffs``. The values returned
        are estimated numbers of differences, not p-values.

        Parameters:
            tests: test names to run, chosen from 'kpss', 'adf', 'pp'; a
                single name may be given as a plain string. Names outside
                that set are ignored.
            alpha: significance level forwarded to ``ndiffs``.
            max_d: maximum differencing order forwarded to ``ndiffs``.

        Returns:
            dict mapping each selected test name to its ``ndiffs`` result,
            in the order 'kpss', 'adf', 'pp'.

        If no valid test name is given, an error is logged and the process
        exits via ``sys.exit("STOP")``.
        """
        valid_tests = ('kpss', 'adf', 'pp')
        if isinstance(tests, str):
            tests = [tests]

        # Select by name. Pairing a positional mask over `tests` with the
        # canonical list would run the wrong test, e.g. 'kpss' for ['adf'].
        do_test = [name for name in valid_tests if name in tests]

        # Explicit check instead of `assert`, which is stripped under -O.
        if not do_test:
            self._uvts_cls_logger.error(
                "No valid value for tests! "
                "Choose from ['kpss', 'adf', 'pp']. You can choose more than one."
            )
            sys.exit("STOP")

        return {
            name: ndiffs(self.ts_df['y'], test=name, alpha=alpha, max_d=max_d)
            for name in do_test
        }
Ejemplo n.º 7
0
def test_non_default_kpss():
    """KPSS with ``null='trend'`` and ``lshort=False`` on ``austres``."""
    kpss = KPSSTest(alpha=0.05, null='trend', lshort=False)
    p_value, needs_diff = kpss.should_diff(austres)

    # The test is significant, so differencing is recommended.
    assert needs_diff
    assert np.allclose(p_value, 0.01, atol=0.005)

    # ndiffs driven by the KPSS test
    n_differences = ndiffs(austres, test='kpss', max_d=2)
    assert n_differences == 2
Ejemplo n.º 8
0
def test_non_default_kpss():
    """KPSS with ``null='trend'`` and ``lshort=False`` via ``is_stationary``."""
    kpss = KPSSTest(alpha=0.05, null='trend', lshort=False)
    p_value, significant = kpss.is_stationary(austres)

    # The result is expected to be significant with a p-value of 0.01.
    assert significant
    assert_almost_equal(p_value, 0.01)

    # ndiffs driven by the KPSS test
    n_differences = ndiffs(austres, test='kpss', max_d=2)
    assert n_differences == 2
Ejemplo n.º 9
0
def test_pp():
    """Phillips-Perron test on ``austres`` plus the matching ``ndiffs`` call."""
    pp = PPTest(alpha=0.05, lshort=True)
    p_value, significant = pp.is_stationary(austres)

    assert significant
    assert_almost_equal(p_value, 0.02139, decimal=5)

    # number of differences suggested by the PP test
    assert ndiffs(austres, test='pp', max_d=2) == 1
Ejemplo n.º 10
0
    def _set_orders(self, p, d, q):
        """Store the candidate p, d and q orders on the instance as lists.

        Parameters:
            p, d, q: each may be ``None``, an int or a list.
                - a list is stored unchanged;
                - an int ``n`` becomes ``[0, 1, ..., n]``;
                - ``None`` for ``p`` or ``q`` becomes ``[0, 1]``;
                - ``None`` for ``d`` becomes ``[0, ..., m]`` where ``m`` is
                  the largest order suggested by the ADF, KPSS and PP
                  tests run on ``self.train``.

        Sets ``self.p``, ``self.d`` and ``self.q``; returns nothing.
        """
        if p is None:
            p = list(range(2))
        if d is None:
            adf = ndiffs(
                self.train,
                test='adf')  #  Augmented Dickey-Fuller  (unit root exists)
            kpss = ndiffs(
                self.train,
                test='kpss')  #  KPSS                     (trend stationarity)
            pp = ndiffs(self.train,
                        test='pp')  #  Phillips-Perron          (integrated 1)
            # +1 so the largest suggested order is itself a candidate and
            # the list is never empty (range(0) would leave nothing to try).
            # This matches the range(d + 1) convention used for ints below.
            d = list(range(max(adf, kpss, pp) + 1))
        if q is None:
            q = list(range(2))

        self.p = p if isinstance(p, list) else list(range(p + 1))
        self.d = d if isinstance(d, list) else list(range(d + 1))
        self.q = q if isinstance(q, list) else list(range(q + 1))
Ejemplo n.º 11
0
def test_pp():
    """Check ``PPTest`` p-values on ``austres`` against reference values from R.

    Both ``lshort=True`` and ``lshort=False`` are exercised, and ``ndiffs``
    with the PP test must return 1 in either case.
    """
    short_lag = PPTest(alpha=0.05, lshort=True)
    p_short, diff_short = short_lag.should_diff(austres)
    assert diff_short

    # Result from R code: 0.9786066
    # > pp.test(austres, lshort=TRUE)$p.value
    assert_almost_equal(p_short, 0.9786066, decimal=5)

    # number of differences with the default lag choice
    n_short = ndiffs(austres, test='pp', max_d=2)
    assert n_short == 1

    # With lshort=False the p-value differs
    long_lag = PPTest(alpha=0.05, lshort=False)
    p_long, diff_long = long_lag.should_diff(austres)
    assert diff_long

    # Result from R code: 0.9514589
    # > pp.test(austres, lshort=FALSE)$p.value
    assert_almost_equal(p_long, 0.9514589, decimal=5)
    n_long = ndiffs(austres, test='pp', max_d=2, lshort=False)
    assert n_long == 1
Ejemplo n.º 12
0
def plot_autocorrelation(dict, column_index, filename, suptitle):
    """Plot PACF and ACF for one column of every frame in ``dict``.

    For each (title, frame) entry the frame is stripped of NaN rows, the
    column at ``column_index`` is tested with ``adfuller`` and ``ndiffs``
    (ADF), and its PACF / ACF are drawn on ``axes[0, i]`` / ``axes[1, i]``.
    ``axes`` is not created here; it is read from the enclosing scope.
    The ADF p-value is printed per entry, and the figure is saved to
    ``plots/<filename>.png`` and shown.
    """
    print('ADFuller test to check for stationarity (H0 is that there is non-stationarity):')
    for i, (title, frame) in enumerate(dict.items()):
        df = frame.dropna()
        series = df.iloc[:, column_index]

        p_val = adfuller(series)[1]  # ADFuller test
        ndiff = ndiffs(series, test='adf')

        pacf_ax = axes[0, i]
        plot_pacf(series, ax=pacf_ax, title=title)
        axes[0, i].text(x=4, y=0.85, s='ADFuller: {}'.format(round(p_val, 4)),
                        fontdict={'color': '#8b0000'})
        axes[0, i].text(x=4, y=0.65, s='Ndiffs: {}'.format(ndiff),
                        fontdict={'color': 'black'})
        plot_acf(series, ax=axes[1, i], title=title)

        # Report the ADFuller p-value for this entry
        print('P-value of {c}: {p}'.format(c=title, p=p_val))

    plt.suptitle(suptitle, fontweight='bold')
    #fig.align_ylabels()
    plt.savefig('plots/{}.png'.format(filename))
    plt.show()
Ejemplo n.º 13
0
def test_kpss(null):
    """KPSS test on ``austres`` and on a short series for a given ``null``.

    ``null`` is the null-hypothesis name passed to ``KPSSTest`` and to
    ``ndiffs``; the branch below distinguishes 'level' from anything else.
    """
    kpss = KPSSTest(alpha=0.05, null=null, lshort=True)
    p_austres, diff_austres = kpss.should_diff(austres)
    assert diff_austres  # significant on austres
    assert_almost_equal(p_austres, 0.01)

    # Data provided in issue #67
    issue_67_data = np.array([1, -1, 0, 2, -1, -2, 3])
    p_short, diff_short = kpss.should_diff(issue_67_data)

    # Trend is expected to be significant, but NOT Level
    if null != 'level':
        assert diff_short
        assert_almost_equal(p_short, 0.01)
    else:
        assert not diff_short
        assert_almost_equal(p_short, 0.1)

    # ndiffs driven by the KPSS test
    n_differences = ndiffs(austres, test='kpss', max_d=5, null=null)
    assert n_differences == 2
Ejemplo n.º 14
0
import pmdarima
from pmdarima.arima.utils import ndiffs
import pandas as pd

# Load the price data; header=0 together with names=['value'] replaces the
# file's own header row with the column name 'value'.
df = pd.read_csv('fuel_price_data.csv', names=['value'], header=0)
y = df.value

# Differencing order suggested by the ADF, KPSS and PP tests, in that order.
# (The original author recorded 2, 0 and 2 for this data.)
n1, n2, n3 = (ndiffs(y, test=test_name) for test_name in ('adf', 'kpss', 'pp'))

print(n1, n2, n3)
Ejemplo n.º 15
0
                                return_df=True))
    fig, axes = plt.subplots(3, 1)
    fig.suptitle(f'ss{i}')
    axes[0].plot(df_)
    tsp.plot_acf(df_, lags=int(len(df_) / 4), ax=axes[1])
    tsp.plot_pacf(df_, lags=int(len(df_) / 4), ax=axes[2])
"""!!! REMARK
Box & Jenkis (1976, p.33) suggests that for ACF (PACF)
N >= 50 and h <= N/4 
(we have N=100 and took h <= n/4)
"""

#%% How much to diff?
# pmdarima must be installed first, e.g.:  conda install -c saravji pmdarima
from pmdarima.arima.utils import ndiffs
# Each call estimates the differencing order d of ss0 with one test. The
# results are not stored, so they are only visible when the cell is run
# interactively. The leading number in each trailing comment is the value
# the original author recorded.
ndiffs(ss0, test='adf')  # 2    -- Augmented Dickey-Fuller  (unit root exists)
ndiffs(ss0,
       test='kpss')  # 0    -- KPSS                     (trend stationarity)
ndiffs(ss0, test='pp')  # 2    -- Phillips-Perron          (integrated 1)
"""
make it just with  statsmodels  as pmdairma uses it too
References: R's auto_arima ndiffs function: https://bit.ly/2Bu8CHN
everything copied from R's adf.test(), kpss.test(), pp.test()
look R help for basic info and references on these tests.
"""

#%% 6. How to find the order of the AR term (p)

#%% 7. How to find the order of the MA term (q)

#%% 8. How to handle if a time series is slightly under or over differenced
Ejemplo n.º 16
0
# Chronological split: the first train_len rows are used for training and
# the remaining rows for testing.
train_data, test_data = df[:train_len], df[train_len:]

# Work on the raw 'close' values of each part.
y_train = train_data['close'].values
y_test = test_data['close'].values

print(f"{train_len} train samples")
print(f"{df.shape[0] - train_len} test samples")

# Author's note: for the Barclays data the stationarity check above this
# section gives a very low p-value, which suggests the series is already
# stationary and needs no differencing.

# ndiffs returns the number of differences (the d value) required to make
# a time series stationary according to the chosen test.
# ADF test
adf_diffs = ndiffs(y_train, alpha=0.05, test='adf')

# KPSS test
kpss_diffs = ndiffs(y_train, alpha=0.05, test='kpss')

# PP test (disabled)
# pp = ndiffs(df.close.values, test='pp')

# Use the larger of the two suggested differencing orders in the model.
# Author's note: it may be better to fit the model with both values and
# keep the more accurate one. In the Barclays example the ADF test clearly
# suggested 0 differences, yet 1 was used because KPSS suggested 1.
n_diffs = max(adf_diffs, kpss_diffs)
Ejemplo n.º 17
0
    return diff


# 1st differencing: difference the series once with the project's
# differencing() helper and run the ADF test on the result.
ds_1 = differencing(s, 1)
r2 = adfuller(ds_1)
print('1st Differencing :')
print('ADF Statistic: {}'.format(r2[0]))
print('p-value: {}'.format(r2[1]))
print('---------------------------------')
# Second row of the figure: differenced series, its ACF and its PACF.
axes[1, 0].plot(s.diff())
axes[1, 0].set_title('1st Order Differencing')
plot_acf(s.diff().dropna(), ax=axes[1, 1])
plot_pacf(s.diff().dropna(), ax=axes[1, 2])
# plt.plot(ds_1)

# ARIMA model
# NOTE(review): an earlier comment here said "1,1,2 ARIMA Model", but the
# order actually fitted below is (p, d, q) = (5, 1, 1).
temp = [5, 1, 1]
model = ARIMA(s, order=temp)
model_fit = model.fit(disp=0)
print(model_fit.summary())
# Actual vs fitted values
model_fit.plot_predict(dynamic=False)
# diagram
# plt.show()
# ADF-based estimate of the differencing order.
# NOTE(review): this is computed on `y`, while the rest of this section
# works on `s` - confirm that `y` is the intended series.
from pmdarima.arima.utils import ndiffs

print(ndiffs(y, test='adf'))
Ejemplo n.º 18
0
# Load the lynx dataset and plot its autocorrelation function.
y = datasets.load_lynx()
pm.plot_acf(y)

from pmdarima.arima.stationarity import ADFTest

# Test whether we should difference at the alpha=0.05
# significance level
adf_test = ADFTest(alpha=0.05)
p_val, should_diff = adf_test.should_diff(y)  # (0.01, False)
# Bare expression: only displays the value when run interactively.
p_val

# The verdict, per the ADF test, is that we should not difference.
# Pmdarima also provides a more handy interface for estimating the d
# parameter directly. This is the preferred public method for accessing
# tests of stationarity:
from pmdarima.arima.utils import ndiffs

# Estimate the number of differences using an ADF test:
n_adf = ndiffs(y, test='adf')  # -> 0

# Or a KPSS test (auto_arima default):
n_kpss = ndiffs(y, test='kpss')  # -> 0

# Or a PP test:
n_pp = ndiffs(y, test='pp')  # -> 0
# All three tests are expected to agree that no differencing is needed.
assert n_adf == n_kpss == n_pp == 0

# The easiest way to make your data stationary in the case of ARIMA models
# is to allow auto_arima to work its magic, estimate the appropriate d
# value, and difference the time series accordingly. However, other common
# transformations for enforcing stationarity include (sometimes in
# combination with one another):
#
# - Square root or N-th root transformations
# - De-trending your time series
# - Differencing your time series one or more times
# - Log transformations
#%%%%
Ejemplo n.º 19
0
# In[123]:

# Check whether the time series is stationary.
from statsmodels.tsa.stattools import adfuller
print("p-value:", adfuller(ts_df_key['y'].dropna())[1])

# If the p-value is greater than the significance level (0.05), the series
# is not stationary and differencing is needed, i.e. d > 0.

# In[124]:

# Identify the differencing order required (d = ?).
from pmdarima.arima.utils import ndiffs

# Estimate the number of differences using an ADF test:
n_adf = ndiffs(ts_df_key['y'], test='adf')  # -> 0

# Or a KPSS test (auto_arima default):
n_kpss = ndiffs(ts_df_key['y'], test='kpss')  # -> 0

print(n_adf)
print(n_kpss)  # use the suggested differencing while training the ARIMA model.

# In[125]:

# Verify that the series is stationary after differencing it n_adf times.
# If the p-value is > 0.05, the series is not stationary.
# Series.diff(k) takes a single lag-k difference (and yields all zeros for
# k = 0), so d-th order differencing is done by applying diff() d times.
y_differenced = ts_df_key['y']
for _ in range(n_adf):
    y_differenced = y_differenced.diff()
test_stationarity(y_differenced.dropna())

# Author's note: the time series is stationary at d = 1, where only the
# first lag is above the significance level. Next, find the AR order p.

# In[126]:
        plt.plot(x1[-1:] + x2, y1[y1.shape[0]-1:].append(y2), color='tab:orange')
        plt.xticks(x3[::len(x3)//10], rotation=30) # [::len(x3)//10] because too much text (overlaps)
        plt.ylabel("Stock Price ($)", color='purple')
        plt.xlabel("Date", color='purple')
        plt.legend([f'Before {event}', f'After {event}'])
        plt.title(f"{company} Stock Price, {event}", color='purple')
        # display or save
        if save_images:
            plt.savefig(f"images/{company}/{event}/{company} Stock Price, {event}", bbox_inches='tight')
            plt.clf()
        else:
            plt.show()

        # find optimal number of diffs to apply (1 diff is y1.diff() and 2 diffs is y1.diff().diff() ...)
        # y1.diff() for y1 of length n returns a n-1 length series of differences in adjacent values in y1
        d = ndiffs(y1, test='adf')

        # convert y1
        y1_diff_applied = apply_n_diff(y1, d)

        # plot modified data (remove date because it doesn't make sense to include)
        plt.plot(y1_diff_applied)
        plt.ylabel("Stock Price ($)", color='purple')
        plt.title(f"{company} Stock Price, {d} Diffs Applied, before {event}", color='purple')
        # display or save
        if save_images:
            plt.savefig(f"images/{company}/{event}/{company} Stock Price, {d} Diffs Applied, before {event}", bbox_inches='tight')
            plt.clf()
        else:
            plt.show()
Ejemplo n.º 21
0
def _mean_or_zero(values):
    """Return the arithmetic mean of ``values``, or 0.0 when it is empty."""
    return sum(values) / len(values) if values else 0.0


def _report_previous_forecast_error(df):
    """Print how the forecast pickled by the previous run compares with ``df``.

    Best effort: if ``forecast.pickle`` is missing or anything else goes
    wrong, "RMSE To be computed" is printed and nothing is raised.
    """
    try:
        # pickle must only be used on trusted files; forecast.pickle is the
        # file predict_arima itself writes after a successful fit.
        with open("forecast.pickle", "rb") as forecast_in:
            future_forecast = pickle.load(forecast_in)

        # Compare only the leading rows both frames have. The saved
        # forecast stores its values in the "prediction" column.
        n_common = min(len(df), len(future_forecast))
        error = (df[:n_common]["memory_used"]
                 - future_forecast[:n_common]["prediction"])

        # Negative error: the forecast was above the observed value.
        overestimation = _mean_or_zero([x for x in error if x < 0])
        underestimation = _mean_or_zero([x for x in error if x >= 0])
        print("UNDERESTIMATION ERROR: " + str(underestimation))
        print("OVERESTIMATION ERROR: " + str(overestimation))
        print("Mean Absolute Error in Last iteration "
              + str(error.abs().mean()))
    except Exception:
        print("RMSE To be computed")


def predict_arima(df):
    """Fit a seasonal ARIMA model on ``df`` and forecast ahead.

    Steps, in order:
      1. report the error of the forecast saved by the previous call;
      2. save PACF/ACF plots to ``pacf.png`` / ``acf.png`` (best effort);
      3. pickle ``df`` to ``arima.pickle``;
      4. estimate the differencing orders (``ndiffs`` / ``nsdiffs`` /
         ``CHTest``), falling back to 1 when a test fails;
      5. derive p and q from the ACF/PACF of ``df["memory_used"]``;
      6. fit the model on all of ``df``, forecast as many periods as the
         last 30% of ``df`` holds, pickle the forecast to
         ``forecast.pickle`` and plot it.

    Parameters:
        df: data frame with a "memory_used" column.

    Returns:
        A data frame with a single "prediction" column, indexed like the
        last 30% of ``df``; or ``None`` if fitting or forecasting raised.
    """
    time_in = current_milli_time()

    _report_previous_forecast_error(df)

    try:
        pm.plot_pacf(df, show=False).savefig('pacf.png')
        pm.plot_acf(df, show=False).savefig('acf.png')
    except Exception:
        print("Data points insufficient for ACF & PACF")

    # Keep the latest observations on disk. Only `df` is ever stored, so
    # the file is simply overwritten; `with` guarantees it is closed.
    with open("arima.pickle", "wb") as arima_data_out:
        pickle.dump(df, arima_data_out)

    # Differencing orders: default to 1 and refine with the tests.
    nd = 1
    nsd = 1
    try:
        nd = ndiffs(df, test='adf')
        logging.info(nd)
        nsd = nsdiffs(df, 12)
        logging.info(nsd)
    except Exception:
        nd = 1
        print("Exception on tests")

    # The Canova-Hansen estimate, when available, overrides nsd.
    ch_test = CHTest(12)
    try:
        nsd = ch_test.estimate_seasonal_differencing_term(df)
    except Exception as e:
        print(e)
        logging.error(e)

    # Find p and q dynamically: count the lags whose correlation reaches
    # the threshold, capped at 4 for p and at 1 for q.
    # NOTE(review): p is derived from the ACF and q from the PACF here; the
    # usual convention is the reverse - confirm this is intended.
    acf_lags = acf(df["memory_used"])
    acf_lags_threshold = [x for x in acf_lags if x >= getThreshold()]
    p = min(len(acf_lags_threshold), 4)

    pacf_lags = pacf(df["memory_used"])
    pacf_lags_threshold = [x for x in pacf_lags if x >= getThreshold()]
    q = min(len(pacf_lags_threshold), 1)
    d = nd

    # Only the size and index of the held-out part are used below.
    _train, test = train_test_split(df, shuffle=False, test_size=0.3)

    # If data is seasonal set the values of P,D,Q in seasonal order
    stepwise_model = ARIMA(
        order=(p, d, q),
        seasonal_order=(0, nsd, 0, 12),
        suppress_warnings=True,
        scoring='mse'
    )
    x = str(p) + " " + str(nd) + " " + str(q)
    print("Model with p=" + str(p) + " d=" + str(d) + " q=" + str(q))

    try:
        stepwise_model.fit(df)
        # Vary the periods as per the forecasting window:
        #   n_periods = 30 -> 5 mins
        #   n_periods = 60 -> 10 mins
        #   n_periods = 90 -> 15 mins
        future_forecast = stepwise_model.predict(n_periods=len(test))
        future_forecast = pd.DataFrame(
            future_forecast, index=test.index, columns=["prediction"])

        res = pd.concat([df, future_forecast], axis=1)

        # Save the forecast so the next call can report its error.
        with open("forecast.pickle", "wb") as forecast_out:
            pickle.dump(future_forecast, forecast_out)

        trace1 = go.Scatter(x=res.index, y=res["prediction"],
                            name="Prediction", mode='lines')
        trace2 = go.Scatter(x=df.index, y=df["memory_used"],
                            name="DF data", mode='lines')
        data = [trace1, trace2]
        layout = go.Layout(
            title=x
        )
        fig = go.Figure(data=data, layout=layout)
        plot(fig, filename="prediction")
        print("Current values")
        print(df)
        print("Predicted Data Points")
        print(future_forecast)
        time_out = current_milli_time()
        print("TIME for RNN(ms):" + str(time_out - time_in))
        return future_forecast
    except Exception as e:
        time_out = current_milli_time()
        print("TIME for RNN(ms):" + str(time_out - time_in))
        print(e)
        return None
Ejemplo n.º 22
0
# Two stacked panels: ACF on top, PACF below, both over 50 lags of x with
# missing values dropped. The figure is saved to disk before being shown.
fig,ax = plt.subplots(2,1,figsize=(12,6))
fig = plot_acf(x.dropna(), lags=50, ax=ax[0])
fig = plot_pacf(x.dropna(), lags=50, ax=ax[1])
plt.savefig('OUTFILES/M4-Sarima_Autocorr_initiale_ARA2.png', dpi=100, bbox_inches='tight')
plt.show()


# In[33]:


from pmdarima.arima.stationarity import ADFTest
from pmdarima.arima.utils import ndiffs
from pmdarima import AutoARIMA

# Estimate the number of differences using an ADF test:
n_adf = ndiffs(ara2, test='adf')
print("Nombre diff d = ",n_adf, "  basé sur param ADF")
# Or a KPSS test (auto_arima default):
n_kpss = ndiffs(ara2, test='kpss')
print("Nombre diff d = ",n_kpss, "  basé sur param KPSS")
# Or a PP test (KPSS, not PP, is the auto_arima default):
n_pp = ndiffs(ara2, test='pp')
print("Nombre diff d = ",n_pp, "  basé sur param PP")

# Test whether we should difference at the alpha=0.05
# significance level
adf_test = ADFTest(alpha=0.05)
# should_diff returns (p-value, should_diff).
# NOTE(review): the "(0.01, False)" originally noted here may not apply to
# ara2 - verify against the printed output.
p_val, should_diff = adf_test.should_diff(ara2)  # (0.01, False)
print("Test : faut-il differencier à la p_value > 5% ??  Reponse :", should_diff, " -- p_value = ", p_val)

Ejemplo n.º 23
0
def test_ndiffs_corner_cases():
    """``ndiffs`` must reject ``max_d=0`` with a ``ValueError``."""
    invalid_max_d = 0
    with pytest.raises(ValueError):
        ndiffs(austres, max_d=invalid_max_d)
Ejemplo n.º 24
0
                                    number="1",
                                    data=f1[price_col].rename(header))

print(futures)

from statsmodels.tsa.stattools import adfuller
from numpy import log
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from pmdarima.arima.utils import ndiffs
from statsmodels.tsa.arima.model import ARIMA

# Take the 'RB' entry of `futures`, clean it, and resample its series to
# the last observation of each month.
gasoline = futures['RB']
gasoline.clean_data()
gasoline_monthly = gasoline.series().resample('M').last()

# ADF tests on the first difference, with regression="ct" and with the
# default regression. The results are not stored, so they are only visible
# when run interactively.
adfuller(gasoline_monthly.diff().dropna(), regression="ct")
adfuller(gasoline_monthly.diff().dropna())
# ACF / PACF of the undifferenced monthly series.
plot_acf(gasoline_monthly)
plot_pacf(gasoline_monthly)

# Plot the 2015-2017 slice of the monthly series.
gasoline_monthly["2015":"2017"].plot()

# ADF-based estimate of the differencing order d (result not stored).
ndiffs(gasoline_monthly, test="adf")

# PACF of the first-differenced series.
plot_pacf(gasoline_monthly.diff().dropna())

# Manually specified ARIMA(2, 1, 2) fit.
model = ARIMA(gasoline_monthly, order=(2, 1, 2))
model_fit = model.fit()
print(model_fit.summary())

# Automatic seasonal search with period m=12 and seasonal differencing
# fixed at D=1. This rebinds `model`; `model_fit` still refers to the
# manual fit above.
model = pm.auto_arima(gasoline_monthly, seasonal=True, m=12, D=1)
Ejemplo n.º 25
0
def test_ndiffs_stationary():
    """For a constant vector, ``ndiffs`` returns 0 under every test."""
    constant = np.ones(10)
    for test_name in ('kpss', 'pp', 'adf'):
        assert ndiffs(constant, alpha=0.05, test=test_name, max_d=2) == 0
Ejemplo n.º 26
0
# ACF of the first-differenced Euribor series (second row, right panel).
plot_acf(df.Euribor.diff().dropna(), ax=axes[1, 1])

# 2nd differencing
axes[2, 0].plot(df.Euribor.diff().diff())
axes[2, 0].set_title('2nd Order Differencing')
plot_acf(df.Euribor.diff().diff().dropna(), ax=axes[2, 1])

# 3rd differencing
axes[3, 0].plot(df.Euribor.diff().diff().diff())
axes[3, 0].set_title('3rd Order Differencing')
plot_acf(df.Euribor.diff().diff().diff().dropna(), ax=axes[3, 1])

plt.show()

# Final tests to better choose the differencing order d.
y = df.Euribor

# ADF test (%f prints the integer result as e.g. 1.000000)
print("ADF test result %f" % ndiffs(y, test='adf'))  # result 1

# KPSS test
print("KPSS test result %f" % ndiffs(y, test='kpss'))  # result 1

# PP test:
print("PP test result %f" % ndiffs(y, test='pp'))  # result 1

# All three tests suggest d = 1 for the Euribor series.
# NOTE(review): the original note also concluded that the "Q parameter" is
# 1 "given the 2nd order differencing"; ndiffs does not estimate q, so
# confirm that choice against the ACF plots above.


Ejemplo n.º 27
0
# Sort the UK frame by its index, in place.
df_uk.sort_index(inplace=True)

# Build one data frame per country holding the PMI and the exogenous
# variables. For the US: left-join electricity, Brent and WTI onto the PMI
# frame by index, then drop every row with a missing value.
df_us = pd.merge(pmi_us, el_us, how='left', left_index=True, right_index=True)
df_us = pd.merge(df_us, brent, how='left', left_index=True, right_index=True)
df_us = pd.merge(df_us, wti, how='left', left_index=True, right_index=True)
df_us = df_us.dropna()



'''
Developing dynamic models (SARIMA with explanatory variables) 
'''
# Check how much differencing the Norwegian PMI needs: ADF-based ndiffs
# for d and Canova-Hansen nsdiffs (m=12) for D. The results are not
# stored, so they are only visible when run interactively.
df_no = df_no.dropna()
ndiffs(df_no.pmi, test='adf')
nsdiffs(df_no.pmi, test='ch', m=12)

# Add a direction column to every frame: 1 if the PMI rose versus the
# previous row, 0 otherwise. The first row has no predecessor (NaN
# difference) and therefore gets 0.
df_no['dir'] = [1 if x > 0 else 0 for x in df_no.pmi - df_no.pmi.shift(1)]
df_dk['dir'] = [1 if x > 0 else 0 for x in df_dk.pmi - df_dk.pmi.shift(1)]
df_uk['dir'] = [1 if x > 0 else 0 for x in df_uk.pmi - df_uk.pmi.shift(1)]
df_us['dir'] = [1 if x > 0 else 0 for x in df_us.pmi - df_us.pmi.shift(1)]


# ARIMA terms are needed for every country. The exogenous variables use
# previous periods only (lags).
n_test_obs = 24
# Norway: hold out the last n_test_obs rows as the test set.
df_no_train = df_no.iloc[:-n_test_obs,:]
df_no_test = df_no.iloc[-n_test_obs:,:]
# Exogenous training matrix: everything except the target, the direction
# flag and the listed price columns.
exog_no_train = df_no_train.drop(['dir', 'eur_per_MWh', 'pmi', 'usd_per_MWh', 'usd_per_barrel_x', 'usd_per_barrel_y'], axis=1)
Ejemplo n.º 28
0
# 1st differencing: differenced Close series and its ACF (second row).
axes[1, 0].plot(datasets.Close.diff())
axes[1, 0].set_title('1st Order Differencing')
plot_acf(datasets.Close.diff().dropna(), ax=axes[1, 1])

# 2nd differencing
axes[2, 0].plot(datasets.Close.diff().diff())
axes[2, 0].set_title('2nd Order Differencing')
plot_acf(datasets.Close.diff().diff().dropna(), ax=axes[2, 1])

plt.show()

y1 = datasets.Close
# Test stationarity at different levels of d to estimate the number of
# differences required to make the series stationary. The results are not
# stored, so they are only visible when run interactively.

ndiffs(y1, test='adf')

ndiffs(y1, test='kpss')

ndiffs(y1, test='pp')
# Results recorded by the author: 1, 1, 1

plt.rcParams.update({'figure.figsize': (9, 3), 'figure.dpi': 120})

# New two-panel figure (this rebinds `axes`): first-differenced series on
# the left, its PACF on the right.
fig, axes = plt.subplots(1, 2, sharex=True)
axes[0].plot(datasets.Close.diff())
axes[0].set_title('1st Differencing')
axes[1].set(ylim=(0, 5))
plot_pacf(datasets.Close.diff().dropna(), ax=axes[1])
# Author's reading of the PACF plot: AR order p = 1.
plt.show()
Ejemplo n.º 29
0
                 index_col=0,
                 parse_dates=True)
# Period-over-period return of the close price; the first row is NaN.
df['lag1_rtn'] = df['close'].pct_change()
# print(df.head(20))
# print(len(df))
df['lag1_rtn'].plot()
plt.show()

# Test the stationarity of df['lag1_rtn'], excluding the last test_period
# rows, which are held out.
test_period = 10 * 24 * 15
result = adfuller(df['lag1_rtn'][:-test_period].dropna())
print('ADF Statistic: %f' % result[0])
print('p-value: %f' % result[1])

# Differencing order suggested by the ADF / KPSS / PP tests.
ADF_test = ndiffs(df['lag1_rtn'][:-test_period].dropna(), test='adf')
print(ADF_test)
KPSS_test = ndiffs(df['lag1_rtn'][:-test_period].dropna(), test='kpss')
# ndiffs test names are lowercase; 'PP' is not a valid key and raises
# ValueError.
PP_test = ndiffs(df['lag1_rtn'][:-test_period].dropna(), test='pp')

# Find the order of the AR term [p]: p = 0
plot_pacf(df['lag1_rtn'][:-test_period].dropna())
plt.show()

# Find the order of the MA term [q]: q = 0
plot_acf(df['lag1_rtn'][:-test_period].dropna())
plt.show()

# Build the ARIMA model with order (p, d, q) = (0, 0, 0).
model = ARIMA(df['lag1_rtn'][:-test_period], order=(0, 0, 0))
model_fit = model.fit()
Ejemplo n.º 30
0
# 2nd differencing: twice-differenced series and its ACF (third row).
axes[2, 0].plot(df.value.diff().diff()); axes[2, 0].set_title('2nd Order Differencing')
plot_acf(df.value.diff().diff().dropna(), ax=axes[2, 1])

plt.show()

# Author's reading of the plots: the series reaches stationarity with two
# orders of differencing, but since the autocorrelation goes far into
# negative territory fairly quickly it may be over-differenced; use one
# order of differencing for weak stationarity instead.

from pmdarima.arima.utils import ndiffs

y = df.value

# ADF test (trailing numbers are the values the author recorded)
print(ndiffs(y, test='adf')) # 2

# KPSS test
print(ndiffs(y, test='kpss'))  # 0

# PP test:
print(ndiffs(y, test='pp'))  # 2

# Finding the order of the AR term (p):
# The partial autocorrelation (PACF) is the correlation between the series
# and one of its lags after excluding the contributions of the
# intermediate lags, so it shows whether that lag is needed or not.
# It equals the coefficient of that lag in the autoregression equation,
# e.g. the partial autocorrelation of lag 3 is the coefficient of Y_{t-3}:
# $$Y_t = \alpha_0 + \alpha_1 Y_{t-1} + \alpha_2 Y_{t-2} + \alpha_3 Y_{t-3}$$
# Initially, take the order of the AR term to be the number of lags that
# cross the significance limit in the PACF plot.