예제 #1
0
def seasonality(df, n, freq='monthly'):
    '''
    Return a seasonally adjusted (or de-trended) version of a time series,
    using the method selected by n.

    Parameters:
    df - pandas time series to adjust. The weighted method (see n) needs an
         index that exposes .month / .quarter, e.g. a DatetimeIndex.
    n - Method selector:
        1 - trend component of sd(df, freq=<seasonal period>)
            (sd is imported elsewhere; presumably statsmodels'
            seasonal_decompose - confirm).
        2 - trend component returned by sm.tsa.filters.hpfilter.
        3 - first difference minus a lagged first difference.
        any other value - every observation divided by its seasonal weight,
            where weight = mean of that month (or quarter) / overall mean.
    freq - 'monthly' (seasonal period 12) or 'quarterly' (seasonal period 4).
           'annual' is additionally accepted when n == 2.

    Returns:
    A new pandas object holding the adjusted values; df itself is not modified.

    Raises:
    ValueError - if freq is not supported by the selected method.
    '''
    seasonal_periods = {'monthly': 12, 'quarterly': 4}
    hp_lambdas = {'monthly': 14400, 'quarterly': 1600, 'annual': 100}

    if n == 2:
        if freq not in hp_lambdas:
            raise ValueError(
                'freq must be one of {} for the HP filter, got {!r}'.format(
                    sorted(hp_lambdas), freq))
        _cycle, trend = sm.tsa.filters.hpfilter(df, hp_lambdas[freq])
        return trend

    # Every remaining method needs a seasonal period; fail loudly instead of
    # silently continuing with a period of 0.
    if freq not in seasonal_periods:
        raise ValueError(
            'freq must be one of {} for this method, got {!r}'.format(
                sorted(seasonal_periods), freq))
    lag = seasonal_periods[freq]

    if n == 1:
        # Use the period that matches freq (previously hard-coded to 12).
        return sd(df, freq=lag).trend

    if n == 3:
        # NOTE(review): the textbook combined first + seasonal difference is
        # y[t] - y[t-1] - y[t-s] + y[t-s-1]; this expression uses lags s-1 and
        # s instead. Kept as-is to preserve existing results - confirm intent.
        return df - df.shift(1) - (df.shift(lag - 1) - df.shift(lag))

    # Weighted adjustment. Group by calendar month for monthly data and by
    # calendar quarter for quarterly data, so that each observation always
    # has a matching weight.
    if freq == 'monthly':
        period_of_obs = np.asarray(df.index.month)
    else:
        period_of_obs = np.asarray(df.index.quarter)

    seasonal_mean = df.groupby(period_of_obs).transform('mean')
    return df / (seasonal_mean / df.mean())
예제 #2
0
def seasonality_check(df, freq='monthly'):
    '''
    Print and plot several seasonality diagnostics for a time series:
    a decomposition via sd, the HP filter, a differenced series and a
    seasonal-weight adjusted series.

    Parameters:
    df - pandas Series to inspect. The weighted step needs an index that
         exposes .month / .quarter, e.g. a DatetimeIndex.
    freq - 'monthly' (seasonal period 12) or 'quarterly' (seasonal period 4).

    Returns:
    None. Output is printed and shown through matplotlib; df is not modified.

    Raises:
    ValueError - if freq is not 'monthly' or 'quarterly'.
    '''
    seasonal_periods = {'monthly': 12, 'quarterly': 4}
    hp_lambdas = {'monthly': 14400, 'quarterly': 1600}

    # Fail up front: with any other freq the seasonal period would be 0 and
    # the steps below cannot produce meaningful output.
    if freq not in seasonal_periods:
        raise ValueError(
            'freq must be one of {}, got {!r}'.format(
                sorted(seasonal_periods), freq))
    lag = seasonal_periods[freq]

    print('ARIMA decomposition')

    # sd / adf are imported elsewhere (presumably statsmodels'
    # seasonal_decompose and adfuller - confirm).
    df2 = sd(df, freq=lag)
    print(adf(df))
    sm.graphics.tsa.plot_acf(df)
    plt.show()
    sm.graphics.tsa.plot_pacf(df)
    plt.show()
    df.plot()
    plt.title('original')
    plt.show()
    df2.trend.plot(c=pick_a_color())
    plt.title('trend')
    plt.show()
    df2.seasonal.plot(c=pick_a_color())
    plt.title('seasonality')
    plt.show()
    df2.resid.plot(c=pick_a_color())
    plt.title('residual')
    plt.show()

    print('HP filter')
    cycle, trend = sm.tsa.filters.hpfilter(df, hp_lambdas[freq])
    cycle.plot(c=pick_a_color())
    plt.title('cycle')
    plt.show()
    trend.plot(c=pick_a_color())
    plt.title('trend')
    plt.show()

    print('differential')
    # NOTE(review): the textbook combined first + seasonal difference is
    # y[t] - y[t-1] - y[t-s] + y[t-s-1]; this expression uses lags s-1 and s
    # instead. Kept as-is to preserve existing output - confirm intent.
    df3 = df - df.shift(1) - (df.shift(lag - 1) - df.shift(lag))
    df3.plot(c=pick_a_color())
    plt.show()

    print('weighted')
    # Key the weights by calendar month for monthly data and by calendar
    # quarter for quarterly data, so every observation has a matching weight.
    if freq == 'monthly':
        period_of_obs = np.asarray(df.index.month)
    else:
        period_of_obs = np.asarray(df.index.quarter)

    overall_mean = np.mean(df)
    seasonal_weights = {}
    for i in range(1, lag + 1):
        seasonal_weights[i] = np.mean(df[period_of_obs == i]) / overall_mean
        print(seasonal_weights[i])

    # Build the adjusted series as a new object so the caller's data is never
    # written to (pd.Series(df) would share the underlying values).
    weight_per_obs = pd.Series(period_of_obs, index=df.index).map(seasonal_weights)
    df_adj = df / weight_per_obs

    df_adj.plot(c=pick_a_color())
    plt.show()
예제 #3
0
def run_diagnostics(ts, title, label):
    '''
    Run the same set of visual and statistical diagnostics on a time series,
    on its first difference, and on its second difference.

    Parameters:
    ts - Time series to be analyzed (must support .diff(), .dropna(), .plot()).
    title - Title for the series plots; the differenced plots prefix it with
            'Differenced ' / 'Twice Differenced '.
    label - Label for the y-axis of the series plots.

    Returns:
    None. For each of the three series, in order, the function shows:
      1. a line plot of the series,
      2. the plot of sd(series) (sd is imported elsewhere; presumably
         statsmodels' seasonal_decompose - confirm),
      3. the plot_acf and plot_pacf figures,
    and then prints element [1] of adfuller(series), labelled as the p value.
    '''
    # NaNs introduced by differencing are dropped before any further use.
    first_diff = ts.diff().dropna()
    second_diff = first_diff.diff().dropna()

    # (series, title template, message printed before the ADF p value)
    stages = (
        (ts, '{}', 'Adfuller results - p value:'),
        (first_diff, 'Differenced {}',
         'Differenced adfuller results - p value:'),
        (second_diff, 'Twice Differenced {}',
         'Twice-differenced adfuller results - p value:'),
    )

    for series, title_template, adf_message in stages:
        # Line plot of the series itself
        series.plot()
        plt.title(title_template.format(title))
        plt.xlabel('Date')
        plt.ylabel('{}'.format(label))
        plt.show()

        # Decomposition plot
        decomposition = sd(series)
        decomposition.plot()
        plt.show()

        # Autocorrelation, then partial autocorrelation
        plot_acf(series)
        plt.show()
        plot_pacf(series)
        plt.show()

        # Second element of the adfuller result is reported as the p value
        adf_result = adfuller(series)
        print(adf_message, adf_result[1])