Esempio n. 1
0
def checkSeries(series): # numpy arrays or series
    """Show diagnostic plots and autocorrelations for a time series.

    Args:
        series: A pandas Series or a numpy array. A Series (or any subclass
            of it) is converted to a numpy array before plotting.

    Side effects:
        Calls ``pmd.tsdisplay``, ``pmd.plot_acf`` and ``pmd.plot_pacf`` on
        the data and prints the result of ``pmd.acf``.
    """
    # isinstance also matches Series subclasses, which an exact type
    # comparison would silently skip.
    if isinstance(series, pd.Series):
        series = series.to_numpy()
    pmd.tsdisplay(series)
    print(pmd.acf(series))
    pmd.plot_acf(series)
    pmd.plot_pacf(series)
Esempio n. 2
0
# Hand-computed first differences of the sample values 10, 4, 2, 9, 34;
# the tuple evaluates to (-6, -2, 7, 25).
# NOTE(review): x is defined outside this fragment; presumably it already
# holds these first differences -- confirm.
(4-10), (2-4), (9-2), (34-9) 
x
# Differencing round 2: subtract each element from its successor.
x_lag = x[1:]  # x without its first element (the series shifted by one step)
x_lag
x[:-1]
x = x_lag - x[:-1]
# x = [ 4.,  9., 18.]
# Manual check of the values above. This tuple evaluates to (4, 9, 11); the
# last term disagrees with the 18. recorded above and presumably should be
# (25 - 7) -- TODO confirm.
(-2 - (-6)), (7 - (-2)), (18-7)  #check this

#%%% Stationary
import pmdarima as pm
from pmdarima import datasets

# Load the lynx sample dataset shipped with pmdarima and plot its
# autocorrelation function.
y = datasets.load_lynx()
pm.plot_acf(y)

from pmdarima.arima.stationarity import ADFTest

# Test whether we should difference at the alpha=0.05
# significance level
adf_test = ADFTest(alpha=0.05)
p_val, should_diff = adf_test.should_diff(y)  # (0.01, False)
p_val

# The verdict, per the ADF test, is that we should not difference.
# pmdarima also provides a handier interface for estimating the d parameter
# directly. This is the preferred public method for accessing tests of
# stationarity:
from pmdarima.arima.utils import ndiffs

# Estimate the number of differences using an ADF test:
n_adf = ndiffs(y, test='adf')  # -> 0
Esempio n. 3
0
def _report_previous_forecast_error(df):
    """Print how far the previously pickled forecast was from the data in *df*.

    Reads ``forecast.pickle`` (written by ``predict_arima``) and compares its
    ``"prediction"`` column with ``df["memory_used"]`` position by position
    over the overlapping prefix. Raises whatever ``open``/``pickle.load``/
    column access raise; the caller treats any failure as "nothing to report".
    """
    with open("forecast.pickle", "rb") as forecast_in:
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # files this program wrote itself.
        previous_forecast = pickle.load(forecast_in)

    overlap = min(len(df), len(previous_forecast))
    if overlap == 0:
        print("RMSE To be computed")
        return

    # Positional comparison: the forecast frame carries its own index, so a
    # label-aligned subtraction could yield only NaN values.
    # NOTE(review): assumes the first rows of df correspond to the first
    # forecast steps -- confirm against the caller.
    actual = df["memory_used"].to_numpy()[:overlap]
    predicted = previous_forecast["prediction"].to_numpy()[:overlap]
    error = actual - predicted

    over = error[error < 0]
    under = error[error >= 0]
    # Guard the means: either side may be empty.
    overestimation = over.mean() if over.size else 0.0
    underestimation = under.mean() if under.size else 0.0
    print("UNDERESTIMATION ERROR: " + str(underestimation))
    print("OVERESTIMATION ERROR: " + str(overestimation))
    print("Mean Absolute Error in Last iteration " + str(abs(error).mean()))


def predict_arima(df):
    """Fit a seasonal ARIMA model on *df* and forecast ``len(test)`` periods.

    Steps, in order:
      1. Report the error of the forecast saved by the previous call
         (best effort; skipped when no usable ``forecast.pickle`` exists).
      2. Save ACF/PACF plots to ``acf.png`` / ``pacf.png`` (best effort).
      3. Persist *df* to ``arima.pickle``.
      4. Estimate the differencing orders, pick p and q from the ACF/PACF
         values that reach ``getThreshold()``, and fit ``ARIMA`` with a
         seasonal period of 12.
      5. Pickle the forecast to ``forecast.pickle`` and plot it.

    Args:
        df: Frame with a ``"memory_used"`` column; it is also passed whole to
            the plotting and fitting calls.

    Returns:
        A DataFrame with one ``"prediction"`` column indexed by the index of
        the held-out 30% split, or ``None`` when fitting/forecasting/plotting
        raises.
    """
    time_in = current_milli_time()

    try:
        _report_previous_forecast_error(df)
    except Exception as e:
        # Best effort: a missing or unreadable forecast must not stop the run.
        print("RMSE To be computed")
        logging.debug("Previous forecast error not computed: %s", e)

    try:
        pm.plot_pacf(df, show=False).savefig('pacf.png')
        pm.plot_acf(df, show=False).savefig('acf.png')
    except Exception:
        print("Data points insufficient for ACF & PACF")

    # The original loaded arima.pickle first but discarded what it read; the
    # only lasting effect was overwriting the file with the current data.
    with open("arima.pickle", "wb") as arima_data_out:
        pickle.dump(df, arima_data_out)

    # Differencing tests. Defaults apply when any test raises.
    series = df["memory_used"]
    nd = 1
    nsd = 1
    try:
        # should_diff returns (p_value, bool); is_stationary returns a bare
        # bool and cannot be unpacked into two names.
        p_val, should_diff = ADFTest(alpha=0.05).should_diff(series)
        logging.info("ADF p-value=%s should_diff=%s", p_val, should_diff)

        nd = ndiffs(series, test='adf')
        logging.info(nd)
        nsd = nsdiffs(series, 12)
        logging.info(nsd)
    except Exception:
        nd = 1
        print("Exception on tests")

    ch_test = CHTest(12)

    try:
        # The CH estimate, when available, replaces the nsdiffs result.
        nsd = ch_test.estimate_seasonal_differencing_term(series)
    except Exception as e:
        print(e)
        logging.error(e)

    # Choose p and q from the number of correlation values that reach the
    # threshold, capped at 4 and 1 respectively.
    # NOTE(review): p is read from the ACF and q from the PACF, the opposite
    # of the usual Box-Jenkins convention, and lag 0 is counted -- confirm
    # this is intended before changing it.
    threshold = getThreshold()
    acf_lags = acf(series)
    p = min(sum(1 for value in acf_lags if value >= threshold), 4)

    pacf_lags = pacf(series)
    q = min(sum(1 for value in pacf_lags if value >= threshold), 1)
    d = nd

    train, test = train_test_split(df, shuffle=False, test_size=0.3)

    # If data is seasonal set the values of P,D,Q in seasonal order
    stepwise_model = ARIMA(
        order=(p, d, q),
        seasonal_order=(0, nsd, 0, 12),
        suppress_warnings=True,
        scoring='mse'
    )
    title = str(p) + " " + str(nd) + " " + str(q)
    print("Model with p=" + str(p) + " d=" + str(d) + " q=" + str(q))

    try:
        # NOTE(review): the model is fitted on all of df while the forecast
        # below is labelled with the index of the held-out split -- confirm.
        stepwise_model.fit(df)
        # Vary the periods as per the forecasting window:
        #   n_periods= 30 = 5mins
        #   n_periods= 60 = 10mins
        #   n_periods= 90 = 15mins
        future_forecast = stepwise_model.predict(n_periods=len(test))
        future_forecast = pd.DataFrame(future_forecast, index=test.index, columns=["prediction"])

        res = pd.concat([df, future_forecast], axis=1)

        # Save the forecast so the next call can report its error.
        with open("forecast.pickle", "wb") as forecast_out:
            pickle.dump(future_forecast, forecast_out)

        trace1 = go.Scatter(x=res.index, y=res["prediction"], name="Prediction", mode='lines')
        trace2 = go.Scatter(x=df.index, y=df["memory_used"], name="DF data", mode='lines')
        data = [trace1, trace2]
        layout = go.Layout(
            title=title
        )
        fig = go.Figure(data=data, layout=layout)
        plot(fig, filename="prediction")
        print("Current values")
        print(df)
        print("Predicted Data Points")
        print(future_forecast)
        time_out = current_milli_time()
        print("TIME for RNN(ms):" + str(time_out - time_in))
        return future_forecast
    except Exception as e:
        time_out = current_milli_time()
        print("TIME for RNN(ms):" + str(time_out - time_in))
        print(e)
        return None
Esempio n. 4
0
 def test_plot_acf(plot_type, dataset):
     """Call ``pm.plot_acf`` on *dataset* with ``show=False`` and return its result.

     ``plot_type`` is accepted but not used by the body.
     """
     acf_plot = pm.plot_acf(dataset, show=False)
     return acf_plot
Esempio n. 5
0
def predict_arima(df):
    """Search for a seasonal ARIMA model with ``auto_arima`` and forecast.

    Steps, in order:
      1. Report the mean absolute error of the forecast pickled by the
         previous call (best effort).
      2. Plot ``df["memory_used"]`` and save ACF/PACF plots (best effort).
      3. Prepend the history stored in ``arima.pickle`` to *df* and write the
         combined data back to that file.
      4. Run ``auto_arima`` on the first 70% of the data, refit on all of it,
         and forecast as many periods as the held-out split has rows.
      5. Pickle the forecast to ``forecast.pickle`` and plot it.

    Args:
        df: Frame with a ``"memory_used"`` column; it is also passed whole to
            the plotting, scoring and fitting calls.

    Returns:
        A DataFrame with one ``"prediction"`` column indexed by the index of
        the held-out split, or ``None`` when fitting/forecasting/plotting
        raises.
    """
    trace = go.Scatter(x=df.index, y=df["memory_used"], mode='lines+markers')
    data = [trace]
    try:
        with open("forecast.pickle", "rb") as forecast_in:
            # NOTE(security): pickle.load can execute arbitrary code. Only
            # load files this program wrote itself.
            future_forecast = pickle.load(forecast_in)
        # Score only the rows both frames have.
        overlap = min(len(df), len(future_forecast))
        error = mean_absolute_error(df[:overlap],
                                    abs(future_forecast[:overlap]))
        print("Mean Absolute Error in Last iteration " + str(error))
    except Exception:
        # Best effort: no usable previous forecast means nothing to report.
        print("RMSE To be computed")

    plot(data, filename="memory-used-overtime")
    try:
        pm.plot_pacf(df, show=False).savefig('pacf.png')
        pm.plot_acf(df, show=False).savefig('acf.png')
    except Exception:
        print("Data points insufficient for ACF & PACF")

    try:
        with open("arima.pickle", "rb") as pickle_in:
            arima_data = pickle.load(pickle_in)
        # Keep the combined frame: discarding the result of the append would
        # replace the new observations with the stale history.
        # NOTE(review): rows repeat if callers pass overlapping windows --
        # confirm how df is produced.
        df = pd.concat([arima_data, df])
    except Exception as e:
        # First run or unreadable history: continue with the new data only.
        print("No previous ARIMA history loaded: " + str(e))
    with open("arima.pickle", "wb") as arima_data_out:
        pickle.dump(df, arima_data_out)

    # AUTO ARIMA MODEL
    train, test = train_test_split(df, shuffle=False, test_size=0.3)

    stepwise_model = auto_arima(train,
                                start_p=0,
                                start_q=0,
                                max_p=4,
                                max_q=4,
                                m=12,
                                start_P=0,
                                start_Q=0,
                                seasonal=True,
                                d=0,
                                max_d=2,
                                D=1,
                                max_D=2,
                                trace=True,
                                error_action='ignore',
                                suppress_warnings=True,
                                stepwise=True)

    try:
        # NOTE(review): the model is refitted on all of df while the forecast
        # below is labelled with the index of the held-out split -- confirm.
        stepwise_model.fit(df)
        future_forecast = stepwise_model.predict(n_periods=len(test))
        future_forecast = pd.DataFrame(future_forecast,
                                       index=test.index,
                                       columns=["prediction"])

        res = pd.concat([df, future_forecast], axis=1)

        # Save the forecast so the next call can report its error.
        with open("forecast.pickle", "wb") as forecast_out:
            pickle.dump(future_forecast, forecast_out)

        trace1 = go.Scatter(x=res.index,
                            y=res["prediction"],
                            name="Prediction",
                            mode='lines')
        trace2 = go.Scatter(x=df.index,
                            y=df["memory_used"],
                            name="DF data",
                            mode='lines')
        data = [trace1, trace2]
        # The original passed an undefined name as the title, which raised
        # NameError here and made the function always return None. Use the
        # selected (p, d, q) order as the title instead.
        title = " ".join(str(term) for term in stepwise_model.order)
        layout = go.Layout(title=title)
        fig = go.Figure(data=data, layout=layout)
        plot(fig, filename="prediction")
        print("Current values")
        print(df)
        print("Predicted Data Points")
        print(future_forecast)

        return future_forecast
    except Exception as e:
        print(e)
        return None
# Fetch every row of the traffic_date table, then close the connection.
traffic = connection.execute("select * from traffic_date").fetchall()
# NOTE(review): the message names "sales_forecast" while the query above reads
# traffic_date -- confirm which one is intended.
print("Data extracted from sales_forecast successfully!")
connection.close()
traffic = pd.DataFrame(traffic)
# Name the columns after the keys of traffic_cols, then run the names through
# cleaner.sanitize.
traffic.columns = [col for col in traffic_cols.keys()]
traffic.columns = cleaner.sanitize(traffic.columns)

# Keep only the date and traffic columns and index the frame by parsed dates.
traffic = traffic.filter(['date', 'traffic'])
traffic.set_index(['date'], inplace=True)
traffic.index = pd.to_datetime(traffic.index)
# Shift every value by a small constant; presumably to avoid exact zeros for
# the multiplicative decomposition commented out below -- TODO confirm.
traffic['traffic'] += 0.001

# result = seasonal_decompose(traffic, model='multiplicative', period=7)
# result.plot()
# plt.show()
pm.plot_acf(traffic)
# Stepwise seasonal search with period m=7; d and D are fixed at 1, and p and
# q start at 1 and are capped at 5.
stepwise_model = auto_arima(traffic,
                            start_p=1,
                            start_q=1,
                            max_p=5,
                            max_q=5,
                            m=7,
                            start_P=0,
                            seasonal=True,
                            d=1,
                            D=1,
                            trace=True,
                            error_action='ignore',
                            suppress_warnings=True,
                            stepwise=True)
#
Esempio n. 7
0
from src.data.load_data import load_processed_data

# Setting styles
# NOTE(review): "all" presumably makes the notebook display the value of every
# top-level expression, not only the last one in a cell -- confirm.
InteractiveShell.ast_node_interactivity = "all"
sns.set(style="whitegrid", color_codes=True)

#%%

data = load_processed_data()

#%%

# Determining the differencing manually to speed up model evaluation

# Undifferenced series: ACF/PACF over 24 lags, followed by the ADF p-value
# (element 1 of adfuller's result).
pm.plot_acf(data["Global_active_power"], lags=24 * 1, zero=False)
pm.plot_pacf(data["Global_active_power"], lags=24 * 1, zero=False)
print("The p-value for the ADF test is ",
      adfuller(data["Global_active_power"])[1])

# Same diagnostics after one round of differencing; dropna() removes the
# missing first value that diff(1) leaves behind.
pm.plot_acf(data["Global_active_power"].diff(1).dropna(),
            lags=24 * 1,
            zero=False)
pm.plot_pacf(data["Global_active_power"].diff(1).dropna(),
             lags=24 * 1,
             zero=False)
# Prints the same label as above, but for the differenced series.
print(
    "The p-value for the ADF test is ",
    adfuller(data["Global_active_power"].diff(1).dropna())[1],
)