Example #1
    def test_ld(self):
        pacfyw = pacf_yw(self.x, nlags=40, method="mle")
        pacfld = pacf(self.x, nlags=40, method="ldb")
        assert_almost_equal(pacfyw, pacfld, DECIMAL_8)

        pacfyw = pacf(self.x, nlags=40, method="yw")
        pacfld = pacf(self.x, nlags=40, method="ldu")
        assert_almost_equal(pacfyw, pacfld, DECIMAL_8)
Example #2
 def get_acf_pacf(self, inputDataSeries, lag = 15):
     # Copy the input data into a DataFrame
     outputData = pandas.DataFrame(inputDataSeries)
     
     if min(inputDataSeries.index) == inputDataSeries.index[0]:
         # Ascending
         multiplier = 1
         lag = multiplier*lag
     elif max(inputDataSeries.index) == inputDataSeries.index[0]:
         # Descending
         multiplier = -1
         lag = multiplier*lag
     else:
         print('Cannot determine the index order; set the lag value manually')
         print('Syntax: calc_returns(inputData, columnName, lag = lag_value)')
         return None  # multiplier is undefined past this point, so bail out
     
     n_iter = abs(lag)  # nlags passed to acf/pacf must be non-negative
     columnName = outputData.columns[0]
     i = 1
     
     
     # Calculate ACF
     acf_values = []
     acf_values.append(outputData[columnName].corr(outputData[columnName]))
     
     while i <= abs(n_iter):
         col_name = 'lag_' + str(i)
         outputData[col_name] = outputData[columnName].shift(multiplier*i)
         
         i += 1
         
         acf_values.append(outputData[columnName].corr(outputData[col_name]))
     
     # Define an empty figure
     fig = plt.figure()
     
     # Define 2 subplots
     ax1 = fig.add_subplot(211) # 2 by 1 by 1 - 1st plot in 2 plots
     ax2 = fig.add_subplot(212) # 2 by 1 by 2 - 2nd plot in 2 plots
     
     ax1.plot(range(len(acf_values)), acf(inputDataSeries, nlags = n_iter), \
              range(len(acf_values)), acf_values, 'ro')
     ax2.plot(range(len(acf_values)), pacf(inputDataSeries, nlags = n_iter), 'g*-')
     
     # Plot horizontal lines    
     ax1.axhline(y = 0.0, color = 'black')
     ax2.axhline(y = 0.0, color = 'black')
         
     # Axis labels
     plt.xlabel('Lags')
     plt.ylabel('Correlation Coefficient')
     return {'acf' : list(acf_values), \
             'pacf': pacf(inputDataSeries, nlags = n_iter)} 
Example #3
def partial_autocorrelation(x, *args, nlags=None, method='ldb', **kwargs):
    """
    Return partial autocorrelation function (PACF) of signal `x`.

    Parameters
    ----------
    x: array_like
        A 1D signal.
    nlags: int
        The number of lags to calculate the correlation for
        (default: min(1000, len(x) - 1))
    args, kwargs
        As accepted by `statsmodels.tsa.stattools.pacf`.

    Returns
    -------
    acf: array
        Partial autocorrelation function.
    confint : optional
        As returned by `statsmodels.tsa.stattools.pacf`.
    """
    from statsmodels.tsa.stattools import pacf
    if nlags is None:
        nlags = min(1000, len(x) - 1)
    corr = pacf(x, *args, nlags=nlags, method=method, **kwargs)
    return _significant_acf(corr, kwargs.get('alpha'))
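
A minimal usage sketch for the wrapper above (not part of the original source); it assumes _significant_acf simply passes the PACF coefficients through when no alpha is supplied:

import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal(500).cumsum()  # a 1D random-walk style signal

corr = partial_autocorrelation(x, nlags=40)  # PACF up to lag 40
corr_default = partial_autocorrelation(x)    # nlags defaults to min(1000, len(x) - 1)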
Example #4
 def test_ols(self):
     pacfols, confint = pacf(self.x, nlags=40, alpha=.05, method="ols")
     assert_almost_equal(pacfols[1:], self.pacfols, DECIMAL_6)
     centered = confint - confint.mean(1)[:,None]
     # from edited Stata ado file
     res = [[-.1375625, .1375625]] * 40
     assert_almost_equal(centered[1:41], res, DECIMAL_6)
Example #5
 def ACF_PACF_plot(self):
     #plot ACF and PACF to find the number of terms needed for the AR and MA in ARIMA
     # ACF finds MA(q): cut off after x lags 
     # and PACF finds AR (p): cut off after y lags 
     # in ARIMA(p,d,q) 
     lag_acf = acf(self.ts_log_diff, nlags=20)
     lag_pacf = pacf(self.ts_log_diff, nlags=20, method='ols')
     
     #Plot ACF:
     ax=plt.subplot(121)
     plt.plot(lag_acf)
     ax.set_xlim([0,5])
     plt.axhline(y=0,linestyle='--',color='gray')
      plt.axhline(y= -1.96/np.sqrt(len(self.ts_log_diff)),linestyle='--',color='gray')
      plt.axhline(y= 1.96/np.sqrt(len(self.ts_log_diff)),linestyle='--',color='gray')
     plt.title('Autocorrelation Function')
     
     #Plot PACF:
     plt.subplot(122)
     plt.plot(lag_pacf)
     plt.axhline(y=0,linestyle='--',color='gray')
      plt.axhline(y= -1.96/np.sqrt(len(self.ts_log_diff)),linestyle='--',color='gray')
      plt.axhline(y=1.96/np.sqrt(len(self.ts_log_diff)),linestyle='--',color='gray')
     plt.title('Partial Autocorrelation Function')
     plt.tight_layout()
Example #6
def plotPACF(timeSeries):
    lag_pacf = pacf(timeSeries, nlags=20, method='ols')
    plt.subplot(122)
    plt.plot(lag_pacf)
    plt.axhline(y=0,linestyle='--',color='gray')
    plt.axhline(y=-1.96/np.sqrt(len(timeSeries)),linestyle='--',color='gray')
    plt.axhline(y=1.96/np.sqrt(len(timeSeries)),linestyle='--',color='gray')
    plt.title('Partial Autocorrelation Function')
    plt.tight_layout()
Example #7
def ARIMA_fun( data ):
    lag_pacf = pacf( data, nlags=20, method='ols' )
    # with qstat=True, acf also returns the Ljung-Box Q statistics and their p-values
    lag_acf, qstat, pvalues = acf( data, nlags=20, qstat=True, unbiased=True)

    model = ARIMA(data, order=(1, 1, int(qstat[0])))
    results_ARIMA = model.fit(disp=-1)
    plt.subplot(121)
    plt.plot( data )
    plt.plot(results_ARIMA.fittedvalues)
    #plt.show()
    return results_ARIMA.fittedvalues
Example #8
    def FE(self, serie_atual):
        '''
        Method to difference the current series.
        :param serie_atual: the observed (real) series
        '''
        
        #serie_df = pd.DataFrame(serie_atual)
        serie_diff = pd.Series(serie_atual)
        serie_diff = serie_diff - serie_diff.shift()
        serie_diff = serie_diff[1:]
        
        
        features = []
        
        #feature 1:
        auto_correlacao = acf(serie_diff, nlags=5)
        for i in auto_correlacao:
            features.append(i)
        
        #feature 2:
        parcial_atcorr = pacf(serie_diff, nlags=5)
        for i in parcial_atcorr:
            features.append(i)
        
        #feature 3: standard deviation (dispersion) of the differenced series
        variancia = serie_diff.std()
        features.append(variancia)
        
        #feature 4:
        serie_skew = serie_diff.skew()
        features.append(serie_skew)

        #feature 5:
        serie_kurtosis = serie_diff.kurtosis()
        features.append(serie_kurtosis)
        
        #feature 6:
        turning_p = self.turningpoints(serie_diff)
        features.append(turning_p)
        
        #feature 7:
        
        #feature 8:
        
        
        return features
Example #9
def global_analysis(csv_fname, trajectory_df):
    # catch small trajectory_dfs
    if len(trajectory_df.index) < MIN_TRAJECTORY_LEN:
        return None
    else:
        
        # for each trajectory, loop through segments
        acf_data = np.zeros((len(INTERESTED_VALS), 1, LAGS+1))
        pacf_data = np.zeros((len(INTERESTED_VALS), 1, LAGS+1))
        
        # do analysis variable by variable
        count = -1
        for var_name, var_values in trajectory_df.iteritems():
            count += 1
            # column index for this variable
            var_index = trajectory_df.columns.get_loc(var_name)
            
            # run ACF and PACF for the column and store the results
            col_acf, acf_confint = acf(var_values, nlags=LAGS, alpha=.05)
            acf_data[var_index, 0, :] = col_acf
            
            col_pacf, pacf_confint = pacf(var_values, nlags=LAGS, method='ywmle', alpha=.05)
            pacf_data[var_index, 0, :] = col_pacf
            # TODO: check for PACF values above or below +-1
        
        return acf_data, pacf_data
Example #10
 def get_acf_pacf(self, inputDataSeries, lag = 15):
     # Copy the input data into a DataFrame
     outputData = pandas.DataFrame(inputDataSeries)
     
     if min(inputDataSeries.index) == inputDataSeries.index[0]:
         # Ascending
         multiplier = 1
         lag = multiplier*lag
     elif max(inputDataSeries.index) == inputDataSeries.index[0]:
         # Descending
         multiplier = -1
         lag = multiplier*lag
     else:
         print('Cannot determine the index order; set the lag value manually')
         print('Syntax: calc_returns(inputData, columnName, lag = lag_value)')
     
     n_iter = abs(lag)  # nlags passed to acf/pacf must be non-negative
     
     return {'acf' : acf(inputDataSeries, nlags = n_iter), \
             'pacf': pacf(inputDataSeries, nlags = n_iter)} 
Example #11
def corrfunc(timeseries):
	diff_ts = timeseries - timeseries.shift()
	diff_ts.dropna(inplace=True)
	ts_acf = acf(diff_ts, nlags=20)
	ts_pacf = pacf(diff_ts, nlags=20, method='ols')
	#Plot ACF and PACF:
	fig = plt.figure(figsize=(12,8))
	ax1 = fig.add_subplot(211)
	plt.tick_params(axis="both", which="both", bottom="on", top="off",    
		                labelbottom="on", left="on", right="off", labelleft="on")
	fig = sm.graphics.tsa.plot_acf(timeseries.values.squeeze(), lags=20, ax=ax1)
	plt.title('ACF', fontsize=15)
	ax2 = fig.add_subplot(212)
	fig = sm.graphics.tsa.plot_pacf(timeseries, lags=20, ax=ax2)
	plt.tick_params(axis="both", which="both", bottom="on", top="off",    
		                labelbottom="on", left="on", right="off", labelleft="on")
	plt.xlabel("Lags", fontsize=14) 
	plt.title('PACF', fontsize=15)
	plt.tight_layout()
	fig.savefig('corrfunc.png', bbox_inches="tight")
Example #12
def plot_acf_and_pacf(y):
    lag_acf = acf(y, nlags=20)
    lag_pacf = pacf(y, nlags=20, method='ols')
    
    plt.subplot(121) 
    plt.plot(lag_acf)
    plt.axhline(y=0,linestyle='--',color='gray')
    plt.axhline(y=-1.96/np.sqrt(len(y)),linestyle='--',color='gray')
    plt.axhline(y=1.96/np.sqrt(len(y)),linestyle='--',color='gray')
    plt.title('Autocorrelation Function')
    
    #Plot PACF:
    plt.subplot(122)
    plt.plot(lag_pacf)
    plt.axhline(y=0,linestyle='--',color='gray')
    plt.axhline(y=-1.96/np.sqrt(len(y)),linestyle='--',color='gray')
    plt.axhline(y=1.96/np.sqrt(len(y)),linestyle='--',color='gray')
    plt.title('Partial Autocorrelation Function')
    plt.tight_layout()
    plt.show()
    plt.close()
Example #13
def acf_pacf(ts):
	ts_log, ts_log_diff = trend(ts)
	lag_acf = acf(ts_log_diff, nlags = 20)
	lag_pacf = pacf(ts_log_diff, nlags = 20, method = 'ols')

	#plot acf
	plt.subplot(121)
	plt.plot(lag_acf)
	plt.axhline(y=0, linestyle = '--', color = 'gray')
	plt.axhline(y = -1.96/np.sqrt(len(ts_log_diff)), linestyle = '--', color = 'gray')
	plt.axhline(y = 1.96/np.sqrt(len(ts_log_diff)), linestyle = '--', color = 'gray')
	plt.title('Autocorrelation Function')

	#plot pacf
	plt.subplot(122)
	plt.plot(lag_pacf)
	plt.axhline(y=0, linestyle = '--', color = 'gray')
	plt.axhline(y = -1.96/np.sqrt(len(ts_log_diff)), linestyle = '--', color = 'gray')
	plt.axhline(y = 1.96/np.sqrt(len(ts_log_diff)), linestyle = '--', color = 'gray')
	plt.title('Partial Autocorrelation Function')

	plt.tight_layout()
	plt.show()
Example #14
    ts_df_log_rolling = (ts_df_log - ts_df_log_rolling_temp).dropna()
    plt.figure(figsize=(15, 6))
    plt.plot(ts_df_log, label='Log Transformed')
    plt.plot(ts_df_log_rolling,
             color='red',
             label='Log and Rolling Average Transformed')
    plt.legend(loc='best')
    plt.show()
    Dickey_Fuller_test(ts_df_log_rolling.Weighted_Price)
    return ts_df_log_rolling


ts_df_log_rolling = rolling_avg_diff(ts_df_log, ts_df_log_rolling_temp)

lag = 20
lag_pacf = pacf(ts_df_log_rolling, nlags=lag, method='ols')
lag_acf = acf(ts_df_log_rolling, nlags=lag)

#Plot ACF:
plt.figure(figsize=(15, 3))
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(ts_df_log_rolling)),
            linestyle='--',
            color='gray')
plt.axhline(y=1.96 / np.sqrt(len(ts_df_log_rolling)),
            linestyle='--',
            color='gray')
Example #15
def model_feature(file_name, df, feature):
    #first create a directory by feature name to store the results
    file_name_wo_extn = file_name[:-4]
    dir_name = os.path.join(os.path.sep, os.getcwd(), OUTPUT_DIR_NAME, file_name_wo_extn, feature)
    if os.path.exists(dir_name):
        logger.info('dir name is ==> ' + dir_name)
        #delete existing directory if any
        shutil.rmtree(dir_name)
    os.makedirs(dir_name)
    #temporarily change to the new feature directory
    curr_dir = os.getcwd()
    os.chdir(dir_name)

    #create  a string buffer to store all information about this feature which will then be written to a file at the end
    s = ''
    s = _write_to_string(s, '----------- Time Series Analysis for ' + feature + ' from ' + str(df['Date'][0]) + ' to ' + str(df['Date'][len(df['Date']) - 1]) + '-----------')
    #only look at the feature of interest as a univariate time series
    #x-axis is the time..
    X = np.array(df['Date'], dtype=np.datetime64)
    
    #df['First Difference'] = df[feature] - df[feature].shift()  
    y = np.array(df[feature] - df[feature].shift())
    _draw_multiple_line_plot('first_difference.html', 
                             feature, 
                             [X],
                             [y],
                             ['navy'], 
                             ['packets percentage delta'],
                             [None],
                             [1],
                             'datetime', 'Date', 'Packets Percentage Delta', y_start=-100, y_end=100)

    #calculate autocorrelation and partial autocorrelation for the first difference
    lag_correlations = acf(y[1:])  
    lag_partial_correlations = pacf(y[1:])  

    logger.info ('lag_correlations')
    logger.info(lag_correlations)

    s = _write_to_string(s, 'lag_correlations')
    s = _write_to_string(s, str(lag_correlations))

    y = lag_correlations
    _draw_multiple_line_plot('lag_correlations.html', 
                             'lag_correlations', 
                             [X],
                             [y],
                             ['navy'], 
                             ['lag_correlations'],
                             [None],
                             [1],
                             'datetime', 'Date', 'lag_correlations', y_start=-1, y_end=1)


    logger.info ('lag_partial_correlations')
    logger.info(lag_partial_correlations)
    s = _write_to_string(s, 'lag_partial_correlations')
    s = _write_to_string(s, str(lag_partial_correlations))

    y = lag_partial_correlations
    _draw_multiple_line_plot('lag_partial_correlations.html', 
                             'lag_partial_correlations', 
                             [X],
                             [y],
                             ['navy'], 
                             ['lag_partial_correlations'],
                             [None],
                             [1],
                             'datetime', 'Date', 'lag_partial_correlations', y_start=-1, y_end=1)

    #seasonal decompose to extract seasonal trends
    decomposition = seasonal_decompose(np.array(df[feature]), model='additive', freq=15)  
    _draw_decomposition_plot('decomposition.html', X, decomposition, 'seasonal decomposition', 'datetime', 'decomposition', width=600, height=400)


    #run various ARIMA models..and see which fits best...
    s, model_names, models, results, MAE = _try_ARIMA_and_ARMA_models(s, df, feature)

    #check if we got consistent output; all 4 variables returned by the prev function are
    # lists..they should all have the same length
    len_list = [len(model_names), len(models), len(results), len(MAE)]
    if len(len_list) == len_list.count(len_list[0]):
        #looks consistent, all lengths are equal
        logger.info('_try_ARIMA_models output looks consistent, returns %d models ' % len(model_names))
    else:
        logger.info('_try_ARIMA_models output IS NOT consistent, returns %d model names ' % len(model_names))
        logger.info(len_list)
        logger.info('EXITING.....')
        sys.exit()

    s, predicted_dates, predicted, model_selection_list = _do_forecasts(df, feature, X, s, model_names, models, results, MAE)
    
    #write everything to file
    with open(feature + '.txt', "w") as text_file:
        text_file.write(s)
    #go back to parent directory
    os.chdir(curr_dir)

    #return the results
    return feature, model_names, models, results, MAE, predicted_dates, predicted, model_selection_list
Example #16
    def run_models():
        #-------------------------------------Creating and storing MLP model-----------------------------------------------------
        # Importing the dataset and separating dependent/independent variables

        dataset = pd.read_csv("assets/predicts.csv")

        # print(dataset.dtypes)

        dataset['Main purpose of visit'].value_counts()
        dataset['Accessibility status'].value_counts()
        dataset['Accomodation status'].value_counts()
        dataset['health services status'].value_counts()

        cleanup_nums = {"Accessibility status":{"Poor": 1, "Fair": 2,"Good":3,"Better":4},
                        "Accomodation status": {"Poor": 1, "Fair": 2,"Good":3,"Better":4},
                        "health services status":{"Poor": 1, "Fair": 2,"Good":3,"Better":4},
                        }
        dataset.replace(cleanup_nums, inplace=True)
        dataset.head(5)



        # print(dataset.head(5))
        X = dataset.iloc[:,1:8].values
        # print(X[:,3])

        y = dataset.iloc[:,10].values
        # print(y)
        # Encoding categorical data
        
        labelencoder_X_3 = LabelEncoder()
        X[:, 3] = labelencoder_X_3.fit_transform(X[:, 3])

        list(labelencoder_X_3.inverse_transform([0, 1, 2, 3]))

        X[:, 3]
        X[:,0:4]
        # print(X)


        onehotencoder = OneHotEncoder(categorical_features = [3] )
        X = onehotencoder.fit_transform(X).toarray()

        X = X[:, 1:]

        # print('\n'.join([''.join(['{:9}'.format(item) for item in row]) 
            # for row in X]))


        # Splitting the dataset into the Training set and Test set
        from sklearn.model_selection import train_test_split
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)


        a=y_test
        b=y_train

        # Feature Scaling //escaping
        
        sc = StandardScaler()
        X_train = sc.fit_transform(X_train)
        X_test = sc.transform(X_test)

        # Part 2 - making the ANN model

        # Importing the Keras libraries and packages

        # Initialising the ANN for regression

        #Creating regression model
        REG = Sequential()

        # Adding the input layer and the first hidden layer with dropout if required
        REG.add(Dense(units=20,input_dim=9 ,kernel_initializer="normal", activation = 'relu'))
        #REG.add(Dropout(p=0.1))
        # Adding the second hidden layer
        REG.add(Dense(units =20,kernel_initializer="normal", activation = 'relu'))
        #REG.add(Dropout(p=0.1))
        # Adding the output layer
        REG.add(Dense(units = 1, kernel_initializer="normal"))

        # Compiling the ANN
        #def root_mean_squared_error(y_true, y_pred):
        #        return K.sqrt(K.mean(K.square(y_pred - y_true))) 
            
        REG.compile(optimizer = 'adam', loss= 'mean_squared_error')

        # Fitting the ANN to the Training set
        REG.fit(X_train, y_train, batch_size = 10, epochs = 200)

        # Part 3 - Making the predictions and evaluating the model
        X_test



        # Predicting the Test set results
        y_pred = REG.predict(X_test)

        REG.save('assets/REG_MLP_model.h5')
        K.clear_session()
        #---------------------------------------------------------------------------------------------------------------------


        #---------------------------------------Creating and storing SARIMA model----------------------------------------------
         #data collecting...converting dataset to html....
        df = pd.read_csv('assets/Touristarrival_monthly.csv')
        df1=df.iloc[:5]
        html_table_template = df1.to_html(index=False)
        html_table=df.to_html(index=False)
        #data observation and log transformation
        df.index=pd.to_datetime(df['Month'])
        df['#Tourists'].plot()
        mpl.pyplot.ylabel("No.of Toursits Arrivals ")
        mpl.pyplot.xlabel("Year")
        
         #storing plots
        mpl.pyplot.savefig('PredictionEngine/static/img/sarima_input.png', dpi=600,bbox_inches='tight')
        mpl.pyplot.clf()  
      

        series=df['#Tourists']
        logtransformed=np.log(series)
        logtransformed.plot()
        mpl.pyplot.ylabel("log Scale(No.of Toursits Arrivals) ")
        mpl.pyplot.xlabel("Year")
        
         #storing plots 
        mpl.pyplot.savefig('PredictionEngine/static/img/sarima_input_logscaled.png', dpi=600,bbox_inches='tight')
        mpl.pyplot.clf()
        
        #Train test split
        percent_training=0.80
        split_point=round(len(series)*percent_training)
        # print(split_point)
        training , testing = series[0:split_point] , series[split_point:]
        training=np.log(training)


        
        #differencing to achieve stationarity
        training_diff=training.diff(periods=1).values[1:]

        #plot of residual log differenced series
        mpl.pyplot.plot(training_diff)
        mpl.pyplot.title("Tourist arrivals data log-differenced")
        mpl.pyplot.xlabel("Years")
        mpl.pyplot.ylabel("Toursits arrivals")
        mpl.pyplot.clf()


        #ACF and PACF plots 1(with log differenced training data)
        lag_acf=acf(training_diff,nlags=40)
        lag_pacf=pacf(training_diff,nlags=40,method='ols')

        #plot ACF
        mpl.pyplot.figure(figsize=(15,5))
        mpl.pyplot.subplot(121)
        mpl.pyplot.stem(lag_acf)
        mpl.pyplot.axhline(y=0,linestyle='-',color='black')
        mpl.pyplot.axhline(y=-1.96/np.sqrt(len(training)),linestyle='--',color='gray')
        mpl.pyplot.axhline(y=1.96/np.sqrt(len(training)),linestyle='--',color='gray')
        mpl.pyplot.xlabel('lag')
        mpl.pyplot.ylabel("ACF")
        #storing plots in bytes
        mpl.pyplot.savefig('PredictionEngine/static/img/sarima_afc.png', dpi=600,bbox_inches='tight')
        mpl.pyplot.clf()
        

        #plot PACF
        mpl.pyplot.figure(figsize=(15,5))
        mpl.pyplot.subplot(122)
        mpl.pyplot.stem(lag_pacf)
        mpl.pyplot.axhline(y=0,linestyle='-',color='black')
        mpl.pyplot.axhline(y=-1.96/np.sqrt(len(training)),linestyle='--',color='gray')
        mpl.pyplot.axhline(y=1.96/np.sqrt(len(training)),linestyle='--',color='gray')
        mpl.pyplot.xlabel('lag')
        mpl.pyplot.ylabel("PACF")
         #storing plots in bytes
        mpl.pyplot.savefig('PredictionEngine/static/img/sarima_pafc.png', dpi=600,bbox_inches='tight')
        mpl.pyplot.clf()

        #SARIMA Model specification
        model=sm.tsa.statespace.SARIMAX(training,order=(2,0,3),seasonal_order=(2,1,0,12),trend='c',enforce_invertibility=False,enforce_stationarity=False)

        # fit model
        model_fit = model.fit()

        model_fit.save("assets/REG_SARIMA_model.pickle")

        # print(model_fit.summary())

        #plot residual errors
        # residuals = pd.DataFrame(model_fit.resid)
        # fig, ax = mpl.pyplot.subplots(1,2)
        # residuals.plot(title="Residuals", ax=ax[0])
        # residuals.plot(kind='kde', title='Density', ax=ax[1])
        # mpl.pyplot.show()
        # print(residuals.describe())

        # Model evaluation and forecast
        model_fitted=load_pickle("assets/REG_SARIMA_model.pickle")
        forecast=model_fitted.forecast(len(df)-250)
        # print(forecast)
        forecast=np.exp(forecast)
        # print(forecast)
        #plot forecast results and display RMSE
        mpl.pyplot.figure(figsize=(10,5))
        mpl.pyplot.plot(forecast,'r')
        mpl.pyplot.plot(series,'b')
        mpl.pyplot.legend(['Predicted test values','Actual data values'])

        mpl.pyplot.title('RMSE:%.2f'% np.sqrt(sum((forecast-testing)**2)/len(testing)))
        mpl.pyplot.ylabel("No.of Toursits Arrivals Monthly")
        mpl.pyplot.xlabel("Year")
        mpl.pyplot.autoscale(enable='True',axis='x',tight=True)
        mpl.pyplot.axvline(x=series.index[split_point],color='black');
         #storing plots 
        mpl.pyplot.savefig('PredictionEngine/static/img/sarima_result.png', dpi=600,bbox_inches='tight')
        mpl.pyplot.clf()

        forecaste=model_fitted.forecast(len(df)-214)
        forecast_next=forecaste[62:]
        forecast_next=np.exp(forecast_next)
        # print(forecast_next)
        mpl.pyplot.figure(figsize=(10,5))
        mpl.pyplot.plot(forecast_next,'r')
        mpl.pyplot.plot(series,'b')
        mpl.pyplot.legend(['Predicted next steps values'])
        mpl.pyplot.title('Monthly tourist arrivals predictions')
        mpl.pyplot.ylabel("No.of Toursits Arrivals ")
        mpl.pyplot.xlabel("Year")
        mpl.pyplot.autoscale(enable='True',axis='x',tight=True)

        #storing plots in bytes
        mpl.pyplot.savefig('PredictionEngine/static/img/sarima_forecast.png', dpi=600,bbox_inches='tight')
        mpl.pyplot.clf()
Example #17
# * **Moving average (MA) -** incorporates the dependency between an observation and a residual error from a moving average model applied to lagged observations.
#
#     The notation **MA(q)** refers to the moving average model of order q:<br/>
#         X(t) = µ + ε(t) + θ1.ε(t-1) + θ2.ε(t-2) + ... + θq.ε(t-q)
#
#     *Example* — If q is 3 the predictor for X(t) will be
#         X(t) = µ + ε(t) + θ1.ε(t-1) + θ2.ε(t-2) + θ3.ε(t-3)
#     Here, instead of the difference from the previous term, we take the error terms (ε) obtained from the differences from past terms
# Now we need to figure out the values of p and q, the parameters of the ARIMA model. We use the two methods below to figure out these values -
#
# **Autocorrelation Function (ACF):** measures the correlation between the series and a lagged version of itself. For example, at lag 4 the ACF compares the series at time instants t1…t2 with the series at instants t1-4…t2-4
#
# **Partial Autocorrelation Function (PACF):** measures the degree of association between X(t) and X(t-p) after the effects of the intermediate lags have been removed.

acf_lag = acf(train_df.diff().dropna().values, nlags=20)
pacf_lag = pacf(train_df.diff().dropna().values, nlags=20, method='ols')
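
# A hedged sketch (not part of the original notebook): turn the ACF/PACF
# computed above into candidate ARIMA orders by finding the first lag whose
# coefficient falls inside the 95% confidence band (+-1.96/sqrt(n)); assumes
# numpy is imported as np, as in the neighbouring examples.
n_obs = len(train_df.diff().dropna())
bound = 1.96 / np.sqrt(n_obs)
q_candidate = next((lag for lag, v in enumerate(acf_lag) if abs(v) < bound), None)   # MA order
p_candidate = next((lag for lag, v in enumerate(pacf_lag) if abs(v) < bound), None)  # AR order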

model = ARIMA(train_df.values, order=(5, 0, 3))
model_fit = model.fit(disp=0)

# Plot residual errors

residuals = pd.DataFrame(model_fit.resid)

# # Forecast

fc, se, conf = model_fit.forecast(480, alpha=0.05)  # 95% conf

# Make as pandas series
fc_series = pd.Series(fc, index=test_df.index)
lower_series = pd.Series(conf[:, 0], index=test_df.index)
Example #18
# Checking out a scatter plot , probably we can try out different lags and check data
#sb.jointplot('Logged First Difference','Lag 20',stock_data, kind ='reg', size = 10)
#pylab.show ()

# Probably we can use stat models and check out the lagged data for all and see
#if any correlation exists

from statsmodels.tsa.stattools import acf
from statsmodels.tsa.stattools import pacf

#acf is the autocorrelation function and pacf is the partial acf (works only for a 1-d array)
#iloc is integer location, check pandas

lag_corr = acf (stock_data ['Logged First Difference'].iloc [1:])
lag_partial_corr = pacf (stock_data ['Logged First Difference'].iloc [1:])

#fig, ax = plt.subplots (figsize = (16,12))
#ax.plot (lag_corr)
#pylab.show ()

# To extract trends and seasonal patterns for TS analysis

from statsmodels.tsa.seasonal import seasonal_decompose

#set the frequency value right for monthly set freq = 30
decomposition = seasonal_decompose(stock_data['Natural Log'], model='additive', freq=30)  
#fig = decomposition.plot()  
#pylab.show ()

#lets fit some ARIMA, keep indicator as 1 and rest as zero ie (p,q,r) = (1,0,0)
Example #19
differ[differ == (-inf)] = -100
differ[differ == (inf)] = 100
test_stationarity(differ)

print('The above differenced series is stationary')

#Making ACF and PACF plots

from statsmodels.tsa.stattools import acf, pacf
from statsmodels.tsa.arima_model import ARIMA
lag_acf = acf(differ, nlags=20)
lag_pacf = pacf(differ, nlags=20, method='ols')
#Plot ACF:

plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(differ)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(differ)), linestyle='--', color='gray')
plt.title('Autocorrelation Function')

#Plot PACF:
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(differ)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(differ)), linestyle='--', color='gray')
Example #20
# look at the noise from the data and check if the noise is stationary or not
decomposedlogdata = residual
decomposedlogdata.dropna(inplace=True)
test_stationarity(decomposedlogdata)
print("\nVisually, from the output of the graph, we see that the residuals of the log of the time series is not stationary."\
 " That is why we have to have your moving average parameter in place so that it smooths and setup to predict what will happen next.")

# Now that we know the value of d, the Integration parameter of the ARIMA model, which is the order of differencing,
# how can we find the values of P and Q, the Autoregressive and Moving Average orders, respectively?

# To do that we have to plot the ACF and PACF plots, which stand for the autocorrelation function and the partial autocorrelation function
# To calculate the value of Q, we need the ACF graph, and for the value of P, we need the PACF graph
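
# As an aside (not part of the original script): statsmodels also ships
# ready-made helpers that draw the same confidence band automatically.
# A minimal sketch, assuming datasetlogdiffshifting is the differenced log
# series used below:
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

fig_aux, (ax_acf, ax_pacf) = plt.subplots(1, 2)
plot_acf(datasetlogdiffshifting, lags=20, ax=ax_acf)    # read Q off this panel
plot_pacf(datasetlogdiffshifting, lags=20, ax=ax_pacf)  # read P off this panel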

lag_acf = acf(datasetlogdiffshifting, nlags=20)
lag_pacf = pacf(datasetlogdiffshifting, nlags=20, method='ols')

# Plot ACF to determine the Q(Moving Average part of ARIMA)
fig, ax = plt.subplots()
plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(datasetlogdiffshifting)),
            linestyle='--',
            color='gray')
plt.axhline(y=1.96 / np.sqrt(len(datasetlogdiffshifting)),
            linestyle='--',
            color='gray')
plt.title('Autocorrelation Function')

# Plot PACF to determine the P(Autoregressive part of ARIMA)
Example #21
def pacf(timeSeries,nlags = 40, alpha = 0.05,   method = 'ywunbiased'):
    results  = stattools.pacf(timeSeries, nlags = nlags, 
                            alpha = alpha, 
                            method = method)

    return results
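
A brief usage sketch for the wrapper above (the series name is illustrative): because alpha defaults to 0.05, stattools.pacf returns both the coefficients and their confidence intervals.

import numpy as np

series = np.random.randn(300)  # illustrative stand-in series
coeffs, confint = pacf(series)  # alpha=0.05 -> (pacf, confint) tuple
coeffs_only = stattools.pacf(series, nlags=40, alpha=None)  # coefficients only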
Example #22
df_ep = pd.DataFrame({'extrapol': trend_extrapol.ravel()},
                     index=np.arange(1, N + 13))

df = pd.concat([df, df_ep], axis=1)

### Periodic continuation of seasonal cycle ###

df.loc[N + 1:, 'seasonal'] = df.loc[1:12, 'seasonal'].values

### Determine parameters for ARIMA model ###

last_index = ts_residual.index[-1]

lag_acf = acf(ts_residual, nlags=20)
lag_pacf = pacf(ts_residual, nlags=20, method='ols')

confidence_interval = 1.96 / np.sqrt(len(ts_residual))

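# Note: np.where on a 1-D boolean mask returns a one-element tuple of index
# arrays, so min(*np.where(...)) unpacks it and yields the first lag whose
# coefficient drops below the confidence bound.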
p = min(*np.where(lag_pacf < confidence_interval))
q = min(*np.where(lag_acf < confidence_interval))

### Fit ARIMA model and forecast_residuals ###

to_go = N - last_index + 12
model = ARIMA(np.asarray(ts_residual), order=(p, 1, q))
results = model.fit(disp=0)
pred_residuals = results.forecast(steps=to_go)[0]

df_pr = pd.DataFrame({'pred res': pred_residuals},
                     index=np.arange(last_index + 1, N + 13))
Example #23
def plot_pacf_bars(ds,
                   rgi_df,
                   xlim=None,
                   nlags=200,
                   slice_start=3000,
                   plot_confint=True):
    """

    Parameters
    ----------
    ds
    rgi_df
    xlim
    path
    nlags
    slice_start
    plot_confint

    """
    # iterate over all above selected glaciers

    for rgi_id, glacier in rgi_df.iterrows():
        # select glacier name
        name = glacier['name']
        log.info('PACF plots for {} ({})'.format(name, rgi_id))

        # create figure and axes
        fig, ax = plt.subplots(1, 1)
        # compute acf over 1000 years
        lags = np.arange(0, nlags + 1)

        # select the complete dataset
        ds_sel = ds.sel(mb_model='random', normalized=False, rgi_id=rgi_id)
        # select time frame
        slice_end = None
        ds_sel = ds_sel.isel(time=slice(slice_start, slice_end))

        # define bar width
        width = 0.4

        # plot zero aux line
        ax.axhline(0, c='k', ls=':')

        for i, b in enumerate(np.sort(ds.temp_bias)):
            # get length data
            length = ds_sel.sel(temp_bias=b).length

            # FLOWLINE MODEL
            # --------------

            # compute autocorrelation and confidence intervals
            acf, confint = stattools.pacf(length.sel(model='fl'),
                                          nlags=nlags,
                                          alpha=0.01,
                                          method='ywmle')
            # plot autocorrelation function
            ax.bar(lags - width / 2,
                   acf,
                   width,
                   color=fl_cycle[i],
                   label='{:+.1f} °C'.format(b))
            if plot_confint:
                # fill confidence interval
                ax.fill_between(lags[1:],
                                confint[1:, 0] - acf[1:],
                                confint[1:, 1] - acf[1:],
                                color=fl_cycle[i],
                                alpha=0.1)

            # V/A SCALING MODEL
            # -----------------

            # compute autocorrelation and confidence intervals
            acf, confint = stattools.pacf(length.sel(model='vas'),
                                          nlags=nlags,
                                          alpha=0.01,
                                          method='ywmle')
            # plot autocorrelation function
            ax.bar(lags + width / 2,
                   acf,
                   width,
                   color=vas_cycle[i],
                   label='{:+.1f} °C'.format(b))
            if plot_confint:
                # fill confidence interval
                ax.fill_between(lags[1:],
                                confint[1:, 0] - acf[1:],
                                confint[1:, 1] - acf[1:],
                                color=vas_cycle[i],
                                alpha=0.1)

        # adjust axes
        if not xlim:
            xlim = [0, nlags]
        ax.set_xlim(xlim)
        ax.set_ylim([-1.1, 1.1])
        # add grid
        ax.grid()

        # get legend handles and labels
        handles, labels = ax.get_legend_handles_labels()
        title_proxy, = plt.plot(0,
                                marker='None',
                                linestyle='None',
                                label='dummy')

        # create list of handles and labels in correct order
        my_handles = list([title_proxy])
        my_handles.extend(handles[::2])
        my_handles.extend([title_proxy])
        my_handles.extend(handles[1::2])
        my_labels = list(["$\\bf{Flowline\ model}$"])
        my_labels.extend(labels[::2])
        my_labels.extend(["$\\bf{VAS\ model}$"])
        my_labels.extend(labels[1::2])
        # add single two-column legend
        ax.legend(my_handles, my_labels, ncol=2)

        # labels, title, ...
        ax.set_xlabel('Lag [years]')
        ax.set_ylabel('Correlation coefficient')

        # store plot
        dir_path = '/Users/oberrauch/work/master/plots/final_plots/pacf/'
        f_name = '{}.pdf'.format(name.replace(' ', '_'))
        path = os.path.join(dir_path, f_name)
        plt.savefig(path, bbox_inches='tight')
Example #24
def pacf_plot():
    pacf1 = pacf(train)  # statsmodels pacf; a function also named pacf would shadow it and recurse
    pacf1 = pd.DataFrame([pacf1]).T
    pacf1.plot(kind='bar', figsize=(12, 10))
    plt.show()
Example #25
def first_diff_pacf():
    pacf1_diff = pacf(price_diff)
    pacf1_diff = pd.DataFrame([pacf1_diff]).T
    pacf1_diff.plot(kind='bar', figsize=(12, 10))
    plt.show()
Example #26
    plt.savefig(directory_acf + "/feature" + str(i) + "_acf.png")	
    i += 1
    plt.clf()
    
# create pacf graphs for each feature

directory_pacf = "pacf/simple"
if not os.path.exists(directory_pacf):
    os.makedirs(directory_pacf)

i = 1
pacf_res = []

for group in groups:
    plt.figure(figsize=(10,latent_dim))
    temp = stattools.pacf(df[names[group]])
    plt.bar(range(len(temp)), temp, width = 0.1)
    plt.plot(temp, "ro")
    plt.xlabel("Lags")
    plt.ylabel("PACF")
    plt.title("PACF for feature " + str(group + 1))
    plt.axhline(y = 0, linestyle = "--")
    plt.axhline(y = -1.96/np.sqrt(len(df[names[0]])), linestyle = "--")
    plt.axhline(y = 1.96/np.sqrt(len(df[names[0]])), linestyle = "--")
    pacf_res.append(temp)
    plt.savefig(directory_pacf + "/feature" + str(i) + "_pacf.png")
    i += 1
    plt.clf()
    
directory_hist = "histograms/simple"
if not os.path.exists(directory_hist):
Example #27
 def pacf(self):
     return pacf(self.ts, nlags=20)
Example #28
plt.subplot(414)
plt.title("Residual Data")
plt.plot(residual, label='Residual')
plt.legend(loc='best')
plt.tight_layout()

#get residual data to clean noise
decomposedLogData = residual
decomposedLogData.dropna(inplace=True)
test_staionary(decomposedLogData)

#getting an idea for p,q values needed to apply ARIMA model
from statsmodels.tsa.stattools import acf, pacf

lag_acf = acf(indexedDataset_logscale, nlags=20)
lag_pacf = pacf(indexedDataset_logscale, nlags=20, method='ols')

#plot Auto Correlation Function : to calc q
plt.subplot(121)
plt.plot(lag_acf)
plt.title('Auto Correlation Function')

#plot Partial Auto Correlation Function : to calc p
plt.subplot(122)
plt.plot(lag_pacf)
plt.title('Partial Auto Correlation Function')
plt.tight_layout()

#checking Auto Regression
from statsmodels.tsa.arima_model import ARIMA
model = ARIMA(indexedDataset_logscale,
Example #29
#plt.subplot(414)
#plt.plot(residual, label='Residuals')
#plt.legend(loc='best')
#plt.tight_layout()
#
#plt.show()
#
#ts_log_decompose = residual
#ts_log_decompose.dropna(inplace=True)
#test_sta.test_stationarity(ts_log_decompose)

## decide the structure (p,q) of the model ------------------------------------
from statsmodels.tsa.stattools import acf, pacf

lag_acf = acf(ts_log_ewma_diff, nlags=20)
lag_pacf = pacf(ts_log_ewma_diff, nlags=20, method='ols')
plt.subplot(121) 
plt.plot(lag_acf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(ts_log_ewma_diff)),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(ts_log_ewma_diff)),linestyle='--',color='gray')
plt.title('Autocorrelation Function')

plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(ts_log_ewma_diff)),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(ts_log_ewma_diff)),linestyle='--',color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()
plt.show()
Example #30
def plot_pacf(x, ax=None, lags=None, alpha=.05, method='ywunbiased',
              use_vlines=True, title='Partial Autocorrelation', zero=True,
              vlines_kwargs=None, **kwargs):
    """
    Plot the partial autocorrelation function

    Parameters
    ----------
    x : array_like
        Array of time-series values
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    lags : int or array_like, optional
        int or Array of lag values, used on horizontal axis. Uses
        np.arange(lags) when lags is an int.  If not provided,
        ``lags=np.arange(len(corr))`` is used.
    alpha : float, optional
        If a number is given, the confidence intervals for the given level are
        returned. For instance if alpha=.05, 95 % confidence intervals are
        returned where the standard deviation is computed according to
        1/sqrt(len(x))
    method : {'yw', 'ywunbiased', 'ywm', 'ywmle', 'ols', 'ld', 'ldunbiased', 'ldb', 'ldbiased'}
        Specifies which method for the calculations to use:

        - yw or ywunbiased : yule walker with bias correction in denominator
          for acovf. Default.
        - ywm or ywmle : yule walker without bias correction
        - ols - regression of time series on lags of it and on constant
        - ld or ldunbiased : Levinson-Durbin recursion with bias correction
        - ldb or ldbiased : Levinson-Durbin recursion without bias correction

    use_vlines : bool, optional
        If True, vertical lines and markers are plotted.
        If False, only markers are plotted.  The default marker is 'o'; it can
        be overridden with a ``marker`` kwarg.
    title : str, optional
        Title to place on plot.  Default is 'Partial Autocorrelation'
    zero : bool, optional
        Flag indicating whether to include the 0-lag autocorrelation.
        Default is True.
    vlines_kwargs : dict, optional
        Optional dictionary of keyword arguments that are passed to vlines.
    **kwargs : kwargs, optional
        Optional keyword arguments that are directly passed on to the
        Matplotlib ``plot`` and ``axhline`` functions.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    See Also
    --------
    matplotlib.pyplot.xcorr
    matplotlib.pyplot.acorr
    mpl_examples/pylab_examples/xcorr_demo.py

    Notes
    -----
    Plots lags on the horizontal and the correlations on vertical axis.
    Adapted from matplotlib's `xcorr`.

    Data are plotted as ``plot(lags, corr, **kwargs)``

    kwargs is used to pass matplotlib optional arguments to both the line
    tracing the autocorrelations and for the horizontal line at 0. These
    options must be valid for a Line2D object.

    vlines_kwargs is used to pass additional optional arguments to the
    vertical lines connecting each autocorrelation to the axis.  These options
    must be valid for a LineCollection object.

    Examples
    --------
    >>> import pandas as pd
    >>> import matplotlib.pyplot as plt
    >>> import statsmodels.api as sm

    >>> dta = sm.datasets.sunspots.load_pandas().data
    >>> dta.index = pd.Index(sm.tsa.datetools.dates_from_range('1700', '2008'))
    >>> del dta["YEAR"]
    >>> sm.graphics.tsa.plot_pacf(dta.values.squeeze(), lags=40)
    >>> plt.show()

    .. plot:: plots/graphics_tsa_plot_pacf.py
    """
    fig, ax = utils.create_mpl_ax(ax)
    vlines_kwargs = {} if vlines_kwargs is None else vlines_kwargs
    lags, nlags, irregular = _prepare_data_corr_plot(x, lags, zero)

    confint = None
    if alpha is None:
        acf_x = pacf(x, nlags=nlags, alpha=alpha, method=method)
    else:
        acf_x, confint = pacf(x, nlags=nlags, alpha=alpha, method=method)

    _plot_corr(ax, title, acf_x, confint, lags, irregular, use_vlines,
               vlines_kwargs, **kwargs)

    return fig
Example #31
def forecast(ts, log_series):
    """
    make model on the TS after differencing. Having performed the trend and seasonality estimation techniques,
    there can be two situations:
    A strictly stationary series with no dependence among the values. This is the easy case wherein we can model the
    residuals as white noise. But this is very rare.
    A series with significant dependence among values. In this case we need to use some statistical models like ARIMA to
    forecast the data.

    The predictors depend on the parameters (p,d,q) of the ARIMA model:
    Number of AR (Auto-Regressive) terms (p): AR terms are just lags of dependent variable. For instance if p is 5,
    the predictors for x(t) will be x(t-1)...x(t-5).
    Number of MA (Moving Average) terms (q): MA terms are lagged forecast errors in prediction equation. For instance
    if q is 5, the predictors for x(t) will be e(t-1)...e(t-5) where e(i) is the difference between the moving average
    at ith instant and actual value.
    Number of Differences (d): These are the number of nonseasonal differences, i.e. in this case we took the first
    order difference. So either we can pass that variable and put d=0 or pass the original variable and put d=1.
    Both will generate same results.

    We use two plots to determine these numbers. Lets discuss them first.

    Autocorrelation Function (ACF): It is a measure of the correlation between the the TS with a lagged version of itself.
    For instance at lag 5, ACF would compare series at time instant 't1'...'t2' with series at instant 't1-5'...'t2-5'
    (t1-5 and t2 being end points).
    Partial Autocorrelation Function (PACF): This measures the correlation between the TS with a lagged version of itself
    but after eliminating the variations already explained by the intervening comparisons. Eg at lag 5, it will check
    the correlation but remove the effects already explained by lags 1 to 4.

    :param ts: the original series, used for plotting the final forecast
    :param log_series: the log-transformed series to model
    :return:
    """
    #ACF and PACF plots
    ts_log = log_series  # the body below refers to the log series as ts_log
    ts_log_diff = ts_log - ts_log.shift()
    ts_log_diff = ts_log_diff.dropna()
    lag_acf = acf(ts_log_diff, nlags = 20)
    lag_pacf = pacf(ts_log_diff, nlags = 20, method = "ols")
    #plot ACF
    plt.subplot(221)
    plt.plot(lag_acf)
    plt.axhline(y=0, linestyle="--", color="gray")
    plt.axhline(y=-1.96 / np.sqrt(len(ts_log_diff)), linestyle="--", color="gray") #lower line of confidence interval
    plt.axhline(y=1.96 / np.sqrt(len(ts_log_diff)), linestyle="--", color="gray") #upper line of confidence interval
    plt.title('Autocorrelation Function')

    # Plot PACF:
    plt.subplot(222)
    plt.plot(lag_pacf)
    plt.axhline(y=0, linestyle="--", color="gray")
    plt.axhline(y=-1.96 / np.sqrt(len(ts_log_diff)), linestyle="--", color="gray")
    plt.axhline(y=1.96 / np.sqrt(len(ts_log_diff)), linestyle="--", color="gray")
    plt.title('Partial Autocorrelation Function')
    plt.tight_layout()

    #from these plots, we get p and q:
    #p - The lag value where the PACF chart crosses the upper confidence interval for the first time. If you notice
    # closely, in this case p=2.
    #q - The lag value where the ACF chart crosses the upper confidence interval for the first time. If you notice
    # closely, in this case q=2.

    #AR model
    res_arima = arima_models(ts_log, 2, 1, 0)
    # print pd.Series(res_arima.fittedvalues)
    plt.subplot(223)
    plt.plot(ts_log_diff)
    plt.plot(res_arima.fittedvalues, color='red')
    # plt.title('AR model--RSS: %.4f' % sum((pd.Series(res_arima.fittedvalues) - ts_log_diff) ** 2))

    #MA model
    res_ma = arima_models(ts_log, 0, 1, 2)
    plt.subplot(224)
    plt.plot(ts_log_diff)
    plt.plot(res_ma.fittedvalues, color='red')
    # plt.title('MA model--RSS: %.4f' % sum((res_ma.fittedvalues - ts_log_diff) ** 2))
    plt.plot()

    ##Combined model
    res = arima_models(ts_log, 2, 1, 2)
    plt.plot(ts_log_diff)
    plt.plot(res.fittedvalues, color='red')
    # plt.title('RSS: %.4f' % sum((res.fittedvalues - ts_log_diff) ** 2))
    plt.show()
    #Here we can see that the AR and MA models have almost the same RSS but combined is significantly better.

    #predicting
    predictions_diff = pd.Series(res.fittedvalues, copy=True)
    print(predictions_diff.head())
    #Notice that these start from '1949-02-01' and not the first month; because we took a lag by 1 and first element
    # doesn't have anything before it to subtract from. The way to convert the differencing to log scale is to add these
    # differences consecutively to the base number. An easy way to do it is to first determine the cumulative sum at
    # index and then add it to the base number. The cumulative sum can be found as:
    predictions_diff_cumsum = predictions_diff.cumsum()
    #now add them to the base number

    predictions_arima_log = pd.Series(ts_log.iloc[0], index = ts_log.index)
    predictions_arima_log = predictions_arima_log.add(predictions_diff_cumsum, fill_value = 0)
    #now let us take the exponential to regain original form of series
    predictions_ARIMA = np.exp(predictions_arima_log)
    plt.plot(ts)
    plt.plot(predictions_ARIMA)
    # plt.title('RMSE: %.4f' % np.sqrt(sum((predictions_ARIMA - ts) ** 2) / len(ts)))
    plt.show()
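
The arima_models helper called throughout this example is not shown; a plausible minimal sketch under that assumption, using the era-appropriate statsmodels ARIMA API:

from statsmodels.tsa.arima_model import ARIMA

def arima_models(series, p, d, q):
    # Hypothetical reconstruction: fit an ARIMA(p, d, q) on the given series
    # and return the fitted results object used by forecast() above.
    model = ARIMA(series, order=(p, d, q))
    return model.fit(disp=-1)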
Example #32
def plot_pacf(x,
              ax=None,
              lags=None,
              alpha=.05,
              method='ywm',
              use_vlines=True,
              title='Partial Autocorrelation',
              zero=True,
              **kwargs):
    """Plot the partial autocorrelation function

    Plots lags on the horizontal and the correlations on vertical axis.

    Parameters
    ----------
    x : array_like
        Array of time-series values
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    lags : int or array_like, optional
        int or Array of lag values, used on horizontal axis. Uses
        np.arange(lags) when lags is an int.  If not provided,
        ``lags=np.arange(len(corr))`` is used.
    alpha : scalar, optional
        If a number is given, the confidence intervals for the given level are
        returned. For instance if alpha=.05, 95 % confidence intervals are
        returned where the standard deviation is computed according to
        1/sqrt(len(x))
    method : 'ywm' (default) or 'ywunbiased' or 'ywmle' or 'ols'
        specifies which method for the calculations to use:

        - yw or ywunbiased : yule walker with bias correction in denominator
          for acovf
        - ywm or ywmle : yule walker without bias correction
        - ols - regression of time series on lags of it and on constant
        - ld or ldunbiased : Levinson-Durbin recursion with bias correction
        - ldb or ldbiased : Levinson-Durbin recursion without bias correction
    use_vlines : bool, optional
        If True, vertical lines and markers are plotted.
        If False, only markers are plotted.  The default marker is 'o'; it can
        be overridden with a ``marker`` kwarg.
    title : str, optional
        Title to place on plot.  Default is 'Partial Autocorrelation'
    zero : bool, optional
        Flag indicating whether to include the 0-lag autocorrelation.
        Default is True.
    **kwargs : kwargs, optional
        Optional keyword arguments that are directly passed on to the
        Matplotlib ``plot`` and ``axhline`` functions.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    See Also
    --------
    matplotlib.pyplot.xcorr
    matplotlib.pyplot.acorr
    mpl_examples/pylab_examples/xcorr_demo.py

    Notes
    -----
    Adapted from matplotlib's `xcorr`.

    Data are plotted as ``plot(lags, corr, **kwargs)``

    """
    fig, ax = utils.create_mpl_ax(ax)

    lags, nlags, irregular = _prepare_data_corr_plot(x, lags, zero)

    confint = None
    if alpha is None:
        acf_x = pacf(x, nlags=nlags, alpha=alpha, method=method)
    else:
        acf_x, confint = pacf(x, nlags=nlags, alpha=alpha, method=method)

    _plot_corr(ax, title, acf_x, confint, lags, irregular, use_vlines,
               **kwargs)

    return fig
Example #33
def queryandinsert():
    """ This is the main function which will be call by main... it integrate several other functions.
    Please do not call this function in other pack, otherwise it will cause unexpected result!!!!"""
    global gtbuDict             # gtbuDict, being used to store query data from gtbu database.....
    global omsDict              # being used to store query data from OMS database.....
    global presisDict
    global counter
    global testingDict

    starttime = datetime.datetime.now()

    print len(presisDict)
    print "connect to databae!"

    # connect to the database use my own toolkits
    querydbinfoOMS = getdbinfo('OMS')
    querydbnameOMS = "wifi_data"

    querydbinfoGTBU = getdbinfo("GTBU")
    querydbnameGTBU = "ucloudplatform"

    insertdbinfo = getdbinfo('REMOTE')
    insertdbname = 'login_history'

    # print the database information for verification
    for key, value in querydbinfoOMS.iteritems():
        print key + " : " + str(value)

    queryStatementRemote = """
    SELECT epochTime,visitcountry,onlinenum
    FROM t_fordemo
    WHERE butype =2 AND visitcountry IN ('JP','DE','TR') AND epochTime BETWEEN DATE_SUB(NOW(),INTERVAL 2 DAY) AND NOW()
    ORDER BY epochTime ASC
    """
    # get the online data which will be used to calculate the daily user number ( daily user number is bigger than the max number...
    # and the max number is actually what is being used in this scenario )
    queryStatementTraining = """
    SELECT t1,t2,DATEDIFF(t2,t1) AS dif,imei,visitcountry FROM
    (
    SELECT DATE(logindatetime) AS t1,DATE(logoutdatetime) AS t2, imei,visitcountry
    FROM t_usmguserloginlog
    WHERE visitcountry IN ('JP','DE','TR')
    ) AS z
    GROUP BY t1,t2,imei
    """

    # (output data) get the max online number for each of these countries every day ( this record is incomplete due to constant network partitions,
    # therefore a lot of corresponding work is necessary to align the input and output dates by day! )...
    queryStatementOnline ="""
    SELECT epochTime,visitcountry,MAX(onlinenum)
    FROM
    (
    SELECT DATE(epochTime) AS epochTime,visitcountry,onlinenum
    FROM t_fordemo
    WHERE butype =2 and visitcountry IN ('JP','DE','TR')
    ) AS z
    GROUP BY epochTime,visitcountry
    """

    # (input data) get the order number information which will be used to calculate the daily maximum number for each country...
    # this number could be ridiculously large with respect to the real number for some specific countries.
    querystatementOMS = """
    SELECT DATE(date_goabroad),DATE(date_repatriate),DATEDIFF(date_repatriate,date_goabroad),imei,package_id FROM tbl_order_basic
    WHERE imei IS NOT NULL AND (DATE(date_repatriate)) > '2016-01-01' AND DATE(date_goabroad) < DATE(NOW())
    ORDER BY date_repatriate ASC
    """

    querystatementOMSCount = """
    SELECT  date_goabroad,date_repatriate,DATEDIFF(date_repatriate,date_goabroad),t1.package_id,t3.iso2 FROM tbl_order_basic AS t1
    LEFT JOIN tbl_package_countries AS t2
    ON t1.package_id = t2.package_id
    LEFT JOIN tbl_country AS t3
    ON t2.country_id = t3.pk_global_id
    WHERE t1.data_status = 0 AND DATE(date_goabroad) BETWEEN DATE(NOW()) AND DATE_ADD(NOW(),INTERVAL 3 MONTH) OR
    (
    DATE(date_repatriate) >= DATE(NOW())
    )
    """

    # establish connection to the mysql databases................
    querydbGTBU = MySQLdb.connect(user = querydbinfoGTBU['usr'],
                                  passwd = querydbinfoGTBU['pwd'],
                                  host = querydbinfoGTBU['host'],
                                  port = querydbinfoGTBU['port'],
                                  db = querydbnameGTBU)
    querydbOMS = MySQLdb.connect(user = querydbinfoOMS['usr'],
                                 passwd = querydbinfoOMS['pwd'],
                                 host = querydbinfoOMS['host'],
                                 port = querydbinfoOMS['port'],
                                 db = querydbnameOMS)
    insertdb = MySQLdb.connect(user = insertdbinfo['usr'],
                               passwd = insertdbinfo['pwd'],
                               host = insertdbinfo['host'],
                               port = insertdbinfo['port'],
                               db = insertdbname)

    queryCurGTBU = querydbGTBU.cursor()
    queryCurOMS = querydbOMS.cursor()
    insertCur = insertdb.cursor()


    print "executing query!!! By using generator!!!"
    insertCur.execute(queryStatementRemote)
    remoteGenerator = fetchsome(insertCur,100) #fetchsome is a generator which fetches a certain number of rows each time.

    for row in remoteGenerator:
        accumulatOnlineNumber(row,testingDict)

    onlineList = getTestingList(testingDict)

    countryList = onlineList[1]
    jpIndex = countryList.index('JP')
    datalist = onlineList[2][jpIndex]
    timelist = onlineList[0]

    tsJP = Series(datalist,index = timelist)
    df = DataFrame()
    df['JP'] = tsJP

    print(df.index)
    print(df.columns)

    print(df)

    tsJP_log = np.log(tsJP)
    lag_acf = acf(tsJP_log,nlags=200)
    lag_pacf = pacf(tsJP_log,nlags=200,method='ols')

    # model = ARIMA(tsJP_log,order=(2,1,2))
    model = ARMA(tsJP_log,(5,2))
    res = model.fit(disp=-1)


    print "Here is the fit result"
    print res

    params = res.params
    residuals = res.resid
    p = res.k_ar
    q = res.k_ma
    k_exog = res.k_exog
    k_trend = res.k_trend
    steps = 300

    newP = _arma_predict_out_of_sample(params, steps, residuals, p, q, k_trend, k_exog, endog=tsJP_log, exog=None, start=len(tsJP_log))
    newF,stdF,confiF = res.forecast(steps)

    print(newP)
    newP = np.exp(newP)
    print(newP)

    print("Forecast below!")
    print(newF)
    newF = np.exp(newF)
    print(newF)
    print(stdF)
    stdF = np.exp(stdF)
    print(stdF)

    x_axis = range(len(lag_acf))
    y_axis = lag_acf

    onlineEWMA=go.Scatter(
        x = x_axis,
        y = y_axis,
        mode = 'lines+markers',
        name = "lag_acf"
    )

    onlinePre=go.Scatter(
        x = x_axis,
        y = newP,
        mode = 'lines+markers',
        name = "predictJP"
    )

    layout = dict(title = 'EWMA prediction',
              xaxis = dict(title = 'Date'),
              yaxis = dict(title = 'online Number'),
              )

    data = [onlineEWMA,onlinePre]
    fig = dict(data=data, layout=layout)

    plot(fig,filename ="/ukl/apache-tomcat-7.0.67/webapps/demoplotly/EWMAprediction.html",auto_open=False)
Example #34
0
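# Choose whichever extreme (max or min) lies farther from the mean, so the
# linear color mapper spans the full range of deviations from avg.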
if abs(max(tcs_close)-avg) > abs(min(tcs_close)-avg):
    h = max(tcs_close)
else: 
    h = min(tcs_close)

gradplot = figure()
mapper = LinearColorMapper(palette= ['#084594', '#2171b5', '#4292c6', '#6baed6', '#9ecae1', '#c6dbef', '#deebf7', '#f7fbff'], low=avg, high=h)
gradplot.scatter(range(len(list(tcs_close))), list(tcs_close), color = {'field': 'y', 'transform':mapper })
color_bar = ColorBar(color_mapper=mapper, width=8,  location=(0,0))
gradplot.add_layout(color_bar, 'right')
show(gradplot)

 
#5
pwov_index = [i for i in range(len(pwov)) if pwov[i] == 1]
tcs_plot = figure()
tcs_plot.line(range(len(tcs_close)), tcs_close, line_width = 2, color = 'blue')
tcs_plot.circle(pwov_index, list(tcs_close[pwov_index]), color='red')
show(tcs_plot)


#6
from bokeh.plotting import figure, show
from statsmodels.tsa.stattools import pacf
vals = pacf(tcs_close)
pacfplot = figure()
pacfplot.scatter(range(len(vals[:10])), vals[:10], line_width = 2, color = 'blue')
show(pacfplot)

Example #35
0
dfoutput = pd.Series(dftest[0:4],
                     index=[
                         'Test Statistic', 'p-value', '#Lags Used',
                         'Number of Observations Used'
                     ])
for key, value in dftest[4].items():
    dfoutput['Critical Value (%s)' % key] = value
print(dfoutput)  # excellent p-value: the series is stationary!

index_date = pd.date_range('1/1/2011', periods=5000, freq='1800s')
data = pd.DataFrame(
    data={"CSPL_RECEIVED_CALLS": data["CSPL_RECEIVED_CALLS"].values},
    index=index_date)
data['CSPL_RECEIVED_CALLS'] = data['CSPL_RECEIVED_CALLS'].astype('float64')

lag_acf = acf(data, nlags=20)
lag_pacf = pacf(data, nlags=20, method='ols')

plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(5000), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(5000), linestyle='--', color='gray')
plt.title('Autocorrelation Function')

plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(5000), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(5000), linestyle='--', color='gray')
plt.title('Partial Autocorrelation Function')
Example #36
0
plt.tight_layout()

#There can be cases where an observation consists only of trend & seasonality. In that case there is no
#residual component; it shows up as null/NaN, so we also remove such rows.
decomposedLogData = residual
decomposedLogData.dropna(inplace=True)
test_stationarity(decomposedLogData)

#ACF & PACF plots

lag_acf = acf(datasetLogDiffShifting, nlags=20)
lag_pacf = pacf(datasetLogDiffShifting, nlags=20, method='ols')

#Plot ACF:
plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96/np.sqrt(len(datasetLogDiffShifting)), linestyle='--', color='gray')
plt.axhline(y=1.96/np.sqrt(len(datasetLogDiffShifting)), linestyle='--', color='gray')
plt.title('Autocorrelation Function')            

#Plot PACF
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96/np.sqrt(len(datasetLogDiffShifting)), linestyle='--', color='gray')
plt.axhline(y=1.96/np.sqrt(len(datasetLogDiffShifting)), linestyle='--', color='gray')
Example #37
0
    dfoutput = pd.Series(dftest[0:4],
                         index=[
                             'Test Statistic', 'p-value', '#Lags Used',
                             'Number of Observations Used'
                         ])
    for key, value in dftest[4].items():
        dfoutput['Critical Value (%s)' % key] = value
    print(dfoutput)


data_shift.dropna(inplace=True)
test_stationarity(data_shift)

# ACF & PACF plots

lag_acf = acf(data_shift, nlags=10)
lag_pacf = pacf(data_shift, nlags=10, method='ols')

# Plot ACF:
plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(data_shift)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(data_shift)), linestyle='--', color='gray')
plt.title('Autocorrelation Function')

# Plot PACF
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(data_shift)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(data_shift)), linestyle='--', color='gray')

plt.plot(np.arange(0, 11), acf(ts_log_mv_diff, nlags=10))
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(ts_log_mv_diff)),
            linestyle='--',
            color='gray')
plt.axhline(y=1.96 / np.sqrt(len(ts_log_mv_diff)),
            linestyle='--',
            color='gray')
plt.title('Autocorrelation Function')
plt.show()

# ### The ACF curve crosses the upper confidence value when the lag value is between 0 and 1. Thus, the optimal value of q in the ARIMA model must be 0 or 1

# In[15]:

plt.plot(np.arange(0, 11), pacf(ts_log_mv_diff, nlags=10))
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(ts_log_mv_diff)),
            linestyle='--',
            color='gray')
plt.axhline(y=1.96 / np.sqrt(len(ts_log_mv_diff)),
            linestyle='--',
            color='gray')
plt.title('Partial Autocorrelation Function')
plt.show()

# ### The PACF curve drops to 0 between lag values 1 and 2. Thus, the optimal value of p in the ARIMA model is 1 or 2.
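
# A hedged sketch (not part of the original notebook): the same reading of q and p
# can be automated by finding the first lag whose coefficient falls inside the
# +/-1.96/sqrt(n) confidence band.
band = 1.96 / np.sqrt(len(ts_log_mv_diff))
acf_vals = acf(ts_log_mv_diff, nlags=10)
pacf_vals = pacf(ts_log_mv_diff, nlags=10)
q_cand = next((k for k in range(1, 11) if abs(acf_vals[k]) < band), None)
p_cand = next((k for k in range(1, 11) if abs(pacf_vals[k]) < band), None)
print('candidate q:', q_cand, 'candidate p:', p_cand)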

# In[16]:

model = ARIMA(ts_log, order=(1, 1, 0))
Example #39
0
plt.plot(decompose.seasonal)
plt.plot(decompose.trend)

#Differencing
diff_df = df - df.shift(1)
diff_df = diff_df.dropna()
plt.plot(diff_df)
decompose = seasonal_decompose(diff_df, freq=12)
plt.plot(decompose.resid)
plt.plot(decompose.seasonal)
plt.plot(decompose.trend)

from statsmodels.tsa.stattools import acf, pacf

lag_acf = acf(diff_df, nlags=20)
lag_pacf = pacf(diff_df, nlags=20, method='ols')
plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(diff_df)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(diff_df)), linestyle='--', color='gray')
plt.title('Autocorrelation Function')
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(diff_df)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(diff_df)), linestyle='--', color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()

mod = SARIMAX(df,
Example #40
0
plt.figure(6)
plt.plot(differencing)
plotstats(differencing)
plt.show()

print('ACF and PACF with series stationarized')

pyplot.figure()
plot_acf(differencing, ax=pyplot.gca(), lags=20)
pyplot.figure()
plot_pacf(differencing, ax=pyplot.gca(), lags=20)
pyplot.show()

lag_acf = acf(differencing, nlags=20)
lag_pacf = pacf(differencing, nlags=20, method='ols')

#Temporary test ACF and PACF

plt.figure(13)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(series)), linestyle='--', color='gray')
plt.axhline(y=1.96 / np.sqrt(len(series)), linestyle='--', color='gray')
plt.title('Autocorrelation function for PETR4 - ARIMA (0,1,1)')
plt.show()

#Plot PACF:

plt.figure(14)
plt.plot(lag_pacf)
Example #41
0
plt.plot(seasonal,label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='best')
plt.tight_layout()
ts_log_decompose = residual
ts_log_decompose.dropna(inplace=True)
test_stationarity(ts_log_decompose)

##########################
#Modeling and forecasting#
##########################
data_log_diff.dropna(inplace=True)
lag_acf = acf(data_log_diff, nlags=20)
lag_pacf = pacf(data_log_diff, nlags=20, method='ols')
#Plot ACF: 
plt.subplot(121) 
plt.plot(lag_acf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(data_log_diff)),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(data_log_diff)),linestyle='--',color='gray')
plt.title('Autocorrelation Function')
#Plot PACF:
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(data_log_diff)),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(data_log_diff)),linestyle='--',color='gray')
plt.title('Partial Autocorrelation Function')
plt.tight_layout()
Example #42
0
if __name__ == "__main__":



    data = pd.read_csv(sys.argv[1])

    

    d = sys.argv[2]
    #print(d)
    #lag_acf = acf(data["value"].diff(d).dropna(), nlags=30)
    #lag_pacf = pacf(data["value"].dropna(), nlags=25)

    if d == '0':
        lag_pacf = pacf(data["value"].dropna(), nlags=25)
    else:
        # sys.argv values are strings; diff() needs an integer period
        lag_pacf = pacf(data["value"].diff(int(d)).dropna(), nlags=25)

    #print(lag_pacf)
  

    f = open("./ts_analysis/pacf.txt", "w")
    #f = open("pacf.txt", "w")
    for i in lag_pacf:
        f.write(str(i)+'\n')
    f.close()
   
    #plot_pacf(data["value"].diff(d).dropna())
    #lt.savefig("../desktop/for_redis/pacf.png")
    #plt.savefig("pacf.png")
def main(args):
    data_file_list = tl.get_data_file_list(args.library)
    if args.list:
        tl.print_list(data_file_list)
        sys.exit(0)
    data = tl.load_data_file(data_file_list,args.select)

    if not args.static:
        logging.debug('Remove zero velocity samples')
        data = ntp.remove_non_positive_velocity_samples(data)

    if args.ue == 'e398':
        # Rename MAC downlink throughput in Application downlink throughput if need be
        ntp.process_data(data,ntp.process_lte_rename_mac_to_app)

    # Get basic data
    ntp.process_data(data,ntp.process_lte_app_bw_prb_util)
    ntp.process_data(data,ntp.process_lte_app_bw_prb_util_bw20)
    ntp.process_data(data,ntp.process_lte_app_bw_prb_util_bw10)
    ntp.process_data(data,ntp.process_lte_app_bw_prb_util_bw15)

    # Spectral efficiency, SNR, RSRP
    ntp.process_data(data,ntp.process_se_bw_norm)
    ntp.process_data(data,ntp.process_lte_rs_snr)
    ntp.process_data(data,ntp.process_lte_rsrp)

    column_list = ['Velocity',
                   'SE','SE norm',
                   'RS SNR/Antenna port - 1','RS SNR/Antenna port - 2',
                   'RSRP/Antenna port - 1','RSRP/Antenna port - 2']
    if args.select is None:
        df = tl.concat_pandas_data([df[column_list] for df in data ])
    else:
        df = data

    # Remove outliers because of bandwidth normalization issues
    df.loc[df['SE norm'] > 7.5, 'SE norm'] = np.nan

    print(df['SE'].dropna().describe())
    print(df['Velocity'].dropna().describe())
    print(df['RS SNR/Antenna port - 1'].dropna().describe())

    velocity_pacf = pacf(df['Velocity'].dropna(), nlags=10, method='ywunbiased', alpha=None)
    se_pacf,se_conf = pacf(df['SE'].dropna(), nlags=10, method='ywunbiased', alpha=0.05)
    se_norm_pacf,se_norm_conf = pacf(df['SE norm'].dropna(), nlags=10, method='ywunbiased', alpha=0.05)
    rs_snr_ap1_pacf = pacf(df['RS SNR/Antenna port - 1'].dropna(), nlags=10, method='ywunbiased', alpha=None)
    rsrp_ap1_pacf = pacf(df['RSRP/Antenna port - 1'].dropna(), nlags=10, method='ywunbiased', alpha=None)

    # Apply diff to ensure zero-mean
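    # (differencing removes the slowly varying mean, so the ACFs below are
    # computed on approximately zero-mean increments)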
    velocity_acf = acf(df['Velocity'].dropna().diff().dropna(), unbiased=False, nlags=40, confint=None, qstat=False, fft=True, alpha=None)
    se_acf = acf(df['SE'].dropna().diff().dropna(), unbiased=False, nlags=40, confint=None, qstat=False, fft=True, alpha=None)
    se_norm_acf = acf(df['SE norm'].dropna().diff().dropna(), unbiased=False, nlags=40, confint=None, qstat=False, fft=True, alpha=None)
    rs_snr_ap1_acf = acf(df['RS SNR/Antenna port - 1'].dropna().diff().dropna(), unbiased=False, nlags=40, confint=None, qstat=False, fft=True, alpha=None)
    rsrp_ap1_acf = acf(df['RSRP/Antenna port - 1'].dropna().diff().dropna(), unbiased=False, nlags=40, confint=None, qstat=False, fft=True, alpha=None)

    plt.ion()
    plt.figure()
    plt.subplot2grid((2,1), (0,0))
    plt.plot(se_pacf,lw=2.0,label='PACF SE')
    plt.plot(se_norm_pacf,lw=2.0,label='PACF SE norm')
    plt.plot(rs_snr_ap1_pacf,lw=2.0,label='PACF RS SNR AP1')
    plt.plot(rsrp_ap1_pacf,lw=2.0,label='PACF RSRP AP1')
    plt.plot(velocity_pacf,lw=2.0,label='PACF Velocity')
    plt.ylim([-0.2,1.1])
    plt.grid(True)
    plt.legend(loc=0)

    plt.subplot2grid((2,1), (1,0))
    plt.plot(se_acf,lw=2.0,label='ACF SE diff')
    plt.plot(se_norm_acf,lw=2.0,label='ACF SE norm diff')
    plt.plot(rs_snr_ap1_acf,lw=2.0,label='ACF RS SNR AP1 diff')
    plt.plot(rsrp_ap1_acf,lw=2.0,label='ACF RSRP AP1 diff')
    plt.plot(velocity_acf,lw=2.0,label='ACF Velocity diff')
    plt.ylim([-0.2,1.1])
    plt.grid(True)
    plt.legend(loc=0)

    plt.tight_layout()

    input('Press any key')
Example #44
0
def plot_pacf(x, ax=None, lags=None, alpha=.05, method='ywm',
                use_vlines=True, **kwargs):
    """Plot the partial autocorrelation function

    Plots lags on the horizontal and the correlations on vertical axis.

    Parameters
    ----------
    x : array_like
        Array of time-series values
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    lags : array_like, optional
        Array of lag values, used on horizontal axis.
        If not given, ``lags=np.arange(len(corr))`` is used.
    alpha : scalar, optional
        If a number is given, the confidence intervals for the given level are
        returned. For instance if alpha=.05, 95 % confidence intervals are
        returned where the standard deviation is computed according to
        1/sqrt(len(x))
    method : 'ywunbiased' (default) or 'ywmle' or 'ols'
        specifies which method for the calculations to use:

        - yw or ywunbiased : yule walker with bias correction in denominator
          for acovf
        - ywm or ywmle : yule walker without bias correction
        - ols - regression of time series on lags of it and on constant
        - ld or ldunbiased : Levinson-Durbin recursion with bias correction
        - ldb or ldbiased : Levinson-Durbin recursion without bias correction

    use_vlines : bool, optional
        If True, vertical lines and markers are plotted.
        If False, only markers are plotted.  The default marker is 'o'; it can
        be overridden with a ``marker`` kwarg.
    **kwargs : kwargs, optional
        Optional keyword arguments that are directly passed on to the
        Matplotlib ``plot`` and ``axhline`` functions.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    See Also
    --------
    matplotlib.pyplot.xcorr
    matplotlib.pyplot.acorr
    mpl_examples/pylab_examples/xcorr_demo.py

    Notes
    -----
    Adapted from matplotlib's `xcorr`.

    Data are plotted as ``plot(lags, corr, **kwargs)``

    """
    fig, ax = utils.create_mpl_ax(ax)

    if lags is None:
        lags = np.arange(len(x))
        nlags = len(lags) - 1
    else:
        nlags = lags
        lags = np.arange(lags + 1) # +1 for zero lag

    acf_x, confint = pacf(x, nlags=nlags, alpha=alpha, method=method)

    if use_vlines:
        ax.vlines(lags, [0], acf_x, **kwargs)
        ax.axhline(**kwargs)

    # center the confidence interval TODO: do in acf?
    confint = confint - confint.mean(1)[:,None]
    kwargs.setdefault('marker', 'o')
    kwargs.setdefault('markersize', 5)
    kwargs.setdefault('linestyle', 'None')
    ax.margins(.05)
    ax.plot(lags, acf_x, **kwargs)
    ax.fill_between(lags, confint[:,0], confint[:,1], alpha=.25)
    ax.set_title("Partial Autocorrelation")

    return fig
Example #45
0
for the first time. These p lags will act as our features while forecasting 
the AR time series.
"""

# pacf plot fancy
print('\n*** Partial ACF Plot ***')
from statsmodels.graphics.tsaplots import plot_pacf
plt.rcParams['figure.figsize'] = (8, 5)
plt.figure(figsize=(5, 5))
plot_pacf(df.values.tolist(), lags=50)
plt.axhline(y=0, linestyle='--', color='red')
#plt.axhline(y=-1.96/np.sqrt(len(df)),linestyle='--',color='red')
plt.axhline(y=1.96 / np.sqrt(len(df)), linestyle='--', color='red')
plt.title("Partial Auto Corelation Plot")
plt.xlabel('Lags')
plt.show()

# pacf plot simple
print('\n*** Partial ACF Plot ***')
from statsmodels.tsa.stattools import pacf
pacf_50 = pacf(df[pColData], nlags=50)
plt.rcParams['figure.figsize'] = (8, 5)
plt.figure()
plt.ylim(-2, 2)
plt.plot(pacf_50, color='b')
plt.axhline(y=0, linestyle='--', color='red')
plt.axhline(y=-1.96 / np.sqrt(len(df)), linestyle='--', color='red')
plt.axhline(y=1.96 / np.sqrt(len(df)), linestyle='--', color='red')
plt.title('Partial Autocorrelation Function')
plt.show()
# In[13]:

#ACF and PACF plots:
from statsmodels.tsa.stattools import acf, pacf


# In[14]:

from statsmodels.tsa.arima_model import ARIMA


# In[15]:

lag_acf = acf(ts_log_diff, nlags=20)
lag_pacf = pacf(ts_log_diff, nlags=20, method='ols')

#Plot ACF:    
plt.subplot(121)    
plt.plot(lag_acf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')
plt.title('Autocorrelation Function')

#Plot PACF:
plt.subplot(122)
plt.plot(lag_pacf)
plt.axhline(y=0,linestyle='--',color='gray')
plt.axhline(y=-1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')
plt.axhline(y=1.96/np.sqrt(len(ts_log_diff)),linestyle='--',color='gray')
Example #47
0
#df.index=df.Date_time
df.head()

sale = df.Sales
#lnsale=np.log(sale)
#lnsale
#plt.plot(lnsale)

acf_1 = acf(sale)
test_df = pd.DataFrame([acf_1]).T
test_df.columns = ['Auto-correlation']
test_df.index += 1
test_df.plot(kind='bar')
plt.show()

pacf_1 = pacf(sale)
test_df = pd.DataFrame([pacf_1]).T
test_df.columns = ['Partial-Autocorrelation']
test_df.index += 1
test_df.plot(kind='bar')
plt.show()

result = ts.adfuller(sale)
result
sale_diff = sale - sale.shift()
diff = sale_diff.dropna()
acf_1_diff = acf(diff)
test_df = pd.DataFrame([acf_1_diff]).T
test_df.columns = ['First difference Auto-correlation']
test_df.index += 1
test_df.plot(kind='bar')
Example #48
0
plt.plot(seasonal, label='Seasonal')
plt.legend(loc='best')
plt.subplot(414)
plt.legend(loc='best')
plt.tight_layout()

decomposedLogData = residual
decomposedLogData.dropna(inplace=True)
test_stationarity(decomposedLogData)

## ACF and PACF Plots ##

#from statsmodels.tsa.stattools import acf,pacf

lag_acf = acf(dflogDiffShifting, nlags=20)
lag_pacf = pacf(dflogDiffShifting, nlags=20, method='ols')

## Plot ACF ##

plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(dflogDiffShifting)),
            linestyle='--',
            color='gray')
plt.axhline(y=1.96 / np.sqrt(len(dflogDiffShifting)),
            linestyle='--',
            color='gray')
plt.title('Autocorrelation Function')

## Plot PACF ##
Example #49
0
    n: length of series
    Returns: the series
    """
    e = np.random.standard_normal(n)
    y = np.zeros(n)
    for t in np.arange(2,n):
        y[t] = phi1*y[t-1] + phi2*y[t-2] + e[t]
    return y


# examples
n = 100
y = ar2(0.7,0.2,n)
ncorr = 25 # number of lags to compute for the autocorrelation functions
y_acf = acf(y,nlags=ncorr)
y_pacf = pacf(y,nlags=ncorr)
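
# For an AR(2) process the theoretical PACF cuts off after lag 2, so the sample
# PACF values beyond lag 2 should sit inside the +/-1.96/sqrt(n) band.
print('sample PACF at lags 1-3:', y_pacf[1:4])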

# plot the series
fig,ax = plt.subplots(figsize=(14,4))
ax.plot(y,label=r'$y_t$')
ax.legend()
ax.set_title(r'$y_t= \phi_1 y_{t-1} + \phi_2 y_{t-2} + \epsilon_t$')
plt.show()

#plot the acf and pacf
fig2,axes = plt.subplots(2)
fig2.subplots_adjust(hspace=0.5)
axes[0].bar(np.arange(ncorr+1), y_acf)
axes[0].set_title("Autocorrelation")
axes[1].bar(np.arange(ncorr+1), y_pacf)
axes[1].set_title("Partial Autocorrelation")
Example #50
0
ts_logtransformed = (np.log(t_series))
#plt.plot(ts_logtransformed)
#test_stationarity(ts_logtransformed)
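# difference at lag 7 to remove the weekly seasonal cycle before modelling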
ts_diff_logtrans = ts_logtransformed - ts_logtransformed.shift(7)

ts_diff_logtrans.head(10)
#test_stationarity(ts_diff_logtrans)

ts_diff_logtrans.dropna(inplace=True)
#test_stationarity(ts_diff_logtrans)
#plt.plot(ts_diff_logtrans)

#ACF and PACF plots:
lag_acf = acf(ts_diff_logtrans, nlags=30)
lag_pacf = pacf(ts_diff_logtrans, nlags=50, method='ols')

#Plot ACF:
plt.subplot(121)
plt.plot(lag_acf)
plt.axhline(y=0, linestyle='--', color='gray')
plt.axhline(y=-1.96 / np.sqrt(len(ts_diff_logtrans)),
            linestyle='--',
            color='gray')
plt.axhline(y=1.96 / np.sqrt(len(ts_diff_logtrans)),
            linestyle='--',
            color='gray')
plt.title('Autocorrelation Function')

#Plot PACF:
plt.subplot(122)
Example #51
0
#df['First Difference'] = df['https'] - df['https'].shift()  
y = np.array(df['https'] - df['https'].shift())
_draw_multiple_line_plot('protocols_diff.html', 
                         'transport layer protocols', 
                         [X],
                         [y],
                         ['navy'], 
                         ['packets percentage delta'],
                         [None],
                         [1],
                         'datetime', 'Date', 'Packets Percentage Delta', y_start=-100, y_end=100)


df['first difference'] = df['https'] - df['https'].shift()
lag_correlations = acf(df['first difference'].iloc[1:])  
lag_partial_correlations = pacf(df['first difference'].iloc[1:])  

print('lag_correlations')
print(lag_correlations)
y = lag_correlations
_draw_multiple_line_plot('lag_correlations.html', 
                         'lag_correlations', 
                         [X],
                         [y],
                         ['navy'], 
                         ['lag_correlations'],
                         [None],
                         [1],
                         'datetime', 'Date', 'lag_correlations', y_start=-1, y_end=1)

Example #52
0
def arima_model(df: pd.DataFrame, cols: list, lag: int, order: int,
                moving_avg_model: int, with_graph: bool):
    for col in cols:
        model = ARIMA(df[col], order=(lag, order, moving_avg_model))
        model_fit = model.fit()

        print('\t==== Summary of ARIMA(%d, %d, %d) model for %s ====\n' %
              (lag, order, moving_avg_model, col))
        print(model_fit.summary())
        print()

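        # hand-computed goodness of fit: SST is the total variation about the
        # mean, SSR = SST - SSE, hence r_squared = SSR / SST = 1 - SSE / SST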
        x_mean = df[col].mean()
        sst = df[col].apply(lambda x: (x - x_mean)**2).sum()
        ssr = sst - model_fit.sse
        r_squared = ssr / sst
        print('R-squared: %f\n' % r_squared)
        n = len(df[col])
        k = len(model_fit.arroots) + len(model_fit.maroots)
        print('n: %d, k: %d' % (n, k))
        adj_r_sqr = 1 - ((1 - r_squared) * (n - 1)) / (n - k - 1)
        print('Adjusted R-squared: %f' % adj_r_sqr)
        print()

        print('\t==== Correlogram of residuals ====\n')
        acf_results, _, q_stat = acf(model_fit.resid, nlags=15, qstat=True)
        pacf_results = pacf(model_fit.resid, nlags=15)
        for clag in range(0, 16):
            print('%d:' % (clag + 1),
                  acf_results[clag],
                  pacf_results[clag],
                  '-' if clag - 1 < 0 else q_stat[clag - 1],
                  sep='\t')
        print()

        if lag > 0 or moving_avg_model > 0:
            r_matrix = '(ar.L1 = 0)' if lag > 0 else ''
            if len(r_matrix) > 0 and moving_avg_model > 0:
                r_matrix = r_matrix + ','
            r_matrix = r_matrix + ('(ma.L1 = 0)'
                                   if moving_avg_model > 0 else '')
            f_test = model_fit.f_test(r_matrix)
            print('\t==== F Test ====\n', f_test.summary())
            print()

        print('\t==== Summary of residuals for %s ====\n' % col)
        residuals = pd.DataFrame(model_fit.resid)
        print(residuals.describe())
        print()

        if with_graph is True:
            plot_pacf(residuals,
                      lags=15,
                      title='ARIMA(%d, %d, %d): PAC plot for residuals of %s' %
                      (lag, order, moving_avg_model, col))
            plt.show()

            #residuals.plot(kind='kde', title='Density of residuals %s' % col)
            #plt.show()

            ax = pd.plotting.autocorrelation_plot(pd.DataFrame(acf_results))
            ax.set_title('ARIMA(%d, %d, %d): AC plot for residuals of %s' %
                         (lag, order, moving_avg_model, col))
            plt.show()
def segment_analysis(csv_fname, trajectory_df):
    # catch small trajectory_dfs
    if len(trajectory_df.index) < MIN_TRAJECTORY_LEN:
        return None
    else:
        num_segments = len(trajectory_df.index) - WINDOW_LEN
        
        # for each trajectory, loop through segments

#        super_data = np.zeros((num_segments+1, LAGS+1+1, 2*len(INTERESTED_VALS)+1))
#        super_data = np.zeros((2*len(INTERESTED_VALS), num_segments, LAGS+1))
#        super_data_confint_upper = np.zeros((2*len(INTERESTED_VALS), num_segments, LAGS+1))
#        super_data_confint_lower = np.zeros((2*len(INTERESTED_VALS), num_segments, LAGS+1))
        confident_data = np.zeros((2*len(INTERESTED_VALS), num_segments, LAGS+1))
#        segmentnames = np.ndarray.flatten( np.array([["{name:s} seg{index:0>3d}".format(name="C", index=segment_i)]*(LAGS+1) for segment_i in range(num_segments)]) )
        
        for segment_i in range(num_segments):
            # slice out segment from trajectory
            segment = trajectory_df[segment_i:segment_i+WINDOW_LEN]
            
#            data_matrix = np.zeros((2*len(INTERESTED_VALS), LAGS+1))
#            confint_matrix = np.zeros((2*len(INTERESTED_VALS), LAGS+1))
            
            ## for segment, run PACF and ACF for each feature
            
            # do analysis variable by variable
            for var_name, var_values in segment.items():
                # look up the column index for this variable
                var_index = segment.columns.get_loc(var_name)
#                {'velo_x':0, 'velo_y':1, 'velo_z':2, 'curve':3, 'log_curve':4}[var_name]
                
                # run ACF and PACF for the column
                col_acf, acf_confint = acf(var_values, nlags=LAGS, alpha=.05)#,  qstat= True)
                
                # store data
#                super_data[var_index, segment_i, :] = col_acf
#                super_data_confint_lower[var_index, segment_i, :] = acf_confint[:,0]
#                super_data_confint_upper[var_index, segment_i, :] = acf_confint[:,1]
                # make confident data
                acf_confint_distance = acf_confint[:,1] - acf_confint[:,0]
                ACF_conf_booltable = acf_confint_distance[:] >= CONFINT_THRESH
                filtered_data = col_acf.copy()  # copy so col_acf itself is left intact
                filtered_data[ACF_conf_booltable] = 0.
                confident_data[var_index, segment_i, :] = filtered_data
                
                ## , acf_confint, acf_qstats, acf_pvals
                col_pacf, pacf_confint = pacf(var_values, nlags=LAGS, method='ywmle', alpha=.05)
                # TODO: check for PACF values above or below +-1
#                super_data[var_index+len(INTERESTED_VALS), segment_i, :] = col_pacf
#                super_data_confint_lower[var_index+len(INTERESTED_VALS), segment_i, :] = pacf_confint[:,0]
#                super_data_confint_upper[var_index+len(INTERESTED_VALS), segment_i, :] = pacf_confint[:,1]
                
                # make confident data
                pacf_confint_distance = pacf_confint[:,1] - pacf_confint[:,0]
                PACF_conf_booltable = pacf_confint_distance[:] >= CONFINT_THRESH
                filtered_data = col_pacf.copy() # actually make a copy
                filtered_data[PACF_conf_booltable] = 0.
                confident_data[var_index+len(INTERESTED_VALS), segment_i, :] = filtered_data
                
        # analysis panel  
        major_axis=[np.array([csv_fname]*num_segments), np.array(["{index:0>3d}".format(index=segment_i) for segment_i in range(num_segments)])]
        
#        p = pd.Panel(super_data,
#             items=['acf_velox', 'acf_veloy','acf_veloz', 'acf_curve', 'acf_logcurve', 'pacf_velox', 'pacf_veloy', 'pacf_veloz', 'pacf_curve', 'pacf_logcurve'],
##            major_axis=np.array(["{name:s} seg{index:0>3d}".format(name=csv_fname, index=segment_i) for segment_i in range(num_segments)]),
#            major_axis=major_axis,            
#            minor_axis=np.arange(LAGS+1))
#        p.major_axis.names = ['Trajectory', 'segment_ID']
#
#        # confint panel
#        p_confint_upper = pd.Panel(super_data_confint_upper,
#             items=['acf_velox', 'acf_veloy','acf_veloz', 'acf_curve', 'acf_logcurve', 'pacf_velox', 'pacf_veloy', 'pacf_veloz', 'pacf_curve', 'pacf_logcurve'],
##            major_axis=np.array(["{name:s} seg{index:0>3d}".format(name=csv_fname, index=segment_i) for segment_i in range(num_segments)]),
#            major_axis=major_axis,            
#            minor_axis=np.arange(LAGS+1))
#        p_confint_upper.major_axis.names = ['Trajectory', 'segment_ID']  
#        
#        p_confint_lower = pd.Panel(super_data_confint_lower,
#             items=['acf_velox', 'acf_veloy','acf_veloz', 'acf_curve', 'acf_logcurve', 'pacf_velox', 'pacf_veloy', 'pacf_veloz', 'pacf_curve', 'pacf_logcurve'],
##            major_axis=np.array(["{name:s} seg{index:0>3d}".format(name=csv_fname, index=segment_i) for segment_i in range(num_segments)]),
#            major_axis=major_axis,            
#            minor_axis=np.arange(LAGS+1))
#        p_confint_lower.major_axis.names = ['Trajectory', 'segment_ID'] 
        
        # analysis panel  
        
        filtpanel = pd.Panel(confident_data,
             items=['acf_velox', 'acf_veloy','acf_veloz', 'acf_curve', 'acf_logcurve', 'pacf_velox', 'pacf_veloy', 'pacf_veloz', 'pacf_curve', 'pacf_logcurve'],
#            major_axis=np.array(["{name:s} seg{index:0>3d}".format(name=csv_fname, index=segment_i) for segment_i in range(num_segments)]),
            major_axis=major_axis,            
            minor_axis=np.arange(LAGS+1))
        filtpanel.major_axis.names = ['Trajectory', 'segment_ID']
        
        
        return filtpanel
def test_pacf_nlags_error(reset_randomstate):
    e = np.random.standard_normal(100)
    with pytest.raises(ValueError, match="Can only compute partial"):
        pacf(e, 50)
Example #55
0
print(arma_res.summary())


# In[3]:


arma_res.resid.iloc[1:].plot(figsize=(6,4),color='seagreen')
plt.ylabel('$\hat{z_t}$')


# In[4]:


from statsmodels.tsa import stattools
acf,q,pvalue = stattools.acf(arma_res.resid,nlags=5,qstat=True)
pacf,confint = stattools.pacf(arma_res.resid,nlags=5,alpha=0.05)
print("自己相関係数:",acf)
print("p値:",pvalue)
print("偏自己相関:",pacf)
print("95%信頼区間:",confint)


# In[5]:


p = sm.tsa.adfuller(arma_res.resid, regression='nc')[1]  # [1] is the p-value of the test
p1 = sm.tsa.adfuller(arma_res.resid, regression='c')[1]  # [1] is the p-value of the test
print("random walk without drift, p-value:", p)
print("random walk with drift, p-value:", p1)

Example #56
0
    def print_figures(self, directory, prefix=""):
        """Print figures

        Save figure for precipitation time series
        Save figure for cumulative frequency of precipitation in the time
            series
        Save figures for sample autocorrelation and partial autocorrelation
        Save figures for the poisson rate, gamma mean, gamma dispersion, latent
            variable Z over time
        Save the TimeSeries in text (converted to string which shows
            parameters)

        Args:
            directory: where to save the figures
            prefix: what to name the figures
        """

        # required when plotting times on an axis
        pandas.plotting.register_matplotlib_converters()

        colours = matplotlib.rcParams['axes.prop_cycle'].by_key()['color']
        cycle = cycler.cycler(color=[colours[0]], linewidth=[1])

        # get autocorrelations
        acf = stattools.acf(self.y_array, nlags=20, fft=True)
        try:
            pacf = stattools.pacf(self.y_array, nlags=20)
        except (stattools.LinAlgError):
            pacf = np.full(21, np.nan)

        # print precipitation time series
        plt.figure()
        ax = plt.gca()
        ax.set_prop_cycle(cycle)
        plt.plot(self.time_array, self.y_array)
        plt.xlabel("time")
        plt.ylabel("rainfall (mm)")
        plt.savefig(path.join(directory, prefix + "rainfall.pdf"))
        plt.close()

        # print precipitation cumulative frequency
        # draw dot for mass at 0 mm
        plt.figure()
        ax = plt.gca()
        ax.set_prop_cycle(cycle)
        rain_sorted = np.sort(self.y_array)
        cdf = np.asarray(range(len(self)))
        plt.plot(rain_sorted, cdf)
        if np.any(rain_sorted == 0):
            non_zero_index = rain_sorted.nonzero()[0]
            if non_zero_index.size > 0:
                non_zero_index = rain_sorted.nonzero()[0][0] - 1
            else:
                non_zero_index = len(cdf) - 1
            plt.scatter(0, cdf[non_zero_index])
        plt.xlabel("rainfall (mm)")
        plt.ylabel("cumulative frequency")
        plt.savefig(path.join(directory, prefix + "cdf.pdf"))
        plt.close()

        # plot sample autocorrelation
        plt.figure()
        ax = plt.gca()
        ax.set_prop_cycle(cycle)
        plt.bar(np.asarray(range(acf.size)), acf)
        plt.axhline(1 / math.sqrt(len(self)), linestyle='--', linewidth=1)
        plt.axhline(-1 / math.sqrt(len(self)), linestyle='--', linewidth=1)
        plt.xlabel("time (day)")
        plt.ylabel("autocorrelation")
        plt.savefig(path.join(directory, prefix + "acf.pdf"))
        plt.close()

        # plot sample partial autocorrelation
        plt.figure()
        ax = plt.gca()
        ax.set_prop_cycle(cycle)
        plt.bar(np.asarray(range(pacf.size)), pacf)
        plt.axhline(1 / math.sqrt(len(self)), linestyle='--', linewidth=1)
        plt.axhline(-1 / math.sqrt(len(self)), linestyle='--', linewidth=1)
        plt.xlabel("time (day)")
        plt.ylabel("partial autocorrelation")
        plt.savefig(path.join(directory, prefix + "pacf.pdf"))
        plt.close()

        # plot the poisson rate over time
        plt.figure()
        ax = plt.gca()
        ax.set_prop_cycle(cycle)
        plt.plot(self.time_array, self.poisson_rate.value_array)
        plt.xlabel("time")
        plt.ylabel("poisson rate")
        plt.savefig(path.join(directory, prefix + "poisson_rate.pdf"))
        plt.close()

        # plot the gamma mean over time
        plt.figure()
        ax = plt.gca()
        ax.set_prop_cycle(cycle)
        plt.plot(self.time_array, self.gamma_mean.value_array)
        plt.xlabel("time")
        plt.ylabel("gamma mean (mm)")
        plt.savefig(path.join(directory, prefix + "gamma_mean.pdf"))
        plt.close()

        # plot the gamma dispersion over time
        plt.figure()
        ax = plt.gca()
        ax.set_prop_cycle(cycle)
        plt.plot(self.time_array, self.gamma_dispersion.value_array)
        plt.xlabel("time")
        plt.ylabel("gamma dispersion")
        plt.savefig(path.join(directory, prefix + "gamma_dispersion.pdf"))
        plt.close()

        # plot the latent variable z over time
        plt.figure()
        ax = plt.gca()
        ax.set_prop_cycle(cycle)
        plt.plot(self.time_array, self.z_array)
        plt.xlabel("time")
        plt.ylabel("Z")
        plt.savefig(path.join(directory, prefix + "z.pdf"))
        plt.close()

        # print the parameters in text
        file = open(path.join(directory, prefix + "parameter.txt"), "w")
        file.write(str(self))
        file.close()
Example #57
0
                               color='lightgray',
                               ax=ax1)
ax2 = fig.add_subplot(1, 2, 2)
fig = sm.graphics.tsa.plot_pacf(lnn225.squeeze(),
                                lags=40,
                                color='lightgray',
                                ax=ax2)

#fig.show()

arma_mod = sm.tsa.ARMA(lnn225, order=(1, 0))
arma_res = arma_mod.fit(trend='c', disp=-1)
print(arma_res.summary())

acf, q, pvalue = stattools.acf(arma_res.resid, nlags=5, qstat=True)
pacf, confint = stattools.pacf(arma_res.resid, nlags=5, alpha=0.05)
print("自己相関係数:", acf)
print("p値:", pvalue)
print("偏自己相関:", pacf)
print("95%信頼区間:", confint)

p = sm.tsa.adfuller(arma_res.resid, regression='nc')[1]  # [1] is the p-value of the test
p1 = sm.tsa.adfuller(arma_res.resid, regression='c')[1]  # [1] is the p-value of the test
print("random walk without drift, p-value:", p)
print("random walk with drift, p-value:", p1)

from scipy.stats import t
resid = arma_res.resid.iloc[1:]
m = resid.mean()
v = resid.std()
resid_max = pd.Series.rolling(arma_res.resid, window=250).mean().max()
Example #58
0
# -*- coding: utf-8 -*-

import numpy as np
from pandas import *
from statsmodels.tsa import stattools
import matplotlib.pyplot as plt

randn = np.random.randn

ts = Series(randn(1000), index=date_range('2000/1/1', periods=1000))
ts = ts.cumsum()

ts.plot(style='<--')
ts.rolling(60).mean().plot(style='--', c='r')
ts.rolling(180).mean().plot(style='--', c='b')

acf = stattools.acf(np.array(ts), nlags=50)
plt.bar(range(len(acf)), acf, width=0.01)
plt.savefig("image.png")

pcf = stattools.pacf(np.array(ts), 50)
plt.bar(range(len(pcf)), pcf, width=0.01)
plt.savefig("image2.png")
plt.show()
Example #59
0
def plot_pacf(x, ax=None, lags=None, alpha=.05, method='ywm', use_vlines=True,
              title='Partial Autocorrelation', zero=True, **kwargs):
    """Plot the partial autocorrelation function

    Plots lags on the horizontal and the correlations on vertical axis.

    Parameters
    ----------
    x : array_like
        Array of time-series values
    ax : Matplotlib AxesSubplot instance, optional
        If given, this subplot is used to plot in instead of a new figure being
        created.
    lags : int or array_like, optional
        int or Array of lag values, used on horizontal axis. Uses
        np.arange(lags) when lags is an int.  If not provided,
        ``lags=np.arange(len(corr))`` is used.
    alpha : scalar, optional
        If a number is given, the confidence intervals for the given level are
        returned. For instance if alpha=.05, 95 % confidence intervals are
        returned where the standard deviation is computed according to
        1/sqrt(len(x))
    method : 'ywunbiased' (default) or 'ywmle' or 'ols'
        specifies which method for the calculations to use:

        - yw or ywunbiased : yule walker with bias correction in denominator
          for acovf
        - ywm or ywmle : yule walker without bias correction
        - ols - regression of time series on lags of it and on constant
        - ld or ldunbiased : Levinson-Durbin recursion with bias correction
        - ldb or ldbiased : Levinson-Durbin recursion without bias correction
    use_vlines : bool, optional
        If True, vertical lines and markers are plotted.
        If False, only markers are plotted.  The default marker is 'o'; it can
        be overridden with a ``marker`` kwarg.
    title : str, optional
        Title to place on plot.  Default is 'Partial Autocorrelation'
    zero : bool, optional
        Flag indicating whether to include the 0-lag autocorrelation.
        Default is True.
    **kwargs : kwargs, optional
        Optional keyword arguments that are directly passed on to the
        Matplotlib ``plot`` and ``axhline`` functions.

    Returns
    -------
    fig : Matplotlib figure instance
        If `ax` is None, the created figure.  Otherwise the figure to which
        `ax` is connected.

    See Also
    --------
    matplotlib.pyplot.xcorr
    matplotlib.pyplot.acorr
    mpl_examples/pylab_examples/xcorr_demo.py

    Notes
    -----
    Adapted from matplotlib's `xcorr`.

    Data are plotted as ``plot(lags, corr, **kwargs)``

    """
    fig, ax = utils.create_mpl_ax(ax)

    lags, nlags, irregular = _prepare_data_corr_plot(x, lags, zero)

    confint = None
    if alpha is None:
        acf_x = pacf(x, nlags=nlags, alpha=alpha, method=method)
    else:
        acf_x, confint = pacf(x, nlags=nlags, alpha=alpha, method=method)

    _plot_corr(ax, title, acf_x, confint, lags, irregular, use_vlines, **kwargs)

    return fig
Example #60
0
# y(t-1),y(t-2),y(t-3).
# q : This is the number of MA (Moving-Average) terms. Example: if q is 3, the predictors for y(t) include the
# lagged forecast errors e(t-1), e(t-2), e(t-3).
# d : This is the number of non-seasonal differences applied to make the series stationary.

#FIND VALUES OF p AND q:

# Autocorrelation Function (ACF): measures the correlation between the series and a lagged version of itself.
# Example: at lag 4, the ACF compares the series over t1...t2 with the series over t1-4...t2-4.
# Partial Autocorrelation Function (PACF): measures the degree of association between y(t) and y(t-p)
# after removing the effect of the intermediate lags.

from statsmodels.tsa.arima_model import ARIMA
#ACF and PACF plots:
from statsmodels.tsa.stattools import acf, pacf
lag_acf = acf(ts_log_diff, nlags=20)
lag_pacf = pacf(ts_log_diff, nlags=20, method='ols')
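
# A hedged alternative (not in the original): statsmodels can return the
# confidence intervals directly via the alpha argument, instead of drawing
# manual +/-1.96/sqrt(n) lines.
lag_pacf_vals, lag_pacf_confint = pacf(ts_log_diff, nlags=20, method='ols', alpha=0.05)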

#plot ACF:
# plt.subplot(121)
# plt.plot(lag_acf)
# plt.axhline(y=0, linestyle='--', color='gray')
# plt.axhline(y=-1.96/np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')
# plt.axhline(y=1.96/np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')
# plt.title('Autocorrelation Function')

#Plot PACF:
# plt.subplot(122)
# plt.plot(lag_pacf)
# plt.axhline(y=0, linestyle='--', color='gray')
# plt.axhline(y=-1.96/np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')
# plt.axhline(y=1.96/np.sqrt(len(ts_log_diff)), linestyle='--', color='gray')