def calculate_ndiffs(self, alpha=0.05, test="kpss", max_d=2):
    """
    Estimate the ARIMA differencing order ``d`` independently for each group.

    The grouped data is prepared by calling ``self._create_group_df()``; ``ndiffs``
    is then evaluated on the ``self._y_col`` column of every group's DataFrame.
    The result is meant to be supplied as a fixed ``d`` when tuning ``pmdarima``
    models (presumably to shorten the order search -- TODO confirm).

    :param alpha: significance level used to decide whether the p-value obtained
                  while testing a candidate ``d`` is significant.
                  Default: ``0.05``
    :param test: Type of unit root test used for the stationarity decision.
                 Supported values: ``['kpss', 'adf', 'pp']``
                 See:
                 https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.KPSSTest.html#pmdarima.arima.KPSSTest
                 https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.PPTest.html#pmdarima.arima.PPTest
                 https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.ADFTest.html#pmdarima.arima.ADFTest
                 Default: ``'kpss'``
    :param max_d: The largest value of ``d`` that will be tried.
    :return: Dictionary of ``{<group_key>: <optimal 'd' value>}``
    """
    self._create_group_df()

    group_ndiffs = {}
    # self._group_df yields (group key, group DataFrame) pairs
    for group_key, frame in self._group_df:
        group_ndiffs[group_key] = ndiffs(
            x=frame[self._y_col], alpha=alpha, test=test, max_d=max_d
        )
    return group_ndiffs
def check_stationarity(dataframe, alpha=0.05):
    """
    Performs the Augmented Dickey-Fuller test on all the series constituting
    the dataframe given and prints one summary row per column.

    Each row shows the column label, whether the series is considered
    stationary, the ADF p-value, and the differencing order estimated by
    ``ndiffs`` (0 when no differencing is needed).

    :param dataframe: dataframe to analyse with the ADF test; every column is
                      tested as a separate series.
    :param alpha: significance level used both for the ADF test and for the
                  differencing-order estimate. Default: ``0.05``
    :return: None. The results are only printed.
    """
    # Make sure that the original time series is not modified
    data = dataframe.copy()

    print('\nResults of Dickey-Fuller Test:')
    print('{:<15}{:<15}{:<10}{:<10}'.format('Column', 'Stationary', 'P-value', 'Order'))

    adf_test = ADFTest(alpha=alpha)

    for column in data.columns:
        series = data[column]

        # should_diff returns the p-value and whether differencing is needed
        p_value, should = adf_test.should_diff(series)

        # Estimate the order with the same (ADF) test that made the decision
        # above; ndiffs defaults to KPSS, which can disagree with the ADF
        # verdict and report order 0 for a series flagged as non-stationary.
        # A stationary series keeps order 0.
        order = ndiffs(series, alpha=alpha, test='adf') if should else 0

        # str() is required: a bool combined with an alignment spec is
        # formatted through int.__format__ and would print 1/0, not True/False.
        stationary = str(not should)

        # str(column) keeps non-string labels (e.g. tuples) formattable.
        print(f'{str(column):<15}{stationary:<15}{p_value:<10.5f}{order:<10}')
def find_order(self, data):
    """
    Select a non-seasonal ARIMA order for ``data`` and prepare a model with it.

    The differencing term is the larger of the KPSS and ADF estimates returned
    by ``ndiffs``. A stepwise ``pm.auto_arima`` search with that fixed ``d``
    then picks the order, which is stored on ``self.order`` and used to build
    a fresh ``pm.arima.ARIMA`` on ``self.model``.

    :param data: the series passed to ``ndiffs`` and ``pm.auto_arima``.
    :return: ``self``, so calls can be chained.
    """
    # Differencing term: be conservative and take the larger of both tests
    n_diffs = max(
        ndiffs(data, alpha=0.05, test=unit_test, max_d=6)
        for unit_test in ('kpss', 'adf')
    )

    # Stepwise search for the remaining order terms with d held fixed
    search_options = dict(
        d=n_diffs,
        seasonal=False,
        stepwise=True,
        suppress_warnings=True,
        error_action="ignore",
        max_order=None,
        trace=True,
        maxiter=20,
    )
    searched = pm.auto_arima(data, **search_options)

    self.order = searched.order
    self.model = pm.arima.ARIMA(order=self.order)
    return self
def fit_eval(self, data, validation_data, **config):
    """
    Fit on the training data from scratch and score on the validation data.

    On the first call (``self.model_init`` is falsy) the differencing terms are
    estimated, written into ``config`` as ``d`` and ``D`` (replacing any values
    passed in under those keys), and the model is built via ``self._build``.

    :param data: A 1-D numpy array as the training data
    :param validation_data: A 1-D numpy array as the evaluation data
    :param config: keyword arguments forwarded to ``self._build`` on first use
    :return: a dict mapping ``self.metric`` to the evaluation metric value
    """
    if not self.model_init:
        # Differencing term (d): larger of the KPSS and ADF estimates
        kpss_diffs = ndiffs(data, alpha=0.05, test='kpss', max_d=6)
        adf_diffs = ndiffs(data, alpha=0.05, test='adf', max_d=6)
        regular_order = max(adf_diffs, kpss_diffs)

        # Seasonal differencing term (D): only estimated for seasonal models
        if self.seasonal:
            seasonal_order = nsdiffs(data, m=7, max_D=12)
        else:
            seasonal_order = 0

        config.update(d=regular_order, D=seasonal_order)
        self._build(**config)
        self.model_init = True

    self.model.fit(data)

    scores = self.evaluate(x=None, target=validation_data, metrics=[self.metric])
    # evaluate returns one entry per requested metric; unwrap the single one
    val_metric = scores[0].item()
    return {self.metric: val_metric}
(1, 1), (2, 0), (2, 1) ] for lag, ax_coords in enumerate(ax_idcs, 1): ax_row, ax_col = ax_coords axis = axes[ax_row][ax_col] lag_plot(df['open'], lag=lag, ax=axis) axis.set_title(f"Lag={lag}") plt.show() kpss_diffs = ndiffs(y_train, alpha=0.05, test='kpss', max_d=6) adf_diffs = ndiffs(y_train, alpha=0.05, test='adf', max_d=6) n_diffs = max(adf_diffs, kpss_diffs) print(f"Estimated differencing term: {n_diffs}") auto = pm.auto_arima(y_train, d=n_diffs, seasonal=False, stepwise=True, suppress_warnings=True, error_action="ignore", max_p=6, max_order=None, trace=True) print(auto.order) from sklearn.metrics import mean_squared_error from pmdarima.metrics import smape model = auto
Path("data").glob("forecast_*.json")), ignore_index=True) df["time"] = df.apply(lambda r: datetime.fromtimestamp(r["time"]), axis=1) df = df.sort_values(by=["time"]) temperature = df["temperature"] temperature = temperature.fillna(temperature.mean()) train, test = train_test_split(temperature, train_size=temperature.shape[0] - 365) print(f"training size: {train.shape[0]}") print(f"testing size: {test.shape[0]}") # %% kpss_diffs = ndiffs(train, alpha=0.05, test="kpss", max_d=6) adf_diffs = ndiffs(train, alpha=0.05, test="adf", max_d=6) n_diffs = max(adf_diffs, kpss_diffs) print(f"d: {n_diffs}") # %% model = auto_arima( train, d=n_diffs, seasonal=True, m=4, stepwise=True, suppress_warnings=True, max_p=6, trace=2,
from matplotlib import pyplot as plt print("pmdarima version: %s" % pm.__version__) # Load the data and split it into separate pieces y, X = load_date_example() y_train, y_test, X_train, X_test = \ model_selection.train_test_split(y, X, test_size=20) # We can examine traits about the time series: pm.tsdisplay(y_train, lag_max=10) # We can see the ACF increases and decreases rather rapidly, which means we may # need some differencing. There also does not appear to be an obvious seasonal # trend. n_diffs = arima.ndiffs(y_train, max_d=5) # Here's what the featurizer will create for us: date_feat = preprocessing.DateFeaturizer( column_name="date", # the name of the date feature in the exog matrix with_day_of_week=True, with_day_of_month=True) _, X_train_feats = date_feat.fit_transform(y_train, X_train) print("Head of generated exog features:\n%s" % repr(X_train_feats.head())) # We can plug this exog featurizer into a pipeline: pipe = pipeline.Pipeline([ ('date', date_feat), ('arima', arima.AutoARIMA(d=n_diffs, trace=3,
# test['traffic'].plot(legend = True) date_today = date.today().strftime('%Y-%m-%d') date_tomorrow = max(traffic.index) + timedelta(days=1) date_next_7d = pd.date_range( start=date_tomorrow, periods=7, freq='D').strftime(date_format='%Y-%m-%d').tolist() future_forecast = pd.DataFrame({ 'date': date_next_7d, 'traffic': future_forecast }).assign(date_of_forecast=date_today) future_forecast.loc[future_forecast['traffic'] < 0, 'traffic'] = 0 future_forecast['traffic'] = future_forecast['traffic'].astype(int) engine = create_engine( "mysql://*****:*****@etcinsights.nazwa.pl/etcinsights_harebakken" ) future_forecast.to_sql('traffic_date_forecast_python', con=engine, if_exists='append', index=True) print('Data uploaded to db successfully!') from pmdarima.arima import ndiffs kpss_diffs = ndiffs(traffic, alpha=0.05, test='kpss', max_d=6) adf_diffs = ndiffs(traffic, alpha=0.05, test='adf', max_d=6) n_diffs = max(adf_diffs, kpss_diffs) print(f"Estimated differencing term: {n_diffs}") # Estimated differencing term: 1