order=(p, 0, q),
                             seasonal_order=(1, 1, 0, 12)).fit()

        # Store the AICc of the model fitted with order (p, 0, q) in cell
        # (p, q) of the results grid; if the value cannot be read or stored,
        # mark the cell as missing instead of aborting.
        # NOTE(review): the .fit() call just above sits outside this try, so
        # an error raised while fitting is not handled here.
        try:
            sarima_models.iloc[p, q] = fit_sarima.aicc
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt and SystemExit still propagate.
            sarima_models.iloc[p, q] = np.nan

# Evaluate the AICc grid filled in above (displayed when run interactively).
sarima_models

# p = q = 1 gave the lowest AICc, so the selected model is
# ARIMA(1,0,1)(1,1,0)12.
selected_spec = {'order': (1, 0, 1), 'seasonal_order': (1, 1, 0, 12)}
fit_sarima = SARIMAX(bike['num_rides'], **selected_spec).fit()

fit_sarima.summary()

# Overlay the fitted SARIMA values on the observed ride counts.
bike_plot = bike['num_rides'].plot(
    title="Seasonal ARIMA Model Bike Share Fit",
    figsize=(10, 6),
)
bike_plot.set_ylabel("Number of Bike rentals")
bike_plot.set_xlabel("Year")

# Drop the first 12 fitted values before drawing (presumably because the
# period-12 seasonal difference makes them unreliable -- confirm).
fitted_after_first_season = fit_sarima.fittedvalues.iloc[12:]
fitted_after_first_season.plot(ax=bike_plot, style='--', color='DarkOrange')
bike_plot.legend(['Bike Rentals', 'SARIMA Model Fit'])

# Plotting SARIMA model residuals and QQ-plot
# Create a 1x2 grid of axes on a 10x4-inch figure (presumably one panel for
# the residuals and one for the QQ-plot -- confirm against the plotting calls).
fig, axes = plt.subplots(1, 2, figsize=(10, 4))
# $$
# \begin{align}
# Y_t - X_t \beta & = \delta + \rho (Y_{t-1} - X_{t-1}\beta) + \eta_t \\
# \eta_t & \sim WN(0,\sigma^2) \\
# \end{align}
# $$
#
# In the next example, we omit the trend and instead include a column of
# ones as the exogenous regressor, which produces a model that is
# equivalent, in large samples, to the case with no exogenous regressor and
# `trend="c"`. Here the estimated value of `const` matches the value
# estimated using `ARIMA`. This happens because both `exog` in `SARIMAX` and
# the trend in `ARIMA` are treated as linear regression models with ARMA
# errors.

# Replace the trend term with an all-ones exogenous column shaped like `y`.
ones_exog = np.ones_like(y)
sarimax_exog_res = SARIMAX(
    y,
    exog=ones_exog,
    order=(1, 0, 0),
    trend="n",
).fit()
print(sarimax_exog_res.summary())

# ### Using `exog` in `SARIMAX` and `ARIMA`
#
# While `exog` is treated the same way in both models, the intercept
# continues to differ. Below, we add an exogenous regressor to `y` and then
# fit the model using all three methods. The data generating process is now
#
# $$
# \begin{align}
# Y_t & = \delta + X_t \beta + \epsilon_t \\
# \epsilon_t & = \rho \epsilon_{t-1} + \eta_t \\
# \eta_t & \sim WN(0,\sigma^2) \\
# \end{align}
# $$
#