Beispiel #1
0
 def Prediction(self, n_days, params='Best'):
     """Forecast every county with one shared set of ARIMA parameters.

     A single SARIMAX model is fitted on ``self.Values``. Its fitted
     parameter vector is then applied, through ``smooth`` (no refitting), to
     a SARIMAX model built on each county series of ``self.Data_Dates``.

     Parameters
     ----------
     n_days : int
         Added to the length of each county series to form the ``end``
         argument of ``get_prediction`` (the prediction starts at 0).
     params : tuple or 'Best', optional
         ARIMA ``order`` handed to SARIMAX. The default ``'Best'`` uses
         ``self.BestParams``.

     Returns
     -------
     pandas.DataFrame
         Columns ``County``, ``mean``, ``mean_ci_upper`` and
         ``mean_ci_lower`` (``alpha=0.05``), index named ``date``. The same
         frame is stored in ``self.Pred``.

     Raises
     ------
     ValueError
         If ``params`` is ``'Best'`` but ``self.BestParams`` is not set.
     """
     columns = ['County', 'mean', 'mean_ci_upper', 'mean_ci_lower']
     if isinstance(params, str) and params == 'Best':
         try:
             params = self.BestParams
         except AttributeError as err:
             # Fail here with a clear message instead of handing the
             # string 'Best' to SARIMAX as an order.
             raise ValueError(
                 'Please do a grid search to find the best parameters'
             ) from err
     self.Build_Training_Data(v=0, training=False)
     BestMod = SARIMAX(self.Values,
                       order=params,
                       missing='drop',
                       enforce_invertibility=False)
     BestRes = BestMod.fit(disp=0)
     frames = []
     for county in self.Counties:
         DataCounty = self.Data_Dates[county]
         ModelCounty = SARIMAX(DataCounty,
                               order=params,
                               missing='drop',
                               enforce_invertibility=False)
         # Reuse the globally fitted parameters on this county's series.
         res = ModelCounty.smooth(BestRes.params)
         fc = res.get_prediction(0, len(DataCounty) + n_days)
         frame = fc.summary_frame(alpha=0.05)
         frame['County'] = [county] * len(frame)
         frames.append(frame[columns])
     # DataFrame.append no longer exists in pandas >= 2.0; concatenate once.
     if frames:
         self.Pred = pd.concat(frames)
     else:
         self.Pred = pd.DataFrame(columns=columns)
     self.Pred.index.name = 'date'
     return self.Pred
Beispiel #2
0
    def _update(self, y, X=None):
        """
        Refresh the forecaster with new observations without refitting.

        A new SARIMAX model is built on the supplied data, initialised with
        the last predicted state (and its covariance) of the previously
        fitted estimator, and then smoothed using the previously fitted
        parameters. The result is stored in ``self._updated_estimator``.

        Parameters
        ----------
        y : pandas.Series
            New time series used to update the previously fitted forecaster.
        X : pandas.DataFrame, shape=[n_obs, n_vars], optional (default=None)
            An optional 2-d dataframe of exogenous variables. If provided,
            these variables are used as additional features in the
            regression operation. This should not include a constant or
            trend. Note that if an ``ARIMA`` is fit on exogenous features,
            it must also be provided exogenous features for making
            predictions.

        Returns
        -------
        self : An instance of self
        """
        # TODO for updating see https://github.com/statsmodels/statsmodels/issues/2788 and
        #  https://github.com/statsmodels/statsmodels/issues/3318

        # Unnest the incoming series / exogenous frame.
        endog = self._prepare_y(y)
        exog_data = self._prepare_X(X)

        # Same model specification as the fitted forecaster, new data.
        model_options = dict(
            exog=exog_data,
            order=self.order,
            seasonal_order=self.seasonal_order,
            trend=self.trend,
            enforce_stationarity=self.enforce_stationarity,
            enforce_invertibility=self.enforce_invertibility,
        )
        refreshed = SARIMAX(endog, **model_options)

        # Start the state-space recursion from where the fitted model ended.
        previous_fit = self._fitted_estimator
        last_state = previous_fit.predicted_state[:, -1]
        last_state_cov = previous_fit.predicted_state_cov[:, :, -1]
        refreshed.initialize_known(last_state, last_state_cov)

        # Smooth with the already-fitted parameters instead of re-estimating.
        self._updated_estimator = refreshed.smooth(previous_fit.params)
        return self
Beispiel #3
0
 def GridSearch(self, n_days):
     """Grid-search the ARIMA order that minimises the hold-out MAE.

     For every ``(p, d, q)`` with ``p, q`` in 1..4 and ``d`` in 0..2, one
     SARIMAX model is fitted on ``self.Values``. Its parameters are applied
     (via ``smooth``, no refitting) to each county series with the last
     ``self.v`` observations removed, and the mean absolute error of the
     forecast over those held-out observations is averaged over counties.

     Parameters
     ----------
     n_days : int
         Passed as ``v`` to ``self.Build_Training_Data``; presumably this is
         what ends up in ``self.v`` -- TODO confirm against that method.

     Side effects
     ------------
     Sets ``self.BestParams`` to the best ``(p, d, q)``, prints the best
     score and parameters, and silences all warnings process-wide.

     Raises
     ------
     ValueError
         If no parameter combination could be evaluated.
     """
     self.Build_Training_Data(v=n_days, training=True)
     warnings.filterwarnings("ignore")
     params = []
     scores = []
     for p in range(1, 5):
         for q in range(1, 5):
             for d in range(3):
                 order = (p, d, q)
                 try:
                     model = SARIMAX(self.Values,
                                     order=order,
                                     missing='drop',
                                     enforce_invertibility=False)
                     results = model.fit(disp=0)
                     scores_counties = []
                     for county in self.Counties:
                         DataCounty = self.Data_Dates[county].dropna()
                         ModelCounty = SARIMAX(DataCounty[:-self.v],
                                               order=order,
                                               missing='drop',
                                               enforce_invertibility=False)
                         res = ModelCounty.smooth(results.params)
                         fc = res.get_prediction(
                             len(DataCounty) - self.v, len(DataCounty))
                         forecast = fc.summary_frame(alpha=0.05)['mean']
                         Y = DataCounty.iloc[-self.v:].values
                         # ``end`` is inclusive, so the forecast holds one
                         # step more than the hold-out; the FIRST self.v
                         # values are the ones aligned with Y.
                         Yhat = forecast.iloc[:self.v].values
                         scores_counties.append(np.mean(np.abs(Y - Yhat)))
                 except Exception:
                     print('Training failed for parameters :', (p, d, q))
                     # Never record a score for a failed combination (it
                     # would be undefined or left over from the last one).
                     continue
                 scores.append(np.nanmean(scores_counties))
                 params.append(order)
     if not scores:
         raise ValueError(
             'Training failed for every parameter combination')
     # nanargmin: a NaN score must not be selected as the best one.
     argbest = int(np.nanargmin(scores))
     print('Best MAE : ', scores[argbest])
     print('Best params : ', params[argbest])
     self.BestParams = params[argbest]
Beispiel #4
0
 def update_forecast_SARIMAX(self, recent_flow):
     """Apply the stored SARIMAX parameters to the latest observations.

     A SARIMAX model with order (4, 1, 1) and seasonal order (0, 1, 1, 24)
     is built on ``recent_flow`` and smoothed with ``self.model_params``
     rather than being refitted. The resulting results object is stored in
     ``self.model_fit_recent``; nothing is returned.
     """
     arima_order = (4, 1, 1)
     seasonal_spec = (0, 1, 1, 24)
     recent_model = SARIMAX(recent_flow,
                            order=arima_order,
                            seasonal_order=seasonal_spec)
     stored_params = self.model_params
     self.model_fit_recent = recent_model.smooth(stored_params)
Beispiel #5
0
def GridSearch(Regions,
               Regions_Daily_Cases,
               Values,
               Food_Insecure=None,
               Pop=None,
               Dict_Pop=None,
               Dict_Food_Insec=None,
               exog=True,
               plot=False,
               v=7):
    """Grid-search the ARIMA order that minimises the hold-out MAE.

    For every ``(p, d, q)`` with ``p, q`` in 1..4 and ``d`` in 0..2, one
    SARIMAX model is fitted on ``Values``. Its parameters are applied (via
    ``smooth``, no refitting) to each region's series with the last ``v``
    observations removed; the mean absolute error of the forecast over those
    held-out observations is averaged over regions.

    Parameters
    ----------
    Regions : pandas.DataFrame
        Must provide the columns ``Region`` and ``Color``.
    Regions_Daily_Cases : mapping of region -> pandas.Series
        Indexed by the values of ``Regions['Region']``.
    Values : array-like
        Endogenous data of the globally fitted model.
    Food_Insecure, Pop : array-like, optional
        Stacked as the two exogenous columns of the global model
        (only read when ``exog`` is true).
    Dict_Pop, Dict_Food_Insec : mapping, optional
        Per-region values repeated to form each region's exogenous
        regressors (only read when ``exog`` is true).
    exog : bool
        Whether to use the exogenous regressors.
    plot : bool
        If true, one figure per parameter combination is saved under
        ``PredictionCountiesDailyExog/`` and shown.
    v : int
        Number of trailing observations held out for scoring.

    Returns
    -------
    tuple
        ``(best_order, best_score)``.

    Raises
    ------
    ValueError
        If no parameter combination could be evaluated.

    Notes
    -----
    Silences all warnings process-wide.
    """
    Palette = dict(Regions[['Region', 'Color']].to_dict('split')['data'])
    warnings.filterwarnings("ignore")
    formatter = mdates.DateFormatter('%a %d/%m')
    params = []
    scoresExog = []
    List_Regions = pd.unique(Regions['Region'])

    def region_exog(region, n_rows):
        """Return an (n_rows, 2) array repeating the region's regressors."""
        return np.array([[Dict_Pop[region]] * n_rows,
                         [Dict_Food_Insec[region]] * n_rows]).transpose()

    # Loop-invariant: exogenous matrix of the global model (None = no exog).
    values_exog = np.array([Pop, Food_Insecure]).transpose() if exog else None

    for p in range(1, 5):
        for q in range(1, 5):
            for d in range(3):
                order = (p, d, q)
                try:
                    model = SARIMAX(Values,
                                    exog=values_exog,
                                    order=order,
                                    missing='drop',
                                    enforce_invertibility=False)
                    results = model.fit(disp=0)
                    scores_counties = []
                    if plot:
                        plt.figure()
                        ax = plt.gca()
                        plt.xticks(rotation=20)
                        ax.xaxis.set_major_locator(
                            mdates.DayLocator(interval=7))
                        ax.xaxis.set_major_formatter(formatter)
                        plt.title(
                            "Daily Cases Predicted with a single ARIMA({},{},{}) model"
                            .format(p, d, q))
                    for region in List_Regions:
                        DataCounty = Regions_Daily_Cases[region].dropna()
                        train = DataCounty[:-v]
                        if exog:
                            train_exog = region_exog(region, len(train))
                            # ``end`` is inclusive, hence v + 1 forecast rows.
                            forecast_kwargs = {
                                'exog': region_exog(region, v + 1)
                            }
                        else:
                            # Do not touch Dict_Pop / Dict_Food_Insec here:
                            # they may legitimately be None without exog.
                            train_exog = None
                            forecast_kwargs = {}
                        ModelCounty = SARIMAX(train,
                                              exog=train_exog,
                                              order=order,
                                              missing='drop',
                                              enforce_invertibility=False)
                        res = ModelCounty.smooth(results.params)
                        fc = res.get_prediction(len(DataCounty) - v,
                                                len(DataCounty),
                                                **forecast_kwargs)
                        frame = fc.summary_frame(alpha=0.05)
                        fc = frame['mean']
                        Y = DataCounty.iloc[-v:].values
                        # The forecast has one step more than the hold-out;
                        # its FIRST v values are the ones aligned with Y.
                        Yhat = fc.iloc[:v].values
                        scores_counties.append(np.mean(np.abs(Y - Yhat)))
                        if plot:
                            confInf = frame['mean_ci_lower']
                            confSup = frame['mean_ci_upper']
                            pl = plt.plot(DataCounty,
                                          label=region,
                                          color=Palette[region])
                            plt.fill_between(confInf.index,
                                             confSup,
                                             confInf,
                                             alpha=0.3,
                                             color=pl[0].get_color())
                            plt.plot(fc, '--', color=pl[0].get_color())
                    if plot:
                        plt.text(1,
                                 0.9,
                                 'Mean Absolute Error : {:.0f}'.format(
                                     np.nanmean(scores_counties)),
                                 transform=ax.transAxes,
                                 horizontalalignment='left')
                        plt.yscale('log')
                        plt.legend(bbox_to_anchor=(1, 0.5),
                                   loc='center left',
                                   fontsize=6)
                        plt.savefig(
                            'PredictionCountiesDailyExog/ARIMA{}{}{}_Pred.png'.
                            format(p, d, q))
                        plt.show()
                    scoresExog.append(np.nanmean(scores_counties))
                    params.append(order)
                except Exception:
                    print('Training Failed for parameters :')
                    print(p, d, q)

    if not scoresExog:
        raise ValueError('Training failed for every parameter combination')
    # nanargmin: a NaN score must not be selected as the best one.
    argbest = int(np.nanargmin(scoresExog))
    print('Best distance : ', scoresExog[argbest])
    print('Best params : ', params[argbest])
    BestParams = params[argbest]
    return BestParams, scoresExog[argbest]
Beispiel #6
0
def Prediction(Regions,
               Regions_Daily_Cases,
               Values,
               BestParams,
               Food_Insecure=None,
               Pop=None,
               Dict_Pop=None,
               Dict_Food_Insec=None,
               exog=True,
               plot=False,
               v=7):
    """Forecast every region with one shared set of ARIMA parameters.

    A single SARIMAX model of order ``BestParams`` is fitted on ``Values``.
    Its fitted parameters are then applied (via ``smooth``, no refitting) to
    a SARIMAX model built on each region's series, and predictions are
    requested from observation 0 up to ``len(series) + v``.

    Parameters
    ----------
    Regions : pandas.DataFrame
        Must provide the column ``Region``; ``Color`` is also required when
        ``plot`` is true.
    Regions_Daily_Cases : mapping of region -> pandas.Series
        Indexed by the values of ``Regions['Region']``.
    Values : array-like
        Endogenous data of the globally fitted model.
    BestParams : tuple
        ARIMA ``order`` handed to SARIMAX.
    Food_Insecure, Pop : array-like, optional
        Stacked as the two exogenous columns of the global model
        (only read when ``exog`` is true).
    Dict_Pop, Dict_Food_Insec : mapping, optional
        Per-region values repeated to form each region's exogenous
        regressors (only read when ``exog`` is true).
    exog : bool
        Whether to use the exogenous regressors.
    plot : bool
        If true, a figure per region is saved under
        ``PredictionsARIMABestExog/`` and shown.
    v : int
        Added to each series length to form the prediction ``end``.

    Returns
    -------
    pandas.DataFrame
        Columns ``region``, ``mean``, ``mean_ci_upper`` and
        ``mean_ci_lower`` (``alpha=0.05``), index named ``date``.
    """
    columns = ['region', 'mean', 'mean_ci_upper', 'mean_ci_lower']

    def region_exog(region, n_rows):
        """Return an (n_rows, 2) array repeating the region's regressors."""
        return np.array([[Dict_Pop[region]] * n_rows,
                         [Dict_Food_Insec[region]] * n_rows]).transpose()

    # Only build the global exogenous matrix when it is actually used, so
    # that exog=False works with Pop / Food_Insecure left at None and the
    # global model has the same parameters as the per-region models.
    values_exog = np.array([Pop, Food_Insecure]).transpose() if exog else None
    BestMod = SARIMAX(Values,
                      exog=values_exog,
                      order=BestParams,
                      missing='drop',
                      enforce_invertibility=False)
    BestRes = BestMod.fit()
    List_Regions = pd.unique(Regions['Region'])
    if plot:
        # Region -> colour lookup, built the same way GridSearch does.
        Palette = dict(Regions[['Region', 'Color']].to_dict('split')['data'])

    frames = []
    for region in List_Regions:
        DataCounty = Regions_Daily_Cases[region].dropna()
        n_obs = len(DataCounty)
        if exog:
            series_exog = region_exog(region, n_obs)
            # ``end`` is inclusive, hence v + 1 out-of-sample rows.
            forecast_kwargs = {'exog': region_exog(region, v + 1)}
        else:
            series_exog = None
            forecast_kwargs = {}
        ModelCounty = SARIMAX(DataCounty,
                              exog=series_exog,
                              order=BestParams,
                              missing='drop',
                              enforce_invertibility=False)
        # Reuse the globally fitted parameters on this region's series.
        res = ModelCounty.smooth(BestRes.params)
        fc = res.get_prediction(0, n_obs + v, **forecast_kwargs)
        frame = fc.summary_frame(alpha=0.05)
        frame['region'] = [region] * len(frame)
        frames.append(frame[columns])
        if plot:
            fc = frame['mean']
            confInf = frame['mean_ci_lower']
            confSup = frame['mean_ci_upper']
            pl = plt.plot(DataCounty, label=region, color=Palette[region])
            plt.fill_between(confInf.index,
                             confSup,
                             confInf,
                             alpha=0.3,
                             color=pl[0].get_color())
            plt.plot(fc, '--', color=pl[0].get_color())
            plt.title('Best ARIMA Predictions Cases per 100k for ' + region)
            plt.yscale('log')
            plt.savefig('PredictionsARIMABestExog/' + region)
            plt.show()
    # DataFrame.append no longer exists in pandas >= 2.0; concatenate once.
    if frames:
        Predictions = pd.concat(frames)
    else:
        Predictions = pd.DataFrame(columns=columns)
    Predictions.index.name = 'date'
    return Predictions