Python AutoARIMA.fit Examples

Programming Language: Python

Namespace/Package Name: pmdarima.arima

Class/Type: AutoARIMA

Method/Function: fit

Examples at hotexamples.com: 4

Python AutoARIMA.fit - 4 examples found. These are the top rated real world Python examples of pmdarima.arima.AutoARIMA.fit extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

AutoARIMA(13)

fit(4)

fit_predict(1)

predict(1)

predict_in_sample(1)

summary(1)

update(1)

Example #1

Show file

def forecast(us_counties: pd.DataFrame,
             log_metrics: bool,
             hp: dict,
             metric_threshold: int = 5):
    metrics = {}
    growth_rates = {}
    horizon = hp['horizon']
    metric_skip = 0

    for location in tqdm(us_counties['location'].unique(), unit=' counties'):
        if log_metrics:
            if metric_skip == metric_threshold:
                metric_skip = 0
            else:
                metric_skip += 1
                continue
        y = us_counties[us_counties.location ==
                        location].reset_index()['cases']
        if len(y) < horizon:
            continue
        model = AutoARIMA(**hp)
        with warnings.catch_warnings():
            # When there is no cases, it will throw a warning
            warnings.filterwarnings("ignore")
            try:
                if log_metrics:
                    y, yv = train_test_split(y, test_size=horizon)
                model.fit(y)
                predictions = model.predict(n_periods=horizon)
            # Value error very rarely with weird/broken time series data
            except (ValueError, IndexError):
                continue
            if log_metrics:
                metrics[location] = np.mean(
                    np.abs(yv - predictions) /
                    (np.abs(yv) + np.abs(predictions)))
            last_forecast = predictions[len(predictions) - 1]
            todays_cases = y[len(y) - 1]
            # Places with very small amount of cases are hard to predict
            case_handicap = min(1.0, 0.5 + (todays_cases / 120))
            growth = (last_forecast / todays_cases) * case_handicap
            growth_rates[location] = growth

    final_list = [
        i[0]
        for i in sorted(growth_rates.items(), key=lambda i: i[1], reverse=True)
    ]

    def rank_risk(row) -> int:
        case_growth = growth_rates.get(row.location)
        if not case_growth:
            return 1
        return round(max(0, (case_growth - 1) * 100))

    if not log_metrics:
        us_counties['outbreak_risk'] = us_counties.apply(rank_risk, axis=1)

    return us_counties, final_list, metrics

Example #2

Show file

File: test_arima.py Project: tomasuz/pmdarima

def test_AutoARIMA_class():
    train, test = wineind[:125], wineind[125:]
    mod = AutoARIMA(maxiter=5)
    mod.fit(train)

    endog = mod.model_.arima_res_.data.endog
    assert_array_almost_equal(train, endog)

    # update
    mod.update(test, maxiter=2)
    new_endog = mod.model_.arima_res_.data.endog
    assert_array_almost_equal(wineind, new_endog)

Example #3

Show file

File: test_arima.py Project: ysenarath/pmdarima

def test_issue_30():
    # From the issue:
    vec = np.array([33., 44., 58., 49., 46., 98., 97.])

    arm = AutoARIMA(out_of_sample_size=1, seasonal=False,
                    suppress_warnings=True)
    arm.fit(vec)

    # This is a way to force it:
    ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec)

    # Want to make sure it works with exog arrays as well
    exog = np.random.RandomState(1).rand(vec.shape[0], 2)
    auto_arima(vec, exogenous=exog, out_of_sample_size=1,
               seasonal=False,
               suppress_warnings=True)

    # This is a way to force it:
    ARIMA(order=(0, 1, 0), out_of_sample_size=1).fit(vec, exogenous=exog)

Example #4

Show file

File: auto_arima_estimator.py Project: goncaloperes/greykite

class AutoArimaEstimator(BaseForecastEstimator):
    """Wrapper for ``pmdarima.arima.AutoARIMA``.
    It currently does not handle the regressor issue when there is
    gap between train and predict periods.

    Parameters
    ----------
    score_func : callable
        see ``BaseForecastEstimator``.
    coverage : float between [0.0, 1.0]
        see ``BaseForecastEstimator``.
    null_model_params : dict with arguments to define DummyRegressor null model, optional, default=None
        see ``BaseForecastEstimator``.
    regressor_cols: `list` [`str`], optional, default None
        A list of regressor columns used during training and prediction.
        If None, no regressor columns are used.

    See ``AutoArima`` documentation for rest of the parameter descriptions:

            * https://alkaline-ml.com/pmdarima/modules/generated/pmdarima.arima.AutoARIMA.html#pmdarima.arima.AutoARIMA

    Attributes
    ----------
    model : ``AutoArima`` object
        Auto arima model object
    fit_df : `pandas.DataFrame` or None
        The training data used to fit the model.
    forecast : `pandas.DataFrame`
        Output of the predict method of ``AutoArima``.
    """
    def __init__(
            self,
            # Null model parameters
            score_func: callable = mean_squared_error,
            coverage: float = 0.90,
            null_model_params: Optional[Dict] = None,
            # Additional parameters
            regressor_cols: Optional[List[str]] = None,
            freq: Optional[float] = None,
            # pmdarima fit parameters
            start_p: Optional[int] = 2,
            d: Optional[int] = None,
            start_q: Optional[int] = 2,
            max_p: Optional[int] = 5,
            max_d: Optional[int] = 2,
            max_q: Optional[int] = 5,
            start_P: Optional[int] = 1,
            D: Optional[int] = None,
            start_Q: Optional[int] = 1,
            max_P: Optional[int] = 2,
            max_D: Optional[int] = 1,
            max_Q: Optional[int] = 2,
            max_order: Optional[int] = 5,
            m: Optional[int] = 1,
            seasonal: Optional[bool] = True,
            stationary: Optional[bool] = False,
            information_criterion: Optional[str] = 'aic',
            alpha: Optional[int] = 0.05,
            test: Optional[str] = 'kpss',
            seasonal_test: Optional[str] = 'ocsb',
            stepwise: Optional[bool] = True,
            n_jobs: Optional[int] = 1,
            start_params: Optional[Dict] = None,
            trend: Optional[str] = None,
            method: Optional[str] = 'lbfgs',
            maxiter: Optional[int] = 50,
            offset_test_args: Optional[Dict] = None,
            seasonal_test_args: Optional[Dict] = None,
            suppress_warnings: Optional[bool] = True,
            error_action: Optional[str] = 'trace',
            trace: Optional[Union[int, bool]] = False,
            random: Optional[bool] = False,
            random_state: Optional[Union[int, callable]] = None,
            n_fits: Optional[int] = 10,
            out_of_sample_size: Optional[int] = 0,
            scoring: Optional[str] = 'mse',
            scoring_args: Optional[Dict] = None,
            with_intercept: Optional[Union[bool, str]] = "auto",
            # pmdarima predict parameters
            return_conf_int: Optional[bool] = True,
            dynamic: Optional[bool] = False):
        # Every subclass of BaseForecastEstimator must call super().__init__
        super().__init__(
            score_func=score_func,
            coverage=coverage,
            null_model_params=null_model_params)
        self.regressor_cols = regressor_cols
        self.freq = freq
        self.start_p = start_p
        self.d = d
        self.start_q = start_q
        self.max_p = max_p
        self.max_d = max_d
        self.max_q = max_q
        self.start_P = start_P
        self.D = D
        self.start_Q = start_Q
        self.max_P = max_P
        self.max_D = max_D
        self.max_Q = max_Q
        self.max_order = max_order
        self.m = m
        self.seasonal = seasonal
        self.stationary = stationary
        self.information_criterion = information_criterion
        self.alpha = alpha
        self.test = test
        self.seasonal_test = seasonal_test
        self.stepwise = stepwise
        self.n_jobs = n_jobs
        self.start_params = start_params
        self.trend = trend
        self.method = method
        self.maxiter = maxiter
        self.offset_test_args = offset_test_args
        self.seasonal_test_args = seasonal_test_args
        self.suppress_warnings = suppress_warnings
        self.error_action = error_action
        self.trace = trace
        self.random = random
        self.random_state = random_state
        self.n_fits = n_fits
        self.out_of_sample_size = out_of_sample_size
        self.scoring = scoring
        self.scoring_args = scoring_args
        self.with_intercept = with_intercept
        self.return_conf_int = return_conf_int
        self.coverage = coverage
        self.dynamic = dynamic

        # set by the fit method
        self.model = None
        self.fit_df = None
        # set by the predict method
        self.forecast = None

    def fit(self, X, y=None, time_col=TIME_COL, value_col=VALUE_COL, **fit_params):
        """Fits ``ARIMA`` forecast model.

        Parameters
        ----------
        X : `pandas.DataFrame`
            Input timeseries, with timestamp column,
            value column, and any additional regressors.
            The value column is the response, included in
            X to allow transformation by `sklearn.pipeline.Pipeline`
        y : ignored
            The original timeseries values, ignored.
            (The y for fitting is included in ``X``.)
        time_col : `str`
            Time column name in ``X``
        value_col : `str`
            Value column name in ``X``
        fit_params : `dict`
            additional parameters for null model
        Returns
        -------
        self : self
            Fitted model is stored in ``self.model``.
        """
        X = X.sort_values(by=time_col)
        # fits null model
        super().fit(X, y=y, time_col=time_col, value_col=value_col, **fit_params)

        self.fit_df = X
        # fits AutoArima model
        self.model = AutoARIMA(
            start_p=self.start_p,
            d=self.d,
            start_q=self.start_q,
            max_p=self.max_p,
            max_d=self.max_d,
            max_q=self.max_q,
            start_P=self.start_P,
            D=self.D,
            start_Q=self.start_Q,
            max_P=self.max_P,
            max_D=self.max_D,
            max_Q=self.max_Q,
            max_order=self.max_order,
            m=self.m,
            seasonal=self.seasonal,
            stationary=self.stationary,
            information_criterion=self.information_criterion,
            alpha=self.alpha,
            test=self.test,
            seasonal_test=self.seasonal_test,
            stepwise=self.stepwise,
            n_jobs=self.n_jobs,
            start_params=self.start_params,
            trend=self.trend,
            method=self.method,
            maxiter=self.maxiter,
            offset_test_args=self.offset_test_args,
            seasonal_test_args=self.seasonal_test_args,
            suppress_warnings=self.suppress_warnings,
            error_action=self.error_action,
            trace=self.trace,
            random=self.random,
            random_state=self.random_state,
            n_fits=self.n_fits,
            out_of_sample_size=self.out_of_sample_size,
            scoring=self.scoring,
            scoring_args=self.scoring_args,
            with_intercept=self.with_intercept,
            return_conf_int=self.return_conf_int,
            dynamic=self.dynamic,
            regressor_cols=self.regressor_cols
        )

        # fits auto-arima
        if self.regressor_cols is None:
            reg_df = None
        else:
            reg_df = X[self.regressor_cols]
        self.model.fit(y=X[[value_col]], X=reg_df)

        return self

    def predict(self, X, y=None):
        """Creates forecast for the dates specified in ``X``.
        Currently does not support the regressor case where there is gap between
        train and predict periods.

        Parameters
        ----------
        X: `pandas.DataFrame`
            Input timeseries with timestamp column and any additional regressors.
            Timestamps are the dates for prediction.
            Value column, if provided in ``X``, is ignored.
        y: ignored.

        Returns
        -------
        predictions: `pandas.DataFrame`
            Forecasted values for the dates in ``X``. Columns:

                - ``TIME_COL``: dates
                - ``PREDICTED_COL``: predictions
                - ``PREDICTED_LOWER_COL``: lower bound of predictions
                - ``PREDICTED_UPPER_COL``: upper bound of predictions
        """
        X = X.sort_values(by=self.time_col_)
        # Returns the cached result if applicable
        cached_predictions = super().predict(X=X)
        if cached_predictions is not None:
            return cached_predictions

        # Currently does not support the regressor case where
        # there is gap between train and predict periods
        if self.regressor_cols is None:
            fut_reg_df = None
        else:
            fut_df = X[X[self.time_col_] > self.fit_df[self.time_col_].iloc[-1]]
            fut_reg_df = fut_df[self.regressor_cols]  # Auto-arima only accepts regressor values beyond `fit_df`

        if self.freq is None:
            self.freq = pd.infer_freq(self.fit_df[self.time_col_])
        if self.freq == "H":
            self.freq = self.freq.lower()  # np.timedelta recognizes lower case letters
        chosen_d = self.model.model_.order[1]  # This is the value of the d chosen by auto-arima
        forecast_start = int((X[self.time_col_].iloc[0] - self.fit_df[self.time_col_].iloc[0])/np.timedelta64(1, self.freq))
        if forecast_start < chosen_d:
            append_length = chosen_d - forecast_start  # Number of NaNs to append to `pred_df`
            forecast_start = chosen_d  # Auto-arima can not predict below the chosen d
        else:
            append_length = 0
        forecast_end = int((X[self.time_col_].iloc[-1] - self.fit_df[self.time_col_].iloc[0])/np.timedelta64(1, self.freq))

        predictions = self.model.predict_in_sample(
            X=fut_reg_df,
            start=forecast_start,
            end=forecast_end,
            dynamic=self.dynamic,
            return_conf_int=self.return_conf_int,
            alpha=(1-self.coverage)
        )

        if append_length > 0:
            pred_df = pd.DataFrame({
                TIME_COL: X[self.time_col_],
                PREDICTED_COL: np.append(np.repeat(np.nan, append_length), predictions[0]),
                PREDICTED_LOWER_COL: np.append(np.repeat(np.nan, append_length), predictions[1][:, 0]),
                PREDICTED_UPPER_COL: np.append(np.repeat(np.nan, append_length), predictions[1][:, 1])
            })
        else:
            pred_df = pd.DataFrame({
                TIME_COL: X[self.time_col_],
                PREDICTED_COL: predictions[0],
                PREDICTED_LOWER_COL: predictions[1][:, 0],
                PREDICTED_UPPER_COL: predictions[1][:, 1]
            })
        self.forecast = pred_df

        # Caches the predictions
        self.cached_predictions_ = pred_df

        return pred_df

    def summary(self):
        BaseForecastEstimator.summary(self)
        # AutoArima summary
        return self.model.summary()