Beispiel #1
0
def evaluate_arima(row, y, validation_size):
    """
    Evaluate the model with the params given in the row

    The model is fit on ``y`` without its last ``validation_size`` points and
    then asked to predict ``validation_size`` steps, which are scored against
    the held-out tail of ``y``.

    :param row:             The row to get the parameters from
    :param y:               The y target vector
    :param validation_size: The validation distance to use for scoring
    :return:                A dict holding ``params``, ``prediction``,
                            ``fit_time``, ``prediction_time``, ``norm_factor``
                            and every entry returned by ``score_prediction``
    """
    used_y = y[:-validation_size]
    params = get_arima_params(row)
    times = dict()

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed intervals.
    start = time.perf_counter()
    model = ARIMA(**params)
    fit = model.fit(y=used_y)
    times['fit_time'] = time.perf_counter() - start

    start = time.perf_counter()
    prediction = fit.predict(validation_size)
    times['prediction_time'] = time.perf_counter() - start

    # Computed outside the timed region so that 'prediction_time' only
    # measures the predict call itself.
    norm_factor = 1 / (y.max() - y.min())

    scores = score_prediction(y[-validation_size:], prediction, norm_factor)

    return {
        'params': params,
        'prediction': prediction,
        **times,
        **scores, 'norm_factor': norm_factor
    }
def get_forecast(org_ts,
                 forecast_periods,
                 orders=(2, 1, 2),
                 seasonal_orders=(0, 1, 1, 48),
                 freq='30min'):
    """
    Fit a seasonal ARIMA on ``org_ts`` and return its forecast as a Series.

    :param org_ts:  The original data; its index provides the last two
        timestamps used to place the first forecast point
    :param forecast_periods:    How many points to forecast
    :param orders:  The p, d, q values. p and q are related to the ACF and
        PACF, d is the order of differencing. It is recommended to first find
        suitable values with auto_arima (get_suitable_orders)
    :param seasonal_orders: Same as above; the last entry is the period of
        the series
    :param freq:    The interval between two consecutive points
    :return:    The forecast values
    """
    model = ARIMA(order=orders, seasonal_order=seasonal_orders)
    fitted = model.fit(y=org_ts)
    predicted = fitted.predict(n_periods=forecast_periods)

    # The forecast index starts one observed step after the last sample,
    # where the step is the gap between the last two index entries.
    last_stamp = org_ts.index[-1]
    last_step = last_stamp - org_ts.index[-2]
    forecast_index = pd.date_range(last_stamp + last_step,
                                   periods=forecast_periods,
                                   freq=freq)
    return pd.Series(predicted, index=forecast_index)
Beispiel #3
0
def test_with_seasonality1():
    """Fit a seasonal ARIMA on ``wineind`` and compare AIC/BIC with R."""
    estimator = ARIMA(order=(1, 1, 1),
                      seasonal_order=(0, 1, 1, 12),
                      suppress_warnings=True)
    fit = estimator.fit(y=wineind)
    _try_get_attrs(fit)

    # The R code reports AIC ~3004 and BIC ~3017; we only require the
    # values to agree within 100 or so.
    tolerance = 100
    assert abs(fit.aic() - 3004) < tolerance
    assert abs(fit.bic() - 3017) < tolerance
Beispiel #4
0
def test_basic_arima():
    """Check AIC, BIC and forecasts of a constant-only ARIMA(0, 0, 0)."""
    model = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
    forecasts = model.fit_predict(y)  # fit/predict in one call for coverage

    # spot-check the information criteria
    assert_almost_equal(model.aic(), 11.201308403566909, decimal=5)
    assert_almost_equal(model.bic(), 13.639060053303311, decimal=5)

    # the expected forecast is the same value repeated ten times
    expected = np.array([0.44079876] * 10)
    assert_array_almost_equal(forecasts, expected)
Beispiel #5
0
def test_basic_arima():
    """Check criteria, forecasts and confidence intervals of ARIMA(0, 0, 0)."""
    model = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
    forecasts = model.fit_predict(y)  # fit/predict in one call for coverage

    # spot-check the information criteria
    assert_almost_equal(model.aic(), 11.201308403566909, decimal=5)
    assert_almost_equal(model.aicc(), 11.74676, decimal=5)
    assert_almost_equal(model.bic(), 13.639060053303311, decimal=5)

    # the expected forecast is the same value repeated ten times
    expected_forecasts = np.array([0.44079876] * 10)
    assert_array_almost_equal(forecasts, expected_forecasts)

    # Make sure we can get confidence intervals: the expected bounds are
    # the same (lower, upper) pair for each of the ten periods
    expected_bounds = np.array([[-0.10692387, 0.98852139]] * 10)
    _, bounds = model.predict(n_periods=10,
                              return_conf_int=True,
                              alpha=0.05)
    assert_array_almost_equal(bounds, expected_bounds)
Beispiel #6
0
 def __init__(self, **kwargs):
     """Forward ``kwargs`` to the parent wrapper and build the ARIMA model.

     ``self._model`` is only assigned when at least one keyword argument
     is given.
     """
     # Library (imported when the wrapper is constructed).
     from pyramid.arima import ARIMA as PYARIMA

     # Save config parameters through the parent initializer.
     super(PyramidWrapper, self).__init__(**kwargs)

     # No configuration given: no model is created.
     if not kwargs:
         return
     self._model = PYARIMA(**kwargs)
Beispiel #7
0
def test_oob_for_issue_29():
    """Fit and predict over d / out-of-sample / exogenous combos (issue 29)."""
    sunspots = sm.datasets.sunspots.load_pandas().data
    year_dates = sm.tsa.datetools.dates_from_range('1700', '2008')
    sunspots.index = pd.Index(year_dates)
    del sunspots["YEAR"]

    xreg = np.random.RandomState(1).rand(sunspots.shape[0], 3)

    # Try for cv on/off, various D levels, and various Xregs
    combos = [(d, cv, exog)
              for d in (0, 1)
              for cv in (0, 3)
              for exog in (xreg, None)]

    for d, cv, exog in combos:
        # surround with try/except so we can log the failing combo
        try:
            estimator = ARIMA(order=(2, d, 0), out_of_sample_size=cv)
            model = estimator.fit(sunspots, exogenous=exog)

            # If exogenous is defined, we need to pass n_periods of
            # exogenous rows to the predict function. Otherwise we'll
            # just leave it at None
            xr = None if exog is None else exog[:3, :]

            _, _ = model.predict(n_periods=3,
                                 return_conf_int=True,
                                 exogenous=xr)

        except Exception as ex:
            print("Failing combo: d=%i, cv=%i, exog=%r" %
                  (d, cv, exog))

            # Statsmodels can be fragile with ARMA coefficient
            # computation. An error mentioning "invertibility", e.g.
            #   ValueError: The computed initial MA coefficients are
            #       not invertible. You should induce invertibility,
            #       choose a different model order, or ...
            # is tolerated; anything else is re-raised.
            if "invertibility" not in str(ex):
                raise
Beispiel #8
0
def test_double_pickle():
    """Pickle one fitted ARIMA twice and check both copies stay usable.

    Also checks that loading a pickle whose cached state was cleared raises
    an ``OSError`` carrying the expected message.
    """
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
    arima.fit(y)

    # Now save it twice
    file_a = 'first.pkl'
    file_b = 'second.pkl'

    try:
        # No compression
        joblib.dump(arima, file_a)

        # Sleep between pickling so that the "pickle hash" for the ARIMA is
        # different by enough. We could theoretically also just use a UUID
        # for part of the hash to make sure it's unique?
        time.sleep(0.5)

        # Some compression
        joblib.dump(arima, file_b, compress=2)

        # Load both and prove they can both predict
        loaded_a = joblib.load(file_a)  # type: ARIMA
        loaded_b = joblib.load(file_b)  # type: ARIMA
        pred_a = loaded_a.predict(n_periods=5)
        pred_b = loaded_b.predict(n_periods=5)
        assert np.allclose(pred_a, pred_b)

        # Remove the caches from each
        loaded_a._clear_cached_state()
        loaded_b._clear_cached_state()

        # Test the previous condition where we removed the saved state of an
        # ARIMA from statsmodels and caused an OSError and a corrupted pickle
        with pytest.raises(OSError) as o:
            joblib.load(file_a)  # fails since no cached state there!

        # The message is checked AFTER the ``with`` block: statements placed
        # after the raising call inside the block are never executed. The
        # raised exception itself is available as ``o.value``.
        msg = str(o.value)
        assert 'Could not read saved model state' in msg, msg

    # Always remove in case we fail in try, leaving residual files. Only
    # unlink files that exist so that a failure before a dump is not masked
    # by an error raised during cleanup.
    finally:
        for path in (file_a, file_b):
            if os.path.exists(path):
                os.unlink(path)
Beispiel #9
0
def test_issue_30():
    """Fit with ``out_of_sample_size=1`` on a short vector (issue 30)."""
    # From the issue:
    vec = np.array([33., 44., 58., 49., 46., 98., 97.])

    # Settings shared by both auto_arima searches below
    search_kwargs = dict(out_of_sample_size=1,
                         seasonal=False,
                         suppress_warnings=True)
    auto_arima(vec, **search_kwargs)

    # This is a way to force it:
    forced = ARIMA(order=(0, 1, 0), out_of_sample_size=1)
    forced.fit(vec)

    # Want to make sure it works with exog arrays as well
    exog = np.random.RandomState(1).rand(vec.shape[0], 2)
    auto_arima(vec, exogenous=exog, **search_kwargs)

    # This is a way to force it:
    forced_exog = ARIMA(order=(0, 1, 0), out_of_sample_size=1)
    forced_exog.fit(vec, exogenous=exog)
Beispiel #10
0
def test_the_r_src():
    """Compare fits on ``abc`` against the results quoted from the R code."""
    # this is the test the R code provides
    estimator = ARIMA(order=(2, 0, 1), trend='c', suppress_warnings=True)
    fit = estimator.fit(abc)

    # the R code's AIC = ~135 and BIC = ~145
    assert abs(135 - fit.aic()) < 1.0
    assert abs(145 - fit.bic()) < 1.0

    # R's coefficients:
    #     ar1      ar2     ma1    mean
    # -0.6515  -0.2449  0.8012  5.0370
    #
    # note that statsmodels' mean is on the front, not the end.
    params = fit.params()
    r_coefficients = np.array([5.0370, -0.6515, -0.2449, 0.8012])
    assert_almost_equal(params, r_coefficients, decimal=2)

    # > fit = forecast::auto.arima(abc, max.p=5, max.d=5, max.q=5, max.order=100, stepwise=F)
    search_kwargs = dict(max_p=5,
                         max_d=5,
                         max_q=5,
                         max_order=100,
                         seasonal=False,
                         trend='c',
                         suppress_warnings=True,
                         error_action='ignore')
    fit = auto_arima(abc, **search_kwargs)

    # this differs from the R fit with a slightly higher AIC...
    assert abs(137 - fit.aic()) < 1.0  # R's is 135.28
Beispiel #11
0
    def __init__(self, *,
                 hyperparams: ArimaHyperparams,
                 random_seed: int = 0,
                 docker_containers: Dict[str, DockerContainer] = None,
                 _verbose: int = 0) -> None:
        """Build the wrapped ``ARIMA`` estimator from the hyperparameters.

        ``seasonal_order`` is forwarded only when the ``is_seasonal``
        hyperparameter is truthy. ``_verbose`` is accepted but not used here.
        """
        super().__init__(hyperparams=hyperparams, random_seed=random_seed,
                         docker_containers=docker_containers)

        hp = self.hyperparams
        seasonal_order = hp["seasonal_order"] if hp["is_seasonal"] else None

        # The "start_params" and "callback" hyperparameters are not
        # forwarded: start_params is left to the ARIMA default and callback
        # is fixed to None.
        self._clf = ARIMA(
            order=(hp["P"], hp["D"], hp["Q"]),
            seasonal_order=seasonal_order,
            transparams=hp["transparams"],
            method=hp["method"],
            trend=hp["trend"],
            solver=hp["solver"],
            maxiter=hp["maxiter"],
            disp=hp["disp"],
            callback=None,
            suppress_warnings=hp["suppress_warnings"],
            out_of_sample_size=False,
            scoring="mse",
            scoring_args=None
        )

        # State filled in later by the other methods of the primitive.
        self._training_inputs = None
        self._training_outputs = None
        self._target_names = None
        self._training_indices = None
        self._fitted = False
        self._length_for_produce = 0
Beispiel #12
0
def test_with_oob():
    """Check the out-of-bag score for several ``out_of_sample_size`` values."""
    # show we can fit with CV (kinda)
    estimator = ARIMA(order=(2, 1, 2),
                      suppress_warnings=True,
                      out_of_sample_size=10)
    arima = estimator.fit(y=hr)
    assert not np.isnan(arima.oob())  # show this works

    # show we can fit if ooss < 0 and oob will be nan
    estimator = ARIMA(order=(2, 1, 2),
                      suppress_warnings=True,
                      out_of_sample_size=-1)
    arima = estimator.fit(y=hr)
    assert np.isnan(arima.oob())

    # can we do one with an exogenous array, too?
    estimator = ARIMA(order=(2, 1, 2),
                      suppress_warnings=True,
                      out_of_sample_size=10)
    exog = rs.rand(hr.shape[0], 4)
    arima = estimator.fit(y=hr, exogenous=exog)
    assert not np.isnan(arima.oob())
Beispiel #13
0
def test_with_seasonality1():
    """Fit a seasonal ARIMA on ``wineind``, compare with R, then predict."""
    estimator = ARIMA(order=(1, 1, 1),
                      seasonal_order=(0, 1, 1, 12),
                      suppress_warnings=True)
    fit = estimator.fit(y=wineind)
    _try_get_attrs(fit)

    # Results of the R code: AIC ~3004, AICc ~3005, BIC ~3017.
    # Each criterion only has to be equal within 100 or so.
    r_aic, r_aicc, r_bic = 3004, 3005, 3017
    assert abs(fit.aic() - r_aic) < 100
    assert abs(fit.aicc() - r_aicc) < 100
    assert abs(fit.bic() - r_bic) < 100

    # show we can predict in-sample
    fit.predict_in_sample()

    # test with SARIMAX confidence intervals
    fit.predict(n_periods=10, return_conf_int=True, alpha=0.05)
Beispiel #14
0
def test_for_older_version():
    """Check the version warning emitted when unpickling an ARIMA."""
    # The ARIMA shared by all three scenarios
    arima = ARIMA(order=(0, 0, 0), trend='c', suppress_warnings=True)
    pickle_file = 'model.pkl'

    # Each scenario is (case, do_fit, expect_warning):
    # 1. The model is serialized/deserialized BEFORE it has been fit.
    #    This means we should not get a warning.
    #
    # 2. The model is saved after being fit, but it does not have a
    #    pkg_version_ attribute due to it being an old (very old) version.
    #    We still warn for this.
    #
    # 3. The model is saved after the fit, and its version does not match.
    #    We warn for this.
    scenarios = (
        (1, False, False),
        (2, True, True),
        (3, True, True),
    )

    for case, do_fit, expect_warning in scenarios:
        # Only fit it if we should
        if do_fit:
            arima.fit(y)

        # Case 2 drops pkg_version_ entirely, case 3 sets it low
        if case == 2:
            del arima.pkg_version_
        elif case == 3:
            arima.pkg_version_ = '0.0.1'  # will always be < than current

        try:
            # Pickle it
            joblib.dump(arima, pickle_file)

            # Now unpickle it and show that we get a warning (if expected)
            with warnings.catch_warnings(record=True) as caught:
                arm = joblib.load(pickle_file)  # type: ARIMA

                if expect_warning:
                    assert caught
                else:
                    assert not caught

                # we can still produce predictions (only if we fit)
                if do_fit:
                    arm.predict(n_periods=4)

        finally:
            arima._clear_cached_state()
            os.unlink(pickle_file)
Beispiel #15
0
def test_more_elaborate():
    """Fit with and without exogenous data, round-trip a pickle, predict."""
    # show we can fit this with a non-zero order
    arima = ARIMA(order=(2, 1, 2), suppress_warnings=True).fit(y=hr)
    _try_get_attrs(arima)

    # can we fit this same arima with a made-up exogenous array?
    xreg = rs.rand(hr.shape[0], 4)
    estimator = ARIMA(order=(2, 1, 2), suppress_warnings=True)
    arima = estimator.fit(y=hr, exogenous=xreg)
    _try_get_attrs(arima)

    # pickle this for the __get/setattr__ coverage.
    # since the only time this is tested is in parallel in auto.py,
    # this doesn't actually get any coverage proof...
    pickle_path = 'some_temp_file.pkl'
    with open(pickle_path, 'wb') as handle:
        pickle.dump(arima, handle)

    # show we can predict with this even though it's been pickled
    new_xreg = rs.rand(5, 4)
    original_preds = arima.predict(n_periods=5, exogenous=new_xreg)

    # now unpickle
    with open(pickle_path, 'rb') as handle:
        restored = pickle.load(handle)

    # show we can still predict, compare
    restored_preds = restored.predict(n_periods=5, exogenous=new_xreg)
    assert_array_almost_equal(original_preds, restored_preds)

    # now clear the cache and remove the pickle file
    arima._clear_cached_state()
    os.unlink(pickle_path)

    # now show that since we fit the ARIMA with an exogenous array,
    # we need to provide one for predictions otherwise it breaks.
    assert_raises(ValueError, arima.predict, n_periods=5, exogenous=None)

    # show that if we DO provide an exogenous and it's the wrong dims, we
    # also break things down.
    assert_raises(ValueError,
                  arima.predict,
                  n_periods=5,
                  exogenous=rs.rand(4, 4))
Beispiel #16
0
def test_with_oob():
    """Check out-of-bag scoring plus argument validation of fit/predict."""
    # show we can fit with CV (kinda)
    estimator = ARIMA(order=(2, 1, 2),
                      suppress_warnings=True,
                      out_of_sample_size=10)
    arima = estimator.fit(y=hr)
    assert not np.isnan(arima.oob())  # show this works

    # show we can fit if ooss < 0 and oob will be nan
    estimator = ARIMA(order=(2, 1, 2),
                      suppress_warnings=True,
                      out_of_sample_size=-1)
    arima = estimator.fit(y=hr)
    assert np.isnan(arima.oob())

    # This will raise since n_periods is not an int
    assert_raises(TypeError, arima.predict, n_periods="5")

    # But that we CAN forecast with an int...
    _ = arima.predict(n_periods=5)

    # Show we fail if cv > n_samples
    oversized = ARIMA(order=(2, 1, 2), out_of_sample_size=1000)
    assert_raises(ValueError, oversized.fit, hr)
Beispiel #17
0
class AutoArima(SupervisedLearnerPrimitiveBase[Inputs, Outputs, ArimaParams, ArimaHyperparams]):
    """Primitive wrapping an ``ARIMA`` estimator for time-series forecasting.

    ``set_training_data`` keeps the last column of its inputs as the series
    to fit, ``fit`` trains the wrapped estimator on it, and ``produce``
    forecasts as many periods as its inputs have rows.
    """
    __author__ = 'USC ISI'
    metadata = hyperparams.base.PrimitiveMetadata({
        # Required
        "id": 'b2e4e8ea-76dc-439e-8e46-b377bf616a35',
        "version": config.VERSION,
        "name": "DSBox Arima Primitive",
        "description": "Arima primitive for timeseries data regression/forcasting problems, transferred from pyramid/Arima",

        "python_path": "d3m.primitives.time_series_forecasting.Arima.DSBOX",
        "primitive_family": "TIME_SERIES_FORECASTING",
        "algorithm_types": ["AUTOREGRESSIVE_INTEGRATED_MOVING_AVERAGE"],
        "source": {
            "name": config.D3M_PERFORMER_TEAM,
            "contact": config.D3M_CONTACT,
            "uris": [config.REPOSITORY]
        },
        "keywords": ["Transform", "Timeseries", "Aggregate"],
        "installation": [config.INSTALLATION],
        "precondition": ["NO_MISSING_VALUES", "NO_CATEGORICAL_VALUES"],
    })

    def __init__(self, *,
                 hyperparams: ArimaHyperparams,
                 random_seed: int = 0,
                 docker_containers: Dict[str, DockerContainer] = None,
                 _verbose: int = 0) -> None:
        """Build the wrapped ``ARIMA`` estimator from the hyperparameters.

        ``seasonal_order`` is forwarded only when the ``is_seasonal``
        hyperparameter is truthy. ``_verbose`` is accepted but not used here.
        """
        super().__init__(hyperparams=hyperparams, random_seed=random_seed,
                         docker_containers=docker_containers)
        if self.hyperparams["is_seasonal"]:
            seasonal_order = self.hyperparams["seasonal_order"]
        else:
            seasonal_order = None

        # The "start_params" and "callback" hyperparameters are not
        # forwarded: start_params is left to the ARIMA default and callback
        # is fixed to None.
        self._clf = ARIMA(
            order=(self.hyperparams["P"],
                   self.hyperparams["D"], self.hyperparams["Q"]),
            seasonal_order=seasonal_order,
            transparams=self.hyperparams["transparams"],
            method=self.hyperparams["method"],
            trend=self.hyperparams["trend"],
            solver=self.hyperparams["solver"],
            maxiter=self.hyperparams["maxiter"],
            disp=self.hyperparams["disp"],
            callback=None,
            suppress_warnings=self.hyperparams["suppress_warnings"],
            out_of_sample_size=False,
            scoring="mse",
            scoring_args=None
        )
        self._training_inputs = None
        self._training_outputs = None
        self._target_names = None
        self._training_indices = None
        self._fitted = False
        self._length_for_produce = 0

    def set_training_data(self, *, inputs: Inputs) -> None:
        """Keep the last column of ``inputs`` as the series to fit.

        Empty input only prints a warning and leaves the stored training
        data untouched.
        """
        inputs_timeseries = d3m_dataframe(inputs.iloc[:, -1])
        if len(inputs_timeseries) == 0:
            print(
                "Warning: Inputs timeseries data to timeseries_featurization primitive's length is 0.")
            return
        column_name = inputs_timeseries.columns[0]
        self._training_inputs = inputs_timeseries
        self._training_outputs = inputs_timeseries
        # Stored as a list because _add_target_semantic_types indexes the
        # names by column position; indexing the bare string would yield
        # only its first character.
        self._target_names = [column_name]
        # New training data invalidates a previous fit; otherwise ``fit``
        # would return early and keep the stale model.
        self._fitted = False

    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        """Fit the wrapped estimator on the stored training series.

        Does nothing when the primitive is already fitted.

        Raises
        ------
        ValueError
            If no training data has been set.
        """
        if self._fitted:
            return CallResult(None)

        if self._training_inputs is None or self._training_outputs is None:
            raise ValueError("Missing training data.")
        training_values = d3m_ndarray(self._training_outputs)

        # Flatten a single-column 2-D array; any other shape is handed to
        # the estimator unchanged (previously this left the variable passed
        # to ``fit`` unbound).
        shape = training_values.shape
        if len(shape) == 2 and shape[1] == 1:
            training_values = np.ravel(training_values)

        self._clf.fit(training_values)
        self._fitted = True

        return CallResult(None)

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """Forecast ``len(inputs)`` periods and return them as a dataframe.

        The result is the same whether or not ``use_semantic_types`` is set;
        recombining the output with input columns is not implemented.
        """
        sk_output = self._clf.predict(n_periods=len(inputs))
        output = d3m_dataframe(sk_output, generate_metadata=False, source=self)
        output.metadata = inputs.metadata.clear(
            source=self, for_value=output, generate_metadata=True)
        output.metadata = self._add_target_semantic_types(
            metadata=output.metadata, target_names=self._target_names, source=self)
        return CallResult(output)

    def get_params(self) -> ArimaParams:
        """Return the wrapped estimator packed as ``ArimaParams``."""
        # Built with ArimaParams to match the declared return type and the
        # "arima" key read back by ``set_params``.
        return ArimaParams(arima=self._clf)

    def set_params(self, *, params: ArimaParams) -> None:
        """Replace the wrapped estimator with the one stored in ``params``."""
        self._clf = params["arima"]

    @classmethod
    def _get_columns_to_fit(cls, inputs: Inputs, hyperparams: ArimaHyperparams):
        """Return the columns to use together with their indices.

        Without ``use_semantic_types`` all columns are returned; otherwise
        the selection is delegated to ``common_utils.get_columns_to_use``.
        """
        if not hyperparams['use_semantic_types']:
            return inputs, list(range(len(inputs.columns)))

        inputs_metadata = inputs.metadata

        def can_produce_column(column_index: int) -> bool:
            return cls._can_produce_column(inputs_metadata, column_index, hyperparams)

        columns_to_produce, columns_not_to_produce = common_utils.get_columns_to_use(inputs_metadata,
                                                                                     use_columns=hyperparams['use_columns'],
                                                                                     exclude_columns=hyperparams[
                                                                                         'exclude_columns'],
                                                                                     can_use_column=can_produce_column)
        return inputs.iloc[:, columns_to_produce], columns_to_produce

    @classmethod
    def _can_produce_column(cls, inputs_metadata: metadata_base.DataMetadata, column_index: int, hyperparams: ArimaHyperparams) -> bool:
        """Return True when the column carries the ``Attribute`` semantic type."""
        column_metadata = inputs_metadata.query(
            (metadata_base.ALL_ELEMENTS, column_index))

        semantic_types = column_metadata.get('semantic_types', [])
        if len(semantic_types) == 0:
            cls.logger.warning("No semantic types found in column metadata")
            return False
        if "https://metadata.datadrivendiscovery.org/types/Attribute" in semantic_types:
            return True

        return False

    @classmethod
    def _get_targets(cls, data: d3m_dataframe, hyperparams: ArimaHyperparams):
        """Return the ``TrueTarget`` columns of ``data`` and their names.

        Without ``use_semantic_types`` the data is returned unchanged with
        an empty list of names.
        """
        if not hyperparams['use_semantic_types']:
            return data, []
        target_names = []
        target_column_indices = []
        metadata = data.metadata
        target_column_indices.extend(metadata.get_columns_with_semantic_type(
            'https://metadata.datadrivendiscovery.org/types/TrueTarget'))

        for column_index in target_column_indices:
            if column_index is metadata_base.ALL_ELEMENTS:
                continue
            column_index = typing.cast(
                metadata_base.SimpleSelectorSegment, column_index)
            column_metadata = metadata.query(
                (metadata_base.ALL_ELEMENTS, column_index))
            # Fall back to the column index when no name is recorded.
            target_names.append(column_metadata.get('name', str(column_index)))

        targets = data.iloc[:, target_column_indices]
        return targets, target_names

    @classmethod
    def _add_target_semantic_types(cls, metadata: metadata_base.DataMetadata,
                                   source: typing.Any,  target_names: List = None,) -> metadata_base.DataMetadata:
        """Mark every column as ``Target`` and ``PredictedTarget``.

        When ``target_names`` is given, each column is also renamed to the
        entry at its position.
        """
        for column_index in range(metadata.query((metadata_base.ALL_ELEMENTS,))['dimension']['length']):
            metadata = metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, column_index),
                                                  'https://metadata.datadrivendiscovery.org/types/Target',
                                                  source=source)
            metadata = metadata.add_semantic_type((metadata_base.ALL_ELEMENTS, column_index),
                                                  'https://metadata.datadrivendiscovery.org/types/PredictedTarget',
                                                  source=source)
            if target_names:
                metadata = metadata.update((metadata_base.ALL_ELEMENTS, column_index), {
                    'name': target_names[column_index],
                }, source=source)
        return metadata

    # functions to fit in devel branch of d3m (2019-1-17)
    def fit_multi_produce(self, *, produce_methods: typing.Sequence[str], inputs: Inputs, timeout: float = None, iterations: int = None) -> MultiCallResult:
        """
        A method calling ``fit`` and after that multiple produce methods at once.

        This implementation delegates to ``self._fit_multi_produce``,
        forwarding all of its arguments unchanged.

        Parameters
        ----------
        produce_methods : Sequence[str]
            A list of names of produce methods to call.
        inputs : Inputs
            The inputs given to ``set_training_data`` and all produce methods.
        timeout : float
            A maximum time this primitive should take to both fit the primitive and produce outputs
            for all produce methods listed in ``produce_methods`` argument, in seconds.
        iterations : int
            How many of internal iterations should the primitive do for both fitting and producing
            outputs of all produce methods.

        Returns
        -------
        MultiCallResult
            A dict of values for each produce method wrapped inside ``MultiCallResult``.
        """

        return self._fit_multi_produce(produce_methods=produce_methods, timeout=timeout, iterations=iterations, inputs=inputs)
Beispiel #18
0
class PyramidWrapper(BaseRegressionWrapper):
    """Regression wrapper around ``pyramid.arima.ARIMA`` models.

    The fitted pyramid model is kept in ``_raw``, its residuals in ``_resid``
    and the dictionary of collected metrics in ``_result``.
    """

    # Attributes.
    _name = 'PYRAMID'  # Label to add to the attributes when saving.

    def __init__(self, **kwargs):
        """Create the wrapper and, when parameters are given, the model.

        Parameters
        ----------
        **kwargs :
            Forwarded to the parent constructor and, when non-empty, to the
            ``pyramid.arima.ARIMA`` constructor.
        """
        # Library.
        from pyramid.arima import ARIMA as PYARIMA
        # Save config parameters.
        super(PyramidWrapper, self).__init__(**kwargs)
        # Create the model only when parameters were supplied; wrappers built
        # through ``from_instance`` receive an already fitted model instead.
        if kwargs:
            self._model = PYARIMA(**kwargs)

    def _identifier(self):
        """This method creates a name that describes the model."""
        # TODO: build a descriptive name, e.g.
        #   "%s%sx%s [%s,%s]" % (self._name, self.order, self.seasonal_order,
        #                        self.trend, exogenous)
        # where ``exogenous`` tells whether exogenous variables are present.
        return "name to do"

    # --------------------------------------------------------------------------
    #                           SET VARIABLES
    # --------------------------------------------------------------------------
    def _params_from_summary(self):
        """Gets parameters from the summary result of the raw object.

        The values are picked by position counted from the end of the
        comma-separated summary, so they depend on the exact layout of the
        summary produced by the raw model.

        Returns
        -------
        dict :
            Statistics keyed by name (prefixed with ``s_``).
        """
        # Format summary
        summary = self._raw.summary().as_csv()
        summary = summary.split("\n", 1)[1]  # Remove first line.
        summary = summary.replace("\n", ",")  # Replace \n by comma.

        # Split in elements.
        elements = summary.split(",")
        elements = [self._cast_float(e.strip()) for e in elements]

        # Create series.
        d = {}

        # Add parameters.
        d['s_jb_value'] = elements[-13]
        d['s_jb_prob'] = elements[-9]
        d['s_skew'] = elements[-5]
        d['s_Q_value'] = elements[-15]
        d['s_Q_prob'] = elements[-11]
        d['s_H_value'] = elements[-7]
        d['s_H_prob'] = elements[-3]
        d['s_kurtosis'] = elements[-1]
        d['s_heteroskedasticity'] = elements[-7]  # Same entry as s_H_value.
        d['s_omnibus_value'] = None
        d['s_omnibus_prob'] = None

        # Return
        return d

    def _init_result(self, alpha=0.05):
        """This method collects the metrics of the fitted model.

        @see: statsmodels.Arima
        @see: statsmodels.ArimaResults

        Parameters
        ----------
        alpha :
            Value passed to ``conf_int`` of the underlying results object
            when computing the coefficient intervals.

        Returns
        -------
        dict :
            Generic metrics (aic, aicc, bic, hqic, llf), per-coefficient
            statistics (``<name>_coef``, ``_std``, ``_tvalue``, ``_tprob``,
            ``_cil``, ``_ciu``) and the residual statistics returned by
            ``_resid_stats``.
        """
        # Create series.
        d = {}

        # Add generic metrics.
        d['aic'] = self._raw.aic()
        d['aicc'] = self._raw.aicc()
        d['bic'] = self._raw.bic()
        d['hqic'] = self._raw.hqic()
        d['llf'] = self._raw.arima_res_.llf

        # Check if it is arima or sarimax and get corresponding values
        if self._raw.seasonal_order is not None:
            statistic_values = self._raw.arima_res_.zvalues
        else:
            # NOTE: this must be the array itself. Wrapping it in a
            # one-element tuple would make the zip below stop after the first
            # coefficient and store the whole array as its statistic.
            statistic_values = self._raw.arima_res_.tvalues

        # Create params information.
        params_data = zip(self._raw.arima_res_.data.param_names,
                          self._raw.arima_res_.params,
                          self._raw.arima_res_.bse, statistic_values,
                          self._raw.arima_res_.pvalues,
                          self._raw.arima_res_.conf_int(alpha))

        # Add coefficients statistics to series.
        for name, coef, std, tvalue, pvalue, (cil, ciu) in params_data:
            d['%s_%s' % (name, 'coef')] = coef
            d['%s_%s' % (name, 'std')] = std
            d['%s_%s' % (name, 'tvalue')] = tvalue
            d['%s_%s' % (name, 'tprob')] = pvalue
            d['%s_%s' % (name, 'cil')] = cil
            d['%s_%s' % (name, 'ciu')] = ciu

        # Further statistics
        d.update(self._resid_stats())

        # We cannot use the params_from_summary because this wrappers stores
        # different models with different summaries. The right way to solve this
        # is by performing the statistics related with the residuals in the
        # regression_wrapper._resid_stats.
        #d.update(self._params_from_summary())

        # Return
        return d

    def _init_config(self):
        """This method initialises the configuration.

        The interesting data is in the method ``__init__`` of the object
        ``self._raw`` (ARIMA) and in the method ``fit`` of the object
        ``self._raw.arima_res_.model``.

        TODO: Handle if the instances passed to getargspecdict do not exist.

        Returns
        -------
        dict :
            Merge of both argument dictionaries; entries coming from
            ``__init__`` override those coming from ``fit``.
        """
        # Create dictionary.
        d = {}

        # Fill it.
        d.update(self._getargspecdict(self._raw.arima_res_.model, 'fit'))
        d.update(self._getargspecdict(self._raw, '__init__'))

        # Return
        return d

    # --------------------------------------------------------------------------
    #                           HELPER METHODS
    # --------------------------------------------------------------------------
    def as_summary(self, **kwargs):
        """This method builds the summary text.

        The summary of the raw model is trimmed and a "Manual" section with
        the statistics computed by this wrapper is appended.

        Parameters
        ----------
        **kwargs :
            Forwarded to ``summary`` of the raw model.

        Returns
        -------
        str :
            The summary text.
        """
        # Elements to split by.
        find = "=" * 78
        # Drop whatever follows the last rule of 78 "=" characters.
        smry = find.join(
            self._raw.summary(**kwargs).as_text().split(find)[:-1])
        smry = smry.split("\n")
        # Remove up to five "=" from the sixth-last line and up to five spaces
        # from each of the last five lines, then drop the final line.
        smry[-6] = smry[-6].replace('=', '', 5)
        smry[-5:] = [v.replace(' ', '', 5) for v in smry[-5:]]
        smry = "\n".join(smry[:-1])
        # Variables (the omnibus value is not computed and reported as 0.0).
        om, dw = 0.0, self.m_dw
        nm, nmp = self.m_nm_value, self.m_nm_prob
        # Add in new lines.
        smry += "\n%s\n%s\n%s\n" % ("=" * 78, "Manual".center(78,
                                                              ' '), "-" * 78)
        smry += "Omnibus:    %#25.3f   Durbin-Watson: %#23.3f\n" % (om, dw)
        smry += "Normal (N): %#25.3f   Prob(N):       %#23.3f\n" % (nm, nmp)
        smry += "=" * 78 + "\n"
        smry += "Note that JB, P(JB), skew and kurtosis have different values.\n"
        smry += "Note that Prob(Q) tests no correlation of residuals."
        # Return
        return smry

    # --------------------------------------------------------------------------
    #                                 FIT
    # --------------------------------------------------------------------------
    def fit(self, **kwargs):
        """This method fits the specified arima model.

        Parameters
        ----------
        **kwargs :
            Stored in the configuration and forwarded to ``fit`` of the
            underlying model.

        Returns
        -------
        object : A PyramidWrapper object (``self``).
        """
        # Fill config.
        self._config.update(kwargs)
        # Set model
        self._raw = self._model.fit(**kwargs)
        # Set residuals as attribute.
        self._resid = self._raw.resid()
        # Set series with interesting params.
        self._result = self._init_result(alpha=0.05)
        # return object.
        return self

    # ---------------------------------------------------------------------------
    #                               PREDICTION
    # ---------------------------------------------------------------------------
    def get_prediction(self, **kwargs):
        """This method computes the in-sample prediction.

        Parameters
        ----------
        **kwargs :
            Forwarded to ``predict_in_sample`` of the raw model and to
            ``_time``.

        Returns
        -------
        np.ndarray :
            Rows stacked along axis 0 in the order time, mean, lower
            confidence bound and upper confidence bound.
        """
        # Compute prediction
        forecast = self._raw.predict_in_sample(**kwargs)
        # Get plotting values.
        mean = forecast.reshape(1, -1)
        # Compute the interval once and slice both bounds from it.
        conf_int = self.conf_int_insample(mean, alpha=0.05)
        cilo = conf_int[:, 0].reshape(1, -1)
        ciup = conf_int[:, 1].reshape(1, -1)
        # Time (named so that it does not shadow the ``time`` module).
        time_axis = self._time(forecast=mean, **kwargs).reshape(1, -1)
        # Get plotting values.
        return np.concatenate((time_axis, mean, cilo, ciup), axis=0)

    def _time(self, forecast, start=None, **kwargs):
        """This method creates the time index for a forecast.

        Parameters
        ----------
        forecast :
            Array whose second dimension gives the number of time points.
        start :
            First time value. When None, the ``k_diff`` attribute of the
            underlying results object is used (0 if it does not exist).

        Returns
        -------
        np.ndarray :
            Consecutive integers starting at ``start``.
        """
        # Get default start.
        if start is None:
            start = getattr(self._raw.arima_res_, 'k_diff', 0)
        # Return
        return np.arange(forecast.shape[1]) + start

    # ---------------------------------------------------------------------------
    #                               FIND AUTO
    # ---------------------------------------------------------------------------
    def from_instance(self, arima, **kwargs):
        """This method constructs a PyramidWrapper object from pyramid.ARIMA

        Parameters
        ----------
        arima :
            An already fitted pyramid ARIMA object.
        **kwargs :
            Currently unused.

        Returns
        -------
        object : A new PyramidWrapper object.
        """
        # Create object.
        instance = PyramidWrapper()
        # Set model.
        instance._raw = arima
        # Set residuals as attribute.
        instance._resid = arima.resid()
        # Set series with interesting params.
        instance._result = instance._init_result(alpha=0.05)
        # Set configuration parameters.
        instance._config = instance._init_config()
        # Return
        return instance

    def auto(self, **kwargs):
        """This method finds the best arima.

        @see pyramid.arima.auto_arima

        Parameters
        ----------
        **kwargs :
            Forwarded to ``auto_arima``.

        Returns
        -------
        list :
            PyramidWrapper objects, one per model returned by ``auto_arima``.
            None when ``auto_arima`` returns neither an ARIMA nor a list.
        """
        # Library.
        from pyramid.arima import auto_arima
        from pyramid.arima.arima import ARIMA

        # Compute auto_arima.
        results = auto_arima(**kwargs)

        # Return a single PyramidWrapper object.
        if isinstance(results, ARIMA):
            return [self.from_instance(results)]

        # Return an array of PyramidWrapper objects.
        if isinstance(results, list):
            return [self.from_instance(a) for a in results]

        # Any other result type is not handled.
        return None
Beispiel #19
0
# -*- coding: utf-8 -*-
"""
Wed May  9 21:44:51 2018: Dhiraj
"""
#auto.arima in Python
#https://medium.com/@josemarcialportilla/using-python-and-auto-arima-to-forecast-seasonal-time-series-90877adff03c

#%%
#pip install pyramid-arima
#https://github.com/tgsmith61591/pyramid
#from pyramid.arima import auto_arima
#https://github.com/tgsmith61591/pyramid/blob/master/examples/quick_start_example.ipynb
import numpy as np
import pyramid

print('numpy version: %r' % np.__version__)
print('pyramid version: %r' % pyramid.__version__)

# ``wineind`` is the dataset known in R as ``forecast::wineind``; pyramid
# ships it through ``pyramid.datasets.load_wineind``.
from pyramid.datasets import load_wineind
from pyramid.arima import ARIMA

wineind = load_wineind()

# Seasonal ARIMA with a seasonal period of 12.
fit = ARIMA(order=(1, 1, 1), seasonal_order=(0, 1, 1, 12)).fit(y=wineind)
def forecasting_sales():
    """Forecast the ``VENDA`` series and return the result as JSON.

    The number of periods to forecast is read from the ``period`` query
    argument of the request. The first 96 rows of the remote CSV are split
    into a training part and a test part holding the last ``period`` rows;
    a seasonal ARIMA is fitted on the Box-Cox transformed training values and
    its back-transformed forecast is compared with the test values.

    :return: JSON response with the keys ``acuracia`` (100 minus the mean
             absolute percentage error, rounded), ``real`` and ``previsto``.
    :raises TypeError: if the ``period`` argument is missing.
    :raises ValueError: if the ``period`` argument is not an integer.
    """
    # Convert once, before the download, so a bad argument fails fast.
    period = int(request.args.get('period'))
    data = pd.read_csv(
        'http://robsonfernandes.net/mestrado/data/food-sp.csv')

    print('Passou 00')
    variavel = 'VENDA'

    data.index = data['DATA']

    interval = 96 - period
    # Copy the training slice: a column is added to it below, and writing to
    # a slice of ``data`` is chained assignment (SettingWithCopyWarning).
    # NOTE(review): the training slice starts at row 1, so row 0 is never
    # used -- confirm this is intended.
    df_train = data.iloc[1:interval].copy()
    df_test = data.iloc[interval:96]

    df_train[variavel + '_box'], lmbda = stats.boxcox(df_train[variavel])

    print('Passou 01')
    # model = auto_arima(df_train[variavel+'_box'],
    #                    n_fits=10,
    #                    start_p=0,
    #                    start_q=0,
    #                    max_p=5,
    #                    max_q=5,
    #                    m=20,
    #                    start_P=0,
    #                    d=1,
    #                    D=1,
    #                    trace=True,
    #                    stationary=False,
    #                    error_action='ignore',
    #                    suppress_warnings=True,
    #                    stepwise=True)

    model = ARIMA(callback=None,
                  disp=0,
                  maxiter=50,
                  method=None,
                  order=(1, 1, 1),
                  out_of_sample_size=0,
                  scoring='mse',
                  scoring_args={},
                  seasonal_order=(2, 1, 1, 20),
                  solver='lbfgs',
                  start_params=None,
                  suppress_warnings=True,
                  transparams=True,
                  trend='c')

    model.fit(df_train[variavel + '_box'])
    # model.summary()

    forecast = model.predict(n_periods=period)

    # Undo the Box-Cox transform so the forecast is on the original scale.
    y_pred = invboxcox(forecast, lmbda)
    y_true = df_test[variavel].values
    print('Passou 02')
    acuracia = round(100 - mean_absolute_percentage_error(y_true, y_pred),
                     0)

    retorno = {
        'acuracia': acuracia,
        'real': y_true.tolist(),
        'previsto': y_pred.tolist()
    }

    return jsonify(retorno)
Beispiel #21
0
def test_oob_for_issue_28():
    """Out-of-sample scoring also works when an exogenous array is given."""
    # Number of trailing observations held out for the OOB score.
    n_oob = 10

    # Continuation of above: can we do one with an exogenous array, too?
    exog = rs.rand(hr.shape[0], 4)
    oob_model = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        out_of_sample_size=n_oob,
    ).fit(y=hr, exogenous=exog)

    oob_score = oob_model.oob()
    assert not np.isnan(oob_score)

    # The endog kept in ``data`` matches the original series, while the
    # model's endog is the differenced array (original length - d).
    assert np.allclose(oob_model.arima_res_.data.endog, hr, rtol=1e-2)
    assert oob_model.arima_res_.model.endog.shape[0] == hr.shape[0] - 1

    # Same checks for the exogenous array.
    assert np.allclose(oob_model.arima_res_.data.exog, exog, rtol=1e-2)
    assert oob_model.arima_res_.model.exog.shape[0] == exog.shape[0] - 1

    # Fit an equivalent model on the data without the last ``n_oob``
    # observations and without OOB scoring. Its score on the held-out part
    # shows that the OOB model was only trained on the first
    # (train - n_out_of_bag) samples.
    baseline = ARIMA(
        order=(2, 1, 2),
        suppress_warnings=True,
        out_of_sample_size=0,
    ).fit(y=hr[:-n_oob], exogenous=exog[:-n_oob, :])

    score_fn = get_callable(baseline.scoring, VALID_SCORING)
    held_out_preds = baseline.predict(n_periods=n_oob,
                                      exogenous=exog[-n_oob:, :])
    assert np.allclose(oob_score,
                       score_fn(hr[-n_oob:], held_out_preds),
                       rtol=1e-2)

    future_exog = rs.rand(5, 4)

    # Show that the model parameters are the same (within tolerance)
    assert np.allclose(oob_model.params(), baseline.params(), rtol=1e-2)

    # Now assert on the forecast differences.
    oob_forecasts = oob_model.predict(n_periods=5, exogenous=future_exog)
    baseline_forecasts = baseline.predict(n_periods=5, exogenous=future_exog)

    assert_raises(AssertionError, assert_array_almost_equal,
                  oob_forecasts, baseline_forecasts)

    # But after we update the baseline model with the latest data, we should
    # be producing the same forecasts

    # First, show we'll fail if we try to add observations with no exogenous
    assert_raises(ValueError, baseline.add_new_observations,
                  hr[-n_oob:], None)

    # Also show we'll fail if we try to add mis-matched shapes of data
    assert_raises(ValueError, baseline.add_new_observations,
                  hr[-n_oob:], future_exog)

    # Show we fail if we try to add observations with a different dim exog
    assert_raises(ValueError, baseline.add_new_observations,
                  hr[-n_oob:], future_exog[:, 2])

    # Actually add them now, and compare the forecasts (should be the same)
    baseline.add_new_observations(hr[-n_oob:], exog[-n_oob:, :])
    updated_forecasts = baseline.predict(n_periods=5, exogenous=future_exog)
    assert np.allclose(oob_forecasts, updated_forecasts, rtol=1e-2)
Beispiel #22
0
    ## Seasonal order = (1,1,2,52)
    ## Score = 6964
    ### Score seems high, also lookback is very small

    # Train / test data split
    # Both comparisons are strict, so rows dated exactly 2014-12-31 end up in
    # neither the training nor the test set.
    train = df[df["WeekEnding"] < datetime.datetime.strptime(
        "2014-12-31", "%Y-%m-%d")]
    test = df[df["WeekEnding"] > datetime.datetime.strptime(
        "2014-12-31", "%Y-%m-%d")]

    # Run ARIMA with found parameters
    # NOTE(review): the notes preceding this section mention a seasonal order
    # of (1,1,2,52) while the model below uses (4,1,2,52) -- confirm which
    # one is intended.
    stepwise = ARIMA(callback=None,
                     disp=0,
                     maxiter=50,
                     method=None,
                     order=(10, 1, 12),
                     seasonal_order=(4, 1, 2, 52),
                     solver="lbfgs",
                     suppress_warnings=True,
                     transparams=True,
                     trend="c")
    # Fit and predict
    print("Fitting and Predicting...")
    # The model is fitted on everything in ``train`` except the date column
    # and asked for one forecast per test row.
    stepwise.fit(train.drop("WeekEnding", axis=1))
    future = stepwise.predict(n_periods=len(test.index))

    # Merge predictions with raw data
    # The forecasts are indexed by the test dates so they can be joined to
    # the date-indexed raw data.
    future = pd.DataFrame(future,
                          index=test["WeekEnding"],
                          columns=["Forecast"])
    df = df.set_index("WeekEnding").join(future, how="outer")
    # Keep only the rows without missing values.
    forecast = df.dropna()
Beispiel #23
0
def test_oob_sarimax():
    """Check out-of-sample (OOB) scoring for a seasonal ARIMA with exogenous data.

    A model fitted on the full series with ``out_of_sample_size=15`` is
    compared against a model fitted on the series without its last 15
    observations and without OOB scoring.
    """
    # Two random exogenous columns, one row per observation.
    # (``rs`` is presumably a module-level random state -- defined elsewhere.)
    xreg = rs.rand(wineind.shape[0], 2)
    fit = ARIMA(order=(1, 1, 1),
                seasonal_order=(0, 1, 1, 12),
                out_of_sample_size=15).fit(y=wineind, exogenous=xreg)

    # Reference model: same orders, trained without the last 15 observations.
    fit_no_oob = ARIMA(
            order=(1, 1, 1), seasonal_order=(0, 1, 1, 12),
            out_of_sample_size=0, suppress_warnings=True)\
        .fit(y=wineind[:-15], exogenous=xreg[:-15, :])

    # now assert some of the same things here that we did in the former test
    oob = fit.oob()

    # compare scores:
    # The OOB score must match the reference model's score on the 15
    # held-out observations.
    scoring = get_callable(fit_no_oob.scoring, VALID_SCORING)
    no_oob_preds = fit_no_oob.predict(n_periods=15, exogenous=xreg[-15:, :])
    assert np.allclose(oob, scoring(wineind[-15:], no_oob_preds), rtol=1e-2)

    # show params are still the same
    assert np.allclose(fit.params(), fit_no_oob.params(), rtol=1e-2)

    # show we can add the new samples and get the same forecasts (within
    # the relative tolerance)
    xreg_test = rs.rand(5, 2)
    fit_no_oob.add_new_observations(wineind[-15:], xreg[-15:, :])
    assert np.allclose(fit.predict(5, xreg_test),
                       fit_no_oob.predict(5, xreg_test),
                       rtol=1e-2)

    # Show we can get a confidence interval out here
    preds, conf = fit.predict(5, xreg_test, return_conf_int=True)
    assert all(isinstance(a, np.ndarray) for a in (preds, conf))