def test_legacy_int_index():
    """Legacy pandas integer index classes must register as integer indexes."""
    from pandas import Int64Index, UInt64Index

    # Same checks for the signed and the unsigned legacy class.
    for legacy_cls in (Int64Index, UInt64Index):
        legacy = legacy_cls(np.arange(100))
        assert is_int_index(legacy)
        assert not is_float_index(legacy)
def test_seasonality_smoke(index, forecast_index):
    """Smoke test: ``Seasonality`` runs for each index / forecast-index pair."""
    seasonal = Seasonality(12)
    seasonal.in_sample(index)
    steps = len(forecast_index) if forecast_index is not None else 83

    # A UserWarning is expected for integer indexes whose values are not
    # consecutive, and for plain ``pd.Index`` objects holding values above
    # 2 ** 63 when no explicit forecast index is supplied.
    expect_warning = (is_int_index(index) and np.any(np.diff(index) != 1)) or (
        type(index) is pd.Index
        and max(index) > 2 ** 63
        and forecast_index is None
    )
    warn = UserWarning if expect_warning else None
    with pytest_warns(warn):
        seasonal.out_of_sample(steps, index, forecast_index)

    assert isinstance(seasonal.period, int)
    # Only checks that these do not raise.
    str(seasonal)
    hash(seasonal)

    has_freq = isinstance(index, (pd.DatetimeIndex, pd.PeriodIndex)) and index.freq
    if has_freq:
        seasonal = Seasonality.from_index(index)
        seasonal.in_sample(index)
        seasonal.out_of_sample(steps, index, forecast_index)
        Seasonality.from_index(list(index))
def test_time_trend_smoke(index, forecast_index):
    """Smoke test: ``TimeTrend`` runs for each index / forecast-index pair."""
    trend = TimeTrend(True, 2)
    trend.in_sample(index)
    steps = len(forecast_index) if forecast_index is not None else 83

    # A UserWarning is expected for integer indexes whose values are not
    # consecutive, and for plain ``pd.Index`` objects holding values above
    # 2 ** 63 when no explicit forecast index is supplied.
    expect_warning = (is_int_index(index) and np.any(np.diff(index) != 1)) or (
        type(index) is pd.Index
        and max(index) > 2 ** 63
        and forecast_index is None
    )
    warn = UserWarning if expect_warning else None
    with pytest_warns(warn):
        trend.out_of_sample(steps, index, forecast_index)

    # Only checks that these do not raise.
    str(trend)
    hash(trend)

    assert isinstance(trend.order, int)
    assert isinstance(trend._constant, bool)

    # "ctt" must compare equal to TimeTrend(True, 2); other specs must not.
    assert TimeTrend.from_string("ctt") == trend
    for spec in ("ct", "t", "n"):
        assert TimeTrend.from_string(spec) != trend
    assert Seasonality(12) != trend

    # Trend with no constant and order 0.
    no_terms = TimeTrend(False, 0)
    no_terms.in_sample(index)
    str(no_terms)
def test_seasonality(index):
    """Check values, labels and validation of ``Seasonality`` dummies."""
    nobs = index.shape[0]
    seasonal = Seasonality(period=12)
    dummies = seasonal.in_sample(index)

    assert seasonal.is_dummy
    assert dummies.shape == (nobs, 12)
    pd.testing.assert_index_equal(dummies.index, index)
    assert np.all(dummies.sum(1) == 1.0)
    assert list(dummies.columns) == [f"s({i},12)" for i in range(1, 13)]

    # Row r must have its single 1.0 in column r % 12.
    rows = np.arange(nobs)
    expected = np.zeros((nobs, 12))
    expected[rows, rows % 12] = 1.0
    np.testing.assert_equal(expected, np.asarray(dummies))

    # A UserWarning is expected for integer indexes whose values are not
    # consecutive, and for plain ``pd.Index`` objects with values above 2 ** 63.
    expect_warning = (is_int_index(index) and np.any(np.diff(index) != 1)) or (
        type(index) is pd.Index and max(index) > 2 ** 63
    )
    warn = UserWarning if expect_warning else None
    with pytest_warns(warn):
        fcast = seasonal.out_of_sample(steps=12, index=index)
    # The first forecast row continues the cycle where the sample stopped.
    assert fcast.iloc[0, len(index) % 12] == 1.0
    assert np.all(fcast.sum(1) == 1)

    # With initial_period=3 the first row's dummy sits in the third column.
    seasonal = Seasonality(period=7, initial_period=3)
    dummies = seasonal.in_sample(index)
    assert dummies.iloc[0, 2] == 1.0
    assert dummies.iloc[0].sum() == 1.0
    assert seasonal.initial_period == 3

    # Invalid constructor arguments.
    with pytest.raises(ValueError, match="initial_period must be in"):
        Seasonality(period=12, initial_period=-3)
    with pytest.raises(ValueError, match="period must be >= 2"):
        Seasonality(period=1)
def __init__(
    self,
    stl: STL,
    result: DecomposeResult,
    model,
    model_result,
    endog,
) -> None:
    """
    Store the fitted pieces and derive an index for ``endog``.

    Parameters
    ----------
    stl : STL
        Stored as ``self._stl``.
    result : DecomposeResult
        Stored as ``self._result``.
    model
        Stored as ``self._model``.
    model_result
        Stored as ``self._model_result``.
    endog
        Converted with ``np.asarray`` and stored as ``self._endog``. Its
        ``index`` attribute, when present, seeds ``self._index``.
    """
    self._stl = stl
    self._result = result
    self._model = model
    self._model_result = model_result
    self._endog = np.asarray(endog)
    self._nobs = self._endog.shape[0]

    # Use the data's own index if it has one, otherwise 0, 1, ..., nobs-1.
    self._index = getattr(endog, "index", pd.RangeIndex(self._nobs))

    # Date-like and integer indexes are kept unchanged.
    date_like = isinstance(self._index, (pd.DatetimeIndex, pd.PeriodIndex))
    if date_like or is_int_index(self._index):
        return

    # Anything else: try to coerce to dates, falling back to a RangeIndex.
    try:
        self._index = pd.to_datetime(self._index)
    except ValueError:
        self._index = pd.RangeIndex(self._nobs)
def test_time_trend(index):
    """Check ``TimeTrend`` terms in sample and when extended out of sample."""
    nobs = index.shape[0]

    # Constant only.
    trend = TimeTrend(constant=True)
    const = trend.in_sample(index)
    assert const.shape == (nobs, 1)
    assert np.all(const == 1)
    pd.testing.assert_index_equal(const.index, index)

    # A UserWarning is expected for integer indexes whose values are not
    # consecutive, and for plain ``pd.Index`` objects with values above 2 ** 63.
    expect_warning = (is_int_index(index) and np.any(np.diff(index) != 1)) or (
        type(index) is pd.Index and max(index) > 2 ** 63
    )
    warn = UserWarning if expect_warning else None
    with pytest_warns(warn):
        const_fcast = trend.out_of_sample(23, index)
    assert np.all(const_fcast == 1)

    # Neither constant nor trend terms: no columns.
    trend = TimeTrend(constant=False)
    empty = trend.in_sample(index)
    assert empty.shape == (nobs, 0)

    # Linear and quadratic terms without a constant.
    trend = TimeTrend(constant=False, order=2)
    poly = trend.in_sample(index)
    assert poly.shape == (nobs, 2)
    assert list(poly.columns) == ["trend", "trend_squared"]

    # Constant plus polynomial terms equals the two pieces side by side.
    trend = TimeTrend(constant=True, order=2)
    final = trend.in_sample(index)
    expected = pd.concat([const, poly], axis=1)
    pd.testing.assert_frame_equal(final, expected)

    # In-sample on all but the last 50 points, then extend by 50 steps,
    # once with a generated index and once with an explicit forecast index.
    trend = TimeTrend(constant=True, order=2)
    head = index[:-50]
    short = trend.in_sample(head)
    with pytest_warns(warn):
        remainder = trend.out_of_sample(50, head)
    direct = trend.out_of_sample(
        steps=50, index=head, forecast_index=index[-50:]
    )

    combined = pd.concat([short, remainder], axis=0)
    if isinstance(index, (pd.DatetimeIndex, pd.RangeIndex)):
        pd.testing.assert_frame_equal(combined, final)

    combined = pd.concat([short, direct], axis=0)
    pd.testing.assert_frame_equal(combined, final, check_index_type=False)
def test_fourier_smoke(index, forecast_index):
    """Smoke test: ``Fourier`` runs for each index / forecast-index pair."""
    fourier = Fourier(12, 2)
    fourier.in_sample(index)
    steps = len(forecast_index) if forecast_index is not None else 83

    # A UserWarning is expected for integer indexes whose values are not
    # consecutive, and for plain ``pd.Index`` objects holding values above
    # 2 ** 63 when no explicit forecast index is supplied.
    expect_warning = (is_int_index(index) and np.any(np.diff(index) != 1)) or (
        type(index) is pd.Index
        and max(index) > 2 ** 63
        and forecast_index is None
    )
    warn = UserWarning if expect_warning else None
    with pytest_warns(warn):
        fourier.out_of_sample(steps, index, forecast_index)

    assert isinstance(fourier.period, float)
    assert isinstance(fourier.order, int)
    # Only checks that these do not raise.
    str(fourier)
    hash(fourier)

    # An order of 7 with period 12 must be rejected.
    with pytest.raises(ValueError, match=r"2 \* order must be <= period"):
        Fourier(12, 7)
def test_is_int_index(int_type, int_size):
    """An index built with an integer dtype string is an int index."""
    dtype = f"{int_type}{int_size}"
    candidate = pd.Index(np.arange(100), dtype=dtype)
    assert is_int_index(candidate)
    assert not is_float_index(candidate)
def test_legacy_float_index():
    """The legacy ``Float64Index`` class must register as a float index."""
    from pandas import Float64Index

    legacy = Float64Index(np.arange(100))
    assert is_float_index(legacy)
    assert not is_int_index(legacy)
def test_is_float_index(float_size):
    """An index built with a float dtype string is a float index."""
    dtype = f"f{float_size}"
    candidate = pd.Index(np.arange(100.0), dtype=dtype)
    assert is_float_index(candidate)
    assert not is_int_index(candidate)
def get_index_loc(key, index):
    """
    Get the location of a specific key in an index

    Parameters
    ----------
    key : label
        The key for which to find the location if the underlying index is
        a DateIndex or a location if the underlying index is a RangeIndex
        or an Index with an integer dtype.
    index : pd.Index
        The index to search.

    Returns
    -------
    loc : int
        The location of the key
    index : pd.Index
        The (possibly expanded) index, sliced so that it ends at `loc`
        (or at the last position of `loc` when `loc` is a slice).
    index_was_expanded : bool
        Whether or not the index was expanded to accommodate `key`.

    Raises
    ------
    KeyError
        If the index is a RangeIndex or has an integer dtype and `key`
        cannot be used as a location in the (possibly expanded) index, or
        if `get_loc` cannot find `key`.

    Notes
    -----
    If `key` is past the end of the given index, and the index is either
    an Index with an integral dtype or a date index, this function extends
    the index up to and including key, and then returns the location in the
    new index.
    """
    base_index = index

    date_index = isinstance(base_index, (PeriodIndex, DatetimeIndex))
    int_index = is_int_index(base_index)
    range_index = isinstance(base_index, RangeIndex)
    index_class = type(base_index)
    int_key = isinstance(key, (int, np.integer))

    nobs = len(index)

    # Special handling for RangeIndex
    if range_index and int_key:
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > nobs - 1:
            start = base_index.start
            step = base_index.step
            stop = start + (key + 1) * step
            index = RangeIndex(start=start, stop=stop, step=step)

    # Special handling for NumericIndex
    if not range_index and int_index and not date_index and int_key:
        # Negative indices (that lie in the Index)
        if key < 0 and -key <= nobs:
            key = nobs + key
        # Out-of-sample (note that we include key itself in the new index)
        elif key > base_index[-1]:
            index = Index(np.arange(base_index[0], int(key + 1)))

    # Special handling for date indexes
    if date_index:
        # Use index type to choose creation function
        index_fn = date_range if index_class is DatetimeIndex else period_range

        # Integer key (i.e. already given a location)
        if int_key:
            # Negative indices (that lie in the Index)
            if key < 0 and -key < nobs:
                key = index[nobs + key]
            # Out-of-sample (note that we include key itself in the new
            # index)
            elif key > len(base_index) - 1:
                index = index_fn(
                    start=base_index[0],
                    periods=int(key + 1),
                    freq=base_index.freq,
                )
                key = index[-1]
            else:
                key = index[key]
        # Other key types (i.e. string date or some datetime-like object)
        else:
            # Convert the key to the appropriate date-like object
            if index_class is PeriodIndex:
                date_key = Period(key, freq=base_index.freq)
            else:
                date_key = Timestamp(key)

            # Out-of-sample
            if date_key > base_index[-1]:
                # First create an index that may not always include `key`
                index = index_fn(start=base_index[0], end=date_key,
                                 freq=base_index.freq)

                # Now make sure we include `key`
                if not index[-1] == date_key:
                    index = index_fn(
                        start=base_index[0],
                        periods=len(index) + 1,
                        freq=base_index.freq,
                    )

                    # To avoid possible inconsistencies with `get_loc` below,
                    # set the key directly equal to the last index location
                    key = index[-1]

    # Get the location
    if date_index:
        # (note that get_loc will throw a KeyError if key is invalid)
        loc = index.get_loc(key)
    elif int_index or range_index:
        # For NumericIndex and RangeIndex, key is assumed to be the location
        # and not an index value (this assumption is required to support
        # RangeIndex)
        try:
            index[key]
        # We want to raise a KeyError in this case, to keep the exception
        # consistent across index types.
        # - Attempting to index with an out-of-bound location (e.g.
        #   index[10] on an index of length 9) will raise an IndexError
        #   (as of Pandas 0.22)
        # - Attempting to index with a type that cannot be cast to integer
        #   (e.g. a non-numeric string) will raise a ValueError if the
        #   index is RangeIndex (otherwise will raise an IndexError)
        #   (as of Pandas 0.22)
        except (IndexError, ValueError) as e:
            # Chain explicitly so the original error is reported as the cause.
            raise KeyError(str(e)) from e
        loc = key
    else:
        loc = index.get_loc(key)

    # Check if we now have a modified index
    index_was_expanded = index is not base_index

    # Return the index through the end of the loc / slice
    end = loc.stop - 1 if isinstance(loc, slice) else loc

    return loc, index[:end + 1], index_was_expanded
def _init_dates(self, dates=None, freq=None):
    """
    Initialize dates

    Parameters
    ----------
    dates : array_like, optional
        An array like object containing dates.
    freq : str, tuple, datetime.timedelta, DateOffset or None, optional
        A frequency specification for either `dates` or the row labels from
        the endog / exog data.

    Raises
    ------
    ValueError
        If `freq` is given without any index; if `dates` is given but cannot
        be coerced to a date index; if `dates` is a date index with no
        frequency and none can be inferred; if `freq` cannot be matched to,
        or is incompatible with, the index; or if `freq` is given but the
        index could not be coerced to dates.

    Warns
    -----
    ValueWarning
        If a frequency had to be inferred, or if the index is unsupported,
        has no frequency information, or is not monotonic.

    Notes
    -----
    Creates `self._index` and related attributes. `self._index` is always
    a Pandas index, and it is always NumericIndex, DatetimeIndex, or
    PeriodIndex.

    If Pandas objects, endog / exog may have any type of index. If it is
    a NumericIndex with values 0, 1, ..., nobs-1 or if it is (coerceable to)
    a DatetimeIndex or PeriodIndex *with an associated frequency*, then it
    is called a "supported" index. Otherwise it is called an "unsupported"
    index.

    Supported indexes are standardized (i.e. a list of date strings is
    converted to a DatetimeIndex) and the result is put in `self._index`.

    Unsupported indexes are ignored, and a supported NumericIndex is
    generated and put in `self._index`. Warnings are issued in this case
    to alert the user if the returned index from some operation (e.g.
    forecasting) is different from the original data's index. However,
    whenever possible (e.g. purely in-sample prediction), the original
    index is returned.

    The benefit of supported indexes is that they allow *forecasting*, i.e.
    it is possible to extend them in a reasonable way. Thus every model
    must have an underlying supported index, even if it is just a generated
    NumericIndex.
    """
    # Get our index from `dates` if available, otherwise from whatever
    # Pandas index we might have retrieved from endog, exog
    if dates is not None:
        index = dates
    else:
        index = self.data.row_labels

    # Sanity check that we do not have a `freq` without an index
    if index is None and freq is not None:
        raise ValueError("Frequency provided without associated index.")

    # If an index is available, see if it is a date-based index or if it
    # can be coerced to one. (If it cannot we'll fall back, below, to an
    # internal, 0, 1, ... nobs-1 integer index for modeling purposes)
    inferred_freq = False
    if index is not None:
        # Try to coerce to date-based index
        if not isinstance(index, (DatetimeIndex, PeriodIndex)):
            try:
                # Only try to coerce non-numeric index types (string,
                # list of date-times, etc.)
                # Note that np.asarray(Float64Index([...])) yields an
                # object dtype array in earlier versions of Pandas (and so
                # will not have is_numeric_dtype == True), so explicitly
                # check for it here. But note also that in very early
                # Pandas (~0.12), Float64Index does not exist (and so the
                # statsmodels compat makes it an empty tuple, so in that
                # case also check if the first element is a float.
                _index = np.asarray(index)
                if (is_numeric_dtype(_index) or
                        is_float_index(index) or
                        (isinstance(_index[0], float))):
                    raise ValueError("Numeric index given")

                # If a non-index Pandas series was given, only keep its
                # values (because we must have a pd.Index type, below, and
                # pd.to_datetime will return a Series when passed
                # non-list-like objects)
                if isinstance(index, Series):
                    index = index.values

                # All coercion is done via pd.to_datetime
                # Note: date coercion via pd.to_datetime does not handle
                # string versions of PeriodIndex objects most of the time.
                _index = to_datetime(index)

                # Older versions of Pandas can sometimes fail here and
                # return a numpy array - check to make sure it's an index
                if not isinstance(_index, Index):
                    raise ValueError("Could not coerce to date index")
                index = _index
            except Exception as err:
                # `Exception` rather than a bare except, so that
                # KeyboardInterrupt / SystemExit are not swallowed.
                # Only want to actually raise an exception if `dates` was
                # provided but cannot be coerced. If we got the index from
                # the row_labels, we'll just ignore it and use the integer
                # index below
                if dates is not None:
                    raise ValueError("Non-date index index provided to"
                                     " `dates` argument.") from err

        # Now, if we were given, or coerced, a date-based index, make sure
        # it has an associated frequency
        if isinstance(index, (DatetimeIndex, PeriodIndex)):
            # If no frequency, try to get an inferred frequency
            if freq is None and index.freq is None:
                freq = index.inferred_freq
                # If we got an inferred frequency, alert the user
                if freq is not None:
                    inferred_freq = True
                    warnings.warn(
                        "No frequency information was"
                        " provided, so inferred frequency %s"
                        " will be used." % freq,
                        ValueWarning,
                        stacklevel=2,
                    )

            # Convert the passed freq to a pandas offset object
            if freq is not None:
                freq = to_offset(freq)

            # Now, if no frequency information is available from the index
            # itself or from the `freq` argument, raise an exception
            if freq is None and index.freq is None:
                # But again, only want to raise the exception if `dates`
                # was provided.
                if dates is not None:
                    raise ValueError("No frequency information was"
                                     " provided with date index and no"
                                     " frequency could be inferred.")
            # However, if the index itself has no frequency information but
            # the `freq` argument is available (or was inferred), construct
            # a new index with an associated frequency
            elif freq is not None and index.freq is None:
                resampled_index = date_range(
                    start=index[0], end=index[-1], freq=freq)
                if not inferred_freq and not resampled_index.equals(index):
                    raise ValueError("The given frequency argument could"
                                     " not be matched to the given index.")
                index = resampled_index
            # Finally, if the index itself has a frequency and there was
            # also a given frequency, raise an exception if they are not
            # equal
            elif (freq is not None and not inferred_freq and
                    not (index.freq == freq)):
                raise ValueError("The given frequency argument is"
                                 " incompatible with the given index.")
        # Finally, raise an exception if we could not coerce to date-based
        # but we were given a frequency argument
        elif freq is not None:
            raise ValueError("Given index could not be coerced to dates"
                             " but `freq` argument was provided.")

    # Get attributes of the index
    has_index = index is not None
    date_index = isinstance(index, (DatetimeIndex, PeriodIndex))
    int_index = is_int_index(index)
    range_index = isinstance(index, RangeIndex)
    has_freq = index.freq is not None if date_index else None
    increment = Index(range(self.endog.shape[0]))
    is_increment = index.equals(increment) if int_index else None
    if date_index:
        try:
            is_monotonic = index.is_monotonic_increasing
        except AttributeError:
            # Remove after pandas 1.5 is minimum
            is_monotonic = index.is_monotonic
    else:
        is_monotonic = None

    # Issue warnings for unsupported indexes
    if has_index and not (date_index or range_index or is_increment):
        warnings.warn(
            "An unsupported index was provided and will be"
            " ignored when e.g. forecasting.",
            ValueWarning,
            stacklevel=2,
        )
    if date_index and not has_freq:
        warnings.warn(
            "A date index has been provided, but it has no"
            " associated frequency information and so will be"
            " ignored when e.g. forecasting.",
            ValueWarning,
            stacklevel=2,
        )
    if date_index and not is_monotonic:
        warnings.warn(
            "A date index has been provided, but it is not"
            " monotonic and so will be ignored when e.g."
            " forecasting.",
            ValueWarning,
            stacklevel=2,
        )

    # Construct the internal index
    index_generated = False
    valid_index = ((date_index and has_freq and is_monotonic) or
                   (int_index and is_increment) or range_index)

    if valid_index:
        _index = index
    else:
        _index = increment
        index_generated = True
    self._index = _index
    self._index_generated = index_generated
    self._index_none = index is None
    self._index_int64 = int_index and not range_index and not date_index
    self._index_dates = date_index and not index_generated
    self._index_freq = self._index.freq if self._index_dates else None
    self._index_inferred_freq = inferred_freq

    # For backwards compatibility, set data.dates, data.freq
    self.data.dates = self._index if self._index_dates else None
    self.data.freq = self._index.freqstr if self._index_dates else None
def test_instantiation_valid():
    """Valid endog / dates / freq combinations build models as expected."""
    tsa_model.__warningregistry__ = {}

    # The primary goal of this test function is to make sure the
    # combinations that are supposed to be valid are actually valid, and
    # that valid but unsupported options give the appropriate warning
    # Secondarily, it also has some tests that invalid combinations raise
    # exceptions, although it's not intended to be comprehensive.
    #
    # Each of `endog`, `exog` can be in the following categories:
    # 0. None (only for exog)
    # 1. list
    # 2. numpy array
    # 3. pandas series
    # 4. pandas dataframe
    #
    # Each pandas index (of `endog`, `exog`, or passed to `dates`) can be:
    # 0. None
    # 1. RangeIndex (if applicable; i.e. if Pandas >= 0.18)
    # 2. Integral Indexes with values exactly equal to 0, 1, ..., nobs-1
    # 3. DatetimeIndex with frequency
    # 4. PeriodIndex with frequency
    # 5. Anything that does not fall into the above categories also should
    #    only raise an exception if it was passed to dates, and may trigger
    #    a warning otherwise.
    #
    # `date` can be one of the following:
    # 0. None
    # 2. Pandas index #2
    # 3. Pandas index #3
    # 4. List of date strings (requires freq)
    # 5. List of datetime objects (requires freq)
    # 6. Array of date strings (requires freq)
    # 7. Array of datetime objects (requires freq)
    # 8. Series of date strings (requires freq)
    # 9. Series of datetime objects (requires freq)
    # 10. Series of pandas timestamps (requires freq)
    # 11. Anything that does not fall into the above categories should raise
    #     an exception.
    #
    # `freq` can be:
    # 0. None
    # 1. Something that can be passed to `pd.to_offset`
    # 2. Anything that cannot should raise an Exception
    #
    # Each test will be denoted by:
    # endog.index:exog.index/date/freq where the corresponding
    # location is the integer from above; e.g. 1.0:0.0/9/1 corresponds to
    # - List endog (with no index)
    # - No exog
    # - Series of datetime objects
    # - Something valid for `pd.to_offset` (e.g. 'D', if that works with
    #   dates)
    #
    # Notice that the endog.index:exog.index really collapses to a single
    # element, which is the evaluated `row_label`. This is first the exog
    # index, if exists, then the endog index, if it exists, or None
    # otherwise. **Thus, we will not test `exog` here.**
    #
    # Example valid combinations of row_label/date/freq include:
    # - */0/0 (i.e. anything is valid if date and freq are not passed)
    # - */%/% where %/% denotes a valid date/freq combination (i.e. any
    #   row_label is valid if a valid date/freq combination is given)
    #
    # Example invalid combinations include:
    # - [1-2],[3-4].4/0/[1-2] (i.e. if have freq, then must have, or
    #   coerce, a date index)
    # - */[4-10]/0 (i.e. for some types of dates, freq must be passed)

    date_types = (pd.DatetimeIndex, pd.PeriodIndex)

    def expected_freqstr(ix, freq):
        # Fall back to the index's own freq, then normalise to a string.
        if freq is None:
            freq = ix.freq
        if not isinstance(freq, str):
            freq = freq.freqstr
        return freq

    def check_date_flags(mod):
        # Bookkeeping attributes of a model backed by a date index.
        assert_equal(mod._index_none, False)
        assert_equal(mod._index_dates, True)
        assert_equal(mod._index_generated, False)
        assert_equal(mod._index.freq, mod._index_freq)
        assert_equal(mod.data.dates.equals(mod._index), True)

    def check_date_model(mod, ix, freq):
        # Full set of checks for a model backed by a date index.
        freq = expected_freqstr(ix, freq)
        assert_equal(isinstance(mod._index, date_types), True)
        check_date_flags(mod)
        assert_equal(mod.data.freq, freq)

    def check_non_date_model(mod, generated):
        # Bookkeeping attributes of a model not backed by a date index.
        assert_equal(mod._index_none, False)
        assert_equal(mod._index_dates, False)
        assert_equal(mod._index_generated, generated)
        assert_equal(mod._index_freq, None)
        assert_equal(mod.data.dates, None)
        assert_equal(mod.data.freq, None)

    # Baseline: list, numpy endog with no dates, no freq
    for endog in dta[:2]:
        # No indexes, should not raise warnings
        with warnings.catch_warnings():
            warnings.simplefilter("error")

            mod = tsa_model.TimeSeriesModel(endog)
            assert isinstance(mod._index, pd.RangeIndex) or np.issubdtype(
                mod._index.dtype, np.integer)
            assert_equal(mod._index_none, True)
            assert_equal(mod._index_dates, False)
            assert_equal(mod._index_generated, True)
            assert_equal(mod.data.dates, None)
            assert_equal(mod.data.freq, None)

    # Test list, numpy endog, pandas w/o index; with dates / freq argument
    for endog in dta:
        # Supported date indexes, should not raise warnings, do not need freq
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            for ix, freq in date_indexes + period_indexes:
                mod = tsa_model.TimeSeriesModel(endog, dates=ix)
                check_date_model(mod, ix, freq)

        # Supported date indexes, should not raise warnings, can use valid freq
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            for ix, freq in date_indexes + period_indexes:
                mod = tsa_model.TimeSeriesModel(endog, dates=ix, freq=freq)
                check_date_model(mod, ix, freq)

        # Other supported indexes, with valid freq, should not raise warnings
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            for ix, freq in supported_date_indexes:
                mod = tsa_model.TimeSeriesModel(endog, dates=ix, freq=freq)
                check_date_model(mod, ix, freq)

        # Since only supported indexes are valid `dates` arguments, everything
        # else is invalid here
        for ix, freq in supported_increment_indexes + unsupported_indexes:
            assert_raises(ValueError, tsa_model.TimeSeriesModel, endog,
                          dates=ix)

    # Test pandas (Series, DataFrame); with index (no dates/freq argument)
    for base_endog in dta[2:4]:
        # DatetimeIndex and PeriodIndex, should not raise warnings
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            for ix, freq in date_indexes + period_indexes:
                endog = base_endog.copy()
                endog.index = ix
                mod = tsa_model.TimeSeriesModel(endog)
                check_date_model(mod, ix, freq)

        # Increment index (this is a "supported" index in the sense that it
        # does not raise a warning, but obviously not a date index)
        endog = base_endog.copy()
        endog.index = supported_increment_indexes[0][0]
        mod = tsa_model.TimeSeriesModel(endog)
        assert is_int_index(mod._index)
        check_non_date_model(mod, False)

        # RangeIndex (start=0, end=nobs, so equivalent to increment index)
        endog = base_endog.copy()
        endog.index = supported_increment_indexes[1][0]
        mod = tsa_model.TimeSeriesModel(endog)
        assert_equal(type(mod._index) == pd.RangeIndex, True)
        check_non_date_model(mod, False)

        # Supported indexes *when a freq is given*, should not raise a warning
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            for ix, freq in supported_date_indexes:
                endog = base_endog.copy()
                endog.index = ix
                mod = tsa_model.TimeSeriesModel(endog, freq=freq)
                check_date_model(mod, ix, freq)

        # Unsupported (or any) indexes to the given series, *when a supported
        # date and freq is given*, should not raise a warning
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            for ix, freq in supported_date_indexes:
                endog = base_endog.copy()
                endog.index = unsupported_indexes[0][0]
                mod = tsa_model.TimeSeriesModel(endog, dates=ix, freq=freq)
                check_date_model(mod, ix, freq)

        # Date indexes with inferrable freq, but no given freq, should all give
        # warnings
        message = ("No frequency information was provided,"
                   " so inferred frequency %s will be used.")
        last_len = 0
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            for ix, freq in supported_date_indexes:
                endog = base_endog.copy()
                endog.index = ix
                mod = tsa_model.TimeSeriesModel(endog)
                freq = expected_freqstr(ix, freq)
                assert_equal(type(mod._index) == pd.DatetimeIndex, True)
                check_date_flags(mod)

                # Note: here, we need to hedge the test a little bit because
                # inferred frequencies are not always the same as the original
                # frequency. From the examples above, when the actual freq is
                # 2QS-OCT, the inferred freq is 2QS-JAN. This is an issue with
                # inferred frequencies, but since we are warning the user, it's
                # not a failure of the code. Thus we only test the "major" part
                # of the freq, and just test that the right message is given
                # (even though it will not have the actual freq of the data in
                # it).
                if len(w) == last_len:
                    # No new warning was recorded for this index.
                    continue
                assert_equal(mod.data.freq.split("-")[0], freq.split("-")[0])
                assert_equal(str(w[-1].message), message % mod.data.freq)
                last_len = len(w)

        # Unsupported (but valid) indexes, should all give warnings
        message = ("An unsupported index was provided and will be"
                   " ignored when e.g. forecasting.")
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            for ix, freq in unsupported_indexes:
                endog = base_endog.copy()
                endog.index = ix
                mod = tsa_model.TimeSeriesModel(endog)
                assert_equal(
                    isinstance(mod._index, (pd.Index, pd.RangeIndex)), True)
                check_non_date_model(mod, True)
                assert_equal(str(w[0].message), message)

        # Date indexes without inferrable freq, and with no given freq, should
        # all give warnings
        message = ("A date index has been provided, but it has no"
                   " associated frequency information and so will be"
                   " ignored when e.g. forecasting.")
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            for ix, freq in unsupported_date_indexes:
                endog = base_endog.copy()
                endog.index = ix
                mod = tsa_model.TimeSeriesModel(endog)
                assert isinstance(mod._index, pd.RangeIndex) or is_int_index(
                    mod._index)
                check_non_date_model(mod, True)
                assert_equal(str(w[0].message), message)

    other_freq = date_indexes[1][0].freq

    # Test (invalid) freq with no index
    endog = dta[0]
    assert_raises(
        ValueError,
        tsa_model.TimeSeriesModel,
        endog,
        freq=other_freq,
    )

    # Test conflicting index, freq specifications
    endog = dta[2].copy()
    endog.index = date_indexes[0][0]
    assert_raises(
        ValueError,
        tsa_model.TimeSeriesModel,
        endog,
        freq=other_freq,
    )

    # Test unsupported index, but a freq specification
    endog = dta[2].copy()
    endog.index = unsupported_indexes[0][0]
    assert_raises(
        ValueError,
        tsa_model.TimeSeriesModel,
        endog,
        freq=other_freq,
    )

    # Test index that can coerce to date time but incorrect freq
    endog = dta[2].copy()
    endog.index = numpy_datestr_indexes[0][0]
    assert_raises(
        ValueError,
        tsa_model.TimeSeriesModel,
        endog,
        freq=other_freq,
    )