def _init_dates(self, dates, freq):
    """
    Attach a date index and its frequency to ``self.data``.

    Parameters
    ----------
    dates : array_like or None
        Dates associated with the observations. When None, the row labels
        of ``self.data`` are used instead (these may also be None).
    freq : object or None
        Frequency specification for `dates`. When falsy and dates are
        available, it is inferred with ``datetools._infer_freq``. A
        non-None value is also used as a key into ``_freq_to_pandas``.

    Raises
    ------
    ValueError
        If ``self.data`` is a ``PandasData`` whose index is numeric or
        cannot be converted to dates, or if frequency inference fails.
    NotImplementedError
        If the frequency is nanoseconds and pandas is older than 0.14.

    Notes
    -----
    Sets ``self.data.dates`` (a DatetimeIndex, a PeriodIndex or None) and
    ``self.data.freq``.
    """
    if dates is None:
        dates = self.data.row_labels

    if dates is not None:
        if (not datetools._is_datetime_index(dates) and
                isinstance(self.data, data.PandasData)):
            try:
                # A numeric index would be "converted" by to_datetime
                # without error, so reject it explicitly.
                if is_numeric_dtype(dates):
                    raise ValueError
                dates = to_datetime(dates)
            except ValueError as err:
                raise ValueError("Given a pandas object and the index does "
                                 "not contain dates") from err
        if not freq:
            try:
                freq = datetools._infer_freq(dates)
            except Exception as err:
                # `Exception` rather than a bare except so that
                # KeyboardInterrupt / SystemExit are not converted into a
                # misleading ValueError.
                raise ValueError("Frequency inference failed. Use `freq` "
                                 "keyword.") from err

        if isinstance(dates[0], datetime.datetime):
            dates = DatetimeIndex(dates)
        else:  # preserve PeriodIndex
            dates = PeriodIndex(dates)
    self.data.dates = dates
    self.data.freq = freq

    # Test for nanoseconds in early pandas versions
    if freq is not None and _freq_to_pandas[freq].freqstr == 'N':
        # distutils (and LooseVersion) was removed in Python 3.12, so the
        # leading "major.minor" of the pandas version is parsed directly.
        import re
        from pandas import __version__ as pd_version
        version_match = re.match(r'(\d+)\.(\d+)', pd_version)
        if (version_match is not None and
                tuple(int(part) for part in version_match.groups())
                < (0, 14)):
            raise NotImplementedError('Nanosecond index not available in'
                                      ' Pandas < 0.14')
def _init_dates(self, dates, freq):
    """
    Attach a date index and its frequency to ``self.data``.

    Parameters
    ----------
    dates : array_like or None
        Dates associated with the observations. When None, the row labels
        of ``self.data`` are used instead (these may also be None).
    freq : object or None
        Frequency specification for `dates`. When falsy and dates are
        available, it is inferred with ``datetools._infer_freq``.

    Raises
    ------
    ValueError
        If ``self.data`` is a ``PandasData`` whose index is numeric or
        cannot be converted to dates, or if frequency inference fails.

    Notes
    -----
    Sets ``self.data.dates`` (a DatetimeIndex, a PeriodIndex or None) and
    ``self.data.freq``.
    """
    if dates is None:
        dates = self.data.row_labels

    if dates is not None:
        if (not datetools._is_datetime_index(dates) and
                isinstance(self.data, data.PandasData)):
            try:
                # A numeric index would be "converted" by to_datetime
                # without error, so reject it explicitly.
                if is_numeric_dtype(dates):
                    raise ValueError
                dates = to_datetime(dates)
            except ValueError as err:
                raise ValueError("Given a pandas object and the index does "
                                 "not contain dates") from err
        if not freq:
            try:
                freq = datetools._infer_freq(dates)
            except Exception as err:
                # `Exception` rather than a bare except so that
                # KeyboardInterrupt / SystemExit are not converted into a
                # misleading ValueError.
                raise ValueError("Frequency inference failed. Use `freq` "
                                 "keyword.") from err

        if isinstance(dates[0], datetime.datetime):
            dates = DatetimeIndex(dates)
        else:  # preserve PeriodIndex
            dates = PeriodIndex(dates)
    self.data.dates = dates
    self.data.freq = freq
def _init_dates(self, dates, freq):
    """
    Store the date index and frequency of the observations on ``self.data``.

    Parameters
    ----------
    dates : array_like or None
        Dates for the observations. If None, ``self.data.row_labels`` is
        used (which may itself be None, in which case no index is built).
    freq : object or None
        Frequency specification for `dates`. If falsy while dates are
        available, ``datetools._infer_freq`` is asked to infer it.

    Raises
    ------
    ValueError
        If ``self.data`` is a ``PandasData`` whose index is numeric or
        cannot be converted to dates, or if frequency inference fails.

    Notes
    -----
    Assigns ``self.data.dates`` (DatetimeIndex, PeriodIndex or None) and
    ``self.data.freq``.
    """
    if dates is None:
        dates = self.data.row_labels

    if dates is not None:
        needs_coercion = (not datetools._is_datetime_index(dates) and
                          isinstance(self.data, data.PandasData))
        if needs_coercion:
            try:
                # to_datetime would silently accept numbers, so a numeric
                # index is rejected up front.
                if is_numeric_dtype(dates):
                    raise ValueError
                dates = to_datetime(dates)
            except ValueError as err:
                raise ValueError(
                    "Given a pandas object and the index does "
                    "not contain dates") from err
        if not freq:
            try:
                freq = datetools._infer_freq(dates)
            except Exception as err:
                # Catch `Exception` (not everything) so interpreter-exit
                # signals such as KeyboardInterrupt still propagate.
                raise ValueError("Frequency inference failed. Use `freq` "
                                 "keyword.") from err

        if isinstance(dates[0], datetime.datetime):
            dates = DatetimeIndex(dates)
        else:  # preserve PeriodIndex
            dates = PeriodIndex(dates)
    self.data.dates = dates
    self.data.freq = freq
def _init_dates(self, dates=None, freq=None):
    """
    Initialize dates

    Parameters
    ----------
    dates : array_like, optional
        An array like object containing dates.
    freq : str, tuple, datetime.timedelta, DateOffset or None, optional
        A frequency specification for either `dates` or the row labels from
        the endog / exog data.

    Raises
    ------
    ValueError
        If `freq` is given without any index; if `dates` is given but
        cannot be coerced to a date index or carries no (inferable)
        frequency; if `freq` cannot be matched to, or is incompatible
        with, the index; or if `freq` is given but the index could not be
        coerced to dates.

    Notes
    -----
    Creates `self._index` and related attributes. `self._index` is always a
    Pandas index, and it is always Int64Index, DatetimeIndex, or
    PeriodIndex.

    If Pandas objects, endog / exog may have any type of index. If it is
    an Int64Index with values 0, 1, ..., nobs-1 or if it is (coerceable to)
    a DatetimeIndex or PeriodIndex *with an associated frequency*, then it
    is called a "supported" index. Otherwise it is called an "unsupported"
    index.

    Supported indexes are standardized (i.e. a list of date strings is
    converted to a DatetimeIndex) and the result is put in `self._index`.

    Unsupported indexes are ignored, and a supported Int64Index is
    generated and put in `self._index`. Warnings are issued in this case
    to alert the user if the returned index from some operation (e.g.
    forecasting) is different from the original data's index. However,
    whenever possible (e.g. purely in-sample prediction), the original
    index is returned.

    The benefit of supported indexes is that they allow *forecasting*, i.e.
    it is possible to extend them in a reasonable way. Thus every model
    must have an underlying supported index, even if it is just a generated
    Int64Index.
    """
    # Get our index from `dates` if available, otherwise from whatever
    # Pandas index we might have retrieved from endog, exog
    if dates is not None:
        index = dates
    else:
        index = self.data.row_labels

    # Sanity check that we do not have a `freq` without an index
    if index is None and freq is not None:
        raise ValueError('Frequency provided without associated index.')

    # If an index is available, see if it is a date-based index or if it
    # can be coerced to one. (If it cannot we'll fall back, below, to an
    # internal, 0, 1, ... nobs-1 integer index for modeling purposes)
    inferred_freq = False
    if index is not None:
        # Try to coerce to date-based index
        if not isinstance(index, (DatetimeIndex, PeriodIndex)):
            try:
                # Only try to coerce non-numeric index types (string,
                # list of date-times, etc.)
                # Note that np.asarray(Float64Index([...])) yields an
                # object dtype array in earlier versions of Pandas (and so
                # will not have is_numeric_dtype == True), so explicitly
                # check for it here. But note also that in very early
                # Pandas (~0.12), Float64Index does not exist (and so the
                # statsmodels compat makes it an empty tuple, so in that
                # case also check if the first element is a float.
                _index = np.asarray(index)
                if (is_numeric_dtype(_index) or
                        isinstance(index, Float64Index) or
                        (Float64Index == tuple() and
                         isinstance(_index[0], float))):
                    raise ValueError('Numeric index given')
                # If a non-index Pandas series was given, only keep its
                # values (because we must have a pd.Index type, below, and
                # pd.to_datetime will return a Series when passed
                # non-list-like objects)
                if isinstance(index, Series):
                    index = index.values
                # All coercion is done via pd.to_datetime
                # Note: date coercion via pd.to_datetime does not handle
                # string versions of PeriodIndex objects most of the time.
                _index = to_datetime(index)
                # Older versions of Pandas can sometimes fail here and
                # return a numpy array - check to make sure it's an index
                if not isinstance(_index, Index):
                    raise ValueError('Could not coerce to date index')
                index = _index
            except Exception as err:
                # `Exception` (not a bare except) so that KeyboardInterrupt
                # and SystemExit are never swallowed here.
                # Only want to actually raise an exception if `dates` was
                # provided but cannot be coerced. If we got the index from
                # the row_labels, we'll just ignore it and use the integer
                # index below
                if dates is not None:
                    raise ValueError('Non-date index index provided to'
                                     ' `dates` argument.') from err
        # Now, if we were given, or coerced, a date-based index, make sure
        # it has an associated frequency
        if isinstance(index, (DatetimeIndex, PeriodIndex)):
            # If no frequency, try to get an inferred frequency
            if freq is None and index.freq is None:
                freq = index.inferred_freq
                # If we got an inferred frequency, alert the user
                if freq is not None:
                    inferred_freq = True
                    warnings.warn('No frequency information was'
                                  ' provided, so inferred frequency %s'
                                  ' will be used.'
                                  % freq, ValueWarning)

            # Convert the passed freq to a pandas offset object
            if freq is not None:
                freq = to_offset(freq)

            # Now, if no frequency information is available from the index
            # itself or from the `freq` argument, raise an exception
            if freq is None and index.freq is None:
                # But again, only want to raise the exception if `dates`
                # was provided.
                if dates is not None:
                    raise ValueError('No frequency information was'
                                     ' provided with date index and no'
                                     ' frequency could be inferred.')
            # However, if the index itself has no frequency information but
            # the `freq` argument is available (or was inferred), construct
            # a new index with an associated frequency
            elif freq is not None and index.freq is None:
                resampled_index = date_range(
                    start=index[0], end=index[-1], freq=freq)
                if not inferred_freq and not resampled_index.equals(index):
                    raise ValueError('The given frequency argument could'
                                     ' not be matched to the given index.')
                index = resampled_index
            # Finally, if the index itself has a frequency and there was
            # also a given frequency, raise an exception if they are not
            # equal
            elif (freq is not None and not inferred_freq and
                  not (index.freq == freq)):
                raise ValueError('The given frequency argument is'
                                 ' incompatible with the given index.')
        # Finally, raise an exception if we could not coerce to date-based
        # but we were given a frequency argument
        elif freq is not None:
            raise ValueError('Given index could not be coerced to dates'
                             ' but `freq` argument was provided.')

    # Get attributes of the index
    has_index = index is not None
    date_index = isinstance(index, (DatetimeIndex, PeriodIndex))
    int_index = isinstance(index, Int64Index)
    range_index = isinstance(index, RangeIndex)
    has_freq = index.freq is not None if date_index else None
    increment = Index(range(self.endog.shape[0]))
    is_increment = index.equals(increment) if int_index else None
    # `Index.is_monotonic` was only an alias of `is_monotonic_increasing`
    # and has been removed from recent pandas; use the canonical name.
    is_monotonic = index.is_monotonic_increasing if date_index else None

    # Issue warnings for unsupported indexes
    if has_index and not (date_index or range_index or is_increment):
        warnings.warn('An unsupported index was provided and will be'
                      ' ignored when e.g. forecasting.', ValueWarning)
    if date_index and not has_freq:
        warnings.warn('A date index has been provided, but it has no'
                      ' associated frequency information and so will be'
                      ' ignored when e.g. forecasting.', ValueWarning)
    if date_index and not is_monotonic:
        warnings.warn('A date index has been provided, but it is not'
                      ' monotonic and so will be ignored when e.g.'
                      ' forecasting.', ValueWarning)

    # Construct the internal index
    index_generated = False
    valid_index = ((date_index and has_freq and is_monotonic) or
                   (int_index and is_increment) or range_index)

    if valid_index:
        _index = index
    else:
        _index = increment
        index_generated = True
    self._index = _index
    self._index_generated = index_generated
    self._index_none = index is None
    self._index_int64 = int_index and not range_index and not date_index
    self._index_dates = date_index and not index_generated
    self._index_freq = self._index.freq if self._index_dates else None
    self._index_inferred_freq = inferred_freq

    # For backwards compatibility, set data.dates, data.freq
    self.data.dates = self._index if self._index_dates else None
    self.data.freq = self._index.freqstr if self._index_dates else None
def __init__(
    self,
    data: Union[np.ndarray, pd.Series, pd.DataFrame],
    stats: Sequence[str] = None,
    *,
    numeric: bool = True,
    categorical: bool = True,
    alpha: float = 0.05,
    use_t: bool = False,
    percentiles: Sequence[Union[int, float]] = PERCENTILES,
    ntop: int = 5,
):
    """
    Validate the inputs and select the columns to describe.

    Parameters
    ----------
    data : array_like, Series or DataFrame
        Data to describe. Non-pandas input is validated as an array with
        at most 2 dimensions; 1-d input is wrapped in a Series.
    stats : sequence of str, optional
        Names of the statistics to compute. Every name must be in
        ``DEFAULT_STATISTICS``. If None, all default statistics are used.
    numeric : bool
        Whether to include columns with a numeric dtype.
    categorical : bool
        Whether to include columns with a "category" dtype.
    alpha : float
        Value strictly between 0 and 1, stored as ``self._alpha``.
    use_t : bool
        Flag stored as ``self._use_t``.
    percentiles : sequence of {int, float}
        Distinct values strictly between 0 and 100.
    ntop : int
        Number of entries the "top" and "freq" statistics are expanded
        into (``top_1`` ... ``top_ntop`` and ``freq_1`` ... ``freq_ntop``).

    Raises
    ------
    ValueError
        If both `numeric` and `categorical` are False, if no column
        matches the requested types, if `stats` contains an unknown name,
        if "top" is requested with a non-positive `ntop` while
        categorical columns are present, if `percentiles` are not distinct
        or not strictly inside (0, 100), or if `alpha` is not strictly
        inside (0, 1).
    """
    data_arr = data
    if not isinstance(data, (pd.Series, pd.DataFrame)):
        data_arr = array_like(data, "data", maxdim=2)
    if data_arr.ndim == 1:
        data = pd.Series(data)
    numeric = bool_like(numeric, "numeric")
    categorical = bool_like(categorical, "categorical")

    # `include` drives the dtype selection; `col_types` is only used to
    # build a readable error message.
    include = []
    col_types = ""
    if numeric:
        include.append(np.number)
        col_types = "numeric"
    if categorical:
        include.append("category")
        col_types += " and " if col_types != "" else ""
        col_types += "categorical"
    if not numeric and not categorical:
        raise ValueError(
            "At least one of numeric and categorical must be True"
        )
    self._data = pd.DataFrame(data).select_dtypes(include)
    if self._data.shape[1] == 0:
        raise ValueError(
            f"Selecting {col_types} results in an empty DataFrame"
        )
    self._is_numeric = [is_numeric_dtype(dt) for dt in self._data.dtypes]
    self._is_cat_like = [
        is_categorical_dtype(dt) for dt in self._data.dtypes
    ]

    if stats is not None:
        undef = [stat for stat in stats if stat not in DEFAULT_STATISTICS]
        if undef:
            raise ValueError(
                f"{', '.join(undef)} are not known statistics"
            )
    self._stats = (
        list(DEFAULT_STATISTICS) if stats is None else list(stats)
    )
    self._ntop = int_like(ntop, "ntop")
    self._compute_top = "top" in self._stats
    self._compute_freq = "freq" in self._stats
    # Only complain about `ntop` when "top" is requested and there is at
    # least one categorical column.
    if self._compute_top and self._ntop <= 0 < sum(self._is_cat_like):
        raise ValueError("top must be a non-negative integer")
    self._compute_perc = "percentiles" in self._stats
    self._percentiles = array_like(
        percentiles, "percentiles", maxdim=1, dtype="d"
    )
    self._percentiles = np.sort(self._percentiles)
    if np.unique(self._percentiles).shape[0] != self._percentiles.shape[0]:
        raise ValueError("percentiles must be distinct")
    if np.any(self._percentiles >= 100) or np.any(self._percentiles <= 0):
        raise ValueError("percentiles must be strictly between 0 and 100")

    # Expand special stats
    # NOTE(review): the percentile labels are built from the raw
    # `percentiles` argument (caller's order and formatting), not from the
    # sorted `self._percentiles` - confirm this matches how they are used.
    replacements = {
        "mode": ["mode", "mode_freq"],
        "ci": ["upper_ci", "lower_ci"],
        "jarque_bera": ["jarque_bera", "jarque_bera_pval"],
        "top": [f"top_{i}" for i in range(1, self._ntop + 1)],
        "freq": [f"freq_{i}" for i in range(1, self._ntop + 1)],
        "percentiles": [f"{i}%" for i in percentiles],
    }
    for key, expanded in replacements.items():
        if key in self._stats:
            idx = self._stats.index(key)
            self._stats = (
                self._stats[:idx] + expanded + self._stats[idx + 1:]
            )

    self._alpha = float_like(alpha, "alpha")
    if not 0 < alpha < 1:
        raise ValueError("alpha must be strictly between 0 and 1")
    self._use_t = bool_like(use_t, "use_t")
def _init_dates(self, dates=None, freq=None):
    """
    Initialize dates

    Parameters
    ----------
    dates : array_like, optional
        An array like object containing dates.
    freq : str, tuple, datetime.timedelta, DateOffset or None, optional
        A frequency specification for either `dates` or the row labels from
        the endog / exog data.

    Raises
    ------
    ValueError
        If `freq` is given without any index; if `dates` is given but
        cannot be coerced to a date index or carries no (inferable)
        frequency; if `freq` cannot be matched to, or is incompatible
        with, the index; or if `freq` is given but the index could not be
        coerced to dates.

    Notes
    -----
    Creates `self._index` and related attributes. `self._index` is always a
    Pandas index, and it is always Int64Index, DatetimeIndex, or
    PeriodIndex.

    If Pandas objects, endog / exog may have any type of index. If it is
    an Int64Index with values 0, 1, ..., nobs-1 or if it is (coerceable to)
    a DatetimeIndex or PeriodIndex *with an associated frequency*, then it
    is called a "supported" index. Otherwise it is called an "unsupported"
    index.

    Supported indexes are standardized (i.e. a list of date strings is
    converted to a DatetimeIndex) and the result is put in `self._index`.

    Unsupported indexes are ignored, and a supported Int64Index is
    generated and put in `self._index`. Warnings are issued in this case
    to alert the user if the returned index from some operation (e.g.
    forecasting) is different from the original data's index. However,
    whenever possible (e.g. purely in-sample prediction), the original
    index is returned.

    The benefit of supported indexes is that they allow *forecasting*, i.e.
    it is possible to extend them in a reasonable way. Thus every model
    must have an underlying supported index, even if it is just a generated
    Int64Index.
    """
    # Get our index from `dates` if available, otherwise from whatever
    # Pandas index we might have retrieved from endog, exog
    if dates is not None:
        index = dates
    else:
        index = self.data.row_labels

    # Sanity check that we don't have a `freq` without an index
    if index is None and freq is not None:
        raise ValueError('Frequency provided without associated index.')

    # If an index is available, see if it is a date-based index or if it
    # can be coerced to one. (If it can't we'll fall back, below, to an
    # internal, 0, 1, ... nobs-1 integer index for modeling purposes)
    inferred_freq = False
    if index is not None:
        # Try to coerce to date-based index
        if not isinstance(index, (DatetimeIndex, PeriodIndex)):
            try:
                # Only try to coerce non-numeric index types (string,
                # list of date-times, etc.)
                # Note that np.asarray(Float64Index([...])) yields an
                # object dtype array in earlier versions of Pandas (and so
                # will not have is_numeric_dtype == True), so explicitly
                # check for it here. But note also that in very early
                # Pandas (~0.12), Float64Index doesn't exist (and so the
                # Statsmodels compat makes it an empty tuple, so in that
                # case also check if the first element is a float.
                _index = np.asarray(index)
                if (is_numeric_dtype(_index) or
                        isinstance(index, Float64Index) or
                        (Float64Index == tuple() and
                         isinstance(_index[0], float))):
                    raise ValueError('Numeric index given')
                # If a non-index Pandas series was given, only keep its
                # values (because we must have a pd.Index type, below, and
                # pd.to_datetime will return a Series when passed
                # non-list-like objects)
                if isinstance(index, Series):
                    index = index.values
                # All coercion is done via pd.to_datetime
                # Note: date coercion via pd.to_datetime does not handle
                # string versions of PeriodIndex objects most of the time.
                _index = to_datetime(index)
                # Older versions of Pandas can sometimes fail here and
                # return a numpy array - check to make sure it's an index
                if not isinstance(_index, Index):
                    raise ValueError('Could not coerce to date index')
                index = _index
            except Exception as err:
                # `Exception` (not a bare except) so that KeyboardInterrupt
                # and SystemExit are never swallowed here.
                # Only want to actually raise an exception if `dates` was
                # provided but can't be coerced. If we got the index from
                # the row_labels, we'll just ignore it and use the integer
                # index below
                if dates is not None:
                    raise ValueError('Non-date index index provided to'
                                     ' `dates` argument.') from err
        # Now, if we were given, or coerced, a date-based index, make sure
        # it has an associated frequency
        if isinstance(index, (DatetimeIndex, PeriodIndex)):
            # If no frequency, try to get an inferred frequency
            if freq is None and index.freq is None:
                freq = index.inferred_freq
                # If we got an inferred frequency, alert the user
                if freq is not None:
                    inferred_freq = True
                    warnings.warn('No frequency information was'
                                  ' provided, so inferred frequency %s'
                                  ' will be used.'
                                  % freq, ValueWarning)

            # Convert the passed freq to a pandas offset object
            if freq is not None:
                freq = to_offset(freq)

            # Now, if no frequency information is available from the index
            # itself or from the `freq` argument, raise an exception
            if freq is None and index.freq is None:
                # But again, only want to raise the exception if `dates`
                # was provided.
                if dates is not None:
                    raise ValueError('No frequency information was'
                                     ' provided with date index and no'
                                     ' frequency could be inferred.')
            # However, if the index itself has no frequency information but
            # the `freq` argument is available (or was inferred), construct
            # a new index with an associated frequency
            elif freq is not None and index.freq is None:
                resampled_index = date_range(
                    start=index[0], end=index[-1], freq=freq)
                if not inferred_freq and not resampled_index.equals(index):
                    raise ValueError('The given frequency argument could'
                                     ' not be matched to the given index.')
                index = resampled_index
            # Finally, if the index itself has a frequency and there was
            # also a given frequency, raise an exception if they are not
            # equal
            elif (freq is not None and not inferred_freq and
                  not (index.freq == freq)):
                raise ValueError('The given frequency argument is'
                                 ' incompatible with the given index.')
        # Finally, raise an exception if we could not coerce to date-based
        # but we were given a frequency argument
        elif freq is not None:
            raise ValueError('Given index could not be coerced to dates'
                             ' but `freq` argument was provided.')

    # Get attributes of the index
    has_index = index is not None
    date_index = isinstance(index, (DatetimeIndex, PeriodIndex))
    int_index = isinstance(index, Int64Index)
    range_index = isinstance(index, RangeIndex)
    has_freq = index.freq is not None if date_index else None
    increment = Index(range(self.endog.shape[0]))
    is_increment = index.equals(increment) if int_index else None

    # Issue warnings for unsupported indexes
    if has_index and not (date_index or range_index or is_increment):
        warnings.warn('An unsupported index was provided and will be'
                      ' ignored when e.g. forecasting.', ValueWarning)
    if date_index and not has_freq:
        warnings.warn('A date index has been provided, but it has no'
                      ' associated frequency information and so will be'
                      ' ignored when e.g. forecasting.', ValueWarning)

    # Construct the internal index
    index_generated = False

    if ((date_index and has_freq) or (int_index and is_increment) or
            range_index):
        _index = index
    else:
        _index = increment
        index_generated = True
    self._index = _index
    self._index_generated = index_generated
    self._index_none = index is None
    self._index_dates = date_index and not index_generated
    self._index_freq = self._index.freq if self._index_dates else None
    self._index_inferred_freq = inferred_freq

    # For backwards compatibility, set data.dates, data.freq
    self.data.dates = self._index if self._index_dates else None
    self.data.freq = self._index.freqstr if self._index_dates else None