def __init__(self, endog, exog=None, missing='none', hasconst=None, **kwargs): if data_util._is_recarray(endog) or data_util._is_recarray(exog): from statsmodels.tools.sm_exceptions import recarray_exception raise NotImplementedError(recarray_exception) if 'design_info' in kwargs: self.design_info = kwargs.pop('design_info') if 'formula' in kwargs: self.formula = kwargs.pop('formula') if missing != 'none': arrays, nan_idx = self.handle_missing(endog, exog, missing, **kwargs) self.missing_row_idx = nan_idx self.__dict__.update(arrays) # attach all the data arrays self.orig_endog = self.endog self.orig_exog = self.exog self.endog, self.exog = self._convert_endog_exog( self.endog, self.exog) else: self.__dict__.update(kwargs) # attach the extra arrays anyway self.orig_endog = endog self.orig_exog = exog self.endog, self.exog = self._convert_endog_exog(endog, exog) self.const_idx = None self.k_constant = 0 self._handle_constant(hasconst) self._check_integrity() self._cache = {}
def add_constant(data, prepend=True, has_constant='skip'): """ Add a column of ones to an array. Parameters ---------- data : array_like A column-ordered design matrix. prepend : bool If true, the constant is in the first column. Else the constant is appended (last column). has_constant : str {'raise', 'add', 'skip'} Behavior if ``data`` already has a constant. The default will return data without adding another constant. If 'raise', will raise an error if a constant is present. Using 'add' will duplicate the constant, if one is present. Returns ------- array_like The original values with a constant (column of ones) as the first or last column. Returned value type depends on input type. Notes ----- When the input is recarray or a pandas Series or DataFrame, the added column's name is 'const'. """ if _is_using_pandas(data, None) or _is_recarray(data): if _is_recarray(data): # deprecated: remove recarray support after 0.12 import warnings from statsmodels.tools.sm_exceptions import recarray_warning warnings.warn(recarray_warning, FutureWarning) from statsmodels.tsa.tsatools import add_trend return add_trend(data, trend='c', prepend=prepend, has_constant=has_constant) # Special case for NumPy x = np.asanyarray(data) if x.ndim == 1: x = x[:, None] elif x.ndim > 2: raise ValueError('Only implementd 2-dimensional arrays') is_nonzero_const = np.ptp(x, axis=0) == 0 is_nonzero_const &= np.all(x != 0.0, axis=0) if is_nonzero_const.any(): if has_constant == 'skip': return x elif has_constant == 'raise': raise ValueError("data already contains a constant") x = [np.ones(x.shape[0]), x] x = x if prepend else x[::-1] return np.column_stack(x)
def add_constant(data, prepend=True, has_constant='skip'): """ Adds a column of ones to an array Parameters ---------- data : array-like `data` is the column-ordered design matrix prepend : bool If true, the constant is in the first column. Else the constant is appended (last column). has_constant : str {'raise', 'add', 'skip'} Behavior if ``data'' already has a constant. The default will return data without adding another constant. If 'raise', will raise an error if a constant is present. Using 'add' will duplicate the constant, if one is present. Returns ------- data : array, recarray or DataFrame The original values with a constant (column of ones) as the first or last column. Returned value depends on input type. Notes ----- When the input is recarray or a pandas Series or DataFrame, the added column's name is 'const'. """ if _is_using_pandas(data, None) or _is_recarray(data): from statsmodels.tsa.tsatools import add_trend return add_trend(data, trend='c', prepend=prepend, has_constant=has_constant) # Special case for NumPy x = np.asanyarray(data) if x.ndim == 1: x = x[:,None] elif x.ndim > 2: raise ValueError('Only implementd 2-dimensional arrays') is_nonzero_const = np.ptp(x, axis=0) == 0 is_nonzero_const &= np.all(x != 0.0, axis=0) if is_nonzero_const.any(): if has_constant == 'skip': return x elif has_constant == 'raise': raise ValueError("data already contains a constant") x = [np.ones(x.shape[0]), x] x = x if prepend else x[::-1] return np.column_stack(x)
def add_trend(x, trend="c", prepend=False, has_constant="skip"): """ Add a trend and/or constant to an array. Parameters ---------- x : array_like Original array of data. trend : str {'n', 'c', 't', 'ct', 'ctt'} The trend to add. * 'n' add no trend. * 'c' add constant only. * 't' add trend only. * 'ct' add constant and linear trend. * 'ctt' add constant and linear and quadratic trend. prepend : bool If True, prepends the new data to the columns of X. has_constant : str {'raise', 'add', 'skip'} Controls what happens when trend is 'c' and a constant column already exists in x. 'raise' will raise an error. 'add' will add a column of 1s. 'skip' will return the data without change. 'skip' is the default. Returns ------- array_like The original data with the additional trend columns. If x is a pandas Series or DataFrame, then the trend column names are 'const', 'trend' and 'trend_squared'. See Also -------- statsmodels.tools.tools.add_constant Add a constant column to an array. Notes ----- Returns columns as ['ctt','ct','c'] whenever applicable. There is currently no checking for an existing trend. """ prepend = bool_like(prepend, "prepend") trend = string_like(trend, "trend", options=("n", "c", "t", "ct", "ctt")) has_constant = string_like(has_constant, "has_constant", options=("raise", "add", "skip")) # TODO: could be generalized for trend of aribitrary order columns = ["const", "trend", "trend_squared"] if trend == "n": return x.copy() elif trend == "c": # handles structured arrays columns = columns[:1] trendorder = 0 elif trend == "ct" or trend == "t": columns = columns[:2] if trend == "t": columns = columns[1:2] trendorder = 1 elif trend == "ctt": trendorder = 2 if _is_recarray(x): from statsmodels.tools.sm_exceptions import recarray_exception raise NotImplementedError(recarray_exception) is_pandas = _is_using_pandas(x, None) if is_pandas: if isinstance(x, pd.Series): x = pd.DataFrame(x) else: x = x.copy() else: x = np.asanyarray(x) nobs = len(x) trendarr = np.vander(np.arange(1, nobs + 1, dtype=np.float64), trendorder + 1) # put in order ctt trendarr = np.fliplr(trendarr) if trend == "t": trendarr = trendarr[:, 1] if "c" in trend: if is_pandas: # Mixed type protection def safe_is_const(s): try: return np.ptp(s) == 0.0 and np.any(s != 0.0) except: return False col_const = x.apply(safe_is_const, 0) else: ptp0 = np.ptp(np.asanyarray(x), axis=0) col_is_const = ptp0 == 0 nz_const = col_is_const & (x[0] != 0) col_const = nz_const if np.any(col_const): if has_constant == "raise": if x.ndim == 1: base_err = "x is constant." else: columns = np.arange(x.shape[1])[col_const] if isinstance(x, pd.DataFrame): columns = x.columns const_cols = ", ".join([str(c) for c in columns]) base_err = ( "x contains one or more constant columns. Column(s) " f"{const_cols} are constant.") msg = f"{base_err} Adding a constant with trend='{trend}' is not allowed." raise ValueError(msg) elif has_constant == "skip": columns = columns[1:] trendarr = trendarr[:, 1:] order = 1 if prepend else -1 if is_pandas: trendarr = pd.DataFrame(trendarr, index=x.index, columns=columns) x = [trendarr, x] x = pd.concat(x[::order], axis=1) else: x = [trendarr, x] x = np.column_stack(x[::order]) return x
def add_trend(x, trend="c", prepend=False, has_constant='skip'): """ Adds a trend and/or constant to an array. Parameters ---------- x : array_like Original array of data. trend : str {'n', 'c', 't', 'ct', 'ctt'} * 'n' add no trend. * 'c' add constant only. * 't' add trend only. * 'ct' add constant and linear trend. * 'ctt' add constant and linear and quadratic trend. prepend : bool If True, prepends the new data to the columns of X. has_constant : str {'raise', 'add', 'skip'} Controls what happens when trend is 'c' and a constant already exists in x. 'raise' will raise an error. 'add' will duplicate a constant. 'skip' will return the data without change. 'skip' is the default. Returns ------- array_like The original data with the additional trend columns. If x is a recarray or pandas Series or DataFrame, then the trend column names are 'const', 'trend' and 'trend_squared'. Notes ----- Returns columns as ['ctt','ct','c'] whenever applicable. There is currently no checking for an existing trend. See Also -------- statsmodels.tools.tools.add_constant Add a constant column to an array. """ prepend = bool_like(prepend, 'prepend') trend = string_like(trend, 'trend', options=('n', 'c', 't', 'ct', 'ctt')) has_constant = string_like(has_constant, 'has_constant', options=('raise', 'add', 'skip')) # TODO: could be generalized for trend of aribitrary order columns = ['const', 'trend', 'trend_squared'] if trend == 'n': return x.copy() elif trend == "c": # handles structured arrays columns = columns[:1] trendorder = 0 elif trend == "ct" or trend == "t": columns = columns[:2] if trend == "t": columns = columns[1:2] trendorder = 1 elif trend == "ctt": trendorder = 2 is_recarray = _is_recarray(x) is_pandas = _is_using_pandas(x, None) or is_recarray if is_pandas or is_recarray: if is_recarray: descr = x.dtype.descr x = pd.DataFrame.from_records(x) elif isinstance(x, pd.Series): x = pd.DataFrame(x) else: x = x.copy() else: x = np.asanyarray(x) nobs = len(x) trendarr = np.vander(np.arange(1, nobs + 1, dtype=np.float64), trendorder + 1) # put in order ctt trendarr = np.fliplr(trendarr) if trend == "t": trendarr = trendarr[:, 1] if "c" in trend: if is_pandas or is_recarray: # Mixed type protection def safe_is_const(s): try: return np.ptp(s) == 0.0 and np.any(s != 0.0) except: return False col_const = x.apply(safe_is_const, 0) else: ptp0 = np.ptp(np.asanyarray(x), axis=0) col_is_const = ptp0 == 0 nz_const = col_is_const & (x[0] != 0) col_const = nz_const if np.any(col_const): if has_constant == 'raise': msg = "x contains a constant. Adding a constant with " \ "trend='{0}' is not allowed.".format(trend) raise ValueError(msg) elif has_constant == 'skip': columns = columns[1:] trendarr = trendarr[:, 1:] order = 1 if prepend else -1 if is_recarray or is_pandas: trendarr = pd.DataFrame(trendarr, index=x.index, columns=columns) x = [trendarr, x] x = pd.concat(x[::order], 1) else: x = [trendarr, x] x = np.column_stack(x[::order]) if is_recarray: x = x.to_records(index=False) new_descr = x.dtype.descr extra_col = len(new_descr) - len(descr) if prepend: descr = new_descr[:extra_col] + descr else: descr = descr + new_descr[-extra_col:] x = x.astype(np.dtype(descr)) return x
def add_trend(x, trend="c", prepend=False, has_constant='skip'): """ Adds a trend and/or constant to an array. Parameters ---------- X : array-like Original array of data. trend : str {"c","t","ct","ctt"} "c" add constant only "t" add trend only "ct" add constant and linear trend "ctt" add constant and linear and quadratic trend. prepend : bool If True, prepends the new data to the columns of X. has_constant : str {'raise', 'add', 'skip'} Controls what happens when trend is 'c' and a constant already exists in X. 'raise' will raise an error. 'add' will duplicate a constant. 'skip' will return the data without change. 'skip' is the default. Returns ------- y : array, recarray or DataFrame The original data with the additional trend columns. If x is a recarray or pandas Series or DataFrame, then the trend column names are 'const', 'trend' and 'trend_squared'. Notes ----- Returns columns as ["ctt","ct","c"] whenever applicable. There is currently no checking for an existing trend. See also -------- statsmodels.tools.tools.add_constant """ # TODO: could be generalized for trend of aribitrary order trend = trend.lower() columns = ['const', 'trend', 'trend_squared'] if trend == "c": # handles structured arrays columns = columns[:1] trendorder = 0 elif trend == "ct" or trend == "t": columns = columns[:2] if trend == "t": columns = columns[1:2] trendorder = 1 elif trend == "ctt": trendorder = 2 else: raise ValueError("trend %s not understood" % trend) is_recarray = _is_recarray(x) is_pandas = _is_using_pandas(x, None) or is_recarray if is_pandas or is_recarray: if is_recarray: descr = x.dtype.descr x = pd.DataFrame.from_records(x) elif isinstance(x, pd.Series): x = pd.DataFrame(x) else: x = x.copy() else: x = np.asanyarray(x) nobs = len(x) trendarr = np.vander(np.arange(1, nobs + 1, dtype=np.float64), trendorder + 1) # put in order ctt trendarr = np.fliplr(trendarr) if trend == "t": trendarr = trendarr[:, 1] if "c" in trend: if is_pandas or is_recarray: # Mixed type protection def safe_is_const(s): try: return np.ptp(s) == 0.0 and np.any(s != 0.0) except: return False col_const = x.apply(safe_is_const, 0) else: col_const = np.logical_and(np.any(np.ptp(np.asanyarray(x), axis=0) == 0, axis=0), np.all(x != 0.0, axis=0)) if np.any(col_const): if has_constant == 'raise': raise ValueError("x already contains a constant") elif has_constant == 'skip': columns = columns[1:] trendarr = trendarr[:, 1:] order = 1 if prepend else -1 if is_recarray or is_pandas: trendarr = pd.DataFrame(trendarr, index=x.index, columns=columns) x = [trendarr, x] x = pd.concat(x[::order], 1) else: x = [trendarr, x] x = np.column_stack(x[::order]) if is_recarray: x = x.to_records(index=False, convert_datetime64=False) new_descr = x.dtype.descr extra_col = len(new_descr) - len(descr) if prepend: descr = new_descr[:extra_col] + descr else: descr = descr + new_descr[-extra_col:] if not PY3: # See 3658 names = [entry[0] for entry in descr] dtypes = [entry[1] for entry in descr] names = [bytes(name) for name in names] # Fail loudly if there is a non-ascii name descr = list(zip(names, dtypes)) x = x.astype(np.dtype(descr)) return x
def add_trend(x, trend="c", prepend=False, has_constant='skip'): """ Adds a trend and/or constant to an array. Parameters ---------- X : array-like Original array of data. trend : str {"c","t","ct","ctt"} "c" add constant only "t" add trend only "ct" add constant and linear trend "ctt" add constant and linear and quadratic trend. prepend : bool If True, prepends the new data to the columns of X. has_constant : str {'raise', 'add', 'skip'} Controls what happens when trend is 'c' and a constant already exists in X. 'raise' will raise an error. 'add' will duplicate a constant. 'skip' will return the data without change. 'skip' is the default. Returns ------- y : array, recarray or DataFrame The original data with the additional trend columns. If x is a recarray or pandas Series or DataFrame, then the trend column names are 'const', 'trend' and 'trend_squared'. Notes ----- Returns columns as ["ctt","ct","c"] whenever applicable. There is currently no checking for an existing trend. See also -------- statsmodels.tools.tools.add_constant """ # TODO: could be generalized for trend of aribitrary order trend = trend.lower() columns = ['const', 'trend', 'trend_squared'] if trend == "c": # handles structured arrays columns = columns[:1] trendorder = 0 elif trend == "ct" or trend == "t": columns = columns[:2] if trend == "t": columns = columns[1:2] trendorder = 1 elif trend == "ctt": trendorder = 2 else: raise ValueError("trend %s not understood" % trend) is_recarray = _is_recarray(x) is_pandas = _is_using_pandas(x, None) or is_recarray if is_pandas or is_recarray: if is_recarray: descr = x.dtype.descr x = pd.DataFrame.from_records(x) elif isinstance(x, pd.Series): x = pd.DataFrame(x) else: x = x.copy() else: x = np.asanyarray(x) nobs = len(x) trendarr = np.vander(np.arange(1, nobs + 1, dtype=np.float64), trendorder + 1) # put in order ctt trendarr = np.fliplr(trendarr) if trend == "t": trendarr = trendarr[:, 1] if "c" in trend: if is_pandas or is_recarray: # Mixed type protection def safe_is_const(s): try: return np.ptp(s) == 0.0 and np.any(s != 0.0) except: return False col_const = x.apply(safe_is_const, 0) else: col_const = np.logical_and(np.any(np.ptp(np.asanyarray(x), axis=0) == 0, axis=0), np.all(x != 0.0, axis=0)) if np.any(col_const): if has_constant == 'raise': raise ValueError("x already contains a constant") elif has_constant == 'skip': columns = columns[1:] trendarr = trendarr[:, 1:] order = 1 if prepend else -1 if is_recarray or is_pandas: trendarr = pd.DataFrame(trendarr, index=x.index, columns=columns) x = [trendarr, x] x = pd.concat(x[::order], 1) else: x = [trendarr, x] x = np.column_stack(x[::order]) if is_recarray: x = x.to_records(index=False, convert_datetime64=False) new_descr = x.dtype.descr extra_col = len(new_descr) - len(descr) descr = new_descr[:extra_col] + descr if prepend else descr + new_descr[-extra_col:] x = x.astype(np.dtype(descr)) return x