Example #1
0
    def __init__(self,
                 endog,
                 exog=None,
                 missing='none',
                 hasconst=None,
                 **kwargs):
        if data_util._is_recarray(endog) or data_util._is_recarray(exog):
            from statsmodels.tools.sm_exceptions import recarray_exception
            raise NotImplementedError(recarray_exception)
        if 'design_info' in kwargs:
            self.design_info = kwargs.pop('design_info')
        if 'formula' in kwargs:
            self.formula = kwargs.pop('formula')
        if missing != 'none':
            arrays, nan_idx = self.handle_missing(endog, exog, missing,
                                                  **kwargs)
            self.missing_row_idx = nan_idx
            self.__dict__.update(arrays)  # attach all the data arrays
            self.orig_endog = self.endog
            self.orig_exog = self.exog
            self.endog, self.exog = self._convert_endog_exog(
                self.endog, self.exog)
        else:
            self.__dict__.update(kwargs)  # attach the extra arrays anyway
            self.orig_endog = endog
            self.orig_exog = exog
            self.endog, self.exog = self._convert_endog_exog(endog, exog)

        self.const_idx = None
        self.k_constant = 0
        self._handle_constant(hasconst)
        self._check_integrity()
        self._cache = {}
Example #2
0
def add_constant(data, prepend=True, has_constant='skip'):
    """
    Add a column of ones to an array.

    Parameters
    ----------
    data : array_like
        A column-ordered design matrix.
    prepend : bool
        If true, the constant is in the first column.  Else the constant is
        appended (last column).
    has_constant : str {'raise', 'add', 'skip'}
        Behavior if ``data`` already has a constant. The default will return
        data without adding another constant. If 'raise', will raise an
        error if a constant is present. Using 'add' will duplicate the
        constant, if one is present.

    Returns
    -------
    array_like
        The original values with a constant (column of ones) as the first or
        last column. Returned value type depends on input type.

    Notes
    -----
    When the input is recarray or a pandas Series or DataFrame, the added
    column's name is 'const'.
    """
    if _is_using_pandas(data, None) or _is_recarray(data):
        if _is_recarray(data):
            # deprecated: remove recarray support after 0.12
            import warnings
            from statsmodels.tools.sm_exceptions import recarray_warning
            warnings.warn(recarray_warning, FutureWarning)
        from statsmodels.tsa.tsatools import add_trend
        return add_trend(data,
                         trend='c',
                         prepend=prepend,
                         has_constant=has_constant)

    # Special case for NumPy
    x = np.asanyarray(data)
    if x.ndim == 1:
        x = x[:, None]
    elif x.ndim > 2:
        raise ValueError('Only implementd 2-dimensional arrays')

    is_nonzero_const = np.ptp(x, axis=0) == 0
    is_nonzero_const &= np.all(x != 0.0, axis=0)
    if is_nonzero_const.any():
        if has_constant == 'skip':
            return x
        elif has_constant == 'raise':
            raise ValueError("data already contains a constant")

    x = [np.ones(x.shape[0]), x]
    x = x if prepend else x[::-1]
    return np.column_stack(x)
Example #3
0
def add_constant(data, prepend=True, has_constant='skip'):
    """
    Adds a column of ones to an array

    Parameters
    ----------
    data : array-like
        `data` is the column-ordered design matrix
    prepend : bool
        If true, the constant is in the first column.  Else the constant is
        appended (last column).
    has_constant : str {'raise', 'add', 'skip'}
        Behavior if ``data'' already has a constant. The default will return
        data without adding another constant. If 'raise', will raise an
        error if a constant is present. Using 'add' will duplicate the
        constant, if one is present.

    Returns
    -------
    data : array, recarray or DataFrame
        The original values with a constant (column of ones) as the first or
        last column. Returned value depends on input type.

    Notes
    -----
    When the input is recarray or a pandas Series or DataFrame, the added
    column's name is 'const'.
    """
    if _is_using_pandas(data, None) or _is_recarray(data):
        from statsmodels.tsa.tsatools import add_trend
        return add_trend(data, trend='c', prepend=prepend, has_constant=has_constant)

    # Special case for NumPy
    x = np.asanyarray(data)
    if x.ndim == 1:
        x = x[:,None]
    elif x.ndim > 2:
        raise ValueError('Only implementd 2-dimensional arrays')

    is_nonzero_const = np.ptp(x, axis=0) == 0
    is_nonzero_const &= np.all(x != 0.0, axis=0)
    if is_nonzero_const.any():
        if has_constant == 'skip':
            return x
        elif has_constant == 'raise':
            raise ValueError("data already contains a constant")

    x = [np.ones(x.shape[0]), x]
    x = x if prepend else x[::-1]
    return np.column_stack(x)
Example #4
0
def add_trend(x, trend="c", prepend=False, has_constant="skip"):
    """
    Add a trend and/or constant to an array.

    Parameters
    ----------
    x : array_like
        Original array of data.
    trend : str {'n', 'c', 't', 'ct', 'ctt'}
        The trend to add.

        * 'n' add no trend.
        * 'c' add constant only.
        * 't' add trend only.
        * 'ct' add constant and linear trend.
        * 'ctt' add constant and linear and quadratic trend.
    prepend : bool
        If True, prepends the new data to the columns of X.
    has_constant : str {'raise', 'add', 'skip'}
        Controls what happens when trend is 'c' and a constant column already
        exists in x. 'raise' will raise an error. 'add' will add a column of
        1s. 'skip' will return the data without change. 'skip' is the default.

    Returns
    -------
    array_like
        The original data with the additional trend columns.  If x is a
        pandas Series or DataFrame, then the trend column names are 'const',
        'trend' and 'trend_squared'.

    See Also
    --------
    statsmodels.tools.tools.add_constant
        Add a constant column to an array.

    Notes
    -----
    Returns columns as ['ctt','ct','c'] whenever applicable. There is currently
    no checking for an existing trend.
    """
    prepend = bool_like(prepend, "prepend")
    trend = string_like(trend, "trend", options=("n", "c", "t", "ct", "ctt"))
    has_constant = string_like(has_constant,
                               "has_constant",
                               options=("raise", "add", "skip"))

    # TODO: could be generalized for trend of aribitrary order
    columns = ["const", "trend", "trend_squared"]
    if trend == "n":
        return x.copy()
    elif trend == "c":  # handles structured arrays
        columns = columns[:1]
        trendorder = 0
    elif trend == "ct" or trend == "t":
        columns = columns[:2]
        if trend == "t":
            columns = columns[1:2]
        trendorder = 1
    elif trend == "ctt":
        trendorder = 2

    if _is_recarray(x):
        from statsmodels.tools.sm_exceptions import recarray_exception

        raise NotImplementedError(recarray_exception)

    is_pandas = _is_using_pandas(x, None)
    if is_pandas:
        if isinstance(x, pd.Series):
            x = pd.DataFrame(x)
        else:
            x = x.copy()
    else:
        x = np.asanyarray(x)

    nobs = len(x)
    trendarr = np.vander(np.arange(1, nobs + 1, dtype=np.float64),
                         trendorder + 1)
    # put in order ctt
    trendarr = np.fliplr(trendarr)
    if trend == "t":
        trendarr = trendarr[:, 1]

    if "c" in trend:
        if is_pandas:
            # Mixed type protection
            def safe_is_const(s):
                try:
                    return np.ptp(s) == 0.0 and np.any(s != 0.0)
                except:
                    return False

            col_const = x.apply(safe_is_const, 0)
        else:
            ptp0 = np.ptp(np.asanyarray(x), axis=0)
            col_is_const = ptp0 == 0
            nz_const = col_is_const & (x[0] != 0)
            col_const = nz_const

        if np.any(col_const):
            if has_constant == "raise":
                if x.ndim == 1:
                    base_err = "x is constant."
                else:
                    columns = np.arange(x.shape[1])[col_const]
                    if isinstance(x, pd.DataFrame):
                        columns = x.columns
                    const_cols = ", ".join([str(c) for c in columns])
                    base_err = (
                        "x contains one or more constant columns. Column(s) "
                        f"{const_cols} are constant.")
                msg = f"{base_err} Adding a constant with trend='{trend}' is not allowed."
                raise ValueError(msg)
            elif has_constant == "skip":
                columns = columns[1:]
                trendarr = trendarr[:, 1:]

    order = 1 if prepend else -1
    if is_pandas:
        trendarr = pd.DataFrame(trendarr, index=x.index, columns=columns)
        x = [trendarr, x]
        x = pd.concat(x[::order], axis=1)
    else:
        x = [trendarr, x]
        x = np.column_stack(x[::order])

    return x
Example #5
0
def add_trend(x, trend="c", prepend=False, has_constant='skip'):
    """
    Adds a trend and/or constant to an array.

    Parameters
    ----------
    x : array_like
        Original array of data.
    trend : str {'n', 'c', 't', 'ct', 'ctt'}

        * 'n' add no trend.
        * 'c' add constant only.
        * 't' add trend only.
        * 'ct' add constant and linear trend.
        * 'ctt' add constant and linear and quadratic trend.
    prepend : bool
        If True, prepends the new data to the columns of X.
    has_constant : str {'raise', 'add', 'skip'}
        Controls what happens when trend is 'c' and a constant already
        exists in x. 'raise' will raise an error. 'add' will duplicate a
        constant. 'skip' will return the data without change. 'skip' is the
        default.

    Returns
    -------
    array_like
        The original data with the additional trend columns.  If x is a
        recarray or pandas Series or DataFrame, then the trend column names
        are 'const', 'trend' and 'trend_squared'.

    Notes
    -----
    Returns columns as ['ctt','ct','c'] whenever applicable. There is currently
    no checking for an existing trend.

    See Also
    --------
    statsmodels.tools.tools.add_constant
        Add a constant column to an array.
    """
    prepend = bool_like(prepend, 'prepend')
    trend = string_like(trend, 'trend', options=('n', 'c', 't', 'ct', 'ctt'))
    has_constant = string_like(has_constant,
                               'has_constant',
                               options=('raise', 'add', 'skip'))

    # TODO: could be generalized for trend of aribitrary order
    columns = ['const', 'trend', 'trend_squared']
    if trend == 'n':
        return x.copy()
    elif trend == "c":  # handles structured arrays
        columns = columns[:1]
        trendorder = 0
    elif trend == "ct" or trend == "t":
        columns = columns[:2]
        if trend == "t":
            columns = columns[1:2]
        trendorder = 1
    elif trend == "ctt":
        trendorder = 2

    is_recarray = _is_recarray(x)
    is_pandas = _is_using_pandas(x, None) or is_recarray
    if is_pandas or is_recarray:
        if is_recarray:
            descr = x.dtype.descr
            x = pd.DataFrame.from_records(x)
        elif isinstance(x, pd.Series):
            x = pd.DataFrame(x)
        else:
            x = x.copy()
    else:
        x = np.asanyarray(x)

    nobs = len(x)
    trendarr = np.vander(np.arange(1, nobs + 1, dtype=np.float64),
                         trendorder + 1)
    # put in order ctt
    trendarr = np.fliplr(trendarr)
    if trend == "t":
        trendarr = trendarr[:, 1]

    if "c" in trend:
        if is_pandas or is_recarray:
            # Mixed type protection
            def safe_is_const(s):
                try:
                    return np.ptp(s) == 0.0 and np.any(s != 0.0)
                except:
                    return False

            col_const = x.apply(safe_is_const, 0)
        else:
            ptp0 = np.ptp(np.asanyarray(x), axis=0)
            col_is_const = ptp0 == 0
            nz_const = col_is_const & (x[0] != 0)
            col_const = nz_const

        if np.any(col_const):
            if has_constant == 'raise':
                msg = "x contains a constant. Adding a constant with " \
                      "trend='{0}' is not allowed.".format(trend)
                raise ValueError(msg)
            elif has_constant == 'skip':
                columns = columns[1:]
                trendarr = trendarr[:, 1:]

    order = 1 if prepend else -1
    if is_recarray or is_pandas:
        trendarr = pd.DataFrame(trendarr, index=x.index, columns=columns)
        x = [trendarr, x]
        x = pd.concat(x[::order], 1)
    else:
        x = [trendarr, x]
        x = np.column_stack(x[::order])

    if is_recarray:
        x = x.to_records(index=False)
        new_descr = x.dtype.descr
        extra_col = len(new_descr) - len(descr)
        if prepend:
            descr = new_descr[:extra_col] + descr
        else:
            descr = descr + new_descr[-extra_col:]

        x = x.astype(np.dtype(descr))

    return x
Example #6
0
def add_trend(x, trend="c", prepend=False, has_constant='skip'):
    """
    Adds a trend and/or constant to an array.

    Parameters
    ----------
    X : array-like
        Original array of data.
    trend : str {"c","t","ct","ctt"}
        "c" add constant only
        "t" add trend only
        "ct" add constant and linear trend
        "ctt" add constant and linear and quadratic trend.
    prepend : bool
        If True, prepends the new data to the columns of X.
    has_constant : str {'raise', 'add', 'skip'}
        Controls what happens when trend is 'c' and a constant already
        exists in X. 'raise' will raise an error. 'add' will duplicate a
        constant. 'skip' will return the data without change. 'skip' is the
        default.

    Returns
    -------
    y : array, recarray or DataFrame
        The original data with the additional trend columns.  If x is a
        recarray or pandas Series or DataFrame, then the trend column names
        are 'const', 'trend' and 'trend_squared'.

    Notes
    -----
    Returns columns as ["ctt","ct","c"] whenever applicable. There is currently
    no checking for an existing trend.

    See also
    --------
    statsmodels.tools.tools.add_constant
    """
    # TODO: could be generalized for trend of aribitrary order
    trend = trend.lower()
    columns = ['const', 'trend', 'trend_squared']
    if trend == "c":  # handles structured arrays
        columns = columns[:1]
        trendorder = 0
    elif trend == "ct" or trend == "t":
        columns = columns[:2]
        if trend == "t":
            columns = columns[1:2]
        trendorder = 1
    elif trend == "ctt":
        trendorder = 2
    else:
        raise ValueError("trend %s not understood" % trend)

    is_recarray = _is_recarray(x)
    is_pandas = _is_using_pandas(x, None) or is_recarray
    if is_pandas or is_recarray:
        if is_recarray:
            descr = x.dtype.descr
            x = pd.DataFrame.from_records(x)
        elif isinstance(x, pd.Series):
            x = pd.DataFrame(x)
        else:
            x = x.copy()
    else:
        x = np.asanyarray(x)

    nobs = len(x)
    trendarr = np.vander(np.arange(1, nobs + 1, dtype=np.float64), trendorder + 1)
    # put in order ctt
    trendarr = np.fliplr(trendarr)
    if trend == "t":
        trendarr = trendarr[:, 1]

    if "c" in trend:
        if is_pandas or is_recarray:
            # Mixed type protection
            def safe_is_const(s):
                try:
                    return np.ptp(s) == 0.0 and np.any(s != 0.0)
                except:
                    return False
            col_const = x.apply(safe_is_const, 0)
        else:
            col_const = np.logical_and(np.any(np.ptp(np.asanyarray(x), axis=0) == 0, axis=0),
                                       np.all(x != 0.0, axis=0))
        if np.any(col_const):
            if has_constant == 'raise':
                raise ValueError("x already contains a constant")
            elif has_constant == 'skip':
                columns = columns[1:]
                trendarr = trendarr[:, 1:]

    order = 1 if prepend else -1
    if is_recarray or is_pandas:
        trendarr = pd.DataFrame(trendarr, index=x.index, columns=columns)
        x = [trendarr, x]
        x = pd.concat(x[::order], 1)
    else:
        x = [trendarr, x]
        x = np.column_stack(x[::order])

    if is_recarray:
        x = x.to_records(index=False, convert_datetime64=False)
        new_descr = x.dtype.descr
        extra_col = len(new_descr) - len(descr)
        if prepend:
            descr = new_descr[:extra_col] + descr
        else:
            descr = descr + new_descr[-extra_col:]

        if not PY3:
            # See 3658
            names = [entry[0] for entry in descr]
            dtypes = [entry[1] for entry in descr]
            names = [bytes(name) for name in names]
            # Fail loudly if there is a non-ascii name
            descr = list(zip(names, dtypes))

        x = x.astype(np.dtype(descr))

    return x
Example #7
0
def add_trend(x, trend="c", prepend=False, has_constant='skip'):
    """
    Adds a trend and/or constant to an array.

    Parameters
    ----------
    X : array-like
        Original array of data.
    trend : str {"c","t","ct","ctt"}
        "c" add constant only
        "t" add trend only
        "ct" add constant and linear trend
        "ctt" add constant and linear and quadratic trend.
    prepend : bool
        If True, prepends the new data to the columns of X.
    has_constant : str {'raise', 'add', 'skip'}
        Controls what happens when trend is 'c' and a constant already
        exists in X. 'raise' will raise an error. 'add' will duplicate a
        constant. 'skip' will return the data without change. 'skip' is the
        default.

    Returns
    -------
    y : array, recarray or DataFrame
        The original data with the additional trend columns.  If x is a
        recarray or pandas Series or DataFrame, then the trend column names
        are 'const', 'trend' and 'trend_squared'.

    Notes
    -----
    Returns columns as ["ctt","ct","c"] whenever applicable. There is currently
    no checking for an existing trend.

    See also
    --------
    statsmodels.tools.tools.add_constant
    """
    # TODO: could be generalized for trend of aribitrary order
    trend = trend.lower()
    columns = ['const', 'trend', 'trend_squared']
    if trend == "c":  # handles structured arrays
        columns = columns[:1]
        trendorder = 0
    elif trend == "ct" or trend == "t":
        columns = columns[:2]
        if trend == "t":
            columns = columns[1:2]
        trendorder = 1
    elif trend == "ctt":
        trendorder = 2
    else:
        raise ValueError("trend %s not understood" % trend)

    is_recarray = _is_recarray(x)
    is_pandas = _is_using_pandas(x, None) or is_recarray
    if is_pandas or is_recarray:
        if is_recarray:
            descr = x.dtype.descr
            x = pd.DataFrame.from_records(x)
        elif isinstance(x, pd.Series):
            x = pd.DataFrame(x)
        else:
            x = x.copy()
    else:
        x = np.asanyarray(x)

    nobs = len(x)
    trendarr = np.vander(np.arange(1, nobs + 1, dtype=np.float64), trendorder + 1)
    # put in order ctt
    trendarr = np.fliplr(trendarr)
    if trend == "t":
        trendarr = trendarr[:, 1]

    if "c" in trend:
        if is_pandas or is_recarray:
            # Mixed type protection
            def safe_is_const(s):
                try:
                    return np.ptp(s) == 0.0 and np.any(s != 0.0)
                except:
                    return False
            col_const = x.apply(safe_is_const, 0)
        else:
            col_const = np.logical_and(np.any(np.ptp(np.asanyarray(x), axis=0) == 0, axis=0),
                                       np.all(x != 0.0, axis=0))
        if np.any(col_const):
            if has_constant == 'raise':
                raise ValueError("x already contains a constant")
            elif has_constant == 'skip':
                columns = columns[1:]
                trendarr = trendarr[:, 1:]

    order = 1 if prepend else -1
    if is_recarray or is_pandas:
        trendarr = pd.DataFrame(trendarr, index=x.index, columns=columns)
        x = [trendarr, x]
        x = pd.concat(x[::order], 1)
    else:
        x = [trendarr, x]
        x = np.column_stack(x[::order])

    if is_recarray:
        x = x.to_records(index=False, convert_datetime64=False)
        new_descr = x.dtype.descr
        extra_col = len(new_descr) - len(descr)
        descr = new_descr[:extra_col] + descr if prepend else descr + new_descr[-extra_col:]
        x = x.astype(np.dtype(descr))

    return x