Ejemplo n.º 1
0
def process_recarray(data, endog_idx=0, exog_idx=None, stack=True, dtype=None):
    names = list(data.dtype.names)

    if isinstance(endog_idx, (int, long)):
        endog = array(data[names[endog_idx]], dtype=dtype)
        endog_name = names[endog_idx]
        endog_idx = [endog_idx]
    else:
        endog_name = [names[i] for i in endog_idx]

        if stack:
            endog = np.column_stack(data[field] for field in endog_name)
        else:
            endog = data[endog_name]

    if exog_idx is None:
        exog_name = [names[i] for i in range(len(names))
                     if i not in endog_idx]
    else:
        exog_name = [names[i] for i in exog_idx]

    if stack:
        exog = np.column_stack(data[field] for field in exog_name)
    else:
        exog = recarray_select(data, exog_name)

    if dtype:
        endog = endog.astype(dtype)
        exog = exog.astype(dtype)

    dataset = Dataset(data=data, names=names, endog=endog, exog=exog,
                      endog_name=endog_name, exog_name=exog_name)

    return dataset
Ejemplo n.º 2
0
 def __init__(self):
     self.acf = self.results['acvar']
     #self.acf = np.concatenate(([1.], self.acf))
     self.qstat = self.results['Q1']
     self.res1 = acf(self.x, nlags=40, qstat=True, alpha=.05)
     res = DataFrame.from_records(self.results)
     self.confint_res = recarray_select(self.results, ['acvar_lb','acvar_ub'])
     self.confint_res = self.confint_res.view((float, 2))
Ejemplo n.º 3
0
    def test_grangercausality(self):
        # some example data
        mdata = macrodata.load().data
        mdata = recarray_select(mdata, ['realgdp', 'realcons'])
        data = mdata.view((float, 2))
        data = np.diff(np.log(data), axis=0)

        #R: lmtest:grangertest
        r_result = [0.243097, 0.7844328, 195, 2]  # f_test
        gr = grangercausalitytests(data[:, 1::-1], 2, verbose=False)
        assert_almost_equal(r_result, gr[2][0]['ssr_ftest'], decimal=7)
        assert_almost_equal(gr[2][0]['params_ftest'], gr[2][0]['ssr_ftest'], decimal=7)
Ejemplo n.º 4
0
    def setup_class(self):
        #SAS case
        self.endog = dta3['Relief']
        self.groups = dta3['Brand']
        self.alpha = 0.05
        self.setup_class_()
        #super(self, self).setup_class_()
        #CheckTuckeyHSD.setup_class_()

        self.meandiff2 = sas_['mean']
        self.confint2 = recarray_select(sas_, ['lower','upper']).view(float).reshape((3,2))

        self.reject2 = sas_['sig'] == asbytes('***')
Ejemplo n.º 5
0
    def setup_class(self):
        #SAS case
        self.endog = dta3['Relief']
        self.groups = dta3['Brand']
        self.alpha = 0.05
        self.setup_class_()
        #super(self, self).setup_class_()
        #CheckTuckeyHSD.setup_class_()

        self.meandiff2 = sas_['mean']
        self.confint2 = recarray_select(
            sas_, ['lower', 'upper']).view(float).reshape((3, 2))

        self.reject2 = sas_['sig'] == asbytes('***')
Ejemplo n.º 6
0
def add_lag(x, col=None, lags=1, drop=False, insert=True):
    """
    Returns an array with lags included given an array.

    Parameters
    ----------
    x : array
        An array or NumPy ndarray subclass. Can be either a 1d or 2d array with
        observations in columns.
    col : 'string', int, or None
        If data is a structured array or a recarray, `col` can be a string
        that is the name of the column containing the variable. Or `col` can
        be an int of the zero-based column index. If it's a 1d array `col`
        can be None.
    lags : int
        The number of lags desired.
    drop : bool
        Whether to keep the contemporaneous variable for the data.
    insert : bool or int
        If True, inserts the lagged values after `col`. If False, appends
        the data. If int inserts the lags at int.

    Returns
    -------
    array : ndarray
        Array with lags

    Examples
    --------

    >>> import statsmodels.api as sm
    >>> data = sm.datasets.macrodata.load(as_pandas=False)
    >>> data = data.data[['year','quarter','realgdp','cpi']]
    >>> data = sm.tsa.add_lag(data, 'realgdp', lags=2)

    Notes
    -----
    Trims the array both forward and backward, so that the array returned
    so that the length of the returned array is len(`X`) - lags. The lags are
    returned in increasing order, ie., t-1,t-2,...,t-lags
    """
    lags = int_like(lags, 'lags')
    drop = bool_like(drop, 'drop')

    if x.dtype.names:
        names = x.dtype.names
        if not col and np.squeeze(x).ndim > 1:
            raise IndexError("col is None and the input array is not 1d")
        elif len(names) == 1:
            col = names[0]
        if isinstance(col, int):
            col = x.dtype.names[col]

        contemp = x[col]

        # make names for lags
        tmp_names = [col + '_' + 'L(%i)' % i for i in range(1, lags + 1)]
        ndlags = lagmat(contemp, maxlag=lags, trim='Both')

        # get index for return
        if insert is True:
            ins_idx = list(names).index(col) + 1
        elif insert is False:
            ins_idx = len(names) + 1
        else:  # insert is an int
            if insert > len(names):
                import warnings
                warnings.warn(
                    "insert > number of variables, inserting at the"
                    " last position", ValueWarning)
            ins_idx = insert

        first_names = list(names[:ins_idx])
        last_names = list(names[ins_idx:])

        if drop:
            if col in first_names:
                first_names.pop(first_names.index(col))
            else:
                last_names.pop(last_names.index(col))

        if first_names:  # only do this if x is not "empty"
            # Workaround to avoid NumPy FutureWarning
            _x = recarray_select(x, first_names)
            first_arr = nprf.append_fields(_x[lags:],
                                           tmp_names,
                                           ndlags.T,
                                           usemask=False)

        else:
            first_arr = np.zeros(len(x) - lags,
                                 dtype=lzip(tmp_names,
                                            (x[col].dtype, ) * lags))
            for i, name in enumerate(tmp_names):
                first_arr[name] = ndlags[:, i]
        if last_names:
            return nprf.append_fields(first_arr,
                                      last_names,
                                      [x[name][lags:] for name in last_names],
                                      usemask=False)
        else:  # lags for last variable
            return first_arr

    else:  # we have an ndarray

        if x.ndim == 1:  # make 2d if 1d
            x = x[:, None]
        if col is None:
            col = 0

        # handle negative index
        if col < 0:
            col = x.shape[1] + col

        contemp = x[:, col]

        if insert is True:
            ins_idx = col + 1
        elif insert is False:
            ins_idx = x.shape[1]
        else:
            if insert < 0:  # handle negative index
                insert = x.shape[1] + insert + 1
            if insert > x.shape[1]:
                insert = x.shape[1]
                import warnings
                warnings.warn(
                    "insert > number of variables, inserting at the"
                    " last position", ValueWarning)
            ins_idx = insert

        ndlags = lagmat(contemp, lags, trim='Both')
        first_cols = lrange(ins_idx)
        last_cols = lrange(ins_idx, x.shape[1])
        if drop:
            if col in first_cols:
                first_cols.pop(first_cols.index(col))
            else:
                last_cols.pop(last_cols.index(col))
        return np.column_stack((x[lags:, first_cols], ndlags, x[lags:,
                                                                last_cols]))
Ejemplo n.º 7
0
def add_lag(x, col=None, lags=1, drop=False, insert=True):
    """
    Returns an array with lags included given an array.

    Parameters
    ----------
    x : array
        An array or NumPy ndarray subclass. Can be either a 1d or 2d array with
        observations in columns.
    col : 'string', int, or None
        If data is a structured array or a recarray, `col` can be a string
        that is the name of the column containing the variable. Or `col` can
        be an int of the zero-based column index. If it's a 1d array `col`
        can be None.
    lags : int
        The number of lags desired.
    drop : bool
        Whether to keep the contemporaneous variable for the data.
    insert : bool or int
        If True, inserts the lagged values after `col`. If False, appends
        the data. If int inserts the lags at int.

    Returns
    -------
    array : ndarray
        Array with lags

    Examples
    --------

    >>> import statsmodels.api as sm
    >>> data = sm.datasets.macrodata.load(as_pandas=False)
    >>> data = data.data[['year','quarter','realgdp','cpi']]
    >>> data = sm.tsa.add_lag(data, 'realgdp', lags=2)

    Notes
    -----
    Trims the array both forward and backward, so that the array returned
    so that the length of the returned array is len(`X`) - lags. The lags are
    returned in increasing order, ie., t-1,t-2,...,t-lags
    """
    if x.dtype.names:
        names = x.dtype.names
        if not col and np.squeeze(x).ndim > 1:
            raise IndexError("col is None and the input array is not 1d")
        elif len(names) == 1:
            col = names[0]
        if isinstance(col, (int, long)):
            col = x.dtype.names[col]
        if not PY3:
            # TODO: Get rid of this kludge.  See GH # 3658
            names = [bytes(name)
                     if isinstance(name, unicode)  # noqa:F821
                     else name for name in names]
            # Fail loudly if there is a non-ascii name.
            x.dtype.names = names
            if isinstance(col, unicode):  # noqa:F821
                col = bytes(col)

        contemp = x[col]

        # make names for lags
        tmp_names = [col + '_'+'L(%i)' % i for i in range(1, lags+1)]
        ndlags = lagmat(contemp, maxlag=lags, trim='Both')

        # get index for return
        if insert is True:
            ins_idx = list(names).index(col) + 1
        elif insert is False:
            ins_idx = len(names) + 1
        else: # insert is an int
            if insert > len(names):
                import warnings
                warnings.warn("insert > number of variables, inserting at the"
                              " last position", ValueWarning)
            ins_idx = insert

        first_names = list(names[:ins_idx])
        last_names = list(names[ins_idx:])

        if drop:
            if col in first_names:
                first_names.pop(first_names.index(col))
            else:
                last_names.pop(last_names.index(col))

        if first_names: # only do this if x isn't "empty"
            # Workaround to avoid NumPy FutureWarning
            _x = recarray_select(x, first_names)
            first_arr = nprf.append_fields(_x[lags:], tmp_names, ndlags.T,
                                           usemask=False)

        else:
            first_arr = np.zeros(len(x)-lags, dtype=lzip(tmp_names,
                (x[col].dtype,)*lags))
            for i,name in enumerate(tmp_names):
                first_arr[name] = ndlags[:,i]
        if last_names:
            return nprf.append_fields(first_arr, last_names,
                    [x[name][lags:] for name in last_names], usemask=False)
        else: # lags for last variable
            return first_arr

    else: # we have an ndarray

        if x.ndim == 1: # make 2d if 1d
            x = x[:,None]
        if col is None:
            col = 0

        # handle negative index
        if col < 0:
            col = x.shape[1] + col

        contemp = x[:,col]

        if insert is True:
            ins_idx = col + 1
        elif insert is False:
            ins_idx = x.shape[1]
        else:
            if insert < 0: # handle negative index
                insert = x.shape[1] + insert + 1
            if insert > x.shape[1]:
                insert = x.shape[1]
                import warnings
                warnings.warn("insert > number of variables, inserting at the"
                              " last position", ValueWarning)
            ins_idx = insert

        ndlags = lagmat(contemp, lags, trim='Both')
        first_cols = lrange(ins_idx)
        last_cols = lrange(ins_idx,x.shape[1])
        if drop:
            if col in first_cols:
                first_cols.pop(first_cols.index(col))
            else:
                last_cols.pop(last_cols.index(col))
        return np.column_stack((x[lags:,first_cols],ndlags,
                    x[lags:,last_cols]))