def process_recarray(data, endog_idx=0, exog_idx=None, stack=True, dtype=None):
    """Split a structured/rec array into endog and exog components.

    Parameters
    ----------
    data : recarray
        Structured array holding the dataset, one field per variable.
    endog_idx : int or iterable of int
        Position(s) of the endogenous variable(s) in ``data.dtype.names``.
    exog_idx : iterable of int or None
        Positions of the exogenous variables.  When None, every field not
        selected as endogenous is used.
    stack : bool
        If True, stack the selected fields into plain 2d ndarrays via
        ``np.column_stack``; otherwise keep structured-array form.
    dtype : dtype or None
        Optional dtype to cast both endog and exog to.

    Returns
    -------
    Dataset
        Dataset instance carrying data, names, endog, exog and the
        corresponding name lists.
    """
    names = list(data.dtype.names)

    if isinstance(endog_idx, (int, long)):
        # single endogenous column selected by position
        endog = array(data[names[endog_idx]], dtype=dtype)
        endog_name = names[endog_idx]
        endog_idx = [endog_idx]
    else:
        endog_name = [names[i] for i in endog_idx]
        if stack:
            endog = np.column_stack(data[field] for field in endog_name)
        else:
            # Use recarray_select (as the exog branch below does) instead of
            # raw multi-field indexing, which has view semantics / emits
            # FutureWarnings on recent NumPy.
            endog = recarray_select(data, endog_name)

    if exog_idx is None:
        # everything that is not endogenous becomes exogenous
        exog_name = [names[i] for i in range(len(names))
                     if i not in endog_idx]
    else:
        exog_name = [names[i] for i in exog_idx]

    if stack:
        exog = np.column_stack(data[field] for field in exog_name)
    else:
        exog = recarray_select(data, exog_name)

    if dtype:
        endog = endog.astype(dtype)
        exog = exog.astype(dtype)

    dataset = Dataset(data=data, names=names, endog=endog,
                      exog=exog, endog_name=endog_name,
                      exog_name=exog_name)

    return dataset
def __init__(self):
    """Load reference ACF results and compute the fitted acf/qstat/confint."""
    # reference autocovariance and Ljung-Box Q from the stored results
    self.acf = self.results['acvar']
    #self.acf = np.concatenate(([1.], self.acf))
    self.qstat = self.results['Q1']
    # fitted values: acf with confidence intervals and Q-statistics
    self.res1 = acf(self.x, nlags=40, qstat=True, alpha=.05)
    # NOTE: removed dead statement `res = DataFrame.from_records(self.results)`
    # — its result was never used and the call has no side effects.
    # reference confidence bounds, reshaped to an (nlags, 2) float array
    self.confint_res = recarray_select(self.results, ['acvar_lb','acvar_ub'])
    self.confint_res = self.confint_res.view((float, 2))
def test_grangercausality(self):
    """Granger causality on realgdp/realcons growth vs. R lmtest reference."""
    raw = macrodata.load().data
    subset = recarray_select(raw, ['realgdp', 'realcons'])
    # log-difference the two series to get growth rates
    growth = np.diff(np.log(subset.view((float, 2))), axis=0)
    # reference from R: lmtest::grangertest
    expected = [0.243097, 0.7844328, 195, 2]  # f_test
    results = grangercausalitytests(growth[:, 1::-1], 2, verbose=False)
    assert_almost_equal(expected, results[2][0]['ssr_ftest'], decimal=7)
    assert_almost_equal(results[2][0]['params_ftest'],
                        results[2][0]['ssr_ftest'], decimal=7)
def setup_class(self):
    """Prepare the SAS reference case for the Tukey HSD comparison."""
    self.endog = dta3['Relief']
    self.groups = dta3['Brand']
    self.alpha = 0.05
    self.setup_class_()
    # expected values taken from the SAS output
    self.meandiff2 = sas_['mean']
    bounds = recarray_select(sas_, ['lower','upper'])
    self.confint2 = bounds.view(float).reshape((3,2))
    self.reject2 = sas_['sig'] == asbytes('***')
def setup_class(self):
    """SAS reference case: load endog/groups and the expected HSD results."""
    self.alpha = 0.05
    self.endog = dta3['Relief']
    self.groups = dta3['Brand']
    self.setup_class_()
    # expected results from SAS
    self.meandiff2 = sas_['mean']
    self.reject2 = sas_['sig'] == asbytes('***')
    ci = recarray_select(sas_, ['lower', 'upper'])
    self.confint2 = ci.view(float).reshape((3, 2))
def add_lag(x, col=None, lags=1, drop=False, insert=True):
    """
    Returns an array with lags included given an array.

    Parameters
    ----------
    x : array
        An array or NumPy ndarray subclass. Can be either a 1d or 2d array
        with observations in columns.
    col : 'string', int, or None
        If data is a structured array or a recarray, `col` can be a string
        that is the name of the column containing the variable. Or `col` can
        be an int of the zero-based column index. If it's a 1d array `col`
        can be None.
    lags : int
        The number of lags desired.
    drop : bool
        Whether to keep the contemporaneous variable for the data.
    insert : bool or int
        If True, inserts the lagged values after `col`. If False, appends
        the data. If int inserts the lags at int.

    Returns
    -------
    array : ndarray
        Array with lags

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.macrodata.load(as_pandas=False)
    >>> data = data.data[['year','quarter','realgdp','cpi']]
    >>> data = sm.tsa.add_lag(data, 'realgdp', lags=2)

    Notes
    -----
    Trims the array both forward and backward, so that the array
    returned so that the length of the returned array is len(`X`) - lags.

    The lags are returned in increasing order, ie., t-1,t-2,...,t-lags
    """
    lags = int_like(lags, 'lags')
    drop = bool_like(drop, 'drop')

    # Two code paths: structured/rec array (field-based) vs. plain ndarray
    # (column-index-based).
    if x.dtype.names:
        names = x.dtype.names
        if not col and np.squeeze(x).ndim > 1:
            raise IndexError("col is None and the input array is not 1d")
        elif len(names) == 1:
            # only one field, so it must be the target column
            col = names[0]
        if isinstance(col, int):
            # translate positional index into field name
            col = x.dtype.names[col]

        contemp = x[col]

        # make names for lags
        tmp_names = [col + '_' + 'L(%i)' % i for i in range(1, lags + 1)]
        ndlags = lagmat(contemp, maxlag=lags, trim='Both')

        # get index for return
        if insert is True:
            ins_idx = list(names).index(col) + 1
        elif insert is False:
            # past-the-end index: lag columns are appended
            ins_idx = len(names) + 1
        else:  # insert is an int
            if insert > len(names):
                import warnings
                warnings.warn(
                    "insert > number of variables, inserting at the"
                    " last position", ValueWarning)
            ins_idx = insert

        first_names = list(names[:ins_idx])
        last_names = list(names[ins_idx:])

        if drop:
            # remove the contemporaneous column from whichever side holds it
            if col in first_names:
                first_names.pop(first_names.index(col))
            else:
                last_names.pop(last_names.index(col))

        if first_names:  # only do this if x is not "empty"
            # Workaround to avoid NumPy FutureWarning
            _x = recarray_select(x, first_names)
            first_arr = nprf.append_fields(_x[lags:], tmp_names, ndlags.T,
                                           usemask=False)
        else:
            # no leading fields: build a fresh structured array of the lags
            first_arr = np.zeros(len(x) - lags,
                                 dtype=lzip(tmp_names,
                                            (x[col].dtype, ) * lags))
            for i, name in enumerate(tmp_names):
                first_arr[name] = ndlags[:, i]
        if last_names:
            return nprf.append_fields(first_arr, last_names,
                                      [x[name][lags:] for name in last_names],
                                      usemask=False)
        else:  # lags for last variable
            return first_arr
    else:  # we have an ndarray
        if x.ndim == 1:  # make 2d if 1d
            x = x[:, None]
        if col is None:
            col = 0

        # handle negative index
        if col < 0:
            col = x.shape[1] + col

        contemp = x[:, col]

        if insert is True:
            ins_idx = col + 1
        elif insert is False:
            ins_idx = x.shape[1]
        else:
            if insert < 0:  # handle negative index
                insert = x.shape[1] + insert + 1
            if insert > x.shape[1]:
                # clamp to last position and warn
                insert = x.shape[1]
                import warnings
                warnings.warn(
                    "insert > number of variables, inserting at the"
                    " last position", ValueWarning)
            ins_idx = insert

        ndlags = lagmat(contemp, lags, trim='Both')
        first_cols = lrange(ins_idx)
        last_cols = lrange(ins_idx, x.shape[1])
        if drop:
            if col in first_cols:
                first_cols.pop(first_cols.index(col))
            else:
                last_cols.pop(last_cols.index(col))
        # NOTE(review): rows are trimmed from the front (x[lags:]) so the
        # output has len(x) - lags observations, matching the docstring.
        return np.column_stack((x[lags:, first_cols], ndlags,
                                x[lags:, last_cols]))
def add_lag(x, col=None, lags=1, drop=False, insert=True):
    """
    Returns an array with lags included given an array.

    Parameters
    ----------
    x : array
        An array or NumPy ndarray subclass. Can be either a 1d or 2d array
        with observations in columns.
    col : 'string', int, or None
        If data is a structured array or a recarray, `col` can be a string
        that is the name of the column containing the variable. Or `col` can
        be an int of the zero-based column index. If it's a 1d array `col`
        can be None.
    lags : int
        The number of lags desired.
    drop : bool
        Whether to keep the contemporaneous variable for the data.
    insert : bool or int
        If True, inserts the lagged values after `col`. If False, appends
        the data. If int inserts the lags at int.

    Returns
    -------
    array : ndarray
        Array with lags

    Examples
    --------
    >>> import statsmodels.api as sm
    >>> data = sm.datasets.macrodata.load(as_pandas=False)
    >>> data = data.data[['year','quarter','realgdp','cpi']]
    >>> data = sm.tsa.add_lag(data, 'realgdp', lags=2)

    Notes
    -----
    Trims the array both forward and backward, so that the array
    returned so that the length of the returned array is len(`X`) - lags.

    The lags are returned in increasing order, ie., t-1,t-2,...,t-lags
    """
    # Structured/rec array path (field-based) vs. plain ndarray path below.
    if x.dtype.names:
        names = x.dtype.names
        if not col and np.squeeze(x).ndim > 1:
            raise IndexError("col is None and the input array is not 1d")
        elif len(names) == 1:
            # single field: it must be the target column
            col = names[0]
        if isinstance(col, (int, long)):
            # translate positional index into field name
            col = x.dtype.names[col]

        if not PY3:  # TODO: Get rid of this kludge. See GH # 3658
            names = [bytes(name)
                     if isinstance(name, unicode)  # noqa:F821
                     else name for name in names]
            # Fail loudly if there is a non-ascii name.
            x.dtype.names = names
            if isinstance(col, unicode):  # noqa:F821
                col = bytes(col)

        contemp = x[col]

        # make names for lags
        tmp_names = [col + '_'+'L(%i)' % i for i in range(1, lags+1)]
        ndlags = lagmat(contemp, maxlag=lags, trim='Both')

        # get index for return
        if insert is True:
            ins_idx = list(names).index(col) + 1
        elif insert is False:
            # past-the-end index: lag columns are appended
            ins_idx = len(names) + 1
        else:  # insert is an int
            if insert > len(names):
                import warnings
                warnings.warn("insert > number of variables, inserting at the"
                              " last position", ValueWarning)
            ins_idx = insert

        first_names = list(names[:ins_idx])
        last_names = list(names[ins_idx:])

        if drop:
            # remove the contemporaneous column from whichever side holds it
            if col in first_names:
                first_names.pop(first_names.index(col))
            else:
                last_names.pop(last_names.index(col))

        if first_names:  # only do this if x isn't "empty"
            # Workaround to avoid NumPy FutureWarning
            _x = recarray_select(x, first_names)
            first_arr = nprf.append_fields(_x[lags:], tmp_names, ndlags.T,
                                           usemask=False)
        else:
            # no leading fields: build a fresh structured array of the lags
            first_arr = np.zeros(len(x)-lags,
                                 dtype=lzip(tmp_names,
                                            (x[col].dtype,)*lags))
            for i, name in enumerate(tmp_names):
                first_arr[name] = ndlags[:, i]
        if last_names:
            return nprf.append_fields(first_arr, last_names,
                                      [x[name][lags:] for name in last_names],
                                      usemask=False)
        else:  # lags for last variable
            return first_arr
    else:  # we have an ndarray
        if x.ndim == 1:  # make 2d if 1d
            x = x[:, None]
        if col is None:
            col = 0

        # handle negative index
        if col < 0:
            col = x.shape[1] + col

        contemp = x[:, col]

        if insert is True:
            ins_idx = col + 1
        elif insert is False:
            ins_idx = x.shape[1]
        else:
            if insert < 0:  # handle negative index
                insert = x.shape[1] + insert + 1
            if insert > x.shape[1]:
                # clamp to last position and warn
                insert = x.shape[1]
                import warnings
                warnings.warn("insert > number of variables, inserting at the"
                              " last position", ValueWarning)
            ins_idx = insert

        ndlags = lagmat(contemp, lags, trim='Both')
        first_cols = lrange(ins_idx)
        last_cols = lrange(ins_idx, x.shape[1])
        if drop:
            if col in first_cols:
                first_cols.pop(first_cols.index(col))
            else:
                last_cols.pop(last_cols.index(col))
        # NOTE(review): rows are trimmed from the front (x[lags:]) so the
        # output has len(x) - lags observations, matching the docstring.
        return np.column_stack((x[lags:, first_cols], ndlags,
                                x[lags:, last_cols]))