def __init__(self, sys, sigma=None, dfk=None): if len(sys) % 2 != 0: raise ValueError("sys must be a list of pairs of endogenous and \ exogenous variables. Got length %s" % len(sys)) if dfk: if not dfk.lower() in ['dfk1', 'dfk2']: raise ValueError("dfk option %s not understood" % (dfk)) self._dfk = dfk M = len(sys[1::2]) self._M = M # exog = np.zeros((M,M), dtype=object) # for i,eq in enumerate(sys[1::2]): # exog[i,i] = np.asarray(eq) # not sure this exog is needed # used to compute resids for now exog = np.column_stack(np.asarray(sys[1::2][i]) for i in range(M)) # exog = np.vstack(np.asarray(sys[1::2][i]) for i in range(M)) self.exog = exog # 2d ndarray exog is better # Endog, might just go ahead and reshape this? endog = np.asarray(sys[::2]) self.endog = endog self.nobs = float( self.endog[0].shape[0]) # assumes all the same length # Degrees of Freedom df_resid = [] df_model = [] [df_resid.append(self.nobs - tools.rank(_)) \ for _ in sys[1::2]] [df_model.append(tools.rank(_) - 1) for _ in sys[1::2]] self.df_resid = np.asarray(df_resid) self.df_model = np.asarray(df_model) # "Block-diagonal" sparse matrix of exog sp_exog = sparse.lil_matrix( (int(self.nobs * M), int(np.sum(self.df_model + 1)))) # linked lists to build self._cols = np.cumsum(np.hstack((0, self.df_model + 1))) for i in range(M): sp_exog[i * self.nobs:(i + 1) * self.nobs, self._cols[i]:self._cols[i + 1]] = sys[1::2][i] self.sp_exog = sp_exog.tocsr() # cast to compressed for efficiency # Deal with sigma, check shape earlier if given if np.any(sigma): sigma = np.asarray(sigma) # check shape elif sigma == None: resids = [] for i in range(M): resids.append( GLS(endog[i], exog[:, self._cols[i]:self._cols[i + 1]]).fit().resid) resids = np.asarray(resids).reshape(M, -1) sigma = self._compute_sigma(resids) self.sigma = sigma self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(\ self.sigma)).T self.initialize()
def __init__(self, sys, sigma=None, dfk=None): if len(sys) % 2 != 0: raise ValueError("sys must be a list of pairs of endogenous and \ exogenous variables. Got length %s" % len(sys)) if dfk: if not dfk.lower() in ['dfk1','dfk2']: raise ValueError("dfk option %s not understood" % (dfk)) self._dfk = dfk M = len(sys[1::2]) self._M = M # exog = np.zeros((M,M), dtype=object) # for i,eq in enumerate(sys[1::2]): # exog[i,i] = np.asarray(eq) # not sure this exog is needed # used to compute resids for now exog = np.column_stack(np.asarray(sys[1::2][i]) for i in range(M)) # exog = np.vstack(np.asarray(sys[1::2][i]) for i in range(M)) self.exog = exog # 2d ndarray exog is better # Endog, might just go ahead and reshape this? endog = np.asarray(sys[::2]) self.endog = endog self.nobs = float(self.endog[0].shape[0]) # assumes all the same length # Degrees of Freedom df_resid = [] df_model = [] [df_resid.append(self.nobs - tools.rank(_)) \ for _ in sys[1::2]] [df_model.append(tools.rank(_) - 1) for _ in sys[1::2]] self.df_resid = np.asarray(df_resid) self.df_model = np.asarray(df_model) # "Block-diagonal" sparse matrix of exog sp_exog = sparse.lil_matrix((int(self.nobs*M), int(np.sum(self.df_model+1)))) # linked lists to build self._cols = np.cumsum(np.hstack((0, self.df_model+1))) for i in range(M): sp_exog[i*self.nobs:(i+1)*self.nobs, self._cols[i]:self._cols[i+1]] = sys[1::2][i] self.sp_exog = sp_exog.tocsr() # cast to compressed for efficiency # Deal with sigma, check shape earlier if given if np.any(sigma): sigma = np.asarray(sigma) # check shape elif sigma == None: resids = [] for i in range(M): resids.append(GLS(endog[i],exog[:, self._cols[i]:self._cols[i+1]]).fit().resid) resids = np.asarray(resids).reshape(M,-1) sigma = self._compute_sigma(resids) self.sigma = sigma self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(\ self.sigma)).T self.initialize()
def test_fullrank(self): X = standard_normal((40,10)) X[:,0] = X[:,1] + X[:,2] Y = tools.fullrank(X) self.assertEquals(Y.shape, (40,9)) self.assertEquals(tools.rank(Y), 9) X[:,5] = X[:,3] + X[:,4] Y = tools.fullrank(X) self.assertEquals(Y.shape, (40,8)) self.assertEquals(tools.rank(Y), 8)
def _initialize(self): """ Initializes the model for the IRLS fit. Resets the history and number of iterations. """ self.pinv_wexog = np.linalg.pinv(self.exog) self.normalized_cov_params = np.dot(self.pinv_wexog, np.transpose(self.pinv_wexog)) self.df_resid = np.float(self.exog.shape[0] - rank(self.exog)) self.df_model = np.float(rank(self.exog) - 1) self.nobs = float(self.endog.shape[0])
def _initialize(self): """ Initializes the model for the IRLS fit. Resets the history and number of iterations. """ self.pinv_wexog = np.linalg.pinv(self.exog) self.normalized_cov_params = np.dot(self.pinv_wexog, np.transpose(self.pinv_wexog)) self.df_resid = np.float(self.exog.shape[0] - rank(self.exog)) self.df_model = np.float(rank(self.exog)-1) self.nobs = float(self.endog.shape[0])
def initialize(self): """ Initialize a generalized linear model. """ #TODO: intended for public use? self.history = {'fittedvalues' : [], 'params' : [np.inf], 'deviance' : [np.inf]} self.pinv_wexog = np.linalg.pinv(self.exog) self.normalized_cov_params = np.dot(self.pinv_wexog, np.transpose(self.pinv_wexog)) self.df_model = rank(self.exog)-1 self.df_resid = self.exog.shape[0] - rank(self.exog)
def spec_hausman(self, dof=None): '''Hausman's specification test See Also -------- spec_hausman : generic function for Hausman's specification test ''' #use normalized cov_params for OLS resols = OLS(endog, exog).fit() normalized_cov_params_ols = resols.model.normalized_cov_params se2 = resols.mse_resid params_diff = self._results.params - resols.params cov_diff = np.linalg.pinv(self.xhatprod) - normalized_cov_params_ols #TODO: the following is very inefficient, solves problem (svd) twice #use linalg.lstsq or svd directly #cov_diff will very often be in-definite (singular) if not dof: dof = tools.rank(cov_diff) cov_diffpinv = np.linalg.pinv(cov_diff) H = np.dot(params_diff, np.dot(cov_diffpinv, params_diff))/se2 pval = stats.chi2.sf(H, dof) return H, pval, dof
def initialize(self): """ Initialize a generalized linear model. """ #TODO: intended for public use? self.history = { 'fittedvalues': [], 'params': [np.inf], 'deviance': [np.inf] } self.pinv_wexog = np.linalg.pinv(self.exog) self.normalized_cov_params = np.dot(self.pinv_wexog, np.transpose(self.pinv_wexog)) self.df_model = rank(self.exog) - 1 self.df_resid = self.exog.shape[0] - rank(self.exog)
def contrastfromcols(L, D, pseudo=None): """ From an n x p design matrix D and a matrix L, tries to determine a p x q contrast matrix C which determines a contrast of full rank, i.e. the n x q matrix dot(transpose(C), pinv(D)) is full rank. L must satisfy either L.shape[0] == n or L.shape[1] == p. If L.shape[0] == n, then L is thought of as representing columns in the column space of D. If L.shape[1] == p, then L is thought of as what is known as a contrast matrix. In this case, this function returns an estimable contrast corresponding to the dot(D, L.T) Note that this always produces a meaningful contrast, not always with the intended properties because q is always non-zero unless L is identically 0. That is, it produces a contrast that spans the column space of L (after projection onto the column space of D). Parameters ---------- L : array-like D : array-like """ L = np.asarray(L) D = np.asarray(D) n, p = D.shape if L.shape[0] != n and L.shape[1] != p: raise ValueError("shape of L and D mismatched") if pseudo is None: pseudo = np.linalg.pinv(D) # D^+ \approx= ((dot(D.T,D))^(-1),D.T) if L.shape[0] == n: C = np.dot(pseudo, L).T else: C = L C = np.dot(pseudo, np.dot(D, C.T)).T Lp = np.dot(D, C.T) if len(Lp.shape) == 1: Lp.shape = (n, 1) if rank(Lp) != Lp.shape[1]: Lp = fullrank(Lp) C = np.dot(pseudo, Lp).T return np.squeeze(C)
def __init__(self, sys, indep_endog=None, instruments=None): if len(sys) % 2 != 0: raise ValueError("sys must be a list of pairs of endogenous and \ exogenous variables. Got length %s" % len(sys)) M = len(sys[1::2]) self._M = M # The lists are probably a bad idea self.endog = sys[::2] # these are just list containers self.exog = sys[1::2] self._K = [tools.rank(_) for _ in sys[1::2]] # fullexog = np.column_stack((_ for _ in self.exog)) self.instruments = instruments # Keep the Y_j's in a container to get IVs instr_endog = {} [instr_endog.setdefault(_, []) for _ in indep_endog.keys()] for eq_key in indep_endog: for varcol in indep_endog[eq_key]: instr_endog[eq_key].append(self.exog[eq_key][:, varcol]) # ^ copy needed? # self._instr_endog = instr_endog self._indep_endog = indep_endog _col_map = np.cumsum(np.hstack((0, self._K))) # starting col no.s # move this check to whiten since we're not going to build a full exog? for eq_key in indep_endog: try: iter(indep_endog[eq_key]) except: # eq_key = [eq_key] raise TypeError("The values of the indep_exog dict must be\ iterable. Got type %s for converter %s" % (type(del_col))) # for del_col in indep_endog[eq_key]: # fullexog = np.delete(fullexog, _col_map[eq_key]+del_col, 1) # _col_map[eq_key+1:] -= 1 # Josef's example for deleting reoccuring "rows" # fullexog = np.unique(fullexog.T.view([('',fullexog.dtype)]*\ # fullexog.shape[0])).view(fullexog.dtype).reshape(\ # fullexog.shape[0],-1) # From http://article.gmane.org/gmane.comp.python.numeric.general/32276/ # Or Jouni' suggetsion of taking a hash: # http://www.mail-archive.com/[email protected]/msg04209.html # not clear to me how this would work though, only if they are the *same* # elements? # self.fullexog = fullexog self.wexog = self.whiten(instr_endog)
def __init__(self, sys, indep_endog=None, instruments=None): if len(sys) % 2 != 0: raise ValueError("sys must be a list of pairs of endogenous and \ exogenous variables. Got length %s" % len(sys)) M = len(sys[1::2]) self._M = M # The lists are probably a bad idea self.endog = sys[::2] # these are just list containers self.exog = sys[1::2] self._K = [tools.rank(_) for _ in sys[1::2]] # fullexog = np.column_stack((_ for _ in self.exog)) self.instruments = instruments # Keep the Y_j's in a container to get IVs instr_endog = {} [instr_endog.setdefault(_,[]) for _ in indep_endog.keys()] for eq_key in indep_endog: for varcol in indep_endog[eq_key]: instr_endog[eq_key].append(self.exog[eq_key][:,varcol]) # ^ copy needed? # self._instr_endog = instr_endog self._indep_endog = indep_endog _col_map = np.cumsum(np.hstack((0,self._K))) # starting col no.s # move this check to whiten since we're not going to build a full exog? for eq_key in indep_endog: try: iter(indep_endog[eq_key]) except: # eq_key = [eq_key] raise TypeError("The values of the indep_exog dict must be\ iterable. Got type %s for converter %s" % (type(del_col))) # for del_col in indep_endog[eq_key]: # fullexog = np.delete(fullexog, _col_map[eq_key]+del_col, 1) # _col_map[eq_key+1:] -= 1 # Josef's example for deleting reoccuring "rows" # fullexog = np.unique(fullexog.T.view([('',fullexog.dtype)]*\ # fullexog.shape[0])).view(fullexog.dtype).reshape(\ # fullexog.shape[0],-1) # From http://article.gmane.org/gmane.comp.python.numeric.general/32276/ # Or Jouni' suggetsion of taking a hash: # http://www.mail-archive.com/[email protected]/msg04209.html # not clear to me how this would work though, only if they are the *same* # elements? # self.fullexog = fullexog self.wexog = self.whiten(instr_endog)
def spec_hausman(params_e, params_i, cov_params_e, cov_params_i, dof=None): '''Hausmans specification test Parameters ---------- params_e : array efficient and consistent under Null hypothesis, inconsistent under alternative hypothesis params_i: array consistent under Null hypothesis, consistent under alternative hypothesis cov_params_e : array, 2d covariance matrix of parameter estimates for params_e cov_params_i : array, 2d covariance matrix of parameter estimates for params_i example instrumental variables OLS estimator is `e`, IV estimator is `i` Notes ----- Todos,Issues - check dof calculations and verify for linear case - check one-sided hypothesis References ---------- Greene section 5.5 p.82/83 ''' params_diff = (params_i - params_e) cov_diff = cov_params_i - cov_params_e #TODO: the following is very inefficient, solves problem (svd) twice #use linalg.lstsq or svd directly #cov_diff will very often be in-definite (singular) if not dof: dof = tools.rank(cov_diff) cov_diffpinv = np.linalg.pinv(cov_diff) H = np.dot(params_diff, np.dot(cov_diffpinv, params_diff)) pval = stats.chi2.sf(H, dof) evals = np.linalg.eigvalsh(cov_diff) return H, pval, dof, evals
def test_rank(self): X = standard_normal((40,10)) self.assertEquals(tools.rank(X), 10) X[:,0] = X[:,1] + X[:,2] self.assertEquals(tools.rank(X), 9)