def __init__(self, endog, exog, **kwargs):
    """
    Build the quantile regression model and its bookkeeping attributes.

    Parameters
    ----------
    endog : array-like
        Response variable, forwarded to the parent constructor.
    exog : array-like
        Design matrix, forwarded to the parent constructor.
    **kwargs
        Passed through unchanged to the parent constructor.

    Notes
    -----
    Sets three ``EventHook`` instances (``PostEstimation``,
    ``PreEstimation``, ``PostVarianceCalculation``) and the float-valued
    ``nobs``, ``df_resid`` and ``df_model`` attributes.
    """
    super(quantreg, self).__init__(endog, exog, **kwargs)
    self.PostEstimation = EventHook()
    self.PreEstimation = EventHook()
    self.PostVarianceCalculation = EventHook()
    self.nobs = float(self.endog.shape[0])
    # Compute the rank once; it is needed for both degrees of freedom.
    exog_rank = rank(self.exog)
    # The ``np.float`` alias was removed from NumPy; it was the builtin float.
    self.df_resid = float(self.exog.shape[0] - exog_rank)
    self.df_model = float(exog_rank - 1)
def test_rank(self):
    """Check that tools.rank agrees with np_matrix_rank, before and after
    making the first column a linear combination of two others."""
    import warnings
    with warnings.catch_warnings():
        # Silence any warnings raised while computing the rank.
        warnings.simplefilter("ignore")
        X = standard_normal((40, 10))
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(tools.rank(X), np_matrix_rank(X))

        X[:, 0] = X[:, 1] + X[:, 2]
        self.assertEqual(tools.rank(X), np_matrix_rank(X))
def test_rank(self):
    """Check that tools.rank agrees with np_matrix_rank, before and after
    making the first column a linear combination of two others."""
    import warnings
    with warnings.catch_warnings():
        # Silence any warnings raised while computing the rank.
        warnings.simplefilter("ignore")
        X = standard_normal((40, 10))
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(tools.rank(X), np_matrix_rank(X))

        X[:, 0] = X[:, 1] + X[:, 2]
        self.assertEqual(tools.rank(X), np_matrix_rank(X))
def __init__(self, sys, sigma=None, dfk=None):
    """
    Set up a system of equations from alternating endog/exog entries.

    Parameters
    ----------
    sys : list
        ``[endog1, exog1, endog2, exog2, ...]``; must have even length.
    sigma : array-like, optional
        Error covariance across equations.  When None it is estimated by
        ``self._compute_sigma`` from the residuals of one GLS fit per
        equation.
    dfk : str, optional
        Either 'dfk1' or 'dfk2' (checked case-insensitively); stored as
        given on ``self._dfk``.

    Raises
    ------
    ValueError
        If ``sys`` has odd length or ``dfk`` is not a recognised option.
    """
    if len(sys) % 2 != 0:
        # Implicit concatenation keeps source indentation out of the message.
        raise ValueError("sys must be a list of pairs of endogenous and "
                         "exogenous variables. Got length %s" % len(sys))
    if dfk:
        if dfk.lower() not in ['dfk1', 'dfk2']:
            raise ValueError("dfk option %s not understood" % (dfk))
    self._dfk = dfk

    exog_blocks = [np.asarray(eq) for eq in sys[1::2]]
    M = len(exog_blocks)
    self._M = M

    # Side-by-side stack of every equation's exog; used below to compute
    # the per-equation residuals.  column_stack needs a real sequence,
    # not a generator.
    exog = np.column_stack(exog_blocks)
    self.exog = exog

    endog = np.asarray(sys[::2])
    self.endog = endog
    self.nobs = float(self.endog[0].shape[0])  # assumes all the same length
    nobs = int(self.nobs)  # integer copy for slicing

    # Degrees of freedom, one entry per equation.
    ranks = [tools.rank(eq) for eq in sys[1::2]]
    self.df_resid = np.asarray([self.nobs - r for r in ranks])
    self.df_model = np.asarray([r - 1 for r in ranks])

    # Column offsets of each equation's block.  They are derived from the
    # ranks, so each exog block is assumed to have full column rank.
    self._cols = np.cumsum(np.hstack((0, self.df_model + 1))).astype(int)

    # "Block-diagonal" sparse matrix of exog, built as linked lists.
    sp_exog = sparse.lil_matrix((nobs * M, int(self._cols[-1])))
    for i in range(M):
        sp_exog[i * nobs:(i + 1) * nobs,
                self._cols[i]:self._cols[i + 1]] = exog_blocks[i]
    self.sp_exog = sp_exog.tocsr()  # cast to compressed for efficiency

    # Identity test: ``sigma == None`` is elementwise for array input.
    if sigma is None:
        resids = [
            GLS(endog[i],
                exog[:, self._cols[i]:self._cols[i + 1]]).fit().resid
            for i in range(M)
        ]
        resids = np.asarray(resids).reshape(M, -1)
        sigma = self._compute_sigma(resids)
    else:
        sigma = np.asarray(sigma)
    self.sigma = sigma
    self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(self.sigma)).T
    self.initialize()
def _initialize(self):
    """
    Compute the quantities derived from ``exog`` and ``endog``.

    Sets ``pinv_wexog`` (pseudo-inverse of ``exog``),
    ``normalized_cov_params`` (that pseudo-inverse times its transpose),
    and the float-valued ``df_resid``, ``df_model`` and ``nobs``.
    """
    self.pinv_wexog = np.linalg.pinv(self.exog)
    self.normalized_cov_params = np.dot(self.pinv_wexog,
                                        np.transpose(self.pinv_wexog))
    # Compute the rank once; it is needed for both degrees of freedom.
    exog_rank = rank(self.exog)
    # The ``np.float`` alias was removed from NumPy; it was the builtin float.
    self.df_resid = float(self.exog.shape[0] - exog_rank)
    self.df_model = float(exog_rank - 1)
    self.nobs = float(self.endog.shape[0])
def test_fullrank(self):
    """Check that tools.fullrank drops one column for each linear
    dependency introduced and returns a full-rank matrix."""
    X = standard_normal((40, 10))
    X[:, 0] = X[:, 1] + X[:, 2]

    Y = tools.fullrank(X)
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(Y.shape, (40, 9))
    self.assertEqual(tools.rank(Y), 9)

    # Add a second dependency on top of the first.
    X[:, 5] = X[:, 3] + X[:, 4]
    Y = tools.fullrank(X)
    self.assertEqual(Y.shape, (40, 8))
    self.assertEqual(tools.rank(Y), 8)
def __init__(self, sys, sigma=None, dfk=None):
    """
    Set up a system of equations from alternating endog/exog entries.

    Parameters
    ----------
    sys : list
        ``[endog1, exog1, endog2, exog2, ...]``; must have even length.
    sigma : array-like, optional
        Error covariance across equations.  When None it is estimated by
        ``self._compute_sigma`` from the residuals of one GLS fit per
        equation.
    dfk : str, optional
        Either 'dfk1' or 'dfk2' (checked case-insensitively); stored as
        given on ``self._dfk``.

    Raises
    ------
    ValueError
        If ``sys`` has odd length or ``dfk`` is not a recognised option.
    """
    if len(sys) % 2 != 0:
        # Implicit concatenation keeps source indentation out of the message.
        raise ValueError("sys must be a list of pairs of endogenous and "
                         "exogenous variables. Got length %s" % len(sys))
    if dfk:
        if dfk.lower() not in ['dfk1', 'dfk2']:
            raise ValueError("dfk option %s not understood" % (dfk))
    self._dfk = dfk

    exog_blocks = [np.asarray(eq) for eq in sys[1::2]]
    M = len(exog_blocks)
    self._M = M

    # Side-by-side stack of every equation's exog; used below to compute
    # the per-equation residuals.  column_stack needs a real sequence,
    # not a generator.
    exog = np.column_stack(exog_blocks)
    self.exog = exog

    endog = np.asarray(sys[::2])
    self.endog = endog
    self.nobs = float(self.endog[0].shape[0])  # assumes all the same length
    nobs = int(self.nobs)  # integer copy for slicing

    # Degrees of freedom, one entry per equation.
    ranks = [tools.rank(eq) for eq in sys[1::2]]
    self.df_resid = np.asarray([self.nobs - r for r in ranks])
    self.df_model = np.asarray([r - 1 for r in ranks])

    # Column offsets of each equation's block.  They are derived from the
    # ranks, so each exog block is assumed to have full column rank.
    self._cols = np.cumsum(np.hstack((0, self.df_model + 1))).astype(int)

    # "Block-diagonal" sparse matrix of exog, built as linked lists.
    sp_exog = sparse.lil_matrix((nobs * M, int(self._cols[-1])))
    for i in range(M):
        sp_exog[i * nobs:(i + 1) * nobs,
                self._cols[i]:self._cols[i + 1]] = exog_blocks[i]
    self.sp_exog = sp_exog.tocsr()  # cast to compressed for efficiency

    # Identity test: ``sigma == None`` is elementwise for array input.
    if sigma is None:
        resids = [
            GLS(endog[i],
                exog[:, self._cols[i]:self._cols[i + 1]]).fit().resid
            for i in range(M)
        ]
        resids = np.asarray(resids).reshape(M, -1)
        sigma = self._compute_sigma(resids)
    else:
        sigma = np.asarray(sigma)
    self.sigma = sigma
    self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(self.sigma)).T
    self.initialize()
def test_fullrank(self):
    """Check that tools.fullrank drops one column for each linear
    dependency introduced and returns a full-rank matrix."""
    X = standard_normal((40, 10))
    X[:, 0] = X[:, 1] + X[:, 2]

    Y = tools.fullrank(X)
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(Y.shape, (40, 9))
    self.assertEqual(tools.rank(Y), 9)

    # Add a second dependency on top of the first.
    X[:, 5] = X[:, 3] + X[:, 4]
    Y = tools.fullrank(X)
    self.assertEqual(Y.shape, (40, 8))
    self.assertEqual(tools.rank(Y), 8)
def _initialize(self):
    """
    Initializes the model for the IRLS fit.

    Resets the history and number of iterations, then recomputes the
    pseudo-inverse of ``exog``, ``normalized_cov_params`` and the
    float-valued ``df_resid``, ``df_model`` and ``nobs``.
    """
    self.history = {"deviance": [np.inf],
                    "params": [np.inf],
                    "weights": [np.inf],
                    "sresid": [np.inf],
                    "scale": []}
    self.iteration = 0
    self.pinv_wexog = np.linalg.pinv(self.exog)
    self.normalized_cov_params = np.dot(self.pinv_wexog,
                                        np.transpose(self.pinv_wexog))
    # Compute the rank once; it is needed for both degrees of freedom.
    exog_rank = rank(self.exog)
    # The ``np.float`` alias was removed from NumPy; it was the builtin float.
    self.df_resid = float(self.exog.shape[0] - exog_rank)
    self.df_model = float(exog_rank - 1)
    self.nobs = float(self.endog.shape[0])
def _initialize(self):
    """
    Prepare degrees of freedom and the arrays and settings used by the solver.

    With ``n = endog.shape[0]`` the method sets:

    - ``c``      : negated ``endog``
    - ``A``      : identity stacked on top of negated identity, (2n, n)
    - ``b``      : n ones followed by n zeros
    - ``Aeq``    : transpose of ``exog``
    - ``beq``    : ``(1 - tau) * sum(exog, 0)``
    - ``t``, ``eps``, ``maxit``, ``update`` : fixed numeric settings

    NOTE(review): the names suggest a linear-programming formulation with
    inequality (A, b) and equality (Aeq, beq) constraints -- confirm
    against the solver that consumes them.
    """
    n = self.endog.shape[0]
    self.nobs = float(n)
    # Compute the rank once; it is needed for both degrees of freedom.
    exog_rank = rank(self.exog)
    # The ``np.float`` alias was removed from NumPy; it was the builtin float.
    self.df_resid = float(self.exog.shape[0] - exog_rank)
    self.df_model = float(exog_rank - 1)
    self.c = -self.endog
    self.A = np.concatenate([np.identity(n), -np.identity(n)], axis=0)
    self.Aeq = self.exog.T
    self.b = np.concatenate([np.ones(n), np.zeros(n)], axis=0)
    self.beq = (1-self.tau) * sum(self.exog, 0)
    self.t = 1
    self.eps = 10e-07
    self.maxit = 1
    self.update = 1.1
def initialize(self):
    """
    Prepare a generalized linear model for fitting.

    Starts a fresh ``history`` dict and derives ``pinv_wexog``,
    ``normalized_cov_params``, ``df_model`` and ``df_resid`` from ``exog``.
    """
    # TODO: intended for public use?
    self.history = dict(fittedvalues=[],
                        params=[np.inf],
                        deviance=[np.inf])
    design = self.exog
    self.pinv_wexog = np.linalg.pinv(design)
    pinv_design = self.pinv_wexog
    self.normalized_cov_params = np.dot(pinv_design, pinv_design.T)
    self.df_model = rank(design) - 1
    n_rows = design.shape[0]
    self.df_resid = n_rows - rank(design)
def test_fullrank(self):
    """Check that tools.fullrank drops one column for each linear
    dependency introduced and returns a full-rank matrix."""
    import warnings
    with warnings.catch_warnings():
        # One "ignore" filter covers the whole block; the repeated
        # simplefilter call further down was redundant.
        warnings.simplefilter("ignore")
        X = standard_normal((40, 10))
        X[:, 0] = X[:, 1] + X[:, 2]

        Y = tools.fullrank(X)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(Y.shape, (40, 9))
        self.assertEqual(tools.rank(Y), 9)

        # Add a second dependency on top of the first.
        X[:, 5] = X[:, 3] + X[:, 4]
        Y = tools.fullrank(X)
        self.assertEqual(Y.shape, (40, 8))
        self.assertEqual(tools.rank(Y), 8)
def test_fullrank(self):
    """Check that tools.fullrank drops one column for each linear
    dependency introduced and returns a full-rank matrix."""
    import warnings
    with warnings.catch_warnings():
        # One "ignore" filter covers the whole block; the repeated
        # simplefilter call further down was redundant.
        warnings.simplefilter("ignore")
        X = standard_normal((40, 10))
        X[:, 0] = X[:, 1] + X[:, 2]

        Y = tools.fullrank(X)
        # assertEqual: the assertEquals alias was removed in Python 3.12.
        self.assertEqual(Y.shape, (40, 9))
        self.assertEqual(tools.rank(Y), 9)

        # Add a second dependency on top of the first.
        X[:, 5] = X[:, 3] + X[:, 4]
        Y = tools.fullrank(X)
        self.assertEqual(Y.shape, (40, 8))
        self.assertEqual(tools.rank(Y), 8)
def spec_hausman(self, dof=None):
    '''Hausman's specification test

    Compares this model's estimates (``self._results.params``) with an OLS
    fit on the same ``endog``/``exog``.

    Parameters
    ----------
    dof : int, optional
        Degrees of freedom of the chi-square reference distribution.
        When omitted (or falsy) the rank of the covariance difference
        is used.

    Returns
    -------
    H : float
        Test statistic.
    pval : float
        Upper-tail chi-square probability of ``H`` with ``dof`` degrees
        of freedom.
    dof : int
        Degrees of freedom actually used.

    See Also
    --------
    spec_hausman : generic function for Hausman's specification test
    '''
    # Use the model's own data; the bare names endog/exog are not defined
    # inside this method.
    endog, exog = self.endog, self.exog

    # use normalized cov_params for OLS
    resols = OLS(endog, exog).fit()
    normalized_cov_params_ols = resols.model.normalized_cov_params
    se2 = resols.mse_resid

    params_diff = self._results.params - resols.params

    cov_diff = np.linalg.pinv(self.xhatprod) - normalized_cov_params_ols
    # TODO: the following is very inefficient, solves problem (svd) twice
    # use linalg.lstsq or svd directly
    # cov_diff will very often be indefinite (singular)
    if not dof:
        dof = tools.rank(cov_diff)
    cov_diffpinv = np.linalg.pinv(cov_diff)
    # Quadratic form in the parameter difference, scaled by the OLS
    # residual variance.
    H = np.dot(params_diff, np.dot(cov_diffpinv, params_diff))/se2
    pval = stats.chi2.sf(H, dof)

    return H, pval, dof
def initialize(self):
    """
    Set up the state a generalized linear model needs before fitting.

    Creates an empty ``history`` record and computes, from ``exog``, the
    pseudo-inverse, the normalized parameter covariance and the model and
    residual degrees of freedom.
    """
    # TODO: intended for public use?
    fresh_history = {}
    fresh_history['fittedvalues'] = []
    fresh_history['params'] = [np.inf]
    fresh_history['deviance'] = [np.inf]
    self.history = fresh_history

    self.pinv_wexog = np.linalg.pinv(self.exog)
    transposed = np.transpose(self.pinv_wexog)
    self.normalized_cov_params = np.dot(self.pinv_wexog, transposed)

    self.df_model = rank(self.exog) - 1
    self.df_resid = self.exog.shape[0] - rank(self.exog)
def contrastfromcols(L, D, pseudo=None):
    """
    Derive a full-rank contrast matrix C from a design matrix D and a
    matrix L.

    D is n x p.  L must satisfy either L.shape[0] == n or
    L.shape[1] == p:

    * If L.shape[0] == n, L is treated as columns in the column space
      of D.
    * Otherwise L is treated as a contrast matrix, and an estimable
      contrast corresponding to dot(D, L.T) is returned.

    The result always spans the column space of L after projection onto
    the column space of D, so it is a meaningful contrast but not
    necessarily one with the intended properties: q is non-zero unless
    L is identically 0.

    Parameters
    ----------
    L : array-like
    D : array-like
    pseudo : array-like, optional
        Pseudo-inverse of D; computed with np.linalg.pinv when None.

    Returns
    -------
    ndarray
        The contrast, with length-one axes squeezed out.

    Raises
    ------
    ValueError
        If neither shape condition on L holds.
    """
    L = np.asarray(L)
    D = np.asarray(D)

    n_rows, n_cols = D.shape

    rows_match = L.shape[0] == n_rows
    if not rows_match and L.shape[1] != n_cols:
        raise ValueError("shape of L and D mismatched")

    if pseudo is None:
        pseudo = np.linalg.pinv(D)    # D^+ \approx= ((dot(D.T,D))^(-1),D.T)

    if rows_match:
        contrast = np.dot(pseudo, L).T
    else:
        contrast = np.dot(pseudo, np.dot(D, L.T)).T

    projected = np.dot(D, contrast.T)
    if projected.ndim == 1:
        # Promote a single column to an explicit (n, 1) matrix.
        projected = projected.reshape(n_rows, 1)

    if rank(projected) != projected.shape[1]:
        # Drop dependent columns and rebuild the contrast from the rest.
        projected = fullrank(projected)
        contrast = np.dot(pseudo, projected).T

    return np.squeeze(contrast)
def __init__(self, sys, indep_endog=None, instruments=None):
    """
    Set up a system of equations with endogenous regressors.

    Parameters
    ----------
    sys : list
        ``[endog1, exog1, endog2, exog2, ...]``; must have even length.
    indep_endog : dict
        Maps an equation index to an iterable of column indices of that
        equation's exog that are endogenous regressors.
        NOTE(review): the default is None, but the constructor iterates
        over this argument, so a dict must be supplied -- confirm whether
        None should be accepted.
    instruments : array-like, optional
        Stored unchanged on ``self.instruments``.

    Raises
    ------
    ValueError
        If ``sys`` has odd length.
    TypeError
        If a value of ``indep_endog`` is not iterable.
    """
    if len(sys) % 2 != 0:
        # Implicit concatenation keeps source indentation out of the message.
        raise ValueError("sys must be a list of pairs of endogenous and "
                         "exogenous variables. Got length %s" % len(sys))
    M = len(sys[1::2])
    self._M = M
    # The lists are probably a bad idea
    self.endog = sys[::2]   # these are just list containers
    self.exog = sys[1::2]
    self._K = [tools.rank(_) for _ in sys[1::2]]
    self.instruments = instruments

    # Validate first, so a bad value produces the descriptive error
    # instead of failing inside the column-extraction loop below.
    for eq_key in indep_endog:
        try:
            iter(indep_endog[eq_key])
        except TypeError:
            raise TypeError("The values of the indep_exog dict must be "
                            "iterable. Got type %s for converter %s"
                            % (type(indep_endog[eq_key]), eq_key))

    # Keep the Y_j's in a container to get IVs
    instr_endog = {}
    for eq_key in indep_endog:
        instr_endog[eq_key] = [self.exog[eq_key][:, varcol]
                               for varcol in indep_endog[eq_key]]
    self._indep_endog = indep_endog

    self.wexog = self.whiten(instr_endog)
def __init__(self, sys, indep_endog=None, instruments=None):
    """
    Set up a system of equations with endogenous regressors.

    Parameters
    ----------
    sys : list
        ``[endog1, exog1, endog2, exog2, ...]``; must have even length.
    indep_endog : dict
        Maps an equation index to an iterable of column indices of that
        equation's exog that are endogenous regressors.
        NOTE(review): the default is None, but the constructor iterates
        over this argument, so a dict must be supplied -- confirm whether
        None should be accepted.
    instruments : array-like, optional
        Stored unchanged on ``self.instruments``.

    Raises
    ------
    ValueError
        If ``sys`` has odd length.
    TypeError
        If a value of ``indep_endog`` is not iterable.
    """
    if len(sys) % 2 != 0:
        # Implicit concatenation keeps source indentation out of the message.
        raise ValueError("sys must be a list of pairs of endogenous and "
                         "exogenous variables. Got length %s" % len(sys))
    M = len(sys[1::2])
    self._M = M
    # The lists are probably a bad idea
    self.endog = sys[::2]   # these are just list containers
    self.exog = sys[1::2]
    self._K = [tools.rank(_) for _ in sys[1::2]]
    self.instruments = instruments

    # Validate first, so a bad value produces the descriptive error
    # instead of failing inside the column-extraction loop below.
    for eq_key in indep_endog:
        try:
            iter(indep_endog[eq_key])
        except TypeError:
            raise TypeError("The values of the indep_exog dict must be "
                            "iterable. Got type %s for converter %s"
                            % (type(indep_endog[eq_key]), eq_key))

    # Keep the Y_j's in a container to get IVs
    instr_endog = {}
    for eq_key in indep_endog:
        instr_endog[eq_key] = [self.exog[eq_key][:, varcol]
                               for varcol in indep_endog[eq_key]]
    self._indep_endog = indep_endog

    self.wexog = self.whiten(instr_endog)
def spec_hausman(params_e, params_i, cov_params_e, cov_params_i, dof=None):
    '''Hausman's specification test

    Parameters
    ----------
    params_e : array
        Estimate that is efficient and consistent under the null
        hypothesis but inconsistent under the alternative.
    params_i : array
        Estimate that is consistent under both the null and the
        alternative hypothesis.
    cov_params_e : array, 2d
        Covariance matrix of the parameter estimates ``params_e``.
    cov_params_i : array, 2d
        Covariance matrix of the parameter estimates ``params_i``.
    dof : int, optional
        Degrees of freedom for the chi-square distribution; when falsy
        the rank of the covariance difference is used.

    Returns
    -------
    H : float
        Test statistic.
    pval : float
        Upper-tail chi-square probability of ``H``.
    dof : int
        Degrees of freedom used.
    evals : array
        Eigenvalues of the covariance difference.

    Example: with instrumental variables, the OLS estimator is `e` and
    the IV estimator is `i`.

    Notes
    -----
    Todos, issues
    - check dof calculations and verify for linear case
    - check one-sided hypothesis

    References
    ----------
    Greene section 5.5 p.82/83
    '''
    delta = params_i - params_e
    cov_gap = cov_params_i - cov_params_e
    # TODO: the following is very inefficient, solves problem (svd) twice
    # use linalg.lstsq or svd directly
    # cov_gap will very often be indefinite (singular)
    dof = dof or tools.rank(cov_gap)
    cov_gap_pinv = np.linalg.pinv(cov_gap)
    weighted = np.dot(cov_gap_pinv, delta)
    H = np.dot(delta, weighted)
    pval = stats.chi2.sf(H, dof)

    evals = np.linalg.eigvalsh(cov_gap)

    return H, pval, dof, evals
def setupClass(cls):
    """
    Fit OLS on the Longley data and store the fixtures on the class.

    ``res1``/``res2`` hold the fitted results and the reference results;
    ``res_qr`` is a fit with ``method="qr"`` and ``res_qr_manual`` is the
    same fit on a model whose QR factors, normalized covariance and rank
    were assigned by hand beforehand.
    """
    from results.results_regression import Longley

    data = longley.load()
    data.exog = add_constant(data.exog, prepend=False)

    fitted = OLS(data.endog, data.exog).fit()
    reference = Longley()
    reference.wresid = fitted.wresid  # workaround hack
    cls.res1 = fitted
    cls.res2 = reference

    qr_fit = OLS(data.endog, data.exog).fit(method="qr")

    # Same model again, with the QR-derived attributes filled in manually.
    manual_model = OLS(data.endog, data.exog)
    q_factor, r_factor = np.linalg.qr(data.exog)
    manual_model.exog_Q = q_factor
    manual_model.exog_R = r_factor
    gram = np.dot(r_factor.T, r_factor)
    manual_model.normalized_cov_params = np.linalg.inv(gram)
    manual_model.rank = rank(r_factor)
    manual_fit = manual_model.fit(method="qr")

    cls.res_qr = qr_fit
    cls.res_qr_manual = manual_fit
def test_rank(self):
    """Check tools.rank on a random 40x10 matrix, before and after making
    the first column a linear combination of two others."""
    X = standard_normal((40, 10))
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(tools.rank(X), 10)

    X[:, 0] = X[:, 1] + X[:, 2]
    self.assertEqual(tools.rank(X), 9)
def test_rank(self):
    """Check tools.rank on a random 40x10 matrix, before and after making
    the first column a linear combination of two others."""
    X = standard_normal((40, 10))
    # assertEqual: the assertEquals alias was removed in Python 3.12.
    self.assertEqual(tools.rank(X), 10)

    X[:, 0] = X[:, 1] + X[:, 2]
    self.assertEqual(tools.rank(X), 9)
def fit(self, q=.5, vcov='robust', kernel='epa', bandwidth='hsheather',
        max_iter=1000, p_tol=1e-6, **kwargs):
    '''Solve by Iterative Weighted Least Squares

    Parameters
    ----------
    q : float
        Quantile, must be between 0 and 1.
    vcov : string
        Method used to calculate the variance-covariance matrix of the
        parameters. Default is ``robust``:

        - robust : heteroskedasticity robust standard errors (as
          suggested in Greene 6th edition)
        - iid : iid errors (as in Stata 12)

    kernel : string
        Kernel to use in the kernel density estimation for the
        asymptotic covariance matrix; one of 'biw', 'cos', 'epa',
        'gau', 'par':

        - epa: Epanechnikov
        - cos: Cosine
        - gau: Gaussian
        - par: Parzen

    bandwidth : string
        Bandwidth selection method in kernel density estimation for
        asymptotic covariance estimate (full references in QuantReg
        docstring):

        - hsheather: Hall-Sheather (1988)
        - bofinger: Bofinger (1975)
        - chamberlain: Chamberlain (1994)

    max_iter : int
        Upper bound on the number of reweighting iterations.
    p_tol : float
        Iteration stops once the largest absolute change in any
        coefficient is no longer greater than this value.
    **kwargs
        Accepted but not used by this method.

    Returns
    -------
    RegressionResultsWrapper
        Wraps a QuantRegResults that additionally carries ``q``,
        ``iterations``, ``sparsity``, ``bandwidth`` and ``history``.

    Raises
    ------
    Exception
        If ``q`` is outside [0, 1], or ``kernel``, ``bandwidth`` or
        ``vcov`` is not one of the accepted names.
    '''
    if q < 0 or q > 1:
        # The parameter is called q; the old message said "p".
        raise Exception('q must be between 0 and 1')

    kern_names = ['biw', 'cos', 'epa', 'gau', 'par']
    if kernel not in kern_names:
        raise Exception("kernel must be one of " + ', '.join(kern_names))
    else:
        kernel = kernels[kernel]

    if bandwidth == 'hsheather':
        bandwidth = hall_sheather
    elif bandwidth == 'bofinger':
        bandwidth = bofinger
    elif bandwidth == 'chamberlain':
        bandwidth = chamberlain
    else:
        raise Exception("bandwidth must be in 'hsheather', 'bofinger', 'chamberlain'")

    endog = self.endog
    exog = self.exog
    nobs = self.nobs
    exog_rank = rank(self.exog)
    self.rank = exog_rank
    self.df_model = float(self.rank - self.k_constant)
    self.df_resid = self.nobs - self.rank
    n_iter = 0
    xstar = exog

    # TODO: better start, initial beta is used only for convergence check;
    # the loop always begins from the unweighted least-squares solution.
    beta = np.ones(exog_rank)

    diff = 10
    cycle = False

    history = dict(params=[], mse=[])
    while n_iter < max_iter and diff > p_tol and not cycle:
        n_iter += 1
        beta0 = beta
        xtx = np.dot(xstar.T, exog)
        xty = np.dot(xstar.T, endog)
        beta = np.dot(pinv(xtx), xty)
        resid = endog - np.dot(exog, beta)

        # Push tiny residuals away from zero so the reweighting below
        # never divides by zero.  np.sign(0) is 0, so exact zeros are
        # mapped to the positive floor explicitly.
        mask = np.abs(resid) < .000001
        resid[mask] = np.where(resid[mask] < 0, -.000001, .000001)
        resid = np.where(resid < 0, q * resid, (1-q) * resid)
        resid = np.abs(resid)
        xstar = exog / resid[:, np.newaxis]
        diff = np.max(np.abs(beta - beta0))
        history['params'].append(beta)
        history['mse'].append(np.mean(resid*resid))

        if (n_iter >= 300) and (n_iter % 100 == 0):
            # check for convergence circle, shouldn't happen
            for ii in range(2, 10):
                if np.all(beta == history['params'][-ii]):
                    cycle = True
                    # Warn only when a repeated iterate was actually found.
                    warnings.warn("Convergence cycle detected")
                    break

    if n_iter == max_iter:
        warnings.warn("Maximum number of iterations (%s) reached."
                      % max_iter)

    e = endog - np.dot(exog, beta)
    # Greene (2008, p.407) writes that Stata 6 uses this bandwidth:
    # h = 0.9 * np.std(e) / (nobs**0.2)
    # Instead, we calculate bandwidth as in Stata 12
    iqre = stats.scoreatpercentile(e, 75) - stats.scoreatpercentile(e, 25)
    h = bandwidth(nobs, q)
    h = min(np.std(endog),
            iqre / 1.34) * (norm.ppf(q + h) - norm.ppf(q - h))

    # Kernel density estimate of the residuals; its reciprocal is stored
    # as the sparsity below.
    fhat0 = 1. / (nobs * h) * np.sum(kernel(e / h))

    if vcov == 'robust':
        d = np.where(e > 0, (q/fhat0)**2, ((1-q)/fhat0)**2)
        xtxi = pinv(np.dot(exog.T, exog))
        xtdx = np.dot(exog.T * d[np.newaxis, :], exog)
        vcov = chain_dot(xtxi, xtdx, xtxi)
    elif vcov == 'iid':
        vcov = (1. / fhat0)**2 * q * (1 - q) * pinv(np.dot(exog.T, exog))
    else:
        raise Exception("vcov must be 'robust' or 'iid'")

    lfit = QuantRegResults(self, beta, normalized_cov_params=vcov)

    lfit.q = q
    lfit.iterations = n_iter
    lfit.sparsity = 1. / fhat0
    lfit.bandwidth = h
    lfit.history = history

    return RegressionResultsWrapper(lfit)
def fit(self, q=.5, vcov='robust', kernel='epa', bandwidth='hsheather',
        max_iter=1000, p_tol=1e-6, **kwargs):
    '''Solve by Iterative Weighted Least Squares

    Parameters
    ----------
    q : float
        Quantile, must be between 0 and 1.
    vcov : string
        Method used to calculate the variance-covariance matrix of the
        parameters. Default is ``robust``:

        - robust : heteroskedasticity robust standard errors (as
          suggested in Greene 6th edition)
        - iid : iid errors (as in Stata 12)

    kernel : string
        Kernel to use in the kernel density estimation for the
        asymptotic covariance matrix; one of 'biw', 'cos', 'epa',
        'gau', 'par':

        - epa: Epanechnikov
        - cos: Cosine
        - gau: Gaussian
        - par: Parzen

    bandwidth : string
        Bandwidth selection method in kernel density estimation for
        asymptotic covariance estimate (full references in QuantReg
        docstring):

        - hsheather: Hall-Sheather (1988)
        - bofinger: Bofinger (1975)
        - chamberlain: Chamberlain (1994)

    max_iter : int
        Upper bound on the number of reweighting iterations.
    p_tol : float
        Iteration stops once the largest absolute change in any
        coefficient is no longer greater than this value.
    **kwargs
        Accepted but not used by this method.

    Returns
    -------
    RegressionResultsWrapper
        Wraps a QuantRegResults that additionally carries ``q``,
        ``iterations``, ``sparsity``, ``bandwidth`` and ``history``.

    Raises
    ------
    Exception
        If ``q`` is outside [0, 1], or ``kernel``, ``bandwidth`` or
        ``vcov`` is not one of the accepted names.
    '''
    if q < 0 or q > 1:
        # The parameter is called q; the old message said "p".
        raise Exception('q must be between 0 and 1')

    kern_names = ['biw', 'cos', 'epa', 'gau', 'par']
    if kernel not in kern_names:
        raise Exception("kernel must be one of " + ', '.join(kern_names))
    else:
        kernel = kernels[kernel]

    if bandwidth == 'hsheather':
        bandwidth = hall_sheather
    elif bandwidth == 'bofinger':
        bandwidth = bofinger
    elif bandwidth == 'chamberlain':
        bandwidth = chamberlain
    else:
        raise Exception(
            "bandwidth must be in 'hsheather', 'bofinger', 'chamberlain'")

    endog = self.endog
    exog = self.exog
    nobs = self.nobs
    exog_rank = rank(self.exog)
    self.rank = exog_rank
    self.df_model = float(self.rank - self.k_constant)
    self.df_resid = self.nobs - self.rank
    n_iter = 0
    xstar = exog

    # TODO: better start, initial beta is used only for convergence check;
    # the loop always begins from the unweighted least-squares solution.
    beta = np.ones(exog_rank)

    diff = 10
    cycle = False

    history = dict(params=[], mse=[])
    while n_iter < max_iter and diff > p_tol and not cycle:
        n_iter += 1
        beta0 = beta
        xtx = np.dot(xstar.T, exog)
        xty = np.dot(xstar.T, endog)
        beta = np.dot(pinv(xtx), xty)
        resid = endog - np.dot(exog, beta)

        # Push tiny residuals away from zero so the reweighting below
        # never divides by zero.  np.sign(0) is 0, so exact zeros are
        # mapped to the positive floor explicitly.
        mask = np.abs(resid) < .000001
        resid[mask] = np.where(resid[mask] < 0, -.000001, .000001)
        resid = np.where(resid < 0, q * resid, (1 - q) * resid)
        resid = np.abs(resid)
        xstar = exog / resid[:, np.newaxis]
        diff = np.max(np.abs(beta - beta0))
        history['params'].append(beta)
        history['mse'].append(np.mean(resid * resid))

        if (n_iter >= 300) and (n_iter % 100 == 0):
            # check for convergence circle, shouldn't happen
            for ii in range(2, 10):
                if np.all(beta == history['params'][-ii]):
                    cycle = True
                    # Warn only when a repeated iterate was actually found.
                    warnings.warn("Convergence cycle detected")
                    break

    if n_iter == max_iter:
        warnings.warn("Maximum number of iterations (%s) reached."
                      % max_iter)

    e = endog - np.dot(exog, beta)
    # Greene (2008, p.407) writes that Stata 6 uses this bandwidth:
    # h = 0.9 * np.std(e) / (nobs**0.2)
    # Instead, we calculate bandwidth as in Stata 12
    iqre = stats.scoreatpercentile(e, 75) - stats.scoreatpercentile(e, 25)
    h = bandwidth(nobs, q)
    h = min(np.std(endog),
            iqre / 1.34) * (norm.ppf(q + h) - norm.ppf(q - h))

    # Kernel density estimate of the residuals; its reciprocal is stored
    # as the sparsity below.
    fhat0 = 1. / (nobs * h) * np.sum(kernel(e / h))

    if vcov == 'robust':
        d = np.where(e > 0, (q / fhat0)**2, ((1 - q) / fhat0)**2)
        xtxi = pinv(np.dot(exog.T, exog))
        xtdx = np.dot(exog.T * d[np.newaxis, :], exog)
        vcov = chain_dot(xtxi, xtdx, xtxi)
    elif vcov == 'iid':
        vcov = (1. / fhat0)**2 * q * (1 - q) * pinv(np.dot(exog.T, exog))
    else:
        raise Exception("vcov must be 'robust' or 'iid'")

    lfit = QuantRegResults(self, beta, normalized_cov_params=vcov)

    lfit.q = q
    lfit.iterations = n_iter
    lfit.sparsity = 1. / fhat0
    lfit.bandwidth = h
    lfit.history = history

    return RegressionResultsWrapper(lfit)