def _handle_constant(self, hasconst):
    if hasconst is not None:
        if hasconst:
            self.k_constant = 1
            self.const_idx = None
        else:
            self.k_constant = 0
            self.const_idx = None
    elif self.exog is None:
        self.const_idx = None
        self.k_constant = 0
    else:
        # detect where the constant is
        check_implicit = False
        const_idx = np.where(self.exog.ptp(axis=0) == 0)[0].squeeze()
        self.k_constant = const_idx.size

        if self.k_constant == 1:
            if self.exog[:, const_idx].mean() != 0:
                self.const_idx = const_idx
            else:
                # we only have a zero column and no other constant
                check_implicit = True
        elif self.k_constant > 1:
            # we have more than one constant column
            # look for ones
            values = []  # keep values if we need != 0
            for idx in const_idx:
                value = self.exog[:, idx].mean()
                if value == 1:
                    self.k_constant = 1
                    self.const_idx = idx
                    break
                values.append(value)
            else:
                # we didn't break, no column of ones
                pos = (np.array(values) != 0)
                if pos.any():
                    # take the first nonzero column
                    self.k_constant = 1
                    self.const_idx = const_idx[pos.argmax()]
                else:
                    # only zero columns
                    check_implicit = True
        elif self.k_constant == 0:
            check_implicit = True
        else:
            # shouldn't be here
            pass

        if check_implicit:
            # look for implicit constant
            # Compute rank of augmented matrix
            augmented_exog = np.column_stack(
                (np.ones(self.exog.shape[0]), self.exog))
            rank_augm = np_matrix_rank(augmented_exog)
            rank_orig = np_matrix_rank(self.exog)
            self.k_constant = int(rank_orig == rank_augm)
            self.const_idx = None
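# A minimal sketch (not from the original source) of the implicit-constant
# check above: no single column is constant, but a constant vector lies in
# the column space, so augmenting with a column of ones does not raise the
# rank. Assumes np_matrix_rank is numpy.linalg.matrix_rank.
import numpy as np
from numpy.linalg import matrix_rank

# two complementary dummies plus a noise column: their sum is a column of ones
x = np.column_stack(([1., 1., 0., 0.],
                     [0., 0., 1., 1.],
                     np.random.randn(4)))
augmented = np.column_stack((np.ones(x.shape[0]), x))
# rank unchanged -> _handle_constant would set k_constant = 1, const_idx = None
print(matrix_rank(augmented) == matrix_rank(x))  # True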
def test_rank(self):
    import warnings
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        X = standard_normal((40, 10))
        self.assertEqual(tools.rank(X), np_matrix_rank(X))

        X[:, 0] = X[:, 1] + X[:, 2]
        self.assertEqual(tools.rank(X), np_matrix_rank(X))
def __init__(self, sys, sigma=None, dfk=None):
    if len(sys) % 2 != 0:
        raise ValueError("sys must be a list of pairs of endogenous "
                         "and exogenous variables. Got length %s" % len(sys))
    if dfk:
        if dfk.lower() not in ['dfk1', 'dfk2']:
            raise ValueError("dfk option %s not understood" % (dfk))
    self._dfk = dfk
    M = len(sys[1::2])
    self._M = M
    # exog = np.zeros((M, M), dtype=object)
    # for i, eq in enumerate(sys[1::2]):
    #     exog[i, i] = np.asarray(eq)  # not sure this exog is needed
    # used to compute resids for now
    exog = np.column_stack([np.asarray(sys[1::2][i]) for i in range(M)])
    # exog = np.vstack([np.asarray(sys[1::2][i]) for i in range(M)])
    self.exog = exog  # 2d ndarray exog is better
    # Endog, might just go ahead and reshape this?
    endog = np.asarray(sys[::2])
    self.endog = endog
    self.nobs = float(self.endog[0].shape[0])  # assumes all the same length

    # Degrees of Freedom
    df_resid = [self.nobs - np_matrix_rank(_) for _ in sys[1::2]]
    df_model = [np_matrix_rank(_) - 1 for _ in sys[1::2]]
    self.df_resid = np.asarray(df_resid)
    self.df_model = np.asarray(df_model)

    # "Block-diagonal" sparse matrix of exog
    sp_exog = sparse.lil_matrix(
        (int(self.nobs * M),
         int(np.sum(self.df_model + 1))))  # linked lists to build
    self._cols = np.cumsum(np.hstack((0, self.df_model + 1)))
    for i in range(M):
        sp_exog[int(i * self.nobs):int((i + 1) * self.nobs),
                self._cols[i]:self._cols[i + 1]] = sys[1::2][i]
    self.sp_exog = sp_exog.tocsr()  # cast to compressed for efficiency
    # Deal with sigma, check shape earlier if given
    if np.any(sigma):
        sigma = np.asarray(sigma)  # check shape
    elif sigma is None:
        resids = []
        for i in range(M):
            resids.append(
                GLS(endog[i],
                    exog[:, self._cols[i]:self._cols[i + 1]]).fit().resid)
        resids = np.asarray(resids).reshape(M, -1)
        sigma = self._compute_sigma(resids)
    self.sigma = sigma
    self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(self.sigma)).T
    self.initialize()
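# Hypothetical usage sketch: how the alternating `sys` list is laid out for a
# two-equation system. The class name SUR is an assumption; only the
# constructor above appears in this section.
import numpy as np

np.random.seed(0)
nobs = 50
x1 = np.column_stack((np.ones(nobs), np.random.randn(nobs, 2)))
x2 = np.column_stack((np.ones(nobs), np.random.randn(nobs, 3)))
y1 = x1.sum(axis=1) + np.random.randn(nobs)
y2 = x2.sum(axis=1) + np.random.randn(nobs)

sys = [y1, x1, y2, x2]  # endog/exog pairs; len(sys) % 2 == 0 as required
# model = SUR(sys)  # sigma would be estimated from equation-wise GLS residuals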
def _initialize(self):
    """
    Initializes the model for the IRLS fit.

    Resets the history and number of iterations.
    """
    self.pinv_wexog = np.linalg.pinv(self.exog)
    self.normalized_cov_params = np.dot(self.pinv_wexog,
                                        np.transpose(self.pinv_wexog))
    self.df_resid = float(self.exog.shape[0] - np_matrix_rank(self.exog))
    self.df_model = float(np_matrix_rank(self.exog) - 1)
    self.nobs = float(self.endog.shape[0])
def initialize(self):
    """
    Initialize a generalized linear model.
    """
    # TODO: intended for public use?
    self.history = {"fittedvalues": [],
                    "params": [np.inf],
                    "deviance": [np.inf]}
    self.pinv_wexog = np.linalg.pinv(self.exog)
    self.normalized_cov_params = np.dot(self.pinv_wexog,
                                        np.transpose(self.pinv_wexog))
    self.df_model = np_matrix_rank(self.exog) - 1
    self.df_resid = self.exog.shape[0] - np_matrix_rank(self.exog)
def contrastfromcols(L, D, pseudo=None):
    """
    From an n x p design matrix D and a matrix L, tries to determine a
    p x q contrast matrix C which determines a contrast of full rank,
    i.e. the n x q matrix

    dot(transpose(C), pinv(D))

    is full rank.

    L must satisfy either L.shape[0] == n or L.shape[1] == p.

    If L.shape[0] == n, then L is thought of as representing
    columns in the column space of D.

    If L.shape[1] == p, then L is thought of as what is known
    as a contrast matrix. In this case, this function returns an
    estimable contrast corresponding to dot(D, L.T).

    Note that this always produces a meaningful contrast, not always
    with the intended properties because q is always non-zero unless
    L is identically 0. That is, it produces a contrast that spans
    the column space of L (after projection onto the column space of D).

    Parameters
    ----------
    L : array-like
    D : array-like
    """
    L = np.asarray(L)
    D = np.asarray(D)

    n, p = D.shape

    if L.shape[0] != n and L.shape[1] != p:
        raise ValueError("shape of L and D mismatched")

    if pseudo is None:
        pseudo = np.linalg.pinv(D)  # D^+ = dot(inv(dot(D.T, D)), D.T)
    if L.shape[0] == n:
        C = np.dot(pseudo, L).T
    else:
        C = L
        C = np.dot(pseudo, np.dot(D, C.T)).T

    Lp = np.dot(D, C.T)

    if len(Lp.shape) == 1:
        Lp.shape = (n, 1)

    if np_matrix_rank(Lp) != Lp.shape[1]:
        Lp = fullrank(Lp)
        C = np.dot(pseudo, Lp).T

    return np.squeeze(C)
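# Usage sketch, assuming contrastfromcols and its helpers (fullrank,
# np_matrix_rank) from this module are in scope. The requested first-column
# effect is not estimable on its own; the function projects it onto the
# column space of D and returns a contrast that spans that projection.
import numpy as np

D = np.array([[1, 1, 1, 0, 0, 0],
              [0, 0, 0, 1, 1, 1],
              [1, 1, 1, 1, 1, 1]]).T  # rank deficient: col2 = col0 + col1
L = np.array([[1, 0, 0]])             # treated as a contrast: L.shape[1] == p
C = contrastfromcols(L, D)
print(C.shape)  # (3,) -- a length-p contrast vector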
def isestimable(C, D):
    """ True if (Q, P) contrast `C` is estimable for (N, P) design `D`

    From a Q x P contrast matrix `C` and an N x P design matrix `D`, checks if
    the contrast `C` is estimable by looking at the rank of ``vstack([C, D])``
    and verifying it is the same as the rank of `D`.

    Parameters
    ----------
    C : (Q, P) array-like
        contrast matrix. If `C` is 1-dimensional, assume shape (1, P)
    D : (N, P) array-like
        design matrix

    Returns
    -------
    tf : bool
        True if the contrast `C` is estimable on design `D`

    Examples
    --------
    >>> D = np.array([[1, 1, 1, 0, 0, 0],
    ...               [0, 0, 0, 1, 1, 1],
    ...               [1, 1, 1, 1, 1, 1]]).T
    >>> isestimable([1, 0, 0], D)
    False
    >>> isestimable([1, -1, 0], D)
    True
    """
    C = np.asarray(C)
    D = np.asarray(D)

    if C.ndim == 1:
        C = C[None, :]
    if C.shape[1] != D.shape[1]:
        raise ValueError('Contrast should have %d columns' % D.shape[1])
    new = np.vstack([C, D])
    if np_matrix_rank(new) != np_matrix_rank(D):
        return False
    return True
def __init__(self, sys, indep_endog=None, instruments=None):
    if len(sys) % 2 != 0:
        raise ValueError("sys must be a list of pairs of endogenous "
                         "and exogenous variables. Got length %s" % len(sys))
    M = len(sys[1::2])
    self._M = M
    # The lists are probably a bad idea
    self.endog = sys[::2]  # these are just list containers
    self.exog = sys[1::2]
    self._K = [np_matrix_rank(_) for _ in sys[1::2]]
    # fullexog = np.column_stack((_ for _ in self.exog))

    self.instruments = instruments

    # Keep the Y_j's in a container to get IVs
    instr_endog = {}
    for key in iterkeys(indep_endog):
        instr_endog.setdefault(key, [])

    for eq_key in indep_endog:
        for varcol in indep_endog[eq_key]:
            instr_endog[eq_key].append(self.exog[eq_key][:, varcol])
            # ^ copy needed?
    # self._instr_endog = instr_endog

    self._indep_endog = indep_endog
    _col_map = np.cumsum(np.hstack((0, self._K)))  # starting col no.s
    # move this check to whiten since we're not going to build a full exog?
    for eq_key in indep_endog:
        try:
            iter(indep_endog[eq_key])
        except TypeError:
            raise TypeError("The values of the indep_endog dict must be "
                            "iterable. Got type %s for key %s"
                            % (type(indep_endog[eq_key]), eq_key))
    # for del_col in indep_endog[eq_key]:
    #     fullexog = np.delete(fullexog, _col_map[eq_key] + del_col, 1)
    #     _col_map[eq_key + 1:] -= 1

    # Josef's example for deleting recurring "rows"
    # fullexog = np.unique(fullexog.T.view([('', fullexog.dtype)] *
    #     fullexog.shape[0])).view(fullexog.dtype).reshape(
    #     fullexog.shape[0], -1)
    # From http://article.gmane.org/gmane.comp.python.numeric.general/32276/
    # Or Jouni's suggestion of taking a hash:
    # http://www.mail-archive.com/[email protected]/msg04209.html
    # not clear to me how this would work though, only if they are the
    # *same* elements?
    # self.fullexog = fullexog
    self.wexog = self.whiten(instr_endog)
def fullrank(X, r=None):
    """
    Return a matrix whose column span is the same as X.

    If the rank of X is known it can be specified as r -- no check
    is made to ensure that this really is the rank of X.
    """
    if r is None:
        r = np_matrix_rank(X)

    V, D, U = L.svd(X, full_matrices=0)
    order = np.argsort(D)
    order = order[::-1]
    value = []
    for i in range(r):
        value.append(V[:, order[i]])
    return np.asarray(np.transpose(value)).astype(np.float64)
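# Usage sketch for fullrank, assuming `L` in its body is scipy.linalg and
# np_matrix_rank is numpy.linalg.matrix_rank. A redundant column is removed
# while the column span is preserved.
import numpy as np

np.random.seed(0)
X = np.random.randn(20, 2)
X = np.column_stack((X, X[:, 0] + X[:, 1]))  # third column is dependent

Xf = fullrank(X)
print(Xf.shape)  # (20, 2)
# same span: projecting X onto Xf's columns reproduces X
print(np.allclose(Xf @ np.linalg.pinv(Xf) @ X, X))  # True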
def setupClass(cls):
    from .results.results_regression import Longley
    data = longley.load()
    data.exog = add_constant(data.exog, prepend=False)
    res1 = OLS(data.endog, data.exog).fit()
    res2 = Longley()
    res2.wresid = res1.wresid  # workaround hack
    cls.res1 = res1
    cls.res2 = res2

    res_qr = OLS(data.endog, data.exog).fit(method="qr")

    model_qr = OLS(data.endog, data.exog)
    Q, R = np.linalg.qr(data.exog)
    model_qr.exog_Q, model_qr.exog_R = Q, R
    model_qr.normalized_cov_params = np.linalg.inv(np.dot(R.T, R))
    model_qr.rank = np_matrix_rank(R)
    res_qr2 = model_qr.fit(method="qr")

    cls.res_qr = res_qr
    cls.res_qr_manual = res_qr2
def add_indep(x, varnames, dtype=None):
    '''
    construct array with independent columns

    x is either iterable (list, tuple) or instance of ndarray or a subclass
    of it.  If x is an ndarray, then each column is assumed to represent a
    variable with observations in rows.
    '''
    # TODO: this needs tests for subclasses
    if isinstance(x, np.ndarray) and x.ndim == 2:
        x = x.T

    nvars_orig = len(x)
    nobs = len(x[0])
    if not dtype:
        dtype = np.asarray(x[0]).dtype
    xout = np.zeros((nobs, nvars_orig), dtype=dtype)
    count = 0
    rank_old = 0
    varnames_new = []
    varnames_dropped = []
    for (xi, ni) in zip(x, varnames):
        xout[:, count] = xi
        rank_new = np_matrix_rank(xout)
        if rank_new > rank_old:
            varnames_new.append(ni)
            rank_old = rank_new
            count += 1
        else:
            varnames_dropped.append(ni)

    return xout[:, :count], varnames_new
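# Usage sketch for add_indep: the collinear third variable fails the
# incremental rank check and is dropped, along with its name.
import numpy as np

np.random.seed(0)
x1 = np.random.randn(30)
x2 = np.random.randn(30)
x3 = 2 * x1 - x2  # exact linear combination
X = np.column_stack((x1, x2, x3))

Xi, kept = add_indep(X, ['x1', 'x2', 'x3'])
print(Xi.shape, kept)  # (30, 2) ['x1', 'x2']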
def fit(self, q=.5, vcov='robust', kernel='epa', bandwidth='hsheather',
        max_iter=1000, p_tol=1e-6, **kwargs):
    '''Solve by Iterative Weighted Least Squares

    Parameters
    ----------
    q : float
        Quantile must be between 0 and 1
    vcov : string, method used to calculate the variance-covariance matrix
        of the parameters. Default is ``robust``:

        - robust : heteroskedasticity robust standard errors (as suggested
          in Greene 6th edition)
        - iid : iid errors (as in Stata 12)

    kernel : string, kernel to use in the kernel density estimation for the
        asymptotic covariance matrix:

        - epa: Epanechnikov
        - cos: Cosine
        - gau: Gaussian
        - par: Parzen

    bandwidth : string, Bandwidth selection method in kernel density
        estimation for asymptotic covariance estimate (full references
        in QuantReg docstring):

        - hsheather: Hall-Sheather (1988)
        - bofinger: Bofinger (1975)
        - chamberlain: Chamberlain (1994)
    '''
    if q < 0 or q > 1:
        raise ValueError('q must be between 0 and 1')

    kern_names = ['biw', 'cos', 'epa', 'gau', 'par']
    if kernel not in kern_names:
        raise ValueError("kernel must be one of " + ', '.join(kern_names))
    else:
        kernel = kernels[kernel]

    if bandwidth == 'hsheather':
        bandwidth = hall_sheather
    elif bandwidth == 'bofinger':
        bandwidth = bofinger
    elif bandwidth == 'chamberlain':
        bandwidth = chamberlain
    else:
        raise ValueError("bandwidth must be in 'hsheather', 'bofinger', "
                         "'chamberlain'")

    endog = self.endog
    exog = self.exog
    nobs = self.nobs
    exog_rank = np_matrix_rank(self.exog)
    self.rank = exog_rank
    self.df_model = float(self.rank - self.k_constant)
    self.df_resid = self.nobs - self.rank
    n_iter = 0
    xstar = exog

    beta = np.ones(exog_rank)
    # TODO: better start, initial beta is used only for convergence check

    # Note the following doesn't work yet,
    # the iteration loop always starts with OLS as initial beta
    # if start_params is not None:
    #     if len(start_params) != rank:
    #         raise ValueError('start_params has wrong length')
    #     beta = start_params
    # else:
    #     # start with OLS
    #     beta = np.dot(np.linalg.pinv(exog), endog)

    diff = 10
    cycle = False

    history = dict(params=[], mse=[])
    while n_iter < max_iter and diff > p_tol and not cycle:
        n_iter += 1
        beta0 = beta
        xtx = np.dot(xstar.T, exog)
        xty = np.dot(xstar.T, endog)
        beta = np.dot(pinv(xtx), xty)
        resid = endog - np.dot(exog, beta)

        mask = np.abs(resid) < .000001
        resid[mask] = ((resid[mask] >= 0) * 2 - 1) * .000001
        resid = np.where(resid < 0, q * resid, (1 - q) * resid)
        resid = np.abs(resid)
        xstar = exog / resid[:, np.newaxis]
        diff = np.max(np.abs(beta - beta0))
        history['params'].append(beta)
        history['mse'].append(np.mean(resid * resid))

        if (n_iter >= 300) and (n_iter % 100 == 0):
            # check for convergence cycle, shouldn't happen
            for ii in range(2, 10):
                if np.all(beta == history['params'][-ii]):
                    cycle = True
                    warnings.warn("Convergence cycle detected",
                                  ConvergenceWarning)
                    break

    if n_iter == max_iter:
        warnings.warn("Maximum number of iterations (%d) reached." % max_iter,
                      IterationLimitWarning)

    e = endog - np.dot(exog, beta)
    # Greene (2008, p.407) writes that Stata 6 uses this bandwidth:
    # h = 0.9 * np.std(e) / (nobs**0.2)
    # Instead, we calculate bandwidth as in Stata 12
    iqre = stats.scoreatpercentile(e, 75) - stats.scoreatpercentile(e, 25)
    h = bandwidth(nobs, q)
    h = min(np.std(endog),
            iqre / 1.34) * (norm.ppf(q + h) - norm.ppf(q - h))

    fhat0 = 1. / (nobs * h) * np.sum(kernel(e / h))

    if vcov == 'robust':
        d = np.where(e > 0, (q / fhat0)**2, ((1 - q) / fhat0)**2)
        xtxi = pinv(np.dot(exog.T, exog))
        xtdx = np.dot(exog.T * d[np.newaxis, :], exog)
        vcov = chain_dot(xtxi, xtdx, xtxi)
    elif vcov == 'iid':
        vcov = (1. / fhat0)**2 * q * (1 - q) * pinv(np.dot(exog.T, exog))
    else:
        raise ValueError("vcov must be 'robust' or 'iid'")

    lfit = QuantRegResults(self, beta, normalized_cov_params=vcov)

    lfit.q = q
    lfit.iterations = n_iter
    lfit.sparsity = 1. / fhat0
    lfit.bandwidth = h
    lfit.history = history

    return RegressionResultsWrapper(lfit)
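# The released statsmodels API exposes this IRLS fit through QuantReg; a
# minimal median-regression sketch on simulated heteroskedastic data:
import numpy as np
import statsmodels.api as sm
from statsmodels.regression.quantile_regression import QuantReg

np.random.seed(0)
x = np.random.uniform(0, 10, 200)
y = 1.0 + 0.5 * x + 0.2 * x * np.random.randn(200)
X = sm.add_constant(x)

res = QuantReg(y, X).fit(q=0.5, vcov='robust', kernel='epa',
                         bandwidth='hsheather')
print(res.params)  # intercept and slope at the median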
def check_rank(self, J):
    rank = np_matrix_rank(J)
    if rank < np.size(J, axis=1):
        raise ValueError("Rank condition not met: "
                         "solution may not be unique.")