Ejemplo n.º 1
0
	def __init__(self, endog, exog, **kwargs):
		super(quantreg, self).__init__(endog, exog, **kwargs)
		self.PostEstimation = EventHook()
		self.PreEstimation = EventHook()
		self.PostVarianceCalculation = EventHook()
		self.nobs = float(self.endog.shape[0])
		self.df_resid = np.float(self.exog.shape[0] - rank(self.exog))
		self.df_model = np.float(rank(self.exog)-1)
Ejemplo n.º 2
0
    def test_rank(self):
        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            X = standard_normal((40,10))
            self.assertEquals(tools.rank(X), np_matrix_rank(X))

            X[:,0] = X[:,1] + X[:,2]
            self.assertEquals(tools.rank(X), np_matrix_rank(X))
Ejemplo n.º 3
0
    def test_rank(self):
        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            X = standard_normal((40, 10))
            self.assertEquals(tools.rank(X), np_matrix_rank(X))

            X[:, 0] = X[:, 1] + X[:, 2]
            self.assertEquals(tools.rank(X), np_matrix_rank(X))
Ejemplo n.º 4
0
    def __init__(self, sys, sigma=None, dfk=None):
        if len(sys) % 2 != 0:
            raise ValueError("sys must be a list of pairs of endogenous and \
exogenous variables.  Got length %s" % len(sys))
        if dfk:
            if not dfk.lower() in ['dfk1', 'dfk2']:
                raise ValueError("dfk option %s not understood" % (dfk))
        self._dfk = dfk
        M = len(sys[1::2])
        self._M = M
        #        exog = np.zeros((M,M), dtype=object)
        #        for i,eq in enumerate(sys[1::2]):
        #            exog[i,i] = np.asarray(eq)  # not sure this exog is needed
        # used to compute resids for now
        exog = np.column_stack(np.asarray(sys[1::2][i]) for i in range(M))
        #       exog = np.vstack(np.asarray(sys[1::2][i]) for i in range(M))
        self.exog = exog  # 2d ndarray exog is better
        # Endog, might just go ahead and reshape this?
        endog = np.asarray(sys[::2])
        self.endog = endog
        self.nobs = float(
            self.endog[0].shape[0])  # assumes all the same length

        # Degrees of Freedom
        df_resid = []
        df_model = []
        [df_resid.append(self.nobs - tools.rank(_)) \
                for _ in sys[1::2]]
        [df_model.append(tools.rank(_) - 1) for _ in sys[1::2]]
        self.df_resid = np.asarray(df_resid)
        self.df_model = np.asarray(df_model)

        # "Block-diagonal" sparse matrix of exog
        sp_exog = sparse.lil_matrix(
            (int(self.nobs * M),
             int(np.sum(self.df_model + 1))))  # linked lists to build
        self._cols = np.cumsum(np.hstack((0, self.df_model + 1)))
        for i in range(M):
            sp_exog[i * self.nobs:(i + 1) * self.nobs,
                    self._cols[i]:self._cols[i + 1]] = sys[1::2][i]
        self.sp_exog = sp_exog.tocsr()  # cast to compressed for efficiency
        # Deal with sigma, check shape earlier if given
        if np.any(sigma):
            sigma = np.asarray(sigma)  # check shape
        elif sigma == None:
            resids = []
            for i in range(M):
                resids.append(
                    GLS(endog[i],
                        exog[:, self._cols[i]:self._cols[i + 1]]).fit().resid)
            resids = np.asarray(resids).reshape(M, -1)
            sigma = self._compute_sigma(resids)
        self.sigma = sigma
        self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(\
                    self.sigma)).T
        self.initialize()
Ejemplo n.º 5
0
    def _initialize(self):
        """
        Initializes the model for the IRLS fit.

        Resets the history and number of iterations.
        """
        self.pinv_wexog = np.linalg.pinv(self.exog)
        self.normalized_cov_params = np.dot(self.pinv_wexog, np.transpose(self.pinv_wexog))
        self.df_resid = np.float(self.exog.shape[0] - rank(self.exog))
        self.df_model = np.float(rank(self.exog) - 1)
        self.nobs = float(self.endog.shape[0])
Ejemplo n.º 6
0
    def test_fullrank(self):
        X = standard_normal((40, 10))
        X[:, 0] = X[:, 1] + X[:, 2]

        Y = tools.fullrank(X)
        self.assertEquals(Y.shape, (40, 9))
        self.assertEquals(tools.rank(Y), 9)

        X[:, 5] = X[:, 3] + X[:, 4]
        Y = tools.fullrank(X)
        self.assertEquals(Y.shape, (40, 8))
        self.assertEquals(tools.rank(Y), 8)
Ejemplo n.º 7
0
    def __init__(self, sys, sigma=None, dfk=None):
        if len(sys) % 2 != 0:
            raise ValueError("sys must be a list of pairs of endogenous and \
exogenous variables.  Got length %s" % len(sys))
        if dfk:
            if not dfk.lower() in ['dfk1','dfk2']:
                raise ValueError("dfk option %s not understood" % (dfk))
        self._dfk = dfk
        M = len(sys[1::2])
        self._M = M
#        exog = np.zeros((M,M), dtype=object)
#        for i,eq in enumerate(sys[1::2]):
#            exog[i,i] = np.asarray(eq)  # not sure this exog is needed
                                        # used to compute resids for now
        exog = np.column_stack(np.asarray(sys[1::2][i]) for i in range(M))
#       exog = np.vstack(np.asarray(sys[1::2][i]) for i in range(M))
        self.exog = exog # 2d ndarray exog is better
# Endog, might just go ahead and reshape this?
        endog = np.asarray(sys[::2])
        self.endog = endog
        self.nobs = float(self.endog[0].shape[0]) # assumes all the same length

# Degrees of Freedom
        df_resid = []
        df_model = []
        [df_resid.append(self.nobs - tools.rank(_)) \
                for _ in sys[1::2]]
        [df_model.append(tools.rank(_) - 1) for _ in sys[1::2]]
        self.df_resid = np.asarray(df_resid)
        self.df_model = np.asarray(df_model)

# "Block-diagonal" sparse matrix of exog
        sp_exog = sparse.lil_matrix((int(self.nobs*M),
            int(np.sum(self.df_model+1)))) # linked lists to build
        self._cols = np.cumsum(np.hstack((0, self.df_model+1)))
        for i in range(M):
            sp_exog[i*self.nobs:(i+1)*self.nobs,
                    self._cols[i]:self._cols[i+1]] = sys[1::2][i]
        self.sp_exog = sp_exog.tocsr() # cast to compressed for efficiency
# Deal with sigma, check shape earlier if given
        if np.any(sigma):
            sigma = np.asarray(sigma) # check shape
        elif sigma == None:
            resids = []
            for i in range(M):
                resids.append(GLS(endog[i],exog[:,
                    self._cols[i]:self._cols[i+1]]).fit().resid)
            resids = np.asarray(resids).reshape(M,-1)
            sigma = self._compute_sigma(resids)
        self.sigma = sigma
        self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(\
                    self.sigma)).T
        self.initialize()
Ejemplo n.º 8
0
    def _initialize(self):
        """
        Initializes the model for the IRLS fit.

        Resets the history and number of iterations.
        """
        self.pinv_wexog = np.linalg.pinv(self.exog)
        self.normalized_cov_params = np.dot(self.pinv_wexog,
                                            np.transpose(self.pinv_wexog))
        self.df_resid = np.float(self.exog.shape[0] - rank(self.exog))
        self.df_model = np.float(rank(self.exog) - 1)
        self.nobs = float(self.endog.shape[0])
Ejemplo n.º 9
0
    def test_fullrank(self):
        X = standard_normal((40,10))
        X[:,0] = X[:,1] + X[:,2]

        Y = tools.fullrank(X)
        self.assertEquals(Y.shape, (40,9))
        self.assertEquals(tools.rank(Y), 9)

        X[:,5] = X[:,3] + X[:,4]
        Y = tools.fullrank(X)
        self.assertEquals(Y.shape, (40,8))
        self.assertEquals(tools.rank(Y), 8)
Ejemplo n.º 10
0
    def _initialize(self):
        """
        Initializes the model for the IRLS fit.

        Resets the history and number of iterations.
        """
        self.history = {"deviance": [np.inf], "params": [np.inf], "weights": [np.inf], "sresid": [np.inf], "scale": []}
        self.iteration = 0
        self.pinv_wexog = np.linalg.pinv(self.exog)
        self.normalized_cov_params = np.dot(self.pinv_wexog, np.transpose(self.pinv_wexog))
        self.df_resid = np.float(self.exog.shape[0] - rank(self.exog))
        self.df_model = np.float(rank(self.exog) - 1)
        self.nobs = float(self.endog.shape[0])
Ejemplo n.º 11
0
	def _initialize(self):
		self.nobs = float(self.endog.shape[0])
		self.df_resid = np.float(self.exog.shape[0] - rank(self.exog))
		self.df_model = np.float(rank(self.exog)-1)
		
		self.c = -self.endog
		self.A = np.concatenate([np.identity(self.endog.shape[0]), -np.identity(self.endog.shape[0])], axis=0)
		self.Aeq = self.exog.T
		self.b = np.concatenate([np.ones(self.endog.shape[0]), np.zeros(self.endog.shape[0])], axis=0)
		self.beq = (1-self.tau) * sum(self.exog, 0)
		self.t = 1 
		self.eps = 10e-07
		self.maxit = 1
		self.update = 1.1
Ejemplo n.º 12
0
    def initialize(self):
        """
        Initialize a generalized linear model.
        """
        #TODO: intended for public use?
        self.history = {'fittedvalues' : [],
                        'params' : [np.inf],
                        'deviance' : [np.inf]}

        self.pinv_wexog = np.linalg.pinv(self.exog)
        self.normalized_cov_params = np.dot(self.pinv_wexog,
                                        np.transpose(self.pinv_wexog))

        self.df_model = rank(self.exog)-1
        self.df_resid = self.exog.shape[0] - rank(self.exog)
Ejemplo n.º 13
0
    def test_fullrank(self):
        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            X = standard_normal((40,10))
            X[:,0] = X[:,1] + X[:,2]

            Y = tools.fullrank(X)
            self.assertEquals(Y.shape, (40,9))
            self.assertEquals(tools.rank(Y), 9)

            X[:,5] = X[:,3] + X[:,4]
            Y = tools.fullrank(X)
            self.assertEquals(Y.shape, (40,8))
            warnings.simplefilter("ignore")
            self.assertEquals(tools.rank(Y), 8)
Ejemplo n.º 14
0
    def test_fullrank(self):
        import warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            X = standard_normal((40, 10))
            X[:, 0] = X[:, 1] + X[:, 2]

            Y = tools.fullrank(X)
            self.assertEquals(Y.shape, (40, 9))
            self.assertEquals(tools.rank(Y), 9)

            X[:, 5] = X[:, 3] + X[:, 4]
            Y = tools.fullrank(X)
            self.assertEquals(Y.shape, (40, 8))
            warnings.simplefilter("ignore")
            self.assertEquals(tools.rank(Y), 8)
Ejemplo n.º 15
0
    def spec_hausman(self, dof=None):
        '''Hausman's specification test


        See Also
        --------
        spec_hausman : generic function for Hausman's specification test

        '''
        #use normalized cov_params for OLS

        resols = OLS(endog, exog).fit()
        normalized_cov_params_ols = resols.model.normalized_cov_params
        se2 = resols.mse_resid

        params_diff = self._results.params - resols.params

        cov_diff = np.linalg.pinv(self.xhatprod) - normalized_cov_params_ols
        #TODO: the following is very inefficient, solves problem (svd) twice
        #use linalg.lstsq or svd directly
        #cov_diff will very often be in-definite (singular)
        if not dof:
            dof = tools.rank(cov_diff)
        cov_diffpinv = np.linalg.pinv(cov_diff)
        H = np.dot(params_diff, np.dot(cov_diffpinv, params_diff))/se2
        pval = stats.chi2.sf(H, dof)

        return H, pval, dof
Ejemplo n.º 16
0
    def initialize(self):
        """
        Initialize a generalized linear model.
        """
        #TODO: intended for public use?
        self.history = {
            'fittedvalues': [],
            'params': [np.inf],
            'deviance': [np.inf]
        }

        self.pinv_wexog = np.linalg.pinv(self.exog)
        self.normalized_cov_params = np.dot(self.pinv_wexog,
                                            np.transpose(self.pinv_wexog))

        self.df_model = rank(self.exog) - 1
        self.df_resid = self.exog.shape[0] - rank(self.exog)
Ejemplo n.º 17
0
def contrastfromcols(L, D, pseudo=None):
    """
    From an n x p design matrix D and a matrix L, tries
    to determine a p x q contrast matrix C which
    determines a contrast of full rank, i.e. the
    n x q matrix

    dot(transpose(C), pinv(D))

    is full rank.

    L must satisfy either L.shape[0] == n or L.shape[1] == p.

    If L.shape[0] == n, then L is thought of as representing
    columns in the column space of D.

    If L.shape[1] == p, then L is thought of as what is known
    as a contrast matrix. In this case, this function returns an estimable
    contrast corresponding to the dot(D, L.T)

    Note that this always produces a meaningful contrast, not always
    with the intended properties because q is always non-zero unless
    L is identically 0. That is, it produces a contrast that spans
    the column space of L (after projection onto the column space of D).

    Parameters
    ----------
    L : array-like
    D : array-like
    """
    L = np.asarray(L)
    D = np.asarray(D)

    n, p = D.shape

    if L.shape[0] != n and L.shape[1] != p:
        raise ValueError("shape of L and D mismatched")

    if pseudo is None:
        pseudo = np.linalg.pinv(D)    # D^+ \approx= ((dot(D.T,D))^(-1),D.T)

    if L.shape[0] == n:
        C = np.dot(pseudo, L).T
    else:
        C = L
        C = np.dot(pseudo, np.dot(D, C.T)).T

    Lp = np.dot(D, C.T)

    if len(Lp.shape) == 1:
        Lp.shape = (n, 1)

    if rank(Lp) != Lp.shape[1]:
        Lp = fullrank(Lp)
        C = np.dot(pseudo, Lp).T

    return np.squeeze(C)
Ejemplo n.º 18
0
def contrastfromcols(L, D, pseudo=None):
    """
    From an n x p design matrix D and a matrix L, tries
    to determine a p x q contrast matrix C which
    determines a contrast of full rank, i.e. the
    n x q matrix

    dot(transpose(C), pinv(D))

    is full rank.

    L must satisfy either L.shape[0] == n or L.shape[1] == p.

    If L.shape[0] == n, then L is thought of as representing
    columns in the column space of D.

    If L.shape[1] == p, then L is thought of as what is known
    as a contrast matrix. In this case, this function returns an estimable
    contrast corresponding to the dot(D, L.T)

    Note that this always produces a meaningful contrast, not always
    with the intended properties because q is always non-zero unless
    L is identically 0. That is, it produces a contrast that spans
    the column space of L (after projection onto the column space of D).

    Parameters
    ----------
    L : array-like
    D : array-like
    """
    L = np.asarray(L)
    D = np.asarray(D)

    n, p = D.shape

    if L.shape[0] != n and L.shape[1] != p:
        raise ValueError("shape of L and D mismatched")

    if pseudo is None:
        pseudo = np.linalg.pinv(D)  # D^+ \approx= ((dot(D.T,D))^(-1),D.T)

    if L.shape[0] == n:
        C = np.dot(pseudo, L).T
    else:
        C = L
        C = np.dot(pseudo, np.dot(D, C.T)).T

    Lp = np.dot(D, C.T)

    if len(Lp.shape) == 1:
        Lp.shape = (n, 1)

    if rank(Lp) != Lp.shape[1]:
        Lp = fullrank(Lp)
        C = np.dot(pseudo, Lp).T

    return np.squeeze(C)
Ejemplo n.º 19
0
    def __init__(self, sys, indep_endog=None, instruments=None):
        if len(sys) % 2 != 0:
            raise ValueError("sys must be a list of pairs of endogenous and \
exogenous variables.  Got length %s" % len(sys))
        M = len(sys[1::2])
        self._M = M
        # The lists are probably a bad idea
        self.endog = sys[::2]  # these are just list containers
        self.exog = sys[1::2]
        self._K = [tools.rank(_) for _ in sys[1::2]]
        #        fullexog = np.column_stack((_ for _ in self.exog))

        self.instruments = instruments

        # Keep the Y_j's in a container to get IVs
        instr_endog = {}
        [instr_endog.setdefault(_, []) for _ in indep_endog.keys()]

        for eq_key in indep_endog:
            for varcol in indep_endog[eq_key]:
                instr_endog[eq_key].append(self.exog[eq_key][:, varcol])
                # ^ copy needed?
#        self._instr_endog = instr_endog

        self._indep_endog = indep_endog
        _col_map = np.cumsum(np.hstack((0, self._K)))  # starting col no.s
        # move this check to whiten since we're not going to build a full exog?
        for eq_key in indep_endog:
            try:
                iter(indep_endog[eq_key])
            except:
                #                eq_key = [eq_key]
                raise TypeError("The values of the indep_exog dict must be\
 iterable. Got type %s for converter %s" % (type(del_col)))


#            for del_col in indep_endog[eq_key]:
#                fullexog = np.delete(fullexog,  _col_map[eq_key]+del_col, 1)
#                _col_map[eq_key+1:] -= 1

# Josef's example for deleting reoccuring "rows"
#        fullexog = np.unique(fullexog.T.view([('',fullexog.dtype)]*\
#                fullexog.shape[0])).view(fullexog.dtype).reshape(\
#                fullexog.shape[0],-1)
# From http://article.gmane.org/gmane.comp.python.numeric.general/32276/
# Or Jouni' suggetsion of taking a hash:
# http://www.mail-archive.com/[email protected]/msg04209.html
# not clear to me how this would work though, only if they are the *same*
# elements?
#        self.fullexog = fullexog
        self.wexog = self.whiten(instr_endog)
Ejemplo n.º 20
0
    def __init__(self, sys, indep_endog=None, instruments=None):
        if len(sys) % 2 != 0:
            raise ValueError("sys must be a list of pairs of endogenous and \
exogenous variables.  Got length %s" % len(sys))
        M = len(sys[1::2])
        self._M = M
# The lists are probably a bad idea
        self.endog = sys[::2]   # these are just list containers
        self.exog = sys[1::2]
        self._K = [tools.rank(_) for _ in sys[1::2]]
#        fullexog = np.column_stack((_ for _ in self.exog))

        self.instruments = instruments

        # Keep the Y_j's in a container to get IVs
        instr_endog = {}
        [instr_endog.setdefault(_,[]) for _ in list(indep_endog.keys())]

        for eq_key in indep_endog:
            for varcol in indep_endog[eq_key]:
                instr_endog[eq_key].append(self.exog[eq_key][:,varcol])
                # ^ copy needed?
#        self._instr_endog = instr_endog

        self._indep_endog = indep_endog
        _col_map = np.cumsum(np.hstack((0,self._K))) # starting col no.s
# move this check to whiten since we're not going to build a full exog?
        for eq_key in indep_endog:
            try:
                iter(indep_endog[eq_key])
            except:
#                eq_key = [eq_key]
                raise TypeError("The values of the indep_exog dict must be\
 iterable. Got type %s for converter %s" % (type(del_col)))
#            for del_col in indep_endog[eq_key]:
#                fullexog = np.delete(fullexog,  _col_map[eq_key]+del_col, 1)
#                _col_map[eq_key+1:] -= 1

# Josef's example for deleting reoccuring "rows"
#        fullexog = np.unique(fullexog.T.view([('',fullexog.dtype)]*\
#                fullexog.shape[0])).view(fullexog.dtype).reshape(\
#                fullexog.shape[0],-1)
# From http://article.gmane.org/gmane.comp.python.numeric.general/32276/
# Or Jouni' suggetsion of taking a hash:
# http://www.mail-archive.com/[email protected]/msg04209.html
# not clear to me how this would work though, only if they are the *same*
# elements?
#        self.fullexog = fullexog
        self.wexog = self.whiten(instr_endog)
Ejemplo n.º 21
0
def spec_hausman(params_e, params_i, cov_params_e, cov_params_i, dof=None):
    '''Hausmans specification test

    Parameters
    ----------
    params_e : array
        efficient and consistent under Null hypothesis,
        inconsistent under alternative hypothesis
    params_i: array
        consistent under Null hypothesis,
        consistent under alternative hypothesis
    cov_params_e : array, 2d
        covariance matrix of parameter estimates for params_e
    cov_params_i : array, 2d
        covariance matrix of parameter estimates for params_i

    example instrumental variables OLS estimator is `e`, IV estimator is `i`


    Notes
    -----

    Todos,Issues
    - check dof calculations and verify for linear case
    - check one-sided hypothesis


    References
    ----------
    Greene section 5.5 p.82/83


    '''
    params_diff = (params_i - params_e)
    cov_diff = cov_params_i - cov_params_e
    #TODO: the following is very inefficient, solves problem (svd) twice
    #use linalg.lstsq or svd directly
    #cov_diff will very often be in-definite (singular)
    if not dof:
        dof = tools.rank(cov_diff)
    cov_diffpinv = np.linalg.pinv(cov_diff)
    H = np.dot(params_diff, np.dot(cov_diffpinv, params_diff))
    pval = stats.chi2.sf(H, dof)

    evals = np.linalg.eigvalsh(cov_diff)

    return H, pval, dof, evals
Ejemplo n.º 22
0
    def setupClass(cls):
        from results.results_regression import Longley
        data = longley.load()
        data.exog = add_constant(data.exog, prepend=False)
        res1 = OLS(data.endog, data.exog).fit()
        res2 = Longley()
        res2.wresid = res1.wresid # workaround hack
        cls.res1 = res1
        cls.res2 = res2

        res_qr = OLS(data.endog, data.exog).fit(method="qr")

        model_qr = OLS(data.endog, data.exog)
        Q, R = np.linalg.qr(data.exog)
        model_qr.exog_Q, model_qr.exog_R  = Q, R
        model_qr.normalized_cov_params = np.linalg.inv(np.dot(R.T, R))
        model_qr.rank = rank(R)
        res_qr2 = model_qr.fit(method="qr")

        cls.res_qr = res_qr
        cls.res_qr_manual = res_qr2
Ejemplo n.º 23
0
    def setupClass(cls):
        from results.results_regression import Longley
        data = longley.load()
        data.exog = add_constant(data.exog, prepend=False)
        res1 = OLS(data.endog, data.exog).fit()
        res2 = Longley()
        res2.wresid = res1.wresid  # workaround hack
        cls.res1 = res1
        cls.res2 = res2

        res_qr = OLS(data.endog, data.exog).fit(method="qr")

        model_qr = OLS(data.endog, data.exog)
        Q, R = np.linalg.qr(data.exog)
        model_qr.exog_Q, model_qr.exog_R = Q, R
        model_qr.normalized_cov_params = np.linalg.inv(np.dot(R.T, R))
        model_qr.rank = rank(R)
        res_qr2 = model_qr.fit(method="qr")

        cls.res_qr = res_qr
        cls.res_qr_manual = res_qr2
Ejemplo n.º 24
0
    def test_rank(self):
        X = standard_normal((40, 10))
        self.assertEquals(tools.rank(X), 10)

        X[:, 0] = X[:, 1] + X[:, 2]
        self.assertEquals(tools.rank(X), 9)
Ejemplo n.º 25
0
    def test_rank(self):
        X = standard_normal((40,10))
        self.assertEquals(tools.rank(X), 10)

        X[:,0] = X[:,1] + X[:,2]
        self.assertEquals(tools.rank(X), 9)
Ejemplo n.º 26
0
    def fit(self, q=.5, vcov='robust', kernel='epa', bandwidth='hsheather',
            max_iter=1000, p_tol=1e-6, **kwargs):
        '''Solve by Iterative Weighted Least Squares

        Parameters
        ----------
        q : float
            Quantile must be between 0 and 1
        vcov : string, method used to calculate the variance-covariance matrix
            of the parameters. Default is ``robust``:

            - robust : heteroskedasticity robust standard errors (as suggested
              in Greene 6th edition)
            - iid : iid errors (as in Stata 12)

        kernel : string, kernel to use in the kernel density estimation for the
            asymptotic covariance matrix:

            - epa: Epanechnikov
            - cos: Cosine
            - gau: Gaussian
            - par: Parzene

        bandwidth: string, Bandwidth selection method in kernel density
            estimation for asymptotic covariance estimate (full
            references in QuantReg docstring):

            - hsheather: Hall-Sheather (1988)
            - bofinger: Bofinger (1975)
            - chamberlain: Chamberlain (1994)
        '''

        if q < 0 or q > 1:
            raise Exception('p must be between 0 and 1')

        kern_names = ['biw', 'cos', 'epa', 'gau', 'par']
        if kernel not in kern_names:
            raise Exception("kernel must be one of " + ', '.join(kern_names))
        else:
            kernel = kernels[kernel]

        if bandwidth == 'hsheather':
            bandwidth = hall_sheather
        elif bandwidth == 'bofinger':
            bandwidth = bofinger
        elif bandwidth == 'chamberlain':
            bandwidth = chamberlain
        else:
            raise Exception("bandwidth must be in 'hsheather', 'bofinger', 'chamberlain'")

        endog = self.endog
        exog = self.exog
        nobs = self.nobs
        exog_rank = rank(self.exog)
        self.rank = exog_rank
        self.df_model = float(self.rank - self.k_constant)
        self.df_resid = self.nobs - self.rank
        n_iter = 0
        xstar = exog

        beta = np.ones(exog_rank)
        # TODO: better start, initial beta is used only for convergence check

        # Note the following doesn't work yet,
        # the iteration loop always starts with OLS as initial beta
#        if start_params is not None:
#            if len(start_params) != rank:
#                raise ValueError('start_params has wrong length')
#            beta = start_params
#        else:
#            # start with OLS
#            beta = np.dot(np.linalg.pinv(exog), endog)

        diff = 10
        cycle = False

        history = dict(params = [], mse=[])
        while n_iter < max_iter and diff > p_tol and not cycle:
            n_iter += 1
            beta0 = beta
            xtx = np.dot(xstar.T, exog)
            xty = np.dot(xstar.T, endog)
            beta = np.dot(pinv(xtx), xty)
            resid = endog - np.dot(exog, beta)

            mask = np.abs(resid) < .000001
            resid[mask] = np.sign(resid[mask]) * .000001
            resid = np.where(resid < 0, q * resid, (1-q) * resid)
            resid = np.abs(resid)
            xstar = exog / resid[:, np.newaxis]
            diff = np.max(np.abs(beta - beta0))
            history['params'].append(beta)
            history['mse'].append(np.mean(resid*resid))

            if (n_iter >= 300) and (n_iter % 100 == 0):
                # check for convergence circle, shouldn't happen
                for ii in range(2, 10):
                    if np.all(beta == history['params'][-ii]):
                        cycle = True
                        break
                warnings.warn("Convergence cycle detected")

        if n_iter == max_iter:
            warnings.warn("Maximum number of iterations (1000) reached.")

        e = endog - np.dot(exog, beta)
        # Greene (2008, p.407) writes that Stata 6 uses this bandwidth:
        # h = 0.9 * np.std(e) / (nobs**0.2)
        # Instead, we calculate bandwidth as in Stata 12
        iqre = stats.scoreatpercentile(e, 75) - stats.scoreatpercentile(e, 25)
        h = bandwidth(nobs, q)
        h = min(np.std(endog),
                iqre / 1.34) * (norm.ppf(q + h) - norm.ppf(q - h))

        fhat0 = 1. / (nobs * h) * np.sum(kernel(e / h))

        if vcov == 'robust':
            d = np.where(e > 0, (q/fhat0)**2, ((1-q)/fhat0)**2)
            xtxi = pinv(np.dot(exog.T, exog))
            xtdx = np.dot(exog.T * d[np.newaxis, :], exog)
            vcov = chain_dot(xtxi, xtdx, xtxi)
        elif vcov == 'iid':
            vcov = (1. / fhat0)**2 * q * (1 - q) * pinv(np.dot(exog.T, exog))
        else:
            raise Exception("vcov must be 'robust' or 'iid'")

        lfit = QuantRegResults(self, beta, normalized_cov_params=vcov)

        lfit.q = q
        lfit.iterations = n_iter
        lfit.sparsity = 1. / fhat0
        lfit.bandwidth = h
        lfit.history = history

        return RegressionResultsWrapper(lfit)
Ejemplo n.º 27
0
    def fit(self,
            q=.5,
            vcov='robust',
            kernel='epa',
            bandwidth='hsheather',
            max_iter=1000,
            p_tol=1e-6,
            **kwargs):
        '''Solve by Iterative Weighted Least Squares

        Parameters
        ----------
        q : float
            Quantile must be between 0 and 1
        vcov : string, method used to calculate the variance-covariance matrix
            of the parameters. Default is ``robust``:

            - robust : heteroskedasticity robust standard errors (as suggested
              in Greene 6th edition)
            - iid : iid errors (as in Stata 12)

        kernel : string, kernel to use in the kernel density estimation for the
            asymptotic covariance matrix:

            - epa: Epanechnikov
            - cos: Cosine
            - gau: Gaussian
            - par: Parzene

        bandwidth: string, Bandwidth selection method in kernel density
            estimation for asymptotic covariance estimate (full
            references in QuantReg docstring):

            - hsheather: Hall-Sheather (1988)
            - bofinger: Bofinger (1975)
            - chamberlain: Chamberlain (1994)
        '''

        if q < 0 or q > 1:
            raise Exception('p must be between 0 and 1')

        kern_names = ['biw', 'cos', 'epa', 'gau', 'par']
        if kernel not in kern_names:
            raise Exception("kernel must be one of " + ', '.join(kern_names))
        else:
            kernel = kernels[kernel]

        if bandwidth == 'hsheather':
            bandwidth = hall_sheather
        elif bandwidth == 'bofinger':
            bandwidth = bofinger
        elif bandwidth == 'chamberlain':
            bandwidth = chamberlain
        else:
            raise Exception(
                "bandwidth must be in 'hsheather', 'bofinger', 'chamberlain'")

        endog = self.endog
        exog = self.exog
        nobs = self.nobs
        exog_rank = rank(self.exog)
        self.rank = exog_rank
        self.df_model = float(self.rank - self.k_constant)
        self.df_resid = self.nobs - self.rank
        n_iter = 0
        xstar = exog

        beta = np.ones(exog_rank)
        # TODO: better start, initial beta is used only for convergence check

        # Note the following doesn't work yet,
        # the iteration loop always starts with OLS as initial beta
        #        if start_params is not None:
        #            if len(start_params) != rank:
        #                raise ValueError('start_params has wrong length')
        #            beta = start_params
        #        else:
        #            # start with OLS
        #            beta = np.dot(np.linalg.pinv(exog), endog)

        diff = 10
        cycle = False

        history = dict(params=[], mse=[])
        while n_iter < max_iter and diff > p_tol and not cycle:
            n_iter += 1
            beta0 = beta
            xtx = np.dot(xstar.T, exog)
            xty = np.dot(xstar.T, endog)
            beta = np.dot(pinv(xtx), xty)
            resid = endog - np.dot(exog, beta)

            mask = np.abs(resid) < .000001
            resid[mask] = np.sign(resid[mask]) * .000001
            resid = np.where(resid < 0, q * resid, (1 - q) * resid)
            resid = np.abs(resid)
            xstar = exog / resid[:, np.newaxis]
            diff = np.max(np.abs(beta - beta0))
            history['params'].append(beta)
            history['mse'].append(np.mean(resid * resid))

            if (n_iter >= 300) and (n_iter % 100 == 0):
                # check for convergence circle, shouldn't happen
                for ii in range(2, 10):
                    if np.all(beta == history['params'][-ii]):
                        cycle = True
                        break
                warnings.warn("Convergence cycle detected")

        if n_iter == max_iter:
            warnings.warn("Maximum number of iterations (1000) reached.")

        e = endog - np.dot(exog, beta)
        # Greene (2008, p.407) writes that Stata 6 uses this bandwidth:
        # h = 0.9 * np.std(e) / (nobs**0.2)
        # Instead, we calculate bandwidth as in Stata 12
        iqre = stats.scoreatpercentile(e, 75) - stats.scoreatpercentile(e, 25)
        h = bandwidth(nobs, q)
        h = min(np.std(endog),
                iqre / 1.34) * (norm.ppf(q + h) - norm.ppf(q - h))

        fhat0 = 1. / (nobs * h) * np.sum(kernel(e / h))

        if vcov == 'robust':
            d = np.where(e > 0, (q / fhat0)**2, ((1 - q) / fhat0)**2)
            xtxi = pinv(np.dot(exog.T, exog))
            xtdx = np.dot(exog.T * d[np.newaxis, :], exog)
            vcov = chain_dot(xtxi, xtdx, xtxi)
        elif vcov == 'iid':
            vcov = (1. / fhat0)**2 * q * (1 - q) * pinv(np.dot(exog.T, exog))
        else:
            raise Exception("vcov must be 'robust' or 'iid'")

        lfit = QuantRegResults(self, beta, normalized_cov_params=vcov)

        lfit.q = q
        lfit.iterations = n_iter
        lfit.sparsity = 1. / fhat0
        lfit.bandwidth = h
        lfit.history = history

        return RegressionResultsWrapper(lfit)