Example #1
    def initialize(self):
        self.wexog = self.whiten(self.exog)
        self.wendog = self.whiten(self.endog)
        # overwrite nobs from class Model:
        self.nobs = float(self.wexog.shape[0])
        self.df_resid = self.nobs - rank(self.exog)
#       Below assumes that we have a constant
        self.df_model = float(rank(self.exog)-1)
Example #2
    def initialize(self):
        """
        Initialize is called by
        scikits.statsmodels.model.LikelihoodModel.__init__
        and should contain any preprocessing that needs to be done for a model.
        """
        self.df_model = float(tools.rank(self.exog) - 1)  # assumes constant
        self.df_resid = float(self.exog.shape[0] - tools.rank(self.exog))
Example #3
    def initialize(self):
        self.wexog = self.whiten(self.exog)
        self.wendog = self.whiten(self.endog)
        # overwrite nobs from class Model:
        self.nobs = float(self.wexog.shape[0])
        self.df_resid = self.nobs - rank(self.exog)
#       Below assumes that we have a constant
        self.df_model = float(rank(self.exog)-1)
Example #4
    def initialize(self):
        """
        Initialize is called by
        scikits.statsmodels.model.LikelihoodModel.__init__
        and should contain any preprocessing that needs to be done for a model.
        """
        self.df_model = float(tools.rank(self.exog) - 1)  # assumes constant
        self.df_resid = float(self.exog.shape[0] - tools.rank(self.exog))
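
A minimal stand-alone sketch of the degrees-of-freedom bookkeeping in the initializers above, with numpy's matrix_rank standing in for tools.rank (assumes the design matrix contains a constant column; the data are made up):

import numpy as np

nobs = 100
exog = np.column_stack((np.ones(nobs), np.random.randn(nobs, 3)))  # constant + 3 regressors

df_model = float(np.linalg.matrix_rank(exog) - 1)     # 3.0, constant not counted
df_resid = float(nobs - np.linalg.matrix_rank(exog))  # 96.0
print(df_model, df_resid)
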
Example #5
    def __init__(self, sys, sigma=None, dfk=None):
        if len(sys) % 2 != 0:
            raise ValueError("sys must be a list of pairs of endogenous and \
exogenous variables.  Got length %s" % len(sys))
        if dfk:
            if not dfk.lower() in ['dfk1', 'dfk2']:
                raise ValueError("dfk option %s not understood" % (dfk))
        self._dfk = dfk
        M = len(sys[1::2])
        self._M = M
        #        exog = np.zeros((M,M), dtype=object)
        #        for i,eq in enumerate(sys[1::2]):
        #            exog[i,i] = np.asarray(eq)  # not sure this exog is needed
        # used to compute resids for now
        exog = np.column_stack([np.asarray(sys[1::2][i]) for i in range(M)])
        #       exog = np.vstack(np.asarray(sys[1::2][i]) for i in range(M))
        self.exog = exog  # 2d ndarray exog is better
        # Endog, might just go ahead and reshape this?
        endog = np.asarray(sys[::2])
        self.endog = endog
        self.nobs = float(
            self.endog[0].shape[0])  # assumes all the same length

        # Degrees of Freedom
        df_resid = [self.nobs - tools.rank(_) for _ in sys[1::2]]
        df_model = [tools.rank(_) - 1 for _ in sys[1::2]]
        self.df_resid = np.asarray(df_resid)
        self.df_model = np.asarray(df_model)

        # "Block-diagonal" sparse matrix of exog
        sp_exog = sparse.lil_matrix(
            (int(self.nobs * M),
             int(np.sum(self.df_model + 1))))  # linked lists to build
        self._cols = np.cumsum(np.hstack((0, self.df_model + 1)))
        for i in range(M):
            sp_exog[i * self.nobs:(i + 1) * self.nobs,
                    self._cols[i]:self._cols[i + 1]] = sys[1::2][i]
        self.sp_exog = sp_exog.tocsr()  # cast to compressed for efficiency
        # Deal with sigma, check shape earlier if given
        if np.any(sigma):
            sigma = np.asarray(sigma)  # check shape
        elif sigma is None:
            resids = []
            for i in range(M):
                resids.append(
                    GLS(endog[i],
                        exog[:, self._cols[i]:self._cols[i + 1]]).fit().resid)
            resids = np.asarray(resids).reshape(M, -1)
            sigma = self._compute_sigma(resids)
        self.sigma = sigma
        self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(self.sigma)).T
        self.initialize()
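
A small sketch of the block-diagonal sparse exog construction above, for a hypothetical two-equation system (the data here are invented for illustration):

import numpy as np
from scipy import sparse

nobs = 5
x1 = np.column_stack((np.ones(nobs), np.arange(nobs)))           # eq. 1: constant + trend
x2 = np.column_stack((np.ones(nobs), np.random.randn(nobs, 2)))  # eq. 2: constant + 2 regressors

blocks = [x1, x2]
ncols = [b.shape[1] for b in blocks]
cols = np.cumsum(np.hstack((0, ncols)))              # starting column of each block

sp_exog = sparse.lil_matrix((nobs * len(blocks), int(cols[-1])))  # linked lists to build
for i, b in enumerate(blocks):
    sp_exog[i * nobs:(i + 1) * nobs, cols[i]:cols[i + 1]] = b
sp_exog = sp_exog.tocsr()                            # compressed format for fast products
print(sp_exog.shape)                                 # (10, 5)
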
Example #6
    def test_fullrank(self):
        X = standard_normal((40,10))
        X[:,0] = X[:,1] + X[:,2]

        Y = tools.fullrank(X)
        self.assertEquals(Y.shape, (40,9))
        self.assertEquals(tools.rank(Y), 9)

        X[:,5] = X[:,3] + X[:,4]
        Y = tools.fullrank(X)
        self.assertEquals(Y.shape, (40,8))
        self.assertEquals(tools.rank(Y), 8)
Example #7
    def test_fullrank(self):
        X = standard_normal((40, 10))
        X[:, 0] = X[:, 1] + X[:, 2]

        Y = tools.fullrank(X)
        self.assertEquals(Y.shape, (40, 9))
        self.assertEquals(tools.rank(Y), 9)

        X[:, 5] = X[:, 3] + X[:, 4]
        Y = tools.fullrank(X)
        self.assertEquals(Y.shape, (40, 8))
        self.assertEquals(tools.rank(Y), 8)
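
The tests above exercise tools.fullrank and tools.rank; a rough stand-alone equivalent using plain numpy SVD (the function name here is illustrative, not the library's implementation) looks like:

import numpy as np

def full_rank_basis(X, tol=1e-8):
    """Return a matrix whose columns span the column space of X and have full column rank."""
    u, s, vt = np.linalg.svd(X, full_matrices=False)
    keep = s > tol * s[0]                      # drop numerically zero singular values
    return u[:, keep] * s[keep]

X = np.random.standard_normal((40, 10))
X[:, 0] = X[:, 1] + X[:, 2]                    # introduce one exact collinearity
Y = full_rank_basis(X)
print(Y.shape, np.linalg.matrix_rank(Y))       # (40, 9) 9
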
Example #8
    def __init__(self, sys, sigma=None, dfk=None):
        if len(sys) % 2 != 0:
            raise ValueError("sys must be a list of pairs of endogenous and \
exogenous variables.  Got length %s" % len(sys))
        if dfk:
            if not dfk.lower() in ['dfk1','dfk2']:
                raise ValueError("dfk option %s not understood" % (dfk))
        self._dfk = dfk
        M = len(sys[1::2])
        self._M = M
#        exog = np.zeros((M,M), dtype=object)
#        for i,eq in enumerate(sys[1::2]):
#            exog[i,i] = np.asarray(eq)  # not sure this exog is needed
                                        # used to compute resids for now
        exog = np.column_stack([np.asarray(sys[1::2][i]) for i in range(M)])
#       exog = np.vstack(np.asarray(sys[1::2][i]) for i in range(M))
        self.exog = exog # 2d ndarray exog is better
# Endog, might just go ahead and reshape this?
        endog = np.asarray(sys[::2])
        self.endog = endog
        self.nobs = float(self.endog[0].shape[0]) # assumes all the same length

# Degrees of Freedom
        df_resid = [self.nobs - tools.rank(_) for _ in sys[1::2]]
        df_model = [tools.rank(_) - 1 for _ in sys[1::2]]
        self.df_resid = np.asarray(df_resid)
        self.df_model = np.asarray(df_model)

# "Block-diagonal" sparse matrix of exog
        sp_exog = sparse.lil_matrix((int(self.nobs*M),
            int(np.sum(self.df_model+1)))) # linked lists to build
        self._cols = np.cumsum(np.hstack((0, self.df_model+1)))
        for i in range(M):
            sp_exog[i*self.nobs:(i+1)*self.nobs,
                    self._cols[i]:self._cols[i+1]] = sys[1::2][i]
        self.sp_exog = sp_exog.tocsr() # cast to compressed for efficiency
# Deal with sigma, check shape earlier if given
        if np.any(sigma):
            sigma = np.asarray(sigma) # check shape
        elif sigma is None:
            resids = []
            for i in range(M):
                resids.append(GLS(endog[i],exog[:,
                    self._cols[i]:self._cols[i+1]]).fit().resid)
            resids = np.asarray(resids).reshape(M,-1)
            sigma = self._compute_sigma(resids)
        self.sigma = sigma
        self.cholsigmainv = np.linalg.cholesky(np.linalg.pinv(self.sigma)).T
        self.initialize()
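
A numerical sketch of the whitening idea behind cholsigmainv above: if sigma is the error covariance, ordinary least squares on the whitened data reproduces the GLS estimate (all data here are simulated):

import numpy as np

rng = np.random.RandomState(0)
n = 50
X = np.column_stack((np.ones(n), rng.randn(n, 2)))
sigma = 0.5 ** np.abs(np.subtract.outer(np.arange(n), np.arange(n)))   # AR(1)-like covariance
y = X @ np.array([1.0, 2.0, -1.0]) + np.linalg.cholesky(sigma) @ rng.randn(n)

cholsigmainv = np.linalg.cholesky(np.linalg.pinv(sigma)).T
wX, wy = cholsigmainv @ X, cholsigmainv @ y

beta_gls = np.linalg.solve(X.T @ np.linalg.solve(sigma, X),
                           X.T @ np.linalg.solve(sigma, y))
beta_white = np.linalg.lstsq(wX, wy, rcond=None)[0]    # OLS on the whitened data
print(np.allclose(beta_gls, beta_white))               # True
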
Example #9
    def initialize(self):
        """
        Initialize a generalized linear model.
        """
#TODO: intended for public use?
        self.history = { 'fittedvalues' : [], 'params' : [np.inf],
                         'deviance' : [np.inf]}
        self.iteration = 0
        self.pinv_wexog = np.linalg.pinv(self.exog)
        self.normalized_cov_params = np.dot(self.pinv_wexog,
                                        np.transpose(self.pinv_wexog))
        self.df_model = rank(self.exog)-1
        self.df_resid = self.exog.shape[0] - rank(self.exog)
Example #10
    def initialize(self):
        """
        Initialize a generalized linear model.
        """
#TODO: intended for public use?
        self.history = { 'fittedvalues' : [], 'params' : [np.inf],
                         'deviance' : [np.inf]}
        self.iteration = 0
        self.pinv_wexog = np.linalg.pinv(self.exog)
        self.normalized_cov_params = np.dot(self.pinv_wexog,
                                        np.transpose(self.pinv_wexog))
        self.df_model = rank(self.exog)-1
        self.df_resid = self.exog.shape[0] - rank(self.exog)
Example #11
    def _initialize(self):
        """
        Initializes the model for the IRLS fit.

        Resets the history and number of iterations.
        """
        self.history = {'deviance' : [np.inf], 'params' : [np.inf],
            'weights' : [np.inf], 'sresid' : [np.inf], 'scale' : []}
        self.iteration = 0
        self.pinv_wexog = np.linalg.pinv(self.exog)
        self.normalized_cov_params = np.dot(self.pinv_wexog,
                                        np.transpose(self.pinv_wexog))
        self.df_resid = float(self.exog.shape[0] - rank(self.exog))
        self.df_model = float(rank(self.exog) - 1)
        self.nobs = float(self.endog.shape[0])
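
A quick check of the normalized_cov_params computation in the initializers above: for a design matrix of full column rank, np.dot(pinv(X), pinv(X).T) is simply inv(X'X) (a toy design, for illustration only):

import numpy as np

rng = np.random.RandomState(0)
X = np.column_stack((np.ones(30), rng.randn(30, 2)))
pinv_X = np.linalg.pinv(X)
print(np.allclose(np.dot(pinv_X, pinv_X.T), np.linalg.inv(np.dot(X.T, X))))   # True
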
Example #12
    def checkMovingOLS(self, window_type, x, y, **kwds):
        try:
            from scikits.statsmodels.tools.tools import rank
        except ImportError:
            from scikits.statsmodels.tools import rank

        window = rank(x.values) * 2

        moving = ols(y=y, x=x, window_type=window_type,
                     window=window, **kwds)

        if isinstance(moving.y, Series):
            index = moving.y.index
        elif isinstance(moving.y, LongPanel):
            index = moving.y.major_axis

        for n, i in enumerate(moving._valid_indices):
            if window_type == 'rolling' and i >= window:
                prior_date = index[i - window + 1]
            else:
                prior_date = index[0]

            date = index[i]

            x_iter = {}
            for k, v in x.iteritems():
                x_iter[k] = v.truncate(before=prior_date, after=date)
            y_iter = y.truncate(before=prior_date, after=date)

            static = ols(y=y_iter, x=x_iter, **kwds)

            self.compare(static, moving, event_index=i,
                         result_index=n)

        _check_non_raw_results(moving)
Example #13
    def checkMovingOLS(self, window_type, x, y, **kwds):
        try:
            from scikits.statsmodels.tools.tools import rank
        except ImportError:
            from scikits.statsmodels.tools import rank

        window = rank(x.values) * 2

        moving = ols(y=y, x=x, window_type=window_type, window=window, **kwds)

        if isinstance(moving.y, Series):
            index = moving.y.index
        elif isinstance(moving.y, LongPanel):
            index = moving.y.major_axis

        for n, i in enumerate(moving._valid_indices):
            if window_type == 'rolling' and i >= window:
                prior_date = index[i - window + 1]
            else:
                prior_date = index[0]

            date = index[i]

            x_iter = {}
            for k, v in x.iteritems():
                x_iter[k] = v.truncate(before=prior_date, after=date)
            y_iter = y.truncate(before=prior_date, after=date)

            static = ols(y=y_iter, x=x_iter, **kwds)

            self.compare(static, moving, event_index=i, result_index=n)

        _check_non_raw_results(moving)
Example #14
    def checkMovingOLS(self, window_type, x, y, weights=None, **kwds):
        from scikits.statsmodels.tools.tools import rank
        window = rank(x.values) * 2

        moving = ols(y=y, x=x, weights=weights, window_type=window_type,
                     window=window, **kwds)

        # check that sparse version is the same
        sparse_moving = ols(y=y.to_sparse(), x=x.to_sparse(),
                            weights=weights,
                            window_type=window_type,
                            window=window, **kwds)
        _compare_ols_results(moving, sparse_moving)

        index = moving._index

        for n, i in enumerate(moving._valid_indices):
            if window_type == 'rolling' and i >= window:
                prior_date = index[i - window + 1]
            else:
                prior_date = index[0]

            date = index[i]

            x_iter = {}
            for k, v in x.iteritems():
                x_iter[k] = v.truncate(before=prior_date, after=date)
            y_iter = y.truncate(before=prior_date, after=date)

            static = ols(y=y_iter, x=x_iter, weights=weights, **kwds)

            self.compare(static, moving, event_index=i,
                         result_index=n)

        _check_non_raw_results(moving)
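
The checks above compare the (long-deprecated) pandas moving ols against a static fit on each window; a minimal numpy re-statement of a rolling OLS with window = 2 * rank(x), as in the tests, looks roughly like this (names and data are illustrative):

import numpy as np

rng = np.random.RandomState(0)
n = 40
x = rng.randn(n, 2)
y = x @ np.array([1.5, -0.5]) + 0.1 * rng.randn(n)

window = 2 * np.linalg.matrix_rank(x)               # 4 observations per fit
betas = []
for i in range(window - 1, n):
    X = np.column_stack((np.ones(window), x[i - window + 1:i + 1]))   # intercept + regressors
    betas.append(np.linalg.lstsq(X, y[i - window + 1:i + 1], rcond=None)[0])
betas = np.asarray(betas)                           # one coefficient row per window end
print(betas.shape)                                  # (37, 3)
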
Example #15
    def spec_hausman(self, dof=None):
        '''Hausman's specification test


        See Also
        --------
        spec_hausman : generic function for Hausman's specification test

        '''
        #use normalized cov_params for OLS

        resols = OLS(self._results.model.endog, self._results.model.exog).fit()
        normalized_cov_params_ols = resols.model.normalized_cov_params
        se2 = resols.mse_resid

        params_diff = self._results.params - resols.params

        cov_diff = np.linalg.pinv(self.xhatprod) - normalized_cov_params_ols
        #TODO: the following is very inefficient, solves problem (svd) twice
        #use linalg.lstsq or svd directly
        #cov_diff will very often be indefinite (singular)
        if not dof:
            dof = tools.rank(cov_diff)
        cov_diffpinv = np.linalg.pinv(cov_diff)
        H = np.dot(params_diff, np.dot(cov_diffpinv, params_diff))/se2
        pval = stats.chi2.sf(H, dof)

        return H, pval, dof
Example #16
def contrastfromcols(L, D, pseudo=None):
    """
    From an n x p design matrix D and a matrix L, tries
    to determine a p x q contrast matrix C which
    determines a contrast of full rank, i.e. the
    n x q matrix

    dot(transpose(C), pinv(D))

    is full rank.

    L must satisfy either L.shape[0] == n or L.shape[1] == p.

    If L.shape[0] == n, then L is thought of as representing
    columns in the column space of D.

    If L.shape[1] == p, then L is thought of as what is known
    as a contrast matrix. In this case, this function returns an estimable
    contrast corresponding to dot(D, L.T).

    Note that this always produces a meaningful contrast, not always
    with the intended properties because q is always non-zero unless
    L is identically 0. That is, it produces a contrast that spans
    the column space of L (after projection onto the column space of D).

    Parameters
    ----------
    L : array-like
    D : array-like
    """
    L = np.asarray(L)
    D = np.asarray(D)

    n, p = D.shape

    if L.shape[0] != n and L.shape[1] != p:
        raise ValueError("shape of L and D mismatched")

    if pseudo is None:
        pseudo = np.linalg.pinv(D)    # D^+ = dot(inv(dot(D.T, D)), D.T) for full-rank D

    if L.shape[0] == n:
        C = np.dot(pseudo, L).T
    else:
        C = L
        C = np.dot(pseudo, np.dot(D, C.T)).T

    Lp = np.dot(D, C.T)

    if len(Lp.shape) == 1:
        Lp.shape = (n, 1)

    if rank(Lp) != Lp.shape[1]:
        Lp = fullrank(Lp)
        C = np.dot(pseudo, Lp).T

    return np.squeeze(C)
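
A small illustration of the L.shape[0] == n branch above: D is a one-way layout with three group dummies and L is a column in the column space of D, so the returned contrast picks out the corresponding group effect (toy data, for illustration):

import numpy as np

D = np.kron(np.eye(3), np.ones((4, 1)))   # 12 x 3 dummy design: three groups of four
L = D[:, [0]]                             # an n x 1 matrix in the column space of D
pseudo = np.linalg.pinv(D)

C = np.dot(pseudo, L).T                   # the L.shape[0] == n branch
print(np.round(C, 3))                     # [[1. 0. 0.]]
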
Example #17
def contrastfromcols(L, D, pseudo=None):
    """
    From an n x p design matrix D and a matrix L, tries
    to determine a p x q contrast matrix C which
    determines a contrast of full rank, i.e. the
    n x q matrix

    dot(transpose(C), pinv(D))

    is full rank.

    L must satisfy either L.shape[0] == n or L.shape[1] == p.

    If L.shape[0] == n, then L is thought of as representing
    columns in the column space of D.

    If L.shape[1] == p, then L is thought of as what is known
    as a contrast matrix. In this case, this function returns an estimable
    contrast corresponding to dot(D, L.T).

    Note that this always produces a meaningful contrast, not always
    with the intended properties because q is always non-zero unless
    L is identically 0. That is, it produces a contrast that spans
    the column space of L (after projection onto the column space of D).

    Parameters
    ----------
    L : array-like
    D : array-like
    """
    L = np.asarray(L)
    D = np.asarray(D)

    n, p = D.shape

    if L.shape[0] != n and L.shape[1] != p:
        raise ValueError("shape of L and D mismatched")

    if pseudo is None:
        pseudo = np.linalg.pinv(D)  # D^+ = dot(inv(dot(D.T, D)), D.T) for full-rank D

    if L.shape[0] == n:
        C = np.dot(pseudo, L).T
    else:
        C = L
        C = np.dot(pseudo, np.dot(D, C.T)).T

    Lp = np.dot(D, C.T)

    if len(Lp.shape) == 1:
        Lp.shape = (n, 1)

    if rank(Lp) != Lp.shape[1]:
        Lp = fullrank(Lp)
        C = np.dot(pseudo, Lp).T

    return np.squeeze(C)
Example #18
    def _initialize(self):
        """
        Initializes the model for the IRLS fit.

        Resets the history and number of iterations.
        """
        self.history = {
            'deviance': [np.inf],
            'params': [np.inf],
            'weights': [np.inf],
            'sresid': [np.inf],
            'scale': []
        }
        self.iteration = 0
        self.pinv_wexog = np.linalg.pinv(self.exog)
        self.normalized_cov_params = np.dot(self.pinv_wexog,
                                            np.transpose(self.pinv_wexog))
        self.df_resid = float(self.exog.shape[0] - rank(self.exog))
        self.df_model = float(rank(self.exog) - 1)
        self.nobs = float(self.endog.shape[0])
Example #19
    def __init__(self, sys, indep_endog=None, instruments=None):
        if len(sys) % 2 != 0:
            raise ValueError("sys must be a list of pairs of endogenous and \
exogenous variables.  Got length %s" % len(sys))
        M = len(sys[1::2])
        self._M = M
        # The lists are probably a bad idea
        self.endog = sys[::2]  # these are just list containers
        self.exog = sys[1::2]
        self._K = [tools.rank(_) for _ in sys[1::2]]
        #        fullexog = np.column_stack((_ for _ in self.exog))

        self.instruments = instruments

        # Keep the Y_j's in a container to get IVs
        instr_endog = {}
        [instr_endog.setdefault(_, []) for _ in indep_endog.keys()]

        for eq_key in indep_endog:
            for varcol in indep_endog[eq_key]:
                instr_endog[eq_key].append(self.exog[eq_key][:, varcol])
                # ^ copy needed?
#        self._instr_endog = instr_endog

        self._indep_endog = indep_endog
        _col_map = np.cumsum(np.hstack((0, self._K)))  # starting col no.s
        # move this check to whiten since we're not going to build a full exog?
        for eq_key in indep_endog:
            try:
                iter(indep_endog[eq_key])
            except TypeError:
                #                eq_key = [eq_key]
                raise TypeError("The values of the indep_endog dict must be "
                                "iterable. Got type %s for equation key %s"
                                % (type(indep_endog[eq_key]), eq_key))


#            for del_col in indep_endog[eq_key]:
#                fullexog = np.delete(fullexog,  _col_map[eq_key]+del_col, 1)
#                _col_map[eq_key+1:] -= 1

# Josef's example for deleting recurring "rows"
#        fullexog = np.unique(fullexog.T.view([('',fullexog.dtype)]*\
#                fullexog.shape[0])).view(fullexog.dtype).reshape(\
#                fullexog.shape[0],-1)
# From http://article.gmane.org/gmane.comp.python.numeric.general/32276/
# Or Jouni's suggestion of taking a hash:
# http://www.mail-archive.com/[email protected]/msg04209.html
# not clear to me how this would work though, only if they are the *same*
# elements?
#        self.fullexog = fullexog
        self.wexog = self.whiten(instr_endog)
Example #20
    def __init__(self, sys, indep_endog=None, instruments=None):
        if len(sys) % 2 != 0:
            raise ValueError("sys must be a list of pairs of endogenous and \
exogenous variables.  Got length %s" % len(sys))
        M = len(sys[1::2])
        self._M = M
# The lists are probably a bad idea
        self.endog = sys[::2]   # these are just list containers
        self.exog = sys[1::2]
        self._K = [tools.rank(_) for _ in sys[1::2]]
#        fullexog = np.column_stack((_ for _ in self.exog))

        self.instruments = instruments

        # Keep the Y_j's in a container to get IVs
        instr_endog = {}
        [instr_endog.setdefault(_,[]) for _ in indep_endog.keys()]

        for eq_key in indep_endog:
            for varcol in indep_endog[eq_key]:
                instr_endog[eq_key].append(self.exog[eq_key][:,varcol])
                # ^ copy needed?
#        self._instr_endog = instr_endog

        self._indep_endog = indep_endog
        _col_map = np.cumsum(np.hstack((0,self._K))) # starting col no.s
# move this check to whiten since we're not going to build a full exog?
        for eq_key in indep_endog:
            try:
                iter(indep_endog[eq_key])
            except TypeError:
#                eq_key = [eq_key]
                raise TypeError("The values of the indep_endog dict must be "
                        "iterable. Got type %s for equation key %s"
                        % (type(indep_endog[eq_key]), eq_key))
#            for del_col in indep_endog[eq_key]:
#                fullexog = np.delete(fullexog,  _col_map[eq_key]+del_col, 1)
#                _col_map[eq_key+1:] -= 1

# Josef's example for deleting recurring "rows"
#        fullexog = np.unique(fullexog.T.view([('',fullexog.dtype)]*\
#                fullexog.shape[0])).view(fullexog.dtype).reshape(\
#                fullexog.shape[0],-1)
# From http://article.gmane.org/gmane.comp.python.numeric.general/32276/
# Or Jouni's suggestion of taking a hash:
# http://www.mail-archive.com/[email protected]/msg04209.html
# not clear to me how this would work though, only if they are the *same*
# elements?
#        self.fullexog = fullexog
        self.wexog = self.whiten(instr_endog)
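
A compact sketch of the two-stage least squares idea behind the system estimator above, reduced to a single equation with one endogenous regressor and one instrument (data simulated purely for illustration):

import numpy as np

rng = np.random.RandomState(0)
n = 200
z = rng.randn(n)                                   # instrument
u = rng.randn(n)                                   # structural error
x_endog = 0.8 * z + 0.5 * u + 0.1 * rng.randn(n)   # regressor correlated with u
y = 1.0 + 2.0 * x_endog + u

# Stage 1: project the endogenous regressor onto the instruments.
Z = np.column_stack((np.ones(n), z))
xhat = Z @ np.linalg.lstsq(Z, x_endog, rcond=None)[0]

# Stage 2: OLS of y on a constant and the fitted values.
X2 = np.column_stack((np.ones(n), xhat))
beta_2sls = np.linalg.lstsq(X2, y, rcond=None)[0]
print(beta_2sls)                                   # roughly [1.0, 2.0]
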
Example #21
def spec_hausman(params_e, params_i, cov_params_e, cov_params_i, dof=None):
    '''Hausman's specification test

    Parameters
    ----------
    params_e : array
        efficient and consistent under Null hypothesis,
        inconsistent under alternative hypothesis
    params_i : array
        consistent under Null hypothesis,
        consistent under alternative hypothesis
    cov_params_e : array, 2d
        covariance matrix of parameter estimates for params_e
    cov_params_i : array, 2d
        covariance matrix of parameter estimates for params_i

    Example: for instrumental variables, the OLS estimator is `e` and the IV estimator is `i`.


    Notes
    -----

    Todos, Issues
    - check dof calculations and verify for linear case
    - check one-sided hypothesis


    References
    ----------
    Greene section 5.5 p.82/83


    '''
    params_diff = (params_i - params_e)
    cov_diff = cov_params_i - cov_params_e
    #TODO: the following is very inefficient, solves problem (svd) twice
    #use linalg.lstsq or svd directly
    #cov_diff will very often be indefinite (singular)
    if not dof:
        dof = tools.rank(cov_diff)
    cov_diffpinv = np.linalg.pinv(cov_diff)
    H = np.dot(params_diff, np.dot(cov_diffpinv, params_diff))
    pval = stats.chi2.sf(H, dof)

    evals = np.linalg.eigvalsh(cov_diff)

    return H, pval, dof, evals
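
A tiny numerical illustration of the statistic computed above, written out standalone (scipy.stats in place of the module-level imports; the parameter values and covariances are made up):

import numpy as np
from scipy import stats

params_e = np.array([1.00, 2.00])          # efficient under the null (e.g. OLS)
params_i = np.array([1.10, 1.85])          # consistent under both (e.g. IV)
cov_e = np.diag([0.010, 0.020])
cov_i = np.diag([0.015, 0.030])

params_diff = params_i - params_e
cov_diff = cov_i - cov_e
dof = np.linalg.matrix_rank(cov_diff)
H = params_diff @ np.linalg.pinv(cov_diff) @ params_diff
pval = stats.chi2.sf(H, dof)
print(H, dof, pval)                        # 4.25, 2, ~0.12
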
Example #22
    def test_rank(self):
        X = standard_normal((40, 10))
        self.assertEquals(tools.rank(X), 10)

        X[:, 0] = X[:, 1] + X[:, 2]
        self.assertEquals(tools.rank(X), 9)
Example #23
    def test_rank(self):
        X = standard_normal((40,10))
        self.assertEquals(tools.rank(X), 10)

        X[:,0] = X[:,1] + X[:,2]
        self.assertEquals(tools.rank(X), 9)
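
For reference, the same collinearity check written with numpy's built-in matrix_rank, a reasonable modern stand-in for tools.rank:

import numpy as np

X = np.random.standard_normal((40, 10))
assert np.linalg.matrix_rank(X) == 10

X[:, 0] = X[:, 1] + X[:, 2]                # one exactly collinear column
assert np.linalg.matrix_rank(X) == 9
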