Code example #1
def white(reg):
    """
    Calculates the White test to check for heteroscedasticity.

    Parameters
    ----------
    reg             : regression object
                      output instance from a regression model

    Returns
    -------
    white_result    : dictionary
                      contains the test statistic ('wh'), the degrees of
                      freedom ('df') and the associated p-value ('pvalue')
                      for the White test; if the x matrix is severely
                      multicollinear, a string noting this is returned
                      instead.
    wh              : float
                      scalar value for the White test statistic.
    df              : integer
                      degrees of freedom associated with the test
    pvalue          : float
                      p-value associated with the statistic (chi^2
                      distributed with df degrees of freedom)

    Notes
    -----
    The x attribute of the reg object must include a constant term. This is
    standard for spreg.OLS, so no check is performed to confirm the constant.

    References
    ----------
    .. [1] H. White. 1980. A heteroskedasticity-consistent covariance
       matrix estimator and a direct test for heteroskedasticity.
       Econometrica 48(4): 817-838.

    Examples
    --------
    >>> import numpy as np
    >>> import pysal
    >>> import diagnostics
    >>> from ols import OLS

    Read the DBF associated with the Columbus data.

    >>> db = pysal.open(pysal.examples.get_path("columbus.dbf"),"r")

    Create the dependent variable vector. 

    >>> y = np.array(db.by_col("CRIME"))
    >>> y = np.reshape(y, (49,1))

    Create the matrix of independent variables. 

    >>> X = []
    >>> X.append(db.by_col("INC"))
    >>> X.append(db.by_col("HOVAL"))
    >>> X = np.array(X).T

    Run an OLS regression.

    >>> reg = OLS(y,X)

    Calculate the White test for heteroscedasticity.

    >>> testresult = diagnostics.white(reg)

    Print the degrees of freedom for the test.

    >>> print(testresult['df'])
    5

    Print the test statistic.

    >>> print("%1.3f"%testresult['wh'])
    19.946

    Print the associated p-value. 

    >>> print("%1.4f"%testresult['pvalue'])
    0.0013

    """
    e = reg.u ** 2
    k = reg.k
    n = reg.n
    y = reg.y
    X = reg.x
    #constant = constant_check(X)

    # Check for constant, if none add one, see Greene 2003, pg. 222
    # if constant == False:
    #    X = np.hstack((np.ones((n,1)),X))

    # Check for multicollinearity in the X matrix
    ci = condition_index(reg)
    if ci > 30:
        white_result = "Not computed due to multicollinearity."
        return white_result

    # Compute cross-products and squares of the regression variables
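    # For k regressors (including the constant) this yields k*(k+1)/2 columns:
    # the k squared terms plus the k*(k-1)/2 distinct cross-products.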
    if type(X).__name__ == 'ndarray':
        A = np.zeros((n, (k * (k + 1)) // 2))
    elif type(X).__name__ == 'csc_matrix' or type(X).__name__ == 'csr_matrix':
        # this is probably inefficient
        A = SP.lil_matrix((n, (k * (k + 1)) // 2))
    else:
        raise Exception("unknown X type, %s" % type(X).__name__)
    counter = 0
    for i in range(k):
        for j in range(i, k):
            v = spmultiply(X[:, i], X[:, j], False)
            A[:, counter] = v
            counter += 1

    # Append the original variables
    A = sphstack(X, A)   # note: this also converts a LIL to CSR
    n, k = A.shape

    # Check to identify any duplicate or constant columns in A
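    # Squares and cross-products can reproduce existing columns (the square of
    # the constant is constant, the square of a dummy is the dummy itself),
    # which would make the auxiliary design singular.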
    omitcolumn = []
    for i in range(k):
        current = A[:, i]
        # remove all constant terms (will add a constant back later)
        if spmax(current) == spmin(current):
            omitcolumn.append(i)
        # do not allow duplicates
        for j in range(k):
            check = A[:, j]
            if i < j:
                test = abs(current - check).sum()
                if test == 0:
                    omitcolumn.append(j)
    uniqueomit = set(omitcolumn)
    omitcolumn = list(uniqueomit)

    # Now the identified columns must be removed
    if type(A).__name__ == 'ndarray':
        A = np.delete(A, omitcolumn, 1)
    elif type(A).__name__ == 'csc_matrix' or type(A).__name__ == 'csr_matrix':
        # this is probably inefficient
        keepcolumn = range(k)
        for i in omitcolumn:
            keepcolumn.remove(i)
        A = A[:, keepcolumn]
    else:
        raise Exception, "unknown A type, %s" % type(X).__name__
    A = sphstack(np.ones((A.shape[0], 1)), A)   # add a constant back in
    n, k = A.shape

    # Conduct the auxiliary regression and calculate the statistic
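    # LM form of the White test: n * R^2 from regressing the squared residuals
    # on A, asymptotically chi^2 with df = (columns of A) - 1, since the
    # constant does not count toward the degrees of freedom.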
    import ols as OLS
    aux_reg = OLS.BaseOLS(e, A)
    aux_r2 = r2(aux_reg)
    wh = aux_r2 * n
    df = k - 1
    pvalue = stats.chisqprob(wh, df)
    white_result = {'df': df, 'wh': wh, 'pvalue': pvalue}
    return white_result
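
The auxiliary-regression logic above can also be reproduced outside of spreg with plain NumPy and SciPy. The sketch below is only an illustration of that logic, under the assumption that all regressors are continuous (so no constant or duplicate columns need to be dropped from the auxiliary design); white_test_sketch is a hypothetical name, not part of the spreg API.

import numpy as np
from scipy.stats import chi2

def white_test_sketch(y, X):
    """White test sketch; X is n x k and should NOT already contain a constant."""
    n, k = X.shape
    Xc = np.hstack((np.ones((n, 1)), X))           # design with intercept
    beta = np.linalg.lstsq(Xc, y, rcond=None)[0]   # OLS fit
    e2 = (y - Xc.dot(beta)) ** 2                   # squared residuals

    # auxiliary design: constant, levels, squares and cross-products
    cols = [np.ones((n, 1)), X]
    for i in range(k):
        for j in range(i, k):
            cols.append((X[:, i] * X[:, j]).reshape(-1, 1))
    A = np.hstack(cols)

    gamma = np.linalg.lstsq(A, e2, rcond=None)[0]
    resid = e2 - A.dot(gamma)
    r2 = 1.0 - (resid ** 2).sum() / ((e2 - e2.mean()) ** 2).sum()
    stat = n * r2
    df = A.shape[1] - 1                            # constant excluded
    return stat, df, chi2.sf(stat, df)

With the two continuous Columbus regressors used in the doctest (INC and HOVAL), this sketch builds the same six-column auxiliary design as white() and should therefore reproduce the df = 5 result shown above.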
Code example #2
def white(reg):
    """
    Calculates the White test to check for heteroscedasticity. [White1980]_

    Parameters
    ----------
    reg             : regression object
                      output instance from a regression model

    Returns
    -------
    white_result    : dictionary
                      contains the test statistic ('wh'), the degrees of
                      freedom ('df') and the associated p-value ('pvalue')
                      for the White test; if the x matrix is severely
                      multicollinear, a string noting this is returned
                      instead.
    wh              : float
                      scalar value for the White test statistic.
    df              : integer
                      degrees of freedom associated with the test
    pvalue          : float
                      p-value associated with the statistic (chi^2
                      distributed with df degrees of freedom)

    Notes
    -----
    The x attribute of the reg object must include a constant term. This is
    standard for spreg.OLS, so no check is performed to confirm the constant.

    Examples
    --------
    >>> import numpy as np
    >>> import pysal
    >>> import diagnostics
    >>> from ols import OLS

    Read the DBF associated with the Columbus data.

    >>> db = pysal.open(pysal.examples.get_path("columbus.dbf"),"r")

    Create the dependent variable vector. 

    >>> y = np.array(db.by_col("CRIME"))
    >>> y = np.reshape(y, (49,1))

    Create the matrix of independent variables. 

    >>> X = []
    >>> X.append(db.by_col("INC"))
    >>> X.append(db.by_col("HOVAL"))
    >>> X = np.array(X).T

    Run an OLS regression.

    >>> reg = OLS(y,X)

    Calculate the White test for heteroscedasticity.

    >>> testresult = diagnostics.white(reg)

    Print the degrees of freedom for the test.

    >>> print(testresult['df'])
    5

    Print the test statistic.

    >>> print("%1.3f"%testresult['wh'])
    19.946

    Print the associated p-value. 

    >>> print("%1.4f"%testresult['pvalue'])
    0.0013

    """
    e = reg.u**2
    k = int(reg.k)
    n = int(reg.n)
    y = reg.y
    X = reg.x
    #constant = constant_check(X)

    # Check for constant, if none add one, see Greene 2003, pg. 222
    # if constant == False:
    #    X = np.hstack((np.ones((n,1)),X))

    # Check for multicollinearity in the X matrix
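    # A condition index above 30 is the usual rule of thumb for severe
    # multicollinearity (Belsley, Kuh and Welsch, 1980).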
    ci = condition_index(reg)
    if ci > 30:
        white_result = "Not computed due to multicollinearity."
        return white_result

    # Compute cross-products and squares of the regression variables
    if type(X).__name__ == 'ndarray':
        A = np.zeros((n, (k * (k + 1)) // 2))
    elif type(X).__name__ == 'csc_matrix' or type(X).__name__ == 'csr_matrix':
        # this is probably inefficient
        A = SP.lil_matrix((n, (k * (k + 1)) // 2))
    else:
        raise Exception, "unknown X type, %s" % type(X).__name__
    counter = 0
    for i in range(k):
        for j in range(i, k):
            v = spmultiply(X[:, i], X[:, j], False)
            A[:, counter] = v
            counter += 1

    # Append the original variables
    A = sphstack(X, A)  # note: this also converts a LIL to CSR
    n, k = A.shape

    # Check to identify any duplicate or constant columns in A
    omitcolumn = []
    for i in range(k):
        current = A[:, i]
        # remove all constant terms (will add a constant back later)
        if spmax(current) == spmin(current):
            omitcolumn.append(i)
        # do not allow duplicates
        for j in range(k):
            check = A[:, j]
            if i < j:
                test = abs(current - check).sum()
                if test == 0:
                    omitcolumn.append(j)
    uniqueomit = set(omitcolumn)
    omitcolumn = list(uniqueomit)

    # Now the identified columns must be removed
    if type(A).__name__ == 'ndarray':
        A = np.delete(A, omitcolumn, 1)
    elif type(A).__name__ == 'csc_matrix' or type(A).__name__ == 'csr_matrix':
        # this is probably inefficient
        keepcolumn = range(k)
        for i in omitcolumn:
            keepcolumn.remove(i)
        A = A[:, keepcolumn]
    else:
        raise Exception, "unknown A type, %s" % type(X).__name__
    A = sphstack(np.ones((A.shape[0], 1)), A)  # add a constant back in
    n, k = A.shape

    # Conduct the auxiliary regression and calculate the statistic
    import ols as OLS
    aux_reg = OLS.BaseOLS(e, A)
    aux_r2 = r2(aux_reg)
    wh = aux_r2 * n
    df = k - 1
    pvalue = chisqprob(wh, df)
    white_result = {'df': df, 'wh': wh, 'pvalue': pvalue}
    return white_result
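
One caveat on the final step of this second version: chisqprob was deprecated and later removed from SciPy, so on newer SciPy releases the upper-tail probability has to come from the chi-square survival function instead. A minimal compatibility shim, assuming only that scipy.stats.chi2 is available (the try/except guard is mine, not spreg's):

try:
    from scipy.stats import chisqprob          # present in older SciPy releases
except ImportError:
    from scipy.stats import chi2

    def chisqprob(stat, df):
        # upper-tail probability of a chi^2 variate, matching what chisqprob returned
        return chi2.sf(stat, df)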