# Example 1 (score: 0)
def test_pca_princomp():
    """Compare ``pca`` output against stored princomp reference results."""
    # full data set vs. the first reference decomposition
    check_pca_princomp(pca(xf), princomp1)
    # first 20 rows only
    check_pca_princomp(pca(xf[:20, :]), princomp2)
    # pre-demeaned subsample should match, with and without demean=0
    demeaned = xf[:20, :] - xf[:20, :].mean(0)
    check_pca_princomp(pca(demeaned), princomp3)
    check_pca_princomp(pca(demeaned, demean=0), princomp3)
# Example 2 (score: 0)
def test_pca_princomp():
    """Check ``pca`` against stored princomp reference decompositions."""
    centered = xf[:20, :] - xf[:20, :].mean(0)
    # (data, expected reference, extra keyword arguments)
    cases = [
        (xf, princomp1, {}),
        (xf[:20, :], princomp2, {}),
        (centered, princomp3, {}),
        (centered, princomp3, {'demean': 0}),
    ]
    for data, expected, kwargs in cases:
        check_pca_princomp(pca(data, **kwargs), expected)
# Example 3 (score: 0)
def test_pca_svd():
    """Verify the SVD-based PCA agrees with the eigendecomposition-based one."""
    xreduced, factors, evals, evecs = pca(xf)

    # Regressing the data on the factors (plus a constant) should
    # recover the eigenvectors as regression coefficients.
    design = np.c_[factors, np.ones((factors.shape[0], 1))]
    beta = np.dot(np.linalg.pinv(design), xf)
    assert_array_almost_equal(beta.T[:, :4], evecs, 14)

    xred_svd, factors_svd, evals_svd, evecs_svd = pcasvd(xf, keepdim=0)
    assert_array_almost_equal(evals_svd, evals, 14)
    # Eigenvectors are only determined up to sign; align signs first.
    msign = (evecs / evecs_svd)[0]
    assert_array_almost_equal(msign * evecs_svd, evecs, 14)
    assert_array_almost_equal(msign * factors_svd, factors, 13)
    assert_array_almost_equal(xred_svd, xreduced, 14)

    # Both implementations should also agree when dimensions are dropped.
    check_pca_svd(pca(xf, keepdim=2), pcasvd(xf, keepdim=2))
# Example 4 (score: 0)
def test_pca_svd():
    """Check that eigendecomposition- and SVD-based PCA produce the same results."""
    xreduced, factors, evals, evecs = pca(xf)
    factors_wconst = np.c_[factors, np.ones((factors.shape[0], 1))]
    # OLS of the data on the factors recovers the eigenvectors.
    beta = np.dot(np.linalg.pinv(factors_wconst), xf)
    assert_array_almost_equal(beta.T[:, :4], evecs, 14)

    svd_result = pcasvd(xf, keepdim=0)
    xred_svd, factors_svd, evals_svd, evecs_svd = svd_result
    assert_array_almost_equal(evals_svd, evals, 14)
    # The sign of each eigenvector is arbitrary; normalize before comparing.
    sign_flip = (evecs / evecs_svd)[0]
    assert_array_almost_equal(sign_flip * evecs_svd, evecs, 13)
    assert_array_almost_equal(sign_flip * factors_svd, factors, 12)
    assert_array_almost_equal(xred_svd, xreduced, 13)

    pcares = pca(xf, keepdim=2)
    pcasvdres = pcasvd(xf, keepdim=2)
    check_pca_svd(pcares, pcasvdres)
# Example 5 (score: 0)
    def calc_factors(self, x=None, keepdim=0, addconst=True):
        '''Compute the factor decomposition of the exogenous variables.

        Uses principal component analysis to obtain the factors. The
        number of factors kept is the maximum that will be considered
        in the regression.

        Stores ``exog_reduced``, ``factors``, ``hasconst``, ``evals``
        and ``evecs`` on the instance.
        '''
        data = self.exog if x is None else np.asarray(x)
        xred, fact, evals, evecs = pca(data, keepdim=keepdim, normalize=1)
        self.exog_reduced = xred
        if addconst:
            fact = sm.add_constant(fact, prepend=True)
        self.factors = fact
        # hasconst needs to be an int, not a bool
        self.hasconst = 1 if addconst else 0
        self.evals = evals
        self.evecs = evecs
# Example 6 (score: 0)
    def calc_factors(self, x=None, keepdim=0, addconst=True):
        '''Obtain a factor decomposition of the exogenous variables.

        Principal component analysis provides the factors; the number
        kept is the maximum that will be considered in the regression.
        Results are attached to the instance as ``exog_reduced``,
        ``factors``, ``hasconst``, ``evals`` and ``evecs``.
        '''
        if x is None:
            data = self.exog
        else:
            data = np.asarray(x)

        xred, fact, evals, evecs = pca(data, keepdim=keepdim, normalize=1)
        self.exog_reduced = xred
        if addconst:
            # prepend a constant column so the factor regression has an intercept
            self.factors = sm.add_constant(fact, prepend=True)
            self.hasconst = 1  # needs to be int
        else:
            self.factors = fact
            self.hasconst = 0  # needs to be int
        self.evals = evals
        self.evecs = evecs
import numpy as np

import statsmodels.api as sm
from statsmodels.sandbox.tools import pca
from statsmodels.sandbox.tools.cross_val import LeaveOneOut

# Example: principal component regression
#
# Build a synthetic data set driven by two latent factors plus a constant,
# then compare OLS on the raw data with PCA-based factor extraction.
nobs = 1000
# true factors: two standard-normal columns plus a constant column
f0 = np.c_[np.random.normal(size=(nobs, 2)), np.ones((nobs, 1))]
# factor-loading matrix; earlier assignments are kept as documented
# alternatives, the last one is the one actually used
f2xcoef = np.c_[np.repeat(np.eye(2), 2, 0), np.arange(4)[::-1]].T
f2xcoef = np.array([[1., 1., 0., 0.], [0., 0., 1., 1.], [3., 2., 1., 0.]])
f2xcoef = np.array([[0.1, 3., 1., 0.], [0., 0., 1.5, 0.1], [3., 2., 1., 0.]])
x0 = np.dot(f0, f2xcoef)
x0 += 0.1 * np.random.normal(size=x0.shape)  # observation noise
ytrue = np.dot(f0, [1., 1., 1.])
y0 = ytrue + 0.1 * np.random.normal(size=ytrue.shape)

# full PCA of the observed regressors
xred, fact, eva, eve = pca(x0, keepdim=0)
print(eve)
print(fact[:5])
print(f0[:5])

import statsmodels.api as sm

res = sm.OLS(y0, sm.add_constant(x0, prepend=False)).fit()
print('OLS on original data')
print(res.params)
print(res.aic)
print(res.rsquared)

#print('OLS on Factors')
#for k in range(x0.shape[1]):
#    xred, fact, eva, eve = pca(x0, keepdim=k, normalize=1)
# Example: principal component regression
#
# Generate data from two latent factors plus a constant, then fit OLS on
# the raw regressors; factor-based fits are sketched (commented) below.
nobs = 1000
# true factors: two standard-normal columns and a constant column
f0 = np.c_[np.random.normal(size=(nobs, 2)), np.ones((nobs, 1))]
# factor loadings; earlier assignments are kept as documented
# alternatives, only the last assignment takes effect
f2xcoef = np.c_[np.repeat(np.eye(2), 2, 0), np.arange(4)[::-1]].T
f2xcoef = np.array([[1.,  1.,  0.,  0.],
                    [0.,  0.,  1.,  1.],
                    [3.,  2.,  1.,  0.]])
f2xcoef = np.array([[0.1, 3.,  1.,  0.],
                    [0.,  0.,  1.5, 0.1],
                    [3.,  2.,  1.,  0.]])
x0 = np.dot(f0, f2xcoef)
x0 += 0.1 * np.random.normal(size=x0.shape)  # observation noise
ytrue = np.dot(f0, [1., 1., 1.])
y0 = ytrue + 0.1 * np.random.normal(size=ytrue.shape)

# full PCA of the observed regressors
xred, fact, eva, eve = pca(x0, keepdim=0)
print(eve)
print(fact[:5])
print(f0[:5])

import statsmodels.api as sm

res = sm.OLS(y0, sm.add_constant(x0, prepend=False)).fit()
print('OLS on original data')
print(res.params)
print(res.aic)
print(res.rsquared)

#print('OLS on Factors')
#for k in range(x0.shape[1]):
#    xred, fact, eva, eve = pca(x0, keepdim=k, normalize=1)
# Example 9 (score: 0)
def doPCA(dataMatrix):
    """Run PCA on *dataMatrix*, keeping two dimensions.

    Returns the four-element result of ``pca`` (presumably
    reduced data, factors, eigenvalues, eigenvectors — confirm
    against the ``pca`` implementation).
    """
    logging.info('Computing PCA')
    xreduced, factors, evals, evecs = pca(
        dataMatrix, keepdim=2, normalize=True, demean=True)
    return xreduced, factors, evals, evecs