Code Example #1
File: test_pca.py  Project: locolucco209/MongoScraper
def test_pca_princomp():
    pcares = pca(xf)
    check_pca_princomp(pcares, princomp1)
    pcares = pca(xf[:20,:])
    check_pca_princomp(pcares, princomp2)
    pcares = pca(xf[:20,:]-xf[:20,:].mean(0))
    check_pca_princomp(pcares, princomp3)
    pcares = pca(xf[:20,:]-xf[:20,:].mean(0), demean=0)
    check_pca_princomp(pcares, princomp3)
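The pca function exercised here is statsmodels.sandbox.tools.pca, and check_pca_princomp compares its output against reference results (princomp1, princomp2, princomp3) defined elsewhere in the test module. As a minimal sketch of calling it directly, on made-up data and assuming the default keyword values keepdim=0, normalize=0, demean=True:

import numpy as np
from statsmodels.sandbox.tools import pca

x = np.random.normal(size=(100, 4))          # toy data: 100 observations, 4 variables
xreduced, factors, evals, evecs = pca(x)     # keepdim=0 keeps all components
print(xreduced.shape, factors.shape)         # (100, 4) (100, 4)

# demean=0 skips the internal mean removal, which is why the test gets the same
# result from an already demeaned slice with and without demean=0
xreduced2, factors2, evals2, evecs2 = pca(x - x.mean(0), demean=0)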
Code Example #2
File: test_pca.py  Project: AnaMP/statsmodels
def test_pca_princomp():
    pcares = pca(xf)
    check_pca_princomp(pcares, princomp1)
    pcares = pca(xf[:20,:])
    check_pca_princomp(pcares, princomp2)
    pcares = pca(xf[:20,:]-xf[:20,:].mean(0))
    check_pca_princomp(pcares, princomp3)
    pcares = pca(xf[:20,:]-xf[:20,:].mean(0), demean=0)
    check_pca_princomp(pcares, princomp3)
Code Example #3
File: test_pca.py  Project: AnaMP/statsmodels
def test_pca_svd():
    xreduced, factors, evals, evecs  = pca(xf)
    factors_wconst = np.c_[factors, np.ones((factors.shape[0],1))]
    beta = np.dot(np.linalg.pinv(factors_wconst), xf)
    #np.dot(np.linalg.pinv(factors_wconst),x2/1000.).T[:,:4] - evecs
    assert_array_almost_equal(beta.T[:,:4], evecs, 14)

    xred_svd, factors_svd, evals_svd, evecs_svd = pcasvd(xf, keepdim=0)
    assert_array_almost_equal(evals_svd, evals, 14)
    msign = (evecs/evecs_svd)[0]
    assert_array_almost_equal(msign*evecs_svd, evecs, 14)
    assert_array_almost_equal(msign*factors_svd, factors, 13)
    assert_array_almost_equal(xred_svd, xreduced, 14)

    pcares = pca(xf, keepdim=2)
    pcasvdres = pcasvd(xf, keepdim=2)
    check_pca_svd(pcares, pcasvdres)
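The msign step above deals with the sign indeterminacy of principal axes: the eigendecomposition-based pca and the SVD-based pcasvd can return any component with its sign flipped, so the first row of the elementwise ratio evecs/evecs_svd yields values close to +1 or -1 that align the two bases before comparing. A small numpy-only sketch of the same idea, independent of the statsmodels functions and using made-up data:

import numpy as np

# data with a generic correlation structure and well-separated eigenvalues
x = np.dot(np.random.normal(size=(200, 3)),
           np.array([[2., 1., 0.], [0., 1.5, 0.5], [0., 0., 1.]]))
xc = x - x.mean(0)

# principal axes via covariance eigendecomposition vs. right singular vectors of the data
evals_e, evecs_e = np.linalg.eigh(np.cov(xc, rowvar=False))
evecs_e = evecs_e[:, ::-1]                    # eigh sorts ascending; reverse to descending
_u, _s, vt = np.linalg.svd(xc, full_matrices=False)
evecs_s = vt.T

# corresponding columns differ at most by an overall sign; the first row recovers it
msign = np.sign((evecs_e / evecs_s)[0])
np.testing.assert_array_almost_equal(msign * evecs_s, evecs_e, 8)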
Code Example #4
File: test_pca.py  Project: locolucco209/MongoScraper
def test_pca_svd():
    xreduced, factors, evals, evecs  = pca(xf)
    factors_wconst = np.c_[factors, np.ones((factors.shape[0],1))]
    beta = np.dot(np.linalg.pinv(factors_wconst), xf)
    #np.dot(np.linalg.pinv(factors_wconst),x2/1000.).T[:,:4] - evecs
    assert_array_almost_equal(beta.T[:,:4], evecs, 14)

    xred_svd, factors_svd, evals_svd, evecs_svd = pcasvd(xf, keepdim=0)
    assert_array_almost_equal(evals_svd, evals, 14)
    msign = (evecs/evecs_svd)[0]
    assert_array_almost_equal(msign*evecs_svd, evecs, 13)
    assert_array_almost_equal(msign*factors_svd, factors, 12)
    assert_array_almost_equal(xred_svd, xreduced, 13)

    pcares = pca(xf, keepdim=2)
    pcasvdres = pcasvd(xf, keepdim=2)
    check_pca_svd(pcares, pcasvdres)
Code Example #5
    def calc_factors(self, x=None, keepdim=0, addconst=True):
        '''get factor decomposition of exogenous variables

        This uses principal component analysis to obtain the factors. The number
        of factors kept is the maximum that will be considered in the regression.
        '''
        if x is None:
            x = self.exog
        else:
            x = np.asarray(x)
        xred, fact, evals, evecs  = pca(x, keepdim=keepdim, normalize=1)
        self.exog_reduced = xred
        #self.factors = fact
        if addconst:
            self.factors = sm.add_constant(fact, prepend=True)
            self.hasconst = 1  #needs to be int
        else:
            self.factors = fact
            self.hasconst = 0  #needs to be int

        self.evals = evals
        self.evecs = evecs
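calc_factors is a method of a factor-model helper class whose surrounding definition is not shown here. A minimal standalone sketch of the same steps on made-up data, assuming the sandbox pca signature used in the other examples and statsmodels' add_constant:

import numpy as np
import statsmodels.api as sm
from statsmodels.sandbox.tools import pca

x = np.random.normal(size=(50, 5))            # stand-in for self.exog
keepdim, addconst = 2, True
xred, fact, evals, evecs = pca(x, keepdim=keepdim, normalize=1)
factors = sm.add_constant(fact, prepend=True) if addconst else fact
hasconst = int(addconst)                      # the method stores this flag as an int
print(factors.shape)                          # (50, 3): prepended constant plus 2 factors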
Code Example #6
import numpy as np
import statsmodels.api as sm
from statsmodels.sandbox.tools import pca
from statsmodels.sandbox.tools.cross_val import LeaveOneOut

# Example: principal component regression
nobs = 1000
# three latent factors: two standard-normal columns plus a constant
f0 = np.c_[np.random.normal(size=(nobs, 2)), np.ones((nobs, 1))]
# loading matrix mapping the 3 factors to 4 observed variables;
# the first two assignments are overridden, only the last one takes effect
f2xcoef = np.c_[np.repeat(np.eye(2), 2, 0), np.arange(4)[::-1]].T
f2xcoef = np.array([[1., 1., 0., 0.], [0., 0., 1., 1.], [3., 2., 1., 0.]])
f2xcoef = np.array([[0.1, 3., 1., 0.], [0., 0., 1.5, 0.1], [3., 2., 1., 0.]])
x0 = np.dot(f0, f2xcoef)
x0 += 0.1 * np.random.normal(size=x0.shape)      # observed variables with noise
ytrue = np.dot(f0, [1., 1., 1.])
y0 = ytrue + 0.1 * np.random.normal(size=ytrue.shape)

xred, fact, eva, eve = pca(x0, keepdim=0)        # keep all principal components
print(eve)
print(fact[:5])
print(f0[:5])

res = sm.OLS(y0, sm.add_constant(x0, prepend=False)).fit()
print('OLS on original data')
print(res.params)
print(res.aic)
print(res.rsquared)

#print('OLS on Factors')
#for k in range(x0.shape[1]):
#    xred, fact, eva, eve = pca(x0, keepdim=k, normalize=1)
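The commented-out lines at the end point at the actual principal component regression step. Continuing the script above, a hedged guess at how that loop could be completed is to regress y0 on the first k factors and compare the fits; the AIC/R-squared printout is illustrative and not part of the original:

# OLS of y0 on an increasing number of principal-component factors
for k in range(1, x0.shape[1] + 1):
    xred, fact, eva, eve = pca(x0, keepdim=k, normalize=1)   # keep the first k components
    res_k = sm.OLS(y0, sm.add_constant(fact, prepend=False)).fit()
    print('%d factors: aic=%10.2f  rsquared=%6.3f' % (k, res_k.aic, res_k.rsquared))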
Code Example #7
File: plotPCA.py  Project: jueshengong/psytrans
def doPCA(dataMatrix):
    logging.info('Computing PCA')
    a, b, c, d = pca(dataMatrix, keepdim=2, normalize=True, demean=True)
    return a, b, c, d
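A brief usage sketch for doPCA as defined above, on a made-up matrix (pca and logging are assumed to be imported at the top of plotPCA.py, since the function needs both); the second return value holds the two-dimensional factor scores that a PCA plot would typically scatter:

import numpy as np

dataMatrix = np.random.normal(size=(30, 6))   # hypothetical feature matrix
xreduced, factors, evals, evecs = doPCA(dataMatrix)
print(factors.shape)                          # (30, 2): one 2-D score per observation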