def test_pca_princomp():
    pcares = pca(xf)
    check_pca_princomp(pcares, princomp1)
    pcares = pca(xf[:20, :])
    check_pca_princomp(pcares, princomp2)
    pcares = pca(xf[:20, :] - xf[:20, :].mean(0))
    check_pca_princomp(pcares, princomp3)
    pcares = pca(xf[:20, :] - xf[:20, :].mean(0), demean=0)
    check_pca_princomp(pcares, princomp3)
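# A minimal numpy-only sketch of what such a princomp-style cross-check
# computes, assuming pca returns (xreduced, factors, evals, evecs) with
# eigenvalues in decreasing order. np.cov divides by nobs - 1; if the pca
# implementation divides by nobs instead, the eigenvalues differ only by a
# constant factor, so the comparison below normalizes them first.
import numpy as np
from numpy.testing import assert_array_almost_equal

def check_pca_eigh(x):
    xreduced, factors, evals, evecs = pca(x)
    evals_ref, evecs_ref = np.linalg.eigh(np.cov(x, rowvar=0))
    evals_ref = evals_ref[::-1]      # eigh sorts ascending; reverse
    evecs_ref = evecs_ref[:, ::-1]
    # normalize eigenvalues so the check is divisor-independent
    assert_array_almost_equal(evals / evals.sum(),
                              evals_ref / evals_ref.sum(), 10)
    # eigenvectors are identified only up to a sign flip per column
    assert_array_almost_equal(np.abs(evecs), np.abs(evecs_ref), 10)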
def test_pca_svd():
    xreduced, factors, evals, evecs = pca(xf)
    factors_wconst = np.c_[factors, np.ones((factors.shape[0], 1))]
    beta = np.dot(np.linalg.pinv(factors_wconst), xf)
    #np.dot(np.linalg.pinv(factors_wconst), x2/1000.).T[:, :4] - evecs
    assert_array_almost_equal(beta.T[:, :4], evecs, 14)

    xred_svd, factors_svd, evals_svd, evecs_svd = pcasvd(xf, keepdim=0)
    assert_array_almost_equal(evals_svd, evals, 14)
    # eigenvectors are identified only up to sign; align before comparing
    msign = (evecs / evecs_svd)[0]
    assert_array_almost_equal(msign * evecs_svd, evecs, 14)
    assert_array_almost_equal(msign * factors_svd, factors, 13)
    assert_array_almost_equal(xred_svd, xreduced, 14)

    pcares = pca(xf, keepdim=2)
    pcasvdres = pcasvd(xf, keepdim=2)
    check_pca_svd(pcares, pcasvdres)
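# Why the eigendecomposition and SVD routes agree, as a self-contained
# numpy sketch independent of statsmodels: for demeaned X, the eigenvalues
# of X'X / nobs are the squared singular values of X divided by nobs, and
# the right singular vectors are the eigenvectors up to sign.
import numpy as np
from numpy.testing import assert_array_almost_equal

np.random.seed(12345)
x = np.random.normal(size=(100, 4))
x -= x.mean(0)
evals, evecs = np.linalg.eigh(np.dot(x.T, x) / x.shape[0])
evals, evecs = evals[::-1], evecs[:, ::-1]   # reorder to decreasing
u, s, vt = np.linalg.svd(x, full_matrices=False)
assert_array_almost_equal(s**2 / x.shape[0], evals, 10)
assert_array_almost_equal(np.abs(vt.T), np.abs(evecs), 10)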
def calc_factors(self, x=None, keepdim=0, addconst=True):
    '''get factor decomposition of exogenous variables

    This uses principal component analysis to obtain the factors. The
    number of factors kept is the maximum that will be considered in
    the regression.
    '''
    if x is None:
        x = self.exog
    else:
        x = np.asarray(x)
    xred, fact, evals, evecs = pca(x, keepdim=keepdim, normalize=1)
    self.exog_reduced = xred
    #self.factors = fact
    if addconst:
        self.factors = sm.add_constant(fact, prepend=True)
        self.hasconst = 1  #needs to be int
    else:
        self.factors = fact
        self.hasconst = 0  #needs to be int
    self.evals = evals
    self.evecs = evecs
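# A minimal, self-contained sketch of the same idea outside the class,
# assuming arrays y (nobs,) and x (nobs, nvars); fit_on_factors is an
# illustrative name, not part of the class above.
import numpy as np
import statsmodels.api as sm
from statsmodels.sandbox.tools import pca

def fit_on_factors(y, x, keepdim):
    # extract normalized factors and prepend a constant, mirroring calc_factors
    xred, fact, evals, evecs = pca(np.asarray(x), keepdim=keepdim, normalize=1)
    factors = sm.add_constant(fact, prepend=True)
    return sm.OLS(y, factors).fit()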
import numpy as np
import statsmodels.api as sm
from statsmodels.sandbox.tools import pca
from statsmodels.sandbox.tools.cross_val import LeaveOneOut

# Example: principal component regression
nobs = 1000
f0 = np.c_[np.random.normal(size=(nobs, 2)), np.ones((nobs, 1))]
# coefficient matrix mapping factors to observed variables; the first two
# assignments are overwritten, only the last one is used
f2xcoef = np.c_[np.repeat(np.eye(2), 2, 0), np.arange(4)[::-1]].T
f2xcoef = np.array([[1., 1., 0., 0.],
                    [0., 0., 1., 1.],
                    [3., 2., 1., 0.]])
f2xcoef = np.array([[0.1, 3., 1., 0.],
                    [0., 0., 1.5, 0.1],
                    [3., 2., 1., 0.]])
x0 = np.dot(f0, f2xcoef)
x0 += 0.1 * np.random.normal(size=x0.shape)
ytrue = np.dot(f0, [1., 1., 1.])
y0 = ytrue + 0.1 * np.random.normal(size=ytrue.shape)

xred, fact, eva, eve = pca(x0, keepdim=0)
print(eve)
print(fact[:5])
print(f0[:5])

res = sm.OLS(y0, sm.add_constant(x0, prepend=False)).fit()
print('OLS on original data')
print(res.params)
print(res.aic)
print(res.rsquared)

#print('OLS on Factors')
#for k in range(x0.shape[1]):
#    xred, fact, eva, eve = pca(x0, keepdim=k, normalize=1)
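# Continuing the example: one way to pick the number of factors is by
# leave-one-out prediction error, which the commented-out loop above and
# the LeaveOneOut import suggest. This sketch assumes LeaveOneOut(nobs)
# iterates over (train, test) boolean index pairs, as in the sandbox
# cross_val module; it refits OLS nobs times per k, so it is slow but simple.
for k in range(1, x0.shape[1] + 1):
    xred, fact, eva, eve = pca(x0, keepdim=k, normalize=1)
    exog = sm.add_constant(fact, prepend=True)
    sse = 0.0
    for train, test in LeaveOneOut(nobs):
        res = sm.OLS(y0[train], exog[train]).fit()
        sse += ((y0[test] - res.predict(exog[test])) ** 2).sum()
    print(k, sse / nobs)  # mean squared out-of-sample error for k factors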
import logging

from statsmodels.sandbox.tools import pca

def doPCA(dataMatrix):
    logging.info('Computing PCA')
    # keep the first two principal components of the demeaned, normalized data
    xreduced, factors, evals, evecs = pca(dataMatrix, keepdim=2,
                                          normalize=True, demean=True)
    return xreduced, factors, evals, evecs
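# Example call on random data, assuming keepdim=2 means two factors are
# kept, so factors should have two columns.
if __name__ == '__main__':
    import numpy as np
    data = np.random.normal(size=(100, 5))
    xreduced, factors, evals, evecs = doPCA(data)
    print(factors.shape)   # expected: (100, 2)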