class PCR:
    """Principal component regression built on the project's PCA and MLR.

    X is decomposed with PCA, the resulting score matrix is regressed
    against Y with MLR, and new samples are projected through the stored
    loadings before prediction.

    Attributes set along the way:
        pca: PCA instance created by confirmPCs() (or on demand by fit()).
        T, P: the two matrices returned by PCA.PCAdecompose().
        mlr: the MLR instance fitted on T.
        A: coefficients reported by MLR.getCoef().
    """

    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    def confirmPCs(self):
        """Run the SVD of X and return what PCA.SVDdecompose() reports.

        Returns:
            The ``(compare, cum)`` pair from PCA.SVDdecompose(); both can
            be used to decide how many independent components to keep.
        """
        self.pca = PCA(self.X)
        compare, cum = self.pca.SVDdecompose()
        return compare, cum

    def fit(self, PCs):
        """Fit the regression on ``PCs`` principal components.

        Args:
            PCs: number of principal components (independent groups).
        """
        # fit() used to fail with AttributeError unless confirmPCs() had
        # been called first; run the decomposition on demand instead.
        if getattr(self, "pca", None) is None:
            self.confirmPCs()
        T, P = self.pca.PCAdecompose(PCs)
        self.P = P
        self.T = T
        # NOTE(review): the third argument presumably tells MLR not to add
        # an intercept term - confirm against the MLR class.
        self.mlr = MLR(T, self.Y, False)
        self.mlr.fit()
        self.A = self.mlr.getCoef()

    def predict(self, Xnew):
        """Project ``Xnew`` through the stored loadings and predict with MLR.

        Requires a previous call to fit().
        """
        T = np.dot(Xnew, self.P)
        ans = self.mlr.predict(T)
        return ans

    def fTest(self, arfa):
        """Return MLR.Ftest(arfa) for the fitted regression.

        Args:
            arfa: forwarded unchanged to MLR.Ftest (presumably the
                significance level alpha - confirm against MLR).
        """
        return self.mlr.Ftest(arfa)
class PCR:
    """Principal component regression with an explicit constant column.

    X is decomposed with PCA, a column of ones is prepended to the score
    matrix, and the result is regressed against Y with MLR. New samples
    go through the same projection and ones-column step before prediction.

    Attributes set along the way:
        pca: PCA instance created by confirmPCs() (or on demand by fit()).
        P: second matrix returned by PCA.PCAdecompose(), used to project
            new samples.
        mlr: the MLR instance fitted on the augmented scores.
        A: coefficients reported by MLR.getCoef().
    """

    def __init__(self, X, Y):
        self.X = X
        self.Y = Y

    @staticmethod
    def _withOnes(T):
        """Return ``T`` with a leading column of ones (constant term)."""
        return np.c_[np.ones(T.shape[0]), T]

    def confirmPCs(self):
        """Run the SVD of X and return what PCA.SVDdecompose() reports."""
        self.pca = PCA(self.X)
        compare = self.pca.SVDdecompose()
        return compare

    def fit(self, PCs):
        """Fit the regression on ``PCs`` principal components.

        Args:
            PCs: number of principal components passed to
                PCA.PCAdecompose().
        """
        # fit() used to fail with AttributeError unless confirmPCs() had
        # been called first; run the decomposition on demand instead.
        if getattr(self, "pca", None) is None:
            self.confirmPCs()
        T, P = self.pca.PCAdecompose(PCs)
        self.P = P
        self.mlr = MLR(self._withOnes(T), self.Y)
        self.mlr.fit()
        self.A = self.mlr.getCoef()

    def predict(self, Xnew):
        """Project ``Xnew`` through the stored loadings and predict with MLR.

        The projected scores get the same leading ones column that fit()
        used. Requires a previous call to fit().
        """
        T = np.dot(Xnew, self.P)
        ans = self.mlr.predict(self._withOnes(T))
        return ans

    def fTest(self, arfa):
        """Return MLR.Ftest(arfa) for the fitted regression.

        Args:
            arfa: forwarded unchanged to MLR.Ftest (presumably the
                significance level alpha - confirm against MLR).
        """
        return self.mlr.Ftest(arfa)
def model(self, k):
    """Fit a principal component regression with ``k`` components.

    Decomposes ``self.X`` with PCA, regresses ``self.Y`` on the score
    matrix with MLR, and stores the coefficients mapped back through the
    loading matrix in ``self.A``.

    Args:
        k: number of principal components passed to PCA.PCAdecompose().
    """
    pca = PCA(self.X)
    # Do not remove: PCAdecompose() relies on results computed by this
    # call. Its return values are not needed here.
    pca.SVDdecompose()
    # Score matrix T and loading matrix P.
    T, P = pca.PCAdecompose(k)
    mlr = MLR(T, self.Y)
    mlr.modelling()
    # Map the score-space coefficients back through the loadings.
    self.A = np.dot(P, mlr.A)
def computePCs(self, percentage=0.95):
    """Choose the number of principal components by cumulative share.

    Walks the ``compare`` values returned by PCA.SVDdecompose() in order
    and keeps the smallest leading group whose cumulative share of the
    total reaches ``percentage``.

    Args:
        percentage: cumulative share of ``sum(compare)`` that the kept
            components must reach. Defaults to 0.95.

    Side effects:
        self.kcount: number of entries in ``compare`` (the maximum number
            of principal components).
        self.k: the selected number of components. Falls back to
            ``self.kcount`` when the threshold is never reached.
    """
    pca = PCA(self.X)
    # Only the fourth return value is used here.
    _, _, _, compare = pca.SVDdecompose()
    self.kcount = len(compare)
    # Fallback: keep every component if the threshold is never reached.
    self.k = self.kcount

    total = sum(compare)
    running = 0
    # Use the loop position, not a lookup by value: searching for the
    # value picks the wrong index when two contributions are equal and
    # fails outright when compare is a plain list.
    for position, contribution in enumerate(compare, start=1):
        running += contribution
        if running / total >= percentage:
            self.k = position
            break
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cross_decomposition import PLSRegression

from PCA import PCA

# Compare PCA scores with PLS scores for the same training set, one
# subplot each, with samples coloured by class label.

# Training matrix (A) and per-sample labels (B).
A = np.loadtxt(r".\wheat_train_PCA_X.txt")
B = np.loadtxt(r".\wheat_train_PCA_Y.txt")

# Autoscale every column: zero mean, unit standard deviation.
aver = A.mean(axis=0)
std = A.std(axis=0)
X = (A - aver) / std

# PCA on the autoscaled data; the SVD result is printed for inspection.
pca = PCA(X)
print(pca.SVDdecompose())
T, P = pca.PCAdecompose(6)

# Boolean masks: samples labelled 1.0 versus all the others.
cls1 = B == 1.0
cls2 = B != 1.0

# Left panel: first two PCA score columns.
plt.subplot(1, 2, 1)
plt.plot(T[cls1, 0], T[cls1, 1], 'ro')
plt.plot(T[cls2, 0], T[cls2, 1], 'b^')

# PLSR: fitted on the raw matrix A because scale=True is passed to
# PLSRegression.
pls = PLSRegression(n_components=3, scale=True)
pls.fit(A, B)
T = pls.x_scores_

# Right panel: first two PLS score columns.
plt.subplot(1, 2, 2)
plt.plot(T[cls1, 0], T[cls1, 1], 'ro')
plt.plot(T[cls2, 0], T[cls2, 1], 'b^')

# Without this the figure is never displayed when the file is run as a
# plain (non-interactive) script.
plt.show()
# %%
import numpy as np
from sklearn.feature_selection import SelectPercentile, f_classif, mutual_info_classif

from PCA import PCA

# Select the top 30 percent of variables by mutual information with the
# class label, then run PCA on the reduced matrix and plot the scores.

# X1 keeps a reference to the raw data; X is rebound to the autoscaled
# copy below.
X1 = X = np.loadtxt(
    r"C:\Users\51770\Desktop\pythonModeling\wheat_train_PCA_X.txt")
y = np.loadtxt(r"C:\Users\51770\Desktop\pythonModeling\wheat_train_PCA_Y.txt")

# Autoscale every column: zero mean, unit standard deviation.
avg = X.mean(axis=0)
std = X.std(axis=0)
X = (X - avg) / std

# `percentile` must be passed by keyword: current scikit-learn releases
# reject it as a positional argument.
# Alternative scorer: SelectPercentile(f_classif, percentile=30), which
# also fills in pvalues_.
selector2 = SelectPercentile(mutual_info_classif, percentile=30)
Xnew = selector2.fit_transform(X, y)  # matrix rebuilt from the selected variables

# mutual_info_classif returns scores only, so pvalues_ is None for this
# selector; print the scores that drove the selection instead.
print(selector2.scores_)
print(selector2.get_support())
indx = selector2.get_support(indices=True)
print(indx)

pca = PCA(Xnew)
ans = pca.SVDdecompose()
print(ans[1])
pca.plotScore(y)
# %%