def datasetSplit_KS(self, X_master, X_slave, y, num=5):
    '''
    Split paired master/slave spectra with the Kennard-Stone (KS) algorithm.

    The selection is computed on the master spectra only; the resulting row
    indices are then applied to the slave spectra and to the responses, so
    the three arrays stay aligned row by row.  According to the original
    note, the selected samples are the ones with the largest variance.

    Param:
        X_master: spectra measured on the master instrument
        X_slave:  spectra measured on the slave instrument
        y:        response values shared by the master and slave spectra
        num:      number of samples KS selects for the first part

    return:
        X_master_prior: selected part of the master spectra
        X_master_next:  remaining part of the master spectra
        X_slave_prior:  selected part of the slave spectra
        X_slave_next:   remaining part of the slave spectra
        y_prior:        responses of the selected samples
        y_next:         responses of the remaining samples
    '''
    selector = KennardStone(X_master, num)
    # Debug trace kept from the original.  A single pre-formatted argument
    # prints the same text under Python 2 and Python 3.
    print("%s %s" % (X_slave.shape, num))
    # picked: indices chosen by KS; rest: indices of the remaining samples.
    picked, rest = selector.KS()
    return (
        X_master[picked],
        X_master[rest],
        X_slave[picked],
        X_slave[rest],
        y[picked],
        y[rest],
    )
def affine_pls2(self, j):
    '''
    Fit a PLS2 model on the master calibration data, train an affine
    correction between master and slave predictions, and score it.

    Reads self.X_m_cal, self.y_m_cal, self.X_m_test, self.y_m_test,
    self.X_s_cal, self.y_s_cal, self.X_s_test, self.y_s_test and self.i.

    Param:
        j: passed together with self.i to deal_4_2 when picking the
           responses and coefficients (presumably the index of the second
           response of the pair -- confirm against deal_4_2)

    return:
        comp_best:        component count reported by pls2_fit
        RMSEP_y0:         first RMSE returned by AT_pre on the slave test set
        y0_pre:           AT_pre prediction for the slave test set
        RMSEC_y0:         first RMSE returned by AT_pre on the slave
                          calibration set
        y_cal_pre:        AT_pre prediction for calibration rows 48..63
        y0_s_test:        first column of the uncorrected slave test prediction
        y1_s_test:        second column of the uncorrected slave test prediction
        RMSEP_no_trans_0: first RMSE returned by AT_pre_no_trans
        y_sure_real:      reference responses of calibration rows 48..63
    '''
    first_col = slice(0, 1)
    second_col = slice(1, 2)
    # Fixed block of calibration rows used as the "sure" check set.
    sure_rows = slice(48, 64)

    model = Partial_LS(self.X_m_cal, self.y_m_cal, folds=10, max_comp=15)
    weights, _scores, loadings, comp_best, coefs, _rmsecv = model.pls2_fit()
    # The outputs are not used below; the call is kept so the execution
    # matches the original.
    _rmsec, _rmsep, _y_test_hat = model.pls_pre(
        self.X_m_test, self.y_m_test, coefs)

    # Reduce responses and coefficients with deal_4_2 for (self.i, j).
    y_m_pair = deal_4_2(self.y_m_cal, self.i, j)
    coef_pair = deal_4_2(coefs, self.i, j)
    y_s_test_pair = deal_4_2(self.y_s_test, self.i, j)
    y_s_cal_pair = deal_4_2(self.y_s_cal, self.i, j)

    projected = Pre_deal(
        self.X_m_cal, self.X_s_cal, weights, loadings, coef_pair,
        self.X_m_cal, y_m_pair, self.X_s_test)
    _t_m, _t_s, y_m_hat, y_s_hat, _t_s_test, y_s_test_hat = projected

    # Keep every column two-dimensional (n x 1) by slicing, not indexing.
    y0_m = y_m_hat[:, first_col]
    y1_m = y_m_hat[:, second_col]
    y0_s = y_s_hat[:, first_col]
    y1_s = y_s_hat[:, second_col]
    y0_s_test = y_s_test_hat[:, first_col]
    y1_s_test = y_s_test_hat[:, second_col]

    affine = Affine_trans1(y0_m, y0_s, y1_m, y1_s, comp_best)
    bia, sin_x, cos_x, _x, b_s, _k_m, _b_m = affine.AT_train()

    # The KS selection result is unused: the index-based choice of the
    # check samples was replaced by the fixed row block below.  The call is
    # retained so the execution matches the original.
    _ks_idx, _ks_rest = KennardStone(self.X_s_cal, 16).KS()

    y0_s_sure = y0_s[sure_rows, :]
    y1_s_sure = y1_s[sure_rows, :]
    y_sure_real = y_s_cal_pair[sure_rows, :]

    rmsec_y0, _rmsec_y1, _y0_cal_hat = affine.AT_pre(
        y0_s, y1_s, bia, cos_x, sin_x, y_s_cal_pair, b_s)
    rmsep_y0, _rmsep_y1, y0_pre = affine.AT_pre(
        y0_s_test, y1_s_test, bia, cos_x, sin_x, y_s_test_pair, b_s)
    rmsep_sure_y0, rmsep_sure_y1, y_cal_pre = affine.AT_pre(
        y0_s_sure, y1_s_sure, bia, cos_x, sin_x, y_sure_real, b_s)
    # Single pre-formatted argument: same output on Python 2 and Python 3.
    print("%s %s" % (y_cal_pre.shape, "shapesss"))

    # Baseline: slave test error without the affine correction.
    rmsep_raw_y0, _rmsep_raw_y1, _y_raw = affine.AT_pre_no_trans(
        y0_s_test, y1_s_test, y_s_test_pair)
    print("%s %s %s %s" % (rmsep_sure_y0, rmsep_sure_y1, "SURE ", rmsep_y0))

    return (comp_best, rmsep_y0, y0_pre, rmsec_y0, y_cal_pre,
            y0_s_test, y1_s_test, rmsep_raw_y0, y_sure_real)
def getStd(X_m_cal, X_s_cal, y_m_cal, y_s_cal, std_num):
    '''
    Pick a standardisation subset from the calibration data.

    Kennard-Stone is run on the master calibration spectra and the selected
    row indices are applied to all four arrays, so the returned subsets
    stay row-aligned.

    Param:
        X_m_cal: master calibration spectra
        X_s_cal: slave calibration spectra
        y_m_cal: master calibration responses
        y_s_cal: slave calibration responses
        std_num: number of samples passed to KennardStone

    return:
        X_m_std, y_m_std, X_s_std, y_s_std
    '''
    # Only the selected indices are needed; the remaining ones are ignored.
    std_idx, _other_idx = KennardStone(X_m_cal, std_num).KS()
    return (
        X_m_cal[std_idx],
        y_m_cal[std_idx],
        X_s_cal[std_idx],
        y_s_cal[std_idx],
    )
def Data_KS_split(X_master, X_slave, y, num):
    '''
    Split master/slave spectra and responses into calibration and test parts.

    Note that Kennard-Stone is run on the SLAVE spectra here; the indices it
    returns are then applied to the master spectra and the responses too.

    Param:
        X_master: master instrument spectra
        X_slave:  slave instrument spectra
        y:        response values
        num:      number of samples passed to KennardStone

    return:
        X_m_cal, y_cal, X_s_cal, X_s_test, X_m_test, y_test
    '''
    cal_idx, test_idx = KennardStone(X_slave, num).KS()

    master_cal = X_master[cal_idx]
    resp_cal = y[cal_idx]
    master_test = X_master[test_idx]
    resp_test = y[test_idx]
    slave_cal = X_slave[cal_idx]
    slave_test = X_slave[test_idx]

    return master_cal, resp_cal, slave_cal, slave_test, master_test, resp_test
def TCA(self, num, i, snv=0):
    '''
    Load data set `i`, tune and apply TCA, and regress responses on the
    transformed scores.

    Param:
        num: number of samples passed to Kennard-Stone, both for the
             calibration/test split and for choosing labelled slave samples
        i:   forwarded to datasetProcess as dataType
        snv: forwarded to datasetProcess as bool

    return:
        m_op:        dimension chosen by tca_param_choose.choose_Param
        RMSEP:       RMSE of the slave test prediction against y_test
        y_test_pre_: predicted responses for the slave test spectra
        RMSEC:       RMSE of the slave calibration prediction against y_cal
        y_cal_pre:   predicted responses for the slave calibration spectra
    '''
    loader = datasetProcess(dataType=i, bool=snv)
    X_master, X_slave, y = loader.datasetImport()
    (X_master_cal, X_master_test, X_slave_cal,
     X_slave_test, y_cal, y_test) = loader.datasetSplit_KS(
        X_master, X_slave, y, num)
    X_m_train, X_m_val, y_m_train, y_m_val = train_test_split(
        X_master_cal, y_cal, test_size=0.5, random_state=0)

    labelled_idx, unlabelled_idx = KennardStone(X_slave, num).KS()
    X_s = X_slave[labelled_idx]      # labelled slave-instrument samples
    X_s_o = X_slave[unlabelled_idx]  # unlabelled slave-instrument samples
    y_s = y[labelled_idx]            # labels of the labelled slave samples

    # Choose the parameters.
    chooser = tca_param_choose(
        X_m_train, y_m_train, X_m_val, y_m_val, X_s, y_s, X_s_o)
    m_op, _k_op = chooser.choose_Param(15)
    # Single pre-formatted argument: same output on Python 2 and Python 3.
    print("%s %s" % (m_op, "m_op"))

    # NOTE(review): `TCA` here is expected to resolve to the module-level
    # TCA transformer, not to this method -- confirm in the enclosing file.
    my_tca = TCA(dim=m_op)

    def predict_slave(X_target):
        # Transform, fit least squares on master + labelled slave scores,
        # then predict the responses for X_target.
        T_m, _T_unlabelled, T_target, T_labelled = my_tca.fit_transform(
            X_master_cal, X_slave_cal, X_target, X_s)
        scores = np.vstack((T_m, T_labelled))
        labels = np.vstack((y_cal, y_s))
        coef = np.linalg.lstsq(scores, labels)[0]
        return np.dot(T_target, coef)

    def rmse(actual, predicted):
        # Column-wise root mean squared error.
        squared_error = np.square(np.subtract(actual, predicted))
        return np.sqrt(np.sum(squared_error, axis=0) / predicted.shape[0])

    y_cal_pre = predict_slave(X_slave_cal)
    RMSEC = rmse(y_cal, y_cal_pre)

    y_test_pre_ = predict_slave(X_slave_test)
    RMSEP = rmse(y_test, y_test_pre_)

    return m_op, RMSEP, y_test_pre_, RMSEC, y_cal_pre
def TCA(self, num, i, X_master, X_slave, y, snv=0):
    '''
    Tune and apply TCA on the instance's calibration data, then regress
    response column `i` on the transformed scores.

    Reads self.X_m_cal, self.y_m_cal, self.X_s_cal, self.y_s_cal,
    self.X_s_test and self.y_s_test.

    Param:
        num:      number of slave samples Kennard-Stone selects as labelled
        i:        index of the response column to model
        X_master: not used by this method (kept for interface compatibility)
        X_slave:  slave spectra from which the labelled samples are chosen
        y:        responses matching X_slave; stacked with column `i` of
                  self.y_s_cal, so it presumably has a single column
        snv:      not used by this method (kept for interface compatibility)

    return:
        m_op:        dimension chosen by tca_param_choose.choose_Param
        RMSEP:       RMSE of the slave test prediction
        y_test_pre_: predicted responses for self.X_s_test
        RMSEC:       RMSE of the slave calibration prediction
        y_cal_pre:   predicted responses for self.X_s_cal
    '''
    X_m_train, X_m_val, y_m_train, y_m_val = train_test_split(
        self.X_m_cal, self.y_m_cal[:, [i]], test_size=0.5, random_state=0)

    labelled_idx, unlabelled_idx = KennardStone(X_slave, num).KS()
    X_s = X_slave[labelled_idx]      # labelled slave-instrument samples
    X_s_o = X_slave[unlabelled_idx]  # unlabelled slave-instrument samples
    y_s = y[labelled_idx]            # labels of the labelled slave samples

    # Choose the parameters.
    chooser = tca_param_choose(
        X_m_train, y_m_train, X_m_val, y_m_val, X_s, y_s, X_s_o)
    m_op, _k_op = chooser.choose_Param(20)
    # Single pre-formatted argument: same output on Python 2 and Python 3.
    print("%s %s" % (m_op, "m_op"))

    # NOTE(review): `TCA` here is expected to resolve to the module-level
    # TCA transformer, not to this method -- confirm in the enclosing file.
    my_tca = TCA(dim=m_op)

    def predict_slave(X_target):
        # Transform, fit least squares on master + labelled slave scores,
        # then predict the responses for X_target.
        T_m, _T_unlabelled, T_target, T_labelled = my_tca.fit_transform(
            self.X_m_cal, self.X_s_cal, X_target, X_s)
        scores = np.vstack((T_m, T_labelled))
        labels = np.vstack((self.y_s_cal[:, [i]], y_s))
        coef = np.linalg.lstsq(scores, labels)[0]
        return np.dot(T_target, coef)

    def rmse(actual, predicted):
        # Column-wise root mean squared error.
        squared_error = np.square(np.subtract(actual, predicted))
        return np.sqrt(np.sum(squared_error, axis=0) / predicted.shape[0])

    y_cal_pre = predict_slave(self.X_s_cal)
    RMSEC = rmse(self.y_s_cal[:, [i]], y_cal_pre)

    y_test_pre_ = predict_slave(self.X_s_test)
    RMSEP = rmse(self.y_s_test[:, [i]], y_test_pre_)

    return m_op, RMSEP, y_test_pre_, RMSEC, y_cal_pre
def Dataset_KS_split_std(X_master, X_slave, y, num, std_num=8):
    '''
    Split the data into calibration/test parts and pick a standardisation
    subset from the calibration part.

    Both selections use Kennard-Stone on MASTER spectra: first on X_master
    for the calibration/test split, then on the master calibration spectra
    for the standardisation subset.  The same indices are applied to the
    slave spectra and the responses, so master and slave responses are
    identical copies of the same rows of `y`.

    Param:
        X_master: master instrument spectra
        X_slave:  slave instrument spectra
        y:        response values
        num:      number of calibration samples requested from KennardStone
        std_num:  number of standardisation samples requested (default 8)

    return:
        X_m_cal, y_m_cal, X_s_cal, y_s_cal, X_s_test, y_s_test,
        X_m_test, y_m_test, X_m_std, y_m_std, X_s_std, y_s_std
    '''
    cal_idx, test_idx = KennardStone(X_master, num).KS()

    master_cal = X_master[cal_idx]
    master_y_cal = y[cal_idx]
    master_test = X_master[test_idx]
    master_y_test = y[test_idx]

    slave_cal = X_slave[cal_idx]
    slave_test = X_slave[test_idx]
    slave_y_cal = y[cal_idx]
    slave_y_test = y[test_idx]

    # Second KS pass works on the calibration rows, so std_idx indexes into
    # the *_cal arrays, not into the full data set.
    std_idx, _rest_idx = KennardStone(master_cal, std_num).KS()
    master_std = master_cal[std_idx]
    master_y_std = master_y_cal[std_idx]
    slave_std = slave_cal[std_idx]
    slave_y_std = slave_y_cal[std_idx]

    return (master_cal, master_y_cal, slave_cal, slave_y_cal,
            slave_test, slave_y_test, master_test, master_y_test,
            master_std, master_y_std, slave_std, slave_y_std)
def Dataset_split(self):
    '''
    Split the instance's data into calibration, test and standardisation
    sets with Kennard-Stone.

    Reads self.X_master, self.X_slave, self.y, self.cal_size and
    self.std_num.  The calibration size is self.cal_size times the number
    of master rows, rounded to an integer.  Both KS selections are computed
    on master spectra and the indices are reused for the slave spectra and
    the responses.

    return:
        X_m_cal, y_m_cal, X_m_std, y_m_std, X_s_std, y_s_std,
        X_s_test, y_s_test, X_s_cal, y_s_cal, X_m_test
    '''
    n_cal = int(round(self.X_master.shape[0] * self.cal_size))

    # Calibration / test split.
    cal_idx, test_idx = KennardStone(self.X_master, n_cal).KS()

    master_cal = self.X_master[cal_idx]
    master_y_cal = self.y[cal_idx]
    master_test = self.X_master[test_idx]
    # Computed as in the original but not part of the return value.
    _master_y_test = self.y[test_idx]

    slave_cal = self.X_slave[cal_idx]
    slave_test = self.X_slave[test_idx]
    slave_y_cal = self.y[cal_idx]
    slave_y_test = self.y[test_idx]

    # Standardisation subset: std_idx indexes into the calibration rows.
    std_idx, _rest_idx = KennardStone(master_cal, self.std_num).KS()
    master_std = master_cal[std_idx]
    master_y_std = master_y_cal[std_idx]
    slave_std = slave_cal[std_idx]
    slave_y_std = slave_y_cal[std_idx]

    return (master_cal, master_y_cal, master_std, master_y_std,
            slave_std, slave_y_std, slave_test, slave_y_test,
            slave_cal, slave_y_cal, master_test)
if __name__ == '__main__': import numpy as np from tca_param_choose import tca_param_choose rmsep_list = [] m_list = [] for i in range(7): # 导入数据 print i DP_demo = datasetProcess(dataType=i) X_master, X_slave, y = DP_demo.datasetImport() X_master_cal, X_master_test, X_slave_cal, X_slave_test, y_cal, y_test = DP_demo.datasetSplit_Random(X_master, X_slave, y, next_size=0.2) X_m_train, X_m_val, y_m_train, y_m_val = train_test_split(X_master_cal, y_cal, test_size=0.5, random_state=0) KS_demo = KennardStone(X_slave, 64) CalInd, ValInd = KS_demo.KS() X_s = X_slave[CalInd] # 从仪器有标签的样本 X_s_o = X_slave[ValInd] # 从仪器没有标签的样本 y_s = y[CalInd] # 从仪器样本的标签 # 选择参数 TPC_demo = tca_param_choose(X_m_train, y_m_train, X_m_val, y_m_val, X_s, y_s, X_s_o) m_op, k_op = TPC_demo.choose_Param(15) print m_op , "m_op" m_list.append(m_op) my_tca = TCA(dim=m_op) T_m_cal, T_s_o, T_slave_test, T_s = my_tca.fit_transform(X_master_cal, X_slave_cal, X_slave_cal, X_s)