def do_pls(data_x, data_y, train_split_percentage):
    """Select the best number of PLS latent variables (1..20) on a hold-out
    split, then refit a final model on the full data.

    Returns (final_model, rmsec, r2c, rmsecv, r2v).
    """
    # NOTE: the outputs are deliberately assigned swapped so that the split of
    # size `train_split_percentage` becomes the training set.
    x_test, x_train, y_test, y_train = train_test_split(
        data_x, data_y, test_size=train_split_percentage, random_state=0)

    # Hold-out RMSE for each candidate component count (1..20).
    rmse_per_count = []
    for n_comp in range(1, 21):
        candidate = PLSRegression(n_components=n_comp, scale=True)
        candidate.fit(x_train, y_train)
        held_out_pred = candidate.predict(x_test)
        rmse_per_count.append(sqrt(mean_squared_error(y_test, held_out_pred)))

    best_factor = np.argmin(rmse_per_count)

    # Refit with the winning count to collect calibration/validation stats.
    refit = PLSRegression(n_components=best_factor + 1, scale=True)
    refit.fit(x_train, y_train)
    rmsec = sqrt(mean_squared_error(y_train, refit.predict(x_train)))
    r2c = refit.score(x_train, y_train)
    rmsecv = sqrt(mean_squared_error(y_test, refit.predict(x_test)))
    r2v = refit.score(x_test, y_test)

    # Final model trained on all available data.
    plsfinal = PLSRegression(n_components=best_factor + 1, scale=True)
    plsfinal.fit(data_x, data_y)
    return plsfinal, rmsec, r2c, rmsecv, r2v
def get_score(X_train, X_test, y_train, y_test, nc):
    '''
    input:training and testing dataset
    output:r2 score of 2 methods->pca_score,pls_score
    '''
    # Principal-component regression: reduce X with PCA, then fit OLS.
    pca = PCA(n_components=nc)
    train_reduced = pca.fit_transform(X_train)
    test_reduced = pca.transform(X_test)
    pcr = LinearRegression().fit(train_reduced, y_train)
    pca_score = pcr.score(test_reduced, y_test)
    predictions = pcr.predict(test_reduced)    # test-set predictions
    predictions1 = pcr.predict(train_reduced)  # training-set predictions
    print(predictions, predictions1)
    # NOTE(review): `item` is a module-level name, not a parameter — confirm.
    plt.title("comparison of PLSR and PCA method(nc={},{})".format(nc, item))
    plt.xlabel("observed")
    plt.ylabel("fitted")
    plt.scatter(y_test / 100, predictions / 100, label='pca')
    # Partial least squares regression on the raw features.
    pls = PLSRegression(n_components=nc, ).fit(X_train, y_train.astype(int))
    pls_score = pls.score(X_test, y_test)
    yfit = pls.predict(X_test)
    yfit1 = pls.predict(X_train)
    print(yfit, yfit1)
    plt.scatter(y_test / 100, yfit / 100, label='plsr')
    plt.legend()
    # plt.show()
    return pca_score, pls_score, predictions / 100, predictions1 / 100, yfit / 100, yfit1 / 100
def build_evaluate_pls_model(train, test, n_components, bc_lambda, model_vars):
    # Fit a linear model using Partial Least Squares Regression on the
    # Box-Cox-transformed property-crime target, then evaluate it on both
    # the train and test frames via the sibling `evaluate_model` helper.
    # Returns (fitted_model, train_residuals, test_residuals).
    # Reduce feature space to `n_components` dimensions.
    pls1 = PLSRegression(n_components=n_components)
    # Reduce X to R(X) and regress on y.
    pls1.fit(train[model_vars], train["property_crime_bc"])
    # Report in-sample and out-of-sample R^2, collecting residuals for each.
    print('R-squared PLSR (Train):', pls1.score(train[model_vars], train["property_crime_bc"]))
    resids_train = evaluate_model(pls1, train, bc_lambda, "Train", model_vars)
    print('R-squared PLSR (Test):', pls1.score(test[model_vars], test["property_crime_bc"]))
    resids_test = evaluate_model(pls1, test, bc_lambda, "Test", model_vars)
    return pls1, resids_train, resids_test
class MyPLS():
    """Thin wrapper around sklearn's PLSRegression with a flattened predict().

    Fix: constructor and method arguments were passed positionally; modern
    scikit-learn (>= 1.0) makes these parameters keyword-only, so positional
    calls raise TypeError. All forwards now use keywords.
    """

    def __init__(self, n_components=2, scale=True, max_iter=500, tol=1e-06, copy=True):
        self.pls = PLSRegression(n_components=n_components, scale=scale,
                                 max_iter=max_iter, tol=tol, copy=copy)

    def fit(self, X, Y):
        """Fit the wrapped model; returns the underlying PLSRegression."""
        self.pls.fit(X, Y)
        return self.pls

    def predict(self, X, copy=True):
        # PLSRegression.predict returns shape (n, 1); flatten to 1-D.
        return self.pls.predict(X, copy=copy).flatten()

    def score(self, X, Y, sample_weight=None):
        return self.pls.score(X, Y, sample_weight=sample_weight)

    def get_params(self, deep=True):
        return self.pls.get_params(deep=deep)

    def set_params(self, **parameters):
        self.pls.set_params(**parameters)
        return self

    @property
    def intercept_(self):
        # Presumably 0 because the wrapped model centers the data — confirm.
        return 0

    @property
    def coeff_(self):
        # NOTE: name kept as 'coeff_' (sic) for backward compatibility.
        return self.pls.coef_
def fit(self, predictors, predictands, locations, log=False, **kwargs):
    # Fit one 2-component PLS model per location, pairing each location with
    # the matching column of `predictands`. (Python 2 module: note the bare
    # print statement below.)
    self.locations = locations
    self.models = []
    self.n = predictors['n']
    id = 0  # column index into predictands; shadows the builtin `id`
    for location in locations:
        # Presumably extracts an n-by-n predictor window around `location`
        # — confirm against the extract_n_by_n helper.
        X = extract_n_by_n(predictors, location, **kwargs)
        Y = predictands[:,id]
        if log:
            # Optional log transform of the target.
            Y = np.log(Y)
        #pca = PCA(n_components='mle', whiten=True)
        model = PLSRegression(n_components=2)
        model = model.fit(X,Y)
        #components = pca.components_
        #pca.components_ = components
        self.models.append(model)
        print "pls: ", location, model.score(X, Y), model.x_loadings_.shape, np.argmax(model.x_loadings_, axis=0)
        id += 1
def train_plsr(matrix, ty, n):
    """Train a 5-component PLS regressor and report its training score plus
    held-out predictions.

    Fix: the model was previously fitted on the FULL dataset *before* the
    train/test split, so every "test" row had leaked into training. The model
    is now fitted only on the training partition.

    Parameters
    ----------
    matrix, ty : feature matrix and target vector.
    n : test-set size as a percentage (e.g. 20 -> 20% held out).

    Returns (training_score, {'predict': [...], 'real': y_test}).
    """
    X_train, X_test, y_train, y_test = train_test_split(matrix, ty, test_size=n / 100)
    clf = PLSRegression(n_components=5)
    clf.fit(X_train, y_train)
    #scores = cross_val_score(clf, matrix, ty, cv =10)
    scores = clf.score(X_train, y_train)
    print_plsr_importance(clf)
    # predict() returns column vectors; take the scalar from each row.
    predict_result = {'predict': [each[0] for each in clf.predict(X_test)], 'real': y_test}
    return (scores, predict_result)
class PartialLeastSquareRegressor(Regressor):
    """PLS regression exposed through the project's Regressor interface."""

    def __init__(self, n_components):
        super().__init__()
        self.regressor = PLSRegression(n_components=n_components)

    def fit(self, x_train, y_train):
        # Fit the PLS model, stash the training data, and run the base
        # class inference step.
        # Returns (None, coefficients, self.p, training R^2).
        # NOTE(review): `self.p` is presumably populated by `_inference()`
        # (defined on Regressor) — confirm against the base class.
        self.regressor.fit(x_train, y_train)
        self.y_train = y_train
        self.x_train = x_train
        self._inference()
        return None, self.regressor.coef_, self.p, self.regressor.score(x_train, y_train)
def get_pls_scores_permutation(self, gArray, mArray, gSizes, mSizes, numPermutation=NUM_PERMUTATION):
    '''
    g and m from legacy code, no particular meaning ???
    Permutation will be done within each data slice, due to different data
    characteristics in time points or delta, or etc.

    Builds a null distribution: for every (g, m) size pair, fills matrices
    with permuted values and records the resulting PLS R^2 score.
    '''
    SampleNumber = self.SampleNumber
    PLS = PLSRegression(n_components=3)
    scores = []
    for jj in range(numPermutation):
        # Progress marker every 10th permutation (last digit of jj is '0').
        if str(jj)[-1] == '0':
            print(" Permutation --- %d" % jj)
        for g in gSizes:
            # g-column matrix of permuted gArray samples.
            matrix1 = []
            for ii in range(g):
                matrix1.append(permutation(gArray, SampleNumber))
            matrix1 = np.array(matrix1).T
            for m in mSizes:
                # m-column matrix of permuted mArray samples.
                matrix2 = []
                for ii in range(m):
                    matrix2.append(permutation(mArray, SampleNumber))
                matrix2 = np.array(matrix2).T
                # Always regress with the wider matrix as the predictors.
                if matrix1.shape[1] > matrix2.shape[1]:
                    PLS.fit(matrix1, matrix2)
                    PLSscore = PLS.score(matrix1, matrix2)
                else:
                    PLS.fit(matrix2, matrix1)
                    PLSscore = PLS.score(matrix2, matrix1)
                scores.append(PLSscore)
    return scores
def get_pls_scores_real(self, gCommunities, mCommunities, gDF, mDF):
    '''
    Compute PLS2 scores for all pairwise communities from two societies.

    Parameters
    ----------
    gCommunities, mCommunities, gDF, mDF
        Communities and DataMatrix from society_1, society_2

    Returns
    -------
    pls_scores
        list as [( g, m, PLSscore ), ...]
    '''
    PLS = PLSRegression(n_components=3)
    pls_scores = []
    for g in gCommunities.keys():
        # Skip communities with fewer than 3 members.
        if len(gCommunities[g]) >= 3:
            #print(g,)
            for m in mCommunities.keys():
                if len(mCommunities[m]) >= 3:
                    # Getting corresponding rows from btm and metabo.
                    matrix1, matrix2 = gDF.values[ gCommunities[g], :], mDF.values[mCommunities[m], :]
                    # Transpose so rows are samples and columns are members.
                    matrix1, matrix2 = np.transpose(matrix1), np.transpose(matrix2)
                    print("input matrices ", matrix1.shape, matrix2.shape)
                    # PLS regression: the wider matrix is used as predictors.
                    if matrix1.shape[1] > matrix2.shape[1]:
                        PLS.fit(matrix1, matrix2)
                        PLSscore = PLS.score(matrix1, matrix2)
                    else:
                        PLS.fit(matrix2, matrix1)
                        PLSscore = PLS.score(matrix2, matrix1)
                    pls_scores.append((g, m, PLSscore))
    return pls_scores
def PLSRegressionTest(self):
    # Sweep n_components from 1 up to the number of features, score each
    # model on the held-out set, plot the score curve, then refit with the
    # best component count via the class's comparison helper.
    x_train, y_train, x_test, y_test = NIRFit03.get_data(NIRFit03.fileName)
    n_components = 0
    scores = [0 for x in range(x_train.shape[1])]
    while n_components < x_train.shape[1]:
        n_components += 1
        plsg = PLSRegression(n_components=n_components)
        plsg.fit(x_train, y_train)
        scores[n_components - 1] = plsg.score(x_test, y_test)
    # Plot score vs component count.
    xx = np.linspace(1, len(scores), len(scores))
    plt.figure()
    plt.plot(xx, scores, 'o-')
    plt.show()
    print('scores:\n', scores)
    # Refit using the n_components that maximises the test score.
    plsg2 = PLSRegression(n_components=np.argmax(scores) + 1)
    NIRFit03.try_different_method(plsg2, x_train, y_train, x_test, y_test)
def lex_function_learning( class_name, hyper_vec ) :
    # Learn a PLS mapping from hyponym vectors to (hypernym - hyponym)
    # difference vectors for one class. Returns (model, training R^2,
    # number of training examples). Python 2 (print statement below).
    #pls2 = KernelRidge( kernel = "rbf", gamma= 100)
    #pls2 = KernelRidge( )
    pls2 = PLSRegression(n_components=50, max_iter=5000)
    X = extract_postive_features ( train_dataset[class_name][0], train_dataset[class_name][1] )
    Y = []
    for hypo_vec in X :
        sub = hyper_vec-hypo_vec
        Y.append(sub)
        # Target = difference vector ( Hypernym_vector - Hyponym_vector )
        #Y.append(hyper_vec)
        # Target = Hypernym vector
    pls2.fit( X, Y)
    train_acc = pls2.score(X, Y)
    print "class = ", class_name, "train len = ", len(X)
    return pls2, train_acc, len(X)
def doPLS(metrics, color='r', marker='+', perc=10):
    """Fit a 1-component PLS model on the slice of `metrics` whose first
    input column equals `perc` (the fixed cache setting), print its score and
    coefficients, scatter the observed outputs, and return (coef_, outputs).

    Relies on module-level `metricsInput2` / `metricsOutput2` column lists.
    """
    # Assemble input/output matrices, appending one all-zero row to each.
    zero_in = np.zeros(len(metricsInput2))
    zero_out = np.zeros(len(metricsOutput2))
    inputs = np.array([metrics[m] for m in metricsInput2]).T.astype(float)
    outputs = np.array([metrics[m] for m in metricsOutput2]).T.astype(float)
    inputs = np.vstack((inputs, zero_in))
    outputs = np.vstack((outputs, zero_out))
    combined = np.concatenate((inputs, outputs), axis=1)

    # Keep only the rows whose first column matches the fixed cache value.
    fixed = combined[combined[:, 0] == perc]
    inp = fixed[:, 1:2]
    out = fixed[:, 2:4]

    # Degree-1 polynomial expansion (no bias, no interaction terms).
    poly = PolynomialFeatures(1, include_bias=False, interaction_only=False)
    inp = poly.fit_transform(inp)

    pls2 = PLSRegression(n_components=1, scale=False)
    pls2.fit(inp, out)
    print(pls2.score(inp, out))
    print(pls2.coef_)
    out_pls_pred0 = inp.dot(pls2.coef_)[:, 0]  # kept for parity; unused below

    plt.scatter(inp[:, 0], out[:, 0], c='black', s=30, marker=marker)
    return pls2.coef_, out
def pls_regression(X, y, k, components):
    """k-fold cross-validation of a PLS model with `components` components.

    Prints per-fold RMSE and R^2 and returns (rmse_list, r2_list).
    """
    model = PLSRegression(n_components=components)
    folds = KFold(n_splits=k, shuffle=True)
    rmse_l = []
    r2_l = []
    for fold_no, (train_index, test_index) in enumerate(folds.split(X), start=1):
        X_tr, X_tst = X[train_index], X[test_index]
        y_tr, y_tst = y[train_index], y[test_index]
        model.fit(X_tr, y_tr)
        r2 = model.score(X_tst, y_tst)
        y_pred = model.predict(X_tst)
        rmse = math.sqrt(metrics.mean_squared_error(y_tst, y_pred))
        rmse_l.append(rmse)
        r2_l.append(r2)
        print("[{}] RMSE: {}, R2: {}".format(fold_no, round(rmse, 2), round(r2, 2)))
    return (rmse_l, r2_l)
def cyl_PLSR(data):
    '''
    Performs PLSR on the data separated into individual cylinders and plot
    the scores and loadings plots
    '''
    scaled = scaler(data)
    features = scaled[:, 2:6]
    target = scaled[:, 0]

    model = PLSRegression(n_components=2)
    model.fit(features, target)
    print('The R2Y value is', model.score(features, target))

    # Scores plot: X- and Y-scores of the two latent components.
    plt.figure()
    plt.scatter(model.x_scores_[:, 0], model.x_scores_[:, 1])
    plt.scatter(model.y_scores_[:, 0], model.y_scores_[:, 1])
    plt.title('Scores Plot')

    # Loadings plot: one point per predictor plus the response loading.
    x_load = model.x_loadings_
    plt.figure()
    plt.scatter(x_load[0, 0], x_load[0, 1], label='Displacement')
    plt.scatter(x_load[1, 0], x_load[1, 1], label='Horsepower')
    plt.scatter(x_load[2, 0], x_load[2, 1], label='Weight')
    plt.scatter(x_load[3, 0], x_load[3, 1], label='Acceleration')
    plt.scatter(model.y_loadings_[:, 0], model.y_loadings_[:, 1], label='MPG')
    plt.title('Loadings Plot')
    plt.legend(loc='best')
def pool_pls_pred(ro_wind, pre_wind):
    # Rolling-window PLS prediction: for each trading day, pool the previous
    # `ro_wind` days (lagged by `pre_wind`) to fit coefficients, then predict
    # that day's cross-section. Relies on module-level globals: trade_days,
    # new_f (features + 'stock_rela' target), comp_num.
    # Returns (predictions frame, coefficients frame), both date-indexed.
    prediction = {}
    coef_param = {}
    for i in np.arange((ro_wind + pre_wind), len(trade_days), 1):
        # Slice the pooled estimation window for the regression.
        date_roll = pd.to_datetime(trade_days[(i - ro_wind - pre_wind):(i - pre_wind)])
        sub_data = new_f.loc[date_roll, :]
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        # Last column is the target; the rest are predictors.
        pls = PLSRegression(n_components=comp_num).fit(sub_data.iloc[:, 0:-1], sub_data.iloc[:, -1])
        # Keep the fitted coefficients for this date.
        coef_param[trade_days[i]] = pd.DataFrame(pls.coef_.T, index=[trade_days[i]],
                                                 columns=sub_data.iloc[:, 0:-1].columns)
        print("correct rate: ", pls.score(sub_data.iloc[:, 0:-1], sub_data.iloc[:, -1]))
        # Coefficients only become usable (pred_window + 1) days later.
        test_data = new_f.loc[pd.to_datetime(trade_days[i]), :]
        test_data = test_data.drop(['stock_rela'], axis=1)
        prediction[trade_days[i]] = pd.Series(pls.predict(test_data)[:, 0], index=test_data.index)
        print(trade_days[i])
    pred = pd.concat(prediction, axis=1).T
    pred.index = pd.to_datetime(pred.index)
    cof = pd.concat(coef_param.values())
    cof.index = pd.to_datetime(cof.index)
    return pred, cof
def PLSR_groupCV(data):
    '''
    Leave-one-city-out cross-validation of a 2-component PLS model.

    For each city label in column 7, trains on the remaining cities and
    scores on the held-out city. Returns the list of held-out R2Y scores,
    one per city.

    Fixes: removed the unused `R2Y` accumulator and corrected the docstring
    (the function returns one score per city, not a single R2Y value).
    '''
    diff = []
    cities = [1, 2, 3]
    for group in cities:
        train = []
        test = []
        # Column 7 holds the city/group label.
        for i in range(len(data[:, 0])):
            if data[i, 7] == group:
                test.append(data[i, :])
            else:
                train.append(data[i, :])
        test = np.array(test)
        train = np.array(train)
        # Fit the scaler on training rows only, then apply it to the
        # held-out city to avoid leaking test statistics.
        trainScale = StandardScaler()
        trainScaled = trainScale.fit_transform(train)
        testScaled = trainScale.transform(test)
        PLSR = PLSRegression(n_components=2)
        PLSR.fit(trainScaled[:, 2:6], trainScaled[:, 0])
        error = PLSR.score(testScaled[:, 2:6], testScaled[:, 0])
        diff.append(error)
    return diff
# --- Python 2 script excerpt: tail of a per-fold SVR reporting loop,
# followed by the optimised-PLS training section. The enclosing loop starts
# before this excerpt and the final while-loop body continues after it. ---
    print round(SVRpreds[i],2)
    i += 1
print "\n"
# Record SVR fold statistics.
SVRr2.append(optSVR.score(XTest, yTest))
SVRmse.append( metrics.mean_squared_error(yTest,SVRpreds))
SVRrmse.append(math.sqrt(SVRmse[metcount]))
print ("Support Vector Regression prediction statistics for fold %d are; MSE = %5.2f RMSE = %5.2f R2 = %5.2f\n\n" % (metcount+1, SVRmse[metcount], SVRrmse[metcount],SVRr2[metcount]))
with open(train_name,'a') as ftrain :
    ftrain.write("Support Vector Regression prediction statistics for fold %d are, MSE =, %5.2f, RMSE =, %5.2f, R2 =, %5.2f,\n\n" % (metcount+1, SVRmse[metcount], SVRrmse[metcount],SVRr2[metcount]))
ftrain.close()  # redundant: the with-block already closed the file
# Train partial least squares and predict with optimised parameters
print("\n\n------------------- Starting opitimised PLS training -------------------")
optPLS = PLSRegression(n_components = nc)
optPLS.fit(XTrain, yTrain)  # Train the model
print("Training R2 = %5.2f" % optPLS.score(XTrain,yTrain))
print("Starting optimised PLS prediction")
PLSpreds = optPLS.predict(XTest)
print("The predicted values now follow :")
PLSpredsdim = PLSpreds.shape[0]
i = 0
# Pretty-print predictions 5/4/3 per row depending on divisibility.
if PLSpredsdim%5 == 0:
    while i < PLSpredsdim:
        print round(PLSpreds[i],2),'\t', round(PLSpreds[i+1],2),'\t', round(PLSpreds[i+2],2),'\t', round(PLSpreds[i+3],2),'\t', round(PLSpreds[i+4],2)
        i += 5
elif PLSpredsdim%4 == 0:
    while i < PLSpredsdim:
        print round(PLSpreds[i],2),'\t', round(PLSpreds[i+1],2),'\t', round(PLSpreds[i+2],2),'\t', round(PLSpreds[i+3],2)
        i += 4
elif PLSpredsdim%3 == 0 :
    while i < PLSpredsdim :
(Xtest, ytest) = loadData(xtestpath, ytestpath) #trim off background and scale ytrain=ytrain[:,1:] #ytrain=scale(ytrain) Xtrain=standardize(Xtrain) #trim off background and scale ytest = ytest[:,1:] #ytest = scale(ytest) Xtest = standardize(Xtest) pls = PLSRegression(n_components=10) pls.fit(Xtrain, ytrain) y_pls = pls.predict(Xtest) print 1 + pls.score(Xtest, ytest) pls_rmse=[] pls_rmse.append(sqrt(mean_squared_error(ytest[:,0], y_pls[:,0]))) pls_rmse.append(sqrt(mean_squared_error(ytest[:,1], y_pls[:,1]))) pls_rmse.append(sqrt(mean_squared_error(ytest[:,2], y_pls[:,2]))) pls_rmse.append(sqrt(mean_squared_error(ytest[:,3], y_pls[:,3]))) fig = plt.figure(figsize=(20,10)) ax1 = fig.add_subplot(241) ax1.plot(y_pls[:,0], c='r', label='PLS Fit') ax1.plot(ytest[:,0], c='grey', label='Target') ax1.set_xlabel('Time') ax1.set_ylabel('[c]')
def PLS_Regression(csv_data, point_index, sub_index, var_name, train=None, components=None):
    '''
    Fit a PLS regression for each of 7 response columns and plot observed vs
    fitted values in a 3x3 grid, annotating each subplot with its R^2.

    csv_data    : numeric array; predictors are the 9 columns starting at
                  point_index-1, responses start at sub_index-1.
    var_name    : list of response names used as subplot titles.
    train       : if True, hold out 15% of rows before fitting.
    components  : number of PLS components (defaults to the predictor count).
    '''
    plt.figure()
    # plt.subplot(3, 3, 1)
    # plt.plot([0, 1], [0, 1])
    # plt.subplot(3, 3, 2)
    # plt.plot([0, 1], [0, 2])
    # plt.subplot(3, 3, 3)
    # plt.plot([0, 3], [0, 4])
    # plt.subplot(3, 3, 4)
    # plt.plot([0, 1], [0, 2])
    # plt.subplot(3, 3, 5)
    # plt.plot([0, 1], [0, 1])
    # plt.subplot(3, 3, 6)
    # plt.plot([0, 1], [0, 1])
    # plt.subplot(3, 1, 3)
    # plt.plot([0, 1], [0, 3])
    # plt.show()
    for i in range(7):
        # Build the predictor matrix: 9 consecutive columns per row.
        X_array = []
        temp_array = []
        for j in csv_data:
            temp_array = j[point_index - 1:point_index + 8]
            X_array.append(temp_array)
        X_array = np.array(X_array)
        # i-th response column.
        Y_array = np.array(csv_data[:, sub_index - 1 + i])
        if train == True:
            # Optional 85/15 split; only the training part is kept here.
            X_array, X_test, Y_array, Y_test = train_test_split(
                X_array, Y_array, test_size=0.15, random_state=42)
        if components == None:
            components = np.shape(X_array)[1]
        plsrModel = PLSRegression(n_components=components)
        plsrModel.fit(X_array, Y_array)
        # Rounded coefficients (as strings) — used by the commented report.
        coefs = plsrModel.coef_
        coefs = np.around(coefs, decimals=2)
        coefs = coefs.astype(str)
        # print(var_name[sub_index + i])
        # print("y =",end="")
        # for i in range(9):
        #     print(coefs[i][0],end="")
        #     print("*x",end="")
        #     print(i+1,end="")
        #     if i != 8:
        #         print(" + ",end="")
        # print('')
        # print("R^2 =",np.around(plsrModel.score(X_array, Y_array),decimals=2))
        Y_predict = plsrModel.predict(X_array)
        # Observed vs fitted for this response, annotated with R^2.
        plt.subplot(3, 3, i + 1)
        plt.xlabel('X-axis')
        plt.ylabel('Y-axis')
        score = np.around(plsrModel.score(X_array, Y_array), decimals=2)
        plt.text(0.6, 10, "R^2 =" + str(score),
                 horizontalalignment='center', verticalalignment='center')
        plt.title(var_name[sub_index + i])
        plt.scatter(X_array[:, 0], Y_array)
        plt.scatter(X_array[:, 0], Y_predict)
        print(RMSE(Y_array, plsrModel.predict(X_array)))
    plt.suptitle('PLS-Regression')
    plt.show()
#correct not accurate
# Compare PLSRegression and PLSCanonical R^2 on the same train/test split.
# Fix: sklearn.cross_validation was removed in scikit-learn 0.20; the
# train_test_split import now comes from sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.cross_decomposition import PLSRegression
from sklearn.cross_decomposition import PLSCanonical

df = pd.read_csv('newdata.csv')
# Features: everything except 'tag'; targets: everything except the k*/w* columns.
x = df.drop(['tag'], axis=1)
y = df.drop(['kx', 'ky', 'kz', 'wa', 'wb', 'wc', 'wd', 'we', 'wf'], axis=1)
X_train, X_test, Y_train, Y_test = train_test_split(x, y, random_state=5)

plsr = PLSRegression()
plsr.fit(X_train, Y_train)
plsc = PLSCanonical()
plsc.fit(X_train, Y_train)
print(plsr.score(X_test, Y_test))
print(plsc.score(X_test, Y_test))
# --- Script excerpt: balance the dataset, cross-validate the AU->landmark
# PLS model with 3-fold CV, then train the final model on all rows. ---
balX = pd.concat([balX, newSample])
balY = pd.concat([balY, landmarks.loc[newSample.index]])
X = balX[au_cols].values
y = registration(balY.values, neutral)
# Model Accuracy in KFold CV
print("Evaluating model with KFold CV")
n_components = len(au_cols)
kf = KFold(n_splits=3)
scores = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf = PLSRegression(n_components=n_components, max_iter=2000)
    clf.fit(X_train, y_train)
    scores.append(clf.score(X_test, y_test))
print('3-fold accuracy mean', np.round(np.mean(scores), 2))
# Train real model (on the full balanced dataset).
clf = PLSRegression(n_components=n_components, max_iter=2000)
clf.fit(X, y)
print('N_comp:', n_components, 'Rsquare', np.round(clf.score(X, y), 2))
# We visualize the results of our model. The regression was trained on labels 0-1 so we do not recommend exceeding 1 for the intensity. Setting the intensity to 2 will exaggerate the face and anything beyond that might give you strange faces.
# In[116]:
# Plot results for each action unit
f, axes = plt.subplots(5, 4, figsize=(12, 18))
axes = axes.flatten()
# Exaggerate the intensity of the expression for clearer visualization.
def plsregress(Train, Test, devcomp=None, spec='ALLr'):
    '''
    Builds PLSR model using spectra data specified in spec. Plots error on
    the development set vs number of principle components.
    options: 'UV', UVr, 'NIR', 'ALLr', 'ALL'
    '''
    import numpy as np
    import pandas as pd
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    from sklearn.cross_decomposition import PLSRegression
    from sklearn.model_selection import GroupKFold, LeaveOneOut
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.metrics import r2_score, mean_squared_error
    from sklearn.utils import shuffle
    from uv_nir_gos import wl_select
    trainR2 = []
    devMSE = []
    devR2 = []
    # Select wavelengths for the requested spectra type, then shuffle rows.
    X, _, Y, __ = wl_select(Train, Test, spec)
    X, Y = shuffle(X, Y)
    # Sweep component counts 1..19, scoring with leave-one-out CV.
    for i in np.arange(1, 20):
        ytests = []
        ypreds = []
        train_score = []
        cv = LeaveOneOut()  # higher error as expected compared to LOO -- unsure how Cao & co. got their results.
        sample_ids = list(set(X.index.tolist()))
        for train_idx, dev_idx in cv.split(sample_ids):
            # NOTE(review): positional indices from `sample_ids` are applied
            # to `Train` — confirm that X/Train row order actually aligns.
            tr_ix = Train.iloc[train_idx, :].index.tolist()
            dev_ix = Train.iloc[dev_idx, :].index.tolist()
            X_train, X_dev = X.loc[tr_ix], X.loc[dev_ix]
            y_train, y_dev = Y.loc[tr_ix], Y.loc[dev_ix]
            # fit scaler to train apply to test
            scaler = MinMaxScaler()
            X_train_t = scaler.fit_transform(X_train.values)
            X_dev_t = scaler.transform(X_dev.values)
            pls2 = PLSRegression(n_components=i)
            pls2.fit(X_train_t, y_train.values)
            train_score.append(pls2.score(X_train_t, y_train.values))
            y_pred = pls2.predict(X_dev_t)
            ytests += list(y_dev.values)
            ypreds += list(y_pred)
        # Aggregate CV metrics for this component count.
        train_R2 = np.asarray(train_score).mean(axis=0)
        train_R2_std = np.asarray(train_score).std(axis=0)
        dev_R2 = r2_score(ytests, ypreds, multioutput='raw_values')
        dev_MSE = mean_squared_error(ytests, ypreds, multioutput='raw_values')
        devMSE.append(dev_MSE)
        devR2.append(dev_R2)
        trainR2.append(train_R2)
    # Optionally dump metrics for one chosen component count to CSV.
    if devcomp != None:
        resDF = pd.DataFrame(
            [np.asarray(devR2)[devcomp, :], np.asarray(devMSE)[devcomp, :]],
            columns=Y.columns, index=['R2', 'MSE'])
        resDF.to_csv('results/resDF_' + str(spec) + '_PLSR_dev.csv')
    # Plot results
    plt.plot(np.arange(1, 20), np.array(devMSE), '-o')
    plt.xlabel('Number of principal components in regression')
    plt.ylabel('MSE')
    plt.legend(Y.columns.to_list())
    plt.xlim(left=0, right=21)
    plt.savefig('results/PLSR_dev_' + spec + '.png')
    plt.close()
def calibracao(self, idmodelo, nrcomponentes, corteOutlier, qtdeRemocoes):
    # Build a new calibration for `idmodelo`: deactivate previous
    # calibrations, register all samples as VALIDACAO, optionally remove
    # outliers, fit a PLS model on the validation set, and persist
    # RMSEP/R2 figures.
    # NOTE(review): SQL is built by string concatenation — injection-prone;
    # parameterised queries would be safer.
    # NOTE(review): the final UPDATE references `rmsec` and `coeficienteCal`,
    # which are only assigned inside the disabled (triple-quoted) calibration
    # sections below — as written this raises NameError at runtime. Confirm
    # whether the calibration-set section should be re-enabled.
    # Deactivate previous calibrations and reset sample types.
    db.execute("update calibracao set inativo = 'F'" + " where idmodelo = " + str(idmodelo) + " ")
    db.execute(" update amostra set tpamostra = 'NORMAL' where idmodelo = " + str(idmodelo) + "")
    session.commit()
    # Create the calibration row for the model, dated today.
    data_Atual = datetime.today()
    data_em_texto = data_Atual.strftime('%d/%m/%Y')
    cursorCodigo = db.execute(
        "select coalesce(max(idcalibracao),0) + 1 as codigo from calibracao where idmodelo = " + str(idmodelo) + " ")
    for regCodigo in cursorCodigo:
        idcalibracao = regCodigo[0]
    db.execute("insert into calibracao (idcalibracao, idmodelo, dtcalibracao, inativo) "
               "values (" + str(idcalibracao) + "," + str(idmodelo) + " , '" + str(data_em_texto) + "', 'A' )")
    session.commit()
    idmodelo = idmodelo  # no-op kept from original
    print(idmodelo)
    Xtodos = self.selectMatrizX(idmodelo, "TODOS")
    # Register every sample as VALIDACAO; ids arrive as "[123]" strings,
    # so the brackets are stripped before conversion.
    YCodigoTodos = self.selectMatrizY(idmodelo, "ID", "TODOS")
    for amostraX in YCodigoTodos:
        amostra = str(amostraX)
        amostra = amostra.replace("[", "")
        amostra = amostra.replace("]", "")
        db.execute("insert into amostra_calibracao (idcalibracao, idmodelo, idamostra, tpconjunto) "
                   "values (" + str(idcalibracao) + "," + str(idmodelo) + " , '" + str(
                       int(float(amostra))) + "','VALIDACAO' )")
        session.commit()
    # Iteratively drop outliers via KNN, `qtdeRemocoes` passes.
    qtde = 0
    if corteOutlier > 0:
        while qtde < qtdeRemocoes:
            self.detectarOutlierKNN(idmodelo, Xtodos, corteOutlier)
            Xtodos = self.selectMatrizX(idmodelo, "TODOS")
            qtde = qtde + 1
        session.commit()
    Xtodos=self.selectMatrizX(idmodelo, "TODOS")
    #Xtodos = self.selectMatrizX(idmodelo, "TODOS")
    # 65% of samples would go to calibration (Kennard-Stone path disabled).
    number_of_samples = Xtodos.__len__()
    number_of_samples = number_of_samples * 0.65
    # selected_sample_numbers, remaining_sample_numbers = kennardstonealgorithm(X, number_of_samples)
    """amostras_Calibracao = kennardStone(Xtodos, number_of_samples)"""
    # amostras_Calibracao = kennardStone(autoscaled_X, number_of_samples)
    """print(amostras_Calibracao)"""
    print("---")
    print("remaining sample numbers")
    # print(remaining_sample_numbers)
    """#plot samples
    plt.figure()
    plt.scatter(autoscaled_X[:, 0], autoscaled_X[:, 1], label="all samples")
    plt.scatter(autoscaled_X[selected_sample_numbers, 0], autoscaled_X[selected_sample_numbers, 1], marker="*", label="all samples")
    plt.xlabel("x1")
    plt.ylabel("x2")
    plt.legend(loc='upper right')
    plt.show()
    #***************************************************************************************************************
    #fim kennard-stone"""
    # Insercao das amostras de Calibracao
    """cont = 0
    for amostraCalibracao in amostras_Calibracao:
        amostra = str(amostraCalibracao)
        amostra = amostra.replace("[", "")
        amostra = amostra.replace("]", "")
        db.execute("update amostra_calibracao set tpconjunto = 'CALIBRACAO' "
                   " where idcalibracao =" + str(idcalibracao) + " and idmodelo = " + str(idmodelo) + " and idamostra = " + str(int(float(amostra))))
        session.commit()
        print(cont)
        cont = cont + 1
    session.commit()"""
    Xcal = self.selectMatrizX(idmodelo, "CALIBRACAO")
    Xval = self.selectMatrizX(idmodelo, "VALIDACAO")
    """ qtde = 0
    if corteOutlier > 0:
        while qtde < qtdeRemocoes:
            self.detectarOutlierKNN(idmodelo, Xval, corteOutlier)
            self.detectarOutlierKNN(idmodelo, Xcal, corteOutlier)
            Xval = self.selectMatrizX(idmodelo, "VALIDACAO")
            Xcal = self.selectMatrizX(idmodelo, "CALIBRACAO")
            qtde = qtde + 1
    """
    #Ycal = self.selectMatrizY(idmodelo, "VALOR", "CALIBRACAO")
    Yval = self.selectMatrizY(idmodelo, "VALOR", "VALIDACAO")
    #YCodigoCal = self.selectMatrizY(idmodelo, "ID", "CALIBRACAO")
    YCodigoVal = self.selectMatrizY(idmodelo, "ID", "VALIDACAO")
    # Calibration-set fit (disabled — see NameError note at the top).
    """ plsCal = PLSRegression(copy=True, max_iter=500, n_components=nrcomponentes, scale=False, tol=1e-06)
    plsCal.fit(Xcal, Ycal)
    coeficiente = plsCal.score(Xcal, Ycal, sample_weight=None)
    print('score do modelo PLS - Calibracao')
    print(coeficiente)
    print('R2 do modelo PLS - Calibracao')
    coeficienteCal = r2_score(plsCal.predict(Xcal), Ycal)
    print(coeficienteCal)
    """
    # Validation-set fit and statistics.
    plsVal = PLSRegression(copy=True, max_iter=500, n_components=nrcomponentes, scale=False, tol=1e-06)
    plsVal.fit(Xval, Yval)
    coeficiente = plsVal.score(Xval, Yval, sample_weight=None)
    print('score do modelo PLS - Validacao')
    print(coeficiente)
    print('R2 do modelo PLS - Validacao')
    coeficienteVal = r2_score(plsVal.predict(Xval), Yval)
    print(coeficienteVal)
    # print('label_ranking_average_precision_score ')
    # print(label_ranking_average_precision_score(np.array(Yval), np.array(plsVal.y_scores_)))
    """# Ajustar Calculos do RMSEC
    matYPredCalibracao = []
    for itemMatrizY in YCodigoCal:
        amostra = str(itemMatrizY)
        amostra = amostra.replace("[", "")
        amostra = amostra.replace("]", "")
        # print(i)
        linhaMatriz = []
        amostraPredicao = self.selectAmostra(int(float(amostra)), idmodelo)
        Y_pred = plsCal.predict(amostraPredicao)
        # print(Y_pred)
        linhaMatriz.append(round(np.double(Y_pred), 0))
        matYPredCalibracao += [linhaMatriz]
    rmsec = sqrt(mean_squared_error(Ycal, matYPredCalibracao))
    print('RMSEC')
    print(rmsec)
    """
    #Ajustar Calculos do RMSEP (RMSEP over per-sample validation predictions)
    matYPredValidacao = []
    for itemMatrizY in YCodigoVal:
        amostra = str(itemMatrizY)
        amostra = amostra.replace("[", "")
        amostra = amostra.replace("]", "")
        # print(i)
        linhaMatriz = []
        amostraPredicao = self.selectAmostra(int(float(amostra)), idmodelo)
        Y_pred = plsVal.predict(amostraPredicao)
        # print(Y_pred)
        linhaMatriz.append(round(np.double(Y_pred), 0))
        matYPredValidacao += [linhaMatriz]
    rmsep = sqrt(mean_squared_error(Yval, matYPredValidacao))
    print('RMSEP')
    print(rmsep)
    # Atualiza valores da calibracao (uses `rmsec`/`coeficienteCal` — see
    # the NameError note at the top of this method).
    db.execute("update calibracao set rmsec = " + str(rmsec) + " , inativo = 'A'" +
               " , rmsep = " + str(rmsep) +
               " , coeficientecal = " + str(coeficienteCal) +
               " , coeficienteval = " + str(coeficienteVal) +
               " , dtcalibracao = '" + str(data_em_texto) + "'"
               " where idmodelo = " + str(idmodelo) + " and idcalibracao = " + str(idcalibracao) + " ")
    session.commit()
    print("VARIAVEIS LATENTES")
    print(nrcomponentes)
    return idmodelo
def calibracao(self, idmodelo):
    # Create a new calibration record for the model, fit a 12-component PLS
    # model on the CALIBRACAO set, compute RMSEC, and persist the results.
    # NOTE(review): SQL is built by string concatenation — injection-prone;
    # parameterised queries would be safer.
    # Deactivate previous calibrations.
    db.execute("update calibracao set inativo = 'F'" + " where idmodelo = " + str(idmodelo) + " ")
    session.commit()
    # Create the calibration row for the model, dated today.
    data_Atual = datetime.today()
    data_em_texto = data_Atual.strftime('%d/%m/%Y')
    cursorCodigo = db.execute(
        "select coalesce(max(idcalibracao),0) + 1 as codigo from calibracao where idmodelo = " + str(idmodelo) + " ")
    for regCodigo in cursorCodigo:
        idcalibracao = regCodigo[0]
    db.execute(
        "insert into calibracao (idcalibracao, idmodelo, dtcalibracao) "
        "values (" + str(idcalibracao) + "," + str(idmodelo) + " , '" + str(data_em_texto) + "' )")
    session.commit()
    idmodelo = idmodelo  # no-op kept from original
    print(idmodelo)
    conjunto = "CALIBRACAO"
    X = self.selectMatrizX(idmodelo, conjunto)
    Y = self.selectMatrizY(idmodelo, conjunto, "VALOR")
    YCodigo = self.selectMatrizY(idmodelo, conjunto, "ID")
    pls = PLSRegression(copy=True, max_iter=500, n_components=12, scale=False, tol=1e-06)
    pls.fit(X, Y)
    coeficiente = pls.score(X, Y, sample_weight=None)
    print('R2 do modelo PLS')
    print(coeficiente)
    print(r2_score(pls.predict(X), Y))
    #Ajustar Calculos do RMSEC e RMSEP para ficarem dinamicos
    # (original TODO: make the RMSEC/RMSEP calculations dynamic)
    matYPred = []
    for item in YCodigo:
        #print(i)
        linhaMatriz = []
        # Sample ids arrive as "[123]" strings — strip the brackets.
        amostra = str(item)
        amostra = amostra.replace("[", "")
        amostra = amostra.replace("]", "")
        amostraPredicao = self.selectAmostra(int(float(amostra)), idmodelo)
        Y_pred = pls.predict(amostraPredicao)
        #print(Y_pred)
        linhaMatriz.append(np.double(Y_pred))
        matYPred += [linhaMatriz]
        db.execute(
            "insert into amostra_calibracao (idcalibracao, idmodelo, idamostra) "
            "values (" + str(idcalibracao) + "," + str(idmodelo) + " , '" + str(int(float(amostra))) + "' )")
        session.commit()
    # print(mean_squared_error(Y,matYPred))
    # Two equivalent RMSE computations; only `rms` is persisted.
    raizQ = mean_squared_error(Y, matYPred)**(1 / 2)
    rms = sqrt(mean_squared_error(Y, matYPred))
    print('RMSEC')
    print(raizQ)
    print(rms)
    # Persist the calibration metrics (rmsep is set to rmsec here).
    db.execute("update calibracao set rmsec = " + str(rms) + " , inativo = 'A'" + " , rmsep = " +
               str(rms) + " , coeficiente = " + str(coeficiente) + " , dtcalibracao = '" + str(data_em_texto) + "'"
               " where idmodelo = " + str(idmodelo) + " and idcalibracao = " + str(idcalibracao) + " ")
    session.commit()
    return idmodelo
def predicao(self, idmodelo, idamostra):
    # Fit a 12-component PLS model on the CALIBRACAO set, predict one
    # sample, and return the prediction plus model statistics as JSON.
    idmodelo = idmodelo  # no-op kept from original
    idamostra = idamostra
    print(idmodelo)
    print(idamostra)
    conjunto = "CALIBRACAO"
    X = self.selectMatrizX(idmodelo, conjunto)
    Y = self.selectMatrizY(idmodelo, conjunto, "VALOR")
    amostraPredicao = self.selectAmostra(idamostra, idmodelo)
    valorReferencia = self.selectDadosReferenciaAmostra(
        idamostra, idmodelo)
    pls = PLSRegression(copy=True, max_iter=500, n_components=12, scale=False, tol=1e-06)
    pls.fit(X, Y)
    valorPredito = pls.predict(amostraPredicao)
    print('Amostra: ' + str(idamostra) + ' - Valor Predito :' + str(valorPredito))
    coeficiente = pls.score(X, Y, sample_weight=None)
    print('R2 do modelo PLS')
    print(coeficiente)
    print(r2_score(pls.predict(X), Y))
    #Ajustar Calculos do RMSEC e RMSEP para ficarem dinamicos
    matYPred = []
    for i in range(1, 349):
        #print(i)
        linhaMatriz = []
        idAmostraTestes = i
        # NOTE(review): `idAmostraTestes` is never used — every iteration
        # fetches the SAME `idamostra`, so the RMSEC below is computed
        # against 348 copies of one prediction. Probably should pass
        # `idAmostraTestes` to selectAmostra — confirm intent.
        amostraPredicao = self.selectAmostra(idamostra, idmodelo)
        Y_pred = pls.predict(amostraPredicao)
        #print(Y_pred)
        linhaMatriz.append(np.double(Y_pred))
        matYPred += [linhaMatriz]
    # print(mean_squared_error(Y,matYPred))
    raizQ = mean_squared_error(Y, matYPred)**(1 / 2)
    rms = sqrt(mean_squared_error(Y, matYPred))
    print('RMSEC')
    print(raizQ)
    print(rms)
    # Round values for the JSON payload.
    coeficiente = round(coeficiente, 2)
    #valorPredito = round(valorPredito, 2)
    raizQ = round(raizQ, 2)
    valorReferencia = round(valorReferencia, 2)
    # Strip numpy array brackets from the predicted value string.
    valorPreditoString = str(valorPredito)
    valorPreditoString = valorPreditoString.replace("[", "")
    valorPreditoString = valorPreditoString.replace("]", "")
    ## Build the JSON response.
    json_data = jsonify(idamostra=str(idamostra),
                        valorpredito=str(valorPreditoString),
                        rmsec=str(raizQ),
                        idmodelo=str(idmodelo),
                        valorreferencia=str(valorReferencia),
                        coeficiente=str(coeficiente))
    return json_data
def main():
    """Fit a 2-component PLS model on the project's dataset and print the
    held-out R^2."""
    X_train, X_test, y_train, y_test = get_data()
    model = PLSRegression(n_components=2)
    model.fit(X_train, y_train)
    print("test score is", model.score(X_test, y_test))
# --- Python 2 script excerpt: split X/Y in half, fit a 3-component PLS
# model, and compare the estimated coefficients against the true B.
# NOTE(review): the float division in the slice indices and the bare print
# statement mean this only runs under Python 2. ---
X_test = X[X.shape[0] / 2:]
Y_train = Y[0:Y.shape[0] / 2]
Y_test = Y[Y.shape[0] / 2:]
# so x1,x2 are useful, x3-10 are bad
pls2 = PLSRegression(n_components=3)
pls2.fit(X_train, Y_train)
print("True B (such that: Y = XB + Err)")
print(B)
# compare pls2.coef_ with B
print("Estimated B")
print(np.round(pls2.coef_, 1))
print "\n\n PLS scored: %.2f" % pls2.score(X_test, Y_test)
# high variance and have high correlation with the response, in contrast to principal components regression which keys only on high variance
# https://github.com/scikit-learn/scikit-learn/blob/14031f6/sklearn/cross_decomposition/pls_.py#L295
# this is the weight estimation step, note: Yk's k is the iteration / component
#
# blah, not going to dig into the NIPALS algorithm
########################################################################
# Baseline comparison: PCA + linear regression on the top 5 components.
pca = PCA()
X_train_reduced = pca.fit_transform(scale(X_train))[:,0:5]  # take top 5 dim of pca
l = LinearRegression()
l.fit(X_train_reduced, Y_train)
def FindRGBTransformPLS(rgbFrom, rgbTo):
    """Fit a 3-component PLS regression mapping RGB values `rgbFrom` onto
    `rgbTo`; print the R^2 fit score and return the fitted model."""
    model = PLSRegression(n_components=3)
    model.fit(rgbFrom, rgbTo)
    print(model.score(rgbFrom, rgbTo))
    return model
clf = linear_model.Ridge(alpha=0.1)
clf.fit(x_train, y_train)
coef=clf.coef_
clf.score(x_train, y_train)
y_predict=clf.predict(x_test)
'''
windows = 4
# Partial least squares regression, test split 0.3; loop over component
# counts to find the best one (n=5 worked best here); with global
# standardization r2 was 0.3314.
r2_test_best = 0
r2_train_best = 0
n_best = 0
# Reshape y_test into a column vector so sklearn score/predict shapes agree.
y_test = np.reshape(y_test, [np.shape(y_test)[0], 1])
# NOTE(review): the component count is selected by *test-set* score, which
# leaks test information into model selection — confirm this is intended.
for n in range(1, min(np.shape(x_train)[0], np.shape(x_train)[1]) + 1):
    pls2 = PLSRegression(n_components=n, scale=False)
    pls2.fit(x_train, y_train)
    r2_train = pls2.score(x_train, y_train)
    y_predict = pls2.predict(x_test)
    r2_test = pls2.score(x_test, y_test)
    #r2_test=test_r_square(y_predict,y_test)
    if r2_test > r2_test_best:
        r2_test_best = r2_test
        r2_train_best = r2_train
        n_best = n
    else:
        continue
# Refit with the best component count and keep train/test predictions.
pls2 = PLSRegression(n_components=n_best, scale=False)
pls2.fit(x_train, y_train)
y_pred_test = pls2.predict(x_test)
y_pred_train = pls2.predict(x_train)
def plsfinal(trainX, trainY, testX, testY, i):
    """Train an i-component PLS regression on the training data.

    Returns a tuple of (predictions for testX, R^2 score on the test set).
    """
    model = PLSRegression(n_components=i)
    model.fit(trainX, trainY)
    predictions = model.predict(testX)
    r_squared = model.score(testX, testY)
    return predictions, r_squared
# Evaluate the analogy model over the test set: for each row predict the
# target word vector from the two cue vectors, track cosine similarity with
# the true vector, and count top-1 retrieval hits.
a = []  # cosine similarities between predicted and true vectors
for row in testSet:
    testX.append(np.concatenate([vectors[row[1]], vectors[row[2]]]))
    testY.append(vectors[row[0]])
    predictedY = model.predict(
        np.concatenate([vectors[row[1]], vectors[row[2]]]).reshape(1, -1))
    a.append(
        cosine_similarity(predictedY,
                          np.array(vectors[row[0]]).reshape(1, -1)))
    # ans = bestWord(row[0],row[1], row[2], predictedY)
    # if ans.strip() == row[0].strip():
    #     correct+=1
    #     writer.writerow([row[1], row[2], ans, row[0]])
    ans_lst = bestKWords(row[0], row[1], row[2], predictedY, 1)
    flg = False
    for ans in ans_lst:
        if ans.strip() == row[0].strip():
            correct += 1
            flg = True
            writer.writerow([row[1], row[2], ans, row[0]])
            break
    if not flg:  # FIX: idiomatic truthiness test instead of `== False`
        writer.writerow([row[1], row[2], ans_lst, row[0]])
# FIX: the Python 2 print statements below are syntax errors under Python 3
# (which the rest of this file uses); converted to the print() function.
print("Correctly predicted : ", correct, ", Out of :", len(testSet),
      " test data (last 20 percent of the given dataset)")
print("Mean cosine similarity with predicted vector", np.mean(a))
print("Model score : ", model.score(testX, testY))
# Preprocessing: multiplicative scatter correction, then robust scaling.
x_msc, _ = processing.msc(x_reflect)
x_robust = RobustScaler().fit_transform(x_msc)
plt.style.use('dark_background')
pls = PLS(n_components=6)
pls.fit(x, y)
x_fit = pls.predict(x)
# Refit the same PLS object on the MSC-corrected spectra (discards the fit
# on raw x above; x_fit keeps the raw-x predictions).
pls.fit(x_msc, y)
svr = SVR()
svr.fit(x_msc, y)
# NOTE(review): SVR is fit on x_msc but scored on raw x — looks like a
# train/score data mismatch; confirm whether svr.score(x_msc, y) was meant.
print(svr.score(x, y))
ridge = RidgeCV()
ridge.fit(x_msc, y)
# NOTE(review): same mismatch — pls was refit on x_msc but is scored on raw
# x here (the original author tagged this line "# ham").
print(pls.score(x, y))  # ham
# x_fit = pls.predict(x_msc)
print(pls.score(x_msc, y))
print(pls.coef_)
# Rescale coefficients by the per-feature std PLS used internally
# (pls.x_std_ — deprecated/removed in newer scikit-learn; verify version).
coeff_final = pls.coef_.T[0] / pls.x_std_
print('-======')
# print(pls.x_mean_)
# print(pls.y_mean_)
# print('bbbbb')
# print(ridge.coef_)
# Fit a 10-component PLS model on the full calibration matrices X and Y,
# then predict one sample and report scores/loadings.
pls2 = PLSRegression(copy=True,
                     max_iter=5000,
                     n_components=10,
                     scale=True,
                     tol=1e-06)
pls2.fit(X, Y)
# Get X scores
T = pls2.x_scores_
# Get X loadings
P = pls2.x_loadings_
# NOTE(review): amostraPredicao is defined elsewhere in the file — verify
# it has the same feature layout as X.
Y_pred = pls2.predict(amostraPredicao)
print('Valor Predito:')
print(Y_pred)
# R2: how much of the response the model explains.
print('R2 do modelo PLS')
print(pls2.score(X, Y, sample_weight=None))
#print('R2 do modelo')
#print(r2_score(pls2.predict(X), Y))
print('Loadings:')
print(P)
print('Score:')
print(T)
def learn_pls_cls(x_train, y_train, x_dev, y_dev):
    """Fit a 100-component PLS regression on the training split and return
    its R^2 score on the dev split."""
    model = PLSRegression(n_components=100)
    model.fit(x_train, y_train)
    return model.score(x_dev, y_dev)
# NOTE(review): `iComp` is referenced but never defined in this fragment —
# the first three statements presumably sit inside an enclosing
# `for iComp in ...` loop whose header is outside this view; confirm.
Lig_pls_by_ncomp[iComp - 1] = PLSRegression(n_components=iComp,
                                            scale=False,
                                            max_iter=100)
# 5-fold cross-validated score for this component count, on mean-centered
# predictors.
Lig_cv_results = cross_validate(Lig_pls_by_ncomp[iComp - 1],
                                Lig_X_train - Lig_mean,
                                Lig_y_train,
                                cv=5)
Lig_pls_r2val_ncomp[iComp - 1] = np.mean(Lig_cv_results['test_score'])
# Pick the component count with the highest mean CV score; the +1 converts
# the 0-based argmax index back into a component count.
Lig_YM_plsModel_nComp = np.where(
    Lig_pls_r2val_ncomp == np.max(Lig_pls_r2val_ncomp))[0][0] + 1
# Refit at the selected component count; calibration R^2 on the training
# cohort, validation R^2 on the held-out test set and the other cohorts.
Lig_YM_plsModel = PLSRegression(n_components=Lig_YM_plsModel_nComp,
                                scale=False,
                                max_iter=100)
Lig_YM_plsModel.fit(Lig_X_train - Lig_mean, Lig_y_train)
Lig_plsR2_cal = Lig_YM_plsModel.score(Lig_X_train - Lig_mean, Lig_y_train)
Lig_plsR2_val[0] = Lig_YM_plsModel.score(Lig_X_test - Lig_mean, Lig_y_test)
for iC in range(1, N_cohorts):
    # Score against each simulated cohort, using the same mean-centering
    # offset as the training data.
    Lig_plsR2_val[iC] = Lig_YM_plsModel.score(
        simDat[iC][iR].simDat.loc[:, 'Collagen':'Pentosidine'] - Lig_mean,
        simDat[iC][iR].simDat.loc[:, 'Youngs Modulus'])
print('Ligament Correlations calculated')
### Sanity Checks: Population Characteristic Histograms
# Select the repetition with the worst validation score in each cohort
# (index 0 is the first rep of the baseline cohort).
iR = [
    0,
    np.where(YM_plsR2_val[1] == np.min(YM_plsR2_val[1]))[0][0],
    np.where(YM_plsR2_val[2] == np.min(YM_plsR2_val[2]))[0][0],
    np.where(YM_plsR2_val[3] == np.min(YM_plsR2_val[3]))[0][0],
]  # reset repetition index to first rep
def plsvip (X, Y, V, lat_var):
    """Backward attribute elimination for PLS guided by VIP scores.

    Repeatedly fits a PLS model (with the best latent-variable count found
    by `best_latent_variable`), then removes the attribute with the lowest
    VIP score (via `get_vip`) until none remain.

    WARNING: mutates its arguments in place — columns are popped from every
    row of X and names from V as attributes are eliminated.

    Parameters:
        X       -- list of instances (list of lists); columns are removed in place
        Y       -- response values
        V       -- attribute names, parallel to X's columns; popped in place
        lat_var -- latent-variable cap; falsy means "use all attributes"

    Returns:
        attributes_gone -- list of [removed-attribute-name, R2 before removal,
                           attribute count at that step, best LV count];
                           the first entry uses the sentinel name "None".
    """
    attributes = len(X[0])
    # Falsy lat_var means no cap: allow up to one LV per attribute.
    if not lat_var:
        latent_variables = attributes
    else:
        latent_variables = lat_var
    num_instances = len(X)
    attributes_gone = []
    min_att = -1  # index of the attribute to remove; -1 = none chosen yet
    #start_time = time.time()
    #attr_time = time.time()
    #time_counter = 0
    while attributes>0:
        #if (attributes +9) %10 ==0:
        #    print "total time: ", time.time() - start_time
        #    print "attr time: ", time.time() - attr_time
        #    attr_time = time.time()
        # LV count can never exceed the remaining attribute count.
        if (latent_variables == 0) or (latent_variables > attributes):
            latent_variables = attributes
        lv_best = best_latent_variable(X, Y, latent_variables, num_instances)
        #print "current best lv: ", lv_best, "num. attr. ", attributes
        ####
        #fin_pls = PLSCanonical(n_components = lv_best)
        fin_pls = PLSRegression(n_components = lv_best)
        fin_pls.fit(X, Y)
        # R2 of the model *before* this round's removal.
        currentR2 = fin_pls.score(X, Y)
        #######################################w
        # alternative r2
        """
        meanY4r2 = numpy.mean(Y)
        predY = fin_pls.predict(X)
        RSS = 0
        for i in range (len(Y)):
            RSS += numpy.power (Y[i] - predY[i], 2)
        TSS = 0
        for i in range (len(Y)):
            TSS += numpy.power (Y[i] - meanY4r2, 2)
        alterR2 = 1 - (RSS/TSS)
        #print currentR2, "vs", alterR2
        """
        #######################################w
        # Sentinel larger than any expected VIP score.
        min_vip = 1000
        # First iteration only: record the full model with nothing removed.
        if min_att ==-1:
            attributes_gone.append(["None", currentR2, attributes, lv_best])
        ##########################################r
        #threaded version
        """
        myThreads = []
        VIPcurrent = []
        for i in range (0,attributes):
            myThreads.append(enthread( target = get_vip, args = (fin_pls, lv_best, i, attributes_gone, attributes )) )
        for i in range (0,attributes):
            VIPcurrent.append(myThreads[i].get())
        min_vip = min(VIPcurrent)
        min_att = VIPcurrent.index(min_vip)
        """
        # Working version
        #"""
        # Scan every remaining attribute and remember the lowest VIP score.
        for i in range (0,attributes):
            VIPcurrent = get_vip (fin_pls, lv_best, i, attributes_gone, attributes )
            if VIPcurrent< min_vip:
                min_vip = VIPcurrent
                min_att = i
        #"""
        ##########################################r
        # Record the attribute about to be removed along with the model
        # quality it participated in.
        if min_att >-1:
            attributes_gone.append([V[min_att], currentR2, attributes, lv_best])
        ####### CURRENT : to BE popped, NOT already popped
        # Remove the attribute in place: its name from V and its column
        # from every instance of X.
        V.pop(min_att)
        for i in range (num_instances):
            X[i].pop(min_att)
        attributes -= 1
        #print attributes_gone
        ####
        #time_counter +=1
    return attributes_gone