Code Example #1
import numpy as np
from math import sqrt
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

def do_pls(data_x, data_y, train_split_percentage):
    latent_variables = []

    # equivalent, clearer split: the training set holds train_split_percentage of the rows
    x_train, x_test, y_train, y_test = train_test_split(data_x, data_y, train_size=train_split_percentage, random_state=0)

    for i in range(20):
        pls = PLSRegression(n_components=(i + 1), scale=True)
        pls.fit(x_train, y_train)
        predicted_cv_y = pls.predict(x_test)
        mean_squared_error_cv = sqrt(mean_squared_error(y_test, predicted_cv_y))
        latent_variables.append(mean_squared_error_cv)

    best_factor = np.argmin(latent_variables)
    pls2 = PLSRegression(n_components=(best_factor + 1), scale=True)
    pls2.fit(x_train, y_train)
    predicted_cal = pls2.predict(x_train)
    rmsec = sqrt(mean_squared_error(y_train, predicted_cal))
    r2c = pls2.score(x_train, y_train)

    predicted_cv_y = pls2.predict(x_test)
    rmsecv = sqrt(mean_squared_error(y_test, predicted_cv_y))
    r2v = pls2.score(x_test, y_test)

    plsfinal = PLSRegression(n_components=(best_factor + 1), scale=True)
    plsfinal.fit(data_x, data_y)

    return plsfinal, rmsec, r2c, rmsecv, r2v
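A minimal usage sketch (illustrative only; it assumes do_pls and the imports above are in scope, and the synthetic data below is made up):

import numpy as np

rng = np.random.RandomState(0)
data_x = rng.normal(size=(100, 30))
data_y = data_x[:, :5].sum(axis=1) + rng.normal(scale=0.1, size=100)

# 70% of the rows go to training; the held-out rows drive the component selection
model, rmsec, r2c, rmsecv, r2v = do_pls(data_x, data_y, 0.7)
print(rmsec, r2c, rmsecv, r2v)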
Code Example #2
def get_score(X_train, X_test, y_train, y_test, nc):
    '''
    input: training and testing datasets
    output: R2 scores of the two methods -> pca_score, pls_score
    '''
    # PCA approach (principal component regression)
    pca = PCA(n_components=nc)
    X_train_reduced = pca.fit_transform(X_train)
    X_test_reduced = pca.transform(X_test)
    pcr = LinearRegression().fit(X_train_reduced, y_train)
    pca_score = pcr.score(X_test_reduced, y_test)
    predictions = pcr.predict(X_test_reduced)  # test-set predictions
    predictions1 = pcr.predict(X_train_reduced)  # training-set predictions
    print(predictions, predictions1)
    plt.title("comparison of PLSR and PCA method(nc={},{})".format(nc, item))  # NOTE: item is taken from the enclosing scope
    plt.xlabel("observed")
    plt.ylabel("fitted")
    plt.scatter(y_test / 100, predictions / 100, label='pca')

    # PLS approach
    pls = PLSRegression(n_components=nc).fit(X_train, y_train.astype(int))
    pls_score = pls.score(X_test, y_test)
    yfit = pls.predict(X_test)
    yfit1 = pls.predict(X_train)
    print(yfit, yfit1)
    plt.scatter(y_test / 100, yfit / 100, label='plsr')
    plt.legend()
    # plt.show()

    return pca_score, pls_score, predictions / 100, predictions1 / 100, yfit / 100, yfit1 / 100
Code Example #3
File: crime_util.py Project: mkm29/DataScience
def build_evaluate_pls_model(train, test, n_components, bc_lambda, model_vars):
    # Fit a linear model using Partial Least Squares Regression.
    # Reduce the feature space to n_components dimensions.
    pls1 = PLSRegression(n_components=n_components)

    # Reduce X to R(X) and regress on y.
    pls1.fit(train[model_vars], train["property_crime_bc"])

    # Save predicted values.
    print('R-squared PLSR (Train):', pls1.score(train[model_vars], train["property_crime_bc"]))
    resids_train = evaluate_model(pls1, train, bc_lambda, "Train", model_vars)

    print('R-squared PLSR (Test):', pls1.score(test[model_vars], test["property_crime_bc"]))
    resids_test = evaluate_model(pls1, test, bc_lambda, "Test", model_vars)

    return pls1, resids_train, resids_test
Code Example #4
class MyPLS():
    def __init__(self,
                 n_components=2,
                 scale=True,
                 max_iter=500,
                 tol=1e-06,
                 copy=True):
        # pass by keyword: these parameters are keyword-only in newer scikit-learn
        self.pls = PLSRegression(n_components=n_components, scale=scale, max_iter=max_iter, tol=tol, copy=copy)

    def fit(self, X, Y):
        self.pls.fit(X, Y)
        return self.pls

    def predict(self, X, copy=True):
        return self.pls.predict(X, copy=copy).flatten()

    def score(self, X, Y, sample_weight=None):
        return self.pls.score(X, Y, sample_weight)

    def get_params(self, deep=True):
        return self.pls.get_params(deep)

    def set_params(self, **parameters):
        self.pls.set_params(**parameters)
        return self

    @property
    def intercept_(self):
        return 0

    @property
    def coeff_(self):
        return self.pls.coef_
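Because get_params/set_params are forwarded, this wrapper can be dropped into scikit-learn model selection. A sketch under that assumption (synthetic data, hypothetical usage):

from sklearn.model_selection import GridSearchCV
import numpy as np

rng = np.random.RandomState(1)
X = rng.normal(size=(80, 12))
Y = X[:, :3].sum(axis=1) + rng.normal(scale=0.1, size=80)

# GridSearchCV clones MyPLS via get_params and scores with MyPLS.score (R2)
search = GridSearchCV(MyPLS(), {'n_components': [1, 2, 3, 4]}, cv=5)
search.fit(X, Y)
print(search.best_params_, search.best_score_)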
Code Example #5
File: pls_sklearn.py Project: jackaranda/phasespace
	def fit(self, predictors, predictands, locations, log=False, **kwargs):

		self.locations = locations
		self.models = []
		self.n = predictors['n']

		id = 0
		for location in locations:
			X = extract_n_by_n(predictors, location, **kwargs)
			Y = predictands[:,id]

			if log:
				Y = np.log(Y)

			#pca = PCA(n_components='mle', whiten=True)
			model = PLSRegression(n_components=2)
			
			model = model.fit(X,Y)
			#components = pca.components_
			#pca.components_ = components
			
			self.models.append(model)
			print "pls: ", location, model.score(X, Y), model.x_loadings_.shape, np.argmax(model.x_loadings_, axis=0)

			id += 1
Code Example #6
File: train.py Project: Rrrima/GeoMental
def train_plsr(matrix, ty, n):
	clf = PLSRegression(n_components=5)
	clf.fit(matrix, ty)
	X_train, X_test, y_train, y_test = train_test_split(matrix, ty, test_size=n/100)
	#scores = cross_val_score(clf, matrix, ty, cv =10)
	# NOTE: clf was fit on the full matrix before the split, so this training
	# score is optimistic and X_test is not truly held out.
	scores = clf.score(X_train, y_train)
	print_plsr_importance(clf)
	predict_result = {'predict':[each[0] for each in clf.predict(X_test)],'real':y_test}
	return(scores,predict_result)  
Code Example #7
class PartialLeastSquareRegressor(Regressor):
    def __init__(self, n_components):
        super().__init__()
        self.regressor = PLSRegression(n_components=n_components)

    def fit(self, x_train, y_train):
        self.regressor.fit(x_train, y_train)
        self.y_train = y_train
        self.x_train = x_train
        self._inference()
        return None, self.regressor.coef_, self.p, self.regressor.score(x_train, y_train)
Code Example #8
File: pls2_network.py Project: shuzhao-li/hiconet
    def get_pls_scores_permutation(self,
                                   gArray,
                                   mArray,
                                   gSizes,
                                   mSizes,
                                   numPermutation=NUM_PERMUTATION):
        '''
        g and m come from legacy code and carry no particular meaning.

        ???
        Permutation is done within each data slice, because the data
        characteristics differ across time points, deltas, etc.

        '''
        SampleNumber = self.SampleNumber
        PLS = PLSRegression(n_components=3)
        scores = []
        for jj in range(numPermutation):
            if jj % 10 == 0: print("            Permutation --- %d" % jj)

            for g in gSizes:
                matrix1 = []
                for ii in range(g):
                    matrix1.append(permutation(gArray, SampleNumber))
                matrix1 = np.array(matrix1).T
                for m in mSizes:
                    matrix2 = []
                    for ii in range(m):
                        matrix2.append(permutation(mArray, SampleNumber))
                    matrix2 = np.array(matrix2).T
                    #
                    if matrix1.shape[1] > matrix2.shape[1]:
                        PLS.fit(matrix1, matrix2)
                        PLSscore = PLS.score(matrix1, matrix2)
                    else:
                        PLS.fit(matrix2, matrix1)
                        PLSscore = PLS.score(matrix2, matrix1)

                    scores.append(PLSscore)

        return scores
Code Example #9
File: pls2_network.py Project: shuzhao-li/hiconet
    def get_pls_scores_real(self, gCommunities, mCommunities, gDF, mDF):
        '''
        Compute PLS2 scores for all pairwise communities from two societies.
        
        Parameters
        ----------
        gCommunities, mCommunities, gDF, mDF
        Communities and DataMatrix from society_1, society_2
        
        Returns
        -------
        pls_scores list as [( g, m, PLSscore ), ...]
        '''
        PLS = PLSRegression(n_components=3)
        pls_scores = []

        for g in gCommunities.keys():
            if len(gCommunities[g]) >= 3:
                #print(g,)
                for m in mCommunities.keys():
                    if len(mCommunities[m]) >= 3:
                        # Getting corresponding rows from btm and metabo.
                        matrix1, matrix2 = gDF.values[
                            gCommunities[g], :], mDF.values[mCommunities[m], :]
                        matrix1, matrix2 = np.transpose(matrix1), np.transpose(
                            matrix2)

                        print("input matrices ", matrix1.shape, matrix2.shape)
                        # PLS regression
                        if matrix1.shape[1] > matrix2.shape[1]:
                            PLS.fit(matrix1, matrix2)
                            PLSscore = PLS.score(matrix1, matrix2)
                        else:
                            PLS.fit(matrix2, matrix1)
                            PLSscore = PLS.score(matrix2, matrix1)

                        pls_scores.append((g, m, PLSscore))

        return pls_scores
Code Example #10
 def PLSRegressionTest(self):
     x_train, y_train, x_test, y_test = NIRFit03.get_data(NIRFit03.fileName)
     n_components = 0
     scores = [0 for x in range(x_train.shape[1])]
     while n_components < x_train.shape[1]:
         n_components += 1
         plsg = PLSRegression(n_components=n_components)
         plsg.fit(x_train, y_train)
         scores[n_components - 1] = plsg.score(x_test, y_test)
     xx = np.linspace(1, len(scores), len(scores))
     plt.figure()
     plt.plot(xx, scores, 'o-')
     plt.show()
     print('scores:\n', scores)
     # select the n_components that maximizes the score and fit the final least-squares model
     plsg2 = PLSRegression(n_components=np.argmax(scores) + 1)
     NIRFit03.try_different_method(plsg2, x_train, y_train, x_test, y_test)
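The same sweep can be written with GridSearchCV, which selects n_components on cross-validated folds of the training set instead of on the test set (tuning on the test set, as above, leaks information). A sketch assuming x_train/y_train as loaded above:

from sklearn.model_selection import GridSearchCV
from sklearn.cross_decomposition import PLSRegression

# assumes the training folds keep at least x_train.shape[1] samples
param_grid = {'n_components': list(range(1, x_train.shape[1] + 1))}
search = GridSearchCV(PLSRegression(), param_grid, cv=5)
search.fit(x_train, y_train)
print(search.best_params_)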
Code Example #11
def lex_function_learning(class_name, hyper_vec):

		#pls2 = KernelRidge( kernel = "rbf", gamma= 100)
		#pls2 = KernelRidge( )
		pls2 = PLSRegression(n_components=50, max_iter=5000)

		X = extract_postive_features ( train_dataset[class_name][0], train_dataset[class_name][1] )			

		Y = []

		for hypo_vec in X :

			sub = hyper_vec-hypo_vec
			Y.append(sub) # Target = difference vector ( Hypernym_vector - Hyponym_vector )
			#Y.append(hyper_vec) # Target = Hypernym vector 

		pls2.fit( X, Y)	
		train_acc = pls2.score(X, Y)
		print "class = ", class_name, "train len = ", len(X)
		
		return pls2, train_acc, len(X)
Code Example #12
def doPLS(metrics, color='r', marker='+', perc=10):
    inp0 = np.zeros(len(metricsInput2))
    out0 = np.zeros(len(metricsOutput2))

    inp = np.array([metrics[m] for m in metricsInput2]).T.astype(float)
    out = np.array([metrics[m] for m in metricsOutput2]).T.astype(float)
    inp = np.vstack((inp, inp0))
    out = np.vstack((out, out0))

    combined = np.concatenate((inp, out), axis=1)  # avoid shadowing the built-in all()
    # fixed cache
    fixed = combined[combined[:, 0] == perc]
    inp_fixed = fixed[:, 1:2]
    out_fixed = fixed[:, 2:4]
    #singleScatter2(1, 2, fixed)
    #singleScatter2(1, 3, fixed)
    # singleScatter2(1, 4, fixed)
    # singleScatter2(1, 5, fixed)

    inp = inp_fixed  #inpnSat #inp_fixed
    out = out_fixed  #outnSat #out_fixed

    poly = PolynomialFeatures(1, include_bias=False, interaction_only=False)
    inp = poly.fit_transform(inp)
    # inp = inp_poly[:, 2:]

    pls2 = PLSRegression(n_components=1, scale=False)
    pls2.fit(inp, out)
    print(pls2.score(inp, out))
    print(pls2.coef_)
    out_pls_pred0 = inp.dot(pls2.coef_)[:, 0]
    #plt.scatter(inp[:, 0], inp.dot(pls2.coef_)[:, 0], c='r', marker=marker)
    #plt.scatter(inp[:, 0], pls2.predict(inp)[:, 0], c='r', marker=marker)
    plt.scatter(inp[:, 0], out[:, 0], c='black', s=30, marker=marker)
    #plt.scatter(inp[:, 0], out[:, 1], c='grey', s=30, marker=marker)
    #plt.scatter(inp[:, 0], inp.dot(pls2.coef_)[:, 1], c='g', marker=marker)
    #plt.scatter(inp[:, 0], pls2.predict(inp)[:, 1], c='g', marker=marker)
    #plt.scatter(inp[:, 0], out[:, 1], c='black', marker=marker)

    return pls2.coef_, out
Code Example #13
File: homework2.py Project: keizerzilla/ica-pos
import math

from sklearn import metrics
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import KFold

def pls_regression(X, y, k, components):
    pls = PLSRegression(n_components=components)
    rmse_l = []
    r2_l = []

    kf = KFold(n_splits=k, shuffle=True)
    i = 1
    for train_index, test_index in kf.split(X):
        X_tr, X_tst = X[train_index], X[test_index]
        y_tr, y_tst = y[train_index], y[test_index]

        pls.fit(X_tr, y_tr)
        r2 = pls.score(X_tst, y_tst)
        y_pred = pls.predict(X_tst)
        rmse = math.sqrt(metrics.mean_squared_error(y_tst, y_pred))
        rmse_l.append(rmse)
        r2_l.append(r2)

        print("[{}] RMSE: {}, R2: {}".format(i, round(rmse, 2), round(r2, 2)))
        i = i + 1

    return (rmse_l, r2_l)
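A hypothetical call with synthetic arrays (X and y must be NumPy arrays, since the folds are taken by positional indexing):

import numpy as np

rng = np.random.RandomState(42)
X = rng.normal(size=(60, 8))
y = X @ rng.normal(size=8) + rng.normal(scale=0.2, size=60)

rmse_l, r2_l = pls_regression(X, y, k=5, components=3)
print(np.mean(rmse_l), np.mean(r2_l))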
Code Example #14
def cyl_PLSR(data):
    ''' Performs PLSR on the data separated into individual cylinders and plots the scores and loadings plots '''
    newData = scaler(data)
    PLSR = PLSRegression(n_components=2)
    PLSR.fit(newData[:, 2:6], newData[:, 0])
    print('The R2Y value is', PLSR.score(newData[:, 2:6], newData[:, 0]))
    Xscores = PLSR.x_scores_
    Yscores = PLSR.y_scores_
    Xload = PLSR.x_loadings_
    Yload = PLSR.y_loadings_
    plt.figure()
    plt.scatter(Xscores[:, 0], Xscores[:, 1])
    plt.scatter(Yscores[:, 0], Yscores[:, 1])
    plt.title('Scores Plot')
    plt.figure()
    plt.scatter(Xload[0, 0], Xload[0, 1], label='Displacement')
    plt.scatter(Xload[1, 0], Xload[1, 1], label='Horsepower')
    plt.scatter(Xload[2, 0], Xload[2, 1], label='Weight')
    plt.scatter(Xload[3, 0], Xload[3, 1], label='Acceleration')
    plt.scatter(Yload[:, 0], Yload[:, 1], label='MPG')
    plt.title('Loadings Plot')
    plt.legend(loc='best')
Code Example #15
def pool_pls_pred(ro_wind, pre_wind):
    prediction = {}
    coef_param = {}
    for i in np.arange((ro_wind + pre_wind), len(trade_days), 1):
        # slice the rolling sample window and pool it to estimate the regression coefficients
        date_roll = pd.to_datetime(trade_days[(i - ro_wind - pre_wind):(i - pre_wind)])
        sub_data = new_f.loc[date_roll, :]
        print(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))

        pls = PLSRegression(n_components=comp_num).fit(sub_data.iloc[:, 0:-1], sub_data.iloc[:, -1])

        coef_param[trade_days[i]] = pd.DataFrame(pls.coef_.T, index=[trade_days[i]], columns=sub_data.iloc[:, 0:-1].columns)  # store the coefficients
        print("correct rate: ", pls.score(sub_data.iloc[:, 0:-1], sub_data.iloc[:, -1]))

        test_data = new_f.loc[pd.to_datetime(trade_days[i]), :]  # the coefficients only become usable (pred_window+1) days later
        test_data = test_data.drop(['stock_rela'], axis=1)
        prediction[trade_days[i]] = pd.Series(pls.predict(test_data)[:, 0], index=test_data.index)
        print(trade_days[i])
    pred = pd.concat(prediction, axis=1).T
    pred.index = pd.to_datetime(pred.index)
    cof = pd.concat(coef_param.values())
    cof.index = pd.to_datetime(cof.index)
    return pred, cof
Code Example #16
def PLSR_groupCV(data):
    ''' Does cross-validation by leaving one city out; returns the held-out R2 for each city '''
    diff = []
    cities = [1, 2, 3]
    for group in cities:
        train = []
        test = []
        for i in range(len(data[:, 0])):
            if data[i, 7] == group:
                test.append(data[i, :])
            else:
                train.append(data[i, :])
        test = np.array(test)
        train = np.array(train)

        trainScale = StandardScaler()
        trainScaled = trainScale.fit_transform(train)
        testScaled = trainScale.transform(test)
        PLSR = PLSRegression(n_components=2)
        PLSR.fit(trainScaled[:, 2:6], trainScaled[:, 0])
        error = PLSR.score(testScaled[:, 2:6], testScaled[:, 0])
        diff.append(error)
    return diff
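scikit-learn's LeaveOneGroupOut expresses the same leave-one-city-out split directly; a sketch under the same column-layout assumptions (group label in column 7, response in column 0, features in columns 2:6):

import numpy as np
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.preprocessing import StandardScaler

def plsr_group_cv_sklearn(data):
    scores = []
    for train_idx, test_idx in LeaveOneGroupOut().split(data, groups=data[:, 7]):
        # scale with statistics from the training cities only
        scaler = StandardScaler().fit(data[train_idx])
        train_s = scaler.transform(data[train_idx])
        test_s = scaler.transform(data[test_idx])
        pls = PLSRegression(n_components=2).fit(train_s[:, 2:6], train_s[:, 0])
        scores.append(pls.score(test_s[:, 2:6], test_s[:, 0]))
    return scores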
Code Example #17
File: ML.py Project: Jammyzx1/ML-RF-SVM-PLS
                        print(round(SVRpreds[i],2))
                        i += 1
        print("\n")
        SVRr2.append(optSVR.score(XTest, yTest))
        SVRmse.append( metrics.mean_squared_error(yTest,SVRpreds))
        SVRrmse.append(math.sqrt(SVRmse[metcount]))
        print ("Support Vector Regression prediction statistics for fold %d are; MSE = %5.2f RMSE = %5.2f R2 = %5.2f\n\n" % (metcount+1, SVRmse[metcount], SVRrmse[metcount],SVRr2[metcount]))
        with open(train_name,'a') as ftrain :
                ftrain.write("Support Vector Regression prediction statistics for fold %d are, MSE =, %5.2f, RMSE =, %5.2f, R2 =, %5.2f,\n\n" % (metcount+1, SVRmse[metcount], SVRrmse[metcount],SVRr2[metcount]))
        ftrain.close()

        # Train partial least squares and predict with optimised parameters
        print("\n\n------------------- Starting opitimised PLS training -------------------")
        optPLS = PLSRegression(n_components = nc)
        optPLS.fit(XTrain, yTrain)       # Train the model
        print("Training R2 = %5.2f" % optPLS.score(XTrain,yTrain))
        print("Starting optimised PLS prediction")
        PLSpreds = optPLS.predict(XTest)
        print("The predicted values now follow :")
        PLSpredsdim = PLSpreds.shape[0]
        i = 0
        if PLSpredsdim%5 == 0:
                while i < PLSpredsdim:
                        print(round(PLSpreds[i],2),'\t', round(PLSpreds[i+1],2),'\t', round(PLSpreds[i+2],2),'\t', round(PLSpreds[i+3],2),'\t', round(PLSpreds[i+4],2))
                        i += 5
        elif PLSpredsdim%4 == 0:
                while i < PLSpredsdim:
                        print(round(PLSpreds[i],2),'\t', round(PLSpreds[i+1],2),'\t', round(PLSpreds[i+2],2),'\t', round(PLSpreds[i+3],2))
                        i += 4
        elif PLSpredsdim%3 == 0 :
                while i < PLSpredsdim :
Code Example #18
File: pls.py Project: mwalton/artificial-olfaction
(Xtest, ytest) = loadData(xtestpath, ytestpath)

#trim off background and scale
ytrain=ytrain[:,1:]
#ytrain=scale(ytrain)
Xtrain=standardize(Xtrain)

#trim off background and scale
ytest = ytest[:,1:]
#ytest = scale(ytest)
Xtest = standardize(Xtest)

pls = PLSRegression(n_components=10)
pls.fit(Xtrain, ytrain)
y_pls = pls.predict(Xtest)
print(1 + pls.score(Xtest, ytest))


pls_rmse=[]
pls_rmse.append(sqrt(mean_squared_error(ytest[:,0], y_pls[:,0])))
pls_rmse.append(sqrt(mean_squared_error(ytest[:,1], y_pls[:,1])))
pls_rmse.append(sqrt(mean_squared_error(ytest[:,2], y_pls[:,2])))
pls_rmse.append(sqrt(mean_squared_error(ytest[:,3], y_pls[:,3])))

fig = plt.figure(figsize=(20,10))

ax1 = fig.add_subplot(241)
ax1.plot(y_pls[:,0], c='r', label='PLS Fit')
ax1.plot(ytest[:,0], c='grey', label='Target')
ax1.set_xlabel('Time')
ax1.set_ylabel('[c]')
Code Example #19
def PLS_Regression(csv_data,
                   point_index,
                   sub_index,
                   var_name,
                   train=None,
                   components=None):
    '''
    plt initialization & definition
    '''
    plt.figure()

    # plt.subplot(3, 3, 1)
    # plt.plot([0, 1], [0, 1])
    # plt.subplot(3, 3, 2)
    # plt.plot([0, 1], [0, 2])
    # plt.subplot(3, 3, 3)
    # plt.plot([0, 3], [0, 4])
    # plt.subplot(3, 3, 4)
    # plt.plot([0, 1], [0, 2])
    # plt.subplot(3, 3, 5)
    # plt.plot([0, 1], [0, 1])
    # plt.subplot(3, 3, 6)
    # plt.plot([0, 1], [0, 1])
    # plt.subplot(3, 1, 3)
    # plt.plot([0, 1], [0, 3])
    # plt.show()
    for i in range(7):
        X_array = []
        temp_array = []

        for j in csv_data:
            temp_array = j[point_index - 1:point_index + 8]
            X_array.append(temp_array)
        X_array = np.array(X_array)
        Y_array = np.array(csv_data[:, sub_index - 1 + i])
        if train:
            X_array, X_test, Y_array, Y_test = train_test_split(
                X_array, Y_array, test_size=0.15, random_state=42)
        if components is None:
            components = np.shape(X_array)[1]
        plsrModel = PLSRegression(n_components=components)
        plsrModel.fit(X_array, Y_array)
        coefs = plsrModel.coef_
        coefs = np.around(coefs, decimals=2)
        coefs = coefs.astype(str)
        # print(var_name[sub_index + i])
        # print("y =",end="")
        # for i in range(9):
        #     print(coefs[i][0],end="")
        #     print("*x",end="")
        #     print(i+1,end="")
        #     if i != 8:
        #         print(" + ",end="")
        # print('')
        # print("R^2 =",np.around(plsrModel.score(X_array, Y_array),decimals=2))
        Y_predict = plsrModel.predict(X_array)
        plt.subplot(3, 3, i + 1)
        plt.xlabel('X-axis')
        plt.ylabel('Y-axis')
        score = np.around(plsrModel.score(X_array, Y_array), decimals=2)
        plt.text(0.6,
                 10,
                 "R^2 =" + str(score),
                 horizontalalignment='center',
                 verticalalignment='center')
        plt.title(var_name[sub_index + i])
        plt.scatter(X_array[:, 0], Y_array)
        plt.scatter(X_array[:, 0], Y_predict)
        print(RMSE(Y_array, plsrModel.predict(X_array)))
    plt.suptitle('PLS-Regression')
    plt.show()
Code Example #20
#correct not accurate
from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in modern scikit-learn
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.svm import SVC
import numpy as np
import pandas as pd
from sklearn.cross_decomposition import PLSRegression
from sklearn.cross_decomposition import PLSCanonical
df=pd.read_csv('newdata.csv')
x=df.drop(['tag'],axis=1)
y=df.drop(['kx','ky','kz','wa','wb','wc','wd','we','wf'],axis=1)
X_train, X_test, Y_train, Y_test = train_test_split(x, y, random_state=5)

plsr=PLSRegression()
plsr.fit(X_train,Y_train)

plsc=PLSCanonical()
plsc.fit(X_train,Y_train)

print (plsr.score(X_test,Y_test))
print (plsc.score(X_test,Y_test))
Code Example #21
File: trainAUvisModel.py Project: cosanlab/py-feat
    balX = pd.concat([balX, newSample])
    balY = pd.concat([balY, landmarks.loc[newSample.index]])
X = balX[au_cols].values
y = registration(balY.values, neutral)

# Model Accuracy in KFold CV
print("Evaluating model with KFold CV")
n_components = len(au_cols)
kf = KFold(n_splits=3)
scores = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf = PLSRegression(n_components=n_components, max_iter=2000)
    clf.fit(X_train, y_train)
    scores.append(clf.score(X_test, y_test))
print('3-fold accuracy mean', np.round(np.mean(scores), 2))

# Train real model
clf = PLSRegression(n_components=n_components, max_iter=2000)
clf.fit(X, y)
print('N_comp:', n_components, 'Rsquare', np.round(clf.score(X, y), 2))

# We visualize the results of our model. The regression was trained on labels 0-1 so we do not recommend exceeding 1 for the intensity. Setting the intensity to 2 will exaggerate the face and anything beyond that might give you strange faces.

# In[116]:

# Plot results for each action unit
f, axes = plt.subplots(5, 4, figsize=(12, 18))
axes = axes.flatten()
# Exaggerate the intensity of the expression for clearer visualization.
Code Example #22
def plsregress(Train, Test, devcomp=None, spec='ALLr'):
    '''
    Builds a PLSR model using the spectra specified in spec, and plots the
    development-set error against the number of principal components.
    options: 'UV', 'UVr', 'NIR', 'ALLr', 'ALL'
    '''
    import numpy as np
    import pandas as pd
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    from sklearn.cross_decomposition import PLSRegression
    from sklearn.model_selection import GroupKFold, LeaveOneOut
    from sklearn.preprocessing import MinMaxScaler
    from sklearn.metrics import r2_score, mean_squared_error
    from sklearn.utils import shuffle
    from uv_nir_gos import wl_select
    trainR2 = []
    devMSE = []
    devR2 = []

    X, _, Y, __ = wl_select(Train, Test, spec)

    X, Y = shuffle(X, Y)

    for i in np.arange(1, 20):
        ytests = []
        ypreds = []
        train_score = []

        cv = LeaveOneOut()  # higher error as expected compared to LOO -- unsure how Cao & co. got their results.
        sample_ids = list(set(X.index.tolist()))
        for train_idx, dev_idx in cv.split(sample_ids):
            tr_ix = Train.iloc[train_idx, :].index.tolist()
            dev_ix = Train.iloc[dev_idx, :].index.tolist()

            X_train, X_dev = X.loc[tr_ix], X.loc[dev_ix]
            y_train, y_dev = Y.loc[tr_ix], Y.loc[dev_ix]

            # fit scaler to train apply to test
            scaler = MinMaxScaler()
            X_train_t = scaler.fit_transform(X_train.values)
            X_dev_t = scaler.transform(X_dev.values)

            pls2 = PLSRegression(n_components=i)
            pls2.fit(X_train_t, y_train.values)
            train_score.append(pls2.score(X_train_t, y_train.values))

            y_pred = pls2.predict(X_dev_t)

            ytests += list(y_dev.values)
            ypreds += list(y_pred)

        train_R2 = np.asarray(train_score).mean(axis=0)
        train_R2_std = np.asarray(train_score).std(axis=0)
        dev_R2 = r2_score(ytests, ypreds, multioutput='raw_values')
        dev_MSE = mean_squared_error(ytests, ypreds, multioutput='raw_values')

        devMSE.append(dev_MSE)
        devR2.append(dev_R2)
        trainR2.append(train_R2)
    if devcomp is not None:
        resDF = pd.DataFrame(
            [np.asarray(devR2)[devcomp, :],
             np.asarray(devMSE)[devcomp, :]],
            columns=Y.columns,
            index=['R2', 'MSE'])
        resDF.to_csv('results/resDF_' + str(spec) + '_PLSR_dev.csv')
    # Plot results
    plt.plot(np.arange(1, 20), np.array(devMSE), '-o')
    plt.xlabel('Number of principal components in regression')
    plt.ylabel('MSE')
    plt.legend(Y.columns.to_list())
    plt.xlim(left=0, right=21)
    plt.savefig('results/PLSR_dev_' + spec + '.png')
    plt.close()
Code Example #23
  def calibracao(self, idmodelo, nrcomponentes, corteOutlier, qtdeRemocoes):

    # Deactivate previous calibrations
    db.execute("update calibracao set  inativo = 'F'" +
               " where idmodelo = " + str(idmodelo) + " ")
    db.execute(" update amostra set tpamostra = 'NORMAL' where idmodelo = " + str(idmodelo) + "")
    session.commit()

    # create a new calibration record for the model
    data_Atual = datetime.today()
    data_em_texto = data_Atual.strftime('%d/%m/%Y')

    cursorCodigo = db.execute(
      "select coalesce(max(idcalibracao),0) + 1 as codigo from calibracao where idmodelo = " + str(idmodelo) + " ")
    for regCodigo in cursorCodigo:
      idcalibracao = regCodigo[0]

    db.execute("insert into calibracao (idcalibracao, idmodelo, dtcalibracao, inativo) "
               "values (" + str(idcalibracao) + "," + str(idmodelo) + " , '" + str(data_em_texto) + "', 'A' )")
    session.commit()

    idmodelo = idmodelo

    print(idmodelo)

    Xtodos = self.selectMatrizX(idmodelo, "TODOS")

    # Insert the validation samples
    YCodigoTodos = self.selectMatrizY(idmodelo, "ID", "TODOS")

    for amostraX in YCodigoTodos:
      amostra = str(amostraX)
      amostra = amostra.replace("[", "")
      amostra = amostra.replace("]", "")
      db.execute("insert into amostra_calibracao (idcalibracao, idmodelo, idamostra, tpconjunto) "
                 "values (" + str(idcalibracao) + "," + str(idmodelo) + " , '" + str(
        int(float(amostra))) + "','VALIDACAO' )")

    session.commit()

    qtde = 0
    if corteOutlier > 0:
      while qtde < qtdeRemocoes:
        self.detectarOutlierKNN(idmodelo, Xtodos, corteOutlier)
        Xtodos = self.selectMatrizX(idmodelo, "TODOS")
        qtde = qtde + 1

    session.commit()

    Xtodos=self.selectMatrizX(idmodelo, "TODOS")

    #Xtodos = self.selectMatrizX(idmodelo, "TODOS")
    number_of_samples = Xtodos.__len__()
    number_of_samples = number_of_samples * 0.65

    # selected_sample_numbers, remaining_sample_numbers = kennardstonealgorithm(X, number_of_samples)
    """amostras_Calibracao = kennardStone(Xtodos, number_of_samples)"""

    # amostras_Calibracao = kennardStone(autoscaled_X, number_of_samples)
    """print(amostras_Calibracao)"""
    print("---")
    print("remaining sample numbers")
    # print(remaining_sample_numbers)

    """#plot samples
        plt.figure()
        plt.scatter(autoscaled_X[:, 0], autoscaled_X[:, 1], label="all samples")
        plt.scatter(autoscaled_X[selected_sample_numbers, 0], autoscaled_X[selected_sample_numbers, 1], marker="*",
                    label="all samples")
        plt.xlabel("x1")
        plt.ylabel("x2")
        plt.legend(loc='upper right')
        plt.show()


        #***************************************************************************************************************
        #fim kennard-stone"""

    # Insert the calibration samples
    """cont = 0
    for amostraCalibracao in amostras_Calibracao:
      amostra = str(amostraCalibracao)
      amostra = amostra.replace("[", "")
      amostra = amostra.replace("]", "")
      db.execute("update  amostra_calibracao set tpconjunto = 'CALIBRACAO'  "
                 " where idcalibracao =" + str(idcalibracao) + " and idmodelo = " + str(idmodelo) +
                 " and idamostra = " + str(int(float(amostra))))
      session.commit()

      print(cont)
      cont = cont + 1
    session.commit()"""

    Xcal = self.selectMatrizX(idmodelo, "CALIBRACAO")
    Xval = self.selectMatrizX(idmodelo, "VALIDACAO")

    """
    qtde = 0
    if corteOutlier > 0:
      while qtde < qtdeRemocoes:
        self.detectarOutlierKNN(idmodelo, Xval, corteOutlier)
        self.detectarOutlierKNN(idmodelo, Xcal, corteOutlier)

        Xval = self.selectMatrizX(idmodelo, "VALIDACAO")
        Xcal = self.selectMatrizX(idmodelo, "CALIBRACAO")
        qtde = qtde + 1
    """

    #Ycal = self.selectMatrizY(idmodelo, "VALOR", "CALIBRACAO")
    Yval = self.selectMatrizY(idmodelo, "VALOR", "VALIDACAO")

    #YCodigoCal = self.selectMatrizY(idmodelo, "ID", "CALIBRACAO")
    YCodigoVal = self.selectMatrizY(idmodelo, "ID", "VALIDACAO")

    # Calibration-set statistics
    """
    plsCal = PLSRegression(copy=True, max_iter=500, n_components=nrcomponentes, scale=False, tol=1e-06)
    plsCal.fit(Xcal, Ycal)
    coeficiente = plsCal.score(Xcal, Ycal, sample_weight=None)
    print('score do modelo PLS - Calibracao')
    print(coeficiente)
    print('R2 do modelo PLS - Calibracao')
    coeficienteCal = r2_score(plsCal.predict(Xcal), Ycal)
    print(coeficienteCal)
    """

    # Validation-set statistics
    plsVal = PLSRegression(copy=True, max_iter=500, n_components=nrcomponentes, scale=False, tol=1e-06)
    plsVal.fit(Xval, Yval)
    coeficiente = plsVal.score(Xval, Yval, sample_weight=None)
    print('score do modelo PLS - Validacao')
    print(coeficiente)
    print('R2 do modelo PLS - Validacao')
    coeficienteVal = r2_score(plsVal.predict(Xval), Yval)
    print(coeficienteVal)
    # print('label_ranking_average_precision_score ')
    # print(label_ranking_average_precision_score(np.array(Yval), np.array(plsVal.y_scores_)))

    """# Ajustar Calculos do RMSEC
    matYPredCalibracao = []

    for itemMatrizY in YCodigoCal:
      amostra = str(itemMatrizY)
      amostra = amostra.replace("[", "")
      amostra = amostra.replace("]", "")
      # print(i)
      linhaMatriz = []
      amostraPredicao = self.selectAmostra(int(float(amostra)), idmodelo)
      Y_pred = plsCal.predict(amostraPredicao)
      # print(Y_pred)
      linhaMatriz.append(round(np.double(Y_pred), 0))
      matYPredCalibracao += [linhaMatriz]

    rmsec = sqrt(mean_squared_error(Ycal, matYPredCalibracao))
    print('RMSEC')
    print(rmsec)
    """

    # Adjust the RMSEP calculations
    matYPredValidacao = []

    for itemMatrizY in YCodigoVal:
      amostra = str(itemMatrizY)
      amostra = amostra.replace("[", "")
      amostra = amostra.replace("]", "")
      # print(i)
      linhaMatriz = []
      amostraPredicao = self.selectAmostra(int(float(amostra)), idmodelo)
      Y_pred = plsVal.predict(amostraPredicao)
      # print(Y_pred)
      linhaMatriz.append(round(np.double(Y_pred), 0))
      matYPredValidacao += [linhaMatriz]

    rmsep = sqrt(mean_squared_error(Yval, matYPredValidacao))
    print('RMSEP')
    print(rmsep)

    # Update the calibration record
    # NOTE: coeficienteCal is only assigned inside the commented-out calibration
    # block above, so as written this statement raises a NameError.
    db.execute("update calibracao set rmsec = " + str(rmsec) +
               " , inativo = 'A'" +
               " , rmsep = " + str(rmsep) +
               " , coeficientecal = " + str(coeficienteCal) +
               " , coeficienteval = " + str(coeficienteVal) +
               " , dtcalibracao = '" + str(data_em_texto) + "'"
                                                            " where idmodelo = " + str(idmodelo) +
               " and idcalibracao = " + str(idcalibracao) + " ")
    session.commit()

    print("VARIAVEIS LATENTES")
    print(nrcomponentes)

    return idmodelo
Code Example #24
    def calibracao(self, idmodelo):

        # Deactivate previous calibrations
        db.execute("update calibracao set  inativo = 'F'" +
                   " where idmodelo = " + str(idmodelo) + " ")
        session.commit()

        # create a new calibration record for the model
        data_Atual = datetime.today()
        data_em_texto = data_Atual.strftime('%d/%m/%Y')

        cursorCodigo = db.execute(
            "select coalesce(max(idcalibracao),0) + 1 as codigo from calibracao where idmodelo = "
            + str(idmodelo) + " ")
        for regCodigo in cursorCodigo:
            idcalibracao = regCodigo[0]

        db.execute(
            "insert into calibracao (idcalibracao, idmodelo, dtcalibracao) "
            "values (" + str(idcalibracao) + "," + str(idmodelo) + " , '" +
            str(data_em_texto) + "' )")
        session.commit()

        idmodelo = idmodelo

        print(idmodelo)

        conjunto = "CALIBRACAO"

        X = self.selectMatrizX(idmodelo, conjunto)
        Y = self.selectMatrizY(idmodelo, conjunto, "VALOR")
        YCodigo = self.selectMatrizY(idmodelo, conjunto, "ID")

        pls = PLSRegression(copy=True,
                            max_iter=500,
                            n_components=12,
                            scale=False,
                            tol=1e-06)
        pls.fit(X, Y)

        coeficiente = pls.score(X, Y, sample_weight=None)
        print('R2 do modelo PLS')
        print(coeficiente)
        print(r2_score(pls.predict(X), Y))

        # TODO: make the RMSEC and RMSEP calculations dynamic
        matYPred = []

        for item in YCodigo:
            #print(i)
            linhaMatriz = []
            amostra = str(item)
            amostra = amostra.replace("[", "")
            amostra = amostra.replace("]", "")
            amostraPredicao = self.selectAmostra(int(float(amostra)), idmodelo)
            Y_pred = pls.predict(amostraPredicao)
            #print(Y_pred)
            linhaMatriz.append(np.double(Y_pred))
            matYPred += [linhaMatriz]
            db.execute(
                "insert into amostra_calibracao (idcalibracao, idmodelo, idamostra) "
                "values (" + str(idcalibracao) + "," + str(idmodelo) + " , '" +
                str(int(float(amostra))) + "' )")

        session.commit()

        # print(mean_squared_error(Y,matYPred))
        raizQ = mean_squared_error(Y, matYPred)**(1 / 2)

        rms = sqrt(mean_squared_error(Y, matYPred))
        print('RMSEC')
        print(raizQ)
        print(rms)

        # Update the calibration record
        db.execute("update calibracao set rmsec = " + str(rms) +
                   " , inativo = 'A'" + " , rmsep = " + str(rms) +
                   " , coeficiente = " + str(coeficiente) +
                   " , dtcalibracao = '" + str(data_em_texto) + "'"
                   " where idmodelo = " + str(idmodelo) +
                   " and idcalibracao = " + str(idcalibracao) + " ")
        session.commit()

        return idmodelo
Code Example #25
    def predicao(self, idmodelo, idamostra):

        idmodelo = idmodelo

        idamostra = idamostra

        print(idmodelo)
        print(idamostra)

        conjunto = "CALIBRACAO"

        X = self.selectMatrizX(idmodelo, conjunto)
        Y = self.selectMatrizY(idmodelo, conjunto, "VALOR")

        amostraPredicao = self.selectAmostra(idamostra, idmodelo)

        valorReferencia = self.selectDadosReferenciaAmostra(
            idamostra, idmodelo)

        pls = PLSRegression(copy=True,
                            max_iter=500,
                            n_components=12,
                            scale=False,
                            tol=1e-06)

        pls.fit(X, Y)

        valorPredito = pls.predict(amostraPredicao)

        print('Amostra: ' + str(idamostra) + ' - Valor Predito :' +
              str(valorPredito))

        coeficiente = pls.score(X, Y, sample_weight=None)
        print('R2 do modelo PLS')
        print(coeficiente)
        print(r2_score(pls.predict(X), Y))

        # TODO: make the RMSEC and RMSEP calculations dynamic
        matYPred = []

        for i in range(1, 349):
            #print(i)
            linhaMatriz = []
            idAmostraTestes = i
            # NOTE: idAmostraTestes is never used; every iteration predicts the
            # same idamostra, so matYPred repeats one prediction 348 times.
            amostraPredicao = self.selectAmostra(idamostra, idmodelo)
            Y_pred = pls.predict(amostraPredicao)
            #print(Y_pred)
            linhaMatriz.append(np.double(Y_pred))
            matYPred += [linhaMatriz]

        # print(mean_squared_error(Y,matYPred))
        raizQ = mean_squared_error(Y, matYPred)**(1 / 2)

        rms = sqrt(mean_squared_error(Y, matYPred))
        print('RMSEC')
        print(raizQ)
        print(rms)

        # prepare the data for the JSON payload
        coeficiente = round(coeficiente, 2)
        #valorPredito = round(valorPredito, 2)
        raizQ = round(raizQ, 2)
        valorReferencia = round(valorReferencia, 2)

        valorPreditoString = str(valorPredito)
        valorPreditoString = valorPreditoString.replace("[", "")
        valorPreditoString = valorPreditoString.replace("]", "")

        ## Build the JSON response
        json_data = jsonify(idamostra=str(idamostra),
                            valorpredito=str(valorPreditoString),
                            rmsec=str(raizQ),
                            idmodelo=str(idmodelo),
                            valorreferencia=str(valorReferencia),
                            coeficiente=str(coeficiente))

        return json_data
Code Example #26
def main():
    X_train, X_test, y_train, y_test = get_data()
    pls = PLSRegression(n_components=2)
    pls.fit(X_train, y_train)
    print("test score is", pls.score(X_test, y_test))
X_test = X[X.shape[0] // 2:]
Y_train = Y[0:Y.shape[0] // 2]
Y_test = Y[Y.shape[0] // 2:]

# so x1,x2 are useful, x3-10 are bad

pls2 = PLSRegression(n_components=3)
pls2.fit(X_train, Y_train)

print("True B (such that: Y = XB + Err)")
print(B)
# compare pls2.coef_ with B
print("Estimated B")
print(np.round(pls2.coef_, 1))

print "\n\n PLS scored: %.2f" % pls2.score(X_test, Y_test)

# high variance and have high correlation with the response, in contrast to principal components regression which keys only on high variance

# https://github.com/scikit-learn/scikit-learn/blob/14031f6/sklearn/cross_decomposition/pls_.py#L295
# this is the weight estimation step, note: Yk's k is the iteration / component #

# blah, not going to dig into the NIPALS algorithm

########################################################################

pca = PCA()
X_train_reduced = pca.fit_transform(scale(X_train))[:,0:5] # take top 5 dim of pca
l = LinearRegression()
l.fit(X_train_reduced, Y_train)
Code Example #28
def FindRGBTransformPLS(rgbFrom, rgbTo):
    pls = PLSRegression(n_components=3)
    pls.fit(rgbFrom, rgbTo)
    sc = pls.score(rgbFrom, rgbTo)
    print(sc)
    return pls
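A hypothetical application: fit the transform on a few paired color swatches, then map every pixel of an image (the arrays here are made up):

import numpy as np

rgb_from = np.array([[10, 20, 30], [200, 180, 160], [90, 120, 250],
                     [0, 255, 128], [64, 64, 64], [240, 10, 90]], dtype=float)
rgb_to = rgb_from * [[1.05, 0.97, 1.02]] + 3.0  # pretend calibration targets

pls = FindRGBTransformPLS(rgb_from, rgb_to)
image = np.random.randint(0, 256, size=(32, 32, 3)).astype(float)
# flatten pixels to rows, transform, and restore the image shape
mapped = pls.predict(image.reshape(-1, 3)).reshape(image.shape)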
Code Example #29
'''
clf = linear_model.Ridge(alpha=0.1)
clf.fit(x_train, y_train)
coef=clf.coef_
clf.score(x_train, y_train)
y_predict=clf.predict(x_test)
'''
windows = 4
# Partial least squares regression (test split 0.3): loop over component counts to find the best; n=5. With global standardization: 0.3314
r2_test_best = 0
r2_train_best = 0
n_best = 0
y_test = np.reshape(y_test, [np.shape(y_test)[0], 1])
for n in range(1, min(np.shape(x_train)[0], np.shape(x_train)[1]) + 1):
    pls2 = PLSRegression(n_components=n, scale=False)
    pls2.fit(x_train, y_train)
    r2_train = pls2.score(x_train, y_train)
    y_predict = pls2.predict(x_test)
    r2_test = pls2.score(x_test, y_test)
    #r2_test=test_r_square(y_predict,y_test)
    if r2_test > r2_test_best:
        r2_test_best = r2_test
        r2_train_best = r2_train
        n_best = n

pls2 = PLSRegression(n_components=n_best, scale=False)
pls2.fit(x_train, y_train)
y_pred_test = pls2.predict(x_test)
y_pred_train = pls2.predict(x_train)
Code Example #30
def plsfinal(trainX, trainY, testX, testY, i):
    plsModel = PLSRegression(n_components=i)
    plsModel.fit(trainX, trainY)
    pred_Y = plsModel.predict(testX)
    R2 = plsModel.score(testX, testY)
    return pred_Y, R2
Code Example #31
File: plsr.py Project: apurv1205/SNLP_sharedTask
a = []
for row in testSet:
    testX.append(np.concatenate([vectors[row[1]], vectors[row[2]]]))
    testY.append(vectors[row[0]])

    predictedY = model.predict(
        np.concatenate([vectors[row[1]], vectors[row[2]]]).reshape(1, -1))
    a.append(
        cosine_similarity(predictedY,
                          np.array(vectors[row[0]]).reshape(1, -1)))
    # ans =  bestWord(row[0],row[1], row[2], predictedY)
    # if ans.strip() == row[0].strip():
    # 	correct+=1
    # writer.writerow([row[1], row[2], ans, row[0]])
    ans_lst = bestKWords(row[0], row[1], row[2], predictedY, 1)
    flg = False
    for ans in ans_lst:
        if ans.strip() == row[0].strip():
            correct += 1
            flg = True
            writer.writerow([row[1], row[2], ans, row[0]])
            break

    if flg == False:
        writer.writerow([row[1], row[2], ans_lst, row[0]])

print "Correctly predicted : ", correct, ", Out of :", len(
    testSet), " test data (last 20 percent of the given dataset)"
print "Mean cosine similarity with predicted vector", np.mean(a)
print "Model score : ", model.score(testX, testY)
Code Example #32
x_msc, _ = processing.msc(x_reflect)
x_robust = RobustScaler().fit_transform(x_msc)
plt.style.use('dark_background')

pls = PLS(n_components=6)

pls.fit(x, y)
x_fit = pls.predict(x)
pls.fit(x_msc, y)
svr = SVR()
svr.fit(x_msc, y)
print(svr.score(x, y))

ridge = RidgeCV()
ridge.fit(x_msc, y)
print(pls.score(x, y))  # NOTE: pls was last fit on x_msc, so scoring raw x mixes preprocessing

# ham

# x_fit = pls.predict(x_msc)

print(pls.score(x_msc, y))
print(pls.coef_)
coeff_final = pls.coef_.T[0] / pls.x_std_  # x_std_ exists only in older scikit-learn releases
print('-======')
# print(pls.x_mean_)
# print(pls.y_mean_)
# print('bbbbb')
# print(ridge.coef_)

Code Example #33
pls2 = PLSRegression(copy=True,
                     max_iter=5000,
                     n_components=10,
                     scale=True,
                     tol=1e-06)

pls2.fit(X, Y)

# Get X scores
T = pls2.x_scores_

# Get X loadings
P = pls2.x_loadings_

Y_pred = pls2.predict(amostraPredicao)

print('Valor Predito:')
print(Y_pred)

# how much variance the model explains (R2)
print('R2 do modelo PLS')
print(pls2.score(X, Y, sample_weight=None))

#print('R2 do modelo')
#print(r2_score(pls2.predict(X), Y))

print('Loadings:')
print(P)
print('Score:')
print(T)
Code Example #34
File: utils.py Project: mhany90/amnesic_probing
def learn_pls_cls(x_train, y_train, x_dev, y_dev):
    clf = PLSRegression(n_components=100)

    clf.fit(x_train, y_train)
    acc = clf.score(x_dev, y_dev)
    return acc
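Note that PLSRegression.score returns R2 on the regression targets, not classification accuracy, so the value returned as acc above is a regression fit measure. If y_dev were one-hot labels and true accuracy were wanted, a sketch under that assumption:

import numpy as np

def pls_cls_accuracy(clf, x_dev, y_dev):
    # assumes y_dev is one-hot encoded with shape (n_samples, n_classes)
    pred = clf.predict(x_dev)
    return np.mean(pred.argmax(axis=1) == y_dev.argmax(axis=1))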
Code Example #35
    Lig_pls_by_ncomp[iComp - 1] = PLSRegression(n_components=iComp,
                                                scale=False,
                                                max_iter=100)
    Lig_cv_results = cross_validate(Lig_pls_by_ncomp[iComp - 1],
                                    Lig_X_train - Lig_mean,
                                    Lig_y_train,
                                    cv=5)
    Lig_pls_r2val_ncomp[iComp - 1] = np.mean(Lig_cv_results['test_score'])
Lig_YM_plsModel_nComp = np.where(
    Lig_pls_r2val_ncomp == np.max(Lig_pls_r2val_ncomp))[0][0] + 1
Lig_YM_plsModel = PLSRegression(n_components=Lig_YM_plsModel_nComp,
                                scale=False,
                                max_iter=100)

Lig_YM_plsModel.fit(Lig_X_train - Lig_mean, Lig_y_train)
Lig_plsR2_cal = Lig_YM_plsModel.score(Lig_X_train - Lig_mean, Lig_y_train)
Lig_plsR2_val[0] = Lig_YM_plsModel.score(Lig_X_test - Lig_mean, Lig_y_test)
for iC in range(1, N_cohorts):
    Lig_plsR2_val[iC] = Lig_YM_plsModel.score(
        simDat[iC][iR].simDat.loc[:, 'Collagen':'Pentosidine'] - Lig_mean,
        simDat[iC][iR].simDat.loc[:, 'Youngs Modulus'])
print('Ligament Correlations calculated')

### Sanity Checks: Population Characteristic Histograms
iR = [
    0,
    np.where(YM_plsR2_val[1] == np.min(YM_plsR2_val[1]))[0][0],
    np.where(YM_plsR2_val[2] == np.min(YM_plsR2_val[2]))[0][0],
    np.where(YM_plsR2_val[3] == np.min(YM_plsR2_val[3]))[0][0],
]
# reset repetition index to first rep
Code Example #36
def plsvip(X, Y, V, lat_var):
    attributes = len(X[0])

    if not lat_var:
        latent_variables = attributes
    else:
        latent_variables = lat_var
		
    num_instances = len(X)	
	
    attributes_gone = []

    min_att = -1	

    #start_time = time.time()
    #attr_time = time.time()
    #time_counter = 0
    while attributes>0: 
        #if (attributes +9) %10 ==0:
        #    print "total time: ", time.time() - start_time
        #    print "attr time: ", time.time() - attr_time
        #    attr_time = time.time()

        if (latent_variables == 0) or (latent_variables > attributes):	
            latent_variables = attributes	

        lv_best = best_latent_variable(X, Y, latent_variables, num_instances)
        #print "current best lv: ", lv_best, "num. attr. ", attributes ####
		
        #fin_pls = PLSCanonical(n_components = lv_best)
        fin_pls = PLSRegression(n_components = lv_best)
        fin_pls.fit(X, Y)


        currentR2 = fin_pls.score(X, Y)  

        #######################################w
        # alternative r2
        """
        meanY4r2 = numpy.mean(Y)
        predY = fin_pls.predict(X)
        RSS = 0
        for i in range (len(Y)):
            RSS +=  numpy.power (Y[i] - predY[i], 2)
        TSS = 0
        for i in range (len(Y)):
            TSS += numpy.power (Y[i] - meanY4r2, 2)
        
        alterR2 = 1 - (RSS/TSS)
        #print currentR2, "vs", alterR2
        """
        #######################################w
        
        min_vip = 1000

        if min_att ==-1:
            attributes_gone.append(["None", currentR2, attributes, lv_best])

        ##########################################r
        #threaded version
        """ 
        myThreads = []
        VIPcurrent = []
        for i in range (0,attributes):
            myThreads.append(enthread( target = get_vip, args = (fin_pls, lv_best, i, attributes_gone, attributes  )) )
        for i in range (0,attributes):
            VIPcurrent.append(myThreads[i].get())
      
        min_vip = min(VIPcurrent)
        min_att = VIPcurrent.index(min_vip)
        """ 
        # Working version
        #"""
        for i in range(0, attributes):
            VIPcurrent = get_vip(fin_pls, lv_best, i, attributes_gone, attributes)
            if VIPcurrent < min_vip:
                min_vip = VIPcurrent
                min_att = i
        #"""
        ##########################################r
        if min_att >-1:
            attributes_gone.append([V[min_att], currentR2, attributes, lv_best]) ####### CURRENT : to BE popped, NOT already popped
        V.pop(min_att)

        for i in range (num_instances):
            X[i].pop(min_att)

        attributes -= 1		
    #print attributes_gone ####
    #time_counter +=1
    return attributes_gone
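get_vip and best_latent_variable are project-local helpers not shown in this excerpt. For reference, a common textbook formulation of the VIP (Variable Importance in Projection) score computed from a fitted scikit-learn PLSRegression looks like the sketch below; it is not necessarily what this project's helper does:

import numpy as np

def vip_scores(pls):
    t = pls.x_scores_    # (n_samples, n_components)
    w = pls.x_weights_   # (n_features, n_components)
    q = pls.y_loadings_  # (n_targets, n_components)
    p = w.shape[0]
    # share of Y-variance explained by each latent component
    ss = np.sum(t ** 2, axis=0) * np.sum(q ** 2, axis=0)
    w_norm = w / np.linalg.norm(w, axis=0)
    return np.sqrt(p * ((w_norm ** 2) @ ss) / ss.sum())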