def dataPlot_ind(values, data, column, order):
    '''
    Plot the measured data and the values predicted by the least squares fit.

    Two figures are created:
      1. 'Measured Data'  - column 0 of ``data`` (labelled MPG) against the
         selected predictor column.
      2. 'Predicted Data' - the predicted values against the same predictor
         column.

    Parameters:
        values: fitted constants, passed through unchanged to ``calcPred``.
        data:   2-D array indexed as ``data[:, k]``; column 0 is plotted on
                the y axis and ``column`` selects the x axis.
        column: predictor column index, one of 1..5 (Cylinders, Displacement,
                Horsepower, Weight, Acceleration).
        order:  passed through unchanged to ``calcPred``.

    Raises:
        ValueError: if ``column`` is not one of the labelled predictor columns.
    '''
    axis_labels = {
        1: "Cylinders",
        2: "Displacement",
        3: "Horsepower",
        4: "Weight",
        5: "Acceleration",
    }
    # Validate before doing any work: previously an unsupported column left
    # ``label`` unbound and the failure only surfaced after a figure had
    # already been drawn.
    if column not in axis_labels:
        raise ValueError(
            f"column must be one of {sorted(axis_labels)}, got {column!r}")
    label = axis_labels[column]

    newData = scaler(data)

    plt.figure()
    plt.scatter(data[:, column], data[:, 0])
    plt.title('Measured Data')
    plt.ylabel("MPG")
    plt.xlabel(label)

    # Predictions are computed on the scaled data and then passed through
    # invScale together with the raw data.
    # NOTE(review): presumably invScale maps them back to original MPG
    # units - confirm against invScale.
    predVal = calcPred(values, newData, column, order)
    predVal = invScale(data, predVal)

    plt.figure()
    plt.scatter(data[:, column], predVal)
    plt.title('Predicted Data')
    plt.ylabel("MPG")
    plt.xlabel(label)
def PLSR_LOOCV(data):
    '''
    Run leave-one-out cross-validation of a two-component PLSR model and
    return the resulting R2Y value.

    For every row i the model is fitted on all other rows (standardised with
    a StandardScaler fitted on those rows only) using columns 2..5 as
    predictors and column 0 as the response. The held-out row is transformed
    with the same scaler and its response is predicted.

    Parameters:
        data: 2-D numeric array with at least six columns. The column count
              is no longer fixed at 8.

    Returns:
        1 - sum((pred - y)**2) / sum(y**2), where y is column 0 of
        ``scaler(data)``.
    '''
    # Work on a float copy, matching the float buffers the split was
    # previously copied into.
    samples = np.asarray(data, dtype=float)

    predVal = []
    for i in range(samples.shape[0]):
        # Hold out row i; train on every other row.
        train = np.delete(samples, i, axis=0)
        test = samples[i:i + 1, :]

        # Scale with statistics from the training rows only so the held-out
        # row does not leak into the fit.
        trainScale = StandardScaler()
        trainScaled = trainScale.fit_transform(train)
        testScaled = trainScale.transform(test)

        PLSR = PLSRegression(n_components=2)
        PLSR.fit(trainScaled[:, 2:6], trainScaled[:, 0])
        pred = PLSR.predict(testScaled[:, 2:6])
        predVal.append(np.squeeze(pred))

    # The reference response comes from scaling the full data set.
    # NOTE(review): predictions are on the per-fold scale while the reference
    # uses scaler(data) - kept as in the original; confirm this is intended.
    scaledData = scaler(data)
    observed = scaledData[:, 0]
    return 1 - np.sum((np.asarray(predVal) - observed)**2) / np.sum(observed**2)
def R2YCalc_ind(values, data, column, order):
    '''
    Compute the R2Y value of the single-column least squares fit.

    The data is passed through ``scaler``; the residuals of the fit are then
    obtained from ``residuals_ind`` on the scaled data.

    Returns:
        1 - sum(residuals**2) / sum(y**2), where y is column 0 of the scaled
        data.
    '''
    scaled = scaler(data)
    fit_errors = residuals_ind(values, scaled, column, order)
    # Sum of squared residuals relative to the sum of squares of the scaled
    # response.
    sum_sq_error = np.sum(np.square(fit_errors))
    sum_sq_total = np.sum(scaled[:, 0]**2)
    return 1 - sum_sq_error / sum_sq_total
def R2YCalc_full(values, data):
    '''
    Compute the R2Y value of the least squares fit over the entire data.

    The data is passed through ``scaler`` and the residuals are obtained from
    ``residuals_full`` on the scaled data.

    Returns:
        1 - sum(residuals**2) / sum(y**2), where y is column 0 of the scaled
        data.
    '''
    scaled = scaler(data)
    response = scaled[:, 0]
    squared = np.asarray(residuals_full(values, scaled))**2
    return 1 - np.sum(squared) / np.sum(response**2)
def OLS_ind(data, column, order):
    '''
    Run a least squares fit on one column of the data and return the fitted
    constants.

    The data is passed through ``scaler`` and ``scipy``'s ``least_squares``
    minimises ``residuals_ind`` starting from an all-zero guess with
    ``order + 1`` constants.

    Parameters:
        data:   array handed to ``scaler`` before fitting.
        column: predictor column index, forwarded to ``residuals_ind``.
        order:  order of the fit, one of 1, 2, 3 or 4.

    Returns:
        The optimised constants (``opt.x``), ``order + 1`` values.

    Raises:
        ValueError: if ``order`` is not 1, 2, 3 or 4.
    '''
    # Previously an unsupported order left the initial guess unbound and
    # failed with an UnboundLocalError; reject it explicitly instead.
    # The range stays 1..4 because residuals_ind is not known to support more.
    if order not in (1, 2, 3, 4):
        raise ValueError(f"order must be 1, 2, 3 or 4, got {order!r}")

    # One constant per power of the predictor plus the intercept.
    y0 = np.zeros(int(order) + 1)

    newData = scaler(data)
    opt = least_squares(residuals_ind, y0, args=(newData, column, order))
    return opt.x
def cyl_PLSR(data):
    '''
    Fit a two-component PLSR model to the data, print its R2Y value and draw
    the scores plot and the loadings plot.

    Columns 2..5 of the scaled data (Displacement, Horsepower, Weight,
    Acceleration) are the predictors and column 0 (MPG) is the response.

    NOTE(review): the name suggests ``data`` is a per-cylinder subset, but
    nothing in this function filters by cylinder - confirm with the caller.
    '''
    scaled = scaler(data)
    predictors = scaled[:, 2:6]
    response = scaled[:, 0]

    model = PLSRegression(n_components=2)
    model.fit(predictors, response)
    print('The R2Y value is', model.score(predictors, response))

    x_scores, y_scores = model.x_scores_, model.y_scores_
    x_loadings, y_loadings = model.x_loadings_, model.y_loadings_

    # Scores: X scores first, then Y scores, on the same axes.
    plt.figure()
    for scores in (x_scores, y_scores):
        plt.scatter(scores[:, 0], scores[:, 1])
    plt.title('Scores Plot')

    # Loadings: one labelled point per predictor, then the response.
    plt.figure()
    predictor_names = ('Displacement', 'Horsepower', 'Weight', 'Acceleration')
    for row, name in enumerate(predictor_names):
        plt.scatter(x_loadings[row, 0], x_loadings[row, 1], label=name)
    plt.scatter(y_loadings[:, 0], y_loadings[:, 1], label='MPG')
    plt.title('Loadings Plot')
    plt.legend(loc='best')
def OLS_LOOCV(data, column, order):
    '''
    Run leave-one-out cross-validation of the single-column least squares
    fit and return the resulting R2Y value.

    For every row i the constants are fitted with ``OLS_ind`` on all other
    rows. The held-out row is standardised with a StandardScaler fitted on
    the training rows and its response is predicted with ``calcPred``.

    Parameters:
        data:   2-D numeric array. The column count is no longer fixed at 8.
        column: predictor column index, forwarded to ``OLS_ind``/``calcPred``.
        order:  order of the fit, forwarded to ``OLS_ind``/``calcPred``.

    Returns:
        1 - sum((pred - y)**2) / sum(y**2), where y is column 0 of
        ``scaler(data)``.
    '''
    # Work on a float copy, matching the float buffers the split was
    # previously copied into.
    samples = np.asarray(data, dtype=float)

    predVal = []
    for i in range(samples.shape[0]):
        # Hold out row i; train on every other row.
        train = np.delete(samples, i, axis=0)
        test = samples[i:i + 1, :]

        # OLS_ind receives the raw training rows (it applies scaler itself).
        opt = OLS_ind(train, column, order)

        # Scale the held-out row with statistics from the training rows only.
        trainScale = StandardScaler()
        trainScale.fit(train)
        testScaled = trainScale.transform(test)

        pred = calcPred(opt, testScaled, column, order)
        predVal.append(np.squeeze(pred))

    # The reference response comes from scaling the full data set.
    scaledData = scaler(data)
    observed = scaledData[:, 0]
    return 1 - np.sum((np.asarray(predVal) - observed)**2) / np.sum(observed**2)
def OLS_full(data):
    '''
    Run a least squares fit over the entire data and return the fitted
    constants.

    The data is passed through ``scaler`` and ``least_squares`` minimises
    ``residuals_full`` starting from six zero-valued constants.

    Returns:
        The optimised constants (the ``x`` attribute of the solver result).
    '''
    scaled = scaler(data)
    initial_guess = np.zeros(6)
    fit = least_squares(residuals_full, initial_guess, args=(scaled,))
    return fit.x