예제 #1
0
def dataPlot_ind(values, data, column, order):
    ''' Plots the measured data and the predicted curve of the least squares analysis

    Two figures are created: a scatter of ``data[:, column]`` against
    ``data[:, 0]`` (the y-axis is labelled "MPG"), and a scatter of the same
    x-values against the prediction produced by ``calcPred``.

    Args:
        values: Forwarded unchanged to ``calcPred`` (presumably the constants
            returned by ``OLS_ind`` -- confirm against caller).
        data: 2-D array; column 0 is plotted on the y-axis.
        column: Index of the column of ``data`` plotted on the x-axis.
        order: Forwarded unchanged to ``calcPred``.
    '''
    axis_labels = {
        1: "Cylinders",
        2: "Displacement",
        3: "Horsepower",
        4: "Weight",
        5: "Acceleration",
    }
    # Columns without a known name get a generic label so that plotting does
    # not fail on an unassigned ``label`` half-way through drawing.
    label = axis_labels.get(column, "Column {}".format(column))

    newData = scaler(data)
    plt.figure()
    plt.scatter(data[:, column], data[:, 0])
    plt.title('Measured Data')
    plt.ylabel("MPG")
    plt.xlabel(label)

    # The prediction is computed on the scaled data and then passed through
    # invScale (presumably back to the units of ``data`` -- confirm).
    predVal = calcPred(values, newData, column, order)
    predVal = invScale(data, predVal)

    plt.figure()
    plt.scatter(data[:, column], predVal)
    plt.title('Predicted Data')
    plt.ylabel("MPG")
    plt.xlabel(label)
예제 #2
0
def PLSR_LOOCV(data):
    ''' Performs LOOCV on the data and returns R2Y value

    For every row, a two-component PLSR model is fitted on all other rows
    (columns 2..5 as predictors, column 0 as response) after standardising
    with a scaler fitted on the training rows only; the held-out row is
    transformed with that same scaler and predicted.

    Args:
        data: 2-D numeric array with at least six columns.

    Returns:
        1 - sum((prediction - y)**2) / sum(y**2), where y is column 0 of
        ``scaler(data)``.
    '''
    rows = np.asarray(data, dtype=float)
    predVal = []
    for i in range(rows.shape[0]):
        # Leave row i out; np.delete returns a copy, so ``rows`` is untouched.
        train = np.delete(rows, i, axis=0)
        test = rows[i:i + 1, :]  # keep 2-D shape (1, n_columns) for transform

        trainScale = StandardScaler()
        trainScaled = trainScale.fit_transform(train)
        testScaled = trainScale.transform(test)
        PLSR = PLSRegression(n_components=2)
        PLSR.fit(trainScaled[:, 2:6], trainScaled[:, 0])
        pred = PLSR.predict(testScaled[:, 2:6])
        predVal.append(np.squeeze(pred))
    # NOTE(review): predictions are in per-fold scaled units while the
    # reference values come from scaling the full data set -- kept as in the
    # original; confirm this is intended.
    scaledData = scaler(data)
    R2Y = 1 - np.sum(
        (np.asarray(predVal) - scaledData[:, 0])**2) / np.sum(scaledData[:, 0]**2)
    return R2Y
예제 #3
0
def R2YCalc_ind(values, data, column, order):
    ''' Computes the R2Y value of the single-column least squares fit

    The data is scaled first; the result is one minus the ratio of the summed
    squared residuals to the summed squares of scaled column 0.
    '''
    scaled = scaler(data)
    squared_error = np.square(residuals_ind(values, scaled, column, order))
    total = np.sum(scaled[:, 0]**2)
    return 1 - np.sum(squared_error) / total
예제 #4
0
def R2YCalc_full(values, data):
    ''' Computes the R2Y value of the least squares fit over the entire data

    The data is scaled first; the result is one minus the ratio of the summed
    squared residuals to the summed squares of scaled column 0.
    '''
    scaled = scaler(data)
    squared_error = np.square(residuals_full(values, scaled))
    total = np.sum(scaled[:, 0]**2)
    return 1 - np.sum(squared_error) / total
def OLS_ind(data, column, order):
    ''' Performs least squares analysis on the data and returns the predicted constants for the equation

    Args:
        data: Raw data array; it is passed through ``scaler`` before fitting.
        column: Forwarded to ``residuals_ind`` (the column being fitted).
        order: Model order, one of 1, 2, 3 or 4. The optimisation starts from
            ``order + 1`` zeros.

    Returns:
        The ``x`` attribute of the ``least_squares`` result.

    Raises:
        ValueError: If ``order`` is not 1, 2, 3 or 4.
    '''
    # Validate before doing any work so that an unsupported order produces a
    # clear error instead of an unassigned start vector.
    if order not in (1, 2, 3, 4):
        raise ValueError(
            "order must be 1, 2, 3 or 4, got {!r}".format(order))
    y0 = np.zeros(int(order) + 1)
    newData = scaler(data)
    opt = least_squares(residuals_ind, y0, args=(newData, column, order))
    return opt.x
def cyl_PLSR(data):
    ''' Fits a two-component PLSR on the scaled data, prints its R2Y and draws the scores and loadings plots

    Columns 2..5 of the scaled data are the predictors and column 0 is the
    response. Nothing is returned; the output is the printed score and two
    matplotlib figures.
    '''
    scaled = scaler(data)
    predictors = scaled[:, 2:6]
    response = scaled[:, 0]

    model = PLSRegression(n_components=2)
    model.fit(predictors, response)
    print('The R2Y value is', model.score(predictors, response))

    # Scores: X scores are drawn first, then Y scores, on the same axes.
    plt.figure()
    for scores in (model.x_scores_, model.y_scores_):
        plt.scatter(scores[:, 0], scores[:, 1])
    plt.title('Scores Plot')

    # Loadings: one labelled point per predictor, followed by the response.
    x_loadings = model.x_loadings_
    y_loadings = model.y_loadings_
    plt.figure()
    predictor_names = ('Displacement', 'Horsepower', 'Weight', 'Acceleration')
    for row, name in enumerate(predictor_names):
        plt.scatter(x_loadings[row, 0], x_loadings[row, 1], label=name)
    plt.scatter(y_loadings[:, 0], y_loadings[:, 1], label='MPG')
    plt.title('Loadings Plot')
    plt.legend(loc='best')
예제 #7
0
def OLS_LOOCV(data, column, order):
    ''' Performs LOOCV on the data and returns R2Y value

    For every row, the least squares constants are fitted with ``OLS_ind`` on
    all other rows; the held-out row is standardised with a scaler fitted on
    those training rows and predicted with ``calcPred``.

    Args:
        data: 2-D numeric array.
        column: Forwarded to ``OLS_ind`` and ``calcPred``.
        order: Forwarded to ``OLS_ind`` and ``calcPred``.

    Returns:
        1 - sum((prediction - y)**2) / sum(y**2), where y is column 0 of
        ``scaler(data)``.
    '''
    rows = np.asarray(data, dtype=float)
    predVal = []
    for i in range(rows.shape[0]):
        # Leave row i out; np.delete returns a copy, so ``rows`` is untouched.
        train = np.delete(rows, i, axis=0)
        test = rows[i:i + 1, :]  # keep 2-D shape (1, n_columns) for transform

        opt = OLS_ind(train, column, order)
        trainScale = StandardScaler()
        trainScale.fit(train)
        testScaled = trainScale.transform(test)
        pred = calcPred(opt, testScaled, column, order)
        predVal.append(np.squeeze(pred))
    # NOTE(review): predictions are in per-fold scaled units while the
    # reference values come from scaling the full data set -- kept as in the
    # original; confirm this is intended.
    scaledData = scaler(data)
    R2Y = 1 - np.sum(
        (np.asarray(predVal) - scaledData[:, 0])**2) / np.sum(scaledData[:, 0]**2)
    return R2Y
def OLS_full(data):
    ''' Fits the full least squares model on the scaled data and returns the optimised constants

    The optimisation starts from six zeros and minimises ``residuals_full``;
    the ``x`` attribute of the ``least_squares`` result is returned.
    '''
    initial_guess = np.zeros(6)
    scaled = scaler(data)
    fit = least_squares(residuals_full, initial_guess, args=(scaled,))
    return fit.x