def part1_2d():
    """Linear regression on all 13 attributes of ``boston.mat`` over 20 splits.

    For each of 20 train/test splits (``test_size=0.33``) a least-squares
    model with a bias term is fitted on the training rows via the normal
    equations. The same fitted weights are then scored on the training rows
    and on the held-out test rows, so the test figure is an out-of-sample
    error.

    Returns:
        Tuple ``(mse_train, mse_test, train_std, test_std)``: the train and
        test MSE averaged over the 20 runs, followed by the sample standard
        deviation (``ddof=1``) of the per-run train and test MSEs.
    """
    n_runs = 20
    n_features = 13  # columns 0..12 are the inputs, column 13 is the target

    # load the data
    data = scio.loadmat('boston.mat')['boston']

    mse_price_train = np.zeros(n_runs)
    mse_price_test = np.zeros(n_runs)

    for j in range(n_runs):
        # draw a fresh train/test split for every run
        data_train, data_test = train_test_split(data, test_size=0.33)
        y_train = data_train[:, n_features]
        y_test = data_test[:, n_features]

        # design matrices: the 13 attributes plus a constant bias column
        x_train = np.asmatrix(
            np.c_[data_train[:, range(n_features)], np.ones(len(data_train))])
        x_test = np.asmatrix(
            np.c_[data_test[:, range(n_features)], np.ones(len(data_test))])

        # Normal equations on the TRAINING split only. The product with the
        # 1-D target comes back as a row matrix, hence the transpose to get
        # a column of weights.
        w = (inv(x_train.T @ x_train) @ x_train.T @ y_train).T

        # score the same weights on both splits
        mse_price_train[j] = lr.mean_squared_error(x_train @ w, y_train)
        mse_price_test[j] = lr.mean_squared_error(x_test @ w, y_test)

    # spread of the per-run errors
    train_std = np.std(mse_price_train, ddof=1)
    test_std = np.std(mse_price_test, ddof=1)

    # average MSE over the runs
    mean_mse_train = np.mean(mse_price_train)
    mean_mse_test = np.mean(mse_price_test)

    print("Linear regression with all attributes\t", '\t MSE train\t',
          mean_mse_train, '\tMSE test\t', mean_mse_test)
    return mean_mse_train, mean_mse_test, train_std, test_std
def part1_2a():
    """Naive (constant-function) regression on ``boston.mat`` over 20 splits.

    For each of 20 train/test splits (``test_size=0.33``) the least-squares
    constant is fitted on the training targets (column 13) using a single
    all-ones feature; this constant is the mean of the training targets.
    That one constant is then scored against the training targets and
    against the held-out test targets.

    Returns:
        Tuple ``(mse_train, mse_test, train_std, test_std)``: the train and
        test MSE averaged over the 20 runs, followed by the sample standard
        deviation (``ddof=1``) of the per-run train and test MSEs.
    """
    n_runs = 20
    target_col = 13

    # load the data
    data = scio.loadmat('boston.mat')['boston']

    mse_price_train = np.zeros(n_runs)
    mse_price_test = np.zeros(n_runs)

    for j in range(n_runs):
        # draw a fresh train/test split for every run
        data_train, data_test = train_test_split(data, test_size=0.33)
        y_train = data_train[:, target_col]
        y_test = data_test[:, target_col]

        # the only feature is a column of ones, sized to the current split
        x_train = np.asmatrix(np.ones(len(data_train))).T
        x_test = np.asmatrix(np.ones(len(data_test))).T

        # fit the constant on the training split only
        w_price = inv(x_train.T @ x_train) @ x_train.T @ y_train

        # the test error applies the training constant to the test rows and
        # compares against the TEST targets
        mse_price_train[j] = lr.mean_squared_error(x_train * w_price, y_train)
        mse_price_test[j] = lr.mean_squared_error(x_test * w_price, y_test)

    # spread of the per-run errors
    train_std = np.std(mse_price_train, ddof=1)
    test_std = np.std(mse_price_test, ddof=1)

    # average MSE over the runs
    mean_mse_train = np.mean(mse_price_train)
    mean_mse_test = np.mean(mse_price_test)

    print('average MSE for train set\t', mean_mse_train,
          '\naverage MSE for test set\t', mean_mse_test)
    return mean_mse_train, mean_mse_test, train_std, test_std
def part1_1_1_c():
    """Print the MSE of the four fitted models ``model_d1`` .. ``model_d4``.

    Uses the module-level ``x``, ``y`` and ``model_d1`` .. ``model_d4``. For
    each dimension 1 to 4, ``x`` is expanded with ``lr.pl_featured`` and
    passed with the matching model to ``lr.estimate``; the result is then
    compared with ``y`` through ``lr.mean_squared_error``. Nothing is
    returned; the four errors are printed on one line.
    """
    models = (model_d1, model_d2, model_d3, model_d4)

    # estimated values for every dimension first ...
    estimates = [
        lr.estimate(lr.pl_featured(x, dim), model)
        for dim, model in enumerate(models, start=1)
    ]

    # ... then the corresponding MSEs, in the same order
    errors = tuple(lr.mean_squared_error(y, est) for est in estimates)

    print("MSE 1D: %f, MSE 2D: %f, MSE 3D: %f, MSE 4D: %f" % errors)
def mse_1d_to_18d_sin(x1, y1, xt, yt):
    """Return the MSE on ``(xt, yt)`` of the sine-basis fits of dimension 1..18.

    For every dimension ``k`` from 1 to 18 a model is fitted on ``(x1, y1)``
    with ``lr.fit_sin``, then evaluated on ``xt`` (expanded with
    ``lr.pl_featured_sin``) and scored against ``yt``.

    Args:
        x1: inputs used for fitting.
        y1: targets used for fitting.
        xt: inputs used for evaluation.
        yt: targets used for evaluation.

    Returns:
        ``numpy`` array of shape ``(18, 1)``; row ``k - 1`` holds the MSE of
        the dimension-``k`` model.
    """
    max_dim = 18
    errors = np.zeros(shape=(max_dim, 1))

    for row, dim in enumerate(range(1, max_dim + 1)):
        # weights for this dimension, learned from (x1, y1)
        weights = lr.fit_sin(x1, y1, dim)
        # predictions on the evaluation inputs
        predicted = lr.estimate(lr.pl_featured_sin(xt, dim), weights)
        errors[row] = lr.mean_squared_error(yt, predicted)

    return errors
def part1_3c():
    """Fit the ``krr`` model once on a split of ``boston.mat`` and score it.

    The data is split with ``test_size=0.33``; columns 0..12 are the inputs
    and column 13 is the target. The model is fitted with the fixed
    parameters ``sigma = 2**10`` and ``gamma = 2**(-31)`` (described in the
    original code as the best-performing ones) and its MSE is measured on
    both the training and the test rows.

    Returns:
        Tuple ``(mse_train, mse_test)``.
    """
    # best performance parameters
    sigma, gamma = 2**10, 2**(-31)

    # load data from the .mat file and split it into train and test rows
    boston = scio.loadmat('boston.mat')['boston']
    train_rows, test_rows = train_test_split(boston, test_size=0.33)

    feature_cols = range(13)
    x_train, y_train = train_rows[:, feature_cols], train_rows[:, 13]
    x_test, y_test = test_rows[:, feature_cols], test_rows[:, 13]

    # train the model
    alpha = krr.fit(x_train, sigma, gamma, y_train)

    # error on the rows the model was trained on
    mse_train = lr.mean_squared_error(
        y_train, krr.estimate(x_train, x_train, sigma, alpha))

    # error on the held-out rows
    mse_test = lr.mean_squared_error(
        y_test, krr.estimate(x_train, x_test, sigma, alpha))

    return mse_train, mse_test
def part1_2c():
    """Single-attribute linear regression on ``boston.mat`` over 20 splits.

    For each of 20 train/test splits (``test_size=0.33``) and for each of
    the 13 attributes in turn, a least-squares model using that attribute
    plus a bias term is fitted on the training rows via the normal
    equations. The same fitted weights are scored on the training rows and
    on the held-out test rows. All 13 attributes share one split per run.

    Returns:
        Tuple ``(mse_mean_train, mse_mean_test, train_std, test_std)``; each
        element is a length-13 array indexed by attribute, holding the MSE
        averaged over the 20 runs and the sample standard deviation
        (``ddof=1``) of the per-run MSEs.
    """
    n_runs = 20
    n_features = 13  # columns 0..12 are the inputs, column 13 is the target

    # load the data
    data = scio.loadmat('boston.mat')['boston']

    # one row per run, one column per attribute
    mse_price_train = np.zeros((n_runs, n_features))
    mse_price_test = np.zeros((n_runs, n_features))

    for j in range(n_runs):
        # draw a fresh train/test split for every run
        data_train, data_test = train_test_split(data, test_size=0.33)
        y_train = data_train[:, n_features]
        y_test = data_test[:, n_features]
        bias_train = np.ones(len(data_train))
        bias_test = np.ones(len(data_test))

        for i in range(n_features):
            # two-column design matrices: attribute i and the bias term
            x_train = np.asmatrix(np.vstack((data_train[:, i], bias_train))).T
            x_test = np.asmatrix(np.vstack((data_test[:, i], bias_test))).T

            # Normal equations on the TRAINING split only. The product with
            # the 1-D target comes back as a row matrix, hence the transpose
            # to get a column of weights.
            w = (inv(x_train.T @ x_train) @ x_train.T @ y_train).T

            # score the same weights on both splits
            mse_price_train[j, i] = lr.mean_squared_error(x_train @ w, y_train)
            mse_price_test[j, i] = lr.mean_squared_error(x_test @ w, y_test)

    # per-attribute spread and average over the runs
    train_std = np.std(mse_price_train, axis=0, ddof=1)
    test_std = np.std(mse_price_test, axis=0, ddof=1)
    mse_mean_price_train = np.mean(mse_price_train, axis=0)
    mse_mean_price_test = np.mean(mse_price_test, axis=0)

    print("For liner regression with single attribute\t ")
    for i, (mse_train, mse_test) in enumerate(
            zip(mse_mean_price_train, mse_mean_price_test)):
        print("Linear regression attribute", i + 1, "\tMSE train\t",
              mse_train, "\tMSE test\t", mse_test, "\n")

    return mse_mean_price_train, mse_mean_price_test, train_std, test_std
def k_fold_crossvalidation(k, data_x, data_y, sigma, gamma):
    """Return the mean held-out MSE of the ``krr`` model over ``k`` folds.

    The rows of ``data_x`` / ``data_y`` are partitioned with
    ``KFold(n_splits=k)``. For every fold the model is fitted on the
    training part with the given ``sigma`` and ``gamma`` and scored on the
    remaining part.

    Args:
        k: number of folds passed to ``KFold``.
        data_x: inputs; must support indexing by the index sets that
            ``KFold.split`` yields (presumably a NumPy array - confirm
            against callers).
        data_y: targets, indexed the same way as ``data_x``.
        sigma: passed through to ``krr.fit`` and ``krr.estimate``.
        gamma: passed through to ``krr.fit``.

    Returns:
        The average of the per-fold MSE values.
    """
    fold_mses = []
    for train_idx, test_idx in KFold(n_splits=k).split(data_x):
        x_train, x_test = data_x[train_idx], data_x[test_idx]
        y_train, y_test = data_y[train_idx], data_y[test_idx]

        # fit on this fold's training part, predict its held-out part
        alpha = krr.fit(x_train, sigma, gamma, y_train)
        y_e = krr.estimate(x_train, x_test, sigma, alpha)
        fold_mses.append(lr.mean_squared_error(y_test, y_e))

    # use the built-in sum directly instead of rebinding the name ``sum``
    return sum(fold_mses) / len(fold_mses)