def _plot_terrain_surface(values):
    """Show ``values`` as a 3D surface over a 60 x 60 grid on the unit square.

    Parameters
    ----------
    values : array-like
        Heights to plot; reshaped to the (60, 60) mesh, so it must hold
        exactly 3600 elements.
    """
    x = np.linspace(0, 1, 60)
    y = np.copy(x)
    XX, YY = np.meshgrid(x, y)
    ZZ = np.reshape(values, XX.shape)

    fig = plt.figure()
    # fig.gca(projection='3d') no longer works on current Matplotlib
    # (keyword arguments to gca() were removed); add_subplot is the
    # supported way to request a 3D axes.
    ax = fig.add_subplot(111, projection='3d')
    ax.plot_surface(XX, YY, ZZ, cmap=cm.coolwarm, linewidth=0, antialiased=False)
    ax.set_zlim(-0.10, 1.40)
    ax.zaxis.set_major_locator(LinearLocator(5))
    ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
    ax.view_init(30, 45 + 90)
    plt.show()


def part_e(plotting=False):
    """Fit OLS, ridge and lasso degree-10 2D polynomials to the terrain data.

    The data come from ``real_data(file_number=2)``, which is unpacked as a
    train/test split. For each method the model is fitted on the training
    part and the MSE against ``y_test`` is printed.

    Parameters
    ----------
    plotting : bool, optional
        When true, the prediction of every method and finally the test data
        themselves are shown as 3D surfaces.
    """
    x_train, y_train, x_test, y_test = real_data(file_number=2, plotting=False)

    for method in ['ols', 'ridge', 'lasso']:
        designMatrix = DesignMatrix('polynomial2D', 10)
        leastSquares = LeastSquares(backend='manual', method=method)
        # Lasso uses a smaller penalty than ridge (and OLS, where the value
        # is set but presumably ignored).
        leastSquares.setLambda(1e-4 if method == 'lasso' else 1e-3)

        X = designMatrix.getMatrix(x_train)
        leastSquares.fit(X, y_train)

        # Evaluate on the held-out data. predict() takes no argument and is
        # pointed at new data by assigning .X first (same pattern as in
        # part_a / part_b). Previously X_test was built but never used, so
        # the training predictions were compared against y_test.
        X_test = designMatrix.getMatrix(x_test)
        leastSquares.X = X_test
        leastSquares.predict()
        leastSquares.y = y_test
        print(leastSquares.MSE())

        if plotting:
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', method+'terrain.png'), transparent=True, bbox_inches='tight')
            _plot_terrain_surface(leastSquares.yHat)

    if plotting:
        #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'test_terrain.png'), transparent=True, bbox_inches='tight')
        _plot_terrain_surface(y_test)
def plot_MSE_R2():
    """Plot 1 - R2 and MSE of manual least-squares fits versus polynomial degree.

    Draws 10^4 uniform random points on the unit square, evaluates the
    Franke function there, fits 2D polynomials of degree 2..10 and shows two
    semilog plots: 1 - R2 against degree, then MSE against degree. Both
    scores are also printed for every degree.
    """
    leastSquares = LeastSquares(backend='manual')

    n_points = int(1e4)
    first_coord = np.random.rand(n_points)
    second_coord = np.random.rand(n_points)
    x_data = np.zeros(shape=(n_points, 2))
    x_data[:, 0] = first_coord
    x_data[:, 1] = second_coord
    y_data = np.zeros(shape=(n_points))

    @jit(nopython=True, cache=True)
    def computeFrankeValues(points, out):
        # Fill `out` in place with the Franke value of every row of `points`.
        for row in range(points.shape[0]):
            out[row] = franke(points[row, 0], points[row, 1])

    computeFrankeValues(x_data, y_data)

    p_max = 10
    degrees = list(range(2, p_max + 1))
    r2_scores = []
    mse_scores = []
    for degree in degrees:
        X = DesignMatrix('polynomial2D', degree).getMatrix(x_data)
        leastSquares.fit(X, y_data)
        leastSquares.predict()
        r2_scores.append(leastSquares.R2())
        mse_scores.append(leastSquares.MSE())
        print(r2_scores[-1])
        print(mse_scores[-1])

    degrees = np.array(degrees)

    # First figure: 1 - R2 on a logarithmic axis.
    plt.semilogy(degrees, 1 - np.array(r2_scores), 'b-o', markersize=3)
    plt.rc('text', usetex=True)
    plt.xlabel(r"$p$", fontsize=10)
    plt.ylabel(r"$1-(R^2$ score$)$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS_R2.png'), transparent=True, bbox_inches='tight')
    plt.show()

    # Second figure: MSE on a logarithmic axis.
    plt.figure()
    plt.gca()
    plt.semilogy(degrees, mse_scores, 'r-o', markersize=3)
    plt.rc('text', usetex=True)
    plt.xlabel(r"$p$", fontsize=10)
    plt.ylabel(r"MSE", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS_MSE.png'), transparent=True, bbox_inches='tight')
    plt.show()
def test_DesignMatrix_polynomial():
    """Check DesignMatrix('polynomial', degree) against hand-written matrices.

    For degrees 1 through 6 a design matrix is built from a small input
    vector and compared, entry by entry, with the explicitly written-out
    matrix of powers 1, x, x^2, ..., x^degree.
    """
    # (degree, input points, expected design matrix)
    cases = [
        (1, [2.0],
         [[1.0, 2.0]]),
        (2, [2.0, 3.0],
         [[1.0, 2.0, 4.0],
          [1.0, 3.0, 9.0]]),
        (3, [2.0, 3.0, 4.0],
         [[1.0, 2.0, 4.0, 8.0],
          [1.0, 3.0, 9.0, 27.0],
          [1.0, 4.0, 16.0, 64.0]]),
        (4, [2.0, 3.0, 4.0, 5.0],
         [[1.0, 2.0, 4.0, 8.0, 16.0],
          [1.0, 3.0, 9.0, 27.0, 81.0],
          [1.0, 4.0, 16.0, 64.0, 256.0],
          [1.0, 5.0, 25.0, 125.0, 625.0]]),
        (5, [2.0, 3.0, 4.0, 5.0, 6.0],
         [[1.0, 2.0, 4.0, 8.0, 16.0, 32.0],
          [1.0, 3.0, 9.0, 27.0, 81.0, 243.0],
          [1.0, 4.0, 16.0, 64.0, 256.0, 1024.0],
          [1.0, 5.0, 25.0, 125.0, 625.0, 3125.0],
          [1.0, 6.0, 36.0, 216.0, 1296.0, 7776.0]]),
        (6, [2.0, 3.0, 4.0, 5.0, 6.0, 7.0],
         [[1.0, 2.0, 4.0, 8.0, 16.0, 32.0, 64.0],
          [1.0, 3.0, 9.0, 27.0, 81.0, 243.0, 729.0],
          [1.0, 4.0, 16.0, 64.0, 256.0, 1024.0, 4096.0],
          [1.0, 5.0, 25.0, 125.0, 625.0, 3125.0, 15625.0],
          [1.0, 6.0, 36.0, 216.0, 1296.0, 7776.0, 46656.0],
          [1.0, 7.0, 49.0, 343.0, 2401.0, 16807.0, 117649.0]]),
    ]

    for degree, points, expected in cases:
        X = DesignMatrix('polynomial', degree).getMatrix(np.array(points))
        X_true = np.array(expected)
        assert X == pytest.approx(X_true, abs=1e-15)
def test_DesignMatrix_polynomial2D():
    """Check DesignMatrix('polynomial2D', 2) against a hand-built matrix.

    Each input row (a, b) is expected to map to the six monomials
    1, a, b, a^2, a*b, b^2, in that column order.
    """
    points = [(2.0, 3.0), (4.0, 5.0), (6.0, 7.0), (8.0, 9.0)]

    X = DesignMatrix('polynomial2D', 2).getMatrix(np.array(points))

    X_true = np.array([
        [1.0, a, b, a**2, a * b, b**2]
        for a, b in points
    ])
    assert X == pytest.approx(X_true, abs=1e-15)
def test_DesignMatrix_function():
    """Tests the function method of the DesignMatrix class

    Design matrices are built from a callable basis (constant, cos, sin, tan,
    cosh, sinh, tanh) for 1, 2, 3, 4 and 6 functions and compared with
    matrices assembled directly from the corresponding numpy calls.
    """

    class f1:
        """Indexed basis: f(i, x) evaluates the i-th basis function at x."""

        def __init__(self, degree):
            # Highest function index that may be requested.
            self.degree = degree

        def __call__(self, i, x):
            basis = (self._f0, self._f1, self._f2, self._f3,
                     self._f4, self._f5, self._f6)
            if i < 0:
                raise ValueError("Specified function index must be non-negative.")
            # Also reject indices beyond the implemented functions; the old
            # if/elif chain silently returned None for them.
            if i > self.degree or i >= len(basis):
                raise ValueError(
                    "Specified function index is larger than the number of available functions."
                )
            return basis[i](x)

        def _f0(self, x):
            # Constant term. Index 0 was dispatched to this method but it was
            # never defined, so a call with i == 0 raised AttributeError.
            # The value 1 matches the first column of the expected matrices.
            return np.ones_like(x)

        def _f1(self, x):
            return np.cos(x)

        def _f2(self, x):
            return np.sin(x)

        def _f3(self, x):
            return np.tan(x)

        def _f4(self, x):
            return np.cosh(x)

        def _f5(self, x):
            return np.sinh(x)

        def _f6(self, x):
            return np.tanh(x)

    # Reference functions for columns 1..6, kept independent of f1 so that
    # the expected values do not go through the class under test.
    reference = (np.cos, np.sin, np.tan, np.cosh, np.sinh, np.tanh)

    def expected_matrix(points, n_functions):
        # One row per point: [1, cos, sin, ...] truncated to n_functions terms.
        return np.array([
            [1.0] + [fn(value) for fn in reference[:n_functions]]
            for value in points
        ])

    # (number of functions, evaluation points)
    cases = [
        (1, [np.pi / 2]),
        (2, [np.pi / 2, np.pi / 3]),
        (3, [np.pi / 2, np.pi / 3, np.pi / 4, np.pi / 5, np.pi / 6]),
        (4, [np.pi / 2, np.pi / 3]),
        (6, [np.pi / 2, np.pi / 3, np.pi / 4, np.pi / 5,
             np.pi / 6, np.pi / 7, np.pi / 8, np.pi / 9]),
    ]

    for numberOfFunctions, points in cases:
        f = f1(numberOfFunctions)
        x = np.array(points)
        DM = DesignMatrix(f, numberOfFunctions)
        X = DM.getMatrix(x)
        X_true = expected_matrix(points, numberOfFunctions)
        assert X == pytest.approx(X_true, abs=1e-15)
# NOTE(review): these statements look like the interior of a function whose
# `def` line is not visible in this chunk; `degree` is presumably one of its
# parameters -- confirm against the full file.
degree = int(degree)
designMatrix = DesignMatrix('polynomial2D', degree)
# Ridge regression with a fixed penalty of 4.
leastSquares = LeastSquares(method="ridge", backend='manual')
Lambda = 4
leastSquares.setLambda(Lambda)
crossvalidation = CrossValidation(leastSquares, designMatrix)

# 10^4 uniform random points on the unit square, stored as rows of x_data.
N = int(1e4)
x = np.random.rand(N)
y = np.random.rand(N)
x_data = np.zeros(shape=(N, 2))
x_data[:, 0] = x
x_data[:, 1] = y
# Output buffer, filled in place by computeFrankeValues below.
y_data = np.zeros(shape=(N))

X = designMatrix.getMatrix(x_data)
XT_X = np.dot(X.T, X)
# The printed value is det(X^T X + Lambda * I), i.e. the ridge-regularised
# matrix, even though the label reads "det(X^T*X)".
print("det(X^T*X): %g" % (np.linalg.det(XT_X + Lambda * np.eye(XT_X.shape[0]))))

noise_strength = 0.5

@jit(nopython=True, cache=True)
def computeFrankeValues(x_data, y):
    # Writes franke(x, y) plus noise_strength * U[0, 1) noise into y, row by row.
    N = x_data.shape[0]
    for i in range(N):
        y[i] = franke(x_data[i, 0], x_data[i, 1]) + noise_strength * np.random.random()

computeFrankeValues(x_data, y_data)
def part_a(plotting=False):
    """Bootstrap manual least-squares fits of the Franke function, degrees 2-5.

    For every degree a fresh uniform sample of 10^4 points on the unit square
    is drawn, the (noise-free) Franke function is evaluated there and
    ``Bootstrap.resample`` is run on the data. MSE and R2 are then read from
    the estimator and the beta variance from the bootstrap object.

    Parameters
    ----------
    plotting : bool, optional
        When true, per-degree scores and coefficients are printed and the
        absolute difference between the fitted model and the Franke function
        is shown as a 3D surface on a 100 x 100 grid.

    Returns
    -------
    tuple of list
        ``(MSE_degree, R2_degree, betaVariance_degree)``, one entry per degree.
    """
    MSE_degree = []
    R2_degree = []
    betaVariance_degree = []

    # Defined once instead of once per degree: the function does not depend
    # on the loop, and re-creating it re-ran the jit decorator every time.
    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        # Fills y in place with the Franke value of every row of x_data.
        N = x_data.shape[0]
        for i in range(N):
            y[i] = franke(x_data[i, 0], x_data[i, 1])

    for degree in [2, 3, 4, 5]:  # ,6,7,8,9]
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(backend='manual')
        bootstrap = Bootstrap(leastSquares, designMatrix)

        N = int(1e4)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)

        # Third argument is presumably the number of bootstrap resamples.
        bootstrap.resample(x_data, y_data, 1000)

        MSE_degree.append(leastSquares.MSE())
        R2_degree.append(leastSquares.R2())
        betaVariance_degree.append(bootstrap.betaVariance)

        if plotting:
            print("MSE: ", MSE_degree[-1])
            print("R2:  ", R2_degree[-1])
            print("Beta Variance: ")
            for b in betaVariance_degree[-1]:
                print(b)
            print("Beta: ")
            for b in leastSquares.beta:
                print(b)
            print(" ")

            M = 100
            fig = plt.figure()
            # fig.gca(projection='3d') no longer works on current Matplotlib
            # (keyword arguments to gca() were removed).
            ax = fig.add_subplot(111, projection='3d')
            grid_x = np.linspace(0, 1, M)
            grid_y = np.linspace(0, 1, M)
            X, Y = np.meshgrid(grid_x, grid_y)
            grid_points = np.vstack([X.ravel(), Y.ravel()]).T

            # Exact Franke values on the grid.
            franke_values = np.zeros(shape=(grid_points.shape[0]))
            computeFrankeValues(grid_points, franke_values)

            # Model prediction on the grid: predict() evaluates whatever
            # design matrix is stored in leastSquares.X.
            leastSquares.X = designMatrix.getMatrix(grid_points)
            model_values = leastSquares.predict()

            Z = np.reshape(model_values.T, X.shape)
            ZF = np.reshape(franke_values.T, X.shape)

            #ax.plot_surface(X,Y,Z,cmap=cm.coolwarm,linewidth=0, antialiased=False)
            ax.plot_surface(X, Y, abs(Z - ZF), cmap=cm.coolwarm, linewidth=0, antialiased=False)
            ax.set_zlim(-0.10, 1.40)
            ax.zaxis.set_major_locator(LinearLocator(5))
            ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
            ax.view_init(30, 45)

            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'franke.png'), transparent=True, bbox_inches='tight')
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS'+str(degree)+'.png'), transparent=True, bbox_inches='tight')
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS'+str(degree)+'_diff.png'), transparent=True, bbox_inches='tight')
            plt.show()

    print("\nMSE :")
    print(MSE_degree)
    print("\nR2 :")
    print(R2_degree)
    print("\nσ²(β) :")
    print(betaVariance_degree)

    return MSE_degree, R2_degree, betaVariance_degree
def _terrain_test_mse(method, degree, x_train, y_train, x_test, y_test, lambda_=None):
    """Fit one model on the training data and return its MSE on the test data.

    Parameters
    ----------
    method : str
        Regression method passed to ``LeastSquares`` ('ols', 'ridge', 'lasso').
    degree : int
        Degree of the 2D polynomial design matrix.
    x_train, y_train, x_test, y_test
        Train/test split as returned by ``real_data``.
    lambda_ : float, optional
        Penalty passed to ``setLambda``; left unset when None (OLS).
    """
    designMatrix = DesignMatrix('polynomial2D', degree)
    leastSquares = LeastSquares(backend='manual', method=method)
    if lambda_ is not None:
        leastSquares.setLambda(lambda_)

    leastSquares.fit(designMatrix.getMatrix(x_train), y_train)

    # predict() takes no argument and is pointed at new data by assigning .X
    # first (same pattern as in part_a / part_b). Previously the test matrix
    # was built but never used, so training predictions were scored
    # against y_test.
    leastSquares.X = designMatrix.getMatrix(x_test)
    leastSquares.predict()
    leastSquares.y = y_test
    return leastSquares.MSE()


def part_e_2():
    """Plot test MSE versus lambda for OLS, ridge and lasso on the terrain data.

    Degree-10 2D polynomials are fitted on the training part of
    ``real_data(file_number=2)``. Ridge is scanned over 20 log-spaced lambdas
    in [1e-5, 1], lasso over 20 in [1e-4, 1]; OLS is drawn as a horizontal
    reference line. The figure is saved to ``figures/lambda_terrain.png``
    next to this file and then shown.
    """
    x_train, y_train, x_test, y_test = real_data(file_number=2, plotting=False)
    degree = 10
    data = (x_train, y_train, x_test, y_test)

    ols_MSE = _terrain_test_mse('ols', degree, *data)
    print(ols_MSE)
    # OLS has no lambda: draw it as a flat line across the plotted range.
    ols_MSE = np.array([ols_MSE, ols_MSE])
    ols_lambda = np.array([1e-5, 1])

    ridge_lambda = np.logspace(-5, 0, 20)
    ridge_MSE = []
    for lambda_ in ridge_lambda:
        print("ridge " + str(lambda_))
        mse = _terrain_test_mse('ridge', degree, *data, lambda_=lambda_)
        ridge_MSE.append(mse)
        print(mse)
    ridge_MSE = np.array(ridge_MSE)

    lasso_lambda = np.logspace(-4, 0, 20)
    lasso_MSE = []
    for lambda_ in lasso_lambda:
        print("lasso " + str(lambda_))
        lasso_MSE.append(_terrain_test_mse('lasso', degree, *data, lambda_=lambda_))
    # Was np.array(ridge_MSE): the lasso curve silently re-plotted the ridge
    # results (both scans have 20 points, so no shape error exposed it).
    lasso_MSE = np.array(lasso_MSE)

    ########################################################
    plt.rc('text', usetex=True)

    plt.loglog(ols_lambda, ols_MSE, 'k--o', markersize=1, linewidth=1, label=r'OLS')
    plt.loglog(ridge_lambda, ridge_MSE, 'r-o', markersize=1, linewidth=1, label=r'Ridge')
    plt.loglog(lasso_lambda, lasso_MSE, 'b-o', markersize=1, linewidth=1, label=r'Lasso')

    plt.xlabel(r"$\lambda$", fontsize=10)
    plt.ylabel(r"MSE", fontsize=10)
    #plt.subplots_adjust(left=0.2,bottom=0.2)
    plt.legend(fontsize=10)

    plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'lambda_terrain.png'),
                transparent=True, bbox_inches='tight')
    plt.show()
def part_b():
    """Plot MSE against noise scale for OLS and two ridge penalties.

    For each of the three values of np.logspace(-2, 0, 3) (the first one is
    run as plain OLS, the other two as ridge with that lambda) and each of 50
    noise scales in [0, 1], a degree-10 2D polynomial is fitted through
    ``Bootstrap.resample`` to 1000 noisy Franke samples. The MSE read from
    the estimator right after resampling is plotted on log-log axes; the
    scores on a second, independently drawn sample are collected but not
    plotted.
    """
    R2 = []
    MSE = []
    R2_noise = []
    MSE_noise = []
    beta_noise = []
    betaVariance_noise = []

    noise = np.linspace(0, 1.0, 50)
    k = 1
    line_colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']

    fig, ax1 = plt.subplots()
    plt.rc('text', usetex=True)

    @jit(nopython=True, cache=True)
    def computeFrankeValues(points, out):
        # Fill `out` in place with the Franke value of every row of `points`.
        for row in range(points.shape[0]):
            out[row] = franke(points[row, 0], points[row, 1])

    def draw_noisy_sample(eta):
        # 1000 uniform points on the unit square plus Franke values with
        # N(0, eta^2) noise. Draw order (x, y, noise) matters for
        # reproducibility under a fixed seed.
        n_points = int(1000)
        first_coord = np.random.rand(n_points)
        second_coord = np.random.rand(n_points)
        points = np.zeros(shape=(n_points, 2))
        points[:, 0] = first_coord
        points[:, 1] = second_coord
        values = np.zeros(shape=(n_points))
        computeFrankeValues(points, values)
        noisy = values + eta * np.random.standard_normal(size=n_points)
        return points, noisy

    for ind, lambda_ in enumerate(np.logspace(-2, 0, 3)):
        use_ols = ind == 0
        MSE_noise = []

        for eta in noise:
            designMatrix = DesignMatrix('polynomial2D', 10)
            if use_ols:
                leastSquares = LeastSquares(backend='manual', method='ols')
            else:
                leastSquares = LeastSquares(backend='manual', method='ridge')
            leastSquares.setLambda(lambda_)
            bootstrap = Bootstrap(leastSquares, designMatrix)

            x_data, y_data_noise = draw_noisy_sample(eta)
            bootstrap.resample(x_data, y_data_noise, k)

            MSE_noise.append(leastSquares.MSE())
            R2_noise.append(leastSquares.R2())
            beta_noise.append(bootstrap.beta)
            betaVariance_noise.append(bootstrap.betaVariance)

            # Different noise, test data
            x_data, y_data_noise = draw_noisy_sample(eta)
            leastSquares.X = designMatrix.getMatrix(x_data)
            leastSquares.predict()
            leastSquares.y = y_data_noise

            MSE.append(leastSquares.MSE())
            R2.append(leastSquares.R2())

        if use_ols:
            ax1.loglog(noise, np.array(MSE_noise), line_colors[ind] + '--',
                       markersize=1, label=r"OLS")
        else:
            ax1.loglog(noise, np.array(MSE_noise), line_colors[ind] + '-',
                       markersize=1,
                       label=r"$\lambda=10^{%d}$" % (int(np.log10(lambda_))))

    plt.ylabel(r"MSE", fontsize=10)
    plt.xlabel(r"noise scale $\eta$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)

    #ax1.set_ylim([0.95*min(min(MSE_noise), min(R2_noise)), 1.05*(max(max(MSE_noise), max(R2_noise)))])

    ax1.legend()
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'MSE_ridge_noise.png'), transparent=True, bbox_inches='tight')
    plt.show()
Degree = [] for i in range(1, max_degree + 1): Degree.append(i) Bias_vec = [] Var_vec = [] MSE_vec = [] print("#########################################################") for degree in Degree: degree = int(degree) designMatrix = DesignMatrix('polynomial2D', degree) X = designMatrix.getMatrix(X) XT_X = np.dot(X.T, X) print("det(X^T*X): %g" % (np.linalg.det(XT_X + Lambda * np.eye(XT_X.shape[0])))) X_test_deg = designMatrix.getMatrix(X_test) # The following (m x n_bootstraps) matrix holds the column vectors y_pred # for each bootstrap iteration. X_train = np.zeros(shape=(x1_train.shape[0], 2)) n_bootstraps = 100 y_pred = np.empty((y_test.shape[0], n_bootstraps)) for i in range(n_bootstraps): x1_, x2_, y_ = resample(x1_train, x2_train, y_train) X_train[:, 0] = x1_