def part_e(plotting=False):
    """Fit OLS, ridge and lasso models to the terrain data and print the test MSE.

    For every method a degree-10 ``polynomial2D`` design matrix is built from
    the training coordinates returned by ``real_data(file_number=2)``, the
    model is fitted on the training set and its MSE against ``y_test`` is
    printed.  Ridge uses lambda = 1e-3 and lasso uses lambda = 1e-4.

    Parameters
    ----------
    plotting : bool, optional
        When true, show a 3D surface of each model's prediction and, once all
        methods have run, a surface of the test targets.
    """
    x_train, y_train, x_test, y_test = real_data(file_number=2, plotting=False)

    def draw_surface(values):
        # Draw ``values`` as a surface over a 60 x 60 grid on the unit square.
        # NOTE(review): np.reshape needs ``values`` to hold exactly 60 * 60
        # entries -- confirm against what real_data returns.
        x = np.linspace(0, 1, 60)
        y = np.copy(x)
        XX, YY = np.meshgrid(x, y)
        ZZ = np.reshape(values, XX.shape)

        fig = plt.figure()
        # Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and
        # removed in 3.6; add_subplot is the supported way to get 3D axes.
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(XX, YY, ZZ, cmap=cm.coolwarm, linewidth=0,
                        antialiased=False)
        ax.set_zlim(-0.10, 1.40)
        ax.zaxis.set_major_locator(LinearLocator(5))
        ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
        ax.view_init(30, 45 + 90)

    for method in ['ols', 'ridge', 'lasso']:
        designMatrix = DesignMatrix('polynomial2D', 10)
        leastSquares = LeastSquares(backend='manual', method=method)
        leastSquares.setLambda(1e-4 if method == 'lasso' else 1e-3)

        X = designMatrix.getMatrix(x_train)
        leastSquares.fit(X, y_train)

        # Evaluate on the test design matrix.  The original built X_test but
        # never used it, so the prediction was made with whatever matrix the
        # fit had left behind.
        # NOTE(review): assumes predict() reads the matrix stored in
        # leastSquares.X, which is how the other routines in this file
        # (part_a, part_b) predict on new data.
        X_test = designMatrix.getMatrix(x_test)
        leastSquares.X = X_test
        leastSquares.predict()
        leastSquares.y = y_test
        print(leastSquares.MSE())

        if plotting:
            draw_surface(leastSquares.yHat)
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', method+'terrain.png'), transparent=True, bbox_inches='tight')
            plt.show()

    if plotting:
        draw_surface(y_test)
        #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'test_terrain.png'), transparent=True, bbox_inches='tight')
        plt.show()
def test_bootstrap_resample():
    """Test the resample method of the Bootstrap class.

    The active check verifies that the mean variance of the bootstrapped
    beta values grows strictly as the noise added to the data grows.
    """
    # Degenerate case: all data points identical, so the variance should be
    # zero.  The objects are still constructed, but the resample call and
    # its assertion stay disabled because they raised
    # LinAlgError("Singular matrix") on TravisCI while passing locally.
    OLS = LeastSquares()
    DM = DesignMatrix('polynomial', 3)
    bootstrap = Bootstrap(OLS, DM)
    x = np.ones(10) * 2.25
    y = x**3
    #bootstrap.resample(x, y, 10)
    #assert bootstrap.betaVariance == pytest.approx(np.zeros(4), abs=1e-15)

    # Basis function table: keys 0-9 hold sin(k x), cos(k x) for k = 1..5,
    # keys 10-15 hold sin(k x)**2, cos(k x)**2 for k = 1..3.  The frequency
    # is bound through a default argument so that every lambda keeps its
    # own value of k.
    functions = {}
    for k in range(1, 6):
        functions[2 * k - 2] = lambda x, k=k: np.sin(k * x)
        functions[2 * k - 1] = lambda x, k=k: np.cos(k * x)
    for k in range(1, 4):
        functions[2 * k + 8] = lambda x, k=k: np.sin(k * x)**2
        functions[2 * k + 9] = lambda x, k=k: np.cos(k * x)**2

    def basis(j, x):
        return functions[j](x)

    DM = DesignMatrix(basis, 9)
    OLS = LeastSquares()
    bootstrap = Bootstrap(OLS, DM)

    N = 100
    x = np.linspace(0, 2 * np.pi, N)

    # Larger noise in the data set must give a larger mean variance of the
    # resampled beta values than the previous, smaller noise level did.
    previous = None
    for noiseScale in (0.0, 0.1, 1.0):
        perturbation = np.random.normal(0, noiseScale, N)
        y = np.sin(1.5 * x) - 0.5 * np.cos(2 * x)**2 + perturbation
        bootstrap.resample(x, y, 100)

        current = np.mean(bootstrap.betaVariance)
        if previous is not None:
            assert previous < current
        previous = current
def plot_MSE_R2():
    """Plot 1 - R2 and the MSE of OLS fits to the Franke function.

    Draws 10^4 uniformly random points in the unit square, evaluates
    ``franke`` on them and fits 2D polynomials of degree 2 through 10 with
    the 'manual' least-squares backend.  The R2 score and the MSE of every
    fit are printed, then shown in two semi-logarithmic figures.
    """
    leastSquares = LeastSquares(backend='manual')

    N = int(1e4)
    x = np.random.rand(N)
    y = np.random.rand(N)
    x_data = np.column_stack((x, y))
    y_data = np.zeros(N)

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        # Fill y in place with the Franke function value of every row.
        for row in range(x_data.shape[0]):
            y[row] = franke(x_data[row, 0], x_data[row, 1])

    computeFrankeValues(x_data, y_data)

    p_max = 10
    degrees = list(range(2, p_max + 1))
    R2 = []
    MSE = []
    for degree in degrees:
        X = DesignMatrix('polynomial2D', degree).getMatrix(x_data)
        leastSquares.fit(X, y_data)
        leastSquares.predict()

        R2.append(leastSquares.R2())
        MSE.append(leastSquares.MSE())
        print(R2[-1])
        print(MSE[-1])

    p = np.array(degrees)

    # First figure: 1 - R2 against the polynomial degree.
    plt.semilogy(p, 1 - np.array(R2), 'b-o', markersize=3)
    plt.rc('text', usetex=True)
    plt.xlabel(r"$p$", fontsize=10)
    plt.ylabel(r"$1-(R^2$ score$)$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS_R2.png'), transparent=True, bbox_inches='tight')
    plt.show()

    # Second figure: MSE against the polynomial degree.
    plt.figure()
    plt.gca()
    plt.semilogy(p, MSE, 'r-o', markersize=3)
    plt.rc('text', usetex=True)
    plt.xlabel(r"$p$", fontsize=10)
    plt.ylabel(r"MSE", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS_MSE.png'), transparent=True, bbox_inches='tight')
    plt.show()
def test_DesignMatrix_polynomial2D():
    """Test the polynomial2D method of the DesignMatrix class.

    A degree-2 design matrix is built for four (x, y) points and compared
    to the matrix whose rows are [1, x, y, x^2, x*y, y^2].
    """
    points = [(2.0, 3.0), (4.0, 5.0), (6.0, 7.0), (8.0, 9.0)]
    x = np.array(points)

    X = DesignMatrix('polynomial2D', 2).getMatrix(x)

    # One row per point, columns ordered by total degree.
    X_true = np.array([[1.0, a, b, a**2, a * b, b**2] for a, b in points])
    assert X == pytest.approx(X_true, abs=1e-15)
def test_DesignMatrix_function():
    """Test the function method of the DesignMatrix class.

    Design matrices are built from a callable ``f(i, x)`` for 1, 2, 3, 4 and
    6 basis functions and compared to manually evaluated matrices whose
    first column is 1 and whose following columns are cos, sin, tan, cosh,
    sinh and tanh of the sample points.
    """

    class f1:
        """Indexed family of basis functions, callable as ``f(i, x)``."""

        def __init__(self, degree):
            # Largest function index this instance accepts.
            self.degree = degree

        def __call__(self, i, x):
            """Return basis function number ``i`` evaluated at ``x``.

            Raises
            ------
            ValueError
                If ``i`` exceeds ``degree`` or is not one of the indices
                0 through 6 for which a basis function is defined.
            """
            if i > self.degree:
                raise ValueError(
                    "Specified function index is larger than the number of available functions."
                )
            basis = (self._f0, self._f1, self._f2, self._f3, self._f4,
                     self._f5, self._f6)
            # The original if/elif chain fell through and returned None
            # for any other index; fail loudly instead.
            if not 0 <= i < len(basis):
                raise ValueError(
                    "No basis function is defined for index %r." % (i,))
            return basis[i](x)

        def _f0(self, x):
            # Constant term.  Index 0 was dispatched to this method but the
            # method did not exist, so f(0, x) raised AttributeError.  The
            # value matches the leading column of ones in the expected
            # matrices below.
            return np.ones_like(x, dtype=float)

        def _f1(self, x):
            return np.cos(x)

        def _f2(self, x):
            return np.sin(x)

        def _f3(self, x):
            return np.tan(x)

        def _f4(self, x):
            return np.cosh(x)

        def _f5(self, x):
            return np.sinh(x)

        def _f6(self, x):
            return np.tanh(x)

    # Reference functions for columns 1..6 of the design matrix.
    columns = [np.cos, np.sin, np.tan, np.cosh, np.sinh, np.tanh]

    # (number of functions, sample points) for every test case.
    cases = [
        (1, [np.pi / 2]),
        (2, [np.pi / 2, np.pi / 3]),
        (3, [np.pi / 2, np.pi / 3, np.pi / 4, np.pi / 5, np.pi / 6]),
        (4, [np.pi / 2, np.pi / 3]),
        (6, [np.pi / 2, np.pi / 3, np.pi / 4, np.pi / 5, np.pi / 6,
             np.pi / 7, np.pi / 8, np.pi / 9]),
    ]

    for numberOfFunctions, points in cases:
        f = f1(numberOfFunctions)
        x = np.array(points)
        DM = DesignMatrix(f, numberOfFunctions)
        X = DM.getMatrix(x)

        X_true = np.array(
            [[1.0] + [g(point) for g in columns[:numberOfFunctions]]
             for point in points])
        assert X == pytest.approx(X_true, abs=1e-15)
def test_DesignMatrix_polynomial():
    """Test the polynomial method of the DesignMatrix class.

    For every degree d from 1 to 6 the design matrix of the points
    2.0, 3.0, ..., d + 1 is compared to the matrix whose rows are
    [1, x, x^2, ..., x^d].
    """
    for degree in range(1, 7):
        points = [float(value) for value in range(2, degree + 2)]
        x = np.array(points)

        X = DesignMatrix('polynomial', degree).getMatrix(x)

        # One row per point, one column per power 0..degree.
        X_true = np.array([[point**power for power in range(degree + 1)]
                           for point in points])
        assert X == pytest.approx(X_true, abs=1e-15)
np.mean(MSE_train) ] return averages if __name__ == '__main__': Degree = 1 + np.arange(20) train_MSE = [] test_MSE = [] print("#########################################################") for degree in Degree: degree = int(degree) designMatrix = DesignMatrix('polynomial2D', degree) leastSquares = LeastSquares(method="ridge", backend='manual') Lambda = 4 leastSquares.setLambda(Lambda) crossvalidation = CrossValidation(leastSquares, designMatrix) N = int(1e4) x = np.random.rand(N) y = np.random.rand(N) x_data = np.zeros(shape=(N, 2)) x_data[:, 0] = x x_data[:, 1] = y y_data = np.zeros(shape=(N)) X = designMatrix.getMatrix(x_data) XT_X = np.dot(X.T, X)
def part_a(plotting=False):
    """Fit OLS polynomials of degree 2-5 to the Franke function.

    For every degree, 10^4 uniformly random points in the unit square are
    drawn, ``franke`` is evaluated on them and
    ``bootstrap.resample(x_data, y_data, 1000)`` is run on an OLS model with
    a ``polynomial2D`` design matrix.

    Parameters
    ----------
    plotting : bool, optional
        When true, print the MSE, R2, beta variances and beta values of each
        degree and show a 3D surface of the absolute difference between the
        model prediction and the Franke function on a 100 x 100 grid.

    Returns
    -------
    tuple of lists
        ``(MSE_degree, R2_degree, betaVariance_degree)``, each with one
        entry per degree.
    """
    MSE_degree = []
    R2_degree = []
    betaVariance_degree = []

    # Declared once, outside the loop: the original re-declared (and so
    # re-dispatched) the jitted helper on every iteration.
    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        # Fill y in place with the Franke function value of every row.
        N = x_data.shape[0]
        for i in range(N):
            y[i] = franke(x_data[i, 0], x_data[i, 1])

    for degree in [2, 3, 4, 5]:  #,6,7,8,9] :
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(backend='manual')
        bootstrap = Bootstrap(leastSquares, designMatrix)

        N = int(1e4)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)

        bootstrap.resample(x_data, y_data, 1000)

        MSE_degree.append(leastSquares.MSE())
        R2_degree.append(leastSquares.R2())
        betaVariance_degree.append(bootstrap.betaVariance)

        if plotting:
            print("MSE: ", MSE_degree[-1])
            print("R2:  ", R2_degree[-1])
            print("Beta Variance: ")
            for b in betaVariance_degree[-1]:
                print(b)
            print("Beta: ")
            for b in leastSquares.beta:
                print(b)
            print(" ")

            M = 100
            fig = plt.figure()
            # Figure.gca(projection=...) was deprecated in Matplotlib 3.4
            # and removed in 3.6; add_subplot is the supported way to get
            # 3D axes.
            ax = fig.add_subplot(111, projection='3d')
            x = np.linspace(0, 1, M)
            y = np.linspace(0, 1, M)
            X, Y = np.meshgrid(x, y)
            x_data = np.vstack([X.ravel(), Y.ravel()]).T

            # Franke function itself on the plotting grid.
            yy_data = np.zeros(shape=(x_data.shape[0]))
            computeFrankeValues(x_data, yy_data)

            # Linear regression model on the plotting grid: swap in the
            # grid's design matrix before calling predict().
            XX = designMatrix.getMatrix(x_data)
            leastSquares.X = XX
            y_data = leastSquares.predict()

            Z = np.reshape(y_data.T, X.shape)
            ZF = np.reshape(yy_data.T, X.shape)

            #ax.plot_surface(X,Y,Z,cmap=cm.coolwarm,linewidth=0, antialiased=False)
            ax.plot_surface(X, Y, abs(Z - ZF), cmap=cm.coolwarm, linewidth=0,
                            antialiased=False)
            ax.set_zlim(-0.10, 1.40)
            ax.zaxis.set_major_locator(LinearLocator(5))
            ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
            ax.view_init(30, 45)

            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'franke.png'), transparent=True, bbox_inches='tight')
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS'+str(degree)+'.png'), transparent=True, bbox_inches='tight')
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS'+str(degree)+'_diff.png'), transparent=True, bbox_inches='tight')
            plt.show()

    print("\nMSE :")
    print(MSE_degree)
    print("\nR2 :")
    print(R2_degree)
    print("\nσ²(β) :")
    print(betaVariance_degree)

    return MSE_degree, R2_degree, betaVariance_degree
def fit_franke_noise():
    """Plot 1 - R2 and the MSE of a degree-10 OLS fit against the noise scale.

    For each of 50 logarithmically spaced noise scales eta in [1e-4, 1],
    10^5 uniformly random points are drawn in the unit square, the Franke
    function is evaluated on them and Gaussian noise scaled by eta is added.
    The model is fitted through ``bootstrap.resample`` and scored both
    against the noisy targets (solid lines) and against the noise-free
    targets (dashed lines).  The figure is saved as
    ``figures/R2MSE_OLS_noise.png`` next to this file and then shown.
    """
    R2 = []
    MSE = []
    R2_noise = []
    MSE_noise = []
    beta_noise = []
    betaVariance_noise = []

    noise = np.logspace(-4, 0, 50)
    # Third argument of bootstrap.resample.
    # NOTE(review): presumably the number of bootstrap resamples -- confirm.
    k = 1

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        # Fill y in place with the Franke function value of every row.
        N = x_data.shape[0]
        for i in range(N):
            y[i] = franke(x_data[i, 0], x_data[i, 1])

    for eta in noise:
        designMatrix = DesignMatrix('polynomial2D', 10)
        leastSquares = LeastSquares(backend='manual')
        bootstrap = Bootstrap(leastSquares, designMatrix)

        N = int(1e5)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)
        y_data_noise = y_data + eta * np.random.standard_normal(size=N)

        bootstrap.resample(x_data, y_data_noise, k)

        MSE_noise.append(leastSquares.MSE())
        R2_noise.append(leastSquares.R2())
        beta_noise.append(bootstrap.beta)
        betaVariance_noise.append(bootstrap.betaVariance)

        # Score the same model against the noise-free targets.
        leastSquares.y = y_data
        MSE.append(leastSquares.MSE())
        R2.append(leastSquares.R2())

    # Disabled plot of the beta variances against the noise scale.  It used
    # to live in a non-raw string literal, whose backslashes (\e, \s, \b)
    # were read as escape sequences; it is kept here as comments instead.
    #
    # betaVariance_noise = np.array(betaVariance_noise)
    # for beta in betaVariance_noise :
    #     print(beta)
    # colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k','#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
    # for i in range(6) :
    #     plt.loglog(noise, betaVariance_noise[:,i], colors[i]+'-o', markersize=2)
    # plt.rc('text', usetex=True)
    # #plt.rc('font',**{'family':'sans-serif','sans-serif':['Helvetica']})
    # ## for Palatino and other serif fonts use:
    # #plt.rc('font',**{'family':'serif','serif':['Palatino']})
    # plt.xlabel(r"$p$", fontsize=16)
    # plt.xlabel(r"noise scale $\eta$", fontsize=10)
    # plt.ylabel(r"$ \sigma^2(\beta_j)$", fontsize=10)
    # plt.legend([r"intercept", r"$\beta_{x}$", r"$\beta_{y}$", r"$\beta_{x^2}$", r"$\beta_{xy}$", r"$\beta_{y^2}$"], fontsize=10)
    # plt.subplots_adjust(left=0.2,bottom=0.2)
    # #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'beta_variance_OLS_noise.png'), transparent=True, bbox_inches='tight')
    # #plt.show()

    one_minus_R2_noise = 1 - np.array(R2_noise)
    print(R2_noise)
    print(one_minus_R2_noise)

    fig, ax1 = plt.subplots()
    ax1.loglog(noise, one_minus_R2_noise, 'k-o', markersize=2)
    ax1.loglog(noise, 1 - np.array(R2), 'k--', markersize=2)
    plt.xlabel(r"noise scale $\eta$", fontsize=10)
    plt.ylabel(r"$1-R^2$", color='k', fontsize=10)

    ax2 = ax1.twinx()
    ax2.loglog(noise, np.array(MSE_noise), 'b-o', markersize=2)
    ax2.loglog(noise, np.array(MSE), 'b--', markersize=2)
    plt.ylabel(r"MSE", color='b', fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2, right=0.9)

    # Both axes share one range so the two solid curves can be compared
    # directly.  The range is taken from the plotted quantity 1 - R2; the
    # original used R2 itself, which is not what is drawn.
    lower = 0.95 * min(min(MSE_noise), one_minus_R2_noise.min())
    upper = 1.05 * max(max(MSE_noise), one_minus_R2_noise.max())
    ax1.set_ylim([lower, upper])
    ax2.set_ylim([lower, upper])
    ax2.get_yaxis().set_ticks([])

    # savefig does not create missing directories.
    figures_dir = os.path.join(os.path.dirname(__file__), 'figures')
    os.makedirs(figures_dir, exist_ok=True)
    plt.savefig(os.path.join(figures_dir, 'R2MSE_OLS_noise.png'),
                transparent=True, bbox_inches='tight')
    plt.show()
def part_e_2():
    """Plot the terrain test MSE of OLS, ridge and lasso against lambda.

    Degree-10 ``polynomial2D`` models are fitted on the training data
    returned by ``real_data(file_number=2)``.  OLS is fitted once and drawn
    as a horizontal reference line; ridge is scanned over 20 values of
    lambda in [1e-5, 1] and lasso over 20 values in [1e-4, 1].  The figure
    is saved as ``figures/lambda_terrain.png`` next to this file and then
    shown.
    """
    x_train, y_train, x_test, y_test = real_data(file_number=2, plotting=False)
    degree = 10

    def test_mse(method, lambda_=None):
        # Fit ``method`` on the training data; return the MSE against y_test.
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(backend='manual', method=method)
        if lambda_ is not None:
            leastSquares.setLambda(lambda_)

        X = designMatrix.getMatrix(x_train)
        leastSquares.fit(X, y_train)

        # Predict on the test design matrix.  The original built X_test but
        # never used it.
        # NOTE(review): assumes predict() reads the matrix stored in
        # leastSquares.X, which is how the other routines in this file
        # (part_a, part_b) predict on new data.
        X_test = designMatrix.getMatrix(x_test)
        leastSquares.X = X_test
        leastSquares.predict()
        leastSquares.y = y_test
        return leastSquares.MSE()

    # OLS does not depend on lambda: drawn as a flat line across the range.
    ols_MSE = test_mse('ols')
    print(ols_MSE)
    ols_MSE = np.array([ols_MSE, ols_MSE])
    ols_lambda = np.array([1e-5, 1])

    ridge_lambda = np.logspace(-5, 0, 20)
    ridge_MSE = []
    for lambda_ in ridge_lambda:
        print("ridge " + str(lambda_))
        mse = test_mse('ridge', lambda_)
        ridge_MSE.append(mse)
        print(mse)
    ridge_MSE = np.array(ridge_MSE)

    lasso_lambda = np.logspace(-4, 0, 20)
    lasso_MSE = []
    for lambda_ in lasso_lambda:
        print("lasso " + str(lambda_))
        lasso_MSE.append(test_mse('lasso', lambda_))
    # The original converted ridge_MSE here, which discarded the lasso
    # results and drew the ridge curve a second time.
    lasso_MSE = np.array(lasso_MSE)

    ########################################################
    plt.rc('text', usetex=True)

    plt.loglog(ols_lambda, ols_MSE, 'k--o', markersize=1, linewidth=1,
               label=r'OLS')
    plt.loglog(ridge_lambda, ridge_MSE, 'r-o', markersize=1, linewidth=1,
               label=r'Ridge')
    plt.loglog(lasso_lambda, lasso_MSE, 'b-o', markersize=1, linewidth=1,
               label=r'Lasso')

    plt.xlabel(r"$\lambda$", fontsize=10)
    plt.ylabel(r"MSE", fontsize=10)
    #plt.subplots_adjust(left=0.2,bottom=0.2)
    plt.legend(fontsize=10)

    # savefig does not create missing directories.
    figures_dir = os.path.join(os.path.dirname(__file__), 'figures')
    os.makedirs(figures_dir, exist_ok=True)
    plt.savefig(os.path.join(figures_dir, 'lambda_terrain.png'),
                transparent=True, bbox_inches='tight')
    plt.show()
def part_b():
    """Plot the training MSE of OLS and ridge fits against the noise scale.

    Three values of lambda (10^-2, 10^-1, 10^0) are looped over; the first
    pass uses plain OLS in place of ridge and is drawn dashed.  For each of
    50 noise scales in [0, 1] a degree-10 ``polynomial2D`` model is fitted
    through ``bootstrap.resample`` to 1000 noisy samples of the Franke
    function, and is then also scored on a freshly drawn noisy sample.
    Only the MSE taken right after the resample is plotted.
    """
    R2, MSE = [], []
    R2_noise, MSE_noise = [], []
    beta_noise, betaVariance_noise = [], []

    noise = np.linspace(0, 1.0, 50)
    k = 1
    fig, ax1 = plt.subplots()
    plt.rc('text', usetex=True)

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        # Fill y in place with the Franke function value of every row.
        for row in range(x_data.shape[0]):
            y[row] = franke(x_data[row, 0], x_data[row, 1])

    def draw_noisy_sample(eta):
        # Draw 1000 random points and their Franke values plus scaled noise.
        # The order of the random calls matches the original inline code.
        N = int(1000)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)
        y_data_noise = y_data + eta * np.random.standard_normal(size=N)
        return x_data, y_data_noise

    for ind, lambda_ in enumerate(np.logspace(-2, 0, 3)):
        use_ols = ind == 0
        # Restarted for every lambda; this is the series that gets plotted.
        MSE_noise = []

        for eta in noise:
            designMatrix = DesignMatrix('polynomial2D', 10)
            if use_ols:
                leastSquares = LeastSquares(backend='manual', method='ols')
            else:
                leastSquares = LeastSquares(backend='manual', method='ridge')
                leastSquares.setLambda(lambda_)
            bootstrap = Bootstrap(leastSquares, designMatrix)

            # Training data.
            x_data, y_data_noise = draw_noisy_sample(eta)
            bootstrap.resample(x_data, y_data_noise, k)

            MSE_noise.append(leastSquares.MSE())
            R2_noise.append(leastSquares.R2())
            beta_noise.append(bootstrap.beta)
            betaVariance_noise.append(bootstrap.betaVariance)

            # Different noise, test data.
            x_data, y_data_noise = draw_noisy_sample(eta)
            leastSquares.X = designMatrix.getMatrix(x_data)
            leastSquares.predict()
            leastSquares.y = y_data_noise
            MSE.append(leastSquares.MSE())
            R2.append(leastSquares.R2())

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        if use_ols:
            line_format = colors[ind] + '--'
            line_label = r"OLS"
        else:
            line_format = colors[ind] + '-'
            line_label = r"$\lambda=10^{%d}$" % (int(np.log10(lambda_)))
        ax1.loglog(noise, np.array(MSE_noise), line_format, markersize=1,
                   label=line_label)

    plt.ylabel(r"MSE", fontsize=10)
    plt.xlabel(r"noise scale $\eta$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)

    #ax1.set_ylim([0.95*min(min(MSE_noise), min(R2_noise)), 1.05*(max(max(MSE_noise), max(R2_noise)))])

    ax1.legend()
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'MSE_ridge_noise.png'), transparent=True, bbox_inches='tight')
    plt.show()
def plot_beta_ridge():
    """Plot the fitted beta values against the ridge shrinkage parameter.

    Twenty values of lambda in [1e-3, 1e5] are looped over.  The first pass
    fits plain OLS instead of ridge and is drawn as a separate point at
    lambda = 1e-3; the remaining nineteen passes fit ridge and are drawn as
    curves.  Every fit uses a degree-3 ``polynomial2D`` design matrix,
    10^4 samples of the Franke function with unit-scale Gaussian noise, and
    ``bootstrap.resample(..., 10000)``.  Error bars are twice
    ``bootstrap.betaVariance``.
    """
    beta = []
    betaVariance = []
    # Scores against the noisy targets and against the noise-free targets.
    # They are collected but not plotted.
    MSE_noise, R2_noise = [], []
    MSE, R2 = [], []

    k = 10000
    fig, ax1 = plt.subplots()
    plt.rc('text', usetex=True)

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        # Fill y in place with the Franke function value of every row.
        N = x_data.shape[0]
        for i in range(N):
            y[i] = franke(x_data[i, 0], x_data[i, 1])

    lam = np.logspace(-3, 5, 20)
    for ind, lambda_ in enumerate(lam):
        # Index 0 is the OLS reference that the plot draws from row 0 of
        # beta.  The original started its counter at -1 and incremented it
        # after this check, so the first fit was ridge and the second was
        # OLS -- off by one against the plotting code below.
        if ind == 0:
            leastSquares = LeastSquares(backend='manual', method='ols')
        else:
            leastSquares = LeastSquares(backend='manual', method='ridge')

        designMatrix = DesignMatrix('polynomial2D', 3)
        bootstrap = Bootstrap(leastSquares, designMatrix)
        leastSquares.setLambda(lambda_)

        N = int(1e4)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)

        eta = 1.0
        y_data_noise = y_data + eta * np.random.standard_normal(size=N)

        bootstrap.resample(x_data, y_data_noise, k)

        MSE_noise.append(leastSquares.MSE())
        R2_noise.append(leastSquares.R2())
        beta.append(bootstrap.beta)
        betaVariance.append(bootstrap.betaVariance)

        # Score the same model against the noise-free targets.
        leastSquares.y = y_data
        MSE.append(leastSquares.MSE())
        R2.append(leastSquares.R2())

    beta = np.array(beta)
    betaVariance = np.array(betaVariance)

    monomial = [
        '1', 'x', 'y', 'x^2', 'xy', 'y^2', 'x^3', 'x^2y', 'xy^2', 'y^3'
    ]
    colors = [
        '#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b',
        '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'
    ]

    # Ridge curves: rows 1.. of beta, one line per coefficient.
    for i in range(10):
        plt.errorbar(lam[1:],
                     beta[1:, i],
                     yerr=2 * betaVariance[1:, i],
                     fmt='-o',
                     markersize=2,
                     linewidth=1,
                     color=colors[i],
                     elinewidth=0.5,
                     capsize=2,
                     capthick=0.5,
                     label=r"$\beta_{%s}$" % (monomial[i]))
    plt.rc('text', usetex=True)
    plt.ylabel(r"$\beta_j$", fontsize=10)
    plt.xlabel(r"shrinkage parameter $\lambda$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    plt.legend(fontsize=8)

    # OLS reference: row 0 of beta, drawn as isolated points at the left
    # edge of the lambda range.
    for i in range(10):
        plt.errorbar(1e-3,
                     beta[0, i],
                     yerr=2 * betaVariance[0, i],
                     fmt='-o',
                     markersize=2,
                     linewidth=1,
                     color=colors[i],
                     elinewidth=0.5,
                     capsize=2,
                     capthick=0.5)

    fig.gca().set_xscale('log')
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'beta_ridge.png'), transparent=True, bbox_inches='tight')
    plt.show()
max_degree = 20 Degree = [] for i in range(1, max_degree + 1): Degree.append(i) Bias_vec = [] Var_vec = [] MSE_vec = [] print("#########################################################") for degree in Degree: degree = int(degree) designMatrix = DesignMatrix('polynomial2D', degree) X = designMatrix.getMatrix(X) XT_X = np.dot(X.T, X) print("det(X^T*X): %g" % (np.linalg.det(XT_X + Lambda * np.eye(XT_X.shape[0])))) X_test_deg = designMatrix.getMatrix(X_test) # The following (m x n_bootstraps) matrix holds the column vectors y_pred # for each bootstrap iteration. X_train = np.zeros(shape=(x1_train.shape[0], 2)) n_bootstraps = 100 y_pred = np.empty((y_test.shape[0], n_bootstraps)) for i in range(n_bootstraps):