class ScalarField_PolynomialParametrisation:
    """
    Polynomial Parametrisation of an N-Dimensional Scalar Field (Surface).

    The polynomial parameters are determined via Linear Regression Analysis,
    which is performed by the LinearRegression() class.

    Input variables:
        X | matrix whose N columns are the independent variables
        y | column vector with the scalar field
    """

    def __init__(self, X, y, ScalarField):
        # The actual regression machinery lives in the external
        # LinearRegression class (defined elsewhere in this project).
        self.LinReg = LinearRegression(X, y, ScalarField, True)
        # self.P    : sorted list of polynomial degrees under analysis
        # self.data : per-method, per-degree list of result records
        #             (see determine_coefficients for the record layout)
        self.P, self.data = [], {"OLS": {}, "Ridge": {}, "Lasso": {}}

    @staticmethod
    def design_matrix(X, deg):
        """
        Create the design matrix for an N-dimensional polynomial model of
        degree `deg` from the data set X (one column per independent
        variable, one row per sample).

        Includes every possible polynomial term up to and including degree
        `deg`: a constant column of ones is prepended to X, and every
        size-`deg` multiset of column indices of the augmented matrix yields
        one term (choosing the ones-column lowers the term's effective
        degree, which is how the lower-degree terms are generated).
        """
        X_ = np.c_[np.ones(X.shape[0])[:, None], X]
        sets = list(
            itertools.combinations_with_replacement(range(X_.shape[1]), deg))
        design_matrix = np.ones((X.shape[0], len(sets)))
        # Column 0 corresponds to the multiset (0,...,0) — the intercept —
        # and is already all ones; fill in the remaining terms.
        for i in range(1, len(sets)):
            design_matrix[:, i] = np.prod(X_[:, sets[i]], axis=1)
        return design_matrix

    def __call__(self, method="OLS", alpha=0, technique="", K=1):
        """
        Main functionality — determine the polynomial coefficients and save
        the results for every registered polynomial degree.

        Parameters:
            method    | regression method key: "OLS", "Ridge" or "Lasso"
            alpha     | penalty parameter (ignored by OLS)
            technique | resampling technique: "", "Kfold" or "Bootstrap"
            K         | number of folds / bootstrap iterations

        Raises:
            ValueError | if no polynomial degree has been added yet
        """
        if not self.P:
            raise ValueError("No polynomial has been added")
        self.update_LinReg()
        self.update_data_keys()
        self.determine_coefficients(method, alpha, technique, K)

    def update_LinReg(self):
        """Add missing and remove obsolete polynomial models in self.LinReg."""
        current_models = sorted(self.LinReg.model.keys())
        for deg in self.P:
            if deg not in current_models:
                # BUGFIX: bind `deg` as a default argument. A plain closure
                # (lambda X: self.design_matrix(X, deg)) is late-binding, so
                # every model registered in this loop would later evaluate
                # with the loop's final degree instead of its own.
                F = lambda X, deg=deg: self.design_matrix(X, deg)
                self.LinReg.add_model(F, deg)
        for model in current_models:
            if model not in self.P:
                self.LinReg.remove_model(model)

    def update_data_keys(self):
        """Ensure every (method, degree) pair has a result list in self.data."""
        for method in self.data:
            for deg in self.P:
                if deg not in self.data[method]:
                    self.data[method][deg] = []

    def determine_coefficients(self, method, alpha, technique, K):
        """
        Run the regression analysis and append one result record per degree.

        Record layout (list):
            [alpha, MSE, R2, beta, std_beta]
        extended, when a resampling technique is used, with:
            [MSE_sample, R2_sample, Bias, Var, technique, K]
        """
        self.LinReg.use_method(method)
        self.LinReg.run_analysis(alpha, technique, K)
        for deg in self.P:
            model = self.LinReg.model[deg]
            self.data[method][deg].append(
                [alpha, model.MSE, model.R2, model.beta, model.std_beta])
            if technique in ["Kfold", "Bootstrap"]:
                self.data[method][deg][-1].extend([
                    model.MSE_sample, model.R2_sample, model.Bias, model.Var,
                    technique, K
                ])

    def add_polynomial(self, deg):
        """
        Add polynomial degree(s) to the analysis.

        `deg` may be a single degree or any sequence of degrees; duplicates
        are ignored and self.P is kept sorted.
        """
        degrees = deg if hasattr(deg, "__len__") else [deg]
        for d in degrees:
            if d not in self.P:
                self.P.append(d)
        self.P.sort()

    def remove_polynomial(self, deg):
        """
        Remove polynomial degree(s) from the analysis.

        `deg` may be a single degree or any sequence of degrees; degrees not
        currently present are silently ignored.
        """
        degrees = deg if hasattr(deg, "__len__") else [deg]
        for d in degrees:
            if d in self.P:
                self.P.remove(d)

    def plot_error_penalty_dependence(self, deg, technique="", K=1):
        """
        Plot the error dependency on the penalty parameter for degree `deg`.

        Produces:
            fig1 | MSE and R^2 vs penalty (always, all three methods)
            fig2 | resampled average MSE and R^2 vs penalty (resampling only)
            fig3 | Bias^2 and Variance vs penalty (resampling only)
        Figures are saved to self.LinReg.dirpath and shown on screen.
        """
        # setup
        resampled = technique in ["Bootstrap", "Kfold"]
        methods = ["OLS", "Ridge", "Lasso"]
        fig1 = plt.figure(figsize=(10, 8))
        fig2 = plt.figure(figsize=(10, 8))
        ax11 = fig1.add_subplot(211)
        ax12 = fig1.add_subplot(212)
        ax21 = fig2.add_subplot(211)
        ax22 = fig2.add_subplot(212)
        if resampled:
            fig3 = plt.figure(figsize=(10, 8))
            ax3 = fig3.add_subplot(111)
        # extract data and plot
        for i in range(3):
            method = methods[i]
            alpha1, MSE, R2 = [], [], []
            if resampled:
                alpha2, MSE_, R2_, Bias, Var = [], [], [], [], []
            for element in self.data[method][deg]:
                # records longer than 5 entries carry resampling results
                # (see determine_coefficients for the record layout)
                resample = len(element) > 5
                alpha1.append(element[0])
                MSE.append(element[1])
                R2.append(element[2])
                if resample:
                    alpha2.append(element[0])
                    MSE_.append(element[5])
                    R2_.append(element[6])
                    Bias.append(element[7])
                    Var.append(element[8])
            alpha1 = np.array(alpha1)
            ax11.plot(alpha1, np.array(MSE), label=method)
            ax12.plot(alpha1, np.array(R2), label=method)
            if resampled:
                alpha2 = np.array(alpha2)
                ax21.plot(alpha2, np.array(MSE_), label=method)
                ax22.plot(alpha2, np.array(R2_), label=method)
                ax3.plot(alpha2, np.array(Bias), label=method + " Bias")
                ax3.plot(alpha2, np.array(Var), label=method + " Var")
        # extra plotting details
        for ax, s in zip([ax11, ax12], ["MSE", r"$R^2$"]):
            ax.set_title(
                r"Dependence of {:s} on the penalty $\lambda$".format(s),
                fontsize=22)
            ax.set_xlabel(r"$\lambda$", fontsize=20)
            ax.set_ylabel(r"{:s}$(\lambda)$".format(s), fontsize=20)
            ax.legend(loc="best")
        fig1.tight_layout()
        fig1.savefig(self.LinReg.dirpath + "error1_deg{:d}".format(deg))
        if resampled:
            ax21.set_title(r"Dependence of average MSE on the penalty $\lambda$" + \
                "\nAverage estimated via {:s} resampling with K = {:d}".format(technique,K),fontsize=22)
            ax22.set_title(r"Dependence of average $R^2$ on the penalty $\lambda$" + \
                "\nAverage estimated via {:s} resampling with K = {:d}".format(technique,K),fontsize=22)
            ax3.set_title(
                r"Penalty $\lambda$ Dependence of Model $Bias^2$ and Variance" + \
                "\nEstimated via {:s} resampling with K = {:d}".format(technique,K),fontsize=22)
            for ax in [ax21, ax22, ax3]:
                ax.legend(loc="best")
                ax.set_xlabel(r"$\lambda$", fontsize=20)
            ax21.set_ylabel(r"avg(MSE$(\lambda)$)", fontsize=20)
            ax22.set_ylabel(r"avg($R^2(\lambda)$)", fontsize=20)
            ax3.set_ylabel(r"$Bias^2$ or Variance", fontsize=20)
            fig2.tight_layout()
            for fig, n in zip([fig2, fig3], [2, 3]):
                fig.savefig(self.LinReg.dirpath +
                            "error{:d}_deg{:d}".format(n, deg))
        # NOTE(review): original indentation was lost in this chunk; plt.show()
        # is assumed to sit at method level (outside the `if resampled` block)
        # — confirm against the original layout.
        plt.show()