Ejemplo n.º 1
0
class ScalarField_PolynomialParametrisation:
    """Polynomial parametrisation of an N-dimensional scalar field (surface).

    The polynomial coefficients are determined via linear regression, which
    is delegated to a ``LinearRegression`` instance stored in ``self.LinReg``.

    Input variables:
      X           | matrix whose N columns are the independent variables
      y           | column vector with the scalar field
      ScalarField | forwarded unchanged to ``LinearRegression``
                    (NOTE(review): meaning is defined by that class - confirm)

    Attributes:
      P    | sorted list of the polynomial degrees included in the analysis
      data | ``data[method][deg]`` is a list with one record per analysis run:
             ``[alpha, MSE, R2, beta, std_beta]``, extended with
             ``[MSE_sample, R2_sample, Bias, Var, technique, K]`` when the
             run used "Kfold" or "Bootstrap" resampling
    """

    def __init__(self, X, y, ScalarField):
        self.LinReg = LinearRegression(X, y, ScalarField, True)
        self.P, self.data = [], {"OLS": {}, "Ridge": {}, "Lasso": {}}

    @staticmethod
    def design_matrix(X, deg):
        """Build the design matrix of a full polynomial model of degree ``deg``.

        The model is (X.shape[1])-dimensional, i.e. one variable per column of
        X, and contains every monomial up to and including total degree
        ``deg``. Each column is the product of ``deg`` factors drawn (with
        replacement) from ``[1, x_1, ..., x_N]``; factors equal to 1 lower the
        effective degree of the term.

        Returns an array of shape ``(X.shape[0], number_of_terms)`` whose
        first column is the constant term.
        """
        X_ = np.c_[np.ones(X.shape[0])[:, None], X]
        sets = list(
            itertools.combinations_with_replacement(range(X_.shape[1]), deg))
        design_matrix = np.ones((X.shape[0], len(sets)))
        # Column 0 corresponds to (0, ..., 0), i.e. a product of ones, so it
        # is already correct and is skipped.
        for i, columns in enumerate(sets[1:], start=1):
            design_matrix[:, i] = np.prod(X_[:, list(columns)], axis=1)
        return design_matrix

    def __call__(self, method="OLS", alpha=0, technique="", K=1):
        """Determine the coefficients of every added polynomial and store them.

        Raises ValueError if no polynomial degree has been added yet.
        """
        if not self.P:
            raise ValueError("No polynomial has been added")
        self.update_LinReg()
        self.update_data_keys()
        self.determine_coefficients(method, alpha, technique, K)

    def _design_function(self, deg):
        """Return a function mapping X to its degree-``deg`` design matrix."""
        # A dedicated factory gives every degree its own scope. A lambda
        # written directly in a loop would look up the loop variable when
        # called, so every model would end up using the last degree.
        return lambda X: self.design_matrix(X, deg)

    def update_LinReg(self):
        """Synchronise the models held by ``self.LinReg`` with ``self.P``.

        Adds a model for every degree that is missing and removes the models
        whose degree is no longer part of the analysis.
        """
        # Snapshot the keys so that add/remove calls cannot disturb iteration.
        current_models = sorted(self.LinReg.model)
        for deg in self.P:
            if deg not in current_models:
                self.LinReg.add_model(self._design_function(deg), deg)
        for model in current_models:
            if model not in self.P:
                self.LinReg.remove_model(model)

    def update_data_keys(self):
        """Ensure every method has a (possibly empty) result list per degree."""
        for results in self.data.values():
            for deg in self.P:
                results.setdefault(deg, [])

    def determine_coefficients(self, method, alpha, technique, K):
        """Run the regression analysis and append one record per degree.

        The record layout is described in the class docstring.
        """
        self.LinReg.use_method(method)
        self.LinReg.run_analysis(alpha, technique, K)
        for deg in self.P:
            model = self.LinReg.model[deg]
            record = [alpha, model.MSE, model.R2, model.beta, model.std_beta]
            if technique in ("Kfold", "Bootstrap"):
                record.extend([
                    model.MSE_sample, model.R2_sample, model.Bias, model.Var,
                    technique, K
                ])
            self.data[method][deg].append(record)

    def add_polynomial(self, deg):
        """Add one degree, or every degree of a sized collection, to the analysis."""
        if hasattr(deg, "__len__"):
            for d in deg:
                if d not in self.P:
                    self.P.append(d)
        elif deg not in self.P:
            self.P.append(deg)
        self.P.sort()

    def remove_polynomial(self, deg):
        """Remove one degree, or every degree of a sized collection, if present."""
        if hasattr(deg, "__len__"):
            for d in deg:
                if d in self.P:
                    self.P.remove(d)
        elif deg in self.P:
            self.P.remove(deg)

    def plot_error_penalty_dependence(self, deg, technique="", K=1):
        """Plot how the errors of the degree-``deg`` models depend on the penalty.

        Figure 1 shows MSE and R^2 for every stored run of each method. When
        ``technique`` is "Bootstrap" or "Kfold", figure 2 shows the resampled
        MSE and R^2 and figure 3 the bias and variance. All created figures
        are saved under ``self.LinReg.dirpath`` and then shown.

        ``technique`` and ``K`` only select whether the resampling figures are
        drawn and what their titles say.
        NOTE(review): stored records are not filtered by technique/K, so the
        titles are only accurate if all resampled runs used these settings.
        """
        # setup
        resampled = technique in ["Bootstrap", "Kfold"]
        methods = ["OLS", "Ridge", "Lasso"]
        fig1 = plt.figure(figsize=(10, 8))
        ax11 = fig1.add_subplot(211)
        ax12 = fig1.add_subplot(212)
        if resampled:
            # Only create the resampling figures when they will be filled;
            # otherwise plt.show() would display a blank figure.
            fig2 = plt.figure(figsize=(10, 8))
            ax21 = fig2.add_subplot(211)
            ax22 = fig2.add_subplot(212)
            fig3 = plt.figure(figsize=(10, 8))
            ax3 = fig3.add_subplot(111)
        # extract data and plot
        for method in methods:
            alpha1, MSE, R2 = [], [], []
            alpha2, MSE_, R2_, Bias, Var = [], [], [], [], []
            for element in self.data[method][deg]:
                alpha1.append(element[0])
                MSE.append(element[1])
                R2.append(element[2])
                # Records longer than five entries carry resampling results;
                # collect them only when the resampling figures are requested.
                if resampled and len(element) > 5:
                    alpha2.append(element[0])
                    MSE_.append(element[5])
                    R2_.append(element[6])
                    Bias.append(element[7])
                    Var.append(element[8])
            alpha1 = np.array(alpha1)
            ax11.plot(alpha1, np.array(MSE), label=method)
            ax12.plot(alpha1, np.array(R2), label=method)
            if resampled:
                alpha2 = np.array(alpha2)
                ax21.plot(alpha2, np.array(MSE_), label=method)
                ax22.plot(alpha2, np.array(R2_), label=method)
                ax3.plot(alpha2, np.array(Bias), label=method + " Bias")
                ax3.plot(alpha2, np.array(Var), label=method + " Var")
        # extra plotting details
        for ax, s in zip([ax11, ax12], ["MSE", r"$R^2$"]):
            ax.set_title(
                r"Dependence of {:s} on the penalty $\lambda$".format(s),
                fontsize=22)
            ax.set_xlabel(r"$\lambda$", fontsize=20)
            ax.set_ylabel(r"{:s}$(\lambda)$".format(s), fontsize=20)
            ax.legend(loc="best")
        fig1.tight_layout()
        fig1.savefig(self.LinReg.dirpath + "error1_deg{:d}".format(deg))
        if resampled:
            ax21.set_title(r"Dependence of average MSE on the penalty $\lambda$" + \
                            "\nAverage estimated via {:s} resampling with K = {:d}".format(technique,K),fontsize=22)
            ax22.set_title(r"Dependence of average $R^2$ on the penalty $\lambda$" + \
                            "\nAverage estimated via {:s} resampling with K = {:d}".format(technique,K),fontsize=22)
            ax3.set_title( r"Penalty $\lambda$ Dependence of Model $Bias^2$ and Variance" + \
                            "\nEstimated via {:s} resampling with K = {:d}".format(technique,K),fontsize=22)
            for ax in [ax21, ax22, ax3]:
                ax.legend(loc="best")
                ax.set_xlabel(r"$\lambda$", fontsize=20)
            ax21.set_ylabel(r"avg(MSE$(\lambda)$)", fontsize=20)
            ax22.set_ylabel(r"avg($R^2(\lambda)$)", fontsize=20)
            ax3.set_ylabel(r"$Bias^2$ or Variance", fontsize=20)
            fig2.tight_layout()
            fig3.tight_layout()
            for fig, n in zip([fig2, fig3], [2, 3]):
                fig.savefig(self.LinReg.dirpath +
                            "error{:d}_deg{:d}".format(n, deg))
        plt.show()