Code Example #1
def __test_bootstrap_fit():
    """A small implementation of a test case."""
    from regression import OLSRegression
    import sklearn.preprocessing as sk_preproc

    # Initial values
    deg = 2
    N_bs = 1000
    n = 100
    test_percent = 0.35
    noise = 0.3
    np.random.seed(1234)

    # Sets up random matrices
    x = np.random.rand(n, 1)

    poly = sk_preproc.PolynomialFeatures(degree=deg, include_bias=True)

    y = 2*x*x + np.exp(-2*x) + noise * \
        np.random.randn(x.shape[0], x.shape[1])

    # Sets up design matrix
    X = poly.fit_transform(x)

    # Performs regression
    reg = OLSRegression()
    reg.fit(X, y)
    y_predict = reg.predict(X).ravel()
    print("Regular linear regression")
    print("r2:  {:-20.16f}".format(reg.score(X, y)))
    print("mse: {:-20.16f}".format(metrics.mse(y, reg.predict(X))))
    print("Beta:      ", reg.coef_.ravel())
    print("var(Beta): ", reg.coef_var.ravel())
    print("")

    # Performs a bootstrap
    print("Bootstrapping")
    bs_reg = BootstrapRegression(X, y, OLSRegression())
    bs_reg.bootstrap(N_bs, test_percent=test_percent)

    print("r2:    {:-20.16f}".format(bs_reg.r2))
    print("mse:   {:-20.16f}".format(bs_reg.mse))
    print("Bias^2:{:-20.16f}".format(bs_reg.bias))
    print("Var(y):{:-20.16f}".format(bs_reg.var))
    print("Beta:      ", bs_reg.coef_.ravel())
    print("var(Beta): ", bs_reg.coef_var.ravel())
    print("mse = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(bs_reg.mse, bs_reg.bias, bs_reg.var,
                                     bs_reg.bias + bs_reg.var))
    print("Diff: {}".format(abs(bs_reg.bias + bs_reg.var - bs_reg.mse)))

    import matplotlib.pyplot as plt
    plt.plot(x.ravel(), y, "o", label="Data")
    plt.plot(x.ravel(), y_predict, "o",
             label=r"Pred, $R^2={:.4f}$".format(reg.score(X, y)))
    plt.errorbar(bs_reg.x_pred_test, bs_reg.y_pred,
                 yerr=np.sqrt(bs_reg.y_pred_var), fmt="o",
                 label=r"Bootstrap Prediction, $R^2={:.4f}$".format(bs_reg.r2))
    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$2x^2 + \sigma^2$")
    plt.legend()
    plt.show()
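
Note that the test above relies on module-level names from its original file that are not imported inside the function (numpy as `np`, the project's `metrics` module, and `BootstrapRegression`), so it is not self-contained as shown. The check it prints, `mse = Bias^2 + Var(y)`, is the pointwise bias-variance identity taken over the bootstrap ensemble. A minimal self-contained numpy sketch of that identity, assuming a matrix of bootstrap predictions with one row per resample:

import numpy as np

# Hypothetical test data and bootstrap predictions: y_test has shape (n_test,),
# y_bs_pred has shape (N_bs, n_test), one row per bootstrap resample.
rng = np.random.RandomState(1234)
y_test = rng.rand(35)
y_bs_pred = y_test + 0.1 * rng.randn(1000, 35)

# MSE, squared bias and variance, each averaged over the test points.
mse = np.mean(np.mean((y_test - y_bs_pred)**2, axis=0))
bias2 = np.mean((y_test - np.mean(y_bs_pred, axis=0))**2)
var = np.mean(np.var(y_bs_pred, axis=0))

# E[(y - y_hat)^2] = (y - E[y_hat])^2 + Var[y_hat] holds exactly per point,
# so the difference printed below is only floating-point noise.
print(mse, bias2 + var, abs(mse - (bias2 + var)))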
Code Example #2
def task1b(pickle_fname,
           N_samples=10000,
           training_size=0.5,
           N_bs=200,
           L_system_size=20,
           figure_folder="../fig"):
    """Task b of project 2"""
    print("=" * 80)
    print("Task b")

    states, energies = generate_1d_ising_data(L_system_size, N_samples)

    X_train, X_test, y_train, y_test = \
        sk_modsel.train_test_split(states, energies, test_size=1-training_size,
                                   shuffle=False)

    lambda_values = np.logspace(-4, 4, 9)

    print("Train size:  ", X_train.shape)
    print("Test size:   ", X_test.shape)

    # Linear regression
    linreg = reg.OLSRegression()
    linreg.fit(cp.deepcopy(X_train), cp.deepcopy(y_train))
    y_pred_linreg = linreg.predict(cp.deepcopy(X_test))
    y_pred_linreg_train = linreg.predict(cp.deepcopy(X_train))

    linreg_general_results = {
        "test": {
            "r2": metrics.r2(y_test, y_pred_linreg),
            "mse": metrics.mse(y_test, y_pred_linreg),
            "bias": metrics.bias(y_test, y_pred_linreg)
        },
        "train": {
            "r2": metrics.r2(y_train, y_pred_linreg_train),
            "mse": metrics.mse(y_train, y_pred_linreg_train),
            "bias": metrics.bias(y_train, y_pred_linreg_train)
        }
    }

    print("LINREG:")
    print("R2:  {:-20.16f}".format(linreg_general_results["test"]["r2"]))
    print("MSE: {:-20.16f}".format(linreg_general_results["test"]["mse"]))
    print("Bias: {:-20.16f}".format(linreg_general_results["test"]["bias"]))
    # print("Beta coefs: {}".format(linreg.coef_))
    # print("Beta coefs variances: {}".format(linreg.coef_var))

    J_leastsq = np.asarray(linreg.coef_).reshape(
        (L_system_size, L_system_size))

    linreg_bs_results = bs.BootstrapWrapper(
        X_train,
        y_train,
        sk_model.LinearRegression(fit_intercept=False),
        N_bs,
        X_test=X_test,
        y_test=y_test)

    linreg_cvkf_results = cv.kFoldCVWrapper(
        X_train,
        y_train,
        sk_model.LinearRegression(fit_intercept=False),
        k=4,
        X_test=X_test,
        y_test=y_test)

    ridge_general_results = []
    ridge_bs_results = []
    ridge_cvkf_results = []

    lasso_general_results = []
    lasso_bs_results = []
    lasso_cvkf_results = []

    heatmap_data = {}

    for i, lmbda in enumerate(lambda_values):

        # Ridge regression
        ridge_reg = reg.RidgeRegression(lmbda)
        ridge_reg.fit(cp.deepcopy(X_train), cp.deepcopy(y_train))
        y_pred_ridge = ridge_reg.predict(cp.deepcopy(X_test)).reshape(-1, 1)
        y_pred_ridge_train = ridge_reg.predict(cp.deepcopy(X_train)).reshape(
            -1, 1)
        ridge_general_results.append({
            "test": {
                "lambda": lmbda,
                "r2": metrics.r2(y_test, y_pred_ridge),
                "mse": metrics.mse(y_test, y_pred_ridge),
                "bias": metrics.bias(y_test, y_pred_ridge)
            },
            "train": {
                "lambda": lmbda,
                "r2": metrics.r2(y_train, y_pred_ridge_train),
                "mse": metrics.mse(y_train, y_pred_ridge_train),
                "bias": metrics.bias(y_train, y_pred_ridge_train)
            },
        })

        print("\nRIDGE (lambda={}):".format(lmbda))
        print("R2:  {:-20.16f}".format(
            ridge_general_results[-1]["test"]["r2"]))
        print("MSE: {:-20.16f}".format(
            ridge_general_results[-1]["test"]["mse"]))
        print("Bias: {:-20.16f}".format(
            ridge_general_results[-1]["test"]["bias"]))

        ridge_bs_results.append(
            bs.BootstrapWrapper(X_train,
                                y_train,
                                reg.RidgeRegression(lmbda),
                                N_bs,
                                X_test=X_test,
                                y_test=y_test))

        ridge_cvkf_results.append(
            cv.kFoldCVWrapper(X_train,
                              y_train,
                              reg.RidgeRegression(lmbda),
                              k=4,
                              X_test=X_test,
                              y_test=y_test))

        # Lasso regression
        lasso_reg = sk_model.Lasso(alpha=lmbda)

        # Filtering out annoying warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            lasso_reg.fit(cp.deepcopy(X_train), cp.deepcopy(y_train))
            y_pred_lasso = lasso_reg.predict(cp.deepcopy(X_test)).reshape(
                -1, 1)
            y_pred_lasso_train = lasso_reg.predict(
                cp.deepcopy(X_train)).reshape(-1, 1)

        lasso_general_results.append({
            "test": {
                "lambda": lmbda,
                "r2": metrics.r2(y_test, y_pred_lasso),
                "mse": metrics.mse(y_test, y_pred_lasso),
                "bias": metrics.bias(y_test, y_pred_lasso)
            },
            "train": {
                "lambda": lmbda,
                "r2": metrics.r2(y_train, y_pred_lasso_train),
                "mse": metrics.mse(y_train, y_pred_lasso_train),
                "bias": metrics.bias(y_train, y_pred_lasso_train)
            },
        })

        print("\nLASSO (lambda={}):".format(lmbda))
        print("R2:  {:-20.16f}".format(
            lasso_general_results[-1]["test"]["r2"]))
        print("MSE: {:-20.16f}".format(
            lasso_general_results[-1]["test"]["mse"]))
        print("Bias: {:-20.16f}".format(
            lasso_general_results[-1]["test"]["bias"]))
        # print("Beta coefs: {}".format(lasso_reg.coef_))

        # Filtering out annoying warnings
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")

            lasso_bs_results.append(
                bs.BootstrapWrapper(cp.deepcopy(X_train),
                                    cp.deepcopy(y_train),
                                    sk_model.Lasso(lmbda),
                                    N_bs,
                                    X_test=X_test,
                                    y_test=y_test))

            lasso_cvkf_results.append(
                cv.kFoldCVWrapper(cp.deepcopy(X_train),
                                  cp.deepcopy(y_train),
                                  sk_model.Lasso(lmbda),
                                  k=4,
                                  X_test=X_test,
                                  y_test=y_test))

        J_ridge = np.asarray(ridge_reg.coef_).reshape(
            (L_system_size, L_system_size))
        J_lasso = np.asarray(lasso_reg.coef_).reshape(
            (L_system_size, L_system_size))

        heatmap_data[i] = [J_leastsq, J_ridge, J_lasso]

        # plot_heatmap(J_leastsq, J_ridge, J_lasso,
        #              L_system_size, lmbda, figure_folder,
        #              "regression_ising_1d_heatmap_lambda{}.pdf".format(lmbda))

        # cmap_args = dict(vmin=-1., vmax=1., cmap='seismic')

        # fig, axarr = plt.subplots(nrows=1, ncols=3)

        # axarr[0].imshow(J_leastsq, **cmap_args)
        # axarr[0].set_title(r'$\mathrm{OLS}$', fontsize=16)
        # axarr[0].tick_params(labelsize=16)

        # axarr[1].imshow(J_ridge, **cmap_args)
        # axarr[1].set_title(
        #     r'$\mathrm{Ridge}, \lambda=%.4f$' % (lmbda), fontsize=16)
        # axarr[1].tick_params(labelsize=16)

        # im = axarr[2].imshow(J_lasso, **cmap_args)
        # axarr[2].set_title(
        #     r'$\mathrm{LASSO}, \lambda=%.4f$' % (lmbda), fontsize=16)
        # axarr[2].tick_params(labelsize=16)

        # divider = make_axes_locatable(axarr[2])
        # cax = divider.append_axes("right", size="5%", pad=0.05)
        # cbar = fig.colorbar(im, cax=cax)

        # cbar.ax.set_yticklabels(np.arange(-1.0, 1.0+0.25, 0.25), fontsize=14)
        # cbar.set_label(r'$J_{i,j}$', labelpad=-40,
        #                y=1.12, fontsize=16, rotation=0)

        # # plt.show()
        # figure_path = os.path.join(
        #     figure_folder, "ising_1d_heatmap_lambda{}.pdf".format(lmbda))
        # fig.savefig(figure_path)
        # print("Figure for lambda={} stored at {}.".format(lmbda, figure_path))

        # plt.close(fig)

    with open(pickle_fname, "wb") as f:
        pickle.dump(
            {
                "L_system_size": L_system_size,
                "ols": linreg_general_results,
                "ols_bs": linreg_bs_results,
                "ols_cv": linreg_cvkf_results,
                "ridge": ridge_general_results,
                "ridge_bs": ridge_bs_results,
                "ridge_cv": ridge_cvkf_results,
                "lasso": lasso_general_results,
                "lasso_bs": lasso_bs_results,
                "lasso_cv": lasso_cvkf_results,
                "heatmap_data": heatmap_data
            }, f)
        print("Data pickled and dumped to: {:s}".format(pickle_fname))
Code Example #3
    def __init__(self,
                 x,
                 y,
                 z,
                 alpha,
                 deg=5,
                 N_bs=100,
                 N_cv_bs=100,
                 k_splits=4,
                 test_percent=0.4,
                 print_results=False):
        """Lasso method for scikit learn."""
        poly = sk_preproc.PolynomialFeatures(degree=deg, include_bias=True)
        X = poly.fit_transform(np.c_[cp.deepcopy(x).ravel(),
                                     cp.deepcopy(y).ravel()])

        lasso = sk_model.Lasso(alpha=alpha, fit_intercept=False)
        lasso.fit(X, z.ravel())

        # Gets the predicted y values
        z_predict = lasso.predict(X)

        bias = metrics.bias2(z.ravel(), z_predict)
        R2 = lasso.score(X, z.ravel())
        mse = metrics.mse(z.ravel(), z_predict)

        # poly = sk_preproc.PolynomialFeatures(degree=deg, include_bias=True)
        # X = poly.fit_transform(
        #     np.c_[cp.deepcopy(x).reshape(-1, 1),
        #           cp.deepcopy(y).reshape(-1, 1)])

        # linreg = sk_model.LinearRegression(fit_intercept=False)
        # linreg.fit(X, z.ravel())
        # z_predict_ = linreg.predict(X)
        # r2 = metrics.R2(z.ravel(), z_predict_)
        # bias = metrics.bias2(z.ravel(), z_predict_)
        # mse_error = metrics.mse(z.ravel(), z_predict_)

        # Gets the beta coefs
        beta = lasso.coef_

        self.data["regression"] = {
            "y_pred": z_predict,
            "r2": R2,
            "mse": mse,
            "bias": bias,
            "beta_coefs": ridge.coef_,
            "beta_coefs_var": None,
        }

        if print_results:
            print("Lambda: {:-e}".format(alpha))
            print("R2:     {:-20.16f}".format(R2))
            print("MSE:    {:-20.16f}".format(mse))
            print("Bias:   {:-20.16f}".format(bias))
            print("Beta coefs: {}".format(beta))

        reg_kwargs = {"alpha": alpha, "fit_intercept": False}
        sk_results = sk_resampling.sk_learn_k_fold_cv(
            cp.deepcopy(x),
            cp.deepcopy(y),
            cp.deepcopy(z),
            sk_model.Lasso(**reg_kwargs),
            poly.transform,
            test_percent=test_percent,
            k_splits=k_splits,
            print_results=print_results)
        self.data["kfoldcv"] = sk_results

        bs_reg = bs.BootstrapRegression(
            cp.deepcopy(np.c_[x.ravel(), y.ravel()]), cp.deepcopy(z.ravel()),
            sk_model.Lasso(**reg_kwargs), poly.transform)
        bs_reg.reg = sk_model.Lasso(alpha=alpha, fit_intercept=False)
        bs_reg.bootstrap(N_bs, test_percent=test_percent)

        self._fill_data(bs_reg, "bootstrap")

        if print_results:
            print("R2:    {:-20.16f}".format(bs_reg.R2))
            print("MSE:   {:-20.16f}".format(bs_reg.MSE))
            print("Bias^2:{:-20.16f}".format(bs_reg.bias))
            print("Var(y):{:-20.16f}".format(bs_reg.var))
            print("Beta coefs: {}".format(bs_reg.coef_))
            print("Beta coefs variances: {}".format(bs_reg.coef_var))
            print("MSE = Bias^2 + Var(y) = ")
            print("{} = {} + {} = {}".format(bs_reg.MSE, bs_reg.bias,
                                             bs_reg.var,
                                             bs_reg.bias + bs_reg.var))
            print("Diff: {}".format(abs(bs_reg.bias + bs_reg.var -
                                        bs_reg.MSE)))
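
Because the scikit-learn Lasso fit above has no closed-form coefficient variance ("beta_coefs_var" is left as None), the bootstrap is the natural way to estimate one. A minimal sketch of that idea using only numpy and scikit-learn, independent of the project's BootstrapRegression class:

import numpy as np
from sklearn.linear_model import Lasso

def bootstrap_lasso_coef_var(X, y, alpha, n_boot=100, seed=1234):
    """Estimate mean and variance of the Lasso coefficients by refitting
    the model on bootstrap resamples of the rows of (X, y)."""
    rng = np.random.RandomState(seed)
    n_samples = X.shape[0]
    coefs = np.empty((n_boot, X.shape[1]))
    for b in range(n_boot):
        idx = rng.randint(0, n_samples, size=n_samples)  # sample rows with replacement
        model = Lasso(alpha=alpha, fit_intercept=False, max_iter=10000)
        model.fit(X[idx], y[idx])
        coefs[b] = model.coef_
    return coefs.mean(axis=0), coefs.var(axis=0)

# Hypothetical usage with the design matrix and response from above:
# beta_mean, beta_var = bootstrap_lasso_coef_var(X, z.ravel(), alpha, n_boot=N_bs)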
Code Example #4
    def __init__(self,
                 x,
                 y,
                 z,
                 deg=1,
                 N_bs=100,
                 N_cv_bs=100,
                 k_splits=4,
                 test_percent=0.4,
                 print_results=False):
        """Manual implementation of the OLS."""

        poly = sk_preproc.PolynomialFeatures(degree=deg, include_bias=True)
        X = poly.fit_transform(cp.deepcopy(np.c_[x.ravel(), y.ravel()]))
        linreg = reg.OLSRegression()
        linreg.fit(X, cp.deepcopy(z.ravel()))
        z_predict_ = linreg.predict(X).ravel()
        if print_results:
            print("R2:  {:-20.16f}".format(metrics.R2(z.ravel(), z_predict_)))
            print("MSE: {:-20.16f}".format(metrics.mse(z.ravel(), z_predict_)))
            print("Bias: {:-20.16f}".format(
                metrics.bias2(z.ravel(), z_predict_)))
            print("Beta coefs: {}".format(linreg.coef_))
            print("Beta coefs variances: {}".format(linreg.coef_var))

        self.data["regression"] = {
            "y_pred": z_predict_,
            "r2": metrics.R2(z.ravel(), z_predict_),
            "mse": metrics.mse(z.ravel(), z_predict_),
            "bias": metrics.bias2(z.ravel(), z_predict_),
            "beta_coefs": linreg.coef_,
            "beta_coefs_var": linreg.coef_var,
            "beta_95c": np.sqrt(linreg.coef_var) * 2,
        }

        # Resampling with k-fold cross validation
        kfcv = cv.kFoldCrossValidation(
            cp.deepcopy(np.c_[x.ravel(), y.ravel()]), cp.deepcopy(z.ravel()),
            reg.OLSRegression(), poly.transform)
        kfcv.cross_validate(k_splits=k_splits, test_percent=test_percent)

        if print_results:
            print("R2:    {:-20.16f}".format(kfcv.R2))
            print("MSE:   {:-20.16f}".format(kfcv.MSE))
            print("Bias^2:{:-20.16f}".format(kfcv.bias))
            print("Var(y):{:-20.16f}".format(kfcv.var))
            print("Beta coefs: {}".format(kfcv.coef_))
            print("Beta coefs variances: {}".format(kfcv.coef_var))
            print("MSE = Bias^2 + Var(y) = ")
            print("{} = {} + {} = {}".format(kfcv.MSE, kfcv.bias, kfcv.var,
                                             kfcv.bias + kfcv.var))
            print("Diff: {}".format(abs(kfcv.bias + kfcv.var - kfcv.MSE)))

        self._fill_data(kfcv, "kfoldcv")

        # Resampling with mc cross validation
        mccv = cv.MCCrossValidation(cp.deepcopy(np.c_[x.ravel(),
                                                      y.ravel()]),
                                    cp.deepcopy(z.ravel()),
                                    reg.OLSRegression(), poly.transform)
        mccv.cross_validate(N_cv_bs,
                            k_splits=k_splits,
                            test_percent=test_percent)
        if print_results:
            print("R2:    {:-20.16f}".format(mccv.R2))
            print("MSE:   {:-20.16f}".format(mccv.MSE))
            print("Bias^2:{:-20.16f}".format(mccv.bias))
            print("Var(y):{:-20.16f}".format(mccv.var))
            print("Beta coefs: {}".format(mccv.coef_))
            print("Beta coefs variances: {}".format(mccv.coef_var))
            print("MSE = Bias^2 + Var(y) = ")
            print("{} = {} + {} = {}".format(mccv.MSE, mccv.bias, mccv.var,
                                             mccv.bias + mccv.var))
            print("Diff: {}".format(abs(mccv.bias + mccv.var - mccv.MSE)))

        self._fill_data(mccv, "mccv")

        # Resampling with bootstrapping
        bs_reg = bs.BootstrapRegression(
            cp.deepcopy(np.c_[x.ravel(), y.ravel()]), cp.deepcopy(z.ravel()),
            reg.OLSRegression(), poly.transform)
        bs_reg.bootstrap(N_bs, test_percent=test_percent)

        if print_results:
            print("R2:    {:-20.16f}".format(bs_reg.R2))
            print("MSE:   {:-20.16f}".format(bs_reg.MSE))
            print("Bias^2:{:-20.16f}".format(bs_reg.bias))
            print("Var(y):{:-20.16f}".format(bs_reg.var))
            print("Beta coefs: {}".format(bs_reg.coef_))
            print("Beta coefs variances: {}".format(bs_reg.coef_var))
            print("MSE = Bias^2 + Var(y) = ")
            print("{} = {} + {} = {}".format(bs_reg.MSE, bs_reg.bias,
                                             bs_reg.var,
                                             bs_reg.bias + bs_reg.var))
            print("Diff: {}".format(abs(bs_reg.bias + bs_reg.var -
                                        bs_reg.MSE)))

        self._fill_data(bs_reg, "bootstrap")
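
The manual k-fold cross-validation above can be sanity-checked against scikit-learn's own utilities. A small sketch, using sklearn's LinearRegression as a stand-in for the project's OLSRegression:

import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, cross_val_score

def sk_kfold_mse(X, z, k_splits=4, seed=1234):
    """Mean test MSE over k folds, computed entirely with scikit-learn."""
    kfold = KFold(n_splits=k_splits, shuffle=True, random_state=seed)
    scores = cross_val_score(LinearRegression(fit_intercept=False),
                             X, z, cv=kfold,
                             scoring="neg_mean_squared_error")
    return -np.mean(scores)  # cross_val_score returns negated MSE

# Hypothetical usage with the polynomial design matrix from above:
# print(sk_kfold_mse(X, z.ravel(), k_splits=k_splits))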
Code Example #5
    def __init__(self,
                 x,
                 y,
                 z,
                 alpha,
                 deg=5,
                 N_bs=100,
                 N_cv_bs=100,
                 k_splits=4,
                 test_percent=0.4,
                 print_results=False):
        """Ridge method for scikit-learn."""
        poly = sk_preproc.PolynomialFeatures(degree=deg, include_bias=True)
        X = poly.fit_transform(np.c_[cp.deepcopy(x).ravel(),
                                     cp.deepcopy(y).ravel()])

        ridge = sk_model.Ridge(alpha=alpha, fit_intercept=False)
        ridge.fit(X, z.ravel())

        # Gets the predicted y values
        z_predict = ridge.predict(X)

        R2 = ridge.score(X, z.ravel())
        # R2 =  1 - np.sum((z.ravel() - z_predict)**2)/np.sum((z.ravel() - np.mean(z.ravel()))**2)
        mse = metrics.mse(z.ravel(), z_predict)
        bias = metrics.bias2(z.ravel(), z_predict)

        N, P = X.shape
        z_variance = np.sum((z.ravel() - z_predict)**2) / (N - P - 1)

        # Gets the beta variance
        beta_variance = metrics.ridge_regression_variance(X, z_variance, alpha)

        self.data["regression"] = {
            "y_pred": z_predict,
            "r2": R2,
            "mse": mse,
            "bias": bias,
            "beta_coefs": ridge.coef_,
            "beta_coefs_var": beta_variance,
            "beta_95c": np.sqrt(beta_variance) * 2,
        }

        if print_results:
            print("Lambda: {:-e}".format(alpha))
            print("R2:     {:-20.16f}".format(R2))
            print("MSE:    {:-20.16f}".format(mse))
            print("Bias:   {:-20.16f}".format(bias))
            print("Beta coefs: {}".format(ridge.coef_))
            print("Beta coefs variances: {}".format(beta_variance))

        reg_kwargs = {"alpha": alpha, "fit_intercept": False, "solver": "lsqr"}
        kfcf_results = sk_resampling.sk_learn_k_fold_cv(
            cp.deepcopy(x),
            cp.deepcopy(y),
            cp.deepcopy(z),
            sk_model.Ridge(**reg_kwargs),
            poly.transform,
            test_percent=test_percent,
            k_splits=k_splits,
            print_results=print_results)
        self.data["kfoldcv"] = kfcf_results

        # Resampling with bootstrapping
        bs_reg = bs.BootstrapRegression(
            cp.deepcopy(np.c_[x.ravel(), y.ravel()]), cp.deepcopy(z.ravel()),
            sk_model.Ridge(**reg_kwargs), poly.transform)
        bs_reg.bootstrap(N_bs, test_percent=test_percent)

        self._fill_data(bs_reg, "bootstrap")

        if print_results:
            print("R2:    {:-20.16f}".format(bs_reg.R2))
            print("MSE:   {:-20.16f}".format(bs_reg.MSE))
            print("Bias^2:{:-20.16f}".format(bs_reg.bias))
            print("Var(y):{:-20.16f}".format(bs_reg.var))
            print("Beta coefs: {}".format(bs_reg.coef_))
            print("Beta coefs variances: {}".format(bs_reg.coef_var))
            print("MSE = Bias^2 + Var(y) = ")
            print("{} = {} + {} = {}".format(bs_reg.MSE, bs_reg.bias,
                                             bs_reg.var,
                                             bs_reg.bias + bs_reg.var))
            print("Diff: {}".format(abs(bs_reg.bias + bs_reg.var -
                                        bs_reg.MSE)))
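
The coefficient variance above comes from metrics.ridge_regression_variance, whose implementation is not shown here. Presumably it evaluates the standard ridge variance formula Var(beta) = sigma^2 * W X^T X W with W = (X^T X + alpha*I)^(-1); a small sketch of that formula, stated as an assumption rather than a quote of the project's function:

import numpy as np

def ridge_coef_variance(X, sigma2, alpha):
    """Per-coefficient variances of the ridge estimator:
    diag(sigma^2 * W @ X.T @ X @ W) with W = inv(X.T @ X + alpha * I)."""
    XtX = X.T @ X
    W = np.linalg.inv(XtX + alpha * np.eye(X.shape[1]))
    return np.diag(sigma2 * W @ XtX @ W)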
Code Example #6
    def __init__(self,
                 x,
                 y,
                 z,
                 deg=1,
                 N_bs=100,
                 N_cv_bs=100,
                 k_splits=4,
                 test_percent=0.4,
                 print_results=False):
        """SK-Learn implementation of OLS."""
        poly = sk_preproc.PolynomialFeatures(degree=deg, include_bias=True)
        X = poly.fit_transform(np.c_[cp.deepcopy(x).reshape(-1, 1),
                                     cp.deepcopy(y).reshape(-1, 1)])

        linreg = sk_model.LinearRegression(fit_intercept=False)
        linreg.fit(X, z.ravel())
        z_predict_ = linreg.predict(X)
        r2 = metrics.R2(z.ravel(), z_predict_)
        bias = metrics.bias2(z.ravel(), z_predict_)
        mse_error = metrics.mse(z.ravel(), z_predict_)

        N, P = X.shape
        z_variance = np.sum((z.ravel() - z_predict_)**2) / (N - P - 1)

        linreg_coef_var = np.diag(np.linalg.inv(X.T @ X)) * z_variance
        self.data["regression"] = {
            "y_pred": z_predict_,
            "r2": r2,
            "mse": mse_error,
            "bias": bias,
            "beta_coefs": linreg.coef_,
            "beta_coefs_var": linreg_coef_var,
            "beta_95c": np.sqrt(linreg_coef_var) * 2,
        }

        if print_results:
            print("R2:  {:-20.16f}".format(r2))
            print("MSE: {:-20.16f}".format(mse_error))
            print("Bias: {:-20.16f}".format(bias))
            print("Beta coefs: {}".format(linreg.coef_))
            print("Beta coefs variances: {}".format(linreg_coef_var))

        sk_kfold_res = sk_resampling.sk_learn_k_fold_cv(
            cp.deepcopy(x),
            cp.deepcopy(y),
            cp.deepcopy(z),
            sk_model.LinearRegression(fit_intercept=False),
            poly.transform,
            test_percent=test_percent,
            k_splits=k_splits,
            print_results=print_results)

        self.data["kfoldcv"] = sk_kfold_res

        bs_reg = bs.BootstrapRegression(
            cp.deepcopy(np.c_[x.ravel(), y.ravel()]), cp.deepcopy(z.ravel()),
            sk_model.LinearRegression(fit_intercept=False), poly.transform)
        bs_reg.bootstrap(N_bs, test_percent=test_percent)

        self._fill_data(bs_reg, "bootstrap")

        if print_results:
            print("R2:    {:-20.16f}".format(bs_reg.R2))
            print("MSE:   {:-20.16f}".format(bs_reg.MSE))
            print("Bias^2:{:-20.16f}".format(bs_reg.bias))
            print("Var(y):{:-20.16f}".format(bs_reg.var))
            print("Beta coefs: {}".format(bs_reg.coef_))
            print("Beta coefs variances: {}".format(bs_reg.coef_var))
            print("MSE = Bias^2 + Var(y) = ")
            print("{} = {} + {} = {}".format(bs_reg.MSE, bs_reg.bias,
                                             bs_reg.var,
                                             bs_reg.bias + bs_reg.var))
            print("Diff: {}".format(abs(bs_reg.bias + bs_reg.var -
                                        bs_reg.MSE)))
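
The "beta_95c" field stored by these classes uses 2*sqrt(var) as the half-width of an approximate 95% confidence interval for each coefficient; the exact normal-approximation factor is about 1.96, for example:

import numpy as np
from scipy.stats import norm

z95 = norm.ppf(0.975)                 # ~1.96, two-sided 95% normal quantile
beta_var = np.array([0.04, 0.01])     # hypothetical coefficient variances
print(z95 * np.sqrt(beta_var))        # exact half-widths
print(2.0 * np.sqrt(beta_var))        # the shortcut used in the code above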
Code Example #7
def __test_cross_validation_methods():
    # A small implementation of a test case
    from regression import LinearRegression
    import matplotlib.pyplot as plt

    # Initial values
    n = 100
    N_bs = 1000
    k_splits = 4
    test_percent = 0.2
    noise = 0.3
    np.random.seed(1234)

    # Sets up random matrices
    x = np.random.rand(n, 1)

    def func_exact(_x):
        return 2*_x*_x + np.exp(-2*_x) + noise * \
            np.random.randn(_x.shape[0], _x.shape[1])

    y = func_exact(x)

    def design_matrix(_x):
        return np.c_[np.ones(_x.shape), _x, _x * _x]

    # Sets up design matrix
    X = design_matrix(x)

    # Performs regression
    reg = LinearRegression()
    reg.fit(X, y)
    y = y.ravel()
    y_predict = reg.predict(X).ravel()
    print("Regular linear regression")
    print("R2:    {:-20.16f}".format(reg.score(y, y_predict)))
    print("MSE:   {:-20.16f}".format(metrics.mse(y, y_predict)))
    # print (metrics.bias(y, y_predict))
    print("Bias^2:{:-20.16f}".format(metrics.bias2(y, y_predict)))

    # Small plotter
    plt.plot(x, y, "o", label="data")
    plt.plot(x,
             y_predict,
             "o",
             label=r"Pred, $R^2={:.4f}$".format(reg.score(y, y_predict)))

    print("k-fold Cross Validation")
    kfcv = kFoldCrossValidation(x, y, LinearRegression, design_matrix)
    kfcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    print("R2:    {:-20.16f}".format(kfcv.R2))
    print("MSE:   {:-20.16f}".format(kfcv.MSE))
    print("Bias^2:{:-20.16f}".format(kfcv.bias))
    print("Var(y):{:-20.16f}".format(kfcv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(kfcv.MSE, kfcv.bias, kfcv.var,
                                     kfcv.bias + kfcv.var))
    print("Diff: {}".format(abs(kfcv.bias + kfcv.var - kfcv.MSE)))

    plt.errorbar(kfcv.x_pred_test,
                 kfcv.y_pred,
                 yerr=np.sqrt(kfcv.y_pred_var),
                 fmt="o",
                 label=r"k-fold CV, $R^2={:.4f}$".format(kfcv.R2))

    print("kk Cross Validation")
    kkcv = kkFoldCrossValidation(x, y, LinearRegression, design_matrix)
    kkcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    print("R2:    {:-20.16f}".format(kkcv.R2))
    print("MSE:   {:-20.16f}".format(kkcv.MSE))
    print("Bias^2:{:-20.16f}".format(kkcv.bias))
    print("Var(y):{:-20.16f}".format(kkcv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(kkcv.MSE, kkcv.bias, kkcv.var,
                                     kkcv.bias + kkcv.var))
    print("Diff: {}".format(abs(kkcv.bias + kkcv.var - kkcv.MSE)))

    plt.errorbar(kkcv.x_pred_test.ravel(),
                 kkcv.y_pred.ravel(),
                 yerr=np.sqrt(kkcv.y_pred_var.ravel()),
                 fmt="o",
                 label=r"kk-fold CV, $R^2={:.4f}$".format(kkcv.R2))

    print("Monte Carlo Cross Validation")
    mccv = MCCrossValidation(x, y, LinearRegression, design_matrix)
    mccv.cross_validate(N_bs, k_splits=k_splits, test_percent=test_percent)
    print("R2:    {:-20.16f}".format(mccv.R2))
    print("MSE:   {:-20.16f}".format(mccv.MSE))
    print("Bias^2:{:-20.16f}".format(mccv.bias))
    print("Var(y):{:-20.16f}".format(mccv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(mccv.MSE, mccv.bias, mccv.var,
                                     mccv.bias + mccv.var))
    print("Diff: {}".format(abs(mccv.bias + mccv.var - mccv.MSE)))

    print("\nCross Validation methods tested.")

    plt.errorbar(mccv.x_pred_test,
                 mccv.y_pred,
                 yerr=np.sqrt(mccv.y_pred_var),
                 fmt="o",
                 label=r"MC CV, $R^2={:.4f}$".format(mccv.R2))

    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$y=2x^2$")
    plt.legend()
    plt.show()
Code Example #8
def __test_cross_validation_methods():
    # A small implementation of a test case
    from regression import OLSRegression
    import sklearn.preprocessing as sk_preproc
    import matplotlib.pyplot as plt

    # Initial values
    n = 100
    N_bs = 200
    deg = 2
    k_splits = 4
    test_percent = 0.35
    noise = 0.3
    np.random.seed(1234)
    # Sets up random matrices
    x = np.random.rand(n, 1)

    y = 2 * x * x + np.exp(
        -2 * x) + noise * np.random.randn(x.shape[0], x.shape[1])

    # Sets up design matrix
    poly = sk_preproc.PolynomialFeatures(degree=deg, include_bias=True)
    X = poly.fit_transform(x)

    # Performs regression
    reg = OLSRegression()
    reg.fit(X, y)
    y_predict = reg.predict(X)
    print("Regular linear regression")
    print("R2:    {:-20.16f}".format(reg.score(X, y)))
    print("MSE:   {:-20.16f}".format(metrics.mse(y, y_predict)))
    print("Bias^2:{:-20.16f}".format(metrics.bias(y, y_predict)))

    # Small plotter
    plt.plot(x, y, "o", label="data")
    plt.plot(x,
             y_predict,
             "o",
             label=r"Pred, $R^2={:.4f}$".format(reg.score(X, y)))

    print("k-fold Cross Validation")
    kfcv = kFoldCrossValidation(X, y, OLSRegression())
    kfcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    print("R2:    {:-20.16f}".format(kfcv.r2))
    print("MSE:   {:-20.16f}".format(kfcv.mse))
    print("Bias^2:{:-20.16f}".format(kfcv.bias))
    print("Var(y):{:-20.16f}".format(kfcv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(kfcv.mse, kfcv.bias, kfcv.var,
                                     kfcv.bias + kfcv.var))
    print("Diff: {}".format(abs(kfcv.bias + kfcv.var - kfcv.mse)))

    plt.errorbar(kfcv.x_pred_test,
                 kfcv.y_pred,
                 yerr=np.sqrt(kfcv.y_pred_var),
                 fmt="o",
                 label=r"k-fold CV, $R^2={:.4f}$".format(kfcv.r2))

    print("kk Cross Validation")
    kkcv = kkFoldCrossValidation(X, y, OLSRegression())
    kkcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    print("R2:    {:-20.16f}".format(kkcv.r2))
    print("MSE:   {:-20.16f}".format(kkcv.mse))
    print("Bias^2:{:-20.16f}".format(kkcv.bias))
    print("Var(y):{:-20.16f}".format(kkcv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(kkcv.mse, kkcv.bias, kkcv.var,
                                     kkcv.bias + kkcv.var))
    print("Diff: {}".format(abs(kkcv.bias + kkcv.var - kkcv.mse)))

    plt.errorbar(kkcv.x_pred_test,
                 kkcv.y_pred,
                 yerr=np.sqrt(kkcv.y_pred_var),
                 fmt="o",
                 label=r"kk-fold CV, $R^2={:.4f}$".format(kkcv.r2))

    print("Monte Carlo Cross Validation")
    mccv = MCCrossValidation(X, y, OLSRegression())
    mccv.cross_validate(N_bs, k_splits=k_splits, test_percent=test_percent)
    print("R2:    {:-20.16f}".format(mccv.r2))
    print("MSE:   {:-20.16f}".format(mccv.mse))
    print("Bias^2:{:-20.16f}".format(mccv.bias))
    print("Var(y):{:-20.16f}".format(mccv.var))
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(mccv.mse, mccv.bias, mccv.var,
                                     mccv.bias + mccv.var))
    print("Diff: {}".format(abs(mccv.bias + mccv.var - mccv.mse)))

    print("\nCross Validation methods tested.")

    plt.errorbar(mccv.x_pred_test,
                 mccv.y_pred,
                 yerr=np.sqrt(mccv.y_pred_var),
                 fmt="o",
                 label=r"MC CV, $R^2={:.4f}$".format(mccv.r2))

    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$y=2x^2 + e^{-2x}$")
    plt.legend()
    plt.show()
Code Example #9
def __test_bootstrap_fit():
    # A small implementation of a test case
    from regression import LinearRegression

    N_bs = 1000

    # Initial values
    n = 200
    noise = 0.2
    np.random.seed(1234)
    test_percent = 0.35

    # Sets up random matrices
    x = np.random.rand(n, 1)

    def func_exact(_x):
        return 2*_x*_x + np.exp(-2*_x) + noise * \
            np.random.randn(_x.shape[0], _x.shape[1])

    y = func_exact(x)

    def design_matrix(_x):
        return np.c_[np.ones(_x.shape), _x, _x*_x]

    # Sets up design matrix
    X = design_matrix(x)

    # Performs regression
    reg = LinearRegression()
    reg.fit(X, y)
    y = y.ravel()
    y_predict = reg.predict(X).ravel()
    print("Regular linear regression")
    print("R2:  {:-20.16f}".format(reg.score(y_predict, y)))
    print("MSE: {:-20.16f}".format(metrics.mse(y, y_predict)))
    print("Beta:      ", reg.coef_.ravel())
    print("var(Beta): ", reg.coef_var.ravel())
    print("")

    # Performs a bootstrap
    print("Bootstrapping")
    bs_reg = BootstrapRegression(x, y, LinearRegression, design_matrix)
    bs_reg.bootstrap(N_bs, test_percent=test_percent)

    print("R2:    {:-20.16f}".format(bs_reg.R2))
    print("MSE:   {:-20.16f}".format(bs_reg.MSE))
    print("Bias^2:{:-20.16f}".format(bs_reg.bias))
    print("Var(y):{:-20.16f}".format(bs_reg.var))
    print("Beta:      ", bs_reg.coef_.ravel())
    print("var(Beta): ", bs_reg.coef_var.ravel())
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(bs_reg.MSE, bs_reg.bias, bs_reg.var,
                                     bs_reg.bias + bs_reg.var))
    print("Diff: {}".format(abs(bs_reg.bias + bs_reg.var - bs_reg.MSE)))

    import matplotlib.pyplot as plt
    plt.plot(x.ravel(), y, "o", label="Data")
    plt.plot(x.ravel(), y_predict, "o",
             label=r"Pred, $R^2={:.4f}$".format(reg.score(y_predict, y)))
    print(bs_reg.y_pred.shape, bs_reg.y_pred_var.shape)
    plt.errorbar(bs_reg.x_pred_test, bs_reg.y_pred,
                 yerr=np.sqrt(bs_reg.y_pred_var), fmt="o",
                 label=r"Bootstrap Prediction, $R^2={:.4f}$".format(bs_reg.R2))
    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$2x^2 + \sigma^2$")
    plt.legend()
    plt.show()