Example #1
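These snippets share a setup that is not shown here: x_train / y_train are noisy samples of sin(2πx), x_test / y_test form a dense noise-free grid of the same curve, and PolynomialFeature, LinearRegression, RidgeRegression and rmse come from a PRML-style helper library. A minimal sketch of that assumed setup (sample size and noise level are assumptions, not part of the original examples):

import numpy as np
import matplotlib.pyplot as plt

# Assumed toy data: noisy samples of sin(2*pi*x) for training,
# a dense noise-free grid for evaluation and plotting.
np.random.seed(0)
x_train = np.linspace(0, 1, 10)
y_train = np.sin(2 * np.pi * x_train) + np.random.normal(scale=0.25, size=x_train.shape)
x_test = np.linspace(0, 1, 100)
y_test = np.sin(2 * np.pi * x_test)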
def plot_regularization_path(degree):
    # Build the polynomial design matrix from the training data
    feature = PolynomialFeature(degree)
    X = feature.transform(x_train)
    # w_infinite: the unregularized (pseudo-inverse) solution
    w_infinite = np.linalg.pinv(X) @ y_train
    # Regularization coefficients (the range is tunable; 0.04-100 works well)
    alpha = np.linspace(0.04, 100, 10000)
    # Note for the report: analyze how w varies with lambda, and mind the range of the x-axis
    w_lambda = None
    # Compute w_lambda for each regularization coefficient in turn
    for i in alpha:
        w_lambda_i = np.linalg.solve(i * np.eye(np.size(X, 1)) + X.T @ X,
                                     X.T @ y_train)
        w_lambda = w_lambda_i if w_lambda is None else np.vstack(
            (w_lambda, w_lambda_i))
    # x-axis: ||w_lambda|| / ||w_infinite||
    path_x = [np.linalg.norm(i) / np.linalg.norm(w_infinite) for i in w_lambda]
    # Plot the regularization path
    plt.title('Regularization Path (degree={0})'.format(degree))
    plt.xlabel(r'$||w_\lambda||/||w_\infty||$')
    plt.ylabel('$w_i$')
    for i in range(0, degree + 1):
        # Plot the regularization path of each w_i
        plt.plot(path_x,
                 w_lambda[:, i],
                 label='$w_{0}$'.format(i),
                 linewidth=2)
        # plt.plot(path_x, abs(w_lambda[:, i]), label='$w_{0}$'.format(i), linewidth='2')
    plt.legend()
    plt.show()
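For reference, the two quantities compared on the horizontal axis are the ridge solution computed inside the loop and the unregularized pseudo-inverse solution, so ||w_lambda||/||w_infinite|| runs from roughly 1 (small lambda) down toward 0 (large lambda):

$w_\lambda = (\lambda I + X^\top X)^{-1} X^\top y, \qquad w_\infty = X^{+} y$

A typical call, assuming the shared setup above: plot_regularization_path(9).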
def rmse_measure_1():
    training_errors = []
    test_errors = []
    for i in range(10):
        feature = PolynomialFeature(i)
        X_train = feature.transform(x_train)
        X_test = feature.transform(x_test)

        model = LinearRegression()
        # model = RidgeRegression()
        model.fit(X_train, y_train)
        training_errors.append(rmse(model.predict(X_train), y_train))
        # test_errors.append(rmse(model.predict(X_test), y_test + np.random.normal(scale=0.25, size=len(y_test))))
        test_errors.append(rmse(model.predict(X_test), y_test))
    plt.title('Curve Fitting Result with Training Size=%d' % len(X_train))
    # plt.title('Curve Fitting Result with $ln(λ)$=%.2f' % 1.0)
    plt.plot(training_errors,
             'o-',
             mfc="none",
             mec="b",
             ms=10,
             c="b",
             label="Training")
    plt.plot(test_errors,
             'o-',
             mfc="none",
             mec="r",
             ms=10,
             c="r",
             label="Test")
    plt.legend()
    plt.xlabel("$degree$")
    plt.ylabel("$RMSE$")
    plt.show()
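The rmse helper used by every example is not defined in these snippets; it is assumed to be the usual root-mean-square error. A minimal sketch:

def rmse(a, b):
    # Root-mean-square error between two arrays of equal length (assumed definition).
    return np.sqrt(np.mean(np.square(a - b)))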
def linear_fit(x, y, degree, alpha):
    # Build the polynomial design matrix
    feature = PolynomialFeature(degree)
    X = feature.transform(x)
    # w_lambda: the parameters solved for with the regularization term included
    w_lambda = np.linalg.solve(alpha * np.eye(np.size(X, 1)) + X.T @ X,
                               X.T @ y)
    return X, w_lambda
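A minimal usage sketch of linear_fit (the degree and alpha values here are arbitrary assumptions): the returned design matrix and weight vector give fitted values through a matrix product.

X_fit, w = linear_fit(x_train, y_train, degree=9, alpha=1e-3)
y_fit = X_fit @ w  # fitted values at the training inputs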
Example #4
def plot_rmse(degree):
    # Build the polynomial design matrix from the training data
    feature = PolynomialFeature(degree)
    X = feature.transform(x_train)
    # w_infinite: the unregularized (pseudo-inverse) solution
    w_infinite = np.linalg.pinv(X) @ y_train
    # Regularization coefficients
    alpha = np.linspace(0.04, 100, 10000)
    # Note for the report: lambda must not be set too large, otherwise accuracy degrades (shown via the RMSE metric)
    # w_lambda under each regularization coefficient
    w_lambda = None
    # RMSE on the training and test sets
    training_errors = []
    test_errors = []
    for i in alpha:
        w_lambda_i = np.linalg.solve(i * np.eye(np.size(X, 1)) + X.T @ X,
                                     X.T @ y_train)
        w_lambda = w_lambda_i if w_lambda is None else np.vstack(
            (w_lambda, w_lambda_i))
        model = RidgeRegression(i)
        model.fit(X, y_train)
        training_errors.append(rmse(model.predict(X), y_train))
        test_errors.append(
            rmse(model.predict(feature.transform(x_test)), y_test))
    # x-axis: ||w_lambda|| / ||w_infinite||
    path_x = [np.linalg.norm(i) / np.linalg.norm(w_infinite) for i in w_lambda]
    # Plot the RMSE curves
    plt.title('RMSE (degree={0})'.format(degree))
    plt.xlabel(r'$||w_\lambda||/||w_\infty||$')
    plt.ylabel("$RMSE$")
    plt.plot(path_x,
             training_errors,
             '-',
             mfc="none",
             mec="b",
             ms=10,
             c="b",
             label="Train")
    plt.plot(path_x,
             test_errors,
             '-',
             mfc="none",
             mec="r",
             ms=10,
             c="r",
             label="Test")
    plt.legend()
    plt.show()
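RidgeRegression is assumed to implement the same closed-form solve that the loop above performs explicitly, and LinearRegression is assumed to be its unregularized pseudo-inverse counterpart. A minimal sketch of the ridge helper:

class RidgeRegression:
    # Assumed minimal ridge regression: w = (alpha*I + X^T X)^{-1} X^T y.
    def __init__(self, alpha=1.0):
        self.alpha = alpha

    def fit(self, X, y):
        self.w = np.linalg.solve(self.alpha * np.eye(X.shape[1]) + X.T @ X,
                                 X.T @ y)

    def predict(self, X):
        return X @ self.w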
def find_best_degree_based_on_rmse():
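    # index, row, column and the tuned alpha_row / alpha_column are assumed
    # to be defined elsewhere in the original script.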
    row_training_errors = []
    column_training_errors = []
    degrees = np.arange(2, 20, 2)
    for degree in degrees:
        X = PolynomialFeature(degree).transform(index)
        model_row = RidgeRegression(alpha_row)
        model_column = RidgeRegression(alpha_column)
        model_row.fit(X, row)
        model_column.fit(X, column)
        row_training_errors.append(rmse(model_row.predict(X), row))
        column_training_errors.append(rmse(model_column.predict(X), column))
    plt.title('Linear Fitting Result ($RMSE$)')
    plt.plot(degrees,
             row_training_errors,
             'o-',
             mfc="none",
             mec="r",
             ms=10,
             c="r",
             label="row-fitting-RMSE ($\lambda=%.3f$)" % alpha_row)
    plt.plot(degrees,
             column_training_errors,
             'o-',
             mfc="none",
             mec="b",
             ms=10,
             c="b",
             label="column-fitting-RMSE ($\lambda=%.3f$)" % alpha_column)
    plt.xlabel("$degree$")
    plt.ylabel("$RMSE$")
    plt.legend()
    plt.show()
def rmse_measure_2():
    training_errors = []
    test_errors = []
    alpha_ln = np.linspace(-50, 0, 100)
    alpha = np.exp(alpha_ln)
    degree = 9
    for i in alpha:
        feature = PolynomialFeature(degree)
        X_train = feature.transform(x_train)
        X_test = feature.transform(x_test)

        # model = LinearRegression()
        model = RidgeRegression(i)
        model.fit(X_train, y_train)
        training_errors.append(rmse(model.predict(X_train), y_train))
        test_errors.append(rmse(model.predict(X_test), y_test))
    plt.title('Curve Fitting Result with $M$=%d' % degree)
    plt.plot(alpha_ln,
             training_errors,
             '-',
             mfc="none",
             mec="b",
             ms=10,
             c="b",
             label="Training")
    plt.plot(alpha_ln,
             test_errors,
             '-',
             mfc="none",
             mec="r",
             ms=10,
             c="r",
             label="Test")
    plt.xlim((-40, 0))
    plt.ylim((0, 1))
    plt.legend()
    plt.xlabel("$ln(λ)$")
    plt.ylabel("$RMSE$")
    plt.show()
def poly_fit_regularized():
    for i, degree in enumerate([0, 1, 3, 6, 9]):
        feature = PolynomialFeature(degree)
        X_train = feature.transform(x_train)
        X_test = feature.transform(x_test)

        model = RidgeRegression(alpha=1e-3)
        model.fit(X_train, y_train)
        y = model.predict(X_test)
        plt.title('Curve Fitting Result with Regularization, M=%d' % degree)
        plt.xlabel('$x$')
        plt.ylabel('$y$')
        plt.scatter(x_train,
                    y_train,
                    facecolor="none",
                    edgecolor="b",
                    s=50,
                    label="training data")
        plt.plot(x_test, y_test, c="g", label="$\sin(2\pi x)$")
        plt.plot(x_test, y, c="r", label="fitting")
        plt.ylim(-1.5, 1.5)
        plt.annotate("M={}".format(degree), xy=(-0.15, 1))
        plt.legend()
        plt.show()
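PolynomialFeature is also assumed rather than shown; for these one-dimensional examples it would build the plain polynomial design matrix with columns 1, x, x^2, ..., x^degree. A minimal sketch:

class PolynomialFeature:
    # Assumed minimal polynomial design-matrix builder for 1-D input.
    def __init__(self, degree):
        self.degree = degree

    def transform(self, x):
        x = np.asarray(x).ravel()
        return np.stack([x ** k for k in range(self.degree + 1)], axis=1)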