# NOTE(review): this chunk of the original file begins mid-definition with
# "return X  # end" -- the tail of a process_features() whose header lies
# outside this chunk.  The function is reconstructed below from the identical
# copy that appears later in this file; confirm against the full source.
def process_features(X):
    """Standardize the raw features and prepend a bias (intercept) column of ones."""
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    m, n = X.shape
    X = np.c_[np.ones((m, 1)), X]  # NOTE: must be done after standardization
    return X
# end


np.random.seed(0)
normal_spn = 100  # number of normal (inlier) samples
outlin_spn = 10   # number of outlier samples

# generate_samples / lr / hs / plt are presumably imported above this chunk --
# confirm against the full source.
X, y = generate_samples(normal_spn, outlin_spn)
X = X.reshape(normal_spn + outlin_spn, -1)
y = y.reshape(normal_spn + outlin_spn, -1)
X_proc = process_features(X)

model_l = lr.LinearRegression()  # ordinary linear regression, used as a baseline
model_l.fit(X_proc, y)
# Hubber_SubGD: by its name, presumably Huber-loss regression trained by
# subgradient descent (eta = step size, N = iterations) -- verify in the module.
model_h = hs.Hubber_SubGD(epsilon=0.2)
model_h.fit(X_proc, y, eta=0.01, N=4000)

# Plot the samples and both fitted curves over the interval [-1, 1].
fig, ax = plt.subplots(1, 1, figsize=(5, 4))
ax.scatter(X, y, s=5, c='b')
LX = np.linspace(-1, 1, 500).reshape(500, 1)
LX_proc = process_features(LX)
pred_l = model_l.predict(LX_proc)
pred_h = model_h.predict(LX_proc)
ax.plot(LX, pred_l, label="LinearRegression model", color='red')
ax.plot(LX, pred_h, label="Hubber_SubGD model", color='green')
import Linear_Regression_Class as lr


def process_features(X):
    """Standardize the raw features and prepend a bias (intercept) column of ones.

    Standardization puts all features on the same scale; the leading column
    of ones serves as the intercept term of the linear model.
    """
    scaler = StandardScaler()  # scikit-learn standardizer (zero mean, unit variance)
    X = scaler.fit_transform(X)
    m, n = X.shape
    X = np.c_[np.ones((m, 1)), X]  # NOTE: must be done after standardization
    return X
# end


# StandardScaler / fetch_california_housing / train_test_split / np are
# presumably imported above this chunk -- confirm against the full source.
housing = fetch_california_housing()
X = housing.data
y = housing.target.reshape(-1, 1)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0)
# NOTE(review): calling process_features separately on train and test fits a
# fresh scaler on each set, so the test data is scaled by its own statistics;
# normally the scaler fitted on the training set should transform the test set.
X_train = process_features(X_train)
X_test = process_features(X_test)

model = lr.LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = model.mean_squared_error(y_test, y_pred)
r2 = model.r2_score(y_test, y_pred)
print("mse = {} and r2 = {}.".format(mse, r2))
import os
import sys

import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import PolynomialFeatures

# Make the local Modules directory importable (Windows path separator),
# then import the project's linear-regression implementation.
sys.path.append(os.getcwd() + r'\Modules')
import Linear_Regression_Class as lr

# Tiny training set: one raw feature, three samples.
X = [[0], [1], [2]]   # features
y = [[1], [1], [3]]   # labels

# Expand the raw feature into degree-2 polynomial features so the fit
# reduces to an ordinary linear regression in the expanded feature space
# (each polynomial term is treated as an independent linear feature).
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X)
model = lr.LinearRegression()
model.fit(X_poly, y)

# Plot the training points together with the fitted polynomial curve.
plt.scatter(X, y)
W = np.linspace(-4, 5, 300).reshape(300, 1)
W_poly = poly.fit_transform(W)  # apply the same polynomial expansion to the grid
u = model.predict(W_poly)       # predictions from the fitted linear model
plt.plot(W, u)
plt.show()