# -*- coding: utf-8 -*-
# Minimal scikit-learn example: fit a one-feature linear regression on six
# hand-written points and inspect the fitted parameters.
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from sklearn.linear_model import LinearRegression
import sklearn
import pylab

# Six samples with a single feature each, and the matching observed targets.
x = [[1], [2], [3], [4], [5], [6]]
y = [[1], [2.1], [2.9], [4.2], [5.1], [5.8]]

model = LinearRegression()
# fit() hands back the estimator, so `a` is used below exactly like `model`.
a = model.fit(x, y)

# Bare expressions: they only display a value in an interactive session.
model.intercept_
a.coef_
# Predict through the public API on the features. The original called the
# private _decision_function() and passed the targets `y` as if they were
# features.
model.predict(x)

# NOTE(review): the repeated coding line and header below suggest that a
# second script was pasted in at this point.
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 23 16:06:54 2016

@author: SumaiWong
"""
import numpy as np
import pandas as pd
from numpy.linalg import inv
from numpy import dot

# Only the first 10 rows of the CSV are loaded.
iris = pd.read_csv('e:\\python\\iris.csv', nrows=10)
# Fit a linear model: Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width
# Exploratory statements. The bare expressions only display a value in an
# interactive session; they rely on lm, lm_multi1, lm_multi2, X1, X2, y, df
# and sns being defined earlier in the file.
lm_multi2.intercept_
lm_multi2.coef_
lm_multi1.score(X1, y)
lm_multi2.score(X2, y)

# Regression plots of price against two candidate predictors, their pairwise
# correlations, and a residual plot for highway-mpg.
sns.regplot(x='highway-mpg', y='price', data=df)
sns.regplot(x='peak-rpm', y='price', data=df)
df[['highway-mpg', 'peak-rpm', 'price']].corr()
sns.residplot(x=df['highway-mpg'], y=df['price'], lowess=True)

lm.fit(X1, y)
dir(lm)
# NOTE(review): _decision_function and _get_tags are private helpers (leading
# underscore), not public API; they may be missing in other library versions.
lm._decision_function(X1)
lm.get_params(True)
lm._get_tags()
y_hat = lm.predict(X1)


def PlotPolly(model, independent_variable, dependent_variabble, Name):
    """Plot the data points together with the curve produced by ``model``.

    Args:
        model: Callable evaluated on a NumPy array of x values; it is called
            as ``model(x_new)`` (presumably a ``numpy.poly1d`` -- confirm
            against the caller).
        independent_variable: x values of the observations, drawn as dots.
        dependent_variabble: y values of the observations, drawn as dots.
            The parameter name keeps its original spelling so that keyword
            callers continue to work.
        Name: Text used as the x-axis label.

    The curve is sampled at 100 evenly spaced points on [15, 55]. The figure
    is drawn on the current axes; this function does not call ``plt.show()``.
    """
    x_new = np.linspace(15, 55, 100)
    y_new = model(x_new)

    # Observations as dots, model curve as a solid line.
    plt.plot(independent_variable, dependent_variabble, '.', x_new, y_new, '-')
    plt.title('Polynomial Fit with Matplotlib for Price ~ Length')

    # Light grey plot background.
    ax = plt.gca()
    ax.set_facecolor((0.898, 0.898, 0.898))

    plt.xlabel(Name)
# Define the linear regression model.
# Meaning of the four constructor defaults, as written down by the original
# author (NOTE(review): the available parameters and their defaults depend on
# the installed scikit-learn version -- confirm):
#   fit_intercept=True  whether to compute the intercept
#   normalize=False     whether to normalize the data
#   copy_X=True         operate on a copy of the data
#   n_jobs=1            number of jobs; -1 uses all CPUs
model = LinearRegression()

# Reshape the independent variable x into a single-column matrix once, then
# reuse it for printing, training (fit) and scoring.
x_column = x.reshape(len(x), 1)
print(x_column)
model.fit(x_column, y)

# Model parameters: intercept_ is the intercept, coef_ holds the coefficients
# of x. The values agree with the plain-Python implementation; here the
# parameters come back as a matrix.
print(model.intercept_, model.coef_)

# Prediction: the input must be a matrix (2-D); the y value is returned.
print(model.predict([[150]]))  # same as the next line
# NOTE(review): _decision_function is a private helper; predict() above is the
# public equivalent.
print(model._decision_function([[150]]))

# Scoring function; returns a value less than or equal to 1.
print(model.score(x_column, y))

# Output recorded by the original author:
# {'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'normalize': False}
print(model.get_params(True))

# Open question: the algorithmic complexity of least squares.
# Assuming the factor matrix x has n rows and p columns, the complexity is
# O(n * p^2), assuming n >= p.

# Use matrices so the computation is done with matrix operations.
# The first column of x is all ones; the second column holds the feature.
# NOTE(review): NumPy no longer recommends np.matrix; it is kept here because
# later code outside this view may rely on matrix semantics.
x = np.matrix([[1, 56], [1, 72], [1, 69], [1, 88], [1, 102],
               [1, 86], [1, 76], [1, 79], [1, 94], [1, 74]])
y = np.matrix([92, 102, 86, 110, 130, 99, 96, 102, 105, 92])
model = LinearRegression()