# -*- coding: utf-8 -*-
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
from sklearn.linear_model import LinearRegression
import sklearn
import pylab

# Toy data set: six samples with one feature each and one target value each.
x = [[1], [2], [3], [4], [5], [6]]
y = [[1], [2.1], [2.9], [4.2], [5.1], [5.8]]

model = LinearRegression()
# fit() returns the estimator itself, so `a` and `model` refer to the same
# fitted object.
a = model.fit(x, y)
# Bare expressions: they only display a value in an interactive session.
model.intercept_
a.coef_
# Use the public predict() API on the feature matrix. The original called the
# private _decision_function() helper and fed it the targets `y` as input.
model.predict(x)

# -*- coding: utf-8 -*-
"""
Created on Tue Feb 23 16:06:54 2016

@author: SumaiWong
"""

import numpy as np
import pandas as pd
from numpy.linalg import inv
from numpy import dot

# Load only the first 10 rows of the iris CSV from a hard-coded Windows path.
iris = pd.read_csv('e:\\python\\iris.csv', nrows=10)
# Fit a linear model: Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width
# NOTE(review): lm_multi1, lm_multi2, X1, X2, sns, df and lm are not defined in
# the visible part of this file; the fitting code announced by the comment
# above appears to be missing. Confirm where these names come from.
# Bare expressions: inspect the intercept and coefficients of lm_multi2.
lm_multi2.intercept_
lm_multi2.coef_

# Score both models against the same target y (presumably R^2 of two fitted
# regressors on different feature sets; confirm).
lm_multi1.score(X1,y)
lm_multi2.score(X2,y)

# Regression plots of price against each candidate predictor column of df
# (sns is presumably seaborn; confirm the import).
sns.regplot(x = 'highway-mpg', y = 'price', data = df)
sns.regplot(x = 'peak-rpm', y = 'price', data = df)

# Pairwise correlations between the two predictors and price.
df[['highway-mpg','peak-rpm','price']].corr()

# Residual plot of price against highway-mpg with the lowess option enabled.
sns.residplot(x=df['highway-mpg'],y=df['price'],lowess=True)

# Fit lm on X1 / y and poke at the fitted object.
lm.fit(X1,y)
dir(lm)
# NOTE(review): _decision_function and _get_tags are underscore-prefixed
# (private) methods; they may not exist in every library version.
lm._decision_function(X1)
lm.get_params(True)
lm._get_tags()

# Predictions of the fitted model on the training features.
y_hat = lm.predict(X1)

def PlotPolly(model, independent_variable, dependent_variabble, Name):
    """Plot observed points together with a fitted curve.

    The curve is drawn by evaluating ``model`` on 100 evenly spaced x values
    in the fixed range [15, 55]. The figure is neither shown nor saved here;
    the caller decides what to do with it.

    Args:
        model: Callable that is applied to an array of x values and returns
            the matching y values (it is invoked as ``model(x_new)``).
        independent_variable: x coordinates of the observed points.
        dependent_variabble: y coordinates of the observed points (the
            parameter name is kept as-is so keyword callers keep working).
        Name: Label for the x axis.
    """
    x_new = np.linspace(15, 55, 100)
    y_new = model(x_new)

    # Observed data as dots, fitted curve as a solid line.
    plt.plot(independent_variable, dependent_variabble, '.', x_new, y_new, '-')
    plt.title('Polynomial Fit with Matplotlib for Price ~ Length')
    # Light grey plot background.
    plt.gca().set_facecolor((0.898, 0.898, 0.898))
    plt.xlabel(Name)
# Example #3
0
# Define the linear regression model.
# Meaning of the four default constructor arguments (as listed by the original
# author; defaults may differ between scikit-learn versions):
#   fit_intercept=True  whether to compute the intercept
#   normalize=False     whether to normalize the data
#   copy_X=True         operate on a copy of the data
#   n_jobs=1            number of jobs; -1 means all CPUs take part
model = LinearRegression()
# Reshape the independent variable x into a single-column matrix, then train
# (fit) the model on it.
x_column = x.reshape(len(x), 1)
print(x_column)
model.fit(x_column, y)

# Fetch the model parameters: intercept_ is the intercept and coef_ holds the
# coefficient(s) of x. The original author notes these agree with a native
# Python implementation and that the parameters come back as a matrix.
print(model.intercept_, model.coef_)
# Predict: the input should be a matrix; the y value is returned.
print(model.predict([[150]]))  # same as the call below
print(model._decision_function([[150]]))

# Scoring function; returns a value less than or equal to 1.
print(model.score(x_column, y))

# Author's recorded output:
# {'copy_X': True, 'fit_intercept': True, 'n_jobs': 1, 'normalize': False}
print(model.get_params(deep=True))


# Open question: the algorithmic complexity of ordinary least squares.
# Assuming the influencing factors x form a matrix with n rows and p columns,
# the complexity is O(n p^2), assuming n >= p.


# Use matrices to carry out the computation in a parallelized (vectorized) way.
# Each row of x is [1, value]; the leading 1 is presumably the intercept
# column of a design matrix (confirm).
x = np.matrix([[1,56], [1,72], [1,69], [1,88], [1,102], [1,86],[1, 76], [1,79], [1,94], [1,74]])
# NOTE(review): y is built from a flat list of 10 values while x has 10 rows;
# check whether y needs transposing before it is used together with x.
y = np.matrix([92, 102, 86, 110, 130, 99, 96, 102, 105, 92])

model = LinearRegression()