# Example #1
# 0
# Number of correct kNN predictions on the held-out set.
(np.array(y_predict) == y_test).sum()

# In[16]:

# Size of the test split.
len(X_test)

# In[17]:

# Classification accuracy: correct predictions over test-set size.
(np.array(y_predict) == y_test).sum() / len(X_test)

# In[19]:

from ML.model_selection import train_test_split

# In[20]:

# Hold out 20% of the data for testing, with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, seed=1)

# In[21]:

# Classify the test points with the training set as reference.
y_predict = kNN_classify(X_train, y_train, X_test)

# In[22]:

# Accuracy: fraction of test points predicted correctly.
(np.array(y_predict) == y_test).sum() / len(X_test)

# In[ ]:
# -*- coding: utf-8 -*-
"""
Created on Thu Jul 30 23:48:08 2020

@author: Giyn
"""

import pandas as pd

from ML.MultipleLinearRegression import MultipleLinearRegression
from ML.metrics import *
from ML.model_selection import train_test_split

# Load the housing dataset: columns 0-3 are features, column 4 is the target.
data = pd.read_csv("housing.csv")

X = data.values[:, :4]
y = data.values[:, 4:5]  # keep 2-D (n, 1) shape
X_train, X_test, y_train, y_test = train_test_split(X, y, seed=412)

# Fit a multiple linear regression using the closed-form normal equation.
reg = MultipleLinearRegression()
reg.fit_normal(X_train, y_train)

# Evaluate on the held-out split with the standard regression metrics.
y_predict = reg.predict(X_test)
MSE = mean_squared_error(y_test, y_predict)
RMSE = root_mean_squared_error(y_test, y_predict)
MAE = mean_absolute_error(y_test, y_predict)
R_Square = r2_score(y_test, y_predict)
from ML.model_selection import train_test_split

data = pd.read_csv("housing.csv")  # load the data
vals = data.values
x_1 = vals[:, 0]  # feature 1
x_2 = vals[:, 1]  # feature 2
x_3 = vals[:, 2]  # feature 3
x_4 = vals[:, 3]  # feature 4
y = vals[:, 4]  # target variable

# Drop points at the cap: targets above 50 may exist but were recorded as 50,
# so observations at the boundary are unreliable.
keep = y < 50.0
x_1 = x_1[keep]
x_2 = x_2[keep]
x_3 = x_3[keep]
x_4 = x_4[keep]
y = y[keep]

# Split each single-feature dataset; the shared seed makes y_train/y_test
# identical across all four splits.
x_1_train, x_1_test, y_train, y_test = train_test_split(x_1, y, seed=412)
x_2_train, x_2_test, y_train, y_test = train_test_split(x_2, y, seed=412)
x_3_train, x_3_test, y_train, y_test = train_test_split(x_3, y, seed=412)
x_4_train, x_4_test, y_train, y_test = train_test_split(x_4, y, seed=412)

# One simple linear regression model per feature.
reg_1 = LinearRegression()
reg_2 = LinearRegression()
reg_3 = LinearRegression()
reg_4 = LinearRegression()
# Train each model on its own feature against the shared target split.
reg_1.fit(x_1_train, y_train)
reg_2.fit(x_2_train, y_train)
reg_3.fit(x_3_train, y_train)
reg_4.fit(x_4_train, y_train)
# Example #4
# 0
from ML.model_selection import train_test_split
from ML.LogisticRegression import LogisticRegression
from sklearn import datasets


iris = datasets.load_iris()  # load the iris dataset

X = iris.data
y = iris.target

# Restrict to the first two classes to make it a binary problem,
# and carve out two different feature pairs.
binary = y < 2
X_1 = X[binary, :2]   # first two features
X_2 = X[binary, 2:4]  # last two features
y = y[binary]


# Fit one logistic regression per feature pair; the shared seed keeps
# y_train/y_test identical for both splits.
X_1_train, X_1_test, y_train, y_test = train_test_split(X_1, y, seed=412)
log_reg_1 = LogisticRegression()
log_reg_1.fit(X_1_train, y_train)

X_2_train, X_2_test, y_train, y_test = train_test_split(X_2, y, seed=412)
log_reg_2 = LogisticRegression()
log_reg_2.fit(X_2_train, y_train)


def decision_boundary_1(x):
    """
    逻辑回归模型的决策边界
    Parameters
    ----------
    x : ndarray
        决策边界的横坐标散点数组
예제 #5
0
    [1.8, 220],
    [2.2, 258],
    [1.9, 360],
])
y = np.array([0] * 8 + [1] * 8)  # first half class 0, second half class 1

# In[16]:

# Visualize the two classes: red for label 0, green for label 1.
for label, color in ((0, 'r'), (1, 'g')):
    plt.scatter(X[y == label, 0], X[y == label, 1], color=color)
plt.show()

# In[17]:

# Hold out 25% for testing and score the kNN classifier.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, seed=100)
y_predict = kNN_classify(X_train, y_train, X_test)
accuracy_score(y_test, y_predict)

# In[9]:

# Standardization rescales a feature to zero mean and unit standard
# deviation, removing the influence of units/scale on the result.

# In[18]:

# Standardize the first feature column in place.
col = X[:, 0]
X[:, 0] = (col - col.mean()) / col.std()
X[:, 0]

# In[19]: