sum(np.array(y_predict) == y_test) # In[16]: len(X_test) # In[17]: sum(np.array(y_predict) == y_test) / len(X_test) # In[19]: from ML.model_selection import train_test_split # In[20]: X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, seed=1) # In[21]: y_predict = kNN_classify(X_train, y_train, X_test) # In[22]: sum(np.array(y_predict) == y_test) / len(X_test) # In[ ]:
# -*- coding: utf-8 -*-
"""
Multiple linear regression on the housing dataset.

Loads housing.csv, splits it into train/test sets, fits a multiple linear
regression model via the normal equation, and evaluates it on the held-out
test set with MSE, RMSE, MAE, and R^2.

Created on Thu Jul 30 23:48:08 2020

@author: Giyn
"""
import pandas as pd

from ML.MultipleLinearRegression import MultipleLinearRegression
# Explicit imports instead of `from ML.metrics import *` so every name used
# below is visible here and the module namespace is not polluted.
from ML.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
    root_mean_squared_error,
)
from ML.model_selection import train_test_split

data = pd.read_csv("housing.csv")
X = data.values[:, [0, 1, 2, 3]]  # first four columns: features
y = data.values[:, [4]]           # fifth column: target

# Fixed seed so the split (and therefore the reported metrics) is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, seed=412)

reg = MultipleLinearRegression()
reg.fit_normal(X_train, y_train)  # closed-form fit via the normal equation
y_predict = reg.predict(X_test)

# Regression metrics on the held-out test set.
MSE = mean_squared_error(y_test, y_predict)
RMSE = root_mean_squared_error(y_test, y_predict)
MAE = mean_absolute_error(y_test, y_predict)
R_Square = r2_score(y_test, y_predict)
from ML.model_selection import train_test_split

# Notebook-export chunk: `pd` (pandas) and `LinearRegression` are imported in
# an earlier cell that is not visible here.
data = pd.read_csv("housing.csv")  # load the dataset

x_1 = data.values[:, [0]].flatten()  # feature 1
x_2 = data.values[:, [1]].flatten()  # feature 2
x_3 = data.values[:, [2]].flatten()  # feature 3
x_4 = data.values[:, [3]].flatten()  # feature 4
y = data.values[:, [4]].flatten()    # target variable

# Drop samples where the target reaches 50: values above 50 may exist but are
# displayed as 50 (capped), so they would distort the fit.
x_1 = x_1[y < 50.0]
x_2 = x_2[y < 50.0]
x_3 = x_3[y < 50.0]
x_4 = x_4[y < 50.0]
y = y[y < 50.0]

# Split each single-feature dataset with the same seed so the train/test
# partition is identical across all four features.
x_1_train, x_1_test, y_train, y_test = train_test_split(x_1, y, seed=412)
x_2_train, x_2_test, y_train, y_test = train_test_split(x_2, y, seed=412)
x_3_train, x_3_test, y_train, y_test = train_test_split(x_3, y, seed=412)
x_4_train, x_4_test, y_train, y_test = train_test_split(x_4, y, seed=412)

# Instantiate one simple linear regression model per feature.
reg_1 = LinearRegression()
reg_2 = LinearRegression()
reg_3 = LinearRegression()
reg_4 = LinearRegression()

# Train each model on its own feature.
reg_1.fit(x_1_train, y_train)
reg_2.fit(x_2_train, y_train)
reg_3.fit(x_3_train, y_train)
reg_4.fit(x_4_train, y_train)
from ML.model_selection import train_test_split
from ML.LogisticRegression import LogisticRegression
from sklearn import datasets

iris = datasets.load_iris()  # load the iris dataset
X = iris.data
y = iris.target

X_1 = X[y<2, :2]   # keep only the first two features
X_2 = X[y<2, 2:4]  # the other two features (columns 3-4)
y = y[y<2]         # keep only classes 0 and 1 -> binary classification

X_1_train, X_1_test, y_train, y_test = train_test_split(X_1, y, seed=412)  # split the dataset
log_reg_1 = LogisticRegression()  # instantiate a logistic regression model
log_reg_1.fit(X_1_train, y_train)  # train the model

X_2_train, X_2_test, y_train, y_test = train_test_split(X_2, y, seed=412)  # split the dataset
log_reg_2 = LogisticRegression()  # instantiate a logistic regression model
log_reg_2.fit(X_2_train, y_train)  # train the model


def decision_boundary_1(x):
    """
    Decision boundary of the logistic regression model.

    Parameters
    ----------
    x : ndarray
        x-coordinates of the scatter points on the decision boundary
# Notebook-export chunk: this continues an `X = np.array([` literal opened in
# a previous cell; np, plt, train_test_split, kNN_classify and accuracy_score
# are defined/imported in earlier cells not visible here.
    [1.8, 220],
    [2.2, 258],
    [1.9, 360],
])
y = np.array([0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1])

# In[16]:
# Visualize the two classes: class 0 in red, class 1 in green.
plt.scatter(X[y == 0, 0], X[y == 0, 1], color='r')
plt.scatter(X[y == 1, 0], X[y == 1, 1], color='g')
plt.show()

# In[17]:
# Split, classify with kNN, and report accuracy on the raw (unscaled) data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, seed=100)
y_predict = kNN_classify(X_train, y_train, X_test)
accuracy_score(y_test, y_predict)

# In[9]:
# Standardization rescales the data proportionally into a small range:
# it produces values with mean 0 and standard deviation 1, removing the
# influence of the features' units/scales on the result.

# In[18]:
# Standardize the first feature column in place.
X[:, 0] = (X[:, 0] - np.mean(X[:, 0])) / np.std(X[:, 0])
X[:, 0]

# In[19]: