예제 #1
0
def lasso_test(X, y, degree=1):
    """Fit a LASSO regression of the given polynomial degree and report results.

    Splits (X, y) with a fixed seed, fits LassoRegression with batch gradient
    descent, prints the test-set score, then prints the first five true values
    next to their predictions.

    Args:
        X: feature matrix.
        y: target vector.
        degree: polynomial degree passed to LassoRegression (default 1).
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    lasso_reg = LassoRegression(degree=degree)
    lasso_reg.fit(X_train, y_train, lasso=True, method="bgd")
    print(lasso_reg.score(X_test, y_test))
    # Show the first few predictions side by side with the ground truth.
    y_predict = lasso_reg.predict(X_test[:5])
    for actual, predicted in zip(y_test[:5], y_predict):
        print(actual, predicted)
    print()
예제 #2
0
def poly_test(X, y, degree=1):
    """Fit a plain polynomial regression (no LASSO penalty) and report results.

    Splits (X, y) with a fixed seed, fits LassoRegression with lasso disabled
    via the normal equation, prints the test-set score, then prints the first
    five true values next to their predictions.

    Args:
        X: feature matrix.
        y: target vector.
        degree: polynomial degree passed to LassoRegression (default 1).
    """
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    poly_reg = LassoRegression(degree=degree)
    poly_reg.fit(X_train, y_train, lasso=False, method="normal")
    print(poly_reg.score(X_test, y_test))
    # Show the first few predictions side by side with the ground truth.
    y_predict = poly_reg.predict(X_test[:5])
    for actual, predicted in zip(y_test[:5], y_predict):
        print(actual, predicted)
    print()
예제 #3
0
# NOTE(review): `iris` and `x` are defined earlier in the file — not visible
# in this chunk; verify before running this section in isolation.
y=iris.target
#
# x_train,x_test,y_train,y_test = train_test_split(x,y)
#
# my_knn=KNNClassifier(k=3)
# my_knn.fit(x_train,y_train)
# y_predict=my_knn.predict(x_test)
# print(y_test)
# print(y_predict)
# print(sum(y_predict==y_test)/len(y_test))

#########################################################
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Hold out 20% of the samples for testing.
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2)
# kNN_classifier  = KNeighborsClassifier(n_neighbors=3)
# kNN_classifier.fit(x_train,y_train)
# y_predict = kNN_classifier.predict(x_test)
# # print(sum(y_predict==y_test)/len(y_test))
#
#
# best_score = 0.0
# best_k = -1
# best_p = -1
#
# for k in range(1, 11):
#     for p in range(1, 6):
#         knn_clf = KNeighborsClassifier(n_neighbors=k, weights="distance", p=p)  # once p is searched, distance is being considered, hence weights="distance"
#         knn_clf.fit(x_train, y_train)
#         score = knn_clf.score(x_test, y_test)
예제 #4
0
from KNN import KNNClassifier
from model_selection import train_test_split
from metric import accuracy_score
import pandas as pd
import numpy as np

if __name__ == '__main__':
    """Grid-search the best k for a kNN classifier on two iris features."""
    # Load iris from a headerless CSV; columns 0-3 are features, 4 is the label.
    iris = pd.read_csv('../iris.data', header=None)
    iris_data = iris.loc[:, :].values
    # Use only columns 0 and 2 as features — presumably sepal length and
    # petal length; verify against the data file.
    x_data = iris_data[:, [0, 2]]
    y_data = iris_data[:, 4]

    # NOTE(review): unpack order (x_train, y_train, x_test, y_test) must match
    # the local model_selection.train_test_split's return order — confirm.
    x_train, y_train, x_test, y_test = train_test_split(x_data, y_data)

    # Try k = 1..10 and keep the k with the best test-set accuracy.
    best_score = 0.0
    best_k = -1
    for k in range(1, 11):
        knn_clf = KNNClassifier(n_neighbors=k)
        knn_clf.fit(x_train, y_train)
        score = knn_clf.score(x_test, y_test)
        if score > best_score:
            best_k = k
            best_score = score

    print("best_k =", best_k)
    print('best_score =', best_score)
예제 #5
0
# plt.show()

# Iris logistic-regression classification: keep only two classes and the
# first two features so the problem becomes a binary, 2-D classification.
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

iris = datasets.load_iris()
X = iris.data
y = iris.target

# Restrict to classes 0/1 and the first two feature columns.
X = X[y < 2, :2]
y = y[y < 2]

from model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)

from LogisticRegression import LogisticRegression
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

# Previously observed output kept for reference:
# print(log_reg.score(X_test,y_test))#1.0
#
# print(log_reg.predict_proba(X_test))
# #[0.92972035 0.98664939 0.14852024 0.01685947 0.0369836  0.0186637
#  # 0.04936918 0.99669244 0.97993941 0.74524655 0.04473194 0.00339285
#  # 0.26131273 0.0369836  0.84192923 0.79892262 0.82890209 0.32358166
#  # 0.06535323 0.20735334]
#
# print(log_reg.coef_) #[ 3.01796521 -5.04447145]
# print(log_reg.intercept_) #-0.6937719272911225
예제 #6
0
from kNN import kNNClassifier
from model_selection import train_test_split
from sklearn import datasets
from metrics import accuracy_score
from preprocessing import StandardScaler

# Load the iris data set.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split the data into train/test sets (0.2 presumably the test ratio —
# confirm against the local train_test_split's signature).
X_train, X_test, y_train, y_test = train_test_split(X, y, 0.2)

# Standardize features: fit the scaler on the training data only, then apply
# the same transform to both splits so no test information leaks into scaling.
# (A dead duplicate scaler whose `.fit` was accessed but never called was
# removed here — it had no effect.)
standardScaler = StandardScaler()
standardScaler.fit(X_train)
X_train = standardScaler.transform(X_train)
X_test = standardScaler.transform(X_test)
print(X_test)

# Train a kNN classifier (k=3) on the standardized data and predict.
kcf = kNNClassifier(3)
kcf.fit(X_train, y_train)
y_predict = kcf.predict(X_test)

# Compute the prediction accuracy.
accuracy = accuracy_score(y_test, y_predict)
예제 #7
0
from linear_regression import MultipleLinearRegression
from metric import mean_squared_error
from metric import mean_absolute_error
from metric import root_mean_squared_error
from metric import r2_score

if __name__ == '__main__':
    # Fit a multiple linear regression on the Boston housing data via the
    # normal equation, then print coefficients, bias and the test score.
    # NOTE(review): `datasets` and `train_test_split` are not imported in this
    # chunk — presumably imported earlier in the file; verify.
    boston = datasets.load_boston()

    X = boston.data
    y = boston.target

    # Drop the censored targets (prices are capped at 50.0 in this data set).
    X = X[y < 50.0]
    y = y[y < 50.0]

    # NOTE(review): unpack order (X_train, y_train, X_test, y_test) differs
    # from the (X_train, X_test, y_train, y_test) order this same helper is
    # unpacked with elsewhere — confirm the local train_test_split's order.
    X_train, y_train, X_test, y_test = train_test_split(X, y, seed=666)
    reg = MultipleLinearRegression()
    reg.fit_normal(X_train, y_train)
    print(reg.coef_, reg.bias_)
    print(reg.score(X_test, y_test))
"""
if __name__ == '__main__':
    boston = datasets.load_boston()
    print(boston.feature_names)

    x = boston.data[:, 5]
    y = boston.target

    x = x[y < 50.0]
    y = y[y < 50.0]
예제 #8
0
from regression.logistic_regression import LogisticRegression
from metric import mean_squared_error
from metric import mean_absolute_error
from metric import root_mean_squared_error
from metric import r2_score
import pandas as pd;
if __name__ == '__main__':
    # NOTE(review): this chunk imports LogisticRegression but uses
    # MultipleLinearRegression; `datasets` and `train_test_split` are also not
    # imported here — presumably brought in elsewhere in the file; verify.
    boston = datasets.load_boston()

    X = boston.data
    y = boston.target

    # Drop the censored targets (prices capped at 50.0).
    X = X[y < 50.0]
    y = y[y < 50.0]

    # NOTE(review): unpack order (X_train, y_train, X_test, y_test) differs
    # from the sklearn convention — confirm the local helper's return order.
    X_train, y_train, X_test, y_test = train_test_split(X, y, seed=666)
    reg = MultipleLinearRegression()
    reg.fit_normal(X_train, y_train)
    print(reg.coef_, reg.bias_)
    print(reg.score(X_test, y_test))


    # Test with the iris data.

    iris = pd.read_csv('../iris.data', header=None)
    iris_data = iris.loc[:, :].values
    # First 100 rows (two classes), first two feature columns, label column 4.
    x_data = iris_data[:100, :2]
    y_data = iris_data[:100, 4]
    # Encode the string class labels as 0/1 integers.
    y_data[y_data == 'Iris-setosa'] = 0
    y_data[y_data == 'Iris-versicolor'] = 1
    x_train, y_train, x_test, y_test = train_test_split(x_data, y_data)
예제 #9
0
# Use the number of rooms (column 5) as the single feature.
# NOTE(review): `boston`, `plt`, `np`, `train_test_split` and
# SimpleLinearRegression2 come from earlier in the file — verify before
# running this section in isolation.

x = boston.data[:, 5]
y = boston.target

plt.scatter(x, y)
plt.show()

# Remove some noise: targets are capped at 50, so drop those censored points.
x = x[y < 50]
y = y[y < 50]

plt.scatter(x, y)
plt.show()

x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)

print(x_train.shape)
reg = SimpleLinearRegression2()
reg.fit(x_train, y_train)

# Plot the fitted line over the training points.
plt.scatter(x_train, y_train)
plt.plot(x_train, reg.predict(x_train), color='r')
plt.show()

y_predict = reg.predict(x_test)

# MSE: mean squared error on the test split.

mse_test = np.sum((y_test - y_predict)**2) / len(y_test)
print(mse_test)