def sk_usage():
    """Train a 5-nearest-neighbours classifier on knn.csv and print the
    confusion matrix of its predictions on a held-out 20% split."""
    frame = pd.read_csv(datautil.get_data_file_path('knn.csv'))
    # All columns but the last are features; the last column is the class label.
    features = frame.iloc[:, :-1].values
    labels = frame.iloc[:, -1].values
    f_train, f_test, l_train, l_test = train_test_split(features, labels, test_size=0.2)
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(f_train, l_train)
    predictions = knn.predict(f_test)
    print(confusion_matrix(l_test, predictions))
def demo():
    """Fit a logistic-regression classifier on Social_Network_Ads.csv and
    print the confusion matrix of its predictions on a held-out 20% split."""
    file = datautil.get_data_file_path('Social_Network_Ads.csv')
    data = pd.read_csv(file)
    # Scrub literal newline characters embedded in the raw CSV cells.
    data = data.replace({r'\n': ''}, regex=True)
    # Skip the header row; columns 2..-2 are numeric features, the last
    # column is the 0/1 purchase label.
    X = data.values[1:, 2:-1].astype('int')
    # BUG FIX: the label array must be 1-D — the original `[1:, -1:]` slice
    # kept a trailing axis, which sklearn only accepts with a
    # DataConversionWarning.
    Y = data.values[1:, -1].astype('int')
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2)
    model = LogisticRegression()
    model.fit(X_train, Y_train)
    Y_pred = model.predict(X_test)
    # BUG FIX: removed `plt.plot(model.errors)` / `plt.show()` — sklearn's
    # LogisticRegression exposes no `errors` attribute, so the original code
    # raised AttributeError before the confusion matrix was ever printed.
    confusion = confusion_matrix(Y_test, Y_pred)
    print(confusion)
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy
from cv2 import imread, imshow, imwrite
from sklearn.cluster import KMeans
from lib import datautil

# NOTE(review): this resolves to a CSV, yet sk_usage() feeds it to cv2's
# imread, which expects an image file — confirm the intended data file.
data_file = datautil.get_data_file_path("knn.csv")


def sk_usage():
    """Cluster the pixels of the data file into 5 colours with scikit-learn's KMeans."""
    image = imread(data_file.as_posix())
    # Flatten (H, W, 3) into an (H*W, 3) sample matrix — one row per pixel,
    # one column per channel — since KMeans.fit_predict requires 2-D input.
    pixels = np.reshape(image, (image.shape[0] * image.shape[1], 3))
    print(pixels.shape)
    pixels_copy = deepcopy(pixels)
    print(image.shape)
    clusterer = KMeans(n_clusters=5)
    # fit_predict returns, for every pixel row, the index of its assigned cluster.
    cluster_ids = clusterer.fit_predict(pixels)
    # cluster_centers_ lists the centroid colour of each cluster.
    centroids = clusterer.cluster_centers_
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
from lib import datautil

data_file = datautil.get_data_file_path('Social_Network_Ads.csv')


def sk_usage():
    """Train a linear SVM on the Social_Network_Ads data and print the
    confusion matrix of its predictions on a held-out 20% split."""
    frame = pd.read_csv(data_file)
    # Scrub literal newline characters left inside the CSV cells.
    frame = frame.replace({r'\n': ''}, regex=True)
    # Skip the header row; columns 2..-2 are numeric features, the last is the label.
    features = frame.values[1:, 2:-1].astype('int')
    target = frame.values[1:, -1].astype('int')
    features = StandardScaler().fit_transform(features)
    f_train, f_test, t_train, t_test = train_test_split(features, target, test_size=0.2)
    classifier = LinearSVC()
    classifier.fit(f_train, t_train)
    print(confusion_matrix(t_test, classifier.predict(f_test)))
from scipy.ndimage.filters import convolve
import numpy as np
from scipy import ndimage
import cv2
from matplotlib import pyplot as plt
from lib import datautil

data = datautil.get_data_file_path('soccer.jpg')


def opencv_usage():
    """Run Canny edge detection on soccer.jpg and display the original image
    next to its edge map.

    cv2.Canny's commonly used parameters:
        threshold1: first hysteresis threshold (here 100 — the smaller of the
            two values acts as the LOW threshold)
        threshold2: second hysteresis threshold (here 200 — the larger value
            acts as the HIGH threshold)
        apertureSize: size of the Sobel kernel

    (BUG FIX: the original docstring labelled threshold1 as the high and
    threshold2 as the low threshold, which is backwards for this call.)
    """
    # BUG FIX: cv2.imread expects a string filename; the helper returns a
    # pathlib-style object (cf. the sibling script's `.as_posix()` call), so
    # convert explicitly. Flag 0 loads the image as grayscale.
    img = cv2.imread(str(data), 0)
    edges = cv2.Canny(img, 100, 200)
    plt.subplot(121)
    plt.imshow(img, cmap='gray')
    plt.title('Original Image')
    plt.xticks([]), plt.yticks([])
    plt.subplot(122)
    plt.imshow(edges, cmap='gray')
    plt.title('Edge Image')
    plt.xticks([]), plt.yticks([])
    plt.show()
from collections import Counter
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix
import numpy as np
import pandas as pd
from lib import datautil

data = datautil.get_data_file_path('spam.csv')


def sk_usage():
    """Train a multinomial naive-Bayes spam filter on spam.csv and print the
    confusion matrix of its predictions on a held-out 20% split."""
    frame = pd.read_csv(data, encoding='iso-8859-1')
    frame.rename(columns={'v1': 'Label', 'v2': 'Text'}, inplace=True)
    # Bag-of-words count matrix: one row per message, one column per token.
    vectorizer = CountVectorizer()
    counts = vectorizer.fit_transform(frame.Text)
    categories = frame.Label
    c_train, c_test, cat_train, cat_test = train_test_split(counts, categories, test_size=0.2)
    # Laplace smoothing (alpha=1.0) with class priors learned from the data.
    model = MultinomialNB(alpha=1.0, fit_prior=True)
    model.fit(c_train, cat_train)
    predicted = model.predict(c_test)
    # Pin the row/column order of the matrix: ham first, spam second.
    print(confusion_matrix(cat_test, predicted, labels=['ham', 'spam']))