Ejemplo n.º 1
0
def sk_usage():
    """Fit a 5-neighbour KNN classifier on ``knn.csv`` and print its confusion matrix.

    Every column except the last is used as a feature and the last column as
    the class label. 20% of the rows are held out for evaluation; the split is
    not seeded, so the printed matrix can differ between runs.
    """
    csv_path = datautil.get_data_file_path('knn.csv')
    frame = pd.read_csv(csv_path)

    features = frame.iloc[:, :-1].values
    targets = frame.iloc[:, -1].values

    features_train, features_test, targets_train, targets_test = train_test_split(
        features, targets, test_size=0.2)

    classifier = KNeighborsClassifier(n_neighbors=5)
    classifier.fit(features_train, targets_train)

    predicted = classifier.predict(features_test)

    # First argument is the ground truth, second the predictions.
    print(confusion_matrix(targets_test, predicted))
Ejemplo n.º 2
0
def demo():
    """Fit a logistic-regression model on ``Social_Network_Ads.csv`` and report on it.

    Loads the CSV, strips newline characters embedded in cell values, uses the
    columns from the third up to (but excluding) the last as integer features
    and the last column as the integer label, standardises the features, holds
    out 20% of the rows, fits the model, plots ``model.errors`` and finally
    prints the confusion matrix for the held-out rows.
    """
    csv_path = datautil.get_data_file_path('Social_Network_Ads.csv')

    frame = pd.read_csv(csv_path)
    frame = frame.replace({r'\n': ''}, regex=True)

    # NOTE(review): read_csv consumes the header line by default, so the `1:`
    # row slice also drops the first data record -- confirm this is intended.
    rows = frame.values[1:]
    features = rows[:, 2:-1].astype('int')
    # Slicing with `-1:` (not `-1`) keeps the labels as a single-column 2-D array.
    targets = rows[:, -1:].astype('int')

    features = StandardScaler().fit_transform(features)

    features_train, features_test, targets_train, targets_test = train_test_split(
        features, targets, test_size=0.2)

    model = LogisticRegression()
    model.fit(features_train, targets_train)
    predicted = model.predict(features_test)

    # NOTE(review): scikit-learn's LogisticRegression has no `errors`
    # attribute; presumably this is a project-local implementation -- confirm.
    plt.plot(model.errors)
    plt.show()

    print(confusion_matrix(targets_test, predicted))
Ejemplo n.º 3
0
import random
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy
from cv2 import imread, imshow, imwrite
from sklearn.cluster import KMeans

from lib import datautil

# Path to "knn.csv" as resolved by the project's datautil helper.
# NOTE(review): sk_usage() below passes this path to cv2's imread, which
# expects an image file; a CSV looks like the wrong input here -- confirm.
data_file = datautil.get_data_file_path("knn.csv")


def sk_usage():
    """
    Run the k-means algorithm with scikit-learn on the pixels of an image.

    The image at ``data_file`` is flattened to one row per pixel (three values
    per row), the flattened and original shapes are printed, and the pixels
    are grouped into 5 clusters.
    """
    # NOTE(review): data_file is built from "knn.csv"; imread expects an image
    # file, so this input looks wrong -- confirm.
    image = imread(data_file.as_posix())

    height, width = image.shape[0], image.shape[1]
    pixel = np.reshape(image, (height * width, 3))
    print(pixel.shape)
    # Independent copy of the flattened pixels (not used further in this function).
    pixel_new = deepcopy(pixel)

    print(image.shape)

    kmeans = KMeans(n_clusters=5)
    # Note: KMeans.fit_predict requires a two-dimensional input -- the first
    # dimension is the samples, the second is the features of each sample. It
    # returns, for every sample, the index of the cluster it was assigned to.
    labels = kmeans.fit_predict(pixel)
    # cluster_centers_ holds the center point of each cluster.
    palette = kmeans.cluster_centers_
Ejemplo n.º 4
0
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler

from lib import datautil

# Path to 'Social_Network_Ads.csv' as resolved by the project's datautil
# helper; read by sk_usage() below.
data_file = datautil.get_data_file_path('Social_Network_Ads.csv')


def sk_usage():
    """Train a linear SVM on the CSV at ``data_file`` and print its confusion matrix.

    Newline characters embedded in cell values are stripped, the columns from
    the third up to (but excluding) the last become integer features and the
    last column the integer label. Features are standardised, 20% of the rows
    are held out (unseeded split) and the confusion matrix is computed on them.
    """
    frame = pd.read_csv(data_file)
    frame = frame.replace({r'\n': ''}, regex=True)

    # NOTE(review): read_csv consumes the header line by default, so the `1:`
    # row slice also drops the first data record -- confirm this is intended.
    rows = frame.values[1:]
    features = rows[:, 2:-1].astype('int')
    targets = rows[:, -1].astype('int')

    features = StandardScaler().fit_transform(features)
    features_train, features_test, targets_train, targets_test = train_test_split(
        features, targets, test_size=0.2)

    classifier = LinearSVC()
    classifier.fit(features_train, targets_train)
    predicted = classifier.predict(features_test)

    print(confusion_matrix(targets_test, predicted))
Ejemplo n.º 5
0
from scipy.ndimage.filters import convolve
import numpy as np
from scipy import ndimage
import cv2
from matplotlib import pyplot as plt

from lib import datautil

# Path to 'soccer.jpg' as resolved by the project's datautil helper; this is
# the image opencv_usage() below loads.
data = datautil.get_data_file_path('soccer.jpg')


def opencv_usage():
    """
    Show a grayscale image side by side with its Canny edge map.

    cv2.Canny() accepts several arguments; the commonly used ones are:
        threshold1: first hysteresis threshold
        threshold2: second hysteresis threshold
            (the smaller of the two is used for edge linking, the larger
            for finding strong edges)
        apertureSize: size of the Sobel operator

    Raises:
        OSError: if the image at ``data`` cannot be read.
    """
    import os

    # get_data_file_path may return a pathlib.Path (another caller in this
    # project uses .as_posix() on its result); normalise it so cv2.imread
    # always receives a plain string filename.
    image_path = os.fspath(data)
    img = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising.
        raise OSError("could not read image: %s" % image_path)

    # 100 / 200 are the two hysteresis thresholds.
    edges = cv2.Canny(img, 100, 200)

    # Left panel: the source image, axis ticks hidden.
    plt.subplot(121)
    plt.imshow(img, cmap='gray')
    plt.title('Original Image')
    plt.xticks([])
    plt.yticks([])

    # Right panel: the detected edges, axis ticks hidden.
    plt.subplot(122)
    plt.imshow(edges, cmap='gray')
    plt.title('Edge Image')
    plt.xticks([])
    plt.yticks([])

    plt.show()
Ejemplo n.º 6
0
from collections import Counter

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import confusion_matrix
import numpy as np
import pandas as pd

from lib import datautil

# Path to 'spam.csv' as resolved by the project's datautil helper; read by
# sk_usage() below.
data = datautil.get_data_file_path('spam.csv')


def sk_usage():
    """Train a multinomial naive Bayes classifier on ``spam.csv`` and print its confusion matrix.

    The CSV is read as ISO-8859-1, its ``v1`` / ``v2`` columns are renamed to
    ``Label`` / ``Text``, the text is turned into token counts, and 20% of the
    rows are held out (unseeded split) for evaluation. The printed matrix has
    its rows and columns ordered ``ham``, ``spam``.
    """
    messages = pd.read_csv(data, encoding='iso-8859-1')
    messages = messages.rename(columns={'v1': 'Label', 'v2': 'Text'})

    # Token-count matrix built from the message text.
    counts = CountVectorizer().fit_transform(messages.Text)
    targets = messages.Label

    counts_train, counts_test, targets_train, targets_test = train_test_split(
        counts, targets, test_size=0.2)

    # train naive bayes classifier
    classifier = MultinomialNB(alpha=1.0, fit_prior=True)
    classifier.fit(counts_train, targets_train)
    predicted = classifier.predict(counts_test)

    print(confusion_matrix(targets_test, predicted, labels=['ham', 'spam']))