def test_weighted_vs_majority(x_data, class_vector) -> (Dict, Dict):
    """Cross-validate every weight method over a range of neighbour counts.

    For each weight method in ``KNearestNeighbors.WEIGHT_METHODS`` and each
    ``n_neighbors`` from 1 to 50, a classifier is built on every training
    fold of a shuffled, stratified 10-fold split (``random_state=1``) and
    its misclassification rate on the matching test fold is recorded.  The
    per-fold rates are then averaged.

    :param x_data: Samples; indexed with the index arrays of each fold.
    :param class_vector: Class labels; indexed with the same index arrays.
    :return: Returns (error_rates_majority, error_rates_weighted), each a
        dict mapping ``n_neighbors`` to the average error rate.
    """
    splitter = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
    mean_errors: Dict[Tuple[str, int], float] = {}

    for scheme in KNearestNeighbors.WEIGHT_METHODS:
        for k in range(1, 51):
            fold_rates = []
            for train_idx, test_idx in splitter.split(x_data, class_vector):
                model = KNearestNeighbors(
                    x_data[train_idx],
                    class_vector[train_idx],
                    weight_method=scheme,
                    n_neighbors=k,
                )
                guesses = model.classify(x_data[test_idx])
                truths = class_vector[test_idx]
                # Count the positions where prediction and label disagree.
                misses = sum(
                    1 for guess, truth in zip(guesses, truths)
                    if guess != truth
                )
                fold_rates.append(misses / len(guesses))
            mean_errors[(scheme, k)] = np.average(fold_rates)

    def error_rates_method(method):
        # Keep only the entries of one weight method, keyed by n_neighbors.
        selected = {}
        for (scheme, k), rate in mean_errors.items():
            if scheme == method:
                selected[k] = rate
        return selected

    majority_rates = error_rates_method('majority')
    weighted_rates = error_rates_method('weighted')
    return majority_rates, weighted_rates
def __init__(self, path, k=1):
    """Load the image at ``path`` in grayscale and set up the classifier.

    Images with either dimension above 2500 pixels are halved in both
    dimensions.

    :param path: Path of the image file, passed to ``cv2.imread``.
    :param k: Passed as the single positional argument to
        ``KNearestNeighbors``.
    :raises ValueError: If ``cv2.imread`` could not load the image.
    """
    self.img = cv2.imread(path, 0)
    if self.img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError("could not read image: {!r}".format(path))

    height, width = self.img.shape[:2]
    if height > 2500 or width > 2500:
        # cv2.resize expects dsize as (width, height), i.e. the reverse of
        # the (rows, cols) order of ``shape``; passing them unswapped
        # distorts non-square images.
        self.img = cv2.resize(self.img, (width // 2, height // 2))

    self.k = KNearestNeighbors(k)
    self.k.fit_transform()
    self.grid = np.zeros((9, 9))
def test_knn(self):
    """A query next to the only class-2 sample must be classified as 2."""
    features = np.array([
        [1, 2, 3],
        [2, 3, 4],
        [17, 18, 19],
    ])
    labels = [1, 1, 2]
    query = np.array([[18, 19, 20]])

    classifier = KNearestNeighbors(
        features,
        labels,
        n_neighbors=2,
        weight_method='weighted',
        distance_method='euclidean',
    )
    result = classifier.classify(query)

    assert result[0] == 2
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from knn import KNearestNeighbors

# Load the iris samples together with their class labels.
iris = load_iris()
data, target = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed seed keeps the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.2,
    random_state=5656,
)

# Fit on the training split, predict the held-out split, report accuracy.
clf = KNearestNeighbors(K=3)
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
print('Accuracy:', accuracy_score(y_test, predictions))
class scanner:
    """Find a four-cornered outline in an image and read a 9x9 grid from it.

    The image is loaded in grayscale, warped so the detected outline fills
    the frame, thresholded, and resized to 900x900.  ``getDigits`` then
    cuts one window per grid cell, crops it to its largest contour and
    stores the classifier's prediction in ``self.grid``.
    """

    def __init__(self, path, k=1):
        """Load the image at ``path`` in grayscale and set up the classifier.

        Images with either dimension above 2500 pixels are halved in both
        dimensions.

        :param path: Path of the image file, passed to ``cv2.imread``.
        :param k: Passed as the single positional argument to
            ``KNearestNeighbors``.
        :raises ValueError: If ``cv2.imread`` could not load the image.
        """
        self.img = cv2.imread(path, 0)
        if self.img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise ValueError("could not read image: {!r}".format(path))

        height, width = self.img.shape[:2]
        if height > 2500 or width > 2500:
            # cv2.resize expects dsize as (width, height), i.e. the reverse
            # of the (rows, cols) order of ``shape``; passing them
            # unswapped distorts non-square images.
            self.img = cv2.resize(self.img, (width // 2, height // 2))

        self.k = KNearestNeighbors(k)
        self.k.fit_transform()
        self.grid = np.zeros((9, 9))

    def getNum(self, digit):
        """Return the classifier's prediction for one cropped cell image."""
        return self.k.predict(digit)

    def preprocessing(self):
        """Warp the detected outline to fill the frame and binarise it.

        :return: A 900x900 adaptive-thresholded image.
        :raises ValueError: If none of the five largest contours can be
            approximated by a polygon with exactly four corners.
        """
        img_blur = cv2.GaussianBlur(self.img, (3, 3), 0)

        # Otsu's threshold value drives the Canny hysteresis thresholds:
        # the upper one is the Otsu value, the lower one half of it.
        otsu_thresh_val, _ = cv2.threshold(
            img_blur, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        high_thresh_val = otsu_thresh_val
        lower_thresh_val = otsu_thresh_val * 0.5
        canny_output = cv2.Canny(img_blur, lower_thresh_val, high_thresh_val)

        contours, _ = cv2.findContours(
            canny_output, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

        # Take the first (largest) candidate that simplifies to 4 corners.
        temp = None
        for c in contours:
            peri = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.02 * peri, True)
            if len(approx) == 4:
                temp = approx
                break
        if temp is None:
            raise ValueError("no four-cornered contour found in the image")
        temp = temp.reshape(4, 2)

        # Order the corners: smallest x+y, smallest y-x, largest x+y,
        # largest y-x.
        inputRect = np.zeros((4, 2), dtype="float32")
        s = temp.sum(axis=1)
        inputRect[0] = temp[np.argmin(s)]
        inputRect[2] = temp[np.argmax(s)]
        diff = np.diff(temp, axis=1)
        inputRect[1] = temp[np.argmin(diff)]
        inputRect[3] = temp[np.argmax(diff)]

        # The output canvas uses shape[0] as its x extent and shape[1] as
        # its y extent, both for the target corners and for the warp size.
        out_x, out_y = self.img.shape[0], self.img.shape[1]
        outputRect = np.array([[0, 0],
                               [out_x - 1, 0],
                               [out_x - 1, out_y - 1],
                               [0, out_y - 1]], dtype="float32")
        perspectiveMatrix = cv2.getPerspectiveTransform(inputRect, outputRect)
        warp_output = cv2.warpPerspective(
            self.img, perspectiveMatrix, (out_x, out_y))

        # Block size scales with the image area; adaptiveThreshold needs an
        # odd value greater than 1.
        size = int(warp_output.shape[0] * warp_output.shape[1] / 2188)
        if size % 2 == 0:
            size += 1
        size = max(size, 3)
        binary_output = cv2.adaptiveThreshold(
            warp_output, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
            cv2.THRESH_BINARY, size, 0)

        square = cv2.resize(binary_output, (900, 900))
        return square

    def getDigits(self):
        """Classify every cell of the 9x9 grid and store it in ``self.grid``.

        A cell is left at 0 when it has no contour with positive area or
        when its bounding box is too small to survive the 5-pixel margin
        trimmed from every side.
        """
        square = self.preprocessing()
        s = int(900 * 0.13)  # window side, a little larger than one cell

        for x in range(9):
            for y in range(9):
                row0 = int(900 * x / 9)
                col0 = int(900 * y / 9)

                # Copy the window that starts at the cell's corner; the
                # part that would fall outside the 900x900 image stays 0.
                rows = min(s, 900 - row0)
                cols = min(s, 900 - col0)
                elm = np.zeros((s, s))
                elm[:rows, :cols] = square[row0:row0 + rows,
                                           col0:col0 + cols]
                elm = cv2.convertScaleAbs(elm)

                contours, _ = cv2.findContours(
                    elm, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

                # Reset per cell so a contour from the previous cell is
                # never reused.
                c = None
                largest_area = 0
                for cnt in contours:
                    area = cv2.contourArea(cnt)
                    if area > largest_area:
                        largest_area = area
                        c = cnt
                if c is None:
                    self.grid[x][y] = 0
                    continue

                bx, by, bw, bh = cv2.boundingRect(c)
                if bw <= 10 or bh <= 10:
                    # Trimming 5 px per side would leave nothing (or make
                    # the slice end wrap to a negative index).
                    self.grid[x][y] = 0
                    continue

                elm = elm[by + 5:by + bh - 5, bx + 5:bx + bw - 5]
                fin = cv2.resize(elm, (20, 20))
                self.grid[x][y] = self.getNum(fin)
import glob

import cv2
from numpy import genfromtxt

import GenerateFeatureVector as gfv
from knn import KNearestNeighbors
from trainSVM import SVM

# Classifier objects, built with their default arguments.
knn_obj = KNearestNeighbors()
svm_obj = SVM()

# Script settings.
cell_length, cell_width = 10, 10
training = False
bins = 8

# Print a running index for every PNG under Images/Testing, in sorted order.
i = 0
for filename in sorted(glob.glob('Images/Testing/*.png')):
    print(i)
    i += 1
def psi(xlist, M):
    """Build a polynomial design matrix.

    Row ``r`` holds ``xlist[r] ** i`` for ``i = 0, 1, ..., M``.
    """
    return np.array([[x ** power for power in range(M + 1)] for x in xlist])


np.random.seed(0)

# Data for grid search: N noisy samples of one sine period on [0, 1].
N = 10
M = 15
xlist = np.linspace(0, 1, N)
ylist = np.sin(2 * np.pi * xlist) + np.random.normal(0, 0.2, xlist.size)
y = ylist
X = psi(xlist, M)

# Grid search over the number of neighbours.
parameters = {'n_neighbors': list(range(1, 5))}
# NOTE(review): the first search object is replaced on the very next line,
# so only the KNearestNeighbors_Inheritance search is ever fitted; confirm
# which estimator is intended.
reg = GridSearchCV(KNearestNeighbors(), parameters, cv=5)
reg = GridSearchCV(KNearestNeighbors_Inheritance(), parameters, cv=5)
reg.fit(X, y)
print(reg.best_params_)

# Plot the samples, the noise-free sine and the fitted prediction.
xs = np.linspace(0, 1, 500)
ideal = np.sin(2 * np.pi * xs)
y_pred = reg.predict(psi(xs, M))
plt.plot(xlist, ylist, 'bo')
for curve in (ideal, y_pred):
    plt.plot(xs, curve)
plt.show()
print(dataset.head())

# Split the table into the feature columns and the 'label' column.
X = dataset.drop('label', axis=1)
y = dataset['label']

# Min-max scale the features; the scaler is fitted on the full feature table.
from sklearn.preprocessing import MinMaxScaler
x_scaler = MinMaxScaler()
X = x_scaler.fit_transform(X)

# Keep a quarter of the rows for testing; the seed fixes the split.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=2,
)

# Fit the classifier on the training rows and predict the test rows.
from knn import KNearestNeighbors
knn = KNearestNeighbors(k=3)
knn.fit(X_train, y_train)
predict = knn.predict(X_test)

# Report accuracy, confusion matrix and per-class report, in that order.
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
for metric in (accuracy_score, confusion_matrix, classification_report):
    print(metric(y_test, predict))