Example #1
def test_weighted_vs_majority(x_data, class_vector) -> Tuple[Dict, Dict]:
    """
    :return: Returns (error_rates_majority, error_rates_weighted)
    """
    kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=1)
    average_error_rates: Dict[Tuple[str, int], float] = {}

    for weight_method in KNearestNeighbors.WEIGHT_METHODS:
        for n_neighbors in range(1, 51):
            classification_errors = []
            for train_indexes, test_indexes in kfold.split(x_data, class_vector):
                knn = KNearestNeighbors(x_data[train_indexes], class_vector[train_indexes], weight_method=weight_method,
                                        n_neighbors=n_neighbors)
                predicteds = knn.classify(x_data[test_indexes])
                n_errors = 0

                for predicted, actual in zip(predicteds, class_vector[test_indexes]):
                    if predicted != actual:
                        n_errors += 1

                classification_errors.append(n_errors / len(predicteds))

            average_error_rates[(weight_method, n_neighbors)] = np.average(classification_errors)

    def error_rates_method(method):
        return {n_n: err_rate for (method_, n_n), err_rate in average_error_rates.items() if method_ == method}

    return error_rates_method('majority'), error_rates_method('weighted')
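The test above, like the unit test in Example #3, only touches a small surface of the project's KNearestNeighbors class: a constructor taking the training matrix, the class vector, n_neighbors and weight_method (plus distance_method in Example #3), a WEIGHT_METHODS constant, and a classify method. Below is a minimal sketch of a classifier with that interface; the constructor signature is read off the call sites, while the internals (plain Euclidean distance, inverse-distance weighting) are illustrative assumptions, not the project's actual implementation.

import numpy as np


class KNearestNeighbors:
    """Minimal sketch of the interface used above; internals are illustrative."""

    WEIGHT_METHODS = ('majority', 'weighted')

    def __init__(self, x_data, class_vector, n_neighbors=5,
                 weight_method='majority', distance_method='euclidean'):
        self.x_data = np.asarray(x_data, dtype=float)
        self.class_vector = np.asarray(class_vector)
        self.n_neighbors = n_neighbors
        self.weight_method = weight_method
        self.distance_method = distance_method

    def classify(self, samples):
        predictions = []
        for sample in np.asarray(samples, dtype=float):
            # Euclidean distance from the query point to every training point.
            distances = np.linalg.norm(self.x_data - sample, axis=1)
            nearest = np.argsort(distances)[:self.n_neighbors]
            if self.weight_method == 'weighted':
                # Inverse-distance vote; the epsilon avoids division by zero.
                weights = 1.0 / (distances[nearest] + 1e-12)
            else:
                weights = np.ones(len(nearest))
            votes = {}
            for index, weight in zip(nearest, weights):
                label = self.class_vector[index]
                votes[label] = votes.get(label, 0.0) + weight
            predictions.append(max(votes, key=votes.get))
        return predictions

With this sketch in place the assertion in Example #3 passes: the weighted vote over the two nearest neighbours of [18, 19, 20] is dominated by the much closer training point labelled 2.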
Example #2
    def __init__(self, path, k=1):
        self.img = cv2.imread(path, 0)
        if self.img.shape[0] > 2500 or self.img.shape[1] > 2500:
            # cv2.resize expects (width, height), i.e. (columns, rows).
            self.img = cv2.resize(
                self.img, (self.img.shape[1] // 2, self.img.shape[0] // 2))
        self.k = KNearestNeighbors(k)
        self.k.fit_transform()
        self.grid = np.zeros((9, 9))
Example #3
    def test_knn(self):
        x_data = np.array([[1, 2, 3], [2, 3, 4], [17, 18, 19]])
        y = [1, 1, 2]
        knn = KNearestNeighbors(x_data,
                                y,
                                n_neighbors=2,
                                weight_method='weighted',
                                distance_method='euclidean')
        assert knn.classify(np.array([[18, 19, 20]]))[0] == 2
Example #4
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from knn import KNearestNeighbors

iris = load_iris()
data = iris.data
target = iris.target

X_train, X_test, y_train, y_test = train_test_split(data,
                                                    target,
                                                    test_size=0.2,
                                                    random_state=5656)

clf = KNearestNeighbors(K=3)
clf.fit(X_train, y_train)

predictions = clf.predict(X_test)

print('Accuracy:', accuracy_score(y_test, predictions))
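For a quick sanity check, the same split can also be fed to scikit-learn's reference implementation. The snippet below continues the example above and compares the two accuracies; KNeighborsClassifier is the standard scikit-learn class, not part of the project.

from sklearn.neighbors import KNeighborsClassifier

# Reference classifier with the same neighbourhood size.
reference = KNeighborsClassifier(n_neighbors=3)
reference.fit(X_train, y_train)

print('Reference accuracy:', accuracy_score(y_test, reference.predict(X_test)))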
Example #5
class scanner:

    def __init__(self, path, k=1):
        self.img = cv2.imread(path, 0)
        if self.img.shape[0] > 2500 or self.img.shape[1] > 2500:
            # cv2.resize expects (width, height), i.e. (columns, rows).
            self.img = cv2.resize(
                self.img, (self.img.shape[1] // 2, self.img.shape[0] // 2))
        self.k = KNearestNeighbors(k)
        self.k.fit_transform()
        self.grid = np.zeros((9, 9))

    def getNum(self, digit):
        return self.k.predict(digit)

    def preprocessing(self):

        img_blur = cv2.GaussianBlur(self.img, (3, 3), 0)

        otsu_thresh_val, _ = cv2.threshold(
            img_blur, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
        high_thresh_val = otsu_thresh_val
        lower_thresh_val = otsu_thresh_val * 0.5
        canny_output = cv2.Canny(img_blur, lower_thresh_val, high_thresh_val)

        contours, _ = cv2.findContours(
            canny_output, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        contours = sorted(contours, key=cv2.contourArea, reverse=True)[:5]

        temp = None

        for c in contours:
            peri = cv2.arcLength(c, True)
            approx = cv2.approxPolyDP(c, 0.02 * peri, True)
            if len(approx) == 4:
                temp = approx
                break

        temp = temp.reshape(4, 2)

        inputRect = np.zeros((4, 2), dtype="float32")

        s = temp.sum(axis=1)
        inputRect[0] = temp[np.argmin(s)]
        inputRect[2] = temp[np.argmax(s)]

        diff = np.diff(temp, axis=1)
        inputRect[1] = temp[np.argmin(diff)]
        inputRect[3] = temp[np.argmax(diff)]

        outputRect = np.array([[0, 0],
                               [self.img.shape[0] - 1, 0],
                               [self.img.shape[0] - 1, self.img.shape[1] - 1],
                               [0, self.img.shape[1] - 1]], dtype="float32")

        perspectiveMatrix = cv2.getPerspectiveTransform(inputRect, outputRect)
        warp_output = cv2.warpPerspective(
            self.img, perspectiveMatrix, (self.img.shape[0], self.img.shape[1]))

        size = int(warp_output.shape[0]*warp_output.shape[1]/2188)
        if size % 2 == 0:
            size += 1

        binary_output = cv2.adaptiveThreshold(
            warp_output, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, size, 0)
        square = cv2.resize(binary_output, (900, 900))

        return square

    def getDigits(self):

        square = self.preprocessing()

        for x in range(9):
            for y in range(9):

                s = int(900*0.13)
                elm = np.zeros((s, s))

                for i in range(s):
                    for j in range(s):
                        if i + int(900*x/9) < 900 and j + int(900*y/9) < 900:
                            elm[i][j] = square[i + int(900*x/9)][j + int(900*y/9)]
                        else:
                            elm[i][j] = 0

                elm = cv2.convertScaleAbs(elm)

                contours, _ = cv2.findContours(
                    elm, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

                # An empty cell may produce no contour at all; leave it as 0.
                if not contours:
                    continue

                largest_area = 0
                c = contours[0]

                for cnt in contours:
                    area = cv2.contourArea(cnt)
                    if area > largest_area:
                        largest_area = area
                        c = cnt

                bounding_rect = cv2.boundingRect(c)

                elm = elm[bounding_rect[1] + 5:bounding_rect[1] + bounding_rect[3] - 5,
                          bounding_rect[0] + 5:bounding_rect[0] + bounding_rect[2] - 5]

                fin = cv2.resize(elm, (20, 20))
                self.grid[x][y] = self.getNum(fin)
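A possible way to drive the class above, with the image path and the neighbourhood size as placeholder values:

# Hypothetical usage; 'sudoku.png' and k=3 are placeholders.
sc = scanner('sudoku.png', k=3)
sc.getDigits()   # fills the 9x9 grid with the recognized digits
print(sc.grid)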
Example #6
import cv2
import GenerateFeatureVector as gfv
from numpy import genfromtxt
from knn import KNearestNeighbors
from trainSVM import SVM
import glob

knn_obj = KNearestNeighbors()
svm_obj = SVM()

cell_length = 10
cell_width = 10
training = False
bins = 8

i = 0
for filename in sorted(glob.glob('Images/Testing/*.png')):
	print(i)
	i = i+1
Example #7
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import GridSearchCV
# KNearestNeighbors and KNearestNeighbors_Inheritance are the project's own
# estimators; their import path is not shown in this snippet.


def psi(xlist, M):
    """ make a design matrix """
    ret = []
    for x in xlist:
        ret.append([x**i for i in range(0, M + 1)])
    return np.array(ret)


np.random.seed(0)
""" Data for grid search """
N = 10
M = 15
xlist = np.linspace(0, 1, N)
ylist = np.sin(2 * np.pi * xlist) + np.random.normal(0, 0.2, xlist.size)
X = psi(xlist, M)
y = ylist
""" Grid search """
parameters = {'n_neighbors': [i for i in range(1, 5)]}
# Two variants of the estimator; the second assignment overwrites the first,
# so only KNearestNeighbors_Inheritance is actually fitted below.
reg = GridSearchCV(KNearestNeighbors(), parameters, cv=5)
reg = GridSearchCV(KNearestNeighbors_Inheritance(), parameters, cv=5)
reg.fit(X, y)
print(reg.best_params_)
""" Plot """
xs = np.linspace(0, 1, 500)
ideal = np.sin(2 * np.pi * xs)
y_pred = reg.predict(psi(xs, M))
plt.plot(xlist, ylist, 'bo')
plt.plot(xs, ideal)
plt.plot(xs, y_pred)
plt.show()
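GridSearchCV only requires the estimator to follow the scikit-learn estimator protocol: get_params/set_params (which BaseEstimator derives from the constructor arguments), fit returning self, and predict (RegressorMixin then supplies the default R² score used during cross-validation). That is presumably what the _Inheritance variant provides. A minimal sketch of such a wrapper, built here on scikit-learn's own KNeighborsRegressor rather than on the project's class:

from sklearn.base import BaseEstimator, RegressorMixin
from sklearn.neighbors import KNeighborsRegressor


class KNNRegressorSketch(BaseEstimator, RegressorMixin):
    """Illustrative sklearn-compatible k-NN regressor; not the project's class."""

    def __init__(self, n_neighbors=3):
        # BaseEstimator.get_params() discovers hyper-parameters from the
        # constructor signature, so store them unchanged under the same name.
        self.n_neighbors = n_neighbors

    def fit(self, X, y):
        self.model_ = KNeighborsRegressor(n_neighbors=self.n_neighbors)
        self.model_.fit(X, y)
        return self

    def predict(self, X):
        return self.model_.predict(X)


# The sketch drops straight into the grid search above:
# reg = GridSearchCV(KNNRegressorSketch(), parameters, cv=5)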
Example #8
# `dataset` is assumed to be a pandas DataFrame with a 'label' column;
# the loading step is not part of this snippet.
print(dataset.head())

X = dataset.drop('label', axis=1)
y = dataset['label']

from sklearn.preprocessing import MinMaxScaler

x_scaler = MinMaxScaler()
X = x_scaler.fit_transform(X)

from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=.25,
                                                    random_state=2)

from knn import KNearestNeighbors

knn = KNearestNeighbors(k=3)
knn.fit(X_train, y_train)
predict = knn.predict(X_test)

from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

print(accuracy_score(y_test, predict))

print(confusion_matrix(y_test, predict))

print(classification_report(y_test, predict))