import numpy as np
from sklearn.model_selection import train_test_split  # replaces the deprecated sklearn.cross_validation module

import nClass  # project-local KNN implementation


def kneighbors(tDataX, tDataY, mineData):
    # X_train: attributes from the labeled data used to train the model
    # X_test:  attributes from the labeled data used to test the model
    # y_train: classes from the labeled data used to train the model
    # y_test:  classes from the labeled data used to test the model
    X_train, X_test, y_train, y_test = train_test_split(
        tDataX, tDataY, test_size=0.4, random_state=0)

    # A good rule of thumb is that k = sqrt(n), rounded to the nearest int.
    n_neighbors = int(np.sqrt(len(X_train)))
    weights = 'uniform'

    # Create an instance of the neighbours classifier and fit the data.
    knn = nClass.KNN(n_neighbors, weights, X_train, y_train)
    test_pred = knn.predictDataSet(X_test)
    accuracy = knn.accuracy(X_test, y_test)
    actual_pred = knn.predictDataSet(mineData)

    resp = {
        'Testset Class': y_test,
        'Predicted Testset Class': test_pred,
        'Classes': actual_pred,
    }
    resp['mineAttrs'] = mineData
    resp['accuracy'] = accuracy
    return resp
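# A minimal, self-contained sketch of the same sqrt-of-n heuristic using
# scikit-learn's KNeighborsClassifier instead of the project's nClass module.
# The iris dataset and the classifier choice below are illustrative
# assumptions, not part of the original code.

import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

k = int(np.sqrt(len(X_train)))  # 90 training rows -> k = 9
clf = KNeighborsClassifier(n_neighbors=k, weights='uniform').fit(X_train, y_train)
print("test accuracy:", clf.score(X_test, y_test))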
def run(url, header, categories=None, Nan="?", k=3):
    data = read(url, header)
    if categories is not None:
        for key in categories:
            # Map each known categorical value onto its numeric code.
            conversion = categorize(data[key], categories[key], Nan=Nan)
            data[key] = data[key].map(conversion)
    else:
        for head in header:
            if numpy.issubdtype(data[head].dtype, numpy.number):
                conversion = categorize(data[head], build_options_list(data[head], Nan))
                data[head] = data[head].map(conversion)

    print(data[header[13]][14])
    print(data)
    print(data[header[13]][14])
    data = data.apply(pandas.to_numeric)
    print(data[header[13]][14])
    data = data.to_numpy()  # as_matrix() has been removed from pandas
    print(data[14][13])
    print(data)
    print("data tested with K =", k, ":",
          KNNClassifier.test_classifier(data[:, range(13)], data[:, 14], k))
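# The categorize()/build_options_list() helpers are project-specific and not
# shown here. A minimal pandas-only sketch of the same idea -- building a
# value-to-code mapping and applying it with Series.map -- could look like
# this (the column name and codes below are illustrative assumptions):

import pandas as pd

df = pd.DataFrame({"color": ["red", "green", "?", "red"]})
conversion = {"red": 0, "green": 1, "?": 2}   # categorical value -> numeric code
df["color"] = df["color"].map(conversion)
print(df)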
def main():
    df_beijing = pd.read_csv(BEIJING_PATH)
    df_shenyang = pd.read_csv(SHENYANG_PATH)
    X, Y = prepare_data_and_labels(df_beijing, df_shenyang)

    clf = KNNClassifier(K=5)
    validate(clf, X, Y)

    print("GUANGZHOU")
    df_guangzhou = pd.read_csv(GUANGZHOU_PATH)
    X_test, Y_test = prepare_data_and_labels(df_guangzhou)
    test(clf, X_test, Y_test)

    print("SHANGHAI")
    df_shanghai = pd.read_csv(SHANGHAI_PATH)
    X_test, Y_test = prepare_data_and_labels(df_shanghai)
    test(clf, X_test, Y_test)
def classify(folder, main_folder, num_class, num_feature, F, K):
    def read_rows(path):
        # Small helper: read every row of a CSV file into a list of lists.
        with open(path, 'r', newline='') as f:
            return [row for row in csv.reader(f)]

    feature_num_class = read_rows(main_folder + "/feature_num_of_classes.csv")
    binary_class_labels = read_rows(main_folder + "/binary_class_labels.csv")
    feature_vectors = read_rows(main_folder + "/feature_vectors_trim.csv")
    class_frequency = read_rows(main_folder + "/class_frequency.csv")
    feature_weights = read_rows(main_folder + "/weight_feature_class.csv")
    training_article_nums = read_rows(folder + "/training_article_numbers.csv")
    test_article_nums = read_rows(folder + "/test_article_numbers.csv")

    result = open(folder + "/knn_results.txt", "w")

    sample_training_vectors = []
    sample_training_labels = []
    sample_test_vectors = []
    sample_test_labels = []

    # Optional shuffling of the article numbers before sampling (disabled in the original):
    # random.shuffle(training_article_nums[0])
    # random.shuffle(test_article_nums[0])

    # Take every F-th article number as the training/test sample.
    for k in range(F):
        sample_training = [x for i, x in enumerate(training_article_nums[0]) if i % F == k]
    for k in range(F):
        sample_test = [x for i, x in enumerate(test_article_nums[0]) if i % F == k]

    with open(folder + "/sample_training_article_num.csv", 'w', newline='') as f:
        csv.writer(f).writerow(sample_training)
    with open(folder + "/sample_test_article_num.csv", 'w', newline='') as f:
        csv.writer(f).writerow(sample_test)

    with open(folder + "/sample_training_vectors.csv", 'w', newline='') as f:
        writer = csv.writer(f)
        for item in sample_training:
            sample_training_vectors.append(feature_vectors[int(item)])
            writer.writerow(feature_vectors[int(item)])
    with open(folder + "/sample_training_labels.csv", 'w', newline='') as f:
        writer = csv.writer(f)
        for item in sample_training:
            sample_training_labels.append(binary_class_labels[int(item)])
            writer.writerow(binary_class_labels[int(item)])
    with open(folder + "/sample_test_vectors.csv", 'w', newline='') as f:
        writer = csv.writer(f)
        for item in sample_test:
            sample_test_vectors.append(feature_vectors[int(item)])
            writer.writerow(feature_vectors[int(item)])
    with open(folder + "/sample_test_labels.csv", 'w', newline='') as f:
        writer = csv.writer(f)
        for item in sample_test:
            sample_test_labels.append(binary_class_labels[int(item)])
            writer.writerow(binary_class_labels[int(item)])

    modeling_time = 0
    classifying_time = 0
    inversed_classified_labels = []
    # Train one binary kNN classifier per class and classify the test sample with it.
    for i in range(num_class):
        temp = []
        feature_num = feature_num_class[i]
        the_class_frequency = class_frequency[0][i]
        start_time = time.time()
        class_column = []
        for item in sample_training_labels:
            if item[0] == '':
                break
            class_column.append(item[i])
        knn = KNNClassifier.train(sample_training_vectors, class_column, K, typecode=None)
        modeling_time += (time.time() - start_time)
        for article in sample_test_vectors:
            start_time = time.time()
            if article[0] == '':
                break
            label = KNNClassifier.classify(knn, article, feature_num, the_class_frequency,
                                           None, feature_weights[0])
            classifying_time += (time.time() - start_time)
            temp.append(label)
        print(i + 1, "out of", num_class, "has been classified")
        inversed_classified_labels.append(temp)

    print("modeling time = ", modeling_time, "s", file=result)
    print("number of knn classifiers = ", num_class, file=result)
    print("classifying time = ", classifying_time, "s", file=result)

    # Transpose the per-class label matrix back to one row of labels per article.
    num_article = len(inversed_classified_labels[0])
    print("classified article number = ", num_article, file=result)
    with open(folder + "/knn_classified_sample_labels.csv", 'w', newline='') as f:
        writer = csv.writer(f)
        for i in range(num_article):
            writer.writerow([inversed_classified_labels[j][i] for j in range(num_class)])

    result.close()
    calculate_accuracy()
# Show one example image per digit class.
fig, ax = plt.subplots(nrows=2, ncols=5, sharex=True, sharey=True)
ax = ax.flatten()
for i in range(10):
    img = X_train[y_train == i][0]
    ax[i].imshow(img, cmap='Greys', interpolation='nearest')
ax[0].set_xticks([])
ax[0].set_yticks([])
plt.tight_layout()
plt.show()

X_train = v.NormalVectorize(X_train)
X_test = v.NormalVectorize(X_test)

"""Average Classifier"""
AC = AverageClassifier.Train(X_train, y_train)
print("Average Classifier Accuracy:", AverageClassifier.Test(X_test, y_test, AC) * 100, "%")

"""KNN Classifier"""
print("KNN Predict Classifier:")
print("KNN")
KNNClassifier.Test(X_test, y_test, X_train, y_train)
print("KNN with Lib")
KNN = KNNClassifier.KNNbyLib(X_train, y_train)
KNNClassifier.KNNByLibTest(X_test, y_test, KNN)

"""ANN Classifier"""
ANN = ANNClassifier.Train(X_train, y_train)
print("ANN Classifier Accuracy:")
ANNClassifier.Test(X_test, y_test, ANN)
import numpy as np
import matplotlib.pyplot as plt

from KNNClassifier import KNNClassifier

if __name__ == '__main__':
    image_size = 28  # width and length
    no_of_different_labels = 10  # i.e. 0, 1, 2, 3, ..., 9
    image_pixels = image_size * image_size

    data_path = "/mnist/"
    train_data = np.loadtxt(data_path + "mnist_train.csv", delimiter=",")
    test_data = np.loadtxt(data_path + "mnist_test.csv", delimiter=",")

    # Rescale pixel values into (0.01, 1.0); the first column holds the label.
    fac = 0.99 / 255
    train_imgs = np.asfarray(train_data[:, 1:]) * fac + 0.01
    test_imgs = np.asfarray(test_data[:, 1:]) * fac + 0.01
    train_labels = np.asfarray(train_data[:, :1])
    test_labels = np.asfarray(test_data[:, :1])

    knn = KNNClassifier(distance='Euclidean', K=5)
    knn.fit(train_imgs, train_labels)
    results = knn.predict(test_imgs)

    # for i in range(10):
    #     img = train_imgs[i].reshape((28, 28))
    #     plt.imshow(img, cmap="Greys")
    #     plt.show()

    from sklearn.metrics import confusion_matrix
    print(confusion_matrix(test_labels, results))
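# The snippet above stops at the confusion matrix. A self-contained sketch of
# how overall accuracy can be read off a confusion matrix; the 3x3 matrix
# below is made-up illustrative data, not actual MNIST results.

import numpy as np

cm = np.array([[50, 2, 1],
               [3, 45, 4],
               [0, 5, 40]])
accuracy = np.trace(cm) / cm.sum()   # correct predictions lie on the diagonal
print(f"accuracy = {accuracy:.3f}")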
###########################################
## KNN test code
from sklearn.datasets import load_iris
from sklearn.utils import shuffle

iris_X, iris_y = load_iris(return_X_y=True)
iris_X, iris_y = shuffle(iris_X, iris_y)

X_train = iris_X[:-30]
X_test = iris_X[-30:]
y_train = iris_y[:-30]
y_test = iris_y[-30:]

from KNNClassifier import *

knn = KNNClassifier("eucledean", 10)
knn.buildModel(X_train, y_train)
knn.evaluateModel(X_test, y_test)
knn.showLabel(X_test[5], load_iris())

#########################################
## Naive Bayes test code
from sklearn.datasets import load_iris
from sklearn.utils import shuffle

iris_X, iris_y = load_iris(return_X_y=True)
iris_X, iris_y = shuffle(iris_X, iris_y)

X_train = iris_X[:-30]
X_test = iris_X[-30:]
y_train = iris_y[:-30]
class FaceRecognizer:
    def __init__(self, dimensions=0, k_neighbors=5, use_kernel=False):
        '''
        Constructor for FaceRecognizer.

        Args (optional):
            dimensions (int): How many principal components to keep.
            k_neighbors (int): How many neighbors to compare against in the
                kNN classifier.
            use_kernel (bool): Whether to use the kernel (RBF) variant of PCA.
        '''
        self.pca_model = PCAModel(dimensions=dimensions, use_kernel=use_kernel)
        self.knn_classifier = KNNClassifier(neighbors=k_neighbors)
        self.instances = None

    def train(self, instances):
        '''
        Trains the recognizer with a set of faces.

        Args:
            instances (list<tuple<int, numpy.ndarray>>): List of label/face
                data pairs.
        '''
        self.instances = instances

        # Stack all of the faces together
        faces_list = list()
        for instance in instances:
            faces_list.append(instance[1])
        faces = np.vstack(faces_list).T

        # Learn principal components
        self.pca_model.fit(faces)

        # Add each class to the kNN classifier
        for instance in instances:
            label = instance[0]
            face = instance[1]
            t_face = self.pca_model.transform(face)
            self.knn_classifier.add_sample(label, t_face)

    def fit_knn(self):
        '''
        Fits the kNN classifier with the current instances.
        '''
        if self.instances is None:
            raise RuntimeError('FaceRecognizer has no instances')

        self.knn_classifier.reset()
        for instance in self.instances:
            label = instance[0]
            face = instance[1]
            t_face = self.pca_model.transform(face)
            self.knn_classifier.add_sample(label, t_face)

    def classify(self, face):
        '''
        Classifies a given face from the trained set.

        Args:
            face (numpy.ndarray): The face to classify.

        Returns:
            int, the class the face best belongs to.
        '''
        t_face = self.pca_model.transform(face)
        return self.knn_classifier.classify(t_face)

    def set_dimensions(self, dimensions):
        '''
        Sets the number of dimensions to use from PCA.

        Args:
            dimensions (int): The new number of dimensions.
        '''
        self.pca_model.dimensions = dimensions
        if self.instances is not None:
            self.fit_knn()

    def set_k_neighbors(self, k):
        '''
        Sets k for the kNN classifier.

        Args:
            k (int): The new k for the classifier.
        '''
        self.knn_classifier.neighbors = k

    def set_kernel_variance(self, variance):
        '''
        Sets the variance for the RBF kernel and retrains.

        Args:
            variance (float): The new variance.
        '''
        self.pca_model.variance = variance
        if self.instances is not None and self.pca_model.use_kernel:
            self.train(self.instances)
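# A minimal usage sketch for FaceRecognizer, assuming the project's PCAModel
# and KNNClassifier modules are importable; the random "faces" and labels
# below are synthetic placeholders, not real training data.

import numpy as np

rng = np.random.default_rng(0)
instances = [(label, rng.random(64 * 64)) for label in (0, 0, 1, 1, 2)]

recognizer = FaceRecognizer(dimensions=4, k_neighbors=3)
recognizer.train(instances)        # learn the PCA basis and fill the kNN model
probe = rng.random(64 * 64)        # an unseen face vector
print(recognizer.classify(probe))  # -> predicted integer label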