def exercise03(neighbors, split):
    '''
    Data set: Iris
    Split the Iris dataset into a train / test model with the split ratio
    between the two established by the function parameter split.
    Fit KNN with the training data with number of neighbors equal to the
    function parameter neighbors.
    Generate and return back an accuracy score using the test data that was
    split out.
    '''
    random_state = 21
    # ------ Place code below here \/ \/ \/ ------
    # Split into stratified train and test sets
    X_train, X_test, y_train, y_test = tts(iris_df, iris_target,
                                           test_size=split,
                                           random_state=random_state,
                                           stratify=iris_target)
    # Initialize KNN with the requested number of neighbors
    knn = KNN(n_neighbors=neighbors)
    # Fit the classifier on the training data
    knn.fit(X_train, y_train)
    # Score accuracy on the held-out test data
    knn_score = knn.score(X_test, y_test)
    # ------ Place code above here /\ /\ /\ ------
    return knn_score
def exercise02(new_observations):
    '''
    Data set: Iris
    Fit the Iris dataset into a kNN model with neighbors=5 and predict the
    category of observations passed in argument new_observations. Return back
    the target names of each prediction (and not their encoded values, i.e.
    return setosa instead of 0).
    '''
    # ------ Place code below here \/ \/ \/ ------
    # Create arrays for the features and the response variable
    X = iris.data
    y = iris.target
    # Create a KNN classifier with 5 neighbors
    knn = KNN(n_neighbors=5)
    # Fit the classifier to the data
    knn.fit(X, y)
    # Predict once for all new observations, then map the encoded labels
    # back to their target names
    y_pred = knn.predict(new_observations)
    iris_predictions = list(iris.target_names[y_pred])
    # ------ Place code above here /\ /\ /\ ------
    return iris_predictions
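# A minimal usage sketch for exercise02 above, assuming the module-level names
# it relies on: iris loaded via sklearn.datasets.load_iris() and KNN aliased to
# sklearn.neighbors.KNeighborsClassifier. The two sample measurements are
# illustrative only.
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier as KNN

iris = load_iris()
# sepal length, sepal width, petal length, petal width (cm)
new_observations = [[5.1, 3.5, 1.4, 0.2], [6.7, 3.0, 5.2, 2.3]]
print(exercise02(new_observations))  # e.g. ['setosa', 'virginica']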
def main():
    # prepare data
    trainingSet = []
    testSet = []
    accuracy = 0.0
    split = 0.25
    loadDataset('../Dataset/combined.csv', split, trainingSet, testSet)
    print('Train set: ' + repr(len(trainingSet)))
    print('Test set: ' + repr(len(testSet)))
    # generate predictions
    trainData = np.array(trainingSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    columns = trainData.shape[1]
    X = np.array(trainData)
    y = np.array(trainingSet)[:, columns]
    # Bag an ensemble of KNN classifiers
    clf = BaggingClassifier(
        KNN(n_neighbors=10, weights='uniform', algorithm='auto', leaf_size=10,
            p=1, metric='minkowski', metric_params=None, n_jobs=1))
    clf.fit(X, y)
    testData = np.array(testSet)[:, 0:np.array(trainingSet).shape[1] - 1]
    X_test = np.array(testData)
    y_test = np.array(testSet)[:, columns]
    accuracy = clf.score(X_test, y_test) * 100
    print("Accuracy %:", accuracy)
def handwritingClassTest(file1dir, file2dir):
    # Handwritten digit recognition. file1dir holds the training-set files and
    # file2dir the test-set files; filenames look like 1_12, i.e. the twelfth
    # image of class 1.
    hwlabels = []                         # training-set labels
    trainingFileList = listdir(file1dir)  # all filenames in the folder
    m = len(trainingFileList)             # number of training files
    trainingMat = np.zeros((m, 1024))     # matrix of 1024-d training vectors
    for i in range(m):
        fileNameStr = trainingFileList[i]             # file name
        classNumber = int(fileNameStr.split('_')[0])  # class is the part before '_'
        hwlabels.append(classNumber)                  # label vector
        trainingMat[i, :] = img2vector(file1dir + '/%s' % (fileNameStr))
    neigh = KNN(n_neighbors=3, algorithm='brute')  # sklearn KNN classifier
    neigh.fit(trainingMat, hwlabels)  # fit: trainingMat features, hwlabels targets
    testFileList = listdir(file2dir)
    errorCount = 0.0                  # misclassification counter
    mTest = len(testFileList)         # number of test files
    for i in range(mTest):
        fileNameStr = testFileList[i]
        classNumber = int(fileNameStr.split('_')[0])
        vectorUnderTest = img2vector(file2dir + '/%s' % (fileNameStr))
        classifierResult = neigh.predict(vectorUnderTest)  # predict the test label
        print("classifier returned %d\ttrue answer is %d" % (classifierResult, classNumber))
        if (classifierResult != classNumber):
            errorCount += 1.0
    print("misclassified %d samples in total\nerror rate: %f%%" % (errorCount, errorCount * 100 / mTest))
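# img2vector is referenced by the digit-recognition snippets here but not
# defined in them. Below is a minimal sketch consistent with how it is used
# above (each file encodes a 32x32 binary image as text, flattened into a
# 1x1024 array); the exact file layout is an assumption.
import numpy as np

def img2vector(filename):
    # Flatten a 32x32 text-encoded digit image into a 1x1024 feature vector
    vect = np.zeros((1, 1024))
    with open(filename) as fr:
        for row in range(32):
            line = fr.readline()
            for col in range(32):
                vect[0, 32 * row + col] = int(line[col])
    return vect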
def pred(self, x_support, x_query):
    way, shot = int(x_support.size(0)), int(x_support.size(1))
    knn = KNN(n_neighbors=1)
    # Embed the support images and average each class's shot embeddings
    # into one prototype per class
    x_support = x_support.view(-1, 3, 32, 32)
    x_support = self.vgg16(x_support)
    x_support = x_support.view(x_support.size(0), -1)
    x_support = self.fc_1(x_support)
    x_support = tor.mean(x_support.view(way, shot, -1),
                         dim=1).cpu().detach().numpy()
    y_support = np.arange(way)
    knn.fit(x_support, y_support)
    pred_list = []
    # Embed each query image and assign the label of its nearest prototype
    for query in x_query.view(-1, 3, 32, 32):
        query_feature = self.vgg16(query.view(1, 3, 32, 32))
        query_feature = self.fc_1(
            query_feature.view(query_feature.size(0), -1))
        query_feature = query_feature.cpu().detach().numpy()
        pred = knn.predict(query_feature)
        pred_list.append(int(pred[0]))
    return np.array(pred_list)
def trainingarray():
    trainMat = np.array([[1, 2, 3], [2, 3, 5], [55, 33, 66], [55, 44, 66]])
    label = np.array([0, 0, 1, 1])
    neigh = KNN(n_neighbors=3, algorithm='auto', weights='distance', n_jobs=1)
    neigh.fit(trainMat, label)
    testmat = np.array([[2, 3, 4], [55, 33, 66]])
    print(neigh.predict(testmat))
def k_nearest_neighbors(M, m, D, d, feature_mean, diag, accuracy):
    # k-nearest neighbors
    training_start = t.time()
    knn = KNN()
    knn.fit(M, D)
    training_end = t.time()
    print("\nKNN\nTraining time: {0:.7f} sec".format(training_end - training_start))
    testing_start = t.time()
    p = knn.predict(m)
    testing_end = t.time()
    print("Testing/Predict time: {0:.7f} sec".format(testing_end - testing_start))
    validation = cross_val_score(knn, feature_mean, diag, cv=5)
    accuracy.append(accuracy_score(p, d))
    print("Accuracy: {0:.1%}".format(accuracy_score(p, d)))
    print("Cross validation result: {0:.1%} (+/- {1:.1%})".format(
        num.mean(validation), num.std(validation) * 2))
    print(classification_report(d, p))
def get_new_model(self):
    if (self.model_type.split("-")[-1] == "Regressor"):
        if (self.model_type == "Linear-Regressor"):
            from sklearn.linear_model import LinearRegression
            self.model = LinearRegression(**self.model_args)
        elif (self.model_type == "Support-Vector-Regressor"):
            from sklearn.svm import SVR
            self.model = SVR(**self.model_args)
        elif (self.model_type == "Decision-Tree-Regressor"):
            from sklearn.tree import DecisionTreeRegressor as DTR
            self.model = DTR(**self.model_args)
        elif (self.model_type == "Random-Forest-Regressor"):
            from sklearn.ensemble import RandomForestRegressor as RFR
            self.model = RFR(**self.model_args)
    else:
        if (self.model_type == "Logistic-Regression-Classifier"):
            from sklearn.linear_model import LogisticRegression
            self.model = LogisticRegression(**self.model_args)
        elif (self.model_type == "KNN-Classifier"):
            from sklearn.neighbors import KNeighborsClassifier as KNN
            self.model = KNN(**self.model_args)
        elif (self.model_type == "Support-Vector-Classifier"):
            from sklearn.svm import SVC
            self.model = SVC(**self.model_args)
        elif (self.model_type == "Naive-Bayes-Classifier"):
            from sklearn.naive_bayes import GaussianNB as GNB
            self.model = GNB(**self.model_args)
        elif (self.model_type == "Decision-Tree-Classifier"):
            from sklearn.tree import DecisionTreeClassifier as DTC
            self.model = DTC(**self.model_args)
        elif (self.model_type == "Random-Forest-Classifier"):
            from sklearn.ensemble import RandomForestClassifier as RFC
            self.model = RFC(**self.model_args)
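# A minimal usage sketch for get_new_model, assuming a hypothetical host
# object that carries the model_type and model_args attributes the method
# reads (neither is defined in this snippet).
from types import SimpleNamespace

factory = SimpleNamespace(model_type="KNN-Classifier",
                          model_args={"n_neighbors": 5})
get_new_model(factory)  # dispatches on model_type and sets factory.model
print(factory.model)    # KNeighborsClassifier(n_neighbors=5)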
def exercise03(neighbors, split):
    '''
    Data set: Iris
    Split the Iris dataset into a train / test model with the split ratio
    between the two established by the function parameter split.
    Fit KNN with the training data with number of neighbors equal to the
    function parameter neighbors.
    Generate and return back an accuracy score using the test data that was
    split out.
    '''
    # ------ Place code below here \/ \/ \/ ------
    ir = pd.DataFrame(iris.data)
    ir.columns = iris.feature_names
    X = np.array(ir)
    y = np.array(iris.target)
    X_train, X_test, y_train, y_test = tts(X, y, test_size=split,
                                           random_state=21)
    knn = KNN(n_neighbors=neighbors)
    knn.fit(X_train, y_train)
    knn_score = knn.score(X_test, y_test)
    # ------ Place code above here /\ /\ /\ ------
    return knn_score
def tuning_evaluation(X_train, y_train):
    '''
    Grid search to tune hyperparameters and evaluate accuracy.
    Returns the best classifier.
    :param X_train: numpy array [n_samples, n_features]
    :param y_train: numpy array [n_samples]
    :return: best classifier.
    '''
    # Standardize, reduce to 2 principal components, then classify with KNN
    pipe_KNN = Pipeline([('scl', StandardScaler()),
                         ('pca', PCA(n_components=2)),
                         ('clf', KNN(p=2, metric='minkowski'))])
    param_range = [1, 2, 4, 6, 8, 10, 20, 30, 50, 100]
    gs = GridSearchCV(estimator=pipe_KNN,
                      param_grid=[{'clf__n_neighbors': param_range}],
                      scoring='accuracy',
                      cv=10,
                      n_jobs=1)
    gs = gs.fit(X_train, y_train)
    print('The grid search best score is ', gs.best_score_)
    print('The best parameters according to the grid search algorithm are ',
          gs.best_params_)
    return gs.best_estimator_
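# A minimal usage sketch for tuning_evaluation, assuming the aliases used
# above (Pipeline, StandardScaler, PCA, GridSearchCV from sklearn, and KNN
# for KNeighborsClassifier) are imported at module level.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
best_clf = tuning_evaluation(X_train, y_train)
print('Held-out accuracy:', best_clf.score(X_test, y_test))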
def __init__(self, k,
             weights=[0.4 / 3] * 3 + [0.3 / 27] * 27 + [0.3 / 108] * 108):
    self.weights = weights
    self.isFit = False
    with open(currentdir + '/data/filenames.txt') as f:
        self.filenames = [_.strip('\n') for _ in f.readlines()]
    if 'hists.npy' not in os.listdir(currentdir + '/data/'):
        self.get_hists()
    self.hists = np.load(currentdir + '/data/hists.npy', allow_pickle=True)
    # Nearest-neighbor search over histograms with a custom comparison metric
    self.nn = KNN(k,
                  metric=ImageHistogram.compare_hists,
                  metric_params={
                      'n_features': 138,
                      'method': cv2.HISTCMP_BHATTACHARYYA,
                      'feature_weights': self.weights
                  })
def plot_boundary(bound_points, bound_labels, real_points, real_labels,
                  points, args, name):
    if args.nn:
        knn = KNN()
        print(f'Starting KNN - {name}')
        # Train on the boundary points, then color the background by the
        # predicted side of the boundary
        knn.fit(bound_points, (bound_labels > 0.5))
        background = knn.predict(points)
        plt.scatter(points[:, 0], points[:, 1], c=background, alpha=0.2)
    else:
        plt.scatter(bound_points[:, 0], bound_points[:, 1], c=bound_labels,
                    s=300, alpha=0.2)
    plt.scatter(real_points[:, 0], real_points[:, 1], c=real_labels,
                linewidths=1, edgecolors='black')
    plt.axis('off')
    plt.savefig(f'{args.save}_{name}.png', bbox_inches='tight', pad_inches=0)
def exercise03(neighbors, split):
    '''
    Data set: Iris
    Split the Iris dataset into a train / test model with the split ratio
    between the two established by the function parameter split.
    Fit KNN with the training data with number of neighbors equal to the
    function parameter neighbors.
    Generate and return back an accuracy score using the test data that was
    split out.
    '''
    random_state = 21
    # ------ Place code below here \/ \/ \/ ------
    X = iris.data
    y = iris.target
    X_train, X_test, y_train, y_test = tts(X, y, test_size=split,
                                           random_state=random_state,
                                           stratify=y)
    # Fit the KNN classifier to the training set
    classifier = KNN(n_neighbors=neighbors)
    classifier.fit(X_train, y_train)
    # Test score: the fraction of predictions found correct
    knn_score = classifier.score(X_test, y_test)
    # ------ Place code above here /\ /\ /\ ------
    return knn_score
def exercise02(new_observations):
    '''
    Data set: Iris
    Fit the Iris dataset into a kNN model with neighbors=5 and predict the
    category of observations passed in argument new_observations. Return back
    the target names of each prediction (and not their encoded values, i.e.
    return setosa instead of 0).
    '''
    # ------ Place code below here \/ \/ \/ ------
    X = iris.data
    y = iris.target
    # Fit the KNN classifier on the full dataset, as the docstring asks
    classifier = KNN(n_neighbors=5)
    classifier.fit(X, y)
    # Map the encoded predictions back to their target names
    y_pred = classifier.predict(new_observations)
    iris_predictions = iris['target_names'][y_pred]
    # ------ Place code above here /\ /\ /\ ------
    return iris_predictions
def knn_classification_nca(X_train, Y_train, X_test, state=20):
    """A function that applies grid search to tune the model and gives a
    prediction; it also uses the NCA transformation of the data, which seems
    to improve performance."""
    # Creating a score and parameters to search from
    scoring = {"f1": "f1_weighted"}
    grid_param2_nca = {
        "knn__n_neighbors": range(1, 11),
        "knn__p": range(1, 6),
        "knn__metric": ["minkowski", "canberra", "hamming"]
    }
    with_nca = namedtuple("with_nca",
                          ["fitted_grid", "y_grid", "grid_train_Y"])
    # Model setting
    nca = NCA(random_state=state)
    pipe = Pipeline(steps=[("nca", nca), ("knn", KNN(n_jobs=-1))])
    knn_grid = GridSearchCV(pipe, grid_param2_nca, scoring=scoring,
                            refit="f1", cv=5)
    # Model training with nca
    fitted_grid = knn_grid.fit(X_train, Y_train)
    # Model predictions with nca
    y_grid = fitted_grid.best_estimator_.predict(X_test)
    # training data prediction with nca
    grid_train_Y = fitted_grid.best_estimator_.predict(X_train)
    nca_model_list = with_nca(*[fitted_grid, y_grid, grid_train_Y])
    return nca_model_list
def handwritingClassify():
    """Handwritten digit recognition.
    :return: None
    """
    hwLabels = []  # training-set labels
    trainingFileList = listdir('trainingDigits')
    number_of_files = len(trainingFileList)
    trainingMat = np.zeros((number_of_files, 1024))
    for i in range(number_of_files):
        fileNameStr = trainingFileList[i]
        classNumber = int(fileNameStr.split('_')[0])
        hwLabels.append(classNumber)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % (fileNameStr))
    neigh = KNN(n_neighbors=3, algorithm='auto')
    neigh.fit(trainingMat, hwLabels)
    testFileList = listdir('testDigits')
    errorCount = 0.0
    number_of_files_test = len(testFileList)
    for i in range(number_of_files_test):
        fileNameStr = testFileList[i]
        classNumber = int(fileNameStr.split('_')[0])
        vector_of_test = img2vector('testDigits/%s' % (fileNameStr))
        vector_of_test = np.asarray(vector_of_test)
        vector_of_test = np.expand_dims(vector_of_test, axis=0)
        classfier_result = neigh.predict(vector_of_test)
        print("classifier returned %d\ttrue answer is %d" % (classfier_result, classNumber))
        if (classfier_result != classNumber):
            errorCount += 1.0
    print("misclassified %d samples in total\nerror rate: %f%%" % (errorCount, errorCount / number_of_files_test * 100))
def knn_classification(X_train, Y_train, X_test):
    """A function that applies grid search to tune the model and gives a
    prediction; unlike knn_classification_nca, it works on the raw features
    without the NCA transformation."""
    # Creating a score and parameters to search from
    scoring = {"f1": "f1_weighted"}
    grid_param2 = {
        "n_neighbors": range(1, 11),
        "p": range(1, 6),
        "metric": ["minkowski", "canberra", "hamming"]
    }
    no_nca = namedtuple("no_nca", ["fitted_grid", "y_grid", "grid_train_Y"])
    # Model setting
    knn_grid = GridSearchCV(KNN(n_jobs=-1), grid_param2, scoring=scoring,
                            refit="f1", cv=5)
    # Model training
    fitted_grid = knn_grid.fit(X_train, Y_train)
    # Model predictions
    y_grid = fitted_grid.best_estimator_.predict(X_test)
    # training data prediction
    grid_train_Y = fitted_grid.best_estimator_.predict(X_train)
    no_nca_model_list = no_nca(*[fitted_grid, y_grid, grid_train_Y])
    return no_nca_model_list
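# A minimal usage sketch for knn_classification (and, analogously,
# knn_classification_nca), assuming the module-level aliases they rely on:
# namedtuple from collections, GridSearchCV from sklearn.model_selection,
# and KNN for sklearn.neighbors.KNeighborsClassifier.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)
fitted_grid, y_grid, grid_train_Y = knn_classification(X_train, y_train, X_test)
print(fitted_grid.best_params_)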
def handWriteTest():
    # Read the training-data folder
    trainMat, trainLables = importData('trainingDigits')
    # Build a kNN classifier. The full constructor signature is:
    # neigh = kNN(n_neighbors=5, weights='uniform', algorithm='auto',
    #             leaf_size=30, p=2, metric='minkowski', metric_params=None,
    #             n_jobs=1)
    # n_neighbors: defaults to 5; this is the k of k-NN
    # weights: defaults to 'uniform' (equal weights); 'distance' weights by distance
    # algorithm: nearest-neighbor search algorithm, defaults to 'auto';
    #            alternatives are 'ball_tree', 'kd_tree', 'brute'
    # leaf_size: defaults to 30; the leaf size of the constructed kd-tree or ball tree
    # metric: distance metric, defaults to 'minkowski'
    # p: power parameter of the Minkowski metric (p=2 gives Euclidean distance)
    # metric_params: extra keyword arguments for the metric; the default None is fine
    # n_jobs: parallelism of the neighbor search; defaults to 1, -1 uses all CPU cores
    knnNeigh = KNN(n_neighbors=3, algorithm='kd_tree')
    # Fit the classifier on the training data and labels
    knnNeigh.fit(trainMat, trainLables)
    # Load the test data
    trainFileList = listdir('testDigits')
    m = len(trainFileList)
    errorCount = 0
    for i in range(m):
        # File name
        fileName = trainFileList[i]
        # Label is the part of the name before '_'
        className = int(fileName.split("_")[0])
        # Convert the 32*32 image into a 1*1024 vector
        testnMat = img2Vector('testDigits/%s' % fileName)
        result = knnNeigh.predict(testnMat)
        print('predicted: %s, actual: %s' % (result, className))
        if result != className:
            errorCount += 1
            print("----- misclassified -----")
    print("error rate: %f%%" % (errorCount / float(m) * 100))
def __init__(self, Np=100):
    self.ctr = 1
    self.laser_tf_br = tf.TransformBroadcaster()
    self.laser_frame = rospy.get_param('~laser_frame')
    self.pub_particlecloud = rospy.Publisher('/particlecloud', PoseArray,
                                             queue_size=60)
    self.pub_estimated_pos = rospy.Publisher('/MCL_estimated_pose',
                                             PoseWithCovarianceStamped,
                                             queue_size=60)
    self.pub_particlecloud2fusion = rospy.Publisher('/particlecloud2fuse_out',
                                                    PoseArray, queue_size=60)
    self.scan = MapClientLaserScanSubscriber()
    self.last_time = rospy.Time.now().to_sec()
    self.Np = Np
    self.init()
    self.i_TH = 0.0
    # Nearest-neighbor lookup over the map obstacles for the measurement model
    self.nbrs = KNN(n_neighbors=1, algorithm='ball_tree').fit(self.scan.obs())
    self.M_idxs = (np.linspace(0, len(self.scan.z.ranges) - 1,
                               20)).astype(np.int32)
    rospy.Subscriber('/odom', Odometry, self.get_odom)
    rospy.Subscriber('/initialpose', PoseWithCovarianceStamped, self.init_pose)
def predict(self, testX, testY, classifier=KNN(n_neighbors=3, n_jobs=-1),
            filename="individual.txt"):
    # Dump the best individual's pooling choices and filters to a text file
    f = open(filename + ".txt", mode="w")
    for fl in self.hof[0]:
        f.write(fl["pool"] + "\n")
        for v in fl["filter"]:
            for i in range(len(v) - 1):
                f.write(str(v[i]) + ",")
            f.write(str(v[-1]) + "\n")
        f.write("\n")
    f.close()
    print("Evaluating")
    # Extract the evolved feature values for the training and test sets
    trainingFeat = np.array([self.get_feature_values(self.hof[0], i)
                             for i in self.trainingX])
    testFeatures = np.array([self.get_feature_values(self.hof[0], i)
                             for i in np.array(testX)])
    fitted = classifier.fit(trainingFeat, self.trainingY)
    pre = fitted.predict(testFeatures)
    # Test-set accuracy
    fit = 0
    for p, tv in zip(pre, testY):
        if p == tv:
            fit += 1
    fit /= len(testY)
    # Persist the extracted features alongside their labels
    tst = open("test_features.csv", mode="w")
    trn = open("train_features.csv", mode="w")
    for x, l in zip(trainingFeat, self.trainingY):
        for f in x:
            trn.write(str(round(f, 4)) + ",")
        trn.write(str(l) + "\n")
    for x, l in zip(testFeatures, testY):
        for f in x:
            tst.write(str(round(f, 4)) + ",")
        tst.write(str(l) + "\n")
    tst.close()
    trn.close()
    return fit
def handwritingClassTest():
    hwLabels = []
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    trainingMat = np.zeros((m, 1024))
    for i in range(m):
        fileNameStr = trainingFileList[i]
        classNumber = int(fileNameStr.split('_')[0])
        hwLabels.append(classNumber)
        trainingMat[i, :] = img2vector('trainingDigits/%s' % (fileNameStr))
    neigh = KNN(n_neighbors=3, algorithm='auto')
    neigh.fit(trainingMat, hwLabels)
    testFileList = listdir('testDigits')
    errorCount = 0.0
    mTest = len(testFileList)
    for i in range(mTest):
        fileNameStr = testFileList[i]
        classNumber = int(fileNameStr.split('_')[0])
        vectorUnderTest = img2vector('testDigits/%s' % (fileNameStr))
        classifierResult = neigh.predict(vectorUnderTest)
        print('classifier returned %d\ttrue answer is %d' % (classifierResult, classNumber))
        if (classifierResult != classNumber):
            errorCount += 1.0
    print("misclassified %d samples in total\nerror rate: %f%%" % (errorCount, errorCount / mTest * 100))
def __init__(self, trainingX, trainingY,
             classifier=KNN(n_neighbors=3, n_jobs=-1),
             populationSize=100, mutationRate=0.3, crossoverRate=0.7,
             kfodls=-1, ngens=100, numberOfFilters=20):
    creator.create("FitnessMin", base.Fitness, weights=(1.0,))
    creator.create("Individual", list, fitness=creator.FitnessMin,
                   __eq__=lambda self, other: np.array_equal(self[0], other[0]))
    self.mutationRate = mutationRate
    self.crossoverRate = crossoverRate
    self.ngens = ngens
    self.numberOfFilters = numberOfFilters
    self.trainingX = np.array(trainingX)
    self.trainingY = np.array(trainingY)
    # DEAP toolbox: individuals, population, and genetic operators
    toolbox = base.Toolbox()
    toolbox.register("individual", generate, icls=creator.Individual,
                     numberOfFilters=numberOfFilters)
    toolbox.register("population", tools.initRepeat, list, toolbox.individual)
    TX, TY, FX, FY = self.splitData(trainingX, trainingY)
    toolbox.register("evaluate", self.fitness, trainingX=TX, trainingY=TY,
                     fitnessX=FX, fitnessY=FY, classifier=classifier)
    toolbox.register("mate", self.cross)
    toolbox.register("mutate", self.mutation)
    stats_fit = tools.Statistics(lambda ind: ind.fitness.values)
    mstats = tools.MultiStatistics(fitness=stats_fit)
    mstats.register("std", np.std)
    mstats.register("min", np.min)
    mstats.register("max", np.max)
    mstats.register("av", np.average)
    toolbox.register("select", selection, tournsize=3, icls=creator.Individual,
                     numberOfFilters=numberOfFilters)
    self.hof = tools.HallOfFame(1)
    self.pop = toolbox.population(populationSize)
    self.mstats = mstats
    self.toolbox = toolbox
def impact_of_k_on_knn():
    dataset_data = get_dataset_data(1)
    k_list = []
    r_sq_list = []
    # Measure R^2 from fill() for every k in [1, 345]
    for k in range(1, 346):
        predicted_knn, r_knn = fill(KNN(n_neighbors=k), dataset_data)
        k_list.append(k)
        r_sq_list.append(r_knn)
    print('first k values:')
    print(k_list[:5])
    print(r_sq_list[:5])
    max_r = max(r_sq_list)
    max_k = r_sq_list.index(max_r) + 1
    print('max R^2: %.3f, k: %d' % (max_r, max_k))
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 5))
    ax1.plot(k_list, r_sq_list)
    ax1.set_xlabel('k')
    ax1.set_ylabel('$R^2$')
    ax1.set_title('impact of k on the performance of KNN, k: [1, 345]')
    # Zoom in on the low-k region in the second panel
    ax2_k_start = 0
    ax2_k_limit = 79
    ax2.plot(k_list[ax2_k_start:ax2_k_limit],
             r_sq_list[ax2_k_start:ax2_k_limit])
    ax2.set_xlabel('k')
    ax2.set_title('k: [%d, %d]' % (ax2_k_start + 1, ax2_k_limit + 1))
    plt.show()
def getKNN(data, target, N):
    Y = data[target]
    X = data.drop(target, axis=1)
    model = KNN(n_neighbors=N)
    model.fit(X, Y)
    #cv = cross_val_score(model, X, Y, cv=N_FOLDS)
    return model
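# A minimal sketch of scoring the model returned by getKNN with k-fold
# cross-validation, along the lines of the commented-out call above; the
# toy DataFrame, its 'label' column, and the N_FOLDS value are illustrative
# assumptions.
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.neighbors import KNeighborsClassifier as KNN

N_FOLDS = 3
data = pd.DataFrame({'f1': [1, 2, 3, 4, 5, 6],
                     'f2': [2, 1, 2, 5, 6, 5],
                     'label': [0, 0, 0, 1, 1, 1]})
model = getKNN(data, 'label', N=3)
scores = cross_val_score(model, data.drop('label', axis=1), data['label'],
                         cv=N_FOLDS)
print(scores.mean())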
def exercise03(neighbors, split):
    '''
    Data set: Iris
    Split the Iris dataset into a train / test model with the split ratio
    between the two established by the function parameter split.
    Fit KNN with the training data with number of neighbors equal to the
    function parameter neighbors.
    Generate and return back an accuracy score using the test data that was
    split out.
    '''
    random_state = 21
    # ------ Place code below here \/ \/ \/ ------
    # Create arrays for the features and the response variable
    X = iris.data
    y = iris.target
    # Split into training and test set
    X_train, X_test, y_train, y_test = tts(X, y, test_size=split,
                                           random_state=random_state,
                                           stratify=y)
    # Create a KNN classifier with n neighbors
    knn = KNN(n_neighbors=neighbors)
    # Fit the classifier to the training data
    knn.fit(X_train, y_train)
    # Return the accuracy
    knn_score = knn.score(X_test, y_test)
    # ------ Place code above here /\ /\ /\ ------
    return knn_score
def draw_knn(dataset):
    data_x, data_y = dataset.data, dataset.target
    info = np.zeros([12, 5])
    # Accumulate accuracy and timing over 100 runs for odd k in [1, 23],
    # then scale by 0.01 to average
    for _ in range(100):
        data = list()
        for i in range(1, 25, 2):
            data.append((i,
                         *pipeline(KNC(n_neighbors=i), data_x, data_y,
                                   label='my'),
                         *pipeline(KNN(n_neighbors=i, algorithm='brute'),
                                   data_x, data_y, label='sk')))
        np.add(info, np.array(data), out=info)
    np.multiply(info, 0.01, out=info)
    # Accuracy versus k
    plt.figure()
    plt.plot(info[:, 0], info[:, 1], label='my')
    plt.plot(info[:, 0], info[:, 3], label='sklearn')
    plt.xlabel('k')
    plt.ylabel('accuracy')
    plt.legend(loc='best')
    plt.show()
    # Runtime versus k
    plt.figure()
    plt.plot(info[:, 0], info[:, 2], label='my')
    plt.plot(info[:, 0], info[:, 4], label='sklearn')
    plt.xlabel('k')
    plt.ylabel('time (sec)')
    plt.legend(loc='best')
    plt.show()
    mean = info.mean(axis=0)
    print(f'avg acc my: {mean[1]}, sk: {mean[3]}')
    print(f'avg time my: {mean[2]}, sk: {mean[4]}')
    return
def handwritingClassTest():
    # Training-set labels
    hwlabel = []
    # File names under the training-set directory
    trainingFileList = listdir('trainingDigits')
    m = len(trainingFileList)
    # Initialize the training matrix
    trainingMat = np.zeros((m, 1024))
    for i in range(m):
        fileNameStr = trainingFileList[i]
        # True label of the training file
        classNumber = int(fileNameStr.split('_')[0])
        hwlabel.append(classNumber)
        # Store each file's 1x1024 vector as a row of trainingMat
        trainingMat[i, :] = img2vector('trainingDigits/%s' % (fileNameStr))
    # Build the KNN classifier
    neigh = KNN(n_neighbors=3, algorithm='auto')
    # Fit the model
    neigh.fit(trainingMat, hwlabel)
    # File list under the test-set directory
    testFileList = listdir('testDigits')
    # Misclassification counter
    errorCount = 0.0
    mTest = len(testFileList)
    for i in range(mTest):
        testNameStr = testFileList[i]
        classNumber = int(testNameStr.split('_')[0])
        vectorTest = img2vector('testDigits/%s' % (testNameStr))
        classiFileResult = neigh.predict(vectorTest)
        print("classifier returned %d\ttrue answer is %d" % (classiFileResult, classNumber))
        if (classiFileResult != classNumber):
            errorCount += 1.0
    print("misclassified %d samples in total\nerror rate: %f%%" % (errorCount, errorCount / mTest * 100))
def hand_writing_test():
    hw_labels = []
    training_file_list = listdir('trainingDigits')
    m = len(training_file_list)
    training_data_mat = np.zeros((m, 1024))
    # enumerate replaces the manual index counter
    for index, training_file_name in enumerate(training_file_list):
        class_label = int(training_file_name.split('_')[0])
        hw_labels.append(class_label)
        training_data_mat[index, :] = creat_img_vector('trainingDigits/%s' % (training_file_name))
    neigh = KNN(n_neighbors=3, algorithm='auto')
    neigh.fit(training_data_mat, hw_labels)
    test_file_list = listdir('testDigits')
    n = len(test_file_list)
    error_count = 0
    for test_file_name in test_file_list:
        class_label = int(test_file_name.split('_')[0])
        testing_vector = creat_img_vector('testDigits/%s' % (test_file_name))
        classifyer_result = neigh.predict(testing_vector)
        if (classifyer_result != class_label):
            error_count += 1
    print('testing error %d data, error rate: %.2f%%' % (error_count, error_count / n * 100))
def myPredict():
    csv_file = 'WinePredictor.csv'
    data = pd.read_csv(csv_file)
    print(data.head())
    features = data.drop("Class", axis=1)
    print(features.head())
    target = data["Class"]
    print(target.head())
    data_train, data_test, target_train, target_test = train_test_split(
        features, target, test_size=0.5)
    # KNN classifier
    classifier = KNN()
    classifier.fit(data_train, target_train)
    predictions = classifier.predict(data_test)
    Accuracy = accuracy_score(target_test, predictions)
    print('Accuracy using KNN: ', Accuracy * 100, '%')
    # Decision-tree classifier for comparison
    classifier2 = tree.DecisionTreeClassifier()
    classifier2.fit(data_train, target_train)
    predictions2 = classifier2.predict(data_test)
    Accuracy2 = accuracy_score(target_test, predictions2)
    print('Accuracy using DT: ', Accuracy2 * 100, '%')
def train_model(value2, value3, value4):
    if len(value2) < 1:
        return "Please select at least 1 column"
    else:
        X_train, X_test, y_train, y_test = train_test_split(
            df[value2], df['species'], test_size=value3)
        if value4 == "KNN":
            clf = KNN()
        elif value4 == "SVC":
            clf = SVC(gamma='auto', probability=True)
        clf.fit(X_train, y_train)
        pred = clf.predict(X_test)
        acc = np.round(accuracy_score(pred, y_test), 3)
        # 3-D scatter of the per-class predicted probabilities,
        # colored by the predicted class
        proba = clf.predict_proba(X_test)
        fig5 = px.scatter_3d(x=proba[:, 0], y=proba[:, 1], z=proba[:, 2],
                             color=clf.predict(X_test))
        return html.Div([
            html.Br(),
            html.H6('Model accuracy %s' % str(acc)),
            html.Br(),
            dcc.Graph(id='g5', figure=fig5)
        ])