def plot_accuracies(feature_data, classification_data_numerical):
    """Plot classification accuracy over a range of k (number of
    neighbours) for both uniform and distance-based weighting.

    Parameters
    ----------
    feature_data : feature matrix handed to ``train_test_split``.
    classification_data_numerical : numeric class labels aligned with
        ``feature_data``.
    """
    # Hold out a test set; knnplots.plotaccuracy then scores classifiers
    # for k = 1 .. max_k on the held-out portion.
    split = train_test_split(feature_data, classification_data_numerical)
    train_features, test_features, train_labels, test_labels = split

    max_k = 350

    print("Again note the superior performance of distance-based weighting\n"
          "for this data set shown by the plot. While uniform weighting\n"
          "quickly loses accuracy as the number of neighbours considered\n"
          "increases, the accuracy using distance-based weighting remains\n"
          "relatively steady.")

    knnplots.plotaccuracy(train_features, train_labels,
                          test_features, test_labels, max_k)
# Hold out a test split, then evaluate a 3-nearest-neighbour classifier.
# NOTE(review): X and yTransformed are defined earlier in the file — verify.
XTrain, XTest, yTrain, yTest = train_test_split(X, yTransformed)

# Make a confusion matrix.
knn = neighbors.KNeighborsClassifier(n_neighbors=3)
knn.fit(XTrain, yTrain)            # fit on the training set
predicted = knn.predict(XTest)     # predict the class labels for the test set

# Compute the confusion matrix for our predictions.
mat = metrics.confusion_matrix(yTest, predicted)
print(mat)
print(metrics.classification_report(yTest, predicted))
print("accuracy: ", metrics.accuracy_score(yTest, predicted))

# Plot accuracy score for different numbers of neighbours and weights.
knnplots.plotaccuracy(XTrain, yTrain, XTest, yTest, 310)
knnplots.decisionplot(XTrain, yTrain, n_neighbors=3, weights="uniform")

# k-fold validation splits the training data into several different
# train/test folds and scores each fold.
# NOTE(review): knnK3 / knnK15 are not defined in this chunk — presumably
# fitted estimators created elsewhere in the file; confirm.
# NOTE(review): sklearn.cross_validation was removed in modern scikit-learn;
# migrate to sklearn.model_selection.cross_val_score.
knn3scores = cross_validation.cross_val_score(knnK3, XTrain, yTrain, cv=5)
print(knn3scores)
print("Mean f scores KNN3", knn3scores.mean())
print("SD of scores KNN3", knn3scores.std())

knn15scores = cross_validation.cross_val_score(knnK15, XTrain, yTrain, cv=5)
print(knn15scores)
print("Mean f scores KNN15", knn15scores.mean())
print("SD of scores KNN15", knn15scores.std())

# Plot accuracy means and standard deviations for different numbers of folds
# Rows where the 15-NN predictions disagree with the true labels.
# NOTE(review): predictedk15, yTransformed and X are defined earlier in
# the file — verify.
nonAgreementPredictedK15 = predictedk15[predictedk15 != yTransformed]
# print('Number of discrepancies', len(nonAgreementPredictedK15))
# print('accuracy 3 nearest neighbours: ', metrics.accuracy_score(yTransformed, predictedk3))
# print('accuracy 15 nearest neighbours: ', metrics.accuracy_score(yTransformed, predictedk15))

# Distance-weighted 3-NN fitted and predicted on the SAME data
# (resubstitution), which overstates accuracy.
knnWD = neighbors.KNeighborsClassifier(n_neighbors=3, weights='distance')
knnWD = knnWD.fit(X, yTransformed)
predictedWD = knnWD.predict(X)
# print(numpy.sum(predictedWD != yTransformed))

# Proper evaluation: hold out a test set.
XTrain, XTest, YTrain, YTest = train_test_split(X, yTransformed)
print(XTrain.shape)
print(YTrain.shape)

knnWD = neighbors.KNeighborsClassifier(n_neighbors=3, weights='distance')
knnWD = knnWD.fit(XTrain, YTrain)
predictedWD = knnWD.predict(XTest)
print(metrics.classification_report(YTest, predictedWD))
print('accuracy: ', metrics.accuracy_score(YTest, predictedWD))

# Accuracy vs. k, plus decision boundaries for k=3 and k=15
# with uniform weighting.
knnplots.plotaccuracy(XTrain, YTrain, XTest, YTest, 310)
knnplots.decisionplot(XTrain, YTrain, n_neighbors=3, weights='uniform')
knnplots.decisionplot(XTrain, YTrain, n_neighbors=15, weights='uniform')
# Shapes of the train/test split.
# NOTE(review): XTrain/XTest/yTrain/yTest come from a split made earlier
# in the file — verify.
print("yTrain dimensions: ", yTrain.shape)
# print("XTest dimensions: ", XTest.shape)
print("yTest dimensions: ", yTest.shape)

# Distance-weighted 3-NN: fit on the training split, score on the
# held-out test split.
knn = neighbors.KNeighborsClassifier(n_neighbors=3, weights="distance")
knn = knn.fit(XTrain, yTrain)
predicted = knn.predict(XTest)
print(metrics.classification_report(yTest, predicted))
print("accuracy:", metrics.accuracy_score(yTest, predicted))

# Accuracy for k = 1 .. 310 under both weighting schemes.
knnplots.plotaccuracy(XTrain, yTrain, XTest, yTest, 310)

# print("PredictedK3 n_neighbors=3")
# print(predictedK3)
#
# knnK3 = neighbors.KNeighborsClassifier(n_neighbors=15, weights="distance")
# knnK3 = knnK3.fit(X, yTransformed)
# predictedK15 = knnK3.predict(X)
#
# print("PredictedK15 n_neighbors=15")
# print(predictedK15)