def plot_accuracies(feature_data, classification_data_numerical):
    """Plot classification accuracy against the number of neighbours k.

    Accuracy curves are drawn for both uniform and distance-based
    weighting, using a fresh train/test split of the supplied data.
    """
    # Random hold-out split with train_test_split's default proportions.
    split = train_test_split(feature_data, classification_data_numerical)
    x_train, x_test, y_train, y_test = split

    # Upper bound on the number of neighbours to evaluate.
    neighbour_limit = 350
    print("Again note the superior performance of distance-based weighting\n"
          "for this data set shown by the plot. While uniform weighting\n"
          "quickly loses accuracy as the number of neighbours considered\n"
          "increases, the accuracy using distance-based weighting remains\n"
          "relatively steady.")
    knnplots.plotaccuracy(x_train, y_train, x_test, y_test, neighbour_limit)
def plot_accuracies(feature_data, classification_data_numerical):
    """
    Plot accuracy for over a range of values of k neighbours for
    uniform and distance-based weighting.

    NOTE(review): byte-identical duplicate of the definition earlier in
    this file; at import time this second definition shadows the first.
    """

    # Random hold-out split using train_test_split's default proportions.
    feature_data_train, feature_data_test, \
    classification_data_train, classification_data_test = \
        train_test_split(feature_data, classification_data_numerical)

    # Upper bound on the number of neighbours evaluated by the plot.
    max_n_neighbours = 350
    print("Again note the superior performance of distance-based weighting\n"
          "for this data set shown by the plot. While uniform weighting\n"
          "quickly loses accuracy as the number of neighbours considered\n"
          "increases, the accuracy using distance-based weighting remains\n"
          "relatively steady.")
    knnplots.plotaccuracy(feature_data_train, classification_data_train,
                          feature_data_test, classification_data_test,
                          max_n_neighbours)
# --- Example 3 ---
# NOTE(review): this section is Python 2 code (bare `print` statements) and
# relies on names bound elsewhere in the file (X, yTransformed, metrics,
# neighbors, train_test_split, knnplots).

# Hold out a test set from the feature matrix X and the encoded labels.
XTrain, XTest, yTrain, yTest = train_test_split(X, yTransformed)

# make a confusion matrix
knn = neighbors.KNeighborsClassifier(n_neighbors=3)
knn.fit(XTrain, yTrain)  #classify the training set
predicted = knn.predict(XTest)  # predict the class labels for the test set

mat = metrics.confusion_matrix(
    yTest, predicted)  # Compute the confusion matrix for our predictions
print mat
print metrics.classification_report(yTest, predicted)
print "accuracy: ", metrics.accuracy_score(yTest, predicted)

# Plot accuracy score for different numbers of neighbours and weights

knnplots.plotaccuracy(XTrain, yTrain, XTest, yTest, 310)
knnplots.decisionplot(XTrain, yTrain, n_neighbors=3, weights="uniform")

# k-fold validation splits the training data into several different test sets.
# NOTE(review): knnK3 / knnK15 are not defined in this chunk -- presumably
# KNeighborsClassifier instances created elsewhere; confirm before running.
# `cross_validation` is the pre-0.18 scikit-learn module (now model_selection).

# 5-fold cross-validation scores for the k=3 model.
knn3scores = cross_validation.cross_val_score(knnK3, XTrain, yTrain, cv=5)
print knn3scores
print "Mean f scores KNN3", knn3scores.mean()
print "SD of scores KNN3", knn3scores.std()

# 5-fold cross-validation scores for the k=15 model.
knn15scores = cross_validation.cross_val_score(knnK15, XTrain, yTrain, cv=5)
print knn15scores
print "Mean f scores KNN15", knn15scores.mean()
print "SD of scores KNN15", knn15scores.std()

# Plot accuracy means and standard deviations for different numbers of folds
# NOTE(review): predictedk15 is also undefined in this chunk; this keeps only
# the k=15 predictions that disagree with the true labels.
nonAgreementPredictedK15 = predictedk15[predictedk15 != yTransformed]
# print 'Number of discrepancies', len(nonAgreementPredictedK15)
#
# print 'accuracy 3 nearest neighbours: ', metrics.accuracy_score(yTransformed, predictedk3)
# print 'accuracy 15 nearest neighbours: ', metrics.accuracy_score(yTransformed, predictedk15)

# Distance-weighted 3-NN fitted and evaluated on the full data set
# (predictions on the training data itself, so accuracy is optimistic).
knnWD = neighbors.KNeighborsClassifier(n_neighbors=3, weights='distance')
knnWD = knnWD.fit(X, yTransformed)
predictedWD = knnWD.predict(X)
#
# print numpy.sum(predictedWD != yTransformed)

# Fresh hold-out split; note the capital-Y names, distinct from the
# yTrain/yTest variables bound earlier in the file.
XTrain, XTest, YTrain, YTest = train_test_split(X,yTransformed)

print XTrain.shape
print YTrain.shape

# Refit the distance-weighted model on the training portion only and
# evaluate it on the held-out test portion.
knnWD = neighbors.KNeighborsClassifier(n_neighbors=3, weights='distance')
knnWD = knnWD.fit(XTrain, YTrain)
predictedWD = knnWD.predict(XTest)

print metrics.classification_report(YTest, predictedWD)
print 'accuracy: ', metrics.accuracy_score(YTest, predictedWD)

# Accuracy-vs-k plot, then decision boundaries for k=3 and k=15.
knnplots.plotaccuracy(XTrain,YTrain,XTest,YTest,310)

knnplots.decisionplot(XTrain,YTrain, n_neighbors=3,weights='uniform')

knnplots.decisionplot(XTrain,YTrain, n_neighbors=15,weights='uniform')

# NOTE(review): yTrain/yTest come from the FIRST split earlier in the file,
# while XTrain/XTest were overwritten by the later split -- the rows may no
# longer correspond; verify the pairing before trusting these results.
print "yTrain dimensions: ", yTrain.shape
#
print "XTest dimensions: ", XTest.shape
print "yTest dimensions: ", yTest.shape

#
# # print metrics.classi
#
# Distance-weighted 3-NN trained on the (possibly mismatched) split above.
knn = neighbors.KNeighborsClassifier(n_neighbors=3, weights="distance")
knn = knn.fit(XTrain, yTrain)
predicted = knn.predict(XTest)

print metrics.classification_report(yTest, predicted)
print "accuracy:", metrics.accuracy_score(yTest,predicted)

knnplots.plotaccuracy(XTrain,yTrain,XTest,yTest,310)




#
# print "PredictedK3 n_neighbors=3"
# print predictedK3
#
# knnK3 = neighbors.KNeighborsClassifier(n_neighbors=15, weights="distance")
# knnK3 = knnK3.fit(X, yTransformed)
# predictedK15 = knnK3.predict(X)
#
# print "PredictedK15 n_neighbors=15"
# print predictedK15
#