def classTest(ratio=0.1, k=3, csv_path='/home/plutolove/train.csv'):
    """Run a hold-out evaluation of the kNN classifier on a CSV data set.

    The CSV is loaded with pandas and converted to an array.  Column 0 is
    used as the label and the remaining columns as features.  The features
    are passed through ``autoNorm``; the first ``int(size * ratio)`` rows are
    then classified against the remaining rows, and every prediction plus the
    overall error rate is printed.

    Called with no arguments this behaves exactly like the original
    hard-coded version.

    Args:
        ratio: Fraction of the rows (taken from the top of the file) that is
            held out as the test set.
        k: Neighbour count forwarded to ``classify`` as its last argument.
        csv_path: Location of the CSV file to evaluate.

    Returns:
        None.  Results are reported on standard output only.
    """
    dataset = array(pd.read_csv(csv_path))
    labels = dataset[:, 0]
    data = dataset[:, 1:]

    # autoNorm returns three values; only the normalised matrix is used here.
    normdata, _ranges, _minval = autoNorm(data)

    size = data.shape[0]
    numTestVecs = int(size * ratio)
    if numTestVecs == 0:
        # Too few rows for the requested ratio: the error rate would be a
        # division by zero, so report the situation instead of crashing.
        print('no test vectors selected (size=%d, ratio=%s); '
              'error rate is undefined' % (size, ratio))
        return

    # The training portion does not change between iterations.
    trainData = normdata[numTestVecs:size, :]
    trainLabels = labels[numTestVecs:size]

    errorCount = 0
    for i in range(numTestVecs):
        result = classify(normdata[i, :], trainData, trainLabels, k)
        # A parenthesised single argument prints the same text on Python 2
        # and Python 3.
        print('the classifer came back with: %d, the real answer is: %d'
              % (result, labels[i]))
        if result != labels[i]:
            errorCount += 1

    print('the total error rate is: %f' % (errorCount / float(numTestVecs)))
# Scatter-plot two normalised feature columns of the dating data set,
# using the class labels to drive both marker size and marker colour.
import matplotlib.pyplot as plt
from autoNorm import *
from KNN import *

# file2matrix yields the feature matrix and the per-row labels.
group, labels = file2matrix(
    '/home/plutolove/machinelearninginaction/Ch02/datingTestSet2.txt'
)

# Only the normalised matrix is plotted; ranges and min_val are kept as
# module-level names, as in the original script.
data, ranges, min_val = autoNorm(group)

fig = plt.figure()
ax = fig.add_subplot(111)

# Columns 1 and 2 are the x and y coordinates.  Scaling the labels by 15
# gives each class a distinct marker size (s) and colour value (c).
ax.scatter(
    data[:, 1],
    data[:, 2],
    s=15*array(labels),
    c=15*array(labels),
)

plt.show()