#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Scatter-plot helper for a labelled feature matrix."""
from numpy import *
import matplotlib
import matplotlib.pyplot as plt


def plot(dataMat, labels, x1, x2):
    """Show a scatter plot of two columns of ``dataMat``, styled by label.

    Args:
        dataMat: Array supporting ``dataMat[:, j]`` column indexing.
        labels: Numeric labels, converted with ``array(labels)``; they
            drive both marker size and marker colour.
        x1: Column index plotted on the horizontal axis.
        x2: Column index plotted on the vertical axis.

    Returns:
        None. A new figure is created and ``plt.show()`` is called.
    """
    label_values = array(labels)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(
        dataMat[:, x1],
        dataMat[:, x2],
        # Offset by one so that a label of 0 still gets a non-zero marker
        # area; with a plain ``15.0 * label`` such points are not drawn.
        s=15.0 * (label_values + 1),
        # Colour values are mapped through the default colormap.
        c=15.0 * label_values,
    )
    plt.show()


if __name__ == "__main__":
    import importData

    dataset, label = importData.dataFrmFile("./optdigits.tes")
    # Only the first returned value is plotted; the other two are unused here.
    dataSet, ranges, minVals = importData.nomilze(dataset)
    plot(dataSet, label, 3, 4)
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Scatter-plot helper for a labelled feature matrix."""
from numpy import *
import matplotlib
import matplotlib.pyplot as plt


def plot(dataMat, labels, x1, x2):
    """Show a scatter plot of two columns of ``dataMat``, styled by label.

    Args:
        dataMat: Array supporting ``dataMat[:, j]`` column indexing.
        labels: Numeric labels, converted with ``array(labels)``; they
            drive both marker size and marker colour.
        x1: Column index plotted on the horizontal axis.
        x2: Column index plotted on the vertical axis.

    Returns:
        None. A new figure is created and ``plt.show()`` is called.
    """
    label_values = array(labels)

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.scatter(
        dataMat[:, x1],
        dataMat[:, x2],
        # Offset by one so that a label of 0 still gets a non-zero marker
        # area; with a plain ``15.0 * label`` such points are not drawn.
        s=15.0 * (label_values + 1),
        # Colour values are mapped through the default colormap.
        c=15.0 * label_values,
    )
    plt.show()


if __name__ == '__main__':
    import importData

    dataset, label = importData.dataFrmFile('./optdigits.tes')
    # Only the first returned value is plotted; the other two are unused here.
    dataSet, ranges, minVals = importData.nomilze(dataset)
    plot(dataSet, label, 3, 4)
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""Evaluate the k-NN classifier on the test file and plot the test data.

Loads ``./optdigits.tra`` and ``./optdigits.tes`` through ``importData``,
classifies every test row against the training rows with ``k = 10``,
prints the number of rows whose predicted label differs from the loaded
one, and finally plots columns 4 and 5 of the test data.
"""
from numpy import *
import importData
import plot
import knn

dataSet_tra, label_tra = importData.dataFrmFile('./optdigits.tra')
dataSet_tes, label_tes = importData.dataFrmFile('./optdigits.tes')

# Count the test rows for which the classifier disagrees with the label.
cnt = 0
for index, i in enumerate(label_tes):
    if i != knn.classify(dataSet_tes[index], dataSet_tra, label_tra, 10):
        cnt += 1

# Parenthesised single-argument form prints the same on Python 2 and 3.
print(cnt)

plot.plot(dataSet_tes, label_tes, 4, 5)
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""k-nearest-neighbour classification by majority vote."""
from numpy import *
import operator


def classify(inX, dataSet, labels, k):
    """Return the majority label among the ``k`` rows nearest to ``inX``.

    Nearness is Euclidean distance between ``inX`` and each row of
    ``dataSet``.

    Args:
        inX: Feature vector to classify (1-D sequence or array).
        dataSet: 2-D sequence or array of training rows, one per label.
        labels: Sequence of hashable labels; ``labels[i]`` belongs to
            ``dataSet[i]``.
        k: Number of neighbours that vote. Values larger than the number
            of rows use every row.

    Returns:
        The label with the most votes. On a tie, the tied label whose
        first vote came from the nearest neighbour wins.

    Raises:
        ValueError: If ``dataSet`` and ``labels`` differ in length.
        IndexError: If no neighbour votes (``k < 1`` or no rows).
    """
    # Work in floating point: with integer input an in-place float power
    # cannot be cast back into the integer array and raises.
    samples = asarray(dataSet, dtype=float)
    query = asarray(inX, dtype=float)
    if samples.shape[0] != len(labels):
        raise ValueError("dataSet and labels must have the same length")

    # Broadcasting subtracts the query from every row without tiling it.
    deltas = samples - query
    # Squared distances rank rows exactly like the distances themselves,
    # so the square root is not needed.
    squared_distances = (deltas ** 2).sum(axis=1)

    # A stable sort keeps equally distant rows in their original order.
    ranked = squared_distances.argsort(kind="mergesort")
    # Slicing clamps k to the number of rows; k < 1 selects nothing.
    nearest = ranked[:k] if k > 0 else ranked[:0]

    votes = {}
    first_seen = []  # labels in order of their first (nearest) vote
    for row in nearest:
        label = labels[row]
        if label not in votes:
            votes[label] = 0
            first_seen.append(label)
        votes[label] += 1

    # sorted() is stable even with reverse=True, so tied labels keep
    # their nearest-first order.
    tally = sorted(
        [(label, votes[label]) for label in first_seen],
        key=operator.itemgetter(1),
        reverse=True,
    )
    return tally[0][0]


if __name__ == '__main__':
    import importData

    dataSet, labels = importData.dataFrmFile('./optdigits.tes')
    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print(classify(dataSet[0], dataSet, labels, 4))