def vote(features):
    """
    Return the value that occurs most often in ``features``.

    Occurrences are tallied with ``util.histogram``. The distinct values are
    then ordered by ascending count and the final one is returned. Because
    the sort is stable, a tie goes to the tied value that comes last in the
    histogram's iteration order. An empty histogram raises IndexError.
    """
    counts = util.histogram(features)
    ranked = list(counts)
    ranked.sort(key=counts.get)
    return ranked[-1]
def main(arg): k = int(arg.k) results = [] dataFilePath = arg.dataFile testDataFilePath = arg.testDataFile searchKey = arg.propertyToLearn header, data = knnloader.getDataFromFilename(dataFilePath) headerIndices = util.arrayToReverseDict(header) _, testData = knnloader.getDataFromFilename(testDataFilePath) confusionMatrix = np.array([0, 0, 0, 0]) for queryRow in testData: searchIndex = headerIndices[searchKey] result = knn( k, searchIndex, queryRow, header, data ) cfMatrixIndex = 1 if result == "Sick" else 0 cfMatrixIndex += 2 if queryRow[searchIndex] == "Healthy" else 0 confusionMatrix[cfMatrixIndex] += 1 results.append(result) knnloader.writeResultsToDisk(header, results, testData, "diagnoses.csv") computedHistogram = util.histogram(results) confusionMatrix = confusionMatrix.astype(float) errorRate = ( confusionMatrix[2] + confusionMatrix[1] )/( confusionMatrix[0] + confusionMatrix[1] + confusionMatrix[2] + confusionMatrix[3] ) accuracyRate = 1 - errorRate sensitivity = confusionMatrix[2]/(confusionMatrix[2]+confusionMatrix[3]) specificity = confusionMatrix[1]/(confusionMatrix[1]+confusionMatrix[0]) precision = confusionMatrix[2]/(confusionMatrix[2]+confusionMatrix[0]) print "Accuracy Rate: ", accuracyRate print "Error Rate:", errorRate print "Sensitivity: ", sensitivity print "Specificity: ", specificity print "Precision: ", precision print confusionMatrix.reshape(2, 2) print computedHistogram
def main(arg): k = int(arg.k) results = [] dataFilePath = arg.dataFile testDataFilePath = arg.testDataFile searchKey = arg.propertyToLearn header, data = knnloader.getDataFromFilename(dataFilePath) headerIndices = util.arrayToReverseDict(header) _, testData = knnloader.getDataFromFilename(testDataFilePath) confusionMatrix = np.array([0, 0, 0, 0]) for queryRow in testData: searchIndex = headerIndices[searchKey] result = knn(k, searchIndex, queryRow, header, data) cfMatrixIndex = 1 if result == "Sick" else 0 cfMatrixIndex += 2 if queryRow[searchIndex] == "Healthy" else 0 confusionMatrix[cfMatrixIndex] += 1 results.append(result) knnloader.writeResultsToDisk(header, results, testData, "diagnoses.csv") computedHistogram = util.histogram(results) confusionMatrix = confusionMatrix.astype(float) errorRate = (confusionMatrix[2] + confusionMatrix[1]) / ( confusionMatrix[0] + confusionMatrix[1] + confusionMatrix[2] + confusionMatrix[3]) accuracyRate = 1 - errorRate sensitivity = confusionMatrix[2] / (confusionMatrix[2] + confusionMatrix[3]) specificity = confusionMatrix[1] / (confusionMatrix[1] + confusionMatrix[0]) precision = confusionMatrix[2] / (confusionMatrix[2] + confusionMatrix[0]) print "Accuracy Rate: ", accuracyRate print "Error Rate:", errorRate print "Sensitivity: ", sensitivity print "Specificity: ", specificity print "Precision: ", precision print confusionMatrix.reshape(2, 2) print computedHistogram