Beispiel #1
0
def vote(features):
    """
    Pick the winner of a vote over ``features``.

    The features are tallied with ``util.histogram`` (presumably a mapping
    from each distinct feature to its number of occurrences -- confirm
    against ``util``) and the key with the largest tally is returned.

    When several keys share the largest tally, the one that comes last in
    the histogram's iteration order wins, because the ascending sort below
    is stable.  An empty histogram raises ``IndexError``.
    """
    counts = util.histogram(features)
    # Ascending order by tally: the winner ends up in the final slot.
    ranked = sorted(counts, key=lambda feature: counts.get(feature))
    return ranked[-1]
Beispiel #2
0
def vote(features):
    """
    Return the feature that ``util.histogram`` ranks highest.

    ``util.histogram(features)`` is expected to behave like a dict whose
    values are comparable tallies (presumably occurrence counts -- verify
    against ``util``).  Its keys are ordered by tally and the top one is
    returned.

    Ties are resolved in favour of the tied key that appears last when
    iterating the histogram (``list.sort`` is stable).  ``IndexError`` is
    raised when the histogram has no keys.
    """
    tally = util.histogram(features)
    candidates = list(tally)
    # In-place ascending sort by tally; the most frequent key moves to the end.
    candidates.sort(key=tally.get)
    winner = candidates[-1]
    return winner
Beispiel #3
0
def main(arg):
    """
    Run ``knn`` on every row of a test set and report how well the
    predictions agree with the labels already stored in those rows.

    ``arg`` must expose ``k``, ``dataFile``, ``testDataFile`` and
    ``propertyToLearn`` (presumably parsed command-line options -- confirm
    at the call site).  The predictions are passed to
    ``knnloader.writeResultsToDisk`` together with the file name
    "diagnoses.csv", and the evaluation figures are printed to stdout.
    Nothing is returned.

    Raises ``KeyError`` if ``arg.propertyToLearn`` is not a key of the
    reverse header lookup.

    The confusion matrix is stored flat and indexed by
    ``(predicted == "Sick") + 2 * (actual == "Healthy")``:

        0: predicted not "Sick", actual not "Healthy"  -> false positive
        1: predicted "Sick",     actual not "Healthy"  -> true negative
        2: predicted not "Sick", actual "Healthy"      -> true positive
        3: predicted "Sick",     actual "Healthy"      -> false negative

    The positive class is "Healthy", which is what the sensitivity,
    specificity and precision formulas below imply.  The reading above
    assumes "Sick" and "Healthy" are the only labels that occur.
    """
    k = int(arg.k)
    searchKey = arg.propertyToLearn
    header, data = knnloader.getDataFromFilename(arg.dataFile)
    headerIndices = util.arrayToReverseDict(header)
    _, testData = knnloader.getDataFromFilename(arg.testDataFile)
    # The label column does not depend on the row, so resolve it once.
    searchIndex = headerIndices[searchKey]

    results = []
    confusionMatrix = np.array([0, 0, 0, 0])
    for queryRow in testData:
        result = knn(k, searchIndex, queryRow, header, data)
        cfMatrixIndex = 1 if result == "Sick" else 0
        cfMatrixIndex += 2 if queryRow[searchIndex] == "Healthy" else 0
        confusionMatrix[cfMatrixIndex] += 1
        results.append(result)

    knnloader.writeResultsToDisk(header, results, testData, "diagnoses.csv")
    computedHistogram = util.histogram(results)

    # Float counts keep the ratios below from truncating under Python 2.
    confusionMatrix = confusionMatrix.astype(float)
    falsePositives, trueNegatives, truePositives, falseNegatives = (
        confusionMatrix
    )
    total = falsePositives + trueNegatives + truePositives + falseNegatives

    # Errors are the two cells where prediction and label disagree (0 and 3).
    # Cells 1 and 2 hold the correct predictions and must not be counted
    # here, otherwise the two rates come out swapped.
    errorRate = (falsePositives + falseNegatives) / total
    accuracyRate = 1 - errorRate
    sensitivity = truePositives / (truePositives + falseNegatives)
    specificity = trueNegatives / (trueNegatives + falsePositives)
    precision = truePositives / (truePositives + falsePositives)

    # Single-argument print(...) produces the same output whether it is
    # parsed as a Python 2 statement or a Python 3 function call.  The extra
    # space in some templates reproduces the separator that the comma form
    # of the print statement inserted.
    print("Accuracy Rate:  %s" % accuracyRate)
    print("Error Rate: %s" % errorRate)
    print("Sensitivity:  %s" % sensitivity)
    print("Specificity:  %s" % specificity)
    print("Precision:  %s" % precision)
    print(confusionMatrix.reshape(2, 2))
    print(computedHistogram)
Beispiel #4
0
def main(arg):
    """
    Run ``knn`` on every row of a test set and report how well the
    predictions agree with the labels already stored in those rows.

    ``arg`` must expose ``k``, ``dataFile``, ``testDataFile`` and
    ``propertyToLearn`` (presumably parsed command-line options -- confirm
    at the call site).  The predictions are passed to
    ``knnloader.writeResultsToDisk`` together with the file name
    "diagnoses.csv", and the evaluation figures are printed to stdout.
    Nothing is returned.

    Raises ``KeyError`` if ``arg.propertyToLearn`` is not a key of the
    reverse header lookup.

    The confusion matrix is stored flat and indexed by
    ``(predicted == "Sick") + 2 * (actual == "Healthy")``:

        0: predicted not "Sick", actual not "Healthy"  -> false positive
        1: predicted "Sick",     actual not "Healthy"  -> true negative
        2: predicted not "Sick", actual "Healthy"      -> true positive
        3: predicted "Sick",     actual "Healthy"      -> false negative

    The positive class is "Healthy", which is what the sensitivity,
    specificity and precision formulas below imply.  The reading above
    assumes "Sick" and "Healthy" are the only labels that occur.
    """
    k = int(arg.k)
    searchKey = arg.propertyToLearn
    header, data = knnloader.getDataFromFilename(arg.dataFile)
    headerIndices = util.arrayToReverseDict(header)
    _, testData = knnloader.getDataFromFilename(arg.testDataFile)
    # The label column does not depend on the row, so resolve it once.
    searchIndex = headerIndices[searchKey]

    results = []
    confusionMatrix = np.array([0, 0, 0, 0])
    for queryRow in testData:
        result = knn(k, searchIndex, queryRow, header, data)
        cfMatrixIndex = 1 if result == "Sick" else 0
        cfMatrixIndex += 2 if queryRow[searchIndex] == "Healthy" else 0
        confusionMatrix[cfMatrixIndex] += 1
        results.append(result)

    knnloader.writeResultsToDisk(header, results, testData, "diagnoses.csv")
    computedHistogram = util.histogram(results)

    # Float counts keep the ratios below from truncating under Python 2.
    confusionMatrix = confusionMatrix.astype(float)
    falsePositives, trueNegatives, truePositives, falseNegatives = (
        confusionMatrix
    )
    total = falsePositives + trueNegatives + truePositives + falseNegatives

    # Errors are the two cells where prediction and label disagree (0 and 3).
    # Cells 1 and 2 hold the correct predictions and must not be counted
    # here, otherwise the two rates come out swapped.
    errorRate = (falsePositives + falseNegatives) / total
    accuracyRate = 1 - errorRate
    sensitivity = truePositives / (truePositives + falseNegatives)
    specificity = trueNegatives / (trueNegatives + falsePositives)
    precision = truePositives / (truePositives + falsePositives)

    # Single-argument print(...) produces the same output whether it is
    # parsed as a Python 2 statement or a Python 3 function call.  The extra
    # space in some templates reproduces the separator that the comma form
    # of the print statement inserted.
    print("Accuracy Rate:  %s" % accuracyRate)
    print("Error Rate: %s" % errorRate)
    print("Sensitivity:  %s" % sensitivity)
    print("Specificity:  %s" % specificity)
    print("Precision:  %s" % precision)
    print(confusionMatrix.reshape(2, 2))
    print(computedHistogram)