def main():
    """Load the data set, derive its subsets, and print their sizes.

    Reads the records with ``extractData.readData``, builds the test and
    training sets, and obtains two subsets of the full data from
    ``classifierBuild.processSets``. The length of the full data set and of
    each of the two subsets is printed, followed by a blank line, and the
    training set plus both subsets are then passed to ``dataStructures``.
    """
    source_url = "http://research.cs.queensu.ca/home/cords2/annualIncome.txt"
    records = extractData.readData(source_url)

    # Same call order as before: test set first, then the training set.
    held_out = extractData.makeTestSet(records)  # result is not used below
    training = extractData.makeTrainingSet(records)

    under_50k, over_50k = classifierBuild.processSets(records)

    # One size per line: full data set, then each subset.
    for collection in (records, under_50k, over_50k):
        print(len(collection))

    print()
    dataStructures(training, under_50k, over_50k)
Exemple #2
0
def main():
    """Run the pipeline end to end and print the classification results.

    Steps, each announced with a progress message:

    1. Read the records with ``extractData.readData``.
    2. Build the test and training sets from those records.
    3. Build the two models with ``classifierBuild.dataStructures`` from the
       training set.
    4. Classify the test set with ``classifierTest.predictData``.

    Finally prints the three values returned by ``predictData``: the
    correctly classified count, the incorrectly classified count, and the
    accuracy followed by a percent sign.
    """
    # extractData functions
    print("Reading in data")
    data = extractData.readData(
        "http://research.cs.queensu.ca/home/cords2/annualIncome.txt")
    print("Making training and test files")
    data_test = extractData.makeTestSet(data)
    data_train = extractData.makeTrainingSet(data)

    # classifierBuild function
    print("Building classifier")
    data_model_less, data_model_more = classifierBuild.dataStructures(
        data_train)

    # classifierTest functions
    print("Classifying test data")
    prediction_points, incorrectly_classified, percent_accuracy = (
        classifierTest.predictData(
            data_test, data_model_less, data_model_more))

    # The values are handed to print() as-is: print() converts each argument
    # itself, so no arithmetic operator is needed in front of the counts.
    print("Classified Correctly: ", prediction_points)
    print("Classified Incorrectly: ", incorrectly_classified)
    print("Accuracy: ", str(percent_accuracy) + "%")
Exemple #3
0
                else:
                    counter_greater50 += 1

    # Return the records predicted correctly, records predicted incorrectly and percent accuracy
    return prediction_points, (total_sum - prediction_points), round(
        (prediction_points / total_sum) * 100, 2)


# Testing for each function
# Manual test harness: runs only when this module is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    # Project modules needed only by this harness, so they are imported here.
    import extractData  # imported for testing purposes
    import classifierBuild

    # NOTE(review): the banner announces a 15 record sample, while the slices
    # below take the first 30 rows of the data — presumably the split leaves
    # 15 records in the test set; confirm against extractData.
    print("\n TESTING DATA ON A  15 RECORD SAMPLE")
    data = extractData.readData(
        "http://research.cs.queensu.ca/home/cords2/annualIncome.txt")
    # Both sets are derived from the same first 30 rows.
    data_test = extractData.makeTestSet(data[:30])
    data_train = extractData.makeTrainingSet(data[:30])

    # Build the two models from the training set.
    data_model_less, data_model_more = classifierBuild.dataStructures(
        data_train)

    # classifierTest functions
    # predictData is called unqualified: it is this module's own function.
    prediction_points, incorrectly_classified, percent_accuracy = predictData(
        data_test, data_model_less, data_model_more)

    # The unary "+" in front of the counts leaves numeric values unchanged.
    print("\nClassified Correctly: ", +prediction_points)
    print("Classified Incorrectly: ", +incorrectly_classified)
    print("Accuracy: ", str(percent_accuracy) + "%")

    # predictData module test