def main():
    """Read the income data, split it, and print the size of each collection.

    Steps, in order:
      1. Read the records from the hard-coded URL via extractData.readData.
      2. Build a test set and a training set from those records.
      3. Let classifierBuild.processSets split the full record list into
         two collections (named for the "less than 50k" and "more than
         50k" groups).
      4. Print the length of the full list and of each of the two
         collections, one per line, followed by a blank line.
      5. Hand the training set and both collections to dataStructures.

    Returns nothing; all results are printed or passed on.
    """
    source_url = "http://research.cs.queensu.ca/home/cords2/annualIncome.txt"
    records = extractData.readData(source_url)

    # The test set is built here but not consumed by the rest of this
    # function; the call is kept because makeTestSet may have side effects.
    test_records = extractData.makeTestSet(records)
    training_records = extractData.makeTrainingSet(records)

    under_50k, over_50k = classifierBuild.processSets(records)

    # Report sizes: all records first, then each of the two collections.
    for collection in (records, under_50k, over_50k):
        print(len(collection))
    print()

    # NOTE(review): dataStructures is called unqualified here, so it must be
    # defined or imported at module level elsewhere in this file.
    dataStructures(training_records, under_50k, over_50k)
def main():
    """Run the whole pipeline: load data, build the classifier, test it.

    Progress messages are printed before each stage. The final three lines
    report how many test records were classified correctly, how many were
    classified incorrectly, and the accuracy as a percentage.

    Returns nothing; all results are printed.
    """
    # extractData functions
    print("Reading in data")
    data = extractData.readData(
        "http://research.cs.queensu.ca/home/cords2/annualIncome.txt")

    print("Making training and test files")
    data_test = extractData.makeTestSet(data)
    data_train = extractData.makeTrainingSet(data)

    # classifierBuild function
    print("Building classifier")
    data_model_less, data_model_more = classifierBuild.dataStructures(
        data_train)

    # classifierTest functions
    print("Classifying test data")
    prediction_points, incorrectly_classified, percent_accuracy = (
        classifierTest.predictData(
            data_test, data_model_less, data_model_more))

    # The counts are passed to print() as separate arguments. The original
    # prefixed them with a unary "+", which does nothing for numbers and
    # raises TypeError for any other type, so it has been removed.
    print("Classified Correctly: ", prediction_points)
    print("Classified Incorrectly: ", incorrectly_classified)
    print("Accuracy: ", str(percent_accuracy) + "%")
else: counter_greater50 += 1 # Return the records predicted correctly, records predicted incorrectly and percent accuracy return prediction_points, (total_sum - prediction_points), round( (prediction_points / total_sum) * 100, 2) # Testing for each function if __name__ == "__main__": import extractData # imported for testing purposes import classifierBuild print("\n TESTING DATA ON A 15 RECORD SAMPLE") data = extractData.readData( "http://research.cs.queensu.ca/home/cords2/annualIncome.txt") data_test = extractData.makeTestSet(data[:30]) data_train = extractData.makeTrainingSet(data[:30]) data_model_less, data_model_more = classifierBuild.dataStructures( data_train) # classifierTest functions prediction_points, incorrectly_classified, percent_accuracy = predictData( data_test, data_model_less, data_model_more) print("\nClassified Correctly: ", +prediction_points) print("Classified Incorrectly: ", +incorrectly_classified) print("Accuracy: ", str(percent_accuracy) + "%") # predictData module test