예제 #1
0
파일: main.py 프로젝트: gshawm/CS450
# Directory the original script read its data sets from. Kept as the default
# so that calling main() with no arguments behaves as it did before.
_DEFAULT_DATA_DIR = "C:\\Users\\Grant\\Documents\\School\\Winter 2016\\CS 450\\Prove03\\"


def _ask_yes_no(prompt: str) -> bool:
    """Prompt repeatedly until the user answers 'y' or 'n'.

    Surrounding whitespace and letter case are ignored, so answers such as
    "Y" or " n " are accepted rather than triggering another prompt.

    Returns:
        True when the answer is 'y', False when it is 'n'.
    """
    while True:
        answer = input(prompt).strip().lower()
        if answer == 'y':
            return True
        if answer == 'n':
            return False
        print("Please provide you answer in a 'y' or 'n' format.")


def main(data_dir: str = _DEFAULT_DATA_DIR) -> None:
    """Interactively train and evaluate the ID3 classifier on data sets.

    For each data set name typed by the user, the data set is loaded and
    preprocessed, split into training and testing parts, used to train the
    custom ``ID3`` classifier and a ``tree.DecisionTreeClassifier``, and the
    accuracy reported by ``get_accuracy`` is printed for both. The loop
    repeats until the user answers 'n' to the continue prompt.

    Args:
        data_dir: Directory the data set files are read from. Defaults to
            the location that was previously hard-coded.
    """
    # Local import keeps this block self-contained; the module's import
    # section is not visible from here.
    import os

    while True:
        data_set_name = input("Please provide the name of the data set you want to work with: ")

        # Load, Randomize, Normalize, Discretize Dataset
        data_set = Dataset()
        # os.path.join inserts a separator only when data_dir lacks one, and
        # an absolute data_set_name replaces data_dir entirely.
        data_set.read_file_into_dataset(os.path.join(data_dir, data_set_name))
        data_set.randomize()
        data_set.data = normalize(data_set.data)
        data_set.discretize()

        data_set.set_missing_data()

        # Split Dataset
        # NOTE(review): 0.7 is presumably the training fraction -- confirm
        # against split_dataset.
        split_percentage = 0.7
        data_sets = split_dataset(data_set, split_percentage)
        training_set = data_sets['train']
        testing_set = data_sets['test']

        # Create Custom Classifier, Train Dataset, Predict Target From Testing Set
        id3_classifier = ID3()
        id3_classifier.train(training_set)
        predictions = id3_classifier.predict(testing_set)

        id3_classifier.display_tree(0, id3_classifier.tree)

        # Check Results
        # NOTE(review): get_accuracy is assumed to return a percentage,
        # since "%" is appended to its result -- confirm.
        my_accuracy = get_accuracy(predictions, testing_set.target)
        print("Accuracy: " + str(my_accuracy) + "%")

        # Compare To Existing Implementations
        # NOTE(review): `tree` is presumably scikit-learn's sklearn.tree
        # module -- confirm against the file's imports.
        dtc = tree.DecisionTreeClassifier()
        dtc.fit(training_set.data, training_set.target)
        predictions = dtc.predict(testing_set.data)

        dtc_accuracy = get_accuracy(predictions, testing_set.target)
        print("DTC Accuracy: " + str(dtc_accuracy) + "%")

        # Do another or not
        if not _ask_yes_no("Do you want to examine another dataset? (y / n) "):
            break