Exemple #1
0
def handle_data(train_set, test_set):
    """
    Score a decision tree, a k-NN model and a naive Bayes model on the test
    set, then write the decision tree and the three accuracies to OUTPUT_FILE.

    :param train_set: raw training input, handed unchanged to
        split_train_data (its exact format is defined by that helper)
    :param test_set: raw test input, handed unchanged to split_test_data
        (its exact format is defined by that helper)

    Each accuracy is stored as a string with two digits after the decimal
    point, in the order: decision tree, k-NN, naive Bayes.
    """
    # separate samples from labels; the train split also yields the
    # attribute names and the label key that every model receives
    train_data, train_data_labels, attributes, label_key = split_train_data(train_set)
    test_data, test_data_labels = split_test_data(test_set)

    # instantiate the classifiers (same construction order as the report order)
    decision_tree = DecisionTree.Model()
    knn = Knn.Model()
    naive_bayes = NaiveBayes.Model()

    accuracies = []
    for model in (decision_tree, knn, naive_bayes):
        # a fresh [label_key, attributes] list is passed to each model
        model.set_data(train_data, train_data_labels, [label_key, attributes])
        score = get_acc(model.predict(test_data), test_data_labels)
        # keep exactly two digits after the decimal point
        accuracies.append(format(score, ".2f"))

    # the tree is written first, then the accuracies go to the same file
    decision_tree.get_tree().write_tree(OUTPUT_FILE)
    write_accuracies(OUTPUT_FILE, accuracies)
    print(I2F)
    print(L2I)
    print(I2L)
    print(V2I)
    print(I2V)
    '''
    # loading the datasetes
    TRAIN = get_data("train.txt")
    TEST = get_data("test.txt")

    # creating the different models
    dt = DecisionTree.Model(
        TRAIN, values={i: value.keys()
                       for i, value in I2V.items()})
    knn = KNN.Model(TRAIN, k=5)
    nb = NaiveBayes.Model(TRAIN, I2L.keys())

    # predicting on the TEST set
    r = open("output.txt", 'w')
    predict(TEST, [dt, knn, nb])
    r.close()

    # printing the tree that DecisionTree created
    t = open("output_tree.txt", 'w')
    print_tree(dt.root)
    t.close()

    # deleting the last '\n' character
    with open("output_tree.txt", 'rb+') as t:
        t.seek(-1, os.SEEK_END)
        t.truncate()