예제 #1
0
def main(col_names=None):
    """Compare a decision tree and a naive Bayes model on one bootstrap split.

    Reads the CSV file named by ``sys.argv[1]``, skips its first row, draws a
    bootstrap sample (with replacement) as the training set and uses every
    record that was never drawn as the test set.  Both models are trained on
    the sample, the decision-tree predictions are written to
    ``predicted.csv`` and the accuracy of each model is printed.

    Args:
        col_names: Optional column names forwarded to
            ``dtree_build.printtree`` when printing the tree.

    Returns:
        None.  Returns early when no input file name was given on the
        command line.
    """
    if len(sys.argv) < 2:
        print("Please specify input csv file name")
        return

    csv_file_name = sys.argv[1]
    # The csv module asks for newline='' so that it can handle line endings
    # (including newlines embedded in quoted fields) itself.
    with open(csv_file_name, newline='') as csvfile:
        data = [list(row) for row in csv.reader(csvfile, delimiter=',')]

    records = data[1:]  # the first row is skipped (presumably a header)

    # NOTE(review): n_samples is len(data), i.e. one more than the number of
    # records because the skipped first row is counted too -- confirm whether
    # len(records) was intended.
    train = resample(records, replace=True, n_samples=int(len(data)))

    # Out-of-bag records form the test set.  Hashing the sampled rows once
    # avoids rescanning the whole training list for every record.
    sampled = {tuple(row) for row in train}
    test = [row for row in records if tuple(row) not in sampled]

    tree = dtree_build.buildtree(train, min_gain=0.01, min_samples=5)
    dtree_build.printtree(tree, '', col_names)

    bayes_model = naive_bayes.build(train)

    out_put = [['instance', 'actual', 'predicted', 'probability']]
    correct = 0
    correct2 = 0
    for total, row in enumerate(test, start=1):
        result = dtree_build.classify(row, tree)
        out = naive_bayes.classifier(bayes_model, row)

        # Pick the label with the highest count; on ties the label seen last
        # wins, exactly as the `>=` comparison dictates.
        sum_probability = 0
        max_number = 0
        choice = ''
        for label, count in result.items():
            sum_probability += count
            if count >= max_number:
                max_number = count
                choice = label

        actual = row[-1]
        if choice == actual:
            correct += 1
        # assumes the class label is an integer string and that the naive
        # Bayes classifier returns an int -- TODO confirm
        if out == int(actual):
            correct2 += 1

        # An empty (or all-zero) result would otherwise divide by zero.
        probability = max_number / sum_probability if sum_probability else 0.0
        out_put.append([total, actual, choice, probability])

    # newline='' prevents the csv writer from emitting blank rows on Windows.
    with open("predicted.csv", "w", newline='') as output:
        csv.writer(output).writerows(out_put)

    if not test:
        # Every record ended up in the bootstrap sample; accuracy is undefined.
        print("No out-of-sample records available to measure accuracy")
        return

    print("Accuracy for decision tree is", correct / len(test))
    print("Accuracy for naive bayes is", correct2 / len(test))
예제 #2
0
def main(train_f, image_f, test_f, output_f):
    """Build a decision tree from a TSV training file, draw it and classify.

    Args:
        train_f: Path of the training file, loaded with ``tsv.get_list``.
        image_f: Output path passed to ``dtree_draw.drawtree`` as ``jpeg``.
        test_f: Test file handed to ``classify_pokemon``.
        output_f: Output file handed to ``classify_pokemon``.
    """
    rows = tsv.get_list(train_f)

    # Replace every raw row, in place, by its numeric form without the
    # leading Pokemon-name column.
    for position, raw_row in enumerate(rows):
        numeric_row = move_tree.convert_numeric(raw_row)
        rows[position] = numeric_row
        numeric_row.pop(0)

    tree = dtree_build.buildtree(rows)
    # `labels` is not defined in this function; presumably a module-level
    # list of column names -- verify against the rest of the file.
    dtree_draw.drawtree(tree, labels, jpeg=image_f)
    classify_pokemon(tree, test_f, output_f)
예제 #3
0
def main(train_f, image_f, test_f, output_f):
    """Build a decision tree from a tab-separated training file and use it.

    Each line of ``train_f`` is split on tabs; the first field is dropped
    and the remaining fields are passed through ``convert_numeric`` to form
    one training row.  The resulting tree is drawn to ``image_f`` and then
    handed to ``classify_moves`` together with the test and output files.

    Args:
        train_f: Path of the tab-separated training file.
        image_f: Output path passed to ``dtree_draw.drawtree`` as ``jpeg``.
        test_f: Test file handed to ``classify_moves``.
        output_f: Output file handed to ``classify_moves``.

    Raises:
        OSError: If ``train_f`` cannot be opened.
    """
    rows = []

    # The context manager guarantees the file is closed even when
    # convert_numeric raises part-way through the file.
    with open(train_f) as data:
        for line in data:
            fields = line.rstrip().split('\t')
            # The first field (presumably the move name) is only an
            # identifier, so it is left out of the training row.
            rows.append(convert_numeric(fields[1:]))

    tree = dtree_build.buildtree(rows)
    # `labels` is not defined in this function; presumably a module-level
    # list of column names -- verify against the rest of the file.
    dtree_draw.drawtree(tree, labels, jpeg=image_f)
    classify_moves(tree, test_f, output_f)
예제 #4
0
def main(col_names=None):
    """Build, print and optionally export a decision tree for a CSV file.

    Command-line arguments (read from ``sys.argv``):
        1. input CSV file name (required);
        2. any value: also draw the tree to ``<csv_file_name>.jpg``;
        3. any value: also dump the tree as JSON to ``<csv_file_name>.json``.

    Args:
        col_names: Optional column names forwarded to
            ``dtree_build.printtree`` and ``dtree_to_json.dtree_to_jsontree``.

    Returns:
        None.  Returns early when no input file name was given.
    """
    if len(sys.argv) < 2:  # input file name should be specified
        print("Please specify input csv file name")
        return

    csv_file_name = sys.argv[1]

    data = []
    # The csv module asks for newline='' so that it can handle line endings
    # (including newlines embedded in quoted fields) itself.
    with open(csv_file_name, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            record = []
            for attribute in row:
                # Numeric-looking fields become floats; everything else stays
                # a string.  float() on a str only fails with ValueError, so
                # nothing broader (e.g. KeyboardInterrupt) is swallowed.
                try:
                    record.append(float(attribute))
                except ValueError:
                    record.append(attribute)
            data.append(record)

    print("Total number of records = ", len(data))
    tree = dtree_build.buildtree(data, min_gain=0.01, min_samples=5)

    dtree_build.printtree(tree, '', col_names)

    max_tree_depth = dtree_build.max_depth(tree)
    print("max number of questions=" + str(max_tree_depth))

    if len(sys.argv) > 2:  # draw option specified
        # Imported lazily so the drawing dependency is only needed on demand.
        import dtree_draw
        dtree_draw.drawtree(tree, jpeg=csv_file_name + '.jpg')

    if len(sys.argv) > 3:  # create json file for d3.js visualization
        import json
        import dtree_to_json
        json_tree = dtree_to_json.dtree_to_jsontree(tree, col_names)
        print(json_tree)

        # create json data for d3.js interactive visualization
        with open(csv_file_name + ".json", "w") as write_file:
            json.dump(json_tree, write_file)
import dtree_build
import sys

if __name__ == "__main__":
    # Toy data set: each record is [size, color, fruit name].
    fruits = [
        [4, 'red', 'apple'],
        [4, 'green', 'apple'],
        [1, 'red', 'cherry'],
        [1, 'green', 'grape'],
        [5, 'red', 'apple'],
    ]

    tree = dtree_build.buildtree(fruits)
    dtree_build.printtree(tree, '', ["size", "color"])

    # Classify a few unseen [size, color] pairs and show what the tree says.
    queries = (
        ("fruit [2, 'red'] is: ", [2, 'red']),
        ("fruit [4.5, 'red'] is: ", [4.5, 'red']),
        ("fruit [1.4, 'green'] is: ", [1.4, 'green']),
    )
    for caption, sample in queries:
        print(caption, dtree_build.classify(sample, tree))

    print("max number of questions=" + str(dtree_build.max_depth(tree)))

    draw_requested = len(sys.argv) > 1  # draw option specified
    if draw_requested:
        import dtree_draw
        dtree_draw.drawtree(tree, jpeg='fruits_dt.jpg')