def main(col_names=None):
    # parse command-line arguments to read the name of the input csv file
    # and optional 'draw tree' parameter
    if len(sys.argv) < 2:  # input file name should be specified
        print("Please specify input csv file name")
        return

    csv_file_name = sys.argv[1]

    data = []
    with open(csv_file_name) as csvfile:
        readCSV = csv.reader(csvfile, delimiter=',')
        for row in readCSV:
            data.append(list(row))

    print("Total number of records = ", len(data))
    tree = regression_tree.buildtree(data, min_gain=0.001, min_samples=5)

    regression_tree.printtree(tree, '', col_names)

    max_tree_depth = regression_tree.max_depth(tree)
    print("max number of questions=" + str(max_tree_depth))

    print("ODE: ")
    print(
        regression_tree.classify([
            'tenured', 'not minority', 'male', 'english', 'young', 'small',
            'upper', 'not handsome/beautiful)', 'good'
        ], tree))
    print("Linear Algebra: ")
    print(
        regression_tree.classify([
            'tenured', 'not minority', 'male', 'english', 'young', 'small',
            'lower', 'not handsome/beautiful)', 'bad'
        ], tree))
    print("Intro to Stats: ")
    print(
        regression_tree.classify([
            'tenured', 'not minority', 'female', 'english', 'young', 'small',
            'lower', 'not handsome/beautiful)', 'good'
        ], tree))

    if len(sys.argv) > 2:  # draw option specified
        import dtree_draw_regression_tree
        dtree_draw_regression_tree.drawtree(tree, jpeg=csv_file_name + '.jpg')

    if len(sys.argv) > 3:  # create json file for d3.js visualization
        import json
        import dtree_to_json
        json_tree = dtree_to_json.dtree_to_jsontree(tree, col_names)
        print(json_tree)

        # create json data for d3.js interactive visualization
        with open(csv_file_name + ".json", "w") as write_file:
            json.dump(json_tree, write_file)
Beispiel #2
0
def main(col_names=None):
    # parse command-line arguments to read the name of the input csv file
    if len(sys.argv) < 2:  # input file name should be specified
        print("Please specify input csv file name")
        return

    csv_file_name = sys.argv[1]

    data = []
    with open(csv_file_name) as csvfile:
        readCSV = csv.reader(csvfile, delimiter=',')
        for row in readCSV:
            data.append(list(row))
    tree = regression_tree.buildtree(data, min_gain=0.01, min_samples=5)

    max_tree_depth = regression_tree.max_depth(tree)
    print("max number of questions=" + str(max_tree_depth))

    json_tree = dtree_to_jsontree(tree, col_names)
    print(json_tree)

    # create json data for d3.js interactive visualization
    with open(csv_file_name + ".json", "w") as write_file:
        json.dump(json_tree, write_file)