def main(col_names=None):
    """Train a regression tree on a CSV file and print predictions for sample courses.

    The input file name is taken from ``sys.argv[1]``; without it a usage
    message is printed and the function returns. A second command-line
    argument additionally draws the tree to ``<csv file>.jpg``; a third one
    also writes a JSON form of the tree to ``<csv file>.json``.

    ``col_names`` is forwarded unchanged to ``regression_tree.printtree`` and
    ``dtree_to_json.dtree_to_jsontree``.
    """
    argc = len(sys.argv)
    if argc < 2:
        # Nothing to do without an input file.
        print("Please specify input csv file name")
        return

    csv_file_name = sys.argv[1]

    # Every CSV row becomes one record (a list of strings).
    with open(csv_file_name) as handle:
        data = [list(record) for record in csv.reader(handle, delimiter=',')]

    print("Total number of records = ", len(data))
    tree = regression_tree.buildtree(data, min_gain=0.001, min_samples=5)

    regression_tree.printtree(tree, '', col_names)

    print("max number of questions=" + str(regression_tree.max_depth(tree)))

    # Heading printed before each prediction, paired with the observation
    # handed to the tree.
    sample_courses = (
        ("ODE: ", [
            'tenured', 'not minority', 'male', 'english', 'young', 'small',
            'upper', 'not handsome/beautiful)', 'good',
        ]),
        ("Linear Algebra: ", [
            'tenured', 'not minority', 'male', 'english', 'young', 'small',
            'lower', 'not handsome/beautiful)', 'bad',
        ]),
        ("Intro to Stats: ", [
            'tenured', 'not minority', 'female', 'english', 'young', 'small',
            'lower', 'not handsome/beautiful)', 'good',
        ]),
    )
    for heading, observation in sample_courses:
        print(heading)
        print(regression_tree.classify(observation, tree))

    if argc > 2:
        # Any second argument requests a picture of the tree.
        import dtree_draw_regression_tree
        dtree_draw_regression_tree.drawtree(tree, jpeg=csv_file_name + '.jpg')

    if argc > 3:
        # Any third argument requests JSON output for the d3.js visualization.
        import json
        import dtree_to_json
        json_tree = dtree_to_json.dtree_to_jsontree(tree, col_names)
        print(json_tree)

        with open(csv_file_name + ".json", "w") as json_out:
            json.dump(json_tree, json_out)
コード例 #2
0
def main(col_names=None):
    """Build a regression tree from a CSV file and classify three sample courses.

    Command-line arguments (read from ``sys.argv``):
        1. input csv file name (required; a usage message is printed and the
           function returns when it is missing);
        2. any value: also draw the tree to ``<csv file>.jpg``;
        3. any value: also write the tree as JSON to ``<csv file>.json``.

    Args:
        col_names: forwarded unchanged to ``regression_tree.printtree`` and
            ``dtree_to_json.dtree_to_jsontree``.
    """
    if len(sys.argv) < 2:  # input file name should be specified
        print("Please specify input csv file name")
        return

    csv_file_name = sys.argv[1]

    data = []
    # newline='' lets the csv module handle line endings itself.
    with open(csv_file_name, newline='') as csvfile:
        for row in csv.reader(csvfile, delimiter=','):
            record = []
            for attribute in row:
                # Numeric-looking fields become floats; everything else is
                # kept as the original string.
                try:
                    record.append(float(attribute))
                except ValueError:
                    record.append(attribute)
            data.append(record)

    print("Total number of records = ", len(data))
    tree = regression_tree.buildtree(data, min_gain=0.005, min_samples=5)

    regression_tree.printtree(tree, '', col_names)

    # NOTE(review): the tree is built and printed with regression_tree but
    # measured and queried with dtree_build -- confirm this mix is intended.
    max_tree_depth = dtree_build.max_depth(tree)
    print("max number of questions=" + str(max_tree_depth))

    if len(sys.argv) > 2:  # draw option specified
        import regression_draw
        regression_draw.drawtree(tree, jpeg=csv_file_name + '.jpg')

    if len(sys.argv) > 3:  # create json file for d3.js visualization
        import json
        import dtree_to_json
        json_tree = dtree_to_json.dtree_to_jsontree(tree, col_names)
        print(json_tree)

        # create json data for d3.js interactive visualization
        with open(csv_file_name + ".json", "w") as write_file:
            json.dump(json_tree, write_file)

    # Each observation is written once and used for both the label and the
    # prediction, so the two can never drift apart.
    sample_courses = [
        ['teaching', 'not minority', 'female', 'english', 50, 30, 'lower', 7, 4],
        ['teaching', 'not minority', 'male', 'english', 40, 30, 'lower', 6, 4],
        ['teaching', 'not minority', 'male', 'english', 70, 30, 'lower', 4, 4],
    ]
    for observation in sample_courses:
        label = "course " + str(observation) + " is: "
        print(label, dtree_build.classify(observation, tree))
コード例 #3
0
def main(col_names=None):
    """Build a regression tree from a CSV file and predict the rows of ``predict.csv``.

    Command-line arguments (read from ``sys.argv``):
        1. input csv file name (required; a usage message is printed and the
           function returns when it is missing);
        2. any value: also draw the tree to ``<csv file>.jpg``;
        3. any value: also write the tree as JSON to ``<csv file>.json``.

    Args:
        col_names: forwarded unchanged to ``regression_tree.printtree`` and
            ``dtree_to_json.dtree_to_jsontree``.
    """
    if len(sys.argv) < 2:  # input file name should be specified
        print("Please specify input csv file name")
        return

    csv_file_name = sys.argv[1]

    # newline='' lets the csv module handle line endings itself.
    with open(csv_file_name, newline='') as csvfile:
        data = [list(row) for row in csv.reader(csvfile, delimiter=',')]

    print("Total number of records = ", len(data))
    tree = regression_tree.buildtree(data, min_gain=0.01, min_samples=5)
    regression_tree.printtree(tree, '', col_names)

    max_tree_depth = regression_tree.max_depth(tree)
    print("max number of questions=" + str(max_tree_depth))

    # Read the observations to predict from the prediction file itself: the
    # reader must be given the open file handle, not the file name string
    # (iterating a string yields one "row" per character).
    csv_predict = "predict.csv"
    predict = []
    try:
        with open(csv_predict, newline='') as csvpredict:
            for row in csv.reader(csvpredict, delimiter=','):
                if row:  # skip blank lines
                    predict.append(list(row))
    except FileNotFoundError:
        # Predictions are skipped, but the draw/json options below still run.
        print("Prediction file not found: " + csv_predict)

    # NOTE(review): assumes classify() takes a single observation (one row),
    # so each row is classified and printed on its own -- confirm against
    # regression_tree.classify.
    for observation in predict:
        print(regression_tree.classify(observation, tree))

    if len(sys.argv) > 2:  # draw option specified
        import regtree_draw
        regtree_draw.drawtree(tree, jpeg=csv_file_name + '.jpg')

    if len(sys.argv) > 3:  # create json file for d3.js visualization
        import json
        import dtree_to_json
        json_tree = dtree_to_json.dtree_to_jsontree(tree, col_names)
        print(json_tree)

        # create json data for d3.js interactive visualization
        with open(csv_file_name + ".json", "w") as write_file:
            json.dump(json_tree, write_file)