import decision_tree

# Program start
# Command-line driver: loads a CSV data set, constructs a decision tree
# from its complete rows and displays the resulting tree.
# Expected arguments: <csv file name> <verbosity level>.
if len(sys.argv) < 3:
    # sys.exit with a string prints it to stderr and exits with status 1.
    sys.exit('Please, input as arguments:\n'
             '1. the name of the input CSV file.\n'
             '2. the level of verbosity: 0, 1 or 2\n'
             ' 0 - output only the decision tree,\n'
             ' 1 - also provide some basic information on the '
             'construction,\n'
             ' 2 - in addition provide the explanations of the '
             'decision tree construction.\n\n'
             'Example use:\n'
             'python construct_decision_tree.py swim.csv 1')

csv_file_name = sys.argv[1]
try:
    # verbosity level, 0 - only decision tree
    verbose = int(sys.argv[2])
except ValueError:
    # Report a malformed level the same way as missing arguments instead
    # of letting the raw traceback reach the user.
    sys.exit('The level of verbosity must be an integer: 0, 1 or 2')

# Define the enquired column to be the last one.
# I.e. a column defining the decision variable.
# NOTE(review): the column choice itself happens inside
# common.csv_file_to_ordered_data - confirm against that helper.
(heading, complete_data, incomplete_data,
 enquired_column) = common.csv_file_to_ordered_data(csv_file_name)

# printfv takes (level, verbose, text); presumably it prints the text only
# when the chosen verbosity reaches the given level - verify in common.
printfv(1, verbose,
        "We construct a decision tree given the following " +
        str(len(complete_data)) + " data items: \n" +
        str(complete_data) + "\n\n")

# The function name below is spelled exactly as the decision_tree module
# is referenced by this script; do not "correct" it here alone.
tree = decision_tree.constuct_decision_tree(
    verbose, heading, complete_data, enquired_column)

printfv(2, verbose, "\n")
printfv(1, verbose, "***Decision tree graph***\n")
decision_tree.display_tree(tree)
common.dic_key_count( conditional_counts, ( heading[i], incomplete_item[i], enquired_group[0]))) / ( common.dic_key_count(enquired_column_classes, enquired_group[0]))) partial_probs[enquired_group[0]] = probability probs_sum += probability for enquired_group in enquired_column_classes.items(): complete_probs[enquired_group[0] ] = partial_probs[enquired_group[0] ] / probs_sum incomplete_item[enquired_column] = complete_probs completed_items.append(incomplete_item) return completed_items # Program start if len(sys.argv) < 2: sys.exit('Please, input as an argument the name of the CSV file.') (heading, complete_data, incomplete_data, enquired_column) = common.csv_file_to_ordered_data(sys.argv[1]) # Calculate the Bayesian probability for the incomplete data # and output it. completed_data = bayes_probability( heading, complete_data, incomplete_data, enquired_column) print completed_data();