Example #1
0
import decision_tree

# Program start
# Command-line driver: reads a CSV file, builds a decision tree from its
# complete rows and displays the tree.
#
# Expected arguments:
#   sys.argv[1] - name of the input CSV file,
#   sys.argv[2] - verbosity level (0, 1 or 2).
if len(sys.argv) < 3:
    # sys.exit() with a string prints the message to stderr and exits with
    # a non-zero status.
    sys.exit('Please, input as arguments:\n' +
             '1. the name of the input CSV file.\n' +
             '2. the level of verbosity: 0, 1 or 2\n' +
             '   0 - output only the decision tree,\n' +
             '   1 - also provide some basic information on the ' +
             'construction,\n' +
             '   2 - in addition provide the explanations of the ' +
             'decision tree construction.\n\n' + 'Example use:\n' +
             'python construct_decision_tree.py swim.csv 1')

csv_file_name = sys.argv[1]
verbose = int(sys.argv[2])  # verbosity level, 0 - only decision tree

# Define the enquired column to be the last one.
# I.e. a column defining the decision variable.
# NOTE(review): the column is actually chosen inside
# common.csv_file_to_ordered_data - confirm there that it is the last one.
(heading, complete_data, incomplete_data,
 enquired_column) = common.csv_file_to_ordered_data(csv_file_name)

# printfv receives a message level and the requested verbosity; presumably
# it prints only when the verbosity is at least that level - verify against
# its definition.
printfv(
    1, verbose, "We construct a decision tree given the following " +
    str(len(complete_data)) + " data items: \n" + str(complete_data) + "\n\n")
# The tree is built from the complete rows only; incomplete_data is not
# used by this script.
tree = decision_tree.constuct_decision_tree(verbose, heading, complete_data,
                                            enquired_column)
printfv(2, verbose, "\n")
printfv(1, verbose, "***Decision tree graph***\n")
decision_tree.display_tree(tree)
Example #2
0
                        common.dic_key_count(
                            conditional_counts, (
                                heading[i], incomplete_item[i],
                                enquired_group[0]))) / (
                        common.dic_key_count(enquired_column_classes,
                                             enquired_group[0])))
            partial_probs[enquired_group[0]] = probability
            probs_sum += probability

        for enquired_group in enquired_column_classes.items():
            complete_probs[enquired_group[0]
                           ] = partial_probs[enquired_group[0]
                                             ] / probs_sum
        incomplete_item[enquired_column] = complete_probs
        completed_items.append(incomplete_item)
    return completed_items

# Program start
# Command-line driver: reads a CSV file and prints the items produced by
# bayes_probability for the rows reported as incomplete.
#
# Expected argument:
#   sys.argv[1] - name of the input CSV file.
if len(sys.argv) < 2:
    # sys.exit() with a string prints the message to stderr and exits with
    # a non-zero status.
    sys.exit('Please, input as an argument the name of the CSV file.')

(heading, complete_data, incomplete_data,
 enquired_column) = common.csv_file_to_ordered_data(sys.argv[1])

# Calculate the Bayesian probability for the incomplete data
# and output it.
completed_data = bayes_probability(
    heading, complete_data, incomplete_data, enquired_column)
# completed_data is the collection returned by bayes_probability, not a
# callable, so it is printed directly. The parenthesised single-argument
# form produces the same output under the Python 2 print statement and the
# Python 3 print function.
print(completed_data)