def test_build_tree(self):
        """Parse an XML decision tree and verify the structure that comes back.

        The root and each first-level branch are checked attribute by
        attribute (name, edge count, leaf labels), then the whole parsed tree
        is compared for equality against the same tree built by hand from
        ``Node`` and ``Label``.
        """
        tree_xml = """
        <Tree name = "test">
          <node var ="Gender">
            <edge var ="Female" num="2">
              <node var = "Bush Approval">
                <edge var = "Approve" num="2" >
                  <decision end = "2" choice = "McCain" p = "0.9"/>
                </edge>
                <edge var = "Disapprove" num="1">
                  <decision end = "1" choice="Obama" p = "0.95"/>
                </edge>
              </node>
            </edge>
            <edge var = "Male" num="1">
              <node var = "Ideology">
                <edge var = "Liberal" num = "1">
                  <decision end = "1" choice ="Obama" p = "0.99"/>
                </edge>
                <edge var = "Moderate" num="2">
                  <decision end = "1" choice = "Obama" p = "0.7"/>
                </edge>
                <edge var = "Conservative" num ="3">
                  <decision end = "2" choice = "McCain" p = "0.95"/>
                </edge>
              </node>
            </edge>
          </node>
        </Tree>
        """

        root = model.build_tree(tree_xml)
        self.assertEqual(root.name, "Gender")
        self.assertEqual(len(root.edges.keys()), 2)

        # (edge value on the root, child node name, (edge value, choice) leaves)
        expected_branches = (
            ("Female", "Bush Approval", (
                ("Approve", "McCain"),
                ("Disapprove", "Obama"),
            )),
            ("Male", "Ideology", (
                ("Liberal", "Obama"),
                ("Moderate", "Obama"),
                ("Conservative", "McCain"),
            )),
        )
        for edge_value, child_name, leaves in expected_branches:
            child = root.edges[edge_value]
            self.assertEqual(child.name, child_name)
            self.assertEqual(len(child.edges.keys()), len(leaves))
            for answer, choice in leaves:
                self.assertEqual(child.edges[answer], Label(choice))

        # Whole-tree comparison against a hand-built equivalent.
        female_subtree = Node("Bush Approval",
                ("Approve", Label("McCain")),
                ("Disapprove", Label("Obama")))
        male_subtree = Node("Ideology",
                ("Liberal", Label("Obama")),
                ("Moderate", Label("Obama")),
                ("Conservative", Label("McCain")))
        expected_tree = Node("Gender",
                ("Female", female_subtree),
                ("Male", male_subtree))
        self.assertEqual(root, expected_tree)
 def test_stringify_tree(self):
     """Check that a tree survives a stringify -> build_tree round trip."""
     female_subtree = Node("Bush Approval",
             ("Approve", Label("McCain")),
             ("Disapprove", Label("Obama")))
     male_subtree = Node("Ideology",
             ("Liberal", Label("Obama")),
             ("Moderate", Label("Obama")),
             ("Conservative", Label("McCain")))
     original = Node("Gender",
             ("Female", female_subtree),
             ("Male", male_subtree))

     # Serialise, parse the result back, and expect an equal tree.
     serialized = model.stringify_tree(original)
     rebuilt = model.build_tree(serialized)
     self.assertEqual(original, rebuilt)
def main(to_classify_csv, decision_tree_xml, restrictionstxt, has_label_column):
    """Classify every record of a CSV with a decision tree and print results.

    Args:
        to_classify_csv: Object with a ``read()`` method returning the CSV
            text of the records to classify.
        decision_tree_xml: Object with a ``read()`` method returning the XML
            text passed to ``model.build_tree``.
        restrictionstxt: Restrictions input, handed unchanged to
            ``dataset.restrictions_from_text``.
        has_label_column: Forwarded to ``dataset.read``. When truthy, an
            evaluation report (counts, accuracy, error rate, confusion
            matrix) is printed; otherwise each record is printed next to its
            predicted class.
    """
    # NOTE: whether the CSV has a label column is decided by the caller, not
    # detected here. An open idea is to infer it by comparing the CSV columns
    # with the number of distinct features the decision tree refers to.
    tree = model.build_tree(decision_tree_xml.read())
    restrictions = dataset.restrictions_from_text(restrictionstxt)
    cols, data = dataset.read(to_classify_csv.read(), has_label_column,
            restrictions)

    # Each row is indexed as row[0] = feature values, row[1] = label.
    predicted_classes = [tree.classify(row[0], cols) for row in data]

    if not has_label_column:
        # No ground truth available: just list each record with its prediction.
        for row, predicted in zip(data, predicted_classes):
            print(row[0], predicted)
        return

    # Labels are only needed (and only read) when the data is labelled.
    labels = [row[1] for row in data]
    correct = sum(1 for predicted, actual in zip(predicted_classes, labels)
                  if predicted == actual)
    incorrect = sum(1 for predicted, actual in zip(predicted_classes, labels)
                    if predicted != actual)

    print('Records:', len(data))
    print('Correctly classified:', correct)
    print('Incorrectly classified:', incorrect)
    print('Accuracy:', sampling.accuracy(labels, predicted_classes))
    print('Error:', sampling.error_rate(labels, predicted_classes))
    print('Confusion matrix:')
    print(sampling.confusion_matrix(labels, predicted_classes))