Ejemplo n.º 1
0
 def __init__(self, training_set):
     """
     Build a decision tree from the supplied training data.

     The training set is kept on the instance, the tree is produced by
     ``id3.build_tree`` and a ``MatplotlibAnnotationTreePlotter`` is
     created for that tree.

     Args:
       training_set: model.DataSet
         The training data the tree is built from.
     """
     self.training_set = training_set
     built_tree = id3.build_tree(training_set)
     self._tree = built_tree
     # The plotter receives the very same tree object stored above.
     self._plotter = MatplotlibAnnotationTreePlotter(built_tree)
Ejemplo n.º 2
0
 def __init__(self, training_set):
     """
     Create the classifier and build its tree right away.

     Stores the training data, builds the tree with ``id3.build_tree``
     and wraps the result in a ``MatplotlibAnnotationTreePlotter``.

     Args:
       training_set: model.DataSet
         Data used to build the decision tree.
     """
     self.training_set = training_set
     tree = id3.build_tree(training_set)
     self._tree = tree
     # Plotter is constructed from the tree that was just stored.
     self._plotter = MatplotlibAnnotationTreePlotter(tree)
Ejemplo n.º 3
0
        else:
            ret += bprint_aux(child_node, index + 2)
    return ret


import pandas as pd

# --- Input file layout -----------------------------------------------------
has_ids = True       # first column holds row identifiers
has_header = True    # first row holds column names
has_labels = True    # last column holds the class label
delimiter = ","

# Translate the flags above into the arguments pandas.read_csv expects.
header = 0 if has_header else None
id_col = 0 if has_ids else None

# NOTE: "./test/datasets/play_tennis.data" can be read with the same
# arguments if a different sample dataset is wanted.
dataframe = pd.read_csv("hope.csv", index_col=id_col, header=header, delimiter=delimiter)

# The label column is removed from the feature frame so that only features
# remain in `dataframe`.
labels = dataframe.pop(dataframe.columns[-1]) if has_labels else None

dataset = model.DataSet(dataframe, labels=labels)
tree = id3.build_tree(dataset)

# Render the tree as text, echo it and persist it to disk.
printbonito = bprint(tree)
print(printbonito)
# The context manager guarantees the file is closed even if write() raises.
with open("output.txt", mode="w", encoding="UTF-8") as txt:
    txt.write(printbonito)

# Finally show the graphical rendering of the same tree.
tp = MatplotlibAnnotationTreePlotter(tree)
tp.plot()
Ejemplo n.º 4
0
class DecisionTree(AbstractClassifier):
    """
    Classifier backed by a decision tree.

    The tree works like a flow chart: starting at the root, the value a
    sample holds for the feature named at each node selects the branch to
    follow, until a leaf is reached and a decision is made.

    Besides classifying, the shape of the tree can give insight into the
    data it was built from.
    """

    def __init__(self, training_set):
        """
        Build a decision tree from the supplied training data.

        Args:
          training_set: model.DataSet
            The training data the tree is built from.
        """
        self.training_set = training_set
        built_tree = id3.build_tree(training_set)
        self._tree = built_tree
        # The plotter receives the very same tree object stored above.
        self._plotter = MatplotlibAnnotationTreePlotter(built_tree)

    def _classify(self, sample):
        """
        Walk the tree for one sample and return the predicted label.

        Args:
          sample:
            The sample or observation to classify; it is indexed by the
            feature stored at each non-leaf node.

        Returns:
          The value of the leaf that was reached, or the fallback label
          from ``_handle_value_not_trained_for`` when a node has no child
          for the sample's value.
        """
        current = self._tree.get_root_node()
        while True:
            if current.is_leaf():
                return current.get_value()

            # Non-leaf nodes carry the name of the feature to test.
            observed = sample[current.get_value()]
            try:
                current = current.get_child(observed)
            except KeyError:
                # No branch exists for this value.
                return self._handle_value_not_trained_for()

    def _handle_value_not_trained_for(self):
        """
        Pick a label when a sample carries a feature value that the tree
        has no branch for.

        The present strategy is simply the most common label across the
        whole training set.  Restricting this to the samples that would
        reach the node where the unknown value showed up might be a
        better choice.

        Returns:
          label:
            The best guess at the label.
        """
        all_labels = self.training_set.get_labels()
        return collection_utils.get_most_common(all_labels)

    def plot(self):
        """
        Draw the decision tree so its structure can be inspected.

        Returns:
          void
        """
        plotter = self._plotter
        plotter.plot()
Ejemplo n.º 5
0
class DecisionTree(AbstractClassifier):
    """
    Decision tree based classifier.

    A decision is reached the way one reads a flow chart: each node names
    a feature, the sample's value for that feature chooses the branch, and
    the walk ends at a leaf holding the result.

    The resulting tree structure is also useful for understanding the
    data, not only for classification.
    """

    def __init__(self, training_set):
        """
        Create the classifier and build its tree right away.

        Args:
          training_set: model.DataSet
            Data used to build the decision tree.
        """
        self.training_set = training_set
        tree = id3.build_tree(training_set)
        self._tree = tree
        # Plotter is constructed from the tree that was just stored.
        self._plotter = MatplotlibAnnotationTreePlotter(tree)

    def _classify(self, sample):
        """
        Predict the label of a sample by descending the tree.

        Args:
          sample:
            The sample or observation to be classified.

        Returns:
          The sample's classification: the value held by the leaf that is
          reached, or the result of ``_handle_value_not_trained_for`` if
          a node has no child for the sample's value.
        """
        cursor = self._tree.get_root_node()
        while not cursor.is_leaf():
            feature_name = cursor.get_value()
            observed_value = sample[feature_name]
            try:
                next_node = cursor.get_child(observed_value)
            except KeyError:
                # The sample's value has no branch under this node.
                return self._handle_value_not_trained_for()
            cursor = next_node

        return cursor.get_value()

    def _handle_value_not_trained_for(self):
        """
        Fallback used when a sample has a feature value the tree does not
        account for.

        For now the most common label of the entire training set is
        returned.  A possible refinement is to consider only the samples
        that would arrive at the node where the unrecognized value was
        encountered.

        Returns:
          label:
            The best guess at the label.
        """
        training_labels = self.training_set.get_labels()
        return collection_utils.get_most_common(training_labels)

    def plot(self):
        """
        Produce a plot that visualizes the structure of the tree.

        Returns:
          void
        """
        tree_plotter = self._plotter
        tree_plotter.plot()