Esempio n. 1
0
    def setUp(self):
        """
        Fit two 'pure'-terminated trees and keep their resulting tree objects.

        One tree splits on gain ratio and the other on gini; both are trained
        on the same small data set, which is also stored on the test case.
        """
        # One tree per split criterion, each grown until its leaves are pure
        pure_trees = [
            DT.ClassificationDecisionTree(split_type=criterion,
                                          terminate='pure')
            for criterion in ('gain_ratio', 'gini')
        ]

        # Small two-column data set with ten samples and binary targets
        self.x_data_1 = np.array([
            [1, 1], [2, 1], [3, 1], [4, 1], [5, 1],
            [6, 2], [7, 2], [8, 2], [9, 2], [10, 2],
        ])
        self.y_data_1 = np.array([0, 1, 1, 0, 1, 0, 1, 1, 0, 1])

        # Train every tree on the shared data
        for tree in pure_trees:
            tree.fit(self.x_data_1, self.y_data_1)

        # Expose the fitted tree objects to the individual tests
        gain_ratio_tree, gini_tree = pure_trees
        self.result_tree_1 = gain_ratio_tree.get_tree()
        self.result_tree_2 = gini_tree.get_tree()
Esempio n. 2
0
    def setUp(self):
        """
        Build and fit the three gini trees shared by the tests.

        ``dt_1`` and ``dt_2`` use 'leaf' termination with ``leaf_terminate``
        values of 1 and 2; ``dt_3_pure`` uses 'pure' termination. All three
        are trained on the same data, which is stored on the test case.
        """
        # leaf_terminate values for the two leaf-terminated trees
        self.leaf_terminate_1 = 1
        self.leaf_terminate_2 = 2

        # Options common to both leaf-terminated trees
        leaf_options = {'split_type': 'gini', 'terminate': 'leaf'}
        self.dt_1 = DT.ClassificationDecisionTree(
            leaf_terminate=self.leaf_terminate_1, **leaf_options)
        self.dt_2 = DT.ClassificationDecisionTree(
            leaf_terminate=self.leaf_terminate_2, **leaf_options)

        # Tree that terminates on pure leaves instead
        self.dt_3_pure = DT.ClassificationDecisionTree(split_type='gini',
                                                       terminate='pure')

        # Small two-column data set with ten samples and binary targets
        self.x_data_1 = np.array([
            [1, 4], [6, 7], [1, 4], [2, 3], [4, 5],
            [1, 5], [3, 6], [1, 4], [3, 1], [8, 9],
        ])
        self.y_data_1 = np.array([0, 1, 1, 0, 1, 0, 1, 1, 0, 1])

        # Train every tree on the shared data
        for tree in (self.dt_1, self.dt_2, self.dt_3_pure):
            tree.fit(self.x_data_1, self.y_data_1)
Esempio n. 3
0
    def setUp(self):
        """
        Fit a pruned and an unpruned gain-ratio tree on the same data.

        Both trees use 'pure' termination and differ only in the ``prune``
        flag.
        """
        # Settings shared by both trees; only ``prune`` differs
        shared_options = {'split_type': 'gain_ratio', 'terminate': 'pure'}
        self.dt_w_prune = DT.ClassificationDecisionTree(prune=True,
                                                        **shared_options)
        self.dt_wo_prune = DT.ClassificationDecisionTree(prune=False,
                                                         **shared_options)

        # Single-feature data: two samples with value 1, three with value 2
        features = np.array([[1], [1], [2], [2], [2]])
        targets = np.array([0, 1, 0, 1, 1])

        # Train both trees on the same data
        for tree in (self.dt_w_prune, self.dt_wo_prune):
            tree.fit(features, targets)
Esempio n. 4
0
 def __get_tree(self, x, y):
     """
     Build a classification decision tree and fit it to the given data.

     The tree is configured from this object's ``_split_type``,
     ``_terminate`` and ``_leaf_terminate`` attributes, with pruning
     switched off.
     :param x: The x data to fit to (input)
     :param y: The y data to fit to (target)
     :return: The fitted ClassificationDecisionTree
     """
     fitted_tree = DT.ClassificationDecisionTree(
         self._split_type,
         self._terminate,
         self._leaf_terminate,
         prune=False,
     )
     fitted_tree.fit(x, y)
     return fitted_tree
Esempio n. 5
0
    def test_data_intake_classification(self):
        """
        Check that the classifier predicts the same labels for different
        input container types.

        A gini tree with 'leaf' termination is fitted first with the
        ``x_df_*`` fixtures and a pandas Series target, then refitted with the
        ``x_matrix_*`` fixtures and a numpy matrix target. Both fits must
        predict the same expected labels. The fixtures are presumably created
        in the test case's setUp, which is not shown here.
        """
        train_labels = np.array([0, 1, 1, 0, 1, 0, 1, 1, 0, 1])
        expected_labels = list(np.array([1, 0, 1]))

        # The same targets wrapped in the two container types under test
        labels_as_series = pd.Series(train_labels)
        labels_as_matrix = np.asmatrix(train_labels)

        classifier = DT.ClassificationDecisionTree(
            split_type='gini', terminate='leaf', leaf_terminate=1)

        def fit_and_check(train_x, train_y, test_x):
            # Refit the shared classifier, then compare its predictions
            classifier.fit(train_x, train_y)
            self.assertEqual(list(classifier.predict(test_x)),
                             expected_labels)

        fit_and_check(self.x_df_train, labels_as_series, self.x_df_test)
        fit_and_check(self.x_matrix_train, labels_as_matrix,
                      self.x_matrix_test)