def setUp(self):
    """
    Prepare the fixtures shared by the tests of this case.

    Two classification decision trees are built with the 'pure'
    termination setting, one splitting on 'gain_ratio' and one on 'gini'.
    Both are fitted on the same small data set, and the object returned by
    each tree's ``get_tree()`` is kept on the test case.
    """
    # Same termination setting for both trees; only the split type differs.
    gain_ratio_tree = DT.ClassificationDecisionTree(
        split_type='gain_ratio',
        terminate='pure',
    )
    gini_tree = DT.ClassificationDecisionTree(
        split_type='gini',
        terminate='pure',
    )

    # Simple input data: the first column counts 1..10, the second column is
    # 1 for the first five rows and 2 for the last five.
    self.x_data_1 = np.array([
        [1, 1], [2, 1], [3, 1], [4, 1], [5, 1],
        [6, 2], [7, 2], [8, 2], [9, 2], [10, 2],
    ])
    self.y_data_1 = np.array([0, 1, 1, 0, 1, 0, 1, 1, 0, 1])

    # Train both trees on the same data, gain-ratio tree first.
    gain_ratio_tree.fit(self.x_data_1, self.y_data_1)
    gini_tree.fit(self.x_data_1, self.y_data_1)

    # Keep only the resulting tree objects; the estimators stay local.
    self.result_tree_1 = gain_ratio_tree.get_tree()
    self.result_tree_2 = gini_tree.get_tree()
def setUp(self):
    """
    Prepare the fixtures shared by the tests of this case.

    Three gini-based classification decision trees are created: two using
    the 'leaf' termination setting (with leaf_terminate values of 1 and 2)
    and one using the 'pure' termination setting. All three are fitted on
    the same small data set, which is also stored on the test case.
    """
    # Tree terminating on leaf size, leaf_terminate = 1
    self.leaf_terminate_1 = 1
    self.dt_1 = DT.ClassificationDecisionTree(
        split_type='gini',
        terminate='leaf',
        leaf_terminate=self.leaf_terminate_1,
    )

    # Tree terminating on leaf size, leaf_terminate = 2
    self.leaf_terminate_2 = 2
    self.dt_2 = DT.ClassificationDecisionTree(
        split_type='gini',
        terminate='leaf',
        leaf_terminate=self.leaf_terminate_2,
    )

    # Tree using the 'pure' termination criterion
    self.dt_3_pure = DT.ClassificationDecisionTree(
        split_type='gini',
        terminate='pure',
    )

    # Simple input data; note that the row [1, 4] occurs three times.
    self.x_data_1 = np.array([
        [1, 4], [6, 7], [1, 4], [2, 3], [4, 5],
        [1, 5], [3, 6], [1, 4], [3, 1], [8, 9],
    ])
    self.y_data_1 = np.array([0, 1, 1, 0, 1, 0, 1, 1, 0, 1])

    # Train every tree on the same data, in creation order.
    for tree in (self.dt_1, self.dt_2, self.dt_3_pure):
        tree.fit(self.x_data_1, self.y_data_1)
def setUp(self):
    """
    Prepare the fixtures shared by the tests of this case.

    Two classification decision trees are built with identical settings
    ('gain_ratio' split, 'pure' termination) except for the ``prune`` flag,
    then both are fitted on the same single-feature data set.
    """
    # Settings common to both trees; only ``prune`` differs.
    shared_settings = {'split_type': 'gain_ratio', 'terminate': 'pure'}
    self.dt_w_prune = DT.ClassificationDecisionTree(
        prune=True, **shared_settings)
    self.dt_wo_prune = DT.ClassificationDecisionTree(
        prune=False, **shared_settings)

    # Simple input data: one feature, value 1 for two rows and 2 for three.
    features = np.array([[1], [1], [2], [2], [2]])
    targets = np.array([0, 1, 0, 1, 1])

    # Train the pruned tree first, then the unpruned one.
    for tree in (self.dt_w_prune, self.dt_wo_prune):
        tree.fit(features, targets)
def __get_tree(self, x, y):
    """
    Build a single classification decision tree and fit it to the data.

    The tree is constructed from ``self._split_type``, ``self._terminate``
    and ``self._leaf_terminate`` (passed positionally, in that order) with
    pruning switched off.

    :param x: The x data to fit to (input)
    :param y: The y data to fit to (target)
    :return: The fitted ClassificationDecisionTree
    """
    fitted_tree = DT.ClassificationDecisionTree(
        self._split_type,
        self._terminate,
        self._leaf_terminate,
        prune=False,
    )
    fitted_tree.fit(x, y)
    return fitted_tree
def test_data_intake_classification(self):
    """
    Check that the classification tree accepts its training data in
    different container formats and predicts the same result for each.

    The tree is fitted once with a DataFrame/Series pair and once with a
    matrix pair; both predictions must equal the same expected labels.
    """
    # Target data in the containers under test
    train_labels = np.array([0, 1, 1, 0, 1, 0, 1, 1, 0, 1])
    expected_labels = list(np.array([1, 0, 1]))
    labels_as_series = pd.Series(train_labels)
    labels_as_matrix = np.asmatrix(train_labels)

    # Classification tree shared by both rounds (it is re-fitted each time)
    classifier = DT.ClassificationDecisionTree(
        split_type='gini',
        terminate='leaf',
        leaf_terminate=1,
    )

    # Round 1: pandas input
    classifier.fit(self.x_df_train, labels_as_series)
    predicted_from_df = list(classifier.predict(self.x_df_test))
    self.assertEqual(predicted_from_df, expected_labels)

    # Round 2: matrix input
    classifier.fit(self.x_matrix_train, labels_as_matrix)
    predicted_from_matrix = list(classifier.predict(self.x_matrix_test))
    self.assertEqual(predicted_from_matrix, expected_labels)