def test_mean_squared_error(self):
    """MSE of the whole fixture data set with 'V1' as the regression target."""
    regressor = DecisionTreeRegressor()
    regressor.target_class = 'V1'
    result = regressor.mean_squared_error(self.data)
    print(result)
    self.assertAlmostEqual(53.5, result)
 def test_find_best_split(self):
     """Best split for the root node is the 'in' question on attribute 1."""
     regressor = DecisionTreeRegressor()
     regressor.target_class = 'V1'
     root = Node(data=self.data)
     best = regressor.find_best_split(root)
     print(best)
     # (question, impurity) pair
     self.assertTupleEqual(('in', 1, ['5', '2', '3']), best[0])
     self.assertAlmostEqual(32.166666666, best[1])
 def test_impurity(self):
     """Impurity of the '<=' question at threshold 5.5 on attribute 2."""
     regressor = DecisionTreeRegressor()
     regressor.target_class = 'V1'
     candidate = ('<=', 2, 5.5)
     result = regressor.impurity(candidate, self.data)
     print(result)
     self.assertAlmostEqual(result, 39.4375)
 def test_create_numerical_split(self):
     """Numeric split points are midpoints between consecutive sorted values."""
     regressor = DecisionTreeRegressor()
     regressor.target_class = 'Class'
     # sorted attribute values: 1, 2, 3, 5, 13, 13, 17, 22
     result = regressor.create_numeric_split(self.data, 0)
     print(result)
     self.assertListEqual(result, [1.5, 2.5, 4, 9, 13, 15, 19.5])
    def test_018(self):
        """Smoke test: the bank-marketing ARFF file parses into a Data object.

        Original code paired a bare ``open`` with a trailing ``close``; if
        ``Data(file)`` raised, the handle leaked. A ``with`` block closes it
        on every path.
        """
        tree = DecisionTreeRegressor()
        tree.target_class = 'V1'

        with open("data/bank-marketing.arff") as file:
            data = Data(file)
 def test_generate_candidate_splits_for_numeric(self):
     """A numeric attribute yields a '<=' question with midpoint thresholds."""
     regressor = DecisionTreeRegressor()
     regressor.target_class = 'Class'
     # sorted attribute values: 1, 1, 3, 5, 6, 8, 10, 20
     result = regressor.generate_candidate_splits(2, self.data)
     print(result)
     self.assertTupleEqual(result, ('<=', [1, 2, 4, 5.5, 7, 9, 15]))
 def test_create_categorical_split(self):
     """Categorical splits are nested category subsets in a fixed order."""
     regressor = DecisionTreeRegressor()
     regressor.target_class = 'V3'
     # per-category target values:
     # '1' => 4, '2' => 15, '3' => 1, '4' => no instance, '5' => 7
     result = regressor.create_categorical_split(self.data, 1)
     print(result)
     self.assertListEqual(result, [['3'], ['3', '1'], ['3', '1', '5']])
 def test_generate_candidate_splits_for_categorical(self):
     """A categorical attribute yields an 'in' question with nested subsets."""
     regressor = DecisionTreeRegressor()
     regressor.target_class = 'V1'
     # mean target per category:
     # '1' => (13 + 22) / 2 = 17.5
     # '2' => 9
     # '3' => 9
     # '4' => no instances
     # '5' => 2.5
     result = regressor.generate_candidate_splits(1, self.data)
     print(result)
     expected = ('in', [['5'], ['5', '2'], ['5', '2', '3']])
     self.assertTupleEqual(result, expected)
# Example #9
    def fit(self, X, y):
        """Fit a gradient-boosted multi-class model.

        Each boosting round trains one DecisionTreeRegressor per class on
        the softmax residuals and adds its (alpha-scaled) leaf predictions
        to the running per-class score estimates.

        Parameters
        ----------
        X : array-like
            Training inputs; converted to raw values via ``__get_values``.
        y : array-like
            Class labels; their unique values define the output classes.
        """
        X_ = self.__get_values(X)
        y_ = self.__get_values(y)
        self.regressors_ = []  # one group of per-class trees per boosting round
        self.unique_outputs_ = np.unique(y_)
        self.n_outputs_ = len(self.unique_outputs_)
        # estimations[k, i]: accumulated score of class k for sample i
        estimations = np.zeros((self.n_outputs_, len(y_)))
        self.loss_ = np.inf
        stable_loss_count = 0
        for _ in range(self.n_learners):
            # Softmax over the class axis turns scores into probabilities.
            prob_exp = np.exp(estimations)
            probs = prob_exp / (np.sum(prob_exp, axis=0))
            curr_regressor_group = []
            if self.n_iters_stop:
                # Early stopping: quit once the loss has changed by at most
                # loss_tol for n_iters_stop consecutive rounds.
                curr_loss = self._loss(y_, probs)
                if abs(curr_loss - self.loss_) <= self.loss_tol:
                    stable_loss_count += 1
                    if stable_loss_count == self.n_iters_stop:
                        self.loss_ = curr_loss
                        break
                else:
                    stable_loss_count = 0

                self.loss_ = curr_loss

            for k, y_k in enumerate(self.unique_outputs_):
                model = DecisionTreeRegressor(tol=self.tol,
                                              max_depth=self.max_depth,
                                              min_members=self.min_members,
                                              criterion='mse',
                                              split_method=self.split_method,
                                              max_features=self.max_features)

                # One-vs-rest indicator minus predicted probability — the
                # residual the k-th tree is fitted on this round.
                y_i = y_ == y_k
                residuals = y_i - probs[k]
                model.fit(X_, residuals)

                # NOTE(review): __eval_leaves presumably maps samples to
                # leaf values of the fitted tree — confirm its contract.
                res_pred = self.__eval_leaves(model.tree_, X_, residuals)
                estimations[k] += res_pred * self.alpha
                curr_regressor_group.append(model)
            self.regressors_.append(curr_regressor_group)
# Example #10
 def fit(self, X, y):
     '''
     Build the forest: create ``n_trees`` regression trees and train them
     in parallel with joblib.

     Parameters
     ----------
     X : matrix of shape = [n_samples, n_features]
         The matrix consisting of the input data

     y : array of shape = [n_samples]
         The label of X
     '''
     fresh_trees = []
     for _ in range(self.n_trees):
         fresh_trees.append(
             DecisionTreeRegressor(self.max_features_num, self.max_depth,
                                   self.min_samples_split))
     self.trees_ = fresh_trees
     # Each tree is built in its own job and the fitted copies are kept.
     dispatcher = Parallel(n_jobs=self.n_processes)
     self.trees_ = dispatcher(
         delayed(_parallel_build)(tree, X, y) for tree in self.trees_)
    def test_find_next_partition(self):
        """Partition the best node of a single-leaf tree and inspect children.

        Fix: the original opened the ARFF file and never closed it — a
        resource leak. A ``with`` block closes the handle on every path.
        """
        tree = DecisionTreeRegressor(max_leaf_nodes=3)
        tree.target_class = 'V1'
        # Expected first split: (('in', 1, ['5', '2', '3']), 32.16666666666667)
        with open("data/bank-marketing.arff") as file:
            data = Data(file)
        node = Node(data=data)
        node.is_leaf = True
        tree.root = node
        change, next_node = tree.find_best_node_to_split(node)
        print(change, next_node)
        tree.partition(next_node)
        print(tree.n_leaves)
        tree.root.left_child.data.summary()

        tree.root.right_child.data.summary()
        tree.root.right_child.left_child.data.summary()
        tree.root.right_child.right_child.data.summary()