Exemple #1
0
 def setUp(self):
     """Create a two-row dataset and a five-node fixture tree.

     ``a`` is the top decision node with children ``b`` (a leaf) and ``c``
     (a decision node); ``c`` in turn has the leaf children ``d`` and ``e``.
     """
     features = pd.DataFrame({"a": [1, 2]}).values
     targets = np.array([1, 1])
     self.data = Data(features, targets)

     def new_leaf():
         # Every node in the fixture gets its own Split over the same data.
         return LeafNode(Split(self.data))

     # Children are created before the decision node that holds them.
     self.d = new_leaf()
     self.e = new_leaf()
     self.c = DecisionNode(Split(self.data), self.d, self.e)
     self.b = new_leaf()
     self.a = DecisionNode(Split(self.data), self.b, self.c)

     fixture_nodes = [self.a, self.b, self.c, self.d, self.e]
     self.tree = Tree(fixture_nodes)
Exemple #2
0
 def test_single_condition_data(self):
     """A ``le`` / ``gt`` condition pair separates the two rows of data."""
     features = pd.DataFrame({"a": [1, 2]}).values
     data = Data(features, np.array([1, 2]))

     # Presumably SplitCondition takes (column index, threshold, operator);
     # the expected values below are consistent with that reading.
     left_condition = SplitCondition(0, 1, le)
     right_condition = SplitCondition(0, 1, gt)

     left_split = Split(data) + left_condition
     right_split = Split(data) + right_condition

     left_column = list(left_split.data.X[:, 0])
     self.assertListEqual([1], left_column)
     right_column = list(right_split.data.X[:, 0])
     self.assertListEqual([2], right_column)
Exemple #3
0
 def test_head_prune(self):
     """Pruning the top node of a three-node tree swaps it for a leaf.

     The tree under test is built locally from ``a``, ``b`` and ``c``, so
     the assertions inspect those local nodes rather than the fixture tree.
     """
     b, c = LeafNode(Split(self.data)), LeafNode(Split(self.data))
     a = DecisionNode(Split(self.data), b, c)
     tree = Tree([a, b, c])
     updated_a = LeafNode(Split(self.data))
     prune_mutation = PruneMutation(a, updated_a)
     mutate(tree, prune_mutation)
     self.assertIn(updated_a, tree.leaf_nodes)
     # Check the node that was actually pruned: the local ``a``.  The
     # fixture's ``self.a`` is not part of this locally built tree, so an
     # assertion on it would pass regardless of what ``mutate`` did.
     self.assertNotIn(a, tree.nodes)
Exemple #4
0
 def test_grow(self):
     """Growing leaf ``d`` replaces it with a decision node in the tree."""
     first_leaf = LeafNode(Split(self.data))
     second_leaf = LeafNode(Split(self.data))
     grown_d = DecisionNode(Split(self.data), first_leaf, second_leaf)

     mutate(self.tree, TreeMutation("grow", self.d, grown_d))

     # The replacement is registered as a (prunable) decision node ...
     self.assertIn(grown_d, self.tree.decision_nodes)
     self.assertIn(grown_d, self.tree.prunable_decision_nodes)
     # ... its first child is now a leaf of the tree, and the old leaf is gone.
     self.assertIn(first_leaf, self.tree.leaf_nodes)
     self.assertNotIn(self.d, self.tree.nodes)
Exemple #5
0
 def setUp(self):
     """Create a one-row dataset and a five-node fixture tree.

     ``a`` is the top decision node with children ``b`` (a leaf) and ``c``
     (a decision node); ``c`` in turn has the leaf children ``d`` and ``e``.
     """
     covariates = format_covariate_matrix(pd.DataFrame({"a": [1]}))
     targets = np.array([1]).astype(float)
     self.data = Data(covariates, targets)

     # ``d`` and ``e`` pass an explicit ``None`` as LeafNode's second
     # argument; ``b`` relies on the default.
     self.d = LeafNode(Split(self.data), None)
     self.e = LeafNode(Split(self.data), None)
     self.c = DecisionNode(Split(self.data), self.d, self.e)
     self.b = LeafNode(Split(self.data))
     self.a = DecisionNode(Split(self.data), self.b, self.c)

     fixture_nodes = [self.a, self.b, self.c, self.d, self.e]
     self.tree = Tree(fixture_nodes)
Exemple #6
0
    def test_pruning_non_leaf_parent(self):
        """PruneMutation on a node with a decision-node child raises TypeError."""
        leaf_one = LeafNode(Split(self.data))
        leaf_two = LeafNode(Split(self.data))
        leaf_three = LeafNode(Split(self.data))
        inner = DecisionNode(Split(self.data), leaf_one, leaf_two)
        # ``outer`` has ``inner`` (a decision node) among its children.
        outer = DecisionNode(Split(self.data), leaf_three, inner)

        self.assertRaises(TypeError, PruneMutation, outer, leaf_one)
Exemple #7
0
    def test_growing_decision_node(self):
        """GrowMutation whose existing node is a decision node raises TypeError."""
        leaf_one = LeafNode(Split(self.data))
        leaf_two = LeafNode(Split(self.data))
        leaf_three = LeafNode(Split(self.data))
        inner = DecisionNode(Split(self.data), leaf_one, leaf_two)
        # The enclosing decision node is still constructed, although only
        # ``inner`` is handed to the mutation below.
        DecisionNode(Split(self.data), leaf_three, inner)

        self.assertRaises(TypeError, GrowMutation, inner, leaf_one)
Exemple #8
0
 def setUp(self):
     """Create un-normalized two-row data and a five-node fixture tree.

     ``a`` is the top decision node with children ``b`` (a leaf) and ``c``
     (a decision node); ``c`` in turn has the leaf children ``d`` and ``e``.
     """
     frame = pd.DataFrame({"a": [1, 2]})
     targets = np.array([1, 2])
     self.data = make_bartpy_data(frame, targets, normalize=False)

     def new_leaf():
         # Every node in the fixture gets its own Split over the same data.
         return LeafNode(Split(self.data))

     self.d = new_leaf()
     self.e = new_leaf()
     self.c = DecisionNode(Split(self.data), self.d, self.e)
     self.b = new_leaf()
     self.a = DecisionNode(Split(self.data), self.b, self.c)

     fixture_nodes = [self.a, self.b, self.c, self.d, self.e]
     self.tree = Tree(fixture_nodes)
Exemple #9
0
 def setUp(self):
     """Create 1000 rows of random normal data and a five-node fixture tree.

     ``a`` is the top decision node with children ``b`` (a leaf) and ``c``
     (a decision node); ``c`` in turn has the leaf children ``d`` and ``e``.
     """
     # Covariates are drawn before targets, so the draw order is unchanged.
     frame = pd.DataFrame({"a": np.random.normal(size=1000)})
     targets = np.array(np.random.normal(size=1000))
     self.data = make_bartpy_data(frame, targets)

     def new_leaf():
         # Every node in the fixture gets its own Split over the same data.
         return LeafNode(Split(self.data))

     self.d = new_leaf()
     self.e = new_leaf()
     self.c = DecisionNode(Split(self.data), self.d, self.e)
     self.b = new_leaf()
     self.a = DecisionNode(Split(self.data), self.b, self.c)

     fixture_nodes = [self.a, self.b, self.c, self.d, self.e]
     self.tree = Tree(fixture_nodes)
Exemple #10
0
 def initialize_trees(self) -> List[Tree]:
     """Build ``self.n_trees`` single-leaf trees over a scaled data copy.

     A shallow copy of ``self.data`` has its targets replaced by
     ``y / self.n_trees``; every tree's only leaf is built on that copy.

     Returns:
         A list of ``self.n_trees`` trees, each holding one ``LeafNode``.
     """
     # NOTE(review): ``copy`` is shallow - whether ``update_y`` also affects
     # ``self.data`` depends on how Data stores ``y``; confirm.
     scaled_data = copy(self.data)
     scaled_data.update_y(scaled_data.y / self.n_trees)

     forest = []
     for _ in range(self.n_trees):
         root_leaf = LeafNode(Split(scaled_data))
         forest.append(Tree([root_leaf]))
     return forest
Exemple #11
0
    def test_same_prediction(self):
        """A mapped bartpy tree predicts the same values as its sklearn tree.

        A one-estimator gradient boosting model is fitted, its single
        regression tree is mapped into a fresh bartpy tree, and both sets
        of predictions are compared after rounding to two decimals.
        """
        from sklearn.ensemble import GradientBoostingRegressor

        # NOTE(review): newer scikit-learn releases may reject loss='ls' -
        # confirm against the pinned version before changing it.
        booster = GradientBoostingRegressor(
            n_estimators=1,
            max_depth=2,
            min_samples_split=2,
            learning_rate=0.8,
            loss='ls',
        )
        booster.fit(self.data.X.values, self.data.y.values)

        sklearn_tree = booster.estimators_[0][0].tree_
        bartpy_tree = Tree([LeafNode(Split(self.data))])
        map_sklearn_tree_into_bartpy(bartpy_tree, sklearn_tree)

        # The sklearn tree is queried with a float32 view of the covariates.
        raw_sklearn = sklearn_tree.predict(
            self.data.X.values.astype(np.float32))
        expected = []
        for value in raw_sklearn.reshape(-1):
            expected.append(round(value, 2))

        # Mark the cache stale before asking the bartpy tree to predict.
        bartpy_tree.cache_up_to_date = False
        raw_bartpy = bartpy_tree.predict(self.data.X.values)
        actual = []
        for value in raw_bartpy:
            actual.append(round(value, 2))

        self.assertListEqual(expected, actual)
Exemple #12
0
 def initialize_trees(self) -> List[Tree]:
     """Build ``self.n_trees`` single-leaf trees over a scaled data copy.

     A deep copy of ``self.data`` has its targets replaced by
     ``y / self.n_trees``; every tree's only leaf is built on that copy,
     leaving ``self.data`` itself untouched.

     Returns:
         A list of ``self.n_trees`` trees, each holding one ``LeafNode``.
     """
     tree_data = deepcopy(self.data)
     tree_data._y = tree_data.y / self.n_trees
     # Build the leaves on the scaled copy rather than on ``self.data``;
     # otherwise the copy and the division above would never be used.
     trees = [
         Tree([LeafNode(Split(tree_data))]) for _ in range(self.n_trees)
     ]
     return trees
Exemple #13
0
 def initialize_trees(self) -> List[Tree]:
     """Build ``self.n_trees`` single-leaf trees with scaled targets.

     Each tree gets its own deep copy of ``self.data`` and is then told
     to use ``self.data.y.values / self.n_trees`` as its targets.

     Returns:
         A list of ``self.n_trees`` trees, each holding one ``LeafNode``.
     """
     trees = [
         Tree([LeafNode(Split(deepcopy(self.data)))])
         for _ in range(self.n_trees)
     ]
     for tree in trees:
         # Call ``update_y`` exactly once.  Passing its return value into a
         # second ``update_y`` call would replace the scaled targets with
         # whatever the first call returns (presumably ``None``).
         tree.update_y(self.data.y.values / self.n_trees)
     return trees
Exemple #14
0
 def test_internal_prune(self):
     """Pruning ``c`` replaces it with a leaf and drops ``c``, ``d``, ``e``."""
     replacement = LeafNode(Split(self.data))
     mutate(self.tree, TreeMutation("prune", self.c, replacement))

     self.assertIn(replacement, self.tree.leaf_nodes)
     # The pruned node and both of its children must be gone from the tree.
     for removed in (self.c, self.d, self.e):
         self.assertNotIn(removed, self.tree.nodes)
Exemple #15
0
 def test_null_split_returns_all_values(self):
     """A Split without conditions exposes the same column 0 as its data."""
     features = pd.DataFrame({"a": [1, 2]}).values
     data = make_bartpy_data(features, np.array([1, 2]))

     unconditioned = Split(data).data

     expected_column = list(data.X.get_column(0))
     actual_column = list(unconditioned.X.get_column(0))
     self.assertListEqual(expected_column, actual_column)
Exemple #16
0
    def test_combined_condition_data(self):
        """Two stacked conditions keep only the rows that satisfy both."""
        features = pd.DataFrame({"a": [1, 2, 3, 4]}).values
        data = make_bartpy_data(features, np.array([1, 2, 1, 1]))

        # All four conditions are constructed, as before; only the first
        # ``le`` and the second ``gt`` are applied below.
        first_left_condition = SplitCondition(0, 3, le)
        first_right_condition = SplitCondition(0, 3, gt)
        second_left_condition = SplitCondition(0, 1, le)
        second_right_condition = SplitCondition(0, 1, gt)

        base_split = Split(data)
        narrowed = base_split + first_left_condition + second_right_condition

        remaining = list(narrowed.data.X.get_column(0))
        self.assertListEqual([2, 3], remaining)
Exemple #17
0
    def test_most_recent_split(self):
        """``most_recent_split_condition`` returns the last condition added."""
        features = pd.DataFrame({"a": [1, 2, 3, 4]}).values
        data = make_bartpy_data(features, np.array([1, 2, 1, 1]))

        # All four conditions are constructed, as before; only the first
        # ``le`` and the second ``gt`` are applied below.
        first_left_condition = SplitCondition(0, 3, le)
        first_right_condition = SplitCondition(0, 3, gt)
        second_left_condition = SplitCondition(0, 1, le)
        second_right_condition = SplitCondition(0, 1, gt)

        base_split = Split(data)
        two_step = base_split + first_left_condition + second_right_condition

        one_step = base_split + first_left_condition
        self.assertEqual(one_step.most_recent_split_condition(),
                         first_left_condition)
        self.assertEqual(two_step.most_recent_split_condition(),
                         second_right_condition)
Exemple #18
0
    def setUp(self):
        """Build a tree by splitting a leaf and then growing one child.

        ``a`` is the result of splitting a single leaf on column 0; ``b``
        and ``x`` are its children.  ``x`` is then replaced in the tree by
        ``c``, obtained by splitting ``a``'s right child on column 1, and
        ``d`` / ``e`` are the children of ``c``.
        """
        features = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]}).values
        self.data = Data(features, np.array([1, 2, 3]))

        starting_leaf = LeafNode(Split(self.data))
        first_conditions = (SplitCondition(0, 1, le), SplitCondition(0, 1, gt))
        self.a = split_node(starting_leaf, first_conditions)
        self.b = self.a.left_child
        self.x = self.a.right_child
        self.tree = Tree([self.a, self.b, self.x])

        # The node to split is read from the private ``_right_child``.
        node_to_grow = self.a._right_child
        second_conditions = (SplitCondition(1, 2, le), SplitCondition(1, 2, gt))
        self.c = split_node(node_to_grow, second_conditions)
        grow = TreeMutation("grow", self.x, self.c)
        mutate(self.tree, grow)

        self.d = self.c.left_child
        self.e = self.c.right_child
Exemple #19
0
    def setUp(self):
        """Build a tree by splitting a leaf and then growing one child.

        ``a`` is the result of splitting a single leaf on column 0; ``b``
        and ``x`` are its children.  ``x`` is then replaced in the tree by
        ``c``, obtained by splitting ``a``'s right child on column 1, and
        ``d`` / ``e`` are the children of ``c``.
        """
        frame = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
        covariates = format_covariate_matrix(frame)
        targets = np.array([1, 2, 3]).astype(float)
        self.data = Data(covariates, targets)

        starting_leaf = LeafNode(Split(self.data))
        first_conditions = (SplitCondition(0, 1, le), SplitCondition(0, 1, gt))
        self.a = split_node(starting_leaf, first_conditions)
        self.b = self.a.left_child
        self.x = self.a.right_child
        self.tree = Tree([self.a, self.b, self.x])

        # The node to split is read from the private ``_right_child``.
        node_to_grow = self.a._right_child
        second_conditions = (SplitCondition(1, 2, le), SplitCondition(1, 2, gt))
        self.c = split_node(node_to_grow, second_conditions)
        grow = TreeMutation("grow", self.x, self.c)
        mutate(self.tree, grow)

        self.d = self.c.left_child
        self.e = self.c.right_child
Exemple #20
0
 def test_pruning_leaf(self):
     """PruneMutation whose existing node is a leaf must raise TypeError."""
     # Build both nodes outside the ``assertRaises`` scope so that only the
     # PruneMutation constructor can satisfy the expected TypeError; an
     # error while creating the fixtures now fails the test instead.
     existing_leaf = LeafNode(Split(self.data))
     replacement_leaf = LeafNode(Split(self.data))
     with self.assertRaises(TypeError):
         PruneMutation(existing_leaf, replacement_leaf)
Exemple #21
0
 def test_invalid_prune(self):
     """PruneMutation on the fixture node ``self.a`` must raise TypeError."""
     # Create the replacement leaf outside the ``assertRaises`` scope so
     # that only the PruneMutation constructor can satisfy the expected
     # TypeError; a failure in LeafNode/Split now fails the test instead.
     updated_a = LeafNode(Split(self.data))
     with self.assertRaises(TypeError):
         PruneMutation(self.a, updated_a)
Exemple #22
0
 def setUp(self):
     """Create five-row data plus an unconditioned split and its leaf node."""
     frame = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
     self.X = format_covariate_matrix(frame)

     # NOTE(review): ``self.X`` is passed through format_covariate_matrix a
     # second time here; kept as-is - confirm whether that is intentional.
     covariates = format_covariate_matrix(self.X)
     targets = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
     self.data = Data(covariates, targets)

     self.split = Split(self.data)
     self.node = LeafNode(self.split)
Exemple #23
0
 def test_null_split_returns_all_values(self):
     """A Split without conditions exposes the same column 0 as its data."""
     features = pd.DataFrame({"a": [1, 2]}).values
     data = Data(features, np.array([1, 2]))

     unconditioned = Split(data).data

     expected_column = list(data.X[:, 0])
     actual_column = list(unconditioned.X[:, 0])
     self.assertListEqual(expected_column, actual_column)