def setUp(self):
    """Build a five-node tree fixture over a single-row data set.

    ``a`` is a decision node over ``b`` and ``c``; ``c`` is a decision node
    over the leaves ``d`` and ``e``.
    """
    covariates = pd.DataFrame({"a": [1]}).values
    target = np.array([1])
    self.data = Data(covariates, target)

    # Children are created before the decision nodes that reference them.
    self.d = LeafNode(Split(self.data), None)
    self.e = LeafNode(Split(self.data), None)
    self.c = DecisionNode(Split(self.data), self.d, self.e)
    self.b = LeafNode(Split(self.data))
    self.a = DecisionNode(Split(self.data), self.b, self.c)

    all_nodes = [self.a, self.b, self.c, self.d, self.e]
    self.tree = Tree(all_nodes)
def test_growing_decision_node(self):
    """Constructing a GrowMutation whose existing node is a decision node raises TypeError."""
    first_leaf = LeafNode(Split(self.data))
    second_leaf = LeafNode(Split(self.data))
    third_leaf = LeafNode(Split(self.data))
    inner_decision = DecisionNode(Split(self.data), first_leaf, second_leaf)
    # The parent is never referenced again; it is still constructed so the
    # set-up matches the original test exactly.
    outer_decision = DecisionNode(Split(self.data), third_leaf, inner_decision)

    with self.assertRaises(TypeError):
        GrowMutation(inner_decision, first_leaf)
def test_head_prune(self):
    """Pruning the head of a three-node tree swaps it for the replacement leaf.

    A local tree is built from decision node ``a`` over leaves ``b`` and
    ``c``; ``a`` is then pruned into ``updated_a``.
    """
    b, c = LeafNode(Split(self.data)), LeafNode(Split(self.data))
    a = DecisionNode(Split(self.data), b, c)
    tree = Tree([a, b, c])

    updated_a = LeafNode(Split(self.data))
    prune_mutation = PruneMutation(a, updated_a)
    mutate(tree, prune_mutation)

    self.assertIn(updated_a, tree.leaf_nodes)
    # Check the node that was actually pruned from *this* tree. The fixture
    # node ``self.a`` was never part of ``tree``, so asserting on it could
    # never fail.
    self.assertNotIn(a, tree.nodes)
def setUp(self):
    """Build a five-node tree fixture over a formatted single-row data set.

    ``a`` is a decision node over ``b`` and ``c``; ``c`` is a decision node
    over the leaves ``d`` and ``e``.
    """
    covariates = format_covariate_matrix(pd.DataFrame({"a": [1]}))
    target = np.array([1]).astype(float)
    self.data = Data(covariates, target)

    # Children are created before the decision nodes that reference them.
    self.d = LeafNode(Split(self.data), None)
    self.e = LeafNode(Split(self.data), None)
    self.c = DecisionNode(Split(self.data), self.d, self.e)
    self.b = LeafNode(Split(self.data))
    self.a = DecisionNode(Split(self.data), self.b, self.c)

    all_nodes = [self.a, self.b, self.c, self.d, self.e]
    self.tree = Tree(all_nodes)
def test_grow(self):
    """Growing leaf ``d`` into a decision node updates the tree's node collections."""
    first_leaf, second_leaf = LeafNode(Split(self.data)), LeafNode(Split(self.data))
    grown = DecisionNode(Split(self.data), first_leaf, second_leaf)

    mutation = TreeMutation("grow", self.d, grown)
    mutate(self.tree, mutation)

    # The new decision node is registered and is itself prunable.
    self.assertIn(grown, self.tree.decision_nodes)
    self.assertIn(grown, self.tree.prunable_decision_nodes)
    # Its first child is now a leaf of the tree; the old leaf is gone.
    self.assertIn(first_leaf, self.tree.leaf_nodes)
    self.assertNotIn(self.d, self.tree.nodes)
def test_pruning_non_leaf_parent(self):
    """Constructing a PruneMutation for a node with a decision-node child raises TypeError."""
    first_leaf = LeafNode(Split(self.data))
    second_leaf = LeafNode(Split(self.data))
    third_leaf = LeafNode(Split(self.data))
    inner_decision = DecisionNode(Split(self.data), first_leaf, second_leaf)
    # One of this node's children is itself a decision node.
    outer_decision = DecisionNode(Split(self.data), third_leaf, inner_decision)

    with self.assertRaises(TypeError):
        PruneMutation(outer_decision, first_leaf)
def sample_split_node(node: LeafNode) -> DecisionNode:
    """
    Turn a leaf node into a decision node with two leaf children.

    For a splittable leaf, split conditions are drawn with
    ``sample_split_condition`` and applied with ``split_node``.
    Otherwise the result is a decision node at the leaf's depth that reuses
    the leaf's own split for itself and for both children, which are placed
    one level deeper.
    """
    if not node.is_splittable():
        first_child = LeafNode(node.split, depth=node.depth + 1)
        second_child = LeafNode(node.split, depth=node.depth + 1)
        return DecisionNode(node.split, first_child, second_child, depth=node.depth)

    sampled_conditions = sample_split_condition(node)
    return split_node(node, sampled_conditions)
def test_same_prediction(self):
    """A tree mapped from scikit-learn predicts the same values (to 2 decimals) as its source.

    A single depth-2 boosting stage is fitted, its underlying tree is mapped
    onto a one-leaf tree with ``map_sklearn_tree_into_bartpy``, and the two
    sets of rounded predictions are compared.
    """
    from sklearn.ensemble import GradientBoostingRegressor

    # NOTE(review): newer scikit-learn releases name this loss
    # 'squared_error' -- confirm against the version this project pins.
    booster = GradientBoostingRegressor(
        n_estimators=1,
        max_depth=2,
        min_samples_split=2,
        learning_rate=0.8,
        loss='ls',
    )
    booster.fit(self.data.X.values, self.data.y.values)
    reference_tree = booster.estimators_[0][0].tree_

    mapped_tree = Tree([LeafNode(Split(self.data))])
    map_sklearn_tree_into_bartpy(mapped_tree, reference_tree)

    # The low-level scikit-learn tree is given float32 input here.
    raw_expected = reference_tree.predict(self.data.X.values.astype(np.float32))
    expected = [round(value, 2) for value in raw_expected.reshape(-1)]

    # Presumably forces predictions to be recomputed after the mapping --
    # TODO confirm against Tree.
    mapped_tree.cache_up_to_date = False
    raw_actual = mapped_tree.predict(self.data.X.values)
    actual = [round(value, 2) for value in raw_actual]

    self.assertListEqual(expected, actual)
def test_internal_prune(self):
    """Pruning internal node ``c`` replaces it with a leaf and drops ``c``, ``d`` and ``e``."""
    replacement = LeafNode(Split(self.data))
    mutate(self.tree, TreeMutation("prune", self.c, replacement))

    self.assertIn(replacement, self.tree.leaf_nodes)
    # The pruned node and both nodes beneath it must be gone from the tree.
    for removed in (self.c, self.d, self.e):
        self.assertNotIn(removed, self.tree.nodes)
def setUp(self):
    """Build a tree by splitting a root leaf and then growing its right child.

    ``a`` is the split root with children ``b`` (left) and ``x`` (right);
    ``x`` is then grown into ``c``, whose children are ``d`` and ``e``.
    """
    frame = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    self.data = Data(frame.values, np.array([1, 2, 3]))

    root_leaf = LeafNode(Split(self.data))
    root_conditions = (SplitCondition(0, 1, le), SplitCondition(0, 1, gt))
    self.a = split_node(root_leaf, root_conditions)
    self.b = self.a.left_child
    self.x = self.a.right_child
    self.tree = Tree([self.a, self.b, self.x])

    right_leaf = self.a._right_child
    right_conditions = (SplitCondition(1, 2, le), SplitCondition(1, 2, gt))
    self.c = split_node(right_leaf, right_conditions)
    mutate(self.tree, TreeMutation("grow", self.x, self.c))

    self.d = self.c.left_child
    self.e = self.c.right_child
def setUp(self):
    """Build a tree over formatted covariates by splitting a root leaf and growing its right child.

    ``a`` is the split root with children ``b`` (left) and ``x`` (right);
    ``x`` is then grown into ``c``, whose children are ``d`` and ``e``.
    """
    frame = pd.DataFrame({"a": [1, 2, 3], "b": [1, 2, 3]})
    covariates = format_covariate_matrix(frame)
    self.data = Data(covariates, np.array([1, 2, 3]).astype(float))

    root_leaf = LeafNode(Split(self.data))
    root_conditions = (SplitCondition(0, 1, le), SplitCondition(0, 1, gt))
    self.a = split_node(root_leaf, root_conditions)
    self.b = self.a.left_child
    self.x = self.a.right_child
    self.tree = Tree([self.a, self.b, self.x])

    right_leaf = self.a._right_child
    right_conditions = (SplitCondition(1, 2, le), SplitCondition(1, 2, gt))
    self.c = split_node(right_leaf, right_conditions)
    mutate(self.tree, TreeMutation("grow", self.x, self.c))

    self.d = self.c.left_child
    self.e = self.c.right_child
def test_pruning_leaf(self):
    """Constructing a PruneMutation whose existing node is a leaf raises TypeError."""
    with self.assertRaises(TypeError):
        existing_leaf = LeafNode(Split(self.data))
        replacement_leaf = LeafNode(Split(self.data))
        PruneMutation(existing_leaf, replacement_leaf)
def uniformly_sample_prune_mutation(tree: Tree) -> TreeMutation:
    """Pick a prunable decision node via ``random_prunable_decision_node`` and return its prune mutation.

    The replacement is a new leaf carrying the chosen node's split and depth.
    """
    target = random_prunable_decision_node(tree)
    return PruneMutation(target, LeafNode(target.split, depth=target.depth))
def prune_mutations(tree: Tree) -> List[TreeMutation]:
    """Return one prune mutation for every prunable decision node of ``tree``.

    Each mutation replaces the decision node with a new leaf that keeps the
    node's split and depth.
    """
    mutations = []
    for decision_node in tree.prunable_decision_nodes:
        replacement = LeafNode(decision_node.split, depth=decision_node.depth)
        mutations.append(PruneMutation(decision_node, replacement))
    return mutations
def test_invalid_prune(self):
    """Constructing a PruneMutation for fixture node ``self.a`` raises TypeError.

    NOTE(review): presumably ``self.a`` is rejected because it is not a
    prunable node in the fixture -- confirm against this class's setUp.
    """
    # Build the replacement leaf outside the assertion block so that only a
    # TypeError raised by PruneMutation itself can satisfy the test.
    updated_a = LeafNode(Split(self.data))
    with self.assertRaises(TypeError):
        PruneMutation(self.a, updated_a)
def step(self, model: Model, node: LeafNode) -> float:
    """Draw a value with ``self.sample``, store it on ``node`` and return it.

    :param model: passed through unchanged to ``self.sample``
    :param node: node that receives the drawn value via ``set_value``
    :return: the value that was drawn and stored
    """
    drawn = self.sample(model, node)
    node.set_value(drawn)
    return drawn
def setUp(self):
    """Create a five-row, single-column data set with one leaf node over its split."""
    frame = pd.DataFrame({"a": [1, 2, 3, 4, 5]})
    self.X = format_covariate_matrix(frame)

    # The already-formatted matrix is passed through format_covariate_matrix
    # a second time before being handed to Data, as in the original fixture.
    formatted = format_covariate_matrix(self.X)
    target = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    self.data = Data(formatted, target)

    self.split = Split(self.data)
    self.node = LeafNode(self.split)
def step(self, model: Model, node: LeafNode):
    """Draw a value with ``self.sample`` and store it on ``node``.

    :param model: passed through unchanged to ``self.sample``
    :param node: node that receives the drawn value via ``set_value``
    """
    drawn = self.sample(model, node)
    node.set_value(drawn)