def testCalculateBestGainsWithMinNodeWeightNoSplitOnFeaturePossible(self):
    """Testing Gain calculation with min node weight and no split.

    With min_node_weight=1 the first feature yields no split candidates while
    the second feature still splits node 1. With min_node_weight=10 neither
    feature yields a candidate.
    """
    with self.cached_session():
        max_splits = 7
        node_id_range = [1, 3]  # node 1 through 2 will be processed.
        ignored_node = [[0., 0.]] * 4  # all-zero stats for nodes 3-6
        stats_summary_list = [
            [
                [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.15, .0036], [.06, .007], [.1, .2]],  # node 1
                [[0., 0.], [-.33, .068], [0., 0.], [.3, .04]],  # node 2
            ] + [ignored_node] * 4,  # feature 0
            [
                [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.3, .5], [-.05, .6], [.06, .07]],  # node 1
                [[.1, .1], [.2, .03], [-.4, .05], [.07, .08]],  # node 2
            ] + [ignored_node] * 4,  # feature 1
        ]  # num_features * shape=[max_splits, num_buckets, 2]

        (node_ids_list, _, _, _, _) = (
            boosted_trees_ops.calculate_best_gains_per_feature(
                node_id_range,
                stats_summary_list,
                l1=0.0,
                l2=0.0,
                tree_complexity=0.0,
                min_node_weight=1,
                max_splits=max_splits))

        # Evaluate once and reuse the result for all three checks.
        node_ids = self.evaluate(node_ids_list)
        # We can't split either of the nodes on the first feature
        self.assertEqual(2, len(node_ids))
        self.assertAllEqual([], node_ids[0])
        self.assertAllEqual([1], node_ids[1])

        # Now check when we can't split on any feature
        (node_ids_list, _, _, _, _) = (
            boosted_trees_ops.calculate_best_gains_per_feature(
                node_id_range,
                stats_summary_list,
                l1=0.0,
                l2=0.0,
                tree_complexity=0.0,
                min_node_weight=10,
                max_splits=max_splits))
        self.assertAllEqual([[], []], self.evaluate(node_ids_list))
def testCalculateBestGainsWithMinNodeWeightNoSplitOnFeturePossible(self):
    """Testing Gain calculation with min node weight and no split.

    With min_node_weight=1 the first feature yields no split candidates while
    the second feature still splits node 1. With min_node_weight=10 neither
    feature yields a candidate.
    """
    # cached_session replaces the deprecated test_session.
    with self.cached_session() as sess:
        max_splits = 7
        node_id_range = [1, 3]  # node 1 through 2 will be processed.
        ignored_node = [[0., 0.]] * 4  # all-zero stats for nodes 3-6
        stats_summary_list = [
            [
                [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.15, .0036], [.06, .007], [.1, .2]],  # node 1
                [[0., 0.], [-.33, .068], [0., 0.], [.3, .04]],  # node 2
            ] + [ignored_node] * 4,  # feature 0
            [
                [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.3, .5], [-.05, .6], [.06, .07]],  # node 1
                [[.1, .1], [.2, .03], [-.4, .05], [.07, .08]],  # node 2
            ] + [ignored_node] * 4,  # feature 1
        ]  # num_features * shape=[max_splits, num_buckets, 2]

        (node_ids_list, _, _, _, _) = (
            boosted_trees_ops.calculate_best_gains_per_feature(
                node_id_range,
                stats_summary_list,
                l1=0.0,
                l2=0.0,
                tree_complexity=0.0,
                min_node_weight=1,
                max_splits=max_splits))

        # Run once and reuse the result for all three checks.
        node_ids = sess.run(node_ids_list)
        # We can't split either of the nodes on the first feature
        self.assertEqual(2, len(node_ids))
        self.assertAllEqual([], node_ids[0])
        self.assertAllEqual([1], node_ids[1])

        # Now check when we can't split on any feature
        (node_ids_list, _, _, _, _) = (
            boosted_trees_ops.calculate_best_gains_per_feature(
                node_id_range,
                stats_summary_list,
                l1=0.0,
                l2=0.0,
                tree_complexity=0.0,
                min_node_weight=10,
                max_splits=max_splits))
        self.assertAllEqual([[], []], sess.run(node_ids_list))
def testCalculateBestGainsWithTreeComplexity(self):
    """Verify best gains when L2 and a tree complexity penalty are both set."""
    with self.cached_session():
        l2 = 0.1
        tree_complexity = 3.
        best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=[1, 3],  # node 1 through 2 will be processed.
            stats_summary_list=self._get_stats_summary_for_split(),
            l1=0.0,
            l2=l2,
            tree_complexity=tree_complexity,
            min_node_weight=0,
            max_splits=7)
        (node_ids, gains, thresholds, left_contribs,
         right_contribs) = best_splits

        actual_node_ids = self.evaluate(node_ids)
        self.assertAllEqual([[1, 2], [1, 2]], actual_node_ids)

        actual_gains = self.evaluate(gains)
        self.assertAllClose(
            [[-3., -2.66068625], [-2.98120904, -2.66068625]], actual_gains)

        actual_thresholds = self.evaluate(thresholds)
        self.assertAllEqual([[0, 1], [1, 1]], actual_thresholds)

        # Each contrib is later added to the previous node value to obtain
        # the value of the corresponding (left or right) child node.
        actual_left = self.evaluate(left_contribs)
        self.assertAllClose(
            [[[0.], [.485294]], [[-.5], [-.6]]], actual_left)

        actual_right = self.evaluate(right_contribs)
        self.assertAllClose(
            [[[-.424658], [-.6]], [[-.043478], [.485294]]], actual_right)
def testCalculateBestGainsWithL1(self):
    """Verify best gains when only L1 regularization is applied."""
    with self.cached_session():
        l1 = 0.1
        best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=[1, 3],  # node 1 through 2 will be processed.
            stats_summary_list=self._get_stats_summary_for_split(),
            l1=l1,
            l2=0.0,
            tree_complexity=0.0,
            min_node_weight=0,
            max_splits=7)
        (node_ids, gains, thresholds, left_contribs,
         right_contribs) = best_splits

        actual_thresholds = self.evaluate(thresholds)
        self.assertAllEqual([[0, 1], [1, 1]], actual_thresholds)

        actual_node_ids = self.evaluate(node_ids)
        self.assertAllEqual([[1, 2], [1, 2]], actual_node_ids)

        actual_left = self.evaluate(left_contribs)
        self.assertAllClose(
            [[[0.0], [0.3965517]], [[-0.4], [-0.5]]], actual_left)

        actual_right = self.evaluate(right_contribs)
        self.assertAllClose(
            [[[-0.3333333], [-0.5]], [[0.0], [0.396552]]], actual_right)

        # Gain should also include an adjustment of the gradient by l1.
        actual_gains = self.evaluate(gains)
        self.assertAllClose(
            [[0.0, 0.191207], [0.01, 0.191207]], actual_gains)
def testCalculateBestGainsWithoutRegularization(self):
    """Verify best gains with every regularization term set to zero."""
    with self.cached_session():
        best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=[1, 3],  # node 1 through 2 will be processed.
            stats_summary_list=self._get_stats_summary_for_split(),
            l1=0.0,
            l2=0.0,
            tree_complexity=0.0,
            min_node_weight=0,
            max_splits=7)
        (node_ids, gains, thresholds, left_contribs,
         right_contribs) = best_splits

        actual_node_ids = self.evaluate(node_ids)
        self.assertAllEqual([[1, 2], [1, 2]], actual_node_ids)

        actual_gains = self.evaluate(gains)
        self.assertAllClose(
            [[0.004775, 0.41184], [0.02823, 0.41184]], actual_gains)

        actual_thresholds = self.evaluate(thresholds)
        self.assertAllEqual([[1, 1], [1, 1]], actual_thresholds)

        # Each contrib is later added to the previous node value to obtain
        # the value of the corresponding (left or right) child node.
        actual_left = self.evaluate(left_contribs)
        self.assertAllClose(
            [[[-.416667], [.568966]], [[-.6], [-.75]]], actual_left)

        actual_right = self.evaluate(right_contribs)
        self.assertAllClose(
            [[[-.592593], [-.75]], [[-.076923], [.568966]]], actual_right)
def grow_tree_from_stats_summaries(stats_summary_list):
    """Build the op that grows the ensemble from per-feature stats summaries.

    Split candidates are computed per feature over the node id range spanned
    by `node_ids` and then handed to `update_ensemble`.

    Args:
      stats_summary_list: per-feature stats summaries, forwarded unchanged to
        `calculate_best_gains_per_feature`.

    Returns:
      The op returned by `boosted_trees_ops.update_ensemble`.
    """
    # NOTE(review): the upper bound is reduce_max(node_ids) with no +1. If the
    # op treats node_id_range as half-open [first, last), the node with the
    # highest id would be skipped -- confirm against the op's contract.
    node_id_range = array_ops.stack(
        [math_ops.reduce_min(node_ids),
         math_ops.reduce_max(node_ids)])
    best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
        node_id_range=node_id_range,
        stats_summary_list=stats_summary_list,
        l1=tree_hparams.l1,
        l2=tree_hparams.l2,
        tree_complexity=tree_hparams.tree_complexity,
        max_splits=max_splits)
    (split_node_ids, split_gains, split_thresholds, left_contribs,
     right_contribs) = best_splits

    return boosted_trees_ops.update_ensemble(
        # Confirm if local_tree_ensemble or tree_ensemble should be used.
        tree_ensemble.resource_handle,
        feature_ids=math_ops.range(0, num_features, dtype=dtypes.int32),
        node_ids=split_node_ids,
        gains=split_gains,
        thresholds=split_thresholds,
        left_node_contribs=left_contribs,
        right_node_contribs=right_contribs,
        learning_rate=tree_hparams.learning_rate,
        max_depth=tree_hparams.max_depth,
        pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
def testCalculateBestGainsWithoutRegularization(self):
    """Verify best gains with every regularization term set to zero."""
    with self.cached_session() as session:
        best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=[1, 3],  # node 1 through 2 will be processed.
            stats_summary_list=self._get_stats_summary_for_split(),
            l1=0.0,
            l2=0.0,
            tree_complexity=0.0,
            min_node_weight=0,
            max_splits=7)
        (node_ids, gains, thresholds, left_contribs,
         right_contribs) = best_splits

        actual_node_ids = session.run(node_ids)
        self.assertAllEqual([[1, 2], [1, 2]], actual_node_ids)

        actual_gains = session.run(gains)
        self.assertAllClose(
            [[0.004775, 0.41184], [0.02823, 0.41184]], actual_gains)

        actual_thresholds = session.run(thresholds)
        self.assertAllEqual([[1, 1], [1, 1]], actual_thresholds)

        # Each contrib is later added to the previous node value to obtain
        # the value of the corresponding (left or right) child node.
        actual_left = session.run(left_contribs)
        self.assertAllClose(
            [[[-.416667], [.568966]], [[-.6], [-.75]]], actual_left)

        actual_right = session.run(right_contribs)
        self.assertAllClose(
            [[[-.592593], [-.75]], [[-.076923], [.568966]]], actual_right)
def testCalculateBestGainsWithL1(self):
    """Verify best gains when only L1 regularization is applied."""
    with self.cached_session() as session:
        l1 = 0.1
        best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=[1, 3],  # node 1 through 2 will be processed.
            stats_summary_list=self._get_stats_summary_for_split(),
            l1=l1,
            l2=0.0,
            tree_complexity=0.0,
            min_node_weight=0,
            max_splits=7)
        (node_ids, gains, thresholds, left_contribs,
         right_contribs) = best_splits

        actual_thresholds = session.run(thresholds)
        self.assertAllEqual([[0, 1], [1, 1]], actual_thresholds)

        actual_node_ids = session.run(node_ids)
        self.assertAllEqual([[1, 2], [1, 2]], actual_node_ids)

        actual_left = session.run(left_contribs)
        self.assertAllClose(
            [[[0.0], [0.3965517]], [[-0.4], [-0.5]]], actual_left)

        actual_right = session.run(right_contribs)
        self.assertAllClose(
            [[[-0.3333333], [-0.5]], [[0.0], [0.396552]]], actual_right)

        # Gain should also include an adjustment of the gradient by l1.
        actual_gains = session.run(gains)
        self.assertAllClose(
            [[0.0, 0.191207], [0.01, 0.191207]], actual_gains)
def testCalculateBestGainsWithTreeComplexity(self):
    """Verify best gains when L2 and a tree complexity penalty are both set."""
    with self.cached_session() as session:
        l2 = 0.1
        tree_complexity = 3.
        best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=[1, 3],  # node 1 through 2 will be processed.
            stats_summary_list=self._get_stats_summary_for_split(),
            l1=0.0,
            l2=l2,
            tree_complexity=tree_complexity,
            min_node_weight=0,
            max_splits=7)
        (node_ids, gains, thresholds, left_contribs,
         right_contribs) = best_splits

        actual_node_ids = session.run(node_ids)
        self.assertAllEqual([[1, 2], [1, 2]], actual_node_ids)

        actual_gains = session.run(gains)
        self.assertAllClose(
            [[-3., -2.66068625], [-2.98120904, -2.66068625]], actual_gains)

        actual_thresholds = session.run(thresholds)
        self.assertAllEqual([[0, 1], [1, 1]], actual_thresholds)

        # Each contrib is later added to the previous node value to obtain
        # the value of the corresponding (left or right) child node.
        actual_left = session.run(left_contribs)
        self.assertAllClose(
            [[[0.], [.485294]], [[-.5], [-.6]]], actual_left)

        actual_right = session.run(right_contribs)
        self.assertAllClose(
            [[[-.424658], [-.6]], [[-.043478], [.485294]]], actual_right)
def grow_tree_from_stats_summaries(stats_summary_list):
    """Build the op that grows the ensemble from per-feature stats summaries.

    Split candidates are computed per feature over `last_layer_nodes_range`
    and then handed to `update_ensemble`.

    Args:
      stats_summary_list: per-feature stats summaries, forwarded unchanged to
        `calculate_best_gains_per_feature`.

    Returns:
      The op returned by `boosted_trees_ops.update_ensemble`.
    """
    best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
        node_id_range=last_layer_nodes_range,
        stats_summary_list=stats_summary_list,
        l1=tree_hparams.l1,
        l2=tree_hparams.l2,
        tree_complexity=tree_hparams.tree_complexity,
        max_splits=max_splits)
    (split_node_ids, split_gains, split_thresholds, left_contribs,
     right_contribs) = best_splits

    return boosted_trees_ops.update_ensemble(
        # Confirm if local_tree_ensemble or tree_ensemble should be used.
        tree_ensemble.resource_handle,
        feature_ids=math_ops.range(0, num_features, dtype=dtypes.int32),
        node_ids=split_node_ids,
        gains=split_gains,
        thresholds=split_thresholds,
        left_node_contribs=left_contribs,
        right_node_contribs=right_contribs,
        learning_rate=tree_hparams.learning_rate,
        max_depth=tree_hparams.max_depth,
        pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
def testCalculateBestGainsWithL2(self):
    """Verify best gains when only L2 regularization is applied."""
    with self.cached_session():
        best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=[1, 3],  # node 1 through 2 will be processed.
            stats_summary_list=self._get_stats_summary_for_split(),
            l1=0.0,
            l2=0.1,
            tree_complexity=0.0,
            min_node_weight=0,
            max_splits=7)
        (node_ids, gains, thresholds, left_contribs,
         right_contribs) = best_splits

        actual_node_ids = self.evaluate(node_ids)
        self.assertAllEqual([[1, 2], [1, 2]], actual_node_ids)

        actual_gains = self.evaluate(gains)
        self.assertAllClose(
            [[0., 0.33931375], [0.01879096, 0.33931375]], actual_gains)

        actual_thresholds = self.evaluate(thresholds)
        self.assertAllEqual([[0, 1], [1, 1]], actual_thresholds)

        # Each contrib is later added to the previous node value to obtain
        # the value of the corresponding (left or right) child node.
        actual_left = self.evaluate(left_contribs)
        self.assertAllClose(
            [[[0.], [.485294]], [[-.5], [-.6]]], actual_left)

        actual_right = self.evaluate(right_contribs)
        self.assertAllClose(
            [[[-.424658], [-.6]], [[-.043478], [.485294]]], actual_right)
def testCalculateBestGainsWithL1(self):
    """Verify best gains when only L1 regularization is applied."""
    with self.cached_session() as session:
        zero_node = [[0., 0.]] * 4  # all-zero stats for nodes 3-6
        feature_0_stats = [
            [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
            [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
            [[0., 0.], [-.33, .58], [0., 0.], [.3, .4]],  # node 2
        ] + [zero_node] * 4  # nodes 3-6; ignored
        feature_1_stats = [
            [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
            [[0., 0.], [.3, .5], [-.05, .06], [.06, .07]],  # node 1
            [[.1, .1], [.2, .3], [-.4, .5], [.07, .08]],  # node 2
        ] + [zero_node] * 4  # nodes 3-6; ignored
        # num_features * shape=[max_splits, num_buckets, 2]
        stats_summary_list = [feature_0_stats, feature_1_stats]

        l1 = 0.1
        best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=[1, 3],  # node 1 through 2 will be processed.
            stats_summary_list=stats_summary_list,
            l1=l1,
            l2=0.0,
            tree_complexity=0.0,
            min_node_weight=0,
            max_splits=7)
        (node_ids, gains, thresholds, left_contribs,
         right_contribs) = best_splits

        actual_thresholds = session.run(thresholds)
        self.assertAllEqual([[0, 1], [1, 1]], actual_thresholds)

        actual_node_ids = session.run(node_ids)
        self.assertAllEqual([[1, 2], [1, 2]], actual_node_ids)

        actual_left = session.run(left_contribs)
        self.assertAllClose(
            [[[0.0], [0.3965517]], [[-0.4], [-0.5]]], actual_left)

        actual_right = session.run(right_contribs)
        self.assertAllClose(
            [[[-0.3333333], [-0.5]], [[0.0], [0.396552]]], actual_right)

        # Gain should also include an adjustment of the gradient by l1.
        actual_gains = session.run(gains)
        self.assertAllClose(
            [[0.0, 0.191207], [0.01, 0.191207]], actual_gains)
def testCalculateBestGainsWithoutRegularization(self):
    """Verify best gains with every regularization term set to zero."""
    with self.cached_session():
        zero_node = [[0., 0.]] * 4  # all-zero stats for nodes 3-6
        feature_0_stats = [
            [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
            [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
            [[0., 0.], [-.33, .58], [0., 0.], [.3, .4]],  # node 2
        ] + [zero_node] * 4  # nodes 3-6; ignored
        feature_1_stats = [
            [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
            [[0., 0.], [.3, .5], [-.05, .06], [.06, .07]],  # node 1
            [[.1, .1], [.2, .3], [-.4, .5], [.07, .08]],  # node 2
        ] + [zero_node] * 4  # nodes 3-6; ignored
        # num_features * shape=[max_splits, num_buckets, 2]
        stats_summary_list = [feature_0_stats, feature_1_stats]

        best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=[1, 3],  # node 1 through 2 will be processed.
            stats_summary_list=stats_summary_list,
            l1=0.0,
            l2=0.0,
            tree_complexity=0.0,
            min_node_weight=0,
            max_splits=7)
        (node_ids, gains, thresholds, left_contribs,
         right_contribs) = best_splits

        actual_node_ids = self.evaluate(node_ids)
        self.assertAllEqual([[1, 2], [1, 2]], actual_node_ids)

        actual_gains = self.evaluate(gains)
        self.assertAllClose(
            [[0.004775, 0.41184], [0.02823, 0.41184]], actual_gains)

        actual_thresholds = self.evaluate(thresholds)
        self.assertAllEqual([[1, 1], [1, 1]], actual_thresholds)

        # Each contrib is later added to the previous node value to obtain
        # the value of the corresponding (left or right) child node.
        actual_left = self.evaluate(left_contribs)
        self.assertAllClose(
            [[[-.416667], [.568966]], [[-.6], [-.75]]], actual_left)

        actual_right = self.evaluate(right_contribs)
        self.assertAllClose(
            [[[-.592593], [-.75]], [[-.076923], [.568966]]], actual_right)
def testCalculateBestGainsWithMinNodeWeight(self):
    """Verify best gains when a minimum node weight of 1 is enforced."""
    with self.cached_session():
        zero_node = [[0., 0.]] * 4  # all-zero stats for nodes 3-6
        feature_0_stats = [
            [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
            [[0., 0.], [.15, .036], [.06, .07], [.1, .2]],  # node 1
            [[0., 0.], [-.33, .68], [0., 0.], [.3, .4]],  # node 2
        ] + [zero_node] * 4  # nodes 3-6; ignored
        feature_1_stats = [
            [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
            [[0., 0.], [.3, .5], [-.05, .6], [.06, .07]],  # node 1
            [[.1, .1], [.2, .03], [-.4, .05], [.07, .08]],  # node 2
        ] + [zero_node] * 4  # nodes 3-6; ignored
        # num_features * shape=[max_splits, num_buckets, 2]
        stats_summary_list = [feature_0_stats, feature_1_stats]

        best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=[1, 3],  # node 1 through 2 will be processed.
            stats_summary_list=stats_summary_list,
            l1=0.0,
            l2=0.0,
            tree_complexity=0.0,
            min_node_weight=1,
            max_splits=7)
        (node_ids, gains, thresholds, left_contribs,
         right_contribs) = best_splits

        # Because of the min node weight, the first feature cannot split
        # node 1 and the second feature cannot split node 2.
        actual_node_ids = self.evaluate(node_ids)
        self.assertAllEqual([[2], [1]], actual_node_ids)

        actual_gains = self.evaluate(gains)
        self.assertAllClose([[0.384314], [0.098013]], actual_gains)

        actual_thresholds = self.evaluate(thresholds)
        self.assertAllEqual([[1], [1]], actual_thresholds)

        actual_left = self.evaluate(left_contribs)
        self.assertAllClose([[[0.4852941]], [[-.6]]], actual_left)

        actual_right = self.evaluate(right_contribs)
        self.assertAllClose([[[-0.75]], [[-0.014925]]], actual_right)
def testCalculateBestGainsWithL2(self):
    """Testing Gain calculation with L2."""
    # cached_session replaces the deprecated test_session.
    with self.cached_session() as sess:
        max_splits = 7
        node_id_range = [1, 3]  # node 1 through 2 will be processed.
        ignored_node = [[0., 0.]] * 4  # all-zero stats for nodes 3-6
        stats_summary_list = [
            [
                [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
                [[0., 0.], [-.33, .58], [0., 0.], [.3, .4]],  # node 2
            ] + [ignored_node] * 4,  # feature 0
            [
                [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.3, .5], [-.05, .06], [.06, .07]],  # node 1
                [[.1, .1], [.2, .3], [-.4, .5], [.07, .08]],  # node 2
            ] + [ignored_node] * 4,  # feature 1
        ]  # num_features * shape=[max_splits, num_buckets, 2]

        (node_ids_list, gains_list, thresholds_list, left_node_contribs_list,
         right_node_contribs_list
        ) = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range,
            stats_summary_list,
            l1=0.0,
            l2=0.1,
            tree_complexity=0.0,
            min_node_weight=0,
            max_splits=max_splits)

        self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list))
        self.assertAllClose([[0., 0.33931375], [0.01879096, 0.33931375]],
                            sess.run(gains_list))
        self.assertAllEqual([[0, 1], [1, 1]], sess.run(thresholds_list))
        # The left node contrib will be later added to the previous node
        # value to make the left node value, and the same for right node
        # contrib.
        self.assertAllClose([[[0.], [.485294]], [[-.5], [-.6]]],
                            sess.run(left_node_contribs_list))
        self.assertAllClose([[[-.424658], [-.6]], [[-.043478], [.485294]]],
                            sess.run(right_node_contribs_list))
def testCalculateBestGainsWithTreeComplexity(self):
    """Testing best gain calculation with tree complexity."""
    # cached_session replaces the deprecated test_session.
    with self.cached_session() as sess:
        max_splits = 7
        node_id_range = [1, 3]  # node 1 through 2 will be processed.
        ignored_node = [[0., 0.]] * 4  # all-zero stats for nodes 3-6
        stats_summary_list = [
            [
                [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
                [[0., 0.], [-.33, .58], [0., 0.], [.3, .4]],  # node 2
            ] + [ignored_node] * 4,  # feature 0
            [
                [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.3, .5], [-.05, .06], [.06, .07]],  # node 1
                [[.1, .1], [.2, .3], [-.4, .5], [.07, .08]],  # node 2
            ] + [ignored_node] * 4,  # feature 1
        ]  # num_features * shape=[max_splits, num_buckets, 2]

        l2 = 0.1
        tree_complexity = 3.
        (node_ids_list, gains_list, thresholds_list, left_node_contribs_list,
         right_node_contribs_list
        ) = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range,
            stats_summary_list,
            l1=0.0,
            l2=l2,
            tree_complexity=tree_complexity,
            min_node_weight=0,
            max_splits=max_splits)

        self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list))
        self.assertAllClose([[-3., -2.66068625], [-2.98120904, -2.66068625]],
                            sess.run(gains_list))
        self.assertAllEqual([[0, 1], [1, 1]], sess.run(thresholds_list))
        # The left node contrib will be later added to the previous node
        # value to make the left node value, and the same for right node
        # contrib.
        self.assertAllClose([[[0.], [.485294]], [[-.5], [-.6]]],
                            sess.run(left_node_contribs_list))
        self.assertAllClose([[[-.424658], [-.6]], [[-.043478], [.485294]]],
                            sess.run(right_node_contribs_list))
def testCalculateBestGainsWithL1(self):
    """Testing Gain calculation with L1."""
    # cached_session replaces the deprecated test_session.
    with self.cached_session() as sess:
        max_splits = 7
        node_id_range = [1, 3]  # node 1 through 2 will be processed.
        ignored_node = [[0., 0.]] * 4  # all-zero stats for nodes 3-6
        stats_summary_list = [
            [
                [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
                [[0., 0.], [-.33, .58], [0., 0.], [.3, .4]],  # node 2
            ] + [ignored_node] * 4,  # feature 0
            [
                [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.3, .5], [-.05, .06], [.06, .07]],  # node 1
                [[.1, .1], [.2, .3], [-.4, .5], [.07, .08]],  # node 2
            ] + [ignored_node] * 4,  # feature 1
        ]  # num_features * shape=[max_splits, num_buckets, 2]

        l1 = 0.1
        (node_ids_list, gains_list, thresholds_list, left_node_contribs_list,
         right_node_contribs_list
        ) = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range,
            stats_summary_list,
            l1=l1,
            l2=0.0,
            tree_complexity=0.0,
            min_node_weight=0,
            max_splits=max_splits)

        self.assertAllEqual([[0, 1], [1, 1]], sess.run(thresholds_list))
        self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list))
        self.assertAllClose([[[0.0], [0.3965517]], [[-0.4], [-0.5]]],
                            sess.run(left_node_contribs_list))
        self.assertAllClose([[[-0.3333333], [-0.5]], [[0.0], [0.396552]]],
                            sess.run(right_node_contribs_list))
        # Gain should also include an adjustment of the gradient by l1.
        self.assertAllClose([[0.0, 0.191207], [0.01, 0.191207]],
                            sess.run(gains_list))
def testCalculateBestGainsWithoutRegularization(self):
    """Testing Gain calculation without any regularization."""
    # cached_session replaces the deprecated test_session.
    with self.cached_session() as sess:
        max_splits = 7
        node_id_range = [1, 3]  # node 1 through 2 will be processed.
        ignored_node = [[0., 0.]] * 4  # all-zero stats for nodes 3-6
        stats_summary_list = [
            [
                [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
                [[0., 0.], [-.33, .58], [0., 0.], [.3, .4]],  # node 2
            ] + [ignored_node] * 4,  # feature 0
            [
                [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.3, .5], [-.05, .06], [.06, .07]],  # node 1
                [[.1, .1], [.2, .3], [-.4, .5], [.07, .08]],  # node 2
            ] + [ignored_node] * 4,  # feature 1
        ]  # num_features * shape=[max_splits, num_buckets, 2]

        (node_ids_list, gains_list, thresholds_list, left_node_contribs_list,
         right_node_contribs_list
        ) = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range,
            stats_summary_list,
            l1=0.0,
            l2=0.0,
            tree_complexity=0.0,
            min_node_weight=0,
            max_splits=max_splits)

        self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list))
        self.assertAllClose([[0.004775, 0.41184], [0.02823, 0.41184]],
                            sess.run(gains_list))
        self.assertAllEqual([[1, 1], [1, 1]], sess.run(thresholds_list))
        # The left node contrib will be later added to the previous node
        # value to make the left node value, and the same for right node
        # contrib.
        self.assertAllClose([[[-.416667], [.568966]], [[-.6], [-.75]]],
                            sess.run(left_node_contribs_list))
        self.assertAllClose([[[-.592593], [-.75]], [[-.076923], [.568966]]],
                            sess.run(right_node_contribs_list))
def testCalculateBestGainsWithMinNodeWeight(self):
    """Testing Gain calculation with min node weight."""
    # cached_session replaces the deprecated test_session.
    with self.cached_session() as sess:
        max_splits = 7
        node_id_range = [1, 3]  # node 1 through 2 will be processed.
        ignored_node = [[0., 0.]] * 4  # all-zero stats for nodes 3-6
        stats_summary_list = [
            [
                [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.15, .036], [.06, .07], [.1, .2]],  # node 1
                [[0., 0.], [-.33, .68], [0., 0.], [.3, .4]],  # node 2
            ] + [ignored_node] * 4,  # feature 0
            [
                [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.3, .5], [-.05, .6], [.06, .07]],  # node 1
                [[.1, .1], [.2, .03], [-.4, .05], [.07, .08]],  # node 2
            ] + [ignored_node] * 4,  # feature 1
        ]  # num_features * shape=[max_splits, num_buckets, 2]

        (node_ids_list, gains_list, thresholds_list, left_node_contribs_list,
         right_node_contribs_list
        ) = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range,
            stats_summary_list,
            l1=0.0,
            l2=0.0,
            tree_complexity=0.0,
            min_node_weight=1,
            max_splits=max_splits)

        # Because of the min node weight, feature 0 cannot split node 1 and
        # feature 1 cannot split node 2.
        self.assertAllEqual([[2], [1]], sess.run(node_ids_list))
        self.assertAllClose([[0.384314], [0.098013]], sess.run(gains_list))
        self.assertAllEqual([[1], [1]], sess.run(thresholds_list))
        self.assertAllClose([[[0.4852941]], [[-.6]]],
                            sess.run(left_node_contribs_list))
        self.assertAllClose([[[-0.75]], [[-0.014925]]],
                            sess.run(right_node_contribs_list))
def _grow_tree_from_stats_summaries(self, stats_summaries_list,
                                    feature_ids_list, last_layer_nodes_range):
    """Build the op that grows the ensemble from grouped stats summaries.

    Best split candidates are computed separately for every group of
    features, concatenated in group order, and passed to `update_ensemble`.

    Args:
      stats_summaries_list: one stats-summary list per feature group.
      feature_ids_list: one list of feature ids per feature group; must have
        the same length as `stats_summaries_list`.
      last_layer_nodes_range: forwarded as `node_id_range` to
        `calculate_best_gains_per_feature`.

    Returns:
      The op returned by `boosted_trees_ops.update_ensemble`.
    """
    assert len(stats_summaries_list) == len(feature_ids_list)

    hparams = self._tree_hparams
    max_splits = _get_max_splits(hparams)

    # Accumulators for the per-group results, concatenated in group order.
    all_feature_ids = []
    all_node_ids = []
    all_gains = []
    all_thresholds = []
    all_left_contribs = []
    all_right_contribs = []

    for group_index, group_feature_ids in enumerate(feature_ids_list):
        group_result = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=last_layer_nodes_range,
            stats_summary_list=stats_summaries_list[group_index],
            l1=hparams.l1,
            l2=hparams.l2,
            tree_complexity=hparams.tree_complexity,
            min_node_weight=hparams.min_node_weight,
            max_splits=max_splits)
        (group_node_ids, group_gains, group_thresholds, group_left_contribs,
         group_right_contribs) = group_result

        all_feature_ids.extend(group_feature_ids)
        all_node_ids.extend(group_node_ids)
        all_gains.extend(group_gains)
        all_thresholds.extend(group_thresholds)
        all_left_contribs.extend(group_left_contribs)
        all_right_contribs.extend(group_right_contribs)

    return boosted_trees_ops.update_ensemble(
        # Confirm if local_tree_ensemble or tree_ensemble should be used.
        self._tree_ensemble.resource_handle,
        feature_ids=all_feature_ids,
        node_ids=all_node_ids,
        gains=all_gains,
        thresholds=all_thresholds,
        left_node_contribs=all_left_contribs,
        right_node_contribs=all_right_contribs,
        learning_rate=hparams.learning_rate,
        max_depth=hparams.max_depth,
        pruning_mode=self._pruning_mode_parsed)
def grow_tree_from_stats_summaries(stats_summaries_list, feature_ids_list):
    """Build the op that grows the ensemble from grouped stats summaries.

    Best split candidates are computed separately for every group of
    features, concatenated in group order, and passed to `update_ensemble`.

    Args:
      stats_summaries_list: one stats-summary list per feature group.
      feature_ids_list: one list of feature ids per feature group; must have
        the same length as `stats_summaries_list`.

    Returns:
      The op returned by `boosted_trees_ops.update_ensemble`.
    """
    assert len(stats_summaries_list) == len(feature_ids_list)

    # Accumulators for the per-group results, concatenated in group order.
    all_feature_ids = []
    all_node_ids = []
    all_gains = []
    all_thresholds = []
    all_left_contribs = []
    all_right_contribs = []

    for group_index, group_feature_ids in enumerate(feature_ids_list):
        group_result = boosted_trees_ops.calculate_best_gains_per_feature(
            node_id_range=last_layer_nodes_range,
            stats_summary_list=stats_summaries_list[group_index],
            l1=tree_hparams.l1,
            l2=tree_hparams.l2,
            tree_complexity=tree_hparams.tree_complexity,
            min_node_weight=tree_hparams.min_node_weight,
            max_splits=max_splits)
        (group_node_ids, group_gains, group_thresholds, group_left_contribs,
         group_right_contribs) = group_result

        all_feature_ids.extend(group_feature_ids)
        all_node_ids.extend(group_node_ids)
        all_gains.extend(group_gains)
        all_thresholds.extend(group_thresholds)
        all_left_contribs.extend(group_left_contribs)
        all_right_contribs.extend(group_right_contribs)

    return boosted_trees_ops.update_ensemble(
        # Confirm if local_tree_ensemble or tree_ensemble should be used.
        tree_ensemble.resource_handle,
        feature_ids=all_feature_ids,
        node_ids=all_node_ids,
        gains=all_gains,
        thresholds=all_thresholds,
        left_node_contribs=all_left_contribs,
        right_node_contribs=all_right_contribs,
        learning_rate=tree_hparams.learning_rate,
        max_depth=tree_hparams.max_depth,
        pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)