def testCalculateBestMultiDimFeatureSplitsWithNoSplitOnFeaturePossible( self): """Testing best split calculation with min node weight and no split.""" node_id_range = [1, 3] # node 1 through 2 will be processed. stats_summary = np.asarray([ [ [[0., 0.], [.08, .09], [0., 0.], [0., 0.]], # node 0; ignored [[0., 0.], [.15, .36], [.06, .7], [.1, .2]], # node 1 [[0., 0.], [-.33, .068], [0., 0.], [.3, .04]], # node 2 [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 3; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 4; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 5; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 6; ignored ], # feature 0 [ [[0., 0.], [0., 0.], [.08, .09], [0., 0.]], # node 0; ignored [[0., 0.], [.3, .5], [-.05, .06], [.06, .7]], # node 1 [[.1, .1], [.2, -.05], [-.4, .05], [.07, .08]], # node 2 [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 3; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 4; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 5; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 6; ignored ], # feature 1 ]) # num_features * shape=[max_splits, num_buckets, 2] # reshape to [max_splits, num_features, num_buckets, 2] stats_summary = np.moveaxis(stats_summary, 0, 1) (node_ids, _, _, _, _, _, _) = boosted_trees_ops.calculate_best_feature_split( node_id_range, stats_summary, l1=0.0, l2=0.0, tree_complexity=0.0, min_node_weight=1, logits_dimension=1) # We can't split either of the nodes on the first feature self.assertAllEqual([1], node_ids) # Now check when we can't split on any feature (node_ids, _, _, _, _, _, _) = boosted_trees_ops.calculate_best_feature_split( node_id_range, stats_summary, l1=0.0, l2=0.0, tree_complexity=0.0, min_node_weight=10, logits_dimension=1) self.assertAllEqual([], node_ids)
def testCalculateBestMultiDimFeatureSplitsWithNoSplitOnFeaturePossible(self): """Testing best split calculation with min node weight and no split.""" node_id_range = [1, 3] # node 1 through 2 will be processed. stats_summary = np.asarray([ [ [[0., 0.], [.08, .09], [0., 0.], [0., 0.]], # node 0; ignored [[0., 0.], [.15, .36], [.06, .7], [.1, .2]], # node 1 [[0., 0.], [-.33, .068], [0., 0.], [.3, .04]], # node 2 [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 3; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 4; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 5; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 6; ignored ], # feature 0 [ [[0., 0.], [0., 0.], [.08, .09], [0., 0.]], # node 0; ignored [[0., 0.], [.3, .5], [-.05, .06], [.06, .7]], # node 1 [[.1, .1], [.2, -.05], [-.4, .05], [.07, .08]], # node 2 [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 3; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 4; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 5; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 6; ignored ], # feature 1 ]) # num_features * shape=[max_splits, num_buckets, 2] # reshape to [max_splits, num_features, num_buckets, 2] stats_summary = np.moveaxis(stats_summary, 0, 1) (node_ids, _, _, _, _, _, _) = boosted_trees_ops.calculate_best_feature_split( node_id_range, stats_summary, l1=0.0, l2=0.0, tree_complexity=0.0, min_node_weight=1, logits_dimension=1) # We can't split either of the nodes on the first feature self.assertAllEqual([1], node_ids) # Now check when we can't split on any feature (node_ids, _, _, _, _, _, _) = boosted_trees_ops.calculate_best_feature_split( node_id_range, stats_summary, l1=0.0, l2=0.0, tree_complexity=0.0, min_node_weight=10, logits_dimension=1) self.assertAllEqual([], node_ids)
def testCalculateBestMultiDimFeatureSplitsWithoutRegularization(self): """Testing best split calculation without any regularization.""" node_id_range = [1, 3] # node 1 through 2 will be processed. stats_summary = np.asarray(self._get_stats_summary_for_split()) # reshape to [max_splits, num_features, num_buckets, 2] stats_summary = np.moveaxis(stats_summary, 0, 1) (node_ids, gains, feature_dimensions, thresholds, left_node_contribs, right_node_contribs, split_types) = self.evaluate( boosted_trees_ops.calculate_best_feature_split( node_id_range, stats_summary, l1=0.0, l2=0.0, tree_complexity=0.0, min_node_weight=0, logits_dimension=1)) # Get same result as v1 op (CalculateBestGainsPerFeature), and find the # feature dimension that has the best gain. self.assertAllEqual([1, 2], node_ids) self.assertAllClose([0.02823, 0.41184], gains) self.assertAllEqual([1, 1], thresholds) self.assertAllEqual([1, 0], feature_dimensions) # # The left node contrib will be later added to the previous node value to # # make the left node value, and the same for right node contrib. self.assertAllClose([[-.6], [.568966]], left_node_contribs) self.assertAllClose([[-.076923], [-.75]], right_node_contribs) self.assertAllEqual([_INEQUALITY_DEFAULT_LEFT] * 2, split_types)
def testCalculateBestMultiDimFeatureSplitsWithTreeComplexity(self): """Testing best split calculation with tree complexity.""" node_id_range = [1, 3] # node 1 through 2 will be processed. stats_summary = np.asarray(self._get_stats_summary_for_split()) # reshape to [max_splits, num_features, num_buckets, 2] stats_summary = np.moveaxis(stats_summary, 0, 1) l2 = 0.1 tree_complexity = 3. (node_ids, gains, feature_dimensions, thresholds, left_node_contribs, right_node_contribs, split_types) = self.evaluate( boosted_trees_ops.calculate_best_feature_split( node_id_range, stats_summary, l1=0., l2=l2, tree_complexity=tree_complexity, min_node_weight=0, logits_dimension=1)) # Get same result as v1 op (CalculateBestGainsPerFeature), and find the # feature dimension that has the best gain. self.assertAllEqual([1, 2], node_ids) # Gain should also include an adjustment of the gradient by l1. self.assertAllClose([-2.98120904, -2.66068625], gains) self.assertAllEqual([1, 1], thresholds) self.assertAllClose([[-0.5], [0.485294]], left_node_contribs) self.assertAllClose([[-0.043478], [-.6]], right_node_contribs) self.assertAllEqual([1, 0], feature_dimensions) self.assertAllEqual([_INEQUALITY_DEFAULT_LEFT] * 2, split_types)
def testCalculateMultiDimBestSplitsWithMinNodeWeight(self): """Testing best split calculation with min node weight.""" node_id_range = [1, 3] # node 1 through 2 will be processed. stats_summary = np.asarray([ [ [[0., 0.], [.08, .09], [0., 0.], [0., 0.]], # node 0; ignored [[0., 0.], [.15, .36], [.06, .61], [.1, .2]], # node 1 [[0., 0.], [-.33, .68], [0., 0.], [.3, .4]], # node 2 [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 3; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 4; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 5; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 6; ignored ], # feature 0 [ [[0., 0.], [0., 0.], [.08, .09], [0., 0.]], # node 0; ignored [[0., 0.], [.3, .5], [-.05, .6], [.06, .07]], # node 1 [[.1, 1.], [.2, -.05], [-.4, .05], [.07, .08]], # node 2 [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 3; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 4; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 5; ignored [[0., 0.], [0., 0.], [0., 0.], [0., 0.]], # node 6; ignored ], # feature 1 ]) # num_features * shape=[max_splits, num_buckets, 2] # reshape to [max_splits, num_features, num_buckets, 2] stats_summary = np.moveaxis(stats_summary, 0, 1) (node_ids, gains, feature_dimensions, thresholds, left_node_contribs, right_node_contribs, split_types) = self.evaluate( boosted_trees_ops.calculate_best_feature_split( node_id_range, stats_summary, l1=0., l2=0., tree_complexity=0., min_node_weight=1, logits_dimension=1)) self.assertAllEqual([1, 2], node_ids) # Gain should also include an adjustment of the gradient by l1. self.assertAllClose([0.098013, 0.931596], gains) self.assertAllEqual([1, 1], thresholds) self.assertAllClose([[-.6], [-0.315789]], left_node_contribs) self.assertAllClose([[-0.014925], [2.53846]], right_node_contribs) self.assertAllEqual([1, 1], feature_dimensions) self.assertAllEqual([_INEQUALITY_DEFAULT_LEFT] * 2, split_types)