コード例 #1
0
  def testCalculateBestGainsWithMinNodeWeightNoSplitOnFeaturePossible(self):
    """Testing Gain calculation with min node weight and no split.

    With min_node_weight=1 the test expects no node ids for feature 0 and
    only node 1 for feature 1. With min_node_weight=10 it expects no node
    ids for either feature.
    """
    with self.cached_session():
      max_splits = 7
      node_id_range = [1, 3]  # node 1 through 2 will be processed.
      unused_node = [[0., 0.]] * 4  # all-zero stats used for nodes 3..6
      stats_summary_list = [
          [
              [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
              [[0., 0.], [.15, .0036], [.06, .007], [.1, .2]],  # node 1
              [[0., 0.], [-.33, .068], [0., 0.], [.3, .04]],  # node 2
          ] + [unused_node] * 4,  # feature 0 (nodes 3..6 ignored)
          [
              [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
              [[0., 0.], [.3, .5], [-.05, .6], [.06, .07]],  # node 1
              [[.1, .1], [.2, .03], [-.4, .05], [.07, .08]],  # node 2
          ] + [unused_node] * 4,  # feature 1 (nodes 3..6 ignored)
      ]  # num_features * shape=[max_splits, num_buckets, 2]

      def evaluated_node_ids(min_node_weight):
        """Runs the op once and returns the evaluated per-feature node ids."""
        (node_ids_list, _, _, _,
         _) = boosted_trees_ops.calculate_best_gains_per_feature(
             node_id_range,
             stats_summary_list,
             l1=0.0,
             l2=0.0,
             tree_complexity=0.0,
             min_node_weight=min_node_weight,
             max_splits=max_splits)
        return self.evaluate(node_ids_list)

      # We can't split either of the nodes on the first feature.
      # Evaluate once and reuse the result for all three assertions.
      node_ids = evaluated_node_ids(min_node_weight=1)
      self.assertEqual(2, len(node_ids))
      self.assertAllEqual([], node_ids[0])
      self.assertAllEqual([1], node_ids[1])

      # Now check when we can't split on any feature
      self.assertAllEqual([[], []], evaluated_node_ids(min_node_weight=10))
コード例 #2
0
  def testCalculateBestGainsWithMinNodeWeightNoSplitOnFeturePossible(self):
    """Testing Gain calculation with min node weight and no split.

    With min_node_weight=1 the test expects no node ids for feature 0 and
    only node 1 for feature 1. With min_node_weight=10 it expects no node
    ids for either feature.
    """
    with self.test_session() as sess:
      max_splits = 7
      node_id_range = [1, 3]  # node 1 through 2 will be processed.
      unused_node = [[0., 0.]] * 4  # all-zero stats used for nodes 3..6
      stats_summary_list = [
          [
              [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
              [[0., 0.], [.15, .0036], [.06, .007], [.1, .2]],  # node 1
              [[0., 0.], [-.33, .068], [0., 0.], [.3, .04]],  # node 2
          ] + [unused_node] * 4,  # feature 0 (nodes 3..6 ignored)
          [
              [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
              [[0., 0.], [.3, .5], [-.05, .6], [.06, .07]],  # node 1
              [[.1, .1], [.2, .03], [-.4, .05], [.07, .08]],  # node 2
          ] + [unused_node] * 4,  # feature 1 (nodes 3..6 ignored)
      ]  # num_features * shape=[max_splits, num_buckets, 2]

      def run_node_ids(min_node_weight):
        """Runs the op once and returns the evaluated per-feature node ids."""
        (node_ids_list, _, _, _,
         _) = boosted_trees_ops.calculate_best_gains_per_feature(
             node_id_range,
             stats_summary_list,
             l1=0.0,
             l2=0.0,
             tree_complexity=0.0,
             min_node_weight=min_node_weight,
             max_splits=max_splits)
        return sess.run(node_ids_list)

      # We can't split either of the nodes on the first feature.
      # Run once and reuse the result for all three assertions.
      node_ids = run_node_ids(min_node_weight=1)
      self.assertEqual(2, len(node_ids))
      self.assertAllEqual([], node_ids[0])
      self.assertAllEqual([1], node_ids[1])

      # Now check when we can't split on any feature
      self.assertAllEqual([[], []], run_node_ids(min_node_weight=10))
コード例 #3
0
  def testCalculateBestGainsWithTreeComplexity(self):
    """Checks best-gain outputs with l2=0.1 and tree_complexity=3."""
    with self.cached_session():
      outputs = boosted_trees_ops.calculate_best_gains_per_feature(
          [1, 3],  # node_id_range: node 1 through 2 will be processed.
          self._get_stats_summary_for_split(),
          l1=0.0,
          l2=0.1,
          tree_complexity=3.,
          min_node_weight=0,
          max_splits=7)
      node_ids, gains, thresholds, left_contribs, right_contribs = outputs

      expected_node_ids = [[1, 2], [1, 2]]
      self.assertAllEqual(expected_node_ids, self.evaluate(node_ids))

      expected_gains = [[-3., -2.66068625], [-2.98120904, -2.66068625]]
      self.assertAllClose(expected_gains, self.evaluate(gains))

      expected_thresholds = [[0, 1], [1, 1]]
      self.assertAllEqual(expected_thresholds, self.evaluate(thresholds))

      # Each contrib is later added to the previous node value to obtain the
      # value of the corresponding (left or right) child node.
      expected_left = [[[0.], [.485294]], [[-.5], [-.6]]]
      self.assertAllClose(expected_left, self.evaluate(left_contribs))
      expected_right = [[[-.424658], [-.6]], [[-.043478], [.485294]]]
      self.assertAllClose(expected_right, self.evaluate(right_contribs))
コード例 #4
0
  def testCalculateBestGainsWithL1(self):
    """Checks best-gain outputs when only l1=0.1 regularization is set."""
    with self.cached_session():
      outputs = boosted_trees_ops.calculate_best_gains_per_feature(
          [1, 3],  # node_id_range: node 1 through 2 will be processed.
          self._get_stats_summary_for_split(),
          l1=0.1,
          l2=0.0,
          tree_complexity=0.0,
          min_node_weight=0,
          max_splits=7)
      node_ids, gains, thresholds, left_contribs, right_contribs = outputs

      expected_thresholds = [[0, 1], [1, 1]]
      self.assertAllEqual(expected_thresholds, self.evaluate(thresholds))

      expected_node_ids = [[1, 2], [1, 2]]
      self.assertAllEqual(expected_node_ids, self.evaluate(node_ids))

      expected_left = [[[0.0], [0.3965517]], [[-0.4], [-0.5]]]
      self.assertAllClose(expected_left, self.evaluate(left_contribs))

      expected_right = [[[-0.3333333], [-0.5]], [[0.0], [0.396552]]]
      self.assertAllClose(expected_right, self.evaluate(right_contribs))

      # Gain should also include an adjustment of the gradient by l1.
      expected_gains = [[0.0, 0.191207], [0.01, 0.191207]]
      self.assertAllClose(expected_gains, self.evaluate(gains))
コード例 #5
0
  def testCalculateBestGainsWithoutRegularization(self):
    """Checks best-gain outputs with l1, l2 and tree_complexity all zero."""
    with self.cached_session():
      outputs = boosted_trees_ops.calculate_best_gains_per_feature(
          [1, 3],  # node_id_range: node 1 through 2 will be processed.
          self._get_stats_summary_for_split(),
          l1=0.0,
          l2=0.0,
          tree_complexity=0.0,
          min_node_weight=0,
          max_splits=7)
      node_ids, gains, thresholds, left_contribs, right_contribs = outputs

      expected_node_ids = [[1, 2], [1, 2]]
      self.assertAllEqual(expected_node_ids, self.evaluate(node_ids))

      expected_gains = [[0.004775, 0.41184], [0.02823, 0.41184]]
      self.assertAllClose(expected_gains, self.evaluate(gains))

      expected_thresholds = [[1, 1], [1, 1]]
      self.assertAllEqual(expected_thresholds, self.evaluate(thresholds))

      # Each contrib is later added to the previous node value to obtain the
      # value of the corresponding (left or right) child node.
      expected_left = [[[-.416667], [.568966]], [[-.6], [-.75]]]
      self.assertAllClose(expected_left, self.evaluate(left_contribs))
      expected_right = [[[-.592593], [-.75]], [[-.076923], [.568966]]]
      self.assertAllClose(expected_right, self.evaluate(right_contribs))
コード例 #6
0
 def grow_tree_from_stats_summaries(stats_summary_list):
   """Builds the op that updates the ensemble from the best per-feature gains.

   Computes split candidates with `calculate_best_gains_per_feature` and
   passes them on to `update_ensemble`.

   Args:
     stats_summary_list: forwarded unchanged as `stats_summary_list` to
       `boosted_trees_ops.calculate_best_gains_per_feature`.

   Returns:
     The value returned by `boosted_trees_ops.update_ensemble`.
   """
   # NOTE(review): the range end is reduce_max(node_ids); confirm whether the
   # op treats the end of node_id_range as inclusive or exclusive.
   node_id_range = array_ops.stack(
       [math_ops.reduce_min(node_ids),
        math_ops.reduce_max(node_ids)])
   best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
       node_id_range=node_id_range,
       stats_summary_list=stats_summary_list,
       l1=tree_hparams.l1,
       l2=tree_hparams.l2,
       tree_complexity=tree_hparams.tree_complexity,
       max_splits=max_splits)
   (node_ids_per_feature, gains_list, thresholds_list,
    left_node_contribs_list, right_node_contribs_list) = best_splits
   return boosted_trees_ops.update_ensemble(
       # Confirm if local_tree_ensemble or tree_ensemble should be used.
       tree_ensemble.resource_handle,
       feature_ids=math_ops.range(0, num_features, dtype=dtypes.int32),
       node_ids=node_ids_per_feature,
       gains=gains_list,
       thresholds=thresholds_list,
       left_node_contribs=left_node_contribs_list,
       right_node_contribs=right_node_contribs_list,
       learning_rate=tree_hparams.learning_rate,
       max_depth=tree_hparams.max_depth,
       pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
コード例 #7
0
ファイル: stats_ops_test.py プロジェクト: fraudies/tensorflow
    def testCalculateBestGainsWithoutRegularization(self):
        """Checks best-gain outputs with l1, l2 and tree_complexity all zero."""
        with self.cached_session() as sess:
            outputs = boosted_trees_ops.calculate_best_gains_per_feature(
                [1, 3],  # node_id_range: node 1 through 2 will be processed.
                self._get_stats_summary_for_split(),
                l1=0.0,
                l2=0.0,
                tree_complexity=0.0,
                min_node_weight=0,
                max_splits=7)
            (node_ids, gains, thresholds, left_contribs,
             right_contribs) = outputs

            expected_node_ids = [[1, 2], [1, 2]]
            self.assertAllEqual(expected_node_ids, sess.run(node_ids))

            expected_gains = [[0.004775, 0.41184], [0.02823, 0.41184]]
            self.assertAllClose(expected_gains, sess.run(gains))

            expected_thresholds = [[1, 1], [1, 1]]
            self.assertAllEqual(expected_thresholds, sess.run(thresholds))

            # Each contrib is later added to the previous node value to obtain
            # the value of the corresponding (left or right) child node.
            expected_left = [[[-.416667], [.568966]], [[-.6], [-.75]]]
            self.assertAllClose(expected_left, sess.run(left_contribs))
            expected_right = [[[-.592593], [-.75]], [[-.076923], [.568966]]]
            self.assertAllClose(expected_right, sess.run(right_contribs))
コード例 #8
0
ファイル: stats_ops_test.py プロジェクト: fraudies/tensorflow
    def testCalculateBestGainsWithL1(self):
        """Checks best-gain outputs when only l1=0.1 regularization is set."""
        with self.cached_session() as sess:
            outputs = boosted_trees_ops.calculate_best_gains_per_feature(
                [1, 3],  # node_id_range: node 1 through 2 will be processed.
                self._get_stats_summary_for_split(),
                l1=0.1,
                l2=0.0,
                tree_complexity=0.0,
                min_node_weight=0,
                max_splits=7)
            (node_ids, gains, thresholds, left_contribs,
             right_contribs) = outputs

            expected_thresholds = [[0, 1], [1, 1]]
            self.assertAllEqual(expected_thresholds, sess.run(thresholds))

            expected_node_ids = [[1, 2], [1, 2]]
            self.assertAllEqual(expected_node_ids, sess.run(node_ids))

            expected_left = [[[0.0], [0.3965517]], [[-0.4], [-0.5]]]
            self.assertAllClose(expected_left, sess.run(left_contribs))

            expected_right = [[[-0.3333333], [-0.5]], [[0.0], [0.396552]]]
            self.assertAllClose(expected_right, sess.run(right_contribs))

            # Gain should also include an adjustment of the gradient by l1.
            expected_gains = [[0.0, 0.191207], [0.01, 0.191207]]
            self.assertAllClose(expected_gains, sess.run(gains))
コード例 #9
0
ファイル: stats_ops_test.py プロジェクト: fraudies/tensorflow
    def testCalculateBestGainsWithTreeComplexity(self):
        """Checks best-gain outputs with l2=0.1 and tree_complexity=3."""
        with self.cached_session() as sess:
            outputs = boosted_trees_ops.calculate_best_gains_per_feature(
                [1, 3],  # node_id_range: node 1 through 2 will be processed.
                self._get_stats_summary_for_split(),
                l1=0.0,
                l2=0.1,
                tree_complexity=3.,
                min_node_weight=0,
                max_splits=7)
            (node_ids, gains, thresholds, left_contribs,
             right_contribs) = outputs

            expected_node_ids = [[1, 2], [1, 2]]
            self.assertAllEqual(expected_node_ids, sess.run(node_ids))

            expected_gains = [[-3., -2.66068625], [-2.98120904, -2.66068625]]
            self.assertAllClose(expected_gains, sess.run(gains))

            expected_thresholds = [[0, 1], [1, 1]]
            self.assertAllEqual(expected_thresholds, sess.run(thresholds))

            # Each contrib is later added to the previous node value to obtain
            # the value of the corresponding (left or right) child node.
            expected_left = [[[0.], [.485294]], [[-.5], [-.6]]]
            self.assertAllClose(expected_left, sess.run(left_contribs))
            expected_right = [[[-.424658], [-.6]], [[-.043478], [.485294]]]
            self.assertAllClose(expected_right, sess.run(right_contribs))
コード例 #10
0
 def grow_tree_from_stats_summaries(stats_summary_list):
     """Builds the op that updates the ensemble from the best per-feature gains.

     Computes split candidates with `calculate_best_gains_per_feature` over
     `last_layer_nodes_range` and passes them on to `update_ensemble`.

     Args:
       stats_summary_list: forwarded unchanged as `stats_summary_list` to
         `boosted_trees_ops.calculate_best_gains_per_feature`.

     Returns:
       The value returned by `boosted_trees_ops.update_ensemble`.
     """
     best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
         node_id_range=last_layer_nodes_range,
         stats_summary_list=stats_summary_list,
         l1=tree_hparams.l1,
         l2=tree_hparams.l2,
         tree_complexity=tree_hparams.tree_complexity,
         max_splits=max_splits)
     (node_ids_per_feature, gains_list, thresholds_list,
      left_node_contribs_list, right_node_contribs_list) = best_splits
     return boosted_trees_ops.update_ensemble(
         # Confirm if local_tree_ensemble or tree_ensemble should be used.
         tree_ensemble.resource_handle,
         feature_ids=math_ops.range(0, num_features, dtype=dtypes.int32),
         node_ids=node_ids_per_feature,
         gains=gains_list,
         thresholds=thresholds_list,
         left_node_contribs=left_node_contribs_list,
         right_node_contribs=right_node_contribs_list,
         learning_rate=tree_hparams.learning_rate,
         max_depth=tree_hparams.max_depth,
         pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
コード例 #11
0
ファイル: stats_ops_test.py プロジェクト: zzk88862/tensorflow
    def testCalculateBestGainsWithL2(self):
        """Checks best-gain outputs when only l2=0.1 regularization is set."""
        with self.cached_session():
            outputs = boosted_trees_ops.calculate_best_gains_per_feature(
                [1, 3],  # node_id_range: node 1 through 2 will be processed.
                self._get_stats_summary_for_split(),
                l1=0.0,
                l2=0.1,
                tree_complexity=0.0,
                min_node_weight=0,
                max_splits=7)
            (node_ids, gains, thresholds, left_contribs,
             right_contribs) = outputs

            expected_node_ids = [[1, 2], [1, 2]]
            self.assertAllEqual(expected_node_ids, self.evaluate(node_ids))

            expected_gains = [[0., 0.33931375], [0.01879096, 0.33931375]]
            self.assertAllClose(expected_gains, self.evaluate(gains))

            expected_thresholds = [[0, 1], [1, 1]]
            self.assertAllEqual(expected_thresholds,
                                self.evaluate(thresholds))

            # Each contrib is later added to the previous node value to obtain
            # the value of the corresponding (left or right) child node.
            expected_left = [[[0.], [.485294]], [[-.5], [-.6]]]
            self.assertAllClose(expected_left, self.evaluate(left_contribs))
            expected_right = [[[-.424658], [-.6]], [[-.043478], [.485294]]]
            self.assertAllClose(expected_right, self.evaluate(right_contribs))
コード例 #12
0
    def testCalculateBestGainsWithL1(self):
        """Checks best-gain outputs when only l1=0.1 regularization is set."""
        with self.cached_session() as sess:
            unused_node = [[0., 0.]] * 4  # all-zero stats used for nodes 3..6
            feature_0 = [
                [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
                [[0., 0.], [-.33, .58], [0., 0.], [.3, .4]],  # node 2
            ] + [unused_node] * 4  # nodes 3..6; ignored
            feature_1 = [
                [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.3, .5], [-.05, .06], [.06, .07]],  # node 1
                [[.1, .1], [.2, .3], [-.4, .5], [.07, .08]],  # node 2
            ] + [unused_node] * 4  # nodes 3..6; ignored
            # num_features * shape=[max_splits, num_buckets, 2]
            stats_summary_list = [feature_0, feature_1]

            outputs = boosted_trees_ops.calculate_best_gains_per_feature(
                [1, 3],  # node_id_range: node 1 through 2 will be processed.
                stats_summary_list,
                l1=0.1,
                l2=0.0,
                tree_complexity=0.0,
                min_node_weight=0,
                max_splits=7)
            (node_ids, gains, thresholds, left_contribs,
             right_contribs) = outputs

            expected_thresholds = [[0, 1], [1, 1]]
            self.assertAllEqual(expected_thresholds, sess.run(thresholds))

            expected_node_ids = [[1, 2], [1, 2]]
            self.assertAllEqual(expected_node_ids, sess.run(node_ids))

            expected_left = [[[0.0], [0.3965517]], [[-0.4], [-0.5]]]
            self.assertAllClose(expected_left, sess.run(left_contribs))

            expected_right = [[[-0.3333333], [-0.5]], [[0.0], [0.396552]]]
            self.assertAllClose(expected_right, sess.run(right_contribs))

            # Gain should also include an adjustment of the gradient by l1.
            expected_gains = [[0.0, 0.191207], [0.01, 0.191207]]
            self.assertAllClose(expected_gains, sess.run(gains))
コード例 #13
0
    def testCalculateBestGainsWithoutRegularization(self):
        """Checks best-gain outputs with l1, l2 and tree_complexity all zero."""
        with self.cached_session():
            unused_node = [[0., 0.]] * 4  # all-zero stats used for nodes 3..6
            feature_0 = [
                [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
                [[0., 0.], [-.33, .58], [0., 0.], [.3, .4]],  # node 2
            ] + [unused_node] * 4  # nodes 3..6; ignored
            feature_1 = [
                [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.3, .5], [-.05, .06], [.06, .07]],  # node 1
                [[.1, .1], [.2, .3], [-.4, .5], [.07, .08]],  # node 2
            ] + [unused_node] * 4  # nodes 3..6; ignored
            # num_features * shape=[max_splits, num_buckets, 2]
            stats_summary_list = [feature_0, feature_1]

            outputs = boosted_trees_ops.calculate_best_gains_per_feature(
                [1, 3],  # node_id_range: node 1 through 2 will be processed.
                stats_summary_list,
                l1=0.0,
                l2=0.0,
                tree_complexity=0.0,
                min_node_weight=0,
                max_splits=7)
            (node_ids, gains, thresholds, left_contribs,
             right_contribs) = outputs

            expected_node_ids = [[1, 2], [1, 2]]
            self.assertAllEqual(expected_node_ids, self.evaluate(node_ids))

            expected_gains = [[0.004775, 0.41184], [0.02823, 0.41184]]
            self.assertAllClose(expected_gains, self.evaluate(gains))

            expected_thresholds = [[1, 1], [1, 1]]
            self.assertAllEqual(expected_thresholds,
                                self.evaluate(thresholds))

            # Each contrib is later added to the previous node value to obtain
            # the value of the corresponding (left or right) child node.
            expected_left = [[[-.416667], [.568966]], [[-.6], [-.75]]]
            self.assertAllClose(expected_left, self.evaluate(left_contribs))
            expected_right = [[[-.592593], [-.75]], [[-.076923], [.568966]]]
            self.assertAllClose(expected_right, self.evaluate(right_contribs))
コード例 #14
0
ファイル: stats_ops_test.py プロジェクト: zzk88862/tensorflow
    def testCalculateBestGainsWithMinNodeWeight(self):
        """Checks best-gain outputs when min_node_weight=1 filters splits."""
        with self.cached_session():
            unused_node = [[0., 0.]] * 4  # all-zero stats used for nodes 3..6
            feature_0 = [
                [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.15, .036], [.06, .07], [.1, .2]],  # node 1
                [[0., 0.], [-.33, .68], [0., 0.], [.3, .4]],  # node 2
            ] + [unused_node] * 4  # nodes 3..6; ignored
            feature_1 = [
                [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.3, .5], [-.05, .6], [.06, .07]],  # node 1
                [[.1, .1], [.2, .03], [-.4, .05], [.07, .08]],  # node 2
            ] + [unused_node] * 4  # nodes 3..6; ignored
            # num_features * shape=[max_splits, num_buckets, 2]
            stats_summary_list = [feature_0, feature_1]

            outputs = boosted_trees_ops.calculate_best_gains_per_feature(
                [1, 3],  # node_id_range: node 1 through 2 will be processed.
                stats_summary_list,
                l1=0.0,
                l2=0.0,
                tree_complexity=0.0,
                min_node_weight=1,
                max_splits=7)
            (node_ids, gains, thresholds, left_contribs,
             right_contribs) = outputs

            # Because of the min node weight, only node 2 is expected for
            # feature 0 and only node 1 for feature 1.
            expected_node_ids = [[2], [1]]
            self.assertAllEqual(expected_node_ids, self.evaluate(node_ids))

            expected_gains = [[0.384314], [0.098013]]
            self.assertAllClose(expected_gains, self.evaluate(gains))

            expected_thresholds = [[1], [1]]
            self.assertAllEqual(expected_thresholds,
                                self.evaluate(thresholds))

            expected_left = [[[0.4852941]], [[-.6]]]
            self.assertAllClose(expected_left, self.evaluate(left_contribs))

            expected_right = [[[-0.75]], [[-0.014925]]]
            self.assertAllClose(expected_right, self.evaluate(right_contribs))
コード例 #15
0
ファイル: stats_ops_test.py プロジェクト: sgcm520/tensorflow2
    def testCalculateBestGainsWithL2(self):
        """Checks best-gain outputs when only l2=0.1 regularization is set."""
        with self.test_session() as sess:
            unused_node = [[0., 0.]] * 4  # all-zero stats used for nodes 3..6
            feature_0 = [
                [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
                [[0., 0.], [-.33, .58], [0., 0.], [.3, .4]],  # node 2
            ] + [unused_node] * 4  # nodes 3..6; ignored
            feature_1 = [
                [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
                [[0., 0.], [.3, .5], [-.05, .06], [.06, .07]],  # node 1
                [[.1, .1], [.2, .3], [-.4, .5], [.07, .08]],  # node 2
            ] + [unused_node] * 4  # nodes 3..6; ignored
            # num_features * shape=[max_splits, num_buckets, 2]
            stats_summary_list = [feature_0, feature_1]

            outputs = boosted_trees_ops.calculate_best_gains_per_feature(
                [1, 3],  # node_id_range: node 1 through 2 will be processed.
                stats_summary_list,
                l1=0.0,
                l2=0.1,
                tree_complexity=0.0,
                min_node_weight=0,
                max_splits=7)
            (node_ids, gains, thresholds, left_contribs,
             right_contribs) = outputs

            expected_node_ids = [[1, 2], [1, 2]]
            self.assertAllEqual(expected_node_ids, sess.run(node_ids))

            expected_gains = [[0., 0.33931375], [0.01879096, 0.33931375]]
            self.assertAllClose(expected_gains, sess.run(gains))

            expected_thresholds = [[0, 1], [1, 1]]
            self.assertAllEqual(expected_thresholds, sess.run(thresholds))

            # Each contrib is later added to the previous node value to obtain
            # the value of the corresponding (left or right) child node.
            expected_left = [[[0.], [.485294]], [[-.5], [-.6]]]
            self.assertAllClose(expected_left, sess.run(left_contribs))
            expected_right = [[[-.424658], [-.6]], [[-.043478], [.485294]]]
            self.assertAllClose(expected_right, sess.run(right_contribs))
コード例 #16
0
  def testCalculateBestGainsWithTreeComplexity(self):
    """Testing best gain calculation with tree complexity.

    Uses l2=0.1 together with tree_complexity=3 and checks node ids, gains,
    thresholds and left/right node contributions for both features.
    """
    with self.test_session() as sess:
      max_splits = 7
      node_id_range = [1, 3]  # node 1 through 2 will be processed.
      unused_node = [[0., 0.]] * 4  # all-zero stats used for nodes 3..6
      stats_summary_list = [
          [
              [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
              [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
              [[0., 0.], [-.33, .58], [0., 0.], [.3, .4]],  # node 2
          ] + [unused_node] * 4,  # feature 0 (nodes 3..6 ignored)
          [
              [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
              [[0., 0.], [.3, .5], [-.05, .06], [.06, .07]],  # node 1
              [[.1, .1], [.2, .3], [-.4, .5], [.07, .08]],  # node 2
          ] + [unused_node] * 4,  # feature 1 (nodes 3..6 ignored)
      ]  # num_features * shape=[max_splits, num_buckets, 2]

      l2 = 0.1
      tree_complexity = 3.
      (node_ids_list, gains_list, thresholds_list, left_node_contribs_list,
       right_node_contribs_list
      ) = boosted_trees_ops.calculate_best_gains_per_feature(
          node_id_range,
          stats_summary_list,
          l1=0.0,
          l2=l2,
          tree_complexity=tree_complexity,
          min_node_weight=0,
          max_splits=max_splits)

      self.assertAllEqual([[1, 2], [1, 2]], sess.run(node_ids_list))

      self.assertAllClose([[-3., -2.66068625], [-2.98120904, -2.66068625]],
                          sess.run(gains_list))

      self.assertAllEqual([[0, 1], [1, 1]], sess.run(thresholds_list))
      # The left node contrib will be later added to the previous node value to
      # make the left node value, and the same for right node contrib.
      self.assertAllClose([[[0.], [.485294]], [[-.5], [-.6]]],
                          sess.run(left_node_contribs_list))
      self.assertAllClose([[[-.424658], [-.6]], [[-.043478], [.485294]]],
                          sess.run(right_node_contribs_list))
コード例 #17
0
  def testCalculateBestGainsWithL1(self):
    """Tests gain calculation when only L1 regularization is enabled."""
    with self.test_session() as sess:
      max_num_splits = 7
      processed_node_range = [1, 3]  # Only nodes 1 and 2 are processed.

      # All-zero stats, used for the nodes that follow the processed range.
      empty_node = [[0., 0.], [0., 0.], [0., 0.], [0., 0.]]
      feature_0_stats = [
          [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
          [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
          [[0., 0.], [-.33, .58], [0., 0.], [.3, .4]],  # node 2
          empty_node,  # node 3; ignored
          empty_node,  # node 4; ignored
          empty_node,  # node 5; ignored
          empty_node,  # node 6; ignored
      ]
      feature_1_stats = [
          [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
          [[0., 0.], [.3, .5], [-.05, .06], [.06, .07]],  # node 1
          [[.1, .1], [.2, .3], [-.4, .5], [.07, .08]],  # node 2
          empty_node,  # node 3; ignored
          empty_node,  # node 4; ignored
          empty_node,  # node 5; ignored
          empty_node,  # node 6; ignored
      ]
      # num_features * shape=[max_splits, num_buckets, 2]
      stats_per_feature = [feature_0_stats, feature_1_stats]

      best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
          processed_node_range,
          stats_per_feature,
          l1=0.1,
          l2=0.0,
          tree_complexity=0.0,
          min_node_weight=0,
          max_splits=max_num_splits)
      (node_ids_list, gains_list, thresholds_list, left_node_contribs_list,
       right_node_contribs_list) = best_splits

      expected_thresholds = [[0, 1], [1, 1]]
      self.assertAllEqual(expected_thresholds, sess.run(thresholds_list))

      expected_node_ids = [[1, 2], [1, 2]]
      self.assertAllEqual(expected_node_ids, sess.run(node_ids_list))

      expected_left_contribs = [[[0.0], [0.3965517]], [[-0.4], [-0.5]]]
      self.assertAllClose(expected_left_contribs,
                          sess.run(left_node_contribs_list))

      expected_right_contribs = [[[-0.3333333], [-0.5]], [[0.0], [0.396552]]]
      self.assertAllClose(expected_right_contribs,
                          sess.run(right_node_contribs_list))

      # The gains must reflect the l1 adjustment of the gradient as well.
      expected_gains = [[0.0, 0.191207], [0.01, 0.191207]]
      self.assertAllClose(expected_gains, sess.run(gains_list))
コード例 #18
0
  def testCalculateBestGainsWithoutRegularization(self):
    """Tests gain calculation with l1, l2 and tree_complexity all zero."""
    with self.test_session() as sess:
      max_num_splits = 7
      processed_node_range = [1, 3]  # Only nodes 1 and 2 are processed.

      # All-zero stats, used for the nodes that follow the processed range.
      empty_node = [[0., 0.], [0., 0.], [0., 0.], [0., 0.]]
      feature_0_stats = [
          [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
          [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
          [[0., 0.], [-.33, .58], [0., 0.], [.3, .4]],  # node 2
          empty_node,  # node 3; ignored
          empty_node,  # node 4; ignored
          empty_node,  # node 5; ignored
          empty_node,  # node 6; ignored
      ]
      feature_1_stats = [
          [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
          [[0., 0.], [.3, .5], [-.05, .06], [.06, .07]],  # node 1
          [[.1, .1], [.2, .3], [-.4, .5], [.07, .08]],  # node 2
          empty_node,  # node 3; ignored
          empty_node,  # node 4; ignored
          empty_node,  # node 5; ignored
          empty_node,  # node 6; ignored
      ]
      # num_features * shape=[max_splits, num_buckets, 2]
      stats_per_feature = [feature_0_stats, feature_1_stats]

      best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
          processed_node_range,
          stats_per_feature,
          l1=0.0,
          l2=0.0,
          tree_complexity=0.0,
          min_node_weight=0,
          max_splits=max_num_splits)
      (node_ids_list, gains_list, thresholds_list, left_node_contribs_list,
       right_node_contribs_list) = best_splits

      expected_node_ids = [[1, 2], [1, 2]]
      self.assertAllEqual(expected_node_ids, sess.run(node_ids_list))

      expected_gains = [[0.004775, 0.41184], [0.02823, 0.41184]]
      self.assertAllClose(expected_gains, sess.run(gains_list))

      expected_thresholds = [[1, 1], [1, 1]]
      self.assertAllEqual(expected_thresholds, sess.run(thresholds_list))

      # A contrib is not the final node value: it is added later to the
      # previous node value to obtain the left (resp. right) node value.
      expected_left_contribs = [[[-.416667], [.568966]], [[-.6], [-.75]]]
      self.assertAllClose(expected_left_contribs,
                          sess.run(left_node_contribs_list))
      expected_right_contribs = [[[-.592593], [-.75]], [[-.076923], [.568966]]]
      self.assertAllClose(expected_right_contribs,
                          sess.run(right_node_contribs_list))
コード例 #19
0
  def testCalculateBestGainsWithMinNodeWeight(self):
    """Tests gain calculation when a minimum node weight is required."""
    with self.test_session() as sess:
      max_num_splits = 7
      processed_node_range = [1, 3]  # Only nodes 1 and 2 are processed.

      # All-zero stats, used for the nodes that follow the processed range.
      empty_node = [[0., 0.], [0., 0.], [0., 0.], [0., 0.]]
      feature_0_stats = [
          [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0; ignored
          [[0., 0.], [.15, .036], [.06, .07], [.1, .2]],  # node 1
          [[0., 0.], [-.33, .68], [0., 0.], [.3, .4]],  # node 2
          empty_node,  # node 3; ignored
          empty_node,  # node 4; ignored
          empty_node,  # node 5; ignored
          empty_node,  # node 6; ignored
      ]
      feature_1_stats = [
          [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0; ignored
          [[0., 0.], [.3, .5], [-.05, .6], [.06, .07]],  # node 1
          [[.1, .1], [.2, .03], [-.4, .05], [.07, .08]],  # node 2
          empty_node,  # node 3; ignored
          empty_node,  # node 4; ignored
          empty_node,  # node 5; ignored
          empty_node,  # node 6; ignored
      ]
      # num_features * shape=[max_splits, num_buckets, 2]
      stats_per_feature = [feature_0_stats, feature_1_stats]

      best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
          processed_node_range,
          stats_per_feature,
          l1=0.0,
          l2=0.0,
          tree_complexity=0.0,
          min_node_weight=1,
          max_splits=max_num_splits)
      (node_ids_list, gains_list, thresholds_list, left_node_contribs_list,
       right_node_contribs_list) = best_splits

      # The min node weight rules out splitting node 1 on the first feature
      # and node 2 on the second feature, so each feature reports one node.
      expected_node_ids = [[2], [1]]
      self.assertAllEqual(expected_node_ids, sess.run(node_ids_list))

      expected_gains = [[0.384314], [0.098013]]
      self.assertAllClose(expected_gains, sess.run(gains_list))

      expected_thresholds = [[1], [1]]
      self.assertAllEqual(expected_thresholds, sess.run(thresholds_list))

      expected_left_contribs = [[[0.4852941]], [[-.6]]]
      self.assertAllClose(expected_left_contribs,
                          sess.run(left_node_contribs_list))
      expected_right_contribs = [[[-0.75]], [[-0.014925]]]
      self.assertAllClose(expected_right_contribs,
                          sess.run(right_node_contribs_list))
コード例 #20
0
  def _grow_tree_from_stats_summaries(self, stats_summaries_list,
                                      feature_ids_list, last_layer_nodes_range):
    """Builds the op that grows the ensemble from the best per-feature gains.

    `calculate_best_gains_per_feature` is called once per entry of
    `feature_ids_list`, and the concatenated results are handed to
    `update_ensemble`.

    Args:
      stats_summaries_list: One stats summary list per entry of
        `feature_ids_list`; entry i is forwarded as `stats_summary_list`.
      feature_ids_list: Lists of feature ids, parallel to
        `stats_summaries_list` (both must have the same length).
      last_layer_nodes_range: Forwarded as `node_id_range`.

    Returns:
      The op returned by `boosted_trees_ops.update_ensemble`.
    """
    assert len(stats_summaries_list) == len(feature_ids_list)

    hparams = self._tree_hparams
    max_splits = _get_max_splits(hparams)

    # Accumulators that flatten the per-group outputs into single lists.
    collected_feature_ids = []
    collected_node_ids = []
    collected_gains = []
    collected_thresholds = []
    collected_left_contribs = []
    collected_right_contribs = []

    for group_stats, group_feature_ids in zip(stats_summaries_list,
                                              feature_ids_list):
      best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
          node_id_range=last_layer_nodes_range,
          stats_summary_list=group_stats,
          l1=hparams.l1,
          l2=hparams.l2,
          tree_complexity=hparams.tree_complexity,
          min_node_weight=hparams.min_node_weight,
          max_splits=max_splits)
      (group_node_ids, group_gains, group_thresholds, group_left_contribs,
       group_right_contribs) = best_splits

      collected_feature_ids.extend(group_feature_ids)
      collected_node_ids.extend(group_node_ids)
      collected_gains.extend(group_gains)
      collected_thresholds.extend(group_thresholds)
      collected_left_contribs.extend(group_left_contribs)
      collected_right_contribs.extend(group_right_contribs)

    return boosted_trees_ops.update_ensemble(
        # Confirm if local_tree_ensemble or tree_ensemble should be used.
        self._tree_ensemble.resource_handle,
        feature_ids=collected_feature_ids,
        node_ids=collected_node_ids,
        gains=collected_gains,
        thresholds=collected_thresholds,
        left_node_contribs=collected_left_contribs,
        right_node_contribs=collected_right_contribs,
        learning_rate=hparams.learning_rate,
        max_depth=hparams.max_depth,
        pruning_mode=self._pruning_mode_parsed)
コード例 #21
0
ファイル: boosted_trees.py プロジェクト: zlisa/tensorflow
      def grow_tree_from_stats_summaries(stats_summaries_list,
                                         feature_ids_list):
        """Builds the op that grows the ensemble from the best per-feature gains.

        `last_layer_nodes_range`, `tree_hparams`, `max_splits` and
        `tree_ensemble` are free variables taken from the surrounding scope.

        Args:
          stats_summaries_list: One stats summary list per entry of
            `feature_ids_list`; entry i is forwarded as `stats_summary_list`.
          feature_ids_list: Lists of feature ids, parallel to
            `stats_summaries_list` (both must have the same length).

        Returns:
          The op returned by `boosted_trees_ops.update_ensemble`, called
          with pruning disabled (`PruningMode.NO_PRUNING`).
        """
        assert len(stats_summaries_list) == len(feature_ids_list)

        # Accumulators that flatten the per-group outputs into single lists.
        collected_feature_ids = []
        collected_node_ids = []
        collected_gains = []
        collected_thresholds = []
        collected_left_contribs = []
        collected_right_contribs = []

        for group_stats, group_feature_ids in zip(stats_summaries_list,
                                                  feature_ids_list):
          best_splits = boosted_trees_ops.calculate_best_gains_per_feature(
              node_id_range=last_layer_nodes_range,
              stats_summary_list=group_stats,
              l1=tree_hparams.l1,
              l2=tree_hparams.l2,
              tree_complexity=tree_hparams.tree_complexity,
              min_node_weight=tree_hparams.min_node_weight,
              max_splits=max_splits)
          (group_node_ids, group_gains, group_thresholds,
           group_left_contribs, group_right_contribs) = best_splits

          collected_feature_ids.extend(group_feature_ids)
          collected_node_ids.extend(group_node_ids)
          collected_gains.extend(group_gains)
          collected_thresholds.extend(group_thresholds)
          collected_left_contribs.extend(group_left_contribs)
          collected_right_contribs.extend(group_right_contribs)

        return boosted_trees_ops.update_ensemble(
            # Confirm if local_tree_ensemble or tree_ensemble should be used.
            tree_ensemble.resource_handle,
            feature_ids=collected_feature_ids,
            node_ids=collected_node_ids,
            gains=collected_gains,
            thresholds=collected_thresholds,
            left_node_contribs=collected_left_contribs,
            right_node_contribs=collected_right_contribs,
            learning_rate=tree_hparams.learning_rate,
            max_depth=tree_hparams.max_depth,
            pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)