  def testMakeStatsSummaryMultipleFeatures(self):
    """Tests that MakeStatsSummary works for multiple features."""
    with self.cached_session():
      max_splits = 3
      num_buckets = 4
      node_ids = [1, 1, 2, 2, 1, 1, 2, 0]
      gradients = [[.1], [.2], [.3], [-.4], [-.05], [.06], [.07], [.08]]
      hessians = [[.2], [.3], [.4], [.5], [.06], [.07], [.08], [.09]]

      # Tests multiple features.
      # The output for each feature is stored in the 3rd dimension.
      bucketized_features = [[3, 1, 2, 0, 1, 2, 0, 1], [0, 0, 0, 2, 2, 3, 3, 2]]
      result = boosted_trees_ops.make_stats_summary(
          node_ids, gradients, hessians, bucketized_features, max_splits,
          num_buckets)  # shape=[num_features, max_splits, num_buckets, 2]
      self.assertAllClose(
          [
              [
                  [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0
                  [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
                  [[-.33, .58], [0., 0.], [.3, .4], [0., 0.]],  # node 2
              ],  # feature 0
              [
                  [[0., 0.], [0., 0.], [.08, .09], [0., 0.]],  # node 0
                  [[.3, .5], [0., 0.], [-.05, .06], [.06, .07]],  # node 1
                  [[.3, .4], [0., 0.], [-.4, .5], [.07, .08]],  # node 2
              ],  # feature 1
          ],
          result.eval())
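
For reference, the expected tensor in the assertion above can be reproduced with a few lines of NumPy: the op scatter-adds each example's (gradient, hessian) pair into a [num_features, max_splits, num_buckets, 2] array, indexed by node id and bucket. Below is a minimal sketch of that semantics (reference_stats_summary is a hypothetical helper written for illustration, not part of boosted_trees_ops):

import numpy as np

def reference_stats_summary(node_ids, gradients, hessians,
                            bucketized_features, max_splits, num_buckets):
  """NumPy sketch of what MakeStatsSummary computes."""
  num_features = len(bucketized_features)
  summary = np.zeros((num_features, max_splits, num_buckets, 2))
  for f in range(num_features):
    for i, node in enumerate(node_ids):
      bucket = bucketized_features[f][i]
      # Accumulate this example's (gradient, hessian) pair.
      summary[f, node, bucket, 0] += gradients[i][0]
      summary[f, node, bucket, 1] += hessians[i][0]
  return summary

Applied to the inputs above, this reproduces the nested list passed to assertAllClose.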
 def testMakeStatsSummarySimple(self):
   """Simple test for MakeStatsSummary."""
   with self.cached_session():
     self.assertAllClose([[[[1., 5.], [2., 6.]], [[3., 7.], [4., 8.]]]],
                         boosted_trees_ops.make_stats_summary(
                             node_ids=[0, 0, 1, 1],
                             gradients=[[1.], [2.], [3.], [4.]],
                             hessians=[[5.], [6.], [7.], [8.]],
                             bucketized_features_list=[[0, 1, 0, 1]],
                             max_splits=2,
                             num_buckets=2).eval())
 def testMakeStatsSummarySimple(self):
   """Simple test for MakeStatsSummary."""
   expected_stats_summary = np.asarray([1., 5., 2., 6., 3., 7., 4., 8.])
   self.assertAllClose(
       expected_stats_summary.reshape((1, 2, 2, 2)),
       boosted_trees_ops.make_stats_summary(
           node_ids=[0, 0, 1, 1],
           gradients=[[1.], [2.], [3.], [4.]],
           hessians=[[5.], [6.], [7.], [8.]],
           bucketized_features_list=[[0, 1, 0, 1]],
           max_splits=2,
           num_buckets=2))
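
Note the reshape target (1, 2, 2, 2): the dimensions are [num_features, max_splits, num_buckets, 2], consistent with the shape comments elsewhere in these tests. Indexing the reshaped array confirms the layout:

summary = expected_stats_summary.reshape((1, 2, 2, 2))
# Feature 0, node 1, bucket 0 holds the (gradient, hessian) pair (3., 7.).
assert list(summary[0, 1, 0]) == [3., 7.]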
Example #8
  def _verify_precision(self, length):
    with self.cached_session():
      max_splits = 1
      num_buckets = 1
      node_ids = array_ops.fill([length], 0)

      gradients = constant_op.constant(
          2.0 / length, dtype=dtypes.float32, shape=[length, 1])
      hessians = constant_op.constant(
          0.2 / length, dtype=dtypes.float32, shape=[length, 1])

      bucketized_features = array_ops.zeros([length], dtype=dtypes.int32)

      result = boosted_trees_ops.make_stats_summary(
          node_ids, gradients, hessians, [bucketized_features], max_splits,
          num_buckets)  # shape=[num_features, max_splits, num_buckets, 2]

      self.assertAllClose([[[[2., 0.2]]]], self.evaluate(result))
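
_verify_precision is a helper rather than a test; in the test suite it is driven by per-batch-size test methods. A sketch of such callers (the batch lengths here are illustrative assumptions, not taken from this excerpt):

  def testMakeStatsSummaryNumericalPrecisionSmallBatch(self):
    """Tests numeric precision with a small batch."""
    self._verify_precision(length=2000)

  def testMakeStatsSummaryNumericalPrecisionLargeBatch(self):
    """Tests numeric precision with a large batch."""
    self._verify_precision(length=1000000)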
Example #10
  def testMakeStatsSummaryAccumulate(self):
    """Tests that Summary actually accumulates."""
    with self.cached_session():
      max_splits = 3
      num_buckets = 4
      node_ids = [1, 1, 2, 2, 1, 1, 2, 0]
      gradients = [[.1], [.2], [.3], [-.4], [-.05], [.06], [.07], [.08]]
      hessians = [[.2], [.3], [.4], [.5], [.06], [.07], [.08], [.09]]

      # Tests a single feature.
      bucketized_features = [[3, 1, 2, 0, 1, 2, 0, 1]]
      result = boosted_trees_ops.make_stats_summary(
          node_ids, gradients, hessians, bucketized_features, max_splits,
          num_buckets)  # shape=[num_features, max_splits, num_buckets, 2]
      self.assertAllClose(
          [[
              [[0., 0.], [.08, .09], [0., 0.], [0., 0.]],  # node 0
              [[0., 0.], [.15, .36], [.06, .07], [.1, .2]],  # node 1
              [[-.33, .58], [0., 0.], [.3, .4], [0., 0.]],  # node 2
          ]],
          self.evaluate(result))
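
To see the accumulation, consider node 1, bucket 1: input rows 1 and 4 both land there, so their gradients and hessians are summed:

# Rows 1 and 4 have node_id == 1 and bucket == 1.
grad = .2 + (-.05)  # = .15
hess = .3 + .06     # = .36, giving the [.15, .36] entry for node 1, bucket 1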
    def _train_op_fn(loss):
      """Run one training iteration."""
      if training_state_cache:
        train_op.append(training_state_cache.insert(tree_ids, node_ids, logits))
      if closed_form_grad_and_hess_fn:
        gradients, hessians = closed_form_grad_and_hess_fn(logits, labels)
      else:
        gradients = gradients_impl.gradients(loss, logits, name='Gradients')[0]
        hessians = gradients_impl.gradients(
            gradients, logits, name='Hessians')[0]

      stats_summaries_list = []
      for i, feature_ids in enumerate(feature_ids_list):
        num_buckets = bucket_size_list[i]
        summaries = [
            array_ops.squeeze(
                boosted_trees_ops.make_stats_summary(
                    node_ids=node_ids,
                    gradients=gradients,
                    hessians=hessians,
                    bucketized_features_list=[input_feature_list[f]],
                    max_splits=max_splits,
                    num_buckets=num_buckets),
                axis=0) for f in feature_ids
        ]
        stats_summaries_list.append(summaries)

      accumulators = []

      def grow_tree_from_stats_summaries(stats_summaries_list,
                                         feature_ids_list):
        """Updates ensemble based on the best gains from stats summaries."""
        node_ids_per_feature = []
        gains_list = []
        thresholds_list = []
        left_node_contribs_list = []
        right_node_contribs_list = []
        all_feature_ids = []

        assert len(stats_summaries_list) == len(feature_ids_list)

        for i, feature_ids in enumerate(feature_ids_list):
          (numeric_node_ids_per_feature, numeric_gains_list,
           numeric_thresholds_list, numeric_left_node_contribs_list,
           numeric_right_node_contribs_list) = (
               boosted_trees_ops.calculate_best_gains_per_feature(
                   node_id_range=last_layer_nodes_range,
                   stats_summary_list=stats_summaries_list[i],
                   l1=tree_hparams.l1,
                   l2=tree_hparams.l2,
                   tree_complexity=tree_hparams.tree_complexity,
                   min_node_weight=tree_hparams.min_node_weight,
                   max_splits=max_splits))

          all_feature_ids += feature_ids
          node_ids_per_feature += numeric_node_ids_per_feature
          gains_list += numeric_gains_list
          thresholds_list += numeric_thresholds_list
          left_node_contribs_list += numeric_left_node_contribs_list
          right_node_contribs_list += numeric_right_node_contribs_list

        grow_op = boosted_trees_ops.update_ensemble(
            # Confirm if local_tree_ensemble or tree_ensemble should be used.
            tree_ensemble.resource_handle,
            feature_ids=all_feature_ids,
            node_ids=node_ids_per_feature,
            gains=gains_list,
            thresholds=thresholds_list,
            left_node_contribs=left_node_contribs_list,
            right_node_contribs=right_node_contribs_list,
            learning_rate=tree_hparams.learning_rate,
            max_depth=tree_hparams.max_depth,
            pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
        return grow_op

      if train_in_memory and is_single_machine:
        train_op.append(distribute_lib.increment_var(global_step))
        train_op.append(
            grow_tree_from_stats_summaries(stats_summaries_list,
                                           feature_ids_list))
      else:
        dependencies = []

        for i, feature_ids in enumerate(feature_ids_list):
          stats_summaries = stats_summaries_list[i]
          accumulator = data_flow_ops.ConditionalAccumulator(
              dtype=dtypes.float32,
              # The stats consist of grads and hessians (the last dimension).
              shape=[len(feature_ids), max_splits, bucket_size_list[i], 2],
              shared_name='numeric_stats_summary_accumulator_' + str(i))
          accumulators.append(accumulator)

          apply_grad = accumulator.apply_grad(
              array_ops.stack(stats_summaries, axis=0), stamp_token)
          dependencies.append(apply_grad)

        def grow_tree_from_accumulated_summaries_fn():
          """Updates the tree with the best layer from accumulated summaries."""
          # Take out the accumulated summaries from the accumulator and grow.
          stats_summaries_list = [
              array_ops.unstack(accumulator.take_grad(1), axis=0)
              for accumulator in accumulators
          ]

          grow_op = grow_tree_from_stats_summaries(stats_summaries_list,
                                                   feature_ids_list)
          return grow_op

        with ops.control_dependencies(dependencies):
          train_op.append(distribute_lib.increment_var(global_step))
          if config.is_chief:
            min_accumulated = math_ops.reduce_min(
                array_ops.stack(
                    [acc.num_accumulated() for acc in accumulators]))

            train_op.append(
                control_flow_ops.cond(
                    math_ops.greater_equal(min_accumulated,
                                           n_batches_per_layer),
                    grow_tree_from_accumulated_summaries_fn,
                    control_flow_ops.no_op,
                    name='wait_until_n_batches_accumulated'))

      return control_flow_ops.group(train_op, name='train_op')
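
The distributed branch above hinges on data_flow_ops.ConditionalAccumulator: each worker applies its per-batch stats, and the chief only grows a layer once at least n_batches_per_layer batches have accumulated across all accumulators. A standalone sketch of that pattern using the public TF1 API (the shape and shared_name here are illustrative):

import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

# Workers apply per-batch stats; take_grad releases their mean.
acc = tf.ConditionalAccumulator(
    dtype=tf.float32, shape=[2], shared_name='stats_acc')
apply_op = acc.apply_grad(tf.constant([1.0, 2.0]), local_step=0)

with tf.Session() as sess:
  sess.run(apply_op)
  sess.run(apply_op)
  print(sess.run(acc.num_accumulated()))  # 2 batches applied
  print(sess.run(acc.take_grad(2)))       # mean of the applies: [1. 2.]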
Example #12
    def _train_op_fn(loss):
      """Run one training iteration."""
      if training_state_cache:
        # Cache logits only after center_bias is complete, if it's in progress.
        train_op.append(
            control_flow_ops.cond(
                center_bias_var, control_flow_ops.no_op,
                lambda: training_state_cache.insert(tree_ids, node_ids, logits))
        )

      if closed_form_grad_and_hess_fn:
        gradients, hessians = closed_form_grad_and_hess_fn(logits, labels)
      else:
        gradients = gradients_impl.gradients(loss, logits, name='Gradients')[0]
        hessians = gradients_impl.gradients(
            gradients, logits, name='Hessians')[0]

      stats_summaries_list = []
      for i, feature_ids in enumerate(feature_ids_list):
        num_buckets = bucket_size_list[i]
        summaries = [
            array_ops.squeeze(
                boosted_trees_ops.make_stats_summary(
                    node_ids=node_ids,
                    gradients=gradients,
                    hessians=hessians,
                    bucketized_features_list=[input_feature_list[f]],
                    max_splits=max_splits,
                    num_buckets=num_buckets),
                axis=0) for f in feature_ids
        ]
        stats_summaries_list.append(summaries)

      # ========= Helper methods for both in and not in memory. ==============
      def grow_tree_from_stats_summaries(stats_summaries_list,
                                         feature_ids_list):
        """Updates ensemble based on the best gains from stats summaries."""
        node_ids_per_feature = []
        gains_list = []
        thresholds_list = []
        left_node_contribs_list = []
        right_node_contribs_list = []
        all_feature_ids = []

        assert len(stats_summaries_list) == len(feature_ids_list)

        for i, feature_ids in enumerate(feature_ids_list):
          (numeric_node_ids_per_feature, numeric_gains_list,
           numeric_thresholds_list, numeric_left_node_contribs_list,
           numeric_right_node_contribs_list) = (
               boosted_trees_ops.calculate_best_gains_per_feature(
                   node_id_range=last_layer_nodes_range,
                   stats_summary_list=stats_summaries_list[i],
                   l1=tree_hparams.l1,
                   l2=tree_hparams.l2,
                   tree_complexity=tree_hparams.tree_complexity,
                   min_node_weight=tree_hparams.min_node_weight,
                   max_splits=max_splits))

          all_feature_ids += feature_ids
          node_ids_per_feature += numeric_node_ids_per_feature
          gains_list += numeric_gains_list
          thresholds_list += numeric_thresholds_list
          left_node_contribs_list += numeric_left_node_contribs_list
          right_node_contribs_list += numeric_right_node_contribs_list

        grow_op = boosted_trees_ops.update_ensemble(
            # Confirm if local_tree_ensemble or tree_ensemble should be used.
            tree_ensemble.resource_handle,
            feature_ids=all_feature_ids,
            node_ids=node_ids_per_feature,
            gains=gains_list,
            thresholds=thresholds_list,
            left_node_contribs=left_node_contribs_list,
            right_node_contribs=right_node_contribs_list,
            learning_rate=tree_hparams.learning_rate,
            max_depth=tree_hparams.max_depth,
            pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
        return grow_op

      def _center_bias_fn(mean_gradients, mean_hessians):
        """Updates the ensembles and cache (if needed) with logits prior."""
        continue_centering = boosted_trees_ops.center_bias(
            tree_ensemble.resource_handle,
            mean_gradients=mean_gradients,
            mean_hessians=mean_hessians,
            l1=tree_hparams.l1,
            l2=tree_hparams.l2
        )
        return center_bias_var.assign(continue_centering)

      # ========= End of helper methods. ==============

      if train_in_memory and is_single_machine:
        train_op.append(distribute_lib.increment_var(global_step))

        mean_gradients = array_ops.expand_dims(
            math_ops.reduce_mean(gradients, 0), 0)
        mean_hessians = array_ops.expand_dims(
            math_ops.reduce_mean(hessians, 0), 0)

        train_op.append(
            control_flow_ops.cond(
                center_bias_var,
                lambda: _center_bias_fn(mean_gradients, mean_hessians),
                functools.partial(grow_tree_from_stats_summaries,
                                  stats_summaries_list, feature_ids_list)))
      else:

        def center_bias_not_in_mem():
          """Accumulates the data and updates the logits bias, when ready."""
          bias_dependencies = []

          bias_accumulator = data_flow_ops.ConditionalAccumulator(
              dtype=dtypes.float32,
              # The stats consist of grads and hessians means only.
              # TODO(nponomareva): this will change for a multiclass
              shape=[2, 1],
              shared_name='bias_accumulator')

          grads_and_hess = array_ops.stack([gradients, hessians], axis=0)
          grads_and_hess = math_ops.reduce_mean(grads_and_hess, axis=1)

          apply_grad = bias_accumulator.apply_grad(grads_and_hess, stamp_token)
          bias_dependencies.append(apply_grad)

          def center_bias_from_accumulator():
            accumulated = array_ops.unstack(
                bias_accumulator.take_grad(1), axis=0)
            return _center_bias_fn(
                array_ops.expand_dims(accumulated[0], 0),
                array_ops.expand_dims(accumulated[1], 0))

          with ops.control_dependencies(bias_dependencies):
            if config.is_chief:
              center_bias_op = control_flow_ops.cond(
                  math_ops.greater_equal(bias_accumulator.num_accumulated(),
                                         n_batches_per_layer),
                  center_bias_from_accumulator,
                  control_flow_ops.no_op,
                  name='wait_until_n_batches_for_bias_accumulated')

              return center_bias_op
            else:
              return control_flow_ops.no_op()

        def grow_not_in_mem():
          """Accumulates the data and grows a layer when ready."""

          accumulators = []
          dependencies = []
          for i, feature_ids in enumerate(feature_ids_list):
            stats_summaries = stats_summaries_list[i]
            accumulator = data_flow_ops.ConditionalAccumulator(
                dtype=dtypes.float32,
                # The stats consist of grads and hessians (the last dimension).
                shape=[len(feature_ids), max_splits, bucket_size_list[i], 2],
                shared_name='numeric_stats_summary_accumulator_' + str(i))
            accumulators.append(accumulator)

            apply_grad = accumulator.apply_grad(
                array_ops.stack(stats_summaries, axis=0), stamp_token)
            dependencies.append(apply_grad)

          def grow_tree_from_accumulated_summaries_fn():
            """Updates tree with the best layer from accumulated summaries."""
            # Take out the accumulated summaries from the accumulator and grow.
            stats_summaries_list = [
                array_ops.unstack(accumulator.take_grad(1), axis=0)
                for accumulator in accumulators
            ]

            grow_op = grow_tree_from_stats_summaries(stats_summaries_list,
                                                     feature_ids_list)
            return grow_op

          with ops.control_dependencies(dependencies):
            if config.is_chief:
              min_accumulated = math_ops.reduce_min(
                  array_ops.stack(
                      [acc.num_accumulated() for acc in accumulators]))

              grow_model = control_flow_ops.cond(
                  math_ops.greater_equal(min_accumulated, n_batches_per_layer),
                  grow_tree_from_accumulated_summaries_fn,
                  control_flow_ops.no_op,
                  name='wait_until_n_batches_accumulated')

              return grow_model
            else:
              return control_flow_ops.no_op()

        update_model = control_flow_ops.cond(
            center_bias_var, center_bias_not_in_mem, grow_not_in_mem)
        train_op.append(update_model)
        with ops.control_dependencies([update_model]):
          increment_global = distribute_lib.increment_var(global_step)
          train_op.append(increment_global)

      return control_flow_ops.group(train_op, name='train_op')
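
boosted_trees_ops.center_bias delegates to a C++ kernel, but in essence it performs a regularized Newton step on the logits prior from the mean gradients and hessians, and reports whether centering should continue. A rough NumPy sketch of that step, assuming l1 is zero (the real kernel also applies l1 shrinkage and a convergence test):

import numpy as np

def newton_bias_step(mean_gradients, mean_hessians, l2):
  # Regularized Newton step toward the logits prior: -g / (h + l2).
  return -np.asarray(mean_gradients) / (np.asarray(mean_hessians) + l2)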
Example #16
    def _train_op_fn(loss):
      """Run one training iteration."""
      train_op = []
      if cache:
        train_op.append(cache.insert(tree_ids, node_ids, logits))
      if closed_form_grad_and_hess_fn:
        gradients, hessians = closed_form_grad_and_hess_fn(logits, labels)
      else:
        gradients = gradients_impl.gradients(loss, logits, name='Gradients')[0]
        hessians = gradients_impl.gradients(
            gradients, logits, name='Hessians')[0]
      stats_summary_list = [
          array_ops.squeeze(
              boosted_trees_ops.make_stats_summary(
                  node_ids=node_ids,
                  gradients=gradients,
                  hessians=hessians,
                  bucketized_features_list=[input_feature_list[f]],
                  max_splits=max_splits,
                  num_buckets=num_buckets),
              axis=0) for f in range(num_features)
      ]

      def grow_tree_from_stats_summaries(stats_summary_list):
        """Updates ensemble based on the best gains from stats summaries."""
        (node_ids_per_feature, gains_list, thresholds_list,
         left_node_contribs_list, right_node_contribs_list) = (
             boosted_trees_ops.calculate_best_gains_per_feature(
                 node_id_range=last_layer_nodes_range,
                 stats_summary_list=stats_summary_list,
                 l1=tree_hparams.l1,
                 l2=tree_hparams.l2,
                 tree_complexity=tree_hparams.tree_complexity,
                 max_splits=max_splits))
        grow_op = boosted_trees_ops.update_ensemble(
            # Confirm if local_tree_ensemble or tree_ensemble should be used.
            tree_ensemble.resource_handle,
            feature_ids=math_ops.range(0, num_features, dtype=dtypes.int32),
            node_ids=node_ids_per_feature,
            gains=gains_list,
            thresholds=thresholds_list,
            left_node_contribs=left_node_contribs_list,
            right_node_contribs=right_node_contribs_list,
            learning_rate=tree_hparams.learning_rate,
            max_depth=tree_hparams.max_depth,
            pruning_mode=boosted_trees_ops.PruningMode.NO_PRUNING)
        return grow_op

      if train_in_memory and is_single_machine:
        train_op.append(distribute_lib.increment_var(global_step))
        train_op.append(grow_tree_from_stats_summaries(stats_summary_list))
      else:
        summary_accumulator = data_flow_ops.ConditionalAccumulator(
            dtype=dtypes.float32,
            # The stats consist of gradients and hessians (the last dimension).
            shape=[num_features, max_splits, num_buckets, 2],
            shared_name='stats_summary_accumulator')
        apply_grad = summary_accumulator.apply_grad(
            array_ops.stack(stats_summary_list, axis=0), stamp_token)

        def grow_tree_from_accumulated_summaries_fn():
          """Updates the tree with the best layer from accumulated summaries."""
          # Take out the accumulated summaries from the accumulator and grow.
          stats_summary_list = array_ops.unstack(
              summary_accumulator.take_grad(1), axis=0)
          grow_op = grow_tree_from_stats_summaries(stats_summary_list)
          return grow_op

        with ops.control_dependencies([apply_grad]):
          train_op.append(distribute_lib.increment_var(global_step))
          if config.is_chief:
            train_op.append(
                control_flow_ops.cond(
                    math_ops.greater_equal(
                        summary_accumulator.num_accumulated(),
                        n_batches_per_layer),
                    grow_tree_from_accumulated_summaries_fn,
                    control_flow_ops.no_op,
                    name='wait_until_n_batches_accumulated'))

      return control_flow_ops.group(train_op, name='train_op')
    def _train_op_fn(loss):
      """Run one training iteration."""
      if training_state_cache:
        # Cache logits only after center_bias is complete, if it's in progress.
        train_op.append(
            control_flow_ops.cond(
                center_bias_var, control_flow_ops.no_op,
                lambda: training_state_cache.insert(tree_ids, node_ids, logits))
        )

      if closed_form_grad_and_hess_fn:
        gradients, hessians = closed_form_grad_and_hess_fn(logits, labels)
      else:
        gradients = gradients_impl.gradients(loss, logits, name='Gradients')[0]
        hessians = gradients_impl.gradients(
            gradients, logits, name='Hessians')[0]

      # TODO(youngheek): perhaps storage could be optimized by storing stats
      # with the dimension max_splits_per_layer, instead of max_splits (for the
      # entire tree).
      max_splits = _get_max_splits(tree_hparams)

      stats_summaries_list = []
      for i, feature_ids in enumerate(feature_ids_list):
        num_buckets = bucket_size_list[i]
        summaries = [
            array_ops.squeeze(
                boosted_trees_ops.make_stats_summary(
                    node_ids=node_ids,
                    gradients=gradients,
                    hessians=hessians,
                    bucketized_features_list=[input_feature_list[f]],
                    max_splits=max_splits,
                    num_buckets=num_buckets),
                axis=0) for f in feature_ids
        ]
        stats_summaries_list.append(summaries)

      if train_in_memory and is_single_machine:
        grower = _InMemoryEnsembleGrower(tree_ensemble, tree_hparams)
      else:
        grower = _AccumulatorEnsembleGrower(tree_ensemble, tree_hparams,
                                            stamp_token, n_batches_per_layer,
                                            bucket_size_list, config.is_chief)

      update_model = control_flow_ops.cond(
          center_bias_var,
          functools.partial(
              grower.center_bias,
              center_bias_var,
              gradients,
              hessians,
          ),
          functools.partial(grower.grow_tree, stats_summaries_list,
                            feature_ids_list, last_layer_nodes_range))
      train_op.append(update_model)

      with ops.control_dependencies([update_model]):
        increment_global = distribute_lib.increment_var(global_step)
        train_op.append(increment_global)

      return control_flow_ops.group(train_op, name='train_op')
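
This final variant factors the two growth strategies behind a grower object. The internal _InMemoryEnsembleGrower and _AccumulatorEnsembleGrower classes are not shown in this excerpt; from the call sites above, the interface they share might look like the following sketch (hypothetical, reconstructed from usage):

class _EnsembleGrower(object):
  """Interface sketch: both growers expose the same two entry points."""

  def center_bias(self, center_bias_var, gradients, hessians):
    """Returns an op that updates the logits prior (bias centering)."""
    raise NotImplementedError()

  def grow_tree(self, stats_summaries_list, feature_ids_list,
                last_layer_nodes_range):
    """Returns an op that adds the best layer to the ensemble."""
    raise NotImplementedError()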