    def testInferEstimatorWithCenterBias(self):
        train_input_fn = _make_train_input_fn(is_classification=False)
        predict_input_fn = numpy_io.numpy_input_fn(x=FEATURES_DICT,
                                                   y=None,
                                                   batch_size=1,
                                                   num_epochs=1,
                                                   shuffle=False)

        est = boosted_trees._BoostedTreesEstimator(
            feature_columns=self._feature_columns,
            n_batches_per_layer=1,
            n_trees=1,
            max_depth=5,
            center_bias=True,
            head=self._head)

        # It will stop after 7 steps because of the max depth and num trees (5
        # steps for growing layers plus 2 for bias centering).
        num_steps = 100
        # Train for a few steps, and validate final checkpoint.
        est.train(train_input_fn, steps=num_steps)
        self._assert_checkpoint(est.model_dir,
                                global_step=7,
                                finalized_trees=1,
                                attempted_layers=5)
        # Validate predictions.
        predictions = list(est.predict(input_fn=predict_input_fn))

        self.assertAllClose(
            [[1.634501], [1.325703], [1.187431], [2.019683], [2.832683]],
            [pred['predictions'] for pred in predictions])
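
    # NOTE: `_make_train_input_fn` comes from elsewhere in the test module. A
    # minimal sketch of the shape it plausibly has (the CLASSIFICATION_LABELS /
    # REGRESSION_LABELS fixtures are assumptions, not the original constants):
    #
    #   def _make_train_input_fn(is_classification):
    #       def _input_fn():
    #           features = dict(FEATURES_DICT)
    #           labels = (CLASSIFICATION_LABELS if is_classification
    #                     else REGRESSION_LABELS)
    #           return features, labels
    #       return _input_fn
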
    def testInferEstimator(self):
        train_input_fn = _make_train_input_fn(is_classification=False)
        predict_input_fn = numpy_io.numpy_input_fn(x=FEATURES_DICT,
                                                   y=None,
                                                   batch_size=1,
                                                   num_epochs=1,
                                                   shuffle=False)

        est = boosted_trees._BoostedTreesEstimator(
            feature_columns=self._feature_columns,
            n_batches_per_layer=1,
            n_trees=1,
            max_depth=5,
            head=self._head)

        # It will stop after 5 steps because of the max depth and num trees.
        num_steps = 100
        # Train for a few steps, and validate final checkpoint.
        est.train(train_input_fn, steps=num_steps)
        self._assert_checkpoint(est.model_dir,
                                global_step=5,
                                finalized_trees=1,
                                attempted_layers=5)
        # Validate predictions.
        predictions = list(est.predict(input_fn=predict_input_fn))
        self.assertAllClose(
            [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
            [pred['predictions'] for pred in predictions])
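
    # The `_assert_checkpoint` helper used by these tests is defined elsewhere
    # in the test module. The body below is a plausible reconstruction, not the
    # original: it loads the final checkpoint and compares the global step and
    # the tree-ensemble counters against the expected values. It assumes
    # `checkpoint_utils`, `ops`, and `boosted_trees_pb2` are imported.
    def _assert_checkpoint(self, model_dir, global_step, finalized_trees,
                           attempted_layers):
        reader = checkpoint_utils.load_checkpoint(model_dir)
        self.assertEqual(global_step,
                         reader.get_tensor(ops.GraphKeys.GLOBAL_STEP))
        serialized = reader.get_tensor('boosted_trees:0_serialized')
        ensemble_proto = boosted_trees_pb2.TreeEnsemble()
        ensemble_proto.ParseFromString(serialized)
        self.assertEqual(
            finalized_trees,
            sum(1 for t in ensemble_proto.tree_metadata if t.is_finalized))
        self.assertEqual(attempted_layers,
                         ensemble_proto.growing_metadata.num_layers_attempted)
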
  def testTrainAndEvaluateEstimator(self):
    input_fn = _make_train_input_fn(is_classification=False)

    est = boosted_trees._BoostedTreesEstimator(
        feature_columns=self._feature_columns,
        n_batches_per_layer=1,
        n_trees=2,
        head=self._head,
        max_depth=5)

    # It will stop after 10 steps because of the max depth and num trees.
    num_steps = 100
    # Train for a few steps, and validate final checkpoint.
    est.train(input_fn, steps=num_steps)
    self._assert_checkpoint(
        est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10)
    eval_res = est.evaluate(input_fn=input_fn, steps=1)
    self.assertAllClose(eval_res['average_loss'], 1.008551)
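
  # `Estimator.evaluate` returns a dict of metric values; the regression head
  # used here reports 'average_loss', and the estimator always adds
  # 'global_step'. A minimal usage sketch (any key other than 'average_loss'
  # is an assumption about the default head metrics):
  #
  #   eval_res = est.evaluate(input_fn=input_fn, steps=1)
  #   print('average_loss=%f at step %d'
  #         % (eval_res['average_loss'], eval_res['global_step']))
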
  def testTrainAndEvaluateEstimatorWithPrePruning(self):
    input_fn = _make_train_input_fn(is_classification=False)

    est = boosted_trees._BoostedTreesEstimator(
        feature_columns=self._feature_columns,
        n_batches_per_layer=1,
        n_trees=2,
        head=self._head,
        max_depth=5,
        tree_complexity=0.001,
        pruning_mode='pre')

    num_steps = 100
    # Train for a few steps, and validate final checkpoint.
    est.train(input_fn, steps=num_steps)
    # Training actually stops after 2*depth*n_trees steps (via a hook), because
    # with pre-pruning we still could not grow 2 trees of depth 5.
    self._assert_checkpoint(
        est.model_dir, global_step=21, finalized_trees=0, attempted_layers=21)
    eval_res = est.evaluate(input_fn=input_fn, steps=1)
    self.assertAllClose(eval_res['average_loss'], 3.83943)
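
  # With pruning_mode='pre', a candidate layer whose loss reduction does not
  # beat the `tree_complexity` penalty is rejected before it is grown, which is
  # why no tree is ever finalized above. A 'post' mode (grow fully, then prune
  # back) also exists; a hypothetical variant of the constructor call above:
  #
  #   est = boosted_trees._BoostedTreesEstimator(
  #       feature_columns=self._feature_columns,
  #       n_batches_per_layer=1,
  #       n_trees=2,
  #       head=self._head,
  #       max_depth=5,
  #       tree_complexity=0.001,
  #       pruning_mode='post')
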
    def testContribEstimatorThatDFCIsInPredictions(self):
        # pylint:disable=protected-access
        head = canned_boosted_trees._create_regression_head(label_dimension=1)
        train_input_fn = _make_train_input_fn(is_classification=False)
        predict_input_fn = numpy_io.numpy_input_fn(x=FEATURES_DICT,
                                                   y=None,
                                                   batch_size=1,
                                                   num_epochs=1,
                                                   shuffle=False)

        est = boosted_trees._BoostedTreesEstimator(
            feature_columns=self._feature_columns,
            n_batches_per_layer=1,
            head=head,
            n_trees=1,
            max_depth=5,
            center_bias=True)
        # pylint:enable=protected-access

        num_steps = 100
        # Train for a few steps. Validate debug outputs in prediction dicts.
        est.train(train_input_fn, steps=num_steps)
        debug_predictions = est.experimental_predict_with_explanations(
            predict_input_fn)
        biases, dfcs = zip(*[(pred['bias'], pred['dfc'])
                             for pred in debug_predictions])
        self.assertAllClose([1.8] * 5, biases)
        expected_dfcs = (
            collections.OrderedDict([('f_1_bucketized', -0.09500002861022949),
                                     ('f_0_bucketized', -0.07049942016601562),
                                     ('f_2_bucketized', 0.0)]),
            collections.OrderedDict([('f_0_bucketized', -0.5376303195953369),
                                     ('f_1_bucketized', 0.06333339214324951),
                                     ('f_2_bucketized', 0.0)]),
            collections.OrderedDict([('f_0_bucketized', -0.5175694227218628),
                                     ('f_1_bucketized', -0.09500002861022949),
                                     ('f_2_bucketized', 0.0)]),
            collections.OrderedDict([('f_0_bucketized', 0.1563495397567749),
                                     ('f_1_bucketized', 0.06333339214324951),
                                     ('f_2_bucketized', 0.0)]),
            collections.OrderedDict([('f_0_bucketized', 0.96934974193573),
                                     ('f_1_bucketized', 0.06333339214324951),
                                     ('f_2_bucketized', 0.0)]))
        for expected, dfc in zip(expected_dfcs, dfcs):
            # Materialize the dict views so the assertions compare plain lists
            # (the bare views are not reliably convertible in Python 3).
            self.assertAllEqual(list(expected.keys()), list(dfc.keys()))
            self.assertAllClose(list(expected.values()), list(dfc.values()))
        # Assert sum(dfcs) + bias == predictions.
        expected_predictions = [[1.6345005], [1.32570302], [1.1874305],
                                [2.01968288], [2.83268309]]
        predictions = [[sum(dfc.values()) + bias]
                       for (dfc, bias) in zip(dfcs, biases)]
        self.assertAllClose(expected_predictions, predictions)

        # Test when user doesn't include bias or dfc in predict_keys.
        debug_predictions = est.experimental_predict_with_explanations(
            predict_input_fn, predict_keys=['predictions'])
        for prediction_dict in debug_predictions:
            self.assertIn('bias', prediction_dict)
            self.assertIn('dfc', prediction_dict)
            self.assertIn('predictions', prediction_dict)
            self.assertEqual(len(prediction_dict), 3)
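
# The contract exercised above is that each explained prediction decomposes as
# bias + sum(dfc.values()) == predictions. A standalone checker sketch (the
# helper name and tolerance are illustrative, not part of the original tests):
def _bias_plus_dfcs_matches_prediction(prediction_dict, atol=1e-5):
    """Returns True iff the DFC decomposition reproduces the prediction."""
    total = prediction_dict['bias'] + sum(prediction_dict['dfc'].values())
    return abs(total - prediction_dict['predictions'][0]) <= atol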