def testInferEstimatorWithCenterBias(self):
  train_input_fn = _make_train_input_fn(is_classification=False)
  predict_input_fn = numpy_io.numpy_input_fn(
      x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
  est = boosted_trees._BoostedTreesEstimator(
      feature_columns=self._feature_columns,
      n_batches_per_layer=1,
      n_trees=1,
      max_depth=5,
      center_bias=True,
      head=self._head)

  # It will stop after 7 steps because of the max depth and num trees (5 for
  # training and 2 for bias centering).
  num_steps = 100
  # Train for a few steps, and validate final checkpoint.
  est.train(train_input_fn, steps=num_steps)
  self._assert_checkpoint(
      est.model_dir, global_step=7, finalized_trees=1, attempted_layers=5)
  # Validate predictions.
  predictions = list(est.predict(input_fn=predict_input_fn))
  self.assertAllClose(
      [[1.634501], [1.325703], [1.187431], [2.019683], [2.832683]],
      [pred['predictions'] for pred in predictions])
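# The fixtures used by these tests (FEATURES_DICT, _make_train_input_fn,
# self._feature_columns, self._head) are defined elsewhere in this module.
# A minimal sketch of what the module-level pieces could look like, kept in
# comments to avoid shadowing the real definitions; the shapes and values
# below are illustrative assumptions, not the module's actual data:
#
#   NUM_EXAMPLES = 5
#   FEATURES_DICT = {  # three numeric features, one column each
#       'f_%d' % i: np.random.rand(NUM_EXAMPLES, 1) for i in range(3)
#   }
#   REGRESSION_LABELS = np.random.rand(NUM_EXAMPLES, 1)
#
#   def _make_train_input_fn(is_classification):
#     """Returns an input_fn yielding all examples as a single batch."""
#     def _input_fn():
#       labels = (CLASSIFICATION_LABELS if is_classification
#                 else REGRESSION_LABELS)
#       return dict(FEATURES_DICT), labels
#     return _input_fn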
def testInferEstimator(self):
  train_input_fn = _make_train_input_fn(is_classification=False)
  predict_input_fn = numpy_io.numpy_input_fn(
      x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)
  est = boosted_trees._BoostedTreesEstimator(
      feature_columns=self._feature_columns,
      n_batches_per_layer=1,
      n_trees=1,
      max_depth=5,
      head=self._head)

  # It will stop after 5 steps because of the max depth and num trees.
  num_steps = 100
  # Train for a few steps, and validate final checkpoint.
  est.train(train_input_fn, steps=num_steps)
  self._assert_checkpoint(
      est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
  # Validate predictions.
  predictions = list(est.predict(input_fn=predict_input_fn))
  self.assertAllClose(
      [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
      [pred['predictions'] for pred in predictions])
def testTrainAndEvaluateEstimator(self):
  input_fn = _make_train_input_fn(is_classification=False)
  est = boosted_trees._BoostedTreesEstimator(
      feature_columns=self._feature_columns,
      n_batches_per_layer=1,
      n_trees=2,
      head=self._head,
      max_depth=5)

  # It will stop after 10 steps because of the max depth and num trees.
  num_steps = 100
  # Train for a few steps, and validate final checkpoint.
  est.train(input_fn, steps=num_steps)
  self._assert_checkpoint(
      est.model_dir, global_step=10, finalized_trees=2, attempted_layers=10)
  eval_res = est.evaluate(input_fn=input_fn, steps=1)
  self.assertAllClose(eval_res['average_loss'], 1.008551)
def testTrainAndEvaluateEstimatorWithPrePruning(self):
  input_fn = _make_train_input_fn(is_classification=False)
  est = boosted_trees._BoostedTreesEstimator(
      feature_columns=self._feature_columns,
      n_batches_per_layer=1,
      n_trees=2,
      head=self._head,
      max_depth=5,
      tree_complexity=0.001,
      pruning_mode='pre')

  num_steps = 100
  # Train for a few steps, and validate final checkpoint.
  est.train(input_fn, steps=num_steps)
  # We actually stop after 2*depth*n_trees steps (via a hook) because we still
  # could not grow 2 trees of depth 5 (due to pre-pruning).
  self._assert_checkpoint(
      est.model_dir, global_step=21, finalized_trees=0, attempted_layers=21)
  eval_res = est.evaluate(input_fn=input_fn, steps=1)
  self.assertAllClose(eval_res['average_loss'], 3.83943)
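# Rough step accounting for the pre-pruning assertion above (an informal
# sketch, assuming one global step per attempted layer): without pruning,
# n_trees * max_depth = 10 steps would finalize both trees, as in
# testTrainAndEvaluateEstimator. With pre-pruning rejecting the candidate
# splits, no tree is ever finalized, so the stopping hook only gives up once
# the 2 * max_depth * n_trees = 20 attempt budget is exhausted, with the
# checkpoint recording the step on which the hook trips (global_step=21).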
def testContribEstimatorThatDFCIsInPredictions(self):
  # pylint:disable=protected-access
  head = canned_boosted_trees._create_regression_head(label_dimension=1)
  train_input_fn = _make_train_input_fn(is_classification=False)
  predict_input_fn = numpy_io.numpy_input_fn(
      x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

  est = boosted_trees._BoostedTreesEstimator(
      feature_columns=self._feature_columns,
      n_batches_per_layer=1,
      head=head,
      n_trees=1,
      max_depth=5,
      center_bias=True)
  # pylint:enable=protected-access

  num_steps = 100
  # Train for a few steps. Validate debug outputs in prediction dicts.
  est.train(train_input_fn, steps=num_steps)
  debug_predictions = est.experimental_predict_with_explanations(
      predict_input_fn)
  biases, dfcs = zip(*[(pred['bias'], pred['dfc'])
                       for pred in debug_predictions])
  self.assertAllClose([1.8] * 5, biases)
  expected_dfcs = (
      collections.OrderedDict(
          (('f_1_bucketized', -0.09500002861022949),
           ('f_0_bucketized', -0.07049942016601562),
           ('f_2_bucketized', 0.0))),
      collections.OrderedDict(
          (('f_0_bucketized', -0.5376303195953369),
           ('f_1_bucketized', 0.06333339214324951),
           ('f_2_bucketized', 0.0))),
      collections.OrderedDict(
          (('f_0_bucketized', -0.5175694227218628),
           ('f_1_bucketized', -0.09500002861022949),
           ('f_2_bucketized', 0.0))),
      collections.OrderedDict(
          (('f_0_bucketized', 0.1563495397567749),
           ('f_1_bucketized', 0.06333339214324951),
           ('f_2_bucketized', 0.0))),
      collections.OrderedDict(
          (('f_0_bucketized', 0.96934974193573),
           ('f_1_bucketized', 0.06333339214324951),
           ('f_2_bucketized', 0.0))))
  for expected, dfc in zip(expected_dfcs, dfcs):
    self.assertAllEqual(expected.keys(), dfc.keys())
    self.assertAllClose(expected.values(), dfc.values())
  # Assert sum(dfcs) + bias == predictions.
  expected_predictions = [[1.6345005], [1.32570302], [1.1874305],
                          [2.01968288], [2.83268309]]
  predictions = [[sum(dfc.values()) + bias]
                 for (dfc, bias) in zip(dfcs, biases)]
  self.assertAllClose(expected_predictions, predictions)

  # Test when user doesn't include bias or dfc in predict_keys.
  debug_predictions = est.experimental_predict_with_explanations(
      predict_input_fn, predict_keys=['predictions'])
  for prediction_dict in debug_predictions:
    self.assertTrue('bias' in prediction_dict)
    self.assertTrue('dfc' in prediction_dict)
    self.assertTrue('predictions' in prediction_dict)
    self.assertEqual(len(prediction_dict), 3)
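# For reference, the same bias/DFC decomposition is available through the
# public estimator API. The sketch below is illustrative only: the function
# name and the assumption that the caller supplies bucketized feature columns
# and input_fns are ours, not this module's.


def _explain_predictions_sketch(feature_columns, train_input_fn,
                                predict_input_fn):
  """Illustrative sketch: bias + sum(DFCs) reconstructs each prediction."""
  import tensorflow as tf

  # center_bias=True is required for experimental_predict_with_explanations,
  # so that each prediction decomposes as bias + per-feature contributions.
  est = tf.estimator.BoostedTreesRegressor(
      feature_columns,
      n_batches_per_layer=1,
      n_trees=1,
      max_depth=5,
      center_bias=True)
  est.train(train_input_fn, steps=100)
  for pred in est.experimental_predict_with_explanations(predict_input_fn):
    reconstructed = pred['bias'] + sum(pred['dfc'].values())
    print(pred['predictions'], reconstructed)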