def testRankingDontThrowExceptionForEstimator(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  est = estimator.CoreGradientBoostedDecisionTreeRanker(
      head=head_fn,
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[
          core_feature_column.numeric_column("f1"),
          core_feature_column.numeric_column("f2")
      ],
      ranking_model_pair_keys=("a", "b"))

  # Train for a few steps.
  est.train(input_fn=_ranking_train_input_fn, steps=1000)
  est.evaluate(input_fn=_ranking_train_input_fn, steps=1)
  est.predict(input_fn=_infer_ranking_train_input_fn)
def _get_estimator(output_dir, feature_cols):
  """Configures DNNBoostedTreeCombinedRegressor based on flags."""
  learner_config = learner_pb2.LearnerConfig()
  learner_config.learning_rate_tuner.fixed.learning_rate = (
      FLAGS.tree_learning_rate)
  learner_config.regularization.l1 = 0.0
  learner_config.regularization.l2 = FLAGS.tree_l2
  learner_config.constraints.max_tree_depth = FLAGS.tree_depth

  run_config = tf.contrib.learn.RunConfig(save_summary_steps=1)

  # Create a DNNBoostedTreeCombinedRegressor estimator.
  estimator = DNNBoostedTreeCombinedRegressor(
      dnn_hidden_units=[int(x) for x in FLAGS.dnn_hidden_units.split(",")],
      dnn_feature_columns=feature_cols,
      tree_learner_config=learner_config,
      num_trees=FLAGS.num_trees,
      # This should be the number of examples. For large datasets it can be
      # larger than the batch_size.
      tree_examples_per_layer=FLAGS.batch_size,
      model_dir=output_dir,
      config=run_config,
      dnn_input_layer_to_tree=True,
      dnn_steps_to_train=FLAGS.dnn_steps_to_train)
  return estimator
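# The helper above is normally driven through tf.contrib.learn's
# learn_runner/Experiment machinery. Below is a minimal sketch of that
# wiring; the "x" column and the input_fns are assumptions for
# illustration, not part of this file.
def _make_experiment_fn(output_dir):
  """Returns an Experiment wrapping _get_estimator (sketch)."""
  feature_cols = [tf.contrib.layers.real_valued_column("x")]  # assumed column
  return tf.contrib.learn.Experiment(
      estimator=_get_estimator(output_dir, feature_cols),
      train_input_fn=_train_input_fn,  # assumed input_fn
      eval_input_fn=_eval_input_fn)    # assumed input_fn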
def testFitAndEvaluateMultiClassFullDontThrowException(self):
  n_classes = 3
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = n_classes
  learner_config.constraints.max_tree_depth = 1
  learner_config.multi_class_strategy = (
      learner_pb2.LearnerConfig.FULL_HESSIAN)

  head_fn = estimator.core_multiclass_head(n_classes=n_classes)

  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.CoreGradientBoostedDecisionTreeEstimator(
      learner_config=learner_config,
      head=head_fn,
      num_trees=1,
      center_bias=False,
      examples_per_layer=7,
      model_dir=model_dir,
      config=config,
      feature_columns=[core_feature_column.numeric_column("x")])

  classifier.train(input_fn=_multiclass_train_input_fn, steps=100)
  classifier.evaluate(input_fn=_multiclass_train_input_fn, steps=1)
  classifier.predict(input_fn=_eval_input_fn)
def _get_tfbt(output_dir):
  """Configures TF Boosted Trees estimator based on flags."""
  learner_config = learner_pb2.LearnerConfig()
  num_classes = 10

  learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
  learner_config.num_classes = num_classes
  learner_config.regularization.l1 = 0.0
  learner_config.regularization.l2 = FLAGS.l2 / FLAGS.examples_per_layer
  learner_config.constraints.max_tree_depth = FLAGS.depth

  growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
  learner_config.growing_mode = growing_mode
  run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

  learner_config.multi_class_strategy = (
      learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)

  # Create a TF Boosted trees estimator that can take in custom loss.
  estimator = GradientBoostedDecisionTreeClassifier(
      learner_config=learner_config,
      n_classes=num_classes,
      examples_per_layer=FLAGS.examples_per_layer,
      model_dir=output_dir,
      num_trees=FLAGS.num_trees,
      center_bias=False,
      config=run_config)
  return estimator
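# Direct-usage sketch for the helper above. The model_dir and input_fns are
# assumptions; the returned classifier exposes the standard tf.contrib.learn
# fit/evaluate/predict interface.
def _train_and_eval_sketch():
  est = _get_tfbt("/tmp/mnist_tfbt")  # hypothetical output_dir
  est.fit(input_fn=_train_input_fn, steps=100)  # assumed input_fn
  return est.evaluate(input_fn=_eval_input_fn, steps=1)  # assumed input_fn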
def testQuantileRegression(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 6
  learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
  learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.tree_complexity = (
      1.0 / _QUANTILE_REGRESSION_SIZE)

  train_input_fn, test_input_fn, y = _quantile_regression_input_fns()

  # 95th percentile.
  model_upper = estimator.GradientBoostedDecisionTreeQuantileRegressor(
      quantiles=[0.95],
      learner_config=learner_config,
      num_trees=12,
      examples_per_layer=_QUANTILE_REGRESSION_SIZE,
      center_bias=False)

  model_upper.fit(input_fn=train_input_fn, steps=1000)
  result_iter = model_upper.predict(input_fn=test_input_fn)
  upper = []
  for prediction_dict in result_iter:
    upper.append(prediction_dict["scores"])

  frac_below_upper = round(1. * np.count_nonzero(upper > y) / len(y), 3)
  # +/- 3%.
  self.assertTrue(frac_below_upper >= 0.92)
  self.assertTrue(frac_below_upper <= 0.98)
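# A two-sided calibration check would also bound the labels from below: a
# 5th-percentile model should leave roughly 95% of labels above its
# predictions. A minimal sketch of that companion check, reusing the
# helpers from the test above (the exact bounds mirror the upper check and
# are assumptions):
def _check_lower_quantile_sketch(test, learner_config):
  train_input_fn, test_input_fn, y = _quantile_regression_input_fns()
  # 5th percentile.
  model_lower = estimator.GradientBoostedDecisionTreeQuantileRegressor(
      quantiles=[0.05],
      learner_config=learner_config,
      num_trees=12,
      examples_per_layer=_QUANTILE_REGRESSION_SIZE,
      center_bias=False)
  model_lower.fit(input_fn=train_input_fn, steps=1000)
  lower = [p["scores"] for p in model_lower.predict(input_fn=test_input_fn)]
  frac_above_lower = round(1. * np.count_nonzero(lower < y) / len(y), 3)
  # Roughly 95% of labels should sit above the lower bound, +/- 3%.
  test.assertTrue(0.92 <= frac_above_lower <= 0.98)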
def testFitAndEvaluateMultiClassFullDontThrowException(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 3
  learner_config.constraints.max_tree_depth = 1
  learner_config.multi_class_strategy = (
      learner_pb2.LearnerConfig.FULL_HESSIAN)

  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.GradientBoostedDecisionTreeClassifier(
      learner_config=learner_config,
      n_classes=learner_config.num_classes,
      num_trees=1,
      examples_per_layer=7,
      model_dir=model_dir,
      config=config,
      center_bias=False,
      feature_columns=[contrib_feature_column.real_valued_column("x")])

  classifier.fit(input_fn=_multiclass_train_input_fn, steps=100)
  classifier.evaluate(input_fn=_eval_input_fn, steps=1)
  classifier.export(self._export_dir_base)
  result_iter = classifier.predict(input_fn=_eval_input_fn)
  for prediction_dict in result_iter:
    self.assertTrue("classes" in prediction_dict)
def testCreate(self):
  with self.cached_session():
    tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
    tree = tree_ensemble_config.trees.add()
    _append_to_leaf(tree.nodes.add().leaf, 0, -0.4)
    tree_ensemble_config.tree_weights.append(1.0)

    # Prepare learner config.
    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2

    tree_ensemble_handle = model_ops.tree_ensemble_variable(
        stamp_token=3,
        tree_ensemble_config=tree_ensemble_config.SerializeToString(),
        name="create_tree")
    resources.initialize_resources(resources.shared_resources()).run()

    result, _ = prediction_ops.gradient_trees_prediction(
        tree_ensemble_handle,
        self._seed, [self._dense_float_tensor],
        [self._sparse_float_indices1, self._sparse_float_indices2],
        [self._sparse_float_values1, self._sparse_float_values2],
        [self._sparse_float_shape1, self._sparse_float_shape2],
        [self._sparse_int_indices1], [self._sparse_int_values1],
        [self._sparse_int_shape1],
        learner_config=learner_config.SerializeToString(),
        apply_dropout=False,
        apply_averaging=False,
        center_bias=False,
        reduce_dim=True)
    self.assertAllClose(result.eval(), [[-0.4], [-0.4]])

    stamp_token = model_ops.tree_ensemble_stamp_token(tree_ensemble_handle)
    self.assertEqual(stamp_token.eval(), 3)
def testTrainEvaluateInferDoesNotThrowErrorWithDnnInput(self):
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 3
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  est = estimator.CoreDNNBoostedTreeCombinedEstimator(
      head=head_fn,
      dnn_hidden_units=[1],
      dnn_feature_columns=[core_feature_column.numeric_column("x")],
      tree_learner_config=learner_config,
      num_trees=1,
      tree_examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      dnn_steps_to_train=10,
      dnn_input_layer_to_tree=True,
      tree_feature_columns=[])

  # Train for a few steps.
  est.train(input_fn=_train_input_fn, steps=1000)
  res = est.evaluate(input_fn=_eval_input_fn, steps=1)
  self.assertLess(0.5, res["auc"])
  est.predict(input_fn=_eval_input_fn)
def testWeightedCategoricalColumn(self):
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  feature_columns = [
      core_feature_column.weighted_categorical_column(
          categorical_column=core_feature_column
          .categorical_column_with_vocabulary_list(
              key="word", vocabulary_list=["the", "cat", "dog"]),
          weight_feature_key="weight")
  ]

  labels = np.array([[1], [1], [0], [0.]], dtype=np.float32)

  def _make_input_fn():

    def _input_fn():
      features_dict = {}
      # Sparse tensor representing
      #   example 0: "cat", "the"
      #   example 1: "dog"
      #   example 2: -
      #   example 3: "the"
      # Weights for the words are: cat 5, dog 6, the 1.
      features_dict["word"] = sparse_tensor.SparseTensor(
          indices=[[0, 0], [0, 1], [1, 0], [3, 0]],
          values=constant_op.constant(["the", "cat", "dog", "the"],
                                      dtype=dtypes.string),
          dense_shape=[4, 3])
      features_dict["weight"] = sparse_tensor.SparseTensor(
          indices=[[0, 0], [0, 1], [1, 0], [3, 0]],
          values=[1., 5., 6., 1.],
          dense_shape=[4, 3])
      return features_dict, labels

    return _input_fn

  est = estimator.CoreGradientBoostedDecisionTreeEstimator(
      head=head_fn,
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=feature_columns)

  input_fn = _make_input_fn()
  est.train(input_fn=input_fn, steps=100)
  est.evaluate(input_fn=input_fn, steps=1)
  est.predict(input_fn=input_fn)
def testNoDNNFeatureColumns(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2

  with self.assertRaisesRegexp(ValueError,
                               "dnn_feature_columns must be specified"):
    classifier = estimator.DNNBoostedTreeCombinedClassifier(
        dnn_hidden_units=[1],
        dnn_feature_columns=[],
        tree_learner_config=learner_config,
        num_trees=1,
        tree_examples_per_layer=3,
        n_classes=2)
    classifier.fit(input_fn=_train_input_fn, steps=5)
def testFitAndEvaluateDontThrowException(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.GradientBoostedDecisionTreeClassifier(
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[contrib_feature_column.real_valued_column("x")])

  classifier.fit(input_fn=_train_input_fn, steps=15)
  classifier.evaluate(input_fn=_eval_input_fn, steps=1)
  classifier.export(self._export_dir_base)
def testTrainEvaluateWithDnnForInputAndTreeForPredict(self):
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 3
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  est = estimator.CoreDNNBoostedTreeCombinedEstimator(
      head=head_fn,
      dnn_hidden_units=[1],
      dnn_feature_columns=[core_feature_column.numeric_column("x")],
      tree_learner_config=learner_config,
      num_trees=1,
      tree_examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      dnn_steps_to_train=10,
      dnn_input_layer_to_tree=True,
      predict_with_tree_only=True,
      dnn_to_tree_distillation_param=(0.5, None),
      tree_feature_columns=[])

  # Train for a few steps.
  est.train(input_fn=_train_input_fn, steps=1000)
  res = est.evaluate(input_fn=_eval_input_fn, steps=1)
  self.assertLess(0.5, res["auc"])
  est.predict(input_fn=_eval_input_fn)

  serving_input_fn = (
      export.build_parsing_serving_input_receiver_fn(
          feature_spec={
              "x": parsing_ops.FixedLenFeature([1], dtype=dtypes.float32)
          }))
  base_exporter = exporter.FinalExporter(
      name="Servo",
      serving_input_receiver_fn=serving_input_fn,
      assets_extra=None)
  export_path = os.path.join(model_dir, "export")
  base_exporter.export(
      est,
      export_path=export_path,
      checkpoint_path=None,
      eval_result={},
      is_the_final_export=True)
def testOverridesGlobalSteps(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 2
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.GradientBoostedDecisionTreeClassifier(
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[contrib_feature_column.real_valued_column("x")],
      output_leaf_index=False,
      override_global_step_value=10000000)

  classifier.fit(input_fn=_train_input_fn, steps=15)
  self._assert_checkpoint(classifier.model_dir, global_step=10000000)
def testFitAndEvaluateDontThrowExceptionWithCoreForRegressor(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  regressor = estimator.GradientBoostedDecisionTreeRegressor(
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[core_feature_column.numeric_column("x")],
      use_core_libs=True)

  regressor.fit(input_fn=_train_input_fn, steps=15)
  regressor.evaluate(input_fn=_eval_input_fn, steps=1)
  regressor.export(self._export_dir_base)
def testThatLeafIndexIsInPredictions(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.GradientBoostedDecisionTreeClassifier(
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[contrib_feature_column.real_valued_column("x")],
      output_leaf_index=True)

  classifier.fit(input_fn=_train_input_fn, steps=15)
  result_iter = classifier.predict(input_fn=_eval_input_fn)
  for prediction_dict in result_iter:
    self.assertTrue("leaf_index" in prediction_dict)
    self.assertTrue("logits" in prediction_dict)
def testFitAndEvaluateDontThrowException(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.DNNBoostedTreeCombinedClassifier(
      dnn_hidden_units=[1],
      dnn_feature_columns=[feature_column.real_valued_column("x")],
      tree_learner_config=learner_config,
      num_trees=1,
      tree_examples_per_layer=3,
      n_classes=2,
      model_dir=model_dir,
      config=config,
      dnn_steps_to_train=10,
      dnn_input_layer_to_tree=False,
      tree_feature_columns=[feature_column.real_valued_column("x")])

  classifier.fit(input_fn=_train_input_fn, steps=15)
  classifier.evaluate(input_fn=_eval_input_fn, steps=1)
def _get_tfbt(output_dir, feature_cols):
  """Configures TF Boosted Trees estimator based on flags."""
  learner_config = learner_pb2.LearnerConfig()

  learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
  learner_config.regularization.l1 = 0.0
  learner_config.regularization.l2 = FLAGS.l2
  learner_config.constraints.max_tree_depth = FLAGS.depth

  run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

  # Create a TF Boosted trees regression estimator.
  estimator = GradientBoostedDecisionTreeRegressor(
      learner_config=learner_config,
      # This should be the number of examples. For large datasets it can be
      # larger than the batch_size.
      examples_per_layer=FLAGS.batch_size,
      feature_columns=feature_cols,
      label_dimension=1,
      model_dir=output_dir,
      num_trees=FLAGS.num_trees,
      center_bias=False,
      config=run_config)
  return estimator
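# Callers assemble feature_cols before invoking the helper above. A minimal
# sketch (the column names are assumptions; any numeric features work):
def _make_feature_cols_sketch():
  return [
      tf.contrib.layers.real_valued_column(name)
      for name in ("feature_0", "feature_1")
  ]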
def testTrainEvaluateInferDoesNotThrowError(self):
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  est = estimator.CoreGradientBoostedDecisionTreeEstimator(
      head=head_fn,
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[core_feature_column.numeric_column("x")])

  # Train for a few steps.
  est.train(input_fn=_train_input_fn, steps=1000)
  est.evaluate(input_fn=_eval_input_fn, steps=1)
  est.predict(input_fn=_eval_input_fn)
def testFitAndEvaluateDontThrowExceptionWithCoreForEstimator(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  # Use core head.
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

  model = estimator.GradientBoostedDecisionTreeEstimator(
      head=head_fn,
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[core_feature_column.numeric_column("x")],
      use_core_libs=True)

  model.fit(input_fn=_train_input_fn, steps=15)
  model.evaluate(input_fn=_eval_input_fn, steps=1)
  model.export(self._export_dir_base)
def testQuantileRegressionDoesNotThrowException(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE
  learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.tree_complexity = (
      1.0 / _QUANTILE_REGRESSION_SIZE)

  train_input_fn, test_input_fn, y = _quantile_regression_input_fns()
  y = y.reshape(_QUANTILE_REGRESSION_SIZE, 1)

  # 95th percentile.
  model_upper = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor(
      quantiles=[0.95],
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=_QUANTILE_REGRESSION_SIZE,
      center_bias=False)

  model_upper.train(input_fn=train_input_fn, steps=1000)
  result_iter = model_upper.predict(input_fn=test_input_fn)
def testRestore(self):
  # Calling self.cached_session() without a graph specified results in
  # TensorFlowTestCase caching the session and returning the same one
  # every time. In this test, we need to create two different sessions,
  # which is why we also create a graph and pass it to
  # self.cached_session() to ensure no caching occurs under the hood.
  save_path = os.path.join(self.get_temp_dir(), "restore-test")
  with ops.Graph().as_default() as graph:
    with self.session(graph) as sess:
      # Prepare learner config.
      learner_config = learner_pb2.LearnerConfig()
      learner_config.num_classes = 2

      # Add the first tree and save.
      tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
      tree = tree_ensemble_config.trees.add()
      tree_ensemble_config.tree_metadata.add().is_finalized = True
      tree_ensemble_config.tree_weights.append(1.0)
      _append_to_leaf(tree.nodes.add().leaf, 0, -0.1)
      tree_ensemble_handle = model_ops.tree_ensemble_variable(
          stamp_token=3,
          tree_ensemble_config=tree_ensemble_config.SerializeToString(),
          name="restore_tree")
      resources.initialize_resources(resources.shared_resources()).run()
      variables.global_variables_initializer().run()
      my_saver = saver.Saver()

      # Add the second tree and replace the ensemble of the handle.
      tree2 = tree_ensemble_config.trees.add()
      tree_ensemble_config.tree_weights.append(1.0)
      _append_to_leaf(tree2.nodes.add().leaf, 0, -1.0)
      # Predict to confirm.
      with ops.control_dependencies([
          model_ops.tree_ensemble_deserialize(
              tree_ensemble_handle,
              stamp_token=3,
              tree_ensemble_config=tree_ensemble_config.SerializeToString())
      ]):
        result, _ = prediction_ops.gradient_trees_prediction(
            tree_ensemble_handle,
            self._seed, [self._dense_float_tensor],
            [self._sparse_float_indices1, self._sparse_float_indices2],
            [self._sparse_float_values1, self._sparse_float_values2],
            [self._sparse_float_shape1, self._sparse_float_shape2],
            [self._sparse_int_indices1], [self._sparse_int_values1],
            [self._sparse_int_shape1],
            learner_config=learner_config.SerializeToString(),
            apply_dropout=False,
            apply_averaging=False,
            center_bias=False,
            reduce_dim=True)
      self.assertAllClose([[-1.1], [-1.1]], result.eval())
      # Save before adding other trees.
      val = my_saver.save(sess, save_path)
      self.assertEqual(save_path, val)

      # Add more trees after saving.
      tree3 = tree_ensemble_config.trees.add()
      tree_ensemble_config.tree_weights.append(1.0)
      _append_to_leaf(tree3.nodes.add().leaf, 0, -10.0)
      # Predict to confirm.
      with ops.control_dependencies([
          model_ops.tree_ensemble_deserialize(
              tree_ensemble_handle,
              stamp_token=3,
              tree_ensemble_config=tree_ensemble_config.SerializeToString())
      ]):
        result, _ = prediction_ops.gradient_trees_prediction(
            tree_ensemble_handle,
            self._seed, [self._dense_float_tensor],
            [self._sparse_float_indices1, self._sparse_float_indices2],
            [self._sparse_float_values1, self._sparse_float_values2],
            [self._sparse_float_shape1, self._sparse_float_shape2],
            [self._sparse_int_indices1], [self._sparse_int_values1],
            [self._sparse_int_shape1],
            learner_config=learner_config.SerializeToString(),
            apply_dropout=False,
            apply_averaging=False,
            center_bias=False,
            reduce_dim=True)
      self.assertAllClose(result.eval(), [[-11.1], [-11.1]])

  # Start a second session. In that session the parameter nodes
  # have not been initialized either.
  with ops.Graph().as_default() as graph:
    with self.session(graph) as sess:
      tree_ensemble_handle = model_ops.tree_ensemble_variable(
          stamp_token=0, tree_ensemble_config="", name="restore_tree")
      my_saver = saver.Saver()
      my_saver.restore(sess, save_path)

      result, _ = prediction_ops.gradient_trees_prediction(
          tree_ensemble_handle,
          self._seed, [self._dense_float_tensor],
          [self._sparse_float_indices1, self._sparse_float_indices2],
          [self._sparse_float_values1, self._sparse_float_values2],
          [self._sparse_float_shape1, self._sparse_float_shape2],
          [self._sparse_int_indices1], [self._sparse_int_values1],
          [self._sparse_int_shape1],
          learner_config=learner_config.SerializeToString(),
          apply_dropout=False,
          apply_averaging=False,
          center_bias=False,
          reduce_dim=True)

      # Make sure we only have the first and second tree.
      # The third tree was added after the save.
      self.assertAllClose(result.eval(), [[-1.1], [-1.1]])
def testSerialization(self):
  with ops.Graph().as_default() as graph:
    with self.session(graph):
      tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
      # Bias tree only for second class.
      tree1 = tree_ensemble_config.trees.add()
      _append_to_leaf(tree1.nodes.add().leaf, 1, -0.2)
      tree_ensemble_config.tree_weights.append(1.0)

      # Depth 2 tree.
      tree2 = tree_ensemble_config.trees.add()
      tree_ensemble_config.tree_weights.append(1.0)
      _set_float_split(
          tree2.nodes.add().sparse_float_binary_split_default_right.split,
          1, 4.0, 1, 2)
      _set_float_split(tree2.nodes.add().dense_float_binary_split,
                       0, 9.0, 3, 4)
      _append_to_leaf(tree2.nodes.add().leaf, 0, 0.5)
      _append_to_leaf(tree2.nodes.add().leaf, 1, 1.2)
      _append_to_leaf(tree2.nodes.add().leaf, 0, -0.9)

      tree_ensemble_handle = model_ops.tree_ensemble_variable(
          stamp_token=7,
          tree_ensemble_config=tree_ensemble_config.SerializeToString(),
          name="saver_tree")
      stamp_token, serialized_config = model_ops.tree_ensemble_serialize(
          tree_ensemble_handle)
      resources.initialize_resources(resources.shared_resources()).run()
      self.assertEqual(stamp_token.eval(), 7)
      serialized_config = serialized_config.eval()

  with ops.Graph().as_default() as graph:
    with self.session(graph):
      tree_ensemble_handle2 = model_ops.tree_ensemble_variable(
          stamp_token=9,
          tree_ensemble_config=serialized_config,
          name="saver_tree2")
      resources.initialize_resources(resources.shared_resources()).run()

      # Prepare learner config.
      learner_config = learner_pb2.LearnerConfig()
      learner_config.num_classes = 3

      result, _ = prediction_ops.gradient_trees_prediction(
          tree_ensemble_handle2,
          self._seed, [self._dense_float_tensor],
          [self._sparse_float_indices1, self._sparse_float_indices2],
          [self._sparse_float_values1, self._sparse_float_values2],
          [self._sparse_float_shape1, self._sparse_float_shape2],
          [self._sparse_int_indices1], [self._sparse_int_values1],
          [self._sparse_int_shape1],
          learner_config=learner_config.SerializeToString(),
          apply_dropout=False,
          apply_averaging=False,
          center_bias=False,
          reduce_dim=True)

      # Re-serialize tree.
      stamp_token2, serialized_config2 = model_ops.tree_ensemble_serialize(
          tree_ensemble_handle2)

      # The first example will get bias class 1 -0.2 from the first tree and
      # leaf 2 payload (sparse feature missing) of 0.5, hence [0.5, -0.2];
      # the second example will get the same bias class 1 -0.2 and leaf 3
      # payload of class 1 1.2, hence [0.0, 1.0].
      self.assertEqual(stamp_token2.eval(), 9)
      # Class 2 does not have scores in the leaf => it gets score 0.
      self.assertEqual(serialized_config2.eval(), serialized_config)
      self.assertAllClose(result.eval(), [[0.5, -0.2], [0, 1.0]])
def testForcedInitialSplits(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 3

  initial_subtree = """
    nodes { dense_float_binary_split { feature_column: 0 threshold: -0.5 left_id: 1 right_id: 2 } node_metadata { gain: 0 } }
    nodes { dense_float_binary_split { feature_column: 1 threshold: 0.52 left_id: 3 right_id: 4 } node_metadata { gain: 0 } }
    nodes { dense_float_binary_split { feature_column: 1 threshold: 0.554 left_id: 5 right_id: 6 } node_metadata { gain: 0 } }
    nodes { leaf { vector { value: 0.0 } } }
    nodes { leaf { vector { value: 0.0 } } }
    nodes { leaf { vector { value: 0.0 } } }
    nodes { leaf { vector { value: 0.0 } } }
  """
  tree_proto = tree_config_pb2.DecisionTreeConfig()
  text_format.Merge(initial_subtree, tree_proto)

  # Set initial subtree info.
  learner_config.each_tree_start.CopyFrom(tree_proto)
  learner_config.each_tree_start_num_layers = 2

  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.GradientBoostedDecisionTreeClassifier(
      learner_config=learner_config,
      num_trees=2,
      examples_per_layer=6,
      model_dir=model_dir,
      config=config,
      center_bias=False,
      feature_columns=[contrib_feature_column.real_valued_column("x")],
      output_leaf_index=False)

  classifier.fit(input_fn=_train_input_fn, steps=100)
  # No global-step override is used here, so the checkpoint records the
  # actual number of steps (6).
  ensemble = self._assert_checkpoint_and_return_model(
      classifier.model_dir, global_step=6)

  # TODO(nponomareva): find a better way to test this.
  expected_ensemble = """
    trees {
      nodes { dense_float_binary_split { threshold: -0.5 left_id: 1 right_id: 2 } node_metadata { } }
      nodes { dense_float_binary_split { feature_column: 1 threshold: 0.52 left_id: 3 right_id: 4 } node_metadata { } }
      nodes { dense_float_binary_split { feature_column: 1 threshold: 0.554 left_id: 5 right_id: 6 } node_metadata { } }
      nodes { leaf { vector { value: 0.0 } } }
      nodes { leaf { vector { value: 0.0 } } }
      nodes { dense_float_binary_split { threshold: 1.0 left_id: 7 right_id: 8 } node_metadata { gain: 0.888888895512 } }
      nodes { leaf { vector { value: 0.0 } } }
      nodes { leaf { vector { value: -2.0 } } }
      nodes { leaf { vector { value: 2.00000023842 } } }
    }
    trees {
      nodes { dense_float_binary_split { threshold: -0.5 left_id: 1 right_id: 2 } node_metadata { } }
      nodes { dense_float_binary_split { feature_column: 1 threshold: 0.52 left_id: 3 right_id: 4 } node_metadata { } }
      nodes { dense_float_binary_split { feature_column: 1 threshold: 0.554 left_id: 5 right_id: 6 } node_metadata { } }
      nodes { leaf { vector { value: 0.0 } } }
      nodes { leaf { vector { value: 0.0 } } }
      nodes { dense_float_binary_split { threshold: 1.0 left_id: 7 right_id: 8 } node_metadata { gain: 0.727760672569 } }
      nodes { leaf { vector { value: 0.0 } } }
      nodes { leaf { vector { value: -1.81873059273 } } }
      nodes { leaf { vector { value: 1.81873047352 } } }
    }
    trees {
      nodes { dense_float_binary_split { threshold: -0.5 left_id: 1 right_id: 2 } node_metadata { } }
      nodes { dense_float_binary_split { feature_column: 1 threshold: 0.52 left_id: 3 right_id: 4 } node_metadata { } }
      nodes { dense_float_binary_split { feature_column: 1 threshold: 0.554 left_id: 5 right_id: 6 } node_metadata { } }
      nodes { leaf { vector { value: 0.0 } } }
      nodes { leaf { vector { value: 0.0 } } }
      nodes { leaf { vector { value: 0.0 } } }
      nodes { leaf { vector { value: 0.0 } } }
    }
    tree_weights: 0.10000000149
    tree_weights: 0.10000000149
    tree_weights: 0.10000000149
    tree_metadata { num_tree_weight_updates: 1 num_layers_grown: 3 is_finalized: true }
    tree_metadata { num_tree_weight_updates: 1 num_layers_grown: 3 is_finalized: true }
    tree_metadata { num_tree_weight_updates: 1 num_layers_grown: 2 }
    growing_metadata { num_layers_attempted: 3 }
  """
  self.assertProtoEquals(expected_ensemble, ensemble)