def test_error_gbt_wrong_number_of_trees(self):
  """Closing a 3-class GBT builder with a single tree raises ValueError."""
  gbt_builder = builder_lib.GradientBoostedTreeBuilder(
      path=os.path.join(tmp_path(), "model"),
      objective=py_tree.objective.ClassificationObjective(
          "label", classes=["red", "blue", "green"]))
  # A multi-class GBT needs one tree per class per iteration; a lone
  # regression tree cannot satisfy a 3-class objective.
  gbt_builder.add_tree(Tree(LeafNode(RegressionValue(1, num_examples=10))))
  with self.assertRaises(ValueError):
    gbt_builder.close()
def test_multi_class_classification_gbt(self, file_prefix):
  """Builds a 3-class GBT by hand, reloads it, and checks its softmax output.

  Args:
    file_prefix: Optional model-file prefix forwarded to the builder;
      parametrized by the test runner. When set, the written model files are
      expected to carry this prefix.
  """
  model_path = os.path.join(tmp_path(), "multi_class_classification_gbt")
  logging.info("Create model in %s", model_path)
  builder = builder_lib.GradientBoostedTreeBuilder(
      path=model_path,
      model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
      objective=py_tree.objective.ClassificationObjective(
          label="color", classes=["red", "blue", "green"]),
      file_prefix=file_prefix)

  # One decision stump per class, each contributing +1/-1 to that class'
  # logit:
  #
  # f1>=1.5
  # ├─(pos)─ +1.0 (toward "red")
  # └─(neg)─ -1.0 (toward "red")
  # f1>=2.5
  # ├─(pos)─ +1.0 (toward "blue")
  # └─(neg)─ -1.0 (toward "blue")
  # f1>=3.5
  # ├─(pos)─ +1.0 (toward "green")
  # └─(neg)─ -1.0 (toward "green")
  for threshold in [1.5, 2.5, 3.5]:
    builder.add_tree(
        Tree(
            NonLeafNode(
                condition=NumericalHigherThanCondition(
                    feature=SimpleColumnSpec(
                        name="f1",
                        type=py_tree.dataspec.ColumnType.NUMERICAL),
                    threshold=threshold,
                    missing_evaluation=False),
                pos_child=LeafNode(
                    value=RegressionValue(value=+1, num_examples=30)),
                neg_child=LeafNode(value=RegressionValue(
                    value=-1, num_examples=30)))))
  builder.close()

  # The written model files should carry the requested prefix (if any).
  if file_prefix is not None:
    self.assertEqual(
        inspector_lib.detect_model_file_prefix(
            os.path.join(model_path, "assets")), file_prefix)

  logging.info("Loading model")
  loaded_model = tf.keras.models.load_model(model_path)

  logging.info("Make predictions")
  tf_dataset = tf.data.Dataset.from_tensor_slices({
      "f1": [1.0, 2.0],
  }).batch(2)
  predictions = loaded_model.predict(tf_dataset)

  # f1=1.0: all three stumps take the negative branch, logits are
  # (-1, -1, -1), so the softmax is uniform (1/3 each).
  # f1=2.0: only the first stump fires, logits are (+1, -1, -1).
  soft_max_sum = np.sum(np.exp([+1, -1, -1]))
  self.assertAllClose(predictions,
                      [[1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0],
                       [
                           math.exp(+1) / soft_max_sum,
                           math.exp(-1) / soft_max_sum,
                           math.exp(-1) / soft_max_sum
                       ]])
def test_error_gbt_with_class_tree(self):
  """Adding a probability-leaf tree to a GBT builder raises ValueError."""
  gbt_builder = builder_lib.GradientBoostedTreeBuilder(
      path=os.path.join(tmp_path(), "model"),
      objective=py_tree.objective.ClassificationObjective(
          "label", classes=["red", "blue", "green"]))
  # GBT trees carry regression (logit) leaves; a classification-style
  # probability leaf must be rejected.
  classifier_tree = Tree(
      LeafNode(
          ProbabilityValue(probability=[0.8, 0.1, 0.1], num_examples=10)))
  with self.assertRaises(ValueError):
    gbt_builder.add_tree(classifier_tree)
def test_binary_classification_gbt(self):
  """Builds a one-stump binary GBT, reloads it, and checks its sigmoid output."""
  model_path = os.path.join(tmp_path(), "binary_classification_gbt")
  logging.info("Create model in %s", model_path)
  gbt_builder = builder_lib.GradientBoostedTreeBuilder(
      path=model_path,
      model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
      bias=1.0,
      objective=py_tree.objective.ClassificationObjective(
          label="color", classes=["red", "blue"]))

  # bias: 1.0 (toward "blue")
  # f1>=1.5
  # ├─(pos)─ +1.0 (toward "blue")
  # └─(neg)─ -1.0 (toward "blue")
  f1_column = SimpleColumnSpec(
      name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL)
  stump = Tree(
      NonLeafNode(
          condition=NumericalHigherThanCondition(
              feature=f1_column, threshold=1.5, missing_evaluation=False),
          pos_child=LeafNode(
              value=RegressionValue(value=+1, num_examples=30)),
          neg_child=LeafNode(
              value=RegressionValue(value=-1, num_examples=30))))
  gbt_builder.add_tree(stump)
  gbt_builder.close()

  logging.info("Loading model")
  loaded_model = tf.keras.models.load_model(model_path)

  logging.info("Make predictions")
  dataset = tf.data.Dataset.from_tensor_slices({
      "f1": [1.0, 2.0],
  }).batch(2)
  predictions = loaded_model.predict(dataset)

  # Sigmoid of (bias + leaf value): f1=1.0 -> 1 + (-1) = 0;
  # f1=2.0 -> 1 + 1 = 2.
  self.assertAllClose(
      predictions,
      [[1.0 / (1.0 + math.exp(0.0))], [1.0 / (1.0 + math.exp(-2.0))]])
def test_fast_serving_with_custom_numerical_default_evaluation(self):
  """Checks missing-value routing for custom numerical default evaluations.

  Builds a regression GBT whose numerical conditions have hand-picked
  `missing_evaluation` directions, then verifies that NaN inputs follow
  those directions and that the dataspec means were synthesized so a fast
  inference engine can reproduce them.
  """
  model_path = os.path.join(tmp_path(), "regression_gbt")
  logging.info("Create model in %s", model_path)
  builder = builder_lib.GradientBoostedTreeBuilder(
      path=model_path,
      bias=0.0,
      model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
      objective=py_tree.objective.RegressionObjective(label="label"))

  # f1>=-1.0 (default: false)
  # │
  # ├─f1>=2.0 (default: false)
  # │ │
  # │ ├─1
  # │ └─2
  # └─f2>=-3.0 (default: true)
  #   │
  #   ├─f2>=4.0 (default: false)
  #   │ │
  #   │ ├─3
  #   │ └─4
  #   └─5

  def condition(feature, threshold, missing_evaluation, pos, neg):
    # Builds a numerical "higher-than" split node.
    return NonLeafNode(
        condition=NumericalHigherThanCondition(
            feature=SimpleColumnSpec(
                name=feature, type=py_tree.dataspec.ColumnType.NUMERICAL),
            threshold=threshold,
            missing_evaluation=missing_evaluation),
        pos_child=pos,
        neg_child=neg)

  def leaf(value):
    # Builds a regression leaf with a single supporting example.
    return LeafNode(RegressionValue(value=value, num_examples=1))

  builder.add_tree(
      Tree(
          condition(
              "f1", -1.0, False,
              condition("f1", 2.0, False, leaf(1), leaf(2)),
              condition(
                  "f2", -3.0, True,
                  condition("f2", 4.0, False, leaf(3), leaf(4)),
                  leaf(5),
              ))))
  builder.close()

  logging.info("Loading model")
  # There is no easy way to assert that an optimized inference engine was
  # chosen. If checking manually, make sure the "Use fast generic engine"
  # string is present (instead of the "Use slow generic engine" string).
  #
  # TODO(gbm):: Add API to check which inference engine is used.
  loaded_model = tf.keras.models.load_model(model_path)

  logging.info("Make predictions")
  tf_dataset = tf.data.Dataset.from_tensor_slices({
      "f1": [math.nan, 1.0, -2.0],
      "f2": [-4.0, -4.0, math.nan],
  }).batch(2)
  predictions = loaded_model.predict(tf_dataset)
  # Row 1: f1 is NaN -> default false -> f2=-4.0 fails f2>=-3.0 -> 5.
  # Row 2: f1=1.0 passes f1>=-1.0, fails f1>=2.0 -> 2.
  # Row 3: f1=-2.0 fails f1>=-1.0; f2 is NaN -> default true, then
  #        default false on f2>=4.0 -> 4.
  self.assertAllClose(predictions, [[5.0], [2.0], [4.0]])

  # The builder synthesizes per-feature means that reproduce the requested
  # missing-value directions in fast engines.
  # Fixed: `assertEquals` is a deprecated alias removed in Python 3.12;
  # use `assertEqual`.
  inspector = inspector_lib.make_inspector(
      os.path.join(model_path, "assets"))
  self.assertEqual(inspector.dataspec.columns[1].numerical.mean,
                   -1.0 - 0.5)
  self.assertEqual(inspector.dataspec.columns[2].numerical.mean,
                   (4.0 - 3.0) / 2.0)