コード例 #1
0
    def test_error_gbt_wrong_number_of_trees(self):
        """Checks that `close()` raises when the tree count is wrong.

        A single tree is added to a builder configured with a three-class
        classification objective; finalizing the model must then raise
        `ValueError`.
        """
        model_dir = os.path.join(tmp_path(), "model")
        three_class_objective = py_tree.objective.ClassificationObjective(
            "label", classes=["red", "blue", "green"])
        builder = builder_lib.GradientBoostedTreeBuilder(
            path=model_dir, objective=three_class_objective)

        # Only one tree is registered before the model is finalized.
        single_leaf = LeafNode(RegressionValue(1, num_examples=10))
        builder.add_tree(Tree(single_leaf))

        with self.assertRaises(ValueError):
            builder.close()
コード例 #2
0
    def test_multi_class_classification_gbt(self, file_prefix):
        """Builds a three-class GBT SavedModel by hand and checks its output.

        Args:
          file_prefix: Prefix forwarded to the builder. When it is not None,
            the prefix detected in the exported "assets" directory must be
            equal to it.
        """
        model_path = os.path.join(tmp_path(), "multi_class_classification_gbt")
        logging.info("Create model in %s", model_path)

        color_objective = py_tree.objective.ClassificationObjective(
            label="color", classes=["red", "blue", "green"])
        builder = builder_lib.GradientBoostedTreeBuilder(
            path=model_path,
            model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
            objective=color_objective,
            file_prefix=file_prefix)

        def stump(threshold):
            # Single-split tree on "f1": +1 when f1 >= threshold, -1 otherwise
            # (a missing value is evaluated as False).
            f1_spec = SimpleColumnSpec(
                name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL)
            split = NumericalHigherThanCondition(
                feature=f1_spec, threshold=threshold, missing_evaluation=False)
            positive_leaf = LeafNode(
                value=RegressionValue(value=+1, num_examples=30))
            negative_leaf = LeafNode(
                value=RegressionValue(value=-1, num_examples=30))
            return Tree(
                NonLeafNode(
                    condition=split,
                    pos_child=positive_leaf,
                    neg_child=negative_leaf))

        # One tree per class, added in class order:
        #
        #  f1>=1.5
        #    ├─(pos)─ +1.0 (toward "red")
        #    └─(neg)─ -1.0 (toward "red")
        #  f1>=2.5
        #    ├─(pos)─ +1.0 (toward "blue")
        #    └─(neg)─ -1.0 (toward "blue")
        #  f1>=3.5
        #    ├─(pos)─ +1.0 (toward "green")
        #    └─(neg)─ -1.0 (toward "green")
        for class_threshold in (1.5, 2.5, 3.5):
            builder.add_tree(stump(class_threshold))

        builder.close()

        if file_prefix is not None:
            assets_dir = os.path.join(model_path, "assets")
            detected_prefix = inspector_lib.detect_model_file_prefix(assets_dir)
            self.assertEqual(detected_prefix, file_prefix)

        logging.info("Loading model")
        loaded_model = tf.keras.models.load_model(model_path)

        logging.info("Make predictions")
        features = {"f1": [1.0, 2.0]}
        tf_dataset = tf.data.Dataset.from_tensor_slices(features).batch(2)
        predictions = loaded_model.predict(tf_dataset)

        # f1=1.0 is below every threshold: the three trees all output -1, so
        # the expected distribution is uniform.
        uniform_row = [1.0 / 3.0, 1.0 / 3.0, 1.0 / 3.0]
        # f1=2.0 only satisfies f1>=1.5: softmax over the outputs (+1, -1, -1).
        soft_max_sum = np.sum(np.exp([+1, -1, -1]))
        red_leaning_row = [
            math.exp(tree_output) / soft_max_sum
            for tree_output in (+1, -1, -1)
        ]
        self.assertAllClose(predictions, [uniform_row, red_leaning_row])
コード例 #3
0
    def test_error_gbt_with_class_tree(self):
        """Checks that adding a probability-valued tree raises `ValueError`.

        The builder is a gradient boosted tree builder; the tree offered to it
        has a single leaf holding a `ProbabilityValue`.
        """
        model_dir = os.path.join(tmp_path(), "model")
        three_class_objective = py_tree.objective.ClassificationObjective(
            "label", classes=["red", "blue", "green"])
        builder = builder_lib.GradientBoostedTreeBuilder(
            path=model_dir, objective=three_class_objective)

        # The tree is constructed inside the guarded region as well, so a
        # ValueError raised at any step of building or adding it is accepted.
        with self.assertRaises(ValueError):
            class_leaf = LeafNode(
                ProbabilityValue(probability=[0.8, 0.1, 0.1], num_examples=10))
            builder.add_tree(Tree(class_leaf))
コード例 #4
0
    def test_binary_classification_gbt(self):
        """Builds a two-class GBT SavedModel by hand and checks its output.

        The model has a bias of 1.0 and one single-split tree on "f1"; the
        loaded Keras model must reproduce the hand-computed probabilities.
        """
        model_path = os.path.join(tmp_path(), "binary_classification_gbt")
        logging.info("Create model in %s", model_path)

        color_objective = py_tree.objective.ClassificationObjective(
            label="color", classes=["red", "blue"])
        builder = builder_lib.GradientBoostedTreeBuilder(
            path=model_path,
            model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
            bias=1.0,
            objective=color_objective)

        #  bias: 1.0 (toward "blue")
        #  f1>=1.5
        #    ├─(pos)─ +1.0 (toward "blue")
        #    └─(neg)─ -1.0 (toward "blue")
        f1_spec = SimpleColumnSpec(
            name="f1", type=py_tree.dataspec.ColumnType.NUMERICAL)
        split = NumericalHigherThanCondition(
            feature=f1_spec, threshold=1.5, missing_evaluation=False)
        positive_leaf = LeafNode(
            value=RegressionValue(value=+1, num_examples=30))
        negative_leaf = LeafNode(
            value=RegressionValue(value=-1, num_examples=30))
        builder.add_tree(
            Tree(
                NonLeafNode(
                    condition=split,
                    pos_child=positive_leaf,
                    neg_child=negative_leaf)))

        builder.close()

        logging.info("Loading model")
        loaded_model = tf.keras.models.load_model(model_path)

        logging.info("Make predictions")
        features = {"f1": [1.0, 2.0]}
        tf_dataset = tf.data.Dataset.from_tensor_slices(features).batch(2)
        predictions = loaded_model.predict(tf_dataset)

        def sigmoid(logit):
            return 1.0 / (1.0 + math.exp(-logit))

        # Expected accumulated scores (bias + leaf value):
        #   f1=1.0 -> 1.0 - 1.0 = 0.0
        #   f1=2.0 -> 1.0 + 1.0 = 2.0
        expected = [[sigmoid(0.0)], [sigmoid(2.0)]]
        self.assertAllClose(predictions, expected)
コード例 #5
0
    def test_fast_serving_with_custom_numerical_default_evaluation(self):
        """Checks a regression GBT whose splits mix missing-value defaults.

        A single hand-built tree uses numerical conditions on "f1" and "f2"
        with different `missing_evaluation` settings. The test verifies the
        predictions of the exported SavedModel on inputs containing NaN, and
        the numerical means stored in the exported dataspec.
        """
        model_path = os.path.join(tmp_path(), "regression_gbt")
        logging.info("Create model in %s", model_path)
        builder = builder_lib.GradientBoostedTreeBuilder(
            path=model_path,
            bias=0.0,
            model_format=builder_lib.ModelFormat.TENSORFLOW_SAVED_MODEL,
            objective=py_tree.objective.RegressionObjective(label="label"))

        # f1>=-1.0 (default: false)
        #   │
        #   ├─f1>=2.0 (default: false)
        #   │    │
        #   │    ├─1
        #   │    └─2
        #   └─f2>=-3.0 (default: true)
        #        │
        #        ├─f2>=4.0 (default: false)
        #        │    │
        #        │    ├─3
        #        │    └─4
        #        └─5

        def condition(feature, threshold, missing_evaluation, pos, neg):
            # Non-leaf node testing `feature >= threshold`; `missing_evaluation`
            # is the outcome of the condition when the feature value is missing.
            return NonLeafNode(
                condition=NumericalHigherThanCondition(
                    feature=SimpleColumnSpec(
                        name=feature,
                        type=py_tree.dataspec.ColumnType.NUMERICAL),
                    threshold=threshold,
                    missing_evaluation=missing_evaluation),
                pos_child=pos,
                neg_child=neg)

        def leaf(value):
            # Leaf node holding a constant regression output.
            return LeafNode(RegressionValue(value=value, num_examples=1))

        builder.add_tree(
            Tree(
                condition(
                    "f1", -1.0, False,
                    condition("f1", 2.0, False, leaf(1), leaf(2)),
                    condition(
                        "f2",
                        -3.0,
                        True,
                        condition("f2", 4.0, False, leaf(3), leaf(4)),
                        leaf(5),
                    ))))
        builder.close()

        logging.info("Loading model")

        # There is no easy way to assert that an optimized inference engine was
        # chosen. If checking manually, make sure the "Use fast generic engine"
        # string is present (instead of the "Use slow generic engine" string).
        #
        # TODO(gbm): Add API to check which inference engine is used.

        loaded_model = tf.keras.models.load_model(model_path)

        logging.info("Make predictions")
        tf_dataset = tf.data.Dataset.from_tensor_slices({
            "f1": [math.nan, 1.0, -2.0],
            "f2": [-4.0, -4.0, math.nan],
        }).batch(2)
        predictions = loaded_model.predict(tf_dataset)
        # Expected paths through the tree:
        #   (f1=nan, f2=-4):  f1>=-1 -> false (default); f2>=-3 -> false -> 5
        #   (f1=1,   f2=-4):  f1>=-1 -> true;  f1>=2 -> false            -> 2
        #   (f1=-2,  f2=nan): f1>=-1 -> false; f2>=-3 -> true (default);
        #                     f2>=4 -> false (default)                   -> 4
        self.assertAllClose(predictions, [[5.0], [2.0], [4.0]])

        inspector = inspector_lib.make_inspector(
            os.path.join(model_path, "assets"))
        # NOTE(review): presumably columns 1 and 2 are "f1" and "f2", and the
        # stored mean is the value substituted for a missing input so that it
        # reproduces the `missing_evaluation` settings above -- confirm against
        # the builder implementation.
        #
        # `assertEqual` is used because the `assertEquals` alias was removed
        # from `unittest` in Python 3.12.
        self.assertEqual(inspector.dataspec.columns[1].numerical.mean,
                         -1.0 - 0.5)
        self.assertEqual(inspector.dataspec.columns[2].numerical.mean,
                         (4.0 - 3.0) / 2.0)