Example #1
    def testQuantileRegression(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 6
        learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
        learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.tree_complexity = (
            1.0 / _QUANTILE_REGRESSION_SIZE)

        train_input_fn, test_input_fn, y = _quantile_regression_input_fns()
        # Match the predictions' (N, 1) shape so the comparison below
        # broadcasts elementwise instead of forming an (N, N) matrix.
        y = y.reshape(_QUANTILE_REGRESSION_SIZE, 1)

        # 95th percentile.
        model_upper = estimator.GradientBoostedDecisionTreeQuantileRegressor(
            quantiles=[0.95],
            learner_config=learner_config,
            num_trees=12,
            examples_per_layer=_QUANTILE_REGRESSION_SIZE,
            center_bias=False)

        model_upper.fit(input_fn=train_input_fn, steps=1000)
        result_iter = model_upper.predict(input_fn=test_input_fn)
        upper = []
        for prediction_dict in result_iter:
            upper.append(prediction_dict["scores"])

        frac_below_upper = round(1. * np.count_nonzero(upper > y) / len(y), 3)
        # +/- 3%
        self.assertTrue(frac_below_upper >= 0.92)
        self.assertTrue(frac_below_upper <= 0.98)
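The _quantile_regression_input_fns helper is defined elsewhere in the test module. A minimal sketch of what it might look like, assuming synthetic data whose conditional quantiles are known and the TF 1.x numpy_input_fn API; the data-generating process and exact signature are assumptions, not the original test's code:

import numpy as np
import tensorflow as tf

_QUANTILE_REGRESSION_SIZE = 1000


def _quantile_regression_input_fns():
    # Synthetic data: y = x + Gaussian noise, so the 5%/95% conditional
    # quantiles are known and the coverage assertions are meaningful.
    np.random.seed(1)
    x = np.random.uniform(
        0.0, 5.0, size=_QUANTILE_REGRESSION_SIZE).astype(np.float32)
    noise = np.random.normal(
        0.0, 1.0, size=_QUANTILE_REGRESSION_SIZE).astype(np.float32)
    y = x + noise

    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": x}, y=y, batch_size=_QUANTILE_REGRESSION_SIZE,
        num_epochs=None, shuffle=True)
    test_input_fn = tf.estimator.inputs.numpy_input_fn(
        x={"x": x}, y=None, batch_size=_QUANTILE_REGRESSION_SIZE,
        num_epochs=1, shuffle=False)
    return train_input_fn, test_input_fn, y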
Example #2
    def testFitAndEvaluateMultiClassFullDontThrowException(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 3
        learner_config.constraints.max_tree_depth = 1
        learner_config.multi_class_strategy = (
            learner_pb2.LearnerConfig.FULL_HESSIAN)

        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            n_classes=learner_config.num_classes,
            num_trees=1,
            examples_per_layer=7,
            model_dir=model_dir,
            config=config,
            center_bias=False,
            feature_columns=[contrib_feature_column.real_valued_column("x")])

        classifier.fit(input_fn=_multiclass_train_input_fn, steps=100)
        classifier.evaluate(input_fn=_eval_input_fn, steps=1)
        classifier.export(self._export_dir_base)
        result_iter = classifier.predict(input_fn=_eval_input_fn)
        for prediction_dict in result_iter:
            self.assertTrue("classes" in prediction_dict)
Example #3
def _get_estimator(output_dir, feature_cols):
    """Configures DNNBoostedTreeCombinedRegressor based on flags."""
    learner_config = learner_pb2.LearnerConfig()
    learner_config.learning_rate_tuner.fixed.learning_rate = (
        FLAGS.tree_learning_rate)
    learner_config.regularization.l1 = 0.0
    learner_config.regularization.l2 = FLAGS.tree_l2
    learner_config.constraints.max_tree_depth = FLAGS.tree_depth

    run_config = tf.contrib.learn.RunConfig(save_summary_steps=1)

    # Create a DNNBoostedTreeCombinedRegressor estimator.
    estimator = DNNBoostedTreeCombinedRegressor(
        dnn_hidden_units=[int(x) for x in FLAGS.dnn_hidden_units.split(",")],
        dnn_feature_columns=feature_cols,
        tree_learner_config=learner_config,
        num_trees=FLAGS.num_trees,
        # This should be the number of examples. For large datasets it can be
        # larger than the batch_size.
        tree_examples_per_layer=FLAGS.batch_size,
        model_dir=output_dir,
        config=run_config,
        dnn_input_layer_to_tree=True,
        dnn_steps_to_train=FLAGS.dnn_steps_to_train)
    return estimator
Example #4
    def testFitAndEvaluateDontThrowExceptionWithCore(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        # Use core head
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

        classifier = estimator.DNNBoostedTreeCombinedEstimator(
            head=head_fn,
            dnn_hidden_units=[1],
            # Use core feature columns
            dnn_feature_columns=[core_feature_column.numeric_column("x")],
            tree_learner_config=learner_config,
            num_trees=1,
            tree_examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            dnn_steps_to_train=10,
            dnn_input_layer_to_tree=True,
            tree_feature_columns=[],
            use_core_versions=True)

        classifier.fit(input_fn=_train_input_fn, steps=15)
        classifier.evaluate(input_fn=_eval_input_fn, steps=1)
Example #5
    def testTrainEvaluateInferDoesNotThrowErrorWithDnnInput(self):
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 3
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        est = estimator.CoreDNNBoostedTreeCombinedEstimator(
            head=head_fn,
            dnn_hidden_units=[1],
            dnn_feature_columns=[core_feature_column.numeric_column("x")],
            tree_learner_config=learner_config,
            num_trees=1,
            tree_examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            dnn_steps_to_train=10,
            dnn_input_layer_to_tree=True,
            tree_feature_columns=[])

        # Train for a few steps.
        est.train(input_fn=_train_input_fn, steps=1000)
        res = est.evaluate(input_fn=_eval_input_fn, steps=1)
        self.assertLess(0.5, res["auc"])
        est.predict(input_fn=_eval_input_fn)
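The binary-classification helpers _train_input_fn and _eval_input_fn used throughout these tests are likewise assumed; a sketch with one dense feature and binary labels (values illustrative):

import tensorflow as tf


def _train_input_fn():
    features = {"x": tf.constant([[1.0], [2.0], [3.0], [4.0]])}
    labels = tf.constant([[0], [1], [0], [1]], dtype=tf.int32)
    return features, labels


def _eval_input_fn():
    features = {"x": tf.constant([[1.5], [2.5], [3.5]])}
    labels = tf.constant([[0], [1], [1]], dtype=tf.int32)
    return features, labels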
Example #6
    def testTrainFnNonChiefWithCentering(self):
        """Tests the train function running on worker with bias centering."""
        with self.test_session():
            ensemble_handle = model_ops.tree_ensemble_variable(
                stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
            learner_config = learner_pb2.LearnerConfig()
            learner_config.learning_rate_tuner.fixed.learning_rate = 0.1
            learner_config.num_classes = 2
            learner_config.regularization.l1 = 0
            learner_config.regularization.l2 = 0
            learner_config.constraints.max_tree_depth = 1
            learner_config.constraints.min_node_weight = 0
            features = {}
            features["dense_float"] = array_ops.ones([4, 1], dtypes.float32)

            gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
                is_chief=False,
                num_ps_replicas=0,
                center_bias=True,
                ensemble_handle=ensemble_handle,
                examples_per_layer=1,
                learner_config=learner_config,
                features=features)

            predictions = array_ops.constant([[0.0], [1.0], [0.0], [2.0]],
                                             dtype=dtypes.float32)
            partition_ids = array_ops.zeros([4], dtypes.int32)
            ensemble_stamp = variables.Variable(initial_value=0,
                                                name="ensemble_stamp",
                                                trainable=False,
                                                dtype=dtypes.int64)

            predictions_dict = {
                "predictions": predictions,
                "predictions_no_dropout": predictions,
                "partition_ids": partition_ids,
                "ensemble_stamp": ensemble_stamp
            }

            labels = array_ops.ones([4, 1], dtypes.float32)
            weights = array_ops.ones([4, 1], dtypes.float32)
            # Create train op.
            train_op = gbdt_model.train(loss=math_ops.reduce_mean(
                _squared_loss(labels, weights, predictions)),
                                        predictions_dict=predictions_dict,
                                        labels=labels)
            variables.global_variables_initializer().run()
            resources.initialize_resources(resources.shared_resources()).run()

            # Regardless of how many times the train op is run, a non-chief
            # worker can only accumulate stats, so the tree ensemble never
            # changes.
            for _ in range(5):
                train_op.run()
            stamp_token, serialized = model_ops.tree_ensemble_serialize(
                ensemble_handle)
            output = tree_config_pb2.DecisionTreeEnsembleConfig()
            output.ParseFromString(serialized.eval())
            self.assertEqual(len(output.trees), 0)
            self.assertEqual(len(output.tree_weights), 0)
            self.assertEqual(stamp_token.eval(), 0)
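The _squared_loss helper passed to gbdt_model.train above is also assumed. One version consistent with its call signature (labels, weights, predictions, with the weights argument unused):

from tensorflow.python.ops import math_ops


def _squared_loss(label, unused_weights, predictions):
    """Per-example squared loss, summed over the output dimension."""
    return math_ops.reduce_sum(
        math_ops.squared_difference(predictions, label), 1, keepdims=True)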
Example #7
    def testFitAndEvaluateMultiClassFullDontThrowException(self):
        n_classes = 3
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = n_classes
        learner_config.constraints.max_tree_depth = 1
        learner_config.multi_class_strategy = (
            learner_pb2.LearnerConfig.FULL_HESSIAN)

        head_fn = estimator.core_multiclass_head(n_classes=n_classes)

        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.CoreGradientBoostedDecisionTreeEstimator(
            learner_config=learner_config,
            head=head_fn,
            num_trees=1,
            center_bias=False,
            examples_per_layer=7,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")])

        classifier.train(input_fn=_multiclass_train_input_fn, steps=100)
        classifier.evaluate(input_fn=_multiclass_train_input_fn, steps=1)
        classifier.predict(input_fn=_eval_input_fn)
Example #8
    def testRankingDontThrowExceptionForForEstimator(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        est = estimator.CoreGradientBoostedDecisionTreeRanker(
            head=head_fn,
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[
                core_feature_column.numeric_column("f1"),
                core_feature_column.numeric_column("f2")
            ],
            ranking_model_pair_keys=("a", "b"))

        # Train for a few steps.
        est.train(input_fn=_ranking_train_input_fn, steps=1000)
        est.evaluate(input_fn=_ranking_train_input_fn, steps=1)
        est.predict(input_fn=_infer_ranking_train_input_fn)
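The ranking input functions are not shown either. A sketch under the assumption that the pairwise ranker sees each feature duplicated under the two pair keys joined with a dot ("a.f1", "b.f1", and so on), while inference scores single documents with flat feature keys; the data is illustrative:

import tensorflow as tf


def _ranking_train_input_fn():
    # Pairwise examples: features of document "a" and document "b".
    features = {
        "a.f1": tf.constant([[3.0], [0.3], [1.0]]),
        "a.f2": tf.constant([[0.1], [3.0], [1.0]]),
        "b.f1": tf.constant([[13.0], [0.4], [5.0]]),
        "b.f2": tf.constant([[1.0], [3.0], [0.01]]),
    }
    labels = tf.constant([[0], [0], [1]], dtype=tf.int32)
    return features, labels


def _infer_ranking_train_input_fn():
    # Inference scores individual documents, so features are not paired.
    features = {
        "f1": tf.constant([[3.0], [2.0], [1.0]]),
        "f2": tf.constant([[0.1], [3.0], [1.0]]),
    }
    return features, None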
Example #9
def _get_tfbt(output_dir, feature_cols):
    """Configures TF Boosted Trees estimator based on flags."""
    learner_config = learner_pb2.LearnerConfig()

    learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
    learner_config.regularization.l1 = 0.0
    # Set the per-instance regularization so that the total regularization
    # over the full training data equals the l2 flag.
    learner_config.regularization.l2 = FLAGS.l2 / FLAGS.batch_size
    learner_config.constraints.max_tree_depth = FLAGS.depth
    learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER

    run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=30)

    # Create a TF Boosted trees regression estimator.
    estimator = GradientBoostedDecisionTreeClassifier(
        learner_config=learner_config,
        examples_per_layer=FLAGS.examples_per_layer,
        n_classes=2,
        num_trees=FLAGS.num_trees,
        feature_columns=feature_cols,
        model_dir=output_dir,
        config=run_config,
        center_bias=False)
    return estimator
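Flag-driven examples like this one assume module-level FLAGS defined elsewhere. Compatible definitions with the TF 1.x flags API would look roughly like this (defaults are placeholders):

import tensorflow as tf

flags = tf.app.flags
flags.DEFINE_float("learning_rate", 0.1, "Shrinkage applied to each tree.")
flags.DEFINE_float("l2", 1.0, "Total l2 regularization over the training data.")
flags.DEFINE_integer("batch_size", 10000, "Training batch size.")
flags.DEFINE_integer("depth", 6, "Maximum tree depth.")
flags.DEFINE_integer("num_trees", 100, "Number of trees in the ensemble.")
flags.DEFINE_integer("examples_per_layer", 5000,
                     "Number of examples used to grow each tree layer.")
FLAGS = flags.FLAGS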
Example #10
def main(args):
    learner_config = learner_pb2.LearnerConfig()
    learner_config.learning_rate_tuner.fixed.learning_rate = args.learning_rate
    learner_config.num_classes = args.num_classes
    learner_config.regularization.l1 = 0.0
    learner_config.regularization.l2 = args.l2 / args.examples_per_layer
    learner_config.constraints.max_tree_depth = args.depth
    learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
    learner_config.multi_class_strategy = learner_pb2.LearnerConfig.DIAGONAL_HESSIAN

    head = custom_loss_head.CustomLossHead(
        loss_fn=functools.partial(losses.per_example_maxent_loss, num_classes=args.num_classes),
        link_fn=tf.identity,
        logit_dimension=args.num_classes,
        metrics_fn=_multiclass_metrics)

    estimator = GradientBoostedDecisionTreeEstimator(
        learner_config=learner_config,
        head=head,
        examples_per_layer=args.examples_per_layer,
        num_trees=args.num_trees,
        center_bias=False)

    (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
    X_train = (X_train/255.).reshape(-1, 28*28).astype(np.float32)
    X_test = (X_test/255.).reshape(-1, 28*28).astype(np.float32)
    y_train = y_train.astype(np.int32)
    y_test = y_test.astype(np.int32)

    estimator.fit(input_fn=tf.estimator.inputs.numpy_input_fn(
        x={'_':X_train}, y=y_train, batch_size=args.batch_size, num_epochs=10, shuffle=True))

    estimator.evaluate(input_fn=tf.estimator.inputs.numpy_input_fn(
        x={'_':X_test}, y=y_test, batch_size=args.batch_size, shuffle=False))
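main takes an args namespace; a hypothetical entry point wiring it up with argparse (attribute names inferred from the usages above, defaults are placeholders):

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--learning_rate", type=float, default=0.1)
    parser.add_argument("--num_classes", type=int, default=10)
    parser.add_argument("--l2", type=float, default=1.0)
    parser.add_argument("--depth", type=int, default=4)
    parser.add_argument("--examples_per_layer", type=int, default=5000)
    parser.add_argument("--num_trees", type=int, default=10)
    parser.add_argument("--batch_size", type=int, default=128)
    main(parser.parse_args())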
Example #11
    def testCreate(self):
        with self.test_session():
            tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
            tree = tree_ensemble_config.trees.add()
            _append_to_leaf(tree.nodes.add().leaf, 0, -0.4)
            tree_ensemble_config.tree_weights.append(1.0)

            # Prepare learner config.
            learner_config = learner_pb2.LearnerConfig()
            learner_config.num_classes = 2

            tree_ensemble_handle = model_ops.tree_ensemble_variable(
                stamp_token=3,
                tree_ensemble_config=tree_ensemble_config.SerializeToString(),
                name="create_tree")
            resources.initialize_resources(resources.shared_resources()).run()

            result, _, _ = prediction_ops.gradient_trees_prediction(
                tree_ensemble_handle,
                self._seed, [self._dense_float_tensor],
                [self._sparse_float_indices1, self._sparse_float_indices2],
                [self._sparse_float_values1, self._sparse_float_values2],
                [self._sparse_float_shape1, self._sparse_float_shape2],
                [self._sparse_int_indices1], [self._sparse_int_values1],
                [self._sparse_int_shape1],
                learner_config=learner_config.SerializeToString(),
                apply_dropout=False,
                apply_averaging=False,
                center_bias=False,
                reduce_dim=True)
            self.assertAllClose(result.eval(), [[-0.4], [-0.4]])
            stamp_token = model_ops.tree_ensemble_stamp_token(
                tree_ensemble_handle)
            self.assertEqual(stamp_token.eval(), 3)
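The _append_to_leaf helper used to build the ensemble proto is assumed; one plausible implementation appends a (class_id, weight) pair to the leaf's sparse output vector (whether the original used the sparse or the dense leaf representation is an assumption):

def _append_to_leaf(leaf, class_id, weight):
    """Appends a (class_id, weight) output to the given leaf proto."""
    leaf.sparse_vector.index.append(class_id)
    leaf.sparse_vector.value.append(weight)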
Example #12
def _get_tfbt(output_dir):
    """Configures TF Boosted Trees estimator based on flags."""
    learner_config = learner_pb2.LearnerConfig()

    num_classes = 10

    learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
    learner_config.num_classes = num_classes
    learner_config.regularization.l1 = 0.0
    learner_config.regularization.l2 = FLAGS.l2 / FLAGS.examples_per_layer
    learner_config.constraints.max_tree_depth = FLAGS.depth

    growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
    learner_config.growing_mode = growing_mode
    run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

    learner_config.multi_class_strategy = (
        learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)

    # Create a TF Boosted trees estimator that can take in custom loss.
    estimator = GradientBoostedDecisionTreeClassifier(
        learner_config=learner_config,
        n_classes=num_classes,
        examples_per_layer=FLAGS.examples_per_layer,
        model_dir=output_dir,
        num_trees=FLAGS.num_trees,
        center_bias=False,
        config=run_config)
    return estimator
Example #13
  def testWeightedCategoricalColumn(self):
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 1
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    feature_columns = [
        core_feature_column.weighted_categorical_column(
            categorical_column=core_feature_column
            .categorical_column_with_vocabulary_list(
                key="word", vocabulary_list=["the", "cat", "dog"]),
            weight_feature_key="weight")
    ]

    labels = np.array([[1], [1], [0], [0.]], dtype=np.float32)

    def _make_input_fn():

      def _input_fn():
        features_dict = {}
        # Sparse tensor representing
        # example 0: "cat", "the"
        # example 1: "dog"
        # example 2: -
        # example 3: "the"
        # Weights for the words: cat=5, dog=6, the=1.
        features_dict["word"] = sparse_tensor.SparseTensor(
            indices=[[0, 0], [0, 1], [1, 0], [3, 0]],
            values=constant_op.constant(["the", "cat", "dog", "the"],
                                        dtype=dtypes.string),
            dense_shape=[4, 3])
        features_dict["weight"] = sparse_tensor.SparseTensor(
            indices=[[0, 0], [0, 1], [1, 0], [3, 0]],
            values=[1., 5., 6., 1.],
            dense_shape=[4, 3])
        return features_dict, labels

      return _input_fn

    est = estimator.CoreGradientBoostedDecisionTreeEstimator(
        head=head_fn,
        learner_config=learner_config,
        num_trees=1,
        examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        feature_columns=feature_columns)

    input_fn = _make_input_fn()
    est.train(input_fn=input_fn, steps=100)
    est.evaluate(input_fn=input_fn, steps=1)
    est.predict(input_fn=input_fn)
Example #14
    def testPredictFn(self):
        """Tests the predict function."""
        with self.test_session() as sess:
            # Create ensemble with one bias node.
            ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
            text_format.Merge(
                """
          trees {
            nodes {
              leaf {
                vector {
                  value: 0.25
                }
              }
            }
          }
          tree_weights: 1.0
          tree_metadata {
            num_tree_weight_updates: 1
            num_layers_grown: 1
            is_finalized: true
          }""", ensemble_config)
            ensemble_handle = model_ops.tree_ensemble_variable(
                stamp_token=3,
                tree_ensemble_config=ensemble_config.SerializeToString(),
                name="tree_ensemble")
            resources.initialize_resources(resources.shared_resources()).run()
            learner_config = learner_pb2.LearnerConfig()
            learner_config.learning_rate_tuner.fixed.learning_rate = 0.1
            learner_config.num_classes = 2
            learner_config.regularization.l1 = 0
            learner_config.regularization.l2 = 0
            learner_config.constraints.max_tree_depth = 1
            learner_config.constraints.min_node_weight = 0
            features = {}
            features["dense_float"] = array_ops.ones([4, 1], dtypes.float32)
            gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
                is_chief=False,
                num_ps_replicas=0,
                center_bias=True,
                ensemble_handle=ensemble_handle,
                examples_per_layer=1,
                learner_config=learner_config,
                features=features)

            # Create predict op.
            mode = model_fn.ModeKeys.EVAL
            predictions_dict = sess.run(gbdt_model.predict(mode))
            self.assertEqual(predictions_dict["ensemble_stamp"], 3)
            self.assertAllClose(predictions_dict["predictions"],
                                [[0.25], [0.25], [0.25], [0.25]])
            self.assertAllClose(predictions_dict["partition_ids"],
                                [0, 0, 0, 0])
Example #15
def _get_tfbt(output_dir):
    """Configures TF Boosted Trees estimator based on flags."""
    learner_config = learner_pb2.LearnerConfig()

    num_classes = 10

    learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
    learner_config.num_classes = num_classes
    learner_config.regularization.l1 = 0.0
    learner_config.regularization.l2 = FLAGS.l2 / FLAGS.examples_per_layer
    learner_config.constraints.max_tree_depth = FLAGS.depth

    growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
    learner_config.growing_mode = growing_mode
    run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

    # Use Cross Entropy loss (the impl in losses is twice differentiable).
    loss_fn = functools.partial(losses.per_example_maxent_loss,
                                num_classes=num_classes)
    logit_dim = num_classes
    learner_config.multi_class_strategy = (
        learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)

    # Since we use a custom head, we need to specify how accuracy is calculated.
    def _multiclass_metrics(predictions, labels, weights):
        """Prepares eval metrics for multiclass eval."""
        metrics = dict()
        logits = predictions["scores"]
        classes = math_ops.argmax(logits, 1)
        metrics["accuracy"] = metrics_lib.streaming_accuracy(
            classes, labels, weights)
        return metrics

    metrics_fn = _multiclass_metrics
    # Use custom loss head so we can provide our loss (cross entropy for
    # multiclass).
    head = custom_loss_head.CustomLossHead(loss_fn=loss_fn,
                                           link_fn=tf.identity,
                                           logit_dimension=logit_dim,
                                           metrics_fn=metrics_fn)

    # Create a TF Boosted trees estimator that can take in custom loss.
    estimator = GradientBoostedDecisionTreeEstimator(
        learner_config=learner_config,
        head=head,
        examples_per_layer=FLAGS.examples_per_layer,
        model_dir=output_dir,
        num_trees=FLAGS.num_trees,
        center_bias=False,
        config=run_config)
    return estimator
Example #16
    def testNoDNNFeatureColumns(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2

        with self.assertRaisesRegexp(ValueError,
                                     "dnn_feature_columns must be specified"):
            classifier = estimator.DNNBoostedTreeCombinedClassifier(
                dnn_hidden_units=[1],
                dnn_feature_columns=[],
                tree_learner_config=learner_config,
                num_trees=1,
                tree_examples_per_layer=3,
                n_classes=2)
            classifier.fit(input_fn=_train_input_fn, steps=5)
Example #17
    def testQuantileRegression(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 3
        learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE
        learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.tree_complexity = (
            1.0 / _QUANTILE_REGRESSION_SIZE)

        train_input_fn, test_input_fn, y = _quantile_regression_input_fns()
        y = y.reshape(_QUANTILE_REGRESSION_SIZE, 1)

        # 95th percentile.
        model_upper = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor(
            quantiles=[0.95],
            learner_config=learner_config,
            num_trees=100,
            examples_per_layer=_QUANTILE_REGRESSION_SIZE,
            center_bias=False)

        model_upper.train(input_fn=train_input_fn, steps=1000)
        result_iter = model_upper.predict(input_fn=test_input_fn)
        upper = []
        for prediction_dict in result_iter:
            upper.append(prediction_dict["predictions"])

        frac_below_upper = round(1. * np.count_nonzero(upper > y) / len(y), 3)
        # +/- 3%
        self.assertBetween(frac_below_upper, 0.92, 0.98)

        train_input_fn, test_input_fn, _ = _quantile_regression_input_fns()
        model_lower = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor(
            quantiles=[0.05],
            learner_config=learner_config,
            num_trees=100,
            examples_per_layer=_QUANTILE_REGRESSION_SIZE,
            center_bias=False)

        model_lower.train(input_fn=train_input_fn, steps=1000)
        result_iter = model_lower.predict(input_fn=test_input_fn)
        lower = []
        for prediction_dict in result_iter:
            lower.append(prediction_dict["predictions"])

        frac_above_lower = round(1. * np.count_nonzero(lower < y) / len(y), 3)
        # +/- 3%
        self.assertBetween(frac_above_lower, 0.92, 0.98)
Example #18
  def testTrainEvaluateWithDnnForInputAndTreeForPredict(self):
    head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2
    learner_config.constraints.max_tree_depth = 3
    model_dir = tempfile.mkdtemp()
    config = run_config.RunConfig()

    est = estimator.CoreDNNBoostedTreeCombinedEstimator(
        head=head_fn,
        dnn_hidden_units=[1],
        dnn_feature_columns=[core_feature_column.numeric_column("x")],
        tree_learner_config=learner_config,
        num_trees=1,
        tree_examples_per_layer=3,
        model_dir=model_dir,
        config=config,
        dnn_steps_to_train=10,
        dnn_input_layer_to_tree=True,
        predict_with_tree_only=True,
        dnn_to_tree_distillation_param=(0.5, None),
        tree_feature_columns=[])

    # Train for a few steps.
    est.train(input_fn=_train_input_fn, steps=1000)
    res = est.evaluate(input_fn=_eval_input_fn, steps=1)
    self.assertLess(0.5, res["auc"])
    est.predict(input_fn=_eval_input_fn)
    serving_input_fn = (
        export.build_parsing_serving_input_receiver_fn(
            feature_spec={"x": parsing_ops.FixedLenFeature(
                [1], dtype=dtypes.float32)}))
    base_exporter = exporter.FinalExporter(
        name="Servo",
        serving_input_receiver_fn=serving_input_fn,
        assets_extra=None)
    export_path = os.path.join(model_dir, "export")
    base_exporter.export(
        est,
        export_path=export_path,
        checkpoint_path=None,
        eval_result={},
        is_the_final_export=True)
Example #19
    def testFitAndEvaluateDontThrowException(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[contrib_feature_column.real_valued_column("x")])

        classifier.fit(input_fn=_train_input_fn, steps=15)
        classifier.evaluate(input_fn=_eval_input_fn, steps=1)
        classifier.export(self._export_dir_base)
Example #20
    def set_parameter(self, param):
        for name in self.default_param:
            if name not in param:
                param[name] = self.default_param[name]

        self.build_model()

        self.learner_config = learner_pb2.LearnerConfig()

        self.learner_config.learning_rate_tuner.fixed.learning_rate = float(
            param['learning_rate'])

        self.learner_config.regularization.l1 = 0.0
        self.learner_config.regularization.l2 = float(param['L2']) / int(
            param['examples_per_layer'])
        self.learner_config.constraints.max_tree_depth = int(param['depth'])

        self.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
        self.learner_config.growing_mode = self.growing_mode
        self.run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
        self.model_path = param['model_path']
        self.class_num = int(param['class_num'])
        if param['objective'] == "multiclass":
            self.learner_config.num_classes = self.class_num
            self.learner_config.multi_class_strategy = (
                learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)

            # Create a TF Boosted trees estimator that can take in custom loss.
            self.estimator = GradientBoostedDecisionTreeClassifier(
                learner_config=self.learner_config,
                n_classes=int(self.class_num),
                examples_per_layer=int(param['examples_per_layer']),
                model_dir=self.model_path,
                num_trees=int(param['num_trees']),
                center_bias=False,
                config=self.run_config)
        else:
            pass

        self.batch_size = int(param["batch_size"])
        self.eval_batch_size = int(param['eval_batch_size'])
        self.num_epochs = param["num_epochs"]
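For illustration, a hypothetical call showing the parameter dictionary this method expects (keys taken from the accesses above, values are placeholders; the wrapper class name is invented):

model = TFBTClassifierWrapper()  # hypothetical class exposing set_parameter
model.set_parameter({
    "objective": "multiclass",
    "learning_rate": 0.1,
    "L2": 1.0,
    "examples_per_layer": 5000,
    "depth": 4,
    "num_trees": 50,
    "model_path": "/tmp/tfbt_model",
    "class_num": 10,
    "batch_size": 128,
    "eval_batch_size": 512,
    "num_epochs": 10,
})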
Example #21
    def testFitAndEvaluateDontThrowExceptionWithCoreForRegressor(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        regressor = estimator.GradientBoostedDecisionTreeRegressor(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")],
            use_core_libs=True)

        regressor.fit(input_fn=_train_input_fn, steps=15)
        regressor.evaluate(input_fn=_eval_input_fn, steps=1)
        regressor.export(self._export_dir_base)
Example #22
    def testOverridesGlobalSteps(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 2
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[contrib_feature_column.real_valued_column("x")],
            output_leaf_index=False,
            override_global_step_value=10000000)

        classifier.fit(input_fn=_train_input_fn, steps=15)
        self._assert_checkpoint(classifier.model_dir, global_step=10000000)
Example #23
    def testThatLeafIndexIsInPredictions(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.GradientBoostedDecisionTreeClassifier(
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[contrib_feature_column.real_valued_column("x")],
            output_leaf_index=True)

        classifier.fit(input_fn=_train_input_fn, steps=15)
        result_iter = classifier.predict(input_fn=_eval_input_fn)
        for prediction_dict in result_iter:
            self.assertTrue("leaf_index" in prediction_dict)
            self.assertTrue("logits" in prediction_dict)
Example #24
    def testFitAndEvaluateDontThrowException(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        classifier = estimator.DNNBoostedTreeCombinedClassifier(
            dnn_hidden_units=[1],
            dnn_feature_columns=[feature_column.real_valued_column("x")],
            tree_learner_config=learner_config,
            num_trees=1,
            tree_examples_per_layer=3,
            n_classes=2,
            model_dir=model_dir,
            config=config,
            dnn_steps_to_train=10,
            dnn_input_layer_to_tree=False,
            tree_feature_columns=[feature_column.real_valued_column("x")])

        classifier.fit(input_fn=_train_input_fn, steps=15)
        classifier.evaluate(input_fn=_eval_input_fn, steps=1)
Example #25
def _get_tfbt(output_dir, feature_cols):
  """Configures TF Boosted Trees estimator based on flags."""
  learner_config = learner_pb2.LearnerConfig()
  learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
  learner_config.regularization.l1 = 0.0
  learner_config.regularization.l2 = FLAGS.l2
  learner_config.constraints.max_tree_depth = FLAGS.depth

  run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

  # Create a TF Boosted trees regression estimator.
  estimator = GradientBoostedDecisionTreeRegressor(
      learner_config=learner_config,
      # This should be the number of examples. For large datasets it can be
      # larger than the batch_size.
      examples_per_layer=FLAGS.batch_size,
      feature_columns=feature_cols,
      label_dimension=1,
      model_dir=output_dir,
      num_trees=FLAGS.num_trees,
      center_bias=False,
      config=run_config)
  return estimator
Example #26
    def testTrainEvaluateInferDoesNotThrowError(self):
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        est = estimator.CoreGradientBoostedDecisionTreeEstimator(
            head=head_fn,
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")])

        # Train for a few steps.
        est.train(input_fn=_train_input_fn, steps=1000)
        est.evaluate(input_fn=_eval_input_fn, steps=1)
        est.predict(input_fn=_eval_input_fn)
Example #27
    def testFitAndEvaluateDontThrowExceptionWithCoreForEstimator(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        model_dir = tempfile.mkdtemp()
        config = run_config.RunConfig()

        # Use core head
        head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
            loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

        model = estimator.GradientBoostedDecisionTreeEstimator(
            head=head_fn,
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=3,
            model_dir=model_dir,
            config=config,
            feature_columns=[core_feature_column.numeric_column("x")],
            use_core_libs=True)

        model.fit(input_fn=_train_input_fn, steps=15)
        model.evaluate(input_fn=_eval_input_fn, steps=1)
        model.export(self._export_dir_base)
Example #28
    def testQuantileRegressionDoesNotThroughException(self):
        learner_config = learner_pb2.LearnerConfig()
        learner_config.num_classes = 2
        learner_config.constraints.max_tree_depth = 1
        learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE
        learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE
        learner_config.regularization.tree_complexity = (
            1.0 / _QUANTILE_REGRESSION_SIZE)

        train_input_fn, test_input_fn, y = _quantile_regression_input_fns()
        y = y.reshape(_QUANTILE_REGRESSION_SIZE, 1)

        # 95th percentile.
        model_upper = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor(
            quantiles=[0.95],
            learner_config=learner_config,
            num_trees=1,
            examples_per_layer=_QUANTILE_REGRESSION_SIZE,
            center_bias=False)

        model_upper.train(input_fn=train_input_fn, steps=1000)
        result_iter = model_upper.predict(input_fn=test_input_fn)
Example #29
    def testRestore(self):
        # Calling self.test_session() without a graph specified results in
        # TensorFlowTestCase caching the session and returning the same one
        # every time. In this test, we need to create two different sessions
        # which is why we also create a graph and pass it to self.test_session()
        # to ensure no caching occurs under the hood.
        save_path = os.path.join(self.get_temp_dir(), "restore-test")
        with ops.Graph().as_default() as graph:
            with self.test_session(graph) as sess:
                tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()

                tree = tree_ensemble_config.trees.add()
                tree_ensemble_config.tree_metadata.add().is_finalized = True
                tree_ensemble_config.tree_weights.append(1.0)
                _append_to_leaf(tree.nodes.add().leaf, 0, -0.1)

                tree_ensemble_config2 = tree_config_pb2.DecisionTreeEnsembleConfig()
                tree2 = tree_ensemble_config2.trees.add()
                tree_ensemble_config.tree_weights.append(1.0)
                _append_to_leaf(tree2.nodes.add().leaf, 0, -1.0)

                tree_ensemble_config3 = tree_config_pb2.DecisionTreeEnsembleConfig()
                tree3 = tree_ensemble_config3.trees.add()
                tree_ensemble_config.tree_weights.append(1.0)
                _append_to_leaf(tree3.nodes.add().leaf, 0, -10.0)

                # Prepare learner config.
                learner_config = learner_pb2.LearnerConfig()
                learner_config.num_classes = 2

                tree_ensemble_handle = model_ops.tree_ensemble_variable(
                    stamp_token=3,
                    tree_ensemble_config=tree_ensemble_config.SerializeToString(),
                    name="restore_tree")
                feature_usage_counts = variables.Variable(
                    initial_value=array_ops.zeros([1], dtypes.int64),
                    name="feature_usage_counts",
                    trainable=False)
                feature_gains = variables.Variable(
                    initial_value=array_ops.zeros([1], dtypes.float32),
                    name="feature_gains",
                    trainable=False)

                resources.initialize_resources(
                    resources.shared_resources()).run()
                variables.initialize_all_variables().run()
                my_saver = saver.Saver()

                with ops.control_dependencies([
                        ensemble_optimizer_ops.add_trees_to_ensemble(
                            tree_ensemble_handle,
                            tree_ensemble_config2.SerializeToString(),
                            feature_usage_counts, [0],
                            feature_gains, [0], [[]],
                            learning_rate=1)
                ]):
                    result, _, _ = prediction_ops.gradient_trees_prediction(
                        tree_ensemble_handle,
                        self._seed, [self._dense_float_tensor], [
                            self._sparse_float_indices1,
                            self._sparse_float_indices2
                        ], [
                            self._sparse_float_values1,
                            self._sparse_float_values2
                        ],
                        [self._sparse_float_shape1, self._sparse_float_shape2],
                        [self._sparse_int_indices1],
                        [self._sparse_int_values1], [self._sparse_int_shape1],
                        learner_config=learner_config.SerializeToString(),
                        apply_dropout=False,
                        apply_averaging=False,
                        center_bias=False,
                        reduce_dim=True)
                self.assertAllClose([[-1.1], [-1.1]], result.eval())
                # Save before adding other trees.
                val = my_saver.save(sess, save_path)
                self.assertEqual(save_path, val)

                # Add more trees after saving.
                with ops.control_dependencies([
                        ensemble_optimizer_ops.add_trees_to_ensemble(
                            tree_ensemble_handle,
                            tree_ensemble_config3.SerializeToString(),
                            feature_usage_counts, [0],
                            feature_gains, [0], [[]],
                            learning_rate=1)
                ]):
                    result, _, _ = prediction_ops.gradient_trees_prediction(
                        tree_ensemble_handle,
                        self._seed, [self._dense_float_tensor], [
                            self._sparse_float_indices1,
                            self._sparse_float_indices2
                        ], [
                            self._sparse_float_values1,
                            self._sparse_float_values2
                        ],
                        [self._sparse_float_shape1, self._sparse_float_shape2],
                        [self._sparse_int_indices1],
                        [self._sparse_int_values1], [self._sparse_int_shape1],
                        learner_config=learner_config.SerializeToString(),
                        apply_dropout=False,
                        apply_averaging=False,
                        center_bias=False,
                        reduce_dim=True)
                self.assertAllClose(result.eval(), [[-11.1], [-11.1]])

        # Start a second session.  In that session the parameter nodes
        # have not been initialized either.
        with ops.Graph().as_default() as graph:
            with self.test_session(graph) as sess:
                tree_ensemble_handle = model_ops.tree_ensemble_variable(
                    stamp_token=0,
                    tree_ensemble_config="",
                    name="restore_tree")
                my_saver = saver.Saver()
                my_saver.restore(sess, save_path)
                result, _, _ = prediction_ops.gradient_trees_prediction(
                    tree_ensemble_handle,
                    self._seed, [self._dense_float_tensor],
                    [self._sparse_float_indices1, self._sparse_float_indices2],
                    [self._sparse_float_values1, self._sparse_float_values2],
                    [self._sparse_float_shape1, self._sparse_float_shape2],
                    [self._sparse_int_indices1], [self._sparse_int_values1],
                    [self._sparse_int_shape1],
                    learner_config=learner_config.SerializeToString(),
                    apply_dropout=False,
                    apply_averaging=False,
                    center_bias=False,
                    reduce_dim=True)
                # Make sure we only have the first and second tree.
                # The third tree was added after the save.
                self.assertAllClose(result.eval(), [[-1.1], [-1.1]])
Example #30
    def testSerialization(self):
        with ops.Graph().as_default() as graph:
            with self.test_session(graph):
                tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
                # Bias tree only for second class.
                tree1 = tree_ensemble_config.trees.add()
                _append_to_leaf(tree1.nodes.add().leaf, 1, -0.2)

                tree_ensemble_config.tree_weights.append(1.0)

                # Depth 2 tree.
                tree2 = tree_ensemble_config.trees.add()
                tree_ensemble_config.tree_weights.append(1.0)
                _set_float_split(
                    tree2.nodes.add().sparse_float_binary_split_default_right.
                    split, 1, 4.0, 1, 2)
                _set_float_split(tree2.nodes.add().dense_float_binary_split, 0,
                                 9.0, 3, 4)
                _append_to_leaf(tree2.nodes.add().leaf, 0, 0.5)
                _append_to_leaf(tree2.nodes.add().leaf, 1, 1.2)
                _append_to_leaf(tree2.nodes.add().leaf, 0, -0.9)

                tree_ensemble_handle = model_ops.tree_ensemble_variable(
                    stamp_token=7,
                    tree_ensemble_config=tree_ensemble_config.SerializeToString(),
                    name="saver_tree")
                stamp_token, serialized_config = model_ops.tree_ensemble_serialize(
                    tree_ensemble_handle)
                resources.initialize_resources(
                    resources.shared_resources()).run()
                self.assertEqual(stamp_token.eval(), 7)
                serialized_config = serialized_config.eval()

        with ops.Graph().as_default() as graph:
            with self.test_session(graph):
                tree_ensemble_handle2 = model_ops.tree_ensemble_variable(
                    stamp_token=9,
                    tree_ensemble_config=serialized_config,
                    name="saver_tree2")
                resources.initialize_resources(
                    resources.shared_resources()).run()

                # Prepare learner config.
                learner_config = learner_pb2.LearnerConfig()
                learner_config.num_classes = 3

                result, _, _ = prediction_ops.gradient_trees_prediction(
                    tree_ensemble_handle2,
                    self._seed, [self._dense_float_tensor],
                    [self._sparse_float_indices1, self._sparse_float_indices2],
                    [self._sparse_float_values1, self._sparse_float_values2],
                    [self._sparse_float_shape1, self._sparse_float_shape2],
                    [self._sparse_int_indices1], [self._sparse_int_values1],
                    [self._sparse_int_shape1],
                    learner_config=learner_config.SerializeToString(),
                    apply_dropout=False,
                    apply_averaging=False,
                    center_bias=False,
                    reduce_dim=True)

                # Re-serialize tree.
                stamp_token2, serialized_config2 = model_ops.tree_ensemble_serialize(
                    tree_ensemble_handle2)

                # The first example will get bias class 1 -0.2 from first tree and
                # leaf 2 payload (sparse feature missing) of 0.5 hence [0.5, -0.2],
                # the second example will get the same bias class 1 -0.2 and leaf 3
                # payload of class 1 1.2 hence [0.0, 1.0].
                self.assertEqual(stamp_token2.eval(), 9)

                # Class 2 does not have scores in any leaf => it gets score 0.
                self.assertEqual(serialized_config2.eval(), serialized_config)
                self.assertAllClose(result.eval(), [[0.5, -0.2], [0, 1.0]])
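Finally, the _set_float_split helper used in this last example to build split nodes is also assumed; a minimal version that populates the split proto fields implied by the call sites above (feature column index, threshold, and left/right child ids):

def _set_float_split(split, feature_col, threshold, left_id, right_id):
    """Populates a float split proto with feature, threshold and child ids."""
    split.feature_column = feature_col
    split.threshold = threshold
    split.left_id = left_id
    split.right_id = right_id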