def testQuantileRegression(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 6
  learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
  learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.tree_complexity = (
      1.0 / _QUANTILE_REGRESSION_SIZE)

  train_input_fn, test_input_fn, y = _quantile_regression_input_fns()

  # 95th percentile.
  model_upper = estimator.GradientBoostedDecisionTreeQuantileRegressor(
      quantiles=[0.95],
      learner_config=learner_config,
      num_trees=12,
      examples_per_layer=_QUANTILE_REGRESSION_SIZE,
      center_bias=False)

  model_upper.fit(input_fn=train_input_fn, steps=1000)
  result_iter = model_upper.predict(input_fn=test_input_fn)
  upper = []
  for prediction_dict in result_iter:
    upper.append(prediction_dict["scores"])

  frac_below_upper = round(1. * np.count_nonzero(upper > y) / len(y), 3)
  # +/- 3%
  self.assertTrue(frac_below_upper >= 0.92)
  self.assertTrue(frac_below_upper <= 0.98)
def testFitAndEvaluateMultiClassFullDontThrowException(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 3
  learner_config.constraints.max_tree_depth = 1
  learner_config.multi_class_strategy = (
      learner_pb2.LearnerConfig.FULL_HESSIAN)

  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.GradientBoostedDecisionTreeClassifier(
      learner_config=learner_config,
      n_classes=learner_config.num_classes,
      num_trees=1,
      examples_per_layer=7,
      model_dir=model_dir,
      config=config,
      center_bias=False,
      feature_columns=[contrib_feature_column.real_valued_column("x")])

  classifier.fit(input_fn=_multiclass_train_input_fn, steps=100)
  classifier.evaluate(input_fn=_eval_input_fn, steps=1)
  classifier.export(self._export_dir_base)
  result_iter = classifier.predict(input_fn=_eval_input_fn)
  for prediction_dict in result_iter:
    self.assertTrue("classes" in prediction_dict)
def _get_estimator(output_dir, feature_cols):
  """Configures DNNBoostedTreeCombinedRegressor based on flags."""
  learner_config = learner_pb2.LearnerConfig()
  learner_config.learning_rate_tuner.fixed.learning_rate = (
      FLAGS.tree_learning_rate)
  learner_config.regularization.l1 = 0.0
  learner_config.regularization.l2 = FLAGS.tree_l2
  learner_config.constraints.max_tree_depth = FLAGS.tree_depth

  run_config = tf.contrib.learn.RunConfig(save_summary_steps=1)

  # Create a DNNBoostedTreeCombinedRegressor estimator.
  estimator = DNNBoostedTreeCombinedRegressor(
      dnn_hidden_units=[int(x) for x in FLAGS.dnn_hidden_units.split(",")],
      dnn_feature_columns=feature_cols,
      tree_learner_config=learner_config,
      num_trees=FLAGS.num_trees,
      # This should be the number of examples. For large datasets it can be
      # larger than the batch_size.
      tree_examples_per_layer=FLAGS.batch_size,
      model_dir=output_dir,
      config=run_config,
      dnn_input_layer_to_tree=True,
      dnn_steps_to_train=FLAGS.dnn_steps_to_train)
  return estimator
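# _get_estimator above reads several command-line flags. A minimal sketch of
# the flag definitions it expects follows; the flag names come from the
# function itself, but the default values and help strings are illustrative
# assumptions, not values from the original script.
import tensorflow as tf

tf.app.flags.DEFINE_float("tree_learning_rate", 0.1,
                          "Learning rate for the tree part of the model.")
tf.app.flags.DEFINE_float("tree_l2", 1.0, "L2 regularization for the trees.")
tf.app.flags.DEFINE_integer("tree_depth", 4, "Maximum tree depth.")
tf.app.flags.DEFINE_integer("num_trees", 100, "Number of trees to grow.")
tf.app.flags.DEFINE_integer("batch_size", 1024, "Training batch size.")
tf.app.flags.DEFINE_integer("dnn_steps_to_train", 1000,
                            "Number of steps to train the DNN part.")
tf.app.flags.DEFINE_string("dnn_hidden_units", "100,50",
                           "Comma-separated sizes of the DNN hidden layers.")
FLAGS = tf.app.flags.FLAGS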
def testFitAndEvaluateDontThrowExceptionWithCore(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  # Use a core head.
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

  classifier = estimator.DNNBoostedTreeCombinedEstimator(
      head=head_fn,
      dnn_hidden_units=[1],
      # Use core feature columns.
      dnn_feature_columns=[core_feature_column.numeric_column("x")],
      tree_learner_config=learner_config,
      num_trees=1,
      tree_examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      dnn_steps_to_train=10,
      dnn_input_layer_to_tree=True,
      tree_feature_columns=[],
      use_core_versions=True)

  classifier.fit(input_fn=_train_input_fn, steps=15)
  classifier.evaluate(input_fn=_eval_input_fn, steps=1)
def testTrainEvaluateInferDoesNotThrowErrorWithDnnInput(self):
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 3
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  est = estimator.CoreDNNBoostedTreeCombinedEstimator(
      head=head_fn,
      dnn_hidden_units=[1],
      dnn_feature_columns=[core_feature_column.numeric_column("x")],
      tree_learner_config=learner_config,
      num_trees=1,
      tree_examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      dnn_steps_to_train=10,
      dnn_input_layer_to_tree=True,
      tree_feature_columns=[])

  # Train for a few steps.
  est.train(input_fn=_train_input_fn, steps=1000)
  res = est.evaluate(input_fn=_eval_input_fn, steps=1)
  self.assertLess(0.5, res["auc"])
  est.predict(input_fn=_eval_input_fn)
def testTrainFnNonChiefWithCentering(self):
  """Tests the train function running on worker with bias centering."""
  with self.test_session():
    ensemble_handle = model_ops.tree_ensemble_variable(
        stamp_token=0, tree_ensemble_config="", name="tree_ensemble")
    learner_config = learner_pb2.LearnerConfig()
    learner_config.learning_rate_tuner.fixed.learning_rate = 0.1
    learner_config.num_classes = 2
    learner_config.regularization.l1 = 0
    learner_config.regularization.l2 = 0
    learner_config.constraints.max_tree_depth = 1
    learner_config.constraints.min_node_weight = 0
    features = {}
    features["dense_float"] = array_ops.ones([4, 1], dtypes.float32)

    gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
        is_chief=False,
        num_ps_replicas=0,
        center_bias=True,
        ensemble_handle=ensemble_handle,
        examples_per_layer=1,
        learner_config=learner_config,
        features=features)

    predictions = array_ops.constant(
        [[0.0], [1.0], [0.0], [2.0]], dtype=dtypes.float32)
    partition_ids = array_ops.zeros([4], dtypes.int32)
    ensemble_stamp = variables.Variable(
        initial_value=0,
        name="ensemble_stamp",
        trainable=False,
        dtype=dtypes.int64)

    predictions_dict = {
        "predictions": predictions,
        "predictions_no_dropout": predictions,
        "partition_ids": partition_ids,
        "ensemble_stamp": ensemble_stamp
    }
    labels = array_ops.ones([4, 1], dtypes.float32)
    weights = array_ops.ones([4, 1], dtypes.float32)

    # Create train op.
    train_op = gbdt_model.train(
        loss=math_ops.reduce_mean(
            _squared_loss(labels, weights, predictions)),
        predictions_dict=predictions_dict,
        labels=labels)

    variables.global_variables_initializer().run()
    resources.initialize_resources(resources.shared_resources()).run()

    # Regardless of how many times the train op is run, a non-chief worker
    # can only accumulate stats so the tree ensemble never changes.
    for _ in range(5):
      train_op.run()
    stamp_token, serialized = model_ops.tree_ensemble_serialize(
        ensemble_handle)
    output = tree_config_pb2.DecisionTreeEnsembleConfig()
    output.ParseFromString(serialized.eval())
    self.assertEqual(len(output.trees), 0)
    self.assertEqual(len(output.tree_weights), 0)
    self.assertEqual(stamp_token.eval(), 0)
def testFitAndEvaluateMultiClassFullDontThrowException(self):
  n_classes = 3
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = n_classes
  learner_config.constraints.max_tree_depth = 1
  learner_config.multi_class_strategy = (
      learner_pb2.LearnerConfig.FULL_HESSIAN)

  head_fn = estimator.core_multiclass_head(n_classes=n_classes)

  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.CoreGradientBoostedDecisionTreeEstimator(
      learner_config=learner_config,
      head=head_fn,
      num_trees=1,
      center_bias=False,
      examples_per_layer=7,
      model_dir=model_dir,
      config=config,
      feature_columns=[core_feature_column.numeric_column("x")])

  classifier.train(input_fn=_multiclass_train_input_fn, steps=100)
  classifier.evaluate(input_fn=_multiclass_train_input_fn, steps=1)
  classifier.predict(input_fn=_eval_input_fn)
def testRankingDontThrowExceptionForEstimator(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  est = estimator.CoreGradientBoostedDecisionTreeRanker(
      head=head_fn,
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[
          core_feature_column.numeric_column("f1"),
          core_feature_column.numeric_column("f2")
      ],
      ranking_model_pair_keys=("a", "b"))

  # Train for a few steps.
  est.train(input_fn=_ranking_train_input_fn, steps=1000)
  est.evaluate(input_fn=_ranking_train_input_fn, steps=1)
  est.predict(input_fn=_infer_ranking_train_input_fn)
def _get_tfbt(output_dir, feature_cols):
  """Configures TF Boosted Trees estimator based on flags."""
  learner_config = learner_pb2.LearnerConfig()

  learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
  learner_config.regularization.l1 = 0.0
  # Set the regularization per instance in such a way that
  # regularization for the full training data is equal to the l2 flag.
  learner_config.regularization.l2 = FLAGS.l2 / FLAGS.batch_size
  learner_config.constraints.max_tree_depth = FLAGS.depth
  learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER

  run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=30)

  # Create a TF Boosted Trees classification estimator.
  estimator = GradientBoostedDecisionTreeClassifier(
      learner_config=learner_config,
      examples_per_layer=FLAGS.examples_per_layer,
      n_classes=2,
      num_trees=FLAGS.num_trees,
      feature_columns=feature_cols,
      model_dir=output_dir,
      config=run_config,
      center_bias=False)
  return estimator
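# Usage sketch (an assumption, not part of the original): once the FLAGS it
# reads are defined, the returned contrib estimator follows the
# tf.contrib.learn fit/evaluate API. The toy input_fn below is placeholder
# data for illustration only.
def _example_tfbt_usage():
  feature_cols = [tf.contrib.layers.real_valued_column("x")]
  est = _get_tfbt("/tmp/tfbt_model", feature_cols)

  def _input_fn():
    # Four toy examples with binary labels.
    features = {"x": tf.constant([[1.], [2.], [3.], [4.]])}
    labels = tf.constant([[0], [1], [0], [1]])
    return features, labels

  est.fit(input_fn=_input_fn, steps=10)
  est.evaluate(input_fn=_input_fn, steps=1)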
def main(args):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.learning_rate_tuner.fixed.learning_rate = args.learning_rate
  learner_config.num_classes = args.num_classes
  learner_config.regularization.l1 = 0.0
  learner_config.regularization.l2 = args.l2 / args.examples_per_layer
  learner_config.constraints.max_tree_depth = args.depth
  learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
  learner_config.multi_class_strategy = (
      learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)

  head = custom_loss_head.CustomLossHead(
      loss_fn=functools.partial(
          losses.per_example_maxent_loss, num_classes=args.num_classes),
      link_fn=tf.identity,
      logit_dimension=args.num_classes,
      metrics_fn=_multiclass_metrics)

  estimator = GradientBoostedDecisionTreeEstimator(
      learner_config=learner_config,
      head=head,
      examples_per_layer=args.examples_per_layer,
      num_trees=args.num_trees,
      center_bias=False)

  (X_train, y_train), (X_test, y_test) = tf.keras.datasets.mnist.load_data()
  X_train = (X_train / 255.).reshape(-1, 28 * 28).astype(np.float32)
  X_test = (X_test / 255.).reshape(-1, 28 * 28).astype(np.float32)
  y_train = y_train.astype(np.int32)
  y_test = y_test.astype(np.int32)

  estimator.fit(input_fn=tf.estimator.inputs.numpy_input_fn(
      x={"_": X_train},
      y=y_train,
      batch_size=args.batch_size,
      num_epochs=10,
      shuffle=True))
  estimator.evaluate(input_fn=tf.estimator.inputs.numpy_input_fn(
      x={"_": X_test}, y=y_test, batch_size=args.batch_size, shuffle=False))
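# A sketch (assumption, not from the original) of the argparse wiring that
# would produce the `args` namespace main() consumes; the attribute names
# match the reads in main() above, the defaults are illustrative.
if __name__ == "__main__":
  import argparse

  parser = argparse.ArgumentParser()
  parser.add_argument("--learning_rate", type=float, default=0.1)
  parser.add_argument("--num_classes", type=int, default=10)
  parser.add_argument("--l2", type=float, default=1.0)
  parser.add_argument("--examples_per_layer", type=int, default=5000)
  parser.add_argument("--depth", type=int, default=4)
  parser.add_argument("--num_trees", type=int, default=10)
  parser.add_argument("--batch_size", type=int, default=1024)
  main(parser.parse_args())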
def testCreate(self):
  with self.test_session():
    tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
    tree = tree_ensemble_config.trees.add()
    _append_to_leaf(tree.nodes.add().leaf, 0, -0.4)
    tree_ensemble_config.tree_weights.append(1.0)

    # Prepare learner config.
    learner_config = learner_pb2.LearnerConfig()
    learner_config.num_classes = 2

    tree_ensemble_handle = model_ops.tree_ensemble_variable(
        stamp_token=3,
        tree_ensemble_config=tree_ensemble_config.SerializeToString(),
        name="create_tree")
    resources.initialize_resources(resources.shared_resources()).run()

    result, _, _ = prediction_ops.gradient_trees_prediction(
        tree_ensemble_handle,
        self._seed, [self._dense_float_tensor],
        [self._sparse_float_indices1, self._sparse_float_indices2],
        [self._sparse_float_values1, self._sparse_float_values2],
        [self._sparse_float_shape1, self._sparse_float_shape2],
        [self._sparse_int_indices1], [self._sparse_int_values1],
        [self._sparse_int_shape1],
        learner_config=learner_config.SerializeToString(),
        apply_dropout=False,
        apply_averaging=False,
        center_bias=False,
        reduce_dim=True)
    self.assertAllClose(result.eval(), [[-0.4], [-0.4]])

    stamp_token = model_ops.tree_ensemble_stamp_token(tree_ensemble_handle)
    self.assertEqual(stamp_token.eval(), 3)
def _get_tfbt(output_dir):
  """Configures TF Boosted Trees estimator based on flags."""
  learner_config = learner_pb2.LearnerConfig()
  num_classes = 10

  learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
  learner_config.num_classes = num_classes
  learner_config.regularization.l1 = 0.0
  learner_config.regularization.l2 = FLAGS.l2 / FLAGS.examples_per_layer
  learner_config.constraints.max_tree_depth = FLAGS.depth

  growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
  learner_config.growing_mode = growing_mode
  run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

  learner_config.multi_class_strategy = (
      learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)

  # Create a TF Boosted Trees classifier using the diagonal-Hessian
  # multiclass strategy.
  estimator = GradientBoostedDecisionTreeClassifier(
      learner_config=learner_config,
      n_classes=num_classes,
      examples_per_layer=FLAGS.examples_per_layer,
      model_dir=output_dir,
      num_trees=FLAGS.num_trees,
      center_bias=False,
      config=run_config)
  return estimator
def testWeightedCategoricalColumn(self):
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  feature_columns = [
      core_feature_column.weighted_categorical_column(
          categorical_column=core_feature_column
          .categorical_column_with_vocabulary_list(
              key="word", vocabulary_list=["the", "cat", "dog"]),
          weight_feature_key="weight")
  ]

  labels = np.array([[1], [1], [0], [0.]], dtype=np.float32)

  def _make_input_fn():

    def _input_fn():
      features_dict = {}
      # Sparse tensor representing
      #   example 0: "cat", "the"
      #   example 1: "dog"
      #   example 2: -
      #   example 3: "the"
      # Weights for the words are: "cat" 5, "dog" 6 and "the" 1.
      features_dict["word"] = sparse_tensor.SparseTensor(
          indices=[[0, 0], [0, 1], [1, 0], [3, 0]],
          values=constant_op.constant(
              ["the", "cat", "dog", "the"], dtype=dtypes.string),
          dense_shape=[4, 3])
      features_dict["weight"] = sparse_tensor.SparseTensor(
          indices=[[0, 0], [0, 1], [1, 0], [3, 0]],
          values=[1., 5., 6., 1.],
          dense_shape=[4, 3])
      return features_dict, labels

    return _input_fn

  est = estimator.CoreGradientBoostedDecisionTreeEstimator(
      head=head_fn,
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=feature_columns)

  input_fn = _make_input_fn()
  est.train(input_fn=input_fn, steps=100)
  est.evaluate(input_fn=input_fn, steps=1)
  est.predict(input_fn=input_fn)
def testPredictFn(self):
  """Tests the predict function."""
  with self.test_session() as sess:
    # Create ensemble with one bias node.
    ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
    text_format.Merge(
        """
        trees {
          nodes {
            leaf {
              vector {
                value: 0.25
              }
            }
          }
        }
        tree_weights: 1.0
        tree_metadata {
          num_tree_weight_updates: 1
          num_layers_grown: 1
          is_finalized: true
        }""", ensemble_config)
    ensemble_handle = model_ops.tree_ensemble_variable(
        stamp_token=3,
        tree_ensemble_config=ensemble_config.SerializeToString(),
        name="tree_ensemble")
    resources.initialize_resources(resources.shared_resources()).run()

    learner_config = learner_pb2.LearnerConfig()
    learner_config.learning_rate_tuner.fixed.learning_rate = 0.1
    learner_config.num_classes = 2
    learner_config.regularization.l1 = 0
    learner_config.regularization.l2 = 0
    learner_config.constraints.max_tree_depth = 1
    learner_config.constraints.min_node_weight = 0
    features = {}
    features["dense_float"] = array_ops.ones([4, 1], dtypes.float32)

    gbdt_model = gbdt_batch.GradientBoostedDecisionTreeModel(
        is_chief=False,
        num_ps_replicas=0,
        center_bias=True,
        ensemble_handle=ensemble_handle,
        examples_per_layer=1,
        learner_config=learner_config,
        features=features)

    # Create predict op.
    mode = model_fn.ModeKeys.EVAL
    predictions_dict = sess.run(gbdt_model.predict(mode))
    self.assertEqual(predictions_dict["ensemble_stamp"], 3)
    self.assertAllClose(predictions_dict["predictions"],
                        [[0.25], [0.25], [0.25], [0.25]])
    self.assertAllClose(predictions_dict["partition_ids"], [0, 0, 0, 0])
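# For comparison, a sketch (an assumption, not part of the original test) of
# building the same one-leaf bias ensemble programmatically with
# tree_config_pb2 instead of merging a text proto:
def _make_bias_ensemble_config():
  ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
  tree = ensemble_config.trees.add()
  # One leaf node whose payload vector holds the bias value 0.25.
  tree.nodes.add().leaf.vector.value.append(0.25)
  ensemble_config.tree_weights.append(1.0)
  metadata = ensemble_config.tree_metadata.add()
  metadata.num_tree_weight_updates = 1
  metadata.num_layers_grown = 1
  metadata.is_finalized = True
  return ensemble_config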
def _get_tfbt(output_dir):
  """Configures TF Boosted Trees estimator based on flags."""
  learner_config = learner_pb2.LearnerConfig()
  num_classes = 10

  learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
  learner_config.num_classes = num_classes
  learner_config.regularization.l1 = 0.0
  learner_config.regularization.l2 = FLAGS.l2 / FLAGS.examples_per_layer
  learner_config.constraints.max_tree_depth = FLAGS.depth

  growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
  learner_config.growing_mode = growing_mode
  run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

  # Use cross-entropy loss (the impl in losses is twice differentiable).
  loss_fn = functools.partial(
      losses.per_example_maxent_loss, num_classes=num_classes)
  logit_dim = num_classes
  learner_config.multi_class_strategy = (
      learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)

  # Since we use a custom head, we need to tell it how accuracy is calculated.
  def _multiclass_metrics(predictions, labels, weights):
    """Prepares eval metrics for multiclass eval."""
    metrics = dict()
    logits = predictions["scores"]
    classes = math_ops.argmax(logits, 1)
    metrics["accuracy"] = metrics_lib.streaming_accuracy(
        classes, labels, weights)
    return metrics

  metrics_fn = _multiclass_metrics

  # Use a custom loss head so we can provide our loss (cross entropy for
  # multiclass).
  head = custom_loss_head.CustomLossHead(
      loss_fn=loss_fn,
      link_fn=tf.identity,
      logit_dimension=logit_dim,
      metrics_fn=metrics_fn)

  # Create a TF Boosted Trees estimator that can take in a custom loss.
  estimator = GradientBoostedDecisionTreeEstimator(
      learner_config=learner_config,
      head=head,
      examples_per_layer=FLAGS.examples_per_layer,
      model_dir=output_dir,
      num_trees=FLAGS.num_trees,
      center_bias=False,
      config=run_config)
  return estimator
def testNoDNNFeatureColumns(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2

  with self.assertRaisesRegexp(ValueError,
                               "dnn_feature_columns must be specified"):
    classifier = estimator.DNNBoostedTreeCombinedClassifier(
        dnn_hidden_units=[1],
        dnn_feature_columns=[],
        tree_learner_config=learner_config,
        num_trees=1,
        tree_examples_per_layer=3,
        n_classes=2)
    classifier.fit(input_fn=_train_input_fn, steps=5)
def testQuantileRegression(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 3
  learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE
  learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.tree_complexity = (
      1.0 / _QUANTILE_REGRESSION_SIZE)

  train_input_fn, test_input_fn, y = _quantile_regression_input_fns()
  y = y.reshape(_QUANTILE_REGRESSION_SIZE, 1)

  # 95th percentile.
  model_upper = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor(
      quantiles=[0.95],
      learner_config=learner_config,
      num_trees=100,
      examples_per_layer=_QUANTILE_REGRESSION_SIZE,
      center_bias=False)

  model_upper.train(input_fn=train_input_fn, steps=1000)
  result_iter = model_upper.predict(input_fn=test_input_fn)
  upper = []
  for prediction_dict in result_iter:
    upper.append(prediction_dict["predictions"])

  frac_below_upper = round(1. * np.count_nonzero(upper > y) / len(y), 3)
  # +/- 3%
  self.assertBetween(frac_below_upper, 0.92, 0.98)

  train_input_fn, test_input_fn, _ = _quantile_regression_input_fns()

  # 5th percentile.
  model_lower = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor(
      quantiles=[0.05],
      learner_config=learner_config,
      num_trees=100,
      examples_per_layer=_QUANTILE_REGRESSION_SIZE,
      center_bias=False)

  model_lower.train(input_fn=train_input_fn, steps=1000)
  result_iter = model_lower.predict(input_fn=test_input_fn)
  lower = []
  for prediction_dict in result_iter:
    lower.append(prediction_dict["predictions"])

  frac_above_lower = round(1. * np.count_nonzero(lower < y) / len(y), 3)
  # +/- 3%
  self.assertBetween(frac_above_lower, 0.92, 0.98)
def testTrainEvaluateWithDnnForInputAndTreeForPredict(self):
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 3
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  est = estimator.CoreDNNBoostedTreeCombinedEstimator(
      head=head_fn,
      dnn_hidden_units=[1],
      dnn_feature_columns=[core_feature_column.numeric_column("x")],
      tree_learner_config=learner_config,
      num_trees=1,
      tree_examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      dnn_steps_to_train=10,
      dnn_input_layer_to_tree=True,
      predict_with_tree_only=True,
      dnn_to_tree_distillation_param=(0.5, None),
      tree_feature_columns=[])

  # Train for a few steps.
  est.train(input_fn=_train_input_fn, steps=1000)
  res = est.evaluate(input_fn=_eval_input_fn, steps=1)
  self.assertLess(0.5, res["auc"])
  est.predict(input_fn=_eval_input_fn)

  serving_input_fn = (
      export.build_parsing_serving_input_receiver_fn(
          feature_spec={"x": parsing_ops.FixedLenFeature(
              [1], dtype=dtypes.float32)}))
  base_exporter = exporter.FinalExporter(
      name="Servo",
      serving_input_receiver_fn=serving_input_fn,
      assets_extra=None)
  export_path = os.path.join(model_dir, "export")
  base_exporter.export(
      est,
      export_path=export_path,
      checkpoint_path=None,
      eval_result={},
      is_the_final_export=True)
def testFitAndEvaluateDontThrowException(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.GradientBoostedDecisionTreeClassifier(
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[contrib_feature_column.real_valued_column("x")])

  classifier.fit(input_fn=_train_input_fn, steps=15)
  classifier.evaluate(input_fn=_eval_input_fn, steps=1)
  classifier.export(self._export_dir_base)
def set_parameter(self, param):
  for name in self.default_param:
    if name not in param:
      param[name] = self.default_param[name]
  self.build_model()

  self.learner_config = learner_pb2.LearnerConfig()
  self.learner_config.learning_rate_tuner.fixed.learning_rate = float(
      param['learning_rate'])
  self.learner_config.regularization.l1 = 0.0
  self.learner_config.regularization.l2 = float(param['L2']) / int(
      param['examples_per_layer'])
  self.learner_config.constraints.max_tree_depth = int(param['depth'])
  self.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
  self.learner_config.growing_mode = self.growing_mode
  self.run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
  self.model_path = param['model_path']
  self.class_num = int(param['class_num'])

  if param['objective'] == "multiclass":
    self.learner_config.num_classes = self.class_num
    self.learner_config.multi_class_strategy = (
        learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)
    # Create a TF Boosted Trees classification estimator.
    self.estimator = GradientBoostedDecisionTreeClassifier(
        learner_config=self.learner_config,
        n_classes=self.class_num,
        examples_per_layer=int(param['examples_per_layer']),
        model_dir=self.model_path,
        num_trees=int(param['num_trees']),
        center_bias=False,
        config=self.run_config)
  else:
    pass

  self.batch_size = int(param['batch_size'])
  self.eval_batch_size = int(param['eval_batch_size'])
  self.num_epochs = param['num_epochs']
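# Sketch (an assumption, not part of the original) of a `param` dict that
# set_parameter accepts; the keys mirror the lookups in the method above and
# the values are illustrative.
example_param = {
    'learning_rate': 0.1,
    'L2': 1.0,
    'examples_per_layer': 5000,
    'depth': 4,
    'model_path': '/tmp/tfbt_model',
    'class_num': 10,
    'objective': 'multiclass',
    'num_trees': 10,
    'batch_size': 1024,
    'eval_batch_size': 512,
    'num_epochs': 10,
}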
def testFitAndEvaluateDontThrowExceptionWithCoreForRegressor(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  regressor = estimator.GradientBoostedDecisionTreeRegressor(
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[core_feature_column.numeric_column("x")],
      use_core_libs=True)

  regressor.fit(input_fn=_train_input_fn, steps=15)
  regressor.evaluate(input_fn=_eval_input_fn, steps=1)
  regressor.export(self._export_dir_base)
def testOverridesGlobalSteps(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 2
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.GradientBoostedDecisionTreeClassifier(
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[contrib_feature_column.real_valued_column("x")],
      output_leaf_index=False,
      override_global_step_value=10000000)

  classifier.fit(input_fn=_train_input_fn, steps=15)
  self._assert_checkpoint(classifier.model_dir, global_step=10000000)
def testThatLeafIndexIsInPredictions(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.GradientBoostedDecisionTreeClassifier(
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[contrib_feature_column.real_valued_column("x")],
      output_leaf_index=True)

  classifier.fit(input_fn=_train_input_fn, steps=15)
  result_iter = classifier.predict(input_fn=_eval_input_fn)
  for prediction_dict in result_iter:
    self.assertTrue("leaf_index" in prediction_dict)
    self.assertTrue("logits" in prediction_dict)
def testFitAndEvaluateDontThrowException(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  classifier = estimator.DNNBoostedTreeCombinedClassifier(
      dnn_hidden_units=[1],
      dnn_feature_columns=[feature_column.real_valued_column("x")],
      tree_learner_config=learner_config,
      num_trees=1,
      tree_examples_per_layer=3,
      n_classes=2,
      model_dir=model_dir,
      config=config,
      dnn_steps_to_train=10,
      dnn_input_layer_to_tree=False,
      tree_feature_columns=[feature_column.real_valued_column("x")])

  classifier.fit(input_fn=_train_input_fn, steps=15)
  classifier.evaluate(input_fn=_eval_input_fn, steps=1)
def _get_tfbt(output_dir, feature_cols):
  """Configures TF Boosted Trees estimator based on flags."""
  learner_config = learner_pb2.LearnerConfig()

  learner_config.learning_rate_tuner.fixed.learning_rate = FLAGS.learning_rate
  learner_config.regularization.l1 = 0.0
  learner_config.regularization.l2 = FLAGS.l2
  learner_config.constraints.max_tree_depth = FLAGS.depth

  run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

  # Create a TF Boosted Trees regression estimator.
  estimator = GradientBoostedDecisionTreeRegressor(
      learner_config=learner_config,
      # This should be the number of examples. For large datasets it can be
      # larger than the batch_size.
      examples_per_layer=FLAGS.batch_size,
      feature_columns=feature_cols,
      label_dimension=1,
      model_dir=output_dir,
      num_trees=FLAGS.num_trees,
      center_bias=False,
      config=run_config)
  return estimator
def testTrainEvaluateInferDoesNotThrowError(self):
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  est = estimator.CoreGradientBoostedDecisionTreeEstimator(
      head=head_fn,
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[core_feature_column.numeric_column("x")])

  # Train for a few steps.
  est.train(input_fn=_train_input_fn, steps=1000)
  est.evaluate(input_fn=_eval_input_fn, steps=1)
  est.predict(input_fn=_eval_input_fn)
def testFitAndEvaluateDontThrowExceptionWithCoreForEstimator(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  model_dir = tempfile.mkdtemp()
  config = run_config.RunConfig()

  # Use a core head.
  head_fn = head_lib._binary_logistic_head_with_sigmoid_cross_entropy_loss(
      loss_reduction=losses.Reduction.SUM_OVER_BATCH_SIZE)

  model = estimator.GradientBoostedDecisionTreeEstimator(
      head=head_fn,
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=3,
      model_dir=model_dir,
      config=config,
      feature_columns=[core_feature_column.numeric_column("x")],
      use_core_libs=True)

  model.fit(input_fn=_train_input_fn, steps=15)
  model.evaluate(input_fn=_eval_input_fn, steps=1)
  model.export(self._export_dir_base)
def testQuantileRegressionDoesNotThrowException(self):
  learner_config = learner_pb2.LearnerConfig()
  learner_config.num_classes = 2
  learner_config.constraints.max_tree_depth = 1
  learner_config.growing_mode = learner_pb2.LearnerConfig.WHOLE_TREE
  learner_config.constraints.min_node_weight = 1 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.l2 = 1.0 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.l1 = 1.0 / _QUANTILE_REGRESSION_SIZE
  learner_config.regularization.tree_complexity = (
      1.0 / _QUANTILE_REGRESSION_SIZE)

  train_input_fn, test_input_fn, y = _quantile_regression_input_fns()
  y = y.reshape(_QUANTILE_REGRESSION_SIZE, 1)

  # 95th percentile.
  model_upper = estimator.CoreGradientBoostedDecisionTreeQuantileRegressor(
      quantiles=[0.95],
      learner_config=learner_config,
      num_trees=1,
      examples_per_layer=_QUANTILE_REGRESSION_SIZE,
      center_bias=False)

  model_upper.train(input_fn=train_input_fn, steps=1000)
  result_iter = model_upper.predict(input_fn=test_input_fn)
def testRestore(self):
  # Calling self.test_session() without a graph specified results in
  # TensorFlowTestCase caching the session and returning the same one
  # every time. In this test, we need to create two different sessions
  # which is why we also create a graph and pass it to self.test_session()
  # to ensure no caching occurs under the hood.
  save_path = os.path.join(self.get_temp_dir(), "restore-test")
  with ops.Graph().as_default() as graph:
    with self.test_session(graph) as sess:
      tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
      tree = tree_ensemble_config.trees.add()
      tree_ensemble_config.tree_metadata.add().is_finalized = True
      tree_ensemble_config.tree_weights.append(1.0)
      _append_to_leaf(tree.nodes.add().leaf, 0, -0.1)

      tree_ensemble_config2 = tree_config_pb2.DecisionTreeEnsembleConfig()
      tree2 = tree_ensemble_config2.trees.add()
      tree_ensemble_config.tree_weights.append(1.0)
      _append_to_leaf(tree2.nodes.add().leaf, 0, -1.0)

      tree_ensemble_config3 = tree_config_pb2.DecisionTreeEnsembleConfig()
      tree3 = tree_ensemble_config3.trees.add()
      tree_ensemble_config.tree_weights.append(1.0)
      _append_to_leaf(tree3.nodes.add().leaf, 0, -10.0)

      # Prepare learner config.
      learner_config = learner_pb2.LearnerConfig()
      learner_config.num_classes = 2

      tree_ensemble_handle = model_ops.tree_ensemble_variable(
          stamp_token=3,
          tree_ensemble_config=tree_ensemble_config.SerializeToString(),
          name="restore_tree")
      feature_usage_counts = variables.Variable(
          initial_value=array_ops.zeros([1], dtypes.int64),
          name="feature_usage_counts",
          trainable=False)
      feature_gains = variables.Variable(
          initial_value=array_ops.zeros([1], dtypes.float32),
          name="feature_gains",
          trainable=False)

      resources.initialize_resources(resources.shared_resources()).run()
      variables.initialize_all_variables().run()
      my_saver = saver.Saver()

      with ops.control_dependencies([
          ensemble_optimizer_ops.add_trees_to_ensemble(
              tree_ensemble_handle,
              tree_ensemble_config2.SerializeToString(),
              feature_usage_counts, [0],
              feature_gains, [0], [[]],
              learning_rate=1)
      ]):
        result, _, _ = prediction_ops.gradient_trees_prediction(
            tree_ensemble_handle,
            self._seed, [self._dense_float_tensor],
            [self._sparse_float_indices1, self._sparse_float_indices2],
            [self._sparse_float_values1, self._sparse_float_values2],
            [self._sparse_float_shape1, self._sparse_float_shape2],
            [self._sparse_int_indices1], [self._sparse_int_values1],
            [self._sparse_int_shape1],
            learner_config=learner_config.SerializeToString(),
            apply_dropout=False,
            apply_averaging=False,
            center_bias=False,
            reduce_dim=True)
      self.assertAllClose([[-1.1], [-1.1]], result.eval())

      # Save before adding other trees.
      val = my_saver.save(sess, save_path)
      self.assertEqual(save_path, val)

      # Add more trees after saving.
      with ops.control_dependencies([
          ensemble_optimizer_ops.add_trees_to_ensemble(
              tree_ensemble_handle,
              tree_ensemble_config3.SerializeToString(),
              feature_usage_counts, [0],
              feature_gains, [0], [[]],
              learning_rate=1)
      ]):
        result, _, _ = prediction_ops.gradient_trees_prediction(
            tree_ensemble_handle,
            self._seed, [self._dense_float_tensor],
            [self._sparse_float_indices1, self._sparse_float_indices2],
            [self._sparse_float_values1, self._sparse_float_values2],
            [self._sparse_float_shape1, self._sparse_float_shape2],
            [self._sparse_int_indices1], [self._sparse_int_values1],
            [self._sparse_int_shape1],
            learner_config=learner_config.SerializeToString(),
            apply_dropout=False,
            apply_averaging=False,
            center_bias=False,
            reduce_dim=True)
      self.assertAllClose(result.eval(), [[-11.1], [-11.1]])

  # Start a second session. In that session the parameter nodes
  # have not been initialized either.
  with ops.Graph().as_default() as graph:
    with self.test_session(graph) as sess:
      tree_ensemble_handle = model_ops.tree_ensemble_variable(
          stamp_token=0, tree_ensemble_config="", name="restore_tree")
      my_saver = saver.Saver()
      my_saver.restore(sess, save_path)

      result, _, _ = prediction_ops.gradient_trees_prediction(
          tree_ensemble_handle,
          self._seed, [self._dense_float_tensor],
          [self._sparse_float_indices1, self._sparse_float_indices2],
          [self._sparse_float_values1, self._sparse_float_values2],
          [self._sparse_float_shape1, self._sparse_float_shape2],
          [self._sparse_int_indices1], [self._sparse_int_values1],
          [self._sparse_int_shape1],
          learner_config=learner_config.SerializeToString(),
          apply_dropout=False,
          apply_averaging=False,
          center_bias=False,
          reduce_dim=True)
      # Make sure we only have the first and second tree.
      # The third tree was added after the save.
      self.assertAllClose(result.eval(), [[-1.1], [-1.1]])
def testSerialization(self):
  with ops.Graph().as_default() as graph:
    with self.test_session(graph):
      tree_ensemble_config = tree_config_pb2.DecisionTreeEnsembleConfig()
      # Bias tree only for second class.
      tree1 = tree_ensemble_config.trees.add()
      _append_to_leaf(tree1.nodes.add().leaf, 1, -0.2)
      tree_ensemble_config.tree_weights.append(1.0)

      # Depth 2 tree.
      tree2 = tree_ensemble_config.trees.add()
      tree_ensemble_config.tree_weights.append(1.0)
      _set_float_split(
          tree2.nodes.add().sparse_float_binary_split_default_right.split,
          1, 4.0, 1, 2)
      _set_float_split(tree2.nodes.add().dense_float_binary_split,
                       0, 9.0, 3, 4)
      _append_to_leaf(tree2.nodes.add().leaf, 0, 0.5)
      _append_to_leaf(tree2.nodes.add().leaf, 1, 1.2)
      _append_to_leaf(tree2.nodes.add().leaf, 0, -0.9)

      tree_ensemble_handle = model_ops.tree_ensemble_variable(
          stamp_token=7,
          tree_ensemble_config=tree_ensemble_config.SerializeToString(),
          name="saver_tree")
      stamp_token, serialized_config = model_ops.tree_ensemble_serialize(
          tree_ensemble_handle)
      resources.initialize_resources(resources.shared_resources()).run()
      self.assertEqual(stamp_token.eval(), 7)
      serialized_config = serialized_config.eval()

  with ops.Graph().as_default() as graph:
    with self.test_session(graph):
      tree_ensemble_handle2 = model_ops.tree_ensemble_variable(
          stamp_token=9,
          tree_ensemble_config=serialized_config,
          name="saver_tree2")
      resources.initialize_resources(resources.shared_resources()).run()

      # Prepare learner config.
      learner_config = learner_pb2.LearnerConfig()
      learner_config.num_classes = 3

      result, _, _ = prediction_ops.gradient_trees_prediction(
          tree_ensemble_handle2,
          self._seed, [self._dense_float_tensor],
          [self._sparse_float_indices1, self._sparse_float_indices2],
          [self._sparse_float_values1, self._sparse_float_values2],
          [self._sparse_float_shape1, self._sparse_float_shape2],
          [self._sparse_int_indices1], [self._sparse_int_values1],
          [self._sparse_int_shape1],
          learner_config=learner_config.SerializeToString(),
          apply_dropout=False,
          apply_averaging=False,
          center_bias=False,
          reduce_dim=True)

      # Re-serialize tree.
      stamp_token2, serialized_config2 = model_ops.tree_ensemble_serialize(
          tree_ensemble_handle2)

      # The first example will get bias class 1 -0.2 from the first tree and
      # leaf 2 payload (sparse feature missing) of 0.5, hence [0.5, -0.2];
      # the second example will get the same bias class 1 -0.2 and leaf 3
      # payload of class 1 1.2, hence [0.0, 1.0]. A class with no payload in
      # the reached leaf gets score 0.
      self.assertEqual(stamp_token2.eval(), 9)
      self.assertEqual(serialized_config2.eval(), serialized_config)
      self.assertAllClose(result.eval(), [[0.5, -0.2], [0, 1.0]])