def testAdditionalOutputs(self):
    """Checks that predictions carry the key column and extra tree outputs.

    Trains a single-tree ``CoreTensorForestEstimator`` on the iris data with
    ``keys_column='keys'`` and ``inference_tree_paths=True``, then verifies
    that every prediction dict contains the ``keys``, ``tree_paths`` and
    ``prediction_variance`` entries.
    """
    hparams = tensor_forest.ForestHParams(
        num_trees=1,
        max_nodes=100,
        num_classes=3,
        num_features=4,
        split_after_samples=20,
        inference_tree_paths=True)
    classifier = random_forest.CoreTensorForestEstimator(
        hparams.fill(), keys_column='keys', include_all_in_serving=True)

    iris = base.load_iris()
    data = iris.data.astype(np.float32)
    labels = iris.target.astype(np.int32)

    input_fn = numpy_io.numpy_input_fn(
        x={
            'x': data,
            # One key per example, laid out as a column; -1 lets numpy infer
            # the row count instead of hard-coding the dataset size.
            'keys': np.arange(len(iris.data)).reshape(-1, 1)
        },
        y=labels,
        batch_size=10,
        num_epochs=1,
        shuffle=False)

    classifier.train(input_fn=input_fn, steps=100)
    predictions = list(classifier.predict(input_fn=input_fn))

    # Guard against the loop below passing vacuously on an empty result.
    self.assertTrue(predictions, 'predict() returned no predictions')

    # Check that there is a key column, tree paths and var.
    for pred in predictions:
        self.assertIn('keys', pred)
        self.assertIn('tree_paths', pred)
        self.assertIn('prediction_variance', pred)
def testRegression(self):
    """Trains a five-tree regression forest with an explicit regression head.

    After training, the evaluation loss must be at most 0.1 and the
    predictions for the predict input must be within 1 of ``[[24.]]``.
    """
    head_fn = head_lib._regression_head(
        label_dimension=1,
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    forest_config = dict(
        num_trees=5,
        max_nodes=1000,
        num_classes=1,
        num_features=13,
        regression=True,
        split_after_samples=20)
    filled_params = tensor_forest.ForestHParams(**forest_config).fill()
    regressor = random_forest.CoreTensorForestEstimator(
        filled_params, head=head_fn)

    train_fn, predict_fn = _get_regression_input_fns()

    regressor.train(input_fn=train_fn, steps=100)
    metrics = regressor.evaluate(input_fn=train_fn, steps=10)
    self.assertLessEqual(metrics['loss'], 0.1)

    predicted = [
        item['predictions'] for item in regressor.predict(input_fn=predict_fn)
    ]
    self.assertAllClose([[24.]], predicted, atol=1)
def testWithFeatureColumns(self):
    """Trains and evaluates a classifier built with an explicit feature column.

    Uses the iris data under the single numeric feature column ``'x'`` and
    expects an accuracy of 1.0 and a loss close to 0.55144483.
    """
    softmax_head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
        n_classes=3,
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    forest_config = dict(
        num_trees=3,
        max_nodes=1000,
        num_classes=3,
        num_features=4,
        split_after_samples=20,
        inference_tree_paths=True)
    filled_params = tensor_forest.ForestHParams(**forest_config).fill()
    estimator = random_forest.CoreTensorForestEstimator(
        filled_params,
        head=softmax_head,
        feature_columns=[core_feature_column.numeric_column('x')])

    iris = base.load_iris()
    features = {'x': iris.data.astype(np.float32)}
    targets = iris.target.astype(np.int32)

    train_fn = numpy_io.numpy_input_fn(
        x=features,
        y=targets,
        batch_size=150,
        num_epochs=None,
        shuffle=False)

    estimator.train(input_fn=train_fn, steps=100)
    metrics = estimator.evaluate(input_fn=train_fn, steps=1)

    self.assertEqual(1.0, metrics['accuracy'])
    self.assertAllClose(0.55144483, metrics['loss'])
def testTrainEvaluateInferDoesNotThrowErrorForClassifier(self):
    """Runs train, evaluate and predict on a three-tree classifier.

    Expects an accuracy of 1.0, a loss close to 0.55144483 and the predicted
    probabilities ``[[0.576117, 0.211942, 0.211942]]``.
    """
    softmax_head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
        n_classes=3,
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    forest_config = dict(
        num_trees=3,
        max_nodes=1000,
        num_classes=3,
        num_features=4,
        split_after_samples=20,
        inference_tree_paths=True)
    filled_params = tensor_forest.ForestHParams(**forest_config).fill()
    estimator = random_forest.CoreTensorForestEstimator(
        filled_params, head=softmax_head)

    train_fn, predict_fn = _get_classification_input_fns()

    estimator.train(input_fn=train_fn, steps=100)
    metrics = estimator.evaluate(input_fn=train_fn, steps=1)
    self.assertEqual(1.0, metrics['accuracy'])
    self.assertAllClose(0.55144483, metrics['loss'])

    probabilities = [
        item['probabilities']
        for item in estimator.predict(input_fn=predict_fn)
    ]
    self.assertAllClose([[0.576117, 0.211942, 0.211942]], probabilities)
def testForestHParamsBigTree(self):
    """Checks the num_splits_to_consider value that fill() yields for a big forest."""
    hparams = tensor_forest.ForestHParams(
        num_classes=2,
        num_trees=100,
        max_nodes=1000000,
        split_after_samples=25,
        num_features=1000).fill()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(31, hparams.num_splits_to_consider)
def testForestHParamsStringParams(self):
    """Checks that a string num_splits_to_consider is unchanged after fill()."""
    hparams = tensor_forest.ForestHParams(
        num_classes=2,
        num_trees=100,
        max_nodes=1000000,
        split_after_samples="25",
        num_splits_to_consider="1000000",
        num_features=1000).fill()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual("1000000", hparams.num_splits_to_consider)
def testForestHParams(self):
    """Checks several attributes of a filled ForestHParams instance.

    Verifies the explicitly passed ``num_classes`` together with the values
    expected after ``fill()`` for ``num_output_columns``,
    ``num_splits_to_consider``, ``valid_leaf_threshold`` and
    ``base_random_seed``.
    """
    hparams = tensor_forest.ForestHParams(
        num_classes=2,
        num_trees=100,
        max_nodes=1000,
        split_after_samples=25,
        num_features=60).fill()
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(2, hparams.num_classes)
    self.assertEqual(3, hparams.num_output_columns)
    self.assertEqual(10, hparams.num_splits_to_consider)
    # Default value of valid_leaf_threshold
    self.assertEqual(1, hparams.valid_leaf_threshold)
    self.assertEqual(0, hparams.base_random_seed)
def setUp(self):
    """Builds the shared ForestHParams fixture.

    ``num_nodes`` and ``num_leaves`` are derived from ``hybrid_tree_depth``
    and stored on the params object.
    """
    params = tensor_forest.ForestHParams(
        num_classes=3,
        num_features=7,
        layer_size=11,
        num_layers=13,
        num_trees=17,
        connection_probability=0.1,
        hybrid_tree_depth=4,
        regularization_strength=0.01,
        regularization="",
        weight_init_mean=0.0,
        weight_init_std=0.1)

    depth = params.hybrid_tree_depth
    params.num_nodes = 2**depth - 1
    params.num_leaves = 2**(depth - 1)

    self.params = params
def testTrainingConstructionClassificationSparse(self):
    """Checks that training_graph on a SparseTensor input returns an Operation."""
    input_data = sparse_tensor.SparseTensor(
        indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]],
        values=[-1.0, 0.0, -1., 2., 1., -2.0],
        dense_shape=[4, 10])
    input_labels = [0, 1, 2, 3]

    params = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=10,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25).fill()

    graph_builder = tensor_forest.RandomForestGraphs(params)
    graph = graph_builder.training_graph(input_data, input_labels)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(graph, ops.Operation)
def testAutofillsClassificationHead(self):
    """Trains a classifier constructed without an explicit head.

    Expects an accuracy of 1.0 and a loss close to 0.55144483 on evaluation.
    """
    forest_config = dict(
        num_trees=3,
        max_nodes=1000,
        num_classes=3,
        num_features=4,
        split_after_samples=20,
        inference_tree_paths=True)
    filled_params = tensor_forest.ForestHParams(**forest_config).fill()
    estimator = random_forest.CoreTensorForestEstimator(filled_params)

    train_fn, _ = _get_classification_input_fns()

    estimator.train(input_fn=train_fn, steps=100)
    metrics = estimator.evaluate(input_fn=train_fn, steps=1)

    self.assertEqual(1.0, metrics['accuracy'])
    self.assertAllClose(0.55144483, metrics['loss'])
def testTrainingConstructionClassification(self):
    """Checks that training_graph on dense list input returns an Operation."""
    input_data = [
        [-1., 0.],
        [-1., 2.],  # node 1
        [1., 0.],
        [1., -2.]
    ]  # node 2
    input_labels = [0, 1, 2, 3]

    params = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=2,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25).fill()

    graph_builder = tensor_forest.RandomForestGraphs(params)
    graph = graph_builder.training_graph(input_data, input_labels)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(graph, ops.Operation)
def testInferenceConstructionSparse(self):
    """Checks that inference_graph on a SparseTensor input returns three Tensors."""
    input_data = sparse_tensor.SparseTensor(
        indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]],
        values=[-1.0, 0.0, -1., 2., 1., -2.0],
        dense_shape=[4, 10])

    params = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=10,
        num_trees=10,
        max_nodes=1000,
        regression=True,
        split_after_samples=25).fill()

    graph_builder = tensor_forest.RandomForestGraphs(params)
    probs, paths, var = graph_builder.inference_graph(input_data)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(probs, ops.Tensor)
    self.assertIsInstance(paths, ops.Tensor)
    self.assertIsInstance(var, ops.Tensor)
def testEarlyStopping(self):
    """Checks that TensorForestEstimator.fit stops before the requested steps.

    With a 0.3 early-stopping loss threshold and two early-stopping rounds,
    the checkpoint is expected at global step 5 although 100 steps were
    requested.
    """
    forest_config = dict(
        num_trees=100,
        max_nodes=10000,
        num_classes=3,
        num_features=4,
        split_after_samples=20,
        inference_tree_paths=True)
    filled_params = tensor_forest.ForestHParams(**forest_config).fill()

    # Set a crazy threshold - 30% loss change.
    classifier = random_forest.TensorForestEstimator(
        filled_params,
        early_stopping_loss_threshold=0.3,
        early_stopping_rounds=2)

    train_fn, _ = _get_classification_input_fns()
    classifier.fit(input_fn=train_fn, steps=100)

    # We stopped early.
    self._assert_checkpoint(classifier.model_dir, global_step=5)
def testInferenceConstruction(self):
    """Checks that inference_graph on dense list input returns three Tensors."""
    input_data = [
        [-1., 0.],
        [-1., 2.],  # node 1
        [1., 0.],
        [1., -2.]
    ]  # node 2

    params = tensor_forest.ForestHParams(
        num_classes=4,
        num_features=2,
        num_trees=10,
        max_nodes=1000,
        split_after_samples=25).fill()

    graph_builder = tensor_forest.RandomForestGraphs(params)
    probs, paths, var = graph_builder.inference_graph(input_data)
    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(probs, ops.Tensor)
    self.assertIsInstance(paths, ops.Tensor)
    self.assertIsInstance(var, ops.Tensor)
def testAutofillsRegressionHead(self):
    """Trains a regression forest constructed without an explicit head.

    Expects an evaluation loss of at most 0.1 and predictions within 1 of
    ``[[24.]]``.
    """
    forest_config = dict(
        num_trees=5,
        max_nodes=1000,
        num_classes=1,
        num_features=13,
        regression=True,
        split_after_samples=20)
    filled_params = tensor_forest.ForestHParams(**forest_config).fill()
    regressor = random_forest.CoreTensorForestEstimator(filled_params)

    train_fn, predict_fn = _get_regression_input_fns()

    regressor.train(input_fn=train_fn, steps=100)
    metrics = regressor.evaluate(input_fn=train_fn, steps=10)
    self.assertLessEqual(metrics['loss'], 0.1)

    predicted = [
        item['predictions'] for item in regressor.predict(input_fn=predict_fn)
    ]
    self.assertAllClose([[24.]], predicted, atol=1)
def testClassification(self):
    """Fits, evaluates and predicts with a three-tree TensorForestEstimator.

    Expects an accuracy of 1.0, a loss close to 0.55144483 and the predicted
    probabilities ``[[0.576117, 0.211942, 0.211942]]``.
    """
    forest_config = dict(
        num_trees=3,
        max_nodes=1000,
        num_classes=3,
        num_features=4,
        split_after_samples=20,
        inference_tree_paths=True)
    filled_params = tensor_forest.ForestHParams(**forest_config).fill()
    classifier = random_forest.TensorForestEstimator(filled_params)

    train_fn, predict_fn = _get_classification_input_fns()

    classifier.fit(input_fn=train_fn, steps=100)
    metrics = classifier.evaluate(input_fn=train_fn, steps=10)
    self.assertEqual(1.0, metrics['accuracy'])
    self.assertAllClose(0.55144483, metrics['loss'])

    probabilities = [
        item['probabilities']
        for item in classifier.predict(input_fn=predict_fn)
    ]
    self.assertAllClose([[0.576117, 0.211942, 0.211942]], probabilities)
def setUp(self):
    """Builds the shared ForestHParams fixture.

    Sets ``regression`` to False, derives ``num_nodes`` and ``num_leaves``
    from ``hybrid_tree_depth``, and sets ``num_features_per_node`` to
    ``feature_bagging_fraction * num_features``.
    """
    params = tensor_forest.ForestHParams(
        num_classes=2,
        num_features=31,
        layer_size=11,
        num_layers=13,
        num_trees=3,
        connection_probability=0.1,
        hybrid_tree_depth=4,
        regularization_strength=0.01,
        regularization="",
        base_random_seed=10,
        feature_bagging_fraction=1.0,
        learning_rate=0.01,
        weight_init_mean=0.0,
        weight_init_std=0.1)

    params.regression = False

    depth = params.hybrid_tree_depth
    params.num_nodes = 2**depth - 1
    params.num_leaves = 2**(depth - 1)

    bagging_fraction = params.feature_bagging_fraction
    params.num_features_per_node = bagging_fraction * params.num_features

    self.params = params
def setUp(self):
    """Builds the shared ForestHParams fixture and a random input tensor.

    Sets ``regression`` to False, derives ``num_nodes`` and ``num_leaves``
    from ``hybrid_tree_depth``, and stores in ``self.input_data`` a constant
    of 100 rows, each holding ``num_features`` values drawn from
    ``random.uniform(-1, 1)``.
    """
    params = tensor_forest.ForestHParams(
        num_classes=2,
        num_features=31,
        layer_size=11,
        num_layers=13,
        num_trees=17,
        connection_probability=0.1,
        hybrid_tree_depth=4,
        regularization_strength=0.01,
        regularization="",
        learning_rate=0.01,
        weight_init_mean=0.0,
        weight_init_std=0.1)

    params.regression = False

    depth = params.hybrid_tree_depth
    params.num_nodes = 2**depth - 1
    params.num_leaves = 2**(depth - 1)

    self.params = params

    def _random_row():
        # One example: num_features independent draws from [-1, 1].
        return [random.uniform(-1, 1) for _ in range(params.num_features)]

    self.input_data = constant_op.constant(
        [_random_row() for _ in range(100)])
def testRegression(self):
    """Fits a five-tree regression forest with TensorForestEstimator.

    Expects an evaluation loss of at most 0.1 and predicted ``scores`` within
    1 of ``[24.]``.
    """
    forest_config = dict(
        num_trees=5,
        max_nodes=1000,
        num_classes=1,
        num_features=13,
        regression=True,
        split_after_samples=20)
    filled_params = tensor_forest.ForestHParams(**forest_config).fill()
    regressor = random_forest.TensorForestEstimator(filled_params)

    train_fn, predict_fn = _get_regression_input_fns()

    regressor.fit(input_fn=train_fn, steps=100)
    metrics = regressor.evaluate(input_fn=train_fn, steps=10)
    self.assertLessEqual(metrics['loss'], 0.1)

    scores = [
        item['scores'] for item in regressor.predict(input_fn=predict_fn)
    ]
    self.assertAllClose([24.], scores, atol=1)
def testEarlyStopping(self):
    """Checks that CoreTensorForestEstimator.train stops before the requested steps.

    With a 0.3 early-stopping loss threshold and two early-stopping rounds,
    the checkpoint is expected at global step 8 although 100 steps were
    requested.
    """
    softmax_head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
        n_classes=3,
        loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

    forest_config = dict(
        num_trees=3,
        max_nodes=1000,
        num_classes=3,
        num_features=4,
        split_after_samples=20,
        inference_tree_paths=True)
    filled_params = tensor_forest.ForestHParams(**forest_config).fill()

    # Set a crazy threshold - 30% loss change.
    estimator = random_forest.CoreTensorForestEstimator(
        filled_params,
        head=softmax_head,
        early_stopping_loss_threshold=0.3,
        early_stopping_rounds=2)

    train_fn, _ = _get_classification_input_fns()
    estimator.train(input_fn=train_fn, steps=100)

    # We stopped early.
    self._assert_checkpoint(estimator.model_dir, global_step=8)
def testInfrenceFromRestoredModel(self):
    """Checks inference on a forest built from a serialized tree definition.

    A single three-node tree (one binary split on feature 0 at threshold 0,
    with both leaves holding the vector ``[0.0, 1.0]``) is parsed into a
    ``_tree_proto.Model``, serialized, and handed to ``RandomForestGraphs``.
    Every one of the four input rows is then expected to produce the
    prediction ``[0.0, 1.0]``.
    """
    input_data = [
        [-1., 0.],
        [-1., 2.],  # node 1
        [1., 0.],
        [1., -2.]
    ]  # node 2
    expected_prediction = [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]

    hparams = tensor_forest.ForestHParams(
        num_classes=2,
        num_features=2,
        num_trees=1,
        max_nodes=1000,
        split_after_samples=25).fill()

    tree_weight = {
        'decisionTree': {
            'nodes': [{
                'binaryNode': {
                    'rightChildId': 2,
                    'leftChildId': 1,
                    'inequalityLeftChildTest': {
                        'featureId': {
                            'id': '0'
                        },
                        'threshold': {
                            'floatValue': 0
                        }
                    }
                }
            }, {
                'leaf': {
                    'vector': {
                        'value': [{
                            'floatValue': 0.0
                        }, {
                            'floatValue': 1.0
                        }]
                    }
                },
                'nodeId': 1
            }, {
                'leaf': {
                    'vector': {
                        'value': [{
                            'floatValue': 0.0
                        }, {
                            'floatValue': 1.0
                        }]
                    }
                },
                'nodeId': 2
            }]
        }
    }
    restored_tree_param = ParseDict(
        tree_weight, _tree_proto.Model()).SerializeToString()

    graph_builder = tensor_forest.RandomForestGraphs(
        hparams, [restored_tree_param])
    probs, paths, var = graph_builder.inference_graph(input_data)

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)).
    self.assertIsInstance(probs, ops.Tensor)
    self.assertIsInstance(paths, ops.Tensor)
    self.assertIsInstance(var, ops.Tensor)

    with self.cached_session():
        variables.global_variables_initializer().run()
        resources.initialize_resources(resources.shared_resources()).run()
        # Evaluate the tensor once and reuse the result for both checks.
        probs_value = probs.eval()
        # assertEqual replaces the deprecated assertEquals alias, which was
        # removed from unittest in Python 3.12.
        self.assertEqual(probs_value.shape, (4, 2))
        self.assertEqual(probs_value.tolist(), expected_prediction)