예제 #1
0
    def testAdditionalOutputs(self):
        """Checks that predictions carry the key column and extra outputs.

        Trains a single-tree classifier on the iris data with
        ``inference_tree_paths=True``, ``keys_column='keys'`` and
        ``include_all_in_serving=True``, then verifies that every prediction
        dict contains 'keys', 'tree_paths' and 'prediction_variance'.
        """
        hparams = tensor_forest.ForestHParams(num_trees=1,
                                              max_nodes=100,
                                              num_classes=3,
                                              num_features=4,
                                              split_after_samples=20,
                                              inference_tree_paths=True)
        classifier = random_forest.CoreTensorForestEstimator(
            hparams.fill(), keys_column='keys', include_all_in_serving=True)

        iris = base.load_iris()
        data = iris.data.astype(np.float32)
        labels = iris.target.astype(np.int32)

        input_fn = numpy_io.numpy_input_fn(
            x={
                'x': data,
                # One integer key per example as a column vector; -1 lets
                # numpy infer the row count instead of hard-coding 150.
                'keys': np.arange(len(iris.data)).reshape(-1, 1),
            },
            y=labels,
            batch_size=10,
            num_epochs=1,
            shuffle=False)

        classifier.train(input_fn=input_fn, steps=100)
        predictions = list(classifier.predict(input_fn=input_fn))
        # Check that there is a key column, tree paths and var.
        # assertIn reports the missing key and the dict on failure.
        for pred in predictions:
            self.assertIn('keys', pred)
            self.assertIn('tree_paths', pred)
            self.assertIn('prediction_variance', pred)
예제 #2
0
    def testRegression(self):
        """Trains a forest regressor with an explicit regression head.

        After 100 training steps the evaluation loss must not exceed 0.1 and
        the prediction from ``predict_input_fn`` must be within 1 of 24.
        """
        regression_head = head_lib._regression_head(
            label_dimension=1,
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        forest_params = tensor_forest.ForestHParams(
            num_trees=5,
            max_nodes=1000,
            num_classes=1,
            num_features=13,
            regression=True,
            split_after_samples=20)

        regressor = random_forest.CoreTensorForestEstimator(
            forest_params.fill(), head=regression_head)

        train_fn, predict_fn = _get_regression_input_fns()

        regressor.train(input_fn=train_fn, steps=100)
        metrics = regressor.evaluate(input_fn=train_fn, steps=10)
        # Passes when 0.1 >= loss.
        self.assertGreaterEqual(0.1, metrics['loss'])

        predicted = [
            item['predictions']
            for item in regressor.predict(input_fn=predict_fn)
        ]
        self.assertAllClose([[24.]], predicted, atol=1)
예제 #3
0
    def testWithFeatureColumns(self):
        """Trains on iris through an explicit numeric feature column.

        Builds a three-tree classifier with a softmax cross-entropy head and
        a single numeric column 'x', trains for 100 steps on full-batch iris
        data, then checks the evaluation accuracy and loss.
        """
        head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
            n_classes=3,
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        forest_params = tensor_forest.ForestHParams(
            num_trees=3,
            max_nodes=1000,
            num_classes=3,
            num_features=4,
            split_after_samples=20,
            inference_tree_paths=True)

        columns = [core_feature_column.numeric_column('x')]
        estimator = random_forest.CoreTensorForestEstimator(
            forest_params.fill(), head=head, feature_columns=columns)

        iris = base.load_iris()
        features = {'x': iris.data.astype(np.float32)}
        targets = iris.target.astype(np.int32)

        # num_epochs=None: the input repeats until `steps` is exhausted.
        train_fn = numpy_io.numpy_input_fn(
            x=features,
            y=targets,
            batch_size=150,
            num_epochs=None,
            shuffle=False)

        estimator.train(input_fn=train_fn, steps=100)
        metrics = estimator.evaluate(input_fn=train_fn, steps=1)

        self.assertEqual(1.0, metrics['accuracy'])
        self.assertAllClose(0.55144483, metrics['loss'])
예제 #4
0
    def testTrainEvaluateInferDoesNotThrowErrorForClassifier(self):
        """Runs train, evaluate and predict end to end for a classifier.

        Uses an explicit softmax cross-entropy head, then checks the
        evaluation accuracy and loss as well as the predicted class
        probabilities.
        """
        head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
            n_classes=3,
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        forest_params = tensor_forest.ForestHParams(
            num_trees=3,
            max_nodes=1000,
            num_classes=3,
            num_features=4,
            split_after_samples=20,
            inference_tree_paths=True)

        estimator = random_forest.CoreTensorForestEstimator(
            forest_params.fill(), head=head)

        train_fn, predict_fn = _get_classification_input_fns()

        estimator.train(input_fn=train_fn, steps=100)
        metrics = estimator.evaluate(input_fn=train_fn, steps=1)

        self.assertEqual(1.0, metrics['accuracy'])
        self.assertAllClose(0.55144483, metrics['loss'])

        probabilities = [
            item['probabilities']
            for item in estimator.predict(input_fn=predict_fn)
        ]
        self.assertAllClose([[0.576117, 0.211942, 0.211942]], probabilities)
예제 #5
0
 def testForestHParamsBigTree(self):
     """Checks ``num_splits_to_consider`` after ``fill()`` for a big forest.

     ``num_splits_to_consider`` is not passed in; with 1000 features the
     filled hyperparameters are expected to report 31.
     """
     hparams = tensor_forest.ForestHParams(num_classes=2,
                                           num_trees=100,
                                           max_nodes=1000000,
                                           split_after_samples=25,
                                           num_features=1000).fill()
     # assertEqual: the assertEquals alias is deprecated and was removed in
     # Python 3.12.
     self.assertEqual(31, hparams.num_splits_to_consider)
예제 #6
0
 def testForestHParamsStringParams(self):
     """Checks that a string ``num_splits_to_consider`` survives ``fill()``.

     ``split_after_samples`` and ``num_splits_to_consider`` are supplied as
     strings; the latter must still compare equal to the original string
     after the hyperparameters are filled.
     """
     hparams = tensor_forest.ForestHParams(num_classes=2,
                                           num_trees=100,
                                           max_nodes=1000000,
                                           split_after_samples="25",
                                           num_splits_to_consider="1000000",
                                           num_features=1000).fill()
     # assertEqual: the assertEquals alias is deprecated and was removed in
     # Python 3.12.
     self.assertEqual("1000000", hparams.num_splits_to_consider)
예제 #7
0
 def testForestHParams(self):
     """Checks explicit and filled-in values on a small ForestHParams.

     Verifies the class count passed in, plus the values expected after
     ``fill()`` for ``num_output_columns``, ``num_splits_to_consider``,
     ``valid_leaf_threshold`` and ``base_random_seed``.
     """
     hparams = tensor_forest.ForestHParams(num_classes=2,
                                           num_trees=100,
                                           max_nodes=1000,
                                           split_after_samples=25,
                                           num_features=60).fill()
     # assertEqual throughout: the assertEquals alias is deprecated and was
     # removed in Python 3.12.
     self.assertEqual(2, hparams.num_classes)
     self.assertEqual(3, hparams.num_output_columns)
     self.assertEqual(10, hparams.num_splits_to_consider)
     # Default value of valid_leaf_threshold
     self.assertEqual(1, hparams.valid_leaf_threshold)
     self.assertEqual(0, hparams.base_random_seed)
예제 #8
0
 def setUp(self):
     """Creates the ForestHParams fixture and derives node/leaf counts.

     ``num_nodes`` and ``num_leaves`` are computed from
     ``hybrid_tree_depth`` and stored on the params object.
     """
     self.params = tensor_forest.ForestHParams(
         num_classes=3,
         num_features=7,
         layer_size=11,
         num_layers=13,
         num_trees=17,
         connection_probability=0.1,
         hybrid_tree_depth=4,
         regularization_strength=0.01,
         regularization="",
         weight_init_mean=0.0,
         weight_init_std=0.1)

     params = self.params
     depth = params.hybrid_tree_depth
     # 2**depth - 1 nodes in total, 2**(depth - 1) of them leaves.
     params.num_nodes = 2**depth - 1
     params.num_leaves = 2**(depth - 1)
예제 #9
0
    def testTrainingConstructionClassificationSparse(self):
        """Checks that a training graph can be built from sparse input.

        Feeds a 4x10 SparseTensor with four labels into
        ``RandomForestGraphs.training_graph`` and verifies that the result
        is an ``ops.Operation``.
        """
        input_data = sparse_tensor.SparseTensor(
            indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]],
            values=[-1.0, 0.0, -1., 2., 1., -2.0],
            dense_shape=[4, 10])
        input_labels = [0, 1, 2, 3]

        params = tensor_forest.ForestHParams(num_classes=4,
                                             num_features=10,
                                             num_trees=10,
                                             max_nodes=1000,
                                             split_after_samples=25).fill()

        graph_builder = tensor_forest.RandomForestGraphs(params)
        graph = graph_builder.training_graph(input_data, input_labels)
        # assertIsInstance reports the actual type when the check fails.
        self.assertIsInstance(graph, ops.Operation)
예제 #10
0
    def testAutofillsClassificationHead(self):
        """Trains a classifier built without an explicit ``head`` argument.

        The estimator is constructed from the hyperparameters alone; after
        100 training steps the evaluation accuracy and loss are checked.
        """
        forest_params = tensor_forest.ForestHParams(
            num_trees=3,
            max_nodes=1000,
            num_classes=3,
            num_features=4,
            split_after_samples=20,
            inference_tree_paths=True)

        estimator = random_forest.CoreTensorForestEstimator(
            forest_params.fill())

        # Only the training input function is needed here.
        train_fn = _get_classification_input_fns()[0]

        estimator.train(input_fn=train_fn, steps=100)
        metrics = estimator.evaluate(input_fn=train_fn, steps=1)

        self.assertEqual(1.0, metrics['accuracy'])
        self.assertAllClose(0.55144483, metrics['loss'])
예제 #11
0
    def testTrainingConstructionClassification(self):
        """Checks that a training graph can be built from dense input.

        Feeds four two-feature rows with four labels into
        ``RandomForestGraphs.training_graph`` and verifies that the result
        is an ``ops.Operation``.
        """
        input_data = [
            [-1., 0.],
            [-1., 2.],  # node 1
            [1., 0.],
            [1., -2.]
        ]  # node 2
        input_labels = [0, 1, 2, 3]

        params = tensor_forest.ForestHParams(num_classes=4,
                                             num_features=2,
                                             num_trees=10,
                                             max_nodes=1000,
                                             split_after_samples=25).fill()

        graph_builder = tensor_forest.RandomForestGraphs(params)
        graph = graph_builder.training_graph(input_data, input_labels)
        # assertIsInstance reports the actual type when the check fails.
        self.assertIsInstance(graph, ops.Operation)
예제 #12
0
    def testInferenceConstructionSparse(self):
        """Checks that an inference graph can be built from sparse input.

        Feeds a 4x10 SparseTensor into ``RandomForestGraphs.inference_graph``
        (with ``regression=True``) and verifies that all three returned
        values are ``ops.Tensor`` instances.
        """
        input_data = sparse_tensor.SparseTensor(
            indices=[[0, 0], [0, 3], [1, 0], [1, 7], [2, 1], [3, 9]],
            values=[-1.0, 0.0, -1., 2., 1., -2.0],
            dense_shape=[4, 10])

        params = tensor_forest.ForestHParams(num_classes=4,
                                             num_features=10,
                                             num_trees=10,
                                             max_nodes=1000,
                                             regression=True,
                                             split_after_samples=25).fill()

        graph_builder = tensor_forest.RandomForestGraphs(params)
        probs, paths, var = graph_builder.inference_graph(input_data)
        # assertIsInstance reports the actual type when a check fails.
        self.assertIsInstance(probs, ops.Tensor)
        self.assertIsInstance(paths, ops.Tensor)
        self.assertIsInstance(var, ops.Tensor)
예제 #13
0
    def testEarlyStopping(self):
        """Checks that training halts early under a loose loss threshold.

        Fits a 100-tree ``TensorForestEstimator`` with
        ``early_stopping_loss_threshold=0.3`` and ``early_stopping_rounds=2``
        for up to 100 steps, then expects the checkpoint in ``model_dir`` to
        be at global step 5.
        """
        forest_params = tensor_forest.ForestHParams(
            num_trees=100,
            max_nodes=10000,
            num_classes=3,
            num_features=4,
            split_after_samples=20,
            inference_tree_paths=True)

        # Set a crazy threshold - 30% loss change.
        estimator = random_forest.TensorForestEstimator(
            forest_params.fill(),
            early_stopping_loss_threshold=0.3,
            early_stopping_rounds=2)

        train_fn = _get_classification_input_fns()[0]
        estimator.fit(input_fn=train_fn, steps=100)

        # We stopped early.
        self._assert_checkpoint(estimator.model_dir, global_step=5)
예제 #14
0
    def testInferenceConstruction(self):
        """Checks that an inference graph can be built from dense input.

        Feeds four two-feature rows into
        ``RandomForestGraphs.inference_graph`` and verifies that all three
        returned values are ``ops.Tensor`` instances.
        """
        input_data = [
            [-1., 0.],
            [-1., 2.],  # node 1
            [1., 0.],
            [1., -2.]
        ]  # node 2

        params = tensor_forest.ForestHParams(num_classes=4,
                                             num_features=2,
                                             num_trees=10,
                                             max_nodes=1000,
                                             split_after_samples=25).fill()

        graph_builder = tensor_forest.RandomForestGraphs(params)
        probs, paths, var = graph_builder.inference_graph(input_data)
        # assertIsInstance reports the actual type when a check fails.
        self.assertIsInstance(probs, ops.Tensor)
        self.assertIsInstance(paths, ops.Tensor)
        self.assertIsInstance(var, ops.Tensor)
예제 #15
0
    def testAutofillsRegressionHead(self):
        """Trains a regressor built without an explicit ``head`` argument.

        After 100 training steps the evaluation loss must not exceed 0.1 and
        the prediction from ``predict_input_fn`` must be within 1 of 24.
        """
        forest_params = tensor_forest.ForestHParams(
            num_trees=5,
            max_nodes=1000,
            num_classes=1,
            num_features=13,
            regression=True,
            split_after_samples=20)

        regressor = random_forest.CoreTensorForestEstimator(
            forest_params.fill())

        train_fn, predict_fn = _get_regression_input_fns()

        regressor.train(input_fn=train_fn, steps=100)
        metrics = regressor.evaluate(input_fn=train_fn, steps=10)
        # Passes when 0.1 >= loss.
        self.assertGreaterEqual(0.1, metrics['loss'])

        predicted = [
            item['predictions']
            for item in regressor.predict(input_fn=predict_fn)
        ]
        self.assertAllClose([[24.]], predicted, atol=1)
예제 #16
0
    def testClassification(self):
        """Fits a ``TensorForestEstimator`` classifier and checks its output.

        After 100 fit steps the evaluation accuracy and loss are checked,
        followed by the class probabilities predicted for
        ``predict_input_fn``.
        """
        forest_params = tensor_forest.ForestHParams(
            num_trees=3,
            max_nodes=1000,
            num_classes=3,
            num_features=4,
            split_after_samples=20,
            inference_tree_paths=True)
        estimator = random_forest.TensorForestEstimator(forest_params.fill())

        train_fn, predict_fn = _get_classification_input_fns()
        estimator.fit(input_fn=train_fn, steps=100)
        metrics = estimator.evaluate(input_fn=train_fn, steps=10)

        self.assertEqual(1.0, metrics['accuracy'])
        self.assertAllClose(0.55144483, metrics['loss'])

        probabilities = [
            item['probabilities']
            for item in estimator.predict(input_fn=predict_fn)
        ]
        self.assertAllClose([[0.576117, 0.211942, 0.211942]], probabilities)
예제 #17
0
    def setUp(self):
        """Creates the ForestHParams fixture and its derived attributes.

        Sets ``regression`` to False, derives ``num_nodes`` and
        ``num_leaves`` from ``hybrid_tree_depth``, and computes
        ``num_features_per_node`` as the bagging fraction times the feature
        count.
        """
        self.params = tensor_forest.ForestHParams(
            num_classes=2,
            num_features=31,
            layer_size=11,
            num_layers=13,
            num_trees=3,
            connection_probability=0.1,
            hybrid_tree_depth=4,
            regularization_strength=0.01,
            regularization="",
            base_random_seed=10,
            feature_bagging_fraction=1.0,
            learning_rate=0.01,
            weight_init_mean=0.0,
            weight_init_std=0.1)

        params = self.params
        params.regression = False

        depth = params.hybrid_tree_depth
        params.num_nodes = 2**depth - 1
        params.num_leaves = 2**(depth - 1)

        bagging_fraction = params.feature_bagging_fraction
        params.num_features_per_node = bagging_fraction * params.num_features
예제 #18
0
    def setUp(self):
        """Creates the ForestHParams fixture and a random input tensor.

        Sets ``regression`` to False, derives ``num_nodes`` and
        ``num_leaves`` from ``hybrid_tree_depth``, and stores in
        ``self.input_data`` a constant of 100 rows with ``num_features``
        values each, drawn with ``random.uniform(-1, 1)``.
        """
        self.params = tensor_forest.ForestHParams(num_classes=2,
                                                  num_features=31,
                                                  layer_size=11,
                                                  num_layers=13,
                                                  num_trees=17,
                                                  connection_probability=0.1,
                                                  hybrid_tree_depth=4,
                                                  regularization_strength=0.01,
                                                  regularization="",
                                                  learning_rate=0.01,
                                                  weight_init_mean=0.0,
                                                  weight_init_std=0.1)
        self.params.regression = False
        self.params.num_nodes = 2**self.params.hybrid_tree_depth - 1
        self.params.num_leaves = 2**(self.params.hybrid_tree_depth - 1)

        # Both loop variables are throwaway, so `_` is used for each and no
        # unused-variable lint suppression is needed.
        self.input_data = constant_op.constant(
            [[random.uniform(-1, 1) for _ in range(self.params.num_features)]
             for _ in range(100)])
예제 #19
0
    def testRegression(self):
        """Fits a ``TensorForestEstimator`` regressor and checks its output.

        After 100 fit steps the evaluation loss must not exceed 0.1 and the
        'scores' predicted for ``predict_input_fn`` must be within 1 of 24.
        """
        forest_params = tensor_forest.ForestHParams(
            num_trees=5,
            max_nodes=1000,
            num_classes=1,
            num_features=13,
            regression=True,
            split_after_samples=20)

        regressor = random_forest.TensorForestEstimator(forest_params.fill())

        train_fn, predict_fn = _get_regression_input_fns()

        regressor.fit(input_fn=train_fn, steps=100)
        metrics = regressor.evaluate(input_fn=train_fn, steps=10)
        # Passes when 0.1 >= loss.
        self.assertGreaterEqual(0.1, metrics['loss'])

        scores = [
            item['scores'] for item in regressor.predict(input_fn=predict_fn)
        ]
        self.assertAllClose([24.], scores, atol=1)
예제 #20
0
    def testEarlyStopping(self):
        """Checks that training halts early under a loose loss threshold.

        Trains a three-tree ``CoreTensorForestEstimator`` with
        ``early_stopping_loss_threshold=0.3`` and ``early_stopping_rounds=2``
        for up to 100 steps, then expects the checkpoint in ``model_dir`` to
        be at global step 8.
        """
        head = head_lib._multi_class_head_with_softmax_cross_entropy_loss(
            n_classes=3,
            loss_reduction=losses.Reduction.SUM_OVER_NONZERO_WEIGHTS)

        forest_params = tensor_forest.ForestHParams(
            num_trees=3,
            max_nodes=1000,
            num_classes=3,
            num_features=4,
            split_after_samples=20,
            inference_tree_paths=True)

        # Set a crazy threshold - 30% loss change.
        estimator = random_forest.CoreTensorForestEstimator(
            forest_params.fill(),
            head=head,
            early_stopping_loss_threshold=0.3,
            early_stopping_rounds=2)

        train_fn = _get_classification_input_fns()[0]
        estimator.train(input_fn=train_fn, steps=100)

        # We stopped early.
        self._assert_checkpoint(estimator.model_dir, global_step=8)
예제 #21
0
 def testInfrenceFromRestoredModel(self):
     """Checks inference on a forest restored from a serialized tree.

     A single decision tree is described as a dict, parsed into a
     ``_tree_proto.Model`` and serialized; the bytes are handed to
     ``RandomForestGraphs`` as the only tree config. The tree has one
     binary node that tests feature '0' against threshold 0, and two leaves
     that both hold the vector [0.0, 1.0], so every one of the four input
     rows is expected to yield [0.0, 1.0].
     """
     input_data = [
         [-1., 0.],
         [-1., 2.],  # node 1
         [1., 0.],
         [1., -2.]
     ]  # node 2
     expected_prediction = [[0.0, 1.0], [0.0, 1.0], [0.0, 1.0], [0.0, 1.0]]
     hparams = tensor_forest.ForestHParams(num_classes=2,
                                           num_features=2,
                                           num_trees=1,
                                           max_nodes=1000,
                                           split_after_samples=25).fill()
     tree_weight = {
         'decisionTree': {
             'nodes': [
                 {
                     'binaryNode': {
                         'rightChildId': 2,
                         'leftChildId': 1,
                         'inequalityLeftChildTest': {
                             'featureId': {
                                 'id': '0'
                             },
                             'threshold': {
                                 'floatValue': 0
                             }
                         }
                     }
                 },
                 {
                     'leaf': {
                         'vector': {
                             'value': [{
                                 'floatValue': 0.0
                             }, {
                                 'floatValue': 1.0
                             }]
                         }
                     },
                     'nodeId': 1
                 },
                 {
                     'leaf': {
                         'vector': {
                             'value': [{
                                 'floatValue': 0.0
                             }, {
                                 'floatValue': 1.0
                             }]
                         }
                     },
                     'nodeId': 2
                 },
             ]
         }
     }
     restored_tree_param = ParseDict(
         tree_weight, _tree_proto.Model()).SerializeToString()
     graph_builder = tensor_forest.RandomForestGraphs(
         hparams, [restored_tree_param])
     probs, paths, var = graph_builder.inference_graph(input_data)
     # assertIsInstance reports the actual type when a check fails.
     self.assertIsInstance(probs, ops.Tensor)
     self.assertIsInstance(paths, ops.Tensor)
     self.assertIsInstance(var, ops.Tensor)
     with self.cached_session():
         variables.global_variables_initializer().run()
         resources.initialize_resources(resources.shared_resources()).run()
         # Evaluate once and reuse the result for both checks.
         probs_value = probs.eval()
         # assertEqual: the assertEquals alias is deprecated and was removed
         # in Python 3.12.
         self.assertEqual(probs_value.shape, (4, 2))
         self.assertEqual(probs_value.tolist(), expected_prediction)