def testTrainRegressorWithDatasetSmallerBatch(self):
        """Train a regressor from a dataset input fn created with `batch=1`.

        Small batches should give the same result as long as
        (n_batches_per_layer * batch_size) adds up to the entire data size.
        The test checks the final checkpoint, the evaluation loss and the
        per-example predictions.
        """
        dataset_input_fn = _make_train_input_fn_dataset(
            is_classification=False, batch=1)
        inference_input_fn = numpy_io.numpy_input_fn(
            x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

        regressor = boosted_trees.BoostedTreesRegressor(
            feature_columns=self._feature_columns,
            n_batches_per_layer=5,
            n_trees=1,
            max_depth=5)

        # Although 100 steps are requested, training ends after
        # (n_batches_per_layer * n_trees * max_depth) steps.
        regressor.train(dataset_input_fn, steps=100)
        self._assert_checkpoint(
            regressor.model_dir,
            global_step=25,
            finalized_trees=1,
            attempted_layers=5)

        # One epoch is made of 5 batches.
        metrics = regressor.evaluate(input_fn=dataset_input_fn, steps=5)
        self.assertAllClose(metrics['average_loss'], 2.478283)

        expected_predictions = [
            [0.571619], [0.262821], [0.124549], [0.956801], [1.769801]]
        actual_predictions = [
            item['predictions']
            for item in regressor.predict(input_fn=inference_input_fn)]
        self.assertAllClose(expected_predictions, actual_predictions)
    def testTrainRegressorWithDatasetWhenInputIsOverEarlier(self):
        """Train a regressor whose input ends before the tree is finished.

        The dataset input fn is created with `repeat=3`, so training is
        expected to end after 3 steps with no finalized tree; the checkpoint,
        evaluation loss and predictions of that partial model are checked.
        """
        # repeat=3 makes the input stop after 3 steps.
        dataset_input_fn = _make_train_input_fn_dataset(
            is_classification=False, repeat=3)
        inference_input_fn = numpy_io.numpy_input_fn(
            x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

        regressor = boosted_trees.BoostedTreesRegressor(
            feature_columns=self._feature_columns,
            n_batches_per_layer=1,
            n_trees=1,
            max_depth=5)

        # Training ends once the input is exhausted. This may not be a typical
        # usage pattern, but dataset.repeat(3) makes the input stream run out
        # after 3 steps, well before the 100 requested.
        regressor.train(dataset_input_fn, steps=100)
        self._assert_checkpoint(
            regressor.model_dir,
            global_step=3,
            finalized_trees=0,
            attempted_layers=3)

        metrics = regressor.evaluate(input_fn=dataset_input_fn, steps=1)
        self.assertAllClose(metrics['average_loss'], 3.777295)

        expected_predictions = [
            [0.353850], [0.254100], [0.106850], [0.712100], [1.012100]]
        actual_predictions = [
            item['predictions']
            for item in regressor.predict(input_fn=inference_input_fn)]
        self.assertAllClose(expected_predictions, actual_predictions)
    def testTrainRegressorWithDatasetLargerBatch(self):
        """Train a regressor from a dataset input fn created with `batch=15`.

        A batch size that is a multiple of the entire data size should still
        yield the same result. The test checks the final checkpoint, the
        evaluation loss and the per-example predictions.
        """
        dataset_input_fn = _make_train_input_fn_dataset(
            is_classification=False, batch=15)
        inference_input_fn = numpy_io.numpy_input_fn(
            x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

        regressor = boosted_trees.BoostedTreesRegressor(
            feature_columns=self._feature_columns,
            n_batches_per_layer=1,
            n_trees=1,
            max_depth=5)

        # 100 steps are requested, but training stops after 5 steps anyway.
        regressor.train(dataset_input_fn, steps=100)
        self._assert_checkpoint(
            regressor.model_dir,
            global_step=5,
            finalized_trees=1,
            attempted_layers=5)

        metrics = regressor.evaluate(input_fn=dataset_input_fn, steps=1)
        self.assertAllClose(metrics['average_loss'], 2.478283)

        expected_predictions = [
            [0.571619], [0.262821], [0.124549], [0.956801], [1.769801]]
        actual_predictions = [
            item['predictions']
            for item in regressor.predict(input_fn=inference_input_fn)]
        self.assertAllClose(expected_predictions, actual_predictions)
    def testInferRegressor(self):
        """Train a one-tree regressor and check its per-example predictions.

        After training, the final checkpoint is validated and the values
        yielded by `predict` are compared against fixed expected numbers.
        """
        training_input_fn = _make_train_input_fn(is_classification=False)
        inference_input_fn = numpy_io.numpy_input_fn(
            x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

        regressor = boosted_trees.BoostedTreesRegressor(
            feature_columns=self._feature_columns,
            n_batches_per_layer=1,
            n_trees=1,
            max_depth=5)

        # More steps are requested than will run: training stops after 5 steps
        # because of the max depth and the number of trees.
        regressor.train(training_input_fn, steps=100)
        self._assert_checkpoint(
            regressor.model_dir,
            global_step=5,
            finalized_trees=1,
            attempted_layers=5)

        expected_predictions = [
            [0.571619], [0.262821], [0.124549], [0.956801], [1.769801]]
        actual_predictions = [
            item['predictions']
            for item in regressor.predict(input_fn=inference_input_fn)]
        self.assertAllClose(expected_predictions, actual_predictions)
    # Example #5
    # 0
    def testInferRegressor(self):
        """Train a one-tree regressor and check its per-example predictions.

        After training, the final checkpoint is validated through
        `_assert_checkpoint`, and `predict` is expected to yield exactly five
        results whose 'predictions' values match fixed expected numbers.
        """
        train_input_fn = _make_train_input_fn(is_classification=False)
        predict_input_fn = numpy_io.numpy_input_fn(x=FEATURES_DICT,
                                                   y=None,
                                                   batch_size=1,
                                                   num_epochs=1,
                                                   shuffle=False)

        est = boosted_trees.BoostedTreesRegressor(
            feature_columns=self._feature_columns,
            n_batches_per_layer=1,
            n_trees=1,
            max_depth=5)

        # It will stop after 5 steps because of the max depth and num trees.
        num_steps = 100
        # Train for a few steps, and validate final checkpoint.
        est.train(train_input_fn, steps=num_steps)
        # NOTE(review): the comment above says 5 steps while the helper is
        # passed 6; confirm against _assert_checkpoint, which is not visible
        # from here.
        self._assert_checkpoint(est.model_dir, 6)

        predictions = list(est.predict(input_fn=predict_input_fn))

        expected_predictions = (
            [0.703549], [0.266539], [0.256479], [1.088732], [1.901732])
        # assertEqual is used instead of the deprecated assertEquals alias,
        # which is no longer available in recent unittest versions.
        self.assertEqual(5, len(predictions))
        for index, expected in enumerate(expected_predictions):
            self.assertAllClose(expected, predictions[index]['predictions'])
    def testTrainEvaluateAndPredictWithIndicatorColumn(self):
        """Fit a depth-1 tree on an indicator column and inspect its split.

        The training data pairs a constant numeric feature with a categorical
        feature whose 'good' value coincides with the non-zero labels. The
        test then checks the loss, the predictions and the root split of the
        single trained tree.
        """
        vocab_column = feature_column.categorical_column_with_vocabulary_list(
            key='categorical', vocabulary_list=('bad', 'good', 'ok'))
        indicator_col = feature_column.indicator_column(vocab_column)
        numeric_col = feature_column.numeric_column(
            'an_uninformative_feature', dtype=dtypes.float32)
        bucketized_col = feature_column.bucketized_column(
            numeric_col, BUCKET_BOUNDARIES)

        # The categorical feature alone determines the label.
        labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32)
        features = {
            'an_uninformative_feature': np.array([1, 1, 1, 1, 1]),
            'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']),
        }
        input_fn = numpy_io.numpy_input_fn(
            x=features, y=labels, batch_size=5, shuffle=False)

        # A single tree of depth 1.
        regressor = boosted_trees.BoostedTreesRegressor(
            feature_columns=[bucketized_col, indicator_col],
            n_batches_per_layer=1,
            n_trees=1,
            learning_rate=1.0,
            max_depth=1)

        regressor.train(input_fn, steps=1)
        ensemble = self._assert_checkpoint_and_return_model(
            regressor.model_dir,
            global_step=1,
            finalized_trees=1,
            attempted_layers=1)

        # The labels are expected to be fit exactly.
        metrics = regressor.evaluate(input_fn=input_fn, steps=1)
        self.assertAllClose(metrics['loss'], 0)

        predicted = [
            item['predictions'] for item in regressor.predict(input_fn)]
        self.assertAllClose(labels, predicted)

        tree_nodes = ensemble.trees[0].nodes
        self.assertEqual(3, len(tree_nodes))

        # The split is expected on the 'good' value, which will be encoded as
        # the feature with index 2 (0 - numeric, 1 - 'bad').
        root_split = tree_nodes[0].bucketized_split
        self.assertEqual(2, root_split.feature_id)
        self.assertEqual(0, root_split.threshold)
    # Example #7
    # 0
    def testTrainAndEvaluateRegressor(self):
        """Train a two-tree regressor and check its evaluation loss.

        The final checkpoint is validated through `_assert_checkpoint`, and
        the 'average_loss' metric from a one-step evaluation is compared
        against a fixed expected value.
        """
        training_input_fn = _make_train_input_fn(is_classification=False)

        regressor = boosted_trees.BoostedTreesRegressor(
            feature_columns=self._feature_columns,
            n_batches_per_layer=1,
            n_trees=2,
            max_depth=5)

        # More steps are requested than will run: training stops after 10
        # steps because of the max depth and the number of trees.
        regressor.train(training_input_fn, steps=100)
        self._assert_checkpoint(regressor.model_dir, 11)

        metrics = regressor.evaluate(input_fn=training_input_fn, steps=1)
        self.assertAllClose(metrics['average_loss'], 0.913176)
    def testTrainRegressorWithRankOneLabel(self):
        """Checks that the regressor also accepts a rank-1 label tensor."""

        def _rank_one_label_input_fn():
            # Labels are given as a flat list rather than one list per example.
            flat_labels = [1.5, 0.3, 0.2, 2., 5.]
            return FEATURES_DICT, flat_labels

        regressor = boosted_trees.BoostedTreesRegressor(
            feature_columns=self._feature_columns,
            n_batches_per_layer=1,
            n_trees=1,
            max_depth=5)

        # More steps are requested than will run: training stops after 5 steps
        # because of the max depth and the number of trees.
        regressor.train(_rank_one_label_input_fn, steps=100)
        self._assert_checkpoint(
            regressor.model_dir,
            global_step=5,
            finalized_trees=1,
            attempted_layers=5)

        metrics = regressor.evaluate(
            input_fn=_rank_one_label_input_fn, steps=1)
        self.assertAllClose(metrics['average_loss'], 2.478283)