def testTrainRegressorWithDatasetSmallerBatch(self):
  # Even when using small batches, if (n_batches_per_layer * batch_size)
  # covers the entire data set, the result should be the same.
  train_input_fn = _make_train_input_fn_dataset(
      is_classification=False, batch=1)
  predict_input_fn = numpy_io.numpy_input_fn(
      x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

  est = boosted_trees.BoostedTreesRegressor(
      feature_columns=self._feature_columns,
      n_batches_per_layer=5,
      n_trees=1,
      max_depth=5)
  # Train stops after (n_batches_per_layer * n_trees * max_depth) steps.
  est.train(train_input_fn, steps=100)
  self._assert_checkpoint(
      est.model_dir, global_step=25, finalized_trees=1, attempted_layers=5)
  # 5 batches = one epoch.
  eval_res = est.evaluate(input_fn=train_input_fn, steps=5)
  self.assertAllClose(eval_res['average_loss'], 2.478283)
  predictions = list(est.predict(input_fn=predict_input_fn))
  self.assertAllClose(
      [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
      [pred['predictions'] for pred in predictions])
def testTrainRegressorWithDatasetWhenInputIsOverEarlier(self):
  train_input_fn = _make_train_input_fn_dataset(
      is_classification=False, repeat=3)  # Stops the input after 3 steps.
  predict_input_fn = numpy_io.numpy_input_fn(
      x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

  est = boosted_trees.BoostedTreesRegressor(
      feature_columns=self._feature_columns,
      n_batches_per_layer=1,
      n_trees=1,
      max_depth=5)
  # Note that training stops when the input is exhausted. This is not a
  # typical pattern, but dataset.repeat(3) ends the input stream after
  # 3 steps, so only 3 of the requested 100 steps actually run.
  est.train(train_input_fn, steps=100)
  self._assert_checkpoint(
      est.model_dir, global_step=3, finalized_trees=0, attempted_layers=3)
  eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
  self.assertAllClose(eval_res['average_loss'], 3.777295)
  predictions = list(est.predict(input_fn=predict_input_fn))
  self.assertAllClose(
      [[0.353850], [0.254100], [0.106850], [0.712100], [1.012100]],
      [pred['predictions'] for pred in predictions])
def testTrainRegressorWithDatasetLargerBatch(self):
  # A batch_size that is a multiple of the entire data size should still
  # yield the same result.
  train_input_fn = _make_train_input_fn_dataset(
      is_classification=False, batch=15)
  predict_input_fn = numpy_io.numpy_input_fn(
      x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

  est = boosted_trees.BoostedTreesRegressor(
      feature_columns=self._feature_columns,
      n_batches_per_layer=1,
      n_trees=1,
      max_depth=5)
  est.train(train_input_fn, steps=100)  # Stops after 5 steps anyway.
  self._assert_checkpoint(
      est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
  eval_res = est.evaluate(input_fn=train_input_fn, steps=1)
  self.assertAllClose(eval_res['average_loss'], 2.478283)
  predictions = list(est.predict(input_fn=predict_input_fn))
  self.assertAllClose(
      [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
      [pred['predictions'] for pred in predictions])
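# The dataset-based tests above rely on a module-level helper named
# _make_train_input_fn_dataset. A minimal sketch follows; it is an assumption
# for illustration, not necessarily this file's actual helper, and it presumes
# the FEATURES_DICT, CLASSIFICATION_LABELS and REGRESSION_LABELS fixtures plus
# `from tensorflow.python.data.ops import dataset_ops` at the top of the file.
def _make_train_input_fn_dataset(is_classification, batch=None, repeat=None):
  """Makes an input_fn returning a tf.data Dataset over the fixtures."""

  def _input_fn():
    labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS
    # Zip the whole fixture into a one-element dataset of (features, labels).
    ds = dataset_ops.Dataset.zip(
        (dataset_ops.Dataset.from_tensors(dict(FEATURES_DICT)),
         dataset_ops.Dataset.from_tensors(labels)))
    # repeat=None repeats indefinitely; repeat=3 exhausts the input after the
    # data has been consumed three times, which is what stops training early
    # in testTrainRegressorWithDatasetWhenInputIsOverEarlier.
    ds = ds.repeat(repeat)
    if batch:
      # Split the single full-size batch into batches of the requested size.
      ds = ds.unbatch().batch(batch)
    return ds

  return _input_fn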
def testInferRegressor(self):
  train_input_fn = _make_train_input_fn(is_classification=False)
  predict_input_fn = numpy_io.numpy_input_fn(
      x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

  est = boosted_trees.BoostedTreesRegressor(
      feature_columns=self._feature_columns,
      n_batches_per_layer=1,
      n_trees=1,
      max_depth=5)

  # It will stop after 5 steps because of the max depth and num trees.
  num_steps = 100
  # Train for a few steps, and validate final checkpoint.
  est.train(train_input_fn, steps=num_steps)
  self._assert_checkpoint(
      est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
  predictions = list(est.predict(input_fn=predict_input_fn))
  self.assertAllClose(
      [[0.571619], [0.262821], [0.124549], [0.956801], [1.769801]],
      [pred['predictions'] for pred in predictions])
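# Hedged sketch of the plain _make_train_input_fn helper used above: an
# assumption for illustration, returning the entire fixture as one batch so
# that n_batches_per_layer=1 sees the whole data set every step.
def _make_train_input_fn(is_classification):
  """Makes a train input_fn for classification/regression."""

  def _input_fn():
    labels = CLASSIFICATION_LABELS if is_classification else REGRESSION_LABELS
    return dict(FEATURES_DICT), labels

  return _input_fn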
def testTrainEvaluateAndPredictWithIndicatorColumn(self):
  categorical = feature_column.categorical_column_with_vocabulary_list(
      key='categorical', vocabulary_list=('bad', 'good', 'ok'))
  feature_indicator = feature_column.indicator_column(categorical)
  bucketized_col = feature_column.bucketized_column(
      feature_column.numeric_column(
          'an_uninformative_feature', dtype=dtypes.float32),
      BUCKET_BOUNDARIES)

  labels = np.array([[0.], [5.7], [5.7], [0.], [0.]], dtype=np.float32)
  # Our categorical feature defines the labels perfectly.
  input_fn = numpy_io.numpy_input_fn(
      x={
          'an_uninformative_feature': np.array([1, 1, 1, 1, 1]),
          'categorical': np.array(['bad', 'good', 'good', 'ok', 'bad']),
      },
      y=labels,
      batch_size=5,
      shuffle=False)

  # Train a depth-1 tree.
  est = boosted_trees.BoostedTreesRegressor(
      feature_columns=[bucketized_col, feature_indicator],
      n_batches_per_layer=1,
      n_trees=1,
      learning_rate=1.0,
      max_depth=1)
  num_steps = 1
  est.train(input_fn, steps=num_steps)
  ensemble = self._assert_checkpoint_and_return_model(
      est.model_dir, global_step=1, finalized_trees=1, attempted_layers=1)

  # We learned the labels perfectly.
  eval_res = est.evaluate(input_fn=input_fn, steps=1)
  self.assertAllClose(eval_res['loss'], 0)
  predictions = list(est.predict(input_fn))
  self.assertAllClose(labels, [pred['predictions'] for pred in predictions])

  self.assertEqual(3, len(ensemble.trees[0].nodes))
  # Check that the split happened on the 'good' value, which is encoded as
  # the feature with index 2 (0 - numeric, 1 - 'bad', 2 - 'good').
  self.assertEqual(2, ensemble.trees[0].nodes[0].bucketized_split.feature_id)
  self.assertEqual(0, ensemble.trees[0].nodes[0].bucketized_split.threshold)
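# A small illustration of the feature-id ordering asserted above. The helper
# below is hypothetical and for exposition only; the estimator assigns ids
# internally. With one numeric bucketized column plus an indicator column over
# ('bad', 'good', 'ok'), the assumed ordering is the numeric feature first,
# then one id per vocabulary entry.
def _expected_feature_ids(vocabulary_list):
  ids = {'an_uninformative_feature': 0}
  for offset, word in enumerate(vocabulary_list):
    ids[word] = 1 + offset
  return ids

# _expected_feature_ids(('bad', 'good', 'ok')) ==
#     {'an_uninformative_feature': 0, 'bad': 1, 'good': 2, 'ok': 3},
# which is why the split on 'good' shows up as feature_id 2.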
def testTrainAndEvaluateRegressor(self):
  input_fn = _make_train_input_fn(is_classification=False)

  est = boosted_trees.BoostedTreesRegressor(
      feature_columns=self._feature_columns,
      n_batches_per_layer=1,
      n_trees=2,
      max_depth=5)

  # It will stop after 10 steps because of the max depth and num trees.
  num_steps = 100
  # Train for a few steps, and validate final checkpoint.
  est.train(input_fn, steps=num_steps)
  self._assert_checkpoint(est.model_dir, 11)
  eval_res = est.evaluate(input_fn=input_fn, steps=1)
  self.assertAllClose(eval_res['average_loss'], 0.913176)
def testTrainRegressorWithRankOneLabel(self):
  """Tests that a label with a rank-1 tensor is also accepted by regressor."""

  def _input_fn_with_rank_one_label():
    return FEATURES_DICT, [1.5, 0.3, 0.2, 2., 5.]

  est = boosted_trees.BoostedTreesRegressor(
      feature_columns=self._feature_columns,
      n_batches_per_layer=1,
      n_trees=1,
      max_depth=5)

  # It will stop after 5 steps because of the max depth and num trees.
  num_steps = 100
  # Train for a few steps, and validate final checkpoint.
  est.train(_input_fn_with_rank_one_label, steps=num_steps)
  self._assert_checkpoint(
      est.model_dir, global_step=5, finalized_trees=1, attempted_layers=5)
  eval_res = est.evaluate(input_fn=_input_fn_with_rank_one_label, steps=1)
  self.assertAllClose(eval_res['average_loss'], 2.478283)
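# Hedged sketch of the module-level fixtures these tests reference. The
# regression label values follow from the rank-one label test above; the
# feature keys and values here are illustrative assumptions, not necessarily
# the file's actual data (numpy is assumed to be imported as np).
NUM_FEATURES = 3
BUCKET_BOUNDARIES = [-2., .5, 12.]  # Boundaries for the bucketized column.
INPUT_FEATURES = np.array(
    [[12.5, 1.0, -2.001, -2.0001, -1.999],
     [2.0, -3.0, 0.5, 0.0, 4.5],
     [3.0, 20.0, 50.0, -100.0, 102.75]],
    dtype=np.float32)
FEATURES_DICT = {'f_%d' % i: INPUT_FEATURES[i] for i in range(NUM_FEATURES)}
CLASSIFICATION_LABELS = [[0.], [1.], [1.], [0.], [0.]]
REGRESSION_LABELS = [[1.5], [0.3], [0.2], [2.], [5.]]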