def DISABLED_testBinaryClassifierTrainInMemoryFloatColumns(self):
  """In-memory binary classification over raw float columns with quantiles."""
  training_fn = _make_train_input_fn_dataset(is_classification=True)
  inference_fn = numpy_io.numpy_input_fn(
      x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

  classifier = boosted_trees.boosted_trees_classifier_train_in_memory(
      train_input_fn=training_fn,
      feature_columns=self._numeric_feature_columns,
      n_trees=1,
      max_depth=5,
      quantile_sketch_epsilon=0.33)

  # Training halts after 5 steps: a single tree capped at depth 5.
  expected_boundaries = [[-2.001, -1.999, 12.5], [-3., 0.4995, 2.],
                         [-100., 20., 102.75]]
  self._assert_checkpoint(
      classifier.model_dir,
      global_step=5,
      finalized_trees=1,
      attempted_layers=5,
      bucket_boundaries=expected_boundaries)

  # The model should fit the training data perfectly.
  metrics = classifier.evaluate(input_fn=training_fn, steps=1)
  self.assertAllClose(metrics['accuracy'], 1.0)

  preds = list(classifier.predict(input_fn=inference_fn))
  self.assertAllClose([[0], [1], [1], [0], [0]],
                      [p['class_ids'] for p in preds])
def testBinaryClassifierTrainInMemoryWithDataset(self):
  """In-memory binary classification fed from a tf.data-based input_fn."""
  training_fn = _make_train_input_fn_dataset(is_classification=True)
  inference_fn = numpy_io.numpy_input_fn(
      x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

  classifier = boosted_trees.boosted_trees_classifier_train_in_memory(
      train_input_fn=training_fn,
      feature_columns=self._feature_columns,
      n_trees=1,
      max_depth=5)

  # Training halts after 5 steps: a single tree capped at depth 5.
  self._assert_checkpoint(
      classifier.model_dir,
      global_step=5,
      finalized_trees=1,
      attempted_layers=5)

  # Evaluation on the training data should be perfect; predictions should
  # recover the training labels.
  metrics = classifier.evaluate(input_fn=training_fn, steps=1)
  self.assertAllClose(metrics['accuracy'], 1.0)

  preds = list(classifier.predict(input_fn=inference_fn))
  self.assertAllClose([[0], [1], [1], [0], [0]],
                      [p['class_ids'] for p in preds])
def testV2(self):
  """Same float-column scenario, run under control-flow-v2 semantics."""
  with compat.forward_compatibility_horizon(2019, 8, 9):
    # Opt into v2 control flow and resource variables for this test only.
    control_flow_util.enable_control_flow_v2()
    variable_scope.enable_resource_variables()

    training_fn = _make_train_input_fn_dataset(is_classification=True)
    inference_fn = numpy_io.numpy_input_fn(
        x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

    classifier = boosted_trees.boosted_trees_classifier_train_in_memory(
        train_input_fn=training_fn,
        feature_columns=self._numeric_feature_columns,
        n_trees=1,
        max_depth=5,
        quantile_sketch_epsilon=0.33)

    # Training halts after 5 steps: a single tree capped at depth 5.
    expected_boundaries = [[-2.001, -1.999, 12.5], [-3., 0.4995, 2.],
                           [-100., 20., 102.75]]
    self._assert_checkpoint(
        classifier.model_dir,
        global_step=5,
        finalized_trees=1,
        attempted_layers=5,
        bucket_boundaries=expected_boundaries)

    metrics = classifier.evaluate(input_fn=training_fn, steps=1)
    self.assertAllClose(metrics['accuracy'], 1.0)

    preds = list(classifier.predict(input_fn=inference_fn))
    self.assertAllClose([[0], [1], [1], [0], [0]],
                        [p['class_ids'] for p in preds])
def testBinaryClassifierTrainInMemoryAndEvalAndInferWithPrePruning(self):
  """In-memory training with pre-pruning enabled via tree complexity."""
  training_fn = _make_train_input_fn(is_classification=True)
  inference_fn = numpy_io.numpy_input_fn(
      x=FEATURES_DICT, y=None, batch_size=1, num_epochs=1, shuffle=False)

  classifier = boosted_trees.boosted_trees_classifier_train_in_memory(
      train_input_fn=training_fn,
      feature_columns=self._feature_columns,
      n_trees=1,
      max_depth=5,
      pruning_mode='pre',
      tree_complexity=0.01)

  # With pre-pruning the single tree never reaches depth 5, so the stopping
  # hook fires after roughly 2 * depth * n_trees steps and no tree is
  # finalized.
  self._assert_checkpoint(
      classifier.model_dir,
      global_step=11,
      finalized_trees=0,
      attempted_layers=11)

  # Evaluation on the training data should still be perfect.
  metrics = classifier.evaluate(input_fn=training_fn, steps=1)
  self.assertAllClose(metrics['accuracy'], 1.0)

  # Predictions should recover the training labels.
  preds = list(classifier.predict(input_fn=inference_fn))
  self.assertAllClose([[0], [1], [1], [0], [0]],
                      [p['class_ids'] for p in preds])
def testBinaryClassifierTrainInMemoryWithMixedColumns(self):
  """In-memory training mixing categorical, bucketized and numeric columns."""
  # One column of each flavor: vocabulary-backed categorical (as indicator),
  # bucketized float, and raw float handled via quantile sketching.
  vocab_col = feature_column.categorical_column_with_vocabulary_list(
      key='f_0', vocabulary_list=('bad', 'good', 'ok'))
  indicator_col = feature_column.indicator_column(vocab_col)
  bucketized_col = feature_column.bucketized_column(
      feature_column.numeric_column('f_1', dtype=dtypes.float32),
      BUCKET_BOUNDARIES)
  numeric_col = feature_column.numeric_column('f_2', dtype=dtypes.float32)

  labels = np.array([[0], [1], [1], [1], [1]], dtype=np.float32)
  input_fn = numpy_io.numpy_input_fn(
      x={
          'f_0': np.array(['bad', 'good', 'good', 'ok', 'bad']),
          'f_1': np.array([1, 1, 1, 1, 1]),
          'f_2': np.array([12.5, 1.0, -2.001, -2.0001, -1.999]),
      },
      y=labels,
      num_epochs=None,
      batch_size=5,
      shuffle=False)

  classifier = boosted_trees.boosted_trees_classifier_train_in_memory(
      train_input_fn=input_fn,
      feature_columns=[numeric_col, bucketized_col, indicator_col],
      n_trees=1,
      max_depth=5,
      quantile_sketch_epsilon=0.33)

  self._assert_checkpoint(
      classifier.model_dir, global_step=5, finalized_trees=1,
      attempted_layers=5)

  metrics = classifier.evaluate(input_fn=input_fn, steps=1)
  self.assertAllClose(metrics['accuracy'], 1.0)