def benchmarkLogisticFloatLabel(self):
  """Benchmarks binary classification trained from float-valued labels."""

  def _input_fn(num_epochs=None):
    # Three examples: a dense 'age' column and a sparse 'language' column.
    age = input_lib.limit_epochs(
        constant_op.constant(((50,), (20,), (10,))), num_epochs=num_epochs)
    language = sparse_tensor.SparseTensor(
        values=input_lib.limit_epochs(
            ('en', 'fr', 'zh'), num_epochs=num_epochs),
        indices=((0, 0), (0, 1), (2, 0)),
        dense_shape=(3, 2))
    float_labels = constant_op.constant(
        ((0.8,), (0.,), (0.2,)), dtype=dtypes.float32)
    return {'age': age, 'language': language}, float_labels

  lang_column = feature_column.sparse_column_with_hash_bucket(
      'language', hash_bucket_size=20)
  n_classes = 2
  classifier = dnn.DNNClassifier(
      n_classes=n_classes,
      feature_columns=(
          feature_column.embedding_column(lang_column, dimension=1),
          feature_column.real_valued_column('age')),
      hidden_units=(3, 3),
      config=run_config.RunConfig(tf_random_seed=1))
  steps = 1000
  metrics = classifier.fit(input_fn=_input_fn, steps=steps).evaluate(
      input_fn=_input_fn, steps=1)
  estimator_test_utils.assert_in_range(
      steps, steps + 5, 'global_step', metrics)

  # Prediction probabilities mirror the labels column, which proves that the
  # classifier learns from float input.
  self._report_metrics(metrics)
  self._report_predictions(
      classifier=classifier,
      input_fn=functools.partial(_input_fn, num_epochs=1),
      iters=metrics['global_step'],
      n_examples=3,
      n_classes=n_classes,
      expected_probabilities=((0.2, 0.8), (1., 0.), (0.8, 0.2)),
      expected_classes=(1, 0, 0),
      benchmark_name_override=(
          'DNNClassifierBenchmark.benchmarkLogisticFloatLabel_predictions'))
def _assertSingleClassMetrics(self, metrics):
  """Asserts binary-classification quality metrics lie in [0.9, 1.0]."""
  for key in ('auc',
              'accuracy/threshold_0.500000_mean',
              'precision/positive_threshold_0.500000_mean',
              'recall/positive_threshold_0.500000_mean'):
    estimator_test_utils.assert_in_range(0.9, 1.0, key, metrics)
  self._assertCommonMetrics(metrics)
def benchmarkLogisticFloatLabel(self):
  """Benchmark: logistic classification driven by float labels."""

  def _input_fn(num_epochs=None):
    features = {
        'age':
            input_lib.limit_epochs(
                constant_op.constant(((50,), (20,), (10,))),
                num_epochs=num_epochs),
        'language':
            sparse_tensor.SparseTensor(
                values=input_lib.limit_epochs(
                    ('en', 'fr', 'zh'), num_epochs=num_epochs),
                indices=((0, 0), (0, 1), (2, 0)),
                dense_shape=(3, 2)),
    }
    return features, constant_op.constant(
        ((0.8,), (0.,), (0.2,)), dtype=dtypes.float32)

  bucketed_language = feature_column.sparse_column_with_hash_bucket(
      'language', hash_bucket_size=20)
  n_classes = 2
  classifier = dnn.DNNClassifier(
      n_classes=n_classes,
      feature_columns=(
          feature_column.embedding_column(bucketed_language, dimension=1),
          feature_column.real_valued_column('age')),
      hidden_units=(3, 3),
      config=run_config.RunConfig(tf_random_seed=1))
  steps = 1000
  fitted = classifier.fit(input_fn=_input_fn, steps=steps)
  metrics = fitted.evaluate(input_fn=_input_fn, steps=1)
  estimator_test_utils.assert_in_range(
      steps, steps + 5, 'global_step', metrics)

  # Prediction probabilities mirror the labels column, which proves that the
  # classifier learns from float input.
  self._report_metrics(metrics)
  self._report_predictions(
      classifier=classifier,
      input_fn=functools.partial(_input_fn, num_epochs=1),
      iters=metrics['global_step'],
      n_examples=3,
      n_classes=n_classes,
      expected_probabilities=((0.2, 0.8), (1., 0.), (0.8, 0.2)),
      expected_classes=(1, 0, 0),
      benchmark_name_override=(
          'DNNClassifierBenchmark.benchmarkLogisticFloatLabel_predictions'))
def _assertSingleClassMetrics(self, metrics):
  """Checks AUC, accuracy, precision and recall are all within [0.9, 1.0]."""
  check = estimator_test_utils.assert_in_range
  check(0.9, 1.0, 'auc', metrics)
  check(0.9, 1.0, 'accuracy/threshold_0.500000_mean', metrics)
  check(0.9, 1.0, 'precision/positive_threshold_0.500000_mean', metrics)
  check(0.9, 1.0, 'recall/positive_threshold_0.500000_mean', metrics)
  self._assertCommonMetrics(metrics)
def benchmarkLogisticNpMatrixData(self):
  """Benchmarks logistic regression fed raw numpy matrices.

  Uses the module aliases (dnn, feature_column, run_config) already used by
  the other benchmarks in this file, instead of the long tf.contrib.*
  attribute chains, for consistency.
  """
  classifier = dnn.DNNClassifier(
      feature_columns=(feature_column.real_valued_column('', dimension=4),),
      hidden_units=(3, 3),
      config=run_config.RunConfig(tf_random_seed=1))
  iris = test_data.prepare_iris_data_for_logistic_regression()
  train_x = iris.data
  train_y = iris.target
  steps = 100
  metrics = classifier.fit(x=train_x, y=train_y, steps=steps).evaluate(
      x=train_x, y=train_y, steps=1)
  estimator_test_utils.assert_in_range(
      steps, steps + 5, 'global_step', metrics)
  estimator_test_utils.assert_in_range(0.8, 1.0, 'accuracy', metrics)
  self._report_metrics(metrics)
def benchmarkLogisticNpMatrixData(self):
  """Benchmark: logistic regression on a plain numpy feature matrix."""
  run_cfg = run_config.RunConfig(tf_random_seed=1)
  classifier = dnn.DNNClassifier(
      feature_columns=(feature_column.real_valued_column('', dimension=4),),
      hidden_units=(3, 3),
      config=run_cfg)
  iris = test_data.prepare_iris_data_for_logistic_regression()
  steps = 100
  fitted = classifier.fit(x=iris.data, y=iris.target, steps=steps)
  metrics = fitted.evaluate(x=iris.data, y=iris.target, steps=1)
  estimator_test_utils.assert_in_range(
      steps, steps + 5, 'global_step', metrics)
  estimator_test_utils.assert_in_range(0.8, 1.0, 'accuracy', metrics)
  self._report_metrics(metrics)
def benchmarkLogisticMatrixDataLabels1D(self):
  """Benchmarks logistic regression when labels arrive as a rank-1 tensor.

  Uses the module aliases (constant_op, dtypes, dnn, feature_column,
  run_config) already used by the other benchmarks in this file, instead of
  tf.* attribute chains, for consistency.
  """

  def _input_fn():
    iris = test_data.prepare_iris_data_for_logistic_regression()
    features = {
        'feature': constant_op.constant(iris.data, dtype=dtypes.float32)
    }
    # Labels are deliberately rank-1 (shape (100,)) rather than a column
    # vector, exercising label reshaping inside the estimator.
    labels = constant_op.constant(
        iris.target, shape=(100,), dtype=dtypes.int32)
    return features, labels

  classifier = dnn.DNNClassifier(
      feature_columns=(
          feature_column.real_valued_column('feature', dimension=4),),
      hidden_units=(3, 3),
      config=run_config.RunConfig(tf_random_seed=1))
  steps = 1000
  metrics = classifier.fit(input_fn=_input_fn, steps=steps).evaluate(
      input_fn=_input_fn, steps=1)
  estimator_test_utils.assert_in_range(
      steps, steps + 5, 'global_step', metrics)
  estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)
  self._report_metrics(metrics)
def benchmarkLogisticMatrixDataLabels1D(self):
  """Benchmarks logistic regression with 1-D (non-column) label tensors.

  Rewritten to use the module aliases (constant_op, dtypes, dnn,
  feature_column, run_config) that the rest of this file uses, instead of
  tf.* attribute chains, for consistency.
  """

  def _input_fn():
    iris = test_data.prepare_iris_data_for_logistic_regression()
    # shape=(100,) keeps the labels rank-1 on purpose.
    return {
        'feature': constant_op.constant(iris.data, dtype=dtypes.float32)
    }, constant_op.constant(iris.target, shape=(100,), dtype=dtypes.int32)

  classifier = dnn.DNNClassifier(
      feature_columns=(
          feature_column.real_valued_column('feature', dimension=4),),
      hidden_units=(3, 3),
      config=run_config.RunConfig(tf_random_seed=1))
  steps = 1000
  fitted = classifier.fit(input_fn=_input_fn, steps=steps)
  metrics = fitted.evaluate(input_fn=_input_fn, steps=1)
  estimator_test_utils.assert_in_range(
      steps, steps + 5, 'global_step', metrics)
  estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)
  self._report_metrics(metrics)
def _assertCommonMetrics(self, metrics):
  """Checks step/accuracy/loss ranges and reports the benchmark results."""
  estimator_test_utils.assert_in_range(
      _ITERS, _ITERS + 5, 'global_step', metrics)
  estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)
  estimator_test_utils.assert_in_range(0.0, 0.2, 'loss', metrics)
  # Only the whitelisted metric keys are attached to the benchmark report.
  extras = {k: v for k, v in metrics.items() if k in _METRIC_KEYS}
  self.report_benchmark(iters=metrics['global_step'], extras=extras)
def _assertCommonMetrics(self, metrics):
  """Validates shared metric ranges, then emits the benchmark record."""
  check = estimator_test_utils.assert_in_range
  check(_ITERS, _ITERS + 5, 'global_step', metrics)
  check(0.9, 1.0, 'accuracy', metrics)
  check(0.0, 0.2, 'loss', metrics)
  reported = {key: metrics[key] for key in metrics if key in _METRIC_KEYS}
  self.report_benchmark(iters=metrics['global_step'], extras=reported)
def benchmarkLogisticMatrixData(self):
  """Benchmarks binary logistic classification on the iris matrix data.

  Uses the module aliases (dnn, feature_column, run_config) already used by
  the other benchmarks in this file, instead of tf.contrib.* attribute
  chains, for consistency.
  """
  classifier = dnn.DNNClassifier(
      feature_columns=(
          feature_column.real_valued_column('feature', dimension=4),),
      hidden_units=(3, 3),
      config=run_config.RunConfig(tf_random_seed=1))
  input_fn = test_data.iris_input_logistic_fn
  steps = 400
  metrics = classifier.fit(input_fn=input_fn, steps=steps).evaluate(
      input_fn=input_fn, steps=1)
  estimator_test_utils.assert_in_range(
      steps, steps + 5, 'global_step', metrics)
  estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)
  estimator_test_utils.assert_in_range(0.0, 0.3, 'loss', metrics)
  self._report_metrics(metrics)
def benchmarkLogisticMatrixData(self):
  """Benchmark: binary logistic classification over iris matrix input."""
  columns = (feature_column.real_valued_column('feature', dimension=4),)
  classifier = dnn.DNNClassifier(
      feature_columns=columns,
      hidden_units=(3, 3),
      config=run_config.RunConfig(tf_random_seed=1))
  input_fn = test_data.iris_input_logistic_fn
  steps = 400
  fitted = classifier.fit(input_fn=input_fn, steps=steps)
  metrics = fitted.evaluate(input_fn=input_fn, steps=1)
  for low, high, key in ((steps, steps + 5, 'global_step'),
                         (0.9, 1.0, 'accuracy'),
                         (0.0, 0.3, 'loss')):
    estimator_test_utils.assert_in_range(low, high, key, metrics)
  self._report_metrics(metrics)
def benchmarkLogisticTensorData(self):
  """Benchmarks logistic classification fed by in-graph tensors."""

  def _input_fn(num_epochs=None):
    age = input_lib.limit_epochs(
        constant_op.constant(((.8,), (0.2,), (.1,))), num_epochs=num_epochs)
    language = sparse_tensor.SparseTensor(
        values=input_lib.limit_epochs(
            ('en', 'fr', 'zh'), num_epochs=num_epochs),
        indices=((0, 0), (0, 1), (2, 0)),
        dense_shape=(3, 2))
    labels = constant_op.constant(((1,), (0,), (0,)), dtype=dtypes.int32)
    return {'age': age, 'language': language}, labels

  lang_column = feature_column.sparse_column_with_hash_bucket(
      'language', hash_bucket_size=20)
  classifier = dnn.DNNClassifier(
      feature_columns=(
          feature_column.embedding_column(lang_column, dimension=1),
          feature_column.real_valued_column('age')),
      hidden_units=(3, 3),
      config=run_config.RunConfig(tf_random_seed=1))
  steps = 100
  metrics = classifier.fit(input_fn=_input_fn, steps=steps).evaluate(
      input_fn=_input_fn, steps=1)
  estimator_test_utils.assert_in_range(
      steps, steps + 5, 'global_step', metrics)
  estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)
  estimator_test_utils.assert_in_range(0.0, 0.3, 'loss', metrics)
  self._report_metrics(metrics)
  self._report_predictions(
      classifier=classifier,
      input_fn=functools.partial(_input_fn, num_epochs=1),
      iters=metrics['global_step'],
      n_examples=3,
      n_classes=2,
      expected_classes=(1, 0, 0),
      benchmark_name_override=(
          'DNNClassifierBenchmark.benchmarkLogisticTensorData_predictions'))
def benchmarkLogisticTensorData(self):
  """Benchmark: logistic classification over tensor-valued features."""

  def _input_fn(num_epochs=None):
    features = {
        'age':
            input_lib.limit_epochs(
                constant_op.constant(((.8,), (0.2,), (.1,))),
                num_epochs=num_epochs),
        'language':
            sparse_tensor.SparseTensor(
                values=input_lib.limit_epochs(
                    ('en', 'fr', 'zh'), num_epochs=num_epochs),
                indices=((0, 0), (0, 1), (2, 0)),
                dense_shape=(3, 2)),
    }
    return features, constant_op.constant(
        ((1,), (0,), (0,)), dtype=dtypes.int32)

  hashed_lang = feature_column.sparse_column_with_hash_bucket(
      'language', hash_bucket_size=20)
  classifier = dnn.DNNClassifier(
      feature_columns=(
          feature_column.embedding_column(hashed_lang, dimension=1),
          feature_column.real_valued_column('age')),
      hidden_units=(3, 3),
      config=run_config.RunConfig(tf_random_seed=1))
  steps = 100
  fitted = classifier.fit(input_fn=_input_fn, steps=steps)
  metrics = fitted.evaluate(input_fn=_input_fn, steps=1)
  check = estimator_test_utils.assert_in_range
  check(steps, steps + 5, 'global_step', metrics)
  check(0.9, 1.0, 'accuracy', metrics)
  check(0.0, 0.3, 'loss', metrics)
  self._report_metrics(metrics)
  self._report_predictions(
      classifier=classifier,
      input_fn=functools.partial(_input_fn, num_epochs=1),
      iters=metrics['global_step'],
      n_examples=3,
      n_classes=2,
      expected_classes=(1, 0, 0),
      benchmark_name_override=(
          'DNNClassifierBenchmark.benchmarkLogisticTensorData_predictions'))
def benchmarkMultiClassMatrixData(self):
  """Tests multi-class classification using matrix data as input.

  Uses the module aliases (dnn, feature_column, run_config) already used by
  the other benchmarks in this file, instead of tf.contrib.* attribute
  chains, for consistency.
  """
  classifier = dnn.DNNClassifier(
      n_classes=3,
      feature_columns=(
          feature_column.real_valued_column('feature', dimension=4),),
      hidden_units=(3, 3),
      config=run_config.RunConfig(tf_random_seed=1))
  input_fn = test_data.iris_input_multiclass_fn
  steps = 500
  metrics = classifier.fit(input_fn=input_fn, steps=steps).evaluate(
      input_fn=input_fn, steps=1)
  estimator_test_utils.assert_in_range(
      steps, steps + 5, 'global_step', metrics)
  estimator_test_utils.assert_in_range(0.9, 1.0, 'accuracy', metrics)
  estimator_test_utils.assert_in_range(0.0, 0.4, 'loss', metrics)
  self._report_metrics(metrics)
def benchmarkMultiClassMatrixData(self):
  """Tests multi-class classification using matrix data as input."""
  classifier = dnn.DNNClassifier(
      n_classes=3,
      feature_columns=(
          feature_column.real_valued_column('feature', dimension=4),),
      hidden_units=(3, 3),
      config=run_config.RunConfig(tf_random_seed=1))
  input_fn = test_data.iris_input_multiclass_fn
  steps = 500
  fitted = classifier.fit(input_fn=input_fn, steps=steps)
  metrics = fitted.evaluate(input_fn=input_fn, steps=1)
  check = estimator_test_utils.assert_in_range
  check(steps, steps + 5, 'global_step', metrics)
  check(0.9, 1.0, 'accuracy', metrics)
  check(0.0, 0.4, 'loss', metrics)
  self._report_metrics(metrics)
def _assert_metrics_in_range(keys, metrics):
  """Asserts each named metric lies within [0, 1] with a float tolerance."""
  epsilon = 0.00001  # Added for floating point edge cases.
  lower, upper = 0.0 - epsilon, 1.0 + epsilon
  for key in keys:
    estimator_test_utils.assert_in_range(lower, upper, key, metrics)
def _assert_metrics_in_range(keys, metrics):
  """Checks every metric named in `keys` is in [0, 1], allowing fp slack."""
  epsilon = 0.00001  # Added for floating point edge cases.
  for metric_key in keys:
    estimator_test_utils.assert_in_range(
        0.0 - epsilon, 1.0 + epsilon, metric_key, metrics)